Skip to content

blob_manager

WorkspaceManager

Manages a local directory used to offload binary artifacts.

Tools that produce firmware dumps, memory captures, or otherwise large payloads write the bytes to disk through this manager and hand back a compact descriptor to the LLM. This keeps multi-megabyte blobs out of the model's context window while preserving an addressable handle the LLM (or the human operator) can pass to follow-up tools.

Source code in wintermute/utils/blob_manager.py
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
class WorkspaceManager:
    """Manages a local directory used to offload binary artifacts.

    Tools that produce firmware dumps, memory captures, or otherwise large
    payloads write the bytes to disk through this manager and hand back a
    compact descriptor to the LLM. This keeps multi-megabyte blobs out of
    the model's context window while preserving an addressable handle the
    LLM (or the human operator) can pass to follow-up tools.

    Files are stored directly under :attr:`root` and are named
    ``<sha256 hex digest><suffix>``, so storing the same content with the
    same suffix twice reuses the file that is already there.

    Attributes:
        root: Absolute, resolved path of the workspace directory. It is
            created on construction and re-created before every write.
    """

    def __init__(self, root: Union[str, Path, None] = None) -> None:
        """Resolve the workspace directory and create it if needed.

        Args:
            root: Directory to use. When falsy (``None`` or an empty
                string), the environment variable named by
                ``WORKSPACE_ENV_VAR`` is consulted, then
                ``DEFAULT_WORKSPACE_DIR``.
        """
        chosen = root or os.getenv(WORKSPACE_ENV_VAR) or DEFAULT_WORKSPACE_DIR
        self.root: Path = Path(chosen).expanduser().resolve()
        # Serializes the exists-check / write / rename sequence between
        # threads that share this instance.
        self._lock = threading.Lock()
        self._ensure_root()

    def _ensure_root(self) -> None:
        """Create :attr:`root` (and any missing parents) if it is absent."""
        self.root.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _digest(data: bytes) -> str:
        """Return the hexadecimal SHA-256 digest of ``data``."""
        return hashlib.sha256(data).hexdigest()

    def save_blob(
        self,
        data: Union[bytes, bytearray, memoryview, str],
        *,
        suffix: str = ".bin",
    ) -> Dict[str, Union[str, int]]:
        """Persist ``data`` to the workspace and return its descriptor.

        Strings are encoded as UTF-8. The filename is content-addressed by
        SHA-256, so identical payloads collapse to a single file on disk.
        New content is written to a ``.part`` sibling and then moved into
        place with :func:`os.replace`; if either step fails, the ``.part``
        file is removed before the error propagates.

        Args:
            data: Payload to store.
            suffix: Text appended to the digest to form the filename.

        Returns:
            A dict with ``file_path``, ``size_bytes``, ``sha256``, ``type``.

        Raises:
            TypeError: If ``data`` is neither ``str`` nor bytes-like.
        """
        if isinstance(data, str):
            payload = data.encode("utf-8")
        elif isinstance(data, (bytearray, memoryview)):
            payload = bytes(data)
        elif isinstance(data, bytes):
            payload = data
        else:
            raise TypeError(
                f"WorkspaceManager.save_blob expects bytes-like or str, got {type(data).__name__}"
            )

        sha256 = self._digest(payload)
        filename = f"{sha256}{suffix}"
        target = self.root / filename

        with self._lock:
            self._ensure_root()
            if not target.exists():
                tmp = target.with_suffix(target.suffix + ".part")
                try:
                    tmp.write_bytes(payload)
                    os.replace(tmp, target)
                except BaseException:
                    # Do not leave a partial ".part" file behind (for
                    # example after a full disk or an interrupt).
                    try:
                        tmp.unlink(missing_ok=True)
                    except OSError:
                        pass  # best effort; surface the original error
                    raise

        log.info(
            "WorkspaceManager stored %d bytes at %s (sha256=%s)",
            len(payload),
            target,
            sha256,
        )

        return {
            "file_path": str(target),
            "size_bytes": len(payload),
            "sha256": sha256,
            "type": BLOB_TYPE,
        }

    def register_file(
        self,
        path: Union[str, Path],
        *,
        suffix: Union[str, None] = None,
    ) -> Dict[str, Union[str, int]]:
        """Adopt an existing file into the workspace and return its descriptor.

        The file is hashed in chunks (so multi-gigabyte firmware dumps never
        get loaded into Python memory) and then renamed into a
        content-addressed slot under :attr:`root`. If a file with the same
        digest and suffix is already present, the source is removed and the
        existing target is kept.

        Tools that stream output directly to disk (for example, OpenOCD's
        ``dump_image``) should produce their temporary file inside
        :attr:`root` so the rename stays on a single filesystem. When the
        source does live on another filesystem, the rename is replaced by
        a copy into a ``.part`` file inside :attr:`root`, a rename of that
        file onto the target, and removal of the source. Only the file
        content is copied in that case, not its metadata.

        Args:
            path: Path to the file to adopt.
            suffix: Optional suffix to append to the SHA-256 digest when
                naming the target. Defaults to the source file's suffix or
                ``.bin`` when none is present.

        Returns:
            A descriptor with the same keys as :meth:`save_blob`:
                ``file_path``, ``size_bytes``, ``sha256`` and ``type``.

        Raises:
            FileNotFoundError: If ``path`` does not exist or is not a file.
        """
        # Imported locally so this method does not depend on edits to the
        # module's import block.
        import errno
        import shutil

        src = Path(path).expanduser().resolve()
        if not src.is_file():
            raise FileNotFoundError(f"File to register does not exist: {src}")

        sha = hashlib.sha256()
        size = 0
        with src.open("rb") as fh:
            for chunk in iter(lambda: fh.read(1024 * 1024), b""):
                sha.update(chunk)
                size += len(chunk)

        digest = sha.hexdigest()
        chosen_suffix = suffix if suffix is not None else (src.suffix or ".bin")
        target = self.root / f"{digest}{chosen_suffix}"

        with self._lock:
            self._ensure_root()
            if target.exists():
                # Same content is already stored; drop the duplicate unless
                # the caller handed us the stored file itself.
                if src != target:
                    src.unlink(missing_ok=True)
            else:
                try:
                    os.replace(src, target)
                except OSError as exc:
                    if exc.errno != errno.EXDEV:
                        raise
                    # Source is on a different filesystem: a rename cannot
                    # work, so copy next to the target and rename from
                    # there, keeping the final step atomic.
                    tmp = target.with_name(target.name + ".part")
                    try:
                        shutil.copyfile(src, tmp)
                        os.replace(tmp, target)
                    except BaseException:
                        try:
                            tmp.unlink(missing_ok=True)
                        except OSError:
                            pass  # best effort; surface the original error
                        raise
                    src.unlink(missing_ok=True)

        log.info(
            "WorkspaceManager registered %s as %s (sha256=%s, size=%d)",
            src,
            target,
            digest,
            size,
        )

        return {
            "file_path": str(target),
            "size_bytes": size,
            "sha256": digest,
            "type": BLOB_TYPE,
        }

register_file(path, *, suffix=None)

Adopt an existing file into the workspace and return its descriptor.

The file is hashed in chunks (so multi-gigabyte firmware dumps never get loaded into Python memory) and then renamed into a content-addressed slot under root. If a file with the same digest and suffix is already present, the source is removed and the existing target is kept. Tools that stream output directly to disk (for example, OpenOCD's dump_image) should produce their temporary file inside root so this rename stays on a single filesystem.

Parameters:

Name Type Description Default
path Union[str, Path]

Path to the file to adopt.

required
suffix Union[str, None]

Optional suffix to append to the SHA-256 digest when naming the target. Defaults to the source file's suffix or .bin when none is present.

None

Returns:

Type Description
Dict[str, Union[str, int]]

A descriptor matching save_blob (file_path, size_bytes, sha256, type).

Raises:

Type Description
FileNotFoundError

If path does not exist or is not a file.

Source code in wintermute/utils/blob_manager.py
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
def register_file(
    self,
    path: Union[str, Path],
    *,
    suffix: Union[str, None] = None,
) -> Dict[str, Union[str, int]]:
    """Adopt an existing file into the workspace and return its descriptor.

    The file is hashed in chunks (so multi-gigabyte firmware dumps never
    get loaded into Python memory) and then renamed into a
    content-addressed slot under :attr:`root`. If a file with the same
    digest and suffix is already present, the source is removed and the
    existing target is kept.

    Tools that stream output directly to disk (for example, OpenOCD's
    ``dump_image``) should produce their temporary file inside
    :attr:`root` so the rename stays on a single filesystem. When the
    source does live on another filesystem, the rename is replaced by a
    copy into a ``.part`` file inside :attr:`root`, a rename of that file
    onto the target, and removal of the source. Only the file content is
    copied in that case, not its metadata.

    Args:
        path: Path to the file to adopt.
        suffix: Optional suffix to append to the SHA-256 digest when
            naming the target. Defaults to the source file's suffix or
            ``.bin`` when none is present.

    Returns:
        A descriptor with the same keys as :meth:`save_blob`:
            ``file_path``, ``size_bytes``, ``sha256`` and ``type``.

    Raises:
        FileNotFoundError: If ``path`` does not exist or is not a file.
    """
    # Imported locally so this method does not depend on edits to the
    # module's import block.
    import errno
    import shutil

    src = Path(path).expanduser().resolve()
    if not src.is_file():
        raise FileNotFoundError(f"File to register does not exist: {src}")

    sha = hashlib.sha256()
    size = 0
    with src.open("rb") as fh:
        for chunk in iter(lambda: fh.read(1024 * 1024), b""):
            sha.update(chunk)
            size += len(chunk)

    digest = sha.hexdigest()
    chosen_suffix = suffix if suffix is not None else (src.suffix or ".bin")
    target = self.root / f"{digest}{chosen_suffix}"

    with self._lock:
        self._ensure_root()
        if target.exists():
            # Same content is already stored; drop the duplicate unless the
            # caller handed us the stored file itself.
            if src != target:
                src.unlink(missing_ok=True)
        else:
            try:
                os.replace(src, target)
            except OSError as exc:
                if exc.errno != errno.EXDEV:
                    raise
                # Source is on a different filesystem: a rename cannot
                # work, so copy next to the target and rename from there,
                # keeping the final step atomic.
                tmp = target.with_name(target.name + ".part")
                try:
                    shutil.copyfile(src, tmp)
                    os.replace(tmp, target)
                except BaseException:
                    try:
                        tmp.unlink(missing_ok=True)
                    except OSError:
                        pass  # best effort; surface the original error
                    raise
                src.unlink(missing_ok=True)

    log.info(
        "WorkspaceManager registered %s as %s (sha256=%s, size=%d)",
        src,
        target,
        digest,
        size,
    )

    return {
        "file_path": str(target),
        "size_bytes": size,
        "sha256": digest,
        "type": BLOB_TYPE,
    }

save_blob(data, *, suffix='.bin')

Persist data to the workspace and return its descriptor.

Strings are encoded as UTF-8. The filename is content-addressed by SHA-256, so identical payloads collapse to a single file on disk.

Source code in wintermute/utils/blob_manager.py
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
def save_blob(
    self,
    data: Union[bytes, bytearray, memoryview, str],
    *,
    suffix: str = ".bin",
) -> Dict[str, Union[str, int]]:
    """Persist ``data`` to the workspace and return its descriptor.

    Strings are encoded as UTF-8. The filename is content-addressed by
    SHA-256, so identical payloads collapse to a single file on disk.
    New content is written to a ``.part`` sibling and then moved into
    place with :func:`os.replace`; if either step fails, the ``.part``
    file is removed before the error propagates.

    Args:
        data: Payload to store.
        suffix: Text appended to the digest to form the filename.

    Returns:
        A dict with ``file_path``, ``size_bytes``, ``sha256``, ``type``.

    Raises:
        TypeError: If ``data`` is neither ``str`` nor bytes-like.
    """
    if isinstance(data, str):
        payload = data.encode("utf-8")
    elif isinstance(data, (bytearray, memoryview)):
        payload = bytes(data)
    elif isinstance(data, bytes):
        payload = data
    else:
        raise TypeError(
            f"WorkspaceManager.save_blob expects bytes-like or str, got {type(data).__name__}"
        )

    sha256 = self._digest(payload)
    filename = f"{sha256}{suffix}"
    target = self.root / filename

    with self._lock:
        self._ensure_root()
        if not target.exists():
            tmp = target.with_suffix(target.suffix + ".part")
            try:
                tmp.write_bytes(payload)
                os.replace(tmp, target)
            except BaseException:
                # Do not leave a partial ".part" file behind (for example
                # after a full disk or an interrupt).
                try:
                    tmp.unlink(missing_ok=True)
                except OSError:
                    pass  # best effort; surface the original error
                raise

    log.info(
        "WorkspaceManager stored %d bytes at %s (sha256=%s)",
        len(payload),
        target,
        sha256,
    )

    return {
        "file_path": str(target),
        "size_bytes": len(payload),
        "sha256": sha256,
        "type": BLOB_TYPE,
    }

get_default_workspace()

Return a process-wide WorkspaceManager, creating it on first use.

Source code in wintermute/utils/blob_manager.py
184
185
186
187
188
189
190
191
def get_default_workspace() -> WorkspaceManager:
    """Return the shared process-wide WorkspaceManager.

    The manager is built with ``WorkspaceManager()`` the first time it is
    requested and the same object is handed out afterwards.
    """
    global _default_manager
    # Fast path: already initialised, no need to take the lock.
    if _default_manager is not None:
        return _default_manager
    with _default_lock:
        # Re-check under the lock: another thread may have created the
        # manager while this one was waiting.
        if _default_manager is None:
            _default_manager = WorkspaceManager()
        return _default_manager

set_default_workspace(manager)

Override the process-wide WorkspaceManager (useful for tests).

Source code in wintermute/utils/blob_manager.py
194
195
196
197
198
def set_default_workspace(manager: WorkspaceManager) -> None:
    """Install ``manager`` as the process-wide WorkspaceManager.

    Intended mainly for tests that need the shared workspace to point at
    a manager of their own.

    Args:
        manager: The instance that becomes the process-wide default.
    """
    global _default_manager
    # Assign under the module lock that guards the shared default.
    with _default_lock:
        _default_manager = manager