|
9 | 9 | import app.assets.services.hashing as hashing |
10 | 10 | from app.assets.database.queries import ( |
11 | 11 | add_tags_to_reference, |
| 12 | + count_active_siblings, |
| 13 | + create_stub_asset, |
| 14 | + ensure_tags_exist, |
12 | 15 | fetch_reference_and_asset, |
13 | 16 | get_asset_by_hash, |
14 | 17 | get_reference_by_file_path, |
|
23 | 26 | upsert_reference, |
24 | 27 | validate_tags_exist, |
25 | 28 | ) |
26 | | -from app.assets.helpers import normalize_tags |
| 29 | +from app.assets.helpers import get_utc_now, normalize_tags |
| 30 | +from app.assets.services.bulk_ingest import batch_insert_seed_assets |
27 | 31 | from app.assets.services.file_utils import get_size_and_mtime_ns |
28 | 32 | from app.assets.services.path_utils import ( |
29 | 33 | compute_relative_filename, |
@@ -130,6 +134,102 @@ def _ingest_file_from_path( |
130 | 134 | ) |
131 | 135 |
|
132 | 136 |
|
def register_output_files(
    file_paths: Sequence[str],
    user_metadata: UserMetadata = None,
    job_id: str | None = None,
) -> int:
    """Register every path in ``file_paths`` that is a regular file.

    Registration is best-effort: a path that is not an existing regular file
    is skipped silently, and a path whose ingestion raises is logged (with
    traceback) and skipped, so one bad file never aborts the batch.

    Args:
        file_paths: Candidate output file paths.
        user_metadata: Forwarded unchanged to ``ingest_existing_file``.
        job_id: Forwarded unchanged to ``ingest_existing_file``.

    Returns:
        How many paths ``ingest_existing_file`` reported as registered
        (i.e. returned a truthy value for).
    """
    success_count = 0
    for candidate in file_paths:
        if os.path.isfile(candidate):
            try:
                was_registered = ingest_existing_file(
                    candidate, user_metadata=user_metadata, job_id=job_id
                )
            except Exception:
                # Deliberate catch-all boundary: log and move on to the
                # next output instead of failing the whole batch.
                logging.exception("Failed to register output: %s", candidate)
            else:
                if was_registered:
                    success_count += 1
    return success_count
| 158 | + |
| 159 | + |
def ingest_existing_file(
    abs_path: str,
    user_metadata: UserMetadata = None,
    extra_tags: Sequence[str] = (),
    owner_id: str = "",
    job_id: str | None = None,
) -> bool:
    """Register an existing on-disk file as an asset stub.

    If a reference already exists for this path, it is revived
    (``is_missing`` and ``deleted_at`` are cleared), its ``mtime_ns``,
    ``job_id`` and ``updated_at`` are refreshed, and ``enrichment_level`` is
    reset to 0 so the enricher will re-hash it. The linked asset is either
    reset in place or, when other active references share it, replaced by a
    fresh stub asset so the shared row is not mutated.

    For brand-new paths, inserts a stub record (hash=NULL) for immediate
    UX visibility.

    Args:
        abs_path: Path of the file to register. It is normalised with
            ``os.path.abspath`` and the normalised form is used both for the
            existing-reference lookup and for the inserted record.
        user_metadata: Accepted for interface compatibility but not read by
            this function. NOTE(review): confirm whether it should be
            persisted on the reference.
        extra_tags: Tags appended to the path-derived tags (duplicates
            dropped, first occurrence wins). Only applied when a new
            reference is inserted; an existing reference's tags are left
            untouched.
        owner_id: Forwarded to ``batch_insert_seed_assets`` for new rows.
        job_id: Stored on the reference. For an existing reference the
            previous value is overwritten even when this is ``None``.

    Returns:
        True if an existing reference was updated, or if the insert reported
        at least one won path; False otherwise.

    Exceptions raised by the stat, path and database helpers are not caught
    here and propagate to the caller.
    """
    locator = os.path.abspath(abs_path)
    size_bytes, mtime_ns = get_size_and_mtime_ns(abs_path)
    mime_type = mimetypes.guess_type(abs_path, strict=False)[0]
    name, path_tags = get_name_and_tags_from_asset_path(abs_path)
    # dict.fromkeys de-duplicates while preserving first-seen order.
    tags = list(dict.fromkeys([*path_tags, *extra_tags]))

    with create_session() as session:
        existing_ref = get_reference_by_file_path(session, locator)
        if existing_ref is not None:
            now = get_utc_now()
            existing_ref.mtime_ns = mtime_ns
            existing_ref.job_id = job_id
            existing_ref.is_missing = False
            existing_ref.deleted_at = None
            existing_ref.updated_at = now
            existing_ref.enrichment_level = 0

            asset = existing_ref.asset
            if asset:
                # If other refs share this asset, detach to a new stub
                # instead of mutating the shared row.
                siblings = count_active_siblings(session, asset.id, existing_ref.id)
                if siblings > 0:
                    new_asset = create_stub_asset(
                        session,
                        size_bytes=size_bytes,
                        # Keep the previous MIME type when it cannot be guessed.
                        mime_type=mime_type or asset.mime_type,
                    )
                    existing_ref.asset_id = new_asset.id
                else:
                    # Sole owner: clear the hash and refresh size in place.
                    asset.hash = None
                    asset.size_bytes = size_bytes
                    if mime_type:
                        asset.mime_type = mime_type
            session.commit()
            return True

        spec = {
            # Use the normalised path (the same key the lookup above uses)
            # so a later call for this file finds the row instead of
            # inserting a duplicate.
            "abs_path": locator,
            "size_bytes": size_bytes,
            "mtime_ns": mtime_ns,
            "info_name": name,
            "tags": tags,
            "fname": os.path.basename(abs_path),
            "metadata": None,
            "hash": None,
            "mime_type": mime_type,
            "job_id": job_id,
        }
        if tags:
            ensure_tags_exist(session, tags)
        result = batch_insert_seed_assets(session, [spec], owner_id=owner_id)
        session.commit()
        return result.won_paths > 0
| 231 | + |
| 232 | + |
133 | 233 | def _register_existing_asset( |
134 | 234 | asset_hash: str, |
135 | 235 | name: str, |
|
0 commit comments