# Generated manually on 2025-12-15 14:00 for creating missing metadata artifacts
22
from itertools import groupby

from django.db import migrations, transaction
45
56BATCH_SIZE = 1000
67
@@ -118,6 +119,7 @@ def create_missing_metadata_artifacts(apps, schema_editor):
118119 import tempfile
119120 from django .conf import settings
120121 from django .db import models
122+ from django .db .utils import IntegrityError
121123
122124 PythonPackageContent = apps .get_model ("python" , "PythonPackageContent" )
123125 ContentArtifact = apps .get_model ("core" , "ContentArtifact" )
@@ -132,62 +134,89 @@ def create_missing_metadata_artifacts(apps, schema_editor):
132134 )
133135 .exclude (metadata_sha256 = "" )
134136 .prefetch_related ("_artifacts" )
135- .only ("filename" , "metadata_sha256" )
137+ .only ("filename" , "metadata_sha256" , "pulp_domain_id" )
138+ .order_by ("pulp_domain_id" )
136139 )
137- artifact_batch = []
140+ artifact_batch = {}
138141 contentartifact_batch = []
139142 packages_batch = []
140143
141- with tempfile .TemporaryDirectory (dir = settings .WORKING_DIRECTORY ) as temp_dir :
142- for package in packages :
143- # Get the main artifact for package
144- main_artifact = package ._artifacts .get ()
145-
146- filename = package .filename
147- metadata_digests = {"sha256" : package .metadata_sha256 }
148- result = artifact_to_metadata_artifact (
149- filename , main_artifact , metadata_digests , temp_dir , Artifact
150- )
151- if result is None :
152- # Unset metadata_sha256 when extraction or validation fails
153- package .metadata_sha256 = None
154- packages_batch .append (package )
155- continue
156- metadata_artifact , mismatched_sha256 = result
157- if mismatched_sha256 :
158- # Fix the package if its metadata_sha256 differs from the actual value
159- package .metadata_sha256 = mismatched_sha256
160- packages_batch .append (package )
161-
162- # Set the domain on the metadata artifact to match the package's domain
163- metadata_artifact .pulp_domain = package ._pulp_domain
164-
165- contentartifact = ContentArtifact (
166- artifact = metadata_artifact ,
167- content = package ,
168- relative_path = f"{ filename } .metadata" ,
def batch_save_artifacts(domain_id):
    """Flush the pending artifact batches for one domain.

    Optimistically bulk-creates the queued metadata ``Artifact`` rows; if any
    already exist in *domain_id* (duplicate sha256), repoints the queued
    ``ContentArtifact`` rows at the existing artifacts and creates only the
    remainder. Finally upserts the ``ContentArtifact`` batch and empties both
    batch containers. Mutates the enclosing scope's ``artifact_batch`` (dict
    keyed by sha256) and ``contentartifact_batch`` (list).
    """
    try:
        # Run the optimistic insert under a savepoint: migrations execute
        # inside one atomic transaction, and on PostgreSQL a failed statement
        # aborts that transaction — without the savepoint, the recovery
        # queries in the except branch below would themselves fail with
        # "current transaction is aborted" / TransactionManagementError.
        with transaction.atomic():
            Artifact.objects.bulk_create(artifact_batch.values(), batch_size=BATCH_SIZE)
    except IntegrityError:
        # Some artifacts already exist in this domain. Map each pending
        # ContentArtifact by its artifact's sha256, repoint the ones whose
        # artifact already exists, drop those from the create batch, and
        # insert only the genuinely new artifacts.
        digest_cas = {}
        for ca in contentartifact_batch:
            digest_cas.setdefault(ca.artifact.sha256, []).append(ca)
        existing = Artifact.objects.filter(
            sha256__in=artifact_batch.keys(), pulp_domain_id=domain_id
        )
        for artifact in existing:
            for ca in digest_cas[artifact.sha256]:
                ca.artifact = artifact
            artifact_batch.pop(artifact.sha256)
        Artifact.objects.bulk_create(artifact_batch.values(), batch_size=BATCH_SIZE)

    # Upsert: a package may already have a ".metadata" ContentArtifact from a
    # previous (partial) run, so update its artifact pointer on conflict.
    ContentArtifact.objects.bulk_create(
        contentartifact_batch,
        batch_size=BATCH_SIZE,
        update_conflicts=True,
        update_fields=["artifact"],
        unique_fields=["content", "relative_path"],
    )
    artifact_batch.clear()
    contentartifact_batch.clear()
170+
171+ with tempfile .TemporaryDirectory (dir = settings .WORKING_DIRECTORY ) as temp_dir :
172+ for domain_id , domain_packages in groupby (
173+ packages .iterator (chunk_size = BATCH_SIZE ), key = lambda x : x .pulp_domain_id
174+ ):
175+ for package in domain_packages :
176+ # Get the main artifact for package
177+ main_artifact = package ._artifacts .get ()
178+
179+ filename = package .filename
180+ metadata_digests = {"sha256" : package .metadata_sha256 }
181+ result = artifact_to_metadata_artifact (
182+ filename , main_artifact , metadata_digests , temp_dir , Artifact
183+ )
184+ if result is None :
185+ # Unset metadata_sha256 when extraction or validation fails
186+ package .metadata_sha256 = None
187+ packages_batch .append (package )
188+ continue
189+ metadata_artifact , mismatched_sha256 = result
190+ if mismatched_sha256 :
191+ # Fix the package if its metadata_sha256 differs from the actual value
192+ package .metadata_sha256 = mismatched_sha256
193+ packages_batch .append (package )
194+
195+ # Set the domain on the metadata artifact to match the package's domain
196+ metadata_artifact .pulp_domain_id = domain_id
197+
198+ art = artifact_batch .setdefault (metadata_artifact .sha256 , metadata_artifact )
199+ contentartifact = ContentArtifact (
200+ artifact = art ,
201+ content = package ,
202+ relative_path = f"{ filename } .metadata" ,
203+ )
204+ contentartifact_batch .append (contentartifact )
205+
206+ if len (contentartifact_batch ) == BATCH_SIZE :
207+ batch_save_artifacts (domain_id )
208+ if len (packages_batch ) == BATCH_SIZE :
209+ PythonPackageContent .objects .bulk_update (
210+ packages_batch , ["metadata_sha256" ], batch_size = BATCH_SIZE
211+ )
212+ packages_batch .clear ()
213+
214+ if artifact_batch :
215+ batch_save_artifacts (domain_id )
216+ if packages_batch :
179217 PythonPackageContent .objects .bulk_update (
180218 packages_batch , ["metadata_sha256" ], batch_size = BATCH_SIZE
181219 )
182- packages_batch .clear ()
183-
184- if artifact_batch :
185- Artifact .objects .bulk_create (artifact_batch , batch_size = BATCH_SIZE )
186- ContentArtifact .objects .bulk_create (contentartifact_batch , batch_size = BATCH_SIZE )
187- if packages_batch :
188- PythonPackageContent .objects .bulk_update (
189- packages_batch , ["metadata_sha256" ], batch_size = BATCH_SIZE
190- )
191220
192221
193222class Migration (migrations .Migration ):