Commit bf66f2e

Fix migration 19 failing on duplicate artifact saves
fixes: #1071
1 parent: 5e2bf26, commit: bf66f2e

2 files changed: +78, -48 lines


CHANGES/1071.bugfix

Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+Fixed migration 19 failing on duplicate metadata artifact saves.
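For context, the failure this entry records is an ordinary uniqueness violation during a batched insert: when two packages in the same domain carry byte-identical METADATA files, the old migration built two artifact rows with the same sha256 and handed both to one bulk_create, which the database rejects. The sqlite3 snippet below is a minimal, hypothetical reproduction of that failure mode; the artifact table and its (sha256, domain) constraint are illustrative stand-ins for pulpcore's artifact uniqueness rules, not the real schema.

# Hypothetical reproduction: two rows with the same digest in one batched
# insert trip a uniqueness constraint, analogous to the duplicate metadata
# artifact saves fixed here. The table is a stand-in, not pulpcore's schema.
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE artifact (sha256 TEXT, domain TEXT, UNIQUE (sha256, domain))")

# Two packages whose METADATA files hash to the same digest produce duplicate rows.
rows = [("deadbeef", "default"), ("deadbeef", "default")]
try:
    conn.executemany("INSERT INTO artifact (sha256, domain) VALUES (?, ?)", rows)
except sqlite3.IntegrityError as exc:
    print(f"bulk insert rejected: {exc}")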

pulp_python/app/migrations/0019_create_missing_metadata_artifacts.py

Lines changed: 77 additions & 48 deletions

@@ -1,6 +1,7 @@
 # Generated manually on 2025-12-15 14:00 for creating missing metadata artifacts
 
 from django.db import migrations
+from itertools import groupby
 
 BATCH_SIZE = 1000
 
@@ -118,6 +119,7 @@ def create_missing_metadata_artifacts(apps, schema_editor):
     import tempfile
     from django.conf import settings
     from django.db import models
+    from django.db.utils import IntegrityError
 
     PythonPackageContent = apps.get_model("python", "PythonPackageContent")
     ContentArtifact = apps.get_model("core", "ContentArtifact")
@@ -132,62 +134,89 @@ def create_missing_metadata_artifacts(apps, schema_editor):
         )
         .exclude(metadata_sha256="")
         .prefetch_related("_artifacts")
-        .only("filename", "metadata_sha256")
+        .only("filename", "metadata_sha256", "pulp_domain_id")
+        .order_by("pulp_domain_id")
     )
-    artifact_batch = []
+    artifact_batch = {}
     contentartifact_batch = []
     packages_batch = []
 
-    with tempfile.TemporaryDirectory(dir=settings.WORKING_DIRECTORY) as temp_dir:
-        for package in packages:
-            # Get the main artifact for package
-            main_artifact = package._artifacts.get()
-
-            filename = package.filename
-            metadata_digests = {"sha256": package.metadata_sha256}
-            result = artifact_to_metadata_artifact(
-                filename, main_artifact, metadata_digests, temp_dir, Artifact
-            )
-            if result is None:
-                # Unset metadata_sha256 when extraction or validation fails
-                package.metadata_sha256 = None
-                packages_batch.append(package)
-                continue
-            metadata_artifact, mismatched_sha256 = result
-            if mismatched_sha256:
-                # Fix the package if its metadata_sha256 differs from the actual value
-                package.metadata_sha256 = mismatched_sha256
-                packages_batch.append(package)
-
-            # Set the domain on the metadata artifact to match the package's domain
-            metadata_artifact.pulp_domain = package._pulp_domain
-
-            contentartifact = ContentArtifact(
-                artifact=metadata_artifact,
-                content=package,
-                relative_path=f"{filename}.metadata",
+    def batch_save_artifacts(domain_id):
+        try:
+            Artifact.objects.bulk_create(artifact_batch.values(), batch_size=BATCH_SIZE)
+        except IntegrityError:
+            # Find the existing artifacts and update the contentartifacts to point to the existing artifacts
+            digest_cas = {}
+            for ca in contentartifact_batch:
+                digest_cas.setdefault(ca.artifact.sha256, []).append(ca)
+            artifacts = Artifact.objects.filter(
+                sha256__in=artifact_batch.keys(), pulp_domain_id=domain_id
             )
-            artifact_batch.append(metadata_artifact)
-            contentartifact_batch.append(contentartifact)
-
-            if len(artifact_batch) == BATCH_SIZE:
-                Artifact.objects.bulk_create(artifact_batch, batch_size=BATCH_SIZE)
-                ContentArtifact.objects.bulk_create(contentartifact_batch, batch_size=BATCH_SIZE)
-                artifact_batch.clear()
-                contentartifact_batch.clear()
-            if len(packages_batch) == BATCH_SIZE:
+            for artifact in artifacts:
+                for ca in digest_cas[artifact.sha256]:
+                    ca.artifact = artifact
+                artifact_batch.pop(artifact.sha256)
+            Artifact.objects.bulk_create(artifact_batch.values(), batch_size=BATCH_SIZE)
+
+        ContentArtifact.objects.bulk_create(
+            contentartifact_batch,
+            batch_size=BATCH_SIZE,
+            update_conflicts=True,
+            update_fields=["artifact"],
+            unique_fields=["content", "relative_path"],
+        )
+        artifact_batch.clear()
+        contentartifact_batch.clear()
+
+    with tempfile.TemporaryDirectory(dir=settings.WORKING_DIRECTORY) as temp_dir:
+        for domain_id, domain_packages in groupby(
+            packages.iterator(chunk_size=BATCH_SIZE), key=lambda x: x.pulp_domain_id
+        ):
+            for package in domain_packages:
+                # Get the main artifact for package
+                main_artifact = package._artifacts.get()
+
+                filename = package.filename
+                metadata_digests = {"sha256": package.metadata_sha256}
+                result = artifact_to_metadata_artifact(
+                    filename, main_artifact, metadata_digests, temp_dir, Artifact
+                )
+                if result is None:
+                    # Unset metadata_sha256 when extraction or validation fails
+                    package.metadata_sha256 = None
+                    packages_batch.append(package)
+                    continue
+                metadata_artifact, mismatched_sha256 = result
+                if mismatched_sha256:
+                    # Fix the package if its metadata_sha256 differs from the actual value
+                    package.metadata_sha256 = mismatched_sha256
+                    packages_batch.append(package)
+
+                # Set the domain on the metadata artifact to match the package's domain
+                metadata_artifact.pulp_domain_id = domain_id
+
+                art = artifact_batch.setdefault(metadata_artifact.sha256, metadata_artifact)
+                contentartifact = ContentArtifact(
+                    artifact=art,
+                    content=package,
+                    relative_path=f"{filename}.metadata",
+                )
+                contentartifact_batch.append(contentartifact)
+
+                if len(contentartifact_batch) == BATCH_SIZE:
+                    batch_save_artifacts(domain_id)
+                if len(packages_batch) == BATCH_SIZE:
+                    PythonPackageContent.objects.bulk_update(
+                        packages_batch, ["metadata_sha256"], batch_size=BATCH_SIZE
+                    )
+                    packages_batch.clear()
+
+            if artifact_batch:
+                batch_save_artifacts(domain_id)
+            if packages_batch:
                 PythonPackageContent.objects.bulk_update(
                     packages_batch, ["metadata_sha256"], batch_size=BATCH_SIZE
                 )
-                packages_batch.clear()
-
-    if artifact_batch:
-        Artifact.objects.bulk_create(artifact_batch, batch_size=BATCH_SIZE)
-        ContentArtifact.objects.bulk_create(contentartifact_batch, batch_size=BATCH_SIZE)
-    if packages_batch:
-        PythonPackageContent.objects.bulk_update(
-            packages_batch, ["metadata_sha256"], batch_size=BATCH_SIZE
-        )
 
 
 class Migration(migrations.Migration):
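The new batch_save_artifacts helper guards against that failure twice: the in-memory batch is a dict keyed by sha256, so one flush never contains two rows with the same digest, and if the database still reports a conflict because a digest already exists in the domain, the pending ContentArtifact rows are re-pointed at the existing artifacts before the remainder is retried. Below is a standalone, ORM-free sketch of that pattern; FakeArtifact, the existing dict standing in for saved rows, and save_batch are hypothetical illustrations rather than code from the migration.

# Hypothetical, ORM-free sketch of the dedup-and-retry pattern used by
# batch_save_artifacts. FakeArtifact and the `existing` dict stand in for
# pulpcore's Artifact model and the rows already in the database.
from dataclasses import dataclass
from typing import Optional


@dataclass
class FakeArtifact:
    sha256: str
    pk: Optional[int] = None


def save_batch(batch, existing, content_artifacts):
    # Group pending content artifacts by the digest of the artifact they
    # reference, mirroring the digest_cas mapping in the migration.
    digest_cas = {}
    for ca in content_artifacts:
        digest_cas.setdefault(ca["artifact"].sha256, []).append(ca)

    # Digests already present play the role of the IntegrityError branch:
    # re-point their content artifacts at the existing row and drop them from
    # the batch, so the retry only inserts genuinely new artifacts.
    for sha256 in list(batch):
        if sha256 in existing:
            for ca in digest_cas.get(sha256, []):
                ca["artifact"] = existing[sha256]
            batch.pop(sha256)

    # Whatever remains is safe to "bulk create".
    for i, artifact in enumerate(batch.values(), start=len(existing) + 1):
        artifact.pk = i
        existing[artifact.sha256] = artifact


existing = {"abc": FakeArtifact("abc", pk=1)}                  # already saved
new_art, dup_art = FakeArtifact("def"), FakeArtifact("abc")    # "abc" collides
batch = {new_art.sha256: new_art, dup_art.sha256: dup_art}
cas = [{"artifact": new_art}, {"artifact": dup_art}]
save_batch(batch, existing, cas)
assert cas[1]["artifact"].pk == 1   # re-pointed at the pre-existing artifact
assert new_art.pk == 2              # newly created

In the committed migration the retry path additionally saves the ContentArtifact rows with bulk_create(update_conflicts=True, unique_fields=["content", "relative_path"]), so re-running the flush does not trip that uniqueness constraint either.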
