
Commit e164fd0

Turn migration 19 into a noop
fixes: #1071
1 parent 5e2bf26 commit e164fd0

2 files changed: 4 additions & 188 deletions

CHANGES/1071.bugfix

Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+Turned migration 19 into a no-op.
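For context, migrations.RunPython.noop is Django's built-in do-nothing callable, and elidable=True marks the operation as safe to drop when the app's migrations are squashed. Keeping migration 19 in place as a no-op, rather than deleting the file, preserves the migration history for installs that already applied it. A minimal sketch of the pattern this commit applies; the app label and dependency below are hypothetical placeholders, not taken from this repository:

from django.db import migrations


class Migration(migrations.Migration):

    # Hypothetical predecessor; a real migration names the one before it.
    dependencies = [("python", "0018_placeholder")]

    operations = [
        migrations.RunPython(
            migrations.RunPython.noop,  # forward: do nothing
            reverse_code=migrations.RunPython.noop,  # reverse: do nothing
            elidable=True,  # safe to drop when squashing
        ),
    ]

The diff that follows is the migration module itself.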
Lines changed: 3 additions & 188 deletions

@@ -1,194 +1,8 @@
 # Generated manually on 2025-12-15 14:00 for creating missing metadata artifacts
-
 from django.db import migrations
 
-BATCH_SIZE = 1000
-
-
-def pulp_hashlib_new(name, *args, **kwargs):
-    """
-    Copied and updated (to comply with migrations) from pulpcore.
-    """
-    import hashlib as the_real_hashlib
-    from django.conf import settings
-
-    if name not in settings.ALLOWED_CONTENT_CHECKSUMS:
-        return None
-
-    return the_real_hashlib.new(name, *args, **kwargs)
-
-
-def init_and_validate(file, artifact_model, expected_digests):
-    """
-    Copied and updated (to comply with migrations) from pulpcore.
-    """
-    from django.conf import settings
-
-    digest_fields = []
-    for alg in ("sha512", "sha384", "sha256", "sha224", "sha1", "md5"):
-        if alg in settings.ALLOWED_CONTENT_CHECKSUMS:
-            digest_fields.append(alg)
-
-    if isinstance(file, str):
-        with open(file, "rb") as f:
-            hashers = {
-                n: hasher for n in digest_fields if (hasher := pulp_hashlib_new(n)) is not None
-            }
-            if not hashers:
-                return None
-
-            size = 0
-            while True:
-                chunk = f.read(1048576)  # 1 megabyte
-                if not chunk:
-                    break
-                for algorithm in hashers.values():
-                    algorithm.update(chunk)
-                size = size + len(chunk)
-    else:
-        size = file.size
-        hashers = file.hashers
-
-    mismatched_sha256 = None
-    for algorithm, expected_digest in expected_digests.items():
-        if algorithm not in hashers:
-            return None
-        actual_digest = hashers[algorithm].hexdigest()
-        if expected_digest != actual_digest:
-            # Store the actual value for later fixing if it differs from the package value
-            mismatched_sha256 = actual_digest
-
-    attributes = {"size": size, "file": file}
-    for algorithm in digest_fields:
-        attributes[algorithm] = hashers[algorithm].hexdigest()
-
-    return artifact_model(**attributes), mismatched_sha256
-
-
-def extract_wheel_metadata(filename):
-    """
-    Extract the metadata file content from a wheel file.
-    Return the raw metadata content as bytes or None if metadata cannot be extracted.
-    """
-    import zipfile
-
-    try:
-        with zipfile.ZipFile(filename, "r") as f:
-            for file_path in f.namelist():
-                if file_path.endswith(".dist-info/METADATA"):
-                    return f.read(file_path)
-    except (zipfile.BadZipFile, KeyError, OSError):
-        pass
-    return None
-
-
-def artifact_to_metadata_artifact(filename, artifact, md_digests, tmp_dir, artifact_model):
-    """
-    Create artifact for metadata from the provided wheel artifact.
-    Return (artifact, mismatched_sha256) on success, None on any failure.
-    """
-    import shutil
-    import tempfile
-
-    with tempfile.NamedTemporaryFile("wb", dir=tmp_dir, suffix=filename, delete=False) as temp_file:
-        temp_wheel_path = temp_file.name
-        artifact.file.seek(0)
-        shutil.copyfileobj(artifact.file, temp_file)
-        temp_file.flush()
-
-    metadata_content = extract_wheel_metadata(temp_wheel_path)
-    if not metadata_content:
-        return None
-
-    with tempfile.NamedTemporaryFile(
-        "wb", dir=tmp_dir, suffix=".metadata", delete=False
-    ) as temp_md:
-        temp_metadata_path = temp_md.name
-        temp_md.write(metadata_content)
-        temp_md.flush()
-
-    return init_and_validate(temp_metadata_path, artifact_model, md_digests)
-
-
-def create_missing_metadata_artifacts(apps, schema_editor):
-    """
-    Create metadata artifacts for PythonPackageContent instances that have metadata_sha256
-    but are missing the corresponding metadata artifact.
-    """
-    import tempfile
-    from django.conf import settings
-    from django.db import models
-
-    PythonPackageContent = apps.get_model("python", "PythonPackageContent")
-    ContentArtifact = apps.get_model("core", "ContentArtifact")
-    Artifact = apps.get_model("core", "Artifact")
-
-    packages = (
-        PythonPackageContent.objects.filter(
-            metadata_sha256__isnull=False,
-            filename__endswith=".whl",
-            contentartifact__artifact__isnull=False,
-            contentartifact__relative_path=models.F("filename"),
-        )
-        .exclude(metadata_sha256="")
-        .prefetch_related("_artifacts")
-        .only("filename", "metadata_sha256")
-    )
-    artifact_batch = []
-    contentartifact_batch = []
-    packages_batch = []
-
-    with tempfile.TemporaryDirectory(dir=settings.WORKING_DIRECTORY) as temp_dir:
-        for package in packages:
-            # Get the main artifact for package
-            main_artifact = package._artifacts.get()
-
-            filename = package.filename
-            metadata_digests = {"sha256": package.metadata_sha256}
-            result = artifact_to_metadata_artifact(
-                filename, main_artifact, metadata_digests, temp_dir, Artifact
-            )
-            if result is None:
-                # Unset metadata_sha256 when extraction or validation fails
-                package.metadata_sha256 = None
-                packages_batch.append(package)
-                continue
-            metadata_artifact, mismatched_sha256 = result
-            if mismatched_sha256:
-                # Fix the package if its metadata_sha256 differs from the actual value
-                package.metadata_sha256 = mismatched_sha256
-                packages_batch.append(package)
-
-            # Set the domain on the metadata artifact to match the package's domain
-            metadata_artifact.pulp_domain = package._pulp_domain
-
-            contentartifact = ContentArtifact(
-                artifact=metadata_artifact,
-                content=package,
-                relative_path=f"{filename}.metadata",
-            )
-            artifact_batch.append(metadata_artifact)
-            contentartifact_batch.append(contentartifact)
-
-            if len(artifact_batch) == BATCH_SIZE:
-                Artifact.objects.bulk_create(artifact_batch, batch_size=BATCH_SIZE)
-                ContentArtifact.objects.bulk_create(contentartifact_batch, batch_size=BATCH_SIZE)
-                artifact_batch.clear()
-                contentartifact_batch.clear()
-            if len(packages_batch) == BATCH_SIZE:
-                PythonPackageContent.objects.bulk_update(
-                    packages_batch, ["metadata_sha256"], batch_size=BATCH_SIZE
-                )
-                packages_batch.clear()
-
-        if artifact_batch:
-            Artifact.objects.bulk_create(artifact_batch, batch_size=BATCH_SIZE)
-            ContentArtifact.objects.bulk_create(contentartifact_batch, batch_size=BATCH_SIZE)
-        if packages_batch:
-            PythonPackageContent.objects.bulk_update(
-                packages_batch, ["metadata_sha256"], batch_size=BATCH_SIZE
-            )
 
+# This data migration didn't work, so turning into noop.
 
 class Migration(migrations.Migration):
 
@@ -198,7 +12,8 @@ class Migration(migrations.Migration):
 
     operations = [
         migrations.RunPython(
-            create_missing_metadata_artifacts,
+            migrations.RunPython.noop,
             reverse_code=migrations.RunPython.noop,
+            elidable=True,
        ),
    ]
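For reference, here is the migration module after this commit, reconstructed from the two hunks above. New-file lines 9 through 11 fall between the hunks and are not shown in the diff, so the dependencies entry below is a hypothetical placeholder, not the repository's actual value:

# Generated manually on 2025-12-15 14:00 for creating missing metadata artifacts
from django.db import migrations


# This data migration didn't work, so turning into noop.

class Migration(migrations.Migration):

    # New-file lines 9-11 are hidden between the hunks; the dependencies
    # block lives there. This entry is a placeholder, not the real value.
    dependencies = [("python", "0018_placeholder")]

    operations = [
        migrations.RunPython(
            migrations.RunPython.noop,
            reverse_code=migrations.RunPython.noop,
            elidable=True,
        ),
    ]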
