# Generated manually on 2025-12-15 14:00 for creating missing metadata artifacts
2-
32from django .db import migrations
43
# Number of rows accumulated before each bulk_create/bulk_update flush.
BATCH_SIZE = 1000
7-
def pulp_hashlib_new(name, *args, **kwargs):
    """
    Copied and updated (to comply with migrations) from pulpcore.

    Return a new hasher for ``name`` when that checksum type is listed in
    ``settings.ALLOWED_CONTENT_CHECKSUMS``; otherwise return None.
    """
    import hashlib as the_real_hashlib
    from django.conf import settings

    if name in settings.ALLOWED_CONTENT_CHECKSUMS:
        return the_real_hashlib.new(name, *args, **kwargs)
    return None
20-
def init_and_validate(file, artifact_model, expected_digests):
    """
    Copied and updated (to comply with migrations) from pulpcore.

    Build an unsaved ``artifact_model`` instance for ``file`` and check its
    digests against ``expected_digests``.

    ``file`` is either a filesystem path (str), which is read and hashed
    here, or a file-like object that already carries ``size`` and
    ``hashers`` attributes.

    Returns ``(artifact, mismatched_sha256)``, where ``mismatched_sha256``
    is the actual digest when it differs from the expected one (None when
    they match), or None when a required checksum type is unavailable.
    """
    from django.conf import settings

    digest_fields = [
        alg
        for alg in ("sha512", "sha384", "sha256", "sha224", "sha1", "md5")
        if alg in settings.ALLOWED_CONTENT_CHECKSUMS
    ]

    if isinstance(file, str):
        with open(file, "rb") as f:
            hashers = {}
            for alg in digest_fields:
                hasher = pulp_hashlib_new(alg)
                if hasher is not None:
                    hashers[alg] = hasher
            if not hashers:
                return None

            size = 0
            # Hash the file in 1 MiB chunks to bound memory usage.
            while chunk := f.read(1048576):
                for hasher in hashers.values():
                    hasher.update(chunk)
                size += len(chunk)
    else:
        size = file.size
        hashers = file.hashers

    mismatched_sha256 = None
    for algorithm, expected_digest in expected_digests.items():
        if algorithm not in hashers:
            return None
        actual_digest = hashers[algorithm].hexdigest()
        if actual_digest != expected_digest:
            # Store the actual value for later fixing if it differs from
            # the value recorded on the package.
            mismatched_sha256 = actual_digest

    attributes = {"size": size, "file": file}
    for algorithm in digest_fields:
        attributes[algorithm] = hashers[algorithm].hexdigest()

    return artifact_model(**attributes), mismatched_sha256
67-
def extract_wheel_metadata(filename):
    """
    Extract the metadata file content from a wheel file.

    Returns the raw bytes of the first ``*.dist-info/METADATA`` entry in
    the archive, or None when the archive is unreadable or contains no
    such entry.
    """
    import zipfile

    try:
        with zipfile.ZipFile(filename, "r") as wheel:
            matches = [
                entry for entry in wheel.namelist()
                if entry.endswith(".dist-info/METADATA")
            ]
            if matches:
                return wheel.read(matches[0])
    except (zipfile.BadZipFile, KeyError, OSError):
        pass
    return None
84-
def artifact_to_metadata_artifact(filename, artifact, md_digests, tmp_dir, artifact_model):
    """
    Create artifact for metadata from the provided wheel artifact.

    The stored wheel behind ``artifact`` is copied into ``tmp_dir`` so the
    zip can be read by path, its METADATA content is extracted and written
    to a second temp file, then validated against ``md_digests``.

    Returns ``(artifact, mismatched_sha256)`` on success, None on any
    failure.
    """
    import shutil
    import tempfile

    # Materialize the wheel on local disk (the backing file may be remote).
    with tempfile.NamedTemporaryFile(
        "wb", dir=tmp_dir, suffix=filename, delete=False
    ) as wheel_copy:
        wheel_path = wheel_copy.name
        artifact.file.seek(0)
        shutil.copyfileobj(artifact.file, wheel_copy)
        wheel_copy.flush()

    metadata_content = extract_wheel_metadata(wheel_path)
    if not metadata_content:
        return None

    with tempfile.NamedTemporaryFile(
        "wb", dir=tmp_dir, suffix=".metadata", delete=False
    ) as metadata_file:
        metadata_path = metadata_file.name
        metadata_file.write(metadata_content)
        metadata_file.flush()

    return init_and_validate(metadata_path, artifact_model, md_digests)
112-
def create_missing_metadata_artifacts(apps, schema_editor):
    """
    Create metadata artifacts for PythonPackageContent instances that have metadata_sha256
    but are missing the corresponding metadata artifact.

    Standard RunPython signature: ``apps`` is the historical app registry,
    ``schema_editor`` is unused here.
    """
    import tempfile
    from django.conf import settings
    from django.db import models

    # Historical models — never import the real ones in a migration.
    PythonPackageContent = apps.get_model("python", "PythonPackageContent")
    ContentArtifact = apps.get_model("core", "ContentArtifact")
    Artifact = apps.get_model("core", "Artifact")

    # Only wheels whose main artifact is present: METADATA can be pulled
    # straight out of the wheel's zip archive.
    packages = (
        PythonPackageContent.objects.filter(
            metadata_sha256__isnull=False,
            filename__endswith=".whl",
            contentartifact__artifact__isnull=False,
            contentartifact__relative_path=models.F("filename"),
        )
        .exclude(metadata_sha256="")
        .prefetch_related("_artifacts")
        .only("filename", "metadata_sha256")
    )
    artifact_batch = []
    contentartifact_batch = []
    packages_batch = []

    with tempfile.TemporaryDirectory(dir=settings.WORKING_DIRECTORY) as temp_dir:
        for package in packages:
            # Get the main artifact for package
            main_artifact = package._artifacts.get()

            filename = package.filename
            metadata_digests = {"sha256": package.metadata_sha256}
            result = artifact_to_metadata_artifact(
                filename, main_artifact, metadata_digests, temp_dir, Artifact
            )
            if result is None:
                # Unset metadata_sha256 when extraction or validation fails
                package.metadata_sha256 = None
                packages_batch.append(package)
                continue
            metadata_artifact, mismatched_sha256 = result
            if mismatched_sha256:
                # Fix the package if its metadata_sha256 differs from the actual value
                package.metadata_sha256 = mismatched_sha256
                packages_batch.append(package)

            # Set the domain on the metadata artifact to match the package's domain
            metadata_artifact.pulp_domain = package._pulp_domain

            contentartifact = ContentArtifact(
                artifact=metadata_artifact,
                content=package,
                relative_path=f"{filename}.metadata",
            )
            artifact_batch.append(metadata_artifact)
            contentartifact_batch.append(contentartifact)

            # Flush in fixed-size batches to bound memory usage.
            if len(artifact_batch) == BATCH_SIZE:
                Artifact.objects.bulk_create(artifact_batch, batch_size=BATCH_SIZE)
                ContentArtifact.objects.bulk_create(contentartifact_batch, batch_size=BATCH_SIZE)
                artifact_batch.clear()
                contentartifact_batch.clear()
            if len(packages_batch) == BATCH_SIZE:
                PythonPackageContent.objects.bulk_update(
                    packages_batch, ["metadata_sha256"], batch_size=BATCH_SIZE
                )
                packages_batch.clear()

        # Flush whatever is left in the final, partial batches.
        if artifact_batch:
            Artifact.objects.bulk_create(artifact_batch, batch_size=BATCH_SIZE)
            ContentArtifact.objects.bulk_create(contentartifact_batch, batch_size=BATCH_SIZE)
        if packages_batch:
            PythonPackageContent.objects.bulk_update(
                packages_batch, ["metadata_sha256"], batch_size=BATCH_SIZE
            )
1914
# This data migration didn't work, so it was turned into a no-op.
1926
1937class Migration (migrations .Migration ):
1948
@@ -198,7 +12,8 @@ class Migration(migrations.Migration):
19812
19913 operations = [
20014 migrations .RunPython (
201- create_missing_metadata_artifacts ,
15+ migrations . RunPython . noop ,
20216 reverse_code = migrations .RunPython .noop ,
17+ elidable = True ,
20318 ),
20419 ]
0 commit comments