Skip to content
280 changes: 152 additions & 128 deletions openstack_image_manager/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,101 @@
import requests
import ruamel.yaml
import typer
import yaml

# Typer application object; CLI commands are registered on it via @app.command().
app = typer.Typer()
# Matches an HTML directory-listing link whose text is an all-digit name,
# optionally prefixed with "release-" and optionally followed by "-<digits>".
# Groups: (1) the href without its trailing slash, (2) the digits,
# (3) the "-<digits>" suffix (empty string when absent).
DEBUBU_REGEX = r'<a href="([^"]+)/">(?:release-)?([0-9]+)(\-[0-9]+)?/</a>'

# NOTE(review): IMAGES is assigned again later in this file as a dict that maps
# these same names to getter functions; that later assignment replaces this list.
IMAGES = ["almalinux", "centos", "debian", "rockylinux", "ubuntu"]

def get_latest_default(shortname, latest_checksum_url, latest_url, checksum_type="sha256"):
    """Fetch a checksum file and return the checksum of the latest image.

    The checksum file at ``latest_checksum_url`` is downloaded and parsed
    line by line. The expected line layout depends on ``shortname``:

    * ``rocky-8`` / ``rocky-9``: four whitespace-separated fields starting
      with ``SHA256``; the fourth field is stored as the checksum of the
      file named in ``latest_url``.
    * ``centos-7``: two fields (``<checksum> <filename>``); only filenames
      matching the filename pattern are kept.
    * ``centos-stream-8`` / ``centos-stream-9``: four fields starting with
      ``SHA256``; the second field with its first and last character removed
      is the filename (presumably stripping parentheses -- verify against a
      real checksum file), the fourth field is the checksum.
    * anything else: two fields (``<checksum> <filename>``).

    For the CentOS variants the basename of ``latest_url`` contains the
    placeholders ``HEREBE`` and ``DRAGONS``. They are removed to form a
    regular expression, and the naturally-sorted last matching filename
    replaces the placeholder filename in the returned URL.

    Args:
        shortname: Image short name that selects the parsing rules.
        latest_checksum_url: URL of the checksum file.
        latest_url: Download URL of the image (may contain placeholders).
        checksum_type: Prefix put in front of the checksum value.

    Returns:
        A tuple ``(checksum, url, None)`` where ``checksum`` has the form
        ``"<checksum_type>:<hex digest>"`` and ``url`` is the resolved
        download URL. The third element is always ``None``.

    Raises:
        requests.HTTPError: If the checksum file cannot be fetched.
        IndexError: If a filename pattern is in use and no line matched.
        KeyError: If the checksum file has no entry for the latest filename.
    """
    result = requests.get(latest_checksum_url)
    result.raise_for_status()

    latest_filename = os.path.basename(urlparse(latest_url).path)

    # The parsing variant depends only on the shortname, so decide it once
    # instead of re-testing list membership for every line of the file.
    is_rocky = shortname in ("rocky-8", "rocky-9")
    is_centos_7 = shortname == "centos-7"
    is_centos_stream = shortname in ("centos-stream-8", "centos-stream-9")

    filename_pattern = None
    if is_centos_7 or is_centos_stream:
        filename_pattern = latest_filename.replace("HEREBE", "")
        filename_pattern = filename_pattern.replace("DRAGONS", "")

    checksums = {}
    for line in result.text.split("\n"):
        # Raw string: "\s" in a normal literal is an invalid escape sequence.
        cs = re.split(r"\s+", line)
        if is_rocky:
            if len(cs) == 4 and cs[0] == "SHA256":
                checksums[latest_filename] = cs[3]
        elif is_centos_7:
            if len(cs) == 2 and re.search(filename_pattern, cs[1]):
                checksums[cs[1]] = cs[0]
        elif is_centos_stream:
            if len(cs) == 4 and cs[0] == "SHA256" and re.search(
                filename_pattern, cs[1][1:-1]
            ):
                checksums[cs[1][1:-1]] = cs[3]
        else:
            if len(cs) == 2:
                checksums[cs[1]] = cs[0]

    if filename_pattern:
        # Natural sort so that embedded numbers compare numerically.
        new_latest_filename = natsorted(checksums.keys())[-1]
        new_latest_url = latest_url.replace(latest_filename, new_latest_filename)

        logger.info(f"Latest URL is now {new_latest_url}")
        logger.info(f"Latest filename is now {new_latest_filename}")

        latest_filename = new_latest_filename
        latest_url = new_latest_url

    current_checksum = f"{checksum_type}:{checksums[latest_filename]}"
    return current_checksum, latest_url, None


def resolve_debubu(base_url, rex=re.compile(DEBUBU_REGEX)):
    """Return the last entry of a directory listing after sorting.

    The page at ``base_url`` is fetched and every link matched by ``rex`` is
    collected as a tuple of its capture groups. The tuples are sorted as
    plain strings and the last one is returned.

    Args:
        base_url: URL of the directory listing page.
        rex: Compiled pattern with three capture groups (folder, date, build).

    Returns:
        A tuple ``(latest_folder, latest_date, latest_build)``.

    Raises:
        requests.HTTPError: If the listing cannot be fetched.
        IndexError: If no link on the page matches ``rex``.
    """
    response = requests.get(base_url)
    response.raise_for_status()

    entries = rex.findall(response.text)
    entries.sort()
    folder, date, build = entries[-1]
    return folder, date, build


def get_latest_debubu(shortname, latest_checksum_url, latest_url, checksum_type=None):
    """Resolve the newest dated folder and return the image checksum from it.

    The folder component of ``latest_url`` is replaced with the folder
    reported by ``resolve_debubu``. The checksum file (same basename as
    ``latest_checksum_url``) is fetched from that folder and searched for
    the image filename.

    Args:
        shortname: Not used by this function; it is accepted so the call
            signature matches ``get_latest_default``.
        latest_checksum_url: URL whose basename names the checksum file.
        latest_url: Image URL of the form ``<base>/<folder>/<filename>``.
        checksum_type: Checksum prefix. When ``None`` it is guessed from
            the digest length: 64 characters means sha256, anything else
            sha512.

    Returns:
        A tuple ``(checksum, url, version)`` where ``checksum`` has the form
        ``"<type>:<hex digest>"``, ``url`` points into the resolved folder
        and ``version`` is the date string taken from the folder listing.

    Raises:
        requests.HTTPError: If the checksum file cannot be fetched.
        RuntimeError: If the checksum file does not list the filename.
    """
    base_url, _, filename = latest_url.rsplit("/", 2)
    latest_folder, latest_date, latest_build = resolve_debubu(base_url)

    current_base_url = f"{base_url}/{latest_folder}"
    checksum_basename = latest_checksum_url.rsplit("/", 1)[-1]
    current_checksum_url = f"{current_base_url}/{checksum_basename}"

    response = requests.get(current_checksum_url)
    response.raise_for_status()

    if latest_build:
        # Debian includes date-build in file name
        stem, extension = filename.rsplit(".", 1)
        current_filename = f"{stem}-{latest_date}{latest_build}.{extension}"
    else:
        current_filename = filename

    current_checksum = None
    for entry in response.text.splitlines():
        fields = entry.split()
        if len(fields) != 2:
            continue
        digest, listed_name = fields
        # Ubuntu has the asterisk in front of the name
        if listed_name.startswith("*"):
            listed_name = listed_name[1:]
        if listed_name == current_filename:
            if checksum_type is None:
                # use heuristics to distinguish sha256/sha512
                checksum_type = "sha256" if len(digest) == 64 else "sha512"
            current_checksum = f"{checksum_type}:{digest}"
            break

    if current_checksum is None:
        raise RuntimeError(f"{current_checksum_url} does not contain {current_filename}")

    current_url = f"{current_base_url}/{current_filename}"
    return current_checksum, current_url, latest_date


# Maps each image definition name to the function that resolves its latest
# checksum and download URL. Insertion order is the processing order.
IMAGES = dict(
    almalinux=get_latest_default,
    centos=get_latest_default,
    debian=get_latest_debubu,
    rockylinux=get_latest_default,
    ubuntu=get_latest_debubu,
)


def mirror_image(
image, latest_url, minio_server, minio_bucket, minio_access_key, minio_secret_key
image, minio_server, minio_bucket, minio_access_key, minio_secret_key
):
client = Minio(
minio_server,
Expand All @@ -35,9 +121,8 @@ def mirror_image(
)

version = image["versions"][0]
version["source"] = latest_url

path = urlparse(version["source"])
path = urlparse(version["url"])
dirname = image["shortname"]
filename, fileextension = os.path.splitext(os.path.basename(path.path))

Expand All @@ -54,8 +139,8 @@ def mirror_image(
logger.info("'%s' available in '%s'" % (new_filename, dirname))
except S3Error:
logger.info("'%s' not yet available in '%s'" % (new_filename, dirname))
logger.info("Downloading '%s'" % version["source"])
response = requests.get(version["source"], stream=True)
logger.info("Downloading '%s'" % version["url"])
response = requests.get(version["url"], stream=True)
with open(os.path.basename(path.path), "wb") as fp:
shutil.copyfileobj(response.raw, fp)
del response
Expand All @@ -73,85 +158,22 @@ def mirror_image(
os.remove(filename)


def update_image(image, minio_server, minio_bucket, minio_access_key, minio_secret_key):
def update_image(image, getter, minio_server, minio_bucket, minio_access_key, minio_secret_key):
name = image["name"]
logger.info(f"Checking image {name}")

latest_url = image["latest_url"]
logger.info(f"Latest download URL is {latest_url}")

parsed_url = urlparse(latest_url)
latest_filename = os.path.basename(parsed_url.path)

latest_checksum_url = image["latest_checksum_url"]
logger.info(f"Getting checksums from {latest_checksum_url}")

shortname = image["shortname"]
current_checksum, current_url, current_version = getter(shortname, latest_checksum_url, latest_url)

result = requests.get(latest_checksum_url)
checksums = {}

checksum_type = "sha256"
filename_pattern = None

if image["shortname"] in ["centos-stream-8", "centos-stream-9", "centos-7"]:
filename_pattern = latest_filename.replace("HEREBE", "")
filename_pattern = filename_pattern.replace("DRAGONS", "")
elif image["shortname"] in ["debian-10", "debian-11", "debian-12"]:
checksum_type = "sha512"

for line in result.text.split("\n"):
if image["shortname"] in ["rocky-8", "rocky-9"]:
splitted_line = re.split("\s+", line) # noqa W605
if splitted_line[0] == "SHA256":
checksums[latest_filename] = splitted_line[3]
elif image["shortname"] in [
"ubuntu-14.04",
"ubuntu-16.04",
"ubuntu-16.04-minimal",
"ubuntu-18.04",
"ubuntu-18.04-minimal",
"ubuntu-20.04",
"ubuntu-20.04-minimal",
"ubuntu-22.04",
"ubuntu-22.04-minimal",
]:
splitted_line = re.split("\s+", line) # noqa W605
if len(splitted_line) == 2:
checksums[splitted_line[1][1:]] = splitted_line[0]
elif image["shortname"] in ["centos-7"]:
splitted_line = re.split("\s+", line) # noqa W605
if len(splitted_line) == 2:
if re.search(filename_pattern, splitted_line[1]):
checksums[splitted_line[1]] = splitted_line[0]
elif image["shortname"] in ["centos-stream-8", "centos-stream-9"]:
splitted_line = re.split("\s+", line) # noqa W605
if splitted_line[0] == "SHA256" and re.search(
filename_pattern, splitted_line[1][1:-1]
):
checksums[splitted_line[1][1:-1]] = splitted_line[3]
else:
splitted_line = re.split("\s+", line) # noqa W605
if len(splitted_line) == 2:
checksums[splitted_line[1]] = splitted_line[0]

if filename_pattern:
new_latest_filename = natsorted(checksums.keys())[-1]
new_latest_url = latest_url.replace(latest_filename, new_latest_filename)

logger.info(f"Latest URL is now {new_latest_url}")
logger.info(f"Latest filename is now {new_latest_filename}")

latest_filename = new_latest_filename
latest_url = new_latest_url
logger.info(f"Checksum of current {current_url.rsplit('/', 1)[-1]} is {current_checksum}")

current_checksum = f"{checksum_type}:{checksums[latest_filename]}"
logger.info(f"Checksum of current {latest_filename} is {current_checksum}")

try:
latest_version = image["versions"][0]
latest_checksum = latest_version["checksum"]
logger.info(f"Our checksum is {latest_checksum}")
except IndexError:
latest_checksum = None
if not image["versions"]:
logger.info("No image available so far")
image["versions"].append(
{
Expand All @@ -162,50 +184,48 @@ def update_image(image, minio_server, minio_bucket, minio_access_key, minio_secr
}
)

if latest_checksum != current_checksum:
logger.info(f"Checking {latest_url}")
latest_checksum = image["versions"][0]["checksum"]
logger.info(f"Our checksum is {latest_checksum}")

if latest_checksum == current_checksum:
logger.info(f"Image {name} is up-to-date, nothing to do")
return 0

if current_version is None:
logger.info(f"Checking {current_url}")

conn = urlopen(latest_url, timeout=30)
struct = time.strptime(
conn = urlopen(current_url, timeout=30)
dt = datetime.strptime(
conn.headers["last-modified"], "%a, %d %b %Y %H:%M:%S %Z"
)
dt = datetime.fromtimestamp(time.mktime(struct))

new_version = dt.strftime("%Y%m%d")
logger.info(f"New version is {new_version}")
image["versions"][0]["version"] = new_version

new_build_date = dt.strftime("%Y-%m-%d")
logger.info(f"New build date is {new_build_date}")
image["versions"][0]["build_date"] = dt.date()

logger.info(f"New checksum is {current_checksum}")
image["versions"][0]["checksum"] = current_checksum

shortname = image["shortname"]
format = image["format"]

minio_server = str(minio_server)
minio_bucket = str(minio_bucket)
new_url = f"https://{minio_server}/{minio_bucket}/{shortname}/{new_version}-{shortname}.{format}"
logger.info(f"New URL is {new_url}")
image["versions"][0]["mirror_url"] = new_url
image["versions"][0]["url"] = latest_url

mirror_image(
image,
latest_url,
minio_server,
minio_bucket,
minio_access_key,
minio_secret_key,
)
del image["versions"][0]["source"]
current_version = dt.strftime("%Y%m%d")

else:
logger.info(f"Image {name} is up-to-date, nothing to do")
new_values = {
"version": current_version,
"build_date": datetime.strptime(current_version, "%Y%m%d").date(),
"checksum": current_checksum,
"url": current_url,
}
logger.info(f"New values are {new_values}")
image["versions"][0].update(new_values)

return image
shortname = image["shortname"]
format = image["format"]

minio_server = str(minio_server)
minio_bucket = str(minio_bucket)
new_url = f"https://{minio_server}/{minio_bucket}/{shortname}/{current_version}-{shortname}.{format}"
logger.info(f"New URL is {new_url}")
image["versions"][0]["mirror_url"] = new_url

mirror_image(
image,
minio_server,
minio_bucket,
minio_access_key,
minio_secret_key,
)
return 1


@app.command()
Expand Down Expand Up @@ -235,25 +255,29 @@ def main(
)
logger.add(sys.stderr, format=log_fmt, level=level, colorize=True)

for image in IMAGES:
for image, getter in IMAGES.items():
p = f"etc/images/{image}.yml"

ryaml = ruamel.yaml.YAML()
with open(p) as fp:
data = yaml.safe_load(fp)
data = ryaml.load(fp)

updates = 0
for index, image in enumerate(data["images"]):
if "latest_url" in image:
updated_image = update_image(
image,
minio_server,
minio_bucket,
minio_access_key,
minio_secret_key,
)
data["images"][index] = updated_image

if "latest_url" not in image:
continue
updates += update_image(
image,
getter,
minio_server,
minio_bucket,
minio_access_key,
minio_secret_key,
)

if not updates:
continue
with open(p, "w+") as fp:
ryaml = ruamel.yaml.YAML()
ryaml.explicit_start = True
ryaml.indent(sequence=4, offset=2)
ryaml.dump(data, fp)
Expand Down