pmbootstrap-meow/pmb/helpers/http.py
Caleb Connolly 198f302a36
treewide: add a Chroot type and adopt pathlib.Path (MR 2252)
Introduce a new module, pmb.core, to contain explicitly typed pmbootstrap
API. The first components are Suffix and SuffixType. These explicitly
define which suffixes are possible; future changes should aim to further
constrain this API (e.g. by validating against available device
codenames or architectures for buildroot suffixes).

Additionally, migrate the entire codebase over to using pathlib.Path.
This is a relatively new part of the Python standard library that uses a
more object-oriented model for path handling. It also has strong type
hinting and other features that make it much cleaner and easier to work
with than building paths out of f-strings. The Chroot class overloads
the "/" operator the same way the Path object does, allowing one to
write paths relative to a given chroot as:

builddir = chroot / "home/pmos/build"

The Chroot class also has a string representation ("native" or
"rootfs_valve-jupiter") and a .path property for directly accessing the
absolute path (as a Path object).

The general idea here is to encapsulate common patterns into type-hinted
code, and gradually reduce the number of assumptions made around the
codebase so that future changes are easier to implement.

As the chroot suffixes are now part of the Chroot class, we also
implement validation for them; this encodes the rules on suffix naming
and causes a runtime exception if a suffix doesn't follow them.
2024-06-23 12:38:37 +02:00
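
As a rough illustration of the ideas above, here is a minimal, hypothetical
sketch of such a class. The names, the suffix-naming rules and the
"chroot_<suffix>" directory layout are assumptions for illustration, not the
actual pmb.core implementation:

    from enum import Enum
    from pathlib import Path

    # Stand-in for pmbootstrap's work directory (pmb.config.work in the real code)
    WORK = Path("/tmp/pmbootstrap_work")

    class SuffixType(Enum):
        NATIVE = "native"
        ROOTFS = "rootfs"
        BUILDROOT = "buildroot"

    class Chroot:
        def __init__(self, suffix_type: SuffixType, name: str = ""):
            # Hypothetical validation rule: a native chroot carries no extra
            # name, the other suffix types require one (e.g. a device codename)
            if suffix_type == SuffixType.NATIVE and name:
                raise ValueError(f"native chroot can't have a name: {name}")
            if suffix_type != SuffixType.NATIVE and not name:
                raise ValueError(f"{suffix_type.value} chroot needs a name")
            self.type = suffix_type
            self.name = name

        def __str__(self) -> str:
            # e.g. "native" or "rootfs_valve-jupiter"
            return self.type.value if not self.name else f"{self.type.value}_{self.name}"

        @property
        def path(self) -> Path:
            # Absolute path of the chroot below the work directory
            return WORK / f"chroot_{self}"

        def __truediv__(self, other: str) -> Path:
            # Mirror pathlib's "/" operator; strip a leading "/" so that
            # absolute-looking paths still resolve inside the chroot
            return self.path / str(other).lstrip("/")

    builddir = Chroot(SuffixType.ROOTFS, "valve-jupiter") / "home/pmos/build"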


# Copyright 2023 Oliver Smith
# SPDX-License-Identifier: GPL-3.0-or-later
import hashlib
import json
import logging
import os
import shutil
import urllib.error
import urllib.request

from pmb.core.types import PmbArgs
import pmb.config
import pmb.helpers.run


def download(args: PmbArgs, url, prefix, cache=True, loglevel=logging.INFO,
             allow_404=False):
    """Download a file to disk.

    :param url: the http(s) address of the file to download
    :param prefix: for the cache, to make it easier to find (cache files
        get a hash of the URL after the prefix)
    :param cache: if True, and url is cached, do not download it again
    :param loglevel: change to logging.DEBUG to only display the download
        message in 'pmbootstrap log', not in stdout. We use this when
        downloading many APKINDEX files at once, no point in showing a
        dozen messages.
    :param allow_404: do not raise an exception when the server responds
        with a 404 Not Found error. Only display a warning on stdout (no
        matter if loglevel is changed).
    :returns: path to the downloaded file in the cache or None on 404
    """
    # Create cache folder
    if not os.path.exists(pmb.config.work / "cache_http"):
        pmb.helpers.run.user(args, ["mkdir", "-p",
                                    pmb.config.work / "cache_http"])

    # Check if file exists in cache (filename: prefix + hash of the URL)
    prefix = prefix.replace("/", "_")
    path = pmb.config.work / "cache_http" / (
        prefix + "_" + hashlib.sha256(url.encode("utf-8")).hexdigest())
    if os.path.exists(path):
        if cache:
            return path
        pmb.helpers.run.user(args, ["rm", path])

    # Offline and not cached
    if args.offline:
        raise RuntimeError("File not found in cache and offline flag is"
                           f" enabled: {url}")

    # Download the file
    logging.log(loglevel, "Download " + url)
    try:
        with urllib.request.urlopen(url) as response:
            with open(path, "wb") as handle:
                shutil.copyfileobj(response, handle)
    # Handle 404
    except urllib.error.HTTPError as e:
        if e.code == 404 and allow_404:
            logging.warning("WARNING: file not found: " + url)
            return None
        raise

    # Return path in cache
    return path
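

# A minimal usage sketch (not part of the original module): "args" is the
# parsed PmbArgs object pmbootstrap passes around; the URL and prefix below
# are made-up examples, not real endpoints.
#
#   index = download(args, "https://example.com/APKINDEX.tar.gz", "apkindex",
#                    loglevel=logging.DEBUG, allow_404=True)
#   if index is None:
#       logging.warning("Got a 404, continuing without this index")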


def retrieve(url, headers=None, allow_404=False):
    """Fetch the content of a URL and return it.

    :param url: the http(s) address of the resource to fetch
    :param headers: dict of HTTP headers to use
    :param allow_404: do not raise an exception when the server responds
        with a 404 Not Found error. Only display a warning
    :returns: body of the response, or None on 404 if allow_404 is set
    """
    # Download the file (logging.verbose is pmbootstrap's custom log level)
    logging.verbose("Retrieving " + url)

    if headers is None:
        headers = {}

    req = urllib.request.Request(url, headers=headers)
    try:
        with urllib.request.urlopen(req) as response:
            return response.read()
    # Handle 404
    except urllib.error.HTTPError as e:
        if e.code == 404 and allow_404:
            logging.warning("WARNING: failed to retrieve content from: " + url)
            return None
        raise


def retrieve_json(*args, **kwargs):
    """Fetch the contents of a URL, parse it as JSON and return it.

    See retrieve() for the list of all parameters.
    """
    return json.loads(retrieve(*args, **kwargs))
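

# A minimal usage sketch (not part of the original module): the URL and header
# values are illustrative only. retrieve() returns the raw response body;
# retrieve_json() feeds it through json.loads().
#
#   body = retrieve("https://example.com/status",
#                   headers={"User-Agent": "pmbootstrap"}, allow_404=True)
#   info = retrieve_json("https://example.com/info.json")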