pmbootstrap-meow/pmb/helpers/http.py

# Copyright 2023 Oliver Smith
# SPDX-License-Identifier: GPL-3.0-or-later
import hashlib
import json
from pmb.helpers import logging
import os
from pathlib import Path
import shutil
import urllib.request

from pmb.core.context import get_context
import pmb.helpers.run

def cache_file(prefix: str, url: str) -> Path:
    """Build the cache file name for a URL.

    :param prefix: human readable prefix for the file name; every "/" in it
        is replaced with "_" so the result stays a single path component
    :param url: the address whose SHA-256 hex digest becomes the second half
        of the file name

    :returns: relative Path of the form "<prefix>_<sha256 hex digest of url>"
    """
    flat_prefix = prefix.replace("/", "_")
    url_digest = hashlib.sha256(url.encode("utf-8")).hexdigest()
    return Path(flat_prefix + "_" + url_digest)


def download(url, prefix, cache=True, loglevel=logging.INFO,
             allow_404=False):
    """Download a file to disk.

    The response is first written to a temporary "<cache file>.part" file and
    only moved to its final name once the transfer has finished, so an
    interrupted download never leaves a truncated file behind that a later
    call would mistake for a valid cache entry.

    :param url: the http(s) address of the file to download
    :param prefix: for the cache, to make it easier to find (cache files
        get a hash of the URL after the prefix)
    :param cache: if True, and url is cached, do not download it again
    :param loglevel: change to logging.DEBUG to only display the download
        message in 'pmbootstrap log', not in stdout.
        We use this when downloading many APKINDEX files at once, no
        point in showing a dozen messages.
    :param allow_404: do not raise an exception when the server responds with
        a 404 Not Found error. Only display a warning on stdout (no matter if
        loglevel is changed).

    :returns: path to the downloaded file in the cache or None on 404
    :raises RuntimeError: if the file is not cached and the offline flag is
        enabled
    :raises urllib.error.HTTPError: on HTTP errors other than a tolerated 404
    """
    # Create cache folder
    context = get_context()
    cache_dir = context.config.work / "cache_http"
    if not os.path.exists(cache_dir):
        pmb.helpers.run.user(["mkdir", "-p", cache_dir])

    # Check if file exists in cache
    path = cache_dir / cache_file(prefix, url)
    if os.path.exists(path):
        if cache:
            return path
        pmb.helpers.run.user(["rm", path])

    # Offline and not cached
    if context.offline:
        raise RuntimeError("File not found in cache and offline flag is"
                           f" enabled: {url}")

    # Download the file into a temporary file next to the final location
    # (same directory, so the final rename stays on one filesystem)
    logging.log(loglevel, "Download " + url)
    path_partial = f"{path}.part"
    try:
        with urllib.request.urlopen(url) as response:
            with open(path_partial, "wb") as handle:
                shutil.copyfileobj(response, handle)
        # Only a completely written file becomes visible in the cache
        os.replace(path_partial, path)
    # Handle 404
    except urllib.error.HTTPError as e:
        if e.code == 404 and allow_404:
            logging.warning("WARNING: file not found: " + url)
            return None
        raise
    finally:
        # Nothing to do after a successful os.replace(); after a failure this
        # removes the incomplete download
        if os.path.exists(path_partial):
            os.remove(path_partial)

    # Return path in cache
    return path


def retrieve(url, headers=None, allow_404=False):
    """Fetch the content of a URL and return the raw response body.

    :param url: the http(s) address of the resource to fetch
    :param headers: dict of HTTP headers to send with the request; an empty
        dict is used when this is None
    :param allow_404: do not raise an exception when the server responds with a
        404 Not Found error. Only display a warning

    :returns: the response body exactly as response.read() delivers it (not
        decoded), or None when a 404 was tolerated via allow_404
    :raises urllib.error.HTTPError: on HTTP errors other than a tolerated 404
    """
    logging.verbose("Retrieving " + url)

    request_headers = {} if headers is None else headers
    request = urllib.request.Request(url, headers=request_headers)

    try:
        with urllib.request.urlopen(request) as response:
            body = response.read()
    except urllib.error.HTTPError as error:
        # Everything except an explicitly allowed 404 is passed on unchanged
        if error.code != 404 or not allow_404:
            raise
        logging.warning("WARNING: failed to retrieve content from: " + url)
        return None

    return body


def retrieve_json(*args, **kwargs):
    """Fetch the contents of a URL, parse it as JSON and return it.

    See retrieve() for the list of all parameters.

    :returns: the parsed JSON document, or None when retrieve() returned None
        (the server answered 404 and allow_404 was set)
    """
    content = retrieve(*args, **kwargs)
    # retrieve() yields None for a tolerated 404; json.loads(None) would raise
    # a TypeError, so pass the "nothing found" result through instead
    if content is None:
        return None
    return json.loads(content)