# python-appimage/python_appimage/manylinux/download.py
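
# Helper for downloading (and caching) manylinux Docker image layers from
# quay.io: an auth token is obtained, the image manifest for the
# pypa/<image> repository is fetched, missing layers are streamed and
# sha256-checked into <destination>/layers, and the layer list for each tag
# is recorded under <destination>/tags.
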
import collections
from dataclasses import dataclass, field
import glob
import hashlib
import json
from pathlib import Path
import requests
import shutil
import tempfile
from typing import List, Optional

from .config import Arch, LinuxTag
from ..utils.log import debug, log


CHUNK_SIZE = 8189
SUCCESS = 200


class DownloadError(Exception):
    pass


class TarError(Exception):
    pass


@dataclass(frozen=True)
class Downloader:
    tag: LinuxTag
    '''Manylinux tag.'''

    arch: Optional[Arch] = None
    '''Platform architecture.'''

    image: str = field(init=False)
    '''Docker image.'''

    token: str = field(init=False)
    '''Authentication token.'''

    def __post_init__(self):
        # Set host arch if not explicitly specified.
        if self.arch is None:
            arch = Arch.from_host()
            object.__setattr__(self, 'arch', arch)

        # Set image name.
        image = f'{self.tag}_{self.arch}'
        object.__setattr__(self, 'image', image)

    def download(
            self,
            destination: Optional[Path] = None,
            tag: Optional[str] = 'latest'):
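        '''Download the image layers for `tag` into `destination`.

        Layers already present under `destination/layers` are kept. Missing
        ones are fetched from quay.io and verified against their sha256
        digest. The image digest and layer list are recorded under
        `destination/tags/<tag>.json`, and layers no longer referenced by any
        recorded tag are removed.
        '''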
        destination = destination or Path(self.image)

        # Authenticate to quay.io.
        repository = f'pypa/{self.image}'
        url = 'https://quay.io/v2/auth'
        url = f'{url}?service=quay.io&scope=repository:{repository}:pull'
        debug('GET', url)
        r = requests.request('GET', url)
        if r.status_code == SUCCESS:
            object.__setattr__(self, 'token', r.json()['token'])
        else:
            raise DownloadError(r.status_code, r.text, r.headers)

        # Fetch image manifest.
        url = f'https://quay.io/v2/{repository}/manifests/{tag}'
        headers = {
            'Authorization': f'Bearer {self.token}',
            'Accept': 'application/vnd.docker.distribution.manifest.v2+json'
        }
        debug('GET', url)
        r = requests.request('GET', url, headers=headers)
        if r.status_code == SUCCESS:
            image_digest = r.headers['Docker-Content-Digest'].split(':', 1)[-1]
            manifest = r.json()
        else:
            raise DownloadError(r.status_code, r.text, r.headers)
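
        # Note: layer digests in the manifest have the form 'sha256:<hex>';
        # only the hex part is used below, and layers are cached locally as
        # <hex>.tar.gz.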
        # Check missing layers to download.
        required = [layer['digest'].split(':', 1)[-1]
                    for layer in manifest['layers']]
        is_missing = lambda hash_: \
            not (destination / f'layers/{hash_}.tar.gz').exists()
        missing = tuple(filter(is_missing, required))

        # Fetch missing layers.
        with tempfile.TemporaryDirectory() as tmpdir:
            workdir = Path(tmpdir)
            for i, hash_ in enumerate(missing):
                log('DOWNLOAD', f'{self.image} ({tag}) '
                                f'[{i + 1} / {len(missing)}]')
                filename = f'{hash_}.tar.gz'
                url = f'https://quay.io/v2/{repository}/blobs/sha256:{hash_}'
                debug('GET', url)
                r = requests.request('GET', url, headers=headers, stream=True)
                if r.status_code == SUCCESS:
                    debug('STREAM', filename)
                else:
                    raise DownloadError(r.status_code, r.text, r.headers)
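                # Registry blobs are content-addressed: the digest taken from
                # the manifest is the sha256 of the compressed layer, so the
                # hash is verified while streaming the blob to a temporary
                # file.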
                hasher = hashlib.sha256()
                tmp = workdir / 'layer.tgz'
                with open(tmp, "wb") as f:
                    for chunk in r.iter_content(CHUNK_SIZE):
                        if chunk:
                            f.write(chunk)
                            hasher.update(chunk)
                h = hasher.hexdigest()
                if h != hash_:
                    raise DownloadError(
                        f'bad hash (expected {hash_}, found {h})'
                    )
                layers_dir = destination / 'layers'
                layers_dir.mkdir(exist_ok=True, parents=True)
                shutil.move(tmp, layers_dir / filename)

        tags_dir = destination / 'tags'
        tags_dir.mkdir(exist_ok=True, parents=True)
        with open(tags_dir / f'{tag}.json', "w") as f:
            json.dump({'digest': image_digest, 'layers': required}, f)

        # Remove unused layers.
        required = set(required)
        for tag_file in glob.glob(str(destination / 'tags/*.json')):
            with open(tag_file) as f:
                tag_data = json.load(f)
            required |= set(tag_data["layers"])
        required = [f'{hash_}.tar.gz' for hash_ in required]
        for layer in glob.glob(str(destination / 'layers/*.tar.gz')):
            layer = Path(layer)
            if layer.name not in required:
                debug('REMOVE', f'{self.image} [layer/{layer.stem}]')
                layer.unlink()
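

# Minimal usage sketch (illustration only, not part of the original module).
# The enum member names below are assumptions; the actual members of LinuxTag
# and Arch are defined in .config.
#
#   from pathlib import Path
#   from python_appimage.manylinux.config import Arch, LinuxTag
#   from python_appimage.manylinux.download import Downloader
#
#   downloader = Downloader(tag=LinuxTag.MANYLINUX2014, arch=Arch.X86_64)
#   downloader.download(destination=Path('manylinux2014_x86_64'), tag='latest')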