# Source code for esgprep.fetchtables.main

# -*- coding: utf-8 -*-

"""
    :platform: Unix
    :synopsis: Fetches ESGF configuration files from GitHub repository.

"""

import traceback

from constants import *
from context import ProcessingContext
from esgprep.utils.constants import GITHUB_API_PARAMETER
from esgprep.utils.github import *


def make_outdir(tables_dir, repository, reference=None):
    """
    Build the output directory.

    :param str tables_dir: The CMOR tables directory submitted
    :param str repository: The GitHub repository name
    :param str reference: The GitHub reference name (tag or branch)
    :returns: The output directory actually used
    :rtype: *str*

    """
    parts = [tables_dir, repository]
    if reference:
        parts.append(reference)
    outdir = os.path.join(*parts)
    # Nothing to do if the directory is already there
    if os.path.isdir(outdir):
        return outdir
    try:
        os.makedirs(outdir)
        Print.warning('{} created'.format(outdir))
    except OSError as e:
        # Submitted tables directory is unusable (e.g. no write access):
        # fall back to the current working directory instead.
        msg = 'Cannot use "{}" (OSError {}: {}) -- '.format(outdir, e.errno, e.strerror)
        msg += 'Use "{}" instead.'.format(os.getcwd())
        Print.warning(msg)
        fallback = [os.getcwd(), repository]
        if reference:
            fallback.append(reference)
        outdir = os.path.join(*fallback)
        if not os.path.isdir(outdir):
            os.makedirs(outdir)
            Print.warning('"{}" created'.format(outdir))
    return outdir
def get_special_case(f, url, repo, ref, auth):
    """
    Get a dictionary of (filename -> file_info) pairs to be used for named files
    in place of the file info from the general API call done for the directory.
    file_info should contain at least the elements 'sha' and 'download_url'.

    :param str f: The filename to pin to a specific reference
    :param str url: The GitHub API content URL template
    :param str repo: The GitHub repository name
    :param str ref: The GitHub reference name (tag or branch) to use for this file
    :param tuple auth: The GitHub authentication credentials
    :returns: A one-entry mapping of filename to its GitHub file info
    :rtype: *dict*

    """
    # Build the per-file content URL pinned to the requested reference
    file_url = url.format(repo) + '/{}'.format(f) + GITHUB_API_PARAMETER.format('ref', ref)
    response = gh_request_content(url=file_url, auth=auth)
    Print.debug('Set special GitHub reference -- "{}": "{}"'.format(f, ref))
    return {f: response.json()}
def fetch_gh_ref(url, outdir, auth, keep, overwrite, backup_mode, filter, special_cases=None):
    """
    Fetch all files for a single reference (e.g. tag or branch) of a GitHub repository.

    :param str url: The GitHub API content URL (including the reference parameter)
    :param str outdir: The output directory
    :param tuple auth: The GitHub authentication credentials
    :param boolean keep: True to keep existing files untouched
    :param boolean overwrite: True to overwrite existing files
    :param str backup_mode: The backup mode for existing files
    :param callable filter: Predicate on remote filenames selecting the files to fetch
        (named ``filter`` to preserve the existing keyword interface, despite
        shadowing the builtin)
    :param dict special_cases: Optional mapping of filename -> file info that
        overrides the directory listing for those files

    """
    # Get GitHub file content of the directory listing
    r = gh_request_content(url=url, auth=auth)
    files = dict((f['name'], f) for f in r.json() if filter(f['name']))
    # Get number of files
    nfiles = len(files)
    if not nfiles:
        Print.warning('No files found on remote repository: {}'.format(url))
    # Progress labels depend only on the URL -- compute them once, not per file.
    project = re.search(REPO_NAME_PATTERN, url).group(1).split('/')[-1]
    ref = url.split('=')[-1]
    # Counter
    progress = 0
    for f, info in files.items():
        try:
            # Overwrite info by special cases ones
            if special_cases and f in special_cases:
                info = special_cases[f]
            # Set output file full path
            outfile = os.path.join(outdir, f)
            # Get checksum and download location
            download_url = info['download_url']
            sha = info['sha']
            # Get GitHub file
            fetch(url=download_url,
                  outfile=outfile,
                  auth=auth,
                  sha=sha,
                  keep=keep,
                  overwrite=overwrite,
                  backup_mode=backup_mode)
        except KeyboardInterrupt:
            raise
        except Exception:
            # Use .get() here: if the failure above WAS a missing
            # 'download_url' key, re-indexing with [] would raise again
            # inside the handler and mask the original traceback.
            download_url = info.get('download_url', '<unknown download URL>')
            exc = traceback.format_exc().splitlines()
            msg = TAGS.FAIL + COLORS.HEADER(download_url) + '\n'
            msg += '\n'.join(exc)
            Print.exception(msg, buffer=True)
        finally:
            progress += 1
            # nfiles > 0 is guaranteed here: the loop body only runs
            # when at least one file was listed.
            percentage = int(progress * 100 / nfiles)
            msg = COLORS.OKBLUE('\rFetching {} tables from {} reference: '.format(project, ref))
            msg += '{}% | {}/{} files'.format(percentage, progress, nfiles)
            Print.progress(msg)
    Print.progress('\n')
def run(args):
    """
    Main process that:

     * Decide to fetch or not depending on file presence/absence and command-line arguments,
     * Gets the GitHub file content from full API URL,
     * Backups old file if desired,
     * Writes response into table file.

    :param ArgumentParser args: Parsed command-line arguments

    """
    # Instantiate processing context manager
    with ProcessingContext(args) as ctx:
        for project in ctx.project:
            try:
                # Set repository name
                repo = REPO_PATTERN.format(project)
                # Get the list of available refs for that repository
                r = gh_request_content(url=ctx.ref_url.format(repo), auth=ctx.auth)
                refs = [os.path.basename(ref['url']) for ref in r.json()]
                # Get refs to fetch
                if hasattr(ctx, 'ref'):
                    if ctx.ref not in refs:
                        raise GitHubReferenceNotFound(ctx.ref, refs)
                    fetch_refs = [ctx.ref]
                else:
                    # Materialize the filter object: in Python 3 a bare
                    # filter() iterator is always truthy (so the emptiness
                    # check below could never fire) and would be exhausted
                    # by the join() in the info message, leaving nothing
                    # for the fetch loop.
                    fetch_refs = list(filter(re.compile(ctx.ref_regex).match, refs))
                    if not fetch_refs:
                        # NOTE(review): assumes ctx.ref_regex is a compiled
                        # pattern (it exposes .pattern) -- confirm against
                        # the ProcessingContext definition.
                        raise GitHubReferenceNotFound(ctx.ref_regex.pattern, refs)
                Print.debug('GitHub Available reference(s): {}'.format(', '.join(sorted(refs))))
                Print.info('Selected GitHub reference(s): {}'.format(', '.join(sorted(fetch_refs))))
                # Get special case for CMIP6_CV.json file
                special_cases = get_special_case(f='CMIP6_CV.json',
                                                 url=ctx.url,
                                                 repo=repo,
                                                 ref='master',
                                                 auth=ctx.auth)
                # Fetch each ref
                for ref in fetch_refs:
                    try:
                        # Set reference url
                        url = ctx.url.format(repo)
                        if ref:
                            url += GITHUB_API_PARAMETER.format('ref', ref)
                        Print.debug('Fetch {} tables from "{}" GitHub reference'.format(project, ref))
                        # Build output directory
                        if ctx.no_subfolder:
                            outdir = make_outdir(ctx.tables_dir, repo)
                        else:
                            outdir = make_outdir(ctx.tables_dir, repo, ref)
                        # Fetch GitHub reference
                        fetch_gh_ref(url=url,
                                     outdir=outdir,
                                     auth=ctx.auth,
                                     keep=ctx.keep,
                                     overwrite=ctx.overwrite,
                                     backup_mode=ctx.backup_mode,
                                     filter=ctx.file_filter,
                                     special_cases=special_cases)
                    except Exception:
                        exc = traceback.format_exc().splitlines()
                        msg = TAGS.FAIL
                        msg += 'Fetching {} tables from {} GitHub reference'.format(COLORS.HEADER(project),
                                                                                    COLORS.HEADER(ref)) + '\n'
                        msg += '\n'.join(exc)
                        Print.exception(msg, buffer=True)
                        ctx.error = True
            except Exception:
                exc = traceback.format_exc().splitlines()
                msg = TAGS.FAIL
                msg += 'Fetching {} tables'.format(COLORS.HEADER(project)) + '\n'
                msg += '\n'.join(exc)
                Print.exception(msg, buffer=True)
                ctx.error = True
        # Flush buffer
        Print.flush()
    # Evaluate errors and exit with appropriated return code
    if ctx.error:
        sys.exit(1)