Jinja2-3.1.2
------------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

try:
    import aiohttp
except Exception:
    # `Exception` instead of a bare `except` so that KeyboardInterrupt / SystemExit
    # raised while importing are never mistaken for "aiohttp is unavailable".
    # On Linux aarch64 a failed import is treated as "nothing to do" (exit code 0);
    # on every other platform the original import error is re-raised.
    _uname = platform.uname()
    if _uname.system == "Linux" and _uname.machine == "aarch64":
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

# Type aliases used in annotations: the key under which a license is reported
# and the text of the license itself.
DependencyName = str
LicenseText = str

# HTTP session shared by the fetch helpers; get_licenses() binds it for the duration of a run.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # added manually to pair common english spellings
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names tried, in this order, when guessing raw GitHub URLs of license files.
main_branches = [
    "master",
    "main",
    "latest",
]

# File that records the name of every package a license was gathered for.
# main() rebinds this to a path next to the destination file.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the optional internal license cache; the cache is disabled when unset.
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    # Drop trailing slashes so request paths can be appended with a single "/".
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# API key sent as the "X-Key" header when writing to the cache; writes are skipped when unset.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Collects the license text of every pinned requirement listed in ``requirements_path``.

    Each requirement line must pin a version with ``==``. Lines that do not start with an
    alphanumeric character (blank lines, comments, pip options) are skipped. A line may carry
    an environment marker after ``;`` and extra pip arguments after the version.

    :param requirements_path: path of the requirements file to read.
    :param pull_licenses: when given, only these dependency names are processed.
    :param download_dir: directory handed to ``get_license`` for downloaded packages.
    :param ignore: dependency names (without versions) whose licenses are not collected.
    :param ignore_all_dependencies: forwarded to ``get_license``.
    :return: dict mapping ``"<name>-<version>"`` to license text, sorted by key.
    :raises ValueError: if a requirement has no pinned version or a license cannot be found.
    """
    if ignore is None:
        ignore = []

    def is_ignored(package: str) -> bool:
        # Result keys have the form "<name>-<version>" while ``ignore`` holds bare names,
        # so compare both the full key and the name part in front of the last "-".
        return package in ignore or package.rsplit("-", 1)[0] in ignore

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    global _session
    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        license_tasks = {}
        for requirement in requirements:
            # Drop trailing "  # comment" text so it is not passed to pip as an argument.
            requirement = re.sub(r"\s+#.*$", "", requirement.strip())
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue

            dependency, separator, version = requirement.partition("==")
            dependency = dependency.strip()

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                version, markers = version.split(";", 1)
            # Strip before looking for extra arguments so "pkg == 1.0" keeps its version.
            version = version.strip()

            if not separator or not version:
                raise ValueError(f"{requirement} must contain a version.")

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            version, *trailing = version.split(None, 1)
            extra_pip_args = trailing[0].strip() if trailing else None

            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        await asyncio.gather(*license_tasks.values())

        licenses = {}
        # Packages reported without a license text; another requirement may still provide one.
        unresolved = []
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                if is_ignored(package) or licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")
                elif package not in unresolved:
                    unresolved.append(package)

        # Nothing changes ``licenses`` after this point, so one pass is enough to decide
        # which unresolved packages were covered by another requirement's result.
        missing = []
        for package in unresolved:
            if licenses.get(package):
                print(f"\tverified found license for (after repeat 0): {package}")
            else:
                missing.append(package)

        if missing:
            raise ValueError(f"Licenses not found for {missing}")

        licenses = {name: licenses[name] for name in sorted(licenses)}
        for name, license_text in licenses.items():
            if license_text:
                check_gpl(name, license_text)
        return licenses


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed

    Only a single ``<marker> <operator> <value>`` comparison is supported, for example
    ``python_version >= "3.8"`` or ``sys_platform == "win32"``.

    :param markers: text that followed ``;`` on the requirement line (may be empty).
    :return: True when the dependency applies to the running interpreter, or when the marker
        has no recognisable operator or names a marker without a binding.
    :raises ValueError: if the marker combines conditions with ``and`` / ``or``.
    """
    if not markers:
        return True

    # A raw string is required ("\b" in a normal string is a backspace character) and
    # re.search is needed because the keyword never sits at the start of the marker.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    operators = {
        ">=": operator.ge,
        "<=": operator.le,
        "<": operator.lt,
        ">": operator.gt,
        "==": operator.eq,
        "!=": operator.ne,
        # PEP 508 evaluates "<left> in <right>" like Python does: left operand inside right.
        "not in": lambda a, b: a not in b,
        "in": lambda a, b: a in b,
    }

    # Find the leftmost operator. Two-character operators are listed before their
    # one-character prefixes, and the word boundaries keep "in" from matching inside
    # values such as "win32".
    found = re.search(r">=|<=|==|!=|<|>|\bnot\s+in\b|\bin\b", markers)
    if found is None:
        # No operators were found, include the dependency.
        return True

    op = " ".join(found.group(0).split())  # normalises "not   in" to "not in"
    cmp = operators[op]
    marker = markers[:found.start()].strip(" '\"")
    value = markers[found.end():].strip(" '\"")

    binding = get_marker_bindings().get(marker)
    if not binding:
        # Unknown marker binding -- skip marker and include the dependency
        return True

    def version_key(text):
        # Leading numeric release components, e.g. "3.10.1rc1" -> (3, 10, 1).
        release = re.match(r"\d+(?:\.\d+)*", text)
        return tuple(int(part) for part in release.group(0).split(".")) if release else None

    version_markers = ("python_version", "python_full_version", "implementation_version")
    if marker in version_markers and op in (">=", "<=", "<", ">"):
        # Order versions numerically: as plain strings "3.10" would sort before "3.6".
        left, right = version_key(binding), version_key(value)
        if left is not None and right is not None:
            return cmp(left, right)

    return cmp(binding, value)


_bindings = {}


def get_marker_bindings():
    """
    Builds, once, the mapping from environment marker names to the values of the running
    interpreter and platform, and returns the same mapping on every later call.
    """
    global _bindings
    if _bindings:
        return _bindings

    implementation = getattr(sys, 'implementation', None)
    if implementation is None:
        implementation_name = ''
        implementation_version = '0'
    else:
        implementation_name = implementation.name
        info = implementation.version
        implementation_version = '{0.major}.{0.minor}.{0.micro}'.format(info)
        if info.releaselevel != 'final':
            # Non-final releases get the first letter of the level plus the serial, e.g. "b2".
            implementation_version += info.releaselevel[0] + str(info.serial)

    major_minor = platform.python_version_tuple()[:2]

    _bindings = dict(
        implementation_name=implementation_name,
        implementation_version=implementation_version,
        os_name=os.name,
        platform_machine=platform.machine(),
        platform_python_implementation=platform.python_implementation(),
        platform_release=platform.release(),
        platform_system=platform.system(),
        platform_version=platform.version(),
        python_full_version=platform.python_version(),
        python_version='.'.join(major_minor),
        sys_platform=sys.platform,
    )
    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """
    Downloads the license text stored for a package in the package-metadata S3 bucket.

    The lookup is best effort: any request failure or non-200 response yields "".

    :param package: package name used in the object path.
    :param version: package version used in the object path.
    :param package_manager: first path segment of the object key.
    :return: license text, or "" when it is not available.
    """
    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        # ``async with`` releases the connection back to the session when done.
        async with _session.get(url) as response:
            # Without this check the body of an error response would be
            # returned as if it were the license text.
            if response.status != 200:
                return ""
            return await response.text()
    except Exception:
        # ``Exception`` (not a bare except) so task cancellation is not swallowed.
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Finds the license of one dependency, trying local copies before any download.

    With ``download_dir``: an already downloaded copy in that directory is inspected first,
    then the S3 bucket, then ``pip download`` is used. Without it, the folders on ``sys.path``
    are searched. If none of that produces a license, ``fetch_license`` is the last resort.

    :param dependency: dependency name.
    :param version: pinned version.
    :param download_dir: directory holding (or receiving) downloaded packages.
    :param ignore: dependency names to skip.
    :param ignore_all_dependencies: forwarded to ``pip_download``.
    :param extra_pip_args: extra command-line arguments forwarded to ``pip_download``.
    :return: dict mapping ``"<name>-<version>"`` to license text; empty when nothing was found.
    """
    if ignore is None:
        # The default is None, and ``in None`` would raise TypeError.
        ignore = []
    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        tempdir = None
        try:
            if os.path.exists(download_dir):
                # Check if package has already been downloaded
                archive_prefix = f"{dependency.replace('-', '_')}-{version}"
                for path in os.listdir(download_dir):
                    if archive_prefix not in path:
                        continue

                    license_text = await get_local_license(dependency, version, os.path.join(download_dir, path))
                    if license_text:
                        print(f"License for {dependency_str} found locally.")

                        await put_license_into_internal_cache(dependency, version, license_text)
                        return {dependency_str: license_text}

                    license_text = await get_license_from_s3(dependency, version)
                    if license_text:
                        print(f"Downloaded a license for {dependency_str} from s3")

                        await put_license_into_internal_cache(dependency, version, license_text)
                        return {dependency_str: license_text}

                    print(f"No license available for {dependency_str}. Downloading a clean copy.")
                    tempdir = tempfile.TemporaryDirectory()
                    download_dir = tempdir.name
                    # Stop scanning: the remaining entries belong to the old directory and
                    # must not be joined with the new temporary one.
                    break

            licenses = await pip_download(dependency, version, download_dir, ignore,
                                          ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        finally:
            # pip_download reads the licenses before it returns, so the clean copy can go.
            if tempdir is not None:
                tempdir.cleanup()

        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}
    print(f"No license found for: {dependency_str}")
    return {}


# Kept for backward compatibility; pip_download now parses archive names with
# _parse_archive_name because this greedy pattern puts ".tar.gz" into the version of sdists.
dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")

# Archive suffixes recognised in the download folder (longest forms first).
_ARCHIVE_SUFFIXES = (".tar.gz", ".tar.bz2", ".tar.xz", ".tgz", ".tbz2", ".tbz", ".txz", ".tar", ".zip", ".whl")


def _parse_archive_name(filename: str):
    """Splits an archive file name into ``(name, version)``; returns None if it cannot."""
    lowered = filename.lower()
    for suffix in _ARCHIVE_SUFFIXES:
        if lowered.endswith(suffix):
            stem = filename[: -len(suffix)]
            break
    else:
        return None

    if lowered.endswith(".whl"):
        # Wheel: {name}-{version}[-{build}]-{python}-{abi}-{platform}.whl, name without "-".
        parts = stem.split("-")
        if len(parts) < 5:
            return None
        return parts[0], parts[1]

    # Source archive: {name}-{version}; the name itself may contain "-".
    name, separator, version = stem.rpartition("-")
    if not separator or not name or not version:
        return None
    return name, version


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Runs ``pip download`` for a dependency and collects a license for every archive it produced.

    Archives are downloaded to ``<download_dir>/<dependency>``, extracted next to the archive
    file and searched with ``get_local_license``; ``fetch_license`` and ``get_license_from_s3``
    are fallbacks. Found licenses are stored in the internal cache.

    :param dependency: dependency name.
    :param version: pinned version.
    :param download_dir: parent directory for the per-dependency download folder.
    :param ignore: dependency names to skip.
    :param ignore_all_dependencies: when True, try the internal cache first and pass
        ``--no-deps`` to pip.
    :param extra_pip_args: extra arguments appended to the pip command, split on whitespace.
    :return: dict mapping ``"<name>-<version>"`` to license text ("" when none was found).
    :raises Exception: if the pip process cannot be started.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            return {f"{dependency}-{version}": license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = [
        sys.executable,
        "-m",
        "pip",
        "download",
        f"{dependency}=={version}",
        "-d",
        download_dep_dir,
        "-q",
        "--disable-pip-version-check",
        *additional_args,
    ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        return_code = await process.wait()
    except Exception as exc:
        raise Exception(f"Failed to find and download a license for {dependency}=={version} using pip.") from exc

    if return_code != 0:
        print(f"[Warning] pip download for {dependency}=={version} exited with code {return_code}.")
    if not os.path.isdir(download_dep_dir):
        # Nothing was downloaded; an empty result lets the caller use its other sources.
        return {}

    licenses = {}
    for path in os.listdir(download_dep_dir):
        dependency_path = join(download_dep_dir, path)
        if os.path.isdir(dependency_path):
            # Skip extracted folders.
            continue

        parsed = _parse_archive_name(path)
        if parsed is None:
            continue
        dependency_name, dependency_version = parsed

        if dependency_name in ignore:
            continue

        archive_path, _ = os.path.splitext(dependency_path)
        # Detect the format from the file content rather than from its extension.
        if tarfile.is_tarfile(dependency_path):
            with tarfile.open(dependency_path) as archive:
                if hasattr(tarfile, "data_filter"):
                    # Refuse members that would be written outside the target folder.
                    archive.extractall(archive_path, filter="data")
                else:
                    archive.extractall(archive_path)
        elif zipfile.is_zipfile(dependency_path):
            with zipfile.ZipFile(dependency_path) as archive:
                archive.extractall(archive_path)
        else:
            print(f"[Warning] Skipping {dependency_path}: not a tar or zip archive.")
            continue

        # Source archives usually unpack into one "<name>-<version>" folder; when that
        # folder exists, search inside it ("x.tar.gz" is extracted into "x.tar").
        root_name = os.path.basename(archive_path)
        if root_name.endswith(".tar"):
            root_name = root_name[: -len(".tar")]
        nested_root = join(archive_path, root_name)
        if os.path.isdir(nested_root):
            archive_path = nested_root

        dependency_str = f"{dependency_name}-{dependency_version}"
        license_text = await get_local_license(dependency_name, dependency_version, archive_path)

        if not license_text:
            license_text = await fetch_license(dependency_name, dependency_version)

        if not license_text:
            license_text = await get_license_from_s3(dependency_name, dependency_version)

        licenses[dependency_str] = license_text or ""
        if license_text:
            await put_license_into_internal_cache(dependency_name, dependency_version, license_text)
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Searches every ``sys.path`` entry, in order, for a local license of the dependency.

    :return: the first non-empty license text found, or "" when no entry provides one.
    """
    for search_root in sys.path:
        candidate = await get_local_license(dependency, version, search_root)
        if not candidate:
            continue
        return candidate
    return ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Looks for a license of ``dependency`` among the entries of the folder ``path``.

    Search order: ``*.egg-info`` / ``*.dist-info`` folders whose name starts with
    ``<name>-<version>``, then a folder named like the package, then ``path`` itself.

    :param dependency: dependency name; "-" and "_" spellings are both accepted.
    :param version: version expected in the metadata folder name.
    :param path: folder to inspect.
    :return: license text, or "" when ``path`` is not a folder or nothing was found.
    """
    if not os.path.isdir(path):
        return ""

    dependency_name = dependency.lower().replace("-", "_")
    # The package folder may use either spelling ("foo-bar" is usually stored as "foo_bar").
    package_names = {dependency.lower(), dependency_name}
    meta_prefix = f"{dependency_name}-{version}".lower()

    package_meta_candidates = []
    package_path = ""
    for dependency_path in os.listdir(path):
        lowered = dependency_path.lower()
        if lowered.startswith(meta_prefix):
            package_meta_candidates.append(join(path, dependency_path))
        elif lowered in package_names:
            package_path = join(path, dependency_path)

    for package_meta in package_meta_candidates:
        if not os.path.isdir(package_meta):
            # Archives and single-file ".egg-info" entries have no metadata files to read.
            continue
        if package_meta.endswith(".egg-info"):
            license_text = await egg_info_license(package_meta)
            if license_text:
                return license_text
        elif package_meta.endswith(".dist-info"):
            license_text = await dist_info_license(package_meta)
            if license_text:
                return license_text

    if package_path:
        # Try to find a license file in the package source code
        license_text = find_license_file(package_path)
        if license_text:
            return license_text

    # A license file can be in the specified folder itself.
    # NOTE(review): when ``path`` is a shared folder such as a sys.path entry, a license file
    # found here may belong to something else -- confirm this fallback is intended.
    return find_license_file(path) or ""


async def egg_info_license(package_path: str) -> str:
    """
    Resolves a license through the ``Home-page`` recorded in an ``.egg-info`` folder.

    :param package_path: path of the ``.egg-info`` folder.
    :return: license text fetched via ``fetch_from_github`` for the home page URL, or ""
        when ``PKG-INFO`` is missing, names no home page, or no license could be fetched.
    """
    pkg_info_path = join(package_path, "PKG-INFO")
    if not os.path.isfile(pkg_info_path):
        return ""

    url = ""
    # Undecodable bytes are replaced so a bad byte elsewhere cannot abort the lookup.
    with open(pkg_info_path, encoding="utf-8", errors="replace") as file:
        for line in file:
            if line.startswith("Home-page:"):
                url = line[len("Home-page:"):].strip()
                break

    if not url:
        return ""
    return (await fetch_from_github(url)) or ""


async def dist_info_license(package_path: str) -> str:
    """
    Finds a license for a ``.dist-info`` folder.

    A license file inside the folder wins. Otherwise the ``METADATA`` headers are read and
    the project home page (``Home-page``, or ``Project-URL: Homepage, <url>``) is handed to
    ``fetch_from_github``.

    :param package_path: path of the ``.dist-info`` folder.
    :return: license text, or "" when none could be found.
    """
    license_text = find_license_file(package_path)
    if license_text:
        return license_text

    metadata_path = join(package_path, "METADATA")
    if not exists(metadata_path):
        return ""

    try:
        with open(metadata_path, "rb") as file:
            metadata = file.readlines()
    except Exception:
        print(f"Could not open metadata file for: {package_path}")
        raise

    home_page = ""
    project_homepage = ""
    for raw_line in metadata:
        try:
            line = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            # Skip lines that are not valid UTF-8.
            continue

        if not line.strip():
            # A blank line ends the headers; what follows is the free-form description.
            break
        if line.startswith("Home-page:"):
            home_page = line[len("Home-page:"):].strip()
            if home_page:
                break
        elif line.startswith("Project-URL:") and not project_homepage:
            # Format: "Project-URL: <label>, <url>"
            label, _, target = line[len("Project-URL:"):].partition(",")
            if label.strip().lower() == "homepage":
                project_homepage = target.strip()

    url = home_page or project_homepage
    if not url:
        return ""
    return (await fetch_from_github(url)) or ""


def find_license_file(package_path: str) -> str:
    """
    Reads the first license-like file found directly inside a package folder.

    A file qualifies when its name starts with "license", "licence" or "copying" in any
    letter case. A ``licenses`` sub-folder is searched first, then ``package_path`` itself.
    Candidates are visited in sorted order so the choice is reproducible.

    :param package_path: folder to search (sub-folders other than ``licenses`` are ignored).
    :return: file content, or "" when ``package_path`` is not a folder or has no such file.
    """
    if not os.path.isdir(package_path):
        return ""

    # Every entry of the module-level ``license_names`` list starts with one of these words,
    # so this pattern alone covers the exact-name lookups as well.
    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)

    def first_license_in(folder: str) -> str:
        for name in sorted(os.listdir(folder)):
            candidate = join(folder, name)
            if os.path.isdir(candidate):
                continue
            # safe because the (possibly) case-sensitive path is what we have (unlike URL)
            if license_regex.match(name):
                return candidate
        return ""

    license_path = ""
    licenses_folder = join(package_path, "licenses")
    if os.path.isdir(licenses_folder):
        license_path = first_license_in(licenses_folder)
    if not license_path:
        # No "licenses" folder, or nothing usable inside it.
        license_path = first_license_in(package_path)
    if not license_path:
        return ""

    # i want to try / except here because i want to know when we have issues reading files.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetches a license from the internal cache or, failing that, through PyPI metadata.

    The PyPI JSON document of the release supplies the project home page, which is then
    handed to ``fetch_from_github``.

    :param dependency: dependency name.
    :param version: release version.
    :param session: HTTP session to use; defaults to the module-level session.
    :return: license text, or "" when PyPI does not know the release or no license was found.
    :raises ValueError: if the PyPI metadata names no home page.
    """
    if session is None:
        session = _session

    license_text = await fetch_license_from_internal_cache(dependency, version, session)
    if license_text:
        return license_text

    # ``async with`` releases the connection once the JSON body has been read.
    async with session.get(f"https://pypi.org/pypi/{dependency}/{version}/json", ssl=get_ssl_cert()) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    info = response_json.get("info") or {}
    home_page = info.get("home_page")
    if not home_page:
        # "project_urls" can be null or lack a home page entry; labels vary in letter case.
        project_urls = info.get("project_urls") or {}
        home_page = next(
            (target for label, target in project_urls.items() if label.lower() == "homepage" and target),
            None,
        )
        if not home_page:
            raise ValueError(f"home_page is missing for {dependency}=={version}")

    return (await fetch_from_github(home_page)) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Reads a license from the internal cache configured through REPO_LICENSING_CACHE_URL.

    The lookup is best effort: a disabled cache, a failed request or a non-200 response
    all yield "".

    :param dependency: dependency name; "_" is replaced by "-" for the cache key.
    :param version: dependency version.
    :param session: HTTP session to use; defaults to the module-level session.
    :return: cached license text, or "".
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}") as response:
            if response.status != 200:
                return ""
            payload = await response.json()
        license_text = payload.get("text") or ""
    except Exception:
        # ``Exception`` (not a bare except) so task cancellation is not swallowed.
        return ""

    if license_text:
        print(f"Found license for {dependency}=={version} in internal cache.")
    return license_text


async def fetch_from_github(url: str, session: aiohttp.ClientSession = None) -> str:
    """
    Downloads the license file of the GitHub repository that ``url`` points into.

    Raw-file URLs are guessed first from ``main_branches`` x ``license_names``; the GitHub
    license API is only used when every guess fails.

    :param url: project URL; only hosts containing "github" are handled.
    :param session: HTTP session to use; defaults to the module-level session.
    :return: license text, or "" when the URL is not a GitHub repository URL or no license
        could be downloaded.
    """
    # ``import urllib`` alone does not guarantee that the ``parse`` submodule is loaded.
    import urllib.parse

    if session is None:
        session = _session

    parsed_url = urllib.parse.urlparse(url)
    if "github" not in parsed_url.netloc:
        return ""

    # Keep only "<owner>/<repo>": links such as ".../repo/tree/main/docs" or ".../repo.git"
    # would otherwise produce raw URLs that cannot exist.
    segments = [segment for segment in parsed_url.path.split("/") if segment]
    if len(segments) < 2:
        return ""
    owner, repo = segments[0], segments[1]
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    repo_path = f"{owner}/{repo}"

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            raw_url = f"https://raw.githubusercontent.com/{repo_path}/{branch}/{license_name}"
            async with session.get(raw_url, ssl=ssl_context) as response:
                if response.status == 200:
                    return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    async with session.get(f"https://api.github.com/repos/{repo_path}/license", ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    download_url = response_json.get("download_url")
    if not download_url:
        return ""

    async with session.get(download_url, ssl=ssl_context) as response:
        if response.status != 200:
            # Do not report the body of an error response as license text.
            return ""
        return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Stores a license text in the internal cache.

    Does nothing unless both REPO_LICENSING_CACHE_URL and REPO_LICENSING_CACHE_API_KEY are
    set. The upload is best effort: failures are reported as a warning and never raised.

    :param dependency: dependency name; "_" is replaced by "-" for the cache key.
    :param version: dependency version.
    :param license_text: text to store.
    :param session: HTTP session to use; defaults to the module-level session.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        ) as response:
            if response.status == 201:
                print(f"Put license for {dependency}=={version} into internal cache.")
            elif response.status == 401:
                print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")
    except Exception as exc:
        # The cache is optional, so a failed upload must not fail the run -- but say so.
        print(f"[Warning] Could not put {dependency}=={version} license into internal cache: {exc}")


def check_gpl(dependency: str, license_text: str):
    """
    Aborts the program when a license text mentions GPL or LGPL.

    Matches the standalone words "GPL" / "LGPL", optionally followed by a version such as
    "v3" ("GPLv3"), anywhere in the text.

    :param dependency: name printed in the error message.
    :param license_text: license text to scan; empty or None is accepted.
    :raises SystemExit: with code 666 when a match is found.
    """
    # A raw string is required ("\b" in a normal string is a backspace character) and
    # re.search is needed because the word is rarely the very first thing in the text.
    if license_text and re.search(r"\bL?GPL(?:v\d[\d.]*)?\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Gathers the licenses for a requirements file and writes or returns the combined text.

    Each license is emitted as the dependency key, a dashed underline of the same length,
    the license text and a blank line.

    :param requirements_path: requirements file to process.
    :param pull_licenses: when given, only these dependency names are processed.
    :param dest: output file; when omitted the combined text is returned instead.
    :param download_dir: directory for downloaded packages.
    :param ignore: dependency names to skip.
    :param ignore_all_dependencies: only collect licenses of the listed requirements.
    :param overwrite: replace ``dest`` instead of appending to it.
    :return: the combined text when ``dest`` is not given, otherwise None. Errors raised
        while gathering are printed and also result in None.
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    # ``dest`` is optional, so only derive a folder from it when it is set. Using the
    # base name keeps the path from growing when main() is called more than once.
    PACKAGES_FILE = os.path.join(os.path.dirname(dest) if dest else "", os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        license_text = "".join(
            f"{dep}\n" f"{'-' * len(dep)}\n" f"{text}\n\n" for dep, text in licenses.items()
        )

        if dest:
            mode = "w" if overwrite else "a+"
            with open(dest, mode, encoding="utf-8") as file:
                file.write(license_text)
        else:
            return license_text
    except Exception as exc:
        # NOTE(review): failures are only printed, so callers cannot tell them apart from
        # a successful run that wrote to ``dest`` -- confirm this is intended.
        print(exc)
    finally:
        loop.close()


# Holds the value computed by the first get_ssl_cert() call (at most one item).
_ssl_context_cache = []


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Returns the ``ssl`` argument to pass to HTTP requests.

    The value is computed once and reused: building a context re-reads the CA bundle,
    which is wasteful to repeat for every request.

    :return: an ``SSLContext`` that trusts the ``certifi`` CA bundle, or False (certificate
        verification disabled) when ``certifi`` is not installed.
    """
    if _ssl_context_cache:
        return _ssl_context_cache[0]

    try:
        import certifi
    except ImportError:
        # Don't verify SSL certificate if certifi is not installed.
        print("[Warning] certifi is not installed; SSL certificates will not be verified.")
        context = False
    else:
        context = ssl.create_default_context(cafile=certifi.where())

    _ssl_context_cache.append(context)
    return context


if __name__ == "__main__":
    # Command-line entry point: declare the options, run main() and print whatever it returns.
    cli = argparse.ArgumentParser()

    # (flags, keyword arguments) pairs, registered in this order.
    option_specs = [
        (("-r", "--requirements"), dict(dest="requirements", type=str, default="requirements.txt")),
        (
            ("--pull-licenses",),
            dict(
                dest="pull_licenses",
                type=str,
                nargs="+",
                help="Specifies list of licenses that should be pulled from the requirements.txt list",
            ),
        ),
        (("-d", "--dest"), dict(dest="dest", type=str)),
        (("--download-dir",), dict(dest="download_dir", type=str)),
        (
            ("--ignore",),
            dict(
                dest="ignore",
                action="append",
                help="specifies dependency which licenses should be ignored. "
                     "the script will not try to download license for ignored dependency.",
            ),
        ),
        (
            ("--ignore-all-dependencies",),
            dict(
                dest="ignore_all",
                action="store_true",
                help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements."
                     "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
            ),
        ),
        (
            ("--overwrite",),
            dict(
                dest="overwrite",
                action="store_true",
                help="Overwrites the output file if present, otherwise license texts are appended.",
            ),
        ),
    ]
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    options = cli.parse_args()

    output = main(
        options.requirements,
        options.pull_licenses,
        options.dest,
        options.download_dir,
        options.ignore,
        ignore_all_dependencies=options.ignore_all,
        overwrite=options.overwrite,
    )
    if output:
        print(output)


MarkupSafe-2.0.1
----------------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

try:
    import aiohttp
except Exception:
    # `Exception` instead of a bare `except` so that KeyboardInterrupt / SystemExit
    # raised while importing are never mistaken for "aiohttp is unavailable".
    # On Linux aarch64 a failed import is treated as "nothing to do" (exit code 0);
    # on every other platform the original import error is re-raised.
    _uname = platform.uname()
    if _uname.system == "Linux" and _uname.machine == "aarch64":
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

# Type aliases used in annotations: the key under which a license is reported
# and the text of the license itself.
DependencyName = str
LicenseText = str

# HTTP session shared by the fetch helpers; get_licenses() binds it for the duration of a run.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # added manually to pair common english spellings
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names tried, in this order, when guessing raw GitHub URLs of license files.
main_branches = [
    "master",
    "main",
    "latest",
]

# File that records the name of every package a license was gathered for.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the optional internal license cache; the cache is disabled when unset.
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    # Drop trailing slashes so request paths can be appended with a single "/".
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# API key for the internal license cache, read from the environment.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Collects the license text of every pinned requirement listed in ``requirements_path``.

    Each requirement line must pin a version with ``==``. Lines that do not start with an
    alphanumeric character (blank lines, comments, pip options) are skipped. A line may carry
    an environment marker after ``;`` and extra pip arguments after the version.

    :param requirements_path: path of the requirements file to read.
    :param pull_licenses: when given, only these dependency names are processed.
    :param download_dir: directory handed to ``get_license`` for downloaded packages.
    :param ignore: dependency names (without versions) whose licenses are not collected.
    :param ignore_all_dependencies: forwarded to ``get_license``.
    :return: dict mapping ``"<name>-<version>"`` to license text, sorted by key.
    :raises ValueError: if a requirement has no pinned version or a license cannot be found.
    """
    if ignore is None:
        ignore = []

    def is_ignored(package: str) -> bool:
        # Result keys have the form "<name>-<version>" while ``ignore`` holds bare names,
        # so compare both the full key and the name part in front of the last "-".
        return package in ignore or package.rsplit("-", 1)[0] in ignore

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    global _session
    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        license_tasks = {}
        for requirement in requirements:
            # Drop trailing "  # comment" text so it is not passed to pip as an argument.
            requirement = re.sub(r"\s+#.*$", "", requirement.strip())
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue

            dependency, separator, version = requirement.partition("==")
            dependency = dependency.strip()

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                version, markers = version.split(";", 1)
            # Strip before looking for extra arguments so "pkg == 1.0" keeps its version.
            version = version.strip()

            if not separator or not version:
                raise ValueError(f"{requirement} must contain a version.")

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            version, *trailing = version.split(None, 1)
            extra_pip_args = trailing[0].strip() if trailing else None

            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        await asyncio.gather(*license_tasks.values())

        licenses = {}
        # Packages reported without a license text; another requirement may still provide one.
        unresolved = []
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                if is_ignored(package) or licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")
                elif package not in unresolved:
                    unresolved.append(package)

        # Nothing changes ``licenses`` after this point, so one pass is enough to decide
        # which unresolved packages were covered by another requirement's result.
        missing = []
        for package in unresolved:
            if licenses.get(package):
                print(f"\tverified found license for (after repeat 0): {package}")
            else:
                missing.append(package)

        if missing:
            raise ValueError(f"Licenses not found for {missing}")

        licenses = {name: licenses[name] for name in sorted(licenses)}
        for name, license_text in licenses.items():
            if license_text:
                check_gpl(name, license_text)
        return licenses


def _marker_version_key(text: str):
    """
    Returns ``text`` as a tuple of ints when it is a dotted numeric version (e.g. "3.10"),
    otherwise ``None``.
    """
    parts = text.split(".")
    if all(part.isdecimal() for part in parts):
        return tuple(int(part) for part in parts)
    return None


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed

    Only a single ``<marker> <operator> <value>`` expression is understood.

    :param markers: text following ';' on a requirement line, may be empty or None.
    :return: True when the dependency must be analyzed. Missing markers, markers without
        a known operator and markers with an unknown name all include the dependency.
    :raises ValueError: when the markers combine conditions with ``and`` / ``or``.
    """
    if not markers:
        return True

    # The pattern must be a raw string ("\b" in a plain string is a backspace character)
    # and must be searched for, because re.match only tests the start of the text.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    # The order matters because operator is found by using 'in'
    operators = {
        ">=": operator.ge,
        "<=": operator.le,
        "<": operator.lt,
        ">": operator.gt,
        "==": operator.eq,
        "!=": operator.ne,
        "not in": lambda a, b: not operator.contains(a, b),
        "in": operator.contains,
    }

    for op, cmp in operators.items():
        if op in markers:
            break
    else:
        # No operators were found, include the dependency.
        return True

    # Split on the first occurrence only so an operator repeated inside the value
    # cannot break the two-way unpacking.
    marker, value = markers.split(op, 1)
    marker, value = marker.strip(" '\""), value.strip(" '\"")

    binding = get_marker_bindings().get(marker)
    if not binding:
        # Unknown marker binding -- skip marker and include the dependency
        return True

    if op in (">=", "<=", "<", ">"):
        # Order dotted versions numerically: as plain strings "3.10" sorts before "3.6".
        binding_key = _marker_version_key(binding)
        value_key = _marker_version_key(value)
        if binding_key is not None and value_key is not None:
            return cmp(binding_key, value_key)

    return cmp(binding, value)


# Cache of the marker bindings; filled in by the first get_marker_bindings() call.
_bindings = {}


def get_marker_bindings():
    """
    Builds (on the first call only) and returns the mapping from environment marker names
    to the values of the running interpreter and host: implementation, os name, platform
    details and python version.
    """
    global _bindings

    if not _bindings:
        implementation = getattr(sys, 'implementation', None)
        if implementation is None:
            impl_name, impl_version = '', '0'
        else:
            info = implementation.version
            impl_version = f'{info.major}.{info.minor}.{info.micro}'
            level = info.releaselevel
            if level != 'final':
                # Non-final releases get the first letter of the level plus the serial.
                impl_version += level[0] + str(info.serial)
            impl_name = implementation.name

        major_minor = platform.python_version_tuple()[:2]
        _bindings = {
            'implementation_name': impl_name,
            'implementation_version': impl_version,
            'os_name': os.name,
            'platform_machine': platform.machine(),
            'platform_python_implementation': platform.python_implementation(),
            'platform_release': platform.release(),
            'platform_system': platform.system(),
            'platform_version': platform.version(),
            'python_full_version': platform.python_version(),
            'python_version': '.'.join(major_minor),
            'sys_platform': sys.platform,
        }

    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """
    Downloads the license text stored for a package in the package-metadata S3 bucket.

    :param package: package name used in the object key.
    :param version: package version used in the object key.
    :param package_manager: first path segment of the object key.
    :return: the license text, or "" when the object is not available or the request fails.
    """
    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        async with _session.get(url) as response:
            # Only a 200 body is a license; other statuses carry an error document
            # which must not be stored as license text.
            if response.status != 200:
                return ""
            return await response.text()
    except Exception:
        # Best effort lookup: request failures mean "not found". Exception (not a bare
        # except) keeps task cancellation and interpreter exit signals propagating.
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Finds the license text of a dependency (and, through pip, of its own dependencies).

    With ``download_dir`` the lookup order is: already downloaded copies in that folder,
    the S3 bucket (only when such a copy exists but carries no license), ``pip download``,
    and finally ``fetch_license``. Without it the folders of ``sys.path`` are searched
    before falling back to ``fetch_license``.

    :param dependency: package name.
    :param version: exact package version.
    :param download_dir: folder holding / receiving downloaded packages.
    :param ignore: package names which must not be analyzed.
    :param ignore_all_dependencies: forwarded to ``pip_download``.
    :param extra_pip_args: extra command line arguments forwarded to ``pip_download``.
    :return: mapping of "<name>-<version>" to license text; empty when nothing was found
        or the dependency is ignored.
    """
    if ignore is None:
        ignore = []
    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        # Folder handed to pip; replaced by a scratch folder when a clean copy is needed.
        # The scan below keeps using download_dir so every candidate is looked up in the
        # folder it was listed from.
        target_dir = download_dir
        scratch_dir = None
        try:
            if os.path.exists(download_dir):
                # Check if package has already been downloaded
                archive_prefix = f"{dependency.replace('-', '_')}-{version}"
                candidates = [path for path in os.listdir(download_dir) if archive_prefix in path]
                for path in candidates:
                    license_text = await get_local_license(dependency, version, os.path.join(download_dir, path))
                    if license_text:
                        print(f"License for {dependency_str} found locally.")

                        await put_license_into_internal_cache(dependency, version, license_text)
                        return {dependency_str: license_text}

                if candidates:
                    # Downloaded copies exist but none carries a license: ask S3 once.
                    license_text = await get_license_from_s3(dependency, version)
                    if license_text:
                        print(f"Downloaded a license for {dependency_str} from s3")

                        await put_license_into_internal_cache(dependency, version, license_text)
                        return {dependency_str: license_text}
                    print(f"No license available for {dependency_str}. Downloading a clean copy.")
                    scratch_dir = tempfile.TemporaryDirectory()
                    target_dir = scratch_dir.name

            licenses = await pip_download(dependency, version, target_dir, ignore,
                                          ignore_all_dependencies=ignore_all_dependencies,
                                          extra_pip_args=extra_pip_args)
        finally:
            # The license texts are already in memory, the scratch copy is not needed anymore.
            if scratch_dir is not None:
                scratch_dir.cleanup()

        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}
    print(f"No license found for: {dependency_str}")
    # Always hand back a mapping; callers only test it for emptiness.
    return {}


dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")

# Suffixes removed from a downloaded file name before it is parsed, otherwise the
# version group of dependency_regex swallows them ("6.0.1.tar.gz").
_ARCHIVE_SUFFIXES = (".tar.gz", ".tar.bz2", ".tgz", ".tbz2", ".tbz", ".zip", ".whl")
# Final extensions (as returned by os.path.splitext) which identify a tarball.
_TAR_EXTENSIONS = (".gz", ".bz", ".bz2", ".tgz", ".tbz", ".tbz2")


def _strip_archive_suffix(filename: str) -> str:
    """Returns ``filename`` without its archive suffix, or unchanged when it has none."""
    lowered = filename.lower()
    for suffix in _ARCHIVE_SUFFIXES:
        if lowered.endswith(suffix):
            return filename[: -len(suffix)]
    return filename


def _extract_archive(dependency_path: str) -> str:
    """Unpacks a downloaded archive next to itself and returns the folder to search for a license."""
    archive_path, archive_ext = os.path.splitext(dependency_path)
    if archive_ext.lower() in _TAR_EXTENSIONS:
        # "r:*" lets tarfile detect the compression itself.
        # NOTE(review): the archive comes from a package index and is extracted without
        # an extraction filter; consider tarfile's "data" filter where available.
        with tarfile.open(dependency_path, "r:*") as archive:
            archive.extractall(archive_path)

        # Source archives wrap their content in a folder named after the archive
        # (for .tar.gz the extraction folder still carries the ".tar" part).
        inner_name = os.path.basename(archive_path)
        if inner_name.endswith(".tar"):
            inner_name = inner_name[: -len(".tar")]
        inner_path = join(archive_path, inner_name)
        if os.path.isdir(inner_path):
            archive_path = inner_path
    else:
        with zipfile.ZipFile(dependency_path) as archive:
            archive.extractall(archive_path)
    return archive_path


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Downloads a package with ``pip download`` and collects the license of every archive
    pip placed into ``<download_dir>/<dependency>``.

    For each archive the license is searched in the extracted content, then through
    ``fetch_license`` and finally in S3. Found licenses are put into the internal cache.

    :param dependency: package name.
    :param version: exact package version.
    :param download_dir: parent folder for the per-dependency download folder.
    :param ignore: package names which must not be analyzed.
    :param ignore_all_dependencies: pass ``--no-deps`` to pip; the internal cache is asked first.
    :param extra_pip_args: whitespace separated extra arguments for pip.
    :return: mapping of "<name>-<version>" to license text; the text is empty when no license
        was found for a downloaded archive.
    :raises Exception: when the pip process could not be run.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            dependency_str = f"{dependency}-{version}"
            return {dependency_str: license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = [
        sys.executable,
        "-m",
        "pip",
        "download",
        f"{dependency}=={version}",
        "-d",
        download_dep_dir,
        "-q",
        "--disable-pip-version-check",
        *additional_args,
    ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        await process.wait()
    except Exception as exc:
        # Chain the cause so the reason pip could not be started is not lost.
        raise Exception(f"Failed to find and download a license for {dependency}=={version} using pip.") from exc

    if not os.path.isdir(download_dep_dir):
        # pip did not produce the folder; report "nothing found" so the caller can
        # fall back to its other lookups instead of failing on listdir.
        return {}

    licenses = {}
    for path in os.listdir(download_dep_dir):
        dependency_path = join(download_dep_dir, path)
        if os.path.isdir(dependency_path):
            # Skip extracted folders.
            continue

        match = dependency_regex.match(_strip_archive_suffix(path))
        if not match:
            continue

        dependency_name = match.group(1)
        dependency_version = match.group(2)
        if dependency_name in ignore:
            continue

        archive_path = _extract_archive(dependency_path)

        dependency_str = f"{dependency_name}-{dependency_version}"
        license_text = await get_local_license(dependency_name, dependency_version, archive_path)
        if not license_text:
            license_text = await fetch_license(dependency_name, dependency_version)
        if not license_text:
            license_text = await get_license_from_s3(dependency_name, dependency_version)

        # Packages without a license are kept (with an empty text) so the caller can report them.
        licenses[dependency_str] = license_text
        if license_text:
            await put_license_into_internal_cache(dependency_name, dependency_version, license_text)
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Searches every folder of ``sys.path`` (in order) for a local license of the package
    and returns the first one found, or "" when none of the folders provides one.
    """
    for search_root in sys.path:
        found = await get_local_license(dependency, version, search_root)
        if found:
            break
    else:
        # Loop ended without a hit.
        return ""
    return found


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Looks for a license of ``dependency`` inside the folder ``path``.

    Entries named "<name>-<version>*" are treated as package metadata (.egg-info / .dist-info),
    an entry named exactly like the dependency as the package source folder. Metadata is
    consulted first, then the source folder, then ``path`` itself.

    :return: the license text or "" when nothing was found or ``path`` is not a folder.
    """
    normalized_name = dependency.lower().replace("-", "_")

    if not os.path.isdir(path):
        return ""

    metadata_prefix = f"{normalized_name}-{version}"
    metadata_dirs = []
    source_dir = ""
    for entry in os.listdir(path):
        lowered = entry.lower()
        if lowered.startswith(metadata_prefix):
            metadata_dirs.append(join(path, entry))
        elif lowered == dependency.lower():
            source_dir = join(path, entry)

    for metadata_dir in metadata_dirs:
        if metadata_dir.endswith(".egg-info"):
            read_license = egg_info_license
        elif metadata_dir.endswith(".dist-info"):
            read_license = dist_info_license
        else:
            continue

        text = await read_license(metadata_dir)
        if text:
            return text

    # Try the package source code first; a license file can also be in the specified folder itself.
    for folder in (source_dir, path):
        if not folder:
            continue
        text = find_license_file(folder)
        if text:
            return text

    return ""


async def egg_info_license(package_path: str) -> str:
    """
    Resolves a license through the "Home-page" entry of an .egg-info ``PKG-INFO`` file.

    :param package_path: path of the .egg-info entry.
    :return: the license text fetched from the home page repository, or "" when PKG-INFO
        cannot be read, has no home page, or no license could be fetched.
    """
    try:
        # Undecodable bytes are replaced: only the ASCII "Home-page:" line matters here.
        with open(join(package_path, "PKG-INFO"), encoding="utf-8", errors="replace") as file:
            info = file.readlines()
    except OSError:
        # PKG-INFO is missing or the .egg-info entry is a plain file; nothing to read here.
        return ""

    url = None
    for line in info:
        if line.startswith("Home-page:"):
            url = line[len("Home-page:"):].strip()
            break

    if not url:
        return ""
    return await fetch_from_github(url) or ""


async def dist_info_license(package_path: str) -> str:
    """
    Resolves a license from a .dist-info folder: a license file inside the folder wins,
    otherwise the "Home-page" entry of ``METADATA`` is used to fetch it.

    :param package_path: path of the .dist-info folder.
    :return: the license text, or "" when no license could be found.
    """
    license_text = find_license_file(package_path)
    if license_text:
        return license_text

    metadata_path = join(package_path, "METADATA")
    if not exists(metadata_path):
        return ""

    with open(metadata_path, "rb") as file:
        try:
            metadata = file.readlines()
        except Exception:
            print(f"Could not open metadata file for: {package_path}")
            raise

    url = None
    for raw_line in metadata:
        # The file is read as bytes and decoded line by line so a single badly
        # encoded line does not hide the home page entry.
        try:
            line = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            continue
        if line.startswith("Home-page:"):
            url = line[len("Home-page:"):].strip()
            break

    if not url:
        return ""
    return await fetch_from_github(url) or ""


def find_license_file(package_path: str) -> str:
    """
    Reads the license file stored directly inside a folder.

    The ``licenses`` sub folder (when present) is searched before the folder itself.
    Within a folder the names of ``license_names`` are tried in their listed order, then
    any file whose name starts with licen[cs]e / copying (case-insensitive), in sorted
    order so the result does not depend on the directory listing order.

    :param package_path: folder to search; sub folders other than ``licenses`` are not visited.
    :return: the license text, or "" when the folder does not exist or holds no license file.
    """
    if not os.path.isdir(package_path):
        return ""

    search_folders = [package_path]
    licenses_folder = join(package_path, "licenses")
    if os.path.isdir(licenses_folder):
        search_folders.insert(0, licenses_folder)

    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)
    license_path = ""
    for folder in search_folders:
        file_names = sorted(
            name for name in os.listdir(folder) if not os.path.isdir(join(folder, name))
        )
        # safe because the (possibly) case-sensitive path is what we have (unlike URL)
        chosen = next((name for name in license_names if name in file_names), None)
        if chosen is None:
            # verify if any version of the common licen[cs]e or copying names appear
            chosen = next((name for name in file_names if license_regex.match(name)), None)
        if chosen is not None:
            license_path = join(folder, chosen)
            break

    if not license_path:
        return ""

    # i want to try / except here because i want to know when we have issues reading files.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetches a license text for a released package: the internal cache is asked first,
    then the home page listed on PyPI is used to download the license from GitHub.

    :param dependency: package name.
    :param version: exact package version.
    :param session: HTTP session to use; the module level session when omitted.
    :return: the license text, or "" when PyPI does not know the release or no license
        could be downloaded.
    :raises ValueError: when the PyPI metadata lists no usable home page.
    """
    if session is None:
        session = _session

    license_text = await fetch_license_from_internal_cache(dependency, version, session)
    if license_text:
        return license_text

    response = await session.get(f"https://pypi.org/pypi/{dependency}/{version}/json", ssl=get_ssl_cert())
    if response.status != 200:
        return ""

    info = (await response.json()).get("info") or {}
    home_page = info.get("home_page")
    if not home_page:
        # project_urls may be null and its labels are free-form, so look the home page
        # up case-insensitively and fall back to any link pointing at GitHub.
        project_urls = info.get("project_urls") or {}
        home_page = next(
            (url for label, url in project_urls.items() if url and label.lower() == "homepage"),
            None,
        )
        if not home_page:
            home_page = next((url for url in project_urls.values() if url and "github" in url), None)
        if not home_page:
            raise ValueError(f"home_page is missing for {dependency}=={version}")

    return await fetch_from_github(home_page, session) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Reads a license text from the internal license cache.

    :param dependency: package name; underscores are replaced by dashes for the lookup.
    :param version: exact package version.
    :param session: HTTP session to use; the module level session when omitted.
    :return: the cached license text, or "" when the cache is disabled, has no entry or
        cannot be reached.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        response = await session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}")
        if response.status != 200:
            return ""

        payload = await response.json()
        print(f"Found license for {dependency}=={version} in internal cache.")
        # An entry without text counts as a miss instead of leaking None to callers.
        return payload.get("text") or ""
    except Exception:
        # The cache is optional: any request / decoding problem is treated as a miss.
        # Exception (not a bare except) keeps task cancellation propagating.
        return ""


async def fetch_from_github(url: str, session: aiohttp.ClientSession = None) -> str:
    """
    Downloads the license file of the GitHub repository a URL points to.

    The file name is guessed on the branches of ``main_branches`` first; the GitHub
    license API is only used when no guess succeeds.

    :param url: repository URL; extra path segments and a ".git" suffix are ignored.
    :param session: HTTP session to use; the module level session when omitted.
    :return: the license text, or "" when the URL is not a GitHub repository URL or no
        license could be downloaded.
    """
    # `import urllib` alone does not guarantee that the parse submodule is loaded.
    import urllib.parse

    if session is None:
        session = _session

    parsed_url = urllib.parse.urlparse(url)
    if "github" not in parsed_url.netloc:
        return ""

    # Keep only "<owner>/<repo>": links such as ".../tree/main" or ".../repo.git"
    # would otherwise produce raw / API URLs which can never resolve.
    segments = [segment for segment in parsed_url.path.split("/") if segment]
    if len(segments) < 2:
        return ""
    owner, repo = segments[0], segments[1]
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    repo_slug = f"{owner}/{repo}"

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            raw_url = f"https://raw.githubusercontent.com/{repo_slug}/{branch}/{license_name}"
            # The context manager releases the connection of every missed guess.
            async with session.get(raw_url, ssl=ssl_context) as response:
                if response.status == 200:
                    return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    async with session.get(f"https://api.github.com/repos/{repo_slug}/license", ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        download_url = (await response.json()).get("download_url")

    if not download_url:
        return ""

    async with session.get(download_url, ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Stores a license text in the internal license cache (best effort).

    Nothing is sent unless both the cache URL and the API key are configured. Failures
    are reported on stdout and never raised.

    :param dependency: package name; underscores are replaced by dashes for the cache key.
    :param version: exact package version.
    :param license_text: text to store.
    :param session: HTTP session to use; the module level session when omitted.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        response = await session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        )
        if response.status == 201:
            print(f"Put license for {dependency}=={version} into internal cache.")
        elif response.status == 401:
            print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")
    except Exception as exc:
        # Caching is optional, so the failure is only reported. Exception (not a bare
        # except) keeps task cancellation propagating.
        print(f"[Warning] Could not put {dependency}=={version} license into internal cache: {exc}")


def check_gpl(dependency: str, license_text: str):
    """
    Aborts the process with exit status 666 when the license text mentions GPL or LGPL
    as a whole word.

    :param dependency: name printed in the error message.
    :param license_text: license text to scan; empty text is accepted.
    """
    # Raw string + search: "\b" in a plain string is a backspace character and
    # re.match only looks at the very start of the text, so nothing was ever detected.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Gathers the licenses of all requirements and writes them to ``dest`` or returns them.

    Every entry is rendered as the package name, an underline of dashes and the license
    text. Errors raised while gathering or writing are printed, not raised.

    :param requirements_path: path of the requirements file.
    :param pull_licenses: when given, only these requirements are analyzed.
    :param dest: output file; when omitted the combined text is returned instead.
    :param download_dir: folder used for downloaded packages.
    :param ignore: package names which must not be analyzed.
    :param ignore_all_dependencies: only analyze the requirements themselves.
    :param overwrite: replace ``dest`` instead of appending to it.
    :return: the combined license text when ``dest`` is not given, otherwise None
        (also None when an error was printed).
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    # The package list is written next to dest (current folder without dest). Only the
    # file name is carried over so repeated calls do not stack folders onto the path.
    global PACKAGES_FILE
    PACKAGES_FILE = os.path.join(os.path.dirname(dest) if dest else "", os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        license_text = "".join(
            f"{dep}\n{'-' * len(dep)}\n{text}\n\n" for dep, text in licenses.items()
        )

        if dest:
            mode = "w" if overwrite else "a+"
            with open(dest, mode, encoding="utf-8") as file:
                file.write(license_text)
        else:
            return license_text
    except Exception as exc:
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Returns the value passed as ``ssl=`` to the HTTP requests: an SSL context based on
    the certifi CA bundle, or False when certifi cannot be imported.
    """
    try:
        from certifi import where as locate_ca_bundle

        context = ssl.create_default_context(cafile=locate_ca_bundle())
    except (ImportError, ModuleNotFoundError):
        # Don't verify SSL certificate if certifi is not installed.
        context = False
    return context


if __name__ == "__main__":
    # Command line entry point: parse the options, run main() and print the combined
    # license text when main() returns it (i.e. when no --dest was given).
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    parser.add_argument("--pull-licenses", dest="pull_licenses", type=str, nargs="+",
        help="Specifies list of licenses that should be pulled from the requirements.txt list")
    parser.add_argument("-d", "--dest", dest="dest", type=str)
    parser.add_argument("--download-dir", dest="download_dir", type=str)
    parser.add_argument(
        "--ignore",
        dest="ignore",
        action="append",
        help="specifies dependency which licenses should be ignored. "
             "the script will not try to download license for ignored dependency.",
    )
    parser.add_argument(
        "--ignore-all-dependencies",
        dest="ignore_all",
        action="store_true",
        # The two literals are concatenated, so the first one needs the trailing space.
        help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements. "
             "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
    )
    parser.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="Overwrites the output file if present, otherwise license texts are appended.",
    )
    args = parser.parse_args()

    text = main(args.requirements, args.pull_licenses, args.dest, args.download_dir, args.ignore, ignore_all_dependencies=args.ignore_all,
                overwrite=args.overwrite)
    if text:
        print(text)


PyYAML-6.0.1
------------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

try:
    import aiohttp
except:
    if all([platform.uname().system == "Linux", platform.uname().machine == "aarch64"]):
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

# Aliases used in annotations: keys are "<name>-<version>" strings, values are license texts.
DependencyName = str
LicenseText = str

# HTTP session shared by the fetch helpers; get_licenses() binds it for the duration of a run.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # added manually to pair common english spellings
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names tried (in this order) when guessing the raw GitHub URL of a license file.
main_branches = [
    "master",
    "main",
    "latest",
]

# File receiving one line per package a license was gathered for; main() re-points it
# next to the destination file.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the internal license cache; the cache is disabled when the variable is not set.
# The trailing slash is removed because request URLs are built as "<base>/pip/<name>/<version>".
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# Key sent in the "X-Key" header when storing licenses in the cache; without it nothing is stored.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Gathers the license texts for every pinned requirement of a requirements file.

    Each accepted requirement is resolved concurrently through ``get_license``; the results
    are merged, every package with a license is appended to ``PACKAGES_FILE`` and every
    license text is passed to ``check_gpl``.

    :param requirements_path: path of the requirements file; every requirement must be
        pinned with "==".
    :param pull_licenses: when given, only requirements named in this list are analyzed.
    :param download_dir: forwarded to ``get_license``.
    :param ignore: package names which must not be analyzed.
    :param ignore_all_dependencies: forwarded to ``get_license``.
    :return: dict of package ("<name>-<version>") to license text, ordered by package.
    :raises ValueError: when a requirement has no version, when no license was found for a
        requirement, or when packages are left without a license text.
    """
    global PACKAGES_FILE

    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    # The session is bound to the module level name so the fetch helpers can use it as
    # their default session while the tasks below are running.
    global _session
    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue
            try:
                dependency, version = requirement.split("==", maxsplit=1)
            except ValueError:
                raise ValueError(f"{requirement} must contain a version.")

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                version, markers = version.split(";")
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            # A requirement is analyzed when its markers apply, it is not ignored and
            # (if a pull list was given) it is part of that list.
            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                # Scheduled right away; all tasks are awaited together below.
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        await asyncio.gather(*license_tasks.values())

        licenses = {}
        # Packages which were reported without a license text; re-checked after the merge.
        pending = Queue()
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                if package in ignore:
                    continue

                # The first non-empty license text gathered for a package wins.
                if licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")

                if not license_text and package not in ignore:
                    pending.put((package, 0))
                    # raise ValueError(f"License for {package} was not found.")

        # A pending package is settled once `licenses` holds a text for it (e.g. another
        # requirement provided it); otherwise it is re-queued and finally reported as
        # missing when its counter exceeds 3.
        missing = []
        while pending.qsize() > 0:
            package, ct = pending.get(timeout=0.01)
            if licenses.get(package):
                print(f"\tverified found license for (after repeat {ct}): {package}")
                continue

            # max dependency nesting depth check to detect missing cases
            if ct > 3:
                missing.append(package)
                continue
            pending.put((package, ct+1))

        if len(missing) > 0:
            raise ValueError(f"Licenses not found for {missing}")

        dependencies = sorted(list(licenses.keys()))

        # Rebuild the dict so it iterates in sorted package order.
        licenses = {dependency: licenses[dependency] for dependency in dependencies}
        for dependency, license_text in licenses.items():
            if license_text:
                check_gpl(dependency, license_text)
        return licenses


def _marker_version_key(text: str):
    """
    Returns ``text`` as a tuple of ints when it is a dotted numeric version (e.g. "3.10"),
    otherwise ``None``.
    """
    parts = text.split(".")
    if all(part.isdecimal() for part in parts):
        return tuple(int(part) for part in parts)
    return None


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed

    Only a single ``<marker> <operator> <value>`` expression is understood.

    :param markers: text following ';' on a requirement line, may be empty or None.
    :return: True when the dependency must be analyzed. Missing markers, markers without
        a known operator and markers with an unknown name all include the dependency.
    :raises ValueError: when the markers combine conditions with ``and`` / ``or``.
    """
    if not markers:
        return True

    # The pattern must be a raw string ("\b" in a plain string is a backspace character)
    # and must be searched for, because re.match only tests the start of the text.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    # The order matters because operator is found by using 'in'
    operators = {
        ">=": operator.ge,
        "<=": operator.le,
        "<": operator.lt,
        ">": operator.gt,
        "==": operator.eq,
        "!=": operator.ne,
        "not in": lambda a, b: not operator.contains(a, b),
        "in": operator.contains,
    }

    for op, cmp in operators.items():
        if op in markers:
            break
    else:
        # No operators were found, include the dependency.
        return True

    # Split on the first occurrence only so an operator repeated inside the value
    # cannot break the two-way unpacking.
    marker, value = markers.split(op, 1)
    marker, value = marker.strip(" '\""), value.strip(" '\"")

    binding = get_marker_bindings().get(marker)
    if not binding:
        # Unknown marker binding -- skip marker and include the dependency
        return True

    if op in (">=", "<=", "<", ">"):
        # Order dotted versions numerically: as plain strings "3.10" sorts before "3.6".
        binding_key = _marker_version_key(binding)
        value_key = _marker_version_key(value)
        if binding_key is not None and value_key is not None:
            return cmp(binding_key, value_key)

    return cmp(binding, value)


# Cache of the marker bindings; filled in by the first get_marker_bindings() call.
_bindings = {}


def get_marker_bindings():
    """
    Builds (on the first call only) and returns the mapping from environment marker names
    to the values of the running interpreter and host: implementation, os name, platform
    details and python version.
    """
    global _bindings

    if not _bindings:
        implementation = getattr(sys, 'implementation', None)
        if implementation is None:
            impl_name, impl_version = '', '0'
        else:
            info = implementation.version
            impl_version = f'{info.major}.{info.minor}.{info.micro}'
            level = info.releaselevel
            if level != 'final':
                # Non-final releases get the first letter of the level plus the serial.
                impl_version += level[0] + str(info.serial)
            impl_name = implementation.name

        major_minor = platform.python_version_tuple()[:2]
        _bindings = {
            'implementation_name': impl_name,
            'implementation_version': impl_version,
            'os_name': os.name,
            'platform_machine': platform.machine(),
            'platform_python_implementation': platform.python_implementation(),
            'platform_release': platform.release(),
            'platform_system': platform.system(),
            'platform_version': platform.version(),
            'python_full_version': platform.python_version(),
            'python_version': '.'.join(major_minor),
            'sys_platform': sys.platform,
        }

    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """
    Downloads the license text stored for a package in the package-metadata S3 bucket.

    :param package: package name used in the object key.
    :param version: package version used in the object key.
    :param package_manager: first path segment of the object key.
    :return: the license text, or "" when the object is not available or the request fails.
    """
    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        async with _session.get(url) as response:
            # Only a 200 body is a license; other statuses carry an error document
            # which must not be stored as license text.
            if response.status != 200:
                return ""
            return await response.text()
    except Exception:
        # Best effort lookup: request failures mean "not found". Exception (not a bare
        # except) keeps task cancellation and interpreter exit signals propagating.
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Finds the license text of a dependency (and, through pip, of its own dependencies).

    With ``download_dir`` the lookup order is: already downloaded copies in that folder,
    the S3 bucket (only when such a copy exists but carries no license), ``pip download``,
    and finally ``fetch_license``. Without it the folders of ``sys.path`` are searched
    before falling back to ``fetch_license``.

    :param dependency: package name.
    :param version: exact package version.
    :param download_dir: folder holding / receiving downloaded packages.
    :param ignore: package names which must not be analyzed.
    :param ignore_all_dependencies: forwarded to ``pip_download``.
    :param extra_pip_args: extra command line arguments forwarded to ``pip_download``.
    :return: mapping of "<name>-<version>" to license text; empty when nothing was found
        or the dependency is ignored.
    """
    if ignore is None:
        ignore = []
    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        # Folder handed to pip; replaced by a scratch folder when a clean copy is needed.
        # The scan below keeps using download_dir so every candidate is looked up in the
        # folder it was listed from.
        target_dir = download_dir
        scratch_dir = None
        try:
            if os.path.exists(download_dir):
                # Check if package has already been downloaded
                archive_prefix = f"{dependency.replace('-', '_')}-{version}"
                candidates = [path for path in os.listdir(download_dir) if archive_prefix in path]
                for path in candidates:
                    license_text = await get_local_license(dependency, version, os.path.join(download_dir, path))
                    if license_text:
                        print(f"License for {dependency_str} found locally.")

                        await put_license_into_internal_cache(dependency, version, license_text)
                        return {dependency_str: license_text}

                if candidates:
                    # Downloaded copies exist but none carries a license: ask S3 once.
                    license_text = await get_license_from_s3(dependency, version)
                    if license_text:
                        print(f"Downloaded a license for {dependency_str} from s3")

                        await put_license_into_internal_cache(dependency, version, license_text)
                        return {dependency_str: license_text}
                    print(f"No license available for {dependency_str}. Downloading a clean copy.")
                    scratch_dir = tempfile.TemporaryDirectory()
                    target_dir = scratch_dir.name

            licenses = await pip_download(dependency, version, target_dir, ignore,
                                          ignore_all_dependencies=ignore_all_dependencies,
                                          extra_pip_args=extra_pip_args)
        finally:
            # The license texts are already in memory, the scratch copy is not needed anymore.
            if scratch_dir is not None:
                scratch_dir.cleanup()

        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}
    print(f"No license found for: {dependency_str}")
    # Always hand back a mapping; callers only test it for emptiness.
    return {}


dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Downloads ``dependency==version`` with ``pip download`` and collects a license per fetched archive.

    :param dependency: name of the requirement to download.
    :param version: exact version of the requirement.
    :param download_dir: parent folder; archives are stored in ``<download_dir>/<dependency>``.
    :param ignore: package names that must be skipped.
    :param ignore_all_dependencies: when set, the internal cache is consulted first and pip runs with ``--no-deps``.
    :param extra_pip_args: additional whitespace-separated arguments appended to the pip command line.
    :return: mapping of ``"<name>-<version>"`` to license text (the text may be empty when nothing was found).
    :raises Exception: when the pip subprocess could not be started or awaited.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            return {f"{dependency}-{version}": license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = [
        sys.executable,
        "-m",
        "pip",
        "download",
        f"{dependency}=={version}",
        "-d",
        download_dep_dir,
        "-q",
        "--disable-pip-version-check",
        *additional_args,
    ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        await process.wait()
    except Exception as exc:
        # ``except Exception`` (not a bare except) keeps task cancellation intact.
        raise Exception(
            f"Failed to find and download a license for {dependency}=={version} using pip."
        ) from exc

    if not os.path.isdir(download_dep_dir):
        # pip did not produce anything; let the caller fall back to other sources.
        return {}

    licenses = {}
    for path in os.listdir(download_dep_dir):
        dependency_path = join(download_dep_dir, path)
        if os.path.isdir(dependency_path):
            # Skip extracted folders.
            continue

        match = dependency_regex.match(path)
        if not match:
            continue

        dependency_name = match.group(1)
        dependency_version = match.group(2)
        if dependency_name in ignore:
            continue

        archive_path, archive_ext = os.path.splitext(dependency_path)
        # os.path.splitext() returns the extension WITH its leading dot.
        if archive_ext in (".gz", ".bz", ".bz2", ".tgz", ".tbz"):
            if dependency_path.endswith((".tar.gz", ".tgz")):
                mode = "r:gz"
            elif dependency_path.endswith((".tar.bz2", ".tbz")):
                mode = "r:bz2"
            else:
                mode = "r"

            # NOTE: extractall() trusts the member paths stored in the archive.
            with tarfile.open(dependency_path, mode) as archive:
                archive.extractall(archive_path)

            archive_name = os.path.split(archive_path)[-1]
            if archive_path.endswith(".tar"):
                # Unpacking .tar.gz or .tar.bz2 will contain an extra .tar folder
                archive_path = join(archive_path, archive_name[: -len(".tar")])
            elif os.path.isdir(join(archive_path, archive_name)):
                # .tgz / .tbz archives: descend into the top-level folder of the sdist.
                archive_path = join(archive_path, archive_name)
        else:
            with zipfile.ZipFile(dependency_path) as archive:
                archive.extractall(archive_path)

        dependency_str = f"{dependency_name}-{dependency_version}"
        licenses[dependency_str] = await get_local_license(dependency_name, dependency_version, archive_path)

        if not licenses[dependency_str]:
            licenses[dependency_str] = await fetch_license(dependency_name, dependency_version)

        if not licenses[dependency_str]:
            licenses[dependency_str] = await get_license_from_s3(dependency_name, dependency_version)

        if licenses[dependency_str]:
            await put_license_into_internal_cache(dependency_name, dependency_version, licenses[dependency_str])
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Searches every ``sys.path`` entry, in order, for a locally available license.

    Returns the first non-empty text produced by ``get_local_license`` or ``""`` when no entry yields one.
    """
    for search_root in sys.path:
        found = await get_local_license(dependency, version, search_root)
        if not found:
            continue
        return found
    return ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Looks for a license of ``dependency==version`` inside the folder ``path``.

    Sources are tried in this order:
      1. ``<name>-<version>*.egg-info`` / ``*.dist-info`` entries found in ``path``;
      2. a folder in ``path`` named like the package (with or without ``-`` replaced by ``_``);
      3. a license file located directly in ``path``.

    :return: the license text, or ``""`` when ``path`` is not a directory or nothing was found.
    """
    if not os.path.isdir(path):
        return ""

    raw_name = dependency.lower()
    # Distribution metadata and import folders use "_" where the project name has "-".
    dependency_name = raw_name.replace("-", "_")
    # Entries are compared lower-cased, so the expected prefix has to be lower-cased as well.
    meta_prefix = f"{dependency_name}-{version}".lower()

    package_meta_candidates = []
    package_path = ""
    for dependency_path in os.listdir(path):
        entry = dependency_path.lower()
        if entry.startswith(meta_prefix):
            package_meta_candidates.append(join(path, dependency_path))
        elif entry in (raw_name, dependency_name):
            package_path = join(path, dependency_path)

    for package_meta in package_meta_candidates:
        if package_meta.endswith(".egg-info"):
            license_text = await egg_info_license(package_meta)
        elif package_meta.endswith(".dist-info"):
            license_text = await dist_info_license(package_meta)
        else:
            continue
        if license_text:
            return license_text

    if package_path:
        # Try to find a license file in the package source code
        license_text = find_license_file(package_path)
        if license_text:
            return license_text

    # A license file can be in the specified folder itself.
    return find_license_file(path) or ""


async def egg_info_license(package_path: str) -> str:
    """
    Resolves a license through the ``Home-page`` header of ``<package_path>/PKG-INFO``.

    :param package_path: path of an ``.egg-info`` entry.
    :return: the license text fetched via ``fetch_from_github``, or ``""`` when ``PKG-INFO`` is missing,
        has no ``Home-page`` header, or the lookup yields nothing.
    """
    pkg_info_path = join(package_path, "PKG-INFO")
    if not os.path.isfile(pkg_info_path):
        return ""

    url = ""
    # Explicit encoding with replacement: the metadata must not depend on the locale encoding,
    # and a stray byte in the description must not abort the lookup.
    with open(pkg_info_path, encoding="utf-8", errors="replace") as file:
        for line in file:
            if line.startswith("Home-page:"):
                url = line[len("Home-page:"):].strip()
                break

    if not url:
        return ""
    return await fetch_from_github(url) or ""


async def dist_info_license(package_path: str) -> str:
    """
    Resolves a license for a ``.dist-info`` folder.

    A license file inside the folder wins; otherwise the ``Home-page`` header of ``METADATA``
    is passed to ``fetch_from_github``.

    :param package_path: path of the ``.dist-info`` folder.
    :return: the license text or ``""`` when nothing was found.
    """
    license_text = find_license_file(package_path)
    if license_text:
        return license_text

    metadata_path = join(package_path, "METADATA")
    if not exists(metadata_path):
        return ""

    try:
        with open(metadata_path, "rb") as file:
            metadata = file.readlines()
    except Exception:
        print(f"Could not open metadata file for: {package_path}")
        raise

    url = ""
    for raw_line in metadata:
        # The file is read as bytes so that a single undecodable line can be skipped.
        try:
            line = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            continue
        if line.startswith("Home-page:"):
            url = line[len("Home-page:"):].strip()
            break

    if not url:
        return ""
    return await fetch_from_github(url) or ""


def find_license_file(package_path: str) -> str:
    """
    Returns the text of the first license-like file found for a package folder.

    The ``licenses`` sub-folder is searched when it exists, otherwise ``package_path`` itself.
    Sub-folders are never descended into. A file qualifies when its name starts with
    ``license`` / ``licence`` / ``copying`` (any letter case) or is listed in ``license_names``.
    Entries are visited in sorted order so the result is deterministic.

    :return: the file content (undecodable bytes are dropped after a warning) or ``""`` when
        ``package_path`` is not a directory or no license file exists.
    """
    if not os.path.isdir(package_path):
        return ""

    licenses_folder = join(package_path, "licenses")
    # isdir, not exists: a plain file called "licenses" cannot be listed.
    license_search_folder = licenses_folder if os.path.isdir(licenses_folder) else package_path

    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)
    license_path = ""
    for path in sorted(os.listdir(license_search_folder)):
        candidate = join(license_search_folder, path)
        if os.path.isdir(candidate):
            continue
        # safe because the (possibly) case-sensitive path is what we have (unlike URL)
        #   verify if any version of the common licen[cs]e or copying names appear
        if license_regex.match(path) or path in license_names:
            license_path = candidate
            break

    if not license_path:
        return ""

    # i want to try / except here because i want to know when we have issues reading files.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetches a license for ``dependency==version`` from remote sources.

    The internal cache is asked first. Otherwise the PyPI JSON API is queried for the project's
    home page (``info.home_page``, then ``info.project_urls["Homepage"]``) and the license is
    looked up through ``fetch_from_github``.

    :param session: HTTP session to use for the PyPI request; defaults to the module-level session.
    :return: the license text or ``""`` when PyPI does not know the release or no license was found.
    :raises ValueError: when the PyPI metadata carries no home page.
    """
    if session is None:
        session = _session

    license_text = await fetch_license_from_internal_cache(dependency, version)
    if license_text:
        return license_text

    pypi_url = f"https://pypi.org/pypi/{dependency}/{version}/json"
    # ``async with`` releases the connection even when the body is never read.
    async with session.get(pypi_url, ssl=get_ssl_cert()) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    info = response_json.get("info") or {}
    # ``project_urls`` may be null or lack a "Homepage" entry.
    home_page = info.get("home_page") or (info.get("project_urls") or {}).get("Homepage")
    if not home_page:
        raise ValueError(f"home_page is missing for {dependency}=={version}")

    return await fetch_from_github(home_page) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Reads a license from the internal cache at ``LICENSE_CACHE_URL``.

    The lookup is best effort: a disabled cache, a non-200 answer or any request/decoding
    error yields ``""``.

    :param session: HTTP session to use; defaults to the module-level session.
    :return: the cached license text or ``""``.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    # The cache is keyed with "-" in place of "_".
    dependency = dependency.replace("_", "-")
    try:
        async with session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}") as response:
            if response.status != 200:
                return ""
            payload = await response.json()
        print(f"Found license for {dependency}=={version} in internal cache.")
        return payload.get("text") or ""
    except Exception:
        # ``except Exception`` (not a bare except) so task cancellation is not swallowed.
        return ""


async def fetch_from_github(url: str, session: aiohttp.ClientSession = None) -> str:
    """
    Downloads the license of the GitHub repository that ``url`` points to.

    Well-known license file names are probed on the branches listed in ``main_branches``
    through raw.githubusercontent.com first; the GitHub license API is only used as a fallback.

    :param url: project home page; anything whose host does not contain "github" is ignored.
    :param session: HTTP session to use; defaults to the module-level session.
    :return: the license text or ``""`` when the URL is not a GitHub repository or no license was found.
    """
    # Imported explicitly: a bare ``import urllib`` does not guarantee the submodule is loaded.
    from urllib.parse import urlparse

    if session is None:
        session = _session

    parsed_url = urlparse(url)
    if "github" not in parsed_url.netloc:
        return ""

    # Only "<owner>/<repo>" is meaningful for the raw and API endpoints; drop extra segments
    # such as "/tree/main/..." and a trailing ".git".
    segments = [part for part in parsed_url.path.split("/") if part]
    if len(segments) < 2:
        return ""
    owner, repo_name = segments[0], segments[1]
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]
    repo = f"{owner}/{repo_name}"

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            raw_url = f"https://raw.githubusercontent.com/{repo}/{branch}/{license_name}"
            # ``async with`` releases the connection of every failed probe.
            async with session.get(raw_url, ssl=ssl_context) as response:
                if response.status == 200:
                    return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    async with session.get(f"https://api.github.com/repos/{repo}/license", ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    download_url = response_json.get("download_url")
    if not download_url:
        return ""

    async with session.get(download_url, ssl=ssl_context) as response:
        # Never hand an error page back as license text.
        if response.status != 200:
            return ""
        return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Stores ``license_text`` in the internal cache (best effort).

    Nothing is sent unless both ``LICENSE_CACHE_URL`` and ``LICENSE_CACHE_API_KEY`` are set;
    in that case ``""`` is returned. Request failures are reported as a warning and never raised.

    :param session: HTTP session to use; defaults to the module-level session.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    # The cache is keyed with "-" in place of "_".
    dependency = dependency.replace("_", "-")
    try:
        async with session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        ) as response:
            if response.status == 201:
                print(f"Put license for {dependency}=={version} into internal cache.")
            elif response.status == 401:
                print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")
    except Exception as exc:
        # Caching is optional, so failures must not abort the run -- but they should be visible.
        # ``except Exception`` (not a bare except) keeps task cancellation intact.
        print(f"[Warning] Could not put {dependency}=={version} license into internal cache: {exc}")


def check_gpl(dependency: str, license_text: str):
    """
    Aborts the program when ``license_text`` mentions GPL or LGPL as a whole word.

    :param dependency: name used in the error message.
    :param license_text: license text to scan; empty or ``None`` is accepted and ignored.
    :raises SystemExit: with code 666 when a match is found.
    """
    # Raw string: in a normal string "\b" is a backspace character, not a word boundary.
    # re.search: the mention may appear anywhere, not only at the very start of the text.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Collects the licenses for a requirements file and writes or returns the combined text.

    :param requirements_path: path of the requirements file.
    :param pull_licenses: when given, only these requirements are processed.
    :param dest: output file; when omitted the combined text is returned instead.
    :param download_dir: folder used for ``pip download``.
    :param ignore: package names (without version) to skip.
    :param ignore_all_dependencies: only handle the listed requirements, not their dependencies.
    :param overwrite: overwrite ``dest`` instead of appending to it.
    :return: the combined license text when ``dest`` is not set, otherwise ``None``.
        Errors are printed, not raised, and also result in ``None``.
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    # ``dest`` is optional, so only derive a folder from it when it is set; basename() keeps
    # the path from nesting when main() is called more than once.
    packages_dir = os.path.dirname(dest) if dest else ""
    PACKAGES_FILE = os.path.join(packages_dir, os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        # One section per package: name, underline of the same length, license text.
        combined_text = "".join(
            f"{dep}\n{'-' * len(dep)}\n{text}\n\n" for dep, text in licenses.items()
        )

        if dest:
            mode = "w" if overwrite else "a+"
            with open(dest, mode, encoding="utf-8") as file:
                file.write(combined_text)
        else:
            return combined_text
    except Exception as exc:
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Builds the ``ssl`` argument used for outgoing HTTPS requests.

    Returns a default SSL context backed by the certifi CA bundle, or ``False`` when certifi
    cannot be imported (certificate verification is then switched off).
    """
    try:
        import certifi

        ca_bundle = certifi.where()
        context = ssl.create_default_context(cafile=ca_bundle)
    except (ImportError, ModuleNotFoundError):
        # Don't verify SSL certificate if certifi is not installed.
        return False
    return context


if __name__ == "__main__":
    # Command-line entry point: parse the options, run main() and print the combined license
    # text when main() returns it (i.e. when no destination file was given).
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    parser.add_argument("--pull-licenses", dest="pull_licenses", type=str, nargs="+",
        help="Specifies list of licenses that should be pulled from the requirements.txt list")
    parser.add_argument("-d", "--dest", dest="dest", type=str)
    parser.add_argument("--download-dir", dest="download_dir", type=str)
    parser.add_argument(
        "--ignore",
        dest="ignore",
        action="append",
        help="specifies dependency which licenses should be ignored. "
             "the script will not try to download license for ignored dependency.",
    )
    parser.add_argument(
        "--ignore-all-dependencies",
        dest="ignore_all",
        action="store_true",
        # The trailing space keeps the two concatenated sentences apart in the help output.
        help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements. "
             "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
    )
    parser.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="Overwrites the output file if present, otherwise license texts are appended.",
    )
    args = parser.parse_args()

    text = main(args.requirements, args.pull_licenses, args.dest, args.download_dir, args.ignore, ignore_all_dependencies=args.ignore_all,
                overwrite=args.overwrite)
    if text:
        print(text)


aiohttp-3.9.5
-------------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

# aiohttp provides the HTTP client session used by this script. When the import fails on
# Linux aarch64, two warnings are printed and the script exits with status 0; on every other
# platform the original exception is re-raised.
try:
    import aiohttp
except:
    if all([platform.uname().system == "Linux", platform.uname().machine == "aarch64"]):
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

# Aliases that make the ``Dict[DependencyName, LicenseText]`` annotations self-describing.
DependencyName = str
LicenseText = str

# Shared HTTP session; get_licenses() binds it for the duration of a run.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # added manually to pair common english spellings
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names tried, in this order, when guessing raw GitHub license URLs.
main_branches = [
    "master",
    "main",
    "latest",
]

# File that get_licenses() appends the name of every package with a resolved license to.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the optional internal license cache (trailing slashes removed).
# The cache is disabled when the environment variable is not set.
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# API key sent in the "X-Key" header when a license is stored in the internal cache.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Resolves the license text of every pinned requirement listed in ``requirements_path``.

    :param requirements_path: requirements file; every processed line must pin a version with ``==``.
    :param pull_licenses: when given, only requirements named in this list are processed.
    :param download_dir: forwarded to ``get_license``.
    :param ignore: package names to skip; defaults to an empty list.
    :param ignore_all_dependencies: forwarded to ``get_license``.
    :return: dict mapping package identifiers to license text, ordered by sorted key.
    :raises ValueError: when a requirement has no ``==`` version, when a requirement yields no
        result at all, or when some package ends up without a license text.
    """
    global PACKAGES_FILE

    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    # The session is bound to the module-level ``_session`` so the helper coroutines can
    # pick it up without having it passed in.
    global _session
    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        # One scheduled get_license() future per requirement name.
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue
            try:
                dependency, version = requirement.split("==", maxsplit=1)
            except ValueError:
                raise ValueError(f"{requirement} must contain a version.")

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                version, markers = version.split(";")
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            # A requirement is processed only when its markers apply, it is not ignored and,
            # if a pull list was given, it is part of that list.
            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        # Wait for all lookups; results are read from the futures below.
        await asyncio.gather(*license_tasks.values())

        licenses = {}
        # Packages that came back without a license text, paired with a retry counter.
        pending = Queue()
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                if package in ignore:
                    continue

                # The first non-empty license recorded for a package wins.
                if licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")

                if not license_text and package not in ignore:
                    pending.put((package, 0))
                    # raise ValueError(f"License for {package} was not found.")

        # Re-check the pending packages: a later result may have supplied the license in the
        # meantime. Nothing inside this loop adds to ``licenses``, so an entry that is still
        # missing is re-queued until its counter exceeds 3 and is then reported as missing.
        missing = []
        while pending.qsize() > 0:
            package, ct = pending.get(timeout=0.01)
            if licenses.get(package):
                print(f"\tverified found license for (after repeat {ct}): {package}")
                continue

            # max dependency nesting depth check to detect missing cases
            if ct > 3:
                missing.append(package)
                continue
            pending.put((package, ct+1))

        if len(missing) > 0:
            raise ValueError(f"Licenses not found for {missing}")

        dependencies = sorted(list(licenses.keys()))

        # Rebuild the dict so that iteration follows the sorted package names.
        licenses = {dependency: licenses[dependency] for dependency in dependencies}
        for dependency, license_text in licenses.items():
            if license_text:
                check_gpl(dependency, license_text)
        return licenses


def _marker_version_key(text: str):
    """
    Returns ``text`` as a tuple of ints when it is a purely numeric dotted version, else ``None``.
    """
    if re.fullmatch(r"\d+(?:\.\d+)*", text):
        return tuple(int(part) for part in text.split("."))
    return None


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed

    Only a single ``<marker> <operator> <value>`` comparison is supported. The dependency is
    included when there are no markers, no supported operator, or the marker name is unknown.

    :param markers: the text after ``;`` on a requirement line (may be empty).
    :return: ``True`` when the dependency should be analyzed.
    :raises ValueError: when the markers are combined with ``and`` / ``or``.
    """
    if not markers:
        return True

    # Raw string + search: "\b" in a normal string is a backspace character and re.match()
    # only looks at the start of the text, so combined markers were never detected.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    operators = {
        ">=": operator.ge,
        "<=": operator.le,
        "<": operator.lt,
        ">": operator.gt,
        "==": operator.eq,
        "!=": operator.ne,
        # PEP 508: "a in b" is true when a is contained in b.
        "not in": lambda a, b: a not in b,
        "in": lambda a, b: a in b,
    }

    # The alternation order matters (">=" before ">", "not in" before "in"); the word
    # boundaries keep "in" from matching inside names such as "platform_machine" or "win32".
    found = re.search(r">=|<=|<|>|==|!=|\bnot\s+in\b|\bin\b", markers)
    if found is None:
        # No operators were found, include the dependency.
        return True

    op = " ".join(found.group(0).split())
    cmp = operators[op]
    marker = markers[:found.start()].strip(" '\"")
    value = markers[found.end():].strip(" '\"")

    bindings = get_marker_bindings()
    binding = bindings.get(marker)
    if not binding:
        # Unknown marker binding -- skip marker and include the dependency
        return True

    if op not in ("in", "not in"):
        # Compare numeric versions component-wise: as plain strings "3.10" sorts before "3.6".
        left, right = _marker_version_key(binding), _marker_version_key(value)
        if left is not None and right is not None:
            width = max(len(left), len(right))
            left += (0,) * (width - len(left))
            right += (0,) * (width - len(right))
            return cmp(left, right)

    return cmp(binding, value)


# Cache filled by the first get_marker_bindings() call.
_bindings = {}


def get_marker_bindings():
    """
    Builds (once) and returns the mapping of environment marker names to the values of the
    running interpreter and platform: implementation, OS, machine, release and Python version.
    """
    global _bindings
    if _bindings:
        return _bindings

    def render_version(info):
        # "<major>.<minor>.<micro>", plus release level initial and serial for non-final builds.
        rendered = f"{info.major}.{info.minor}.{info.micro}"
        if info.releaselevel != 'final':
            rendered += info.releaselevel[0] + str(info.serial)
        return rendered

    implementation = getattr(sys, 'implementation', None)
    if implementation is None:
        impl_name, impl_version = '', '0'
    else:
        impl_name = implementation.name
        impl_version = render_version(implementation.version)

    major_minor = platform.python_version_tuple()[:2]
    _bindings = dict(
        implementation_name=impl_name,
        implementation_version=impl_version,
        os_name=os.name,
        platform_machine=platform.machine(),
        platform_python_implementation=platform.python_implementation(),
        platform_release=platform.release(),
        platform_system=platform.system(),
        platform_version=platform.version(),
        python_full_version=platform.python_version(),
        python_version='.'.join(major_minor),
        sys_platform=sys.platform,
    )
    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """
    Fetches a license from the ``package-metadata`` S3 bucket (best effort).

    :param package: package name used in the object key.
    :param version: package version used in the object key.
    :param package_manager: first component of the object key.
    :return: the response body for a 200 answer, otherwise ``""`` (also on any request error).
    """
    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        async with _session.get(url) as response:
            # A non-200 answer carries an error document, not a license.
            if response.status != 200:
                return ""
            return await response.text()
    except Exception:
        # ``except Exception`` (not a bare except) so task cancellation is not swallowed.
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Resolves the license of ``dependency==version`` and, via pip, of the packages pulled with it.

    With ``download_dir`` set, an already downloaded copy is inspected first, then S3, and
    finally ``pip download`` is run. Without it, the folders on ``sys.path`` are searched.
    If neither yields a result, ``fetch_license`` is used as the last resort.

    :param ignore: package names to skip; ``None`` is treated as an empty list.
    :param ignore_all_dependencies: forwarded to ``pip_download``.
    :param extra_pip_args: forwarded to ``pip_download``.
    :return: mapping of ``"<name>-<version>"`` to license text; empty when ignored or nothing was found.
    """
    if ignore is None:
        ignore = []
    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        scratch_dir = None
        try:
            if os.path.isdir(download_dir):
                # Check if package has already been downloaded
                wanted = f"{dependency.replace('-', '_')}-{version}"
                for path in os.listdir(download_dir):
                    if wanted not in path:
                        continue

                    license_text = await get_local_license(dependency, version, os.path.join(download_dir, path))
                    if license_text:
                        print(f"License for {dependency_str} found locally.")

                        await put_license_into_internal_cache(dependency, version, license_text)
                        return {dependency_str: license_text}

                    license_text = await get_license_from_s3(dependency, version)
                    if license_text:
                        print(f"Downloaded a license for {dependency_str} from s3")

                        await put_license_into_internal_cache(dependency, version, license_text)
                        return {dependency_str: license_text}

                    print(f"No license available for {dependency_str}. Downloading a clean copy.")
                    scratch_dir = tempfile.TemporaryDirectory()
                    download_dir = scratch_dir.name
                    # The remaining entries belong to the old folder; stop scanning once the
                    # download target has been switched.
                    break

            licenses = await pip_download(dependency, version, download_dir, ignore,
                                          ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        finally:
            # The license texts are held in memory, so the scratch copy can go right away.
            if scratch_dir is not None:
                scratch_dir.cleanup()

        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}

    print(f"No license found for: {dependency_str}")
    return {}


# Splits a downloaded archive file name into two groups: group 1 is a run of word characters
# and hyphens, group 2 is the run of word characters and dots that follows the next hyphen.
# Group 1 is greedy and accepts hyphens itself, so the split happens at the last hyphen that
# can still be followed by a non-empty group 2.
dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Downloads ``dependency==version`` with ``pip download`` and collects a license per fetched archive.

    :param dependency: name of the requirement to download.
    :param version: exact version of the requirement.
    :param download_dir: parent folder; archives are stored in ``<download_dir>/<dependency>``.
    :param ignore: package names that must be skipped.
    :param ignore_all_dependencies: when set, the internal cache is consulted first and pip runs with ``--no-deps``.
    :param extra_pip_args: additional whitespace-separated arguments appended to the pip command line.
    :return: mapping of ``"<name>-<version>"`` to license text (the text may be empty when nothing was found).
    :raises Exception: when the pip subprocess could not be started or awaited.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            return {f"{dependency}-{version}": license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = [
        sys.executable,
        "-m",
        "pip",
        "download",
        f"{dependency}=={version}",
        "-d",
        download_dep_dir,
        "-q",
        "--disable-pip-version-check",
        *additional_args,
    ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        await process.wait()
    except Exception as exc:
        # ``except Exception`` (not a bare except) keeps task cancellation intact.
        raise Exception(
            f"Failed to find and download a license for {dependency}=={version} using pip."
        ) from exc

    if not os.path.isdir(download_dep_dir):
        # pip did not produce anything; let the caller fall back to other sources.
        return {}

    licenses = {}
    for path in os.listdir(download_dep_dir):
        dependency_path = join(download_dep_dir, path)
        if os.path.isdir(dependency_path):
            # Skip extracted folders.
            continue

        match = dependency_regex.match(path)
        if not match:
            continue

        dependency_name = match.group(1)
        dependency_version = match.group(2)
        if dependency_name in ignore:
            continue

        archive_path, archive_ext = os.path.splitext(dependency_path)
        # os.path.splitext() returns the extension WITH its leading dot.
        if archive_ext in (".gz", ".bz", ".bz2", ".tgz", ".tbz"):
            if dependency_path.endswith((".tar.gz", ".tgz")):
                mode = "r:gz"
            elif dependency_path.endswith((".tar.bz2", ".tbz")):
                mode = "r:bz2"
            else:
                mode = "r"

            # NOTE: extractall() trusts the member paths stored in the archive.
            with tarfile.open(dependency_path, mode) as archive:
                archive.extractall(archive_path)

            archive_name = os.path.split(archive_path)[-1]
            if archive_path.endswith(".tar"):
                # Unpacking .tar.gz or .tar.bz2 will contain an extra .tar folder
                archive_path = join(archive_path, archive_name[: -len(".tar")])
            elif os.path.isdir(join(archive_path, archive_name)):
                # .tgz / .tbz archives: descend into the top-level folder of the sdist.
                archive_path = join(archive_path, archive_name)
        else:
            with zipfile.ZipFile(dependency_path) as archive:
                archive.extractall(archive_path)

        dependency_str = f"{dependency_name}-{dependency_version}"
        licenses[dependency_str] = await get_local_license(dependency_name, dependency_version, archive_path)

        if not licenses[dependency_str]:
            licenses[dependency_str] = await fetch_license(dependency_name, dependency_version)

        if not licenses[dependency_str]:
            licenses[dependency_str] = await get_license_from_s3(dependency_name, dependency_version)

        if licenses[dependency_str]:
            await put_license_into_internal_cache(dependency_name, dependency_version, licenses[dependency_str])
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Searches every ``sys.path`` entry, in order, for a locally available license.

    Returns the first non-empty text produced by ``get_local_license`` or ``""`` when no entry yields one.
    """
    for search_root in sys.path:
        found = await get_local_license(dependency, version, search_root)
        if not found:
            continue
        return found
    return ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Looks for a license of ``dependency==version`` inside the folder ``path``.

    Sources are tried in this order:
      1. ``<name>-<version>*.egg-info`` / ``*.dist-info`` entries found in ``path``;
      2. a folder in ``path`` named like the package (with or without ``-`` replaced by ``_``);
      3. a license file located directly in ``path``.

    :return: the license text, or ``""`` when ``path`` is not a directory or nothing was found.
    """
    if not os.path.isdir(path):
        return ""

    raw_name = dependency.lower()
    # Distribution metadata and import folders use "_" where the project name has "-".
    dependency_name = raw_name.replace("-", "_")
    # Entries are compared lower-cased, so the expected prefix has to be lower-cased as well.
    meta_prefix = f"{dependency_name}-{version}".lower()

    package_meta_candidates = []
    package_path = ""
    for dependency_path in os.listdir(path):
        entry = dependency_path.lower()
        if entry.startswith(meta_prefix):
            package_meta_candidates.append(join(path, dependency_path))
        elif entry in (raw_name, dependency_name):
            package_path = join(path, dependency_path)

    for package_meta in package_meta_candidates:
        if package_meta.endswith(".egg-info"):
            license_text = await egg_info_license(package_meta)
        elif package_meta.endswith(".dist-info"):
            license_text = await dist_info_license(package_meta)
        else:
            continue
        if license_text:
            return license_text

    if package_path:
        # Try to find a license file in the package source code
        license_text = find_license_file(package_path)
        if license_text:
            return license_text

    # A license file can be in the specified folder itself.
    return find_license_file(path) or ""


async def egg_info_license(package_path: str) -> str:
    """
    Resolves a license through the ``Home-page`` header of ``<package_path>/PKG-INFO``.

    :param package_path: path of an ``.egg-info`` entry.
    :return: the license text fetched via ``fetch_from_github``, or ``""`` when ``PKG-INFO`` is missing,
        has no ``Home-page`` header, or the lookup yields nothing.
    """
    pkg_info_path = join(package_path, "PKG-INFO")
    if not os.path.isfile(pkg_info_path):
        return ""

    url = ""
    # Explicit encoding with replacement: the metadata must not depend on the locale encoding,
    # and a stray byte in the description must not abort the lookup.
    with open(pkg_info_path, encoding="utf-8", errors="replace") as file:
        for line in file:
            if line.startswith("Home-page:"):
                url = line[len("Home-page:"):].strip()
                break

    if not url:
        return ""
    return await fetch_from_github(url) or ""


async def dist_info_license(package_path: str) -> str:
    """
    Resolves a license for a ``.dist-info`` folder.

    A license file shipped inside ``package_path`` is preferred. Otherwise the first
    ``Home-page:`` header of the ``METADATA`` file is passed to ``fetch_from_github``.

    Returns the license text, or an empty string when neither source yields one.
    Errors raised while reading ``METADATA`` are reported and re-raised.
    """
    license_text = find_license_file(package_path)
    if license_text:
        return license_text

    metadata_path = join(package_path, "METADATA")
    if not exists(metadata_path):
        return ""

    try:
        with open(metadata_path, "rb") as file:
            metadata = file.readlines()
    except Exception:
        print(f"Could not open metadata file for: {package_path}")
        raise

    url = ""
    for raw_line in metadata:
        # Decode line by line so that one undecodable line cannot hide the header.
        try:
            line = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            continue
        if line.startswith("Home-page:"):
            url = line[len("Home-page:"):].strip()
            break

    if not url:
        return ""
    # fetch_from_github may yield a falsy non-string; always hand back a str.
    return await fetch_from_github(url) or ""


def find_license_file(package_path: str) -> str:
    """
    Reads the first license-like file found for a package folder.

    Files are searched in the ``licenses`` sub-folder when that is a directory,
    otherwise directly in ``package_path``. A file qualifies when its name starts
    with "license", "licence" or "copying" (case-insensitive). Every entry of
    ``license_names`` starts with one of these words, so no separate exact-name
    comparison is needed.

    Returns the file content, or an empty string when ``package_path`` is not a
    directory or no matching file exists.
    """
    if not os.path.isdir(package_path):
        return ""

    licenses_folder = join(package_path, "licenses")
    # isdir (not exists): a plain file called "licenses" cannot be listed.
    search_folder = licenses_folder if os.path.isdir(licenses_folder) else package_path

    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)
    license_path = ""
    # sorted() makes the pick deterministic when several candidates exist.
    for name in sorted(os.listdir(search_folder)):
        candidate = join(search_folder, name)
        if os.path.isdir(candidate):
            continue
        # safe because the (possibly) case-sensitive path is what we have (unlike URL)
        if license_regex.match(name):
            license_path = candidate
            break

    if not license_path:
        return ""

    # Strict decoding first so that problematic files are reported, then a lenient retry.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetches a license for ``dependency==version`` from remote sources.

    The internal license cache is asked first. Otherwise the package's home page is
    read from the PyPI JSON document and passed to ``fetch_from_github``.

    Returns the license text, or an empty string when PyPI does not answer with 200
    or no license could be fetched from the home page.

    Raises ValueError when PyPI lists no home page for the package.
    """
    if session is None:
        session = _session

    license_text = await fetch_license_from_internal_cache(dependency, version)
    if license_text:
        return license_text

    response = await session.get(f"https://pypi.org/pypi/{dependency}/{version}/json", ssl=get_ssl_cert())
    if response.status != 200:
        return ""

    info = (await response.json()).get("info") or {}
    # "home_page" is often empty and "project_urls" can be null or lack "Homepage";
    # look both up defensively so a missing key ends in the ValueError below
    # rather than a KeyError/TypeError.
    home_page = info.get("home_page") or (info.get("project_urls") or {}).get("Homepage")
    if not home_page:
        raise ValueError(f"home_page is missing for {dependency}=={version}")

    # fetch_from_github may yield a falsy non-string; always hand back a str.
    return await fetch_from_github(home_page) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Asks the internal license cache for the license of ``dependency==version``.

    The cache is addressed as ``{LICENSE_CACHE_URL}/pip/<name>/<version>`` with
    underscores in the name replaced by dashes. The lookup is best effort: a
    disabled cache, a non-200 answer, a payload without "text" or any request
    error all result in an empty string.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        response = await session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}")
        if response.status != 200:
            return ""
        payload = await response.json()
        text = payload.get("text") if isinstance(payload, dict) else None
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt/SystemExit (and task
        # cancellation on Python 3.8+) must keep propagating.
        return ""

    if not text:
        return ""
    print(f"Found license for {dependency}=={version} in internal cache.")
    return text


async def fetch_from_github(url: str, session: aiohttp.ClientSession = None) -> str:
    """
    Downloads the license of the GitHub repository referenced by ``url``.

    Raw file URLs are guessed first from ``main_branches`` x ``license_names``; the
    GitHub license API is only used as a fallback.

    Returns the license text, or an empty string when ``url`` does not point to a
    GitHub host, does not contain an ``<owner>/<repo>`` path, or no license could
    be downloaded.
    """
    # Imported here because the module only does ``import urllib``, which does not
    # guarantee that the ``parse`` submodule has been loaded.
    from urllib.parse import urlparse

    if session is None:
        session = _session

    parsed_url = urlparse(url)
    if "github" not in parsed_url.netloc:
        return ""

    # Both endpoints below expect exactly "<owner>/<repo>", so drop anything after
    # the repository name (e.g. "/tree/main/...") and a trailing ".git".
    segments = [segment for segment in parsed_url.path.split("/") if segment]
    if len(segments) < 2:
        return ""
    owner, repo = segments[:2]
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    repo_slug = f"{owner}/{repo}"

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            response = await session.get(f"https://raw.githubusercontent.com/{repo_slug}/{branch}/{license_name}",
                                         ssl=ssl_context)
            if response.status == 200:
                return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    response = await session.get(f"https://api.github.com/repos/{repo_slug}/license", ssl=ssl_context)
    if response.status != 200:
        return ""

    download_url = (await response.json()).get("download_url")
    if not download_url:
        return ""

    response = await session.get(download_url, ssl=ssl_context)
    if response.status != 200:
        # Do not hand back an error page as license text.
        return ""
    return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Uploads ``license_text`` for ``dependency==version`` into the internal cache.

    Nothing is done unless both LICENSE_CACHE_URL and LICENSE_CACHE_API_KEY are set.
    The upload is best effort: request errors are reported as a warning and never
    propagate to the caller. The return value carries no information.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        response = await session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        )
    except Exception as exc:
        # Deliberately not a bare ``except``: KeyboardInterrupt/SystemExit (and task
        # cancellation on Python 3.8+) must keep propagating.
        print(f"[Warning] Could not put {dependency}=={version} license into internal cache: {exc}")
        return

    if response.status == 201:
        print(f"Put license for {dependency}=={version} into internal cache.")
    elif response.status == 401:
        print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")


def check_gpl(dependency: str, license_text: str):
    """
    Aborts the process when ``license_text`` mentions GPL or LGPL.

    The whole text is scanned for the stand-alone words "GPL" / "LGPL". On a hit
    an error is printed and ``sys.exit(666)`` is called; otherwise nothing happens.
    """
    # Raw string so that "\b" is a word boundary (not a backspace character), and
    # search() so that the word is found anywhere, not only at the very beginning.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Collects the licenses for a requirements file and writes or returns them.

    Every license is rendered as ``<name>\\n<dashes>\\n<text>\\n\\n``. With ``dest``
    the combined text is written to that file (truncated when ``overwrite`` is set,
    appended otherwise) and None is returned; without ``dest`` the text is returned.

    The list of processed packages is written next to ``dest`` (or into the current
    directory when ``dest`` is not given).

    Exceptions raised while collecting or writing are printed, not re-raised; None
    is returned in that case.
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    # ``dest`` is optional, and os.path.dirname(None) would raise a TypeError.
    # basename() keeps repeated calls from stacking directory prefixes onto the
    # module-level value.
    packages_dir = os.path.dirname(dest) if dest else ""
    PACKAGES_FILE = os.path.join(packages_dir, os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        combined_text = "".join(
            f"{dep}\n" f"{'-' * len(dep)}\n" f"{text}\n\n" for dep, text in licenses.items()
        )

        if dest:
            mode = "w" if overwrite else "a+"
            with open(dest, mode, encoding="utf-8") as file:
                file.write(combined_text)
        else:
            return combined_text
    except Exception as exc:
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Builds the ``ssl`` argument used for outgoing requests.

    Returns a default SSL context backed by certifi's CA bundle, or ``False`` when
    certifi cannot be imported.
    """
    try:
        import certifi

        context = ssl.create_default_context(cafile=certifi.where())
    except ImportError:  # also covers ModuleNotFoundError, which is a subclass
        # Don't verify SSL certificate if certifi is not installed.
        context = False
    return context


if __name__ == "__main__":
    # Command line entry point: parse the options, run main() and print the combined
    # license text when main() returns one (i.e. when no --dest was given).
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    parser.add_argument("--pull-licenses", dest="pull_licenses", type=str, nargs="+",
        help="Specifies list of licenses that should be pulled from the requirements.txt list")
    parser.add_argument("-d", "--dest", dest="dest", type=str)
    parser.add_argument("--download-dir", dest="download_dir", type=str)
    parser.add_argument(
        "--ignore",
        dest="ignore",
        action="append",
        help="specifies dependency which licenses should be ignored. "
             "the script will not try to download license for ignored dependency.",
    )
    parser.add_argument(
        "--ignore-all-dependencies",
        dest="ignore_all",
        action="store_true",
        # The two literals are concatenated, so the first one needs a trailing space.
        help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements. "
             "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
    )
    parser.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="Overwrites the output file if present, otherwise license texts are appended.",
    )
    args = parser.parse_args()

    text = main(args.requirements, args.pull_licenses, args.dest, args.download_dir, args.ignore, ignore_all_dependencies=args.ignore_all,
                overwrite=args.overwrite)
    if text:
        print(text)


async-timeout-4.0.2
-------------------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

# aiohttp is imported behind a guard so that a failing import can be tolerated on
# Linux aarch64 (see the warnings printed below).
try:
    import aiohttp
except:
    # On Linux aarch64 the failure is reported and the script exits with status 0.
    if all([platform.uname().system == "Linux", platform.uname().machine == "aarch64"]):
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    # Everywhere else the original exception is propagated unchanged.
    raise

# Aliases used in return annotations; both are plain strings.
DependencyName = str
LicenseText = str

# Module-wide HTTP session. get_licenses() binds it for the duration of a run and the
# fetch helpers fall back to it when no session is passed explicitly.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
# The list is used to guess raw GitHub URLs (fetch_from_github) and to compare
# local file names (find_license_file).
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # added manually to pair common english spellings
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names tried, in this order, when guessing raw GitHub license URLs.
main_branches = [
    "master",
    "main",
    "latest",
]

# File that receives the name of every package a license was collected for.
# main() rebinds it to a path next to the destination file.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the optional internal license cache, taken from the environment;
# trailing slashes are removed so that paths can be appended with "/".
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# API key sent as "X-Key" header when putting licenses into the cache.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Collects the license texts for the pinned requirements in ``requirements_path``.

    Every requirement line must have the form ``name==version``, optionally followed
    by ``; <environment markers>`` and/or extra pip arguments. Lines that do not start
    with an alphanumeric character are skipped. A requirement is processed when its
    markers apply, it is not listed in ``ignore`` and, if ``pull_licenses`` is given,
    it is listed there.

    The names of all packages with a license are appended to PACKAGES_FILE.

    Returns a dict mapping package keys to license texts, sorted by key.

    Raises ValueError when a requirement has no ``==`` version, when no result was
    produced for a requirement, or when packages without a license text remain.
    ``check_gpl`` is called for every collected license text.
    """
    global PACKAGES_FILE

    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    # The session is bound to the module-level _session so that the fetch helpers
    # can use it without it being passed around.
    global _session
    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        # One scheduled get_license() task per accepted requirement, keyed by name.
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue
            try:
                dependency, version = requirement.split("==", maxsplit=1)
            except ValueError:
                raise ValueError(f"{requirement} must contain a version.")

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                version, markers = version.split(";")
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        # Wait for all lookups; an exception raised by any task propagates from here.
        await asyncio.gather(*license_tasks.values())

        licenses = {}
        # Packages that came back without a license text, as (package, attempt) pairs.
        pending = Queue()
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                if package in ignore:
                    continue

                # The first non-empty text collected for a package key wins.
                if licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")

                if not license_text and package not in ignore:
                    pending.put((package, 0))
                    # raise ValueError(f"License for {package} was not found.")

        missing = []
        # NOTE(review): `licenses` is not modified inside this loop, so a pending package
        # is only "verified" when another task's result already supplied a text for the
        # same key; otherwise it is re-queued until its counter exceeds 3 and then
        # reported as missing.
        while pending.qsize() > 0:
            package, ct = pending.get(timeout=0.01)
            if licenses.get(package):
                print(f"\tverified found license for (after repeat {ct}): {package}")
                continue

            # max dependency nesting depth check to detect missing cases
            if ct > 3:
                missing.append(package)
                continue
            pending.put((package, ct+1))

        if len(missing) > 0:
            raise ValueError(f"Licenses not found for {missing}")

        dependencies = sorted(list(licenses.keys()))

        # Rebuild the dict so that iteration follows the sorted key order.
        licenses = {dependency: licenses[dependency] for dependency in dependencies}
        for dependency, license_text in licenses.items():
            if license_text:
                check_gpl(dependency, license_text)
        return licenses


def _dotted_version(text: str):
    """
    Returns ``text`` as a tuple of ints when it is a dotted number such as "3.10",
    otherwise None.
    """
    parts = text.split(".")
    if all(part.isdecimal() for part in parts):
        return tuple(int(part) for part in parts)
    return None


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed

    Only a single ``<marker> <operator> <value>`` comparison is supported. The
    dependency is included (True) when there are no markers, no known operator is
    present, or the marker name has no (non-empty) binding on this system;
    otherwise the result of the comparison is returned.

    Raises ValueError when the markers combine conditions with ``and`` / ``or``.
    """
    if not markers:
        return True

    # Raw string so that "\b" is a word boundary (not a backspace character), and
    # search() because the keyword never sits at the very beginning of the markers.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    # The order matters because operator is found by using 'in'
    operators = {
        ">=": operator.ge,
        "<=": operator.le,
        "<": operator.lt,
        ">": operator.gt,
        "==": operator.eq,
        "!=": operator.ne,
        # "<marker> in <value>" tests the marker's binding against the value string.
        "not in": lambda a, b: a not in b,
        "in": lambda a, b: a in b,
    }

    for op, cmp in operators.items():
        if op in markers:
            break
    else:
        # No operators were found, include the dependency.
        return True

    # Split once only: the value itself may contain the operator text again.
    marker, value = (part.strip(" '\"") for part in markers.split(op, 1))

    binding = get_marker_bindings().get(marker)
    if not binding:
        # Unknown marker binding -- skip marker and include the dependency
        return True

    if op not in ("in", "not in"):
        # Compare dotted versions numerically; as plain strings "3.10" sorts before "3.8".
        binding_version, value_version = _dotted_version(binding), _dotted_version(value)
        if binding_version is not None and value_version is not None:
            return cmp(binding_version, value_version)

    return cmp(binding, value)


# Cache for get_marker_bindings(); filled on the first call.
_bindings = {}


def get_marker_bindings():
    """
    Provides the values environment markers are compared against: os name,
    platform, python version and other system information.

    The mapping is computed once and the same dict is returned on later calls.
    """
    global _bindings
    if _bindings:
        return _bindings

    implementation = getattr(sys, "implementation", None)
    if implementation is None:
        implementation_name = ''
        implementation_version = '0'
    else:
        implementation_name = implementation.name
        info = implementation.version
        implementation_version = f"{info.major}.{info.minor}.{info.micro}"
        level = info.releaselevel
        if level != 'final':
            # Pre-releases get the first letter of the level plus the serial appended.
            implementation_version += level[0] + str(info.serial)

    major_minor = platform.python_version_tuple()[:2]
    _bindings = dict(
        implementation_name=implementation_name,
        implementation_version=implementation_version,
        os_name=os.name,
        platform_machine=platform.machine(),
        platform_python_implementation=platform.python_implementation(),
        platform_release=platform.release(),
        platform_system=platform.system(),
        platform_version=platform.version(),
        python_full_version=platform.python_version(),
        python_version='.'.join(major_minor),
        sys_platform=sys.platform,
    )
    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """
    Downloads a license from the package-metadata S3 bucket.

    The object ``<package_manager>/<package>/<version>/license`` is requested through
    the module-level session. The lookup is best effort: any answer other than
    HTTP 200 and any request error result in an empty string.
    """
    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        response = await _session.get(url)
        if response.status != 200:
            # The body of an error answer is an error document, not a license text.
            return ""
        return await response.text()
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt/SystemExit (and task
        # cancellation on Python 3.8+) must keep propagating.
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Finds the license of ``dependency==version``.

    With ``download_dir`` the sources are tried in this order: an already downloaded
    copy inside ``download_dir``, the S3 bucket, a fresh ``pip download`` and finally
    ``fetch_license``. Without ``download_dir`` the folders on ``sys.path`` are
    searched before ``fetch_license`` is used.

    Licenses found locally, on S3 or through ``fetch_license`` are also put into the
    internal cache.

    Returns a dict keyed by ``<name>-<version>``; it can contain further packages
    when pip downloaded dependencies as well. An empty dict is returned when the
    dependency is ignored or no license was found.
    """
    # ``ignore`` defaults to None, which does not support the ``in`` test below.
    if ignore is None:
        ignore = []
    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        if os.path.exists(download_dir):
            # ``download_dir`` may be redirected to a temporary folder below, so the
            # folder being scanned is remembered separately.
            scanned_dir = download_dir
            tempdir = None
            # Check if package has already been downloaded
            for path in os.listdir(scanned_dir):
                if f"{dependency.replace('-', '_')}-{version}" in path:
                    license_text = await get_local_license(dependency, version, os.path.join(scanned_dir, path))
                    if license_text:
                        print(f"License for {dependency_str} found locally.")

                        await put_license_into_internal_cache(dependency, version, license_text)
                        return {dependency_str: license_text}
                    else:
                        license_text = await get_license_from_s3(dependency, version)
                        if license_text:
                            print(f"Downloaded a license for {dependency_str} from s3")

                            await put_license_into_internal_cache(dependency, version, license_text)
                            return {dependency_str: license_text}
                        print(f"No license available for {dependency_str}. Downloading a clean copy.")
                        if tempdir is None:
                            # Kept in a local so the folder lives until this call returns.
                            tempdir = tempfile.TemporaryDirectory()
                        download_dir = tempdir.name

        licenses = await pip_download(dependency, version, download_dir, ignore,
                                      ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}
    print(f"No license found for: {dependency_str}")
    # Explicit empty result instead of an implicit None, matching the annotation.
    return {}


# Splits the name of a downloaded file into "<name>" and "<version>" (first two groups).
dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Downloads ``dependency==version`` with ``pip download`` and extracts licenses.

    The files are downloaded into ``<download_dir>/<dependency>``. Every downloaded
    archive is unpacked and searched with ``get_local_license``; ``fetch_license`` and
    the S3 bucket are used as fallbacks. Found licenses are put into the internal cache.

    With ``ignore_all_dependencies`` the internal cache is asked first and pip is run
    with ``--no-deps``. ``extra_pip_args`` is split on whitespace and appended to the
    pip command line.

    Returns a dict keyed by ``<name>-<version>`` for every downloaded file; a value
    can be empty when no license was found. An empty dict is returned when the
    dependency is ignored or pip produced no download folder.

    Raises Exception when the pip process cannot be run.
    """
    if ignore is None:
        ignore = []
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            dependency_str = f"{dependency}-{version}"
            return {dependency_str: license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = \
        [
            sys.executable,
            "-m",
            "pip",
            "download",
            f"{dependency}=={version}",
            "-d",
            download_dep_dir,
            "-q",
            "--disable-pip-version-check",
            *additional_args
        ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        await process.wait()
    except Exception as exc:
        raise Exception(f"Failed to find and download a license for {dependency}=={version} using pip.") from exc

    licenses = {}
    if not os.path.isdir(download_dep_dir):
        # pip did not create the folder (e.g. the download failed); report "nothing
        # found" so that the caller can fall back to its other sources.
        print(f"[Warning] pip did not download anything for {dependency}=={version}.")
        return licenses

    for path in os.listdir(download_dep_dir):
        if os.path.isdir(join(download_dep_dir, path)):
            # Skip extracted folders.
            continue

        match = dependency_regex.match(path)
        if match:
            dependency_name = match.group(1)
            dependency_version = match.group(2)
            dependency_path = join(download_dep_dir, path)

            if dependency_name in ignore:
                continue

            archive_path, archive_ext = os.path.splitext(dependency_path)
            # NOTE(review): extractall() trusts the member paths of the archive; consider
            # an extraction filter where the running Python version provides one.
            if archive_ext in (".gz", ".bz", ".bz2", ".tgz", ".tbz"):
                # "r:*" lets tarfile detect the compression on its own.
                with tarfile.open(dependency_path, "r:*") as archive:
                    archive.extractall(archive_path)

                if archive_path.endswith(".tar"):
                    # Unpacking .tar.gz or .tar.bz2 will contain an extra .tar folder
                    archive_name = os.path.split(archive_path)[-1]
                    archive_path = join(archive_path, archive_name[: -len(".tar")])

            else:
                with zipfile.ZipFile(dependency_path) as archive:
                    archive.extractall(archive_path)

            dependency_str = f"{dependency_name}-{dependency_version}"
            licenses[dependency_str] = await get_local_license(dependency_name, dependency_version, archive_path)

            if not licenses[dependency_str]:
                licenses[dependency_str] = await fetch_license(dependency_name, dependency_version)

            if not licenses[dependency_str]:
                licenses[dependency_str] = await get_license_from_s3(dependency_name, dependency_version)

            if licenses[dependency_str]:
                await put_license_into_internal_cache(dependency_name, dependency_version, licenses[dependency_str])
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Looks for a license in every entry of ``sys.path``.

    Each entry is handed to ``get_local_license`` in order; the first non-empty
    result wins. Returns an empty string when no entry yields a license.
    """
    found = ""
    for search_root in sys.path:
        found = await get_local_license(dependency, version, search_root)
        if found:
            break
    # Normalise any falsy leftover from the last lookup to an empty string.
    return found or ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Tries to find a license for ``dependency`` inside the folder ``path``.

    Lookup order:
      1. ``.egg-info`` / ``.dist-info`` entries whose lower-cased name starts with
         ``<dependency with "-" replaced by "_">-<version>``.
      2. An entry whose lower-cased name equals the lower-cased dependency name.
      3. ``path`` itself.

    Returns the license text or an empty string when ``path`` is not a directory or
    nothing was found.
    """
    if not os.path.isdir(path):
        return ""

    lowered = dependency.lower()
    normalized = lowered.replace("-", "_")
    meta_prefix = f"{normalized}-{version}"

    metadata_entries = []
    source_entry = ""
    for entry in os.listdir(path):
        entry_lower = entry.lower()
        if entry_lower.startswith(meta_prefix):
            metadata_entries.append(join(path, entry))
        elif entry_lower == lowered:
            source_entry = join(path, entry)

    for metadata_entry in metadata_entries:
        if metadata_entry.endswith(".egg-info"):
            text = await egg_info_license(metadata_entry)
        elif metadata_entry.endswith(".dist-info"):
            text = await dist_info_license(metadata_entry)
        else:
            continue
        if text:
            return text

    # Package sources first (when present), then the given folder itself.
    folders = [source_entry, path] if source_entry else [path]
    for folder in folders:
        text = find_license_file(folder)
        if text:
            return text

    return ""


async def egg_info_license(package_path: str) -> str:
    """
    Resolves a license through the ``Home-page`` field of an ``.egg-info`` entry.

    Reads ``PKG-INFO`` inside ``package_path``, takes the first ``Home-page:`` header
    and passes its URL to ``fetch_from_github``.

    Returns the license text, or an empty string when ``PKG-INFO`` is missing, has no
    home page, or nothing could be fetched.
    """
    pkg_info_path = join(package_path, "PKG-INFO")
    if not os.path.isfile(pkg_info_path):
        # ``package_path`` may be a plain file or simply lack PKG-INFO; treat as "no license".
        return ""

    url = ""
    # Read as UTF-8 explicitly so the result does not depend on the platform default
    # encoding; undecodable bytes are replaced instead of aborting the lookup.
    with open(pkg_info_path, encoding="utf-8", errors="replace") as file:
        for line in file:
            if line.startswith("Home-page:"):
                url = line[len("Home-page:"):].strip()
                break

    if not url:
        return ""
    # fetch_from_github may yield a falsy non-string; always hand back a str.
    return await fetch_from_github(url) or ""


async def dist_info_license(package_path: str) -> str:
    """
    Resolves a license for a ``.dist-info`` folder.

    A license file shipped inside ``package_path`` is preferred. Otherwise the first
    ``Home-page:`` header of the ``METADATA`` file is passed to ``fetch_from_github``.

    Returns the license text, or an empty string when neither source yields one.
    Errors raised while reading ``METADATA`` are reported and re-raised.
    """
    license_text = find_license_file(package_path)
    if license_text:
        return license_text

    metadata_path = join(package_path, "METADATA")
    if not exists(metadata_path):
        return ""

    try:
        with open(metadata_path, "rb") as file:
            metadata = file.readlines()
    except Exception:
        print(f"Could not open metadata file for: {package_path}")
        raise

    url = ""
    for raw_line in metadata:
        # Decode line by line so that one undecodable line cannot hide the header.
        try:
            line = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            continue
        if line.startswith("Home-page:"):
            url = line[len("Home-page:"):].strip()
            break

    if not url:
        return ""
    # fetch_from_github may yield a falsy non-string; always hand back a str.
    return await fetch_from_github(url) or ""


def find_license_file(package_path: str) -> str:
    """
    Reads the first license-like file found for a package folder.

    Files are searched in the ``licenses`` sub-folder when that is a directory,
    otherwise directly in ``package_path``. A file qualifies when its name starts
    with "license", "licence" or "copying" (case-insensitive). Every entry of
    ``license_names`` starts with one of these words, so no separate exact-name
    comparison is needed.

    Returns the file content, or an empty string when ``package_path`` is not a
    directory or no matching file exists.
    """
    if not os.path.isdir(package_path):
        return ""

    licenses_folder = join(package_path, "licenses")
    # isdir (not exists): a plain file called "licenses" cannot be listed.
    search_folder = licenses_folder if os.path.isdir(licenses_folder) else package_path

    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)
    license_path = ""
    # sorted() makes the pick deterministic when several candidates exist.
    for name in sorted(os.listdir(search_folder)):
        candidate = join(search_folder, name)
        if os.path.isdir(candidate):
            continue
        # safe because the (possibly) case-sensitive path is what we have (unlike URL)
        if license_regex.match(name):
            license_path = candidate
            break

    if not license_path:
        return ""

    # Strict decoding first so that problematic files are reported, then a lenient retry.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetches a license for ``dependency==version`` from remote sources.

    The internal license cache is asked first. Otherwise the package's home page is
    read from the PyPI JSON document and passed to ``fetch_from_github``.

    Returns the license text, or an empty string when PyPI does not answer with 200
    or no license could be fetched from the home page.

    Raises ValueError when PyPI lists no home page for the package.
    """
    if session is None:
        session = _session

    license_text = await fetch_license_from_internal_cache(dependency, version)
    if license_text:
        return license_text

    response = await session.get(f"https://pypi.org/pypi/{dependency}/{version}/json", ssl=get_ssl_cert())
    if response.status != 200:
        return ""

    info = (await response.json()).get("info") or {}
    # "home_page" is often empty and "project_urls" can be null or lack "Homepage";
    # look both up defensively so a missing key ends in the ValueError below
    # rather than a KeyError/TypeError.
    home_page = info.get("home_page") or (info.get("project_urls") or {}).get("Homepage")
    if not home_page:
        raise ValueError(f"home_page is missing for {dependency}=={version}")

    # fetch_from_github may yield a falsy non-string; always hand back a str.
    return await fetch_from_github(home_page) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Asks the internal license cache for the license of ``dependency==version``.

    The cache is addressed as ``{LICENSE_CACHE_URL}/pip/<name>/<version>`` with
    underscores in the name replaced by dashes. The lookup is best effort: a
    disabled cache, a non-200 answer, a payload without "text" or any request
    error all result in an empty string.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        response = await session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}")
        if response.status != 200:
            return ""
        payload = await response.json()
        text = payload.get("text") if isinstance(payload, dict) else None
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt/SystemExit (and task
        # cancellation on Python 3.8+) must keep propagating.
        return ""

    if not text:
        return ""
    print(f"Found license for {dependency}=={version} in internal cache.")
    return text


async def fetch_from_github(url: str, session: aiohttp.ClientSession = None) -> str:
    """
    Downloads the license of the GitHub repository referenced by ``url``.

    Raw file URLs are guessed first from ``main_branches`` x ``license_names``; the
    GitHub license API is only used as a fallback.

    Returns the license text, or an empty string when ``url`` does not point to a
    GitHub host, does not contain an ``<owner>/<repo>`` path, or no license could
    be downloaded.
    """
    # Imported here because the module only does ``import urllib``, which does not
    # guarantee that the ``parse`` submodule has been loaded.
    from urllib.parse import urlparse

    if session is None:
        session = _session

    parsed_url = urlparse(url)
    if "github" not in parsed_url.netloc:
        return ""

    # Both endpoints below expect exactly "<owner>/<repo>", so drop anything after
    # the repository name (e.g. "/tree/main/...") and a trailing ".git".
    segments = [segment for segment in parsed_url.path.split("/") if segment]
    if len(segments) < 2:
        return ""
    owner, repo = segments[:2]
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    repo_slug = f"{owner}/{repo}"

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            response = await session.get(f"https://raw.githubusercontent.com/{repo_slug}/{branch}/{license_name}",
                                         ssl=ssl_context)
            if response.status == 200:
                return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    response = await session.get(f"https://api.github.com/repos/{repo_slug}/license", ssl=ssl_context)
    if response.status != 200:
        return ""

    download_url = (await response.json()).get("download_url")
    if not download_url:
        return ""

    response = await session.get(download_url, ssl=ssl_context)
    if response.status != 200:
        # Do not hand back an error page as license text.
        return ""
    return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Uploads ``license_text`` for ``dependency==version`` into the internal cache.

    Nothing is done unless both LICENSE_CACHE_URL and LICENSE_CACHE_API_KEY are set.
    The upload is best effort: request errors are reported as a warning and never
    propagate to the caller. The return value carries no information.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        response = await session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        )
    except Exception as exc:
        # Deliberately not a bare ``except``: KeyboardInterrupt/SystemExit (and task
        # cancellation on Python 3.8+) must keep propagating.
        print(f"[Warning] Could not put {dependency}=={version} license into internal cache: {exc}")
        return

    if response.status == 201:
        print(f"Put license for {dependency}=={version} into internal cache.")
    elif response.status == 401:
        print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")


def check_gpl(dependency: str, license_text: str):
    """
    Aborts the process when a license text mentions GPL or LGPL.

    :param dependency: name used in the error message.
    :param license_text: license text to scan; empty or None is accepted and ignored.
    :raises SystemExit: with code 666 when "GPL" or "LGPL" occurs as a whole word.
    """
    # The pattern must be a raw string ("\b" is a backspace character otherwise) and must be
    # searched for: re.match only looks at the very beginning of the text.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Collects the licenses of the given requirements and writes or returns the combined text.

    :param requirements_path: path to the requirements file passed to `get_licenses`.
    :param pull_licenses: optional list of requirement names to restrict the run to.
    :param dest: output file. When omitted the combined text is returned instead.
    :param download_dir: directory passed through to `get_licenses`.
    :param ignore: package names (without version data) whose licenses are skipped.
    :param ignore_all_dependencies: passed through to `get_licenses`.
    :param overwrite: truncate `dest` instead of appending to it.
    :return: the combined license text when `dest` is not given; None when the text was
        written to `dest` or when an exception was caught and printed.
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    # `dest` is optional, so os.path.dirname must not be handed None. Taking the basename
    # keeps repeated calls from nesting the directory into an already prefixed path.
    packages_dir = os.path.dirname(dest) if dest else ""
    PACKAGES_FILE = os.path.join(packages_dir, os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        # One section per package: name, dashed underline of the same length, license text.
        combined_text = "".join(
            [f"{dep}\n" f"{'-' * len(dep)}\n" f"{text}\n\n" for dep, text in licenses.items()]
        )

        if dest:
            mode = "w" if overwrite else "a+"
            with open(dest, mode, encoding="utf-8") as file:
                file.write(combined_text)
        else:
            return combined_text
    except Exception as exc:
        # Failures are reported on stdout only; the caller receives None.
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Produces the value passed as the `ssl` argument of the HTTP requests in this module.

    :return: a default SSL context that trusts the certifi CA bundle, or False when
        certifi cannot be imported.
    """
    try:
        import certifi

        ca_bundle = certifi.where()
        context = ssl.create_default_context(cafile=ca_bundle)
    except ImportError:
        # ModuleNotFoundError is a subclass of ImportError, so both are handled here.
        # Don't verify SSL certificate if certifi is not installed.
        return False
    else:
        return context


if __name__ == "__main__":
    # Command-line entry point: parse the options, run main() and print the combined
    # license text when main() returned one (i.e. when no --dest file was given).
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    parser.add_argument("--pull-licenses", dest="pull_licenses", type=str, nargs="+",
        help="Specifies list of licenses that should be pulled from the requirements.txt list")
    parser.add_argument("-d", "--dest", dest="dest", type=str)
    parser.add_argument("--download-dir", dest="download_dir", type=str)
    parser.add_argument(
        "--ignore",
        dest="ignore",
        action="append",
        help="specifies dependency which licenses should be ignored. "
             "the script will not try to download license for ignored dependency.",
    )
    parser.add_argument(
        "--ignore-all-dependencies",
        dest="ignore_all",
        action="store_true",
        # The two literals are concatenated, so the first one needs a trailing space.
        help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements. "
             "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
    )
    parser.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="Overwrites the output file if present, otherwise license texts are appended.",
    )
    args = parser.parse_args()

    text = main(args.requirements, args.pull_licenses, args.dest, args.download_dir, args.ignore, ignore_all_dependencies=args.ignore_all,
                overwrite=args.overwrite)
    if text:
        print(text)


atomicwrites-1.4.0
------------------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

# aiohttp supplies the ClientSession used for every HTTP request in this module.
try:
    import aiohttp
except:
    # On Linux aarch64 a failed import is reported as a warning and the script exits with
    # status 0, i.e. the run is reported as successful without collecting anything.
    if all([platform.uname().system == "Linux", platform.uname().machine == "aarch64"]):
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    # Every other platform: re-raise the original import failure.
    raise

# Type aliases used in the annotations of the functions that return license mappings.
DependencyName = str
LicenseText = str

# Shared HTTP session. It is bound by get_licenses() for the duration of a run and is the
# fallback for every function that accepts an optional `session` argument.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # added manually to pair common english spellings
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names tried, in this order, when guessing the raw GitHub URL of a license file.
main_branches = [
    "master",
    "main",
    "latest",
]

# File that receives one line per package whose license was collected.
# main() prefixes it with the directory of the destination file.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the internal license cache; the cache is disabled when the variable is unset.
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    # Normalise so that URL paths can be appended with a single "/".
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# API key sent in the "X-Key" header when a license is uploaded to the cache.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Collects the license texts for the pinned requirements of a requirements file.

    Every line that starts with an alphanumeric character must have the form
    `name==version`, optionally followed by `; <environment marker>` and/or extra pip
    arguments. One `get_license` task is started per accepted requirement and all tasks
    are awaited together. The name of every package whose license was found is appended
    to `PACKAGES_FILE`.

    :param requirements_path: path of the requirements file to read.
    :param pull_licenses: when given, only these requirement names are processed.
    :param download_dir: directory passed through to `get_license`.
    :param ignore: package names to skip.
    :param ignore_all_dependencies: passed through to `get_license`.
    :return: dict mapping "<package>-<version>" to license text, sorted by key.
    :raises ValueError: when a requirement has no `==version`, when no license data was
        produced for a requirement, or when a package ended up without a license text.
    """
    global PACKAGES_FILE
    global _session

    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    # The session is published through the module-level `_session` so that the helper
    # functions can use it without receiving it as an argument.
    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue
            try:
                dependency, version = requirement.split("==", maxsplit=1)
            except ValueError:
                raise ValueError(f"{requirement} must contain a version.")

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                # Split once only, so a second ";" cannot break the two-value unpacking.
                version, markers = version.split(";", 1)
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        await asyncio.gather(*license_tasks.values())

        licenses = {}
        # Packages reported without a license text. Another requirement processed later may
        # still provide the text, so they are only verified after all results were merged.
        unresolved = []
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                if package in ignore or licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")
                else:
                    unresolved.append(package)

        # `licenses` no longer changes at this point, so a single pass is sufficient; the
        # former retry counter on a blocking queue could not alter the outcome.
        missing = []
        for package in unresolved:
            if licenses.get(package):
                print(f"\tverified found license for (after repeat 0): {package}")
            else:
                missing.append(package)

        if missing:
            raise ValueError(f"Licenses not found for {missing}")

        licenses = {name: licenses[name] for name in sorted(licenses)}
        for dependency, license_text in licenses.items():
            if license_text:
                check_gpl(dependency, license_text)
        return licenses


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed.

    Only a single `<marker> <operator> <value>` expression is supported. Markers without a
    known operator, and markers whose name has no binding, include the dependency.

    :param markers: the text that followed ";" on the requirement line (may be empty).
    :return: True when the dependency must be analyzed, False when the marker excludes it.
    :raises ValueError: when the markers combine conditions with `and` / `or`.
    """
    if not markers:
        return True

    # Raw string + re.search: "\b" in a plain string is a backspace character, and re.match
    # would only look at the very beginning of the marker text.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    # The order matters because operator is found by using 'in'
    operators = {
        ">=": operator.ge,
        "<=": operator.le,
        "<": operator.lt,
        ">": operator.gt,
        "==": operator.eq,
        "!=": operator.ne,
        "not in": lambda a, b: not operator.contains(a, b),
        "in": operator.contains,
    }

    for op, cmp in operators.items():
        if op in markers:
            break
    else:
        # No operators were found, include the dependency.
        return True

    # Split once only so that a repeated operator cannot break the two-value unpacking.
    marker, value = markers.split(op, 1)
    marker, value = marker.strip(" '\""), value.strip(" '\"")

    binding = get_marker_bindings().get(marker)
    if not binding:
        # Unknown marker binding -- skip marker and include the dependency
        return True

    def as_release(text):
        # "3.10" -> [3, 10]; None when the text is not a purely numeric dotted version.
        parts = text.split(".")
        if all(part.isdecimal() for part in parts):
            return [int(part) for part in parts]
        return None

    if op not in ("in", "not in"):
        # Compare versions numerically: as plain strings "3.10" sorts before "3.6".
        left, right = as_release(binding), as_release(value)
        if left is not None and right is not None:
            width = max(len(left), len(right))
            left += [0] * (width - len(left))
            right += [0] * (width - len(right))
            return cmp(left, right)

    return cmp(binding, value)


_bindings = {}


def get_marker_bindings():
    """
    Returns the values of the environment-marker variables for the running interpreter:
    os name, platform, python version and other system information.

    The mapping is computed on the first call and stored in the module-level `_bindings`;
    later calls return that same dict object.
    """
    global _bindings
    if _bindings:
        return _bindings

    if hasattr(sys, 'implementation'):
        info = sys.implementation.version
        impl_version = f"{info.major}.{info.minor}.{info.micro}"
        level = info.releaselevel
        if level != 'final':
            # Non-final releases get the first letter of the level plus the serial appended.
            impl_version = impl_version + level[0] + str(info.serial)
        impl_name = sys.implementation.name
    else:
        impl_name, impl_version = '', '0'

    major_minor = platform.python_version_tuple()[:2]

    computed = {}
    computed['implementation_name'] = impl_name
    computed['implementation_version'] = impl_version
    computed['os_name'] = os.name
    computed['platform_machine'] = platform.machine()
    computed['platform_python_implementation'] = platform.python_implementation()
    computed['platform_release'] = platform.release()
    computed['platform_system'] = platform.system()
    computed['platform_version'] = platform.version()
    computed['python_full_version'] = platform.python_version()
    computed['python_version'] = '.'.join(major_minor)
    computed['sys_platform'] = sys.platform

    _bindings = computed
    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """
    Downloads a license text from the package-metadata S3 bucket (best effort).

    :param package: package name used in the object key.
    :param version: package version used in the object key.
    :param package_manager: first path segment of the object key.
    :return: the license text, or an empty string when the request fails or is not answered
        with status 200.
    """
    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        async with _session.get(url) as response:
            if response.status != 200:
                # The body of an error reply must not be mistaken for a license text.
                return ""
            return await response.text()
    except Exception:
        # `Exception` instead of a bare except keeps task cancellation working.
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Finds the license of a single dependency, trying the cheapest sources first.

    With `download_dir`: previously downloaded packages in that directory, then the S3
    bucket, then a fresh `pip download`. Without it: the directories on `sys.path`.
    The last resort in both cases is `fetch_license`.

    :param dependency: package name.
    :param version: package version.
    :param download_dir: directory holding (or receiving) downloaded packages.
    :param ignore: package names to skip; None is treated as an empty list.
    :param ignore_all_dependencies: passed through to `pip_download`.
    :param extra_pip_args: extra command-line arguments passed through to `pip_download`.
    :return: dict mapping "<package>-<version>" to license text; empty when the dependency
        is ignored or no license was found.
    """
    if ignore is None:
        # The default must be usable: `dependency in None` raises TypeError.
        ignore = []

    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        if os.path.exists(download_dir):
            # Check if package has already been downloaded
            for path in os.listdir(download_dir):
                if f"{dependency.replace('-', '_')}-{version}" in path:
                    license_text = await get_local_license(dependency, version, os.path.join(download_dir, path))
                    if license_text:
                        print(f"License for {dependency_str} found locally.")

                        await put_license_into_internal_cache(dependency, version, license_text)
                        return {dependency_str: license_text}
                    else:
                        license_text = await get_license_from_s3(dependency, version)
                        if license_text:
                            print(f"Downloaded a license for {dependency_str} from s3")

                            await put_license_into_internal_cache(dependency, version, license_text)
                            return {dependency_str: license_text}
                        print(f"No license available for {dependency_str}. Downloading a clean copy.")
                        # The local variable keeps the temporary directory object referenced
                        # while pip_download below works inside it.
                        tempdir = tempfile.TemporaryDirectory()
                        download_dir = tempdir.name

        licenses = await pip_download(dependency, version, download_dir, ignore,
                                      ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}
    print(f"No license found for: {dependency_str}")
    # An empty dict honours the declared return type; it is falsy like the former None.
    return {}


# Splits a downloaded file name into (package name, version followed by further dotted parts).
dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Downloads a package with `pip download`, unpacks every downloaded archive and collects
    the license of each package found.

    :param dependency: package name.
    :param version: package version.
    :param download_dir: parent directory; files are stored in `<download_dir>/<dependency>`.
    :param ignore: package names to skip.
    :param ignore_all_dependencies: when set, the internal cache is consulted first and pip
        is run with `--no-deps`.
    :param extra_pip_args: extra command-line arguments, split on whitespace.
    :return: dict mapping "<package>-<version>" to license text; the text may be empty.
    :raises Exception: when the pip subprocess could not be started or awaited.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            return {f"{dependency}-{version}": license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = \
        [
            sys.executable,
            "-m",
            "pip",
            "download",
            f"{dependency}=={version}",
            "-d",
            download_dep_dir,
            "-q",
            "--disable-pip-version-check",
            *additional_args
        ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        await process.wait()
    except Exception as exc:
        # `Exception` instead of a bare except lets task cancellation pass through.
        raise Exception(f"Failed to find and download a license for {dependency}=={version} using pip.") from exc

    # os.path.splitext returns the extension WITH its leading dot.
    tar_extensions = (".gz", ".bz", ".bz2", ".tgz", ".tbz")
    # Archive suffixes that the version group of dependency_regex swallows for sdists.
    archive_suffixes = (".tar.gz", ".tar.bz2", ".tar.bz", ".tgz", ".tbz", ".zip")

    licenses = {}
    for path in os.listdir(download_dep_dir):
        dependency_path = join(download_dep_dir, path)
        if os.path.isdir(dependency_path):
            # Skip extracted folders.
            continue

        match = dependency_regex.match(path)
        if not match:
            continue

        dependency_name = match.group(1)
        dependency_version = match.group(2)
        if dependency_name in ignore:
            continue

        # "pkg-1.0.tar.gz" yields the version "1.0.tar.gz"; drop the archive suffix.
        for suffix in archive_suffixes:
            if dependency_version.endswith(suffix):
                dependency_version = dependency_version[: -len(suffix)]
                break

        archive_path, archive_ext = os.path.splitext(dependency_path)
        if archive_ext in tar_extensions:
            if dependency_path.endswith(".tar.gz") or dependency_path.endswith(".tgz"):
                mode = "r:gz"
            elif dependency_path.endswith(".tar.bz2") or dependency_path.endswith(".tbz"):
                mode = "r:bz2"
            else:
                mode = "r"

            # NOTE(review): extractall trusts the member paths stored in the archive.
            with tarfile.open(dependency_path, mode) as archive:
                archive.extractall(archive_path)

            archive_name = os.path.split(archive_path)[-1]
            if archive_path.endswith(".tar"):
                # Unpacking .tar.gz or .tar.bz2 will contain an extra .tar folder
                archive_path = join(archive_path, archive_name[: -len(".tar")])
            elif os.path.isdir(join(archive_path, archive_name)):
                # Same nesting for .tgz / .tbz, whose stem carries no ".tar" part.
                archive_path = join(archive_path, archive_name)
        else:
            with zipfile.ZipFile(dependency_path) as archive:
                archive.extractall(archive_path)

        dependency_str = f"{dependency_name}-{dependency_version}"
        # Sources in order of preference: unpacked files, PyPI/GitHub, S3 bucket.
        license_text = await get_local_license(dependency_name, dependency_version, archive_path)
        if not license_text:
            license_text = await fetch_license(dependency_name, dependency_version)
        if not license_text:
            license_text = await get_license_from_s3(dependency_name, dependency_version)

        licenses[dependency_str] = license_text
        if license_text:
            await put_license_into_internal_cache(dependency_name, dependency_version, license_text)
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Searches every directory on `sys.path` for a locally installed license.

    :return: the first non-empty license text found, otherwise an empty string.
    """
    found = ""
    for search_dir in sys.path:
        found = await get_local_license(dependency, version, search_dir)
        if found:
            break
    # A falsy last result is normalised to "", exactly like the plain fall-through return.
    return found or ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Looks for the license of a package inside a local directory.

    Sources, in order: `.egg-info` / `.dist-info` entries named `<name>-<version>...`, the
    package folder itself, and finally license files directly inside `path`.

    :param dependency: package name; compared case-insensitively.
    :param version: package version; compared case-insensitively.
    :param path: directory to search.
    :return: the license text, or an empty string when `path` is not a directory or no
        license was found.
    """
    if not os.path.isdir(path):
        return ""

    dependency_name = dependency.lower().replace("-", "_")
    # Entry names are lower-cased before the comparison, so the prefix must be as well.
    meta_prefix = f"{dependency_name}-{version}".lower()
    # The package folder may use either spelling: "foo-bar" or the normalised "foo_bar".
    package_dir_names = {dependency.lower(), dependency_name}

    package_meta_candidates = []
    package_path = ""
    for entry in os.listdir(path):
        lowered = entry.lower()
        if lowered.startswith(meta_prefix):
            package_meta_candidates.append(join(path, entry))
        elif lowered in package_dir_names:
            package_path = join(path, entry)

    for package_meta in package_meta_candidates:
        if package_meta.endswith(".egg-info"):
            license_text = await egg_info_license(package_meta)
            if license_text:
                return license_text
        elif package_meta.endswith(".dist-info"):
            license_text = await dist_info_license(package_meta)
            if license_text:
                return license_text

    if package_path:
        # Try to find a license file in the package source code
        license_text = find_license_file(package_path)
        if license_text:
            return license_text

    # A license file can be in the specified folder itself.
    license_text = find_license_file(path)
    if license_text:
        return license_text

    return ""


async def egg_info_license(package_path: str) -> str:
    """
    Resolves the license of a package from its `.egg-info` metadata.

    The `Home-page:` field of `PKG-INFO` is read and the license is fetched from that URL
    through `fetch_from_github`.

    :param package_path: path of the `.egg-info` entry.
    :return: the license text, or an empty string when `PKG-INFO` cannot be read, has no
        home page, or no license could be fetched.
    """
    try:
        # Read bytes and decode line by line (as dist_info_license does) so that a single
        # undecodable line cannot abort the lookup.
        with open(join(package_path, "PKG-INFO"), "rb") as file:
            raw_lines = file.readlines()
    except OSError:
        # Missing PKG-INFO, or the .egg-info entry is a plain file rather than a folder.
        return ""

    url = None
    for raw_line in raw_lines:
        try:
            line = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            continue
        if line.startswith("Home-page:"):
            url = line[len("Home-page:"):].strip()
            break

    if not url:
        return ""
    return (await fetch_from_github(url)) or ""


async def dist_info_license(package_path: str) -> str:
    """
    Resolves the license of a package from its `.dist-info` folder.

    A license file inside the folder wins; otherwise the `Home-page:` field of `METADATA`
    is read and the license is fetched from that URL through `fetch_from_github`.

    :param package_path: path of the `.dist-info` folder.
    :return: the license text, or an empty string when none was found.
    """
    license_text = find_license_file(package_path)
    if license_text:
        return license_text

    metadata_path = join(package_path, "METADATA")
    if not exists(metadata_path):
        return ""

    with open(metadata_path, "rb") as file:
        try:
            metadata = file.readlines()
        except Exception:
            print(f"Could not open metadata file for: {package_path}")
            raise

    url = None
    for raw_line in metadata:
        try:
            line = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            # Lines that are not valid UTF-8 cannot hold the field we are looking for.
            continue
        if line.startswith("Home-page:"):
            url = line[len("Home-page:"):].strip()
            break

    if not url:
        # Honour the declared `str` return type instead of falling off the end with None.
        return ""
    return (await fetch_from_github(url)) or ""


def find_license_file(package_path: str) -> str:
    """
    Reads the first license file found in a package folder.

    When the folder has a `licenses` sub-folder only that sub-folder is searched. A file
    qualifies when its name starts with "license", "licence" or "copying" (in any letter
    case) or equals one of `license_names`. Directories are skipped.

    :param package_path: folder to search.
    :return: the content of the license file, or an empty string when `package_path` is not
        a directory or holds no license file.
    """
    if not exists(package_path) or not os.path.isdir(package_path):
        return ""

    licenses_folder = join(package_path, "licenses")
    license_search_folder = licenses_folder if exists(licenses_folder) else package_path

    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)
    license_path = ""
    for path in os.listdir(license_search_folder):
        candidate = join(license_search_folder, path)
        if os.path.isdir(candidate):
            continue
        # safe because the (possibly) case-sensitive path is what we have (unlike URL)
        #   verify if any version of the common licen[cs]e or copying names appear
        if license_regex.match(path) or path in license_names:
            license_path = candidate
            break

    if not license_path:
        # Honour the declared `str` return type instead of falling off the end with None.
        return ""

    # i want to try / except here because i want to know when we have issues reading files.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetches the license of a package from remote sources.

    The internal cache is asked first. Otherwise the home page of the release is looked up
    in the PyPI JSON API and the license is fetched from it through `fetch_from_github`.

    :param dependency: package name.
    :param version: package version.
    :param session: HTTP session to use; defaults to the module-level `_session`.
    :return: the license text, or an empty string when PyPI does not answer with status
        200 or no license could be fetched from the home page.
    :raises ValueError: when the PyPI metadata names no home page.
    """
    if session is None:
        session = _session

    license_text = await fetch_license_from_internal_cache(dependency, version, session=session)
    if license_text:
        return license_text

    # `async with` releases the connection once the JSON body has been read.
    async with session.get(f"https://pypi.org/pypi/{dependency}/{version}/json", ssl=get_ssl_cert()) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    info = response_json["info"]
    # "project_urls" may be null or lack "Homepage"; look both up without raising KeyError
    # or TypeError so that the ValueError below is what callers get.
    home_page = info.get("home_page") or (info.get("project_urls") or {}).get("Homepage")
    if not home_page:
        raise ValueError(f"home_page is missing for {dependency}=={version}")

    return (await fetch_from_github(home_page, session=session)) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Looks a license up in the internal license cache (best effort).

    :param dependency: package name; underscores are replaced with dashes for the cache key.
    :param version: package version.
    :param session: HTTP session to use; defaults to the module-level `_session`.
    :return: the cached license text, or an empty string when the cache is disabled, has no
        entry, or the request fails.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}") as response:
            if response.status != 200:
                return ""
            payload = await response.json()
        print(f"Found license for {dependency}=={version} in internal cache.")
        # A missing "text" key must not leak None through the declared `str` return type.
        return payload.get("text") or ""
    except Exception:
        # `Exception` instead of a bare except keeps task cancellation working.
        return ""


async def fetch_from_github(url: str, session: aiohttp.ClientSession = None) -> str:
    """
    Downloads the license text of the GitHub repository that `url` points to.

    Every combination of `main_branches` and `license_names` is probed on the raw-content
    host first; the GitHub license API is only queried when none of those guesses succeeds.

    :param url: project home page. URLs whose host does not contain "github" are rejected.
    :param session: HTTP session to use; defaults to the module-level `_session`.
    :return: the license text, or an empty string when nothing could be downloaded.
    """
    # A bare `import urllib` does not guarantee that the `parse` submodule is loaded.
    import urllib.parse

    if session is None:
        session = _session

    parsed_url = urllib.parse.urlparse(url)
    if "github" not in parsed_url.netloc:
        # Honour the declared `str` return type instead of handing None to callers.
        return ""

    repo_path = parsed_url.path.strip("/")
    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            raw_url = f"https://raw.githubusercontent.com/{repo_path}/{branch}/{license_name}"
            # `async with` releases the connection after every probe, including the misses.
            async with session.get(raw_url, ssl=ssl_context) as response:
                if response.status == 200:
                    return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    async with session.get(f"https://api.github.com/repos/{repo_path}/license", ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    download_url = response_json.get("download_url")
    if not download_url:
        return ""

    async with session.get(download_url, ssl=ssl_context) as response:
        if response.status != 200:
            # Do not pass an error page off as license text.
            return ""
        return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Uploads a license text to the internal license cache (best effort).

    Nothing is sent unless both `LICENSE_CACHE_URL` and `LICENSE_CACHE_API_KEY` are set.
    Failures are reported on stdout and never propagate to the caller.

    :param dependency: package name; underscores are replaced with dashes for the cache key.
    :param version: package version.
    :param license_text: text stored under the "text" key of the JSON payload.
    :param session: HTTP session to use; defaults to the module-level `_session`.
    :return: an empty string when the cache is not configured, otherwise None.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        # `async with` releases the connection once the status has been inspected.
        async with session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        ) as response:
            if response.status == 201:
                print(f"Put license for {dependency}=={version} into internal cache.")
            elif response.status == 401:
                print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")
    except Exception as exc:
        # `Exception` (not a bare except) keeps task cancellation and Ctrl-C working,
        # while an unavailable cache still cannot fail the run.
        print(f"[Warning] Could not put {dependency}=={version} license into internal cache: {exc}")


def check_gpl(dependency: str, license_text: str):
    """
    Aborts the process when a license text mentions GPL or LGPL.

    :param dependency: name used in the error message.
    :param license_text: license text to scan; empty or None is accepted and ignored.
    :raises SystemExit: with code 666 when "GPL" or "LGPL" occurs as a whole word.
    """
    # The pattern must be a raw string ("\b" is a backspace character otherwise) and must be
    # searched for: re.match only looks at the very beginning of the text.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Collects the licenses of the given requirements and writes or returns the combined text.

    :param requirements_path: path to the requirements file passed to `get_licenses`.
    :param pull_licenses: optional list of requirement names to restrict the run to.
    :param dest: output file. When omitted the combined text is returned instead.
    :param download_dir: directory passed through to `get_licenses`.
    :param ignore: package names (without version data) whose licenses are skipped.
    :param ignore_all_dependencies: passed through to `get_licenses`.
    :param overwrite: truncate `dest` instead of appending to it.
    :return: the combined license text when `dest` is not given; None when the text was
        written to `dest` or when an exception was caught and printed.
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    # `dest` is optional, so os.path.dirname must not be handed None. Taking the basename
    # keeps repeated calls from nesting the directory into an already prefixed path.
    packages_dir = os.path.dirname(dest) if dest else ""
    PACKAGES_FILE = os.path.join(packages_dir, os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        # One section per package: name, dashed underline of the same length, license text.
        combined_text = "".join(
            [f"{dep}\n" f"{'-' * len(dep)}\n" f"{text}\n\n" for dep, text in licenses.items()]
        )

        if dest:
            mode = "w" if overwrite else "a+"
            with open(dest, mode, encoding="utf-8") as file:
                file.write(combined_text)
        else:
            return combined_text
    except Exception as exc:
        # Failures are reported on stdout only; the caller receives None.
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Produces the value passed as the `ssl` argument of the HTTP requests in this module.

    :return: a default SSL context that trusts the certifi CA bundle, or False when
        certifi cannot be imported.
    """
    try:
        import certifi

        ca_bundle = certifi.where()
        context = ssl.create_default_context(cafile=ca_bundle)
    except ImportError:
        # ModuleNotFoundError is a subclass of ImportError, so both are handled here.
        # Don't verify SSL certificate if certifi is not installed.
        return False
    else:
        return context


if __name__ == "__main__":
    # Command-line entry point: parse the options, run main() and print the combined
    # license text when main() returned one (i.e. when no --dest file was given).
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    parser.add_argument("--pull-licenses", dest="pull_licenses", type=str, nargs="+",
        help="Specifies list of licenses that should be pulled from the requirements.txt list")
    parser.add_argument("-d", "--dest", dest="dest", type=str)
    parser.add_argument("--download-dir", dest="download_dir", type=str)
    parser.add_argument(
        "--ignore",
        dest="ignore",
        action="append",
        help="specifies dependency which licenses should be ignored. "
             "the script will not try to download license for ignored dependency.",
    )
    parser.add_argument(
        "--ignore-all-dependencies",
        dest="ignore_all",
        action="store_true",
        # The two literals are concatenated, so the first one needs a trailing space.
        help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements. "
             "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
    )
    parser.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="Overwrites the output file if present, otherwise license texts are appended.",
    )
    args = parser.parse_args()

    text = main(args.requirements, args.pull_licenses, args.dest, args.download_dir, args.ignore, ignore_all_dependencies=args.ignore_all,
                overwrite=args.overwrite)
    if text:
        print(text)


attrs-21.4.0
------------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

# aiohttp supplies the ClientSession used for every HTTP request in this module.
try:
    import aiohttp
except:
    # On Linux aarch64 a failed import is reported as a warning and the script exits with
    # status 0, i.e. the run is reported as successful without collecting anything.
    if all([platform.uname().system == "Linux", platform.uname().machine == "aarch64"]):
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    # Every other platform: re-raise the original import failure.
    raise

# Type aliases used in the annotations of the functions that return license mappings.
DependencyName = str
LicenseText = str

# Shared HTTP session. It is bound by get_licenses() for the duration of a run and is the
# fallback for every function that accepts an optional `session` argument.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # added manually to pair common english spellings
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names tried, in this order, when guessing the raw GitHub URL of a license file.
main_branches = [
    "master",
    "main",
    "latest",
]

# File that receives one line per package whose license was collected.
# main() prefixes it with the directory of the destination file.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the internal license cache; the cache is disabled when the variable is unset.
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    # Normalise so that URL paths can be appended with a single "/".
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# API key sent in the "X-Key" header when a license is uploaded to the cache.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Collects the license text of every pinned requirement found in a requirements file.

    A usable line has the form ``name==version``, optionally followed by
    ``; <environment marker>`` and/or extra pip arguments. All lookups are scheduled
    concurrently and share one aiohttp session, which is published through the
    module-level ``_session`` while this coroutine runs.

    Arguments:
        requirements_path: path of the requirements file to read.
        pull_licenses: when given, only these dependency names are processed.
        download_dir: forwarded to ``get_license``.
        ignore: dependency names to skip.
        ignore_all_dependencies: forwarded to ``get_license``.

    Returns a dict mapping package keys to license text, ordered by key. Every package
    stored in the result is also appended to ``PACKAGES_FILE`` and every text is passed
    to ``check_gpl``.

    Raises ValueError when a requirement has no ``==`` version, when a requested
    dependency produced no result at all, or when a package has no license text.
    """
    global _session

    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue
            try:
                dependency, version = requirement.split("==", maxsplit=1)
            except ValueError:
                # The unpacking error itself carries no information; report only the requirement.
                raise ValueError(f"{requirement} must contain a version.") from None

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                # Split once only: everything after the first ';' belongs to the marker.
                version, markers = version.split(";", 1)
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        await asyncio.gather(*license_tasks.values())

        licenses = {}
        # Packages seen without a license text; another requirement may still provide one.
        unresolved = []
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                # NOTE(review): ``package`` keys are built as "<name>-<version>" by get_license,
                # while ``ignore`` holds bare names -- confirm this check can ever match.
                if package in ignore or licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")
                else:
                    unresolved.append(package)

        # Nothing can add licenses any more, so one pass over the unresolved packages
        # decides which of them were satisfied by a different requirement.
        missing = []
        for package in unresolved:
            if licenses.get(package):
                print(f"\tverified found license for (after repeat 0): {package}")
            else:
                missing.append(package)

        if missing:
            raise ValueError(f"Licenses not found for {missing}")

        licenses = {name: licenses[name] for name in sorted(licenses)}
        for dependency, license_text in licenses.items():
            if license_text:
                check_gpl(dependency, license_text)
        return licenses


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed

    Only a single ``<marker> <operator> <value>`` comparison is understood. The
    dependency is included (True) when there is no marker, no known operator, or the
    marker name on the left-hand side is not a known binding.

    Raises ValueError when the marker combines conditions with ``and`` / ``or``.
    """
    if not markers:
        return True

    # Raw string + search(): "\b" in a normal string is a backspace character and
    # match() only looks at the start of the text, so the guard could never trigger.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    # The order matters because operator is found by using 'in'
    # NOTE(review): the ordering operators compare plain strings, so a value such as
    # "3.10" sorts before "3.8".
    operators = {
        ">=": operator.ge,
        "<=": operator.le,
        "<": operator.lt,
        ">": operator.gt,
        "==": operator.eq,
        "!=": operator.ne,
        # PEP 508: ``<left> in <right>`` is a plain string containment test of the
        # left operand inside the right one.
        "not in": lambda a, b: a not in b,
        "in": lambda a, b: a in b,
    }

    for op, cmp in operators.items():
        if op in markers:
            break
    else:
        # No operators were found, include the dependency.
        return True

    # partition() splits on the first occurrence only, so a repeated operator
    # in the value cannot break the unpacking.
    marker, _, value = markers.partition(op)
    marker, value = marker.strip(" '\""), value.strip(" '\"")

    bindings = get_marker_bindings()
    binding = bindings.get(marker)
    if binding:
        return cmp(binding, value)

    # Unknown marker binding -- skip marker and include the dependency
    return True


# Cache filled by the first get_marker_bindings() call.
_bindings = {}


def get_marker_bindings():
    """
    Returns the mapping of environment marker names to the values of the running
    interpreter and system (os name, platform, python version, ...).

    The mapping is computed on the first call and the same dict is returned afterwards.
    """
    global _bindings

    # Computed earlier -- hand out the cached dict.
    if _bindings:
        return _bindings

    def render_version(info):
        # "major.minor.micro", followed by e.g. "b1" for non-final releases.
        text = '{0.major}.{0.minor}.{0.micro}'.format(info)
        level = info.releaselevel
        if level == 'final':
            return text
        return text + level[0] + str(info.serial)

    implementation = getattr(sys, 'implementation', None)
    if implementation is None:
        impl_name, impl_version = '', '0'
    else:
        impl_version = render_version(implementation.version)
        impl_name = implementation.name

    major_minor = platform.python_version_tuple()[:2]

    _bindings = dict(
        implementation_name=impl_name,
        implementation_version=impl_version,
        os_name=os.name,
        platform_machine=platform.machine(),
        platform_python_implementation=platform.python_implementation(),
        platform_release=platform.release(),
        platform_system=platform.system(),
        platform_version=platform.version(),
        python_full_version=platform.python_version(),
        python_version='.'.join(major_minor),
        sys_platform=sys.platform,
    )
    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """
    Downloads the license text stored for ``package`` / ``version`` in the
    package-metadata S3 bucket, using the module-level session.

    Returns the response body, or "" when the request fails or the server does not
    answer with status 200.
    """
    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        response = await _session.get(url)
        if response.status != 200:
            # The body of an error response is not a license text and must not be returned as one.
            return ""
        return await response.text()
    except Exception:
        # Best effort: any request failure simply means "no license from this source".
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Finds the license text for one pinned dependency.

    With ``download_dir`` set, already downloaded entries of that folder are inspected
    first (falling back to the S3 bucket for them) and then ``pip_download`` is used.
    Without it, the folders of ``sys.path`` are searched. If none of that produces a
    text, ``fetch_license`` is the last resort.

    Returns a dict keyed by "<name>-<version>". It may contain further packages when
    the result comes from ``pip_download``. It is empty when the dependency is ignored
    or nothing was found.
    """
    if ignore is None:
        # The default must be usable with the "in" checks below.
        ignore = []
    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        if os.path.exists(download_dir):
            # Check if package has already been downloaded
            for path in os.listdir(download_dir):
                if f"{dependency.replace('-', '_')}-{version}" not in path:
                    continue

                license_text = await get_local_license(dependency, version, os.path.join(download_dir, path))
                if license_text:
                    print(f"License for {dependency_str} found locally.")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

                license_text = await get_license_from_s3(dependency, version)
                if license_text:
                    print(f"Downloaded a license for {dependency_str} from s3")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

                print(f"No license available for {dependency_str}. Downloading a clean copy.")
                # Keep a reference to the TemporaryDirectory so the folder lives
                # until this coroutine is done with it.
                tempdir = tempfile.TemporaryDirectory()
                download_dir = tempdir.name
                # The remaining entries would now be looked up inside the empty temporary
                # folder and could only repeat the same S3 request, so stop here.
                break

        licenses = await pip_download(dependency, version, download_dir, ignore,
                                      ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}
    print(f"No license found for: {dependency_str}")
    # Always hand back a dict, as the annotation promises.
    return {}


dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")


def _strip_archive_suffix(file_name: str) -> str:
    """Returns ``file_name`` without a trailing sdist / wheel archive suffix."""
    for suffix in (".tar.gz", ".tar.bz2", ".tgz", ".tbz", ".zip", ".whl"):
        if file_name.endswith(suffix):
            return file_name[: -len(suffix)]
    return file_name


def _extract_archive(archive_file: str) -> str:
    """Unpacks a downloaded archive next to itself and returns the folder to search for a license."""
    target_dir, extension = os.path.splitext(archive_file)
    # os.path.splitext() returns the extension including its leading dot.
    if extension in (".gz", ".bz", ".bz2", ".tgz", ".tbz"):
        if archive_file.endswith((".tar.gz", ".tgz")):
            mode = "r:gz"
        elif archive_file.endswith((".tar.bz2", ".tbz")):
            mode = "r:bz2"
        else:
            mode = "r"

        # NOTE(review): extractall() trusts the member paths stored in the archive.
        with tarfile.open(archive_file, mode) as archive:
            archive.extractall(target_dir)

        if target_dir.endswith(".tar"):
            # Unpacking .tar.gz or .tar.bz2 will contain an extra .tar folder
            folder_name = os.path.split(target_dir)[-1]
            target_dir = join(target_dir, folder_name[: -len(".tar")])
    else:
        with zipfile.ZipFile(archive_file) as archive:
            archive.extractall(target_dir)
    return target_dir


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Downloads ``dependency==version`` with ``pip download`` into
    ``<download_dir>/<dependency>`` and gathers a license text for every archive found
    in that folder afterwards.

    For each archive the text is looked up in the unpacked archive, then through
    ``fetch_license`` and finally in the S3 bucket. Texts that were found are pushed to
    the internal cache.

    With ``ignore_all_dependencies`` the internal cache is consulted first and pip runs
    with ``--no-deps``. ``extra_pip_args`` is split on whitespace and appended to the
    pip command line.

    Returns a dict keyed by "<name>-<version>". A value is falsy when no license was
    found for that archive. The dict is empty when ``dependency`` is ignored.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            return {f"{dependency}-{version}": license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = [
        sys.executable,
        "-m",
        "pip",
        "download",
        f"{dependency}=={version}",
        "-d",
        download_dep_dir,
        "-q",
        "--disable-pip-version-check",
        *additional_args,
    ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        await process.wait()
    except Exception as exc:
        # "except Exception" lets task cancellation propagate untouched.
        raise Exception(f"Failed to find and download a license for {dependency}=={version} using pip.") from exc

    licenses = {}
    for path in os.listdir(download_dep_dir):
        dependency_path = join(download_dep_dir, path)
        if os.path.isdir(dependency_path):
            # Skip extracted folders.
            continue

        # Parse the name without the archive suffix: the version group of the regex
        # also accepts dots and would otherwise swallow e.g. ".tar.gz".
        match = dependency_regex.match(_strip_archive_suffix(path))
        if not match:
            continue

        dependency_name, dependency_version = match.group(1), match.group(2)
        if dependency_name in ignore:
            continue

        archive_path = _extract_archive(dependency_path)

        license_text = await get_local_license(dependency_name, dependency_version, archive_path)
        if not license_text:
            license_text = await fetch_license(dependency_name, dependency_version)
        if not license_text:
            license_text = await get_license_from_s3(dependency_name, dependency_version)

        licenses[f"{dependency_name}-{dependency_version}"] = license_text
        if license_text:
            await put_license_into_internal_cache(dependency_name, dependency_version, license_text)
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Searches every entry of ``sys.path``, in order, for a license of the dependency.

    Returns the first non-empty text reported by ``get_local_license``, otherwise "".
    """
    for search_dir in sys.path:
        candidate = await get_local_license(dependency, version, search_dir)
        if not candidate:
            continue
        return candidate
    return ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Looks for a license of ``dependency`` / ``version`` inside the folder ``path``.

    Sources are tried in this order:
      1. ``.egg-info`` / ``.dist-info`` entries whose lower-cased name starts with
         "<normalized name>-<version>";
      2. a license file inside the entry named like the dependency (case-insensitive);
      3. a license file directly inside ``path``.

    Returns the first non-empty text, or "" when ``path`` is not a folder or nothing
    was found.
    """
    normalized_name = dependency.lower().replace("-", "_")

    if not os.path.isdir(path):
        return ""

    meta_prefix = f"{normalized_name}-{version}"
    folder_name = dependency.lower()

    metadata_entries = []
    source_dir = ""
    for entry in os.listdir(path):
        lowered = entry.lower()
        if lowered.startswith(meta_prefix):
            metadata_entries.append(join(path, entry))
        elif lowered == folder_name:
            source_dir = join(path, entry)

    for metadata_entry in metadata_entries:
        if metadata_entry.endswith(".egg-info"):
            read_metadata = egg_info_license
        elif metadata_entry.endswith(".dist-info"):
            read_metadata = dist_info_license
        else:
            # Matching prefix but not a metadata entry.
            continue
        text = await read_metadata(metadata_entry)
        if text:
            return text

    # Try to find a license file in the package source code first;
    # a license file can also be in the specified folder itself.
    for folder in (source_dir, path):
        if not folder:
            continue
        text = find_license_file(folder)
        if text:
            return text

    return ""


async def egg_info_license(package_path: str) -> str:
    """
    Reads the ``Home-page:`` field of ``<package_path>/PKG-INFO`` and fetches the
    license through ``fetch_from_github``.

    Returns "" when PKG-INFO does not exist, has no home page, or no license could be
    fetched.
    """
    pkg_info_path = join(package_path, "PKG-INFO")
    if not exists(pkg_info_path):
        return ""

    url = ""
    # Decode explicitly and tolerate stray bytes so the result does not depend on the
    # locale of the machine running the script.
    with open(pkg_info_path, "r", encoding="utf-8", errors="ignore") as file:
        for line in file:
            if line.startswith("Home-page:"):
                url = line[len("Home-page:"):].strip()
                break

    if not url:
        return ""
    return (await fetch_from_github(url)) or ""


async def dist_info_license(package_path: str) -> str:
    """
    Returns the license for a ``.dist-info`` folder.

    A license file shipped inside the folder wins. Otherwise the ``Home-page:`` field
    of its METADATA file is passed to ``fetch_from_github``.

    Returns "" when neither source yields a text.
    """
    license_text = find_license_file(package_path)
    if license_text:
        return license_text

    metadata_path = join(package_path, "METADATA")
    if not exists(metadata_path):
        return ""

    try:
        # Read bytes: each line is decoded on its own below, so a single badly
        # encoded line cannot make the whole file unreadable.
        with open(metadata_path, "rb") as file:
            metadata = file.readlines()
    except Exception:
        print(f"Could not open metadata file for: {package_path}")
        raise

    url = ""
    for raw_line in metadata:
        try:
            line = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            continue
        if line.startswith("Home-page:"):
            url = line[len("Home-page:"):].strip()
            break

    if not url:
        return ""
    return (await fetch_from_github(url)) or ""


def find_license_file(package_path: str) -> str:
    """
    Returns the text of the first license-like file found in ``package_path``.

    When the folder has a ``licenses`` sub-folder, only that sub-folder is searched.
    A file qualifies when its name starts with "license", "licence" or "copying"
    (case-insensitive) or is listed in ``license_names``. Sub-folders and Python
    source / binary files are never considered.

    Returns "" when ``package_path`` is not a folder or no candidate exists.
    """
    if not os.path.isdir(package_path):
        return ""

    licenses_folder = join(package_path, "licenses")
    # isdir() rather than exists(): a plain file called "licenses" cannot be listed.
    search_folder = licenses_folder if os.path.isdir(licenses_folder) else package_path

    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)
    # A module is never a license text, even if its name starts with "license"
    # (e.g. a helper script that sits in a folder being searched).
    code_suffixes = (".py", ".pyc", ".pyo", ".pyi", ".pyd", ".so")

    license_path = ""
    # Sorted so the same file is picked on every run and every file system.
    for entry in sorted(os.listdir(search_folder)):
        candidate = join(search_folder, entry)
        if os.path.isdir(candidate):
            continue
        if entry.lower().endswith(code_suffixes):
            continue
        # safe because the (possibly) case-sensitive path is what we have (unlike URL)
        #   verify if any version of the common licen[cs]e or copying names appear
        if license_regex.match(entry) or entry in license_names:
            license_path = candidate
            break

    if not license_path:
        return ""

    # i want to try / except here because i want to know when we have issues reading files.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetches the license of ``dependency==version`` from remote sources.

    The internal cache is asked first. Otherwise the home page published in the PyPI
    JSON metadata is resolved through ``fetch_from_github``.

    Returns "" when PyPI does not know the release or no license text is found.
    Raises ValueError when the PyPI metadata names no home page.
    """
    if session is None:
        session = _session

    # Use the same session for the cache lookup as for the PyPI request.
    license_text = await fetch_license_from_internal_cache(dependency, version, session)
    if license_text:
        return license_text

    response = await session.get(f"https://pypi.org/pypi/{dependency}/{version}/json", ssl=get_ssl_cert())
    if response.status != 200:
        return ""

    info = (await response.json()).get("info") or {}
    home_page = info.get("home_page")
    if not home_page:
        # "project_urls" can be null or lack a "Homepage" entry.
        project_urls = info.get("project_urls") or {}
        home_page = project_urls.get("Homepage")
        if not home_page:
            raise ValueError(f"home_page is missing for {dependency}=={version}")

    return (await fetch_from_github(home_page)) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Reads the license text of ``dependency==version`` from the internal cache.

    Underscores in the dependency name are replaced by hyphens for the lookup.

    Returns "" when the cache is disabled, has no usable entry, or cannot be reached.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        response = await session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}")
        if response.status != 200:
            return ""
        payload = await response.json()
        print(f"Found license for {dependency}=={version} in internal cache.")
        # An entry without text counts as "not cached".
        return payload.get("text") or ""
    except Exception:
        # The cache is an optimisation only: treat any failure as a miss.
        return ""


async def fetch_from_github(url: str, session: aiohttp.ClientSession = None) -> str:
    """
    Downloads the license file of the GitHub repository that ``url`` points to.

    Well-known license file names are probed on the usual main branches through
    raw.githubusercontent.com. Only when none of them exists is the rate-limited
    GitHub API asked for the license download link.

    Returns "" when ``url`` is empty, is not a GitHub URL, names no
    "<owner>/<repository>", or no license could be downloaded.
    """
    # "import urllib" alone does not guarantee that the "parse" submodule is loaded.
    import urllib.parse

    if session is None:
        session = _session

    if not url:
        return ""

    parsed_url = urllib.parse.urlparse(url)
    if "github" not in parsed_url.netloc:
        return ""

    # Only "<owner>/<repository>" identifies the repository; anything after it
    # (".../tree/main", ".../issues", a ".git" suffix) would break the URLs below.
    path_parts = [part for part in parsed_url.path.split("/") if part]
    if len(path_parts) < 2:
        return ""
    owner, repository = path_parts[0], path_parts[1]
    if repository.endswith(".git"):
        repository = repository[: -len(".git")]
    repo = f"{owner}/{repository}"

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            response = await session.get(f"https://raw.githubusercontent.com/{repo}/{branch}/{license_name}",
                                         ssl=ssl_context)
            if response.status == 200:
                return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    response = await session.get(f"https://api.github.com/repos/{repo}/license", ssl=ssl_context)
    if response.status != 200:
        return ""

    download_url = (await response.json()).get("download_url")
    if not download_url:
        return ""

    response = await session.get(download_url, ssl=ssl_context)
    if response.status != 200:
        return ""
    return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Uploads a license text to the internal cache (best effort).

    Nothing happens unless both the cache URL and the API key are configured.
    Underscores in the dependency name are replaced by hyphens. A failed upload is
    reported with a warning and never raises.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        response = await session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        )
        if response.status == 201:
            print(f"Put license for {dependency}=={version} into internal cache.")
        elif response.status == 401:
            print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")
    except Exception as exc:
        # Caching must never fail the run, but the problem should be visible.
        print(f"[Warning] Could not put {dependency}=={version} license into internal cache: {exc}")


def check_gpl(dependency: str, license_text: str):
    """
    Aborts the program with exit status 666 when the license text mentions
    "GPL" or "LGPL" as a separate word.
    """
    # Raw string + search(): "\b" in a normal string is a backspace character and
    # match() only inspects the very beginning of the text.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Gathers the licenses of all requirements and formats them as one text made of
    "<package>", an underline of dashes, and the license text per package.

    With ``dest`` the text is written to that file (overwritten or appended, depending
    on ``overwrite``) and None is returned. Without ``dest`` the text is returned.

    The packages list file is placed in the directory of ``dest``, or in the current
    directory when ``dest`` is not given.

    Any exception raised while gathering or writing is printed and swallowed. The
    function then returns None.
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    # ``dest`` is optional, and only the file name is re-used so that repeated calls
    # do not stack directories onto the previous value.
    packages_dir = os.path.dirname(dest) if dest else ""
    PACKAGES_FILE = os.path.join(packages_dir, os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        combined_text = "".join(
            f"{dep}\n{'-' * len(dep)}\n{text}\n\n" for dep, text in licenses.items()
        )

        if not dest:
            return combined_text

        mode = "w" if overwrite else "a+"
        with open(dest, mode, encoding="utf-8") as file:
            file.write(combined_text)
    except Exception as exc:
        # NOTE(review): failures are only printed, so callers cannot tell them apart
        # from a successful run that wrote to ``dest``.
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Returns the value to pass as ``ssl=`` to the HTTP requests: a default SSL context
    that trusts the certifi CA bundle, or False when certifi cannot be imported.
    """
    try:
        import certifi

        ca_bundle = certifi.where()
        context = ssl.create_default_context(cafile=ca_bundle)
    except ImportError:
        # Don't verify SSL certificate if certifi is not installed.
        return False
    return context


if __name__ == "__main__":
    # Command-line switches as (flags, add_argument options), in the order they are registered.
    cli_options = [
        (("-r", "--requirements"), dict(dest="requirements", type=str, default="requirements.txt")),
        (("--pull-licenses",), dict(
            dest="pull_licenses", type=str, nargs="+",
            help="Specifies list of licenses that should be pulled from the requirements.txt list")),
        (("-d", "--dest"), dict(dest="dest", type=str)),
        (("--download-dir",), dict(dest="download_dir", type=str)),
        (("--ignore",), dict(
            dest="ignore",
            action="append",
            help="specifies dependency which licenses should be ignored. "
                 "the script will not try to download license for ignored dependency.")),
        (("--ignore-all-dependencies",), dict(
            dest="ignore_all",
            action="store_true",
            help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements."
                 "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.")),
        (("--overwrite",), dict(
            dest="overwrite",
            action="store_true",
            help="Overwrites the output file if present, otherwise license texts are appended.")),
    ]

    parser = argparse.ArgumentParser()
    for flags, options in cli_options:
        parser.add_argument(*flags, **options)
    args = parser.parse_args()

    # main() returns the text only when no destination file was given.
    text = main(
        args.requirements,
        args.pull_licenses,
        args.dest,
        args.download_dir,
        args.ignore,
        ignore_all_dependencies=args.ignore_all,
        overwrite=args.overwrite,
    )
    if text:
        print(text)


charset-normalizer-2.1.1
------------------------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

# aiohttp supplies the client session used for the HTTP requests made by this script.
try:
    import aiohttp
except:
    # On Linux aarch64 a failed import is tolerated: two warnings are printed and the
    # script exits with status 0. On any other platform the original error is re-raised.
    # NOTE(review): the bare ``except`` also intercepts failures other than ImportError.
    if all([platform.uname().system == "Linux", platform.uname().machine == "aarch64"]):
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

# Readability aliases used in return annotations (both are plain strings).
DependencyName = str
LicenseText = str

# Shared HTTP session. It is bound by get_licenses() for the duration of a run and is
# the default session of the helpers that accept an optional ``session`` argument.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # added manually to pair common english spellings
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names tried, in this order, when guessing the raw URL of a license file.
main_branches = [
    "master",
    "main",
    "latest",
]

# File that receives one line per package whose license text was collected.
# main() re-points it into the directory of the destination file.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the optional internal license cache. The cache is disabled when the
# variable is unset; trailing slashes are dropped so paths can be appended with "/".
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# Key sent in the "X-Key" header when a license is written to the cache.
# Without it nothing is uploaded.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Collects the license text of every pinned requirement found in a requirements file.

    A usable line has the form ``name==version``, optionally followed by
    ``; <environment marker>`` and/or extra pip arguments. All lookups are scheduled
    concurrently and share one aiohttp session, which is published through the
    module-level ``_session`` while this coroutine runs.

    Arguments:
        requirements_path: path of the requirements file to read.
        pull_licenses: when given, only these dependency names are processed.
        download_dir: forwarded to ``get_license``.
        ignore: dependency names to skip.
        ignore_all_dependencies: forwarded to ``get_license``.

    Returns a dict mapping package keys to license text, ordered by key. Every package
    stored in the result is also appended to ``PACKAGES_FILE`` and every text is passed
    to ``check_gpl``.

    Raises ValueError when a requirement has no ``==`` version, when a requested
    dependency produced no result at all, or when a package has no license text.
    """
    global _session

    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue
            try:
                dependency, version = requirement.split("==", maxsplit=1)
            except ValueError:
                # The unpacking error itself carries no information; report only the requirement.
                raise ValueError(f"{requirement} must contain a version.") from None

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                # Split once only: everything after the first ';' belongs to the marker.
                version, markers = version.split(";", 1)
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        await asyncio.gather(*license_tasks.values())

        licenses = {}
        # Packages seen without a license text; another requirement may still provide one.
        unresolved = []
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                # NOTE(review): ``package`` keys are built as "<name>-<version>" by get_license,
                # while ``ignore`` holds bare names -- confirm this check can ever match.
                if package in ignore or licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")
                else:
                    unresolved.append(package)

        # Nothing can add licenses any more, so one pass over the unresolved packages
        # decides which of them were satisfied by a different requirement.
        missing = []
        for package in unresolved:
            if licenses.get(package):
                print(f"\tverified found license for (after repeat 0): {package}")
            else:
                missing.append(package)

        if missing:
            raise ValueError(f"Licenses not found for {missing}")

        licenses = {name: licenses[name] for name in sorted(licenses)}
        for dependency, license_text in licenses.items():
            if license_text:
                check_gpl(dependency, license_text)
        return licenses


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed

    Only a single ``<marker> <operator> <value>`` comparison is understood. The
    dependency is included (True) when there is no marker, no known operator, or the
    marker name on the left-hand side is not a known binding.

    Raises ValueError when the marker combines conditions with ``and`` / ``or``.
    """
    if not markers:
        return True

    # Raw string + search(): "\b" in a normal string is a backspace character and
    # match() only looks at the start of the text, so the guard could never trigger.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    # The order matters because operator is found by using 'in'
    # NOTE(review): the ordering operators compare plain strings, so a value such as
    # "3.10" sorts before "3.8".
    operators = {
        ">=": operator.ge,
        "<=": operator.le,
        "<": operator.lt,
        ">": operator.gt,
        "==": operator.eq,
        "!=": operator.ne,
        # PEP 508: ``<left> in <right>`` is a plain string containment test of the
        # left operand inside the right one.
        "not in": lambda a, b: a not in b,
        "in": lambda a, b: a in b,
    }

    for op, cmp in operators.items():
        if op in markers:
            break
    else:
        # No operators were found, include the dependency.
        return True

    # partition() splits on the first occurrence only, so a repeated operator
    # in the value cannot break the unpacking.
    marker, _, value = markers.partition(op)
    marker, value = marker.strip(" '\""), value.strip(" '\"")

    bindings = get_marker_bindings()
    binding = bindings.get(marker)
    if binding:
        return cmp(binding, value)

    # Unknown marker binding -- skip marker and include the dependency
    return True


# Cache filled by the first get_marker_bindings() call.
_bindings = {}


def get_marker_bindings():
    """
    Returns the mapping of environment marker names to the values of the running
    interpreter and system (os name, platform, python version, ...).

    The mapping is computed on the first call and the same dict is returned afterwards.
    """
    global _bindings

    # Computed earlier -- hand out the cached dict.
    if _bindings:
        return _bindings

    def render_version(info):
        # "major.minor.micro", followed by e.g. "b1" for non-final releases.
        text = '{0.major}.{0.minor}.{0.micro}'.format(info)
        level = info.releaselevel
        if level == 'final':
            return text
        return text + level[0] + str(info.serial)

    implementation = getattr(sys, 'implementation', None)
    if implementation is None:
        impl_name, impl_version = '', '0'
    else:
        impl_version = render_version(implementation.version)
        impl_name = implementation.name

    major_minor = platform.python_version_tuple()[:2]

    _bindings = dict(
        implementation_name=impl_name,
        implementation_version=impl_version,
        os_name=os.name,
        platform_machine=platform.machine(),
        platform_python_implementation=platform.python_implementation(),
        platform_release=platform.release(),
        platform_system=platform.system(),
        platform_version=platform.version(),
        python_full_version=platform.python_version(),
        python_version='.'.join(major_minor),
        sys_platform=sys.platform,
    )
    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """
    Downloads a license text from the package-metadata S3 bucket.

    :param package: Package name used in the object key.
    :param version: Package version used in the object key.
    :param package_manager: First path segment of the object key.
    :return: The response body for a 200 response; "" for any other status or when the
        request fails.
    """
    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        # The context manager releases the connection on every exit path.
        async with _session.get(url) as response:
            # A non-200 answer carries an error document, not a license; returning it
            # would make callers treat the error body as the license text.
            if response.status != 200:
                return ""
            return await response.text()
    except Exception:
        # Best effort: this is only one of several license sources. A bare "except:"
        # would additionally swallow task cancellation and KeyboardInterrupt.
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Finds the license text for one pinned dependency.

    With ``download_dir`` set, entries of that directory whose name contains
    ``<dependency with '-' replaced by '_'>-<version>`` are inspected first, then S3, then
    a fresh ``pip download``. Without it, the folders on ``sys.path`` are searched. The
    final fallback in both cases is ``fetch_license``.

    :param dependency: Package name as written in the requirements file.
    :param version: Pinned version.
    :param download_dir: Directory holding (or receiving) downloaded packages.
    :param ignore: Package names to skip; ``None`` means nothing is ignored.
    :param ignore_all_dependencies: Forwarded to ``pip_download``.
    :param extra_pip_args: Forwarded to ``pip_download``.
    :return: Mapping of ``"<name>-<version>"`` to license text; empty when the dependency
        is ignored or nothing was found.
    """
    # The declared default is None, which "dependency in ignore" cannot handle.
    if ignore is None:
        ignore = []
    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        tempdir = None
        if os.path.exists(download_dir):
            # Check if package has already been downloaded. Every matching entry is
            # looked up in the directory that was actually listed.
            local_name = f"{dependency.replace('-', '_')}-{version}"
            found_without_license = False
            for path in os.listdir(download_dir):
                if local_name not in path:
                    continue
                license_text = await get_local_license(dependency, version, os.path.join(download_dir, path))
                if license_text:
                    print(f"License for {dependency_str} found locally.")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}
                found_without_license = True

            if found_without_license:
                license_text = await get_license_from_s3(dependency, version)
                if license_text:
                    print(f"Downloaded a license for {dependency_str} from s3")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}
                print(f"No license available for {dependency_str}. Downloading a clean copy.")
                tempdir = tempfile.TemporaryDirectory()
                download_dir = tempdir.name

        try:
            licenses = await pip_download(dependency, version, download_dir, ignore,
                                          ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        finally:
            # pip_download returns the license texts themselves, so the scratch copy is
            # no longer needed once it has finished.
            if tempdir is not None:
                tempdir.cleanup()
        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}
    print(f"No license found for: {dependency_str}")
    # Falsy like the previous implicit None, but matches the declared return type.
    return {}


# Splits a downloaded archive file name into (name, version): group 1 is the run of word
# characters / hyphens before a "-", group 2 the following run of dots / word characters.
dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Downloads ``dependency==version`` with ``pip download`` into
    ``<download_dir>/<dependency>``, unpacks every downloaded archive and looks for a
    license for each of them.

    :param dependency: Package name to download.
    :param version: Pinned version to download.
    :param download_dir: Parent directory for the per-dependency download folder.
    :param ignore: Package names that must not be reported.
    :param ignore_all_dependencies: When True the internal cache is consulted first and
        pip is run with ``--no-deps``.
    :param extra_pip_args: Extra pip command line arguments, whitespace separated.
    :return: Mapping of ``"<name>-<version>"`` to license text. The text is empty when no
        source had a license for that archive. The mapping is empty when the dependency is
        ignored or pip produced no download folder.
    :raises Exception: If the pip subprocess could not be run.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            dependency_str = f"{dependency}-{version}"
            return {dependency_str: license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = [
        sys.executable,
        "-m",
        "pip",
        "download",
        f"{dependency}=={version}",
        "-d",
        download_dep_dir,
        "-q",
        "--disable-pip-version-check",
        *additional_args,
    ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        await process.wait()
    except Exception as exc:
        # Keep the original failure attached so the real cause stays visible.
        raise Exception(
            f"Failed to find and download a license for {dependency}=={version} using pip."
        ) from exc

    if not os.path.isdir(download_dep_dir):
        # pip downloaded nothing; report "no licenses" so the caller can fall back to
        # its other sources instead of failing on the missing folder.
        return {}

    licenses = {}
    for path in os.listdir(download_dep_dir):
        dependency_path = join(download_dep_dir, path)
        if os.path.isdir(dependency_path):
            # Skip extracted folders.
            continue

        match = dependency_regex.match(path)
        if not match:
            continue

        dependency_name = match.group(1)
        dependency_version = match.group(2)
        if dependency_name in ignore:
            continue

        # NOTE(review): the archives come from the package index and are unpacked with
        # extractall(), which does not guard against members escaping the target folder.
        archive_path, archive_ext = os.path.splitext(dependency_path)
        # os.path.splitext keeps the leading dot, so every entry needs one as well.
        if archive_ext in (".gz", ".bz", ".bz2", ".tgz", ".tbz"):
            if dependency_path.endswith((".tar.gz", ".tgz")):
                mode = "r:gz"
            elif dependency_path.endswith((".tar.bz2", ".tbz")):
                mode = "r:bz2"
            else:
                mode = "r"

            with tarfile.open(dependency_path, mode) as archive:
                archive.extractall(archive_path)

            if archive_path.endswith(".tar"):
                # Unpacking .tar.gz or .tar.bz2 will contain an extra .tar folder
                archive_name = os.path.split(archive_path)[-1]
                archive_path = join(archive_path, archive_name[: -len(".tar")])
        else:
            with zipfile.ZipFile(dependency_path) as archive:
                archive.extractall(archive_path)

        # Try the unpacked files first, then the remote sources.
        license_text = await get_local_license(dependency_name, dependency_version, archive_path)
        if not license_text:
            license_text = await fetch_license(dependency_name, dependency_version)
        if not license_text:
            license_text = await get_license_from_s3(dependency_name, dependency_version)

        # The entry is recorded even without a text so callers can see what is missing.
        dependency_str = f"{dependency_name}-{dependency_version}"
        licenses[dependency_str] = license_text
        if license_text:
            await put_license_into_internal_cache(dependency_name, dependency_version, license_text)
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Probes every entry of ``sys.path`` in order with ``get_local_license`` and returns the
    first non-empty license text, or "" when no entry yields one.
    """
    for search_root in sys.path:
        found = await get_local_license(dependency, version, search_root)
        if not found:
            continue
        return found
    return ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Searches the folder ``path`` for a license belonging to ``dependency``/``version``.

    Lookup order: ``*.egg-info`` / ``*.dist-info`` entries whose name starts with
    ``<name>-<version>``, then a folder named like the package, then ``path`` itself.

    :param dependency: Package name; ``-`` and ``_`` spellings are both recognised.
    :param version: Package version used to match metadata folders.
    :param path: Folder to search.
    :return: The license text, or "" when ``path`` is not a directory or nothing is found.
    """
    if not os.path.isdir(path):
        return ""

    dependency_name = dependency.lower().replace("-", "_")
    # Entry names are lower-cased before comparing, so the prefix has to be as well.
    meta_prefix = f"{dependency_name}-{version}".lower()
    # The package folder may use either spelling of the name: the requirement may say
    # "foo-bar" while the folder on disk is "foo_bar".
    package_dir_names = {dependency.lower(), dependency_name}

    package_meta_candidates = []
    package_path = ""
    for dependency_path in os.listdir(path):
        entry = dependency_path.lower()
        if entry.startswith(meta_prefix):
            package_meta_candidates.append(join(path, dependency_path))
        elif entry in package_dir_names:
            package_path = join(path, dependency_path)

    for package_meta in package_meta_candidates:
        if package_meta.endswith(".egg-info"):
            license_text = await egg_info_license(package_meta)
        elif package_meta.endswith(".dist-info"):
            license_text = await dist_info_license(package_meta)
        else:
            continue
        if license_text:
            return license_text

    if package_path:
        # Try to find a license file in the package source code
        license_text = find_license_file(package_path)
        if license_text:
            return license_text

    # A license file can be in the specified folder itself.
    return find_license_file(path) or ""


async def egg_info_license(package_path: str) -> str:
    """
    Resolves a license through the ``Home-page:`` field of an ``.egg-info`` folder.

    :param package_path: Path of the ``.egg-info`` entry.
    :return: Whatever ``fetch_from_github`` returns for the home page, or "" when there is
        no readable ``PKG-INFO`` file or it has no ``Home-page:`` value.
    """
    pkg_info_path = join(package_path, "PKG-INFO")
    if not exists(pkg_info_path):
        # Covers a missing PKG-INFO file and an .egg-info entry that is a plain file.
        return ""

    url = ""
    # Decode explicitly and tolerate stray bytes instead of depending on the platform's
    # default encoding.
    with open(pkg_info_path, encoding="utf-8", errors="replace") as file:
        for line in file:
            if line.startswith("Home-page:"):
                url = line[len("Home-page:"):].strip()
                break

    if not url:
        return ""
    return await fetch_from_github(url)


async def dist_info_license(package_path: str) -> str:
    """
    Resolves a license for a ``.dist-info`` folder.

    A license file shipped inside the folder wins. Otherwise the ``Home-page:`` field of
    the ``METADATA`` file is handed to ``fetch_from_github``. Returns None when there is
    no ``METADATA`` file or it has no home page.
    """
    bundled = find_license_file(package_path)
    if bundled:
        return bundled

    metadata_path = join(package_path, "METADATA")
    if not exists(metadata_path):
        return None

    with open(metadata_path, "rb") as handle:
        try:
            raw_lines = handle.readlines()
        except Exception as exc:
            print(f"Could not open metadata file for: {package_path}")
            raise exc

    field = "Home-page:"
    homepage = None
    for raw_line in raw_lines:
        # Lines are decoded one by one so a single undecodable line is skipped instead
        # of making the whole file unreadable.
        try:
            text = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            continue
        if text.startswith(field):
            homepage = text[len(field):].strip()
            break

    if not homepage:
        return None
    return await fetch_from_github(homepage)


def find_license_file(package_path: str) -> str:
    """
    Looks for a license file directly inside ``package_path`` (or inside its ``licenses``
    sub-folder when that exists) and returns the file's text.

    Returns "" when ``package_path`` is not an existing directory and None when no file
    name qualifies. Undecodable bytes are dropped after printing a warning.
    """
    if not (exists(package_path) and os.path.isdir(package_path)):
        return ""

    nested_folder = join(package_path, "licenses")
    search_root = nested_folder if exists(nested_folder) else package_path

    name_pattern = re.compile("(licen[cs]e|copying)", re.IGNORECASE)
    chosen = ""
    for entry in os.listdir(search_root):
        candidate = join(search_root, entry)
        if os.path.isdir(candidate):
            continue
        # The on-disk name is tested as a prefix in any letter case first, then against
        # the explicit list of well-known license file names.
        if name_pattern.match(entry) or entry in license_names:
            chosen = candidate
            break

    if not chosen:
        return None

    # Read strictly first so decoding problems get reported, then retry leniently.
    try:
        with open(chosen, "r", encoding="utf8") as handle:
            return handle.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {chosen}. Ignoring errors.")
        with open(chosen, "r", encoding="utf8", errors="ignore") as handle:
            return handle.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetches a license for ``dependency==version`` from remote sources.

    The internal cache is asked first. Otherwise the PyPI JSON metadata is downloaded and
    the project's home page is handed to ``fetch_from_github``.

    :param dependency: Package name.
    :param version: Package version.
    :param session: HTTP session to use; defaults to the module-level session.
    :return: The license text, or "" when PyPI does not answer with status 200.
    :raises ValueError: If the PyPI metadata contains no home page.
    """
    if session is None:
        session = _session

    license_text = await fetch_license_from_internal_cache(dependency, version, session=session)
    if license_text:
        return license_text

    async with session.get(f"https://pypi.org/pypi/{dependency}/{version}/json", ssl=get_ssl_cert()) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    info = response_json.get("info") or {}
    home_page = info.get("home_page")
    if not home_page:
        # "project_urls" may be null and its labels are free-form, so look the home page
        # up defensively instead of indexing "Homepage" directly.
        project_urls = info.get("project_urls") or {}
        home_page = next(
            (link for label, link in project_urls.items() if label.lower() == "homepage" and link),
            None,
        )
    if not home_page:
        raise ValueError(f"home_page is missing for {dependency}=={version}")

    return await fetch_from_github(home_page, session=session)


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Looks a license up in the internal license cache.

    :param dependency: Package name; underscores are replaced by hyphens for the lookup.
    :param version: Package version.
    :param session: HTTP session to use; defaults to the module-level session.
    :return: The cached text, or "" when the cache is not configured, has no entry, or
        the request fails.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}") as response:
            if response.status != 200:
                return ""
            payload = await response.json()
    except Exception:
        # The cache is an optimisation only. A bare "except:" would also swallow task
        # cancellation and KeyboardInterrupt.
        return ""

    print(f"Found license for {dependency}=={version} in internal cache.")
    # Keep the declared "str" contract even if the entry has no "text" field.
    return payload.get("text") or ""


async def fetch_from_github(url: str, session: aiohttp.ClientSession = None) -> str:
    """
    Downloads the license file of the GitHub repository that ``url`` points to.

    Well-known license file names are tried on the usual default branches first; the
    GitHub license API is only used when none of them exists.

    :param url: Project URL, typically the package's home page.
    :param session: HTTP session to use; defaults to the module-level session.
    :return: The license text, or "" when the URL is not a GitHub repository URL or no
        license could be downloaded.
    """
    # The file only imports the "urllib" package; import the submodule explicitly rather
    # than relying on another module having loaded it.
    import urllib.parse

    if session is None:
        session = _session

    parsed_url = urllib.parse.urlparse(url)
    if "github" not in parsed_url.netloc:
        return ""

    # Only "<owner>/<repo>" identifies the repository; home pages frequently carry extra
    # path segments (/tree/main, /issues, ...) or a ".git" suffix.
    segments = [segment for segment in parsed_url.path.split("/") if segment]
    if len(segments) < 2:
        return ""
    owner, repo = segments[:2]
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    repo_slug = f"{owner}/{repo}"

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            raw_url = f"https://raw.githubusercontent.com/{repo_slug}/{branch}/{license_name}"
            async with session.get(raw_url, ssl=ssl_context) as response:
                if response.status == 200:
                    return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    async with session.get(f"https://api.github.com/repos/{repo_slug}/license", ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    download_url = response_json.get("download_url")
    if not download_url:
        return ""

    async with session.get(download_url, ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Stores a license text in the internal license cache (best effort).

    Nothing is sent unless both the cache URL and the API key are configured. Failures are
    reported on stdout and never raised.

    :param dependency: Package name; underscores are replaced by hyphens for the key.
    :param version: Package version.
    :param license_text: Text to store.
    :param session: HTTP session to use; defaults to the module-level session.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        ) as response:
            if response.status == 201:
                print(f"Put license for {dependency}=={version} into internal cache.")
            elif response.status == 401:
                print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")
    except Exception as exc:
        # Caching must never break license collection, but the failure should at least
        # be visible. "except Exception" also lets task cancellation propagate.
        print(f"[Warning] Could not put {dependency}=={version} license into internal cache: {exc}")


def check_gpl(dependency: str, license_text: str):
    """
    Aborts the program when a license text mentions GPL or LGPL.

    :param dependency: Name reported in the error message.
    :param license_text: License text to scan; empty / None is accepted and ignored.
    :raises SystemExit: With code 666 when ``GPL`` or ``LGPL`` appears as a whole word.
    """
    # A raw string and re.search are both required: "\b" in a plain string literal is a
    # backspace character, and re.match would only look at the start of the text.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Collects the licenses for a requirements file and writes or returns them.

    :param requirements_path: Path of the requirements file.
    :param pull_licenses: Passed through to ``get_licenses``.
    :param dest: Output file. When omitted the combined text is returned instead.
    :param download_dir: Passed through to ``get_licenses``.
    :param ignore: Package names (without versions) to skip.
    :param ignore_all_dependencies: Passed through to ``get_licenses``.
    :param overwrite: Truncate ``dest`` instead of appending to it.
    :return: The combined license text when ``dest`` is not given, otherwise None. None is
        also returned when collecting fails; the error is printed in that case.
    """
    global PACKAGES_FILE

    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    if dest:
        # Keep the package list next to the output file. Only the bare file name is
        # re-joined so repeated calls do not nest directories, and a missing "dest"
        # (the declared default) no longer fails in os.path.dirname(None).
        PACKAGES_FILE = os.path.join(os.path.dirname(dest), os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        # One section per package: name, an underline of the same width, then the text.
        combined_text = "".join(
            [f"{dep}\n" f"{'-' * len(dep)}\n" f"{text}\n\n" for dep, text in licenses.items()]
        )

        if dest:
            mode = "w" if overwrite else "a+"
            with open(dest, mode, encoding="utf-8") as file:
                file.write(combined_text)
        else:
            return combined_text
    except Exception as exc:
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Returns the SSL setting for outgoing requests: a context built from the certifi CA
    bundle, or False when certifi cannot be imported.
    """
    try:
        import certifi

        verification = ssl.create_default_context(cafile=certifi.where())
    except ImportError:
        # Don't verify SSL certificate if certifi is not installed.
        verification = False
    return verification


# Command line entry point: parse the options, run main() and print the combined license
# text when it was not written to a file.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    parser.add_argument("--pull-licenses", dest="pull_licenses", type=str, nargs="+",
        help="Specifies list of licenses that should be pulled from the requirements.txt list")
    parser.add_argument("-d", "--dest", dest="dest", type=str)
    parser.add_argument("--download-dir", dest="download_dir", type=str)
    parser.add_argument(
        "--ignore",
        dest="ignore",
        action="append",
        help="specifies dependency which licenses should be ignored. "
             "the script will not try to download license for ignored dependency.",
    )
    parser.add_argument(
        "--ignore-all-dependencies",
        dest="ignore_all",
        action="store_true",
        # The trailing space keeps the two concatenated sentences apart in --help.
        help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements. "
             "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
    )
    parser.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="Overwrites the output file if present, otherwise license texts are appended.",
    )
    args = parser.parse_args()

    text = main(args.requirements, args.pull_licenses, args.dest, args.download_dir, args.ignore, ignore_all_dependencies=args.ignore_all,
                overwrite=args.overwrite)
    if text:
        print(text)


colorama-0.4.4
--------------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

try:
    import aiohttp
except:
    if all([platform.uname().system == "Linux", platform.uname().machine == "aarch64"]):
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

# Readability aliases used in return annotations.
DependencyName = str
LicenseText = str

# Shared HTTP session; get_licenses() binds it for the duration of a run.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # added manually to pair common english spellings
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names to try, in this order.
main_branches = [
    "master",
    "main",
    "latest",
]

# File that get_licenses() appends the name of every package with a license to.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the license cache, read from the environment; any trailing "/" is removed.
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# API key for the license cache, read from the environment.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Collects license texts for the pinned requirements listed in a requirements file.

    Every line that starts with an alphanumeric character must have the form
    ``name==version``, optionally followed by ``; <environment markers>`` and/or extra pip
    arguments separated by a space. One ``get_license`` task is started per included
    requirement and all tasks are awaited together.

    :param requirements_path: Path of the requirements file to read.
    :param pull_licenses: When not None, only these dependency names are processed.
    :param download_dir: Passed through to ``get_license``.
    :param ignore: Names to skip; ``None`` means nothing is ignored.
    :param ignore_all_dependencies: Passed through to ``get_license``.
    :return: Dictionary of package key to license text, ordered by sorted key.
    :raises ValueError: If a requirement has no ``==`` version, if a task returned no
        result for a dependency, or if packages remain without a license text.
    """
    global PACKAGES_FILE

    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    # The session is bound to the module-level name so the helper coroutines can use it
    # while this block is active.
    global _session
    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue
            try:
                dependency, version = requirement.split("==", maxsplit=1)
            except ValueError:
                raise ValueError(f"{requirement} must contain a version.")

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                version, markers = version.split(";")
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            # A requirement is processed only when its markers apply, it is not ignored,
            # and it is selected by pull_licenses (when that filter is given).
            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        # Run all lookups concurrently; an exception in any task propagates from here.
        await asyncio.gather(*license_tasks.values())

        # Merge the per-requirement results. Packages that came back without a text are
        # queued so they can be re-checked once every result has been merged.
        licenses = {}
        pending = Queue()
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                if package in ignore:
                    continue

                # The first non-empty text recorded for a package wins.
                if licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")

                if not license_text and package not in ignore:
                    pending.put((package, 0))
                    # raise ValueError(f"License for {package} was not found.")

        # Re-check the queued packages: one that received a text from another result is
        # dropped, the others are re-queued with an incremented counter until the counter
        # exceeds 3, at which point they are reported as missing.
        missing = []
        while pending.qsize() > 0:
            package, ct = pending.get(timeout=0.01)
            if licenses.get(package):
                print(f"\tverified found license for (after repeat {ct}): {package}")
                continue

            # max dependency nesting depth check to detect missing cases
            if ct > 3:
                missing.append(package)
                continue
            pending.put((package, ct+1))

        if len(missing) > 0:
            raise ValueError(f"Licenses not found for {missing}")

        dependencies = sorted(list(licenses.keys()))

        # Rebuild the dictionary in sorted key order, then run the GPL check on each text.
        licenses = {dependency: licenses[dependency] for dependency in dependencies}
        for dependency, license_text in licenses.items():
            if license_text:
                check_gpl(dependency, license_text)
        return licenses


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed.

    Only a single ``<marker> <operator> <value>`` comparison is understood.

    :param markers: Environment marker text (the part of a requirement after ``;``).
        May be empty.
    :return: True when the dependency applies to the current environment, or when the
        marker cannot be evaluated (no known operator, or an unknown marker name).
    :raises ValueError: If the marker combines conditions with ``and`` / ``or``.
    """
    if not markers:
        return True

    # A raw string and re.search are both required: "\b" in a plain string literal is a
    # backspace character, and re.match only inspects the very beginning of the text.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    # The order matters: two-character operators must be tried before their one-character
    # prefixes, and "not in" before "in". The word operators are matched on word
    # boundaries so that the "in" inside e.g. "platform_machine" is not taken for one.
    comparisons = (
        (r">=", operator.ge),
        (r"<=", operator.le),
        (r"<", operator.lt),
        (r">", operator.gt),
        (r"==", operator.eq),
        (r"!=", operator.ne),
        # "<marker> in <value>" tests the marker's value against the given string.
        (r"\bnot\s+in\b", lambda a, b: a not in b),
        (r"\bin\b", lambda a, b: a in b),
    )

    for pattern, cmp in comparisons:
        parts = re.split(pattern, markers, maxsplit=1)
        if len(parts) == 2:
            break
    else:
        # No operators were found, include the dependency.
        return True

    marker, value = (part.strip(" '\"") for part in parts)

    bindings = get_marker_bindings()
    binding = bindings.get(marker)
    if binding:
        # NOTE(review): values are compared as plain strings, so version ordering is
        # lexicographic ("3.10" sorts before "3.9") -- confirm whether that matters here.
        return cmp(binding, value)

    # Unknown marker binding -- skip marker and include the dependency
    return True


_bindings = {}


def get_marker_bindings():
    """
    Returns the mapping of environment marker names to the values of the running
    interpreter and platform (os name, platform, python version, ...).

    The mapping is built on the first call and kept in the module-level ``_bindings``;
    later calls return that same dictionary.
    """
    global _bindings

    # A previous call already filled the cache.
    if _bindings:
        return _bindings

    def _render_version(version_info):
        # "major.minor.micro", followed by the first letter of the release level and
        # the serial number for anything that is not a final release.
        rendered = '{0.major}.{0.minor}.{0.micro}'.format(version_info)
        level = version_info.releaselevel
        if level == 'final':
            return rendered
        return rendered + level[0] + str(version_info.serial)

    if hasattr(sys, 'implementation'):
        impl_name = sys.implementation.name
        impl_version = _render_version(sys.implementation.version)
    else:
        impl_name, impl_version = '', '0'

    major_minor = platform.python_version_tuple()[:2]

    _bindings = {
        'implementation_name': impl_name,
        'implementation_version': impl_version,
        'os_name': os.name,
        'platform_machine': platform.machine(),
        'platform_python_implementation': platform.python_implementation(),
        'platform_release': platform.release(),
        'platform_system': platform.system(),
        'platform_version': platform.version(),
        'python_full_version': platform.python_version(),
        'python_version': '.'.join(major_minor),
        'sys_platform': sys.platform,
    }
    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """
    Downloads a license text from the package-metadata S3 bucket.

    :param package: Package name used in the object key.
    :param version: Package version used in the object key.
    :param package_manager: First path segment of the object key.
    :return: The response body for a 200 response; "" for any other status or when the
        request fails.
    """
    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        # The context manager releases the connection on every exit path.
        async with _session.get(url) as response:
            # A non-200 answer carries an error document, not a license; returning it
            # would make callers treat the error body as the license text.
            if response.status != 200:
                return ""
            return await response.text()
    except Exception:
        # Best effort: this is only one of several license sources. A bare "except:"
        # would additionally swallow task cancellation and KeyboardInterrupt.
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Finds the license text for one pinned dependency.

    With ``download_dir`` set, entries of that directory whose name contains
    ``<dependency with '-' replaced by '_'>-<version>`` are inspected first, then S3, then
    a fresh ``pip download``. Without it, the folders on ``sys.path`` are searched. The
    final fallback in both cases is ``fetch_license``.

    :param dependency: Package name as written in the requirements file.
    :param version: Pinned version.
    :param download_dir: Directory holding (or receiving) downloaded packages.
    :param ignore: Package names to skip; ``None`` means nothing is ignored.
    :param ignore_all_dependencies: Forwarded to ``pip_download``.
    :param extra_pip_args: Forwarded to ``pip_download``.
    :return: Mapping of ``"<name>-<version>"`` to license text; empty when the dependency
        is ignored or nothing was found.
    """
    # The declared default is None, which "dependency in ignore" cannot handle.
    if ignore is None:
        ignore = []
    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        tempdir = None
        if os.path.exists(download_dir):
            # Check if package has already been downloaded. Every matching entry is
            # looked up in the directory that was actually listed.
            local_name = f"{dependency.replace('-', '_')}-{version}"
            found_without_license = False
            for path in os.listdir(download_dir):
                if local_name not in path:
                    continue
                license_text = await get_local_license(dependency, version, os.path.join(download_dir, path))
                if license_text:
                    print(f"License for {dependency_str} found locally.")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}
                found_without_license = True

            if found_without_license:
                license_text = await get_license_from_s3(dependency, version)
                if license_text:
                    print(f"Downloaded a license for {dependency_str} from s3")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}
                print(f"No license available for {dependency_str}. Downloading a clean copy.")
                tempdir = tempfile.TemporaryDirectory()
                download_dir = tempdir.name

        try:
            licenses = await pip_download(dependency, version, download_dir, ignore,
                                          ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        finally:
            # pip_download returns the license texts themselves, so the scratch copy is
            # no longer needed once it has finished.
            if tempdir is not None:
                tempdir.cleanup()
        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}
    print(f"No license found for: {dependency_str}")
    # Falsy like the previous implicit None, but matches the declared return type.
    return {}


# Splits a downloaded archive file name into (name, version): group 1 is the run of word
# characters / hyphens before a "-", group 2 the following run of dots / word characters.
dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Downloads ``dependency==version`` with pip and gathers a license for every archive pip fetched.

    :param dependency: Name of the package to download.
    :param version: Exact version to download.
    :param download_dir: Parent folder; archives are stored in ``<download_dir>/<dependency>``.
    :param ignore: Package names for which no license should be collected.
    :param ignore_all_dependencies: When set, the internal cache is consulted first and
        pip is run with ``--no-deps``.
    :param extra_pip_args: Extra pip arguments as a single whitespace separated string.
    :return: Mapping of ``"<name>-<version>"`` to the license text (falsy when none was found).
        Empty when the dependency is ignored or pip produced no download folder.
    :raises Exception: If the pip subprocess could not be started or awaited.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            return {f"{dependency}-{version}": license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = [
        sys.executable,
        "-m",
        "pip",
        "download",
        f"{dependency}=={version}",
        "-d",
        download_dep_dir,
        "-q",
        "--disable-pip-version-check",
        *additional_args,
    ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        await process.wait()
    except Exception as exc:
        raise Exception(f"Failed to find and download a license for {dependency}=={version} using pip.") from exc

    if not os.path.isdir(download_dep_dir):
        # pip failed before it could create the folder; let the caller try other sources.
        return {}

    # Known archive suffixes are removed before the name is parsed, otherwise the
    # version group of dependency_regex would also capture e.g. ".tar.gz".
    archive_suffixes = (".tar.gz", ".tar.bz2", ".tar.bz", ".tgz", ".tbz2", ".tbz", ".zip", ".whl")
    # Interpreters that support extraction filters expose ``tarfile.data_filter``;
    # use the "data" filter there so members cannot be written outside the target folder.
    tar_extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}

    licenses = {}
    for path in os.listdir(download_dep_dir):
        dependency_path = join(download_dep_dir, path)
        if os.path.isdir(dependency_path):
            # Skip extracted folders.
            continue

        stem = path
        for suffix in archive_suffixes:
            if stem.endswith(suffix):
                stem = stem[: -len(suffix)]
                break

        match = dependency_regex.match(stem)
        if not match:
            continue

        dependency_name = match.group(1)
        dependency_version = match.group(2)
        if dependency_name in ignore:
            continue

        # The archive is unpacked next to itself, into a folder named after the
        # file without its last extension (same location the script always used).
        archive_path, _ = os.path.splitext(dependency_path)
        if tarfile.is_tarfile(dependency_path):
            # "r:*" lets tarfile detect gzip/bzip2/plain tar by content.
            with tarfile.open(dependency_path, "r:*") as archive:
                archive.extractall(archive_path, **tar_extract_kwargs)
        elif zipfile.is_zipfile(dependency_path):
            with zipfile.ZipFile(dependency_path) as archive:
                archive.extractall(archive_path)
        else:
            # Not an archive we know how to open.
            continue

        # Source distributions wrap their content in a "<name>-<version>" folder.
        nested_path = join(archive_path, stem)
        if os.path.isdir(nested_path):
            archive_path = nested_path

        license_text = await get_local_license(dependency_name, dependency_version, archive_path)
        if not license_text:
            license_text = await fetch_license(dependency_name, dependency_version)
        if not license_text:
            license_text = await get_license_from_s3(dependency_name, dependency_version)

        licenses[f"{dependency_name}-{dependency_version}"] = license_text
        if license_text:
            await put_license_into_internal_cache(dependency_name, dependency_version, license_text)
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Searches every ``sys.path`` entry for a locally installed license.

    :return: The first non-empty license text found, or "" when no entry yields one.
    """
    for search_root in sys.path:
        found = await get_local_license(dependency, version, search_root)
        if not found:
            continue
        return found
    return ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Looks for the license of ``dependency`` inside the folder ``path``.

    Sources are tried in this order: ``.egg-info`` / ``.dist-info`` entries whose name
    starts with ``<name>-<version>``, a folder named after the package, and finally
    ``path`` itself.

    :param dependency: Package name; dashes and underscores are treated as equivalent.
    :param version: Package version used to recognise metadata entries.
    :param path: Folder to inspect.
    :return: License text, or "" when ``path`` is not a folder or nothing was found.
    """
    if not os.path.isdir(path):
        return ""

    dependency_name = dependency.lower().replace("-", "_")
    # A package written with dashes is installed under its underscore form,
    # so accept either spelling for the package folder.
    package_folder_names = {dependency.lower(), dependency_name}
    # Entries are lower-cased before comparison, so the prefix has to be as well.
    meta_prefix = f"{dependency_name}-{version}".lower()

    package_meta_candidates = []
    package_path = ""
    for entry in os.listdir(path):
        lowered = entry.lower()
        if lowered.startswith(meta_prefix):
            package_meta_candidates.append(join(path, entry))
        elif lowered in package_folder_names:
            package_path = join(path, entry)

    for package_meta in package_meta_candidates:
        if package_meta.endswith(".egg-info"):
            license_text = await egg_info_license(package_meta)
        elif package_meta.endswith(".dist-info"):
            license_text = await dist_info_license(package_meta)
        else:
            continue
        if license_text:
            return license_text

    if package_path:
        # Try to find a license file in the package source code
        license_text = find_license_file(package_path)
        if license_text:
            return license_text

    # A license file can be in the specified folder itself.
    return find_license_file(path) or ""


async def egg_info_license(package_path: str) -> str:
    """
    Resolves a license through the ``Home-page`` field of an ``.egg-info`` entry.

    :param package_path: Path of the ``.egg-info`` folder (its ``PKG-INFO`` file is read),
        or of a single-file ``.egg-info`` which is then read directly.
    :return: License text fetched from the home page, or "" when the metadata cannot be
        read, has no ``Home-page`` or no license could be fetched.
    """
    info_path = package_path if os.path.isfile(package_path) else join(package_path, "PKG-INFO")
    try:
        # Metadata may contain non-ASCII author names; never fail on decoding.
        with open(info_path, encoding="utf-8", errors="replace") as file:
            info = file.readlines()
    except OSError:
        return ""

    url = ""
    for line in info:
        if line.startswith("Home-page:"):
            url = line[len("Home-page:"):].strip()
            break

    if not url:
        return ""
    return (await fetch_from_github(url)) or ""


async def dist_info_license(package_path: str) -> str:
    """
    Resolves a license for a ``.dist-info`` folder.

    A license file inside the folder wins; otherwise the ``Home-page`` field of
    ``METADATA`` is used to fetch the license.

    :param package_path: Path of the ``.dist-info`` folder.
    :return: License text, or "" when neither source yields one.
    """
    license_text = find_license_file(package_path)
    if license_text:
        return license_text

    metadata_path = join(package_path, "METADATA")
    if not exists(metadata_path):
        return ""

    try:
        with open(metadata_path, "rb") as file:
            metadata = file.readlines()
    except Exception:
        print(f"Could not open metadata file for: {package_path}")
        raise

    url = None
    for raw_line in metadata:
        # The file is read as bytes so that one undecodable line does not hide the rest.
        try:
            line = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            continue
        if line.startswith("Home-page:"):
            url = line[len("Home-page:"):].strip()
            break

    if not url:
        return ""
    return (await fetch_from_github(url)) or ""


def find_license_file(package_path: str) -> str:
    """
    Reads the first license-like file found for a package folder.

    The ``licenses`` sub-folder (when it is a folder) is searched first, then
    ``package_path`` itself. Sub-folders are never descended into. Entries are visited
    in sorted order so that the choice is the same on every run.

    :param package_path: Folder to search.
    :return: Content of the license file, or "" when the folder does not exist or holds
        no license file.
    """
    if not os.path.isdir(package_path):
        return ""

    search_folders = []
    licenses_folder = join(package_path, "licenses")
    if os.path.isdir(licenses_folder):
        search_folders.append(licenses_folder)
    search_folders.append(package_path)

    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)
    license_path = ""
    for folder in search_folders:
        for name in sorted(os.listdir(folder)):
            candidate = join(folder, name)
            if os.path.isdir(candidate):
                continue
            # safe because the (possibly) case-sensitive path is what we have (unlike URL)
            #   verify if any version of the common licen[cs]e or copying names appear
            if license_regex.match(name) or name in license_names:
                license_path = candidate
                break
        if license_path:
            break

    if not license_path:
        return ""

    # i want to try / except here because i want to know when we have issues reading files.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetches a license for ``dependency==version`` from the internal cache or via PyPI.

    The PyPI JSON API is queried for the project home page (``info.home_page`` or
    ``info.project_urls.Homepage``), which is then handed to ``fetch_from_github``.

    :param session: HTTP session for the PyPI request; the module-wide session is used when omitted.
    :return: License text, or "" when PyPI does not answer with 200 or no license was found.
    :raises ValueError: If PyPI reports no home page for the release.
    """
    if session is None:
        session = _session

    license_text = await fetch_license_from_internal_cache(dependency, version)
    if license_text:
        return license_text

    response = await session.get(f"https://pypi.org/pypi/{dependency}/{version}/json", ssl=get_ssl_cert())
    if response.status != 200:
        return ""

    response_json = await response.json()
    info = response_json.get("info") or {}
    # "project_urls" may be null or lack a "Homepage" entry; treat both as "no home page".
    home_page = info.get("home_page") or (info.get("project_urls") or {}).get("Homepage")
    if not home_page:
        raise ValueError(f"home_page is missing for {dependency}=={version}")

    return (await fetch_from_github(home_page)) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Looks up a license in the internal cache at ``LICENSE_CACHE_URL``.

    The lookup is best effort: any request or decoding failure is treated as a cache miss.

    :param session: HTTP session to use; the module-wide session is used when omitted.
    :return: Cached license text, or "" when the cache is disabled, misses or fails.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    # The cache is addressed with the dashed form of the package name.
    dependency = dependency.replace("_", "-")
    try:
        response = await session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}")
        if response.status != 200:
            return ""
        payload = await response.json()
        print(f"Found license for {dependency}=={version} in internal cache.")
        return payload.get("text") or ""
    except Exception:
        # Deliberately not a bare except: task cancellation must still propagate.
        return ""


async def fetch_from_github(url: str, session: aiohttp.ClientSession = None) -> str:
    """
    Downloads the license of the GitHub repository that ``url`` points to.

    Well-known license file names are tried on the usual default branches first; the
    GitHub license API is only used as a fallback because it is rate limited.

    :param url: Project URL; only the leading ``<owner>/<repo>`` part of its path is used.
    :param session: HTTP session to use; the module-wide session is used when omitted.
    :return: License text, or "" when the URL is not a GitHub repository URL or no
        license could be downloaded.
    """
    # Imported here because a plain ``import urllib`` does not guarantee the submodule is loaded.
    from urllib.parse import urlparse

    if session is None:
        session = _session

    parsed_url = urlparse(url)
    if "github" not in parsed_url.netloc:
        return ""

    # Reduce e.g. "/owner/repo.git/tree/main" to "owner/repo".
    segments = [segment for segment in parsed_url.path.split("/") if segment]
    if len(segments) < 2:
        return ""
    owner, repo = segments[0], segments[1]
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    repo_path = f"{owner}/{repo}"

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            response = await session.get(f"https://raw.githubusercontent.com/{repo_path}/{branch}/{license_name}",
                                         ssl=ssl_context)
            if response.status == 200:
                return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    response = await session.get(f"https://api.github.com/repos/{repo_path}/license", ssl=ssl_context)
    if response.status != 200:
        return ""

    response_json = await response.json()
    download_url = response_json.get("download_url")
    if not download_url:
        return ""

    response = await session.get(download_url, ssl=ssl_context)
    if response.status != 200:
        # Do not hand an error page back as if it were a license.
        return ""
    return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Uploads a license text to the internal cache.

    The upload is best effort: it is skipped when ``LICENSE_CACHE_URL`` or
    ``LICENSE_CACHE_API_KEY`` is not set, and a failed request only prints a warning.

    :param session: HTTP session to use; the module-wide session is used when omitted.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    # The cache is addressed with the dashed form of the package name.
    dependency = dependency.replace("_", "-")
    try:
        response = await session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        )
        if response.status == 201:
            print(f"Put license for {dependency}=={version} into internal cache.")
        elif response.status == 401:
            print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")
    except Exception as exc:
        # Caching must never break license collection, but do not hide the reason either.
        print(f"[Warning] Could not put license for {dependency}=={version} into internal cache: {exc}")


def check_gpl(dependency: str, license_text: str):
    """
    Aborts the program when a license text mentions GPL or LGPL.

    The text is searched for the standalone words ``GPL`` / ``LGPL`` anywhere in it.
    On a hit an error is printed and the process exits with status 666.

    :param dependency: Name shown in the error message.
    :param license_text: License text to inspect; empty or ``None`` is accepted.
    """
    # Raw string: in a normal string "\b" is a backspace character, not a word boundary.
    # search() instead of match(): the mention can be anywhere, not only at the start.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Collects the licenses for a requirements file and writes or returns the report.

    Every entry of the report is the package name, an underline of dashes and the
    license text. Errors raised while collecting are printed, not propagated.

    :param requirements_path: Path of the requirements file.
    :param pull_licenses: Optional subset of requirement names to process.
    :param dest: Output file. When omitted the report is returned instead of written.
    :param download_dir: Folder used for pip downloads.
    :param ignore: Package names (without version) to leave out.
    :param ignore_all_dependencies: Only process the listed requirements themselves.
    :param overwrite: Replace ``dest`` instead of appending to it.
    :return: The report text when ``dest`` is not given, otherwise ``None``.
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    # ``dest`` is optional, so only derive a folder from it when it is set. Using the
    # base name keeps the path stable when main() is called more than once.
    dest_dir = os.path.dirname(dest) if dest else ""
    PACKAGES_FILE = os.path.join(dest_dir, os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        report = "".join(
            f"{dep}\n{'-' * len(dep)}\n{text}\n\n" for dep, text in licenses.items()
        )

        if not dest:
            return report

        mode = "w" if overwrite else "a+"
        with open(dest, mode, encoding="utf-8") as file:
            file.write(report)
    except Exception as exc:
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Builds the ``ssl`` argument used for outgoing HTTPS requests.

    :return: An SSL context backed by the certifi CA bundle, or ``False`` when certifi
        is not installed.
    """
    # NOTE(review): the False fallback is passed as ``ssl=`` to the HTTP client and
    # presumably switches certificate verification off -- confirm this is acceptable.
    try:
        import certifi
    except ImportError:
        # Don't verify SSL certificate if certifi is not installed.
        return False

    ca_bundle = certifi.where()
    return ssl.create_default_context(cafile=ca_bundle)


# Command line entry point: parse the options, run main() and print the report
# when main() returns one.
if __name__ == "__main__":
    cli = argparse.ArgumentParser()
    cli.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    cli.add_argument(
        "--pull-licenses",
        dest="pull_licenses",
        type=str,
        nargs="+",
        help="Specifies list of licenses that should be pulled from the requirements.txt list",
    )
    cli.add_argument("-d", "--dest", dest="dest", type=str)
    cli.add_argument("--download-dir", dest="download_dir", type=str)
    cli.add_argument(
        "--ignore",
        dest="ignore",
        action="append",
        help="specifies dependency which licenses should be ignored. "
             "the script will not try to download license for ignored dependency.",
    )
    cli.add_argument(
        "--ignore-all-dependencies",
        dest="ignore_all",
        action="store_true",
        help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements."
             "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
    )
    cli.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="Overwrites the output file if present, otherwise license texts are appended.",
    )
    options = cli.parse_args()

    report = main(
        options.requirements,
        options.pull_licenses,
        options.dest,
        options.download_dir,
        options.ignore,
        ignore_all_dependencies=options.ignore_all,
        overwrite=options.overwrite,
    )
    if report:
        print(report)


cryptography-42.0.7
-------------------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

try:
    import aiohttp
except:
    if all([platform.uname().system == "Linux", platform.uname().machine == "aarch64"]):
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

# Aliases used in type annotations; both are plain strings.
DependencyName = str
LicenseText = str

# HTTP session shared by the fetch helpers; stays None until get_licenses() opens one.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # added manually to pair common english spellings
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names tried, in this order, when guessing the raw GitHub URL of a license file.
main_branches = [
    "master",
    "main",
    "latest",
]

# File that get_licenses() appends the name of every collected package to;
# main() re-points it to the folder of the destination file.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the internal license cache, taken from the environment. The trailing
# slash is removed because request URLs are built as f"{LICENSE_CACHE_URL}/pip/...".
# The cache is skipped when the variable is not set.
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# Sent as the "X-Key" header when uploading to the cache; uploads are skipped without it.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Collects the license text of every pinned requirement in a requirements file.

    Each processed line must have the form ``name==version``, optionally followed by
    environment markers (``; marker``) and/or extra pip arguments. Lines that do not
    start with an alphanumeric character (blank lines, comments, pip options) are skipped.
    The name of every package with a license is appended to ``PACKAGES_FILE``.

    :param requirements_path: Path of the requirements file.
    :param pull_licenses: When given, only these requirement names are processed.
    :param download_dir: Folder used for pip downloads; when omitted the Python path is searched instead.
    :param ignore: Package names to leave out.
    :param ignore_all_dependencies: Only process the listed requirements themselves.
    :return: Mapping of ``"<name>-<version>"`` to license text, sorted by key.
    :raises ValueError: If a requirement has no ``==`` version or a license is missing.
    """
    global _session

    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        license_tasks = {}
        for requirement in requirements:
            # In requirements files whitespace followed by "#" starts a comment; drop it
            # so that it is not mistaken for extra pip arguments further down.
            requirement = re.split(r"\s#", requirement.strip(), maxsplit=1)[0].strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue
            try:
                dependency, version = requirement.split("==", maxsplit=1)
            except ValueError:
                raise ValueError(f"{requirement} must contain a version.")

            # "name == 1.0" is valid; without stripping, the blank after "==" would be
            # taken for the separator in front of extra pip arguments.
            dependency = dependency.strip()
            version = version.strip()

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                version, markers = version.split(";", 1)
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        await asyncio.gather(*license_tasks.values())

        licenses = {}
        pending = Queue()
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                if package in ignore:
                    continue

                if licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")

                if not license_text and package not in ignore:
                    pending.put((package, 0))
                    # raise ValueError(f"License for {package} was not found.")

        # A package without a license may have been resolved through another
        # requirement in the meantime; everything still unresolved is reported.
        missing = []
        while pending.qsize() > 0:
            package, ct = pending.get(timeout=0.01)
            if licenses.get(package):
                print(f"\tverified found license for (after repeat {ct}): {package}")
                continue

            # max dependency nesting depth check to detect missing cases
            if ct > 3:
                missing.append(package)
                continue
            pending.put((package, ct+1))

        if len(missing) > 0:
            raise ValueError(f"Licenses not found for {missing}")

        licenses = {dependency: licenses[dependency] for dependency in sorted(licenses)}
        for dependency, license_text in licenses.items():
            if license_text:
                check_gpl(dependency, license_text)
        return licenses


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed

    Only a single ``<marker> <operator> <value>`` comparison is supported. Version
    markers are compared numerically for the ordering operators, so that e.g.
    ``3.10`` counts as newer than ``3.8``.

    :param markers: Marker expression from a requirement line; may be empty.
    :return: ``True`` when the dependency applies to the running interpreter, or when the
        expression is empty, has no known operator or uses an unknown marker.
    :raises ValueError: If the expression combines conditions with ``and`` / ``or``.
    """
    if not markers:
        return True

    # Raw string and search(): "\b" must be a word boundary and the keyword can
    # appear anywhere in the expression.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    # The order matters because operator is found by using 'in'
    operators = {
        ">=": operator.ge,
        "<=": operator.le,
        "<": operator.lt,
        ">": operator.gt,
        "==": operator.eq,
        "!=": operator.ne,
        "not in": lambda a, b: not operator.contains(a, b),
        "in": operator.contains,
    }

    for op, cmp in operators.items():
        if op in markers:
            break
    else:
        # No operators were found, include the dependency.
        return True

    # Split once only: the operator text may occur again inside the value.
    marker, value = markers.split(op, 1)
    marker, value = marker.strip(" '\""), value.strip(" '\"")

    bindings = get_marker_bindings()
    binding = bindings.get(marker)
    if not binding:
        # Unknown marker binding -- skip marker and include the dependency
        return True

    def numeric_version(text):
        # Leading dotted number of a version string as a tuple of ints, or None.
        found = re.match(r"\d+(?:\.\d+)*", text)
        if not found:
            return None
        return tuple(int(part) for part in found.group(0).split("."))

    version_markers = ("python_version", "python_full_version", "implementation_version")
    ordering_operators = (operator.ge, operator.le, operator.lt, operator.gt)
    if marker in version_markers and cmp in ordering_operators:
        current, wanted = numeric_version(binding), numeric_version(value)
        if current and wanted:
            # Pad with zeros so that "3.8" and "3.8.0" compare as equal.
            width = max(len(current), len(wanted))
            current += (0,) * (width - len(current))
            wanted += (0,) * (width - len(wanted))
            return cmp(current, wanted)

    return cmp(binding, value)


# Cache for get_marker_bindings(); empty until the first call.
_bindings = {}


def get_marker_bindings():
    """
    Returns the values that environment markers are compared against (implementation,
    os name, platform and python version information).

    The mapping is built on the first call and the same object is returned afterwards.
    """
    global _bindings

    if not _bindings:
        implementation = getattr(sys, 'implementation', None)
        if implementation is None:
            impl_name, impl_version = '', '0'
        else:
            impl_name = implementation.name
            info = implementation.version
            impl_version = '{0.major}.{0.minor}.{0.micro}'.format(info)
            if info.releaselevel != 'final':
                # Non-final builds get a suffix: first letter of the level plus the serial.
                impl_version += info.releaselevel[0] + str(info.serial)

        major_minor = platform.python_version_tuple()[:2]
        _bindings = {
            'implementation_name': impl_name,
            'implementation_version': impl_version,
            'os_name': os.name,
            'platform_machine': platform.machine(),
            'platform_python_implementation': platform.python_implementation(),
            'platform_release': platform.release(),
            'platform_system': platform.system(),
            'platform_version': platform.version(),
            'python_full_version': platform.python_version(),
            'python_version': '.'.join(major_minor),
            'sys_platform': sys.platform,
        }

    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """
    Downloads a license from the package-metadata S3 bucket.

    :param package: Package name.
    :param version: Package version.
    :param package_manager: Top-level folder of the bucket to look in.
    :return: License text, or "" when the object is not available or the request fails.
    """
    try:
        response = await _session.get(f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license")
        if response.status != 200:
            # A missing object is answered with an error document; that is not a license.
            return ""
        return await response.text()
    except Exception:
        # Best effort source: any failure counts as "no license here".
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Finds the license of one pinned dependency.

    With ``download_dir`` set, an already downloaded copy is inspected first, then S3,
    then a pip download. Without it the Python path is searched. ``fetch_license`` is the
    last resort in both cases. Licenses found here are stored in the internal cache.

    :param dependency: Package name.
    :param version: Exact package version.
    :param download_dir: Folder holding (or receiving) pip downloads.
    :param ignore: Package names to leave out.
    :param ignore_all_dependencies: Passed on to ``pip_download``.
    :param extra_pip_args: Extra pip arguments, passed on to ``pip_download``.
    :return: Mapping of ``"<name>-<version>"`` to license text; empty when the dependency
        is ignored or nothing was found.
    """
    if ignore is None:
        ignore = []
    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        tempdir = None
        if os.path.isdir(download_dir):
            # Check if package has already been downloaded
            for path in os.listdir(download_dir):
                if f"{dependency.replace('-', '_')}-{version}" not in path:
                    continue

                license_text = await get_local_license(dependency, version, os.path.join(download_dir, path))
                if license_text:
                    print(f"License for {dependency_str} found locally.")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

                license_text = await get_license_from_s3(dependency, version)
                if license_text:
                    print(f"Downloaded a license for {dependency_str} from s3")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

                print(f"No license available for {dependency_str}. Downloading a clean copy.")
                tempdir = tempfile.TemporaryDirectory()
                download_dir = tempdir.name
                # The remaining entries belong to the old folder; stop scanning them.
                break

        try:
            licenses = await pip_download(dependency, version, download_dir, ignore,
                                          ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        finally:
            if tempdir is not None:
                # pip_download has read everything it needs from the clean copy.
                tempdir.cleanup()
        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}

    print(f"No license found for: {dependency_str}")
    return {}


# Splits a downloaded archive file name into (name, version): group 1 is the
# distribution name (word characters and dashes), group 2 is the run of word
# characters and dots that follows the separating dash.
# NOTE(review): group 2 accepts dots, so for an sdist such as "pkg-1.0.tar.gz"
# it captures "1.0.tar.gz" rather than "1.0".
dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Downloads ``dependency==version`` with pip and gathers a license for every archive pip fetched.

    :param dependency: Name of the package to download.
    :param version: Exact version to download.
    :param download_dir: Parent folder; archives are stored in ``<download_dir>/<dependency>``.
    :param ignore: Package names for which no license should be collected.
    :param ignore_all_dependencies: When set, the internal cache is consulted first and
        pip is run with ``--no-deps``.
    :param extra_pip_args: Extra pip arguments as a single whitespace separated string.
    :return: Mapping of ``"<name>-<version>"`` to the license text (falsy when none was found).
        Empty when the dependency is ignored or pip produced no download folder.
    :raises Exception: If the pip subprocess could not be started or awaited.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            return {f"{dependency}-{version}": license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = [
        sys.executable,
        "-m",
        "pip",
        "download",
        f"{dependency}=={version}",
        "-d",
        download_dep_dir,
        "-q",
        "--disable-pip-version-check",
        *additional_args,
    ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        await process.wait()
    except Exception as exc:
        raise Exception(f"Failed to find and download a license for {dependency}=={version} using pip.") from exc

    if not os.path.isdir(download_dep_dir):
        # pip failed before it could create the folder; let the caller try other sources.
        return {}

    # Known archive suffixes are removed before the name is parsed, otherwise the
    # version group of dependency_regex would also capture e.g. ".tar.gz".
    archive_suffixes = (".tar.gz", ".tar.bz2", ".tar.bz", ".tgz", ".tbz2", ".tbz", ".zip", ".whl")
    # Interpreters that support extraction filters expose ``tarfile.data_filter``;
    # use the "data" filter there so members cannot be written outside the target folder.
    tar_extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}

    licenses = {}
    for path in os.listdir(download_dep_dir):
        dependency_path = join(download_dep_dir, path)
        if os.path.isdir(dependency_path):
            # Skip extracted folders.
            continue

        stem = path
        for suffix in archive_suffixes:
            if stem.endswith(suffix):
                stem = stem[: -len(suffix)]
                break

        match = dependency_regex.match(stem)
        if not match:
            continue

        dependency_name = match.group(1)
        dependency_version = match.group(2)
        if dependency_name in ignore:
            continue

        # The archive is unpacked next to itself, into a folder named after the
        # file without its last extension (same location the script always used).
        archive_path, _ = os.path.splitext(dependency_path)
        if tarfile.is_tarfile(dependency_path):
            # "r:*" lets tarfile detect gzip/bzip2/plain tar by content.
            with tarfile.open(dependency_path, "r:*") as archive:
                archive.extractall(archive_path, **tar_extract_kwargs)
        elif zipfile.is_zipfile(dependency_path):
            with zipfile.ZipFile(dependency_path) as archive:
                archive.extractall(archive_path)
        else:
            # Not an archive we know how to open.
            continue

        # Source distributions wrap their content in a "<name>-<version>" folder.
        nested_path = join(archive_path, stem)
        if os.path.isdir(nested_path):
            archive_path = nested_path

        license_text = await get_local_license(dependency_name, dependency_version, archive_path)
        if not license_text:
            license_text = await fetch_license(dependency_name, dependency_version)
        if not license_text:
            license_text = await get_license_from_s3(dependency_name, dependency_version)

        licenses[f"{dependency_name}-{dependency_version}"] = license_text
        if license_text:
            await put_license_into_internal_cache(dependency_name, dependency_version, license_text)
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Searches every ``sys.path`` entry for a locally installed license.

    :return: The first non-empty license text found, or "" when no entry yields one.
    """
    for search_root in sys.path:
        found = await get_local_license(dependency, version, search_root)
        if not found:
            continue
        return found
    return ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Looks for the license of ``dependency`` inside the folder ``path``.

    Sources are tried in this order: ``.egg-info`` / ``.dist-info`` entries whose name
    starts with ``<name>-<version>``, a folder named after the package, and finally
    ``path`` itself.

    :param dependency: Package name; dashes and underscores are treated as equivalent.
    :param version: Package version used to recognise metadata entries.
    :param path: Folder to inspect.
    :return: License text, or "" when ``path`` is not a folder or nothing was found.
    """
    if not os.path.isdir(path):
        return ""

    dependency_name = dependency.lower().replace("-", "_")
    # A package written with dashes is installed under its underscore form,
    # so accept either spelling for the package folder.
    package_folder_names = {dependency.lower(), dependency_name}
    # Entries are lower-cased before comparison, so the prefix has to be as well.
    meta_prefix = f"{dependency_name}-{version}".lower()

    package_meta_candidates = []
    package_path = ""
    for entry in os.listdir(path):
        lowered = entry.lower()
        if lowered.startswith(meta_prefix):
            package_meta_candidates.append(join(path, entry))
        elif lowered in package_folder_names:
            package_path = join(path, entry)

    for package_meta in package_meta_candidates:
        if package_meta.endswith(".egg-info"):
            license_text = await egg_info_license(package_meta)
        elif package_meta.endswith(".dist-info"):
            license_text = await dist_info_license(package_meta)
        else:
            continue
        if license_text:
            return license_text

    if package_path:
        # Try to find a license file in the package source code
        license_text = find_license_file(package_path)
        if license_text:
            return license_text

    # A license file can be in the specified folder itself.
    return find_license_file(path) or ""


async def egg_info_license(package_path: str) -> str:
    """
    Resolves a license through the ``Home-page`` field of an ``.egg-info`` entry.

    :param package_path: Path of the ``.egg-info`` folder (its ``PKG-INFO`` file is read),
        or of a single-file ``.egg-info`` which is then read directly.
    :return: License text fetched from the home page, or "" when the metadata cannot be
        read, has no ``Home-page`` or no license could be fetched.
    """
    info_path = package_path if os.path.isfile(package_path) else join(package_path, "PKG-INFO")
    try:
        # Metadata may contain non-ASCII author names; never fail on decoding.
        with open(info_path, encoding="utf-8", errors="replace") as file:
            info = file.readlines()
    except OSError:
        return ""

    url = ""
    for line in info:
        if line.startswith("Home-page:"):
            url = line[len("Home-page:"):].strip()
            break

    if not url:
        return ""
    return (await fetch_from_github(url)) or ""


async def dist_info_license(package_path: str) -> str:
    """
    Resolves a license from a ``.dist-info`` folder.

    A license file inside the folder wins; otherwise the ``Home-page`` field of
    ``METADATA`` is passed to ``fetch_from_github``.

    :param package_path: path to the ``*.dist-info`` folder
    :return: license text, or an empty string when nothing was found
    """
    license_text = find_license_file(package_path)
    if license_text:
        return license_text

    metadata_path = join(package_path, "METADATA")
    if not exists(metadata_path):
        return ""

    try:
        # Read as bytes and decode per line so one badly encoded line
        # does not make the whole file unreadable.
        with open(metadata_path, "rb") as file:
            metadata = file.readlines()
    except Exception:
        print(f"Could not open metadata file for: {package_path}")
        raise

    for raw_line in metadata:
        try:
            line = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            continue
        if line.startswith("Home-page:"):
            # Only the first Home-page line is considered.
            url = line[len("Home-page:"):].strip()
            if url:
                return (await fetch_from_github(url)) or ""
            break

    return ""


def find_license_file(package_path: str) -> str:
    """
    Reads the first license-like file found in a package folder.

    When ``package_path`` contains a ``licenses`` sub-folder only that folder is
    searched, otherwise ``package_path`` itself is. Sub-directories are skipped.
    The first file, in sorted order, whose name starts with ``license``,
    ``licence`` or ``copying`` (case-insensitive) is read.

    :param package_path: folder to search
    :return: content of the license file, or an empty string when
        ``package_path`` is not a directory or holds no license file
    """
    if not os.path.isdir(package_path):
        return ""

    licenses_folder = join(package_path, "licenses")
    # isdir instead of exists: a plain file named "licenses" cannot be listed.
    if os.path.isdir(licenses_folder):
        license_search_folder = licenses_folder
    else:
        license_search_folder = package_path

    # safe because the (possibly) case-sensitive path is what we have (unlike URL).
    # Every name in the module-level license name list starts with one of these
    # words, so the prefix match already covers the exact-name lookup.
    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)

    license_path = ""
    # Sorted so the chosen file does not depend on directory listing order.
    for name in sorted(os.listdir(license_search_folder)):
        candidate = join(license_search_folder, name)
        if os.path.isdir(candidate):
            continue
        if license_regex.match(name):
            license_path = candidate
            break

    if not license_path:
        return ""

    # try / except on purpose: decoding problems should be reported, not hidden.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetches a license for a PyPI package over the network.

    The internal license cache is asked first. Otherwise the PyPI JSON API is
    queried for the project home page, which is passed to ``fetch_from_github``.

    :param dependency: package name
    :param version: package version
    :param session: HTTP session to use; defaults to the module-level session
    :return: license text, or an empty string when PyPI does not know the
        release or no license could be downloaded
    :raises ValueError: when PyPI lists no home page for the release
    """
    if session is None:
        session = _session

    license_text = await fetch_license_from_internal_cache(dependency, version)
    if license_text:
        return license_text

    # ``async with`` releases the connection as soon as the body is read.
    async with session.get(f"https://pypi.org/pypi/{dependency}/{version}/json", ssl=get_ssl_cert()) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    info = response_json.get("info") or {}
    home_page = info.get("home_page")
    if not home_page:
        # ``project_urls`` may be null and its keys are free-form labels,
        # so look the home page up defensively and case-insensitively.
        project_urls = info.get("project_urls") or {}
        home_page = next(
            (link for label, link in project_urls.items() if label.lower() == "homepage" and link),
            None,
        )
        if not home_page:
            raise ValueError(f"home_page is missing for {dependency}=={version}")

    return (await fetch_from_github(home_page)) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Asks the internal license cache for the license of a pip package.

    The cache is only used when ``LICENSE_CACHE_URL`` is configured. The lookup
    is best effort: any request or decoding failure yields an empty string.

    :param dependency: package name; underscores are sent as hyphens
    :param version: package version
    :param session: HTTP session to use; defaults to the module-level session
    :return: cached license text, or an empty string
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}") as response:
            if response.status != 200:
                return ""
            payload = await response.json()
        # A missing or null "text" entry counts as "not cached".
        license_text = payload.get("text") or ""
    except Exception:
        # ``Exception`` rather than a bare except, so task cancellation and
        # KeyboardInterrupt still propagate.
        return ""

    if license_text:
        print(f"Found license for {dependency}=={version} in internal cache.")
    return license_text


async def fetch_from_github(url: str, session: aiohttp.ClientSession = None) -> str:
    """
    Downloads the license of a GitHub repository.

    Well-known license file names are tried on well-known branches first, to
    stay away from the rate-limited GitHub API; the API is the fallback.

    :param url: project home page; anything not hosted on GitHub is ignored
    :param session: HTTP session to use; defaults to the module-level session
    :return: license text, or an empty string when none was found
    """
    # The file only does ``import urllib``; load the submodule explicitly
    # instead of relying on another package having imported it.
    import urllib.parse

    if session is None:
        session = _session

    parsed_url = urllib.parse.urlparse(url)
    if "github" not in parsed_url.netloc:
        return ""

    # Keep only "<owner>/<repo>": deeper paths (e.g. ".../tree/main/docs")
    # never form a valid raw-content or API URL.
    repo_path = "/".join(parsed_url.path.strip("/").split("/")[:2])

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            raw_url = f"https://raw.githubusercontent.com/{repo_path}/{branch}/{license_name}"
            async with session.get(raw_url, ssl=ssl_context) as response:
                if response.status == 200:
                    return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    async with session.get(f"https://api.github.com/repos/{repo_path}/license", ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    download_url = response_json.get("download_url")
    if not download_url:
        return ""

    async with session.get(download_url, ssl=ssl_context) as response:
        # An error page must not be mistaken for a license text.
        if response.status != 200:
            return ""
        return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Stores a license text in the internal license cache (best effort).

    Nothing is sent unless both ``LICENSE_CACHE_URL`` and
    ``LICENSE_CACHE_API_KEY`` are configured. Failures are reported but never
    raised, so a broken cache cannot fail the license collection.

    :param dependency: package name; underscores are sent as hyphens
    :param version: package version
    :param license_text: license text to store
    :param session: HTTP session to use; defaults to the module-level session
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        ) as response:
            if response.status == 201:
                print(f"Put license for {dependency}=={version} into internal cache.")
            elif response.status == 401:
                print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")
    except Exception as exc:
        # ``Exception`` rather than a bare except, so task cancellation and
        # KeyboardInterrupt still propagate; the failure is at least visible.
        print(f"[Warning] Could not put {dependency}=={version} license into internal cache: {exc}")


def check_gpl(dependency: str, license_text: str):
    """
    Aborts the run when a license text mentions GPL or LGPL.

    :param dependency: name used in the error message
    :param license_text: license text to inspect; empty text is accepted
    :raises SystemExit: with code 666 when ``GPL`` or ``LGPL`` appears as a
        whole word anywhere in the text
    """
    # Raw string: in a normal string "\b" is a backspace, not a word boundary.
    # ``search`` instead of ``match``: the mention can be anywhere in the text.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Collects the licenses of all requirements and writes or returns them.

    :param requirements_path: path to a pip requirements file
    :param pull_licenses: when given, only these requirements are processed
    :param dest: output file; when omitted the combined text is returned
    :param download_dir: folder used for downloaded packages
    :param ignore: package names (without versions) to skip
    :param ignore_all_dependencies: only process the listed requirements
    :param overwrite: overwrite ``dest`` instead of appending to it
    :return: the combined license text when ``dest`` is not given, else ``None``.
        Any error while collecting is printed and ``None`` is returned.
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    # ``dest`` is optional, and ``os.path.dirname(None)`` raises TypeError.
    # ``basename`` stops repeated calls from nesting the folder again.
    dest_dir = os.path.dirname(dest) if dest else ""
    PACKAGES_FILE = os.path.join(dest_dir, os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        # One section per package: name, underline of the same length, text.
        license_text = "".join(
            f"{dep}\n{'-' * len(dep)}\n{text}\n\n" for dep, text in licenses.items()
        )

        if dest:
            mode = "w" if overwrite else "a+"
            with open(dest, mode, encoding="utf-8") as file:
                file.write(license_text)
        else:
            return license_text
    except Exception as exc:
        # Top-level boundary: report the problem instead of a traceback.
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Builds the SSL setting passed to the HTTP requests of this script.

    :return: an SSL context backed by the certifi CA bundle, or ``False`` when
        certifi cannot be imported
    """
    try:
        import certifi

        ca_bundle = certifi.where()
        context = ssl.create_default_context(cafile=ca_bundle)
    except ImportError:
        # ModuleNotFoundError is a subclass of ImportError, so both are covered.
        # Don't verify SSL certificate if certifi is not installed.
        context = False
    return context


# Command line entry point: parse the options, collect the licenses and print
# the combined text when no destination file was given.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    parser.add_argument("--pull-licenses", dest="pull_licenses", type=str, nargs="+",
        help="Specifies list of licenses that should be pulled from the requirements.txt list")
    parser.add_argument("-d", "--dest", dest="dest", type=str)
    parser.add_argument("--download-dir", dest="download_dir", type=str)
    parser.add_argument(
        "--ignore",
        dest="ignore",
        action="append",
        help="specifies dependency which licenses should be ignored. "
             "the script will not try to download license for ignored dependency.",
    )
    parser.add_argument(
        "--ignore-all-dependencies",
        dest="ignore_all",
        action="store_true",
        # The trailing space keeps the two concatenated sentences apart.
        help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements. "
             "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
    )
    parser.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="Overwrites the output file if present, otherwise license texts are appended.",
    )
    args = parser.parse_args()

    text = main(args.requirements, args.pull_licenses, args.dest, args.download_dir, args.ignore, ignore_all_dependencies=args.ignore_all,
                overwrite=args.overwrite)
    # ``main`` only returns the text when it was not written to a file.
    if text:
        print(text)


idna-3.3
--------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

try:
    import aiohttp
except ImportError:
    # Only a failed import is expected here. A bare ``except`` would also
    # swallow KeyboardInterrupt / SystemExit raised while importing.
    if platform.uname().system == "Linux" and platform.uname().machine == "aarch64":
        # Missing aiohttp on Linux aarch64 is tolerated: exit successfully.
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

# Type aliases used in annotations: keys are package identifiers, values are
# full license texts.
DependencyName = str
LicenseText = str

# Shared HTTP session. It is bound by ``get_licenses`` for the duration of a run
# and used as the default by the fetch/put helpers.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least).
# The order matters: ``fetch_from_github`` requests the names one by one and
# stops at the first one that exists.
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # Added manually to cover the alternative English spelling "licence".
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names tried, in this order, when guessing raw GitHub license URLs.
main_branches = [
    "master",
    "main",
    "latest",
]

# File that receives the name of every package a license was gathered for.
# ``main`` re-points it next to the destination file.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the internal license cache. The cache is disabled when the
# variable is unset or empty. Trailing slashes are dropped because the request
# URLs are built as "<base>/pip/<name>/<version>".
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# API key sent in the "X-Key" header when licenses are written to the cache.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
) -> Dict[DependencyName, LicenseText]:
    """
    Gathers the license texts for every requirement of a requirements file.

    Each processed line must look like ``name==version``, optionally followed
    by ``; <environment markers>`` and/or extra pip arguments. One
    ``get_license`` task is scheduled per accepted requirement; the results are
    merged, checked with ``check_gpl`` and returned sorted by key.

    :param requirements_path: path to the requirements file
    :param pull_licenses: when given, only requirements named here are processed
    :param download_dir: forwarded to ``get_license``
    :param ignore: package names to skip
    :param ignore_all_dependencies: forwarded to ``get_license``
    :return: mapping of package identifier to license text, sorted by key
    :raises ValueError: when a requirement has no ``==`` version, when a
        requirement yields no result at all, or when a reported package has no
        license text
    """
    global PACKAGES_FILE

    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    # The session is bound to the module-level ``_session`` so that the helper
    # functions pick it up as their default session.
    global _session
    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue
            try:
                dependency, version = requirement.split("==", maxsplit=1)
            except ValueError:
                raise ValueError(f"{requirement} must contain a version.")

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                version, markers = version.split(";")
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            # A requirement is processed only if its markers apply, it is not
            # ignored and, when a pull list is given, it is part of that list.
            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                # Tasks are keyed by name, so a repeated requirement replaces the earlier entry.
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        # Wait for all lookups; the first exception raised by a task propagates.
        await asyncio.gather(*license_tasks.values())

        licenses = {}
        # Packages that were reported without a license text.
        pending = Queue()
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                if package in ignore:
                    continue

                # The first non-empty license found for a package wins.
                if licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")

                if not license_text and package not in ignore:
                    pending.put((package, 0))
                    # raise ValueError(f"License for {package} was not found.")

        # Re-check the pending packages against everything gathered so far:
        # another requirement may have supplied the license in the meantime.
        # Entries are re-queued with an increased counter and reported as
        # missing once the counter exceeds 3.
        missing = []
        while pending.qsize() > 0:
            package, ct = pending.get(timeout=0.01)
            if licenses.get(package):
                print(f"\tverified found license for (after repeat {ct}): {package}")
                continue

            # max dependency nesting depth check to detect missing cases
            if ct > 3:
                missing.append(package)
                continue
            pending.put((package, ct+1))

        if len(missing) > 0:
            raise ValueError(f"Licenses not found for {missing}")

        dependencies = sorted(list(licenses.keys()))

        # Rebuild the mapping so that it iterates in sorted key order.
        licenses = {dependency: licenses[dependency] for dependency in dependencies}
        for dependency, license_text in licenses.items():
            if license_text:
                check_gpl(dependency, license_text)
        return licenses


def _version_aware(compare):
    """
    Wraps a comparison so that dotted numeric versions are compared numerically.
    """
    def parse(text):
        if re.fullmatch(r"\d+(\.\d+)*", text):
            return [int(part) for part in text.split(".")]
        return None

    def wrapper(left, right):
        left_parts, right_parts = parse(left), parse(right)
        if left_parts is None or right_parts is None:
            # Not plain versions (e.g. sys_platform): compare as strings.
            return compare(left, right)
        # Pad with zeros so that "3.8" and "3.8.0" are considered equal.
        width = max(len(left_parts), len(right_parts))
        left_parts += [0] * (width - len(left_parts))
        right_parts += [0] * (width - len(right_parts))
        return compare(left_parts, right_parts)

    return wrapper


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed

    Only a single ``<marker> <operator> <value>`` comparison with the marker
    name on the left-hand side is understood. Unknown markers and markers
    without a supported operator include the dependency.

    :param markers: text after the ``;`` of a requirement line, may be empty
    :return: ``True`` when the dependency applies to the current environment
    :raises ValueError: when several conditions are combined with and/or
    """
    if not markers:
        return True

    # Raw string: in a normal string "\b" is a backspace, not a word boundary.
    # ``search`` instead of ``match``: and/or never appear at the very start.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    # The order matters because operator is found by using 'in'
    operators = {
        # Without version awareness "3.10" would sort before "3.7".
        ">=": _version_aware(operator.ge),
        "<=": _version_aware(operator.le),
        "<": _version_aware(operator.lt),
        ">": _version_aware(operator.gt),
        "==": _version_aware(operator.eq),
        "!=": _version_aware(operator.ne),
        # "<marker> in <value>": the environment value is the needle.
        "not in": lambda binding, value: binding not in value,
        "in": lambda binding, value: binding in value,
    }

    for op, cmp in operators.items():
        if op in markers:
            break
    else:
        # No operators were found, include the dependency.
        return True

    marker, _, value = markers.partition(op)
    marker, value = marker.strip(" '\""), value.strip(" '\"")

    bindings = get_marker_bindings()
    binding = bindings.get(marker)
    if binding:
        return cmp(binding, value)

    # Unknown marker binding -- skip marker and include the dependency
    return True


# Cache for get_marker_bindings(); filled on the first call.
_bindings = {}


def get_marker_bindings():
    """
    Returns the values of the environment markers for the running interpreter:
    os name, platform, python version and other system information.

    The mapping is computed once and the same dict is returned afterwards.
    """
    global _bindings
    if _bindings:
        return _bindings

    implementation_name = ''
    implementation_version = '0'
    if hasattr(sys, 'implementation'):
        info = sys.implementation.version
        implementation_version = f"{info.major}.{info.minor}.{info.micro}"
        if info.releaselevel != 'final':
            # Pre-releases get a suffix: first letter of the level plus serial.
            implementation_version += info.releaselevel[0] + str(info.serial)
        implementation_name = sys.implementation.name

    major_minor = platform.python_version_tuple()[:2]

    _bindings = dict(
        implementation_name=implementation_name,
        implementation_version=implementation_version,
        os_name=os.name,
        platform_machine=platform.machine(),
        platform_python_implementation=platform.python_implementation(),
        platform_release=platform.release(),
        platform_system=platform.system(),
        platform_version=platform.version(),
        python_full_version=platform.python_version(),
        python_version='.'.join(major_minor),
        sys_platform=sys.platform,
    )
    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """
    Downloads a license from the package-metadata S3 bucket (best effort).

    :param package: package name
    :param version: package version
    :param package_manager: top-level folder of the bucket to look in
    :return: license text, or an empty string when the object is missing or
        the request fails
    """
    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        async with _session.get(url) as response:
            # Missing objects are answered with an error document; its body
            # must not be mistaken for a license text.
            if response.status != 200:
                return ""
            return await response.text()
    except Exception:
        # ``Exception`` rather than a bare except, so task cancellation and
        # KeyboardInterrupt still propagate.
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Finds the license of one package, trying local sources before the network.

    With ``download_dir``: an already downloaded copy is inspected first, then
    the S3 bucket, then a fresh ``pip download``. Without it, the folders of
    ``sys.path`` are searched. The last resort in both cases is
    ``fetch_license``. Every license found is also pushed to the internal cache.

    :param dependency: package name
    :param version: package version
    :param download_dir: folder with (or for) downloaded packages
    :param ignore: package names to skip
    :param ignore_all_dependencies: forwarded to ``pip_download``
    :param extra_pip_args: extra command line arguments for ``pip download``
    :return: mapping of ``<name>-<version>`` to license text; empty when the
        package is ignored or no license was found
    """
    # ``ignore`` defaults to None, which does not support the ``in`` test below.
    if ignore is None:
        ignore = []

    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        if os.path.exists(download_dir):
            # Check if package has already been downloaded
            for path in os.listdir(download_dir):
                if f"{dependency.replace('-', '_')}-{version}" not in path:
                    continue

                license_text = await get_local_license(dependency, version, os.path.join(download_dir, path))
                if license_text:
                    print(f"License for {dependency_str} found locally.")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

                license_text = await get_license_from_s3(dependency, version)
                if license_text:
                    print(f"Downloaded a license for {dependency_str} from s3")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

                print(f"No license available for {dependency_str}. Downloading a clean copy.")
                # ``tempdir`` stays referenced until this function returns,
                # which keeps the folder alive for ``pip_download`` below.
                tempdir = tempfile.TemporaryDirectory()
                download_dir = tempdir.name
                # The remaining entries belong to the old folder: going on would
                # only repeat the S3 request and create more temporary folders.
                break

        licenses = await pip_download(dependency, version, download_dir, ignore,
                                      ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}

    print(f"No license found for: {dependency_str}")
    return {}


# Splits a downloaded file name such as "name-1.2.3-py3-none-any.whl" into
# the package name (group 1) and the version (group 2).
# NOTE(review): versions without a dot (e.g. "pkg-2-py3-none-any.whl") look
# like they end up in the name group -- confirm whether that matters.
dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Downloads a package (and its dependencies) with pip and collects licenses.

    Every downloaded archive is extracted next to itself and searched with
    ``get_local_license``; ``fetch_license`` and ``get_license_from_s3`` are
    the fallbacks. Licenses found are pushed to the internal cache.

    :param dependency: package name
    :param version: package version
    :param download_dir: parent folder; files go to ``<download_dir>/<dependency>``
    :param ignore: package names to skip
    :param ignore_all_dependencies: download with ``--no-deps``; in that mode
        the internal cache is consulted before downloading anything
    :param extra_pip_args: extra command line arguments for ``pip download``
    :return: mapping of ``<name>-<version>`` to license text (possibly empty
        text) for every downloaded archive
    :raises Exception: when the pip process cannot be started
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            dependency_str = f"{dependency}-{version}"
            return {dependency_str: license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = [
        sys.executable,
        "-m",
        "pip",
        "download",
        f"{dependency}=={version}",
        "-d",
        download_dep_dir,
        "-q",
        "--disable-pip-version-check",
        *additional_args,
    ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        await process.wait()
    except Exception as exc:
        raise Exception(f"Failed to find and download a license for {dependency}=={version} using pip.") from exc

    # pip may fail before creating the folder; report "nothing found" so the
    # caller can fall back to its other license sources.
    if not os.path.isdir(download_dep_dir):
        return {}

    licenses = {}
    for path in os.listdir(download_dep_dir):
        if os.path.isdir(join(download_dep_dir, path)):
            # Skip extracted folders.
            continue

        match = dependency_regex.match(path)
        if not match:
            continue

        dependency_name = match.group(1)
        dependency_version = match.group(2)
        dependency_path = join(download_dep_dir, path)

        if dependency_name in ignore:
            continue

        # NOTE(review): ``extractall`` trusts the member paths of downloaded
        # archives; consider an extraction filter where the runtime offers one.
        archive_path, archive_ext = os.path.splitext(dependency_path)
        # ``splitext`` keeps the leading dot of the extension.
        if archive_ext in (".gz", ".bz", ".bz2", ".tgz", ".tbz"):
            if dependency_path.endswith(".tar.gz") or dependency_path.endswith(".tgz"):
                mode = "r:gz"
            elif dependency_path.endswith(".tar.bz2") or dependency_path.endswith(".tbz"):
                mode = "r:bz2"
            else:
                mode = "r"

            with tarfile.open(dependency_path, mode) as archive:
                archive.extractall(archive_path)

            # Source archives contain one top-level folder named after the
            # archive, so step into it. For .tar.gz / .tar.bz2 the extraction
            # folder still carries the ".tar" suffix.
            archive_name = os.path.split(archive_path)[-1]
            if archive_name.endswith(".tar"):
                archive_name = archive_name[: -len(".tar")]
            archive_path = join(archive_path, archive_name)
        else:
            with zipfile.ZipFile(dependency_path) as archive:
                archive.extractall(archive_path)

        dependency_str = f"{dependency_name}-{dependency_version}"
        licenses[dependency_str] = await get_local_license(dependency_name, dependency_version, archive_path)

        if not licenses[dependency_str]:
            licenses[dependency_str] = await fetch_license(dependency_name, dependency_version)

        if not licenses[dependency_str]:
            licenses[dependency_str] = await get_license_from_s3(dependency_name, dependency_version)

        if licenses[dependency_str]:
            await put_license_into_internal_cache(dependency_name, dependency_version, licenses[dependency_str])
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Searches every ``sys.path`` entry for a license of the given package.

    :param dependency: package name
    :param version: package version
    :return: the first non-empty text reported by ``get_local_license``,
        or an empty string when no entry yields one
    """
    for search_dir in sys.path:
        found = await get_local_license(dependency, version, search_dir)
        if not found:
            continue
        return found
    return ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Looks for a license of ``dependency`` inside the folder ``path``.

    Lookup order:
      1. ``<name>-<version>*.egg-info`` / ``*.dist-info`` entries of ``path``
      2. an entry of ``path`` named exactly like the dependency (case-insensitive)
      3. ``path`` itself

    :return: the license text, or an empty string when nothing was found or
        ``path`` is not a directory
    """
    lowered_name = dependency.lower()
    # Metadata folders use underscores where the package name has hyphens.
    meta_prefix = f"{lowered_name.replace('-', '_')}-{version}"

    if not os.path.isdir(path):
        return ""

    metadata_entries = []
    source_dir = ""
    for entry in os.listdir(path):
        entry_lower = entry.lower()
        if entry_lower.startswith(meta_prefix):
            metadata_entries.append(join(path, entry))
        elif entry_lower == lowered_name:
            source_dir = join(path, entry)

    for candidate in metadata_entries:
        if candidate.endswith(".egg-info"):
            text = await egg_info_license(candidate)
        elif candidate.endswith(".dist-info"):
            text = await dist_info_license(candidate)
        else:
            continue
        if text:
            return text

    # Package source folder first (when present), then the given folder itself.
    search_dirs = [source_dir, path] if source_dir else [path]
    for folder in search_dirs:
        text = find_license_file(folder)
        if text:
            return text

    return ""


async def egg_info_license(package_path: str) -> str:
    """
    Resolves a license through the ``Home-page`` field of ``.egg-info`` metadata.

    :param package_path: path to a ``*.egg-info`` entry; either a folder holding
        a ``PKG-INFO`` file or a single file in ``PKG-INFO`` format
    :return: license text obtained via ``fetch_from_github`` for the home page,
        or an empty string when the metadata or the home page is missing
    """
    # Legacy installs ship ``*.egg-info`` as one plain file instead of a folder.
    if os.path.isfile(package_path):
        pkg_info_path = package_path
    else:
        pkg_info_path = join(package_path, "PKG-INFO")

    # Missing metadata must not abort the whole license lookup.
    if not os.path.isfile(pkg_info_path):
        return ""

    url = ""
    # Explicit encoding: the platform default may not be able to decode the file.
    with open(pkg_info_path, encoding="utf-8", errors="ignore") as file:
        for line in file:
            if line.startswith("Home-page:"):
                url = line[len("Home-page:"):].strip()
                break

    if not url:
        return ""
    return (await fetch_from_github(url)) or ""


async def dist_info_license(package_path: str) -> str:
    """
    Resolves a license from a ``.dist-info`` folder.

    A license file inside the folder wins; otherwise the ``Home-page`` field of
    ``METADATA`` is passed to ``fetch_from_github``.

    :param package_path: path to the ``*.dist-info`` folder
    :return: license text, or an empty string when nothing was found
    """
    license_text = find_license_file(package_path)
    if license_text:
        return license_text

    metadata_path = join(package_path, "METADATA")
    if not exists(metadata_path):
        return ""

    try:
        # Read as bytes and decode per line so one badly encoded line
        # does not make the whole file unreadable.
        with open(metadata_path, "rb") as file:
            metadata = file.readlines()
    except Exception:
        print(f"Could not open metadata file for: {package_path}")
        raise

    for raw_line in metadata:
        try:
            line = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            continue
        if line.startswith("Home-page:"):
            # Only the first Home-page line is considered.
            url = line[len("Home-page:"):].strip()
            if url:
                return (await fetch_from_github(url)) or ""
            break

    return ""


def find_license_file(package_path: str) -> str:
    """
    Reads the first license-like file found in a package folder.

    When ``package_path`` contains a ``licenses`` sub-folder only that folder is
    searched, otherwise ``package_path`` itself is. Sub-directories are skipped.
    The first file, in sorted order, whose name starts with ``license``,
    ``licence`` or ``copying`` (case-insensitive) is read.

    :param package_path: folder to search
    :return: content of the license file, or an empty string when
        ``package_path`` is not a directory or holds no license file
    """
    if not os.path.isdir(package_path):
        return ""

    licenses_folder = join(package_path, "licenses")
    # isdir instead of exists: a plain file named "licenses" cannot be listed.
    if os.path.isdir(licenses_folder):
        license_search_folder = licenses_folder
    else:
        license_search_folder = package_path

    # safe because the (possibly) case-sensitive path is what we have (unlike URL).
    # Every name in the module-level license name list starts with one of these
    # words, so the prefix match already covers the exact-name lookup.
    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)

    license_path = ""
    # Sorted so the chosen file does not depend on directory listing order.
    for name in sorted(os.listdir(license_search_folder)):
        candidate = join(license_search_folder, name)
        if os.path.isdir(candidate):
            continue
        if license_regex.match(name):
            license_path = candidate
            break

    if not license_path:
        return ""

    # try / except on purpose: decoding problems should be reported, not hidden.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetches a license for a PyPI package over the network.

    The internal license cache is asked first. Otherwise the PyPI JSON API is
    queried for the project home page, which is passed to ``fetch_from_github``.

    :param dependency: package name
    :param version: package version
    :param session: HTTP session to use; defaults to the module-level session
    :return: license text, or an empty string when PyPI does not know the
        release or no license could be downloaded
    :raises ValueError: when PyPI lists no home page for the release
    """
    if session is None:
        session = _session

    license_text = await fetch_license_from_internal_cache(dependency, version)
    if license_text:
        return license_text

    # ``async with`` releases the connection as soon as the body is read.
    async with session.get(f"https://pypi.org/pypi/{dependency}/{version}/json", ssl=get_ssl_cert()) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    info = response_json.get("info") or {}
    home_page = info.get("home_page")
    if not home_page:
        # ``project_urls`` may be null and its keys are free-form labels,
        # so look the home page up defensively and case-insensitively.
        project_urls = info.get("project_urls") or {}
        home_page = next(
            (link for label, link in project_urls.items() if label.lower() == "homepage" and link),
            None,
        )
        if not home_page:
            raise ValueError(f"home_page is missing for {dependency}=={version}")

    return (await fetch_from_github(home_page)) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Asks the internal license cache for the license of a pip package.

    The cache is only used when ``LICENSE_CACHE_URL`` is configured. The lookup
    is best effort: any request or decoding failure yields an empty string.

    :param dependency: package name; underscores are sent as hyphens
    :param version: package version
    :param session: HTTP session to use; defaults to the module-level session
    :return: cached license text, or an empty string
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}") as response:
            if response.status != 200:
                return ""
            payload = await response.json()
        # A missing or null "text" entry counts as "not cached".
        license_text = payload.get("text") or ""
    except Exception:
        # ``Exception`` rather than a bare except, so task cancellation and
        # KeyboardInterrupt still propagate.
        return ""

    if license_text:
        print(f"Found license for {dependency}=={version} in internal cache.")
    return license_text


async def fetch_from_github(url: str, session: aiohttp.ClientSession = None) -> str:
    """
    Downloads the license of a GitHub repository.

    Well-known license file names are tried on well-known branches first, to
    stay away from the rate-limited GitHub API; the API is the fallback.

    :param url: project home page; anything not hosted on GitHub is ignored
    :param session: HTTP session to use; defaults to the module-level session
    :return: license text, or an empty string when none was found
    """
    # The file only does ``import urllib``; load the submodule explicitly
    # instead of relying on another package having imported it.
    import urllib.parse

    if session is None:
        session = _session

    parsed_url = urllib.parse.urlparse(url)
    if "github" not in parsed_url.netloc:
        return ""

    # Keep only "<owner>/<repo>": deeper paths (e.g. ".../tree/main/docs")
    # never form a valid raw-content or API URL.
    repo_path = "/".join(parsed_url.path.strip("/").split("/")[:2])

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            raw_url = f"https://raw.githubusercontent.com/{repo_path}/{branch}/{license_name}"
            async with session.get(raw_url, ssl=ssl_context) as response:
                if response.status == 200:
                    return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    async with session.get(f"https://api.github.com/repos/{repo_path}/license", ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    download_url = response_json.get("download_url")
    if not download_url:
        return ""

    async with session.get(download_url, ssl=ssl_context) as response:
        # An error page must not be mistaken for a license text.
        if response.status != 200:
            return ""
        return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Stores a license text in the internal license cache (best effort).

    Nothing is sent unless both ``LICENSE_CACHE_URL`` and
    ``LICENSE_CACHE_API_KEY`` are configured. Failures are reported but never
    raised, so a broken cache cannot fail the license collection.

    :param dependency: package name; underscores are sent as hyphens
    :param version: package version
    :param license_text: license text to store
    :param session: HTTP session to use; defaults to the module-level session
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        ) as response:
            if response.status == 201:
                print(f"Put license for {dependency}=={version} into internal cache.")
            elif response.status == 401:
                print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")
    except Exception as exc:
        # ``Exception`` rather than a bare except, so task cancellation and
        # KeyboardInterrupt still propagate; the failure is at least visible.
        print(f"[Warning] Could not put {dependency}=={version} license into internal cache: {exc}")


def check_gpl(dependency: str, license_text: str):
    """
    Abort the program when a license text mentions GPL or LGPL.

    :param dependency: Name of the dependency, used in the error message.
    :param license_text: License text to inspect; empty text is accepted.
    :raises SystemExit: With code 666 when "GPL" or "LGPL" occurs as a whole word.
    """
    # Raw string: in a plain literal "\b" is a backspace character, not a word boundary.
    # re.search scans the whole text, whereas re.match only looks at its beginning.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Collect the licenses of all requirements and write them out or return them.

    :param requirements_path: Path to the pinned requirements file.
    :param pull_licenses: Optional allow-list of requirement names to process.
    :param dest: Output file; when omitted the combined text is returned instead.
    :param download_dir: Folder used for downloading packages with pip.
    :param ignore: Package names (without versions) to skip.
    :param ignore_all_dependencies: Only process the listed requirements themselves.
    :param overwrite: Truncate ``dest`` instead of appending to it.
    :return: The combined license text when ``dest`` is not given, otherwise None.
        Errors are printed and also result in None.
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    # dest is optional, so fall back to the current directory. Using the base name
    # keeps repeated calls from nesting the path further each time.
    packages_dir = os.path.dirname(dest) if dest else ""
    PACKAGES_FILE = os.path.join(packages_dir, os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        # One section per dependency: name, dashed underline, license text.
        license_text = "".join(
            f"{dep}\n" f"{'-' * len(dep)}\n" f"{text}\n\n" for dep, text in licenses.items()
        )

        if dest:
            mode = "w" if overwrite else "a+"
            with open(dest, mode, encoding="utf-8") as file:
                file.write(license_text)
        else:
            return license_text
    except Exception as exc:
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Build the ``ssl`` argument passed to the HTTP requests of this script.

    :return: A default SSL context backed by the certifi CA bundle, or ``False``
        (certificate verification disabled) when certifi cannot be imported.
    """
    try:
        import certifi as ca_bundle

        context = ssl.create_default_context(cafile=ca_bundle.where())
    except ImportError:
        # ModuleNotFoundError derives from ImportError, so one clause covers both.
        # Don't verify SSL certificate if certifi is not installed.
        context = False
    return context


if __name__ == "__main__":
    # Command line entry point: parse the options, run main() and print the
    # combined license text when no destination file was requested.
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    parser.add_argument("--pull-licenses", dest="pull_licenses", type=str, nargs="+",
        help="Specifies list of licenses that should be pulled from the requirements.txt list")
    parser.add_argument("-d", "--dest", dest="dest", type=str)
    parser.add_argument("--download-dir", dest="download_dir", type=str)
    parser.add_argument(
        "--ignore",
        dest="ignore",
        action="append",
        help="specifies dependency which licenses should be ignored. "
             "the script will not try to download license for ignored dependency.",
    )
    parser.add_argument(
        "--ignore-all-dependencies",
        dest="ignore_all",
        action="store_true",
        # The trailing space keeps the two concatenated sentences apart in --help output.
        help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements. "
             "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
    )
    parser.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="Overwrites the output file if present, otherwise license texts are appended.",
    )
    args = parser.parse_args()

    text = main(args.requirements, args.pull_licenses, args.dest, args.download_dir, args.ignore, ignore_all_dependencies=args.ignore_all,
                overwrite=args.overwrite)
    if text:
        print(text)


idna-ssl-1.1.0
--------------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

try:
    import aiohttp
except Exception:
    # Catch Exception rather than using a bare except so that KeyboardInterrupt
    # and SystemExit raised during the import are not intercepted.
    # On Linux aarch64 a failed import is tolerated: warn and exit successfully.
    uname = platform.uname()
    if uname.system == "Linux" and uname.machine == "aarch64":
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

# Type aliases that make the Dict[...] annotations of the helpers self-describing.
DependencyName = str
LicenseText = str

# HTTP session shared by the helpers below; get_licenses() binds it for one run.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # added manually to pair common english spellings
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names tried, in this order, when guessing raw GitHub license URLs.
main_branches = [
    "master",
    "main",
    "latest",
]

# File that records every package a license was gathered for; main() re-bases
# it next to the destination file.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the internal license cache, taken from the environment; a trailing
# slash is removed so request paths can be appended with "/".
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# API key sent in the "X-Key" header when storing licenses in the cache.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Gather the license texts for every pinned requirement of a requirements file.

    Each requirement line must have the form ``name==version``, optionally followed
    by ``; <environment marker>`` and/or extra pip arguments. One ``get_license``
    task is scheduled per accepted requirement and all tasks run concurrently.

    :param requirements_path: Path of the requirements file to read.
    :param pull_licenses: When given, only requirements named in this list are processed.
    :param download_dir: Forwarded to ``get_license``.
    :param ignore: Package names that are skipped.
    :param ignore_all_dependencies: Forwarded to ``get_license``.
    :return: Dict mapping package keys to license text, ordered by key.
    :raises ValueError: When a requirement has no ``==`` version, when a task
        produced no result for a requirement, or when licenses stay missing.
    """
    global PACKAGES_FILE

    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    global _session
    # The session is bound to the module-level name so every helper can reach it.
    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue
            try:
                dependency, version = requirement.split("==", maxsplit=1)
            except ValueError:
                raise ValueError(f"{requirement} must contain a version.")

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                version, markers = version.split(";")
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            # Schedule a lookup only for requirements that pass the marker check,
            # are not ignored and (when an allow-list is given) are listed in it.
            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        # Wait for all lookups; an exception raised by any task propagates from here.
        await asyncio.gather(*license_tasks.values())

        licenses = {}
        pending = Queue()
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                if package in ignore:
                    continue

                # First non-empty license text found for a package wins.
                if licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")

                if not license_text and package not in ignore:
                    # Remember the package; another requirement's result may still
                    # provide its license later in this loop.
                    pending.put((package, 0))
                    # raise ValueError(f"License for {package} was not found.")

        missing = []
        # Re-check every package that had no license text. Nothing inside this loop
        # adds to `licenses`, so a package either was resolved by the loop above or
        # is re-queued until its counter exceeds 3 and is then reported missing.
        while pending.qsize() > 0:
            package, ct = pending.get(timeout=0.01)
            if licenses.get(package):
                print(f"\tverified found license for (after repeat {ct}): {package}")
                continue

            # max dependency nesting depth check to detect missing cases
            if ct > 3:
                missing.append(package)
                continue
            pending.put((package, ct+1))

        if len(missing) > 0:
            raise ValueError(f"Licenses not found for {missing}")

        dependencies = sorted(list(licenses.keys()))

        # Rebuild the dict in sorted key order, then run the GPL check on each text.
        licenses = {dependency: licenses[dependency] for dependency in dependencies}
        for dependency, license_text in licenses.items():
            if license_text:
                check_gpl(dependency, license_text)
        return licenses


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed

    Only a single ``<marker> <operator> <value>`` expression is supported.

    :param markers: Environment marker text following ";" in a requirement line.
    :return: True when the dependency applies to the current environment, when
        there is no marker, or when the marker name is unknown.
    :raises ValueError: When the marker combines conditions with "and" / "or".
    """
    if not markers:
        return True

    # Raw string + re.search: "\b" in a plain literal is a backspace character and
    # re.match only inspects the start of the text, so compound markers were missed.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    def as_version(text):
        # Dotted numeric versions compare correctly only as integer tuples
        # ("3.10" sorts before "3.7" as a string); None means "not a plain version".
        try:
            return tuple(int(part) for part in text.split("."))
        except ValueError:
            return None

    # The order matters because operator is found by using 'in'
    operators = {
        ">=": operator.ge,
        "<=": operator.le,
        "<": operator.lt,
        ">": operator.gt,
        "==": operator.eq,
        "!=": operator.ne,
        # PEP 508: "<marker> in <value>" tests the environment value against the
        # quoted string, i.e. binding in value.
        "not in": lambda a, b: a not in b,
        "in": lambda a, b: a in b,
    }
    ordering_operators = (">=", "<=", "<", ">")

    for op, cmp in operators.items():
        if op in markers:
            break
    else:
        # No operators were found, include the dependency.
        return True

    marker, _, value = markers.partition(op)
    marker, value = marker.strip(" '\""), value.strip(" '\"")

    bindings = get_marker_bindings()
    binding = bindings.get(marker)
    if binding:
        if op in ordering_operators:
            left, right = as_version(binding), as_version(value)
            if left is not None and right is not None:
                return cmp(left, right)
        return cmp(binding, value)

    # Unknown marker binding -- skip marker and include the dependency
    return True


_bindings = {}


def get_marker_bindings():
    """
    Provide the values environment markers are evaluated against: os name,
    platform details, python version and interpreter implementation.

    The mapping is computed on the first call and memoized in ``_bindings``.
    """
    global _bindings

    # Serve the memoized mapping once it has been filled.
    if _bindings:
        return _bindings

    def render_version(info):
        # "major.minor.micro", plus e.g. "b1" for non-final release levels.
        text = f"{info.major}.{info.minor}.{info.micro}"
        level = info.releaselevel
        if level == 'final':
            return text
        return text + level[0] + str(info.serial)

    if not hasattr(sys, 'implementation'):
        impl_name, impl_version = '', '0'
    else:
        impl_name = sys.implementation.name
        impl_version = render_version(sys.implementation.version)

    major_minor = platform.python_version_tuple()[:2]

    _bindings = dict(
        implementation_name=impl_name,
        implementation_version=impl_version,
        os_name=os.name,
        platform_machine=platform.machine(),
        platform_python_implementation=platform.python_implementation(),
        platform_release=platform.release(),
        platform_system=platform.system(),
        platform_version=platform.version(),
        python_full_version=platform.python_version(),
        python_version='.'.join(major_minor),
        sys_platform=sys.platform,
    )
    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """
    Fetch a license text from the package-metadata S3 bucket (best effort).

    :param package: Package name.
    :param version: Package version.
    :param package_manager: Top-level folder of the bucket, "pip" by default.
    :return: License text, or an empty string when the object is unavailable or
        the request fails.
    """
    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        async with _session.get(url) as response:
            # A non-200 answer carries an error body; it must not be returned as
            # if it were the license text.
            if response.status != 200:
                return ""
            return await response.text()
    except Exception:
        # Best-effort source: any request failure simply means "no license here".
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Find the license of one dependency, trying the cheapest sources first.

    With ``download_dir``: an already downloaded copy, then S3, then a pip download.
    Without it: the packages on ``sys.path``. The last resort in both cases is
    ``fetch_license`` (internal cache / PyPI / GitHub).

    :param dependency: Package name.
    :param version: Package version.
    :param download_dir: Folder with (or for) pip downloads.
    :param ignore: Package names to skip; None means nothing is ignored.
    :param ignore_all_dependencies: Forwarded to ``pip_download``.
    :param extra_pip_args: Extra pip arguments taken from the requirement line.
    :return: Dict mapping "<name>-<version>" keys to license texts; empty when
        the dependency is ignored or no license was found.
    """
    # The default is None, which cannot be used with "in".
    if ignore is None:
        ignore = []

    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        # Holds the temporary folder (if any) alive until this function returns.
        tempdir = None
        if os.path.exists(download_dir):
            downloaded_prefix = f"{dependency.replace('-', '_')}-{version}"
            # Check if package has already been downloaded
            for path in os.listdir(download_dir):
                if downloaded_prefix not in path:
                    continue

                license_text = await get_local_license(dependency, version, os.path.join(download_dir, path))
                if license_text:
                    print(f"License for {dependency_str} found locally.")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

                license_text = await get_license_from_s3(dependency, version)
                if license_text:
                    print(f"Downloaded a license for {dependency_str} from s3")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

                print(f"No license available for {dependency_str}. Downloading a clean copy.")
                tempdir = tempfile.TemporaryDirectory()
                download_dir = tempdir.name
                # The remaining entries belong to the old folder; scanning on would
                # only repeat the S3 request and create further temporary folders.
                break

        licenses = await pip_download(dependency, version, download_dir, ignore,
                                      ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}
    print(f"No license found for: {dependency_str}")
    # Explicit empty result keeps the declared Dict return type.
    return {}


# Splits a downloaded file name into "<name>-<version>..." parts.
dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Download a package (and, by default, its dependencies) with pip and extract
    the license of every downloaded archive.

    :param dependency: Package name.
    :param version: Package version.
    :param download_dir: Parent folder; archives go to "<download_dir>/<dependency>".
    :param ignore: Package names to skip.
    :param ignore_all_dependencies: Pass "--no-deps" to pip; the internal cache is
        consulted first in that case.
    :param extra_pip_args: Extra pip arguments, split on whitespace.
    :return: Dict mapping "<name>-<version>" to the license text found ("" or
        None when none of the sources had one).
    :raises Exception: When the pip subprocess could not be run.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            dependency_str = f"{dependency}-{version}"
            return {dependency_str: license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = \
        [
            sys.executable,
            "-m",
            "pip",
            "download",
            f"{dependency}=={version}",
            "-d",
            download_dep_dir,
            "-q",
            "--disable-pip-version-check",
            *additional_args
        ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        await process.wait()
    except Exception as exc:
        # Catching Exception (not a bare except) lets task cancellation propagate.
        raise Exception(f"Failed to find and download a license for {dependency}=={version} using pip.") from exc

    if not os.path.isdir(download_dep_dir):
        # pip failed before creating the folder: report "nothing found" so the
        # caller can fall back to its other license sources.
        return {}

    licenses = {}
    for path in os.listdir(download_dep_dir):
        if os.path.isdir(join(download_dep_dir, path)):
            # Skip extracted folders.
            continue

        match = dependency_regex.match(path)
        if match:
            dependency_name = match.group(1)
            dependency_version = match.group(2)
            dependency_path = join(download_dep_dir, path)

            if dependency_name in ignore:
                continue

            archive_path, archive_ext = os.path.splitext(dependency_path)
            # os.path.splitext keeps the leading dot, so every suffix needs one.
            if archive_ext in (".gz", ".bz", ".bz2", ".tgz", ".tbz"):
                if dependency_path.endswith(".tar.gz") or dependency_path.endswith(".tgz"):
                    mode = "r:gz"
                elif dependency_path.endswith(".tar.bz2") or dependency_path.endswith(".tbz"):
                    mode = "r:bz2"
                else:
                    mode = "r"

                # NOTE(review): extractall trusts member paths inside the archive;
                # consider an extraction filter if archives may be untrusted.
                with tarfile.open(dependency_path, mode) as archive:
                    archive.extractall(archive_path)

                archive_name = os.path.split(archive_path)[-1]
                if archive_path.endswith(".tar"):
                    # Unpacking .tar.gz or .tar.bz2 will contain an extra .tar folder
                    archive_path = join(archive_path, archive_name[: -len(".tar")])
                elif os.path.isdir(join(archive_path, archive_name)):
                    # .tgz / .tbz: use the nested top-level folder when present.
                    archive_path = join(archive_path, archive_name)

            else:
                with zipfile.ZipFile(dependency_path) as archive:
                    archive.extractall(archive_path)

            dependency_str = f"{dependency_name}-{dependency_version}"
            licenses[dependency_str] = await get_local_license(dependency_name, dependency_version, archive_path)

            if not licenses[dependency_str]:
                licenses[dependency_str] = await fetch_license(dependency_name, dependency_version)

            if not licenses[dependency_str]:
                licenses[dependency_str] = await get_license_from_s3(dependency_name, dependency_version)

            if licenses[dependency_str]:
                await put_license_into_internal_cache(dependency_name, dependency_version, licenses[dependency_str])
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Search every ``sys.path`` entry, in order, for the dependency's license.

    :return: The first non-empty text reported by ``get_local_license``,
        otherwise an empty string.
    """
    for search_root in sys.path:
        found = await get_local_license(dependency, version, search_root)
        if not found:
            continue
        return found
    return ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Look for the license of a dependency inside a local folder.

    Sources are tried in this order: "<name>-<version>*.egg-info" / ".dist-info"
    metadata entries, a folder named after the package, and the folder itself.

    :param dependency: Package name as written in the requirements.
    :param version: Package version.
    :param path: Folder to inspect.
    :return: License text, or an empty string when nothing was found or ``path``
        is not a directory.
    """
    dependency_name = dependency.lower().replace("-", "_")
    package_meta_candidates = []
    package_path = ""

    if not os.path.isdir(path):
        return ""

    # Package folders use underscores where the distribution name has dashes,
    # so both spellings are accepted.
    package_folder_names = (dependency.lower(), dependency_name)

    for dependency_path in os.listdir(path):
        if dependency_path.lower().startswith(f"{dependency_name}-{version}"):
            package_meta_candidates.append(join(path, dependency_path))
        elif dependency_path.lower() in package_folder_names:
            package_path = join(path, dependency_path)

    for package_meta in package_meta_candidates:
        if package_meta.endswith(".egg-info"):
            license_text = await egg_info_license(package_meta)
            if license_text:
                return license_text
        elif package_meta.endswith(".dist-info"):
            license_text = await dist_info_license(package_meta)
            if license_text:
                return license_text

    if package_path:
        # Try to find a license file in the package source code
        license_text = find_license_file(package_path)
        if license_text:
            return license_text

    # A license file can be in the specified folder itself.
    license_text = find_license_file(path)
    if license_text:
        return license_text

    return ""


async def egg_info_license(package_path: str) -> str:
    """
    Resolve a license through the "Home-page" field of egg-info metadata.

    :param package_path: An ".egg-info" folder containing "PKG-INFO", or a
        single-file ".egg-info" that holds the metadata itself.
    :return: License text downloaded from the project's GitHub home page, or an
        empty string when the metadata or the home page is missing.
    """
    if os.path.isfile(package_path):
        # Single-file egg-info: the file itself is the metadata.
        pkg_info_path = package_path
    else:
        pkg_info_path = join(package_path, "PKG-INFO")
        if not os.path.isfile(pkg_info_path):
            return ""

    # Decode explicitly and leniently: the platform default encoding may not be
    # UTF-8 and only the ASCII "Home-page:" header is of interest.
    with open(pkg_info_path, encoding="utf-8", errors="replace") as file:
        info = file.readlines()

    url = None
    for line in info:
        if line.startswith("Home-page:"):
            url = line[len("Home-page:"):].strip()
            break

    if url:
        return (await fetch_from_github(url)) or ""
    return ""


async def dist_info_license(package_path: str) -> str:
    """
    Resolve a license from a ".dist-info" folder.

    A license file shipped in the folder wins; otherwise the "Home-page" field
    of "METADATA" is followed to GitHub.

    :param package_path: Path of the ".dist-info" folder.
    :return: License text, or an empty string when none could be determined.
    """
    license_text = find_license_file(package_path)
    if license_text:
        return license_text

    metadata_path = join(package_path, "METADATA")
    if not exists(metadata_path):
        return ""

    with open(metadata_path, "rb") as file:
        try:
            metadata = file.readlines()
        except Exception:
            print(f"Could not open metadata file for: {package_path}")
            raise

    url = None
    for line in metadata:
        # The file is read as bytes so one undecodable line cannot hide the
        # "Home-page" header; such lines are skipped.
        try:
            line = line.decode("utf-8")
        except UnicodeDecodeError:
            continue
        if line.startswith("Home-page:"):
            url = line[len("Home-page:"):].strip()
            break

    if url:
        return (await fetch_from_github(url)) or ""
    # Always return a string so the result matches the declared return type.
    return ""


def find_license_file(package_path: str) -> str:
    """
    Read the first license-like file of a folder.

    When the folder has a "licenses" sub-folder only that sub-folder is searched.
    A file qualifies when its name starts with "license", "licence" or "copying"
    in any letter case, which covers every entry of ``license_names``.

    :param package_path: Folder to search (not recursive).
    :return: Content of the license file, or an empty string when ``package_path``
        is not a directory or holds no such file.
    """
    if not os.path.isdir(package_path):
        return ""

    # Only a real directory can be listed; a plain file named "licenses" is ignored.
    licenses_folder = join(package_path, "licenses")
    license_search_folder = licenses_folder if os.path.isdir(licenses_folder) else package_path

    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)
    license_path = ""
    # Sorted so the chosen file does not depend on the file system's listing order.
    for path in sorted(os.listdir(license_search_folder)):
        candidate = join(license_search_folder, path)
        if os.path.isdir(candidate):
            continue
        # safe because the (possibly) case-sensitive path is what we have (unlike URL)
        #   verify if any version of the common licen[cs]e or copying names appear
        if license_regex.match(path):
            license_path = candidate
            break

    if not license_path:
        return ""

    # i want to try / except here because i want to know when we have issues reading files.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetch a license remotely: internal cache first, then PyPI metadata + GitHub.

    :param dependency: Package name.
    :param version: Package version.
    :param session: HTTP session to use, defaults to the module-level ``_session``.
    :return: License text, or an empty string when PyPI does not know the release
        or the home page yields no license.
    :raises ValueError: When the PyPI metadata names no home page.
    """
    if session is None:
        session = _session

    license_text = await fetch_license_from_internal_cache(dependency, version, session=session)
    if license_text:
        return license_text

    # "async with" releases the connection as soon as the JSON body was read.
    async with session.get(f"https://pypi.org/pypi/{dependency}/{version}/json", ssl=get_ssl_cert()) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    # "info", "project_urls" and "Homepage" may each be absent or null; missing
    # data must end in the documented ValueError, not a KeyError / TypeError.
    info = response_json.get("info") or {}
    home_page = info.get("home_page")
    if not home_page:
        project_urls = info.get("project_urls") or {}
        home_page = project_urls.get("Homepage")
        if not home_page:
            raise ValueError(f"home_page is missing for {dependency}=={version}")

    return (await fetch_from_github(home_page, session=session)) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Look a license up in the internal license cache (best effort).

    :param dependency: Package name; underscores are replaced by dashes for the cache key.
    :param version: Package version.
    :param session: HTTP session to use, defaults to the module-level ``_session``.
    :return: Cached license text, or an empty string when the cache is disabled,
        has no entry, or the request fails.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}") as response:
            if response.status != 200:
                return ""
            payload = await response.json()
        print(f"Found license for {dependency}=={version} in internal cache.")
        # A missing or null "text" field must still give a string.
        return payload.get("text") or ""
    except Exception:
        # The cache is optional; catching Exception (not a bare except) lets task
        # cancellation propagate.
        return ""


async def fetch_from_github(url: str, session: Optional["aiohttp.ClientSession"] = None) -> str:
    """
    Download the license text of a project hosted on GitHub.

    The license file name is guessed first on raw.githubusercontent.com for every
    branch in ``main_branches`` and every name in ``license_names``. Only when no
    guess succeeds is the rate limited GitHub REST API queried.

    :param url: Project home page; only URLs whose host contains "github" are handled.
    :param session: HTTP session to use, defaults to the module-level ``_session``.
    :return: License text, or an empty string when nothing could be downloaded.
    """
    if not url:
        return ""

    if session is None:
        session = _session

    parsed_url = urllib.parse.urlparse(url)
    if "github" not in parsed_url.netloc:
        # Always return a string so the result matches the declared return type.
        return ""

    url = parsed_url.path.strip("/")

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            # "async with" releases the connection even when the guess misses.
            async with session.get(
                f"https://raw.githubusercontent.com/{url}/{branch}/{license_name}", ssl=ssl_context
            ) as response:
                if response.status == 200:
                    return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    async with session.get(f"https://api.github.com/repos/{url}/license", ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    download_url = response_json.get("download_url")
    if not download_url:
        return ""

    async with session.get(download_url, ssl=ssl_context) as response:
        # An error page must never be stored as a license text.
        if response.status != 200:
            return ""
        return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Upload a license text to the internal license cache (best effort).

    Nothing is sent unless both ``LICENSE_CACHE_URL`` and ``LICENSE_CACHE_API_KEY``
    are set. Failures are reported on stdout and never raised to the caller.

    :param dependency: Package name; underscores are replaced by dashes for the cache key.
    :param version: Package version.
    :param license_text: License text to store.
    :param session: HTTP session to use, defaults to the module-level ``_session``.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        # "async with" releases the connection once the status has been inspected.
        async with session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        ) as response:
            if response.status == 201:
                print(f"Put license for {dependency}=={version} into internal cache.")
            elif response.status == 401:
                print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")
    except Exception as exc:
        # Caching is optional: report the problem but keep going. Catching Exception
        # (not a bare except) lets task cancellation propagate.
        print(f"[Warning] Could not put {dependency}=={version} license into internal cache: {exc}")


def check_gpl(dependency: str, license_text: str):
    """
    Abort the program when a license text mentions GPL or LGPL.

    :param dependency: Name of the dependency, used in the error message.
    :param license_text: License text to inspect; empty text is accepted.
    :raises SystemExit: With code 666 when "GPL" or "LGPL" occurs as a whole word.
    """
    # Raw string: in a plain literal "\b" is a backspace character, not a word boundary.
    # re.search scans the whole text, whereas re.match only looks at its beginning.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Collect the licenses of all requirements and write them out or return them.

    :param requirements_path: Path to the pinned requirements file.
    :param pull_licenses: Optional allow-list of requirement names to process.
    :param dest: Output file; when omitted the combined text is returned instead.
    :param download_dir: Folder used for downloading packages with pip.
    :param ignore: Package names (without versions) to skip.
    :param ignore_all_dependencies: Only process the listed requirements themselves.
    :param overwrite: Truncate ``dest`` instead of appending to it.
    :return: The combined license text when ``dest`` is not given, otherwise None.
        Errors are printed and also result in None.
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    # dest is optional, so fall back to the current directory. Using the base name
    # keeps repeated calls from nesting the path further each time.
    packages_dir = os.path.dirname(dest) if dest else ""
    PACKAGES_FILE = os.path.join(packages_dir, os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        # One section per dependency: name, dashed underline, license text.
        license_text = "".join(
            f"{dep}\n" f"{'-' * len(dep)}\n" f"{text}\n\n" for dep, text in licenses.items()
        )

        if dest:
            mode = "w" if overwrite else "a+"
            with open(dest, mode, encoding="utf-8") as file:
                file.write(license_text)
        else:
            return license_text
    except Exception as exc:
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Build the ``ssl`` argument passed to the HTTP requests of this script.

    :return: A default SSL context backed by the certifi CA bundle, or ``False``
        (certificate verification disabled) when certifi cannot be imported.
    """
    try:
        import certifi as ca_bundle

        context = ssl.create_default_context(cafile=ca_bundle.where())
    except ImportError:
        # ModuleNotFoundError derives from ImportError, so one clause covers both.
        # Don't verify SSL certificate if certifi is not installed.
        context = False
    return context


if __name__ == "__main__":
    # Command line entry point: parse the options, run main() and print the
    # combined license text when no destination file was requested.
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    parser.add_argument("--pull-licenses", dest="pull_licenses", type=str, nargs="+",
        help="Specifies list of licenses that should be pulled from the requirements.txt list")
    parser.add_argument("-d", "--dest", dest="dest", type=str)
    parser.add_argument("--download-dir", dest="download_dir", type=str)
    parser.add_argument(
        "--ignore",
        dest="ignore",
        action="append",
        help="specifies dependency which licenses should be ignored. "
             "the script will not try to download license for ignored dependency.",
    )
    parser.add_argument(
        "--ignore-all-dependencies",
        dest="ignore_all",
        action="store_true",
        # The trailing space keeps the two concatenated sentences apart in --help output.
        help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements. "
             "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
    )
    parser.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="Overwrites the output file if present, otherwise license texts are appended.",
    )
    args = parser.parse_args()

    text = main(args.requirements, args.pull_licenses, args.dest, args.download_dir, args.ignore, ignore_all_dependencies=args.ignore_all,
                overwrite=args.overwrite)
    if text:
        print(text)


importlib-metadata-7.2.1
------------------------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

try:
    import aiohttp
except Exception:
    # Catch Exception rather than using a bare except so that KeyboardInterrupt
    # and SystemExit raised during the import are not intercepted.
    # On Linux aarch64 a failed import is tolerated: warn and exit successfully.
    uname = platform.uname()
    if uname.system == "Linux" and uname.machine == "aarch64":
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

# Type aliases that make the Dict[...] annotations of the helpers self-describing.
DependencyName = str
LicenseText = str

# HTTP session shared by the helpers below; get_licenses() binds it for one run.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # added manually to pair common english spellings
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names tried, in this order, when guessing raw GitHub license URLs.
main_branches = [
    "master",
    "main",
    "latest",
]

# File that records every package a license was gathered for.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the internal license cache, taken from the environment; a trailing
# slash is removed so request paths can be appended with "/".
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# API key for the internal license cache, taken from the environment.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Collects license texts for the pinned requirements listed in a file.

    Every line that starts with an alphanumeric character must look like
    ``name==version``, optionally followed by ``; <environment marker>`` and/or
    extra pip arguments. Lines that pass the marker, ``ignore`` and
    ``pull_licenses`` filters are resolved concurrently with ``get_license``
    inside a single aiohttp session that is published through the module-level
    ``_session``.

    Args:
        requirements_path: Path of the requirements file to read.
        pull_licenses: If given, only these dependency names are processed.
        download_dir: Forwarded to ``get_license``.
        ignore: Dependency names to skip; ``None`` means nothing is ignored.
        ignore_all_dependencies: Forwarded to ``get_license``.

    Returns:
        Dict mapping each package key produced by ``get_license`` to its
        license text, ordered by key.

    Raises:
        ValueError: If a requirement has no ``==`` version, if a requirement
            produced no result at all, or if some package ends up without a
            license text.

    Side effects:
        Appends every package key that has a license to ``PACKAGES_FILE`` and
        runs ``check_gpl`` on each collected text.
    """
    # Only read in this function; main() is what reassigns it.
    global PACKAGES_FILE

    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    # The session is bound to the module global so the fetch helpers can use it
    # without receiving it as an argument.
    global _session
    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        # dependency name -> scheduled get_license() future
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue
            try:
                dependency, version = requirement.split("==", maxsplit=1)
            except ValueError:
                raise ValueError(f"{requirement} must contain a version.")

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                version, markers = version.split(";")
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                # Keyed by name only: a second line for the same dependency
                # replaces the earlier future in this dict.
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        # Wait for every lookup; the first exception raised by a task propagates.
        await asyncio.gather(*license_tasks.values())

        licenses = {}
        # Packages reported without a license text, re-checked after all
        # results have been merged.
        pending = Queue()
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                # NOTE(review): the keys built by get_license/pip_download look like
                # "name-version" while `ignore` holds bare names, so this check
                # probably never matches -- confirm.
                if package in ignore:
                    continue

                # First non-empty text for a package wins.
                if licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")

                if not license_text and package not in ignore:
                    pending.put((package, 0))
                    # raise ValueError(f"License for {package} was not found.")

        missing = []
        # Nothing inside this loop adds entries to `licenses`, so a pending
        # package is only cleared if another requirement's result already
        # supplied its text above; otherwise it is re-queued until ct > 3.
        while pending.qsize() > 0:
            package, ct = pending.get(timeout=0.01)
            if licenses.get(package):
                print(f"\tverified found license for (after repeat {ct}): {package}")
                continue

            # max dependency nesting depth check to detect missing cases
            if ct > 3:
                missing.append(package)
                continue
            pending.put((package, ct+1))

        if len(missing) > 0:
            raise ValueError(f"Licenses not found for {missing}")

        dependencies = sorted(list(licenses.keys()))

        # Rebuild the dict so iteration order follows the sorted keys.
        licenses = {dependency: licenses[dependency] for dependency in dependencies}
        for dependency, license_text in licenses.items():
            if license_text:
                check_gpl(dependency, license_text)
        return licenses


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed.

    Only a single ``<marker> <operator> <value>`` comparison is understood
    (https://www.python.org/dev/peps/pep-0508/#environment-markers).

    Args:
        markers: Text that followed ``;`` on the requirement line; may be empty.

    Returns:
        False only when the marker name is known and the comparison evaluates
        to false for the running interpreter/platform. True in every other
        case (no marker, no operator found, unknown marker name).

    Raises:
        ValueError: If the markers combine several conditions with
            ``and`` / ``or``.
    """
    if not markers:
        return True

    # Raw string + search: in a plain literal "\b" is a backspace character and
    # re.match only inspects the start of the text, so the guard never fired.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    comparators = {
        ">=": operator.ge,
        "<=": operator.le,
        "<": operator.lt,
        ">": operator.gt,
        "==": operator.eq,
        "!=": operator.ne,
        # PEP 508: "a in b" is true when a is a substring of b.
        "not in": lambda left, right: left not in right,
        "in": lambda left, right: left in right,
    }

    # Two-character operators are listed before their one-character prefixes,
    # and in / not in must be whole words so that names such as
    # "platform_machine" are not mistaken for the "in" operator.
    found = re.search(r">=|<=|==|!=|<|>|\bnot\s+in\b|\bin\b", markers)
    if found is None:
        # No operators were found, include the dependency.
        return True

    op = " ".join(found.group(0).split())
    marker = markers[:found.start()].strip(" '\"")
    value = markers[found.end():].strip(" '\"")

    binding = get_marker_bindings().get(marker)
    if not binding:
        # Unknown marker binding -- skip marker and include the dependency
        return True

    def as_version(text):
        """Return a tuple of ints for a dotted numeric version, else None."""
        if not re.fullmatch(r"\d+(\.\d+)*", text):
            return None
        return tuple(int(part) for part in text.split("."))

    compare = comparators[op]
    version_markers = ("python_version", "python_full_version", "implementation_version")
    if marker in version_markers and op not in ("in", "not in"):
        # Compare versions numerically: as strings "3.10" sorts before "3.6".
        left, right = as_version(binding), as_version(value)
        if left is not None and right is not None:
            width = max(len(left), len(right))
            left += (0,) * (width - len(left))
            right += (0,) * (width - len(right))
            return compare(left, right)

    return compare(binding, value)


_bindings = {}


def get_marker_bindings():
    """
    Returns the lookup table of environment-marker names (os name, platform,
    python version and other system information) to their values on the running
    interpreter. The table is computed on the first call and stored in the
    module-level ``_bindings`` for later calls.
    """
    global _bindings

    # Serve the table built by an earlier call.
    if _bindings:
        return _bindings

    def format_full_version(info):
        # "major.minor.micro", plus e.g. "b2" for non-final releases.
        base = f"{info.major}.{info.minor}.{info.micro}"
        if info.releaselevel == 'final':
            return base
        return base + info.releaselevel[0] + str(info.serial)

    if not hasattr(sys, 'implementation'):
        impl_name, impl_version = '', '0'
    else:
        impl_version = format_full_version(sys.implementation.version)
        impl_name = sys.implementation.name

    major_minor = platform.python_version_tuple()[:2]

    _bindings = dict(
        implementation_name=impl_name,
        implementation_version=impl_version,
        os_name=os.name,
        platform_machine=platform.machine(),
        platform_python_implementation=platform.python_implementation(),
        platform_release=platform.release(),
        platform_system=platform.system(),
        platform_version=platform.version(),
        python_full_version=platform.python_version(),
        python_version='.'.join(major_minor),
        sys_platform=sys.platform,
    )
    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """
    Best-effort download of a stored license text from the package-metadata
    S3 bucket.

    Args:
        package: Package name used in the object key.
        version: Package version used in the object key.
        package_manager: First path segment of the object key.

    Returns:
        The response body when the request answers 200, otherwise "" (also on
        any request error).
    """
    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        async with _session.get(url) as response:
            # Error responses carry a body too; only a 200 body is a license.
            if response.status != 200:
                return ""
            return await response.text()
    except Exception:
        # Deliberately best-effort; ``Exception`` (not a bare except) so that
        # task cancellation still propagates.
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Resolves the license text(s) for one pinned dependency.

    With ``download_dir``: entries of that folder whose name contains
    ``<name_with_underscores>-<version>`` are checked for a local license
    first; if entries exist but none has one, S3 is asked, and failing that a
    clean copy is downloaded into a temporary folder. Otherwise (or when
    nothing matched) ``pip_download`` is used with ``download_dir``.
    Without ``download_dir``: ``sys.path`` is searched.
    In both cases ``fetch_license`` is the last resort.

    Args:
        dependency: Package name as written in the requirements file.
        version: Pinned version.
        download_dir: Folder holding/receiving downloaded packages, or None.
        ignore: Package names to skip; ``None`` means nothing is ignored.
        ignore_all_dependencies: Forwarded to ``pip_download``.
        extra_pip_args: Extra pip arguments, forwarded to ``pip_download``.

    Returns:
        Dict of ``"<name>-<version>"`` to license text; may contain several
        packages when pip also downloaded dependencies. Empty when the
        dependency is ignored or no license was found.
    """
    # The default is None; the membership test below needs a container.
    if ignore is None:
        ignore = []
    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        scratch_dir = None
        try:
            if os.path.exists(download_dir):
                # Check if package has already been downloaded
                wanted = f"{dependency.replace('-', '_')}-{version}"
                matches = [path for path in os.listdir(download_dir) if wanted in path]

                # Look at every matching entry of the original folder before
                # falling back, so a license in a later entry is not missed.
                for path in matches:
                    license_text = await get_local_license(dependency, version, os.path.join(download_dir, path))
                    if license_text:
                        print(f"License for {dependency_str} found locally.")

                        await put_license_into_internal_cache(dependency, version, license_text)
                        return {dependency_str: license_text}

                if matches:
                    license_text = await get_license_from_s3(dependency, version)
                    if license_text:
                        print(f"Downloaded a license for {dependency_str} from s3")

                        await put_license_into_internal_cache(dependency, version, license_text)
                        return {dependency_str: license_text}

                    print(f"No license available for {dependency_str}. Downloading a clean copy.")
                    scratch_dir = tempfile.TemporaryDirectory()
                    download_dir = scratch_dir.name

            licenses = await pip_download(dependency, version, download_dir, ignore,
                                          ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        finally:
            # pip_download has read everything it needs by the time it returns.
            if scratch_dir is not None:
                scratch_dir.cleanup()

        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}
    print(f"No license found for: {dependency_str}")
    # Explicit empty result to match the annotated return type; callers only
    # test the result for truthiness.
    return {}


# Splits a downloaded file name into (<package name>, <version>).
dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Downloads a package (and, unless disabled, its dependencies) with
    ``pip download`` and extracts a license text for every downloaded archive.

    Args:
        dependency: Package name.
        version: Pinned version.
        download_dir: Parent folder; archives go to ``<download_dir>/<dependency>``.
        ignore: Package names to skip.
        ignore_all_dependencies: When true, the internal cache is consulted
            first and pip is run with ``--no-deps``.
        extra_pip_args: Whitespace-separated extra arguments appended to the
            pip command line.

    Returns:
        Dict of ``"<name>-<version>"`` to license text ("" when none was found
        for that archive). Empty when the dependency is ignored or nothing was
        downloaded.

    Raises:
        Exception: If the pip subprocess could not be run.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            dependency_str = f"{dependency}-{version}"
            return {dependency_str: license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = [
        sys.executable,
        "-m",
        "pip",
        "download",
        f"{dependency}=={version}",
        "-d",
        download_dep_dir,
        "-q",
        "--disable-pip-version-check",
        *additional_args,
    ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        await process.wait()
    except Exception as exc:
        # Keep the original failure attached as the cause.
        raise Exception(f"Failed to find and download a license for {dependency}=={version} using pip.") from exc

    if not os.path.isdir(download_dep_dir):
        # Nothing was downloaded; let the caller fall back to other sources.
        return {}

    # os.path.splitext() returns the extension with its leading dot.
    tar_suffixes = (".gz", ".bz", ".bz2", ".tgz", ".tbz")

    licenses = {}
    for path in os.listdir(download_dep_dir):
        dependency_path = join(download_dep_dir, path)
        if os.path.isdir(dependency_path):
            # Skip extracted folders.
            continue

        match = dependency_regex.match(path)
        if not match:
            continue

        dependency_name = match.group(1)
        dependency_version = match.group(2)
        if dependency_name in ignore:
            continue

        archive_path, archive_ext = os.path.splitext(dependency_path)
        if archive_ext in tar_suffixes:
            if dependency_path.endswith((".tar.gz", ".tgz")):
                mode = "r:gz"
            elif dependency_path.endswith((".tar.bz2", ".tbz")):
                mode = "r:bz2"
            else:
                mode = "r"

            # NOTE(review): archives come from a package index; consider
            # tarfile extraction filters where the interpreter supports them.
            with tarfile.open(dependency_path, mode) as archive:
                archive.extractall(archive_path)

            # Source archives usually wrap everything in "<name>-<version>/";
            # descend into that folder when it exists.
            root_name = os.path.split(archive_path)[-1]
            if root_name.endswith(".tar"):
                root_name = root_name[: -len(".tar")]
            nested_root = join(archive_path, root_name)
            if os.path.isdir(nested_root):
                archive_path = nested_root
        else:
            with zipfile.ZipFile(dependency_path) as archive:
                archive.extractall(archive_path)

        dependency_str = f"{dependency_name}-{dependency_version}"
        licenses[dependency_str] = await get_local_license(dependency_name, dependency_version, archive_path)

        if not licenses[dependency_str]:
            licenses[dependency_str] = await fetch_license(dependency_name, dependency_version)

        if not licenses[dependency_str]:
            licenses[dependency_str] = await get_license_from_s3(dependency_name, dependency_version)

        if licenses[dependency_str]:
            await put_license_into_internal_cache(dependency_name, dependency_version, licenses[dependency_str])
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Looks for a locally available license of the dependency in every entry of
    ``sys.path``, in order, and returns the first non-empty text ("" if none).
    """
    for search_root in sys.path:
        found_text = await get_local_license(dependency, version, search_root)
        if not found_text:
            continue
        return found_text
    return ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Searches the folder ``path`` for a license belonging to the dependency.

    Tried in order: metadata entries named ``<name>-<version>*`` (``.egg-info``
    via ``egg_info_license``, ``.dist-info`` via ``dist_info_license``), then an
    entry named exactly like the dependency, then ``path`` itself.

    Returns the license text, or "" when ``path`` is not a folder or nothing
    was found.
    """
    if not os.path.isdir(path):
        return ""

    # Metadata entries use underscores instead of dashes; compare lower-cased.
    metadata_prefix = f"{dependency.lower().replace('-', '_')}-{version}"
    package_name = dependency.lower()

    metadata_entries = []
    source_dir = ""
    for entry in os.listdir(path):
        lowered = entry.lower()
        if lowered.startswith(metadata_prefix):
            metadata_entries.append(join(path, entry))
        elif lowered == package_name:
            source_dir = join(path, entry)

    for metadata_entry in metadata_entries:
        if metadata_entry.endswith(".egg-info"):
            reader = egg_info_license
        elif metadata_entry.endswith(".dist-info"):
            reader = dist_info_license
        else:
            continue
        text = await reader(metadata_entry)
        if text:
            return text

    # Try the package source folder first, then the searched folder itself.
    search_folders = [source_dir, path] if source_dir else [path]
    for folder in search_folders:
        text = find_license_file(folder)
        if text:
            return text

    return ""


async def egg_info_license(package_path: str) -> str:
    """
    Fetches the license of a package described by ``.egg-info`` metadata by
    following its ``Home-page:`` field to GitHub.

    Args:
        package_path: An ``.egg-info`` folder (metadata in ``PKG-INFO`` inside
            it) or a single-file ``.egg-info`` that is itself the metadata.

    Returns:
        The license text, or "" when the metadata is missing, has no
        ``Home-page:`` value, or no license could be fetched.
    """
    if os.path.isfile(package_path):
        pkg_info_path = package_path
    else:
        pkg_info_path = join(package_path, "PKG-INFO")
    if not exists(pkg_info_path):
        return ""

    url = ""
    # Decode explicitly and leniently: a stray byte in an unrelated field must
    # not prevent reading the Home-page line.
    with open(pkg_info_path, encoding="utf-8", errors="ignore") as file:
        for line in file:
            if line.startswith("Home-page:"):
                url = line[len("Home-page:"):].strip()
                break

    if not url:
        return ""
    return (await fetch_from_github(url)) or ""


async def dist_info_license(package_path: str) -> str:
    """
    Finds the license of a package from its ``.dist-info`` folder.

    A license file inside the folder wins; otherwise the ``Home-page:`` field
    of ``METADATA`` is followed to GitHub.

    Args:
        package_path: Path of the ``.dist-info`` folder.

    Returns:
        The license text, or "" when nothing was found.

    Raises:
        Exception: Whatever reading ``METADATA`` raised (after printing which
            package was affected).
    """
    license_text = find_license_file(package_path)
    if license_text:
        return license_text

    metadata_path = join(package_path, "METADATA")
    if not exists(metadata_path):
        return ""

    try:
        with open(metadata_path, "rb") as file:
            metadata = file.readlines()
    except Exception:
        print(f"Could not open metadata file for: {package_path}")
        raise

    url = ""
    for raw_line in metadata:
        # Read as bytes and decode per line so one undecodable line cannot
        # hide the Home-page field.
        try:
            line = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            continue
        if line.startswith("Home-page:"):
            url = line[len("Home-page:"):].strip()
            break

    if not url:
        return ""
    return (await fetch_from_github(url)) or ""


def find_license_file(package_path: str) -> str:
    """
    Reads a license file located directly in a package folder.

    A ``licenses`` sub-folder is searched instead of the folder itself when it
    exists. A file qualifies when its name starts with "license", "licence" or
    "copying" in any letter case (every entry of ``license_names`` satisfies
    this, so no separate exact-name comparison is needed) and it is not a
    Python source or binary module.

    Args:
        package_path: Folder to inspect.

    Returns:
        The file content, or "" when ``package_path`` is not a folder or holds
        no license file. Undecodable bytes are dropped with a warning.
    """
    if not os.path.isdir(package_path):
        return ""

    licenses_folder = join(package_path, "licenses")
    if os.path.isdir(licenses_folder):
        license_search_folder = licenses_folder
    else:
        license_search_folder = package_path

    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)
    # A module such as "license_tool.py" is code, not a license text.
    code_suffixes = (".py", ".pyc", ".pyo", ".pyi", ".pyd", ".so", ".dll")

    license_path = ""
    # Sorted so the choice does not depend on directory listing order.
    for path in sorted(os.listdir(license_search_folder)):
        candidate = join(license_search_folder, path)
        if os.path.isdir(candidate):
            continue
        if path.lower().endswith(code_suffixes):
            continue
        # safe because the (possibly) case-sensitive path is what we have (unlike URL)
        #   verify if any version of the common licen[cs]e or copying names appear
        if license_regex.match(path):
            license_path = candidate
            break

    if not license_path:
        return ""

    # Try strict decoding first so that decoding problems are reported.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetches a license text for a released package from remote sources.

    The internal cache is asked first; otherwise the package's PyPI JSON
    record is read and its home page is followed to GitHub.

    Args:
        dependency: Package name.
        version: Package version.
        session: Session used for the PyPI request; defaults to the shared
            module session.

    Returns:
        The license text, or "" when PyPI does not answer 200 or no license
        could be fetched from the home page.

    Raises:
        ValueError: If the PyPI record names no home page.
    """
    if session is None:
        session = _session

    license_text = await fetch_license_from_internal_cache(dependency, version)
    if license_text:
        return license_text

    async with session.get(f"https://pypi.org/pypi/{dependency}/{version}/json", ssl=get_ssl_cert()) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    info = response_json.get("info") or {}
    home_page = info.get("home_page")
    if not home_page:
        # "project_urls" may be null or lack the entry; its labels are free
        # text, so compare them case-insensitively.
        project_urls = info.get("project_urls") or {}
        for label, link in project_urls.items():
            if label.lower() == "homepage" and link:
                home_page = link
                break
        if not home_page:
            raise ValueError(f"home_page is missing for {dependency}=={version}")

    return (await fetch_from_github(home_page)) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Best-effort lookup of a license text in the internal license cache.

    Args:
        dependency: Package name; underscores are replaced by dashes for the
            request.
        version: Package version.
        session: Session to use; defaults to the shared module session.

    Returns:
        The cached text, or "" when the cache is disabled, has no entry, or
        the request failed.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}") as response:
            if response.status != 200:
                return ""
            payload = await response.json()
        cached_text = payload.get("text") or ""
    except Exception:
        # The cache is optional; ``Exception`` (not a bare except) so that
        # task cancellation still propagates.
        return ""

    print(f"Found license for {dependency}=={version} in internal cache.")
    return cached_text


async def fetch_from_github(url: str, session: "aiohttp.ClientSession" = None) -> str:
    """
    Downloads the license file of a GitHub repository.

    Well-known license file names are probed on the usual default branches
    through raw.githubusercontent.com first (no API rate limit); the GitHub
    license API is only used when none of them exists.

    Args:
        url: Project home page. Only ``github.com`` URLs of the form
            ``/<owner>/<repo>[/...]`` are handled; deeper paths and a trailing
            ``.git`` are reduced to ``<owner>/<repo>``.
        session: Session to use; defaults to the shared module session.

    Returns:
        The license text, or "" when the URL is not a GitHub repository or no
        license could be downloaded.
    """
    # ``import urllib`` alone does not guarantee that the submodule is loaded.
    import urllib.parse

    parsed_url = urllib.parse.urlparse(url)
    host = (parsed_url.hostname or "").lower()
    if host not in ("github.com", "www.github.com"):
        return ""

    segments = [segment for segment in parsed_url.path.split("/") if segment]
    if len(segments) < 2:
        return ""
    owner, repo = segments[0], segments[1]
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    repo_slug = f"{owner}/{repo}"

    if session is None:
        session = _session

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            raw_url = f"https://raw.githubusercontent.com/{repo_slug}/{branch}/{license_name}"
            async with session.get(raw_url, ssl=ssl_context) as response:
                if response.status == 200:
                    return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    async with session.get(f"https://api.github.com/repos/{repo_slug}/license", ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    download_url = response_json.get("download_url")
    if not download_url:
        return ""

    async with session.get(download_url, ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Best-effort upload of a license text to the internal license cache.

    Args:
        dependency: Package name; underscores are replaced by dashes for the
            request.
        version: Package version.
        license_text: Text to store.
        session: Session to use; defaults to the shared module session.

    Returns:
        "" when the cache URL or API key is not configured, otherwise None.
        Request failures are reported as a warning and never raised.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        ) as response:
            status = response.status
    except Exception as exc:
        # Caching is optional, so a failure must not abort the run -- but it
        # should be visible instead of being dropped silently.
        print(f"[Warning] Could not put {dependency}=={version} license into internal cache: {exc}")
        return

    if status == 201:
        print(f"Put license for {dependency}=={version} into internal cache.")
    elif status == 401:
        print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")


def check_gpl(dependency: str, license_text: str):
    """
    Aborts the process when a license text mentions GPL or LGPL.

    Args:
        dependency: Package key, used in the error message.
        license_text: License text to scan; empty text is accepted.

    Raises:
        SystemExit: With code 666 when the whole word "GPL" or "LGPL" occurs
            anywhere in the text.
    """
    # Raw string + search: in a plain literal "\b" is a backspace character and
    # re.match only inspects the start of the text, so nothing ever matched.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Collects the licenses for a requirements file and writes or returns them.

    Args:
        requirements_path: Path of the requirements file.
        pull_licenses: If given, only these dependency names are processed.
        dest: Output file. When omitted, the combined text is returned instead.
        download_dir: Folder holding/receiving downloaded packages.
        ignore: Dependency names (without version) to skip.
        ignore_all_dependencies: Only handle the listed requirements, not
            their dependencies.
        overwrite: Replace ``dest`` instead of appending to it.

    Returns:
        The combined license text when ``dest`` is not given; None when the
        text was written to ``dest`` or when collecting failed (the error is
        printed).
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    # ``dest`` is optional, so only derive a folder from it when it is set.
    # The base name is re-used so repeated calls do not stack folder prefixes.
    dest_dir = os.path.dirname(dest) if dest else ""
    PACKAGES_FILE = os.path.join(dest_dir, os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        # One section per package: key, dashed underline, license text.
        license_text = "".join(
            f"{dep}\n{'-' * len(dep)}\n{text}\n\n" for dep, text in licenses.items()
        )

        if dest:
            mode = "w" if overwrite else "a+"
            with open(dest, mode, encoding="utf-8") as file:
                file.write(license_text)
        else:
            return license_text
    except Exception as exc:
        # NOTE(review): failures are only printed, so callers cannot tell a
        # failed run from a successful one -- confirm this is intended.
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Builds the SSL context used for outgoing HTTPS requests.

    Returns:
        A verifying context backed by certifi's CA bundle when certifi is
        installed, otherwise a verifying context backed by the system's
        default CA certificates.
    """
    try:
        import certifi
    except ImportError:
        # Without certifi, fall back to the system trust store rather than
        # turning certificate verification off.
        return ssl.create_default_context()

    return ssl.create_default_context(cafile=certifi.where())


# Command-line entry point: parse the options, run main() and print the
# combined license text when no destination file was given.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    parser.add_argument("--pull-licenses", dest="pull_licenses", type=str, nargs="+",
        help="Specifies list of licenses that should be pulled from the requirements.txt list")
    parser.add_argument("-d", "--dest", dest="dest", type=str)
    parser.add_argument("--download-dir", dest="download_dir", type=str)
    parser.add_argument(
        "--ignore",
        dest="ignore",
        action="append",
        help="specifies dependency which licenses should be ignored. "
             "the script will not try to download license for ignored dependency.",
    )
    parser.add_argument(
        "--ignore-all-dependencies",
        dest="ignore_all",
        action="store_true",
        # The first fragment ends with a space so the two sentences do not run together.
        help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements. "
             "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
    )
    parser.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="Overwrites the output file if present, otherwise license texts are appended.",
    )
    args = parser.parse_args()

    text = main(args.requirements, args.pull_licenses, args.dest, args.download_dir, args.ignore, ignore_all_dependencies=args.ignore_all,
                overwrite=args.overwrite)
    if text:
        print(text)


more-itertools-8.12.0
---------------------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

try:
    import aiohttp
except ImportError:
    # Only a failed import is tolerated here (a bare ``except`` would also
    # hide unrelated errors). On Linux aarch64 the script exits successfully
    # instead of failing; on every other platform the ImportError propagates.
    _uname = platform.uname()
    if _uname.system == "Linux" and _uname.machine == "aarch64":
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

# Readability aliases for annotations; both are plain strings.
DependencyName = str
LicenseText = str

# HTTP session shared by the fetch helpers. get_licenses() binds it for the
# duration of a run; it is None before that.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
# fetch_from_github() probes these names in this order for every branch in
# main_branches; find_license_file() compares local file names against them.
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # added manually to pair common english spellings
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names tried, in order, when guessing the raw GitHub URL of a license.
main_branches = [
    "master",
    "main",
    "latest",
]

# File that get_licenses() appends one package key per line to. main()
# re-points it to the directory of the destination file before running.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the optional internal license cache, stored without a trailing
# slash. When unset or empty, cache lookups and uploads are skipped.
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# Sent as the "X-Key" header when uploading to the cache; uploads are skipped
# when it is missing.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Collects license texts for the pinned requirements listed in a file.

    Every line that starts with an alphanumeric character must look like
    ``name==version``, optionally followed by ``; <environment marker>`` and/or
    extra pip arguments. Lines that pass the marker, ``ignore`` and
    ``pull_licenses`` filters are resolved concurrently with ``get_license``
    inside a single aiohttp session that is published through the module-level
    ``_session``.

    Args:
        requirements_path: Path of the requirements file to read.
        pull_licenses: If given, only these dependency names are processed.
        download_dir: Forwarded to ``get_license``.
        ignore: Dependency names to skip; ``None`` means nothing is ignored.
        ignore_all_dependencies: Forwarded to ``get_license``.

    Returns:
        Dict mapping each package key produced by ``get_license`` to its
        license text, ordered by key.

    Raises:
        ValueError: If a requirement has no ``==`` version, if a requirement
            produced no result at all, or if some package ends up without a
            license text.

    Side effects:
        Appends every package key that has a license to ``PACKAGES_FILE`` and
        runs ``check_gpl`` on each collected text.
    """
    # Only read in this function; main() is what reassigns it.
    global PACKAGES_FILE

    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    # The session is bound to the module global so the fetch helpers can use it
    # without receiving it as an argument.
    global _session
    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        # dependency name -> scheduled get_license() future
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue
            try:
                dependency, version = requirement.split("==", maxsplit=1)
            except ValueError:
                raise ValueError(f"{requirement} must contain a version.")

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                version, markers = version.split(";")
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                # Keyed by name only: a second line for the same dependency
                # replaces the earlier future in this dict.
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        # Wait for every lookup; the first exception raised by a task propagates.
        await asyncio.gather(*license_tasks.values())

        licenses = {}
        # Packages reported without a license text, re-checked after all
        # results have been merged.
        pending = Queue()
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                # NOTE(review): the keys built by get_license/pip_download look like
                # "name-version" while `ignore` holds bare names, so this check
                # probably never matches -- confirm.
                if package in ignore:
                    continue

                # First non-empty text for a package wins.
                if licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")

                if not license_text and package not in ignore:
                    pending.put((package, 0))
                    # raise ValueError(f"License for {package} was not found.")

        missing = []
        # Nothing inside this loop adds entries to `licenses`, so a pending
        # package is only cleared if another requirement's result already
        # supplied its text above; otherwise it is re-queued until ct > 3.
        while pending.qsize() > 0:
            package, ct = pending.get(timeout=0.01)
            if licenses.get(package):
                print(f"\tverified found license for (after repeat {ct}): {package}")
                continue

            # max dependency nesting depth check to detect missing cases
            if ct > 3:
                missing.append(package)
                continue
            pending.put((package, ct+1))

        if len(missing) > 0:
            raise ValueError(f"Licenses not found for {missing}")

        dependencies = sorted(list(licenses.keys()))

        # Rebuild the dict so iteration order follows the sorted keys.
        licenses = {dependency: licenses[dependency] for dependency in dependencies}
        for dependency, license_text in licenses.items():
            if license_text:
                check_gpl(dependency, license_text)
        return licenses


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed.

    Only a single ``<marker> <operator> <value>`` comparison is understood
    (https://www.python.org/dev/peps/pep-0508/#environment-markers).

    Args:
        markers: Text that followed ``;`` on the requirement line; may be empty.

    Returns:
        False only when the marker name is known and the comparison evaluates
        to false for the running interpreter/platform. True in every other
        case (no marker, no operator found, unknown marker name).

    Raises:
        ValueError: If the markers combine several conditions with
            ``and`` / ``or``.
    """
    if not markers:
        return True

    # Raw string + search: in a plain literal "\b" is a backspace character and
    # re.match only inspects the start of the text, so the guard never fired.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    comparators = {
        ">=": operator.ge,
        "<=": operator.le,
        "<": operator.lt,
        ">": operator.gt,
        "==": operator.eq,
        "!=": operator.ne,
        # PEP 508: "a in b" is true when a is a substring of b.
        "not in": lambda left, right: left not in right,
        "in": lambda left, right: left in right,
    }

    # Two-character operators are listed before their one-character prefixes,
    # and in / not in must be whole words so that names such as
    # "platform_machine" are not mistaken for the "in" operator.
    found = re.search(r">=|<=|==|!=|<|>|\bnot\s+in\b|\bin\b", markers)
    if found is None:
        # No operators were found, include the dependency.
        return True

    op = " ".join(found.group(0).split())
    marker = markers[:found.start()].strip(" '\"")
    value = markers[found.end():].strip(" '\"")

    binding = get_marker_bindings().get(marker)
    if not binding:
        # Unknown marker binding -- skip marker and include the dependency
        return True

    def as_version(text):
        """Return a tuple of ints for a dotted numeric version, else None."""
        if not re.fullmatch(r"\d+(\.\d+)*", text):
            return None
        return tuple(int(part) for part in text.split("."))

    compare = comparators[op]
    version_markers = ("python_version", "python_full_version", "implementation_version")
    if marker in version_markers and op not in ("in", "not in"):
        # Compare versions numerically: as strings "3.10" sorts before "3.6".
        left, right = as_version(binding), as_version(value)
        if left is not None and right is not None:
            width = max(len(left), len(right))
            left += (0,) * (width - len(left))
            right += (0,) * (width - len(right))
            return compare(left, right)

    return compare(binding, value)


_bindings = {}


def get_marker_bindings():
    """
    Returns the lookup table of environment-marker names (os name, platform,
    python version and other system information) to their values on the running
    interpreter. The table is computed on the first call and stored in the
    module-level ``_bindings`` for later calls.
    """
    global _bindings

    # Serve the table built by an earlier call.
    if _bindings:
        return _bindings

    def format_full_version(info):
        # "major.minor.micro", plus e.g. "b2" for non-final releases.
        base = f"{info.major}.{info.minor}.{info.micro}"
        if info.releaselevel == 'final':
            return base
        return base + info.releaselevel[0] + str(info.serial)

    if not hasattr(sys, 'implementation'):
        impl_name, impl_version = '', '0'
    else:
        impl_version = format_full_version(sys.implementation.version)
        impl_name = sys.implementation.name

    major_minor = platform.python_version_tuple()[:2]

    _bindings = dict(
        implementation_name=impl_name,
        implementation_version=impl_version,
        os_name=os.name,
        platform_machine=platform.machine(),
        platform_python_implementation=platform.python_implementation(),
        platform_release=platform.release(),
        platform_system=platform.system(),
        platform_version=platform.version(),
        python_full_version=platform.python_version(),
        python_version='.'.join(major_minor),
        sys_platform=sys.platform,
    )
    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """
    Fetch a license text from the package-metadata S3 bucket.

    Args:
        package: Package name used in the object key.
        version: Package version used in the object key.
        package_manager: First path component of the object key.

    Returns:
        The response body on HTTP 200, otherwise an empty string. Any request
        error is treated as "not available" and also yields an empty string.
    """
    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        # The context manager releases the connection even on early return.
        async with _session.get(url) as response:
            if response.status != 200:
                # An error response carries an error document, not a license;
                # returning it would record that document as the license text.
                return ""
            return await response.text()
    except Exception:
        # Best-effort lookup. `Exception` (not a bare except) so that task
        # cancellation and KeyboardInterrupt still propagate.
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Find the license text for ``dependency==version``.

    With ``download_dir`` set, an already-downloaded copy in that directory is
    inspected first, then S3, then a fresh ``pip download``. Without it, the
    directories on ``sys.path`` are searched. In both cases ``fetch_license``
    is the last resort.

    Args:
        dependency: Package name.
        version: Exact package version.
        download_dir: Directory holding (or receiving) pip downloads.
        ignore: Package names to skip; ``None`` means nothing is ignored.
        ignore_all_dependencies: Forwarded to ``pip_download``.
        extra_pip_args: Extra command-line arguments forwarded to ``pip_download``.

    Returns:
        A mapping of ``"<name>-<version>"`` to license text; may contain several
        entries when it comes from ``pip_download``. Empty when the dependency
        is ignored or no license was found.
    """
    if ignore is None:
        # The default used to reach `dependency in ignore` and raise TypeError.
        ignore = []
    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        # Keeps the temporary directory alive until pip_download() has finished.
        tempdir = None
        if os.path.exists(download_dir):
            # Scan the directory the caller gave us even after `download_dir`
            # has been redirected to a temporary directory below.
            scan_dir = download_dir
            fallback_tried = False
            # Check if package has already been downloaded
            for path in os.listdir(scan_dir):
                if f"{dependency.replace('-', '_')}-{version}" not in path:
                    continue

                license_text = await get_local_license(dependency, version, os.path.join(scan_dir, path))
                if license_text:
                    print(f"License for {dependency_str} found locally.")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

                if fallback_tried:
                    # S3 was already asked and the clean download dir already exists.
                    continue
                fallback_tried = True

                license_text = await get_license_from_s3(dependency, version)
                if license_text:
                    print(f"Downloaded a license for {dependency_str} from s3")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}
                print(f"No license available for {dependency_str}. Downloading a clean copy.")
                tempdir = tempfile.TemporaryDirectory()
                download_dir = tempdir.name

        licenses = await pip_download(dependency, version, download_dir, ignore,
                                      ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}
    print(f"No license found for: {dependency_str}")
    # Explicit empty mapping instead of an implicit None, matching the annotation.
    return {}


dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")

# Archive suffixes removed from a downloaded file name before it is parsed with
# `dependency_regex`; otherwise the version group swallows them
# ("5.2.0.tar.gz" instead of "5.2.0").
_ARCHIVE_SUFFIXES = (".tar.gz", ".tar.bz2", ".tgz", ".tbz", ".zip", ".whl")

# Final extensions (as returned by os.path.splitext) that denote a tarball.
_TARBALL_EXTENSIONS = (".gz", ".bz", ".bz2", ".tgz", ".tbz")


def _strip_archive_suffix(file_name: str) -> str:
    """Return ``file_name`` without a trailing known archive suffix."""
    lowered = file_name.lower()
    for suffix in _ARCHIVE_SUFFIXES:
        if lowered.endswith(suffix):
            return file_name[: -len(suffix)]
    return file_name


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Download ``dependency==version`` with pip and collect the licenses of every
    archive that ends up in the download directory.

    Args:
        dependency: Package name.
        version: Exact package version.
        download_dir: Parent directory; archives go to ``<download_dir>/<dependency>``.
        ignore: Package names to skip.
        ignore_all_dependencies: When True, the internal cache is consulted
            first and pip runs with ``--no-deps``.
        extra_pip_args: Whitespace-separated extra arguments for pip.

    Returns:
        A mapping of ``"<name>-<version>"`` to license text. The text may be
        empty when no source had it. The mapping is empty when the dependency
        is ignored or pip produced no download directory.

    Raises:
        Exception: If the pip subprocess could not be started or awaited.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            dependency_str = f"{dependency}-{version}"
            return {dependency_str: license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = [
        sys.executable,
        "-m",
        "pip",
        "download",
        f"{dependency}=={version}",
        "-d",
        download_dep_dir,
        "-q",
        "--disable-pip-version-check",
        *additional_args,
    ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        await process.wait()
    except Exception as exc:
        raise Exception(
            f"Failed to find and download a license for {dependency}=={version} using pip."
        ) from exc

    if not os.path.isdir(download_dep_dir):
        # pip failed before creating the directory; report "nothing found" so
        # the caller can fall back to its other license sources.
        return {}

    # Use the "data" extraction filter where this Python provides it, so a
    # crafted sdist cannot write outside the extraction directory.
    extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}

    licenses = {}
    for path in os.listdir(download_dep_dir):
        dependency_path = join(download_dep_dir, path)
        if os.path.isdir(dependency_path):
            # Skip extracted folders.
            continue

        archive_stem = _strip_archive_suffix(path)
        match = dependency_regex.match(archive_stem)
        if not match:
            continue

        dependency_name = match.group(1)
        dependency_version = match.group(2)
        if dependency_name in ignore:
            continue

        archive_path, archive_ext = os.path.splitext(dependency_path)
        if archive_ext in _TARBALL_EXTENSIONS:
            # Mode "r" lets tarfile detect the compression transparently.
            with tarfile.open(dependency_path) as archive:
                archive.extractall(archive_path, **extract_kwargs)

            # Source tarballs wrap their content in a "<name>-<version>" folder.
            inner_dir = join(archive_path, archive_stem)
            if os.path.isdir(inner_dir):
                archive_path = inner_dir
        else:
            with zipfile.ZipFile(dependency_path) as archive:
                archive.extractall(archive_path)

        dependency_str = f"{dependency_name}-{dependency_version}"
        license_text = await get_local_license(dependency_name, dependency_version, archive_path)

        if not license_text:
            license_text = await fetch_license(dependency_name, dependency_version)

        if not license_text:
            license_text = await get_license_from_s3(dependency_name, dependency_version)

        licenses[dependency_str] = license_text
        if license_text:
            await put_license_into_internal_cache(dependency_name, dependency_version, license_text)
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Search every ``sys.path`` entry, in order, for a local license of
    ``dependency``/``version`` and return the first non-empty text found,
    or an empty string if no entry yields one.
    """
    for search_dir in sys.path:
        found_text = await get_local_license(dependency, version, search_dir)
        if not found_text:
            continue
        return found_text
    return ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Look for a license of ``dependency``/``version`` inside the directory ``path``.

    Sources are tried in this order: ``.egg-info`` / ``.dist-info`` entries whose
    lower-cased name starts with ``<name_with_underscores>-<version>``, then the
    entry whose lower-cased name equals the lower-cased dependency name, then
    ``path`` itself. Returns the first non-empty text, or "" when ``path`` is
    not a directory or nothing was found.
    """
    if not os.path.isdir(path):
        return ""

    wanted_package = dependency.lower()
    metadata_prefix = f"{wanted_package.replace('-', '_')}-{version}"

    metadata_dirs = []
    source_dir = ""
    for entry in os.listdir(path):
        lowered = entry.lower()
        if lowered.startswith(metadata_prefix):
            metadata_dirs.append(join(path, entry))
        elif lowered == wanted_package:
            source_dir = join(path, entry)

    for metadata_dir in metadata_dirs:
        # Pick the reader matching the metadata flavour; skip anything else.
        if metadata_dir.endswith(".egg-info"):
            read_license = egg_info_license
        elif metadata_dir.endswith(".dist-info"):
            read_license = dist_info_license
        else:
            continue
        text = await read_license(metadata_dir)
        if text:
            return text

    if source_dir:
        # Try to find a license file in the package source code
        text = find_license_file(source_dir)
        if text:
            return text

    # A license file can be in the specified folder itself.
    return find_license_file(path) or ""


async def egg_info_license(package_path: str) -> str:
    """
    Resolve a license through the ``Home-page:`` field of an ``.egg-info``.

    Args:
        package_path: Path of the ``.egg-info`` entry expected to contain ``PKG-INFO``.

    Returns:
        The text obtained from ``fetch_from_github`` for the home page, or ""
        when ``PKG-INFO`` cannot be read, has no non-empty ``Home-page:`` value
        on its first such line, or nothing could be fetched.
    """
    field = "Home-page:"
    try:
        # errors="replace": a stray non-UTF-8 byte elsewhere in the file must
        # not prevent reading the Home-page line.
        with open(join(package_path, "PKG-INFO"), encoding="utf-8", errors="replace") as file:
            home_page = next(
                (line[len(field):].strip() for line in file if line.startswith(field)),
                "",
            )
    except OSError:
        # Missing PKG-INFO, or the ".egg-info" entry is a plain file.
        return ""

    if not home_page:
        return ""
    return (await fetch_from_github(home_page)) or ""


async def dist_info_license(package_path: str) -> str:
    """
    Resolve a license from a ``.dist-info`` directory.

    A license file found by ``find_license_file`` wins. Otherwise the
    ``Home-page:`` field of ``METADATA`` is passed to ``fetch_from_github``.

    Args:
        package_path: Path of the ``.dist-info`` directory.

    Returns:
        The license text, or "" when none could be determined.

    Raises:
        Exception: Whatever reading ``METADATA`` raised, after logging the path.
    """
    license_text = find_license_file(package_path)
    if license_text:
        return license_text

    metadata_path = join(package_path, "METADATA")
    if not exists(metadata_path):
        return ""

    try:
        # Read bytes and decode per line, so one undecodable line does not
        # hide the Home-page line.
        with open(metadata_path, "rb") as file:
            metadata = file.readlines()
    except Exception:
        print(f"Could not open metadata file for: {package_path}")
        raise

    field = "Home-page:"
    url = ""
    for raw_line in metadata:
        try:
            line = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            continue
        if line.startswith(field):
            url = line[len(field):].strip()
            break

    if not url:
        return ""
    # Coalesce so the declared `str` return type always holds.
    return (await fetch_from_github(url)) or ""


def find_license_file(package_path: str) -> str:
    """
    Return the content of a license file located directly in ``package_path``.

    When ``package_path`` contains a ``licenses`` sub-directory, that directory
    is searched instead. A file qualifies when its name starts with "license",
    "licence" or "copying" in any letter case; candidates are examined in
    sorted name order so the result does not depend on directory listing order.

    Args:
        package_path: Directory to inspect.

    Returns:
        The file content decoded as UTF-8 (undecodable bytes are dropped after
        a warning), or "" when ``package_path`` is not a directory or holds no
        matching file.
    """
    if not os.path.isdir(package_path):
        return ""

    licenses_folder = join(package_path, "licenses")
    # isdir rather than exists: a plain file called "licenses" cannot be listed.
    search_folder = licenses_folder if os.path.isdir(licenses_folder) else package_path

    # safe because the (possibly) case-sensitive path is what we have (unlike URL)
    #   verify if any version of the common licen[cs]e or copying names appear
    name_pattern = re.compile("(licen[cs]e|copying)", re.IGNORECASE)

    license_path = ""
    for entry in sorted(os.listdir(search_folder)):
        candidate = join(search_folder, entry)
        if os.path.isdir(candidate):
            continue
        if name_pattern.match(entry):
            license_path = candidate
            break

    if not license_path:
        return ""

    # i want to try / except here because i want to know when we have issues reading files.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetch the license of ``dependency==version`` from the internal cache or,
    failing that, from the GitHub repository named as the project's home page
    in the PyPI JSON metadata.

    Args:
        dependency: Package name.
        version: Exact package version.
        session: HTTP session to use; defaults to the module-level session.

    Returns:
        The license text, or "" when PyPI does not answer with HTTP 200 or no
        license could be fetched from the home page.

    Raises:
        ValueError: If the PyPI metadata names no home page.
    """
    if session is None:
        session = _session

    license_text = await fetch_license_from_internal_cache(dependency, version, session)
    if license_text:
        return license_text

    pypi_url = f"https://pypi.org/pypi/{dependency}/{version}/json"
    async with session.get(pypi_url, ssl=get_ssl_cert()) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    info = response_json.get("info") or {}
    home_page = info.get("home_page")
    if not home_page:
        # "project_urls" may be null and its labels are free-form, so look the
        # homepage up defensively and without regard to letter case.
        project_urls = info.get("project_urls") or {}
        home_page = next(
            (link for label, link in project_urls.items() if label.lower() == "homepage"),
            None,
        )
        if not home_page:
            raise ValueError(f"home_page is missing for {dependency}=={version}")

    return (await fetch_from_github(home_page)) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Look a license up in the internal license cache.

    Args:
        dependency: Package name; underscores are replaced by dashes for the lookup.
        version: Exact package version.
        session: HTTP session to use; defaults to the module-level session.

    Returns:
        The cached text, or "" when the cache is not configured
        (``LICENSE_CACHE_URL`` unset), has no entry, or the request failed.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}") as response:
            if response.status != 200:
                return ""
            payload = await response.json()
        print(f"Found license for {dependency}=={version} in internal cache.")
        # `or ""` keeps the declared str return type when "text" is absent or null.
        return payload.get("text") or ""
    except Exception:
        # The cache is an optimisation only. `Exception` (not a bare except)
        # lets task cancellation and KeyboardInterrupt propagate.
        return ""


async def fetch_from_github(url: str, session: aiohttp.ClientSession = None) -> str:
    """
    Download the license file of the GitHub repository behind ``url``.

    Well-known license file names are probed on the usual default branches via
    raw.githubusercontent.com first; the GitHub license API is the fallback.

    Args:
        url: Project home page. Only URLs whose host contains "github" and whose
            path has at least ``<owner>/<repo>`` are considered.
        session: HTTP session to use; defaults to the module-level session.

    Returns:
        The license text, or "" when the URL is not a usable GitHub repository
        URL or no license could be downloaded.
    """
    # Imported here because a bare `import urllib` does not guarantee that the
    # `urllib.parse` submodule has been loaded.
    from urllib.parse import urlparse

    if session is None:
        session = _session

    parsed_url = urlparse(url)
    if "github" not in parsed_url.netloc:
        return ""

    # Keep only "<owner>/<repo>": home pages often point deeper into the
    # repository (".../tree/main/docs") or carry a ".git" suffix.
    segments = [segment for segment in parsed_url.path.split("/") if segment]
    if len(segments) < 2:
        return ""
    owner, repo = segments[0], segments[1]
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    repo_slug = f"{owner}/{repo}"

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            raw_url = f"https://raw.githubusercontent.com/{repo_slug}/{branch}/{license_name}"
            async with session.get(raw_url, ssl=ssl_context) as response:
                if response.status == 200:
                    return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    async with session.get(f"https://api.github.com/repos/{repo_slug}/license", ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    download_url = response_json.get("download_url")
    if not download_url:
        return ""

    async with session.get(download_url, ssl=ssl_context) as response:
        return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Store a license text in the internal license cache (best effort).

    Nothing is sent unless both ``LICENSE_CACHE_URL`` and
    ``LICENSE_CACHE_API_KEY`` are configured. Failures are reported on stdout
    and never raised.

    Args:
        dependency: Package name; underscores are replaced by dashes for the key.
        version: Exact package version.
        license_text: Text to store.
        session: HTTP session to use; defaults to the module-level session.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={"text": license_text},
        ) as response:
            if response.status == 201:
                print(f"Put license for {dependency}=={version} into internal cache.")
            elif response.status == 401:
                print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")
    except Exception as exc:
        # Caching must never fail the run, but a silent `except: pass` hid
        # outages and also swallowed task cancellation.
        print(f"[Warning] Could not put {dependency}=={version} license into internal cache: {exc}")


def check_gpl(dependency: str, license_text: str):
    """
    Abort the program when a license text mentions GPL or LGPL.

    Args:
        dependency: Name reported in the error message.
        license_text: License text to scan; empty or None is accepted.

    Raises:
        SystemExit: With exit code 666 if "GPL" or "LGPL" occurs as a whole
            word anywhere in ``license_text``.
    """
    # Raw string + re.search: the former "\b" was a backspace character and
    # re.match only inspected the very beginning, so nothing was ever detected.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Collect the licenses for a requirements file and write or return them.

    Each license is rendered as the dependency name, an underline of dashes,
    the license text and a blank line.

    Args:
        requirements_path: Path of the requirements file.
        pull_licenses: If given, only these dependencies are processed.
        dest: Output file. When omitted, the combined text is returned instead.
        download_dir: Directory used for pip downloads.
        ignore: Package names (without version) to skip.
        ignore_all_dependencies: Only process the listed requirements themselves.
        overwrite: Truncate ``dest`` instead of appending to it.

    Returns:
        The combined license text when ``dest`` is not given, otherwise None.
        None is also returned when collecting failed; the error is printed.
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    # `dest` is optional: os.path.dirname(None) used to raise TypeError before
    # any work was done. basename() keeps repeated calls from stacking prefixes.
    dest_dir = os.path.dirname(dest) if dest else ""
    PACKAGES_FILE = os.path.join(dest_dir, os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(
                requirements_path=requirements_path,
                pull_licenses=pull_licenses,
                download_dir=download_dir,
                ignore=ignore,
                ignore_all_dependencies=ignore_all_dependencies,
            )
        )

        combined_text = "".join(
            f"{dep}\n{'-' * len(dep)}\n{text}\n\n" for dep, text in licenses.items()
        )

        if not dest:
            return combined_text

        mode = "w" if overwrite else "a+"
        with open(dest, mode, encoding="utf-8") as file:
            file.write(combined_text)
    except Exception as exc:
        # NOTE(review): errors are only printed, so the process still exits
        # with status 0 -- confirm this is intended for CI use.
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Return the ``ssl`` argument used for outgoing requests: a default SSL
    context backed by certifi's CA bundle, or ``False`` when certifi cannot
    be imported.
    """
    try:
        import certifi

        ca_bundle = certifi.where()
        context = ssl.create_default_context(cafile=ca_bundle)
    except (ImportError, ModuleNotFoundError):
        # Don't verify SSL certificate if certifi is not installed.
        return False
    return context


# Command-line entry point: parse the options, run main() and print the
# combined license text when main() returns it (i.e. no --dest was given).
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    parser.add_argument("--pull-licenses", dest="pull_licenses", type=str, nargs="+",
        help="Specifies list of licenses that should be pulled from the requirements.txt list")
    parser.add_argument("-d", "--dest", dest="dest", type=str)
    parser.add_argument("--download-dir", dest="download_dir", type=str)
    parser.add_argument(
        "--ignore",
        dest="ignore",
        action="append",
        help="specifies dependency which licenses should be ignored. "
             "the script will not try to download license for ignored dependency.",
    )
    parser.add_argument(
        "--ignore-all-dependencies",
        dest="ignore_all",
        action="store_true",
        # The trailing space keeps the two concatenated sentences from running
        # together ("requirements.The script") in the --help output.
        help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements. "
             "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
    )
    parser.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="Overwrites the output file if present, otherwise license texts are appended.",
    )
    args = parser.parse_args()

    text = main(args.requirements, args.pull_licenses, args.dest, args.download_dir, args.ignore, ignore_all_dependencies=args.ignore_all,
                overwrite=args.overwrite)
    if text:
        print(text)


multidict-5.2.0
---------------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

try:
    import aiohttp
except:
    if all([platform.uname().system == "Linux", platform.uname().machine == "aarch64"]):
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

# Type aliases used in the annotations of the license-lookup functions.
DependencyName = str
LicenseText = str

# Shared HTTP session; get_licenses() binds it for the duration of a run.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # added manually to pair common english spellings
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names tried, in this order, when guessing the raw GitHub URL of a
# license file.
main_branches = [
    "master",
    "main",
    "latest",
]

# File that get_licenses() appends the name of every package with a license to.
# main() rebinds it to a path next to the destination file.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the internal license cache, taken from the environment and
# stored without a trailing slash. The cache is disabled when it is unset.
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# API key sent in the "X-Key" header when storing a license in the cache.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Collect the license texts for every pinned requirement in a requirements file.

    Each usable line must have the form ``name==version``, optionally followed
    by ``; <environment markers>`` and/or extra pip arguments after a space.
    One ``get_license`` task is scheduled per accepted requirement and all
    tasks are awaited together. The name of every package for which a license
    text was obtained is appended to ``PACKAGES_FILE``.

    Args:
        requirements_path: Path of the requirements file to read.
        pull_licenses: If given, only dependencies named here are processed.
        download_dir: Forwarded to ``get_license``.
        ignore: Dependency/package names to skip; ``None`` means none.
        ignore_all_dependencies: Forwarded to ``get_license``.

    Returns:
        A dict mapping package keys to license texts, ordered by sorted key.

    Raises:
        ValueError: If a requirement line has no ``==`` version, if a
            non-ignored dependency produced no result at all, or if some
            packages still have no license text at the end.
    """
    global PACKAGES_FILE

    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    # The session is bound to the module-level name so that the helper
    # functions can use it as their default session.
    global _session
    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue
            try:
                dependency, version = requirement.split("==", maxsplit=1)
            except ValueError:
                raise ValueError(f"{requirement} must contain a version.")

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                version, markers = version.split(";")
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            # Schedule a lookup only when the markers apply to this interpreter,
            # the dependency is not ignored and (if a filter was given) it was
            # explicitly requested.
            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                # Keyed by dependency name: a later line for the same name
                # replaces the earlier task in this dict.
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        await asyncio.gather(*license_tasks.values())

        licenses = {}
        # Packages that came back without a license text, as (package, counter) pairs.
        pending = Queue()
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                if package in ignore:
                    continue

                # The first non-empty text recorded for a package wins.
                if licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")

                if not license_text and package not in ignore:
                    pending.put((package, 0))
                    # raise ValueError(f"License for {package} was not found.")

        # Re-check the pending packages against everything gathered above; a
        # package is reported missing once its counter exceeds 3.
        # NOTE(review): nothing inside this loop adds to `licenses`, so the
        # re-queueing does not appear to change the outcome -- confirm.
        missing = []
        while pending.qsize() > 0:
            package, ct = pending.get(timeout=0.01)
            if licenses.get(package):
                print(f"\tverified found license for (after repeat {ct}): {package}")
                continue

            # max dependency nesting depth check to detect missing cases
            if ct > 3:
                missing.append(package)
                continue
            pending.put((package, ct+1))

        if len(missing) > 0:
            raise ValueError(f"Licenses not found for {missing}")

        dependencies = sorted(list(licenses.keys()))

        # Rebuild the dict so that it iterates in sorted key order.
        licenses = {dependency: licenses[dependency] for dependency in dependencies}
        for dependency, license_text in licenses.items():
            if license_text:
                check_gpl(dependency, license_text)
        return licenses


# Markers whose values are dotted version numbers. Ordering them as plain
# strings is wrong ("3.10" sorts before "3.9"), so they are compared numerically.
_VERSION_MARKERS = frozenset(("python_version", "python_full_version", "implementation_version"))

# (pattern, comparison, numeric) triples, probed in this order. The order matters:
# two-character operators must be tried before their one-character prefixes, and
# "not in" before "in". Word boundaries keep "in" from matching inside names such
# as "platform_machine". Comparisons receive (environment value, marker literal).
_MARKER_OPERATORS = (
    (re.compile(r">="), operator.ge, True),
    (re.compile(r"<="), operator.le, True),
    (re.compile(r"<"), operator.lt, True),
    (re.compile(r">"), operator.gt, True),
    (re.compile(r"=="), operator.eq, True),
    (re.compile(r"!="), operator.ne, True),
    (re.compile(r"\bnot\s+in\b"), lambda env_value, literal: env_value not in literal, False),
    (re.compile(r"\bin\b"), lambda env_value, literal: env_value in literal, False),
)


def _as_version_tuple(text: str):
    """Return ``text`` (e.g. "3.10") as a tuple of ints, or None if it is not purely dotted digits."""
    parts = text.split(".")
    if not all(part.isdecimal() for part in parts):
        return None
    return tuple(int(part) for part in parts)


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed

    Only a single ``<marker> <operator> <literal>`` comparison is supported.

    Args:
        markers: The text following ";" on a requirement line; may be empty.

    Returns:
        True when there are no markers, no known operator, an unknown (or empty)
        marker binding, or when the comparison holds for this interpreter;
        False when the comparison fails.

    Raises:
        ValueError: If the markers combine conditions with ``and`` / ``or``.
    """
    if not markers:
        return True

    # re.search with a raw pattern: the previous non-raw "\b" was a backspace
    # character and re.match only looked at the start, so this guard never fired.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    for pattern, compare, numeric in _MARKER_OPERATORS:
        found = pattern.search(markers)
        if found:
            break
    else:
        # No operators were found, include the dependency.
        return True

    # Slice around the first occurrence of the operator instead of str.split(),
    # which fails to unpack when the operator text occurs more than once.
    marker = markers[:found.start()].strip(" '\"")
    value = markers[found.end():].strip(" '\"")

    binding = get_marker_bindings().get(marker)
    if not binding:
        # Unknown marker binding -- skip marker and include the dependency
        return True

    if numeric and marker in _VERSION_MARKERS:
        env_version = _as_version_tuple(binding)
        wanted_version = _as_version_tuple(value)
        if env_version is not None and wanted_version is not None:
            # Pad with zeros so that "3.10" and "3.10.0" compare equal.
            width = max(len(env_version), len(wanted_version))
            env_version += (0,) * (width - len(env_version))
            wanted_version += (0,) * (width - len(wanted_version))
            return compare(env_version, wanted_version)

    return compare(binding, value)


# Process-wide cache for the marker bindings; populated on the first call to
# get_marker_bindings() and returned as-is afterwards.
_bindings = {}


def get_marker_bindings():
    """
    Build (once) and return the mapping of environment-marker names to the
    values of the running interpreter: implementation name/version, OS name,
    platform details and Python version strings.

    The mapping is memoised in the module-level ``_bindings`` dict, so every
    call after the first returns the very same object.
    """
    global _bindings
    if _bindings:
        return _bindings

    def render_version(version_info):
        # "major.minor.micro", plus e.g. "b2" for a non-final release level.
        rendered = f"{version_info.major}.{version_info.minor}.{version_info.micro}"
        level = version_info.releaselevel
        if level == 'final':
            return rendered
        return rendered + level[0] + str(version_info.serial)

    implementation = getattr(sys, 'implementation', None)
    if implementation is None:
        impl_name, impl_version = '', '0'
    else:
        impl_name = implementation.name
        impl_version = render_version(implementation.version)

    major_minor = '.'.join(platform.python_version_tuple()[:2])

    _bindings = dict(
        implementation_name=impl_name,
        implementation_version=impl_version,
        os_name=os.name,
        platform_machine=platform.machine(),
        platform_python_implementation=platform.python_implementation(),
        platform_release=platform.release(),
        platform_system=platform.system(),
        platform_version=platform.version(),
        python_full_version=platform.python_version(),
        python_version=major_minor,
        sys_platform=sys.platform,
    )
    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """
    Fetch a license text from the package-metadata S3 bucket.

    Args:
        package: Package name used in the object key.
        version: Package version used in the object key.
        package_manager: First path component of the object key.

    Returns:
        The response body on HTTP 200, otherwise an empty string. Any request
        error is treated as "not available" and also yields an empty string.
    """
    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        # The context manager releases the connection even on early return.
        async with _session.get(url) as response:
            if response.status != 200:
                # An error response carries an error document, not a license;
                # returning it would record that document as the license text.
                return ""
            return await response.text()
    except Exception:
        # Best-effort lookup. `Exception` (not a bare except) so that task
        # cancellation and KeyboardInterrupt still propagate.
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Find the license text for ``dependency==version``.

    With ``download_dir`` set, an already-downloaded copy in that directory is
    inspected first, then S3, then a fresh ``pip download``. Without it, the
    directories on ``sys.path`` are searched. In both cases ``fetch_license``
    is the last resort.

    Args:
        dependency: Package name.
        version: Exact package version.
        download_dir: Directory holding (or receiving) pip downloads.
        ignore: Package names to skip; ``None`` means nothing is ignored.
        ignore_all_dependencies: Forwarded to ``pip_download``.
        extra_pip_args: Extra command-line arguments forwarded to ``pip_download``.

    Returns:
        A mapping of ``"<name>-<version>"`` to license text; may contain several
        entries when it comes from ``pip_download``. Empty when the dependency
        is ignored or no license was found.
    """
    if ignore is None:
        # The default used to reach `dependency in ignore` and raise TypeError.
        ignore = []
    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        # Keeps the temporary directory alive until pip_download() has finished.
        tempdir = None
        if os.path.exists(download_dir):
            # Scan the directory the caller gave us even after `download_dir`
            # has been redirected to a temporary directory below.
            scan_dir = download_dir
            fallback_tried = False
            # Check if package has already been downloaded
            for path in os.listdir(scan_dir):
                if f"{dependency.replace('-', '_')}-{version}" not in path:
                    continue

                license_text = await get_local_license(dependency, version, os.path.join(scan_dir, path))
                if license_text:
                    print(f"License for {dependency_str} found locally.")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

                if fallback_tried:
                    # S3 was already asked and the clean download dir already exists.
                    continue
                fallback_tried = True

                license_text = await get_license_from_s3(dependency, version)
                if license_text:
                    print(f"Downloaded a license for {dependency_str} from s3")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}
                print(f"No license available for {dependency_str}. Downloading a clean copy.")
                tempdir = tempfile.TemporaryDirectory()
                download_dir = tempdir.name

        licenses = await pip_download(dependency, version, download_dir, ignore,
                                      ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}
    print(f"No license found for: {dependency_str}")
    # Explicit empty mapping instead of an implicit None, matching the annotation.
    return {}


dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Downloads ``dependency==version`` with ``pip download`` and collects a license for every
    archive that pip stored in ``<download_dir>/<dependency>``.

    :param dependency: package to download; nothing is done when it is listed in ``ignore``.
    :param version: exact version, passed to pip as ``dependency==version``.
    :param download_dir: parent folder; archives go to a sub-folder named after ``dependency``.
    :param ignore: package names for which no license is collected.
    :param ignore_all_dependencies: when set, the internal cache is asked first and pip runs
        with ``--no-deps``.
    :param extra_pip_args: optional whitespace separated arguments appended to the pip command.
    :return: mapping of ``"<name>-<version>"`` to license text (falsy when none was found for
        an archive); an empty mapping when the dependency is ignored or nothing was downloaded.
    :raises Exception: when the pip subprocess cannot be started or awaited.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            return {f"{dependency}-{version}": license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = [
        sys.executable,
        "-m",
        "pip",
        "download",
        f"{dependency}=={version}",
        "-d",
        download_dep_dir,
        "-q",
        "--disable-pip-version-check",
        *additional_args,
    ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        return_code = await process.wait()
    except Exception as exc:
        # `except Exception` (not a bare except) so that task cancellation still propagates.
        raise Exception(f"Failed to find and download a license for {dependency}=={version} using pip.") from exc

    if return_code != 0:
        print(f"[Warning] pip download for {dependency}=={version} exited with code {return_code}.")

    if not os.path.isdir(download_dep_dir):
        # pip produced nothing; an empty result lets the caller fall back to other sources.
        return {}

    licenses = {}
    for path in os.listdir(download_dep_dir):
        dependency_path = join(download_dep_dir, path)
        if os.path.isdir(dependency_path):
            # Skip extracted folders.
            continue

        match = dependency_regex.match(path)
        if not match:
            continue

        dependency_name = match.group(1)
        dependency_version = match.group(2)
        if dependency_name in ignore:
            continue

        archive_path, archive_ext = os.path.splitext(dependency_path)
        # os.path.splitext keeps the leading dot, so every extension listed here needs one.
        if archive_ext in (".gz", ".bz", ".bz2", ".tgz", ".tbz"):
            if dependency_path.endswith((".tar.gz", ".tgz")):
                mode = "r:gz"
            elif dependency_path.endswith((".tar.bz2", ".tbz")):
                mode = "r:bz2"
            else:
                mode = "r"

            with tarfile.open(dependency_path, mode) as archive:
                if hasattr(tarfile, "data_filter"):
                    # Downloaded archives are untrusted: refuse members that escape the target folder.
                    archive.extractall(archive_path, filter="data")
                else:
                    archive.extractall(archive_path)

            archive_name = os.path.basename(archive_path)
            if archive_name.endswith(".tar"):
                # Unpacking .tar.gz or .tar.bz2 will contain an extra .tar folder
                archive_path = join(archive_path, archive_name[: -len(".tar")])
            else:
                # .tgz / .tbz: the sources sit in a folder named like the archive itself.
                nested_path = join(archive_path, archive_name)
                if os.path.isdir(nested_path):
                    archive_path = nested_path
        else:
            with zipfile.ZipFile(dependency_path) as archive:
                archive.extractall(archive_path)

        # Sources are tried in order: unpacked archive, PyPI/GitHub, then S3.
        license_text = await get_local_license(dependency_name, dependency_version, archive_path)
        if not license_text:
            license_text = await fetch_license(dependency_name, dependency_version)
        if not license_text:
            license_text = await get_license_from_s3(dependency_name, dependency_version)

        licenses[f"{dependency_name}-{dependency_version}"] = license_text
        if license_text:
            await put_license_into_internal_cache(dependency_name, dependency_version, license_text)
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Looks for the license of `dependency` in every folder listed in ``sys.path``.

    Folders are probed in order with get_local_license; the first non-empty license text is
    returned, and an empty string is returned when no folder provides one.
    """
    for search_dir in sys.path:
        found = await get_local_license(dependency, version, search_dir)
        if not found:
            continue
        return found
    return ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Searches the folder `path` for a license of `dependency` at `version`.

    Lookup order:
      1. ``<name>-<version>*.egg-info`` / ``*.dist-info`` entries inside `path`,
      2. a package folder inside `path` named like the dependency,
      3. a license file located directly in `path`.

    :return: the license text, or an empty string when `path` is not a folder or nothing
        was found.
    """
    if not os.path.isdir(path):
        return ""

    dependency_name = dependency.lower().replace("-", "_")
    # A package folder may use either the raw or the underscore-normalised name.
    package_names = {dependency.lower(), dependency_name}
    meta_prefix = f"{dependency_name}-{version}".lower()

    package_meta_candidates = []
    package_path = ""
    for dependency_path in os.listdir(path):
        entry = dependency_path.lower()
        if entry.startswith(meta_prefix):
            # A digit right after the prefix means a different version
            # ("foo-1.1" must not pick up "foo-1.10.dist-info").
            if entry[len(meta_prefix):len(meta_prefix) + 1].isdigit():
                continue
            package_meta_candidates.append(join(path, dependency_path))
        elif entry in package_names:
            package_path = join(path, dependency_path)

    for package_meta in package_meta_candidates:
        if package_meta.endswith(".egg-info"):
            license_text = await egg_info_license(package_meta)
        elif package_meta.endswith(".dist-info"):
            license_text = await dist_info_license(package_meta)
        else:
            continue
        if license_text:
            return license_text

    if package_path:
        # Try to find a license file in the package source code
        license_text = find_license_file(package_path)
        if license_text:
            return license_text

    # A license file can be in the specified folder itself.
    return find_license_file(path) or ""


async def egg_info_license(package_path: str) -> str:
    """
    Resolves a license through the ``Home-page`` field of an ``.egg-info`` entry.

    `package_path` may be an ``.egg-info`` folder containing ``PKG-INFO`` or a single
    ``.egg-info`` file that is itself in PKG-INFO format. The home page URL found there is
    handed to fetch_from_github.

    :return: the license text, or an empty string when the metadata is missing, has no
        ``Home-page`` field, or no license could be fetched.
    """
    if os.path.isfile(package_path):
        pkg_info_path = package_path
    else:
        pkg_info_path = join(package_path, "PKG-INFO")
        if not os.path.isfile(pkg_info_path):
            return ""

    url = None
    # Decode explicitly so the result does not depend on the locale's default encoding.
    with open(pkg_info_path, encoding="utf-8", errors="replace") as file:
        for line in file:
            if line.startswith("Home-page:"):
                url = line[len("Home-page:"):].strip()
                break

    if not url:
        return ""
    return await fetch_from_github(url) or ""


async def dist_info_license(package_path: str) -> str:
    """
    Resolves a license for a ``.dist-info`` folder.

    A license file found by find_license_file wins. Otherwise the ``Home-page`` field of the
    ``METADATA`` file is passed to fetch_from_github. Nothing (None) is returned when there
    is no ``METADATA`` file or it has no usable ``Home-page`` entry.
    """
    bundled = find_license_file(package_path)
    if bundled:
        return bundled

    metadata_file = join(package_path, "METADATA")
    if not exists(metadata_file):
        return None

    with open(metadata_file, "rb") as handle:
        try:
            raw_lines = handle.readlines()
        except Exception as exc:
            print(f"Could not open metadata file for: {package_path}")
            raise exc

    field = "Home-page:"
    home_page = None
    for raw in raw_lines:
        # Lines are decoded one by one so that a single undecodable line is simply skipped.
        try:
            text = raw.decode("utf-8")
        except UnicodeDecodeError:
            continue
        if text.startswith(field):
            home_page = text[len(field):].strip()
            break

    if not home_page:
        return None
    return await fetch_from_github(home_page)


def find_license_file(package_path: str) -> str:
    """
    Returns the text of the first license-like file found for a package folder.

    The ``licenses`` sub-folder is searched when it exists as a directory, otherwise
    `package_path` itself. A file qualifies when its name starts with ``license``,
    ``licence`` or ``copying`` (any case) or is listed in ``license_names``. Entries are
    visited in sorted order so the choice is deterministic.

    :return: the file content, or an empty string when `package_path` is not a folder or no
        license file was found.
    """
    if not os.path.isdir(package_path):
        return ""

    licenses_folder = join(package_path, "licenses")
    # Only a real directory can be listed; a plain file called "licenses" is not one.
    license_search_folder = licenses_folder if os.path.isdir(licenses_folder) else package_path

    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)
    license_path = ""
    for path in sorted(os.listdir(license_search_folder)):
        candidate = join(license_search_folder, path)
        if os.path.isdir(candidate):
            continue
        # safe because the (possibly) case-sensitive path is what we have (unlike URL)
        #   verify if any version of the common licen[cs]e or copying names appear
        if license_regex.match(path) or path in license_names:
            license_path = candidate
            break

    if not license_path:
        return ""

    # i want to try / except here because i want to know when we have issues reading files.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetches the license of ``dependency==version`` from the internal cache or, failing that,
    from the GitHub project named as home page in the PyPI metadata.

    :param session: HTTP session to use; defaults to the module-level ``_session``.
    :return: the license text, or an empty string when PyPI does not answer with status 200
        or no license could be fetched from the home page.
    :raises ValueError: when the PyPI metadata names no home page.
    """
    if session is None:
        session = _session

    license_text = await fetch_license_from_internal_cache(dependency, version)
    if license_text:
        return license_text

    # `async with` releases the connection even when the body is never read.
    async with session.get(f"https://pypi.org/pypi/{dependency}/{version}/json", ssl=get_ssl_cert()) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    info = response_json.get("info") or {}
    home_page = info.get("home_page")
    if not home_page:
        # "project_urls" may be null and its labels are free-form, so look it up defensively.
        project_urls = info.get("project_urls") or {}
        for label, link in project_urls.items():
            if label.lower() == "homepage" and link:
                home_page = link
                break
        if not home_page:
            raise ValueError(f"home_page is missing for {dependency}=={version}")

    return await fetch_from_github(home_page) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Looks up the license of ``dependency==version`` in the internal license cache.

    The lookup is best-effort: any request or decoding failure is treated as a cache miss.

    :param session: HTTP session to use; defaults to the module-level ``_session``.
    :return: the cached license text, or an empty string when the cache is disabled
        (``LICENSE_CACHE_URL`` unset), has no entry, or cannot be queried.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    # Underscores are replaced with hyphens to form the cache key.
    dependency = dependency.replace("_", "-")
    try:
        async with session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}") as response:
            if response.status != 200:
                return ""
            payload = await response.json()
        print(f"Found license for {dependency}=={version} in internal cache.")
        return payload.get("text") or ""
    except Exception:
        # `except Exception` (not a bare except) so that task cancellation still propagates.
        return ""


async def fetch_from_github(url: str, session: aiohttp.ClientSession = None) -> str:
    """
    Downloads the license text of the GitHub repository that `url` points to.

    The license file name is guessed first (every name in ``license_names`` on every branch
    in ``main_branches``); only when no guess succeeds is the GitHub license API queried.

    :param url: project home page; only the ``<owner>/<repo>`` part of its path is used, so
        trailing segments (``/tree/main/...``) and a ``.git`` suffix are ignored.
    :param session: HTTP session to use; defaults to the module-level ``_session``.
    :return: the license text, or an empty string when `url` is not a GitHub repository URL
        or no license could be downloaded.
    """
    # Imported here because the file only imports the bare ``urllib`` package, which does
    # not guarantee that the ``parse`` submodule is loaded.
    import urllib.parse

    if session is None:
        session = _session

    parsed_url = urllib.parse.urlparse(url)
    if "github" not in parsed_url.netloc:
        return ""

    segments = [segment for segment in parsed_url.path.split("/") if segment]
    if len(segments) < 2:
        # Without both owner and repository there is nothing to look up.
        return ""
    owner, repo = segments[0], segments[1]
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    repo_path = f"{owner}/{repo}"

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            raw_url = f"https://raw.githubusercontent.com/{repo_path}/{branch}/{license_name}"
            async with session.get(raw_url, ssl=ssl_context) as response:
                if response.status == 200:
                    return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    async with session.get(f"https://api.github.com/repos/{repo_path}/license", ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    download_url = response_json.get("download_url")
    if not download_url:
        return ""

    async with session.get(download_url, ssl=ssl_context) as response:
        if response.status != 200:
            # Do not hand an error page back as if it were a license.
            return ""
        return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Stores `license_text` for ``dependency==version`` in the internal license cache.

    Storing is best-effort: a failure is reported as a warning and never raised. Nothing is
    sent unless both ``LICENSE_CACHE_URL`` and ``LICENSE_CACHE_API_KEY`` are set.

    :param session: HTTP session to use; defaults to the module-level ``_session``.
    :return: an empty string when the cache is not configured, otherwise None.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    # Underscores are replaced with hyphens to form the cache key.
    dependency = dependency.replace("_", "-")
    try:
        async with session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        ) as response:
            if response.status == 201:
                print(f"Put license for {dependency}=={version} into internal cache.")
            elif response.status == 401:
                print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")
    except Exception as exc:
        # Still best-effort, but no longer silent; `except Exception` lets cancellation through.
        print(f"[Warning] Could not put license for {dependency}=={version} into internal cache: {exc}")


def check_gpl(dependency: str, license_text: str):
    """
    Aborts the run when `license_text` mentions GPL or LGPL as a standalone word.

    Prints an error naming `dependency` and exits the process with status 666. Empty or
    missing license text is accepted.
    """
    # Raw string: in a normal string "\b" is a backspace character, not a word boundary.
    # re.search: the mention may appear anywhere in the text, not only at its very start.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Collects the licenses of all requirements and writes or returns the combined report.

    :param requirements_path: path of the requirements file to process.
    :param pull_licenses: when given, only these dependencies are processed.
    :param dest: file the report is written to; when omitted the report is returned instead.
    :param download_dir: folder used for downloaded packages.
    :param ignore: package names (without versions) to skip.
    :param ignore_all_dependencies: only process the listed requirements themselves.
    :param overwrite: overwrite `dest` instead of appending to it.
    :return: the report text when `dest` is not given; None when it was written to `dest`
        or when collecting failed (the error is printed).
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    if dest:
        # Keep the package list next to the destination file. Only the base name is reused so
        # that repeated calls do not pile folder upon folder; without `dest` the current
        # location is kept (os.path.dirname(None) would raise TypeError).
        PACKAGES_FILE = os.path.join(os.path.dirname(dest), os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        # One section per package: name, underline of the same length, license text.
        license_text = "".join(
            f"{dep}\n{'-' * len(dep)}\n{text}\n\n" for dep, text in licenses.items()
        )

        if dest:
            mode = "w" if overwrite else "a+"
            with open(dest, mode, encoding="utf-8") as file:
                file.write(license_text)
        else:
            return license_text
    except Exception as exc:
        # NOTE(review): failures are only printed, so the caller sees None and the script
        # still exits with status 0 -- confirm this is intended.
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Returns the value to pass as ``ssl=`` argument of HTTP requests.

    With certifi installed this is an SSL context built from certifi's CA bundle. Without
    certifi it is False.
    """
    try:
        import certifi as ca_store

        verified_context = ssl.create_default_context(cafile=ca_store.where())
    except (ImportError, ModuleNotFoundError):
        # Don't verify SSL certificate if certifi is not installed.
        return False
    return verified_context


if __name__ == "__main__":
    # Command-line entry point: declare the options, run main() and print whatever it returns.
    option_specs = (
        (("-r", "--requirements"), {"dest": "requirements", "type": str, "default": "requirements.txt"}),
        (("--pull-licenses",), {
            "dest": "pull_licenses",
            "type": str,
            "nargs": "+",
            "help": "Specifies list of licenses that should be pulled from the requirements.txt list",
        }),
        (("-d", "--dest"), {"dest": "dest", "type": str}),
        (("--download-dir",), {"dest": "download_dir", "type": str}),
        (("--ignore",), {
            "dest": "ignore",
            "action": "append",
            "help": "specifies dependency which licenses should be ignored. "
                    "the script will not try to download license for ignored dependency.",
        }),
        (("--ignore-all-dependencies",), {
            "dest": "ignore_all",
            "action": "store_true",
            "help": "Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements."
                    "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
        }),
        (("--overwrite",), {
            "dest": "overwrite",
            "action": "store_true",
            "help": "Overwrites the output file if present, otherwise license texts are appended.",
        }),
    )

    cli = argparse.ArgumentParser()
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)
    options = cli.parse_args()

    report = main(
        options.requirements,
        options.pull_licenses,
        options.dest,
        options.download_dir,
        options.ignore,
        ignore_all_dependencies=options.ignore_all,
        overwrite=options.overwrite,
    )
    if report:
        print(report)


packaging-24.0
--------------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

# aiohttp provides the HTTP client session used for the network requests of this script.
try:
    import aiohttp
except:
    # On Linux aarch64 a failed import is reported as a warning and the script exits with
    # status 0; on every other platform the original error is re-raised.
    if all([platform.uname().system == "Linux", platform.uname().machine == "aarch64"]):
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

# Readability aliases used in the annotations of the license dictionaries.
DependencyName = str
LicenseText = str

# Shared HTTP session: get_licenses() binds it for the duration of a run, and the network
# helpers fall back to it when no explicit session is passed.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
# Used both to guess raw file URLs on GitHub and to recognise license files on disk.
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # added manually to pair common english spellings
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names tried, in this order, when guessing the raw license URL on GitHub.
main_branches = [
    "master",
    "main",
    "latest",
]

# File to which get_licenses() appends the name of every package whose license was
# collected; main() re-targets it into the folder of the destination file.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the optional internal license cache (trailing slashes removed). The cache is
# not used when the environment variable is unset.
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# API key sent in the "X-Key" header when storing a license in the cache; nothing is stored
# without it.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Collects the licenses of every pinned requirement listed in `requirements_path`.

    Each requirement line must have the form ``name==version``, optionally followed by
    environment markers (after ``;``) and extra pip arguments (after a space). One
    get_license task is started per accepted requirement and all tasks are awaited together.

    :param requirements_path: path of the requirements file.
    :param pull_licenses: when not None, only dependencies named here are processed.
    :param download_dir: forwarded to get_license.
    :param ignore: dependency names to skip.
    :param ignore_all_dependencies: forwarded to get_license.
    :return: dictionary mapping the keys returned by get_license to license text, ordered
        by key.
    :raises ValueError: when a requirement has no ``==`` version, when a task returned no
        result for a dependency that is not ignored, or when packages are left without a license.
    """
    global PACKAGES_FILE

    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    # The session is bound to the module-level name so that the helper functions can use it
    # as their default session while this block is active.
    global _session
    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        # Maps the dependency name to the scheduled get_license task.
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue
            try:
                dependency, version = requirement.split("==", maxsplit=1)
            except ValueError:
                raise ValueError(f"{requirement} must contain a version.")

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                version, markers = version.split(";")
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            # A requirement is processed only if its markers apply, it is not ignored and,
            # when a pull list is given, it is part of that list.
            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        # Wait for every task; an exception raised by any of them propagates from here.
        await asyncio.gather(*license_tasks.values())

        licenses = {}
        # Packages reported without license text; re-checked after all results are merged.
        pending = Queue()
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                if package in ignore:
                    continue

                # The first license text recorded for a package is kept.
                if licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")

                if not license_text and package not in ignore:
                    pending.put((package, 0))
                    # raise ValueError(f"License for {package} was not found.")

        missing = []
        # Each pending package is accepted if another task supplied its license in the
        # meantime; otherwise it is re-queued until its counter exceeds 3 and then reported.
        # NOTE(review): nothing inside this loop adds entries to `licenses`, so the repeated
        # checks cannot change the outcome -- confirm whether the retries are still needed.
        while pending.qsize() > 0:
            package, ct = pending.get(timeout=0.01)
            if licenses.get(package):
                print(f"\tverified found license for (after repeat {ct}): {package}")
                continue

            # max dependency nesting depth check to detect missing cases
            if ct > 3:
                missing.append(package)
                continue
            pending.put((package, ct+1))

        if len(missing) > 0:
            raise ValueError(f"Licenses not found for {missing}")

        dependencies = sorted(list(licenses.keys()))

        # Rebuild the dictionary in sorted key order, then run the GPL check on every text.
        licenses = {dependency: licenses[dependency] for dependency in dependencies}
        for dependency, license_text in licenses.items():
            if license_text:
                check_gpl(dependency, license_text)
        return licenses


def _marker_version_key(text: str):
    """
    Returns `text` as a tuple of ints when it is a dotted numeric version, otherwise None.
    """
    try:
        return tuple(int(part) for part in text.split("."))
    except ValueError:
        return None


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed

    Only a single ``<marker> <operator> <value>`` comparison is supported. The dependency is
    included when there are no markers, no known operator, or the marker name is unknown.

    :raises ValueError: when the markers combine conditions with ``and`` / ``or``.
    """
    if not markers:
        return True

    # Raw string and re.search: "\b" in a normal string is a backspace character, and
    # re.match would only look at the very start of the markers.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    # The order matters because operator is found by using 'in'
    operators = {
        ">=": operator.ge,
        "<=": operator.le,
        "<": operator.lt,
        ">": operator.gt,
        "==": operator.eq,
        "!=": operator.ne,
        "not in": lambda a, b: not operator.contains(a, b),
        "in": operator.contains,
    }

    for op, cmp in operators.items():
        if op in markers:
            break
    else:
        # No operators were found, include the dependency.
        return True

    # Split at the first occurrence only, so an operator repeated inside the value cannot
    # break the unpacking.
    marker, _, value = markers.partition(op)
    marker, value = marker.strip(" '\""), value.strip(" '\"")

    bindings = get_marker_bindings()
    binding = bindings.get(marker)
    if binding:
        if marker.endswith("_version") and op not in ("in", "not in"):
            # Compare versions numerically: as plain strings "3.10" sorts before "3.8".
            left, right = _marker_version_key(binding), _marker_version_key(value)
            if left is not None and right is not None:
                width = max(len(left), len(right))
                left += (0,) * (width - len(left))
                right += (0,) * (width - len(right))
                return cmp(left, right)
        return cmp(binding, value)

    # Unknown marker binding -- skip marker and include the dependency
    return True


_bindings = {}


def get_marker_bindings():
    """
    Returns the values of the environment marker names for the running interpreter:
    os name, platform details, python version and implementation information.

    The dictionary is built on the first call and stored in the module-level ``_bindings``;
    later calls return that stored dictionary.
    """
    global _bindings
    if _bindings:
        return _bindings

    if hasattr(sys, 'implementation'):
        impl_info = sys.implementation.version
        implementation_version = '{0.major}.{0.minor}.{0.micro}'.format(impl_info)
        release_kind = impl_info.releaselevel
        if release_kind != 'final':
            # Non-final builds get a suffix: first letter of the release level plus the serial.
            implementation_version += release_kind[0] + str(impl_info.serial)
        implementation_name = sys.implementation.name
    else:
        implementation_version, implementation_name = '0', ''

    _bindings = dict(
        implementation_name=implementation_name,
        implementation_version=implementation_version,
        os_name=os.name,
        platform_machine=platform.machine(),
        platform_python_implementation=platform.python_implementation(),
        platform_release=platform.release(),
        platform_system=platform.system(),
        platform_version=platform.version(),
        python_full_version=platform.python_version(),
        python_version='.'.join(platform.python_version_tuple()[:2]),
        sys_platform=sys.platform,
    )
    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """
    Downloads the license of ``package`` at ``version`` from the package-metadata S3 bucket.

    The lookup is best-effort and uses the module-level ``_session``.

    :return: the license text, or an empty string when the bucket does not answer with
        status 200 or the request fails.
    """
    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        async with _session.get(url) as response:
            if response.status != 200:
                # An error response carries an error document, not a license.
                return ""
            return await response.text()
    except Exception:
        # `except Exception` (not a bare except) so that task cancellation still propagates.
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Finds the license of ``dependency==version``.

    With `download_dir`: entries of that folder whose name contains ``<name>-<version>`` are
    checked for a local license first. If such entries exist but none has a license, S3 is
    asked, and failing that a clean copy is downloaded into a temporary folder. Without
    matching entries the package is downloaded into `download_dir`. Without `download_dir`
    the folders of ``sys.path`` are searched. The last resort in every case is fetch_license.

    :param ignore: package names to skip; None is treated as an empty list.
    :param ignore_all_dependencies: forwarded to pip_download.
    :param extra_pip_args: forwarded to pip_download.
    :return: mapping of ``"<name>-<version>"`` to license text; an empty mapping when the
        dependency is ignored or no license was found.
    """
    if ignore is None:
        ignore = []
    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        tempdir = None
        try:
            if os.path.isdir(download_dir):
                # Check if package has already been downloaded
                wanted = f"{dependency.replace('-', '_')}-{version}"
                candidates = [path for path in os.listdir(download_dir) if wanted in path]

                # Check every candidate in the original folder before falling back.
                for path in candidates:
                    license_text = await get_local_license(dependency, version, os.path.join(download_dir, path))
                    if license_text:
                        print(f"License for {dependency_str} found locally.")

                        await put_license_into_internal_cache(dependency, version, license_text)
                        return {dependency_str: license_text}

                if candidates:
                    # A copy exists but has no license: ask S3 once, then re-download.
                    license_text = await get_license_from_s3(dependency, version)
                    if license_text:
                        print(f"Downloaded a license for {dependency_str} from s3")

                        await put_license_into_internal_cache(dependency, version, license_text)
                        return {dependency_str: license_text}
                    print(f"No license available for {dependency_str}. Downloading a clean copy.")
                    tempdir = tempfile.TemporaryDirectory()
                    download_dir = tempdir.name

            licenses = await pip_download(dependency, version, download_dir, ignore,
                                          ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        finally:
            if tempdir is not None:
                # pip_download has read everything it needs before returning.
                tempdir.cleanup()
        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}
    print(f"No license found for: {dependency_str}")
    return {}


# Splits the file name of a downloaded distribution into a name (group 1) and a version
# (group 2). Group 1 is greedy, so it extends to the last hyphen that is still followed by
# word characters or dots: "pkg-1.0-py3-none-any.whl" yields ("pkg", "1.0"), while a name
# whose version has no dot (e.g. "pkg-1-py3-none-any.whl") is split later in the string.
dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Downloads ``dependency==version`` with ``pip download`` and collects a license for every
    archive that pip stored in ``<download_dir>/<dependency>``.

    :param dependency: package to download; nothing is done when it is listed in ``ignore``.
    :param version: exact version, passed to pip as ``dependency==version``.
    :param download_dir: parent folder; archives go to a sub-folder named after ``dependency``.
    :param ignore: package names for which no license is collected.
    :param ignore_all_dependencies: when set, the internal cache is asked first and pip runs
        with ``--no-deps``.
    :param extra_pip_args: optional whitespace separated arguments appended to the pip command.
    :return: mapping of ``"<name>-<version>"`` to license text (falsy when none was found for
        an archive); an empty mapping when the dependency is ignored or nothing was downloaded.
    :raises Exception: when the pip subprocess cannot be started or awaited.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            return {f"{dependency}-{version}": license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = [
        sys.executable,
        "-m",
        "pip",
        "download",
        f"{dependency}=={version}",
        "-d",
        download_dep_dir,
        "-q",
        "--disable-pip-version-check",
        *additional_args,
    ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        return_code = await process.wait()
    except Exception as exc:
        # `except Exception` (not a bare except) so that task cancellation still propagates.
        raise Exception(f"Failed to find and download a license for {dependency}=={version} using pip.") from exc

    if return_code != 0:
        print(f"[Warning] pip download for {dependency}=={version} exited with code {return_code}.")

    if not os.path.isdir(download_dep_dir):
        # pip produced nothing; an empty result lets the caller fall back to other sources.
        return {}

    licenses = {}
    for path in os.listdir(download_dep_dir):
        dependency_path = join(download_dep_dir, path)
        if os.path.isdir(dependency_path):
            # Skip extracted folders.
            continue

        match = dependency_regex.match(path)
        if not match:
            continue

        dependency_name = match.group(1)
        dependency_version = match.group(2)
        if dependency_name in ignore:
            continue

        archive_path, archive_ext = os.path.splitext(dependency_path)
        # os.path.splitext keeps the leading dot, so every extension listed here needs one.
        if archive_ext in (".gz", ".bz", ".bz2", ".tgz", ".tbz"):
            if dependency_path.endswith((".tar.gz", ".tgz")):
                mode = "r:gz"
            elif dependency_path.endswith((".tar.bz2", ".tbz")):
                mode = "r:bz2"
            else:
                mode = "r"

            with tarfile.open(dependency_path, mode) as archive:
                if hasattr(tarfile, "data_filter"):
                    # Downloaded archives are untrusted: refuse members that escape the target folder.
                    archive.extractall(archive_path, filter="data")
                else:
                    archive.extractall(archive_path)

            archive_name = os.path.basename(archive_path)
            if archive_name.endswith(".tar"):
                # Unpacking .tar.gz or .tar.bz2 will contain an extra .tar folder
                archive_path = join(archive_path, archive_name[: -len(".tar")])
            else:
                # .tgz / .tbz: the sources sit in a folder named like the archive itself.
                nested_path = join(archive_path, archive_name)
                if os.path.isdir(nested_path):
                    archive_path = nested_path
        else:
            with zipfile.ZipFile(dependency_path) as archive:
                archive.extractall(archive_path)

        # Sources are tried in order: unpacked archive, PyPI/GitHub, then S3.
        license_text = await get_local_license(dependency_name, dependency_version, archive_path)
        if not license_text:
            license_text = await fetch_license(dependency_name, dependency_version)
        if not license_text:
            license_text = await get_license_from_s3(dependency_name, dependency_version)

        licenses[f"{dependency_name}-{dependency_version}"] = license_text
        if license_text:
            await put_license_into_internal_cache(dependency_name, dependency_version, license_text)
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Looks for the license of `dependency` in every folder listed in ``sys.path``.

    Folders are probed in order with get_local_license; the first non-empty license text is
    returned, and an empty string is returned when no folder provides one.
    """
    for search_dir in sys.path:
        found = await get_local_license(dependency, version, search_dir)
        if not found:
            continue
        return found
    return ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Searches the folder `path` for a license of `dependency` at `version`.

    Lookup order:
      1. ``<name>-<version>*.egg-info`` / ``*.dist-info`` entries inside `path`,
      2. a package folder inside `path` named like the dependency,
      3. a license file located directly in `path`.

    :return: the license text, or an empty string when `path` is not a folder or nothing
        was found.
    """
    if not os.path.isdir(path):
        return ""

    dependency_name = dependency.lower().replace("-", "_")
    # A package folder may use either the raw or the underscore-normalised name.
    package_names = {dependency.lower(), dependency_name}
    meta_prefix = f"{dependency_name}-{version}".lower()

    package_meta_candidates = []
    package_path = ""
    for dependency_path in os.listdir(path):
        entry = dependency_path.lower()
        if entry.startswith(meta_prefix):
            # A digit right after the prefix means a different version
            # ("foo-1.1" must not pick up "foo-1.10.dist-info").
            if entry[len(meta_prefix):len(meta_prefix) + 1].isdigit():
                continue
            package_meta_candidates.append(join(path, dependency_path))
        elif entry in package_names:
            package_path = join(path, dependency_path)

    for package_meta in package_meta_candidates:
        if package_meta.endswith(".egg-info"):
            license_text = await egg_info_license(package_meta)
        elif package_meta.endswith(".dist-info"):
            license_text = await dist_info_license(package_meta)
        else:
            continue
        if license_text:
            return license_text

    if package_path:
        # Try to find a license file in the package source code
        license_text = find_license_file(package_path)
        if license_text:
            return license_text

    # A license file can be in the specified folder itself.
    return find_license_file(path) or ""


async def egg_info_license(package_path: str) -> str:
    """
    Resolves a license through the ``Home-page`` field of an ``.egg-info`` entry.

    `package_path` may be an ``.egg-info`` folder containing ``PKG-INFO`` or a single
    ``.egg-info`` file that is itself in PKG-INFO format. The home page URL found there is
    handed to fetch_from_github.

    :return: the license text, or an empty string when the metadata is missing, has no
        ``Home-page`` field, or no license could be fetched.
    """
    if os.path.isfile(package_path):
        pkg_info_path = package_path
    else:
        pkg_info_path = join(package_path, "PKG-INFO")
        if not os.path.isfile(pkg_info_path):
            return ""

    url = None
    # Decode explicitly so the result does not depend on the locale's default encoding.
    with open(pkg_info_path, encoding="utf-8", errors="replace") as file:
        for line in file:
            if line.startswith("Home-page:"):
                url = line[len("Home-page:"):].strip()
                break

    if not url:
        return ""
    return await fetch_from_github(url) or ""


async def dist_info_license(package_path: str) -> str:
    """
    Resolves a license for a ``.dist-info`` folder.

    A license file found by ``find_license_file`` in the folder is returned first.
    Otherwise the first ``Home-page:`` line of the ``METADATA`` file is passed to
    ``fetch_from_github``. Returns None when neither source is available.
    """
    bundled_text = find_license_file(package_path)
    if bundled_text:
        return bundled_text

    metadata_file = join(package_path, "METADATA")
    if not exists(metadata_file):
        return None

    with open(metadata_file, "rb") as handle:
        try:
            raw_lines = handle.readlines()
        except Exception as exc:
            print(f"Could not open metadata file for: {package_path}")
            raise exc

    header = "Home-page:"
    home_page = None
    for raw_line in raw_lines:
        # The file is read as bytes; lines that are not valid UTF-8 are skipped.
        try:
            text = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            continue
        if text.startswith(header):
            home_page = text[len(header):].strip()
            break

    if not home_page:
        return None
    return await fetch_from_github(home_page)


def find_license_file(package_path: str) -> str:
    """
    Reads a license file located directly inside ``package_path``.

    If ``package_path`` contains a ``licenses`` folder, only that folder is searched.
    A file qualifies when its name starts with "license", "licence" or "copying"
    (case-insensitive) or equals one of ``license_names``. Sub-folders are skipped.

    Returns the file content, or an empty string when ``package_path`` is not a
    folder or holds no license file.
    """
    if not os.path.isdir(package_path):
        return ""

    licenses_folder = join(package_path, "licenses")
    # Only descend into "licenses" when it really is a folder; a plain file with
    # that name is picked up by the name check below instead.
    license_search_folder = licenses_folder if os.path.isdir(licenses_folder) else package_path

    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)
    license_path = ""
    # Sorted so the chosen file does not depend on the file system's listing order.
    for path in sorted(os.listdir(license_search_folder)):
        candidate = join(license_search_folder, path)
        if os.path.isdir(candidate):
            continue
        # safe because the (possibly) case-sensitive path is what we have (unlike URL)
        #   verify if any version of the common licen[cs]e or copying names appear
        if license_regex.match(path) or path in license_names:
            license_path = candidate
            break

    if not license_path:
        return ""

    # Decoding problems are reported instead of hidden, then the file is read
    # again with the offending bytes dropped.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetches the license of ``dependency==version`` from remote sources.

    The internal cache is asked first. Otherwise the PyPI JSON metadata is
    downloaded and its home page is passed to ``fetch_from_github``.

    Returns the license text, or an empty string when PyPI does not answer with
    status 200 or no license could be fetched from the home page.
    Raises ValueError when the PyPI metadata names no home page.
    """
    if session is None:
        session = _session

    license_text = await fetch_license_from_internal_cache(dependency, version, session=session)
    if license_text:
        return license_text

    pypi_url = f"https://pypi.org/pypi/{dependency}/{version}/json"
    # The context manager releases the connection once the body has been read.
    async with session.get(pypi_url, ssl=get_ssl_cert()) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    info = response_json.get("info") or {}
    home_page = info.get("home_page")
    if not home_page:
        # "project_urls" can be null or lack a "Homepage" entry; treat both as
        # "no home page" instead of failing with TypeError/KeyError.
        home_page = (info.get("project_urls") or {}).get("Homepage")
        if not home_page:
            raise ValueError(f"home_page is missing for {dependency}=={version}")

    return (await fetch_from_github(home_page, session=session)) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Looks up the license of ``dependency==version`` in the internal license cache.

    The cache is queried at ``{LICENSE_CACHE_URL}/pip/NAME/VERSION`` with
    underscores in the name replaced by dashes. The lookup is best effort: a
    disabled cache, a non-200 answer or any request/parsing error yields an
    empty string.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}") as response:
            if response.status != 200:
                return ""
            payload = await response.json()
        print(f"Found license for {dependency}=={version} in internal cache.")
        return payload.get("text") or ""
    except Exception:
        # Deliberately tolerant, but not a bare ``except``: task cancellation and
        # KeyboardInterrupt must still propagate.
        return ""


async def fetch_from_github(url: str, session: aiohttp.ClientSession = None) -> str:
    """
    Downloads the license file of the GitHub repository that ``url`` points to.

    Every name of ``license_names`` is first tried on every branch of
    ``main_branches`` through raw.githubusercontent.com; only if none exists is the
    GitHub license API used.

    Returns the license text, or an empty string when ``url`` is not a GitHub
    repository URL or no license could be downloaded.
    """
    if session is None:
        session = _session

    parsed_url = urllib.parse.urlparse(url)
    if "github" not in parsed_url.netloc:
        return ""

    # Keep only "owner/repo": home pages often point at a sub-page (".../tree/main/docs")
    # or carry a ".git" suffix, either of which breaks the URLs built below.
    segments = [segment for segment in parsed_url.path.split("/") if segment]
    if len(segments) < 2:
        return ""
    owner, repo = segments[:2]
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    repo_slug = f"{owner}/{repo}"

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            raw_url = f"https://raw.githubusercontent.com/{repo_slug}/{branch}/{license_name}"
            async with session.get(raw_url, ssl=ssl_context) as response:
                if response.status == 200:
                    return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    async with session.get(f"https://api.github.com/repos/{repo_slug}/license", ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    download_url = response_json.get("download_url")
    if not download_url:
        return ""

    async with session.get(download_url, ssl=ssl_context) as response:
        # An error page must not be mistaken for a license text.
        if response.status != 200:
            return ""
        return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Stores ``license_text`` for ``dependency==version`` in the internal license cache.

    Nothing is sent unless both LICENSE_CACHE_URL and LICENSE_CACHE_API_KEY are set.
    The upload is best effort: request errors are ignored and only the 201 and
    401 answers are reported on stdout.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        # The context manager releases the connection after the status was inspected.
        async with session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        ) as response:
            if response.status == 201:
                print(f"Put license for {dependency}=={version} into internal cache.")
            elif response.status == 401:
                print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")
    except Exception:
        # A failed cache upload must never fail the license run; task cancellation
        # (not an ``Exception``) still propagates.
        pass


def check_gpl(dependency: str, license_text: str):
    """
    Aborts the script when ``license_text`` mentions GPL or LGPL as a whole word.

    Prints an error naming ``dependency`` and exits with status 666 on a hit;
    returns None otherwise (including for an empty ``license_text``).
    """
    # Raw string: in a normal string "\b" is a backspace character, not a word
    # boundary. ``search`` rather than ``match``: the mention can be anywhere in the text.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Collects the licenses for a requirements file and writes or returns them.

    ``get_licenses`` is run on a fresh event loop. Each license is rendered as the
    dependency name, a dashed underline, and the license text.

    With ``dest`` the text is written to that file (truncated when ``overwrite`` is
    set, appended otherwise) and None is returned; without ``dest`` the text is
    returned. Any exception raised while collecting or writing is printed rather
    than propagated, in which case None is returned.
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    if dest:
        # Keep the package list next to the destination file. Only the base name is
        # re-used so repeated calls do not stack folder prefixes; without ``dest``
        # the current value is left untouched.
        PACKAGES_FILE = os.path.join(os.path.dirname(dest), os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        license_text = "".join(
            f"{dep}\n{'-' * len(dep)}\n{text}\n\n" for dep, text in licenses.items()
        )

        if not dest:
            return license_text

        mode = "w" if overwrite else "a+"
        with open(dest, mode, encoding="utf-8") as file:
            file.write(license_text)
    except Exception as exc:
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Builds the ``ssl`` argument used for the HTTPS requests of this script.

    Returns a default SSL context backed by the certifi CA bundle when certifi
    can be imported, otherwise False.
    """
    try:
        import certifi

        context = ssl.create_default_context(cafile=certifi.where())
    except ImportError:
        # Don't verify SSL certificate if certifi is not installed.
        context = False
    return context


if __name__ == "__main__":
    # Command-line entry point: parse the options, run main() and print the
    # collected text when main() returns it (i.e. when no --dest was given).
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    parser.add_argument("--pull-licenses", dest="pull_licenses", type=str, nargs="+",
        help="Specifies list of licenses that should be pulled from the requirements.txt list")
    parser.add_argument("-d", "--dest", dest="dest", type=str)
    parser.add_argument("--download-dir", dest="download_dir", type=str)
    parser.add_argument(
        "--ignore",
        dest="ignore",
        action="append",
        help="specifies dependency which licenses should be ignored. "
             "the script will not try to download license for ignored dependency.",
    )
    parser.add_argument(
        "--ignore-all-dependencies",
        dest="ignore_all",
        action="store_true",
        # The two literals are concatenated, so the first one needs a trailing space.
        help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements. "
             "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
    )
    parser.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="Overwrites the output file if present, otherwise license texts are appended.",
    )
    args = parser.parse_args()

    text = main(args.requirements, args.pull_licenses, args.dest, args.download_dir, args.ignore, ignore_all_dependencies=args.ignore_all,
                overwrite=args.overwrite)
    if text:
        print(text)


pluggy-1.5.0
------------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

# aiohttp is imported inside a guard because the import may fail.
try:
    import aiohttp
except:
    # On Linux aarch64 a failed import only prints two warnings and ends the script
    # with exit status 0; on any other platform the original exception is re-raised.
    if all([platform.uname().system == "Linux", platform.uname().machine == "aarch64"]):
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

# Aliases used in the Dict[DependencyName, LicenseText] return annotations below.
DependencyName = str
LicenseText = str

# HTTP session shared by the fetch helpers when they are called without a session.
# get_licenses() binds it for the duration of its ``async with`` block.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
# Used both for guessing raw GitHub URLs and for matching local file names.
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # added manually to pair common english spellings
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names tried, in this order, when guessing raw GitHub license URLs.
main_branches = [
    "master",
    "main",
    "latest",
]

# File that get_licenses() appends the name of every package with a license to.
# main() rewrites this value so the file is placed next to its destination file.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the internal license cache, taken from the environment.
# Trailing slashes are removed because request paths are appended with "/pip/...".
# When unset, cache lookups and uploads are skipped.
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# API key sent as the "X-Key" header when uploading a license to the cache.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Collects the license texts for the pinned requirements in ``requirements_path``.

    Every requirement line must have the form ``name==version``, optionally followed
    by environment markers (after ";") and extra pip arguments (after a space).
    One ``get_license`` task is started per requirement that passes its markers, is
    not listed in ``ignore`` and, when ``pull_licenses`` is given, is listed there.

    Returns a dict mapping package identifiers to license texts, ordered by key.
    Each package with a license is also appended to PACKAGES_FILE, and every
    collected text is passed to ``check_gpl``.

    Raises ValueError when a requirement has no ``==`` version, when a task returns
    no result for its dependency, or when a package is left without a license.
    """
    global PACKAGES_FILE

    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    # The session is stored in the module-level _session so that the fetch helpers
    # can use it without receiving it as an argument.
    global _session
    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue
            try:
                dependency, version = requirement.split("==", maxsplit=1)
            except ValueError:
                raise ValueError(f"{requirement} must contain a version.")

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                version, markers = version.split(";")
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                # Scheduled right away; all tasks are awaited together below.
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        await asyncio.gather(*license_tasks.values())

        licenses = {}
        # Packages that came back without a license text; re-checked after all
        # task results have been merged.
        pending = Queue()
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                if package in ignore:
                    continue

                # The first license recorded for a package wins.
                if licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")

                if not license_text and package not in ignore:
                    pending.put((package, 0))
                    # raise ValueError(f"License for {package} was not found.")

        missing = []
        # Each pending package is re-queued with an incremented counter until either
        # a license for it shows up in ``licenses`` or the counter exceeds 3.
        while pending.qsize() > 0:
            package, ct = pending.get(timeout=0.01)
            if licenses.get(package):
                print(f"\tverified found license for (after repeat {ct}): {package}")
                continue

            # max dependency nesting depth check to detect missing cases
            if ct > 3:
                missing.append(package)
                continue
            pending.put((package, ct+1))

        if len(missing) > 0:
            raise ValueError(f"Licenses not found for {missing}")

        dependencies = sorted(list(licenses.keys()))

        # Rebuild the dict so that it iterates in sorted key order.
        licenses = {dependency: licenses[dependency] for dependency in dependencies}
        for dependency, license_text in licenses.items():
            if license_text:
                check_gpl(dependency, license_text)
        return licenses


def _marker_version_key(text: str):
    """
    Returns a tuple of ints for a dotted numeric version such as "3.10", otherwise None.
    """
    parts = text.split(".")
    if all(part.isdigit() for part in parts):
        return tuple(int(part) for part in parts)
    return None


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed

    Only a single comparison of the form ``marker operator value`` is supported,
    with the marker name on the left-hand side. The dependency is included (True)
    when there are no markers, no known operator, or the marker name has no binding.

    Raises ValueError when the markers combine conditions with ``and`` / ``or``.
    """
    if not markers:
        return True

    # Raw string + ``search``: "\b" in a normal string is a backspace, and ``match``
    # would only look at the very beginning of the markers.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    # The order matters because operator is found by using 'in'
    operators = {
        ">=": operator.ge,
        "<=": operator.le,
        "<": operator.lt,
        ">": operator.gt,
        "==": operator.eq,
        "!=": operator.ne,
        # The binding is the left operand: "marker in value" / "marker not in value".
        "not in": lambda a, b: a not in b,
        "in": lambda a, b: a in b,
    }

    for op, cmp in operators.items():
        if op in markers:
            break
    else:
        # No operators were found, include the dependency.
        return True

    # Split once only, so an operator repeated inside the value cannot break unpacking.
    marker, value = markers.split(op, 1)
    marker, value = marker.strip(" '\""), value.strip(" '\"")

    bindings = get_marker_bindings()
    binding = bindings.get(marker)
    if binding:
        if cmp in (operator.ge, operator.le, operator.lt, operator.gt):
            # Order numeric versions by their components: as plain strings
            # "3.10" would sort before "3.8".
            binding_key = _marker_version_key(binding)
            value_key = _marker_version_key(value)
            if binding_key is not None and value_key is not None:
                return cmp(binding_key, value_key)
        return cmp(binding, value)

    # Unknown marker binding -- skip marker and include the dependency
    return True


# Cache for get_marker_bindings(); filled on the first call.
_bindings = {}


def get_marker_bindings():
    """
    Returns the values of the environment marker names (os name, platform, python
    version and other system information) for the running interpreter.

    The dict is built on the first call and the same object is returned afterwards.
    """
    global _bindings
    if _bindings:
        return _bindings

    def describe_version(info):
        # major.minor.micro, followed by the first letter of the release level and
        # the serial number for non-final releases.
        text = f"{info.major}.{info.minor}.{info.micro}"
        level = info.releaselevel
        if level != 'final':
            text += level[0] + str(info.serial)
        return text

    implementation = getattr(sys, 'implementation', None)
    if implementation is None:
        implementation_name = ''
        implementation_version = '0'
    else:
        implementation_version = describe_version(implementation.version)
        implementation_name = implementation.name

    major_minor = platform.python_version_tuple()[:2]
    collected = {
        'implementation_name': implementation_name,
        'implementation_version': implementation_version,
        'os_name': os.name,
        'platform_machine': platform.machine(),
        'platform_python_implementation': platform.python_implementation(),
        'platform_release': platform.release(),
        'platform_system': platform.system(),
        'platform_version': platform.version(),
        'python_full_version': platform.python_version(),
        'python_version': '.'.join(major_minor),
        'sys_platform': sys.platform,
    }
    _bindings = collected
    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """
    Downloads the license stored for ``package`` / ``version`` in the package-metadata
    S3 bucket, using the module-level session.

    Returns the response body for a 200 answer. Any other status and any error
    while requesting yields an empty string.
    """
    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        async with _session.get(url) as response:
            # The body of an error answer is not a license text.
            if response.status != 200:
                return ""
            return await response.text()
    except Exception:
        # Best-effort source; task cancellation (not an ``Exception``) still propagates.
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Finds the license of ``dependency==version``.

    With ``download_dir``: an already downloaded copy inside that folder is inspected
    first, then the S3 bucket, and finally the package is downloaded with pip.
    Without ``download_dir``: the folders of ``sys.path`` are searched.
    In both cases ``fetch_license`` is the last resort.

    Returns a dict of "NAME-VERSION" keys to license texts (pip downloads may add
    entries for the packages downloaded along with it), or an empty dict when the
    dependency is ignored or nothing was found. Licenses found here are also
    uploaded to the internal cache.
    """
    if ignore is None:
        ignore = []
    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        if os.path.exists(download_dir):
            # Check if package has already been downloaded
            for path in os.listdir(download_dir):
                if f"{dependency.replace('-', '_')}-{version}" not in path:
                    continue

                license_text = await get_local_license(dependency, version, os.path.join(download_dir, path))
                if license_text:
                    print(f"License for {dependency_str} found locally.")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

                license_text = await get_license_from_s3(dependency, version)
                if license_text:
                    print(f"Downloaded a license for {dependency_str} from s3")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

                print(f"No license available for {dependency_str}. Downloading a clean copy.")
                # ``tempdir`` stays referenced until this function returns, which keeps
                # the folder alive for the pip download below.
                tempdir = tempfile.TemporaryDirectory()
                download_dir = tempdir.name
                # Stop scanning: the remaining names belong to the original folder and
                # would otherwise be joined with the new temporary one.
                break

        licenses = await pip_download(dependency, version, download_dir, ignore,
                                      ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}

    print(f"No license found for: {dependency_str}")
    return {}


# Splits a downloaded file name into package name (group 1) and version (group 2).
dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Downloads ``dependency==version`` with ``pip download`` and extracts the licenses
    of every archive that pip stored.

    Archives are downloaded into ``download_dir/dependency`` and unpacked next to
    themselves. For each one the license is looked up locally, then through
    ``fetch_license``, then in the S3 bucket; found licenses are uploaded to the
    internal cache. With ``ignore_all_dependencies`` the internal cache is asked
    first and pip runs with ``--no-deps``.

    Returns a dict of "NAME-VERSION" keys to license texts (a text may be empty).
    An empty dict is returned when the dependency is ignored or pip stored nothing.
    Raises Exception when the pip process cannot be run.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            dependency_str = f"{dependency}-{version}"
            return {dependency_str: license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = \
        [
            sys.executable,
            "-m",
            "pip",
            "download",
            f"{dependency}=={version}",
            "-d",
            download_dep_dir,
            "-q",
            "--disable-pip-version-check",
            *additional_args
        ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        await process.wait()
    except Exception as exc:
        raise Exception(f"Failed to find and download a license for {dependency}=={version} using pip.") from exc

    if not os.path.isdir(download_dep_dir):
        # pip did not store anything; let the caller fall back to its other sources.
        return {}

    licenses = {}
    for path in os.listdir(download_dep_dir):
        if os.path.isdir(join(download_dep_dir, path)):
            # Skip extracted folders.
            continue

        match = dependency_regex.match(path)
        if not match:
            continue

        dependency_name = match.group(1)
        dependency_version = match.group(2)
        dependency_path = join(download_dep_dir, path)

        if dependency_name in ignore:
            continue

        # NOTE(review): the archives come from the package index and are extracted
        # without member filtering -- confirm that this is acceptable.
        archive_path, archive_ext = os.path.splitext(dependency_path)
        # ``splitext`` keeps the leading dot, so the extensions must carry it too.
        if archive_ext in (".gz", ".tgz", ".bz2", ".tbz"):
            if dependency_path.endswith((".tar.gz", ".tgz")):
                mode = "r:gz"
            elif dependency_path.endswith((".tar.bz2", ".tbz")):
                mode = "r:bz2"
            else:
                mode = "r"

            with tarfile.open(dependency_path, mode) as archive:
                archive.extractall(archive_path)

            if archive_path.endswith(".tar"):
                # Unpacking .tar.gz or .tar.bz2 will contain an extra .tar folder
                archive_name = os.path.split(archive_path)[-1]
                archive_path = join(archive_path, archive_name[: -len(".tar")])

        else:
            with zipfile.ZipFile(dependency_path) as archive:
                archive.extractall(archive_path)

        dependency_str = f"{dependency_name}-{dependency_version}"
        licenses[dependency_str] = await get_local_license(dependency_name, dependency_version, archive_path)

        if not licenses[dependency_str]:
            licenses[dependency_str] = await fetch_license(dependency_name, dependency_version)

        if not licenses[dependency_str]:
            licenses[dependency_str] = await get_license_from_s3(dependency_name, dependency_version)

        if licenses[dependency_str]:
            await put_license_into_internal_cache(dependency_name, dependency_version, licenses[dependency_str])
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Looks for the license of a package in every folder listed in ``sys.path``.

    Each entry is handed to ``get_local_license``; the first non-empty result is
    returned. An empty string is returned when no entry yields a license.
    """
    for search_root in sys.path:
        found = await get_local_license(dependency, version, search_root)
        if not found:
            continue
        return found
    return ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Searches the folder ``path`` for the license of ``dependency`` at ``version``.

    Lookup order:
      1. ``.egg-info`` / ``.dist-info`` entries of ``path`` whose name starts with
         "NAME-VERSION" (NAME lower-cased, dashes replaced by underscores);
      2. an entry of ``path`` named after the package (its source code);
      3. ``path`` itself.

    Returns the license text, or an empty string when ``path`` is not a folder or
    no license was found.
    """
    if not os.path.isdir(path):
        return ""

    dependency_name = dependency.lower().replace("-", "_")
    # The package folder is matched against both spellings: the distribution name
    # may contain dashes while the folder on disk uses underscores.
    package_names = {dependency.lower(), dependency_name}
    meta_prefix = f"{dependency_name}-{version}"

    package_meta_candidates = []
    package_path = ""
    for dependency_path in os.listdir(path):
        entry = dependency_path.lower()
        if entry.startswith(meta_prefix):
            package_meta_candidates.append(join(path, dependency_path))
        elif entry in package_names:
            package_path = join(path, dependency_path)

    for package_meta in package_meta_candidates:
        if package_meta.endswith(".egg-info"):
            license_text = await egg_info_license(package_meta)
        elif package_meta.endswith(".dist-info"):
            license_text = await dist_info_license(package_meta)
        else:
            continue
        if license_text:
            return license_text

    if package_path:
        # Try to find a license file in the package source code
        license_text = find_license_file(package_path)
        if license_text:
            return license_text

    # A license file can be in the specified folder itself.
    return find_license_file(path) or ""


async def egg_info_license(package_path: str) -> str:
    """
    Resolves a license for an ``.egg-info`` entry through its ``PKG-INFO`` file.

    The value of the first ``Home-page:`` line is passed to ``fetch_from_github``.
    Returns an empty string when ``PKG-INFO`` is missing, names no home page, or
    no license could be fetched.
    """
    pkg_info_path = join(package_path, "PKG-INFO")
    # The entry may not be a folder, or may not contain PKG-INFO at all.
    if not os.path.isfile(pkg_info_path):
        return ""

    url = ""
    # Decode explicitly as UTF-8 (not the locale encoding) and skip undecodable
    # bytes: only the ASCII "Home-page:" header matters here.
    with open(pkg_info_path, encoding="utf-8", errors="ignore") as file:
        for line in file:
            if line.startswith("Home-page:"):
                url = line[len("Home-page:"):].strip()
                break

    if not url:
        return ""
    return (await fetch_from_github(url)) or ""


async def dist_info_license(package_path: str) -> str:
    """
    Resolves a license for a ``.dist-info`` folder.

    A license file found by ``find_license_file`` in the folder is returned first.
    Otherwise the first ``Home-page:`` line of the ``METADATA`` file is passed to
    ``fetch_from_github``. Returns None when neither source is available.
    """
    bundled_text = find_license_file(package_path)
    if bundled_text:
        return bundled_text

    metadata_file = join(package_path, "METADATA")
    if not exists(metadata_file):
        return None

    with open(metadata_file, "rb") as handle:
        try:
            raw_lines = handle.readlines()
        except Exception as exc:
            print(f"Could not open metadata file for: {package_path}")
            raise exc

    header = "Home-page:"
    home_page = None
    for raw_line in raw_lines:
        # The file is read as bytes; lines that are not valid UTF-8 are skipped.
        try:
            text = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            continue
        if text.startswith(header):
            home_page = text[len(header):].strip()
            break

    if not home_page:
        return None
    return await fetch_from_github(home_page)


def find_license_file(package_path: str) -> str:
    """
    Reads a license file located directly inside ``package_path``.

    If ``package_path`` contains a ``licenses`` folder, only that folder is searched.
    A file qualifies when its name starts with "license", "licence" or "copying"
    (case-insensitive) or equals one of ``license_names``. Sub-folders are skipped.

    Returns the file content, or an empty string when ``package_path`` is not a
    folder or holds no license file.
    """
    if not os.path.isdir(package_path):
        return ""

    licenses_folder = join(package_path, "licenses")
    # Only descend into "licenses" when it really is a folder; a plain file with
    # that name is picked up by the name check below instead.
    license_search_folder = licenses_folder if os.path.isdir(licenses_folder) else package_path

    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)
    license_path = ""
    # Sorted so the chosen file does not depend on the file system's listing order.
    for path in sorted(os.listdir(license_search_folder)):
        candidate = join(license_search_folder, path)
        if os.path.isdir(candidate):
            continue
        # safe because the (possibly) case-sensitive path is what we have (unlike URL)
        #   verify if any version of the common licen[cs]e or copying names appear
        if license_regex.match(path) or path in license_names:
            license_path = candidate
            break

    if not license_path:
        return ""

    # Decoding problems are reported instead of hidden, then the file is read
    # again with the offending bytes dropped.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetches the license of ``dependency==version`` from remote sources.

    The internal cache is asked first. Otherwise the PyPI JSON metadata is
    downloaded and its home page is passed to ``fetch_from_github``.

    Returns the license text, or an empty string when PyPI does not answer with
    status 200 or no license could be fetched from the home page.
    Raises ValueError when the PyPI metadata names no home page.
    """
    if session is None:
        session = _session

    license_text = await fetch_license_from_internal_cache(dependency, version, session=session)
    if license_text:
        return license_text

    pypi_url = f"https://pypi.org/pypi/{dependency}/{version}/json"
    # The context manager releases the connection once the body has been read.
    async with session.get(pypi_url, ssl=get_ssl_cert()) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    info = response_json.get("info") or {}
    home_page = info.get("home_page")
    if not home_page:
        # "project_urls" can be null or lack a "Homepage" entry; treat both as
        # "no home page" instead of failing with TypeError/KeyError.
        home_page = (info.get("project_urls") or {}).get("Homepage")
        if not home_page:
            raise ValueError(f"home_page is missing for {dependency}=={version}")

    return (await fetch_from_github(home_page, session=session)) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Looks up the license of ``dependency==version`` in the internal license cache.

    The cache is queried at ``{LICENSE_CACHE_URL}/pip/NAME/VERSION`` with
    underscores in the name replaced by dashes. The lookup is best effort: a
    disabled cache, a non-200 answer or any request/parsing error yields an
    empty string.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}") as response:
            if response.status != 200:
                return ""
            payload = await response.json()
        print(f"Found license for {dependency}=={version} in internal cache.")
        return payload.get("text") or ""
    except Exception:
        # Deliberately tolerant, but not a bare ``except``: task cancellation and
        # KeyboardInterrupt must still propagate.
        return ""


async def fetch_from_github(url: str, session: aiohttp.ClientSession = None) -> str:
    """
    Downloads the license file of the GitHub repository that ``url`` points to.

    Every name of ``license_names`` is first tried on every branch of
    ``main_branches`` through raw.githubusercontent.com; only if none exists is the
    GitHub license API used.

    Returns the license text, or an empty string when ``url`` is not a GitHub
    repository URL or no license could be downloaded.
    """
    if session is None:
        session = _session

    parsed_url = urllib.parse.urlparse(url)
    if "github" not in parsed_url.netloc:
        return ""

    # Keep only "owner/repo": home pages often point at a sub-page (".../tree/main/docs")
    # or carry a ".git" suffix, either of which breaks the URLs built below.
    segments = [segment for segment in parsed_url.path.split("/") if segment]
    if len(segments) < 2:
        return ""
    owner, repo = segments[:2]
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    repo_slug = f"{owner}/{repo}"

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            raw_url = f"https://raw.githubusercontent.com/{repo_slug}/{branch}/{license_name}"
            async with session.get(raw_url, ssl=ssl_context) as response:
                if response.status == 200:
                    return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    async with session.get(f"https://api.github.com/repos/{repo_slug}/license", ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    download_url = response_json.get("download_url")
    if not download_url:
        return ""

    async with session.get(download_url, ssl=ssl_context) as response:
        # An error page must not be mistaken for a license text.
        if response.status != 200:
            return ""
        return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Stores ``license_text`` for ``dependency==version`` in the internal license cache.

    Nothing is sent unless both LICENSE_CACHE_URL and LICENSE_CACHE_API_KEY are set.
    The upload is best effort: request errors are ignored and only the 201 and
    401 answers are reported on stdout.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        # The context manager releases the connection after the status was inspected.
        async with session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        ) as response:
            if response.status == 201:
                print(f"Put license for {dependency}=={version} into internal cache.")
            elif response.status == 401:
                print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")
    except Exception:
        # A failed cache upload must never fail the license run; task cancellation
        # (not an ``Exception``) still propagates.
        pass


def check_gpl(dependency: str, license_text: str):
    """
    Aborts the process when a license text mentions GPL or LGPL.

    :param dependency: name printed in the error message.
    :param license_text: license text to scan; empty or ``None`` is accepted and ignored.
    :raises SystemExit: with code 666 when the word "GPL" or "LGPL" occurs anywhere in the text.
    """
    # A raw string is required: in a normal string "\b" is a backspace character, not a
    # word boundary. ``search`` is used because ``match`` only looks at the very beginning.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Gathers the licenses of all requirements and writes or returns the combined text.

    :param requirements_path: path of the requirements file passed to ``get_licenses``.
    :param pull_licenses: optional list of requirement names to restrict the run to.
    :param dest: output file. When omitted the combined text is returned instead.
    :param download_dir: directory handed to ``get_licenses`` for downloaded packages.
    :param ignore: package names (without versions) to skip.
    :param ignore_all_dependencies: only handle the listed requirements, not their dependencies.
    :param overwrite: overwrite ``dest`` instead of appending to it.
    :return: the combined license text when ``dest`` is not given, otherwise ``None``.
             ``None`` is also returned when gathering fails; the error is printed.
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    # ``dest`` is optional, so only derive a directory from it when it is set. The base name
    # is used so that repeated calls do not keep prefixing the directory.
    dest_dir = os.path.dirname(dest) if dest else ""
    PACKAGES_FILE = os.path.join(dest_dir, os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        # One section per package: name, underline of the same length, license text.
        license_text = "".join(
            f"{dep}\n{'-' * len(dep)}\n{text}\n\n" for dep, text in licenses.items()
        )

        if not dest:
            return license_text

        mode = "w" if overwrite else "a+"
        with open(dest, mode, encoding="utf-8") as file:
            file.write(license_text)
    except Exception as exc:
        # NOTE(review): errors are only printed, so the caller cannot tell failure from
        # success -- confirm whether a non-zero exit status is wanted here.
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Builds the SSL setting that is passed to outgoing HTTP requests.

    :return: an SSL context backed by certifi's CA bundle, or ``False`` when certifi
             cannot be imported.
    """
    try:
        import certifi as ca_store

        ssl_setting = ssl.create_default_context(cafile=ca_store.where())
    except ImportError:
        # Don't verify SSL certificate if certifi is not installed.
        ssl_setting = False
    return ssl_setting


if __name__ == "__main__":
    # Command-line entry point: parse the options and forward them to main().
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    parser.add_argument("--pull-licenses", dest="pull_licenses", type=str, nargs="+",
        help="Specifies list of licenses that should be pulled from the requirements.txt list")
    parser.add_argument("-d", "--dest", dest="dest", type=str)
    parser.add_argument("--download-dir", dest="download_dir", type=str)
    parser.add_argument(
        "--ignore",
        dest="ignore",
        action="append",
        help="specifies dependency which licenses should be ignored. "
             "the script will not try to download license for ignored dependency.",
    )
    parser.add_argument(
        "--ignore-all-dependencies",
        dest="ignore_all",
        action="store_true",
        # The two literals are concatenated, so the first one must end with a space.
        help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements. "
             "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
    )
    parser.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="Overwrites the output file if present, otherwise license texts are appended.",
    )
    args = parser.parse_args()

    text = main(args.requirements, args.pull_licenses, args.dest, args.download_dir, args.ignore, ignore_all_dependencies=args.ignore_all,
                overwrite=args.overwrite)
    # main() only returns text when no destination file was given.
    if text:
        print(text)


py-1.11.0
---------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

try:
    import aiohttp
except:
    if all([platform.uname().system == "Linux", platform.uname().machine == "aarch64"]):
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

# Aliases that make the Dict[...] return annotations of the license helpers self-describing.
DependencyName = str
LicenseText = str

# HTTP session shared by the helpers below; get_licenses() assigns it for the duration of a run.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # added manually to pair common english spellings
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names tried, in this order, when guessing the raw GitHub URL of a license file.
main_branches = [
    "master",
    "main",
    "latest",
]

# File to which get_licenses() appends the name of every package whose license was gathered.
# main() prefixes it with the directory of the destination file.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the internal license cache, read from the environment. The trailing slash is
# removed because the request URLs are built as f"{LICENSE_CACHE_URL}/pip/...".
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# API key sent in the "X-Key" header when a license is uploaded to the cache.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Collects the license texts for every requirement listed in a requirements file.

    Each requirement line must have the form ``name==version``, optionally followed by
    environment markers (``; marker``) and/or extra pip arguments separated by a space.
    One ``get_license`` task is scheduled per accepted requirement and all tasks are
    awaited together.

    :param requirements_path: path of the requirements file to read.
    :param pull_licenses: when given, only requirements named in this list are processed.
    :param download_dir: forwarded to ``get_license``.
    :param ignore: package names to skip; defaults to an empty list.
    :param ignore_all_dependencies: forwarded to ``get_license``.
    :return: dict mapping package keys to license texts, ordered by key.
    :raises ValueError: if a requirement has no ``==version``, if a requirement yields no
        result at all, or if any package ends up without a license text.
    """
    global PACKAGES_FILE

    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    # The session is stored in the module-level ``_session`` so that the helper functions can
    # use it without receiving it as an argument.
    global _session
    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue
            try:
                dependency, version = requirement.split("==", maxsplit=1)
            except ValueError:
                raise ValueError(f"{requirement} must contain a version.")

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                # NOTE(review): this unpacking raises ValueError when the line contains more
                # than one ';'.
                version, markers = version.split(";")
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                # Keyed by name, so a requirement listed twice keeps only its last task here.
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        # Run all lookups concurrently; an exception raised by any task propagates from here.
        await asyncio.gather(*license_tasks.values())

        licenses = {}
        # Packages that were reported without a license text and still need to be verified.
        pending = Queue()
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                # NOTE(review): tasks are only scheduled for names that are not in ``ignore``,
                # so this check looks unreachable unless ``ignore`` changes in between.
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                if package in ignore:
                    continue

                # The first non-empty license found for a package wins.
                if licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")

                if not license_text and package not in ignore:
                    pending.put((package, 0))
                    # raise ValueError(f"License for {package} was not found.")

        # A pending package counts as found if another requirement supplied its license.
        # NOTE(review): nothing inside this loop adds to ``licenses``, so a package that is
        # missing on the first pass is simply re-queued until the counter exceeds 3.
        missing = []
        while pending.qsize() > 0:
            package, ct = pending.get(timeout=0.01)
            if licenses.get(package):
                print(f"\tverified found license for (after repeat {ct}): {package}")
                continue

            # max dependency nesting depth check to detect missing cases
            if ct > 3:
                missing.append(package)
                continue
            pending.put((package, ct+1))

        if len(missing) > 0:
            raise ValueError(f"Licenses not found for {missing}")

        dependencies = sorted(list(licenses.keys()))

        # Rebuild the dict so that iteration follows the sorted key order.
        licenses = {dependency: licenses[dependency] for dependency in dependencies}
        for dependency, license_text in licenses.items():
            if license_text:
                check_gpl(dependency, license_text)
        return licenses


# Markers whose values are dotted version numbers and have to be compared numerically
# ("3.10" sorts before "3.8" when compared as text).
_VERSION_MARKERS = frozenset(("python_version", "python_full_version", "implementation_version"))

# (operator, pattern, comparison) triples. The order matters because the first pattern found
# in the marker wins: ">=" and "<=" must precede "<" and ">", and "not in" must precede "in".
# The word operators are matched on word boundaries so that values such as "win32" or
# "linux" are not mistaken for the "in" operator.
_MARKER_OPERATORS = tuple(
    (op, re.compile(pattern), cmp)
    for op, pattern, cmp in (
        (">=", r">=", operator.ge),
        ("<=", r"<=", operator.le),
        ("<", r"<", operator.lt),
        (">", r">", operator.gt),
        ("==", r"==", operator.eq),
        ("!=", r"!=", operator.ne),
        ("not in", r"\bnot\s+in\b", lambda a, b: a not in b),
        ("in", r"\bin\b", lambda a, b: a in b),
    )
)


def _version_key(text: str):
    """Returns a tuple of ints for a purely numeric dotted version, otherwise ``None``."""
    parts = text.split(".")
    if all(part.isdigit() for part in parts):
        return tuple(int(part) for part in parts)
    return None


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed

    Only a single ``<marker> <operator> <value>`` condition is supported. Markers without a
    known operator or with an unknown marker name are treated as satisfied.

    :param markers: marker part of a requirement line (the text after ``;``), may be empty.
    :return: ``True`` when the dependency applies to the running interpreter/platform.
    :raises ValueError: when the marker combines conditions with ``and`` / ``or``.
    """
    if not markers:
        return True

    # Raw string + search: "\b" in a normal string is a backspace, and ``match`` would only
    # look at the beginning of the marker.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    for op, pattern, cmp in _MARKER_OPERATORS:
        # Split on the first occurrence only, the value itself may repeat the operator text.
        parts = pattern.split(markers, maxsplit=1)
        if len(parts) == 2:
            break
    else:
        # No operators were found, include the dependency.
        return True

    marker, value = (part.strip(" '\"") for part in parts)

    binding = get_marker_bindings().get(marker)
    if not binding:
        # Unknown marker binding -- skip marker and include the dependency
        return True

    if marker in _VERSION_MARKERS and op not in ("in", "not in"):
        binding_key, value_key = _version_key(binding), _version_key(value)
        if binding_key is not None and value_key is not None:
            return cmp(binding_key, value_key)

    # Non-numeric values (and the containment operators) are compared as plain text.
    return cmp(binding, value)


# Cache filled by get_marker_bindings() on first use.
_bindings = {}


def get_marker_bindings():
    """
    Returns bindings for marker values which include os name, platform, python version and
    other system information.

    The dict is computed once and cached in the module-level ``_bindings``.
    """

    def format_full_version(info):
        version = '{0.major}.{0.minor}.{0.micro}'.format(info)
        kind = info.releaselevel
        if kind != 'final':
            # Pre-releases get the first letter of the level plus the serial, e.g. "3.9.0b1".
            version += kind[0] + str(info.serial)
        return version

    global _bindings
    if _bindings:
        return _bindings

    if hasattr(sys, 'implementation'):
        implementation_version = format_full_version(sys.implementation.version)
        implementation_name = sys.implementation.name
    else:
        implementation_version = '0'
        implementation_name = ''

    _bindings = {
        'implementation_name': implementation_name,
        'implementation_version': implementation_version,
        'os_name': os.name,
        'platform_machine': platform.machine(),
        'platform_python_implementation': platform.python_implementation(),
        'platform_release': platform.release(),
        'platform_system': platform.system(),
        'platform_version': platform.version(),
        'python_full_version': platform.python_version(),
        'python_version': '.'.join(platform.python_version_tuple()[:2]),
        'sys_platform': sys.platform,
    }
    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """
    Downloads a license text from the package-metadata S3 bucket.

    :param package: package name used in the object key.
    :param version: package version used in the object key.
    :param package_manager: first path segment of the object key.
    :return: the license text, or an empty string when the object is not available or the
             request fails.
    """
    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        async with _session.get(url) as response:
            # Only a successful response carries a license; the body of an error response
            # must not be mistaken for license text.
            if response.status != 200:
                return ""
            return await response.text()
    except Exception:
        # Best effort: any request failure means "no license available from S3".
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Finds the license of one dependency, trying local copies before any download.

    With ``download_dir`` set, an already downloaded copy in that directory is inspected
    first, then S3, then a fresh ``pip download``. Without it, the directories on
    ``sys.path`` are searched. ``fetch_license`` is the last resort in both cases.

    :param dependency: package name.
    :param version: package version.
    :param download_dir: directory with (or for) downloaded packages.
    :param ignore: package names to skip; ``None`` is treated as an empty list.
    :param ignore_all_dependencies: forwarded to ``pip_download``.
    :param extra_pip_args: additional pip arguments, forwarded to ``pip_download``.
    :return: dict mapping "<name>-<version>" to license text; empty when nothing was found.
    """
    if ignore is None:
        ignore = []
    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        tempdir = None
        if os.path.exists(download_dir):
            # Check if package has already been downloaded
            local_prefix = f"{dependency.replace('-', '_')}-{version}"
            for path in os.listdir(download_dir):
                if local_prefix not in path:
                    continue

                license_text = await get_local_license(dependency, version, os.path.join(download_dir, path))
                if license_text:
                    print(f"License for {dependency_str} found locally.")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

                license_text = await get_license_from_s3(dependency, version)
                if license_text:
                    print(f"Downloaded a license for {dependency_str} from s3")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

                print(f"No license available for {dependency_str}. Downloading a clean copy.")
                tempdir = tempfile.TemporaryDirectory()
                download_dir = tempdir.name
                # The remaining entries belong to the old directory; joining them with the
                # new temporary directory could never find anything, so stop here.
                break

        try:
            licenses = await pip_download(dependency, version, download_dir, ignore,
                                          ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        finally:
            # pip_download returns license texts only, so the clean copy can be removed now.
            if tempdir is not None:
                tempdir.cleanup()
        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}

    print(f"No license found for: {dependency_str}")
    # Explicit empty result instead of an implicit None, matching the annotated return type.
    return {}


# Splits a downloaded file name into (name, version), e.g. "foo_bar-1.2.3-py3-none-any.whl".
dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")

# File extensions (as returned by os.path.splitext) that are unpacked with tarfile;
# everything else is treated as a zip archive (wheels).
_TAR_EXTENSIONS = (".gz", ".bz", ".bz2", ".tgz", ".tbz")


def _extract_archive(dependency_path: str) -> str:
    """Unpacks a downloaded sdist/wheel next to itself and returns the folder to search."""
    archive_path, archive_ext = os.path.splitext(dependency_path)
    if archive_ext in _TAR_EXTENSIONS:
        if dependency_path.endswith((".tar.gz", ".tgz")):
            mode = "r:gz"
        elif dependency_path.endswith((".tar.bz2", ".tbz")):
            mode = "r:bz2"
        else:
            mode = "r"

        with tarfile.open(dependency_path, mode) as archive:
            if hasattr(tarfile, "data_filter"):
                # Downloaded archives are untrusted: refuse members that would be written
                # outside of the destination folder (available on newer Python versions).
                archive.extractall(archive_path, filter="data")
            else:
                archive.extractall(archive_path)

        if archive_path.endswith(".tar"):
            # Unpacking .tar.gz or .tar.bz2 will contain an extra .tar folder
            archive_name = os.path.basename(archive_path)
            archive_path = join(archive_path, archive_name[: -len(".tar")])
    else:
        with zipfile.ZipFile(dependency_path) as archive:
            archive.extractall(archive_path)
    return archive_path


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Downloads a package (and, unless disabled, its dependencies) with pip and extracts
    the license of every downloaded archive.

    :param dependency: package name.
    :param version: package version.
    :param download_dir: parent directory; files are stored in ``<download_dir>/<dependency>``.
    :param ignore: package names to skip.
    :param ignore_all_dependencies: pass ``--no-deps`` to pip and consult the internal cache first.
    :param extra_pip_args: additional pip arguments as one whitespace-separated string.
    :return: dict mapping "<name>-<version>" to license text (the text may be empty when no
             license could be found); empty when pip produced no download folder.
    :raises Exception: when the pip process could not be started.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            dependency_str = f"{dependency}-{version}"
            return {dependency_str: license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = [
        sys.executable,
        "-m",
        "pip",
        "download",
        f"{dependency}=={version}",
        "-d",
        download_dep_dir,
        "-q",
        "--disable-pip-version-check",
        *additional_args,
    ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        await process.wait()
    except Exception as exc:
        raise Exception(f"Failed to find and download a license for {dependency}=={version} using pip.") from exc

    if process.returncode != 0:
        print(f"[Warning] pip download exited with code {process.returncode} for {dependency}=={version}.")
    if not os.path.isdir(download_dep_dir):
        # pip did not download anything; let the caller fall back to other sources.
        return {}

    licenses = {}
    for path in os.listdir(download_dep_dir):
        dependency_path = join(download_dep_dir, path)
        if os.path.isdir(dependency_path):
            # Skip extracted folders.
            continue

        match = dependency_regex.match(path)
        if not match:
            continue

        dependency_name, dependency_version = match.group(1), match.group(2)
        if dependency_name in ignore:
            continue

        archive_path = _extract_archive(dependency_path)

        # Sources are tried in order: extracted archive, internal cache / PyPI + GitHub, S3.
        dependency_str = f"{dependency_name}-{dependency_version}"
        license_text = await get_local_license(dependency_name, dependency_version, archive_path)
        if not license_text:
            license_text = await fetch_license(dependency_name, dependency_version)
        if not license_text:
            license_text = await get_license_from_s3(dependency_name, dependency_version)

        licenses[dependency_str] = license_text
        if license_text:
            await put_license_into_internal_cache(dependency_name, dependency_version, license_text)
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Searches every entry of ``sys.path`` for a local license of the dependency.

    :return: the first non-empty license text found, otherwise an empty string.
    """
    for search_dir in sys.path:
        found_text = await get_local_license(dependency, version, search_dir)
        if not found_text:
            continue
        return found_text
    return ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Looks for a license of the dependency inside a local directory.

    The directory is searched for ``<name>-<version>*.egg-info`` / ``*.dist-info`` metadata
    folders, for the package folder itself, and finally for a license file directly in ``path``.

    :param dependency: package name, with dashes or underscores.
    :param version: package version.
    :param path: directory to search; anything that is not a directory yields "".
    :return: the license text or an empty string.
    """
    if not os.path.isdir(path):
        return ""

    dependency_name = dependency.lower().replace("-", "_")
    # Entry names are lowercased before comparing, so the prefix has to be lowercase as well.
    meta_prefix = f"{dependency_name}-{version}".lower()
    # The package folder of "foo-bar" is normally "foo_bar", so accept both spellings.
    package_names = {dependency.lower(), dependency_name}

    package_meta_candidates = []
    package_path = ""
    for entry in os.listdir(path):
        lowered = entry.lower()
        if lowered.startswith(meta_prefix):
            package_meta_candidates.append(join(path, entry))
        elif lowered in package_names:
            package_path = join(path, entry)

    for package_meta in package_meta_candidates:
        if package_meta.endswith(".egg-info"):
            license_text = await egg_info_license(package_meta)
        elif package_meta.endswith(".dist-info"):
            license_text = await dist_info_license(package_meta)
        else:
            continue
        if license_text:
            return license_text

    if package_path:
        # Try to find a license file in the package source code
        license_text = find_license_file(package_path)
        if license_text:
            return license_text

    # A license file can be in the specified folder itself.
    return find_license_file(path) or ""


async def egg_info_license(package_path: str) -> str:
    """
    Resolves a license through the ``Home-page`` header of an ``.egg-info`` folder.

    :param package_path: path of the ``.egg-info`` entry.
    :return: the license text fetched from the project's GitHub home page, or an empty
             string when PKG-INFO is unreadable, has no home page or no license is found.
    """
    try:
        # Binary mode: the file is scanned for an ASCII header only, so an unexpected
        # encoding elsewhere in the file cannot make the read fail.
        with open(join(package_path, "PKG-INFO"), "rb") as file:
            raw_lines = file.readlines()
    except OSError:
        # Covers a missing PKG-INFO as well as an .egg-info that is a plain file.
        return ""

    header = b"Home-page:"
    url = ""
    for raw_line in raw_lines:
        if raw_line.startswith(header):
            url = raw_line[len(header):].decode("utf-8", errors="replace").strip()
            break

    if not url:
        return ""
    return await fetch_from_github(url) or ""


async def dist_info_license(package_path: str) -> str:
    """
    Resolves the license of a ``.dist-info`` folder.

    A license file inside the folder is preferred; otherwise the ``Home-page`` header of
    METADATA is used to fetch the license from GitHub.

    :param package_path: path of the ``.dist-info`` folder.
    :return: the license text, or an empty string when none could be determined.
    """
    license_text = find_license_file(package_path)
    if license_text:
        return license_text

    metadata_path = join(package_path, "METADATA")
    if not exists(metadata_path):
        return ""

    with open(metadata_path, "rb") as file:
        try:
            metadata = file.readlines()
        except Exception:
            print(f"Could not open metadata file for: {package_path}")
            raise

    url = None
    for raw_line in metadata:
        try:
            line = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            # Lines that are not valid UTF-8 cannot be the header we are looking for.
            continue
        if line.startswith("Home-page:"):
            url = line[len("Home-page:"):].strip()
            break

    if not url:
        return ""
    # fetch_from_github may report "nothing found" in a falsy non-string form.
    return await fetch_from_github(url) or ""


def find_license_file(package_path: str) -> str:
    """
    Reads the first license-like file found in a package folder.

    A ``licenses`` sub-folder is searched instead of the folder itself when it exists. A file
    qualifies when its name starts with "license", "licence" or "copying" (case-insensitive).

    :param package_path: folder to search.
    :return: content of the license file, or an empty string when there is none.
    """
    if not os.path.isdir(package_path):
        return ""

    licenses_folder = join(package_path, "licenses")
    license_search_folder = licenses_folder if os.path.isdir(licenses_folder) else package_path

    # safe because the (possibly) case-sensitive path is what we have (unlike URL)
    #   verify if any version of the common licen[cs]e or copying names appear
    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)

    license_path = ""
    # Sorted so that the chosen file does not depend on the directory listing order.
    for name in sorted(os.listdir(license_search_folder)):
        candidate = join(license_search_folder, name)
        if os.path.isdir(candidate):
            continue
        if license_regex.match(name):
            license_path = candidate
            break

    if not license_path:
        return ""

    # i want to try / except here because i want to know when we have issues reading files.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetches a license from the internal cache or, failing that, via PyPI and GitHub.

    The PyPI JSON API is queried for the project's home page, which is then handed to
    ``fetch_from_github``.

    :param dependency: package name.
    :param version: package version.
    :param session: HTTP session to use; defaults to the module-level ``_session``.
    :return: the license text, or an empty string when PyPI or GitHub has nothing.
    :raises ValueError: when PyPI knows the release but lists no home page for it.
    """
    if session is None:
        session = _session

    license_text = await fetch_license_from_internal_cache(dependency, version)
    if license_text:
        return license_text

    async with session.get(f"https://pypi.org/pypi/{dependency}/{version}/json", ssl=get_ssl_cert()) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    info = response_json.get("info") or {}
    # "project_urls" may be null or lack a "Homepage" entry.
    home_page = info.get("home_page") or (info.get("project_urls") or {}).get("Homepage")
    if not home_page:
        raise ValueError(f"home_page is missing for {dependency}=={version}")

    return await fetch_from_github(home_page) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Looks up a license text in the internal license cache.

    :param dependency: package name; underscores are replaced by dashes for the cache key.
    :param version: package version.
    :param session: HTTP session to use; defaults to the module-level ``_session``.
    :return: the cached license text, or an empty string when the cache is disabled, has no
             entry or cannot be reached.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}") as response:
            if response.status != 200:
                return ""
            payload = await response.json()
        print(f"Found license for {dependency}=={version} in internal cache.")
        # A missing or null "text" field counts as "not cached".
        return payload.get("text") or ""
    except Exception:
        # Best effort: the cache is optional. ``Exception`` (not a bare except) keeps task
        # cancellation working.
        return ""


async def fetch_from_github(url: str, session: "Optional[aiohttp.ClientSession]" = None) -> str:
    """
    Downloads the license text of a project hosted on GitHub.

    The license file name is first guessed on raw.githubusercontent.com for every
    combination of ``main_branches`` and ``license_names``; the GitHub license API
    is only queried when none of the guesses succeeds.

    :param url: project home page. URLs whose host does not contain "github" are rejected.
    :param session: HTTP session to use; defaults to the module-level ``_session``.
    :return: the license text, or an empty string when nothing could be retrieved.
    """
    # Imported locally: a plain ``import urllib`` does not guarantee that the
    # ``urllib.parse`` submodule has been loaded.
    from urllib.parse import urlparse

    if not url:
        return ""

    parsed_url = urlparse(url)
    if "github" not in parsed_url.netloc:
        # Always return a string so that callers can rely on the annotated type.
        return ""

    if session is None:
        session = _session

    repo_path = parsed_url.path.strip("/")
    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            raw_url = f"https://raw.githubusercontent.com/{repo_path}/{branch}/{license_name}"
            # ``async with`` releases the connection after every probe, including failed ones.
            async with session.get(raw_url, ssl=ssl_context) as response:
                if response.status == 200:
                    return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    async with session.get(f"https://api.github.com/repos/{repo_path}/license", ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    download_url = response_json.get("download_url")
    if not download_url:
        return ""

    async with session.get(download_url, ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Uploads a license text to the internal license cache (best effort).

    Nothing is sent unless both ``LICENSE_CACHE_URL`` and ``LICENSE_CACHE_API_KEY`` are set.
    Failures are reported on stdout and never raised, so a broken cache cannot abort a run.

    :param dependency: package name; underscores are replaced by dashes for the cache key.
    :param version: package version.
    :param license_text: license text to store.
    :param session: HTTP session to use; defaults to the module-level ``_session``.
    :return: nothing meaningful; callers ignore the result.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        # ``async with`` releases the connection once the status has been read.
        async with session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        ) as response:
            status = response.status
    except Exception as exc:
        # ``Exception`` (not a bare except) so that task cancellation still propagates.
        print(f"[Warning] Could not put {dependency}=={version} license into internal cache: {exc}")
        return

    if status == 201:
        print(f"Put license for {dependency}=={version} into internal cache.")
    elif status == 401:
        print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")


def check_gpl(dependency: str, license_text: str):
    """
    Aborts the process when a license text mentions GPL or LGPL.

    :param dependency: name printed in the error message.
    :param license_text: license text to scan; empty or ``None`` is accepted and ignored.
    :raises SystemExit: with code 666 when the word "GPL" or "LGPL" occurs anywhere in the text.
    """
    # A raw string is required: in a normal string "\b" is a backspace character, not a
    # word boundary. ``search`` is used because ``match`` only looks at the very beginning.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Gathers the licenses of all requirements and writes or returns the combined text.

    :param requirements_path: path of the requirements file passed to ``get_licenses``.
    :param pull_licenses: optional list of requirement names to restrict the run to.
    :param dest: output file. When omitted the combined text is returned instead.
    :param download_dir: directory handed to ``get_licenses`` for downloaded packages.
    :param ignore: package names (without versions) to skip.
    :param ignore_all_dependencies: only handle the listed requirements, not their dependencies.
    :param overwrite: overwrite ``dest`` instead of appending to it.
    :return: the combined license text when ``dest`` is not given, otherwise ``None``.
             ``None`` is also returned when gathering fails; the error is printed.
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    # ``dest`` is optional, so only derive a directory from it when it is set. The base name
    # is used so that repeated calls do not keep prefixing the directory.
    dest_dir = os.path.dirname(dest) if dest else ""
    PACKAGES_FILE = os.path.join(dest_dir, os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        # One section per package: name, underline of the same length, license text.
        license_text = "".join(
            f"{dep}\n{'-' * len(dep)}\n{text}\n\n" for dep, text in licenses.items()
        )

        if not dest:
            return license_text

        mode = "w" if overwrite else "a+"
        with open(dest, mode, encoding="utf-8") as file:
            file.write(license_text)
    except Exception as exc:
        # NOTE(review): errors are only printed, so the caller cannot tell failure from
        # success -- confirm whether a non-zero exit status is wanted here.
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Builds the SSL setting that is passed to outgoing HTTP requests.

    :return: an SSL context backed by certifi's CA bundle, or ``False`` when certifi
             cannot be imported.
    """
    try:
        import certifi as ca_store

        ssl_setting = ssl.create_default_context(cafile=ca_store.where())
    except ImportError:
        # Don't verify SSL certificate if certifi is not installed.
        ssl_setting = False
    return ssl_setting


if __name__ == "__main__":
    # Command-line entry point: parse the options and forward them to main().
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    parser.add_argument("--pull-licenses", dest="pull_licenses", type=str, nargs="+",
        help="Specifies list of licenses that should be pulled from the requirements.txt list")
    parser.add_argument("-d", "--dest", dest="dest", type=str)
    parser.add_argument("--download-dir", dest="download_dir", type=str)
    parser.add_argument(
        "--ignore",
        dest="ignore",
        action="append",
        help="specifies dependency which licenses should be ignored. "
             "the script will not try to download license for ignored dependency.",
    )
    parser.add_argument(
        "--ignore-all-dependencies",
        dest="ignore_all",
        action="store_true",
        # The two literals are concatenated, so the first one must end with a space.
        help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements. "
             "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
    )
    parser.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="Overwrites the output file if present, otherwise license texts are appended.",
    )
    args = parser.parse_args()

    text = main(args.requirements, args.pull_licenses, args.dest, args.download_dir, args.ignore, ignore_all_dependencies=args.ignore_all,
                overwrite=args.overwrite)
    # main() only returns text when no destination file was given.
    if text:
        print(text)


pyjwt-2.6.0
-----------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

try:
    import aiohttp
except:
    if all([platform.uname().system == "Linux", platform.uname().machine == "aarch64"]):
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

# Aliases that make the Dict[...] return annotations of the license helpers self-describing.
DependencyName = str
LicenseText = str

# HTTP session shared by the helpers below; get_licenses() assigns it for the duration of a run.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # added manually to pair common english spellings
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names tried when guessing the raw URL of a license file.
main_branches = [
    "master",
    "main",
    "latest",
]

# Name of the file that records which packages had their license gathered.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the internal license cache, read from the environment; a trailing slash is
# removed so that path segments can be appended with "/".
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# API key for the internal license cache, read from the environment.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Gathers the license text of every pinned requirement listed in a requirements file.

    :param requirements_path: path to a pip requirements file. Lines that do not start with an
        alphanumeric character (blank lines, comments, pip options) are skipped, every other
        line must pin its version with ``==``.
    :param pull_licenses: when given, only these dependency names are processed.
    :param download_dir: folder passed on to get_license() for ``pip download``.
    :param ignore: dependency names (without version) that must be skipped.
    :param ignore_all_dependencies: passed on to get_license(); do not pull transitive dependencies.
    :return: dict mapping "<name>-<version>" to its license text, ordered by key.
    :raises ValueError: if a requirement is not pinned, if nothing was found for a requirement,
        or if a gathered package ended up without a license text.
    """
    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    global _session
    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue

            dependency, separator, version = requirement.partition("==")
            if not separator:
                raise ValueError(f"{requirement} must contain a version.")

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                # Split only once: anything after the first ";" belongs to the markers.
                version, markers = version.split(";", 1)
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        await asyncio.gather(*license_tasks.values())

        licenses = {}
        # Packages reported without a license text. Another requirement may still provide the
        # text later in the loop, so they are only verified after all results were merged.
        pending = []
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                if package in ignore:
                    continue

                if licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")
                else:
                    pending.append(package)

        # Nothing changes `licenses` any more at this point, so one pass over the pending
        # packages is enough to tell which of them are really missing.
        missing = []
        for package in pending:
            if licenses.get(package):
                print(f"\tverified found license for (after repeat 0): {package}")
            else:
                missing.append(package)

        if missing:
            raise ValueError(f"Licenses not found for {missing}")

        licenses = {dependency: licenses[dependency] for dependency in sorted(licenses)}
        for dependency, license_text in licenses.items():
            if license_text:
                check_gpl(dependency, license_text)
        return licenses


# Operators of a single marker expression. Two-character operators are listed first so that
# ">=" is not read as ">", and "in"/"not in" only match as whole words (not inside "win32").
_MARKER_OPERATOR_REGEX = re.compile(r"(>=|<=|==|!=|<|>|\bnot\s+in\b|\bin\b)")

# Plain dotted release number such as "3", "3.10" or "3.10.4".
_DOTTED_VERSION_REGEX = re.compile(r"\d+(\.\d+)*$")

_MARKER_COMPARATORS = {
    ">=": operator.ge,
    "<=": operator.le,
    "<": operator.lt,
    ">": operator.gt,
    "==": operator.eq,
    "!=": operator.ne,
}


def _marker_version_key(version: str, width: int) -> tuple:
    """Turns "3.10" into (3, 10), zero-padded to *width* components."""
    parts = [int(part) for part in version.split(".")]
    return tuple(parts + [0] * (width - len(parts)))


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed.

    Only a single ``<marker> <operator> <value>`` expression is supported.

    :param markers: the text after ``;`` on a requirement line, may be empty.
    :return: False when the marker evaluates to false for the running interpreter; True
        otherwise, including when there is no marker, no operator or the marker name is unknown.
    :raises ValueError: if the markers combine several conditions with ``and`` / ``or``.
    """
    if not markers:
        return True

    # Raw string and search(): "\b" in a normal string is a backspace character and match()
    # only looks at the very beginning of the text.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    found = _MARKER_OPERATOR_REGEX.search(markers)
    if not found:
        # No operators were found, include the dependency.
        return True

    # Collapse whitespace so "not   in" is recognised as "not in".
    op = " ".join(found.group(1).split())
    marker = markers[:found.start()].strip(" '\"")
    value = markers[found.end():].strip(" '\"")

    binding = get_marker_bindings().get(marker)
    if not binding:
        # Unknown marker binding -- skip marker and include the dependency
        return True

    # `sys_platform in "linux darwin"` follows Python semantics: the marker value is searched
    # inside the literal on the right-hand side.
    if op == "in":
        return binding in value
    if op == "not in":
        return binding not in value

    if _DOTTED_VERSION_REGEX.match(binding) and _DOTTED_VERSION_REGEX.match(value):
        # Compare release numbers numerically; as strings "3.10" sorts before "3.6".
        width = max(binding.count("."), value.count(".")) + 1
        return _MARKER_COMPARATORS[op](_marker_version_key(binding, width), _marker_version_key(value, width))

    return _MARKER_COMPARATORS[op](binding, value)


_bindings = {}


def get_marker_bindings():
    """
    Returns the values of the environment markers (os name, platform, python version and
    other system information) for the running interpreter.

    The dictionary is computed on the first call and the very same object is handed out
    on every later call.
    """
    global _bindings
    if not _bindings:
        implementation = getattr(sys, 'implementation', None)
        if implementation is None:
            impl_name, impl_version = '', '0'
        else:
            info = implementation.version
            impl_version = f"{info.major}.{info.minor}.{info.micro}"
            if info.releaselevel != 'final':
                # e.g. "3.12.0" + "b" + "1" for a beta release
                impl_version += info.releaselevel[0] + str(info.serial)
            impl_name = implementation.name

        major, minor = platform.python_version_tuple()[:2]
        _bindings = dict(
            implementation_name=impl_name,
            implementation_version=impl_version,
            os_name=os.name,
            platform_machine=platform.machine(),
            platform_python_implementation=platform.python_implementation(),
            platform_release=platform.release(),
            platform_system=platform.system(),
            platform_version=platform.version(),
            python_full_version=platform.python_version(),
            python_version=f"{major}.{minor}",
            sys_platform=sys.platform,
        )
    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """
    Downloads the license text stored for a package in the package-metadata S3 bucket.

    :param package: package name.
    :param version: package version.
    :param package_manager: first path segment of the object key.
    :return: the license text, or "" when the object is not available or the request failed.
    """
    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        async with _session.get(url) as response:
            # Anything but 200 carries an error document, which must not be taken for a license.
            if response.status != 200:
                return ""
            return await response.text()
    except Exception:
        # Best effort lookup: a failed request simply means "no license here". Only
        # Exception is caught so that task cancellation and Ctrl+C still propagate.
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Finds the license of one dependency.

    With ``download_dir`` the lookup order is: an already downloaded copy in that folder, the
    S3 bucket, ``pip download``. Without it the folders of ``sys.path`` are searched. In both
    cases fetch_license() is the last resort.

    :param dependency: dependency name.
    :param version: pinned version.
    :param download_dir: folder used for ``pip download``; falsy to search ``sys.path`` instead.
    :param ignore: dependency names to skip; ``None`` means nothing is ignored.
    :param ignore_all_dependencies: passed on to pip_download().
    :param extra_pip_args: extra command line arguments for pip, as one string.
    :return: dict of "<name>-<version>" to license text; empty when ignored or nothing was found.
    """
    if ignore is None:
        ignore = []
    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        tempdir = None
        if os.path.exists(download_dir):
            # Check if package has already been downloaded
            downloaded_name = f"{dependency.replace('-', '_')}-{version}"
            for path in os.listdir(download_dir):
                if downloaded_name not in path:
                    continue

                license_text = await get_local_license(dependency, version, os.path.join(download_dir, path))
                if license_text:
                    print(f"License for {dependency_str} found locally.")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

                license_text = await get_license_from_s3(dependency, version)
                if license_text:
                    print(f"Downloaded a license for {dependency_str} from s3")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

                print(f"No license available for {dependency_str}. Downloading a clean copy.")
                tempdir = tempfile.TemporaryDirectory()
                download_dir = tempdir.name
                # The remaining entries belong to the old folder; joining them with the
                # temporary one would only repeat the failed lookups.
                break

        try:
            licenses = await pip_download(dependency, version, download_dir, ignore,
                                          ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        finally:
            # The license texts are already in memory, the clean copy is not needed any more.
            if tempdir is not None:
                tempdir.cleanup()
        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}
    print(f"No license found for: {dependency_str}")
    return {}


# Splits a downloaded file name into name (group 1) and the text after the following "-" (group 2).
dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")

# Suffixes, as returned by os.path.splitext(), of archives that are opened with tarfile.
# Everything else (wheels, eggs, zip sdists) is opened with zipfile.
_TAR_SUFFIXES = (".gz", ".tgz", ".bz2", ".tbz")

# Archive extensions that group 2 of `dependency_regex` swallows for source distributions
# ("pkg-1.0.tar.gz" yields "1.0.tar.gz").
_ARCHIVE_SUFFIXES = (".tar.gz", ".tar.bz2", ".tgz", ".tbz", ".zip", ".whl", ".egg")


def _strip_archive_suffix(version: str) -> str:
    """Removes a trailing archive extension from a version parsed out of a file name."""
    for suffix in _ARCHIVE_SUFFIXES:
        if version.endswith(suffix):
            return version[: -len(suffix)]
    return version


def _extract_archive(dependency_path: str) -> str:
    """Unpacks a downloaded archive next to itself and returns the folder holding its content."""
    archive_path, archive_ext = os.path.splitext(dependency_path)
    if archive_ext not in _TAR_SUFFIXES:
        with zipfile.ZipFile(dependency_path) as archive:
            archive.extractall(archive_path)
        return archive_path

    if dependency_path.endswith((".tar.gz", ".tgz")):
        mode = "r:gz"
    elif dependency_path.endswith((".tar.bz2", ".tbz")):
        mode = "r:bz2"
    else:
        mode = "r"

    with tarfile.open(dependency_path, mode) as archive:
        if hasattr(tarfile, "data_filter"):
            # Downloaded archives are untrusted: refuse members that would land outside
            # of the target folder where the interpreter supports extraction filters.
            archive.extractall(archive_path, filter="data")
        else:
            archive.extractall(archive_path)

    top_name = os.path.basename(archive_path)
    if top_name.endswith(".tar"):
        # Unpacking .tar.gz or .tar.bz2 will contain an extra .tar folder
        return join(archive_path, top_name[: -len(".tar")])
    if os.path.isdir(join(archive_path, top_name)):
        # .tgz / .tbz: the content sits in a folder named like the archive.
        return join(archive_path, top_name)
    return archive_path


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Downloads a dependency with ``pip download`` and collects the license of every archive
    that pip placed into ``<download_dir>/<dependency>``.

    For each archive the license is looked up in the unpacked content first, then with
    fetch_license() and finally in the S3 bucket. Found texts are pushed to the internal cache.

    :param dependency: dependency name.
    :param version: pinned version.
    :param download_dir: parent folder of the per-dependency download folder.
    :param ignore: dependency names to skip.
    :param ignore_all_dependencies: download with ``--no-deps``; the internal cache is asked
        first because only this one package is needed then.
    :param extra_pip_args: extra command line arguments for pip, as one string.
    :return: dict of "<name>-<version>" to license text (the text may be empty).
    :raises Exception: if the pip process could not be started.
    """
    if ignore is None:
        ignore = []
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            return {f"{dependency}-{version}": license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = [
        sys.executable,
        "-m",
        "pip",
        "download",
        f"{dependency}=={version}",
        "-d",
        download_dep_dir,
        "-q",
        "--disable-pip-version-check",
        *additional_args,
    ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        returncode = await process.wait()
    except Exception as exc:
        raise Exception(f"Failed to find and download a license for {dependency}=={version} using pip.") from exc

    if returncode != 0:
        print(f"[Warning] pip download exited with code {returncode} for {dependency}=={version}.")
    if not os.path.isdir(download_dep_dir):
        # pip did not get as far as creating the folder; let the caller try other sources.
        return {}

    licenses = {}
    for path in os.listdir(download_dep_dir):
        dependency_path = join(download_dep_dir, path)
        if os.path.isdir(dependency_path):
            # Skip extracted folders.
            continue

        match = dependency_regex.match(path)
        if not match:
            continue

        dependency_name = match.group(1)
        dependency_version = _strip_archive_suffix(match.group(2))
        if dependency_name in ignore:
            continue

        archive_path = _extract_archive(dependency_path)

        license_text = await get_local_license(dependency_name, dependency_version, archive_path)
        if not license_text:
            license_text = await fetch_license(dependency_name, dependency_version)
        if not license_text:
            license_text = await get_license_from_s3(dependency_name, dependency_version)
        if license_text:
            await put_license_into_internal_cache(dependency_name, dependency_version, license_text)

        licenses[f"{dependency_name}-{dependency_version}"] = license_text
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Searches the folders of ``sys.path``, in order, for the license of a dependency.

    :return: the first non-empty text reported by get_local_license(), "" if there is none.
    """
    for folder in sys.path:
        text = await get_local_license(dependency, version, folder)
        if not text:
            continue
        return text
    return ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Looks for the license of a dependency below a local folder.

    Lookup order: ``*.egg-info`` / ``*.dist-info`` entries whose name starts with
    "<name>-<version>", then the entry named exactly like the dependency, then the
    folder itself.

    :param dependency: dependency name; "-" is replaced by "_" for the metadata lookup.
    :param version: dependency version.
    :param path: folder to search; anything that is not a folder yields "".
    :return: the license text or "".
    """
    if not os.path.isdir(path):
        return ""

    metadata_prefix = f"{dependency.lower().replace('-', '_')}-{version}"
    plain_name = dependency.lower()

    metadata_entries = []
    source_entry = ""
    for entry in os.listdir(path):
        lowered = entry.lower()
        if lowered.startswith(metadata_prefix):
            metadata_entries.append(join(path, entry))
        elif lowered == plain_name:
            source_entry = join(path, entry)

    for metadata_entry in metadata_entries:
        if metadata_entry.endswith(".egg-info"):
            read_license = egg_info_license
        elif metadata_entry.endswith(".dist-info"):
            read_license = dist_info_license
        else:
            continue

        text = await read_license(metadata_entry)
        if text:
            return text

    if source_entry:
        # Try to find a license file in the package source code
        text = find_license_file(source_entry)
        if text:
            return text

    # A license file can be in the specified folder itself.
    return find_license_file(path) or ""


async def egg_info_license(package_path: str) -> str:
    """
    Fetches the license through the ``Home-page`` recorded in the ``PKG-INFO`` of an egg-info.

    :param package_path: path of the ``*.egg-info`` entry.
    :return: the text returned by fetch_from_github() for the home page, or "" when there is
        no ``PKG-INFO`` file, no home page, or nothing could be fetched.
    """
    pkg_info_path = join(package_path, "PKG-INFO")
    if not os.path.isfile(pkg_info_path):
        # Also covers egg-info entries that are a plain file instead of a folder.
        return ""

    url = ""
    # Read as UTF-8 and tolerate stray bytes instead of depending on the locale encoding.
    with open(pkg_info_path, encoding="utf-8", errors="replace") as file:
        for line in file:
            if line.startswith("Home-page:"):
                url = line[len("Home-page:"):].strip()
                break

    if not url:
        return ""
    return (await fetch_from_github(url)) or ""


async def dist_info_license(package_path: str) -> str:
    """
    Finds the license of a package from its ``*.dist-info`` folder.

    A license file inside the folder wins; otherwise the ``Home-page`` of ``METADATA`` is
    handed to fetch_from_github().

    :param package_path: path of the ``*.dist-info`` folder.
    :return: the license text, or "" when nothing was found.
    """
    license_text = find_license_file(package_path)
    if license_text:
        return license_text

    metadata_path = join(package_path, "METADATA")
    if not exists(metadata_path):
        return ""

    try:
        with open(metadata_path, "rb") as file:
            metadata = file.readlines()
    except OSError:
        print(f"Could not open metadata file for: {package_path}")
        raise

    url = ""
    for raw_line in metadata:
        try:
            line = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            # Lines that are not valid UTF-8 cannot be the header we are looking for.
            continue
        if line.startswith("Home-page:"):
            url = line[len("Home-page:"):].strip()
            break

    if not url:
        return ""
    return (await fetch_from_github(url)) or ""


def find_license_file(package_path: str) -> str:
    """
    Reads the first license file found in a folder.

    A ``licenses`` sub-folder is searched instead of the folder itself when it exists.
    Entries are visited in sorted order so the result does not depend on the file system.

    :param package_path: folder to search.
    :return: the content of the license file, or "" if the folder does not exist or holds none.
    """
    if not os.path.isdir(package_path):
        return ""

    licenses_folder = join(package_path, "licenses")
    license_search_folder = licenses_folder if os.path.isdir(licenses_folder) else package_path

    # Compiled code and Python sources are never license texts, even when their name starts
    # with "license" (e.g. "license_tool.py").
    source_suffixes = (".py", ".pyc", ".pyo", ".pyi", ".pyd", ".so")

    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)
    license_path = ""
    for path in sorted(os.listdir(license_search_folder)):
        candidate = join(license_search_folder, path)
        if os.path.isdir(candidate) or path.lower().endswith(source_suffixes):
            continue
        # safe because the (possibly) case-sensitive path is what we have (unlike URL)
        #   verify if any version of the common licen[cs]e or copying names appear
        if license_regex.match(path) or path in license_names:
            license_path = candidate
            break

    if not license_path:
        return ""

    # i want to try / except here because i want to know when we have issues reading files.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: Optional["aiohttp.ClientSession"] = None) -> str:
    """
    Fetches the license of a released package: internal cache first, then the home page
    published on PyPI (handed to fetch_from_github()).

    :param dependency: package name.
    :param version: package version.
    :param session: HTTP session to use; defaults to the module level session.
    :return: the license text, or "" if PyPI does not know the release or nothing was found.
    :raises ValueError: if PyPI lists neither ``home_page`` nor a ``Homepage`` project URL.
    """
    if session is None:
        session = _session

    license_text = await fetch_license_from_internal_cache(dependency, version, session)
    if license_text:
        return license_text

    async with session.get(f"https://pypi.org/pypi/{dependency}/{version}/json", ssl=get_ssl_cert()) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    info = response_json.get("info") or {}
    # "project_urls" is null for packages without any URL and may lack the "Homepage" key.
    project_urls = info.get("project_urls") or {}
    home_page = info.get("home_page") or project_urls.get("Homepage")
    if not home_page:
        raise ValueError(f"home_page is missing for {dependency}=={version}")

    return (await fetch_from_github(home_page, session)) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: Optional["aiohttp.ClientSession"] = None) -> str:
    """
    Reads a license from the internal cache configured with REPO_LICENSING_CACHE_URL.

    :param dependency: package name; "_" is replaced by "-" for the cache key.
    :param version: package version.
    :param session: HTTP session to use; defaults to the module level session.
    :return: the cached text, or "" when the cache is disabled, has no entry or cannot be reached.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}") as response:
            if response.status != 200:
                return ""
            payload = await response.json()
            print(f"Found license for {dependency}=={version} in internal cache.")
            return payload.get("text") or ""
    except Exception:
        # The cache is optional: any failure is treated as a cache miss. Only Exception is
        # caught so that task cancellation and Ctrl+C still propagate.
        return ""


async def fetch_from_github(url: str, session: Optional["aiohttp.ClientSession"] = None) -> str:
    """
    Downloads the license file of a GitHub repository.

    The well-known file names of ``license_names`` are probed on the branches of
    ``main_branches`` first; the GitHub license API is only asked when none of them exists.

    :param url: home page of a project; only the ``<owner>/<repo>`` part of the path is used.
    :param session: HTTP session to use; defaults to the module level session.
    :return: the license text, or "" when the URL is not a GitHub repository or no license
        could be downloaded.
    """
    # `import urllib` alone does not guarantee that the `urllib.parse` submodule is loaded.
    from urllib.parse import urlparse

    if session is None:
        session = _session

    parsed_url = urlparse(url)
    if "github" not in parsed_url.netloc:
        return ""

    # Keep only "<owner>/<repo>": links such as ".../repo/tree/main/docs" or ".../repo.git"
    # would otherwise break every URL built below.
    path_parts = [part for part in parsed_url.path.split("/") if part]
    if len(path_parts) < 2:
        return ""
    owner, repo = path_parts[:2]
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    repository = f"{owner}/{repo}"

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            raw_url = f"https://raw.githubusercontent.com/{repository}/{branch}/{license_name}"
            async with session.get(raw_url, ssl=ssl_context) as response:
                if response.status == 200:
                    return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    async with session.get(f"https://api.github.com/repos/{repository}/license", ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    download_url = response_json.get("download_url")
    if not download_url:
        return ""

    async with session.get(download_url, ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: Optional["aiohttp.ClientSession"] = None
):
    """
    Uploads a license text to the internal cache (best effort).

    Nothing is sent unless both REPO_LICENSING_CACHE_URL and REPO_LICENSING_CACHE_API_KEY are
    set. Failures are reported on stdout and never raised.

    :param dependency: package name; "_" is replaced by "-" for the cache key.
    :param version: package version.
    :param license_text: text to store.
    :param session: HTTP session to use; defaults to the module level session.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        ) as response:
            if response.status == 201:
                print(f"Put license for {dependency}=={version} into internal cache.")
            elif response.status == 401:
                print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")
    except Exception as exc:
        # Filling the cache must never fail the run, but the reason should be visible.
        # Only Exception is caught so that task cancellation and Ctrl+C still propagate.
        print(f"[Warning] Could not put {dependency}=={version} license into internal cache: {exc}")


def check_gpl(dependency: str, license_text: str):
    """
    Aborts the program when a license text mentions GPL or LGPL.

    :param dependency: name used in the error message.
    :param license_text: license text to inspect; empty text is accepted.
    :raises SystemExit: with code 666 if "GPL" or "LGPL" occurs as a whole word.
    """
    # Raw string and search(): "\b" in a normal string is a backspace character and match()
    # only looks at the very beginning of the text, so the check could never trigger.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Gathers the licenses of a requirements file and writes or returns the combined text.

    :param requirements_path: path to the pip requirements file.
    :param pull_licenses: when given, only these dependency names are processed.
    :param dest: file to write the combined text to; when omitted the text is returned.
    :param download_dir: folder used for ``pip download``.
    :param ignore: dependency names (without version) to skip.
    :param ignore_all_dependencies: do not pull the dependencies of the requirements.
    :param overwrite: overwrite ``dest`` instead of appending to it.
    :return: the combined license text if ``dest`` is not given, otherwise None. None is also
        returned when gathering failed; the error is printed in that case.
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    if dest:
        # Keep the package list next to the destination file. Only the file name of the
        # current value is reused so that repeated calls do not pile up folders.
        PACKAGES_FILE = os.path.join(os.path.dirname(dest), os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        # One section per package: name, underline of the same length, license text.
        license_text = "".join(
            f"{dep}\n{'-' * len(dep)}\n{text}\n\n" for dep, text in licenses.items()
        )

        if dest:
            mode = "w" if overwrite else "a+"
            with open(dest, mode, encoding="utf-8") as file:
                file.write(license_text)
        else:
            return license_text
    except Exception as exc:
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Builds the SSL context used for outgoing HTTPS requests.

    :return: a verifying context based on the certifi CA bundle when certifi is installed,
        otherwise a verifying context based on the certificates trusted by the system.
    """
    try:
        import certifi
    except ImportError:
        # Without certifi fall back to the system trust store instead of switching
        # certificate verification off.
        return ssl.create_default_context()

    return ssl.create_default_context(cafile=certifi.where())


# Command line entry point: parse the options, run main() and print the combined license
# text when main() returns one (i.e. when no destination file was given).
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # Requirements file to read; every requirement must be pinned with "==".
    parser.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    # Optional allow-list of dependency names; one or more values.
    parser.add_argument("--pull-licenses", dest="pull_licenses", type=str, nargs="+",
        help="Specifies list of licenses that should be pulled from the requirements.txt list")
    # Output file for the combined license text.
    parser.add_argument("-d", "--dest", dest="dest", type=str)
    # Folder used for "pip download".
    parser.add_argument("--download-dir", dest="download_dir", type=str)
    # May be repeated; every occurrence adds one dependency name to the ignore list.
    parser.add_argument(
        "--ignore",
        dest="ignore",
        action="append",
        help="specifies dependency which licenses should be ignored. "
             "the script will not try to download license for ignored dependency.",
    )
    parser.add_argument(
        "--ignore-all-dependencies",
        dest="ignore_all",
        action="store_true",
        help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements."
             "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
    )
    parser.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="Overwrites the output file if present, otherwise license texts are appended.",
    )
    args = parser.parse_args()

    text = main(args.requirements, args.pull_licenses, args.dest, args.download_dir, args.ignore, ignore_all_dependencies=args.ignore_all,
                overwrite=args.overwrite)
    if text:
        print(text)


pyparsing-3.0.7
---------------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

try:
    import aiohttp
except:
    if all([platform.uname().system == "Linux", platform.uname().machine == "aarch64"]):
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

# Aliases used in annotations. The dictionaries returned by get_license()/pip_download()
# are keyed by "<name>-<version>" strings and hold the license text as value.
DependencyName = str
LicenseText = str

# HTTP session shared by all fetch helpers; get_licenses() binds it for the duration of a run.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least).
# fetch_from_github() probes these names one by one on every branch of `main_branches`.
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # Added manually to cover the alternative English spelling ("licence")
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names tried, in this order, when guessing the raw GitHub URL of a license file.
main_branches = [
    "master",
    "main",
    "latest",
]

# File that gets one line appended for every package a license was gathered for.
# main() re-points it to the folder of the destination file.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the optional license cache. Trailing slashes are stripped because
# request paths are appended with a leading "/".
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# Key sent in the "X-Key" header when a license is uploaded to the cache.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Gathers the license text of every pinned requirement listed in a requirements file.

    :param requirements_path: path to a pip requirements file. Lines that do not start with an
        alphanumeric character (blank lines, comments, pip options) are skipped, every other
        line must pin its version with ``==``.
    :param pull_licenses: when given, only these dependency names are processed.
    :param download_dir: folder passed on to get_license() for ``pip download``.
    :param ignore: dependency names (without version) that must be skipped.
    :param ignore_all_dependencies: passed on to get_license(); do not pull transitive dependencies.
    :return: dict mapping "<name>-<version>" to its license text, ordered by key.
    :raises ValueError: if a requirement is not pinned, if nothing was found for a requirement,
        or if a gathered package ended up without a license text.
    """
    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    global _session
    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue

            dependency, separator, version = requirement.partition("==")
            if not separator:
                raise ValueError(f"{requirement} must contain a version.")

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                # Split only once: anything after the first ";" belongs to the markers.
                version, markers = version.split(";", 1)
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        await asyncio.gather(*license_tasks.values())

        licenses = {}
        # Packages reported without a license text. Another requirement may still provide the
        # text later in the loop, so they are only verified after all results were merged.
        pending = []
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                if package in ignore:
                    continue

                if licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")
                else:
                    pending.append(package)

        # Nothing changes `licenses` any more at this point, so one pass over the pending
        # packages is enough to tell which of them are really missing.
        missing = []
        for package in pending:
            if licenses.get(package):
                print(f"\tverified found license for (after repeat 0): {package}")
            else:
                missing.append(package)

        if missing:
            raise ValueError(f"Licenses not found for {missing}")

        licenses = {dependency: licenses[dependency] for dependency in sorted(licenses)}
        for dependency, license_text in licenses.items():
            if license_text:
                check_gpl(dependency, license_text)
        return licenses


# Operators of a single marker expression. Two-character operators are listed first so that
# ">=" is not read as ">", and "in"/"not in" only match as whole words (not inside "win32").
_MARKER_OPERATOR_REGEX = re.compile(r"(>=|<=|==|!=|<|>|\bnot\s+in\b|\bin\b)")

# Plain dotted release number such as "3", "3.10" or "3.10.4".
_DOTTED_VERSION_REGEX = re.compile(r"\d+(\.\d+)*$")

_MARKER_COMPARATORS = {
    ">=": operator.ge,
    "<=": operator.le,
    "<": operator.lt,
    ">": operator.gt,
    "==": operator.eq,
    "!=": operator.ne,
}


def _marker_version_key(version: str, width: int) -> tuple:
    """Turns "3.10" into (3, 10), zero-padded to *width* components."""
    parts = [int(part) for part in version.split(".")]
    return tuple(parts + [0] * (width - len(parts)))


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed.

    Only a single ``<marker> <operator> <value>`` expression is supported.

    :param markers: the text after ``;`` on a requirement line, may be empty.
    :return: False when the marker evaluates to false for the running interpreter; True
        otherwise, including when there is no marker, no operator or the marker name is unknown.
    :raises ValueError: if the markers combine several conditions with ``and`` / ``or``.
    """
    if not markers:
        return True

    # Raw string and search(): "\b" in a normal string is a backspace character and match()
    # only looks at the very beginning of the text.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    found = _MARKER_OPERATOR_REGEX.search(markers)
    if not found:
        # No operators were found, include the dependency.
        return True

    # Collapse whitespace so "not   in" is recognised as "not in".
    op = " ".join(found.group(1).split())
    marker = markers[:found.start()].strip(" '\"")
    value = markers[found.end():].strip(" '\"")

    binding = get_marker_bindings().get(marker)
    if not binding:
        # Unknown marker binding -- skip marker and include the dependency
        return True

    # `sys_platform in "linux darwin"` follows Python semantics: the marker value is searched
    # inside the literal on the right-hand side.
    if op == "in":
        return binding in value
    if op == "not in":
        return binding not in value

    if _DOTTED_VERSION_REGEX.match(binding) and _DOTTED_VERSION_REGEX.match(value):
        # Compare release numbers numerically; as strings "3.10" sorts before "3.6".
        width = max(binding.count("."), value.count(".")) + 1
        return _MARKER_COMPARATORS[op](_marker_version_key(binding, width), _marker_version_key(value, width))

    return _MARKER_COMPARATORS[op](binding, value)


_bindings = {}


def get_marker_bindings():
    """
    Returns the values of the environment markers (os name, platform, python version and
    other system information) for the running interpreter.

    The dictionary is computed on the first call and the very same object is handed out
    on every later call.
    """
    global _bindings
    if not _bindings:
        implementation = getattr(sys, 'implementation', None)
        if implementation is None:
            impl_name, impl_version = '', '0'
        else:
            info = implementation.version
            impl_version = f"{info.major}.{info.minor}.{info.micro}"
            if info.releaselevel != 'final':
                # e.g. "3.12.0" + "b" + "1" for a beta release
                impl_version += info.releaselevel[0] + str(info.serial)
            impl_name = implementation.name

        major, minor = platform.python_version_tuple()[:2]
        _bindings = dict(
            implementation_name=impl_name,
            implementation_version=impl_version,
            os_name=os.name,
            platform_machine=platform.machine(),
            platform_python_implementation=platform.python_implementation(),
            platform_release=platform.release(),
            platform_system=platform.system(),
            platform_version=platform.version(),
            python_full_version=platform.python_version(),
            python_version=f"{major}.{minor}",
            sys_platform=sys.platform,
        )
    return _bindings


async def get_license_from_s3(
        package: str, version: str, package_manager="pip", session: "Optional[aiohttp.ClientSession]" = None
) -> str:
    """
    Downloads the license text stored for a package in the package-metadata S3 bucket.

    ``session`` is optional and falls back to the module-level ``_session``, like the other
    fetch helpers in this file.

    Returns the response body for an HTTP 200 answer and an empty string otherwise. This is
    a best-effort lookup: request failures are reported as "no license" instead of raising.
    """
    if session is None:
        session = _session

    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        async with session.get(url) as response:
            # Without this check the body of an error answer (e.g. for a missing key)
            # would be handed back as if it were the license text.
            if response.status != 200:
                return ""
            return await response.text()
    except Exception:
        # ``Exception`` rather than a bare except, so task cancellation and interpreter
        # shutdown are not swallowed.
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Finds the license text for one pinned dependency.

    With ``download_dir`` set, every entry of that folder whose name contains
    ``<dependency>-<version>`` is inspected first (local files, then S3); if nothing is found
    the package is downloaded with pip (into a temporary folder when a license-less copy was
    already present). Without ``download_dir`` the folders on ``sys.path`` are searched.
    The last resort in both cases is ``fetch_license``.

    Returns a mapping of ``"<name>-<version>"`` to license text; it may hold several entries
    when pip also downloaded dependencies. An empty dict means the dependency is ignored or
    no license was found.
    """
    # ``ignore`` defaults to None, which the membership test below cannot handle.
    if ignore is None:
        ignore = []
    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        if os.path.exists(download_dir):
            # Keep scanning the folder the caller gave us even after ``download_dir`` has been
            # pointed at a temporary folder for the clean download below.
            search_dir = download_dir
            tempdir = None
            # Check if package has already been downloaded
            for path in os.listdir(search_dir):
                if f"{dependency.replace('-', '_')}-{version}" not in path:
                    continue

                license_text = await get_local_license(dependency, version, os.path.join(search_dir, path))
                if license_text:
                    print(f"License for {dependency_str} found locally.")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

                license_text = await get_license_from_s3(dependency, version)
                if license_text:
                    print(f"Downloaded a license for {dependency_str} from s3")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

                print(f"No license available for {dependency_str}. Downloading a clean copy.")
                # The local name keeps the temporary folder alive until this function returns.
                tempdir = tempfile.TemporaryDirectory()
                download_dir = tempdir.name

        licenses = await pip_download(dependency, version, download_dir, ignore,
                                      ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}

    print(f"No license found for: {dependency_str}")
    # Explicit empty mapping (still falsy for callers) instead of an implicit None.
    return {}


# Splits an archive file name into two groups: everything before a "-" (letters, digits,
# "_" and "-"), and the run of word characters and dots that follows that "-".
dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Downloads ``dependency==version`` with ``pip download`` and collects a license for every
    archive that ends up in ``<download_dir>/<dependency>``.

    Each archive is unpacked next to itself and searched with ``get_local_license``; when that
    finds nothing, ``fetch_license`` and then ``get_license_from_s3`` are tried. Licenses that
    were found are pushed to the internal cache.

    With ``ignore_all_dependencies`` the internal cache is consulted first and pip runs with
    ``--no-deps``. ``extra_pip_args`` is split on whitespace and appended to the pip command.

    Returns a mapping of ``"<name>-<version>"`` to license text (the text may be empty when
    every lookup failed). Returns an empty dict when the dependency is ignored or when pip left
    no download folder behind.

    Raises Exception when the pip process could not be started or awaited.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            dependency_str = f"{dependency}-{version}"
            return {dependency_str: license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = [
        sys.executable,
        "-m",
        "pip",
        "download",
        f"{dependency}=={version}",
        "-d",
        download_dep_dir,
        "-q",
        "--disable-pip-version-check",
        *additional_args
    ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        await process.wait()
    except Exception as exc:
        raise Exception(f"Failed to find and download a license for {dependency}=={version} using pip.") from exc

    if process.returncode:
        print(f"[Warning] pip download exited with code {process.returncode} for {dependency}=={version}.")
    if not os.path.isdir(download_dep_dir):
        # pip produced nothing; an empty result lets the caller try its other sources
        # instead of crashing on the missing folder.
        return {}

    # Stripped before parsing so that "name-1.0.tar.gz" yields the version "1.0" rather than
    # "1.0.tar.gz". Compound suffixes come first.
    archive_suffixes = (".tar.gz", ".tar.bz2", ".tar.xz", ".tgz", ".tbz", ".tar", ".zip", ".whl")

    licenses = {}
    for path in os.listdir(download_dep_dir):
        dependency_path = join(download_dep_dir, path)
        if os.path.isdir(dependency_path):
            # Skip extracted folders.
            continue

        stem = path
        for suffix in archive_suffixes:
            if path.lower().endswith(suffix):
                stem = path[: -len(suffix)]
                break

        match = dependency_regex.match(stem)
        if not match:
            continue

        dependency_name = match.group(1)
        dependency_version = match.group(2)
        if dependency_name in ignore:
            continue

        archive_path, _ = os.path.splitext(dependency_path)
        # Detect tarballs by content: the former extension test listed "bz" / "bz2" without
        # the leading dot, so .tar.bz2 downloads were handed to zipfile.
        if tarfile.is_tarfile(dependency_path):
            # Mode "r" lets tarfile detect the compression itself.
            with tarfile.open(dependency_path, "r") as archive:
                if hasattr(tarfile, "data_filter"):
                    # Downloaded archives are untrusted input; refuse members that would
                    # escape the target folder where the interpreter supports it.
                    archive.extractall(archive_path, filter="data")
                else:
                    archive.extractall(archive_path)

            if archive_path.endswith(".tar"):
                # Unpacking .tar.gz or .tar.bz2 will contain an extra .tar folder
                archive_name = os.path.split(archive_path)[-1]
                archive_path = join(archive_path, archive_name[: -len(".tar")])
        else:
            with zipfile.ZipFile(dependency_path) as archive:
                archive.extractall(archive_path)

        dependency_str = f"{dependency_name}-{dependency_version}"
        licenses[dependency_str] = await get_local_license(dependency_name, dependency_version, archive_path)

        if not licenses[dependency_str]:
            licenses[dependency_str] = await fetch_license(dependency_name, dependency_version)

        if not licenses[dependency_str]:
            licenses[dependency_str] = await get_license_from_s3(dependency_name, dependency_version)

        if licenses[dependency_str]:
            await put_license_into_internal_cache(dependency_name, dependency_version, licenses[dependency_str])
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Looks for the dependency's license in every folder listed in ``sys.path``, in order.

    Returns the first non-empty text reported by ``get_local_license``, or an empty string
    when no folder yields one.
    """
    for search_root in sys.path:
        found = await get_local_license(dependency, version, search_root)
        if not found:
            continue
        return found
    return ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Searches the folder ``path`` for a license that belongs to ``dependency`` at ``version``.

    Lookup order:
      1. entries named ``<dependency with "_" for "-">-<version>*`` (case-insensitive) that
         end in ``.egg-info`` or ``.dist-info``;
      2. an entry whose name equals the dependency name (case-insensitive);
      3. ``path`` itself.

    Returns the first non-empty license text, or an empty string when ``path`` is not a
    folder or nothing was found.
    """
    if not os.path.isdir(path):
        return ""

    metadata_prefix = f"{dependency.lower().replace('-', '_')}-{version}"
    plain_name = dependency.lower()

    metadata_entries = []
    source_folder = ""
    for entry in os.listdir(path):
        lowered = entry.lower()
        if lowered.startswith(metadata_prefix):
            metadata_entries.append(join(path, entry))
        elif lowered == plain_name:
            source_folder = join(path, entry)

    for metadata_entry in metadata_entries:
        if metadata_entry.endswith(".egg-info"):
            reader = egg_info_license
        elif metadata_entry.endswith(".dist-info"):
            reader = dist_info_license
        else:
            continue
        text = await reader(metadata_entry)
        if text:
            return text

    # The package source folder (when one was seen) is tried before the folder itself.
    for folder in filter(None, (source_folder, path)):
        text = find_license_file(folder)
        if text:
            return text

    return ""


async def egg_info_license(package_path: str) -> str:
    """
    Resolves a license through the ``Home-page`` field of an ``.egg-info`` entry.

    ``package_path`` may be an ``.egg-info`` folder (its ``PKG-INFO`` file is read) or a
    single metadata file, which is then read directly.

    Returns the text obtained from ``fetch_from_github`` for the home page, or an empty
    string when the metadata file or the field is missing or nothing could be fetched.
    """
    if os.path.isfile(package_path):
        info_path = package_path
    else:
        info_path = join(package_path, "PKG-INFO")
    if not os.path.isfile(info_path):
        return ""

    # Read bytes and decode only the field we need, so the platform's default text encoding
    # cannot make the whole lookup fail.
    with open(info_path, "rb") as file:
        info = file.readlines()

    field = b"Home-page:"
    url = ""
    for line in info:
        if line.startswith(field):
            url = line[len(field):].decode("utf-8", errors="ignore").strip()
            break

    if not url:
        return ""
    return (await fetch_from_github(url)) or ""


async def dist_info_license(package_path: str) -> str:
    """
    Resolves a license for a ``.dist-info`` folder.

    A license file found by ``find_license_file`` wins. Otherwise the ``Home-page`` field of
    the ``METADATA`` file is passed to ``fetch_from_github`` and that result is returned.
    Nothing (None) is returned when there is no ``METADATA`` file or no usable home page.
    """
    bundled = find_license_file(package_path)
    if bundled:
        return bundled

    metadata_path = join(package_path, "METADATA")
    if not exists(metadata_path):
        return None

    with open(metadata_path, "rb") as file:
        try:
            raw_lines = file.readlines()
        except Exception:
            print(f"Could not open metadata file for: {package_path}")
            raise

    field = "Home-page:"
    home_page = None
    for raw_line in raw_lines:
        # Lines that are not valid UTF-8 are skipped.
        try:
            text = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            continue
        if text.startswith(field):
            home_page = text[len(field):].strip()
            break

    if not home_page:
        return None
    return await fetch_from_github(home_page)


def find_license_file(package_path: str) -> str:
    """
    Reads a license file located directly inside ``package_path``.

    When ``package_path`` has a ``licenses`` sub-folder, only that sub-folder is searched.
    A file qualifies when its name starts with "license", "licence" or "copying" (any case)
    or is listed in ``license_names``. Entries are visited in sorted order so the choice
    does not depend on the order in which the file system lists them.

    Returns the file content, or an empty string when ``package_path`` is not a folder or no
    license file exists. Bytes that are not valid UTF-8 are dropped after a warning.
    """
    if not os.path.isdir(package_path):
        return ""

    # isdir rather than exists: a regular file called "licenses" must not be listed as a folder.
    licenses_folder = join(package_path, "licenses")
    license_search_folder = licenses_folder if os.path.isdir(licenses_folder) else package_path

    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)
    license_path = ""
    for path in sorted(os.listdir(license_search_folder)):
        candidate = join(license_search_folder, path)
        if os.path.isdir(candidate):
            continue
        # safe because the (possibly) case-sensitive path is what we have (unlike URL)
        #   verify if any version of the common licen[cs]e or copying names appear
        if license_regex.match(path) or path in license_names:
            license_path = candidate
            break

    if not license_path:
        return ""

    # i want to try / except here because i want to know when we have issues reading files.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetches a license for ``dependency==version`` over the network.

    The internal license cache is asked first. Otherwise the PyPI JSON metadata of the release
    is downloaded and a project URL from it is handed to ``fetch_from_github``. The
    ``home_page`` field and the "Homepage" project URL are preferred, but a GitHub-hosted URL
    wins over both because only those can be queried.

    ``session`` defaults to the module-level ``_session`` and is used for every request made
    on behalf of this call.

    Returns the license text, or an empty string when PyPI does not answer with HTTP 200 or
    no license could be downloaded.

    Raises ValueError when the PyPI metadata lists no project URL at all.
    """
    if session is None:
        session = _session

    # Forward the session so an explicitly passed one is actually used by the helpers.
    license_text = await fetch_license_from_internal_cache(dependency, version, session)
    if license_text:
        return license_text

    response = await session.get(f"https://pypi.org/pypi/{dependency}/{version}/json", ssl=get_ssl_cert())
    if response.status != 200:
        return ""

    response_json = await response.json()
    info = response_json.get("info") or {}
    # "project_urls" can be null or lack a "Homepage" entry; plain indexing raised
    # TypeError / KeyError for such releases.
    project_urls = info.get("project_urls") or {}

    candidates = [info.get("home_page"), project_urls.get("Homepage"), *project_urls.values()]
    candidates = [candidate for candidate in candidates if candidate]
    if not candidates:
        raise ValueError(f"home_page is missing for {dependency}=={version}")

    home_page = next((candidate for candidate in candidates if "github" in candidate), candidates[0])
    return (await fetch_from_github(home_page, session)) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Reads a license from the internal cache service configured through ``LICENSE_CACHE_URL``.

    Underscores in the dependency name are replaced by dashes before the lookup. ``session``
    defaults to the module-level ``_session``.

    Returns the ``text`` field of the cached entry, or an empty string when the cache is
    disabled, holds no entry, or the request fails (the cache is best effort).
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}") as response:
            if response.status != 200:
                return ""
            payload = await response.json()

        cached_text = payload.get("text")
        if not cached_text:
            return ""
        print(f"Found license for {dependency}=={version} in internal cache.")
        return cached_text
    except Exception:
        # ``Exception`` instead of a bare except: task cancellation must still propagate.
        return ""


async def fetch_from_github(url: str, session: aiohttp.ClientSession = None) -> str:
    """
    Downloads the license file of the GitHub repository that ``url`` points to.

    The usual license file names are first probed on raw.githubusercontent.com for each branch
    in ``main_branches``; only when none of them exists is the GitHub license API asked.
    ``session`` defaults to the module-level ``_session``.

    Returns the license text, or an empty string when ``url`` is not a GitHub repository URL
    or no license could be downloaded.
    """
    # The file only does ``import urllib``; importing the submodule here makes sure
    # ``urllib.parse`` is available without relying on another module having loaded it.
    import urllib.parse

    if session is None:
        session = _session

    parsed_url = urllib.parse.urlparse(url)
    if "github" not in parsed_url.netloc:
        return ""

    # Keep only "<owner>/<repository>": extra segments (/tree/..., /issues) and a ".git"
    # suffix would make every request below miss.
    segments = [segment for segment in parsed_url.path.split("/") if segment]
    if len(segments) < 2:
        return ""
    owner, repository = segments[0], segments[1]
    if repository.endswith(".git"):
        repository = repository[: -len(".git")]
    url = f"{owner}/{repository}"

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            raw_url = f"https://raw.githubusercontent.com/{url}/{branch}/{license_name}"
            # ``async with`` releases the connection of every miss right away.
            async with session.get(raw_url, ssl=ssl_context) as response:
                if response.status == 200:
                    return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    async with session.get(f"https://api.github.com/repos/{url}/license", ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    download_url = response_json.get("download_url")
    if not download_url:
        return ""

    async with session.get(download_url, ssl=ssl_context) as response:
        # An error page must not be mistaken for the license text.
        if response.status != 200:
            return ""
        return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Stores a license text in the internal cache service.

    Nothing is sent unless both ``LICENSE_CACHE_URL`` and ``LICENSE_CACHE_API_KEY`` are set.
    Underscores in the dependency name are replaced by dashes, and the API key travels in the
    ``X-Key`` header. ``session`` defaults to the module-level ``_session``.

    The upload is best effort: a failed request is reported with a warning and never raised.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        ) as response:
            if response.status == 201:
                print(f"Put license for {dependency}=={version} into internal cache.")
            elif response.status == 401:
                print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")
    except Exception as exc:
        # Still non-fatal, but no longer silent; cancellation is not swallowed any more.
        print(f"[Warning] Could not put {dependency}=={version} license into internal cache: {exc}")


def check_gpl(dependency: str, license_text: str):
    """
    Aborts the run when a license text mentions GPL or LGPL as a separate word.

    Prints an error naming the dependency and exits the process with status 666. Empty
    texts and texts without such a mention return normally.
    """
    # Raw string + search(): with a plain literal "\b" is a backspace character and match()
    # only inspects the start of the text, so the check could never trigger.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Collects the licenses for a requirements file and writes them out.

    Every license is rendered as the package key, a dashed underline of the same length and
    the license text. With ``dest`` the result is written to that file (replacing it when
    ``overwrite`` is set, appending otherwise) and the packages list file is placed next to
    it; without ``dest`` the rendered text is returned.

    Errors raised while gathering or writing are printed and the function returns None.
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    if dest:
        # ``dest`` is optional, so only relocate the packages list when it is given. The base
        # name is used so repeated calls do not nest the previous folder into the path.
        PACKAGES_FILE = os.path.join(os.path.dirname(dest), os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        rendered = "".join(
            f"{dep}\n" f"{'-' * len(dep)}\n" f"{text}\n\n" for dep, text in licenses.items()
        )

        if dest:
            mode = "w" if overwrite else "a+"
            with open(dest, mode, encoding="utf-8") as file:
                file.write(rendered)
        else:
            return rendered
    except Exception as exc:
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Builds the ``ssl`` argument passed to the HTTP requests in this file.

    Returns a default SSL context that trusts the certifi CA bundle, or False when
    certifi cannot be imported.
    """
    try:
        import certifi

        ca_bundle = certifi.where()
        context = ssl.create_default_context(cafile=ca_bundle)
    except ImportError:
        # Don't verify SSL certificate if certifi is not installed.
        # (ModuleNotFoundError is a subclass of ImportError.)
        return False
    return context


if __name__ == "__main__":
    # Command-line entry point: parse the options, run main() and print the collected text
    # when main() returned it (i.e. when no destination file was given).
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    parser.add_argument("--pull-licenses", dest="pull_licenses", type=str, nargs="+",
        help="Specifies list of licenses that should be pulled from the requirements.txt list")
    parser.add_argument("-d", "--dest", dest="dest", type=str)
    parser.add_argument("--download-dir", dest="download_dir", type=str)
    parser.add_argument(
        "--ignore",
        dest="ignore",
        action="append",
        help="specifies dependency which licenses should be ignored. "
             "the script will not try to download license for ignored dependency.",
    )
    parser.add_argument(
        "--ignore-all-dependencies",
        dest="ignore_all",
        action="store_true",
        # The first literal now ends with a space; the two sentences used to run together.
        help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements. "
             "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
    )
    parser.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="Overwrites the output file if present, otherwise license texts are appended.",
    )
    args = parser.parse_args()

    text = main(args.requirements, args.pull_licenses, args.dest, args.download_dir, args.ignore, ignore_all_dependencies=args.ignore_all,
                overwrite=args.overwrite)
    if text:
        print(text)


pytest-7.2.2
------------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

try:
    import aiohttp
except:
    if all([platform.uname().system == "Linux", platform.uname().machine == "aarch64"]):
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

# Type aliases used in the annotations of the license lookup functions.
DependencyName = str
LicenseText = str

# Shared HTTP session; get_licenses() binds it for the duration of its run.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # added manually to pair common english spellings
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names to try, in this order.
# NOTE(review): presumably used when guessing raw license URLs on GitHub -- confirm at the use site.
main_branches = [
    "master",
    "main",
    "latest",
]

# File that get_licenses() appends one "<package>" line to for every license it gathered.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the license cache, taken from the environment; trailing slashes are removed.
# get_licenses() reports the cache as disabled when this is not set.
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# API key for the license cache, taken from the environment (None when not set).
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Gathers the license text of every pinned requirement listed in ``requirements_path``.

    Lines that do not start with a letter or digit (blank lines, comments, pip options) are
    skipped. Every other line must be pinned with ``==`` and may carry an environment marker
    after ``;`` as well as extra pip arguments after the version. A requirement is processed
    when its marker applies to this system, its name is not in ``ignore`` and, if
    ``pull_licenses`` is given, its name is listed there.

    All lookups run concurrently on one shared HTTP session. Each package with a license is
    also appended to ``PACKAGES_FILE``. Every collected text is finally passed to
    ``check_gpl``.

    Returns a dict mapping ``"<name>-<version>"`` to license text, sorted by key.

    Raises ValueError for a requirement without a version, for a requirement that yielded no
    license at all, and for packages that ended up without a license text.
    """
    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    global _session
    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue
            try:
                dependency, version = requirement.split("==", maxsplit=1)
            except ValueError:
                raise ValueError(f"{requirement} must contain a version.")

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                # Split once only, so a further ";" cannot break the unpacking.
                version, markers = version.split(";", 1)
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        await asyncio.gather(*license_tasks.values())

        licenses = {}
        # Packages reported without a text; a later requirement may still provide one.
        unresolved = []
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                # Result keys look like "<name>-<version>" while ``ignore`` holds bare names,
                # so the name part has to be compared as well.
                if package in ignore or package.rsplit("-", 1)[0] in ignore:
                    continue

                if licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")
                else:
                    unresolved.append(package)

        # Everything has been gathered at this point, so one pass settles each package.
        # (The former retry queue re-checked the same unchanged dictionary several times.)
        missing = []
        for package in unresolved:
            if licenses.get(package):
                print(f"\tverified found license for (after repeat 0): {package}")
            else:
                missing.append(package)

        if len(missing) > 0:
            raise ValueError(f"Licenses not found for {missing}")

        licenses = {dependency: licenses[dependency] for dependency in sorted(licenses)}
        for dependency, license_text in licenses.items():
            if license_text:
                check_gpl(dependency, license_text)
        return licenses


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed

    Only one ``<left> <operator> <right>`` comparison is understood. One side has to be a
    marker variable known to ``get_marker_bindings``; the other side is taken as a literal
    (surrounding quotes and spaces are stripped).

    Returns False only when the comparison could be evaluated and is false on this system.
    An empty marker, a marker without a supported operator and a marker that refers to no
    known (non-empty) binding all include the dependency.

    Raises ValueError when the marker combines comparisons with ``and`` / ``or``.
    """
    if not markers:
        return True

    # Raw string and search(): in a plain literal "\b" is a backspace character and match()
    # only looks at the very start of the text, so compound markers were never detected.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    def as_version(text):
        """Return a tuple of ints for a purely numeric dotted version, otherwise None."""
        if re.fullmatch(r"\d+(\.\d+)*", text):
            return tuple(int(part) for part in text.split("."))
        return None

    def version_aware(compare):
        """Order numerically when both operands are dotted versions, else as plain strings."""
        def compare_operands(left, right):
            left_version, right_version = as_version(left), as_version(right)
            if left_version is not None and right_version is not None:
                # Avoids the string ordering trap where "3.10" sorts before "3.9".
                return compare(left_version, right_version)
            return compare(left, right)
        return compare_operands

    operators = {
        ">=": version_aware(operator.ge),
        "<=": version_aware(operator.le),
        "<": version_aware(operator.lt),
        ">": version_aware(operator.gt),
        "==": operator.eq,
        "!=": operator.ne,
        # "in" / "not in" keep their Python meaning: left operand contained in the right one.
        "not in": lambda left, right: left not in right,
        "in": lambda left, right: left in right,
    }

    # Two-character operators are listed before their one-character prefixes, and the word
    # operators carry word boundaries so "in" is not found inside names like platform_machine.
    parsed = re.match(r"\s*(.+?)\s*(>=|<=|==|!=|<|>|\bnot\s+in\b|\bin\b)\s*(.+?)\s*$", markers)
    if parsed is None:
        # No operators were found, include the dependency.
        return True

    left, op, right = parsed.groups()
    compare = operators[" ".join(op.split())]
    left, right = left.strip(" '\""), right.strip(" '\"")

    bindings = get_marker_bindings()
    if bindings.get(left):
        return compare(bindings[left], right)
    if bindings.get(right):
        # Reversed form such as: "win" in sys_platform
        return compare(left, bindings[right])

    # Unknown marker binding -- skip marker and include the dependency
    return True


# Cache filled by get_marker_bindings() on its first call.
_bindings = {}


def get_marker_bindings():
    """
    Returns bindings for marker values which include os name, platform, python version and
    other system information.

    The mapping is computed once; later calls hand back the same cached dictionary.
    """
    global _bindings
    if _bindings:
        return _bindings

    def format_full_version(info):
        # "major.minor.micro", followed by the first letter of the release level and the
        # serial number for anything that is not a final release.
        pieces = [f"{info.major}.{info.minor}.{info.micro}"]
        if info.releaselevel != 'final':
            pieces.append(info.releaselevel[0])
            pieces.append(str(info.serial))
        return "".join(pieces)

    if not hasattr(sys, 'implementation'):
        implementation_name, implementation_version = '', '0'
    else:
        implementation_version = format_full_version(sys.implementation.version)
        implementation_name = sys.implementation.name

    major_minor = platform.python_version_tuple()[:2]
    _bindings = dict(
        implementation_name=implementation_name,
        implementation_version=implementation_version,
        os_name=os.name,
        platform_machine=platform.machine(),
        platform_python_implementation=platform.python_implementation(),
        platform_release=platform.release(),
        platform_system=platform.system(),
        platform_version=platform.version(),
        python_full_version=platform.python_version(),
        python_version='.'.join(major_minor),
        sys_platform=sys.platform,
    )
    return _bindings


async def get_license_from_s3(
        package: str, version: str, package_manager="pip", session: "Optional[aiohttp.ClientSession]" = None
) -> str:
    """
    Downloads the license text stored for a package in the package-metadata S3 bucket.

    ``session`` is optional and falls back to the module-level ``_session``, like the other
    fetch helpers in this file.

    Returns the response body for an HTTP 200 answer and an empty string otherwise. This is
    a best-effort lookup: request failures are reported as "no license" instead of raising.
    """
    if session is None:
        session = _session

    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        async with session.get(url) as response:
            # Without this check the body of an error answer (e.g. for a missing key)
            # would be handed back as if it were the license text.
            if response.status != 200:
                return ""
            return await response.text()
    except Exception:
        # ``Exception`` rather than a bare except, so task cancellation and interpreter
        # shutdown are not swallowed.
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Finds the license text for one pinned dependency.

    With ``download_dir`` set, every entry of that folder whose name contains
    ``<dependency>-<version>`` is inspected first (local files, then S3); if nothing is found
    the package is downloaded with pip (into a temporary folder when a license-less copy was
    already present). Without ``download_dir`` the folders on ``sys.path`` are searched.
    The last resort in both cases is ``fetch_license``.

    Returns a mapping of ``"<name>-<version>"`` to license text; it may hold several entries
    when pip also downloaded dependencies. An empty dict means the dependency is ignored or
    no license was found.
    """
    # ``ignore`` defaults to None, which the membership test below cannot handle.
    if ignore is None:
        ignore = []
    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        if os.path.exists(download_dir):
            # Keep scanning the folder the caller gave us even after ``download_dir`` has been
            # pointed at a temporary folder for the clean download below.
            search_dir = download_dir
            tempdir = None
            # Check if package has already been downloaded
            for path in os.listdir(search_dir):
                if f"{dependency.replace('-', '_')}-{version}" not in path:
                    continue

                license_text = await get_local_license(dependency, version, os.path.join(search_dir, path))
                if license_text:
                    print(f"License for {dependency_str} found locally.")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

                license_text = await get_license_from_s3(dependency, version)
                if license_text:
                    print(f"Downloaded a license for {dependency_str} from s3")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

                print(f"No license available for {dependency_str}. Downloading a clean copy.")
                # The local name keeps the temporary folder alive until this function returns.
                tempdir = tempfile.TemporaryDirectory()
                download_dir = tempdir.name

        licenses = await pip_download(dependency, version, download_dir, ignore,
                                      ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}

    print(f"No license found for: {dependency_str}")
    # Explicit empty mapping (still falsy for callers) instead of an implicit None.
    return {}


# Splits a file name into two groups: everything before a "-" (letters, digits, "_" and
# "-"), and the run of word characters and dots that follows that "-".
# pip_download() reads group 1 as the dependency name and group 2 as its version.
dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Downloads a dependency with ``pip download`` and collects licenses from the fetched archives.

    :param dependency: package name to download.
    :param version: exact version, passed to pip as ``dependency==version``.
    :param download_dir: parent directory; archives go into ``download_dir/dependency``.
    :param ignore: package names to skip (both the requested one and anything pip pulls in).
    :param ignore_all_dependencies: when set, the internal cache is consulted first and pip
        is run with ``--no-deps``.
    :param extra_pip_args: whitespace-separated extra arguments appended to the pip command.
    :return: mapping of ``"<name>-<version>"`` to license text (empty text when none was found).
    :raises Exception: when the pip subprocess could not be started or awaited.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            return {f"{dependency}-{version}": license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = [
        sys.executable,
        "-m",
        "pip",
        "download",
        f"{dependency}=={version}",
        "-d",
        download_dep_dir,
        "-q",
        "--disable-pip-version-check",
        *additional_args,
    ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        await process.wait()
    except Exception as exc:
        # `except Exception` (not a bare except) so task cancellation is not converted into an error.
        raise Exception(f"Failed to find and download a license for {dependency}=={version} using pip.") from exc

    if not os.path.isdir(download_dep_dir):
        # pip did not produce anything (e.g. the requirement could not be resolved).
        return {}

    licenses = {}
    for path in os.listdir(download_dep_dir):
        dependency_path = join(download_dep_dir, path)
        if os.path.isdir(dependency_path):
            # Skip extracted folders.
            continue

        # Match on the name without its archive extension; otherwise the version group
        # of dependency_regex swallows suffixes like ".tar.gz" or ".zip".
        match = dependency_regex.match(_strip_archive_suffix(path))
        if not match:
            continue

        dependency_name = match.group(1)
        dependency_version = match.group(2)
        if dependency_name in ignore:
            continue

        archive_path = _extract_archive(dependency_path)

        dependency_str = f"{dependency_name}-{dependency_version}"
        licenses[dependency_str] = await get_local_license(dependency_name, dependency_version, archive_path)

        if not licenses[dependency_str]:
            licenses[dependency_str] = await fetch_license(dependency_name, dependency_version)

        if not licenses[dependency_str]:
            licenses[dependency_str] = await get_license_from_s3(dependency_name, dependency_version)

        if licenses[dependency_str]:
            await put_license_into_internal_cache(dependency_name, dependency_version, licenses[dependency_str])
    return licenses


_ARCHIVE_SUFFIXES = (".tar.gz", ".tar.bz2", ".tgz", ".tbz", ".zip", ".whl")


def _strip_archive_suffix(filename: str) -> str:
    """Returns ``filename`` without a trailing archive extension such as ``.tar.gz`` or ``.whl``."""
    lowered = filename.lower()
    for suffix in _ARCHIVE_SUFFIXES:
        if lowered.endswith(suffix):
            return filename[: -len(suffix)]
    return filename


def _extract_archive(dependency_path: str) -> str:
    """Unpacks an archive next to itself and returns the directory that holds its contents."""
    archive_path, archive_ext = os.path.splitext(dependency_path)
    if archive_ext.lower() in (".gz", ".bz2", ".tgz", ".tbz"):
        # "r:*" lets tarfile detect the compression itself.
        # NOTE(review): extractall() on downloaded archives trusts member paths; consider
        # the `filter="data"` argument once the minimum Python version supports it.
        with tarfile.open(dependency_path, "r:*") as archive:
            archive.extractall(archive_path)

        archive_name = os.path.basename(archive_path)
        if archive_name.endswith(".tar"):
            # Unpacking .tar.gz or .tar.bz2 will contain an extra .tar folder
            return join(archive_path, archive_name[: -len(".tar")])
        nested = join(archive_path, archive_name)
        return nested if os.path.isdir(nested) else archive_path

    with zipfile.ZipFile(dependency_path) as archive:
        archive.extractall(archive_path)
    return archive_path


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Looks for the license of an installed dependency in every ``sys.path`` entry.

    Entries are probed in order with ``get_local_license``; the first non-empty
    license text wins. Returns an empty string when no entry yields one.
    """
    for search_dir in sys.path:
        found = await get_local_license(dependency, version, search_dir)
        if not found:
            continue
        return found
    return ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Searches a directory for the license of ``dependency``.

    Lookup order: ``.egg-info`` / ``.dist-info`` entries named ``<name>-<version>...``,
    then the package's own directory, then license files lying directly in ``path``.

    :param dependency: distribution name; hyphens are treated as underscores when matching.
    :param version: version used to match the metadata entry names.
    :param path: directory to search; anything that is not a directory yields ``""``.
    :return: the license text, or ``""`` when nothing was found.
    """
    if not os.path.isdir(path):
        return ""

    raw_name = dependency.lower()
    dependency_name = raw_name.replace("-", "_")
    # Entry names are lower-cased before comparison, so the prefix has to be as well.
    meta_prefix = f"{dependency_name}-{version}".lower()
    package_meta_candidates = []
    package_path = ""

    for dependency_path in os.listdir(path):
        entry = dependency_path.lower()
        if entry.startswith(meta_prefix):
            package_meta_candidates.append(join(path, dependency_path))
        elif entry in (raw_name, dependency_name):
            # Package directories use underscores where the distribution name has
            # hyphens, so accept both spellings.
            package_path = join(path, dependency_path)

    for package_meta in package_meta_candidates:
        if package_meta.endswith(".egg-info"):
            license_text = await egg_info_license(package_meta)
            if license_text:
                return license_text
        elif package_meta.endswith(".dist-info"):
            license_text = await dist_info_license(package_meta)
            if license_text:
                return license_text

    if package_path:
        # Try to find a license file in the package source code
        license_text = find_license_file(package_path)
        if license_text:
            return license_text

    # A license file can be in the specified folder itself.
    license_text = find_license_file(path)
    if license_text:
        return license_text

    return ""


async def egg_info_license(package_path: str) -> str:
    """
    Resolves a license through the ``Home-page`` field of an ``.egg-info`` entry.

    :param package_path: path of the ``.egg-info`` entry. Normally a directory that
        contains ``PKG-INFO``; a plain file is read as the PKG-INFO content itself.
    :return: the license text fetched via ``fetch_from_github`` for the home page, or
        ``""`` when the metadata is unreadable, has no home page, or nothing was fetched.
    """
    if os.path.isfile(package_path):
        pkg_info_path = package_path
    else:
        pkg_info_path = join(package_path, "PKG-INFO")

    try:
        # Decode as UTF-8 explicitly; undecodable bytes must not abort the lookup.
        with open(pkg_info_path, encoding="utf-8", errors="replace") as file:
            info = file.readlines()
    except OSError:
        # Missing or unreadable metadata just means "no license from this source".
        return ""

    url = None
    for line in info:
        if line.startswith("Home-page:"):
            url = line[len("Home-page:"):].strip()
            break

    if not url:
        return ""
    return (await fetch_from_github(url)) or ""


async def dist_info_license(package_path: str) -> str:
    """
    Resolves a license from a ``.dist-info`` directory.

    A license file found by ``find_license_file`` is preferred. Otherwise the first
    ``Home-page:`` line of ``METADATA`` is handed to ``fetch_from_github``. Lines that
    are not valid UTF-8 are skipped. Nothing is returned when neither source exists.
    """
    bundled = find_license_file(package_path)
    if bundled:
        return bundled

    metadata_file = join(package_path, "METADATA")
    if not exists(metadata_file):
        return

    with open(metadata_file, "rb") as handle:
        try:
            raw_lines = handle.readlines()
        except Exception as exc:
            print(f"Could not open metadata file for: {package_path}")
            raise exc

    field = "Home-page:"
    homepage = None
    for raw in raw_lines:
        try:
            text = raw.decode("utf-8")
        except UnicodeDecodeError:
            continue
        if text.startswith(field):
            homepage = text[len(field):].strip()
            break

    if homepage:
        return await fetch_from_github(homepage)


def find_license_file(package_path: str) -> str:
    """
    Reads the first license-like file found in a package directory.

    The ``licenses`` sub-directory is searched when it exists, otherwise ``package_path``
    itself. A file qualifies when its name starts with "license", "licence" or "copying"
    (case-insensitive) or is listed in ``license_names``. Python source and bytecode
    files are never treated as licenses.

    :param package_path: directory to inspect.
    :return: the file's text, or ``""`` when ``package_path`` is not a directory or holds
        no license file.
    """
    if not os.path.isdir(package_path):
        return ""

    licenses_folder = join(package_path, "licenses")
    # Only descend when "licenses" really is a directory; a plain file with that name
    # is handled as an ordinary candidate below.
    license_search_folder = licenses_folder if os.path.isdir(licenses_folder) else package_path

    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)
    source_suffixes = (".py", ".pyc", ".pyo", ".pyi")

    license_path = ""
    # Sorted so the chosen file does not depend on the file system's listing order.
    for path in sorted(os.listdir(license_search_folder)):
        candidate = join(license_search_folder, path)
        if os.path.isdir(candidate):
            continue
        if path.lower().endswith(source_suffixes):
            # e.g. a module named "license_tool.py" is code, not a license text.
            continue
        # safe because the (possibly) case-sensitive path is what we have (unlike URL)
        if license_regex.match(path) or path in license_names:
            license_path = candidate
            break

    if not license_path:
        return ""

    # Read strictly first so decoding problems are reported instead of silently hidden.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetches a license for ``dependency==version`` from the internal cache or via PyPI metadata.

    The internal cache is asked first. Otherwise the PyPI JSON API is queried for the
    project's home page, which is then passed to ``fetch_from_github``.

    :param session: HTTP session to use; defaults to the module-level ``_session``.
    :return: the license text, or ``""`` when PyPI does not answer with 200 or no
        license could be fetched from the home page.
    :raises ValueError: when the PyPI metadata contains no home page URL.
    """
    if session is None:
        session = _session

    license_text = await fetch_license_from_internal_cache(dependency, version)
    if license_text:
        return license_text

    # `async with` releases the connection on every path, including the early return.
    async with session.get(f"https://pypi.org/pypi/{dependency}/{version}/json", ssl=get_ssl_cert()) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    info = response_json.get("info") or {}
    # "project_urls" may be null or lack a "Homepage" entry, so look it up defensively.
    home_page = info.get("home_page") or (info.get("project_urls") or {}).get("Homepage")
    if not home_page:
        raise ValueError(f"home_page is missing for {dependency}=={version}")

    return (await fetch_from_github(home_page)) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Looks up a license in the internal cache service.

    The lookup is best effort: a disabled cache (``LICENSE_CACHE_URL`` unset), a
    non-200 answer or any request/decoding error all yield ``""``.

    :param session: HTTP session to use; defaults to the module-level ``_session``.
    :return: the cached license text, or ``""``.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    # The cache is keyed by the hyphenated spelling of the name.
    dependency = dependency.replace("_", "-")
    try:
        response = await session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}")
        if response.status != 200:
            return ""
        payload = await response.json()
        print(f"Found license for {dependency}=={version} in internal cache.")
        # A missing/null "text" field is reported as "no license" rather than None.
        return payload.get("text") or ""
    except Exception:
        # Not a bare except: task cancellation and interpreter exit must still propagate.
        return ""


async def fetch_from_github(url: str, session: aiohttp.ClientSession = None) -> str:
    """
    Downloads the license file of a GitHub repository.

    Well-known license file names are tried on the usual default branches through
    raw.githubusercontent.com first; the GitHub license API is the fallback.

    :param url: project URL; only the first two path segments (owner/repository) are used.
    :param session: HTTP session to use; defaults to the module-level ``_session``.
    :return: the license text, or ``""`` when the URL is not a GitHub repository URL or
        no license could be downloaded.
    """
    # `import urllib` alone does not guarantee that the `parse` submodule is loaded.
    import urllib.parse

    if session is None:
        session = _session

    parsed_url = urllib.parse.urlparse(url)
    if "github" not in parsed_url.netloc:
        return ""

    # Home pages often point below the repository root (".../tree/main/docs") or end
    # in ".git"; only "<owner>/<repo>" is valid for the raw and API endpoints.
    segments = [segment for segment in parsed_url.path.split("/") if segment]
    if len(segments) < 2:
        return ""
    owner, repo = segments[0], segments[1]
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    repo_slug = f"{owner}/{repo}"

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            raw_url = f"https://raw.githubusercontent.com/{repo_slug}/{branch}/{license_name}"
            async with session.get(raw_url, ssl=ssl_context) as response:
                if response.status == 200:
                    return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    async with session.get(f"https://api.github.com/repos/{repo_slug}/license", ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    download_url = response_json.get("download_url")
    if not download_url:
        return ""

    async with session.get(download_url, ssl=ssl_context) as response:
        if response.status != 200:
            # Never hand an error page back as license text.
            return ""
        return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Stores a license text in the internal cache service (best effort).

    Nothing is sent unless both ``LICENSE_CACHE_URL`` and ``LICENSE_CACHE_API_KEY`` are
    set. Request failures are reported as a warning and never raised to the caller.

    :param session: HTTP session to use; defaults to the module-level ``_session``.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    # The cache is keyed by the hyphenated spelling of the name.
    dependency = dependency.replace("_", "-")
    try:
        response = await session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        )
        if response.status == 201:
            print(f"Put license for {dependency}=={version} into internal cache.")
        elif response.status == 401:
            print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")
    except Exception as exc:
        # Caching is optional, so failures must not abort the run -- but they should be
        # visible. `except Exception` lets task cancellation propagate.
        print(f"[Warning] Could not put license for {dependency}=={version} into internal cache: {exc}")


def check_gpl(dependency: str, license_text: str):
    """
    Aborts the program when a license text mentions GPL or LGPL.

    The whole text is scanned for the standalone words "GPL" / "LGPL".

    :param dependency: name printed in the error message.
    :param license_text: license text to scan; empty or ``None`` is accepted.
    :raises SystemExit: with exit code 666 when a mention is found.
    """
    # Raw string: in a normal string "\b" is a backspace character, not a word boundary.
    # search(), not match(): the mention can be anywhere in the text, not only at its start.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Gathers the licenses of all requirements and writes or returns the combined text.

    :param requirements_path: path to the requirements file.
    :param pull_licenses: if given, only these requirements are processed.
    :param dest: output file. When omitted the combined text is returned instead.
    :param download_dir: directory used for (and searched for) downloaded packages.
    :param ignore: package names (without versions) to skip.
    :param ignore_all_dependencies: only process the listed requirements themselves.
    :param overwrite: overwrite ``dest`` instead of appending to it.
    :return: the combined license text when ``dest`` is not given, otherwise ``None``.
        ``None`` is also returned when gathering failed; the error is printed.
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    if dest:
        # Keep the package list next to the output file. basename() makes repeated
        # calls idempotent; without `dest` the current relative path stays in effect.
        PACKAGES_FILE = os.path.join(os.path.dirname(dest), os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        # One section per dependency: name, underline of equal length, license body.
        combined_text = "".join(
            f"{dep}\n{'-' * len(dep)}\n{text}\n\n" for dep, text in licenses.items()
        )

        if dest:
            mode = "w" if overwrite else "a+"
            with open(dest, mode, encoding="utf-8") as file:
                file.write(combined_text)
        else:
            return combined_text
    except Exception as exc:
        # NOTE(review): failures are only printed, so callers (and the CLI exit code)
        # cannot tell an error from "nothing to report".
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Builds the ``ssl`` argument for outgoing HTTPS requests.

    Returns a default SSL context backed by certifi's CA bundle, or ``False``
    when certifi cannot be imported.
    """
    try:
        import certifi

        ca_bundle = certifi.where()
        context = ssl.create_default_context(cafile=ca_bundle)
    except ImportError:
        # ModuleNotFoundError is a subclass of ImportError, so this covers both.
        # Don't verify SSL certificate if certifi is not installed.
        return False
    return context


if __name__ == "__main__":
    # Command-line entry point: parse the options, run main() and print the combined
    # license text when it was not written to a destination file.
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    parser.add_argument("--pull-licenses", dest="pull_licenses", type=str, nargs="+",
        help="Specifies list of licenses that should be pulled from the requirements.txt list")
    parser.add_argument("-d", "--dest", dest="dest", type=str)
    parser.add_argument("--download-dir", dest="download_dir", type=str)
    parser.add_argument(
        "--ignore",
        dest="ignore",
        action="append",
        help="specifies dependency which licenses should be ignored. "
             "the script will not try to download license for ignored dependency.",
    )
    parser.add_argument(
        "--ignore-all-dependencies",
        dest="ignore_all",
        action="store_true",
        # The trailing space keeps the two concatenated sentences apart in --help output.
        help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements. "
             "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
    )
    parser.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="Overwrites the output file if present, otherwise license texts are appended.",
    )
    args = parser.parse_args()

    text = main(args.requirements, args.pull_licenses, args.dest, args.download_dir, args.ignore, ignore_all_dependencies=args.ignore_all,
                overwrite=args.overwrite)
    if text:
        print(text)


pytest-aiohttp-0.3.0
--------------------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

# aiohttp is the only third-party import. If importing it fails on Linux aarch64 the
# script prints a warning and exits with status 0; on any other platform the original
# exception is re-raised.
try:
    import aiohttp
except:
    if all([platform.uname().system == "Linux", platform.uname().machine == "aarch64"]):
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

# Type aliases used in signatures for readability.
DependencyName = str
LicenseText = str

# Shared HTTP session; bound by get_licenses() for the duration of a run.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # added manually to pair common english spellings
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names tried, in this order, when guessing raw GitHub license URLs.
main_branches = [
    "master",
    "main",
    "latest",
]

# File that receives one line per package for which a license was gathered;
# main() re-points it next to the destination file.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the internal license cache (trailing slashes removed). The cache is
# disabled when the environment variable is not set.
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# API key sent in the "X-Key" header when writing to the cache.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Collects license texts for every pinned requirement in a requirements file.

    :param requirements_path: path of the requirements file; every requirement line
        must pin a version with ``==``.
    :param pull_licenses: if given, only requirements named here are processed.
    :param download_dir: passed through to ``get_license``.
    :param ignore: package names to skip.
    :param ignore_all_dependencies: passed through to ``get_license``.
    :return: dict mapping package keys to license text, ordered by key.
    :raises ValueError: when a requirement has no ``==`` version, when a requirement
        yields no result at all, or when a package ends up without a license text.
    """
    global PACKAGES_FILE

    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    # The session is bound to the module-level `_session` so the fetch helpers can use
    # it as their default; it is closed when this block exits.
    global _session
    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        # One task per requirement name; a repeated name replaces the earlier entry.
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue
            try:
                dependency, version = requirement.split("==", maxsplit=1)
            except ValueError:
                raise ValueError(f"{requirement} must contain a version.")

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                version, markers = version.split(";")
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        # Run all lookups concurrently; the first exception raised by a task propagates here.
        await asyncio.gather(*license_tasks.values())

        licenses = {}
        # Packages seen without a license text; re-checked once all results are merged.
        pending = Queue()
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                if package in ignore:
                    continue

                # The first license text found for a package wins.
                if licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")

                if not license_text and package not in ignore:
                    pending.put((package, 0))
                    # raise ValueError(f"License for {package} was not found.")

        # A pending package counts as resolved when another requirement's result supplied
        # its license. `licenses` is not modified inside this loop, so re-queuing an entry
        # only increments its counter until it is reported as missing.
        missing = []
        while pending.qsize() > 0:
            package, ct = pending.get(timeout=0.01)
            if licenses.get(package):
                print(f"\tverified found license for (after repeat {ct}): {package}")
                continue

            # max dependency nesting depth check to detect missing cases
            if ct > 3:
                missing.append(package)
                continue
            pending.put((package, ct+1))

        if len(missing) > 0:
            raise ValueError(f"Licenses not found for {missing}")

        dependencies = sorted(list(licenses.keys()))

        # Rebuild the dict in sorted key order, then run the GPL check on every text.
        licenses = {dependency: licenses[dependency] for dependency in dependencies}
        for dependency, license_text in licenses.items():
            if license_text:
                check_gpl(dependency, license_text)
        return licenses


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed

    Only a single ``<marker> <operator> <value>`` comparison is supported.

    :param markers: environment marker text of a requirement (may be empty).
    :return: ``True`` when there is no marker, no known operator, or no binding for the
        marker name; otherwise the result of the comparison.
    :raises ValueError: when the markers are combined with ``and`` / ``or``.
    """
    if not markers:
        return True

    # Raw string: in a normal string "\b" is a backspace, not a word boundary.
    # search(), not match(): the keyword sits between two comparisons, never at the start.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    # The order matters because operator is found by using 'in'
    operators = {
        ">=": operator.ge,
        "<=": operator.le,
        "<": operator.lt,
        ">": operator.gt,
        "==": operator.eq,
        "!=": operator.ne,
        "not in": lambda a, b: not operator.contains(a, b),
        "in": operator.contains,
    }

    for op, cmp in operators.items():
        if op in markers:
            break
    else:
        # No operators were found, include the dependency.
        return True

    # Split at the first occurrence only, so a value that happens to contain the
    # operator text cannot break the unpacking.
    marker, _, value = markers.partition(op)
    marker, value = marker.strip(" '\""), value.strip(" '\"")

    bindings = get_marker_bindings()
    binding = bindings.get(marker)
    if binding:
        # NOTE(review): values are compared as plain strings, so version ordering is
        # lexicographic ("3.10" sorts before "3.9") -- confirm this is acceptable.
        # NOTE(review): for "in"/"not in" this tests whether `value` is contained in the
        # binding; verify that operand order against PEP 508.
        return cmp(binding, value)

    # Unknown marker binding -- skip marker and include the dependency
    return True


_bindings = {}


def get_marker_bindings():
    """
    Returns the values of the environment marker names (os name, platform, python
    version and other system information) for the running interpreter.

    The mapping is computed on first use and kept in the module-level ``_bindings``.
    """
    global _bindings
    if _bindings:
        return _bindings

    def render_version(version_info):
        # "major.minor.micro", plus "<first letter of release level><serial>" for
        # anything that is not a final release.
        rendered = f"{version_info.major}.{version_info.minor}.{version_info.micro}"
        level = version_info.releaselevel
        if level == 'final':
            return rendered
        return rendered + level[0] + str(version_info.serial)

    implementation = getattr(sys, 'implementation', None)
    if implementation is None:
        impl_name, impl_version = '', '0'
    else:
        impl_version = render_version(implementation.version)
        impl_name = implementation.name

    computed = {}
    computed['implementation_name'] = impl_name
    computed['implementation_version'] = impl_version
    computed['os_name'] = os.name
    computed['platform_machine'] = platform.machine()
    computed['platform_python_implementation'] = platform.python_implementation()
    computed['platform_release'] = platform.release()
    computed['platform_system'] = platform.system()
    computed['platform_version'] = platform.version()
    computed['python_full_version'] = platform.python_version()
    computed['python_version'] = '.'.join(platform.python_version_tuple()[:2])
    computed['sys_platform'] = sys.platform

    _bindings = computed
    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """
    Fetches a license text from the package-metadata S3 bucket (best effort).

    :param package: package name used in the object key.
    :param version: package version used in the object key.
    :param package_manager: first path segment of the object key.
    :return: the response body when the request succeeds with status 200; ``""`` for
        any other status or when the request fails.
    """
    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        async with _session.get(url) as response:
            if response.status != 200:
                # An error response body is not a license text.
                return ""
            return await response.text()
    except Exception:
        # Not a bare except: task cancellation and interpreter exit must still propagate.
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Finds the license of one dependency (and, via pip, of the packages it pulls in).

    With ``download_dir``: previously downloaded copies are inspected first, then the S3
    bucket, then a fresh ``pip download``. Without it, the directories on ``sys.path``
    are searched. In both cases ``fetch_license`` is the final fallback.

    :param dependency: package name.
    :param version: exact package version.
    :param download_dir: directory holding (or receiving) downloaded packages.
    :param ignore: package names to skip; ``None`` means nothing is ignored.
    :param ignore_all_dependencies: forwarded to ``pip_download``.
    :param extra_pip_args: forwarded to ``pip_download``.
    :return: mapping of ``"<name>-<version>"`` to license text; empty when the dependency
        is ignored or no license was found.
    """
    if ignore is None:
        ignore = []
    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        # Set when a clean temporary download location replaces `download_dir`.
        tempdir = None
        if os.path.exists(download_dir):
            # Check if package has already been downloaded
            marker = f"{dependency.replace('-', '_')}-{version}"
            candidates = [path for path in os.listdir(download_dir) if marker in path]

            # Inspect every local candidate before falling back, so an entry without a
            # license cannot hide a later one that has it.
            for path in candidates:
                license_text = await get_local_license(dependency, version, os.path.join(download_dir, path))
                if license_text:
                    print(f"License for {dependency_str} found locally.")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

            if candidates:
                license_text = await get_license_from_s3(dependency, version)
                if license_text:
                    print(f"Downloaded a license for {dependency_str} from s3")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}
                print(f"No license available for {dependency_str}. Downloading a clean copy.")
                tempdir = tempfile.TemporaryDirectory()
                download_dir = tempdir.name

        try:
            licenses = await pip_download(dependency, version, download_dir, ignore,
                                          ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        finally:
            # The temporary copy is only needed while pip_download reads from it.
            if tempdir is not None:
                tempdir.cleanup()
        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}
    print(f"No license found for: {dependency_str}")
    # Explicit empty result so the return value always matches the annotation.
    return {}


# Splits a downloaded archive file name into (name, version): group 1 is the run
# of word characters/hyphens that precedes a hyphen, group 2 is the run of word
# characters and dots that follows it. Because "." belongs to group 2's character
# class, dotted suffixes such as ".tar.gz" are captured as part of group 2.
dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Downloads a dependency with ``pip download`` and collects licenses from the fetched archives.

    :param dependency: package name to download.
    :param version: exact version, passed to pip as ``dependency==version``.
    :param download_dir: parent directory; archives go into ``download_dir/dependency``.
    :param ignore: package names to skip (both the requested one and anything pip pulls in).
    :param ignore_all_dependencies: when set, the internal cache is consulted first and pip
        is run with ``--no-deps``.
    :param extra_pip_args: whitespace-separated extra arguments appended to the pip command.
    :return: mapping of ``"<name>-<version>"`` to license text (empty text when none was found).
    :raises Exception: when the pip subprocess could not be started or awaited.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            return {f"{dependency}-{version}": license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = [
        sys.executable,
        "-m",
        "pip",
        "download",
        f"{dependency}=={version}",
        "-d",
        download_dep_dir,
        "-q",
        "--disable-pip-version-check",
        *additional_args,
    ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        await process.wait()
    except Exception as exc:
        # `except Exception` (not a bare except) so task cancellation is not converted into an error.
        raise Exception(f"Failed to find and download a license for {dependency}=={version} using pip.") from exc

    if not os.path.isdir(download_dep_dir):
        # pip did not produce anything (e.g. the requirement could not be resolved).
        return {}

    licenses = {}
    for path in os.listdir(download_dep_dir):
        dependency_path = join(download_dep_dir, path)
        if os.path.isdir(dependency_path):
            # Skip extracted folders.
            continue

        # Match on the name without its archive extension; otherwise the version group
        # of dependency_regex swallows suffixes like ".tar.gz" or ".zip".
        match = dependency_regex.match(_strip_archive_suffix(path))
        if not match:
            continue

        dependency_name = match.group(1)
        dependency_version = match.group(2)
        if dependency_name in ignore:
            continue

        archive_path = _extract_archive(dependency_path)

        dependency_str = f"{dependency_name}-{dependency_version}"
        licenses[dependency_str] = await get_local_license(dependency_name, dependency_version, archive_path)

        if not licenses[dependency_str]:
            licenses[dependency_str] = await fetch_license(dependency_name, dependency_version)

        if not licenses[dependency_str]:
            licenses[dependency_str] = await get_license_from_s3(dependency_name, dependency_version)

        if licenses[dependency_str]:
            await put_license_into_internal_cache(dependency_name, dependency_version, licenses[dependency_str])
    return licenses


_ARCHIVE_SUFFIXES = (".tar.gz", ".tar.bz2", ".tgz", ".tbz", ".zip", ".whl")


def _strip_archive_suffix(filename: str) -> str:
    """Returns ``filename`` without a trailing archive extension such as ``.tar.gz`` or ``.whl``."""
    lowered = filename.lower()
    for suffix in _ARCHIVE_SUFFIXES:
        if lowered.endswith(suffix):
            return filename[: -len(suffix)]
    return filename


def _extract_archive(dependency_path: str) -> str:
    """Unpacks an archive next to itself and returns the directory that holds its contents."""
    archive_path, archive_ext = os.path.splitext(dependency_path)
    if archive_ext.lower() in (".gz", ".bz2", ".tgz", ".tbz"):
        # "r:*" lets tarfile detect the compression itself.
        # NOTE(review): extractall() on downloaded archives trusts member paths; consider
        # the `filter="data"` argument once the minimum Python version supports it.
        with tarfile.open(dependency_path, "r:*") as archive:
            archive.extractall(archive_path)

        archive_name = os.path.basename(archive_path)
        if archive_name.endswith(".tar"):
            # Unpacking .tar.gz or .tar.bz2 will contain an extra .tar folder
            return join(archive_path, archive_name[: -len(".tar")])
        nested = join(archive_path, archive_name)
        return nested if os.path.isdir(nested) else archive_path

    with zipfile.ZipFile(dependency_path) as archive:
        archive.extractall(archive_path)
    return archive_path


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Looks for the license of an installed dependency in every ``sys.path`` entry.

    Entries are probed in order with ``get_local_license``; the first non-empty
    license text wins. Returns an empty string when no entry yields one.
    """
    for search_dir in sys.path:
        found = await get_local_license(dependency, version, search_dir)
        if not found:
            continue
        return found
    return ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Searches a directory for the license of ``dependency``.

    Lookup order: ``.egg-info`` / ``.dist-info`` entries named ``<name>-<version>...``,
    then the package's own directory, then license files lying directly in ``path``.

    :param dependency: distribution name; hyphens are treated as underscores when matching.
    :param version: version used to match the metadata entry names.
    :param path: directory to search; anything that is not a directory yields ``""``.
    :return: the license text, or ``""`` when nothing was found.
    """
    if not os.path.isdir(path):
        return ""

    raw_name = dependency.lower()
    dependency_name = raw_name.replace("-", "_")
    # Entry names are lower-cased before comparison, so the prefix has to be as well.
    meta_prefix = f"{dependency_name}-{version}".lower()
    package_meta_candidates = []
    package_path = ""

    for dependency_path in os.listdir(path):
        entry = dependency_path.lower()
        if entry.startswith(meta_prefix):
            package_meta_candidates.append(join(path, dependency_path))
        elif entry in (raw_name, dependency_name):
            # Package directories use underscores where the distribution name has
            # hyphens, so accept both spellings.
            package_path = join(path, dependency_path)

    for package_meta in package_meta_candidates:
        if package_meta.endswith(".egg-info"):
            license_text = await egg_info_license(package_meta)
            if license_text:
                return license_text
        elif package_meta.endswith(".dist-info"):
            license_text = await dist_info_license(package_meta)
            if license_text:
                return license_text

    if package_path:
        # Try to find a license file in the package source code
        license_text = find_license_file(package_path)
        if license_text:
            return license_text

    # A license file can be in the specified folder itself.
    license_text = find_license_file(path)
    if license_text:
        return license_text

    return ""


async def egg_info_license(package_path: str) -> str:
    """
    Resolves a license through the ``Home-page`` field of an ``.egg-info`` entry.

    :param package_path: path of the ``.egg-info`` entry. Normally a directory that
        contains ``PKG-INFO``; a plain file is read as the PKG-INFO content itself.
    :return: the license text fetched via ``fetch_from_github`` for the home page, or
        ``""`` when the metadata is unreadable, has no home page, or nothing was fetched.
    """
    if os.path.isfile(package_path):
        pkg_info_path = package_path
    else:
        pkg_info_path = join(package_path, "PKG-INFO")

    try:
        # Decode as UTF-8 explicitly; undecodable bytes must not abort the lookup.
        with open(pkg_info_path, encoding="utf-8", errors="replace") as file:
            info = file.readlines()
    except OSError:
        # Missing or unreadable metadata just means "no license from this source".
        return ""

    url = None
    for line in info:
        if line.startswith("Home-page:"):
            url = line[len("Home-page:"):].strip()
            break

    if not url:
        return ""
    return (await fetch_from_github(url)) or ""


async def dist_info_license(package_path: str) -> str:
    """Resolve a license from a ``.dist-info`` folder.

    A license file shipped inside the folder wins; otherwise the ``Home-page``
    field of ``METADATA`` is followed to GitHub.

    Args:
        package_path: Path of the ``*.dist-info`` folder.

    Returns:
        The license text, or an empty string when none could be resolved.
    """
    license_text = find_license_file(package_path)
    if license_text:
        return license_text

    metadata_path = join(package_path, "METADATA")
    if not exists(metadata_path):
        return ""

    try:
        # Read raw bytes: lines are decoded one by one so a single bad line
        # does not make the whole file unreadable.
        with open(metadata_path, "rb") as file:
            metadata = file.readlines()
    except Exception:
        print(f"Could not open metadata file for: {package_path}")
        raise

    url = ""
    for raw_line in metadata:
        try:
            line = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            continue
        if line.startswith("Home-page:"):
            url = line[len("Home-page:"):].strip()
            break

    if not url:
        return ""
    return (await fetch_from_github(url)) or ""


def find_license_file(package_path: str) -> str:
    """Return the text of the first license-like file found in *package_path*.

    A ``licenses`` sub-folder is searched first when it exists; the folder itself
    is searched afterwards. A file qualifies when its name starts with
    ``license``/``licence``/``copying`` (case-insensitive) or is listed in
    ``license_names``. Sub-folders are never descended into.

    Args:
        package_path: Folder to inspect.

    Returns:
        The file content, or an empty string when *package_path* is not a folder
        or contains no license file.
    """
    if not os.path.isdir(package_path):
        return ""

    search_folders = []
    licenses_folder = join(package_path, "licenses")
    # Only treat "licenses" as a folder when it really is one; a regular file
    # with that name is picked up by the name match below instead.
    if os.path.isdir(licenses_folder):
        search_folders.append(licenses_folder)
    search_folders.append(package_path)

    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)
    license_path = ""
    for folder in search_folders:
        # Sorted so the chosen file does not depend on the file system's listing order.
        for name in sorted(os.listdir(folder)):
            candidate = join(folder, name)
            if os.path.isdir(candidate):
                continue
            # safe because the (possibly) case-sensitive path is what we have (unlike URL)
            if license_regex.match(name) or name in license_names:
                license_path = candidate
                break
        if license_path:
            break

    if not license_path:
        return ""

    # try / except here so that decoding problems are reported instead of hidden.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """Fetch a license for ``dependency==version`` from remote sources.

    The internal license cache is consulted first; otherwise the PyPI JSON API is
    asked for the project home page, which is then searched on GitHub.

    Args:
        dependency: Package name.
        version: Exact package version.
        session: HTTP session to use; defaults to the module-wide ``_session``.

    Returns:
        The license text, or an empty string when PyPI does not know the release
        or no license could be located.

    Raises:
        ValueError: PyPI knows the release but lists no home page for it.
    """
    if session is None:
        session = _session

    # Forward the session so a caller-supplied one is used for every request.
    license_text = await fetch_license_from_internal_cache(dependency, version, session)
    if license_text:
        return license_text

    pypi_url = f"https://pypi.org/pypi/{dependency}/{version}/json"
    # "async with" releases the connection back to the pool once the body is read.
    async with session.get(pypi_url, ssl=get_ssl_cert()) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    info = response_json.get("info") or {}
    home_page = info.get("home_page")
    if not home_page:
        # "project_urls" may be null or lack a home page entry; treat both as missing.
        project_urls = info.get("project_urls") or {}
        home_page = next(
            (link for label, link in project_urls.items() if label.lower() == "homepage" and link),
            None,
        )
        if not home_page:
            raise ValueError(f"home_page is missing for {dependency}=={version}")

    return (await fetch_from_github(home_page, session)) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """Look up a license in the internal cache service (best effort).

    Args:
        dependency: Package name; underscores are replaced by dashes for the lookup.
        version: Exact package version.
        session: HTTP session to use; defaults to the module-wide ``_session``.

    Returns:
        The cached license text, or an empty string when the cache is disabled
        (``LICENSE_CACHE_URL`` unset), has no entry, or the request failed.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}") as response:
            if response.status != 200:
                return ""
            payload = await response.json()
        license_text = payload.get("text") or ""
    except Exception:
        # The cache is optional: any failure just means "not cached".
        # (Cancellation is a BaseException on Python 3.8+ and still propagates.)
        return ""

    if license_text:
        print(f"Found license for {dependency}=={version} in internal cache.")
    return license_text


async def fetch_from_github(url: str, session: aiohttp.ClientSession = None) -> str:
    """Download the license of the GitHub repository that *url* points to.

    Well-known license file names are tried on the usual default branches first
    (raw downloads are not subject to the API rate limit); the GitHub license API
    is the fallback.

    Args:
        url: Project home page.
        session: HTTP session to use; defaults to the module-wide ``_session``.

    Returns:
        The license text, or an empty string when *url* is not a GitHub
        repository URL or no license could be downloaded.
    """
    # "import urllib" alone does not guarantee that the "parse" submodule is loaded.
    import urllib.parse

    if session is None:
        session = _session

    parsed_url = urllib.parse.urlparse(url)
    if "github" not in parsed_url.netloc:
        return ""

    # Keep only "<owner>/<repo>"; home pages may point deeper (e.g. ".../tree/main/docs").
    segments = [segment for segment in parsed_url.path.split("/") if segment]
    if len(segments) < 2:
        return ""
    owner, repo_name = segments[0], segments[1]
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]
    repo = f"{owner}/{repo_name}"

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            raw_url = f"https://raw.githubusercontent.com/{repo}/{branch}/{license_name}"
            async with session.get(raw_url, ssl=ssl_context) as response:
                if response.status == 200:
                    return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    async with session.get(f"https://api.github.com/repos/{repo}/license", ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    download_url = response_json.get("download_url")
    if not download_url:
        return ""

    async with session.get(download_url, ssl=ssl_context) as response:
        # Do not mistake an error page for a license text.
        if response.status != 200:
            return ""
        return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """Store a license text in the internal cache service (best effort).

    Nothing is sent unless both ``LICENSE_CACHE_URL`` and ``LICENSE_CACHE_API_KEY``
    are configured. Failures are reported as warnings and never raised.

    Args:
        dependency: Package name; underscores are replaced by dashes for the key.
        version: Exact package version.
        license_text: License text to store.
        session: HTTP session to use; defaults to the module-wide ``_session``.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        ) as response:
            if response.status == 201:
                print(f"Put license for {dependency}=={version} into internal cache.")
            elif response.status == 401:
                print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")
    except Exception as exc:
        # Caching is optional, so keep going, but leave a trace of the failure.
        print(f"[Warning] Could not put {dependency}=={version} license into internal cache: {exc}")


def check_gpl(dependency: str, license_text: str):
    """Abort the program when *license_text* mentions GPL or LGPL.

    Args:
        dependency: Name used in the error message.
        license_text: License text to scan; empty or ``None`` is accepted.

    Raises:
        SystemExit: With code 666 when the standalone word ``GPL`` or ``LGPL``
            occurs anywhere in the text.
    """
    # Raw string so "\b" is a word boundary (not a backspace character), and
    # re.search so the word is found anywhere, not only at the very beginning.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """Collect the licenses of all requirements and write or return them.

    Args:
        requirements_path: Path of the pinned requirements file.
        pull_licenses: Optional list of requirement names to restrict the run to.
        dest: Output file. When omitted, the combined text is returned instead.
        download_dir: Folder used for ``pip download``.
        ignore: Package names (without versions) to skip.
        ignore_all_dependencies: Only handle the listed requirements themselves.
        overwrite: Truncate *dest* instead of appending to it.

    Returns:
        The combined license text when *dest* is not given, otherwise ``None``.
        ``None`` is also returned when collecting failed; the error is printed.
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    # dest is optional, so fall back to the current folder; basename() keeps the
    # path from growing when main() is called more than once.
    PACKAGES_FILE = os.path.join(os.path.dirname(dest) if dest else "", os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        # One section per package: name, dashed underline, license text.
        license_text = "".join(
            f"{dep}\n{'-' * len(dep)}\n{text}\n\n" for dep, text in licenses.items()
        )

        if dest:
            mode = "w" if overwrite else "a+"
            with open(dest, mode, encoding="utf-8") as file:
                file.write(license_text)
        else:
            return license_text
    except Exception as exc:
        # NOTE(review): errors are only printed, so the process still exits with
        # status 0 -- confirm that callers do not rely on the exit code.
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """Build the ``ssl`` argument for outgoing HTTPS requests.

    Returns:
        An ``SSLContext`` backed by the certifi CA bundle, or ``False`` when
        certifi cannot be imported.
    """
    try:
        from certifi import where as ca_bundle_path

        tls_setting = ssl.create_default_context(cafile=ca_bundle_path())
    except (ImportError, ModuleNotFoundError):
        # Don't verify SSL certificate if certifi is not installed.
        tls_setting = False
    return tls_setting


if __name__ == "__main__":
    # Command-line entry point: parse the options, run main() and print the
    # combined license text when no destination file was requested.
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    parser.add_argument("--pull-licenses", dest="pull_licenses", type=str, nargs="+",
        help="Specifies list of licenses that should be pulled from the requirements.txt list")
    parser.add_argument("-d", "--dest", dest="dest", type=str)
    parser.add_argument("--download-dir", dest="download_dir", type=str)
    parser.add_argument(
        "--ignore",
        dest="ignore",
        action="append",
        help="specifies dependency which licenses should be ignored. "
             "the script will not try to download license for ignored dependency.",
    )
    parser.add_argument(
        "--ignore-all-dependencies",
        dest="ignore_all",
        action="store_true",
        # Trailing space added: the two sentences used to run together in --help.
        help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements. "
             "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
    )
    parser.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="Overwrites the output file if present, otherwise license texts are appended.",
    )
    args = parser.parse_args()

    text = main(args.requirements, args.pull_licenses, args.dest, args.download_dir, args.ignore, ignore_all_dependencies=args.ignore_all,
                overwrite=args.overwrite)
    if text:
        print(text)


python-dateutil-2.9.0.post0
---------------------------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

# aiohttp is the only third-party import of this script. When it cannot be
# imported on Linux aarch64 the script prints a warning and exits with status 0;
# on any other platform the original error is re-raised.
# NOTE(review): the bare "except:" also catches non-import errors -- consider
# narrowing it to ImportError.
try:
    import aiohttp
except:
    if all([platform.uname().system == "Linux", platform.uname().machine == "aarch64"]):
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

# Readability aliases used in return annotations such as Dict[DependencyName, LicenseText].
DependencyName = str
LicenseText = str

# Shared HTTP session; get_licenses() binds it for the duration of a run and the
# fetch helpers fall back to it when no session is passed explicitly.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # added manually to pair common english spellings
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names tried, in this order, when guessing raw GitHub license URLs.
main_branches = [
    "master",
    "main",
    "latest",
]

# File that receives the name of every package a license was gathered for;
# main() re-bases it next to the destination file.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the optional internal license cache (trailing slashes removed).
# The cache is disabled when the variable is unset.
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# API key sent as "X-Key" when writing to the cache; writes are skipped without it.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """Gather the license text of every requirement listed in a requirements file.

    Each requirement line must pin a version with ``==``. One ``get_license`` task
    is scheduled per accepted requirement and all tasks are awaited together.

    Args:
        requirements_path: Path of the requirements file to read.
        pull_licenses: When given, only requirements named in this list are processed.
        download_dir: Forwarded to ``get_license``.
        ignore: Package names to skip; defaults to an empty list.
        ignore_all_dependencies: Forwarded to ``get_license``.

    Returns:
        A dict mapping package keys to license text, ordered by sorted key.

    Raises:
        ValueError: A requirement has no ``==`` version, a requirement produced no
            result at all, or some package ended up without a license text.
    """
    global PACKAGES_FILE

    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    global _session
    # The session is bound to the module-level _session so that the fetch helpers
    # can use it without it being passed around explicitly.
    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue
            try:
                dependency, version = requirement.split("==", maxsplit=1)
            except ValueError:
                raise ValueError(f"{requirement} must contain a version.")

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                version, markers = version.split(";")
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            # A requirement is processed only if its markers apply to this
            # environment, it is not ignored, and it passes the pull_licenses filter.
            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        # Run all lookups concurrently; results are read from the tasks below.
        await asyncio.gather(*license_tasks.values())

        licenses = {}
        # Packages that were reported without a license text; they are re-checked
        # once every task result has been merged into `licenses`.
        pending = Queue()
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                if package in ignore:
                    continue

                # First non-empty license for a package wins.
                if licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")

                if not license_text and package not in ignore:
                    pending.put((package, 0))
                    # raise ValueError(f"License for {package} was not found.")

        missing = []
        # NOTE(review): `licenses` is not modified inside this loop, so a package
        # is either resolved on its first pass (another task supplied its license)
        # or re-queued until the counter exceeds 3 and it is recorded as missing.
        while pending.qsize() > 0:
            package, ct = pending.get(timeout=0.01)
            if licenses.get(package):
                print(f"\tverified found license for (after repeat {ct}): {package}")
                continue

            # max dependency nesting depth check to detect missing cases
            if ct > 3:
                missing.append(package)
                continue
            pending.put((package, ct+1))

        if len(missing) > 0:
            raise ValueError(f"Licenses not found for {missing}")

        dependencies = sorted(list(licenses.keys()))

        # Rebuild the dict in sorted key order, then run the GPL check on every text.
        licenses = {dependency: licenses[dependency] for dependency in dependencies}
        for dependency, license_text in licenses.items():
            if license_text:
                check_gpl(dependency, license_text)
        return licenses


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed

    Only a single ``<marker> <operator> <value>`` comparison is supported.

    Args:
        markers: The text following ``;`` on a requirement line (may be empty).

    Returns:
        ``True`` when the dependency applies to the current environment. Empty
        markers, markers without a known operator and unknown marker names all
        count as "include".

    Raises:
        ValueError: The markers combine several conditions with ``and`` / ``or``.
    """
    if not markers:
        return True

    # Raw string so "\b" is a word boundary (not a backspace character), and
    # re.search because the keyword never sits at the very start of the markers.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    operators = {
        ">=": operator.ge,
        "<=": operator.le,
        "<": operator.lt,
        ">": operator.gt,
        "==": operator.eq,
        "!=": operator.ne,
        # PEP 508: "marker in value" tests the left operand for membership in the right one.
        "not in": lambda a, b: a not in b,
        "in": lambda a, b: a in b,
    }

    # Locate the operator as a token. A plain substring test would find "in"
    # inside names such as "platform_machine".
    found = re.search(r">=|<=|==|!=|<|>|\bnot\s+in\b|\bin\b", markers)
    if found is None:
        # No operators were found, include the dependency.
        return True

    op = " ".join(found.group(0).split())  # collapse "not   in" to "not in"
    cmp = operators[op]
    marker = markers[:found.start()].strip(" '\"")
    value = markers[found.end():].strip(" '\"")

    bindings = get_marker_bindings()
    binding = bindings.get(marker)
    if not binding:
        # Unknown marker binding -- skip marker and include the dependency
        return True

    if op not in ("in", "not in") and marker in ("python_version", "python_full_version", "implementation_version"):
        # Compare versions numerically: as plain strings "3.10" sorts before "3.6".
        left, right = _numeric_version(binding), _numeric_version(value)
        if left is not None and right is not None:
            return cmp(left, right)

    return cmp(binding, value)


def _numeric_version(text: str):
    """Return *text* as a tuple of ints, or ``None`` when it is not purely dotted-numeric."""
    try:
        return tuple(int(part) for part in text.split("."))
    except ValueError:
        return None


_bindings = {}


def get_marker_bindings():
    """
    Build (once) and return the values of the environment marker variables:
    implementation, OS, platform and Python version details of the running
    interpreter. Later calls return the dict created by the first call.
    """
    global _bindings
    if _bindings:
        return _bindings

    def full_version_string(version_info):
        # "major.minor.micro", plus e.g. "b2" for non-final release levels.
        text = f"{version_info.major}.{version_info.minor}.{version_info.micro}"
        level = version_info.releaselevel
        if level == 'final':
            return text
        return text + level[0] + str(version_info.serial)

    has_implementation = hasattr(sys, 'implementation')
    collected = {}
    collected['implementation_name'] = sys.implementation.name if has_implementation else ''
    collected['implementation_version'] = (
        full_version_string(sys.implementation.version) if has_implementation else '0'
    )
    collected['os_name'] = os.name
    collected['platform_machine'] = platform.machine()
    collected['platform_python_implementation'] = platform.python_implementation()
    collected['platform_release'] = platform.release()
    collected['platform_system'] = platform.system()
    collected['platform_version'] = platform.version()
    collected['python_full_version'] = platform.python_version()
    major_minor = platform.python_version_tuple()[:2]
    collected['python_version'] = '.'.join(major_minor)
    collected['sys_platform'] = sys.platform

    _bindings = collected
    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """Download a license from the package-metadata S3 bucket (best effort).

    Args:
        package: Package name.
        version: Exact package version.
        package_manager: Top-level folder inside the bucket.

    Returns:
        The license text, or an empty string when the object does not exist or
        the request failed.
    """
    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        async with _session.get(url) as response:
            # Only a 200 carries a license; other statuses carry an error
            # document that must not be mistaken for license text.
            if response.status != 200:
                return ""
            return await response.text()
    except Exception:
        # Optional source: treat every failure as "not available".
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """Find the license of ``dependency==version`` (and, via pip, of its dependencies).

    With *download_dir* set, already downloaded copies are inspected first, then
    the S3 metadata bucket, then ``pip download`` is used. Without it, the
    installed packages on ``sys.path`` are inspected. Remote lookup through
    ``fetch_license`` is the last resort in both cases.

    Args:
        dependency: Package name.
        version: Exact package version.
        download_dir: Folder used for ``pip download``; may contain earlier downloads.
        ignore: Package names to skip.
        ignore_all_dependencies: Forwarded to ``pip_download``.
        extra_pip_args: Extra command-line arguments for pip, as one string.

    Returns:
        A dict mapping ``"<name>-<version>"`` keys to license text; empty when the
        dependency is ignored or nothing was found.
    """
    if ignore is None:
        ignore = []
    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        tempdir = None
        if os.path.exists(download_dir):
            # Check if package has already been downloaded
            wanted = f"{dependency.replace('-', '_')}-{version}"
            matches = [path for path in os.listdir(download_dir) if wanted in path]
            # Inspect every existing copy before changing download_dir, so that
            # all of them are looked up in the folder they actually live in.
            for path in matches:
                license_text = await get_local_license(dependency, version, os.path.join(download_dir, path))
                if license_text:
                    print(f"License for {dependency_str} found locally.")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

            if matches:
                license_text = await get_license_from_s3(dependency, version)
                if license_text:
                    print(f"Downloaded a license for {dependency_str} from s3")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}
                print(f"No license available for {dependency_str}. Downloading a clean copy.")
                tempdir = tempfile.TemporaryDirectory()
                download_dir = tempdir.name

        try:
            licenses = await pip_download(dependency, version, download_dir, ignore,
                                          ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        finally:
            # The license texts are already in memory; drop the scratch folder.
            if tempdir is not None:
                tempdir.cleanup()
        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}
    print(f"No license found for: {dependency_str}")
    return {}


# Splits "<name>-<version>..." into name and version. It is applied to archive
# names with the archive extension already removed.
dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """Download a package with ``pip download`` and collect licenses from the archives.

    Every archive found in ``<download_dir>/<dependency>`` (the package and,
    unless disabled, its dependencies) is unpacked and searched locally; remote
    lookup and the S3 bucket are fallbacks for each archive.

    Args:
        dependency: Package name.
        version: Exact package version.
        download_dir: Parent folder for the per-package download folder.
        ignore: Package names to skip.
        ignore_all_dependencies: Pass ``--no-deps`` to pip, after first trying the
            internal cache directly.
        extra_pip_args: Extra command-line arguments for pip, as one string.

    Returns:
        A dict mapping ``"<name>-<version>"`` keys to license text. The text is
        empty for packages no license was found for.

    Raises:
        Exception: The pip subprocess could not be started.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            dependency_str = f"{dependency}-{version}"
            return {dependency_str: license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = \
        [
            sys.executable,
            "-m",
            "pip",
            "download",
            f"{dependency}=={version}",
            "-d",
            download_dep_dir,
            "-q",
            "--disable-pip-version-check",
            *additional_args
        ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        return_code = await process.wait()
    except Exception as exc:
        raise Exception(f"Failed to find and download a license for {dependency}=={version} using pip.") from exc
    if return_code != 0:
        print(f"[Warning] pip download exited with code {return_code} for {dependency}=={version}.")

    licenses = {}
    if not os.path.isdir(download_dep_dir):
        # pip produced nothing; let the caller fall back to other sources.
        return licenses

    tar_suffixes = (".tar.gz", ".tgz", ".tar.bz2", ".tbz", ".tbz2", ".tar")
    known_suffixes = tar_suffixes + (".whl", ".zip", ".egg")
    for path in os.listdir(download_dep_dir):
        dependency_path = join(download_dep_dir, path)
        if os.path.isdir(dependency_path):
            # Skip extracted folders.
            continue

        # Drop the archive extension first, otherwise "pkg-1.0.tar.gz" would
        # yield the version "1.0.tar.gz".
        stem = path
        for suffix in known_suffixes:
            if path.endswith(suffix):
                stem = path[: -len(suffix)]
                break

        match = dependency_regex.match(stem)
        if not match:
            continue
        dependency_name = match.group(1)
        dependency_version = match.group(2)

        if dependency_name in ignore:
            continue

        archive_path = os.path.splitext(dependency_path)[0]
        if dependency_path.endswith(tar_suffixes):
            # Mode "r" lets tarfile detect the compression itself.
            with tarfile.open(dependency_path, "r") as archive:
                if hasattr(tarfile, "data_filter"):
                    # Refuse members that would escape the target folder.
                    archive.extractall(archive_path, filter="data")
                else:
                    archive.extractall(archive_path)

            extracted = os.listdir(archive_path)
            if len(extracted) == 1 and os.path.isdir(join(archive_path, extracted[0])):
                # Source archives wrap everything in one top-level folder.
                archive_path = join(archive_path, extracted[0])
            elif archive_path.endswith(".tar"):
                # Unpacking .tar.gz or .tar.bz2 will contain an extra .tar folder
                archive_name = os.path.split(archive_path)[-1]
                archive_path = join(archive_path, archive_name[: -len(".tar")])
        elif zipfile.is_zipfile(dependency_path):
            with zipfile.ZipFile(dependency_path) as archive:
                archive.extractall(archive_path)
        else:
            # Not an archive this script can unpack.
            continue

        dependency_str = f"{dependency_name}-{dependency_version}"
        licenses[dependency_str] = await get_local_license(dependency_name, dependency_version, archive_path)

        if not licenses[dependency_str]:
            licenses[dependency_str] = await fetch_license(dependency_name, dependency_version)

        if not licenses[dependency_str]:
            licenses[dependency_str] = await get_license_from_s3(dependency_name, dependency_version)

        if licenses[dependency_str]:
            await put_license_into_internal_cache(dependency_name, dependency_version, licenses[dependency_str])
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """Look for a license of an installed package on ``sys.path``.

    Every ``sys.path`` entry is handed to ``get_local_license`` in order; the
    first non-empty license text is returned.

    Returns:
        The license text, or an empty string when no entry yields one.
    """
    for search_root in sys.path:
        found_text = await get_local_license(dependency, version, search_root)
        if not found_text:
            continue
        return found_text
    return ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """Search the folder *path* for a license belonging to *dependency*.

    The lookup order is:

    1. ``<name>-<version>*.egg-info`` / ``*.dist-info`` metadata folders,
    2. a folder named like the package itself (source code),
    3. *path* itself.

    Args:
        dependency: Package name as written in the requirements file.
        version: Exact package version.
        path: Folder to inspect (a site-packages folder or an extracted archive).

    Returns:
        The license text, or an empty string when nothing was found or *path*
        is not a directory.
    """
    if not os.path.isdir(path):
        return ""

    raw_name = dependency.lower()
    # Installed folders use underscores where the requirement name may use dashes.
    normalized_name = raw_name.replace("-", "_")
    # Listing entries are lower-cased before comparison, so the version must be too.
    meta_prefix = f"{normalized_name}-{version.lower()}"

    package_meta_candidates = []
    package_path = ""
    for entry in os.listdir(path):
        lowered = entry.lower()
        if lowered.startswith(meta_prefix):
            package_meta_candidates.append(join(path, entry))
        elif lowered in (raw_name, normalized_name):
            # Accept both spellings so "typing-extensions" finds "typing_extensions/".
            package_path = join(path, entry)

    for package_meta in package_meta_candidates:
        if package_meta.endswith(".egg-info"):
            license_text = await egg_info_license(package_meta)
        elif package_meta.endswith(".dist-info"):
            license_text = await dist_info_license(package_meta)
        else:
            continue
        if license_text:
            return license_text

    if package_path:
        # Try to find a license file in the package source code
        license_text = find_license_file(package_path)
        if license_text:
            return license_text

    # A license file can be in the specified folder itself.
    return find_license_file(path) or ""


async def egg_info_license(package_path: str) -> str:
    """Resolve a license through the ``Home-page`` field of an ``.egg-info`` folder.

    Args:
        package_path: Path of the ``*.egg-info`` entry.

    Returns:
        The license text fetched from the project's GitHub home page, or an empty
        string when ``PKG-INFO`` is missing, has no ``Home-page`` field, or the
        home page does not lead to a license.
    """
    pkg_info_path = join(package_path, "PKG-INFO")
    if not exists(pkg_info_path):
        # Also covers old-style ".egg-info" entries that are plain files.
        return ""

    url = ""
    # Metadata is UTF-8; ignore stray bytes instead of failing on the platform default codec.
    with open(pkg_info_path, encoding="utf-8", errors="ignore") as file:
        for line in file:
            if line.startswith("Home-page:"):
                url = line[len("Home-page:"):].strip()
                break

    if not url:
        return ""
    return (await fetch_from_github(url)) or ""


async def dist_info_license(package_path: str) -> str:
    """Resolve a license from a ``.dist-info`` folder.

    A license file shipped inside the folder wins; otherwise the ``Home-page``
    field of ``METADATA`` is followed to GitHub.

    Args:
        package_path: Path of the ``*.dist-info`` folder.

    Returns:
        The license text, or an empty string when none could be resolved.
    """
    license_text = find_license_file(package_path)
    if license_text:
        return license_text

    metadata_path = join(package_path, "METADATA")
    if not exists(metadata_path):
        return ""

    try:
        # Read raw bytes: lines are decoded one by one so a single bad line
        # does not make the whole file unreadable.
        with open(metadata_path, "rb") as file:
            metadata = file.readlines()
    except Exception:
        print(f"Could not open metadata file for: {package_path}")
        raise

    url = ""
    for raw_line in metadata:
        try:
            line = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            continue
        if line.startswith("Home-page:"):
            url = line[len("Home-page:"):].strip()
            break

    if not url:
        return ""
    return (await fetch_from_github(url)) or ""


def find_license_file(package_path: str) -> str:
    """Return the text of the first license-like file found in *package_path*.

    A ``licenses`` sub-folder is searched first when it exists; the folder itself
    is searched afterwards. A file qualifies when its name starts with
    ``license``/``licence``/``copying`` (case-insensitive) or is listed in
    ``license_names``. Sub-folders are never descended into.

    Args:
        package_path: Folder to inspect.

    Returns:
        The file content, or an empty string when *package_path* is not a folder
        or contains no license file.
    """
    if not os.path.isdir(package_path):
        return ""

    search_folders = []
    licenses_folder = join(package_path, "licenses")
    # Only treat "licenses" as a folder when it really is one; a regular file
    # with that name is picked up by the name match below instead.
    if os.path.isdir(licenses_folder):
        search_folders.append(licenses_folder)
    search_folders.append(package_path)

    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)
    license_path = ""
    for folder in search_folders:
        # Sorted so the chosen file does not depend on the file system's listing order.
        for name in sorted(os.listdir(folder)):
            candidate = join(folder, name)
            if os.path.isdir(candidate):
                continue
            # safe because the (possibly) case-sensitive path is what we have (unlike URL)
            if license_regex.match(name) or name in license_names:
                license_path = candidate
                break
        if license_path:
            break

    if not license_path:
        return ""

    # try / except here so that decoding problems are reported instead of hidden.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """Fetch a license for ``dependency==version`` from remote sources.

    The internal license cache is consulted first; otherwise the PyPI JSON API is
    asked for the project home page, which is then searched on GitHub.

    Args:
        dependency: Package name.
        version: Exact package version.
        session: HTTP session to use; defaults to the module-wide ``_session``.

    Returns:
        The license text, or an empty string when PyPI does not know the release
        or no license could be located.

    Raises:
        ValueError: PyPI knows the release but lists no home page for it.
    """
    if session is None:
        session = _session

    # Forward the session so a caller-supplied one is used for every request.
    license_text = await fetch_license_from_internal_cache(dependency, version, session)
    if license_text:
        return license_text

    pypi_url = f"https://pypi.org/pypi/{dependency}/{version}/json"
    # "async with" releases the connection back to the pool once the body is read.
    async with session.get(pypi_url, ssl=get_ssl_cert()) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    info = response_json.get("info") or {}
    home_page = info.get("home_page")
    if not home_page:
        # "project_urls" may be null or lack a home page entry; treat both as missing.
        project_urls = info.get("project_urls") or {}
        home_page = next(
            (link for label, link in project_urls.items() if label.lower() == "homepage" and link),
            None,
        )
        if not home_page:
            raise ValueError(f"home_page is missing for {dependency}=={version}")

    return (await fetch_from_github(home_page, session)) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """Look up a license in the internal cache service (best effort).

    Args:
        dependency: Package name; underscores are replaced by dashes for the lookup.
        version: Exact package version.
        session: HTTP session to use; defaults to the module-wide ``_session``.

    Returns:
        The cached license text, or an empty string when the cache is disabled
        (``LICENSE_CACHE_URL`` unset), has no entry, or the request failed.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}") as response:
            if response.status != 200:
                return ""
            payload = await response.json()
        license_text = payload.get("text") or ""
    except Exception:
        # The cache is optional: any failure just means "not cached".
        # (Cancellation is a BaseException on Python 3.8+ and still propagates.)
        return ""

    if license_text:
        print(f"Found license for {dependency}=={version} in internal cache.")
    return license_text


async def fetch_from_github(url: str, session: aiohttp.ClientSession = None) -> str:
    """Download the license of the GitHub repository that *url* points to.

    Well-known license file names are tried on the usual default branches first
    (raw downloads are not subject to the API rate limit); the GitHub license API
    is the fallback.

    Args:
        url: Project home page.
        session: HTTP session to use; defaults to the module-wide ``_session``.

    Returns:
        The license text, or an empty string when *url* is not a GitHub
        repository URL or no license could be downloaded.
    """
    # "import urllib" alone does not guarantee that the "parse" submodule is loaded.
    import urllib.parse

    if session is None:
        session = _session

    parsed_url = urllib.parse.urlparse(url)
    if "github" not in parsed_url.netloc:
        return ""

    # Keep only "<owner>/<repo>"; home pages may point deeper (e.g. ".../tree/main/docs").
    segments = [segment for segment in parsed_url.path.split("/") if segment]
    if len(segments) < 2:
        return ""
    owner, repo_name = segments[0], segments[1]
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]
    repo = f"{owner}/{repo_name}"

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            raw_url = f"https://raw.githubusercontent.com/{repo}/{branch}/{license_name}"
            async with session.get(raw_url, ssl=ssl_context) as response:
                if response.status == 200:
                    return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    async with session.get(f"https://api.github.com/repos/{repo}/license", ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    download_url = response_json.get("download_url")
    if not download_url:
        return ""

    async with session.get(download_url, ssl=ssl_context) as response:
        # Do not mistake an error page for a license text.
        if response.status != 200:
            return ""
        return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """Store a license text in the internal cache service (best effort).

    Nothing is sent unless both ``LICENSE_CACHE_URL`` and ``LICENSE_CACHE_API_KEY``
    are configured. Failures are reported as warnings and never raised.

    Args:
        dependency: Package name; underscores are replaced by dashes for the key.
        version: Exact package version.
        license_text: License text to store.
        session: HTTP session to use; defaults to the module-wide ``_session``.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        ) as response:
            if response.status == 201:
                print(f"Put license for {dependency}=={version} into internal cache.")
            elif response.status == 401:
                print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")
    except Exception as exc:
        # Caching is optional, so keep going, but leave a trace of the failure.
        print(f"[Warning] Could not put {dependency}=={version} license into internal cache: {exc}")


def check_gpl(dependency: str, license_text: str):
    """
    Aborts the process when a license text mentions GPL or LGPL.

    :param dependency: Name of the dependency the license belongs to (used in the message).
    :param license_text: License text to inspect; empty or ``None`` is accepted and ignored.
    :raises SystemExit: With code 666 when the standalone word ``GPL`` or ``LGPL`` occurs
        anywhere in ``license_text``.
    """
    # The pattern must be a raw string ("\b" is a backspace otherwise) and must be
    # searched for, because the word can occur anywhere in the text.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Collects the licenses of all requirements and writes or returns the combined text.

    :param requirements_path: Path to the requirements file to process.
    :param pull_licenses: When given, only these requirements are processed.
    :param dest: Output file. When omitted, the combined text is returned instead.
    :param download_dir: Folder used for downloaded packages.
    :param ignore: Package names (without versions) that must be skipped.
    :param ignore_all_dependencies: Only collect licenses for the listed requirements.
    :param overwrite: Overwrite ``dest`` instead of appending to it.
    :return: The combined license text when ``dest`` is not given, otherwise ``None``.
        ``None`` is also returned when collecting fails; the error is printed.
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    # ``dest`` is optional, so fall back to the current folder for the packages list.
    # Using the base name keeps repeated calls from nesting the folder again.
    packages_dir = os.path.dirname(dest) if dest else ""
    PACKAGES_FILE = os.path.join(packages_dir, os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        # One section per package: name, dashed underline, license text.
        license_text = "".join(
            [f"{dep}\n" f"{'-' * len(dep)}\n" f"{dep_license}\n\n" for dep, dep_license in licenses.items()]
        )

        if dest:
            mode = "w" if overwrite else "a+"
            with open(dest, mode, encoding="utf-8") as file:
                file.write(license_text)
        else:
            return license_text
    except Exception as exc:
        # Failures are reported on stdout only; callers receive ``None``.
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Builds the value passed as ``ssl=`` to HTTP requests.

    :return: A default SSL context that uses the certifi CA bundle, or ``False`` when
        certifi cannot be imported.
    """
    try:
        import certifi

        ca_bundle = certifi.where()
        context = ssl.create_default_context(cafile=ca_bundle)
    except (ImportError, ModuleNotFoundError):
        # Don't verify SSL certificate if certifi is not installed.
        context = False
    return context


if __name__ == "__main__":
    # Command-line entry point: parse the options, run main() and print the collected
    # text when main() returns one (i.e. when no destination file was given).
    cli = argparse.ArgumentParser()
    cli.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    cli.add_argument(
        "--pull-licenses",
        dest="pull_licenses",
        type=str,
        nargs="+",
        help="Specifies list of licenses that should be pulled from the requirements.txt list",
    )
    cli.add_argument("-d", "--dest", dest="dest", type=str)
    cli.add_argument("--download-dir", dest="download_dir", type=str)
    cli.add_argument(
        "--ignore",
        dest="ignore",
        action="append",
        help="specifies dependency which licenses should be ignored. "
             "the script will not try to download license for ignored dependency.",
    )
    cli.add_argument(
        "--ignore-all-dependencies",
        dest="ignore_all",
        action="store_true",
        help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements."
             "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
    )
    cli.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="Overwrites the output file if present, otherwise license texts are appended.",
    )
    options = cli.parse_args()

    rendered = main(
        requirements_path=options.requirements,
        pull_licenses=options.pull_licenses,
        dest=options.dest,
        download_dir=options.download_dir,
        ignore=options.ignore,
        ignore_all_dependencies=options.ignore_all,
        overwrite=options.overwrite,
    )
    if rendered:
        print(rendered)


teamcity-messages-1.27
----------------------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

try:
    import aiohttp
except Exception:
    # On Linux aarch64 a failed aiohttp import is tolerated: warn and exit successfully.
    # Everywhere else the original error is re-raised. ``Exception`` (rather than a bare
    # except) keeps KeyboardInterrupt/SystemExit from being turned into a silent exit 0.
    if platform.uname().system == "Linux" and platform.uname().machine == "aarch64":
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

# Aliases that give the Dict[...] annotations used by the license helpers readable names.
DependencyName = str
LicenseText = str

# Shared HTTP session. get_licenses() binds it for the duration of a run and the
# fetch helpers fall back to it when no explicit session is passed.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # Added manually to cover the alternative English spelling ("licence").
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names probed, in this order, when guessing raw license URLs on GitHub.
main_branches = [
    "master",
    "main",
    "latest",
]

# File that records the name of every package a license was gathered for.
# main() prefixes it with the folder of the destination file.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the internal license cache; the cache is disabled when it is not set.
# Trailing slashes are stripped because request paths are appended with "/".
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# API key sent in the "X-Key" header when a license is uploaded to the cache.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Collects license texts for the pinned requirements of a requirements file.

    Every processed line must look like ``name==version``, optionally followed by
    ``; <environment marker>`` and/or extra pip arguments. Lines that do not start with
    an alphanumeric character (blank lines, comments, pip options) are skipped.

    :param requirements_path: Path to the requirements file.
    :param pull_licenses: When given, only these requirement names are processed.
    :param download_dir: Folder used for downloaded packages.
    :param ignore: Package names that must be skipped.
    :param ignore_all_dependencies: Only collect licenses for the listed requirements.
    :return: Mapping of ``"<name>-<version>"`` to license text, sorted by key.
    :raises ValueError: When a requirement has no ``==`` version, when no license is
        found for a requirement, or when a discovered package has no license.
    """
    global _session

    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue
            try:
                dependency, version = requirement.split("==", maxsplit=1)
            except ValueError:
                raise ValueError(f"{requirement} must contain a version.")

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                # Split once only, so a ';' inside the marker cannot break the unpacking.
                version, markers = version.split(";", 1)
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        await asyncio.gather(*license_tasks.values())

        licenses = {}
        # Packages seen without a license text. Another requirement may still provide
        # one, so they are only judged after every result has been merged.
        unresolved = []
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                if package in ignore:
                    continue

                if licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")
                else:
                    unresolved.append(package)

        missing = []
        for package in unresolved:
            if licenses.get(package):
                print(f"\tverified found license for (after repeat 0): {package}")
            else:
                missing.append(package)

        if missing:
            raise ValueError(f"Licenses not found for {missing}")

        licenses = {dependency: licenses[dependency] for dependency in sorted(licenses)}
        for dependency, license_text in licenses.items():
            if license_text:
                check_gpl(dependency, license_text)
        return licenses


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed

    Only a single ``<marker> <operator> <value>`` comparison is supported. Numeric dotted
    versions are compared numerically, so that e.g. ``3.10`` is greater than ``3.6``.

    :param markers: Marker text that follows ``;`` in a requirement; may be empty.
    :return: ``True`` when the dependency applies to the running interpreter, or when
        the marker is empty, has no operator or names an unknown variable.
    :raises ValueError: When the marker combines conditions with ``and`` / ``or``.
    """
    if not markers:
        return True

    # Raw string + search: "\b" in a plain string is a backspace, and the keyword is
    # never at the start of the marker.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    comparators = {
        ">=": operator.ge,
        "<=": operator.le,
        "<": operator.lt,
        ">": operator.gt,
        "==": operator.eq,
        "!=": operator.ne,
        # PEP 508 evaluates "<left> in <right>" with Python semantics.
        "not in": lambda left, right: left not in right,
        "in": lambda left, right: left in right,
    }

    # Take the leftmost operator. Word boundaries keep "in" from matching inside names
    # and values such as "platform_machine" or "win32".
    found = re.search(r"(>=|<=|==|!=|<|>|\bnot\s+in\b|\bin\b)", markers)
    if found is None:
        # No operators were found, include the dependency.
        return True

    op = " ".join(found.group(1).split())
    cmp = comparators[op]
    marker = markers[:found.start()].strip(" '\"")
    value = markers[found.end():].strip(" '\"")

    bindings = get_marker_bindings()
    binding = bindings.get(marker)
    if not binding:
        # Unknown marker binding -- skip marker and include the dependency
        return True

    if op not in ("in", "not in"):
        left, right = _marker_version_tuple(binding), _marker_version_tuple(value)
        if left is not None and right is not None:
            # Pad with zeros so that "3.8" and "3.8.0" compare as equal.
            width = max(len(left), len(right))
            left += (0,) * (width - len(left))
            right += (0,) * (width - len(right))
            return cmp(left, right)

    return cmp(binding, value)


def _marker_version_tuple(text: str):
    """
    Returns ``text`` as a tuple of ints when it is a dotted numeric version, else ``None``.
    """
    if not re.fullmatch(r"\d+(?:\.\d+)*", text):
        return None
    return tuple(int(part) for part in text.split("."))


_bindings = {}


def get_marker_bindings():
    """
    Provides the values that environment marker variables are compared against:
    interpreter implementation, OS name, platform details and Python version.

    The mapping is built on the first call and the same dict is returned afterwards.
    """
    global _bindings

    if not _bindings:
        implementation = getattr(sys, "implementation", None) if hasattr(sys, "implementation") else None
        if implementation is None:
            impl_name, impl_version = "", "0"
        else:
            info = implementation.version
            impl_version = f"{info.major}.{info.minor}.{info.micro}"
            if info.releaselevel != "final":
                # Pre-releases get a suffix: first letter of the level plus the serial.
                impl_version += info.releaselevel[0] + str(info.serial)
            impl_name = implementation.name

        major_minor = platform.python_version_tuple()[:2]
        _bindings = {
            'implementation_name': impl_name,
            'implementation_version': impl_version,
            'os_name': os.name,
            'platform_machine': platform.machine(),
            'platform_python_implementation': platform.python_implementation(),
            'platform_release': platform.release(),
            'platform_system': platform.system(),
            'platform_version': platform.version(),
            'python_full_version': platform.python_version(),
            'python_version': '.'.join(major_minor),
            'sys_platform': sys.platform,
        }

    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """
    Downloads a license text from the package-metadata S3 bucket (best effort).

    :param package: Package name.
    :param version: Package version.
    :param package_manager: Top-level folder of the bucket to look into.
    :return: The license text, or an empty string when the object is not available or
        the request fails.
    """
    try:
        response = await _session.get(f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license")
        if response.status != 200:
            # The body of an unsuccessful response is an error document, not a license.
            return ""
        return await response.text()
    except Exception:
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Finds the license of one dependency, trying the cheapest sources first.

    With ``download_dir``: previously downloaded copies in that folder, then S3, then a
    fresh ``pip download``. Without it: the folders on ``sys.path``. In both cases the
    internal cache / PyPI / GitHub lookup of ``fetch_license`` is the last resort.

    :param dependency: Package name.
    :param version: Package version.
    :param download_dir: Folder that holds (or will receive) downloaded packages.
    :param ignore: Package names that must be skipped.
    :param ignore_all_dependencies: Passed on to ``pip_download``.
    :param extra_pip_args: Extra command line arguments for ``pip download``.
    :return: Mapping of ``"<name>-<version>"`` to license text; empty when the
        dependency is ignored or no license was found.
    """
    if ignore is None:
        ignore = []
    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        if os.path.exists(download_dir):
            # Check if package has already been downloaded
            downloaded_name = f"{dependency.replace('-', '_')}-{version}"
            candidates = [path for path in os.listdir(download_dir) if downloaded_name in path]

            # Look at every existing copy before going to the network.
            for path in candidates:
                license_text = await get_local_license(dependency, version, os.path.join(download_dir, path))
                if license_text:
                    print(f"License for {dependency_str} found locally.")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

            if candidates:
                license_text = await get_license_from_s3(dependency, version)
                if license_text:
                    print(f"Downloaded a license for {dependency_str} from s3")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

                print(f"No license available for {dependency_str}. Downloading a clean copy.")
                # The temporary folder lives as long as ``tempdir`` is referenced, i.e.
                # until this function returns.
                tempdir = tempfile.TemporaryDirectory()
                download_dir = tempdir.name

        licenses = await pip_download(dependency, version, download_dir, ignore,
                                      ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}

    print(f"No license found for: {dependency_str}")
    return {}


# Splits a downloaded file name into "<name>" and "<version>".
dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Downloads a package with ``pip download`` and extracts the licenses of every archive
    that ends up in ``<download_dir>/<dependency>``.

    :param dependency: Package name.
    :param version: Package version.
    :param download_dir: Parent folder for the per-dependency download folder.
    :param ignore: Package names that must be skipped.
    :param ignore_all_dependencies: Download with ``--no-deps``; the internal cache is
        consulted first in that case.
    :param extra_pip_args: Extra command line arguments, split on whitespace.
    :return: Mapping of ``"<name>-<version>"`` to license text (possibly empty text);
        empty when the dependency is ignored or nothing was downloaded.
    :raises Exception: When the pip process cannot be started.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            dependency_str = f"{dependency}-{version}"
            return {dependency_str: license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = \
        [
            sys.executable,
            "-m",
            "pip",
            "download",
            f"{dependency}=={version}",
            "-d",
            download_dep_dir,
            "-q",
            "--disable-pip-version-check",
            *additional_args
        ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        await process.wait()
    except Exception as exc:
        raise Exception(f"Failed to find and download a license for {dependency}=={version} using pip.") from exc

    if not os.path.isdir(download_dep_dir):
        # pip did not produce the folder; report "nothing found" so that the caller
        # can fall back to its other license sources.
        return {}

    licenses = {}
    for path in os.listdir(download_dep_dir):
        dependency_path = join(download_dep_dir, path)
        if os.path.isdir(dependency_path):
            # Skip extracted folders.
            continue

        match = dependency_regex.match(path)
        if not match:
            continue

        dependency_name = match.group(1)
        dependency_version = match.group(2)
        if dependency_name in ignore:
            continue

        # NOTE(review): extractall() trusts the member paths of the downloaded archive.
        archive_path, archive_ext = os.path.splitext(dependency_path)
        if archive_ext in (".gz", ".bz", ".bz2", ".tgz", ".tbz"):
            if dependency_path.endswith((".tar.gz", ".tgz")):
                mode = "r:gz"
            elif dependency_path.endswith((".tar.bz2", ".tbz")):
                mode = "r:bz2"
            else:
                mode = "r"

            with tarfile.open(dependency_path, mode) as archive:
                archive.extractall(archive_path)

            if archive_path.endswith(".tar"):
                # Unpacking .tar.gz or .tar.bz2 will contain an extra .tar folder
                archive_name = os.path.split(archive_path)[-1]
                archive_path = join(archive_path, archive_name[: -len(".tar")])
            elif archive_ext in (".tgz", ".tbz"):
                # Same layout as above, but the extraction folder has no ".tar" suffix.
                nested_path = join(archive_path, os.path.basename(archive_path))
                if os.path.isdir(nested_path):
                    archive_path = nested_path
        else:
            with zipfile.ZipFile(dependency_path) as archive:
                archive.extractall(archive_path)

        dependency_str = f"{dependency_name}-{dependency_version}"
        licenses[dependency_str] = await get_local_license(dependency_name, dependency_version, archive_path)

        if not licenses[dependency_str]:
            licenses[dependency_str] = await fetch_license(dependency_name, dependency_version)

        if not licenses[dependency_str]:
            licenses[dependency_str] = await get_license_from_s3(dependency_name, dependency_version)

        if licenses[dependency_str]:
            await put_license_into_internal_cache(dependency_name, dependency_version, licenses[dependency_str])
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Searches every folder on ``sys.path`` for a license of the given dependency.

    :return: The first license text found, or an empty string when there is none.
    """
    for search_root in sys.path:
        found = await get_local_license(dependency, version, search_root)
        if not found:
            continue
        return found
    return ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Looks for a license of ``dependency`` inside the folder ``path``.

    Sources are tried in this order: ``.egg-info`` / ``.dist-info`` entries named
    ``<name>-<version>...``, a license file inside an entry named like the dependency,
    and finally a license file directly inside ``path``.

    :return: The license text, or an empty string when ``path`` is not a folder or no
        license was found.
    """
    if not os.path.isdir(path):
        return ""

    meta_prefix = f"{dependency.lower().replace('-', '_')}-{version}"
    wanted_name = dependency.lower()

    metadata_entries = []
    source_folder = ""
    for entry in os.listdir(path):
        lowered = entry.lower()
        if lowered.startswith(meta_prefix):
            metadata_entries.append(join(path, entry))
        elif lowered == wanted_name:
            source_folder = join(path, entry)

    for metadata_entry in metadata_entries:
        if metadata_entry.endswith(".egg-info"):
            read_license = egg_info_license
        elif metadata_entry.endswith(".dist-info"):
            read_license = dist_info_license
        else:
            continue

        found = await read_license(metadata_entry)
        if found:
            return found

    if source_folder:
        # Try to find a license file in the package source code
        found = find_license_file(source_folder)
        if found:
            return found

    # A license file can be in the specified folder itself.
    return find_license_file(path) or ""


async def egg_info_license(package_path: str) -> str:
    """
    Fetches the license of a package described by an ``.egg-info`` entry.

    The ``Home-page`` field of its ``PKG-INFO`` file is passed to ``fetch_from_github``.

    :param package_path: Path of the ``.egg-info`` entry.
    :return: The license text, or an empty string when ``PKG-INFO`` cannot be read, has
        no home page, or no license could be fetched.
    """
    try:
        # Undecodable bytes are dropped; only the ASCII "Home-page:" line matters here.
        with open(join(package_path, "PKG-INFO"), encoding="utf-8", errors="ignore") as file:
            info = file.readlines()
    except OSError:
        # PKG-INFO is missing or unreadable, e.g. when the entry is a file, not a folder.
        return ""

    url = None
    for line in info:
        if line.startswith("Home-page:"):
            url = line[len("Home-page:"):].strip()
            break

    if not url:
        return ""
    return (await fetch_from_github(url)) or ""


async def dist_info_license(package_path: str) -> str:
    """
    Finds the license of a package described by a ``.dist-info`` folder.

    A license file inside the folder wins. Otherwise the ``Home-page`` field of the
    ``METADATA`` file is passed to ``fetch_from_github``.

    :param package_path: Path of the ``.dist-info`` folder.
    :return: The license text; ``None`` when there is no ``METADATA`` file or it has
        no home page.
    """
    bundled_license = find_license_file(package_path)
    if bundled_license:
        return bundled_license

    metadata_path = join(package_path, "METADATA")
    if not exists(metadata_path):
        return None

    with open(metadata_path, "rb") as file:
        try:
            raw_lines = file.readlines()
        except Exception as exc:
            print(f"Could not open metadata file for: {package_path}")
            raise exc

    field = "Home-page:"
    home_page = None
    for raw_line in raw_lines:
        # The file is read as bytes; lines that are not valid UTF-8 are skipped.
        try:
            text_line = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            continue
        if text_line.startswith(field):
            home_page = text_line[len(field):].strip()
            break

    if not home_page:
        return None
    return await fetch_from_github(home_page)


def find_license_file(package_path: str) -> str:
    """
    Reads the first license file found in a package folder.

    A ``licenses`` sub-folder is searched first, then ``package_path`` itself. A file
    qualifies when its name starts with "license", "licence" or "copying" in any
    letter case. Entries are visited in sorted order so the result is deterministic.

    :param package_path: Folder to search.
    :return: The text of the license file, or an empty string when ``package_path`` is
        not a folder or contains no license file.
    """
    if not os.path.isdir(package_path):
        return ""

    search_folders = [package_path]
    licenses_folder = join(package_path, "licenses")
    if os.path.isdir(licenses_folder):
        search_folders.insert(0, licenses_folder)

    # safe because the (possibly) case-sensitive path is what we have (unlike URL)
    #   verify if any version of the common licen[cs]e or copying names appear
    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)

    license_path = ""
    for folder in search_folders:
        for name in sorted(os.listdir(folder)):
            candidate = join(folder, name)
            if os.path.isdir(candidate):
                continue
            if license_regex.match(name):
                license_path = candidate
                break
        if license_path:
            break

    if not license_path:
        return ""

    # i want to try / except here because i want to know when we have issues reading files.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetches a license from the internal cache or, failing that, from the GitHub
    repository named as home page in the package's PyPI metadata.

    :param dependency: Package name.
    :param version: Package version.
    :param session: HTTP session to use for PyPI; defaults to the module-level ``_session``.
    :return: The license text, or an empty string when PyPI does not know the release
        or no license could be downloaded.
    :raises ValueError: When the PyPI metadata names no home page.
    """
    if session is None:
        session = _session

    license_text = await fetch_license_from_internal_cache(dependency, version)
    if license_text:
        return license_text

    response = await session.get(f"https://pypi.org/pypi/{dependency}/{version}/json", ssl=get_ssl_cert())
    if response.status != 200:
        return ""

    response_json = await response.json()
    info = response_json.get("info") or {}
    home_page = info.get("home_page")
    if not home_page:
        # "project_urls" may be null or lack a home page entry; labels vary in letter case.
        project_urls = info.get("project_urls") or {}
        home_page = next(
            (link for label, link in project_urls.items() if label.lower() == "homepage" and link),
            None,
        )
        if not home_page:
            raise ValueError(f"home_page is missing for {dependency}=={version}")

    return (await fetch_from_github(home_page)) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Looks a license up in the internal license cache (best effort).

    :param dependency: Package name; underscores are replaced with dashes for the cache key.
    :param version: Package version.
    :param session: HTTP session to use; defaults to the module-level ``_session``.
    :return: The cached license text, or an empty string when the cache is disabled,
        has no text for the release, or the request fails.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        response = await session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}")
        if response.status != 200:
            return ""

        payload = await response.json()
        cached_text = payload.get("text")
        if not cached_text:
            return ""

        print(f"Found license for {dependency}=={version} in internal cache.")
        return cached_text
    except Exception:
        # The cache is optional: any failure simply means "not cached".
        return ""


async def fetch_from_github(url: str, session: aiohttp.ClientSession = None) -> str:
    """
    Downloads the license text of the GitHub repository that ``url`` points to.

    Every name in ``license_names`` is first probed on every branch in ``main_branches``
    through raw.githubusercontent.com. Only when all guesses fail is the GitHub license
    API queried.

    :param url: Project URL, e.g. the home page taken from package metadata.
    :param session: HTTP session to use; defaults to the module-level ``_session``.
    :return: The license text, or an empty string when ``url`` is not a GitHub URL or
        no license could be downloaded.
    """
    # ``import urllib`` alone does not guarantee that the ``parse`` submodule is loaded.
    import urllib.parse

    if session is None:
        session = _session

    parsed_url = urllib.parse.urlparse(url)
    if "github" not in parsed_url.netloc:
        return ""

    # Only "<owner>/<repo>" identifies the repository; anything after it
    # (e.g. "/tree/main/docs") would break the URLs built below.
    segments = [segment for segment in parsed_url.path.split("/") if segment]
    if len(segments) < 2:
        return ""
    owner, repo = segments[:2]
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    repo_path = f"{owner}/{repo}"

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            response = await session.get(
                f"https://raw.githubusercontent.com/{repo_path}/{branch}/{license_name}", ssl=ssl_context
            )
            if response.status == 200:
                return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    response = await session.get(f"https://api.github.com/repos/{repo_path}/license", ssl=ssl_context)
    if response.status != 200:
        return ""

    response_json = await response.json()
    download_url = response_json.get("download_url")
    if not download_url:
        return ""

    response = await session.get(download_url, ssl=ssl_context)
    if response.status != 200:
        # An error page must not be mistaken for a license text.
        return ""
    return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Uploads a license text to the internal license cache (best effort).

    Nothing is sent unless both ``LICENSE_CACHE_URL`` and ``LICENSE_CACHE_API_KEY`` are set.
    Upload failures are reported on stdout and never raised.

    :param dependency: Package name; underscores are replaced with dashes for the cache key.
    :param version: Package version.
    :param license_text: License text to store.
    :param session: HTTP session to use; defaults to the module-level ``_session``.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        response = await session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        )
    except Exception as exc:
        # Caching is optional, so a failed upload must not abort license collection.
        # ``Exception`` (not a bare except) lets task cancellation propagate.
        print(f"[Warning] Could not put {dependency}=={version} license into internal cache: {exc}")
        return

    if response.status == 201:
        print(f"Put license for {dependency}=={version} into internal cache.")
    elif response.status == 401:
        print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")


def check_gpl(dependency: str, license_text: str):
    """
    Aborts the process when a license text mentions GPL or LGPL.

    :param dependency: Name of the dependency the license belongs to (used in the message).
    :param license_text: License text to inspect; empty or ``None`` is accepted and ignored.
    :raises SystemExit: With code 666 when the standalone word ``GPL`` or ``LGPL`` occurs
        anywhere in ``license_text``.
    """
    # The pattern must be a raw string ("\b" is a backspace otherwise) and must be
    # searched for, because the word can occur anywhere in the text.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Collects the licenses of all requirements and writes or returns the combined text.

    :param requirements_path: Path to the requirements file to process.
    :param pull_licenses: When given, only these requirements are processed.
    :param dest: Output file. When omitted, the combined text is returned instead.
    :param download_dir: Folder used for downloaded packages.
    :param ignore: Package names (without versions) that must be skipped.
    :param ignore_all_dependencies: Only collect licenses for the listed requirements.
    :param overwrite: Overwrite ``dest`` instead of appending to it.
    :return: The combined license text when ``dest`` is not given, otherwise ``None``.
        ``None`` is also returned when collecting fails; the error is printed.
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    # ``dest`` is optional, so fall back to the current folder for the packages list.
    # Using the base name keeps repeated calls from nesting the folder again.
    packages_dir = os.path.dirname(dest) if dest else ""
    PACKAGES_FILE = os.path.join(packages_dir, os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        # One section per package: name, dashed underline, license text.
        license_text = "".join(
            [f"{dep}\n" f"{'-' * len(dep)}\n" f"{dep_license}\n\n" for dep, dep_license in licenses.items()]
        )

        if dest:
            mode = "w" if overwrite else "a+"
            with open(dest, mode, encoding="utf-8") as file:
                file.write(license_text)
        else:
            return license_text
    except Exception as exc:
        # Failures are reported on stdout only; callers receive ``None``.
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Builds the value passed as ``ssl=`` to HTTP requests.

    :return: A default SSL context that uses the certifi CA bundle, or ``False`` when
        certifi cannot be imported.
    """
    try:
        import certifi

        ca_bundle = certifi.where()
        context = ssl.create_default_context(cafile=ca_bundle)
    except (ImportError, ModuleNotFoundError):
        # Don't verify SSL certificate if certifi is not installed.
        context = False
    return context


if __name__ == "__main__":
    # Command-line entry point: parse the options, run main() and print the collected
    # text when main() returns one (i.e. when no destination file was given).
    cli = argparse.ArgumentParser()
    cli.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    cli.add_argument(
        "--pull-licenses",
        dest="pull_licenses",
        type=str,
        nargs="+",
        help="Specifies list of licenses that should be pulled from the requirements.txt list",
    )
    cli.add_argument("-d", "--dest", dest="dest", type=str)
    cli.add_argument("--download-dir", dest="download_dir", type=str)
    cli.add_argument(
        "--ignore",
        dest="ignore",
        action="append",
        help="specifies dependency which licenses should be ignored. "
             "the script will not try to download license for ignored dependency.",
    )
    cli.add_argument(
        "--ignore-all-dependencies",
        dest="ignore_all",
        action="store_true",
        help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements."
             "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
    )
    cli.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="Overwrites the output file if present, otherwise license texts are appended.",
    )
    options = cli.parse_args()

    rendered = main(
        requirements_path=options.requirements,
        pull_licenses=options.pull_licenses,
        dest=options.dest,
        download_dir=options.download_dir,
        ignore=options.ignore,
        ignore_all_dependencies=options.ignore_all,
        overwrite=options.overwrite,
    )
    if rendered:
        print(rendered)


typing_extensions-4.1.1
-----------------------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

try:
    import aiohttp
except Exception:
    # On Linux aarch64 a failed aiohttp import is tolerated: warn and exit successfully.
    # Everywhere else the original error is re-raised. ``Exception`` (rather than a bare
    # except) keeps KeyboardInterrupt/SystemExit from being turned into a silent exit 0.
    if platform.uname().system == "Linux" and platform.uname().machine == "aarch64":
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

# Aliases that give the Dict[...] annotations used by the license helpers readable names.
DependencyName = str
LicenseText = str

# Shared HTTP session. get_licenses() binds it for the duration of a run.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # Added manually to cover the alternative English spelling ("licence").
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Candidate branch names of a repository's default branch.
# NOTE(review): presumably probed in this order when guessing license URLs -- the
# consuming code is outside this part of the file.
main_branches = [
    "master",
    "main",
    "latest",
]

# Default name of the file that lists the packages licenses were gathered for.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the internal license cache; get_licenses() reports the cache as disabled
# when it is not set. Trailing slashes are stripped.
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# API key for the internal license cache, read from the environment.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Collects the license texts for the pinned requirements of a requirements file.

    Lines that do not start with an alphanumeric character are skipped; every other
    line must pin its version with ``==``. One ``get_license`` task is scheduled per
    requirement that passes the marker / ignore / pull_licenses filters, and all of
    them run while the aiohttp session opened here is bound to the module-level
    ``_session``.

    :param requirements_path: path of the requirements file to read.
    :param pull_licenses: when not None, only these dependency names are processed.
    :param download_dir: forwarded unchanged to ``get_license``.
    :param ignore: dependency names to skip; None is treated as an empty list.
    :param ignore_all_dependencies: forwarded unchanged to ``get_license``.
    :return: dict of package key -> license text, ordered by sorted package key.
    :raises ValueError: when a requirement has no ``==`` version, when a scheduled
        dependency produced no result at all, or when a package has no license text.
    """
    global PACKAGES_FILE

    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    # The session is stored in the module-level _session so that helper coroutines
    # which default to it can issue requests while this block is active.
    global _session
    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue
            try:
                dependency, version = requirement.split("==", maxsplit=1)
            except ValueError:
                raise ValueError(f"{requirement} must contain a version.")

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                # NOTE(review): an unbounded split raises ValueError if the line holds more than one ';'.
                version, markers = version.split(";")
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                # Tasks are keyed by dependency name, so a name listed twice keeps only its last task here.
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        # Wait for every scheduled lookup; an exception raised by any task propagates from here.
        await asyncio.gather(*license_tasks.values())

        licenses = {}
        # Packages that came back without a license text, queued as (package, retry counter).
        pending = Queue()
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                # NOTE(review): result keys built by get_license look like "<name>-<version>" while
                # ignore appears to hold bare names, so this check may never match -- confirm.
                if package in ignore:
                    continue

                # First non-empty text wins when several tasks report the same package.
                if licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")

                if not license_text and package not in ignore:
                    pending.put((package, 0))
                    # raise ValueError(f"License for {package} was not found.")

        # Re-check queued packages against the final licenses dict: one that another task
        # resolved is dropped, the rest are re-queued until their counter exceeds 3.
        # Nothing in this loop adds to licenses, so re-queuing cannot change the outcome.
        missing = []
        while pending.qsize() > 0:
            package, ct = pending.get(timeout=0.01)
            if licenses.get(package):
                print(f"\tverified found license for (after repeat {ct}): {package}")
                continue

            # max dependency nesting depth check to detect missing cases
            if ct > 3:
                missing.append(package)
                continue
            pending.put((package, ct+1))

        if len(missing) > 0:
            raise ValueError(f"Licenses not found for {missing}")

        dependencies = sorted(list(licenses.keys()))

        # Rebuild the dict so that iteration follows the sorted package keys.
        licenses = {dependency: licenses[dependency] for dependency in dependencies}
        for dependency, license_text in licenses.items():
            if license_text:
                # check_gpl may terminate the process via sys.exit.
                check_gpl(dependency, license_text)
        return licenses


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed.

    Only a single ``<marker> <operator> <value>`` comparison is understood. The
    marker name is looked up in ``get_marker_bindings()``; marker text without a
    known operator, and markers whose binding is unknown or empty, include the
    dependency.

    :param markers: the text that followed ``;`` on the requirement line (may be empty).
    :return: True when the dependency applies to the running environment.
    :raises ValueError: when the markers combine conditions with ``and`` / ``or``.
    """
    if not markers:
        return True

    # Raw string so that \b is a word boundary rather than a backspace character, and
    # search() because the keyword sits in the middle of the marker text, not at its start.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    def numeric_version(text):
        """Returns the parts of a plain dotted-number version as ints, otherwise None."""
        if re.fullmatch(r"\d+(?:\.\d+)*", text):
            return [int(part) for part in text.split(".")]
        return None

    def version_aware(compare):
        """Makes a comparison numeric when both sides are dotted-number versions."""
        def compare_values(binding, value):
            left, right = numeric_version(binding), numeric_version(value)
            if left is None or right is None:
                # Not version-like (e.g. platform names): plain string comparison.
                return compare(binding, value)
            # Pad with zeros so that "3.8" and "3.8.0" are treated as the same version.
            width = max(len(left), len(right))
            left += [0] * (width - len(left))
            right += [0] * (width - len(right))
            return compare(left, right)
        return compare_values

    # The order matters because operator is found by using 'in'
    operators = {
        ">=": version_aware(operator.ge),
        "<=": version_aware(operator.le),
        "<": version_aware(operator.lt),
        ">": version_aware(operator.gt),
        "==": version_aware(operator.eq),
        "!=": version_aware(operator.ne),
        # PEP 508: "<marker> in <string>" tests whether the marker value occurs in the string.
        "not in": lambda binding, value: binding not in value,
        "in": lambda binding, value: binding in value,
    }

    for op, cmp in operators.items():
        if op in markers:
            break
    else:
        # No operators were found, include the dependency.
        return True

    # Split once only, so an operator character inside the value cannot break unpacking.
    marker, value = markers.split(op, 1)
    marker, value = marker.strip(" '\""), value.strip(" '\"")

    binding = get_marker_bindings().get(marker)
    if binding:
        return cmp(binding, value)

    # Unknown marker binding -- skip marker and include the dependency
    return True


_bindings = {}


def get_marker_bindings():
    """
    Provides the values that environment marker names resolve to on this machine.

    The mapping (interpreter implementation, OS, platform and Python version details)
    is computed on the first call, stored in the module-level ``_bindings`` and handed
    back as-is on every later call.
    """
    global _bindings
    if _bindings:
        return _bindings

    def describe_version(info):
        # "major.minor.micro", followed by e.g. "b2" for releases that are not final
        text = f"{info.major}.{info.minor}.{info.micro}"
        if info.releaselevel != 'final':
            text = text + info.releaselevel[0] + str(info.serial)
        return text

    if hasattr(sys, 'implementation'):
        impl_name = sys.implementation.name
        impl_version = describe_version(sys.implementation.version)
    else:
        impl_name, impl_version = '', '0'

    major, minor = platform.python_version_tuple()[:2]

    _bindings = dict(
        implementation_name=impl_name,
        implementation_version=impl_version,
        os_name=os.name,
        platform_machine=platform.machine(),
        platform_python_implementation=platform.python_implementation(),
        platform_release=platform.release(),
        platform_system=platform.system(),
        platform_version=platform.version(),
        python_full_version=platform.python_version(),
        python_version=f"{major}.{minor}",
        sys_platform=sys.platform,
    )
    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """
    Downloads the license text stored for a package in the package-metadata S3 bucket.

    :param package: package name used in the object key.
    :param version: package version used in the object key.
    :param package_manager: first segment of the object key.
    :return: the response body for an HTTP 200 answer, otherwise an empty string.
    """
    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        # The context manager releases the connection even when reading the body fails.
        async with _session.get(url) as response:
            if response.status != 200:
                # Error answers carry a body too; it must not be mistaken for a license text.
                return ""
            return await response.text()
    except Exception:
        # Best-effort source: any failure means "no license here". Unlike a bare except,
        # this lets task cancellation propagate.
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Finds the license text of one pinned dependency, trying local sources first.

    With ``download_dir``: an already downloaded copy in that folder is inspected, then
    the S3 bucket is asked, then the package is downloaded with pip. Without it, the
    folders on ``sys.path`` are inspected. In both cases ``fetch_license`` is the last resort.

    :param dependency: dependency name as written in the requirements file.
    :param version: pinned version.
    :param download_dir: folder holding (or receiving) downloaded packages.
    :param ignore: dependency names to skip; None is treated as an empty list.
    :param ignore_all_dependencies: forwarded to ``pip_download``.
    :param extra_pip_args: extra pip arguments, forwarded to ``pip_download``.
    :return: dict of "<name>-<version>" -> license text; empty when nothing was found
        or the dependency is ignored.
    """
    if ignore is None:
        # The default must not crash the membership tests below.
        ignore = []
    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        scratch_dir = None
        if os.path.exists(download_dir):
            # Check if package has already been downloaded
            local_prefix = f"{dependency.replace('-', '_')}-{version}"
            for path in os.listdir(download_dir):
                if local_prefix not in path:
                    continue

                license_text = await get_local_license(dependency, version, os.path.join(download_dir, path))
                if license_text:
                    print(f"License for {dependency_str} found locally.")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

                license_text = await get_license_from_s3(dependency, version)
                if license_text:
                    print(f"Downloaded a license for {dependency_str} from s3")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

                print(f"No license available for {dependency_str}. Downloading a clean copy.")
                scratch_dir = tempfile.TemporaryDirectory()
                download_dir = scratch_dir.name
                # The remaining names were listed from the old folder; joining them with the
                # new temporary folder would only repeat the failed lookups.
                break

        try:
            licenses = await pip_download(dependency, version, download_dir, ignore,
                                          ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        finally:
            if scratch_dir is not None:
                # pip_download has read everything it needs by the time it returns.
                scratch_dir.cleanup()
        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}
    print(f"No license found for: {dependency_str}")
    # Explicit empty result; callers only test the result for truthiness.
    return {}


dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Downloads a dependency with ``pip download`` and looks for license texts in every
    archive that pip stored.

    Archives are extracted next to themselves inside ``<download_dir>/<dependency>``.
    For each one the license is searched locally, then via ``fetch_license``, then in
    S3; texts that were found are also pushed to the internal cache.

    :param dependency: dependency name.
    :param version: pinned version.
    :param download_dir: parent folder for the per-dependency download folder.
    :param ignore: dependency names to skip.
    :param ignore_all_dependencies: when True the internal cache is consulted first and
        pip runs with ``--no-deps``.
    :param extra_pip_args: whitespace separated extra arguments appended to the pip call.
    :return: dict of "<name>-<version>" -> license text (possibly empty or None per entry).
    :raises Exception: when the pip subprocess could not be run.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            dependency_str = f"{dependency}-{version}"
            return {dependency_str: license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = [
        sys.executable,
        "-m",
        "pip",
        "download",
        f"{dependency}=={version}",
        "-d",
        download_dep_dir,
        "-q",
        "--disable-pip-version-check",
        *additional_args,
    ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        await process.wait()
    except Exception as exc:
        raise Exception(f"Failed to find and download a license for {dependency}=={version} using pip.") from exc

    if not os.path.isdir(download_dep_dir):
        # pip stored nothing (e.g. no matching distribution); let the caller fall back
        # to its other sources instead of failing on a missing folder.
        print(f"[Warning] pip download produced no files for {dependency}=={version}.")
        return {}

    # Suffixes of tar based archives; everything else (.whl, .zip) is opened as a zip file.
    tar_suffixes = (".tar.gz", ".tgz", ".tar.bz2", ".tbz2", ".tbz", ".tar")

    licenses = {}
    for path in os.listdir(download_dep_dir):
        if os.path.isdir(join(download_dep_dir, path)):
            # Skip extracted folders.
            continue

        match = dependency_regex.match(path)
        if not match:
            continue

        dependency_name = match.group(1)
        dependency_version = match.group(2)
        dependency_path = join(download_dep_dir, path)

        if dependency_name in ignore:
            continue

        archive_path, _ = os.path.splitext(dependency_path)
        tar_suffix = next((suffix for suffix in tar_suffixes if path.lower().endswith(suffix)), None)
        if tar_suffix is not None:
            # "r:*" lets tarfile detect the compression itself.
            with tarfile.open(dependency_path, "r:*") as archive:
                if hasattr(tarfile, "data_filter"):
                    # Refuse members that would be written outside of the target folder.
                    archive.extractall(archive_path, filter="data")
                else:
                    archive.extractall(archive_path)

            # Source archives usually wrap their content in a "<name>-<version>" folder.
            inner_path = join(archive_path, path[: -len(tar_suffix)])
            if os.path.isdir(inner_path):
                archive_path = inner_path
        else:
            with zipfile.ZipFile(dependency_path) as archive:
                archive.extractall(archive_path)

        dependency_str = f"{dependency_name}-{dependency_version}"
        licenses[dependency_str] = await get_local_license(dependency_name, dependency_version, archive_path)

        if not licenses[dependency_str]:
            licenses[dependency_str] = await fetch_license(dependency_name, dependency_version)

        if not licenses[dependency_str]:
            licenses[dependency_str] = await get_license_from_s3(dependency_name, dependency_version)

        if licenses[dependency_str]:
            await put_license_into_internal_cache(dependency_name, dependency_version, licenses[dependency_str])
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Walks the entries of ``sys.path`` in order and returns the first non-empty license
    text that ``get_local_license`` finds for the dependency, or "" when none has one.
    """
    for search_root in sys.path:
        found = await get_local_license(dependency, version, search_root)
        if not found:
            continue
        return found
    return ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Looks for the license text of a dependency inside a local folder.

    Sources are tried in this order: ``*.egg-info`` / ``*.dist-info`` entries named
    after the dependency and version, the dependency's own package folder, and finally
    license files lying directly in ``path``.

    :param dependency: dependency name; case and ``-`` / ``_`` differences are tolerated.
    :param version: version that metadata entries must start with (after the name).
    :param path: folder to inspect.
    :return: the license text, or "" when ``path`` is not a folder or nothing was found.
    """
    dependency_name = dependency.lower().replace("-", "_")
    # Package folders use underscores where the distribution name may use hyphens,
    # so both spellings are accepted.
    package_names = {dependency.lower(), dependency_name}
    # Entry names are lower-cased before comparing, so the prefix has to be as well.
    meta_prefix = f"{dependency_name}-{version}".lower()
    package_meta_candidates = []
    package_path = ""

    if not os.path.isdir(path):
        return ""

    for entry in os.listdir(path):
        entry_lower = entry.lower()
        if entry_lower.startswith(meta_prefix):
            package_meta_candidates.append(join(path, entry))
        elif entry_lower in package_names:
            package_path = join(path, entry)

    for package_meta in package_meta_candidates:
        if package_meta.endswith(".egg-info"):
            license_text = await egg_info_license(package_meta)
        elif package_meta.endswith(".dist-info"):
            license_text = await dist_info_license(package_meta)
        else:
            continue
        if license_text:
            return license_text

    if package_path:
        # Try to find a license file in the package source code
        license_text = find_license_file(package_path)
        if license_text:
            return license_text

    # A license file can be in the specified folder itself.
    return find_license_file(path) or ""


async def egg_info_license(package_path: str) -> str:
    """
    Fetches the license of a package described by ``.egg-info`` metadata.

    The ``Home-page`` header of PKG-INFO is read and handed to ``fetch_from_github``.

    :param package_path: an ``.egg-info`` folder containing PKG-INFO, or an
        ``.egg-info`` file that holds the PKG-INFO content itself.
    :return: the license text, or "" when the metadata is missing, has no home page
        or the home page yields no license.
    """
    if os.path.isdir(package_path):
        pkg_info_path = join(package_path, "PKG-INFO")
    else:
        # Some installers write a single "<name>-<version>.egg-info" file.
        pkg_info_path = package_path
    if not os.path.isfile(pkg_info_path):
        return ""

    url = ""
    # Decode explicitly: the locale default may fail on non-ASCII author names.
    with open(pkg_info_path, encoding="utf-8", errors="ignore") as file:
        for line in file:
            if line.startswith("Home-page:"):
                url = line[len("Home-page:"):].strip()
                break

    if not url:
        return ""
    return await fetch_from_github(url) or ""


async def dist_info_license(package_path: str) -> str:
    """
    Finds the license of a package described by a ``.dist-info`` folder.

    A license file inside the folder wins. Otherwise the home page is taken from the
    METADATA headers (``Home-page``, else ``Project-URL: Homepage, <url>``) and handed
    to ``fetch_from_github``.

    :param package_path: path of the ``.dist-info`` folder.
    :return: the license text, or "" when none could be determined.
    """
    license_text = find_license_file(package_path)
    if license_text:
        return license_text

    metadata_path = join(package_path, "METADATA")
    if not exists(metadata_path):
        return ""

    with open(metadata_path, "rb") as file:
        try:
            raw_lines = file.readlines()
        except Exception:
            print(f"Could not open metadata file for: {package_path}")
            raise

    home_page = ""
    project_homepage = ""
    for raw_line in raw_lines:
        try:
            line = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            # Lines are decoded one by one so a single bad line does not hide the rest.
            continue

        if not line.strip():
            # Headers end at the first blank line; what follows is the description.
            break
        if line.startswith("Home-page:"):
            home_page = line[len("Home-page:"):].strip()
            break
        if line.startswith("Project-URL:") and not project_homepage:
            label, _, link = line[len("Project-URL:"):].partition(",")
            if label.strip().lower() == "homepage":
                project_homepage = link.strip()

    url = home_page or project_homepage
    if not url:
        return ""
    return await fetch_from_github(url) or ""


def find_license_file(package_path: str) -> str:
    """
    Reads the first license file found in a package folder.

    When the folder has a ``licenses`` sub-folder only that sub-folder is searched.
    A file qualifies when its name starts with "license", "licence" or "copying" in
    any letter case; this covers every entry of ``license_names``. Candidates are
    visited in sorted order so the pick does not depend on the directory listing order.

    :param package_path: folder to search.
    :return: the text of the license file, or "" when the folder does not exist or
        holds no license file.
    """
    if not os.path.isdir(package_path):
        return ""

    licenses_folder = join(package_path, "licenses")
    # isdir rather than exists: a plain file called "licenses" cannot be listed.
    license_search_folder = licenses_folder if os.path.isdir(licenses_folder) else package_path

    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)
    license_path = ""
    for name in sorted(os.listdir(license_search_folder)):
        candidate = join(license_search_folder, name)
        if os.path.isdir(candidate):
            continue
        # safe because the (possibly) case-sensitive path is what we have (unlike URL)
        #   verify if any version of the common licen[cs]e or copying names appear
        if license_regex.match(name):
            license_path = candidate
            break

    if not license_path:
        return ""

    # Strict decoding first, so that problems reading a file are reported.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetches the license text of a released package over the network.

    The internal cache is asked first. Otherwise the home page is read from the PyPI
    JSON API (``home_page``, else ``project_urls["Homepage"]``) and handed to
    ``fetch_from_github``.

    :param dependency: package name.
    :param version: package version.
    :param session: aiohttp session to use; defaults to the module-level session.
    :return: the license text, or "" when PyPI does not answer with HTTP 200 or the
        home page yields no license.
    :raises ValueError: when PyPI lists no home page for the release.
    """
    if session is None:
        session = _session

    # Use the same session for the cache lookup as for the requests below.
    license_text = await fetch_license_from_internal_cache(dependency, version, session)
    if license_text:
        return license_text

    async with session.get(f"https://pypi.org/pypi/{dependency}/{version}/json", ssl=get_ssl_cert()) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    # "project_urls" can be null or lack a "Homepage" entry; treat both as "no home page".
    info = response_json.get("info") or {}
    home_page = info.get("home_page")
    if not home_page:
        home_page = (info.get("project_urls") or {}).get("Homepage")
    if not home_page:
        raise ValueError(f"home_page is missing for {dependency}=={version}")

    return await fetch_from_github(home_page) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Reads a license text from the internal license cache service.

    :param dependency: package name; underscores are replaced by hyphens for the lookup.
    :param version: package version.
    :param session: aiohttp session to use; defaults to the module-level session.
    :return: the cached text, or "" when the cache is disabled (no LICENSE_CACHE_URL),
        has no entry, or the request fails.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}") as response:
            if response.status != 200:
                return ""
            payload = await response.json()
        print(f"Found license for {dependency}=={version} in internal cache.")
        # An entry without text counts as "not cached".
        return payload.get("text") or ""
    except Exception:
        # The cache is optional: any failure means a miss. Unlike a bare except,
        # this lets task cancellation propagate.
        return ""


async def fetch_from_github(url: str, session: aiohttp.ClientSession = None) -> str:
    """
    Downloads the license file of a GitHub repository.

    Well-known license file names are tried on the usual default branches via
    raw.githubusercontent.com; only if none exists the GitHub license API is used.

    :param url: project home page; only URLs whose host contains "github" are handled.
    :param session: aiohttp session to use; defaults to the module-level session.
    :return: the license text, or "" when the URL is not a GitHub repository URL or
        no license could be downloaded.
    """
    # Imported here because the file-level "import urllib" does not load the submodule.
    from urllib.parse import urlparse

    if session is None:
        session = _session

    parsed_url = urlparse(url)
    if "github" not in parsed_url.netloc:
        return ""

    # Keep only "<owner>/<repo>": home pages may point below the repository root
    # (e.g. ".../tree/main") or carry a ".git" suffix.
    segments = [segment for segment in parsed_url.path.split("/") if segment]
    if len(segments) < 2:
        return ""
    repository = f"{segments[0]}/{segments[1]}"
    if repository.endswith(".git"):
        repository = repository[: -len(".git")]

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            raw_url = f"https://raw.githubusercontent.com/{repository}/{branch}/{license_name}"
            async with session.get(raw_url, ssl=ssl_context) as response:
                if response.status == 200:
                    return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    async with session.get(f"https://api.github.com/repos/{repository}/license", ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    download_url = response_json.get("download_url")
    if not download_url:
        return ""

    async with session.get(download_url, ssl=ssl_context) as response:
        return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Stores a license text in the internal license cache service (best effort).

    Nothing is sent unless both LICENSE_CACHE_URL and LICENSE_CACHE_API_KEY are set.
    Failures are reported on stdout and never raised.

    :param dependency: package name; underscores are replaced by hyphens for the key.
    :param version: package version.
    :param license_text: text to store.
    :param session: aiohttp session to use; defaults to the module-level session.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        ) as response:
            if response.status == 201:
                print(f"Put license for {dependency}=={version} into internal cache.")
            elif response.status == 401:
                print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")
    except Exception as exc:
        # Caching is optional, so the failure is reported but must not abort the run.
        # Unlike a bare except, this lets task cancellation propagate.
        print(f"[Warning] Could not put {dependency}=={version} license into internal cache: {exc}")


def check_gpl(dependency: str, license_text: str):
    """
    Terminates the process when a license text mentions GPL or LGPL.

    :param dependency: name used in the error message.
    :param license_text: license text to scan; empty text is accepted.
    :raises SystemExit: with code 666 when "GPL" or "LGPL" occurs as a whole word.
    """
    # Raw string so that \b is a word boundary rather than a backspace character, and
    # search() because the mention can occur anywhere in the text.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Gathers the licenses of a requirements file and writes or returns the combined text.

    Each license is emitted as the package key, an underline of dashes, the license
    text and a blank line. The names of gathered packages are tracked in
    ``packages_list.txt`` next to ``dest`` (in the working directory without ``dest``).

    :param requirements_path: path of the requirements file.
    :param pull_licenses: when given, only these dependencies are processed.
    :param dest: output file; when omitted the combined text is returned instead.
    :param download_dir: folder for downloaded packages, forwarded to ``get_licenses``.
    :param ignore: dependency names (without version) to skip.
    :param ignore_all_dependencies: only process the listed requirements themselves.
    :param overwrite: truncate ``dest`` instead of appending to it.
    :return: the combined text when ``dest`` is not given; None otherwise, and also
        when gathering failed (the error is printed).
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    # dest is optional, and basename keeps repeated calls from nesting the folder part.
    packages_dir = os.path.dirname(dest) if dest else ""
    PACKAGES_FILE = os.path.join(packages_dir, os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        license_text = "".join(
            f"{dep}\n{'-' * len(dep)}\n{text}\n\n" for dep, text in licenses.items()
        )

        if dest:
            mode = "w" if overwrite else "a+"
            with open(dest, mode, encoding="utf-8") as file:
                file.write(license_text)
        else:
            return license_text
    except Exception as exc:
        # Errors are reported on stdout; the function then returns None.
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Builds the value passed as ``ssl=`` to HTTP requests.

    Returns an SSL context backed by the certifi CA bundle when certifi can be
    imported, and ``False`` otherwise.
    """
    # NOTE(review): False is presumably interpreted by the HTTP client as "skip
    # certificate verification" -- confirm that this fallback is acceptable.
    context: Union[ssl.SSLContext, bool] = False
    try:
        import certifi

        context = ssl.create_default_context(cafile=certifi.where())
    except ImportError:
        # Also covers ModuleNotFoundError, which is a subclass of ImportError.
        pass
    return context


if __name__ == "__main__":
    # Command line entry point: parse the options, run main() and print the combined
    # license text when no destination file was requested.
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    parser.add_argument("--pull-licenses", dest="pull_licenses", type=str, nargs="+",
        help="Specifies list of licenses that should be pulled from the requirements.txt list")
    parser.add_argument("-d", "--dest", dest="dest", type=str)
    parser.add_argument("--download-dir", dest="download_dir", type=str)
    parser.add_argument(
        "--ignore",
        dest="ignore",
        action="append",
        help="specifies dependency which licenses should be ignored. "
             "the script will not try to download license for ignored dependency.",
    )
    parser.add_argument(
        "--ignore-all-dependencies",
        dest="ignore_all",
        action="store_true",
        # Trailing space added: the two literals are concatenated into one sentence run.
        help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements. "
             "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
    )
    parser.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="Overwrites the output file if present, otherwise license texts are appended.",
    )
    args = parser.parse_args()

    text = main(args.requirements, args.pull_licenses, args.dest, args.download_dir, args.ignore, ignore_all_dependencies=args.ignore_all,
                overwrite=args.overwrite)
    if text:
        print(text)


wcwidth-0.2.5
-------------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

try:
    import aiohttp
except:
    if all([platform.uname().system == "Linux", platform.uname().machine == "aarch64"]):
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

DependencyName = str
LicenseText = str

_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # added manually to pair common english spellings
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

main_branches = [
    "master",
    "main",
    "latest",
]

PACKAGES_FILE = "packages_list.txt"

LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Collects the license texts for the pinned requirements of a requirements file.

    Lines that do not start with an alphanumeric character are skipped; every other
    line must pin its version with ``==``. One ``get_license`` task is scheduled per
    requirement that passes the marker / ignore / pull_licenses filters, and all of
    them run while the aiohttp session opened here is bound to the module-level
    ``_session``.

    :param requirements_path: path of the requirements file to read.
    :param pull_licenses: when not None, only these dependency names are processed.
    :param download_dir: forwarded unchanged to ``get_license``.
    :param ignore: dependency names to skip; None is treated as an empty list.
    :param ignore_all_dependencies: forwarded unchanged to ``get_license``.
    :return: dict of package key -> license text, ordered by sorted package key.
    :raises ValueError: when a requirement has no ``==`` version, when a scheduled
        dependency produced no result at all, or when a package has no license text.
    """
    global PACKAGES_FILE

    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    # The session is stored in the module-level _session so that helper coroutines
    # which default to it can issue requests while this block is active.
    global _session
    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue
            try:
                dependency, version = requirement.split("==", maxsplit=1)
            except ValueError:
                raise ValueError(f"{requirement} must contain a version.")

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                # NOTE(review): an unbounded split raises ValueError if the line holds more than one ';'.
                version, markers = version.split(";")
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                # Tasks are keyed by dependency name, so a name listed twice keeps only its last task here.
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        # Wait for every scheduled lookup; an exception raised by any task propagates from here.
        await asyncio.gather(*license_tasks.values())

        licenses = {}
        # Packages that came back without a license text, queued as (package, retry counter).
        pending = Queue()
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                # NOTE(review): result keys built by get_license look like "<name>-<version>" while
                # ignore appears to hold bare names, so this check may never match -- confirm.
                if package in ignore:
                    continue

                # First non-empty text wins when several tasks report the same package.
                if licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")

                if not license_text and package not in ignore:
                    pending.put((package, 0))
                    # raise ValueError(f"License for {package} was not found.")

        # Re-check queued packages against the final licenses dict: one that another task
        # resolved is dropped, the rest are re-queued until their counter exceeds 3.
        # Nothing in this loop adds to licenses, so re-queuing cannot change the outcome.
        missing = []
        while pending.qsize() > 0:
            package, ct = pending.get(timeout=0.01)
            if licenses.get(package):
                print(f"\tverified found license for (after repeat {ct}): {package}")
                continue

            # max dependency nesting depth check to detect missing cases
            if ct > 3:
                missing.append(package)
                continue
            pending.put((package, ct+1))

        if len(missing) > 0:
            raise ValueError(f"Licenses not found for {missing}")

        dependencies = sorted(list(licenses.keys()))

        # Rebuild the dict so that iteration follows the sorted package keys.
        licenses = {dependency: licenses[dependency] for dependency in dependencies}
        for dependency, license_text in licenses.items():
            if license_text:
                # check_gpl may terminate the process via sys.exit.
                check_gpl(dependency, license_text)
        return licenses


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed.

    Only a single ``<marker> <operator> <value>`` comparison is understood. The
    marker name is looked up in ``get_marker_bindings()``; marker text without a
    known operator, and markers whose binding is unknown or empty, include the
    dependency.

    :param markers: the text that followed ``;`` on the requirement line (may be empty).
    :return: True when the dependency applies to the running environment.
    :raises ValueError: when the markers combine conditions with ``and`` / ``or``.
    """
    if not markers:
        return True

    # Raw string so that \b is a word boundary rather than a backspace character, and
    # search() because the keyword sits in the middle of the marker text, not at its start.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    def numeric_version(text):
        """Returns the parts of a plain dotted-number version as ints, otherwise None."""
        if re.fullmatch(r"\d+(?:\.\d+)*", text):
            return [int(part) for part in text.split(".")]
        return None

    def version_aware(compare):
        """Makes a comparison numeric when both sides are dotted-number versions."""
        def compare_values(binding, value):
            left, right = numeric_version(binding), numeric_version(value)
            if left is None or right is None:
                # Not version-like (e.g. platform names): plain string comparison.
                return compare(binding, value)
            # Pad with zeros so that "3.8" and "3.8.0" are treated as the same version.
            width = max(len(left), len(right))
            left += [0] * (width - len(left))
            right += [0] * (width - len(right))
            return compare(left, right)
        return compare_values

    # The order matters because operator is found by using 'in'
    operators = {
        ">=": version_aware(operator.ge),
        "<=": version_aware(operator.le),
        "<": version_aware(operator.lt),
        ">": version_aware(operator.gt),
        "==": version_aware(operator.eq),
        "!=": version_aware(operator.ne),
        # PEP 508: "<marker> in <string>" tests whether the marker value occurs in the string.
        "not in": lambda binding, value: binding not in value,
        "in": lambda binding, value: binding in value,
    }

    for op, cmp in operators.items():
        if op in markers:
            break
    else:
        # No operators were found, include the dependency.
        return True

    # Split once only, so an operator character inside the value cannot break unpacking.
    marker, value = markers.split(op, 1)
    marker, value = marker.strip(" '\""), value.strip(" '\"")

    binding = get_marker_bindings().get(marker)
    if binding:
        return cmp(binding, value)

    # Unknown marker binding -- skip marker and include the dependency
    return True


_bindings = {}


def get_marker_bindings():
    """
    Provides the values that environment marker names resolve to on this machine.

    The mapping (interpreter implementation, OS, platform and Python version details)
    is computed on the first call, stored in the module-level ``_bindings`` and handed
    back as-is on every later call.
    """
    global _bindings
    if _bindings:
        return _bindings

    def describe_version(info):
        # "major.minor.micro", followed by e.g. "b2" for releases that are not final
        text = f"{info.major}.{info.minor}.{info.micro}"
        if info.releaselevel != 'final':
            text = text + info.releaselevel[0] + str(info.serial)
        return text

    if hasattr(sys, 'implementation'):
        impl_name = sys.implementation.name
        impl_version = describe_version(sys.implementation.version)
    else:
        impl_name, impl_version = '', '0'

    major, minor = platform.python_version_tuple()[:2]

    _bindings = dict(
        implementation_name=impl_name,
        implementation_version=impl_version,
        os_name=os.name,
        platform_machine=platform.machine(),
        platform_python_implementation=platform.python_implementation(),
        platform_release=platform.release(),
        platform_system=platform.system(),
        platform_version=platform.version(),
        python_full_version=platform.python_version(),
        python_version=f"{major}.{minor}",
        sys_platform=sys.platform,
    )
    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """
    Downloads the license stored for ``package``/``version`` in the package-metadata
    S3 bucket, using the shared module-level ``_session``.

    :return: The response body on HTTP 200, otherwise an empty string. Any request
        error is treated as "no license available" and also yields an empty string.
    """
    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        # ``async with`` releases the connection back to the pool when done.
        async with _session.get(url) as response:
            # Without this check the body of an error response would be returned
            # as if it were license text.
            if response.status != 200:
                return ""
            return await response.text()
    except Exception:
        # Best-effort lookup; ``Exception`` (not a bare except) lets task
        # cancellation and KeyboardInterrupt propagate.
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Finds the license text for one pinned dependency.

    With ``download_dir`` set, an already downloaded copy inside that folder is
    inspected first, then the S3 store, and finally the package is downloaded with pip
    (into a temporary folder if the existing copy carried no license). Without
    ``download_dir`` the folders on ``sys.path`` are searched. If none of that yields a
    license, ``fetch_license`` is used as the last resort.

    :param dependency: Package name as written in the requirements file.
    :param version: Pinned version of the package.
    :param download_dir: Folder used for ``pip download``; may be ``None``.
    :param ignore: Package names to skip; ``None`` means nothing is ignored.
    :param ignore_all_dependencies: Forwarded to ``pip_download``.
    :param extra_pip_args: Extra command-line arguments forwarded to ``pip_download``.
    :return: Mapping of ``"<name>-<version>"`` to license text; empty when the
        dependency is ignored or no license was found.
    """
    if ignore is None:
        # The default used to reach the membership test below and raise TypeError.
        ignore = []
    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        tempdir = None
        try:
            if os.path.exists(download_dir):
                # Check if package has already been downloaded
                local_marker = f"{dependency.replace('-', '_')}-{version}"
                for path in os.listdir(download_dir):
                    if local_marker not in path:
                        continue

                    license_text = await get_local_license(dependency, version, os.path.join(download_dir, path))
                    if license_text:
                        print(f"License for {dependency_str} found locally.")

                        await put_license_into_internal_cache(dependency, version, license_text)
                        return {dependency_str: license_text}

                    license_text = await get_license_from_s3(dependency, version)
                    if license_text:
                        print(f"Downloaded a license for {dependency_str} from s3")

                        await put_license_into_internal_cache(dependency, version, license_text)
                        return {dependency_str: license_text}

                    print(f"No license available for {dependency_str}. Downloading a clean copy.")
                    tempdir = tempfile.TemporaryDirectory()
                    download_dir = tempdir.name
                    # Stop scanning: remaining entries belong to the old folder and would
                    # otherwise be joined onto the temporary one (and spawn more of them).
                    break

            licenses = await pip_download(dependency, version, download_dir, ignore,
                                          ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        finally:
            if tempdir is not None:
                # License texts are already in memory, the clean copy can go.
                tempdir.cleanup()

        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}

    print(f"No license found for: {dependency_str}")
    # Explicit empty mapping instead of an implicit None, matching the annotation.
    return {}


dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")

# Archive suffixes that the version group of ``dependency_regex`` swallows for source
# distributions (e.g. "pkg-1.0.tar.gz" is captured as version "1.0.tar.gz").
_SDIST_SUFFIXES = (".tar.gz", ".tar.bz2", ".tgz", ".tbz", ".zip")

# Values ``os.path.splitext`` returns for tarballs; note the leading dot.
_TARBALL_EXTENSIONS = (".gz", ".bz2", ".tgz", ".tbz")


def _unpack_download(dependency_path: str) -> str:
    """Unpack one downloaded archive beside itself and return the folder to scan for a license."""
    archive_path, archive_ext = os.path.splitext(dependency_path)

    # NOTE(review): extractall() trusts the member paths stored in the archive. The
    # archives come from the configured package index; confirm that is acceptable.
    if archive_ext in _TARBALL_EXTENSIONS:
        # "r:*" lets tarfile detect the compression itself.
        with tarfile.open(dependency_path, "r:*") as archive:
            archive.extractall(archive_path)
    else:
        with zipfile.ZipFile(dependency_path) as archive:
            archive.extractall(archive_path)

    # Source archives usually wrap everything in a "<name>-<version>" folder; step
    # into it when present (the extraction folder of a .tar.gz still ends in ".tar").
    archive_name = os.path.basename(archive_path)
    if archive_name.endswith(".tar"):
        archive_name = archive_name[: -len(".tar")]
    nested = join(archive_path, archive_name)
    if os.path.isdir(nested):
        return nested
    return archive_path


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Downloads ``dependency==version`` with ``pip download`` into
    ``<download_dir>/<dependency>``, unpacks every downloaded archive and collects a
    license text for each of them.

    For every archive the lookup order is: the unpacked files, ``fetch_license``, then
    the S3 store. Found texts are pushed to the internal cache.

    :param ignore: Package names that must be skipped.
    :param ignore_all_dependencies: Pass ``--no-deps`` to pip; in that case the
        internal cache is consulted first and may short-circuit the download.
    :param extra_pip_args: Extra command-line arguments for pip, whitespace separated.
    :return: Mapping of ``"<name>-<version>"`` to license text. The text is empty when
        nothing was found for an archive. The mapping is empty when the dependency is
        ignored or pip did not create the download folder.
    :raises Exception: If the pip process could not be started or awaited.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            dependency_str = f"{dependency}-{version}"
            return {dependency_str: license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = [
        sys.executable,
        "-m",
        "pip",
        "download",
        f"{dependency}=={version}",
        "-d",
        download_dep_dir,
        "-q",
        "--disable-pip-version-check",
        *additional_args,
    ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        await process.wait()
    except Exception as exc:
        # ``Exception`` (not a bare except) keeps task cancellation intact; chaining
        # preserves the original cause.
        raise Exception(f"Failed to find and download a license for {dependency}=={version} using pip.") from exc

    if process.returncode != 0:
        print(f"[Warning] pip download exited with code {process.returncode} for {dependency}=={version}.")
    if not os.path.isdir(download_dep_dir):
        # pip produced nothing; let the caller fall back to its other sources.
        return {}

    licenses = {}
    for path in os.listdir(download_dep_dir):
        dependency_path = join(download_dep_dir, path)
        if os.path.isdir(dependency_path):
            # Skip extracted folders.
            continue

        match = dependency_regex.match(path)
        if not match:
            continue

        dependency_name = match.group(1)
        dependency_version = match.group(2)
        if dependency_name in ignore:
            continue

        for suffix in _SDIST_SUFFIXES:
            if dependency_version.endswith(suffix):
                dependency_version = dependency_version[: -len(suffix)]
                break

        dependency_str = f"{dependency_name}-{dependency_version}"
        try:
            archive_path = _unpack_download(dependency_path)
        except (tarfile.TarError, zipfile.BadZipFile) as exc:
            # Not a readable archive: skip the local scan but still try the remote sources.
            print(f"[Warning] Could not unpack {dependency_path}: {exc}")
            licenses[dependency_str] = ""
        else:
            licenses[dependency_str] = await get_local_license(dependency_name, dependency_version, archive_path)

        if not licenses[dependency_str]:
            licenses[dependency_str] = await fetch_license(dependency_name, dependency_version)

        if not licenses[dependency_str]:
            licenses[dependency_str] = await get_license_from_s3(dependency_name, dependency_version)

        if licenses[dependency_str]:
            await put_license_into_internal_cache(dependency_name, dependency_version, licenses[dependency_str])
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Searches each ``sys.path`` entry, in order, with ``get_local_license`` and returns
    the first non-empty license text; returns an empty string when no entry has one.
    """
    for search_root in sys.path:
        found = await get_local_license(dependency, version, search_root)
        if not found:
            continue
        return found
    return ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Looks for the license of ``dependency`` inside the folder ``path``.

    Lookup order:
      1. ``<name>-<version>*.egg-info`` / ``*.dist-info`` entries of ``path``
      2. a folder of ``path`` named like the package
      3. ``path`` itself

    :param dependency: Package name; matching is case-insensitive and also accepts
        the underscore spelling of a hyphenated name.
    :param version: Package version used to select metadata folders.
    :param path: Folder to inspect.
    :return: License text, or an empty string when ``path`` is not a folder or no
        license was found.
    """
    if not os.path.isdir(path):
        return ""

    dependency_name = dependency.lower().replace("-", "_")
    # Entries are lower-cased before comparison, so the prefix has to be as well.
    meta_prefix = f"{dependency_name}-{version}".lower()
    # Package folders use underscores where the distribution name has hyphens.
    package_dir_names = {dependency.lower(), dependency_name}

    package_meta_candidates = []
    package_path = ""
    for dependency_path in os.listdir(path):
        entry = dependency_path.lower()
        if entry.startswith(meta_prefix):
            package_meta_candidates.append(join(path, dependency_path))
        elif entry in package_dir_names:
            package_path = join(path, dependency_path)

    for package_meta in package_meta_candidates:
        if package_meta.endswith(".egg-info"):
            license_text = await egg_info_license(package_meta)
        elif package_meta.endswith(".dist-info"):
            license_text = await dist_info_license(package_meta)
        else:
            continue
        if license_text:
            return license_text

    if package_path:
        # Try to find a license file in the package source code
        license_text = find_license_file(package_path)
        if license_text:
            return license_text

    # A license file can be in the specified folder itself.
    return find_license_file(path) or ""


async def egg_info_license(package_path: str) -> str:
    """
    Reads ``PKG-INFO`` from an ``.egg-info`` entry and, when it names a ``Home-page``,
    asks ``fetch_from_github`` for the license of that page.

    :param package_path: Path of the ``.egg-info`` entry.
    :return: License text, or an empty string when ``PKG-INFO`` cannot be read (for
        example because the entry is a plain file), has no home page, or no license
        could be fetched.
    """
    try:
        # Decode explicitly; undecodable bytes must not abort the whole run.
        with open(join(package_path, "PKG-INFO"), encoding="utf-8", errors="replace") as file:
            info = file.readlines()
    except OSError:
        return ""

    prefix = "Home-page:"
    url = next((line[len(prefix):].strip() for line in info if line.startswith(prefix)), None)
    if not url:
        return ""

    return await fetch_from_github(url) or ""


async def dist_info_license(package_path: str) -> str:
    """
    Finds the license for a ``.dist-info`` folder.

    A license file inside the folder wins. Otherwise the ``METADATA`` file is scanned
    for a ``Home-page`` (or, failing that, a ``Project-URL`` that points at GitHub) and
    the license is requested through ``fetch_from_github``.

    :param package_path: Path of the ``.dist-info`` folder.
    :return: License text, or an empty string when nothing was found.
    :raises Exception: Re-raises whatever opening/reading ``METADATA`` raised, after
        printing which folder was affected.
    """
    license_text = find_license_file(package_path)
    if license_text:
        return license_text

    metadata_path = join(package_path, "METADATA")
    if not exists(metadata_path):
        return ""

    try:
        with open(metadata_path, "rb") as file:
            metadata = file.readlines()
    except Exception:
        print(f"Could not open metadata file for: {package_path}")
        raise

    url = None
    project_url = None
    for raw_line in metadata:
        try:
            line = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            # Lines that are not valid UTF-8 cannot hold the fields of interest.
            continue

        if line.startswith("Home-page:"):
            url = line[len("Home-page:"):].strip()
            break
        if project_url is None and line.startswith("Project-URL:"):
            # Format is "Project-URL: <label>, <url>"; remember the first GitHub one.
            candidate = line[len("Project-URL:"):].rpartition(",")[2].strip()
            if "github" in candidate:
                project_url = candidate

    url = url or project_url
    if not url:
        return ""

    return await fetch_from_github(url) or ""


def find_license_file(package_path: str) -> str:
    """
    Returns the content of the first license-like file found for a package folder.

    When ``package_path`` has a ``licenses`` sub-folder only that sub-folder is
    searched, otherwise ``package_path`` itself. A file qualifies when its name starts
    with "license", "licence" or "copying" (any letter case) or is listed in
    ``license_names``. Entries are visited in sorted order so the pick is stable.

    :param package_path: Folder to inspect.
    :return: File content decoded as UTF-8 (undecodable bytes are dropped after a
        warning), or an empty string when the folder does not exist or holds no
        matching file.
    """
    if not os.path.isdir(package_path):
        return ""

    licenses_folder = join(package_path, "licenses")
    # ``isdir`` rather than ``exists``: a plain file called "licenses" cannot be listed.
    license_search_folder = licenses_folder if os.path.isdir(licenses_folder) else package_path

    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)
    license_path = ""
    for path in sorted(os.listdir(license_search_folder)):
        candidate = join(license_search_folder, path)
        if os.path.isdir(candidate):
            continue
        # safe because the (possibly) case-sensitive path is what we have (unlike URL)
        #   verify if any version of the common licen[cs]e or copying names appear
        if license_regex.match(path) or path in license_names:
            license_path = candidate
            break

    if not license_path:
        return ""

    # i want to try / except here because i want to know when we have issues reading files.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetches the license of ``dependency==version`` from remote sources.

    The internal cache is asked first. Otherwise the PyPI JSON API supplies the
    project's home page (or, failing that, a home/source entry of ``project_urls``),
    which is handed to ``fetch_from_github``.

    :param session: HTTP session to use; defaults to the module-level ``_session``.
    :return: License text, or an empty string when PyPI does not know the release or
        no license could be fetched from the project page.
    :raises ValueError: If PyPI lists no usable project URL for the release.
    """
    if session is None:
        session = _session

    license_text = await fetch_license_from_internal_cache(dependency, version, session)
    if license_text:
        return license_text

    # ``async with`` releases the connection once the body has been read.
    async with session.get(f"https://pypi.org/pypi/{dependency}/{version}/json", ssl=get_ssl_cert()) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    info = response_json.get("info") or {}
    home_page = info.get("home_page")
    if not home_page:
        # ``project_urls`` may be null and its labels are free-form, so match the
        # usual spellings case-insensitively instead of indexing "Homepage" directly.
        project_urls = info.get("project_urls") or {}
        urls_by_label = {str(label).lower(): link for label, link in project_urls.items()}
        for label in ("homepage", "home", "source", "source code", "repository"):
            if urls_by_label.get(label):
                home_page = urls_by_label[label]
                break
    if not home_page:
        raise ValueError(f"home_page is missing for {dependency}=={version}")

    return await fetch_from_github(home_page) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Looks up the license of ``dependency==version`` in the internal license cache
    (``LICENSE_CACHE_URL``).

    :param session: HTTP session to use; defaults to the module-level ``_session``.
    :return: The cached text, or an empty string when the cache is not configured, has
        no entry, or the request failed.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}") as response:
            if response.status != 200:
                return ""
            payload = await response.json()
        license_text = payload.get("text") or ""
    except Exception:
        # The cache is optional: any failure simply means "not cached". Catching
        # ``Exception`` rather than everything keeps task cancellation working.
        return ""

    if license_text:
        print(f"Found license for {dependency}=={version} in internal cache.")
    return license_text


async def fetch_from_github(url: str, session: aiohttp.ClientSession = None) -> str:
    """
    Downloads the license file of the GitHub repository that ``url`` points to.

    The well-known license file names are probed on the usual default branches via
    raw.githubusercontent.com first; only when none exists is the rate-limited GitHub
    API asked where the license lives.

    :param url: Project URL; only the leading ``<owner>/<repo>`` part of its path is
        used, so links to sub-pages or ``.git`` clone URLs work as well.
    :param session: HTTP session to use; defaults to the module-level ``_session``.
    :return: License text, or an empty string when ``url`` is not a GitHub repository
        URL or no license could be downloaded.
    """
    # ``import urllib`` alone does not guarantee that the ``parse`` submodule is loaded.
    import urllib.parse

    if session is None:
        session = _session

    parsed_url = urllib.parse.urlparse(url)
    if "github" not in parsed_url.netloc:
        return ""

    segments = [segment for segment in parsed_url.path.split("/") if segment]
    if len(segments) < 2:
        # Without both owner and repository every request below would be a guaranteed miss.
        return ""
    owner, repo = segments[0], segments[1]
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    repo_slug = f"{owner}/{repo}"

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            raw_url = f"https://raw.githubusercontent.com/{repo_slug}/{branch}/{license_name}"
            async with session.get(raw_url, ssl=ssl_context) as response:
                if response.status == 200:
                    return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    async with session.get(f"https://api.github.com/repos/{repo_slug}/license", ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    download_url = response_json.get("download_url")
    if not download_url:
        return ""

    async with session.get(download_url, ssl=ssl_context) as response:
        # An error page must not be mistaken for license text.
        if response.status != 200:
            return ""
        return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Stores a license text in the internal license cache. This is best effort: a
    failure is reported on stdout but never raised.

    Nothing is sent unless both ``LICENSE_CACHE_URL`` and ``LICENSE_CACHE_API_KEY``
    are configured.

    :param session: HTTP session to use; defaults to the module-level ``_session``.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        # Return value kept as before for callers that might inspect it.
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        ) as response:
            if response.status == 201:
                print(f"Put license for {dependency}=={version} into internal cache.")
            elif response.status == 401:
                print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")
    except Exception as exc:
        # Caching must not break license collection, but the failure should be visible.
        print(f"[Warning] Could not put license for {dependency}=={version} into internal cache: {exc}")


def check_gpl(dependency: str, license_text: str):
    """
    Aborts the program with exit code 666 when ``license_text`` mentions GPL or LGPL
    (optionally with a version suffix such as "GPLv3") as a standalone word.

    This is a plain text match, so any mention of the term triggers it.

    :param dependency: Name used in the error message.
    :param license_text: License text to inspect; empty text is accepted.
    """
    # The pattern must be a raw string ("\b" is a backspace character otherwise) and
    # has to be searched for anywhere in the text, not only at its very beginning.
    if license_text and re.search(r"\bL?GPL(?:v\d+(?:\.\d+)*)?\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Collects the licenses for a requirements file and renders them as one text in
    which every license is preceded by an underlined ``<name>-<version>`` heading.

    :param requirements_path: Path of the requirements file.
    :param pull_licenses: When given, only these requirement names are processed.
    :param dest: Output file. When omitted the rendered text is returned instead.
    :param download_dir: Folder used for ``pip download``.
    :param ignore: Package names (without version) to skip.
    :param ignore_all_dependencies: Only look at the requirements themselves.
    :param overwrite: Replace ``dest`` instead of appending to it.
    :return: The rendered text when ``dest`` is not given, otherwise ``None``.
        ``None`` is also returned after a failure, which is printed, not raised.
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    # ``dest`` is optional, and only the file name of PACKAGES_FILE is reused so that
    # repeated calls do not stack one folder prefix onto another.
    dest_dir = os.path.dirname(dest) if dest else ""
    PACKAGES_FILE = os.path.join(dest_dir, os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        license_text = "".join(
            f"{dep}\n{'-' * len(dep)}\n{text}\n\n" for dep, text in licenses.items()
        )

        if dest:
            mode = "w" if overwrite else "a+"
            with open(dest, mode, encoding="utf-8") as file:
                file.write(license_text)
        else:
            return license_text
    except Exception as exc:
        # NOTE(review): failures are only printed, so the process still exits with
        # status 0. Kept as is because callers may rely on it -- confirm.
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Returns the value to pass as ``ssl=`` to the HTTP session.

    Preference order:
      1. a context backed by certifi's CA bundle, when certifi is installed
      2. a context backed by the system trust store, when it provides CA certificates
      3. ``False`` (certificate verification disabled), announced once with a warning
    """
    try:
        import certifi
    except ImportError:
        certifi = None

    if certifi is not None:
        return ssl.create_default_context(cafile=certifi.where())

    # Prefer verifying against the system store over switching verification off.
    context = ssl.create_default_context()
    if context.cert_store_stats().get("x509_ca"):
        return context

    # Don't verify SSL certificate if no CA certificates are available at all.
    if not getattr(get_ssl_cert, "_warned", False):
        get_ssl_cert._warned = True
        print("[Warning] certifi is not installed and no system CA certificates were found; "
              "SSL certificates will not be verified.")
    return False


if __name__ == "__main__":
    # Command-line entry point: parse the options, run main() and echo whatever
    # text it returns.
    pull_help = "Specifies list of licenses that should be pulled from the requirements.txt list"
    ignore_help = (
        "specifies dependency which licenses should be ignored. "
        "the script will not try to download license for ignored dependency."
    )
    ignore_all_help = (
        "Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements."
        "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies."
    )
    overwrite_help = "Overwrites the output file if present, otherwise license texts are appended."

    cli = argparse.ArgumentParser()
    cli.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    cli.add_argument("--pull-licenses", dest="pull_licenses", type=str, nargs="+", help=pull_help)
    cli.add_argument("-d", "--dest", dest="dest", type=str)
    cli.add_argument("--download-dir", dest="download_dir", type=str)
    cli.add_argument("--ignore", dest="ignore", action="append", help=ignore_help)
    cli.add_argument("--ignore-all-dependencies", dest="ignore_all", action="store_true", help=ignore_all_help)
    cli.add_argument("--overwrite", dest="overwrite", action="store_true", help=overwrite_help)
    options = cli.parse_args()

    rendered = main(
        options.requirements,
        options.pull_licenses,
        options.dest,
        options.download_dir,
        options.ignore,
        ignore_all_dependencies=options.ignore_all,
        overwrite=options.overwrite,
    )
    if rendered:
        print(rendered)


websockets-10.3
---------------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

try:
    import aiohttp
except:
    if all([platform.uname().system == "Linux", platform.uname().machine == "aarch64"]):
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

# Aliases used in return annotations of the form Dict[DependencyName, LicenseText].
DependencyName = str
LicenseText = str

# Shared HTTP session; get_licenses() binds it for the duration of a run and the
# fetch helpers fall back to it when no session is passed in.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # added manually to pair common english spellings
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names probed, in this order, when guessing raw license file URLs on GitHub.
main_branches = [
    "master",
    "main",
    "latest",
]

# File that receives one line per package for which a license was gathered.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the internal license cache, read from the environment; a trailing "/"
# is dropped because request paths are appended as "/pip/<name>/<version>".
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# API key sent in the "X-Key" header when storing a license in the internal cache.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Gathers the license texts for every pinned requirement of a requirements file.

    Each usable line must look like ``<name>==<version>[ <extra pip args>][; <marker>]``.
    Lines that do not start with an alphanumeric character (blank lines, comments, pip
    options) are skipped. One ``get_license`` task is started per requirement and all
    tasks run concurrently on one shared HTTP session.

    :param requirements_path: Path of the requirements file.
    :param pull_licenses: When given, only these requirement names are processed.
    :param download_dir: Folder used for ``pip download``; forwarded to ``get_license``.
    :param ignore: Package names (without version) to skip.
    :param ignore_all_dependencies: Forwarded to ``get_license``.
    :return: Dict of ``"<name>-<version>"`` to license text, sorted by key.
    :raises ValueError: If a requirement has no ``==`` version, if no license at all
        was found for a requirement, or if some collected package has no license text.
    """
    if ignore is None:
        ignore = []

    def is_ignored(package: str) -> bool:
        # Result keys look like "<name>-<version>" while ``ignore`` holds bare names.
        return package in ignore or package.rsplit("-", 1)[0] in ignore

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    global _session
    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue
            try:
                dependency, version = requirement.split("==", maxsplit=1)
            except ValueError:
                raise ValueError(f"{requirement} must contain a version.") from None
            dependency = dependency.strip()

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                version, markers = version.split(";", 1)
            # Strip before looking for extra arguments: the blank in "pkg == 1.0"
            # would otherwise be taken for the separator and leave the version empty.
            version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            version_parts = version.split(None, 1)
            if len(version_parts) == 2:
                version, extra_pip_args = version_parts

            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        await asyncio.gather(*license_tasks.values())

        licenses = {}
        unresolved = []
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                if is_ignored(package):
                    continue

                if licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")
                else:
                    # Another requirement may still deliver this package's license.
                    unresolved.append(package)

        # A package is only missing if no requirement at all produced its license.
        missing = []
        for package in unresolved:
            if licenses.get(package):
                print(f"\tverified found license for: {package}")
            elif package not in missing:
                missing.append(package)

        if missing:
            raise ValueError(f"Licenses not found for {missing}")

        licenses = {dependency: licenses[dependency] for dependency in sorted(licenses)}
        for dependency, license_text in licenses.items():
            if license_text:
                check_gpl(dependency, license_text)
        return licenses


def _marker_version_key(text: str):
    """Return ``text`` as a tuple of ints when it is a plain dotted number, else ``None``."""
    if re.fullmatch(r"\d+(?:\.\d+)*", text):
        return tuple(int(part) for part in text.split("."))
    return None


def must_include_dependency(markers: str, bindings: Optional[Dict[str, str]] = None) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed

    Only a single comparison is understood, written either as ``<name> <op> <literal>``
    or as ``<literal> <op> <name>``. Supported operators are ``>=``, ``<=``, ``<``, ``>``,
    ``==``, ``!=``, ``in`` and ``not in``.

    :param markers: Marker text taken from a requirement line (the part after ``;``).
    :param bindings: Optional mapping of marker names to values. When omitted the
        values of the running interpreter (``get_marker_bindings()``) are used.
    :return: ``True`` when the dependency must be analyzed. Markers that are empty,
        contain no supported operator, or reference an unknown/empty binding always
        include the dependency.
    :raises ValueError: If the marker combines conditions with ``and`` / ``or``.
    """
    if not markers:
        return True

    # A raw pattern and ``search`` are both required here: in a normal string "\b" is a
    # backspace character, and ``match`` would only look at the start of the marker.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    comparisons = {
        ">=": operator.ge,
        "<=": operator.le,
        "<": operator.lt,
        ">": operator.gt,
        "==": operator.eq,
        "!=": operator.ne,
    }

    # Word boundaries keep "in" from matching inside names such as "platform_machine";
    # two-character operators are listed before their one-character prefixes.
    found = re.search(r"(>=|<=|==|!=|<|>|\bnot\s+in\b|\bin\b)", markers)
    if found is None:
        # No operators were found, include the dependency.
        return True

    op = " ".join(found.group(1).split())
    left = markers[:found.start()].strip(" \t'\"")
    right = markers[found.end():].strip(" \t'\"")

    if bindings is None:
        bindings = get_marker_bindings()

    # The marker name may sit on either side of the operator.
    if bindings.get(left):
        lhs, rhs = bindings[left], right
    elif bindings.get(right):
        lhs, rhs = left, bindings[right]
    else:
        # Unknown marker binding -- skip marker and include the dependency
        return True

    if op == "in":
        return lhs in rhs
    if op == "not in":
        return lhs not in rhs

    compare = comparisons[op]
    lhs_version, rhs_version = _marker_version_key(lhs), _marker_version_key(rhs)
    if lhs_version is not None and rhs_version is not None:
        # Compare numerically so that "3.10" is newer than "3.6"; pad to equal length
        # so that "3.6" and "3.6.0" are considered equal.
        width = max(len(lhs_version), len(rhs_version))
        lhs_version += (0,) * (width - len(lhs_version))
        rhs_version += (0,) * (width - len(rhs_version))
        return compare(lhs_version, rhs_version)

    # Not plain dotted numbers on both sides: fall back to string comparison.
    return compare(lhs, rhs)


_bindings = {}


def get_marker_bindings():
    """
    Builds (on first use) and returns the table of values that environment marker
    names resolve to: implementation, OS, platform and Python version details of the
    running interpreter. The table is stored in the module-level ``_bindings`` dict and
    the same object is handed back on later calls.
    """
    global _bindings

    # A populated table means an earlier call already did the work.
    if _bindings:
        return _bindings

    def render_version(version_info):
        # "major.minor.micro", followed by e.g. "b2" for a non-final release level.
        rendered = f"{version_info.major}.{version_info.minor}.{version_info.micro}"
        level = version_info.releaselevel
        if level == 'final':
            return rendered
        return rendered + level[0] + str(version_info.serial)

    implementation = getattr(sys, 'implementation', None)
    if implementation is None:
        impl_name, impl_version = '', '0'
    else:
        impl_name = implementation.name
        impl_version = render_version(implementation.version)

    major, minor = platform.python_version_tuple()[:2]

    _bindings = {
        'implementation_name': impl_name,
        'implementation_version': impl_version,
        'os_name': os.name,
        'platform_machine': platform.machine(),
        'platform_python_implementation': platform.python_implementation(),
        'platform_release': platform.release(),
        'platform_system': platform.system(),
        'platform_version': platform.version(),
        'python_full_version': platform.python_version(),
        'python_version': f"{major}.{minor}",
        'sys_platform': sys.platform,
    }
    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """
    Downloads the license stored for ``package``/``version`` in the package-metadata
    S3 bucket, using the shared module-level ``_session``.

    :return: The response body on HTTP 200, otherwise an empty string. Any request
        error is treated as "no license available" and also yields an empty string.
    """
    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        # ``async with`` releases the connection back to the pool when done.
        async with _session.get(url) as response:
            # Without this check the body of an error response would be returned
            # as if it were license text.
            if response.status != 200:
                return ""
            return await response.text()
    except Exception:
        # Best-effort lookup; ``Exception`` (not a bare except) lets task
        # cancellation and KeyboardInterrupt propagate.
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Finds the license text for one pinned dependency.

    With ``download_dir`` set, an already downloaded copy inside that folder is
    inspected first, then the S3 store, and finally the package is downloaded with pip
    (into a temporary folder if the existing copy carried no license). Without
    ``download_dir`` the folders on ``sys.path`` are searched. If none of that yields a
    license, ``fetch_license`` is used as the last resort.

    :param dependency: Package name as written in the requirements file.
    :param version: Pinned version of the package.
    :param download_dir: Folder used for ``pip download``; may be ``None``.
    :param ignore: Package names to skip; ``None`` means nothing is ignored.
    :param ignore_all_dependencies: Forwarded to ``pip_download``.
    :param extra_pip_args: Extra command-line arguments forwarded to ``pip_download``.
    :return: Mapping of ``"<name>-<version>"`` to license text; empty when the
        dependency is ignored or no license was found.
    """
    if ignore is None:
        # The default used to reach the membership test below and raise TypeError.
        ignore = []
    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        tempdir = None
        try:
            if os.path.exists(download_dir):
                # Check if package has already been downloaded
                local_marker = f"{dependency.replace('-', '_')}-{version}"
                for path in os.listdir(download_dir):
                    if local_marker not in path:
                        continue

                    license_text = await get_local_license(dependency, version, os.path.join(download_dir, path))
                    if license_text:
                        print(f"License for {dependency_str} found locally.")

                        await put_license_into_internal_cache(dependency, version, license_text)
                        return {dependency_str: license_text}

                    license_text = await get_license_from_s3(dependency, version)
                    if license_text:
                        print(f"Downloaded a license for {dependency_str} from s3")

                        await put_license_into_internal_cache(dependency, version, license_text)
                        return {dependency_str: license_text}

                    print(f"No license available for {dependency_str}. Downloading a clean copy.")
                    tempdir = tempfile.TemporaryDirectory()
                    download_dir = tempdir.name
                    # Stop scanning: remaining entries belong to the old folder and would
                    # otherwise be joined onto the temporary one (and spawn more of them).
                    break

            licenses = await pip_download(dependency, version, download_dir, ignore,
                                          ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        finally:
            if tempdir is not None:
                # License texts are already in memory, the clean copy can go.
                tempdir.cleanup()

        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}

    print(f"No license found for: {dependency_str}")
    # Explicit empty mapping instead of an implicit None, matching the annotation.
    return {}


# Applied by pip_download() to the names of downloaded files: group 1 (word characters
# and hyphens) is taken as the package name and group 2 (word characters and dots,
# after a "-") as the version; whatever follows is ignored.
dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Downloads ``dependency==version`` with ``pip download`` and gathers a license text
    for every archive found in the download folder afterwards.

    :param dependency: Package name; archives are stored in ``<download_dir>/<dependency>``.
    :param version: Exact version passed to pip as ``dependency==version``.
    :param download_dir: Parent folder of the per-dependency download folder.
    :param ignore: Package names to skip, both for the request itself and for downloaded archives.
    :param ignore_all_dependencies: When set, the internal cache is consulted first and pip
        is run with ``--no-deps``.
    :param extra_pip_args: Optional whitespace-separated arguments appended to the pip command line.
    :return: Mapping of ``"<name>-<version>"`` (as parsed from the archive file names) to the
        license text found for it; the value is empty when every lookup failed.
        An empty mapping is returned when nothing was downloaded.
    :raises Exception: If the pip subprocess could not be started or awaited.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            return {f"{dependency}-{version}": license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = [
        sys.executable,
        "-m",
        "pip",
        "download",
        f"{dependency}=={version}",
        "-d",
        download_dep_dir,
        "-q",
        "--disable-pip-version-check",
        *additional_args,
    ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        return_code = await process.wait()
    except Exception as exc:
        # `except Exception` (not a bare except) so that task cancellation is not rewrapped.
        raise Exception(f"Failed to find and download a license for {dependency}=={version} using pip.") from exc

    if return_code != 0:
        print(f"[Warning] pip download exited with code {return_code} for {dependency}=={version}.")
    if not os.path.isdir(download_dep_dir):
        # Nothing was downloaded (the folder may be missing when pip failed early);
        # let the caller fall back to its other license sources.
        return {}

    licenses = {}
    for path in os.listdir(download_dep_dir):
        dependency_path = join(download_dep_dir, path)
        if os.path.isdir(dependency_path):
            # Skip extracted folders.
            continue

        match = dependency_regex.match(path)
        if not match:
            continue

        dependency_name = match.group(1)
        dependency_version = match.group(2)
        if dependency_name in ignore:
            continue

        # NOTE(review): extractall() trusts the member paths stored in the archive;
        # consider tarfile extraction filters where the supported Python versions allow it.
        archive_path, archive_ext = os.path.splitext(dependency_path)
        if archive_ext in (".gz", ".bz", ".bz2", ".tgz", ".tbz"):
            if dependency_path.endswith((".tar.gz", ".tgz")):
                mode = "r:gz"
            elif dependency_path.endswith((".tar.bz2", ".tbz")):
                mode = "r:bz2"
            else:
                mode = "r"

            with tarfile.open(dependency_path, mode) as archive:
                archive.extractall(archive_path)

            # Source archives usually unpack into a single "<name>-<version>" folder;
            # descend into it when it is there (for "x.tar.gz" the extraction folder is "x.tar").
            archive_name = os.path.basename(archive_path)
            if archive_name.endswith(".tar"):
                archive_name = archive_name[: -len(".tar")]
            nested_path = join(archive_path, archive_name)
            if os.path.isdir(nested_path):
                archive_path = nested_path
        else:
            with zipfile.ZipFile(dependency_path) as archive:
                archive.extractall(archive_path)

        dependency_str = f"{dependency_name}-{dependency_version}"
        license_text = await get_local_license(dependency_name, dependency_version, archive_path)
        if not license_text:
            license_text = await fetch_license(dependency_name, dependency_version)
        if not license_text:
            license_text = await get_license_from_s3(dependency_name, dependency_version)

        licenses[dependency_str] = license_text
        if license_text:
            await put_license_into_internal_cache(dependency_name, dependency_version, license_text)
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Looks for a local license of ``dependency`` in every ``sys.path`` entry, in order,
    and returns the first non-empty text (or an empty string when none is found).
    """
    found = ""
    for search_root in sys.path:
        found = await get_local_license(dependency, version, search_root)
        if found:
            break
    return found or ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Searches the folder ``path`` for a license of ``dependency``.

    Lookup order: metadata folders whose lower-cased name starts with
    ``<normalized name>-<version>`` (``.egg-info`` / ``.dist-info``), then an entry whose
    lower-cased name equals the lower-cased dependency name, then ``path`` itself.
    Returns an empty string when ``path`` is not a folder or nothing was found.
    """
    if not os.path.isdir(path):
        return ""

    raw_name = dependency.lower()
    meta_prefix = f"{raw_name.replace('-', '_')}-{version}"

    meta_dirs = []
    source_dir = ""
    for entry in os.listdir(path):
        lowered = entry.lower()
        if lowered.startswith(meta_prefix):
            meta_dirs.append(join(path, entry))
            continue
        if lowered == raw_name:
            # The last matching entry wins.
            source_dir = join(path, entry)

    for meta_dir in meta_dirs:
        if meta_dir.endswith(".egg-info"):
            text = await egg_info_license(meta_dir)
        elif meta_dir.endswith(".dist-info"):
            text = await dist_info_license(meta_dir)
        else:
            continue
        if text:
            return text

    # First the package source folder (if any), then the specified folder itself.
    search_order = [source_dir, path] if source_dir else [path]
    for folder in search_order:
        text = find_license_file(folder)
        if text:
            return text

    return ""


async def egg_info_license(package_path: str) -> str:
    """
    Resolves a license through the ``Home-page`` field of ``<package_path>/PKG-INFO``.

    :param package_path: Path of an ``.egg-info`` entry.
    :return: License text fetched for the home page, or an empty string when PKG-INFO
        is missing, has no ``Home-page`` value, or no license could be fetched.
    """
    pkg_info_path = join(package_path, "PKG-INFO")
    if not exists(pkg_info_path):
        # Also covers the case where package_path is not a folder.
        return ""

    url = ""
    # Explicit encoding so the result does not depend on the locale; undecodable bytes are dropped.
    with open(pkg_info_path, encoding="utf-8", errors="ignore") as file:
        for line in file:
            if line.startswith("Home-page:"):
                url = line[len("Home-page:"):].strip()
                break

    if not url:
        return ""
    return (await fetch_from_github(url)) or ""


async def dist_info_license(package_path: str) -> str:
    """
    Resolves a license for a ``.dist-info`` folder.

    A license file inside the folder wins; otherwise the ``Home-page`` field of
    ``METADATA`` is used to fetch the license.

    :param package_path: Path of the ``.dist-info`` folder.
    :return: License text, or an empty string when nothing could be resolved.
    """
    license_text = find_license_file(package_path)
    if license_text:
        return license_text

    metadata_path = join(package_path, "METADATA")
    if not exists(metadata_path):
        return ""

    try:
        with open(metadata_path, "rb") as file:
            metadata = file.readlines()
    except Exception:
        print(f"Could not open metadata file for: {package_path}")
        raise

    url = ""
    for raw_line in metadata:
        try:
            line = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            # Lines that are not valid UTF-8 are skipped.
            continue
        if line.startswith("Home-page:"):
            url = line[len("Home-page:"):].strip()
            break

    if not url:
        return ""
    return (await fetch_from_github(url)) or ""


def find_license_file(package_path: str) -> str:
    """
    Returns the text of the first license-looking file in ``package_path``.

    When ``package_path`` contains a ``licenses`` sub-folder, that folder is searched
    instead. A file qualifies when its name starts with "license", "licence" or
    "copying" (case-insensitive) or is listed in ``license_names``.

    :param package_path: Folder to search.
    :return: File content, or an empty string when the folder does not exist or holds
        no license file.
    """
    if not os.path.isdir(package_path):
        return ""

    licenses_folder = join(package_path, "licenses")
    # isdir (not exists): a plain file called "licenses" must not be listed as a folder.
    license_search_folder = licenses_folder if os.path.isdir(licenses_folder) else package_path

    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)
    license_path = ""
    for path in os.listdir(license_search_folder):
        candidate = join(license_search_folder, path)
        if os.path.isdir(candidate):
            continue
        # safe because the (possibly) case-sensitive path is what we have (unlike URL)
        #   verify if any version of the common licen[cs]e or copying names appear
        if license_regex.match(path) or path in license_names:
            license_path = candidate
            break

    if not license_path:
        return ""

    # i want to try / except here because i want to know when we have issues reading files.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetches a license for ``dependency==version``: first from the internal cache, then
    through the home page published in the PyPI JSON metadata.

    :param dependency: Package name.
    :param version: Package version.
    :param session: HTTP session to use; defaults to the module-level session.
    :return: License text, or an empty string when PyPI does not answer with 200 or no
        license could be fetched for the home page.
    :raises ValueError: If the PyPI metadata has neither ``home_page`` nor a ``Homepage``
        project URL.
    """
    if session is None:
        session = _session

    license_text = await fetch_license_from_internal_cache(dependency, version, session=session)
    if license_text:
        return license_text

    response = await session.get(f"https://pypi.org/pypi/{dependency}/{version}/json", ssl=get_ssl_cert())
    if response.status != 200:
        return ""

    response_json = await response.json()
    info = response_json.get("info") or {}
    home_page = info.get("home_page")
    if not home_page:
        # "project_urls" may be null or lack the "Homepage" key; treat both as "missing"
        # so that the documented ValueError is raised instead of a TypeError/KeyError.
        project_urls = info.get("project_urls") or {}
        home_page = project_urls.get("Homepage")
    if not home_page:
        raise ValueError(f"home_page is missing for {dependency}=={version}")

    return (await fetch_from_github(home_page, session=session)) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Reads a license text from the internal cache service.

    :param dependency: Package name; underscores are replaced with dashes for the lookup.
    :param version: Package version.
    :param session: HTTP session to use; defaults to the module-level session.
    :return: Cached license text, or an empty string when the cache is disabled
        (``LICENSE_CACHE_URL`` unset), has no entry, or the request failed.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        response = await session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}")
        if response.status != 200:
            return ""
        payload = await response.json()
        license_text = payload.get("text") or ""
    except Exception:
        # Best effort: any cache failure simply means "not cached".
        # `except Exception` lets task cancellation and KeyboardInterrupt propagate.
        return ""

    if license_text:
        print(f"Found license for {dependency}=={version} in internal cache.")
    return license_text


async def fetch_from_github(url: str, session: "aiohttp.ClientSession" = None) -> str:
    """
    Fetches a license text for the GitHub repository behind ``url``.

    The license file name is guessed on the common default branches first (raw file
    downloads) and the GitHub license API is used only as a fallback.

    :param url: Project home page; only hosts containing "github" are handled.
    :param session: HTTP session to use; defaults to the module-level session.
    :return: License text, or an empty string when ``url`` is not a GitHub URL or no
        license could be downloaded.
    """
    # Local import: a bare `import urllib` does not guarantee that the `parse` submodule is loaded.
    import urllib.parse

    if session is None:
        session = _session

    parsed_url = urllib.parse.urlparse(url)
    if "github" not in parsed_url.netloc:
        return ""

    repo_path = parsed_url.path.strip("/")

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            raw_url = f"https://raw.githubusercontent.com/{repo_path}/{branch}/{license_name}"
            # `async with` releases the connection of every miss right away.
            async with session.get(raw_url, ssl=ssl_context) as response:
                if response.status == 200:
                    return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    async with session.get(f"https://api.github.com/repos/{repo_path}/license", ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    download_url = response_json.get("download_url")
    if not download_url:
        return ""

    async with session.get(download_url, ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Stores a license text in the internal cache service (best effort).

    Nothing is sent unless both ``LICENSE_CACHE_URL`` and ``LICENSE_CACHE_API_KEY`` are
    set. Request failures are reported as a warning and never raised.

    :param dependency: Package name; underscores are replaced with dashes for the cache key.
    :param version: Package version.
    :param license_text: Text to store.
    :param session: HTTP session to use; defaults to the module-level session.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        response = await session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        )
        if response.status == 201:
            print(f"Put license for {dependency}=={version} into internal cache.")
        elif response.status == 401:
            print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")
    except Exception as exc:
        # Caching is optional, so a failure must not abort the run -- but do not hide it either.
        # `except Exception` lets task cancellation and KeyboardInterrupt propagate.
        print(f"[Warning] Could not put license for {dependency}=={version} into internal cache: {exc}")


def check_gpl(dependency: str, license_text: str):
    """
    Terminates the process with exit code 666 when ``license_text`` mentions GPL or LGPL
    as a whole word; does nothing for empty texts.

    :param dependency: Name used in the error message.
    :param license_text: License text to scan.
    """
    # Raw string: in a plain literal "\b" is a backspace character, not a word boundary.
    # re.search: the mention may appear anywhere in the text, not only at its start.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Gathers the licenses of all requirements and writes them to ``dest`` or returns them.

    :param requirements_path: Path of the requirements file.
    :param pull_licenses: Optional list of dependency names to restrict the run to.
    :param dest: Output file; when omitted the combined text is returned instead.
    :param download_dir: Optional folder used for ``pip download``.
    :param ignore: Package names (without version data) to skip.
    :param ignore_all_dependencies: Only handle the listed requirements, not their dependencies.
    :param overwrite: Overwrite ``dest`` instead of appending to it.
    :return: The combined license text when ``dest`` is not given, otherwise ``None``.
        ``None`` is also returned when gathering failed; the error is printed.
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    if dest:
        # Keep the packages list next to the output file. Only the file name of the
        # current value is reused so that repeated calls do not stack folders, and
        # a missing `dest` (its default) no longer crashes os.path.dirname().
        PACKAGES_FILE = os.path.join(os.path.dirname(dest), os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        combined_text = "".join(
            f"{dep}\n{'-' * len(dep)}\n{text}\n\n" for dep, text in licenses.items()
        )

        if not dest:
            return combined_text

        mode = "w" if overwrite else "a+"
        with open(dest, mode, encoding="utf-8") as file:
            file.write(combined_text)
    except Exception as exc:
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Builds the value passed as ``ssl=`` to the HTTP requests: a context backed by the
    certifi CA bundle, or ``False`` when certifi cannot be imported.
    """
    try:
        import certifi
    except ImportError:
        # Don't verify SSL certificate if certifi is not installed.
        return False
    return ssl.create_default_context(cafile=certifi.where())


if __name__ == "__main__":
    # Command line entry point: parse the options, run main() and print the combined
    # license text when it was returned rather than written to a file.
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    parser.add_argument("--pull-licenses", dest="pull_licenses", type=str, nargs="+",
        help="Specifies list of licenses that should be pulled from the requirements.txt list")
    parser.add_argument("-d", "--dest", dest="dest", type=str)
    parser.add_argument("--download-dir", dest="download_dir", type=str)
    parser.add_argument(
        "--ignore",
        dest="ignore",
        action="append",
        help="specifies dependency which licenses should be ignored. "
             "the script will not try to download license for ignored dependency.",
    )
    parser.add_argument(
        "--ignore-all-dependencies",
        dest="ignore_all",
        action="store_true",
        # The trailing space keeps the two concatenated sentences apart in the help output.
        help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements. "
             "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
    )
    parser.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="Overwrites the output file if present, otherwise license texts are appended.",
    )
    args = parser.parse_args()

    text = main(args.requirements, args.pull_licenses, args.dest, args.download_dir, args.ignore, ignore_all_dependencies=args.ignore_all,
                overwrite=args.overwrite)
    if text:
        print(text)


yarl-1.7.2
----------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

# aiohttp is required for all HTTP access. When importing it fails on Linux aarch64 the
# script prints a warning and exits with status 0; on any other platform the original
# error is re-raised.
try:
    import aiohttp
except:
    if all([platform.uname().system == "Linux", platform.uname().machine == "aarch64"]):
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

# Aliases used in annotations; both are plain strings.
DependencyName = str
LicenseText = str

# Shared HTTP session; get_licenses() binds it for the duration of its `async with` block.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # added manually to pair common english spellings
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Candidate repository branch names.
# NOTE(review): presumably tried in this order when guessing raw license URLs -- confirm
# against the GitHub fetching code.
main_branches = [
    "master",
    "main",
    "latest",
]

# File to which get_licenses() appends one line per package that got a license.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the license cache, taken from the environment; a trailing "/" is removed.
# get_licenses() reports the cache as disabled when it is not set.
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# API key for the license cache, taken from the environment.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Reads a requirements file and gathers the license text of every requirement.

    Each considered line must have the form ``name==version``, optionally followed by
    ``; <environment marker>`` and/or extra pip arguments separated by a space. Lines
    that do not start with an alphanumeric character are skipped. One get_license()
    task is scheduled per accepted requirement and all tasks are awaited together.

    :param requirements_path: Path of the requirements file.
    :param pull_licenses: When given, only these dependency names are processed.
    :param download_dir: Forwarded to get_license().
    :param ignore: Names that are skipped, both as requirements and as result keys.
    :param ignore_all_dependencies: Forwarded to get_license().
    :return: Mapping of package key to license text, ordered by key.
    :raises ValueError: If a requirement has no ``==`` version, if a non-ignored
        requirement produced no result, or if a package is left without a license text.
    """
    global PACKAGES_FILE

    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    # The session is bound to the module-level name so that helpers can pick it up.
    global _session
    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue
            try:
                dependency, version = requirement.split("==", maxsplit=1)
            except ValueError:
                raise ValueError(f"{requirement} must contain a version.")

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                version, markers = version.split(";")
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                # Keyed by name: a later line for the same dependency replaces the earlier task.
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        await asyncio.gather(*license_tasks.values())

        licenses = {}
        # Packages that came back without a license text, paired with a retry counter.
        pending = Queue()
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                if package in ignore:
                    continue

                # The first license text recorded for a package wins.
                if licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")

                if not license_text and package not in ignore:
                    pending.put((package, 0))
                    # raise ValueError(f"License for {package} was not found.")

        # A pending package counts as resolved when some other result supplied its license.
        # Nothing in this loop adds to `licenses`; unresolved entries are simply re-queued
        # until their counter exceeds 3 and are then reported as missing.
        missing = []
        while pending.qsize() > 0:
            package, ct = pending.get(timeout=0.01)
            if licenses.get(package):
                print(f"\tverified found license for (after repeat {ct}): {package}")
                continue

            # max dependency nesting depth check to detect missing cases
            if ct > 3:
                missing.append(package)
                continue
            pending.put((package, ct+1))

        if len(missing) > 0:
            raise ValueError(f"Licenses not found for {missing}")

        dependencies = sorted(list(licenses.keys()))

        # Rebuild the mapping in sorted key order, then run the GPL check on every text.
        licenses = {dependency: licenses[dependency] for dependency in dependencies}
        for dependency, license_text in licenses.items():
            if license_text:
                check_gpl(dependency, license_text)
        return licenses


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed.

    Only a single ``<marker> <operator> <value>`` comparison is understood.

    :param markers: Environment marker text (may be empty).
    :return: ``True`` when the dependency should be analyzed: no markers, no known
        operator, an unknown (or empty) marker binding, or a comparison that holds.
    :raises ValueError: If the markers combine conditions with ``and`` / ``or``.
    """
    if not markers:
        return True

    # Raw string: in a plain literal "\b" is a backspace character, not a word boundary.
    # re.search: the keyword sits between two conditions, never at the start of the text.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    # The order matters because operator is found by using 'in'
    operators = {
        ">=": operator.ge,
        "<=": operator.le,
        "<": operator.lt,
        ">": operator.gt,
        "==": operator.eq,
        "!=": operator.ne,
        "not in": lambda a, b: not operator.contains(a, b),
        "in": operator.contains,
    }

    for op, cmp in operators.items():
        if op in markers:
            break
    else:
        # No operators were found, include the dependency.
        return True

    marker, value = markers.split(op)
    marker, value = marker.strip(" '\""), value.strip(" '\"")

    bindings = get_marker_bindings()
    binding = bindings.get(marker)
    if binding:
        # NOTE(review): both operands are strings, so ordering operators compare
        # lexicographically ("3.10" < "3.6") -- confirm this is acceptable for versions.
        return cmp(binding, value)

    # Unknown marker binding -- skip marker and include the dependency
    return True


_bindings = {}


def get_marker_bindings():
    """
    Returns the values environment markers are compared against: implementation name and
    version, os name, platform details and python version.

    The mapping is computed on the first call and the same object is returned afterwards.
    """
    global _bindings
    if _bindings:
        return _bindings

    def format_full_version(info):
        # "major.minor.micro", plus the first letter of the release level and the serial
        # for non-final releases.
        dotted = f"{info.major}.{info.minor}.{info.micro}"
        if info.releaselevel == 'final':
            return dotted
        return dotted + info.releaselevel[0] + str(info.serial)

    implementation = getattr(sys, 'implementation', None)
    if implementation is None:
        implementation_name, implementation_version = '', '0'
    else:
        implementation_name = implementation.name
        implementation_version = format_full_version(implementation.version)

    major_minor = platform.python_version_tuple()[:2]
    _bindings = dict(
        implementation_name=implementation_name,
        implementation_version=implementation_version,
        os_name=os.name,
        platform_machine=platform.machine(),
        platform_python_implementation=platform.python_implementation(),
        platform_release=platform.release(),
        platform_system=platform.system(),
        platform_version=platform.version(),
        python_full_version=platform.python_version(),
        python_version='.'.join(major_minor),
        sys_platform=sys.platform,
    )
    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip", session: "aiohttp.ClientSession" = None) -> str:
    """
    Downloads a license text from the package-metadata S3 bucket.

    :param package: Package name.
    :param version: Package version.
    :param package_manager: First path segment of the object key.
    :param session: HTTP session to use; defaults to the module-level session.
    :return: The response body for a 200 answer, otherwise an empty string.
    """
    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        if session is None:
            session = _session
        response = await session.get(url)
        if response.status != 200:
            # The body of an error answer is an error document, not a license.
            return ""
        return await response.text()
    except Exception:
        # Best effort; `except Exception` lets task cancellation propagate.
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Finds the license for ``dependency==version``.

    With ``download_dir``: already downloaded copies are inspected first, then S3, then
    a clean ``pip download`` (into a temporary folder when a local copy exists but holds
    no license). Without it, the folders on ``sys.path`` are searched. The PyPI / GitHub
    lookup is the last resort in both cases.

    :param dependency: Package name.
    :param version: Package version.
    :param download_dir: Optional folder with (or for) downloaded packages.
    :param ignore: Package names to skip.
    :param ignore_all_dependencies: Forwarded to pip_download().
    :param extra_pip_args: Forwarded to pip_download().
    :return: Mapping of ``"<name>-<version>"`` to license text; empty when the dependency
        is ignored or no license was found.
    """
    if ignore is None:
        ignore = []
    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        tempdir = None
        if os.path.exists(download_dir):
            # Check if package has already been downloaded. Every matching entry of the
            # original folder is inspected before falling back to remote sources.
            prefix = f"{dependency.replace('-', '_')}-{version}"
            candidates = [path for path in os.listdir(download_dir) if prefix in path]
            for path in candidates:
                license_text = await get_local_license(dependency, version, os.path.join(download_dir, path))
                if license_text:
                    print(f"License for {dependency_str} found locally.")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}

            if candidates:
                license_text = await get_license_from_s3(dependency, version)
                if license_text:
                    print(f"Downloaded a license for {dependency_str} from s3")

                    await put_license_into_internal_cache(dependency, version, license_text)
                    return {dependency_str: license_text}
                print(f"No license available for {dependency_str}. Downloading a clean copy.")
                tempdir = tempfile.TemporaryDirectory()
                download_dir = tempdir.name

        try:
            licenses = await pip_download(dependency, version, download_dir, ignore,
                                          ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        finally:
            # Only license texts are returned, so the clean copy can be removed right away.
            if tempdir is not None:
                tempdir.cleanup()
        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}
    print(f"No license found for: {dependency_str}")
    return {}


# Splits a downloaded archive file name into (name, version):
#   group 1 - the longest run of word characters / dashes that is followed by a dash,
#   group 2 - the run of word characters and dots right after that dash.
# NOTE(review): for sdists such as "pkg-1.0.tar.gz" group 2 also swallows the archive
# suffix ("1.0.tar.gz") because dots and letters are allowed in it -- confirm that
# callers are fine with such a version string.
dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Downloads ``dependency==version`` with ``pip download`` and gathers a license text
    for every archive found in the download folder afterwards.

    :param dependency: Package name; archives are stored in ``<download_dir>/<dependency>``.
    :param version: Exact version passed to pip as ``dependency==version``.
    :param download_dir: Parent folder of the per-dependency download folder.
    :param ignore: Package names to skip, both for the request itself and for downloaded archives.
    :param ignore_all_dependencies: When set, the internal cache is consulted first and pip
        is run with ``--no-deps``.
    :param extra_pip_args: Optional whitespace-separated arguments appended to the pip command line.
    :return: Mapping of ``"<name>-<version>"`` (as parsed from the archive file names) to the
        license text found for it; the value is empty when every lookup failed.
        An empty mapping is returned when nothing was downloaded.
    :raises Exception: If the pip subprocess could not be started or awaited.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            return {f"{dependency}-{version}": license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = [
        sys.executable,
        "-m",
        "pip",
        "download",
        f"{dependency}=={version}",
        "-d",
        download_dep_dir,
        "-q",
        "--disable-pip-version-check",
        *additional_args,
    ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        return_code = await process.wait()
    except Exception as exc:
        # `except Exception` (not a bare except) so that task cancellation is not rewrapped.
        raise Exception(f"Failed to find and download a license for {dependency}=={version} using pip.") from exc

    if return_code != 0:
        print(f"[Warning] pip download exited with code {return_code} for {dependency}=={version}.")
    if not os.path.isdir(download_dep_dir):
        # Nothing was downloaded (the folder may be missing when pip failed early);
        # let the caller fall back to its other license sources.
        return {}

    licenses = {}
    for path in os.listdir(download_dep_dir):
        dependency_path = join(download_dep_dir, path)
        if os.path.isdir(dependency_path):
            # Skip extracted folders.
            continue

        match = dependency_regex.match(path)
        if not match:
            continue

        dependency_name = match.group(1)
        dependency_version = match.group(2)
        if dependency_name in ignore:
            continue

        # NOTE(review): extractall() trusts the member paths stored in the archive;
        # consider tarfile extraction filters where the supported Python versions allow it.
        archive_path, archive_ext = os.path.splitext(dependency_path)
        if archive_ext in (".gz", ".bz", ".bz2", ".tgz", ".tbz"):
            if dependency_path.endswith((".tar.gz", ".tgz")):
                mode = "r:gz"
            elif dependency_path.endswith((".tar.bz2", ".tbz")):
                mode = "r:bz2"
            else:
                mode = "r"

            with tarfile.open(dependency_path, mode) as archive:
                archive.extractall(archive_path)

            # Source archives usually unpack into a single "<name>-<version>" folder;
            # descend into it when it is there (for "x.tar.gz" the extraction folder is "x.tar").
            archive_name = os.path.basename(archive_path)
            if archive_name.endswith(".tar"):
                archive_name = archive_name[: -len(".tar")]
            nested_path = join(archive_path, archive_name)
            if os.path.isdir(nested_path):
                archive_path = nested_path
        else:
            with zipfile.ZipFile(dependency_path) as archive:
                archive.extractall(archive_path)

        dependency_str = f"{dependency_name}-{dependency_version}"
        license_text = await get_local_license(dependency_name, dependency_version, archive_path)
        if not license_text:
            license_text = await fetch_license(dependency_name, dependency_version)
        if not license_text:
            license_text = await get_license_from_s3(dependency_name, dependency_version)

        licenses[dependency_str] = license_text
        if license_text:
            await put_license_into_internal_cache(dependency_name, dependency_version, license_text)
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Looks for a local license of ``dependency`` in every ``sys.path`` entry, in order,
    and returns the first non-empty text (or an empty string when none is found).
    """
    found = ""
    for search_root in sys.path:
        found = await get_local_license(dependency, version, search_root)
        if found:
            break
    return found or ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Searches the folder ``path`` for a license of ``dependency``.

    Lookup order: metadata folders whose lower-cased name starts with
    ``<normalized name>-<version>`` (``.egg-info`` / ``.dist-info``), then an entry whose
    lower-cased name equals the lower-cased dependency name, then ``path`` itself.
    Returns an empty string when ``path`` is not a folder or nothing was found.
    """
    if not os.path.isdir(path):
        return ""

    raw_name = dependency.lower()
    meta_prefix = f"{raw_name.replace('-', '_')}-{version}"

    meta_dirs = []
    source_dir = ""
    for entry in os.listdir(path):
        lowered = entry.lower()
        if lowered.startswith(meta_prefix):
            meta_dirs.append(join(path, entry))
            continue
        if lowered == raw_name:
            # The last matching entry wins.
            source_dir = join(path, entry)

    for meta_dir in meta_dirs:
        if meta_dir.endswith(".egg-info"):
            text = await egg_info_license(meta_dir)
        elif meta_dir.endswith(".dist-info"):
            text = await dist_info_license(meta_dir)
        else:
            continue
        if text:
            return text

    # First the package source folder (if any), then the specified folder itself.
    search_order = [source_dir, path] if source_dir else [path]
    for folder in search_order:
        text = find_license_file(folder)
        if text:
            return text

    return ""


async def egg_info_license(package_path: str) -> str:
    """
    Resolves a license through the ``Home-page`` field of ``<package_path>/PKG-INFO``.

    :param package_path: Path of an ``.egg-info`` entry.
    :return: License text fetched for the home page, or an empty string when PKG-INFO
        is missing, has no ``Home-page`` value, or no license could be fetched.
    """
    pkg_info_path = join(package_path, "PKG-INFO")
    if not exists(pkg_info_path):
        # Also covers the case where package_path is not a folder.
        return ""

    url = ""
    # Explicit encoding so the result does not depend on the locale; undecodable bytes are dropped.
    with open(pkg_info_path, encoding="utf-8", errors="ignore") as file:
        for line in file:
            if line.startswith("Home-page:"):
                url = line[len("Home-page:"):].strip()
                break

    if not url:
        return ""
    return (await fetch_from_github(url)) or ""


async def dist_info_license(package_path: str) -> str:
    """
    Resolves a license for a ``.dist-info`` folder.

    A license file found by ``find_license_file`` is preferred. Otherwise the ``Home-page``
    field of the ``METADATA`` file is handed to ``fetch_from_github``.

    :param package_path: Path to the ``.dist-info`` folder.
    :return: License text; ``None`` when there is no ``METADATA`` file or no ``Home-page`` field.
    """
    bundled = find_license_file(package_path)
    if bundled:
        return bundled

    metadata_path = join(package_path, "METADATA")
    if not exists(metadata_path):
        return None

    with open(metadata_path, "rb") as stream:
        try:
            raw_lines = stream.readlines()
        except Exception as exc:
            print(f"Could not open metadata file for: {package_path}")
            raise exc

    field = "Home-page:"
    home_page = None
    for raw_line in raw_lines:
        # The file is read as bytes; lines that are not valid UTF-8 are skipped.
        try:
            text = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            continue
        if text.startswith(field):
            home_page = text[len(field):].strip()
            break

    if home_page:
        return await fetch_from_github(home_page)
    return None


def find_license_file(package_path: str) -> str:
    """
    Reads the first license-like file found in a folder.

    When the folder has a ``licenses`` sub-folder, only that sub-folder is searched.
    A file qualifies when its name starts with "license", "licence" or "copying" (any case)
    or is listed in ``license_names``. Candidates are visited in sorted order so the result
    does not depend on the order in which the file system lists entries.

    :param package_path: Folder to search in.
    :return: License text, or an empty string when the folder does not exist or holds no license.
    """
    if not os.path.isdir(package_path):
        return ""

    licenses_folder = join(package_path, "licenses")
    # isdir (not exists): a plain file called "licenses" must not be listed as a folder.
    license_search_folder = licenses_folder if os.path.isdir(licenses_folder) else package_path

    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)
    # The regex only checks the beginning of the name, so program files such as
    # "license_tool.py" would otherwise be returned as license text.
    code_extensions = (".py", ".pyc", ".pyo", ".pyi", ".pyd", ".so", ".dll", ".dylib", ".exe")

    license_path = ""
    for path in sorted(os.listdir(license_search_folder)):
        candidate = join(license_search_folder, path)
        if os.path.isdir(candidate):
            continue
        # safe because the (possibly) case-sensitive path is what we have (unlike URL)
        #   verify if any version of the common licen[cs]e or copying names appear
        if license_regex.match(path):
            if path.lower().endswith(code_extensions):
                continue
            license_path = candidate
            break
        if path in license_names:
            license_path = candidate
            break

    if not license_path:
        return ""

    # Strict decoding first so that encoding problems are reported before they are ignored.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetches a license for a PyPI release: internal cache first, then the project home page.

    The home page is taken from the PyPI JSON document of the release (``info.home_page``,
    falling back to ``info.project_urls.Homepage``) and handed to ``fetch_from_github``.

    :param dependency: Name of the dependency.
    :param version: Version of the dependency.
    :param session: HTTP session to use; the module-level session is used when omitted.
    :return: License text, or an empty string when PyPI does not know the release or no
        license could be fetched from the home page.
    :raises ValueError: When the release metadata does not declare a home page.
    """
    if session is None:
        session = _session

    license_text = await fetch_license_from_internal_cache(dependency, version, session=session)
    if license_text:
        return license_text

    # "async with" releases the connection even when the body is never read.
    async with session.get(f"https://pypi.org/pypi/{dependency}/{version}/json", ssl=get_ssl_cert()) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    info = response_json.get("info") or {}
    home_page = info.get("home_page")
    if not home_page:
        # "project_urls" may be null or lack "Homepage"; report that with the intended
        # ValueError instead of an accidental TypeError/KeyError.
        home_page = (info.get("project_urls") or {}).get("Homepage")
        if not home_page:
            raise ValueError(f"home_page is missing for {dependency}=={version}")

    return await fetch_from_github(home_page) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Looks a license up in the internal cache service.

    The lookup is best effort: a disabled cache (``LICENSE_CACHE_URL`` not set), a non-200
    answer, a malformed body or a failed request all yield an empty string.

    :param dependency: Name of the dependency; underscores are replaced with dashes.
    :param version: Version of the dependency.
    :param session: HTTP session to use; the module-level session is used when omitted.
    :return: Cached license text, or an empty string.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}") as response:
            if response.status != 200:
                return ""
            payload = await response.json()
    except Exception:
        # Not a bare "except": task cancellation and interpreter exit must still propagate.
        return ""

    license_text = payload.get("text") if isinstance(payload, dict) else None
    if not license_text:
        return ""

    print(f"Found license for {dependency}=={version} in internal cache.")
    return license_text


async def fetch_from_github(url: str, session: aiohttp.ClientSession = None) -> str:
    """
    Downloads the license file of a GitHub repository.

    Well-known license file names are first requested from raw.githubusercontent.com on each
    of ``main_branches``; when none exists the GitHub license API is used.

    :param url: Project URL; only URLs whose host contains "github" are handled.
    :param session: HTTP session to use; the module-level session is used when omitted.
    :return: License text, or an empty string when the URL does not point to a repository
        or no license could be downloaded.
    """
    # Imported explicitly: "import urllib" alone does not guarantee that the "parse"
    # submodule has been loaded.
    from urllib.parse import urlparse

    if session is None:
        session = _session

    parsed_url = urlparse(url)
    if "github" not in parsed_url.netloc:
        return ""

    # Keep only "<owner>/<repo>": home pages often point deeper into the repository
    # (".../tree/main/docs") or carry a ".git" suffix.
    segments = [segment for segment in parsed_url.path.split("/") if segment]
    if len(segments) < 2:
        return ""
    owner, repo = segments[0], segments[1]
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    repository = f"{owner}/{repo}"

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            raw_url = f"https://raw.githubusercontent.com/{repository}/{branch}/{license_name}"
            async with session.get(raw_url, ssl=ssl_context) as response:
                if response.status == 200:
                    return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    async with session.get(f"https://api.github.com/repos/{repository}/license", ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    download_url = response_json.get("download_url")
    if not download_url:
        return ""

    async with session.get(download_url, ssl=ssl_context) as response:
        # An error page must not be reported as license text.
        if response.status != 200:
            return ""
        return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Stores a license in the internal cache service (best effort).

    Nothing is sent unless both ``LICENSE_CACHE_URL`` and ``LICENSE_CACHE_API_KEY`` are set.
    A failed request is reported as a warning and never raised to the caller.

    :param dependency: Name of the dependency; underscores are replaced with dashes.
    :param version: Version of the dependency.
    :param license_text: License text to store.
    :param session: HTTP session to use; the module-level session is used when omitted.
    :return: An empty string when the cache is not configured, otherwise ``None``.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        ) as response:
            status = response.status
    except Exception as exc:
        # Caching is optional, but a failure should be visible instead of silently dropped.
        print(f"[Warning] Could not put {dependency}=={version} license into internal cache: {exc}")
        return

    if status == 201:
        print(f"Put license for {dependency}=={version} into internal cache.")
    elif status == 401:
        print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")


def check_gpl(dependency: str, license_text: str):
    """
    Aborts the process when a license text mentions GPL or LGPL as a separate word.

    :param dependency: Name of the dependency, used in the error message.
    :param license_text: License text to inspect; empty text is accepted.
    :raises SystemExit: With code 666 when "GPL" or "LGPL" is found.
    """
    # Raw string: in a normal string "\b" is a backspace character, not a word boundary.
    # re.search: the mention can be anywhere in the text, not only at its very beginning.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """
    Gathers the licenses of all requirements and writes or returns them as one text.

    Every license is rendered as the dependency name, an underline of dashes and the license
    text. Any exception raised while gathering or writing is printed and swallowed.

    :param requirements_path: Path to the requirements file.
    :param pull_licenses: When given, only these requirements are processed.
    :param dest: Output file; when omitted the combined text is returned instead.
    :param download_dir: Folder used for downloaded packages.
    :param ignore: Package names (without versions) whose licenses are not gathered.
    :param ignore_all_dependencies: Only handle the listed requirements, not their dependencies.
    :param overwrite: Overwrite ``dest`` instead of appending to it.
    :return: The combined license text when ``dest`` is not given, otherwise ``None``
        (also ``None`` after a printed error).
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    # dest is optional, and only the file name of PACKAGES_FILE is reused so that
    # repeated calls do not stack folder prefixes on top of each other.
    dest_dir = os.path.dirname(dest) if dest else ""
    PACKAGES_FILE = os.path.join(dest_dir, os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        license_text = "".join(
            f"{dep}\n{'-' * len(dep)}\n{text}\n\n" for dep, text in licenses.items()
        )

        if dest:
            mode = "w" if overwrite else "a+"
            with open(dest, mode, encoding="utf-8") as file:
                file.write(license_text)
        else:
            return license_text
    except Exception as exc:
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """
    Builds the value passed as the ``ssl`` argument of HTTP requests.

    :return: A default SSL context backed by the ``certifi`` CA bundle, or ``False`` when
        ``certifi`` cannot be imported.
    """
    try:
        import certifi as ca_bundle

        context = ssl.create_default_context(cafile=ca_bundle.where())
    except (ImportError, ModuleNotFoundError):
        # Don't verify SSL certificate if certifi is not installed.
        context = False
    return context


if __name__ == "__main__":
    # Command line entry point: parse the options, run main() and print the combined
    # license text when it was returned instead of written to a destination file.
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    parser.add_argument("--pull-licenses", dest="pull_licenses", type=str, nargs="+",
        help="Specifies list of licenses that should be pulled from the requirements.txt list")
    parser.add_argument("-d", "--dest", dest="dest", type=str)
    parser.add_argument("--download-dir", dest="download_dir", type=str)
    parser.add_argument(
        "--ignore",
        dest="ignore",
        action="append",
        help="specifies dependency which licenses should be ignored. "
             "the script will not try to download license for ignored dependency.",
    )
    parser.add_argument(
        "--ignore-all-dependencies",
        dest="ignore_all",
        action="store_true",
        # The trailing space keeps the two concatenated sentences apart in the help output.
        help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements. "
             "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
    )
    parser.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="Overwrites the output file if present, otherwise license texts are appended.",
    )
    args = parser.parse_args()

    text = main(args.requirements, args.pull_licenses, args.dest, args.download_dir, args.ignore, ignore_all_dependencies=args.ignore_all,
                overwrite=args.overwrite)
    if text:
        print(text)


zipp-3.6.0
----------
import argparse
import asyncio
import operator
import os
import platform
import re
import ssl
import sys
import tarfile
import urllib
import zipfile
from os.path import join, exists
import tempfile
from typing import Optional, Dict, Union, List
from queue import Queue

# aiohttp is required; on Linux aarch64 a failed import is reported as a warning and the
# script exits successfully, on every other platform the import error is raised.
try:
    import aiohttp
except Exception:
    # Not a bare "except": KeyboardInterrupt and SystemExit must not be intercepted here.
    if platform.uname().system == "Linux" and platform.uname().machine == "aarch64":
        print("[Warning] Unable to import aiohttp on Linux aarch64. Its assumed that aarch64 builds are not public yet.")
        print("[Warning] This will be fixed in the near future but if your aarch64 build is being released please reach out in #ct-omni-repoman")
        sys.exit(0)
    raise

# Aliases used in type annotations to tell dependency names and license texts apart.
DependencyName = str
LicenseText = str

# Shared HTTP session; bound by get_licenses() for the duration of a run.
_session: Optional[aiohttp.ClientSession] = None

# License file names sorted by popularity on GitHub
# (from most popular to least)
# The order matters: the names are tried one by one when guessing the license URL.
license_names = [
    "LICENSE",
    "LICENSE.txt",
    "LICENSE.md",
    "LICENSE.rst",
    "LICENCE.mit",
    "LICENSE.mit",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "COPYING",
    "COPYING.txt",
    "COPYING.md",
    "COPYING.rst",
    "license",
    "license.txt",
    "license.md",
    "license.rst",
    "license.mit",
    "copying",
    "copying.txt",
    "copying.md",
    "copying.rst",
    # added manually to pair common english spellings
    "LICENCE",
    "LICENCE.txt",
    "LICENCE.md",
    "LICENCE.rst",
    "LICENCE-MIT",
    "LICENCE-APACHE",
    "licence",
    "licence.txt",
    "licence.md",
    "licence.rst",
    "licence.mit",
    "License.txt",  # used in nvidia-* pip packages
    "Licence.txt",
]

# Branch names tried, in this order, when guessing the raw license URL of a repository.
main_branches = [
    "master",
    "main",
    "latest",
]

# File that receives the name of every package whose license was gathered;
# main() re-bases it onto the folder of the destination file.
PACKAGES_FILE = "packages_list.txt"

# Base URL of the internal license cache; the cache is disabled when the variable is not set.
LICENSE_CACHE_URL = os.environ.get("REPO_LICENSING_CACHE_URL")
if LICENSE_CACHE_URL:
    # Drop trailing slashes because request URLs are built as f"{LICENSE_CACHE_URL}/pip/...".
    LICENSE_CACHE_URL = LICENSE_CACHE_URL.rstrip("/")

# API key sent in the "X-Key" header when a license is put into the cache.
LICENSE_CACHE_API_KEY = os.environ.get("REPO_LICENSING_CACHE_API_KEY")


async def get_licenses(
        requirements_path: str,
        pull_licenses: List[str] = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False
):
    """
    Gathers the licenses of every pinned requirement listed in a requirements file.

    One ``get_license`` task is scheduled per accepted requirement and all tasks run
    concurrently inside a shared HTTP session. The name of every package with a license
    is appended to ``PACKAGES_FILE``.

    :param requirements_path: Path to the requirements file; every requirement must be pinned with ``==``.
    :param pull_licenses: When given, only requirements named in this list are processed.
    :param download_dir: Folder handed to ``get_license`` for downloaded packages.
    :param ignore: Names of the packages to skip.
    :param ignore_all_dependencies: Handed to ``get_license``.
    :return: Dict of license texts keyed by package, sorted by key.
    :raises ValueError: When a requirement has no version, a requirement yields no result,
        or packages are left without a license.
    """
    global PACKAGES_FILE

    if ignore is None:
        ignore = []

    if not LICENSE_CACHE_URL:
        print("License cache is disabled -- REPO_LICENSING_CACHE_URL is not specified.")

    with open(requirements_path) as file:
        requirements = file.readlines()

    # The session is bound to the module-level name so that helper functions can use it
    # without receiving it as an argument.
    global _session
    async with aiohttp.ClientSession(headers={"User-Agent": "repo_licensing_pip"}) as _session:
        license_tasks = {}
        for requirement in requirements:
            requirement = requirement.strip()
            if not requirement[:1].isalnum():
                # Skip everything that does not start alphanumeric like: empty lines, pip arguments like -i and comments
                continue
            try:
                dependency, version = requirement.split("==", maxsplit=1)
            except ValueError:
                raise ValueError(f"{requirement} must contain a version.")

            markers = ""
            if ";" in version:
                # Version contains environment markers
                # https://www.python.org/dev/peps/pep-0508/#environment-markers
                # NOTE(review): more than one ";" on the line makes this unpacking fail.
                version, markers = version.split(";")
                version = version.strip()

            # https://nvidia-omniverse.atlassian.net/browse/OM-39577
            # Handle additional args on the pip package line (for --extra-index type stuff)
            # ex1: "maglev.data==0.0.3 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local/simple --find-links https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            # ex2: "nvidiaapis==2.36.0 -f https://archives.nvda.ai/pypi/nvidiaapis/index.html",
            extra_pip_args = None
            if " " in version:
                version, extra_pip_args = [x.strip() for x in version.split(' ', 1)]

            if must_include_dependency(markers) and dependency not in ignore and (pull_licenses is None or dependency in pull_licenses):
                task = get_license(
                    dependency, version, download_dir, ignore, ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args
                )
                # Schedule the coroutine now so that all requirements are processed concurrently.
                license_tasks[dependency] = asyncio.ensure_future(task)
            else:
                markers = markers.strip()
                if markers:
                    print(f"Skip {requirement} with {markers}.")
                else:
                    print(f"Skip {requirement}.")

        # Wait for every task; an exception raised by any of them is raised here.
        await asyncio.gather(*license_tasks.values())

        licenses = {}
        # Packages that came back without a license text, paired with a repeat counter.
        pending = Queue()
        for dependency, task in license_tasks.items():
            dependencies = task.result()
            if not dependencies:
                # NOTE(review): tasks are only created for dependencies that are not ignored,
                # so this check looks unreachable -- confirm before removing.
                if dependency in ignore:
                    continue
                raise ValueError(f"License for {dependency} was not found.")

            for package, license_text in dependencies.items():
                if package in ignore:
                    continue

                # The first license found for a package wins.
                if licenses.get(package):
                    continue

                if license_text:
                    licenses[package] = license_text

                    # Keep track of all licenses gathered
                    with open(PACKAGES_FILE, "a") as f:
                        f.write(f"{package}\n")

                if not license_text and package not in ignore:
                    pending.put((package, 0))
                    # raise ValueError(f"License for {package} was not found.")

        # A pending package is accepted when another task supplied its license; otherwise it
        # is re-queued until its counter exceeds 3 and then reported as missing.
        # NOTE(review): nothing is fetched inside this loop, so the repeats cannot change
        # the outcome -- confirm whether a real retry was intended.
        missing = []
        while pending.qsize() > 0:
            package, ct = pending.get(timeout=0.01)
            if licenses.get(package):
                print(f"\tverified found license for (after repeat {ct}): {package}")
                continue

            # max dependency nesting depth check to detect missing cases
            if ct > 3:
                missing.append(package)
                continue
            pending.put((package, ct+1))

        if len(missing) > 0:
            raise ValueError(f"Licenses not found for {missing}")

        dependencies = sorted(list(licenses.keys()))

        # Rebuild the dict in sorted key order, then check every license for GPL mentions.
        licenses = {dependency: licenses[dependency] for dependency in dependencies}
        for dependency, license_text in licenses.items():
            if license_text:
                check_gpl(dependency, license_text)
        return licenses


def must_include_dependency(markers: str) -> bool:
    """
    Parses environment markers and checks if dependency should be analyzed

    Only a single ``<marker> <operator> <value>`` comparison with the marker name on the
    left-hand side is evaluated. Anything that cannot be evaluated (no operator, unknown
    marker name) includes the dependency.

    :param markers: Environment markers of a requirement, may be empty.
    :return: True when the dependency should be analyzed.
    :raises ValueError: When the markers are combined with ``and`` / ``or``.
    """
    if not markers:
        return True

    # Raw string: in a normal string "\b" is a backspace character, not a word boundary.
    # re.search: the keyword sits between two comparisons, never at the start of the text.
    if re.search(r"\b(and|or)\b", markers):
        # Conditions require to find or implement a grammar parser
        # https://www.python.org/dev/peps/pep-0508/#complete-grammar
        raise ValueError("Conditions in environment markers are not supported yet.")

    def version_key(text):
        # Dotted numbers are compared numerically, otherwise "3.10" would sort before "3.6".
        if not re.fullmatch(r"[0-9]+(\.[0-9]+)*", text):
            return None
        return [int(part) for part in text.split(".")]

    def ordering(compare):
        def run(left, right):
            left_key, right_key = version_key(left), version_key(right)
            if left_key is None or right_key is None:
                return compare(left, right)
            # Pad with zeros so that "3.8" and "3.8.0" compare as equal versions.
            width = max(len(left_key), len(right_key))
            left_key += [0] * (width - len(left_key))
            right_key += [0] * (width - len(right_key))
            return compare(left_key, right_key)
        return run

    operators = {
        ">=": ordering(operator.ge),
        "<=": ordering(operator.le),
        "<": ordering(operator.lt),
        ">": ordering(operator.gt),
        "==": operator.eq,
        "!=": operator.ne,
        # The marker value is the left operand: `sys_platform in "linux darwin"`.
        "not in": lambda a, b: a not in b,
        "in": lambda a, b: a in b,
    }

    # Two-character operators are listed before their one-character prefixes, and the word
    # operators need word boundaries so that the "in" of e.g. "platform_machine" is not
    # mistaken for an operator.
    parsed = re.match(r"\s*(.+?)\s*(>=|<=|==|!=|<|>|\bnot\s+in\b|\bin\b)\s*(.*)$", markers, re.DOTALL)
    if not parsed:
        # No operators were found, include the dependency.
        return True

    marker = parsed.group(1).strip(" '\"")
    op = " ".join(parsed.group(2).split())
    value = parsed.group(3).strip(" '\"")

    bindings = get_marker_bindings()
    binding = bindings.get(marker)
    if binding:
        return operators[op](binding, value)

    # Unknown marker binding -- skip marker and include the dependency
    return True


# Cache filled by the first call of get_marker_bindings().
_bindings = {}


def get_marker_bindings():
    """
    Returns the values of the environment marker names for the running interpreter:
    os name, platform, python version and other system information.

    The dict is built on the first call and the very same object is returned afterwards.
    """
    global _bindings
    if _bindings:
        return _bindings

    def describe(info):
        # "major.minor.micro", followed by the first letter of the release level and the
        # serial number for non-final releases.
        text = f"{info.major}.{info.minor}.{info.micro}"
        if info.releaselevel != 'final':
            text += info.releaselevel[0] + str(info.serial)
        return text

    if not hasattr(sys, 'implementation'):
        impl_name, impl_version = '', '0'
    else:
        impl_version = describe(sys.implementation.version)
        impl_name = sys.implementation.name

    major_minor = platform.python_version_tuple()[:2]
    _bindings = dict(
        implementation_name=impl_name,
        implementation_version=impl_version,
        os_name=os.name,
        platform_machine=platform.machine(),
        platform_python_implementation=platform.python_implementation(),
        platform_release=platform.release(),
        platform_system=platform.system(),
        platform_version=platform.version(),
        python_full_version=platform.python_version(),
        python_version='.'.join(major_minor),
        sys_platform=sys.platform,
    )
    return _bindings


async def get_license_from_s3(package: str, version: str, package_manager="pip") -> str:
    """
    Downloads a license from the package-metadata S3 bucket (best effort).

    :param package: Name of the package.
    :param version: Version of the package.
    :param package_manager: Name of the package manager, first segment of the object key.
    :return: License text, or an empty string when the object is not available or the
        request fails.
    """
    url = f"https://package-metadata.s3.us-west-2.amazonaws.com/{package_manager}/{package}/{version}/license"
    try:
        async with _session.get(url) as response:
            # Without this check the body of an error answer is returned as license text.
            if response.status != 200:
                return ""
            return await response.text()
    except Exception:
        # Not a bare "except": task cancellation and interpreter exit must still propagate.
        return ""


async def get_license(
        dependency: str,
        version: str,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Gathers the license of a dependency and, when it is downloaded with pip, of the
    packages downloaded along with it.

    With ``download_dir`` the folder is searched for an already downloaded copy, then S3 is
    asked, then the package is downloaded with pip. Without ``download_dir`` the folders of
    ``sys.path`` are searched. ``fetch_license`` is the last resort in both cases. A license
    that was found is also put into the internal cache.

    :param dependency: Name of the dependency.
    :param version: Version of the dependency.
    :param download_dir: Folder with downloaded packages.
    :param ignore: Names of the packages to skip; defaults to no packages.
    :param ignore_all_dependencies: Handed to ``pip_download``.
    :param extra_pip_args: Extra command line arguments for pip, as one string.
    :return: Dict of license texts keyed by ``<name>-<version>``; empty when the dependency
        is ignored or no license was found.
    """
    # The default is None; membership tests below need a real list.
    if ignore is None:
        ignore = []

    if dependency in ignore:
        return {}

    dependency_str = f"{dependency}-{version}"
    if download_dir:
        if os.path.exists(download_dir):
            # Check if package has already been downloaded
            for path in os.listdir(download_dir):
                if f"{dependency.replace('-', '_')}-{version}" in path:
                    license_text = await get_local_license(dependency, version, os.path.join(download_dir, path))
                    if license_text:
                        print(f"License for {dependency_str} found locally.")

                        await put_license_into_internal_cache(dependency, version, license_text)
                        return {dependency_str: license_text}
                    else:
                        license_text = await get_license_from_s3(dependency, version)
                        if license_text:
                            print(f"Downloaded a license for {dependency_str} from s3")

                            await put_license_into_internal_cache(dependency, version, license_text)
                            return {dependency_str: license_text}
                        print(f"No license available for {dependency_str}. Downloading a clean copy.")
                        # The local name keeps the temporary folder alive until this function returns.
                        tempdir = tempfile.TemporaryDirectory()
                        download_dir = tempdir.name

        licenses = await pip_download(dependency, version, download_dir, ignore,
                                      ignore_all_dependencies=ignore_all_dependencies, extra_pip_args=extra_pip_args)
        if licenses:
            print(f"Downloaded a license for {dependency_str}")
            return licenses
    else:
        license_text = await get_license_from_python_path(dependency, version)
        if license_text:
            print(f"License for {dependency_str} found locally.")

            await put_license_into_internal_cache(dependency, version, license_text)
            return {dependency_str: license_text}

    license_text = await fetch_license(dependency, version)
    if license_text:
        print(f"Downloaded a license for {dependency_str}")

        await put_license_into_internal_cache(dependency, version, license_text)
        return {dependency_str: license_text}
    print(f"No license found for: {dependency_str}")
    # Explicit empty result instead of an implicit None, as the return annotation promises.
    return {}


# Splits "<name>-<version>..." into the name (group 1) and the version (group 2).
dependency_regex = re.compile(r"^([\w\-]*)-([.\w]+).*")


async def pip_download(
        dependency: str, version: str, download_dir: str, ignore: List[str], ignore_all_dependencies: bool = False, extra_pip_args: Union[None, str] = None
) -> Dict[DependencyName, LicenseText]:
    """
    Downloads a dependency with ``pip download`` and gathers the license of every archive
    that pip put into the download folder.

    Every archive is unpacked next to itself. The license is looked up in the unpacked
    content, then with ``fetch_license``, then on S3. A license that was found is also put
    into the internal cache.

    :param dependency: Name of the dependency.
    :param version: Version of the dependency.
    :param download_dir: Base folder; archives go to its ``<dependency>`` sub-folder.
    :param ignore: Names of the packages to skip.
    :param ignore_all_dependencies: Download with ``--no-deps``; the internal cache is asked first.
    :param extra_pip_args: Extra command line arguments for pip, as one string.
    :return: Dict keyed by ``<name>-<version>``; the value is falsy when no license was found.
        Empty when the dependency is ignored or pip produced no download folder.
    :raises Exception: When the pip process could not be run.
    """
    if dependency in ignore:
        return {}

    additional_args = []
    if ignore_all_dependencies:
        # If there's no need to pull all dependencies,
        # then we can check the cache directly and see if license is available
        license_text = await fetch_license_from_internal_cache(dependency, version)
        if license_text:
            dependency_str = f"{dependency}-{version}"
            return {dependency_str: license_text}

        additional_args.append("--no-deps")
    if extra_pip_args is not None:
        additional_args.extend(extra_pip_args.split())

    download_dep_dir = join(download_dir, dependency)
    args = \
        [
            sys.executable,
            "-m",
            "pip",
            "download",
            f"{dependency}=={version}",
            "-d",
            download_dep_dir,
            "-q",
            "--disable-pip-version-check",
            *additional_args
        ]
    try:
        process = await asyncio.create_subprocess_exec(*args)
        await process.wait()
    except Exception as exc:
        raise Exception(f"Failed to find and download a license for {dependency}=={version} using pip.") from exc

    if process.returncode != 0:
        print(f"[Warning] pip download of {dependency}=={version} exited with code {process.returncode}.")
    if not os.path.isdir(download_dep_dir):
        # pip did not create the folder; an empty result lets the caller try other sources.
        return {}

    archive_suffixes = (".tar.gz", ".tar.bz2", ".tgz", ".tbz", ".zip", ".whl")

    licenses = {}
    for path in os.listdir(download_dep_dir):
        dependency_path = join(download_dep_dir, path)
        if os.path.isdir(dependency_path):
            # Skip extracted folders.
            continue

        # Drop the archive suffix first, otherwise it ends up in the version
        # ("pkg-1.0.tar.gz" would yield the version "1.0.tar.gz").
        stem = path
        for suffix in archive_suffixes:
            if path.endswith(suffix):
                stem = path[: -len(suffix)]
                break

        match = dependency_regex.match(stem)
        if not match:
            continue

        dependency_name = match.group(1)
        dependency_version = match.group(2)
        if dependency_name in ignore:
            continue

        # NOTE(review): the archives come from a package index and are unpacked without
        # member filtering -- consider tarfile extraction filters where available.
        archive_path, archive_ext = os.path.splitext(dependency_path)
        # splitext() returns the extension with its leading dot.
        if archive_ext in (".gz", ".bz", ".bz2", ".tgz", ".tbz"):
            if dependency_path.endswith((".tar.gz", ".tgz")):
                mode = "r:gz"
            elif dependency_path.endswith((".tar.bz2", ".tbz")):
                mode = "r:bz2"
            else:
                mode = "r"

            with tarfile.open(dependency_path, mode) as archive:
                archive.extractall(archive_path)
        else:
            with zipfile.ZipFile(dependency_path) as archive:
                archive.extractall(archive_path)

        # Source archives wrap their content into a "<name>-<version>" folder.
        nested_root = join(archive_path, stem)
        if os.path.isdir(nested_root):
            archive_path = nested_root

        dependency_str = f"{dependency_name}-{dependency_version}"
        license_text = await get_local_license(dependency_name, dependency_version, archive_path)

        if not license_text:
            license_text = await fetch_license(dependency_name, dependency_version)

        if not license_text:
            license_text = await get_license_from_s3(dependency_name, dependency_version)

        licenses[dependency_str] = license_text
        if license_text:
            await put_license_into_internal_cache(dependency_name, dependency_version, license_text)
    return licenses


async def get_license_from_python_path(dependency: str, version: str) -> str:
    """
    Looks for the dependency license in every folder listed in ``sys.path``.

    Each entry is handed to ``get_local_license`` in order and the first non-empty
    license text wins.

    :param dependency: Name of the dependency.
    :param version: Version of the dependency.
    :return: License text, or an empty string when no entry yields a license.
    """
    for search_root in sys.path:
        found = await get_local_license(dependency, version, search_root)
        if not found:
            continue
        return found
    return ""


async def get_local_license(dependency: str, version: str, path: str) -> str:
    """
    Searches a folder for the license of ``dependency`` at ``version``.

    Lookup order:
      1. ``<name>-<version>*`` entries of ``path`` ending with ``.egg-info`` or ``.dist-info``;
      2. license files inside a package folder named after the dependency;
      3. license files lying directly in ``path``.

    :param dependency: Name of the dependency (case and ``-``/``_`` are normalized).
    :param version: Version of the dependency.
    :param path: Folder to search in.
    :return: License text, or an empty string when nothing was found or ``path`` is not a folder.
    """
    if not os.path.isdir(path):
        return ""

    raw_name = dependency.lower()
    dependency_name = raw_name.replace("-", "_")
    # Entries are compared lower-cased, so the version has to be lower-cased as well.
    meta_prefix = f"{dependency_name}-{version.lower()}"
    package_meta_candidates = []
    package_path = ""

    for dependency_path in os.listdir(path):
        entry = dependency_path.lower()
        if entry.startswith(meta_prefix):
            package_meta_candidates.append(join(path, dependency_path))
        elif entry in (raw_name, dependency_name):
            # Package folders use underscores even when the requirement is spelled with dashes.
            package_path = join(path, dependency_path)

    for package_meta in package_meta_candidates:
        if package_meta.endswith(".egg-info"):
            license_text = await egg_info_license(package_meta)
            if license_text:
                return license_text
        elif package_meta.endswith(".dist-info"):
            license_text = await dist_info_license(package_meta)
            if license_text:
                return license_text

    if package_path:
        # Try to find a license file in the package source code
        license_text = find_license_file(package_path)
        if license_text:
            return license_text

    # A folder on sys.path is shared by many packages: a license-like file lying directly
    # in it cannot be attributed to this dependency, so the fallback below is skipped.
    shared_roots = {os.path.normcase(os.path.abspath(entry)) for entry in sys.path if entry}
    if os.path.normcase(os.path.abspath(path)) in shared_roots:
        return ""

    # A license file can be in the specified folder itself.
    return find_license_file(path) or ""


async def egg_info_license(package_path: str) -> str:
    """
    Resolves a license through the ``Home-page`` field of egg-info metadata.

    :param package_path: Path to an ``.egg-info`` folder (metadata is read from its ``PKG-INFO``
        file) or to a single-file ``.egg-info`` that holds the metadata itself.
    :return: License text fetched for the home page, or an empty string when the metadata is
        missing, unreadable, has no ``Home-page`` field, or no license could be fetched.
    """
    if os.path.isfile(package_path):
        pkg_info_path = package_path
    else:
        pkg_info_path = join(package_path, "PKG-INFO")

    try:
        # Decode explicitly so the result does not depend on the platform locale.
        with open(pkg_info_path, encoding="utf-8", errors="ignore") as file:
            info = file.readlines()
    except OSError:
        # Missing or unreadable metadata only means this source has no answer.
        return ""

    field = "Home-page:"
    url = next((line[len(field):].strip() for line in info if line.startswith(field)), "")
    if not url:
        return ""

    return await fetch_from_github(url) or ""


async def dist_info_license(package_path: str) -> str:
    """
    Resolves a license for a ``.dist-info`` folder.

    A license file found by ``find_license_file`` is preferred. Otherwise the ``Home-page``
    field of the ``METADATA`` file is handed to ``fetch_from_github``.

    :param package_path: Path to the ``.dist-info`` folder.
    :return: License text; ``None`` when there is no ``METADATA`` file or no ``Home-page`` field.
    """
    bundled = find_license_file(package_path)
    if bundled:
        return bundled

    metadata_path = join(package_path, "METADATA")
    if not exists(metadata_path):
        return None

    with open(metadata_path, "rb") as stream:
        try:
            raw_lines = stream.readlines()
        except Exception as exc:
            print(f"Could not open metadata file for: {package_path}")
            raise exc

    field = "Home-page:"
    home_page = None
    for raw_line in raw_lines:
        # The file is read as bytes; lines that are not valid UTF-8 are skipped.
        try:
            text = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            continue
        if text.startswith(field):
            home_page = text[len(field):].strip()
            break

    if home_page:
        return await fetch_from_github(home_page)
    return None


def find_license_file(package_path: str) -> str:
    """
    Reads the first license-like file found in a folder.

    When the folder has a ``licenses`` sub-folder, only that sub-folder is searched.
    A file qualifies when its name starts with "license", "licence" or "copying" (any case)
    or is listed in ``license_names``. Candidates are visited in sorted order so the result
    does not depend on the order in which the file system lists entries.

    :param package_path: Folder to search in.
    :return: License text, or an empty string when the folder does not exist or holds no license.
    """
    if not os.path.isdir(package_path):
        return ""

    licenses_folder = join(package_path, "licenses")
    # isdir (not exists): a plain file called "licenses" must not be listed as a folder.
    license_search_folder = licenses_folder if os.path.isdir(licenses_folder) else package_path

    license_regex = re.compile("(licen[cs]e|copying)", re.IGNORECASE)
    # The regex only checks the beginning of the name, so program files such as
    # "license_tool.py" would otherwise be returned as license text.
    code_extensions = (".py", ".pyc", ".pyo", ".pyi", ".pyd", ".so", ".dll", ".dylib", ".exe")

    license_path = ""
    for path in sorted(os.listdir(license_search_folder)):
        candidate = join(license_search_folder, path)
        if os.path.isdir(candidate):
            continue
        # safe because the (possibly) case-sensitive path is what we have (unlike URL)
        #   verify if any version of the common licen[cs]e or copying names appear
        if license_regex.match(path):
            if path.lower().endswith(code_extensions):
                continue
            license_path = candidate
            break
        if path in license_names:
            license_path = candidate
            break

    if not license_path:
        return ""

    # Strict decoding first so that encoding problems are reported before they are ignored.
    try:
        with open(license_path, "r", encoding="utf8") as license_file:
            return license_file.read()
    except UnicodeDecodeError:
        print(f"[Warning] Error decoding {license_path}. Ignoring errors.")
        with open(license_path, "r", encoding="utf8", errors="ignore") as license_file:
            return license_file.read()


async def fetch_license(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Fetches a license for a PyPI release: internal cache first, then the project home page.

    The home page is taken from the PyPI JSON document of the release (``info.home_page``,
    falling back to ``info.project_urls.Homepage``) and handed to ``fetch_from_github``.

    :param dependency: Name of the dependency.
    :param version: Version of the dependency.
    :param session: HTTP session to use; the module-level session is used when omitted.
    :return: License text, or an empty string when PyPI does not know the release or no
        license could be fetched from the home page.
    :raises ValueError: When the release metadata does not declare a home page.
    """
    if session is None:
        session = _session

    license_text = await fetch_license_from_internal_cache(dependency, version, session=session)
    if license_text:
        return license_text

    # "async with" releases the connection even when the body is never read.
    async with session.get(f"https://pypi.org/pypi/{dependency}/{version}/json", ssl=get_ssl_cert()) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    info = response_json.get("info") or {}
    home_page = info.get("home_page")
    if not home_page:
        # "project_urls" may be null or lack "Homepage"; report that with the intended
        # ValueError instead of an accidental TypeError/KeyError.
        home_page = (info.get("project_urls") or {}).get("Homepage")
        if not home_page:
            raise ValueError(f"home_page is missing for {dependency}=={version}")

    return await fetch_from_github(home_page) or ""


async def fetch_license_from_internal_cache(dependency: str, version: str, session: aiohttp.ClientSession = None) -> str:
    """
    Looks a license up in the internal cache service.

    The lookup is best effort: a disabled cache (``LICENSE_CACHE_URL`` not set), a non-200
    answer, a malformed body or a failed request all yield an empty string.

    :param dependency: Name of the dependency; underscores are replaced with dashes.
    :param version: Version of the dependency.
    :param session: HTTP session to use; the module-level session is used when omitted.
    :return: Cached license text, or an empty string.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.get(f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}") as response:
            if response.status != 200:
                return ""
            payload = await response.json()
    except Exception:
        # Not a bare "except": task cancellation and interpreter exit must still propagate.
        return ""

    license_text = payload.get("text") if isinstance(payload, dict) else None
    if not license_text:
        return ""

    print(f"Found license for {dependency}=={version} in internal cache.")
    return license_text


async def fetch_from_github(url: str, session: aiohttp.ClientSession = None) -> str:
    """
    Downloads the license file of a GitHub repository.

    Well-known license file names are first requested from raw.githubusercontent.com on each
    of ``main_branches``; when none exists the GitHub license API is used.

    :param url: Project URL; only URLs whose host contains "github" are handled.
    :param session: HTTP session to use; the module-level session is used when omitted.
    :return: License text, or an empty string when the URL does not point to a repository
        or no license could be downloaded.
    """
    # Imported explicitly: "import urllib" alone does not guarantee that the "parse"
    # submodule has been loaded.
    from urllib.parse import urlparse

    if session is None:
        session = _session

    parsed_url = urlparse(url)
    if "github" not in parsed_url.netloc:
        return ""

    # Keep only "<owner>/<repo>": home pages often point deeper into the repository
    # (".../tree/main/docs") or carry a ".git" suffix.
    segments = [segment for segment in parsed_url.path.split("/") if segment]
    if len(segments) < 2:
        return ""
    owner, repo = segments[0], segments[1]
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    repository = f"{owner}/{repo}"

    ssl_context = get_ssl_cert()

    # Guess the license file name to avoid using GitHub API with rate limit.
    for branch in main_branches:
        for license_name in license_names:
            raw_url = f"https://raw.githubusercontent.com/{repository}/{branch}/{license_name}"
            async with session.get(raw_url, ssl=ssl_context) as response:
                if response.status == 200:
                    return await response.text()

    # Couldn't guess the license name, fallback to GitHub API.
    async with session.get(f"https://api.github.com/repos/{repository}/license", ssl=ssl_context) as response:
        if response.status != 200:
            return ""
        response_json = await response.json()

    download_url = response_json.get("download_url")
    if not download_url:
        return ""

    async with session.get(download_url, ssl=ssl_context) as response:
        # An error page must not be reported as license text.
        if response.status != 200:
            return ""
        return await response.text()


async def put_license_into_internal_cache(
    dependency: str, version: str, license_text: str, session: aiohttp.ClientSession = None
):
    """
    Stores a license in the internal cache service (best effort).

    Nothing is sent unless both ``LICENSE_CACHE_URL`` and ``LICENSE_CACHE_API_KEY`` are set.
    A failed request is reported as a warning and never raised to the caller.

    :param dependency: Name of the dependency; underscores are replaced with dashes.
    :param version: Version of the dependency.
    :param license_text: License text to store.
    :param session: HTTP session to use; the module-level session is used when omitted.
    :return: An empty string when the cache is not configured, otherwise ``None``.
    """
    if session is None:
        session = _session

    if not LICENSE_CACHE_URL or not LICENSE_CACHE_API_KEY:
        return ""

    dependency = dependency.replace("_", "-")
    try:
        async with session.put(
            f"{LICENSE_CACHE_URL}/pip/{dependency}/{version}",
            headers={"X-Key": LICENSE_CACHE_API_KEY},
            json={
                "text": license_text
            }
        ) as response:
            status = response.status
    except Exception as exc:
        # Caching is optional, but a failure should be visible instead of silently dropped.
        print(f"[Warning] Could not put {dependency}=={version} license into internal cache: {exc}")
        return

    if status == 201:
        print(f"Put license for {dependency}=={version} into internal cache.")
    elif status == 401:
        print(f"Invalid API key for putting {dependency}=={version} license into internal cache.")


def check_gpl(dependency: str, license_text: str):
    """Abort the process if ``license_text`` mentions GPL or LGPL.

    The abbreviations are matched as whole words anywhere in the text, so
    "GPL" and "LGPL" trigger the check while e.g. "GPLv3" does not.

    Args:
        dependency: Name of the dependency, used in the error message.
        license_text: License text to inspect; empty or ``None`` is accepted
            and never triggers the check.

    Raises:
        SystemExit: With exit code 666 when a match is found.
    """
    # A raw string is required so that ``\b`` is a regex word boundary and not
    # a backspace character; ``re.search`` scans the whole text, whereas
    # ``re.match`` would only look at its very beginning.
    if license_text and re.search(r"\bL?GPL\b", license_text):
        print(f"[Error]: '{dependency}' license text mentions GPL or LGPL!")
        sys.exit(666)


def main(
        requirements_path: str,
        pull_licenses: List[str] = None,
        dest: str = None,
        download_dir: str = None,
        ignore: List[str] = None,
        ignore_all_dependencies: bool = False,
        overwrite=False
):
    """Collect license texts and write them to ``dest`` or return them.

    Runs ``get_licenses`` on a private event loop and renders every entry as
    the dependency name, a dashed underline of the same length and the license
    text.

    Args:
        requirements_path: Forwarded to ``get_licenses``.
        pull_licenses: Forwarded to ``get_licenses``.
        dest: Output file. When omitted, the rendered text is returned instead
            of being written.
        download_dir: Forwarded to ``get_licenses``.
        ignore: Package names (without version data) forwarded to
            ``get_licenses``; defaults to an empty list.
        ignore_all_dependencies: Forwarded to ``get_licenses``.
        overwrite: Truncate ``dest`` instead of appending to it.

    Returns:
        The rendered license text when ``dest`` is not given; ``None`` when
        the text was written to ``dest`` or when an error was caught.
    """
    # Ignore is just the package name, and does not include version data
    if ignore is None:
        ignore = []

    global PACKAGES_FILE
    # ``dest`` is optional, so only derive a directory from it when present
    # (``os.path.dirname(None)`` raises TypeError). Re-basing on the bare file
    # name keeps repeated calls from stacking directory prefixes.
    dest_dir = os.path.dirname(dest) if dest else ""
    PACKAGES_FILE = os.path.join(dest_dir, os.path.basename(PACKAGES_FILE))

    loop = asyncio.new_event_loop()
    try:
        licenses = loop.run_until_complete(
            get_licenses(requirements_path=requirements_path, pull_licenses=pull_licenses, download_dir=download_dir, ignore=ignore,
                         ignore_all_dependencies=ignore_all_dependencies)
        )

        license_text = "".join(
            f"{dep}\n{'-' * len(dep)}\n{text}\n\n" for dep, text in licenses.items()
        )

        if dest:
            mode = "w" if overwrite else "a+"
            with open(dest, mode, encoding="utf-8") as file:
                file.write(license_text)
        else:
            return license_text
    except Exception as exc:
        # Errors are reported rather than raised; the function then returns None.
        print(exc)
    finally:
        loop.close()


def get_ssl_cert() -> Union[ssl.SSLContext, bool]:
    """Return the ``ssl`` argument to use for outgoing HTTPS requests.

    Returns:
        A default SSL context that trusts the certifi CA bundle, or ``False``
        when certifi cannot be imported.

    NOTE(review): callers in this file pass the result as ``ssl=``, so the
    ``False`` fallback presumably disables certificate verification.
    """
    context: Union[ssl.SSLContext, bool] = False
    try:
        import certifi as ca_bundle

        context = ssl.create_default_context(cafile=ca_bundle.where())
    except ImportError:
        # Don't verify SSL certificate if certifi is not installed.
        # (ModuleNotFoundError is a subclass of ImportError.)
        pass
    return context


if __name__ == "__main__":
    # Command line entry point: parse the options, run main() and print the
    # collected license text when it was not written to a destination file.
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--requirements", dest="requirements", type=str, default="requirements.txt")
    parser.add_argument("--pull-licenses", dest="pull_licenses", type=str, nargs="+",
        help="Specifies list of licenses that should be pulled from the requirements.txt list")
    parser.add_argument("-d", "--dest", dest="dest", type=str)
    parser.add_argument("--download-dir", dest="download_dir", type=str)
    parser.add_argument(
        "--ignore",
        dest="ignore",
        action="append",
        help="specifies dependency which licenses should be ignored. "
             "the script will not try to download license for ignored dependency.",
    )
    parser.add_argument(
        "--ignore-all-dependencies",
        dest="ignore_all",
        action="store_true",
        # The trailing space keeps the two concatenated sentences apart.
        help="Ignores all dependencies of all requirements and will only pull down the licenses for the packages within the requirements. "
             "The script will only get licenses for the packages within the list of requirements and ignore all of their dependencies.",
    )
    parser.add_argument(
        "--overwrite",
        dest="overwrite",
        action="store_true",
        help="Overwrites the output file if present, otherwise license texts are appended.",
    )
    args = parser.parse_args()

    text = main(args.requirements, args.pull_licenses, args.dest, args.download_dir, args.ignore, ignore_all_dependencies=args.ignore_all,
                overwrite=args.overwrite)
    # main() returns the text only when no destination file was given.
    if text:
        print(text)


Jinja2-3.1.2
------------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


MarkupSafe-2.0.1
----------------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


PyYAML-6.0.1
------------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


aiohttp-3.9.5
-------------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


async-timeout-4.0.2
-------------------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


atomicwrites-1.4.0
------------------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


attrs-21.4.0
------------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


charset-normalizer-2.1.1
------------------------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


colorama-0.4.4
--------------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


cryptography-42.0.7
-------------------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


idna-3.3
--------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


idna-ssl-1.1.0
--------------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


importlib-metadata-7.2.1
------------------------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


more-itertools-8.12.0
---------------------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


multidict-5.2.0
---------------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


packaging-24.0
--------------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


pluggy-1.5.0
------------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


py-1.11.0
---------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


pyjwt-2.6.0
-----------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


pyparsing-3.0.7
---------------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


pytest-7.2.2
------------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


pytest-aiohttp-0.3.0
--------------------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


python-dateutil-2.9.0.post0
---------------------------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


teamcity-messages-1.27
----------------------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


typing_extensions-4.1.1
-----------------------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


wcwidth-0.2.5
-------------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


websockets-10.3
---------------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


yarl-1.7.2
----------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


zipp-3.6.0
----------
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.


