Source code for projspec.proj.datapackage

import re

from projspec.proj import ProjectSpec, ParseFailed, ProjectExtra


[docs] class DataPackage(ProjectSpec): """A FrictionlessData datapackage spec""" icon = "📊" spec_doc = "https://datapackage.org/standard/data-package/#structure" # e.g., as exported by zenodo # only tabular data; docs suggest csv, xls, json filetypes; JSON # can be inline in the metadata. sqlite and yaml are also mentioned. def match(self) -> bool: return "datapackage.json" in self.proj.basenames def parse(self) -> None: from projspec.content import DescriptiveMetadata, License, TabularData import json with self.proj.fs.open(self.proj.basenames["datapackage.json"], "rt") as f: conf = json.load(f) self.contents["descriptive_metadata"] = DescriptiveMetadata( proj=self.proj, meta={ k: v for k, v in conf.items() if k in {"name", "title", "description"} }, ) if "licenses" in conf: lic = conf["licenses"][0] self.contents["license"] = License( proj=self.proj, shortname=lic["name"], url=lic.get("path"), ) if "resources" in conf: self.contents["frictionless_data"] = [ TabularData( proj=self.proj, name=_["name"], schema=_.get("schema", {}), ) for _ in conf["resources"] ] @staticmethod def _create(path: str) -> None: with open(path + "/datapackage.json", "wt") as f: # https://github.com/frictionlessdata/examples/tree/main/text-file f.write( """ { "name": "text-file", "title": "Text File Data Package", "description": "An example of a text file in a non-tabular data package", "licenses": [{ "name": "CC0-1.0", "path": "https://creativecommons.org/publicdomain/zero/1.0/" }], "resources": [{ "name": "text-file", "path": "text-file.txt", "title": "Text File Data Resource", "format": "txt" }] } """ )
[docs] class DVCRepo(ProjectSpec): """Git management of data assets within a repo""" icon = "🌿" spec_doc = "https://doc.dvc.org/command-reference/config" def match(self) -> bool: return ".dvc" in self.proj.basenames def parse(self) -> None: import configparser conf = {} for fn in ["config", "config.local"]: # latter config wins, if both exist parser = configparser.ConfigParser() try: with self.proj.fs.open(f"{self.proj.url}/.dvc/{fn}", "rt") as f: parser.read_file(f) conf.update(parser._sections) except (IOError, ValueError): pass self.contents["remotes"] = [ _.split(" ", 1)[1][1:-2] for _ in conf if _.startswith("'remote ") ]
# The `dvc` CLI has many possible actions class IntakeCatalog(ProjectExtra): icon = "📖" spec_doc = ( "https://intake.readthedocs.io/en/latest/api2.html#intake.readers.entry.Catalog" ) template = re.compile(r"^cat(alog)?\.y[a]?ml$") match: str def match(self) -> bool: matches = [_ for _ in self.proj.basenames if self.template.match(_)] if matches: self.match = matches[0] return True return False def parse(self) -> None: from projspec.content.data import IntakeSource import yaml with self.proj.fs.open(self.proj.basenames[self.match], "rt") as f: meta = yaml.safe_load(f) if "entries" not in meta and "sources" not in meta: raise ParseFailed("No entries found in catalog") if meta.get("version") == 2: self.contents["intake_source"] = [ IntakeSource(proj=self.proj, name=_) for _ in meta.get("entries", []) ] else: self.contents["intake_source"] = [ IntakeSource(proj=self.proj, name=_) for _ in meta.get("sources", []) ] @staticmethod def _create(path: str) -> None: with open(f"{path}/catalog.yaml", "w") as f: # doesn't actually create data f.write( """ aliases: {} data: 35b33d80d511b79c: datatype: intake.readers.datatypes:Text kwargs: storage_options: null url: text-file.txt metadata: {} user_parameters: {} entries: text: kwargs: data: '{data(35b33d80d511b79c)}' metadata: {} output_instance: builtins:str reader: intake.readers.readers:FileTextReader user_parameters: {} metadata: {} user_parameters: {} version: 2 """ )