import re
from typing import Literal, Collection, Generator, Tuple, Union, Dict, Optional, List, Iterator
import sys, os
from collections import Counter, defaultdict
import pandas as pd
from .corpus import Corpus
from .logger import LoggedClass
from .piece import Piece
from ._typing import FileDict, FileList, CorpusFnameTuple, ScoreFacets, FacetArguments, FileParsedTuple, FileDataframeTuple, ScoreFacet, AnnotationsFacet, Facet
from .utils import get_musescore, get_first_level_corpora, pretty_dict, resolve_dir, \
update_labels_cfg, available_views2str, path2parent_corpus, resolve_paths_argument, enforce_fname_index_for_metadata, File
from .view import View, create_view_from_parameters, DefaultView
[docs]class Parse(LoggedClass):
"""
Class for creating one or several :class:`~.corpus.Corpus` objects and performing actions on all of them.
"""
# Names of attributes and methods that are listed as deprecated for this class.
# NOTE(review): presumably consulted elsewhere (not visible in this section) to point
# users of older versions to the current API -- confirm against the code that reads it.
_deprecated_elements = [
"parsed_mscx",
"parsed_tsv",
"add_detached_annotations",
"count_annotation_layers",
"count_labels",
"get_lists",
"iter",
"parse_mscx",
"pieces",
"store_scores",
"update_metadata",
]
def __init__(self,
             directory: Optional[Union[str, Collection[str]]] = None,
             recursive: bool = True,
             only_metadata_fnames: bool = True,
             include_convertible: bool = False,
             include_tsv: bool = True,
             exclude_review: bool = True,
             file_re: Optional[Union[str, re.Pattern]] = None,
             folder_re: Optional[Union[str, re.Pattern]] = None,
             exclude_re: Optional[Union[str, re.Pattern]] = None,
             file_paths: Optional[Collection[str]] = None,
             labels_cfg: dict = {},
             ms=None,
             **logger_cfg):
    """Set up the Parse object and create corpora right away if directories and/or file paths are given.

    Args:
      directory: Path (or several paths) to scan for corpora.
      recursive: Pass False to treat ``directory`` itself as a corpus rather than scanning it for subcorpora.
      only_metadata_fnames:
          The default view excludes piece names that are not listed in the corpus' metadata.tsv file
          (e.g. when none was found). Pass False to include all pieces regardless. This might be needed
          when setting ``recursive`` to False.
      include_convertible:
          The default view excludes scores that would need conversion to MuseScore format prior to parsing.
          Pass True to include convertible scores in .musicxml, .midi, .cap or any other format that
          MuseScore 3 can open. For on-the-fly conversion, however, the parameter ``ms`` needs to be set.
      include_tsv: The default view includes TSV files. Pass False to disregard them and parse only scores.
      exclude_review:
          The default view excludes files and folders whose name contains 'review'.
          Pass False to include these as well.
      file_re: Regular expression for a view that filters out all files that do not contain it.
      folder_re: Regular expression for a view that filters out all folders that do not contain it.
      exclude_re: Regular expression for a view that filters out all files or folders that contain it.
      file_paths:
          Deprecated, use ``file_re`` instead. If ``directory`` is specified, the file names of these paths
          are used to create a filtering view excluding all other files. Otherwise, all paths are expected
          to be part of the same parent corpus, which is inferred from the first path by looking for the
          first parent directory that either contains a 'metadata.tsv' file or is a git.
      labels_cfg: Configuration dict to detect only certain labels or change their output format.
      ms:
          Path to the local MuseScore 3 installation, enabling ms3 to parse musicXML, MuseScore 2, and other
          formats by temporarily converting them. For standard paths you may try 'auto', or 'win' for
          Windows, 'mac' for MacOS, or 'mscore' for Linux. In case you do not pass the 'file_re' and the
          MuseScore executable is detected, all convertible files are automatically selected, otherwise
          only those that can be parsed without conversion.
      **logger_cfg: Keyword arguments for changing the logger configuration. E.g. ``level='d'`` to see all debug messages.
    """
    # A missing or explicitly empty level falls back to warnings only.
    if logger_cfg.get('level') is None:
        logger_cfg['level'] = 'w'
    super().__init__(subclass='Parse', logger_cfg=logger_cfg)

    self.corpus_paths: Dict[str, str] = {}
    """{corpus_name -> path} dictionary with each corpus's base directory. Generally speaking, each corpus path is expected to contain a ``metadata.tsv`` and, maybe, to be a git.
    """

    self.corpus_objects: Dict[str, Corpus] = {}
    """{corpus_name -> Corpus} dictionary with one object per :attr:`corpus_path <corpus_paths>`.
    """

    self._ms = get_musescore(ms, logger=self.logger)

    # The view built from the arguments is stored under the key None, which marks the active view.
    view_level = self.logger.getEffectiveLevel()
    initial_view = create_view_from_parameters(
        only_metadata_fnames=only_metadata_fnames,
        include_convertible=include_convertible,
        include_tsv=include_tsv,
        exclude_review=exclude_review,
        file_paths=file_paths,
        file_re=file_re,
        folder_re=folder_re,
        exclude_re=exclude_re,
        level=view_level,
    )
    self._views: dict = {None: initial_view}
    if initial_view.name != 'default':
        self._views['default'] = DefaultView(level=view_level)
    self._views['all'] = View(level=view_level)

    self.labels_cfg = dict(
        staff=None,
        voice=None,
        harmony_layer=None,
        positioning=False,
        decode=True,
        column_name='label',
        color_format=None,
    )
    """:obj:`dict`
    Configuration dictionary to determine the output format of :py:attr:`~.score.Score.labels` and
    :py:attr:`~.score.Score.expanded` tables. The dictionary is passed to :py:attr:`~.score.Score` upon parsing.
    """
    self.labels_cfg.update(update_labels_cfg(labels_cfg, logger=self.logger))

    if directory is not None:
        directories = [directory] if isinstance(directory, str) else directory
        for corpus_dir in directories:
            self.add_dir(directory=corpus_dir, recursive=recursive)
    if file_paths is not None:
        self.add_files(file_paths)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%% END of __init__() %%%%%%%%%%%%%%%%%%%%%%%%%%%%%#
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%#
@property
def ms(self) -> str:
"""Path or command of the local MuseScore 3 installation if specified by the user and recognized."""
return self._ms
@ms.setter
def ms(self, ms: Union[str, Literal['auto', 'win', 'mac']]):
executable = get_musescore(ms, logger=self.logger)
if executable is None:
raise FileNotFoundError(f"'{ms}' did not lead me to a MuseScore executable.")
if executable is not None:
self._ms = executable
for _, corpus in self.__iter__():
corpus.ms = executable
@property
def n_detected(self) -> int:
    """Number of detected files aggregated from all :class:`~.corpus.Corpus` objects without taking views into account. Excludes metadata files."""
    total = 0
    for _, corpus in self:
        total += corpus.n_detected
    return total
@property
def n_orphans(self) -> int:
    """Number of files that are always disregarded because they could not be attributed to any of the fnames."""
    total = 0
    for _, corpus in self:
        total += len(corpus.ix2orphan_file)
    return total
@property
def n_parsed(self) -> int:
    """Number of parsed files aggregated from all :class:`~.corpus.Corpus` objects without taking views into account. Excludes metadata files."""
    total = 0
    for _, corpus in self:
        total += corpus.n_parsed
    return total
@property
def n_parsed_scores(self) -> int:
    """Number of parsed scores aggregated from all :class:`~.corpus.Corpus` objects without taking views into account. Excludes metadata files."""
    total = 0
    for _, corpus in self:
        total += corpus.n_parsed_scores
    return total
@property
def n_parsed_tsvs(self) -> int:
    """Number of parsed TSV files aggregated from all :class:`~.corpus.Corpus` objects without taking views into account. Excludes metadata files."""
    total = 0
    for _, corpus in self:
        total += corpus.n_parsed_tsvs
    return total
@property
def n_pieces(self) -> int:
    """Number of all available pieces ('fnames'), independent of the view."""
    total = 0
    for _, corpus in self:
        total += corpus.n_pieces
    return total
@property
def n_unparsed_scores(self) -> int:
    """Number of all detected but not yet parsed scores, aggregated from all :class:`~.corpus.Corpus` objects without taking views into account. Excludes metadata files."""
    total = 0
    for _, corpus in self:
        total += corpus.n_unparsed_scores
    return total
@property
def n_unparsed_tsvs(self) -> int:
    """Number of all detected but not yet parsed TSV files, aggregated from all :class:`~.corpus.Corpus` objects without taking views into account. Excludes metadata files."""
    total = 0
    for _, corpus in self:
        total += corpus.n_unparsed_tsvs
    return total
@property
def view(self) -> View:
    """Retrieve the current View object. Shorthand for :meth:`get_view`."""
    return self.get_view()

@view.setter
def view(self, new_view: View):
    """Make ``new_view`` the active view via :meth:`set_view`.

    Raises:
        TypeError: If ``new_view`` is not a View object, e.g. when the name of an existing view is assigned.
    """
    if not isinstance(new_view, View):
        # The exception used to be returned rather than raised, which silently discarded the assignment.
        raise TypeError("If you want to switch to an existing view, use its name like an attribute or "
                        "call _.switch_view().")
    self.set_view(new_view)
@property
def views(self) -> None:
    """Display a short description of the available views."""
    # The active view is stored under the key None and gets a readable label for display.
    labelled = {}
    for key, view in self._views.items():
        labelled["[active]" if key is None else key] = view
    print(pretty_dict(labelled, "view_name", "Description"))
@property
def view_name(self) -> str:
    """Get the name of the active view."""
    active_view = self.get_view()
    return active_view.name

@view_name.setter
def view_name(self, new_name):
    """Rename the active view."""
    self.get_view().name = new_name
@property
def view_names(self):
return {view.name if name is None else name for name, view in self._views.items()}
def add_corpus(self,
               directory: str,
               corpus_name: Optional[str] = None,
               only_metadata_fnames: Optional[bool] = None,
               include_convertible: Optional[bool] = None,
               include_tsv: Optional[bool] = None,
               exclude_review: Optional[bool] = None,
               file_re: Optional[Union[str, re.Pattern]] = None,
               folder_re: Optional[Union[str, re.Pattern]] = None,
               exclude_re: Optional[Union[str, re.Pattern]] = None,
               paths: Optional[Collection[str]] = None,
               **logger_cfg) -> None:
    """
    This method creates a :class:`~.corpus.Corpus` object which scans the directory ``directory`` for parseable files.
    It inherits all :class:`Views <.view.View>` from the Parse object.

    Args:
      directory: Directory to scan for files.
      corpus_name:
          By default, the folder name of ``directory`` is used as name for this corpus. Pass a string to
          use a different identifier.
      only_metadata_fnames, include_convertible, include_tsv, exclude_review, file_re, folder_re, exclude_re, paths:
          If at least one of these is not None, a new view is created from them via
          ``create_view_from_parameters`` (``paths`` being passed as ``file_paths``), registered with this
          Parse object under its name and used as the corpus' initial view. Otherwise the currently active
          view is used.
      **logger_cfg:
          Keyword arguments for configuring the logger of the new Corpus object. E.g. ``level='d'`` to see all debug messages.
          Note that the logger is a child logger of this Parse object's logger and propagates, so it might filter debug messages.
          You can use _.change_logger_cfg(level='d') to change the level post hoc.
    """
    directory = resolve_dir(directory)
    if not os.path.isdir(directory):
        self.logger.warning(f"{directory} is not an existing directory.")
        return
    if corpus_name is None:
        corpus_name = os.path.basename(directory).strip(r"\/")
    # NOTE(review): only the explicitly passed keyword arguments reach the Corpus; this object's own
    # logger_cfg is not inherited. The original code built a merged dict but never used it -- confirm
    # whether inheriting was intended before changing what is passed on.
    logger_cfg['name'] = self.logger.name + '.' + corpus_name.replace('.', '')

    view_arguments = (paths, file_re, folder_re, exclude_re, only_metadata_fnames,
                      include_tsv, exclude_review, include_convertible)
    if any(argument is not None for argument in view_arguments):
        initial_view = create_view_from_parameters(only_metadata_fnames=only_metadata_fnames,
                                                   include_convertible=include_convertible,
                                                   include_tsv=include_tsv,
                                                   exclude_review=exclude_review,
                                                   file_paths=paths,
                                                   file_re=file_re,
                                                   folder_re=folder_re,
                                                   exclude_re=exclude_re,
                                                   level=self.logger.getEffectiveLevel())
        self._views[initial_view.name] = initial_view
    else:
        initial_view = self.get_view()

    try:
        corpus = Corpus(directory=directory, view=initial_view, labels_cfg=self.labels_cfg, ms=self.ms, **logger_cfg)
    except AssertionError:
        self.logger.debug(f"{directory} contains no parseable files.")
        return
    # Hand all named views over to the new corpus (the active one, keyed None, was passed as ``view``).
    corpus.set_view(**{view_name: view for view_name, view in self._views.items() if view_name is not None})
    if len(corpus.files) == 0:
        self.logger.info(f"No parseable files detected in {directory}. Skipping...")
        return

    if corpus_name in self.corpus_paths:
        existing_path = self.corpus_paths[corpus_name]
        if existing_path == directory:
            self.logger.warning(f"Corpus '{corpus_name}' had already been present and was overwritten, i.e., reset.")
        else:
            self.logger.warning(f"Corpus '{corpus_name}' had already been present for the path {existing_path} and "
                                f"was replaced by {directory}")
    self.corpus_paths[corpus_name] = directory
    self.corpus_objects[corpus_name] = corpus
def add_dir(self, directory: str,
            recursive: bool = True,
            only_metadata_fnames: Optional[bool] = None,
            include_convertible: Optional[bool] = None,
            include_tsv: Optional[bool] = None,
            exclude_review: Optional[bool] = None,
            file_re: Optional[Union[str, re.Pattern]] = None,
            folder_re: Optional[Union[str, re.Pattern]] = None,
            exclude_re: Optional[Union[str, re.Pattern]] = None,
            paths: Optional[Collection[str]] = None,
            **logger_cfg) -> None:
    """
    Decide whether ``directory`` contains several corpora or is a corpus itself, and call
    :meth:`add_corpus` for each corpus.

    Args:
      directory: Directory to scan for corpora.
      recursive:
          By default, if any of the first-level subdirectories contains a 'metadata.tsv' or is a git, all first-level
          subdirectories of ``directory`` are treated as corpora, i.e. one :class:`~.corpus.Corpus` object per folder is created.
          Pass False to prevent this, which is equivalent to calling :meth:`add_corpus(directory) <add_corpus>`
      only_metadata_fnames, include_convertible, include_tsv, exclude_review, file_re, folder_re, exclude_re, paths:
          Those that are not None are forwarded to :meth:`add_corpus`.
      **logger_cfg:
          Keyword arguments for configuring the logger of the new Corpus objects. E.g. ``level='d'`` to see all debug messages.
          Note that the loggers are child loggers of this Parse object's logger and propagate, so it might filter debug messages.
          You can use _.change_logger_cfg(level='d') to change the level post hoc.
    """
    directory = resolve_dir(directory)
    if not os.path.isdir(directory):
        self.logger.warning(f"{directory} is not an existing directory.")
        return

    # Only explicitly specified view arguments are forwarded to add_corpus().
    view_arguments = dict(
        only_metadata_fnames=only_metadata_fnames,
        include_convertible=include_convertible,
        include_tsv=include_tsv,
        exclude_review=exclude_review,
        file_re=file_re,
        folder_re=folder_re,
        exclude_re=exclude_re,
        paths=paths,
    )
    corpus_config = {name: value for name, value in view_arguments.items() if value is not None}
    logger_cfg.setdefault('level', self.logger.getEffectiveLevel())
    corpus_config.update(logger_cfg)

    if not recursive:
        self.add_corpus(directory=directory, **corpus_config)
        return

    # new corpus/corpora to be added
    detected_corpora = sorted(get_first_level_corpora(directory, logger=self.logger))
    if not detected_corpora:
        self.logger.debug(f"Treating {directory} as corpus.")
        self.add_corpus(directory, **corpus_config)
        return
    self.logger.debug(f"{len(detected_corpora)} individual corpora detected in {directory}.")
    for corpus_path in detected_corpora:
        self.add_corpus(corpus_path, **corpus_config)
def add_files(self, file_paths: Union[str, Collection[str]], corpus_name: Optional[str] = None) -> None:
    """
    Deprecated: To deal with particular files only, use :meth:`add_corpus` passing the directory containing them and
    configure the :class:`~.view.View` accordingly. This method here does it for you but easily leads to unexpected behaviour.
    It expects the file paths to point to files located in a shared corpus folder
    on some higher level or in folders for which :class:`~.corpus.Corpus` objects have already been created.

    Args:
      file_paths: Collection of file paths. Only existing files can be added.
      corpus_name:

          * By default, I will try to attribute the files to existing :class:`~.corpus.Corpus` objects based on their paths.
            This makes sense only when new files have been created after the directories were scanned.
          * For paths that do not contain an existing corpus_path, I will try to detect the parent directory that is a
            corpus (based on it being a git or containing a ``metadata.tsv``). If this is without success for the first path,
            I will raise an error. Otherwise, all subsequent paths will be considered to be part of that same corpus
            (watch out for meaningless relative paths!).
          * You can pass a folder name contained in the first path to create a new corpus, assuming that all other paths
            are contained in it (watch out for meaningless relative paths!).
          * Pass an existing corpus_name to add the files to a particular corpus. Note that all parseable files under the
            corpus_path are detected anyway, and if you add files from other directories, it will lead to invalid relative
            paths that work only on your system. If you're adding files that have been created after the Corpus object has,
            you can leave this parameter empty; paths will be attributed to the existing corpora automatically.

    Raises:
        ValueError:
            If no parent directory of the first unattributed path is recognized as a corpus, or if ``corpus_name``
            is neither an existing corpus nor a folder contained in the first path.
    """
    resolved_paths = resolve_paths_argument(file_paths, logger=self.logger)
    if len(resolved_paths) == 0:
        return

    if corpus_name is None:
        add_to_existing = defaultdict(list)
        no_parent = []
        for path in resolved_paths:
            for existing_name, corpus_path in self.corpus_paths.items():
                if corpus_path in path:
                    add_to_existing[existing_name].append(path)
                    break
            else:
                # no known corpus path is contained in this path
                no_parent.append(path)
        for existing_name, attributed_paths in add_to_existing.items():
            self.get_corpus(existing_name).add_file_paths(attributed_paths)
        if len(no_parent) > 0:
            # paths are expected to be contained in one and the same corpus directory
            first_path = no_parent[0]
            directory = path2parent_corpus(first_path)
            if directory is None:
                raise ValueError(f"No parent of {first_path} has been recognized as a corpus by being a git or containing a 'metadata.tsv'. Use _.add_corpus()")
            self.add_corpus(directory)
        return

    if corpus_name in self.corpus_paths:
        self.get_corpus(corpus_name).add_file_paths(file_paths)
        return

    # Find the corpus directory by walking up the first path until a component equals corpus_name.
    first_path = resolved_paths[0]
    corpus_directory = ''
    if corpus_name in first_path:
        candidate = first_path
        while True:
            parent, last_component = os.path.split(candidate)
            if parent == candidate:
                # reached the root without finding the folder
                break
            if last_component == corpus_name:
                corpus_directory = candidate
                break
            candidate = parent
    # The original fallback was the no-op comparison ``first_path == ''``, so a corpus_name that does not
    # occur in the path led to add_corpus() being called with the file path instead of raising here.
    if corpus_directory == '':
        raise ValueError(f"corpus_name needs to be a folder contained in the first path, but '{corpus_name}' isn't.")
    self.add_corpus(corpus_directory)
def color_non_chord_tones(self,
                          color_name: str = 'red',
                          view_name: Optional[str] = None,
                          force: bool = False,
                          choose: Literal['all', 'auto', 'ask'] = 'all', ) -> Dict[CorpusFnameTuple, List[FileDataframeTuple]]:
    """Call ``color_non_chord_tones()`` on every selected corpus and collect the reports.

    Returns:
        {(corpus_name, fname) -> report} with the reports as returned by the individual corpora.
    """
    reports = {}
    for corpus_name, corpus in self.iter_corpora(view_name):
        fname2reports = corpus.color_non_chord_tones(color_name,
                                                     view_name=view_name,
                                                     force=force,
                                                     choose=choose)
        for fname, report in fname2reports.items():
            reports[(corpus_name, fname)] = report
    return reports
def change_labels_cfg(self, labels_cfg={}, staff=None, voice=None, harmony_layer=None, positioning=None, decode=None, column_name=None, color_format=None):
    """ Update :obj:`Parse.labels_cfg` and retrieve new 'labels' tables accordingly.

    Parameters
    ----------
    labels_cfg : :obj:`dict`
        Using an entire dictionary or, to change only particular options, choose from:
    staff, voice, harmony_layer, positioning, decode, column_name, color_format
        Arguments as they will be passed to :py:meth:`~ms3.annotations.Annotations.get_labels`.
        Values that are not None take precedence over the corresponding key in ``labels_cfg``.
    """
    # Work on a copy: the original wrote the keyword options into the passed dict, i.e. into the shared
    # default ``{}`` when none was passed, so options leaked from one call into all later ones.
    labels_cfg = dict(labels_cfg)
    keyword_options = dict(staff=staff,
                           voice=voice,
                           harmony_layer=harmony_layer,
                           positioning=positioning,
                           decode=decode,
                           column_name=column_name,
                           color_format=color_format)
    for option, value in keyword_options.items():
        if value is not None:
            labels_cfg[option] = value
    updated = update_labels_cfg(labels_cfg, logger=self.logger)
    self.labels_cfg.update(updated)
    # NOTE(review): _parsed_mscx, _labellists and _extract_and_cache_dataframes are not defined in the
    # visible part of this class -- confirm that they still exist.
    for score in self._parsed_mscx.values():
        score.change_labels_cfg(labels_cfg=updated)
    ids = list(self._labellists.keys())
    if len(ids) > 0:
        self._extract_and_cache_dataframes(ids=ids, labels=True)
def compare_labels(self,
                   key: str = 'detached',
                   new_color: str = 'ms3_darkgreen',
                   old_color: str = 'ms3_darkred',
                   detached_is_newer: bool = False,
                   add_to_rna: bool = True,
                   view_name: Optional[str] = None) -> Tuple[int, int]:
    """ Compare detached labels ``key`` to the ones attached to the Score to create a diff.
    By default, the attached labels are considered as the reviewed version and labels that have changed or been added
    in comparison to the detached labels are colored in green; whereas the previous versions of changed labels are
    attached to the Score in red, just like any deleted label.

    Args:
      key: Key of the detached labels you want to compare to the ones in the score.
      new_color, old_color:
          The colors by which new and old labels are differentiated. Identical labels remain unchanged. Colors can be
          CSS colors or MuseScore colors (see :py:attr:`utils.MS3_COLORS`).
      detached_is_newer:
          Pass True if the detached labels are to be added with ``new_color`` whereas the attached changed labels
          will turn ``old_color``, as opposed to the default.
      add_to_rna:
          By default, new labels are attached to the Roman Numeral layer.
          Pass False to attach them to the chord layer instead.
      view_name: Name of the view used for selecting corpora and passed on to each of them.

    Returns:
      Number of scores in which labels have changed.
      Number of scores in which no label has changed.
    """
    comparison_settings = dict(key=key,
                               new_color=new_color,
                               old_color=old_color,
                               detached_is_newer=detached_is_newer,
                               add_to_rna=add_to_rna,
                               view_name=view_name)
    n_changed = 0
    n_unchanged = 0
    for _, corpus in self.iter_corpora(view_name=view_name):
        corpus_changed, corpus_unchanged = corpus.compare_labels(**comparison_settings)
        n_changed += corpus_changed
        n_unchanged += corpus_unchanged
    return n_changed, n_unchanged
def count_changed_scores(self, view_name: Optional[str] = None):
    """Sum up ``count_changed_scores()`` of all corpora selected under the given view."""
    n_changed = 0
    for _, corpus in self.iter_corpora(view_name):
        n_changed += corpus.count_changed_scores()
    return n_changed
def count_extensions(self,
                     view_name: Optional[str] = None,
                     per_piece: bool = False,
                     include_metadata: bool = False,
                     ):
    """ Count file extensions.

    Args:
      view_name: Name of the view used for selecting corpora and passed on to each of them.
      per_piece:
          By default, the counts of all pieces of a corpus are summed up.
          Pass True to keep one count per piece instead.
      include_metadata: Passed on to ``count_extensions()`` of every corpus.

    Returns:
      By default, a dictionary {corpus_name -> {extension -> count}}.
      If ``per_piece`` is set to True, a dictionary {(corpus_name, fname) -> {extension -> count}}.
    """
    extension_counters = {}
    for corpus_name, corpus in self.iter_corpora(view_name):
        extension_counters[corpus_name] = corpus.count_extensions(view_name, include_metadata=include_metadata)

    if per_piece:
        counts_per_piece = {}
        for corpus_name, fname2cnt in extension_counters.items():
            for fname, cnt in fname2cnt.items():
                counts_per_piece[(corpus_name, fname)] = dict(cnt)
        return counts_per_piece

    counts_per_corpus = {}
    for corpus_name, fname2cnt in extension_counters.items():
        summed = Counter()
        for cnt in fname2cnt.values():
            summed = summed + cnt
        counts_per_corpus[corpus_name] = dict(summed)
    return counts_per_corpus
def count_files(self,
                detected=True,
                parsed=True,
                as_dict: bool = False,
                drop_zero: bool = True,
                view_name: Optional[str] = None) -> Union[pd.DataFrame, dict]:
    """Collect the file counts of every selected corpus, one row per corpus.

    Args:
        detected: Passed to each corpus' ``_summed_file_count()`` as ``types``.
        parsed: Passed to each corpus' ``_summed_file_count()`` as ``parsed``.
        as_dict: Pass True to get a {corpus_name -> {column -> count}} dictionary instead of a DataFrame.
        drop_zero: By default, columns that sum up to zero are dropped. Pass False to keep them.
        view_name: Name of the view used for selecting corpora and passed on to each of them.
    """
    counts_per_corpus = {}
    for corpus_name, corpus in self.iter_corpora(view_name=view_name):
        counts_per_corpus[corpus_name] = corpus._summed_file_count(types=detected, parsed=parsed, view_name=view_name)
    counts_df = pd.DataFrame.from_dict(counts_per_corpus, orient='index', dtype='Int64')
    if drop_zero:
        sums_to_zero = counts_df.sum() == 0
        counts_df = counts_df.drop(columns=counts_df.columns[sums_to_zero])
    if as_dict:
        return counts_df.to_dict(orient='index')
    counts_df.index.rename('corpus', inplace=True)
    return counts_df
def count_parsed_scores(self, view_name: Optional[str] = None) -> int:
    """Total number of entries in the collections returned by ``_get_parsed_score_files()`` for the given view."""
    parsed_files = self._get_parsed_score_files(view_name=view_name)
    return sum(len(files) for files in parsed_files.values())
def count_parsed_tsvs(self, view_name: Optional[str] = None) -> int:
    """Total number of entries in the collections returned by ``_get_parsed_tsv_files()`` for the given view."""
    parsed_files = self._get_parsed_tsv_files(view_name=view_name)
    return sum(len(files) for files in parsed_files.values())
def count_unparsed_scores(self, view_name: Optional[str] = None) -> int:
    """Number of detected but not yet parsed scores under the given view.

    The original body was a copy of :meth:`count_parsed_scores` and counted the parsed scores instead.
    """
    # NOTE(review): relies on _get_unparsed_score_files(), the counterpart of _get_parsed_score_files(),
    # which is defined outside the visible part of this class -- confirm name and signature.
    unparsed_files = self._get_unparsed_score_files(view_name=view_name)
    return sum(map(len, unparsed_files.values()))
def count_unparsed_tsvs(self, view_name: Optional[str] = None) -> int:
    """Number of detected but not yet parsed TSV files under the given view.

    The original body was a copy of :meth:`count_parsed_tsvs` and counted the parsed TSV files instead.
    """
    # NOTE(review): relies on _get_unparsed_tsv_files(), the counterpart of _get_parsed_tsv_files(),
    # which is defined outside the visible part of this class -- confirm name and signature.
    unparsed_files = self._get_unparsed_tsv_files(view_name=view_name)
    return sum(map(len, unparsed_files.values()))
def create_missing_metadata_tsv(self,
                                view_name: Optional[str] = None) -> None:
    """Call ``create_metadata_tsv()`` on every selected corpus whose ``metadata_tsv`` is None."""
    for _, corpus in self.iter_corpora(view_name=view_name):
        if corpus.metadata_tsv is not None:
            continue
        corpus.create_metadata_tsv()
def disambiguate_facet(self,
                       facet: Facet,
                       view_name: Optional[str] = None,
                       ask_for_input=True) -> None:
    """Calls the method of the same name on every selected corpus, forwarding all arguments."""
    forwarded = dict(facet=facet, view_name=view_name, ask_for_input=ask_for_input)
    for _, corpus in self.iter_corpora(view_name):
        corpus.disambiguate_facet(**forwarded)
def extract_facets(self,
                   facets: ScoreFacets = None,
                   view_name: Optional[str] = None,
                   force: bool = False,
                   choose: Literal['auto', 'ask'] = 'auto',
                   unfold: bool = False,
                   interval_index: bool = False,
                   flat=False,
                   concatenate=True) -> Union[pd.DataFrame, Dict[CorpusFnameTuple, Union[Dict[str, List[FileDataframeTuple]], List[FileDataframeTuple]]]]:
    """Delegate to ``_aggregate_corpus_data('extract_facets', ...)``, forwarding all arguments as keywords."""
    arguments = dict(
        facets=facets,
        view_name=view_name,
        force=force,
        choose=choose,
        unfold=unfold,
        interval_index=interval_index,
        flat=flat,
        concatenate=concatenate,
    )
    return self._aggregate_corpus_data('extract_facets', **arguments)
def get_all_parsed(self, facets: FacetArguments = None,
                   view_name: Optional[str] = None,
                   force: bool = False,
                   choose: Literal['all', 'auto', 'ask'] = 'all',
                   flat: bool = False,
                   include_empty=False,
                   concatenate: bool = True,
                   ) -> Union[pd.DataFrame, Dict[CorpusFnameTuple, Union[Dict[str, FileParsedTuple], List[FileParsedTuple]]]]:
    """Delegate to ``_aggregate_corpus_data('get_all_parsed', ...)``, forwarding all arguments as keywords."""
    arguments = dict(
        facets=facets,
        view_name=view_name,
        force=force,
        choose=choose,
        flat=flat,
        include_empty=include_empty,
        concatenate=concatenate,
    )
    return self._aggregate_corpus_data('get_all_parsed', **arguments)
def get_corpus(self, name) -> Corpus:
assert name in self.corpus_objects, f"Don't have a corpus called '{name}', only {list(self.corpus_objects.keys())}"
return self.corpus_objects[name]
def get_dataframes(self,
                   notes: bool = False,
                   rests: bool = False,
                   notes_and_rests: bool = False,
                   measures: bool = False,
                   events: bool = False,
                   labels: bool = False,
                   chords: bool = False,
                   expanded: bool = False,
                   form_labels: bool = False,
                   cadences: bool = False,
                   view_name: Optional[str] = None,
                   force: bool = False,
                   choose: Literal['all', 'auto', 'ask'] = 'all',
                   unfold: bool = False,
                   interval_index: bool = False,
                   flat=False,
                   include_empty: bool = False,
                   ) -> Union[pd.DataFrame, Dict[CorpusFnameTuple, Union[Dict[str, List[FileDataframeTuple]], List[FileDataframeTuple]]]]:
    """Renamed to :meth:`get_facets`.

    The boolean flags are translated into the ``facets`` argument of :meth:`get_facets`, to which all
    remaining arguments are passed on.
    """
    requested = dict(
        notes=notes,
        rests=rests,
        notes_and_rests=notes_and_rests,
        measures=measures,
        events=events,
        labels=labels,
        chords=chords,
        expanded=expanded,
        form_labels=form_labels,
        cadences=cadences,
    )
    # Iterating over the facet names declared by ScoreFacet determines the order of the selection.
    facets = [facet for facet in ScoreFacet.__args__ if requested[facet]]
    return self.get_facets(facets=facets,
                           view_name=view_name,
                           force=force,
                           choose=choose,
                           unfold=unfold,
                           interval_index=interval_index,
                           flat=flat,
                           include_empty=include_empty)
def get_facet(self,
              facet: ScoreFacet,
              view_name: Optional[str] = None,
              choose: Literal['auto', 'ask'] = 'auto',
              unfold: bool = False,
              interval_index: bool = False,
              concatenate: bool = True,
              ) -> Union[Dict[str, FileDataframeTuple], pd.DataFrame]:
    """Retrieves exactly one DataFrame per piece, if available.

    Delegates to ``_aggregate_corpus_data('get_facet', ...)``, forwarding all arguments as keywords.
    """
    arguments = dict(
        facet=facet,
        view_name=view_name,
        choose=choose,
        unfold=unfold,
        interval_index=interval_index,
        concatenate=concatenate,
    )
    return self._aggregate_corpus_data('get_facet', **arguments)
def get_facets(self,
               facets: ScoreFacets = None,
               view_name: Optional[str] = None,
               force: bool = False,
               choose: Literal['all', 'auto', 'ask'] = 'all',
               unfold: bool = False,
               interval_index: bool = False,
               flat=False,
               include_empty=False,
               concatenate=True,
               ) -> Union[pd.DataFrame, Dict[CorpusFnameTuple, Union[Dict[str, List[FileDataframeTuple]], List[FileDataframeTuple]]]]:
    """Delegate to ``_aggregate_corpus_data('get_facets', ...)``, forwarding all arguments as keywords."""
    arguments = dict(
        facets=facets,
        view_name=view_name,
        force=force,
        choose=choose,
        unfold=unfold,
        interval_index=interval_index,
        flat=flat,
        include_empty=include_empty,
        concatenate=concatenate,
    )
    return self._aggregate_corpus_data('get_facets', **arguments)
def get_files(self, facets: FacetArguments = None,
              view_name: Optional[str] = None,
              parsed: bool = True,
              unparsed: bool = True,
              choose: Literal['all', 'auto', 'ask'] = 'all',
              flat: bool = False,
              include_empty=False,
              ) -> Dict[CorpusFnameTuple, Union[FileDict, FileList]]:
    """Delegate to ``_aggregate_corpus_data('get_files', ...)``, forwarding all arguments as keywords."""
    arguments = dict(
        facets=facets,
        view_name=view_name,
        parsed=parsed,
        unparsed=unparsed,
        choose=choose,
        flat=flat,
        include_empty=include_empty,
    )
    return self._aggregate_corpus_data('get_files', **arguments)
[docs] def get_piece(self, corpus_name: str, fname: str) -> Piece:
"""Returns an existing Piece object."""
assert corpus_name in self.corpus_objects, f"'{corpus_name}' is not an existing corpus. Choose from {list(self.corpus_objects.keys())}"
return self.corpus_objects[corpus_name].get_piece(fname)
[docs] def get_view(self,
view_name: Optional[str] = None,
**config
) -> View:
"""Retrieve an existing or create a new View object, potentially while updating the config."""
if view_name in self._views:
view = self._views[view_name]
elif view_name is not None and self._views[None].name == view_name:
view = self._views[None]
else:
view = self.get_view().copy(new_name=view_name)
old_name = view.name
view.name = view_name
self._views[view_name] = view
self.logger.info(f"New view '{view_name}' created as a copy of '{old_name}'.")
if len(config) > 0:
view.update_config(**config)
return view
def info(self, view_name: Optional[str] = None, return_str: bool = False, show_discarded: bool = False):
    """Summarize the available views, the selected corpora with their file counts, and the filtering effects.

    Args:
        view_name: Name of the view to report on; None stands for the active view.
        return_str: Pass True to have the report returned rather than printed.
        show_discarded:
            Passed to the view's ``filtering_report()``; if True, orphaned files are also listed per corpus.

    Returns:
        The report as a string if ``return_str`` is True, otherwise None.
    """
    title = "All corpora"
    header = title + "\n" + "-" * len(title) + "\n"

    # start info message with the names of the available views, the header, and info on the active view.
    view = self.get_view(view_name)
    view.reset_filtering_data()
    msg = available_views2str(self._views, view_name) + header
    view_info = f"View: {view}"
    if view_name is None:
        active_view_names = {corpus.get_view().name for _, corpus in self.iter_corpora(view_name=view_name)}
        if len(active_view_names) > 1:
            view_info = "This is a mixed view. Call _.info(view_name) to see a homogeneous one."
    msg += view_info + "\n\n"

    # Show info on all pieces and files included in the active view
    counts_df = self.count_files(view_name=view_name)
    if len(counts_df) > 0:
        if counts_df.isna().any().any():
            counts_df = counts_df.fillna(0).astype('int')
        corpus_rows = []
        for corpus_name in counts_df.index:
            corpus = self.get_corpus(corpus_name)
            has_metadata = 'no' if corpus.metadata_tsv is None else 'yes'
            corpus_rows.append([has_metadata, corpus.get_view().name])
        additional_columns = pd.DataFrame(corpus_rows,
                                          columns=[('has', 'metadata'), ('active', 'view')],
                                          index=counts_df.index)
        info_df = pd.concat([additional_columns, counts_df], axis=1)
        info_df.columns = pd.MultiIndex.from_tuples(info_df.columns)
        msg += info_df.to_string()
        n_changed_scores = self.count_changed_scores(view_name)
        if n_changed_scores > 0:
            msg += f"\n\n{n_changed_scores} scores have changed since parsing."
    elif self.n_detected == 0:
        msg += 'No files detected. Use _.add_corpus().'
    else:
        msg += 'No files selected under the current view. You could use _.all to see everything.'

    filtering_report = view.filtering_report(show_discarded=show_discarded, return_str=True)
    if filtering_report != '':
        msg += '\n\n' + filtering_report

    if self.n_orphans > 0:
        msg += f"\n\nThere are {self.n_orphans} orphans that could not be attributed to any of the respective corpus's fnames"
        if not show_discarded:
            msg += "."
        else:
            msg += ':'
            for name, corpus in self:
                if corpus.n_orphans > 0:
                    msg += f"\n\t{name}: {list(corpus.ix2orphan_file.values())}"

    if return_str:
        return msg
    print(msg)
def insert_detached_labels(self,
                           view_name: Optional[str] = None,
                           key: str = 'detached',
                           staff: Optional[int] = None,
                           voice: Optional[Literal[1, 2, 3, 4]] = None,
                           harmony_layer: Optional[Literal[0, 1, 2]] = None,
                           check_for_clashes: bool = True):
    """ Attach all :py:attr:`~.annotations.Annotations` objects that are reachable via ``Score.key`` to their
    respective :py:attr:`~.score.Score`, altering the XML in memory. Calling :py:meth:`.store_scores` will output
    MuseScore files where the annotations show in the score.

    Parameters
    ----------
    view_name : :obj:`str`, optional
        Name of the view used for selecting corpora and passed on to each of them.
    key :
        Key under which the :py:attr:`~.annotations.Annotations` objects to be attached are stored in the
        :py:attr:`~.score.Score` objects. Defaults to 'detached'.
    staff : :obj:`int`, optional
        If you pass a staff ID, the labels will be attached to that staff where 1 is the upper staff.
        By default, the staves indicated in the 'staff' column of :obj:`ms3.annotations.Annotations.df`
        will be used.
    voice : {1, 2, 3, 4}, optional
        If you pass the ID of a notational layer (where 1 is the upper voice, blue in MuseScore),
        the labels will be attached to that one.
        By default, the notational layers indicated in the 'voice' column of
        :obj:`ms3.annotations.Annotations.df` will be used.
    harmony_layer : :obj:`int`, optional
        | By default, the labels are written to the layer specified as an integer in the column ``harmony_layer``.
        | Pass an integer to select a particular layer:
        | * 0 to attach them as absolute ('guitar') chords, meaning that when opened next time,
        |   MuseScore will split and encode those beginning with a note name ( resulting in ms3-internal harmony_layer 3).
        | * 1 the labels are written into the staff's layer for Roman Numeral Analysis.
        | * 2 to have MuseScore interpret them as Nashville Numbers
    check_for_clashes : :obj:`bool`, optional
        By default, warnings are thrown when there already exists a label at a position (and in a notational
        layer) where a new one is attached. Pass False to deactivate these warnings.
    """
    reached, goal = 0, 0
    # Counted explicitly: the original read the loop variable after the loop, which is unbound
    # (raising an UnboundLocalError) when no corpus is selected under the view.
    n_corpora = 0
    for _, corpus in self.iter_corpora(view_name):
        n_corpora += 1
        corpus_reached, corpus_goal = corpus.insert_detached_labels(view_name=view_name,
                                                                    key=key,
                                                                    staff=staff,
                                                                    voice=voice,
                                                                    harmony_layer=harmony_layer,
                                                                    check_for_clashes=check_for_clashes
                                                                    )
        reached += corpus_reached
        goal += corpus_goal
    # The overall summary is logged only if more than one corpus was involved.
    if n_corpora > 1:
        self.logger.info(f"{reached}/{goal} labels successfully added.")
def iter_corpora(self, view_name: Optional[str] = None) -> Generator[Tuple[str, Corpus], None, None]:
    """Iterate through corpora under the current or specified view.

    Corpora that do not know the view yet get it registered before being yielded.

    Yields:
        (corpus_name, Corpus) pairs as selected by the view's ``filter_by_token('corpora', ...)``.
    """
    selected_view = self.get_view(view_name)
    for corpus_name, corpus in selected_view.filter_by_token('corpora', self):
        view_is_missing = view_name not in corpus._views
        if view_is_missing and view_name is None:
            corpus.set_view(selected_view)
        elif view_is_missing:
            corpus.set_view(**{view_name: selected_view})
        yield corpus_name, corpus
def iter_pieces(self) -> Iterator[Tuple[CorpusFnameTuple, Piece]]:
    """Iterate through the pieces of all corpora.

    The corpora are obtained by iterating over this object itself (i.e. through :meth:`__iter__`), the pieces by
    iterating over each corpus.

    Yields:
        ((corpus_name, fname), piece) tuples.
    """
    for corpus_name, corpus in self:
        for fname, piece in corpus:
            yield (corpus_name, fname), piece
def keys(self) -> List[str]:
    """Return the names of all corpus objects as a new list."""
    corpus_names = [*self.corpus_objects.keys()]
    return corpus_names
def load_facet_into_scores(self,
                           facet: AnnotationsFacet,
                           view_name: Optional[str] = None,
                           force: bool = False,
                           choose: Literal['auto', 'ask'] = 'auto',
                           git_revision: Optional[str] = None,
                           key: str = 'detached',
                           infer: bool = True,
                           **cols) -> Dict[str, int]:
    """Call ``load_facet_into_scores()`` with identical arguments on every corpus under the view.

    Args:
        facet, force, choose, git_revision, key, infer, **cols: Forwarded unchanged to each corpus.
        view_name: Selects the corpora via :meth:`iter_corpora` and is forwarded to each of them, too.

    Returns:
        {corpus_name -> value returned by that corpus}.
    """
    shared_arguments = dict(
        facet=facet,
        view_name=view_name,
        force=force,
        choose=choose,
        git_revision=git_revision,
        key=key,
        infer=infer,
    )
    return {
        name: corpus_obj.load_facet_into_scores(**shared_arguments, **cols)
        for name, corpus_obj in self.iter_corpora(view_name)
    }
def load_ignored_warnings(self, path: str) -> None:
    """Make every corpus load the given IGNORED_WARNINGS file, adding filters to the loggers it includes.

    All corpora are addressed, regardless of any view; whatever they return is discarded.

    Args:
        path: Path of the IGNORED_WARNINGS file.
    """
    for _corpus_name, corpus_obj in self:
        corpus_obj.load_ignored_warnings(path)
def set_view(self, active: View = None, **views: View):
    """Register one or several view_name=View pairs.

    Args:
        active:
            View to be stored as the active one (under the key None). The previously active view is kept under
            its own name. An existing, unequal view stored under the name of ``active`` is removed and this is
            reported in an info message.
        **views:
            Additional views to register under the given keyword. A view whose ``name`` is None receives the
            keyword as its name.

    Afterwards, each of the views is also set on every corpus for which ``view.check_token('corpus', corpus_name)``
    holds.
    """
    activate = active is not None
    if activate:
        active_name = active.name
        if active_name in self._views and active != self._views[active_name]:
            self.logger.info(f"The existing view called '{active_name}' has been overwritten")
            del self._views[active_name]
        previously_active = self._views[None]
        self._views[previously_active.name] = previously_active
        self._views[None] = active
    for keyword, additional_view in views.items():
        if additional_view.name is None:
            additional_view.name = keyword
        self._views[keyword] = additional_view
    # propagate to the corpora the respective view does not exclude
    for corpus_name, corpus_obj in self:
        if activate and active.check_token('corpus', corpus_name):
            corpus_obj.set_view(active)
        for keyword, additional_view in views.items():
            if additional_view.check_token('corpus', corpus_name):
                corpus_obj.set_view(**{keyword: additional_view})
def switch_view(self, view_name: str,
                show_info: bool = True,
                propagate=True,
                ) -> None:
    """Make the view called ``view_name`` the active one; passing None does nothing.

    The previously active view is stored under its own name, the requested one is stored under the key None
    and no longer under its name.

    Args:
        view_name: Name of the view to activate, resolved via :meth:`get_view`.
        show_info: By default, :meth:`info` is called at the end. Pass False to prevent that.
        propagate:
            By default, the view is also set on every corpus whose active view has a different name or does not
            compare equal. Pass False to leave the corpora untouched.
    """
    if view_name is None:
        return
    incoming = self.get_view(view_name)
    outgoing = self.get_view()
    self._views[outgoing.name] = outgoing
    self._views[None] = incoming
    incoming_name = incoming.name
    self._views.pop(incoming_name, None)
    if propagate:
        for _corpus_name, corpus_obj in self:
            current = corpus_obj.get_view()
            if current.name != incoming_name or current != incoming:
                corpus_obj.set_view(incoming)
    if show_info:
        self.info()
def update_scores(self,
                  root_dir: Optional[str] = None,
                  folder: str = '.',
                  suffix: str = '',
                  overwrite: bool = False) -> List[str]:
    """ Update scores created with an older MuseScore version to the latest MuseScore 3 version.

    The call is delegated to every corpus under the current view and the returned paths are collected.

    Args:
        root_dir:
            In case you want to create output paths for the updated MuseScore files based on a folder different
            from :attr:`corpus_path`.
        folder:
            * The default '.' has the updated scores written to the same directory as the old ones, effectively
              overwriting them if ``root_dir`` is None.
            * If ``folder`` is None, the files will be written to ``{root_dir}/scores/``.
            * If ``folder`` is an absolute path, ``root_dir`` will be ignored.
            * If ``folder`` is a relative path starting with a dot ``.`` the relative path is appended to the
              file's subdir. For example, ``../scores`` will resolve to a sibling directory of the one where
              the ``file`` is located.
            * If ``folder`` is a relative path that does not begin with a dot ``.``, it will be appended to the
              ``root_dir``.
        suffix: String to append to the file names of the updated files, e.g. '_updated'.
        overwrite: By default, existing files are not overwritten. Pass True to allow this.

    Returns:
        A list of all up-to-date paths, whether they had to be converted or were already in the latest version.
    """
    return [
        path
        for _, corpus in self.iter_corpora()
        for path in corpus.update_scores(root_dir=root_dir,
                                         folder=folder,
                                         suffix=suffix,
                                         overwrite=overwrite)
    ]
def update_tsvs_on_disk(self,
                        facets: ScoreFacets = 'tsv',
                        view_name: Optional[str] = None,
                        force: bool = False,
                        choose: Literal['auto', 'ask'] = 'auto',
                        ) -> List[str]:
    """
    Update existing TSV files corresponding to one or several facets with information freshly extracted from a parsed
    score, but only if the contents are identical. Otherwise, the existing TSV file is not overwritten and the
    differences are displayed in a log warning. The purpose is to safely update the format of existing TSV files,
    (for instance with respect to column order) making sure that the content doesn't change.

    The work is delegated to every corpus under the view.

    Args:
        facets: Forwarded unchanged to each corpus.
        view_name: Selects the corpora via :meth:`iter_corpora` and is forwarded to each of them, too.
        force:
            By default, only TSV files that have already been parsed are updated. Set to True in order to
            force-parse for each facet one of the TSV files included in the given view, if necessary.
        choose: Forwarded unchanged to each corpus.

    Returns:
        List of paths that have been overwritten.
    """
    return [
        overwritten_path
        for _, corpus_obj in self.iter_corpora(view_name=view_name)
        for overwritten_path in corpus_obj.update_tsvs_on_disk(facets=facets,
                                                               view_name=view_name,
                                                               force=force,
                                                               choose=choose)
    ]
def _aggregate_corpus_data(self,
method,
view_name=None,
concatenate=False,
**kwargs
):
result = {}
for corpus_name, corpus in self.iter_corpora(view_name):
corpus_method = getattr(corpus, method)
if method == 'get_facet':
kwargs['concatenate'] = False
corpus_result = corpus_method(view_name=view_name, **kwargs)
for fname, piece_result in corpus_result.items():
if method == 'get_facet':
piece_result = [piece_result]
result[(corpus_name, fname)] = piece_result
if concatenate:
keys, dataframes = [], []
flat = 'flat' not in kwargs or kwargs['flat']
if flat:
add_index_level = any(len(piece_result) > 1 for piece_result in result.values())
else:
add_index_level = any(len(file_dataframe_tuples) > 1 for piece_result in result.values() for file_dataframe_tuples in piece_result.values())
for corpus_fname, piece_result in result.items():
if flat:
n_tuples = len(piece_result)
if n_tuples == 0:
continue
keys.append(corpus_fname)
if n_tuples == 1:
if add_index_level:
file, df = piece_result[0]
df = pd.concat([df], keys=[file.rel_path])
dataframes.append(df)
else:
dataframes.append(piece_result[0][1])
else:
files, dfs = list(zip(*piece_result))
ix_level = [file.rel_path for file in files]
concat = pd.concat(dfs, keys=ix_level)
dataframes.append(concat)
else:
for facet, file_dataframe_tuples in piece_result.items():
n_tuples = len(file_dataframe_tuples)
if n_tuples == 0:
continue
keys.append(corpus_fname + (facet,))
if n_tuples == 1:
if add_index_level:
file, df = file_dataframe_tuples[0]
df = pd.concat([df], keys=[file.rel_path])
dataframes.append(df)
else:
dataframes.append(file_dataframe_tuples[0][1])
else:
files, dfs = list(zip(*file_dataframe_tuples))
ix_level = [file.rel_path for file in files]
concat = pd.concat(dfs, keys=ix_level)
dataframes.append(concat)
if len(dataframes) > 0:
try:
result = pd.concat(dataframes, keys=keys)
except ValueError:
n_levels = [df.columns.nlevels for df in dataframes]
if len(set(n_levels)) > 1:
# this error might come from various form label dataframes with varying numbers of column levels
adapted_dataframes = []
for df in dataframes:
if df.columns.nlevels == 2:
adapted_dataframes.append(df)
else:
loc = df.columns.get_loc('form_label')
adapted_dataframes.append(pd.concat([df.iloc[:, :loc], df.iloc[:, loc:]], keys=['', 'a'], axis=1))
result = pd.concat(adapted_dataframes, keys=keys)
else:
raise
nlevels = result.index.nlevels
level_names = ['corpus', 'fname']
if not flat:
level_names.append('facet')
if len(level_names) < nlevels - 1:
level_names.append('ix')
level_names.append('i')
result.index.rename(level_names, inplace=True)
else:
return pd.DataFrame()
return result
def _get_parsed_score_files(self, view_name: Optional[str] = None) -> Dict[CorpusFnameTuple, FileList]:
result = {}
for corpus_name, corpus in self.iter_corpora(view_name=view_name):
fname2files = corpus.get_files('scores', view_name=view_name, unparsed=False, flat=True)
result[corpus_name] = sum(fname2files.values(), [])
return result
def _get_unparsed_score_files(self, view_name: Optional[str] = None) -> Dict[CorpusFnameTuple, FileList]:
result = {}
for corpus_name, corpus in self.iter_corpora(view_name=view_name):
fname2files = corpus.get_files('scores', view_name=view_name, parsed=False, flat=True)
result[corpus_name] = sum(fname2files.values(), [])
return result
def _get_parsed_tsv_files(self, view_name: Optional[str] = None, flat: bool = True) -> Dict[CorpusFnameTuple, Union[FileDict, FileList]]:
result = {}
for corpus_name, corpus in self.iter_corpora(view_name=view_name):
fname2files = corpus.get_files('tsv', view_name=view_name, unparsed=False, flat=flat)
if flat:
result[corpus_name] = sum(fname2files.values(), [])
else:
dd = defaultdict(list)
for fname, typ2files in fname2files.items():
for typ, files in typ2files.items():
dd[typ].extend(files)
result[corpus_name] = dict(dd)
return result
def _get_unparsed_tsv_files(self, view_name: Optional[str] = None, flat: bool = True) -> Dict[CorpusFnameTuple, Union[FileDict, FileList]]:
result = {}
for corpus_name, corpus in self.iter_corpora(view_name=view_name):
fname2files = corpus.get_files('tsv', view_name=view_name, parsed=False, flat=flat)
if flat:
result[corpus_name] = sum(fname2files.values(), [])
else:
dd = defaultdict(list)
for fname, typ2files in fname2files.items():
for typ, files in typ2files.items():
dd[typ].extend(files)
result[corpus_name] = dict(dd)
return result
def __getattr__(self, view_name) -> 'Parse':
    """Switch to an existing view through attribute access, e.g. ``parse.some_view``.

    Python calls this method only for attributes that could not be found the regular way.

    Args:
        view_name: Name of the missing attribute, interpreted as the name of a view.

    Returns:
        This object itself (not a View), after making the requested view the active one if it wasn't already.

    Raises:
        AttributeError: If ``view_name`` is not among :attr:`view_names`.
    """
    if view_name.startswith('__') and view_name.endswith('__'):
        # Special-method probes (e.g. by copy or pickle) are never view names. Answering them right away avoids
        # evaluating self.view_names on an instance that has not been initialized yet, which presumably relies
        # on instance attributes and would then end up here again -- NOTE(review): confirm.
        raise AttributeError(view_name)
    if view_name not in self.view_names:
        raise AttributeError(f"'{view_name}' is not an existing view. Use _.get_view('{view_name}') to create it.")
    if view_name != self.view_name:
        self.switch_view(view_name, show_info=False)
    return self
def detach_labels(self,
                  view_name: Optional[str] = None,
                  force: bool = False,
                  choose: Literal['auto', 'ask'] = 'auto',
                  key: str = 'removed',
                  staff: int = None,
                  voice: Literal[1, 2, 3, 4] = None,
                  harmony_layer: Literal[0, 1, 2, 3] = None,
                  delete: bool = True):
    """Call ``detach_labels()`` with identical arguments on every corpus under the view.

    Args:
        view_name: Selects the corpora via :meth:`iter_corpora` and is forwarded to each of them, too.
        force, choose, key, staff, voice, harmony_layer, delete: Forwarded unchanged to each corpus.
    """
    shared_arguments = dict(
        view_name=view_name,
        force=force,
        choose=choose,
        key=key,
        staff=staff,
        voice=voice,
        harmony_layer=harmony_layer,
        delete=delete,
    )
    for _corpus_name, corpus_obj in self.iter_corpora(view_name):
        corpus_obj.detach_labels(**shared_arguments)
def score_metadata(self, view_name: Optional[str] = None) -> pd.DataFrame:
    """Concatenate the DataFrames returned by the ``score_metadata()`` method of every corpus under the view.

    Args:
        view_name: Selects the corpora via :meth:`iter_corpora` and is forwarded to each of them, too.

    Returns:
        A DataFrame in which the corpus names were prepended as outermost index level; the index levels are
        named 'corpus' and 'fname'. If the view does not include any corpus, an empty DataFrame is returned.
    """
    metadata_dfs = {corpus_name: corpus.score_metadata(view_name=view_name)
                    for corpus_name, corpus in self.iter_corpora(view_name=view_name)}
    if not metadata_dfs:
        # pd.concat() raises a ValueError when there are no objects to concatenate
        return pd.DataFrame()
    return pd.concat(metadata_dfs.values(), keys=metadata_dfs.keys(), names=['corpus', 'fname'])
def metadata(self,
             view_name: Optional[str] = None,
             choose: Optional[Literal['auto', 'ask']] = None) -> pd.DataFrame:
    """Concatenate the DataFrames returned by the ``metadata()`` method of every corpus under the view.

    Args:
        view_name: Selects the corpora via :meth:`iter_corpora` and is forwarded to each of them, too.
        choose: Forwarded unchanged to each corpus.

    Returns:
        A DataFrame in which the corpus names were prepended as outermost index level; the index levels are
        named 'corpus' and 'fname'. If the view does not include any corpus, an empty DataFrame is returned.
    """
    metadata_dfs = {corpus_name: corpus.metadata(view_name=view_name, choose=choose)
                    for corpus_name, corpus in self.iter_corpora(view_name=view_name)}
    if not metadata_dfs:
        # pd.concat() raises a ValueError when there are no objects to concatenate
        return pd.DataFrame()
    return pd.concat(metadata_dfs.values(), keys=metadata_dfs.keys(), names=['corpus', 'fname'])
def store_parsed_scores(self,
                        view_name: Optional[str] = None,
                        only_changed: bool = True,
                        root_dir: Optional[str] = None,
                        folder: str = '.',
                        suffix: str = '',
                        overwrite: bool = False,
                        simulate=False) -> Dict[str, List[str]]:
    """ Stores all parsed scores under this view as MuseScore 3 files.

    The work is delegated to every corpus under the view, with all arguments forwarded unchanged.

    Args:
        view_name: Name of another view if another than the current one is to be used.
        only_changed:
            By default, only scores that have been modified since parsing are written. Set to False to store
            all scores regardless.
        root_dir: Directory where to re-build the sub-directory tree of the :obj:`Corpus` in question.
        folder:
            * Different behaviours are available. Note that only the third option ensures that file paths are
              distinct for files that have identical fnames but are located in different subdirectories of the
              same corpus.
            * If ``folder`` is None, the files' type will be appended to the ``root_dir``.
            * If ``folder`` is an absolute path, ``root_dir`` will be ignored.
            * If ``folder`` is a relative path that does not begin with a dot ``.``, it will be appended to the
              ``root_dir``.
            * If ``folder`` is a relative path starting with a dot ``.`` (such as the default ``'.'``), the
              relative path is appended to the file's subdir. For example, ``../notes`` will resolve to a
              sibling directory of the one where the ``file`` is located.
        suffix: Suffix to append to the original file name.
        overwrite: Pass True to overwrite existing files.
        simulate: Set to True if no files are to be written.

    Returns:
        {corpus_name -> paths of the stored files}.
    """
    return {
        corpus_name: corpus.store_parsed_scores(view_name=view_name,
                                                only_changed=only_changed,
                                                root_dir=root_dir,
                                                folder=folder,
                                                suffix=suffix,
                                                overwrite=overwrite,
                                                simulate=simulate)
        for corpus_name, corpus in self.iter_corpora(view_name)
    }
def parse(self, view_name=None, level=None, parallel=True, only_new=True, labels_cfg={}, cols={}, infer_types=None, **kwargs):
    """ Shorthand for executing parse_scores and parse_tsv at a time.

    Args:
        view_name: Name of the view whose files are to be parsed; forwarded to both methods.
        level: Forwarded to both methods.
        only_new: Forwarded to both methods.
        parallel: Forwarded to :meth:`parse_scores` only.
        labels_cfg: Forwarded to :meth:`parse_scores` only.
        cols: Forwarded to :meth:`parse_tsv` only.
        infer_types: Forwarded to :meth:`parse_tsv` only.
        **kwargs: Forwarded to :meth:`parse_tsv` only.
    """
    score_arguments = dict(
        level=level,
        parallel=parallel,
        only_new=only_new,
        labels_cfg=labels_cfg,
        view_name=view_name,
    )
    tsv_arguments = dict(
        view_name=view_name,
        level=level,
        cols=cols,
        infer_types=infer_types,
        only_new=only_new,
    )
    self.parse_scores(**score_arguments)
    self.parse_tsv(**tsv_arguments, **kwargs)
def parse_scores(self,
                 level: str = None,
                 parallel: bool = True,
                 only_new: bool = True,
                 labels_cfg: dict = {},
                 view_name: str = None,
                 choose: Literal['all', 'auto', 'ask'] = 'all'):
    """ Parse MuseScore 3 files (MSCX or MSCZ) and store the resulting read-only Score objects. If they need
    to be writeable, e.g. for removing or adding labels, pass ``parallel=False`` which takes longer but prevents
    having to re-parse at a later point.

    The parsing is delegated to every corpus under the view, with all arguments forwarded unchanged.

    Args:
        level:
            {'W', 'D', 'I', 'E', 'C', 'WARNING', 'DEBUG', 'INFO', 'ERROR', 'CRITICAL'}, optional.
            Pass a level name for which (and above which) you want to see log records. If specified, it is
            also applied to this object via ``change_logger_cfg()``.
        parallel:
            Defaults to True, meaning that all CPU cores are used simultaneously to speed up the parsing. It
            implies that the resulting Score objects are in read-only mode and that you might not be able to use
            the computer during parsing. Pass False to parse one score after the other, which uses more memory
            but will allow making changes to the scores.
        only_new:
            By default, score which already have been parsed, are not parsed again. Pass False to parse them, too.
        labels_cfg: Forwarded unchanged to each corpus.
        view_name: Selects the corpora via :meth:`iter_corpora` and is forwarded to each of them, too.
        choose: Forwarded unchanged to each corpus.

    Returns:
        None
    """
    if level is not None:
        self.change_logger_cfg(level=level)
    shared_arguments = dict(
        level=level,
        parallel=parallel,
        only_new=only_new,
        labels_cfg=labels_cfg,
        view_name=view_name,
        choose=choose,
    )
    for _corpus_name, corpus_obj in self.iter_corpora(view_name=view_name):
        corpus_obj.parse_scores(**shared_arguments)
def parse_tsv(self,
              view_name=None,
              level=None,
              cols={},
              infer_types=None,
              only_new=True,
              choose: Literal['all', 'auto', 'ask'] = 'all',
              **kwargs):
    """ Parse TSV files (or other value-separated files such as CSV) to be able to do something with them.

    The parsing is delegated to every corpus under the view.

    Args:
        view_name: Selects the corpora via :meth:`iter_corpora` and is forwarded to each of them, too.
        level:
            {'W', 'D', 'I', 'E', 'C', 'WARNING', 'DEBUG', 'INFO', 'ERROR', 'CRITICAL'}, optional.
            Pass a level name for which (and above which) you want to see log records. If specified, it is
            applied to this object via ``change_logger_cfg()``; it is not forwarded to the corpora.
        cols:
            By default, if a column called ``'label'`` is found, the TSV is treated as an annotation table and
            turned into an Annotations object. Pass one or several column name(s) to treat *them* as label
            columns instead. If you pass ``{}`` or no label column is found, the TSV is parsed as a "normal"
            table, i.e. a DataFrame.
        infer_types: To recognize one or several custom label type(s), pass ``{name: regEx}``.
        only_new: Forwarded unchanged to each corpus.
        choose: Forwarded unchanged to each corpus.
        **kwargs:
            Forwarded unchanged to each corpus. In particular, you might want to update the default
            dictionaries for ``dtypes`` and ``converters`` used in :py:func:`load_tsv`.

    Returns:
        None
    """
    if level is not None:
        self.change_logger_cfg(level=level)
    shared_arguments = dict(
        view_name=view_name,
        cols=cols,
        infer_types=infer_types,
        only_new=only_new,
        choose=choose,
    )
    for _corpus_name, corpus_obj in self.iter_corpora(view_name=view_name):
        corpus_obj.parse_tsv(**shared_arguments, **kwargs)
def __getitem__(self, item) -> Corpus:
if isinstance(item, str):
return self.get_corpus(item)
elif isinstance(item, tuple):
if len(item) == 1:
return self.get_corpus(item[0])
if len(item) == 2:
corpus_name, fname_or_ix = item
return self.get_corpus(corpus_name)[fname_or_ix]
corpus_name, *remainder = item
return self.get_corpus(corpus_name)[tuple(remainder)]
[docs] def __iter__(self) -> Iterator[Tuple[str, Corpus]]:
""" Iterate through all (corpus_name, Corpus) tuples, regardless of any Views.
Yields: (corpus_name, Corpus) tuples
"""
yield from self.corpus_objects.items()
[docs] def __repr__(self):
"""Show the :meth:`info` under the active view."""
return self.info(return_str=True)
@property
def parsed_mscx(self, *args, **kwargs) -> pd.DataFrame:
    """Deprecated property that always raises. Replaced by :attr:`n_parsed_scores`"""
    message = "Property has been renamed to n_parsed_scores."
    raise DeprecationWarning(message)
@property
def parsed_tsv(self, *args, **kwargs) -> pd.DataFrame:
    """Deprecated property that always raises. Replaced by :attr:`n_parsed_tsvs`"""
    message = "Property has been renamed to n_parsed_tsvs."
    raise DeprecationWarning(message)
def add_detached_annotations(self, *args, **kwargs):
    """Deprecated method that always raises. Replaced by :meth:`insert_detached_labels`."""
    message = "Method not in use any more. Use Parse.insert_detached_labels()."
    raise DeprecationWarning(message)
def count_annotation_layers(self, *args, **kwargs):
    """Deprecated method that always raises."""
    message = "Method not in use any more."
    raise DeprecationWarning(message)
def count_labels(self, *args, **kwargs):
    """Deprecated method that always raises."""
    message = "Method not in use any more."
    raise DeprecationWarning(message)
def get_lists(self, *args, **kwargs):
    """Deprecated method that always raises. Replaced by :meth:`get_facets`."""
    message = "Method get_lists() not in use any more. Use Parse.get_facets() instead."
    raise DeprecationWarning(message)
def iter(self, *args, **kwargs):
    """Deprecated method that always raises. Replaced by :meth:`ms3.corpus.Corpus.iter_facets`."""
    message = "Method iter() not in use any more. Use Corpus.iter_facets() instead."
    raise DeprecationWarning(message)
def parse_mscx(self, *args, **kwargs):
    """Deprecated method that always raises. Replaced by :meth:`parse_scores`."""
    message = "Method not in use any more. Use Parse.parse_scores()."
    raise DeprecationWarning(message)
def pieces(self, *args, **kwargs):
    """Deprecated method that always raises. Replaced by :meth:`info`."""
    message = ("This method is deprecated. To view pieces, call Corpus.info() or Corpus.info('all'). "
               "A DataFrame showing all detected files is available under the property Corpus.files_df")
    raise DeprecationWarning(message)
def store_scores(self, *args, **kwargs):
    """Deprecated method that always raises. Replaced by :meth:`store_parsed_scores`."""
    message = "Method not in use any more. Use Parse.store_parsed_scores()."
    raise DeprecationWarning(message)
########################################################################################################################
########################################################################################################################
################################################# End of Parse() ########################################################
########################################################################################################################
########################################################################################################################