Coverage for C: \ Users \ peaco \ OneDrive \ Documents \ GitHub \ mth5 \ mth5 \ io \ usgs_ascii \ usgs_ascii_collection.py: 97%
39 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-10 00:01 -0800
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-10 00:01 -0800
1# -*- coding: utf-8 -*-
2"""
3USGS ASCII Collection
4=====================
6Collection of USGS ASCII files combined into runs
8Created on Wed Aug 31 10:32:44 2022
10@author: jpeacock
11"""
13# =============================================================================
14# Imports
15# =============================================================================
16import pandas as pd
18from mth5.io.collection import Collection
19from mth5.io.usgs_ascii import USGSascii
22# =============================================================================
class USGSasciiCollection(Collection):
    """
    Collection of USGS ASCII files.

    Files with the ``asc`` extension are read with
    :class:`mth5.io.usgs_ascii.USGSascii` and summarized in a
    :class:`pandas.DataFrame`, one row per file.

    .. code-block:: python

        >>> from mth5.io.usgs_ascii import USGSasciiCollection
        >>> lc = USGSasciiCollection(r"/path/to/ascii/files")
        >>> run_dict = lc.get_runs(1)

    """

    def __init__(self, file_path=None, **kwargs):
        """
        Initialize the collection and set the file extension to ``asc``.

        :param file_path: passed through to the parent ``Collection``,
         defaults to None
        :type file_path: string or Path, optional
        :param kwargs: extra keyword arguments forwarded to the parent
         ``Collection``

        """
        super().__init__(file_path=file_path, **kwargs)
        self.file_ext = "asc"

    def to_dataframe(self, sample_rates=[4], run_name_zeros=4, calibration_path=None):
        """
        Create a data frame with one row for each ASCII (``.asc``) file
        returned by ``self.get_files``.

        .. note:: If a run name is already present it will not be overwritten

        .. note:: ``sample_rates`` and ``calibration_path`` are accepted
         but are not used by this method; every file found is included
         and ``calibration_fn`` is always set to None.

        :param sample_rates: sample rate to get, defaults to [4]
        :type sample_rates: int or list, optional
        :param run_name_zeros: number of zeros to assign to the run name,
         defaults to 4
        :type run_name_zeros: int, optional
        :param calibration_path: path to calibration files, defaults to None
        :type calibration_path: string or Path, optional
        :return: Dataframe with information of each ASCII file found.
        :rtype: :class:`pandas.DataFrame`

        :Example:

            >>> from mth5.io.usgs_ascii import USGSasciiCollection
            >>> lc = USGSasciiCollection("/path/to/ascii/files")
            >>> ascii_df = lc.to_dataframe()

        """

        entries = []
        for fn in self.get_files(self.file_ext):
            # only the metadata is read here, not the full file
            asc_obj = USGSascii(fn)
            asc_obj.read_metadata()

            entry = self.get_empty_entry_dict()
            entry["survey"] = asc_obj.survey_metadata.id
            entry["station"] = asc_obj.station_metadata.id
            entry["run"] = asc_obj.run_metadata.id
            entry["start"] = asc_obj.start
            entry["end"] = asc_obj.end
            entry["channel_id"] = 1
            # one file holds every channel, so store them comma separated
            entry["component"] = ",".join(asc_obj.run_metadata.channels_recorded_all)
            entry["fn"] = fn
            entry["sample_rate"] = asc_obj.sample_rate
            entry["file_size"] = asc_obj.file_size
            entry["n_samples"] = int(asc_obj.n_samples)
            # placeholder; presumably replaced when run names are assigned
            # during sorting -- confirm against Collection._sort_df
            entry["sequence_number"] = 0
            entry["instrument_id"] = asc_obj.run_metadata.data_logger.id
            entry["calibration_fn"] = None

            entries.append(entry)

        # make pandas dataframe and set data types
        df = self._sort_df(self._set_df_dtypes(pd.DataFrame(entries)), run_name_zeros)

        return df

    def assign_run_names(self, df, zeros=4):
        """
        Assign run names and sequence numbers per station, ordered by
        start time.

        Run names are assigned as sr{sample_rate}_{run_number:0{zeros}}. Only
        if the run name is not assigned already. The sequence number is
        set for every row, counting from 1 within each station.

        :param df: Dataframe with the appropriate columns
        :type df: :class:`pandas.DataFrame`
        :param zeros: number of zeros in run name, defaults to 4
        :type zeros: int, optional
        :return: Dataframe with run names
        :rtype: :class:`pandas.DataFrame`

        """

        for station in df.station.unique():
            count = 1
            station_rows = df[df.station == station].sort_values("start")
            for row in station_rows.itertuples():
                # pd.isna treats None, NaN, NaT and pd.NA alike, so a run
                # that went missing through a dtype change or a file
                # round-trip still gets a name.
                if pd.isna(row.run):
                    df.loc[row.Index, "run"] = f"sr{row.sample_rate}_{count:0{zeros}}"
                df.loc[row.Index, "sequence_number"] = count
                count += 1

        return df