Coverage for C: \ Users \ peaco \ OneDrive \ Documents \ GitHub \ mth5 \ mth5 \ io \ usgs_ascii \ usgs_ascii_collection.py: 97%

39 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-10 00:01 -0800

1# -*- coding: utf-8 -*- 

2""" 

3USGS ASCII Collection

4=====================

5 

6Collection of USGS ASCII files combined into runs

7 

8Created on Wed Aug 31 10:32:44 2022 

9 

10@author: jpeacock 

11""" 

12 

13# ============================================================================= 

14# Imports 

15# ============================================================================= 

16import pandas as pd 

17 

18from mth5.io.collection import Collection 

19from mth5.io.usgs_ascii import USGSascii 

20 

21 

22# ============================================================================= 

23 

24 

class USGSasciiCollection(Collection):
    """
    Collection of USGS ASCII files.

    Builds a :class:`pandas.DataFrame` with one row per file returned by
    ``get_files`` for the ``asc`` extension.

    .. code-block:: python

        >>> from mth5.io.usgs_ascii import USGSasciiCollection
        >>> lc = USGSasciiCollection(r"/path/to/ascii/files")
        >>> run_dict = lc.get_runs(1)

    """

    def __init__(self, file_path=None, **kwargs):
        super().__init__(file_path=file_path, **kwargs)
        # extension passed to ``get_files`` in :meth:`to_dataframe`
        self.file_ext = "asc"

    def to_dataframe(self, sample_rates=[4], run_name_zeros=4, calibration_path=None):
        """
        Create a data frame with one row for each ASCII file in the
        collection.

        .. note:: If a run name is already present it will not be overwritten

        :param sample_rates: sample rate to get, defaults to [4].  Not read
         by this method; no filtering on sample rate is done here.
        :type sample_rates: int or list, optional
        :param run_name_zeros: number of zeros to assign to the run name,
         defaults to 4
        :type run_name_zeros: int, optional
        :param calibration_path: path to calibration files, defaults to None.
         Not read by this method; ``calibration_fn`` is always set to None.
        :type calibration_path: string or Path, optional
        :return: Dataframe with information of each ASCII file in the
         collection.
        :rtype: :class:`pandas.DataFrame`

        :Example:

            >>> from mth5.io.usgs_ascii import USGSasciiCollection
            >>> lc = USGSasciiCollection("/path/to/ascii/files")
            >>> ascii_df = lc.to_dataframe()

        """

        rows = []
        for asc_path in self.get_files(self.file_ext):
            reader = USGSascii(asc_path)
            # read_metadata() is called before any metadata attribute below
            # is accessed
            reader.read_metadata()

            row = self.get_empty_entry_dict()
            row.update(
                {
                    "survey": reader.survey_metadata.id,
                    "station": reader.station_metadata.id,
                    "run": reader.run_metadata.id,
                    "start": reader.start,
                    "end": reader.end,
                    # constant for every file
                    "channel_id": 1,
                    # all recorded channels are stored as one comma-separated
                    # string
                    "component": ",".join(
                        reader.run_metadata.channels_recorded_all
                    ),
                    "fn": asc_path,
                    "sample_rate": reader.sample_rate,
                    "file_size": reader.file_size,
                    "n_samples": int(reader.n_samples),
                    # initial value; assign_run_names overwrites it for rows
                    # that receive a generated run name
                    "sequence_number": 0,
                    "instrument_id": reader.run_metadata.data_logger.id,
                    "calibration_fn": None,
                }
            )
            rows.append(row)

        # make pandas dataframe, set data types, then sort
        return self._sort_df(
            self._set_df_dtypes(pd.DataFrame(rows)), run_name_zeros
        )

    def assign_run_names(self, df, zeros=4):
        """
        Assign run names to rows that do not have one yet.

        For each station the rows are visited in order of ``start``.  A row
        whose ``run`` is missing gets the name
        sr{sample_rate}_{run_number:0{zeros}} and its ``sequence_number`` is
        set to the same run number.  The run number starts at 1 for every
        station and only advances when a name is assigned, so rows that
        already have a run name are left untouched and do not consume a
        number.

        The dataframe is modified in place and also returned.

        :param df: Dataframe with the appropriate columns (``station``,
         ``start``, ``run``, ``sample_rate``, ``sequence_number``)
        :type df: :class:`pandas.DataFrame`
        :param zeros: number of zeros in run name, defaults to 4
        :type zeros: int, optional
        :return: Dataframe with run names
        :rtype: :class:`pandas.DataFrame`

        """

        for station in df.station.unique():
            count = 1
            station_rows = df[df.station == station].sort_values("start")
            for row in station_rows.itertuples():
                # pd.isna treats None, NaN and pd.NA alike, so a missing run
                # name is recognized whichever marker the column uses
                if not pd.isna(row.run):
                    continue
                df.loc[row.Index, "run"] = f"sr{row.sample_rate}_{count:0{zeros}}"
                df.loc[row.Index, "sequence_number"] = count
                count += 1

        return df
118 count += 1 

119 

120 return df