acgc.icartt
Read and write ICARTT (ffi1001) format files
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''Read and write ICARTT (ffi1001) format files
'''

import os
import pandas as pd


def read_icartt( files, usePickle=False, timeIndex=False ):
    '''Read ICARTT file or files into a pandas DataFrame

    Parameters
    ----------
    files : list or str
        path to ICARTT file or files that will be read.
        Data within these files will be concatenated, so files should all contain the same variables
    usePickle : bool, default=False
        if usePickle=True, the data will be written to a pkl file with ".pkl" appended to path.
        On subsequent read_icartt calls, data will be read from the .pkl file, if it exists
    timeIndex : bool, default=False
        sets DataFrame index to the time variable from the ICARTT file, rather than a row counter

    Returns
    -------
    obs : pandas.DataFrame
        ICARTT file contents. In addition to column names for the ICARTT variables,
        the DataFrame columns also include 'time' in pandas.DatetimeIndex format and
        'file' that is the ordinal number of the input file that each row was read from

    Raises
    ------
    TypeError
        if `files` is neither a str nor a list
    FileNotFoundError
        if an input file does not exist and is not replaced by an existing pickle file
    ValueError
        if a file is not in ffi1001 format or its time unit is not seconds
    '''

    # Files input must be string or list of strings
    if isinstance( files, str ):
        files = [files]
    elif not isinstance( files, list ):
        raise TypeError( 'read_icartt: files must be a filename or list of filenames' )

    # Units of the independent variable that are interpreted as seconds
    seconds_units = ('s', 'seconds', 'seconds (from midnight UTC)', 'seconds_past_midnight')

    obsall = []
    for n, file in enumerate(files):

        # Read from Pickle file, if it exists and is requested
        pklfile = file+'.pkl'
        if usePickle and os.path.isfile(pklfile):
            # NOTE: unpickling can execute arbitrary code;
            # only use .pkl files that were created by a trusted source
            obs = pd.read_pickle(pklfile)

        else:

            # Ensure file exists
            if not os.path.isfile( file ):
                raise FileNotFoundError( file+" doesn't exist" )

            # Read the ICARTT file header
            with open( file, 'r', encoding='utf-8' ) as f:

                # First line: number of header lines, file format index and,
                # in some files, a version string that is not needed here
                first = f.readline().split(',')
                if len(first) < 2:
                    raise ValueError( 'read_icartt: '+file+' is not an ICARTT (ffi1001) file' )
                nheader = int(first[0])

                # Ensure this is an ffi1001 file
                if int(first[1]) != 1001:
                    raise ValueError( 'read_icartt: '+file+' is not an ICARTT (ffi1001) file' )

                # Skip 5 lines
                for _ in range(5):
                    next(f)

                # Read date
                year, month, day = map( int, f.readline().split(',')[0:3] )

                # Skip line
                next(f)

                # Read name and unit of the time variable
                tname, tunit = [s.strip() for s in f.readline().split(',')[0:2]]

                # Raise exception if the time unit is not seconds;
                # other units would need to be handled differently below
                if tunit not in seconds_units:
                    raise ValueError( 'read_icartt: time expected in seconds (s); '+
                                      'unit in file: '+tunit )
                # Use unit expected by pandas
                tunit = 's'

                # Number of dependent variables
                nvar = int( f.readline() )

                # Scale factor for dependent variables
                scale = [ float(s) for s in f.readline().split(',') ]

                # Missing value flag for dependent variables
                naflag = [ s.strip() for s in f.readline().split(',') ]

                # Dependent variable names from the next nvar lines;
                # stripped so they match the column names parsed by read_csv
                varnames = [ f.readline().split(',')[0].strip() for _ in range(nvar) ]

            # Missing flags for each variable as a dict
            nadict = dict( zip( varnames, naflag ) )

            # Read data; the last header line contains the column names
            obs = pd.read_csv( file, skiprows=nheader-1,
                               na_values=nadict, skipinitialspace=True )

            # Apply scale factors
            for name, s in zip( varnames, scale ):
                if s != 1:
                    obs[name] *= s

            # Add a time variable in datetime format
            obs['time'] = pd.DatetimeIndex( pd.Timestamp( year=year, month=month, day=day ) +
                                            pd.to_timedelta( obs[tname], tunit ) )

        # Add flight number
        obs['file'] = n+1

        # Use time variable for index
        if timeIndex:
            obs.index = obs.time

        # Save pickle
        if usePickle:
            obs.to_pickle(pklfile)

        # Add to list
        obsall.append(obs)

    # Concatenate all files into one dataframe
    obs = pd.concat( obsall, sort=True )

    return obs


def _get(obj, name, default_value=None):
    '''Get value from either attribute or key `name`

    Parameters
    ----------
    obj : dict or object
    name : str
        name of an attribute or key
    default_value :
        return value if the attribute or key do not exist

    Returns
    -------
    value :
        value of attribute or dict key
    '''
    try:
        # Try access as attribute
        return getattr(obj, name)
    except AttributeError:
        pass
    try:
        # Try access as dict key
        return obj[name]
    except (KeyError, IndexError, TypeError):
        # Not found, or obj does not support item access, so return default value
        return default_value


def _comment_lines(value):
    '''Convert a comment given as None, str, or iterable of str into a list of lines'''
    if value is None:
        return []
    if isinstance(value, str):
        # A single string must not be split into characters
        return value.splitlines()
    return [str(line) for line in value]


def write_icartt(filename, df, metadata, **kwargs):
    '''Write an ICARTT ffi1001 file

    The contents of a pandas DataFrame (``df``) are written to a text file in ICARTT format
    using `metadata` to specify which variables are written and provide ICARTT file header.

    ICARTT file format specification document:
    https://www.earthdata.nasa.gov/esdis/esco/standards-and-practices/icartt-file-format

    Parameters
    ----------
    filename : str
        File to be created
    df : pandas.DataFrame
        Data values that will be written
    metadata : dict or obj
        See notes below for the attributes or keys that `metadata` must contain
    **kwargs
        passed to pandas.to_csv

    Raises
    ------
    KeyError
        if variables named in `metadata` are not columns of `df`, or
        if the variable definitions or required header entries are missing from `metadata`

    Notes
    -----
    `metadata` can be a dict or any object, so long as it contains the following attributes or keys
    (names are upper case, except `measurement_start_date`):
    - INDEPENDENT_VARIABLE_DEFINITION (dict)
        should have only one key
    - DEPENDENT_VARIABLE_DEFINITION (dict)
        Controls which variables from `df` are written to file
    - measurement_start_date (pandas.Timestamp or datetime.datetime)
        date UTC when measurement collection began
    - PI_NAME (str), required
    - ORGANIZATION_NAME (str), required
    - SOURCE_DESCRIPTION (str), required
    - MISSION_NAME (str), required
    - SPECIAL_COMMENTS (str or list of str), optional
    - PI_CONTACT_INFO (str)
    - PLATFORM (str)
    - LOCATION (str)
    - ASSOCIATED_DATA (str)
    - INSTRUMENT_INFO (str)
    - DATA_INFO (str)
    - UNCERTAINTY (str)
    - ULOD_FLAG (str)
        commonly '-7777'
    - ULOD_VALUE (str)
    - LLOD_FLAG (str)
        commonly '-8888'
    - LLOD_VALUE (str)
    - DM_CONTACT_INFO (str)
    - PROJECT_INFO (str)
    - STIPULATIONS_ON_USE (str)
    - OTHER_COMMENTS (str)
    - REVISION (str)
    - REVISION_COMMENTS (str or list of str)

    Normal comment entries that are absent from `metadata` are written as 'N/A'.

    The `INDEPENDENT_VARIABLE_DEFINITION` and `DEPENDENT_VARIABLE_DEFINITION` are dicts
    with entries of the form `{'VariableName':'units, standard name, [optional long name]'}`
    The keys must correspond to columns of `df`.
    `INDEPENDENT_VARIABLE_DEFINITION` should have only one key while
    `DEPENDENT_VARIABLE_DEFINITION` can have many. For example,
    ``metadata.INDEPENDENT_VARIABLE_DEFINITION =
        {'Time_Start':'seconds, time at start of measurement, seconds since midnight UTC'}``
    See Examples below.


    Examples
    --------
    ```
    import pandas as pd
    from acgc import icartt

    df = pd.DataFrame( [[1,0,30],
                        [2,10,29],
                        [3,20,27],
                        [4,30,25]],
                        columns=['Time_Start','Alt','Temp'])

    metadata = dict(
        INDEPENDENT_VARIABLE_DEFINITION =
            {'Time_Start':'seconds, time, measurement time in seconds after takeoff'},
        DEPENDENT_VARIABLE_DEFINITION =
            {'Alt':'m, altitude, altitude above ground level',
            'Temp':'C, temperature, air temperature in Celsius'},
        PI_NAME = 'Jane Doe',
        ORGANIZATION_NAME = 'NASA',
        SOURCE_DESCRIPTION = 'Invented Instrument',
        MISSION_NAME = 'FIREX-AQ',
        SPECIAL_COMMENTS = ['Special comments are optional and can be omitted.',
                            'If used, they should be a list of one or more strings'],
        PI_CONTACT_INFO = 'jdoe@email.com or postal address',
        PLATFORM = 'NASA DC-8',
        LOCATION = 'Boise, ID, USA',
        ASSOCIATED_DATA = 'N/A',
        INSTRUMENT_INFO = 'N/A',
        DATA_INFO = 'N/A',
        UNCERTAINTY = r'10% uncertainty in all values',
        ULOD_FLAG = '-7777',
        ULOD_VALUE = 'N/A',
        LLOD_FLAG = '-8888',
        LLOD_VALUE = 'N/A',
        DM_CONTACT_INFO = 'Alice, data manager, alice@email.com',
        STIPULATIONS_ON_USE = 'FIREX-AQ Data Use Policy',
        PROJECT_INFO = 'FIREX-AQ 2019, https://project.com',
        OTHER_COMMENTS = 'One line of comments',
        REVISION = 'R1',
        REVISION_COMMENTS = ['R0: Initial data',
                            'R1: One string per revision'],
        measurement_start_date = pd.Timestamp('2020-01-30 10:20')
        )

    icartt.write_icartt( 'test.ict', df, metadata )
    ```
    '''

    # Header entries that occupy the first lines of the file, in order
    required_keys = ['PI_NAME',
                     'ORGANIZATION_NAME',
                     'SOURCE_DESCRIPTION',
                     'MISSION_NAME']

    # Keywords written in the normal comments section, in order
    normal_comments = ['PI_CONTACT_INFO',
                       'PLATFORM',
                       'LOCATION',
                       'ASSOCIATED_DATA',
                       'INSTRUMENT_INFO',
                       'DATA_INFO',
                       'UNCERTAINTY',
                       'ULOD_FLAG',
                       'ULOD_VALUE',
                       'LLOD_FLAG',
                       'LLOD_VALUE',
                       'DM_CONTACT_INFO',
                       'PROJECT_INFO',
                       'STIPULATIONS_ON_USE',
                       'OTHER_COMMENTS',
                       'REVISION',
                       'REVISION_COMMENTS']

    # Variable definitions
    independent = _get( metadata, 'INDEPENDENT_VARIABLE_DEFINITION' )
    dependent = _get( metadata, 'DEPENDENT_VARIABLE_DEFINITION' )
    if not independent or dependent is None:
        raise KeyError( 'metadata must contain INDEPENDENT_VARIABLE_DEFINITION '
                        'and DEPENDENT_VARIABLE_DEFINITION' )

    # Variables that will be written to file
    ictvars = list(independent) + list(dependent)

    # Variables that are not in the dataframe
    missingvars = set(ictvars) - set(df.columns)

    # Raise an error if there are missing variables
    if missingvars:
        raise KeyError( 'Some output variables are not in the DataFrame: '+str(missingvars) )

    # Check required entries now, so that no partial file is left behind on failure
    missingmeta = [ k for k in required_keys if _get( metadata, k ) is None ]
    if missingmeta:
        raise KeyError( 'Some required metadata are missing: '+str(missingmeta) )

    # Coerce to Timestamp
    measurement_start_date = pd.Timestamp( _get( metadata, 'measurement_start_date' ) )

    # Form the header
    header = [ str( _get( metadata, k ) ) for k in required_keys ]

    # File volume
    header.append( '1, 1' )

    # Date line: measurement start date followed by the date the file is written
    header.append( measurement_start_date.strftime('%Y, %m, %d, ') +
                   pd.Timestamp.today().strftime('%Y, %m, %d') )

    # Time interval
    # Time spacing between records, set of unique values
    independent_variable_name = ictvars[0]
    dt = set( df[independent_variable_name].diff(1).dropna() )
    # Constant time interval is written as its value; otherwise coded as 0
    interval = dt.pop() if len(dt) == 1 else 0
    header.append( str(interval) )

    # Independent variable
    header.append( independent_variable_name + ', ' + independent[independent_variable_name] )

    # Dependent variables
    nvars = len(dependent)
    header.append( str(nvars) )                     # Number of dependent variables
    header.append( ','.join(['1']*nvars) )          # Scale factors for dependent variables
    header.append( ','.join(['-9999']*nvars) )      # Missing data flags for dependent vars.
    for kn, definition in dependent.items():
        header.append( kn + ', ' + definition )     # Dependent variable definitions

    # Special comments: a string or a list of several lines; may be absent
    special = _comment_lines( _get( metadata, 'SPECIAL_COMMENTS' ) )
    header.append( str(len(special)) )
    header.extend( special )

    # Normal Comments
    nc = []
    for kn in normal_comments:
        v = _get( metadata, kn )
        if kn == 'REVISION_COMMENTS':
            # A string or a list of several lines
            nc.extend( _comment_lines(v) )
        else:
            nc.append( '{:s}: {:s}'.format( kn, 'N/A' if v is None else str(v) ) )
    # Variable short names
    nc.append( ', '.join(ictvars) )
    # Add normal comments to the header
    header.append( str(len(nc)) )
    header.extend( nc )

    # Write the file
    with open( filename, 'w', encoding='ascii' ) as f:
        f.write( f'{len(header)+1:d}, 1001\n' )   # +1 accounts for this line
        f.writelines( line+'\n' for line in header )
        df[ictvars].to_csv( f,
                            index=False,
                            header=False,
                            na_rep='-9999',
                            **kwargs )
def read_icartt( files, usePickle=False, timeIndex=False ):
    '''Read ICARTT file or files into a pandas DataFrame

    Parameters
    ----------
    files : list or str
        path to ICARTT file or files that will be read.
        Data within these files will be concatenated, so files should all contain the same variables
    usePickle : bool, default=False
        if usePickle=True, the data will be written to a pkl file with ".pkl" appended to path.
        On subsequent read_icartt calls, data will be read from the .pkl file, if it exists
    timeIndex : bool, default=False
        sets DataFrame index to the time variable from the ICARTT file, rather than a row counter

    Returns
    -------
    obs : pandas.DataFrame
        ICARTT file contents. In addition to column names for the ICARTT variables,
        the DataFrame columns also include 'time' in pandas.DatetimeIndex format and
        'file' that is the ordinal number of the input file that each row was read from

    Raises
    ------
    TypeError
        if `files` is neither a str nor a list
    FileNotFoundError
        if an input file does not exist and is not replaced by an existing pickle file
    ValueError
        if a file is not in ffi1001 format or its time unit is not seconds
    '''

    # Files input must be string or list of strings
    if isinstance( files, str ):
        files = [files]
    elif not isinstance( files, list ):
        raise TypeError( 'read_icartt: files must be a filename or list of filenames' )

    # Units of the independent variable that are interpreted as seconds
    seconds_units = ('s', 'seconds', 'seconds (from midnight UTC)', 'seconds_past_midnight')

    obsall = []
    for n, file in enumerate(files):

        # Read from Pickle file, if it exists and is requested
        pklfile = file+'.pkl'
        if usePickle and os.path.isfile(pklfile):
            # NOTE: unpickling can execute arbitrary code;
            # only use .pkl files that were created by a trusted source
            obs = pd.read_pickle(pklfile)

        else:

            # Ensure file exists
            if not os.path.isfile( file ):
                raise FileNotFoundError( file+" doesn't exist" )

            # Read the ICARTT file header
            with open( file, 'r', encoding='utf-8' ) as f:

                # First line: number of header lines, file format index and,
                # in some files, a version string that is not needed here
                first = f.readline().split(',')
                if len(first) < 2:
                    raise ValueError( 'read_icartt: '+file+' is not an ICARTT (ffi1001) file' )
                nheader = int(first[0])

                # Ensure this is an ffi1001 file
                if int(first[1]) != 1001:
                    raise ValueError( 'read_icartt: '+file+' is not an ICARTT (ffi1001) file' )

                # Skip 5 lines
                for _ in range(5):
                    next(f)

                # Read date
                year, month, day = map( int, f.readline().split(',')[0:3] )

                # Skip line
                next(f)

                # Read name and unit of the time variable
                tname, tunit = [s.strip() for s in f.readline().split(',')[0:2]]

                # Raise exception if the time unit is not seconds;
                # other units would need to be handled differently below
                if tunit not in seconds_units:
                    raise ValueError( 'read_icartt: time expected in seconds (s); '+
                                      'unit in file: '+tunit )
                # Use unit expected by pandas
                tunit = 's'

                # Number of dependent variables
                nvar = int( f.readline() )

                # Scale factor for dependent variables
                scale = [ float(s) for s in f.readline().split(',') ]

                # Missing value flag for dependent variables
                naflag = [ s.strip() for s in f.readline().split(',') ]

                # Dependent variable names from the next nvar lines;
                # stripped so they match the column names parsed by read_csv
                varnames = [ f.readline().split(',')[0].strip() for _ in range(nvar) ]

            # Missing flags for each variable as a dict
            nadict = dict( zip( varnames, naflag ) )

            # Read data; the last header line contains the column names
            obs = pd.read_csv( file, skiprows=nheader-1,
                               na_values=nadict, skipinitialspace=True )

            # Apply scale factors
            for name, s in zip( varnames, scale ):
                if s != 1:
                    obs[name] *= s

            # Add a time variable in datetime format
            obs['time'] = pd.DatetimeIndex( pd.Timestamp( year=year, month=month, day=day ) +
                                            pd.to_timedelta( obs[tname], tunit ) )

        # Add flight number
        obs['file'] = n+1

        # Use time variable for index
        if timeIndex:
            obs.index = obs.time

        # Save pickle
        if usePickle:
            obs.to_pickle(pklfile)

        # Add to list
        obsall.append(obs)

    # Concatenate all files into one dataframe
    obs = pd.concat( obsall, sort=True )

    return obs
Read ICARTT file or files into a pandas DataFrame
Parameters
- files (list or str): path to ICARTT file or files that will be read. Data within these files will be concatenated, so files should all contain the same variables
- usePickle (bool, default=False): if usePickle=True, the data will be written to a pkl file with ".pkl" appended to path. On subsequent read_icartt calls, data will be read from the .pkl file, if it exists
- timeIndex (bool, default=False): sets DataFrame index to the time variable from the ICARTT file, rather than a row counter
Returns
- obs (pandas.DataFrame): ICARTT file contents. In addition to column names for the ICARTT variables, the DataFrame columns also include 'time' in pandas.DatetimeIndex format and 'file' that is the ordinal number of the input file that each row was read from
def _comment_lines(value):
    '''Convert a comment given as None, str, or iterable of str into a list of lines'''
    if value is None:
        return []
    if isinstance(value, str):
        # A single string must not be split into characters
        return value.splitlines()
    return [str(line) for line in value]


def write_icartt(filename, df, metadata, **kwargs):
    '''Write an ICARTT ffi1001 file

    The contents of a pandas DataFrame (``df``) are written to a text file in ICARTT format
    using `metadata` to specify which variables are written and provide ICARTT file header.

    ICARTT file format specification document:
    https://www.earthdata.nasa.gov/esdis/esco/standards-and-practices/icartt-file-format

    Parameters
    ----------
    filename : str
        File to be created
    df : pandas.DataFrame
        Data values that will be written
    metadata : dict or obj
        See notes below for the attributes or keys that `metadata` must contain
    **kwargs
        passed to pandas.to_csv

    Raises
    ------
    KeyError
        if variables named in `metadata` are not columns of `df`, or
        if the variable definitions or required header entries are missing from `metadata`

    Notes
    -----
    `metadata` can be a dict or any object, so long as it contains the following attributes or keys
    (names are upper case, except `measurement_start_date`):
    - INDEPENDENT_VARIABLE_DEFINITION (dict)
        should have only one key
    - DEPENDENT_VARIABLE_DEFINITION (dict)
        Controls which variables from `df` are written to file
    - measurement_start_date (pandas.Timestamp or datetime.datetime)
        date UTC when measurement collection began
    - PI_NAME (str), required
    - ORGANIZATION_NAME (str), required
    - SOURCE_DESCRIPTION (str), required
    - MISSION_NAME (str), required
    - SPECIAL_COMMENTS (str or list of str), optional
    - PI_CONTACT_INFO (str)
    - PLATFORM (str)
    - LOCATION (str)
    - ASSOCIATED_DATA (str)
    - INSTRUMENT_INFO (str)
    - DATA_INFO (str)
    - UNCERTAINTY (str)
    - ULOD_FLAG (str)
        commonly '-7777'
    - ULOD_VALUE (str)
    - LLOD_FLAG (str)
        commonly '-8888'
    - LLOD_VALUE (str)
    - DM_CONTACT_INFO (str)
    - PROJECT_INFO (str)
    - STIPULATIONS_ON_USE (str)
    - OTHER_COMMENTS (str)
    - REVISION (str)
    - REVISION_COMMENTS (str or list of str)

    Normal comment entries that are absent from `metadata` are written as 'N/A'.

    The `INDEPENDENT_VARIABLE_DEFINITION` and `DEPENDENT_VARIABLE_DEFINITION` are dicts
    with entries of the form `{'VariableName':'units, standard name, [optional long name]'}`
    The keys must correspond to columns of `df`.
    `INDEPENDENT_VARIABLE_DEFINITION` should have only one key while
    `DEPENDENT_VARIABLE_DEFINITION` can have many. For example,
    ``metadata.INDEPENDENT_VARIABLE_DEFINITION =
        {'Time_Start':'seconds, time at start of measurement, seconds since midnight UTC'}``
    See Examples below.


    Examples
    --------
    ```
    import pandas as pd
    from acgc import icartt

    df = pd.DataFrame( [[1,0,30],
                        [2,10,29],
                        [3,20,27],
                        [4,30,25]],
                        columns=['Time_Start','Alt','Temp'])

    metadata = dict(
        INDEPENDENT_VARIABLE_DEFINITION =
            {'Time_Start':'seconds, time, measurement time in seconds after takeoff'},
        DEPENDENT_VARIABLE_DEFINITION =
            {'Alt':'m, altitude, altitude above ground level',
            'Temp':'C, temperature, air temperature in Celsius'},
        PI_NAME = 'Jane Doe',
        ORGANIZATION_NAME = 'NASA',
        SOURCE_DESCRIPTION = 'Invented Instrument',
        MISSION_NAME = 'FIREX-AQ',
        SPECIAL_COMMENTS = ['Special comments are optional and can be omitted.',
                            'If used, they should be a list of one or more strings'],
        PI_CONTACT_INFO = 'jdoe@email.com or postal address',
        PLATFORM = 'NASA DC-8',
        LOCATION = 'Boise, ID, USA',
        ASSOCIATED_DATA = 'N/A',
        INSTRUMENT_INFO = 'N/A',
        DATA_INFO = 'N/A',
        UNCERTAINTY = r'10% uncertainty in all values',
        ULOD_FLAG = '-7777',
        ULOD_VALUE = 'N/A',
        LLOD_FLAG = '-8888',
        LLOD_VALUE = 'N/A',
        DM_CONTACT_INFO = 'Alice, data manager, alice@email.com',
        STIPULATIONS_ON_USE = 'FIREX-AQ Data Use Policy',
        PROJECT_INFO = 'FIREX-AQ 2019, https://project.com',
        OTHER_COMMENTS = 'One line of comments',
        REVISION = 'R1',
        REVISION_COMMENTS = ['R0: Initial data',
                            'R1: One string per revision'],
        measurement_start_date = pd.Timestamp('2020-01-30 10:20')
        )

    icartt.write_icartt( 'test.ict', df, metadata )
    ```
    '''

    # Header entries that occupy the first lines of the file, in order
    required_keys = ['PI_NAME',
                     'ORGANIZATION_NAME',
                     'SOURCE_DESCRIPTION',
                     'MISSION_NAME']

    # Keywords written in the normal comments section, in order
    normal_comments = ['PI_CONTACT_INFO',
                       'PLATFORM',
                       'LOCATION',
                       'ASSOCIATED_DATA',
                       'INSTRUMENT_INFO',
                       'DATA_INFO',
                       'UNCERTAINTY',
                       'ULOD_FLAG',
                       'ULOD_VALUE',
                       'LLOD_FLAG',
                       'LLOD_VALUE',
                       'DM_CONTACT_INFO',
                       'PROJECT_INFO',
                       'STIPULATIONS_ON_USE',
                       'OTHER_COMMENTS',
                       'REVISION',
                       'REVISION_COMMENTS']

    # Variable definitions
    independent = _get( metadata, 'INDEPENDENT_VARIABLE_DEFINITION' )
    dependent = _get( metadata, 'DEPENDENT_VARIABLE_DEFINITION' )
    if not independent or dependent is None:
        raise KeyError( 'metadata must contain INDEPENDENT_VARIABLE_DEFINITION '
                        'and DEPENDENT_VARIABLE_DEFINITION' )

    # Variables that will be written to file
    ictvars = list(independent) + list(dependent)

    # Variables that are not in the dataframe
    missingvars = set(ictvars) - set(df.columns)

    # Raise an error if there are missing variables
    if missingvars:
        raise KeyError( 'Some output variables are not in the DataFrame: '+str(missingvars) )

    # Check required entries now, so that no partial file is left behind on failure
    missingmeta = [ k for k in required_keys if _get( metadata, k ) is None ]
    if missingmeta:
        raise KeyError( 'Some required metadata are missing: '+str(missingmeta) )

    # Coerce to Timestamp
    measurement_start_date = pd.Timestamp( _get( metadata, 'measurement_start_date' ) )

    # Form the header
    header = [ str( _get( metadata, k ) ) for k in required_keys ]

    # File volume
    header.append( '1, 1' )

    # Date line: measurement start date followed by the date the file is written
    header.append( measurement_start_date.strftime('%Y, %m, %d, ') +
                   pd.Timestamp.today().strftime('%Y, %m, %d') )

    # Time interval
    # Time spacing between records, set of unique values
    independent_variable_name = ictvars[0]
    dt = set( df[independent_variable_name].diff(1).dropna() )
    # Constant time interval is written as its value; otherwise coded as 0
    interval = dt.pop() if len(dt) == 1 else 0
    header.append( str(interval) )

    # Independent variable
    header.append( independent_variable_name + ', ' + independent[independent_variable_name] )

    # Dependent variables
    nvars = len(dependent)
    header.append( str(nvars) )                     # Number of dependent variables
    header.append( ','.join(['1']*nvars) )          # Scale factors for dependent variables
    header.append( ','.join(['-9999']*nvars) )      # Missing data flags for dependent vars.
    for kn, definition in dependent.items():
        header.append( kn + ', ' + definition )     # Dependent variable definitions

    # Special comments: a string or a list of several lines; may be absent
    special = _comment_lines( _get( metadata, 'SPECIAL_COMMENTS' ) )
    header.append( str(len(special)) )
    header.extend( special )

    # Normal Comments
    nc = []
    for kn in normal_comments:
        v = _get( metadata, kn )
        if kn == 'REVISION_COMMENTS':
            # A string or a list of several lines
            nc.extend( _comment_lines(v) )
        else:
            nc.append( '{:s}: {:s}'.format( kn, 'N/A' if v is None else str(v) ) )
    # Variable short names
    nc.append( ', '.join(ictvars) )
    # Add normal comments to the header
    header.append( str(len(nc)) )
    header.extend( nc )

    # Write the file
    with open( filename, 'w', encoding='ascii' ) as f:
        f.write( f'{len(header)+1:d}, 1001\n' )   # +1 accounts for this line
        f.writelines( line+'\n' for line in header )
        df[ictvars].to_csv( f,
                            index=False,
                            header=False,
                            na_rep='-9999',
                            **kwargs )
Write an ICARTT ffi1001 file
The contents of a pandas DataFrame (`df`) are written to a text file in ICARTT format using `metadata` to specify which variables are written and provide the ICARTT file header.
ICARTT file format specification document: https://www.earthdata.nasa.gov/esdis/esco/standards-and-practices/icartt-file-format
Parameters
- filename (str): File to be created
- df (pandas.DataFrame): Data values that will be written
- metadata (dict or obj): See notes below for the attributes or keys that `metadata` must contain
- **kwargs: passed to pandas.to_csv
Notes
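`metadata` can be a dict or any object, so long as it contains the following attributes or keys. Note that the code looks these names up in upper case (e.g. `PI_NAME`, as in the example below), except for `measurement_start_date`, and that it also requires `SOURCE_DESCRIPTION`: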
- independent_variable_definition (dict) should have only one key
- dependent_variable_definition (dict) Controls which variables from `df` are written to file
- measurement_start_date (pandas.Timestamp or datetime.datetime) date UTC when measurement collection began
- pi_name (str)
- pi_contact_info (str)
- organization_name (str)
- dm_contact_info (str)
- mission_name (str)
- project_info (str)
- special_comments (list of str)
- platform (str)
- location (str)
- associated_data (str)
- instrument_info (str)
- data_info (str)
- uncertainty (str)
- ulod_flag (str) commonly '-7777'
- ulod_value (str)
- llod_flag (str) commonly '-8888'
- llod_value (str)
- stipulations_on_use (str)
- other_comments (str)
- revision (str)
- revision_comments (list of str)
The `independent_variable_definition` and `dependent_variable_definition` are dicts with entries of the form `{'VariableName':'units, standard name, [optional long name]'}`. The keys must correspond to columns of `df`. `independent_variable_definition` should have only one key while `dependent_variable_definition` can have many. For example, `metadata.INDEPENDENT_VARIABLE_DEFINITION = {'Time_Start':'seconds, time at start of measurement, seconds since midnight UTC'}`. See Examples below.
Examples
import pandas as pd
from acgc import icartt
df = pd.DataFrame( [[1,0,30],
[2,10,29],
[3,20,27],
[4,30,25]],
columns=['Time_Start','Alt','Temp'])
metadata = dict(
INDEPENDENT_VARIABLE_DEFINITION =
{'Time_Start':'seconds, time, measurement time in seconds after takeoff'},
DEPENDENT_VARIABLE_DEFINITION =
{'Alt':'m, altitude, altitude above ground level',
'Temp':'C, temperature, air temperature in Celsius'},
PI_NAME = 'Jane Doe',
ORGANIZATION_NAME = 'NASA',
SOURCE_DESCRIPTION = 'Invented Instrument',
MISSION_NAME = 'FIREX-AQ',
SPECIAL_COMMENTS = ['Special comments are optional and can be omitted.',
'If used, they should be a list of one or more strings'],
PI_CONTACT_INFO = 'jdoe@email.com or postal address',
PLATFORM = 'NASA DC-8',
LOCATION = 'Boise, ID, USA',
ASSOCIATED_DATA = 'N/A',
INSTRUMENT_INFO = 'N/A',
DATA_INFO = 'N/A',
UNCERTAINTY = r'10% uncertainty in all values',
ULOD_FLAG = '-7777',
ULOD_VALUE = 'N/A',
LLOD_FLAG = '-8888',
LLOD_VALUE = 'N/A',
DM_CONTACT_INFO = 'Alice, data manager, alice@email.com',
STIPULATIONS_ON_USE = 'FIREX-AQ Data Use Policy',
PROJECT_INFO = 'FIREX-AQ 2019, https://project.com',
OTHER_COMMENTS = 'One line of comments',
REVISION = 'R1',
REVISION_COMMENTS = ['R0: Initial data',
'R1: One string per revision'],
measurement_start_date = pd.Timestamp('2020-01-30 10:20')
)
icartt.write_icartt( 'test.ict', df, metadata )