# Copyright (c) 2013,Vienna University of Technology, Department of Geodesy and Geoinformation
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the <organization> nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
Created on Jul 29, 2013
@author: Christoph Paulik christoph.paulik@geo.tuwien.ac.at
'''
import os
import numpy as np
import zipfile
import pandas as pd
import warnings
import netCDF4
from glob import glob
import pytesmo.grid.grids as grids
from pytesmo.timedate.julian import doy
from datetime import datetime
[docs]class ASCATReaderException(Exception):
pass
[docs]class ASCATTimeSeries(object):
"""
Container class for ASCAT time series
Parameters
----------
gpi : int
grid point index
lon : float
longitude of grid point
lat : float
latitude of grid point
cell : int
cell number of grid point
data : pandas.DataFrame
DataFrame which contains the data
topo_complex : int, optional
topographic complexity at the grid point
wetland_frac : int, optional
wetland fraction at the grid point
porosity_gldas : float, optional
porosity taken from GLDAS model
porosity_hwsd : float, optional
porosity calculated from Harmonised World Soil Database
Attributes
----------
gpi : int
grid point index
longitude : float
longitude of grid point
latitude : float
latitude of grid point
cell : int
cell number of grid point
data : pandas.DataFrame
DataFrame which contains the data
topo_complex : int
topographic complexity at the grid point
wetland_frac : int
wetland fraction at the grid point
porosity_gldas : float
porosity taken from GLDAS model
porosity_hwsd : float
porosity calculated from Harmonised World Soil Database
"""
def __init__(self, gpi, lon, lat, cell, data,
topo_complex=None, wetland_frac=None,
porosity_gldas=None, porosity_hwsd=None):
self.gpi = gpi
self.longitude = lon
self.latitude = lat
self.cell = cell
self.topo_complex = topo_complex
self.wetland_frac = wetland_frac
self.porosity_gldas = porosity_gldas
self.porosity_hwsd = porosity_hwsd
self.data = data
def __repr__(self):
return "ASCAT time series gpi:%d lat:%2.3f lon:%3.3f" % (self.gpi, self.latitude, self.longitude)
[docs] def plot(self, *args, **kwargs):
"""
wrapper for pandas.DataFrame.plot which adds title to plot
and drops NaN values for plotting
Returns
-------
ax : axes
matplotlib axes of the plot
Raises
------
ASCATReaderException
if data attribute is not a pandas.DataFrame
"""
if type(self.data) is pd.DataFrame:
tempdata = self.data.dropna(how='all')
ax = tempdata.plot(*args, figsize=(15, 5), **kwargs)
try:
ax.set_title(self.__repr__())
except AttributeError:
pass
return ax
else:
raise ASCATReaderException("data attribute is not a pandas.DataFrame")
[docs]class Ascat_data(object):
"""
Class that provides access to ASCAT data stored in userformat which is downloadable from
the TU Wien FTP Server after registration at http://rs.geo.tuwien.ac.at .
Parameters
----------
path : string
path to data folder which contains the zip files from the FTP server
grid_path : string
path to grid_info folder which contains txt files with information about
grid point index,latitude, longitude and cell
grid_info_filename : string, optional
name of the grid info txt file in grid_path
advisory_flags_path : string, optional
path to advisory flags .dat files, if not provided they will not be used
topo_threshold : int, optional
if topographic complexity of read grid point is above this
threshold a warning is output during reading
wetland_threshold : int, optional
if wetland fraction of read grid point is above this
threshold a warning is output during reading
Attributes
----------
path : string
path to data folder which contains the zip files from the FTP server
grid_path : string
path to grid_info folder which contains txt files with information about
grid point index,latitude, longitude and cell
grid_info_filename : string
name of the grid info txt file in grid_path
grid_info_np_filename : string
name of the numpy save file to the grid information
topo_threshold : int
if topographic complexity of read grid point is above this
threshold a warning is output during reading
wetland_threshold : int
if wetland fraction of read grid point is above this
threshold a warning is output during reading
grid_info_loaded : boolean
true if the grid information has already been loaded
grid : :class:`pytesmo.grid.grids.CellGrid` object
CellGrid object, which provides nearest neighbor search and other features
advisory_flags_path : string
path to advisory flags .dat files, if not provided they will not be used
include_advflags : boolean
True if advisory flags are available
Methods
-------
unzip_cell(cell)
unzips zipped grid point files into subdirectory
read_advisory_flags(gpi)
reads the advisory flags for a given grid point index
"""
def __init__(self, path, grid_path, grid_info_filename='TUW_W54_01_lonlat-ld-land.txt', \
advisory_flags_path=None, topo_threshold=50, wetland_threshold=50):
"""
sets the paths and thresholds
"""
self.path = path
self.grid_path = grid_path
self.grid_info_filename = grid_info_filename
self.grid_info_np_filename = 'TUW_W54_01_lonlat-ld-land.npy'
self.topo_threshold = topo_threshold
self.wetland_threshold = wetland_threshold
self.grid_info_loaded = False
self.grid = None
self.advisory_flags_path = advisory_flags_path
if self.advisory_flags_path is None:
self.include_advflags = False
else:
self.include_advflags = True
self.adv_flags_struct = np.dtype([('gpi', np.int32),
('snow', np.uint8, 366),
('frozen', np.uint8, 366),
('water', np.uint8),
('topo', np.uint8)])
def _load_grid_info(self):
"""
Reads the grid info for all land points from the txt file provided by TU Wien.
The first time the actual txt file is parsed and saved as a numpy array to
speed up future data access.
"""
grid_info_np_filepath = os.path.join(self.grid_path, self.grid_info_np_filename)
if os.path.exists(grid_info_np_filepath):
grid_info = np.load(grid_info_np_filepath)
else:
grid_info_filepath = os.path.join(self.grid_path, self.grid_info_filename)
grid_info = np.loadtxt(grid_info_filepath, delimiter=',', skiprows=1)
np.save(os.path.join(self.grid_path, self.grid_info_np_filename), grid_info)
self.grid = grids.CellGrid(grid_info[:, 2], grid_info[:, 1],
grid_info[:, 3].astype(np.int16), gpis=grid_info[:, 0])
self.grid_info_loaded = True
[docs] def unzip_cell(self, cell):
"""
unzips the downloaded .zip cell file into the directory of os.path.join(self.path,cell)
Parameters
----------
cell : int
cell number
"""
filepath = os.path.join(self.path, '%4d.zip' % cell)
unzip_file_path = os.path.join(self.path, '%4d' % cell)
if not os.path.exists(unzip_file_path):
os.mkdir(unzip_file_path)
zfile = zipfile.ZipFile(filepath)
for name in zfile.namelist():
(dirname, filename) = os.path.split(name)
fd = open(os.path.join(unzip_file_path, filename), "w")
fd.write(zfile.read(name))
fd.close()
zfile.close()
def _datetime_arr(self, longdate):
"""
parsing function that takes a number of type long which contains
YYYYMMDDHH and returns a datetime object
Parameters
----------
longdate : long
Date including hour as number of type long in format YYYYMMDDHH
Returns
-------
datetime : datetime
"""
string = str(longdate)
year = int(string[0:4])
month = int(string[4:6])
day = int(string[6:8])
hour = int(string[8:])
return datetime(year, month, day, hour)
def _read_ts(self, *args, **kwargs):
"""
takes either 1 or 2 arguments and calls the correct function
which is either reading the gpi directly or finding
the nearest gpi from given lat,lon coordinates and then reading it
"""
if not self.grid_info_loaded:
self._load_grid_info()
if len(args) == 1:
return self._read_gp(args[0], **kwargs)
if len(args) == 2:
return self._read_lonlat(args[0], args[1], **kwargs)
def _read_gp(self, gpi, **kwargs):
"""
reads the time series of the given grid point index. Masks frozen and snow observations
if keywords are present
Parameters
----------
gpi : long
grid point index
mask_frozen_prob : int,optional
if included in kwargs then all observations taken when
frozen probability > mask_frozen_prob are removed from the result
mask_snow_prob : int,optional
if included in kwargs then all observations taken when
snow probability > mask_snow_prob are removed from the result
Returns
-------
df : pandas.DataFrame
containing all fields in the list self.include_in_df
plus frozen_prob and snow_prob if a path to advisory flags was set during
initialization
"""
cell = self.grid.gpi2cell(gpi)
gp_file = os.path.join(self.path, '%4d' % cell, self.gp_filename_template % gpi)
if not os.path.exists(gp_file):
print 'first time reading from cell %4d unzipping ...' % cell
self.unzip_cell(cell)
data = np.fromfile(gp_file, dtype=self.gp_filestruct)
dates = data['DAT']
datetime_parser = np.vectorize(self._datetime_arr)
datetimes_correct = datetime_parser(dates)
dict_df = {}
for into_df in self.include_in_df:
d = np.ma.asarray(data[into_df], dtype=self.datatype[into_df])
d = np.ma.masked_equal(d, self.nan_values[into_df])
if into_df in self.scale_factor.keys():
d = d * self.scale_factor[into_df]
dict_df[into_df] = d
df = pd.DataFrame(dict_df, index=datetimes_correct)
if self.include_advflags:
adv_flags, topo, wetland = self.read_advisory_flags(gpi)
if topo >= self.topo_threshold:
warnings.warn("Warning gpi shows topographic complexity of %d %%. Data might not be usable." % topo)
if wetland >= self.wetland_threshold:
warnings.warn("Warning gpi shows wetland fraction of %d %%. Data might not be usable." % wetland)
df['doy'] = doy(df.index.month, df.index.day)
df = df.join(adv_flags, on='doy', how='left')
del df['doy']
if 'mask_frozen_prob' in kwargs:
mask_frozen = kwargs['mask_frozen_prob']
df = df[df['frozen_prob'] <= mask_frozen]
if 'mask_snow_prob' in kwargs:
mask_snow = kwargs['mask_snow_prob']
df = df[df['snow_prob'] <= mask_snow]
lon, lat = self.grid.gpi2lonlat(gpi)
return df, gpi, lon, lat, cell
def _read_lonlat(self, lon, lat, **kwargs):
return self._read_gp(self.grid.find_nearest_gpi(lon, lat), **kwargs)
[docs] def read_advisory_flags(self, gpi):
"""
Read the advisory flags located in the self.advisory_flags_path
Advisory flags include frozen probability, snow cover probability
topographic complexity and wetland fraction.
Parameters
----------
gpi : long
grid point index
Returns
-------
df : pandas.DataFrame
containing the columns frozen_prob and snow_prob. lenght 366 with one entry for
every day of the year, including February 29th
topo : numpy.uint8
topographic complexity ranging from 0-100
wetland : numpy.uint8
wetland fraction of pixel in percent
"""
if not self.include_advflags:
raise ASCATReaderException("Error: advisory_flags_path is not set")
if not self.grid_info_loaded:
self._load_grid_info()
cell = self.grid.gpi2cell(gpi)
adv_file = os.path.join(self.advisory_flags_path, '%d_advisory-flags.dat' % cell)
data = np.fromfile(adv_file, dtype=self.adv_flags_struct)
index = np.where(data['gpi'] == gpi)[0]
data = data[index]
snow = data['snow'][0]
snow[snow == 0] += 101
snow -= 101
df = pd.DataFrame({'snow_prob': snow, 'frozen_prob': data['frozen'][0]})
return df, data['topo'][0], data['water'][0]
[docs]class AscatNetcdf(object):
"""
Class that provides access to ASCAT data stored in netCDF format which is downloadable from
the HSAF website.
Parameters
----------
path : string
path to data folder which contains the zip files from the FTP server
grid_path : string
path to grid_info folder which contains a netcdf file with information about
grid point index,latitude, longitude and cell
grid_info_filename : string, optional
name of the grid info netCDF file in grid_path
default 'TUW_WARP5_grid_info_2_1.nc'
topo_threshold : int, optional
if topographic complexity of read grid point is above this
threshold a warning is output during reading
wetland_threshold : int, optional
if wetland fraction of read grid point is above this
threshold a warning is output during reading
netcdftemplate : string, optional
string template for the netCDF filename. This specifies where the cell number is
in the netCDF filename. Standard value is 'TUW_METOP_ASCAT_WARP55R12_%04d.nc' in
which %04d will be substituded for the cell number during reading of the data
loc_id : string, optional
name of the location id in the netCDF file
obs_var : string, optional
observation variable that provides the lookup between
observation number and the location id
topo_var : string, optional
name of topographic complexity variable in netCDF file
wetland_var : string, optional
name of wetland fraction variable in netCDF file
snow_var : string, optional
name of snow probability variable in netCDF file
frozen_var : string, optional
name of frozen probability variable in netCDF file
Attributes
----------
path : string
path to data folder which contains the zip files from the FTP server
grid_path : string
path to grid_info folder which contains txt files with information about
grid point index,latitude, longitude and cell
grid_info_filename : string, optional
name of the grid info netCDF file in grid_path
default 'TUW_WARP5_grid_info_2_1.nc'
topo_threshold : int
if topographic complexity of read grid point is above this
threshold a warning is output during reading
wetland_threshold : int
if wetland fraction of read grid point is above this
threshold a warning is output during reading
grid_info_loaded : boolean
true if the grid information has already been loaded
grid : grids.CellGrid object
CellGrid object, which provides nearest neighbor search and other features
advisory_flags_path : string
path to advisory flags .dat files, if not provided they will not be used
include_advflags : boolean
True if advisory flags are available
"""
def __init__(self, path, grid_path, grid_info_filename='TUW_WARP5_grid_info_2_1.nc',
topo_threshold=50, wetland_threshold=50, netcdftemplate='TUW_METOP_ASCAT_WARP55R12_%04d.nc',
loc_id='gpi', obs_var='row_size', topo_var='topo', wetland_var='wetland',
snow_var='snow', frozen_var='frozen'):
self.path = path
self.grid_path = grid_path
self.grid_info_filename = grid_info_filename
self.netcdftemplate = netcdftemplate
self.grid_info_loaded = False
self.topo_threshold = topo_threshold
self.wetland_threshold = wetland_threshold
self.loc_id = loc_id
self.obs_var = obs_var
self.topo_var = topo_var
self.wetland_var = wetland_var
self.snow_var = snow_var
self.frozen_var = frozen_var
def _load_grid_info(self):
"""
Reads the grid info for all land points from the netCDF file provided
by TU Wien
"""
grid_info_filepath = os.path.join(self.grid_path, self.grid_info_filename)
grid_info = netCDF4.Dataset(grid_info_filepath, 'r')
land = grid_info.variables['land_flag'][:]
valid_points = np.where(land == 1)[0]
# read whole grid information because this is faster than reading
# only the valid points
lon = grid_info.variables['lon'][:]
lat = grid_info.variables['lat'][:]
gpis = grid_info.variables['gpi'][:]
cells = grid_info.variables['cell'][:]
self.grid = grids.CellGrid(lon[valid_points], lat[valid_points], cells[valid_points], gpis=gpis[valid_points])
self.grid_info_loaded = True
grid_info.close()
def _read_ts(self, *args, **kwargs):
"""
takes either 1 or 2 arguments and calls the correct function
which is either reading the gpi directly or finding
the nearest gpi from given lat,lon coordinates and then reading it
"""
if not self.grid_info_loaded:
self._load_grid_info()
if len(args) == 1:
return self._read_gp(args[0], **kwargs)
if len(args) == 2:
return self._read_lonlat(args[0], args[1], **kwargs)
def _read_lonlat(self, lon, lat, **kwargs):
return self._read_gp(self.grid.find_nearest_gpi(lon, lat)[0], **kwargs)
def _read_gp(self, gpi, **kwargs):
"""
reads the time series of the given grid point index. Masks frozen and snow observations
if keywords are present
Parameters
----------
gpi : long
grid point index
mask_frozen_prob : int,optional
if included in kwargs then all observations taken when
frozen probability > mask_frozen_prob are removed from the result
mask_snow_prob : int,optional
if included in kwargs then all observations taken when
snow probability > mask_snow_prob are removed from the result
absolute_values : boolean, optional
if True soil porosities from HWSD and GLDAS will be used to
derive absolute values which will be available in the
pandas.DataFrame in the columns
'sm_por_gldas','sm_noise_por_gldas',
'sm_por_hwsd','sm_noise_por_hwsd'
Returns
-------
df : pandas.DataFrame
containing all fields in the list self.include_in_df
plus frozen_prob and snow_prob if a path to advisory flags was set during
initialization
gpi : long
grid point index
lon : float
longitude
lat : float
latitude
cell : int
cell number
topo : int
topographic complexity
wetland : int
wetland fraction
porosity : dict
porosity values for 'gldas' and 'hwsd'
"""
if not self.grid_info_loaded:
self._load_grid_info()
cell = self.grid.gpi2cell(gpi)
ncfile = netCDF4.Dataset(os.path.join(self.path, self.netcdftemplate % cell), 'r')
gpi_index = np.where(ncfile.variables[self.loc_id][:] == gpi)[0]
time_series_length = ncfile.variables[self.obs_var][gpi_index]
startindex = np.sum(ncfile.variables[self.obs_var][:gpi_index])
endindex = startindex + time_series_length
timestamps = netCDF4.num2date(ncfile.variables['time'][startindex:endindex],
ncfile.variables['time'].units)
dict_df = {}
for into_df in self.include_in_df:
d = ncfile.variables[into_df][startindex:endindex]
dict_df[into_df] = d
df = pd.DataFrame(dict_df, index=timestamps)
# read porosity values
porosity = {}
for por_source in ['gldas', 'hwsd']:
porosity[por_source] = ncfile.variables['por_%s' % por_source][gpi_index][0]
if 'absolute_values' in kwargs:
if kwargs['absolute_values']:
for por_source in ['gldas', 'hwsd']:
for el in self.to_absolute:
df['%s_por_%s' % (el, por_source)] = (df[el] / 100.0) * (porosity[por_source])
topo = ncfile.variables[self.topo_var][gpi_index][0]
wetland = ncfile.variables[self.wetland_var][gpi_index][0]
snow = np.squeeze(ncfile.variables[self.snow_var][gpi_index, :])
# if data is not valid assume no snow
if type(snow) == np.ma.masked_array:
warnings.warn('Snow probabilities not valid, assuming no snow')
snow = snow.filled(0)
frozen = np.squeeze(ncfile.variables[self.frozen_var][gpi_index, :])
# if data is not valid assume no freezing
if type(frozen) == np.ma.masked_array:
warnings.warn('Frozen probabilities not valid, assuming no freezing')
frozen = frozen.filled(0)
adv_flags = pd.DataFrame({'snow_prob': snow,
'frozen_prob': frozen})
if topo >= self.topo_threshold:
warnings.warn("Warning gpi shows topographic complexity of %d %%. Data might not be usable." % topo)
if wetland >= self.wetland_threshold:
warnings.warn("Warning gpi shows wetland fraction of %d %%. Data might not be usable." % wetland)
df['doy'] = doy(df.index.month, df.index.day)
df = df.join(adv_flags, on='doy', how='left')
del df['doy']
if 'mask_frozen_prob' in kwargs:
mask_frozen = kwargs['mask_frozen_prob']
df = df[df['frozen_prob'] <= mask_frozen]
if 'mask_snow_prob' in kwargs:
mask_snow = kwargs['mask_snow_prob']
df = df[df['snow_prob'] <= mask_snow]
lon, lat = self.grid.gpi2lonlat(gpi)
return df, gpi, lon, lat, cell, topo, wetland, porosity
[docs]class AscatH25_SSM(AscatNetcdf):
"""
class for reading ASCAT SSM data. It extends AscatNetcdf and provides the
information necessary for reading SSM data
Parameters
----------
path : string
path to data folder which contains the netCDF files from the FTP server
grid_path : string
path to grid_info folder which contains txt files with information about
grid point index,latitude, longitude and cell
grid_info_filename : string, optional
name of the grid info netCDF file in grid_path
default 'TUW_WARP5_grid_info_2_1.nc'
advisory_flags_path : string, optional
path to advisory flags .dat files, if not provided they will not be used
topo_threshold : int, optional
if topographic complexity of read grid point is above this
threshold a warning is output during reading
wetland_threshold : int, optional
if wetland fraction of read grid point is above this
threshold a warning is output during reading
include_in_df : list, optional
list of variables which should be included in the returned DataFrame.
Default is all variables
['sm', 'sm_noise', 'ssf', 'proc_flag', 'orbit_dir']
Attributes
----------
include_in_df : list
list of variables in the netcdf file
that should be returned to the user after reading
Methods
-------
read_ssm(*args,**kwargs)
read surface soil moisture
"""
def __init__(self, path, grid_path, grid_info_filename='TUW_WARP5_grid_info_2_1.nc',
topo_threshold=50, wetland_threshold=50,
include_in_df=['sm', 'sm_noise', 'ssf', 'proc_flag', 'orbit_dir']):
self.path = path
self._get_product_version()
version_kwargs_dict = {'WARP 5.5 Release 1.2':
{'netcdftemplate': 'TUW_METOP_ASCAT_WARP55R12_%04d.nc',
'loc_id': 'gpi',
'obs_var': 'row_size',
'topo_var': 'topo',
'wetland_var': 'wetland',
'snow_var': 'snow',
'frozen_var': 'frozen'},
'WARP 5.5 Release 2.1':
{'netcdftemplate': 'TUW_METOP_ASCAT_WARP55R21_%04d.nc',
'loc_id': 'location_id',
'obs_var': 'row_size',
'topo_var': 'advf_topo',
'wetland_var': 'advf_wetland',
'snow_var': 'advf_snow_prob',
'frozen_var': 'advf_frozen_prob'}
}
super(AscatH25_SSM, self).__init__(path, grid_path, grid_info_filename=grid_info_filename,
topo_threshold=topo_threshold, wetland_threshold=wetland_threshold,
**version_kwargs_dict[self.product_version])
self.include_in_df = include_in_df
self.to_absolute = ['sm', 'sm_noise']
def _get_product_version(self):
first_file = glob(os.path.join(self.path, '*.nc'))[0]
with netCDF4.Dataset(first_file) as dataset:
self.product_version = dataset.product_version
[docs] def read_ssm(self, *args, **kwargs):
"""
function to read SSM takes either 1 or 2 arguments.
It can be called as read_ssm(gpi,**kwargs) or read_ssm(lon,lat,**kwargs)
Parameters
----------
gpi : int
grid point index
lon : float
longitude of point
lat : float
latitude of point
mask_ssf : boolean, optional
default False, if True only SSF values of 1 will be allowed, all others are removed
mask_frozen_prob : int,optional
if included in kwargs then all observations taken when
frozen probability > mask_frozen_prob are removed from the result
mask_snow_prob : int,optional
if included in kwargs then all observations taken when
snow probability > mask_snow_prob are removed from the result
absolute_values : boolean, optional
if True soil porosities from HWSD and GLDAS will be used to
derive absolute values which will be available in the
pandas.DataFrame in the columns
'sm_por_gldas','sm_noise_por_gldas',
'sm_por_hwsd','sm_noise_por_hwsd'
Returns
-------
ASCATTimeSeries : object
:class:`pytesmo.io.sat.ascat.ASCATTimeSeries` instance
"""
df, gpi, lon, lat, cell, topo, wetland, porosity = super(AscatH25_SSM, self)._read_ts(*args, **kwargs)
if 'mask_ssf' in kwargs:
mask_ssf = kwargs['mask_ssf']
if mask_ssf:
df = df[df['ssf'] == 1]
return ASCATTimeSeries(gpi, lon, lat, cell, df,
topo_complex=topo, wetland_frac=wetland,
porosity_gldas=porosity['gldas'],
porosity_hwsd=porosity['hwsd'])
[docs]class Ascat_SSM(Ascat_data):
"""
class for reading ASCAT SSM data. It extends Ascat_data and provides the
information necessary for reading SSM data
Parameters
----------
path : string
path to data folder which contains the zip files from the FTP server
grid_path : string
path to grid_info folder which contains txt files with information about
grid point index,latitude, longitude and cell
grid_info_filename : string, optional
name of the grid info txt file in grid_path
advisory_flags_path : string, optional
path to advisory flags .dat files, if not provided they will not be used
topo_threshold : int, optional
if topographic complexity of read grid point is above this
threshold a warning is output during reading
wetland_threshold : int, optional
if wetland fraction of read grid point is above this
threshold a warning is output during reading
Attributes
----------
gp_filename_template : string
defines how the gpi is put into the template string to make the filename
gp_filestruct : numpy.dtype
structure template of the SSM .dat file
scale_factor : dict
factor by which to multiply the raw data to get the correct values
for each field in the gp_filestruct
include_in_df : list
list of fields that should be returned to the user after reading
nan_values : dict
nan value saved in the file which will be replaced by numpy.nan values
during reading
datatype : dict
datatype of the fields that the return data should have
Methods
-------
read_ssm(*args,**kwargs)
read surface soil moisture
"""
def __init__(self, *args, **kwargs):
super(Ascat_SSM, self).__init__(*args, **kwargs)
self.gp_filename_template = 'TUW_ASCAT_SSM_W55_gp%d.dat'
self.gp_filestruct = np.dtype([('DAT', np.int32),
('SSM', np.uint8),
('ERR', np.uint8),
('SSF', np.uint8)])
self.scale_factor = {'SSM': 0.5, 'ERR': 0.5}
self.include_in_df = ['SSM', 'ERR', 'SSF']
self.nan_values = {'SSM': 255, 'ERR': 255, 'SSF': 255}
self.datatype = {'SSM': np.float, 'ERR': np.float, 'SSF': np.int}
[docs] def read_ssm(self, *args, **kwargs):
"""
function to read SSM takes either 1 or 2 arguments.
It can be called as read_ssm(gpi,**kwargs) or read_ssm(lon,lat,**kwargs)
Parameters
----------
gpi : int
grid point index
lon : float
longitude of point
lat : float
latitude of point
mask_ssf : boolean, optional
default False, if True only SSF values of 1 will be allowed, all others are removed
mask_frozen_prob : int,optional
if included in kwargs then all observations taken when
frozen probability > mask_frozen_prob are removed from the result
mask_snow_prob : int,optional
if included in kwargs then all observations taken when
snow probability > mask_snow_prob are removed from the result
Returns
-------
ASCATTimeSeries : object
:class:`pytesmo.io.sat.ascat.ASCATTimeSeries` instance
"""
df, gpi, lon, lat, cell = super(Ascat_SSM, self)._read_ts(*args, **kwargs)
if 'mask_ssf' in kwargs:
mask_ssf = kwargs['mask_ssf']
if mask_ssf:
df = df[df['SSF'] == 1]
return ASCATTimeSeries(gpi, lon, lat, cell, df)
[docs]class Ascat_SWI(Ascat_data):
"""
class for reading ASCAT SWI data. It extends Ascat_data and provides the
information necessary for reading SWI data
Parameters
----------
path : string
path to data folder which contains the zip files from the FTP server
grid_path : string
path to grid_info folder which contains txt files with information about
grid point index,latitude, longitude and cell
grid_info_filename : string, optional
name of the grid info txt file in grid_path
advisory_flags_path : string, optional
path to advisory flags .dat files, if not provided they will not be used
topo_threshold : int, optional
if topographic complexity of read grid point is above this
threshold a warning is output during reading
wetland_threshold : int, optional
if wetland fraction of read grid point is above this
threshold a warning is output during reading
Attributes
----------
gp_filename_template : string
defines how the gpi is put into the template string to make the filename
gp_filestruct : numpy.dtype
structure template of the SSM .dat file
scale_factor : dict
factor by which to multiply the raw data to get the correct values
for each field in the gp_filestruct
include_in_df : list
list of fields that should be returned to the user after reading
nan_values : dict
nan value saved in the file which will be replaced by numpy.nan values
during reading
datatype : dict
datatype of the fields that the return data should have
T_SWI : dict
information about which numerical T-Value maps to which entry in the
datastructure
T_QFLAG : dict
information about which numerical T-Value maps to which entry in the
datastructure
Methods
-------
read_swi(*args,**kwargs)
read soil water index
"""
def __init__(self, *args, **kwargs):
super(Ascat_SWI, self).__init__(*args, **kwargs)
self.gp_filename_template = 'TUW_ASCAT_SWI_W55_gp%d.dat'
self.gp_filestruct = np.dtype([('DAT', np.int32),
('SWI_T=1', np.uint8), ('SWI_T=5', np.uint8), ('SWI_T=10', np.uint8), ('SWI_T=15', np.uint8),
('SWI_T=20', np.uint8), ('SWI_T=40', np.uint8), ('SWI_T=60', np.uint8), ('SWI_T=100', np.uint8),
('QFLAG_T=1', np.uint8), ('QFLAG_T=5', np.uint8), ('QFLAG_T=10', np.uint8), ('QFLAG_T=15', np.uint8),
('QFLAG_T=20', np.uint8), ('QFLAG_T=40', np.uint8), ('QFLAG_T=60', np.uint8), ('QFLAG_T=100', np.uint8)])
self.scale_factor = {'SWI_T=1': 0.5, 'SWI_T=5': 0.5, 'SWI_T=10': 0.5, 'SWI_T=15': 0.5,
'SWI_T=20': 0.5, 'SWI_T=40': 0.5, 'SWI_T=60': 0.5, 'SWI_T=100': 0.5,
'QFLAG_T=1': 0.5, 'QFLAG_T=5': 0.5, 'QFLAG_T=10': 0.5, 'QFLAG_T=15': 0.5,
'QFLAG_T=20': 0.5, 'QFLAG_T=40': 0.5, 'QFLAG_T=60': 0.5, 'QFLAG_T=100': 0.5
}
self.include_in_df = ['SWI_T=1', 'SWI_T=5', 'SWI_T=10', 'SWI_T=15', 'SWI_T=20', 'SWI_T=40', 'SWI_T=60', 'SWI_T=100',
'QFLAG_T=1', 'QFLAG_T=5', 'QFLAG_T=10', 'QFLAG_T=15', 'QFLAG_T=20', 'QFLAG_T=40', 'QFLAG_T=60', 'QFLAG_T=100']
self.nan_values = {'SWI_T=1': 255, 'SWI_T=5': 255, 'SWI_T=10': 255, 'SWI_T=15': 255,
'SWI_T=20': 255, 'SWI_T=40': 255, 'SWI_T=60': 255, 'SWI_T=100': 255,
'QFLAG_T=1': 255, 'QFLAG_T=5': 255, 'QFLAG_T=10': 255, 'QFLAG_T=15': 255,
'QFLAG_T=20': 255, 'QFLAG_T=40': 255, 'QFLAG_T=60': 255, 'QFLAG_T=100': 255
}
self.datatype = {'SWI_T=1': np.float, 'SWI_T=5': np.float, 'SWI_T=10': np.float, 'SWI_T=15': np.float,
'SWI_T=20': np.float, 'SWI_T=40': np.float, 'SWI_T=60': np.float, 'SWI_T=100': np.float,
'QFLAG_T=1': np.float, 'QFLAG_T=5': np.float, 'QFLAG_T=10': np.float, 'QFLAG_T=15': np.float,
'QFLAG_T=20': np.float, 'QFLAG_T=40': np.float, 'QFLAG_T=60': np.float, 'QFLAG_T=100': np.float
}
self.T_SWI = {1: 'SWI_T=1', 5: 'SWI_T=5', 10: 'SWI_T=10', 15: 'SWI_T=15',
20: 'SWI_T=20', 40: 'SWI_T=40', 60: 'SWI_T=60', 100: 'SWI_T=100'}
self.T_QFLAG = {1: 'QFLAG_T=1', 5: 'QFLAG_T=5', 10: 'QFLAG_T=10', 15: 'QFLAG_T=15',
20: 'QFLAG_T=20', 40: 'QFLAG_T=40', 60: 'QFLAG_T=60', 100: 'QFLAG_T=100'}
[docs] def read_swi(self, *args, **kwargs):
"""
function to read SWI takes either 1 or 2 arguments being.
It can be called as read_swi(gpi,**kwargs) or read_swi(lon,lat,**kwargs)
Parameters
----------
gpi : int
grid point index
lon : float
longitude of point
lat : float
latitude of point
T : int, optional
if set only the SWI and QFLAG of this T-Value will be returned
mask_qf : int, optional
if set, SWI values with a QFLAG value lower than the mask_qf value will be masked.
This is done for each T value independently
mask_frozen_prob : int,optional
if included in kwargs then all observations taken when
frozen probability > mask_frozen_prob are removed from the result
mask_snow_prob : int,optional
if included in kwargs then all observations taken when
snow probability > mask_snow_prob are removed from the result
Returns
-------
df : pandas.DataFrame
containing all fields in self.include_in_df plus frozen_prob and snow_prob if
advisory_flags_path was set. If T was set then only SWI and QFLAG values for the
selected T value are included plut frozen_prob and snow_prob if applicable
"""
df, gpi, lon, lat, cell = super(Ascat_SWI, self)._read_ts(*args, **kwargs)
if 'T' in kwargs:
T = kwargs['T']
if T in self.T_SWI.keys():
if self.include_advflags:
df = df[[self.T_SWI[T], self.T_QFLAG[T], 'frozen_prob', 'snow_prob']]
else:
df = df[[self.T_SWI[T], self.T_QFLAG[T]]]
else:
raise ASCATReaderException("Invalid T value. Choose one of " + str(sorted(self.T_SWI.keys())))
# remove rows that have to small QFLAG
if 'mask_qf' in kwargs:
mask_qf = kwargs['mask_qf']
if mask_qf:
df = df[df[self.T_QFLAG[T]] >= mask_qf]
else:
# mask each T value according to qf threshold
if 'mask_qf' in kwargs:
mask_qf = kwargs['mask_qf']
for key in self.T_SWI:
masked = df[self.T_QFLAG[key]] <= mask_qf
df[self.T_SWI[key]][masked] = np.NAN
df[self.T_QFLAG[key]][masked] = np.NAN
return ASCATTimeSeries(gpi, lon, lat, cell, df)