#Copyright (c) 2013,Vienna University of Technology, Department of Geodesy and Geoinformation
#All rights reserved.
#Redistribution and use in source and binary forms, with or without
#modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the <organization> nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
#ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
#WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
#DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
#DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
#(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
#LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
#ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
Created on Jul 29, 2013
@author: Christoph Paulik christoph.paulik@geo.tuwien.ac.at
'''
import os
import numpy as np
import zipfile
import pandas as pd
import warnings
import pytesmo.grid.nearest_neighbor as NN
from pytesmo.timedate.julian import doy
import matplotlib.pyplot as plt
from datetime import datetime
[docs]class ASCATReaderException(Exception):
pass
[docs]class Ascat_data(object):
"""
Class that provides access to ASCAT data stored in userformat which is downloadable from
rs.geo.tuwien.ac.at after registration.
Attributes:
-----------
path : string
path to data folder which contains the zip files from the FTP server
grid_path : string
path to grid_info folder which contains txt files with information about
grid point index,latitude, longitude and cell
grid_info_filename : string
name of the grid info txt file in grid_path
grid_info_np_filename : string
name of the numpy save file to the grid information
topo_threshold : int
if topographic complexity of read grid point is above this
threshold a warning is output during reading
water_threshold : int
if water fraction of read grid point is above this
threshold a warning is output during reading
grid_info_loaded : boolean
true if the grid information has already been loaded
kdTree : NN.findGeoNN object
kdTree wrapper object used for finding the nearest neighbor
advisory_flags_path : string
path to advisory flags .dat files, if not provided they will not be used
include_advflags : boolean
True if advisory flags are available
Methods:
--------
unzip_cell(cell)
unzips zipped grid point files into subdirectory
find_nearest_gpi(lon,lat)
finds nearest grid point index given longitude and latitude
read_advisory_flags(gpi)
reads the advisory flags for a given grid point index
"""
def __init__(self,path,grid_path,grid_info_filename='TUW_W54_01_lonlat-ld-land.txt',\
advisory_flags_path=None,topo_threshold=50,water_threshold=50):
"""
sets the paths and thresholds
Parameters:
-----------
path : string
path to data folder which contains the zip files from the FTP server
grid_path : string
path to grid_info folder which contains txt files with information about
grid point index,latitude, longitude and cell
grid_info_filename : string, optional
name of the grid info txt file in grid_path
advisory_flags_path : string, optional
path to advisory flags .dat files, if not provided they will not be used
topo_threshold : int, optional
if topographic complexity of read grid point is above this
threshold a warning is output during reading
water_threshold : int, optional
if water fraction of read grid point is above this
threshold a warning is output during reading
"""
self.path = path
self.grid_path = grid_path
self.grid_info_filename = grid_info_filename
self.grid_info_np_filename = 'TUW_W54_01_lonlat-ld-land.npy'
self.topo_threshold = topo_threshold
self.water_threshold = water_threshold
self.grid_info_loaded = False
self.kdTree = None
self.advisory_flags_path = advisory_flags_path
if self.advisory_flags_path is None:
self.include_advflags = False
else:
self.include_advflags = True
self.adv_flags_struct = np.dtype([('gpi', np.int32),
('snow', np.uint8,366),
('frozen', np.uint8,366),
('water', np.uint8),
('topo',np.uint8)])
def _load_grid_info(self):
"""
Reads the grid info for all land points from the txt file provided by TU Wien.
The first time the actual txt file is parsed and saved as a numpy array to
speed up future data access.
"""
grid_info_np_filepath = os.path.join(self.grid_path,self.grid_info_np_filename)
if os.path.exists(grid_info_np_filepath):
grid_info = np.load(grid_info_np_filepath)
else:
grid_info_filepath = os.path.join(self.grid_path,self.grid_info_filename)
grid_info = np.loadtxt(grid_info_filepath,delimiter=',',skiprows=1)
np.save(os.path.join(self.grid_path,self.grid_info_np_filename),grid_info)
self.gpis = grid_info[:,0]
self.latitudes = grid_info[:,1]
self.longitudes = grid_info[:,2]
self.cells = grid_info[:,3].astype(np.int16)
self.kdTree = NN.findGeoNN(self.longitudes,self.latitudes)
self.grid_info_loaded = True
[docs] def unzip_cell(self,cell):
"""
unzips the downloaded .zip cell file into the directory of os.path.join(self.path,cell)
Parameters:
-----------
cell : int
cell number
"""
filepath = os.path.join(self.path,'%4d.zip'%cell)
unzip_file_path = os.path.join(self.path,'%4d'%cell)
if not os.path.exists(unzip_file_path):
os.mkdir(unzip_file_path)
zfile = zipfile.ZipFile(filepath)
for name in zfile.namelist():
(dirname, filename) = os.path.split(name)
fd = open(os.path.join(unzip_file_path,filename), "w")
fd.write(zfile.read(name))
fd.close()
def _datetime_arr(self,longdate):
"""
parsing function that takes a number of type long which contains
YYYYMMDDHH and returns a datetime object
Parameters:
-----------
longdate : long
Date including hour as number of type long in format YYYYMMDDHH
Returns:
--------
datetime : datetime
"""
string = str(longdate)
year = int(string[0:4])
month = int(string[4:6])
day = int(string[6:8])
hour = int(string[8:])
return datetime(year, month, day, hour)
[docs] def find_nearest_gpi(self,lon,lat):
"""
finds nearest gpi, builds kdTree if it does not yet exist
Parameters:
-----------
lon : float
longitude of point
lat : float
latitude of point
Returns:
--------
gpi : long
grid point index
"""
d,ind = self.kdTree.find_nearest_index(lon,lat)
return self.gpis[ind]
def _read_ts(self, *args,**kwargs):
"""
takes either 1 or 2 arguments and calls the correct function
which is either reading the gpi directly or finding
the nearest gpi from given lat,lon coordinates and then reading it
"""
if not self.grid_info_loaded: self._load_grid_info()
if len(args) == 1:
return self._read_gp(args[0],**kwargs)
if len(args) == 2:
return self._read_lonlat(args[0], args[1],**kwargs)
def _read_gp(self,gpi,**kwargs):
"""
reads the time series of the given grid point index. Masks frozen and snow observations
if keywords are present
Parameters:
-----------
gpi : long
grid point index
mask_frozen_prob : int,optional
if included in kwargs then all observations taken when
frozen probability > mask_frozen_prob are removed from the result
mask_snow_prob : int,optional
if included in kwargs then all observations taken when
snow probability > mask_snow_prob are removed from the result
Returns:
--------
df : pandas.DataFrame
containing all fields in the list self.include_in_df
plus frozen_prob and snow_prob if a path to advisory flags was set during
initialization
"""
index = np.where(gpi == self.gpis)[0]
cell = self.cells[index][0]
gp_file = os.path.join(self.path,'%4d'%cell,self.gp_filename_template%gpi)
if not os.path.exists(gp_file):
self.unzip_cell(cell)
data = np.fromfile(gp_file,dtype=self.gp_filestruct)
dates = data['DAT']
datetime_parser = np.vectorize(self._datetime_arr)
datetimes_correct = datetime_parser(dates)
dict_df={}
for into_df in self.include_in_df:
d = np.ma.asarray(data[into_df],dtype=self.datatype[into_df])
d = np.ma.masked_equal(d,self.nan_values[into_df])
if self.scale_factor.has_key(into_df):
d = d * self.scale_factor[into_df]
dict_df[into_df] = d
df = pd.DataFrame(dict_df,index=datetimes_correct)
if self.include_advflags:
adv_flags,topo,water = self.read_advisory_flags(gpi)
if topo >= self.topo_threshold:
warnings.warn("Warning gpi shows topographic complexity of %d %%. Data might not be usable."%topo)
if water >= self.water_threshold:
warnings.warn("Warning gpi shows water fraction of %d %%. Data might not be usable."%water)
df['doy'] = doy(df.index.month, df.index.day)
df = df.join(adv_flags,on='doy',how='left')
del df['doy']
if 'mask_frozen_prob' in kwargs:
mask_frozen = kwargs['mask_frozen_prob']
df = df[df['frozen_prob']<=mask_frozen]
if 'mask_snow_prob' in kwargs:
mask_snow = kwargs['mask_snow_prob']
df = df[df['snow_prob']<=mask_snow]
return df
def _read_lonlat(self,lon,lat,**kwargs):
return self._read_gp(self.find_nearest_gpi(lon, lat),**kwargs)
[docs] def read_advisory_flags(self,gpi):
"""
Read the advisory flags located in the self.advisory_flags_path
Advisory flags include frozen probability, snow cover probability
topographic complexity and water fraction.
Parameters:
-----------
gpi : long
grid point index
Returns:
--------
df : pandas.DataFrame
containing the columns frozen_prob and snow_prob. lenght 366 with one entry for
every day of the year, including February 29th
topo : numpy.uint8
topographic complexity ranging from 0-100
water : numpy.uint8
water fraction of pixel in percent
"""
if not self.include_advflags:
raise ASCATReaderException("Error: advisory_flags_path is not set")
if not self.grid_info_loaded: self._load_grid_info()
index = np.where(gpi == self.gpis)[0]
cell = self.cells[index][0]
adv_file = os.path.join(self.advisory_flags_path,'%d_advisory-flags.dat'%cell)
data = np.fromfile(adv_file,dtype=self.adv_flags_struct)
index = np.where(data['gpi'] == gpi)[0]
data = data[index]
snow = data['snow'][0]
snow[snow == 0] += 101
snow -= 101
df = pd.DataFrame({'snow_prob':snow,'frozen_prob':data['frozen'][0]})
return df,data['topo'][0],data['water'][0]
[docs]class Ascat_SSM(Ascat_data):
"""
class for reading ASCAT SSM data. It extends Ascat_data and provides the
information necessary for reading SSM data
Attributes:
-----------
gp_filename_template : string
defines how the gpi is put into the template string to make the filename
gp_filestruct : numpy.dtype
structure template of the SSM .dat file
scale_factor : dict
factor by which to multiply the raw data to get the correct values
for each field in the gp_filestruct
include_in_df : list
list of fields that should be returned to the user after reading
nan_values : dict
nan value saved in the file which will be replaced by numpy.nan values
during reading
datatype : dict
datatype of the fields that the return data should have
"""
def __init__(self,*args,**kwargs):
super(Ascat_SSM,self).__init__(*args,**kwargs)
self.gp_filename_template = 'TUW_ASCAT_SSM_W55_gp%d.dat'
self.gp_filestruct = np.dtype([('DAT', np.int32),
('SSM', np.uint8),
('ERR', np.uint8),
('SSF', np.uint8)])
self.scale_factor={'SSM':0.5,'ERR':0.5}
self.include_in_df=['SSM','ERR','SSF']
self.nan_values={'SSM':255,'ERR':255,'SSF':255}
self.datatype = {'SSM':np.float16,'ERR':np.float16,'SSF':np.int16}
[docs] def read_ssm(self,*args,**kwargs):
"""
function to read SSM takes either 1 or 2 arguments being.
It can be called as read_ssm(gpi,**kwargs) or read_ssm(lon,lat,**kwargs)
Parameters:
-----------
gpi : int
grid point index
lon : float
longitude of point
lat : float
latitude of point
mask_ssf : boolean, optional
if True only SSF values of 1 will be allowed, all others are removed
mask_frozen_prob : int,optional
if included in kwargs then all observations taken when
frozen probability > mask_frozen_prob are removed from the result
mask_snow_prob : int,optional
if included in kwargs then all observations taken when
snow probability > mask_snow_prob are removed from the result
Returns:
--------
df : pandas.DataFrame
containing all fields in self.include_in_df plus frozen_prob and snow_prob if
advisory_flags_path was set
"""
df = super(Ascat_SSM,self)._read_ts(*args,**kwargs)
if 'mask_ssf' in kwargs:
mask_ssf = kwargs['mask_ssf']
if mask_ssf:
return df[df['SSF']==1]
return df
[docs]class Ascat_SWI(Ascat_data):
"""
class for reading ASCAT SWI data. It extends Ascat_data and provides the
information necessary for reading SWI data
Attributes:
-----------
gp_filename_template : string
defines how the gpi is put into the template string to make the filename
gp_filestruct : numpy.dtype
structure template of the SSM .dat file
scale_factor : dict
factor by which to multiply the raw data to get the correct values
for each field in the gp_filestruct
include_in_df : list
list of fields that should be returned to the user after reading
nan_values : dict
nan value saved in the file which will be replaced by numpy.nan values
during reading
datatype : dict
datatype of the fields that the return data should have
T_SWI : dict
information about which numerical T-Value maps to which entry in the
datastructure
T_QFLAG : dict
information about which numerical T-Value maps to which entry in the
datastructure
"""
def __init__(self,*args,**kwargs):
super(Ascat_SWI,self).__init__(*args,**kwargs)
self.gp_filename_template = 'TUW_ASCAT_SWI_W55_gp%d.dat'
self.gp_filestruct = np.dtype([('DAT', np.int32),
('SWI_T=1', np.uint8),('SWI_T=5', np.uint8),('SWI_T=10', np.uint8),('SWI_T=15', np.uint8),
('SWI_T=20', np.uint8),('SWI_T=40', np.uint8),('SWI_T=60', np.uint8),('SWI_T=100', np.uint8),
('QFLAG_T=1', np.uint8),('QFLAG_T=5', np.uint8),('QFLAG_T=10', np.uint8),('QFLAG_T=15', np.uint8),
('QFLAG_T=20', np.uint8),('QFLAG_T=40', np.uint8),('QFLAG_T=60', np.uint8),('QFLAG_T=100', np.uint8)])
self.scale_factor={'SWI_T=1':0.5,'SWI_T=5':0.5,'SWI_T=10':0.5,'SWI_T=15':0.5,
'SWI_T=20':0.5,'SWI_T=40':0.5,'SWI_T=60':0.5,'SWI_T=100':0.5,
'QFLAG_T=1':0.5,'QFLAG_T=5':0.5,'QFLAG_T=10':0.5,'QFLAG_T=15':0.5,
'QFLAG_T=20':0.5,'QFLAG_T=40':0.5,'QFLAG_T=60':0.5,'QFLAG_T=100':0.5
}
self.include_in_df=['SWI_T=1','SWI_T=5','SWI_T=10','SWI_T=15','SWI_T=20','SWI_T=40','SWI_T=60','SWI_T=100',
'QFLAG_T=1','QFLAG_T=5','QFLAG_T=10','QFLAG_T=15','QFLAG_T=20','QFLAG_T=40','QFLAG_T=60','QFLAG_T=100']
self.nan_values={'SWI_T=1':255,'SWI_T=5':255,'SWI_T=10':255,'SWI_T=15':255,
'SWI_T=20':255,'SWI_T=40':255,'SWI_T=60':255,'SWI_T=100':255,
'QFLAG_T=1':255,'QFLAG_T=5':255,'QFLAG_T=10':255,'QFLAG_T=15':255,
'QFLAG_T=20':255,'QFLAG_T=40':255,'QFLAG_T=60':255,'QFLAG_T=100':255
}
self.datatype = {'SWI_T=1':np.float16,'SWI_T=5':np.float16,'SWI_T=10':np.float16,'SWI_T=15':np.float16,
'SWI_T=20':np.float16,'SWI_T=40':np.float16,'SWI_T=60':np.float16,'SWI_T=100':np.float16,
'QFLAG_T=1':np.float16,'QFLAG_T=5':np.float16,'QFLAG_T=10':np.float16,'QFLAG_T=15':np.float16,
'QFLAG_T=20':np.float16,'QFLAG_T=40':np.float16,'QFLAG_T=60':np.float16,'QFLAG_T=100':np.float16
}
self.T_SWI={1:'SWI_T=1',5:'SWI_T=5',10:'SWI_T=10',15:'SWI_T=15',
20:'SWI_T=20',40:'SWI_T=40',60:'SWI_T=60',100:'SWI_T=100'}
self.T_QFLAG={1:'QFLAG_T=1',5:'QFLAG_T=5',10:'QFLAG_T=10',15:'QFLAG_T=15',
20:'QFLAG_T=20',40:'QFLAG_T=40',60:'QFLAG_T=60',100:'QFLAG_T=100'}
[docs] def read_swi(self,*args,**kwargs):
"""
function to read SWI takes either 1 or 2 arguments being.
It can be called as read_swi(gpi,**kwargs) or read_swi(lon,lat,**kwargs)
Parameters:
-----------
gpi : int
grid point index
lon : float
longitude of point
lat : float
latitude of point
T : int, optional
if set only the SWI and QFLAG of this T-Value will be returned
mask_qf : int, optional
if set, SWI values with a QFLAG value lower than the mask_qf value will be masked.
This is done for each T value independently
mask_frozen_prob : int,optional
if included in kwargs then all observations taken when
frozen probability > mask_frozen_prob are removed from the result
mask_snow_prob : int,optional
if included in kwargs then all observations taken when
snow probability > mask_snow_prob are removed from the result
Returns:
--------
df : pandas.DataFrame
containing all fields in self.include_in_df plus frozen_prob and snow_prob if
advisory_flags_path was set. If T was set then only SWI and QFLAG values for the
selected T value are included plut frozen_prob and snow_prob if applicable
"""
df = super(Ascat_SWI,self)._read_ts(*args,**kwargs)
if 'T' in kwargs:
T = kwargs['T']
if self.T_SWI.has_key(T):
if self.include_advflags:
df = df [[self.T_SWI[T],self.T_QFLAG[T],'frozen_prob','snow_prob']]
else:
df = df [[self.T_SWI[T],self.T_QFLAG[T]]]
else:
raise ASCATReaderException("Invalid T value. Choose one of " + str(sorted(self.T_SWI.keys())))
#remove rows that have to small QFLAG
if 'mask_qf' in kwargs:
mask_qf = kwargs['mask_qf']
if mask_qf:
return df[df[self.T_QFLAG[T]]>=mask_qf]
else:
#mask each T value according to qf threshold
if 'mask_qf' in kwargs:
mask_qf = kwargs['mask_qf']
for key in self.T_SWI:
masked = df[self.T_QFLAG[key]]<=mask_qf
df[self.T_SWI[key]][masked] = np.NAN
df[self.T_QFLAG[key]][masked] = np.NAN
return df
if __name__ == '__main__':
start = datetime.now()
path = '/media/sf_D/small_projects/cpa_2013_07_userformat_reader/data/ASCAT_SSM_25km_ts_WARP5.5_R0.1/data/'
grid_path = '/media/sf_D/small_projects/cpa_2013_07_userformat_reader/data/auxiliary_data/grid_info/'
adv_path = '/media/sf_D/small_projects/cpa_2013_07_userformat_reader/data/auxiliary_data/advisory_flags/'
ssm = Ascat_SSM(path,grid_path,advisory_flags_path=adv_path)
#ssm.read_advisory_flags(2463927)
data = ssm.read_ssm(9.360,49.224)
print datetime.now() - start
data.plot()
plt.show()
start = datetime.now()
data = ssm.read_ssm(9.360,49.224,mask_ssf=True,mask_frozen_prob=10)
print datetime.now() - start
data.plot()
plt.show()
path = '/media/sf_D/small_projects/cpa_2013_07_userformat_reader/data/ASCAT_SWI_25km_ts_WARP5.5_R0.1/data/'
grid_path = '/media/sf_D/small_projects/cpa_2013_07_userformat_reader/data/auxiliary_data/grid_info/'
swi = Ascat_SWI(path,grid_path,advisory_flags_path=adv_path)
data = swi.read_swi(9.360,49.224,T=20,mask_qf=50,mask_frozen_prob=10)
data.plot()
plt.show()