# Copyright (c) 2014, Vienna University of Technology (TU Wien), Department
# of Geodesy and Geoinformation (GEO).
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the Vienna University of Technology - Department of
# Geodesy and Geoinformation nor the names of its contributors may be used to
# endorse or promote products derived from this software without specific
# prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL VIENNA UNIVERSITY OF TECHNOLOGY,
# DEPARTMENT OF GEODESY AND GEOINFORMATION BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Author: Thomas Mistelbauer Thomas.Mistelbauer@geo.tuwien.ac.at
# Creation date: 2014-06-13
"""
This module provides functions for loading from and writing to NetCDF4 files.
"""
import os.path
import numpy as np
from netCDF4 import Dataset, date2num, num2date
from pytesmo.grid.netcdf import save_grid
from poets.grid import grids
from poets.timedate.dateindex import get_dtindex
def save_image(image, timestamp, region, metadata, dest_file, start_date,
               sp_res, nan_value=-99, shapefile=None, temp_res='dekad',
               compression=False):
    """Saves numpy.ndarray images as multidimensional netCDF4 file.

    If `dest_file` does not exist it is initialised with the grid of the
    region. When the time variable is created, it is filled with the dates
    returned by ``get_dtindex(temp_res, start_date)``. Each image is written
    to the time slot matching `timestamp`; if the timestamp is not stored in
    the time variable yet, it is appended.

    Parameters
    ----------
    image : dict of numpy.ndarrays
        Input image, one array per variable name.
    timestamp : datetime.datetime
        Timestamp of image.
    region : str
        Identifier of the region in the shapefile. If the default shapefile is
        used, this would be the FIPS country code. The value 'global' selects
        a regular grid instead of a shape grid.
    metadata : dict
        NetCDF metadata from source file, keyed by variable name. Can be
        None, in which case no attributes are written.
    dest_file : str
        Path to the output file.
    start_date : datetime.datetime
        First date of available data.
    sp_res : int or float
        Spatial resolution of the grid.
    nan_value : int, optional
        Not a number value for dataset, defaults to -99.
    shapefile : str, optional
        Path to shape file, uses "world country admin boundary shapefile" by
        default.
    temp_res : string or int, optional
        Temporal resolution of the output NetCDF4 file, defaults to dekad.
    compression : bool, optional
        If True, ncfile compression is active.
    """
    if not os.path.isfile(dest_file):
        # The grid is only needed to initialise a new file, so it is built
        # here instead of on every call.
        if region == 'global':
            grid = grids.RegularGrid(sp_res)
        else:
            grid = grids.ShapeGrid(region, sp_res, shapefile)
        save_grid(dest_file, grid)

    # Compression options shared by every variable created below.
    comp_kwargs = {'zlib': True, 'complevel': 4} if compression else {}

    with Dataset(dest_file, 'r+', format='NETCDF4') as ncfile:

        if 'time' not in ncfile.dimensions:
            # Build the date index before touching the file so a failure
            # here does not leave a half-initialised time variable behind.
            dt = get_dtindex(temp_res, start_date)

            ncfile.createDimension("time", None)
            times = ncfile.createVariable('time', 'uint16', ('time',),
                                          **comp_kwargs)
            times.units = 'days since ' + str(start_date)
            times.calendar = 'standard'
            times[:] = date2num(dt.tolist(), units=times.units,
                                calendar=times.calendar)
        else:
            times = ncfile.variables['time']

        dim = ('time', 'lat', 'lon')

        numdate = date2num(timestamp, units=times.units,
                           calendar=times.calendar)

        for key in image:

            if key not in ncfile.variables:
                var = ncfile.createVariable(key, image[key].dtype.char, dim,
                                            fill_value=nan_value,
                                            **comp_kwargs)
            else:
                var = ncfile.variables[key]

            # Read the time variable once per key and reuse it for both the
            # lookup and the append position.
            stored = times[:]
            matches = np.where(stored == numdate)[0]
            if matches.size > 0:
                var_index = matches[0]
            else:
                # Unknown timestamp: append it at the end of the time axis.
                var_index = stored.size
                times[var_index] = numdate

            var[var_index] = image[key]

            if metadata is not None:
                # Attributes already present on the variable are kept as is.
                for item in metadata[key]:
                    if item not in var.ncattrs():
                        var.setncattr(str(item), metadata[key][item])
def write_tmp_file(image, timestamp, region, metadata, dest_file, start_date,
                   sp_res, nan_value=-99, shapefile=None):
    """Saves numpy.ndarray images as multidimensional netCDF4 file.

    If `dest_file` does not exist it is initialised with the grid of the
    region. The time variable is created empty and grows by one entry for
    every timestamp that is not stored yet.

    Parameters
    ----------
    image : dict of numpy.ndarrays
        Input image, one array per variable name.
    timestamp : datetime.datetime
        Timestamp of image.
    region : str
        Identifier of the region in the shapefile. If the default shapefile is
        used, this would be the FIPS country code. The value 'global' selects
        a regular grid instead of a shape grid.
    metadata : dict
        NetCDF metadata from source file, keyed by variable name. Can be
        None, in which case no attributes are written.
    dest_file : str
        Path to the output file.
    start_date : datetime.datetime
        First date of available data.
    sp_res : int or float
        Spatial resolution of the grid.
    nan_value : int, optional
        Not a number value for dataset, defaults to -99.
    shapefile : str, optional
        Path to shape file, uses "world country admin boundary shapefile" by
        default.
    """
    if not os.path.isfile(dest_file):
        # The grid is only needed to initialise a new file, so it is built
        # here instead of on every call.
        if region == 'global':
            grid = grids.RegularGrid(sp_res)
        else:
            grid = grids.ShapeGrid(region, sp_res, shapefile)
        save_grid(dest_file, grid)

    with Dataset(dest_file, 'r+', format='NETCDF4') as ncfile:

        if 'time' not in ncfile.dimensions:
            ncfile.createDimension("time", None)
            times = ncfile.createVariable('time', 'uint16', ('time',))
            times.units = 'days since ' + str(start_date)
            times.calendar = 'standard'
        else:
            times = ncfile.variables['time']

        numdate = date2num(timestamp, units=times.units,
                           calendar=times.calendar)

        dim = ('time', 'lat', 'lon')

        for key in image:

            if key not in ncfile.variables:
                var = ncfile.createVariable(key, image[key].dtype.char, dim,
                                            fill_value=nan_value)
            else:
                var = ncfile.variables[key]

            if times.shape[0] == 0:
                # Empty time axis: the image becomes the first entry. Handled
                # separately so an empty variable is never read.
                times[0] = numdate
                var_index = 0
            else:
                # Read the time variable once and reuse it for both the
                # lookup and the append position.
                stored = times[:]
                matches = np.where(stored == numdate)[0]
                if matches.size > 0:
                    var_index = matches[0]
                else:
                    var_index = stored.size
                    times[var_index] = numdate

            var[var_index] = image[key]

            if metadata is not None:
                # Attributes already present on the variable are kept as is.
                for item in metadata[key]:
                    if item not in var.ncattrs():
                        var.setncattr(str(item), metadata[key][item])
def read_image(source_file, variables=None):
    """Reads data out of netCDF file and returns it as numpy.ndarray

    Only the first entry along the leading dimension of every variable is
    returned. The variables 'gpi', 'lat', 'lon' and 'time' are never part of
    the returned data.

    Parameters
    ----------
    source_file : str
        Path to source file.
    variables : list of str, optional
        Variables to read from file, reads all variables if not set

    Returns
    -------
    data : dict of numpy.arrays
        First image of every selected variable, keyed by variable name.
    lon : numpy.array
        Longitudes of the source file.
    lat : numpy.array
        Latitudes of the source file.
    timestamp : datetime-like or None
        First timestamp of the time variable as returned by num2date, None
        if the file has no time variable.
    metadata : dict of dicts
        Attributes of every selected variable, keyed by variable name.
        Attributes starting with an underscore and 'scale_factor' are left
        out.
    """
    # Coordinate and index variables that are not image data.
    skipped = ('gpi', 'lat', 'lon', 'time')

    with Dataset(source_file, 'r', format='NETCDF4') as nc:

        if 'time' in nc.variables:
            times = nc.variables['time']
            timestamp = num2date(times[:], units=times.units)[0]
        else:
            timestamp = None

        lon = np.copy(nc.variables['lon'])
        lat = np.copy(nc.variables['lat'])

        data = {}
        metadata = {}

        # Filter while iterating instead of calling remove() on keys():
        # on Python 3 keys() is a view without a remove() method.
        for name in nc.variables:
            if name in skipped:
                continue
            if variables is not None and name not in variables:
                continue

            ncvar = nc.variables[name]
            data[name] = ncvar[:][0]
            metadata[str(name)] = {
                attr: ncvar.getncattr(attr)
                for attr in ncvar.ncattrs()
                if attr[0] != '_' and attr != 'scale_factor'
            }

    return data, lon, lat, timestamp, metadata
def clip_bbox(data, lon, lat, lon_min, lat_min, lon_max, lat_max):
    """Clips bounding box out of numpy.array and returns data as numpy.ndarray

    The clipped window spans from the first to the last coordinate index
    that lies inside the bounding box (bounds inclusive). Arrays in `data`
    are sliced with latitude on the first and longitude on the second axis.

    Parameters
    ----------
    data : dict of numpy.arrays
        Source images, keyed by variable name.
    lon : numpy.array
        Longitudes of source file
    lat : numpy.array
        Latitudes of source file
    lon_min : float
        Min longitude of bounding box.
    lat_min : float
        Min latitude of bounding box.
    lon_max : float
        Max longitude of bounding box.
    lat_max : float
        Max latitude of bounding box.

    Returns
    -------
    data_new : dict of numpy.arrays
        Clipped image.
    lon_new : numpy.array
        Longitudes of the clipped image.
    lat_new : numpy.array
        Latitudes of the clipped image.

    Raises
    ------
    ValueError
        If no longitude or no latitude lies inside the bounding box.
    """
    lons = np.where((lon >= lon_min) & (lon <= lon_max))[0]
    lats = np.where((lat >= lat_min) & (lat <= lat_max))[0]

    # min()/max() of an empty index array fails with an opaque numpy error,
    # so report the actual problem instead.
    if lons.size == 0 or lats.size == 0:
        raise ValueError("Bounding box does not contain any grid points.")

    # Compute the index windows once and reuse them for every variable.
    lon_window = slice(lons.min(), lons.max() + 1)
    lat_window = slice(lats.min(), lats.max() + 1)

    data_new = {}
    for var in data:
        data_new[var] = data[var][lat_window, lon_window]

    return data_new, lon[lon_window], lat[lat_window]
def read_variable(source_file, variable, date, date_to=None):
    """Gets images from a netCDF file.

    Returns the image stored for `date`. If `date_to` is given, all images
    with a timestamp between `date` and `date_to` (both inclusive) are
    returned instead.

    Parameters
    ----------
    source_file : str
        Path to source file.
    variable : str
        Requested variable of image.
    date : datetime.datetime
        Date of the image, start date of data cube if date_to is set.
    date_to : datetime.date, optional
        End date of data cube to slice from NetCDF file.

    Returns
    -------
    image : numpy.ndarray
        Image for a specific date, or the images of the requested period.
    lon : numpy.array
        Longitudes of the image.
    lat : numpy.array
        Latitudes of the image.
    metadata : dict of strings
        Attributes of the requested variable. Attributes starting with an
        underscore and 'scale_factor' are left out.
    """
    with Dataset(source_file, 'r', format='NETCDF4') as nc:
        time_var = nc.variables['time']
        lon = nc.variables['lon'][:]
        lat = nc.variables['lat'][:]
        ncvar = nc.variables[variable]

        metadata = {
            attr: ncvar.getncattr(attr)
            for attr in ncvar.ncattrs()
            if attr[0] != '_' and attr != 'scale_factor'
        }

        first = date2num(date, units=time_var.units,
                         calendar=time_var.calendar)
        time_axis = time_var[:]

        if date_to is not None:
            # Period requested: select every time step inside the range.
            last = date2num(date_to, units=time_var.units,
                            calendar=time_var.calendar)
            in_period = (time_axis >= first) & (time_axis <= last)
            subset = np.where(in_period)
            image = ncvar[subset]
        else:
            # Single date requested: take the first matching time step.
            position = np.where(time_axis == first)[0][0]
            image = ncvar[position]

    return image, lon, lat, metadata
def get_properties(src_file):
    """Gets variables, dimensions and time period from a netCDF file.

    Parameters
    ----------
    src_file : str
        Path to NetCDF file.

    Returns
    -------
    variables : list of str
        List of variables, without the dimension variables and 'gpi'.
    dimensions : list of str
        Dimensions of the NetCDF file.
    period : list of datetime-like
        Date of first and last image in source file, as returned by
        num2date.
    """
    # The file is only queried, so it is opened read-only.
    with Dataset(src_file, 'r', format='NETCDF4') as nc:

        # Materialise as a list so the result does not depend on the
        # dataset after it is closed.
        dimensions = list(nc.dimensions.keys())

        time = nc.variables['time']
        stamps = time[:]
        period = [num2date(stamps.min(), units=time.units,
                           calendar=time.calendar),
                  num2date(stamps.max(), units=time.units,
                           calendar=time.calendar)]

        # Filter instead of calling remove() on keys(): on Python 3 keys()
        # is a view without remove(), and a dimension without a coordinate
        # variable of the same name is simply skipped.
        excluded = set(dimensions)
        excluded.add('gpi')
        variables = [name for name in nc.variables if name not in excluded]

    return variables, dimensions, period