# Source code for lcc.db_tier.connectors.file_manager

import glob
import os
import pyfits

from lcc.db_tier.base_query import LightCurvesDb
from lcc.entities.exceptions import InvalidFilesPath, InvalidFile
from lcc.entities.light_curve import LightCurve
from lcc.entities.star import Star
from lcc.utils.output_process_modules import loadFromFile
import numpy as np


# TODO: This class needs to be upgraded
class FileManager(LightCurvesDb):
    '''
    This class is responsible for managing light curve files

    Attributes
    -----------
    path : list of str
        Folders of light curves. A single folder given as a string
        is wrapped into a one-item list

    star_class : str
        Name of the loaded star-like type (e.g. Cepheids)

    suffix : str
        Suffix of light curve files in the folder. If suffix is "fits",
        files are loaded as fits files, otherwise files are considered
        as .dat files of light curve such as:

            #time    mag    err
            12    13.45    0.38

    files_limit : int, str
        Number of files which will be loaded

    db_ident : str
        Name of the database to which the file name will be assigned

        EXAMPLE:
            For the file "my_macho_star.dat" and given db_ident as "macho"
            makes Star object:

            star.ident["macho"] --> my_macho_star

    files_to_load : iterable of str
        List of file names which should be loaded from the given folder.
        If it is not specified all files will be loaded

    object_file_name : str
        Name of the pickle file which contains list of star objects
    '''

    SUFFIXES = ["dat", "txt", "fits", "FITS"]
    DEFAULT_STARCLASS = "star"

    FITS_RA = "RA"
    FITS_DEC = "DEC"
    FITS_RA_UNIT = "RA_UN"
    FITS_DEC_UNIT = "DEC_UN"
    FITS_NAME = "IDENT"
    FITS_CLASS = "CLASS"

    DB_ORIGIN = "DB_ORIGIN"

    FITS_SUFFIX = ("fits", "FITS")

    def __init__(self, obtain_params):
        '''
        Parameters
        ----------
        obtain_params : dict
            Query dictionary (see class Attributes doc above). A list
            holding exactly one such dictionary is unwrapped

        Raises
        ------
        IOError
            If no "path" is given in the query
        '''
        if isinstance(obtain_params, list) and len(obtain_params) == 1:
            obtain_params = obtain_params[0]

        path = obtain_params.get("path", None)
        if not path:
            raise IOError("Path %s was not found" % path)
        self.path = self._as_path_list(path)

        self.star_class = obtain_params.get(
            "star_class", self.DEFAULT_STARCLASS)
        self.suffix = obtain_params.get("suffix", None)

        file_lim = obtain_params.get("files_limit")
        self.files_limit = int(file_lim) if file_lim else None

        self.db_ident = obtain_params.get("db_ident")
        self.files_to_load = obtain_params.get("files_to_load")
        self.object_file_name = obtain_params.get("object_file_name")

    @staticmethod
    def _as_path_list(path):
        '''Return `path` as a list of folders'''
        # An explicit string check is needed: on Python 3 strings expose
        # `__iter__`, so a bare hasattr test would split a path into
        # single characters.
        if isinstance(path, (str, bytes)) or not hasattr(path, "__iter__"):
            return [path]
        return list(path)

    def _folders(self):
        '''Return the folders stored in `self.path` as a list'''
        return self._as_path_list(self.path)

    def getStarsWithCurves(self):
        '''
        Common method for all stars provider

        If there is object_file_name in the query dictionary, the object
        file of list of stars is loaded. Otherwise files from every
        folder in `path` are loaded into star objects.

        Returns
        --------
        list of `Star` objects
            Star objects with light curves
        '''
        if self.object_file_name:
            return self._load_stars_object()

        stars = []
        # The folder is passed as an argument, so `self.path` keeps its
        # list form and the method can be called repeatedly.
        for folder in self._folders():
            stars += self._load_stars_from_folder(folder)
        return stars

    def _load_stars_from_folder(self, path=None):
        '''
        Load all files with a certain suffix as light curves

        Parameters
        ----------
        path : str, optional
            Folder to search. If omitted, `self.path` is used

        Returns
        -------
        list of `Star` objects

        Raises
        ------
        InvalidFilesPath
            If no file with a requested suffix is found in the folder
        '''
        folder = self.path if path is None else path

        # Check whether the path ends with "/" sign, if not add
        if not folder.endswith("/"):
            folder = folder + "/"

        # Get all light curve files (all files which end with certain suffix)
        suffixes = [self.suffix] if self.suffix else self.SUFFIXES
        found = []
        for suffix in suffixes:
            found += glob.glob("%s*%s" % (folder, suffix))

        if not found:
            if self.suffix:
                raise InvalidFilesPath(
                    "There are no stars in %s with %s suffix" % (folder, self.suffix))
            raise InvalidFilesPath(
                "There are no stars in %s with any of supported suffix: %s" % (folder, self.SUFFIXES))

        # glob order is arbitrary and the "fits"/"FITS" patterns can match
        # the same file twice on case-insensitive file systems.
        found = sorted(set(found))

        # Everything which is not a fits file is treated as a dat-like
        # text file (as the class documentation states), so "txt" and
        # custom suffixes are no longer silently dropped.
        fits_paths = [p for p in found if p.endswith(self.FITS_SUFFIX)]
        dat_paths = [p for p in found if not p.endswith(self.FITS_SUFFIX)]

        # None means "no limit"; the instance attribute is left untouched
        # so the limit also holds for the following folders.
        limit = self.files_limit

        stars = self._loadDatFiles(dat_paths, limit)
        stars += self._loadFromFITS(fits_paths, limit)
        return stars

    def _loadDatFiles(self, star_paths, numberOfFiles):
        '''
        Load text light curve files into star objects

        Parameters
        ----------
        star_paths : list of str
            Paths of the light curve files

        numberOfFiles : int, NoneType
            Maximal number of paths taken from `star_paths`
            (None for all of them)

        Returns
        -------
        list of `Star` objects
            Stars with non-empty light curves
        '''
        stars = []
        ident_key = self.db_ident if self.db_ident else "file"

        # Load every light curve and put it into star object
        for single_file in star_paths[:numberOfFiles]:
            if (self.files_to_load and
                    os.path.basename(single_file) not in self.files_to_load):
                # Skip just this file; the remaining ones can still match
                continue

            rows = self._loadLcFromDat(single_file)
            if not rows:
                # Nothing left after cleaning, there is no curve to build
                continue
            lc = LightCurve(rows)

            # Check if light curve is not empty
            if len(lc.mag) >= 1:
                star = Star(
                    ident={ident_key: {"name": self.parseFileName(single_file)}})
                star.starClass = self.star_class
                star.putLightCurve(lc)
                stars.append(star)
        return stars

    @classmethod
    def _loadLcFromDat(cls, file_name):
        '''
        Load Light curve from dat file of light curve

        Parameters
        -----------
        file_name : str
            Name of the light curve file with its path

        Returns
        --------
        list of lists
            Rows [time, mag, err] rounded to 3 digits. Rows containing
            NaN or one of the sentinel values -99 / 99 are left out

        Raises
        ------
        InvalidFilesPath
            If the file cannot be opened

        InvalidFile
            If the loaded table does not have three columns
        '''
        # Numeric sentinels: the loaded values are floats, so they can
        # never be equal to strings like "-99" or "N/A".
        BAD_VALUES = (-99.0, 99.0)

        # Order of columns in the light curve file
        TIME_COL = 0
        MAG_COL = 1
        ERR_COL = 2

        ROUND_DIGITS = 3
        columns = (TIME_COL, MAG_COL, ERR_COL)

        try:
            try:
                dat = np.loadtxt(file_name, usecols=columns, skiprows=0)
            except (IndexError, ValueError):
                # Retry without the first two lines (e.g. a header which
                # cannot be parsed). Depending on the numpy version such
                # lines raise IndexError or ValueError.
                dat = np.loadtxt(file_name, usecols=columns, skiprows=2)
        except IOError as err:
            raise InvalidFilesPath(
                "\nCannot open light curve file\n %s" % err)

        # A file with a single row is loaded as a 1-D array
        dat = np.atleast_2d(dat)
        if dat.size == 0:
            return []
        if dat.shape[1] != len(columns):
            raise InvalidFile(
                "Length of columns in light curve file is not the same")

        bad = np.isnan(dat)
        for sentinel in BAD_VALUES:
            bad |= (dat == sentinel)
        good_rows = dat[~bad.any(axis=1)]

        return [[round(value, ROUND_DIGITS) for value in row]
                for row in good_rows]

    def _load_stars_object(self):
        '''
        Load object file of list of stars

        The file `object_file_name` is read from every folder in `path`
        and the loaded lists are concatenated.

        Raises
        ------
        InvalidFile
            If no stars are loaded or the first loaded item is not
            a `Star`
        '''
        stars = []
        for folder in self._folders():
            # NOTE(review): the attribute documentation calls this a pickle
            # file - unpickling files from untrusted sources is unsafe.
            stars.extend(
                loadFromFile(os.path.join(folder, self.object_file_name)))

        if not stars:
            raise InvalidFile("There are no stars in object file")

        if stars[0].__class__.__name__ != "Star":
            raise InvalidFile("It is not list of stars")

        return stars

    @staticmethod
    def parseFileName(file_path):
        '''Return cleaned name of the star without path and suffix'''
        # Only the last path component is examined, so a dot in a folder
        # name cannot be mistaken for the start of the suffix.
        return os.path.splitext(os.path.basename(file_path))[0]

    def _loadFromFITS(self, star_paths, files_lim=None):
        '''
        Load fits files into star objects

        Parameters
        ----------
        star_paths : list of str
            Paths of the fits files. They are opened as given (results of
            glob already contain the folder)

        files_lim : int, NoneType
            Maximal number of paths taken from `star_paths`
            (None for all of them)

        Returns
        -------
        list of `Star` objects

        Raises
        ------
        InvalidFile
            If a file cannot be opened as a fits file
        '''
        stars = []
        for path in star_paths[:files_lim]:
            try:
                fits = pyfits.open(path)
            except Exception:
                raise InvalidFile("Invalid fits file or path: %s" % path)
            stars.append(self._createStarFromFITS(fits))
        return stars

    @classmethod
    def _createStarFromFITS(cls, fits):
        '''
        Create a star object from an opened fits file

        The primary header supplies name, coordinates, class, database
        identifiers ("<db>_name" and "<db>_id_<key>" cards) and additional
        information; every following extension is read as a light curve.
        The fits file is closed before returning, even on failure.

        Parameters
        ----------
        fits : HDU list
            Opened fits file

        Returns
        -------
        `Star` object
        '''
        DB_NAME_END = "_name"
        DB_IDENT_SEP = "_id_"
        RESERVED_KEYS = ["SIMPLE", "BITPIX", "NAXIS", "EXTEND",
                         cls.FITS_RA, cls.FITS_DEC,
                         cls.FITS_RA_UNIT, cls.FITS_DEC_UNIT,
                         cls.FITS_NAME, cls.FITS_CLASS]

        try:
            prim_hdu = fits[0].header

            ra = prim_hdu.get(cls.FITS_RA)
            dec = prim_hdu.get(cls.FITS_DEC)
            ra_unit = prim_hdu.get(cls.FITS_RA_UNIT)
            dec_unit = prim_hdu.get(cls.FITS_DEC_UNIT)

            star = Star(name=prim_hdu.get(cls.FITS_NAME),
                        coo=(ra, dec, (ra_unit, dec_unit)),
                        starClass=prim_hdu.get(cls.FITS_CLASS))

            ident = {}
            more = {}
            for db_name_key in prim_hdu.keys():
                if db_name_key.endswith(DB_NAME_END):
                    db_name = db_name_key[:-len(DB_NAME_END)]
                    # setdefault keeps identifiers collected earlier when
                    # the "_id_" cards precede the "_name" card
                    ident.setdefault(db_name, {})["name"] = prim_hdu[db_name_key]

                elif DB_IDENT_SEP in db_name_key:
                    db_name, ident_key = db_name_key.split(DB_IDENT_SEP, 1)
                    db_entry = ident.setdefault(db_name, {})
                    if not db_entry.get("db_ident"):
                        db_entry["db_ident"] = {}
                    db_entry["db_ident"][ident_key] = prim_hdu[db_name_key]

                elif db_name_key not in RESERVED_KEYS:
                    more[db_name_key.lower()] = prim_hdu[db_name_key]

            star.ident = ident
            star.more = more

            for lc_hdu in fits[1:]:
                star.putLightCurve(cls._createLcFromFits(lc_hdu))
        finally:
            fits.close()

        return star

    @classmethod
    def _createLcFromFits(cls, fits):
        '''
        Create a light curve from a table extension of a fits file

        Parameters
        ----------
        fits : HDU
            Extension whose data rows are (time, mag, err)

        Returns
        -------
        `LightCurve` object

        Raises
        ------
        InvalidFile
            If a row cannot be unpacked into three values
        '''
        time = []
        mag = []
        err = []
        for line in fits.data:
            try:
                t, m, e = line
            except (TypeError, ValueError):
                raise InvalidFile(
                    "Light curve extension of fits couldn't be parsed\n%s" % line)
            time.append(t)
            mag.append(m)
            err.append(e)

        meta = {"xlabel": fits.header.get("TTYPE1", None),
                "xlabel_unit": fits.header.get("TUNIT1", None),
                "ylabel": fits.header.get("TTYPE2", None),
                "ylabel_unit": fits.header.get("TUNIT2", None),
                "color": fits.header.get("FILTER", None),
                "origin": fits.header.get(cls.DB_ORIGIN, None)
                }
        return LightCurve([time, mag, err], meta)

    @classmethod
    def writeToFITS(cls, file_name, star, clobber=True):
        '''
        Save a star object with its light curves into a fits file

        Parameters
        ----------
        file_name : str
            Name of the output file

        star : `Star` object
            Star to save. Its name, coordinates, class, identifiers and
            additional information go to the primary header, every light
            curve to its own table extension

        clobber : bool
            Passed to the `writeto` method of the HDU list
        '''
        prim_hdu = pyfits.PrimaryHDU()
        prim_hdu.header[cls.FITS_NAME] = star.name

        # Coordinates are optional. Both are read before anything is
        # written, so the header never holds just one of them.
        try:
            ra_degree = star.coo.ra.degree
            dec_degree = star.coo.dec.degree
        except AttributeError:
            pass
        else:
            prim_hdu.header[cls.FITS_RA] = ra_degree
            prim_hdu.header[cls.FITS_RA_UNIT] = "deg"
            prim_hdu.header[cls.FITS_DEC] = dec_degree
            prim_hdu.header[cls.FITS_DEC_UNIT] = "deg"

        # The class is written independently of the coordinates
        try:
            prim_hdu.header[cls.FITS_CLASS] = star.starClass
        except AttributeError:
            pass

        for db, ident in star.ident.items():
            prim_hdu.header["HIERARCH " + db + "_name"] = ident["name"]

            identifiers = ident.get("db_ident") or {}
            for key, value in identifiers.items():
                prim_hdu.header["HIERARCH " + db + "_id_" + key] = value

        for it, value in star.more.items():
            if len(it) > 8:
                it = "HIERARCH " + it
            prim_hdu.header[it] = value

        hdu_list = pyfits.HDUList(prim_hdu)

        for lc in star.light_curves:
            col1 = pyfits.Column(name=lc.meta.get("xlabel", "hjd"),
                                 unit=lc.meta.get("xlabel_unit", "days"),
                                 format='E', array=lc.time)
            col2 = pyfits.Column(name=lc.meta.get("ylabel", "magnitude"),
                                 unit=lc.meta.get("ylabel_unit", "mag"),
                                 format='E', array=lc.mag)
            col3 = pyfits.Column(name="error",
                                 unit=lc.meta.get("ylabel_unit", "mag"),
                                 format='E', array=lc.err)

            columns = pyfits.ColDefs([col1, col2, col3])
            # `new_table` is missing in newer releases of the fits library,
            # `BinTableHDU.from_columns` is missing in old ones
            if hasattr(pyfits.BinTableHDU, "from_columns"):
                lc_hdu = pyfits.BinTableHDU.from_columns(columns)
            else:
                lc_hdu = pyfits.new_table(columns)

            lc_hdu.header["FILTER"] = lc.meta.get("color", "")
            lc_hdu.header["HIERARCH " + cls.DB_ORIGIN] = lc.meta.get("origin", "")
            hdu_list.append(lc_hdu)

        hdu_list.writeto(file_name, clobber=clobber)