#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Table class with CSV I/O, easy access to columns, HTML output
"""
__author__ = "Philippe Guglielmetti"
__copyright__ = "Copyright 2012, Philippe Guglielmetti"
__credits__ = []
__license__ = "LGPL"
import csv, itertools, operator, string
import datetime
import logging
[docs]class Table(list):
"""Table class with CSV I/O, easy access to columns, HTML output"""
[docs] def __init__(self,filename=None,titles=[],init=[],**kwargs):
"""inits a table either from data or csv file"""
list.__init__(self, init)
self.titles=titles
if filename:
if titles: #were specified
kwargs['titles_line']=0
if filename[-4:].lower()=='.xls':
self.read_xls(filename,**kwargs)
else:
self.read_csv(filename,**kwargs)
[docs] def __repr__(self):
return 'Table(%s,%s)'%(self.titles,self[:5])
[docs] def __str__(self):
res=''
if self.titles:
res+=str(self.titles)+'\n'
for line in self:
res+=str(line)+'\n'
return res
[docs] def html(self,page,head=None,foot=None,colstyle=None,**kwargs):
"""output HTML on a markup.page"""
page.table(**kwargs) #if style is defined, it's applied to the table here
if not head:
head=self.titles
if head:
page.THEAD(head)
for row in self:
page.TR(row,style=colstyle)
if foot:
page.TFOOT(foot)
page.table.close()
[docs] def read_xls(self, filename, **kwargs):
"""appends an Excel table"""
titles_line=kwargs.get('titles_line',1)-1
data_line=kwargs.get('data_line',2)-1
from xlrd import open_workbook
wb = open_workbook(filename)
for s in wb.sheets():
for i in range(s.nrows):
line = []
for j in range(s.ncols):
x=s.cell(i,j).value
try:
xf=float(x)
xi=int(x)
x=xi if xi==xf else xf
except:
if x=='': x=None
line.append(x)
if i==titles_line:
self.titles=line
elif i>=data_line:
self.append(line)
[docs] def read_csv(self, filename, **kwargs):
"""appends a .csv or similar file to the table"""
titles_line=kwargs.get('titles_line',1)-1
data_line=kwargs.get('data_line',2)-1
dialect=kwargs.get('dialect','excel')
delimiter=kwargs.get('delimiter',';')
encoding=kwargs.get('encoding','iso-8859-15')
#reader = csv.reader(open(filename, 'rb'), dialect=dialect, delimiter=delimiter)
reader = open(filename,'rb')
for i,row in enumerate(reader):
row=row.replace('\x00', '') #avoid NULLS from .XLS saved as .CSV
row=row.rstrip('\r\n')
row=row.split(delimiter)
if encoding:
row=[x.decode(encoding) for x in row]
if i==titles_line: #titles can have no left/right spaces
self.titles=[x.lstrip().rstrip() for x in row]
elif i>=data_line:
line=[]
for x in row:
try:
xf=float(x)
xi=int(x)
x=xi if xi==xf else xf
except:
if x=='': x=None
line.append(x)
if line!=[None]: #strange last line ...
self.append(line)
[docs] def write_csv(self,filename, transpose=False, **kwargs):
""" write the table in Excel csv format, optionally transposed"""
dialect=kwargs.get('dialect','excel')
delimiter=kwargs.get('delimiter',';')
encoding=kwargs.get('encoding','iso-8859-15')
writer=csv.writer(open(filename, 'wb'), dialect=dialect, delimiter=delimiter)
if transpose:
i=0
while self.col(i)[0]:
line=['']
if self.titles: line=[self.titles[i]]
line.extend(self.col(i))
writer.writerow(line)
i+=1
else:
if self.titles: writer.writerow(self.titles)
for line in self:
writer.writerow(line)
[docs] def ncols(self):
"""return number of columns, ignoring title"""
return reduce(max,map(len,self))
[docs] def find_col(self,title):
"""finds a column from a part of the title"""
title=title.lower()
for i,c in enumerate(self.titles):
if c.lower().find(title)>=0:
return i
return None
def _i(self,by):
'''column index'''
if isinstance(by, basestring):
return self.titles.index(by)
return by
[docs] def icol(self,by):
'''iterates column'''
for row in self:
try:
yield row[self._i(by)]
except:
yield None
[docs] def col(self,by):
return [x for x in self.icol(by)]
[docs] def get(self,row,col):
col=self._i(col)
return self[row][col]
[docs] def set(self,row,col,value):
col=self._i(col)
if row>=len(self):
self.extend([list()]*(1+row-len(self)))
if col>=len(self[row]):
self[row].extend([None]*(1+col-len(self[row])))
self[row][col]=value
[docs] def setcol(self,by,val,i=0):
'''set column'''
j=self._i(by)
for v in val:
self.set(i,j,v)
i+=1
[docs] def addcol(self,title,val=None,i=0):
'''add column to the right'''
col=len(self.titles)
self.titles.append(title)
if not isinstance(val,list):
val=[val]*len(self)
for v in val:
self.set(i,col,v)
i+=1
[docs] def sort(self,by,reverse=False):
'''sort by column'''
i=self._i(by)
if isinstance(i, int):
list.sort(self,key=lambda x:x[i],reverse=reverse)
else:
list.sort(i,reverse=reverse)
[docs] def rowasdict(self,i):
''' returns a line as a dict '''
return dict(zip(self.titles,self[i]))
[docs] def groupby(self,by,sort=True,removecol=True):
'''dictionary of subtables grouped by a column'''
i=self._i(by)
t=self.titles
if removecol: t=t[:i]+t[i+1:]
res={}
if sort:
self.sort(i)
else:
pass #groupby will group CONSECUTIVE lines with same i, so entries at bottom of table will replace the earlier entries in dict
for k, g in itertools.groupby(self, key=lambda x:x[i]):
res[k]=Table(None,titles=t,init=[a[:i]+a[i+1:] if removecol else a for a in g])
return res
[docs] def hierarchy(self,by='Level',
factory=lambda row:(row,[]), #creates an object from a line
linkfct=lambda x,y,row:x[1].append(y) #creates a parend/child relation between x and y. raw is also available (for qty)
):
'''builds a structure from a table containing a "level" column'''
res=[]
i=self._i(by)
stack=[]
for row in self:
obj=factory(row)
level=row[i]
if level==1:
res.append(obj)
while level<=len(stack):
stack.pop()
if stack:
linkfct(stack[-1],obj,row)
stack.append(obj)
return res
[docs] def applyf(self,by,f,safe=True):
'''apply a function to a column'''
i=self._i(by)
for row in self:
if safe:
try:
row[i]=f(row[i])
except:
pass
# logging.debug('applyf could not process %s'%row[i]) #might cause another error
else: # will fail if any problem occurs
row[i]=f(row[i])
[docs] def to_datetime(self,by,fmt='%d.%m.%Y',safe=True):
'''convert a column to datetime'''
self.applyf(by,lambda x:x if isinstance(x,datetime.datetime) else datetime.datetime.strptime(str(x),fmt),safe)
[docs] def to_date(self,by,fmt='%d.%m.%Y',safe=True):
'''convert a column to date'''
self.to_datetime(by,fmt,safe)
self.applyf(by,lambda x:x.date(),safe)
[docs] def total(self,funcs):
"""builds a list by appling f functions to corresponding columns"""
funcs=funcs+[None]*(len(self.titles)-len(funcs))
res=[]
for i,f in enumerate(funcs):
try:
res.append(f(self.col(i)))
except:
res.append(f)
return res
[docs] def remove_lines_where(self,filter):
"""remove lines on a condition, returns the number of lines removed"""
res=0
if len(self)>0:
for line in reversed(self):
if filter(line):
self.remove(line)
res+=1
return res
if __name__ == '__main__':
t=Table(titles=['A','B','C'])
t.append([1,2.0,3.3])
t.append(['one','two','three'])
t.append([None,['a','b','c'],0])
print t
print t.html()