# Source code for simplesqlite.loader.spreadsheet.gsloader

# encoding: utf-8

"""
.. codeauthor:: Tsuyoshi Hombashi <gogogo.vm@gmail.com>
"""


from __future__ import absolute_import

import dataproperty

from ..error import InvalidDataError
from ..data import TableData
from .core import SpreadSheetLoader


class GoogleSheetsTableLoader(SpreadSheetLoader):
    """
    Concrete class of Google Spreadsheet loader.

    Requirements:

        - `gspread <https://github.com/burnash/gspread>`_
        - `oauth2client <https://pypi.python.org/pypi/oauth2client>`_
        - `pyOpenSSL <https://pypi.python.org/pypi/pyOpenSSL>`_
    """

    @property
    def _sheet_name(self):
        # Title of the worksheet currently assigned by load().
        return self._worksheet.title

    @property
    def _row_count(self):
        return self._worksheet.row_count

    @property
    def _col_count(self):
        return self._worksheet.col_count

    def __init__(self, file_path=None):
        super(GoogleSheetsTableLoader, self).__init__(file_path)

        # Title of the spreadsheet to open; validated to be non-empty
        # before it is used.
        self.title = None

        # Offset added to the detected header row index.
        self.start_row = 0

        # Cell values of the worksheet currently being processed by load().
        self.__all_values = None

    def make_table_name(self):
        """
        |make_table_name|

            ================  ===========================
            format specifier  value after the replacement
            ================  ===========================
            ``%(filename)s``  Filename of the workbook
            ``%(title)s``     Name of the spreadsheet
            ================  ===========================

        :return: Table name.
        :rtype: str
        :raises ValueError: If the spreadsheet title is empty.
        """

        self._validate_title()

        table_name = super(GoogleSheetsTableLoader, self).make_table_name()

        return table_name.replace("%(title)s", self.title)

    def load(self):
        """
        Load table data from a Google Spreadsheet.

        |load_desc|

        :return:
            |load_return|
            :py:meth:`~.GoogleSheetsTableLoader.make_table_name`.
        :rtype: iterator of |TableData|
        :raises ValueError: If the spreadsheet title is empty.
        :raises InvalidDataError: If the header row is not found.
        """

        # Imported lazily so that the optional Google dependencies are only
        # required when this loader is actually used.
        import gspread
        from oauth2client.service_account import ServiceAccountCredentials

        self._validate_table_name()
        self._validate_title()

        scope = ['https://spreadsheets.google.com/feeds']
        # NOTE(review): self.source is presumably the path to the service
        # account JSON key file set by the base class -- confirm.
        credentials = ServiceAccountCredentials.from_json_keyfile_name(
            self.source, scope)

        gc = gspread.authorize(credentials)
        for worksheet in gc.open(self.title).worksheets():
            self._worksheet = worksheet
            self.__all_values = worksheet.get_all_values()

            if self._is_empty_sheet():
                continue

            self.__strip_empty_col()

            value_matrix = self.__all_values[self._get_start_row_idx():]
            if not value_matrix:
                # No row qualified as a header (or every column was empty):
                # raise the documented error instead of a bare IndexError.
                raise InvalidDataError(
                    "header row is not found: sheet={}".format(
                        self._sheet_name))

            header_list = value_matrix[0]
            record_list = value_matrix[1:]

            yield TableData(
                self.make_table_name(), header_list, record_list)

    def _is_empty_sheet(self):
        # A sheet needs at least a header row and one record row.
        return len(self.__all_values) <= 1

    def _get_start_row_idx(self):
        """
        Return ``start_row`` plus the index of the first row in which
        every cell is a non-empty string. If no such row exists, the
        row count is used as the index.
        """

        row_idx = 0
        for row_value_list in self.__all_values:
            if all(
                dataproperty.is_not_empty_string(value)
                for value in row_value_list
            ):
                break

            row_idx += 1

        return self.start_row + row_idx

    def _validate_title(self):
        if dataproperty.is_empty_string(self.title):
            raise ValueError("spreadsheet title is empty")

    def __strip_empty_col(self):
        """
        Drop the leading columns that contain no non-empty string.
        """

        col_idx = 0

        # zip() returns a one-shot iterator on Python 3; materialize the
        # transposed matrix so it can be both iterated and sliced.
        t_value_matrix = list(zip(*self.__all_values))

        for col_value_list in t_value_matrix:
            if any(
                dataproperty.is_not_empty_string(value)
                for value in col_value_list
            ):
                break

            col_idx += 1

        # Transpose back to rows; a list is required because load() slices it.
        self.__all_values = list(zip(*t_value_matrix[col_idx:]))