Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

# Copyright 2011 Luc Saffre 

# License: BSD (see file COPYING for details) 

 

""" 

This is for writing fixtures that import data from an MS-Access  

database (:xfile:`.mdb`) into Lino. 

 

For usage examples, see

:mod:`lino.projects.pcsw.fixtures.pp2lino` 

and 

:mod:`lino.projects.crl.fixtures.hs2lino`. 

 

It uses `mdb-export` to extract data from the :xfile:`.mdb`  

file to :xfile:`.csv`, then reads these csv files.  

`mdb-export` was written by Brian Bruns and is part  

of the `mdbtools` Debian package. To install it:: 

 

  aptitude install mdbtools 

   

Usage of `mdbtools` command line:: 

 

  Usage: mdb-export [options] <file> <table> 

  where options are: 

    -H             supress header row 

    -Q             don't wrap text-like fields in quotes 

    -d <delimiter> specify a column delimiter 

    -R <delimiter> specify a row delimiter 

    -I             INSERT statements (instead of CSV) 

    -D <format>    set the date format (see strftime(3) for details) 

    -S             Sanitize names (replace spaces etc. with underscore) 

    -q <char>      Use <char> to wrap text-like fields. Default is ". 

    -X <char>      Use <char> to escape quoted characters within a field. Default is doubling. 

     

Thanks to http://farismadi.wordpress.com/2008/07/13/encoding-of-mdb-tool/  

for explanations on the environment variables used by `mdb-export`. 

 

The function :func:`check_output` in this module is a copy from Python 2.7  

which we include here to make it usable in Python 2.6 too. 

 

 

""" 

from __future__ import print_function 

from builtins import object 

 

import codecs
import datetime
import logging
import os
import sys

from django.conf import settings

from lino.utils import ucsv
from lino.utils import dblogger

logger = logging.getLogger(__name__)

# Encoding handed to the CSV reader when the exported files are read back.
# NOTE(review): an earlier, disabled setting used 'latin1' and described it
# as "the encoding used by the mdb file" -- confirm 'utf8' is right for
# your database before changing it.
ENCODING = 'utf8'

# Name of the mdbtools executable that exports one table as CSV.
MDBTOOLS_EXPORT = 'mdb-export'

 

 

try:
    from subprocess import check_output
except ImportError:
    # Fallback for interpreters whose ``subprocess`` module does not yet
    # provide ``check_output`` (it was added in Python 2.7).
    import subprocess

    def check_output(*popenargs, **kwargs):
        """Run a command and return everything it wrote to stdout.

        The positional and keyword arguments are forwarded unchanged to
        ``subprocess.Popen``, except that ``stdout`` is reserved: it is
        always set to a pipe so that the output can be captured.

        Returns:
            The captured standard output as returned by
            ``Popen.communicate()``.

        Raises:
            ValueError: if the caller passes a ``stdout`` keyword.
            subprocess.CalledProcessError: if the command exits with a
                non-zero status.  The exception carries the exit status
                in ``returncode``, the command in ``cmd`` and the
                captured output in ``output``.
        """
        if 'stdout' in kwargs:
            raise ValueError(
                'stdout argument not allowed, it will be overridden.')
        process = subprocess.Popen(
            stdout=subprocess.PIPE, *popenargs, **kwargs)
        output, unused_err = process.communicate()
        retcode = process.poll()
        if retcode:
            # Report the command the same way Popen received it: either
            # through the ``args`` keyword or as first positional argument.
            cmd = kwargs.get("args")
            if cmd is None:
                cmd = popenargs[0]
            # This branch only runs where ``check_output`` is missing, and
            # there ``CalledProcessError`` does not accept an ``output``
            # argument yet.  Attach the output as an attribute instead of
            # passing it to the constructor, which would raise TypeError.
            error = subprocess.CalledProcessError(retcode, cmd)
            error.output = output
            raise error
        return output

 

 

class Loader(object):
    """Base class for loaders that import one table of an MS-Access file.

    Iterating over a loader exports the table :attr:`table_name` of
    :attr:`mdb_file` to a file ``<table_name>.csv`` (relative to the
    current working directory) unless that file already exists, then
    reads the file and yields every object that ``row2obj`` produces
    for each data row.

    Besides the attributes declared below, ``__iter__`` reads two names
    that this class does not define, so subclasses have to provide them:

    - ``headers``: the column names; the first line of the CSV file must
      be equal to it.
    - ``row2obj(row)``: called with a dict mapping each header to the
      value of that column; must return an iterable of objects.
    """

    # Path of the .mdb file, passed to the export command.
    mdb_file = None
    # Name of the table to export; also determines the CSV file name.
    table_name = None
    # Not used by this class itself.
    model = None

    def __iter__(self):
        """Yield the objects built from every row of the exported table.

        Raises:
            Exception: if the header line of the CSV file differs from
                ``self.headers``.
        """
        fn = self.table_name + ".csv"
        if os.path.exists(fn):
            logger.warning("Not re-extracting %s since it exists.", fn)
        else:
            self._extract(fn)
        # The ``with`` block closes the file when the generator is
        # exhausted, closed, or aborted by an exception.
        with open(fn, 'r') as csv_file:
            reader = ucsv.UnicodeReader(csv_file, encoding=ENCODING)
            headers = next(reader)
            if headers != self.headers:
                raise Exception("%r != %r" % (headers, self.headers))
            for values in reader:
                row = {}
                for i, h in enumerate(self.headers):
                    row[h] = values[i]
                for obj in self.row2obj(row):
                    yield obj

    def _extract(self, fn):
        """Export the table to the CSV file `fn` using ``mdb-export``."""
        args = [MDBTOOLS_EXPORT, '-D', "%Y-%m-%d %H:%M:%S",
                self.mdb_file, self.table_name]
        data = check_output(args, executable=MDBTOOLS_EXPORT,
                            env=dict(
                                MDB_ICONV='utf-8',
                                MDB_JET_CHARSET='utf-8'))
        # ``check_output`` returns a byte string, so the file must be
        # opened in binary mode (text mode rejects bytes on Python 3).
        with open(fn, 'wb') as fd:
            fd.write(data)
        logger.info("Extracted file %s", fn)

    def _split_datetime(self, s):
        """Split `s` into its two whitespace-separated parts.

        Raises:
            Exception: if `s` does not consist of exactly two parts.
        """
        parts = s.split()
        if len(parts) != 2:
            raise Exception("Unexpected datetime string %r" % s)
        return parts

    def parsedate(self, s):
        """Return the date part of `s` as a :class:`datetime.date`.

        `s` is expected to look like ``"YYYY-MM-DD HH:MM:SS"``, the
        format requested from the export command.  An empty value
        yields `None`.

        Raises:
            Exception: if `s` does not consist of exactly two parts.
        """
        if not s:
            return None
        d = self._split_datetime(s)[0]
        a = [int(i) for i in d.split('-')]
        return datetime.date(year=a[0], month=a[1], day=a[2])

    def parsetime(self, s):
        """Return the time part of `s` as a string cut to 5 characters.

        For a value like ``"YYYY-MM-DD HH:MM:SS"`` this is ``"HH:MM"``.
        An empty value yields `None`.

        Raises:
            Exception: if `s` does not consist of exactly two parts.
        """
        if not s:
            return None
        t = self._split_datetime(s)[1]
        return t[:5]