contrib/non-profit-audit-reports/csv2ods.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241

#!/usr/bin/env python3
# csv2ods.py
# Convert example csv file to ods
#
# Copyright (c) 2012       Tom Marble
# Copyright (c) 2012, 2013 Bradley M. Kuhn
#
# This program gives you software freedom; you can copy, modify, convey,
# and/or redistribute it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program in a file called 'GPLv3'.  If not, write to the:
#    Free Software Foundation, Inc., 51 Franklin St, Fifth Floor
#                                    Boston, MA 02110-1301, USA.

import sys, os, os.path, optparse
import csv
import ooolib2
import shutil
import string
# Crypto is an optional third-party dependency: when the import fails a notice
# is printed and the script keeps going (the hard exit below is commented out).
try:
    from Crypto.Hash import SHA256
except ModuleNotFoundError:
    print("Missing pycrypto")
    #sys.exit(-1)


def err(msg):
    """Report a fatal error and terminate the program.

    The message is written to standard error (prefixed with ``error: ``) so
    that it does not mix with regular output on standard output.

    Args:
        msg: Human-readable description of the problem.

    Raises:
        SystemExit: Always, with exit status 1.
    """
    print(f'error: {msg}', file=sys.stderr)
    sys.exit(1)

def ReadChecksums(inputFile):
    """Load a checksum list file into a ``{checksum: fileName}`` dictionary.

    Each non-blank line is expected to look like ``fileName: checksum``.  The
    line is split on its *last* colon, so file names that themselves contain
    ``:`` are accepted.  Blank lines (for example a trailing newline at the end
    of the file) are ignored.

    Args:
        inputFile: Path of the checksum list file to read.

    Returns:
        dict mapping each checksum string to the file name listed with it.
        When a checksum appears more than once, the last entry wins.

    Raises:
        ValueError: If a non-blank line contains no ``:`` separator.
        OSError: If the file cannot be opened or read.
    """
    checksums = {}
    with open(inputFile, "r") as inputFH:
        for lineNumber, entry in enumerate(inputFH, start=1):
            if not entry.strip():
                continue
            fileName, separator, checksum = entry.rpartition(":")
            if not separator:
                raise ValueError(
                    f'{inputFile}:{lineNumber}: expected "filename: checksum", got {entry!r}')
            # Every space is dropped (not only the padding around the colon);
            # this matches how these files have always been parsed.
            fileName = fileName.replace(' ', "")
            checksum = checksum.replace(' ', "").replace("\n", "")
            checksums[checksum] = fileName
    return checksums

def ChecksumFile(filename):
    """Return the SHA-256 digest of a file as a lowercase hex string.

    The file is read in fixed-size chunks so that large files do not have to
    fit in memory.

    Args:
        filename: Path of the file to hash.

    Returns:
        The 64-character hexadecimal SHA-256 digest of the file's contents.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # hashlib ships with Python, so checksumming keeps working even when the
    # optional Crypto import at the top of this file failed.
    import hashlib

    sha256 = hashlib.sha256()
    chunk_size = 8192
    with open(filename, 'rb') as myFile:
        # read() returns b'' at end of file, which ends the iteration.
        for chunk in iter(lambda: myFile.read(chunk_size), b''):
            sha256.update(chunk)
    return sha256.hexdigest()

def main():
    """Print the checksum of the file named by the first command-line argument.

    NOTE: a second ``main`` is defined further down in this file and rebinds
    the name, so this version is not the one run by the script entry point.
    """
    # The file defines no get_file_checksum(); ChecksumFile() is the checksum
    # helper that actually exists here.
    print(ChecksumFile(sys.argv[1]))

def _short_link_name(val, singleFileDirectory, filesSavedinManifest):
    """Choose the file name under which link target ``val`` is stored.

    The name is the first 15 characters of the target's base name plus its
    extension.  If that name is already recorded in the manifest for a
    *different* target, a single letter or digit is prepended until a usable
    name is found.

    Raises:
        Exception: If every single-character prefix is already taken.
    """
    stem, extension = os.path.splitext(os.path.basename(val))
    shortName = stem[:15] + extension   # 15 is an arbitrary choice.

    def taken(name):
        # Manifest keys include the directory prefix, so compare like with like.
        key = os.path.join(singleFileDirectory, name)
        return key in filesSavedinManifest and filesSavedinManifest[key] != val

    if not taken(shortName):
        return shortName
    for prefix in string.ascii_letters + string.digits:
        if not taken(prefix + shortName):
            return prefix + shortName
    raise Exception("too many similar file names for linkage; giving up")


def _resolve_link_file(val, csvdir, singleFileDirectory, knownChecksums,
                       checksumCache, filesSavedinManifest):
    """Return the file path a ``link:`` cell should point at, copying if needed.

    Without ``singleFileDirectory`` the link target itself is used.  With
    ``knownChecksums`` a file whose checksum is already listed reuses the listed
    name.  Otherwise, in single-file-directory mode, the target is copied into
    that directory under a short name.  ``knownChecksums`` is updated in place
    with the name that was chosen.

    Raises:
        Exception: If the target to copy does not exist, or no free short name
            could be found.
    """
    newFile = None
    checksum = None

    if not singleFileDirectory:
        newFile = val

    if knownChecksums:
        # Hash each distinct link target only once per conversion.
        checksum = checksumCache.get(val)
        if checksum is None:
            checksum = checksumCache[val] = ChecksumFile(val)
        if checksum in knownChecksums:
            newFile = knownChecksums[checksum]
            print(f'FOUND new file in known: {newFile}')

    if not newFile and singleFileDirectory:
        shortName = _short_link_name(val, singleFileDirectory, filesSavedinManifest)
        src = os.path.join(csvdir, val)
        if not os.path.exists(src):
            raise Exception("File " + src + " does not exist in single file directory mode; giving up")
        dest = os.path.join(csvdir, singleFileDirectory, shortName)
        shutil.copyfile(src, dest)
        shutil.copystat(src, dest)
        shutil.copymode(src, dest)
        newFile = os.path.join(singleFileDirectory, shortName)

    if knownChecksums:
        knownChecksums[checksum] = newFile

    return newFile


def csv2ods(csvname, odsname, encoding='', singleFileDirectory=None, knownChecksums=None, verbose = False):
    """Convert a csv file into an ods spreadsheet.

    Every csv row becomes a spreadsheet row.  Cell text is interpreted as:

    * ``$...``      -- stored as a currency cell (without the ``$``).
    * ``link:PATH`` -- stored as a hyperlink to PATH (see below).
    * ``pagebreak`` -- applies the page-break row style to the current row.
    * ``title:...`` -- renames the current sheet.
    * anything else -- stored as a string cell.

    Blank csv lines produce an empty string cell in column 1.  The names of all
    linked files are appended to a ``MANIFEST`` file in the current directory.

    Args:
        csvname: Path of the csv file to read.
        odsname: Path of the ods file to write.
        encoding: Text encoding used to read the csv file; an empty value (or
            None) means the platform default.
        singleFileDirectory: If set, linked files are copied into this
            directory under short names and the links point there.
        knownChecksums: Optional ``{checksum: fileName}`` dict of files that are
            already saved; linked files with a listed checksum reuse that name.
            The dict is updated in place.
        verbose: Print progress information when true.

    Raises:
        Exception: In single-file-directory mode, if a linked file does not
            exist or no unused short name can be found for it.
        OSError: If the csv file cannot be read or an output cannot be written.
    """
    if knownChecksums is None:
        knownChecksums = {}
    filesSavedinManifest = {}   # linked file name -> link target from the csv
    checksumCache = {}          # link target -> checksum

    if verbose:
        print(f'converting from {csvname} to {odsname}')

    if singleFileDirectory:
        # NOTE(review): the directory is created relative to the current
        # directory while files are copied relative to the csv's directory;
        # this looks like it assumes the two are the same -- confirm.
        if not os.path.isdir(os.path.join(os.getcwd(),singleFileDirectory)):
            os.mkdir(singleFileDirectory)

    doc = ooolib2.Calc()
    #  add a pagebreak style
    style = 'pagebreak'
    style_pagebreak = doc.styles.get_next_style('row')
    style_data = tuple([style, ('style:row-height', doc.styles.property_row_height)])
    doc.styles.style_config[style_data] = style_pagebreak
    #  add a currency style
    style = 'currency'
    style_currency = doc.styles.get_next_style('cell')
    style_data = tuple([style])
    doc.styles.style_config[style_data] = style_currency

    csvdir = os.path.dirname(csvname) or '.'

    # Decode while reading so every cell is a str; newline='' lets the csv
    # module handle line endings inside quoted fields itself.
    with open(csvname, 'r', encoding=encoding or None, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='"')
        for row, fields in enumerate(reader, start=1):
            if not fields:
                # enter an empty string for blank lines
                doc.set_cell_value(1, row, 'string', '')
                continue

            for col, val in enumerate(fields, start=1):
                if val.startswith('$'):
                    doc.set_cell_value(col, row, 'currency', val[1:])
                elif val.startswith("link:"):
                    target = val[5:]
                    linkname = os.path.basename(target) # name is just the last component
                    newFile = _resolve_link_file(target, csvdir, singleFileDirectory,
                                                 knownChecksums, checksumCache,
                                                 filesSavedinManifest)

                    linkrel = '../' + newFile # ../ means remove the name of the *.ods
                    doc.set_cell_value(col, row, 'link', (linkrel, linkname))
                    linkpath = csvdir + '/' + target

                    if target not in filesSavedinManifest:
                        filesSavedinManifest[newFile] = target

                    if not os.path.exists(linkpath):
                        print(f'WARNING: link {target} DOES NOT EXIST at {linkpath}')
                    elif verbose:
                        print(f'relative link {target} EXISTS at {linkpath}')
                elif val == "pagebreak":
                    doc.sheets[doc.sheet_index].set_sheet_config(('row', row), style_pagebreak)
                elif val.startswith("title:"):
                    doc.sheets[doc.sheet_index].set_name(val[6:])
                else:
                    doc.set_cell_value(col, row, 'string', val)

    # save manifest file
    if filesSavedinManifest:
        with open("MANIFEST", "a") as manifestFH:
            manifestFH.write("# Files from %s\n" % odsname)
            for savedName in filesSavedinManifest:
                manifestFH.write("%s\n" % savedName)

    # Save spreadsheet file.
    doc.save(odsname)

def main():
    """Parse the command line and convert the requested csv file to ods.

    ``--csv`` is required.  ``--ods`` defaults to the csv name with its
    extension replaced by ``.ods``.  ``--known-checksum-list`` is only accepted
    together with ``--single-file-directory``.

    Raises:
        SystemExit: On invalid or missing options, a missing csv file, or an
            unreadable checksum list.
    """
    program = os.path.basename(sys.argv[0])
    version = '0.1'
    parser = optparse.OptionParser(usage='%prog --csv FILE [--ods FILE] [options]',
                                   version='%prog ' + version)
    parser.add_option('-v', '--verbose', action='store_true',
                      dest='verbose',
                      help='provide extra information while processing')
    parser.add_option('-c', '--csv', action='store',
                      help='csv file to process')
    parser.add_option('-o', '--ods', action='store',
                      help='ods output filename')
    # Default to '' (not None) so that "no encoding requested" reaches
    # csv2ods() in the form its own default uses.
    parser.add_option('-e', '--encoding', action='store', default='',
                      help='unicode character encoding type')
    parser.add_option('-d', '--single-file-directory', action='store',
                      help='directory name to move all files into')
    parser.add_option('-s', '--known-checksum-list', action='store',
                      help='file listing "name: checksum" pairs of already saved files')
    (options, args) = parser.parse_args()

    if len(args) != 0:
        parser.error("not expecting extra args")
    if not options.csv:
        parser.error('Missing required --csv option')
    if not os.path.exists(options.csv):
        err(f'csv does not exist: {options.csv}')
    if not options.ods:
        (root, ext) = os.path.splitext(options.csv)
        options.ods = root + '.ods'
    if options.verbose:
        print(f'''{program}: verbose mode on
        csv: {options.csv}
        ods: {options.ods}
        encoding: {options.encoding}
        ''')
    if options.known_checksum_list and not options.single_file_directory:
        err(program + ": --known-checksum-list option is completely useless without --single-file-directory")
    knownChecksums = {}
    if options.known_checksum_list:
        if not os.access(options.known_checksum_list, os.R_OK):
            err(program + ": unable to read file: " + options.known_checksum_list)
        knownChecksums = ReadChecksums(options.known_checksum_list)
    csv2ods(options.csv, options.ods, options.encoding, options.single_file_directory, knownChecksums, options.verbose)

# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
  main()