diff options
Diffstat (limited to 'contrib/non-profit-audit-reports/csv2ods.py')
-rwxr-xr-x | contrib/non-profit-audit-reports/csv2ods.py | 233 |
1 files changed, 233 insertions, 0 deletions
diff --git a/contrib/non-profit-audit-reports/csv2ods.py b/contrib/non-profit-audit-reports/csv2ods.py new file mode 100755 index 00000000..6aabcb59 --- /dev/null +++ b/contrib/non-profit-audit-reports/csv2ods.py @@ -0,0 +1,233 @@ +#!/usr/bin/python +# csv2ods.py +# Convert example csv file to ods +# +# Copyright (c) 2012 Tom Marble +# Copyright (c) 2012, 2013 Bradley M. Kuhn +# +# This program gives you software freedom; you can copy, modify, convey, +# and/or redistribute it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program in a file called 'GPLv3'. If not, write to the: +# Free Software Foundation, Inc., 51 Franklin St, Fifth Floor +# Boston, MA 02110-1301, USA. 
import csv
import hashlib
import optparse
import os
import os.path
import shutil
import string
import sys


def err(msg):
    """Print *msg* prefixed with ``error:`` and exit with status 1."""
    print('error: %s' % msg)
    sys.exit(1)


def ReadChecksums(inputFile):
    """Load a ``filename: checksum`` list into a ``{checksum: filename}`` dict.

    Each non-blank line is split at its LAST colon, so file names that
    themselves contain ``:`` are accepted.  All spaces are removed from both
    fields and the trailing line ending is dropped from the checksum.

    Raises:
        ValueError: if a non-blank line contains no ``:`` separator.
    """
    checksums = {}
    with open(inputFile, "r") as inputFH:
        for line in inputFH:
            if not line.strip():
                continue  # tolerate blank lines instead of failing to unpack
            fileName, sep, checksum = line.rpartition(":")
            if not sep:
                raise ValueError("malformed checksum line (no ':'): %r" % line)
            fileName = fileName.replace(' ', "")
            checksum = checksum.replace(' ', "").rstrip("\r\n")
            checksums[checksum] = fileName
    return checksums


def ChecksumFile(filename):
    """Return the hexadecimal SHA-256 digest of the file at *filename*.

    The file is read in 8192-byte chunks so large files are never held in
    memory all at once.
    """
    sha256 = hashlib.sha256()
    chunk_size = 8192
    with open(filename, 'rb') as myFile:
        for chunk in iter(lambda: myFile.read(chunk_size), b''):
            sha256.update(chunk)
    return sha256.hexdigest()


def _short_link_name(val, directory, manifest):
    """Pick a short file name for *val* that is free inside *directory*.

    The name is the first 15 characters of the base name (15 is an arbitrary
    choice) plus the extension.  *manifest* maps ``directory/name`` to the
    link it was created for; a name is usable when it is unclaimed or already
    belongs to *val*.  On a clash, single-character prefixes (ASCII letters,
    then digits) are tried in order.

    Raises:
        Exception: if every candidate name is taken by a different link.
    """
    stem, extension = os.path.splitext(os.path.basename(val))
    shortName = stem[:15] + extension
    candidates = [shortName]
    candidates.extend(cc + shortName
                      for cc in string.ascii_letters + string.digits)
    for candidate in candidates:
        # The manifest is keyed by the directory-qualified name, so the
        # lookup has to use the same form or clashes are never detected.
        owner = manifest.get(os.path.join(directory, candidate))
        if owner is None or owner == val:
            return candidate
    raise Exception("too many similar file names for linkage; giving up")


def _resolve_link_target(val, csvdir, singleFileDirectory, knownChecksums,
                         checksumCache, manifest):
    """Return the path a ``link:`` cell should point at, copying if needed.

    Without *singleFileDirectory* the link target is *val* itself.  When
    *knownChecksums* is non-empty, a file whose SHA-256 is already listed is
    mapped to the listed name instead.  Otherwise the file is copied into
    *singleFileDirectory* under a short name and, when *knownChecksums* is in
    use, registered there (the caller's dict is updated in place).

    Raises:
        Exception: if the file to copy does not exist, or no free short name
            can be found.
    """
    newFile = None
    checksum = None

    if not singleFileDirectory:
        newFile = val

    if knownChecksums:
        checksum = checksumCache.get(val)
        if checksum is None:
            # NOTE(review): the checksum is taken of `val` relative to the
            # current directory, while the copy below resolves it against
            # csvdir -- confirm which one is intended.
            checksum = ChecksumFile(val)
            checksumCache[val] = checksum
        if checksum in knownChecksums:
            newFile = knownChecksums[checksum]
            print("FOUND new file in known: " + newFile)

    if not newFile:
        shortName = _short_link_name(val, singleFileDirectory, manifest)
        src = os.path.join(csvdir, val)
        if not os.path.exists(src):
            raise Exception("File " + csvdir + '/' + val + " does not exist in single file directory mode; giving up")
        dest = os.path.join(csvdir, singleFileDirectory, shortName)
        # copy2 copies the contents plus permission bits and timestamps.
        shutil.copy2(src, dest)

        newFile = os.path.join(singleFileDirectory, shortName)
        if knownChecksums:
            knownChecksums[checksum] = newFile

    return newFile


def csv2ods(csvname, odsname, encoding='', singleFileDirectory=None, knownChecksums=None, verbose=False):
    """Convert the CSV file *csvname* into the ODS spreadsheet *odsname*.

    Cell conventions recognised in the CSV:
      * ``$...``     -- stored as a 'currency' cell (the ``$`` is dropped);
      * ``link:...`` -- stored as a 'link' cell pointing at ``../<target>``;
      * ``pagebreak``-- applies the page-break row style to the current row;
      * ``title:...``-- renames the current sheet;
      * anything else is stored as a 'string' cell.
    Empty CSV records produce an empty string cell in column 1.

    Args:
        csvname: path of the CSV file to read.
        odsname: path of the spreadsheet to write.
        encoding: any value other than ``''`` makes non-link cells be decoded
            as UTF-8 (the value itself is not used as a codec name).
        singleFileDirectory: if set, linked files are copied into this
            directory under short names and links point there.
        knownChecksums: optional ``{sha256: filename}`` dict of files already
            present; it is updated in place as new files are copied.
        verbose: print progress information.

    Side effects: appends the linked file names to ``MANIFEST`` in the
    current directory when at least one link was processed.

    Raises:
        Exception: see ``_resolve_link_target``.
    """
    # Imported here so the checksum helpers and --help stay usable on
    # systems where the spreadsheet library is not installed.
    import ooolib2

    if knownChecksums is None:
        knownChecksums = {}
    filesSavedinManifest = {}
    checksumCache = {}

    if verbose:
        print('converting from %s to %s' % (csvname, odsname))

    if singleFileDirectory:
        # NOTE(review): created relative to the current directory, whereas
        # copies go to csvdir/singleFileDirectory -- these only agree when
        # the CSV lives in the current directory.
        if not os.path.isdir(os.path.join(os.getcwd(), singleFileDirectory)):
            os.mkdir(singleFileDirectory)

    doc = ooolib2.Calc()
    # add a pagebreak style
    style = 'pagebreak'
    style_pagebreak = doc.styles.get_next_style('row')
    style_data = tuple([style, ('style:row-height', doc.styles.property_row_height)])
    doc.styles.style_config[style_data] = style_pagebreak
    # add a currency style
    style = 'currency'
    style_currency = doc.styles.get_next_style('cell')
    style_data = tuple([style])
    doc.styles.style_config[style_data] = style_currency

    csvdir = os.path.dirname(csvname) or '.'

    with open(csvname, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='"')
        for row, fields in enumerate(reader, 1):
            if not fields:
                # enter an empty string for blank lines
                doc.set_cell_value(1, row, 'string', '')
                continue

            for col, val in enumerate(fields, 1):
                # Only utf8 decode if it's not a filename
                if encoding != '' and val[0:5] != "link:":
                    val = val.decode('utf8')

                if val[0:1] == '$':
                    doc.set_cell_value(col, row, 'currency', val[1:])
                elif val[0:5] == "link:":
                    val = val[5:]
                    linkname = os.path.basename(val)  # name is just the last component
                    newFile = _resolve_link_target(
                        val, csvdir, singleFileDirectory, knownChecksums,
                        checksumCache, filesSavedinManifest)

                    linkrel = '../' + newFile  # ../ means remove the name of the *.ods
                    doc.set_cell_value(col, row, 'link', (linkrel, linkname))
                    filesSavedinManifest[newFile] = val

                    linkpath = csvdir + '/' + val
                    if not os.path.exists(linkpath):
                        print("WARNING: link %s DOES NOT EXIST at %s" % (val, linkpath))
                    elif verbose:
                        print('relative link %s EXISTS at %s' % (val, linkpath))
                elif val == "pagebreak":
                    doc.sheets[doc.sheet_index].set_sheet_config(('row', row), style_pagebreak)
                elif val[0:6] == "title:":
                    doc.sheets[doc.sheet_index].set_name(val[6:])
                else:
                    doc.set_cell_value(col, row, 'string', val)

    # save manifest file
    if filesSavedinManifest:
        with open("MANIFEST", "a") as manifestFH:
            manifestFH.write("# Files from %s\n" % odsname)
            for savedName in filesSavedinManifest:
                manifestFH.write("%s\n" % savedName)

    # Save spreadsheet file.
    doc.save(odsname)


def main():
    """Parse the command line and run the CSV to ODS conversion."""
    program = os.path.basename(sys.argv[0])
    version = '0.1'
    parser = optparse.OptionParser(usage='%prog [--help] [--verbose]',
                                   version='%prog ' + version)
    parser.add_option('-v', '--verbose', action='store_true',
                      dest='verbose',
                      help='provide extra information while processing')
    parser.add_option('-c', '--csv', action='store',
                      help='csv file to process')
    parser.add_option('-o', '--ods', action='store',
                      help='ods output filename')
    parser.add_option('-e', '--encoding', action='store',
                      help='unicode character encoding type')
    parser.add_option('-d', '--single-file-directory', action='store',
                      help='directory name to move all files into')
    parser.add_option('-s', '--known-checksum-list', action='store',
                      help='file listing "filename: checksum" entries for '
                           'files already present')
    (options, args) = parser.parse_args()

    if len(args) != 0:
        parser.error("not expecting extra args")
    if not options.csv:
        # Without this, os.path.exists(None) below fails with a traceback.
        parser.error("--csv is required")
    if not os.path.exists(options.csv):
        err('csv does not exist: %s' % options.csv)
    if not options.ods:
        (root, ext) = os.path.splitext(options.csv)
        options.ods = root + '.ods'
    if options.verbose:
        print('%s: verbose mode on' % program)
        print('csv: %s' % options.csv)
        print('ods: %s' % options.ods)
        print('encoding: %s' % options.encoding)
    if options.known_checksum_list and not options.single_file_directory:
        err(program + ": --known-checksum-list option is completely useless without --single-file-directory")
    knownChecksums = {}
    if options.known_checksum_list:
        if not os.access(options.known_checksum_list, os.R_OK):
            err(program + ": unable to read file: " + options.known_checksum_list)
        knownChecksums = ReadChecksums(options.known_checksum_list)
    csv2ods(options.csv, options.ods, options.encoding, options.single_file_directory, knownChecksums, options.verbose)


if __name__ == '__main__':
    main()