diff options
author | Bradley M. Kuhn <bkuhn@ebb.org> | 2013-01-09 14:51:33 -0500 |
---|---|---|
committer | Bradley M. Kuhn <bkuhn@ebb.org> | 2013-02-18 14:08:45 -0500 |
commit | d13ab6a4026cfeec18fdd989862aecbe83caa20f (patch) | |
tree | 7fc9ad4ebeacd9fa6c28496973aa7e1f3d001dc6 /contrib/non-profit-audit-reports/csv2ods.py | |
parent | 18d2867a6315562b4f4588ebf4fc58adf1fb9acf (diff) | |
download | fork-ledger-d13ab6a4026cfeec18fdd989862aecbe83caa20f.tar.gz fork-ledger-d13ab6a4026cfeec18fdd989862aecbe83caa20f.tar.bz2 fork-ledger-d13ab6a4026cfeec18fdd989862aecbe83caa20f.zip |
Support for a list of known checksums of files already copied over.
For the times when we want to make shorter names of files by doing copies of
the documentation files for hyperlink usage, allow input of a new command
line option which is a list in the form of:
PATH_TO_FILE : sha25sum
so that those files can be used rather than new copies made.
Diffstat (limited to 'contrib/non-profit-audit-reports/csv2ods.py')
-rwxr-xr-x | contrib/non-profit-audit-reports/csv2ods.py | 79 |
1 files changed, 70 insertions, 9 deletions
diff --git a/contrib/non-profit-audit-reports/csv2ods.py b/contrib/non-profit-audit-reports/csv2ods.py index 3a3411ba..7dd840c8 100755 --- a/contrib/non-profit-audit-reports/csv2ods.py +++ b/contrib/non-profit-audit-reports/csv2ods.py @@ -25,14 +25,46 @@ import csv import ooolib2 import shutil import string +from Crypto.Hash import SHA256 def err(msg): print 'error: %s' % msg sys.exit(1) -def csv2ods(csvname, odsname, encoding='', singleFileDirectory=None, verbose = False): +def ReadChecksums(inputFile): + checksums = {} + with open(inputFile, "r") as inputFH: + entries = inputFH.readlines() + for ee in entries: + fileName, checksum = ee.split(":") + fileName = fileName.replace(' ', "") + checksum = checksum.replace(' ', "") + checksum = checksum.replace("\n", "") + checksums[checksum] = fileName + return checksums + +def ChecksumFile(filename): + sha256 = SHA256.new() + chunk_size = 8192 + with open(filename, 'rb') as myFile: + while True: + chunk = myFile.read(chunk_size) + if len(chunk) == 0: + break + sha256.update(chunk) + return sha256.hexdigest() + +def main(): + program = os.path.basename(sys.argv[0]) + + print get_file_checksum(sys.argv[1]) + +def csv2ods(csvname, odsname, encoding='', singleFileDirectory=None, knownChecksums={}, verbose = False): filesSavedinManifest = {} + if knownChecksums: + checksumCache = {} + if verbose: print 'converting from %s to %s' % (csvname, odsname) @@ -70,10 +102,25 @@ def csv2ods(csvname, odsname, encoding='', singleFileDirectory=None, verbose = F if (len(val) > 0 and val[0:5] == "link:"): val = val[5:] linkname = os.path.basename(val) # name is just the last component + newFile = None + if not singleFileDirectory: newFile = val - else: + + if knownChecksums: + if not checksumCache.has_key(val): + checksum = ChecksumFile(val) + checksumCache[val] = checksum + else: + checksum = checksumCache[val] + + if knownChecksums.has_key(checksum): + newFile = knownChecksums[checksum] + print "FOUND new file in known: " + newFile + + if not newFile: relativeFileWithPath = os.path.basename(val) + fileName, fileExtension = os.path.splitext(relativeFileWithPath) newFile = fileName[:15] # 15 is an arbitrary choice. newFile = newFile + fileExtension @@ -88,19 +135,24 @@ def csv2ods(csvname, odsname, encoding='', singleFileDirectory=None, verbose = F if not filesSavedinManifest.has_key(testFile): break testFile = None - if not testFile: - raise Exception("too many similar file names for linkage; giving up") - else: - newFile = testFile - if not os.path.exists(csvdir + '/' + val): - raise Exception("File" + csvdir + '/' + val + " does not exist in single file directory mode; giving up") + if not testFile: + raise Exception("too many similar file names for linkage; giving up") + else: + newFile = testFile + if not os.path.exists(csvdir + '/' + val): + raise Exception("File" + csvdir + '/' + val + " does not exist in single file directory mode; giving up") src = os.path.join(csvdir, val) dest = os.path.join(csvdir, singleFileDirectory, newFile) shutil.copyfile(src, dest) shutil.copystat(src, dest) shutil.copymode(src, dest) + newFile = os.path.join(singleFileDirectory, newFile) + if knownChecksums: + checksumCache[checksum] = newFile + knownChecksums[checksum] = newFile + linkrel = '../' + newFile # ../ means remove the name of the *.ods doc.set_cell_value(col + 1, row, 'link', (linkrel, linkname)) linkpath = csvdir + '/' + val @@ -149,6 +201,8 @@ def main(): help='unicode character encoding type') parser.add_option('-d', '--single-file-directory', action='store', help='directory name to move all files into') + parser.add_option('-s', '--known-checksum-list', action='store', + help='directory name to move all files into') (options, args) = parser.parse_args() if len(args) != 0: @@ -163,7 +217,14 @@ def main(): print 'csv:', options.csv print 'ods:', options.ods print 'ods:', options.encoding - csv2ods(options.csv, options.ods, options.encoding, options.single_file_directory, options.verbose) + if options.known_checksum_list and not options.single_file_directory: + err(program + ": --known-checksum-list option is completely useless without --single-file-directory") + knownChecksums = {} + if options.known_checksum_list: + if not os.access(options.known_checksum_list, os.R_OK): + err(program + ": unable to read file: " + options.known_checksum_list) + knownChecksums = ReadChecksums(options.known_checksum_list) + csv2ods(options.csv, options.ods, options.encoding, options.single_file_directory, knownChecksums, options.verbose) if __name__ == '__main__': main() |