summaryrefslogtreecommitdiff
path: root/contrib/non-profit-audit-reports/csv2ods.py
diff options
context:
space:
mode:
authorBradley M. Kuhn <bkuhn@ebb.org>2013-01-09 14:51:33 -0500
committerBradley M. Kuhn <bkuhn@ebb.org>2013-02-18 14:08:45 -0500
commitd13ab6a4026cfeec18fdd989862aecbe83caa20f (patch)
tree7fc9ad4ebeacd9fa6c28496973aa7e1f3d001dc6 /contrib/non-profit-audit-reports/csv2ods.py
parent18d2867a6315562b4f4588ebf4fc58adf1fb9acf (diff)
downloadfork-ledger-d13ab6a4026cfeec18fdd989862aecbe83caa20f.tar.gz
fork-ledger-d13ab6a4026cfeec18fdd989862aecbe83caa20f.tar.bz2
fork-ledger-d13ab6a4026cfeec18fdd989862aecbe83caa20f.zip
Support for a list of known checksums of files already copied over.
When documentation files are copied under shorter names for hyperlink usage, we sometimes want to reuse copies that already exist. This adds a new command-line option that takes a list of entries of the form PATH_TO_FILE : sha256sum, so that those already-copied files can be used rather than new copies being made.
Diffstat (limited to 'contrib/non-profit-audit-reports/csv2ods.py')
-rwxr-xr-xcontrib/non-profit-audit-reports/csv2ods.py79
1 files changed, 70 insertions, 9 deletions
diff --git a/contrib/non-profit-audit-reports/csv2ods.py b/contrib/non-profit-audit-reports/csv2ods.py
index 3a3411ba..7dd840c8 100755
--- a/contrib/non-profit-audit-reports/csv2ods.py
+++ b/contrib/non-profit-audit-reports/csv2ods.py
@@ -25,14 +25,46 @@ import csv
import ooolib2
import shutil
import string
+from Crypto.Hash import SHA256
def err(msg):
    """Report a fatal error and terminate the program with exit status 1.

    The message is written to standard error, prefixed with ``error: ``, so
    that it does not get mixed into the program's normal standard output.
    """
    sys.stderr.write('error: %s\n' % msg)
    sys.exit(1)
-def csv2ods(csvname, odsname, encoding='', singleFileDirectory=None, verbose = False):
def ReadChecksums(inputFile):
    """Load a list of known file checksums from *inputFile*.

    Each non-blank line has the form ``PATH_TO_FILE : checksum``.  Blank
    lines are ignored.

    Returns a dict mapping each checksum (lower-cased, whitespace removed)
    to the file name given for it.  If a checksum is listed more than once,
    the last entry wins.

    Raises ValueError if a non-blank line contains no ':' separator.
    """
    checksums = {}
    with open(inputFile, "r") as inputFH:
        for lineNumber, line in enumerate(inputFH, 1):
            line = line.strip()
            if not line:
                continue
            if ":" not in line:
                raise ValueError("%s:%d: expected 'PATH_TO_FILE : checksum'"
                                 % (inputFile, lineNumber))
            # Split on the LAST colon: the checksum never contains one, but
            # the path may (e.g. a drive letter or a timestamped file name).
            fileName, checksum = line.rsplit(":", 1)
            # Only trim around the path; spaces inside it are part of the name.
            fileName = fileName.strip()
            # Hex digests are case-insensitive; normalise to the lower-case
            # form that hexdigest() produces so lookups match.
            checksum = "".join(checksum.split()).lower()
            checksums[checksum] = fileName
    return checksums
+
def ChecksumFile(filename):
    """Return the SHA-256 digest of the file *filename* as a hex string.

    The file is opened in binary mode and read in fixed-size chunks, so the
    whole file is never held in memory at once.  Errors from opening or
    reading the file propagate to the caller.
    """
    # The standard library's hashlib provides SHA-256, so no third-party
    # crypto package is needed just to hash a file.  Imported here so the
    # function is self-contained.
    import hashlib

    sha256 = hashlib.sha256()
    chunk_size = 8192
    with open(filename, 'rb') as myFile:
        # Two-argument iter() keeps calling read() until it returns the
        # empty-bytes sentinel, i.e. end of file.
        for chunk in iter(lambda: myFile.read(chunk_size), b''):
            sha256.update(chunk)
    return sha256.hexdigest()
+
def main():
    """Print the checksum of the file named by the first command-line argument.

    NOTE(review): a second ``def main()`` appears later in this file and
    rebinds the name, so this version is not the one run at script start-up.
    It looks like a leftover debugging entry point -- confirm whether it
    should be removed or renamed.
    """
    # The checksum helper defined in this file is ChecksumFile; the name
    # previously called here does not appear anywhere in the visible source
    # and raised NameError.
    print(ChecksumFile(sys.argv[1]))
+
+def csv2ods(csvname, odsname, encoding='', singleFileDirectory=None, knownChecksums={}, verbose = False):
filesSavedinManifest = {}
+ if knownChecksums:
+ checksumCache = {}
+
if verbose:
print 'converting from %s to %s' % (csvname, odsname)
@@ -70,10 +102,25 @@ def csv2ods(csvname, odsname, encoding='', singleFileDirectory=None, verbose = F
if (len(val) > 0 and val[0:5] == "link:"):
val = val[5:]
linkname = os.path.basename(val) # name is just the last component
+ newFile = None
+
if not singleFileDirectory:
newFile = val
- else:
+
+ if knownChecksums:
+ if not checksumCache.has_key(val):
+ checksum = ChecksumFile(val)
+ checksumCache[val] = checksum
+ else:
+ checksum = checksumCache[val]
+
+ if knownChecksums.has_key(checksum):
+ newFile = knownChecksums[checksum]
+ print "FOUND new file in known: " + newFile
+
+ if not newFile:
relativeFileWithPath = os.path.basename(val)
+
fileName, fileExtension = os.path.splitext(relativeFileWithPath)
newFile = fileName[:15] # 15 is an arbitrary choice.
newFile = newFile + fileExtension
@@ -88,19 +135,24 @@ def csv2ods(csvname, odsname, encoding='', singleFileDirectory=None, verbose = F
if not filesSavedinManifest.has_key(testFile):
break
testFile = None
- if not testFile:
- raise Exception("too many similar file names for linkage; giving up")
- else:
- newFile = testFile
- if not os.path.exists(csvdir + '/' + val):
- raise Exception("File" + csvdir + '/' + val + " does not exist in single file directory mode; giving up")
+ if not testFile:
+ raise Exception("too many similar file names for linkage; giving up")
+ else:
+ newFile = testFile
+ if not os.path.exists(csvdir + '/' + val):
+ raise Exception("File" + csvdir + '/' + val + " does not exist in single file directory mode; giving up")
src = os.path.join(csvdir, val)
dest = os.path.join(csvdir, singleFileDirectory, newFile)
shutil.copyfile(src, dest)
shutil.copystat(src, dest)
shutil.copymode(src, dest)
+
newFile = os.path.join(singleFileDirectory, newFile)
+ if knownChecksums:
+ checksumCache[checksum] = newFile
+ knownChecksums[checksum] = newFile
+
linkrel = '../' + newFile # ../ means remove the name of the *.ods
doc.set_cell_value(col + 1, row, 'link', (linkrel, linkname))
linkpath = csvdir + '/' + val
@@ -149,6 +201,8 @@ def main():
help='unicode character encoding type')
parser.add_option('-d', '--single-file-directory', action='store',
help='directory name to move all files into')
+ parser.add_option('-s', '--known-checksum-list', action='store',
+ help='directory name to move all files into')
(options, args) = parser.parse_args()
if len(args) != 0:
@@ -163,7 +217,14 @@ def main():
print 'csv:', options.csv
print 'ods:', options.ods
print 'ods:', options.encoding
- csv2ods(options.csv, options.ods, options.encoding, options.single_file_directory, options.verbose)
+ if options.known_checksum_list and not options.single_file_directory:
+ err(program + ": --known-checksum-list option is completely useless without --single-file-directory")
+ knownChecksums = {}
+ if options.known_checksum_list:
+ if not os.access(options.known_checksum_list, os.R_OK):
+ err(program + ": unable to read file: " + options.known_checksum_list)
+ knownChecksums = ReadChecksums(options.known_checksum_list)
+ csv2ods(options.csv, options.ods, options.encoding, options.single_file_directory, knownChecksums, options.verbose)
if __name__ == '__main__':
main()