1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
|
#!/usr/bin/python
# csv2ods.py
# Convert example csv file to ods
#
# Copyright (c) 2012 Tom Marble
# Copyright (c) 2012, 2013 Bradley M. Kuhn
#
# This program gives you software freedom; you can copy, modify, convey,
# and/or redistribute it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program in a file called 'GPLv3'. If not, write to the:
# Free Software Foundation, Inc., 51 Franklin St, Fifth Floor
# Boston, MA 02110-1301, USA.
import sys, os, os.path, optparse
import csv
import ooolib2
import shutil
import string
from Crypto.Hash import SHA256
def err(msg):
    """Report a fatal error and terminate the program.

    Writes ``error: <msg>`` to standard error, so the diagnostic is not
    mixed into anything a caller captures from standard output, and then
    exits with status 1.

    msg -- human-readable description of the failure.

    Raises SystemExit (via sys.exit) unconditionally.
    """
    sys.stderr.write('error: %s\n' % msg)
    sys.exit(1)
def ReadChecksums(inputFile):
    """Load a checksum list and return it as a {checksum: fileName} dict.

    Each non-blank line of *inputFile* has the form ``fileName: checksum``.
    The line is split on its LAST colon, so a file name that itself
    contains a colon is kept intact.  As before, every space is removed
    from both fields (file names with embedded spaces are therefore
    stored without them).  Blank lines are skipped.  When the same
    checksum appears more than once, the last entry wins.

    inputFile -- path of the checksum list to read.

    Raises ValueError if a non-blank line contains no colon, and whatever
    open() raises if the file cannot be read.
    """
    checksums = {}
    with open(inputFile, "r") as inputFH:
        for lineNumber, line in enumerate(inputFH, 1):
            # strip() also drops a trailing '\r' from CRLF files, which
            # would otherwise end up inside the checksum and never match.
            entry = line.strip()
            if not entry:
                continue
            fileName, separator, checksum = entry.rpartition(":")
            if not separator:
                raise ValueError("%s:%d: expected 'filename: checksum', got %r"
                                 % (inputFile, lineNumber, entry))
            checksums[checksum.replace(' ', "")] = fileName.replace(' ', "")
    return checksums
def ChecksumFile(filename):
    """Return the SHA-256 digest of a file as a lowercase hex string.

    The file is read in binary mode in 8 KiB chunks, so arbitrarily large
    files are hashed without being loaded into memory at once.

    filename -- path of the file to hash.

    Raises whatever open()/read() raise if the file cannot be read.
    """
    # hashlib is in the standard library and yields the same hex digest as
    # Crypto.Hash.SHA256; imported here so this function does not depend
    # on the third-party package.
    import hashlib

    sha256 = hashlib.sha256()
    chunk_size = 8192
    with open(filename, 'rb') as myFile:
        # iter() with a sentinel stops at the first empty read (EOF).
        for chunk in iter(lambda: myFile.read(chunk_size), b''):
            sha256.update(chunk)
    return sha256.hexdigest()
def main():
    """Print the SHA-256 checksum of the file named by the first argument.

    NOTE: a second ``main`` is defined further down in this file; that
    later definition rebinds the name, so this version is not the one the
    ``__main__`` guard ends up calling.
    """
    # The original called get_file_checksum(), which is not defined in
    # this file; ChecksumFile() is the checksum helper that is.
    print(ChecksumFile(sys.argv[1]))
def _shortLinkName(val, singleFileDirectory, filesSavedinManifest):
    """Choose the file name under which *val* is stored in the single directory.

    The name is the first 15 characters of the link's base name plus its
    extension.  If that name is already recorded in *filesSavedinManifest*
    for a DIFFERENT source file, a one-character prefix (ASCII letters,
    then digits) is tried until a name is found that is either unused or
    already belongs to *val*.

    Raises Exception when every prefixed name is taken by another file.
    """
    fileName, fileExtension = os.path.splitext(os.path.basename(val))
    shortName = fileName[:15] + fileExtension  # 15 is an arbitrary choice.
    for prefix in [''] + list(string.ascii_letters + string.digits):
        candidate = prefix + shortName
        # Manifest keys include the directory component, so the lookup
        # must use the joined path; checking the bare name never matches.
        savedAs = os.path.join(singleFileDirectory, candidate)
        if filesSavedinManifest.get(savedAs, val) == val:
            return candidate
    raise Exception("too many similar file names for linkage; giving up")


def _resolveLinkTarget(val, csvdir, singleFileDirectory, knownChecksums,
                       checksumCache, filesSavedinManifest):
    """Return the path that a ``link:`` cell for *val* should point at.

    Without *singleFileDirectory* the link value is used as is.  When
    *knownChecksums* is non-empty, the file is hashed (once per link
    value, via *checksumCache*) and a known file with the same checksum
    is reused.  Otherwise the file is copied from *csvdir* into
    *singleFileDirectory* under a shortened name, and its checksum is
    added to *knownChecksums* so later identical files reuse the copy.

    Raises Exception if the file to copy does not exist, or if no free
    short name can be found.
    """
    newFile = None
    checksum = None

    if not singleFileDirectory:
        newFile = val

    if knownChecksums:
        # NOTE(review): the file is hashed at `val` relative to the
        # current directory, while the copy below reads it relative to
        # csvdir -- confirm these are meant to coincide.
        if val not in checksumCache:
            checksumCache[val] = ChecksumFile(val)
        checksum = checksumCache[val]
        if checksum in knownChecksums:
            newFile = knownChecksums[checksum]
            print("FOUND new file in known: " + newFile)

    if not newFile:
        newFile = _shortLinkName(val, singleFileDirectory, filesSavedinManifest)
        if not os.path.exists(csvdir + '/' + val):
            raise Exception("File" + csvdir + '/' + val + " does not exist in single file directory mode; giving up")
        src = os.path.join(csvdir, val)
        dest = os.path.join(csvdir, singleFileDirectory, newFile)
        shutil.copyfile(src, dest)
        shutil.copystat(src, dest)
        shutil.copymode(src, dest)

        newFile = os.path.join(singleFileDirectory, newFile)
        if knownChecksums:
            # Remember the fresh copy so identical content is linked, not
            # copied again (this updates the caller's dict in place).
            knownChecksums[checksum] = newFile

    return newFile


def csv2ods(csvname, odsname, encoding='', singleFileDirectory=None, knownChecksums=None, verbose = False):
    """Convert the CSV file *csvname* into the spreadsheet *odsname*.

    Every CSV row becomes one spreadsheet row; an empty CSV row becomes a
    row whose first cell is an empty string.  Cells are interpreted as:

      * ``$...``     -- stored as a 'currency' cell, without the ``$``
      * ``link:...`` -- stored as a 'link' cell pointing at the named file
      * ``pagebreak``-- applies the page-break row style to the row
      * otherwise    -- stored as a 'string' cell

    csvname             -- path of the CSV file to read.
    odsname             -- path of the spreadsheet passed to doc.save().
    encoding            -- '' leaves cell text undecoded; any other value
                           decodes non-link cells, using the named codec
                           (a false value such as None means 'utf8').
    singleFileDirectory -- if set, linked files are copied into this
                           directory under shortened names; the directory
                           is created if it does not exist.
    knownChecksums      -- optional {checksum: file} dict of files already
                           stored; consulted and extended in place.
    verbose             -- print progress information.

    If any link was written, the link targets are appended to a file
    named MANIFEST in the current directory.

    Raises Exception when a linked file that must be copied is missing or
    no unused short name can be found for it.
    """
    # A shared mutable default would leak state between calls.
    if knownChecksums is None:
        knownChecksums = {}

    filesSavedinManifest = {}  # link target written to the sheet -> link value
    checksumCache = {}         # link value -> checksum, so files are hashed once

    if verbose:
        print('converting from %s to %s' % (csvname, odsname))

    if singleFileDirectory:
        if not os.path.isdir(os.path.join(os.getcwd(), singleFileDirectory)):
            os.mkdir(singleFileDirectory)

    doc = ooolib2.Calc()
    # add a pagebreak style
    style = 'pagebreak'
    style_pagebreak = doc.styles.get_next_style('row')
    style_data = tuple([style, ('style:row-height', doc.styles.property_row_height)])
    doc.styles.style_config[style_data] = style_pagebreak
    # add a currency style
    style = 'currency'
    style_currency = doc.styles.get_next_style('cell')
    style_data = tuple([style])
    doc.styles.style_config[style_data] = style_currency

    row = 1
    csvdir = os.path.dirname(csvname) or '.'
    # The context manager closes the CSV even if a row raises.
    with open(csvname, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='"')
        for fields in reader:
            if not fields:
                # enter an empty string for blank lines
                doc.set_cell_value(1, row, 'string', '')
            for col, val in enumerate(fields):
                isLink = val.startswith("link:")
                if encoding != '' and not isLink:
                    # Only decode if it's not a filename.  Honour the
                    # requested codec instead of always assuming utf8.
                    val = unicode(val, encoding or 'utf8')
                if len(val) > 0 and val[0] == '$':
                    doc.set_cell_value(col + 1, row, 'currency', val[1:])
                elif isLink:
                    val = val[5:]
                    linkname = os.path.basename(val)  # name is just the last component
                    newFile = _resolveLinkTarget(val, csvdir, singleFileDirectory,
                                                 knownChecksums, checksumCache,
                                                 filesSavedinManifest)
                    linkrel = '../' + newFile  # ../ means remove the name of the *.ods
                    doc.set_cell_value(col + 1, row, 'link', (linkrel, linkname))
                    # Record which source each stored name belongs to; the
                    # short-name collision check relies on this mapping.
                    filesSavedinManifest[newFile] = val

                    linkpath = csvdir + '/' + val
                    if not os.path.exists(linkpath):
                        print("WARNING: link %s DOES NOT EXIST at %s" % (val, linkpath))
                    elif verbose:
                        print('relative link %s EXISTS at %s' % (val, linkpath))
                elif val == "pagebreak":
                    doc.sheets[doc.sheet_index].set_sheet_config(('row', row), style_pagebreak)
                else:
                    doc.set_cell_value(col + 1, row, 'string', val)
            row += 1

    # save manifest file
    if filesSavedinManifest:
        with open("MANIFEST", "a") as manifestFH:
            manifestFH.write("# Files from %s\n" % odsname)
            for savedFile in filesSavedinManifest:
                manifestFH.write("%s\n" % savedFile)

    # Save spreadsheet file.
    doc.save(odsname)
def main():
    """Parse the command line and convert the requested CSV file to ODS.

    Options: --csv (required), --ods (defaults to the CSV name with an
    '.ods' extension), --encoding, --single-file-directory,
    --known-checksum-list (only valid with --single-file-directory) and
    --verbose.

    Exits with status 2 on a usage error (via optparse) and with status 1
    when the CSV file or the checksum list cannot be read (via err()).
    """
    program = os.path.basename(sys.argv[0])
    version = '0.1'
    parser = optparse.OptionParser(usage='%prog [--help] [--verbose]',
                                   version='%prog ' + version)
    parser.add_option('-v', '--verbose', action='store_true',
                      dest='verbose',
                      help='provide extra information while processing')
    parser.add_option('-c', '--csv', action='store',
                      help='csv file to process')
    parser.add_option('-o', '--ods', action='store',
                      help='ods output filename')
    parser.add_option('-e', '--encoding', action='store',
                      help='unicode character encoding type')
    parser.add_option('-d', '--single-file-directory', action='store',
                      help='directory name to move all files into')
    parser.add_option('-s', '--known-checksum-list', action='store',
                      help='file of "filename: checksum" lines naming files already saved')
    (options, args) = parser.parse_args()

    if len(args) != 0:
        parser.error("not expecting extra args")
    # Without this check a missing --csv reaches os.path.exists(None) and
    # dies with a traceback instead of a usage message.
    if not options.csv:
        parser.error("--csv is required")
    if not os.path.exists(options.csv):
        err('csv does not exist: %s' % options.csv)
    if not options.ods:
        (root, ext) = os.path.splitext(options.csv)
        options.ods = root + '.ods'
    if options.verbose:
        print('%s: verbose mode on' % program)
        print('csv: %s' % options.csv)
        print('ods: %s' % options.ods)
        print('encoding: %s' % options.encoding)

    if options.known_checksum_list and not options.single_file_directory:
        err(program + ": --known-checksum-list option is completely useless without --single-file-directory")

    knownChecksums = {}
    if options.known_checksum_list:
        if not os.access(options.known_checksum_list, os.R_OK):
            err(program + ": unable to read file: " + options.known_checksum_list)
        knownChecksums = ReadChecksums(options.known_checksum_list)

    csv2ods(options.csv, options.ods, options.encoding, options.single_file_directory, knownChecksums, options.verbose)
# Run the command-line converter only when this file is executed as a
# script; importing it as a module just defines the functions.
if __name__ == '__main__':
    main()
|