#!/usr/bin/python -t
# /usr/share/createrepo/dumpMetadata.py
# base classes and functions for dumping out package Metadata
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Copyright 2004 Duke University
# $Id$

import os
import rpm
import exceptions
import md5
import sha
import types
import struct
import re
import stat
import bz2

try:
    import sqlitecachec
except ImportError:
    pass

# done to fix gzip randomly changing the checksum
import gzip
from zlib import error as zlibError
from gzip import write32u, FNAME

__all__ = ["GzipFile", "open"]

class GzipFile(gzip.GzipFile):
    def _write_gzip_header(self):
        self.fileobj.write('\037\213')          # magic header
        self.fileobj.write('\010')              # compression method
        fname = self.filename[:-3]
        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(chr(flags))
        write32u(self.fileobj, long(0))
        self.fileobj.write('\002')
        self.fileobj.write('\377')
        if fname:
            self.fileobj.write(fname + '\000')


def _gzipOpen(filename, mode="rb", compresslevel=9):
    return GzipFile(filename, mode, compresslevel)

def bzipFile(source, dest):
    s_fn = open(source, 'rb')
    destination = bz2.BZ2File(dest, 'w', compresslevel=9)

    while True:
        data = s_fn.read(1024000)
        if not data:
            break
        destination.write(data)

    destination.close()
    s_fn.close()


def returnFD(filename):
    try:
        fdno = os.open(filename, os.O_RDONLY)
    except OSError:
        raise MDError, "Error opening file"
    return fdno

def returnHdr(ts, package):
    """hand back the rpm header or raise an Error if the pkg is fubar"""
    opened_here = 0
    try:
        if type(package) is types.StringType:
            opened_here = 1
            fdno = os.open(package, os.O_RDONLY)
        else:
            fdno = package # let's assume this is an fdno and go with it :)
    except OSError:
        raise MDError, "Error opening file"
    ts.setVSFlags((rpm._RPMVSF_NOSIGNATURES|rpm.RPMVSF_NOMD5|rpm.RPMVSF_NEEDPAYLOAD))
    try:
        hdr = ts.hdrFromFdno(fdno)
    except rpm.error:
        raise MDError, "Error opening package"
    if type(hdr) != rpm.hdr:
        raise MDError, "Error opening package"
    ts.setVSFlags(0)

    if opened_here:
        os.close(fdno)
        del fdno

    return hdr

def getChecksum(sumtype, file, CHUNK=2**16):
    """takes filename, hand back Checksum of it
       sumtype = md5 or sha
       filename = /path/to/file
       CHUNK=65536 by default"""

    # chunking brazenly lifted from Ryan Tomayko
    opened_here = 0
    try:
        if type(file) is not types.StringType:
            fo = file # assume it's a file-like-object
        else:
            opened_here = 1
            fo = open(file, 'rb', CHUNK)

        if sumtype == 'md5':
            sum = md5.new()
        elif sumtype == 'sha':
            sum = sha.new()
        else:
            raise MDError, 'Error Checksumming file, wrong checksum type %s' % sumtype
        chunk = fo.read
        while chunk:
            chunk = fo.read(CHUNK)
            sum.update(chunk)

        if opened_here:
            fo.close()
            del fo

        return sum.hexdigest()
    except:
        raise MDError, 'Error opening file for checksum: %s' % file


def utf8String(string):
    """hands back a unicoded string"""
    if string is None:
        return ''
    elif isinstance(string, unicode):
        return string
    du = False
    try:
        x = unicode(string, 'ascii')
        du = True
    except UnicodeError:
        encodings = ['utf-8', 'iso-8859-1', 'iso-8859-15', 'iso-8859-2']
        for enc in encodings:
            try:
                x = unicode(string, enc)
            except UnicodeError:
                pass
            else:
                if x.encode(enc) == string:
                    return x.encode('utf-8')

    newstring = ''
    # Kill bytes (or libxml will die) not in the small byte portion of:
    # http://www.w3.org/TR/REC-xml/#NT-Char
    # we allow high bytes, if it passed the utf8 check above. Eg.
    # good chars = #x9 | #xA | #xD | [#x20-...]
    bad_small_bytes = range(0, 8) + [11, 12] + range(14, 32)
    for char in string:
        if ord(char) in bad_small_bytes:
            pass # Just ignore these bytes...
        elif not du and ord(char) > 127:
            newstring = newstring + '?'
        else:
            newstring = newstring + char
    return newstring
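# Usage sketch (not part of the original module; the .rpm path is
# hypothetical).  returnHdr() accepts either a filename or an already-open
# fd, and getChecksum() accepts either a filename or a file object:
#
#   ts = rpm.TransactionSet()
#   hdr = returnHdr(ts, 'foo-1.0-1.i386.rpm')       # rpm.hdr instance
#   print hdr['name'], hdr['version'], hdr['release']
#   print getChecksum('sha', 'foo-1.0-1.i386.rpm')  # hex digest of whole file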
def byteranges(file):
    """takes an rpm file or fileobject and returns byteranges for location of the header"""
    opened_here = 0
    if type(file) is not types.StringType:
        fo = file
    else:
        opened_here = 1
        fo = open(file, 'r')
    # read in past lead and first 8 bytes of sig header
    fo.seek(104)
    # 104 bytes in
    binindex = fo.read(4)
    # 108 bytes in
    (sigindex, ) = struct.unpack('>I', binindex)
    bindata = fo.read(4)
    # 112 bytes in
    (sigdata, ) = struct.unpack('>I', bindata)
    # each index is 4 32bit segments - so each is 16 bytes
    sigindexsize = sigindex * 16
    sigsize = sigdata + sigindexsize
    # we have to round off to the next 8 byte boundary
    disttoboundary = (sigsize % 8)
    if disttoboundary != 0:
        disttoboundary = 8 - disttoboundary
    # 112 bytes - 96 == lead, 8 = magic and reserved, 8 == sig header data
    hdrstart = 112 + sigsize + disttoboundary

    fo.seek(hdrstart) # go to the start of the header
    fo.seek(8, 1)     # read past the magic number and reserved bytes

    binindex = fo.read(4)
    (hdrindex, ) = struct.unpack('>I', binindex)
    bindata = fo.read(4)
    (hdrdata, ) = struct.unpack('>I', bindata)

    # each index is 4 32bit segments - so each is 16 bytes
    hdrindexsize = hdrindex * 16
    # add 16 to the hdrsize to account for the 16 bytes of misc data b/t the
    # end of the sig and the header.
    hdrsize = hdrdata + hdrindexsize + 16

    # header end is hdrstart + hdrsize
    hdrend = hdrstart + hdrsize
    if opened_here:
        fo.close()
        del fo
    return (hdrstart, hdrend)
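# Sketch of the offsets byteranges() walks, summarizing the comments above
# (the example path is hypothetical):
#
#   bytes   0-95    rpm lead
#   bytes  96-103   signature header magic + reserved
#   bytes 104-111   signature index count and data size (two big-endian u32s)
#   ...             signature entries + data, padded to an 8 byte boundary
#   hdrstart        first byte of the real header; hdrend = hdrstart + hdrsize
#
#   (start, end) = byteranges('foo-1.0-1.i386.rpm')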
class MDError(exceptions.Exception):
    def __init__(self, value=None):
        exceptions.Exception.__init__(self)
        self.value = value

    def __str__(self):
        return self.value
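# MDError is the module's catch-all failure type; callers trap it rather than
# the underlying OSError/rpm.error.  A minimal sketch (hypothetical path):
#
#   try:
#       hdr = returnHdr(ts, 'foo-1.0-1.i386.rpm')
#   except MDError, e:
#       print 'skipping package: %s' % e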
class RpmMetaData:
    """each rpm is one object, you pass it an rpm file
       it opens the file, and pulls the information out in bite-sized chunks :)
    """

    mode_cache = {}

    def __init__(self, ts, basedir, filename, options):
        try:
            stats = os.stat(os.path.join(basedir, filename))
            self.size = stats[6]
            self.mtime = stats[8]
            del stats
        except OSError, e:
            raise MDError, "Error Stat'ing file %s %s" % (basedir, filename)
        self.options = options
        self.localurl = options['baseurl']
        if options['noepoch']:
            self.noepoch = ""
        else:
            self.noepoch = 0
        self.relativepath = filename

        fd = returnFD(os.path.join(basedir, filename))
        self.hdr = returnHdr(ts, fd)

        os.lseek(fd, 0, 0)
        fo = os.fdopen(fd, 'rb')
        self.pkgid = self.doChecksumCache(fo)
        fo.seek(0)
        (self.rangestart, self.rangeend) = byteranges(fo)
        fo.close()
        del fo
        del fd

        # setup our regex objects
        fileglobs = options['file-pattern-match']
        #['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$']
        dirglobs = options['dir-pattern-match']
        #['.*bin\/.*', '^\/etc\/.*']
        self.dirrc = []
        self.filerc = []
        for glob in fileglobs:
            self.filerc.append(re.compile(glob))

        for glob in dirglobs:
            self.dirrc.append(re.compile(glob))

        self.filenames = []
        self.dirnames = []
        self.ghostnames = []
        self.genFileLists()

    def arch(self):
        if self.tagByName('sourcepackage') == 1 or not self.tagByName('sourcerpm'):
            return 'src'
        else:
            return self.tagByName('arch')

    def _correctFlags(self, flags):
        returnflags = []
        if flags is None:
            return returnflags

        if type(flags) is not types.ListType:
            newflag = flags & 0xf
            returnflags.append(newflag)
        else:
            for flag in flags:
                newflag = flag
                if flag is not None:
                    newflag = flag & 0xf
                returnflags.append(newflag)
        return returnflags

    def _checkPreReq(self, flags):
        reqs = []
        if flags is None:
            return reqs

        if type(flags) is not types.ListType:
            flags = [flags]
        for flag in flags:
            newflag = flag
            if flag is not None:
                newflag = flag & 64
                if newflag == 64:
                    reqs.append(1)
                else:
                    reqs.append(0)
        return reqs

    def _correctVersion(self, vers):
        returnvers = []
        vertuple = (None, None, None)
        if vers is None:
            returnvers.append(vertuple)
            return returnvers

        if type(vers) is not types.ListType:
            if vers is not None:
                vertuple = self._stringToVersion(vers)
            else:
                vertuple = (None, None, None)
            returnvers.append(vertuple)
        else:
            for ver in vers:
                if ver is not None:
                    vertuple = self._stringToVersion(ver)
                else:
                    vertuple = (None, None, None)
                returnvers.append(vertuple)
        return returnvers

    def _stringToVersion(self, strng):
        i = strng.find(':')
        if i != -1 and strng[:i].isdigit():
            epoch = strng[:i]
        else:
            i = -1
            epoch = self.noepoch
        j = strng.rfind('-')
        if j != -1:
            if strng[i + 1:j] == '':
                version = None
            else:
                version = strng[i + 1:j]
            release = strng[j + 1:]
        else:
            if strng[i + 1:] == '':
                version = None
            else:
                version = strng[i + 1:]
            release = None
        return (epoch, version, release)

    ###########
    # Title: Remove duplicates from a sequence
    # Submitter: Tim Peters
    # From: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52560

    def _uniq(self, s):
        """Return a list of the elements in s, but without duplicates.

        For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3],
        unique("abcabc") some permutation of ["a", "b", "c"], and
        unique(([1, 2], [2, 3], [1, 2])) some permutation of
        [[2, 3], [1, 2]].

        For best speed, all sequence elements should be hashable.  Then
        unique() will usually work in linear time.

        If not possible, the sequence elements should enjoy a total
        ordering, and if list(s).sort() doesn't raise TypeError it's
        assumed that they do enjoy a total ordering.  Then unique() will
        usually work in O(N*log2(N)) time.

        If that's not possible either, the sequence elements must support
        equality-testing.  Then unique() will usually work in quadratic
        time.
        """

        n = len(s)
        if n == 0:
            return []

        # Try using a dict first, as that's the fastest and will usually
        # work.  If it doesn't work, it will usually fail quickly, so it
        # usually doesn't cost much to *try* it.  It requires that all the
        # sequence elements be hashable, and support equality comparison.
        u = {}
        try:
            for x in s:
                u[x] = 1
        except TypeError:
            del u  # move on to the next method
        else:
            return u.keys()

        # We can't hash all the elements.  Second fastest is to sort,
        # which brings the equal elements together; then duplicates are
        # easy to weed out in a single pass.
        # NOTE:  Python's list.sort() was designed to be efficient in the
        # presence of many duplicate elements.  This isn't true of all
        # sort functions in all languages or libraries, so this approach
        # is more effective in Python than it may be elsewhere.
        try:
            t = list(s)
            t.sort()
        except TypeError:
            del t  # move on to the next method
        else:
            assert n > 0
            last = t[0]
            lasti = i = 1
            while i < n:
                if t[i] != last:
                    t[lasti] = last = t[i]
                    lasti += 1
                i += 1
            return t[:lasti]

        # Brute force is all that's left.
        u = []
        for x in s:
            if x not in u:
                u.append(x)
        return u

    def tagByName(self, tag):
        data = self.hdr[tag]
        if type(data) is types.ListType:
            if len(data) > 0:
                return data[0]
            else:
                return ''
        else:
            return data

    def listTagByName(self, tag):
        """take a tag that should be a list and make sure it is one"""
        lst = []
        data = self.hdr[tag]
        if data is None:
            return lst

        if type(data) is types.ListType:
            lst.extend(data)
        else:
            lst.append(data)
        return lst

    def epoch(self):
        if self.hdr['epoch'] is None:
            return self.noepoch
        else:
            return self.tagByName('epoch')

    def genFileLists(self):
        """produces lists of dirs and files for this header in two lists"""

        files = self.listTagByName('filenames')
        fileflags = self.listTagByName('fileflags')
        filemodes = self.listTagByName('filemodes')
        filetuple = zip(files, filemodes, fileflags)
        for (file, mode, flag) in filetuple:
            # garbage checks
            if mode is None or mode == '':
                self.filenames.append(file)
                continue
            if not RpmMetaData.mode_cache.has_key(mode):
                RpmMetaData.mode_cache[mode] = stat.S_ISDIR(mode)
            if RpmMetaData.mode_cache[mode]:
                self.dirnames.append(file)
            else:
                if flag is None:
                    self.filenames.append(file)
                else:
                    if (flag & 64):
                        self.ghostnames.append(file)
                        continue
                    self.filenames.append(file)

    def usefulFiles(self):
        """search for good files"""
        returns = {}
        for item in self.filenames:
            if item is None:
                continue
            for glob in self.filerc:
                if glob.match(item):
                    returns[item] = 1
        return returns

    def usefulGhosts(self):
        """search for useful ghost file names"""
        returns = {}
        for item in self.ghostnames:
            if item is None:
                continue
            for glob in self.filerc:
                if glob.match(item):
                    returns[item] = 1
        return returns

    def usefulDirs(self):
        """search for good dirs"""
        returns = {}
        for item in self.dirnames:
            if item is None:
                continue
            for glob in self.dirrc:
                if glob.match(item):
                    returns[item] = 1
        return returns.keys()

    def depsList(self):
        """returns a list of tuples of dependencies"""
        # these should probably compress down duplicates too
        lst = []
        names = self.hdr[rpm.RPMTAG_REQUIRENAME]
        tmpflags = self.hdr[rpm.RPMTAG_REQUIREFLAGS]
        flags = self._correctFlags(tmpflags)
        prereq = self._checkPreReq(tmpflags)
        ver = self._correctVersion(self.hdr[rpm.RPMTAG_REQUIREVERSION])
        if names is not None:
            lst = zip(names, flags, ver, prereq)
        return self._uniq(lst)

    def obsoletesList(self):
        lst = []
        names = self.hdr[rpm.RPMTAG_OBSOLETENAME]
        tmpflags = self.hdr[rpm.RPMTAG_OBSOLETEFLAGS]
        flags = self._correctFlags(tmpflags)
        ver = self._correctVersion(self.hdr[rpm.RPMTAG_OBSOLETEVERSION])
        if names is not None:
            lst = zip(names, flags, ver)
        return self._uniq(lst)

    def conflictsList(self):
        lst = []
        names = self.hdr[rpm.RPMTAG_CONFLICTNAME]
        tmpflags = self.hdr[rpm.RPMTAG_CONFLICTFLAGS]
        flags = self._correctFlags(tmpflags)
        ver = self._correctVersion(self.hdr[rpm.RPMTAG_CONFLICTVERSION])
        if names is not None:
            lst = zip(names, flags, ver)
        return self._uniq(lst)

    def providesList(self):
        lst = []
        names = self.hdr[rpm.RPMTAG_PROVIDENAME]
        tmpflags = self.hdr[rpm.RPMTAG_PROVIDEFLAGS]
        flags = self._correctFlags(tmpflags)
        ver = self._correctVersion(self.hdr[rpm.RPMTAG_PROVIDEVERSION])
        if names is not None:
            lst = zip(names, flags, ver)
        return self._uniq(lst)

    def changelogLists(self):
        lst = []
        names = self.listTagByName('changelogname')
        times = self.listTagByName('changelogtime')
        texts = self.listTagByName('changelogtext')
        if len(names) > 0:
            lst = zip(names, times, texts)
        return lst

    def doChecksumCache(self, fo):
        """return a checksum for a package:
           - check if the checksum cache is enabled
             if not - return the checksum
             if so - check to see if it has a cache file
               if so, open it and return the first line's contents
               if not, grab the checksum and write it to a file for this pkg
        """
        if not self.options['cache']:
            return getChecksum(self.options['sumtype'], fo)

        t = []
        if type(self.hdr[rpm.RPMTAG_SIGGPG]) is not types.NoneType:
            t.append("".join(self.hdr[rpm.RPMTAG_SIGGPG]))
        if type(self.hdr[rpm.RPMTAG_SIGPGP]) is not types.NoneType:
            t.append("".join(self.hdr[rpm.RPMTAG_SIGPGP]))
        if type(self.hdr[rpm.RPMTAG_HDRID]) is not types.NoneType:
            t.append("".join(self.hdr[rpm.RPMTAG_HDRID]))
        key = md5.new("".join(t)).hexdigest()

        csumtag = '%s-%s-%s-%s' % (os.path.basename(self.relativepath),
                                   self.hdr[rpm.RPMTAG_SHA1HEADER],
                                   self.size, self.mtime)
        csumfile = '%s/%s' % (self.options['cachedir'], csumtag)
        if os.path.exists(csumfile) and self.mtime <= os.stat(csumfile)[8]:
            csumo = open(csumfile, 'r')
            checksum = csumo.readline()
            csumo.close()
        else:
            checksum = getChecksum(self.options['sumtype'], fo)
            csumo = open(csumfile, 'w')
            csumo.write(checksum)
            csumo.close()

        return checksum
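# Constructing an RpmMetaData by hand takes an rpm transaction set plus the
# options dict normally built by the createrepo driver script.  A minimal
# sketch (paths and patterns are hypothetical; only the keys read above are
# set, and 'cachedir' may be omitted while 'cache' is False):
#
#   ts = rpm.TransactionSet()
#   opts = {'baseurl': None, 'noepoch': False, 'cache': False,
#           'sumtype': 'sha', 'file-pattern-match': ['.*bin\/.*'],
#           'dir-pattern-match': ['^\/etc\/.*']}
#   mdobj = RpmMetaData(ts, '/some/repo', 'foo-1.0-1.i386.rpm', opts)
#   print mdobj.pkgid, mdobj.arch(), mdobj.depsList()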
def generateXML(doc, node, formatns, rpmObj, sumtype):
    """takes an xml doc object and a package metadata entry node, populates a
       package node with the md information"""
    ns = node.ns()
    pkgNode = node.newChild(None, "package", None)
    pkgNode.newProp('type', 'rpm')
    pkgNode.newChild(None, 'name', rpmObj.tagByName('name'))
    pkgNode.newChild(None, 'arch', rpmObj.arch())
    version = pkgNode.newChild(None, 'version', None)
    if str(rpmObj.epoch()):
        version.newProp('epoch', str(rpmObj.epoch()))
    version.newProp('ver', str(rpmObj.tagByName('version')))
    version.newProp('rel', str(rpmObj.tagByName('release')))
    csum = pkgNode.newChild(None, 'checksum', rpmObj.pkgid)
    csum.newProp('type', sumtype)
    csum.newProp('pkgid', 'YES')
    for tag in ['summary', 'description', 'packager', 'url']:
        value = rpmObj.tagByName(tag)
        value = utf8String(value)
        value = re.sub("\n$", '', value)
        entry = pkgNode.newChild(None, tag, None)
        entry.addContent(value)

    time = pkgNode.newChild(None, 'time', None)
    time.newProp('file', str(rpmObj.mtime))
    time.newProp('build', str(rpmObj.tagByName('buildtime')))
    size = pkgNode.newChild(None, 'size', None)
    size.newProp('package', str(rpmObj.size))
    size.newProp('installed', str(rpmObj.tagByName('size')))
    size.newProp('archive', str(rpmObj.tagByName('archivesize')))
    location = pkgNode.newChild(None, 'location', None)
    if rpmObj.localurl is not None:
        location.newProp('xml:base', rpmObj.localurl)
    location.newProp('href', rpmObj.relativepath)
    format = pkgNode.newChild(ns, 'format', None)
    for tag in ['license', 'vendor', 'group', 'buildhost', 'sourcerpm']:
        value = rpmObj.tagByName(tag)
        value = utf8String(value)
        value = re.sub("\n$", '', value)
        entry = format.newChild(formatns, tag, None)
        entry.addContent(value)

    hr = format.newChild(formatns, 'header-range', None)
    hr.newProp('start', str(rpmObj.rangestart))
    hr.newProp('end', str(rpmObj.rangeend))
    for (lst, nodename) in [(rpmObj.providesList(), 'provides'),
                            (rpmObj.conflictsList(), 'conflicts'),
                            (rpmObj.obsoletesList(), 'obsoletes')]:
        if len(lst) > 0:
            rpconode = format.newChild(formatns, nodename, None)
            for (name, flags, (e, v, r)) in lst:
                entry = rpconode.newChild(formatns, 'entry', None)
                entry.newProp('name', name)
                if flags != 0:
                    if flags == 2: arg = 'LT'
                    if flags == 4: arg = 'GT'
                    if flags == 8: arg = 'EQ'
                    if flags == 10: arg = 'LE'
                    if flags == 12: arg = 'GE'
                    entry.newProp('flags', arg)
                    # if we've got a flag we've got a version, I hope :)
                    if str(e):
                        entry.newProp('epoch', str(e))
                    if v:
                        entry.newProp('ver', str(v))
                    if r:
                        entry.newProp('rel', str(r))

    depsList = rpmObj.depsList()
    if len(depsList) > 0:
        rpconode = format.newChild(formatns, 'requires', None)
        for (name, flags, (e, v, r), prereq) in depsList:
            entry = rpconode.newChild(formatns, 'entry', None)
            entry.newProp('name', name)
            if flags != 0:
                if flags == 2: arg = 'LT'
                if flags == 4: arg = 'GT'
                if flags == 8: arg = 'EQ'
                if flags == 10: arg = 'LE'
                if flags == 12: arg = 'GE'
                entry.newProp('flags', arg)
                # if we've got a flag we've got a version, I hope :)
                if str(e):
                    entry.newProp('epoch', str(e))
                if v:
                    entry.newProp('ver', str(v))
                if r:
                    entry.newProp('rel', str(r))
            if prereq == 1:
                entry.newProp('pre', str(prereq))

    for file in rpmObj.usefulFiles():
        files = format.newChild(None, 'file', None)
        file = utf8String(file)
        files.addContent(file)
    for directory in rpmObj.usefulDirs():
        files = format.newChild(None, 'file', None)
        directory = utf8String(directory)
        files.addContent(directory)
        files.newProp('type', 'dir')
    for directory in rpmObj.usefulGhosts():
        files = format.newChild(None, 'file', None)
        directory = utf8String(directory)
        files.addContent(directory)
        files.newProp('type', 'ghost')

    return pkgNode
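# Illustrative fragment of the <package> element generateXML() appends to
# primary.xml (values are hypothetical; flags 2/4/8/10/12 map to the
# LT/GT/EQ/LE/GE strings emitted above):
#
#   <package type="rpm">
#     <name>foo</name><arch>i386</arch>
#     <version epoch="0" ver="1.0" rel="1"/>
#     <checksum type="sha" pkgid="YES">...</checksum>
#     <location href="foo-1.0-1.i386.rpm"/>
#     <format>
#       <rpm:requires>
#         <rpm:entry name="bar" flags="GE" epoch="0" ver="2.0"/>
#       </rpm:requires>
#       <rpm:header-range start="440" end="8621"/>
#     </format>
#   </package>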
def fileListXML(doc, node, rpmObj):
    pkg = node.newChild(None, 'package', None)
    pkg.newProp('pkgid', rpmObj.pkgid)
    pkg.newProp('name', rpmObj.tagByName('name'))
    pkg.newProp('arch', rpmObj.arch())
    version = pkg.newChild(None, 'version', None)
    if str(rpmObj.epoch()):
        version.newProp('epoch', str(rpmObj.epoch()))
    version.newProp('ver', str(rpmObj.tagByName('version')))
    version.newProp('rel', str(rpmObj.tagByName('release')))
    for file in rpmObj.filenames:
        files = pkg.newChild(None, 'file', None)
        file = utf8String(file)
        files.addContent(file)
    for directory in rpmObj.dirnames:
        files = pkg.newChild(None, 'file', None)
        directory = utf8String(directory)
        files.addContent(directory)
        files.newProp('type', 'dir')
    for ghost in rpmObj.ghostnames:
        files = pkg.newChild(None, 'file', None)
        ghost = utf8String(ghost)
        files.addContent(ghost)
        files.newProp('type', 'ghost')
    return pkg
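# fileListXML() feeds filelists.xml; unlike the filtered file list in
# primary.xml, each package here carries every file, e.g. (hypothetical
# values):
#
#   <package pkgid="..." name="foo" arch="i386">
#     <version epoch="0" ver="1.0" rel="1"/>
#     <file>/usr/bin/foo</file>
#     <file type="dir">/etc/foo</file>
#   </package>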
def otherXML(doc, node, rpmObj):
    pkg = node.newChild(None, 'package', None)
    pkg.newProp('pkgid', rpmObj.pkgid)
    pkg.newProp('name', rpmObj.tagByName('name'))
    pkg.newProp('arch', rpmObj.arch())
    version = pkg.newChild(None, 'version', None)
    if str(rpmObj.epoch()):
        version.newProp('epoch', str(rpmObj.epoch()))
    version.newProp('ver', str(rpmObj.tagByName('version')))
    version.newProp('rel', str(rpmObj.tagByName('release')))
    clogs = rpmObj.changelogLists()
    for (name, time, text) in clogs:
        clog = pkg.newChild(None, 'changelog', None)
        clog.addContent(utf8String(text))
        clog.newProp('author', utf8String(name))
        clog.newProp('date', str(time))
    return pkg
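# otherXML() feeds other.xml with the changelog entries collected by
# changelogLists(); e.g. (hypothetical values):
#
#   <package pkgid="..." name="foo" arch="i386">
#     <version epoch="0" ver="1.0" rel="1"/>
#     <changelog author="A. Packager" date="1097000000">- initial build</changelog>
#   </package>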
def repoXML(node, cmds):
    """generate the repomd.xml file that stores the info on the other files"""
    sumtype = cmds['sumtype']
    workfiles = [(cmds['otherfile'], 'other',),
                 (cmds['filelistsfile'], 'filelists'),
                 (cmds['primaryfile'], 'primary')]
    repoid = 'garbageid'

    repopath = os.path.join(cmds['outputdir'], cmds['tempdir'])

    if cmds['database']:
        try:
            dbversion = str(sqlitecachec.DBVERSION)
        except AttributeError:
            dbversion = '9'
        rp = sqlitecachec.RepodataParserSqlite(repopath, repoid, None)

    for (file, ftype) in workfiles:
        complete_path = os.path.join(repopath, file)

        zfo = _gzipOpen(complete_path)
        uncsum = getChecksum(sumtype, zfo)
        zfo.close()
        csum = getChecksum(sumtype, complete_path)
        timestamp = os.stat(complete_path)[8]

        db_csums = {}
        db_compressed_sums = {}

        if cmds['database']:
            if ftype == 'primary':
                rp.getPrimary(complete_path, csum)
            elif ftype == 'filelists':
                rp.getFilelists(complete_path, csum)
            elif ftype == 'other':
                rp.getOtherdata(complete_path, csum)

            tmp_result_name = '%s.xml.gz.sqlite' % ftype
            tmp_result_path = os.path.join(repopath, tmp_result_name)
            good_name = '%s.sqlite' % ftype
            resultpath = os.path.join(repopath, good_name)

            # rename from silly name to not silly name
            os.rename(tmp_result_path, resultpath)
            compressed_name = '%s.bz2' % good_name
            result_compressed = os.path.join(repopath, compressed_name)
            db_csums[ftype] = getChecksum(sumtype, resultpath)

            # compress the files
            bzipFile(resultpath, result_compressed)
            # csum the compressed file
            db_compressed_sums[ftype] = getChecksum(sumtype, result_compressed)
            # remove the uncompressed file
            os.unlink(resultpath)

            # timestamp the compressed file
            db_timestamp = os.stat(result_compressed)[8]

            # add this data as a section to the repomdxml
            db_data_type = '%s_db' % ftype
            data = node.newChild(None, 'data', None)
            data.newProp('type', db_data_type)
            location = data.newChild(None, 'location', None)
            if cmds['baseurl'] is not None:
                location.newProp('xml:base', cmds['baseurl'])
            location.newProp('href', os.path.join(cmds['finaldir'], compressed_name))
            checksum = data.newChild(None, 'checksum', db_compressed_sums[ftype])
            checksum.newProp('type', sumtype)
            db_tstamp = data.newChild(None, 'timestamp', str(db_timestamp))
            unchecksum = data.newChild(None, 'open-checksum', db_csums[ftype])
            unchecksum.newProp('type', sumtype)
            database_version = data.newChild(None, 'database_version', dbversion)

        data = node.newChild(None, 'data', None)
        data.newProp('type', ftype)
        location = data.newChild(None, 'location', None)
        if cmds['baseurl'] is not None:
            location.newProp('xml:base', cmds['baseurl'])
        location.newProp('href', os.path.join(cmds['finaldir'], file))
        checksum = data.newChild(None, 'checksum', csum)
        checksum.newProp('type', sumtype)
        timestamp = data.newChild(None, 'timestamp', str(timestamp))
        unchecksum = data.newChild(None, 'open-checksum', uncsum)
        unchecksum.newProp('type', sumtype)

    # if we've got a group file then checksum it once and be done
    if cmds['groupfile'] is not None:
        grpfile = cmds['groupfile']
        timestamp = os.stat(grpfile)[8]
        sfile = os.path.basename(grpfile)
        fo = open(grpfile, 'r')
        output = open(os.path.join(cmds['outputdir'], cmds['tempdir'], sfile), 'w')
        output.write(fo.read())
        output.close()
        fo.seek(0)
        csum = getChecksum(sumtype, fo)
        fo.close()

        data = node.newChild(None, 'data', None)
        data.newProp('type', 'group')
        location = data.newChild(None, 'location', None)
        if cmds['baseurl'] is not None:
            location.newProp('xml:base', cmds['baseurl'])
        location.newProp('href', os.path.join(cmds['finaldir'], sfile))
        checksum = data.newChild(None, 'checksum', csum)
        checksum.newProp('type', sumtype)
        timestamp = data.newChild(None, 'timestamp', str(timestamp))
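if __name__ == '__main__':
    # Minimal driver sketch, not part of the original module (the real driver
    # is createrepo's genpkgmetadata.py).  The package path and option values
    # below are hypothetical; requires the libxml2 python bindings.
    import libxml2

    ts = rpm.TransactionSet()
    opts = {'baseurl': None, 'noepoch': False, 'cache': False,
            'sumtype': 'sha', 'file-pattern-match': ['.*bin\/.*'],
            'dir-pattern-match': ['^\/etc\/.*']}
    mdobj = RpmMetaData(ts, '.', 'foo-1.0-1.i386.rpm', opts)

    # build a primary.xml-style document for this one package
    doc = libxml2.newDoc('1.0')
    root = doc.newChild(None, 'metadata', None)
    root.newNs('http://linux.duke.edu/metadata/common', None)
    formatns = root.newNs('http://linux.duke.edu/metadata/rpm', 'rpm')
    generateXML(doc, root, formatns, mdobj, opts['sumtype'])
    print doc.serialize('UTF-8', 1)
    doc.freeDoc()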