#!/usr/bin/python -t
# /usr/share/createrepo/dumpMetadata.py
# base classes and functions for dumping out package Metadata
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Copyright 2004 Duke University
# $Id$

import os
import rpm
import exceptions
import md5
import sha
import types
import struct
import re
import stat
import bz2

try:
    import sqlitecachec
except ImportError:
    pass

# done to fix gzip randomly changing the checksum
import gzip
from zlib import error as zlibError
from gzip import write32u, FNAME

__all__ = ["GzipFile", "open"]

class GzipFile(gzip.GzipFile):
    def _write_gzip_header(self):
        self.fileobj.write('\037\213')          # magic header
        self.fileobj.write('\010')              # compression method
        fname = self.filename[:-3]
        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(chr(flags))
        write32u(self.fileobj, long(0))
        self.fileobj.write('\002')
        self.fileobj.write('\377')
        if fname:
            self.fileobj.write(fname + '\000')


def _gzipOpen(filename, mode="rb", compresslevel=9):
    return GzipFile(filename, mode, compresslevel)

def bzipFile(source, dest):
    s_fn = open(source, 'rb')
    destination = bz2.BZ2File(dest, 'w', compresslevel=9)

    while True:
        data = s_fn.read(1024000)
        if not data:
            break
        destination.write(data)

    destination.close()
    s_fn.close()


def returnFD(filename):
    try:
        fdno = os.open(filename, os.O_RDONLY)
    except OSError:
        raise MDError, "Error opening file"
    return fdno

def returnHdr(ts, package):
    """hand back the rpm header or raise an Error if the pkg is fubar"""
    opened_here = 0
    try:
        if type(package) is types.StringType:
            opened_here = 1
            fdno = os.open(package, os.O_RDONLY)
        else:
            fdno = package # let's assume this is an fdno and go with it :)
    except OSError:
        raise MDError, "Error opening file"
    ts.setVSFlags((rpm._RPMVSF_NOSIGNATURES|rpm.RPMVSF_NOMD5|rpm.RPMVSF_NEEDPAYLOAD))
    try:
        hdr = ts.hdrFromFdno(fdno)
    except rpm.error:
        raise MDError, "Error opening package"
    if type(hdr) != rpm.hdr:
        raise MDError, "Error opening package"
    ts.setVSFlags(0)

    if opened_here:
        os.close(fdno)
        del fdno

    return hdr

def getChecksum(sumtype, file, CHUNK=2**16):
    """takes filename, hand back Checksum of it
       sumtype = md5 or sha
       filename = /path/to/file
       CHUNK=65536 by default"""

    # chunking brazenly lifted from Ryan Tomayko
    opened_here = 0
    try:
        if type(file) is not types.StringType:
            fo = file # assume it's a file-like-object
        else:
            opened_here = 1
            fo = open(file, 'rb', CHUNK)

        if sumtype == 'md5':
            sum = md5.new()
        elif sumtype == 'sha':
            sum = sha.new()
        else:
            raise MDError, 'Error Checksumming file, wrong checksum type %s' % sumtype
        chunk = fo.read
        while chunk:
            chunk = fo.read(CHUNK)
            sum.update(chunk)

        if opened_here:
            fo.close()
            del fo

        return sum.hexdigest()
    except:
        raise MDError, 'Error opening file for checksum: %s' % file


def utf8String(string):
    """hands back a unicoded string"""
    if string is None:
        return ''
    elif isinstance(string, unicode):
        return string
    du = False
    try:
        x = unicode(string, 'ascii')
        du = True
    except UnicodeError:
        encodings = ['utf-8', 'iso-8859-1', 'iso-8859-15', 'iso-8859-2']
        for enc in encodings:
            try:
                x = unicode(string, enc)
            except UnicodeError:
                pass
            else:
                if x.encode(enc) == string:
                    return x.encode('utf-8')

    newstring = ''
    # Kill bytes (or libxml will die) not in the small byte portion of:
    # http://www.w3.org/TR/REC-xml/#NT-Char
    # we allow high bytes, if it passed the utf8 check above. Eg.
    # good chars = #x9 | #xA | #xD | [#x20-...]
    bad_small_bytes = range(0, 8) + [11, 12] + range(14, 32)
    for char in string:
        if ord(char) in bad_small_bytes:
            pass # Just ignore these bytes...
        elif not du and ord(char) > 127:
            newstring = newstring + '?'
        else:
            newstring = newstring + char
    return newstring
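# Usage sketch (not part of the original module; the .rpm path is
# hypothetical).  returnHdr() accepts either a filename or an already-open
# fd, and getChecksum() accepts either a filename or a file object:
#
#   ts = rpm.TransactionSet()
#   hdr = returnHdr(ts, 'foo-1.0-1.i386.rpm')       # rpm.hdr instance
#   print hdr['name'], hdr['version'], hdr['release']
#   print getChecksum('sha', 'foo-1.0-1.i386.rpm')  # hex digest of whole file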
def byteranges(file):
    """takes an rpm file or fileobject and returns byteranges for location of the header"""
    opened_here = 0
    if type(file) is not types.StringType:
        fo = file
    else:
        opened_here = 1
        fo = open(file, 'r')
    # read in past lead and first 8 bytes of sig header
    fo.seek(104)
    # 104 bytes in
    binindex = fo.read(4)
    # 108 bytes in
    (sigindex, ) = struct.unpack('>I', binindex)
    bindata = fo.read(4)
    # 112 bytes in
    (sigdata, ) = struct.unpack('>I', bindata)
    # each index is 4 32bit segments - so each is 16 bytes
    sigindexsize = sigindex * 16
    sigsize = sigdata + sigindexsize
    # we have to round off to the next 8 byte boundary
    disttoboundary = (sigsize % 8)
    if disttoboundary != 0:
        disttoboundary = 8 - disttoboundary
    # 112 bytes - 96 == lead, 8 = magic and reserved, 8 == sig header data
    hdrstart = 112 + sigsize + disttoboundary

    fo.seek(hdrstart) # go to the start of the header
    fo.seek(8, 1)     # read past the magic number and reserved bytes

    binindex = fo.read(4)
    (hdrindex, ) = struct.unpack('>I', binindex)
    bindata = fo.read(4)
    (hdrdata, ) = struct.unpack('>I', bindata)

    # each index is 4 32bit segments - so each is 16 bytes
    hdrindexsize = hdrindex * 16
    # add 16 to the hdrsize to account for the 16 bytes of misc data b/t the
    # end of the sig and the header.
    hdrsize = hdrdata + hdrindexsize + 16

    # header end is hdrstart + hdrsize
    hdrend = hdrstart + hdrsize
    if opened_here:
        fo.close()
        del fo
    return (hdrstart, hdrend)
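# Sketch of the offsets byteranges() walks, summarizing the comments above
# (the example path is hypothetical):
#
#   bytes   0-95    rpm lead
#   bytes  96-103   signature header magic + reserved
#   bytes 104-111   signature index count and data size (two big-endian u32s)
#   ...             signature entries + data, padded to an 8 byte boundary
#   hdrstart        first byte of the real header; hdrend = hdrstart + hdrsize
#
#   (start, end) = byteranges('foo-1.0-1.i386.rpm')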
class MDError(exceptions.Exception):
    def __init__(self, value=None):
        exceptions.Exception.__init__(self)
        self.value = value

    def __str__(self):
        return self.value
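# MDError is the module's catch-all failure type; callers trap it rather than
# the underlying OSError/rpm.error.  A minimal sketch (hypothetical path):
#
#   try:
#       hdr = returnHdr(ts, 'foo-1.0-1.i386.rpm')
#   except MDError, e:
#       print 'skipping package: %s' % e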
class RpmMetaData:
    """each rpm is one object, you pass it an rpm file
       it opens the file, and pulls the information out in bite-sized chunks :)
    """

    mode_cache = {}

    def __init__(self, ts, basedir, filename, options):
        try:
            stats = os.stat(os.path.join(basedir, filename))
            self.size = stats[6]
            self.mtime = stats[8]
            del stats
        except OSError, e:
            raise MDError, "Error Stat'ing file %s %s" % (basedir, filename)
        self.options = options
        self.localurl = options['baseurl']
        if options['noepoch']:
            self.noepoch = ""
        else:
            self.noepoch = 0
        self.relativepath = filename

        fd = returnFD(os.path.join(basedir, filename))
        self.hdr = returnHdr(ts, fd)

        os.lseek(fd, 0, 0)
        fo = os.fdopen(fd, 'rb')
        self.pkgid = self.doChecksumCache(fo)
        fo.seek(0)
        (self.rangestart, self.rangeend) = byteranges(fo)
        fo.close()
        del fo
        del fd

        # setup our regex objects
        fileglobs = options['file-pattern-match']
        #['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$']
        dirglobs = options['dir-pattern-match']
        #['.*bin\/.*', '^\/etc\/.*']
        self.dirrc = []
        self.filerc = []
        for glob in fileglobs:
            self.filerc.append(re.compile(glob))

        for glob in dirglobs:
            self.dirrc.append(re.compile(glob))

        self.filenames = []
        self.dirnames = []
        self.ghostnames = []
        self.genFileLists()

    def arch(self):
        if self.tagByName('sourcepackage') == 1 or not self.tagByName('sourcerpm'):
            return 'src'
        else:
            return self.tagByName('arch')

    def _correctFlags(self, flags):
        returnflags = []
        if flags is None:
            return returnflags

        if type(flags) is not types.ListType:
            newflag = flags & 0xf
            returnflags.append(newflag)
        else:
            for flag in flags:
                newflag = flag
                if flag is not None:
                    newflag = flag & 0xf
                returnflags.append(newflag)
        return returnflags

    def _checkPreReq(self, flags):
        reqs = []
        if flags is None:
            return reqs

        if type(flags) is not types.ListType:
            flags = [flags]
        for flag in flags:
            newflag = flag
            if flag is not None:
                newflag = flag & 64
                if newflag == 64:
                    reqs.append(1)
                else:
                    reqs.append(0)
        return reqs

    def _correctVersion(self, vers):
        returnvers = []
        vertuple = (None, None, None)
        if vers is None:
            returnvers.append(vertuple)
            return returnvers

        if type(vers) is not types.ListType:
            if vers is not None:
                vertuple = self._stringToVersion(vers)
            else:
                vertuple = (None, None, None)
            returnvers.append(vertuple)
        else:
            for ver in vers:
                if ver is not None:
                    vertuple = self._stringToVersion(ver)
                else:
                    vertuple = (None, None, None)
                returnvers.append(vertuple)
        return returnvers

    def _stringToVersion(self, strng):
        i = strng.find(':')
        if i != -1 and strng[:i].isdigit():
            epoch = strng[:i]
        else:
            i = -1
            epoch = self.noepoch
        j = strng.rfind('-')
        if j != -1:
            if strng[i + 1:j] == '':
                version = None
            else:
                version = strng[i + 1:j]
            release = strng[j + 1:]
        else:
            if strng[i + 1:] == '':
                version = None
            else:
                version = strng[i + 1:]
            release = None
        return (epoch, version, release)

    ###########
    # Title: Remove duplicates from a sequence
    # Submitter: Tim Peters
    # From: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52560

    def _uniq(self, s):
        """Return a list of the elements in s, but without duplicates.

        For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3],
        unique("abcabc") some permutation of ["a", "b", "c"], and
        unique(([1, 2], [2, 3], [1, 2])) some permutation of
        [[2, 3], [1, 2]].

        For best speed, all sequence elements should be hashable.  Then
        unique() will usually work in linear time.

        If not possible, the sequence elements should enjoy a total
        ordering, and if list(s).sort() doesn't raise TypeError it's
        assumed that they do enjoy a total ordering.  Then unique() will
        usually work in O(N*log2(N)) time.

        If that's not possible either, the sequence elements must support
        equality-testing.  Then unique() will usually work in quadratic
        time.
        """

        n = len(s)
        if n == 0:
            return []

        # Try using a dict first, as that's the fastest and will usually
        # work.  If it doesn't work, it will usually fail quickly, so it
        # usually doesn't cost much to *try* it.  It requires that all the
        # sequence elements be hashable, and support equality comparison.
        u = {}
        try:
            for x in s:
                u[x] = 1
        except TypeError:
            del u  # move on to the next method
        else:
            return u.keys()

        # We can't hash all the elements.  Second fastest is to sort,
        # which brings the equal elements together; then duplicates are
        # easy to weed out in a single pass.
        # NOTE:  Python's list.sort() was designed to be efficient in the
        # presence of many duplicate elements.  This isn't true of all
        # sort functions in all languages or libraries, so this approach
        # is more effective in Python than it may be elsewhere.
        try:
            t = list(s)
            t.sort()
        except TypeError:
            del t  # move on to the next method
        else:
            assert n > 0
            last = t[0]
            lasti = i = 1
            while i < n:
                if t[i] != last:
                    t[lasti] = last = t[i]
                    lasti += 1
                i += 1
            return t[:lasti]

        # Brute force is all that's left.
        u = []
        for x in s:
            if x not in u:
                u.append(x)
        return u

    def tagByName(self, tag):
        data = self.hdr[tag]
        if type(data) is types.ListType:
            if len(data) > 0:
                return data[0]
            else:
                return ''
        else:
            return data

    def listTagByName(self, tag):
        """take a tag that should be a list and make sure it is one"""
        lst = []
        data = self.hdr[tag]
        if data is None:
            return lst

        if type(data) is types.ListType:
            lst.extend(data)
        else:
            lst.append(data)
        return lst

    def epoch(self):
        if self.hdr['epoch'] is None:
            return self.noepoch
        else:
            return self.tagByName('epoch')

    def genFileLists(self):
        """produces lists of dirs and files for this header in two lists"""

        files = self.listTagByName('filenames')
        fileflags = self.listTagByName('fileflags')
        filemodes = self.listTagByName('filemodes')
        filetuple = zip(files, filemodes, fileflags)
        for (file, mode, flag) in filetuple:
            # garbage checks
            if mode is None or mode == '':
                self.filenames.append(file)
                continue
            if not RpmMetaData.mode_cache.has_key(mode):
                RpmMetaData.mode_cache[mode] = stat.S_ISDIR(mode)
            if RpmMetaData.mode_cache[mode]:
                self.dirnames.append(file)
            else:
                if flag is None:
                    self.filenames.append(file)
                else:
                    if (flag & 64):
                        self.ghostnames.append(file)
                        continue
                    self.filenames.append(file)

    def usefulFiles(self):
        """search for good files"""
        returns = {}
        for item in self.filenames:
            if item is None:
                continue
            for glob in self.filerc:
                if glob.match(item):
                    returns[item] = 1
        return returns

    def usefulGhosts(self):
        """search for useful ghost file names"""
        returns = {}
        for item in self.ghostnames:
            if item is None:
                continue
            for glob in self.filerc:
                if glob.match(item):
                    returns[item] = 1
        return returns

    def usefulDirs(self):
        """search for good dirs"""
        returns = {}
        for item in self.dirnames:
            if item is None:
                continue
            for glob in self.dirrc:
                if glob.match(item):
                    returns[item] = 1
        return returns.keys()

    def depsList(self):
        """returns a list of tuples of dependencies"""
        # these should probably compress down duplicates too
        lst = []
        names = self.hdr[rpm.RPMTAG_REQUIRENAME]
        tmpflags = self.hdr[rpm.RPMTAG_REQUIREFLAGS]
        flags = self._correctFlags(tmpflags)
        prereq = self._checkPreReq(tmpflags)
        ver = self._correctVersion(self.hdr[rpm.RPMTAG_REQUIREVERSION])
        if names is not None:
            lst = zip(names, flags, ver, prereq)
        return self._uniq(lst)

    def obsoletesList(self):
        lst = []
        names = self.hdr[rpm.RPMTAG_OBSOLETENAME]
        tmpflags = self.hdr[rpm.RPMTAG_OBSOLETEFLAGS]
        flags = self._correctFlags(tmpflags)
        ver = self._correctVersion(self.hdr[rpm.RPMTAG_OBSOLETEVERSION])
        if names is not None:
            lst = zip(names, flags, ver)
        return self._uniq(lst)

    def conflictsList(self):
        lst = []
        names = self.hdr[rpm.RPMTAG_CONFLICTNAME]
        tmpflags = self.hdr[rpm.RPMTAG_CONFLICTFLAGS]
        flags = self._correctFlags(tmpflags)
        ver = self._correctVersion(self.hdr[rpm.RPMTAG_CONFLICTVERSION])
        if names is not None:
            lst = zip(names, flags, ver)
        return self._uniq(lst)

    def providesList(self):
        lst = []
        names = self.hdr[rpm.RPMTAG_PROVIDENAME]
        tmpflags = self.hdr[rpm.RPMTAG_PROVIDEFLAGS]
        flags = self._correctFlags(tmpflags)
        ver = self._correctVersion(self.hdr[rpm.RPMTAG_PROVIDEVERSION])
        if names is not None:
            lst = zip(names, flags, ver)
        return self._uniq(lst)

    def changelogLists(self):
        lst = []
        names = self.listTagByName('changelogname')
        times = self.listTagByName('changelogtime')
        texts = self.listTagByName('changelogtext')
        if len(names) > 0:
            lst = zip(names, times, texts)
        return lst

    def doChecksumCache(self, fo):
        """return a checksum for a package:
           - check if the checksum cache is enabled
             if not - return the checksum
             if so - check to see if it has a cache file
               if so, open it and return the first line's contents
               if not, grab the checksum and write it to a file for this pkg
        """
        if not self.options['cache']:
            return getChecksum(self.options['sumtype'], fo)

        t = []
        if type(self.hdr[rpm.RPMTAG_SIGGPG]) is not types.NoneType:
            t.append("".join(self.hdr[rpm.RPMTAG_SIGGPG]))
        if type(self.hdr[rpm.RPMTAG_SIGPGP]) is not types.NoneType:
            t.append("".join(self.hdr[rpm.RPMTAG_SIGPGP]))
        if type(self.hdr[rpm.RPMTAG_HDRID]) is not types.NoneType:
            t.append("".join(self.hdr[rpm.RPMTAG_HDRID]))
        key = md5.new("".join(t)).hexdigest()

        csumtag = '%s-%s-%s-%s' % (os.path.basename(self.relativepath),
                                   self.hdr[rpm.RPMTAG_SHA1HEADER],
                                   self.size, self.mtime)
        csumfile = '%s/%s' % (self.options['cachedir'], csumtag)
        if os.path.exists(csumfile) and self.mtime <= os.stat(csumfile)[8]:
            csumo = open(csumfile, 'r')
            checksum = csumo.readline()
            csumo.close()
        else:
            checksum = getChecksum(self.options['sumtype'], fo)
            csumo = open(csumfile, 'w')
            csumo.write(checksum)
            csumo.close()

        return checksum
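# Constructing an RpmMetaData by hand takes an rpm transaction set plus the
# options dict normally built by the createrepo driver script.  A minimal
# sketch (paths and patterns are hypothetical; only the keys read above are
# set, and 'cachedir' may be omitted while 'cache' is False):
#
#   ts = rpm.TransactionSet()
#   opts = {'baseurl': None, 'noepoch': False, 'cache': False,
#           'sumtype': 'sha', 'file-pattern-match': ['.*bin\/.*'],
#           'dir-pattern-match': ['^\/etc\/.*']}
#   mdobj = RpmMetaData(ts, '/some/repo', 'foo-1.0-1.i386.rpm', opts)
#   print mdobj.pkgid, mdobj.arch(), mdobj.depsList()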
def generateXML(doc, node, formatns, rpmObj, sumtype):
    """takes an xml doc object and a package metadata entry node, populates a
       package node with the md information"""
    ns = node.ns()
    pkgNode = node.newChild(None, "package", None)
    pkgNode.newProp('type', 'rpm')
    pkgNode.newChild(None, 'name', rpmObj.tagByName('name'))
    pkgNode.newChild(None, 'arch', rpmObj.arch())
    version = pkgNode.newChild(None, 'version', None)
    if str(rpmObj.epoch()):
        version.newProp('epoch', str(rpmObj.epoch()))
    version.newProp('ver', str(rpmObj.tagByName('version')))
    version.newProp('rel', str(rpmObj.tagByName('release')))
    csum = pkgNode.newChild(None, 'checksum', rpmObj.pkgid)
    csum.newProp('type', sumtype)
    csum.newProp('pkgid', 'YES')
    for tag in ['summary', 'description', 'packager', 'url']:
        value = rpmObj.tagByName(tag)
        value = utf8String(value)
        value = re.sub("\n$", '', value)
        entry = pkgNode.newChild(None, tag, None)
        entry.addContent(value)

    time = pkgNode.newChild(None, 'time', None)
    time.newProp('file', str(rpmObj.mtime))
    time.newProp('build', str(rpmObj.tagByName('buildtime')))
    size = pkgNode.newChild(None, 'size', None)
    size.newProp('package', str(rpmObj.size))
    size.newProp('installed', str(rpmObj.tagByName('size')))
    size.newProp('archive', str(rpmObj.tagByName('archivesize')))
    location = pkgNode.newChild(None, 'location', None)
    if rpmObj.localurl is not None:
        location.newProp('xml:base', rpmObj.localurl)
    location.newProp('href', rpmObj.relativepath)
    format = pkgNode.newChild(ns, 'format', None)
    for tag in ['license', 'vendor', 'group', 'buildhost', 'sourcerpm']:
        value = rpmObj.tagByName(tag)
        value = utf8String(value)
        value = re.sub("\n$", '', value)
        entry = format.newChild(formatns, tag, None)
        entry.addContent(value)

    hr = format.newChild(formatns, 'header-range', None)
    hr.newProp('start', str(rpmObj.rangestart))
    hr.newProp('end', str(rpmObj.rangeend))
    for (lst, nodename) in [(rpmObj.providesList(), 'provides'),
                            (rpmObj.conflictsList(), 'conflicts'),
                            (rpmObj.obsoletesList(), 'obsoletes')]:
        if len(lst) > 0:
            rpconode = format.newChild(formatns, nodename, None)
            for (name, flags, (e, v, r)) in lst:
                entry = rpconode.newChild(formatns, 'entry', None)
                entry.newProp('name', name)
                if flags != 0:
                    if flags == 2: arg = 'LT'
                    if flags == 4: arg = 'GT'
                    if flags == 8: arg = 'EQ'
                    if flags == 10: arg = 'LE'
                    if flags == 12: arg = 'GE'
                    entry.newProp('flags', arg)
                    # if we've got a flag we've got a version, I hope :)
                    if str(e):
                        entry.newProp('epoch', str(e))
                    if v:
                        entry.newProp('ver', str(v))
                    if r:
                        entry.newProp('rel', str(r))

    depsList = rpmObj.depsList()
    if len(depsList) > 0:
        rpconode = format.newChild(formatns, 'requires', None)
        for (name, flags, (e, v, r), prereq) in depsList:
            entry = rpconode.newChild(formatns, 'entry', None)
            entry.newProp('name', name)
            if flags != 0:
                if flags == 2: arg = 'LT'
                if flags == 4: arg = 'GT'
                if flags == 8: arg = 'EQ'
                if flags == 10: arg = 'LE'
                if flags == 12: arg = 'GE'
                entry.newProp('flags', arg)
                # if we've got a flag we've got a version, I hope :)
                if str(e):
                    entry.newProp('epoch', str(e))
                if v:
                    entry.newProp('ver', str(v))
                if r:
                    entry.newProp('rel', str(r))
            if prereq == 1:
                entry.newProp('pre', str(prereq))

    for file in rpmObj.usefulFiles():
        files = format.newChild(None, 'file', None)
        file = utf8String(file)
        files.addContent(file)
    for directory in rpmObj.usefulDirs():
        files = format.newChild(None, 'file', None)
        directory = utf8String(directory)
        files.addContent(directory)
        files.newProp('type', 'dir')
    for directory in rpmObj.usefulGhosts():
        files = format.newChild(None, 'file', None)
        directory = utf8String(directory)
        files.addContent(directory)
        files.newProp('type', 'ghost')

    return pkgNode
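# Illustrative fragment of the <package> element generateXML() appends to
# primary.xml (values are hypothetical; flags 2/4/8/10/12 map to the
# LT/GT/EQ/LE/GE strings emitted above):
#
#   <package type="rpm">
#     <name>foo</name><arch>i386</arch>
#     <version epoch="0" ver="1.0" rel="1"/>
#     <checksum type="sha" pkgid="YES">...</checksum>
#     <location href="foo-1.0-1.i386.rpm"/>
#     <format>
#       <rpm:requires>
#         <rpm:entry name="bar" flags="GE" epoch="0" ver="2.0"/>
#       </rpm:requires>
#       <rpm:header-range start="440" end="8621"/>
#     </format>
#   </package>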
def fileListXML(doc, node, rpmObj):
    pkg = node.newChild(None, 'package', None)
    pkg.newProp('pkgid', rpmObj.pkgid)
    pkg.newProp('name', rpmObj.tagByName('name'))
    pkg.newProp('arch', rpmObj.arch())
    version = pkg.newChild(None, 'version', None)
    if str(rpmObj.epoch()):
        version.newProp('epoch', str(rpmObj.epoch()))
    version.newProp('ver', str(rpmObj.tagByName('version')))
    version.newProp('rel', str(rpmObj.tagByName('release')))
    for file in rpmObj.filenames:
        files = pkg.newChild(None, 'file', None)
        file = utf8String(file)
        files.addContent(file)
    for directory in rpmObj.dirnames:
        files = pkg.newChild(None, 'file', None)
        directory = utf8String(directory)
        files.addContent(directory)
        files.newProp('type', 'dir')
    for ghost in rpmObj.ghostnames:
        files = pkg.newChild(None, 'file', None)
        ghost = utf8String(ghost)
        files.addContent(ghost)
        files.newProp('type', 'ghost')
    return pkg
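# fileListXML() feeds filelists.xml; unlike the filtered file list in
# primary.xml, each package here carries every file, e.g. (hypothetical
# values):
#
#   <package pkgid="..." name="foo" arch="i386">
#     <version epoch="0" ver="1.0" rel="1"/>
#     <file>/usr/bin/foo</file>
#     <file type="dir">/etc/foo</file>
#   </package>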
def otherXML(doc, node, rpmObj):
    pkg = node.newChild(None, 'package', None)
    pkg.newProp('pkgid', rpmObj.pkgid)
    pkg.newProp('name', rpmObj.tagByName('name'))
    pkg.newProp('arch', rpmObj.arch())
    version = pkg.newChild(None, 'version', None)
    if str(rpmObj.epoch()):
        version.newProp('epoch', str(rpmObj.epoch()))
    version.newProp('ver', str(rpmObj.tagByName('version')))
    version.newProp('rel', str(rpmObj.tagByName('release')))
    clogs = rpmObj.changelogLists()
    for (name, time, text) in clogs:
        clog = pkg.newChild(None, 'changelog', None)
        clog.addContent(utf8String(text))
        clog.newProp('author', utf8String(name))
        clog.newProp('date', str(time))
    return pkg
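# otherXML() feeds other.xml with the changelog entries collected by
# changelogLists(); e.g. (hypothetical values):
#
#   <package pkgid="..." name="foo" arch="i386">
#     <version epoch="0" ver="1.0" rel="1"/>
#     <changelog author="A. Packager" date="1097000000">- initial build</changelog>
#   </package>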
def repoXML(node, cmds):
    """generate the repomd.xml file that stores the info on the other files"""
    sumtype = cmds['sumtype']
    workfiles = [(cmds['otherfile'], 'other',),
                 (cmds['filelistsfile'], 'filelists'),
                 (cmds['primaryfile'], 'primary')]
    repoid = 'garbageid'

    repopath = os.path.join(cmds['outputdir'], cmds['tempdir'])

    if cmds['database']:
        try:
            dbversion = str(sqlitecachec.DBVERSION)
        except AttributeError:
            dbversion = '9'
        rp = sqlitecachec.RepodataParserSqlite(repopath, repoid, None)

    for (file, ftype) in workfiles:
        complete_path = os.path.join(repopath, file)

        zfo = _gzipOpen(complete_path)
        uncsum = getChecksum(sumtype, zfo)
        zfo.close()
        csum = getChecksum(sumtype, complete_path)
        timestamp = os.stat(complete_path)[8]

        db_csums = {}
        db_compressed_sums = {}

        if cmds['database']:
            if ftype == 'primary':
                rp.getPrimary(complete_path, csum)
            elif ftype == 'filelists':
                rp.getFilelists(complete_path, csum)
            elif ftype == 'other':
                rp.getOtherdata(complete_path, csum)

            tmp_result_name = '%s.xml.gz.sqlite' % ftype
            tmp_result_path = os.path.join(repopath, tmp_result_name)
            good_name = '%s.sqlite' % ftype
            resultpath = os.path.join(repopath, good_name)

            # rename from silly name to not silly name
            os.rename(tmp_result_path, resultpath)
            compressed_name = '%s.bz2' % good_name
            result_compressed = os.path.join(repopath, compressed_name)
            db_csums[ftype] = getChecksum(sumtype, resultpath)

            # compress the files
            bzipFile(resultpath, result_compressed)
            # csum the compressed file
            db_compressed_sums[ftype] = getChecksum(sumtype, result_compressed)
            # remove the uncompressed file
            os.unlink(resultpath)

            # timestamp the compressed file
            db_timestamp = os.stat(result_compressed)[8]

            # add this data as a section to the repomdxml
            db_data_type = '%s_db' % ftype
            data = node.newChild(None, 'data', None)
            data.newProp('type', db_data_type)
            location = data.newChild(None, 'location', None)
            if cmds['baseurl'] is not None:
                location.newProp('xml:base', cmds['baseurl'])
            location.newProp('href', os.path.join(cmds['finaldir'], compressed_name))
            checksum = data.newChild(None, 'checksum', db_compressed_sums[ftype])
            checksum.newProp('type', sumtype)
            db_tstamp = data.newChild(None, 'timestamp', str(db_timestamp))
            unchecksum = data.newChild(None, 'open-checksum', db_csums[ftype])
            unchecksum.newProp('type', sumtype)
            database_version = data.newChild(None, 'database_version', dbversion)

        data = node.newChild(None, 'data', None)
        data.newProp('type', ftype)
        location = data.newChild(None, 'location', None)
        if cmds['baseurl'] is not None:
            location.newProp('xml:base', cmds['baseurl'])
        location.newProp('href', os.path.join(cmds['finaldir'], file))
        checksum = data.newChild(None, 'checksum', csum)
        checksum.newProp('type', sumtype)
        timestamp = data.newChild(None, 'timestamp', str(timestamp))
        unchecksum = data.newChild(None, 'open-checksum', uncsum)
        unchecksum.newProp('type', sumtype)

    # if we've got a group file then checksum it once and be done
    if cmds['groupfile'] is not None:
        grpfile = cmds['groupfile']
        timestamp = os.stat(grpfile)[8]
        sfile = os.path.basename(grpfile)
        fo = open(grpfile, 'r')
        output = open(os.path.join(cmds['outputdir'], cmds['tempdir'], sfile), 'w')
        output.write(fo.read())
        output.close()
        fo.seek(0)
        csum = getChecksum(sumtype, fo)
        fo.close()

        data = node.newChild(None, 'data', None)
        data.newProp('type', 'group')
        location = data.newChild(None, 'location', None)
        if cmds['baseurl'] is not None:
            location.newProp('xml:base', cmds['baseurl'])
        location.newProp('href', os.path.join(cmds['finaldir'], sfile))
        checksum = data.newChild(None, 'checksum', csum)
        checksum.newProp('type', sumtype)
        timestamp = data.newChild(None, 'timestamp', str(timestamp))
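if __name__ == '__main__':
    # Minimal driver sketch, not part of the original module (the real driver
    # is createrepo's genpkgmetadata.py).  The package path and option values
    # below are hypothetical; requires the libxml2 python bindings.
    import libxml2

    ts = rpm.TransactionSet()
    opts = {'baseurl': None, 'noepoch': False, 'cache': False,
            'sumtype': 'sha', 'file-pattern-match': ['.*bin\/.*'],
            'dir-pattern-match': ['^\/etc\/.*']}
    mdobj = RpmMetaData(ts, '.', 'foo-1.0-1.i386.rpm', opts)

    # build a primary.xml-style document for this one package
    doc = libxml2.newDoc('1.0')
    root = doc.newChild(None, 'metadata', None)
    root.newNs('http://linux.duke.edu/metadata/common', None)
    formatns = root.newNs('http://linux.duke.edu/metadata/rpm', 'rpm')
    generateXML(doc, root, formatns, mdobj, opts['sumtype'])
    print doc.serialize('UTF-8', 1)
    doc.freeDoc()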