"""Default tokenizer and parser plugins.

Defines a space-splitting tokenizer and a placeholder parser that produces
random analyses, and creates one module-level instance of each (``x`` and
``y``) -- presumably so the plugin loader can discover them; confirm against
the host application's plugin lookup.
"""

import os
import random
import re
from sets import Set
from xml.dom.minidom import getDOMImplementation

from twisted.plugin import IPlugin
from zope.interface import Interface, Attribute, implements

from tmat.myparser import ParserPlugin
from tmat.tokenizer import TokenizerPlugin


class WhitespaceTokenizer(object):
    """Tokenizer plugin that splits text on the space character."""

    implements(IPlugin, TokenizerPlugin)

    # Class-level defaults. The two dicts are shared by every instance that
    # does not rebind them.
    running = False
    displayName = "Whitespace Tokenizer"
    preferences = {}
    values = {}

    def __init__(self):
        # A freshly constructed tokenizer is immediately marked as running.
        self.running = True

    def splitText(self, text):
        """Return ``text`` split on single space characters.

        Only ``" "`` is a separator: tabs and newlines stay inside the
        returned pieces, and leading, trailing or repeated spaces produce
        empty strings in the result.
        """
        return text.split(" ")

    def tokenize(self, text):
        """Return the character spans of the tokens found in ``text``.

        Each entry is a tuple ``(start, end, True)`` where ``text[start:end]``
        is the token. Empty pieces returned by ``splitText`` (caused by
        leading, trailing or repeated spaces, or by empty input) are skipped,
        so no zero-length span is ever reported and empty text yields ``[]``.
        """
        spans = []
        cursor = 0
        for token in self.splitText(text):
            if not token:
                # Not a real token: just the gap between two adjacent spaces.
                continue
            # Search from the end of the previous token so that repeated
            # words map to successive occurrences instead of the first one.
            start = text.find(token, cursor)
            end = start + len(token)
            spans.append((start, end, True))
            cursor = end
        return spans

    def start(self):
        """Mark the plugin as running."""
        self.running = True

    def stop(self):
        """Mark the plugin as stopped."""
        self.running = False

    def version(self):
        """Return the placeholder version string."""
        return "I don't have a version yet"


class DefaultParser(object):
    """Placeholder parser plugin that returns randomly sized dummy analyses."""

    implements(IPlugin, ParserPlugin)

    # Class-level defaults. The two dicts are shared by every instance that
    # does not rebind them.
    running = False
    displayName = "Default Parser"
    preferences = {}
    values = {}

    def initialize(self):
        """Do nothing; this parser needs no setup."""
        pass

    def start(self):
        """Mark the plugin as running."""
        self.running = True

    def stop(self):
        """Mark the plugin as stopped."""
        self.running = False

    def version(self):
        """Return an empty version string."""
        return ""

    def uninitialize(self):
        """Do nothing; this parser holds no resources."""
        pass

    def parse(self, wordforms, progressBar=None):
        """Return dummy analyses for every entry of ``wordforms``.

        The result maps each word to a list of between one and five dicts
        with the keys ``"morpheme"`` (``"<word>-<n>"``) and ``"gloss"``
        (``"<word>/<n>"``), where ``n`` counts up from zero. ``progressBar``
        is accepted but never used. A word that occurs more than once in
        ``wordforms`` keeps only the list generated for its last occurrence.
        """
        # NOTE: this reseeds the module-level random generator on every call.
        random.seed()
        analyses = {}
        for word in wordforms:
            count = random.randint(1, 5)
            analyses[word] = [
                {"morpheme": "%s-%s" % (word, i), "gloss": "%s/%s" % (word, i)}
                for i in range(count)
            ]
        return analyses


# Module-level plugin instances; the names are kept as in the original file.
x = WhitespaceTokenizer()
y = DefaultParser()