"""Command-line backed tokenizer and parser plugins.

Both plugins delegate their work to external programs that are started
through the shell; the command lines come from the ``values`` settings that
are filled in through ``setValue``.
"""

import sys
import os
from subprocess import Popen, PIPE
import re
from codecs import lookup
from xml.dom.minidom import getDOMImplementation, parseString

from twisted.plugin import IPlugin
from zope.interface import Interface, Attribute, implements

from tmat.tokenizer import TokenizerPlugin
from tmat.myparser import ParserPlugin


class CommandLineTokenizer(object):
    """Tokenizer plugin that runs a local command-line tokenizer.

    A fresh child process is started for every request (see
    ``communicate``); the text is written to its stdin and one token per
    output line is read back from its stdout.
    """

    implements(IPlugin, TokenizerPlugin)

    running = False
    displayName = "Command-Line (Local) Tokenizer"
    # Setting name -> setting type, as strings.
    preferences = {
        "Command": "string",
        "Tokenize Flags": "string",
        "Get Version Flags": "string",
        "Timeout": "int",
    }
    # NOTE(review): this dict lives on the class and setValue() mutates it in
    # place, so every instance of this class shares the same settings.
    values = {}
    # Most recently started child process, or None if none was started yet.
    pipe = None

    def setValue(self, key, val):
        """Store the setting *key* with value *val*."""
        self.values[key] = val

    def do(self, command, returnPipe=False):
        """Start *command* through the shell and remember it in ``self.pipe``.

        Returns -1 when *command* is empty.  Otherwise returns the ``Popen``
        object when *returnPipe* is true, or a boolean telling whether a
        process object exists.
        """
        if not command:
            return -1
        cmd = "%s\n" % command
        self.pipe = Popen(cmd, shell=True, bufsize=0,
                          stdin=PIPE, stdout=PIPE, stderr=PIPE)
        if returnPipe:
            return self.pipe
        return self.pipe is not None

    def initialize(self):
        """Nothing to prepare: a process is started per request."""
        pass

    def uninitialize(self):
        """Kill the child process if it is still running.

        Returns 0 on success (including when there is nothing to kill) and
        -1 when the process could not be terminated.
        """
        # poll() returns None while the child is alive and its exit status
        # once it has finished, so "is None" is the "still running" test.
        if self.pipe is not None and self.pipe.poll() is None:
            try:
                self.pipe.stdin.close()
                os.kill(self.pipe.pid, 9)
                # Reap the killed child so it does not linger as a zombie.
                self.pipe.wait()
            except (OSError, IOError, ValueError):
                return -1
        return 0

    def makeCommand(self, flags):
        """Return the configured command followed by the given *flags*."""
        command = self.values["Command"]
        return " ".join([command] + flags.split(" "))

    def tokenize(self, text, enc="utf-8"):
        """Split *text* into token and non-token spans.

        The text is encoded with *enc*, sent to the external tokenizer, and
        each non-blank output line is looked up in the decoded text, left to
        right.

        Returns a list of ``(start, end, isToken)`` tuples with offsets into
        the decoded text.  Stretches skipped between two tokens are reported
        with ``isToken`` set to False.  Output lines that are blank, or that
        cannot be found in the remaining text, are ignored.
        """
        encode, decode = lookup(enc)[:2]
        encoded = encode(text)[0]
        retText, retErr = self.communicate("Tokenize", encoded)
        # Decode once; every search below runs against this same string.
        decoded = decode(encoded)[0]

        spans = []
        pos = 0
        for line in retText.splitlines():
            token = decode(line)[0].strip()
            if not token:
                # A blank line would otherwise yield a zero-length "token".
                continue
            found = decoded.find(token, pos)
            if found < 0:
                # Not present in the rest of the text: using -1 as an offset
                # would corrupt every following span.
                continue
            if found != pos:
                spans.append((pos, found, False))
                pos = found
            spans.append((pos, pos + len(token), True))
            pos += len(token)
        return spans

    def communicate(self, flagName, data=None):
        """Run the command configured as ``"<flagName> Flags"``.

        *data*, when given and non-empty, is written to the child's stdin
        with newlines replaced by spaces (a one-for-one replacement, so
        character offsets are unchanged).  Without data the child gets an
        empty stdin.

        Returns the ``(stdout, stderr)`` pair produced by the child.
        """
        self.do(self.makeCommand(self.values["%s Flags" % flagName]))
        self.start()
        try:
            if data:
                payload = data.replace("\n", " ")
            else:
                payload = None
            # Always collect the output, even without input data; otherwise
            # requests such as version() could never return anything.
            retText, retErr = self.pipe.communicate(payload)
        finally:
            self.stop()
            self.uninitialize()
        return (retText, retErr)

    def start(self):
        """Mark the plugin as running."""
        self.running = True

    def stop(self):
        """Mark the plugin as not running."""
        self.running = False

    def version(self):
        """Return the stdout of the command run with "Get Version Flags"."""
        return self.communicate("Get Version")[0]


class CommandLineParser(object):
    """Parser plugin that talks to one long-lived child process.

    The child is started by ``initialize``; the commands stored in
    ``values`` are written to its stdin one line at a time and one line is
    read back from its stdout per command.
    """

    implements(IPlugin, ParserPlugin)

    running = False
    displayName = "Command Line Parser"
    # Setting name -> setting type, as strings.
    preferences = {
        "Initialization Command": "string",
        "Start Command": "string",
        "Stop Command": "string",
        "Parse Command": "string",
        "Get Version Command": "string",
    }
    # NOTE(review): this dict lives on the class and setValue() mutates it in
    # place, so every instance of this class shares the same settings.
    values = {}
    # Child process; None until initialize() has run, which lets start()
    # test it safely.
    pipe = None

    def setValue(self, key, val):
        """Store the setting *key* with value *val*."""
        self.values[key] = val

    def initialize(self):
        """Start the child process from the "Initialization Command" setting.

        When the setting is missing, an empty command line is used.
        """
        try:
            initCommand = "%s\n" % self.values["Initialization Command"]
        except KeyError:
            initCommand = ""
        self.pipe = Popen(initCommand, shell=True, bufsize=0,
                          stdin=PIPE, stdout=PIPE, stderr=PIPE)

    def start(self):
        """Start the child if needed, send the "Start Command", mark running."""
        if not self.pipe:
            self.initialize()
        self.communicate("Start Command")
        self.running = True

    def stop(self):
        """Send the "Stop Command" and mark the plugin as not running."""
        self.communicate("Stop Command")
        self.running = False

    def communicate(self, key):
        """Send the command stored under *key* and return one reply line.

        Raises ``KeyError`` when *key* has not been set.
        """
        self.pipe.stdin.write("%s\n" % self.values[key])
        self.pipe.stdin.flush()
        return self.pipe.stdout.readline()

    def version(self):
        """Return the child's reply to the "Get Version Command"."""
        return self.communicate("Get Version Command")

    def uninitialize(self):
        """Send the "Uninitialize Command" to the child."""
        # NOTE(review): "Uninitialize Command" is not listed in `preferences`;
        # unless it is set some other way this raises KeyError - confirm.
        self.communicate("Uninitialize Command")

    def parse(self, wordforms, progressBar=None):
        """Send *wordforms* as XML and collect the parses from the XML reply.

        The request is a ``parserInput`` document holding one ``wordform``
        element (attribute ``form``) per word.  From the reply, every
        ``parses`` element below a ``parserOutput`` element is read.

        Returns a dict mapping each reported form to a list of
        ``{"morpheme": ..., "gloss": ...}`` dicts, one per ``parse`` element.
        *progressBar* is accepted but not used.
        """
        impl = getDOMImplementation()
        request = impl.createDocument(None, "parserInput", None)
        root = request.documentElement
        for word in wordforms:
            wordNode = request.createElement("wordform")
            wordNode.setAttribute("form", word)
            root.appendChild(wordNode)

        # NOTE(review): self.input / self.output are not assigned anywhere in
        # this file; presumably they are meant to be the child's stdin and
        # stdout - confirm against the caller.
        self.input.write(request.toxml())
        response = parseString(self.output.read())

        retDict = {}
        # getElementsByTagName returns a list of nodes, so each matching
        # element has to be searched individually.
        for outputNode in response.getElementsByTagName("parserOutput"):
            for parsesNode in outputNode.getElementsByTagName("parses"):
                word = parsesNode.getAttribute("form")
                retDict[word] = []
                for parseNode in parsesNode.getElementsByTagName("parse"):
                    retDict[word].append({
                        "morpheme": parseNode.getAttribute("morphemes"),
                        "gloss": parseNode.getAttribute("glosses"),
                    })
        return retDict

    def parseToken(self, token):
        """Return ``(form, gloss)`` pairs for the underlying forms of *token*."""
        # NOTE(review): getUnderlyingForms() and getGloss() are not defined in
        # this file - verify where they are expected to come from.
        return [(x, self.getGloss(x)) for x in self.getUnderlyingForms(token)]


# Module-level plugin instances.
x = CommandLineTokenizer()
y = CommandLineParser()