"""XML-RPC backed tokenizer and parser plugins.

Both plugins forward their work to a remote XML-RPC server whose address is
taken from the ``"uri"`` preference.  The module-level instances ``x`` and
``y`` at the bottom of the file are the plugin objects exposed by this module.
"""

import sys
import os
from subprocess import Popen, PIPE
import re
from twisted.plugin import IPlugin
from tmat.tokenizer import TokenizerPlugin, TokenizerError
from tmat.myparser import ParserPlugin, ParserError
from zope.interface import Interface, Attribute, implements
from xml.dom.minidom import getDOMImplementation, parseString
from xmlrpclib import Server, Error, Binary  # TODO: restrict this once the needed names are known
from codecs import lookup
import socket

Encode, Decode, Reader, Writer = lookup("utf-8")


class XMLRPCTokenizer(object):
    """Tokenizer plugin that delegates tokenization to an XML-RPC server."""

    implements(IPlugin, TokenizerPlugin)

    running = False
    displayName = "Web-Based Tokenizer"
    # Preference name -> type name of the value expected for it.
    preferences = {"uri": "string",
                   "Command": "string",
                   "Tokenize Flags": "string",
                   "Get Version Flags": "string",
                   "Timeout": "int"}

    def __init__(self):
        # Per-instance settings store.  A class-level dict would be shared
        # (and mutated) by every instance of the class.
        self.values = {}

    def setValue(self, key, val):
        """Store ``val`` as the current setting for preference ``key``."""
        self.values[key] = val

    def initialize(self):
        """Create the server proxy from ``"uri"`` and apply ``"Timeout"``.

        Note: ``socket.setdefaulttimeout`` sets the default timeout for all
        sockets created afterwards in this process, not only this plugin's.
        """
        self.server = Server(self.values['uri'])
        timeout = self.values['Timeout']
        socket.setdefaulttimeout(float(timeout))

    def tokenize(self, text):
        """Return the server's ``tokenize`` result for ``text``.

        The ``"Command"`` and ``"Tokenize Flags"`` settings are passed along
        with the text.
        """
        return self.server.tokenize(self.values["Command"],
                                    self.values["Tokenize Flags"],
                                    text)

    def start(self):
        """No-op; nothing to start on the client side."""
        pass

    def stop(self):
        """No-op; nothing to stop on the client side."""
        pass

    def version(self):
        """Return the server's ``version`` result for the configured command."""
        return self.server.version(self.values["Command"],
                                   self.values["Get Version Flags"])

    def uninitialize(self):
        """No-op; nothing to tear down on the client side."""
        pass


class XMLRPCParser(object):
    """Parser plugin that delegates parsing to an XML-RPC server."""

    implements(IPlugin, ParserPlugin)

    running = False
    displayName = "Web-Based Parser Plugin"
    # Preference name -> type name of the value expected for it.
    preferences = {"uri": "string",
                   "Finite State Machine": "string",
                   "Timeout": "int",
                   "Tokens-per-call": "int"}

    def __init__(self):
        # Per-instance settings store.  A class-level dict would be shared
        # (and mutated) by every instance of the class.
        self.values = {}

    def setValue(self, key, val):
        """Store ``val`` as the current setting for preference ``key``."""
        self.values[key] = val

    def initialize(self):
        """Create the server proxy from ``"uri"`` and apply ``"Timeout"``.

        Note: ``socket.setdefaulttimeout`` sets the default timeout for all
        sockets created afterwards in this process, not only this plugin's.
        """
        self.server = Server(self.values['uri'])
        timeout = int(self.values['Timeout'])
        socket.setdefaulttimeout(float(timeout))

    def start(self):
        """Forward to the server's ``start`` and return its result."""
        return self.server.start()

    def stop(self):
        """Forward to the server's ``stop`` and return its result."""
        return self.server.stop()

    def version(self):
        """Forward to the server's ``version`` and return its result."""
        return self.server.version()

    def uninitialize(self):
        """Forward to the server's ``uninitialize`` and return its result."""
        return self.server.uninitialize()

    def parse(self, wordforms):
        """Parse ``wordforms`` on the server and return the merged results.

        The word forms are sent in batches of ``"Tokens-per-call"`` items; a
        falsy setting (e.g. ``0``) sends everything in a single call.  Each
        call passes the ``"Finite State Machine"`` setting and one batch to
        the server's ``parse`` method, and the per-batch results are merged
        into one dictionary (entries from later batches win on duplicate
        keys).

        Returns an empty dict, without contacting the server, when
        ``wordforms`` is empty.

        Raises:
            ParserError: if any server call (or the batching itself) fails.
        """
        words = list(wordforms)
        batch_size = self.values["Tokens-per-call"]
        fsm = self.values["Finite State Machine"]

        if not words:
            # Nothing to parse.  Returning early also avoids a zero batch
            # size below, which used to surface as a spurious ParserError.
            return {}
        if not batch_size:
            batch_size = len(words)

        try:
            responses = [self.server.parse(fsm, words[start:start + batch_size])
                         for start in range(0, len(words), batch_size)]
        except Exception:
            # Exception (rather than a bare except) lets KeyboardInterrupt
            # and SystemExit propagate instead of becoming a ParserError.
            raise ParserError

        merged = {}
        for response in responses:
            merged.update(response)
        return merged


# Module-level plugin instances.
x = XMLRPCTokenizer()
y = XMLRPCParser()