#!/usr/bin/python
# coding: utf-8
"""PySocialSKKServ: an SKK-style dictionary server backed by the Social IME web API.

The server listens on TCP port 5511, reads single-character-prefixed
commands from the client, forwards conversion requests over HTTP to the
host named in ``SocialSKKRequestHandler.SERVER`` and writes the candidates
back to the client.

NOTE: this module targets Python 2 (byte ``str`` / ``unicode`` semantics,
``urllib.urlopen``).  The ``SocketServer`` import fallback below only keeps
the module importable under Python 3; it does not make the network code
Python 3 ready.
"""

import os  # for daemonize
import re
import socket  # for service
import sys  # for daemonize
import urllib  # for service

try:
    import SocketServer  # for service
except ImportError:  # the module is named ``socketserver`` on Python 3
    import socketserver as SocketServer


# utility functions

def isContentOnly(seq, aset):
    """Return True when every element of *seq* is contained in *aset*.

    An empty *seq* yields True.
    """
    return all(c in aset for c in seq)


def isContentAny(seq, aset):
    """Return True when at least one element of *seq* is contained in *aset*.

    An empty *seq* yields False.
    """
    return any(c in aset for c in seq)


def buildTranslater(*args, **kwds):
    """Build a function that replaces every key of a mapping by its value.

    The arguments are passed straight to ``dict()``.  The returned callable
    takes a text and returns it with all occurrences of the mapping's keys
    substituted in a single pass.
    """
    table = dict(*args, **kwds)
    if not table:
        # An empty alternation would match the empty string everywhere and
        # then fail the table lookup, so translate nothing instead.
        return lambda text: text

    # Longest keys first: regex alternation takes the first alternative that
    # matches, so a short key must not shadow a longer key it is a prefix of.
    keys = sorted(table, key=len, reverse=True)
    pattern = re.compile('|'.join(map(re.escape, keys)))

    def translateOne(match):
        return table[match.group(0)]

    def translate(text):
        return pattern.sub(translateOne, text)

    return translate


class SocialSKKServer(SocketServer.ThreadingTCPServer):
    """Threaded TCP server; one thread per client connection."""

    allow_reuse_address = True
    daemon_threads = True


class SocialSKKRequestHandler(SocketServer.StreamRequestHandler):
    """Handle one client connection.

    The first character of each request selects a ``do_<char>`` method; the
    rest of the (whitespace-stripped) request is passed to it as the
    argument, and the returned text is sent back to the client.
    """

    VERSION = u'PySocialSKKServ0.2 '
    SERVER = u'http://www.social-ime.com:80/'

    # Command characters sent by the client.
    CLIENT_END = u'0'
    CLIENT_REQUEST = u'1'
    CLIENT_VERSION = u'2'
    CLIENT_HOST = u'3'
    CLIENT_SERVER_COMPLETION = u'4'

    # Status characters that prefix the server's replies.
    SERVER_ERROR = u'0'
    SERVER_FOUND = u'1'
    SERVER_NOT_FOUND = u'4'
    SERVER_FULL = u'9'

    COMBUFSIZE = 1024
    SERVER_CHARSET = 'EUC-JP'
    CLIENT_CHARSET = 'EUC-JP'

    debug = True

    # Characters that must be escaped inside a candidate, and their escapes.
    specialbefore = u'/;#'
    specialafter = {
        u'/': u'\\057',
        u';': u'\\073',
        u'#': u'\\043',
    }
    specialxlat = None  # translator built from specialafter in setup()

    ASCII_LETTERS = u'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    HIRAGANA = u'あいうえおぁぃぅぇぉかきくけこがぎぐげごさしすせそざじずぜぞたちつてとだぢづでどっなにぬねのはひふへほぱぴぷぺぽばびぶべぼまみむめもやゆよゃゅょらりるれろわゐゑをん'
    KATAKANA = u'アイウエオァィゥェォカキクケコガギグゲゴサシスセソザジズゼゾタチツテトダヂヅデドッナニヌネノハヒフヘホパピプペポバビブベボマミムメモヤユヨャュョラリルレロワヰヱヲン'
    # A trailing lowercase letter or '*' switches do_1 to predictive mode.
    PREDICT_SUFFIXES = u'abcdefghijklmnopqrstuvwxyz*'

    def debug_method(self, arg):
        """Write *arg*, wrapped in double quotes, to stderr when ``debug`` is set.

        *arg* may be a byte string (decoded with CLIENT_CHARSET, then
        SERVER_CHARSET) or a unicode string.  A byte string that decodes
        with neither charset is written as its ``repr``.
        """
        if not self.debug:
            return
        import locale
        # getdefaultlocale() reports None for the encoding when the
        # environment defines no locale (common for a daemon), so fall back.
        code = locale.getdefaultlocale()[1] or 'utf-8'

        text = arg
        if isinstance(arg, bytes):
            for charset in (self.CLIENT_CHARSET, self.SERVER_CHARSET):
                try:
                    text = arg.decode(charset)
                    break
                except UnicodeDecodeError:
                    continue
            else:
                sys.stderr.write('"%r"\n' % arg)
                return
        # 'replace' keeps logging from ever aborting a request because the
        # locale encoding cannot represent a character.
        sys.stderr.write('"%s"\n' % text.encode(code, 'replace'))
        return

    # str.isalnum also treats hiragana and the like as alphabetic,
    # so the character-class tests are hand-written.
    def isalnum(self, str):
        """Return True when *str* consists solely of ASCII letters."""
        return isContentOnly(str, self.ASCII_LETTERS)

    def isHiragana(self, str):
        """Return True when *str* consists solely of hiragana."""
        return isContentOnly(str, self.HIRAGANA)

    def isKatakana(self, str):
        """Return True when *str* consists solely of katakana."""
        return isContentOnly(str, self.KATAKANA)

    def isKana(self, str):
        """Return True when *str* consists solely of hiragana and/or katakana."""
        return isContentOnly(str, self.HIRAGANA + self.KATAKANA)

    # Conversion statistics (per handler instance once first incremented).
    nRequest = 0
    nError = 0
    nOK = 0
    nFail = 0
    nPredict = 0
    nDict = 0

    def convert(self, arg, predict):
        """Convert *arg* by calling the Social IME API.

        :param arg: unicode text to convert.
        :param predict: true to use the predictive endpoint
            (``api2/predict.php``), false to convert *arg* as one segment
            (``api/`` with ``resize[0]`` set to the length of *arg*).
        :returns: list of unicode candidates; empty on an I/O error or when
            the server returned nothing.  Candidates containing one of the
            ``specialbefore`` characters are wrapped in ``(concat "...")``
            with those characters escaped.
        """
        self.debug_method(u'convert %s, %s' % (arg, predict))
        self.nRequest += 1

        earg = arg.encode(self.SERVER_CHARSET)
        if predict:
            # predictive conversion
            params = urllib.urlencode({'string': earg,
                                       'charset': self.SERVER_CHARSET})
            request = self.SERVER + 'api2/predict.php?%s' % params
            self.nPredict += 1
        else:
            # convert as a single segment
            params = urllib.urlencode({'string': earg,
                                       'resize[0]': '+' + str(len(arg)),
                                       'charset': self.SERVER_CHARSET})
            request = self.SERVER + 'api/?%s' % params
            self.nDict += 1

        try:
            f = urllib.urlopen(request)
            try:
                result = f.read().strip()
            finally:
                # Always release the HTTP connection.
                f.close()
        except IOError:
            self.nError += 1
            return []

        uresult = result.decode(self.SERVER_CHARSET)
        candidates = []
        for x in uresult.split(u'\t'):
            if not x:
                # An empty body splits into [u'']; an empty candidate would
                # otherwise be reported to the client as a hit.
                continue
            if isContentAny(x, self.specialbefore):
                # escape characters that clash with the reply syntax
                x = u'(concat "%s")' % self.specialxlat(x)
            candidates.append(x)
        return candidates

    def do_0(self, arg):
        """CLIENT_END: mark the connection for closing and echo *arg*."""
        self.debug_method('CLIENT_END:' + arg)
        self.close_connection = True
        return arg

    def do_1(self, arg):
        """CLIENT_REQUEST: convert *arg* and return the candidate list.

        A purely alphabetic *arg* (looks like an English word) and an *arg*
        without a trailing marker are converted as one segment.  A trailing
        lowercase letter (okurigana marker) or ``*`` (wildcard) selects
        predictive conversion; the ``*`` itself is removed first.
        """
        predict = False
        if not self.isalnum(arg) and arg[-1] in self.PREDICT_SUFFIXES:
            predict = True
            if arg[-1] == u'*':
                # drop the wildcard character
                arg = arg[:-1]

        result = self.convert(arg, predict)
        if result:
            ret = self.SERVER_FOUND + u'/' + u'/'.join(result) + u'/\n'
            self.nOK += 1
        else:
            ret = self.SERVER_NOT_FOUND + arg
            self.nFail += 1
        return ret

    def do_2(self, arg):
        """CLIENT_VERSION: return the server version string."""
        return self.VERSION

    def do_3(self, arg):
        """CLIENT_HOST: return ``"<hostname>:<ip address>: "`` for this host."""
        hostname = socket.gethostname()
        try:
            ipaddr = socket.gethostbyname(hostname)
        except socket.error:
            # The local hostname does not resolve; still answer the client.
            ipaddr = '127.0.0.1'
        return u'%s:%s: ' % (hostname, ipaddr)

    def do_4(self, arg):
        """CLIENT_SERVER_COMPLETION: return completions for *arg*.

        A trailing ``~`` (skk-lookup style) is removed and makes the reply
        contain every predicted candidate; otherwise only candidates written
        entirely in hiragana are returned.
        """
        # endswith() instead of arg[-1]: a bare command carries an empty arg.
        lookup = arg.endswith(u'~')
        if lookup:
            arg = arg[:-1]

        # try predictive conversion first
        result = self.convert(arg, True)
        if lookup:
            newresult = result
        else:
            newresult = [x for x in result if self.isHiragana(x)]

        if newresult:
            ret = self.SERVER_FOUND + u' ' + u' '.join(newresult) + u' \n'
            self.nOK += 1
        else:
            ret = self.SERVER_NOT_FOUND + arg
            self.nFail += 1
        return ret

    def do_UNKNOWN(self, arg):
        """Log an unsupported request and return the error status."""
        sys.stdout.write('unsupported command (%r)\n' % arg)
        return self.SERVER_ERROR

    close_connection = False

    def handle_one_request(self):
        """Read one request from the socket, dispatch it and send the reply.

        An empty read marks the connection as closed.  An unsupported
        command is logged and gets no reply.  A request that cannot be
        decoded or encoded gets SERVER_ERROR.
        """
        raw_request = self.connection.recv(self.COMBUFSIZE)
        if not raw_request:
            self.close_connection = True
            return

        try:
            urequest = raw_request.decode(self.CLIENT_CHARSET)
            mname = u'do_' + urequest[0]
            if not hasattr(self, mname):
                self.do_UNKNOWN(urequest)
                return
            method = getattr(self, mname)
            urequest = urequest.strip()
            ret = method(urequest[1:]).encode(self.CLIENT_CHARSET)
        except UnicodeError:
            # Covers undecodable input and, on Python 2, the
            # UnicodeEncodeError raised by hasattr() for a non-ASCII name.
            ret = self.SERVER_ERROR.encode(self.CLIENT_CHARSET)
        self.wfile.write(ret)
        return

    def handle(self):
        """Serve requests until the connection is closed, then log statistics."""
        sys.stdout.write('new connection %s %s\n'
                         % (self.client_address, self.server))
        while not self.close_connection:
            self.handle_one_request()
        sys.stdout.write('%s request: %d(%d/%d) error: %d ok: %d fail: %d\n'
                         % (self.client_address, self.nRequest, self.nDict,
                            self.nPredict, self.nError, self.nOK, self.nFail))
        sys.stdout.write('connection closed %s %s\n'
                         % (self.client_address, self.server))
        return

    def setup(self):
        """Set up the stream files and build the special-character translator."""
        SocketServer.StreamRequestHandler.setup(self)
        self.specialxlat = buildTranslater(self.specialafter)

    def finish(self):
        """Delegate to the base class."""
        SocketServer.StreamRequestHandler.finish(self)


def daemonize(stdin='/dev/null', stdout='/dev/null', stderr='/dev/null',
              run='/dev/null'):
    """Detach the process from its parent with a double fork.

    :param stdin: path opened for reading and installed as standard input.
    :param stdout: path opened for appending and installed as standard output.
    :param stderr: path opened for appending and installed as standard error.
    :param run: path the daemon's process id is written to.

    Both intermediate parent processes exit with status 0; a failed fork
    is reported on stderr and exits with status 1.
    """
    sys.stdout.write('%s %s %s %s\n' % (stdin, stdout, stderr, run))

    # 1st fork
    try:
        if os.fork() > 0:
            sys.exit(0)
    except OSError as e:
        sys.stderr.write('1st fork failed: %s(%d)\n' % (e.strerror, e.errno))
        sys.exit(1)

    # separate from parent process
    os.chdir('/')
    # NOTE(review): umask 0 makes files the daemon creates world-writable.
    os.umask(0)
    os.setsid()

    # 2nd fork
    try:
        if os.fork() > 0:
            sys.exit(0)
    except OSError as e:
        sys.stderr.write('2nd fork failed: %s(%d)\n' % (e.strerror, e.errno))
        sys.exit(1)

    # Written outside the try block so that a pid-file problem is never
    # reported as a fork failure; the file is closed deterministically.
    with open(run, 'w') as pidfile:
        pidfile.write(str(os.getpid()))

    # daemonize complete
    for f in sys.stdout, sys.stderr:
        f.flush()
    si = open(stdin, 'r')
    so = open(stdout, 'a+', 0)
    se = open(stderr, 'a+', 0)
    os.dup2(si.fileno(), sys.stdin.fileno())
    os.dup2(so.fileno(), sys.stdout.fileno())
    os.dup2(se.fileno(), sys.stderr.fileno())
    return


if __name__ == '__main__':
    # Can a process be daemonized on Windows at all?  Skipped there for now.
    if sys.platform != 'win32':
        daemonize(stdout='/tmp/skkserv.log',
                  stderr='/tmp/skkserv.err',
                  run='/tmp/skkserv.run')

    sys.stdout.write('initializing PySocialSKKServ\n')
    # NOTE(review): binds to every interface; confirm this is intended.
    server = SocialSKKServer(('0.0.0.0', 5511), SocialSKKRequestHandler)
    sys.stdout.write('PySocialSKKServ service start\n')
    server.serve_forever()