2013年2月20日 星期三

用奇摩字典做查詢 - ydict.py

#!/usr/bin/env python
# coding=UTF-8
# Chen Wen 
# Web Site http://code.google.com/p/ydict/
# Blog : http://chenpc.csie.in

import getopt
import sys
import string
import httplib, urllib,string,sys
from HTMLParser import HTMLParser
from optparse import OptionParser
import locale
from codecs import EncodedFile
import shelve,os
import random
import ConfigParser
from multiprocessing import Process, Queue, Pool


# Program identification string printed by the -v/--version option.
version = "ydict 1.2.5"

# ANSI SGR escape sequences used to colour terminal output
# ("1" = bold/bright, "0" = reset all attributes).
red = "\x1b[31;1m"
lindigo = "\x1b[36;1m"   # bright cyan
indigo = "\x1b[36m"      # cyan
green = "\x1b[32m"
yellow = "\x1b[33;1m"
blue = "\x1b[34;1m"
org = "\x1b[0m"          # back to the terminal's original attributes
light = "\x1b[0;1m"      # reset, then bold

# Runtime state; the start-up code and the command-line handling overwrite
# these defaults.
learn = 0            # 1 -> looked-up words are recorded in the word database
browsemode = False   # True -> the quiz shows definitions without masking
database = 0         # 1 -> the shelve database `db` is open
voicedata = ""       # directory for cached pronunciation files ("" = off)
playback = ""        # command used to play a cached file ("" = off)
prefetch = ""        # look-ahead queue depth, kept as a string



# Resolve the home directory once.  expanduser() honours $HOME and falls back
# to the password database, so a missing HOME no longer raises TypeError.
_home = os.path.expanduser("~")

# The word database is opened only when it already exists; the file itself is
# created by the -d/--database option.
if os.access(_home + "/.ydict.db", os.F_OK):
    db = shelve.open(_home + "/.ydict.db", "c")
    learn = 1
    database = 1

# ~/.ydictrc is optional.  Options are read in order, so values read before a
# missing option are kept and the remaining ones stay at their defaults.
try:
    config = ConfigParser.ConfigParser()
    with open(_home + "/.ydictrc") as _rcfile:
        config.readfp(_rcfile)
    voicedata = config.get('ydict', 'voicedata')
    playback = config.get('ydict', 'playback')
    prefetch = config.get('ydict', 'prefetch')
except (IOError, OSError, ConfigParser.Error):
    # Missing/unreadable file, or missing section/option: keep the defaults.
    pass

# prefetch is later used as a bounded queue size.  A size of 0 means
# "unbounded" to multiprocessing.Queue, which would make the prefetch loop
# spin forever, so anything that is not a positive integer becomes "5".
if not prefetch.isdigit() or int(prefetch) < 1:
    prefetch = "5"
    
        
def cleanup():
    """Flush the word database to disk when one is open, then exit."""
    if not database:
        exit()
    db.sync()
    exit()

def importfile(file):
    """Add the first word of every line of *file* to the word database.

    The word is the text before the first space on the line.  Words already
    in the database keep their score; new ones start at 0.  Blank lines (and
    lines that start with a space) are skipped so that no empty key is stored
    -- an empty key would later crash the quiz code, which indexes ``k[0]``.

    file -- path of a text file, one word per line.
    """
    # The context manager closes the file even if a database write fails.
    with open(file) as fp:
        for line in fp:
            # strip() also removes the "\r" left behind by CRLF line endings.
            newword = line.split(" ")[0].strip()
            if newword and newword not in db:
                db[newword] = 0
    print("File imported!")
def result(count, total):
    if total == 0:
        print ""
        exit()
    print "\nScore: ",int(count),"/",int(total),"(",count/total,")"
    exit()

def seckey(x):
    """Sort key: return the second element of *x* (the score of a (word, score) pair)."""
    second = x[1]
    return second
       
def savefile(k, url):
    """Download the pronunciation audio for word *k* into the voice cache.

    The file is stored as ``<voicedata>/<first letter>/<word>.mp3``.  Nothing
    happens when the cache is disabled (``voicedata == ""``), when the word or
    the URL is empty, or when the file is already cached.  Caching is
    best-effort: any failure leaves the cache unchanged and is not reported.

    k   -- the word (used for the directory and file name).
    url -- address of the audio file.
    """
    import subprocess

    if voicedata == "" or not k or not url:
        return

    directory = voicedata + "/" + k[0]
    filename = directory + "/" + k + ".mp3"
    # Test the real path.  The previous code wrapped the path in shell quotes
    # before testing it, so the check never matched and every lookup
    # downloaded the file again.
    if os.access(filename, os.F_OK):
        return

    # The URL is cut out of a JSON fragment and may carry "\/" escapes; the
    # shell used to strip those backslashes, so do it explicitly now.
    url = url.replace("\\/", "/")

    tmpname = filename + "voice.tmp"
    try:
        if not os.access(directory, os.F_OK):
            os.mkdir(directory)
        # Argument list instead of a shell string: neither the word nor the
        # URL can be interpreted by a shell.
        status = subprocess.call(["wget", "-q", url, "-O", tmpname])
        if status == 0:
            os.rename(tmpname, filename)
        elif os.access(tmpname, os.F_OK):
            # wget creates the output file even when the download fails.
            os.remove(tmpname)
    except OSError:
        # wget missing, cache directory not writable, ...: skip caching.
        return
        
def speek(k):
    """Play the cached pronunciation of word *k* in the background.

    Does nothing unless both ``voicedata`` and ``playback`` are configured and
    *k* is non-empty.  When the audio file is not cached yet, the word is
    looked up again with ``dict(k, m_pron)`` and nothing is played this time.

    k -- the word whose pronunciation should be played.
    """
    import shlex
    import subprocess

    if voicedata == "" or playback == "" or k == "":
        return

    filename = voicedata + "/" + k[0] + "/" + k + ".mp3"
    if not os.access(filename, os.F_OK):
        dict(k, m_pron)
        return

    try:
        # The player runs detached with all three standard streams on
        # /dev/null, as the former "cmd >/dev/null 2>&1 &" shell line did;
        # stdin is included so the player cannot steal keystrokes from the
        # prompt.  Passing the file name as its own argument keeps words
        # containing quotes from breaking (or injecting into) the command.
        with open(os.devnull, "r+") as devnull:
            subprocess.Popen(shlex.split(playback) + [filename],
                             stdin=devnull, stdout=devnull, stderr=devnull)
    except (OSError, ValueError):
        # Player not installed or malformed playback setting: stay silent,
        # as the shell version (output discarded) did.
        pass

def answers(iq, oq):
    """Worker loop: look up every word taken from *iq* and publish the result.

    For each key read from *iq* the word is looked up with ``dict(key, 1)``
    and the pair ``[key, text]`` is put on *oq*.  The loop never returns.
    """
    while True:
        key = iq.get()
        lookup = dict(key, 1)
        text = lookup[0]
        oq.put([key, text])
        
        
def browse():
    """Cycle through the stored words, showing each definition in turn.

    Definitions are fetched ahead of time by a background process running
    ``answers``; up to ``prefetch`` look-ups are queued.  After each word the
    user may press Enter to continue or type ``d`` to delete the word from
    the database.  Ctrl-C at that prompt ends the session through
    ``result()``.  The function exits the program when fewer than two words
    are (or remain) stored.
    """
    wordlist = db.items()
    size = len(wordlist)
    # Check before spawning the worker so no process is started for nothing.
    if size <= 1:
        print("There must be at least two words needed in the list.")
        exit()

    totalcount = 0.0
    right = 0.0
    # A queue size of 0 would mean "unbounded" and the fill loop below would
    # never stop, so the depth is forced to at least 1.
    depth = max(1, int(prefetch))
    lookup = Queue(maxsize=depth)
    answer = Queue(maxsize=depth)
    lookuper = Process(target=answers, args=(lookup, answer))
    lookuper.daemon = True
    lookuper.start()

    i = 0
    while True:
        # Keep the request queue full so answers are ready in advance.
        while not lookup.full():
            # Wrap before indexing: the list may have shrunk after a
            # deletion, leaving i past its end.
            if i >= size:
                i = 0
            k = wordlist[i][0]
            i = i + 1
            lookup.put(k.lower())

        # The local is deliberately not called "result": that name would
        # shadow the result() function used by the Ctrl-C handler below.
        (k, definition) = answer.get()
        if k not in db:
            # Deleted (or never stored under this spelling): skip the answer.
            continue
        print(definition)
        speek(k)

        try:
            word = raw_input("(d) Delete, (enter) Continue: ")
            if word == "d":
                del db[k]
                wordlist = db.items()
                size = len(wordlist)
                if size <= 1:
                    print("There must be at least two words needed in the list.")
                    exit()
        except KeyboardInterrupt:
            result(right, totalcount)
        
def wordlearn():
    """Run the vocabulary quiz on the stored words.

    Words are drawn at random from the list sorted by ascending score, with a
    triangular distribution whose mode is index 0, so low-scored words come
    up more often.  The definition is shown with the word masked by ``####``
    (unmasked when ``browsemode`` is true) and the user types the word.

    A correct answer adds 1 to the word's score (capped at 100); a wrong one
    subtracts 3 (floored at 0) and offers to delete the word.  Ctrl-C at the
    delete prompt prints the score through ``result()``.  The function exits
    the program when fewer than two words are (or remain) stored.
    """
    wordlist = db.items()
    wordlist.sort(key=seckey)
    size = len(wordlist)
    # Check before spawning the worker so no process is started for nothing.
    if size <= 1:
        print("There must be at least two words needed in the list.")
        exit()

    totalcount = 0.0
    right = 0.0
    lookup = Queue(maxsize=5)
    answer = Queue(maxsize=5)
    lookuper = Process(target=answers, args=(lookup, answer))
    lookuper.daemon = True
    lookuper.start()

    while True:
        # Keep the request queue full so definitions are ready in advance.
        while not lookup.full():
            k = wordlist[int(random.triangular(0, size - 1, 0))][0]
            lookup.put(k.lower())

        # The local is deliberately not called "result": that name would
        # shadow the result() function used by the Ctrl-C handler below.
        (k, definition) = answer.get()
        if k not in db:
            continue
        if browsemode == False:
            # Hide the word in lower-case, upper-case and capitalised form.
            capitalised = k[0].swapcase() + k[1:].lower()
            print(definition.replace(k, "####")
                  .replace(k.upper(), "####")
                  .replace(capitalised, "####"))
        else:
            print(definition)
        speek(k)
        word = raw_input("Input :")

        if word == k.lower():
            print("Bingo!")
            right += 1
            db[k] += 1
            if db[k] >= 100:
                db[k] = 100
        else:
            db[k] -= 3
            if db[k] < 0:
                db[k] = 0
            # Two spaces after the colon reproduce the output of the former
            # statement ``print "WRONG! Correct answer is : ",k``.
            print("WRONG! Correct answer is :  " + k)
            try:
                word = raw_input("(d) Delete, (enter) Continue: ")
                if word == "d":
                    del db[k]
                    wordlist = db.items()
                    wordlist.sort(key=seckey)
                    size = len(wordlist)
                    if size <= 1:
                        print("There must be at least two words needed in the list.")
                        exit()
            except KeyboardInterrupt:
                result(right, totalcount)

        totalcount += 1
        # Re-sort from time to time so the changed scores influence the draw.
        if totalcount % (int(size / 4) + 1) == 0:
            wordlist = db.items()
            wordlist.sort(key=seckey)
            size = len(wordlist)
def wordlist():
    wordlist = db.items()
    wordlist.sort(key=seckey)
    for k,v in wordlist:
        print k,v

class MyHTMLParser(HTMLParser):
    """Turn the relevant part of a dictionary result page into coloured text.

    Start tags select what to show and how to decorate it; the next chunk of
    character data is then appended to ``self.result`` wrapped in the chosen
    prefix/postfix.  After ``feed()``, ``self.result`` holds the output
    fragments and ``self.key`` the headword that was recorded (only set when
    learning is enabled).
    """

    # Set to 1 by the caller when the page carries a spelling suggestion; the
    # next <strong> element is then shown as "Spell Check: [...]".
    redirect=0
    # When true, attribute-less <span>/<div> text and the pronunciation block
    # are included in the output.
    pron=True

    def __init__(self):
        # Initialise the base parser explicitly so the instance is usable
        # even if the caller does not call reset() before feed().
        HTMLParser.__init__(self)
        self.show=0            # 1 -> the next data chunk is emitted
        self.prefix=""         # text/colour put before the next chunk
        self.postfix=org       # text/colour put after the next chunk
        self.entry=1           # running number for "interpret" paragraphs
        self.desc=0            # count of "description" divs seen so far
        self.result=[]         # output fragments, in document order
        self.learn=learn       # snapshot of the module-level learn flag
        self.learnword=0       # 1 -> the next data chunk is the headword
        self.chinese=0         # set by the caller; non-zero disables headword recording
        self.mp3url=""         # audio URL handed to savefile()
        self.key=""            # headword recorded by handle_data()

    def handle_starttag(self, tag, attrs):
        """Select prefix/postfix for the data that follows *tag*.

        Only the value of the first attribute is inspected.
        """
        if self.redirect == 1 and tag == "strong":
            self.show=1
            self.prefix="Spell Check: ["+yellow
            self.postfix=org+"]"

        elif tag in ("span", "div") and len(attrs)==0:
            if self.pron == True:
                self.show=1
                self.prefix=""
        elif tag == "div" and len(attrs)!=0:
            if attrs[0][1]=="pronunciation" and self.pron==True:
                self.result.append(blue)
            elif attrs[0][1]=="caption":
                self.show=1
                self.prefix=red
            elif attrs[0][1]=="theme clr":
                self.show=1
                if self.chinese == 0:
                    self.learnword=1
                    self.prefix="["+light
                    self.postfix=org+"]"
            elif attrs[0][1]=="description":
                # The first "description" div is counted but not shown.
                if self.desc != 0:
                    self.show=1
                    self.prefix="  "+org
                self.desc+=1
        elif tag == "p" and len(attrs)!=0:
            if attrs[0][1] == "example":
                self.show=1
                self.prefix="    "+indigo
            elif attrs[0][1] == "interpret":
                self.show=1
                self.prefix="  "+org+str(self.entry)+"."
                self.entry+=1

    def handle_data(self,data):
        """Emit *data* if it was selected and record the headword if flagged."""
        if self.show == 1:
            self.result.append(self.prefix+data+self.postfix+"\n")
            self.show=0
            self.prefix=""
            self.postfix=""
        if self.learn == 1 and self.learnword == 1:
            self.key = data.lower()
            # Only purely alphabetic headwords are added to the database.
            if self.key.isalpha() and self.key not in db:
                db[self.key] = 0
            self.learnword=0
            savefile(self.key, self.mp3url)

    def handle_endtag(self, tag):
        """Reset the terminal colour at the end of every <div>."""
        if tag == "div":
            self.result.append(org)

def htmlspcahrs(content):
    """Decode the basic HTML entities in *content* and indent continuation lines.

    Every newline is followed by four spaces and the ``green`` colour code.

    NOTE(review): this function was damaged when the listing was published as
    HTML -- the entity names had been turned into the characters they stand
    for (leaving, among others, a stray triple quote) and four further rules
    had lost their search patterns entirely.  The entity rules are restored
    below.  The four lost rules replaced two unknown markup strings with ""
    and two more with ``lindigo`` and ``org+indigo`` (apparently an
    opening/closing pair used for highlighting); their patterns cannot be
    recovered from this copy and must be restored from the upstream source.
    An empty search pattern would insert the replacement between every
    character, so those rules are left out rather than guessed.
    """
    # The exact spelling of the apostrophe entity was lost; accept the common ones.
    for apostrophe in ("&#39;", "&#039;", "&apos;"):
        content = content.replace(apostrophe, "'")
    content = content.replace("&quot;", "\"")
    content = content.replace("&gt;", ">")
    content = content.replace("&lt;", "<")
    # "&amp;" is decoded last so that text such as "&amp;lt;" is unescaped
    # exactly once instead of collapsing all the way to "<".
    content = content.replace("&amp;", "&")
    content = content.replace("\n", "\n    " + green)
    return content


def http_postconn(word):
    """Send *word* as a search request to the Yahoo! Taiwan dictionary.

    The form fields ``p`` (the word) and ``ei`` (``UTF-8``) are passed as the
    data argument of ``urllib.urlopen``; the opened response object is
    returned.
    """
    form = urllib.urlencode({'p': word ,'ei' : 'UTF-8'})
    endpoint = "http://%s/search" % "tw.dictionary.yahoo.com"
    return urllib.urlopen(endpoint, form)

def dict(word,pron):
    """Look up *word* online and return ``(text, key)``.

    text -- the formatted, colourised entry; "" when *word* is blank or the
            page does not have the expected layout; a "Not Found!" line when
            the site reports no entry.
    key  -- the headword recorded by the parser (only set when learning is
            enabled), "" for blank input, or *word* itself on the early-exit
            paths.

    pron -- passed to the parser; when true the pronunciation parts of the
            page are included.

    NOTE(review): this function and the script section below were rebuilt
    from a copy whose line structure had been collapsed, so the nesting of
    the try/except blocks is a reconstruction.  The non-ASCII marker literals
    look mis-encoded (possibly UTF-8 text decoded as Big5, with some bytes
    lost) and two of them had lost their closing quote.  They are carried
    over verbatim and will most likely not match a real page -- restore them
    from the upstream source.
    """
    output = ""
    word = word.strip()
    if len(word) <= 0:
        return output, ""

    r1 = http_postconn(word)
    try:
        data1 = r1.read()
    finally:
        # The response used to be left open until garbage collection.
        r1.close()

    p = MyHTMLParser()
    p.redirect = 0
    p.chinese = 0
    p.pron = pron

    # The audio URL sits in an inline script between these two markers.
    audio_marker = '{"audio":"'
    try:
        index5 = data1.index(audio_marker)
        index6 = data1.index('"}};var noFlashPlayerMessage')
        p.mp3url = data1[index5 + len(audio_marker):index6]
    except ValueError:
        p.mp3url = ""

    # Cut the page at the "Online Resources" section.
    # NOTE(review): the original literal was wrapped in markup that this copy
    # lost; only its text is searched for here.
    try:
        data1 = data1[:data1.index('Online Resources')]
    except ValueError:
        return output, word

    # Corrupted marker literals, verbatim (see the docstring).
    suggest_marker = "?冽銝閬"
    notfound_marker = "敺甇?摮?曆??唳閬?鞈???"
    entry_marker = "摮??"
    note_begin = "隞乩???"
    note_end = " ?典??訾葉????"

    try:
        index1 = data1.index(suggest_marker)
        p.redirect = 1
    except ValueError:
        try:
            index1 = data1.index(notfound_marker)
            # `db` only exists when the database file was found at start-up;
            # without this guard an unknown word raised NameError instead of
            # reporting "Not Found!".
            if database and word in db:
                del db[word]
            return yellow+"Not Found!"+org+"\n", word
        except ValueError:
            # NOTE(review): a page without this marker still raises
            # ValueError to the caller, as before.
            index1 = data1.index(entry_marker)

    try:
        index3 = data1.index(note_begin)
        index4 = data1.index(note_end)
        # 18 presumably skips the marker text (byte length of the original
        # string) -- TODO confirm once the literals are restored.
        print(yellow+note_begin+light+data1[index3+18:index4]+yellow+note_end+org)
    except ValueError:
        pass

    # Both markers present -> the parser is told the entry is Chinese.
    try:
        data1.index("?潮")
        data1.index("瘜券")
        p.chinese = 1
    except ValueError:
        pass

    data = data1[index1:]
    p.reset()
    data = htmlspcahrs(data)
    p.feed(data)
    for s in p.result:
        output += s
    return output, p.key


def _lookup_and_print(word, pron, enc):
    """Look up *word*, play its audio if cached, and print the entry encoded as *enc*."""
    (definition, key) = dict(word, pron)
    speek(key)
    print(unicode(definition, 'utf8').encode(enc))


if __name__ == '__main__':
    parser = OptionParser(usage = "Usage: ydict [options] word1 word2 ......")
    parser.add_option("-s", "--step", dest="step", help="one step mode.",default=False,action="store_true")
    parser.add_option("-p", "--pron", dest="pron", help="disable pronounce.",default=True,action="store_false")
    parser.add_option("-u", "--utf8", dest="utf8", help="force utf-8 encoding.",default=False,action="store_true")
    parser.add_option("-b", "--big5", dest="big5", help="force big5 encoding.",default=False,action="store_true")
    parser.add_option("-w", "--word", dest="oneword", type="string" , help="only one word.",action="store")
    parser.add_option("-c", "--nocolor", dest="nocolor", help="force no color code",default=False, action="store_true")
    parser.add_option("-v", "--version", dest="version", help="show version.",default=False,action="store_true")
    parser.add_option("-d", "--database", dest="database", help="initial database.",default=False,action="store_true")
    parser.add_option("-l", "--learn", dest="learnmode", help="start learning mode.",default=False,action="store_true")
    parser.add_option("-B", "--browse", dest="browsemode", help="start browse mode.",default=False,action="store_true")
    parser.add_option("-a", "--list", dest="listall", help="list all word in list.",default=False,action="store_true")
    parser.add_option("-i", "--import", dest="importfile", type="string", help="import a word list",default=False,action="store")
    (options, args) = parser.parse_args()

    # Read by speek() as a module-level name.
    m_pron = options.pron
    (lang , enc) = locale.getdefaultlocale()

    if options.nocolor:
        # Blank every colour code; this is module scope, so the globals
        # used by the parser and by dict() are replaced.
        red = ""
        lindigo = ""
        indigo = ""
        green = ""
        yellow = ""
        blue = ""
        org = ""
        light = ""

    if options.importfile:
        importfile(options.importfile)
        cleanup()

    if options.version == True:
        print(version)
        cleanup()

    # The locale's encoding is always overridden: UTF-8 unless big5 is forced.
    if options.utf8 == True:
        enc = "utf8"
    elif options.big5 == True:
        enc = "big5"
    else:
        enc = "utf8"

    if options.browsemode == True:
        try:
            browse()
        except KeyboardInterrupt:
            print("")
            cleanup()
        except EOFError:
            print("")
            cleanup()

    if options.utf8 == options.big5 == True:
        print("Can not select utf-8 and big5 at the same time")
        cleanup()

    if enc == 'big5':
        m_pron = False

    # The entry text is printed through _lookup_and_print() rather than being
    # bound to a module-level name "result", which would overwrite the
    # result() function.
    if options.oneword:
        _lookup_and_print(options.oneword, m_pron, enc)
        cleanup()

    if len(args) >= 1:
        for w in args:
            _lookup_and_print(w, m_pron, enc)
        cleanup()

    if options.learnmode:
        try:
            wordlearn()
        except KeyboardInterrupt:
            print("")
            cleanup()
        except EOFError:
            print("")
            cleanup()
        cleanup()
    elif options.listall:
        wordlist()
        cleanup()

    if options.database == True:
        # Create an empty word database and leave.
        db = shelve.open(os.getenv("HOME")+"/.ydict.db","c")
        db.close()
        exit()

    # Interactive mode: read words until Ctrl-C / end of input.
    # NOTE(review): the prompt is a single space in this copy; it may have
    # lost markup-like text -- confirm against the upstream source.
    while True:
        try:
            word = raw_input(" ")
        except KeyboardInterrupt:
            print("")
            cleanup()
        except EOFError:
            print("")
            cleanup()
        _lookup_and_print(word, m_pron, enc)
        if options.step == True:
            cleanup()

沒有留言: