程式印象: 用奇摩字典做查詢

#!/usr/bin/env python
# coding=UTF-8
# Chen Wen 
# Web Site http://code.google.com/p/ydict/
# Blog : http://chenpc.csie.in

import getopt
import sys
import string
import httplib, urllib,string,sys
from HTMLParser import HTMLParser
from optparse import OptionParser
import locale
from codecs import EncodedFile
import shelve,os
import random
import ConfigParser
from multiprocessing import Process, Queue, Pool


# Program identification printed by --version.
version = "ydict 1.2.5"

# ANSI escape sequences used to colour the output; all of them are replaced by
# empty strings when --nocolor is given.
red = "\33[31;1m"
lindigo = "\33[36;1m"
indigo = "\33[36m"
green = "\33[32m"
yellow = "\33[33;1m"
blue = "\33[34;1m"
org = "\33[0m"
light = "\33[0;1m"

# Runtime flags; learn/database are switched to 1 once a word database is opened.
learn = database = 0
browsemode = False

# Values optionally read from ~/.ydictrc ("" means "not configured").
voicedata = playback = prefetch = ""



# expanduser("~") yields $HOME when it is set and still works when it is not,
# where os.getenv("HOME") + "..." would raise TypeError.
_home = os.path.expanduser("~")

# The word database is only used when it already exists (the --database option
# creates it); its presence switches on learning and database syncing.
if os.access(_home+"/.ydict.db", os.F_OK):
    db = shelve.open(_home+"/.ydict.db","c")
    learn = 1
    database = 1


def _rc_option(name):
    """Return option *name* of the [ydict] section, or "" when it is absent."""
    try:
        return config.get('ydict', name)
    except ConfigParser.Error:
        return ""


# The rc file is optional: a missing, unreadable or malformed file leaves every
# setting at its default.  Only those failures are ignored, so programming
# errors and Ctrl-C are no longer swallowed.
config = ConfigParser.ConfigParser()
try:
    with open(_home+"/.ydictrc") as _rcfile:
        config.readfp(_rcfile)
except (IOError, OSError, ConfigParser.Error):
    pass
else:
    # Each option is read on its own so a missing one does not hide the rest.
    voicedata = _rc_option('voicedata')
    playback = _rc_option('playback')
    prefetch = _rc_option('prefetch')

# prefetch is later converted to a queue size; fall back to 5 when it is
# missing or is not a positive integer.
if not prefetch.strip().isdigit() or int(prefetch) < 1:
    prefetch = "5"
    
        
def cleanup():
    """Synchronise the word database (when one is open) and end the program.

    Raises:
        SystemExit: always; this function never returns.
    """
    if database:
        db.sync()
    # sys.exit() instead of the bare exit() helper: exit() is injected by the
    # site module for interactive use and is not guaranteed to exist.
    sys.exit()

def importfile(file):
    """Add the first word of every line of *file* to the word database.

    New words start with a score of 0; words that are already stored keep
    their score.  Lines without any word are skipped.

    Args:
        file: path of a text file; only the first whitespace-separated token
            of each line is used.
    """
    with open(file) as fp:
        for line in fp:
            fields = line.split()
            if not fields:
                # Blank line (or whitespace only): there is no word to store.
                continue
            newword = fields[0]
            if newword not in db:
                db[newword] = 0
    print("File imported!")
def result(count, total):
    """Print the quiz score and end the program.

    Args:
        count: number of correct answers.
        total: number of questions asked; when it is 0 only an empty line is
            printed.

    Raises:
        SystemExit: always; this function never returns.
    """
    if total == 0:
        print("")
        sys.exit()
    # float() keeps the ratio from being truncated when integers are passed.
    ratio = float(count) / total
    fields = ["\nScore: ", int(count), "/", int(total), "(", ratio, ")"]
    # Joining with single spaces reproduces the spacing of a comma-separated
    # print statement.
    print(" ".join([str(field) for field in fields]))
    sys.exit()

def seckey(x):
    """Sort key: return the second element of *x* (the score of a word/score pair)."""
    second = x[1]
    return second
       
def savefile(k, url):
    """Download the pronunciation audio of word *k* into the voice directory.

    The audio is stored as ``<voicedata>/<first character of k>/<k>.mp3``.
    Nothing happens when no voice directory is configured, when *k* or *url*
    is empty, or when the file already exists.  Failures (directory cannot be
    created, wget missing, download error) are ignored: caching is best effort.

    Args:
        k: the word the audio belongs to.
        url: location of the audio file, handed to ``wget``.
    """
    import subprocess  # local import: only needed when audio caching is on

    if voicedata == "" or not k or not url:
        return
    if "/" in k:
        # A path separator would put the file outside its letter directory.
        return

    folder = voicedata+"/"+k[0]
    # No quote characters inside the path: the previous version embedded them,
    # so the existence test never matched and every lookup downloaded again.
    filename = folder+"/"+k+".mp3"
    if os.access(filename, os.F_OK):
        return

    # NOTE(review): the URL is cut out of a JSON fragment by the caller, so "/"
    # may arrive escaped as "\/".  The old shell command line removed those
    # backslashes implicitly; do it explicitly now that no shell is involved.
    url = url.replace("\\/", "/")

    partial = filename+"voice.tmp"
    try:
        if not os.access(folder, os.F_OK):
            os.mkdir(folder)
        # Argument list instead of a shell string: the URL comes from a remote
        # page and must not be interpreted by a shell.
        status = subprocess.call(["wget", "-q", "-O", partial, url])
        if status == 0:
            # Rename only after a successful download so a broken transfer is
            # never mistaken for cached audio.
            os.rename(partial, filename)
        elif os.access(partial, os.F_OK):
            os.remove(partial)
    except OSError:
        pass
        
def speek(k):
    """Play the cached pronunciation of word *k* with the configured player.

    Nothing happens unless both a voice directory and a playback command are
    configured and *k* is non-empty.  When no audio file exists yet for *k*,
    the word is looked up once more (a lookup may store the audio as a side
    effect) and nothing is played on this call.

    Args:
        k: the word to pronounce.
    """
    if voicedata == "" or playback == "" or k == "":
        return

    filename = voicedata+"/"+k[0]+"/"+k+".mp3"
    if not os.access(filename, os.F_OK):
        dict(k, m_pron)
        return

    # Close the quote, emit an escaped apostrophe and reopen it, so a word such
    # as "o'clock" cannot break out of the single-quoted shell argument.
    quoted = "'"+filename.replace("'", "'\\''")+"'"
    # The player runs in the background with its output discarded.
    os.system(playback+" "+quoted+" >/dev/null 2>&1 &")

def answers(iq, oq):
    """Worker loop: look up every word taken from *iq* and post the answer to *oq*.

    Each item put on *oq* is a two-element list ``[word, text]``.  The loop
    never ends on its own; it is meant to run in a daemon process.
    """
    while True:
        key = iq.get()
        lookup = dict(key, 1)
        text = lookup[0]
        oq.put([key, text])
        
        
def browse():
    """Show the entry of every stored word in turn, with the option to delete it.

    Lookups are prefetched by a background process through two bounded queues.
    The function only ends by terminating the program: when fewer than two
    words are stored, or through result() when Ctrl-C is pressed at the prompt.
    """
    words = db.items()
    size = len(words)
    # Validate before starting the worker so no process is spawned for nothing.
    if size <= 1:
        print("There must be at least two words needed in the list.")
        sys.exit()

    totalcount = 0.0
    right = 0.0

    # A queue size of 0 or less means "unbounded", which would make the fill
    # loop below spin forever; fall back to 5 for such values.
    try:
        depth = int(prefetch)
    except ValueError:
        depth = 5
    if depth < 1:
        depth = 5

    lookup = Queue(maxsize=depth)
    answer = Queue(maxsize=depth)
    lookuper = Process(target=answers, args=(lookup, answer))
    lookuper.daemon = True
    lookuper.start()

    i = 0
    while True:
        # Keep the request queue full; the cursor wraps around the word list.
        while not lookup.full():
            lookup.put(words[i][0].lower())
            i = (i + 1) % size

        # "text", not "result": a local named result would hide the result()
        # function that the Ctrl-C handler below has to call.
        (k, text) = answer.get()
        if k not in db:
            # The word was deleted after its lookup had been queued.
            continue
        print(text)
        speek(k)

        try:
            word = raw_input("(d) Delete, (enter) Continue: ")
            if word == "d":
                del db[k]
                words = db.items()
                size = len(words)
                if size <= 1:
                    print("There must be at least two words needed in the list.")
                    sys.exit()
                # The list just became shorter; keep the cursor inside it.
                i = i % size
        except KeyboardInterrupt:
            result(right, totalcount)
        
def wordlearn():
    """Quiz the user on stored words and adjust their scores.

    The entry of a word is printed with the word itself masked by "####" and
    the user has to type the word.  A correct answer adds 1 to the score
    (capped at 100); a wrong one subtracts 3 (not below 0) and offers to delete
    the word.  The function only ends by terminating the program or through an
    exception raised at the answer prompt.
    """
    words = db.items()
    words.sort(key=seckey)
    size = len(words)
    # Validate before starting the worker so no process is spawned for nothing.
    if size <= 1:
        print("There must be at least two words needed in the list.")
        sys.exit()

    totalcount = 0.0
    right = 0.0
    lookup = Queue(maxsize=5)
    answer = Queue(maxsize=5)
    lookuper = Process(target=answers, args=(lookup, answer))
    lookuper.daemon = True
    lookuper.start()

    while True:
        while not lookup.full():
            # The list is sorted by ascending score and the triangular
            # distribution peaks at index 0, so low scores are asked more often.
            k = words[int(random.triangular(0, size - 1, 0))][0]
            lookup.put(k.lower())

        # "text", not "result": a local named result would hide the result()
        # function that the Ctrl-C handler below has to call.
        (k, text) = answer.get()
        if not k or k not in db:
            # Deleted meanwhile, or an empty key that cannot be quizzed.
            continue

        if browsemode == False:
            # Hide the answer in its lower-case, upper-case and capitalised form.
            capitalised = k[0].swapcase() + k[1:].lower()
            print(text.replace(k, "####").replace(k.upper(), "####").replace(capitalised, "####"))
        else:
            print(text)
        speek(k)
        word = raw_input("Input :")

        if word == k.lower():
            print("Bingo!")
            right += 1
            db[k] = min(db[k] + 1, 100)
        else:
            db[k] = max(db[k] - 3, 0)
            # Same spacing as a comma-separated print statement.
            print("%s %s" % ("WRONG! Correct answer is : ", k))
            try:
                word = raw_input("(d) Delete, (enter) Continue: ")
                if word == "d":
                    del db[k]
                    words = db.items()
                    words.sort(key=seckey)
                    size = len(words)
                    if size <= 1:
                        print("There must be at least two words needed in the list.")
                        sys.exit()
            except KeyboardInterrupt:
                result(right, totalcount)

        totalcount += 1
        # Re-sort from time to time so the updated scores steer the selection.
        if totalcount % (size // 4 + 1) == 0:
            words = db.items()
            words.sort(key=seckey)
            # Keep size in step with the refreshed list.
            size = len(words)
def wordlist():
    wordlist = db.items()
    wordlist.sort(key=seckey)
    for k,v in wordlist:
        print k,v

class MyHTMLParser(HTMLParser):
    """Convert a dictionary result page into a list of coloured text fragments.

    The caller sets ``redirect``, ``chinese``, ``pron`` and ``mp3url`` before
    feeding the page; afterwards ``result`` holds the fragments and ``key`` the
    headword that was recorded (only when learning is enabled).
    """
    # 1 when the page is a spelling suggestion: <strong> text is then shown as one.
    redirect=0
    # Pronunciation-related fragments are only emitted while this equals True.
    pron=True

    def __init__(self):
        # Initialise the base parser as well, so the object is usable even when
        # the caller does not call reset() before feed().
        HTMLParser.__init__(self)
        self.show=0          # 1: the next text node is appended to result
        self.prefix=""       # text put in front of the next shown fragment
        self.postfix=org     # text put behind the next shown fragment
        self.entry=1         # running number of the "interpret" paragraphs
        self.desc=0          # number of "description" divs seen so far
        self.result=[]       # output fragments, in document order
        self.learn=learn     # copy of the module-level learning flag
        self.learnword=0     # 1: the next text node is the headword
        self.chinese=0       # set by the caller; non-zero disables headword learning
        self.mp3url=""       # audio URL handed to savefile() for the headword
        self.key=""          # headword recorded by handle_data

    def handle_starttag(self, tag, attrs):
        """Decide from *tag* and its first attribute how the next text is shown."""
        if self.redirect == 1 and tag == "strong":
            self.show=1
            self.prefix="Spell Check: ["+yellow
            self.postfix=org+"]"
            return
        if tag not in ("span", "div", "p"):
            return

        if len(attrs) == 0:
            # Attribute-less <span>/<div> text is shown unless pronunciation
            # output is switched off; attribute-less <p> is ignored.
            if tag != "p" and self.pron == True:
                self.show=1
                self.prefix=""
            return
        if tag == "span":
            return

        # Only the value of the first attribute is inspected.
        kind = attrs[0][1]
        if tag == "div":
            if kind == "pronunciation" and self.pron == True:
                self.result.append(blue)
            elif kind == "caption":
                self.show=1
                self.prefix=red
            elif kind == "theme clr":
                self.show=1
                if self.chinese == 0:
                    self.learnword=1
                    self.prefix="["+light
                    self.postfix=org+"]"
            elif kind == "description":
                # The first description div is counted but not shown.
                if self.desc != 0:
                    self.show=1
                    self.prefix="  "+org
                self.desc+=1
        else:
            if kind == "example":
                self.show=1
                self.prefix="    "+indigo
            elif kind == "interpret":
                self.show=1
                self.prefix="  "+org+str(self.entry)+"."
                self.entry+=1

    def handle_data(self,data):
        """Emit *data* when it was marked for display and record the headword."""
        if self.show == 1:
            self.result.append(self.prefix+data+self.postfix+"\n")
            self.show=0
            self.prefix=""
            self.postfix=""
        if self.learn == 1 and self.learnword == 1:
            self.key = data.lower()
            # Only purely alphabetic headwords are added to the database.
            if self.key not in db and self.key.isalpha():
                db[self.key] = 0
            self.learnword=0
            savefile(self.key, self.mp3url)

    def handle_endtag(self, tag):
        """Reset the colour after every closing </div>."""
        if tag == "div":
            self.result.append(org)

def htmlspcahrs(content):
    """Decode common HTML entities in *content* and insert colour codes.

    Args:
        content: HTML text of the result page.

    Returns:
        The text with entities replaced by their characters, bold markup
        replaced by colour codes and every newline followed by four spaces
        and the green colour code.
    """
    # The entity names had been lost from the search strings (the patterns
    # equalled their replacements and one line no longer parsed); they are
    # restored from the replacement characters.
    content=content.replace("&amp;","&")
    for entity in ("&#39;", "&#039;", "&apos;"):
        content=content.replace(entity,"'")
    content=content.replace("&quot;","\"")
    content=content.replace("&gt;",">")
    content=content.replace("&lt;","<")
    # NOTE(review): four search strings here had lost their text and read "".
    # Two of them replaced "" by "" (no effect as written) and are omitted;
    # restore them from the upstream source if they stripped tags.  The other
    # two replaced "" by a colour code, which would insert the code between
    # every character; they are assumed to have been <b> and </b> (highlight
    # inside an example, then back to the example colour) - TODO confirm.
    content=content.replace("<b>",lindigo)
    content=content.replace("</b>",org+indigo)
    content=content.replace("\n","\n    "+green)
    return content


def http_postconn(word):
    """Send a search for *word* to the Yahoo! Taiwan dictionary.

    The query is passed to urlopen() as request data.  Returns the file-like
    response object.
    """
    host = "tw.dictionary.yahoo.com"
    endpoint = "http://%s/search" % host
    query = urllib.urlencode({'p': word ,'ei' : 'UTF-8'})
    return urllib.urlopen(endpoint, query)

def dict(word,pron):
    """Look up *word* online and return ``(text, key)``.

    NOTE: this function hides the built-in ``dict`` type for the whole module.

    Args:
        word: the word to look up; surrounding whitespace is stripped.
        pron: stored on the parser as ``pron``; the parser only emits
            pronunciation-related fragments while it equals True.

    Returns:
        A pair ``(text, key)``:
        - ``("", "")`` when *word* is empty after stripping;
        - ``("", word)`` when the page lacks the 'Online Resources' marker;
        - a "Not Found!" message and *word* when the not-found marker is seen
          (the word is also removed from the database);
        - otherwise the concatenated parser output and the parser's ``key``.

    NOTE(review): the non-ASCII marker strings below look mis-encoded
    (mojibake) in this copy of the file, and the two literals starting with
    a space on the index4/print lines have lost their closing quote. They are
    runtime data and must be restored from the upstream source, not guessed.
    """
    output=""
    word=word.strip()
    if len(word) <= 0:
        return output, ""
    r1=http_postconn(word)
    data1 = r1.read()
    p=MyHTMLParser()
    p.redirect=0
    p.chinese=0
    p.pron=pron
    
    # The audio URL is taken from between two fixed markers of the page source;
    # 10 is the length of the opening marker.
    try:        
        index5 = string.index(data1, '{"audio":"')        
        index6 = string.index(data1,'"}};var noFlashPlayerMessage')        
        p.mp3url = data1[index5+10:index6]
    except ValueError:
        p.mp3url = ""
        pass
    
    # Everything from the 'Online Resources' marker onwards is discarded.
    try:
        data1=data1[:string.index(data1,'Online Resources')]
    except ValueError:
        return output, word
        
    # Find where the interesting part starts: first marker -> treated as a
    # spelling suggestion (redirect=1), second marker -> not found, third
    # marker -> regular result.
    # NOTE(review): a ValueError from the third lookup is not caught here.
    try:
        index1=string.index(data1,"?冽銝閬")
        p.redirect=1
    except ValueError:
        try:
            index1=string.index(data1,"敺甇?摮?曆??唳閬?鞈???")
            if db.has_key(word):
                del db[word]
            return yellow+"Not Found!"+org+"\n", word
        except ValueError:
            index1=string.index(data1,"摮??")
    
    # When both markers are present, the text between them is printed directly.
    try:
        index3=string.index(data1,"隞乩???")
        index4=string.index(data1," ?典??訾葉????)
        print yellow+"隞乩???"+light+data1[index3+18:index4]+yellow+" ?典??訾葉????+org
    except ValueError:
        pass
    # chinese is set only when both markers occur in the page.
    try:
        string.index(data1,"?潮")
        string.index(data1,"瘜券")
        p.chinese=1
    except ValueError:
        pass
        
    data=data1[index1:]
    # reset() prepares the parser's input state before the page is fed.
    p.reset()
    data=htmlspcahrs(data)
    p.feed(data)
    for s in p.result:
        output+=s
    return output, p.key


if __name__ == '__main__':
    # Command line front end.  The modes are tried in a fixed order (import,
    # version, browse, single word, words given as arguments, learn/list,
    # database creation); without any of them words are read interactively.
    parser = OptionParser(usage = "Usage: ydict [options] word1 word2 ......")
    parser.add_option("-s", "--step", dest="step", help="one step mode.",default=False,action="store_true")
    parser.add_option("-p", "--pron", dest="pron", help="disable pronounce.",default=True,action="store_false")
    parser.add_option("-u", "--utf8", dest="utf8", help="force utf-8 encoding.",default=False,action="store_true")
    parser.add_option("-b", "--big5", dest="big5", help="force big5 encoding.",default=False,action="store_true")
    parser.add_option("-w", "--word", dest="oneword", type="string" , help="only one word.",action="store")
    parser.add_option("-c", "--nocolor", dest="nocolor", help="force no color code",default=False, action="store_true")
    parser.add_option("-v", "--version", dest="version", help="show version.",default=False,action="store_true")
    parser.add_option("-d", "--database", dest="database", help="initial database.",default=False,action="store_true")
    parser.add_option("-l", "--learn", dest="learnmode", help="start learning mode.",default=False,action="store_true")
    parser.add_option("-B", "--browse", dest="browsemode", help="start browse mode.",default=False,action="store_true")
    parser.add_option("-a", "--list", dest="listall", help="list all word in list.",default=False,action="store_true")
    parser.add_option("-i", "--import", dest="importfile", type="string", help="import a word list",default=False,action="store")

    (options, args) = parser.parse_args()
    # m_pron is read as a module-level name by speek().
    m_pron=options.pron
    (lang , enc)=locale.getdefaultlocale()

    if options.nocolor:
        # Blank every colour code so the output is plain text.
        red = lindigo = indigo = green = yellow = blue = org = light = ""

    def _need_database():
        """End the program with a hint when no word database has been opened."""
        if not database:
            print("No word database found, run 'ydict -d' to create one first.")
            cleanup()

    def _show(query):
        """Look up *query*, play its audio and print the entry re-encoded to enc."""
        # Local names on purpose: binding a module-level "result" here would
        # replace the result() function used by the quiz modes.
        (text, key) = dict(query, m_pron)
        speek(key)
        print(unicode(text,'utf8').encode(enc))

    if options.importfile:
        _need_database()
        importfile(options.importfile)
        cleanup()

    if options.version:
        print(version)
        cleanup()

    # Reject contradictory encodings before any mode runs.
    if options.utf8 and options.big5:
        print("Can not select utf-8 and big5 at the same time")
        cleanup()
    # utf8 is the default; big5 has to be requested explicitly.
    if options.big5:
        enc="big5"
    else:
        enc="utf8"

    if options.browsemode:
        _need_database()
        try:
            browse()
        except (KeyboardInterrupt, EOFError):
            print("")
            cleanup()

    if enc == 'big5':
        m_pron=False

    if options.oneword:
        _show(options.oneword)
        cleanup()

    if len(args) >= 1:
        for w in args:
            _show(w)
        cleanup()

    if options.learnmode:
        _need_database()
        try:
            wordlearn()
        except (KeyboardInterrupt, EOFError):
            print("")
            cleanup()
        cleanup()
    elif options.listall:
        _need_database()
        wordlist()
        cleanup()

    if options.database:
        # Opening with flag "c" creates the database file when it is missing.
        db=shelve.open(os.getenv("HOME")+"/.ydict.db","c")
        db.close()
        sys.exit()

    # Interactive mode: one lookup per input line until Ctrl-C or end of input.
    while True:
        try:
            word=raw_input(" ")
        except (KeyboardInterrupt, EOFError):
            print("")
            cleanup()

        _show(word)

        if options.step:
            cleanup()
程式印象

2013年2月20日星期三

用奇摩字典做查詢 - ydict.py

Online Resources

沒有留言:

2013年2月20日 星期三

用奇摩字典做查詢 - ydict.py

Online Resources

沒有留言:

2013年2月20日星期三