#!/usr/bin/env python
# coding=UTF-8
# Chen Wen
# Web Site http://code.google.com/p/ydict/
# Blog : http://chenpc.csie.in
import getopt
import sys
import string
import httplib, urllib,string,sys
from HTMLParser import HTMLParser
from optparse import OptionParser
import locale
from codecs import EncodedFile
import shelve,os
import random
import ConfigParser
from multiprocessing import Process, Queue, Pool
version="ydict 1.2.5"
red="\33[31;1m"
lindigo="\33[36;1m"
indigo="\33[36m"
green="\33[32m"
yellow="\33[33;1m"
blue="\33[34;1m"
org="\33[0m"
light="\33[0;1m"
learn=0
browsemode=False
database=0
voicedata = ""
playback = ""
prefetch = ""
if os.access(os.getenv("HOME")+"/.ydict.db", os.F_OK):
db = shelve.open(os.getenv("HOME")+"/.ydict.db","c")
learn = 1
database = 1
try:
config = ConfigParser.ConfigParser()
config.readfp(open(os.getenv("HOME")+"/.ydictrc"))
voicedata = config.get('ydict', 'voicedata')
playback = config.get('ydict', 'playback')
prefetch = config.get('ydict', 'prefetch')
except :
pass
if prefetch == "":
prefetch = "5"
def cleanup():
if database:
db.sync()
exit()
def importfile(file):
fp = open(file)
for line in fp:
newword=line.split(" ")[0]
newword=newword.split("\n")[0]
if db.has_key(newword) == 0:
db[newword]=0
print "File imported!"
def result(count, total):
if total == 0:
print ""
exit()
print "\nScore: ",int(count),"/",int(total),"(",count/total,")"
exit()
def seckey(x):
return x[1]
def savefile(k, url):
if voicedata == "":
return
filename = "'"+voicedata+"/"+k[0]+"/"+k+".mp3'"
if not os.access(filename, os.F_OK):
if not os.access(voicedata+"/"+k[0], os.F_OK):
os.system("mkdir "+voicedata+"/"+k[0])
os.system("rm -f "+voicedata+"/voice.tmp")
os.system("wget -q "+url+" -O "+filename+"voice.tmp");
os.system("mv "+filename+"voice.tmp "+filename)
def speek(k):
if voicedata == "" or playback == "" or k == "":
return
filename = voicedata+"/"+k[0]+"/"+""+k+".mp3"
if not os.access(filename, os.F_OK):
dict(k, m_pron)
else:
os.system(playback+" '"+filename+"' >/dev/null 2>&1 &")
def answers(iq, oq):
while(1):
key = iq.get()
(result,k) = dict(key, 1)
oq.put([key, result])
def browse():
wordlist = db.items()
size=len(wordlist)
totalcount = 0.0
right = 0.0
lookup = Queue(maxsize = string.atoi(prefetch))
answer = Queue(maxsize = string.atoi(prefetch))
lookuper = Process( target=answers, args=(lookup, answer) )
lookuper.daemon = True
lookuper.start()
if size <= 1:
print "There must be at least two words needed in the list."
exit()
i = 0
while(1) :
while(not lookup.full()):
k=wordlist[i][0]
i = i + 1
if i >= size:
i = 0
k=k.lower()
lookup.put(k)
(k, result) = answer.get()
if not db.has_key(k):
continue
print result
speek(k)
try:
word = raw_input("(d) Delete, (enter) Continue: ")
if word == "d":
del db[k]
wordlist=db.items()
size=len(wordlist)
if size <= 1:
print "There must be at least two words needed in the list."
exit()
except KeyboardInterrupt:
result(right,totalcount)
def wordlearn():
wordlist = db.items()
wordlist.sort(key=seckey)
size=len(wordlist)
totalcount = 0.0
right = 0.0
lookup = Queue(maxsize = 5)
answer = Queue(maxsize = 5)
lookuper = Process( target=answers, args=(lookup, answer) )
lookuper.daemon = True
lookuper.start()
if size <= 1:
print "There must be at least two words needed in the list."
exit()
while(1) :
while(not lookup.full()):
k=wordlist[int(random.triangular(0, size-1, 0))][0]
k=k.lower()
lookup.put(k)
(k, result) = answer.get()
if not db.has_key(k):
continue
if browsemode == False:
print result.replace(k, "####").replace(k.upper(), "####").replace(k[0].swapcase()+k[1:].lower(),"####")
else:
print result
speek(k)
word = raw_input("Input :")
if word == k.lower():
print "Bingo!"
right+=1
db[k]+=1
if db[k] >= 100:
db[k]=100
else:
db[k]-=3
if db[k] < 0:
db[k]=0
print "WRONG! Correct answer is : ",k
try:
word = raw_input("(d) Delete, (enter) Continue: ")
if word == "d":
del db[k]
wordlist=db.items()
wordlist.sort(key=seckey)
size=len(wordlist)
if size <= 1:
print "There must be at least two words needed in the list."
exit()
except KeyboardInterrupt:
result(right,totalcount)
totalcount+=1
if totalcount % (int(size/4)+1) == 0:
wordlist=db.items()
wordlist.sort(key=seckey)
def wordlist():
wordlist = db.items()
wordlist.sort(key=seckey)
for k,v in wordlist:
print k,v
class MyHTMLParser(HTMLParser):
redirect=0
pron=True
def __init__(self):
self.show=0
self.prefix=""
self.postfix=org
self.entry=1
self.desc=0
self.result=[]
self.learn=learn
self.learnword=0
self.chinese=0
self.mp3url=""
self.key=""
def handle_starttag(self, tag, attrs):
if self.redirect == 1 and tag == "strong":
self.show=1
self.prefix="Spell Check: ["+yellow
self.postfix=org+"]"
elif tag == "span" and len(attrs)==0:
if self.pron == True:
self.show=1
self.prefix=""
elif tag == "div" and len(attrs)==0:
if self.pron == True:
self.show=1
self.prefix=""
elif tag == "div" and len(attrs)!=0:
if attrs[0][1]=="pronunciation" and self.pron==True:
self.result.append(blue)
elif attrs[0][1]=="caption":
self.show=1
self.prefix=red
elif attrs[0][1]=="theme clr":
self.show=1
if self.chinese == 0:
self.learnword=1
self.prefix="["+light
self.postfix=org+"]"
elif attrs[0][1]=="description":
if self.desc != 0:
self.show=1
self.prefix=" "+org
self.desc+=1
elif tag == "p" and len(attrs)!=0:
if attrs[0][1] == "example":
self.show=1
self.prefix=" "+indigo
elif attrs[0][1] == "interpret":
self.show=1
self.prefix=" "+org+str(self.entry)+"."
self.entry+=1
def handle_data(self,data):
if self.show == 1:
self.result.append(self.prefix+data+self.postfix+"\n")
self.show=0
self.prefix=""
self.postfix=""
if(self.learn == 1 and self.learnword == 1):
self.key = data.lower()
if(db.has_key(self.key) == 0 and self.key.isalpha() ):
db[self.key] = 0
self.learnword=0
savefile(self.key, self.mp3url)
def handle_endtag(self, tag):
if tag == "div":
self.result.append(org)
def htmlspcahrs(content):
content=content.replace("&","&")
content=content.replace("'","\'")
content=content.replace(""","\"")
content=content.replace(">",">")
content=content.replace("<","<")
content=content.replace("","")
content=content.replace("","")
content=content.replace("",lindigo)
content=content.replace("",org+indigo)
content=content.replace("\n","\n "+green)
return content
def http_postconn(word):
yahoourl="tw.dictionary.yahoo.com"
params = urllib.urlencode({'p': word ,'ei' : 'UTF-8'})
return urllib.urlopen("http://%s/search" % yahoourl, params)
def dict(word,pron):
output=""
word=word.strip()
if len(word) <= 0:
return output, ""
r1=http_postconn(word)
data1 = r1.read()
p=MyHTMLParser()
p.redirect=0
p.chinese=0
p.pron=pron
try:
index5 = string.index(data1, '{"audio":"')
index6 = string.index(data1,'"}};var noFlashPlayerMessage')
p.mp3url = data1[index5+10:index6]
except ValueError:
p.mp3url = ""
pass
try:
data1=data1[:string.index(data1,'Online Resources
')]
except ValueError:
return output, word
try:
index1=string.index(data1,"?冽銝閬")
p.redirect=1
except ValueError:
try:
index1=string.index(data1,"敺甇?摮?曆??唳閬?鞈???")
if db.has_key(word):
del db[word]
return yellow+"Not Found!"+org+"\n", word
except ValueError:
index1=string.index(data1,"摮??")
try:
index3=string.index(data1,"隞乩???")
index4=string.index(data1," ?典??訾葉????)
print yellow+"隞乩???"+light+data1[index3+18:index4]+yellow+" ?典??訾葉????+org
except ValueError:
pass
try:
string.index(data1,"?潮")
string.index(data1,"瘜券")
p.chinese=1
except ValueError:
pass
data=data1[index1:]
p.reset()
data=htmlspcahrs(data)
p.feed(data)
for s in p.result:
output+=s
return output, p.key
if __name__ == '__main__':
parser = OptionParser(usage = "Usage: ydict [options] word1 word2 ......")
parser.add_option("-s", "--step", dest="step", help="one step mode.",default=False,action="store_true")
parser.add_option("-p", "--pron", dest="pron", help="disable pronounce.",default=True,action="store_false")
parser.add_option("-u", "--utf8", dest="utf8", help="force utf-8 encoding.",default=False,action="store_true")
parser.add_option("-b", "--big5", dest="big5", help="force big5 encoding.",default=False,action="store_true")
parser.add_option("-w", "--word", dest="oneword", type="string" , help="only one word.",action="store")
parser.add_option("-c", "--nocolor", dest="nocolor", help="force no color code",default=False, action="store_true")
parser.add_option("-v", "--version", dest="version", help="show version.",default=False,action="store_true")
parser.add_option("-d", "--database", dest="database", help="initial database.",default=False,action="store_true")
parser.add_option("-l", "--learn", dest="learnmode", help="start learning mode.",default=False,action="store_true")
parser.add_option("-B", "--browse", dest="browsemode", help="start browse mode.",default=False,action="store_true")
parser.add_option("-a", "--list", dest="listall", help="list all word in list.",default=False,action="store_true")
parser.add_option("-i", "--import", dest="importfile", type="string", help="import a word list",default=False,action="store")
(options, args) = parser.parse_args()
m_pron=options.pron
(lang , enc)=locale.getdefaultlocale()
if options.nocolor:
red=""
lindigo=""
indigo=""
green=""
yellow=""
blue=""
org=""
light=""
if options.importfile:
importfile(options.importfile)
cleanup()
if options.version == True:
print version
cleanup()
if options.utf8 == True:
enc="utf8"
elif options.big5 == True:
enc="big5"
else:
enc="utf8"
if options.browsemode == True:
try:
browse()
except KeyboardInterrupt:
print ""
cleanup()
except EOFError:
print ""
cleanup()
if options.utf8 == options.big5 ==True:
print "Can not select utf-8 and big5 at the same time"
cleanup()
if enc == 'big5':
m_pron=False
if options.oneword:
(result, k) = dict(options.oneword,m_pron)
speek(k)
result=unicode(result,'utf8')
result=result.encode(enc)
print result
cleanup()
if len(args) >= 1:
for w in args:
(result, k)=dict(w,m_pron)
speek(k)
result=unicode(result,'utf8')
result=result.encode(enc)
print result
cleanup()
if options.learnmode:
try:
wordlearn()
except KeyboardInterrupt:
print ""
cleanup()
except EOFError:
print ""
cleanup()
cleanup()
elif options.listall:
wordlist()
cleanup()
if options.database == True:
db=shelve.open(os.getenv("HOME")+"/.ydict.db","c")
db.close()
exit()
while(1):
try:
word=raw_input(" ")
except KeyboardInterrupt:
print ""
cleanup()
except EOFError:
print ""
cleanup()
(result,k)=dict(word,m_pron)
speek(k)
result=unicode(result,'utf8')
result=result.encode(enc)
print result
if options.step == True:
cleanup()
2013年2月20日 星期三
用奇摩字典做查詢 - ydict.py
Labels:
Python
嗨,大家好!我是一位資訊人、畫畫人、科技人和行銷人。定位自己為網路觀察家,永遠保持好奇心與熱情,學習跨領域新事物,希望最終能成為一個全方位的人。喜歡探索新技術和創意,並將其應用於工作和生活中。也喜歡畫畫、旅行、閱讀和寫作。希望我的部落格可以為您帶來啟發和樂趣,並與您分享我的經驗和見解。謝謝您的訪問,期待與您交流!
訂閱:
張貼留言 (Atom)
沒有留言:
張貼留言