#!/usr/bin/env python
# coding=UTF-8
# Chen Wen
# Web Site http://code.google.com/p/ydict/
# Blog : http://chenpc.csie.in
import getopt
import sys
import string
import httplib, urllib,string,sys
from HTMLParser import HTMLParser
from optparse import OptionParser
import locale
from codecs import EncodedFile
import shelve,os
import random
import ConfigParser
from multiprocessing import Process, Queue, Pool
# Version string printed by the -v/--version option.
version="ydict 1.2.5"
# ANSI escape sequences used to colorize the output; the main block blanks
# all of them when -c/--nocolor is given.
red="\33[31;1m"
lindigo="\33[36;1m"
indigo="\33[36m"
green="\33[32m"
yellow="\33[33;1m"
blue="\33[34;1m"
org="\33[0m"
light="\33[0;1m"
# learn and database are switched to 1 below only when the word database
# file already exists.
learn=0
browsemode=False
database=0
# Settings read from the [ydict] section of ~/.ydictrc ("" means unset).
voicedata = ""
playback = ""
prefetch = ""
# expanduser() still finds the home directory when $HOME is not set, where
# os.getenv("HOME") would return None and break the string concatenation.
_home = os.path.expanduser("~")
if os.access(os.path.join(_home, ".ydict.db"), os.F_OK):
    db = shelve.open(os.path.join(_home, ".ydict.db"), "c")
    learn = 1
    database = 1
try:
    config = ConfigParser.ConfigParser()
    with open(os.path.join(_home, ".ydictrc")) as rcfile:
        config.readfp(rcfile)
    voicedata = config.get('ydict', 'voicedata')
    playback = config.get('ydict', 'playback')
    prefetch = config.get('ydict', 'prefetch')
except (IOError, OSError, ConfigParser.Error):
    # The rc file is optional: a missing/unreadable file or a missing
    # section/option simply leaves the remaining settings at "".
    pass
# prefetch is used as a queue size by browse(); fall back to "5" when it is
# unset, not a number, or zero (a zero-sized Queue is unbounded).
if not prefetch.isdigit() or int(prefetch) < 1:
    prefetch = "5"
def cleanup():
    """Close the word database (when one was opened) and exit the program.

    Raises:
        SystemExit: always.
    """
    if database:
        # close() writes pending entries back and releases the underlying
        # file; the shelf should not be left to be closed implicitly.
        db.close()
    sys.exit()
def importfile(file):
    """Add the first word of every line of a text file to the word database.

    Words that are not stored yet are added with a score of 0; words that
    are already present keep their score. Blank lines are skipped.

    Args:
        file: path of the word list to read.

    Raises:
        IOError: if the file cannot be opened.
    """
    with open(file) as fp:
        for line in fp:
            # split() without arguments also copes with tabs, leading
            # blanks and "\r\n" line endings.
            fields = line.split()
            if not fields:
                continue
            newword = fields[0]
            if newword not in db:
                db[newword] = 0
    print("File imported!")
def result(count, total):
    """Print the quiz score and exit the program.

    Args:
        count: number of correct answers.
        total: number of questions asked; when 0 only an empty line is
            printed.

    Raises:
        SystemExit: always.
    """
    if total == 0:
        print("")
        sys.exit()
    # float() keeps the ratio exact when both arguments are integers.
    ratio = float(count) / total
    print("\nScore:  %s / %s ( %s )" % (int(count), int(total), ratio))
    sys.exit()
def seckey(x):
    """Sort key: return the second element of *x* (index 1)."""
    second = x[1]
    return second
def savefile(k, url):
    """Download the audio file for word *k* into the voice cache.

    The file is stored as <voicedata>/<first letter>/<k>.mp3. Nothing is
    done when no voice directory is configured, when the word or URL is
    empty, or when the file is already cached. Failures are ignored: the
    cache is a best-effort convenience.

    Args:
        k: the word the audio belongs to.
        url: address of the audio file, fetched with the external ``wget``.
    """
    import subprocess

    if voicedata == "" or not k or not url:
        return
    if os.sep in k:
        # A path separator inside the word would escape the cache folder.
        return
    folder = os.path.join(voicedata, k[0])
    filename = os.path.join(folder, k + ".mp3")
    # Test the real path: wrapping it in quote characters (as needed for a
    # shell command line) would make this check fail every time.
    if os.access(filename, os.F_OK):
        return
    partial = filename + ".tmp"
    try:
        if not os.access(folder, os.F_OK):
            os.makedirs(folder)
        # An argument list bypasses the shell, so neither the word nor the
        # URL can inject shell syntax.
        status = subprocess.call(["wget", "-q", url, "-O", partial])
        if status == 0:
            os.rename(partial, filename)
        elif os.access(partial, os.F_OK):
            # Do not let a failed download end up in the cache.
            os.remove(partial)
    except OSError:
        # wget missing, directory not writable, ...: skip caching.
        pass
def speek(k):
    """Play the cached audio file of word *k* with the configured player.

    Does nothing unless both the voice directory and the playback command
    are configured and *k* is non-empty. When the audio file is not cached
    yet, the word is looked up again with dict() instead of being played
    (the lookup path is what calls savefile()).

    Args:
        k: the word to pronounce.
    """
    if voicedata == "" or playback == "" or k == "":
        return
    filename = os.path.join(voicedata, k[0], k + ".mp3")
    if not os.access(filename, os.F_OK):
        dict(k, m_pron)
        return
    # Close the quote, emit an escaped quote and reopen it, so a single
    # quote inside the file name cannot terminate the shell argument.
    quoted = "'" + filename.replace("'", "'\\''") + "'"
    # The playback setting is a user-supplied command line, so it is still
    # run through the shell, in the background and with its output dropped.
    os.system(playback + " " + quoted + " >/dev/null 2>&1 &")
def answers(iq, oq):
    """Worker loop: look up every word taken from *iq* and post the answer.

    For each word read from *iq* the list [word, text] is put on *oq*,
    where text is the first element returned by dict(word, 1). The loop
    never returns.
    """
    while True:
        word = iq.get()
        looked_up = dict(word, 1)
        text = looked_up[0]
        oq.put([word, text])
def browse():
    """Cycle through the stored words, showing each definition in turn.

    A daemon process running answers() looks words up ahead of time; the
    `prefetch` setting is the size of the two queues. After every word the
    user may delete it ("d") or continue (enter). The function only ends by
    exiting the program.

    Raises:
        SystemExit: when fewer than two words are stored, or through
            result() when the prompt is interrupted with Ctrl-C.
    """
    wordlist = list(db.items())
    size = len(wordlist)
    if size <= 1:
        print("There must be at least two words needed in the list.")
        sys.exit()
    totalcount = 0.0
    right = 0.0
    try:
        # A queue size of 0 means "unbounded", which would make the fill
        # loop below spin forever, hence at least 1.
        depth = max(1, int(prefetch))
    except ValueError:
        depth = 5
    lookup = Queue(maxsize=depth)
    answer = Queue(maxsize=depth)
    lookuper = Process(target=answers, args=(lookup, answer))
    lookuper.daemon = True
    lookuper.start()
    i = 0
    while True:
        # Keep the request queue topped up, walking the list cyclically.
        while not lookup.full():
            k = wordlist[i][0].lower()
            i = (i + 1) % size
            lookup.put(k)
        # The text must not be named `result`: that would hide the
        # result() function called in the KeyboardInterrupt handler.
        (k, text) = answer.get()
        if k not in db:
            # Deleted (or stored under another spelling) in the meantime.
            continue
        print(text)
        speek(k)
        try:
            word = raw_input("(d) Delete, (enter) Continue: ")
            if word == "d":
                del db[k]
                wordlist = list(db.items())
                size = len(wordlist)
                if size <= 1:
                    print("There must be at least two words needed in the list.")
                    sys.exit()
                # The list just became shorter; keep the cursor inside it.
                i %= size
        except KeyboardInterrupt:
            result(right, totalcount)
def wordlearn():
    """Run the vocabulary quiz.

    Words are drawn at random from the score-sorted list, with the draw
    weighted towards the front (lowest scores). The definition is shown
    with the word masked as "####" and the user has to type the word. A
    correct answer adds 1 to the word's score (capped at 100), a wrong one
    subtracts 3 (floored at 0). After every question the word may be
    deleted with "d". The function only ends by exiting the program.

    Raises:
        SystemExit: when fewer than two words are stored, or through
            result() when the delete prompt is interrupted with Ctrl-C.
    """
    wordlist = list(db.items())
    wordlist.sort(key=seckey)
    size = len(wordlist)
    if size <= 1:
        print("There must be at least two words needed in the list.")
        sys.exit()
    totalcount = 0.0
    right = 0.0
    lookup = Queue(maxsize=5)
    answer = Queue(maxsize=5)
    lookuper = Process(target=answers, args=(lookup, answer))
    lookuper.daemon = True
    lookuper.start()
    while True:
        # Keep the request queue full; mode 0 favours low indexes.
        while not lookup.full():
            pick = int(random.triangular(0, size - 1, 0))
            lookup.put(wordlist[pick][0].lower())
        # The text must not be named `result`: that would hide the
        # result() function called in the KeyboardInterrupt handler.
        (k, text) = answer.get()
        if k not in db:
            continue
        if browsemode == False:
            # Hide the word in lower case, upper case and capitalised form;
            # the [:1] slice also tolerates an empty key.
            masked = text.replace(k, "####").replace(k.upper(), "####")
            masked = masked.replace(k[:1].swapcase() + k[1:].lower(), "####")
            print(masked)
        else:
            print(text)
        speek(k)
        word = raw_input("Input :")
        if word == k.lower():
            print("Bingo!")
            right += 1
            db[k] = min(db[k] + 1, 100)
        else:
            db[k] = max(db[k] - 3, 0)
            print("WRONG! Correct answer is :  %s" % k)
        try:
            word = raw_input("(d) Delete, (enter) Continue: ")
            if word == "d":
                del db[k]
                wordlist = list(db.items())
                wordlist.sort(key=seckey)
                size = len(wordlist)
                if size <= 1:
                    print("There must be at least two words needed in the list.")
                    sys.exit()
        except KeyboardInterrupt:
            result(right, totalcount)
        totalcount += 1
        # Re-sort periodically so that the updated scores take effect.
        if totalcount % (size // 4 + 1) == 0:
            wordlist = list(db.items())
            wordlist.sort(key=seckey)
            size = len(wordlist)
def wordlist():
    """Print every stored word with its value, ordered by value ascending."""
    for word, score in sorted(db.items(), key=seckey):
        print("%s %s" % (word, score))
class MyHTMLParser(HTMLParser):
    """Turn the dictionary result markup into colorized terminal text.

    handle_starttag() decides, from the tag name and the value of the
    tag's first attribute, whether the next text node is shown and with
    which prefix/postfix; handle_data() appends that text to `result`.
    """
    # Set to 1 by dict() when the page offers a spelling suggestion; the
    # next <strong> element is then shown as "Spell Check".
    redirect=0
    # When false, pronunciation-related elements are not shown.
    pron=True

    def __init__(self):
        # Initialise the base parser too, so feed() works even if the
        # caller does not call reset() first.
        HTMLParser.__init__(self)
        self.show=0            # 1: append the next text node to result
        self.prefix=""         # text put in front of the next shown node
        self.postfix=org       # text put behind the next shown node
        self.entry=1           # running number of "interpret" paragraphs
        self.desc=0            # number of "description" divs seen so far
        self.result=[]         # output fragments, joined by dict()
        self.learn=learn       # 1: store looked-up words in the database
        self.learnword=0       # 1: the next shown text is the headword
        self.chinese=0         # set to 1 by dict(); disables word learning
        self.mp3url=""         # audio URL found by dict(), for savefile()
        self.key=""            # headword recorded by handle_data()

    def handle_starttag(self, tag, attrs):
        """Select how the text following *tag* is rendered.

        Only the value of the first attribute (attrs[0][1]) is inspected.
        """
        if self.redirect == 1 and tag == "strong":
            self.show=1
            self.prefix="Spell Check: ["+yellow
            self.postfix=org+"]"
        elif tag == "span" and len(attrs)==0:
            if self.pron == True:
                self.show=1
                self.prefix=""
        elif tag == "div" and len(attrs)==0:
            if self.pron == True:
                self.show=1
                self.prefix=""
        elif tag == "div" and len(attrs)!=0:
            if attrs[0][1]=="pronunciation" and self.pron==True:
                self.result.append(blue)
            elif attrs[0][1]=="caption":
                self.show=1
                self.prefix=red
            elif attrs[0][1]=="theme clr":
                self.show=1
                if self.chinese == 0:
                    self.learnword=1
                self.prefix="["+light
                self.postfix=org+"]"
            elif attrs[0][1]=="description":
                # The first description div is skipped, later ones shown.
                if self.desc != 0:
                    self.show=1
                    self.prefix=" "+org
                self.desc+=1
        elif tag == "p" and len(attrs)!=0:
            if attrs[0][1] == "example":
                self.show=1
                self.prefix=" "+indigo
            elif attrs[0][1] == "interpret":
                self.show=1
                self.prefix=" "+org+str(self.entry)+"."
                self.entry+=1

    def handle_data(self,data):
        """Append *data* to the result when the preceding tag asked for it.

        When the text is the headword and learning is enabled, the word is
        recorded in the database (purely alphabetic words only, with score
        0) and its audio file is fetched through savefile().
        """
        if self.show == 1:
            self.result.append(self.prefix+data+self.postfix+"\n")
            self.show=0
            self.prefix=""
            self.postfix=""
            if self.learn == 1 and self.learnword == 1:
                self.key = data.lower()
                if self.key.isalpha() and self.key not in db:
                    db[self.key] = 0
                self.learnword=0
                savefile(self.key, self.mp3url)

    def handle_endtag(self, tag):
        """Reset the terminal color after every closing </div>."""
        if tag == "div":
            self.result.append(org)
def htmlspcahrs(content):
    """Decode basic HTML entities and convert inline markup to color codes.

    Args:
        content: the HTML fragment about to be fed to the parser.

    Returns:
        The fragment with the entities for ' " > < & decoded, the highlight
        tag pair replaced by color codes, and every newline followed by a
        blank plus the green color code.
    """
    # NOTE(review): the entity names below were rendered away in SOURCE
    # (e.g. replace("&","&")); they are restored from their replacement
    # values. Confirm against the upstream ydict source.
    content=content.replace("&#39;","\'")
    content=content.replace("&apos;","\'")
    content=content.replace("&quot;","\"")
    content=content.replace("&gt;",">")
    content=content.replace("&lt;","<")
    # "&amp;" is decoded last so that "&amp;lt;" yields the text "&lt;"
    # instead of being decoded twice into "<".
    content=content.replace("&amp;","&")
    # NOTE(review): the two patterns below are empty in SOURCE, which makes
    # both calls no-ops; presumably an HTML tag pair that was removed from
    # the text got lost -- TODO restore from upstream.
    content=content.replace("","")
    content=content.replace("","")
    # NOTE(review): these two patterns are empty in SOURCE as well, which
    # would insert a color code between every character. Presumably they
    # were the bold tag pair marking the headword inside an example --
    # TODO confirm against upstream.
    content=content.replace("<b>",lindigo)
    content=content.replace("</b>",org+indigo)
    content=content.replace("\n","\n "+green)
    return content
def http_postconn(word):
    """POST a search for *word* to the dictionary site.

    Returns:
        The file-like response object returned by urllib.urlopen().
    """
    query = urllib.urlencode({'p': word ,'ei' : 'UTF-8'})
    endpoint = "http://%s/search" % "tw.dictionary.yahoo.com"
    return urllib.urlopen(endpoint, query)
def dict(word,pron):
    """Look *word* up on the dictionary site and format the answer.

    The page is fetched with http_postconn(), cut down to the region
    between a start marker and the 'Online Resources' marker, passed
    through htmlspcahrs() and rendered by MyHTMLParser.

    Args:
        word: the word to look up; surrounding whitespace is stripped.
        pron: stored on the parser as its `pron` flag.

    Returns:
        A (text, key) pair: ("", "") for an empty word, ("", word) when
        the end marker is missing, the "Not Found!" message with the word
        when the not-found marker is present, otherwise the rendered text
        and the parser's `key`.

    NOTE(review): the non-ASCII marker literals below look mis-encoded,
    two of them lack their closing quote and the 'Online Resources'
    literal is split across two lines -- restore them from the upstream
    source before relying on this function.
    """
    output=""
    word=word.strip()
    if len(word) <= 0:
        return output, ""
    r1=http_postconn(word)
    data1 = r1.read()
    p=MyHTMLParser()
    p.redirect=0
    p.chinese=0
    p.pron=pron
    # Extract the audio URL embedded in the page's script data, if any.
    try:
        index5 = string.index(data1, '{"audio":"')
        index6 = string.index(data1,'"}};var noFlashPlayerMessage')
        p.mp3url = data1[index5+10:index6]
    except ValueError:
        p.mp3url = ""
        pass
    # Drop everything from the end marker on; without it nothing is shown.
    try:
        data1=data1[:string.index(data1,'Online Resources
')]
    except ValueError:
        return output, word
    # Find where the answer starts: first marker (sets the redirect flag),
    # then the marker of the "Not Found!" page, then a last marker.
    try:
        index1=string.index(data1,"?冽銝閬")
        p.redirect=1
    except ValueError:
        try:
            index1=string.index(data1,"敺甇?摮?曆??唳閬?鞈???")
            # NOTE(review): db only exists when the database file was found
            # at startup -- confirm this branch is safe without it.
            if db.has_key(word):
                del db[word]
            return yellow+"Not Found!"+org+"\n", word
        except ValueError:
            # A ValueError raised here is not caught.
            index1=string.index(data1,"摮??")
    # Optional notice: print the text found between two further markers.
    try:
        index3=string.index(data1,"隞乩???")
        index4=string.index(data1," ?典??訾葉????)
        print yellow+"隞乩???"+light+data1[index3+18:index4]+yellow+" ?典??訾葉????+org
    except ValueError:
        pass
    # The chinese flag is set only when both markers are present.
    try:
        string.index(data1,"?潮")
        string.index(data1,"瘜券")
        p.chinese=1
    except ValueError:
        pass
    data=data1[index1:]
    p.reset()
    data=htmlspcahrs(data)
    p.feed(data)
    for s in p.result:
        output+=s
    return output, p.key
def _require_database():
    """Exit with a hint unless the word database was opened at startup."""
    if not database:
        print("Word database not found; run ydict with -d to create it first.")
        cleanup()


def _show_definition(word, pron, enc):
    """Look *word* up, play its audio and print the text encoded as *enc*."""
    # Local names only: binding a module-level `result` here would replace
    # the result() function.
    (text, k) = dict(word, pron)
    speek(k)
    print(unicode(text, 'utf8').encode(enc))


# Command line entry point: parse the options, run the selected mode and,
# when no mode applies, answer words typed at an interactive prompt.
if __name__ == '__main__':
    parser = OptionParser(usage = "Usage: ydict [options] word1 word2 ......")
    parser.add_option("-s", "--step", dest="step", help="one step mode.",default=False,action="store_true")
    parser.add_option("-p", "--pron", dest="pron", help="disable pronounce.",default=True,action="store_false")
    parser.add_option("-u", "--utf8", dest="utf8", help="force utf-8 encoding.",default=False,action="store_true")
    parser.add_option("-b", "--big5", dest="big5", help="force big5 encoding.",default=False,action="store_true")
    parser.add_option("-w", "--word", dest="oneword", type="string" , help="only one word.",action="store")
    parser.add_option("-c", "--nocolor", dest="nocolor", help="force no color code",default=False, action="store_true")
    parser.add_option("-v", "--version", dest="version", help="show version.",default=False,action="store_true")
    parser.add_option("-d", "--database", dest="database", help="initial database.",default=False,action="store_true")
    parser.add_option("-l", "--learn", dest="learnmode", help="start learning mode.",default=False,action="store_true")
    parser.add_option("-B", "--browse", dest="browsemode", help="start browse mode.",default=False,action="store_true")
    parser.add_option("-a", "--list", dest="listall", help="list all word in list.",default=False,action="store_true")
    parser.add_option("-i", "--import", dest="importfile", type="string", help="import a word list",default=False,action="store")
    (options, args) = parser.parse_args()
    # Module-level flag: speek() reads it when it has to re-run a lookup.
    m_pron=options.pron
    if options.nocolor:
        red = lindigo = indigo = green = yellow = blue = org = light = ""
    if options.importfile:
        _require_database()
        importfile(options.importfile)
        cleanup()
    if options.version == True:
        print(version)
        cleanup()
    # Output is UTF-8 unless big5 alone is requested.
    if options.big5 == True and options.utf8 != True:
        enc="big5"
    else:
        enc="utf8"
    if options.browsemode == True:
        _require_database()
        try:
            browse()
        except (KeyboardInterrupt, EOFError):
            print("")
            cleanup()
    if options.utf8 == True and options.big5 == True:
        print("Can not select utf-8 and big5 at the same time")
        cleanup()
    if enc == 'big5':
        m_pron=False
    if options.oneword:
        _show_definition(options.oneword, m_pron, enc)
        cleanup()
    if len(args) >= 1:
        for w in args:
            _show_definition(w, m_pron, enc)
        cleanup()
    if options.learnmode:
        _require_database()
        try:
            wordlearn()
        except (KeyboardInterrupt, EOFError):
            print("")
            cleanup()
        cleanup()
    elif options.listall:
        _require_database()
        wordlist()
        cleanup()
    if options.database == True:
        # Create the (empty) database file; expanduser() also works when
        # $HOME is not set.
        db=shelve.open(os.path.join(os.path.expanduser("~"), ".ydict.db"),"c")
        db.close()
        sys.exit()
    while True:
        try:
            word=raw_input(" ")
        except (KeyboardInterrupt, EOFError):
            print("")
            cleanup()
        _show_definition(word, m_pron, enc)
        if options.step == True:
            cleanup()
2013年2月20日 星期三
用奇摩字典做查詢 - ydict.py
Labels:
Python
在文字的世界裡尋找心靈的共鳴,遠山藍以溫柔的筆觸分享書籍的力量與生活的智慧。無論是細膩的書評、深刻的人生感悟,還是技術與創新的新奇發現,每篇文章都是一次內心的療癒旅程。希望透過閱讀,帶領讀者在忙碌的生活中找到一片寧靜與啟發。讓我們一起,在書香中遇見更好的自己!
訂閱:
張貼留言 (Atom)
沒有留言:
張貼留言