From: http://www.minilinux.net/node/27
这个程序使用google translate来辅助翻译po文件。需要注意的是,现在机器的翻译水平并不高,人工智能还有很长很长的路要走。这个程序的主要作用是节省一部分英语不是那么好的网友查字典的时间,而指望它翻译出来的句子能直接被读懂是不现实的。
这个程序的使用方法很简单,它只有一个参数,就是需要翻译的po文件名称,翻译后的结果直接输出到stdout。如果文件名是"-",那么程序将从stdin读入po文件。
这个程序的一大特色是可以支持多个代理服务器轮流向google translate发出请求,以免被google封锁,哈哈。配置代理服务器只要修改一下程序开头的proxies数组就可以了。其中None代表不使用代理服务器。
程序中间还有一段是设置输出po文件头的部分,其中的版权声明和reportbug字段可能需要根据实际情况修改。
另外这个程序对bash脚本中字符串翻译做了一些优化,比如$ABC这样在字符串中的变量名是不会被翻译的。具体可以看translate_fixed函数。
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
import httplib
import urllib
import urllib2
import traceback
import re
import random
import time
# HTTP proxies ("host:port" strings) that translate() takes turns using.
# A None entry means "connect directly, without a proxy".
# translate() rotates this list on every call and removes an entry from it
# when a request made through that entry fails.
proxies = [
"proxy1:port",
"proxy2:port",
None,
]
def _decode_entities(markup):
    """Expand numeric (&#NNN;) and lt/gt/amp entities in a single pass."""
    named = {"lt": "<", "gt": ">", "amp": "&"}

    def expand(match):
        number, name = match.group(1), match.group(2)
        if number is None:
            # Unknown named entities are left as-is instead of raising
            # KeyError, which used to be misreported as a proxy failure.
            return named.get(name, match.group(0))
        try:
            # chr() only accepts 0-255 on Python 2, so code points beyond
            # Latin-1 (e.g. CJK characters) raised ValueError.  Emit UTF-8,
            # matching the charset declared in the generated PO header.
            # NOTE(review): assumes the fetched page itself is UTF-8 - confirm.
            return unichr(int(number)).encode("utf-8")
        except ValueError:
            # Code point outside the range this interpreter supports.
            return match.group(0)

    return re.sub(r"&(?:#(\d+)|([a-z]+));", expand, markup)


def translate(text):
    """Translate ``text`` through the Google Translate web page.

    The request is POSTed to the ``translate_t`` URL below (``sl=en``,
    ``tl=zh-CN``), trying each entry of the module-level ``proxies`` list in
    turn.  The list is rotated on every call so consecutive requests start
    with a different proxy.  A proxy whose request fails, or whose response
    does not contain the expected result element, is removed from
    ``proxies`` and the failure is reported on stderr.

    Args:
        text: The string to translate.

    Returns:
        The translated string with HTML entities decoded, or None when no
        proxy produced a usable response.
    """
    addr = "http://www.google.com/translate_t?sl=en&tl=zh-CN"
    # Rotate: move the last entry to the front.  Guarded because pop() on an
    # empty list (every proxy already removed) raises IndexError.
    if proxies:
        proxies.insert(0, proxies.pop())
    # Iterate over a copy, since failing proxies are removed inside the loop.
    for proxy in proxies[:]:
        try:
            req = urllib2.Request(addr)
            req.add_header("User-Agent", "Mozilla/5.0")
            req.add_data(urllib.urlencode({"text": text}))
            if proxy:
                req.set_proxy(proxy, "http")
            response = urllib2.urlopen(req)
            try:
                page = response.read()
            finally:
                response.close()
            found = re.search(r'<div id=result_box dir="ltr">(.*?)</div>',
                              page)
            if found is None:
                raise ValueError("no result_box element in the response")
            return _decode_entities(found.group(1))
        except Exception:
            # Not a bare except: Ctrl-C / SystemExit must still stop the run.
            proxies.remove(proxy)
            sys.stderr.write("Proxy: %s\n" % proxy)
            traceback.print_exc()
    return None
def translate_fixed(text):
    """Translate ``text`` while keeping selected tokens untranslated.

    Before the text is handed to ``translate()``, the following pieces are
    swapped for numeric placeholders, and swapped back afterwards:

    * shell variables (``$NAME`` and ``${NAME}``),
    * the escape sequences matched by the second pattern below, and then
      every remaining backslash,
    * runs of two or more upper-case ASCII letters,
    * simple ``<tag>`` / ``</tag>`` markup,
    * the word "puppy" in any letter case.

    Args:
        text: The source string (one PO string fragment).

    Returns:
        The translated string with the protected tokens restored.  Blank
        input is returned unchanged without contacting the translator, and
        the original ``text`` is returned when ``translate()`` reports
        failure by returning None.
    """
    # Nothing to translate: skip the network round trip entirely.
    if not text.strip():
        return text

    notrans = []

    def replace(match):
        # Remember the token and leave an indexed, number-like placeholder.
        notrans.append(match.group(0))
        return " 0.%d68065175210" % (len(notrans) - 1)

    protected_patterns = (
        r"\$\{\w+\}|\$\w+",
        r'\\"|\\$|\\\\n|\\t',
        r"\\",
        r"[A-Z]{2,100}",
        r"</?\w+>",
        # The inline flag must lead the pattern; a trailing "(?i)" is
        # rejected by newer versions of the re module.
        r"(?i)puppy",
    )
    masked = text
    for pattern in protected_patterns:
        masked = re.sub(pattern, replace, masked)

    trans = translate(masked)
    if trans is None:
        # Every proxy failed; keep the source text instead of crashing in
        # re.sub() with a TypeError.
        return text

    for i, token in enumerate(notrans):
        # The restore pattern allows an optional space after the leading 0
        # and any single character in place of the dot.
        # NOTE(review): presumably tolerance for the translator reformatting
        # the placeholder - confirm.  The space inserted in front of each
        # placeholder is not removed here.
        # A function replacement keeps backslashes in the token literal.
        trans = re.sub("0 ?.%d68065175210" % i,
                       lambda m, token=token: token,
                       trans)
    return trans
# Script body: read a PO template (file name in argv[1], or "-" for stdin),
# write a new PO header to stdout, then copy every entry with its msgstr
# replaced by the output of translate_fixed().
# NOTE(review): only "#" comments, "msgid ", "msgstr " and quoted
# continuation lines are recognised; other keyword lines (for example
# msgid_plural or msgctxt) are not copied to the output.
random.seed()
if len(sys.argv) != 2:
    sys.stdout.write("Usage: " + sys.argv[0] + " filename.pot" + "\n")
    sys.exit(1)
if sys.argv[1] == "-":
    f = sys.stdin
else:
    f = open(sys.argv[1])
try:
    out = sys.stdout.write
    out("""# 中文Puppy Linux开发者之家.
# This file is distributed under GPL.
#""" + "\n")
    out("""#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: %(program)s\\n"
"Report-Msgid-Bugs-To: %(reportbug)s\\n"
"POT-Creation-Date: %(date)s\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: Google Translate\\n"
"Language-Team: Chinese\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=utf-8\\n"
"Content-Transfer-Encoding: 8bit\\n"
""" % {"program": sys.argv[1],
       "reportbug": "laborer@126.com",
       "date": time.strftime("%Y-%m-%d %H:%M%z")} + "\n")

    # Discard everything up to and including the first blank line, i.e. the
    # input's own header block, which the header written above replaces.
    for line in f:
        if not line.rstrip():
            break

    trans = []    # translated fragments collected for the current msgid
    buf = ""      # msgid text accumulated since the last "\n" split point
    skip = False  # truthy while the input's original msgstr is being dropped
    for line in f:
        line = line.rstrip()
        if skip:
            # Keep skipping until a blank line ends the entry: a blank line
            # is an empty string, which makes ``skip`` falsy again.
            skip = line
            continue
        if line.startswith("#,"):
            out(line + ", fuzzy" + "\n")
        elif line.startswith("#"):
            out(line + "\n")
        elif line.startswith("msgid "):
            out("msgid ")
            # Leave only the quoted string so the shared branch below
            # echoes it and adds it to ``buf``.
            line = line[6:]
        elif line.startswith("msgstr "):
            out("msgstr ")
            if buf:
                trans.append(translate_fixed(buf))
                buf = ""
            if not trans:
                # Nothing was collected for this entry: emit an empty string
                # so the output is not a bare "msgstr" keyword.
                trans.append("")
            for s in trans:
                out("\"%s\"" % s + "\n")
            out("\n")
            trans = []
            skip = True
        if line.startswith("\""):
            out(line + "\n")
            buf += line[1:-1]
            # Translate one "\n"-terminated piece at a time and put the
            # escape sequence back afterwards.
            if buf.endswith("\\n"):
                trans.append(translate_fixed(buf[:-2]) + "\\n")
                buf = ""
finally:
    # The input file used to be left open; stdin is not ours to close.
    if f is not sys.stdin:
        f.close()