本文使用 Python 3 和 PyQt5 实现了《Python Qt GUI 快速编程》第 19 章中的页面索引器应用程序示例。
/home/yrd/eric_workspace/chap19/walker_ans.py
#!/usr/bin/env python3
import codecs
import html.entities
import re
import sys
from PyQt5.QtCore import (QMutex, QThread,pyqtSignal,Qt)
class Walker(QThread):
# Signal with a (bool, int) payload; run() emits it with
# (self.completed, self.index).
# NOTE(review): QThread already provides a built-in signal named
# ``finished``; this attribute reuses that name — confirm it is intentional.
finished = pyqtSignal(bool,int)
# Signal with a (str, int) payload; its emit site is not visible in this
# excerpt.
indexed = pyqtSignal(str,int)
# Numeric limits for word handling. NOTE(review): their use sites are not
# visible in this excerpt — presumably a cut-off for treating a word as
# "common" and the accepted word-length range; confirm in processFiles.
COMMON_WORDS_THRESHOLD = 250
MIN_WORD_LEN = 3
MAX_WORD_LEN = 25
# The ten decimal digits plus underscore. NOTE(review): presumably characters
# a word may not start or end with — confirm in processFiles.
INVALID_FIRST_OR_LAST = frozenset("0123456789_")
# Matches one tag-like span: "<", the shortest run of non-">" characters, ">".
STRIPHTML_RE = re.compile(r"<[^>]*?>", re.IGNORECASE|re.MULTILINE)
# Matches a named entity such as "&amp;" (name in group 1) or a decimal
# numeric entity such as "&#169;" (digits in group 2).
ENTITY_RE = re.compile(r"&(\w+?);|&#(\d+?);")
# Matches one or more consecutive non-word characters.
SPLIT_RE = re.compile(r"\W+", re.IGNORECASE|re.MULTILINE)
def __init__(self, index, lock, files, filenamesForWords,
             commonWords, parent=None):
    """Store the caller-supplied state and initialise the stop flag.

    Args:
        index: Value kept as ``self.index``; ``run`` passes it as the
            second (int) argument of the ``finished`` signal.
        lock: Lock object kept as ``self.lock``.
            NOTE(review): presumably guards ``filenamesForWords`` and
            ``commonWords`` — confirm against the caller.
        files: Iterable of file names; ``processFiles`` iterates over it
            and opens each entry.
        filenamesForWords: Kept as ``self.filenamesForWords``.
            NOTE(review): presumably a shared word -> file names mapping;
            confirm against the caller.
        commonWords: Kept as ``self.commonWords``.
            NOTE(review): presumably a shared collection of common words;
            confirm against the caller.
        parent: Optional parent forwarded to the ``QThread`` constructor.
    """
    super(Walker, self).__init__(parent)
    self.index = index
    self.lock = lock
    self.files = files
    self.filenamesForWords = filenamesForWords
    self.commonWords = commonWords
    # ``stopped`` is read and written under ``mutex`` by stop()/isStopped().
    self.stopped = False
    self.mutex = QMutex()
    self.completed = False
def stop(self):
    """Set ``self.stopped`` to True while holding ``self.mutex``."""
    # Acquire before entering ``try`` so that ``finally`` only ever
    # releases a mutex this call actually holds.
    self.mutex.lock()
    try:
        self.stopped = True
    finally:
        self.mutex.unlock()
def isStopped(self):
    """Return the current value of ``self.stopped``, read under ``self.mutex``."""
    # Acquire before entering ``try`` so that ``finally`` only ever
    # releases a mutex this call actually holds.
    self.mutex.lock()
    try:
        return self.stopped
    finally:
        self.mutex.unlock()
def run(self):
    """Process the files, mark the worker stopped, then emit ``finished``.

    The signal is emitted with ``(self.completed, self.index)``.

    NOTE(review): the class-level ``finished`` signal reuses the name of
    QThread's own built-in ``finished`` signal — confirm that listeners
    expect the (bool, int) form.
    """
    self.processFiles()
    # Set the stop flag once processing has returned, whether it ran to
    # the end or bailed out early.
    self.stop()
    self.finished.emit(self.completed, self.index)
def processFiles(self):
def unichrFromEntity(match):
text = match.group(match.lastindex)
if text.isdigit():
return chr(int(text))
u = html.entities.name2codepoint.get(text)
return chr(u) if u is not None else ""
for fname in self.files:
if self.isStopped():
return
words = set()
fh = None
try:
fh = codecs.open(fname, "r", "UTF8", "ignore")
text = fh.read()
except EnvironmentError as e:
sys.stderr.write("Error: {0}\n".format(e))
continue
finally:
if fh is not