解决方案:
线程间的事件通知,可以使用标准库中的 threading.Event。如果两个线程需要相互通知,一般会用到两个 Event 对象,每个对象负责一个方向的通知。
1. 等待事件的一端调用 event.wait() 阻塞等待;事件被 set() 之后会一直保持置位状态,如果希望下一次 wait() 重新阻塞,必须先调用 event.clear() 将其复位。
2.通知事件一端调用set,通知事件 event.set()
3. daemon(守护)线程:当所有非 daemon 线程都结束后,程序退出,daemon 线程也会随之结束。
使用tarfile可以实现文件的压缩打包
# Download -> convert -> archive pipeline demonstrating threading.Event.
#
# DownloadThread instances put (sid, csv_data) tuples on a shared queue,
# one ConvertThread turns each CSV into an XML file, and one daemon
# TarThread packs the XML files into numbered .tgz archives.  Two Event
# objects hand control back and forth between the converter and the archiver.
#
# NOTE(review): the imports below (Queue, StringIO) are the Python 2 module
# names used by the original; the statements in this script are written so
# that they are syntactically valid on both Python 2 and Python 3.
import csv
import os
import tarfile
from collections import deque
from Queue import Queue
from StringIO import StringIO
from threading import Event
from threading import Thread
from xml.dom import minidom
from xml.dom.minidom import parse
from xml.etree.ElementTree import Element, ElementTree

import requests

# NOTE(review): q2 is not referenced anywhere else in this script.
q2 = deque()


def pretty(e, level=0):
    """Add newline/tab whitespace to an ElementTree element tree, in place.

    e     -- the element to indent; its children are processed recursively.
    level -- nesting depth of ``e``; controls how many tabs are emitted.
    """
    if len(e) > 0:
        e.text = '\n' + '\t' * (level + 1)
        for child in e:
            pretty(child, level + 1)
        # The last child's tail precedes the parent's closing tag, so it
        # needs one tab fewer than the tails between siblings.
        child.tail = child.tail[:-1]
    e.tail = '\n' + '\t' * level


class DownloadThread(Thread):
    """Thread that loads the CSV data for one stock id and queues it."""

    def __init__(self, sid, queue):
        """Store the stock id, build its URL and remember the output queue.

        sid   -- stock id; zero-padded to six characters inside the URL.
        queue -- queue that receives the ``(sid, data)`` tuple from run().
        """
        Thread.__init__(self)
        self.sid = sid
        self.url = "http://table.finance.yahoo.com/table.csv?s=%s.sz"
        self.url %= str(sid).rjust(6, '0')
        self.queue = queue

    def download(self, url):
        """Return the CSV content wrapped in a StringIO object.

        The HTTP fetch of ``url`` (requests.get, optionally through a proxy)
        was disabled in the original code, so ``url`` is currently unused and
        the data is read from the local file "pingan.csv" instead.
        """
        # ``with`` guarantees the file handle is closed after reading.
        with open("pingan.csv") as f:
            return StringIO(f.read())

    def run(self):
        """Announce the download, load the data and put it on the queue."""
        print("download  %s %s" % (self.sid, self.url))
        data = self.download(self.url)
        self.queue.put((self.sid, data))


class ConvertThread(Thread):
    """Thread that converts queued CSV data into XML files in batches."""

    # Number of converted files after which the archiver is signalled.
    BATCH_SIZE = 5

    def __init__(self, queue, cEvent, tEvent):
        """Remember the input queue and the two hand-shake events.

        queue  -- queue yielding ``(sid, data)`` tuples; sid -1 means stop.
        cEvent -- set by this thread to ask the archiver to run.
        tEvent -- waited on by this thread until the archiver is done.
        """
        Thread.__init__(self)
        self.queue = queue
        self.cEvent = cEvent
        self.tEvent = tEvent

    def createXmlFromCsv(self, scsv, fxml):
        """Write the rows of the CSV source ``scsv`` as XML to ``fxml``.

        The first CSV row supplies the tag names (spaces removed); every
        following row becomes a <Row> element inside one <Data> element.
        An empty CSV source produces an empty <Data/> element.
        """
        root = minidom.Document()
        dataElement = root.createElement("Data")
        reader = csv.reader(scsv)
        # next() with a default avoids StopIteration on an empty source.
        headers = [h.replace(' ', '') for h in next(reader, [])]
        for row in reader:
            rowElement = root.createElement("Row")
            for tag, text in zip(headers, row):
                item = root.createElement(tag)
                item.appendChild(root.createTextNode(text))
                rowElement.appendChild(item)
            dataElement.appendChild(rowElement)
        # appendChild returns the appended node, so only the <Data> element
        # (without an XML declaration) is serialized.
        fxml.write(root.appendChild(dataElement).toprettyxml())

    def csvToXml(self, scsv, fname):
        """ElementTree variant: write the CSV source ``scsv`` to ``fname``.

        Tag names come from the first CSV row with spaces replaced by
        underscores.  An empty CSV source produces an empty Data element.
        """
        reader = csv.reader(scsv)
        headers = next(reader, [])
        root = Element('Data')
        for row in reader:
            eRow = Element('Row')
            root.append(eRow)
            for tag, text in zip(headers, row):
                e = Element(str(tag).replace(" ", "_"))
                e.text = text
                eRow.append(e)
        pretty(root)
        et = ElementTree(root)
        et.write(fname)

    def run(self):
        """Convert queued items until the ``-1`` sentinel arrives."""
        count = 0
        while True:
            sid, data = self.queue.get()
            if sid == -1:
                print("-1 was called")
                # Ask the archiver to pack whatever is left, then stop.
                self.cEvent.set()
                self.tEvent.wait()
                break
            if data:
                print('Convert to XML ...(%d)' % sid)
                fname = str(sid).rjust(6, '0') + '.xml'
                # NOTE(review): binary mode is kept from the original, which
                # targets Python 2 -- confirm before running on Python 3.
                with open(fname, 'wb') as wf:
                    self.createXmlFromCsv(data, wf)
                count += 1
                if count == self.BATCH_SIZE:
                    # Hand over to the archiver and wait until it is done;
                    # clear tEvent so the next wait() blocks again.
                    self.cEvent.set()
                    self.tEvent.wait()
                    self.tEvent.clear()
                    count = 0


class TarThread(Thread):
    """Daemon thread that archives the XML files whenever it is signalled."""

    def __init__(self, cEvent, tEvent):
        """Remember the hand-shake events and mark the thread as daemon.

        cEvent -- waited on by this thread; set when archiving is requested.
        tEvent -- set by this thread when an archiving pass has finished.
        """
        Thread.__init__(self)
        self.count = 0
        self.cEvent = cEvent
        self.tEvent = tEvent
        self.daemon = True

    def tarXml(self):
        """Pack every ``*.xml`` file of the current directory into a tgz.

        The archive is named ``<count>.tgz``.  Archived files are deleted,
        and the archive itself is deleted again if nothing was added.
        """
        self.count += 1
        tfname = '%d.tgz' % self.count
        # The context manager closes the archive even if add/remove fails.
        with tarfile.open(tfname, "w:gz") as tf:
            for fname in os.listdir('.'):
                if fname.endswith('.xml'):
                    tf.add(fname)
                    os.remove(fname)
            is_empty = not tf.members
        if is_empty:
            os.remove(tfname)

    def run(self):
        """Archive on every cEvent signal and acknowledge through tEvent."""
        while True:
            self.cEvent.wait()
            self.tarXml()
            # Reset before acknowledging so the next wait() blocks again.
            self.cEvent.clear()
            self.tEvent.set()


q = Queue()
cEvent = Event()
tEvent = Event()

dThreads = [DownloadThread(i, q) for i in range(1, 13)]
cThreads = ConvertThread(q, cEvent, tEvent)
tThread = TarThread(cEvent, tEvent)

for t in dThreads:
    t.start()
cThreads.start()
tThread.start()

# Once every download has finished, queue the sentinel that stops the
# converter after it has processed everything before it.
for t in dThreads:
    t.join()
q.put((-1, None))
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
|
import csv
import os
import tarfile
from collections import deque
from Queue import Queue
from StringIO import StringIO
from threading import Event
from threading import Thread
from xml.dom import minidom
from xml.dom.minidom import parse
from xml.etree.ElementTree import Element, ElementTree

import requests
# Module-level double-ended queue, created empty.
# NOTE(review): no other visible code references q2 -- confirm it is needed.
q2 = deque()
def pretty(e, level=0):
    """Add newline/tab whitespace to an ElementTree element tree, in place.

    e     -- the element to indent; its children are processed recursively.
    level -- nesting depth of ``e``; controls how many tabs are emitted.

    Nothing is returned; only the ``text`` and ``tail`` attributes change.
    """
    own_indent = '\n' + '\t' * level
    if len(e) > 0:
        # Whitespace placed between the opening tag and the first child.
        e.text = own_indent + '\t'
        for node in e:
            pretty(node, level + 1)
        # The last child's tail precedes the closing tag of ``e``, so it
        # gets one tab fewer than the tails between siblings.
        last = e[-1]
        last.tail = last.tail[:-1]
    e.tail = own_indent
class DownloadThread(Thread):
    """Thread that loads the CSV data for one stock id and queues it."""

    def __init__(self, sid, queue):
        """Store the stock id, build its URL and remember the output queue.

        sid   -- stock id; zero-padded to six characters inside the URL.
        queue -- queue that receives the ``(sid, data)`` tuple from run().
        """
        Thread.__init__(self)
        self.sid = sid
        self.url = "http://table.finance.yahoo.com/table.csv?s=%s.sz"
        self.url %= str(sid).rjust(6, '0')
        self.queue = queue

    def download(self, url):
        """Return the CSV content wrapped in a StringIO object.

        The HTTP fetch of ``url`` (requests.get, optionally through a proxy)
        was disabled in the original code, so ``url`` is currently unused and
        the data is read from the local file "pingan.csv" instead.
        """
        # ``with`` guarantees the file handle is closed after reading.
        with open("pingan.csv") as f:
            return StringIO(f.read())

    def run(self):
        """Announce the download, load the data and put it on the queue."""
        # One-argument print(...) gives the same output on Python 2 and 3.
        print("download  %s %s" % (self.sid, self.url))
        data = self.download(self.url)
        self.queue.put((self.sid, data))
class ConvertThread(Thread):
    """Thread that converts queued CSV data into XML files in batches."""

    # Number of converted files after which the archiver is signalled.
    BATCH_SIZE = 5

    def __init__(self, queue, cEvent, tEvent):
        """Remember the input queue and the two hand-shake events.

        queue  -- queue yielding ``(sid, data)`` tuples; sid -1 means stop.
        cEvent -- set by this thread to ask the archiver to run.
        tEvent -- waited on by this thread until the archiver is done.
        """
        Thread.__init__(self)
        self.queue = queue
        self.cEvent = cEvent
        self.tEvent = tEvent

    def createXmlFromCsv(self, scsv, fxml):
        """Write the rows of the CSV source ``scsv`` as XML to ``fxml``.

        The first CSV row supplies the tag names (spaces removed); every
        following row becomes a <Row> element inside one <Data> element.
        An empty CSV source produces an empty <Data/> element.
        """
        root = minidom.Document()
        dataElement = root.createElement("Data")
        reader = csv.reader(scsv)
        # next() with a default avoids StopIteration on an empty source, and
        # a real list (not a lazy map) can be zipped against every row.
        headers = [h.replace(' ', '') for h in next(reader, [])]
        for row in reader:
            rowElement = root.createElement("Row")
            for tag, text in zip(headers, row):
                item = root.createElement(tag)
                item.appendChild(root.createTextNode(text))
                rowElement.appendChild(item)
            dataElement.appendChild(rowElement)
        # appendChild returns the appended node, so only the <Data> element
        # (without an XML declaration) is serialized.
        fxml.write(root.appendChild(dataElement).toprettyxml())

    def csvToXml(self, scsv, fname):
        """ElementTree variant: write the CSV source ``scsv`` to ``fname``.

        Tag names come from the first CSV row with spaces replaced by
        underscores.  An empty CSV source produces an empty Data element.
        """
        reader = csv.reader(scsv)
        headers = next(reader, [])
        root = Element('Data')
        for row in reader:
            eRow = Element('Row')
            root.append(eRow)
            for tag, text in zip(headers, row):
                e = Element(str(tag).replace(" ", "_"))
                e.text = text
                eRow.append(e)
        pretty(root)
        et = ElementTree(root)
        et.write(fname)

    def run(self):
        """Convert queued items until the ``-1`` sentinel arrives."""
        count = 0
        while True:
            sid, data = self.queue.get()
            if sid == -1:
                print("-1 was called")
                # Ask the archiver to pack whatever is left, then stop.
                self.cEvent.set()
                self.tEvent.wait()
                break
            if data:
                print('Convert to XML ...(%d)' % sid)
                fname = str(sid).rjust(6, '0') + '.xml'
                # NOTE(review): binary mode is kept from the original, which
                # targets Python 2 -- confirm before running on Python 3.
                with open(fname, 'wb') as wf:
                    self.createXmlFromCsv(data, wf)
                count += 1
                if count == self.BATCH_SIZE:
                    # Hand over to the archiver and wait until it is done;
                    # clear tEvent so the next wait() blocks again.
                    self.cEvent.set()
                    self.tEvent.wait()
                    self.tEvent.clear()
                    count = 0
class TarThread(Thread):
    """Daemon thread that archives the XML files whenever it is signalled."""

    def __init__(self, cEvent, tEvent):
        """Remember the hand-shake events and mark the thread as daemon.

        cEvent -- waited on by this thread; set when archiving is requested.
        tEvent -- set by this thread when an archiving pass has finished.
        """
        Thread.__init__(self)
        self.count = 0
        self.cEvent = cEvent
        self.tEvent = tEvent
        # Attribute form of the deprecated setDaemon(True).
        self.daemon = True

    def tarXml(self):
        """Pack every ``*.xml`` file of the current directory into a tgz.

        The archive is named ``<count>.tgz``.  Archived files are deleted,
        and the archive itself is deleted again if nothing was added.
        """
        self.count += 1
        tfname = '%d.tgz' % self.count
        # The context manager closes the archive even if add/remove fails.
        with tarfile.open(tfname, "w:gz") as tf:
            for fname in os.listdir('.'):
                if fname.endswith('.xml'):
                    tf.add(fname)
                    os.remove(fname)
            is_empty = not tf.members
        if is_empty:
            os.remove(tfname)

    def run(self):
        """Archive on every cEvent signal and acknowledge through tEvent."""
        while True:
            self.cEvent.wait()
            self.tarXml()
            # Reset before acknowledging so the next wait() blocks again.
            self.cEvent.clear()
            self.tEvent.set()
# Shared plumbing: one queue from the downloaders to the converter, and two
# events for the hand-shake between the converter and the archiver.
q = Queue()
cEvent = Event()
tEvent = Event()

# Twelve downloaders (ids 1..12), one converter and one archiver.
dThreads = [DownloadThread(i, q) for i in range(1, 13)]
cThreads = ConvertThread(q, cEvent, tEvent)
tThread = TarThread(cEvent, tEvent)

for t in dThreads:
    t.start()
cThreads.start()
tThread.start()

# Once every download has finished, queue the sentinel that stops the
# converter after it has processed everything before it.
for t in dThreads:
    t.join()
q.put((-1, None))
|