从网页爬取数据:
[cictest@oc1152622168 claimreport]$ cat /home/cictest/claimreport/fetchdata.py
# encoding: utf8
import requests
import re
from http.cookiejar import CookieJar
import json
from bs4 import BeautifulSoup
import datetime,pytz
#from ea.passwd import decrypt
import urllib3
import time
# Silence urllib3's InsecureRequestWarning; the requests in this script pass verify=False.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# One module-level session, used by web_login.getdata for every request,
# with an explicit CookieJar installed as its cookie store.
s = requests.session()
s.cookies = CookieJar()
# Portal login credentials (values are redacted in these notes).
# NOTE(review): hard-coded credentials; consider loading them from the environment.
user="abc@cn.**.com"
passwd="******"
class web_login():
    """Log in to the staff portal and save the team-checker report to disk.

    Relies on names defined at module level in this script: the shared
    ``requests`` session ``s`` and the ``user`` / ``passwd`` credentials.
    """

    PORTAL_URL = "http://stuffportal.******.com/"
    LOGIN_URL = "http://stuffportal.******.com/login/"
    TEAM_CHECKER_URL = 'http://stuffportal.******.com/team_checker/'
    REPORT_PATH = "/home/cictest/claimreport/output.html"
    UNCLAIMED_PATH = "/home/cictest/claimreport/unclaimusr"

    # value="..." that follows name="csrfmiddlewaretoken" on the same line.
    _CSRF_RE = re.compile(r'name="csrfmiddlewaretoken".*?value="([^"]*)"')
    # First single-quoted string that follows "endDate:" on the same line.
    _END_DATE_RE = re.compile(r"endDate:[^'\n]*'([^'\n]*)'")

    @staticmethod
    def _first_group(pattern, html, what):
        """Return capture group 1 of the first *pattern* match in *html*."""
        found = pattern.search(html)
        if found is None:
            raise ValueError(
                "could not find {0} in the portal response".format(what))
        return found.group(1)

    @staticmethod
    def _checked_text(response):
        """Raise on an HTTP error status, otherwise return the body text."""
        response.raise_for_status()
        return response.text

    def getdata(self):
        """Fetch the team-checker page for the current end date and save it.

        Steps: load the portal page to obtain a CSRF token, post the login
        form, load the team-checker page to read its ``endDate`` value and a
        fresh CSRF token, then post those back to the team-checker URL.

        The full response HTML is written to ``REPORT_PATH``. Every ``<p>``
        element inside the first ``div`` with class ``col-md-12 p-0 mt-4``
        (called ``unclaimusr`` in the original script) is written, one per
        line, to ``UNCLAIMED_PATH``.

        Returns:
            None.

        Raises:
            requests.HTTPError: if any request returns an error status.
            ValueError: if a CSRF token, the end date, or the expected div
                cannot be found in a response.
        """
        # verify=False (no TLS certificate verification) is kept from the
        # original script.
        landing_html = self._checked_text(s.get(self.PORTAL_URL, verify=False))
        login_form = {
            'csrfmiddlewaretoken': self._first_group(
                self._CSRF_RE, landing_html, "the login CSRF token"),
            'inputEmail': user,
            'inputPassword': passwd,
        }
        self._checked_text(s.post(self.LOGIN_URL, data=login_form, verify=False))

        checker_html = self._checked_text(
            s.get(self.TEAM_CHECKER_URL, verify=False))
        query = {
            'csrfmiddlewaretoken': self._first_group(
                self._CSRF_RE, checker_html, "the team-checker CSRF token"),
            'inputWeekDay': self._first_group(
                self._END_DATE_RE, checker_html, "the endDate value"),
        }
        report_html = self._checked_text(
            s.post(self.TEAM_CHECKER_URL, data=query, verify=False))

        soup = BeautifulSoup(report_html, "html.parser")
        sections = soup.find_all("div", class_="col-md-12 p-0 mt-4")
        if not sections:
            raise ValueError(
                "team-checker response has no 'col-md-12 p-0 mt-4' section")
        unclaimed = sections[0].find_all('p')
        # NOTE(review): the original notes hint that sections[1] holds the
        # claimed users (its 'p' and 'span' elements); it is not extracted here.

        # Context managers close both files even if a write fails.
        with open(self.REPORT_PATH, 'w') as report_file:
            report_file.write(report_html)
        with open(self.UNCLAIMED_PATH, 'w') as unclaimed_file:
            unclaimed_file.writelines(str(entry) + "\n" for entry in unclaimed)
# Run the scrape as soon as this file is executed (there is no __main__ guard,
# so importing the module triggers it as well).
web_login().getdata()
对生成的数据进行处理:
[cictest@oc1152622168 claimreport]$ cat /home/cictest/claimreport/ilc.sh
#!/bin/bash
# Strip page chrome from the scraped report and publish it on the reporting
# web server as the ilc index page.

# Abort on the first failing step so a broken sed/scp never publishes a
# stale or half-edited file.
set -euo pipefail

report=/home/cictest/claimreport/output.html
remote=cictest@192.168.122.7
published=/var/www/vhosts/reporting.linux.au.ibm.com/ilc/index.html

#sudo cp -a /home/cictest/claimreport/output.html /home/cictest/ilc/output.html && sudo chown cictest:cictest /home/cictest/ilc/output.html

# In-place edits to the report:
#   1. delete from the line matching id="sidebar" through the line matching role="main"
#   2. delete from the line matching 'h2 class=' through the line matching 'Subscribe'
#   3. delete each line matching team_exclude plus the 3 lines after it
sed -i -e '/id=\"sidebar\"/,/role=\"main\"/d' -e '/h2\ class=/,/Subscribe/d' -e '/team_exclude/,+3d' "$report"

# Copy to the remote home directory first, then install it with sudo.
scp "$report" "$remote:~/"
ssh -tt -o StrictHostKeyChecking=no "$remote" "sudo cp -a /home/cictest/output.html $published && sudo chown apache:apache $published && sudo chmod 755 $published"
pdf转成图片格式:
redhat linux: yum install wkhtmltopdf
Debian/Ubuntu: apt-get install wkhtmltopdf
法一:
[cictest@oc1152622168 claimreport]$ cat /home/cictest/claimreport/pdfimg.py
import pdfkit
from pdf2image import convert_from_path, convert_from_bytes
import tempfile
# Render the report page to a PDF, then save each PDF page as a JPEG.
try:
    pdfkit.from_url('https://9.119.72.67/ilc', '/home/cictest/claimreport/out.pdf')
except OSError as err:
    # Best effort, as in the original notes: carry on and try to convert
    # whatever PDF exists, but report the error instead of hiding it.
    # NOTE(review): presumably pdfkit raises OSError for wkhtmltopdf warnings
    # even when a PDF was written; confirm.
    print("pdfkit reported an error, continuing: {0}".format(err))

# Pages are saved inside the `with` block because pdf2image stores its
# intermediate files in the temporary directory, which is removed on exit.
with tempfile.TemporaryDirectory() as path:
    images_from_path = convert_from_path('/home/cictest/claimreport/out.pdf', output_folder=path)
    # Output files are numbered from 1: ilcimg1, ilcimg2, ...
    for i, img in enumerate(images_from_path, start=1):
        img.save("/home/cictest/claimreport/imgs/ilcimg{0}".format(i), "JPEG")
Using wkhtmltopdf without X server
Arjun Munji edited this page on Sep 7, 2018 · 11 revisions
wkhtmltopdf needs an X server. If you're running your application on a VPS, you probably don't have one installed. The solution is to install a "virtual" X server.
Linux
Debian/Ubuntu
To install wkhtmltopdf on a Debian/Ubuntu server, follow these steps (as root):
apt-get install wkhtmltopdf
apt-get install xvfb
printf '#!/bin/bash\nxvfb-run -a --server-args="-screen 0, 1024x768x24" /usr/bin/wkhtmltopdf -q $*' > /usr/bin/wkhtmltopdf.sh
chmod a+x /usr/bin/wkhtmltopdf.sh
ln -s /usr/bin/wkhtmltopdf.sh /usr/local/bin/wkhtmltopdf
wkhtmltopdf http://www.google.com output.pdf
How do I install and use Xvfb server on Red Hat Enterprise Linux?
yum install xorg-x11-server-Xvfb x11vnc
- Boot the Xvfb server system in runlevel 3. This is just to ensure that the X server being run is Xvfb and not Xorg.
- SSH to the system in question as root and follow below steps
- Set the appropriate DISPLAY environment variable
export DISPLAY=IP.of.local.system:1
echo $DISPLAY
- Start Xvfb server on display 1.0
Xvfb :1 -screen 0 1024x786x24 &
ps aux | grep X
- Start vnc service on display 1.0
x11vnc -display :1 -bg -nopw -listen IP.of.local.system -xkb
- Start Gnome session
gnome-session &
- Connect to this gnome-session running over Xvfb from a remote system using the vncviewer command
vncviewer IP.of.Xvfb.Server
https://www.linuxquestions.org/questions/linux-newbie-8/xvfb-873567/
法二:
from wand.image import Image
# Resize test.jpg to 200x150 with Wand and save the result as t.jpg.
source_name = "test.jpg"
target_name = "t.jpg"
with Image(filename=source_name) as picture:
    picture.resize(200, 150)
    picture.save(filename=target_name)
Error - AttributeError 'list' object has no attribute 'encode'
I see what's going on now. The problem is that SMTP.sendmail and
email.MIMEText need two different things.
email.MIMEText sets up the "To:" header for the body of the e-mail. It is
ONLY used for displaying a result to the human being at the other end, and
like all e-mail headers, must be a single string. (Note that it does not
actually have to have anything to do with the people who actually receive
the message.)
SMTP.sendmail, on the other hand, sets up the "envelope" of the message for
the SMTP protocol. It needs a Python list of strings, each of which has a
single address.
So, what you need to do is COMBINE the two replies you received. Set
msg['To'] to a single string, but pass the raw list to sendmail:
msg['To'] = ', '.join( emails )
....
s.sendmail( msg['From'], emails, msg.as_string() )
--
- Tim Roberts, ti**@probo.com
Providenza & Boekelheide, Inc.