Text


Example 3-1. Python single/double quote comparison
In [1]: s = "This is a string with 'quotes' in it"
In [2]: s
Out[2]: "This is a string with 'quotes' in it"
In [3]: s = 'This is a string with \'quotes\' in it'
In [4]: s
Out[4]: "This is a string with 'quotes' in it"
In [5]: s = 'This is a string with "quotes" in it'
In [6]: s
Out[6]: 'This is a string with "quotes" in it'
In [7]: s = "This is a string with \"quotes\" in it"
In [8]: s
Out[8]: 'This is a string with "quotes" in it'

Example 3-2. Triple quotes
In [6]: s = 'this is
------------------------------------------------------------
File "<ipython console>", line 1
s = 'this is
^
SyntaxError: EOL while scanning single-quoted string
In [7]: s = '''this is a
...: multiline string'''
In [8]: s
Out[8]: 'this is a\nmultiline string'


Example 3-3. Escape sequences and raw strings
In [1]: s = '\t'
In [2]: s
Out[2]: '\t'
In [3]: print s
In [4]: s = r'\t'
In [5]: s
Out[5]: '\\t'
In [6]: print s
\t
In [7]: s = '''\t'''
In [8]: s
Out[8]: '\t'
In [9]: print s
In [10]: s = r'''\t'''
In [11]: s
Out[11]: '\\t'
In [12]: print s
\t
In [13]: s = r'\''
In [14]: s
Out[14]: "\\'"
In [15]: print s
\'


Example 3-4. In and not in
In [1]: import subprocess
In [2]: res = subprocess.Popen(['uname', '-sv'], stdout=subprocess.PIPE)
In [3]: uname = res.stdout.read().strip()
In [4]: uname
Out[4]: 'Linux #1 SMP Tue Feb 12 02:46:46 UTC 2008'
In [5]: 'Linux' in uname
Out[5]: True
In [6]: 'Darwin' in uname
Out[6]: False
In [7]: 'Linux' not in uname
Out[7]: False
In [8]: 'Darwin' not in uname
Out[8]: True

Example 3-5. find( ) and index( )
In [9]: uname.index('Linux')
Out[9]: 0
In [10]: uname.find('Linux')
Out[10]: 0
In [11]: uname.index('Darwin')
---------------------------------------------------------------------------
<type 'exceptions.ValueError'> Traceback (most recent call last)
/home/jmjones/code/<ipython console> in <module>()
<type 'exceptions.ValueError'>: substring not found
In [12]: uname.find('Darwin')
Out[12]: -1

Example 3-6. String slice
In [13]: smp_index = uname.index('SMP')
In [14]: smp_index
Out[14]: 9
In [15]: uname[smp_index:]
Out[15]: 'SMP Tue Feb 12 02:46:46 UTC 2008'
In [16]: uname[:smp_index]
Out[16]: 'Linux #1 '
In [17]: uname
Out[17]: 'Linux #1 SMP Tue Feb 12 02:46:46 UTC 2008'


Example 3-7. startswith( ) and endswith( )
In [1]: some_string = "Raymond Luxury-Yacht"
In [2]: some_string.startswith("Raymond")
Out[2]: True
In [3]: some_string.startswith("Throatwarbler")
Out[3]: False
In [4]: some_string.endswith("Luxury-Yacht")
Out[4]: True
In [5]: some_string.endswith("Mangrove")
Out[5]: False


Example 3-8. startswith( ) and endswith( ) replacement hack
In [6]: some_string[:len("Raymond")] == "Raymond"
Out[6]: True
In [7]: some_string[:len("Throatwarbler")] == "Throatwarbler"
Out[7]: False
In [8]: some_string[-len("Luxury-Yacht"):] == "Luxury-Yacht"
Out[8]: True
In [9]: some_string[-len("Mangrove"):] == "Mangrove"
Out[9]: False


Example 3-9. lstrip( ), rstrip( ), and strip( )
In [1]: spacious_string = "\n\t Some Non-Spacious Text\n \t\r"
In [2]: spacious_string
Out[2]: '\n\t Some Non-Spacious Text\n \t\r'
In [3]: print spacious_string
Some Non-Spacious Text
In [4]: spacious_string.lstrip()
Out[4]: 'Some Non-Spacious Text\n \t\r'
In [5]: print spacious_string.lstrip()
Some Non-Spacious Text
In [6]: spacious_string.rstrip()
Out[6]: '\n\t Some Non-Spacious Text'
In [7]: print spacious_string.rstrip()
Some Non-Spacious Text
In [8]: spacious_string.strip()
Out[8]: 'Some Non-Spacious Text'
In [9]: print spacious_string.strip()
Some Non-Spacious Text


Example 3-10. upper( ) and lower( )
In [1]: mixed_case_string = "VOrpal BUnny"
In [2]: mixed_case_string == "vorpal bunny"
Out[2]: False
In [3]: mixed_case_string.lower() == "vorpal bunny"
Out[3]: True
In [4]: mixed_case_string == "VORPAL BUNNY"
Out[4]: False
In [5]: mixed_case_string.upper() == "VORPAL BUNNY"
Out[5]: True
In [6]: mixed_case_string.upper()
Out[6]: 'VORPAL BUNNY'
In [7]: mixed_case_string.lower()
Out[7]: 'vorpal bunny'


Example 3-11. split( )
In [1]: comma_delim_string = "pos1,pos2,pos3"
In [2]: pipe_delim_string = "pipepos1|pipepos2|pipepos3"
In [3]: comma_delim_string.split(',')
Out[3]: ['pos1', 'pos2', 'pos3']
In [4]: pipe_delim_string.split('|')
Out[4]: ['pipepos1', 'pipepos2', 'pipepos3']


Example 3-12. split( ) multiple delimiter example
In [1]: multi_delim_string = "pos1XXXpos2XXXpos3"
In [2]: multi_delim_string.split("XXX")
Out[2]: ['pos1', 'pos2', 'pos3']
In [3]: multi_delim_string.split("XX")
Out[3]: ['pos1', 'Xpos2', 'Xpos3']
In [4]: multi_delim_string.split("X")
Out[4]: ['pos1', '', '', 'pos2', '', '', 'pos3']


Example 3-13. Basic regular expression usage
In [1]: import re
In [2]: re_string = "{{(.*?)}}"
In [3]: some_string = "this is a string with {{words}} embedded in\
...: {{curly brackets}} to show an {{example}} of {{regular expressions}}"
In [4]: for match in re.findall(re_string, some_string):
...: print "MATCH->", match
...:
MATCH-> words
MATCH-> curly brackets
MATCH-> example
MATCH-> regular expressions


Example 3-14. Simple regular expression, compiled pattern
In [1]: import re
In [2]: re_obj = re.compile("{{(.*?)}}")
In [3]: some_string = "this is a string with {{words}} embedded in\
...: {{curly brackets}} to show an {{example}} of {{regular expressions}}"
In [4]: for match in re_obj.findall(some_string):
...: print "MATCH->", match
...:
MATCH-> words
MATCH-> curly brackets
MATCH-> example
MATCH-> regular expressions


Example 3-15. re no compile code performance test
#!/usr/bin/env python
import re
def run_re():
    '''Count lines and 'pDq' matches in large_re_file.txt, uncompiled.

    The pattern is handed to re.search() as a plain string for every
    line on purpose: this is the "no compile" side of a performance
    comparison, so the pattern is not precompiled here.

    Returns a (lines, match_count) tuple: the number of lines read and
    the number of those lines containing the pattern.
    Raises IOError if large_re_file.txt cannot be opened.
    '''
    pattern = 'pDq'
    match_count = 0
    lines = 0
    infile = open('large_re_file.txt', 'r')
    try:
        for line in infile:
            if re.search(pattern, line):
                match_count += 1
            lines += 1
    finally:
        # Release the handle even if reading fails part-way through.
        infile.close()
    return (lines, match_count)
if __name__ == "__main__":
    # Scan the file once, then report each total on its own line.
    totals = run_re()
    lines, match_count = totals
    print('%s %s' % ('LINES::', lines))
    print('%s %s' % ('MATCHES::', match_count))


Example 3-16. re compile code performance test
#!/usr/bin/env python
import re
def run_re():
    '''Count lines and 'pDq' matches in large_re_file.txt, precompiled.

    The pattern is compiled once before the loop and the compiled
    object's search() is used for every line.

    Returns a (lines, match_count) tuple: the number of lines read and
    the number of those lines containing the pattern.
    Raises IOError if large_re_file.txt cannot be opened.
    '''
    pattern = 'pDq'
    re_obj = re.compile(pattern)
    match_count = 0
    lines = 0
    infile = open('large_re_file.txt', 'r')
    try:
        for line in infile:
            if re_obj.search(line):
                match_count += 1
            lines += 1
    finally:
        # Release the handle even if reading fails part-way through.
        infile.close()
    return (lines, match_count)
if __name__ == "__main__":
    # Scan the file once, then report each total on its own line.
    totals = run_re()
    lines, match_count = totals
    print('%s %s' % ('LINES::', lines))
    print('%s %s' % ('MATCHES::', match_count))


Example 3-17. Raw strings and regular expressions
In [1]: import re
In [2]: raw_pattern = r'\b[a-z]+\b'
In [3]: non_raw_pattern = '\b[a-z]+\b'
In [4]: some_string = 'a few little words'
In [5]: re.findall(raw_pattern, some_string)
Out[5]: ['a', 'few', 'little', 'words']
In [6]: re.findall(non_raw_pattern, some_string)
Out[6]: []


Example 3-18. Simple grouped group with findall( )
In [1]: import re
In [2]: re_obj = re.compile(r"""(A\W+\b(big|small)\b\W+\b
...: (brown|purple)\b\W+\b(cow|dog)\b\W+\b(ran|jumped)\b\W+\b
...: (to|down)\b\W+\b(the)\b\W+\b(street|moon).*?\.)""",
...: re.VERBOSE)
In [3]: re_obj.findall('A big brown dog ran down the street. \
...: A small purple cow jumped to the moon.')
Out[3]:
[('A big brown dog ran down the street.',
'big',
'brown',
'dog',
'ran',
'down',
'the',
'street'),
('A small purple cow jumped to the moon.',
'small',
'purple',
'cow',
'jumped',
'to',
'the',
'moon')]


Example 3-19. finditer( ) example
In [4]: re_iter = re_obj.finditer('A big brown dog ran down the street. \
...: A small purple cow jumped to the moon.')
In [5]: re_iter
Out[5]: <callable-iterator object at 0xa17ad0>
In [6]: for item in re_iter:
...: print item
...: print item.groups()
...:
<_sre.SRE_Match object at 0x9ff858>
('A big brown dog ran down the street.', 'big', 'brown', 'dog', 'ran',
'down', 'the', 'street')
<_sre.SRE_Match object at 0x9ff940>
('A small purple cow jumped to the moon.', 'small', 'purple', 'cow',
'jumped', 'to', 'the', 'moon')


Example 3-20. Comparison of match( ) and search( )
In [1]: import re
In [2]: re_obj = re.compile('FOO')
In [3]: search_string = ' FOO'
In [4]: re_obj.search(search_string)
Out[4]: <_sre.SRE_Match object at 0xa22f38>
In [5]: re_obj.match(search_string)
In [6]:


Example 3-21. Start and end parameters for search( ) and match( )
In [6]: re_obj.search(search_string, pos=1)
Out[6]: <_sre.SRE_Match object at 0xabe030>
In [7]: re_obj.match(search_string, pos=1)
Out[7]: <_sre.SRE_Match object at 0xabe098>
In [8]: re_obj.search(search_string, pos=1, endpos=3)
In [9]: re_obj.match(search_string, pos=1, endpos=3)
In [10]:


Apache Config File Hacking
Apache config file:
NameVirtualHost 127.0.0.1:80
<VirtualHost localhost:80>
DocumentRoot /var/www/
<Directory />
Options FollowSymLinks
AllowOverride None
</Directory>
ErrorLog /var/log/apache2/error.log
LogLevel warn
CustomLog /var/log/apache2/access.log combined
ServerSignature On
</VirtualHost>
<VirtualHost local2:80>
DocumentRoot /var/www2/
<Directory />
Options FollowSymLinks
AllowOverride None
</Directory>
ErrorLog /var/log/apache2/error2.log
LogLevel warn
CustomLog /var/log/apache2/access2.log combined
ServerSignature On
</VirtualHost>


#!/usr/bin/env python
from cStringIO import StringIO
import re
# Opening tag of a virtual host section; group 1 is the host spec
# (for example "localhost:80").
vhost_start = re.compile(r'<VirtualHost\s+(.*?)>')
# Closing tag of a virtual host section.
vhost_end = re.compile(r'</VirtualHost')
# A DocumentRoot directive: group 1 is the keyword plus its trailing
# whitespace, group 2 is the path.
docroot_re = re.compile(r'(DocumentRoot\s+)(\S+)')


def replace_docroot(conf_string, vhost, new_docroot):
    '''yield new lines of an httpd.conf file where docroot lines matching
    the specified vhost are replaced with the new_docroot

    conf_string -- full text of the configuration file
    vhost       -- host spec exactly as written in the <VirtualHost ...> tag
    new_docroot -- path to put in place of the existing DocumentRoot path

    Every input line is yielded, changed or not, with its line ending
    intact. Lines outside the selected virtual host are never modified.
    '''
    def _swap_path(match):
        # A callable replacement inserts new_docroot literally. A template
        # such as r'\1' + new_docroot would misread a path starting with a
        # digit as part of the group number, and would interpret any
        # backslashes in the path as escapes.
        return match.group(1) + new_docroot

    in_vhost = False
    curr_vhost = None
    for line in StringIO(conf_string):
        vhost_start_match = vhost_start.search(line)
        if vhost_start_match:
            curr_vhost = vhost_start_match.group(1)
            in_vhost = True
        if in_vhost and curr_vhost == vhost:
            # sub() returns the line unchanged when there is no directive.
            line = docroot_re.sub(_swap_path, line)
        if vhost_end.search(line):
            in_vhost = False
        yield line

if __name__ == '__main__':
    import sys
    # Three positional arguments are required; say so instead of failing
    # with an IndexError traceback.
    if len(sys.argv) < 4:
        sys.stderr.write('usage: %s CONF_FILE VHOST NEW_DOCROOT\n' % sys.argv[0])
        sys.exit(1)
    conf_file = sys.argv[1]
    vhost = sys.argv[2]
    docroot = sys.argv[3]
    conf_handle = open(conf_file)
    try:
        conf_string = conf_handle.read()
    finally:
        # Close the config file as soon as its text is in memory.
        conf_handle.close()
    for line in replace_docroot(conf_string, vhost, docroot):
        # Each yielded line already carries its own line ending, so write
        # it verbatim rather than letting print add anything.
        sys.stdout.write(line)


Example 3-22. Enumerating sys.stdin.readline
#!/usr/bin/env python
import sys
# Number every line arriving on standard input, starting from 1.
counter = 1
# iter() with a sentinel keeps calling readline() until it returns the
# empty string, which is how readline() reports end of input.
for line in iter(sys.stdin.readline, ''):
    print("%s: %s" % (counter, line))
    counter += 1


Example 3-23. Enumerating lines by iterating over sys.stdin
#!/usr/bin/env python
import sys
# Number every line arriving on standard input, starting from 0.
for index, text in enumerate(sys.stdin):
    numbered = "%s: %s" % (index, text)
    print(numbered)


Example 3-24. Apache logfile parser—split on whitespace
#!/usr/bin/env python
"""
USAGE:
apache_log_parser_split.py some_log_file
This script takes one command line argument: the name of a log file
to parse. It then parses the log file and generates a report which
associates remote hosts with number of bytes transferred to them.
"""
import sys
def dictify_logline(line):
    '''return a dictionary of the pertinent pieces of an apache combined log file

    Currently, the only fields we are interested in are remote host and bytes sent,
    but we are putting status in there just for good measure.

    The line is split on whitespace; field 0 is taken as the remote host,
    field 8 as the status and field 9 as the bytes sent, all as strings.
    A line with fewer than ten whitespace-separated fields (a blank line,
    for instance) yields the fallback dictionary with remote_host and
    status set to None and bytes_sent set to "0", the same shape the
    regex-based parser in this file returns for a line it cannot match.
    '''
    split_line = line.split()
    if len(split_line) < 10:
        # Too short to hold status and bytes sent: report "nothing parsed"
        # instead of raising IndexError and aborting the whole report.
        return {'remote_host': None,
                'status': None,
                'bytes_sent': "0",
                }
    return {'remote_host': split_line[0],
            'status': split_line[8],
            'bytes_sent': split_line[9],
            }
def generate_log_report(logfile):
    '''return a dictionary of format remote_host=>[list of bytes sent]

    logfile is iterated line by line. Each line is parsed with
    dictify_logline() and the parsed dictionary is printed. When the
    bytes_sent value converts to an int it is appended to the list kept
    for that line's remote host; otherwise the line is left out.
    '''
    hits_by_host = {}
    for raw_line in logfile:
        fields = dictify_logline(raw_line)
        print(fields)
        try:
            size = int(fields['bytes_sent'])
        except ValueError:
            # Not a number: leave this line out of the report.
            continue
        host = fields['remote_host']
        if host not in hits_by_host:
            hits_by_host[host] = []
        hits_by_host[host].append(size)
    return hits_by_host
if __name__ == "__main__":
    # The log file name must be given as the first argument.
    if len(sys.argv) <= 1:
        print(__doc__)
        sys.exit(1)
    infile_name = sys.argv[1]
    try:
        infile = open(infile_name, 'r')
    except IOError:
        # Could not open the named file: explain, show usage, and stop.
        print("You must specify a valid file to parse")
        print(__doc__)
        sys.exit(1)
    log_report = generate_log_report(infile)
    print(log_report)
    infile.close()


Example 3-25. Unit test for Apache logfile parser—split on whitespace
#!/usr/bin/env python
import unittest
import apache_log_parser_split
class TestApacheLogParser(unittest.TestCase):
def setUp(self):
pass
def testCombinedExample(self):
# test the combined example from apache.org
combined_log_entry = '127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] '\
'"GET /apache_pb.gif HTTP/1.0" 200 2326 "http://www.example.com/start.html" '\
'"Mozilla/4.08 [en] (Win98; I ;Nav)"'
self.assertEqual(apache_log_parser_split.dictify_logline(combined_log_entry), {'remote_host': '127.0.0.1', 'status': '200', 'bytes_sent': '2326'})
def testCommonExample(self):
# test the common example from apache.org
common_log_entry = '127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] '\
'"GET /apache_pb.gif HTTP/1.0" 200 2326'

self.assertEqual(apache_log_parser_split.dictify_logline(common_log_entry),
{'remote_host': '127.0.0.1', 'status': '200', 'bytes_sent': '2326'})
def testExtraWhitespace(self):
# test for extra whitespace between fields
common_log_entry = '127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] '\
'"GET /apache_pb.gif HTTP/1.0" 200 2326'
self.assertEqual(apache_log_parser_split.dictify_logline(common_log_entry),
{'remote_host': '127.0.0.1', 'status': '200', 'bytes_sent': '2326'})
def testMalformed(self):
# test for extra whitespace between fields
common_log_entry = '127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700][^\[\]+\] #time
\s+ #whitespace
"[^"]+" #first line of request
\s+ #whitespace
(?P<status>\d+)
\s+ #whitespace
(?P<bytes_sent>-|\d+)
\s* #whitespace
''', re.VERBOSE)
def dictify_logline(line):
    '''return a dictionary of the pertinent pieces of an apache combined log file

    The line is matched from its start against log_line_re. On a match
    the pattern's named groups are returned as a dictionary, with a
    bytes_sent of '-' rewritten to '0'. When the line does not match,
    remote_host and status are None and bytes_sent is "0".
    '''
    matched = log_line_re.match(line)
    if not matched:
        # Unrecognised line: hand back placeholders rather than failing.
        return {'remote_host': None,
                'status': None,
                'bytes_sent': "0",
                }
    fields = matched.groupdict()
    if fields['bytes_sent'] == '-':
        fields['bytes_sent'] = '0'
    return fields
def generate_log_report(logfile):
    '''return a dictionary of format remote_host=>[list of bytes sent]

    logfile is iterated line by line. Each line is parsed with
    dictify_logline() and the parsed dictionary is printed. When the
    bytes_sent value converts to an int it is appended to the list kept
    for that line's remote host; otherwise the line is left out.
    '''
    hits_by_host = {}
    for raw_line in logfile:
        fields = dictify_logline(raw_line)
        print(fields)
        try:
            size = int(fields['bytes_sent'])
        except ValueError:
            # Not a number: leave this line out of the report.
            continue
        host = fields['remote_host']
        if host not in hits_by_host:
            hits_by_host[host] = []
        hits_by_host[host].append(size)
    return hits_by_host
if __name__ == "__main__":
    # The log file name must be given as the first argument.
    if len(sys.argv) <= 1:
        print(__doc__)
        sys.exit(1)
    infile_name = sys.argv[1]
    try:
        infile = open(infile_name, 'r')
    except IOError:
        # Could not open the named file: explain, show usage, and stop.
        print("You must specify a valid file to parse")
        print(__doc__)
        sys.exit(1)
    log_report = generate_log_report(infile)
    print(log_report)
    infile.close()

Example 3-27. ElementTree parse of Tomcat users file
#!/usr/bin/env python
from xml.etree import ElementTree as ET
if __name__ == '__main__':
    infile = '/etc/tomcat5.5/tomcat-users.xml'
    tomcat_users = ET.parse(infile)
    # The relative path 'user' selects the <user> children of the root
    # element. The leading-slash form '/user' on a tree selects the same
    # elements but is flagged with a FutureWarning by current ElementTree.
    for user in tomcat_users.findall('user'):
        # Only show the entry whose name attribute is 'tomcat'.
        if user.get('name') == 'tomcat':
            print(user.attrib)


Example 3-28. Mac OS X system_profiler output parser
#!/usr/bin/env python
import sys
from xml.etree import ElementTree as ET
e = ET.parse('system_profiler.xml')
if __name__ == '__main__':
    # Look for the top-level <dict> whose first <string> names the
    # software data type; its nested <array>/<dict> holds the details.
    # The relative path 'array/dict' replaces the leading-slash form, which
    # current ElementTree flags with a FutureWarning on a tree.
    for d in e.findall('array/dict'):
        if d.find('string').text == 'SPSoftwareDataType':
            sp_data = d.find('array').find('dict')
            break
    else:
        # The loop finished without a break: no matching section exists.
        print("SPSoftwareDataType NOT FOUND")
        sys.exit(1)
    record = []
    # Iterate the element directly to visit its children; getchildren()
    # no longer exists on recent Python versions.
    for child in sp_data:
        record.append(child.text)
        # A <string> child completes a record, which is then printed and
        # reset. The format expects exactly two collected texts.
        if child.tag == 'string':
            print("%-15s -> %s" % tuple(record))
            record = []

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值