浅谈Python基础篇

本文深入探讨 Python 中列表、字典、集合、元组的高级应用,包括数据解析、统计重复元素、排序及字典操作;同时覆盖迭代器、生成器的使用,字符串处理技巧,以及文件读写和 JSON、CSV、XML、Excel 的操作方法。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

一、列表、字典、集合、元组的使用

from random import randint, sample

 

# List comprehension: build a list of ten random integers in [-10, 10]

data = [randint(-10, 10) for _ in xrange(10)]



filter(lambda x: x >= 0, data)

[x for x in data if x >= 0]      # noted by the author as the fastest way to keep the non-negative values



# Dict comprehension: keys 1..20 mapped to a random value in [60, 100]

d = {x: randint(60, 100) for x in xrange(1,21)}



{k : v for k, v in d.iteritems() if v > 90}



# Set comprehension: keep the distinct values of data that are divisible by 3

s = set(data)

{x for x in s if x % 3 ==0}

 

 

# Tuples: give the positional fields readable names
student = ('Jim', 16, 'male', 'jim@qq.com')

# 1. Enum-style index constants
NAME, AGE, SEX, EMAIL = range(4)
print(student[NAME])

# 2. collections.namedtuple: a tuple subclass with attribute access
from collections import namedtuple

Student = namedtuple('Student', ['name', 'age', 'sex', 'email'])
s2 = Student('Tom', 16, 'male', 'tom@qq.com')
print(s2.name)

 

 

# 统计列表的重复元素

li = [randint(0, 20) for _ in xrange(30)]

d = dict.fromkeys(li, 0)

# 1. 

for x in li: d[x] += 1

# 2. 

from collections import Counter

d2 = Counter(li)

d2.most_common(3) #重复数最高的三个元素

 

# 字典根据值value排序

sorted([3, 1, 5]) #排序列表

(97, 'a') > (88, 'b') # 元组的比较,每个元素从开始比较

 

d = {x: randint(60, 100) for x in 'abcde'}

# 1.

data = zip(d.itervalues(), d.iterkeys())

sorted(data)

# 2. 

sorted(d.items(), key=lambda x: x[1])

 

 

# Keys common to several dicts
from functools import reduce  # a builtin on Python 2, functools-only on Python 3

sample('abcdefg', 3)
sample('abcdefg', randint(3, 6))  # pick a random number of elements

s1 = {x: randint(1, 4) for x in sample('abcdefg', randint(3, 6))}
s2 = {x: randint(1, 4) for x in sample('abcdefg', randint(3, 6))}
s3 = {x: randint(1, 4) for x in sample('abcdefg', randint(3, 6))}

# 1. Plain loop with membership tests
res = []
for k in s1:
    if k in s2 and k in s3:
        res.append(k)

# 2. Set intersection of the key sets
#    (on Python 2.7, d.viewkeys() gives the same set-like view without copying)
res = set(s1) & set(s2) & set(s3)

# 3. map + reduce: works for any number of dicts
m1 = map(set, [s1, s2, s3])
res = reduce(lambda a, b: a & b, m1)

 

 

# 保持字典有序

d =  {'Jim':(1, 35), 'Leo':(2, 38), 'Tom':(3, 44)}

 

from collections import OrderedDict

d = OrderedDict() #按进入字典的顺序打印

d['Jim'] = (1,35)

d['Leo'] = (2,38)

d['Tom'] = (3,44)

 

from time import time

start = time()

raw_input() #等待输入

# ...

timesecond = time() - start

 

 

# History record
# 1. Keep it in a bounded queue
from collections import deque

q = deque([], 5)
q.append(1)  # once the queue holds 5 items, appending drops the oldest (FIFO)

li = list(q)  # convert to a plain list

# 2. Persist q to a file
import pickle

# Pickle data is bytes, so open in binary mode; `with` closes the file
# even if dump/load raises.
with open('test', 'wb') as fout:
    pickle.dump(q, fout)

# NOTE: only unpickle files you wrote yourself - pickle.load can run
# arbitrary code from a crafted file.
with open('test', 'rb') as fin:
    q2 = pickle.load(fin)

 
二、迭代器、生成器


# 实现可迭代对象、迭代器对象

# 用时访问 并封装到一个对象中

 

# Iterables
li = [1, 2, 3, 4]
text = 'abcde'      # not named `str`: that would shadow the builtin used later

# Iterators
iterl = iter(li)    # li.__iter__()
iters = iter(text)  # on Python 2, str has no __iter__; iter() falls back to __getitem__()

next(iterl)         # next() works on Python 2.6+ and 3 (iterl.next() is Python 2 only)

 

# 1. 城市天气的迭代器和可迭代对象

from collections import Iterable, Iterator

class WIterator(Iterator):
    """Iterator that returns one weather summary per city, fetched on demand.

    The weather for a city is requested only when the iterator reaches it,
    so nothing is downloaded for cities that are never consumed.
    """

    def __init__(self, cities):
        """Store the sequence of city names and start before the first one."""
        self.cities = cities
        self.index = 0

    def getWeather(self, city):
        """Return '<city>: <low>, <high>' for the first forecast entry of `city`.

        Performs an HTTP GET through `requests`; errors raised by the
        request or by a missing key in the response are not caught here.
        """
        import requests

        r = requests.get(u'http://wthrcdn.etouch.cn/weather_mini?city=' + city)
        data = r.json()['data']['forecast'][0]
        return '%s: %s, %s' % (city, data['low'], data['high'])

    def next(self):
        """Return the summary for the next city; raise StopIteration at the end."""
        if self.index >= len(self.cities):
            raise StopIteration
        city = self.cities[self.index]
        self.index += 1
        return self.getWeather(city)

    # Python 3 spells the iterator method __next__; keep both names working.
    __next__ = next

 

class WIterable(Iterable):
    """Iterable over a list of cities; each pass gets its own WIterator."""

    def __init__(self, cities):
        """Remember the city names to iterate over."""
        self.cities = cities

    def __iter__(self):
        """Return a new WIterator positioned at the first city."""
        fresh_iterator = WIterator(self.cities)
        return fresh_iterator

 

for x in WIterable([u'北京', u'长沙', u'广州']):

    print x

 

 

# 2. 使用生成器函数实现可迭代对象

def f():
    """Generator function: yields 1, then 2, printing a trace before each value."""
    print('in f(), 1')
    yield 1
    print('in f(), 2')
    yield 2


g = f()   # runs none of the body yet; returns a generator, which is its own iterator

# print(...) with a single argument behaves the same on Python 2 and 3
for i in g:
    print(i)

 

class PrintNumbers:
    """Iterable over the prime numbers in the closed range [start, end]."""

    def __init__(self, start, end):
        self.start = start
        self.end = end

    def isPrimeNum(self, k):
        """Return True if integer `k` is prime, else False."""
        if k < 2:
            return False
        # Trial division up to sqrt(k) is enough: any larger factor pairs
        # with a smaller one that was already tested.
        i = 2
        while i * i <= k:
            if k % i == 0:
                return False
            i += 1
        return True

    def __iter__(self):
        """Yield each prime in [start, end] in ascending order (generator)."""
        k = self.start
        while k <= self.end:
            if self.isPrimeNum(k):
                yield k
            k += 1


for x in PrintNumbers(1, 100):
    print(x)

 

 

# Iterating in reverse
li = [1, 2, 3, 4, 5]
li.reverse()     # reverses the list in place
li[::-1]         # slicing builds a new list as large as the original

ll = reversed(li)  # builtin reversed(): a reverse iterator, no copy of the list
for i in ll:
    print(i)

 

 

class FloatRange:
    """Range-like iterable over floats, usable with both for-loops and reversed().

    Forward iteration yields start, start + step, ... while the value is
    <= end; reverse iteration yields end, end - step, ... while the value
    is >= start. Values come from repeated addition, so steps that are not
    exactly representable (e.g. 0.1) accumulate rounding error.
    """

    def __init__(self, start, end, step=0.1):
        """Store the bounds and step; raise ValueError if step is not positive."""
        if step <= 0:
            # a non-positive step would never move past the bound
            raise ValueError('step must be positive')
        self.start = start
        self.end = end
        self.step = step

    def __iter__(self):
        t = self.start
        while t <= self.end:
            yield t
            t += self.step

    def __reversed__(self):
        # called by the builtin reversed()
        t = self.end
        while t >= self.start:
            yield t
            t -= self.step


# Forward iteration
for i in FloatRange(1.0, 4.0, 0.5):
    print(i)

# Reverse iteration
for i in reversed(FloatRange(1.0, 4.0, 0.5)):
    print(i)

 

 

 

# 对迭代器做切片操作

from itertools import islice

# islice()

 

li = range(20)

t  = iter(li)

for x in islice(t, 5, 10): print x # 会消耗原来的迭代对象

 

 

 

 

 

# Iterating over several iterables in a single for loop

chinese = [randint(60, 100) for _ in range(40)]
math = [randint(60, 100) for _ in range(40)]
english = [randint(60, 100) for _ in range(40)]

# By index: only works for sequences that support indexing
for i in range(len(math)):
    print(chinese[i] + math[i] + english[i])

# In parallel with zip: works for any iterables
total = []
for c, m, e in zip(chinese, math, english):
    total.append(c + m + e)
    print(c + m + e)

 

 

# 1. 串连多个迭代对象

from itertools import chain

c1 = [randint(60,100) for _ in xrange(40)]

c2 = [randint(60,100) for _ in xrange(42)]

c3 = [randint(60,100) for _ in xrange(45)]

 

for s in chain(c1, c2, c3):

    if s > 90: print s

三、字符串


# Splitting a string that contains several different delimiters

s = "fwerf sd123 ^sdf dfdsf*d dsf 123"
s.split("xy")         # str.split(): whitespace by default, or the given separator

res = s.split(";")
[x.split("|") for x in res]   # 2-D list: split on ";" first, then on "|"
t = []
for x in res:
    t.extend(x.split("|"))    # flatten the 2-D result into t


# 1. Split repeatedly, one delimiter at a time
def aSplit(s, ds):
    """Split `s` on every single-character delimiter in `ds`.

    Returns the list of pieces. Empty strings are kept where two
    delimiters are adjacent. An empty `ds` returns [s].
    """
    res = [s]
    for d in ds:
        # An explicit loop instead of map(): map is lazy on Python 3, so
        # using it only for its side effect would leave the list empty.
        pieces = []
        for x in res:
            pieces.extend(x.split(d))
        res = pieces
    return res


print(aSplit(s, " ^*"))       # the result may contain empty strings

# 2. Regular expression: one call handles a whole set of delimiters
import re

re.split(r'[,;|]+', s)

 

 

 

# 判断字符串a是否以b开头或结尾

#s.startswith() s.endswith() 接收单个字符串或字符串元组

import os, stat

files = [name for name in os.listdir(".") if name.endswith(('.sh', '.py'))]

 

 

 

# Reformatting text inside a string
# Rewrite the dates in a log from 'yyyy-mm-dd' to 'mm/dd/yyyy'
import re

# `with` closes the log file once it has been read
with open("/var/log/dpkg.log") as logfile:
    log = logfile.read()

# Raw strings keep the backslashes of the regex intact.
# Numbered groups: \1 = year, \2 = month, \3 = day
re.sub(r'(\d{4})-(\d{2})-(\d{2})', r'\2/\3/\1', log)

# Named groups make the replacement self-describing
re.sub(r'(?P<year>\d{4})-(?P<mon>\d{2})-(?P<day>\d{2})', r'\g<mon>/\g<day>/\g<year>', log)

 

 

 

 

# Joining many small strings into one big string
s1 = "abcde"
s2 = "12345"
s1 + s2                 # str.__add__(s1, s2); s1 > s2 would call str.__gt__(s1, s2) (operator overloading)

pl = ['<0112>', '<32>', '<1024x768>', '<60>', '<1>', '<100.0>', '<500.0>']
s = ""
for p in pl:
    s += p              # every += builds a temporary string: wasteful when there are many pieces

s.join(s1)              # separator.join(iterable): the argument may be a string or a list

li = ['avc', 123, 'xyz', 456]
''.join([str(x) for x in li])  # list comprehension: builds a full intermediate list first
''.join(str(x) for x in li)    # generator expression: its own parentheses may be dropped when it is the only argument

 

 

 

# 字符串格式对齐

# str.ljust() str.rjust() str.center()

 

s = "abc"

s.ljust(10 ,'=')         # 左对齐,填充=

s.center(10)

 

format(s, '<20')         # 左对齐

format(s, '>20')         # 右对齐

format(s, '^20')         # 居中

 

 

 

 

# Removing unwanted characters from a string
s = ' -------sd   dfadf    2332   +++++++++'
s.strip(' -+')      # strip any of ' ', '-', '+' from both ends
s.lstrip()          # strip leading whitespace only
s.rstrip()          # strip trailing whitespace only

# Delete the character at a fixed position by joining two slices
s[:3] + s[4:]

# Replace one substring
s.replace('\t', '')

# Replace several different characters at once with a regex
import re

re.sub('[\t\r]', '', s)

 

s = 'abc123e3rxyz'

#s.translate()

import string 

tr = string.maketrans('abcxyz', 'xyzabc')

s.translate(tr)

 

s = 'abc\rdfd\n234234\t'

s.translate(None, '\r\t\n')
四、文件读写


# python2 str   unicode

# python3 bytes str

 

# python2

s = u'你好'

s.encode('utf8')     #存储到文件中的格式

 

f = open('hello.txt', 'w')

f.write(s.encode('utf8'))

f.close()

 

f = open('hello.txt', 'r')

t = f.read().decode('utf8')    # 你好

f.close()

 

# python3  字符串就是unicode

strb = b'asdfasdfsdg'

s = '你好'

f = open('hello2.txt', 'wt', encoding='utf8') # 自动完成编解码

f.write(s)

f.close()

 

f = open('hello2.txt', 'rt', encoding='utf8')

s = f.read()

f.close()

 

 

 

# Binary files: read a WAV file, lower its volume and save a copy
import array
import struct

with open('demo.wav', 'rb') as f:
    info = f.read(44)                # file header
    # Decode two header fields (presumably channel count and sample rate
    # in the canonical WAV layout - confirm against the file format).
    struct.unpack('h', info[22:24])
    struct.unpack('i', info[24:28])

    f.seek(0, 2)                     # move to the end of the file
    n = (f.tell() - 44) // 2         # number of 2-byte samples after the header

    buf = array.array('h', [0]) * n  # pre-sized buffer of signed 16-bit values
    f.seek(44)
    f.readinto(buf)

# Scale every sample down; // keeps the values integral on Python 2 and 3
for i, sample in enumerate(buf):
    buf[i] = sample // 8

with open('demo2.wav', 'wb') as f2:
    f2.write(info)
    buf.tofile(f2)

 

 

 

# Using temporary files
# They are removed automatically and keep large data out of memory
from tempfile import TemporaryFile, NamedTemporaryFile

f = TemporaryFile()                        # typically has no visible entry in the file system
f.write(b'abcddee' * 100000)               # bytes literal: the default mode is binary
f.seek(0)
f.read(100)

# A named temporary file can be found on disk through its .name.
# By default it is deleted on close; delete=False keeps it.
ntf = NamedTemporaryFile(delete=False)
fname = ntf.name

 

 

 

 

# Setting the buffering of a file
# I/O is done in blocks, e.g. 4096 bytes per block
f = open('test.txt', 'w', buffering=2048)  # fully buffered: written out when the buffer fills (or on flush/close)
f.close()

f = open('test.txt', 'w', buffering=1)     # line buffered: written out at every '\n'
f.close()

# Unbuffered is buffering=0. Python 3 allows it only in binary mode, so
# open with 'wb' and write bytes (a bytes literal is a plain str on Python 2).
f = open('test.txt', 'wb', buffering=0)    # unbuffered: every write reaches the file at once
f.write(b'abc')
f.close()

 

 

 

 

# Mapping a file into memory
import mmap

f = open('demo.bn', 'r+b')
f.fileno()                     # mmap needs the OS-level file descriptor

# Length 0 maps up to the end of the file; the mapping starts one page
# into the file, so the file must be larger than mmap.PAGESIZE.
m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_WRITE, offset=mmap.PAGESIZE)

# m behaves like a mutable array of bytes
m[4:8] = b'\xff' * 4           # assigning through the map changes the file contents

m.close()
f.close()

 

 

 

 

# Reading and writing CSV data (Python 2: csv files are opened in binary mode)
from urllib import urlretrieve

urlretrieve('http://table.finance.yahoo.com/table.csv?s=000001.sz', 'pingan.csv')

import csv

# `with` closes both files even if reading or writing fails
with open('pingan.csv', 'rb') as rf, open('pingan_c.csv', 'wb') as wf:
    reader = csv.reader(rf)
    header = next(reader)      # first row of the file

    writer = csv.writer(wf)
    writer.writerow(header)

 

 

 

 

 

# Reading and writing JSON data
import requests
import json

# Record two seconds of audio with the project's Record helper
from record import Record

record = Record(channel=1)
audioData = record.record(2)

# Fetch an access token; the JSON response is decoded with json.loads
from secret import API_KEY, SECRET_KEY

authUrl = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id=" + API_KEY + "&client_secret=" + SECRET_KEY

response = requests.get(authUrl)
res = json.loads(response.content)
token = res['access_token']

# Baidu speech recognition: post the audio, read the first result
cuid = 'xxxxxxxxxxxxx'
srvUrl = 'http://vop.baidu.com/server_api?cuid=' + cuid + '&token=' + token
httpHeader = {'Content-Type': 'audio/wav; rate = 8000'}

response = requests.post(srvUrl, headers=httpHeader, data=audioData)
res = json.loads(response.content)
text = res['result'][0]

print(text)

 

 

# json.dumps()  Python object (list, dict, ...) -> JSON string
# json.dumps(data, sort_keys=True)
# json.loads()  JSON string -> Python object

l = [1, 2, 'abc', {'name': 'Bob', 'age': 13}]  # sample data to serialise

# json.dump writes text, so the file is opened in text mode
with open('demo.json', 'w') as f:
    json.dump(l, f)  # write l to the file as JSON

 

 

 

 

 

 

# Parsing an XML document
from xml.etree.ElementTree import parse

with open('demo.xml') as f:
    et = parse(f)

root = et.getroot()
root.tag        # element name
root.attrib     # dict of attributes
root.text       # text directly inside the element

# Iterating an element walks its direct children
for child in root:
    print(child.get('name'))

root.find('country')                     # first matching direct child
root.findall('country')                  # all matching direct children
for e in root.iterfind('country'):       # same matches, produced lazily
    print(e.get('name'))

 

 

from xml.etree.ElementTree import Element, ElementTree, tostring

e = Element('Data')

e.set('name', 'abc')

 

e2 = Element('Row')

e3 = Element('Open')

e3.text = '8.80'

e2.append(e3)

 

e.append(e2)

 

tostring(e)

 

et = ElementTree(e)

et.write('demo.xml')

 

 

 

 

 

# 读写excel文件

import xlrd, xlwt

 

book = xlrd.open_workbook('demo.xls')

book.sheets()

 

sheet = book.sheet_by_index(0)

rows = sheet.nrows

cols = sheet.ncols

 

cell = sheet.cell(0,0)               #(0,0)单元格

cell.ctype

cell.value

五、派生内置不可变类型并修改其实例化行为


class IntTuple(tuple):
    """Tuple that keeps only the positive integers of the iterable it is built from."""

    def __new__(cls, iterable):
        # A tuple is immutable, so its contents must be chosen here:
        # __new__ runs before __init__ and creates the object.
        g = (x for x in iterable if isinstance(x, int) and x > 0)
        return super(IntTuple, cls).__new__(cls, g)

    def __init__(self, iterable):
        # The tuple already exists at this point, so filtering `iterable`
        # here would have no effect. The argument is not forwarded:
        # object.__init__ accepts no extra arguments.
        super(IntTuple, self).__init__()


t = IntTuple([1, -1, 'abc', 6, ['x', 'y'], 3])
print(t)
六、使用描述符对实例属性做类型检查


# Descriptor: a class that defines __get__() / __set__() / __delete__()

class Attr(object):
    """Data descriptor that type-checks every assignment to an instance attribute.

    The value is stored in the owning instance's __dict__ under `name`.
    """

    def __init__(self, name, type_):
        self.name = name      # key used in the instance __dict__
        self.type_ = type_    # type every assigned value must be an instance of

    def __get__(self, instance, cls):
        if instance is None:
            # accessed on the class itself: hand back the descriptor
            return self
        try:
            return instance.__dict__[self.name]
        except KeyError:
            # report an unset attribute the way ordinary attributes do
            raise AttributeError(self.name)

    def __set__(self, instance, value):
        if not isinstance(value, self.type_):
            raise TypeError('expected %s' % self.type_)
        instance.__dict__[self.name] = value

    def __delete__(self, instance):
        try:
            del instance.__dict__[self.name]
        except KeyError:
            raise AttributeError(self.name)


class Person(object):
    """Example owner class whose attributes are type-checked by Attr."""
    name = Attr('name', str)
    age = Attr('age', int)
    hgt = Attr('height', float)   # stored under the key 'height'


p = Person()
p.name = 'Bob'
print(p.name)

try:
    p.age = '17'      # wrong type: Attr.__set__ raises TypeError
except TypeError as err:
    print(err)
七、在环状数据结构中管理内存


import sys

 

class A(object):
    """Minimal class whose finaliser reports when an instance is destroyed."""

    def __del__(self):                  # called once the reference count drops to 0
        print('in A.__del__')


a = A()
a2 = a
# getrefcount's own argument is one more reference to the object, hence the -1
print(sys.getrefcount(a) - 1)
del a
del a2                                  # last reference gone: __del__ runs here

 

 

# Circular references
class Data(object):
    """Payload object that keeps a strong reference back to its owning Node."""

    def __init__(self, value, owner):
        self.owner = owner
        self.value = value

    def __str__(self):
        return "%s's data, value is %s" % (self.owner, self.value)

    def __del__(self):
        print('in Data.__del__')


class Node(object):
    """Owner object: creates a Data that points back at it, forming a reference cycle."""

    def __init__(self, value):
        self.data = Data(value, self)

    def __del__(self):
        print('in Node.__del__')

 

node = Node(100)

del node                                  # 此时Data Node不会被回收掉

raw_input('wait...')

 

 

# Using weak references
import weakref

a = A()                   # a fresh object: the earlier `a` has already been deleted
a_wref = weakref.ref(a)   # a weak reference does not keep the object alive
a2 = a_wref()             # calling it returns the object, or None once it is gone


class Data(object):
    """Payload object that refers back to its owning Node only weakly."""

    def __init__(self, value, owner):
        self.owner = weakref.ref(owner)  # weak: adds no reference to the owner
        self.value = value

    def __str__(self):
        return "%s's data, value is %s" % (self.owner(), self.value)

    def __del__(self):
        print('in Data.__del__')


node2 = Node(100)
del node2    # Data no longer holds Node strongly, so both Node and Data are reclaimed here
 

 

row = sheet.row(1)                   # list of Cell objects in row 1
data = sheet.row_values(1, 1)        # values of row 1, skipping its first cell

# Add a 'Total' header cell in a new column after the existing ones
sheet.put_cell(0, cols, xlrd.XL_CELL_TEXT, u'Total', None)

wbook = xlwt.Workbook()
wsheet = wbook.add_sheet('sheet1')
style = xlwt.easyxf('align: vertical center, horizontal center')

# Copy every cell of the source sheet. Valid indices stop at nrows - 1,
# and row_len() guards against rows shorter than the widest one.
for r in range(sheet.nrows):
    for c in range(sheet.row_len(r)):
        wsheet.write(r, c, sheet.cell_value(r, c), style)

wbook.save('output.xls')             # saving is done on the workbook, not on a sheet

八、通过实例方法名的字符串调用方法


# Circle, Triangle and Rectangle each name their area method differently;
# pick the right one by looking the method name up on the object.

# 1. getattr fetches an attribute by name, and methods are attributes too

from lib1 import Circle
from lib2 import Triangle
from lib3 import Rectangle

 

def getArea(shape):
    """Return the area of `shape`, whichever name its area method goes by.

    Tries the method names 'area', 'getArea' and 'get_area' in that order
    and calls the first one found. Returns None if the object has none.
    """
    for name in ('area', 'getArea', 'get_area'):
        f = getattr(shape, name, None)
        if callable(f):
            return f()
    return None

 

shape1 = Circle(2)
shape2 = Triangle(3, 4, 5)
shape3 = Rectangle(6, 4)

shapes = [shape1, shape2, shape3]
# A list comprehension prints as a list on both Python 2 and 3
print([getArea(shape) for shape in shapes])

 

 

# 2. Using the operator module from the standard library

from operator import methodcaller

s = "abc123abc456"
s.find('abc', 4)

# methodcaller(name, *args) builds a callable f such that f(obj) is
# obj.name(*args); this is the same call as the line above.
methodcaller('find', 'abc', 4)(s)
十、让对象支持上下文管理


# To work with `with`, a class defines __enter__ and __exit__, which are
# called when the with-block starts and ends respectively.
class test(object):
    """Skeleton context manager; put the real attributes and methods here."""

    def __enter__(self):
        # The return value is what `as k` binds to
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Release resources here. Returning a false value lets any
        # exception raised inside the with-block propagate.
        return False


with test() as k:
    pass

 
十一、类的比较操作


# Overloading the comparison operators
# <    __lt__()
# >    __gt__()
# <=   __le__()
# >=   __ge__()

# With the total_ordering decorator it is enough to define __eq__ plus
# any one of the methods above; the rest are derived.

from functools import total_ordering


@total_ordering
class Rectangle(object):
    """Rectangle of width `w` and height `h`, compared by area."""

    def __init__(self, w, h):
        self.w = w
        self.h = h

    def area(self):
        """Return the area, w * h."""
        return self.w * self.h

    def __lt__(self, obj):
        # `obj` may be any object with an area() method
        print('in __lt__')
        return self.area() < obj.area()

    def __eq__(self, obj):
        print('in __eq__')
        return self.area() == obj.area()

 

class Circle(object):
    """Circle of radius `r`; exposes area() so it can be compared with other shapes."""

    def __init__(self, r):
        self.r = r

    def area(self):
        """Return the area, using 3.14 as the value of pi."""
        return self.r ** 2 * 3.14

 

r1 = Rectangle(5, 3)
r2 = Rectangle(4, 4)
c1 = Circle(3)

print(r1 >= r2)
print(r2 >= c1)   # comparing with a Circle works because only area() is used
 

 

 
 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值