文件操作技巧-优快云博客


Example 6-1. Exploring common OS module data methods
In [1]: import os
In [2]: os.getcwd()
Out[2]: '/private/tmp'
In [3]: os.mkdir("/tmp/os_mod_explore")
In [4]: os.listdir("/tmp/os_mod_explore")
Out[4]: []
In [5]: os.mkdir("/tmp/os_mod_explore/test_dir1")
In [6]: os.listdir("/tmp/os_mod_explore")
Out[6]: ['test_dir1']
In [7]: os.stat("/tmp/os_mod_explore")
Out[7]: (16877, 6029306L, 234881026L, 3, 501, 0, 102L,
1207014425, 1207014398, 1207014398)
In [8]: os.rename("/tmp/os_mod_explore/test_dir1",
"/tmp/os_mod_explore/test_dir1_renamed")
In [9]: os.listdir("/tmp/os_mod_explore")
Out[9]: ['test_dir1_renamed']
In [10]: os.rmdir("/tmp/os_mod_explore/test_dir1_renamed")
In [11]: os.rmdir("/tmp/os_mod_explore/")


Example 6-2. Using the shutil module to copy a data tree
In [1]: import os
In [2]: os.chdir("/tmp")
In [3]: os.makedirs("test/test_subdir1/test_subdir2")
In [4]: ls -lR
total 0
drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test/
./test:
total 0
drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test_subdir1/
./test/test_subdir1:
total 0
drwxr-xr-x 2 ngift wheel 68 Mar 31 22:27 test_subdir2/
./test/test_subdir1/test_subdir2:
In [5]: import shutil
In [6]: shutil.copytree("test", "test-copy")
In [19]: ls -lR
total 0
drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test/
drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test-copy/
./test:
total 0
drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test_subdir1/
./test/test_subdir1:
total 0
drwxr-xr-x 2 ngift wheel 68 Mar 31 22:27 test_subdir2/
./test/test_subdir1/test_subdir2:
./test-copy:
total 0
drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test_subdir1/
./test-copy/test_subdir1:
total 0
drwxr-xr-x 2 ngift wheel 68 Mar 31 22:27 test_subdir2/
./test-copy/test_subdir1/test_subdir2:


Example 6-3. Moving a data tree with shutil
In [20]: shutil.move("test-copy", "test-copy-moved")
In [21]: ls -lR
total 0
drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test/
drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test-copy-moved/
./test:
total 0
drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test_subdir1/
./test/test_subdir1:
total 0
drwxr-xr-x 2 ngift wheel 68 Mar 31 22:27 test_subdir2/
./test/test_subdir1/test_subdir2:
./test-copy-moved:
total 0
drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test_subdir1/
./test-copy-moved/test_subdir1:
total 0
drwxr-xr-x 2 ngift wheel 68 Mar 31 22:27 test_subdir2/
./test-copy-moved/test_subdir1/test_subdir2:

Example 6-4. Deleting a data tree with shutil
In [22]: shutil.rmtree("test-copy-moved")
In [23]: shutil.rmtree("test-copy")
In [24]: ll

Example 6-5. Verbose directory walking script
import os
path = "/tmp"
def enumeratepaths(path=path):
        """Returns the path to all the files in a directory recursively"""
        path_collection = []
        for dirpath, dirnames, filenames in os.walk(path):
            for file in filenames:
                fullpath = os.path.join(dirpath, file)
            path_collection.append(fullpath)
        return path_collection
def enumeratefiles(path=path):
        """Returns all the files in a directory as a list"""
        file_collection = []
        for dirpath, dirnames, filenames in os.walk(path):
            for file in filenames:
                file_collection.append(file)
        return file_collection
def enumeratedir(path=path):
        """Returns all the directories in a directory as a list"""
        dir_collection = []
        for dirpath, dirnames, filenames in os.walk(path):
            for dir in dirnames:
                dir_collection.append(dir)
        return dir_collection
if __name__ == "__main__":
            print "\nRecursive listing of all paths in a dir:"
            for path in enumeratepaths():
                print path
                print "\nRecursive listing of all files in dir:"
            for file in enumeratefiles():
                print file
                print "\nRecursive listing of all dirs in dir:"
            for dir in enumeratedir():
                print dir


Example 6-6. Creating reusable directory walking module
import os
class diskwalk(object):
"""API for getting directory walking collections"""
def __init__(self, path):
    self.path = path
def enumeratePaths(self):
"""Returns the path to all the files in a directory as a list"""
    path_collection = []
    for dirpath, dirnames, filenames in os.walk(self.path):
        for file in filenames:
            fullpath = os.path.join(dirpath, file)
            path_collection.append(fullpath)
    return path_collection
def enumerateFiles(self):
"""Returns all the files in a directory as a list"""
    file_collection = []
    for dirpath, dirnames, filenames in os.walk(self.path):
        for file in filenames:
            file_collection.append(file)
    return file_collection
def enumerateDir(self):
"""Returns all the directories in a directory as a list"""
    dir_collection = []
    for dirpath, dirnames, filenames in os.walk(self.path):
        for dir in dirnames:
            dir_collection.append(dir)
    return dir_collection


Example 6-7. Performing an MD5 checksum on files
import hashlib
def create_checksum(path):
"""
Reads in file. Creates checksum of file line by line.
Returns complete checksum total for file.
"""
    fp = open(path)
    checksum = hashlib.md5()
    while True:
        buffer = fp.read(8192)
        if not buffer:break
        checksum.update(buffer)
    fp.close()
    checksum = checksum.digest()
    return checksum


Example 6-8. Performing an MD5 checksum on a directory tree to find duplicates
In [1]: from checksum import createChecksum
In [2]: from diskwalk_api import diskwalk
In [3]: d = diskwalk('/tmp/duplicates_directory')
In [4]: files = d.enumeratePaths()
In [5]: len(files)
Out[5]: 12
In [6]: dup = []
In [7]: record = {}
In [8]: for file in files:
compound_key = (getsize(file),create_checksum(file))
if compound_key in record:
dup.append(file)
else:
record[compound_key] = file
....:
....:
In [9]: print dup
['/tmp/duplicates_directory/image2']


Example 6-9. Finding duplicates
from checksum import create_checksum
from diskwalk_api import diskwalk
from os.path import getsize
def findDupes(path = '/tmp'):
    dup = []
    record = {}
    d = diskwalk(path)
    files = d.enumeratePaths()
    for file in files:
        compound_key = (getsize(file),create_checksum(file))
        if compound_key in record:
            dup.append(file)
        else:
            #print "Creating compound key record:", compound_key
            record[compound_key] = file
    return dup
if __name__ == "__main__":
    dupes = findDupes()
    for dup in dupes:
    print “Duplicate: %s” % dup


Example 6-10. Delete module
#!/usr/bin/env python
import os
class Delete(object):
    """Delete Methods For File Objects"""
    def __init__(self, file):
        self.file = file
    def interactive(self):
    """interactive deletion mode"""
    input = raw_input("Do you really want to delete %s [N]/Y" % self.file)
    if input.upper():
        print "DELETING: %s" % self.file
        status = os.remove(self.file)
    else:
        print "Skipping: %s" % self.file
    return
    def dryrun(self):
        """simulation mode for deletion"""
        print "Dry Run: %s [NOT DELETED]" % self.file
        return
    def delete(self):
        """Performs a delete on a file, with additional conditions
        """
        print "DELETING: %s" % self.file
        try:
            status = os.remove(self.file)
        except Exception, err:
            print err
            return status
if __name__ == "__main__":
    from find_dupes import findDupes
    dupes = findDupes('/tmp')
    for dupe in dupes:
        delete = Delete(dupe)

#delete.dryrun()
#delete.delete()
#delete.interactive()


Example 6-11. Interactively using fnmatch and glob to search for file matches
In [1]: from diskwalk_api import diskwalk
In [2]: files = diskwalk("/tmp")
In [3]: from fnmatch import fnmatch
In [4]: for file in files:
...: if fnmatch(file,"*.txt"):
...: print file
...:
...:
/tmp/file.txt
In [5]: from glob import glob
In [6]: import os
In [7]: os.chdir("/tmp")
In [8]: glob("*")
Out[8]: ['file.txt', 'image.iso', 'music.mp3']


Example 6-12. Renaming a tree full of MP3 files to text files
In [1]: from diskwalk_api import diskwalk
In [2]: from shutil import move
In [3]: from fnmatch import fnmatch
In [4]: files = diskwalk("/tmp")
In [5]: for file in files:
if fnmatch(file, "*.mp3"):
#here we can do anything we want, delete, move, rename...hmmm rename
move(file, "%s.txt" % file)
In [6]: ls -l /tmp/
total 0
-rw-r--r-- 1 ngift wheel 0 Apr 1 21:50 file.txt
-rw-r--r-- 1 ngift wheel 0 Apr 1 21:50 image.iso
-rw-r--r-- 1 ngift wheel 0 Apr 1 21:50 music.mp3.txt
-rw-r--r-- 1 ngift wheel 0 Apr 1 22:45 music1.mp3.txt
-rw-r--r-- 1 ngift wheel 0 Apr 1 22:45 music2.mp3.txt
-rw-r--r-- 1 ngift wheel 0 Apr 1 22:45 music3.mp3.txt


Example 6-13. Simple wrap of rsync
#!/usr/bin/env python
#wraps up rsync to synchronize two directories
from subprocess import call
import sys
source = "/tmp/sync_dir_A/" #Note the trailing slash
target = "/tmp/sync_dir_B"
rsync = "rsync"
arguments = "-a"
cmd = "%s %s %s %s" % (rsync, arguments, source, target)
def sync():
    ret = call(cmd, shell=True)
    if ret !=0:
    print "rsync failed"
    sys.exit(1)
sync()


Example 6-14. An rsync command that doesn’t quit until the job is finished
#!/usr/bin/env python
#wraps up rsync to synchronize two directories
from subprocess import call
import sys
import time
"""this motivated rsync tries to synchronize forever"""
source = "/tmp/sync_dir_A/" #Note the trailing slash
target = "/tmp/sync_dir_B"
rsync = "rsync"
arguments = "-av"
cmd = "%s %s %s %s" % (rsync, arguments, source, target)
def sync():
while True:
    ret = call(cmd, shell=True)
    if ret !=0:
        print "resubmitting rsync"
        time.sleep(30)
    else:
        print "rsync was succesful"
        subprocess.call("mail -s 'jobs done' bofh@example.com", shell=True)
        sys.exit(0)
sync()


Example 6-15. Creating metadata about a filesystem with SQLAlchemy
#!/usr/bin/env python
from sqlalchemy import create_engine
from sqlalchemy import Table, Column, Integer, String, MetaData, ForeignKey
from sqlalchemy.orm import mapper, sessionmaker
import os
#path
path = " /tmp"
#Part 1: create engine
engine = create_engine('sqlite:///:memory:', echo=False)
#Part 2: metadata
metadata = MetaData()
filesystem_table = Table('filesystem', metadata,
    Column('id', Integer, primary_key=True),
    Column('path', String(500)),
    Column('file', String(255)),
)
metadata.create_all(engine)
#Part 3: mapped class
class Filesystem(object):
    def __init__(self, path, file):
        self.path = path
        self.file = file
    def __repr__(self):
        return "[Filesystem('%s','%s')]" % (self.path, self.file)
    #Part 4: mapper function
mapper(Filesystem,filesystem_table)
#Part 5: create session
Session = sessionmaker(bind=engine, autoflush=True, transactional=True)
session = Session()
#Part 6: crawl file system and populate database with results
for dirpath, dirnames, filenames in os.walk(path):
    for file in filenames:
        fullpath = os.path.join(dirpath, file)
        record = Filesystem(fullpath, file)
        session.save(record)
#Part 7: commit to the database
session.commit()
#Part 8: query
for record in session.query(Filesystem):
    print "Database Record Number: %s, Path: %s , File: %s " \
        % (record.id,record.path, record.file)


Example 6-16. Create big text file
In [1]: f = open("largeFile.txt", "w")
In [2]: statement = "This is a big line that I intend to write over and over again."
ln [3]: x = 0
In [4]: for x in xrange(20000):
....: x += 1
....: f.write("%s\n" % statement)
....:
....:
In [4]: ls -l
-rw-r--r-- 1 root root 1236992 Oct 25 23:13 largeFile.txt


Example 6-17. TAR up contents of file
In [1]: import tarfile
In [2]: tar = tarfile.open("largefile.tar", "w")
In [3]: tar.add("largeFile.txt")
In [4]: tar.close()


Example 6-18. TAR up contents of a directory tree
In [27]: import tarfile
In [28]: tar = tarfile.open("temp.tar", "w")
In [29]: import os
In [30]: for root, dir, files in os.walk("/tmp"):
....: for file in filenames:
....:
KeyboardInterrupt
In [30]: for root, dir, files in os.walk("/tmp"):
for file in files:
....: fullpath = os.path.join(root,file)
....: tar.add(fullpath)
....:
....:
In [33]: tar.close()

Example 6-19. Creating bzip2 TAR archive
In [1: tar = tarfile.open("largefilecompressed.tar.bzip2", "w|bz2")
In [2]: tar.add("largeFile.txt")
In [3]: ls -h
foo1.txt fooDir1/ largeFile.txt largefilecompressed.tar.bzip2*
foo2.txt fooDir2/ largefile.tar
ln [4]: tar.close()
In [5]: ls -lh
-rw-r--r-- 1 root root 61M Oct 25 23:15 largeFile.txt
-rw-r--r-- 1 root root 61M Oct 26 00:39 largefile.tar
-rwxr-xr-x 1 root root 10K Oct 26 01:02 largefilecompressed.tar.bzip2*


Example 6-20. Creating a gzip TAR archive
In [10]: tar = tarfile.open("largefile.tar.gzip", "w|gz")
In [11]: tar.add("largeFile.txt")
ln [12]: tar.close()
In [13]: ls -lh
-rw-r--r-- 1 root root 61M Oct 26 01:20 largeFile.txt
-rw-r--r-- 1 root root 61M Oct 26 00:39 largefile.tar
-rwxr-xr-x 1 root root 160K Oct 26 01:24 largefile.tar.gzip*