cut:显示切割的行数据
f:选择显示的列
s:不显示没有分隔符的行
d:自定义分隔符
sort:排序文件的行
n:按数值排序
r:倒序
t:自定义分隔符
k:选择排序列
u:合并相同行
f:忽略大小写
wc - print newline, word, and byte counts for each file
-c, --bytes
print the byte counts
-m, --chars
print the character counts
-l, --lines
print the newline counts
cut
原文档
oxx12121212ooxx
ooxx 12121212
oox 12121212
1212 ooxx 1212
oo3xx
oo4xx
ooWxx
oomxx
$ooxx
oo1234xx
ooxyzxx
Demo1
筛选以空格分割的第1列
[root@linux1 tmp]# cut -d' ' -f1 grep.text
oxx12121212ooxx
ooxx
oox
1212
oo3xx
oo4xx
ooWxx
oomxx
$ooxx
oo1234xx
ooxyzxx
Demo2
筛选以空格分割的第1-3列
[root@linux1 tmp]# cut -d' ' -f1-3 grep.text
oxx12121212ooxx
ooxx 12121212
oox 12121212
1212 ooxx 1212
oo3xx
oo4xx
ooWxx
oomxx
$ooxx
oo1234xx
ooxyzxx
Demo3
不显示没有分隔符的行
[root@linux1 tmp]# cut -d' ' -f1-3 -s grep.text
ooxx 12121212
oox 12121212
1212 ooxx 1212
sort 文本排序
sort - sort lines of text files
-n, --numeric-sort
compare according to string numerical value
-r, --reverse
reverse the result of comparisons
-k, --key=POS1[,POS2]
start a key at POS1 (origin 1), end it at POS2 (default end of line)
-t, --field-separator=SEP
use SEP instead of non-blank to blank transition
-u, --unique
with -c, check for strict ordering; without -c, output only the first of an equal run
-f, --ignore-case
fold lower case to upper case characters
源文件
sort.txt
nana 12
apple 1
orange 8
demo1 默认字典排序
[root@linux1 tmp]# sort sort.txt
apple 1
nana 12
orange 8
demo2 数字排序
[root@linux1 tmp]# sort -t' ' -k2 -n sort.txt
apple 1
orange 8
nana 12
demo3 数字倒序排序
[root@linux1 tmp]# sort -t' ' -k2 -n -r sort.txt
nana 12
orange 8
apple 1
wc - word count
[root@linux1 tmp]# wc sort.txt
3 6 25 sort.txt
[root@linux1 tmp]# cat -A sort.txt
nana 12$
apple 1$
orange 8$
[root@linux1 tmp]# man cat
[9]+ Stopped man cat
[root@linux1 tmp]# cat -b sort.txt
1 nana 12
2 apple 1
3 orange 8
[root@linux1 tmp]# wc -l sort.txt
3 sort.txt
[root@linux1 tmp]# cat sort.txt | wc -l
3
[root@linux1 tmp]# cat sort.txt | wc
3 6 25
sed
sed - stream editor for filtering and transforming text
-i[SUFFIX], --in-place[=SUFFIX]
edit files in place (makes backup if extension supplied). The default operation mode is to break symbolic and
hard links. This can be changed with --follow-symlinks and --copy.
直接修改
a \
text Append text, which has each embedded newline preceded by a backslash.
在指定行后添加
i \
text Insert text, which has each embedded newline preceded by a backslash.
在指定行前添加
d
Delete pattern space. Start next cycle.
删除指定行
s/regexp/replacement/
查找并替换
Attempt to match regexp against the pattern space. If successful, replace that portion matched with replacement.
The replacement may contain the special character & to refer to that portion of the pattern space which matched,
and the special escapes \1 through \9 to refer to the corresponding matching sub-expressions in the regexp.
demo1 复制第二行
注:加参数 -i 直接修改
[root@linux1 tmp]# sed '2p' sort.txt
nana 12
apple 1
apple 1
orange 8
demo2 删除第二行
[root@linux1 tmp]# sed '2d' sort.txt
nana 12
orange 8
demo3 第二行前插入niu
[root@linux1 tmp]# sed -i '2iniu' sort.txt
nana 12
niu
apple 1
orange 8
demo3 第二行后插入niu
[root@linux1 tmp]# sed -i '2aniu' sort.txt
nana 12
apple 1
niu
orange 8
demo4 第二行后插入niu
[root@linux1 tmp]# sed -i '2aniu' sort.txt
nana 12
apple 1
niu
orange 8
demo5 替换文件中的niu为bin
查看源文件
[root@linux1 tmp]# cat sort.txt
nana 12
niu
niu
apple 1
orange 8
尝试修改
[root@linux1 tmp]# sed 's/niu/bin/' sort.txt
nana 12
bin
bin
apple 1
orange 8
加入参数 -i 直接修改
[root@linux1 tmp]# sed -i 's/niu/bin/' sort.txt
查看修改后的文件
[root@linux1 tmp]# cat sort.txt
nana 12
bin
bin
apple 1
orange 8
demo6 sed的综合练习
通过复制获得源文件
[root@linux1 tmp]# cp /etc/inittab ./
查看原文件
[root@linux1 tmp]# cat inittab
# inittab is only used by upstart for the default runlevel.
#
# ADDING OTHER CONFIGURATION HERE WILL HAVE NO EFFECT ON YOUR SYSTEM.
#
# System initialization is started by /etc/init/rcS.conf
#
# Individual runlevels are started by /etc/init/rc.conf
#
# Ctrl-Alt-Delete is handled by /etc/init/control-alt-delete.conf
#
# Terminal gettys are handled by /etc/init/tty.conf and /etc/init/serial.conf,
# with configuration in /etc/sysconfig/init.
#
# For information on how to write upstart event handlers, or how
# upstart works, see init(5), init(8), and initctl(8).
#
# Default runlevel. The runlevels used are:
# 0 - halt (Do NOT set initdefault to this)
# 1 - Single user mode
# 2 - Multiuser, without NFS (The same as 3, if you do not have networking)
# 3 - Full multiuser mode
# 4 - unused
# 5 - X11
# 6 - reboot (Do NOT set initdefault to this)
#
id:3:initdefault:
将文件最后一行 id:3:initdefault: 中的数字3修改为8
[root@linux1 tmp]# sed 's/\(id:\)[0-9]\(:initdefault:\)/\18\2/' inittab
# inittab is only used by upstart for the default runlevel.
#
# ADDING OTHER CONFIGURATION HERE WILL HAVE NO EFFECT ON YOUR SYSTEM.
#
# System initialization is started by /etc/init/rcS.conf
#
# Individual runlevels are started by /etc/init/rc.conf
#
# Ctrl-Alt-Delete is handled by /etc/init/control-alt-delete.conf
#
# Terminal gettys are handled by /etc/init/tty.conf and /etc/init/serial.conf,
# with configuration in /etc/sysconfig/init.
#
# For information on how to write upstart event handlers, or how
# upstart works, see init(5), init(8), and initctl(8).
#
# Default runlevel. The runlevels used are:
# 0 - halt (Do NOT set initdefault to this)
# 1 - Single user mode
# 2 - Multiuser, without NFS (The same as 3, if you do not have networking)
# 3 - Full multiuser mode
# 4 - unused
# 5 - X11
# 6 - reboot (Do NOT set initdefault to this)
#
id:8:initdefault:
demo7 sed 修改ip地址
查看源文件
[root@linux1 tmp]# cat ifcfg-eth0
DEVICE=eth0
TYPE=Ethernet
ONBOOT=yes
NM_CONTROLLED=yes
BOOTPROTO=static
IPADDR=192.168.180.101
NETMASK=255.255.255.0
GATEWAY=192.168.180.2
DNS1=114.114.114.114
DNS2=8.8.8.8
不严格的修改方式
[root@linux1 tmp]# sed 's/\(IPADDR=\([0-9]\?[0-9][0-9]\?.\)\{3\}\).*/\188/' ifcfg-eth0
DEVICE=eth0
TYPE=Ethernet
ONBOOT=yes
NM_CONTROLLED=yes
BOOTPROTO=static
IPADDR=192.168.180.88
NETMASK=255.255.255.0
GATEWAY=192.168.180.2
DNS1=114.114.114.114
DNS2=8.8.8.8
严格修改方式
ip地址的分类情况
0-9
10-99
100-199
200-249
250-255
[root@linux1 tmp]# sed 's/\(IPADDR=\(\([0-9]\|[1-9][0-9]\|1[0-9][0-9]\|2[0-4][0-9]\|25[0-5]\)\.\)\{3\}\).*/\199/' ifcfg-eth0
DEVICE=eth0
TYPE=Ethernet
ONBOOT=yes
NM_CONTROLLED=yes
BOOTPROTO=static
IPADDR=192.168.180.99
NETMASK=255.255.255.0
GATEWAY=192.168.180.2
DNS1=114.114.114.114
DNS2=8.8.8.8
awk
gawk - pattern scanning and processing language
-F fs
--field-separator fs
Use fs for the input field separator (the value of the FS predefined variable).
NF The number of fields in the current input record.
NR The total number of input records seen so far.
只是显示/etc/passwd的账户:CUT
passwd 文件位于 /etc/passwd(此处使用复制到当前目录的副本)
[root@linux1 tmp]# awk -F':' '{print $1}' passwd
root
bin
daemon
adm
lp
sync
shutdown
halt
mail
uucp
operator
games
gopher
ftp
nobody
vcsa
saslauth
postfix
sshd
只显示/etc/passwd的账户和账户对应的shell,账户与shell之间以逗号分割,并且在所有行开始前添加列名name,shell,在最后一行添加"blue,/bin/nosh"(cut,sed)(注:下面的示例命令实际使用制表符而非逗号分隔,且末行只输出了blue)
[root@linux1 tmp]# awk -F':' ' BEGIN{print "name,shell"}{ print $1"\t" $7"\t"}END{print "blue"}' passwd
name,shell
root /bin/bash
bin /sbin/nologin
daemon /sbin/nologin
adm /sbin/nologin
lp /sbin/nologin
sync /bin/sync
shutdown /sbin/shutdown
halt /sbin/halt
mail /sbin/nologin
uucp /sbin/nologin
operator /sbin/nologin
games /sbin/nologin
gopher /sbin/nologin
ftp /sbin/nologin
nobody /sbin/nologin
vcsa /sbin/nologin
saslauth /sbin/nologin
postfix /sbin/nologin
sshd /sbin/nologin
blue
搜索/etc/passwd有root关键字的所有行
[root@linux1 tmp]# awk '/root/ {print $0}' passwd
root:x:0:0:root:/root:/bin/bash
operator:x:11:0:operator:/root:/sbin/nologin
统计/etc/passwd文件中,每行的行号,每行的列数,对应的完整行内容
[root@linux1 tmp]# awk -F':' '{print NR"\t" NF"\t" $0}' passwd
1 7 root:x:0:0:root:/root:/bin/bash
2 7 bin:x:1:1:bin:/bin:/sbin/nologin
3 7 daemon:x:2:2:daemon:/sbin:/sbin/nologin
4 7 adm:x:3:4:adm:/var/adm:/sbin/nologin
5 7 lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin
6 7 sync:x:5:0:sync:/sbin:/bin/sync
7 7 shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown
8 7 halt:x:7:0:halt:/sbin:/sbin/halt
9 7 mail:x:8:12:mail:/var/spool/mail:/sbin/nologin
10 7 uucp:x:10:14:uucp:/var/spool/uucp:/sbin/nologin
11 7 operator:x:11:0:operator:/root:/sbin/nologin
12 7 games:x:12:100:games:/usr/games:/sbin/nologin
13 7 gopher:x:13:30:gopher:/var/gopher:/sbin/nologin
14 7 ftp:x:14:50:FTP User:/var/ftp:/sbin/nologin
15 7 nobody:x:99:99:Nobody:/:/sbin/nologin
16 7 vcsa:x:69:69:virtual console memory owner:/dev:/sbin/nologin
17 7 saslauth:x:499:76:"Saslauthd user":/var/empty/saslauth:/sbin/nologin
18 7 postfix:x:89:89::/var/spool/postfix:/sbin/nologin
19 7 sshd:x:74:74:Privilege-separated SSH:/var/empty/sshd:/sbin/nologin
创建 awk.txt
Tom 0 2012-12-11 car 3000
John 1 2013-01-13 bike 1000
vivi 1 2013-01-18 car 2800
Tom 0 2013-01-20 car 2500
John 1 2013-01-28 bike 3500
统计:只取第3列日期中月份为01的记录,按第1列姓名累加第5列的数值;第2列为0时标记为manger,否则标记为worker
[root@linux1 tmp]# awk '{ split( $3 ,date,"-");if(date[2]=="01"){name[$1]+=$5;if($2=="0"){job[$1]="manger"}else{job[$1]="worker"}}} END{for( i in name){ print i "\t" job[i] "\t" name[i]}}' awk.txt
vivi worker 2800
Tom manger 2500
John worker 4500
本文详细介绍Linux下cut、sort、wc、sed及awk等文本处理工具的使用方法与技巧,包括行数据切割、文本排序、字符计数、文本过滤与转换等功能,通过实际案例展示如何高效操作文本文件。
16

被折叠的 条评论
为什么被折叠?



