awk命令扩展使用操作

最新推荐文章于 2024-08-05 22:01:46 发布

lyuharvey

最新推荐文章于 2024-08-05 22:01:46 发布

阅读量158

点赞数

CC 4.0 BY-SA版权

文章标签： awk 操作系统 python

原文链接：https://my.oschina.net/u/996931/blog/2250974

2019独角兽企业重金招聘Python工程师标准>>>

awk 中使用外部shell变量

示例1

[root@centos01 t1022]# A=888
[root@centos01 t1022]# echo "" | awk -v GET_A=$A '{print GET_A}'
888
[root@centos01 t1022]# echo "aaaaaaaaaaaaa" |  awk -v GET_A=$A '{print GET_A}'
888

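说明：-v选项用于在awk程序执行之前给awk变量赋值，这里表示将shell变量A的值赋予awk变量GET_A。有多少个变量需要赋值，就需要多少个-v选项（建议写成 -v GET_A="$A"，避免变量值中含有空格时被shell拆分）。前面的echo "string"并不是awk的参数，而是通过管道给awk提供一行标准输入，这样主体动作块{print GET_A}才会被执行一次；输入的具体内容不影响结果。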

示例2

[root@centos01 t1022]# cat test.txt
1111111:13443253456
2222222:13211222122
1111111:13643543544
3333333:12341243123
2222222:12123123123
[root@centos01 t1022]# cat 1.sh
#!/bin/bash
sort -n test.txt | awk -F ':' '{print $1}' | uniq > t.txt
for id in `cat t.txt`;do
    echo "[$id]"
    awk -v get_id=$id -F ':' '$1==get_id {print $2}' test.txt
    # 或者 awk -F ':' '$1=="'$id'" {print $2}' test.txt
done

[root@centos01 t1022]# bash 1.sh
[1111111]
13443253456
13643543544
[2222222]
13211222122
12123123123
[3333333]
12341243123

awk 合并文件

[root@centos01 t1022]# cat p1.txt
1 aa
2 bb
3 ee
4 ss
[root@centos01 t1022]# cat p2.txt
1 ab
2 cd
3 ad
4 bd
5 de
[root@centos01 t1022]# awk 'NR==FNR{a[$1]=$2}NR>FNR{print $0,a[$1]}' p1.txt p2.txt
1 ab aa
2 cd bb
3 ad ee
4 bd ss
5 de

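说明: NR表示awk到目前为止读取的总行数（跨所有输入文件累计），FNR表示在当前文件中已读取的行数（每换一个文件就从1重新计数）。所以NR==FNR只在读取第一个文件p1.txt时成立，此时把p1.txt的第2列按第1列为键存入数组a；同理NR>FNR表示正在读取p2.txt，此时打印p2.txt的整行并追加数组a中键相同的值（p1.txt中没有对应键时追加为空，如上面的第5行）。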

把一个文件多行连接成一行

[root@centos01 t1022]# cat p1.txt
1
2
3
4
[root@centos01 t1022]# f=`cat p1.txt`;echo $f
1 2 3 4
[root@centos01 t1022]# awk '{printf("%s",$0)}' p1.txt 
1 2 3 4  [root@centos01 t1022]#   # 打印后没有换行，交互不是特别好,加echo处理
[root@centos01 t1022]# awk '{printf("%s",$0)}' p1.txt;echo
1 2 3 4
[root@centos01 t1022]# paste -s -d '' p1.txt
1 2 3 4
[root@centos01 t1022]# cat p1.txt |xargs
1 2 3 4

[root@centos01 t1022]# cat p1.txt|xargs|sed 's/ /+/g'
1+2+3+4

扩展：

gdb安装 yum install -y gdb
gdb当计算器使用

[root@centos01 t1022]# gdb
GNU gdb (GDB) Red Hat Enterprise Linux 7.6.1-110.el7
Copyright (C) 2013 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-redhat-linux-gnu".
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>.
(gdb) p 3+2
$1 = 5
(gdb) p 128+12435
$2 = 12563

awk中gsub函数的使用

# 把test01.txt文件中的所有root替换为ABC，并打印发生了替换的行（gsub返回替换次数，作为条件时非0即为真，所以不含root的行不会输出）
[root@centos01 t1022]# cat test01.txt
0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin
adm:x:3:4:adm:/var/adm:/sbin/nologin
lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin
sync:x:5:0:sync:/sbin:/bin/sync
shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown
halt:x:7:0:halt:/sbin:/sbin/halt
mail:x:8:12:mail:/var/spool/mail:/sbin/nologin
operator:x:11:0:operator:/root:/sbin/nologin
[root@centos01 t1022]#
[root@centos01 t1022]# awk 'gsub(/root/, "ABC")' test01.txt
0:0:ABC:/ABC:/bin/bash
operator:x:11:0:operator:/ABC:/sbin/nologin

# 替换每行第一次出现的root为ABC
[root@centos01 t1022]# awk 'sub(/root/, "ABC")' test01.txt
0:0:ABC:/root:/bin/bash
operator:x:11:0:operator:/ABC:/sbin/nologin

# 替换$3中的root为ABC打印出来（修改字段后$0会用输出分隔符OFS重新拼接，OFS默认是空格，所以输出中的冒号变成了空格；如需保留冒号可加 -v OFS=':'）
[root@centos01 t1022]# awk -F ':' 'gsub(/root/, "ABC", $3) {print $0}' test01.txt
0 0 ABC /root /bin/bash

awk 截取指定多个域为一行

[root@centos01 t1022]# cat test01.txt
0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin
adm:x:3:4:adm:/var/adm:/sbin/nologin
lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin
sync:x:5:0:sync:/sbin:/bin/sync
shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown
halt:x:7:0:halt:/sbin:/sbin/halt
mail:x:8:12:mail:/var/spool/mail:/sbin/nologin
operator:x:11:0:operator:/root:/sbin/nologin

[root@centos01 t1022]# cat 2.sh
for i in `seq 1 10`
do
    awk -F ':' -v a=$i '{$a;printf $a ""}' test01.txt
    echo
done

[root@centos01 t1022]#  bash 2.sh
0bindaemonadmlpsyncshutdownhaltmailoperator
0xxxxxxxxx
root1234567811
/root1247000120
/bin/bashbindaemonadmlpsyncshutdownhaltmailoperator
/bin/sbin/var/adm/var/spool/lpd/sbin/sbin/sbin/var/spool/mail/root
/sbin/nologin/sbin/nologin/sbin/nologin/sbin/nologin/bin/sync/sbin/shutdown/sbin/halt/sbin/nologin/sbin/nologin

grep 或 egrep 或awk 过滤两个或多个关键词

grep -E '123|abc' filename # 找出文件（filename）中包含123或者包含abc的行
egrep '123|abc' filename # 用egrep同样可以实现
awk '/123|abc/' filename # awk 的实现方式

[root@centos01 t1022]# cat test01.txt
0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin
adm:x:3:4:adm:/var/adm:/sbin/nologin
lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin
sync:x:5:0:sync:/sbin:/bin/sync
shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown
halt:x:7:0:halt:/sbin:/sbin/halt
mail:x:8:12:mail:/var/spool/mail:/sbin/nologin
operator:x:11:0:operator:/root:/sbin/nologin

[root@centos01 t1022]# grep -E 'x:|nologin' test01.txt
bin:x:1:1:bin:/bin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin
adm:x:3:4:adm:/var/adm:/sbin/nologin
lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin
sync:x:5:0:sync:/sbin:/bin/sync
shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown
halt:x:7:0:halt:/sbin:/sbin/halt
mail:x:8:12:mail:/var/spool/mail:/sbin/nologin
operator:x:11:0:operator:/root:/sbin/nologin

[root@centos01 t1022]# egrep 'x:|nologin' test01.txt 
bin:x:1:1:bin:/bin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin
adm:x:3:4:adm:/var/adm:/sbin/nologin
lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin
sync:x:5:0:sync:/sbin:/bin/sync
shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown
halt:x:7:0:halt:/sbin:/sbin/halt
mail:x:8:12:mail:/var/spool/mail:/sbin/nologin
operator:x:11:0:operator:/root:/sbin/nologin

[root@centos01 t1022]# awk '/x:|abc/' test01.txt
bin:x:1:1:bin:/bin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin
adm:x:3:4:adm:/var/adm:/sbin/nologin
lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin
sync:x:5:0:sync:/sbin:/bin/sync
shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown
halt:x:7:0:halt:/sbin:/sbin/halt
mail:x:8:12:mail:/var/spool/mail:/sbin/nologin
operator:x:11:0:operator:/root:/sbin/nologin

awk编写生成以下结构文件

用awk编写生成以下结构文件的程序。（最后一列使用现在的时间，时间格式为YYYYMMDDHHMISS）各列的值应如下所示，每增加一行便加1，共500万行。下面的演示为了便于查看只生成10行，把循环上限改为5000000即可生成500万行。
1,1,0000000001,0000000001,0000000001,0000000001,0000000001,0000000001,2005100110101
2,2,0000000002,0000000002,0000000002,0000000002,0000000002,0000000002,2005100110101

[root@centos01 t1022]# awk 'BEGIN{for(i=1;i<=10;i++)printf("%d,%d,%010d,%010d,%010d,%010d,%010d,%010d,%d\n",i,i,i,i,i,i,i,i,strftime("%Y%m%d%H%M%S"))}'
1,1,0000000001,0000000001,0000000001,0000000001,0000000001,0000000001,20181023070344
2,2,0000000002,0000000002,0000000002,0000000002,0000000002,0000000002,20181023070344
3,3,0000000003,0000000003,0000000003,0000000003,0000000003,0000000003,20181023070344
4,4,0000000004,0000000004,0000000004,0000000004,0000000004,0000000004,20181023070344
5,5,0000000005,0000000005,0000000005,0000000005,0000000005,0000000005,20181023070344
6,6,0000000006,0000000006,0000000006,0000000006,0000000006,0000000006,20181023070344
7,7,0000000007,0000000007,0000000007,0000000007,0000000007,0000000007,20181023070344
8,8,0000000008,0000000008,0000000008,0000000008,0000000008,0000000008,20181023070344
9,9,0000000009,0000000009,0000000009,0000000009,0000000009,0000000009,20181023070344
10,10,0000000010,0000000010,0000000010,0000000010,0000000010,0000000010,20181023070344

awk 打印单引号

awk 'BEGIN{print "a'"'"'s"}' # 不用脱义，就多写几个单引号、双引号

awk 'BEGIN{print "a'\''s"}' # 用脱义，脱义的是单引号（先用'结束单引号串，再用\'输出一个单引号，最后用'重新开始单引号串）

awk 'BEGIN{print "a\"s"}' # 用脱义，脱义的是双引号（输出为 a"s）

[root@centos01 t1022]# awk 'BEGIN{print "a'"'"'s"}'
a's

转载于:https://my.oschina.net/u/996931/blog/2250974