想看看awk复杂脚本好不好写,好不好维护,可以参考此例,刚好有task需要可以使用awk脚本来完成,尝试了下,真不好写。
个人认为超过300行的awk代码,不如用python来完成,python代码也比较容易维护。
需要处理的数据就不贴了(公司私密),task目标是将头文件中的register转换成一定格式,直接上代码吧:
目前还有一个问题:先执行 if (isarray(array)) delete array,之后再把 array 当作数组使用时,会导致 fatal: attempt to use scalar `array' as an array,不知道为什么,望大神给个提示,谢谢。
#!/usr/bin/awk -f
####################################################
#
# Script Name: r4r.awk
#
# Description:
# Generate register table for CARP FPGA
# used in Interstellar
#
# Usage: ./r4r.awk device=xx [debug=num] header.h ...
#
# Output: .c and .h files
#
####################################################
# usage - print the command-line help text on standard output.
# Takes no arguments and returns nothing.
function usage()
{
    # Synopsis line, framed by blank lines.
    printf("\n%s\n\n", " Usage : ./r4r.awk [-? | [-- --help | -h]] device=xx [debug=num] files...")

    # One line per option / argument.
    printf("%s\n", " -? : what you can see here")
    printf("%s\n", " device : set device name, this is mandatory")
    printf("%s\n", " debug : for debuggin, switch-on if set 1")
    printf("%s\n", " files : expected header files, one or more")

    # Reminder about the companion .tsv files.
    printf("%s\n", "Caution: highly recommand put .tsv files under same directory,")
    printf("%s\n", "their stum name are identical to header file name respectively.")
    printf("%s\n", "Access permission need be extracted there")
}
# reg_get_acc - look up the access permission of a register.
#
# The permissions are read from a .tsv file whose name is FILENAME with the
# trailing ".h" replaced by ".tsv".  Lines starting with "#define_" or
# "Register Name" are skipped; on every other line the first field is taken
# as the register name and the second field as the access string.
#
# Input:
#   regname   register name; an "[n]" array suffix is removed before lookup
# Return:
#   "RWACC_R", "RWACC_W" or "RWACC_RW" when the register is found
#   "RWACC_R" when the .tsv file cannot be read or no permission is found
#
# Every parameter after regname is a local variable, not an argument.
function reg_get_acc(regname,
    tsvfile, line, start_retry, retry, unary, rc, bubble, status, acc)
{
    retry = 3
    acc = ""
    tsvfile = FILENAME
    # Escape the dot so only a real ".h" suffix is replaced.
    sub(/\.h$/, ".tsv", tsvfile)
    sub(/\[[0-9]+\]/, "", regname)

    # getline returns -1 when the file cannot be opened or read, which
    # replaces the former "test -r" shell call (one fork per lookup, and
    # the file name was passed to the shell unquoted).
    while ((status = (getline line < tsvfile)) > 0) {
        if ((line ~ /^#define_/) || (line ~ /^Register Name/))
            continue
        # match against regname or regname[n]
        rc = split(line, bubble)
        if (rc <= 1)
            continue
        sub(/_\(N\)/, "", bubble[1])
        sub(/\W+/, "", bubble[2])
        if (bubble[1] ~ regname) {
            match(bubble[2], /^(R|W|RW)$/, unary)
            if (unary[1]) {
                acc = "RWACC_" unary[1]
                break
            }
            # Name matched but no valid access string on this line:
            # give the following lines a limited number of chances.
            start_retry = 1
        }
        if (start_retry && --retry <= 0)
            break
    }
    # Always rewind: without close() the next lookup would continue from
    # the current position (or from end-of-file) instead of the first line.
    close(tsvfile)

    if (status < 0) {
        print "\033[33mwarning\033[0m Not readable: " tsvfile
        print "\033[33mwarning\033[0m use RWACC_R access permission"
        return "RWACC_R"
    }
    if (acc != "")
        return acc
    if (retry <= 0)
        printf("ERROR: no access permission for register: %s\n", regname)
    return "RWACC_R"
}
# Command-line handling and one-time initialisation.
# Recognised arguments are removed from ARGV (set to "") so that awk does
# not try to open them as input files.
BEGIN {
    errcode = 0    # program status, non-zero once something failed

    # No argument at all: show the help text and fail.
    if (ARGC == 1) {
        usage()
        errcode = 2
        exit 1
    }

    # ARGV[0] is the program name; real arguments start at index 1.
    for (i = 1; i < ARGC; i++) {
        if (ARGV[i] == "-?" || ARGV[i] == "-h" || ARGV[i] == "--help") {
            usage()
            errcode = 2
            exit 1
        }

        # -t / -T select the value stored in "source".
        if (ARGV[i] == "-t" || ARGV[i] == "-T") {
            source = (ARGV[i] == "-t") ? "header" : "tsv"
            ARGV[i] = ""
            continue
        }

        # Anything that is not name=value is left in ARGV as an input file.
        if (!match(ARGV[i], /(\w+)=(\w+)/, duo))
            continue

        if (duo[1] == "device") {
            # the last device definition on the command line wins
            device = tolower(duo[2])
        } else if (duo[1] == "debug") {
            if (duo[2] > 2)
                printf("not support: %s > 2, ignore\n", ARGV[i])
            else
                debug = duo[2]
        } else {
            print "\nnot support: " ARGV[i]
            errcode = 1
            exit 1
        }
        ARGV[i] = ""    # awk ignores empty ARGV entries
    }

    if (!device) {
        print "Not specify device...\n"
        errcode = 1
        exit 1
    }

    # Names derived from the device, e.g. for device = carp:
    #   hfile = "carp_reg.h", cfile = "carp_reg.c",
    #   regtab_name = "carp_regtbl", reg_num = "NUM_OF_CARP_REGS"
    regtab_name = device "_regtbl"
    reg_num = "NUM_OF_" toupper(device) "_REGS"
    cfile = device "_reg.c"
    hfile = device "_reg.h"

    # "for (x in array)" visits numeric indices in ascending order.
    PROCINFO["sorted_in"] = "@ind_num_asc"

    # Global counters shared by the rules below.
    z = 0
    i = 0

    printf("\nTarget Files: %s %s\n\n", hfile, cfile)
}
# Reset all per-file state before the first record of each input file and
# derive the macro names that the file is expected to define.
BEGINFILE {
    regname_prefix = ""        # null when beginning new file
    regid_prefix = "REG_"      # preceding register name
    reg_baseaddr = "undef"     # register module base address, taken from the header
    reg_baseaddr_width = 0     # number of hex digits of the base address;
                               # every printed address uses this width
    w = 0                      # alias of reg_baseaddr_width; reset as well so a
                               # file without a base address does not inherit
                               # the width of the previous file
    reg_space = 4              # register value space in bytes, default is 4
    reg_def_space = 0          # register module space, taken from the header
    reg_def_number = 0         # register module number, taken from the header
    delete reg_port_range      # port ranges built from the port names
    delete reg_port_name       # port name definitions from the header
    begin_i = "undef"          # index of the first register record of this file

    # Module prefix = upper-cased file name without directory and without
    # the ".h" suffix.  Only the final ".h" is removed and any leading
    # path is dropped, so "dir/foo.h" yields "FOO".
    module_prefix = FILENAME
    sub(/^.*\//, "", module_prefix)
    sub(/\.h$/, "", module_prefix)
    module_prefix = toupper(module_prefix)

    reg_baseaddr_field = module_prefix "_BASE_ADDR"
    reg_def_space_field = module_prefix "_REGSPACE"
    reg_def_number_field = module_prefix "_NUMBER"

    printf("BEGINFILE: \033[32m%s\033[0m\n", FILENAME)
}
# Module base address, taken from a line of the form
#   #define <MODULE>_BASE_ADDR 0x<hex>
# Sets reg_baseaddr and the address width (reg_baseaddr_width, alias w).
# Exits with an error when the address is defined twice in one file or the
# macro name is not the one derived from the file name.
/^\s*#define\s+\w+_BASE_ADDR\s+0[xX][[:xdigit:]]+/ {
    print $0
    if ((reg_baseaddr "") != "undef") {
        print "\033[31merror:\033[0m redefined base address: " \
            $3, " previous address: " reg_baseaddr
        errcode = 1
        exit 1
    }
    if ($2 != reg_baseaddr_field) {
        print "\033[31merror:\033[0m base address field not match: " \
            $2, ", expect field content: " reg_baseaddr_field
        errcode = 1
        exit 1
    }
    # Keep only the hexadecimal literal itself.  A carriage return (CRLF
    # header) or any other trailing character glued to the field would
    # otherwise be counted by length() and widen every printed address.
    reg_baseaddr = $3
    if (match($3, /^0[xX][[:xdigit:]]+/))
        reg_baseaddr = substr($3, RSTART, RLENGTH)
    reg_baseaddr_width = length(reg_baseaddr)
    if (reg_baseaddr ~ /^0[xX]/)
        reg_baseaddr_width -= 2    # do not count the "0x" prefix
    # alias
    w = reg_baseaddr_width
    if (debug)
        print "Get register base address width: " w
    next
}
# Number of register blocks in the module, taken from a line of the form
#   #define <MODULE>_NUMBER <decimal>
# The value is stored in reg_def_number.
/^\s*#define\s+\w+_NUMBER\s+[[:digit:]]+/ {
    print $0

    # Refuse a second definition in the same file, and a macro whose name
    # is not the one derived from the file name.
    if (reg_def_number || $2 != reg_def_number_field) {
        if (reg_def_number)
            print "\033[31merror:\033[0m redefined block register number: " $3 OFS " previous number: " reg_def_number
        else
            print "\033[31merror:\033[0m number field not match: " $2 OFS ", expect field content: " reg_def_number_field
        errcode = 1
        exit 1
    }

    reg_def_number = $3
    if (debug)
        print "Get block register number: " reg_def_number
    next
}
# Size of one port (block) of registers, taken from a line of the form
#   #define <MODULE>_REGSPACE <decimal>
# The value is stored in reg_def_space.  Exits with an error when the size
# is defined twice in one file or the macro name is not the expected one.
/^\s*#define\s+\w+_REGSPACE\s+[[:digit:]]+/ {
    print $0
    if (reg_def_space) {
        # This message used to say "register number" (copied from the
        # _NUMBER rule), which pointed at the wrong macro.
        print "\033[31merror:\033[0m redefined block register space: " \
            $3, " previous regspace: " reg_def_space
        errcode = 1
        exit 1
    }
    if ($2 != reg_def_space_field) {
        print "\033[31merror:\033[0m space field not match: " \
            $2, ", expect field content: " reg_def_space_field
        errcode = 1
        exit 1
    }
    reg_def_space = $3
    if (debug)
        print "Get block register space: " reg_def_space
    next
}
# Port (block) name enumeration.
#
# Triggered by a "#define <NAME> 0..." line.  That line is taken as entry 0
# of the enumeration; the following lines are read with getline until
# reg_def_number names have been collected.  The names are stored in
# reg_port_name[] and then folded into reg_port_range[]: consecutive names
# sharing the same stem become "<stem>_<first>_<last>", a stem that occurs
# once (first == last) is stored as the bare stem.
/^\s*#define\s+\w+\s+0/ {
    if (reg_def_number == 1) {
        print "warining: no port name enumeration as module number == 1"
        next
    }

    # Entry 0: the numeric suffix of the name must equal the value.
    gsub(/\W+/, "", $3)
    match($2, /\w+([[:digit:]]+)/, unary)
    if (RLENGTH < 0) {
        print "\033[31merror:\033[0m port name not end with number: " $2
        errcode = 1
        exit 1
    }
    if (unary[1] != $3) {
        printf("\033[31merror:\033[0m port name suffix: %d != enumeration: %d\n",
               unary[1], $3)
        errcode = 1
        exit 1
    }
    reg_port_name[0] = $2

    # Entries 1 .. reg_def_number-1.  The counter is tested BEFORE reading:
    # the previous form read one more line after the last entry and threw
    # it away, so whatever followed the enumeration was never seen by any
    # rule.
    j = 1
    while (j < reg_def_number && (getline line) > 0) {
        rc = split(line, array)
        if (rc == 3) {
            gsub(/^\W+/, "", array[3])
            gsub(/\W+$/, "", array[3])
            if (j != array[3]) {
                printf("\033[31merror:\033[0m [%d]: out-of-order enumeration: %s\n", j, line)
                errcode = 1
                exit 1
            }
            gsub(/^\W+/, "", array[2])
            gsub(/\W+$/, "", array[2])
            reg_port_name[j] = array[2]
            j++
        }
        # lines without exactly three fields are ignored
    }
    if (debug)
        print "stop enumeration analysis after " j " port name(s)"

    # Fold the names into ranges.  The loop relies on the ascending
    # numeric index order selected through PROCINFO["sorted_in"] in BEGIN.
    reg_port_type = ""
    reg_port_start = 0
    reg_port_end = 0
    r = 0
    for (k in reg_port_name) {
        # duo[1] = stem, duo[2] = optional numeric suffix
        match(reg_port_name[k], /(\w+_[[:alpha:]]+)([[:digit:]]+)?$/, duo)
        if (RLENGTH < 0) {
            printf("\033[31mwarning:\033[0m wrong port name format: %s\n", reg_port_name[k])
            errcode = 1
            exit 1
        }
        if (reg_port_type == duo[1]) {
            # same stem: extend the current range
            reg_port_end = duo[2]
        } else {
            # new stem: store the finished range (if any), start a new one
            if (reg_port_type) {
                if (reg_port_start == reg_port_end) {
                    reg_port_range[r] = reg_port_type
                } else {
                    reg_port_range[r] = sprintf("%s_%d_%d",
                        reg_port_type, reg_port_start, reg_port_end)
                }
                r++
            }
            reg_port_type = duo[1]
            reg_port_start = duo[2]
            reg_port_end = reg_port_start
        }
    }
    # store the last range
    if (reg_port_start == reg_port_end) {
        reg_port_range[r] = reg_port_type
    } else {
        reg_port_range[r] = sprintf("%s_%d_%d",
            reg_port_type, reg_port_start, reg_port_end)
    }
    next
}
# Outermost struct: "struct <NAME> {".
# Derives regname_prefix from <NAME>: a leading "REG_" or a trailing "_REG"
# is removed when present, and "_" is appended.
/^\s*struct\s+\w+\s+{/ {
    # A prefix that is already set means a second outer struct in this file.
    if (regname_prefix != "") {
        print "reg_prefix is not null: " regname_prefix
        errcode = 1
        exit 1
    }
    if (debug)
        print "PATERN RULE: struct: get regname_prefix: $2: " $2

    if (match($2, /^(REG_)(\w+)/, array))
        regname_prefix = array[2] "_"
    else if (match($2, /(\w+)(_REG)$/, array))
        regname_prefix = array[1] "_"
    else
        regname_prefix = $2 "_"

    regid_prefix = "REG_"

    if (debug) {
        print "regname_prefix: " regname_prefix
        print "regid_prefix: " regid_prefix
    }
    next
}
# Register record, for example:
#   volatile uint32_t PRI_QUANTA[8]; // 0x00000480
# Fields used: $2 = type, $3 = name (with ';' and optional "[n]"),
# $5 = offset.  Fills reg_name[i] and the reg_id / reg_acc / reg_addr
# tables keyed by that name, plus a dimension descriptor for arrays.
($0 ~ "\\s*volatile uint32_t\\s+[A-Z0-9_]+") {
    if (debug == 1) {
        print "register record: " $0
        for (j = 1; j <= NF; j++)
            print "$" j ": " $j
    }

    if ($3 ~ /SPARE_/) {
        print "\033[33mwarning\033[0m: ignore \033[31mSPARE\033[0m record : " $0
        next
    }

    # Value width in bytes from the digits embedded in the type name.
    if (match($2, /([[:alpha:]]+)([[:digit:]]+)[[:alpha:]_]+/, duo)) {
        reg_space = int(duo[2] / 8)
        if (reg_space != duo[2] / 8) {
            print "!!! Check BITS LENGTH, not octet"
            errcode = 1
            exit 1
        }
        if (debug)
            print "Get \033[31mregister space\033[0m: " reg_space
    } else {
        print "warning: use default 4 bytes for register value space\n"
    }

    # The name must end with a semicolon; remove it.
    if (!sub(/;\W*\s*$/, "", $3)) {
        print "$3: " $3 ", strip semicolon failed"
        errcode = 1
        exit 1
    }
    # Remove trailing non-word characters from the offset.
    gsub(/\s*\W*\s*$/, "", $5)

    # Remember where this file's records start.
    if (begin_i == "undef")
        begin_i = i

    # Prefixed name, then the three attribute tables keyed by it.
    reg_name[i] = regname_prefix $3
    reg_acc[reg_name[i]] = reg_get_acc($3)    # lookup uses the un-prefixed name
    reg_id[reg_name[i]] = regid_prefix reg_name[i]
    reg_addr[reg_name[i]] = sprintf("0x%0*x", w, strtonum(reg_baseaddr) + strtonum($5))

    # Array register "NAME[n]": re-key everything under the bare NAME and
    # describe the dimension.
    if (match(reg_name[i], /(\w+)\[([[:digit:]]+)\]/, duo)) {
        reg_name_old = reg_name[i]
        reg_name[i] = duo[1]    # still carries regname_prefix

        reg_id[reg_name[i]] = regid_prefix reg_name[i]
        reg_acc[reg_name[i]] = reg_acc[reg_name_old]
        reg_addr[reg_name[i]] = reg_addr[reg_name_old]

        reg_dimen_name[reg_name[i]] = "dim_" tolower(reg_name[i])
        reg_dimen[reg_name[i]] = sprintf("static dim_desc_t %s = {\n" \
            " 1,\n" \
            " { .name = \"%s\", .count = %d},\n" \
            "};\n", reg_dimen_name[reg_name[i]], tolower(reg_name[i]), duo[2])

        # drop the entries stored under the "NAME[n]" key
        delete reg_acc[reg_name_old]
        delete reg_id[reg_name_old]
        delete reg_addr[reg_name_old]
    }

    i++
    next
}
# Inner (anonymous) struct: "struct {" ... "} NAME[n];".
#
# The body is consumed with getline and classified by its number of
# whitespace-separated fields:
#   5 fields  register record with address comment
#   3 fields  padding record without address comment
#   2 fields  closing line "} NAME[n];"
# The whole section becomes ONE entry in reg_name[] with a two-level
# dimension descriptor (section count, registers per section).
/^\s*struct\s+{\s*/ {
    sec_baseaddr = "undef"
    sec_reg_space = 4    # default 4
    sec_reg_num = 0
    sec_iteration = 0
    sec_len = 0
    inner_struct_end = 0
    j = 0
    # Plain "delete" empties the arrays; the original author reported
    # problems when guarding the delete with isarray().
    delete sec_reg_name
    delete sec_reg_addr    # was left with entries from the previous section

    while ((getline line) > 0) {
        gsub(/\r+/, "", line)      # strip carriage return characters
        rc = split(line, array)    # default FS: split on whitespace
        switch (rc) {
        case 5:    # inner struct register record
            # width in bytes from the digits embedded in the type name
            if (!match(array[2], /[[:alpha:]]+([[:digit:]]+)[[:alpha:]_]+/, unary)) {
                # newline added: the warning used to run into the next output
                printf("\033[31mwarning:\033[0m Can't get type length: %s, using default %d\n",
                       line, sec_reg_space)
            } else {
                sec_reg_space = strtonum(unary[1]) / 8
            }
            # strip section register name
            gsub(/;$/, "", array[3])
            # accumulate space, but do not record padding fields
            match(array[3], /(pad__\w*)\[?([[:digit:]]+)?\]?/, duo)
            sec_len += duo[2] ? sec_reg_space * duo[2] : sec_reg_space
            if (duo[1]) {
                if (debug) print "in inner struct, ignore padding field: " line
                break
            }
            sec_reg_num++
            sec_reg_name[j] = array[3]
            sec_reg_addr[j] = array[5]
            if (debug) printf("\033[31minner:\033[0m sec_reg_name[%d] = %s\n", j, sec_reg_name[j])
            j++
            # the first real register fixes the section base address
            if ((sec_baseaddr "") == "undef")
                sec_baseaddr = sprintf("0x%0*x", w, strtonum(array[5]) + strtonum(reg_baseaddr))
            break
        case 3:    # padding record that lacks the address comment
            gsub(/;$/, "", array[3])
            match(array[3], /(pad__\w*)\[?([[:digit:]]+)?\]?/, duo)
            sec_len += duo[2] ? sec_reg_space * duo[2] : sec_reg_space
            if (duo[1]) {
                if (debug) print "in inner struct, ignore padding field: " line
            } else {
                print "\033[31merror:\033[0m check this inner struct record: " line
                errcode = 1
                exit 1
            }
            break
        case 2:    # inner struct end
            gsub(/;$/, "", array[2])
            # The former test of duo[3] could never fire because the
            # pattern has only two groups; report a failed match instead.
            if (!match(array[2], /^(\w+)\[?([[:digit:]]+)?\]?/, duo)) {
                print "\033[31merror:\033[0m inner struct end error: " line
                errcode = 1
                exit 1
            }
            sec_name = duo[1]
            sec_iteration = duo[2] ? duo[2] : 1
            inner_struct_end = 1
            break
        default:    # wrong format
            printf("\033[31merror:\033[0m file: %s: " \
                "ignore this section register record: %s\n", FILENAME, line)
            errcode = 1
            exit 1
        }
        if (inner_struct_end == 1) break
    }

    if (!inner_struct_end) {
        # FILENAME must be an argument: it used to be concatenated onto the
        # format string, leaving %s without a value.
        printf("\033[31merror:\033[0m file: %s, not found inner struct end\n", FILENAME)
        errcode = 1
        exit 1
    }

    reg_name[i] = sprintf("%s%s_0_%d", regname_prefix, sec_name, sec_iteration - 1)
    reg_id[reg_name[i]] = regid_prefix reg_name[i]
    reg_acc[reg_name[i]] = reg_get_acc(sec_name)
    reg_addr[reg_name[i]] = sec_baseaddr
    reg_dimen_name[reg_name[i]] = "dim_" tolower(reg_name[i])
    reg_dimen[reg_name[i]] = sprintf("static dim_desc_t %s = {\n" \
        " 2,\n" \
        " { .name = \"%s\", .count = %d},\n" \
        " { .name = \"%s\", .count = %d},\n" \
        "};\n", reg_dimen_name[reg_name[i]], tolower(reg_name[i]), sec_iteration,
        tolower(sec_name) "_regs", sec_reg_num)
    i++
    next
}
# Per-file summary: copy the registers collected for this file
# (reg_name[begin_i .. i-1]) into the global reg_all_* tables.
#
# With more than one register block (reg_def_number > 1) every register is
# emitted once per port range, named <prefix><range>_<register> and placed
# at the register address plus the offset of that range.  Otherwise the
# registers are copied unchanged.
ENDFILE {
    if (reg_def_number > 1) {
        space = 0    # byte offset of the current port range
        for (k in reg_port_range) {
            for (l = begin_i; l < i; l++) {
                reg_name_old = reg_name[l]
                sub(regname_prefix, "", reg_name_old)
                sub(regname_prefix, "", reg_port_range[k])
                reg_all_name[z] = sprintf("%s%s_%s", regname_prefix, reg_port_range[k], reg_name_old)
                reg_all_id[reg_all_name[z]] = "REG_" reg_all_name[z]
                reg_all_acc[reg_all_name[z]] = reg_acc[reg_name[l]]
                reg_all_addr[reg_all_name[z]] = sprintf("0x%0*x", w, strtonum(reg_addr[reg_name[l]]) + strtonum(space))
                # NOTE(review): the dimension entry is moved, not copied, so
                # only the first port range keeps it -- confirm that this is
                # intended (copying would emit the same "static dim_desc_t"
                # definition once per range).
                if (reg_name[l] in reg_dimen_name) {
                    reg_dimen_name[reg_all_name[z]] = reg_dimen_name[reg_name[l]]
                    reg_dimen[reg_all_name[z]] = reg_dimen[reg_name[l]]
                    delete reg_dimen_name[reg_name[l]]
                    delete reg_dimen[reg_name[l]]
                }
                z++
            }
            # Advance past every port of this range.  A range "<stem>_a_b"
            # holds b - a + 1 ports; the previous "b - a" made a two-port
            # range advance exactly as far as a single port.
            match(reg_port_range[k], /[[:alpha:]]+_([[:digit:]]+)_([[:digit:]]+)$/, duo)
            if (duo[2])
                space += strtonum(reg_def_space) * (duo[2] - duo[1] + 1)
            else
                space += strtonum(reg_def_space)
        }
    } else {
        for (l = begin_i; l < i; l++) {
            reg_all_name[z] = reg_name[l]
            reg_all_id[reg_all_name[z]] = reg_id[reg_name[l]]
            reg_all_acc[reg_all_name[z]] = reg_acc[reg_name[l]]
            reg_all_addr[reg_all_name[z]] = reg_addr[reg_name[l]]
            z++
        }
    }
    printf("ENDFILE : \033[32m%s\033[0m\n\n", FILENAME)
}
# Comparison callback for PROCINFO["sorted_in"]: orders array elements by
# the numeric value of their address string ("0x..."), and by index string
# when two addresses are equal.  a1 and a2 are local variables.
function r4r_cmp_addr(i1, v1, i2, v2,    a1, a2)
{
    a1 = strtonum(v1)
    a2 = strtonum(v2)
    if (a1 < a2) return -1
    if (a1 > a2) return 1
    if ((i1 "") < (i2 "")) return -1
    if ((i1 "") > (i2 "")) return 1
    return 0
}

# Write the generated header (hfile) and C source (cfile) from the
# reg_all_* tables.  Nothing is written when an earlier rule failed.
END {
    if (errcode) {
        exit 1
    }
    if (!hfile) {
        print "not defined hfile, Failed..."
        exit 1
    }
    if (!cfile) {
        print "not defined cfile, Failed..."
        exit 1
    }

    # Visit registers by ascending address.  "@val_num_asc" converts the
    # "0x..." strings to 0 and then falls back to comparing them as text,
    # which is only correct while every address has the same width.
    PROCINFO["sorted_in"] = "r4r_cmp_addr"

    # ---- header file ----
    # Include guard derived from the device name (it used to be the fixed
    # _LUOYANG_REG_H_ for every device).
    guard = "_" toupper(device) "_REG_H_"
    printf("#ifndef %s\n#define %s\n\n", guard, guard) > hfile

    # column width for the #define lines: longest name plus spacing
    maxlen = 0
    for (i in reg_all_name) {
        l = length(reg_all_name[i])
        if (l > maxlen)
            maxlen = l
    }
    maxlen += 8
    if (debug)
        print "maxlen = " maxlen

    # register address defines
    for (i in reg_all_addr)
        printf("#define %-*s%s\n", maxlen, i, reg_all_addr[i]) > hfile

    # register id enumeration, terminated by the register count
    print "\nenum {" > hfile
    for (i in reg_all_addr)
        printf("\t%s,\n", reg_all_id[i]) > hfile
    printf("\t%s,\n};\n", reg_num) > hfile
    printf("\nextern reg_desc_t %s[];\n", regtab_name) > hfile
    print "\n#endif" > hfile
    close(hfile)

    # ---- C file ----
    # NOTE(review): the two lines below emit "#include " without a header
    # name; the names appear to have been lost from this copy of the
    # script and must be restored by hand.
    print "#include " > cfile
    print "#include " > cfile
    printf("\n#include \"%s\"\n", hfile) > cfile
    printf("#include \"%s_priv.h\"\n", device) > cfile
    printf("#include \"%s_platform.h\"\n\n", device) > cfile

    # dimension descriptors
    for (i in reg_all_addr) {
        if (i in reg_dimen) print reg_dimen[i] > cfile
    }

    # register table
    printf("\nreg_desc_t %s[%s] = {\n", regtab_name, reg_num) > cfile
    printf("%s\n%s\n%s\n",
        " // {id, name, type,",
        " // offset, width, mask, dft_val,",
        " // rw, fields, num_fields, acc_info, private}") > cfile
    for (i in reg_all_addr) {
        printf(" {%s, \"%s\", RTYPE_NORMAL_NOTEST,\n",
            reg_all_id[i], i) > cfile
        # The mask is four \xFF escapes; the first backslash was missing,
        # which produced the literal characters "xFF" in the C string.
        printf(" %s, 4, \"\\xFF\\xFF\\xFF\\xFF\", NULL,\n", i) > cfile
        if (i in reg_dimen_name) {
            printf(" %s, NULL, 0, 0, &%s},\n",
                reg_all_acc[i], reg_dimen_name[i]) > cfile
        } else {
            printf(" %s, NULL, 0, 0, NULL},\n", reg_all_acc[i]) > cfile
        }
    }
    print "};\n" > cfile
    close(cfile)

    printf("\n%s\n\n", errcode ? "Failed!" : "Success!")
}