UNIX_shell_programming(几道练习题目)

本文介绍了一个用于计算多个文件中单词出现频率的Shell脚本。该脚本能够处理命令行参数传入的文件,并统计每个文件内单词的出现次数,最后以表格形式展示结果。单词被定义为一个或多个连续的字母字符。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

Word Frequency

Given n files as command-line arguments, calculate the frequency of words in each file and display the results in a table. A word is defined as a sequence of one or more consecutive letters (see the definition of letters above). Every non-letter character is to be treated as whitespace.

The output should be a multi-column list, the first column being the list of words encountered (in lowercase) in any of the input files (sorted according to the C locale), and subsequent columns containing the number of occurrences of that word in file1 ... filen, separated by spaces. Example output for 2 files:

a 5 8
she 3 5
the 6 3
word 2 0
#!/bin/bash

# Word frequencies
# $Id: unix9-wordfreq.sh 191 2006-03-29 11:07:00Z cactus $
# See http://cactus.rulez.org/elte/2005-1-unix/#9 for a description of what it does
# Licensed under the GNU General Public License, version 2

# Print the usage/help text on stdout and terminate the script with status 0.
#
# The program name shown in the usage line is the last path component of $0.
# It is derived with a parameter expansion rather than an unquoted
# `basename $0`, which mis-parsed script paths containing whitespace
# (the second word was taken as a suffix to strip).
function help () {
    local self=${0##*/}

    cat << EOF
Usage: $self FILE1 [FILE2...]
Creates statistics about the words occuring in the files.

Options:
    -help   Display this help message

(C) 2005 Dr. ERDI Gergo <cactus@cactus.rulez.org>

Version: \$Id: unix9-wordfreq.sh 191 2006-03-29 11:07:00Z cactus $
EOF
    exit 0
}

# Report a fatal error and terminate the script with status 1.
#
# Arguments: the message words; they are joined with single spaces and
#            printed on stderr as "ERROR: <message>!".
#
# The message is expanded inside quotes so that it is printed verbatim:
# the previous unquoted `echo ... $@` collapsed runs of whitespace and
# expanded glob characters such as `*` against the current directory.
function error () {
    printf 'ERROR: %s!\n' "$*" >&2
    exit 1
}

# This works differently from the others: here we only walk through the
# arguments to see whether -help is among them, and check that everything
# else is a readable regular file.
#
# Arguments: the script's command-line arguments.
# Behaviour: calls `help` (which exits) on the first -help encountered;
#            calls `error` (which exits) on the first argument that is not
#            a readable regular file. Arguments are examined left to right.
# Returns:   0 when every argument passed validation.
#
# Every argument is inspected: an empty-string argument is reported as an
# unreadable file instead of silently ending the scan, which used to leave
# all following arguments unchecked.
function options () {
    local arg

    for arg in "$@"; do
        case "$arg" in
            -help)
                help
                ;;
            *)
                [[ -f "$arg" && -r "$arg" ]] || error "$arg: Unable to open file"
                ;;
        esac
    done
}

# Emit (on stdout) a shell command line that, once passed to `eval`,
# prints one "word count" line per distinct word of a file, sorted.
#
# Arguments: $1 - path of the file to analyse.
# Outputs:   the command text only; nothing is executed here.
#
# Notes:
#  * The file name is shell-quoted with printf %q, so names containing
#    single quotes, spaces or other metacharacters survive the later
#    `eval` (they used to break it or be executed as shell code).
#  * `sort` is pinned to the C locale so the ordering is byte-wise, as the
#    task statement requires, regardless of the caller's LC_COLLATE.
#  * stderr of awk is discarded, exactly as before.
#  * NOTE(review): the accented letters are named after ISO-8859-2 in the
#    AWK helpers; whether they match depends on the script file's encoding
#    and on the awk implementation's byte/character handling - confirm.
function awk_count () {
    local awkprog file_q

    awkprog='
BEGIN {
    # Everything that is not a letter separates words.
    FS="[^a-zA-ZáÁéÉíÍóÓöÖõÕúÚüÜûÛ]"

    # Upper-case to lower-case map for the accented letters.
    ekezet_lower["Á"] = "á";
    ekezet_lower["É"] = "é";
    ekezet_lower["Í"] = "í";
    ekezet_lower["Ó"] = "ó";
    ekezet_lower["Ö"] = "ö";
    ekezet_lower["Õ"] = "õ";
    ekezet_lower["Ú"] = "ú";
    ekezet_lower["Ü"] = "ü";
    ekezet_lower["Û"] = "û";
}

# Lower-case a whole word, one character at a time.
function iso88592_tolower (s,    ret, j, n) {
    ret = "";
    n = length(s);
    for (j = 1; j <= n; j++)
        ret = ret iso88592_tolower_c(substr(s, j, 1));
    return ret;
}

# Lower-case a single character: ASCII via tolower, accented via the map.
function iso88592_tolower_c (c) {
    if (match (c, "[a-zA-Z]"))
        return tolower (c);
    else if (c in ekezet_lower)
        return ekezet_lower[c];

    return c;
}

# Only lines containing at least one letter can contribute words.
/[a-zA-ZáÁéÉíÍóÓöÖõÕúÚüÜûÛ]/ {
    for (i = 1; i <= NF; i++)
    {
        # Adjacent separators produce empty fields; skip those.
        if (match ($i, "^[a-zA-ZáÁéÉíÍóÓöÖõÕúÚüÜûÛ]+$"))
            words[iso88592_tolower($i)]++;
    }
}

END {
    for (w in words)
        printf "%s %s\n", w, words[w];
}
'
    printf -v file_q '%q' "$1"
    printf '%s\n' "awk '$awkprog' $file_q 2>/dev/null|LC_ALL=C sort"
}

# Element-by-element processing ("huge Fothi power" in the author's words).
#
# Emit (on stdout) a shell command line that, once passed to `eval`,
# merges two key-sorted streams of "word count..." lines:
#   left  stream: read from the command's stdin, "word c1 [c2 ...]"
#   right stream: output of the command given in $1, "word count"
# Each output line is "word <left counts> <right count>", with 0 filled in
# for a word that is missing from one side.
#
# Arguments: $1 - command text producing the right stream; it is placed
#                 inside a process substitution, so it is executed by eval.
#            $2 - optional: number of count columns in the left stream
#                 (default 1). It is only needed when the left stream is
#                 empty, because the width cannot be learnt from the data
#                 then; otherwise the width is taken from the lines read.
#                 Callers chaining more than two files should pass it.
# Outputs:   the command text only; nothing is executed here.
#
# stderr of awk is discarded, exactly as before.
function awk_join () {
    local awkprog left_cols=${2:-1}

    # The value is spliced into the command text: accept digits only.
    [[ "$left_cols" =~ ^[0-9]+$ ]] || left_cols=1

    awkprog='
# Advance the left stream (stdin); remembers key, counts and column width.
function read1 (    i) {
    if (eof1)
        return;
    eof1 = (getline <= 0);
    if (eof1)
        return;

    # Appending "" forces a string, so keys are never compared as numbers.
    key1 = $1 "";
    val1 = $2;

    width = NF - 1;
    for (i = 3; i <= NF; ++i)
        val1 = val1 " " $i;
}

# Advance the right stream (the file named by f2).
function read2 () {
    if (eof2)
        return;
    eof2 = ((getline < f2) <= 0);
    if (eof2)
        return;

    key2 = $1 "";
    val2 = $2;
}

# Word present on both sides.
function join () {
    printf "%s %s %s\n", key1, val1, val2;
}

# Word only on the left: the right count is 0.
function fill_from_1 () {
    printf "%s %s 0\n", key1, val1;
}

# Word only on the right: every left count is 0.
function fill_from_2 (    i) {
    printf "%s ", key2;
    for (i = 0; i < width; ++i)
        printf "0 ";
    printf "%s\n", val2;
}

BEGIN {
    FS=" ";

    read1();
    read2();

    while (!eof1 || !eof2) {
        if (!eof1 && !eof2 && key1 == key2) {
            join();
            read1();
            read2();
        } else if (eof2 || (!eof1 && (key1 < key2))) {
            fill_from_1();
            read1();
        } else {
            fill_from_2();
            read2();
        }
    }
    # Everything was consumed through getline; skip normal input handling.
    exit
}
'
    printf '%s\n' "awk -v f2=<($1) -v width=$left_cols '$awkprog' 2>/dev/null"
}


# --- Driver -----------------------------------------------------------
# Validate the command line, then build one shell pipeline as text:
#   <count file1> | <join with count file2> | <join with count file3> ...
# and finally run it with eval.

[ $# -lt 1 ] && error "Missing arguments"
options "$@"

# Every file name is passed quoted so that names containing whitespace or
# glob characters reach awk_count as a single, unmodified argument.
fullpipe="$(awk_count "$1")"
shift

for i in "$@"
do
  counter="$(awk_count "$i")"
  awkline="$(awk_join "$counter")"
  fullpipe="$fullpipe | $awkline"
done

# Run everything in the C locale: sorting and comparisons must be
# byte-wise. LC_ALL is set to C rather than to the empty string, because
# an empty LC_ALL leaves an inherited LC_COLLATE (or other LC_*) in force.
export LANG=C
export LC_ALL=C
export LC_CTYPE=C

# NOTE(review): the pipeline text embeds the file names given on the
# command line; it is only as safe as the quoting done by awk_count.
eval "$fullpipe"
转载自:http://gergo.erdi.hu/elte/2005-1-unix/
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值