c - word counter (binary-tree)
Counts the ASCII words in a file; written in C, using a binary tree.
code:
word_counter.c:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/* Maximum number of letters stored for a single word; longer words are truncated. */
#define WORD_MAX_LEN 50
/* Scanner state: the previously read character was a letter (a-z, A-Z). */
#define LAST_LETTER 1
/* Scanner state: the previously read character was not a letter. */
#define LAST_OTHER 0
/**
* count ascii words
* @author kuchaguangjie
* @date 2012-05-09 14:05:21
* @mail kuchaguangjie@163.com
*/
/* One node of the binary search tree that tallies how often each word occurs. */
struct word_count {
char *word; /* the word this node counts; btree_add stores a strdup'd copy here */
int count; /* number of occurrences seen so far; set to 1 when the node is created */
struct word_count *left; /* subtree of words that strcmp orders before `word` */
struct word_count *right; /* subtree of words that strcmp orders after `word` */
};
/**
 * Read the next word from a stream.
 *
 * A word is a maximal run of ASCII letters (a-z, A-Z); every other byte is
 * treated as a separator and skipped.
 *
 * State machine (previous byte -> current byte):
 *
 *   |-------------------------------------|
 *   | previous   current    word complete |
 *   |-------------------------------------|
 *   | letter     other      yes           |
 *   | letter     letter     no            |
 *   | other      other      no            |
 *   | other      letter     no            |
 *   |-------------------------------------|
 *
 * @param fp
 *     stream to read from
 * @param word
 *     output buffer; must have room for at least limit + 1 chars
 * @param limit
 *     maximum number of letters stored in word; the remaining letters of a
 *     longer word are still consumed from the stream but discarded
 *
 * @return
 *     1 -> a word was terminated by a non-letter byte (more input may follow),
 *     0 -> fgetc reported EOF (end of file or read error); word then holds
 *          the trailing word, or an empty string if there was none
 */
int getword(FILE *fp, char *word, int limit) {
    int len = 0;
    int in_word = 0;
    /* Must be int, not char: fgetc returns an unsigned-char value or EOF, and
     * a char cannot distinguish byte 0xFF from EOF (or never equals EOF at
     * all where char is unsigned). */
    int c;

    while ((c = fgetc(fp)) != EOF) {
        if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
            in_word = 1;
            /* Bound by the caller's limit so a small buffer is never overrun. */
            if (len < limit) {
                word[len++] = (char) c;
            }
        } else if (in_word) {
            /* letter -> other: the word is complete. */
            word[len] = '\0';
            return 1;
        }
    }

    /* EOF: hand back whatever was collected (possibly nothing). */
    word[len] = '\0';
    return 0;
}
/**
 * Allocate storage for one struct word_count.
 *
 * @return
 *     pointer to the new node, whose fields are NOT initialized,
 *     or NULL if malloc fails
 */
struct word_count *word_alloc() {
    struct word_count *node = malloc(sizeof *node);

    return node;
}
/**
 * Add one occurrence of a word to the binary search tree.
 *
 * If the word is already present (exact strcmp match) its count is
 * incremented; otherwise a new node holding a private copy of the word is
 * attached at the correct leaf position with a count of 1.
 *
 * The descent is iterative, so stack use does not grow with tree height
 * (sorted input produces a tree as deep as the number of distinct words).
 *
 * @param p
 *     root of the (sub)tree to add to; may be NULL for an empty tree
 * @param word
 *     NUL-terminated word to add; it is copied, so the caller may reuse
 *     its buffer afterwards
 *
 * @return
 *     the root of the (sub)tree: p itself when p was non-NULL, or the newly
 *     created node when p was NULL. If memory for a new node cannot be
 *     allocated the tree is left unchanged and p is returned (NULL for an
 *     empty tree).
 */
struct word_count *btree_add(struct word_count *p, char *word) {
    /* link always points at the pointer that would have to be updated to
     * attach a new node: first the local root, then some node's left/right. */
    struct word_count **link = &p;
    struct word_count *node;

    while (*link != NULL) {
        int cmp = strcmp(word, (*link)->word);

        if (cmp == 0) {
            (*link)->count++;
            return p;
        }
        link = (cmp < 0) ? &(*link)->left : &(*link)->right;
    }

    node = word_alloc();
    if (node == NULL) {
        return p;
    }
    /* Duplicate the string: the caller's buffer is overwritten by the next word. */
    node->word = strdup(word);
    if (node->word == NULL) {
        free(node);
        return p;
    }
    node->count = 1;
    node->left = node->right = NULL;

    *link = node;
    return p;
}
/*
 * Print every node of the tree in order (left subtree, node, right subtree),
 * one "count, word" line per node. An empty tree (NULL) prints nothing.
 */
void treeprint(struct word_count *p) {
    struct word_count *node;

    /* Recurse only into the left subtree; the right subtree is handled by
     * advancing the loop, which visits nodes in the same order. */
    for (node = p; node != NULL; node = node->right) {
        treeprint(node->left);
        printf("%6d, %s\n", node->count, node->word);
    }
}
/**
* do count words from a file
*
* @param fp
* FILE pointer
* @param counts
* word_count array, which is ordered,
* @param n
* word counts, equals to word_count array size,
*
* @return
* total different word
*/
void docount(FILE *fp) {
struct word_count *root = NULL;
char word[WORD_MAX_LEN + 1];
int end;
while ((end = getword(fp, word, WORD_MAX_LEN)) == 1) {
root = btree_add(root, word);
}
// last word
if (word[0] != '\0') {
root = btree_add(root, word);
}
treeprint(root);
}
/**
 * Program entry point: count the words of one file and print the result.
 *
 * The file is taken from the first command-line argument when one is given;
 * otherwise the original hard-coded path is used.
 *
 * @return
 *     EXIT_SUCCESS after counting, EXIT_FAILURE if the file cannot be opened
 */
int main(int argc, char **argv) {
    const char *fpath = (argc > 1)
        ? argv[1]
        : "/home/eric/workspace/c_workplace/practise/word_counter.c";
    FILE *fp = fopen(fpath, "r");

    if (fp == NULL) {
        perror(fpath);
        return EXIT_FAILURE;
    }

    docount(fp);
    fclose(fp);
    return EXIT_SUCCESS;
}

本文介绍了一个用C语言实现的ASCII单词计数器,它使用二叉树存储并统计文件中各个不同单词的出现次数。程序读取文本文件,把连续的ASCII字母(a–z、A–Z)识别为一个单词,统计时区分大小写。
1448

被折叠的 条评论
为什么被折叠?



