统计单词出现次数--hash表,二叉树,标准库

本文探讨了如何使用C++的标准模板库进行高效数据操作,并通过实例展示了如何利用哈希表进行字符串计数及时间效率分析。

我一直很喜欢用标准库,非常非常方便。但是有时候你根本不知道它为你做了什么,我喜欢知道自己写的程序每一步都干了什么!!!

////////////////////////////////// ///////////////////////////////// //标准模板库 ////////////////////////////////// /* #include <time.h> #include <map> #include <string> #include <fstream> #include <iostream> using namespace std; int main() { double beginTime = clock(); ifstream in("source.txt"); map<string, int> M; map<string, int>::iterator j; string t; while( in >> t ) { M[t]++; } for ( j = M.begin(); j != M.end(); j++) { cout << j->first << " " << j->second << endl; } double endTime = clock(); cout << "time: " << endTime - beginTime << "ms " << endl; return 0; } */ ////////////////////////////////// ///////////////////////////////// // 编程珠玑十五章,珍珠字符串 //用Hash表实现之 ////////////////////////////////// /* #include <time.h> #include <stdio.h> #include <string.h> #include <stdlib.h> #define NHASH 29989 #define MULT 31 typedef struct node *nodeptr; typedef struct node { char *word; int count; nodeptr next; }node; nodeptr bin[NHASH]; unsigned int hash( char *p) { unsigned int h = 0; for( ; *p; p++) { h = MULT * h + *p; } return h % NHASH; } void incword( char *s) { int h = hash( s ); nodeptr p = NULL; for ( p = bin[h]; p != NULL; p = p->next) { if ( strcmp( s, p->word) == 0) { p->count ++; return; } } p = NULL; p = new node; if ( p == NULL) { return; } p->count = 1; p->word = new char[ strlen(s) + 1]; strcpy( p->word, s); p->next = bin[h]; bin[h] = p; } int main() { double beginTime = clock(); int i = 0; FILE *fp = NULL; fp = fopen("source.txt","r"); if ( fp == NULL ) { printf("文件打开错误"); return 1; } //ifstream in("source.txt"); for ( i = 0; i < NHASH; i++) { bin[i] = NULL; } char tempWord[100]; while( fscanf(fp, "%s", tempWord) != EOF ) { incword( tempWord ); } nodeptr p = NULL; for ( i = 0; i < NHASH; i++) { p = NULL; for ( p = bin[i]; p != NULL; p = p->next) { //cout << p->word<< " " << p->count << endl; printf("%s %d\n", p->word, p->count); } } double endTime = clock(); printf("time:%g ms", endTime - beginTime ); return 0; } */ #include <stdio.h> #include <string.h> 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/*
 * Word-frequency counter backed by an (unbalanced) binary search tree.
 *
 * Reads whitespace-separated words from "source.txt", counts how many times
 * each distinct word occurs, prints every word with its count and finally
 * prints the elapsed CPU time in milliseconds.
 */

typedef struct node *nodeptr;

/* One tree node: an owned copy of the word plus its occurrence count. */
typedef struct node {
    char *word;     /* heap-allocated, NUL-terminated copy of the word */
    int count;      /* number of times the word has been seen */
    nodeptr lchild; /* words that compare less than this word */
    nodeptr rchild; /* words that compare greater than this word */
} node;

/*
 * Record one occurrence of `s` in the tree rooted at `root`.
 *
 * If the word is already present its count is incremented; otherwise a new
 * leaf holding a private copy of `s` is attached with count 1.
 *
 * Returns the (possibly new) root of the tree. The caller must store the
 * return value, because inserting into an empty tree creates the root.
 * On allocation failure a message is written to stderr and the process exits.
 */
nodeptr setUp(nodeptr root, const char *s)
{
    /* `link` always points at the pointer that should refer to the node for
     * `s`: first the root itself, then a child field of the current node. */
    nodeptr *link = &root;

    while (*link != NULL) {
        int cmp = strcmp((*link)->word, s);
        if (cmp == 0) {
            (*link)->count++;
            return root;
        }
        link = (cmp < 0) ? &(*link)->rchild : &(*link)->lchild;
    }

    nodeptr fresh = malloc(sizeof *fresh);
    char *copy = malloc(strlen(s) + 1);
    if (fresh == NULL || copy == NULL) {
        free(fresh);
        free(copy);
        fprintf(stderr, "out of memory\n");
        exit(EXIT_FAILURE);
    }
    strcpy(copy, s);

    fresh->word = copy;
    fresh->count = 1;
    fresh->lchild = NULL;
    fresh->rchild = NULL;
    *link = fresh;
    return root;
}

/*
 * Print every word and its count, one "word count" pair per line.
 *
 * The right subtree is visited before the node and the left subtree after
 * it, so words are printed in descending strcmp() order.
 */
void Traverse(nodeptr root)
{
    if (root == NULL) {
        return;
    }
    Traverse(root->rchild);
    printf("%s %d\n", root->word, root->count);
    Traverse(root->lchild);
}

/* Release every node of the tree together with its word copy. */
static void freeTree(nodeptr root)
{
    if (root == NULL) {
        return;
    }
    freeTree(root->lchild);
    freeTree(root->rchild);
    free(root->word);
    free(root);
}

/*
 * Count the words of "source.txt" and print the result.
 * Returns 0 on success, 1 if the input file cannot be opened.
 */
int main(void)
{
    clock_t beginTime = clock();

    FILE *fp = fopen("source.txt", "r");
    if (fp == NULL) {
        printf("文件打开错误");
        return 1;
    }

    nodeptr root = NULL;
    char tempWord[100];
    /* The field width keeps fscanf inside tempWord; a token longer than 99
     * characters is consumed in several pieces instead of overflowing. */
    while (fscanf(fp, "%99s", tempWord) == 1) {
        root = setUp(root, tempWord);
    }
    fclose(fp);

    Traverse(root);
    freeTree(root);

    clock_t endTime = clock();
    /* clock() counts in CLOCKS_PER_SEC units, so convert to milliseconds. */
    printf("time:%g ms", (double)(endTime - beginTime) * 1000.0 / CLOCKS_PER_SEC);
    return 0;
}

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * English word-frequency statistics and lookup tool.
 *
 * The words of a text file are normalised (letters only, lower case) and
 * counted. The counts are kept in a flat array (`wordList`), mirrored into a
 * singly linked list and a chained hash table, and can be queried through
 * several search strategies or listed by frequency with several sorts.
 *
 * Note from the original post: the request was to fix the linked-list
 * related errors. The list no longer modifies `wordCount`, it is filled per
 * word occurrence so its frequencies are correct, and it is freed on exit.
 */

#define MAX_WORDS 10000      /* maximum number of distinct words */
#define MAX_WORD_LENGTH 50   /* size of a word buffer, including the NUL */
#define HASH_TABLE_SIZE 1009 /* number of buckets in the chained hash table */

/* A word together with its occurrence count. */
typedef struct {
    char word[MAX_WORD_LENGTH];
    int frequency;
} WordFrequency;

/* Singly linked list node. */
typedef struct ListNode {
    WordFrequency data;
    struct ListNode *next;
} ListNode;

/* Binary search tree node. */
typedef struct TreeNode {
    WordFrequency data;
    struct TreeNode *left;
    struct TreeNode *right;
} TreeNode;

/* Hash table node (separate chaining). */
typedef struct HashNode {
    WordFrequency data;
    struct HashNode *next;
} HashNode;

/* AVL tree node; `height` is 1 for a leaf. */
typedef struct AVLNode {
    WordFrequency data;
    struct AVLNode *left;
    struct AVLNode *right;
    int height;
} AVLNode;

/* Global state. */
WordFrequency wordList[MAX_WORDS];   /* distinct words in first-seen order */
ListNode *wordListHead = NULL;       /* linked-list mirror of wordList */
int wordCount = 0;                   /* number of used entries in wordList */
HashNode *hashTable[HASH_TABLE_SIZE];

/*
 * Record one occurrence of `word` in `wordList`.
 *
 * Returns the index of the word's entry, or -1 (after printing an error)
 * when the word is new and the array is already full. Words that do not fit
 * into MAX_WORD_LENGTH - 1 characters are stored truncated.
 */
int insertWord(const char *word)
{
    for (int i = 0; i < wordCount; i++) {
        if (strcmp(wordList[i].word, word) == 0) {
            wordList[i].frequency++;
            return i;
        }
    }

    if (wordCount >= MAX_WORDS) {
        printf("错误: 单词已满!\n");
        return -1;
    }

    snprintf(wordList[wordCount].word, sizeof wordList[wordCount].word, "%s", word);
    wordList[wordCount].frequency = 1;
    return wordCount++;
}

/* Return the larger of two ints. */
int max(int a, int b)
{
    return (a > b) ? a : b;
}

/* Return the smaller of two ints. */
int min(int a, int b)
{
    return (a < b) ? a : b;
}

/* qsort comparator: ascending dictionary (strcmp) order of the words. */
int compareWords(const void *a, const void *b)
{
    const WordFrequency *lhs = a;
    const WordFrequency *rhs = b;
    return strcmp(lhs->word, rhs->word);
}

/* qsort comparator: descending order of frequency. */
int compareFrequency(const void *a, const void *b)
{
    int fa = ((const WordFrequency *)a)->frequency;
    int fb = ((const WordFrequency *)b)->frequency;
    return (fb > fa) - (fb < fa);
}

/*
 * Consume the rest of the current stdin line.
 * Returns the last value read: '\n', or EOF when input ended first, so the
 * loop cannot spin forever on a closed stdin.
 */
static int discardLine(void)
{
    int c;
    while ((c = getchar()) != '\n' && c != EOF) {
        /* skip */
    }
    return c;
}

/*
 * Ask whether to go back to the main menu.
 * Returns 1 when the user enters 1; any other number, unparsable input or
 * end of input counts as "exit" and returns 0.
 */
int askToContinue(void)
{
    int choice = 0;

    printf("\n1. 返回主菜单\n");
    printf("2. 退出程序\n");
    printf("请选择: ");
    if (scanf("%d", &choice) != 1) {
        choice = 0; /* never read an indeterminate value */
    }
    discardLine();
    return choice == 1;
}

/*
 * Record one occurrence of `word` in the linked list.
 *
 * An existing node has its frequency incremented; otherwise a new node with
 * frequency 1 is pushed at the head. This function deliberately does not
 * touch `wordCount`: that counter belongs to `wordList`, and changing it here
 * corrupted every loop bounded by it.
 */
void insertWordToList(const char *word)
{
    for (ListNode *cur = wordListHead; cur != NULL; cur = cur->next) {
        if (strcmp(cur->data.word, word) == 0) {
            cur->data.frequency++;
            return;
        }
    }

    ListNode *newNode = malloc(sizeof *newNode);
    if (newNode == NULL) {
        printf("内存分配失败!\n");
        return;
    }

    snprintf(newNode->data.word, sizeof newNode->data.word, "%s", word);
    newNode->data.frequency = 1;
    newNode->next = wordListHead; /* also correct when the list is empty */
    wordListHead = newNode;
}

/* Release every node of the linked list and reset the head. */
static void freeWordList(void)
{
    ListNode *cur = wordListHead;
    while (cur != NULL) {
        ListNode *next = cur->next;
        free(cur);
        cur = next;
    }
    wordListHead = NULL;
}

/*
 * Prompt for a search word and read it into `target` (capacity `size`).
 *
 * The trailing newline is removed, any overlong remainder of the line is
 * discarded, and the word is lower-cased because all stored words are lower
 * case. Returns 1 on success, 0 (after printing an error) if reading failed.
 */
static int readTargetWord(char *target, size_t size)
{
    printf("请输入要查找的单词: ");
    if (fgets(target, (int)size, stdin) == NULL) {
        printf("读取输入失败!\n");
        return 0;
    }

    size_t len = strcspn(target, "\n");
    if (target[len] == '\0' && len == size - 1) {
        discardLine(); /* the line did not fit; drop what is left of it */
    }
    target[len] = '\0';

    for (size_t i = 0; i < len; i++) {
        target[i] = (char)tolower((unsigned char)target[i]);
    }
    return 1;
}

/*
 * Return a heap-allocated copy of the used part of `wordList`, or NULL
 * (after printing an error) if allocation fails. At least one element is
 * always requested so an empty word list is not mistaken for a failure.
 * The caller owns the result and must free() it.
 */
static WordFrequency *copyWordList(void)
{
    size_t count = (wordCount > 0) ? (size_t)wordCount : 1;
    WordFrequency *copy = malloc(count * sizeof *copy);
    if (copy == NULL) {
        printf("内存分配失败!\n");
        return NULL;
    }
    memcpy(copy, wordList, (size_t)wordCount * sizeof *copy);
    return copy;
}

/* Function declarations. */
void menu(void);
void sequentialSearch(void);
void binarySearch(void);
void treeSearch(void);
void hashTableSearch(void);
void chainHashSearch(void);
void avlTreeSearch(void);
void heapSort(void);
void quickSort(void);
void shellSort(void);
TreeNode* insertBST(TreeNode *root, const WordFrequency *data);
void searchBST(TreeNode *root, const char *target);
void freeBST(TreeNode *root);
void initHashTable(void);
unsigned long hashFunc(const char *word);
void insertHash(const WordFrequency *data);
void searchHash(const char *target);
void freeHashTable(void);
HashNode* createHashNode(const WordFrequency *data);
AVLNode* insertAVL(AVLNode *node, const WordFrequency *data);
int getHeight(AVLNode *node);
int getBalance(AVLNode *node);
AVLNode* rightRotate(AVLNode *y);
AVLNode* leftRotate(AVLNode *x);
void searchAVL(AVLNode *root, const char *target);
void freeAVL(AVLNode *root);
void heapify(WordFrequency arr[], int n, int i);
void heapSortWords(WordFrequency arr[], int n);

/*
 * Load the word file, build the lookup structures and run the menu loop.
 * Returns 0 on normal exit, 1 if the input file cannot be opened.
 */
int main(void)
{
    initHashTable();

    FILE *file = fopen("C:/Users/English.txt", "r");
    if (file == NULL) {
        printf("无法打开文件 English.txt!\n");
        printf("请确保文件存在并且可读。\n");
        return 1;
    }

    char word[MAX_WORD_LENGTH];
    /* "%49s" == MAX_WORD_LENGTH - 1: keeps fscanf inside `word`. */
    while (fscanf(file, "%49s", word) == 1) {
        /* Keep letters only and convert them to lower case, in place. */
        int j = 0;
        for (int i = 0; word[i] != '\0'; i++) {
            unsigned char ch = (unsigned char)word[i];
            if (isalpha(ch)) {
                word[j++] = (char)tolower(ch);
            }
        }
        word[j] = '\0';

        /* Feed the list once per occurrence so its frequencies match the
         * array; skip it when the array rejected the word as "full". */
        if (j > 0 && insertWord(word) >= 0) {
            insertWordToList(word);
        }
    }
    fclose(file);
    printf("成功读取并处理了 %d 个单词\n\n", wordCount);

    /* Populate the chained hash table from the finished counts. */
    for (int i = 0; i < wordCount; i++) {
        insertHash(&wordList[i]);
    }

    int choice = 0;
    do {
        menu();
        printf("请输入选择 (1-10): ");

        int fields = scanf("%d", &choice);
        if (fields == EOF) {
            break; /* stdin closed: leave instead of looping forever */
        }
        if (fields != 1) {
            printf("输入无效,请输入数字!\n");
            discardLine();
            choice = 0;
            continue;
        }
        discardLine();

        switch (choice) {
        case 1: sequentialSearch(); break;
        case 2: binarySearch(); break;
        case 3: treeSearch(); break;
        case 4: hashTableSearch(); break;
        case 5: chainHashSearch(); break;
        case 6: avlTreeSearch(); break;
        case 7: heapSort(); break;
        case 8: quickSort(); break;
        case 9: shellSort(); break;
        case 10: printf("退出系统...\n"); break;
        default: printf("无效选择,请重试!\n");
        }
    } while (choice != 10);

    freeHashTable();
    freeWordList();
    return 0;
}

/* Print the main menu. */
void menu(void)
{
    printf("\n");
    printf("********************************************************\n");
    printf("* 英语单词词频统计和检索系统 *\n");
    printf("********************************************************\n");
    printf("* 1. 基于链的顺序查找 *\n");
    printf("* 2. 基于顺序的折半查找 *\n");
    printf("* 3. 基于二叉树的查找 *\n");
    printf("* 4. 基于开放地址法的散列查找 *\n");
    printf("* 5. 基于链地址法的散列查找 *\n");
    printf("* 6. 基于平衡二叉树的查找 *\n");
    printf("* 7. 对单词按词频进行堆排序 *\n");
    printf("* 8. 对单词按词频进行快速排序 *\n");
    printf("* 9. 对单词按词频进行希尔排序 *\n");
    printf("* 10. 退出系统 *\n");
    printf("********************************************************\n");
}

/* Menu item 1: sequential search through the linked list. */
void sequentialSearch(void)
{
    char target[MAX_WORD_LENGTH];
    if (!readTargetWord(target, sizeof target)) {
        return;
    }

    const ListNode *cur = wordListHead;
    while (cur != NULL && strcmp(cur->data.word, target) != 0) {
        cur = cur->next;
    }

    if (cur != NULL) {
        printf("单词 \"%s\" 出现次数: %d\n", target, cur->data.frequency);
    } else {
        printf("未找到单词 \"%s\"\n", target);
    }

    if (!askToContinue()) {
        exit(0);
    }
}

/* Menu item 2: binary search over a word-sorted copy of the array. */
void binarySearch(void)
{
    char target[MAX_WORD_LENGTH];
    if (!readTargetWord(target, sizeof target)) {
        return;
    }

    WordFrequency *sortedWords = copyWordList();
    if (sortedWords == NULL) {
        return;
    }
    qsort(sortedWords, (size_t)wordCount, sizeof *sortedWords, compareWords);

    int left = 0;
    int right = wordCount - 1;
    int found = 0;
    while (left <= right) {
        int mid = left + (right - left) / 2;
        int cmp = strcmp(sortedWords[mid].word, target);
        if (cmp == 0) {
            printf("单词 \"%s\" 出现次数: %d\n", target, sortedWords[mid].frequency);
            found = 1;
            break;
        }
        if (cmp < 0) {
            left = mid + 1;
        } else {
            right = mid - 1;
        }
    }
    if (!found) {
        printf("未找到单词 \"%s\"\n", target);
    }

    free(sortedWords);
    if (!askToContinue()) {
        exit(0);
    }
}

/* Menu item 3: search in a binary search tree built from the array. */
void treeSearch(void)
{
    char target[MAX_WORD_LENGTH];
    if (!readTargetWord(target, sizeof target)) {
        return;
    }

    TreeNode *root = NULL;
    for (int i = 0; i < wordCount; i++) {
        root = insertBST(root, &wordList[i]);
    }
    searchBST(root, target);
    freeBST(root);

    if (!askToContinue()) {
        exit(0);
    }
}

/*
 * Insert a copy of `data` into the BST rooted at `root`, ordered by word.
 * A word that is already present is left untouched. Returns the root of the
 * updated tree; exits the process if memory cannot be allocated.
 */
TreeNode* insertBST(TreeNode *root, const WordFrequency *data)
{
    if (root == NULL) {
        TreeNode *leaf = malloc(sizeof *leaf);
        if (leaf == NULL) {
            printf("内存分配失败!\n");
            exit(1);
        }
        leaf->data = *data;
        leaf->left = NULL;
        leaf->right = NULL;
        return leaf;
    }

    int cmp = strcmp(data->word, root->data.word);
    if (cmp < 0) {
        root->left = insertBST(root->left, data);
    } else if (cmp > 0) {
        root->right = insertBST(root->right, data);
    }
    return root;
}

/* Look `target` up in the BST and print its frequency or a not-found line. */
void searchBST(TreeNode *root, const char *target)
{
    while (root != NULL) {
        int cmp = strcmp(target, root->data.word);
        if (cmp == 0) {
            printf("单词 \"%s\" 出现次数: %d\n", target, root->data.frequency);
            return;
        }
        root = (cmp < 0) ? root->left : root->right;
    }
    printf("未找到单词 \"%s\"\n", target);
}

/* Free every node of the BST (post-order). */
void freeBST(TreeNode *root)
{
    if (root == NULL) {
        return;
    }
    freeBST(root->left);
    freeBST(root->right);
    free(root);
}

/* Set every hash bucket to the empty chain. */
void initHashTable(void)
{
    for (int i = 0; i < HASH_TABLE_SIZE; i++) {
        hashTable[i] = NULL;
    }
}

/*
 * Hash a word to a bucket index in [0, HASH_TABLE_SIZE).
 * Starts at 5381 and folds each byte in as hash * 33 + byte.
 */
unsigned long hashFunc(const char *word)
{
    unsigned long hash = 5381;
    for (const unsigned char *p = (const unsigned char *)word; *p != '\0'; p++) {
        hash = ((hash << 5) + hash) + *p; /* hash * 33 + byte */
    }
    return hash % HASH_TABLE_SIZE;
}

/*
 * Allocate an unlinked hash node holding a copy of `data`.
 * Exits the process if memory cannot be allocated.
 */
HashNode* createHashNode(const WordFrequency *data)
{
    HashNode *node = malloc(sizeof *node);
    if (node == NULL) {
        printf("内存分配失败!\n");
        exit(1);
    }
    node->data = *data;
    node->next = NULL;
    return node;
}

/* Push a copy of `data` at the head of its bucket's chain. */
void insertHash(const WordFrequency *data)
{
    unsigned long index = hashFunc(data->word);
    HashNode *newNode = createHashNode(data);
    newNode->next = hashTable[index];
    hashTable[index] = newNode;
}

/* Look `target` up in the hash table and print its frequency or not-found. */
void searchHash(const char *target)
{
    for (const HashNode *cur = hashTable[hashFunc(target)]; cur != NULL; cur = cur->next) {
        if (strcmp(cur->data.word, target) == 0) {
            printf("单词 \"%s\" 出现次数: %d\n", target, cur->data.frequency);
            return;
        }
    }
    printf("未找到单词 \"%s\"\n", target);
}

/* Free every chain of the hash table and reset the buckets. */
void freeHashTable(void)
{
    for (int i = 0; i < HASH_TABLE_SIZE; i++) {
        HashNode *cur = hashTable[i];
        while (cur != NULL) {
            HashNode *next = cur->next;
            free(cur);
            cur = next;
        }
        hashTable[i] = NULL;
    }
}

/* Menu item 4: open-addressing hash search (placeholder, not implemented). */
void hashTableSearch(void)
{
    printf("基于开放地址法的散列查找功能尚未实现。\n");
    if (!askToContinue()) {
        exit(0);
    }
}

/* Menu item 5: search in the chained hash table. */
void chainHashSearch(void)
{
    char target[MAX_WORD_LENGTH];
    if (!readTargetWord(target, sizeof target)) {
        return;
    }

    searchHash(target);

    if (!askToContinue()) {
        exit(0);
    }
}

/* Height of an AVL subtree; an empty subtree has height 0. */
int getHeight(AVLNode *node)
{
    return (node == NULL) ? 0 : node->height;
}

/* Balance factor: left height minus right height (0 for an empty subtree). */
int getBalance(AVLNode *node)
{
    if (node == NULL) {
        return 0;
    }
    return getHeight(node->left) - getHeight(node->right);
}

/*
 * Rotate the subtree rooted at `y` to the right.
 * `y` must have a left child; that child becomes and is returned as the new
 * subtree root.
 */
AVLNode* rightRotate(AVLNode *y)
{
    AVLNode *x = y->left;
    AVLNode *T2 = x->right;

    x->right = y;
    y->left = T2;

    /* y is now below x, so its height must be refreshed first. */
    y->height = max(getHeight(y->left), getHeight(y->right)) + 1;
    x->height = max(getHeight(x->left), getHeight(x->right)) + 1;
    return x;
}

/*
 * Rotate the subtree rooted at `x` to the left.
 * `x` must have a right child; that child becomes and is returned as the new
 * subtree root.
 */
AVLNode* leftRotate(AVLNode *x)
{
    AVLNode *y = x->right;
    AVLNode *T2 = y->left;

    y->left = x;
    x->right = T2;

    /* x is now below y, so its height must be refreshed first. */
    x->height = max(getHeight(x->left), getHeight(x->right)) + 1;
    y->height = max(getHeight(y->left), getHeight(y->right)) + 1;
    return y;
}

/*
 * Insert a copy of `data` into the AVL tree rooted at `node`, rebalancing on
 * the way back up. A word that is already present is left untouched.
 * Returns the root of the updated subtree; exits the process if memory
 * cannot be allocated.
 */
AVLNode* insertAVL(AVLNode *node, const WordFrequency *data)
{
    if (node == NULL) {
        AVLNode *newNode = malloc(sizeof *newNode);
        if (newNode == NULL) {
            printf("内存分配失败!\n");
            exit(1);
        }
        newNode->data = *data;
        newNode->left = NULL;
        newNode->right = NULL;
        newNode->height = 1; /* a new leaf has height 1 */
        return newNode;
    }

    int cmp = strcmp(data->word, node->data.word);
    if (cmp < 0) {
        node->left = insertAVL(node->left, data);
    } else if (cmp > 0) {
        node->right = insertAVL(node->right, data);
    } else {
        return node; /* duplicate word: nothing to insert */
    }

    node->height = 1 + max(getHeight(node->left), getHeight(node->right));
    int balance = getBalance(node);

    if (balance > 1) {
        /* Left-heavy: the new word went into the left child's subtree. */
        int side = strcmp(data->word, node->left->data.word);
        if (side < 0) { /* left-left */
            return rightRotate(node);
        }
        if (side > 0) { /* left-right */
            node->left = leftRotate(node->left);
            return rightRotate(node);
        }
    } else if (balance < -1) {
        /* Right-heavy: the new word went into the right child's subtree. */
        int side = strcmp(data->word, node->right->data.word);
        if (side > 0) { /* right-right */
            return leftRotate(node);
        }
        if (side < 0) { /* right-left */
            node->right = rightRotate(node->right);
            return leftRotate(node);
        }
    }
    return node;
}

/* Look `target` up in the AVL tree and print its frequency or not-found. */
void searchAVL(AVLNode *root, const char *target)
{
    while (root != NULL) {
        int cmp = strcmp(target, root->data.word);
        if (cmp == 0) {
            printf("单词 \"%s\" 出现次数: %d\n", target, root->data.frequency);
            return;
        }
        root = (cmp < 0) ? root->left : root->right;
    }
    printf("未找到单词 \"%s\"\n", target);
}

/* Free every node of the AVL tree (post-order). */
void freeAVL(AVLNode *root)
{
    if (root == NULL) {
        return;
    }
    freeAVL(root->left);
    freeAVL(root->right);
    free(root);
}

/* Menu item 6: search in an AVL tree built from the array. */
void avlTreeSearch(void)
{
    char target[MAX_WORD_LENGTH];
    if (!readTargetWord(target, sizeof target)) {
        return;
    }

    AVLNode *root = NULL;
    for (int i = 0; i < wordCount; i++) {
        root = insertAVL(root, &wordList[i]);
    }
    searchAVL(root, target);
    freeAVL(root);

    if (!askToContinue()) {
        exit(0);
    }
}

/*
 * Sift element `i` down so the subtree rooted at `i` is a max-heap by
 * frequency. Only the first `n` elements of `arr` belong to the heap.
 */
void heapify(WordFrequency arr[], int n, int i)
{
    for (;;) {
        int largest = i;
        int left = 2 * i + 1;
        int right = 2 * i + 2;

        if (left < n && arr[left].frequency > arr[largest].frequency) {
            largest = left;
        }
        if (right < n && arr[right].frequency > arr[largest].frequency) {
            largest = right;
        }
        if (largest == i) {
            return;
        }

        WordFrequency temp = arr[i];
        arr[i] = arr[largest];
        arr[largest] = temp;
        i = largest; /* continue sifting in the affected subtree */
    }
}

/* Heap-sort the first `n` elements of `arr` into ascending frequency. */
void heapSortWords(WordFrequency arr[], int n)
{
    /* Build the max-heap bottom-up. */
    for (int i = n / 2 - 1; i >= 0; i--) {
        heapify(arr, n, i);
    }
    /* Repeatedly move the current maximum behind the shrinking heap. */
    for (int i = n - 1; i > 0; i--) {
        WordFrequency temp = arr[0];
        arr[0] = arr[i];
        arr[i] = temp;
        heapify(arr, i, 0);
    }
}

/* Menu item 7: list the 20 most frequent words using heap sort. */
void heapSort(void)
{
    printf("按词频进行堆排序(降序):\n");

    WordFrequency *sortedWords = copyWordList();
    if (sortedWords == NULL) {
        return;
    }
    heapSortWords(sortedWords, wordCount);

    /* The array is ascending, so walk it backwards for descending output. */
    printf("排序结果(前20个高频词):\n");
    for (int i = wordCount - 1; i >= max(0, wordCount - 20); i--) {
        printf("%d. %s: %d\n", wordCount - i, sortedWords[i].word, sortedWords[i].frequency);
    }
    printf("\n总词数: %d\n", wordCount);

    free(sortedWords);
    if (!askToContinue()) {
        exit(0);
    }
}

/* Menu item 8: list the 20 most frequent words using the library qsort. */
void quickSort(void)
{
    printf("按词频进行快速排序(降序):\n");

    WordFrequency *sortedWords = copyWordList();
    if (sortedWords == NULL) {
        return;
    }
    qsort(sortedWords, (size_t)wordCount, sizeof *sortedWords, compareFrequency);

    printf("排序结果(前20个高频词):\n");
    for (int i = 0; i < min(20, wordCount); i++) {
        printf("%d. %s: %d\n", i + 1, sortedWords[i].word, sortedWords[i].frequency);
    }
    printf("\n总词数: %d\n", wordCount);

    free(sortedWords);
    if (!askToContinue()) {
        exit(0);
    }
}

/* Menu item 9: list the 20 most frequent words using Shell sort. */
void shellSort(void)
{
    printf("按词频进行希尔排序(降序):\n");

    WordFrequency *sortedWords = copyWordList();
    if (sortedWords == NULL) {
        return;
    }

    /* Gapped insertion sort, descending by frequency, halving the gap. */
    for (int gap = wordCount / 2; gap > 0; gap /= 2) {
        for (int i = gap; i < wordCount; i++) {
            WordFrequency temp = sortedWords[i];
            int j = i;
            while (j >= gap && sortedWords[j - gap].frequency < temp.frequency) {
                sortedWords[j] = sortedWords[j - gap];
                j -= gap;
            }
            sortedWords[j] = temp;
        }
    }

    printf("排序结果(前20个高频词):\n");
    for (int i = 0; i < min(20, wordCount); i++) {
        printf("%d. %s: %d\n", i + 1, sortedWords[i].word, sortedWords[i].frequency);
    }
    printf("\n总词数: %d\n", wordCount);

    free(sortedWords);
    if (!askToContinue()) {
        exit(0);
    }
}
最新发布
06-27
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值