#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAX_WORDS 10000 // capacity of the global wordList array (maximum number of distinct words)
#define MAX_WORD_LENGTH 50 // size in bytes of each word buffer, terminating '\0' included
#define HASH_TABLE_SIZE 1009 // number of buckets in hashTable; also the modulus used by hashFunc
// Word/frequency record: one word together with its occurrence count.
typedef struct {
char word[MAX_WORD_LENGTH]; // NUL-terminated word text
int frequency; // number of occurrences counted for this word
} WordFrequency;
// Singly linked list node holding one word/frequency record.
typedef struct ListNode {
WordFrequency data; // payload stored by value
struct ListNode *next; // following node, or NULL at the end of the list
} ListNode;
// Binary search tree node holding one word/frequency record.
typedef struct TreeNode {
WordFrequency data; // payload stored by value; data.word is the ordering key
struct TreeNode *left; // left child, or NULL
struct TreeNode *right; // right child, or NULL
} TreeNode;
// Hash table node (separate chaining): one entry in a bucket's linked list.
typedef struct HashNode {
WordFrequency data; // payload stored by value
struct HashNode *next; // next entry in the same bucket, or NULL
} HashNode;
// AVL tree node holding one word/frequency record.
typedef struct AVLNode {
WordFrequency data; // payload stored by value; data.word is the ordering key
struct AVLNode *left; // left child, or NULL
struct AVLNode *right; // right child, or NULL
int height; // subtree height; a newly created leaf has height 1
} AVLNode;
// Global state shared by the whole program.
WordFrequency wordList[MAX_WORDS]; // sequential table of distinct words and their counts
ListNode *wordListHead = NULL; // head of the linked word list; NULL while the list is empty
int wordCount = 0; // count of words recorded so far (incremented by insertWord when it adds an entry)
HashNode *hashTable[HASH_TABLE_SIZE]; // bucket heads of the chained hash table
// 插入单词到单词列表
int insertWord(const char *word) {
for (int i = 0; i < wordCount; i++) {
if (strcmp(wordList[i].word, word) == 0) {
wordList[i].frequency++;
return i;
}
}
if (wordCount < MAX_WORDS) {
strcpy(wordList[wordCount].word, word);
wordList[wordCount].frequency = 1;
wordCount++;
return wordCount - 1;
}
printf("错误: 单词列表已满!\n");
return -1;
}
/* Return the larger of two ints (returns b when they are equal). */
int max(int a, int b) {
    if (a > b) {
        return a;
    }
    return b;
}
/* Return the smaller of two ints (returns b when they are equal). */
int min(int a, int b) {
    if (a < b) {
        return a;
    }
    return b;
}
// 比较函数 - 按单词字典序
int compareWords(const void *a, const void *b) {
return strcmp(((WordFrequency *)a)->word, ((WordFrequency *)b)->word);
}
// 比较函数 - 按频率
int compareFrequency(const void *a, const void *b) {
return ((WordFrequency *)b)->frequency - ((WordFrequency *)a)->frequency; // 降序排列
}
/*
 * Ask the user whether to go back to the main menu or quit.
 *
 * Returns 1 when the user enters 1 (back to the menu) and 0 for any other
 * number. Non-numeric input is rejected and the prompt is shown again rather
 * than reading an uninitialised value. End of input (EOF) returns 0 so the
 * caller can terminate instead of looping forever.
 *
 * The remainder of the input line is always consumed, so leftover characters
 * cannot leak into the next prompt.
 */
int askToContinue(void) {
    for (;;) {
        int choice = 0;

        printf("\n1. 返回主菜单\n");
        printf("2. 退出程序\n");
        printf("请选择: ");

        int fields = scanf("%d", &choice);
        if (fields == EOF) {
            return 0;
        }

        /* Drop whatever is left on the line, including the newline. */
        int ch;
        while ((ch = getchar()) != '\n' && ch != EOF) {
        }

        if (fields == 1) {
            return choice == 1;
        }
        printf("输入无效,请输入数字!\n");
    }
}
/*
 * Record `word` in the global linked list headed by wordListHead.
 *
 * If a node with the same text already exists its frequency is incremented;
 * otherwise a new node with frequency 1 is pushed onto the head of the list.
 * The text is clamped to MAX_WORD_LENGTH - 1 characters before both the
 * lookup and the store, so an over-long word matches the node it created.
 *
 * wordCount is intentionally NOT modified here: that counter describes the
 * wordList array. Incrementing it from this function made a loop such as
 *     for (i = 0; i < wordCount; i++) insertWordToList(wordList[i].word);
 * chase a moving bound and walk into unused array slots.
 *
 * A NULL `word` is ignored. On allocation failure a message is printed and
 * the list is left unchanged.
 */
void insertWordToList(const char *word) {
    if (word == NULL) {
        return;
    }

    char key[MAX_WORD_LENGTH];
    snprintf(key, sizeof key, "%s", word);

    /* Existing word: just bump its count. */
    for (ListNode *node = wordListHead; node != NULL; node = node->next) {
        if (strcmp(node->data.word, key) == 0) {
            node->data.frequency++;
            return;
        }
    }

    ListNode *newNode = malloc(sizeof *newNode);
    if (newNode == NULL) {
        printf("内存分配失败!\n");
        return;
    }

    strcpy(newNode->data.word, key);   /* key always fits the slot */
    newNode->data.frequency = 1;

    /* Head insertion works for an empty list too (next becomes NULL). */
    newNode->next = wordListHead;
    wordListHead = newNode;
}
// Forward declarations for functions that are defined further down in this file.
// Menu display and the nine menu actions dispatched from main().
void menu();
void sequentialSearch();
void binarySearch();
void treeSearch();
void hashTableSearch();
void chainHashSearch();
void avlTreeSearch();
void heapSort();
void quickSort();
void shellSort();
// Binary search tree helpers.
TreeNode* insertBST(TreeNode *root, const WordFrequency *data);
void searchBST(TreeNode *root, const char *target);
void freeBST(TreeNode *root);
// Chained hash table helpers (operate on the global hashTable).
void initHashTable();
unsigned long hashFunc(const char *word);
void insertHash(const WordFrequency *data);
void searchHash(const char *target);
void freeHashTable();
HashNode* createHashNode(const WordFrequency *data);
// AVL tree helpers.
AVLNode* insertAVL(AVLNode *node, const WordFrequency *data);
int getHeight(AVLNode *node);
int getBalance(AVLNode *node);
AVLNode* rightRotate(AVLNode *y);
AVLNode* leftRotate(AVLNode *x);
void searchAVL(AVLNode *root, const char *target);
void freeAVL(AVLNode *root);
// Heap sort helpers.
void heapify(WordFrequency arr[], int n, int i);
void heapSortWords(WordFrequency arr[], int n);
// 主函数
int main() {
// 初始化哈希表
initHashTable();
// 从文件读取单词
FILE *file = fopen("C:/Users/English.txt", "r");
if (file == NULL) {
printf("无法打开文件 English.txt!\n");
printf("请确保文件存在并且可读。\n");
return 1;
}
char word[MAX_WORD_LENGTH];
// 读取文件内容并统计词频
while (fscanf(file, "%s", word) != EOF) {
// 清理单词,只保留字母并转为小写
int j = 0;
for (int i = 0; word[i] != '\0'; i++) {
if (isalpha(word[i])) {
word[j++] = tolower(word[i]);
}
}
word[j] = '\0';
if (j > 0) {
insertWord(word);
}
}
fclose(file);
printf("成功读取并处理了 %d 个单词\n\n", wordCount);
// 初始化链表结构
for (int i = 0; i < wordCount; i++) {
insertWordToList(wordList[i].word);
}
// 初始化哈希表数据
for (int i = 0; i < wordCount; i++) {
insertHash(&wordList[i]);
}
// 主循环
int choice;
do {
menu();
printf("请输入选择 (1-10): ");
if (scanf("%d", &choice) != 1) {
printf("输入无效,请输入数字!\n");
while (getchar() != '\n'); // 清除输入缓冲区
choice = 0; // 重置选择
continue;
}
while (getchar() != '\n'); // 清除输入缓冲区
switch (choice) {
case 1: sequentialSearch(); break;
case 2: binarySearch(); break;
case 3: treeSearch(); break;
case 4: hashTableSearch(); break;
case 5: chainHashSearch(); break;
case 6: avlTreeSearch(); break;
case 7: heapSort(); break;
case 8: quickSort(); break;
case 9: shellSort(); break;
case 10: printf("退出系统...\n"); break;
default: printf("无效选择,请重试!\n");
}
} while (choice != 10);
// 释放资源
freeHashTable();
return 0;
}
/*
 * Print the main menu: a blank line, the framed title and the ten numbered
 * entries. Output only; nothing is read here.
 */
void menu() {
    static const char border[] =
        "********************************************************\n";
    static const char *const entries[] = {
        "* 1. 基于链表的顺序查找 *\n",
        "* 2. 基于顺序表的折半查找 *\n",
        "* 3. 基于二叉树的查找 *\n",
        "* 4. 基于开放地址法的散列查找 *\n",
        "* 5. 基于链地址法的散列查找 *\n",
        "* 6. 基于平衡二叉树的查找 *\n",
        "* 7. 对单词按词频进行堆排序 *\n",
        "* 8. 对单词按词频进行快速排序 *\n",
        "* 9. 对单词按词频进行希尔排序 *\n",
        "* 10. 退出系统 *\n",
    };

    fputs("\n", stdout);
    fputs(border, stdout);
    fputs("* 英语单词词频统计和检索系统 *\n", stdout);
    fputs(border, stdout);
    for (size_t i = 0; i < sizeof entries / sizeof entries[0]; ++i) {
        fputs(entries[i], stdout);
    }
    fputs(border, stdout);
}
/*
 * Menu action 1: sequential search over the linked list.
 *
 * Reads one line from stdin as the target word, walks the list starting at
 * wordListHead and prints the frequency of the first exact match, or a
 * "not found" message. Afterwards asks whether to continue and exits the
 * program when the answer is not "back to menu".
 */
void sequentialSearch() {
    char target[MAX_WORD_LENGTH];

    printf("请输入要查找的单词: ");
    if (fgets(target, MAX_WORD_LENGTH, stdin) == NULL) {
        printf("读取输入失败!\n");
        return;
    }
    target[strcspn(target, "\n")] = '\0';   /* strip the trailing newline */

    const ListNode *match = NULL;
    for (const ListNode *node = wordListHead; node != NULL; node = node->next) {
        if (strcmp(node->data.word, target) == 0) {
            match = node;
            break;
        }
    }

    if (match != NULL) {
        printf("单词 \"%s\" 出现次数: %d\n", target, match->data.frequency);
    } else {
        printf("未找到单词 \"%s\"\n", target);
    }

    if (askToContinue() == 0) {
        exit(0);
    }
}
/*
 * Menu action 2: binary search over a sorted copy of wordList.
 *
 * Reads one line from stdin as the target word, copies the table, sorts the
 * copy alphabetically with qsort/compareWords and bisects it. Prints the
 * word's frequency or a "not found" message, then asks whether to continue
 * and exits the program when the answer is not "back to menu".
 *
 * An empty table is handled explicitly: at least one element is always
 * requested from malloc, because malloc(0) may legitimately return NULL and
 * would otherwise be reported as an allocation failure.
 */
void binarySearch() {
    char target[MAX_WORD_LENGTH];
    printf("请输入要查找的单词: ");
    if (fgets(target, MAX_WORD_LENGTH, stdin) == NULL) {
        printf("读取输入失败!\n");
        return;
    }
    target[strcspn(target, "\n")] = 0;   /* strip the trailing newline */

    const size_t count = wordCount > 0 ? (size_t)wordCount : 0;
    WordFrequency *sortedWords = malloc((count > 0 ? count : 1) * sizeof *sortedWords);
    if (sortedWords == NULL) {
        printf("内存分配失败!\n");
        return;
    }
    memcpy(sortedWords, wordList, count * sizeof *sortedWords);
    qsort(sortedWords, count, sizeof *sortedWords, compareWords);

    int left = 0;
    int right = wordCount - 1;
    int found = 0;
    while (left <= right) {
        int mid = left + (right - left) / 2;   /* avoids overflow of left + right */
        int cmp = strcmp(sortedWords[mid].word, target);
        if (cmp == 0) {
            printf("单词 \"%s\" 出现次数: %d\n", target, sortedWords[mid].frequency);
            found = 1;
            break;
        } else if (cmp < 0) {
            left = mid + 1;
        } else {
            right = mid - 1;
        }
    }
    if (!found) {
        printf("未找到单词 \"%s\"\n", target);
    }

    free(sortedWords);
    if (!askToContinue()) {
        exit(0);
    }
}
/*
 * Menu action 3: search through a binary search tree.
 *
 * Reads one line from stdin as the target word, builds a BST from every
 * wordList entry, lets searchBST print the result, frees the tree, then asks
 * whether to continue (exiting the program if not).
 */
void treeSearch() {
    char target[MAX_WORD_LENGTH];

    printf("请输入要查找的单词: ");
    if (fgets(target, MAX_WORD_LENGTH, stdin) == NULL) {
        printf("读取输入失败!\n");
        return;
    }
    target[strcspn(target, "\n")] = 0;   /* strip the trailing newline */

    TreeNode *tree = NULL;
    int next = 0;
    while (next < wordCount) {
        tree = insertBST(tree, &wordList[next]);
        next++;
    }

    searchBST(tree, target);
    freeBST(tree);

    if (askToContinue() == 0) {
        exit(0);
    }
}
/*
 * Insert a copy of *data into the BST rooted at `root`, ordered by word.
 *
 * A word that is already in the tree is left untouched. Returns the root of
 * the tree (the new node when `root` was NULL). If memory cannot be
 * allocated, a message is printed and the program exits with status 1.
 */
TreeNode* insertBST(TreeNode *root, const WordFrequency *data) {
    /* Walk down to the empty link where the new node belongs. */
    TreeNode **link = &root;
    while (*link != NULL) {
        const int order = strcmp(data->word, (*link)->data.word);
        if (order == 0) {
            return root;   /* duplicate key: nothing to insert */
        }
        link = (order < 0) ? &(*link)->left : &(*link)->right;
    }

    TreeNode *leaf = (TreeNode *)malloc(sizeof(TreeNode));
    if (leaf == NULL) {
        printf("内存分配失败!\n");
        exit(1);
    }
    leaf->data = *data;
    leaf->left = NULL;
    leaf->right = NULL;

    *link = leaf;
    return root;
}
/*
 * Look up `target` in the BST rooted at `root` and print either the word's
 * frequency or a "not found" message.
 */
void searchBST(TreeNode *root, const char *target) {
    TreeNode *node = root;

    while (node != NULL) {
        const int order = strcmp(target, node->data.word);
        if (order == 0) {
            printf("单词 \"%s\" 出现次数: %d\n", target, node->data.frequency);
            return;
        }
        node = (order < 0) ? node->left : node->right;
    }

    printf("未找到单词 \"%s\"\n", target);
}
/*
 * Free every node of the BST rooted at `root`. A NULL root is a no-op.
 * Left subtrees are released recursively; the right spine is followed in
 * the loop.
 */
void freeBST(TreeNode *root) {
    while (root != NULL) {
        TreeNode *rest = root->right;
        freeBST(root->left);
        free(root);
        root = rest;
    }
}
// 初始化哈希表
void initHashTable() {
for (int i = 0; i < HASH_TABLE_SIZE; i++) {
hashTable[i] = NULL;
}
}
/*
 * Hash a NUL-terminated word to a bucket index in [0, HASH_TABLE_SIZE).
 * Starts from 5381 and, for each character, multiplies the running value by
 * 33 and adds the character; the final value is reduced modulo the table size.
 */
unsigned long hashFunc(const char *word) {
    unsigned long hash = 5381;

    for (const char *p = word; *p != '\0'; ++p) {
        int c = *p;
        hash = hash * 33 + c;
    }

    return hash % HASH_TABLE_SIZE;
}
/*
 * Allocate a detached hash node holding a copy of *data (next == NULL).
 * If memory cannot be allocated, a message is printed and the program exits
 * with status 1, so the return value is never NULL.
 */
HashNode* createHashNode(const WordFrequency *data) {
    HashNode *fresh = (HashNode *)malloc(sizeof(HashNode));

    if (fresh != NULL) {
        fresh->data = *data;
        fresh->next = NULL;
        return fresh;
    }

    printf("内存分配失败!\n");
    exit(1);
}
/*
 * Add a copy of *data to the chained hash table: the new node is pushed onto
 * the front of the bucket selected by hashFunc(data->word). No duplicate
 * check is performed.
 */
void insertHash(const WordFrequency *data) {
    HashNode **bucket = &hashTable[hashFunc(data->word)];
    HashNode *entry = createHashNode(data);

    entry->next = *bucket;
    *bucket = entry;
}
/*
 * Look up `target` in the chained hash table and print either its frequency
 * or a "not found" message. Only the bucket chosen by hashFunc is scanned.
 */
void searchHash(const char *target) {
    const HashNode *entry = hashTable[hashFunc(target)];

    for (; entry != NULL; entry = entry->next) {
        if (strcmp(entry->data.word, target) != 0) {
            continue;
        }
        printf("单词 \"%s\" 出现次数: %d\n", target, entry->data.frequency);
        return;
    }

    printf("未找到单词 \"%s\"\n", target);
}
/* Free every node in every bucket of hashTable and leave all buckets NULL. */
void freeHashTable() {
    for (int slot = 0; slot < HASH_TABLE_SIZE; slot++) {
        HashNode *node = hashTable[slot];
        hashTable[slot] = NULL;

        while (node != NULL) {
            HashNode *next = node->next;
            free(node);
            node = next;
        }
    }
}
/*
 * Menu action 4: open-addressing hash search. Not implemented: prints a
 * notice, then asks whether to continue and exits the program if not.
 */
void hashTableSearch() {
    printf("基于开放地址法的散列查找功能尚未实现。\n");

    if (askToContinue()) {
        return;
    }
    exit(0);
}
/*
 * Menu action 5: search in the chained hash table.
 *
 * Reads one line from stdin as the target word and lets searchHash print the
 * result, then asks whether to continue and exits the program if not.
 */
void chainHashSearch() {
    char target[MAX_WORD_LENGTH];

    printf("请输入要查找的单词: ");
    if (fgets(target, MAX_WORD_LENGTH, stdin) == NULL) {
        printf("读取输入失败!\n");
        return;
    }
    target[strcspn(target, "\n")] = 0;   /* strip the trailing newline */

    searchHash(target);

    if (askToContinue()) {
        return;
    }
    exit(0);
}
/* AVL tree helpers. */

/* Height stored in `node`, or 0 for an empty (NULL) subtree. */
int getHeight(AVLNode *node) {
    return (node != NULL) ? node->height : 0;
}
/*
 * Balance factor of `node`: height of its left subtree minus height of its
 * right subtree. A NULL node has balance 0.
 */
int getBalance(AVLNode *node) {
    if (node != NULL) {
        const int leftHeight = getHeight(node->left);
        const int rightHeight = getHeight(node->right);
        return leftHeight - rightHeight;
    }
    return 0;
}
/*
 * Right rotation around `y`: y's left child becomes the subtree root, y
 * becomes its right child, and the child's former right subtree is
 * re-attached as y's left subtree. Heights of both nodes are recomputed
 * (y first, since it is now the lower node). Returns the new subtree root.
 * `y` and `y->left` must be non-NULL.
 */
AVLNode* rightRotate(AVLNode *y) {
    AVLNode *pivot = y->left;
    AVLNode *moved = pivot->right;

    pivot->right = y;
    y->left = moved;

    y->height = 1 + max(getHeight(y->left), getHeight(y->right));
    pivot->height = 1 + max(getHeight(pivot->left), getHeight(pivot->right));

    return pivot;
}
/*
 * Left rotation around `x`: x's right child becomes the subtree root, x
 * becomes its left child, and the child's former left subtree is re-attached
 * as x's right subtree. Heights of both nodes are recomputed (x first, since
 * it is now the lower node). Returns the new subtree root.
 * `x` and `x->right` must be non-NULL.
 */
AVLNode* leftRotate(AVLNode *x) {
    AVLNode *pivot = x->right;
    AVLNode *moved = pivot->left;

    pivot->left = x;
    x->right = moved;

    x->height = 1 + max(getHeight(x->left), getHeight(x->right));
    pivot->height = 1 + max(getHeight(pivot->left), getHeight(pivot->right));

    return pivot;
}
/*
 * Insert a copy of *data into the AVL tree rooted at `node`, ordered by word,
 * and rebalance on the way back up. A word already present is not inserted.
 * Returns the (possibly new) root of this subtree. If memory cannot be
 * allocated, a message is printed and the program exits with status 1.
 */
AVLNode* insertAVL(AVLNode *node, const WordFrequency *data) {
    /* Empty spot reached: create the leaf here. */
    if (node == NULL) {
        AVLNode *leaf = (AVLNode *)malloc(sizeof(AVLNode));
        if (leaf == NULL) {
            printf("内存分配失败!\n");
            exit(1);
        }
        leaf->data = *data;
        leaf->left = NULL;
        leaf->right = NULL;
        leaf->height = 1;
        return leaf;
    }

    /* Ordinary BST descent. */
    const int order = strcmp(data->word, node->data.word);
    if (order == 0) {
        return node;   /* duplicate key: tree unchanged */
    }
    if (order < 0) {
        node->left = insertAVL(node->left, data);
    } else {
        node->right = insertAVL(node->right, data);
    }

    node->height = 1 + max(getHeight(node->left), getHeight(node->right));
    const int balance = getBalance(node);

    if (balance > 1) {
        /* Left-heavy: the new key's position relative to the left child
         * selects a single (left-left) or double (left-right) rotation. */
        const int side = strcmp(data->word, node->left->data.word);
        if (side < 0) {
            return rightRotate(node);
        }
        if (side > 0) {
            node->left = leftRotate(node->left);
            return rightRotate(node);
        }
    } else if (balance < -1) {
        /* Right-heavy: mirror image (right-right or right-left). */
        const int side = strcmp(data->word, node->right->data.word);
        if (side > 0) {
            return leftRotate(node);
        }
        if (side < 0) {
            node->right = rightRotate(node->right);
            return leftRotate(node);
        }
    }

    return node;   /* still balanced */
}
/*
 * Look up `target` in the AVL tree rooted at `root` and print either the
 * word's frequency or a "not found" message.
 */
void searchAVL(AVLNode *root, const char *target) {
    AVLNode *node = root;

    while (node != NULL) {
        const int order = strcmp(target, node->data.word);
        if (order == 0) {
            printf("单词 \"%s\" 出现次数: %d\n", target, node->data.frequency);
            return;
        }
        node = (order < 0) ? node->left : node->right;
    }

    printf("未找到单词 \"%s\"\n", target);
}
/*
 * Free every node of the AVL tree rooted at `root`. A NULL root is a no-op.
 * Left subtrees are released recursively; the right spine is followed in
 * the loop.
 */
void freeAVL(AVLNode *root) {
    while (root != NULL) {
        AVLNode *rest = root->right;
        freeAVL(root->left);
        free(root);
        root = rest;
    }
}
/*
 * Menu action 6: search through an AVL tree.
 *
 * Reads one line from stdin as the target word, builds an AVL tree from
 * every wordList entry, lets searchAVL print the result, frees the tree,
 * then asks whether to continue (exiting the program if not).
 */
void avlTreeSearch() {
    char target[MAX_WORD_LENGTH];

    printf("请输入要查找的单词: ");
    if (fgets(target, MAX_WORD_LENGTH, stdin) == NULL) {
        printf("读取输入失败!\n");
        return;
    }
    target[strcspn(target, "\n")] = 0;   /* strip the trailing newline */

    AVLNode *tree = NULL;
    int next = 0;
    while (next < wordCount) {
        tree = insertAVL(tree, &wordList[next]);
        next++;
    }

    searchAVL(tree, target);
    freeAVL(tree);

    if (askToContinue() == 0) {
        exit(0);
    }
}
/* Heap sort helpers. */

/*
 * Sift the element at index `i` down within the first `n` elements of `arr`
 * so that the subtree rooted at `i` is a max-heap by frequency. Children of
 * index k are at 2k+1 and 2k+2.
 */
void heapify(WordFrequency arr[], int n, int i) {
    for (;;) {
        const int leftChild = 2 * i + 1;
        const int rightChild = 2 * i + 2;
        int top = i;

        if (leftChild < n && arr[leftChild].frequency > arr[top].frequency) {
            top = leftChild;
        }
        if (rightChild < n && arr[rightChild].frequency > arr[top].frequency) {
            top = rightChild;
        }
        if (top == i) {
            return;   /* heap property holds here */
        }

        WordFrequency held = arr[i];
        arr[i] = arr[top];
        arr[top] = held;

        i = top;   /* continue in the subtree that was disturbed */
    }
}
/*
 * Sort the first `n` records of `arr` in place by ascending frequency using
 * heap sort: build a max-heap, then repeatedly move the current maximum to
 * the end of the shrinking heap.
 */
void heapSortWords(WordFrequency arr[], int n) {
    /* Build phase: heapify every internal node, last one first. */
    int node = n / 2 - 1;
    while (node >= 0) {
        heapify(arr, n, node);
        node--;
    }

    /* Extraction phase. */
    int last = n - 1;
    while (last > 0) {
        WordFrequency largest = arr[0];
        arr[0] = arr[last];
        arr[last] = largest;

        heapify(arr, last, 0);   /* restore the heap on the remaining prefix */
        last--;
    }
}
/*
 * Menu action 7: heap-sort a copy of wordList by frequency and print up to
 * 20 of the most frequent words, followed by the total word count. Then asks
 * whether to continue and exits the program if not.
 *
 * heapSortWords sorts ascending, so the listing walks the array backwards.
 *
 * At least one element is always requested from malloc: malloc(0) may
 * legitimately return NULL, which would be misreported as an allocation
 * failure when the table is empty.
 */
void heapSort() {
    printf("按词频进行堆排序(降序):\n");

    const size_t count = wordCount > 0 ? (size_t)wordCount : 0;
    WordFrequency *sortedWords = malloc((count > 0 ? count : 1) * sizeof *sortedWords);
    if (sortedWords == NULL) {
        printf("内存分配失败!\n");
        return;
    }
    memcpy(sortedWords, wordList, count * sizeof *sortedWords);

    heapSortWords(sortedWords, wordCount);

    printf("排序结果(前20个高频词):\n");
    for (int i = wordCount - 1; i >= max(0, wordCount - 20); i--) {
        printf("%d. %s: %d\n", wordCount - i, sortedWords[i].word, sortedWords[i].frequency);
    }
    printf("\n总词数: %d\n", wordCount);

    free(sortedWords);
    if (!askToContinue()) {
        exit(0);
    }
}
/*
 * Menu action 8: sort a copy of wordList by descending frequency with the
 * standard library's qsort (comparator: compareFrequency) and print up to 20
 * of the most frequent words, followed by the total word count. Then asks
 * whether to continue and exits the program if not.
 *
 * At least one element is always requested from malloc: malloc(0) may
 * legitimately return NULL, which would be misreported as an allocation
 * failure when the table is empty.
 */
void quickSort() {
    printf("按词频进行快速排序(降序):\n");

    const size_t count = wordCount > 0 ? (size_t)wordCount : 0;
    WordFrequency *sortedWords = malloc((count > 0 ? count : 1) * sizeof *sortedWords);
    if (sortedWords == NULL) {
        printf("内存分配失败!\n");
        return;
    }
    memcpy(sortedWords, wordList, count * sizeof *sortedWords);

    qsort(sortedWords, count, sizeof *sortedWords, compareFrequency);

    printf("排序结果(前20个高频词):\n");
    for (int i = 0; i < min(20, wordCount); i++) {
        printf("%d. %s: %d\n", i + 1, sortedWords[i].word, sortedWords[i].frequency);
    }
    printf("\n总词数: %d\n", wordCount);

    free(sortedWords);
    if (!askToContinue()) {
        exit(0);
    }
}
/*
 * Menu action 9: Shell-sort a copy of wordList by descending frequency and
 * print up to 20 of the most frequent words, followed by the total word
 * count. Then asks whether to continue and exits the program if not.
 *
 * The gap starts at wordCount / 2 and is halved until it reaches 0; each pass
 * is a gapped insertion sort.
 *
 * At least one element is always requested from malloc: malloc(0) may
 * legitimately return NULL, which would be misreported as an allocation
 * failure when the table is empty.
 */
void shellSort() {
    printf("按词频进行希尔排序(降序):\n");

    const size_t count = wordCount > 0 ? (size_t)wordCount : 0;
    WordFrequency *sortedWords = malloc((count > 0 ? count : 1) * sizeof *sortedWords);
    if (sortedWords == NULL) {
        printf("内存分配失败!\n");
        return;
    }
    memcpy(sortedWords, wordList, count * sizeof *sortedWords);

    for (int gap = wordCount / 2; gap > 0; gap /= 2) {
        for (int i = gap; i < wordCount; i++) {
            WordFrequency temp = sortedWords[i];
            int j;
            /* Shift less frequent entries one gap to the right. */
            for (j = i; j >= gap && sortedWords[j - gap].frequency < temp.frequency; j -= gap) {
                sortedWords[j] = sortedWords[j - gap];
            }
            sortedWords[j] = temp;
        }
    }

    printf("排序结果(前20个高频词):\n");
    for (int i = 0; i < min(20, wordCount); i++) {
        printf("%d. %s: %d\n", i + 1, sortedWords[i].word, sortedWords[i].frequency);
    }
    printf("\n总词数: %d\n", wordCount);

    free(sortedWords);
    if (!askToContinue()) {
        exit(0);
    }
}