简单的散列表

     看了书上所讲,似乎hash表原理不复杂,但要设计好的hash表就很不容易了。目前我还没有在实践中用到过hash表,感觉不到hash表的强大,但是看其原理,利用hash表访问速度的确非常快,插入也算很快的了。

     很多讲散列表的资料都会拿字典来做例子,因为查词需要速度。试想几十上百万的词条如果不组织得有规律,只靠遍历比较去查,速度肯定快不起来——尽管遍历也只是线性时间复杂度,尽管现在计算机速度很快。用散列组织这些词条的话,平均查找时间就是常数时间了。最近很迷茫,也不想做其它事情,于是写了个简单的散列表来模拟存储字典:随机生成长度不超过某个上限的单词,利用一个简单的算法生成单词对应的key,分别用separate chaining和linear probing来解决冲突(分别用两个数组存储字典),然后遍历其中一个数组,并在另外一个数组中查找相应记录,记录所需时间。

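/*
*  @describe
*  Implements the basic operations of a hash table in C.
*  Builds a dictionary of WORDS_COUNT random lower-case words whose length is
*  bounded by WORD_LENGTH and stores it in two tables of NUM slots each:
*  one resolves collisions with separate chaining, the other with open
*  addressing (linear probing; quadratic probing is not implemented).
*  Every word stored in the linear-probing table is then looked up in the
*  chained table, and the time taken by each phase is printed.
*/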

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#define WORDS_COUNT 200000  /* number of random words inserted into each hash table */
#define NUM 400000          /* number of slots in each hash table; also the modulus used by calculateId() */
#define WORD_LENGTH 50      /* size in bytes of the word buffers in struct Item and struct Word */
#define LETTERS_NUM 26      /* alphabet size: words are built from 'a'..'z', hashed as base-26 digits */

/*
*  Slot of the linear-probing table (hash_lp); also the value returned by createWord().
*  id   - hash of the word as computed by calculateId(); -1 marks an empty slot
*  word - the word text, kept in a buffer of WORD_LENGTH bytes and handled as a
*         C string (strcpy/strcmp/strlen) by the rest of the program
*/
typedef struct Item
{
    int id;
    char word[WORD_LENGTH];
} item,*pItem;

/*
* Node of a bucket in the separate-chaining table (hash_sc).
* hash_sc itself is an array of these nodes, one per slot; additional words
* hashing to the same slot are heap-allocated nodes linked through next.
* next - following node of the same bucket, or NULL at the end of the chain
* word - the word text, kept in a buffer of WORD_LENGTH bytes
*/
typedef struct Word
{
    struct Word *next;
    char word[WORD_LENGTH];
} word, *pWord;

item createWord(void);   // create a random word; rand() must have been seeded with srand() first
void freeMemory(void);   // release both hash tables and every chained node
void insert(void);           // insert WORDS_COUNT random words into both hash tables
void init(void);             // allocate both hash tables and mark every slot empty
void analyzeHash(void);      // look up every word of hash_lp in hash_sc and count the hits
int calculateId(int, const char *);   // hash a word (length, characters) to a slot index

/*
*  global variables: both tables are allocated by init() and released by freeMemory()
*/
    pItem hash_lp;            // array of NUM slots used by linear probing
//    pItem hash_qp;            // array used by quadratic probing (not implemented)
    pWord hash_sc;            // array of NUM bucket heads used by separate chaining

//----------------------------------------------------------------------------------------------------
/*
 * Converts the interval between two clock() readings to whole milliseconds.
 * clock() counts in units of 1/CLOCKS_PER_SEC seconds, which is not
 * necessarily one millisecond, so the difference has to be scaled.
 */
static long elapsedMs(clock_t start, clock_t finish)
{
    return (long)((double)(finish - start) * 1000.0 / CLOCKS_PER_SEC);
}

/*
 * Program entry point.
 * Seeds the random generator, then runs the three phases (init, insert,
 * analyzeHash), printing the processor time each one consumed, and finally
 * releases the tables.
 * The command-line arguments are not used.
 * Returns 0.
 */
int main(int argc, char *argv[])
{
    clock_t time_start;
    clock_t time_finish;

    (void)argc;
    (void)argv;

    srand((unsigned)time(NULL));   // createWord() relies on rand() being seeded

    time_start = clock();
    init();                 // initialize
    time_finish = clock();
    printf("init finished...it takes time %ldms\n", elapsedMs(time_start, time_finish));

    time_start = time_finish;
    insert();               // insert random words to hash tables
    time_finish = clock();
    printf("insert finished...it takes time %ldms\n", elapsedMs(time_start, time_finish));

    time_start = time_finish;
    analyzeHash();          // analyze hash tables
    time_finish = clock();
    printf("analyze finished...it takes time %ldms\n", elapsedMs(time_start, time_finish));

    freeMemory();           // free memory

    return 0;
}

/*
* calculate key of hash by a word
* assume a-z is represented by 1-26, and calculate the number of the word in hash table
* take word "aaa" for example: id = (1*26*26 + 1*26 + 1*1)%NUM
*/
int calculateId(int length, const char *word)
{
    int i = 0, j = 0, result_id = 0;

    for(i = 0; i < length; i++)
    {
        int num = word[i] - 'a' + 1;                // create a number between 1-26

        int tmp_id = num;                       // store num*26*[length-i-1 times]%COUNT
        for(j = 1; j <= length - i - 1; j++)    // num*26*[length-i-1 times]
        {
            int tmp = tmp_id * LETTERS_NUM;
            tmp_id = (tmp < NUM ? tmp : (tmp%NUM));
        }

        result_id += tmp_id;                  // calculate id
        if(result_id >= NUM)
        {
            result_id %= NUM;
        }
    }
    return result_id;
}

/*
*  initialize hash tables
*/
void init(void)
{
    int i = 0;
    item null_item;
    word null_word;
    null_item.id = -1;
    null_item.word[0] = '/0';
    null_word.next = NULL;
    null_word.word[0] = '/0';

    if(!(hash_lp=(pItem)malloc(NUM*sizeof(item))))
    {
        printf("not enough memory");
        exit(0);
    }

    if(!(hash_sc=(pWord)malloc(NUM*sizeof(word))))
    {
        printf("not enough memory");
        exit(0);
    }

    // initialize hash tables with NULL
    for(i = 0; i < NUM; i++)
    {
        hash_lp[i] = null_item;
        hash_sc[i] = null_word;
    }
}

/*
* Creates a random word made of lower-case letters only.
* The length is drawn from [1, WORD_LENGTH - 1] so that the terminating NUL
* always fits inside the WORD_LENGTH-byte buffer.
* rand() must have been seeded by the caller.
*
* returns an item holding the NUL-terminated word and its hash
*         (id = calculateId(length, word))
*/
item createWord(void)
{
    item new_item = {0};    // zero the whole buffer so no stale bytes are copied around

    // one byte is reserved for the terminator
    int length = rand() % (WORD_LENGTH - 1) + 1;

    for (int i = 0; i < length; i++)
    {
        new_item.word[i] = (char)('a' + rand() % LETTERS_NUM);  // random letter a-z
    }
    new_item.word[length] = '\0';

    new_item.id = calculateId(length, new_item.word);

    return new_item;
}

/*
*  function: insert words to hash tables
*/
void insert(void)
{
    /*
        create words and insert them to three kinds of hash table respectively
    */
    int i = 0;
    for(i = 0; i < WORDS_COUNT; i++)
    {
        item new_item = createWord();
        int id = new_item.id;
        // insert into hash_lp
        if(id >= NUM/2) 
        {
            while(hash_lp[id].id != -1) { id--; }      // search empty place
        }
        else
        {
            while(hash_lp[id].id != -1) { id++; }      // search empty place
        }

        hash_lp[id].id = new_item.id;
        strcpy(hash_lp[id].word, new_item.word);

        // insert into hash_sc
        id = new_item.id;
        if(hash_sc[id].word[0] == '/0')
        {
            // if the place is empty
            hash_sc[id].next = NULL;
            strcpy(hash_sc[id].word, new_item.word);
        }
        else
        {
            // the place is not empty

            // create a new word by item
            pWord pNew_word = (pWord)malloc(sizeof(word));
            if(!pNew_word)
            {
                printf("not enough memory");
                exit(0);
            }
            strcpy(pNew_word->word, new_item.word);
            pNew_word->next = hash_sc[id].next;
            hash_sc[id].next = pNew_word;
        }
    }
}

/*
*   function: analyze the basic functions of hash table
*   A. iterate hash_lp, and find each word in hash_sc, calculate the total search time and vice versa
*/
void analyzeHash(void)
{
    int i = 0, id = 0;
    int find_count = 0;
   
    /*
    *  iterate hash_lp, access in hash_sc
    */
    for(i = 0; i < NUM; i++)
    {
        item tmp_item = hash_lp[i];

        if(tmp_item.id != -1)
        {
            id = calculateId(strlen(tmp_item.word), tmp_item.word);
            pWord tmp_word = &hash_sc[id];
            if(tmp_word->word[0] == '/0')
            {

                printf("can't find error");
            }
            else
            {
                while((tmp_word != NULL) && (strcmp(tmp_item.word, tmp_word->word)))
                {
                    tmp_word = tmp_word->next;
                }
                if(tmp_word == NULL)
                {
                    // can't find
                    printf("can't find error");
                }
                else
                {
                    find_count++;
                //    printf("find %s, id=%d/n", tmp_word->word, i);
                }
            }
        }
    }
    printf("iterate hash_lp, access in hash_sc, find_count =%d/n", find_count);
}


/*
* function: free memory after the programe is over
*/
void freeMemory(void)
{
    free(hash_lp);
    for(int i = 0; i < NUM; i++)
    {
        if(hash_sc[i].next != NULL)
        {
            /*
            *  if the place has linked objects, the memory of them should be freed one by one
            *  perhaps use C++ class and with desconstructor function it will be much more convenient
            */
            pWord pCurrent_word = (&hash_sc[i])->next;
            while(pCurrent_word != NULL)
            {
                pWord tmp = pCurrent_word;
                pCurrent_word = pCurrent_word->next;
                free(tmp);
            }           
        }
    }
    free(hash_sc);
}
   
   
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值