algorithm week6

最新推荐文章于 2017-10-09 13:09:56 发布

原创最新推荐文章于 2017-10-09 13:09:56 发布 · 1.1k 阅读

0 ·

CC 4.0 BY-SA版权

本文解析了两个经典的算法题目：一是寻找特定区间内数组中能组成特定目标值的不重复整数对的数量；二是利用堆实现动态维护中位数的问题。通过实际代码示例，展示了如何使用哈希表和堆数据结构来高效解决这些问题。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

Question 1

Download the text file here. (Right click and save link as).

The goal of this problem is to implement a variant of the 2-SUM algorithm (covered in the Week 6 lecture on hash table applications).

The file contains 1 million integers, both positive and negative (there might be some repetitions!). This is your array of integers, with the i-th row of the file specifying the i-th entry of the array.

Your task is to compute the number of target values $t$ in the interval $[-10000, 10000]$ (inclusive) such that there are distinct numbers $x, y$ in the input file that satisfy $x + y = t$. (NOTE: ensuring distinctness requires a one-line addition to the algorithm from lecture.)

Write your numeric answer (an integer between 0 and 20001) in the space provided.

OPTIONAL CHALLENGE: If this problem is too easy for you, try implementing your own hash table for it. For example, you could compare performance under the chaining and open addressing approaches to resolving collisions.

Question 2

Download the text file here.

The goal of this problem is to implement the "Median Maintenance" algorithm (covered in the Week 5 lecture on heap applications). The text file contains a list of the integers from 1 to 10000 in unsorted order; you should treat this as a stream of numbers, arriving one by one. Letting $x_i$ denote the $i$-th number of the file, the $k$-th median $m_k$ is defined as the median of the numbers $x_1, \ldots, x_k$. (So, if $k$ is odd, then $m_k$ is the $((k+1)/2)$-th smallest number among $x_1, \ldots, x_k$; if $k$ is even, then $m_k$ is the $(k/2)$-th smallest number among $x_1, \ldots, x_k$.)

In the box below you should type the sum of these 10000 medians, modulo 10000 (i.e., only the last 4 digits). That is, you should compute

$(m_1 + m_2 + m_3 + \cdots + m_{10000}) \bmod 10000$.

OPTIONAL EXERCISE: Compare the performance achieved by heap-based and search-tree-based implementations of the algorithm.

#include <iostream>
#include <fstream>
#define MIN 10000
using namespace std;

// Direct-address table with MIN + 1 slots (valid indices 0..MIN);
// readData() increments hash2[v] for each stored input value v.
int hash2[MIN + 1] = { 0 };
// Running total incremented by main() once per target that has a matching pair.
int count = 0;

void readData()
{
ifstream fin("2sum.txt");
int temp = 0;
while(fin>>temp)
{
if(temp < MIN)
hash2[temp]++;
}
}

// Returns true when the value n was recorded in hash2.
//
// Any n outside [0, MIN] is not a valid index into hash2, so it is reported
// as absent without touching the array.
bool hashMap(int n)
{
    if (n < 0 || n > MIN)
        return false;
    return hash2[n] != 0;
}

int main()
{
readData();

for(int i = -10000; i <= 10000; i++)
{
for(int j = -10000; j <= 10000; j++)
{
if(hashMap(j) && hashMap(i - j))
{
count++;
break;
}
}
}
cout<<count<<endl;
return 0;
}

#include <stdio.h>
#include <time.h>

/* Capacity of each heap's backing array. */
#define MAX 10000

/*
 * Array-backed binary heap.
 * The helpers address nodes by 1-based position p, stored at data[p - 1];
 * size is the number of elements currently in use.
 */
struct heap
{
int size;
int data[MAX];
};

/*
 * read_data() sends values <= the root of max_heap into max_heap and all
 * other values into min_heap, then rebalances between the two.
 */
struct heap min_heap, max_heap;

/* Running sum of the root of max_heap taken after each value is read. */
long long count = 0;

/* Sets both global heaps to size 0 and zero-fills their storage. */
void init();

/* Appends a value to a min-heap and moves it up while its parent is larger. */
void minheap_insert(int, struct heap *);

/* Removes and returns the root of a min-heap; prints a message and returns -1 when empty. */
int extract_min(struct heap *);

/* Appends a value to a max-heap and moves it up while its parent is smaller. */
void maxheap_insert(int, struct heap *);
/* Removes and returns the root of a max-heap; prints a message and returns -1 when empty. */
int extract_max(struct heap *);

/* Exchanges the two pointed-to integers. */
void swap(int *, int *);

/* 1-based parent position; position 1 is its own parent. */
int parent(int);

/* 1-based position of the smaller child, or the node itself when it has no child. */
int smallest_child(int, struct heap *);

/* 1-based position of the larger child, or the node itself when it has no child. */
int biggest_child(int, struct heap *);

/* Prints the heap's elements in array order on one line. */
void display(struct heap *);

/* Reads integers from the named file and accumulates the running medians into count. */
void read_data(char *);

/*
 * Entry point: resets the heaps, processes the input file, and prints the
 * accumulated sum of medians modulo 10000.
 */
int main(void)
{
    init();
    read_data("D:\\Median.txt");
    /* count is a long long, so the conversion specifier must be %lld. */
    printf("The count mod 10000 is : %lld\n", count % 10000);
    return 0;
}

/*
 * Puts both global heaps into the empty state: size 0 and every storage
 * slot cleared to 0.
 */
void init()
{
    int slot;

    max_heap.size = 0;
    min_heap.size = 0;

    for (slot = 0; slot != MAX; ++slot) {
        min_heap.data[slot] = 0;
        max_heap.data[slot] = 0;
    }
}

void minheap_insert(int label, struct heap * temp)
{
temp->size += 1;
int i = temp->size;
*(temp->data + i - 1) = label;

int *parent_i_th, *i_th;
i_th = temp->data + i - 1;
parent_i_th = temp->data + parent(i) - 1;
while(*parent_i_th > *i_th)
{
swap(parent_i_th, i_th);
i = parent(i);
i_th = temp->data + i - 1;
parent_i_th = temp->data + parent(i) - 1;
}
}

void maxheap_insert(int label, struct heap * temp)
{

temp->size += 1;
int i = temp->size;
*(temp->data + i - 1) = label;
int *parent_i_th, *i_th;
i_th = temp->data + i - 1;
parent_i_th = temp->data + parent(i) - 1;

while(*parent_i_th < *i_th)
{
swap(parent_i_th, i_th);
i = parent(i);
i_th = temp->data + i - 1;
parent_i_th = temp->data + parent(i) - 1;
}
if(label == 1260)
{
printf("1260 Test: ");
display(temp);
}
}

/*
 * Removes the root of the min-heap *temp and returns it.
 *
 * When the heap is empty, prints "No more element!" and returns -1.
 * Otherwise the last element is moved to the root and pushed down,
 * exchanging it with its smaller child while it is larger than that child.
 */
int extract_min(struct heap * temp)
{
    if (temp->size == 0)
    {
        printf("No more element!\n");
        return -1;
    }

    int removed = temp->data[0];
    temp->data[0] = temp->data[temp->size - 1];
    temp->size -= 1;

    int pos = 1;
    /* smallest_child() returns pos itself when there is no child, which ends the loop. */
    int child = smallest_child(pos, temp);
    while (temp->data[pos - 1] > temp->data[child - 1])
    {
        /* The child position was chosen before the exchange, as required. */
        swap(&temp->data[pos - 1], &temp->data[child - 1]);
        pos = child;
        child = smallest_child(pos, temp);
    }
    return removed;
}

/*
 * Removes the root of the max-heap *temp and returns it.
 *
 * When the heap is empty, prints "No more element!" and returns -1.
 * Otherwise the last element is moved to the root and pushed down,
 * exchanging it with its larger child while it is smaller than that child.
 */
int extract_max(struct heap * temp)
{
    if (temp->size == 0)
    {
        printf("No more element!\n");
        return -1;
    }

    int removed = temp->data[0];
    temp->data[0] = temp->data[temp->size - 1];
    temp->size -= 1;

    int pos = 1;
    /* biggest_child() returns pos itself when there is no child, which ends the loop. */
    int child = biggest_child(pos, temp);
    while (temp->data[pos - 1] < temp->data[child - 1])
    {
        /* The child position was chosen before the exchange, as required. */
        swap(&temp->data[pos - 1], &temp->data[child - 1]);
        pos = child;
        child = biggest_child(pos, temp);
    }
    return removed;
}

/* Exchanges the integers that label1 and label2 point to. */
void swap(int * label1, int * label2)
{
    int first = *label1;

    *label1 = *label2;
    *label2 = first;
}

/*
 * Returns the 1-based position of the parent of position label.
 * Position 1 is reported as its own parent; otherwise it is label / 2.
 */
int parent(int label)
{
    return (label == 1) ? 1 : label / 2;
}

/*
 * Returns the 1-based position of the smaller child of position label in
 * *temp. With no child in range it returns label itself; with only a left
 * child it returns that child; on a tie the left child is chosen.
 */
int smallest_child(int label, struct heap * temp)
{
    const int left = 2 * label;
    const int right = left + 1;

    if (left > temp->size)
        return label;
    if (left == temp->size)
        return left;

    return (temp->data[left - 1] <= temp->data[right - 1]) ? left : right;
}

/*
 * Returns the 1-based position of the larger child of position label in
 * *temp. With no child in range it returns label itself; with only a left
 * child it returns that child; on a tie the right child is chosen.
 */
int biggest_child(int label, struct heap * temp)
{
    const int left = 2 * label;
    const int right = left + 1;

    if (left > temp->size)
        return label;
    if (left == temp->size)
        return left;

    return (temp->data[left - 1] > temp->data[right - 1]) ? left : right;
}

/* Prints the first size elements of *temp in array order, each followed by "-> ", then a newline. */
void display(struct heap * temp)
{
    int idx = 0;

    while (idx < temp->size)
    {
        printf("%d-> ", temp->data[idx]);
        idx++;
    }
    printf("\n");
}

/*
 * Streams integers from `file`, maintaining the running median with the two
 * global heaps.
 *
 * Values <= the root of max_heap go into max_heap, all others into
 * min_heap; the heaps are then rebalanced so that max_heap holds either the
 * same number of elements as min_heap or one more. The root of max_heap is
 * then taken as the current median: it is added to the global count,
 * printed, and written to "D:\test.csv".
 *
 * If the input file cannot be opened the function reports the error and
 * returns without processing anything. If the CSV file cannot be opened the
 * error is reported and processing continues without the CSV output.
 */
void read_data(char * file)
{
    FILE *fp = fopen(file, "rb");
    if (fp == NULL) {
        perror(file);
        return;
    }

    FILE *wp = fopen("D:\\test.csv", "wb");
    if (wp == NULL)
        perror("D:\\test.csv");

    int label;
    int i = 0;

    /*
     * Loop on a successful conversion rather than on feof(): a failed read
     * must not be processed as if it had produced a value.
     */
    while (fscanf(fp, "%d", &label) == 1) {
        if (max_heap.size == 0) {
            maxheap_insert(label, &max_heap);
        } else {
            if (label <= max_heap.data[0])
                maxheap_insert(label, &max_heap);
            else
                minheap_insert(label, &min_heap);

            /* Rebalance: max_heap may exceed min_heap by at most one. */
            if (max_heap.size - min_heap.size > 1) {
                label = extract_max(&max_heap);
                minheap_insert(label, &min_heap);
            }
            if (max_heap.size < min_heap.size) {
                label = extract_min(&min_heap);
                maxheap_insert(label, &max_heap);
            }
        }

        int median = max_heap.data[0];
        count += median;
        if (wp != NULL)
            fprintf(wp, "%d\n", median);
        printf("%d th Median: %d \n", ++i, median);
    }

    if (wp != NULL)
        fclose(wp);
    fclose(fp);
}