2014-04-02 抽取最短英文描述（阿里巴巴面试题有bug不是最优）

最新推荐文章于 2024-09-09 09:44:12 发布

原创最新推荐文章于 2024-09-09 09:44:12 发布 · 744 阅读

0 ·

CC 4.0 BY-SA版权

本文介绍了一种从产品描述中提取包含特定关键词的最短子串作为产品简介的方法。通过定位关键词首次和最后一次出现的位置，并逐步调整边界来缩小范围。

题目：

Alibaba笔试题：给定一段产品的英文描述，包含M个英文单词，每个英文单词以空格分隔，无其他标点符号；再给定N个英文单词关键字，请说明思路并编程实现方法

char* extractSummary(char[] description, char[] *keywords)

目标是找出此产品描述中包含N个关键字（每个关键词至少出现一次）的长度最短的子串，作为产品简介输出。（不限编程语言）20分。

思路：

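首先在这M个单词中找出N个关键字里最先出现的单词X和最后出现的单词Y，此时从X到Y的子串必然包含所有出现过的关键字。然后尝试去掉单词X：若X在其后的区间（X之后到Y）内还会再次出现，或者X本身不是关键字，就把左边界后移到下一个单词作为新的X，依次操作，直到X不能再去掉为止；对于Y，可以进行同样的操作，依次前移右边界，直到得到最后的子串为止。需要注意的是，这种先收缩左边界、再收缩右边界的做法是贪心的，并不保证得到全局最短的子串。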

程序实现：

#include<stdio.h>
#include<stdlib.h>
#include<string.h>

/* Left and right edges of the current summary window inside `character`;
 * assigned by obtain_first_word() and obtain_last_word() respectively. */
char *left, *right;
/* The product description. main() stores it with one space before the first
 * word and one space after the last word. */
char character[1024];
/* The keyword table. main() stores each entry space-padded (" word ") in its
 * own 30-byte allocation. */
char **keywords;
/* Number of entries in `keywords`, as read by main(). */
int number;

char * str_reverse_str(char *word)
{
	char *p = NULL;
	char *temp = NULL;
	char *count = word;
	char *characters = character;

	while((*characters != '\0') && (p = strstr(characters, word))){
		temp = p;
		while(count && *count != '\0'){
			p++;
			count++;
		}
		characters = p;
		count = word;
	}
	
	return temp;
}

/*
 * Report whether `words` is byte-for-byte equal to one of the `number`
 * strings in the global `keywords` table.
 *
 * Returns 1 on a match and 0 otherwise.
 */
int is_key_word(char *words)
{
	int found = 0;
	int idx = 0;

	/* Stop at the first equal entry or once the table is exhausted. */
	while (idx < number && !found) {
		found = (strcmp(words, keywords[idx]) == 0);
		idx++;
	}

	return found;
}
	
/*
 * Shrink the summary window from the left.
 *
 * `first` points at the space in front of the window's first word. The word
 * is dropped (the edge moves to the space that follows it) while it either
 * occurs again later in the window or is not a keyword. On return the global
 * `left` points at the space in front of the first word that must be kept.
 *
 * The candidate word is copied, space-padded, into a scratch buffer sized
 * from the window itself, so words of any length are handled; the scan also
 * stops at the string terminator instead of running past it. If the scratch
 * buffer cannot be allocated the window is left unshrunk.
 */
void obtain_first_word(char *first)
{
	left = first;
	if (first == NULL)
		return;

	/* No word can be longer than the text it sits in; +3 leaves room for the
	 * two padding spaces and the terminator. */
	char *padded = malloc(strlen(first) + 3);
	if (padded == NULL)
		return;

	while (*first != '\0') {
		const char *word = first + 1;
		size_t len = strcspn(word, " ");
		char *next = first + 1 + len;	/* the space that ends the word */

		if (*next == '\0')
			break;	/* ran into the terminator: no complete word left */

		padded[0] = ' ';
		memcpy(padded + 1, word, len);
		padded[len + 1] = ' ';
		padded[len + 2] = '\0';

		/* Searching from `next` (the shared delimiter) also finds a copy
		 * that immediately follows this word. Keep the word only when it
		 * is a keyword with no later copy. */
		if (strstr(next, padded) == NULL && is_key_word(padded))
			break;

		first = next;
		left = first;
	}

	free(padded);
}

/*
 * Shrink the summary window from the right.
 *
 * `last` points at the string terminator that sits directly after the space
 * closing the window's final word; the global `left` must already point at
 * the space in front of the window's first word. The final word is dropped
 * while it either occurs earlier in the window or is not a keyword. Dropping
 * a word writes a terminator over its first character, so the buffer is cut
 * off in place. On return the global `right` points at the terminator of the
 * shrunk window.
 *
 * The candidate word is copied, space-padded, into a scratch buffer sized
 * from the window, so words of any length are handled, and the backward scan
 * never goes below `left`. If the scratch buffer cannot be allocated the
 * window is left unshrunk.
 */
void obtain_last_word(char *last)
{
	right = last;
	if (last == NULL || left == NULL || last <= left)
		return;

	/* Every word in the window is shorter than the window; +3 leaves room
	 * for the two padding spaces and the terminator. */
	char *padded = malloc((size_t)(last - left) + 3);
	if (padded == NULL)
		return;

	while (last > left) {
		char *end = last - 1;	/* the space that closes the final word */
		char *begin = end;

		/* Walk back to the first character of the final word. */
		while (begin > left && begin[-1] != ' ')
			begin--;
		if (begin == left)
			break;	/* no delimiter in front of it inside the window */

		size_t len = (size_t)(end - begin);
		padded[0] = ' ';
		memcpy(padded + 1, begin, len);
		padded[len + 1] = ' ';
		padded[len + 2] = '\0';

		/* Temporarily end the window just before this word so the search
		 * below only sees the words in front of it. */
		char saved = *begin;
		*begin = '\0';

		if (strstr(left, padded) == NULL && is_key_word(padded)) {
			*begin = saved;	/* needed keyword: put it back and stop */
			break;
		}

		/* Word dropped: the terminator just written is the new right edge. */
		last = begin;
		right = last;
	}

	free(padded);
}

/*
 * Locate a short span of the global `character` description that contains
 * every keyword occurring in it, print that span, and return it.
 *
 * The span starts as the text from the earliest to the latest keyword
 * occurrence; obtain_first_word() then drops words from its front and
 * obtain_last_word() drops words from its back. `character` is cut off in
 * place at the span's right edge. Keywords that never occur in the
 * description are ignored.
 *
 * NOTE: the shrink is greedy (left edge first, then right edge), so the
 * result is not always the globally shortest span. For the description
 * "a b x x a x b" with keywords "a" and "b" it yields "a x b" although
 * "a b" is shorter.
 *
 * Returns a pointer to the first word of the span (inside `character`), or
 * NULL when no keyword occurs at all, in which case nothing is printed.
 */
char * extract_summary(void)
{
	char *first = NULL;
	char *last = NULL;
	int i;

	/* Earliest first-occurrence and latest last-occurrence over all keywords. */
	for (i = 0; i < number; i++) {
		char *hit = strstr(character, keywords[i]);
		if (hit != NULL && (first == NULL || hit < first))
			first = hit;

		hit = str_reverse_str(keywords[i]);
		if (hit != NULL && (last == NULL || hit > last))
			last = hit;
	}

	if (first == NULL || last == NULL)
		return NULL;

	/* `last` is on the space in front of the final keyword: step onto the
	 * word, skip to the space behind it, and end the string right after
	 * that space. */
	last++;
	last += strcspn(last, " ");
	if (*last == ' ')
		last++;
	*last = '\0';

	obtain_first_word(first);
	obtain_last_word(last);

	left++;		/* skip the leading delimiter space */
	printf("\nthe minest character as follows:\n%s", left);
	return left;
}

/*
 * Read the description and the keywords from standard input, store them
 * space-padded in the globals, and print the extracted summary.
 *
 * The description line is stored as " w1 w2 ... wM " and every keyword as
 * " word ", so whole-word matches can be found with plain substring search.
 * All reads are bounded to the destination buffers and every input or
 * allocation failure ends the program with a non-zero status.
 *
 * Returns 0 on success and 1 on invalid input or allocation failure.
 */
int main(void)
{
	char *s = character;			/* pad both sides of the text with ' ' */
	int status = 1;
	int allocated = 0;
	int i;

	*s++ = ' ';
	printf("Please input the character sequence as follows:\n");
	/* Leave two bytes free for the trailing space and the terminator. */
	if (fgets(s, (int)sizeof character - 2, stdin) == NULL) {
		fprintf(stderr, "Failed to read the character sequence.\n");
		return 1;
	}
	size_t line_len = strcspn(s, "\n");
	if (s[line_len] == '\n') {
		s[line_len] = '\0';	/* fgets keeps the newline; drop it */
	} else {
		/* Over-long line: discard the rest so it is not read as the count. */
		int ch;
		while ((ch = getchar()) != '\n' && ch != EOF) {
		}
	}
	s += strlen(s);
	*s++ = ' ';
	*s = '\0';

	printf("Please input  the number of keywords:\n");
	if (scanf("%d", &number) != 1 || number <= 0) {
		fprintf(stderr, "Invalid number of keywords.\n");
		return 1;
	}
	printf("Please input  the  keyword as follows:\n");
	keywords = malloc(sizeof *keywords * (size_t)number);
	if (keywords == NULL) {
		fprintf(stderr, "Out of memory.\n");
		return 1;
	}

	for (i = 0; i < number; i++) {
		keywords[i] = malloc(30);
		if (keywords[i] == NULL) {
			fprintf(stderr, "Out of memory.\n");
			break;
		}
		allocated = i + 1;

		s = keywords[i];		/* pad both sides of the keyword with ' ' */
		*s++ = ' ';
		/* 30 bytes = leading space + at most 27 characters + trailing
		 * space + terminator. */
		if (scanf("%27s", s) != 1) {
			fprintf(stderr, "Failed to read keyword.\n");
			break;
		}
		s += strlen(s);
		*s++ = ' ';
		*s = '\0';
	}

	/* Only run the extraction when every keyword was read successfully. */
	if (i == number) {
		extract_summary();
		status = 0;
	}

	for (i = 0; i < allocated; i++) {
		free(keywords[i]);
	}
	free(keywords);

	return status;
}