LeetCode Word Ladder

本文探讨了在不使用unordered_set的情况下寻找两个单词间最短转换序列的问题,并对比了Floyd、Dijkstra算法与广度优先搜索的效率。

终于憋出来了,虽说还不是特别好。没想到一点点的改动竟然就是"过"与"不过"的差别,就是这句:if(temp>1) break;。这题最令我为难的是不能用 unordered_set<string> 这个数据结构,原因很简单:我用的是 XP,VS 是 2008 版的,所以操作都是基于 vector 的,首先就得把字典复制到 vector。这题其实就是求两点之间的最短距离:先是试了弗洛伊德(Floyd),后来试了迪杰斯特拉(Dijkstra),再后来是广度优先搜索,都没通过(要么超时,要么超内存)。看了看别人的解法,都用了 unordered_set<string> 这个数据结构,而且算法上他们都是由某一个字符串衍生出另一个字符串,然后判断它是否在字典里(我想在判断是否属于字典时,unordered_set<string> 这个数据结构应该有优化的作用);而我的算法是不衍生字符串,就拿当前字符串和字典里的每一个字符串比较,判断是否连通(后来就是在这里优化了一点,居然通过了)。其实还是拿当前字符串和字典里的每个字符串比较(若是先衍生再查找,不也得和每个字符串比较吗?)。还有,我的程序一开始把字典复制到 vector 也消耗时间,所以最后提交的结果比别人的慢了 4~5 倍。下面是通过的程序(注意:不加 if(temp>1) break; 这句就通不过了):

// Entry of the BFS work queue used by the word-ladder search.
struct node {
	int num;    // index of the word inside the dictionary vector
	int layer;  // BFS depth of that word; the start word is on layer 1
};

/**
 * Word Ladder via breadth-first search over the dictionary.
 *
 * Returns the number of words in the shortest chain start -> ... -> end in
 * which consecutive words differ in at most one position and every
 * intermediate word comes from `dict`. `end` itself does not have to be in
 * the dictionary. Returns 0 when no such chain exists.
 *
 * Notes:
 *  - Identical words count as adjacent (as in the original implementation),
 *    so `start == end` yields 2.
 *  - `dict` is only read. Earlier revisions appended `start` and `end` to it,
 *    which silently grew the caller's dictionary on every call.
 *  - Words whose length differs from `start` can never be part of a ladder
 *    and are ignored instead of being indexed out of range.
 *
 * Complexity: O(n^2 * L) time for n dictionary words of length L, O(n) space.
 * (Drop-in for the unordered_set based LeetCode signature on toolchains
 * without <unordered_set>.)
 *
 * @param start first word of the chain
 * @param end   last word of the chain
 * @param dict  candidate intermediate words (not modified)
 * @return chain length in words, or 0 if `end` is unreachable
 */
int ladderLength05(std::string start, std::string end, std::vector<std::string> &dict) {
	const std::size_t wordLen = start.length();
	if (end.length() != wordLen)
		return 0;

	// Two words are neighbours when both have the ladder's word length and
	// differ in at most one position. Bailing out on the second mismatch is
	// the optimisation that made this approach fast enough to be accepted.
	auto isNeighbour = [wordLen](const std::string &a, const std::string &b) -> bool {
		if (a.length() != wordLen || b.length() != wordLen)
			return false;
		int mismatches = 0;
		for (std::size_t k = 0; k < wordLen; ++k) {
			if (a[k] != b[k] && ++mismatches > 1)
				return false;
		}
		return true;
	};

	// Chain consisting of just the two end points.
	if (isNeighbour(start, end))
		return 2;

	// vector<char> instead of a raw new[] buffer: no leak if a push throws.
	std::vector<char> visited(dict.size(), 0);
	std::queue<std::size_t> frontier;

	for (std::size_t i = 0; i < dict.size(); ++i) {
		if (isNeighbour(start, dict[i])) {
			visited[i] = 1;
			frontier.push(i);
		}
	}

	// Process the queue one BFS level at a time; every word currently in the
	// frontier is the `chainLen`-th word of a shortest chain from `start`.
	for (int chainLen = 2; !frontier.empty(); ++chainLen) {
		for (std::size_t remaining = frontier.size(); remaining > 0; --remaining) {
			const std::size_t cur = frontier.front();
			frontier.pop();

			if (isNeighbour(dict[cur], end))
				return chainLen + 1;

			for (std::size_t i = 0; i < dict.size(); ++i) {
				if (!visited[i] && isNeighbour(dict[cur], dict[i])) {
					visited[i] = 1;
					frontier.push(i);
				}
			}
		}
	}
	return 0;
}
下面是没通过的程序(Floyd 和 Dijkstra):

/**
 * Word Ladder via Floyd-Warshall all-pairs shortest paths.
 *
 * Graph nodes are the dictionary words followed by `start` and `end`. Two
 * nodes are connected when the words differ in at most one position; the edge
 * weight is the number of differing positions (1, or 0 for identical words).
 * The result is the shortest start->end distance plus one, i.e. the number of
 * words in the chain, or 0 when `end` cannot be reached.
 *
 * Kept for comparison with the BFS variant: O(n^3) time and O(n^2) memory
 * make it too slow for large dictionaries.
 *
 * Fixes over the earlier revision:
 *  - an unreachable target no longer evaluates INT_MAX + 1 (signed overflow);
 *  - the word length is taken from `start`, so an empty dictionary no longer
 *    makes every pair of words look identical;
 *  - words of a different length are treated as isolated nodes instead of
 *    being indexed out of range;
 *  - the debug dump of the whole distance matrix to stdout is gone.
 *
 * @param start first word of the chain
 * @param end   last word of the chain
 * @param dict  candidate intermediate words (not modified)
 * @return chain length in words, or 0 if `end` is unreachable
 */
int ladderLength02(std::string start, std::string end, std::vector<std::string> &dict) {
	const std::size_t wordLen = start.length();
	if (end.length() != wordLen)
		return 0;

	const std::size_t dictCount = dict.size();
	const std::size_t startIdx = dictCount;
	const std::size_t endIdx = dictCount + 1;
	const std::size_t nodeCount = dictCount + 2;

	// Node index -> word, without copying the dictionary.
	auto wordAt = [&](std::size_t idx) -> const std::string & {
		if (idx < dictCount)
			return dict[idx];
		return idx == startIdx ? start : end;
	};

	// Number of differing positions when it is 0 or 1, INT_MAX ("no edge")
	// otherwise.
	auto edgeWeight = [wordLen](const std::string &a, const std::string &b) -> int {
		if (a.length() != wordLen || b.length() != wordLen)
			return INT_MAX;
		int mismatches = 0;
		for (std::size_t k = 0; k < wordLen; ++k) {
			if (a[k] != b[k] && ++mismatches > 1)
				return INT_MAX;
		}
		return mismatches;
	};

	// dist[i][i] stays 0; the matrix is symmetric.
	std::vector<std::vector<int> > dist(nodeCount, std::vector<int>(nodeCount, 0));
	for (std::size_t i = 0; i < nodeCount; ++i) {
		for (std::size_t j = i + 1; j < nodeCount; ++j) {
			const int w = edgeWeight(wordAt(i), wordAt(j));
			dist[i][j] = w;
			dist[j][i] = w;
		}
	}

	for (std::size_t k = 0; k < nodeCount; ++k) {
		for (std::size_t i = 0; i < nodeCount; ++i) {
			if (dist[i][k] == INT_MAX)
				continue;
			for (std::size_t j = 0; j < nodeCount; ++j) {
				// Both operands are finite and bounded by nodeCount here,
				// so the sum cannot overflow.
				if (dist[k][j] != INT_MAX && dist[i][k] + dist[k][j] < dist[i][j])
					dist[i][j] = dist[i][k] + dist[k][j];
			}
		}
	}

	const int shortest = dist[startIdx][endIdx];
	return shortest == INT_MAX ? 0 : shortest + 1;
}
/**
 * Word Ladder via array-based Dijkstra from `start`.
 *
 * Graph nodes are the dictionary words followed by `start` and `end`. Two
 * nodes are connected when the words differ in at most one position; the edge
 * weight is the number of differing positions (1, or 0 for identical words).
 * The result is the shortest start->end distance plus one, i.e. the number of
 * words in the chain, or 0 when `end` cannot be reached.
 *
 * Kept for comparison with the BFS variant: O(n^2 * L) time for n words of
 * length L.
 *
 * Fixes over the earlier revision:
 *  - the closest unsettled node is now really the minimum (the running
 *    minimum was never updated, so the last finite node was picked and the
 *    result could be longer than the shortest chain);
 *  - "settled" is tracked separately instead of overloading the distance
 *    with -1, which mis-handled zero-distance nodes and stale selections;
 *  - `dict` is only read; `start`/`end` are no longer appended to it;
 *  - words of a different length are treated as isolated nodes instead of
 *    being indexed out of range.
 *
 * @param start first word of the chain
 * @param end   last word of the chain
 * @param dict  candidate intermediate words (not modified)
 * @return chain length in words, or 0 if `end` is unreachable
 */
int ladderLength04(std::string start, std::string end, std::vector<std::string> &dict) {
	const std::size_t wordLen = start.length();
	if (end.length() != wordLen)
		return 0;

	const std::size_t dictCount = dict.size();
	const std::size_t startIdx = dictCount;
	const std::size_t endIdx = dictCount + 1;
	const std::size_t nodeCount = dictCount + 2;

	// Node index -> word, without copying the dictionary.
	auto wordAt = [&](std::size_t idx) -> const std::string & {
		if (idx < dictCount)
			return dict[idx];
		return idx == startIdx ? start : end;
	};

	// Number of differing positions when it is 0 or 1, INT_MAX ("no edge")
	// otherwise.
	auto edgeWeight = [wordLen](const std::string &a, const std::string &b) -> int {
		if (a.length() != wordLen || b.length() != wordLen)
			return INT_MAX;
		int mismatches = 0;
		for (std::size_t k = 0; k < wordLen; ++k) {
			if (a[k] != b[k] && ++mismatches > 1)
				return INT_MAX;
		}
		return mismatches;
	};

	std::vector<int> dist(nodeCount, INT_MAX);
	std::vector<char> settled(nodeCount, 0);
	dist[startIdx] = 0;

	for (std::size_t round = 0; round < nodeCount; ++round) {
		// Pick the unsettled node with the smallest finite distance;
		// nodeCount doubles as "none found".
		std::size_t closest = nodeCount;
		for (std::size_t i = 0; i < nodeCount; ++i) {
			if (settled[i] || dist[i] == INT_MAX)
				continue;
			if (closest == nodeCount || dist[i] < dist[closest])
				closest = i;
		}
		if (closest == nodeCount)
			break;  // everything still unsettled is unreachable
		if (closest == endIdx)
			return dist[endIdx] + 1;

		settled[closest] = 1;
		for (std::size_t i = 0; i < nodeCount; ++i) {
			if (settled[i])
				continue;
			const int w = edgeWeight(wordAt(closest), wordAt(i));
			// dist[closest] is finite and bounded by nodeCount: no overflow.
			if (w != INT_MAX && dist[closest] + w < dist[i])
				dist[i] = dist[closest] + w;
		}
	}
	return 0;
}
别人的比较高效的

// Word Ladder with hash-set lookups: breadth-first search that, for every
// dequeued word, generates all single-letter variants ('a'..'z' at each
// position) and keeps those found in `dict`. All edges have weight 1, so the
// first time `end` is generated the chain is the shortest one.
// Cost is O(n * len * 26) set lookups; an O(n^2) pairwise approach gets TLE.
// Returns the number of words in the chain, or 0 if `end` is unreachable.
int ladderLength06(string start, string end, unordered_set<string> &dict) {
	unordered_set<string> seen;
	seen.insert(start);

	queue<pair<string,int>> pending;
	pending.push(make_pair(start, 1));

	for (; !pending.empty(); pending.pop())
	{
		// Copy the front entry; it is popped only after all variants
		// of this word have been examined.
		const string word = pending.front().first;
		const int nextStep = pending.front().second + 1;

		for (int pos = 0; pos < word.size(); ++pos)
		{
			string candidate = word;
			for (int offset = 0; offset < 26; ++offset)
			{
				candidate[pos] = offset + 'a';
				if (candidate == end)
					return nextStep;
				if (dict.find(candidate) == dict.end())
					continue;
				// insert() reports whether the word was new, which
				// replaces the separate "already visited?" lookup.
				if (seen.insert(candidate).second)
					pending.push(make_pair(candidate, nextStep));
			}
		}
	}
	return 0;
}

这题中 unordered_set 怎么实现高效的插入,查找呢?

http://stackoverflow.com/questions/6204982/unordered-set-questions

unordered_set and all the other unordered_ data structures use hashing, as mentioned by @Sean. Hashing involves amortized constant time for insertion, and close to constant time for lookup. A hash function essentially takes some information and produces a number from it. It is a function in the sense that the same input has to produce the same output. However, different inputs can result in the same output, resulting in what is termed a collision. Lookup would be guaranteed to be constant time for a "perfect hash function", that is, one with no collisions. In practice, the input number comes from the element you store in the structure (say its value, if it is a primitive type), and the hash function maps it to a location in a data structure. Hence, for a given key, the function takes you to the place where the element is stored without need for any traversals or searches (ignoring collisions here for simplicity), hence constant time. There are different implementations of these structures (open addressing, chaining, etc.). See hash table, hash function. I also recommend section 3.7 of The Algorithm Design Manual by Skiena. Now, concerning big-O complexity, you are right that you have O(n) + O(n) + O(size of overlap). Since the overlap cannot be bigger than the smaller of m and n, the overall complexity can be expressed as O(kN), where N is the largest between m and n. So, O(N). Again, this is "best case", without collisions, and with perfect hashing.

set and multiset, on the other hand, use balanced binary search trees (typically red-black trees), so insertions and look-ups are O(log N). The actual performance of a hashed structure vs. a binary tree one will depend on N, so it is best to try the two approaches and profile them in a realistic running scenario.

非常好的问题! 你提到 **`denylist` 是静态的**,也就是说数据不会发生变化,只在初始化时加载一次。在这种情况下,使用 **完美哈希(Perfect Hashing)** 可以带来显著的性能提升和内存优化。 下面我们来详细对比一下: --- ## ✅ 一、当前使用的哈希 vs 完美哈希 | 对比项 | 当前哈希(开放寻址) | 完美哈希(Perfect Hash) | |--------|----------------------|---------------------------| | 数据是否可变 | ✅ 支持动态插入/删除 | ❌ 仅适用于静态数据 | | 查询效率 | ⚠️ O(1) 平均,可能冲突 | ✅ 真正的 O(1) | | 内存占用 | ⚠️ 需要预留空槽位 | ✅ 精确匹配大小 | | 构建时间 | ✅ 快速 | ⚠️ 构建较慢(但只需一次) | | 是否需要冲突处理 | ✅ 需要(如线性探测) | ❌ 不需要 | | 实现复杂度 | ✅ 简单 | ⚠️ 复杂 | --- ## ✅ 二、完美哈希的原理 完美哈希是一种 **为静态数据集构建的哈希函数**,它的特性是: - **每个键都能映射到一个唯一的索引位置**; - **完全没有哈希冲突**; - 查询时 **只需一次访问哈希表**,不需要探测; - 常用于嵌入式系统、编译器关键字查找、静态 denylist 等场景。 --- ## ✅ 三、为什么完美哈希适合你的场景? 你提到: > "我的 denylist 的数据是静态的" 这意味着: - 数据不会变化; - 可以在构建程序时预计算哈希函数; - 可以一次性构建完美哈希表; - 查询时可以做到 **真正的 O(1)**,无冲突、无探测。 --- ## ✅ 四、实现方式(以你的 IP denylist 为例) 由于你的数据是静态的,我们可以使用工具(如 `gperf`)或手动构建一个 **最小完美哈希函数**,将每个 IP 地址映射到唯一的索引。 ### 示例:手动实现完美哈希结构(简化版) ```c #include <stdio.h> #include <string.h> // 假设 denylist 中的 IP 是固定的 const char* deny_ips[] = { "224.0.0.1", "224.0.0.5", "ff02::1", "ff02::2" }; // 手动定义完美哈希映射表(key -> index) typedef struct { const char* ip; int index; } PerfectHashEntry; PerfectHashEntry perfect_hash_table[] = { {"224.0.0.1", 0}, {"224.0.0.5", 1}, {"ff02::1", 2}, {"ff02::2", 3} }; #define DENYLIST_SIZE (sizeof(perfect_hash_table)/sizeof(perfect_hash_table[0])) // 完美哈希查找函数 int is_ip_denied(const char* ip) { for (int i = 0; i < DENYLIST_SIZE; ++i) { if (strcmp(perfect_hash_table[i].ip, ip) == 0) { return 1; // 在 denylist 中 } } return 0; // 不在 denylist 中 } int main() { const char* test_ips[] = { "224.0.0.1", "224.0.0.2", "ff02::1", "ff02::3" }; for (int i = 0; i < 4; ++i) { printf("IP: %s -> %s\n", test_ips[i], is_ip_denied(test_ips[i]) ? "DENIED" : "ALLOWED"); } return 0; } ``` ### 输出: ``` IP: 224.0.0.1 -> DENIED IP: 224.0.0.2 -> ALLOWED IP: ff02::1 -> DENIED IP: ff02::3 -> ALLOWED ``` --- ## ✅ 五、如何自动生成完美哈希函数? 
对于大量静态数据,你可以使用工具自动生成完美哈希函数: ### 工具推荐: - **`gperf`(GNU Perfect Hash)**:专门用于生成完美哈希函数的工具。 - 支持 C/C++ 输出; - 可以生成查找函数和哈希表结构。 ### 使用示例(`gperf`): 1. 创建一个文本文件 `denylist.gperf`: ``` struct denylist_entry { const char* ip; int index; }; %% 224.0.0.1, 0 224.0.0.5, 1 ff02::1, 2 ff02::2, 3 %% ``` 2. 使用 `gperf` 生成代码: ```bash gperf -t -N in_denylist -K ip -L ANSI-C denylist.gperf > denylist_hash.c ``` 3. 将生成的函数集成到你的项目中即可。 --- ## ✅ 六、总结:完美哈希 vs 当前哈希 | 场景 | 推荐方案 | |------|----------| | 数据静态、只读 | ✅ 完美哈希 | | 数据动态、频繁更新 | ❌ 完美哈希,✅ 使用开放寻址或链式哈希 | | 需要极致性能 | ✅ 完美哈希 | | 数据量小 | ✅ 手动实现完美哈希 | | 数据量大 | ✅ 使用 `gperf` 自动生成 | --- ##
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值