839. Similar String Groups

本文介绍使用并查集解决相似字符串分组问题的两种方法。一种适用于词汇较少的情况,通过枚举列表中所有字符串的两两关系;另一种适用于词汇较多的场景,通过遍历每个字符并枚举其可能到达的字符。文章详细解释了算法原理,并提供Python代码实现。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

解法

核心思想是用并查集,关键点是如何得到字符串之间的联系(即并查集需要合并的边)
两种方法:

  1. 枚举list里两两字符串之间的关系,适合词少的时候
  2. 遍历每个字符,枚举它可能到达的字符,适合词多的时候
    假设字符串s可以到达t,注意这里存的是t到s的边,因为set[i]要存的是字符串i可以到达的、且在list里的字符串的集合

点:

  1. 词稀疏和稠密适合不同的算法
  2. 注意最后统计的时候同一个s不要统计2次

标准答案里并查集用的是数组下标,我用的是字符串hash,反正也过了,懒得改了

class Solution(object):
    """LeetCode 839 (Similar String Groups), solved with a union-find."""

    def numSimilarGroups(self, A):
        """Count the connected groups of similar strings in ``A``.

        Two strings are treated as similar when they differ in at most two
        positions (for anagrams of equal length this means they are equal
        or one swap apart).  Groups are the connected components of that
        relation.

        The union-find is keyed by the strings themselves, so duplicate
        strings collapse into a single node.

        :type A: List[str]
        :param A: strings assumed to share one length (the length of
            ``A[0]`` is used for every word).
        :rtype: int
        :return: number of groups; ``0`` for an empty list.
        """
        from itertools import combinations

        if not A:
            return 0

        parent = {}
        count = len(A)
        width = len(A[0])

        def is_similar(a, b):
            """Return True when ``a`` and ``b`` differ in at most 2 places."""
            mismatches = 0
            for ch_a, ch_b in zip(a, b):
                if ch_a != ch_b:
                    mismatches += 1
                    if mismatches > 2:
                        return False
            return True

        def find(x):
            """Return the representative of ``x``, compressing the path."""
            # Unknown strings are registered lazily as their own root.
            root = parent.setdefault(x, x)
            while parent[root] != root:
                root = parent[root]
            # Second pass: point every node on the path at the root.
            while x != root:
                next_node = parent[x]
                parent[x] = root
                x = next_node
            return root

        def union(x, y):
            """Merge the set containing ``y`` into the set containing ``x``."""
            root_x = find(x)
            parent[find(y)] = root_x

        if count < width * width:
            # Few words: compare every unordered pair directly.
            for first, second in combinations(A, 2):
                if is_similar(first, second):
                    union(first, second)
        else:
            # Many words: generate every single-swap variant of each word
            # and link it when that variant is itself one of the inputs.
            words = set(A)
            for word in words:
                chars = list(word)
                for i, j in combinations(range(width), 2):
                    chars[i], chars[j] = chars[j], chars[i]
                    candidate = "".join(chars)
                    if candidate in words:
                        union(word, candidate)
                    # Undo the swap so the next pair starts from ``word``.
                    chars[i], chars[j] = chars[j], chars[i]

        # A distinct string is a group representative when it is its own
        # parent, or when it was never linked to anything at all.
        return sum(1 for word in set(A) if parent.get(word, word) == word)
' Groups the header cells of row 3 (from column K to the last used column)
' by caption similarity, merges each multi-column group into one header cell
' and concatenates the data of the rows below into the group's first column.
'
' Original report attached to this code: running it raised the compile error
' "ByRef argument type mismatch".  The cause was passing the Variant loop
' variable of "For Each key In dict.Keys" to String parameters that were
' ByRef by default; the helper functions now take their strings ByVal.
Sub MergeSimilarCells()
    Dim ws As Worksheet
    Dim dict As Object
    Dim mergedRng As Range
    Dim groups As Variant
    Dim key As Variant
    Dim baseKey As String
    Dim newValue As String
    Dim foundGroup As Boolean
    Dim startCol As Integer: startCol = 11 ' column K
    Dim currentRow As Integer: currentRow = 3
    Dim lastCol As Integer
    Dim minCol As Integer
    Dim maxCol As Integer
    Dim col As Long
    Dim c As Long
    Dim r As Long
    Dim lastRow As Long

    Application.ScreenUpdating = False
    Application.DisplayAlerts = False

    Set ws = ActiveSheet
    lastCol = ws.Cells(currentRow, ws.Columns.Count).End(xlToLeft).Column

    ' Maps a group's first caption to a comma-separated list of its columns.
    Set dict = CreateObject("Scripting.Dictionary")

    ' Pass 1: identify similar columns.
    For col = startCol To lastCol
        If ws.Cells(currentRow, col).MergeCells Then
            Set mergedRng = ws.Cells(currentRow, col).MergeArea
            baseKey = mergedRng.Cells(1, 1).Value
            ' Skip the remaining columns of this merged area.
            ' NOTE(review): the column recorded below is therefore the LAST
            ' column of the merged area, not its first - confirm intended.
            col = col + mergedRng.Columns.Count - 1
        Else
            baseKey = ws.Cells(currentRow, col).Value
        End If

        If baseKey = "" Then GoTo NextCol

        foundGroup = False
        For Each key In dict.Keys
            If IsSimilar(key, baseKey) Then
                dict(key) = dict(key) & "," & col
                foundGroup = True
                Exit For
            End If
        Next key

        If Not foundGroup Then
            dict.Add baseKey, CStr(col)
        End If
NextCol:
    Next col

    ' Pass 2: merge header cells and consolidate the data below them.
    For Each key In dict.Keys
        groups = Split(dict(key), ",")

        ' Only groups with at least two columns are merged.  (The previous
        ' test, UBound(groups) >= 0, was true for single columns as well.)
        If UBound(groups) >= 1 Then
            minCol = CInt(groups(0))
            maxCol = CInt(groups(UBound(groups)))

            ' Merge the header row across the whole group.
            With ws.Range(ws.Cells(currentRow, minCol), ws.Cells(currentRow, maxCol))
                .UnMerge
                .Merge
                .HorizontalAlignment = xlCenter
                .Value = key
            End With

            ' Consolidate the rows below the header.
            lastRow = ws.Cells(ws.Rows.Count, minCol).End(xlUp).Row
            For r = currentRow + 1 To lastRow
                newValue = ""

                ' Collect every non-empty value of the group in this row.
                For c = minCol To maxCol
                    If Not IsEmpty(ws.Cells(r, c)) Then
                        newValue = newValue & ws.Cells(r, c).Value & ", "
                    End If
                Next c

                If Len(newValue) > 0 Then
                    ' Drop the trailing ", " separator.
                    newValue = Left(newValue, Len(newValue) - 2)
                    ws.Cells(r, minCol).Value = newValue

                    ' Clear the other cells of the group.
                    For c = minCol + 1 To maxCol
                        ws.Cells(r, c).ClearContents
                    Next c
                End If
            Next r
        End If
    Next key

    Application.DisplayAlerts = True
    Application.ScreenUpdating = True
    MsgBox "操作完成!", vbInformation
End Sub

' Returns True when two captions count as similar: identical text, character
' overlap of at least 0.9, a shared segment, or word similarity of at least
' 0.5.  Returns False when either string is empty.
' Parameters are ByVal so Variant arguments (e.g. dictionary keys) compile.
Function IsSimilar(ByVal s1 As String, ByVal s2 As String) As Boolean
    ' Empty values never match (the result stays False).
    If Len(s1) = 0 Or Len(s2) = 0 Then Exit Function

    ' Case 1: exactly equal.
    If s1 = s2 Then
        IsSimilar = True
        Exit Function
    End If

    ' Case 2: character overlap ratio of at least 0.9.
    If CharacterOverlap(s1, s2) >= 0.9 Then
        IsSimilar = True
        Exit Function
    End If

    ' Case 3: a common segment exists.
    If Len(s1) >= 2 And Len(s2) >= 2 Then
        If HasCommonSegment(s1, s2) Then
            IsSimilar = True
            Exit Function
        End If
    End If

    ' Case 4: word-level similarity of at least 0.5.
    If TextSimilarity(s1, s2) >= 0.5 Then
        IsSimilar = True
        Exit Function
    End If
End Function

' Character overlap ratio: twice the number of characters of s1 that occur
' anywhere in s2, divided by the combined length of both strings.
' Returns 0 when both strings are empty.
Function CharacterOverlap(ByVal s1 As String, ByVal s2 As String) As Double
    Dim i As Long
    Dim common As Integer: common = 0
    Dim total As Integer: total = Len(s1) + Len(s2)

    ' Avoid a division by zero for two empty strings.
    If total = 0 Then Exit Function

    For i = 1 To Len(s1)
        If InStr(1, s2, Mid(s1, i, 1)) > 0 Then
            common = common + 1
        End If
    Next i

    CharacterOverlap = (2 * common) / total
End Function

' Returns True when some substring of s1, whose length is 40% of the shorter
' string's length (rounded up), also occurs in s2.
Function HasCommonSegment(ByVal s1 As String, ByVal s2 As String) As Boolean
    Dim i As Long
    Dim segment As String
    Dim minLen As Integer: minLen = WorksheetFunction.Min(Len(s1), Len(s2))
    Dim segLen As Integer: segLen = Application.WorksheetFunction.RoundUp(minLen * 0.4, 0)

    ' A zero-length segment would trivially "match"; treat it as no match.
    If segLen < 1 Then Exit Function

    For i = 1 To Len(s1) - segLen + 1
        segment = Mid(s1, i, segLen)
        If InStr(1, s2, segment) > 0 Then
            HasCommonSegment = True
            Exit Function
        End If
    Next i
End Function

' Word-level similarity: both strings are split on spaces after separator
' normalisation; the result is twice the number of words of s1 that also
' appear in s2, divided by the total word count.  Returns 0 when neither
' string contains any word.
Function TextSimilarity(ByVal s1 As String, ByVal s2 As String) As Double
    Dim w1 As Variant
    Dim w2 As Variant
    Dim wordCount As Long
    Dim words1 As Variant: words1 = Split(ReplaceSymbols(s1), " ")
    Dim words2 As Variant: words2 = Split(ReplaceSymbols(s2), " ")
    Dim matches As Integer: matches = 0

    ' Split("") yields an array with UBound = -1, so this can be zero.
    wordCount = UBound(words1) + UBound(words2) + 2
    If wordCount = 0 Then Exit Function

    For Each w1 In words1
        For Each w2 In words2
            If w1 = w2 And Len(w1) > 0 Then
                matches = matches + 1
                Exit For
            End If
        Next w2
    Next w1

    TextSimilarity = (2 * matches) / wordCount
End Function

' Normalises text by replacing the separators "/", "、" and "," with spaces
' and then applying the worksheet Trim function.
Function ReplaceSymbols(ByVal txt As String) As String
    Dim res As String
    res = Replace(txt, "/", " ")
    res = Replace(res, "、", " ")
    res = Replace(res, ",", " ")
    ReplaceSymbols = Application.WorksheetFunction.Trim(res)
End Function
最新发布
07-19
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值