Trie(C#)

本文介绍了一个高效的模糊搜索算法实现,该算法支持非通配符模糊搜索,并具备搜索结果优先级功能,能根据首字母匹配及连续匹配提升结果的相关性。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

支持模糊搜索(非通配符)

支持搜索结果优先级(首字母匹配、连续匹配有较高优先级

TrieSearch.cs

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Linq;
using System.Threading.Tasks;

// ReSharper disable UnusedAutoPropertyAccessor.Global
// ReSharper disable MemberCanBePrivate.Global
// ReSharper disable PossibleMultipleEnumeration
namespace TrieCS
{
    class ExampleContet
    {
        public string Name { get; set; }

        public string Content { get; set; }

        public static readonly ExampleContet[] Examples =
        {
            new ExampleContet
            {
                Name = "app list",
                Content = "YouJian Rili DianShi XiangCe Zoe YouTube Car ShiZhong HTCZhiNan LianXiRen PlayShangDian GuPiao BiaoGe ChaoJiYongHu WenDang Keep Seeder Sense6Toolbox XposedInstaller BaiDuNuoMi MeiTuan BaiDu BaiDuDiTu BaiDuLvYou BaiDuTieBa BiYingCiDian BoHao BuKaManHua ChiZi CunQian FanYi XiaMiYinYue"
            },
            new ExampleContet
            {
                Name = "ins",
                Content = "s sin dins"
            },
            new ExampleContet
            {
                Name = "ab",
                Content = "abc dab"
            },
            new ExampleContet
            {
                Name = "blank",
                Content = ""
            }
        };
    }

    /// <summary>
    /// Performance info
    /// </summary>
    public class PerfInfo
    {
        public double FilterOutMs { get; internal set; }
        public double TotalMs { get; internal set; }

        public override string ToString()
        {
            return String.Format("Total:{0:0.00000}ms, FilterAdd:{1:0.00000}ms",
                TotalMs,
                FilterOutMs);
        }
    }

    /// <summary>
    /// Grouped result of all <see cref="MatchInfo"/> with the same index.
    /// </summary>
    public class SearchResult : IComparable<SearchResult>
    {
        /// <summary>
        /// index in the <see cref="TrieSearch.Keywords"/> of a <see cref="TrieSearch"/>
        /// </summary>
        public int Index { get; set; }
        /// <summary>
        /// positions of matched chars in <see cref="Content"/>
        /// </summary>
        public SortedList<int, int> Positions { get; private set; }
        /// <summary>
        /// how excactly it matchs the query
        /// </summary>
        public int Priority { get; set; }
        /// <summary>
        /// 
        /// </summary>
        public string Content { get; set; }

        // ReSharper disable once UnusedMember.Global
        public string PositionString
        {
            get
            {
                return String.Join(",", Positions.Values);
            }
        }

        public SearchResult()
        {
            Positions = new SortedList<int, int>();
        }

        public int CompareTo(SearchResult other)
        {
            return other.Priority.CompareTo(Priority);
        }
    }

    /// <summary>
    /// Result of an single matched char
    /// </summary>
    class MatchInfo
    {
        /// <summary>
        /// index in the <see cref="TrieSearch.Keywords"/> of a <see cref="TrieSearch"/>
        /// </summary>
        public int Index { get; set; }
        /// <summary>
        /// position the this char in a string
        /// </summary>
        public int Position { get; set; }
        /// <summary>
        /// 
        /// </summary>
        public int Priority { get; set; }
        /// <summary>
        /// the char content
        /// </summary>
        public char Data { get; set; }

        public override string ToString()
        {
            return String.Format("[{0}.{1}]", Index, Position);
        }
    }

    /// <summary>
    /// Trie node
    /// </summary>
    class Node
    {
        /// <summary>
        /// Char content. Only for data visualization.
        /// </summary>
        public char Data { get; set; }
        /// <summary>
        /// All <see cref="MatchInfo"/> of this node.
        /// </summary>
        public List<MatchInfo> Infos { get; private set; }

        public Node()
        {
            Infos = new List<MatchInfo>();
        }
        public override string ToString()
        {
            return Data.ToString(CultureInfo.InvariantCulture);
        }
    }

    /// <summary>
    /// 
    /// </summary>
    class TrieSearch
    {
        /// <summary>
        /// Performance info of the last search.
        /// </summary>
        public PerfInfo LastPerfInfo { get; private set; }

        /// <summary>
        /// All nodes in this trie.
        /// </summary>
        public Dictionary<char, Node> Nodes { get; private set; }

        private string[] _keywords;
        /// <summary>
        /// the contents from which you may want to search.
        /// <para/>setting this value will immidietly build the trie.
        /// </summary>
        public string[] Keywords
        {
            get { return _keywords; }
        }

        /// <summary>
        /// Build the trie. A simple async wrapper for <see cref="BuildTree"/>
        /// </summary>
        /// <param name="contents"></param>
        /// <returns></returns>
        public Task BuildTreeAsync(string[] contents)
        {
            return Task.Run(() => BuildTree(contents));
        }

        /// <summary>
        /// Search. A simple async wrapper for <see cref="Search"/>
        /// </summary>
        /// <param name="query"></param>
        /// <returns>Matched results</returns>
        public Task<SearchResult[]> SearchAsync(string query)
        {
            return Task.Run<SearchResult[]>(() => Search(query));
        }

        /// <summary>
        /// build the tree.
        /// </summary>
        /// <param name="index">index of the keyword in <see cref="Keywords"/></param>
        public void BuildTree(string[] contents)
        {
            _keywords = contents;
            Nodes = new Dictionary<char, Node>();
            for (var i = 0; i < contents.Length; i++)
            {
                string keyword = contents[i];
                for (var j = 0; j < keyword.Length; j++)
                {
                    var c = keyword[j];
                    if (!Char.IsLetter(c)) continue;
                    var info = new MatchInfo
                    {
                        Index = i,
                        Position = j,
                        Priority = j == 0 ? 3 : 1,  // initial char has a higher priority
                        Data = c,
                    };

                    // Capital char has a higher priority.
                    if (Char.IsUpper(c))
                        info.Priority += 2;

                    // Only lower char is accepted.
                    c = Char.ToLower(c);

                    Node newNode;
                    if (!Nodes.TryGetValue(c, out newNode))
                    {
                        newNode = new Node { Data = c };
                        Nodes[c] = newNode;
                    }

                    // Record this match info.
                    newNode.Infos.Add(info);
                }
            }
        }

        /// <summary>
        /// Search
        /// </summary>
        /// <param name="query"></param>
        /// <returns>Matched results</returns>
        public SearchResult[] Search(string query)
        {
            LastPerfInfo = new PerfInfo();

            if (Keywords.Length == 0)
                return new SearchResult[0];

            var timer = HiPerfTimer.StartNew();
            var lastMatchedPositions = new Dictionary<int, int>();
            var resultList = new Dictionary<int, SearchResult>();
            var isInOrder = new Dictionary<int, bool>();

            for (int pos = 0; pos < query.Length; pos++)
            {
                var c = query[pos];
                Node node;
                if (!Nodes.TryGetValue(c, out node))
                {
                    resultList.Clear();
                    break;
                }

                FilterOut(resultList, node);

                isInOrder = new Dictionary<int, bool>();

                node.Infos.ForEach(info =>
                {
                    var index = info.Index;
                    if (pos == 0)
                    {
                        if (!lastMatchedPositions.ContainsKey(index))
                            lastMatchedPositions[index] = info.Position;
                        isInOrder[index] = true;
                    }
                    else
                    {
                        if (!resultList.ContainsKey(index))
                        {
                            resultList.Remove(index);
                            return;
                        }
                        else if (isInOrder.ContainsKey(index) && isInOrder[index])
                        {
                            return;
                        }
                        else
                        {
                            if (info.Position > lastMatchedPositions[index])
                            {
                                isInOrder[index] = true;
                                lastMatchedPositions[index] = info.Position;
                                Debug.WriteLine("pos of {0} is now {1}", index, info.Position);
                            }
                            else
                            {
                                isInOrder[index] = false;
                            }
                        }
                    }
                        
                    AddToResult(resultList, info, index);
                });
                // filter out items not in order.
                foreach (var item in isInOrder.Where(_ => !_.Value).Select(_ => _.Key))
                {
                    resultList.Remove(item);
                }
            }

            // other procedures
            var resultArray = PostProcess(resultList);

            timer.Stop();
            LastPerfInfo.TotalMs = timer.Duration;

            return resultArray;
        }

        /// <summary>
        /// Remove items, which not appears in node.Infos, in resultList
        /// </summary>
        /// <param name="resultList"></param>
        /// <param name="node"></param>
        private static void FilterOut(Dictionary<int, SearchResult> resultList, Node node)
        {
            var lookup = node.Infos.ToLookup(_ => _.Index);
            var dif = resultList.Select(_ => _.Key).Where(_ => !lookup.Contains(_)).ToList();
            dif.ForEach(_ => resultList.Remove(_));
        }

        /// <summary>
        /// 
        /// </summary>
        /// <param name="resultList"></param>
        /// <returns></returns>
        private static SearchResult[] PostProcess(Dictionary<int, SearchResult> resultList)
        {
            var resultArray = resultList.Values.ToArray();
            foreach (var item in resultArray)
            {
                // increase priority if continues.
                if (Continues(item.Positions.Values))
                    item.Priority += 4;
            }
            return resultArray;
        }

        /// <summary>
        /// Add <see cref="MatchInfo"/> to resultList
        /// </summary>
        /// <param name="resultList"></param>
        /// <param name="info"></param>
        /// <param name="index"></param>
        private void AddToResult(Dictionary<int, SearchResult> resultList, MatchInfo info, int index)
        {
            SearchResult sr;
            if (!resultList.TryGetValue(info.Index, out sr))
            {
                sr = new SearchResult
                {
                    Index = index,
                    Content = Keywords[index]
                };
                resultList[index] = sr;
            }
            if (!sr.Positions.ContainsKey(info.Position))
            {
                sr.Positions.Add(info.Position, info.Position);
                if (info.Priority > sr.Priority)
                    sr.Priority = info.Priority;
            }
        }

        /// <summary>
        /// 
        /// </summary>
        /// <param name="list"></param>
        /// <returns></returns>
        private static bool Continues(IEnumerable<int> list)
        {
            var last = list.ElementAt(0);
            var threshold = 3;
            foreach (var item in list.Skip(1))
            {
                if (item != last + 1)
                {
                    threshold = 3;
                    last = item;
                    continue;
                }
                last = item;
                threshold--;
                if (threshold == 0)
                    return true;
            }
            return false;
        }
    }
}
// ReSharper restore PossibleMultipleEnumeration
// ReSharper restore MemberCanBePrivate.Global
// ReSharper restore UnusedAutoPropertyAccessor.Global

 

HiPerfTimer

/* High precision timer class
 *   - http://www.eggheadcafe.com/articles/20021111.asp
 * 
 *  (Thanks to the author!)
 */
using System;
using System.Runtime.InteropServices;
using System.ComponentModel;
using System.Threading;

namespace TrieCS
{
    public class HiPerfTimer
    {
        [DllImport("Kernel32.dll")]
        private static extern bool QueryPerformanceCounter(out long lpPerformanceCount);
        [DllImport("Kernel32.dll")]
        private static extern bool QueryPerformanceFrequency(out long lpFrequency);
        private long startTime;
        private long stopTime;
        private long freq;
        /// <summary>
        /// ctor
        /// </summary>
        public HiPerfTimer()
        {
            startTime = 0;
            stopTime = 0;
            freq = 0;
            if (QueryPerformanceFrequency(out freq) == false)
            {
                throw new Win32Exception(); // timer not supported
            }
        }

        public static HiPerfTimer StartNew()
        {
            var t = new HiPerfTimer();
            t.Start();
            return t;
        }

        /// <summary>
        /// Start the timer
        /// </summary>
        /// <returns>long - tick count</returns>
        public long Start()
        {
            QueryPerformanceCounter(out startTime);
            return startTime;
        }
        /// <summary>
        /// Stop timer 
        /// </summary>
        /// <returns>long - tick count</returns>
        public long Stop()
        {
            QueryPerformanceCounter(out stopTime);
            return stopTime;
        }
        /// <summary>
        /// Return the duration of the timer (in milliseconds)
        /// </summary>
        /// <returns>double - duration</returns>
        public double Duration
        {
            get
            {
                return (double)(stopTime - startTime) / (double)freq * 1000.0;
            }
        }
        /// <summary>
        /// Frequency of timer (no counts in one second on this machine)
        /// </summary>
        ///<returns>long - Frequency</returns>
        public long Frequency
        {
            get
            {
                QueryPerformanceFrequency(out freq);
                return freq;
            }
        }
    }
}
View Code

 

转载于:https://www.cnblogs.com/ornithopter/p/3773119.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值