Implement regular expressions

最新推荐文章于 2021-03-11 05:41:30 发布
原创最新推荐文章于 2021-03-11 05:41:30 发布 · 732 阅读
0 ·
CC 4.0 BY-SA版权
本文介绍了一种基于图论的正则表达式匹配方法，通过构建图结构并使用深度优先搜索来查找字符串是否符合给定的正则表达式模式。此方法支持多种正则表达式元字符。
摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >
    namespace details
    {
        /**
         * Marks every vertex reachable from `u` (including `u` itself) in `visited`.
         *
         * Vertices whose flag is already set on entry are treated as explored and
         * are not expanded again; the start vertex `u` is always expanded.
         *
         * The traversal keeps its own work stack instead of recursing, so the depth
         * of the graph cannot exhaust the call stack.
         *
         * @param u        Start vertex; must be a valid index into `g` and `visited`.
         * @param g        Adjacency lists: g[ v ] holds the successors of vertex v.
         * @param visited  One flag per vertex; set to true for each reached vertex.
         */
        void dfs_visit( size_t u, std::vector<std::forward_list<size_t>> & g, std::vector<bool> & visited )
        {
            std::vector<size_t> pending( 1, u );
            visited[ u ] = true;
            while ( !pending.empty() ) {
                const size_t current = pending.back();
                pending.pop_back();
                for ( size_t next : g[ current ] ) {
                    // Flag a vertex when it is queued so it is never queued twice.
                    if ( !visited[ next ] ) {
                        visited[ next ] = true;
                        pending.push_back( next );
                    }
                }
            }
        }
    }

    /**
     * Directed graph over the vertices 0 .. vertices-1, stored as one adjacency
     * list per vertex.
     *
     * The class holds nothing but the adjacency lists, so the implicitly
     * generated copy and move operations are safe. The order of edges inside a
     * list is not observable: dfs_visit() reports vertices in index order.
     */
    class graph
    {
    public:
        /** Creates a graph with `vertices` vertices and no edges. */
        graph( size_t vertices ) : m_graph( vertices )
        {
        }

        /**
         * Adds the directed edge u -> v.
         * Both indices must be smaller than the vertex count; this is only
         * checked by assert, i.e. not at all when NDEBUG is defined.
         */
        void add_edge( size_t u, size_t v )
        {
            assert( u < m_graph.size( ) && v < m_graph.size( ) );
            m_graph[ u ].push_front( v );
        }

        /**
         * Appends to `output`, in ascending index order, every vertex reachable
         * from `u` (including `u` itself). Existing content of `output` is kept.
         */
        void dfs_visit( size_t u, std::vector<size_t> & output )
        {
            std::vector<bool> visited( m_graph.size(), false );
            details::dfs_visit( u, m_graph, visited );

            output.reserve( m_graph.size() );
            for ( size_t vertex = 0; vertex < visited.size(); ++vertex ) {
                if ( visited[ vertex ] ) {
                    output.push_back( vertex );
                }
            }
        }
    private:
        std::vector<std::forward_list<size_t>> m_graph;
    };

    // Text of the std::invalid_argument that match() throws for a malformed pattern.
    char const * regex_is_wrong = "The value of the second parameter is wrong";

    /**
     * Tests whether the whole of `input` matches the regular expression `regex`.
     *
     * Supported metacharacters:
     *     ( )   grouping
     *     |     alternation inside the enclosing group
     *     .     any single character
     *     *     zero or more repetitions of the preceding character or group
     *     +     one or more repetitions of the preceding character or group
     * There is no escape syntax, so the characters ( ) | * + are always
     * operators and can never be matched literally.
     *
     * The pattern is compiled into an NFA: vertex i stands for pattern
     * character i, vertex M is the accepting state, and the graph holds the
     * epsilon transitions. The input is then consumed one character at a time
     * while tracking the set of vertices that are currently reachable.
     *
     * @param input  NUL-terminated text to test; may be a null pointer.
     * @param regex  NUL-terminated pattern; may be a null pointer.
     * @return true when the entire input matches the pattern. When either
     *         pointer is null the result is true only if both are null.
     * @throws std::invalid_argument when the pattern has unbalanced parentheses
     *         or a '*' / '+' with nothing to repeat. A malformed pattern is
     *         rejected regardless of the input, including an empty input.
     */
    bool match( const char * input, const char * regex )
    {
        if ( input == nullptr || regex == nullptr ) {
            return input == regex;
        }

        // Always wrap the pattern in one extra group. A top-level '|' then has a
        // group to attach to even when the pattern itself starts with '('
        // (e.g. "(a)|(b)"), and an empty pattern becomes the valid group "()".
        const size_t length = strlen( regex );
        std::vector<char> pattern;
        pattern.reserve( length + 2 );
        pattern.push_back( '(' );
        pattern.insert( pattern.end(), regex, regex + length );
        pattern.push_back( ')' );
        const size_t M = pattern.size();

        const auto is_operator = []( char ch ) {
            return ch == '(' || ch == ')' || ch == '|' || ch == '*' || ch == '+';
        };

        graph g( M + 1 );

        // group_start[ i ] is the index of the '(' closed by the ')' at index i;
        // entries at other positions are never read.
        std::vector<size_t> group_start( M, 0 );
        std::stack<size_t> left_parentheses, separators;

        // Invariant: left_parentheses is non-empty at the start of every iteration
        // after the first, because closing the wrapper group before the last
        // character is rejected below.
        for ( size_t i = 0; i < M; ++i ) {
            switch ( pattern[ i ] ) {
            case '(':
                left_parentheses.push( i );
                g.add_edge( i, i + 1 );
                break;

            case '|':
                // The enclosing '(' may jump straight to the alternative that
                // starts after this '|'.
                g.add_edge( left_parentheses.top(), i + 1 );
                separators.push( i );
                break;

            case ')': {
                const size_t open = left_parentheses.top();
                left_parentheses.pop();
                if ( left_parentheses.empty() && i + 1 != M ) {
                    // This ')' closed the wrapper group early: unbalanced pattern.
                    throw std::invalid_argument( regex_is_wrong );
                }
                // Only the '|' of this group (those located after its '(') end
                // here; separators of enclosing groups stay on the stack.
                while ( !separators.empty() && separators.top() > open ) {
                    g.add_edge( separators.top(), i );
                    separators.pop();
                }
                group_start[ i ] = open;
                g.add_edge( i, i + 1 );
                break;
            }

            case '*':
            case '+': {
                // i >= 1 here because pattern[ 0 ] is always the wrapper '('.
                const char previous = pattern[ i - 1 ];
                if ( previous == '(' || previous == '|' ) {
                    throw std::invalid_argument( regex_is_wrong );   // nothing to repeat
                }
                const size_t operand = ( previous == ')' ) ? group_start[ i - 1 ] : i - 1;
                g.add_edge( i, operand );           // loop back for another repetition
                if ( pattern[ i ] == '*' ) {
                    g.add_edge( operand, i );       // '*' may skip the operand entirely
                }
                g.add_edge( i, i + 1 );
                break;
            }

            default:
                // Literals and '.' have no epsilon transitions.
                break;
            }
        }

        if ( !left_parentheses.empty() || !separators.empty() ) {
            throw std::invalid_argument( regex_is_wrong );
        }

        // Simulate the NFA. `states` always holds each reachable vertex once.
        std::vector<size_t> states;
        g.dfs_visit( 0, states );

        std::vector<bool> reached( M + 1, false );
        std::vector<size_t> closure;
        for ( size_t k = 0; input[ k ] != 0 && !states.empty(); ++k ) {
            const char c = input[ k ];
            reached.assign( M + 1, false );
            for ( size_t state : states ) {
                if ( state >= M ) {
                    continue;   // accepting state consumes nothing
                }
                const char expected = pattern[ state ];
                if ( is_operator( expected ) ) {
                    continue;   // operators never consume an input character
                }
                if ( expected != '.' && expected != c ) {
                    continue;
                }
                if ( reached[ state + 1 ] ) {
                    continue;   // its epsilon closure has already been merged
                }
                closure.clear();
                g.dfs_visit( state + 1, closure );
                for ( size_t vertex : closure ) {
                    reached[ vertex ] = true;
                }
            }
            states.clear();
            for ( size_t vertex = 0; vertex <= M; ++vertex ) {
                if ( reached[ vertex ] ) {
                    states.push_back( vertex );
                }
            }
        }

        return std::find( states.begin(), states.end(), M ) != states.end();
    }