简单高效uri解析

本文介绍了一种简单高效的方法来解析URI,包括scheme、host、port、path、query和fragment等组成部分,适用于各种应用场景。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

简单高效地解析形如 [scheme://]host[:port][path][?query][#fragment] 的 URI。


【头文件】
/// Components of a URI of the form
/// [scheme://]host[:port][path][?query][#fragment].
///
/// A freshly constructed or cleared object holds the defaults:
/// scheme "http", empty host, port 0, path "/", empty query and fragment.
struct URI
{
    std::string    scheme;   // protocol string (http/https/ftp/...)
    std::string    host;     // host string (domain or ip)
    unsigned short port;     // port number; 0 when none was given
    std::string    path;     // path string (e.g. /index.html)
    std::string    query;    // query string (e.g. a=1&b=2), without the leading '?'
    std::string    fragment; // additional identifying information, without the leading '#'

    /// Construct with the default values directly instead of
    /// default-constructing every member and then reassigning it.
    URI() : scheme("http"), host(), port(0), path("/"), query(), fragment() {}

    /// Reset every field to its default value.
    void Clear()
    {
        scheme = "http";  // default "http"
        host.clear();     // default ""
        port = 0;         // default 0
        path = "/";       // default "/"
        query.clear();    // default ""
        fragment.clear(); // default ""
    }
};

/// @brief Parse a URI of the form [scheme://]host[:port][path][?query][#fragment].
/// @param[in] uristr the URI string to parse
/// @param[out] uri receives the parsed components
/// @return true if parsing succeeded, false otherwise
/// @attention when parsing fails, the contents of the out-parameter are unspecified
bool ParseUri(const string& uristr, URI& uri);

【实现文件】
/// @brief Parse a URI of the form [scheme://]host[:port][path][?query][#fragment].
///
/// The host must contain at least one '.'; a bare name such as "www" or
/// "host:80" is rejected. The port, when present, must be a decimal number
/// in 1..65535 without a leading zero, and must be followed by the end of
/// the string or by '/', '?' or '#'. At most one "scheme://" prefix is
/// accepted and the scheme must not be empty.
///
/// @param[in]  uristr the URI string to parse
/// @param[out] uri    reset to its defaults first, then filled in as parsing proceeds
/// @return true on success, false if the input is malformed
/// @attention on failure @p uri may be partially filled; do not rely on its contents
bool ParseUri(const std::string& uristr, URI& uri)
{
    const char* begin = uristr.c_str();
    const char* const end = begin + uristr.size();
    const char* ptr = begin;
    uri.Clear();

    // The first '.' or ':' decides whether the input starts with a scheme or a host.
    char c = 0;
    while ((c = *ptr) != 0 && c != '.' && c != ':')
        ++ptr;
    if (0 == c)
        return false;

    if (':' == c) // found the scheme
    {
        // Need a non-empty scheme, then "//", then at least one more character.
        if (ptr == begin || end - ptr <= 3)
            return false;
        if (ptr[1] != '/' || ptr[2] != '/')
            return false;

        uri.scheme.assign(begin, ptr);
        begin = (ptr += 3); // skip to host begin

        // The host must reach a '.' before any further ':'; this also
        // rejects a second "scheme://" prefix instead of silently keeping the last one.
        while ((c = *ptr) != 0 && c != '.' && c != ':')
            ++ptr;
        if ('.' != c)
            return false;
    }

    // find the host end
    while ((c = *ptr) != 0 && c != ':' && c != '/' && c != '?' && c != '#')
        ++ptr;
    uri.host.assign(begin, ptr);

    if (':' == c) // found the port
    {
        c = *(++ptr);
        if (c < '1' || c > '9')
            return false; // should be a number (and should be > 0, no leading zero)

        // Accumulate in a wider type so that out-of-range ports are
        // detected instead of wrapping around in 16 bits.
        unsigned int port = 0;
        for (; (c = *ptr) >= '0' && c <= '9'; ++ptr)
        {
            port = port * 10 + static_cast<unsigned int>(c - '0');
            if (port > 65535u)
                return false;
        }

        // Anything other than a component delimiter right after the digits is garbage.
        if (c != 0 && c != '/' && c != '?' && c != '#')
            return false;
        uri.port = static_cast<unsigned short>(port);
    }

    if ('/' == c) // found the path (the leading '/' is kept)
    {
        for (begin = ptr; (c = *ptr) != 0 && c != '?' && c != '#'; )
            ++ptr;
        uri.path.assign(begin, ptr);
    }

    if ('?' == c) // found the query string (the leading '?' is dropped)
    {
        for (begin = ++ptr; (c = *ptr) != 0 && c != '#'; )
            ++ptr;
        uri.query.assign(begin, ptr);
    }

    if ('#' == c) // found the fragment: everything after '#'
        uri.fragment.assign(ptr + 1, end);
    return true;
}

【单元测试】
    URI uri;

    // full content
    string uristr = "ftp://www.baidu.com:21/abc.txt?a=b&c=3#frag=10,20";
    ASSERT_TRUE(ParseUri(uristr, uri));
    ASSERT_EQ("ftp", uri.scheme);
    ASSERT_EQ("www.baidu.com", uri.host);
    ASSERT_EQ(21, uri.port);
    ASSERT_EQ("/abc.txt", uri.path);
    ASSERT_EQ("a=b&c=3", uri.query);
    ASSERT_EQ("frag=10,20", uri.fragment);

    // no scheme
    uristr = "www.a-b-c.com:8080/def.vim?xy=12#xxx";
    ASSERT_TRUE(ParseUri(uristr, uri));
    ASSERT_EQ("http", uri.scheme);
    ASSERT_EQ("www.a-b-c.com", uri.host);
    ASSERT_EQ(8080, uri.port);
    ASSERT_EQ("/def.vim", uri.path);
    ASSERT_EQ("xy=12", uri.query);
    ASSERT_EQ("xxx", uri.fragment);

    // no port
    uristr = "http://www.baidu.com/abc.xyz?ab=cd#yyy";
    ASSERT_TRUE(ParseUri(uristr, uri));
    ASSERT_EQ("http", uri.scheme);
    ASSERT_EQ("www.baidu.com", uri.host);
    ASSERT_EQ(0, uri.port);
    ASSERT_EQ("/abc.xyz", uri.path);
    ASSERT_EQ("ab=cd", uri.query);
    ASSERT_EQ("yyy", uri.fragment);

    // no path
    uristr = "http://www.baidu.com:8080?m=5#zzz";
    ASSERT_TRUE(ParseUri(uristr, uri));
    ASSERT_EQ("http", uri.scheme);
    ASSERT_EQ("www.baidu.com", uri.host);
    ASSERT_EQ(8080, uri.port);
    ASSERT_EQ("/", uri.path);
    ASSERT_EQ("m=5", uri.query);
    ASSERT_EQ("zzz", uri.fragment);

    // no query
    uristr = "http://www.qq.com:8888/def.txt#x=y";
    ASSERT_TRUE(ParseUri(uristr, uri));
    ASSERT_EQ("http", uri.scheme);
    ASSERT_EQ("www.qq.com", uri.host);
    ASSERT_EQ(8888, uri.port);
    ASSERT_EQ("/def.txt", uri.path);
    ASSERT_EQ("", uri.query);
    ASSERT_EQ("x=y", uri.fragment);

    // no fragment
    uristr = "http://www.soso.com:1234/abc.html?x=y";
    ASSERT_TRUE(ParseUri(uristr, uri));
    ASSERT_EQ("http", uri.scheme);
    ASSERT_EQ("www.soso.com", uri.host);
    ASSERT_EQ(1234, uri.port);
    ASSERT_EQ("/abc.html", uri.path);
    ASSERT_EQ("x=y", uri.query);
    ASSERT_EQ("", uri.fragment);

    // no scheme & port
    uristr = "www.sohu.com/?x=y";
    ASSERT_TRUE(ParseUri(uristr, uri));
    ASSERT_EQ("http", uri.scheme);
    ASSERT_EQ("www.sohu.com", uri.host);
    ASSERT_EQ(0, uri.port);
    ASSERT_EQ("/", uri.path);
    ASSERT_EQ("x=y", uri.query);
    ASSERT_EQ("", uri.fragment);

    // no scheme & path
    uristr = "www.baidu.com:8080?p=345";
    ASSERT_TRUE(ParseUri(uristr, uri));
    ASSERT_EQ("http", uri.scheme);
    ASSERT_EQ("www.baidu.com", uri.host);
    ASSERT_EQ(8080, uri.port);
    ASSERT_EQ("/", uri.path);
    ASSERT_EQ("p=345", uri.query);
    ASSERT_EQ("", uri.fragment);

    // no scheme & query
    uristr = "www.baidu.com:8080/abc.txt";
    ASSERT_TRUE(ParseUri(uristr, uri));
    ASSERT_EQ("http", uri.scheme);
    ASSERT_EQ("www.baidu.com", uri.host);
    ASSERT_EQ(8080, uri.port);
    ASSERT_EQ("/abc.txt", uri.path);
    ASSERT_EQ("", uri.query);
    ASSERT_EQ("", uri.fragment);

    // no port & path
    uristr = "https://www.baidu.com?x=3";
    ASSERT_TRUE(ParseUri(uristr, uri));
    ASSERT_EQ("https", uri.scheme);
    ASSERT_EQ("www.baidu.com", uri.host);
    ASSERT_EQ(0, uri.port);
    ASSERT_EQ("/", uri.path);
    ASSERT_EQ("x=3", uri.query);
    ASSERT_EQ("", uri.fragment);

    // no port & query
    uristr = "https://www.baidu.com/def.html";
    ASSERT_TRUE(ParseUri(uristr, uri));
    ASSERT_EQ("https", uri.scheme);
    ASSERT_EQ("www.baidu.com", uri.host);
    ASSERT_EQ(0, uri.port);
    ASSERT_EQ("/def.html", uri.path);
    ASSERT_EQ("", uri.query);
    ASSERT_EQ("", uri.fragment);

    // no path & query
    uristr = "https://www.google.com:12345";
    ASSERT_TRUE(ParseUri(uristr, uri));
    ASSERT_EQ("https", uri.scheme);
    ASSERT_EQ("www.google.com", uri.host);
    ASSERT_EQ(12345, uri.port);
    ASSERT_EQ("/", uri.path);
    ASSERT_EQ("", uri.query);
    ASSERT_EQ("", uri.fragment);

    // no scheme & port & path
    uristr = "www.baidu.com?z=5";
    ASSERT_TRUE(ParseUri(uristr, uri));
    ASSERT_EQ("http", uri.scheme);
    ASSERT_EQ("www.baidu.com", uri.host);
    ASSERT_EQ(0, uri.port);
    ASSERT_EQ("/", uri.path);
    ASSERT_EQ("z=5", uri.query);
    ASSERT_EQ("", uri.fragment);

    // no scheme & port & query
    uristr = "www.baidu.com/index.htm";
    ASSERT_TRUE(ParseUri(uristr, uri));
    ASSERT_EQ("http", uri.scheme);
    ASSERT_EQ("www.baidu.com", uri.host);
    ASSERT_EQ(0, uri.port);
    ASSERT_EQ("/index.htm", uri.path);
    ASSERT_EQ("", uri.query);
    ASSERT_EQ("", uri.fragment);

    // no scheme & path & query
    uristr = "www.yahoo.com:1234";
    ASSERT_TRUE(ParseUri(uristr, uri));
    ASSERT_EQ("http", uri.scheme);
    ASSERT_EQ("www.yahoo.com", uri.host);
    ASSERT_EQ(1234, uri.port);
    ASSERT_EQ("/", uri.path);
    ASSERT_EQ("", uri.query);
    ASSERT_EQ("", uri.fragment);

    // no port & path & query
    uristr = "ftp://www.baidu.com";
    ASSERT_TRUE(ParseUri(uristr, uri));
    ASSERT_EQ("ftp", uri.scheme);
    ASSERT_EQ("www.baidu.com", uri.host);
    ASSERT_EQ(0, uri.port);
    ASSERT_EQ("/", uri.path);
    ASSERT_EQ("", uri.query);
    ASSERT_EQ("", uri.fragment);

    // no scheme & port & path & query
    uristr = "www.360.com";
    ASSERT_TRUE(ParseUri(uristr, uri));
    ASSERT_EQ("http", uri.scheme);
    ASSERT_EQ("www.360.com", uri.host);
    ASSERT_EQ(0, uri.port);
    ASSERT_EQ("/", uri.path);
    ASSERT_EQ("", uri.query);
    ASSERT_EQ("", uri.fragment);

    // invalid urls
    ASSERT_FALSE(ParseUri("www", uri));
    ASSERT_FALSE(ParseUri("host:80", uri));
    ASSERT_FALSE(ParseUri("http://", uri));
    ASSERT_FALSE(ParseUri("http:/|a.b.com", uri));
    ASSERT_FALSE(ParseUri("http://qq.com:023", uri));


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值