Torrent结构分析及测试代码

最新推荐文章于 2023-09-22 23:37:10 发布

weixin_30746117

最新推荐文章于 2023-09-22 23:37:10 发布

阅读量210

点赞数

CC 4.0 BY-SA版权

文章标签： c/c++

原文链接：http://www.cnblogs.com/skactor/articles/3906514.html

本文详细介绍了Bencoding格式及其在Torrent文件中的应用，包括数据类型存储方式、解析过程和示例代码，旨在帮助开发者深入理解并实现Torrent文件解析。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

torrent文件信息存储格式：

bencoding是一种以简洁格式指定和组织数据的方法。支持下列类型：字节串、整数、列表和字典。

1 字符串存储格式: <字符串的长度>:<字符串的内容>
例如：    4:spam 表示spam, 2:ab 表示ab

2 数字的存储格式: i<整数>e
例如：    i3e 表示整数3, i1024e 表示整数1024

3 列表的存储格式： l<子元素>e 其中：子元素可以是字符串，整数，列表和字典，或者是它们的组合体
例如：    l4:spam4:eggse    表示 [ "spam", "eggs" ]
        l3:logi32ee        表示 [ "log", 32 ]

4 字典的存储格式： d<<key><value><key><value><key><value>...<key><value>>e
其中：key只能是字符串类型，value则可以是字符串，整数，列表和字典，或者是它们的组合体，key和value必须是成对出现的
例如：    d3:cow3:moo4:spam4:eggse    表示 { "cow" => "moo", "spam" => "eggs" }
        d4:spaml1:a1:bee            表示 { "spam" => [ "a", "b" ] }
        d9:publisher3:bob4:spaml1:a1:be5:counti80ee 表示 { "publisher" => "bob", "spam" => [ "a", "b" ], "count" => 80 }

torrent文件的信息：

announce:                tracker服务器的URL(字符串)
announce-list(可选):    备用tracker服务器列表(列表)
creation date(可选):    种子创建的时间，Unix标准时间格式，从1970年1月1日 00:00:00到创建时间的秒数(整数)
comment(可选):            备注(字符串)
created by(可选):        创建人或创建程序的信息(字符串)

info:                一个字典结构，包含文件的主要信息，分为两种情况：单文件结构或多文件结构
    piece length:    每个块的大小，单位字节(整数)
    pieces:            每个块的20个字节的SHA1 Hash的值(二进制格式)

    单文件结构如下：
    name:            文件名(字符串)
    length:            文件长度，单位字节(整数)

    多文件结构如下：
    name:            目录名(字符串)
    files:            一个字典结构的列表，字典结构至少包含下面两个信息
        length:        文件长度，单位字节(整数)
        path:        文件的路径和名字，是一个列表结构，如"test\test.txt" 列表为l4:test8:test.txte

torrent文件解析的代码：

下面给出解析torrent文件C++示例代码：

  1 //////////////////////////////////////////////////////////////////////////
  2 
  3 // interfaceBencode.h
  4 
  5 #pragma once
#include "interfaceString.h"

#include <stddef.h>
#include <stdint.h>

#include <map>
#include <string>
#include <vector>
  7 
  8 
  9 //////////////////////////////////////////////////////////////////////////
 10 //    torrent信息存储文法分析
 11 //    <content>    ::= <dict>
 12 //    <dict>        ::= d(<string>(<string> | <int> | <dict> | <list>))*e
 13 //    <list>        ::= l(<string> | <int> | <dict> | <list>)*e
 14 //    <string>    ::= <string length>:<string data>
 15 //    <int>        ::= i<number>e
 16 //////////////////////////////////////////////////////////////////////////
 17 
 18 class INode
 19 {
 20 public:
 21     virtual ~INode(){}
 22     virtual bool encode(string& content) = 0;
 23 };
 24 
 25 class StringNode : public INode 
 26 {
 27 public:
 28     virtual ~StringNode() {}
 29     virtual bool encode(string& content);
 30 
 31     string m_value;
 32 };
 33 
 34 
 35 class IntNode : public INode 
 36 {
 37 public:
 38     virtual ~IntNode() {}
 39     virtual bool encode(string& content);
 40 
 41     int m_value;
 42 };
 43 
 44 class DictNode : public INode 
 45 {
 46 public:
 47     virtual ~DictNode() 
 48     {
 49         for (map<StringNode*, INode*>::iterator iter = m_map_nodes.begin(); iter != m_map_nodes.end(); ++iter)
 50         {
 51             delete iter->first;
 52             delete iter->second;
 53         }
 54 
 55         m_map_nodes.clear();
 56     }
 57     virtual bool encode(string& content);
 58 
 59     map<StringNode*, INode*> m_map_nodes;
 60 };
 61 
 62 class ListNode : public INode 
 63 {
 64 public:
 65     virtual ~ListNode() 
 66     {
 67         for (vector<INode*>::iterator iter = m_nodes.begin(); iter != m_nodes.end(); ++iter)
 68             delete *iter;
 69 
 70         m_nodes.clear();
 71     }
 72     virtual bool encode(string& content);
 73 
 74     vector<INode*> m_nodes;
 75 };
 76 
 77 bool StringNode::encode(string& content)
 78 {
 79     if (content.size() < 3)
 80         return false;
 81 
 82     size_t pos = content.find(':', 0);
 83     if (pos == string::npos)
 84         return false;
 85 
 86     int count = 0;
 87     InterfaceString::to_number(content.substr(0, pos), count);
 88 
 89     m_value = content.substr(pos+1, count);
 90     content = content.erase(0, pos+1+count);
 91 
 92     return true;
 93 }
 94 
 95 bool IntNode::encode(string& content)
 96 {
 97     if (content[0] != 'i')
 98     {
 99         // bad int node
100         assert(false);
101         return false;
102     }
103 
104     size_t pos = content.find('e', 0);
105     if (pos == string::npos)
106         return false;
107 
108     string s_value = content.substr(1, pos-1);
109     InterfaceString::to_number(s_value, m_value);
110 
111     content = content.erase(0, s_value.size()+2);
112     return true;
113 }
114 
115 bool DictNode::encode(string& content)
116 {
117     if (content.empty())
118         return false;
119 
120     if (content[0] != 'd')
121     {
122         // bad dict node
123         assert(false);
124         return false;
125     }
126 
127     content = content.erase(0, 1);
128 
129     while (!content.empty())
130     {
131         StringNode* keyNode = new StringNode();
132         keyNode->decode(content);
133 
134         if (content.empty())
135             break;
136 
137         INode* valueNode = NULL;
138         if (content[0] == 'l')
139             valueNode = new ListNode();
140         else if (content[0] == 'd')
141             valueNode = new DictNode();
142         else if (content[0] == 'i')
143             valueNode = new IntNode();
144         else
145             valueNode = new StringNode();
146 
147         if (valueNode == NULL)
148             return false;
149 
150         valueNode->encode(content);
151         m_map_nodes[keyNode] = valueNode;
152 
153         if (content[0] == 'e')
154         {
155             content = content.erase(0, 1);
156             break;
157         }
158     }
159     return true;
160 }
161 
162 bool ListNode::encode(string& content)
163 {
164     if (content[0] != 'l')
165     {
166         // bad list node
167         assert(false);
168         return false;
169     }
170 
171     content = content.erase(0, 1);
172 
173     while (!content.empty())
174     {
175         INode* valueNode = NULL;
176         if (content[0] == 'l')
177             valueNode = new ListNode();
178         else if (content[0] == 'd')
179             valueNode = new DictNode();
180         else if (content[0] == 'i')
181             valueNode = new IntNode();
182         else
183             valueNode = new StringNode();
184 
185         if (valueNode == NULL)
186             return false;
187 
188         valueNode->encode(content);
189         m_nodes.push_back(valueNode);
190 
191         if (content[0] == 'e')
192         {
193             content = content.erase(0, 1);
194             break;
195         }    
196     }
197     return true;
198 }

注：上述代码用到了数字跟字符串之间的转换，可自行实现。上述只是分析torrent文件里的信息，如果想得到全部信息可以这样调用：

1 string content = "d8:ann..........e"; // content表示torrent文件的内容
2 DictNode* pDictNode = new DictNode();
3 pDictNode->encode(content);
4 那么所有的信息都可以在pDictNode结点里找到了。

如果你想要得到torrent文件相关字段的信息，则还需要对上述代码进行封装，下面给出我封装过的类。

  1 ////////////////////////////////////////////////
  2 
  3 // interfaceTorrentFile.h
  4 
  5 #pragma once
  6 
  7 // begin namespace core_common
  8 namespace core_common    
  9 {
 10 class TorrentFile
 11 {
 12 public:
 13     struct files_t 
 14     {
 15         string        file_path;
 16         uint64_t    file_size;
 17     };
 18 
 19     struct infos_t
 20     {
 21         uint64_t        piece_length;
 22         string            pieces;
 23 
 24         bool            is_dir;
 25         vector<files_t> files;
 26     };
 27 
 28     struct torrent_t 
 29     {
 30         string            announce;
 31         vector<string>    announce_list;
 32         string            comment;
 33         string            create_by;
 34         uint64_t        create_data;
 35         string            encoding;
 36         infos_t            infos;
 37     };
 38 
 39 public:
 40 
 41     ///将torrent文件的字符串转化为torrent结构
 42     static bool encode(const string& content, torrent_t& torrent);
 43 
 44     ///将torrent结构转化为torrent文件的字符串
 45     //static bool decode(const torrent_t& torrent, string& content);
 46 };
 47 
 48 };    // end namespace core_common
 49 
 50 /////////////////////////////////////////////////
 51 
 52 // interfaceTorrentFile.cpp
 53 
 54 #include "interfaceBencode.h"
 55 
 56 #include "InterfaceTorrent.h"
 57 
 58 using namespace core_common;
 59 
 60 
 61 INode* find_node(const map<StringNode*, INode*>& node_map, const string& key)
 62 {
 63     for (map<StringNode*, INode*>::const_iterator iter = node_map.begin(); iter != node_map.end(); ++iter)
 64     {
 65         StringNode* pKeyNode = dynamic_cast<StringNode*>(iter->first);
 66         assert(pKeyNode != NULL);
 67         if (pKeyNode == NULL)
 68             return NULL;
 69 
 70         if ( pKeyNode->m_value == key )
 71             return iter->second;
 72     }
 73 
 74     return NULL;
 75 }
 76 
 77 string get_node_value(StringNode* strNode)
 78 {
 79     return strNode == NULL ? "" : strNode->m_value;
 80 }
 81 
 82 uint64_t get_node_value(IntNode* intNode)
 83 {
 84     return intNode == NULL ? 0 : intNode->m_value;
 85 }
 86 
 87 bool TorrentFile::encode(const string& torrent_content, torrent_t& torrent)
 88 {    
 89     string content = torrent_content;
 90 
 91     DictNode* pDictNode = new DictNode();
 92     pDictNode->encode(content);
 93 
 94     torrent.create_by    = get_node_value( dynamic_cast<StringNode*>(find_node(pDictNode->m_map_nodes, "created by")) );    // 查找 created by
 95     torrent.create_data = get_node_value( dynamic_cast<IntNode*>(find_node(pDictNode->m_map_nodes, "creation date")) );    // 查找 creation date
 96     torrent.encoding    = get_node_value( dynamic_cast<StringNode*>(find_node(pDictNode->m_map_nodes, "encoding")) );    // 查找 encoding
 97     torrent.comment        = get_node_value( dynamic_cast<StringNode*>(find_node(pDictNode->m_map_nodes, "comment")) );    // 查找 comment
 98     torrent.announce    = get_node_value( dynamic_cast<StringNode*>(find_node(pDictNode->m_map_nodes, "announce")) );    // 查找 announce
 99 
100     // 查找 announce-list
101     {
102         INode* pNode = find_node(pDictNode->m_map_nodes, "announce-list");
103         if (pNode != NULL)
104         {
105             ListNode* pValueNode = dynamic_cast<ListNode*>(pNode);
106             assert(pValueNode != NULL);
107             if (pValueNode == NULL)
108                 return false;
109 
110             for (vector<INode*>::iterator iter_announce = pValueNode->m_nodes.begin(); iter_announce != pValueNode->m_nodes.end(); ++iter_announce)
111                 torrent.announce_list.push_back( get_node_value( dynamic_cast<StringNode*>(*iter_announce)) );
112         }
113     }
114 
115     // 查找 info
116     INode* pNode = find_node(pDictNode->m_map_nodes, "info");
117     if (pNode != NULL)
118     {
119         DictNode* pValueNode = dynamic_cast<DictNode*>(pNode);
120         assert(pValueNode != NULL);
121         
122         torrent.infos.piece_length    = get_node_value( dynamic_cast<IntNode*>(find_node(pValueNode->m_map_nodes, "piece length")) );    // 查找 piece length
123         torrent.infos.pieces            = get_node_value( dynamic_cast<StringNode*>(find_node(pValueNode->m_map_nodes, "pieces")) );        // 查找 piece
124 
125         // 查找 是否是目录
126         INode* pSubNode = find_node(pValueNode->m_map_nodes, "files");
127         if (pSubNode != NULL)
128         {
129             torrent.infos.is_dir = true;
130 
131             // 查找 目录名
132             string dir_name = get_node_value( dynamic_cast<StringNode*>(find_node(pValueNode->m_map_nodes, "name")) );
133 
134             // 查找 子文件
135             ListNode* pSubValueNode = dynamic_cast<ListNode*>(pSubNode);
136             assert(pSubValueNode != NULL);
137             if (pSubValueNode != NULL)
138             {
139                 for (vector<INode*>::iterator iter_file = pSubValueNode->m_nodes.begin(); iter_file != pSubValueNode->m_nodes.end(); ++iter_file)
140                 {
141                     DictNode* pDictNode = dynamic_cast<DictNode*>(*iter_file);
142                     assert(pDictNode != NULL);
143                     if (pDictNode != NULL)
144                     {
145                         files_t file;
146                         file.file_size = get_node_value( dynamic_cast<IntNode*>(find_node(pDictNode->m_map_nodes, "length")) );
147 
148                         ListNode* pListNode = dynamic_cast<ListNode*>(find_node(pDictNode->m_map_nodes, "path"));
149                         if (pListNode != NULL)
150                         {
151                             file.file_path = dir_name;
152                             for (vector<INode*>::iterator iter_path = pListNode->m_nodes.begin(); iter_path != pListNode->m_nodes.end(); ++iter_path)
153                             {
154                                 file.file_path += "//";
155                                 file.file_path += get_node_value( dynamic_cast<StringNode*>(*iter_path));
156                             }
157                         }
158 
159                         torrent.infos.files.push_back(file);
160                     }
161                 }
162             }
163         }
164         else
165         {
166             torrent.infos.is_dir = false;
167 
168             files_t file;
169             file.file_size = get_node_value( dynamic_cast<IntNode*>(find_node(pValueNode->m_map_nodes, "length")) );
170             file.file_path = get_node_value( dynamic_cast<StringNode*>(find_node(pValueNode->m_map_nodes, "name")) );
171 
172             torrent.infos.files.push_back(file);
173         }
174     }
175 
176     delete pDictNode;
177     return true;
178 }