oppo社区，学习笔记（三）

最新推荐文章于 2024-04-22 09:58:57 发布

SayLove丶

最新推荐文章于 2024-04-22 09:58:57 发布

阅读量472

点赞数

分类专栏： python3 爬虫

本文链接：https://blog.youkuaiyun.com/qq_34776122/article/details/79231822

版权

python3 同时被 2 个专栏收录

16 篇文章

订阅专栏

爬虫

9 篇文章

订阅专栏

from New_MyPython.Community_MyThread.Requse import HttpReq


class Topic:
    """Crawl the OPPO community topic list and look topics up by keyword.

    All HTTP access goes through ``self.request`` (an ``HttpReq.Http``
    instance). This class only relies on its ``get(url)`` and
    ``find_data(pattern, html)`` methods.
    """

    def __init__(self):
        self.request = HttpReq.Http()

    def get_all_topic(self):
        """Collect every topic by walking the paginated topic-list endpoint.

        Paging stops as soon as a page yields no matches, or as soon as a
        match carries an id that was already collected.
        NOTE(review): the repeated-id stop presumably exists because the
        server keeps returning a page once the page number runs past the
        end -- confirm against the live API.

        :return: a tuple of two parallel lists. The first holds the
            three-digit topic ids; the second holds the text that follows
            the first ``:"`` of each match (the topic name, going by the
            sample in the pattern comment below).
        """
        topic_ids = []
        topic_names = []
        seen_ids = set()
        # Raw string: same pattern as before, without invalid-escape warnings.
        # One capture looks like:  "131","name":"<topic name>
        # NOTE(review): only ids with exactly three digits are matched.
        pattern = r'\{"id":("\d\d\d",".*?)","'
        page = 1

        while True:
            url = 'https://www.oppo.cn/topic/index/topic-list.json?page=%s&limit=10&type=dateline' % page
            html = self.request.get(url)
            matches = self.request.find_data(pattern, html)
            if not matches:
                # An empty page means there is nothing left to read; without
                # this check the loop would request pages forever.
                break

            repeated = False
            for match in matches:
                topic_id = match.split('"')[1]    # digits between the first pair of quotes
                topic_name = match.split(':"')[1]  # text after the first :"
                if topic_id in seen_ids:
                    repeated = True
                    break
                seen_ids.add(topic_id)
                topic_ids.append(topic_id)
                topic_names.append(topic_name)

            if repeated:
                break
            page += 1
        return topic_ids, topic_names

    def search_topic(self, user_input):
        """Find the topics whose name contains ``user_input``.

        :param user_input: search keyword; must be 1 to 3 characters long.
        :return: the topic name when exactly one topic matches; a dict
            ``{1: name, 2: name, ...}`` when several match; ``None`` when
            the keyword length is invalid or nothing matches (a message is
            printed in both of those cases).
        """
        if not 0 < len(user_input) < 4:
            print('请输入准确的关键词！')
            return None

        _, names = self.get_all_topic()
        hits = [name for name in names if user_input in name]
        if not hits:
            print('话题不存在！')
            return None
        if len(hits) == 1:
            return hits[0]
        # Number the candidates from 1 so the caller can pick one by index.
        return dict(enumerate(hits, start=1))

    def order_zip(self, list1, list2):
        """Pair two equally long sequences into a dict, position by position.

        :param list1: sequence supplying the keys.
        :param list2: sequence supplying the values.
        :return: ``{list1[i]: list2[i]}``; an empty dict (after printing a
            message) when the two lengths differ.
        """
        if len(list1) != len(list2):
            print('两个列表长度不一致，无法压缩')
            return {}
        return dict(zip(list1, list2))


# if __name__ == '__main__':
#     T = Topic()
#     #T.get_all_topic()
#     user_input = input('话题关键词：')
#     s = T.search_topic(user_input)
#     print(s)

class Search_Topic(Topic):
    """Resolve a keyword to one topic and collect the ids listed under it."""

    def topic_tid_sum(self, data):
        """Return the id of the topic selected by the keyword ``data``.

        When several topics match, the candidates are printed and the user
        is asked (via ``input``) to choose one by its number; the prompt is
        repeated until a listed number is entered.

        :param data: search keyword, passed on to ``search_topic``.
        :return: the id of the chosen topic, or ``None`` when
            ``search_topic`` found nothing.
        """
        first, second = self.get_all_topic()
        # order_zip(second, first) maps each search_topic result to its id.
        topics = self.order_zip(second, first)
        top = self.search_topic(data)
        if top is None:
            return None

        if isinstance(top, dict):
            print('搜索到', len(top), '个话题', top)
            while True:
                try:
                    nub = int(input('输入对应话题的序列值:\n'))
                except ValueError:
                    nub = None
                # Accept only numbers that are actually offered; 0, negative
                # and non-numeric answers re-prompt instead of failing.
                if nub in top:
                    top = top[nub]
                    break
                print('请输入正确的序列值！')

        top_id = topics[top]
        print('匹配话题：', top)
        return top_id

    def topic_tids(self, data):
        """Collect the ids found on every page of the selected topic.

        Pages are requested in order until one yields no ids, or until the
        first id of a page has already been collected.

        :param data: search keyword used to select the topic.
        :return: list of unique ids in the order they were first seen;
            an empty list when no topic could be resolved.
        """
        top_id = self.topic_tid_sum(data)
        if top_id is None:
            # No topic resolved: requesting id=None could never terminate.
            return []

        tids_list = []
        seen = set()
        page = 1
        # NOTE(review): this captures every "id" string value in the
        # response; assumed to be thread ids only -- confirm.
        total = '"id":"(.*?)",'
        while True:
            topic_url = 'https://www.oppo.cn/topic/index/thread.json?page=%s&limit=21&type=3&id=%s' % (page, top_id)
            html = self.request.get(topic_url)
            tids = self.request.find_data(total, html)
            if not tids:
                print('该话题下没有帖子或者已经到最后一页了')
                break
            if tids[0] in seen:
                # The page repeats ids already collected: stop paging.
                break
            tids_list += tids
            seen.update(tids)
            print('第', page, '页')
            page += 1
        # De-duplicate while keeping first-seen order (deterministic output).
        return list(dict.fromkeys(tids_list))


if __name__ == '__main__':
    # Script entry point: ask for a topic keyword, then print every id
    # collected under the matching topic.
    searcher = Search_Topic()
    keyword = input('输入话题关键词：')
    print(searcher.topic_tids(keyword))