442. Find All Duplicates in an Array (Difficulty: Medium)

This post describes a way to find all elements that appear twice in an array without using extra space, along with an efficient C++ implementation.

Problem:

Given an array of integers, 1 ≤ a[i] ≤ n (n = size of array), some elements appear twice and others appear once.

Find all the elements that appear twice in this array.

Could you do it without extra space and in O(n) runtime?

Example:

Input:
[4,3,2,7,8,2,3,1]

Output:
[2,3]

Approach:

Since every value lies in the range [1, n], a value v can itself be used as an index: when v is first seen, negate the element at index v - 1 to mark v as visited. If that element is already negative, v has been seen before, so append v to the result. Each element is visited once (O(n) time), and the only bookkeeping is the sign bits of the array itself (O(1) extra space, not counting the output). For the example [4,3,2,7,8,2,3,1], the first 2 negates nums[1]; when the second 2 arrives, nums[1] is already negative, so 2 is reported as a duplicate.


Code:

class Solution {
public:
    vector<int> findDuplicates(vector<int>& nums) {
        vector<int> res;

        for (int i = 0; i < (int)nums.size(); i++) {
            // abs() undoes any mark previously placed on nums[i];
            // the value maps to index value - 1 because values lie in [1, n].
            int next = abs(nums[i]) - 1;
            if (nums[next] < 0)
                res.push_back(next + 1);   // already marked: second occurrence
            else
                nums[next] = -nums[next];  // first occurrence: mark by negating
        }
        return res;
    }
};
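For local testing, a minimal driver might look like the sketch below; the includes, the main function, and the variable names are illustrative additions, not part of the original solution, and it assumes the Solution class above is pasted into the same file. Note that findDuplicates marks entries by negating them, so the input vector is modified in place; if the caller needs the original values afterwards, it could take abs() of every element once the call returns.

#include <cstdlib>   // abs
#include <iostream>
#include <vector>
using namespace std;

// (paste the Solution class from above here)

int main() {
    vector<int> nums = {4, 3, 2, 7, 8, 2, 3, 1};
    Solution sol;
    vector<int> dup = sol.findDuplicates(nums);

    // Expected output: 2 3
    for (int v : dup)
        cout << v << ' ';
    cout << '\n';
    return 0;
}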

