Pub crawl

本文介绍了一道关于寻找都柏林城市中酒吧最优遍历路径的算法问题。目标是在不重复访问同一酒吧的前提下,尽可能多地访问不同的酒吧,并且在酒吧间移动时始终向左转。文章提供了详细的解题思路及代码实现。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

Al just arrived in Dublin. He is going to spend his cash on the famous Dublin activity - the pub crawl. The goal is to drink a pint of Guinness in as many different pubs in the city as possible, not visiting any pub twice. There are n pubs in Dublin. Al gets drunk very fast, so he sees pubs as points on a plane, and his path from a pub to the next one as a straight line connecting these points, though the actual path may involve going along different streets, around buildings or even walking in circles trying to find the next pub. Al doesn’t care about those details. The only thing he cares about is that every turn he makes is a left turn, he enjoys going left. This means for every three consecutive pubs in his route the third one must lie in the left half plane with respect to the directed line from the first pub to the second. It is known that builders in Dublin also enjoy pub crawl, so they never managed to build at least three pubs on a straight line. Help Al to find out how many pubs he can visit and plan the route for him.  
Input  
The first line contains an integer n (1 ≤ n ≤ 5000) – the number of pubs. Each of the next n lines contains two integers x and y (−10^9 ≤ x, y ≤ 10^9) – pub coordinates. Different pubs are located in different points.
Output  
The first line should contain an integer m – the maximum number of pubs Al can visit. The next line should contain m indices of pubs in the order Al should visit them. Pubs are numbered from 1 to n in the same order as they appear in the input.   

 Sample input

5
0 4
3 0
7 11
9 1
13 8

Sample output 

5 1 4 3 2

题意:给出n个点的坐标,题目保证不会出现三点共线,然后随便选择一个点,每次只往左走,即,对于路线上的每三个点,第三个点必须在前两个点的连线的左边。

题解:看到题意,想了一下,感觉不难,也很容易感觉的出和凸包很像,虽然我并不会凸包,但是以为很容易。虽然比赛期间有队友打码很快过了,不过后来补题的时候wa了一遍又一遍,才发现代码实现起来不容易,况且以前都没学过凸包,只是单单知道凸包的定义罢了。最后看了一下模板,理解一下,debug几次ac后,发现凸包的模板很简单,也很容易理解,就不多解释了。

其实本题就是一个凸包的等定义代换,凸包,其实也可以理解为每次只能往左边走,最后围起来的就是凸包。

所以这道题,一开始随意选个起点,找一个凸包,然后保留路线的最终点,去掉凸包上的其他点,(用一个vis数组标记就可以了),然后继续找凸包,直到点全部找完就可以了。

代码:

#include <iostream>
#include <algorithm>
#include <cstdio>
#include <stack>
#include <cstring>
#include <vector>
#include <bitset>
#include <string>
#include <cmath>
#include <set>
#include <map>
#include <queue>
using namespace std;
// ll: cross products of coordinates up to 1e9 reach ~1e18, so 64-bit is required.
typedef long long ll;
typedef pair<int,int> P;
const int maxn = 1e6+7;
const int INF = 0x3f3f3f3f;
int n;            // number of pubs in the current test case
int vis[5005];    // vis[id] == 1 once pub `id` has been placed on the route
vector<int> ans;  // NOTE(review): appears unused in this file

// A pub: integer coordinates plus its 1-based input index.
struct X{
    ll x,y,id;
    X(ll x = 0,ll y = 0,ll id = 0):x(x),y(y),id(id) {}
    // Vector difference; the id of the result is irrelevant (left 0).
    // `const` added so the operators are usable on const references.
    X operator - (const X &a) const {
        return X(x-a.x,y-a.y);
    }
    // 2-D cross product: positive when `a` lies counter-clockwise
    // (to the left) of *this.
    ll det(const X &a) const {
        return x*a.y-y*a.x;
    }
};
X s[5005];  // pubs, 1-indexed, sorted in solve()

// Lexicographic order: primary key x ascending, ties broken by y ascending.
bool cmp(X a,X b){
    return (a.x != b.x) ? (a.x < b.x) : (a.y < b.y);
}

// Build the convex hull (Andrew's monotone chain) of the pubs in p[1..n]
// that are not yet marked in vis[]. p must already be sorted by cmp.
// Returns the hull vertices in order, without repeating the start point.
vector<X> work(X *p,int n){
    int cap = 0;            // current size of the hull stack `tub`
    vector<X> tub(n*2);     // scratch stack; a hull has at most 2n entries
    // First sweep (left to right): one chain of the hull. Pop while the
    // new point does not make a strict left turn (det <= 0); collinear
    // triples cannot occur per the problem statement.
    for(int i = 1;i <= n;i++){
        if(vis[p[i].id]) continue;   // skip pubs consumed by earlier layers
        while(cap>1&&(tub[cap-1]-tub[cap-2]).det(p[i]-tub[cap-1])<=0) cap--;
        tub[cap++] = p[i];
    }
    // Second sweep (right to left): the opposite chain. `t` protects the
    // first chain from being popped.
    for(int i = n-1,t = cap;i >= 1;i--){
        if(vis[p[i].id]) continue;
        while(cap>t&&(tub[cap-1]-tub[cap-2]).det(p[i]-tub[cap-1])<=0) cap--;
        tub[cap++] = p[i];
    }
    // The last pushed point duplicates the first hull vertex; drop it.
    tub.resize(cap-1);
    return tub;
}


// Print the left-turning route visiting all pubs, by "onion peeling":
// repeatedly take the convex hull of the remaining pubs, walk it starting
// from `si`, mark all but the final hull vertex visited, and use that
// final vertex as the entry point `si` of the next (inner) layer.
// Assumes main() has already printed the route length n.
void solve(){
    sort(s+1,s+n+1,cmp);
    memset(vis,0,sizeof(vis));
    vector<X> preans;
    int si = s[1].id;   // start at the lexicographically smallest pub
    int now = n;        // pubs still unvisited
    while(now>1){
        preans = work(s,n);            // hull of the remaining pubs
        now-=preans.size()-1;          // all but one hull vertex get consumed
        // Find `si` on the hull and walk the cycle from there, printing
        // and marking every vertex except the last one, which is kept
        // unvisited as the bridge into the next layer.
        for(int i = 0;i < preans.size();i++){
            if(preans[i].id == si){
                int j = i;
                for(;j < i+preans.size()-1;j++){
                    vis[preans[j%preans.size()].id] = 1;
                    printf("%d ",preans[j%preans.size()].id);
                }
                si = preans[j%(preans.size())].id;  // bridge vertex
                break;
            }
        }
    }
    printf("%d\n",si);  // the single remaining pub terminates the route
}

int main(){
    while(scanf("%d",&n)!=EOF){
        for(int i = 1;i <= n;i++){
            int x,y;
            scanf("%d %d",&x,&y);
            s[i].x = x;
            s[i].y = y;
            s[i].id = i;
        }
        printf("%d\n",n);
        solve();
    }
    return 0;
}


import requests
from bs4 import BeautifulSoup
import re
import csv
import sqlite3
import tkinter as tk
from tkinter import ttk, messagebox, filedialog
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import pandas as pd
import numpy as np
import time
import random
import os


class DoubanBookCrawler:
    """Tk GUI tool that scrapes douban.com's book Top-250 list, exports the
    data (CSV / Excel / SQLite) and offers a few matplotlib/KMeans analyses.

    Additions over the original: the number of top entries to crawl is
    configurable, and book cover images can optionally be downloaded into a
    user-chosen folder.
    """

    def __init__(self, master):
        self.master = master
        master.title("豆瓣读书Top250爬取工具")
        self.books_data = []   # parsed book dicts, filled by start_crawling()
        self.image_dir = None  # cover-image folder; None disables downloads
        # Build the GUI first, then prepare persistent storage.
        self.create_widgets()
        self.init_database()

    def create_widgets(self):
        """Build all GUI widgets: control panel, analysis buttons, log area."""
        control_frame = ttk.LabelFrame(self.master, text="控制面板")
        control_frame.pack(padx=10, pady=5, fill="x")

        self.crawl_btn = ttk.Button(control_frame, text="开始爬取",
                                    command=self.start_crawling)
        self.crawl_btn.pack(side=tk.LEFT, padx=5, pady=5)

        # How many of the top entries to crawl (1-250, default: all).
        ttk.Label(control_frame, text="爬取数量:").pack(side=tk.LEFT, padx=5)
        self.count_var = tk.IntVar(value=250)
        ttk.Spinbox(control_frame, from_=1, to=250, width=5,
                    textvariable=self.count_var).pack(side=tk.LEFT)

        # Optional cover-image download; checking the box asks for a folder.
        self.image_var = tk.BooleanVar(value=False)
        ttk.Checkbutton(control_frame, text="下载封面", variable=self.image_var,
                        command=self.choose_image_dir).pack(side=tk.LEFT, padx=5)

        export_frame = ttk.Frame(control_frame)
        export_frame.pack(side=tk.RIGHT, padx=5)
        ttk.Label(export_frame, text="导出格式:").pack(side=tk.LEFT)
        self.export_var = tk.StringVar(value="csv")
        ttk.Radiobutton(export_frame, text="CSV", variable=self.export_var,
                        value="csv").pack(side=tk.LEFT)
        ttk.Radiobutton(export_frame, text="Excel", variable=self.export_var,
                        value="excel").pack(side=tk.LEFT)
        ttk.Radiobutton(export_frame, text="数据库", variable=self.export_var,
                        value="db").pack(side=tk.LEFT)

        analysis_frame = ttk.LabelFrame(self.master, text="数据分析")
        analysis_frame.pack(padx=10, pady=5, fill="x")
        ttk.Button(analysis_frame, text="评分分布图",
                   command=self.plot_rating_distribution).pack(side=tk.LEFT, padx=5)
        ttk.Button(analysis_frame, text="出版年份分析",
                   command=self.plot_publish_years).pack(side=tk.LEFT, padx=5)
        ttk.Button(analysis_frame, text="评分聚类分析",
                   command=self.rating_cluster_analysis).pack(side=tk.LEFT, padx=5)

        log_frame = ttk.LabelFrame(self.master, text="日志")
        log_frame.pack(padx=10, pady=5, fill="both", expand=True)
        self.log_text = tk.Text(log_frame, height=10)
        self.log_text.pack(padx=5, pady=5, fill="both", expand=True)
        scrollbar = ttk.Scrollbar(log_frame, command=self.log_text.yview)
        scrollbar.pack(side=tk.RIGHT, fill="y")
        self.log_text.config(yscrollcommand=scrollbar.set)

    def choose_image_dir(self):
        """Ask for the folder cover images are saved to (checkbox callback)."""
        if self.image_var.get():
            self.image_dir = filedialog.askdirectory(title="选择封面保存文件夹") or None
            if self.image_dir is None:
                # User cancelled the dialog: revert the checkbox.
                self.image_var.set(False)
        else:
            self.image_dir = None

    def init_database(self):
        """Create the SQLite database and the books table if missing."""
        try:
            self.conn = sqlite3.connect('douban_books.db')
            self.cursor = self.conn.cursor()
            self.cursor.execute('''
                CREATE TABLE IF NOT EXISTS books (
                    id INTEGER PRIMARY KEY,
                    title TEXT NOT NULL,
                    author TEXT,
                    publisher TEXT,
                    publish_year INTEGER,
                    price REAL,
                    rating REAL,
                    rating_count INTEGER,
                    link TEXT
                )
            ''')
            self.conn.commit()
            self.log("数据库初始化成功")
        except Exception as e:
            self.log(f"数据库初始化失败: {str(e)}", error=True)

    def log(self, message, error=False):
        """Append a line to the GUI log; errors also pop a message box."""
        tag = "ERROR" if error else "INFO"
        self.log_text.insert(tk.END, f"[{tag}] {message}\n")
        self.log_text.see(tk.END)
        if error:
            messagebox.showerror("错误", message)

    def start_crawling(self):
        """Crawl up to the requested number of top-list entries, then save."""
        try:
            self.books_data = []
            # Clamp the user-selected limit into [1, 250].
            limit = max(1, min(int(self.count_var.get()), 250))
            for page in range(0, limit, 25):
                url = f"https://book.douban.com/top250?start={page}"
                self.log(f"正在爬取页面: {url}")
                self.crawl_page(url)
                if len(self.books_data) >= limit:
                    break
                time.sleep(random.uniform(1, 3))  # random delay to avoid bans
            self.books_data = self.books_data[:limit]
            self.log("爬取完成,开始保存数据...")
            self.save_data()
            self.log("数据处理完成")
        except Exception as e:
            self.log(f"爬取过程中发生错误: {str(e)}", error=True)

    def crawl_page(self, url):
        """Fetch and parse one list page, appending results to books_data."""
        try:
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
                'Accept-Language': 'zh-CN,zh;q=0.9'
            }
            response = requests.get(url, headers=headers, timeout=10)
            # Check 403 BEFORE raise_for_status(); in the original this branch
            # was unreachable because raise_for_status() fires first.
            if response.status_code == 403:
                raise Exception("访问被拒绝,可能触发了反爬机制")
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            book_items = soup.select('tr.item')
            if not book_items:
                raise Exception("未找到书籍信息,页面结构可能已变更")
            for item in book_items:
                book_info = self.parse_book_item(item)
                if not book_info:
                    continue  # parse failures return {}; don't log/store them
                self.books_data.append(book_info)
                self.log(f"已爬取: {book_info['title']}")
        except requests.exceptions.RequestException as e:
            raise Exception(f"网络请求失败: {str(e)}")
        except Exception as e:
            raise Exception(f"页面解析失败: {str(e)}")

    def parse_book_item(self, item):
        """Parse one book row into a dict; returns {} on failure."""
        try:
            title_tag = item.select_one('div.pl2 a')
            # Original crashed on a missing tag (read .text of None); fall
            # back from the title attribute to the link text instead.
            if title_tag is not None:
                title = title_tag.get('title') or title_tag.text.strip()
                link = title_tag.get('href', '')
            else:
                title, link = '', ''

            pub_info = item.select_one('p.pl').text.split('/')
            author = pub_info[0].strip() if len(pub_info) > 0 else ''
            publisher = pub_info[-3].strip() if len(pub_info) >= 3 else ''

            # Publication year: first 4-digit group of the year field.
            publish_year = None
            year_match = re.search(r'(\d{4})', pub_info[-2] if len(pub_info) >= 2 else '')
            if year_match:
                publish_year = int(year_match.group(1))

            # Price: first decimal number of the last field.
            price = None
            price_match = re.search(r'(\d+\.\d+)', pub_info[-1] if pub_info else '')
            if price_match:
                price = float(price_match.group(1))

            rating_tag = item.select_one('span.rating_nums')
            rating = float(rating_tag.text) if rating_tag else 0.0

            rating_count_tag = item.select_one('span.pl')
            rating_count = 0
            if rating_count_tag:
                count_match = re.search(r'(\d+)', rating_count_tag.text)
                if count_match:
                    rating_count = int(count_match.group(1))

            if self.image_dir:
                self.save_cover_image(item, title)

            return {
                'title': title,
                'author': author,
                'publisher': publisher,
                'publish_year': publish_year,
                'price': price,
                'rating': rating,
                'rating_count': rating_count,
                'link': link
            }
        except Exception as e:
            self.log(f"解析书籍信息出错: {str(e)}", error=True)
            return {}

    def save_cover_image(self, item, title):
        """Best-effort download of the row's cover <img> into image_dir."""
        try:
            img_tag = item.select_one('img')
            if img_tag is None or not img_tag.get('src'):
                return
            # Strip characters that are illegal in filenames.
            safe_name = re.sub(r'[\\/:*?"<>|]', '_', title) or 'cover'
            path = os.path.join(self.image_dir, f"{safe_name}.jpg")
            resp = requests.get(img_tag['src'], timeout=10)
            resp.raise_for_status()
            with open(path, 'wb') as f:
                f.write(resp.content)
        except Exception as e:
            # Image failures must not abort the crawl; just log them.
            self.log(f"封面下载失败: {str(e)}")

    def save_data(self):
        """Dispatch saving according to the selected export format."""
        if not self.books_data:
            self.log("没有数据可保存", error=True)
            return
        export_format = self.export_var.get()
        if export_format == "csv":
            self.save_to_csv()
        elif export_format == "excel":
            self.save_to_excel()
        elif export_format == "db":
            self.save_to_database()

    def save_to_csv(self):
        """Save books_data to a user-chosen CSV file (UTF-8 with BOM)."""
        try:
            filename = filedialog.asksaveasfilename(
                defaultextension=".csv",
                filetypes=[("CSV文件", "*.csv")]
            )
            if not filename:
                return
            with open(filename, 'w', newline='', encoding='utf-8-sig') as f:
                writer = csv.DictWriter(f, fieldnames=self.books_data[0].keys())
                writer.writeheader()
                writer.writerows(self.books_data)
            self.log(f"数据已保存到CSV文件: {filename}")
        except Exception as e:
            self.log(f"保存CSV失败: {str(e)}", error=True)

    def save_to_excel(self):
        """Save books_data to a user-chosen Excel file via pandas."""
        try:
            filename = filedialog.asksaveasfilename(
                defaultextension=".xlsx",
                filetypes=[("Excel文件", "*.xlsx")]
            )
            if not filename:
                return
            df = pd.DataFrame(self.books_data)
            df.to_excel(filename, index=False)
            self.log(f"数据已保存到Excel文件: {filename}")
        except Exception as e:
            self.log(f"保存Excel失败: {str(e)}", error=True)

    def save_to_database(self):
        """Upsert books_data rows into the SQLite books table."""
        try:
            for book in self.books_data:
                self.cursor.execute('''
                    INSERT OR REPLACE INTO books
                    (title, author, publisher, publish_year, price, rating, rating_count, link)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    book['title'], book['author'], book['publisher'],
                    book['publish_year'], book['price'], book['rating'],
                    book['rating_count'], book['link']
                ))
            self.conn.commit()
            self.log(f"成功保存{len(self.books_data)}条数据到数据库")
        except Exception as e:
            self.log(f"数据库保存失败: {str(e)}", error=True)

    def plot_rating_distribution(self):
        """Histogram of the crawled ratings."""
        try:
            ratings = [book['rating'] for book in self.books_data if book.get('rating')]
            if not ratings:
                raise Exception("没有可用的评分数据")
            plt.figure(figsize=(10, 6))
            plt.hist(ratings, bins=20, color='skyblue', edgecolor='black')
            plt.title('豆瓣读书Top250评分分布')
            plt.xlabel('评分')
            plt.ylabel('书籍数量')
            plt.grid(axis='y', alpha=0.75)
            plt.show()
            self.log("评分分布图已生成")
        except Exception as e:
            self.log(f"生成评分分布图失败: {str(e)}", error=True)

    def plot_publish_years(self):
        """Bar chart of books per publication year."""
        try:
            years = [book['publish_year'] for book in self.books_data
                     if book.get('publish_year')]
            if not years:
                raise Exception("没有可用的出版年份数据")
            year_counts = {}
            for year in years:
                year_counts[year] = year_counts.get(year, 0) + 1
            sorted_years = sorted(year_counts.items())
            years = [item[0] for item in sorted_years]
            counts = [item[1] for item in sorted_years]
            plt.figure(figsize=(12, 6))
            plt.bar(years, counts, color='lightgreen')
            plt.title('豆瓣读书Top250出版年份分布')
            plt.xlabel('出版年份')
            plt.ylabel('书籍数量')
            plt.xticks(rotation=45)
            plt.grid(axis='y', alpha=0.75)
            plt.tight_layout()
            plt.show()
            self.log("出版年份分布图已生成")
        except Exception as e:
            self.log(f"生成出版年份分布图失败: {str(e)}", error=True)

    def rating_cluster_analysis(self):
        """KMeans clustering (k=3) of (rating, rating_count), plotted."""
        try:
            data = []
            for book in self.books_data:
                if book.get('rating') is not None and book.get('rating_count') is not None:
                    data.append([book['rating'], book['rating_count']])
            if len(data) < 3:
                raise Exception("有效数据不足,无法进行聚类分析")
            X = np.array(data)
            kmeans = KMeans(n_clusters=3, random_state=42)
            kmeans.fit(X)
            labels = kmeans.labels_
            plt.figure(figsize=(10, 6))
            scatter = plt.scatter(X[:, 0], X[:, 1], c=labels,
                                  cmap='viridis', s=50, alpha=0.8)
            plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
                        c='red', s=200, marker='X', label='聚类中心')
            plt.title('豆瓣读书Top250评分聚类分析')
            plt.xlabel('评分')
            plt.ylabel('评价人数')
            plt.legend()
            plt.colorbar(scatter, label='聚类分组')
            plt.grid(True, alpha=0.3)
            plt.tight_layout()
            plt.show()
            self.log("评分聚类分析已完成")
        except Exception as e:
            self.log(f"聚类分析失败: {str(e)}", error=True)


if __name__ == "__main__":
    root = tk.Tk()
    app = DoubanBookCrawler(root)
    root.mainloop()
06-15
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值