Get countries from the system

This article presents a C# program that generates a SQL script to batch-insert culture, currency, and country data into a database. By iterating over all specific cultures and their associated region information, the program builds the corresponding database records.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Globalization;
using System.IO;

namespace CultureInformation
{
    class Program
    {
        static void Main(string[] args)
        {
            StreamWriter sw = new StreamWriter(@"D:\PopulateTableData.Txt");
            CultureInfo[] cultures = CultureInfo.GetCultures(CultureTypes.SpecificCultures);

            StringBuilder sb = new StringBuilder();

            sb.AppendLine(" declare @LocalizableId bigint, @CultureId bigint, @CurrencyId bigint ");

            // insert cultures
            foreach (CultureInfo item in cultures)
            {
                sb.AppendLine(string.Format("INSERT INTO [LocalizableText] ([CultureInvariantText]) VALUES('{0}') ", item.EnglishName.Replace("'", "''")));
                sb.AppendLine(" set @LocalizableId=IDENT_CURRENT('LocalizableText') ");
                sb.AppendLine(string.Format("INSERT INTO [Culture]([Code],[Name]) VALUES('{0}',@LocalizableId)", item.Name));
            }
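            // IDENT_CURRENT('LocalizableText') returns the last identity value generated for that table
            // by any session, so the emitted script assumes nothing else is inserting into LocalizableText
            // while it runs; SCOPE_IDENTITY() immediately after each INSERT would be the concurrency-safe choice.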

            // insert currencies
            List<string> currencyList = new List<string>();
            foreach (CultureInfo item in cultures)
            {
                RegionInfo r = new RegionInfo(item.LCID);

                if (!currencyList.Contains(r.ISOCurrencySymbol))
                {
                    currencyList.Add(r.ISOCurrencySymbol);
                    sb.AppendLine(string.Format("INSERT INTO [LocalizableText] ([CultureInvariantText]) VALUES ('{0}')", r.CurrencyEnglishName.Replace("'", "''")));
                    sb.AppendLine(" set @LocalizableId=IDENT_CURRENT('LocalizableText') ");
                    sb.AppendLine(string.Format("INSERT INTO Currency ([IsoCode],[Name]) VALUES ('{0}',@LocalizableId)", r.ISOCurrencySymbol));
                }
            }
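            // ISOCurrencySymbol is the three-letter ISO 4217 code; de-duplicating on it above means each
            // currency row is emitted only once, even though many cultures share the same currency.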

            // insert countries
            List<string> cultureList = new List<string>();
            foreach (CultureInfo item in cultures)
            {
                RegionInfo r = new RegionInfo(item.LCID);
                if (!cultureList.Contains(r.ThreeLetterISORegionName))
                {
                    cultureList.Add(r.ThreeLetterISORegionName);
                    // Skip region groupings whose TwoLetterISORegionName is longer than two characters
                    // (e.g. the UN M.49 code "419" reported for es-419, Latin America); these have no
                    // two-letter ISO 3166 code.
                    if (r.TwoLetterISORegionName.Length > 2) continue;
                    sb.AppendLine(string.Format("INSERT INTO [LocalizableText] ([CultureInvariantText]) VALUES ('{0}')", r.EnglishName.Replace("'", "''")));
                    sb.AppendLine(" set @LocalizableId=IDENT_CURRENT('LocalizableText') ");
                    sb.AppendLine(string.Format(" select @CultureId=[CultureId] from [Culture] where [Code]='{0}'", item.Name));
                    sb.AppendLine(string.Format(" select @CurrencyId=[CurrencyId] from [Currency] where [IsoCode]='{0}'", r.ISOCurrencySymbol));
                    sb.AppendLine(string.Format("INSERT INTO Country([IsoCode3],[IsoCode2],[CurrencyId],[CultureId],[Name]) VALUES('{0}','{1}',@CurrencyId,@CultureId,@LocalizableId)",
                                r.ThreeLetterISORegionName, r.TwoLetterISORegionName));
                }
            }

            sw.WriteLine(sb.ToString());

            sw.Flush();
            sw.Close();
            Console.ReadLine();
        }

        public static List<string> GetCountryList()
        {
            // Generic list to hold the country names returned.
            List<string> cultureList = new List<string>();

            // All specific cultures known to the system: the user's local culture plus
            // the cultures installed with the .NET Framework.
            CultureInfo[] cultures = CultureInfo.GetCultures(CultureTypes.AllCultures & ~CultureTypes.NeutralCultures);

            // Loop through all the cultures found.
            foreach (CultureInfo culture in cultures)
            {
                // Pass the current culture's locale ID (http://msdn.microsoft.com/en-us/library/0h88fahh.aspx)
                // to the RegionInfo constructor to gain access to the region information for that culture.
                RegionInfo region = new RegionInfo(culture.LCID);

                // Make sure the list doesn't already contain this country before adding its EnglishName
                // (http://msdn.microsoft.com/en-us/library/system.globalization.regioninfo.englishname.aspx).
                if (!(cultureList.Contains(region.EnglishName)))
                    cultureList.Add(region.EnglishName);
            }
            return cultureList;
        }
    }
}
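One caveat with the loops above: constructing RegionInfo from an LCID throws an ArgumentException for custom cultures, because every custom culture reports the placeholder LCID 0x1000 (LOCALE_CUSTOM_UNSPECIFIED). Building the RegionInfo from the culture name avoids this. The snippet below is a minimal sketch of that variant, assuming you only need the distinct English country names; the helper name CountryListByName is illustrative and not part of the original program.

using System.Collections.Generic;
using System.Globalization;
using System.Linq;

static class CountryListByName
{
    // Same idea as GetCountryList, but RegionInfo is built from the culture name,
    // which also works for custom cultures that all report LCID 0x1000.
    public static List<string> Get()
    {
        return CultureInfo.GetCultures(CultureTypes.SpecificCultures)
                          .Select(c => new RegionInfo(c.Name).EnglishName)
                          .Distinct()
                          .OrderBy(name => name)
                          .ToList();
    }
}

// Example usage:
//   foreach (string country in CountryListByName.Get())
//       Console.WriteLine(country);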