1.涉及内容
2.代码
main.py
# -*- coding: utf-8 -*-
import pandas as pd
from tools import get_n_face, get_color_mean #自己写的
import matplotlib.pyplot as plt
# Path of the movie metadata CSV that run_main reads on a first run.
dataset_path = './dataset/movie_metadata.csv'
# Whether this is the first run of the program. When True, run_main reads the
# dataset, computes the poster features and writes them to CSV files; when
# False it only loads the previously saved './imdb_face_color.csv'.
is_first_run = False
def run_main():
    """
    Main entry point.

    On a first run (``is_first_run`` is true) the first 100 rows of the
    dataset are loaded, the number of faces and the rounded mean pixel value
    of every movie poster are computed, and the results are written to CSV.
    In every case the saved ``./imdb_face_color.csv`` is then loaded and the
    mean IMDB score per face count and per mean pixel value is shown as two
    bar charts, one after the other.
    """
    if is_first_run:
        # First run: build the feature columns and persist them.
        movies = pd.read_csv(dataset_path, nrows=100)

        # Number of faces found on each poster.
        print('海报人脸检测:')
        movies['n_face'] = movies['movie_imdb_link'].apply(get_n_face)
        # Intermediate save, so the face counts are on disk before the
        # second pass over the posters starts.
        movies.to_csv('./imdb_face.csv', index=False, encoding='utf-8')

        # Mean pixel value of each poster (stands in for its main colour).
        print('海报像素均值计算:')
        movies['color_mean'] = movies['movie_imdb_link'].apply(get_color_mean)
        movies.to_csv('./imdb_face_color.csv', index=False, encoding='utf-8')

    movies = pd.read_csv('./imdb_face_color.csv')

    # Rows with n_face == -1 are posters whose face detection failed.
    valid_movies = movies[movies['n_face'] != -1]

    # One bar chart per feature: mean IMDB score grouped by that feature.
    charts = (
        ('n_face', 'Number of Face'),
        ('color_mean', 'Mean of color'),
    )
    for feature, axis_label in charts:
        mean_score = valid_movies['imdb_score'].groupby(valid_movies[feature]).mean()
        mean_score.name = 'Score'
        mean_score.index.name = axis_label
        mean_score.plot(kind='bar')
        plt.show()
# Run the analysis only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    run_main()
tools.py
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
import urllib.request
import cognitive_face as CF
from skimage import io
import numpy as np
def get_img_link(movie_link):
    """
    Scrape the poster image link from a movie page.

    The page at ``movie_link`` is downloaded and parsed, the ``src`` of the
    first ``<img>`` inside the first ``<div class="poster">`` is taken as the
    thumbnail link, and the large-image link is derived from it by cutting
    everything after the ``._V1`` marker and appending ``.jpg``.

    :param movie_link: URL of the movie page.
    :return: link of the large poster image; the thumbnail link unchanged
        when it does not contain the ``._V1_`` marker.
    :raises ValueError: if the page contains no poster image element.
    """
    # Parse inside the ``with`` block so the HTTP response is closed as soon
    # as the document has been read, instead of leaking the connection.
    with urllib.request.urlopen(movie_link) as movie_html:
        movie_html_obj = BeautifulSoup(movie_html, 'html.parser', from_encoding='utf-8')

    # Link of the small poster image.
    poster_div = movie_html_obj.find('div', class_='poster')
    poster_img = poster_div.find('img') if poster_div is not None else None
    if poster_img is None:
        # Fail with a clear message rather than an attribute/subscript error
        # on None.
        raise ValueError('no poster image found on page: {}'.format(movie_link))
    small_poster_img_link = poster_img['src']

    # Link of the large poster image.
    marker_pos = small_poster_img_link.find('._V1_')
    if marker_pos == -1:
        # str.find returns -1 when the marker is absent; slicing with that
        # value would produce a meaningless three-character prefix.
        return small_poster_img_link
    # Keep the text up to and including '._V1' (4 characters of the marker).
    big_poster_img_link = small_poster_img_link[:marker_pos + 4] + '.jpg'
    return big_poster_img_link
def get_n_face(movie_link):
    """
    Count the faces on the poster of a movie.

    The poster link is obtained from the movie page via ``get_img_link`` and
    passed to the ``cognitive_face`` face detection call.

    :param movie_link: URL of the movie page.
    :return: number of detected faces, or -1 when detection raises
        ``CognitiveFaceException``.
    """
    print('正在处理链接:', movie_link)
    poster_link = get_img_link(movie_link)

    api_key = 'xxxxxxxxxx '  # replace this with the key you applied for
    CF.Key.set(api_key)

    try:
        detected_faces = CF.face.detect(poster_link)
    except CF.util.CognitiveFaceException:
        # -1 marks a poster that could not be processed.
        print('无效图片')
        return -1

    face_count = len(detected_faces)
    print('人脸个数:', face_count)
    return face_count
def round_to_int(x, base=10):
    """
    Round a number to the nearest multiple of ``base`` and return it as int.

    ``x`` is converted with ``float`` first, so numeric strings are accepted.
    The built-in ``round`` is used, so exact halves go to the even multiple
    (25 -> 20, 35 -> 40 for the default base).

    :param x: value to round.
    :param base: step to round to (default 10).
    :return: the rounded value as an ``int``.
    """
    step_count = round(float(x) / base)
    rounded = base * step_count
    return int(rounded)
def get_color_mean(movie_link):
    """
    Compute the rounded mean pixel value of a movie's poster.

    The poster link is obtained from the movie page via ``get_img_link``,
    the image is loaded with ``skimage.io.imread``, and the mean over all of
    its values is rounded with ``round_to_int`` (default base).

    :param movie_link: URL of the movie page.
    :return: the rounded mean pixel value as an ``int``.
    """
    print('正在处理链接:', movie_link)
    poster_link = get_img_link(movie_link)
    pixels = io.imread(poster_link)
    rounded_mean = round_to_int(np.mean(pixels))
    print('像素均值:', rounded_mean)
    return rounded_mean