Matplotlib模块是Python中用来画图的模块
制作折线图
利用plt.plot(x轴,y轴)结构创建折线图
import matplotlib.pyplot as plt
import pandas as pd
# Load the unemployment-rate table.
data = pd.read_csv("UNRATE.csv")
# pd.to_datetime() converts the raw DATE strings into proper datetime values.
data["DATE"] = pd.to_datetime(data["DATE"])
print(data.head(3))

# Only the first twelve rows are plotted.
first_twelve = data.iloc[:12]
months = first_twelve["DATE"]
rates = first_twelve["VALUE"]
# plt.plot(x, y) draws the line; a linewidth= argument could set its thickness.
plt.plot(months, rates)
plt.xticks(rotation=45)  # tilt the x tick labels by 45 degrees
plt.title("Monthly Unemployment Rate")
plt.xlabel("Month")
plt.ylabel("Unemployment Rate")
plt.show()  # display the finished figure
画两条线在同一图中
import matplotlib.pyplot as plt
import pandas as pd
# Draw two consecutive 12-row slices of the series as separate lines on one figure.
data = pd.read_csv("UNRATE.csv")
# Parse DATE so the x-axis is a real time axis instead of a list of string categories.
data["DATE"] = pd.to_datetime(data["DATE"])
# plt.figure() creates the figure object; figsize is (width, height).
fig = plt.figure(figsize=(6, 3))
# Each plt.plot() call adds another line to the same axes; c= sets the line colour.
plt.plot(data["DATE"][0:12], data["VALUE"][0:12], c="red")
plt.plot(data["DATE"][12:24], data["VALUE"][12:24], c="blue")
plt.xticks(rotation=45)
# plt.show() runs the GUI event loop and blocks until the window is closed.
# fig.show() returns immediately, so in a plain script the window would
# disappear as soon as the interpreter exits.
plt.show()
加入legend(图例)
import matplotlib.pyplot as plt
import pandas as pd
# Overlay five 12-row slices on one chart, one coloured line per slice,
# and add a legend so the lines can be told apart.
data = pd.read_csv("UNRATE.csv")
data["DATE"] = pd.to_datetime(data["DATE"])
# .dt.month extracts the month number from each parsed date.
data["Month"] = data["DATE"].dt.month
colors = ["red", "blue", "green", "orange", "black"]
for i in range(5):
    # Slice i covers rows [12 * i, 12 * (i + 1)).
    start_index = i * 12
    end_index = (i + 1) * 12
    subset = data[start_index:end_index]
    # Lines are labelled "1948", "1949", ...
    # NOTE(review): assumes the CSV starts in January 1948 — confirm against the data.
    label = str(1948 + i)
    # label= only registers the legend entry; plt.legend() below is what draws it.
    plt.plot(subset["Month"], subset["VALUE"], c=colors[i], label=label)
# loc="best" lets matplotlib choose the legend position.
plt.legend(loc="best")
plt.show()
制作子图
# Three panels placed on a 2x2 grid; cell 3 (bottom-left) is deliberately left empty.
data = pd.read_csv("UNRATE.csv")
# plt.figure() creates the drawing area; it also accepts a figsize argument.
fig = plt.figure()
ax1 = fig.add_subplot(2, 2, 1)  # cell 1 of the 2x2 grid
ax2 = fig.add_subplot(2, 2, 2)  # cell 2 of the 2x2 grid
ax3 = fig.add_subplot(2, 2, 4)  # cell 4 of the 2x2 grid

dates = data["DATE"]
values = data["VALUE"]
# Each panel shows the next consecutive block of twelve rows.
for axes, start in zip((ax1, ax2, ax3), (0, 12, 24)):
    stop = start + 12
    axes.plot(dates[start:stop], values[start:stop])
plt.show()
import matplotlib.pyplot as plt
import numpy as np
# Two stacked panels in a tall, narrow figure; figsize is a (width, height)
# argument of plt.figure().
fig = plt.figure(figsize=(3, 6))
ax1 = fig.add_subplot(2, 1, 1)
# Both panels must index the same 2x1 grid: index 2 is the bottom panel.
# Indexing a 2x2 grid here would draw this axes over the right half of ax1
# and leave the bottom half of the figure empty.
ax2 = fig.add_subplot(2, 1, 2)
# Top panel: five random integers in [1, 5) on x against 1..5 on y.
ax1.plot(np.random.randint(1, 5, 5), range(1, 6))
# Bottom panel: the straight line x = 3 * y for y in 0..9.
ax2.plot(np.arange(10) * 3, np.arange(10))
plt.show()
制作条形图
import pandas as pd
import matplotlib.pyplot as plt
from numpy import arange
# Vertical bar chart comparing row 0's score across the rating columns.
data = pd.read_csv("fandango_scores.csv")
cols = ["FILM", "RT_user_norm", "Metacritic_user_nom", "IMDB_norm", "Fandango_Ratingvalue", "Fandango_Stars"]
data_norm = data[cols]
num_cols = ["RT_user_norm", "Metacritic_user_nom", "IMDB_norm", "Fandango_Ratingvalue", "Fandango_Stars"]
# Values of the row labelled 0, one per column in num_cols.
bar_height = data_norm.loc[0, num_cols].values
# x coordinate of each bar (its distance from the origin).
bar_positions = arange(5) + 0.75
# ax.bar() centres each bar on its x coordinate by default, so the ticks are
# placed at those same coordinates to put every label directly under its bar.
tick_positions = bar_positions
# plt.subplots() returns the figure object and one axes to draw on.
fig, ax = plt.subplots()
# bar(x, height, width): bar positions, bar heights, bar width.
ax.bar(bar_positions, bar_height, 0.3)
ax.set_xticks(tick_positions)
ax.set_xticklabels(num_cols, rotation=45)
ax.set_xlabel("Rating Source")
ax.set_ylabel("Average Rating")
ax.set_title("Rating")
plt.show()
横着制作柱状图
import pandas as pd
import matplotlib.pyplot as plt
from numpy import arange
# Horizontal bar chart comparing row 0's score across the rating columns.
data = pd.read_csv("fandango_scores.csv")
cols = ["FILM", "RT_user_norm", "Metacritic_user_nom", "IMDB_norm", "Fandango_Ratingvalue", "Fandango_Stars"]
data_norm = data[cols]
num_cols = ["RT_user_norm", "Metacritic_user_nom", "IMDB_norm", "Fandango_Ratingvalue", "Fandango_Stars"]
# Values of the row labelled 0, one per column in num_cols.
bar_height = data_norm.loc[0, num_cols].values
# y coordinate of each bar.
bar_positions = arange(5) + 0.75
# ax.barh() centres each bar on its y coordinate by default, so the ticks are
# placed at those same coordinates to line every label up with its bar.
tick_positions = bar_positions
fig, ax = plt.subplots()
# barh(y, width, height): the horizontal counterpart of bar().
ax.barh(bar_positions, bar_height, 0.5)
ax.set_yticks(tick_positions)
ax.set_yticklabels(num_cols)
ax.set_ylabel("Rating Source")
ax.set_xlabel("Average Rating")
ax.set_title("Rating")
plt.show()
制作散点图
import pandas as pd
import matplotlib.pyplot as plt
# Scatter plot of one rating column against another.
data = pd.read_csv("fandango_scores.csv")
cols = [
    "FILM",
    "RT_user_norm",
    "Metacritic_user_nom",
    "IMDB_norm",
    "Fandango_Ratingvalue",
    "Fandango_Stars",
]
data_norm = data[cols]

fandango_scores = data_norm["Fandango_Ratingvalue"]
rt_scores = data_norm["RT_user_norm"]

fig, ax = plt.subplots()
# scatter(x, y): first argument supplies the x values, second the y values.
ax.scatter(fandango_scores, rt_scores)
ax.set_ylabel("Rotten Tomatoes")
ax.set_xlabel("Fandango")
plt.show()
增加子图,复杂版
import pandas as pd
import matplotlib.pyplot as plt
# Two stacked scatter plots of the same pair of columns with the axes swapped.
data = pd.read_csv("fandango_scores.csv")
cols = ["FILM", "RT_user_norm", "Metacritic_user_nom", "IMDB_norm", "Fandango_Ratingvalue", "Fandango_Stars"]
data_norm = data[cols]
fig = plt.figure(figsize=(5, 10))
ax1 = fig.add_subplot(2, 1, 1)
ax2 = fig.add_subplot(2, 1, 2)
# Top panel: Fandango on x, Rotten Tomatoes on y.
ax1.scatter(data_norm["Fandango_Ratingvalue"], data_norm["RT_user_norm"])
ax1.set_xlabel("Fandango")
ax1.set_ylabel("Rotten Tomatoes")
# Bottom panel: the same data with x and y exchanged.
ax2.scatter(data_norm["RT_user_norm"], data_norm["Fandango_Ratingvalue"])
# Label spelled the same way as on the top panel (was "Rotten Tomatoer").
ax2.set_xlabel("Rotten Tomatoes")
ax2.set_ylabel("Fandango")
plt.show()
制作分布图
import pandas as pd
import matplotlib.pyplot as plt
# Histogram of the Fandango_Ratingvalue column.
data = pd.read_csv("fandango_scores.csv")
cols = [
    "FILM",
    "RT_user_norm",
    "Metacritic_user_nom",
    "IMDB_norm",
    "Fandango_Ratingvalue",
    "Fandango_Stars",
]
data_norm = data[cols]

ratings = data_norm["Fandango_Ratingvalue"]
# value_counts() tallies how many rows hold each distinct value;
# sort_index() then orders that tally by the rating value itself.
fandango_distribution = ratings.value_counts().sort_index()
print(fandango_distribution.head(3))

fig, ax = plt.subplots()
# hist() draws the histogram; bins=30 asks for 30 bins. Passing range=(4, 5)
# as well would restrict the histogram to values between 4 and 5.
ax.hist(ratings, bins=30)
plt.show()
加子图复杂版
import pandas as pd
import matplotlib.pyplot as plt
# Four stacked histograms, one per rating column, each with its own bin count
# and y-axis limit.
data = pd.read_csv("fandango_scores.csv")
cols = ["FILM", "RT_user_norm", "Metacritic_user_nom", "IMDB_norm", "Fandango_Ratingvalue", "Fandango_Stars"]
data_norm = data[cols]
fandango_distribution = data_norm["Fandango_Ratingvalue"].value_counts()
fandango_distribution = fandango_distribution.sort_index()
print(fandango_distribution.head(3))
fig = plt.figure(figsize=(5, 20))
ax1 = fig.add_subplot(4, 1, 1)
ax2 = fig.add_subplot(4, 1, 2)
ax3 = fig.add_subplot(4, 1, 3)
ax4 = fig.add_subplot(4, 1, 4)
# (axes, column, title, number of bins, upper y limit) for each panel.
panels = [
    (ax1, "Fandango_Ratingvalue", "Distribution of Fandango Ratings", 30, 50),
    (ax2, "RT_user_norm", "Distribution of RT_user_norm", 40, 60),
    (ax3, "Metacritic_user_nom", "Distribution of Metacritic_user_nom", 50, 70),
    (ax4, "IMDB_norm", "Distribution of IMDB_norm", 60, 40),
]
for axes, column, title, bins, y_max in panels:
    axes.hist(data_norm[column], bins=bins)
    # set_title must be *called*; assigning a string to it only replaces the
    # method on that axes object and no title is ever drawn.
    axes.set_title(title)
    axes.set_xlabel("value")
    axes.set_ylabel("Distribution")
    # set_ylim() fixes the y-axis range.
    axes.set_ylim(0, y_max)
plt.show()
制作箱型图
import pandas as pd
import matplotlib.pyplot as plt
# Box plot of a single rating column.
data = pd.read_csv("fandango_scores.csv")
cols = [
    "FILM",
    "RT_user_norm",
    "Metacritic_user_nom",
    "IMDB_norm",
    "Fandango_Ratingvalue",
    "Fandango_Stars",
]
data_norm = data[cols]

rt_scores = data_norm["RT_user_norm"]
fig, ax = plt.subplots()
ax.boxplot(rt_scores)
# One box, so one tick label.
ax.set_xticklabels(["Rotten Tomatoes"])
# Fix the y-axis to the range 0-5.
ax.set_ylim(0, 5)
plt.show()
将多个箱型图放在一张图上
import pandas as pd
import matplotlib.pyplot as plt
# Several box plots side by side on one axes, one per rating column.
data = pd.read_csv("fandango_scores.csv")
cols = [
    "FILM",
    "RT_user_norm",
    "Metacritic_user_nom",
    "IMDB_norm",
    "Fandango_Ratingvalue",
    "Fandango_Stars",
]
data_norm = data[cols]
num_cols = ["RT_user_norm", "Metacritic_user_nom", "IMDB_norm", "Fandango_Ratingvalue"]

# Select the columns with a list of names and pass the underlying .values
# array; boxplot() draws one box per column of a 2-D array.
score_matrix = data_norm[num_cols].values
fig, ax = plt.subplots()
ax.boxplot(score_matrix)
# Tick labels are the column names, in the same order as the boxes.
ax.set_xticklabels(num_cols)
ax.set_ylim(0, 5)
plt.show()
注意 fig, ax = plt.subplots() 的意思是:建立一个Figure对象(fig)和一个Axes对象(ax,即子图)
等同于
# Long-hand form: create the figure first, then add the only cell of a 1x1 grid.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)