Python数据分析：人口密度计算与缺失值处理-优快云博客

本文链接：https://blog.youkuaiyun.com/zhangting_123abc/article/details/129126422

该博客通过Python和pandas库对人口数据进行分析，包括处理state全称中的空值，合并各州面积数据，填充并删除area列中的缺失值，提取2010年全民人口信息，计算各州人口密度，并最终排序找出人口密度最高的州。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

#coding=utf-8
from matplotlib import  pyplot as plt
import numpy as np
import pandas as pd
#导入文件
abb=pd.read_csv('./state-abbrevs.csv')
area=pd.read_csv('./state-areas.csv')
pop=pd.read_csv('./state-population.csv')

#将人口数据和各州简称数据进行合并
abb_pop=pd.merge(abb,pop,left_on='abbreviation',right_on='state/region',how='outer')
#print(abb_pop_merge.head())

#将合并的数据重复的一列进行删除
abb_pop=abb_pop.drop(labels='abbreviation',axis=1,inplace=False)
#print(c)
#查看存在缺失数据的列
#print(abb_pop.isnull().any(axis=1))
#在drop中，0是列，1是行，在其他中0是行，1是列
#print(abb_pop.info())
#去重操作
#思路：将state这一列中的空值对应的行数据取出，从该行数据中就可以取出简称的值
#将state的空值定位到
abb_pop['state'].isnull()
a=abb_pop.loc[abb_pop['state'].isnull()]#作为一个行索引
b=a['state/region']
#print(b)
c=b.unique()
print(c)

2：找出state全称里面的空值，进行赋值。

abb_pop['state'].isnull()
a=abb_pop.loc[abb_pop['state'].isnull()]#作为一个行索引
b=a['state/region']
#print(b)
c=b.unique()#代表空值有哪些：‘PR'和’USA'
#print(c)
#将USA的全称进行赋值
#（05：50）将USA对应的行索引求出来
date=abb_pop.loc[abb_pop['state/region']=='USA']#行数据求出来
#将行索引取出
indexes=abb_pop.loc[abb_pop['state/region']=='USA'].index
abb_pop.loc[indexes,'state']='United States'
#将PR里的空值赋值
date2=abb_pop.loc[abb_pop['state/region']=='PR']
indexes2=date2.index
abb_pop.loc[indexes2,'state']='pprrrr'

3：合并各州的面积数据

abb_pop_area=pd.merge(abb_pop,area,how='outer')
print(abb_pop_area.head())

4：area（sq.mi)这一列中有缺失数据

abb_pop_area=pd.merge(abb_pop,area,how='outer')
#print(abb_pop_area.info())
a=abb_pop_area['area (sq. mi)'].isnull()
area_is_null=abb_pop_area.loc[a]
indexes3=area_is_null.index
print(indexes3)

5：去除含有缺失数据的行

abb_pop_area.drop(labels=indexes3,inplace=True,axis=0)

6：找出2010年全民人口的数据

abb_pop_area=pd.merge(abb_pop,area,how='outer')
#print(abb_pop_area.info())
a=abb_pop_area['area (sq. mi)'].isnull()
area_is_null=abb_pop_area.loc[a]
indexes3=area_is_null.index
#print(indexes3)
abb_pop_area.drop(labels=indexes3,inplace=True,axis=0)
#print(abb_pop_area.head())
#abb_pop_area.query('age==''and year==2010')

#print(abb_pop_area.query('ages =="total" and year==2010.0'))

#print(abb_pop_area.query('ages ==‘total’and year==2010.0'))
print(abb_pop_area.query('ages =="total" and year==2010.0'))

7：计算各州的人口密度

abb_pop_area['midu']=abb_pop_area.population /abb_pop_area['area (sq. mi)']           
print(abb_pop_area)

8：排序，找出人口密度最高的一个州

abb_pop_area['midu']=abb_pop_area.population /abb_pop_area['area (sq. mi)']         
print(abb_pop_area.sort_values(by='midu',axis=0,ascending=False).iloc[0] [0] )

人口分析案例