R语言学习笔记
文章目录
probit回归
factor()和as.factor()
在本笔记的用法中这两个效果相同;区别在于 factor() 可以指定 levels/labels,并会丢弃未使用的水平,而 as.factor() 对已经是因子的输入原样返回。
# MEPS data: load the health-expenditure sample.
Hexpend <- read.csv("HealthExpend.csv")

# Check the column names and dimensions of the file, and list the first 8 rows.
names(Hexpend)
dim(Hexpend)
Hexpend[1:8, ]

# NOTE(review): attach() is kept because the model fits later in this file
# refer to the columns by bare name (GENDER, AGE, ...). In new code prefer
# passing `data = Hexpend` to glm() instead.
attach(Hexpend)
n <- nrow(Hexpend)

# POSEXP is 1 where EXPENDIP is non-zero and 0 otherwise.
# Built with one vectorized comparison instead of an element-wise loop:
# the loop over 1:n iterated even when n == 0, and aborted on the first
# missing EXPENDIP value. Here a missing EXPENDIP simply yields NA.
POSEXP <- as.numeric(EXPENDIP != 0)
# Alternative: fit a generalized linear model of POSEXP on GENDER
# (binomial family, logit link), then print its summary, its
# log-likelihood, and a summary of the response itself.
PosExpglm <- glm(
  POSEXP ~ GENDER,
  family = binomial(link = logit)
)
summary(PosExpglm)
logLik(PosExpglm)
summary(POSEXP)
# Full logit model: AGE and GENDER enter as they are; every other covariate
# is wrapped in factor() so it is treated as categorical.
PosExpglmFull <- glm(
  POSEXP ~ AGE + GENDER +
    factor(RACE) + factor(REGION) + factor(EDUC) + factor(PHSTAT) +
    factor(ANYLIMIT) + factor(INCOME) + factor(insure),
  family = binomial(link = logit)
)
summary(PosExpglmFull)
logLik(PosExpglmFull)
# Refit the full logit model with as.factor() in place of factor(), and with
# GENDER turned into a factor whose contrasts are set through C().
# NOTE(review): `base = 1` is forwarded by C() to the contrast function;
# presumably this selects the first level as the baseline -- confirm.
Gender <- as.factor(GENDER)
PosExpglmFull <- glm(
  POSEXP ~ AGE + C(Gender, base = 1) +
    as.factor(RACE) + as.factor(REGION) + as.factor(EDUC) +
    as.factor(PHSTAT) + as.factor(ANYLIMIT) + as.factor(INCOME) +
    as.factor(insure),
  family = binomial(link = logit)
)
summary(PosExpglmFull)
relevel()
# Change the reference levels to agree with the book (done in SAS there).
# Each variable is converted to a factor first and then re-based on the
# named level; the results are stored under the same names.
RACE   <- relevel(factor(RACE),   ref = "WHITE")
REGION <- relevel(factor(REGION), ref = "WEST")
EDUC   <- relevel(factor(EDUC),   ref = "LHIGHSC")
PHSTAT <- relevel(factor(PHSTAT), ref = "EXCE")
INCOME <- relevel(factor(INCOME), ref = "POOR")
书中给出的示例没有使用 factor(),但 relevel() 只接受因子;R 4.0 起 read.csv() 默认不再把字符串列转换为因子,所以这里不加 factor() 会报错。
可以对比relevel前后的结果
案例11.4复刻 glm函数
Regression Modeling with Actuarial and Financial Applications,P315 案例11.4 Application: Medical Expenditures
# Load the data and inspect its structure.
HealthExpend <- read.csv("HealthExpend.csv")
str(HealthExpend)

# NOTE(review): attach() is kept so that code further down can keep using
# bare column names; prefer `data = HealthExpend` in new code.
attach(HealthExpend)

# Number of observations, taken from the data rather than hard-coded
# (the original note records 2000 rows for this file).
n <- nrow(HealthExpend)

# From here on, console output (regression results etc.) is diverted to
# this file until a matching sink() call restores the console.
sink("result.txt")
整理变量
## Prepare variables
# Ethnicity, i.e. RACE or RACE1.
# Each indicator starts as a length-n vector of zeros.
# NOTE(review): the two numbers after each description were copied from the
# original note; presumably reference statistics from the book -- confirm.
ASIAN  <- rep(0, n)  # 1 if Asian   4.3  4.7
BLACK  <- rep(0, n)  # 1 if Black  14.8 10.5
NATIVE <- rep(0, n)  # 1 if Native  1.1 13.6 ??? descriptive statistics for this one do not match