File:  [Local Repository] / imach / src / simulation.R
Revision 1.3: download - view: text, annotated - select for diffs
Thu Jul 20 08:40:31 2023 UTC (15 months, 2 weeks ago) by brouard
Branches: MAIN
CVS tags: HEAD
*** empty log message ***

# $Id: simulation.R,v 1.3 2023/07/20 08:40:31 brouard Exp $
#  $State: Exp $
#  
# Simulating data for IMaCh tests
# Feinuo Sun and Nicolas Brouard (INED, July 2023)
# 
install.packages("dplyr")
if(!require("tidyverse")){
    install.packages("tidyverse",repos="http://cran.r-project.org")
#    install.packages("tidyverse")
    require("tidyverse")
  }
# LIBRARIES
library(haven)
#samplesize<- 10000
samplesize<- 50000

# Which Life table?
# Simulating a gompertz law of mortality: slope of line mu_m (about 9% increase per year)
# and a_m, modal age at death. From whose we can compute the life expectancy e_65.

install.packages("expint",repos="http://cran.r-project.org")
library(expint)
am <- 85
mum <- 9/100
expint(exp(mum*(65-am)))
am <- 84.8506
exp(exp(mum*(65-am)))/mum*expint(exp(mum*(65-am)))
# e(65) = 18.10904 years
e65 <- exp(exp(mum*(65-am)))/mum*expint(exp(mum*(65-am)))

e65 <- exp(exp(mum*(65-am)))/mum*expint(exp(mum*(65-am)))
e65
                                        # Life table from 0
l <- function(a,am,mum){
    exp(-exp(mum*(a-am)))/exp(-exp(-mum*am))
}
l65 <- function(a,am,mum){
    exp(-exp(mum*(a-am)))/exp(-exp(mum*(65-am)))
}
l65(65,am,mum)
#l65<-l(65,am,mum) # 0.84
l65(100,am,mum)

#curve(l65(x,am,mum), from = 65, to = 100)
# Add a line:
#curve(1 - l65(x,am,mum), add = TRUE, col = "red")

# inverse function from l(a) find a.
linv <- function(la,am,mum){
    am+log(exp(-mum*am) -log(la))/mum
}
linv(0.001,am,mum)
linv(1,am,mum)

l65inv <- function(la,am,mum){
    am+log(exp(mum*(65-am)) -log(la))/mum
}
l65inv(0.001,am,mum)
l65inv(1,am,mum)

am
mum
l65bisinv <- function(la){
    84.8506+log(exp(0.09*(65-84.8506)) -log(la))/0.09
}
l65bisinv(0.0001)
l65bisinv(1)

set.seed(128)
zeroone<-runif(samplesize,min=0.00000001,max=1)
zeroone<-runif(samplesize,min=0.001,max=1)
lifelength <- lapply(zeroone,l65bisinv)
#slife<- sort(unlist(lifelength),decreasing=TRUE)
#?sort
#head(slife)
#mean(unlist(lifelength)-65) #17.99482
#head(lifelength,10)
#class(lifelength)

#lifelength <- rnorm(n=samplesize, mean=85, sd=16)

set.seed(124)
# First interview 4th semester
monthinterview <- runif(samplesize, min=10,max=13)
st_98 <- rbinom(n=samplesize, size=1, prob=0.5)+1
# state 2
st_00 <- rbinom(n=samplesize, size=1, prob=0.2)+1
# date of birth (simulating population in 1998 age 65+), uniformly?
popage65110in1998<- runif(samplesize, min=65, max=110)
#gender
ragender <- rbinom(n=samplesize, size=1, prob=0.56)

r98iwmid <- 1998 + (monthinterview-1)/12
yrinterview1 <- floor(r98iwmid)
#monthinw1 <- floor((monthinterview - floor(monthinterview))*12)+1
monthinw1 <- floor(monthinterview)


int_98 <- paste0(monthinw1,"/",yrinterview1)

rabdate <- r98iwmid - popage65110in1998 
birthyr <- floor(rabdate)
monthdb <- floor((rabdate - floor(rabdate))*12)+1
brt <- paste0(monthdb,"/",birthyr)
# date of death for dropping cases
#raddate <- rabdate + lifelength
raddate <- rabdate + unlist(lifelength)
dateinterview2 <- 1998 + 2 + monthinterview/12
#head(cbind(r98iwmid,dateinterview2,raddate),70)
# people whose death occured before the interview will be dropped (radnate)
radndate <- if_else(raddate < r98iwmid,  NA, raddate )
head(cbind(r98iwmid,dateinterview2,raddate,radndate,lifelength),n=5299)
#head(cbind(r98iwmid,dateinterview2,raddate,radndate,lifelength),70)
# in order to avoid date of death known after last wave and potential bias
# people whose death will occur after last interview will have an unkwonn (99/9999) date of death
lastinterview<- dateinterview2
radldate<- if_else(radndate > dateinterview2+1/12,  9999, radndate )
head(cbind(r98iwmid,dateinterview2,raddate,radndate,lifelength,radldate),n=5299)
radldate<- if_else(dateinterview2+1/12 > radndate & radndate > dateinterview2, radndate , radldate )
head(cbind(r98iwmid,dateinterview2,raddate,radndate,lifelength,radldate),n=5299)
ddtyr <- if_else((!is.na(radldate) & radldate ==9999),  9999, floor(radldate))
monthdd <- if_else((!is.na(radldate) & radldate ==9999),  99,floor((radldate - floor(radldate))*12)+1)
#head(cbind(r98iwmid,dateinterview2,raddate,lifelength,radldate,ddtyr,monthdd),70)
ddt <- if_else(!is.na(radldate),paste0(monthdd,"/",ddtyr), NA)
#head(cbind(r98iwmid,dateinterview2,raddate,lifelength,radldate,ddt),70)
weight <- rep(1, samplesize)
# state 1 st_98
ageatinterview1 <- r98iwmid - rabdate
# interview 2 st 2000
# same month of interview
yrinterview2 <- floor(dateinterview2)
monthinw2 <- floor((dateinterview2 - floor(dateinterview2))*12)+1
int_00 <- paste0(monthinw2,"/",yrinterview2)
#head(cbind(r98iwmid,dateinterview2,int_00,raddate,radndate,lifelength,radldate),70)
ageatinterview2 <- dateinterview2 - rabdate
# state 2
st_00 <- if_else(raddate < dateinterview2, 3, st_00)
hhidpn <- seq(1,samplesize)
HRSSIMULdata <- data.frame(hhidpn,ragender, weight, brt, ddt, int_98, st_98, int_00, st_00)
head(HRSSIMULdata,70)                             

HRSSIMULdata <- HRSSIMULdata %>% filter(!is.na(ddt))
HRSSIMULdata <- HRSSIMULdata[,c("hhidpn","ragender", "weight", "brt", "ddt", "int_98", "st_98", "int_00", "st_00" )]
head(HRSSIMULdata,70)                             
#### export to txt file for IMaCh
write.table(HRSSIMULdata,file="HRSSIMUL.txt",col.names=F,row.names=F,quote=F)


#HRSSIMULdata<-HRSSIMULdata[,c("hhidpn","female","nhwhites","schlyrs","weight","brt","ddt","int_10","st_10","marpar_10","smoker_10","srh_10","int_12","st_12","marpar_12","smoker_12","srh_12","int_14","st_14","marpar_14","smoker_14","srh_14")]

## # VARIABLE SELECTIONS
## dt1<-dat[,c( "hhidpn",   # ID number of the respondent
##             "ragender",  # respondent's gender 1M or 2F, mean 1.56
##             "rabdate",   # Respondent's birth date (1890.0  to    1995.0)
##             "raddate",   # Respondent's death date (1917.0  to   2019.0)
##             "rahispan",  # Mexican-American and Other Hispanic are recoded to "1." 
##             "raracem",   # Race-masked: White/Caucasian 1, Black/African American 2, Other 3, Missing . 
##             "raeduc",    # Education: Years of
##             "r11mstat",  # Marital status at wave 11: .J Webonterview missing, .M Other missing. Married 1
##             "r12mstat",    #  Married spouse absent 2
##             "r13mstat",    #  Partnered  3 Separated 4 Divorced  5
##             "r14mstat",    #  Widowed 7, Never married 8
##             "r11iwmid",    #  Date of interview at wave 11
##             "r12iwmid",    #
##             "r13iwmid",    #
##             "r14iwmid",    #
##             "s11ddate",    # Date of death (from wave 11) 1669
##             "s12ddate",    # 967
##             "s13ddate",    # 381
##             "s14ddate",    #   8
##             "r11cesd",    # CESD score at 11, 19400, mean 1.54
##             "r12cesd",     #
##             "r13cesd",     #
##             "r14cesd",     #
##             "r11agey_m",  # Age at mid wave 11 66.85
##             "r11adla",    # Sum of ADLs at wave 11, 0.41 
##             "r12adla",     # 0.43
##             "r13adla",     # 0.40
##             "r14adla",     # 0.39
##             "r11wtresp"   # Weight
##             )]

# INDIVIDUAL SELECTIONS:
# Respondents in 2012, aged 50 and older, with no missing information on marital status in 2012
#dt2<-dt1 %>% filter(!is.na(r11mstat) & r11agey_m>=50);nrow(dm_bis)
#write.csv2(dt2,file="hrs12xSAS_noM.csv")

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>