Annotation of imach/src/simulation.R, revision 1.1
1.1 ! brouard 1: # Simulating data for IMaCh tests
! 2: # Feinuo Sun and Nicolas Brouard (INED, July 2023)
! 3: #
! 4: install.packages("dplyr")
! 5: if(!require("tidyverse")){
! 6: install.packages("tidyverse",repos="http://cran.r-project.org")
! 7: # install.packages("tidyverse")
! 8: require("tidyverse")
! 9: }
! 10: # LIBRARIES
! 11: library(haven)
! 12: samplesize<- 10000
! 13:
! 14: # Which Life table?
! 15: # Simulating a gompertz law of mortality: slope of line mu_m (about 9% increase per year)
! 16: # and a_m, modal age at death. From whose we can compute the life expectancy e_65.
! 17:
! 18: install.packages("expint",repos="http://cran.r-project.org")
! 19: library(expint)
! 20: am <- 85
! 21: mum <- 9/100
! 22: expint(exp(mum*(65-am)))
! 23: am <- 84.8506
! 24: exp(exp(mum*(65-am)))/mum*expint(exp(mum*(65-am)))
! 25: # e(65) = 18.10904 years
! 26: e65 <- exp(exp(mum*(65-am)))/mum*expint(exp(mum*(65-am)))
! 27:
! 28: e65 <- exp(exp(mum*(65-am)))/mum*expint(exp(mum*(65-am)))
! 29: # Life table from 0
! 30: l <- function(a,am,mum){
! 31: exp(-exp(mum*(a-am)))/exp(-exp(-mum*am))
! 32: }
! 33: l65 <- function(a,am,mum){
! 34: exp(-exp(mum*(a-am)))/exp(-exp(mum*(65-am)))
! 35: }
! 36: l65(65,am,mum)
! 37: #l65<-l(65,am,mum) # 0.84
! 38: l65(100,am,mum)
! 39:
! 40: #curve(l65(x,am,mum), from = 65, to = 100)
! 41: # Add a line:
! 42: #curve(1 - l65(x,am,mum), add = TRUE, col = "red")
! 43:
! 44: # inverse function from l(a) find a.
! 45: linv <- function(la,am,mum){
! 46: am+log(exp(-mum*am) -log(la))/mum
! 47: }
! 48: linv(0.001,am,mum)
! 49: linv(1,am,mum)
! 50:
! 51: l65inv <- function(la,am,mum){
! 52: am+log(exp(mum*(65-am)) -log(la))/mum
! 53: }
! 54: l65inv(0.001,am,mum)
! 55: l65inv(1,am,mum)
! 56:
! 57: am
! 58: mum
! 59: l65bisinv <- function(la){
! 60: 84.8506+log(exp(0.09*(65-84.8506)) -log(la))/0.09
! 61: }
! 62: l65bisinv(0.001)
! 63: l65bisinv(1)
! 64:
! 65: set.seed(128)
! 66: zeroone<-runif(samplesize,min=0.001,max=1)
! 67: lifelength <- lapply(zeroone,l65bisinv)
! 68:
! 69: #lifelength <- rnorm(n=samplesize, mean=85, sd=16)
! 70:
! 71: set.seed(124)
! 72: # First interview 4th semester
! 73: monthinterview <- runif(samplesize, min=9,max=12)
! 74: st_98 <- rbinom(n=samplesize, size=1, prob=0.5)+1
! 75: # state 2
! 76: st_00 <- rbinom(n=samplesize, size=1, prob=0.2)+1
! 77: # date of birth (simulating population in 1998 age 65+), uniformly?
! 78: popage65110in1998<- runif(samplesize, min=65, max=110)
! 79: #gender
! 80: ragender <- rbinom(n=samplesize, size=1, prob=0.56)
! 81:
! 82: yrinterview1 <- floor(r98iwmid)
! 83: monthinw1 <- floor((monthinterview - floor(monthinterview))*12)+1
! 84:
! 85: int_98 <- paste0(monthinw1,"/",yrinterview1)
! 86:
! 87: r98iwmid <- 1998 + monthinterview/12
! 88: rabdate <- r98iwmid - popage65110in1998
! 89: birthyr <- floor(rabdate)
! 90: monthdb <- floor((rabdate - floor(rabdate))*12)+1
! 91: brt <- paste0(monthdb,"/",birthyr)
! 92: # date of death for dropping cases
! 93: raddate <- rabdate + lifelength
! 94: dateinterview2 <- 1998 + 2 + monthinterview/12
! 95: #head(cbind(r98iwmid,dateinterview2,raddate),70)
! 96: # people whose death occured before the interview will be dropped (radnate)
! 97: radndate <- if_else(raddate < r98iwmid, NA, raddate )
! 98: #head(cbind(r98iwmid,dateinterview2,raddate,radndate,lifelength),70)
! 99: # in order to avoid date of death known after last wave and potential bias
! 100: # people whose death will occur after last interview will have an unkwonn (99/9999) date of death
! 101: lastinterview<- dateinterview2
! 102: radldate<- if_else(radndate > dateinterview2, 9999, radndate )
! 103: head(cbind(r98iwmid,dateinterview2,raddate,radndate,lifelength,radldate),70)
! 104: ddtyr <- if_else((!is.na(radldate) & radldate ==9999), 9999, floor(radldate))
! 105: monthdd <- if_else((!is.na(radldate) & radldate ==9999), 99,floor((radldate - floor(radldate))*12)+1)
! 106: #head(cbind(r98iwmid,dateinterview2,raddate,lifelength,radldate,ddtyr,monthdd),70)
! 107: ddt <- if_else(!is.na(radldate),paste0(monthdd,"/",ddtyr), NA)
! 108: #head(cbind(r98iwmid,dateinterview2,raddate,lifelength,radldate,ddt),70)
! 109: weight <- rep(1, samplesize)
! 110: # state 1 st_98
! 111: ageatinterview1 <- r98iwmid - rabdate
! 112: # interview 2 st 2000
! 113: # same month of interview
! 114: yrinterview2 <- floor(dateinterview2)
! 115: monthinw2 <- floor((dateinterview2 - floor(dateinterview2))*12)+1
! 116: int_00 <- paste0(monthinw2,"/",yrinterview2)
! 117: ageatinterview2 <- dateinterview2 - rabdate
! 118: # state 2
! 119: st_00 <- if_else(raddate < dateinterview2, 3, st_00)
! 120: hhidpn <- seq(1,samplesize)
! 121: HRSSIMULdata <- data.frame(hhidpn,ragender, weight, brt, ddt, int_98, st_98, int_00, st_00)
! 122: head(HRSSIMULdata,70)
! 123:
! 124: HRSSIMULdata <- HRSSIMULdata %>% filter(!is.na(ddt))
! 125: HRSSIMULdata <- HRSSIMULdata[,c("hhidpn","ragender", "weight", "brt", "ddt", "int_98", "st_98", "int_00", "st_00" )]
! 126: head(HRSSIMULdata,70)
! 127: #### export to txt file for IMaCh
! 128: write.table(HRSSIMULdata,file="HRSSIMUL.txt",col.names=F,row.names=F,quote=F)
! 129:
! 130:
! 131: #HRSSIMULdata<-HRSSIMULdata[,c("hhidpn","female","nhwhites","schlyrs","weight","brt","ddt","int_10","st_10","marpar_10","smoker_10","srh_10","int_12","st_12","marpar_12","smoker_12","srh_12","int_14","st_14","marpar_14","smoker_14","srh_14")]
! 132:
! 133: ## # VARIABLE SELECTIONS
! 134: ## dt1<-dat[,c( "hhidpn", # ID number of the respondent
! 135: ## "ragender", # respondent's gender 1M or 2F, mean 1.56
! 136: ## "rabdate", # Respondent's birth date (1890.0 to 1995.0)
! 137: ## "raddate", # Respondent's death date (1917.0 to 2019.0)
! 138: ## "rahispan", # Mexican-American and Other Hispanic are recoded to "1."
! 139: ## "raracem", # Race-masked: White/Caucasian 1, Black/African American 2, Other 3, Missing .
! 140: ## "raeduc", # Education: Years of
! 141: ## "r11mstat", # Marital status at wave 11: .J Webonterview missing, .M Other missing. Married 1
! 142: ## "r12mstat", # Married spouse absent 2
! 143: ## "r13mstat", # Partnered 3 Separated 4 Divorced 5
! 144: ## "r14mstat", # Widowed 7, Never married 8
! 145: ## "r11iwmid", # Date of interview at wave 11
! 146: ## "r12iwmid", #
! 147: ## "r13iwmid", #
! 148: ## "r14iwmid", #
! 149: ## "s11ddate", # Date of death (from wave 11) 1669
! 150: ## "s12ddate", # 967
! 151: ## "s13ddate", # 381
! 152: ## "s14ddate", # 8
! 153: ## "r11cesd", # CESD score at 11, 19400, mean 1.54
! 154: ## "r12cesd", #
! 155: ## "r13cesd", #
! 156: ## "r14cesd", #
! 157: ## "r11agey_m", # Age at mid wave 11 66.85
! 158: ## "r11adla", # Sum of ADLs at wave 11, 0.41
! 159: ## "r12adla", # 0.43
! 160: ## "r13adla", # 0.40
! 161: ## "r14adla", # 0.39
! 162: ## "r11wtresp" # Weight
! 163: ## )]
! 164:
! 165: # INDIVIDUAL SELECTIONS:
! 166: # Respondents in 2012, aged 50 and older, with no missing information on marital status in 2012
! 167: #dt2<-dt1 %>% filter(!is.na(r11mstat) & r11agey_m>=50);nrow(dm_bis)
! 168: #write.csv2(dt2,file="hrs12xSAS_noM.csv")
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>