--- imach/src/imach.c 2010/04/26 20:30:53 1.136 +++ imach/src/imach.c 2010/04/30 18:19:40 1.138 @@ -1,6 +1,13 @@ -/* $Id: imach.c,v 1.136 2010/04/26 20:30:53 brouard Exp $ +/* $Id: imach.c,v 1.138 2010/04/30 18:19:40 brouard Exp $ $State: Exp $ $Log: imach.c,v $ + Revision 1.138 2010/04/30 18:19:40 brouard + *** empty log message *** + + Revision 1.137 2010/04/29 18:11:38 brouard + (Module): Checking covariates for more complex models + than V1+V2. A lot of change to be done. Unstable. + Revision 1.136 2010/04/26 20:30:53 brouard (Module): merging some libgsl code. Fixing computation of likelione (using inter/intrapolation if mle = 0) in order to @@ -420,11 +427,11 @@ extern int errno; #define ODIRSEPARATOR '/' #endif -/* $Id: imach.c,v 1.136 2010/04/26 20:30:53 brouard Exp $ */ +/* $Id: imach.c,v 1.138 2010/04/30 18:19:40 brouard Exp $ */ /* $State: Exp $ */ char version[]="Imach version 0.98m, April 2010, INED-EUROREVES-Institut de longevite "; -char fullversion[]="$Revision: 1.136 $ $Date: 2010/04/26 20:30:53 $"; +char fullversion[]="$Revision: 1.138 $ $Date: 2010/04/30 18:19:40 $"; char strstart[80]; char optionfilext[10], optionfilefiname[FILENAMELENGTH]; int erreur=0, nberr=0, nbwarn=0; /* Error number, number of errors number of warnings */ @@ -622,11 +629,11 @@ void replace_back_to_slash(char *s, char } char *trimbb(char *out, char *in) -{ /* Trim multiple blanks in line */ +{ /* Trim multiple blanks in line but keeps first blanks if line starts with blanks */ char *s; s=out; while (*in != '\0'){ - while( *in == ' ' && *(in+1) == ' ' && *(in+1) != '\0'){ + while( *in == ' ' && *(in+1) == ' '){ /* && *(in+1) != '\0'){*/ in++; } *out++ = *in++; @@ -635,6 +642,35 @@ char *trimbb(char *out, char *in) return s; } +char *cutv(char *blocc, char *alocc, char *in, char occ) +{ + /* cuts string in into blocc and alocc where blocc ends before last occurence of char 'occ' + and alocc starts after last occurence of char 'occ' : ex cutv(blocc,alocc,"abcdef2ghi2j",'2') + gives blocc="abcdef2ghi" and alocc="j". + If occ is not found blocc is null and alocc is equal to in. Returns alocc + */ + char *s, *t; + t=in;s=in; + while (*in != '\0'){ + while( *in == occ){ + *blocc++ = *in++; + s=in; + } + *blocc++ = *in++; + } + if (s == t) /* occ not found */ + *(blocc-(in-s))='\0'; + else + *(blocc-(in-s)-1)='\0'; + in=s; + while ( *in != '\0'){ + *alocc++ = *in++; + } + + *alocc='\0'; + return s; +} + int nbocc(char *s, char occ) { int i,j=0; @@ -647,27 +683,27 @@ int nbocc(char *s, char occ) return j; } -void cutv(char *u,char *v, char*t, char occ) -{ - /* cuts string t into u and v where u ends before first occurence of char 'occ' - and v starts after first occurence of char 'occ' : ex cutv(u,v,"abcdef2ghi2j",'2') - gives u="abcedf" and v="ghi2j" */ - int i,lg,j,p=0; - i=0; - for(j=0; j<=strlen(t)-1; j++) { - if((t[j]!= occ) && (t[j+1]== occ)) p=j+1; - } +/* void cutv(char *u,char *v, char*t, char occ) */ +/* { */ +/* /\* cuts string t into u and v where u ends before last occurence of char 'occ' */ +/* and v starts after last occurence of char 'occ' : ex cutv(u,v,"abcdef2ghi2j",'2') */ +/* gives u="abcdef2ghi" and v="j" *\/ */ +/* int i,lg,j,p=0; */ +/* i=0; */ +/* lg=strlen(t); */ +/* for(j=0; j<=lg-1; j++) { */ +/* if((t[j]!= occ) && (t[j+1]== occ)) p=j+1; */ +/* } */ - lg=strlen(t); - for(j=0; j
=(p+1))(v[j-p-1] = t[j]); - } -} +/* for(j=0; j<= lg; j++) { */ +/* if (j>=(p+1))(v[j-p-1] = t[j]); */ +/* } */ +/* } */ /********************** nrerror ********************/ @@ -1227,21 +1263,21 @@ double **prevalim(double **prlim, int nl for(agefin=age-stepm/YEARM; agefin>=age-delaymax; agefin=agefin-stepm/YEARM){ newm=savm; /* Covariates have to be included here again */ - cov[2]=agefin; - - for (k=1; k<=cptcovn;k++) { - cov[2+k]=nbcode[Tvar[k]][codtab[ij][Tvar[k]]]; - /* printf("ij=%d k=%d Tvar[k]=%d nbcode=%d cov=%lf codtab[ij][Tvar[k]]=%d \n",ij,k, Tvar[k],nbcode[Tvar[k]][codtab[ij][Tvar[k]]],cov[2+k], codtab[ij][Tvar[k]]);*/ - } - for (k=1; k<=cptcovage;k++) cov[2+Tage[k]]=cov[2+Tage[k]]*cov[2]; - for (k=1; k<=cptcovprod;k++) - cov[2+Tprod[k]]=nbcode[Tvard[k][1]][codtab[ij][Tvard[k][1]]] * nbcode[Tvard[k][2]][codtab[ij][Tvard[k][2]]]; - - /*printf("ij=%d cptcovprod=%d tvar=%d ", ij, cptcovprod, Tvar[1]);*/ - /*printf("ij=%d cov[3]=%lf cov[4]=%lf \n",ij, cov[3],cov[4]);*/ - /*printf("ij=%d cov[3]=%lf \n",ij, cov[3]);*/ + cov[2]=agefin; + + for (k=1; k<=cptcovn;k++) { + cov[2+k]=nbcode[Tvar[k]][codtab[ij][Tvar[k]]]; + /* printf("ij=%d k=%d Tvar[k]=%d nbcode=%d cov=%lf codtab[ij][Tvar[k]]=%d \n",ij,k, Tvar[k],nbcode[Tvar[k]][codtab[ij][Tvar[k]]],cov[2+k], codtab[ij][Tvar[k]]);*/ + } + for (k=1; k<=cptcovage;k++) cov[2+Tage[k]]=cov[2+Tage[k]]*cov[2]; + for (k=1; k<=cptcovprod;k++) + cov[2+Tprod[k]]=nbcode[Tvard[k][1]][codtab[ij][Tvard[k][1]]] * nbcode[Tvard[k][2]][codtab[ij][Tvard[k][2]]]; + + /*printf("ij=%d cptcovprod=%d tvar=%d ", ij, cptcovprod, Tvar[1]);*/ + /*printf("ij=%d cov[3]=%lf cov[4]=%lf \n",ij, cov[3],cov[4]);*/ + /*printf("ij=%d cov[3]=%lf \n",ij, cov[3]);*/ out=matprod2(newm, pmij(pmmij,cov,ncovmodel,x,nlstate),1,nlstate+ndeath,1,nlstate+ndeath,1,nlstate+ndeath, oldm); - + savm=oldm; oldm=newm; maxmax=0.; @@ -1268,41 +1304,56 @@ double **prevalim(double **prlim, int nl double **pmij(double **ps, double *cov, int ncovmodel, double *x, int nlstate ) { - double s1, s2; + /* According to parameters values stored in x and the covariate's values stored in cov, + computes the probability to be observed in state j being in state i by appying the + model to the ncovmodel covariates (including constant and age). + lnpijopii=ln(pij/pii)= aij+bij*age+cij*v1+dij*v2+... = sum_nc=1^ncovmodel xij(nc)*cov[nc] + and, according on how parameters are entered, the position of the coefficient xij(nc) of the + ncth covariate in the global vector x is given by the formula: + j=i nc + ((i-1)*(nlstate+ndeath-1)+(j-2))*ncovmodel + Computes ln(pij/pii) (lnpijopii), deduces pij/pii by exponentiation, + sums on j different of i to get 1-pii/pii, deduces pii, and then all pij. + Outputs ps[i][j] the probability to be observed in j being in j according to + the values of the covariates cov[nc] and corresponding parameter values x[nc+shiftij] + */ + double s1, lnpijopii; /*double t34;*/ int i,j,j1, nc, ii, jj; for(i=1; i<= nlstate; i++){ for(j=1; ji s1=%.17e, s2=%.17e %lx %lx\n",s1,s2,s1,s2); */ + for (nc=1, lnpijopii=0.;nc <=ncovmodel; nc++){ + /*lnpijopii += x[(i-1)*nlstate*ncovmodel+(j-2)*ncovmodel+nc+(i-1)*(ndeath-1)*ncovmodel]*cov[nc];*/ + lnpijopii += x[nc + ((i-1)*(nlstate+ndeath-1)+(j-2))*ncovmodel]*cov[nc]; +/* printf("Int j>i s1=%.17e, lnpijopii=%.17e %lx %lx\n",s1,lnpijopii,s1,lnpijopii); */ } - ps[i][j]=s2; + ps[i][j]=lnpijopii; /* In fact ln(pij/pii) */ } } - /*ps[3][2]=1;*/ for(i=1; i<= nlstate; i++){ s1=0; for(j=1; ji} pij/pii=(1-pii)/pii and thus pii is known from s1 */ ps[i][i]=1./(s1+1.); + /* Computing other pijs */ for(j=1; j ncodemax[j]) break; - } - } - } - - for (k=0; k< maxncov; k++) Ndum[k]=0; - - for (i=1; i<=ncovmodel-2; i++) { /* -2, cste and age */ + } /* end of loop on */ + } /* end of loop on modality */ + } /* end of loop on model-covariate j. nbcode[Tvarj][1]=0 and nbcode[Tvarj][2]=1 sets the value of covariate j*/ + + for (k=0; k< maxncov; k++) Ndum[k]=0; + + for (i=1; i<=ncovmodel-2; i++) { /* -2, cste and age */ /* Listing of all covariables in statement model to see if some covariates appear twice. For example, V1 appears twice in V1+V1*V2.*/ ij=Tvar[i]; /* Tvar might be -1 if status was unknown */ Ndum[ij]++; } ij=1; - for (i=1; i<= maxncov; i++) { + for (i=1; i<= maxncov; i++) { /* modmaxcovj is unknown here. Only Ndum[2(V2),3(age*V3), 5(V3*V2) 6(V1*V4) */ if((Ndum[i]!=0) && (i<=ncovcol)){ Tvaraff[ij]=i; /*For printing */ ij++; @@ -4533,7 +4592,7 @@ int readdata(char datafile[], int firsto for (j=maxwav;j>=1;j--){ - cutv(stra, strb,line,' '); + cutv(stra, strb, line, ' '); if(strb[0]=='.') { /* Missing status */ lval=-1; }else{ @@ -4693,71 +4752,76 @@ int decodemodel ( char model[], int last cptcovprod=j1; /*Number of products V1*V2 +v3*age = 2 */ strcpy(modelsav,model); - if ((strcmp(model,"age")==0) || (strcmp(model,"age*age")==0)){ - printf("Error. Non available option model=%s ",model); - fprintf(ficlog,"Error. Non available option model=%s ",model);fflush(ficlog); + if (strstr(model,"AGE") !=0){ + printf("Error. AGE must be in lower case 'age' model=%s ",model); + fprintf(ficlog,"Error. AGE must be in lower case model=%s ",model);fflush(ficlog); return 1; } /* This loop fills the array Tvar from the string 'model'.*/ /* j is the number of + signs in the model V1+V2+V3 j=2 i=3 to 1 */ - /* modelsav=V3*age+V2+V1+V4 strb=V3*age stra=V2+V1+V4 - i=1 Tvar[1]=3 Tage[1]=1 - i=2 Tvar[2]=2 - i=3 Tvar[3]=1 - i=4 Tvar[4]= 4 - i=5 Tvar[5] - for (k=1; k<=cptcovn;k++) { - cov[2+k]=nbcode[Tvar[k]][codtab[ij][Tvar[k]]]; - */ - for(k=1; k<=(j+1);k++){ - cutv(strb,stra,modelsav,'+'); /* keeps in strb after the first '+' - modelsav=V3*age+V2+V1+V4 strb=V3*age stra=V2+V1+V4 + /* modelsav=V2+V1+V4+age*V3 strb=age*V3 stra=V2+V1+V4 */ + /* k=4 (age*V3) Tvar[k=4]= 3 (from V3) Tage[cptcovage=1]=4 */ + /* k=3 V4 Tvar[k=3]= 4 (from V4) */ + /* k=2 V1 Tvar[k=2]= 1 (from V1) */ + /* k=1 Tvar[1]=2 (from V2) */ + /* k=5 Tvar[5] */ + /* for (k=1; k<=cptcovn;k++) { */ + /* cov[2+k]=nbcode[Tvar[k]][codtab[ij][Tvar[k]]]; */ + /* } */ + /* for (k=1; k<=cptcovage;k++) cov[2+Tage[k]]=cov[2+Tage[k]]*cov[2]; */ + for(k=cptcovn; k>=1;k--){ + cutv(stra,strb,modelsav,'+'); /* keeps in strb after the first '+' + modelsav==V2+V1+V4+V3*age strb=V3*age stra=V2+V1+V4 */ - /* if (nbocc(modelsav,'+')==0) strcpy(strb,modelsav);*/ /* and analyzes it */ + if (nbocc(modelsav,'+')==0) strcpy(strb,modelsav); /* and analyzes it */ /* printf("i=%d a=%s b=%s sav=%s\n",i, stra,strb,modelsav);*/ /*scanf("%d",i);*/ - if (strchr(strb,'*')) { /* Model includes a product V3*age+V2+V1+V4 strb=V3*age */ + if (strchr(strb,'*')) { /* Model includes a product V2+V1+V4+V3*age strb=V3*age */ cutv(strd,strc,strb,'*'); /* strd*strc Vm*Vn: strb=V3*age strc=age strd=V3 ; V3*V2 strc=V2, strd=V3 */ if (strcmp(strc,"age")==0) { /* Vn*age */ cptcovprod--; cutv(strb,stre,strd,'V'); /* stre="V3" */ - Tvar[k]=atoi(stre); /* V1+V3*age+V2 Tvar[2]=3, and Tvar[3]=2 */ + Tvar[k]=atoi(stre); /* V2+V1+V4+V3*age Tvar[4]=2 ; V1+V2*age Tvar[2]=2 */ cptcovage++; /* Sums the number of covariates which include age as a product */ - Tage[cptcovage]=k; /* Tage[1] =2 */ + Tage[cptcovage]=k; /* Tage[1] = 4 */ /*printf("stre=%s ", stre);*/ - } - else if (strcmp(strd,"age")==0) { /* or age*Vn */ + } else if (strcmp(strd,"age")==0) { /* or age*Vn */ cptcovprod--; cutv(strb,stre,strc,'V'); Tvar[k]=atoi(stre); cptcovage++; Tage[cptcovage]=k; - } - else { /* Age is not in the model V1+V3*V2+V2 strb=V3*V2*/ + } else { /* Age is not in the model product V2+V1+V1*V4+V3*age+V3*V2 strb=V3*V2*/ + /* loops on k1=1 (V3*V2) and k1=2 V4*V3 */ cutv(strb,stre,strc,'V'); /* strc= Vn, stre is n; strb=V3*V2 stre=3 strc=*/ - Tvar[k]=ncovcol+k1; /* find 'n' in Vn and stores in Tvar. - If already ncovcol=2 and model=V2*V1 Tvar[1]=2+1 and Tvar[2]=2+2 etc */ + Tvar[k]=ncovcol+k1; /* For model-covariate k tells which data-covariate to use but + because this model-covariate is a construction we invent a new column + ncovcol + k1 + If already ncovcol=4 and model=V2+V1+V1*V4+age*V3+V3*V2 + Tvar[3=V1*V4]=4+1 Tvar[5=V3*V2]=4 + 2= 6, etc */ cutv(strb,strc,strd,'V'); /* strd was Vm, strc is m */ - Tprod[k1]=k; /* Tprod[1] */ - Tvard[k1][1]=atoi(strc); /* m*/ - Tvard[k1][2]=atoi(stre); /* n */ - Tvar[cptcovn+k2]=Tvard[k1][1]; - Tvar[cptcovn+k2+1]=Tvard[k1][2]; - for (i=1; i<=lastobs;i++) /* Computes the new covariate which is a product of covar[n][i]* covar[m][i] - and is stored at ncovol+k1 */ + Tprod[k1]=k; /* Tprod[1]=3(=V1*V4) for V2+V1+V1*V4+age*V3+V3*V2 */ + Tvard[k1][1]=atoi(strc); /* m 1 for V1*/ + Tvard[k1][2]=atoi(stre); /* n 4 for V4*/ + Tvar[cptcovn+k2]=Tvard[k1][1]; /* Tvar[(cptcovn=4+k2=1)=5]= 1 (V1) */ + Tvar[cptcovn+k2+1]=Tvard[k1][2]; /* Tvar[(cptcovn=4+(k2=1)+1)=6]= 4 (V4) */ + for (i=1; i<=lastobs;i++){ + /* Computes the new covariate which is a product of + covar[n][i]* covar[m][i] and stores it at ncovol+k1 */ covar[ncovcol+k1][i]=covar[atoi(stre)][i]*covar[atoi(strc)][i]; + } k1++; k2=k2+2; - } - } + } /* End age is not in the model */ + } /* End if model includes a product */ else { /* no more sum */ /*printf("d=%s c=%s b=%s\n", strd,strc,strb);*/ /* scanf("%d",i);*/ cutv(strd,strc,strb,'V'); - Tvar[i]=atoi(strc); + Tvar[k]=atoi(strc); } - strcpy(modelsav,stra); /* modelsav=V2+V3*age+V1+V4 strb=V3*age+V1+V4 */ + strcpy(modelsav,stra); /* modelsav=V2+V1+V4 stra=V2+V1+V4 */ /*printf("a=%s b=%s sav=%s\n", stra,strb,modelsav); scanf("%d",i);*/ } /* end of loop + */ @@ -4773,7 +4837,7 @@ int decodemodel ( char model[], int last scanf("%d ",i);*/ - return (0); + return (0); /* with covar[new additional covariate if product] and Tage if age */ endread: printf("Exiting decodemodel: "); return (1); @@ -5317,25 +5381,51 @@ run imach with mle=-1 to get a correct t anint=matrix(1,maxwav,1,n); s=imatrix(1,maxwav+1,1,n); /* s[i][j] health state for wave i and individual j */ tab=ivector(1,NCOVMAX); - ncodemax=ivector(1,8); + ncodemax=ivector(1,8); /* hard coded ? */ /* Reads data from file datafile */ if (readdata(datafile, firstobs, lastobs, &imx)==1) goto end; /* Calculation of the number of parameters from char model */ - Tvar=ivector(1,NCOVMAX); /* Was 15 changed to NCOVMAX. Stores the number n of the covariates in Vm+Vn at 1 and m at 2 */ + /* modelsav=V2+V1+V4+age*V3 strb=age*V3 stra=V2+V1+V4 + k=4 (age*V3) Tvar[k=4]= 3 (from V3) Tag[cptcovage=1]=4 + k=3 V4 Tvar[k=3]= 4 (from V4) + k=2 V1 Tvar[k=2]= 1 (from V1) + k=1 Tvar[1]=2 (from V2) + */ + Tvar=ivector(1,NCOVMAX); /* Was 15 changed to NCOVMAX. */ + /* V2+V1+V4+age*V3 is a model with 4 covariates (3 plus signs). + For each model-covariate stores the data-covariate id. Tvar[1]=2, Tvar[2]=1, Tvar[3]=4, + Tvar[4=age*V3] is 3 and 'age' is recorded in Tage. + */ + /* For model-covariate k tells which data-covariate to use but + because this model-covariate is a construction we invent a new column + ncovcol + k1 + If already ncovcol=4 and model=V2+V1+V1*V4+age*V3 + Tvar[3=V1*V4]=4+1 etc */ Tprod=ivector(1,15); + /* Tprod[k1=1]=3(=V1*V4) for V2+V1+V1*V4+age*V3 + if V2+V1+V1*V4+age*V3+V3*V2 TProd[k1=2]=5 (V3*V2) + */ Tvaraff=ivector(1,15); - Tvard=imatrix(1,15,1,2); - Tage=ivector(1,15); + Tvard=imatrix(1,15,1,2); /* For V3*V2 Tvard[k1=2][1]=3 (V3) Tvard[k1=2][2]=2(V2) */ + Tage=ivector(1,15); /* Gives the covariate id of covariates associated with age: V2 + V1 + age*V4 + V3*age + 4 covariates (3 plus signs) + Tage[1=V3*age]= 4; Tage[2=age*V4] = 3 + */ if(decodemodel(model, lastobs) == 1) goto end; + if((double)(lastobs-imx)/(double)imx > 1.10){ + nbwarn++; + printf("Warning: The value of parameter lastobs=%d is big compared to the \n effective number of cases imx=%d, please adjust, \n otherwise you are allocating more memory than necessary.\n",lastobs, imx); + fprintf(ficlog,"Warning: The value of parameter lastobs=%d is big compared to the \n effective number of cases imx=%d, please adjust, \n otherwise you are allocating more memory than necessary.\n",lastobs, imx); + } /* if(mle==1){*/ - if (weightopt != 1) { /* Maximisation without weights*/ - for(i=1;i<=n;i++) weight[i]=1.0; + if (weightopt != 1) { /* Maximisation without weights. We can have weights different from 1 but want no weight*/ + for(i=1;i<=imx;i++) weight[i]=1.0; /* changed to imx */ } /*-calculation of age at interview from date of interview and age at death -*/