#

# =====================================================================

# Copyright (C) 1999-2005  Li Hsu

 

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

 

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

# GNU General Public License for more details.

 

# The text of the GNU General Public License, version 2, is available

# as http://www.gnu.org/copyleft or by writing to the Free Software

# Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

 

# The main reference for hybrid data analysis is

# Hsu L, Starr JR, Zheng Y, and Schwartz SM (2008). On combining triads and

# unrelated subjects data in candidate gene studies: An application to data on

# testicular cancer.  Under review.

# http://phsweb.fhcrc.org/stat/stat?action=showSoftware

# You can contact us at lih@fhcrc.org.

# =======================================================================

 

 

#############################################################################

# Functions for analysis of hybrid data

#############################################################################

 

 

hybrid = function(case.fam,cc.ctrl=NULL, n.env=0,model=NULL,interaction=NULL,nimp=1){

## Goal: wrapper function to perform combined case-parents and controls analysis

## nimp: number if imputations, when it is 1, it is complete case-family analysis

 

## labeling

            label = names(case.fam)

            if (!is.null(interaction)) {

                        for (i in 1:(length(interaction)/2)){

                                    i1=interaction[(2*i-1)]; i2=interaction[(2*i)]

                                    label=c(label, paste(label[i1],label[i2],sep="*"))

                        }

            }

 

            label=label[-(1:2)]

 

 

## remove cases (and their families) and controls if there is any

## missing value in the covariates

 

 

            case.fam=as.matrix(case.fam)

            if (!is.null(cc.ctrl)) cc.ctrl = as.matrix(cc.ctrl)

 

            na.count = apply(is.na(cc.ctrl[,-(1:2)]),1,sum)

            na.flag = na.count>=1

            cc.ctrl0 = cc.ctrl[!na.flag,]

 

            na.count = apply(is.na(case.fam[,-(1:2)]),1,sum)

            na.flag = na.count>=1

        case.fam0 = case.fam[!na.flag,]

 

            complete = NULL; case.fam1 = NULL

            single.case = NULL; dyads=NULL

 

            fam.id<-unique(case.fam0[,1])

 

            for (i in 1:length(fam.id)){

                        ff0<-case.fam0[,1]==fam.id[i]   # flag individuals with the same fam.id

                        if (1 %in% case.fam0[ff0,2]) { # the set has to include case

                                    if (sum(ff0)==1) {  # singleton case

                                                single.case = rbind(single.case,case.fam0[ff0,])

                                    } else {

                                                case.fam1 = rbind(case.fam1,case.fam0[ff0,])

                                                if (sum(ff0)==3) {

                                                            complete=rbind(complete, case.fam0[ff0,])

                                                }

                                                if (sum(ff0)==2) {

                                                            dyads=rbind(dyads, case.fam0[ff0,])

                                                }

                                    }

                        }

            }

 

## calculate number of parameters

            npar = ncol(case.fam)-1+length(interaction)/2 # include intercept,

                                                                        #main effects, and interaction

 

## perform analysis

            if (nimp==1) {

                        cc.ctrl0 = rbind(cc.ctrl0,dyads[(dyads[,2]==1),])

                        cc.ctrl0 = rbind(cc.ctrl0,single.case)

                        est0 = fit.model(case.fam=complete,cc.ctrl=cc.ctrl0, n.env=n.env,

                                    model=model,interaction=interaction)

            } else {

                        cc.ctrl0 = rbind(cc.ctrl0,single.case)

                        imp.est=matrix(0,nimp,3*npar); imp.se=imp.est

                        for (i in 1:nimp){

                                    case.imp = impute.parent(case.fam1)

                                    est0 = fit.model(case.fam=case.imp,cc.ctrl=cc.ctrl0, n.env=n.env,

                                                model=model,interaction=interaction)

                                    imp.est[i,] = c(t(est0$est))

                                    imp.se[i,] = c(t(est0$se))

                        }

                        imp.mean = apply(imp.est,2,mean)

                        imp.var<-imp.se^2

                        imp.se0<-(apply(imp.var,2,mean)+apply(imp.est,2,var)*(nimp+1)/nimp)^0.5

                        imp.mean1=matrix(imp.mean,ncol=npar,byrow=T)

                        imp.se1=matrix(imp.se0,ncol=npar,byrow=T)

                        est0 = list(est=imp.mean1,se=imp.se1)

            }

 

            low.ci = est0$est - 1.96*est0$se

            up.ci = est0$est + 1.96*est0$se

            p.value = (1-pnorm(abs(est0$est)/est0$se))*2

            low.ci0=matrix(substr(as.character(t(low.ci)),start=1,stop=5),byrow=T,ncol=npar)

            up.ci0=matrix(substr(as.character(t(up.ci)),start=1,stop=5),byrow=T,ncol=npar)

            p.value0 = matrix(substr(as.character(t(p.value)),start=1,stop=4),byrow=T,ncol=npar)

            est = matrix(substr(as.character(t(est0$est)),start=1,stop=5),byrow=T,ncol=npar)

            se = matrix(substr(as.character(t(est0$se)),start=1,stop=5),byrow=T,ncol=npar)

 

            entry=matrix(0,3,(npar-1))

            for (i in 1:3) {

                        for (j in 2:npar){

                                    if (se[i,j]==0) {entry[i,(j-1)]="----"} else {

                                                entry[i,(j-1)] = paste(est[i,j],"(",se[i,j], ",",

                                                            p.value0[i,j], ")",sep="")

                                    }

                        }

            }

 

           

            entry.label=cbind(label,t(entry))

            entry.label = rbind(c(" ","Case-Fam", "Case-Ctrl", "Case-Fam-Ctrl"),entry.label)

            tt=cbind(format(entry.label[,1]),format(entry.label[,-1]))

 

            for (i in 1:npar){

                        cat(tt[i,],fill=T,sep="   ","\n")

            }

 

}

      

momdad.all <- function(dadgeno,momgeno) {

## Goal: To find all possible genotypes for pseudo-siblings conditional on

##       parental genotypes.

## both momgeno and dadgeno are vectors

 

    if(length(momgeno)/2 != floor(length(momgeno)/2))stop("not an even

            number of alleles")

    if(length(momgeno)!= length(dadgeno))stop("mom and dad have different

            numbers of genes")

 

## transform it to matrix of genotypes with rows being the number of genes

    momgeno <- matrix(momgeno,ncol=2,byrow=T)

    dadgeno <- matrix(dadgeno,ncol=2,byrow=T)

    num.gene <-nrow(momgeno)

    dat <- momgeno[1,]

    dat <- rbind(cbind(dat,dadgeno[1,1]),cbind(dat,dadgeno[1,2]))

    if(num.gene>1) {

         for(i in 2:num.gene){

             dat <- rbind(cbind(dat,momgeno[i,1]),cbind(dat,momgeno[i,2]))

             dat <- rbind(cbind(dat,dadgeno[i,1]),cbind(dat,dadgeno[i,2]))

         }

    }

    dat

}

 

## Functions that are needed for estimation

 

comb.reduced<-function(par,fam,cc,env.ind){

## Goal: combined case-control and case-family

     npar<-length(par)

     indx<-(2:npar)[env.ind==0];   beta<-par[indx]

     indx1<-c(1:3, indx-1+3);   fam0<-fam[, indx1]  #indx1: find the indices

                                                    # of covariates in fam

     lik1<-condlik.reduced(beta,fam0)

     lik2<-cclik(par,cc)

     lik<-lik1+lik2

     return(lik)

}

 

condlik.reduced<-function(beta,fam){

## Fam: 1: famid; 2: individual id; 3: affected sibling; 4 and onward: covariates

      nn<-ncol(fam)

      tt<-exp(beta %*% t(fam[,4:nn]))

      tt1<-rowsum(c(tt),fam[,1], reorder=FALSE)

      tt2<-tt[fam[,3]==1]/tt1

      lik<- -sum(log(tt2))

      return(lik)

}

 

 

cclik<-function(par,cc){

## CC: 1:individual id; 2: disease status; 3 and onward: covariates

   alpha<-par[1];   beta<-par[-1];

   nn<-ncol(cc)

   risk<-beta %*% t(cc[,3:nn])

   lik0<-exp((alpha+risk)*cc[,2])/(1+exp(alpha+risk))

   lik<- -sum(log(lik0))

   return(lik)

}

 

mating<-function(p){

      q<-1-p;

      mu1<-p^4; mu2<-4*p^3*q; mu3<-2*p^2*q^2; mu4<-4*p^2*q^2;

      mu5<-4*p*q^3; mu6<-q^4

      return(c(mu1,mu2,mu3,mu4,mu5,mu6))

}

 

 

var.mat<-function(par,fam, cc,env.ind){

## some initial parameters

   npar <- length(par);   # number of parameters

##

## Calculate the score equations; par include alpha and beta

## First handling the case-control data

## 1st col: individual id; 2nd: disease status; 3rd and onward: covariates

##

   zcov<-cbind(1,cc[,3:ncol(cc)])

   cc.risk<-1/(1+exp(-par%*%t(zcov)))

   score.cc<-as.matrix(zcov*c(cc[,2]-cc.risk))  # function c(.) is to make the matrix as a

                                   # vector

  

 

##

## Next handling family-triads.

## fam is organized as one row per (peudo-) sib

## Fam: 1: famid; 2: individual id; 3: affected sibling; 4 and onward: covariates

##

   beta<-par[-1]; nn<-ncol(fam)

 

   fam.risk0<-c(exp(beta %*% t(fam[,4:nn])))   ## exp(beta*z), c(.) to vectorize

   fam.risk1<-c(rowsum(c(fam.risk0),fam[,1], reorder=FALSE)) ## sum(exp(beta*z))

   fam.risk2<-as.matrix(fam[,4:nn] * fam.risk0)  ## z*exp(beta*z) 

   fam.risk3<-matrix(0,length(fam.risk1),(nn-3))

   for (i in 1:(nn-3)) {             ## sum(z*exp(beta*z))/sum(exp(beta*z))

      fam.risk3[,i]<- rowsum(c(fam.risk2[,i]),fam[,1], reorder=FALSE)/

                      fam.risk1 

   }         

   score.case0<-fam[(fam[,3]==1),4:nn] - fam.risk3   ## score for the case families

   score.case<-cbind(0,score.case0)   # add the intercept

 

## Add subjects that are not in the CC but in the triads and vice versa

 

   unique.id<-sort(unique(c(fam[,2][fam[,3]==1],cc[,1])))

   score.cc0<-matrix(0,length(unique.id),npar)

   match.id<-match(cc[,1],unique.id)

   score.cc0[match.id, ] <- score.cc

   

## Add subjects that are not in the case-parents triads

   score.fam<-matrix(0,length(unique.id),npar)

   fam.ind.id<-fam[,2][fam[,3]==1]

   match.id<-match(fam.ind.id,unique.id)  # match the ind id in fam with the full id list

   score.fam[match.id, ]<-score.case # insert the rows with triads score

 

## Case-control

   sigma0.cc<-matrix(0,npar,npar)

   for (i in 1:nrow(score.cc)){

       sigma0.cc<-sigma0.cc+ score.cc[i,] %*% t(score.cc[i,])

   }

 

## Family

   sigma0.fam<-matrix(0,npar,npar)  # with intercept

   for (i in 1:nrow(score.case)){

       sigma0.fam<-sigma0.fam+ score.case[i,] %*% t(score.case[i,])

   }

 

## Adding cc+fam

   score.all<-score.cc0+score.fam

 

   sigma0<-matrix(0,npar,npar)

   for (i in 1:nrow(score.all)){

       sigma0<-sigma0+ score.all[i,] %*% t(score.all[i,])

   }

 

## Calculate the 2nd derivatives

 

## 2nd derivative for CC data

   mm1 <- cc.risk*(1 - cc.risk)  # h(x; theta)*(1-h(x;theta))

   mat.cc<-matrix(0,npar,npar)

   for (i in 1:nrow(score.cc)){

      mat.cc<-mat.cc+ mm1[i]*(zcov[i,] %*% t(zcov[i,]))

   }

 

## 2nd derivative for Fam data

   rep.times<-rowsum(rep(1,nrow(fam)),fam[,1],reorder=FALSE)

   deno<-rep(fam.risk1, times=rep.times)  # repeat sum(exp(beta*z)) by the number

                                          # of fam. members in that family

   weight <- fam.risk0/deno  # exp(beta*z)/sum(exp(beta*z))

   fam.cov<-cbind(0,fam[,4:nn])

   mat.fam<-matrix(0,npar,npar)

 

# iteration by individual, z^2*exp(beta*z))/sum(exp(beta*z))

   for (i in 1:nrow(fam)){ 

      mat.fam<-mat.fam+ weight[i]*(fam.cov[i,]%*% t(fam.cov[i,]))

   }

# iteration by family, (sum(z*exp(beta*z))/sum(exp(beta*z)))^2

   for (i in 1:nrow(score.case)){   

      mat.fam[-1,-1]<-mat.fam[-1,-1]-(fam.risk3[i,])%*% t(fam.risk3[i,]) 

   }

 

   mat.all<-mat.cc+mat.fam

   sigma1<-solve(mat.all)

   sigma<- diag(sigma1 %*% sigma0 %*% sigma1)^0.5

 

   sigma1<-solve(mat.cc)

#   sigma.cc<-diag(sigma1 %*% sigma0.cc %*% sigma1)^0.5

   sigma.cc = diag(sigma1)^0.5

 

   sigma.fam<-rep(0,length(sigma))

   indx<-(2:npar)[env.ind==0]

   sigma1<-solve(mat.fam[indx,indx])

   sigma.fam[indx]<-diag(sigma1 %*% (sigma0.fam[indx,indx]) %*% sigma1)^0.5

 

   sigma.all<-rbind(sigma.fam,sigma.cc,sigma)

   return(sigma.all)

}

 

mating.freq.geno<-function(fam){

## Goal: Calculating the triads frequencies for each gene

## fam:  1st column is famid and 2nd column is individual id, 3 and onward are genotypes

##

   n.gene<-ncol(fam)-2

   gene = as.matrix(fam[,-(1:2)])

 

   child<-fam[,2]==1

   father<-fam[,2]==2

   mother<-fam[,2]==3

 

   famid<-fam[child,1]

   conv.fam<-famid

   for (i in 1:n.gene){

        conv.fam<-cbind(conv.fam, gene[child,i])

        conv.fam<-merge(conv.fam,cbind(fam[father,1],gene[father,i]),

                  by.x=1,by.y=1,all=T)

        conv.fam<-merge(conv.fam,cbind(fam[mother,1],gene[mother,i]),

                  by.x=1,by.y=1,all=T)

   }

 

## taking out cases with missing genotypes

   for (i in 1:n.gene){

        if (i==1) ff<-!is.na(conv.fam[,2])

        ff<-ff & !is.na(conv.fam[,((i-1)*3+2)])

   }

   conv.fam<-conv.fam[ff,]

 

 

   mu<-matrix(0,10,n.gene)

   mu.v<-as.character(c(0,1,2,3,2.5,3.5,4.5,4,5,6))

   unique.id<-c(0,2,1,3,5,4,6)  # this is for calculating the posterior

                                # probability

 

   conv.fam0<-conv.fam   

   for (i in 1:n.gene){

      miss.o<-is.na(conv.fam0[,(i-1)*3+2])

      conv.fam<-conv.fam0[!miss.o,]

      miss<-is.na(conv.fam[,(i-1)*3+3]) + is.na(conv.fam[,(i-1)*3+4])

      comp<-conv.fam[miss==0,]

      momdad<-comp[,(i-1)*3+3] + comp[,(i-1)*3+4]

      momdad[(comp[,(i-1)*3+3]==1) & (comp[,(i-1)*3+4]==1)]<-2.5

      momdadchild<-momdad+comp[,(i-1)*3+2]

      count<-table(momdadchild)

      id<-match(names(count),mu.v)

      mu[id,i]<-count

      mu.c<-mu

 

      repeat{

      mu0<-mu[,i]/sum(mu[,i])

      pb<-post.prob(mu0)

      mu<-mu.c

      for (j in 1:nrow(conv.fam)){

        if (miss[j]==1) {

           ff<-is.na(conv.fam[j,(i-1)*3+3:4])

           id<-conv.fam[j,(i-1)*3+2]+conv.fam[j,(i-1)*3+3:4][!ff]*2

           indx<-match(id,unique.id)

           prob<-pb[indx,]

           if (indx==1) {mu[1,i]<-mu[1,i]+prob[1]

                         mu[2,i]<-mu[2,i]+prob[2]}

           if (indx==2) {mu[2,i]<-mu[2,i]+prob[1]

                         mu[5,i]<-mu[5,i]+prob[2]}

           if (indx==3) {mu[3,i]<-mu[3,i]+prob[2]

                         mu[4,i]<-mu[4,i]+prob[3]}

           if (indx==4) {mu[3,i]<-mu[3,i]+prob[1]

                         mu[6,i]<-mu[6,i]+prob[2]

                         mu[8,i]<-mu[8,i]+prob[3]}

           if (indx==5) {mu[4,i]<-mu[4,i]+prob[1]

                         mu[8,i]<-mu[8,i]+prob[2]}

           if (indx==6) {mu[7,i]<-mu[7,i]+prob[2]

                         mu[9,i]<-mu[9,i]+prob[3]}

           if (indx==7) {mu[9,i]<-mu[9,i]+prob[2]

                         mu[10,i]<-mu[10,i]+prob[3]}

        }

     }

#     print(mu[,i]/(sum(mu[,i])))

 

     if (sum(abs(mu[,i]/(sum(mu[,i]))-mu0))<10^{-5}) break;

      }

  }

 

  return(mu/nrow(conv.fam))

}

 

post.prob<-function(mu){

# mu: mu1, ..., mu10

    pb<-matrix(0,7,3)

    pb[1,]<-c(2*mu[1],mu[2],0)/(2*mu[1]+mu[2])

    pb[2,]<-c(mu[2],2*mu[5],0)/(mu[2]+2*mu[5])

    pb[3,]<-c(0, mu[3],mu[4])/(mu[3]+mu[4])

    pb[4,]<-c(mu[3],2*mu[6],mu[8])/(mu[3]+2*mu[6]+mu[8])

    if (mu[4]+mu[8]>0) {

        pb[5,]<-c(mu[4],mu[8],0)/(mu[4]+mu[8]) } else {

        pb[5,]<-c(1,0,0) }

    if (2*mu[7]+mu[9]>0) {

         pb[6,]<-c(0, 2*mu[7],mu[9])/(2*mu[7]+mu[9])} else {

         pb[6,]<-c(0,1,0)}

    if (mu[9]+2*mu[10]>0) {

         pb[7,]<-c(0, mu[9],2*mu[10])/(mu[9]+2*mu[10])} else {

         pb[7,]<-c(0,1,0)}

    return(pb)

}

 

conv.trans<-function(fam){

#################################################################################

# This function is to convert the gene count for each gene to two columns

# corresponding to two alleles.  The transmitted allele is listed first followed

# by the non-transmitted allele.  In the case of heterozygous genotypes for

# all family members, the father is assigned allele 1 for transmitted and 0

# for non-transmitted.  This shouldn't be a problem if there is no maternal or

# imprinting effect.

#################################################################################

#

# fam is a matrix of genotypes where each gene is coded as 0,1,2 for number of

#   variant alleles.

# The format is is as follows

#     1: famid

#     2: (1,2,3) for (case,father,mother)

#     3 and onward: genes which take values from {0,1,2}.

#

    fid<-unique(fam[,1]); n.gene<-ncol(fam)-2

    fami.conv<-NULL

    for (i in 1:length(fid)){

        fami<-fam[fam[,1]==fid[i],3:ncol(fam)]

        if (n.gene==1) {fami<-matrix(fami)}

        all.fam.g<-NULL

        for (j in 1:n.gene){

            if (fami[1,j]==0) {   # if offspring is homozygous aa

               offspring<-c(0,0)

               father<-c(0, fami[2,j]-0)

               mother<-c(0,fami[3,j]-0)

             }

            if (fami[1,j]==2) {   # if offspring is homozygous AA

               offspring<-c(1,1)

               father<-c(1, fami[2,j]-1)

               mother<-c(1,fami[3,j]-1)

             }

            if (fami[1,j]==1) {  # if offspring is heterozygous Aa

               if (fami[2,j]==0) {

                    offspring<-c(0,1)

                    father<-c(0,0)

                    mother<-c(1,fami[3,j]-1)

               }

               if (fami[3,j]==0) {

                    offspring<-c(1,0)

                    father<-c(1,fami[2,j]-1)

                    mother<-c(0,0)

               }

               if (fami[2,j]==2) {

                    offspring<-c(1,0)

                    father<-c(1,1)

                    mother<-c(0,fami[3,j]-0)

               }

               if (fami[3,j]==2) {

                    offspring<-c(0,1)

                    father<-c(0,fami[2,j]-0)

                    mother<-c(1,1)

               }

               if ((fami[2,j]==1) & (fami[3,j]==1)) {

                    offspring<-c(1,0)

                    father<-c(1,0)

                    mother<-c(0,1)

               }

             }

             all.fam.gj<-rbind(offspring, father,mother)

             all.fam.g<-cbind(all.fam.g,all.fam.gj)

       }

       fami.conv<-rbind(fami.conv,all.fam.g)       

      }

      fam0<-cbind(fam[,1:2],fami.conv)

      row.names(fam0)=NULL

      return(fam0)

}

     

##

imputation.geno<-function(fam,mu){

## Goal: imputing missing genotypes for parents

## fam: 1st column is famid, 2nd is individual ID (1:case, 2: father, 3: mother)

## mu: triad frequencies

 

   n.gene<-ncol(fam)-2

   gene<-as.matrix(fam[,-(1:2)])

 

   child<-fam[,2]==1

   father<-fam[,2]==2

   mother<-fam[,2]==3

 

   famid<-fam[child,1]

   conv.fam<-famid

   for (i in 1:n.gene){

        conv.fam<-cbind(conv.fam, gene[child,i])

        conv.fam<-merge(conv.fam,cbind(fam[father,1],gene[father,i]),

                  by.x=1,by.y=1,all=T)

        conv.fam<-merge(conv.fam,cbind(fam[mother,1],gene[mother,i]),

                  by.x=1,by.y=1,all=T)

   }

 

## calculating the posterior probability  

## unique.id is defined as offspring genotype + 2*parent genotype, giving

## (0,2,1,3,5,4,6)

   conv.fam0<-conv.fam

   unique.id<-c(0,2,1,3,5,4,6)

   for (i in 1:n.gene){

      miss.o<-!is.na(conv.fam[,(i-1)*3+2])   ## offspring missingness

      miss<-is.na(conv.fam[,(i-1)*3+3]) + is.na(conv.fam[,(i-1)*3+4]) #parent missingness

      pb<-post.prob(mu[,i])

      for (j in 1:nrow(conv.fam)){

         if ((miss.o[j]) & (miss[j]==1)) {  #impute only that are dyads

            ff<-is.na(conv.fam[j,(i-1)*3+3:4])

            id<-conv.fam[j,(i-1)*3+2]+conv.fam[j,(i-1)*3+3:4][!ff]*2

            prob<-pb[match(id,unique.id),]

            conv.fam[j,(i-1)*3+3:4][ff]<-c(0,1,2)[c(rmultinom(1,1,prob))==1]

          }

      }

   }

 

## convert the data into one subject per row

   imp.fam<-matrix(0,nrow(conv.fam)*3, n.gene)

   for (i in 1:nrow(conv.fam)){

     indx1<-seq(2,ncol(conv.fam),by=3)

     imp.fam[3*(i-1)+1,]<-unlist(conv.fam[i,indx1])

     indx1<-seq(3,ncol(conv.fam),by=3)

     imp.fam[3*(i-1)+2,]<-unlist(conv.fam[i,indx1])

     indx1<-seq(4,ncol(conv.fam),by=3)

     imp.fam[3*(i-1)+3,]<-unlist(conv.fam[i,indx1])

   }

 

## imp.fam, 1: famid; 2: (1,2,3) for (case,father,mother); 3: genes

   imp.fam<-cbind(rep(conv.fam[,1],each=3),rep(c(1,2,3),

             length(unique(conv.fam[,1]))), imp.fam)

  

   return(imp.fam)

}

 

impute.parent = function(case.fam, n.env=0) {

## Goal: impute the missing genotypes        

 

            if (n.env>0) {fam.gene = case.fam[,-(3:(2+n.env))]} else {fam.gene=case.fam}

            mu = mating.freq.geno(fam.gene)

            fam.imp<-imputation.geno(fam.gene,mu)

            if (n.env>0) {

                        fam = cbind(fam[,1:2], case.fam[,(3:(2+n.env))], fam.imp[,-(1:2)])

            } else {fam = fam.imp}

            return(fam)

}

 

fit.model = function(case.fam,cc.ctrl=NULL, n.env=0, model=NULL, interaction=NULL){

 

## cc.ctrl: singleton cases and unrelated controls

## n.env: number of environmental covariates and 0 for no covariates.

## model: a vector of values corresponding to the genes. Each element takes values of

##        1 if additive model, 2 if dominant, 3 if recessive.

##        if model=NULL, the default is additive model

## interaction: a vector of column index (i1, i2, i3, i4..) indicating

##             interaction of i1th column and i2th column, i3th and i4th columns

##             It is Null if no interaction effects are considered.

 

 

## order case families by family id and individual id

## individual id: 1: case, 2:father, 3:mother

 

            oo = order(case.fam[,1], case.fam[,2])

            case.fam = case.fam[oo,]

 

            case.fam = as.matrix(case.fam)

            ctrl = as.matrix(cc.ctrl)

 

## create a case-control data set

            cc0 = rbind(case.fam[case.fam[,2]==1,], ctrl)

 

## track the environmental covariates indicator

           

            if (n.env>0) { env.ind = c(rep(1,n.env),rep(0,(ncol(cc0)-2-n.env)))} else

                                    { env.ind = rep(0,(ncol(cc0)-2)) }

 

## recode the genetic effects for case-control data

            ng = ncol(cc0) - 2 - n.env   # number of genes

            for (i in 1:ng) {  # The default coding is additive model

                        if (!is.null(model)) {

                                    if (model[i]==2) {

                                                cc0[,(i+2+n.env)]=ifelse(cc0[,(i+2+n.env)]>=1,1,0) #dominant

                                    } else { if (model[i]==3) {

                                                cc0[,(i+2+n.env)]=ifelse(cc0[,(i+2+n.env)]>=2,1,0)

                                                             #recessive

                                                }

                                                }

                        }

            }

 

## derive interaction effects for case-control data

            if (!is.null(interaction)) {

                        ninter = length(interaction)/2

                        for (i in 1:ninter) {

                                    i1 = interaction[(2*i-1)]; i2 = interaction[(2*i)]

                                    cc0 = cbind(cc0, cc0[,i1]*cc0[,i2])

                                    env.ind = c(env.ind, env.ind[(i1-2)]*env.ind[(i2-2)])

                        }

            }

 

 

 

## fit a logistic regression model

#          fit = glm(cc0[,2]~cc0[,-(1:2)], family=binomial(link=logit))

#            print(summary(fit))

 

## converting genotype coding from (0,1,2) to the two columns

            if (n.env > 0) {

                        temp = case.fam[,-(3:(2+n.env))] } else {temp=case.fam}

            allele= conv.trans(temp)[,-(1:2)]

            row.names(allele)=NULL

 

## deriving all pseudo-sibling genotypes

            n.fam = nrow(allele)/3  # no. of families

            geno = NULL

            for (i in 1:n.fam) {

                        geno<-rbind(geno,momdad.all(allele[(3*i-1),],allele[(3*i),]))

            }

            ng = ncol(geno)/2  ## number of genes

 

 

## repeat the number of entries for environmental covariates to match the cases

            env =NULL

            if (n.env>0) {

                        for (i in 1:n.env){

                                    env = cbind(env,rep(case.fam[case.fam[,2]==1,(i+2)],each=4^ng))

                        }

            }

 

## code the genetic effects according to "model"

            gcov = matrix(0,nrow(geno),ng)

            for (i in 1:ng) {

                        temp = as.numeric(geno[,(2*i-1)])+as.numeric(geno[,(2*i)])

                        if (is.null(model)) {

                                    gcov[,i] = temp

                        } else {

                                    if (model[i]==1) {

                                                gcov[,i] = temp

                                    } else {

                                                if (model[i]==2) {

                                                            gcov[,i]=ifelse(temp>=1,1,0) #dominant

                                                } else {

                                                            gcov[,i]=ifelse(temp>=2,1,0)   #recessive

                                                }

                                    }

                        }

            }

 

## derive interaction effects

            zcov = cbind(env,gcov)

            if (!is.null(interaction)) {

                        ninter = length(interaction)/2

                        for (i in 1:ninter) {

                                    i1 = interaction[(2*i-1)]; i2 = interaction[(2*i)]

                                    zcov = cbind(zcov, zcov[,(i1-2)]*zcov[,(i2-2)])

                        }

            }

 

## obtaining estimates

 

            famid=rep(unique(case.fam[,1]),each=4^ng)

            nfam=length(unique(case.fam[,1]))

            aff.ind=rep(c(1,rep(0,4^ng-1)), times=nfam)

 

            npar = ncol(zcov) + 1           # the addition of 1 is for intercept

            par.init = rep(0,npar)

            par.est = matrix(0,3,npar)           # 1st row is conditional lk est

                                                            # 2nd row is cc est

                                                            # 3rd row is combined est

            se.est = matrix(0,3,npar)       # the se correspond to par.est

 

### conditional likelihood estimate

            fam.cond = cbind(famid,famid,aff.ind,zcov)  #the famid was repeated twice

                                                                            # to fit in the old cond. like

            par.ind = (2:npar)[env.ind==0]

            indx = c(1:3,par.ind+2)

            par.est[1,par.ind] = nlminb(par.init[par.ind],condlik.reduced,

                        fam=fam.cond[,indx])$par

            se.est[1,]<-var.mat(par.est[1,],fam=fam.cond,cc0,env.ind)[1,]

 

### case-control estimate

            par.est[2,] = nlminb(par.init,cclik,cc=cc0)$par

            se.est[2,] = var.mat(par.est[2,],fam.cond,cc0,env.ind)[2,]

 

### combined estimate

            par.est[3,] = nlminb(par.init,comb.reduced, fam=fam.cond,cc=cc0,env.ind=env.ind)$par

            se.est[3,] = var.mat(par.est[3,],fam.cond,cc0,env.ind)[3,]

 

            return(list(est=par.est,se=se.est))

}