Title: | Causal Generalized Linear Models |
Version: | 0.1.0 |
Description: | An implementation of methods for causal discovery in a structural causal model where the conditional distribution of the target node is described by a generalized linear model conditional on its causal parents. |
License: | GPL-3 |
Encoding: | UTF-8 |
RoxygenNote: | 7.2.3 |
Imports: | mgcv |
NeedsCompilation: | no |
Packaged: | 2025-03-10 16:46:51 UTC; v_vin |
Author: | Veronica Vinciotti [aut, cre], Ernst C. Wit [aut] |
Maintainer: | Veronica Vinciotti <veronica.vinciotti@unitn.it> |
Repository: | CRAN |
Date/Publication: | 2025-03-12 17:30:20 UTC |
Causal generalized additive model
Description
This function searches for a causal submodel within the generalized additive model provided.
Usage
cgam(
formula,
family,
data,
alpha = 0.05,
pval.approx = FALSE,
B = 100,
seed = 1,
search = c("all", "stepwise"),
...
)
Arguments
formula |
A formula object. |
family |
A distributional family object. Currently supported options are: binomial and poisson. |
data |
A data frame containing the variables in the model. |
alpha |
Significance level for statistical test. |
pval.approx |
If TRUE, chi-squared approximated p-values are calculated. Default is FALSE, in which case p-values are calculated via bootstrap. |
B |
Number of bootstrap samples used when pval.approx=FALSE. |
seed |
Seed for generating bootstrap samples. |
search |
If search="stepwise", a greedy forward stepwise search is conducted. Default is search="all", in which case all possible submodels are considered. |
... |
Further arguments to be passed to the gam function. |
Value
A gam object of the selected causal submodel.
Examples
# Causal Poisson GAM ----
# Simulated data: Y is drawn with mean exp(sin(X1)), and X2 is then computed
# from Y (log(Y + 1) plus Gaussian noise), so X2 is generated after Y.
n <- 1000
set.seed(123)
X1 <- rnorm(n, mean = 0, sd = 1)
Y <- rpois(n, lambda = exp(sin(X1)))
X2 <- log(Y + 1) + rnorm(n, mean = 0, sd = 0.5)
data <- data.frame(cbind(X1, X2, Y))

# Consider all possible submodels, using chi-squared approximated p-values.
cm_all <- cgam(
  Y ~ s(X1) + s(X2),
  family = "poisson",
  data = data,
  pval.approx = TRUE,
  search = "all"
)
cm_all$model.opt

# Same data and formula, but with the greedy forward stepwise search.
cm_step <- cgam(
  Y ~ s(X1) + s(X2),
  family = "poisson",
  data = data,
  pval.approx = TRUE,
  search = "stepwise"
)
cm_step$model.opt
# Bigger simulation with 7 covariates ----
set.seed(123)
n <- 1000

# Every Gaussian term below shares this standard deviation.
noise_sd <- sqrt(0.04)

# X1 -> X2 -> X3 form a chain; the Poisson log-mean m uses X2 and X3 only.
X1 <- rnorm(n = n, sd = noise_sd)
X2 <- X1 + rnorm(n = n, sd = noise_sd)
X3 <- X1 + X2 + rnorm(n = n, sd = noise_sd)
m <- sin(X2 * 5) + X3^3

# Z is a noisy copy of m; X4 is built from X2, X5 and X6 from Z, X7 from X6.
Z <- m + rnorm(n = n, sd = noise_sd)
X4 <- X2 + rnorm(n = n, sd = noise_sd)
X5 <- Z + rnorm(n = n, sd = noise_sd)
X6 <- Z + rnorm(n = n, sd = noise_sd)
X7 <- X6 + rnorm(n = n, sd = noise_sd)

# Y is the Poisson(exp(m)) quantile at the normal CDF value of Z, which ties
# the count response to the same noise that drives X5, X6 and X7.
Y <- qpois(pnorm(Z, mean = m, sd = noise_sd), lambda = exp(m))

dat <- data.frame(cbind(X1, X2, X3, X4, X5, X6, X7, Y))
fml <- Y ~ s(X1) + s(X2) + s(X3) + s(X4) + s(X5) + s(X6) + s(X7)

# Search over all possible submodels.
mod.all <- cgam(
  fml,
  family = "poisson",
  data = dat,
  pval.approx = TRUE,
  search = "all"
)
mod.all$model.opt

# Greedy forward stepwise search on the same formula.
mod.step <- cgam(
  fml,
  family = "poisson",
  data = dat,
  pval.approx = TRUE,
  search = "stepwise"
)
mod.step$model.opt
# Causal logistic GAM ----
# Y is Bernoulli with success probability exp(X1) / (1 + exp(X1)).
n <- 1000
set.seed(123)
X1 <- rnorm(n, mean = 0, sd = 1)
Y <- rbinom(n, size = 1, prob = exp(X1) / (1 + exp(X1)))

# X2 is computed from Y: it equals Y plus noise, except where flip == 1
# (probability 0.1), in which case only the noise term remains.
flip <- rbinom(n, size = 1, prob = 0.1)
X2 <- (1 - flip) * Y + rnorm(n, mean = 0, sd = 0.3)
data <- data.frame(cbind(X1, X2, Y))

# pval.approx = FALSE requests bootstrap p-values.
cm_all <- cgam(
  Y ~ s(X1) + s(X2),
  family = "binomial",
  data = data,
  pval.approx = FALSE,
  search = "all"
)
cm_all$model.opt

cm_step <- cgam(
  Y ~ s(X1) + s(X2),
  family = "binomial",
  data = data,
  pval.approx = FALSE,
  search = "stepwise"
)
cm_step$model.opt
Causal generalized linear model
Description
This function searches for a causal submodel within the generalized linear model provided.
Usage
cglm(
formula,
family,
data,
alpha = 0.05,
pval.approx = FALSE,
B = 100,
seed = 1,
search = c("all", "stepwise"),
...
)
Arguments
formula |
A formula object. |
family |
A distributional family object. Currently supported options are: binomial and poisson. |
data |
A data frame containing the variables in the model. |
alpha |
Significance level for statistical test. |
pval.approx |
If TRUE, chi-squared approximated p-values are calculated. Default is FALSE, in which case p-values are calculated via bootstrap. |
B |
Number of bootstrap samples used when pval.approx=FALSE. |
seed |
Seed for generating bootstrap samples. |
search |
If search="stepwise", a greedy forward stepwise search is conducted. Default is search="all", in which case all possible submodels are considered. |
... |
Further arguments to be passed to the glm function. |
Value
A glm object of the selected causal submodel.
Examples
# Causal Poisson GLM ----
# Simulated data: Y is drawn with mean exp(X1), and X2 is then computed from Y
# (log(Y + 1) plus Gaussian noise), so X2 is generated after Y.
n <- 1000
set.seed(123)
X1 <- rnorm(n, mean = 0, sd = 1)
Y <- rpois(n, lambda = exp(X1))
X2 <- log(Y + 1) + rnorm(n, mean = 0, sd = 0.3)
data <- data.frame(cbind(X1, X2, Y))

# Consider all possible submodels, using chi-squared approximated p-values.
cm_all <- cglm(
  Y ~ X1 + X2,
  family = "poisson",
  data = data,
  pval.approx = TRUE,
  search = "all"
)
cm_all$model.opt

# Same data and formula, but with the greedy forward stepwise search.
cm_step <- cglm(
  Y ~ X1 + X2,
  family = "poisson",
  data = data,
  pval.approx = TRUE,
  search = "stepwise"
)
cm_step$model.opt
# Causal logistic GLM ----
# Y is Bernoulli with success probability exp(X1) / (1 + exp(X1)).
n <- 2000
set.seed(123)
X1 <- rnorm(n, mean = 0, sd = 1)
Y <- rbinom(n, size = 1, prob = exp(X1) / (1 + exp(X1)))

# X2 is computed from Y: it equals Y plus noise, except where flip == 1
# (probability 0.1), in which case only the noise term remains.
flip <- rbinom(n, size = 1, prob = 0.1)
X2 <- (1 - flip) * Y + rnorm(n, mean = 0, sd = 0.3)
data <- data.frame(cbind(X1, X2, Y))

# pval.approx = FALSE requests bootstrap p-values.
cm_all <- cglm(
  Y ~ X1 + X2,
  family = "binomial",
  data = data,
  pval.approx = FALSE,
  search = "all"
)
cm_all$model.opt

cm_step <- cglm(
  Y ~ X1 + X2,
  family = "binomial",
  data = data,
  pval.approx = FALSE,
  search = "stepwise"
)
cm_step$model.opt
# Bigger simulation with 5 covariates ----
set.seed(12)
n <- 3000

# X2 is centred on X1 and X4 on X2; X3 is drawn independently of them.
X1 <- rnorm(n, mean = 0, sd = 1)
X2 <- rnorm(n, mean = X1, sd = 0.5)
X3 <- rnorm(n, mean = 0, sd = 1)
X4 <- rnorm(n, mean = X2, sd = 0.5)

# The linear predictor of Y involves X2 and X3 only.
eta <- 0.8 * X2 - 0.9 * X3
Y <- rbinom(n, size = 1, prob = exp(eta) / (1 + exp(eta)))

# X5 is computed from Y: a copy of Y that is switched to 1 - Y where
# flip == 1 (probability 0.1), plus Gaussian noise.
flip <- rbinom(n, size = 1, prob = 0.1)
X5 <- (1 - flip) * Y + flip * (1 - Y) + rnorm(n, mean = 0, sd = 0.3)

dat <- data.frame(cbind(X1, X2, X3, X4, X5, Y))

# pval.approx = FALSE requests bootstrap p-values.
mod.all <- cglm(
  Y ~ X1 + X2 + X3 + X4 + X5,
  family = "binomial",
  data = dat,
  pval.approx = FALSE,
  search = "all"
)
mod.all$model.opt

mod.step <- cglm(
  Y ~ X1 + X2 + X3 + X4 + X5,
  family = "binomial",
  data = dat,
  pval.approx = FALSE,
  search = "stepwise"
)
mod.step$model.opt