hotfix ANOVA interactions

grlloyd · grlloyd · commit 13f7a1e550f2 · 2025-01-28T09:47:09.000Z
include interactions in outputs
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: structToolbox
 Type: Package
 Title: Data processing & analysis tools for Metabolomics and other omics
-Version: 1.18.0
+Version: 1.18.1
 Authors@R: c(
     person(
         c("Gavin","Rhys"),
diff --git a/NEWS b/NEWS
@@ -1,3 +1,6 @@
+Changes 1.81.1
++ fix missing interactions for ANOVA outputs
+
 Changes 1.15.2
 + fix ANOVA type 1
 + add control_group param to ttest
diff --git a/R/anova_class.R b/R/anova_class.R
@@ -9,11 +9,11 @@
 #' @export ANOVA
 ANOVA = function(alpha=0.05,mtc='fdr',formula,ss_type='III',...) {
     out=struct::new_struct('ANOVA',
-        alpha=alpha,
-        mtc=mtc,
-        formula=formula,
-        ss_type=ss_type,
-        ...)
+                           alpha=alpha,
+                           mtc=mtc,
+                           formula=formula,
+                           ss_type=ss_type,
+                           ...)
     return(out)
 }
 
@@ -33,147 +33,150 @@ ANOVA = function(alpha=0.05,mtc='fdr',formula,ss_type='III',...) {
         significant='entity'
     ),
     prototype = list(name='Analysis of Variance',
-        description=paste0(
-            "Analysis of Variance (ANOVA) is a univariate method used to ",
-            "analyse the difference among ",
-            "group means. Multiple test corrected p-values are computed to ",
-            "indicate significance for each feature."),
-        type="univariate",
-        predicted='p_value',
-        ontology="OBI:0200201",
-        libraries='car',
-        .params=c('alpha','mtc','formula','ss_type'),
-        .outputs=c('f_statistic','p_value','significant'),
-
-        alpha=ents$alpha,
-        mtc=ents$mtc,
-        formula=ents$formula,
-
-        ss_type=enum(name='ANOVA sum of squares',
-            description=c(
-                'I' = 'Type I sum of squares.',
-                'II' = 'Type II sum of squares.',
-                'III' = 'Type III sum of squares.'
-            ),
-            value='III',
-            type='character',
-            allowed=c('I','II','III')
-        ),
-
-        f_statistic=ents$f_statistic,
-        p_value=ents$p_value,
-        significant=ents$significant
+                     description=paste0(
+                         "Analysis of Variance (ANOVA) is a univariate method used to ",
+                         "analyse the difference among ",
+                         "group means. Multiple test corrected p-values are computed to ",
+                         "indicate significance for each feature."),
+                     type="univariate",
+                     predicted='p_value',
+                     ontology="OBI:0200201",
+                     libraries='car',
+                     .params=c('alpha','mtc','formula','ss_type'),
+                     .outputs=c('f_statistic','p_value','significant'),
+                     
+                     alpha=ents$alpha,
+                     mtc=ents$mtc,
+                     formula=ents$formula,
+                     
+                     ss_type=enum(name='ANOVA sum of squares',
+                                  description=c(
+                                      'I' = 'Type I sum of squares.',
+                                      'II' = 'Type II sum of squares.',
+                                      'III' = 'Type III sum of squares.'
+                                  ),
+                                  value='III',
+                                  type='character',
+                                  allowed=c('I','II','III')
+                     ),
+                     
+                     f_statistic=ents$f_statistic,
+                     p_value=ents$p_value,
+                     significant=ents$significant
     )
 )
 
 
 #' @export
 #' @template model_apply
 setMethod(f="model_apply",
-    signature=c("ANOVA",'DatasetExperiment'),
-    definition=function(M,D)
-    {
-        X=D$data
-        var_names=all.vars(M$formula)
-        
-        # attempt to detect within factors
-        within=which(var_names %in% all.names(M$formula)[which('Error'== all.names(M$formula))+2])
-        
-        if (length(within)>0) {
-            var_names_ex=var_names[-within]
-        } else {
-            var_names_ex=var_names
-        }
-        
-        y=D$sample_meta[var_names[2:length(var_names)]]
-        
-        # set the contrasts
-        O=options('contrasts') # keep the old ones
-        options(contrasts = c("contr.sum","contr.poly"))
-        
-        output=apply(X,2,function(x) {
-            temp=y
-            temp[[var_names[1]]]=x
-            
-            # check number levels
-            temp2=na.omit(temp)
-            s=sapply(var_names_ex[2:length(var_names)],function(x) summary(temp2[[x]]))
-            n=nrow(temp2)
-            
-            # check for alias columns
-            dona=FALSE
-            if (all(unlist(s)>2)) { # check we have enough levels
-                temp3=temp[,var_names_ex] # ignore within factors
-                al=alias(M$formula,temp3) # check we have independent columns
-                if ('Complete' %in% names(al)) {
-                    dona=TRUE
-                }
-            } else {
-                dona=TRUE
-            }
-            
-            # check for enough samples
-            temp3=temp
-            temp3[[var_names[1]]]=rnorm(nrow(y))
-            LM=lm(formula=M$formula,data=temp3)
-            p=nrow(summary(LM)$coefficients)
-            if (n<(p+1)) {
-                dona=TRUE
-            }
-            
-            if (dona) {
-                # missing values have probably prevented one or more combinations of levels being present
-                # use some fake data to generate the output table then replace all the values with NA
-                temp[[var_names[1]]]=rnorm(nrow(y))
-                LM=lm(formula=M$formula,data=temp)
-                if (M$ss_type=='I') {
-                    A=anova(LM)  
-                } else {
-                    A=car::Anova(LM,type=M$ss_type)
-                }
-                A[!is.na(A)]=NA
-                return(A)
-            }
-            
-            LM=lm(formula=M$formula,data=temp)
-            if (M$ss_type=='I') {
-                A=anova(LM)  
-            } else {
-                A=car::Anova(LM,type=M$ss_type)
-            }
-            return(A)
-        })
-        
-        f_statistic=sapply(output,function(x){
-            x$`F value`
-        })
-        f_statistic=as.data.frame(t(f_statistic))
-        colnames(f_statistic)=rownames(output[[1]])
-        f_statistic=f_statistic[,colnames(y),drop=FALSE]
-        
-        
-        p_value=sapply(output,function(x){
-            x$`Pr(>F)`
-        })
-        p_value=as.data.frame(t(p_value))
-        colnames(p_value)=rownames(output[[1]])
-        p_value=p_value[,colnames(y),drop=FALSE]
-        
-        # fdr correct the p.values
-        for (k in 1:ncol(p_value)) {
-            p_value[, k]=p.adjust(p_value[ ,k],M$mtc)
-        }
-        
-        # populate the object
-        M$f_statistic=f_statistic
-        M$p_value=p_value
-        M$significant=as.data.frame(p_value<M$alpha)
-        
-        # reset the contrasts
-        options(O)
-        
-        return(M)
-    }
+          signature=c("ANOVA",'DatasetExperiment'),
+          definition=function(M,D)
+          {
+              X=D$data
+              var_names=all.vars(M$formula)
+              
+              # attempt to detect within factors
+              within=which(var_names %in% all.names(M$formula)[which('Error'== all.names(M$formula))+2])
+              
+              if (length(within)>0) {
+                  var_names_ex=var_names[-within]
+              } else {
+                  var_names_ex=var_names
+              }
+              
+              y=D$sample_meta[var_names[2:length(var_names)]]
+              
+              # set the contrasts
+              O=options('contrasts') # keep the old ones
+              options(contrasts = c("contr.sum","contr.poly"))
+              
+              output=apply(X,2,function(x) {
+                  temp=y
+                  temp[[var_names[1]]]=x
+                  
+                  # check number levels
+                  temp2=na.omit(temp)
+                  s=sapply(var_names_ex[2:length(var_names)],function(x) summary(temp2[[x]]))
+                  n=nrow(temp2)
+                  
+                  # check for alias columns
+                  dona=FALSE
+                  if (all(unlist(s)>2)) { # check we have enough levels
+                      temp3=temp[,var_names_ex] # ignore within factors
+                      al=alias(M$formula,temp3) # check we have independent columns
+                      if ('Complete' %in% names(al)) {
+                          dona=TRUE
+                      }
+                  } else {
+                      dona=TRUE
+                  }
+                  
+                  # check for enough samples
+                  temp3=temp
+                  temp3[[var_names[1]]]=rnorm(nrow(y))
+                  LM=lm(formula=M$formula,data=temp3)
+                  p=nrow(summary(LM)$coefficients)
+                  if (n<(p+1)) {
+                      dona=TRUE
+                  }
+                  
+                  if (dona) {
+                      # missing values have probably prevented one or more combinations of levels being present
+                      # use some fake data to generate the output table then replace all the values with NA
+                      temp[[var_names[1]]]=rnorm(nrow(y))
+                      LM=lm(formula=M$formula,data=temp)
+                      if (M$ss_type=='I') {
+                          A=anova(LM)  
+                      } else {
+                          A=car::Anova(LM,type=M$ss_type)
+                      }
+                      A[!is.na(A)]=NA
+                      return(A)
+                  }
+                  
+                  LM=lm(formula=M$formula,data=temp)
+                  if (M$ss_type=='I') {
+                      A=anova(LM)  
+                  } else {
+                      A=car::Anova(LM,type=M$ss_type)
+                  }
+                  return(A)
+              })
+              
+              out_cols=terms(M$formula)
+              out_cols=attributes(out_cols)$term.labels
+              
+              f_statistic=sapply(output,function(x){
+                  x$`F value`
+              })
+              f_statistic=as.data.frame(t(f_statistic))
+              colnames(f_statistic)=rownames(output[[1]])
+              f_statistic=f_statistic[,out_cols,drop=FALSE]
+              
+              
+              p_value=sapply(output,function(x){
+                  x$`Pr(>F)`
+              })
+              p_value=as.data.frame(t(p_value))
+              colnames(p_value)=rownames(output[[1]])
+              p_value=p_value[,out_cols,drop=FALSE]
+              
+              # fdr correct the p.values
+              for (k in 1:ncol(p_value)) {
+                  p_value[, k]=p.adjust(p_value[ ,k],M$mtc)
+              }
+              
+              # populate the object
+              M$f_statistic=f_statistic
+              M$p_value=p_value
+              M$significant=as.data.frame(p_value<M$alpha)
+              
+              # reset the contrasts
+              options(O)
+              
+              return(M)
+          }
 )