Skip to content

Commit 13f7a1e

Browse files
committed
hotfix ANOVA interactions
include interactions in outputs
1 parent 6758b3b commit 13f7a1e

File tree

3 files changed

+146
-140
lines changed

3 files changed

+146
-140
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: structToolbox
22
Type: Package
33
Title: Data processing & analysis tools for Metabolomics and other omics
4-
Version: 1.18.0
4+
Version: 1.18.1
55
Authors@R: c(
66
person(
77
c("Gavin","Rhys"),

NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
Changes 1.81.1
2+
+ fix missing interactions for ANOVA outputs
3+
14
Changes 1.15.2
25
+ fix ANOVA type 1
36
+ add control_group param to ttest

R/anova_class.R

Lines changed: 142 additions & 139 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@
99
#' @export ANOVA
1010
ANOVA = function(alpha=0.05,mtc='fdr',formula,ss_type='III',...) {
1111
out=struct::new_struct('ANOVA',
12-
alpha=alpha,
13-
mtc=mtc,
14-
formula=formula,
15-
ss_type=ss_type,
16-
...)
12+
alpha=alpha,
13+
mtc=mtc,
14+
formula=formula,
15+
ss_type=ss_type,
16+
...)
1717
return(out)
1818
}
1919

@@ -33,147 +33,150 @@ ANOVA = function(alpha=0.05,mtc='fdr',formula,ss_type='III',...) {
3333
significant='entity'
3434
),
3535
prototype = list(name='Analysis of Variance',
36-
description=paste0(
37-
"Analysis of Variance (ANOVA) is a univariate method used to ",
38-
"analyse the difference among ",
39-
"group means. Multiple test corrected p-values are computed to ",
40-
"indicate significance for each feature."),
41-
type="univariate",
42-
predicted='p_value',
43-
ontology="OBI:0200201",
44-
libraries='car',
45-
.params=c('alpha','mtc','formula','ss_type'),
46-
.outputs=c('f_statistic','p_value','significant'),
47-
48-
alpha=ents$alpha,
49-
mtc=ents$mtc,
50-
formula=ents$formula,
51-
52-
ss_type=enum(name='ANOVA sum of squares',
53-
description=c(
54-
'I' = 'Type I sum of squares.',
55-
'II' = 'Type II sum of squares.',
56-
'III' = 'Type III sum of squares.'
57-
),
58-
value='III',
59-
type='character',
60-
allowed=c('I','II','III')
61-
),
62-
63-
f_statistic=ents$f_statistic,
64-
p_value=ents$p_value,
65-
significant=ents$significant
36+
description=paste0(
37+
"Analysis of Variance (ANOVA) is a univariate method used to ",
38+
"analyse the difference among ",
39+
"group means. Multiple test corrected p-values are computed to ",
40+
"indicate significance for each feature."),
41+
type="univariate",
42+
predicted='p_value',
43+
ontology="OBI:0200201",
44+
libraries='car',
45+
.params=c('alpha','mtc','formula','ss_type'),
46+
.outputs=c('f_statistic','p_value','significant'),
47+
48+
alpha=ents$alpha,
49+
mtc=ents$mtc,
50+
formula=ents$formula,
51+
52+
ss_type=enum(name='ANOVA sum of squares',
53+
description=c(
54+
'I' = 'Type I sum of squares.',
55+
'II' = 'Type II sum of squares.',
56+
'III' = 'Type III sum of squares.'
57+
),
58+
value='III',
59+
type='character',
60+
allowed=c('I','II','III')
61+
),
62+
63+
f_statistic=ents$f_statistic,
64+
p_value=ents$p_value,
65+
significant=ents$significant
6666
)
6767
)
6868

6969

7070
#' @export
7171
#' @template model_apply
7272
setMethod(f="model_apply",
73-
signature=c("ANOVA",'DatasetExperiment'),
74-
definition=function(M,D)
75-
{
76-
X=D$data
77-
var_names=all.vars(M$formula)
78-
79-
# attempt to detect within factors
80-
within=which(var_names %in% all.names(M$formula)[which('Error'== all.names(M$formula))+2])
81-
82-
if (length(within)>0) {
83-
var_names_ex=var_names[-within]
84-
} else {
85-
var_names_ex=var_names
86-
}
87-
88-
y=D$sample_meta[var_names[2:length(var_names)]]
89-
90-
# set the contrasts
91-
O=options('contrasts') # keep the old ones
92-
options(contrasts = c("contr.sum","contr.poly"))
93-
94-
output=apply(X,2,function(x) {
95-
temp=y
96-
temp[[var_names[1]]]=x
97-
98-
# check number levels
99-
temp2=na.omit(temp)
100-
s=sapply(var_names_ex[2:length(var_names)],function(x) summary(temp2[[x]]))
101-
n=nrow(temp2)
102-
103-
# check for alias columns
104-
dona=FALSE
105-
if (all(unlist(s)>2)) { # check we have enough levels
106-
temp3=temp[,var_names_ex] # ignore within factors
107-
al=alias(M$formula,temp3) # check we have independent columns
108-
if ('Complete' %in% names(al)) {
109-
dona=TRUE
110-
}
111-
} else {
112-
dona=TRUE
113-
}
114-
115-
# check for enough samples
116-
temp3=temp
117-
temp3[[var_names[1]]]=rnorm(nrow(y))
118-
LM=lm(formula=M$formula,data=temp3)
119-
p=nrow(summary(LM)$coefficients)
120-
if (n<(p+1)) {
121-
dona=TRUE
122-
}
123-
124-
if (dona) {
125-
# missing values have probably prevented one or more combinations of levels being present
126-
# use some fake data to generate the output table then replace all the values with NA
127-
temp[[var_names[1]]]=rnorm(nrow(y))
128-
LM=lm(formula=M$formula,data=temp)
129-
if (M$ss_type=='I') {
130-
A=anova(LM)
131-
} else {
132-
A=car::Anova(LM,type=M$ss_type)
133-
}
134-
A[!is.na(A)]=NA
135-
return(A)
136-
}
137-
138-
LM=lm(formula=M$formula,data=temp)
139-
if (M$ss_type=='I') {
140-
A=anova(LM)
141-
} else {
142-
A=car::Anova(LM,type=M$ss_type)
143-
}
144-
return(A)
145-
})
146-
147-
f_statistic=sapply(output,function(x){
148-
x$`F value`
149-
})
150-
f_statistic=as.data.frame(t(f_statistic))
151-
colnames(f_statistic)=rownames(output[[1]])
152-
f_statistic=f_statistic[,colnames(y),drop=FALSE]
153-
154-
155-
p_value=sapply(output,function(x){
156-
x$`Pr(>F)`
157-
})
158-
p_value=as.data.frame(t(p_value))
159-
colnames(p_value)=rownames(output[[1]])
160-
p_value=p_value[,colnames(y),drop=FALSE]
161-
162-
# fdr correct the p.values
163-
for (k in 1:ncol(p_value)) {
164-
p_value[, k]=p.adjust(p_value[ ,k],M$mtc)
165-
}
166-
167-
# populate the object
168-
M$f_statistic=f_statistic
169-
M$p_value=p_value
170-
M$significant=as.data.frame(p_value<M$alpha)
171-
172-
# reset the contrasts
173-
options(O)
174-
175-
return(M)
176-
}
73+
signature=c("ANOVA",'DatasetExperiment'),
74+
definition=function(M,D)
75+
{
76+
X=D$data
77+
var_names=all.vars(M$formula)
78+
79+
# attempt to detect within factors
80+
within=which(var_names %in% all.names(M$formula)[which('Error'== all.names(M$formula))+2])
81+
82+
if (length(within)>0) {
83+
var_names_ex=var_names[-within]
84+
} else {
85+
var_names_ex=var_names
86+
}
87+
88+
y=D$sample_meta[var_names[2:length(var_names)]]
89+
90+
# set the contrasts
91+
O=options('contrasts') # keep the old ones
92+
options(contrasts = c("contr.sum","contr.poly"))
93+
94+
output=apply(X,2,function(x) {
95+
temp=y
96+
temp[[var_names[1]]]=x
97+
98+
# check number levels
99+
temp2=na.omit(temp)
100+
s=sapply(var_names_ex[2:length(var_names)],function(x) summary(temp2[[x]]))
101+
n=nrow(temp2)
102+
103+
# check for alias columns
104+
dona=FALSE
105+
if (all(unlist(s)>2)) { # check we have enough levels
106+
temp3=temp[,var_names_ex] # ignore within factors
107+
al=alias(M$formula,temp3) # check we have independent columns
108+
if ('Complete' %in% names(al)) {
109+
dona=TRUE
110+
}
111+
} else {
112+
dona=TRUE
113+
}
114+
115+
# check for enough samples
116+
temp3=temp
117+
temp3[[var_names[1]]]=rnorm(nrow(y))
118+
LM=lm(formula=M$formula,data=temp3)
119+
p=nrow(summary(LM)$coefficients)
120+
if (n<(p+1)) {
121+
dona=TRUE
122+
}
123+
124+
if (dona) {
125+
# missing values have probably prevented one or more combinations of levels being present
126+
# use some fake data to generate the output table then replace all the values with NA
127+
temp[[var_names[1]]]=rnorm(nrow(y))
128+
LM=lm(formula=M$formula,data=temp)
129+
if (M$ss_type=='I') {
130+
A=anova(LM)
131+
} else {
132+
A=car::Anova(LM,type=M$ss_type)
133+
}
134+
A[!is.na(A)]=NA
135+
return(A)
136+
}
137+
138+
LM=lm(formula=M$formula,data=temp)
139+
if (M$ss_type=='I') {
140+
A=anova(LM)
141+
} else {
142+
A=car::Anova(LM,type=M$ss_type)
143+
}
144+
return(A)
145+
})
146+
147+
out_cols=terms(M$formula)
148+
out_cols=attributes(out_cols)$term.labels
149+
150+
f_statistic=sapply(output,function(x){
151+
x$`F value`
152+
})
153+
f_statistic=as.data.frame(t(f_statistic))
154+
colnames(f_statistic)=rownames(output[[1]])
155+
f_statistic=f_statistic[,out_cols,drop=FALSE]
156+
157+
158+
p_value=sapply(output,function(x){
159+
x$`Pr(>F)`
160+
})
161+
p_value=as.data.frame(t(p_value))
162+
colnames(p_value)=rownames(output[[1]])
163+
p_value=p_value[,out_cols,drop=FALSE]
164+
165+
# fdr correct the p.values
166+
for (k in 1:ncol(p_value)) {
167+
p_value[, k]=p.adjust(p_value[ ,k],M$mtc)
168+
}
169+
170+
# populate the object
171+
M$f_statistic=f_statistic
172+
M$p_value=p_value
173+
M$significant=as.data.frame(p_value<M$alpha)
174+
175+
# reset the contrasts
176+
options(O)
177+
178+
return(M)
179+
}
177180
)
178181

179182

0 commit comments

Comments
 (0)