# V3_Benchmarking_Ablation.Rmd
# Load the merged Golub data set (an ExpressionSet) from the golubEsets
# package.
data(Golub_Merge, package = "golubEsets")

# Keep only feature rows 200 through 259 (all samples retained) so the example
# stays small, then auto-print the subset to show its structure.
smallG <- Golub_Merge[200:259, ]
smallG
#> ExpressionSet (storageMode: lockedEnvironment)
#> assayData: 60 features, 72 samples
#> element names: exprs
#> protocolData: none
#> phenoData
#> sampleNames: 39 40 ... 33 (72 total)
#> varLabels: Samples ALL.AML ... Source (11 total)
#> varMetadata: labelDescription
#> featureData: none
#> experimentData: use 'experimentData(object)'
#> pubMedIds: 10521349
#> Annotation: hu6800
library(SummarizedExperiment)
#> Loading required package: GenomicRanges
#> Loading required package: stats4
#> Loading required package: BiocGenerics
#> Loading required package: parallel
#>
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:parallel':
#>
#> clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
#> clusterExport, clusterMap, parApply, parCapply, parLapply,
#> parLapplyLB, parRapply, parSapply, parSapplyLB
#> The following object is masked from 'package:limma':
#>
#> plotMA
#> The following objects are masked from 'package:dplyr':
#>
#> combine, intersect, setdiff, union
#> The following objects are masked from 'package:stats':
#>
#> IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#>
#> anyDuplicated, append, as.data.frame, basename, cbind,
#> colMeans, colnames, colSums, dirname, do.call, duplicated,
#> eval, evalq, Filter, Find, get, grep, grepl, intersect,
#> is.unsorted, lapply, lengths, Map, mapply, match, mget, order,
#> paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind,
#> Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort,
#> table, tapply, union, unique, unsplit, which, which.max,
#> which.min
#> Loading required package: S4Vectors
#>
#> Attaching package: 'S4Vectors'
#> The following object is masked from 'package:ParamHelpers':
#>
#> isEmpty
#> The following objects are masked from 'package:dplyr':
#>
#> first, rename
#> The following object is masked from 'package:base':
#>
#> expand.grid
#> Loading required package: IRanges
#>
#> Attaching package: 'IRanges'
#> The following object is masked from 'package:purrr':
#>
#> reduce
#> The following objects are masked from 'package:dplyr':
#>
#> collapse, desc, slice
#> The following object is masked from 'package:grDevices':
#>
#> windows
#> Loading required package: GenomeInfoDb
#> Loading required package: Biobase
#> Welcome to Bioconductor
#>
#> Vignettes contain introductory material; view with
#> 'browseVignettes()'. To cite Bioconductor, see
#> 'citation("Biobase")', and for packages 'citation("pkgname")'.
#> Loading required package: DelayedArray
#> Loading required package: matrixStats
#>
#> Attaching package: 'matrixStats'
#> The following objects are masked from 'package:Biobase':
#>
#> anyMissing, rowMedians
#> The following object is masked from 'package:dplyr':
#>
#> count
#> Loading required package: BiocParallel
#>
#> Attaching package: 'DelayedArray'
#> The following objects are masked from 'package:matrixStats':
#>
#> colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
#> The following object is masked from 'package:purrr':
#>
#> simplify
#> The following objects are masked from 'package:base':
#>
#> aperm, apply
# Convert the ExpressionSet subset into a SummarizedExperiment-family object
# (same conversion as in vignette V1) and auto-print the result.
smallG_SE <- makeSummarizedExperimentFromExpressionSet(smallG)
smallG_SE
#> class: RangedSummarizedExperiment
#> dim: 60 72
#> metadata(3): experimentData annotation protocolData
#> assays(1): exprs
#> rownames(60): D13627_at D13628_at ... D16350_at D16469_at
#> rowData names(0):
#> colnames(72): 39 40 ... 32 33
#> colData names(11): Samples ALL.AML ... PS Source
# How many top-ranked analytes to keep in the selection steps further down
# (passed to topTable() and cpoFilterUnivariate()).
top_DE_analytes_present <- 5

# Functional task: classification of ALL.AML with "ALL" as the positive class,
# with the assay data returned as a functional feature.
task_SE_Functional <- Fun_SE_to_taskFunc(
  smallG_SE,
  param.Y.name = "ALL.AML",
  param.covariates = NULL,
  param_positive_y_level = "ALL",
  task_return_format = "functional",
  task_type = "classif"
) # will work with either one or multiple assays
task_SE_Functional
#> Supervised task: DF_functionals
#> Type: classif
#> Target: ALL.AML
#> Observations: 72
#> Features:
#> numerics factors ordered functionals
#> 0 0 0 1
#> Missings: FALSE
#> Has weights: FALSE
#> Has blocking: FALSE
#> Has coordinates: FALSE
#> Classes: 2
#> ALL AML
#> 47 25
#> Positive class: ALL
# non-functional:
## 1. Directly from the functional task, as a plain data.frame.
## With functionals.as = "dfcols" the functional feature is expanded into
## separate numeric columns (see the conversion message and the str() output).
extracted_DF_from_task_SE <- getTaskData(
  task_SE_Functional,
  functionals.as = "dfcols"
)
#> Functional features have been converted to numerics
str(extracted_DF_from_task_SE)
#> 'data.frame': 72 obs. of 61 variables:
#> $ ALL.AML : Factor w/ 2 levels "ALL","AML": 1 1 1 1 1 1 1 1 1 1 ...
#> $ exprs.D13627_at : num 330 544 978 1035 3895 ...
#> $ exprs.D13628_at : num 229 147 110 237 106 256 144 84 -7 -3 ...
#> $ exprs.D13630_at : num 359 289 609 485 866 663 673 401 480 273 ...
#> $ exprs.D13633_at : num -9 57 207 302 475 0 112 257 244 252 ...
#> $ exprs.D13634_at : num 115 248 91 58 244 245 98 182 186 241 ...
#> $ exprs.D13635_at : num 31 -43 40 31 84 -159 -7 -2 62 111 ...
#> $ exprs.D13636_at : num 195 23 -60 317 449 -262 386 295 177 51 ...
#> $ exprs.D13637_at : num 161 137 -94 -96 432 -535 136 86 99 143 ...
#> $ exprs.D13639_at : num 456 3336 655 2771 3575 ...
#> $ exprs.D13640_at : num 1105 1204 1751 1008 932 ...
#> $ exprs.D13641_at : num 760 458 793 863 698 477 818 322 606 756 ...
#> $ exprs.D13642_at : num 272 150 6 44 134 173 36 302 -148 38 ...
#> $ exprs.D13643_at : num -1484 -1300 -298 -176 -558 ...
#> $ exprs.D13644_at : num 125 57 12 136 78 178 26 2 -29 -37 ...
#> $ exprs.D13645_at : num -53 131 -183 132 91 -374 176 9 -346 -98 ...
#> $ exprs.D13748_at : num 3225 4583 4543 6713 6817 ...
#> $ exprs.D13789_at : num -1303 -781 -453 -687 -353 ...
#> $ exprs.D13897_rna2_at: num 569 1267 146 619 444 ...
#> $ exprs.D13900_at : num 2293 1077 1809 1607 3233 ...
#> $ exprs.D13969_at : num -135 -373 -297 27 -36 -373 -37 -329 -211 -195 ...
#> $ exprs.D13988_at : num 1479 1264 1218 983 1261 ...
#> $ exprs.D14043_at : num 231 1427 1093 2927 3479 ...
#> $ exprs.D14134_at : num -130 -136 -132 -142 -170 -175 -25 -84 -14 -110 ...
#> $ exprs.D14446_at : num 205 127 225 189 239 166 29 141 -34 61 ...
#> $ exprs.D14497_at : num -45 57 5 -2 -11 4 46 -26 -91 -99 ...
#> $ exprs.D14520_at : num 65 -121 -223 48 -33 142 -28 153 -94 -35 ...
#> $ exprs.D14530_at : num 13361 16673 16061 19362 21519 ...
#> $ exprs.D14533_at : num -410 -300 -82 -84 -66 -227 -97 122 -69 -75 ...
#> $ exprs.D14657_at : num 436 412 1541 1666 4514 ...
#> $ exprs.D14658_at : num 444 828 583 1199 2177 ...
#> $ exprs.D14659_at : num -12 74 76 0 114 -9 67 28 2 -63 ...
#> $ exprs.D14660_at : num -82 -36 12 2 144 -51 104 40 28 -40 ...
#> $ exprs.D14661_at : num 400 272 1100 327 805 317 355 324 555 133 ...
#> $ exprs.D14662_at : num 417 863 494 540 1553 ...
#> $ exprs.D14663_at : num 598 202 720 401 1039 ...
#> $ exprs.D14664_at : num 101 74 1 302 140 -132 84 62 -74 18 ...
#> $ exprs.D14678_at : num 50 -553 2 172 -59 -121 252 81 189 -100 ...
#> $ exprs.D14686_at : num 354 464 692 445 465 531 212 177 312 111 ...
#> $ exprs.D14689_at : num 557 436 447 498 925 ...
#> $ exprs.D14694_at : num 1431 1155 1569 2006 1883 ...
#> $ exprs.D14695_at : num 296 206 330 487 319 532 114 175 114 442 ...
#> $ exprs.D14710_at : num 2920 3187 3358 5148 6462 ...
#> $ exprs.D14811_at : num 238 323 315 214 422 616 316 230 -17 499 ...
#> $ exprs.D14812_at : num 2404 1305 2553 3610 6604 ...
#> $ exprs.D14822_at : num 0 48 27 33 -13 64 32 -12 -69 -37 ...
#> $ exprs.D14823_at : num 36 36 -2 12 -2 -64 28 31 133 174 ...
#> $ exprs.D14827_at : num -325 -38 239 63 170 -8 -39 0 -250 -309 ...
#> $ exprs.D14838_at : num -79 -45 31 -67 -29 41 -111 -9 -57 94 ...
#> $ exprs.D14874_at : num 164 187 389 138 42 82 -18 120 -106 137 ...
#> $ exprs.D14878_at : num 238 346 439 714 1026 ...
#> $ exprs.D14889_at : num 547 379 286 432 398 646 111 293 226 284 ...
#> $ exprs.D15049_at : num -21 -282 311 -84 82 -306 48 -26 -111 -155 ...
#> $ exprs.D15050_at : num 473 355 2517 485 630 ...
#> $ exprs.D15057_at : num 369 178 593 628 2613 ...
#> $ exprs.D16181_at : num 41 133 77 -21 92 5 29 88 29 129 ...
#> $ exprs.D16217_at : num 407 869 775 369 795 752 349 499 283 527 ...
#> $ exprs.D16227_at : num -393 -7 15 -254 -219 -662 -23 -21 -259 -431 ...
#> $ exprs.D16294_at : num 190 213 363 58 618 195 377 132 416 109 ...
#> $ exprs.D16350_at : num 89 33 -23 90 33 -74 14 72 50 81 ...
#> $ exprs.D16469_at : num -19 226 770 874 2058 ...
## 2. Via Fun_SE_to_taskFunc(..., task_return_format = 'dfcols'):
## same target and positive class as the functional task, but the assay data
## is requested in the 'dfcols' (non-functional) format.
task_SE_NON_Functional <- Fun_SE_to_taskFunc(
  smallG_SE,
  param.Y.name = "ALL.AML",
  param.covariates = NULL,
  param_positive_y_level = "ALL",
  task_return_format = "dfcols",
  task_type = "classif"
) # will work with either one or multiple assays
# Differential expression with limma: second group (AML) versus first (ALL).
#
# Design matrix: an intercept column plus a 0/1 indicator for the AML group.
# The indicator is built explicitly because cbind() replaces a factor by its
# internal integer codes (1/2 here); with that coding the "Grp1" intercept
# would no longer be the mean of the first group. The coefficient tested below
# (the group difference) is the same under either coding.
design <- cbind(
  Grp1 = 1,
  Grp2vs1 = as.integer(smallG_SE$ALL.AML == "AML")
)

# Fit one linear model per analyte, then compute moderated t-statistics.
fit <- lmFit(assay(smallG_SE), design)
fit <- eBayes(fit)

# Keep the `top_DE_analytes_present` highest-ranked analytes for coefficient 2
# (the Grp2vs1 column) and print the table.
SE_topDE_limma <- topTable(fit, coef = 2, number = top_DE_analytes_present)
SE_topDE_limma
#> logFC AveExpr t P.Value adj.P.Val
#> D14664_at 222.89021 142.222222 6.098707 5.021597e-08 3.012958e-06
#> D14658_at -470.60596 758.361111 -4.122194 1.003426e-04 3.010278e-03
#> D16469_at 550.64085 900.513889 3.738111 3.719443e-04 7.438886e-03
#> D16217_at 270.34213 661.166667 3.527259 7.403340e-04 8.321168e-03
#> D14660_at -47.14979 -6.541667 -3.501750 8.033590e-04 8.321168e-03
#> B
#> D14664_at -4.417205
#> D14658_at -4.498600
#> D16469_at -4.514066
#> D16217_at -4.522339
#> D14660_at -4.523327
## TODO(check): cpoFilterUnivariate() may not support functional features,
## even though functionals should be converted to non-functional columns
## automatically. Below, the filter is applied to the non-functional task.
task_SE_NON_Functional_scaled <- task_SE_NON_Functional %>>% cpoScale()

# NOTE(review): the filter runs on the unscaled task; the scaled task created
# above is not used in the visible code -- confirm this is intended.
SE_topDE_CPO_univ <- task_SE_NON_Functional %>>%
  cpoFilterUnivariate(abs = top_DE_analytes_present)
SE_topDE_CPO_univ
#> Supervised task: DF_non_functional
#> Type: classif
#> Target: ALL.AML
#> Observations: 72
#> Features:
#> numerics factors ordered functionals
#> 5 0 0 0
#> Missings: FALSE
#> Has weights: FALSE
#> Has blocking: FALSE
#> Has coordinates: FALSE
#> Classes: 2
#> ALL AML
#> 47 25
#> Positive class: ALL
#SE_topDE_CPO_univ<-task_SE_Functional %>>% cpoFilterUnivariate(abs = top_DE_analytes_present)
#SE_topDE_CPO_univ
#SE_topDE_CPO_limma<-
# Attach MultiAssayExperiment and auto-print the miniACC object; the listing
# below shows it is a MultiAssayExperiment holding five experiments.
# NOTE(review): miniACC is presumably the example data set bundled with the
# package -- confirm it is available without an explicit data() call.
library(MultiAssayExperiment)
miniACC
#> A MultiAssayExperiment object of 5 listed
#> experiments with user-defined names and respective classes.
#> Containing an ExperimentList class object of length 5:
#> [1] RNASeq2GeneNorm: SummarizedExperiment with 198 rows and 79 columns
#> [2] gistict: SummarizedExperiment with 198 rows and 90 columns
#> [3] RPPAArray: SummarizedExperiment with 33 rows and 46 columns
#> [4] Mutations: matrix with 97 rows and 90 columns
#> [5] miRNASeqGene: SummarizedExperiment with 471 rows and 80 columns
#> Features:
#> experiments() - obtain the ExperimentList instance
#> colData() - the primary/phenotype DataFrame
#> sampleMap() - the sample availability DataFrame
#> `$`, `[`, `[[` - extract colData columns, subset, or experiment
#> *Format() - convert into a long or wide DataFrame
#> assays() - convert ExperimentList to a SimpleList of matrices
# Record the R version, platform, locale and attached/loaded package versions
# used for this run.
sessionInfo()
#> R version 3.5.0 (2018-04-23)
#> Platform: x86_64-w64-mingw32/x64 (64-bit)
#> Running under: Windows 10 x64 (build 14393)
#>
#> Matrix products: default
#>
#> locale:
#> [1] LC_COLLATE=English_United States.1252
#> [2] LC_CTYPE=English_United States.1252
#> [3] LC_MONETARY=English_United States.1252
#> [4] LC_NUMERIC=C
#> [5] LC_TIME=English_United States.1252
#>
#> attached base packages:
#> [1] parallel stats4 stats graphics grDevices utils datasets
#> [8] methods base
#>
#> other attached packages:
#> [1] MultiAssayExperiment_1.6.0 SummarizedExperiment_1.10.1
#> [3] DelayedArray_0.6.0 BiocParallel_1.14.1
#> [5] matrixStats_0.53.1 Biobase_2.40.0
#> [7] GenomicRanges_1.32.3 GenomeInfoDb_1.16.0
#> [9] IRanges_2.14.10 S4Vectors_0.18.2
#> [11] BiocGenerics_0.26.0 mlrCPO_0.3.4
#> [13] mlr_2.13.9000 ParamHelpers_1.12
#> [15] Bioc2mlr_0.1.0 limma_3.36.1
#> [17] purrr_0.2.5 magrittr_1.5
#> [19] dplyr_0.7.8
#>
#> loaded via a namespace (and not attached):
#> [1] Rcpp_1.0.0 lattice_0.20-35 tidyr_0.8.2
#> [4] assertthat_0.2.0 rprojroot_1.3-2 digest_0.6.18
#> [7] R6_2.3.0 plyr_1.8.4 backports_1.1.3
#> [10] evaluate_0.12 ggplot2_3.1.0 pillar_1.3.1
#> [13] zlibbioc_1.26.0 rlang_0.3.1 lazyeval_0.2.1
#> [16] rstudioapi_0.7 data.table_1.12.0 rpart_4.1-13
#> [19] Matrix_1.2-14 checkmate_1.9.1 rmarkdown_1.11
#> [22] pkgdown_1.3.0 desc_1.2.0 splines_3.5.0
#> [25] stringr_1.3.1 RCurl_1.95-4.10 munsell_0.5.0
#> [28] compiler_3.5.0 xfun_0.4 pkgconfig_2.0.2
#> [31] BBmisc_1.11 htmltools_0.3.6 tidyselect_0.2.5
#> [34] tibble_2.0.1 GenomeInfoDbData_1.1.0 roxygen2_6.1.1
#> [37] XML_3.98-1.16 crayon_1.3.4 MASS_7.3-49
#> [40] bitops_1.0-6 commonmark_1.5 grid_3.5.0
#> [43] gtable_0.2.0 scales_1.0.0 stringi_1.2.4
#> [46] XVector_0.20.0 fs_1.2.3 parallelMap_1.4
#> [49] bindrcpp_0.2.2 xml2_1.2.0 fastmatch_1.1-0
#> [52] tools_3.5.0 glue_1.3.0 survival_2.41-3
#> [55] yaml_2.2.0 colorspace_1.4-0 memoise_1.1.0
#> [58] knitr_1.21 bindr_0.1.1