# Install BiocManager first if it is not already available.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
# orthogene is only available on Bioconductor >= 3.14; on older versions,
# update the installed packages without prompting before installing it.
if (BiocManager::version() < "3.14") {
  BiocManager::install(update = TRUE, ask = FALSE)
}
BiocManager::install("orthogene")
# Ortholog-mapping method used throughout this walkthrough.
# "homologene" keeps the demonstration quick; method = "gprofiler"
# (the default) is generally recommended.
method <- "homologene"

# Attach orthogene and load the `exp_mouse` data used in the examples below.
library(orthogene)
data("exp_mouse")
It’s not always clear whether a dataset is using the original species gene names, human gene names, or some other species’ gene names.
infer_species takes a list/matrix/data.frame with genes and
infers the species that they best match to!
For the sake of speed, the genes extracted from gene_df
are tested against genomes from only the following 6 test_species by default:
- human
- monkey
- rat
- mouse
- zebrafish
- fly
However, you can supply your own list of test_species, which will
automatically be mapped and standardised using map_species.
# Infer which of the default test species the genes in exp_mouse best match.
matches <- orthogene::infer_species(
  gene_df = exp_mouse,
  method = method
)
## Preparing gene_df.
## sparseMatrix format detected.
## Extracting genes from rownames.
## 15,259 genes extracted.
## Testing for gene overlap with: human
## Retrieving all genes using: homologene.
## Retrieving all organisms available in homologene.
## Mapping species name: human
## Common name mapping found for human
## 1 organism identified from search: 9606
## Using cached file: /Users/biocbuild/Library/Caches/org.R-project.R/R/orthogene/all_genes-9606-homologene.csv.gz
## Returning all 19,129 genes from human.
## Testing for gene overlap with: monkey
## Retrieving all genes using: homologene.
## Retrieving all organisms available in homologene.
## Mapping species name: monkey
## Common name mapping found for monkey
## 1 organism identified from search: 9544
## Gene table with 16,843 rows retrieved.
## Caching file --> /Users/biocbuild/Library/Caches/org.R-project.R/R/orthogene/all_genes-9544-homologene.csv.gz
## Returning all 16,843 genes from monkey.
## Testing for gene overlap with: rat
## Retrieving all genes using: homologene.
## Retrieving all organisms available in homologene.
## Mapping species name: rat
## Common name mapping found for rat
## 1 organism identified from search: 10116
## Gene table with 20,616 rows retrieved.
## Caching file --> /Users/biocbuild/Library/Caches/org.R-project.R/R/orthogene/all_genes-10116-homologene.csv.gz
## Returning all 20,616 genes from rat.
## Testing for gene overlap with: mouse
## Retrieving all genes using: homologene.
## Retrieving all organisms available in homologene.
## Mapping species name: mouse
## Common name mapping found for mouse
## 1 organism identified from search: 10090
## Using cached file: /Users/biocbuild/Library/Caches/org.R-project.R/R/orthogene/all_genes-10090-homologene.csv.gz
## Returning all 21,207 genes from mouse.
## Testing for gene overlap with: zebrafish
## Retrieving all genes using: homologene.
## Retrieving all organisms available in homologene.
## Mapping species name: zebrafish
## Common name mapping found for zebrafish
## 1 organism identified from search: 7955
## Gene table with 20,897 rows retrieved.
## Caching file --> /Users/biocbuild/Library/Caches/org.R-project.R/R/orthogene/all_genes-7955-homologene.csv.gz
## Returning all 20,897 genes from zebrafish.
## Testing for gene overlap with: fly
## Retrieving all genes using: homologene.
## Retrieving all organisms available in homologene.
## Mapping species name: fly
## Common name mapping found for fly
## 1 organism identified from search: 7227
## Gene table with 8,438 rows retrieved.
## Caching file --> /Users/biocbuild/Library/Caches/org.R-project.R/R/orthogene/all_genes-7227-homologene.csv.gz
## Returning all 8,438 genes from fly.
## Top match:
## - species: mouse
## - percent_match: 92%
To create an example dataset, turn the gene names into rat genes.
# Convert the mouse gene names in exp_mouse to their rat orthologs.
exp_rat <- orthogene::convert_orthologs(
  gene_df = exp_mouse,
  input_species = "mouse",
  output_species = "rat",
  method = method
)
# Run species inference on the converted (rat) dataset.
matches <- orthogene::infer_species(
  gene_df = exp_rat,
  method = method
)
To create an example dataset, turn the gene names into human genes.
# Convert the mouse gene names in exp_mouse to their human orthologs.
exp_human <- orthogene::convert_orthologs(
  gene_df = exp_mouse,
  input_species = "mouse",
  output_species = "human",
  method = method
)
# Run species inference on the converted (human) dataset.
matches <- orthogene::infer_species(
  gene_df = exp_human,
  method = method
)
test_species

You can even supply test_species with the name of one of the R packages that
orthogene gets orthologs from. This will test against all species available
in that particular R package.
For example, by setting test_species="homologene" we automatically test for
the percentage of gene matches in each of the 20+ species available in homologene.
# Passing the package name ("homologene", stored in `method`) as test_species
# tests exp_human against every species available in that package.
matches <- orthogene::infer_species(
  gene_df = exp_human,
  test_species = method,
  method = method
)
# Print the R version, platform and package versions for this session.
utils::sessionInfo()
R Under development (unstable) (2025-11-04 r88984)
Platform: aarch64-apple-darwin20
Running under: macOS Ventura 13.7.8
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.6-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
locale:
[1] C/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: America/New_York
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] orthogene_1.17.2 BiocStyle_2.39.0
loaded via a namespace (and not attached):
[1] ggiraph_0.9.2 tidyselect_1.2.1
[3] viridisLite_0.4.2 dplyr_1.1.4
[5] farver_2.1.2 R.utils_2.13.0
[7] S7_0.2.1 fastmap_1.2.0
[9] lazyeval_0.2.2 homologene_1.4.68.19.3.27
[11] fontquiver_0.2.1 digest_0.6.39
[13] lifecycle_1.0.4 tidytree_0.4.6
[15] magrittr_2.0.4 compiler_4.6.0
[17] rlang_1.1.6 sass_0.4.10
[19] tools_4.6.0 yaml_2.3.12
[21] data.table_1.17.8 knitr_1.51
[23] ggsignif_0.6.4 labeling_0.4.3
[25] htmlwidgets_1.6.4 RColorBrewer_1.1-3
[27] aplot_0.2.9 abind_1.4-8
[29] babelgene_22.9 withr_3.0.2
[31] purrr_1.2.0 R.oo_1.27.1
[33] grid_4.6.0 ggpubr_0.6.2
[35] gdtools_0.4.4 ggplot2_4.0.1
[37] scales_1.4.0 dichromat_2.0-0.1
[39] tinytex_0.58 cli_3.6.5
[41] rmarkdown_2.30 treeio_1.35.0
[43] generics_0.1.4 otel_0.2.0
[45] ggtree_4.1.1 httr_1.4.7
[47] gprofiler2_0.2.4 ape_5.8-1
[49] cachem_1.1.0 parallel_4.6.0
[51] ggplotify_0.1.3 BiocManager_1.30.27
[53] yulab.utils_0.2.3 vctrs_0.6.5
[55] Matrix_1.7-4 jsonlite_2.0.0
[57] fontBitstreamVera_0.1.1 carData_3.0-5
[59] bookdown_0.46 car_3.1-3
[61] gridGraphics_0.5-1 patchwork_1.3.2
[63] rstatix_0.7.3 Formula_1.2-5
[65] systemfonts_1.3.1 magick_2.9.0
[67] plotly_4.11.0 tidyr_1.3.2
[69] jquerylib_0.1.4 glue_1.8.0
[71] gtable_0.3.6 tibble_3.3.0
[73] pillar_1.11.1 rappdirs_0.3.3
[75] htmltools_0.5.9 R6_2.6.1
[77] evaluate_1.0.5 lattice_0.22-7
[79] R.methodsS3_1.8.2 backports_1.5.0
[81] broom_1.0.11 ggfun_0.2.0
[83] fontLiberation_0.1.0 bslib_0.9.0
[85] Rcpp_1.1.0.8.2 nlme_3.1-168
[87] xfun_0.55 fs_1.6.6
[89] pkgconfig_2.0.3