Convert gene identifiers

convertGeneIdentifiers(
  genes,
  annotation = "Homo sapiens",
  key = "ENSEMBL",
  target = "SYMBOL",
  ignoreDuplicatedTargets = TRUE
)

Arguments

genes

Character: genes to be converted

annotation

OrgDb object with genome-wide annotation for an organism, or a character string with the species name used to query for an OrgDb, e.g. "Homo sapiens"

key

Character: type of identifier used, e.g. ENSEMBL; read ?AnnotationDbi::columns

target

Character: type of identifier to convert to; read ?AnnotationDbi::columns

ignoreDuplicatedTargets

Boolean: if TRUE, identifiers that share targets with other identifiers will not be converted

Value

Character vector of the respective targets of the gene identifiers. The original identifier is kept if other identifiers have the same target (when ignoreDuplicatedTargets = TRUE) or if no target was found.

Examples

genes <- c("ENSG00000012048", "ENSG00000083093", "ENSG00000141510",
           "ENSG00000051180")
convertGeneIdentifiers(genes)
#> downloading 1 resources
#> retrieving 1 resource
#> loading from cache
#> Loading required package: AnnotationDbi
#> Loading required package: stats4
#> Loading required package: BiocGenerics
#> 
#> Attaching package: ‘BiocGenerics’
#> The following objects are masked from ‘package:stats’:
#> 
#>     IQR, mad, sd, var, xtabs
#> The following objects are masked from ‘package:base’:
#> 
#>     Filter, Find, Map, Position, Reduce, anyDuplicated, aperm, append,
#>     as.data.frame, basename, cbind, colnames, dirname, do.call,
#>     duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
#>     lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
#>     pmin.int, rank, rbind, rownames, sapply, setdiff, sort, table,
#>     tapply, union, unique, unsplit, which.max, which.min
#> Loading required package: Biobase
#> Welcome to Bioconductor
#> 
#>     Vignettes contain introductory material; view with
#>     'browseVignettes()'. To cite Bioconductor, see
#>     'citation("Biobase")', and for packages 'citation("pkgname")'.
#> Loading required package: IRanges
#> Loading required package: S4Vectors
#> 
#> Attaching package: ‘S4Vectors’
#> The following object is masked from ‘package:utils’:
#> 
#>     findMatches
#> The following objects are masked from ‘package:base’:
#> 
#>     I, expand.grid, unname
#> ENSG00000012048 ENSG00000083093 ENSG00000141510 ENSG00000051180 
#>         "BRCA1"         "PALB2"          "TP53"         "RAD51" 
convertGeneIdentifiers(genes, key="ENSEMBL", target="UNIPROT")
#> loading from cache
#>                                                ENSG00000012048 
#>                                            "P38398/A0A024R1V0" 
#>                                                ENSG00000083093 
#>                                                       "Q86YC2" 
#>                                                ENSG00000141510 
#> "K7PPA8/P04637/Q53GA5/H2EHT1/A0A087X1Q1/A0A087WXZ1/A0A087WT22" 
#>                                                ENSG00000051180 
#>                                                       "Q06609" 

# Explicit species name to automatically look for its OrgDb database
sp <- "Homo sapiens"
genes <- c("ENSG00000012048", "ENSG00000083093", "ENSG00000141510",
           "ENSG00000051180")
convertGeneIdentifiers(genes, sp)
#> loading from cache
#> ENSG00000012048 ENSG00000083093 ENSG00000141510 ENSG00000051180 
#>         "BRCA1"         "PALB2"          "TP53"         "RAD51" 

# Alternatively, set the annotation database directly
ah <- AnnotationHub::AnnotationHub()
sp <- AnnotationHub::query(ah, c("OrgDb", "Homo sapiens"))[[1]]
#> loading from cache
columns(sp) # these columns can be used as values for the key and target arguments
#>  [1] "ACCNUM"       "ALIAS"        "ENSEMBL"      "ENSEMBLPROT"  "ENSEMBLTRANS"
#>  [6] "ENTREZID"     "ENZYME"       "EVIDENCE"     "EVIDENCEALL"  "GENENAME"    
#> [11] "GENETYPE"     "GO"           "GOALL"        "IPI"          "MAP"         
#> [16] "OMIM"         "ONTOLOGY"     "ONTOLOGYALL"  "PATH"         "PFAM"        
#> [21] "PMID"         "PROSITE"      "REFSEQ"       "SYMBOL"       "UCSCKG"      
#> [26] "UNIPROT"     

convertGeneIdentifiers(genes, sp)
#> ENSG00000012048 ENSG00000083093 ENSG00000141510 ENSG00000051180 
#>         "BRCA1"         "PALB2"          "TP53"         "RAD51"