Introduction
The omophub package provides an R interface to the OMOPHub API for accessing OHDSI ATHENA standardized medical vocabularies. This vignette demonstrates basic usage patterns.
Installation
Install from GitHub:
# install.packages("devtools")
devtools::install_github("omophub/omophub-R")
Authentication
The package requires an API key from OMOPHub.
Set your API key as an environment variable:
Sys.setenv(OMOPHUB_API_KEY = "your_api_key_here")
Or add it to your .Renviron file for persistence:
OMOPHUB_API_KEY=your_api_key_here
Creating a Client
library(omophub)
# Create client (reads API key from environment)
client <- OMOPHubClient$new()
# Or provide API key explicitly
client <- OMOPHubClient$new(api_key = "your_api_key")
# With additional options
client <- OMOPHubClient$new(
api_key = "your_api_key",
timeout = 30,
max_retries = 3,
vocab_version = "2025.1"
)
Getting Concepts
Retrieve a concept by its OMOP concept ID:
concept <- client$concepts$get(201826)
print(concept$concept_name)
# [1] "Type 2 diabetes mellitus"
Get a concept by vocabulary-specific code:
concept <- client$concepts$get_by_code("SNOMED", "44054006")
print(concept$concept_name)
# [1] "Type 2 diabetes mellitus"
Searching Concepts
Basic search:
results <- client$search$basic("diabetes mellitus", page_size = 10)
for (concept in results$data) {
cat(sprintf("%s: %s\n", concept$concept_id, concept$concept_name))
}
Search with filters:
results <- client$search$basic(
"heart attack",
vocabulary_ids = "SNOMED",
domain_ids = "Condition",
page_size = 20
)
Semantic Search
Search using natural language queries powered by neural embeddings:
# Natural language search - understands clinical intent
results <- client$search$semantic("high blood sugar levels")
for (r in results$data$results) {
cat(sprintf("%s (similarity: %.2f)\n", r$concept_name, r$similarity_score))
}
Filter semantic search results:
results <- client$search$semantic(
"heart attack",
vocabulary_ids = "SNOMED",
domain_ids = "Condition",
threshold = 0.5
)
Fetch all semantic search results with automatic pagination:
Similarity Search
Find concepts similar to a reference concept:
# Find concepts similar to Type 2 diabetes mellitus
similar <- client$search$similar(concept_id = 201826)
for (s in similar$similar_concepts) {
cat(sprintf("%s (score: %.2f)\n", s$concept_name, s$similarity_score))
}
Search by natural language query with different algorithms:
# Semantic similarity (neural embeddings)
similar <- client$search$similar(
query = "high blood pressure",
algorithm = "semantic"
)
# Lexical similarity (string matching)
similar <- client$search$similar(
query = "high blood pressure",
algorithm = "lexical"
)
# Hybrid (combined - default)
similar <- client$search$similar(
query = "high blood pressure",
algorithm = "hybrid",
include_scores = TRUE,
include_explanations = TRUE
)
Bulk Search
Search for multiple queries in a single API call — much faster than individual requests when you have many terms to look up.
Bulk Lexical Search
Execute up to 50 keyword searches at once:
results <- client$search$bulk_basic(list(
list(search_id = "q1", query = "diabetes mellitus"),
list(search_id = "q2", query = "hypertension"),
list(search_id = "q3", query = "aspirin")
), defaults = list(vocabulary_ids = list("SNOMED"), page_size = 5))
# Each result is matched by search_id
for (item in results$results) {
cat(sprintf("%s: %d results\n", item$search_id, length(item$results)))
}
Bulk Semantic Search
Execute up to 25 natural-language searches using neural embeddings:
results <- client$search$bulk_semantic(list(
list(search_id = "s1", query = "heart failure treatment options"),
list(search_id = "s2", query = "type 2 diabetes medication")
), defaults = list(threshold = 0.5, page_size = 10))
for (item in results$results) {
cat(sprintf("%s: %d results\n", item$search_id,
item$result_count %||% length(item$results)))
}
Defaults apply to all searches; per-search values override them:
# Different domains per query, shared vocabulary filter
results <- client$search$bulk_basic(list(
list(search_id = "cond", query = "diabetes", domain_ids = list("Condition")),
list(search_id = "drug", query = "metformin", domain_ids = list("Drug"))
), defaults = list(vocabulary_ids = list("SNOMED", "RxNorm"), page_size = 5))
Autocomplete
Get suggestions for autocomplete:
suggestions <- client$concepts$suggest("diab", page_size = 5)
for (s in suggestions$suggestions) {
print(s$suggestion)
}
Pagination
Manual Pagination
# First page
results <- client$search$basic("diabetes", page = 1, page_size = 50)
# Check pagination info
print(results$meta$total_items)
print(results$meta$has_next)
# Get next page if available
if (isTRUE(results$meta$has_next)) {
page2 <- client$search$basic("diabetes", page = 2, page_size = 50)
}
Hierarchy Navigation
Get ancestors (parent concepts):
result <- client$hierarchy$ancestors(201826, max_levels = 3)
for (ancestor in result$ancestors) {
print(ancestor$concept_name)
}
Get descendants (child concepts):
result <- client$hierarchy$descendants(201826, max_levels = 2)
for (descendant in result$descendants) {
print(descendant$concept_name)
}
Concept Mappings
Find how a concept maps to other vocabularies:
result <- client$mappings$get(201826)
for (mapping in result$mappings) {
cat(sprintf("%s: %s\n",
mapping$target_vocabulary_id,
mapping$target_concept_name))
}
Map to specific vocabularies:
result <- client$mappings$get(
201826,
target_vocabulary = "ICD10CM"
)
FHIR-to-OMOP Resolution
The FHIR resolver translates FHIR coded values to OMOP standard
concepts in a single call — handling URI mapping, code lookup,
"Maps to" relationship traversal, and CDM target table assignment
automatically.
CodeableConcept with Vocabulary Preference
result <- client$fhir$resolve_codeable_concept(
coding = list(
list(system = "http://snomed.info/sct", code = "44054006"),
list(system = "http://hl7.org/fhir/sid/icd-10-cm", code = "E11.9")
),
resource_type = "Condition"
)
# SNOMED wins over ICD-10-CM per OHDSI preference
cat(result$best_match$resolution$source_concept$vocabulary_id) # "SNOMED"
Tibble Output for Batch Resolution
For dplyr / tidyr workflows, pass
as_tibble = TRUE to get a flat tibble with one row per
input coding and columns for the source and standard concepts, target
CDM table, mapping type, and resolution status. This is the most
ergonomic shape for ETL pipelines processing many codes:
library(dplyr)
tbl <- client$fhir$resolve_batch(
list(
list(system = "http://hl7.org/fhir/sid/icd-10-cm", code = "E11.9"),
list(system = "http://hl7.org/fhir/sid/icd-10-cm", code = "I10"),
list(system = "http://hl7.org/fhir/sid/icd-10-cm", code = "J45.909")
),
as_tibble = TRUE
)
tbl |>
filter(status == "resolved") |>
select(source_code, standard_concept_name, target_table)
Failed rows are kept in place with status = "failed" and
the error text in status_detail, so you can inspect or filter them
explicitly instead of having them silently dropped. The batch summary
(total / resolved / failed) is
attached as an attribute:
attr(tbl, "summary")
The default as_tibble = FALSE still returns the
list-shaped list(results, summary), so existing code keeps
working unchanged.
Standalone Wrapper Functions
For pipe-friendly workflows, omophub also exports
standalone wrapper functions that take the client as their first
argument. Both forms are fully supported — pick whichever reads better
for the surrounding code:
# Equivalent to client$fhir$resolve()
client |>
fhir_resolve(
system = "http://snomed.info/sct",
code = "44054006",
resource_type = "Condition"
)
# Tibble-shaped batch in a pipe
tbl <- client |>
fhir_resolve_batch(
codings = list(
list(system = "http://snomed.info/sct", code = "44054006"),
list(system = "http://loinc.org", code = "2339-0")
),
as_tibble = TRUE
)
client |>
fhir_resolve_codeable_concept(
coding = list(
list(system = "http://snomed.info/sct", code = "44054006"),
list(system = "http://hl7.org/fhir/sid/icd-10-cm", code = "E11.9")
),
resource_type = "Condition"
)
FHIR Client Interop with omophub_fhir_url()
When you need raw FHIR Parameters / Bundle
responses instead of the Concept Resolver envelope,
omophub_fhir_url() returns the OMOPHub FHIR Terminology
Service base URL so you can talk to it directly with httr2
or fhircrackr. Supports FHIR versions "r4"
(default), "r4b", "r5", and
"r6":
omophub_fhir_url()
#> "https://fhir.omophub.com/fhir/r4"
omophub_fhir_url("r5")
#> "https://fhir.omophub.com/fhir/r5"
Example: call CodeSystem/$lookup directly with
httr2:
library(httr2)
resp <- request(omophub_fhir_url()) |>
req_url_path_append("CodeSystem/$lookup") |>
req_url_query(
system = "http://snomed.info/sct",
code = "44054006"
) |>
req_headers(Authorization = paste("Bearer", Sys.getenv("OMOPHUB_API_KEY"))) |>
req_perform()
params <- resp_body_json(resp)
# Raw FHIR Parameters resource with the concept display and designations.
Use client$fhir$resolve() (or
fhir_resolve()) when you want OMOP-enriched answers
(standard concept, CDM target table, mapping quality). Use
omophub_fhir_url() + httr2 when you need
FHIR-native responses for FHIR-native tooling.