Query cellxgene collections, datasets, and files
Source:R/collections.R, R/datasets.R, R/files.R, and 1 more
query.Rd
files_download() retrieves one or more cellxgene
files to a cache on the local system.
links(), authors() and publisher_metadata() are
helper functions to extract 'nested' information from
collections.
Arguments
- cellxgene_db
an optional 'cellxgene_db' object, as returned by
db().
- tbl
a tibble() typically derived from datasets(db) or files(db) and containing columns dataset_id (for datasets_visualize()), or columns dataset_id, file_id, and filetype (for files_download()).
- dry.run
logical(1) indicating whether the (often large) file(s) in
tbl should be downloaded to a local cache. Files are not downloaded when dry.run = TRUE (default).
- cache.path
character(1) directory in which to cache downloaded files. The directory must already exist. The default is
tools::R_user_dir("cellxgenedp", "cache"), a package-specific path in the user home directory.
Value
Each function returns a tibble describing the corresponding component of the database.
files_download() returns a character() vector of paths to
the local files.
links() returns a tibble of external links associated
with each collection. Common links include DOI, raw data / data
sources, and lab websites.
authors() returns a tibble of authors associated with
each collection.
publisher_metadata() returns a tibble of publisher
metadata (journal, publication date, doi) associated with each
collection.
Examples
db <- db()
collections(db)
#> # A tibble: 380 × 19
#> collection_id collection_version_id collection_url consortia contact_email
#> <chr> <chr> <chr> <list> <chr>
#> 1 af893e86-8e9f-4… c1b538fd-0f01-41c8-a… https://cellx… <chr [1]> ruichen@bcm.…
#> 2 3a5dbf8a-9b3e-4… c656236c-fc37-4470-a… https://cellx… <chr [1]> xinsun@ucsd.…
#> 3 16876983-d454-4… ea4e5a38-8adb-4ca3-9… https://cellx… <lgl [1]> ryan.corces@…
#> 4 ad10cef8-9c6c-4… 6f05ce2a-2fca-424c-8… https://cellx… <lgl [1]> jiyeon.choi2…
#> 5 7f7fdf50-aa0e-4… eed80e7c-54ff-40ae-a… https://cellx… <chr [1]> ca3@sanger.a…
#> 6 35928d1c-36fc-4… bfa09492-85f4-473e-b… https://cellx… <chr [1]> jeremym@alle…
#> 7 e02201d7-f49f-4… c8280cb1-208f-4eca-8… https://cellx… <chr [1]> richard.smit…
#> 8 0540ee09-5b45-4… 27a2b3bf-c7f3-4138-9… https://cellx… <lgl [1]> ynose@gesurg…
#> 9 9b02383a-9358-4… 0bb91d14-4427-4e5a-9… https://cellx… <chr [1]> parkerw@wust…
#> 10 8a05eaf6-5680-4… 633a21eb-5a97-401d-9… https://cellx… <lgl [1]> EichholJ@msk…
#> # ℹ 370 more rows
#> # ℹ 14 more variables: contact_name <chr>, curator_name <chr>,
#> # description <chr>, doi <chr>, is_pre_analysis <lgl>, links <list>,
#> # name <chr>, publisher_metadata <list>, revising_in <lgl>,
#> # revision_of <lgl>, visibility <chr>, created_at <date>,
#> # published_at <date>, revised_at <date>
collections(db) |>
dplyr::glimpse()
#> Rows: 380
#> Columns: 19
#> $ collection_id <chr> "af893e86-8e9f-41f1-a474-ef05359b1fb7", "3a5dbf8…
#> $ collection_version_id <chr> "c1b538fd-0f01-41c8-a504-6f44626916c2", "c656236…
#> $ collection_url <chr> "https://cellxgene.cziscience.com/collections/af…
#> $ consortia <list> "CZI Cell Science", "LungMAP", NA, NA, "Human C…
#> $ contact_email <chr> "ruichen@bcm.edu", "xinsun@ucsd.edu", "ryan.corc…
#> $ contact_name <chr> "Rui Chen", "Xin Sun", "Ryan Corces", "Jiyeon Ch…
#> $ curator_name <chr> "Jennifer Yu-Sheng Chien", "Jennifer Yu-Sheng Ch…
#> $ description <chr> "The retina is the innermost tissue of the eyes …
#> $ doi <chr> "10.1016/j.xgen.2023.100298", NA, "10.64898/2026…
#> $ is_pre_analysis <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,…
#> $ links <list> [["", "OTHER", "https://retina-atac.cells.ucsc.…
#> $ name <chr> "Single-cell transcriptomic atlas for adult huma…
#> $ publisher_metadata <list> [[["Liang", "Qingnan"], ["Cheng", "Xuesen"], ["…
#> $ revising_in <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
#> $ revision_of <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
#> $ visibility <chr> "PUBLIC", "PUBLIC", "PUBLIC", "PUBLIC", "PUBLIC"…
#> $ created_at <date> 2026-06-09, 2026-06-10, 2026-06-10, 2026-06-10,…
#> $ published_at <date> 2021-10-29, 2025-02-03, 2026-04-30, 2026-01-07,…
#> $ revised_at <date> 2026-06-11, 2026-06-11, 2026-06-11, 2026-06-11,…
datasets(db) |>
dplyr::glimpse()
#> Rows: 2,127
#> Columns: 36
#> $ dataset_id <chr> "ed419b4e-db9b-40f1-8593-68fdf8dfb076", …
#> $ dataset_version_id <chr> "c8da6eeb-84d7-4379-a332-1bf6107859d6", …
#> $ collection_id <chr> "af893e86-8e9f-41f1-a474-ef05359b1fb7", …
#> $ donor_id <list> <"19D014", "19D013", "19D015", "19D016"…
#> $ assay <list> [["10x 3' v3", "EFO:0009922"]], [["10x …
#> $ batch_condition <list> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ cell_count <int> 18011, 11617, 53040, 56507, 7348, 244474…
#> $ cell_type <list> [["Mueller cell", "CL:0000636"], ["astr…
#> $ citation <chr> "Publication: https://doi.org/10.1016/j.…
#> $ default_embedding <chr> "X_umap", "X_umap", "X_umap", "X_umap", …
#> $ development_stage <list> [["65-year-old stage", "HsapDv:0000159"…
#> $ disease <list> [["normal", "PATO:0000461"]], [["normal…
#> $ embeddings <list> <"LVG_embedding", "X_umap", "cluster_me…
#> $ explorer_url <chr> "https://cellxgene.cziscience.com/e/ed41…
#> $ feature_biotype <list> "gene", "gene", "gene", "gene", "gene",…
#> $ feature_count <int> 30172, 30172, 30172, 30172, 30172, 30172…
#> $ feature_reference <list> "NCBITaxon:9606", "NCBITaxon:9606", "NC…
#> $ genetic_perturbation_strategy <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
#> $ is_pre_analysis <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE…
#> $ is_primary_data <list> FALSE, TRUE, FALSE, FALSE, FALSE, TRUE,…
#> $ mean_genes_per_cell <dbl> 2017.381, 4493.915, 2220.995, 2509.138, …
#> $ organism <list> [["Homo sapiens", "NCBITaxon:9606"]], […
#> $ perturbation_types <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
#> $ primary_cell_count <int> 0, 11617, 0, 0, 0, 244474, 0, 255204, 45…
#> $ raw_data_location <chr> "raw.X", "raw.X", "raw.X", "raw.X", "raw…
#> $ schema_version <chr> "7.1.0", "7.1.0", "7.1.0", "7.1.0", "7.1…
#> $ self_reported_ethnicity <list> [["European American", "HANCESTRO:0590"…
#> $ sex <list> [["female", "PATO:0000383"], ["male", "…
#> $ spatial <list> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ suspension_type <list> "nucleus", "nucleus", "nucleus", "nucle…
#> $ tissue <list> [["fovea centralis", "UBERON:0001786", …
#> $ title <chr> "Non-neuronal cells in human retina", "R…
#> $ tombstone <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE…
#> $ x_approximate_distribution <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
#> $ published_at <date> 2021-10-29, 2021-10-29, 2021-10-29, 202…
#> $ revised_at <date> 2026-06-11, 2026-06-11, 2026-06-11, 202…
# \donttest{
if (interactive()) {
## visualize the first dataset
datasets(db) |>
dplyr::slice(1) |>
datasets_visualize()
}
# }
files(db) |>
dplyr::glimpse()
#> Rows: 2,167
#> Columns: 4
#> $ dataset_id <chr> "ed419b4e-db9b-40f1-8593-68fdf8dfb076", "aad97cb5-f375-45ef…
#> $ filesize <dbl> 1433774383, 806170493, 3168171879, 3480337646, 416349451, 1…
#> $ filetype <chr> "H5AD", "H5AD", "H5AD", "H5AD", "H5AD", "H5AD", "H5AD", "H5…
#> $ url <chr> "https://datasets.cellxgene.cziscience.com/c8da6eeb-84d7-43…
if (FALSE) { # \dontrun{
files(db) |>
dplyr::slice(1) |>
files_download(dry.run = FALSE)
} # }
## common links to external data
links(db) |>
dplyr::count(link_type)
#> # A tibble: 5 × 2
#> link_type n
#> <chr> <int>
#> 1 DATA_SOURCE 97
#> 2 LAB_WEBSITE 69
#> 3 OTHER 507
#> 4 PROTOCOL 67
#> 5 RAW_DATA 465
## authors per collection
authors() |>
dplyr::count(collection_id, sort = TRUE)
#> # A tibble: 362 × 2
#> collection_id n
#> <chr> <int>
#> 1 8f126edf-5405-4731-8374-b5ce11f53e82 205
#> 2 bcb61471-2a44-4d00-a0af-ff085512674c 171
#> 3 a137437b-d284-4a27-b1e9-36958a8f92c1 164
#> 4 e5f58829-1a66-40b5-a624-9046778e74f5 164
#> 5 4f586cb6-972b-4ef7-a4ef-3c3800a3c004 147
#> 6 0b9d8a04-bb9d-44da-aa27-705bb65b54eb 135
#> 7 1ca90a2d-2943-483d-b678-b809bf464c30 108
#> 8 367d95c0-0eb0-4dae-8276-9407239421ee 106
#> 9 9c9d04c4-8899-417f-bb6f-6107dcadf14f 100
#> 10 6f6d381a-7701-4781-935c-db10d30de293 98
#> # ℹ 352 more rows
publisher_metadata() |>
dplyr::glimpse()
#> Rows: 362
#> Columns: 9
#> $ collection_id <chr> "af893e86-8e9f-41f1-a474-ef05359b1fb7", "16876983-d454…
#> $ name <chr> "Single-cell transcriptomic atlas for adult human reti…
#> $ is_preprint <lgl> FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,…
#> $ journal <chr> "Cell Genomics", "bioRxiv", "Nat Commun", "Nature", "S…
#> $ published_at <date> 2023-06-01, 2026-03-05, 2024-09-12, 2026-06-03, 2023-…
#> $ published_year <int> 2023, 2026, 2024, 2026, 2023, 2021, 2026, 2021, 2025, …
#> $ published_month <int> 6, 3, 9, 6, 10, 6, 4, 12, 12, 4, 5, 10, 8, 1, 12, 9, 1…
#> $ published_day <int> 1, 5, 12, 3, 13, 22, 8, 1, 20, 1, 18, 1, 28, 10, 1, 4,…
#> $ doi <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…