Query cellxgene collections, datasets, and files
Source: R/collections.R, R/datasets.R, R/files.R, and 1 more
query.Rd
files_download()
retrieves one or more cellxgene
files to a cache on the local system.
links(), authors(), and publisher_metadata() are helper functions to extract
'nested' information from collections.
Arguments
- cellxgene_db
an optional 'cellxgene_db' object, as returned by db().
- tbl
a tibble(), typically derived from datasets(db) or files(db), and containing
the column dataset_id (for datasets_visualize()), or the columns dataset_id,
file_id, and filetype (for files_download()).
- dry.run
logical(1) indicating whether the (often large) file(s) in tbl should be
downloaded to a local cache. Files are not downloaded when dry.run = TRUE
(default).
- cache.path
character(1) directory in which to cache downloaded files. The directory must already exist. The default is
tools::R_user_dir("cellxgenedp", "cache")
, a package-specific path in the user home directory.
Value
Each function returns a tibble describing the corresponding component of the database.
files_download()
returns a character() vector of paths to
the local files.
links()
returns a tibble of external links associated
with each collection. Common links include DOI, raw data / data
sources, and lab websites.
authors()
returns a tibble of authors associated with
each collection.
publisher_metadata()
returns a tibble of publisher
metadata (journal, publication date, doi) associated with each
collection.
Examples
db <- db()
collections(db)
#> # A tibble: 302 × 18
#> collection_id collection_version_id collection_url consortia contact_email
#> <chr> <chr> <chr> <list> <chr>
#> 1 dc3a5256-5c39-4… eb9b0ca6-ad97-43ca-b… https://cellx… <chr [1]> julia.szusz@…
#> 2 9f29fcd0-7075-4… d0a13d18-22fd-4238-8… https://cellx… <lgl [1]> Mingyao.Liu@…
#> 3 bd552f76-1f1b-4… 15425fb2-9264-4b4a-9… https://cellx… <lgl [1]> senbai.kang@…
#> 4 dc4cd1f7-667a-4… 7b76a9f2-78df-4371-8… https://cellx… <chr [1]> yuugot@gmail…
#> 5 38833785-fac5-4… 4e7826f3-7216-45cc-a… https://cellx… <lgl [1]> ggj@zju.edu.…
#> 6 b1a879f6-5638-4… 4116209e-4330-4045-a… https://cellx… <chr [2]> st9@sanger.a…
#> 7 180bff9c-c8a5-4… 3eba1597-fae7-451b-9… https://cellx… <lgl [1]> Martin.Kampm…
#> 8 579203e2-182f-4… 5afd25ec-b406-4e52-8… https://cellx… <chr [1]> sarah.snelli…
#> 9 b9fc3d70-5a72-4… f0bd3f64-4499-47e2-8… https://cellx… <chr [1]> bruce.aronow…
#> 10 398e34a9-8736-4… 626f1fbf-3c2c-4cd5-a… https://cellx… <lgl [1]> led13@gene.c…
#> # ℹ 292 more rows
#> # ℹ 13 more variables: contact_name <chr>, curator_name <chr>,
#> # description <chr>, doi <chr>, links <list>, name <chr>,
#> # publisher_metadata <list>, revising_in <lgl>, revision_of <lgl>,
#> # visibility <chr>, created_at <date>, published_at <date>, revised_at <date>
collections(db) |>
dplyr::glimpse()
#> Rows: 302
#> Columns: 18
#> $ collection_id <chr> "dc3a5256-5c39-4a21-ac0c-4ede3e7b2323", "9f29fcd…
#> $ collection_version_id <chr> "eb9b0ca6-ad97-43ca-b614-a318f9c114bc", "d0a13d1…
#> $ collection_url <chr> "https://cellxgene.cziscience.com/collections/dc…
#> $ consortia <list> "Human Cell Atlas (HCA)", NA, NA, "Human Cell A…
#> $ contact_email <chr> "julia.szusz@mail.utoronto.ca", "Mingyao.Liu@uto…
#> $ contact_name <chr> "Julia Murphy", "Mingyao Liu", "Senbai Kang", "Y…
#> $ curator_name <chr> "Brian J Mott", "Corinn Sophia Small", "Brian J …
#> $ description <chr> "19 living donors, all processed with 10X v3 kit…
#> $ doi <chr> "10.1038/s41467-022-35297-z", "10.1016/j.ajt.202…
#> $ links <list> [["GSE202109", "RAW_DATA", "https://www.ncbi.nl…
#> $ name <chr> "Healthy living donor kidney", "Ischemia Reperfu…
#> $ publisher_metadata <list> [[["McEvoy", "Caitriona M."], ["Murphy", "Julia…
#> $ revising_in <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
#> $ revision_of <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
#> $ visibility <chr> "PUBLIC", "PUBLIC", "PUBLIC", "PUBLIC", "PUBLIC"…
#> $ created_at <date> 2025-07-05, 2025-07-05, 2025-07-05, 2025-07-05,…
#> $ published_at <date> 2024-06-14, 2024-09-13, 2024-11-08, 2024-11-20,…
#> $ revised_at <date> 2025-07-08, 2025-07-08, 2025-07-08, 2025-07-08,…
datasets(db) |>
dplyr::glimpse()
#> Rows: 1,831
#> Columns: 33
#> $ dataset_id <chr> "0bae7ebf-eb54-46a6-be9a-3461cecefa4c", "42…
#> $ dataset_version_id <chr> "e6b8dce0-19e6-419b-925b-9354164e8f31", "07…
#> $ collection_id <chr> "dc3a5256-5c39-4a21-ac0c-4ede3e7b2323", "9f…
#> $ donor_id <list> <"HKB10", "HKB11", "HKB13", "HKB19", "HKB2…
#> $ assay <list> [["10x 3' v3", "EFO:0009922"]], [["10x 3' …
#> $ batch_condition <list> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ cell_count <int> 27675, 108613, 10689, 39713, 35954, 57918, …
#> $ cell_type <list> [["B cell", "CL:0000236"], ["T cell", "CL:…
#> $ citation <chr> "Publication: https://doi.org/10.1038/s4146…
#> $ default_embedding <chr> NA, NA, NA, NA, NA, "X_umap", NA, "X_umap",…
#> $ development_stage <list> [["33-year-old stage", "HsapDv:0000127"], …
#> $ disease <list> [["normal", "PATO:0000461"]], [["normal", …
#> $ embeddings <list> <"X_pca", "X_tsne", "X_umap">, "X_umap", "…
#> $ explorer_url <chr> "https://cellxgene.cziscience.com/e/0bae7eb…
#> $ feature_biotype <list> "gene", "gene", "gene", "gene", "gene", "g…
#> $ feature_count <int> 27323, 28629, 18064, 18064, 18064, 36869, 2…
#> $ feature_reference <list> "NCBITaxon:9606", "NCBITaxon:9606", "NCBIT…
#> $ is_primary_data <list> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, …
#> $ mean_genes_per_cell <dbl> 2242.0657, 2192.3974, 1240.0611, 1316.8952,…
#> $ organism <list> [["Homo sapiens", "NCBITaxon:9606"]], [["H…
#> $ primary_cell_count <int> 27675, 108613, 10689, 39713, 35954, 57918, …
#> $ raw_data_location <chr> "raw.X", "raw.X", "X", "X", "X", "raw.X", "…
#> $ schema_version <chr> "6.0.0", "6.0.0", "6.0.0", "6.0.0", "6.0.0"…
#> $ self_reported_ethnicity <list> [["unknown", "unknown"]], [["unknown", "un…
#> $ sex <list> [["female", "PATO:0000383"], ["male", "PAT…
#> $ spatial <list> NA, NA, NA, NA, NA, NA, NA, NA, [TRUE, TRU…
#> $ suspension_type <list> "cell", "cell", "cell", "cell", "cell", "c…
#> $ tissue <list> [["cortex of kidney", "UBERON:0001225", "t…
#> $ title <chr> "Living donor kidney", "scRNA-seq of lung b…
#> $ tombstone <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F…
#> $ x_approximate_distribution <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ published_at <date> 2024-06-14, 2024-09-13, 2024-11-08, 2024-1…
#> $ revised_at <date> 2025-07-08, 2025-07-08, 2025-07-08, 2025-0…
# \donttest{
if (interactive()) {
## visualize the first dataset
datasets(db) |>
dplyr::slice(1) |>
datasets_visualize()
}
# }
files(db) |>
dplyr::glimpse()
#> Rows: 1,859
#> Columns: 4
#> $ dataset_id <chr> "0bae7ebf-eb54-46a6-be9a-3461cecefa4c", "429d21fd-bb9a-4abf…
#> $ filesize <dbl> 476000338, 1127902252, 153133718, 600042011, 502595043, 851…
#> $ filetype <chr> "H5AD", "H5AD", "H5AD", "H5AD", "H5AD", "H5AD", "H5AD", "H5…
#> $ url <chr> "https://datasets.cellxgene.cziscience.com/e6b8dce0-19e6-41…
if (FALSE) { # \dontrun{
files(db) |>
dplyr::slice(1) |>
files_download(dry.run = FALSE)
} # }
## common links to external data
links(db) |>
dplyr::count(link_type)
#> # A tibble: 5 × 2
#> link_type n
#> <chr> <int>
#> 1 DATA_SOURCE 71
#> 2 LAB_WEBSITE 49
#> 3 OTHER 433
#> 4 PROTOCOL 55
#> 5 RAW_DATA 401
## authors per collection
authors() |>
dplyr::count(collection_id, sort = TRUE)
#> # A tibble: 288 × 2
#> collection_id n
#> <chr> <int>
#> 1 8f126edf-5405-4731-8374-b5ce11f53e82 205
#> 2 bcb61471-2a44-4d00-a0af-ff085512674c 171
#> 3 e5f58829-1a66-40b5-a624-9046778e74f5 164
#> 4 4f586cb6-972b-4ef7-a4ef-3c3800a3c004 147
#> 5 0b9d8a04-bb9d-44da-aa27-705bb65b54eb 135
#> 6 1ca90a2d-2943-483d-b678-b809bf464c30 108
#> 7 367d95c0-0eb0-4dae-8276-9407239421ee 106
#> 8 6f6d381a-7701-4781-935c-db10d30de293 98
#> 9 0a839c4b-10d0-4d64-9272-684c49a2c8ba 90
#> 10 ae1420fe-6630-46ed-8b3d-cc6056a66467 83
#> # ℹ 278 more rows
publisher_metadata() |>
dplyr::glimpse()
#> Rows: 288
#> Columns: 9
#> $ collection_id <chr> "dc3a5256-5c39-4a21-ac0c-4ede3e7b2323", "9f29fcd0-7075…
#> $ name <chr> "Healthy living donor kidney", "Ischemia Reperfusion R…
#> $ is_preprint <lgl> FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE,…
#> $ journal <chr> "Nat Commun", "American Journal of Transplantation", "…
#> $ published_at <date> 2022-12-10, 2024-12-01, 2024-11-03, 2022-12-01, 2020-…
#> $ published_year <int> 2022, 2024, 2024, 2022, 2020, 2022, 2021, 2025, 2021, …
#> $ published_month <int> 12, 12, 11, 12, 5, 6, 2, 4, 10, 1, 10, 4, 10, 11, 11, …
#> $ published_day <int> 10, 1, 3, 1, 21, 3, 1, 15, 1, 11, 13, 12, 7, 21, 1, 1,…
#> $ doi <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…