Skip to contents

files_download() retrieves one or more cellxgene files to a cache on the local system.

links(), authors() and publisher_metadata() are helper functions to extract 'nested' information from collections.

Usage

collections(cellxgene_db = db())

datasets(cellxgene_db = db())

datasets_visualize(tbl)

files(cellxgene_db = db())

files_download(tbl, dry.run = TRUE, cache.path = .cellxgene_cache_path())

links(cellxgene_db = db())

authors(cellxgene_db = db())

publisher_metadata(cellxgene_db = db())

Arguments

cellxgene_db

an optional 'cellxgene_db' object, as returned by db().

tbl

a tibble() typically derived from datasets(db) or files(db) and containing the column dataset_id (for datasets_visualize()), or the columns dataset_id, filetype, and url, as returned by files(db) (for files_download()).

dry.run

logical(1) controlling whether the (often large) file(s) in tbl are actually downloaded to a local cache. Files are not downloaded when dry.run = TRUE (default); set dry.run = FALSE to perform the download.

cache.path

character(1) directory in which to cache downloaded files. The directory must already exist. The default is tools::R_user_dir("cellxgenedp", "cache"), a package-specific path in the user home directory.

Value

Each function returns a tibble describing the corresponding component of the database.

files_download() returns a character() vector of paths to the local files.

links() returns a tibble of external links associated with each collection. Common links include DOI, raw data / data sources, and lab websites.

authors() returns a tibble of authors associated with each collection.

publisher_metadata() returns a tibble of publisher metadata (journal, publication date, DOI) associated with each collection.

Examples

db <- db()

collections(db)
#> # A tibble: 302 × 18
#>    collection_id    collection_version_id collection_url consortia contact_email
#>    <chr>            <chr>                 <chr>          <list>    <chr>        
#>  1 dc3a5256-5c39-4… eb9b0ca6-ad97-43ca-b… https://cellx… <chr [1]> julia.szusz@…
#>  2 9f29fcd0-7075-4… d0a13d18-22fd-4238-8… https://cellx… <lgl [1]> Mingyao.Liu@…
#>  3 bd552f76-1f1b-4… 15425fb2-9264-4b4a-9… https://cellx… <lgl [1]> senbai.kang@…
#>  4 dc4cd1f7-667a-4… 7b76a9f2-78df-4371-8… https://cellx… <chr [1]> yuugot@gmail…
#>  5 38833785-fac5-4… 4e7826f3-7216-45cc-a… https://cellx… <lgl [1]> ggj@zju.edu.…
#>  6 b1a879f6-5638-4… 4116209e-4330-4045-a… https://cellx… <chr [2]> st9@sanger.a…
#>  7 180bff9c-c8a5-4… 3eba1597-fae7-451b-9… https://cellx… <lgl [1]> Martin.Kampm…
#>  8 579203e2-182f-4… 5afd25ec-b406-4e52-8… https://cellx… <chr [1]> sarah.snelli…
#>  9 b9fc3d70-5a72-4… f0bd3f64-4499-47e2-8… https://cellx… <chr [1]> bruce.aronow…
#> 10 398e34a9-8736-4… 626f1fbf-3c2c-4cd5-a… https://cellx… <lgl [1]> led13@gene.c…
#> # ℹ 292 more rows
#> # ℹ 13 more variables: contact_name <chr>, curator_name <chr>,
#> #   description <chr>, doi <chr>, links <list>, name <chr>,
#> #   publisher_metadata <list>, revising_in <lgl>, revision_of <lgl>,
#> #   visibility <chr>, created_at <date>, published_at <date>, revised_at <date>

collections(db) |>
    dplyr::glimpse()
#> Rows: 302
#> Columns: 18
#> $ collection_id         <chr> "dc3a5256-5c39-4a21-ac0c-4ede3e7b2323", "9f29fcd…
#> $ collection_version_id <chr> "eb9b0ca6-ad97-43ca-b614-a318f9c114bc", "d0a13d1…
#> $ collection_url        <chr> "https://cellxgene.cziscience.com/collections/dc…
#> $ consortia             <list> "Human Cell Atlas (HCA)", NA, NA, "Human Cell A…
#> $ contact_email         <chr> "julia.szusz@mail.utoronto.ca", "Mingyao.Liu@uto…
#> $ contact_name          <chr> "Julia Murphy", "Mingyao Liu", "Senbai Kang", "Y…
#> $ curator_name          <chr> "Brian J Mott", "Corinn Sophia Small", "Brian J …
#> $ description           <chr> "19 living donors, all processed with 10X v3 kit…
#> $ doi                   <chr> "10.1038/s41467-022-35297-z", "10.1016/j.ajt.202…
#> $ links                 <list> [["GSE202109", "RAW_DATA", "https://www.ncbi.nl…
#> $ name                  <chr> "Healthy living donor kidney", "Ischemia Reperfu…
#> $ publisher_metadata    <list> [[["McEvoy", "Caitriona M."], ["Murphy", "Julia…
#> $ revising_in           <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
#> $ revision_of           <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
#> $ visibility            <chr> "PUBLIC", "PUBLIC", "PUBLIC", "PUBLIC", "PUBLIC"…
#> $ created_at            <date> 2025-07-05, 2025-07-05, 2025-07-05, 2025-07-05,
#> $ published_at          <date> 2024-06-14, 2024-09-13, 2024-11-08, 2024-11-20,
#> $ revised_at            <date> 2025-07-08, 2025-07-08, 2025-07-08, 2025-07-08,

datasets(db) |>
    dplyr::glimpse()
#> Rows: 1,831
#> Columns: 33
#> $ dataset_id                 <chr> "0bae7ebf-eb54-46a6-be9a-3461cecefa4c", "42…
#> $ dataset_version_id         <chr> "e6b8dce0-19e6-419b-925b-9354164e8f31", "07…
#> $ collection_id              <chr> "dc3a5256-5c39-4a21-ac0c-4ede3e7b2323", "9f…
#> $ donor_id                   <list> <"HKB10", "HKB11", "HKB13", "HKB19", "HKB2…
#> $ assay                      <list> [["10x 3' v3", "EFO:0009922"]], [["10x 3' …
#> $ batch_condition            <list> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
#> $ cell_count                 <int> 27675, 108613, 10689, 39713, 35954, 57918, 
#> $ cell_type                  <list> [["B cell", "CL:0000236"], ["T cell", "CL:…
#> $ citation                   <chr> "Publication: https://doi.org/10.1038/s4146…
#> $ default_embedding          <chr> NA, NA, NA, NA, NA, "X_umap", NA, "X_umap",
#> $ development_stage          <list> [["33-year-old stage", "HsapDv:0000127"], …
#> $ disease                    <list> [["normal", "PATO:0000461"]], [["normal", …
#> $ embeddings                 <list> <"X_pca", "X_tsne", "X_umap">, "X_umap", "…
#> $ explorer_url               <chr> "https://cellxgene.cziscience.com/e/0bae7eb…
#> $ feature_biotype            <list> "gene", "gene", "gene", "gene", "gene", "g…
#> $ feature_count              <int> 27323, 28629, 18064, 18064, 18064, 36869, 2…
#> $ feature_reference          <list> "NCBITaxon:9606", "NCBITaxon:9606", "NCBIT…
#> $ is_primary_data            <list> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 
#> $ mean_genes_per_cell        <dbl> 2242.0657, 2192.3974, 1240.0611, 1316.8952,
#> $ organism                   <list> [["Homo sapiens", "NCBITaxon:9606"]], [["H…
#> $ primary_cell_count         <int> 27675, 108613, 10689, 39713, 35954, 57918, 
#> $ raw_data_location          <chr> "raw.X", "raw.X", "X", "X", "X", "raw.X", "…
#> $ schema_version             <chr> "6.0.0", "6.0.0", "6.0.0", "6.0.0", "6.0.0"…
#> $ self_reported_ethnicity    <list> [["unknown", "unknown"]], [["unknown", "un…
#> $ sex                        <list> [["female", "PATO:0000383"], ["male", "PAT…
#> $ spatial                    <list> NA, NA, NA, NA, NA, NA, NA, NA, [TRUE, TRU…
#> $ suspension_type            <list> "cell", "cell", "cell", "cell", "cell", "c…
#> $ tissue                     <list> [["cortex of kidney", "UBERON:0001225", "t…
#> $ title                      <chr> "Living donor kidney", "scRNA-seq of lung b…
#> $ tombstone                  <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F…
#> $ x_approximate_distribution <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
#> $ published_at               <date> 2024-06-14, 2024-09-13, 2024-11-08, 2024-1…
#> $ revised_at                 <date> 2025-07-08, 2025-07-08, 2025-07-08, 2025-0…

# \donttest{
if (interactive()) {
    ## visualize the first dataset
    datasets(db) |>
        dplyr::slice(1) |>
        datasets_visualize()
}
# }
files(db) |>
    dplyr::glimpse()
#> Rows: 1,859
#> Columns: 4
#> $ dataset_id <chr> "0bae7ebf-eb54-46a6-be9a-3461cecefa4c", "429d21fd-bb9a-4abf…
#> $ filesize   <dbl> 476000338, 1127902252, 153133718, 600042011, 502595043, 851…
#> $ filetype   <chr> "H5AD", "H5AD", "H5AD", "H5AD", "H5AD", "H5AD", "H5AD", "H5…
#> $ url        <chr> "https://datasets.cellxgene.cziscience.com/e6b8dce0-19e6-41…

if (FALSE) { # \dontrun{
files(db) |>
    dplyr::slice(1) |>
    files_download(dry.run = FALSE)
} # }

## common links to external data
links(db) |>
    dplyr::count(link_type)
#> # A tibble: 5 × 2
#>   link_type       n
#>   <chr>       <int>
#> 1 DATA_SOURCE    71
#> 2 LAB_WEBSITE    49
#> 3 OTHER         433
#> 4 PROTOCOL       55
#> 5 RAW_DATA      401

## authors per collection
authors() |>
    dplyr::count(collection_id, sort = TRUE)
#> # A tibble: 288 × 2
#>    collection_id                            n
#>    <chr>                                <int>
#>  1 8f126edf-5405-4731-8374-b5ce11f53e82   205
#>  2 bcb61471-2a44-4d00-a0af-ff085512674c   171
#>  3 e5f58829-1a66-40b5-a624-9046778e74f5   164
#>  4 4f586cb6-972b-4ef7-a4ef-3c3800a3c004   147
#>  5 0b9d8a04-bb9d-44da-aa27-705bb65b54eb   135
#>  6 1ca90a2d-2943-483d-b678-b809bf464c30   108
#>  7 367d95c0-0eb0-4dae-8276-9407239421ee   106
#>  8 6f6d381a-7701-4781-935c-db10d30de293    98
#>  9 0a839c4b-10d0-4d64-9272-684c49a2c8ba    90
#> 10 ae1420fe-6630-46ed-8b3d-cc6056a66467    83
#> # ℹ 278 more rows

publisher_metadata() |>
    dplyr::glimpse()
#> Rows: 288
#> Columns: 9
#> $ collection_id   <chr> "dc3a5256-5c39-4a21-ac0c-4ede3e7b2323", "9f29fcd0-7075…
#> $ name            <chr> "Healthy living donor kidney", "Ischemia Reperfusion R…
#> $ is_preprint     <lgl> FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE,
#> $ journal         <chr> "Nat Commun", "American Journal of Transplantation", "…
#> $ published_at    <date> 2022-12-10, 2024-12-01, 2024-11-03, 2022-12-01, 2020-…
#> $ published_year  <int> 2022, 2024, 2024, 2022, 2020, 2022, 2021, 2025, 2021, 
#> $ published_month <int> 12, 12, 11, 12, 5, 6, 2, 4, 10, 1, 10, 4, 10, 11, 11, 
#> $ published_day   <int> 10, 1, 3, 1, 21, 3, 1, 15, 1, 11, 13, 12, 7, 21, 1, 1,
#> $ doi             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA