Skip to contents

files_download() retrieves one or more cellxgene files to a cache on the local system.

links(), authors() and publisher_metadata() are helper functions to extract 'nested' information from collections.

Usage

collections(cellxgene_db = db())

datasets(cellxgene_db = db())

datasets_visualize(tbl)

files(cellxgene_db = db())

files_download(tbl, dry.run = TRUE, cache.path = .cellxgene_cache_path())

links(cellxgene_db = db())

authors(cellxgene_db = db())

publisher_metadata(cellxgene_db = db())

Arguments

cellxgene_db

an optional 'cellxgene_db' object, as returned by db().

tbl

a tibble() typically derived from datasets(db) or files(db) and containing columns dataset_id (for datasets_visualize()), or columns dataset_id, file_id, and filetype (for files_download()).

dry.run

logical(1) indicating whether to perform a 'dry run', i.e., to skip downloading the (often large) file(s) in tbl to the local cache. Files are not downloaded when dry.run = TRUE (default); use dry.run = FALSE to download them.

cache.path

character(1) directory in which to cache downloaded files. The directory must already exist. The default is tools::R_user_dir("cellxgenedp", "cache"), a package-specific path in the user home directory.

Value

Each function returns a tibble describing the corresponding component of the database.

files_download() returns a character() vector of paths to the local files.

links() returns a tibble of external links associated with each collection. Common links include DOI, raw data / data sources, and lab websites.

authors() returns a tibble of authors associated with each collection.

publisher_metadata() returns a tibble of publisher metadata (journal, publication date, DOI) associated with each collection.

Examples

db <- db()

collections(db)
#> # A tibble: 380 × 19
#>    collection_id    collection_version_id collection_url consortia contact_email
#>    <chr>            <chr>                 <chr>          <list>    <chr>        
#>  1 af893e86-8e9f-4… c1b538fd-0f01-41c8-a… https://cellx… <chr [1]> ruichen@bcm.…
#>  2 3a5dbf8a-9b3e-4… c656236c-fc37-4470-a… https://cellx… <chr [1]> xinsun@ucsd.…
#>  3 16876983-d454-4… ea4e5a38-8adb-4ca3-9… https://cellx… <lgl [1]> ryan.corces@…
#>  4 ad10cef8-9c6c-4… 6f05ce2a-2fca-424c-8… https://cellx… <lgl [1]> jiyeon.choi2…
#>  5 7f7fdf50-aa0e-4… eed80e7c-54ff-40ae-a… https://cellx… <chr [1]> ca3@sanger.a…
#>  6 35928d1c-36fc-4… bfa09492-85f4-473e-b… https://cellx… <chr [1]> jeremym@alle…
#>  7 e02201d7-f49f-4… c8280cb1-208f-4eca-8… https://cellx… <chr [1]> richard.smit…
#>  8 0540ee09-5b45-4… 27a2b3bf-c7f3-4138-9… https://cellx… <lgl [1]> ynose@gesurg…
#>  9 9b02383a-9358-4… 0bb91d14-4427-4e5a-9… https://cellx… <chr [1]> parkerw@wust…
#> 10 8a05eaf6-5680-4… 633a21eb-5a97-401d-9… https://cellx… <lgl [1]> EichholJ@msk…
#> # ℹ 370 more rows
#> # ℹ 14 more variables: contact_name <chr>, curator_name <chr>,
#> #   description <chr>, doi <chr>, is_pre_analysis <lgl>, links <list>,
#> #   name <chr>, publisher_metadata <list>, revising_in <lgl>,
#> #   revision_of <lgl>, visibility <chr>, created_at <date>,
#> #   published_at <date>, revised_at <date>

collections(db) |>
    dplyr::glimpse()
#> Rows: 380
#> Columns: 19
#> $ collection_id         <chr> "af893e86-8e9f-41f1-a474-ef05359b1fb7", "3a5dbf8…
#> $ collection_version_id <chr> "c1b538fd-0f01-41c8-a504-6f44626916c2", "c656236…
#> $ collection_url        <chr> "https://cellxgene.cziscience.com/collections/af…
#> $ consortia             <list> "CZI Cell Science", "LungMAP", NA, NA, "Human C…
#> $ contact_email         <chr> "ruichen@bcm.edu", "xinsun@ucsd.edu", "ryan.corc…
#> $ contact_name          <chr> "Rui Chen", "Xin Sun", "Ryan Corces", "Jiyeon Ch…
#> $ curator_name          <chr> "Jennifer Yu-Sheng Chien", "Jennifer Yu-Sheng Ch…
#> $ description           <chr> "The retina is the innermost tissue of the eyes …
#> $ doi                   <chr> "10.1016/j.xgen.2023.100298", NA, "10.64898/2026…
#> $ is_pre_analysis       <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
#> $ links                 <list> [["", "OTHER", "https://retina-atac.cells.ucsc.…
#> $ name                  <chr> "Single-cell transcriptomic atlas for adult huma…
#> $ publisher_metadata    <list> [[["Liang", "Qingnan"], ["Cheng", "Xuesen"], ["…
#> $ revising_in           <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
#> $ revision_of           <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
#> $ visibility            <chr> "PUBLIC", "PUBLIC", "PUBLIC", "PUBLIC", "PUBLIC"…
#> $ created_at            <date> 2026-06-09, 2026-06-10, 2026-06-10, 2026-06-10,
#> $ published_at          <date> 2021-10-29, 2025-02-03, 2026-04-30, 2026-01-07,
#> $ revised_at            <date> 2026-06-11, 2026-06-11, 2026-06-11, 2026-06-11,

datasets(db) |>
    dplyr::glimpse()
#> Rows: 2,127
#> Columns: 36
#> $ dataset_id                    <chr> "ed419b4e-db9b-40f1-8593-68fdf8dfb076", 
#> $ dataset_version_id            <chr> "c8da6eeb-84d7-4379-a332-1bf6107859d6", 
#> $ collection_id                 <chr> "af893e86-8e9f-41f1-a474-ef05359b1fb7", 
#> $ donor_id                      <list> <"19D014", "19D013", "19D015", "19D016"…
#> $ assay                         <list> [["10x 3' v3", "EFO:0009922"]], [["10x …
#> $ batch_condition               <list> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
#> $ cell_count                    <int> 18011, 11617, 53040, 56507, 7348, 244474…
#> $ cell_type                     <list> [["Mueller cell", "CL:0000636"], ["astr…
#> $ citation                      <chr> "Publication: https://doi.org/10.1016/j.…
#> $ default_embedding             <chr> "X_umap", "X_umap", "X_umap", "X_umap", 
#> $ development_stage             <list> [["65-year-old stage", "HsapDv:0000159"…
#> $ disease                       <list> [["normal", "PATO:0000461"]], [["normal…
#> $ embeddings                    <list> <"LVG_embedding", "X_umap", "cluster_me…
#> $ explorer_url                  <chr> "https://cellxgene.cziscience.com/e/ed41…
#> $ feature_biotype               <list> "gene", "gene", "gene", "gene", "gene",
#> $ feature_count                 <int> 30172, 30172, 30172, 30172, 30172, 30172…
#> $ feature_reference             <list> "NCBITaxon:9606", "NCBITaxon:9606", "NC…
#> $ genetic_perturbation_strategy <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
#> $ is_pre_analysis               <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE…
#> $ is_primary_data               <list> FALSE, TRUE, FALSE, FALSE, FALSE, TRUE,
#> $ mean_genes_per_cell           <dbl> 2017.381, 4493.915, 2220.995, 2509.138, 
#> $ organism                      <list> [["Homo sapiens", "NCBITaxon:9606"]], […
#> $ perturbation_types            <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
#> $ primary_cell_count            <int> 0, 11617, 0, 0, 0, 244474, 0, 255204, 45…
#> $ raw_data_location             <chr> "raw.X", "raw.X", "raw.X", "raw.X", "raw…
#> $ schema_version                <chr> "7.1.0", "7.1.0", "7.1.0", "7.1.0", "7.1…
#> $ self_reported_ethnicity       <list> [["European American", "HANCESTRO:0590"…
#> $ sex                           <list> [["female", "PATO:0000383"], ["male", "…
#> $ spatial                       <list> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
#> $ suspension_type               <list> "nucleus", "nucleus", "nucleus", "nucle…
#> $ tissue                        <list> [["fovea centralis", "UBERON:0001786", …
#> $ title                         <chr> "Non-neuronal cells in human retina", "R…
#> $ tombstone                     <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE…
#> $ x_approximate_distribution    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
#> $ published_at                  <date> 2021-10-29, 2021-10-29, 2021-10-29, 202…
#> $ revised_at                    <date> 2026-06-11, 2026-06-11, 2026-06-11, 202…

# \donttest{
if (interactive()) {
    ## visualize the first dataset
    datasets(db) |>
        dplyr::slice(1) |>
        datasets_visualize()
}
# }
files(db) |>
    dplyr::glimpse()
#> Rows: 2,167
#> Columns: 4
#> $ dataset_id <chr> "ed419b4e-db9b-40f1-8593-68fdf8dfb076", "aad97cb5-f375-45ef…
#> $ filesize   <dbl> 1433774383, 806170493, 3168171879, 3480337646, 416349451, 1…
#> $ filetype   <chr> "H5AD", "H5AD", "H5AD", "H5AD", "H5AD", "H5AD", "H5AD", "H5…
#> $ url        <chr> "https://datasets.cellxgene.cziscience.com/c8da6eeb-84d7-43…

if (FALSE) { # \dontrun{
files(db) |>
    dplyr::slice(1) |>
    files_download(dry.run = FALSE)
} # }

## common links to external data
links(db) |>
    dplyr::count(link_type)
#> # A tibble: 5 × 2
#>   link_type       n
#>   <chr>       <int>
#> 1 DATA_SOURCE    97
#> 2 LAB_WEBSITE    69
#> 3 OTHER         507
#> 4 PROTOCOL       67
#> 5 RAW_DATA      465

## authors per collection
authors() |>
    dplyr::count(collection_id, sort = TRUE)
#> # A tibble: 362 × 2
#>    collection_id                            n
#>    <chr>                                <int>
#>  1 8f126edf-5405-4731-8374-b5ce11f53e82   205
#>  2 bcb61471-2a44-4d00-a0af-ff085512674c   171
#>  3 a137437b-d284-4a27-b1e9-36958a8f92c1   164
#>  4 e5f58829-1a66-40b5-a624-9046778e74f5   164
#>  5 4f586cb6-972b-4ef7-a4ef-3c3800a3c004   147
#>  6 0b9d8a04-bb9d-44da-aa27-705bb65b54eb   135
#>  7 1ca90a2d-2943-483d-b678-b809bf464c30   108
#>  8 367d95c0-0eb0-4dae-8276-9407239421ee   106
#>  9 9c9d04c4-8899-417f-bb6f-6107dcadf14f   100
#> 10 6f6d381a-7701-4781-935c-db10d30de293    98
#> # ℹ 352 more rows

publisher_metadata() |>
    dplyr::glimpse()
#> Rows: 362
#> Columns: 9
#> $ collection_id   <chr> "af893e86-8e9f-41f1-a474-ef05359b1fb7", "16876983-d454…
#> $ name            <chr> "Single-cell transcriptomic atlas for adult human reti…
#> $ is_preprint     <lgl> FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
#> $ journal         <chr> "Cell Genomics", "bioRxiv", "Nat Commun", "Nature", "S…
#> $ published_at    <date> 2023-06-01, 2026-03-05, 2024-09-12, 2026-06-03, 2023-…
#> $ published_year  <int> 2023, 2026, 2024, 2026, 2023, 2021, 2026, 2021, 2025, 
#> $ published_month <int> 6, 3, 9, 6, 10, 6, 4, 12, 12, 4, 5, 10, 8, 1, 12, 9, 1…
#> $ published_day   <int> 1, 5, 12, 3, 13, 22, 8, 1, 20, 1, 18, 1, 28, 10, 1, 4,
#> $ doi             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA