Skip to contents

files_download() retrieves one or more cellxgene files to a cache on the local system.

links(), authors() and publisher_metadata() are helper functions to extract 'nested' information from collections.

Usage

collections(cellxgene_db = db())

datasets(cellxgene_db = db())

datasets_visualize(tbl)

files(cellxgene_db = db())

files_download(tbl, dry.run = TRUE, cache.path = .cellxgene_cache_path())

links(cellxgene_db = db())

authors(cellxgene_db = db())

publisher_metadata(cellxgene_db = db())

Arguments

cellxgene_db

an optional 'cellxgene_db' object, as returned by db().

tbl

a tibble() typically derived from datasets(db) or files(db) and containing columns dataset_id (for datasets_visualize()), or columns dataset_id, file_id, and filetype (for files_download()).

dry.run

logical(1) controlling whether the (often large) file(s) in tbl are actually downloaded to a local cache. Files are downloaded only when dry.run = FALSE; when dry.run = TRUE (the default), no files are downloaded.

cache.path

character(1) directory in which to cache downloaded files. The directory must already exist. The default is tools::R_user_dir("cellxgenedp", "cache"), a package-specific path in the user home directory.

Value

collections(), datasets(), and files() each return a tibble describing the corresponding component of the database.

files_download() returns a character() vector of paths to the local files.

links() returns a tibble of external links associated with each collection. Common links include DOI, raw data / data sources, and lab websites.

authors() returns a tibble of authors associated with each collection.

publisher_metadata() returns a tibble of publisher metadata (journal, publication date, DOI) associated with each collection.

Examples

db <- db()

collections(db)
#> # A tibble: 235 × 18
#>    collection_id    collection_version_id collection_url consortia contact_email
#>    <chr>            <chr>                 <chr>          <list>    <chr>        
#>  1 4828d33d-fb26-4… 3892863c-7057-4a32-a… https://cellx… <chr [1]> anna.molofsk…
#>  2 48259aa8-f168-4… 67e75752-53dd-4aec-9… https://cellx… <lgl [1]> wtk22@cam.ac…
#>  3 6e067060-f7e4-4… 37be1b6a-2d00-4b28-a… https://cellx… <lgl [1]> William.Zach…
#>  4 1ca90a2d-2943-4… 1cb67a11-55b0-4416-9… https://cellx… <chr [2]> edl@allenins…
#>  5 31937775-0602-4… e79f335e-ff6b-4c01-8… https://cellx… <lgl [1]> weallen@fas.…
#>  6 34f12de7-c5e5-4… 375b0e1c-19d6-42b9-a… https://cellx… <chr [1]> ganier.clari…
#>  7 48d354f5-a5ca-4… 8216eda1-79ae-4faf-9… https://cellx… <chr [1]> Nathan.Salom…
#>  8 28e9d721-6816-4… c152fcf5-a85a-46c4-9… https://cellx… <lgl [1]> Tom_Mariani@…
#>  9 9c8808ce-1138-4… e3c1e670-8e51-499f-a… https://cellx… <lgl [1]> ebutcher@sta…
#> 10 37f1f46d-6dfa-4… 731ffc35-11c3-4236-8… https://cellx… <lgl [1]> ecker@salk.e…
#> # ℹ 225 more rows
#> # ℹ 13 more variables: contact_name <chr>, curator_name <chr>,
#> #   description <chr>, doi <chr>, links <list>, name <chr>,
#> #   publisher_metadata <list>, revising_in <lgl>, revision_of <lgl>,
#> #   visibility <chr>, created_at <date>, published_at <date>, revised_at <date>

collections(db) |>
    dplyr::glimpse()
#> Rows: 235
#> Columns: 18
#> $ collection_id         <chr> "4828d33d-fb26-42e7-bf36-18293b0eec85", "48259aa…
#> $ collection_version_id <chr> "3892863c-7057-4a32-a538-e5407650881b", "67e7575…
#> $ collection_url        <chr> "https://cellxgene.cziscience.com/collections/48…
#> $ consortia             <list> "CZ Biohub", NA, NA, <"Allen Institute for Brai…
#> $ contact_email         <chr> "anna.molofsky@ucsf.edu", "wtk22@cam.ac.uk", "Wi…
#> $ contact_name          <chr> "Anna Molofsky", "Walid Khaled", "William Zachar…
#> $ curator_name          <chr> "Corinn Sophia Small", "Jason Hilton", "Jason Hi…
#> $ description           <chr> "Microglia are brain resident phagocytes that ca…
#> $ doi                   <chr> "10.1016/j.cell.2024.02.020", "10.1038/s41588-02…
#> $ links                 <list> [["", "LAB_WEBSITE", "https://www.annamolofskyl…
#> $ name                  <chr> "Type I interferon responsive microglia shape co…
#> $ publisher_metadata    <list> [[["Escoubas", "Caroline C."], ["Dorman", "Leah…
#> $ revising_in           <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
#> $ revision_of           <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
#> $ visibility            <chr> "PUBLIC", "PUBLIC", "PUBLIC", "PUBLIC", "PUBLIC"…
#> $ created_at            <date> 2024-06-09, 2024-06-08, 2024-06-09, 2024-06-08,…
#> $ published_at          <date> 2024-04-01, 2023-04-25, 2022-02-23, 2022-05-31,…
#> $ revised_at            <date> 2024-06-12, 2024-06-12, 2024-06-12, 2024-06-12,…

datasets(db) |>
    dplyr::glimpse()
#> Rows: 1,488
#> Columns: 33
#> $ dataset_id                 <chr> "5dec4249-8459-4df0-8998-37193135754c", "be…
#> $ dataset_version_id         <chr> "dea35fd3-c576-4ca5-9c10-72546e312b2e", "15…
#> $ collection_id              <chr> "4828d33d-fb26-42e7-bf36-18293b0eec85", "48…
#> $ donor_id                   <list> <"pooled_Control_P5", "pooled_Deprived_P5"…
#> $ assay                      <list> [["10x 3' v3", "EFO:0009922"]], [["10x 3' …
#> $ batch_condition            <list> NA, "processing_date", "processing_date", …
#> $ cell_count                 <int> 12330, 416825, 352496, 2122065, 25382, 8032…
#> $ cell_type                  <list> [["microglial cell", "CL:0000129"]], [["ca…
#> $ citation                   <chr> "Publication: https://doi.org/10.1016/j.cel…
#> $ default_embedding          <chr> "X_umap", NA, NA, NA, NA, NA, NA, "X_umap",…
#> $ development_stage          <list> [["Theiler stage 28", "MmusDv:0000037"]], …
#> $ disease                    <list> [["normal", "PATO:0000461"]], [["normal", …
#> $ embeddings                 <list> <"X_pca", "X_umap">, <"X_diffmap", "X_scVI…
#> $ explorer_url               <chr> "https://cellxgene.cziscience.com/e/5dec424…
#> $ feature_biotype            <list> "gene", "gene", "gene", "gene", "gene", "g…
#> $ feature_count              <int> 23045, 34455, 34455, 15166, 34455, 34455, 1…
#> $ feature_reference          <list> "NCBITaxon:10090", "NCBITaxon:9606", "NCBI…
#> $ is_primary_data            <list> TRUE, FALSE, FALSE, <TRUE, FALSE>, FALSE, …
#> $ mean_genes_per_cell        <dbl> 4159.6886, 1639.8527, 2608.0262, 1847.2070,…
#> $ organism                   <list> [["Mus musculus", "NCBITaxon:10090"]], [["…
#> $ primary_cell_count         <int> 12330, 0, 0, 551770, 0, 803283, 120042, 0, …
#> $ raw_data_location          <chr> "raw.X", "raw.X", "raw.X", "raw.X", "raw.X"…
#> $ schema_version             <chr> "5.1.0", "5.1.0", "5.1.0", "5.1.0", "5.1.0"…
#> $ self_reported_ethnicity    <list> [["na", "na"]], [["African", "HANCESTRO:00…
#> $ sex                        <list> [["female", "PATO:0000383"], ["male", "PAT…
#> $ spatial                    <list> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ suspension_type            <list> "cell", "cell", "cell", "cell", "cell", "c…
#> $ tissue                     <list> [["barrel cortex", "UBERON:0010415", "tiss…
#> $ title                      <chr> "Type I interferon responsive microglia sha…
#> $ tombstone                  <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F…
#> $ x_approximate_distribution <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ published_at               <date> 2024-04-01, 2023-04-25, 2023-04-25, 2024-0…
#> $ revised_at                 <date> 2024-06-12, 2024-06-12, 2024-06-12, 2024-0…

# \donttest{
if (interactive()) {
    ## visualize the first dataset
    datasets(db) |>
        dplyr::slice(1) |>
        datasets_visualize()
}
# }
files(db) |>
    dplyr::glimpse()
#> Rows: 2,723
#> Columns: 4
#> $ dataset_id <chr> "5dec4249-8459-4df0-8998-37193135754c", "5dec4249-8459-4df0…
#> $ filesize   <dbl> 239645451, 241723366, 3555862131, 3390479178, 4584229094, 4…
#> $ filetype   <chr> "H5AD", "RDS", "H5AD", "RDS", "H5AD", "RDS", "H5AD", "H5AD"…
#> $ url        <chr> "https://datasets.cellxgene.cziscience.com/dea35fd3-c576-4c…

if (FALSE) { # \dontrun{
files(db) |>
    dplyr::slice(1) |>
    files_download(dry.run = FALSE)
} # }

## common links to external data
links(db) |>
    dplyr::count(link_type)
#> # A tibble: 5 × 2
#>   link_type       n
#>   <chr>       <int>
#> 1 DATA_SOURCE    55
#> 2 LAB_WEBSITE    44
#> 3 OTHER         374
#> 4 PROTOCOL       48
#> 5 RAW_DATA      334

## authors per collection
authors() |>
    dplyr::count(collection_id, sort = TRUE)
#> # A tibble: 226 × 2
#>    collection_id                            n
#>    <chr>                                <int>
#>  1 e5f58829-1a66-40b5-a624-9046778e74f5   221
#>  2 8f126edf-5405-4731-8374-b5ce11f53e82   205
#>  3 bcb61471-2a44-4d00-a0af-ff085512674c   171
#>  4 4f586cb6-972b-4ef7-a4ef-3c3800a3c004   147
#>  5 0b9d8a04-bb9d-44da-aa27-705bb65b54eb   135
#>  6 367d95c0-0eb0-4dae-8276-9407239421ee   106
#>  7 6f6d381a-7701-4781-935c-db10d30de293    98
#>  8 1ca90a2d-2943-483d-b678-b809bf464c30    94
#>  9 0a839c4b-10d0-4d64-9272-684c49a2c8ba    90
#> 10 ae1420fe-6630-46ed-8b3d-cc6056a66467    83
#> # ℹ 216 more rows

publisher_metadata() |>
    dplyr::glimpse()
#> Rows: 226
#> Columns: 9
#> $ collection_id   <chr> "4828d33d-fb26-42e7-bf36-18293b0eec85", "48259aa8-f168…
#> $ name            <chr> "Type I interferon responsive microglia shape cortical…
#> $ is_preprint     <lgl> FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE,…
#> $ journal         <chr> "Cell", "Nat Genet", "Sci. Transl. Med.", "bioRxiv", "…
#> $ published_at    <date> 2024-03-01, 2024-03-28, 2022-03-30, 2023-05-09, 2022-…
#> $ published_year  <int> 2024, 2024, 2022, 2023, 2022, 2024, 2021, 2024, 2020, …
#> $ published_month <int> 3, 3, 3, 5, 12, 1, 3, 2, 4, 10, 9, 12, 10, 8, 4, 11, 1…
#> $ published_day   <int> 1, 28, 30, 9, 1, 9, 12, 26, 30, 7, 22, 1, 13, 9, 15, 1…
#> $ doi             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…