Query projects, publications and citations
Usage
reporter_projects(..., include_fields = NULL, limit = NULL, verbose = FALSE)
reporter_publications(..., limit = NULL, verbose = FALSE)
icite(tbl, include_fields = NULL, verbose = FALSE)
Arguments
- ...
named arguments describing fields in the query. Values are from the 'Schema' linked to the API description.
- include_fields
character()
of fields to include. The default (NULL) returns all available fields.
- limit
integer(1) return a maximum of
limit
records matching search criteria.
- verbose
logical(1) report JSON used in search criteria, and a summary of responses prior to processing to their final
tibble
representation.
- tbl
For icite(), tbl must contain a column pmid with PubMed ids, for instance as in the tibble derived from reporter_publications().
Value
reporter_projects()
returns a tibble with selected
columns. Available columns are described in the schema
available on the API documentation page.
reporter_publications() returns a tibble with columns coreproject, pmid, and applid.
icite() returns a tibble with columns defined by include_fields.
Details
The NIH reporter API used for reporter_projects()
and
reporter_publications()
is documented at
https://api.reporter.nih.gov/.
The icite()
API is described at
https://icite.od.nih.gov/api.
Examples
foas <- c( # one or more criteria, e.g., foa number(s)
"PAR-15-334", # ITCR (R21)
"PAR-15-332", # ITCR Early-Stage Development (U01)
"PAR-15-331", # ITCR Advanced Development (U24)
"PAR-15-333" # ITCR Sustained Support (U24)
)
## use `limit = 1` to see possible values for fields to be included
reporter_projects(foa = foas, limit = 1L) |>
glimpse()
#> Rows: 1
#> Columns: 44
#> $ appl_id <int> 9676260
#> $ subproject_id <lgl> NA
#> $ fiscal_year <int> 2019
#> $ project_num <chr> "5U24CA220242-02"
#> $ project_serial_num <chr> "CA220242"
#> $ organization <df[,17]> <data.frame[1 x 17]>
#> $ award_type <chr> "5"
#> $ activity_code <chr> "U24"
#> $ award_amount <int> 550158
#> $ is_active <lgl> FALSE
#> $ project_num_split <df[,7]> <data.frame[1 x 7]>
#> $ principal_investigators <list> [<data.frame[1 x 7]>]
#> $ contact_pi_name <chr> "ABYZOV, ALEXEJ"
#> $ program_officers <list> [<data.frame[1 x 4]>]
#> $ agency_ic_admin <df[,3]> <data.frame[1 x 3]>
#> $ agency_ic_fundings <list> [<data.frame[1 x 5]>]
#> $ cong_dist <chr> "MN-01"
#> $ spending_categories <list> <108, 132, 1393, 276, 320, 3070>
#> $ project_start_date <date> 2018-05-01
#> $ project_end_date <date> 2023-04-30
#> $ organization_type <df[,3]> <data.frame[1 x 3]>
#> $ opportunity_number <chr> "PAR-15-331"
#> $ full_study_section <df[,6]> <data.frame[1 x 6]>
#> $ award_notice_date <date> 2019-05-01
#> $ is_new <lgl> FALSE
#> $ mechanism_code_dc <chr> "OR"
#> $ core_project_num <chr> "U24CA220242"
#> $ terms <chr> "<Aftercare><post treatment><After-Treatment>…
#> $ pref_terms <chr> "Address;Aftercare;Area;Attention;Basic Scien…
#> $ abstract_text <chr> "Project Summary/Abstract\n Progress in techn…
#> $ project_title <chr> "Detection of somatic, subclonal and mosaic C…
#> $ phr_text <chr> "Narrative\nThe analytical tools that will be…
#> $ spending_categories_desc <chr> "Biotechnology; Cancer; Cancer Genomics; …
#> $ agency_code <chr> "NIH"
#> $ covid_response <lgl> NA
#> $ arra_funded <chr> "N"
#> $ budget_start <chr> "2019-05-01T12:05:00Z"
#> $ budget_end <chr> "2020-04-30T12:04:00Z"
#> $ cfda_code <chr> "399"
#> $ funding_mechanism <chr> "Other Research-Related"
#> $ direct_cost_amt <int> 348999
#> $ indirect_cost_amt <int> 201159
#> $ project_detail_url <chr> "https://reporter.nih.gov/project-details/967…
#> $ date_added <chr> "2019-05-04T07:05:16Z"
## select fields of interest
include_fields <- c(
"opportunity_number",
"core_project_num",
"fiscal_year",
"award_amount",
"contact_pi_name",
"project_title",
"project_start_date",
"project_end_date"
)
projects <- reporter_projects(foa = foas, include_fields = include_fields)
projects
#> # A tibble: 189 × 8
#> opportunity_number core_project_num fiscal_year award_amount contact_pi_name
#> <chr> <chr> <int> <int> <chr>
#> 1 PAR-15-331 U24CA220242 2019 550158 ABYZOV, ALEXEJ
#> 2 PAR-15-331 U24CA220242 2022 561369 ABYZOV, ALEXEJ
#> 3 PAR-15-331 U24CA220242 2021 572828 ABYZOV, ALEXEJ
#> 4 PAR-15-331 U24CA220242 2018 559916 ABYZOV, ALEXEJ
#> 5 PAR-15-331 U24CA220242 2020 383715 ABYZOV, ALEXEJ
#> 6 PAR-15-334 R21CA220352 2019 156788 ARNOLD, COREY W…
#> 7 PAR-15-334 R21CA220352 2018 195568 ARNOLD, COREY W…
#> 8 PAR-15-332 U01CA242871 2019 378519 BAKAS, SPYRIDON
#> 9 PAR-15-332 U01CA242871 2020 360393 BAKAS, SPYRIDON
#> 10 PAR-15-332 U01CA242871 2021 357972 BAKAS, SPYRIDON
#> # ℹ 179 more rows
#> # ℹ 3 more variables: project_title <chr>, project_start_date <date>,
#> # project_end_date <date>
core_project_nums <- pull(projects, "core_project_num")
publications <- reporter_publications(core_project_nums = core_project_nums)
publications
#> # A tibble: 982 × 3
#> coreproject pmid applid
#> <chr> <int> <int>
#> 1 U24CA237719 31907209 10620674
#> 2 U24CA237719 31779674 10620674
#> 3 U24CA237719 35366592 10620674
#> 4 U24CA237719 35072136 10620674
#> 5 U24CA237719 36949070 10620674
#> 6 U24CA237719 34036230 10620674
#> 7 U24CA237719 36541006 10620674
#> 8 U24CA237719 31796060 10620674
#> 9 U24CA237719 32665297 10620674
#> 10 U24CA237719 32644817 10620674
#> # ℹ 972 more rows
## which fields are available in icite?
icite(slice(publications, 1L)) |>
glimpse()
#> Rows: 1
#> Columns: 25
#> $ pmid <dbl> 31907209
#> $ year <dbl> 2020
#> $ title <chr> "pVACtools: A Computational Toolkit to Ide…
#> $ authors <chr> "Jasreet Hundal, Susanna Kiwala, Joshua Mc…
#> $ journal <chr> "Cancer Immunol Res"
#> $ is_research_article <chr> "Yes"
#> $ relative_citation_ratio <dbl> 5.18
#> $ nih_percentile <dbl> 93.6
#> $ human <dbl> 1
#> $ animal <dbl> 0
#> $ molecular_cellular <dbl> 0
#> $ apt <dbl> 0.95
#> $ is_clinical <chr> "No"
#> $ citation_count <dbl> 95
#> $ citations_per_year <dbl> 31.66667
#> $ expected_citations_per_year <dbl> 6.117195
#> $ field_citation_rate <dbl> 11.1856
#> $ provisional <chr> "No"
#> $ x_coord <dbl> 0
#> $ y_coord <dbl> 1
#> $ cited_by_clin <chr> "37563240 37739939"
#> $ cited_by <chr> "35646870 34927080 33262196 34529669 35611…
#> $ references <chr> "23396013 29170503 31243155 19906713 28694…
#> $ doi <chr> "10.1158/2326-6066.CIR-19-0401"
#> $ last_modified <chr> "11/25/2023, 16:43:52"
include_fields <- c(
"pmid", "year", "citation_count", "relative_citation_ratio",
"doi"
)
icite(publications, include_fields)
#> # A tibble: 925 × 5
#> pmid year citation_count relative_citation_ratio doi
#> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 19898898 2010 32 0.78 10.1245/s10434-009-079…
#> 2 24925914 2014 2728 80.7 10.1126/science.1254257
#> 3 24931973 2014 42 1.25 10.1093/bioinformatics…
#> 4 25086664 2014 93 2.46 10.1038/ng.3051
#> 5 25714012 2015 10 0.35 10.18632/oncotarget.29…
#> 6 26083491 2015 26 0.8 10.1371/journal.pone.0…
#> 7 26463000 2016 34 1.12 10.1093/bib/bbv080
#> 8 26594663 2015 151 4.68 10.1016/j.cels.2015.10…
#> 9 26638175 2015 155 4.33 10.1016/j.molcel.2015.…
#> 10 26644347 2015 28 0.81 10.1038/ncomms9726
#> # ℹ 915 more rows