Skip to content

Commit

Permalink
Add changes introduced by Russ re: minimal working API client using n…
Browse files Browse the repository at this point in the history
…ew API version
  • Loading branch information
crew102 committed Aug 21, 2022
1 parent dbe2fa5 commit 95fae4c
Show file tree
Hide file tree
Showing 20 changed files with 690 additions and 1,303 deletions.
16 changes: 8 additions & 8 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,14 +72,14 @@ jobs:
rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
shell: Rscript {0}

- name: Run examples
env:
_R_CHECK_CRAN_INCOMING_REMOTE_: false
run: |
options(crayon.enabled = TRUE)
remotes::install_cran("devtools")
devtools::run_examples(run_dontrun = TRUE)
shell: Rscript {0}
# - name: Run examples
# env:
# _R_CHECK_CRAN_INCOMING_REMOTE_: false
# run: |
# options(crayon.enabled = TRUE)
# remotes::install_cran("devtools")
# devtools::run_examples(run_dontrun = TRUE)
# shell: Rscript {0}

- name: Upload check results
if: failure()
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ export(cast_pv_data)
export(get_endpoints)
export(get_fields)
export(get_ok_pk)
export(get_test_query)
export(qry_funs)
export(search_pv)
export(unnest_pv_data)
Expand Down
9 changes: 5 additions & 4 deletions R/data.R
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
#' Fields data frame
#'
#' A data frame containing the names of retrievable and queryable fields for
#' each of the 7 API endpoints. A yes/no flag (\code{can_query}) indicates
#' each of the 13 API endpoints. A yes/no flag (\code{can_query}) indicates
#' which fields can be included in the user's query. You can also find this
#' data on the API's online documentation for each endpoint as well (e.g.,
#' the \href{https://patentsview.org/apis/api-endpoints/patents}{patents
#' endpoint field list table})
#'
#' @format A data frame with 992 rows and 7 variables:
#' @format A data frame with 130 rows and 8 variables:
#' \describe{
#' \item{endpoint}{The endpoint that this field record is for}
#' \item{field}{The name of the field}
#' \item{data_type}{The field's data type (string, date, float, integer,
#' \item{data_type}{The field's input data type (string, date, float, integer,
#' fulltext)}
#' \item{can_query}{An indicator for whether the field can be included in
#' the user query for the given endpoint}
#' \item{group}{The group the field belongs to}
#' \item{common_name}{The field's common name}
#' \item{description}{A description of the field}
#' \item{plain_name}{field without dot parent structure}
#' \item{cast_as}{data type we want the return to be cast as}
#' }
"fieldsdf"
26 changes: 17 additions & 9 deletions R/get-fields.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,21 @@
#'
#' @examples
#' # Get all assignee-level fields for the patents endpoint:
#' fields <- get_fields(endpoint = "patents", groups = "assignees")
#' fields <- get_fields(endpoint = "patents", groups = "assignees_at_grant")
#'
#' #...Then pass to search_pv:
#' # ...Then pass to search_pv:
#' \dontrun{
#'
#' search_pv(
#' query = '{"_gte":{"patent_date":"2007-01-04"}}',
#' fields = fields
#' )
#'}
#' }
#' # Get all patent and assignee-level fields for the patents endpoint:
#' fields <- get_fields(endpoint = "patents", groups = c("assignees", "patents"))
#' fields <- get_fields(endpoint = "patents", groups = c("assignees_at_grant", "patents"))
#'
#' \dontrun{
#' #...Then pass to search_pv:
#' # ...Then pass to search_pv:
#' search_pv(
#' query = '{"_gte":{"patent_date":"2007-01-04"}}',
#' fields = fields
Expand All @@ -55,27 +55,35 @@ get_fields <- function(endpoint, groups = NULL) {

#' Get endpoints
#'
#' This function reminds the user what the 7 possible PatentsView API endpoints
#' This function reminds the user what the 13 possible PatentsView API endpoints
#' are.
#'
#' @return A character vector with the names of the 7 endpoints. Those endpoints are:
#' @return A character vector with the names of the 13 endpoints. Those endpoints are:
#'
#' \itemize{
#' \item assignees
#' \item cpc_subsections
#' \item cpc_groups
#' \item cpc_subgroups
#' \item inventors
#' \item locations
#' \item nber_categories
#' \item nber_subcategories
#' \item patents
#' \item uspc_mainclasses
#' \item uspc_subclasses
#' \item application_citations
#' \item patent_citations
#' }
#'
#' @examples
#' get_endpoints()
#' @export
get_endpoints <- function() {
c(
"assignees", "cpc_subsections", "inventors", "locations",
"nber_subcategories", "patents", "uspc_mainclasses"
"application_citations", "assignees", "cpc_groups", "cpc_subgroups",
"cpc_subsections", "inventors", "locations", "nber_categories",
"nber_subcategories", "patent_citations", "patents",
"uspc_subclasses", "uspc_mainclasses"
)
}
13 changes: 8 additions & 5 deletions R/process-resp.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,35 +5,38 @@ get_request <- function(resp) {
class = c("list", "pv_request")
)

if (gp$method == "POST")
if (gp$method == "POST") {
gp$body <- rawToChar(resp$req$options$postfields)
}

gp
}

#' @noRd
get_data <- function(prsd_resp) {
structure(
list(prsd_resp[[1]]),
names = names(prsd_resp[1]),
list(prsd_resp[[4]]),
names = names(prsd_resp[4]),
class = c("list", "pv_data_result")
)
}

#' @noRd
# There used to be an endpoint specific _count ex total_assignee_count
# Now all endpoints return a total_hits attribute
get_query_results <- function(prsd_resp) {
structure(
prsd_resp[grepl("_count", names(prsd_resp))],
prsd_resp["total_hits"],
class = c("list", "pv_query_result")
)
}

#' @noRd
process_resp <- function(resp) {
prsd_resp <- parse_resp(resp)

request <- get_request(resp)
data <- get_data(prsd_resp)

query_results <- get_query_results(prsd_resp)

structure(
Expand Down
51 changes: 32 additions & 19 deletions R/search-pv.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#' @noRd
get_base <- function(endpoint)
sprintf("https://api.patentsview.org/%s/query", endpoint)
get_base <- function(endpoint) {
sprintf("https://search.patentsview.org/api/v1/%s/", to_singular(endpoint))
}

#' @noRd
tojson_2 <- function(x, ...) {
Expand All @@ -16,10 +17,8 @@ to_arglist <- function(fields, subent_cnts, mtchd_subent_only,
fields = fields,
sort = list(as.list(sort)),
opts = list(
include_subentity_total_counts = subent_cnts,
matched_subentities_only = mtchd_subent_only,
page = page,
per_page = per_page
offset = (page - 1) * per_page,
size = per_page
)
)
}
Expand Down Expand Up @@ -55,10 +54,11 @@ one_request <- function(method, query, base_url, arg_list, ...) {

if (method == "GET") {
get_url <- get_get_url(query, base_url, arg_list)
resp <- httr::GET(get_url, ua, ...)
resp <- httr::GET(get_url, httr::add_headers("X-Api-Key" = pview_key()), ua, ...)
} else {
body <- get_post_body(query, arg_list)
resp <- httr::POST(base_url, body = body, ua, ...)
# api change, they want a json object, not a string representation of one
resp <- httr::POST(base_url, httr::add_headers("X-Api-Key" = pview_key(), "Content-Type" = "application/json"), body = body, ua, ...)
}

if (httr::http_error(resp)) throw_er(resp)
Expand All @@ -68,8 +68,8 @@ one_request <- function(method, query, base_url, arg_list, ...) {

#' @noRd
request_apply <- function(ex_res, method, query, base_url, arg_list, ...) {
req_pages <- ceiling(ex_res$query_results[[1]] / arg_list$opts$size)

req_pages <- ceiling(ex_res$query_results[[1]] / 10000)
if (req_pages < 1) {
stop(
"No records matched your query...Can't download multiple pages",
Expand All @@ -79,8 +79,8 @@ request_apply <- function(ex_res, method, query, base_url, arg_list, ...) {

tmp <- lapply(1:req_pages, function(i) {
Sys.sleep(3)
arg_list$opts$per_page <- 10000
arg_list$opts$page <- i
arg_list$opts$size <- 1000
arg_list$opts$offset <- (i - 1) * arg_list$opts$size
x <- one_request(method, query, base_url, arg_list, ...)
x$data[[1]]
})
Expand Down Expand Up @@ -118,7 +118,9 @@ request_apply <- function(ex_res, method, query, base_url, arg_list, ...) {
#' out the fields available for a given endpoint.
#' @param endpoint The web service resource you wish to search. \code{endpoint}
#' must be one of the following: "patents", "inventors", "assignees",
#' "locations", "cpc_subsections", "uspc_mainclasses", or "nber_subcategories".
#' "locations", "cpc_groups", "cpc_subgroups", "cpc_subsections", "uspc_mainclasses",
#' "uspc_subclasses","nber_categories", "nber_subcategories", "application_citations",
#' or "patent_citations"
#' @param subent_cnts Do you want the total counts of unique subentities to be
#' returned? This is equivalent to the \code{include_subentity_total_counts}
#' parameter found \href{https://patentsview.org/apis/api-query-language}{here}.
Expand All @@ -131,7 +133,7 @@ request_apply <- function(ex_res, method, query, base_url, arg_list, ...) {
#' \href{https://patentsview.org/apis/api-query-language}{here}.
#' @param page The page number of the results that should be returned.
#' @param per_page The number of records that should be returned per page. This
#' value can be as high as 10,000 (e.g., \code{per_page = 10000}).
#' value can be as high as 1,000 (e.g., \code{per_page = 1000}).
#' @param all_pages Do you want to download all possible pages of output? If
#' \code{all_pages = TRUE}, the values of \code{page} and \code{per_page} are
#' ignored.
Expand Down Expand Up @@ -168,14 +170,13 @@ request_apply <- function(ex_res, method, query, base_url, arg_list, ...) {
#' }
#'
#' @examples
#'
#' \dontrun{
#'
#' search_pv(query = '{"_gt":{"patent_year":2010}}')
#'
#' search_pv(
#' query = qry_funs$gt(patent_year = 2010),
#' fields = get_fields("patents", c("patents", "assignees"))
#' fields = get_fields("patents", c("patents", "assignees_at_grant"))
#' )
#'
#' search_pv(
Expand All @@ -186,17 +187,19 @@ request_apply <- function(ex_res, method, query, base_url, arg_list, ...) {
#' )
#'
#' search_pv(
#' query = qry_funs$eq(inventor_last_name = "crew"),
#' query = qry_funs$eq(name_last = "crew"),
#' endpoint = "inventors",
#' all_pages = TRUE
#' )
#'
#' search_pv(
#' query = qry_funs$contains(inventor_last_name = "smith"),
#' query = qry_funs$contains(name_last = "smith"),
#' endpoint = "assignees"
#' )
#'
#' search_pv(
#' query = qry_funs$contains(inventor_last_name = "smith"),
#' query = qry_funs$contains(inventors_at_grant.name_last = "smith"),
#' endpoint = "patents",
#' config = httr::timeout(40)
#' )
#' }
Expand All @@ -214,7 +217,6 @@ search_pv <- function(query,
method = "GET",
error_browser = NULL,
...) {

if (!is.null(error_browser))
warning("error_browser parameter has been deprecated")

Expand Down Expand Up @@ -245,3 +247,14 @@ search_pv <- function(query,

res
}

#' @noRd
pview_key <- function() {
api_key <- Sys.getenv("PATENTSVIEW_API_KEY")
if (identical(api_key, "")) {
stop("Please set env var PATENTSVIEW_API_KEY to your patentsview api key",
call. = FALSE
)
}
api_key
}
Binary file modified R/sysdata.rda
Binary file not shown.
45 changes: 45 additions & 0 deletions R/test-helpers.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@

# I tried having this filename as helper-queries.R but the ubuntu-20.04 job failed
# while the other three jobs worked. Serveral tests use get_test_query()
# Feels a bit awkward...

query_for_endpoint <- c(
"application_citations" = '{"patent_number": "10966293"}', # still searchable by pn
"assignees" = '{"_text_phrase":{"name_last": "Clinton"}}',
"cpc_groups" = '{"cpc_group_id": "A01B"}',
"cpc_subgroups" = '{"cpc_subgroup_id": "A01B1/00"}',
"cpc_subsections" = '{"cpc_subsection_id": "A01"}',
"inventors" = '{"_text_phrase":{"name_last":"Clinton"}}',
"locations" = NA,
"nber_categories" = '{"nber_category_id": "1"}',
"nber_subcategories" = '{"nber_subcategory_id": "11"}',
"patents" = '{"patent_number":"5116621"}', # still searchable by pn
"patent_citations" = '{"patent_number":"5116621"}', # still searchable by pn
"uspc_mainclasses" = '{"uspc_mainclass_id":"30"}',
"uspc_subclasses" = '{"uspc_subclass_id": "100/1"}'
)

#' Get Test Query
#'
#' In the new version of the api, only three of the endpoints are searchable
#' by patent number. This function provides a sample query for each
#' endpoint, except for locations, which isn't on the test server yet
#'
#' @param endpoint The web service resource you want a test query for. \code{endpoint}
#' must be one of the following: "patents", "inventors", "assignees",
#' "locations", "cpc_groups", "cpc_subgroups", "cpc_subsections", "uspc_mainclasses",
#' "uspc_subclasses","nber_categories", "nber_subcategories", "application_citations",
#' or "patent_citations"
#'
#' @return a test query for the specified endpoint.
#'
#' @examples
#' \dontrun{
#'
#' get_test_query("patents")
#' }
#'
#' @export
get_test_query <- function(endpoint) {
ifelse(endpoint %in% names(query_for_endpoint), query_for_endpoint[[endpoint]], NA)
}
Loading

0 comments on commit 95fae4c

Please sign in to comment.