Skip to content

Commit

Permalink
Refactor retrieve_linked_data
Browse files Browse the repository at this point in the history
  • Loading branch information
crew102 committed Sep 18, 2022
1 parent e66504d commit 87e6a3f
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 45 deletions.
27 changes: 13 additions & 14 deletions R/search-pv.R
Original file line number Diff line number Diff line change
Expand Up @@ -304,15 +304,14 @@ search_pv <- function(query,

#' Get Linked Data
#'
#' Some of the endpoints now returns HATEOAS style links to get more data.
#' ex "inventor": "https://search.patentsview.org/api/v1/inventor/252373/"
#'
#' @param url The link that was returned by the API on a previous call
#' Some of the endpoints now return HATEOAS style links to get more data. E.g.,
#' the inventors endpoint may return a link such as:
#' "https://search.patentsview.org/api/v1/inventor/252373/"
#'
#' @param api_key API key. See \href{https://patentsview.org/apis/keyrequest}{
#' Here} for info on creating a key.
#' @param url The link that was returned by the API on a previous call.
#'
#' @return A character vector with field names, same as search_pv()
#' @inherit search_pv return
#' @inheritParams search_pv
#'
#' @examples
#' \dontrun{
Expand All @@ -324,18 +323,18 @@ search_pv <- function(query,
#'
#' @export
retrieve_linked_data <- function(url,
api_key = Sys.getenv("PATENTSVIEW_API_KEY")
api_key = Sys.getenv("PATENTSVIEW_API_KEY"),
...
) {

# Don't send the API key to any domain other than patentsview.org
if (!grepl("^https://[^/]*\\.*patentsview.org/", url)) {
stop2("retrieve_linked_data is only for patentsview.org urls - sends API key")
if (!grepl("^https://[^/]*\\.patentsview.org/", url)) {
stop2("retrieve_linked_data is only for patentsview.org urls")
}

# Go through one_request, it would resend on 429 too many requests
# Go through one_request, which handles resend on throttle errors
# The API doesn't seem to mind ?q=&f=&o=&s= appended to the url
res = one_request("GET", "", url, list(), api_key)
res <- process_resp(res)
res
res <- one_request("GET", "", url, list(), api_key, ...)
process_resp(res)
}

30 changes: 25 additions & 5 deletions man/retrieve_linked_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 7 additions & 26 deletions tests/testthat/test-search-pv.R
Original file line number Diff line number Diff line change
Expand Up @@ -117,24 +117,16 @@ test_that("Throttled requests are automatically retried", {

test_that("We won't expose the user's patentsview API key to random websites", {
skip_on_cran()
skip_on_ci()

# We will try to call the api that tells us who is currently in space
in_space_now_url <- "http://api.open-notify.org/astros.json"

expect_error(
retrieve_linked_data(in_space_now_url)
)
expect_error(retrieve_linked_data(in_space_now_url))
})


test_that("We can call all the legitimate HATEOAS endpoints", {
skip_on_cran()
skip_on_ci()

# Call the new, Get only endpoints that don't accept q: s:, o:, f:, parameters
# The links are returned fully qualified, like below, from some of the endpoints
# These queries retrieve one specific row
single_item_queries <- c(
"https://search.patentsview.org/api/v1/assignee/10/",
"https://search.patentsview.org/api/v1/cpc_group/A01B/",
Expand All @@ -147,28 +139,17 @@ test_that("We can call all the legitimate HATEOAS endpoints", {
"https://search.patentsview.org/api/v1/uspc_mainclass/30/",
"https://search.patentsview.org/api/v1/uspc_subclass/30:100/"
)
dev_null <- lapply(single_item_queries, function(q) {
j <- retrieve_linked_data(q)
expect_equal(j$query_results$total_hits, 1)
})

# These queries can return more than a single row
multi_item_queries <- c(
"https://search.patentsview.org/api/v1/application_citation/10966293/",
"https://search.patentsview.org/api/v1/patent_citation/10966293/"
)

x <- lapply(single_item_queries, function(q) {
Sys.sleep(2)
print(q)
dev_null <- lapply(multi_item_queries, function(q) {
j <- retrieve_linked_data(q)

# here all the total hits should be 1
expect_equal(j$query_results$total_hits, 1)
})

x <- lapply(multi_item_queries, function(q) {
Sys.sleep(2)
print(q)
j <- retrieve_linked_data(q)

# here all the total hits should be 1 or more rows
expect_true(j$query_results$total_hits >= 1)
expect_true(j$query_results$total_hits > 1)
})
})

0 comments on commit 87e6a3f

Please sign in to comment.