ropensci · Aug 19, 2022 · Aug 21, 2022 · Aug 21, 2022 · Aug 21, 2022 · Aug 21, 2022
diff --git a/.gitattributes b/.gitattributes
@@ -5,3 +5,4 @@ src/* text=lf
 R/* text=lf
 docs/* linguist-documentation=true
 inst/* linguist-documentation=true
+man/* linguist-documentation=true
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -1,31 +1,65 @@
 # For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag.
 # https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
-on: [push, pull_request]
+
+# Details on pull_request_target and why it's insecure:
+# https://securitylab.github.com/resources/github-actions-preventing-pwn-requests/
+# Post describing a workaround, from which we take inspiration:
+# https://michaelheap.com/access-secrets-from-forks/
 
 name: R-CMD-check
 
+on:
+  push:
+    branches:
+      - master
+      - 'feature/**'
+      - 'bugfix/**'
+  pull_request_target:
+    types: [opened, synchronize]
+
 jobs:
+  pre-check:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Confirm crew102 triggered the build
+        run: |
+          if [ "${{ github.event.sender.login }}" == "crew102" ]; then
+            echo "Actor is crew102"
+          else
+            echo "Actor is ${{ github.actor }}, failing build."
+            exit 1
+          fi
+
   R-CMD-check:
+    needs: [pre-check]
     runs-on: ${{ matrix.config.os }}
 
     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
 
     strategy:
+      # Run sequentially so that we don't run into rate limit errors that our
+      # code would normally work around via retry logic
+      max-parallel: 1
       fail-fast: false
       matrix:
         config:
           - {os: windows-latest, r: 'release'}
           - {os: macOS-latest, r: 'release'}
           - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
-          - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
+          # - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
 
     env:
       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
       RSPM: ${{ matrix.config.rspm }}
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+      PATENTSVIEW_API_KEY: ${{ secrets.PATENTSVIEW_API_KEY }}
 
     steps:
-      - uses: actions/checkout@v2
+      - name: Checkout code
+        uses: actions/checkout@v3
+        with:
+          # Use the head SHA for pull requests
+          ref: ${{ github.event.pull_request.head.sha || github.sha }}
 
       - uses: r-lib/actions/setup-r@v1
         with:
@@ -71,6 +105,15 @@ jobs:
           rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
         shell: Rscript {0}
 
+      - name: Run examples
+        env:
+          _R_CHECK_CRAN_INCOMING_REMOTE_: false
+        run: |
+          options(crayon.enabled = TRUE)
+          remotes::install_cran("devtools")
+          devtools::run_examples(run_dontrun = TRUE)
+        shell: Rscript {0}
+
       - name: Upload check results
         if: failure()
         uses: actions/upload-artifact@main

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,12 +2,15 @@ Package: patentsview
 Type: Package
 Title: An R Client to the 'PatentsView' API
 Version: 0.3.0
-Authors@R: person("Christopher", "Baker", email = "chriscrewbaker@gmail.com",
-    role = c("aut", "cre"))
+Authors@R: c(
+      person("Christopher", "Baker", email = "chriscrewbaker@gmail.com", 
+        role = c("aut", "cre")),
+      person("Russ", "Allen", email = "rrjallen@yahoo.com", role = "aut")
+    )
 Encoding: UTF-8
 Description: Provides functions to simplify the 'PatentsView' API
     (<https://patentsview.org/apis/purpose>) query language,
-    send GET and POST requests to the API's seven endpoints, and parse the data
+    send GET and POST requests to the API's twenty-seven endpoints, and parse the data
     that comes back.
 URL: https://docs.ropensci.org/patentsview/index.html
 BugReports: https://github.com/ropensci/patentsview/issues
@@ -17,11 +20,13 @@ Depends:
     R (>= 3.1)
 Imports:
     httr,
+    lifecycle,
     jsonlite,
     utils
 Suggests:
     knitr,
     rmarkdown,
     testthat,
     tidyr
-RoxygenNote: 7.1.1
+RoxygenNote: 7.3.2
+Roxygen: list(markdown = TRUE)
diff --git a/NAMESPACE b/NAMESPACE
@@ -11,6 +11,8 @@ export(get_endpoints)
 export(get_fields)
 export(get_ok_pk)
 export(qry_funs)
+export(retrieve_linked_data)
 export(search_pv)
 export(unnest_pv_data)
 export(with_qfuns)
+importFrom(lifecycle,deprecated)
diff --git a/R/data.R b/R/data.R
@@ -1,22 +1,18 @@
 #' Fields data frame
 #'
-#' A data frame containing the names of retrievable and queryable fields for
-#' each of the 7 API endpoints. A yes/no flag (\code{can_query}) indicates
-#' which fields can be included in the user's query. You can also find this
-#' data on the API's online documentation for each endpoint as well (e.g.,
-#' the \href{https://patentsview.org/apis/api-endpoints/patents}{patents
-#' endpoint field list table})
+#' A data frame containing the names of retrievable fields for each of the
+#' endpoints. You can find this data on the API's online documentation for each
+#' endpoint as well (e.g., the
+#' \href{https://patentsview.org/apis/api-endpoints/patents}{patents endpoint
+#' field list table}).
 #'
-#' @format A data frame with 992 rows and 7 variables:
+#' @format A data frame with the following columns:
 #' \describe{
 #'   \item{endpoint}{The endpoint that this field record is for}
-#'   \item{field}{The name of the field}
-#'   \item{data_type}{The field's data type (string, date, float, integer,
-#'     fulltext)}
-#'   \item{can_query}{An indicator for whether the field can be included in
-#'     the user query for the given endpoint}
+#'   \item{field}{The complete name of the field, including the parent group if
+#'   applicable}
+#'   \item{data_type}{The field's input data type}
 #'   \item{group}{The group the field belongs to}
-#'   \item{common_name}{The field's common name}
-#'   \item{description}{A description of the field}
+#'   \item{common_name}{The field name without the parent group structure}
 #' }
 "fieldsdf"
diff --git a/R/get-fields.R b/R/get-fields.R
@@ -7,35 +7,35 @@
 #' possible fields for each endpoint).
 #'
 #' @param endpoint The API endpoint whose field list you want to get. See
-#'   \code{\link{get_endpoints}} for a list of the 7 endpoints.
+#'   \code{\link{get_endpoints}} for a list of the 27 endpoints.
 #' @param groups A character vector giving the group(s) whose fields you want
 #'   returned. A value of \code{NULL} indicates that you want all of the
 #'   endpoint's fields (i.e., do not filter the field list based on group
 #'   membership). See the field tables located online to see which groups you
 #'   can specify for a given endpoint (e.g., the
-#'   \href{https://patentsview.org/apis/api-endpoints/patents}{patents
+#'   \href{https://search.patentsview.org/docs/docs/Search%20API/SearchAPIReference/#patent}{patent
 #'   endpoint table}), or use the \code{fieldsdf} table
-#'   (e.g., \code{unique(fieldsdf[fieldsdf$endpoint == "patents", "group"])}).
+#'   (e.g., \code{unique(fieldsdf[fieldsdf$endpoint == "patent", "group"])}).
 #'
 #' @return A character vector with field names.
 #'
 #' @examples
-#' # Get all assignee-level fields for the patents endpoint:
-#' fields <- get_fields(endpoint = "patents", groups = "assignees")
+#' # Get all assignee-level fields for the patent endpoint:
+#' fields <- get_fields(endpoint = "patent", groups = "assignees")
 #'
-#' #...Then pass to search_pv:
+#' # ...Then pass to search_pv:
 #' \dontrun{
 #'
 #' search_pv(
 #'   query = '{"_gte":{"patent_date":"2007-01-04"}}',
 #'   fields = fields
 #' )
-#'}
-#' # Get all patent and assignee-level fields for the patents endpoint:
-#' fields <- get_fields(endpoint = "patents", groups = c("assignees", "patents"))
+#' }
+#' # Get all patent and assignee-level fields for the patent endpoint:
+#' fields <- get_fields(endpoint = "patent", groups = c("assignees", "patents"))
 #'
 #' \dontrun{
-#' #...Then pass to search_pv:
+#' # ...Then pass to search_pv:
 #' search_pv(
 #'   query = '{"_gte":{"patent_date":"2007-01-04"}}',
 #'   fields = fields
@@ -48,34 +48,18 @@ get_fields <- function(endpoint, groups = NULL) {
   if (is.null(groups)) {
     fieldsdf[fieldsdf$endpoint == endpoint, "field"]
   } else {
-    validate_groups(groups = groups)
+    validate_groups(endpoint, groups = groups)
     fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$group %in% groups, "field"]
   }
 }
 
 #' Get endpoints
 #'
-#' This function reminds the user what the 7 possible PatentsView API endpoints
+#' This function reminds the user what the possible PatentsView API endpoints
 #' are.
 #'
-#' @return A character vector with the names of the 7 endpoints. Those endpoints are:
-#'
-#' \itemize{
-#'    \item assignees
-#'    \item cpc_subsections
-#'    \item inventors
-#'    \item locations
-#'    \item nber_subcategories
-#'    \item patents
-#'    \item uspc_mainclasses
-#'  }
-#'
-#' @examples
-#' get_endpoints()
+#' @return A character vector with the names of each endpoint.
 #' @export
 get_endpoints <- function() {
-  c(
-    "assignees", "cpc_subsections", "inventors", "locations",
-    "nber_subcategories", "patents", "uspc_mainclasses"
-  )
+  unique(fieldsdf$endpoint)
 }
diff --git a/R/patentsview-package.R b/R/patentsview-package.R
@@ -0,0 +1,7 @@
+#' @keywords internal
+"_PACKAGE"
+
+## usethis namespace: start
+#' @importFrom lifecycle deprecated
+## usethis namespace: end
+NULL
diff --git a/R/print.R b/R/print.R
@@ -15,18 +15,11 @@ print.pv_data_result <- function(x, ...) {
 
   k <- vapply(names(df), function(y) class(df[, y]), FUN.VALUE = character(1))
 
-  dat_level <- c(
-    patents = "a patent", inventors = "an inventor",
-    assignees = "an assignee", locations = "a location",
-    cpc_subsections = "a CPC subsection", uspc_mainclasses = "a USPC main class",
-    nber_subcategories = "a NBER subcategory"
-  )
-
   lst <- ifelse("list" %in% k, " (with list column(s) inside) ", " ")
 
   cat(
     "#### A list with a single data frame", lst, "on ",
-    dat_level[[names(x)[1]]], " level:\n\n",
+    names(x)[1], " level:\n\n",
     sep = ""
   )
 

diff --git a/R/process-error.R b/R/process-error.R
@@ -11,18 +11,19 @@ throw_if_loc_error <- function(resp) {
     if (num_grps > 2) {
       stop2(
         "Your request resulted in a 500 error, likely because you have ",
-        "requested too many fields in your request (the locations endpoint ",
+        "requested too many fields in your request (the location endpoint ",
         "currently has restrictions on the number of fields/groups you can ",
         "request). Try slimming down your field list and trying again."
       )
     }
   }
 }
 
+# TODO: not sure this location-endpoint check is still applicable
 #' @noRd
 hit_locations_ep <- function(url) {
   grepl(
-    "^https://api.patentsview.org/locations/",
+    "^https://search.patentsview.org/api/v1/location/",
     url,
     ignore.case = TRUE
   )
@@ -32,7 +33,7 @@ hit_locations_ep <- function(url) {
 get_num_groups <- function(url) {
   prsd_json_filds <- gsub(".*&f=([^&]*).*", "\\1", utils::URLdecode(url))
   fields <- jsonlite::fromJSON(prsd_json_filds)
-  grps <- fieldsdf[fieldsdf$endpoint == "locations" &
+  grps <- fieldsdf[fieldsdf$endpoint == "location" &
                      fieldsdf$field %in% fields, "group"]
   length(unique(grps))
 }
@@ -52,5 +53,5 @@ xheader_er_or_status <- function(resp) {
 #' @noRd
 get_x_status <- function(resp) {
   headers <- httr::headers(resp)
-  headers[grepl("x-status-reason", names(headers), ignore.case = TRUE)]
+  headers[grepl("x-status-reason$", names(headers), ignore.case = TRUE)]
 }
diff --git a/R/process-resp.R b/R/process-resp.R
@@ -1,37 +1,50 @@
+#' @noRd
+parse_resp <- function(resp) {
+  j <- httr::content(resp, as = "text", encoding = "UTF-8")
+  jsonlite::fromJSON(
+    j,
+    simplifyVector = TRUE, simplifyDataFrame = TRUE, simplifyMatrix = TRUE
+  )
+}
+
 #' @noRd
 get_request <- function(resp) {
   gp <- structure(
     list(method = resp$req$method, url = resp$req$url),
     class = c("list", "pv_request")
   )
 
-  if (gp$method == "POST")
+  if (gp$method == "POST") {
     gp$body <- rawToChar(resp$req$options$postfields)
+  }
 
   gp
 }
 
 #' @noRd
 get_data <- function(prsd_resp) {
   structure(
-    list(prsd_resp[[1]]),
-    names = names(prsd_resp[1]),
+    list(prsd_resp[[4]]),
+    names = names(prsd_resp[4]),
     class = c("list", "pv_data_result")
   )
 }
 
 #' @noRd
+# Each endpoint used to return its own *_count field (e.g.,
+# total_assignee_count). Now all endpoints return a total_hits attribute.
 get_query_results <- function(prsd_resp) {
   structure(
-    prsd_resp[grepl("_count", names(prsd_resp))],
+    prsd_resp["total_hits"],
     class = c("list", "pv_query_result")
   )
 }
 
 #' @noRd
 process_resp <- function(resp) {
-  prsd_resp <- parse_resp(resp)
+  if (httr::http_error(resp)) throw_er(resp)
 
+  prsd_resp <- parse_resp(resp)
   request <- get_request(resp)
   data <- get_data(prsd_resp)
   query_results <- get_query_results(prsd_resp)

diff --git a/R/query-dsl.R b/R/query-dsl.R
@@ -1,4 +1,4 @@
-# Design adapated from http://adv-r.had.co.nz/dsl.html
+# Design adapted from http://adv-r.had.co.nz/dsl.html
 
 #' @noRd
 lapply2 <- function(...) sapply(..., USE.NAMES = TRUE, simplify = FALSE)