Skip to content

Commit

Permalink
Merge branch 'master' into b-pandoc-convert-abs-path
Browse files Browse the repository at this point in the history
  • Loading branch information
krlmlr committed Sep 25, 2020
2 parents f318f74 + a68b5e5 commit 3206600
Show file tree
Hide file tree
Showing 21 changed files with 284 additions and 58 deletions.
19 changes: 19 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,24 @@
# downlit (development version)

* Autolinking can use metadata stored in package itself with pkgdown setting
`deploy.install_metadata`; this is useful for packages that only have
private websites (@matthewstrasiotto, #29)

* Autolinking guesses reference and article urls for pkgdown sites that haven't
set url (@krlmlr, #44).

* R6 classes are autolinked when a new object is created i.e. in
`r6_object$new()`, `r6_object` will link to its docs (#59, @maelle).

* R6 methods are no longer autolinked as if they were functions of the same
name (#54, @maelle).

* `classes_pandoc()` and `classes_chroma()` have been thoroughly revieweed to
produce syntax highlighting as similar as possible to RStudio.

* `downlit_html_path()` has a more flexible XPath identifying R code blocks,
and a `classes` argument (#53, @maelle, @cderv)

* Trailing `/` are no longer stripped from URLs (#45, @krlmlr).

* Removed extra newline in `<pre>` output (#42, @krlmlr).
Expand Down
33 changes: 28 additions & 5 deletions R/downlit-html.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#' larger pipeline.
#'
#' @param in_path,out_path Input and output paths for HTML file
#' @inheritParams highlight
#' @param x An `xml2::xml_node`
#' @return `downlit_html_path()` invisibly returns `output_path`;
#' `downlit_html_node()` modifies `x` in place and returns nothing.
Expand All @@ -24,28 +25,30 @@
#' # node is modified in place
#' downlit_html_node(node)
#' node
downlit_html_path <- function(in_path, out_path) {
downlit_html_path <- function(in_path, out_path, classes = classes_pandoc()) {
if (!is_installed("xml2")) {
abort("xml2 package required .html transformation")
}

html <- xml2::read_html(in_path, encoding = "UTF-8")
downlit_html_node(html)
downlit_html_node(html, classes = classes)
xml2::write_html(html, out_path, format = FALSE)

invisible(out_path)
}

#' @export
#' @rdname downlit_html_path
downlit_html_node <- function(x) {
downlit_html_node <- function(x, classes = classes_pandoc()) {
stopifnot(inherits(x, "xml_node"))

# <pre class="sourceCode r">
xpath_block <- ".//pre[contains(@class, 'sourceCode r')]"
# and <pre class="r"> which is needed when knitting a bookdown gitbook
# where highlight is set to NULL
xpath_block <- ".//pre[contains(@class, 'sourceCode r')] | .//pre[@class='r']"
tweak_children(x, xpath_block, highlight,
pre_class = "downlit",
classes = classes_pandoc(),
classes = classes,
replace = "node"
)

Expand All @@ -54,6 +57,11 @@ downlit_html_node <- function(x) {
bad_ancestor <- c("h1", "h2", "h3", "h4", "h5", "a")
bad_ancestor <- paste0("ancestor::", bad_ancestor, collapse = "|")
xpath_inline <- paste0(".//code[count(*) = 0 and not(", bad_ancestor, ")]")

# replace inline code "{packagename}" with linked text if possible
tweak_children(x, xpath_inline, autolink_curly, replace = "node")

# handle remaining inline code
tweak_children(x, xpath_inline, autolink, replace = "contents")

invisible()
Expand All @@ -78,6 +86,21 @@ tweak_children <- function(node, xpath, fun, ..., replace = c("node", "contents"
invisible()
}

autolink_curly <- function(text) {
package_name <- extract_curly_package(text)
if (is.na(package_name)) {
return(NA_character_)
}

href <- href_package(package_name)
if (is.na(href)) {
return(NA_character_)
}

paste0("<a href='", href, "'>", package_name, "</a>")
}


as_xml <- function(x) {
xml2::xml_contents(xml2::xml_contents(xml2::read_html(x)))[[1]]
}
19 changes: 16 additions & 3 deletions R/downlit-md.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@
#' @return `downlit_md_path()` invisibly returns `output_path`;
#' `downlit_md_string()` returns a string containing markdown.
#' @examples
#' if (rmarkdown::pandoc_available("1.19")) {
#' downlit_md_string("`base::t()`")
#' downlit_md_string("`base::t`")
#' downlit_md_string("* `base::t`")
#'
#' # But don't highlight in headings
#' downlit_md_string("## `base::t`")
#' }
downlit_md_path <- function(in_path, out_path, format = NULL) {
check_packages()

Expand Down Expand Up @@ -131,9 +133,20 @@ transform_code <- function(x, version) {
lapply(x, transform_code, version = version)
} else {
if (x$t == "Code") {
href <- autolink_url(x$c[[2]])
if (!is.na(href)) {
x <- pandoc_link(pandoc_attr(), list(x), pandoc_target(href))
package_name <- extract_curly_package(x$c[[2]])
# packages à la {pkgname}
if(!is.na(package_name)) {
href <- href_package(package_name)
if (!is.na(href)) {
x <- list(t = "Str", c = package_name)
x <- pandoc_link(pandoc_attr(), list(x), pandoc_target(href))
} # otherwise we do not touch x
} else {
# other cases
href <- autolink_url(x$c[[2]])
if (!is.na(href)) {
x <- pandoc_link(pandoc_attr(), list(x), pandoc_target(href))
}
}
} else if (x$t == "CodeBlock") {
out <- highlight(x$c[[2]], pre_class = "chroma")
Expand Down
107 changes: 86 additions & 21 deletions R/highlight.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,42 +119,79 @@ token_class <- function(token, text, classes) {
# for syntax highlighting
# https://github.com/wch/r-source/blob/trunk/src/main/gram.c#L511
token_type <- function(x, text) {
special <- c("IF", "ELSE", "REPEAT", "WHILE", "FOR", "IN", "NEXT", "BREAK")
infix <- c(
"'-'", "'+'", "'!'", "'~'", "'?'", "':'", "'*'", "'/'", "'^'", "'~'",
"SPECIAL", "LT", "GT", "EQ", "GE", "LE", "AND", "AND2", "OR",
"OR2", "LEFT_ASSIGN", "RIGHT_ASSIGN", "'$'", "'@'", "EQ_ASSIGN"
special <- c(
"FUNCTION",
"FOR", "IN", "BREAK", "NEXT", "REPEAT", "WHILE",
"IF", "ELSE"
)
rstudio_special <- c(
"return", "switch", "try", "tryCatch", "stop",
"warning", "require", "library", "attach", "detach",
"source", "setMethod", "setGeneric", "setGroupGeneric",
"setClass", "setRefClass", "R6Class", "UseMethod", "NextMethod"
)

x[x %in% special] <- "special"
x[x == "SYMBOL_FUNCTION_CALL" & text %in% rstudio_special] <- "special"

infix <- c(
# algebra
"'-'", "'+'", "'~'", "'*'", "'/'", "'^'",
# comparison
"LT", "GT", "EQ", "GE", "LE", "NE",
# logical
"'!'", "AND", "AND2", "OR", "OR2",
# assignment / equals
"LEFT_ASSIGN", "RIGHT_ASSIGN", "EQ_ASSIGN", "EQ_FORMALS", "EQ_SUB",
# miscellaneous
"'$'", "'@'","'~'", "'?'", "':'", "SPECIAL"
)
x[x %in% infix] <- "infix"

x[x == "NUM_CONST" & text %in% c("TRUE", "FALSE")] <- "logical"
parens <- c("LBB", "'['", "']'", "'('", "')'", "'{'", "'}'")
x[x %in% parens] <- "parens"

# Matches treatment of constants in RStudio
constant <- c(
"NA", "Inf", "NaN", "TRUE", "FALSE",
"NA_integer_", "NA_real_", "NA_character_", "NA_complex_"
)
x[x == "NUM_CONST" & text %in% constant] <- "constant"
x[x == "SYMBOL" & text %in% c("T", "F")] <- "constant"
x[x == "NULL_CONST"] <- "constant"

x
}

# Pandoc styles are based on KDE default styles:
# https://docs.kde.org/stable5/en/applications/katepart/highlight.html#kate-highlight-default-styles
# But are given a two letter abbreviations (presumably to reduce generated html size)
# But in HTML use two letter abbreviations:
# https://github.com/jgm/skylighting/blob/a1d02a0db6260c73aaf04aae2e6e18b569caacdc/skylighting-core/src/Skylighting/Format/HTML.hs#L117-L147
# Summary at
# https://docs.google.com/spreadsheets/d/1JhBtQSCtQ2eu2RepLTJONFdLEnhM3asUyMMLYE3tdYk/edit#gid=0
#
# Default syntax highlighting def for R:
# https://github.com/KDE/syntax-highlighting/blob/master/data/syntax/r.xml
#' @export
#' @rdname highlight
classes_pandoc <- function() {
c(
"logical" = "fl",
"constant" = "cn",
"NUM_CONST" = "fl",
"STR_CONST" = "st",
"NULL_CONST" = "kw",
"FUNCTION" = "fu",
"special" = "co",

"special" = "kw",
"parens" = "op",
"infix" = "op",
"SYMBOL" = "kw",

"SLOT" = "va",
"SYMBOL" = "va",
"SYMBOL_FORMALS" = "va",

"NS_GET" = "fu",
"NS_GET_INT" = "fu",
"SYMBOL_FUNCTION_CALL" = "fu",
"SYMBOL_PACKAGE" = "kw",
"SYMBOL_FORMALS" = "kw",
"SYMBOL_PACKAGE" = "fu",

"COMMENT" = "co"
)
}
Expand All @@ -164,21 +201,38 @@ classes_pandoc <- function() {
#' @rdname highlight
classes_chroma <- function() {
c(
"logical" = "kc",
"constant" = "kc",
"NUM_CONST" = "m",
"STR_CONST" = "s",
"NULL_CONST" = "l",
"FUNCTION" = "nf",

"special" = "kr",
"parens" = "o",
"infix" = "o",
"SYMBOL" = "k",

"SLOT" = "nv",
"SYMBOL" = "nv",
"SYMBOL_FORMALS" = "nv",

"NS_GET" = "nf",
"NS_GET_INT" = "nf",
"SYMBOL_FUNCTION_CALL" = "nf",
"SYMBOL_PACKAGE" = "k",
"SYMBOL_FORMALS" = "k",
"SYMBOL_PACKAGE" = "nf",

"COMMENT" = "c"
)
}

classes_show <- function(x, classes = classes_pandoc()) {
text <- paste0(deparse(substitute(x)), collapse = "\n")
out <- parse_data(text)$data
out$class <- token_class(out$token, out$text, classes)
out$class[is.na(out$class)] <- ""

out <- out[out$terminal, c("token", "text", "class")]
rownames(out) <- NULL
out
}

# Linking -----------------------------------------------------------------

token_href <- function(token, text) {
Expand All @@ -196,6 +250,17 @@ token_href <- function(token, text) {
# earlier library() statements to affect the highlighting of later blocks
fun <- which(token %in% "SYMBOL_FUNCTION_CALL")
fun <- setdiff(fun, ns_fun)
fun <- fun[token[fun-1] != "'$'"]

# Highlight R6 instantiation
r6_new_call <- which(
text == "new" & token == "SYMBOL_FUNCTION_CALL"
)
r6_new_call <- r6_new_call[token[r6_new_call - 1] == "'$'"]
r6_new_call <- r6_new_call[token[r6_new_call - 3] == "SYMBOL"]

fun <- c(fun, r6_new_call - 3)

href[fun] <- map_chr(text[fun], href_topic_local)

# Highlight packages
Expand Down
5 changes: 4 additions & 1 deletion R/link.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,14 @@ href_expr <- function(expr) {

if (fun_name %in% c("library", "require", "requireNamespace")) {
if (length(expr) == 1) {
return(NA_character_)
return(href_topic(fun_name))
}
pkg <- as.character(expr[[2]])
href_package(pkg)
} else if (fun_name == "vignette") {
if (length(expr) == 1) {
return(href_topic(fun_name))
}
expr <- call_standardise(expr)
topic_ok <- is.character(expr$topic)
package_ok <- is.character(expr$package) || is.null(expr$package)
Expand Down
23 changes: 23 additions & 0 deletions R/metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@ remote_package_article_url <- function(package) {
# Retrieve remote metadata ------------------------------------------------

remote_metadata <- function(package) {
# Is the metadata installed with the package?
meta <- local_metadata(package)
if (!is.null(meta)) {
return(meta)
}

# Otherwise, look in package websites, caching since this is a slow operation
tempdir <- Sys.getenv("RMARKDOWN_PREVIEW_DIR", unset = tempdir())
dir.create(file.path(tempdir, "downlit"), showWarnings = FALSE)
cache_path <- file.path(tempdir, "downlit", package)
Expand All @@ -34,6 +41,15 @@ remote_metadata <- function(package) {
}
}

local_metadata <- function(package) {
local_path <- system.file("pkgdown.yml", package = package)
if (local_path == "") {
NULL
} else {
yaml::read_yaml(local_path)
}
}

remote_metadata_slow <- function(package) {
urls <- package_urls(package)

Expand All @@ -44,6 +60,13 @@ remote_metadata_slow <- function(package) {
if (has_name(yaml, "articles")) {
yaml$articles <- unlist(yaml$articles)
}
if (!has_name(yaml, "urls")) {
base_url <- dirname(url)
yaml$urls <- list(
reference = paste0(base_url, "/reference"),
article = paste0(base_url, "/articles")
)
}
return(yaml)
}
}
Expand Down
16 changes: 16 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,19 @@ safe_parse <- function(text) {
error = function(e) NULL
)
}


extract_curly_package <- function(x) {
# regex adapted from https://github.com/r-lib/usethis/blob/d5857737b4780c3c3d8fe6fb44ef70e81796ac8e/R/description.R#L134
if (! grepl("^\\{[a-zA-Z][a-zA-Z0-9.]+\\}$", x)) {
return(NA)
}

# remove first curly brace
x <- sub("\\{", "", x)
# remove second curly brace and return
x <- sub("\\}", "", x)

x
}

Loading

0 comments on commit 3206600

Please sign in to comment.