Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lints to ensure link text for EIPs should match the EIP's number #99

Open
wants to merge 16 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions eipw-lint/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -419,12 +419,24 @@ pub fn default_lints_enum() -> impl Iterator<Item = (&'static str, DefaultLint<&
message: "proposals must be referenced with the form `EIP-N` (not `EIPN` or `EIP N`)",
}),
),
(
"markdown-link-eip",
MarkdownLinkEip {
pattern: markdown::LinkEip(r"eip-([^.]*)\.md(#.+)?$")
}
),
(
"markdown-link-first",
MarkdownLinkFirst {
pattern: markdown::LinkFirst(r"(?i)(?:eip|erc)-[0-9]+"),
}
),
(
"markdown-link-other",
MarkdownLinkOther {
pattern: markdown::LinkOther(r"^(EIP|ERC)-(\d+)\s*\S*$")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be case insensitive ((?i))?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure about it. EIP-1 says the text should look like `EIP-N`. Does that mean upper case only? If not, I can make the pattern case-insensitive, and then `eip-25` will also work as link text. Is that OK, or must it be `EIP-25` (upper case only)?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we have the other lint to check for incorrectly cased references. So even if [eip-1](./eip-1.md) passes markdown-link-other, it'll fail the other lint.

Or at least I think it will 🤔

}
),
("markdown-rel-links", MarkdownRelativeLinks(markdown::RelativeLinks {
exceptions: vec![
"^https://(www\\.)?github\\.com/ethereum/consensus-specs/blob/[a-f0-9]{40}/.+$",
Expand Down
18 changes: 17 additions & 1 deletion eipw-lint/src/lints/known_lints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,15 @@ pub enum DefaultLint<S> {

MarkdownHtmlComments(markdown::HtmlComments<S>),
MarkdownJsonSchema(markdown::JsonSchema<S>),
MarkdownLinkEip {
pattern: markdown::LinkEip<S>
},
MarkdownLinkFirst {
pattern: markdown::LinkFirst<S>,
},
MarkdownLinkOther {
pattern: markdown::LinkOther<S>
},
MarkdownLinkStatus(markdown::LinkStatus<S>),
MarkdownProposalRef(markdown::ProposalRef<S>),
MarkdownRegex(markdown::Regex<S>),
Expand Down Expand Up @@ -102,6 +108,8 @@ where

Self::MarkdownHtmlComments(l) => Box::new(l),
Self::MarkdownJsonSchema(l) => Box::new(l),
Self::MarkdownLinkEip { pattern } => Box::new(pattern),
Self::MarkdownLinkOther { pattern } => Box::new(pattern),
Self::MarkdownLinkFirst { pattern } => Box::new(pattern),
Self::MarkdownLinkStatus(l) => Box::new(l),
Self::MarkdownProposalRef(l) => Box::new(l),
Expand Down Expand Up @@ -141,7 +149,9 @@ where

Self::MarkdownHtmlComments(l) => l,
Self::MarkdownJsonSchema(l) => l,
Self::MarkdownLinkEip { pattern } => pattern,
Self::MarkdownLinkFirst { pattern } => pattern,
Self::MarkdownLinkOther { pattern } => pattern,
Self::MarkdownLinkStatus(l) => l,
Self::MarkdownProposalRef(l) => l,
Self::MarkdownRegex(l) => l,
Expand Down Expand Up @@ -257,6 +267,12 @@ where
.map(|(a, b)| (a.as_ref(), b.as_ref()))
.collect(),
}),
Self::MarkdownLinkEip { pattern } => DefaultLint::MarkdownLinkEip {
pattern: markdown::LinkEip(pattern.0.as_ref()),
},
Self::MarkdownLinkOther { pattern } => DefaultLint::MarkdownLinkOther {
pattern: markdown::LinkOther(pattern.0.as_ref()),
},
Self::MarkdownLinkFirst { pattern } => DefaultLint::MarkdownLinkFirst {
pattern: markdown::LinkFirst(pattern.0.as_ref()),
},
Expand All @@ -275,7 +291,7 @@ where
prefix: l.prefix.as_ref(),
suffix: l.suffix.as_ref(),
})
}
},
Self::MarkdownRegex(l) => DefaultLint::MarkdownRegex(markdown::Regex {
message: l.message.as_ref(),
mode: l.mode,
Expand Down
4 changes: 4 additions & 0 deletions eipw-lint/src/lints/markdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
pub mod headings_space;
pub mod html_comments;
pub mod json_schema;
pub mod link_eip;
pub mod link_first;
pub mod link_other;
pub mod link_status;
pub mod proposal_ref;
pub mod regex;
Expand All @@ -18,7 +20,9 @@ pub mod section_required;
pub use self::headings_space::HeadingsSpace;
pub use self::html_comments::HtmlComments;
pub use self::json_schema::JsonSchema;
pub use self::link_eip::LinkEip;
pub use self::link_first::LinkFirst;
pub use self::link_other::LinkOther;
pub use self::link_status::LinkStatus;
pub use self::proposal_ref::ProposalRef;
pub use self::regex::Regex;
Expand Down
141 changes: 141 additions & 0 deletions eipw-lint/src/lints/markdown/link_eip.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/

use annotate_snippets::snippet::{Annotation, AnnotationType, Slice, Snippet};

use comrak::nodes::{Ast, NodeLink};

use crate::lints::{Context, Error, Lint};
use crate::tree::{self, Next, TraverseExt};

use regex::Regex;

use serde::{Deserialize, Serialize};

use std::fmt::{Debug, Display};

/// Lint configured with a single pattern (the wrapped value).
///
/// The pattern is compiled as a regular expression in `lint` and matched
/// against link destinations; the text of links whose destination matches is
/// then checked against the proposal number found in that destination.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct LinkEip<S>(pub S);

impl<S> Lint for LinkEip<S>
where
S: Display + Debug + AsRef<str>,
{
fn lint<'a>(&self, slug: &'a str, ctx: &Context<'a, '_>) -> Result<(), Error> {
let pattern = self.0.as_ref();
let re = Regex::new(pattern).map_err(Error::custom)?;

let mut visitor = Visitor {
ctx,
re,
slug,
link_depth: 0,
current_link: Link { url: String::new(), text: String::new() },
};
ctx.body().traverse().visit(&mut visitor)?;

Ok(())
}
}
/// Destination and text of the link currently being examined.
#[derive(Debug)]
struct Link {
    // Link destination, copied from the link node in `enter_link`.
    url: String,
    // Content of the most recent text node seen inside the link
    // (overwritten in `enter_text`).
    text: String,
}

/// Tree visitor that remembers the link being traversed and checks the text
/// nodes found inside it.
#[derive(Debug)]
struct Visitor<'a, 'b, 'c> {
    // Lint context, used to fetch source text and to report diagnostics.
    ctx: &'c Context<'a, 'b>,
    // Compiled form of the lint's configured pattern; matched against link
    // destinations in `enter_link`.
    re: Regex,
    // Identifier attached to the title annotation of reported diagnostics.
    slug: &'c str,
    // Incremented when a link whose destination matches `re` is entered and
    // decremented when a link is departed; text is only checked while > 0.
    link_depth: usize,
    // The link whose text is currently being checked.
    current_link: Link,
}

impl<'a, 'b, 'c> Visitor<'a, 'b, 'c> {
fn check(&self, ast: &Ast) -> Result<Next, Error> {
let pattern = r"(?i)\b(?:eip|erc)-(\d+)\b";
let url_re = Regex::new(pattern).map_err(Error::custom)?;

let url_eip_number = if let Some(captures) = url_re.captures(&self.current_link.url) {
captures.get(1).map(|m| m.as_str())
} else { None };

if let Some(url_eip_number) = url_eip_number {
let section_pattern = r"eip-([^.]*)\.md#.+$";
let url_re = Regex::new(section_pattern).map_err(Error::custom)?;
let dynamic_pattern = if url_re.is_match(&self.current_link.url) {
format!(r"^(EIP|ERC)-{}\s*\S+", regex::escape(&url_eip_number))
} else {
format!(r"^(EIP|ERC)-{}$", regex::escape(&url_eip_number))
};
let text_re = Regex::new(&dynamic_pattern).map_err(Error::custom)?;

if text_re.is_match(&self.current_link.text) {
return Ok(Next::TraverseChildren);
};

let expected = if url_re.is_match(&self.current_link.url) {
format!("[EIP|ERC-{}<section-description>]", url_eip_number)
} else {
format!("[EIP|ERC-{}]", url_eip_number)
};

let footer_label = format!("link text should match `{}`", expected);

let source = self.ctx.source_for_text(ast.sourcepos.start.line, &self.current_link.text);
self.ctx.report(Snippet {
title: Some(Annotation {
annotation_type: self.ctx.annotation_type(),
id: Some(self.slug),
label: Some("link text does not match link destination"),
}),
slices: vec![Slice {
fold: false,
line_start: ast.sourcepos.start.line,
origin: self.ctx.origin(),
source: &source,
annotations: vec![],
}],
footer: vec![Annotation {
id: None,
annotation_type: AnnotationType::Info,
label: Some(&footer_label),
}],
opt: Default::default(),
})?;
}

Ok(Next::TraverseChildren)
}
}

impl<'a, 'b, 'c> tree::Visitor for Visitor<'a, 'b, 'c> {
type Error = Error;

fn enter_link(&mut self, _: &Ast, link: &NodeLink,) -> Result<Next, Self::Error> {
if self.re.is_match(&link.url) {
self.current_link = Link { url: link.url.to_owned(), text: String::new() };
self.link_depth += 1;
}
Ok(Next::TraverseChildren)
}

fn depart_link(&mut self, _: &Ast, _: &NodeLink) -> Result<(), Self::Error> {
if self.link_depth > 0 {
self.link_depth = self.link_depth.checked_sub(1).unwrap();
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If self.link_depth is greater than zero, the checked_sub is unnecessary.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The `checked_sub` in `depart_link` brings `self.link_depth` back to 0. I check `link_depth` in `enter_text`: if `link_depth == 0`, the text is not part of a link, so I skip it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What I mean is that if self.link_depth > 0 then self.link_depth - 1 can never underflow. The checked_sub will never fail.

Ok(())
}

fn enter_text(&mut self, ast: &Ast, txt: &str) -> Result<Next, Self::Error> {
if self.link_depth > 0 {
self.current_link.text = txt.to_owned();
self.check(ast)?;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does this behave for content like:

[**EIP-1**5678](./eip-1.md#rationale)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It fails. I will add it to the tests. Should it fail?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, perhaps I spoke too soon then 🤣

I was worried that because every text node in the link sets self.current_link.text, you could end up in situations where, for example, bold would create two text nodes, breaking the lint. Maybe something like [**EIP-1**EIP-1](./eip-1.md).

If my concerns are unfounded, ignore me!

Ok(Next::TraverseChildren)
}
}
124 changes: 124 additions & 0 deletions eipw-lint/src/lints/markdown/link_other.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/

use annotate_snippets::snippet::{Annotation, AnnotationType, Slice, Snippet};

use comrak::nodes::{Ast, NodeLink};

use crate::lints::{Context, Error, Lint};
use crate::tree::{self, Next, TraverseExt};

use regex::Regex;

use serde::{Deserialize, Serialize};

use std::fmt::{Debug, Display};

/// Lint configured with a single pattern (the wrapped value).
///
/// The pattern is compiled as a regular expression in `lint` and matched
/// against the text inside links; its second capture group is read as the
/// proposal number that the link destination must contain.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct LinkOther<S>(pub S);

impl<S> Lint for LinkOther<S>
where
S: Display + Debug + AsRef<str>,
{
fn lint<'a>(&self, slug: &'a str, ctx: &Context<'a, '_>) -> Result<(), Error> {
let pattern = self.0.as_ref();
let re = Regex::new(pattern).map_err(Error::custom)?;

let mut visitor = Visitor {
ctx,
re,
slug,
link_depth: 0,
current_link: Link { url: String::new(), text: String::new() },
};
ctx.body().traverse().visit(&mut visitor)?;

Ok(())
}
}
/// Destination and text of the link currently being examined.
#[derive(Debug)]
struct Link {
    // Link destination, copied from the link node in `enter_link`.
    url: String,
    // Content of the most recent matching text node seen inside the link
    // (overwritten in `enter_text`).
    text: String,
}

/// Tree visitor that remembers the link being traversed and checks the text
/// nodes found inside it.
#[derive(Debug)]
struct Visitor<'a, 'b, 'c> {
    // Lint context, used to fetch source text and to report diagnostics.
    ctx: &'c Context<'a, 'b>,
    // Compiled form of the lint's configured pattern; matched against the
    // text inside links.
    re: Regex,
    // Identifier attached to the title annotation of reported diagnostics.
    slug: &'c str,
    // Incremented on every entered link and decremented on departure; text is
    // only checked while > 0.
    link_depth: usize,
    // The link whose text is currently being checked.
    current_link: Link,
}

impl<'a, 'b, 'c> Visitor<'a, 'b, 'c> {
fn check(&self, ast: &Ast) -> Result<Next, Error> {
let text_eip_number = if let Some(captures) = self.re.captures(&self.current_link.text) {
captures.get(2).map(|m| m.as_str())
} else { None };

if let Some(text_eip_number) = text_eip_number {
let pattern = format!(r"(?i)\beip-{}\b", regex::escape(&text_eip_number));
let re = Regex::new(&pattern).map_err(Error::custom)?;

if re.is_match(&self.current_link.url) {
return Ok(Next::TraverseChildren);
}

let footer_label = format!("link destinstion must match text EIP");

let source = self.ctx.source_for_text(ast.sourcepos.start.line, &self.current_link.text);
self.ctx.report(Snippet {
title: Some(Annotation {
annotation_type: self.ctx.annotation_type(),
id: Some(self.slug),
label: Some("link text does not match link destination"),
}),
slices: vec![Slice {
fold: false,
line_start: ast.sourcepos.start.line,
origin: self.ctx.origin(),
source: &source,
annotations: vec![],
}],
footer: vec![Annotation {
id: None,
annotation_type: AnnotationType::Info,
label: Some(&footer_label),
}],
opt: Default::default(),
})?;
}

Ok(Next::TraverseChildren)
}
}

impl<'a, 'b, 'c> tree::Visitor for Visitor<'a, 'b, 'c> {
type Error = Error;

fn enter_link(&mut self, _: &Ast, link: &NodeLink,) -> Result<Next, Self::Error> {
self.current_link = Link { url: link.url.to_owned(), text: String::new() };
self.link_depth += 1;
Ok(Next::TraverseChildren)
}

fn depart_link(&mut self, _: &Ast, _: &NodeLink) -> Result<(), Self::Error> {
if self.link_depth > 0 {
self.link_depth = self.link_depth.checked_sub(1).unwrap();
}
Ok(())
}

fn enter_text(&mut self, ast: &Ast, txt: &str) -> Result<Next, Self::Error> {
if self.link_depth > 0 && self.re.is_match(&txt) {
self.current_link.text = txt.to_owned();
self.check(ast)?;
}
Ok(Next::TraverseChildren)
}
}
Loading
Loading