Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integration Tests and handling for alternates and canonical #7

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,6 @@ exclude = [
[dependencies]
xml-builder = "0.5.1"
chrono = "0.4.22"

[dev-dependencies]
pretty_assertions = "1.3.0"
SteveGZr marked this conversation as resolved.
Show resolved Hide resolved
52 changes: 52 additions & 0 deletions examples/generate_url_sitemap_with_alternates.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
use chrono::{DateTime, FixedOffset, NaiveDate};
use sitemap_rs::url::{Alternate, ChangeFrequency, Url};
use sitemap_rs::url_set::UrlSet;

fn main() {
/*
* There are two functions you can use to add alternates. The first one is
* alternates() which expects a Vector with Alternate objects in it.
* alternates() overwrites existing values in the alternates-attribute
* of Url. The second one is push_alternate() which expects 2 &str, hreflang
* and href. push_alternate() appends to the alternates-attribute instead of
* overwriting.
* In the following example both are used.
*/
let urls: Vec<Url> = vec![Url::builder(String::from("https://www.example.com/"))
.last_modified(DateTime::from_utc(
NaiveDate::from_ymd_opt(2005, 1, 1)
.unwrap()
.and_hms_opt(0, 0, 0)
.unwrap(),
FixedOffset::east_opt(0).unwrap(),
))
.change_frequency(ChangeFrequency::Monthly)
.priority(0.8)
.alternates(vec![Alternate {
hreflang: String::from("en-US"),
href: String::from("https://www.example.com/"),
}])
.push_alternate(
String::from("de-DE"),
String::from("https://de.example.com/"),
)
.push_alternate(
String::from("de-CH"),
String::from("https://ch.example.com/de"),
)
.push_alternate(
String::from("fr-CH"),
String::from("https://ch.example.com/de"),
)
.push_alternate(String::from("it"), String::from("https://it.example.com/"))
.push_alternate(
String::from("x-default"),
String::from("https://www.example.com/country-selector"),
)
.build()
.expect("failed a <url> validation")];

let url_set: UrlSet = UrlSet::new(urls).expect("failed a <urlset> validation");
let mut buf = Vec::<u8>::new();
url_set.write(&mut buf).unwrap();
}
23 changes: 23 additions & 0 deletions src/image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,26 @@ impl Image {
Ok(image)
}
}

#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;

#[test]
fn test_to_xml() {
let image: Image = Image::new(String::from("https://www.example.com/image.jpg"));

let xml: XMLElement = image.to_xml().unwrap();
let mut buf = Vec::<u8>::new();
xml.render(&mut buf, true, true).unwrap();
let result = String::from_utf8(buf).unwrap();
assert_eq!(
"\
<image:image>\n\
\t<image:loc>https://www.example.com/image.jpg</image:loc>\n\
</image:image>\n",
result
);
}
}
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ pub const NAMESPACE: &str = "http://www.sitemaps.org/schemas/sitemap/0.9";
pub const IMAGE_NAMESPACE: &str = "http://www.google.com/schemas/sitemap-image/1.1";
pub const VIDEO_NAMESPACE: &str = "http://www.google.com/schemas/sitemap-video/1.1";
pub const NEWS_NAMESPACE: &str = "http://www.google.com/schemas/sitemap-news/0.9";
pub const XHTML_NAMESPACE: &str = "http://www.w3.org/1999/xhtml";
SteveGZr marked this conversation as resolved.
Show resolved Hide resolved
pub const ENCODING: &str = "UTF-8";
pub const RFC_3339_SECONDS_FORMAT: SecondsFormat = SecondsFormat::Secs;
pub const RFC_3339_USE_Z: bool = false;
49 changes: 49 additions & 0 deletions src/news.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,52 @@ impl Publication {
Ok(publication)
}
}

#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;

#[test]
fn test_news_to_xml() {
let news: News = News::new(
Publication::new(String::from("News Site"), String::from("en")),
DateTime::parse_from_rfc3339("2023-01-01T13:37:00+00:00").unwrap(),
String::from("Awesome Title of News Article"),
);
let xml: XMLElement = news.to_xml().unwrap();
let mut buf = Vec::<u8>::new();
xml.render(&mut buf, true, true).unwrap();
let result = String::from_utf8(buf).unwrap();
assert_eq!(
"\
<news:news>\n\
\t<news:publication>\n\
\t\t<news:name>News Site</news:name>\n\
\t\t<news:language>en</news:language>\n\
\t</news:publication>\n\
\t<news:publication_date>2023-01-01T13:37:00+00:00</news:publication_date>\n\
\t<news:title>Awesome Title of News Article</news:title>\n\
</news:news>\n",
result
);
}

#[test]
fn test_publication_to_xml() {
let publication: Publication =
Publication::new(String::from("News Site"), String::from("en"));
let xml: XMLElement = publication.to_xml().unwrap();
let mut buf = Vec::<u8>::new();
xml.render(&mut buf, true, true).unwrap();
let result = String::from_utf8(buf).unwrap();
assert_eq!(
"\
<news:publication>\n\
\t<news:name>News Site</news:name>\n\
\t<news:language>en</news:language>\n\
</news:publication>\n",
result
);
}
}
27 changes: 27 additions & 0 deletions src/sitemap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,30 @@ impl Sitemap {
Ok(sitemap)
}
}

#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;

#[test]
fn test_to_xml() {
let sitemap: Sitemap = Sitemap::new(
String::from("https://www.example.com/sitemap1.xml.gz"),
Some(DateTime::parse_from_rfc3339("2023-01-01T13:37:00+00:00").unwrap()),
);

let xml: XMLElement = sitemap.to_xml().unwrap();
let mut buf = Vec::<u8>::new();
xml.render(&mut buf, true, true).unwrap();
let result = String::from_utf8(buf).unwrap();
assert_eq!(
"\
<sitemap>\n\
\t<loc>https://www.example.com/sitemap1.xml.gz</loc>\n\
\t<lastmod>2023-01-01T13:37:00+00:00</lastmod>\n\
</sitemap>\n",
result
);
}
}
91 changes: 90 additions & 1 deletion src/url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use crate::url_error::UrlError;
use crate::video::Video;
use crate::{RFC_3339_SECONDS_FORMAT, RFC_3339_USE_Z};
use chrono::{DateTime, FixedOffset};
use std::collections::HashSet;
use std::fmt::{Display, Formatter};
use xml_builder::{XMLElement, XMLError};

Expand Down Expand Up @@ -57,6 +58,11 @@ pub struct Url {

/// News associated with this URL.
pub news: Option<News>,

/// Language Alternates for this URL.
///
/// Alternates must not contain duplicate hreflang values.
pub alternates: Option<Vec<Alternate>>,
}

impl Url {
Expand All @@ -65,6 +71,8 @@ impl Url {
/// Will return `UrlError::PriorityTooLow` if `priority` is below `0.0`.
/// Will return `UrlError::PriorityTooHigh` if `priority` is above `1.0`.
/// Will return `UrlError::TooManyImages` if the length of `images` is above `1,000`.
/// Will return `UrlError::DuplicateAlternateHreflangs` if `alternates` contain duplicate `hreflang` values.
#[allow(clippy::complexity)]
pub fn new(
location: String,
last_modified: Option<DateTime<FixedOffset>>,
Expand All @@ -73,6 +81,7 @@ impl Url {
images: Option<Vec<Image>>,
videos: Option<Vec<Video>>,
news: Option<News>,
alternates: Option<Vec<Alternate>>,
) -> Result<Self, UrlError> {
// make sure priority is within bounds: 0.0 <= priority <= 1.0
if let Some(p) = priority {
Expand Down Expand Up @@ -100,6 +109,22 @@ impl Url {
}
};

let alternates: Option<Vec<Alternate>> = match alternates {
None => None,
Some(alternates) => {
Copy link
Owner

@goddtriffin goddtriffin Jun 26, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this Some(alternates) condition, I like to check if alternates.is_empty() and return None if so. This helps limit the total number of ways to represent "I have zero alternates".

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Going to take a look at that.

let mut unique_hreflangs = HashSet::new();
for alternate in &alternates {
if !unique_hreflangs.insert(&alternate.hreflang) {
return Err(UrlError::DuplicateAlternateHreflangs(
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of manually checking for uniqueness within a Vec, could pub alternates: Option<Vec<Alternate>>, be refactored to be pub alternates: Option<Set<Alternate>>, from the beginning?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Going to take a look at that.

alternate.hreflang.clone(),
alternate.href.clone(),
));
}
}
Some(alternates)
}
};

Ok(Self {
location,
last_modified,
Expand All @@ -108,6 +133,7 @@ impl Url {
images,
videos,
news,
alternates,
})
}

Expand All @@ -124,7 +150,7 @@ impl Url {

// add <loc>
let mut loc: XMLElement = XMLElement::new("loc");
loc.add_text(self.location)?;
loc.add_text(self.location.clone())?;
url.add_child(loc)?;

// add <lastmod>, if it exists
Expand Down Expand Up @@ -168,6 +194,17 @@ impl Url {
url.add_child(news.to_xml()?)?;
}

// add <xhtml:link>, if any exist
if let Some(alternates) = self.alternates {
for alternate in alternates {
let mut alternate_link: XMLElement = XMLElement::new("xhtml:link");
alternate_link.add_attribute("rel", "alternate");
alternate_link.add_attribute("hreflang", &alternate.hreflang);
alternate_link.add_attribute("href", &alternate.href);
url.add_child(alternate_link)?;
}
}

Ok(url)
}
}
Expand Down Expand Up @@ -215,3 +252,55 @@ impl Display for ChangeFrequency {
write!(f, "{}", self.as_str())
}
}

/// Language Alternates for URL.
///
/// Alternates can be used to inform search engines about all language and region variants of a URL.
///
/// Possible values for hreflang are language codes (e.g. "en"), locales (e.g. "en-US") or "x-default".
#[derive(Debug, Clone)]
pub struct Alternate {
pub hreflang: String,
pub href: String,
}

#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;

#[test]
fn test_to_xml() {
let url: Url = Url::builder(String::from("https://www.example.com/"))
.last_modified(DateTime::parse_from_rfc3339("2023-01-01T13:37:00+00:00").unwrap())
.change_frequency(ChangeFrequency::Weekly)
.priority(DEFAULT_PRIORITY)
.alternates(vec![Alternate {
hreflang: String::from("en-US"),
href: String::from("https://www.example.com/"),
}])
.push_alternate(
String::from("x-default"),
String::from("https://www.example.com/country-selector"),
)
.build()
.expect("failed a <url> validation");

let xml: XMLElement = url.to_xml().unwrap();
let mut buf = Vec::<u8>::new();
xml.render(&mut buf, true, true).unwrap();
let result = String::from_utf8(buf).unwrap();
assert_eq!(
"\
<url>\n\
\t<loc>https://www.example.com/</loc>\n\
\t<lastmod>2023-01-01T13:37:00+00:00</lastmod>\n\
\t<changefreq>weekly</changefreq>\n\
\t<priority>0.5</priority>\n\
\t<xhtml:link href=\"https://www.example.com/\" hreflang=\"en-US\" rel=\"alternate\" />\n\
\t<xhtml:link href=\"https://www.example.com/country-selector\" hreflang=\"x-default\" rel=\"alternate\" />\n\
</url>\n",
result
);
}
}
26 changes: 25 additions & 1 deletion src/url_builder.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::image::Image;
use crate::news::News;
use crate::url::{ChangeFrequency, Url};
use crate::url::{Alternate, ChangeFrequency, Url};
use crate::url_error::UrlError;
use crate::video::Video;
use chrono::{DateTime, FixedOffset};
Expand Down Expand Up @@ -53,6 +53,11 @@ pub struct UrlBuilder {

/// News associated with this URL.
pub news: Option<News>,

/// Language Alternates for this URL.
///
/// Alternates must not contain duplicate hreflang values.
pub alternates: Option<Vec<Alternate>>,
}

impl UrlBuilder {
Expand All @@ -66,6 +71,7 @@ impl UrlBuilder {
images: None,
videos: None,
news: None,
alternates: None,
}
}

Expand Down Expand Up @@ -99,11 +105,28 @@ impl UrlBuilder {
self
}

pub fn alternates(&mut self, alternates: Vec<Alternate>) -> &mut Self {
self.alternates = Some(alternates);
self
}

pub fn push_alternate(&mut self, hreflang: String, href: String) -> &mut Self {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you use impl ToString instead of String for hreflang and href here? This makes for a nicer interface.

if self.alternates.is_none() {
self.alternates = Some(Vec::new());
}

if let Some(alternates) = &mut self.alternates {
alternates.push(Alternate { hreflang, href });
}
self
}

/// # Errors
///
/// Will return `UrlError::PriorityTooLow` if `priority` is below `0.0`.
/// Will return `UrlError::PriorityTooHigh` if `priority` is above `1.0`.
/// Will return `UrlError::TooManyImages` if the length of `images` is above `1,000`.
/// Will return `UrlError::DuplicateAlternateHreflangs` if `alternates` contain duplicate `hreflang` values.
pub fn build(&self) -> Result<Url, UrlError> {
Url::new(
self.location.clone(),
Expand All @@ -113,6 +136,7 @@ impl UrlBuilder {
self.images.clone(),
self.videos.clone(),
self.news.clone(),
self.alternates.clone(),
)
}
}
Loading