Skip to content

Commit

Permalink
Add xgettext command to extract translatable strings
Browse files Browse the repository at this point in the history
This command is one half of a Gettext-based translation (i18n)
workflow. It iterates over each chapter and extracts all translatable
text into a `messages.pot` file.

The text is split on paragraph boundaries, which helps ensure less
churn in the output when the text is edited.

The other half of the workflow is a `gettext` command which will take
a source Markdown file and an `xx.po` file and output a translated
Markdown file.

Part of the solution for #5.
  • Loading branch information
mgeisler committed Sep 10, 2022
1 parent 8cdb8d0 commit 1a3a1eb
Show file tree
Hide file tree
Showing 5 changed files with 155 additions and 0 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ shlex = "1"
tempfile = "3.0"
toml = "0.5.1"
topological-sort = "0.1.0"
polib = "0.1.0"

# Watch feature
notify = { version = "4.0", optional = true }
Expand Down
1 change: 1 addition & 0 deletions src/cmd/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ pub mod serve;
pub mod test;
#[cfg(feature = "watch")]
pub mod watch;
pub mod xgettext;
144 changes: 144 additions & 0 deletions src/cmd/xgettext.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
use crate::get_book_dir;
use anyhow::Context;
use clap::{arg, App, ArgMatches};
use lazy_static::lazy_static;
use mdbook::MDBook;
use polib::catalog::Catalog;
use polib::message::Message;
use regex::Regex;
use std::path::Path;

/// Build the clap definition of the `xgettext` subcommand.
///
/// The subcommand declares an `-o`/`--output [FILE]` argument and a `[dir]`
/// argument; their help texts describe the defaults.
pub fn make_subcommand<'help>() -> App<'help> {
    // Where the extracted messages are written.
    let output = arg!(-o --output [FILE]
        "Write output to the specified file. Defaults to `messages.pot`."
    );
    // Which book to read.
    let dir = arg!([dir]
        "Root directory for the book{n}\
        (Defaults to the Current Directory when omitted)"
    );

    App::new("xgettext")
        .about("Extract translatable strings from all chapters")
        .arg(output)
        .arg(dir)
}

/// Extract paragraphs from text.
///
/// Paragraphs are separated by two or more consecutive newlines (`\n`).
/// Newlines at the very start and at the very end of `text` belong to no
/// paragraph, so a paragraph yields the same string whether it is followed
/// by more text, by a single trailing newline, or by nothing at all. This
/// keeps the extracted message stable when text is appended to a chapter.
///
/// Returns an iterator over `(line number, paragraph)` pairs. Line numbers
/// start from 1 and refer to the first line of the paragraph in `text`.
/// Paragraphs are never empty, and an empty (or newline-only) `text` yields
/// nothing. Only `\n` is recognised as a line break when looking for
/// separators.
pub fn extract_paragraphs(text: &str) -> impl Iterator<Item = (usize, &str)> {
    // TODO: This could be made more sophisticated by parsing the
    // Markdown and stripping off the markup characters.
    //
    // As an example, a header like "## My heading" could become just
    // "My heading" in the `.pot` file. Similarly, paragraphs could be
    // unfolded and list items could be translated one-by-one.

    // Skip over leading empty lines. Every skipped newline is one line, so
    // the byte difference is exactly the number of lines skipped.
    let without_leading = text.trim_start_matches('\n');
    let mut lineno = 1 + text.len() - without_leading.len();

    // Drop trailing newlines so they do not leak into the last paragraph.
    // After this, `rest` neither starts nor ends with a newline.
    let mut rest = without_leading.trim_end_matches('\n');

    std::iter::from_fn(move || {
        if rest.is_empty() {
            return None;
        }

        let paragraph_lineno = lineno;
        match rest.find("\n\n") {
            Some(paragraph_end) => {
                let (paragraph, tail) = rest.split_at(paragraph_end);
                // The separator is the whole run of newlines, not just two.
                let next = tail.trim_start_matches('\n');
                // Advance past the paragraph's own lines and the blank lines
                // that follow it.
                let consumed = rest.len() - next.len();
                lineno += rest[..consumed].lines().count();
                rest = next;
                Some((paragraph_lineno, paragraph))
            }
            None => {
                // No separator left: the remainder is the final paragraph.
                let paragraph = rest;
                rest = "";
                Some((paragraph_lineno, paragraph))
            }
        }
    })
}

/// Xgettext command implementation.
///
/// Loads the book located via `get_book_dir(args)`, adds one catalog message
/// per paragraph of every non-draft chapter and one per part title, then
/// writes the catalog to the path given by the `output` argument
/// (`messages.pot` when the argument is absent).
///
/// # Errors
///
/// Propagates the error from loading the book, and returns an error with a
/// "Could not write ..." context if writing the catalog fails.
pub fn execute(args: &ArgMatches) -> mdbook::errors::Result<()> {
    let book_dir = get_book_dir(args);
    let book = MDBook::load(&book_dir)?;
    let mut catalog = Catalog::new();

    // NOTE(review): `Message::new_singular` arguments are presumably
    // (comments, source, flags, msgctxt, msgid, msgstr) — confirm against polib.
    for item in book.iter() {
        match item {
            mdbook::BookItem::Chapter(chapter) => {
                // Draft chapters contribute no messages.
                if chapter.is_draft_chapter() {
                    continue;
                }

                for (lineno, paragraph) in extract_paragraphs(&chapter.content) {
                    // "<path>:<line>" when the chapter has a source path,
                    // otherwise an empty source reference.
                    let source = match chapter.source_path.as_ref() {
                        Some(path) => format!("{}:{}", path.to_string_lossy(), lineno),
                        None => String::new(),
                    };
                    let message = Message::new_singular("", &source, "", "", paragraph, "");
                    catalog.add_message(message);
                }
            }
            mdbook::BookItem::PartTitle(part_title) => {
                // TODO: would it be better to process SUMMARY.md like
                // a normal chapter and split the text by paragraph?
                let message = Message::new_singular("", "SUMMARY.md", "", "", part_title, "");
                catalog.add_message(message);
            }
            _ => {}
        }
    }

    let output_name = args.value_of("output").unwrap_or("messages.pot");
    let output_path = Path::new(output_name);
    polib::po_file::write(&catalog, output_path)
        .with_context(|| format!("Could not write {:?}", output_path))?;

    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `extract_paragraphs` and collects the result for comparison.
    fn paragraphs(text: &str) -> Vec<(usize, &str)> {
        extract_paragraphs(text).collect()
    }

    #[test]
    fn test_extract_paragraphs_empty() {
        // Empty input yields no paragraphs at all.
        assert_eq!(paragraphs(""), Vec::<(usize, &str)>::new());
    }

    #[test]
    fn test_extract_paragraphs_single_line() {
        let expected = vec![(1, "This is a paragraph.")];
        assert_eq!(paragraphs("This is a paragraph."), expected);
    }

    #[test]
    fn test_extract_paragraphs_simple() {
        // The second paragraph starts on line 4: two text lines, one blank line.
        let expected = vec![(1, "This is\na paragraph."), (4, "Next paragraph.")];
        assert_eq!(
            paragraphs("This is\na paragraph.\n\nNext paragraph."),
            expected
        );
    }

    #[test]
    fn test_extract_paragraphs_leading_newlines() {
        // Leading blank lines are skipped but still counted in the line number.
        let expected = vec![(4, "This is\na paragraph.")];
        assert_eq!(paragraphs("\n\n\nThis is\na paragraph."), expected);
    }
}
2 changes: 2 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ fn main() {
#[cfg(feature = "serve")]
Some(("serve", sub_matches)) => cmd::serve::execute(sub_matches),
Some(("test", sub_matches)) => cmd::test::execute(sub_matches),
Some(("xgettext", sub_matches)) => cmd::xgettext::execute(sub_matches),
Some(("completions", sub_matches)) => (|| {
let shell: Shell = sub_matches
.value_of("shell")
Expand Down Expand Up @@ -76,6 +77,7 @@ fn create_clap_app() -> App<'static> {
.subcommand(cmd::build::make_subcommand())
.subcommand(cmd::test::make_subcommand())
.subcommand(cmd::clean::make_subcommand())
.subcommand(cmd::xgettext::make_subcommand())
.subcommand(
App::new("completions")
.about("Generate shell completions for your shell to stdout")
Expand Down

0 comments on commit 1a3a1eb

Please sign in to comment.