-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add
xgettext
command to extract translatable strings
This command is one half of a Gettext-based translation (i18n) workflow. It iterates over each chapter and extracts all translatable text into a `messages.pot` file. The text is split on paragraph boundaries, which helps ensure less churn in the output when the text is edited. The other half of the workflow is a `gettext` command which will take a source Markdown file and an `xx.po` file and output a translated Markdown file. Part of the solution for #5.
- Loading branch information
Showing
5 changed files
with
155 additions
and
0 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,3 +8,4 @@ pub mod serve; | |
pub mod test; | ||
#[cfg(feature = "watch")] | ||
pub mod watch; | ||
pub mod xgettext; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
use crate::get_book_dir; | ||
use anyhow::Context; | ||
use clap::{arg, App, ArgMatches}; | ||
use lazy_static::lazy_static; | ||
use mdbook::MDBook; | ||
use polib::catalog::Catalog; | ||
use polib::message::Message; | ||
use regex::Regex; | ||
use std::path::Path; | ||
|
||
// Create clap subcommand arguments | ||
pub fn make_subcommand<'help>() -> App<'help> { | ||
App::new("xgettext") | ||
.about("Extract translatable strings from all chapters") | ||
.arg(arg!(-o --output [FILE] | ||
"Write output to the specified file. Defaults to `messages.pot`." | ||
)) | ||
.arg(arg!([dir] | ||
"Root directory for the book{n}\ | ||
(Defaults to the Current Directory when omitted)" | ||
)) | ||
} | ||
|
||
/// Extract paragraphs from text. | ||
/// | ||
/// Paragraphs are separated by at least two newlines. Returns an | ||
/// iterator over line numbers (starting from 1) and paragraphs. | ||
pub fn extract_paragraphs(text: &str) -> impl Iterator<Item = (usize, &str)> { | ||
// TODO: This could be make more sophisticated by parsing the | ||
// Markdown and stripping off the markup characters. | ||
// | ||
// As an example, a header like "## My heading" could become just | ||
// "My heading" in the `.pot` file. Similarly, paragraphs could be | ||
// unfolded and list items could be translated one-by-one. | ||
lazy_static! { | ||
static ref PARAGRAPH_SEPARATOR: Regex = Regex::new(r"\n\n+").unwrap(); | ||
} | ||
|
||
// Skip over leading empty lines. | ||
let trimmed = text.trim_start_matches('\n'); | ||
let mut matches = PARAGRAPH_SEPARATOR.find_iter(trimmed); | ||
let mut lineno = 1 + text.len() - trimmed.len(); | ||
let mut last = 0; | ||
|
||
std::iter::from_fn(move || match matches.next() { | ||
Some(m) => { | ||
let result = (lineno, &trimmed[last..m.start()]); | ||
lineno += trimmed[last..m.end()].lines().count(); | ||
last = m.end(); | ||
Some(result) | ||
} | ||
None => { | ||
if last < trimmed.len() { | ||
let result = (lineno, &trimmed[last..]); | ||
last = trimmed.len(); | ||
Some(result) | ||
} else { | ||
None | ||
} | ||
} | ||
}) | ||
} | ||
|
||
// Xgettext command implementation | ||
pub fn execute(args: &ArgMatches) -> mdbook::errors::Result<()> { | ||
let book_dir = get_book_dir(args); | ||
let book = MDBook::load(&book_dir)?; | ||
|
||
let mut catalog = Catalog::new(); | ||
|
||
for item in book.iter() { | ||
match item { | ||
mdbook::BookItem::Chapter(chapter) if !chapter.is_draft_chapter() => { | ||
for (lineno, paragraph) in extract_paragraphs(&chapter.content) { | ||
let source = &chapter | ||
.source_path | ||
.as_ref() | ||
.map(|path| format!("{}:{}", path.to_string_lossy(), lineno)) | ||
.unwrap_or_default(); | ||
catalog.add_message(Message::new_singular("", source, "", "", ¶graph, "")); | ||
} | ||
} | ||
mdbook::BookItem::PartTitle(part_title) => { | ||
// TODO: would it be better to process SUMMARY.md like | ||
// a normal chapter and split the text by paragraph? | ||
catalog.add_message(Message::new_singular( | ||
"", | ||
"SUMMARY.md", | ||
"", | ||
"", | ||
&part_title, | ||
"", | ||
)); | ||
} | ||
_ => {} | ||
} | ||
} | ||
|
||
let output_path = Path::new(args.value_of("output").unwrap_or("messages.pot")); | ||
polib::po_file::write(&catalog, output_path) | ||
.with_context(|| format!("Could not write {:?}", output_path))?; | ||
|
||
Ok(()) | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `extract_paragraphs` on `text` and gather everything it
    /// yields, so tests can compare against a plain vector.
    fn paragraphs(text: &str) -> Vec<(usize, &str)> {
        extract_paragraphs(text).collect()
    }

    #[test]
    fn test_extract_paragraphs_empty() {
        let expected: Vec<(usize, &str)> = Vec::new();
        assert_eq!(paragraphs(""), expected);
    }

    #[test]
    fn test_extract_paragraphs_single_line() {
        let expected = vec![(1, "This is a paragraph.")];
        assert_eq!(paragraphs("This is a paragraph."), expected);
    }

    #[test]
    fn test_extract_paragraphs_simple() {
        let expected = vec![(1, "This is\na paragraph."), (4, "Next paragraph.")];
        assert_eq!(
            paragraphs("This is\na paragraph.\n\nNext paragraph."),
            expected
        );
    }

    #[test]
    fn test_extract_paragraphs_leading_newlines() {
        let expected = vec![(4, "This is\na paragraph.")];
        assert_eq!(paragraphs("\n\n\nThis is\na paragraph."), expected);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters