Skip to content

Commit

Permalink
re-implement existing translators
Browse files Browse the repository at this point in the history
  • Loading branch information
fuma-nama committed Dec 25, 2024
1 parent 092e379 commit 2f14b14
Show file tree
Hide file tree
Showing 31 changed files with 641 additions and 301 deletions.
3 changes: 2 additions & 1 deletion biome.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
}
},
"formatter": {
"indentStyle": "space"
"indentStyle": "space",
"ignore": ["test/resources", "test/snapshots"]
}
}
Binary file modified bun.lockb
Binary file not shown.
3 changes: 3 additions & 0 deletions packages/cli/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,13 @@
"ai": "^4.0.22",
"chalk": "^5.4.1",
"dedent": "^1.5.3",
"diff": "^7.0.0",
"dotenv": "^16.4.7",
"simple-git": "^3.27.0",
"zod": "^3.24.1"
},
"devDependencies": {
"@types/diff": "^6.0.0",
"tsup": "^8.3.5",
"typescript": "^5.7.2"
}
Expand Down
18 changes: 12 additions & 6 deletions packages/cli/src/commands/diff.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { execSync } from "node:child_process";
import { intro, outro } from "@clack/prompts";
import chalk from "chalk";
import { extractChangedKeys, getConfig } from "../utils.js";
import { getConfig } from "../utils.js";

export async function diff() {
intro("Checking for changes in source locale file...");
Expand All @@ -25,25 +25,31 @@ export async function diff() {
process.exit(0);
}

const { addedKeys, removedKeys } = extractChangedKeys(diff);
let added = 0,
removed = 0;

if (addedKeys.length === 0 && removedKeys.length === 0) {
for (const line of diff.split("\n")) {
if (line.startsWith("+") && !line.startsWith("+++")) added++;
else if (line.startsWith("-") && !line.startsWith("---")) removed++;
}

if (added === 0 && removed === 0) {
outro(
chalk.yellow("No translation keys were added, modified or removed."),
);
process.exit(0);
}

const totalChanges = addedKeys.length + removedKeys.length;
const totalChanges = added + removed;
outro(
chalk.blue(
`Found ${totalChanges} translation key${totalChanges === 1 ? "" : "s"} changed`,
),
);

return {
addedKeys,
removedKeys,
addedKeys: [],
removedKeys: [],
};
} catch (error) {
outro(chalk.red("Failed to check for changes"));
Expand Down
34 changes: 16 additions & 18 deletions packages/cli/src/commands/translate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import chalk from "chalk";
import { getApiKey, getConfig } from "../utils.js";
import { getTranslator } from "../translators/index.js";
import type { PromptOptions, UpdateResult } from "../types.js";
import { simpleGit } from "simple-git";

export async function translate(targetLocale?: string, force: boolean = false) {
intro("Starting translation process...");
Expand All @@ -26,6 +27,8 @@ export async function translate(targetLocale?: string, force: boolean = false) {
process.exit(1);
}

const git = simpleGit();

// Initialize OpenAI
const openai = createOpenAI({
apiKey: await getApiKey("OpenAI", "OPENAI_API_KEY"),
Expand All @@ -43,25 +46,21 @@ export async function translate(targetLocale?: string, force: boolean = false) {
const targetPath = pattern.replace("[locale]", locale);

try {
let diff = "";

if (!force) {
// Get git diff for source file if not force translating
diff = execSync(`git diff HEAD -- ${sourcePath}`, {
encoding: "utf-8",
});

if (diff.length === 0) {
return { locale, sourcePath, success: true, noChanges: true };
}
}

// Read source and target files
const sourceContent = await fs.readFile(
path.join(process.cwd(), sourcePath),
"utf-8",
);

let previousContent = "";

if (!force) {
previousContent = await git.show(sourcePath).catch(() => "");

if (previousContent === sourceContent)
return { locale, sourcePath, success: true, noChanges: true };
}

let previousTranslation = undefined;
try {
previousTranslation = await fs.readFile(
Expand Down Expand Up @@ -90,15 +89,14 @@ export async function translate(targetLocale?: string, force: boolean = false) {
content: sourceContent,
};

let { content: finalContent, summary } = (
previousTranslation && !force
let { content: finalContent, summary } =
previousTranslation && previousContent && !force
? await adapter.onUpdate({
...options,
previousTranslation,
diff,
previousContent,
})
: await adapter.onNew(options)
) as UpdateResult;
: ((await adapter.onNew(options)) as UpdateResult);

// Run afterTranslate hook if defined
if (config.hooks?.afterTranslate) {
Expand Down
43 changes: 10 additions & 33 deletions packages/cli/src/prompt.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
import dedent from "dedent";
import type { PromptOptions } from "./types.js";

/**
 * Shared translation rules, stored as dedented text.
 *
 * Exported so that format-specific prompts can start from this list and
 * append their own bullet points directly after it.
 */
export const baseRequirements = dedent`
Translation Requirements:
- Maintain exact file structure, indentation, and formatting
- Provide natural, culturally-adapted translations that sound native
- Keep all technical identifiers unchanged
- Keep consistent capitalization, spacing, and line breaks
- Respect existing whitespace and newline patterns
- Never add space before a ! or ?
`;

export function createBasePrompt(text: string, options: PromptOptions) {
return dedent`
You are a professional translator working with ${options.format.toUpperCase()} files.
Expand All @@ -10,36 +20,3 @@ export function createBasePrompt(text: string, options: PromptOptions) {
${text}
`;
}

/**
 * Create prompt for record-like objects.
 *
 * Assembles a fixed list of translation requirements followed by the source
 * object pretty-printed with `JSON.stringify(..., null, 2)`, then passes that
 * text to `createBasePrompt` together with `options`.
 *
 * @param parsedContent - key/value strings to translate; embedded in the prompt as JSON
 * @param options - forwarded unchanged to `createBasePrompt`
 * @returns the value produced by `createBasePrompt` for the assembled text
 */
export function createRecordPrompt(
parsedContent: Record<string, string>,
options: PromptOptions,
) {
return createBasePrompt(
`
Translation Requirements:
- Maintain exact file structure, indentation, and formatting
- Only translate text content within quotation marks
- Preserve all object/property keys, syntax characters, and punctuation marks exactly
- Keep consistent capitalization, spacing, and line breaks
- Provide natural, culturally-adapted translations that sound native
- Retain all code elements like variables, functions, and control structures
- Exclude any translator notes, comments or explanatory text
- Match source file's JSON/object structure precisely
- Handle special characters and escape sequences correctly
- Respect existing whitespace and newline patterns
- Keep all technical identifiers unchanged
- Translate only user-facing strings
- Never add space before a ! or ?
Source content:
${JSON.stringify(parsedContent, null, 2)}
Return only the translated content with identical structure.
`,
options,
);
}
11 changes: 2 additions & 9 deletions packages/cli/src/translators/index.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,4 @@
import type {
Awaitable,
PromptOptions,
PromptResult,
Translator,
UpdateOptions,
UpdateResult,
} from "../types.js";
import type { Translator } from "../types.js";
import { javascript } from "./js.js";
import { json } from "./json.js";
import { markdown } from "./md.js";
Expand All @@ -22,7 +15,7 @@ export async function getTranslator(
return javascript;
}

if (format === "json" || format === "json5") {
if (format === "json") {
return json;
}

Expand Down
148 changes: 117 additions & 31 deletions packages/cli/src/translators/js.ts
Original file line number Diff line number Diff line change
@@ -1,62 +1,148 @@
import { generateObject } from "ai";
import { createRecordPrompt } from "../prompt.js";
import { extractChangedKeys } from "../utils.js";
import type { Translator } from "../types.js";

function parse(content: string) {
return Function(
`return ${content.replace(/export default |as const;/g, "")}`,
)();
import { baseRequirements, createBasePrompt } from "../prompt.js";
import type { PromptOptions, Translator } from "../types.js";
import { diffLines } from "diff";
import { z } from "zod";

/**
 * Build the regex source (not a RegExp) for one kind of quoted string literal.
 *
 * The pattern is: opening quote, then any run of either a backslash escape
 * (backslash + any character) or a character that is not the quote, not a
 * backslash and, unless `multiline` is set, not a line feed, then the closing quote.
 *
 * @param quote - the delimiter character, inserted into the pattern verbatim
 * @param multiline - when true the literal may span line breaks
 * @returns regex source text, suitable for joining with `|` and passing to `new RegExp`
 */
function createRegex(quote: string, multiline = false): string {
  // A backslash followed by any single character (e.g. \" or \\).
  const escapedPair = "\\\\.";
  // Characters that may not appear unescaped inside the literal.
  const forbidden = multiline ? `${quote}\\\\` : `${quote}\\\\\\n`;

  return `${quote}(?:${escapedPair}|[^${forbidden}])*${quote}`;
}

/**
 * Matches any double-quoted, single-quoted or backtick-quoted literal.
 * Only the backtick form is allowed to span line breaks.
 *
 * Created with the `g` flag, so the instance carries `lastIndex` state
 * between `exec` calls.
 */
const quotesRegex = new RegExp(
  [createRegex(`"`), createRegex(`'`), createRegex(`\``, true)].join("|"),
  "g",
);

/**
 * One quoted string literal found in a piece of source text.
 */
interface StringMatch {
/**
 * zero-based offset of the literal's first character (its opening quote)
 * within the text that was scanned
 */
index: number;

/**
 * content, including quotes
 */
content: string;
}

/**
 * Collect every quoted literal in `code` (e.g. "hello world" or `hello ${world}`).
 *
 * This is a purely textual scan with `quotesRegex`; the code is not parsed.
 * Results are returned in the order the regex finds them, i.e. by ascending offset.
 *
 * @param code - source text to scan
 * @returns each literal's offset and its text including the surrounding quotes
 */
function getStrings(code: string) {
  const found: StringMatch[] = [];

  // The regex is global, so each exec resumes after the previous hit and the
  // final null result resets its lastIndex for the next caller.
  for (
    let hit = quotesRegex.exec(code);
    hit !== null;
    hit = quotesRegex.exec(code)
  ) {
    found.push({ index: hit.index, content: hit[0] });
  }

  return found;
}

/**
 * Substitute string literals in `code` with replacement text.
 *
 * `strings[i]` is replaced by `replaces[i]`. The two arrays are paired by
 * position; a pair is skipped (the original text is kept) when either side is
 * missing, so a replacement list that is longer, shorter or sparse neither
 * throws nor splices the text "undefined" into the output. This matters
 * because `replaces` may come straight from a model response whose length is
 * not guaranteed to match.
 *
 * @param code - the text the matches were taken from
 * @param strings - matches located in `code`; expected in ascending,
 *   non-overlapping `index` order
 * @param replaces - replacement text for each match, including any quotes
 * @returns `code` with every paired match replaced
 */
function replaceStrings(
  code: string,
  strings: StringMatch[],
  replaces: string[],
) {
  const pieces: string[] = [];
  // End of the last match consumed from `code`.
  let cursor = 0;
  const pairCount = Math.min(strings.length, replaces.length);

  for (let i = 0; i < pairCount; i++) {
    const original = strings[i];
    const replace = replaces[i];
    // Runtime guard: the declared types do not rule out holes in either array.
    if (original === undefined || typeof replace !== "string") continue;

    // Copy the untouched text before the match, then the replacement.
    pieces.push(code.slice(cursor, original.index), replace);
    cursor = original.index + original.content.length;
  }

  pieces.push(code.slice(cursor));
  return pieces.join("");
}

export const javascript: Translator = {
// detect changes
// translate changes
// apply translated changes to previous translation (assuming line breaks are identical)
async onUpdate(options) {
const sourceObj = parse(options.content);
const diff = diffLines(options.previousContent, options.content);
const strings = getStrings(options.content);
const previousTranslation = getStrings(options.previousTranslation);
const toTranslate: StringMatch[] = [];

let lineStartIdx = 0;
diff.forEach((change) => {
if (change.added) {
const affected = strings.filter(
(v) =>
v.index >= lineStartIdx &&
v.index < lineStartIdx + change.value.length,
);

const changes = extractChangedKeys(options.diff);
// Parse the translated content
let translatedObj: object = {};
toTranslate.push(...affected);
}

if (changes.addedKeys.length > 0) {
// If force is true, translate everything. Otherwise only new keys
const contentToTranslate: Record<string, string> = {};
for (const key of changes.addedKeys) {
contentToTranslate[key] = sourceObj[key];
if (!change.removed) {
lineStartIdx += change.value.length;
}
});

let translated: string[] = [];

if (toTranslate.length > 0) {
const { object } = await generateObject({
model: options.model,
prompt: createRecordPrompt(contentToTranslate, options),
output: "no-schema",
prompt: getPrompt(toTranslate, options),
schema: z.array(z.string()),
});

translatedObj = object as object;
translated = object;
}

const output = parse(options.previousTranslation);
const output = replaceStrings(
options.previousTranslation,
previousTranslation,
strings.map((s, i) => {
const j = toTranslate.indexOf(s);

for (const key of changes.removedKeys) {
delete output[key];
}
if (j !== -1) {
return translated[j];
}

Object.assign(output, translatedObj);
return previousTranslation[i].content;
}),
);

return {
summary: `Translated ${Object.keys(translatedObj).length} new keys`,
content: `export default ${JSON.stringify(output, null, 2)} as const;\n`,
summary: `Translated ${toTranslate.length} new keys`,
content: output,
};
},
async onNew(options) {
const sourceObj = parse(options.content);
const strings = getStrings(options.content);

const { object } = await generateObject({
model: options.model,
prompt: createRecordPrompt(sourceObj, options),
output: "no-schema",
prompt: getPrompt(strings, options),
schema: z.array(z.string()),
});

return {
content: `export default ${JSON.stringify(object, null, 2)} as const;\n`,
content: replaceStrings(options.content, strings, object),
};
},
};

/**
 * Build the model prompt for translating a list of string literals.
 *
 * The prompt consists of `baseRequirements`, two extra rules, an instruction
 * line, and one fenced code block per literal (fence tagged with
 * `options.format`), all wrapped by `createBasePrompt`.
 *
 * @param strings - literals to translate; only their `content` is used
 * @param options - supplies the fence language and is forwarded to `createBasePrompt`
 */
function getPrompt(strings: StringMatch[], options: PromptOptions) {
  // One fenced block per literal, separated by a blank line.
  const codeblocks = strings
    .map((entry) => `\`\`\`${options.format}\n${entry.content}\n\`\`\``)
    .join("\n\n");

  return createBasePrompt(
    `${baseRequirements}
- Preserve all object/property keys, syntax characters, and punctuation marks exactly
- Only translate text content within quotation marks
A list of javascript codeblocks, return the translated javascript code in a JSON array, make sure to escape special characters like line breaks:
${codeblocks}`,
    options,
  );
}
Loading

0 comments on commit 2f14b14

Please sign in to comment.