Skip to content

Commit

Permalink
Merge minimal utf-8 TextEncoder / TextDecoder
Browse files Browse the repository at this point in the history
  • Loading branch information
dckc authored Oct 14, 2020
2 parents b3e3b1e + 3bf3b8c commit 4c995ae
Show file tree
Hide file tree
Showing 4 changed files with 209 additions and 0 deletions.
85 changes: 85 additions & 0 deletions examples/data/text/main.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import { TextEncoder, TextDecoder } from "text";

// Round-trip fixtures: in each case `bytes` is the UTF-8 encoding of `text`.
const testCases = [
  { label: "empty", bytes: [], text: "" },
  { label: "euro", bytes: [226, 130, 172], text: "€" },
  { label: "CJK", bytes: [240, 160, 174, 183], text: "𠮷" },
  {
    label: "sample paragraph",
    text: "This is a sample paragraph.",
    // One row per word (the separating space is kept with the word before it).
    bytes: [
      84, 104, 105, 115, 32,
      105, 115, 32,
      97, 32,
      115, 97, 109, 112, 108, 101, 32,
      112, 97, 114, 97, 103, 114, 97, 112, 104, 46,
    ],
  },
];

/**
 * Write `s` to the trace output, followed by a newline.
 *
 * @param {*} s value handed to `trace` as-is
 */
function traceln(s) {
  for (const chunk of [s, "\n"]) {
    trace(chunk);
  }
}

/**
 * Element-wise strict equality of two indexable sequences.
 *
 * @param {ArrayLike} a
 * @param {ArrayLike} b
 * @returns {boolean} true when both have the same length and every
 *   position holds `===` values
 */
function cmp(a, b) {
  if (a.length !== b.length) {
    return false;
  }
  // Advance while the positions agree; stopping early means a mismatch.
  let idx = 0;
  while (idx < a.length && a[idx] === b[idx]) {
    idx += 1;
  }
  return !(idx < a.length);
}

/**
 * Smoke-test the encoder/decoder pair, then run every entry of
 * `testCases` in both directions and trace one PASS/FAIL line per case.
 */
function main() {
  traceln("hello!");
  const encoder = new TextEncoder();
  const decoder = new TextDecoder();

  // Quick round trip of a fixed ASCII string.
  const sampleBytes = encoder.encode("blort!");
  traceln(sampleBytes);
  const sampleText = decoder.decode(sampleBytes);
  traceln(sampleText);

  testCases.forEach(({ label, bytes, text }) => {
    const encoded = encoder.encode(text);
    const decoded = decoder.decode(Uint8Array.from(bytes));

    // Encoding mismatches are reported in preference to decoding mismatches.
    let report;
    if (!cmp(encoded, bytes)) {
      const expected = JSON.stringify(bytes);
      const got = JSON.stringify(Array.from(encoded));
      report = `FAIL: ${label}: expected ${expected} actual ${encoded.length} ${got}`;
    } else if (decoded !== text) {
      const expected = JSON.stringify(text);
      const got = JSON.stringify(Array.from(decoded));
      report = `FAIL: ${label}: expected ${expected} actual ${got}`;
    } else {
      report = `PASS: ${label}`;
    }
    traceln(report);
  });
}

// Run the checks as soon as this module is evaluated.
main();
13 changes: 13 additions & 0 deletions examples/data/text/manifest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"include": [
"$(MODDABLE)/examples/manifest_base.json",
],
"modules": {
"*": [
"./main",
"$(MODDABLE)/modules/data/text/text",
],
},
"preload": [
],
}
39 changes: 39 additions & 0 deletions modules/data/text/text.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/**
* We take advantage of the internal representation of strings
* so that conversion is just copying bytes.
*
* "A string value is a pointer to a UTF-8 C string."
* -- https://github.com/Moddable-OpenSource/moddable/blob/public/documentation/xs/XS%20in%20C.md#strings
**/
#include <stdlib.h>
#include "xs.h"

/**
 * Decode text from utf-8 to string
 *
 * The bytes of the ArrayBuffer are copied verbatim into a new string;
 * no UTF-8 validation is performed here.
 *
 * @param {ArrayBuffer} xsArg(0)
 * @returns {string}
 */
void xs_utf8_decode(xsMachine *the)
{
    size_t size = xsGetArrayBufferLength(xsArg(0));
    char *dest;

    /*
     * Allocate the result BEFORE fetching any raw pointers: allocation may
     * compact XS memory, which would leave a previously fetched ArrayBuffer
     * pointer dangling. Passing NULL makes xsStringBuffer allocate only.
     */
    xsResult = xsStringBuffer(NULL, size + 1);
    dest = xsToString(xsResult);
    if (size > 0) {
        /* Copy exactly `size` bytes; the buffer holds no byte beyond that. */
        c_memcpy(dest, xsToArrayBuffer(xsArg(0)), size);
    }
    dest[size] = 0;
}

/**
 * Encode string of text as utf-8 bytes
 *
 * The string's bytes (up to, not including, the terminating NUL) are
 * copied verbatim into a new ArrayBuffer.
 *
 * @param {string} xsArg(0)
 * @returns {ArrayBuffer}
 *
 * WARNING: returned ArrayBuffer will be "detached" in the 0-length case.
 **/
void xs_utf8_encode(xsMachine *the)
{
    int length = c_strlen(xsToString(xsArg(0)));

    /*
     * Allocate the buffer first and only then re-fetch the string pointer:
     * a pointer returned by xsToString must not be held across another XS
     * call, because allocation may compact the memory the string lives in.
     */
    xsResult = xsArrayBuffer(NULL, length);
    if (length > 0) {
        c_memcpy(xsToArrayBuffer(xsResult), xsToString(xsArg(0)), length);
    }
}
72 changes: 72 additions & 0 deletions modules/data/text/text.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/**
 * minimal utf-8 TextEncoder
 * No support for encodeInto.
 *
 * ref https://developer.mozilla.org/en-US/docs/Web/API/TextEncoder
 */
export class TextEncoder {
  /**
   * @returns {string} the only supported encoding, 'utf-8'
   */
  get encoding() {
    return 'utf-8';
  }

  /**
   * Encode a string as UTF-8.
   *
   * @param {string=} s text to encode; defaults to "" as in the standard API
   * @returns {Uint8Array} the encoded bytes
   * @throws {TypeError} if `s` is given and is not a string
   */
  encode(s = '') {
    if (typeof s !== 'string') {
      throw new TypeError(typeof s);
    }
    // fxArrayBuffer only allocates a chunk if length > 0, so the native
    // encoder returns a detached buffer for "" and wrapping that in a
    // Uint8Array throws. Build the empty result without the native call.
    if (s.length === 0) {
      return new Uint8Array();
    }
    return new Uint8Array(utf8_encode(s));
  }
}

// Encoding labels accepted by the TextDecoder constructor.
const UTF8Names = ["unicode-1-1-utf-8", "utf-8", "utf8"];

/**
 * minimal utf-8 TextDecoder
 * no support for fatal, stream, etc.
 *
 * ref https://developer.mozilla.org/en-US/docs/Web/API/TextDecoder
 */
export class TextDecoder {
  /**
   * @param {string=} utfLabel optional name for UTF-8
   * @param {*} options fatal is not supported
   * @throws {TypeError} if `utfLabel` is given and is not a UTF-8 label,
   *   or if `options.fatal` is set
   */
  constructor(utfLabel, options) {
    // Logical &&, not bitwise &: `"latin1" & true` evaluates to 0, which
    // would let every unsupported string label through.
    if (utfLabel && !UTF8Names.includes(utfLabel)) {
      throw new TypeError(utfLabel);
    }
    if (options && options.fatal) {
      throw new TypeError('fatal not supported');
    }
  }

  /**
   * @param {Uint8Array} bytes
   * @param {*} options stream is not supported
   * @returns {string} the decoded text
   * @throws {TypeError} if `options.stream` is set or `bytes` is not a
   *   Uint8Array
   */
  decode(bytes, options) {
    if (options && options.stream) {
      throw new TypeError('stream is unsupported');
    }
    if (!(bytes instanceof Uint8Array)) {
      throw new TypeError('arg must be Uint8Array');
    }
    const { buffer, byteOffset, byteLength } = bytes;
    if (byteLength === 0) {
      return '';
    }
    // A Uint8Array may be a window onto a larger ArrayBuffer (subarray,
    // explicit offset/length); hand the native decoder only the bytes the
    // view covers, copying just when the view is narrower than the buffer.
    const coversBuffer = byteOffset === 0 && byteLength === buffer.byteLength;
    return utf8_decode(
      coversBuffer ? buffer : buffer.slice(byteOffset, byteOffset + byteLength)
    );
  }
}

// Native bindings: each declaration has no JavaScript body; the string after
// `@` names the C function that implements it (xs_utf8_encode and
// xs_utf8_decode).
function utf8_encode(string) @ "xs_utf8_encode";
function utf8_decode(buffer) @ "xs_utf8_decode";

0 comments on commit 4c995ae

Please sign in to comment.