forked from Moddable-OpenSource/moddable
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge minimal utf-8 TextEncoder / TextDecoder
- Loading branch information
Showing
4 changed files
with
209 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
import { TextEncoder, TextDecoder } from "text"; | ||
|
||
// Round-trip fixtures: each `text` is expected to encode to exactly `bytes`,
// and each `bytes` is expected to decode back to `text`.
const testCases = [
  { label: "empty", bytes: [], text: "" },
  { label: "euro", bytes: [226, 130, 172], text: "€" },
  { label: "CJK", bytes: [240, 160, 174, 183], text: "𠮷" },
  {
    label: "sample paragraph",
    text: "This is a sample paragraph.",
    // One row per word group: "This is a " / "sample " / "paragraph."
    // prettier-ignore
    bytes: [
      84, 104, 105, 115, 32, 105, 115, 32, 97, 32,
      115, 97, 109, 112, 108, 101, 32,
      112, 97, 114, 97, 103, 114, 97, 112, 104, 46,
    ],
  },
];
|
||
/**
 * Trace a value, then a line break, as two separate `trace` calls.
 *
 * @param {*} s value handed unchanged to the host `trace` function
 */
function traceln(s) {
  for (const piece of [s, "\n"]) {
    trace(piece);
  }
}
|
||
/**
 * Element-wise strict equality of two indexable sequences.
 *
 * @param {ArrayLike<*>} a
 * @param {ArrayLike<*>} b
 * @returns {boolean} true when both have the same length and every
 *   position compares equal with `===`
 */
function cmp(a, b) {
  const count = a.length;
  if (count !== b.length) {
    return false;
  }
  let index = 0;
  while (index < count) {
    if (a[index] !== b[index]) {
      return false;
    }
    index += 1;
  }
  return true;
}
|
||
/**
 * Exercise TextEncoder / TextDecoder and trace the outcome.
 *
 * First traces a fixed round trip of "blort!", then walks `testCases`,
 * tracing one PASS or FAIL line per case. The encoding is checked first;
 * the decoded string is only compared when the encoded bytes matched.
 */
function main() {
  traceln("hello!");
  const enc = new TextEncoder();
  const dec = new TextDecoder();

  // Fixed round trip, traced for eyeballing rather than asserted.
  const data = enc.encode("blort!");
  traceln(data);
  traceln(dec.decode(data));

  for (const testCase of testCases) {
    const { label, bytes, text } = testCase;
    const encoded = enc.encode(text);
    const decoded = dec.decode(Uint8Array.from(bytes));

    if (!cmp(encoded, bytes)) {
      const wanted = JSON.stringify(bytes);
      const got = JSON.stringify(Array.from(encoded));
      traceln(`FAIL: ${label}: expected ${wanted} actual ${encoded.length} ${got}`);
      continue;
    }

    if (decoded !== text) {
      const wanted = JSON.stringify(text);
      const got = JSON.stringify(Array.from(decoded));
      traceln(`FAIL: ${label}: expected ${wanted} actual ${got}`);
      continue;
    }

    traceln(`PASS: ${label}`);
  }
}
|
||
main(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
{ | ||
"include": [ | ||
"$(MODDABLE)/examples/manifest_base.json", | ||
], | ||
"modules": { | ||
"*": [ | ||
"./main", | ||
"$(MODDABLE)/modules/data/text/text", | ||
], | ||
}, | ||
"preload": [ | ||
], | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
/** | ||
* We take advantage of the internal representation of strings | ||
* so that conversion is just copying bytes. | ||
* | ||
* "A string value is a pointer to a UTF-8 C string." | ||
* -- https://github.com/Moddable-OpenSource/moddable/blob/public/documentation/xs/XS%20in%20C.md#strings | ||
**/ | ||
#include <stdlib.h> | ||
#include "xs.h" | ||
|
||
/** | ||
* Decode text from utf-8 to string | ||
* | ||
* @param {ArrayBuffer} xsArg(0) | ||
* @returns {string} | ||
*/ | ||
void xs_utf8_decode(xsMachine *the) | ||
{ | ||
char *data = xsToArrayBuffer(xsArg(0)); | ||
size_t size = xsGetArrayBufferLength(xsArg(0)); | ||
xsResult = xsStringBuffer(data, size + 1); | ||
char *dest = xsToString(xsResult); | ||
dest[size] = 0; | ||
} | ||
|
||
/** | ||
* Encode string of text as utf-8 bytes | ||
* | ||
* @param {string} xsArg(0) | ||
* @returns {ArrayBuffer} | ||
* | ||
* WARNING: returned ArrayBuffer will be "detatched" in the 0-length case. | ||
**/ | ||
void xs_utf8_encode(xsMachine *the) | ||
{ | ||
xsStringValue string = xsToString(xsArg(0)); | ||
int length = c_strlen(string); | ||
xsResult = xsArrayBuffer(string, length); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
/**
 * minimal utf-8 TextEncoder
 * No support for encodeInto.
 *
 * ref https://developer.mozilla.org/en-US/docs/Web/API/TextEncoder
 */
export class TextEncoder {
  /**
   * @returns {string} always 'utf-8'
   */
  get encoding() {
    return 'utf-8';
  }

  /**
   * Encode a string as UTF-8 bytes.
   *
   * @param {string} s text to encode
   * @returns {Uint8Array} the encoded bytes; zero-length for ''
   * @throws {TypeError} if s is not a string (no coercion is attempted)
   */
  encode(s) {
    if (typeof s !== 'string') {
      throw new TypeError(`arg must be string, not ${typeof s}`);
    }
    // fxArrayBuffer only allocates a chunk if length > 0, so for ''
    // the native encoder hands back a detached buffer and
    // new Uint8Array(thatBuffer) throws "detached buffer!".
    // Build the empty result here instead of calling into native code.
    if (s.length === 0) {
      return new Uint8Array();
    }
    return new Uint8Array(utf8_encode(s));
  }
}
|
||
// Labels accepted by the TextDecoder constructor; each one names UTF-8.
const UTF8Names = ["unicode-1-1-utf-8", "utf-8", "utf8"];

/**
 * minimal utf-8 TextDecoder
 * no support for fatal, stream, etc.
 *
 * ref https://developer.mozilla.org/en-US/docs/Web/API/TextDecoder
 */
export class TextDecoder {
  /**
   * @param {string=} utfLabel optional name for UTF-8
   * @param {*} options fatal is not supported
   * @throws {TypeError} if utfLabel is given and is not listed in
   *   UTF8Names, or if options.fatal is truthy
   */
  constructor(utfLabel, options) {
    // Must be logical &&: with bitwise & the string label is coerced to a
    // number, the result is 0 for any non-numeric label, and unsupported
    // encodings are silently accepted.
    if (utfLabel && !UTF8Names.includes(utfLabel)) {
      throw new TypeError(utfLabel);
    }
    if (options && options.fatal) {
      throw new TypeError('fatal not supported');
    }
  }

  /**
   * Decode UTF-8 bytes to a string.
   *
   * @param {Uint8Array} bytes
   * @param {*} options stream is not supported
   * @returns {string}
   * @throws {TypeError} if options.stream is truthy or bytes is not a
   *   Uint8Array
   */
  decode(bytes, options) {
    if (options && options.stream) {
      throw new TypeError('stream is unsupported');
    }
    if (!(bytes instanceof Uint8Array)) {
      throw new TypeError('arg must be Uint8Array');
    }
    const { buffer, byteOffset, byteLength } = bytes;
    // Nothing to decode; skip the native call for zero-length input.
    if (byteLength === 0) {
      return '';
    }
    // A Uint8Array may be a window onto a larger buffer (e.g. made with
    // subarray()). The native decoder takes a whole ArrayBuffer, so copy
    // out just the viewed range unless the view already spans all of it.
    const spansWholeBuffer =
      byteOffset === 0 && byteLength === buffer.byteLength;
    const exact = spansWholeBuffer
      ? buffer
      : buffer.slice(byteOffset, byteOffset + byteLength);
    return utf8_decode(exact);
  }
}
|
||
function utf8_encode(string) @ "xs_utf8_encode"; | ||
function utf8_decode(buffer) @ "xs_utf8_decode"; |