Skip to content

Commit

Permalink
Merge pull request #10 from danini-the-panini/performance
Browse files: browse the repository at this point in the history
Use codeUnits instead of runes
  • Loading branch information
danini-the-panini authored Jan 3, 2025
2 parents 0e2cb9c + bff9f04 commit 33b3728
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 113 deletions.
18 changes: 9 additions & 9 deletions lib/src/document.dart
Original file line number Diff line number Diff line change
Expand Up @@ -488,11 +488,11 @@ class _StringDumper {
String dump() {
if (_isBareIdentifier()) return _string;

return "\"${_string.runes.map(_escape).join('')}\"";
return "\"${_string.codeUnits.map(_escape).join('')}\"";
}

String _escape(int rune) {
switch (rune) {
String _escape(int code) {
switch (code) {
case 10:
return "\\n";
case 13:
Expand All @@ -508,15 +508,15 @@ class _StringDumper {
case 12:
return "\\f";
default:
return String.fromCharCode(rune);
return String.fromCharCode(code);
}
}

static final forbidden = [
...KdlTokenizer.symbols.keys.map((e) => e.runes.single),
...KdlTokenizer.whitespace.map((e) => e.runes.single),
...KdlTokenizer.newlines.map((e) => e.runes.single),
..."()[]/\\\"#".runes,
...KdlTokenizer.symbols.keys.map((e) => e.codeUnits.single),
...KdlTokenizer.whitespace.map((e) => e.codeUnits.single),
...KdlTokenizer.newlines.map((e) => e.codeUnits.single),
..."()[]/\\\"#".codeUnits,
...List.generate(0x20, (e) => e),
];

Expand All @@ -540,6 +540,6 @@ class _StringDumper {
return false;
}

return !_string.runes.any((c) => forbidden.contains(c));
return !_string.codeUnits.any((c) => forbidden.contains(c));
}
}
18 changes: 9 additions & 9 deletions lib/src/tokenizer.dart
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ class KdlTokenizer {
String nl = _expectNewline(_index + 3);
_context = _KdlTokenizerContext.multiLineString;
_buffer = '';
_traverse(3 + nl.runes.length);
_traverse(3 + nl.length);
} else {
_context = _KdlTokenizerContext.string;
_buffer = '';
Expand All @@ -315,7 +315,7 @@ class KdlTokenizer {
_context = _KdlTokenizerContext.multiLineRawstring;
_rawstringHashes = 1;
_buffer = '';
_traverse(4 + nl.runes.length);
_traverse(4 + nl.length);
continue;
} else {
_context = _KdlTokenizerContext.rawstring;
Expand All @@ -336,7 +336,7 @@ class KdlTokenizer {
String nl = _expectNewline(i + 3);
_context = _KdlTokenizerContext.multiLineRawstring;
_buffer = '';
_traverse(_rawstringHashes + 3 + nl.runes.length);
_traverse(_rawstringHashes + 3 + nl.length);
continue;
} else {
_context = _KdlTokenizerContext.rawstring;
Expand Down Expand Up @@ -408,7 +408,7 @@ class KdlTokenizer {
return KdlToken(symbols[c]!, c);
} else if (c == "\r" || newlines.contains(c)) {
String nl = _expectNewline(_index);
_traverse(nl.runes.length);
_traverse(nl.length);
return _token(KdlTerm.newline, nl);
} else if (c == "/") {
var n = _char(_index + 1);
Expand Down Expand Up @@ -710,10 +710,10 @@ class KdlTokenizer {
}

_char(int i) {
if (i < 0 || i >= _str.runes.length) {
if (i < 0 || i >= _str.length) {
return null;
}
var char = String.fromCharCode(_str.runes.elementAt(i));
var char = _str.substring(i, i+1);
if (_forbidden.contains(char)) {
_fail("Forbidden character: $char");
}
Expand Down Expand Up @@ -787,8 +787,8 @@ class KdlTokenizer {
return _token(KdlTerm.integer, _parseInteger(_munchUnderscores(s), 10));
} catch (e) {
if (_nonInitialIdentifierChars
.contains(String.fromCharCode(s.runes.first)) ||
s.runes.skip(1).any(
.contains(String.fromCharCode(s.codeUnitAt(0))) ||
s.codeUnits.skip(1).any(
(c) => _nonIdentifierChars.contains(String.fromCharCode(c)))) {
rethrow;
}
Expand Down Expand Up @@ -1081,7 +1081,7 @@ class KdlV1Tokenizer extends KdlTokenizer {
return _token(_symbols[c]!, c);
} else if (c == "\r" || _newlines.contains(c)) {
String nl = _expectNewline(_index);
_traverse(nl.runes.length);
_traverse(nl.length);
return _token(KdlTerm.newline, nl);
} else if (c == "/") {
var n = _char(_index + 1);
Expand Down
6 changes: 3 additions & 3 deletions lib/src/types/email.dart
Original file line number Diff line number Diff line change
Expand Up @@ -167,11 +167,11 @@ class _EmailTokenizer {

String _substring(int start, [int? end]) {
return String.fromCharCodes(
_string.runes.toList().sublist(start, end ?? _length(_string)));
_string.codeUnits.sublist(start, end ?? _length(_string)));
}

int _length(String str) {
return str.runes.length;
return str.length;
}

_EmailToken nextToken() {
Expand Down Expand Up @@ -246,5 +246,5 @@ class _EmailTokenizer {

RegExp _localPartChars() => _idn ? _localPartIdn : _localPartAscii;

String _charAt(int i) => String.fromCharCode(_string.runes.elementAt(i));
String _charAt(int i) => String.fromCharCode(_string.codeUnitAt(i));
}
97 changes: 6 additions & 91 deletions lib/src/types/irl/parser.dart
Original file line number Diff line number Diff line change
Expand Up @@ -36,95 +36,10 @@ class IrlParser {
r"^(?:(?:([a-z][a-z0-9+.\-]+)):\/\/([^@]+@)?([^\/?#]+)?)?(\/?[^?#]*)?(?:\?([^#]*))?(?:#(.*))?$",
caseSensitive: false);

static const _reservedUrlChars = [
'!',
'#',
'&',
"'",
'(',
')',
'*',
'+',
',',
'/',
':',
';',
'=',
'?',
'@',
'[',
']',
'%'
];
static const _unreservedUrlChars = [
'A',
'B',
'C',
'D',
'E',
'F',
'G',
'H',
'I',
'J',
'K',
'L',
'M',
'N',
'O',
'P',
'Q',
'R',
'S',
'T',
'U',
'V',
'W',
'X',
'Y',
'Z',
'a',
'b',
'c',
'd',
'e',
'f',
'g',
'h',
'i',
'j',
'k',
'l',
'm',
'n',
'o',
'p',
'q',
'r',
's',
't',
'u',
'v',
'w',
'x',
'y',
'z',
'0',
'1',
'2',
'3',
'4',
'5',
'6',
'7',
'8',
'9',
'-',
'_',
'.',
'~'
];
static final _urlChars = _reservedUrlChars + _unreservedUrlChars;
static const _reservedUrlChars = "!#&'()*+,/:;=?@[]%";
static const _unreservedUrlChars =
'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~';
static final _urlChars = (_reservedUrlChars + _unreservedUrlChars).codeUnits;

final String _string;
final bool _isReference;
Expand Down Expand Up @@ -199,8 +114,8 @@ class IrlParser {
static bool _isValidUrlPart(String? string) {
if (string == null) return true;

return !string.runes.any((rune) =>
rune <= 127 && !_urlChars.contains(String.fromCharCode(rune)));
return !string.codeUnits
.any((code) => code <= 127 && !_urlChars.contains(code));
}

static String? _encode(String? string) {
Expand Down
2 changes: 1 addition & 1 deletion test/tokenizer_test.dart
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ smile "😁"
tokenizer.nextToken(), equals(KdlToken(KdlTerm.whitespace, ' ', 1, 6)));
expect(tokenizer.nextToken(), equals(KdlToken(KdlTerm.string, '😁', 1, 7)));
expect(
tokenizer.nextToken(), equals(KdlToken(KdlTerm.newline, "\n", 1, 10)));
tokenizer.nextToken(), equals(KdlToken(KdlTerm.newline, "\n", 1, 11)));
expect(tokenizer.nextToken(), equals(KdlToken(KdlTerm.ident, 'ノード', 2, 1)));
expect(
tokenizer.nextToken(), equals(KdlToken(KdlTerm.whitespace, ' ', 2, 4)));
Expand Down

0 comments on commit 33b3728

Please sign in to comment.