Skip to content

Commit

Permalink
Clojure: Improved tokenization (#3056)
Browse files Browse the repository at this point in the history
  • Loading branch information
RunDevelopment authored Sep 12, 2021
1 parent 148c1ec commit 8d0b74b
Show file tree
Hide file tree
Showing 8 changed files with 102 additions and 27 deletions.
30 changes: 24 additions & 6 deletions components/prism-clojure.js
Original file line number Diff line number Diff line change
@@ -1,16 +1,34 @@
// Copied from https://github.com/jeluard/prism-clojure
Prism.languages.clojure = {
'comment': /;.*/,
'string': {
pattern: /"(?:[^"\\]|\\.)*"/,
'comment': {
pattern: /;.*/,
greedy: true
},
'operator': /(?:::|[:|'])\b[a-z][\w*+!?-]*\b/i, //used for symbols and keywords
'string': [
{
pattern: /"(?:[^"\\]|\\.)*"/,
greedy: true
},
// characters
/\\\w+/
],
'symbol': {
pattern: /(^|[\s()\[\]{},])::?[\w*+!?'<>=/.-]+/,
lookbehind: true
},
'keyword': {
pattern: /([^\w+*'?-])(?:def|if|do|let|\.\.|quote|var|->>|->|fn|loop|recur|throw|try|monitor-enter|\.|new|set!|def-|defn|defn-|defmacro|defmulti|defmethod|defstruct|defonce|declare|definline|definterface|defprotocol|==|defrecord|>=|deftype|<=|defproject|ns|\*|\+|-|\/|<|=|>|accessor|agent|agent-errors|aget|alength|all-ns|alter|and|append-child|apply|array-map|aset|aset-boolean|aset-byte|aset-char|aset-double|aset-float|aset-int|aset-long|aset-short|assert|assoc|await|await-for|bean|binding|bit-and|bit-not|bit-or|bit-shift-left|bit-shift-right|bit-xor|boolean|branch\?|butlast|byte|cast|char|children|class|clear-agent-errors|comment|commute|comp|comparator|complement|concat|conj|cons|constantly|cond|if-not|construct-proxy|contains\?|count|create-ns|create-struct|cycle|dec|deref|difference|disj|dissoc|distinct|doall|doc|dorun|doseq|dosync|dotimes|doto|double|down|drop|drop-while|edit|end\?|ensure|eval|every\?|false\?|ffirst|file-seq|filter|find|find-doc|find-ns|find-var|first|float|flush|for|fnseq|frest|gensym|get-proxy-class|get|hash-map|hash-set|identical\?|identity|if-let|import|in-ns|inc|index|insert-child|insert-left|insert-right|inspect-table|inspect-tree|instance\?|int|interleave|intersection|into|into-array|iterate|join|key|keys|keyword|keyword\?|last|lazy-cat|lazy-cons|left|lefts|line-seq|list\*|list|load|load-file|locking|long|macroexpand|macroexpand-1|make-array|make-node|map|map-invert|map\?|mapcat|max|max-key|memfn|merge|merge-with|meta|min|min-key|name|namespace|neg\?|newline|next|nil\?|node|not|not-any\?|not-every\?|not=|ns-imports|ns-interns|ns-map|ns-name|ns-publics|ns-refers|ns-resolve|ns-unmap|nth|nthrest|or|parse|partial|path|peek|pop|pos\?|pr|pr-str|print|print-str|println|println-str|prn|prn-str|project|proxy|proxy-mappings|quot|rand|rand-int|range|re-find|re-groups|re-matcher|re-matches|re-pattern|re-seq|read|read-line|reduce|ref|ref-set|refer|rem|remove|remove-method|remove-ns|rename|rename-keys|repeat|replace|replicate|resolve|rest|resultset-seq|reverse|rfirst|right|rights|root|rrest|rseq|second|select|select-keys|send|send-off|seq|seq-zip|seq\?|set|short|slurp|some|sort|sort-by|sorted-map|sorted-map-by|sorted-set|special-symbol\?|split-at|split-with|str|string\?|struct|struct-map|subs|subvec|symbol|symbol\?|sync|take|take-nth|take-while|test|time|to-array|to-array-2d|tree-seq|true\?|union|up|update-proxy|val|vals|var-get|var-set|var\?|vector|vector-zip|vector\?|when|when-first|when-let|when-not|with-local-vars|with-meta|with-open|with-out-str|xml-seq|xml-zip|zero\?|zipmap|zipper)(?=[^\w+*'?-])/,
pattern: /(\()(?:-|->|->>|\.|\.\.|\*|\/|\+|<|<=|=|==|>|>=|accessor|agent|agent-errors|aget|alength|all-ns|alter|and|append-child|apply|array-map|aset|aset-boolean|aset-byte|aset-char|aset-double|aset-float|aset-int|aset-long|aset-short|assert|assoc|await|await-for|bean|binding|bit-and|bit-not|bit-or|bit-shift-left|bit-shift-right|bit-xor|boolean|branch\?|butlast|byte|cast|char|children|class|clear-agent-errors|comment|commute|comp|comparator|complement|concat|cond|conj|cons|constantly|construct-proxy|contains\?|count|create-ns|create-struct|cycle|dec|declare|def|def-|definline|definterface|defmacro|defmethod|defmulti|defn|defn-|defonce|defproject|defprotocol|defrecord|defstruct|deftype|deref|difference|disj|dissoc|distinct|do|doall|doc|dorun|doseq|dosync|dotimes|doto|double|down|drop|drop-while|edit|end\?|ensure|eval|every\?|false\?|ffirst|file-seq|filter|find|find-doc|find-ns|find-var|first|float|flush|fn|fnseq|for|frest|gensym|get|get-proxy-class|hash-map|hash-set|identical\?|identity|if|if-let|if-not|import|in-ns|inc|index|insert-child|insert-left|insert-right|inspect-table|inspect-tree|instance\?|int|interleave|intersection|into|into-array|iterate|join|key|keys|keyword|keyword\?|last|lazy-cat|lazy-cons|left|lefts|let|line-seq|list|list\*|load|load-file|locking|long|loop|macroexpand|macroexpand-1|make-array|make-node|map|map-invert|map\?|mapcat|max|max-key|memfn|merge|merge-with|meta|min|min-key|monitor-enter|name|namespace|neg\?|new|newline|next|nil\?|node|not|not-any\?|not-every\?|not=|ns|ns-imports|ns-interns|ns-map|ns-name|ns-publics|ns-refers|ns-resolve|ns-unmap|nth|nthrest|or|parse|partial|path|peek|pop|pos\?|pr|pr-str|print|print-str|println|println-str|prn|prn-str|project|proxy|proxy-mappings|quot|quote|rand|rand-int|range|re-find|re-groups|re-matcher|re-matches|re-pattern|re-seq|read|read-line|recur|reduce|ref|ref-set|refer|rem|remove|remove-method|remove-ns|rename|rename-keys|repeat|replace|replicate|resolve|rest|resultset-seq|reverse|rfirst|right|rights|root|rrest|rseq|second|select|select-keys|send|send-off|seq|seq-zip|seq\?|set|set!|short|slurp|some|sort|sort-by|sorted-map|sorted-map-by|sorted-set|special-symbol\?|split-at|split-with|str|string\?|struct|struct-map|subs|subvec|symbol|symbol\?|sync|take|take-nth|take-while|test|throw|time|to-array|to-array-2d|tree-seq|true\?|try|union|up|update-proxy|val|vals|var|var-get|var-set|var\?|vector|vector-zip|vector\?|when|when-first|when-let|when-not|with-local-vars|with-meta|with-open|with-out-str|xml-seq|xml-zip|zero\?|zipmap|zipper)(?=[\s)]|$)/,
lookbehind: true
},
'boolean': /\b(?:true|false|nil)\b/,
'number': /\b[\da-f]+\b/i,
'number': {
pattern: /(^|[^\w$@])(?:\d+(?:[/.]\d+)?(?:e[+-]?\d+)?|0x[a-f0-9]+|[1-9]\d?r[a-z0-9]+)[lmn]?(?![\w$@])/i,
lookbehind: true
},
'function': {
pattern: /((?:^|[^'])\()[\w*+!?'<>=/.-]+(?=[\s)]|$)/,
lookbehind: true
},
'operator': /[#@^`~]/,
'punctuation': /[{}\[\](),]/
};
2 changes: 1 addition & 1 deletion components/prism-clojure.min.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions tests/languages/clojure/function_feature.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
(foo args)

; not a function
'(a b c)

----------------------------------------------------

[
["punctuation", "("], ["function", "foo"], " args", ["punctuation", ")"],

["comment", "; not a function"],
"\r\n'", ["punctuation", "("], "a b c", ["punctuation", ")"]
]
27 changes: 27 additions & 0 deletions tests/languages/clojure/number_feature.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
123
01234
0xFFF
2r0101011
8r52
36r16
1.0
1M
2/3
0.6666666666666666
36786883868216818816N

----------------------------------------------------

[
["number", "123"],
["number", "01234"],
["number", "0xFFF"],
["number", "2r0101011"],
["number", "8r52"],
["number", "36r16"],
["number", "1.0"],
["number", "1M"],
["number", "2/3"],
["number", "0.6666666666666666"],
["number", "36786883868216818816N"]
]
20 changes: 0 additions & 20 deletions tests/languages/clojure/operator_and_punctuation.test

This file was deleted.

11 changes: 11 additions & 0 deletions tests/languages/clojure/operator_feature.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# @ ^ ` ~

----------------------------------------------------

[
["operator", "#"],
["operator", "@"],
["operator", "^"],
["operator", "`"],
["operator", "~"]
]
15 changes: 15 additions & 0 deletions tests/languages/clojure/punctuation_feature.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{ } [ ] ( )
,

----------------------------------------------------

[
["punctuation", "{"],
["punctuation", "}"],
["punctuation", "["],
["punctuation", "]"],
["punctuation", "("],
["punctuation", ")"],

["punctuation", ","]
]
11 changes: 11 additions & 0 deletions tests/languages/clojure/symbol_feature.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
:foo
:foo/bar-baz
::foo

----------------------------------------------------

[
["symbol", ":foo"],
["symbol", ":foo/bar-baz"],
["symbol", "::foo"]
]

0 comments on commit 8d0b74b

Please sign in to comment.