From adc827765ee245a9a59c9c6696cecf7e5716f8f0 Mon Sep 17 00:00:00 2001 From: Mark Roberts Date: Thu, 7 Nov 2024 01:44:46 +0100 Subject: [PATCH] feat: implement `scanSync` (#6) --- binding.gyp | 3 +- src/addon.cc | 5 + src/lib/binding.ts | 15 ++ src/lib/index.ts | 1 + src/lib/tokens.ts | 536 +++++++++++++++++++++++++++++++++++++++++++++ src/sync.cc | 43 ++++ src/sync.h | 1 + 7 files changed, 603 insertions(+), 1 deletion(-) create mode 100644 src/lib/tokens.ts diff --git a/binding.gyp b/binding.gyp index e3afa26..da63a95 100644 --- a/binding.gyp +++ b/binding.gyp @@ -16,7 +16,8 @@ ], "include_dirs": [ " bindings('queryparser') @@ -19,6 +20,7 @@ let PgQuery: { callback: (err: Error | null, result: string) => void, ) => void fingerprintSync: (query: string) => string + scanSync: (query: string) => ScanResult splitWithScannerSync: (query: string) => { location: number length: number @@ -30,6 +32,15 @@ export type ParseResult = { version: number } +export interface Token { + kind: TokenKind + start: number + end: number + keyword: KeywordKind +} + +export type ScanResult = Token[] + export function parseQuery(query: string) { return new Promise((resolve, reject) => { PgQuery ??= loadAddon() @@ -69,6 +80,10 @@ export function fingerprintSync(query: string): string { return (PgQuery ??= loadAddon()).fingerprintSync(query) } +export function scanSync(query: string) { + return (PgQuery ??= loadAddon()).scanSync(query) +} + export function splitWithScannerSync(query: string) { return (PgQuery ??= loadAddon()).splitWithScannerSync(query) } diff --git a/src/lib/index.ts b/src/lib/index.ts index c744656..2964366 100644 --- a/src/lib/index.ts +++ b/src/lib/index.ts @@ -2,4 +2,5 @@ export * from './ast.js' export * from './binding.js' export * from './node.js' export * from './select.js' +export * from './tokens.js' export * from './walk.js' diff --git a/src/lib/tokens.ts b/src/lib/tokens.ts new file mode 100644 index 0000000..b07f4e4 --- /dev/null +++ b/src/lib/tokens.ts @@ -0,0 +1,536 @@ +// Synced from https://github.com/pganalyze/libpg_query/blob/16-latest/protobuf/pg_query.proto + +export type KeywordKind = + | 'NO_KEYWORD' + | 'UNRESERVED_KEYWORD' + | 'COL_NAME_KEYWORD' + | 'TYPE_FUNC_NAME_KEYWORD' + | 'RESERVED_KEYWORD' + +export type TokenKind = + | 'NUL' + // Single-character tokens that are returned 1:1 (identical with "self" list in scan.l) + // Either supporting syntax, or single-character operators (some can be both) + // Also see https://www.postgresql.org/docs/12/sql-syntax-lexical.html#SQL-SYNTAX-SPECIAL-CHARS + | 'ASCII_36' // "$" + | 'ASCII_37' // "%" + | 'ASCII_40' // "(" + | 'ASCII_41' // ")" + | 'ASCII_42' // "*" + | 'ASCII_43' // "+" + | 'ASCII_44' // "," + | 'ASCII_45' // "-" + | 'ASCII_46' // "." + | 'ASCII_47' // "/" + | 'ASCII_58' // ":" + | 'ASCII_59' // ";" + | 'ASCII_60' // "<" + | 'ASCII_61' // "=" + | 'ASCII_62' // ">" + | 'ASCII_63' // "?" + | 'ASCII_91' // "[" + | 'ASCII_92' // "\" + | 'ASCII_93' // "]" + | 'ASCII_94' // "^" + // Named tokens in scan.l + | 'IDENT' + | 'UIDENT' + | 'FCONST' + | 'SCONST' + | 'USCONST' + | 'BCONST' + | 'XCONST' + | 'Op' + | 'ICONST' + | 'PARAM' + | 'TYPECAST' + | 'DOT_DOT' + | 'COLON_EQUALS' + | 'EQUALS_GREATER' + | 'LESS_EQUALS' + | 'GREATER_EQUALS' + | 'NOT_EQUALS' + | 'SQL_COMMENT' + | 'C_COMMENT' + | 'ABORT_P' + | 'ABSENT' + | 'ABSOLUTE_P' + | 'ACCESS' + | 'ACTION' + | 'ADD_P' + | 'ADMIN' + | 'AFTER' + | 'AGGREGATE' + | 'ALL' + | 'ALSO' + | 'ALTER' + | 'ALWAYS' + | 'ANALYSE' + | 'ANALYZE' + | 'AND' + | 'ANY' + | 'ARRAY' + | 'AS' + | 'ASC' + | 'ASENSITIVE' + | 'ASSERTION' + | 'ASSIGNMENT' + | 'ASYMMETRIC' + | 'ATOMIC' + | 'AT' + | 'ATTACH' + | 'ATTRIBUTE' + | 'AUTHORIZATION' + | 'BACKWARD' + | 'BEFORE' + | 'BEGIN_P' + | 'BETWEEN' + | 'BIGINT' + | 'BINARY' + | 'BIT' + | 'BOOLEAN_P' + | 'BOTH' + | 'BREADTH' + | 'BY' + | 'CACHE' + | 'CALL' + | 'CALLED' + | 'CASCADE' + | 'CASCADED' + | 'CASE' + | 'CAST' + | 'CATALOG_P' + | 'CHAIN' + | 'CHAR_P' + | 'CHARACTER' + | 'CHARACTERISTICS' + | 'CHECK' + | 'CHECKPOINT' + | 'CLASS' + | 'CLOSE' + | 'CLUSTER' + | 'COALESCE' + | 'COLLATE' + | 'COLLATION' + | 'COLUMN' + | 'COLUMNS' + | 'COMMENT' + | 'COMMENTS' + | 'COMMIT' + | 'COMMITTED' + | 'COMPRESSION' + | 'CONCURRENTLY' + | 'CONFIGURATION' + | 'CONFLICT' + | 'CONNECTION' + | 'CONSTRAINT' + | 'CONSTRAINTS' + | 'CONTENT_P' + | 'CONTINUE_P' + | 'CONVERSION_P' + | 'COPY' + | 'COST' + | 'CREATE' + | 'CROSS' + | 'CSV' + | 'CUBE' + | 'CURRENT_P' + | 'CURRENT_CATALOG' + | 'CURRENT_DATE' + | 'CURRENT_ROLE' + | 'CURRENT_SCHEMA' + | 'CURRENT_TIME' + | 'CURRENT_TIMESTAMP' + | 'CURRENT_USER' + | 'CURSOR' + | 'CYCLE' + | 'DATA_P' + | 'DATABASE' + | 'DAY_P' + | 'DEALLOCATE' + | 'DEC' + | 'DECIMAL_P' + | 'DECLARE' + | 'DEFAULT' + | 'DEFAULTS' + | 'DEFERRABLE' + | 'DEFERRED' + | 'DEFINER' + | 'DELETE_P' + | 'DELIMITER' + | 'DELIMITERS' + | 'DEPENDS' + | 'DEPTH' + | 'DESC' + | 'DETACH' + | 'DICTIONARY' + | 'DISABLE_P' + | 'DISCARD' + | 'DISTINCT' + | 'DO' + | 'DOCUMENT_P' + | 'DOMAIN_P' + | 'DOUBLE_P' + | 'DROP' + | 'EACH' + | 'ELSE' + | 'ENABLE_P' + | 'ENCODING' + | 'ENCRYPTED' + | 'END_P' + | 'ENUM_P' + | 'ESCAPE' + | 'EVENT' + | 'EXCEPT' + | 'EXCLUDE' + | 'EXCLUDING' + | 'EXCLUSIVE' + | 'EXECUTE' + | 'EXISTS' + | 'EXPLAIN' + | 'EXPRESSION' + | 'EXTENSION' + | 'EXTERNAL' + | 'EXTRACT' + | 'FALSE_P' + | 'FAMILY' + | 'FETCH' + | 'FILTER' + | 'FINALIZE' + | 'FIRST_P' + | 'FLOAT_P' + | 'FOLLOWING' + | 'FOR' + | 'FORCE' + | 'FOREIGN' + | 'FORMAT' + | 'FORWARD' + | 'FREEZE' + | 'FROM' + | 'FULL' + | 'FUNCTION' + | 'FUNCTIONS' + | 'GENERATED' + | 'GLOBAL' + | 'GRANT' + | 'GRANTED' + | 'GREATEST' + | 'GROUP_P' + | 'GROUPING' + | 'GROUPS' + | 'HANDLER' + | 'HAVING' + | 'HEADER_P' + | 'HOLD' + | 'HOUR_P' + | 'IDENTITY_P' + | 'IF_P' + | 'ILIKE' + | 'IMMEDIATE' + | 'IMMUTABLE' + | 'IMPLICIT_P' + | 'IMPORT_P' + | 'IN_P' + | 'INCLUDE' + | 'INCLUDING' + | 'INCREMENT' + | 'INDENT' + | 'INDEX' + | 'INDEXES' + | 'INHERIT' + | 'INHERITS' + | 'INITIALLY' + | 'INLINE_P' + | 'INNER_P' + | 'INOUT' + | 'INPUT_P' + | 'INSENSITIVE' + | 'INSERT' + | 'INSTEAD' + | 'INT_P' + | 'INTEGER' + | 'INTERSECT' + | 'INTERVAL' + | 'INTO' + | 'INVOKER' + | 'IS' + | 'ISNULL' + | 'ISOLATION' + | 'JOIN' + | 'JSON' + | 'JSON_ARRAY' + | 'JSON_ARRAYAGG' + | 'JSON_OBJECT' + | 'JSON_OBJECTAGG' + | 'KEY' + | 'KEYS' + | 'LABEL' + | 'LANGUAGE' + | 'LARGE_P' + | 'LAST_P' + | 'LATERAL_P' + | 'LEADING' + | 'LEAKPROOF' + | 'LEAST' + | 'LEFT' + | 'LEVEL' + | 'LIKE' + | 'LIMIT' + | 'LISTEN' + | 'LOAD' + | 'LOCAL' + | 'LOCALTIME' + | 'LOCALTIMESTAMP' + | 'LOCATION' + | 'LOCK_P' + | 'LOCKED' + | 'LOGGED' + | 'MAPPING' + | 'MATCH' + | 'MATCHED' + | 'MATERIALIZED' + | 'MAXVALUE' + | 'MERGE' + | 'METHOD' + | 'MINUTE_P' + | 'MINVALUE' + | 'MODE' + | 'MONTH_P' + | 'MOVE' + | 'NAME_P' + | 'NAMES' + | 'NATIONAL' + | 'NATURAL' + | 'NCHAR' + | 'NEW' + | 'NEXT' + | 'NFC' + | 'NFD' + | 'NFKC' + | 'NFKD' + | 'NO' + | 'NONE' + | 'NORMALIZE' + | 'NORMALIZED' + | 'NOT' + | 'NOTHING' + | 'NOTIFY' + | 'NOTNULL' + | 'NOWAIT' + | 'NULL_P' + | 'NULLIF' + | 'NULLS_P' + | 'NUMERIC' + | 'OBJECT_P' + | 'OF' + | 'OFF' + | 'OFFSET' + | 'OIDS' + | 'OLD' + | 'ON' + | 'ONLY' + | 'OPERATOR' + | 'OPTION' + | 'OPTIONS' + | 'OR' + | 'ORDER' + | 'ORDINALITY' + | 'OTHERS' + | 'OUT_P' + | 'OUTER_P' + | 'OVER' + | 'OVERLAPS' + | 'OVERLAY' + | 'OVERRIDING' + | 'OWNED' + | 'OWNER' + | 'PARALLEL' + | 'PARAMETER' + | 'PARSER' + | 'PARTIAL' + | 'PARTITION' + | 'PASSING' + | 'PASSWORD' + | 'PLACING' + | 'PLANS' + | 'POLICY' + | 'POSITION' + | 'PRECEDING' + | 'PRECISION' + | 'PRESERVE' + | 'PREPARE' + | 'PREPARED' + | 'PRIMARY' + | 'PRIOR' + | 'PRIVILEGES' + | 'PROCEDURAL' + | 'PROCEDURE' + | 'PROCEDURES' + | 'PROGRAM' + | 'PUBLICATION' + | 'QUOTE' + | 'RANGE' + | 'READ' + | 'REAL' + | 'REASSIGN' + | 'RECHECK' + | 'RECURSIVE' + | 'REF_P' + | 'REFERENCES' + | 'REFERENCING' + | 'REFRESH' + | 'REINDEX' + | 'RELATIVE_P' + | 'RELEASE' + | 'RENAME' + | 'REPEATABLE' + | 'REPLACE' + | 'REPLICA' + | 'RESET' + | 'RESTART' + | 'RESTRICT' + | 'RETURN' + | 'RETURNING' + | 'RETURNS' + | 'REVOKE' + | 'RIGHT' + | 'ROLE' + | 'ROLLBACK' + | 'ROLLUP' + | 'ROUTINE' + | 'ROUTINES' + | 'ROW' + | 'ROWS' + | 'RULE' + | 'SAVEPOINT' + | 'SCALAR' + | 'SCHEMA' + | 'SCHEMAS' + | 'SCROLL' + | 'SEARCH' + | 'SECOND_P' + | 'SECURITY' + | 'SELECT' + | 'SEQUENCE' + | 'SEQUENCES' + | 'SERIALIZABLE' + | 'SERVER' + | 'SESSION' + | 'SESSION_USER' + | 'SET' + | 'SETS' + | 'SETOF' + | 'SHARE' + | 'SHOW' + | 'SIMILAR' + | 'SIMPLE' + | 'SKIP' + | 'SMALLINT' + | 'SNAPSHOT' + | 'SOME' + | 'SQL_P' + | 'STABLE' + | 'STANDALONE_P' + | 'START' + | 'STATEMENT' + | 'STATISTICS' + | 'STDIN' + | 'STDOUT' + | 'STORAGE' + | 'STORED' + | 'STRICT_P' + | 'STRIP_P' + | 'SUBSCRIPTION' + | 'SUBSTRING' + | 'SUPPORT' + | 'SYMMETRIC' + | 'SYSID' + | 'SYSTEM_P' + | 'SYSTEM_USER' + | 'TABLE' + | 'TABLES' + | 'TABLESAMPLE' + | 'TABLESPACE' + | 'TEMP' + | 'TEMPLATE' + | 'TEMPORARY' + | 'TEXT_P' + | 'THEN' + | 'TIES' + | 'TIME' + | 'TIMESTAMP' + | 'TO' + | 'TRAILING' + | 'TRANSACTION' + | 'TRANSFORM' + | 'TREAT' + | 'TRIGGER' + | 'TRIM' + | 'TRUE_P' + | 'TRUNCATE' + | 'TRUSTED' + | 'TYPE_P' + | 'TYPES_P' + | 'UESCAPE' + | 'UNBOUNDED' + | 'UNCOMMITTED' + | 'UNENCRYPTED' + | 'UNION' + | 'UNIQUE' + | 'UNKNOWN' + | 'UNLISTEN' + | 'UNLOGGED' + | 'UNTIL' + | 'UPDATE' + | 'USER' + | 'USING' + | 'VACUUM' + | 'VALID' + | 'VALIDATE' + | 'VALIDATOR' + | 'VALUE_P' + | 'VALUES' + | 'VARCHAR' + | 'VARIADIC' + | 'VARYING' + | 'VERBOSE' + | 'VERSION_P' + | 'VIEW' + | 'VIEWS' + | 'VOLATILE' + | 'WHEN' + | 'WHERE' + | 'WHITESPACE_P' + | 'WINDOW' + | 'WITH' + | 'WITHIN' + | 'WITHOUT' + | 'WORK' + | 'WRAPPER' + | 'WRITE' + | 'XML_P' + | 'XMLATTRIBUTES' + | 'XMLCONCAT' + | 'XMLELEMENT' + | 'XMLEXISTS' + | 'XMLFOREST' + | 'XMLNAMESPACES' + | 'XMLPARSE' + | 'XMLPI' + | 'XMLROOT' + | 'XMLSERIALIZE' + | 'XMLTABLE' + | 'YEAR_P' + | 'YES_P' + | 'ZONE' + | 'FORMAT_LA' + | 'NOT_LA' + | 'NULLS_LA' + | 'WITH_LA' + | 'WITHOUT_LA' + | 'MODE_TYPE_NAME' + | 'MODE_PLPGSQL_EXPR' + | 'MODE_PLPGSQL_ASSIGN1' + | 'MODE_PLPGSQL_ASSIGN2' + | 'MODE_PLPGSQL_ASSIGN3' + | 'UMINUS' diff --git a/src/sync.cc b/src/sync.cc index 9951079..6ff44ce 100644 --- a/src/sync.cc +++ b/src/sync.cc @@ -2,6 +2,7 @@ #include #include "sync.h" // NOLINT(build/include) #include "helpers.h" // NOLINT(build/include) +#include "protobuf/pg_query.pb-c.h" // NOLINT(build/include) Napi::String ParseQuerySync(const Napi::CallbackInfo& info) { std::string query = info[0].As(); @@ -24,6 +25,48 @@ Napi::String FingerprintSync(const Napi::CallbackInfo& info) { return FingerprintResult(info.Env(), result); } +Napi::Value ScanSync(const Napi::CallbackInfo& info) { + Napi::Env env = info.Env(); + + if (info.Length() < 1 || !info[0].IsString()) { + Napi::TypeError::New(env, "String expected").ThrowAsJavaScriptException(); + return env.Null(); + } + + std::string input = info[0].As().Utf8Value(); + PgQueryScanResult result = pg_query_scan(input.c_str()); + + if (result.error) { + Napi::Error::New(env, result.error->message).ThrowAsJavaScriptException(); + pg_query_free_scan_result(result); + return env.Null(); + } + + PgQuery__ScanResult* scan_result = pg_query__scan_result__unpack(NULL, result.pbuf.len, (const uint8_t *) result.pbuf.data); + + Napi::Array tokens = Napi::Array::New(env, scan_result->n_tokens); + + for (size_t i = 0; i < scan_result->n_tokens; i++) { + PgQuery__ScanToken* scan_token = scan_result->tokens[i]; + const ProtobufCEnumValue* token_kind = protobuf_c_enum_descriptor_get_value(&pg_query__token__descriptor, scan_token->token); + const ProtobufCEnumValue* keyword_kind = protobuf_c_enum_descriptor_get_value(&pg_query__keyword_kind__descriptor, scan_token->keyword_kind); + + Napi::Object token = Napi::Object::New(env); + token.Set("kind", token_kind->name); + token.Set("start", scan_token->start); + token.Set("end", scan_token->end); + token.Set("keyword", keyword_kind->name); + + tokens[i] = token; + } + + pg_query__scan_result__free_unpacked(scan_result, NULL); + + pg_query_free_scan_result(result); + + return tokens; +} + Napi::Value SplitWithScannerSync(const Napi::CallbackInfo& info) { Napi::Env env = info.Env(); diff --git a/src/sync.h b/src/sync.h index 8e98caa..5accd4b 100644 --- a/src/sync.h +++ b/src/sync.h @@ -3,4 +3,5 @@ Napi::String ParseQuerySync(const Napi::CallbackInfo& info); Napi::String ParsePlPgSQLSync(const Napi::CallbackInfo& info); Napi::String FingerprintSync(const Napi::CallbackInfo& info); +Napi::Value ScanSync(const Napi::CallbackInfo& info); Napi::Value SplitWithScannerSync(const Napi::CallbackInfo& info);