-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: initial KMP XML reader implementation (#601)
- Loading branch information
Showing
8 changed files
with
998 additions
and
56 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
65 changes: 65 additions & 0 deletions
65
...serde/serde-xml/common/src/aws/smithy/kotlin/runtime/serde/xml/tokenization/LexerState.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
/* | ||
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
* SPDX-License-Identifier: Apache-2.0. | ||
*/ | ||
package aws.smithy.kotlin.runtime.serde.xml.tokenization | ||
|
||
import aws.smithy.kotlin.runtime.serde.xml.XmlToken | ||
|
||
/**
 * The internal state of an [XmlLexer] as it works through a document.
 */
internal sealed class LexerState {
    /**
     * The node depth at which the lexer is parsing tokens. As with the concept of depth in [XmlToken], the root is at
     * depth 1 and anything outside the root is at depth 0.
     */
    abstract val depth: Int

    /**
     * The starting state at the beginning of a document, before any tags, DTD, or prolog have been read.
     */
    object Initial : LexerState() {
        override val depth: Int = 0
    }

    /**
     * The root tag is the next thing the lexer expects to read.
     */
    object BeforeRootTag : LexerState() {
        override val depth: Int = 0
    }

    /**
     * The lexer is positioned inside a tag.
     */
    sealed class Tag : LexerState() {
        /** The qualified name of this tag. */
        abstract val name: XmlToken.QualifiedName

        /** The enclosing open tag, or `null` if this tag has none. */
        abstract val parent: OpenTag?

        // One level deeper than the parent; a tag without a parent sits at depth 1. Computed on first access.
        override val depth: Int by lazy { (parent?.depth ?: 0) + 1 }

        /**
         * The lexer is inside an open tag. The next close tag should match the name of this tag.
         * @property seenChildren Presumably whether child content has already been encountered inside this tag —
         * confirm against [XmlLexer].
         */
        data class OpenTag(
            override val name: XmlToken.QualifiedName,
            override val parent: OpenTag?,
            val seenChildren: Boolean,
        ) : Tag()

        /**
         * The lexer has read a self-closing tag (e.g., '<foo />') but has only handed the [XmlToken.BeginElement]
         * token to the caller. The following [XmlLexer.parseNext] call will return an [XmlToken.EndElement] without
         * reading anything further from the source.
         */
        data class EmptyTag(
            override val name: XmlToken.QualifiedName,
            override val parent: OpenTag?,
        ) : Tag()
    }

    /**
     * The document has been fully consumed. No more data is available.
     */
    object EndOfDocument : LexerState() {
        override val depth: Int = 0
    }
}
133 changes: 133 additions & 0 deletions
133
...-xml/common/src/aws/smithy/kotlin/runtime/serde/xml/tokenization/LexingXmlStreamReader.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
/* | ||
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
* SPDX-License-Identifier: Apache-2.0. | ||
*/ | ||
package aws.smithy.kotlin.runtime.serde.xml.tokenization | ||
|
||
import aws.smithy.kotlin.runtime.serde.DeserializationException | ||
import aws.smithy.kotlin.runtime.serde.xml.XmlStreamReader | ||
import aws.smithy.kotlin.runtime.serde.xml.XmlToken | ||
import aws.smithy.kotlin.runtime.serde.xml.terminates | ||
|
||
/**
 * An [XmlStreamReader] that provides [XmlToken] elements from an [XmlLexer]. This class internally maintains a queue
 * of peeked tokens and the [lastToken], but delegates all parsing operations to the lexer.
 * @param source The [XmlLexer] to use for XML parsing.
 */
class LexingXmlStreamReader(private val source: XmlLexer) : XmlStreamReader {
    // Tokens that have been parsed ahead of time by [peek] but not yet consumed by [nextToken].
    private val peekQueue = ArrayDeque<XmlToken>()

    /**
     * Throws a [DeserializationException] with the given message and location string.
     * @param msg The error message to include with the exception.
     */
    @Suppress("NOTHING_TO_INLINE")
    internal inline fun error(msg: String): Nothing = source.error(msg)

    /**
     * The token most recently returned by [nextToken], or `null` if none has been returned yet (or the last call
     * returned `null`).
     */
    override var lastToken: XmlToken? = null
        private set

    /**
     * Consumes and returns the next token, preferring tokens already buffered by [peek] over parsing new ones.
     * @return The next token, or `null` if the lexer has nothing more to return.
     */
    override fun nextToken(): XmlToken? =
        (peekQueue.removeFirstOrNull() ?: source.parseNext()).also { lastToken = it }

    /**
     * Looks ahead without consuming tokens.
     * @param index How far ahead to look; 1 is the token that the next [nextToken] call would return.
     * @return The token at the given lookahead position, or `null` if the document ends before that position.
     */
    override fun peek(index: Int): XmlToken? {
        while (index > peekQueue.size && !source.endOfDocument) {
            // The lexer may run out of tokens before it reports end-of-document; treat that the same way
            // nextToken does (no more tokens) instead of failing with a NullPointerException.
            val token = source.parseNext() ?: break
            peekQueue.addLast(token)
        }
        return peekQueue.getOrNull(index - 1)
    }

    /**
     * Consumes tokens until an [XmlToken.EndElement] at the depth of the next (peeked) token has been consumed, or
     * until the document ends. Does nothing if there is no next token.
     */
    override fun skipNext() {
        val peekToken = peek(1) ?: return
        val startDepth = peekToken.depth

        tailrec fun scanUntilDepth(from: XmlToken?) {
            when {
                // TODO Is EndDocument actually returned in the XmlStreamReaderXmlPull implementation? If not, remove...
                from == null || from is XmlToken.EndDocument -> return // End of document
                from is XmlToken.EndElement && from.depth == startDepth -> return // Returned to original start depth
                else -> scanUntilDepth(nextToken()) // Keep scannin'!
            }
        }

        scanUntilDepth(nextToken())
    }

    /**
     * Creates a reader scoped to a subtree of the document.
     * @param subtreeStartDepth The depth termination method for the subtree.
     * @return An empty reader if the next token terminates [lastToken] (in which case that token is also consumed);
     * otherwise a child reader based on this reader.
     */
    override fun subTreeReader(subtreeStartDepth: XmlStreamReader.SubtreeStartDepth): XmlStreamReader =
        if (peek(1).terminates(lastToken)) {
            // Special case—return an empty subtree _and_ advance the token.
            nextToken()
            EmptyXmlStreamReader(this)
        } else {
            ChildXmlStreamReader(this, subtreeStartDepth)
        }
}
|
||
/**
 * A subtree XML stream reader layered over a [LexingXmlStreamReader]. It stops yielding tokens once the parent's
 * tokens fall below the depth at which this reader started.
 * @param parent The [LexingXmlStreamReader] upon which this child reader is based.
 * @param subtreeStartDepth The depth termination method.
 */
private class ChildXmlStreamReader(
    private val parent: LexingXmlStreamReader,
    private val subtreeStartDepth: XmlStreamReader.SubtreeStartDepth,
) : XmlStreamReader {
    // The child shares the parent's notion of the last token; it keeps no token state of its own.
    override val lastToken: XmlToken?
        get() = parent.lastToken

    // The shallowest depth this reader will yield: the depth of the parent's last token at construction time
    // (CURRENT), or one level below it (CHILD).
    private val minimumDepth: Int = run {
        val startDepth = lastToken?.depth ?: error("Unable to determine depth of last node")
        when (subtreeStartDepth) {
            XmlStreamReader.SubtreeStartDepth.CHILD -> startDepth + 1
            XmlStreamReader.SubtreeStartDepth.CURRENT -> startDepth
        }
    }

    /**
     * Throws a [DeserializationException] with the given message and location string.
     * @param msg The error message to include with the exception.
     */
    @Suppress("NOTHING_TO_INLINE")
    inline fun error(msg: String): Nothing = parent.error(msg)

    /**
     * Consumes and returns the parent's next token if it belongs to this subtree.
     * @return The next token, or `null` if the parent has no more tokens or the subtree has ended.
     */
    override fun nextToken(): XmlToken? {
        val upcoming = parent.peek(1) ?: return null

        val belowChildDepth =
            subtreeStartDepth == XmlStreamReader.SubtreeStartDepth.CHILD && upcoming.depth < minimumDepth

        val candidate = if (belowChildDepth) {
            // In CHILD mode a too-shallow token may be followed by one that is deep enough; if so, consume the
            // shallow token from the parent so that the deeper one becomes the next to be returned.
            val following = parent.peek(2) ?: return null
            if (following.depth >= minimumDepth) parent.nextToken()
            following
        } else {
            upcoming
        }

        return if (candidate.depth < minimumDepth) null else parent.nextToken()
    }

    /**
     * Peeks at the parent's token at the given lookahead position.
     * @return The peeked token, or `null` if there is none or it is shallower than this reader's minimum depth.
     */
    override fun peek(index: Int): XmlToken? =
        parent.peek(index)?.takeIf { it.depth >= minimumDepth }

    /** Delegates directly to the parent reader. */
    override fun skipNext() {
        parent.skipNext()
    }

    /** Delegates directly to the parent reader. */
    override fun subTreeReader(subtreeStartDepth: XmlStreamReader.SubtreeStartDepth): XmlStreamReader {
        return parent.subTreeReader(subtreeStartDepth)
    }
}
|
||
/**
 * An XML stream reader over an empty subtree: [nextToken] and [peek] always return `null`, and [skipNext] does
 * nothing.
 * @param parent The reader on which this empty reader is based; it supplies [lastToken].
 */
private class EmptyXmlStreamReader(private val parent: XmlStreamReader) : XmlStreamReader {
    // Mirrors the parent's last token, since this reader never produces tokens of its own.
    override val lastToken: XmlToken?
        get() = parent.lastToken

    /** Always `null`: there are no tokens in an empty subtree. */
    override fun nextToken(): XmlToken? {
        return null
    }

    /** Always `null`, regardless of [index]. */
    override fun peek(index: Int): XmlToken? {
        return null
    }

    /** No-op: there is nothing to skip. */
    override fun skipNext() {
    }

    /** A subtree of an empty subtree is still empty, so this reader returns itself. */
    override fun subTreeReader(subtreeStartDepth: XmlStreamReader.SubtreeStartDepth): XmlStreamReader {
        return this
    }
}
|
||
/**
 * Returns the element at [index], or `null` if [index] is outside the bounds of this list. This covers negative
 * indices as well as indices at or beyond [List.size].
 */
private fun <T> List<T>.getOrNull(index: Int): T? = if (index in indices) this[index] else null
Oops, something went wrong.