Skip to content

Commit

Permalink
feat: initial KMP XML reader implementation (#601)
Browse files Browse the repository at this point in the history
  • Loading branch information
ianbotsf authored Mar 14, 2022
1 parent d7639f7 commit 476a9de
Show file tree
Hide file tree
Showing 8 changed files with 998 additions and 56 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@

package aws.smithy.kotlin.runtime.serde.xml

import aws.smithy.kotlin.runtime.serde.xml.tokenization.StringTextStream
import aws.smithy.kotlin.runtime.serde.xml.tokenization.LexingXmlStreamReader
import aws.smithy.kotlin.runtime.serde.xml.tokenization.XmlLexer

/**
* Provides stream-style access to an XML payload. This abstraction
* supports the ability to look ahead an arbitrary number of elements. It can also
Expand All @@ -22,7 +26,7 @@ interface XmlStreamReader {
/**
* The subtree's minimum depth is the same as the current node depth + 1.
*/
CHILD
CHILD,
}
/**
* Return the last token that was consumed by the reader.
Expand All @@ -39,7 +43,7 @@ interface XmlStreamReader {
/**
* Return the next actionable token or null if stream is exhausted.
*
* @throws XmlGenerationException upon any error.
* @throws [aws.smithy.kotlin.runtime.serde.DeserializationException] upon any error.
*/
fun nextToken(): XmlToken?

Expand All @@ -63,17 +67,23 @@ interface XmlStreamReader {
*/
inline fun <reified T : XmlToken> XmlStreamReader.seek(selectionPredicate: (T) -> Boolean = { true }): T? {
var token: XmlToken? = lastToken
var foundMatch = false

while (token != null && !foundMatch) {
foundMatch = if (token is T) selectionPredicate.invoke(token) else false
do {
val foundMatch = if (token is T) selectionPredicate.invoke(token) else false
if (!foundMatch) token = nextToken()
}
} while (token != null && !foundMatch)

return token as T?
}

/*
* Creates an [XmlStreamReader] instance
*/
expect fun xmlStreamReader(payload: ByteArray): XmlStreamReader
/**
 * Builds an [XmlStreamReader] over the given XML payload.
 *
 * The bytes are decoded into a string, wrapped in a [StringTextStream], tokenized by an [XmlLexer], and exposed
 * through a [LexingXmlStreamReader].
 *
 * @param payload The raw bytes of the XML document.
 * @return A new [LexingXmlStreamReader] reading from the decoded payload.
 */
fun xmlStreamReader(payload: ByteArray): XmlStreamReader =
    LexingXmlStreamReader(XmlLexer(StringTextStream(payload.decodeToString())))

// TODO remove me!
// `expect` declaration only: no body here, each platform source set has to supply the matching `actual` function.
expect fun xmlPull(payload: ByteArray): XmlStreamReader
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
package aws.smithy.kotlin.runtime.serde.xml

/**
* Raw tokens produced when reading a XML document as a stream
* Raw tokens produced when reading an XML document as a stream
*/
sealed class XmlToken {

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0.
*/
package aws.smithy.kotlin.runtime.serde.xml.tokenization

import aws.smithy.kotlin.runtime.serde.xml.XmlToken

/**
 * Models the states an [XmlLexer] can be in while it works through a document.
 */
internal sealed class LexerState {
    /**
     * How deep in the element tree the lexer currently is. This follows the same convention as depth in [XmlToken]:
     * the root is at depth 1 and anything outside the root is at depth 0.
     */
    abstract val depth: Int

    /**
     * Starting state: nothing has been read yet (no tags, DTD, or prolog).
     */
    object Initial : LexerState() {
        override val depth: Int = 0
    }

    /**
     * The root tag is the next thing the lexer expects to read.
     */
    object BeforeRootTag : LexerState() {
        override val depth: Int = 0
    }

    /**
     * States in which the lexer is positioned inside a tag.
     */
    sealed class Tag : LexerState() {
        abstract val name: XmlToken.QualifiedName
        abstract val parent: OpenTag?

        // Computed on first access: one level deeper than the parent, or 1 when there is no parent.
        override val depth: Int by lazy { parent?.depth?.plus(1) ?: 1 }

        /**
         * The lexer is inside an open tag. The next close tag should carry the same name as this tag.
         */
        data class OpenTag(
            override val name: XmlToken.QualifiedName,
            override val parent: OpenTag?,
            val seenChildren: Boolean,
        ) : Tag()

        /**
         * A self-closing tag (e.g., '<foo />') has been read, but so far only the [XmlToken.BeginElement] token was
         * handed to the caller. The following [XmlLexer.parseNext] call will return an [XmlToken.EndElement] without
         * reading anything further from the source.
         */
        data class EmptyTag(
            override val name: XmlToken.QualifiedName,
            override val parent: OpenTag?,
        ) : Tag()
    }

    /**
     * Terminal state: the end of the document has been reached and no more data is available.
     */
    object EndOfDocument : LexerState() {
        override val depth: Int = 0
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0.
*/
package aws.smithy.kotlin.runtime.serde.xml.tokenization

import aws.smithy.kotlin.runtime.serde.DeserializationException
import aws.smithy.kotlin.runtime.serde.xml.XmlStreamReader
import aws.smithy.kotlin.runtime.serde.xml.XmlToken
import aws.smithy.kotlin.runtime.serde.xml.terminates

/**
 * An [XmlStreamReader] that provides [XmlToken] elements from an [XmlLexer]. This class internally maintains the
 * peek queue and [lastToken], but delegates all parsing operations to the lexer.
 * @param source The [XmlLexer] to use for XML parsing.
 */
class LexingXmlStreamReader(private val source: XmlLexer) : XmlStreamReader {
    // Tokens already parsed by [peek] but not yet consumed through [nextToken], in document order.
    private val peekQueue = ArrayDeque<XmlToken>()

    /**
     * Throws a [DeserializationException] with the given message and location string.
     * @param msg The error message to include with the exception.
     */
    @Suppress("NOTHING_TO_INLINE")
    internal inline fun error(msg: String): Nothing = source.error(msg)

    /**
     * The token most recently returned by [nextToken], or `null` if nothing has been consumed yet (or the last call
     * to [nextToken] returned `null`).
     */
    override var lastToken: XmlToken? = null
        private set

    /**
     * Consumes and returns the next token, draining previously peeked tokens before asking the lexer for more.
     * @return The next token, or `null` when neither the peek queue nor the lexer has one.
     */
    override fun nextToken(): XmlToken? =
        (peekQueue.removeFirstOrNull() ?: source.parseNext()).also { lastToken = it }

    /**
     * Looks ahead without consuming. `peek(1)` is the token the next [nextToken] call would return.
     * @param index The 1-based look-ahead distance.
     * @return The token at that distance, or `null` if the lexer runs out of tokens first.
     */
    override fun peek(index: Int): XmlToken? {
        while (index > peekQueue.size && !source.endOfDocument) {
            // If the lexer yields `null` before it reports `endOfDocument`, stop filling the queue instead of
            // failing with a NullPointerException; the caller then gets `null`, as with an exhausted [nextToken].
            val token = source.parseNext() ?: break
            peekQueue.addLast(token)
        }
        return peekQueue.getOrNull(index - 1)
    }

    /**
     * Consumes tokens until an [XmlToken.EndElement] at the depth of the upcoming token has been consumed, or until
     * the token stream ends. Does nothing if there is no upcoming token.
     */
    override fun skipNext() {
        val startDepth = peek(1)?.depth ?: return

        while (true) {
            val token = nextToken()
            when {
                // TODO Is EndDocument actually returned in the XmlStreamReaderXmlPull implementation? If not, remove...
                token == null || token is XmlToken.EndDocument -> return // End of document
                token is XmlToken.EndElement && token.depth == startDepth -> return // Returned to original start depth
            }
        }
    }

    /**
     * Creates a reader limited to a subtree of this reader.
     * @param subtreeStartDepth Whether the subtree starts at the current depth or at the child depth.
     * @return An empty reader if the upcoming token terminates [lastToken] (that token is consumed as a side
     * effect); otherwise a child reader that shares this reader's position.
     */
    override fun subTreeReader(subtreeStartDepth: XmlStreamReader.SubtreeStartDepth): XmlStreamReader =
        if (peek(1).terminates(lastToken)) {
            // Special case—return an empty subtree _and_ advance the token.
            nextToken()
            EmptyXmlStreamReader(this)
        } else {
            ChildXmlStreamReader(this, subtreeStartDepth)
        }
}

/**
 * A subtree view over a [LexingXmlStreamReader]. It stops handing out tokens once the parent's upcoming tokens fall
 * below the depth at which this reader was created.
 * @param parent The [LexingXmlStreamReader] upon which this child reader is based.
 * @param subtreeStartDepth The depth termination method.
 */
private class ChildXmlStreamReader(
    private val parent: LexingXmlStreamReader,
    private val subtreeStartDepth: XmlStreamReader.SubtreeStartDepth,
) : XmlStreamReader {
    override val lastToken: XmlToken?
        get() = parent.lastToken

    // Tokens shallower than this depth lie outside the subtree. Captured once, at construction time, from the
    // parent's last token.
    private val minimumDepth: Int = run {
        val currentDepth = lastToken?.depth ?: error("Unable to determine depth of last node")
        when (subtreeStartDepth) {
            XmlStreamReader.SubtreeStartDepth.CHILD -> currentDepth + 1
            XmlStreamReader.SubtreeStartDepth.CURRENT -> currentDepth
        }
    }

    /**
     * Fails by delegating to the parent reader's `error`, which never returns.
     * @param msg The error message to include with the exception.
     */
    @Suppress("NOTHING_TO_INLINE")
    inline fun error(msg: String): Nothing = parent.error(msg)

    /**
     * Consumes the next token of the subtree from the parent reader.
     * @return The consumed token, or `null` when the parent has no more tokens or the subtree has ended.
     */
    override fun nextToken(): XmlToken? {
        val upcoming = parent.peek(1) ?: return null

        val isChildMode = subtreeStartDepth == XmlStreamReader.SubtreeStartDepth.CHILD
        val candidate = if (isChildMode && upcoming.depth < minimumDepth) {
            // The upcoming token is too shallow; look one token further and, if that one belongs to the subtree,
            // consume the shallow token so the deeper one becomes next in line.
            val following = parent.peek(2) ?: return null
            if (following.depth >= minimumDepth) {
                parent.nextToken()
            }
            following
        } else {
            upcoming
        }

        if (candidate.depth < minimumDepth) return null
        return parent.nextToken()
    }

    /**
     * Peeks at the parent reader, hiding any token that lies shallower than the subtree's minimum depth.
     */
    override fun peek(index: Int): XmlToken? =
        parent.peek(index)?.takeIf { it.depth >= minimumDepth }

    override fun skipNext() = parent.skipNext()

    override fun subTreeReader(subtreeStartDepth: XmlStreamReader.SubtreeStartDepth): XmlStreamReader =
        parent.subTreeReader(subtreeStartDepth)
}

/**
 * A reader over an empty subtree: [nextToken] and [peek] always yield `null` and [skipNext] does nothing.
 * @param parent The reader this empty reader was derived from; only consulted for [lastToken].
 */
private class EmptyXmlStreamReader(private val parent: XmlStreamReader) : XmlStreamReader {
    override val lastToken: XmlToken?
        get() = parent.lastToken

    override fun nextToken(): XmlToken? {
        return null
    }

    override fun peek(index: Int): XmlToken? {
        return null
    }

    override fun skipNext() {
        // Nothing to skip in an empty subtree.
    }

    // A subtree of an empty reader is this same empty reader.
    override fun subTreeReader(subtreeStartDepth: XmlStreamReader.SubtreeStartDepth): XmlStreamReader {
        return this
    }
}

/**
 * Returns the element at [index], or `null` when [index] is out of bounds for this list. Both negative indices and
 * indices at or beyond the list size count as out of bounds.
 */
private fun <T> List<T>.getOrNull(index: Int): T? = if (index in indices) this[index] else null
Loading

0 comments on commit 476a9de

Please sign in to comment.