From e353c9b5dbf7efacd6aac36179a5a89d3b47ec5c Mon Sep 17 00:00:00 2001 From: woile Date: Wed, 27 Nov 2024 13:34:17 +0000 Subject: [PATCH] =?UTF-8?q?Deploy=20preview=20for=20PR=20248=20?= =?UTF-8?q?=F0=9F=9B=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pr-preview/pr-248/middleware/index.html | 56 ++++++++++++---------- pr-preview/pr-248/search/search_index.json | 2 +- pr-preview/pr-248/stream/index.html | 10 ++-- 3 files changed, 38 insertions(+), 30 deletions(-) diff --git a/pr-preview/pr-248/middleware/index.html b/pr-preview/pr-248/middleware/index.html index fffeee98..94f48899 100644 --- a/pr-preview/pr-248/middleware/index.html +++ b/pr-preview/pr-248/middleware/index.html @@ -816,7 +816,15 @@

Middleware

25 26 27 -28
class MiddlewareProtocol(typing.Protocol):
+28
+29
+30
+31
+32
class MiddlewareProtocol(typing.Protocol):
+    next_call: types.NextMiddlewareCall
+    send: types.Send
+    stream: "Stream"
+
     def __init__(
         self,
         *,
@@ -946,15 +954,7 @@ 

Default Middleware

Source code in kstreams/middleware/middleware.py -
 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
+                
 71
  72
  73
  74
@@ -1030,7 +1030,15 @@ 

Default Middleware

144 145 146 -147
class ExceptionMiddleware(BaseMiddleware):
+147
+148
+149
+150
+151
+152
+153
+154
+155
class ExceptionMiddleware(BaseMiddleware):
     """
     This is always the first Middleware in the middleware stack
     to catch any exception that might occur. Any exception raised
@@ -1062,7 +1070,7 @@ 

Default Middleware

async def cleanup_policy(self, exc: Exception) -> None: """ - Execute clenup policicy according to the Stream configuration. + Execute cleanup policy according to the Stream configuration. At this point we are inside the asyncio.Lock `is_processing` as an event is being processed and an exeption has occured. @@ -1145,7 +1153,7 @@

-

Execute clenup policicy according to the Stream configuration.

+

Execute cleanup policy according to the Stream configuration.

At this point we are inside the asyncio.Lock is_processing as an event is being processed and an exeption has occured. The Lock must be released to stop the Stream @@ -1224,15 +1232,7 @@

Source code in kstreams/middleware/middleware.py -
 93
- 94
- 95
- 96
- 97
- 98
- 99
-100
-101
+              
101
 102
 103
 104
@@ -1278,9 +1278,17 @@ 

144 145 146 -147

async def cleanup_policy(self, exc: Exception) -> None:
+147
+148
+149
+150
+151
+152
+153
+154
+155
async def cleanup_policy(self, exc: Exception) -> None:
     """
-    Execute clenup policicy according to the Stream configuration.
+    Execute cleanup policy according to the Stream configuration.
 
     At this point we are inside the asyncio.Lock `is_processing`
     as an event is being processed and an exeption has occured.
diff --git a/pr-preview/pr-248/search/search_index.json b/pr-preview/pr-248/search/search_index.json
index 1f10bbfd..8e628fd7 100644
--- a/pr-preview/pr-248/search/search_index.json
+++ b/pr-preview/pr-248/search/search_index.json
@@ -1 +1 @@
-{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Kstreams","text":"

kstreams is a library/micro framework to use with kafka. It has a simple kafka streams implementation that gives certain guarantees; see below.

"},{"location":"#requirements","title":"Requirements","text":"

python 3.8+

"},{"location":"#installation","title":"Installation","text":"
pip install kstreams\n

You will need a worker; we recommend aiorun

pip install aiorun\n
"},{"location":"#usage","title":"Usage","text":"
import aiorun\nfrom kstreams import create_engine, ConsumerRecord\n\n\nstream_engine = create_engine(title=\"my-stream-engine\")\n\n@stream_engine.stream(\"local--kstream\")\nasync def consume(cr: ConsumerRecord):\n    print(f\"Event consumed: headers: {cr.headers}, payload: {cr.value}\")\n\n\nasync def produce():\n    payload = b'{\"message\": \"Hello world!\"}'\n\n    for i in range(5):\n        metadata = await stream_engine.send(\"local--kstreams\", value=payload)\n        print(f\"Message sent: {metadata}\")\n\n\nasync def start():\n    await stream_engine.start()\n    await produce()\n\n\nasync def shutdown(loop):\n    await stream_engine.stop()\n\n\nif __name__ == \"__main__\":\n    aiorun.run(start(), stop_on_unhandled_errors=True, shutdown_callback=shutdown)\n
"},{"location":"#kafka-configuration","title":"Kafka configuration","text":"

Configure kafka using the kafka backend provided.
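
A minimal sketch of what that looks like, reusing the PLAINTEXT example from the backends docs (the broker address is only a local placeholder):

from kstreams.backends.kafka import Kafka\nfrom kstreams import create_engine\n\n# assumption: a local broker listening on the default PLAINTEXT port\nbackend = Kafka(bootstrap_servers=[\"localhost:9092\"])\nstream_engine = create_engine(title=\"my-stream-engine\", backend=backend)\n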

"},{"location":"#development","title":"Development","text":"

This repo requires the use of poetry instead of pip. Note: If you want to have the virtualenv in the same path as the project, you should first run poetry config --local virtualenvs.in-project true

To install the dependencies just execute:

poetry install\n

Then you can activate the virtualenv with

poetry shell\n

Run the tests:

./scripts/test\n

Run code linting (black and isort)

./scripts/lint\n
"},{"location":"#commit-messages","title":"Commit messages","text":"

The use of commitizen is recommended. Commitizen is part of the dev dependencies.

cz commit\n
"},{"location":"backends/","title":"Backends","text":"

The main idea of a backend is to supply the necessary configuration to create a connection with the backend.

kstreams currently has support for Kafka as a backend.

"},{"location":"backends/#kstreams.backends.kafka.Kafka","title":"kstreams.backends.kafka.Kafka","text":"

The Kafka backend validates the given attributes.

It uses pydantic internally.

Attributes:

Name Type Description bootstrap_servers List[str]

kafka list of hostname:port

security_protocol SecurityProtocol

Protocol used to communicate with brokers

ssl_context Optional[SSLContext]

a python std ssl.SSLContext instance; you can generate it with create_ssl_context or create_ssl_context_from_mem

sasl_mechanism SaslMechanism

Authentication mechanism when security_protocol is configured for SASL_PLAINTEXT or SASL_SSL

sasl_plain_username Optional[str]

username for sasl PLAIN authentication

sasl_plain_password Optional[str]

password for sasl PLAIN authentication

sasl_oauth_token_provider Optional[str]

smth

Raises:

Type Description ValidationError

a pydantic.ValidationError exception

"},{"location":"backends/#kstreams.backends.kafka.Kafka--plaintext","title":"PLAINTEXT","text":"

Example

from kstreams.backends.kafka import Kafka\nfrom kstreams import create_engine, Stream\n\nbackend = Kafka(bootstrap_servers=[\"localhost:9092\"])\nstream_engine = create_engine(title=\"my-stream-engine\", backend=backend)\n
"},{"location":"backends/#kstreams.backends.kafka.Kafka--ssl","title":"SSL","text":"

Example

Create SSL context
import ssl\n\nfrom kstreams.backends.kafka import Kafka\nfrom kstreams import create_engine, utils, Stream\n\n\ndef get_ssl_context() -> ssl.SSLContext:\n    return utils.create_ssl_context(\n        cafile=\"certificate-authority-file-path\",\n        capath=\"points-to-directory-with-several-ca-certificates\",\n        cadata=\"same-as-cafile-but-ASCII-or-bytes-format\",\n        certfile=\"client-certificate-file-name\",\n        keyfile=\"client-private-key-file-name\",\n        password=\"password-to-load-certificate-chain\",\n    )\n\nbackend = Kafka(\n    bootstrap_servers=[\"localhost:9094\"],\n    security_protocol=\"SSL\",\n    ssl_context=get_ssl_context(),\n)\n\nstream_engine = create_engine(title=\"my-stream-engine\", backend=backend)\n

Note

Check create ssl context util

Example

Create SSL context from memory
import ssl\n\nfrom kstreams.backends.kafka import Kafka\nfrom kstreams import create_engine, utils, Stream\n\n\ndef get_ssl_context() -> ssl.SSLContext:\n    return utils.create_ssl_context_from_mem(\n        cadata=\"ca-certificates-as-unicode\",\n        certdata=\"client-certificate-as-unicode\",\n        keydata=\"client-private-key-as-unicode\",\n        password=\"optional-password-to-load-certificate-chain\",\n    )\n\nbackend = Kafka(\n    bootstrap_servers=[\"localhost:9094\"],\n    security_protocol=\"SSL\",\n    ssl_context=get_ssl_context(),\n)\n\nstream_engine = create_engine(title=\"my-stream-engine\", backend=backend)\n

Note

Check create ssl context from memory util

Source code in kstreams/backends/kafka.py
class Kafka(BaseModel):\n    \"\"\"\n    The `Kafka` backend validates the given attributes.\n\n    It uses pydantic internally.\n\n    Attributes:\n        bootstrap_servers: kafka list of `hostname:port`\n        security_protocol: Protocol used to communicate with brokers\n        ssl_context: a python std `ssl.SSLContext` instance, you can generate\n            it with `create_ssl_context`\n            or `create_ssl_context_from_mem`\n        sasl_mechanism: Authentication mechanism when `security_protocol` is configured\n            for `SASL_PLAINTEXT` or `SASL_SSL`\n        sasl_plain_username: username for sasl PLAIN authentication\n        sasl_plain_password: password for sasl PLAIN authentication\n        sasl_oauth_token_provider: smth\n\n    Raises:\n        ValidationError: a `pydantic.ValidationError` exception\n\n    ## PLAINTEXT\n\n    !!! Example\n        ```python\n        from kstreams.backends.kafka import Kafka\n        from kstreams import create_engine, Stream\n\n        backend = Kafka(bootstrap_servers=[\"localhost:9092\"])\n        stream_engine = create_engine(title=\"my-stream-engine\", backend=backend)\n        ```\n\n    ## SSL\n\n    !!! Example\n        ```python title=\"Create SSL context\"\n        import ssl\n\n        from kstreams.backends.kafka import Kafka\n        from kstreams import create_engine, utils, Stream\n\n\n        def get_ssl_context() -> ssl.SSLContext:\n            return utils.create_ssl_context(\n                cafile=\"certificate-authority-file-path\",\n                capath=\"points-to-directory-with-several-ca-certificates\",\n                cadata=\"same-as-cafile-but-ASCII-or-bytes-format\",\n                certfile=\"client-certificate-file-name\",\n                keyfile=\"client-private-key-file-name\",\n                password=\"password-to-load-certificate-chain\",\n            )\n\n        backend = Kafka(\n            bootstrap_servers=[\"localhost:9094\"],\n            security_protocol=\"SSL\",\n            ssl_context=get_ssl_context(),\n        )\n\n        stream_engine = create_engine(title=\"my-stream-engine\", backend=backend)\n        ```\n\n        !!! note\n            Check [create ssl context util](https://kpn.github.io/kstreams/utils/#kstreams.utils.create_ssl_context)\n\n    !!! Example\n        ```python title=\"Create SSL context from memory\"\n        import ssl\n\n        from kstreams.backends.kafka import Kafka\n        from kstreams import create_engine, utils, Stream\n\n\n        def get_ssl_context() -> ssl.SSLContext:\n            return utils.create_ssl_context_from_mem(\n                cadata=\"ca-certificates-as-unicode\",\n                certdata=\"client-certificate-as-unicode\",\n                keydata=\"client-private-key-as-unicode\",\n                password=\"optional-password-to-load-certificate-chain\",\n            )\n\n        backend = Kafka(\n            bootstrap_servers=[\"localhost:9094\"],\n            security_protocol=\"SSL\",\n            ssl_context=get_ssl_context(),\n        )\n\n        stream_engine = create_engine(title=\"my-stream-engine\", backend=backend)\n        ```\n\n        !!! 
note\n            Check [create ssl context from memerory util](https://kpn.github.io/kstreams/utils/#kstreams.utils.create_ssl_context_from_mem)\n    \"\"\"\n\n    bootstrap_servers: List[str] = [\"localhost:9092\"]\n    security_protocol: SecurityProtocol = SecurityProtocol.PLAINTEXT\n\n    ssl_context: Optional[ssl.SSLContext] = None\n\n    sasl_mechanism: SaslMechanism = SaslMechanism.PLAIN\n    sasl_plain_username: Optional[str] = None\n    sasl_plain_password: Optional[str] = None\n    sasl_oauth_token_provider: Optional[str] = None\n    model_config = ConfigDict(arbitrary_types_allowed=True, use_enum_values=True)\n\n    @model_validator(mode=\"after\")\n    @classmethod\n    def protocols_validation(cls, values):\n        security_protocol = values.security_protocol\n\n        if security_protocol == SecurityProtocol.PLAINTEXT:\n            return values\n        elif security_protocol == SecurityProtocol.SSL:\n            if values.ssl_context is None:\n                raise ValueError(\"`ssl_context` is required\")\n            return values\n        elif security_protocol == SecurityProtocol.SASL_PLAINTEXT:\n            if values.sasl_mechanism is SaslMechanism.OAUTHBEARER:\n                # We don't perform a username and password check if OAUTHBEARER\n                return values\n            if (\n                values.sasl_mechanism is SaslMechanism.PLAIN\n                and values.sasl_plain_username is None\n            ):\n                raise ValueError(\n                    \"`sasl_plain_username` is required when using SASL_PLAIN\"\n                )\n            if (\n                values.sasl_mechanism is SaslMechanism.PLAIN\n                and values.sasl_plain_password is None\n            ):\n                raise ValueError(\n                    \"`sasl_plain_password` is required when using SASL_PLAIN\"\n                )\n            return values\n        elif security_protocol == SecurityProtocol.SASL_SSL:\n            if values.ssl_context is None:\n                raise ValueError(\"`ssl_context` is required\")\n            if (\n                values.sasl_mechanism is SaslMechanism.PLAIN\n                and values.sasl_plain_username is None\n            ):\n                raise ValueError(\n                    \"`sasl_plain_username` is required when using SASL_PLAIN\"\n                )\n            if (\n                values.sasl_mechanism is SaslMechanism.PLAIN\n                and values.sasl_plain_password is None\n            ):\n                raise ValueError(\n                    \"`sasl_plain_password` is required when using SASL_PLAIN\"\n                )\n            return values\n
"},{"location":"engine/","title":"StreamEngine","text":""},{"location":"engine/#kstreams.engine.StreamEngine","title":"kstreams.engine.StreamEngine","text":"

Attributes:

Name Type Description backend Kafka

Backend to connect. Default Kafka

consumer_class Consumer

The consumer class to use when instantiating a consumer. Default kstreams.Consumer

producer_class Producer

The producer class to use when instantiating the producer. Default kstreams.Producer

monitor PrometheusMonitor

Prometheus monitor that holds the metrics

title str | None

Engine name

serializer Serializer | None

Serializer to use when an event is produced.

deserializer Deserializer | None

Deserializer to be used when an event is consumed. If provided, it will be used in all Stream instances as a general one. To override it, you can provide one per Stream.

Example

Usage
import kstreams\n\nstream_engine = kstreams.create_engine(\n    title=\"my-stream-engine\"\n)\n\n@kstreams.stream(\"local--hello-world\", group_id=\"example-group\")\nasync def consume(cr: kstreams.ConsumerRecord) -> None:\n    print(f\"showing bytes: {cr.value}\")\n\n\nawait stream_engine.start()\n
Source code in kstreams/engine.py
class StreamEngine:\n    \"\"\"\n    Attributes:\n        backend kstreams.backends.Kafka: Backend to connect. Default `Kafka`\n        consumer_class kstreams.Consumer: The consumer class to use when\n            instanciate a consumer. Default kstreams.Consumer\n        producer_class kstreams.Producer: The producer class to use when\n            instanciate the producer. Default kstreams.Producer\n        monitor kstreams.PrometheusMonitor: Prometheus monitor that holds\n            the [metrics](https://kpn.github.io/kstreams/metrics/)\n        title str | None: Engine name\n        serializer kstreams.serializers.Serializer | None: Serializer to\n            use when an event is produced.\n        deserializer kstreams.serializers.Deserializer | None: Deserializer\n            to be used when an event is consumed.\n            If provided it will be used in all Streams instances as a general one.\n            To override it per Stream, you can provide one per Stream\n\n    !!! Example\n        ```python title=\"Usage\"\n        import kstreams\n\n        stream_engine = kstreams.create_engine(\n            title=\"my-stream-engine\"\n        )\n\n        @kstreams.stream(\"local--hello-world\", group_id=\"example-group\")\n        async def consume(stream: kstreams.ConsumerRecord) -> None:\n            print(f\"showing bytes: {cr.value}\")\n\n\n        await stream_engine.start()\n        ```\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        backend: Kafka,\n        consumer_class: typing.Type[Consumer],\n        producer_class: typing.Type[Producer],\n        monitor: PrometheusMonitor,\n        title: typing.Optional[str] = None,\n        deserializer: Deprecated[typing.Optional[Deserializer]] = None,\n        serializer: typing.Optional[Serializer] = None,\n        on_startup: typing.Optional[EngineHooks] = None,\n        on_stop: typing.Optional[EngineHooks] = None,\n        after_startup: typing.Optional[EngineHooks] = None,\n        after_stop: typing.Optional[EngineHooks] = None,\n    ) -> None:\n        self.title = title\n        self.backend = backend\n        self.consumer_class = consumer_class\n        self.producer_class = producer_class\n        self.deserializer = deserializer\n        self.serializer = serializer\n        self.monitor = monitor\n        self._producer: typing.Optional[typing.Type[Producer]] = None\n        self._streams: typing.List[Stream] = []\n        self._on_startup = [] if on_startup is None else list(on_startup)\n        self._on_stop = [] if on_stop is None else list(on_stop)\n        self._after_startup = [] if after_startup is None else list(after_startup)\n        self._after_stop = [] if after_stop is None else list(after_stop)\n\n    async def send(\n        self,\n        topic: str,\n        value: typing.Any = None,\n        key: typing.Any = None,\n        partition: typing.Optional[int] = None,\n        timestamp_ms: typing.Optional[int] = None,\n        headers: typing.Optional[Headers] = None,\n        serializer: typing.Optional[Serializer] = None,\n        serializer_kwargs: typing.Optional[typing.Dict] = None,\n    ):\n        \"\"\"\n        Attributes:\n            topic str: Topic name to send the event to\n            value Any: Event value\n            key str | None: Event key\n            partition int | None: Topic partition\n            timestamp_ms int | None: Event timestamp in miliseconds\n            headers Dict[str, str] | None: Event headers\n            serializer 
kstreams.serializers.Serializer | None: Serializer to\n                encode the event\n            serializer_kwargs Dict[str, Any] | None: Serializer kwargs\n        \"\"\"\n        if self._producer is None:\n            raise EngineNotStartedException()\n\n        serializer = serializer or self.serializer\n\n        # serialize only when value and serializer are present\n        if value is not None and serializer is not None:\n            value = await serializer.serialize(\n                value, headers=headers, serializer_kwargs=serializer_kwargs\n            )\n\n        encoded_headers = None\n        if headers is not None:\n            encoded_headers = encode_headers(headers)\n\n        fut = await self._producer.send(\n            topic,\n            value=value,\n            key=key,\n            partition=partition,\n            timestamp_ms=timestamp_ms,\n            headers=encoded_headers,\n        )\n        metadata: RecordMetadata = await fut\n        self.monitor.add_topic_partition_offset(\n            topic, metadata.partition, metadata.offset\n        )\n\n        return metadata\n\n    async def start(self) -> None:\n        # Execute on_startup hooks\n        await execute_hooks(self._on_startup)\n\n        # add the producer and streams to the Monitor\n        self.monitor.add_producer(self._producer)\n        self.monitor.add_streams(self._streams)\n\n        await self.start_producer()\n        await self.start_streams()\n\n        # Execute after_startup hooks\n        await execute_hooks(self._after_startup)\n\n    def on_startup(\n        self,\n        func: typing.Callable[[], typing.Any],\n    ) -> typing.Callable[[], typing.Any]:\n        \"\"\"\n        A list of callables to run before the engine starts.\n        Handler are callables that do not take any arguments, and may be either\n        standard functions, or async functions.\n\n        Attributes:\n            func typing.Callable[[], typing.Any]: Func to callable before engine starts\n\n        !!! Example\n            ```python title=\"Engine before startup\"\n\n            import kstreams\n\n            stream_engine = kstreams.create_engine(\n                title=\"my-stream-engine\"\n            )\n\n            @stream_engine.on_startup\n            async def init_db() -> None:\n                print(\"Initializing Database Connections\")\n                await init_db()\n\n\n            @stream_engine.on_startup\n            async def start_background_task() -> None:\n                print(\"Some background task\")\n            ```\n        \"\"\"\n        self._on_startup.append(func)\n        return func\n\n    def on_stop(\n        self,\n        func: typing.Callable[[], typing.Any],\n    ) -> typing.Callable[[], typing.Any]:\n        \"\"\"\n        A list of callables to run before the engine stops.\n        Handler are callables that do not take any arguments, and may be either\n        standard functions, or async functions.\n\n        Attributes:\n            func typing.Callable[[], typing.Any]: Func to callable before engine stops\n\n        !!! 
Example\n            ```python title=\"Engine before stops\"\n\n            import kstreams\n\n            stream_engine = kstreams.create_engine(\n                title=\"my-stream-engine\"\n            )\n\n            @stream_engine.on_stop\n            async def close_db() -> None:\n                print(\"Closing Database Connections\")\n                await db_close()\n            ```\n        \"\"\"\n        self._on_stop.append(func)\n        return func\n\n    def after_startup(\n        self,\n        func: typing.Callable[[], typing.Any],\n    ) -> typing.Callable[[], typing.Any]:\n        \"\"\"\n        A list of callables to run after the engine starts.\n        Handler are callables that do not take any arguments, and may be either\n        standard functions, or async functions.\n\n        Attributes:\n            func typing.Callable[[], typing.Any]: Func to callable after engine starts\n\n        !!! Example\n            ```python title=\"Engine after startup\"\n\n            import kstreams\n\n            stream_engine = kstreams.create_engine(\n                title=\"my-stream-engine\"\n            )\n\n            @stream_engine.after_startup\n            async def after_startup() -> None:\n                print(\"Set pod as healthy\")\n                await mark_healthy_pod()\n            ```\n        \"\"\"\n        self._after_startup.append(func)\n        return func\n\n    def after_stop(\n        self,\n        func: typing.Callable[[], typing.Any],\n    ) -> typing.Callable[[], typing.Any]:\n        \"\"\"\n        A list of callables to run after the engine stops.\n        Handler are callables that do not take any arguments, and may be either\n        standard functions, or async functions.\n\n        Attributes:\n            func typing.Callable[[], typing.Any]: Func to callable after engine stops\n\n        !!! 
Example\n            ```python title=\"Engine after stops\"\n\n            import kstreams\n\n            stream_engine = kstreams.create_engine(\n                title=\"my-stream-engine\"\n            )\n\n            @stream_engine.after_stop\n            async def after_stop() -> None:\n                print(\"Finishing backgrpund tasks\")\n            ```\n        \"\"\"\n        self._after_stop.append(func)\n        return func\n\n    async def stop(self) -> None:\n        # Execute on_startup hooks\n        await execute_hooks(self._on_stop)\n\n        await self.monitor.stop()\n        await self.stop_producer()\n        await self.stop_streams()\n\n        # Execute after_startup hooks\n        await execute_hooks(self._after_stop)\n\n    async def stop_producer(self):\n        if self._producer is not None:\n            await self._producer.stop()\n        logger.info(\"Producer has STOPPED....\")\n\n    async def start_producer(self, **kwargs) -> None:\n        if self.producer_class is None:\n            return None\n        config = {**self.backend.model_dump(), **kwargs}\n        self._producer = self.producer_class(**config)\n        if self._producer is None:\n            return None\n        await self._producer.start()\n\n    async def start_streams(self) -> None:\n        # Only start the Streams that are not async_generators\n        streams = [\n            stream\n            for stream in self._streams\n            if not inspect.isasyncgenfunction(stream.func)\n        ]\n\n        await self._start_streams_on_background_mode(streams)\n\n    async def _start_streams_on_background_mode(\n        self, streams: typing.List[Stream]\n    ) -> None:\n        # start all the streams\n        for stream in streams:\n            asyncio.create_task(stream.start())\n\n        # start monitoring\n        asyncio.create_task(self.monitor.start())\n\n    async def stop_streams(self) -> None:\n        for stream in self._streams:\n            await stream.stop()\n        logger.info(\"Streams have STOPPED....\")\n\n    async def clean_streams(self):\n        await self.stop_streams()\n        self._streams = []\n\n    def exist_stream(self, name: str) -> bool:\n        stream = self.get_stream(name)\n        return True if stream is not None else False\n\n    def get_stream(self, name: str) -> typing.Optional[Stream]:\n        stream = next((stream for stream in self._streams if stream.name == name), None)\n\n        return stream\n\n    def add_stream(\n        self, stream: Stream, error_policy: typing.Optional[StreamErrorPolicy] = None\n    ) -> None:\n        \"\"\"\n        Add a stream to the engine.\n\n        This method registers a new stream with the engine, setting up necessary\n        configurations and handlers. 
If a stream with the same name already exists,\n        a DuplicateStreamException is raised.\n\n        Args:\n            stream: The stream to be added.\n            error_policy: An optional error policy to be applied to the stream.\n                You should probably set directly when instanciating a Stream, not here.\n\n        Raises:\n            DuplicateStreamException: If a stream with the same name already exists.\n\n        Notes:\n            - If the stream does not have a deserializer, the engine's deserializer\n              is assigned to it.\n            - If the stream does not have a rebalance listener, a default\n              MetricsRebalanceListener is assigned.\n            - The stream's UDF handler is set up with the provided function and\n              engine's send method.\n            - If the stream's UDF handler type is not NO_TYPING, a middleware stack\n              is built for the stream's function.\n        \"\"\"\n        if self.exist_stream(stream.name):\n            raise DuplicateStreamException(name=stream.name)\n\n        if error_policy is not None:\n            stream.error_policy = error_policy\n\n        stream.backend = self.backend\n        if stream.deserializer is None:\n            stream.deserializer = self.deserializer\n        self._streams.append(stream)\n\n        if stream.rebalance_listener is None:\n            # set the stream to the listener to it will be available\n            # when the callbacks are called\n            stream.rebalance_listener = MetricsRebalanceListener()\n\n        stream.rebalance_listener.stream = stream\n        stream.rebalance_listener.engine = self\n\n        stream.udf_handler = UdfHandler(\n            next_call=stream.func,\n            send=self.send,\n            stream=stream,\n        )\n\n        # NOTE: When `no typing` support is deprecated this check can\n        # be removed\n        if stream.udf_handler.type != UDFType.NO_TYPING:\n            stream.func = self._build_stream_middleware_stack(stream=stream)\n\n    def _build_stream_middleware_stack(self, *, stream: Stream) -> NextMiddlewareCall:\n        assert stream.udf_handler, \"UdfHandler can not be None\"\n\n        middlewares = stream.get_middlewares(self)\n        next_call = stream.udf_handler\n        for middleware, options in reversed(middlewares):\n            next_call = middleware(\n                next_call=next_call, send=self.send, stream=stream, **options\n            )\n        return next_call\n\n    async def remove_stream(self, stream: Stream) -> None:\n        consumer = stream.consumer\n        self._streams.remove(stream)\n        await stream.stop()\n\n        if consumer is not None:\n            self.monitor.clean_stream_consumer_metrics(consumer=consumer)\n\n    def stream(\n        self,\n        topics: typing.Union[typing.List[str], str],\n        *,\n        name: typing.Optional[str] = None,\n        deserializer: Deprecated[typing.Optional[Deserializer]] = None,\n        initial_offsets: typing.Optional[typing.List[TopicPartitionOffset]] = None,\n        rebalance_listener: typing.Optional[RebalanceListener] = None,\n        middlewares: typing.Optional[typing.List[Middleware]] = None,\n        subscribe_by_pattern: bool = False,\n        error_policy: StreamErrorPolicy = StreamErrorPolicy.STOP,\n        **kwargs,\n    ) -> typing.Callable[[StreamFunc], Stream]:\n        def decorator(func: StreamFunc) -> Stream:\n            stream_from_func = stream_func(\n                topics,\n                
name=name,\n                deserializer=deserializer,\n                initial_offsets=initial_offsets,\n                rebalance_listener=rebalance_listener,\n                middlewares=middlewares,\n                subscribe_by_pattern=subscribe_by_pattern,\n                **kwargs,\n            )(func)\n            self.add_stream(stream_from_func, error_policy=error_policy)\n\n            return stream_from_func\n\n        return decorator\n
"},{"location":"engine/#kstreams.engine.StreamEngine.send","title":"send(topic, value=None, key=None, partition=None, timestamp_ms=None, headers=None, serializer=None, serializer_kwargs=None) async","text":"

Attributes:

Name Type Description topic str

Topic name to send the event to

value Any

Event value

key str | None

Event key

partition int | None

Topic partition

timestamp_ms int | None

Event timestamp in milliseconds

headers Dict[str, str] | None

Event headers

serializer Serializer | None

Serializer to encode the event

serializer_kwargs Dict[str, Any] | None

Serializer kwargs

Source code in kstreams/engine.py
async def send(\n    self,\n    topic: str,\n    value: typing.Any = None,\n    key: typing.Any = None,\n    partition: typing.Optional[int] = None,\n    timestamp_ms: typing.Optional[int] = None,\n    headers: typing.Optional[Headers] = None,\n    serializer: typing.Optional[Serializer] = None,\n    serializer_kwargs: typing.Optional[typing.Dict] = None,\n):\n    \"\"\"\n    Attributes:\n        topic str: Topic name to send the event to\n        value Any: Event value\n        key str | None: Event key\n        partition int | None: Topic partition\n        timestamp_ms int | None: Event timestamp in miliseconds\n        headers Dict[str, str] | None: Event headers\n        serializer kstreams.serializers.Serializer | None: Serializer to\n            encode the event\n        serializer_kwargs Dict[str, Any] | None: Serializer kwargs\n    \"\"\"\n    if self._producer is None:\n        raise EngineNotStartedException()\n\n    serializer = serializer or self.serializer\n\n    # serialize only when value and serializer are present\n    if value is not None and serializer is not None:\n        value = await serializer.serialize(\n            value, headers=headers, serializer_kwargs=serializer_kwargs\n        )\n\n    encoded_headers = None\n    if headers is not None:\n        encoded_headers = encode_headers(headers)\n\n    fut = await self._producer.send(\n        topic,\n        value=value,\n        key=key,\n        partition=partition,\n        timestamp_ms=timestamp_ms,\n        headers=encoded_headers,\n    )\n    metadata: RecordMetadata = await fut\n    self.monitor.add_topic_partition_offset(\n        topic, metadata.partition, metadata.offset\n    )\n\n    return metadata\n
"},{"location":"engine/#kstreams.engine.StreamEngine.on_startup","title":"on_startup(func)","text":"

A list of callables to run before the engine starts. Handlers are callables that do not take any arguments and may be either standard functions or async functions.

Attributes:

Name Type Description func Callable[[], Any]

Function to call before the engine starts

Example

Engine before startup
import kstreams\n\nstream_engine = kstreams.create_engine(\n    title=\"my-stream-engine\"\n)\n\n@stream_engine.on_startup\nasync def init_db() -> None:\n    print(\"Initializing Database Connections\")\n    await init_db()\n\n\n@stream_engine.on_startup\nasync def start_background_task() -> None:\n    print(\"Some background task\")\n
Source code in kstreams/engine.py
def on_startup(\n    self,\n    func: typing.Callable[[], typing.Any],\n) -> typing.Callable[[], typing.Any]:\n    \"\"\"\n    A list of callables to run before the engine starts.\n    Handler are callables that do not take any arguments, and may be either\n    standard functions, or async functions.\n\n    Attributes:\n        func typing.Callable[[], typing.Any]: Func to callable before engine starts\n\n    !!! Example\n        ```python title=\"Engine before startup\"\n\n        import kstreams\n\n        stream_engine = kstreams.create_engine(\n            title=\"my-stream-engine\"\n        )\n\n        @stream_engine.on_startup\n        async def init_db() -> None:\n            print(\"Initializing Database Connections\")\n            await init_db()\n\n\n        @stream_engine.on_startup\n        async def start_background_task() -> None:\n            print(\"Some background task\")\n        ```\n    \"\"\"\n    self._on_startup.append(func)\n    return func\n
"},{"location":"engine/#kstreams.engine.StreamEngine.on_stop","title":"on_stop(func)","text":"

A list of callables to run before the engine stops. Handlers are callables that do not take any arguments and may be either standard functions or async functions.

Attributes:

Name Type Description func Callable[[], Any]

Function to call before the engine stops

Example

Engine before stops
import kstreams\n\nstream_engine = kstreams.create_engine(\n    title=\"my-stream-engine\"\n)\n\n@stream_engine.on_stop\nasync def close_db() -> None:\n    print(\"Closing Database Connections\")\n    await db_close()\n
Source code in kstreams/engine.py
def on_stop(\n    self,\n    func: typing.Callable[[], typing.Any],\n) -> typing.Callable[[], typing.Any]:\n    \"\"\"\n    A list of callables to run before the engine stops.\n    Handler are callables that do not take any arguments, and may be either\n    standard functions, or async functions.\n\n    Attributes:\n        func typing.Callable[[], typing.Any]: Func to callable before engine stops\n\n    !!! Example\n        ```python title=\"Engine before stops\"\n\n        import kstreams\n\n        stream_engine = kstreams.create_engine(\n            title=\"my-stream-engine\"\n        )\n\n        @stream_engine.on_stop\n        async def close_db() -> None:\n            print(\"Closing Database Connections\")\n            await db_close()\n        ```\n    \"\"\"\n    self._on_stop.append(func)\n    return func\n
"},{"location":"engine/#kstreams.engine.StreamEngine.after_startup","title":"after_startup(func)","text":"

A list of callables to run after the engine starts. Handlers are callables that do not take any arguments and may be either standard functions or async functions.

Attributes:

Name Type Description func Callable[[], Any]

Function to call after the engine starts

Example

Engine after startup
import kstreams\n\nstream_engine = kstreams.create_engine(\n    title=\"my-stream-engine\"\n)\n\n@stream_engine.after_startup\nasync def after_startup() -> None:\n    print(\"Set pod as healthy\")\n    await mark_healthy_pod()\n
Source code in kstreams/engine.py
def after_startup(\n    self,\n    func: typing.Callable[[], typing.Any],\n) -> typing.Callable[[], typing.Any]:\n    \"\"\"\n    A list of callables to run after the engine starts.\n    Handler are callables that do not take any arguments, and may be either\n    standard functions, or async functions.\n\n    Attributes:\n        func typing.Callable[[], typing.Any]: Func to callable after engine starts\n\n    !!! Example\n        ```python title=\"Engine after startup\"\n\n        import kstreams\n\n        stream_engine = kstreams.create_engine(\n            title=\"my-stream-engine\"\n        )\n\n        @stream_engine.after_startup\n        async def after_startup() -> None:\n            print(\"Set pod as healthy\")\n            await mark_healthy_pod()\n        ```\n    \"\"\"\n    self._after_startup.append(func)\n    return func\n
"},{"location":"engine/#kstreams.engine.StreamEngine.after_stop","title":"after_stop(func)","text":"

A list of callables to run after the engine stops. Handlers are callables that do not take any arguments and may be either standard functions or async functions.

Attributes:

Name Type Description func Callable[[], Any]

Function to call after the engine stops

Example

Engine after stops
import kstreams\n\nstream_engine = kstreams.create_engine(\n    title=\"my-stream-engine\"\n)\n\n@stream_engine.after_stop\nasync def after_stop() -> None:\n    print(\"Finishing background tasks\")\n
Source code in kstreams/engine.py
def after_stop(\n    self,\n    func: typing.Callable[[], typing.Any],\n) -> typing.Callable[[], typing.Any]:\n    \"\"\"\n    A list of callables to run after the engine stops.\n    Handler are callables that do not take any arguments, and may be either\n    standard functions, or async functions.\n\n    Attributes:\n        func typing.Callable[[], typing.Any]: Func to callable after engine stops\n\n    !!! Example\n        ```python title=\"Engine after stops\"\n\n        import kstreams\n\n        stream_engine = kstreams.create_engine(\n            title=\"my-stream-engine\"\n        )\n\n        @stream_engine.after_stop\n        async def after_stop() -> None:\n            print(\"Finishing backgrpund tasks\")\n        ```\n    \"\"\"\n    self._after_stop.append(func)\n    return func\n
"},{"location":"engine/#kstreams.engine.StreamEngine.add_stream","title":"add_stream(stream, error_policy=None)","text":"

Add a stream to the engine.

This method registers a new stream with the engine, setting up necessary configurations and handlers. If a stream with the same name already exists, a DuplicateStreamException is raised.

Parameters:

Name Type Description Default stream Stream

The stream to be added.

required error_policy Optional[StreamErrorPolicy]

An optional error policy to be applied to the stream. You should probably set it directly when instantiating a Stream, not here.

None

Raises:

Type Description DuplicateStreamException

If a stream with the same name already exists.

Notes
  • If the stream does not have a deserializer, the engine's deserializer is assigned to it.
  • If the stream does not have a rebalance listener, a default MetricsRebalanceListener is assigned.
  • The stream's UDF handler is set up with the provided function and engine's send method.
  • If the stream's UDF handler type is not NO_TYPING, a middleware stack is built for the stream's function.
Source code in kstreams/engine.py
def add_stream(\n    self, stream: Stream, error_policy: typing.Optional[StreamErrorPolicy] = None\n) -> None:\n    \"\"\"\n    Add a stream to the engine.\n\n    This method registers a new stream with the engine, setting up necessary\n    configurations and handlers. If a stream with the same name already exists,\n    a DuplicateStreamException is raised.\n\n    Args:\n        stream: The stream to be added.\n        error_policy: An optional error policy to be applied to the stream.\n            You should probably set directly when instanciating a Stream, not here.\n\n    Raises:\n        DuplicateStreamException: If a stream with the same name already exists.\n\n    Notes:\n        - If the stream does not have a deserializer, the engine's deserializer\n          is assigned to it.\n        - If the stream does not have a rebalance listener, a default\n          MetricsRebalanceListener is assigned.\n        - The stream's UDF handler is set up with the provided function and\n          engine's send method.\n        - If the stream's UDF handler type is not NO_TYPING, a middleware stack\n          is built for the stream's function.\n    \"\"\"\n    if self.exist_stream(stream.name):\n        raise DuplicateStreamException(name=stream.name)\n\n    if error_policy is not None:\n        stream.error_policy = error_policy\n\n    stream.backend = self.backend\n    if stream.deserializer is None:\n        stream.deserializer = self.deserializer\n    self._streams.append(stream)\n\n    if stream.rebalance_listener is None:\n        # set the stream to the listener to it will be available\n        # when the callbacks are called\n        stream.rebalance_listener = MetricsRebalanceListener()\n\n    stream.rebalance_listener.stream = stream\n    stream.rebalance_listener.engine = self\n\n    stream.udf_handler = UdfHandler(\n        next_call=stream.func,\n        send=self.send,\n        stream=stream,\n    )\n\n    # NOTE: When `no typing` support is deprecated this check can\n    # be removed\n    if stream.udf_handler.type != UDFType.NO_TYPING:\n        stream.func = self._build_stream_middleware_stack(stream=stream)\n
"},{"location":"getting_started/","title":"Getting Started","text":"

You can start using kstreams with simple producers and consumers and/or integrate it with any async framework like FastAPI

"},{"location":"getting_started/#simple-consumer-and-producer","title":"Simple consumer and producer","text":"Simple use case
import asyncio\nfrom kstreams import create_engine, ConsumerRecord\n\nstream_engine = create_engine(title=\"my-stream-engine\")\n\n\n@stream_engine.stream(\"local--py-stream\", group_id=\"de-my-partition\")\nasync def consume(cr: ConsumerRecord):\n    print(f\"Event consumed: headers: {cr.headers}, payload: {cr.value}\")\n\n\nasync def produce():\n    payload = b'{\"message\": \"Hello world!\"}'\n\n    for i in range(5):\n        metadata = await stream_engine.send(\"local--py-stream\", value=payload, key=\"1\")\n        print(f\"Message sent: {metadata}\")\n        await asyncio.sleep(5)\n\n\nasync def start():\n    await stream_engine.start()\n    await produce()\n\n\nasync def shutdown():\n    await stream_engine.stop()\n\n\nif __name__ == \"__main__\":\n    loop = asyncio.get_event_loop()\n    try:\n        loop.run_until_complete(start())\n        loop.run_forever()\n    finally:\n        loop.run_until_complete(shutdown())\n        loop.close()\n

(This script is complete, it should run \"as is\")

"},{"location":"getting_started/#recommended-usage","title":"Recommended usage","text":"

In the previous example you can see some boilerplate regarding how to start the program. We recommend using aiorun, so you don't have to worry about setting signal handlers, shutdown callbacks, graceful shutdown and closing the event loop.

Usage with aiorun
import asyncio\nimport aiorun\nfrom kstreams import create_engine, ConsumerRecord\n\nstream_engine = create_engine(title=\"my-stream-engine\")\n\n\n@stream_engine.stream(\"local--py-stream\", group_id=\"de-my-partition\")\nasync def consume(cr: ConsumerRecord):\n    print(f\"Event consumed: headers: {cr.headers}, payload: {cr.value}\")\n\n\nasync def produce():\n    payload = b'{\"message\": \"Hello world!\"}'\n\n    for i in range(5):\n        metadata = await stream_engine.send(\"local--py-stream\", value=payload, key=\"1\")\n        print(f\"Message sent: {metadata}\")\n        await asyncio.sleep(5)\n\n\nasync def start():\n    await stream_engine.start()\n    await produce()\n\n\nasync def shutdown(loop):\n    await stream_engine.stop()\n\n\nif __name__ == \"__main__\":\n    aiorun.run(start(), stop_on_unhandled_errors=True, shutdown_callback=shutdown)\n

(This script is complete, it should run \"as is\")

"},{"location":"getting_started/#fastapi","title":"FastAPI","text":"

The following code example shows how kstreams can be integrated with any async framework like FastAPI. The full example can be found here

First, we need to create an engine:

Create the StreamEngine
# streaming.engine.py\nfrom kstreams import create_engine\n\nstream_engine = create_engine(\n    title=\"my-stream-engine\",\n)\n

Define the streams:

Application stream
# streaming.streams.py\nfrom .engine import stream_engine\nfrom kstreams import ConsumerRecord\n\n\n@stream_engine.stream(\"local--kstream\")\nasync def stream(cr: ConsumerRecord):\n    print(f\"Event consumed: headers: {cr.headers}, payload: {cr.value}\")\n

Create the FastAPI application:

FastAPI
# app.py\nfrom fastapi import FastAPI\nfrom starlette.responses import Response\nfrom starlette_prometheus import PrometheusMiddleware, metrics\n\nfrom .streaming.streams import stream_engine\n\napp = FastAPI()\n\n@app.on_event(\"startup\")\nasync def startup_event():\n    await stream_engine.start()\n\n@app.on_event(\"shutdown\")\nasync def shutdown_event():\n    await stream_engine.stop()\n\n\n@app.get(\"/events\")\nasync def post_produce_event() -> Response:\n    payload = '{\"message\": \"hello world!\"}'\n\n    metadata = await stream_engine.send(\n        \"local--kstream\",\n        value=payload.encode(),\n    )\n    msg = (\n        f\"Produced event on topic: {metadata.topic}, \"\n        f\"part: {metadata.partition}, offset: {metadata.offset}\"\n    )\n\n    return Response(msg)\n\n\napp.add_middleware(PrometheusMiddleware, filter_unhandled_paths=True)\napp.add_api_route(\"/metrics\", metrics)\n
"},{"location":"getting_started/#changing-kafka-settings","title":"Changing Kafka settings","text":"

To modify the settings of a cluster, like the servers, refer to the backends docs

"},{"location":"large_project_structure/","title":"Large Projects","text":"

If you have a large project with multiple streams, we recommend the following project structure:

\u251c\u2500\u2500 my-project\n\u2502   \u251c\u2500\u2500 my_project\n\u2502   \u2502\u00a0\u00a0 \u251c\u2500\u2500 __init__.py\n\u2502   \u2502\u00a0\u00a0 \u251c\u2500\u2500 app.py\n\u2502   \u2502\u00a0\u00a0 \u251c\u2500\u2500 resources.py\n\u2502   \u2502\u00a0\u00a0 \u251c\u2500\u2500 streams.py\n\u2502   \u2502\u00a0\u00a0 \u2514\u2500\u2500 streams_roster.py\n\u2502   \u2502\u2500\u2500 tests\n\u2502   \u2502   \u251c\u2500\u2500 __init__.py\n\u2502   \u2502   \u251c\u2500\u2500 conftest.py\n\u2502   \u2502\u2500\u2500 pyproject.toml\n\u2502   \u2502\u2500\u2500 README.md\n
  • The file my_project/resources.py contains the creation of the StreamEngine
  • The file my_project/app.py contains the entrypoint of your program
  • The file my_project/streams.py contains all the Streams

A full project example ready to use can be found here

Note

This is just a recommendation; there are many ways to structure your project

"},{"location":"large_project_structure/#resources","title":"Resources","text":"

This python module contains any global resource that will be used later in the application, for example DB connections or the StreamEngine. Typically we will have the following:

from kstreams import backends, create_engine\n\nbackend = backends.Kafka(\n    bootstrap_servers=[\"localhost:9092\"],\n    security_protocol=backends.kafka.SecurityProtocol.PLAINTEXT,\n)\n\nstream_engine = create_engine(\n    title=\"my-stream-engine\",\n    backend=backend,\n)\n

Then later stream_engine can be reused to start the application.

"},{"location":"large_project_structure/#streams","title":"Streams","text":"

When starting your project you can have any number of Streams, each with its handler, let's say in the streams.py module. All of the Streams will run next to each other, and because they are in the same project it is easy to share common code. However, this comes with a scalability downside, as it is not possible to take advantage of kafka and scale up Streams individually. In future versions the StreamEngine will be able to select which Stream(s) should run to mitigate this issue. Typically, your streams.py will look like:

from kstreams import Stream\n\nfrom .streams_roster import stream_roster, stream_two_roster\n\n\nmy_stream = Stream(\n    \"local--hello-world\",\n    func=stream_roster,\n    config={\n        \"group_id\": \"example-group\",\n    },\n    ...\n)\n\nmy_second_stream = Stream(\n    \"local--hello-world-2\",\n    func=stream_two_roster,\n    config={\n        \"group_id\": \"example-group-2\",\n    },\n    ...\n)\n\n...\n

and streams_roster.py contains all the coroutines that will be executed when an event arrives

import logging\n\nfrom kstreams import ConsumerRecord, Send, Stream\n\nlogger = logging.getLogger(__name__)\n\n\nasync def stream_roster(cr: ConsumerRecord, send: Send) -> None:\n    logger.info(f\"showing bytes: {cr.value}\")\n    value = f\"Event confirmed. {cr.value}\"\n\n    await send(\n        \"another-topic-to-wink\",\n        value=value.encode(),\n        key=\"1\",\n    )\n\n\nasync def stream_two_roster(cr: ConsumerRecord, send: Send, stream: Stream) -> None:\n    ...\n

It is worth noting three things:

  • We separate the Stream with its coroutine to be able to test the business logic easily
  • If you need to produce events inside a Stream add the send coroutine using dependency-injection
  • We are not using StreamEngine at all to avoid circular import errors
"},{"location":"large_project_structure/#application","title":"Application","text":"

The entrypoint is usually in app.py. The module contains the import of stream_engine, its hooks and the streams to be added to the engine:

import aiorun\nimport asyncio\nimport logging\n\nfrom kstreams.stream_utils import StreamErrorPolicy\n\nfrom .resources import stream_engine\nfrom .streams import my_stream, my_second_stream\n\nlogger = logging.getLogger(__name__)\n\n\n# hooks\n@stream_engine.after_startup\nasync def init_events():\n    await stream_engine.send(\"local--hello-world\", value=\"Hi Kstreams!\")\n\n\n# add the stream to the stream_engine\nstream_engine.add_stream(my_stream, error_policy=StreamErrorPolicy.RESTART)\nstream_engine.add_stream(my_second_stream, error_policy=StreamErrorPolicy.STOP_ENGINE)\n\n\nasync def start():\n    await stream_engine.start()\n\n\nasync def stop(loop: asyncio.AbstractEventLoop):\n    await stream_engine.stop()\n\n\ndef main():\n    logging.basicConfig(level=logging.INFO)\n    logger.info(\"Starting application...\")\n    aiorun.run(start(), stop_on_unhandled_errors=True, shutdown_callback=stop)\n

To run it we recommend aiorun. It can also be run with asyncio directly, but aiorun does all the boilerplate for us.

"},{"location":"large_project_structure/#tests","title":"Tests","text":"

In this module you test your application using the TestStreamClient, usually provided as a fixture thanks to pytest. The package pytest-asyncio is also needed to test async code.

# conftest.py\nimport pytest\n\nfrom kstreams.test_utils import TestStreamClient\n\nfrom my_project.resources import stream_engine\n\n\n@pytest.fixture\ndef stream_client():\n    return TestStreamClient(stream_engine=stream_engine)\n

then you can test your streams

# test_app.py\nimport pytest\n\n\n@pytest.mark.asyncio\nasync def test_my_stream(stream_client):\n    topic = \"local--hello-world\"  # Use the same topic as the stream\n    event = b'{\"message\": \"Hello world!\"}'\n\n    async with stream_client:\n        metadata = await stream_client.send(topic, value=event, key=\"1\")\n        assert metadata.topic == topic\n
"},{"location":"metrics/","title":"Metrics","text":"

Metrics are generated by prometheus_client. You are responsible for setting up a webserver to expose the metrics.
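
For instance, a minimal sketch of a standalone worker (assuming kstreams registers its metrics in prometheus_client's default registry; the port is an arbitrary choice) can expose them with prometheus_client's built-in HTTP server:

import aiorun\nfrom prometheus_client import start_http_server\n\nfrom kstreams import create_engine\n\nstream_engine = create_engine(title=\"my-stream-engine\")\n\n\nasync def start():\n    # serve the default prometheus registry on http://localhost:8000/metrics\n    start_http_server(8000)\n    await stream_engine.start()\n\n\nasync def shutdown(loop):\n    await stream_engine.stop()\n\n\nif __name__ == \"__main__\":\n    aiorun.run(start(), stop_on_unhandled_errors=True, shutdown_callback=shutdown)\n

In a web application you can instead mount a /metrics route, as the FastAPI example in Getting Started does with starlette_prometheus.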

"},{"location":"metrics/#metrics","title":"Metrics","text":""},{"location":"metrics/#producer","title":"Producer","text":"
  • topic_partition_offsets: Gauge of offsets per topic/partition
"},{"location":"metrics/#consumer","title":"Consumer","text":"
  • consumer_committed: Gauge of the consumer committed offset per topic/partition in a consumer group
  • consumer_position: Gauge of consumer current position per topic/partition in a consumer group
  • consumer_highwater: Gauge of consumer highwater per topic/partition in a consumer group
  • consumer_lag: Gauge of current consumer lag per topic/partition in a consumer group calculated with the last committed offset
  • position_lag: Gauge of current consumer position_lag per topic/partition in a consumer group calculated using the consumer position
"},{"location":"middleware/","title":"Middleware","text":"

Kstreams allows you to include middlewares for adding behavior to streams.

A middleware is a callable that works with every ConsumerRecord (CR) before and after it is processed by a specific stream. Middlewares also have access to the stream and send function.

  • It takes each CR that arrives at a kafka topic.
  • Then it can do something to the CR or run any needed code.
  • Then it passes the CR to be processed by another callable (other middleware or stream).
  • Once the CR is processed by the stream, the chain is \"completed\".
  • If there is code after the self.next_call(cr) then it will be executed.

Kstreams middlewares have the following protocol:

Bases: Protocol

Source code in kstreams/middleware/middleware.py
class MiddlewareProtocol(typing.Protocol):\n    def __init__(\n        self,\n        *,\n        next_call: types.NextMiddlewareCall,\n        send: types.Send,\n        stream: \"Stream\",\n        **kwargs: typing.Any,\n    ) -> None: ...  #  pragma: no cover\n\n    async def __call__(\n        self, cr: types.ConsumerRecord\n    ) -> typing.Any: ...  #  pragma: no cover\n

Note

The __call__ method can return anything, so previous calls can use the returned value. Make sure that the line return await self.next_call(cr) is in your method.
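
For instance, a sketch of a middleware that times the rest of the chain illustrates both points: code after self.next_call(cr) runs once processing finishes, and the returned value is passed back up the chain (TimingMiddleware is an illustrative name, not part of kstreams):

import time\nimport typing\n\nfrom kstreams import ConsumerRecord, middleware\n\n\nclass TimingMiddleware(middleware.BaseMiddleware):\n    async def __call__(self, cr: ConsumerRecord) -> typing.Any:\n        start = time.monotonic()\n        # the result comes from the next middleware or the stream itself\n        result = await self.next_call(cr)\n        print(f\"Processing took {time.monotonic() - start:.3f}s\")\n        return result\n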

Warning

Middlewares only work with the new Dependency Injection approach
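
In practice this means the stream handler must use typed parameters so kstreams can inject them, as in this sketch (the topic name is illustrative; the signature mirrors the roster example in the Large Projects section):

from kstreams import ConsumerRecord, Send, Stream\n\nfrom .engine import stream_engine\n\n\n@stream_engine.stream(\"kstreams-topic\")\nasync def processor(cr: ConsumerRecord, send: Send, stream: Stream) -> None:\n    # cr, send and stream are injected based on the type annotations\n    print(f\"Event consumed: {cr.value}\")\n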

"},{"location":"middleware/#creating-a-middleware","title":"Creating a middleware","text":"

To create a middleware you have to create a class that inherits from BaseMiddleware and define the async def __call__ method. Let's consider that we want to save the CR to elastic before it is processed:

import typing\n\nfrom kstreams import ConsumerRecord, middleware\n\nasync def save_to_elastic(cr: ConsumerRecord) -> None:\n    ...\n\n\nclass ElasticMiddleware(middleware.BaseMiddleware):\n    async def __call__(self, cr: ConsumerRecord) -> typing.Any:\n        # save to elastic before calling the next\n        await save_to_elastic(cr)\n\n        # the next call could be another middleware\n        return await self.next_call(cr)\n

Then, we have to include the middleware:

from kstreams import ConsumerRecord, middleware\n\nfrom .engine import stream_engine\n\n\nmiddlewares = [middleware.Middleware(ElasticMiddleware)]\n\n@stream_engine.stream(\"kstreams-topic\", middlewares=middlewares)\nasync def processor(cr: ConsumerRecord):\n    ...\n

Note

The Middleware concept also applies to async generators (yield from a stream)

"},{"location":"middleware/#adding-extra-configuration-to-middlewares","title":"Adding extra configuration to middlewares","text":"

If you want to provide extra configuration to a middleware, you should override the init method with the extra options as keyword arguments and then call super().__init__(**kwargs)

Let's consider that we want to send an event to a specific topic when a ValueError is raised inside a stream (Dead Letter Queue)

from kstreams import ConsumerRecord, types, Stream, middleware\n\n\nclass DLQMiddleware(middleware.BaseMiddleware):\n    def __init__(self, *, topic: str, **kwargs) -> None:\n        super().__init__(**kwargs)\n        self.topic = topic\n\n    async def __call__(self, cr: ConsumerRecord):\n        try:\n            return await self.next_call(cr)\n        except ValueError:\n            await self.send(self.topic, key=cr.key, value=cr.value)\n\n\n# Create the middlewares\nmiddlewares = [\n    middleware.Middleware(\n        DLQMiddleware, topic=\"kstreams-dlq-topic\"\n    )\n]\n\n@stream_engine.stream(\"kstreams-topic\", middlewares=middlewares)\nasync def processor(cr: ConsumerRecord):\n    if cr.value == b\"joker\":\n        raise ValueError(\"Joker received...\")\n
"},{"location":"middleware/#default-middleware","title":"Default Middleware","text":"

This is always the first Middleware in the middleware stack to catch any exception that might occur. Any exception raised when consuming events that is not handled by the end user will be handled by this ExceptionMiddleware, executing the error_policy that was established.

Source code in kstreams/middleware/middleware.py
class ExceptionMiddleware(BaseMiddleware):\n    \"\"\"\n    This is always the first Middleware in the middleware stack\n    to catch any exception that might occur. Any exception raised\n    when consuming events that is not handled by the end user\n    will be handled by this ExceptionMiddleware executing the\n    policy_error that was stablished.\n    \"\"\"\n\n    def __init__(\n        self, *, engine: \"StreamEngine\", error_policy: StreamErrorPolicy, **kwargs\n    ) -> None:\n        super().__init__(**kwargs)\n        self.engine = engine\n        self.error_policy = error_policy\n\n    async def __call__(self, cr: types.ConsumerRecord) -> typing.Any:\n        try:\n            return await self.next_call(cr)\n        except Exception as exc:\n            logger.exception(\n                \"Unhandled error occurred while listening to the stream. \"\n                f\"Stream consuming from topics {self.stream.topics} CRASHED!!! \\n\\n \"\n            )\n            if sys.version_info >= (3, 11):\n                exc.add_note(f\"Handler: {self.stream.func}\")\n                exc.add_note(f\"Topics: {self.stream.topics}\")\n\n            await self.cleanup_policy(exc)\n\n    async def cleanup_policy(self, exc: Exception) -> None:\n        \"\"\"\n        Execute clenup policicy according to the Stream configuration.\n\n        At this point we are inside the asyncio.Lock `is_processing`\n        as an event is being processed and an exeption has occured.\n        The Lock must be released to stop the Stream\n        (which must happen for any policy), then before re-raising\n        the exception the Lock must be acquire again to continue the processing\n\n        Exception and policies:\n\n            - STOP: The exception is re-raised as the Stream will be stopped\n              and the end user will deal with it\n\n            - STOP_ENGINE: The exception is re-raised as the Engine will be stopped\n              (all Streams and Producer) and the end user will deal with it\n\n            - RESTART: The exception is not re-raised as the Stream\n              will recover and continue the processing. The logger.exception\n              from __call__ will record that something went wrong\n\n            - STOP_APPLICATION: The exception is not re-raised as the entire\n              application will be stopped. This is only useful when using kstreams\n              with another library like FastAPI. 
The logger.exception\n              from __call__ will record that something went wrong\n\n        Args:\n            exc (Exception): Any Exception that causes the Stream to crash\n\n        Raises:\n            exc: Exception is the policy is `STOP` or `STOP_ENGINE`\n        \"\"\"\n        self.stream.is_processing.release()\n\n        if self.error_policy == StreamErrorPolicy.RESTART:\n            await self.stream.stop()\n            await self.stream.start()\n        elif self.error_policy == StreamErrorPolicy.STOP:\n            await self.stream.stop()\n            # acquire `is_processing` Lock again to resume processing\n            # and avoid `RuntimeError: Lock is not acquired.`\n            await self.stream.is_processing.acquire()\n            raise exc\n        elif self.error_policy == StreamErrorPolicy.STOP_ENGINE:\n            await self.engine.stop()\n            # acquire `is_processing` Lock again to resume processing\n            # and avoid `RuntimeError: Lock is not acquired.`\n            await self.stream.is_processing.acquire()\n            raise exc\n        else:\n            # STOP_APPLICATION\n            await self.engine.stop()\n            await self.stream.is_processing.acquire()\n            signal.raise_signal(signal.SIGTERM)\n
"},{"location":"middleware/#kstreams.middleware.middleware.ExceptionMiddleware.cleanup_policy","title":"cleanup_policy(exc) async","text":"

Execute cleanup policy according to the Stream configuration.

At this point we are inside the asyncio.Lock is_processing, as an event is being processed and an exception has occurred. The Lock must be released to stop the Stream (which must happen for any policy); then, before re-raising the exception, the Lock must be acquired again to continue processing.

Exception and policies:

- STOP: The exception is re-raised as the Stream will be stopped\n  and the end user will deal with it\n\n- STOP_ENGINE: The exception is re-raised as the Engine will be stopped\n  (all Streams and Producer) and the end user will deal with it\n\n- RESTART: The exception is not re-raised as the Stream\n  will recover and continue the processing. The logger.exception\n  from __call__ will record that something went wrong\n\n- STOP_APPLICATION: The exception is not re-raised as the entire\n  application will be stopped. This is only useful when using kstreams\n  with another library like FastAPI. The logger.exception\n  from __call__ will record that something went wrong\n

Parameters:

Name Type Description Default exc Exception

Any Exception that causes the Stream to crash

required

Raises:

Type Description exc

Exception if the policy is STOP or STOP_ENGINE

Source code in kstreams/middleware/middleware.py
async def cleanup_policy(self, exc: Exception) -> None:\n    \"\"\"\n    Execute clenup policicy according to the Stream configuration.\n\n    At this point we are inside the asyncio.Lock `is_processing`\n    as an event is being processed and an exeption has occured.\n    The Lock must be released to stop the Stream\n    (which must happen for any policy), then before re-raising\n    the exception the Lock must be acquire again to continue the processing\n\n    Exception and policies:\n\n        - STOP: The exception is re-raised as the Stream will be stopped\n          and the end user will deal with it\n\n        - STOP_ENGINE: The exception is re-raised as the Engine will be stopped\n          (all Streams and Producer) and the end user will deal with it\n\n        - RESTART: The exception is not re-raised as the Stream\n          will recover and continue the processing. The logger.exception\n          from __call__ will record that something went wrong\n\n        - STOP_APPLICATION: The exception is not re-raised as the entire\n          application will be stopped. This is only useful when using kstreams\n          with another library like FastAPI. The logger.exception\n          from __call__ will record that something went wrong\n\n    Args:\n        exc (Exception): Any Exception that causes the Stream to crash\n\n    Raises:\n        exc: Exception is the policy is `STOP` or `STOP_ENGINE`\n    \"\"\"\n    self.stream.is_processing.release()\n\n    if self.error_policy == StreamErrorPolicy.RESTART:\n        await self.stream.stop()\n        await self.stream.start()\n    elif self.error_policy == StreamErrorPolicy.STOP:\n        await self.stream.stop()\n        # acquire `is_processing` Lock again to resume processing\n        # and avoid `RuntimeError: Lock is not acquired.`\n        await self.stream.is_processing.acquire()\n        raise exc\n    elif self.error_policy == StreamErrorPolicy.STOP_ENGINE:\n        await self.engine.stop()\n        # acquire `is_processing` Lock again to resume processing\n        # and avoid `RuntimeError: Lock is not acquired.`\n        await self.stream.is_processing.acquire()\n        raise exc\n    else:\n        # STOP_APPLICATION\n        await self.engine.stop()\n        await self.stream.is_processing.acquire()\n        signal.raise_signal(signal.SIGTERM)\n
"},{"location":"middleware/#middleware-chain","title":"Middleware chain","text":"

It is possible to add as many middlewares as you want in order to split and reuse business logic; the downside is extra complexity and the code might become slower. The middleware order is important, as they are evaluated in the order in which they were placed in the stream.

In the following example we are adding three middlewares in this order: DLQMiddleware, ElasticMiddleware, and S3Middleware. The execution chain will be:

sequenceDiagram\n    autonumber\n    ExceptionMiddleware->>DLQMiddleware: \n    Note left of ExceptionMiddleware: Event received\n    alt No Processing Error\n    DLQMiddleware->>ElasticMiddleware: \n    Note right of ElasticMiddleware: Store CR on Elastic\n    ElasticMiddleware->>S3Middleware: \n    Note right of S3Middleware: Store CR on S3\n    S3Middleware->>Stream: \n    Note right of Stream: CR processed\n    Stream-->>S3Middleware: \n    S3Middleware-->>ElasticMiddleware: \n    ElasticMiddleware-->>DLQMiddleware: \n    DLQMiddleware-->>ExceptionMiddleware: \n    end
Multiple middlewares example
from kstreams import ConsumerRecord, Stream, middleware\n\n\nclass DLQMiddleware(middleware.BaseMiddleware):\n    async def __call__(self, cr: ConsumerRecord):\n        try:\n            return await self.next_call(cr)\n        except ValueError:\n            await dlq(cr.value)\n\n\nclass ElasticMiddleware(middleware.BaseMiddleware):\n    async def __call__(self, cr: ConsumerRecord):\n        await save_to_elastic(cr.value)\n        return await self.next_call(cr)\n\n\nclass S3Middleware(middleware.BaseMiddleware):\n    async def __call__(self, cr: ConsumerRecord):\n        await backup_to_s3(cr.value)\n        return await self.next_call(cr)\n\n\nmiddlewares = [\n    middleware.Middleware(DLQMiddleware),\n    middleware.Middleware(ElasticMiddleware),\n    middleware.Middleware(S3Middleware),\n]\n\n\n@stream_engine.stream(\"kstreams-topic\", middlewares=middlewares)\nasync def processor(cr: ConsumerRecord):\n    if cr.value == event_2:\n        raise ValueError(\"Error from stream...\")\n    await save_to_db(cr.value)\n

Note

In the example we can see that the cr will always be saved into elastic and s3, regardless of whether an error occurs

"},{"location":"middleware/#executing-code-after-the-cr-was-processed","title":"Executing Code after the CR was processed","text":"

As mentioned in the introduction, it is possible to execute code after the CR is handled. To do this, we need to place code after next_call is called:

Execute code after CR is handled
from kstreams import ConsumerRecord, Stream, middleware\n\n\nclass DLQMiddleware(middleware.BaseMiddleware):\n    async def __call__(self, cr: ConsumerRecord):\n        try:\n            return await self.next_call(cr)\n        except ValueError:\n            await dlq(cr.value)\n\n\nclass ElasticMiddleware(middleware.BaseMiddleware):\n    async def __call__(self, cr: ConsumerRecord):\n        result = await self.next_call(cr)\n        # This will be called after the whole chain has finished\n        await save_to_elastic(cr.value)\n        return result\n\n\nmiddlewares = [\n    middleware.Middleware(DLQMiddleware),\n    middleware.Middleware(ElasticMiddleware),\n]\n\n\n@stream_engine.stream(\"kstreams-topic\", middlewares=middlewares)\nasync def processor(cr: ConsumerRecord):\n    if cr.value == event_2:\n        raise ValueError(\"Error from stream...\")\n    await save_to_db(cr.value)\n

Note

In the example we can see that the event is saved to elastic only if there is no error

"},{"location":"middleware/#deserialization","title":"Deserialization","text":"

To deserialize bytes into a different structure like a dict, middlewares are the preferred way to do it. Examples:

Source code in examples/dataclasses-avroschema-example/dataclasses_avroschema_example/middlewares.py
class AvroDeserializerMiddleware(middleware.BaseMiddleware):\n    def __init__(self, *, model: AvroModel, **kwargs) -> None:\n        super().__init__(**kwargs)\n        self.model = model\n\n    async def __call__(self, cr: ConsumerRecord):\n        \"\"\"\n        Deserialize a payload to an AvroModel\n        \"\"\"\n        if cr.value is not None:\n            data = self.model.deserialize(cr.value)\n            cr.value = data\n        return await self.next_call(cr)\n
Source code in examples/confluent-example/confluent_example/middlewares.py
class ConfluentMiddlewareDeserializer(\n    middleware.BaseMiddleware, AsyncAvroMessageSerializer\n):\n    def __init__(\n        self,\n        *,\n        schema_registry_client: AsyncSchemaRegistryClient,\n        reader_schema: Optional[schema.AvroSchema] = None,\n        return_record_name: bool = False,\n        **kwargs,\n    ):\n        super().__init__(**kwargs)\n        self.schemaregistry_client = schema_registry_client\n        self.reader_schema = reader_schema\n        self.return_record_name = return_record_name\n        self.id_to_decoder_func: Dict = {}\n        self.id_to_writers: Dict = {}\n\n    async def __call__(self, cr: ConsumerRecord):\n        \"\"\"\n        Deserialize the event to a dict\n        \"\"\"\n        data = await self.decode_message(cr.value)\n        cr.value = data\n        return await self.next_call(cr)\n
"},{"location":"monitoring/","title":"Monitoring","text":"

This page discusses how to monitor your application using the Kafka metrics that are accessible in Prometheus.

Before we begin, it's crucial to note that Kafka itself makes a number of useful metrics available, covering the cluster, brokers, and clients (producers and consumers).

This means that we can quickly add some graphs to our dashboards by utilizing the already-exposed metrics.

Kstreams includes a collection of metrics. See Metrics Docs for more information.

"},{"location":"monitoring/#kstreams.PrometheusMonitor","title":"kstreams.PrometheusMonitor","text":"

Metrics monitor to keep track of Producers and Consumers.

Attributes: metrics_scrape_time (float): Number of seconds that the monitor will wait until the next scrape iteration

Source code in kstreams/prometheus/monitor.py
class PrometheusMonitor:\n    \"\"\"\n    Metrics monitor to keep track of Producers and Consumers.\n\n     Attributes:\n        metrics_scrape_time float: Amount of seconds that the monitor\n            will wait until next scrape iteration\n    \"\"\"\n\n    # Producer metrics\n    MET_OFFSETS = Gauge(\n        \"topic_partition_offsets\", \"help producer offsets\", [\"topic\", \"partition\"]\n    )\n\n    # Consumer metrics\n    MET_COMMITTED = Gauge(\n        \"consumer_committed\",\n        \"help consumer committed\",\n        [\"topic\", \"partition\", \"consumer_group\"],\n    )\n    MET_POSITION = Gauge(\n        \"consumer_position\",\n        \"help consumer position\",\n        [\"topic\", \"partition\", \"consumer_group\"],\n    )\n    MET_HIGHWATER = Gauge(\n        \"consumer_highwater\",\n        \"help consumer highwater\",\n        [\"topic\", \"partition\", \"consumer_group\"],\n    )\n    MET_LAG = Gauge(\n        \"consumer_lag\",\n        \"help consumer lag calculated using the last commited offset\",\n        [\"topic\", \"partition\", \"consumer_group\"],\n    )\n    MET_POSITION_LAG = Gauge(\n        \"position_lag\",\n        \"help consumer position lag calculated using the consumer position\",\n        [\"topic\", \"partition\", \"consumer_group\"],\n    )\n\n    def __init__(self, metrics_scrape_time: float = 3):\n        self.metrics_scrape_time = metrics_scrape_time\n        self.running = False\n        self._producer = None\n        self._streams: List[Stream] = []\n\n    async def start(self) -> None:\n        self.running = True\n        logger.info(\"Starting Prometheus Monitoring started...\")\n        await self._metrics_task()\n\n    async def stop(self) -> None:\n        self.running = False\n        self._clean_consumer_metrics()\n        logger.info(\"Prometheus Monitoring stopped...\")\n\n    def add_topic_partition_offset(\n        self, topic: str, partition: int, offset: int\n    ) -> None:\n        self.MET_OFFSETS.labels(topic=topic, partition=partition).set(offset)\n\n    def _add_consumer_metrics(self, metrics_dict: MetricsType):\n        for topic_partition, partitions_metadata in metrics_dict.items():\n            group_id = partitions_metadata[\"group_id\"]\n            position = partitions_metadata[\"position\"]\n            committed = partitions_metadata[\"committed\"]\n            highwater = partitions_metadata[\"highwater\"]\n            lag = partitions_metadata[\"lag\"]\n            position_lag = partitions_metadata[\"position_lag\"]\n\n            self.MET_COMMITTED.labels(\n                topic=topic_partition.topic,\n                partition=topic_partition.partition,\n                consumer_group=group_id,\n            ).set(committed or 0)\n            self.MET_POSITION.labels(\n                topic=topic_partition.topic,\n                partition=topic_partition.partition,\n                consumer_group=group_id,\n            ).set(position or -1)\n            self.MET_HIGHWATER.labels(\n                topic=topic_partition.topic,\n                partition=topic_partition.partition,\n                consumer_group=group_id,\n            ).set(highwater or 0)\n            self.MET_LAG.labels(\n                topic=topic_partition.topic,\n                partition=topic_partition.partition,\n                consumer_group=group_id,\n            ).set(lag or 0)\n            self.MET_POSITION_LAG.labels(\n                topic=topic_partition.topic,\n                partition=topic_partition.partition,\n            
    consumer_group=group_id,\n            ).set(position_lag or 0)\n\n    def _clean_consumer_metrics(self) -> None:\n        \"\"\"\n        This method should be called when a rebalance takes place\n        to clean all consumers metrics. When the rebalance finishes\n        new metrics will be generated per consumer based on the\n        consumer assigments\n        \"\"\"\n        self.MET_LAG.clear()\n        self.MET_POSITION_LAG.clear()\n        self.MET_COMMITTED.clear()\n        self.MET_POSITION.clear()\n        self.MET_HIGHWATER.clear()\n\n    def clean_stream_consumer_metrics(self, consumer: Consumer) -> None:\n        topic_partitions = consumer.assignment()\n        group_id = consumer._group_id\n        for topic_partition in topic_partitions:\n            topic = topic_partition.topic\n            partition = topic_partition.partition\n\n            metrics_found = False\n            for sample in list(self.MET_LAG.collect())[0].samples:\n                if {\n                    \"topic\": topic,\n                    \"partition\": str(partition),\n                    \"consumer_group\": group_id,\n                } == sample.labels:\n                    metrics_found = True\n\n            if metrics_found:\n                self.MET_LAG.remove(topic, partition, group_id)\n                self.MET_POSITION_LAG.remove(topic, partition, group_id)\n                self.MET_COMMITTED.remove(topic, partition, group_id)\n                self.MET_POSITION.remove(topic, partition, group_id)\n                self.MET_HIGHWATER.remove(topic, partition, group_id)\n            else:\n                logger.debug(\n                    \"Metrics for consumer with group-id: \"\n                    f\"{consumer._group_id} not found\"\n                )\n\n    def add_producer(self, producer):\n        self._producer = producer\n\n    def add_streams(self, streams):\n        self._streams = streams\n\n    async def generate_consumer_metrics(self, consumer: Consumer):\n        \"\"\"\n        Generate Consumer Metrics for Prometheus\n\n        Format:\n            {\n                \"topic-1\": {\n                    \"1\": (\n                        [topic-1, partition-number, 'group-id-1'],\n                        committed, position, highwater, lag, position_lag\n                    )\n                    \"2\": (\n                        [topic-1, partition-number, 'group-id-1'],\n                        committed, position, highwater, lag, position_lag\n                    )\n                },\n                ...\n                \"topic-n\": {\n                    \"1\": (\n                        [topic-n, partition-number, 'group-id-n'],\n                        committed, position, highwater, lag, position_lag\n                    )\n                    \"2\": (\n                        [topic-n, partition-number, 'group-id-n'],\n                        committed, position, highwater, lag, position_lag\n                    )\n                }\n            }\n        \"\"\"\n        metrics: MetricsType = DefaultDict(dict)\n\n        topic_partitions = consumer.assignment()\n\n        for topic_partition in topic_partitions:\n            committed = await consumer.committed(topic_partition) or 0\n            position = await consumer.position(topic_partition)\n            highwater = consumer.highwater(topic_partition)\n\n            lag = position_lag = None\n            if highwater:\n                lag = highwater - committed\n                position_lag = highwater - 
position\n\n            metrics[topic_partition] = {\n                \"group_id\": consumer._group_id,\n                \"committed\": committed,\n                \"position\": position,\n                \"highwater\": highwater,\n                \"lag\": lag,\n                \"position_lag\": position_lag,\n            }\n\n        self._add_consumer_metrics(metrics)\n\n    async def _metrics_task(self) -> None:\n        \"\"\"\n        Task that runs in `backgroud` to generate\n        consumer metrics.\n\n        When self.running is False the task will finish and it\n        will be safe to stop consumers and producers.\n        \"\"\"\n        while self.running:\n            await asyncio.sleep(self.metrics_scrape_time)\n            for stream in self._streams:\n                if stream.consumer is not None:\n                    try:\n                        await self.generate_consumer_metrics(stream.consumer)\n                    except RuntimeError:\n                        logger.debug(\n                            f\"Metrics for stream {stream.name} can not be generated \"\n                            \"probably because it has been removed\"\n                        )\n
"},{"location":"monitoring/#kstreams.PrometheusMonitor.generate_consumer_metrics","title":"generate_consumer_metrics(consumer) async","text":"

Generate Consumer Metrics for Prometheus

Format

{ \"topic-1\": { \"1\": ( [topic-1, partition-number, 'group-id-1'], committed, position, highwater, lag, position_lag ) \"2\": ( [topic-1, partition-number, 'group-id-1'], committed, position, highwater, lag, position_lag ) }, ... \"topic-n\": { \"1\": ( [topic-n, partition-number, 'group-id-n'], committed, position, highwater, lag, position_lag ) \"2\": ( [topic-n, partition-number, 'group-id-n'], committed, position, highwater, lag, position_lag ) } }

Source code in kstreams/prometheus/monitor.py
async def generate_consumer_metrics(self, consumer: Consumer):\n    \"\"\"\n    Generate Consumer Metrics for Prometheus\n\n    Format:\n        {\n            \"topic-1\": {\n                \"1\": (\n                    [topic-1, partition-number, 'group-id-1'],\n                    committed, position, highwater, lag, position_lag\n                )\n                \"2\": (\n                    [topic-1, partition-number, 'group-id-1'],\n                    committed, position, highwater, lag, position_lag\n                )\n            },\n            ...\n            \"topic-n\": {\n                \"1\": (\n                    [topic-n, partition-number, 'group-id-n'],\n                    committed, position, highwater, lag, position_lag\n                )\n                \"2\": (\n                    [topic-n, partition-number, 'group-id-n'],\n                    committed, position, highwater, lag, position_lag\n                )\n            }\n        }\n    \"\"\"\n    metrics: MetricsType = DefaultDict(dict)\n\n    topic_partitions = consumer.assignment()\n\n    for topic_partition in topic_partitions:\n        committed = await consumer.committed(topic_partition) or 0\n        position = await consumer.position(topic_partition)\n        highwater = consumer.highwater(topic_partition)\n\n        lag = position_lag = None\n        if highwater:\n            lag = highwater - committed\n            position_lag = highwater - position\n\n        metrics[topic_partition] = {\n            \"group_id\": consumer._group_id,\n            \"committed\": committed,\n            \"position\": position,\n            \"highwater\": highwater,\n            \"lag\": lag,\n            \"position_lag\": position_lag,\n        }\n\n    self._add_consumer_metrics(metrics)\n
"},{"location":"monitoring/#consumer-metrics","title":"Consumer Metrics","text":"

We advise including the consumer_lag in your application's grafana dashboard.

consumer_lag will show you how far your consumers are lagging behind the published events in the topic they are reading. For instance, if you have a single consumer and another team is producing millions of events, the consumer might not be able to handle them in time (where \"in time\" is defined by you, e.g. \"a message should be consumed within an hour of being received\").

Based on the lag, you will have to develop your own alerts; for example, an alert could be pushed to Slack if the lag exceeds a particular threshold.

You will require your consumer_group name in order to design a basic dashboard using the consumer_lag.

We could add a query in Grafana like this:

sum(kafka_consumer_group_ConsumerLagMetrics_Value{topic =~ \"YOUR_OWN_TOPIC_NAME\", groupId =~\"YOUR_CONSUMER_GROUP\", name=\"SumOffsetLag\"}) by (topic)\n

Remember to replace YOUR_CONSUMER_GROUP and YOUR_OWN_TOPIC_NAME with your consumer_group and topic respectively \u2b06\ufe0f

"},{"location":"monitoring/#producer-metrics","title":"Producer Metrics","text":"

If you have producers, it's a good idea to monitor the growth of Log End Offset (LEO).

The increase in LEO indicates the number of events produced in the last N minutes.

If you know that events should be produced every N minutes, you can trigger an alert when none occur, since this metric tells you whether or not events were produced.

We could add a query in Grafana like this, where N is 10m:

sum(max(increase(kafka_log_Log_Value{name=\"LogEndOffset\", topic =~ \"TOPIC_NAME\"}[10m])) by (partition, topic)) by (topic)\n

Remember to modify TOPIC_NAME to the name of the topic you want to track \u2b06\ufe0f

"},{"location":"monitoring/#custom-business-metrics","title":"Custom Business Metrics","text":"

One benefit of Prometheus is that you can design your own custom metrics.

Scenario: Consider an event-based ordering system. Assume you receive X orders daily and ship Y orders daily. Most likely, you will create a dashboard using this data.

Fortunately, we can create our own custom metrics by using the Prometheus Python client.

You can construct a variety of metrics with prometheus:

  • Gauge
  • Counter
  • Histogram
  • Summary

You can read more about it in prometheus metric_types website.

In our scenario, we will most likely want a Counter for orders received and a Counter for orders shipped.

from prometheus_client import Counter\nfrom kstreams import PrometheusMonitor\n\nclass MyAppPrometheusMonitor(PrometheusMonitor):\n    def __init__(self):\n        super().__init__() # initialize kstream metrics\n        self.orders_received = Counter('orders_received', 'Amount of orders received')\n        self.orders_shipped = Counter('orders_shipped', 'Amount of orders shipped')\n\n    def increase_received(self, amount: int = 1):\n        self.orders_received.inc(amount)\n\n    def increase_shipped(self, amount: int = 1):\n        self.orders_shipped.inc(amount)\n

In our kstreams app, we can:

stream_engine = create_engine(title=\"my-engine\", monitor=MyAppPrometheusMonitor())\n\n@stream_engine.stream(\"my-special-orders\")\nasync def consume_orders_received(cr: ConsumerRecord):\n    if cr.value.status == \"NEW\":\n        stream_engine.monitor.increase_received()\n    elif cr.value.status == \"SHIPPED\":\n        stream_engine.monitor.increase_shipped()\n

Your app's prometheus would display this data, which you might utilize to build a stylish \u2728dashboard\u2728 interface.

For further details, see the Prometheus python client documentation.

"},{"location":"serialization/","title":"Serialization","text":"

Kafka's job is to move bytes from producer to consumers, through a topic.

By default, this is what kstreams does.

from kstreams import Stream\n\nfrom .streams_roster import stream_roster\n\nmy_stream = Stream(\n    \"local--hello-world\",\n    func=stream_roster,\n    config={\n        \"group_id\": \"example-group\",\n    },\n)\n

As you can see the ConsumerRecord's value is bytes.
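
For illustration, a handler consuming from such a stream receives raw bytes; a minimal sketch (my_handler is a hypothetical name, not the stream_roster used above):

from kstreams import ConsumerRecord\n\n\nasync def my_handler(cr: ConsumerRecord) -> None:\n    # without a deserializer or middleware, cr.value arrives as raw bytes\n    if cr.value is not None:\n        assert isinstance(cr.value, bytes)\n        print(cr.value.decode())\n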

In order to keep your code pythonic, we provide a mechanism to serialize/deserialize these bytes, into something more useful. This way, you can work with other data structures, like a dict or dataclasses.

Sometimes it is easier to work with a dict in your app, give it to kstreams, and let it transform it into bytes to be delivered to Kafka. For this situation, you need to implement kstreams.serializers.Serializer.

The other situation is when you consume from Kafka (or other brokers). Instead of dealing with bytes, you may want your function to receive a dict ready to be used. For those cases, we need to use a middleware. For example, we can implement a JsonDeserializerMiddleware:

import json\n\nfrom kstreams import middleware, ConsumerRecord\n\n\nclass JsonDeserializerMiddleware(middleware.BaseMiddleware):\n    async def __call__(self, cr: ConsumerRecord):\n        if cr.value is not None:\n            data = json.loads(cr.value.decode())\n            cr.value = data\n        return await self.next_call(cr)\n

It is also possible to use kstreams.serializers.Deserializer for deserialization, but this will be deprecated

Warning

kstreams.serializers.Deserializer will be deprecated, use middlewares instead

"},{"location":"serialization/#kstreams.serializers.Serializer","title":"kstreams.serializers.Serializer","text":"

Protocol used by the Stream to serialize.

A Protocol is similar to other languages features like an interface or a trait.

End users should provide their own class implementing this protocol.

For example a JsonSerializer

from typing import Optional, Dict\nimport json\n\nclass JsonSerializer:\n\n    async def serialize(\n        self,\n        payload: dict,\n        headers: Optional[Dict[str, str]] = None,\n        serializer_kwargs: Optional[Dict] = None,\n    ) -> bytes:\n        \"\"\"Return UTF-8 encoded payload\"\"\"\n        value = json.dumps(payload)\n        return value.encode()\n

Notice that you don't need to inherit anything, you just have to comply with the Protocol.

Source code in kstreams/serializers.py
class Serializer(Protocol):\n    \"\"\"Protocol used by the Stream to serialize.\n\n    A Protocol is similar to other languages features like an interface or a trait.\n\n    End users should provide their own class implementing this protocol.\n\n    For example a `JsonSerializer`\n\n    ```python\n    from typing import Optional, Dict\n    import json\n\n    class JsonSerializer:\n\n        async def serialize(\n            self,\n            payload: dict,\n            headers: Optional[Dict[str, str]] = None,\n            serializer_kwargs: Optional[Dict] = None,\n        ) -> bytes:\n            \\\"\"\"Return UTF-8 encoded payload\\\"\"\"\n            value = json.dumps(payload)\n            return value.encode()\n    ```\n\n    Notice that you don't need to inherit anything,\n    you just have to comply with the Protocol.\n    \"\"\"\n\n    async def serialize(\n        self,\n        payload: Any,\n        headers: Optional[Headers] = None,\n        serializer_kwargs: Optional[Dict] = None,\n    ) -> bytes:\n        \"\"\"\n        Implement this method to deserialize the data received from the topic.\n        \"\"\"\n        ...\n
"},{"location":"serialization/#kstreams.serializers.Serializer.serialize","title":"serialize(payload, headers=None, serializer_kwargs=None) async","text":"

Implement this method to deserialize the data received from the topic.

Source code in kstreams/serializers.py
async def serialize(\n    self,\n    payload: Any,\n    headers: Optional[Headers] = None,\n    serializer_kwargs: Optional[Dict] = None,\n) -> bytes:\n    \"\"\"\n    Implement this method to deserialize the data received from the topic.\n    \"\"\"\n    ...\n
"},{"location":"serialization/#kstreams.serializers.Deserializer","title":"kstreams.serializers.Deserializer","text":"

Protocol used by the Stream to deserialize.

A Protocol is similar to other languages features like an interface or a trait.

End users should provide their own class implementing this protocol.

For example a JsonDeserializer

import json\nfrom kstreams import ConsumerRecord\n\nclass JsonDeserializer:\n\n    async def deserialize(\n        self, consumer_record: ConsumerRecord, **kwargs\n    ) -> ConsumerRecord:\n        data = json.loads(consumer_record.value.decode())\n        consumer_record.value = data\n        return consumer_record\n
Source code in kstreams/serializers.py
class Deserializer(Protocol):\n    \"\"\"Protocol used by the Stream to deserialize.\n\n    A Protocol is similar to other languages features like an interface or a trait.\n\n    End users should provide their own class implementing this protocol.\n\n    For example a `JsonDeserializer`\n\n    ```python\n    import json\n    from kstreams import ConsumerRecord\n\n    class JsonDeserializer:\n\n        async def deserialize(\n            self, consumer_record: ConsumerRecord, **kwargs\n        ) -> ConsumerRecord:\n            data = json.loads(consumer_record.value.decode())\n            consumer_record.value = data\n            return consumer_record\n    ```\n    \"\"\"\n\n    async def deserialize(\n        self, consumer_record: ConsumerRecord, **kwargs\n    ) -> ConsumerRecord:\n        \"\"\"\n        Implement this method to deserialize the data received from the topic.\n        \"\"\"\n        ...\n
"},{"location":"serialization/#kstreams.serializers.Deserializer.deserialize","title":"deserialize(consumer_record, **kwargs) async","text":"

Implement this method to deserialize the data received from the topic.

Source code in kstreams/serializers.py
async def deserialize(\n    self, consumer_record: ConsumerRecord, **kwargs\n) -> ConsumerRecord:\n    \"\"\"\n    Implement this method to deserialize the data received from the topic.\n    \"\"\"\n    ...\n
"},{"location":"serialization/#usage","title":"Usage","text":"

Once you have written your serializer or deserializer, there are two ways of using them: in a generic fashion or per stream.

"},{"location":"serialization/#initialize-the-engine-with-your-serializers","title":"Initialize the engine with your serializers","text":"

By doing this all the streams will use these serializers by default.

stream_engine = create_engine(\n    title=\"my-stream-engine\",\n    serializer=JsonSerializer(),\n)\n
"},{"location":"serialization/#initilize-streams-with-a-deserializer-and-produce-events-with-serializers","title":"Initilize streams with a deserializer and produce events with serializers","text":"
from kstreams import middleware, ConsumerRecord\n\n\n@stream_engine.stream(topic, middlewares=[middleware.Middleware(JsonDeserializerMiddleware)])\nasync def hello_stream(cr: ConsumerRecord):\n    # remember event.value is now a dict\n    print(cr.value[\"message\"])\n    save_to_db(cr)\n
await stream_engine.send(\n    topic,\n    value={\"message\": \"test\"},\n    headers={\"content-type\": consts.APPLICATION_JSON},\n    key=\"1\",\n)\n
"},{"location":"stream/","title":"Streams","text":"

A Stream in kstreams is an extension of AIOKafkaConsumer

Consuming can be done using kstreams.Stream. You only need to decorate a coroutine with @stream_engine.stream. The decorator has the same aiokafka consumer API at initialization; in other words, it accepts the same args and kwargs that the aiokafka consumer accepts.

"},{"location":"stream/#kstreams.streams.Stream","title":"kstreams.streams.Stream","text":"

Attributes:

Name Type Description name Optional[str]

Stream name. Default is a generated uuid4

topics List[str]

List of topics to consume

subscribe_by_pattern bool

Whether to subscribe to topics by pattern

backend Kafka

backend kstreams.backends.kafka.Kafka: Backend to connect. Default Kafka

func Callable[[Stream], Awaitable[Any]]

Coroutine function or generator to be called when an event arrives

config Dict[str, Any]

Stream configuration. Here all the properties can be passed in the dictionary

deserializer Deserializer

Deserializer to be used when an event is consumed

initial_offsets List[TopicPartitionOffset]

List of TopicPartitionOffset that will seek the initial offsets to

rebalance_listener RebalanceListener

Listener callbacks for when partitions are assigned or revoked

"},{"location":"stream/#kstreams.streams.Stream--subscribe-to-a-topic","title":"Subscribe to a topic","text":"

Example

import aiorun\nfrom kstreams import create_engine, ConsumerRecord\n\nstream_engine = create_engine(title=\"my-stream-engine\")\n\n\n@stream_engine.stream(\"local--kstreams\", group_id=\"my-group-id\")\nasync def stream(cr: ConsumerRecord) -> None:\n    print(f\"Event consumed: headers: {cr.headers}, payload: {cr.value}\")\n\n\nasync def start():\n    await stream_engine.start()\n\n\nasync def shutdown(loop):\n    await stream_engine.stop()\n\n\nif __name__ == \"__main__\":\n    aiorun.run(\n        start(),\n        stop_on_unhandled_errors=True,\n        shutdown_callback=shutdown\n    )\n
"},{"location":"stream/#kstreams.streams.Stream--subscribe-to-multiple-topics","title":"Subscribe to multiple topics","text":"

Consuming from multiple topics using one stream is possible. A List[str] of topics must be provided.

Example

import aiorun\nfrom kstreams import create_engine, ConsumerRecord\n\nstream_engine = create_engine(title=\"my-stream-engine\")\n\n\n@stream_engine.stream(\n    [\"local--kstreams\", \"local--hello-world\"],\n    group_id=\"my-group-id\",\n)\nasync def consume(cr: ConsumerRecord) -> None:\n    print(f\"Event from {cr.topic}: headers: {cr.headers}, payload: {cr.value}\")\n
"},{"location":"stream/#kstreams.streams.Stream--subscribe-to-topics-by-pattern","title":"Subscribe to topics by pattern","text":"

In the following example the stream will subscribe to any topic that matches the regex ^dev--customer-.*, for example dev--customer-invoice or dev--customer-profile. The subscribe_by_pattern flag must be set to True.

Example

import aiorun\nfrom kstreams import create_engine, ConsumerRecord\n\nstream_engine = create_engine(title=\"my-stream-engine\")\n\n\n@stream_engine.stream(\n    topics=\"^dev--customer-.*$\",\n    subscribe_by_pattern=True,\n    group_id=\"my-group-id\",\n)\nasync def stream(cr: ConsumerRecord) -> None:\n    if cr.topic == \"dev--customer-invoice\":\n        print(\"Event from topic dev--customer-invoice\")\n    elif cr.topic == \"dev--customer-profile\":\n        print(\"Event from topic dev--customer-profile\")\n    else:\n        raise ValueError(f\"Invalid topic {cr.topic}\")\n\n\nasync def start():\n    await stream_engine.start()\n\nasync def shutdown(loop):\n    await stream_engine.stop()\n\n\nif __name__ == \"__main__\":\n    aiorun.run(\n        start(),\n        stop_on_unhandled_errors=True,\n        shutdown_callback=shutdown\n    )\n
"},{"location":"stream/#dependency-injection","title":"Dependency Injection","text":"

The old way to iterate over a stream is with the async for _ in stream loop. The iterable approach works, but in most cases end users are interested only in the ConsumerRecord; for this reason it is possible to remove the async for loop using proper type hints. The available type hints are:

  • ConsumerRecord: The aiokafka ConsumerRecord that will be received every time that a new event is in the Stream
  • Stream: The Stream object that is subscribed to the topic/s. Useful when manual commit is enabled or when other Stream operations are needed
  • Send: Coroutine to produce events. The same as stream_engine.send(...)

If you use type hints, then every time a new event is in the stream, the coroutine function defined by the end user will be awaited with the specified types:

ConsumerRecord | ConsumerRecord and Stream | ConsumerRecord, Stream and Send | Old fashion
@stream_engine.stream(topic)\nasync def my_stream(cr: ConsumerRecord):\n    print(cr.value)\n
@stream_engine.stream(topic, enable_auto_commit=False)\nasync def my_stream(cr: ConsumerRecord, stream: Stream):\n    print(cr.value)\n    await stream.commit()\n
@stream_engine.stream(topic, enable_auto_commit=False)\nasync def my_stream(cr: ConsumerRecord, stream: Stream, send: Send):\n    print(cr.value)\n    await stream.commit()\n    await send(\"sink-to-elastic-topic\", value=cr.value)\n
@stream_engine.stream(topic)\nasync def consume(stream):  # you can specify the type but it will be the same result\n    async for cr in stream:\n        print(cr.value)\n        # you can do something with the stream as well!!\n

Note

The type arguments can be in any order. This might change in the future.

Warning

It is still possible to use the async for in loop, but it might be removed in the future. Migrate to the typing approach

"},{"location":"stream/#creating-a-stream-instance","title":"Creating a Stream instance","text":"

If for any reason you need to create Streams instances directly, you can do it without using the decorator stream_engine.stream.

Stream instance
import aiorun\nfrom kstreams import create_engine, Stream, ConsumerRecord\n\nstream_engine = create_engine(title=\"my-stream-engine\")\n\n\nclass MyDeserializer:\n\n    async def deserialize(self, consumer_record: ConsumerRecord, **kwargs):\n        return consumer_record.value.decode()\n\n\nasync def stream(cr: ConsumerRecord) -> None:\n    print(f\"Event consumed: headers: {cr.headers}, payload: {cr.value}\")\n\n\nstream = Stream(\n    \"local--kstreams\",\n    name=\"my-stream\",\n    func=stream,  # coroutine or async generator\n    deserializer=MyDeserializer(),\n)\n# add the stream to the engine\nstream_engine.add_stream(stream)\n\n\nasync def start():\n    await stream_engine.start()\n    await produce()\n\n\nasync def shutdown(loop):\n    await stream_engine.stop()\n\n\nif __name__ == \"__main__\":\n    aiorun.run(start(), stop_on_unhandled_errors=True, shutdown_callback=shutdown)\n
"},{"location":"stream/#removing-a-stream-from-the-engine","title":"Removing a stream from the engine","text":"Removing stream
stream_engine.remove_stream(stream)\n
"},{"location":"stream/#starting-the-stream-with-initial-offsets","title":"Starting the stream with initial offsets","text":"

If you want to start your consumption from certain offsets, you can include that in your stream instantiation.

Use case: This feature is useful if one wants to manage their own offsets, rather than committing consumed offsets to Kafka. When an application manages its own offsets and tries to start a stream, we start the stream using the initial offsets as defined in the database.

If you try to seek on a partition or topic that is not assigned to your stream, the code will ignore the seek and print out a warning. For example, if you have two consumers that are consuming from different partitions, and you try to seek for all of the partitions on each consumer, each consumer will seek for the partitions it has been assigned, and it will print out a warning log for the ones it was not assigned.

If you try to seek on offsets that are not yet present on your partition, the consumer will revert to the auto_offset_reset config. There will not be a warning, so be aware of this.

Also be aware that when your application restarts, it most likely will trigger the initial_offsets again. This means that setting initial_offsets to a hardcoded number might not give the results you expect.

Initial Offsets from Database
from kstreams import Stream, structs\n\n\ntopic_name = \"local--kstreams\"\ndb_table = ExampleDatabase()\ninitial_offset = structs.TopicPartitionOffset(topic=topic_name, partition=0, offset=db_table.offset)\n\n\nasync def my_stream(stream: Stream):\n    ...\n\n\nstream = Stream(\n    topic_name,\n    name=\"my-stream\",\n    func=my_stream,  # coroutine or async generator\n    deserializer=MyDeserializer(),\n    initial_offsets=[initial_offset],\n)\n
"},{"location":"stream/#stream-crashing","title":"Stream crashing","text":"

If your stream crashes for any reason, event consumption is stopped, meaning that no events will be consumed from the topic. However, it is possible to set different error policies per stream:

  • StreamErrorPolicy.STOP (default): Stop the Stream when an exception occurs. The exception is raised after the stream is properly stopped.
  • StreamErrorPolicy.RESTART: Stop and restart the Stream when an exception occurs. The event that caused the exception is skipped. The exception is NOT raised because the application should continue working; however, logger.exception() is used to alert the user.
  • StreamErrorPolicy.STOP_ENGINE: Stop the StreamEngine when an exception occurs. The exception is raised after ALL the Streams were properly stopped.
  • StreamErrorPolicy.STOP_APPLICATION: Stop the StreamEngine when an exception occurs and raise signal.SIGTERM. Useful when using kstreams with other libraries such as FastAPI.

In the following example, the StreamErrorPolicy.RESTART error policy is specified. If the Stream crashes with a ValueError exception, it is restarted:

from kstreams import create_engine, ConsumerRecord\nfrom kstreams.stream_utils import StreamErrorPolicy\n\nstream_engine = create_engine(title=\"my-stream-engine\")\n\n\n@stream_engine.stream(\n    \"local--hello-world\",\n    group_id=\"example-group\",\n    error_policy=StreamErrorPolicy.RESTART\n)\nasync def stream(cr: ConsumerRecord) -> None:\n    if cr.key == b\"error\":\n        # Stream will be restarted after the ValueError is raised\n        raise ValueError(\"error....\")\n\n    print(f\"Event consumed. Payload {cr.value}\")\n

We can see the logs:

ValueError: error....\nINFO:aiokafka.consumer.group_coordinator:LeaveGroup request succeeded\nINFO:aiokafka.consumer.consumer:Unsubscribed all topics or patterns and assigned partitions\nINFO:kstreams.streams:Stream consuming from topics ['local--hello-world'] has stopped!!! \n\n\nINFO:kstreams.middleware.middleware:Restarting stream <kstreams.streams.Stream object at 0x102d44050>\nINFO:aiokafka.consumer.subscription_state:Updating subscribed topics to: frozenset({'local--hello-world'})\n...\nINFO:aiokafka.consumer.group_coordinator:Setting newly assigned partitions {TopicPartition(topic='local--hello-world', partition=0)} for group example-group\n

Note

If you are using aiorun with stop_on_unhandled_errors=True and the error_policy is StreamErrorPolicy.RESTART then the application will NOT stop as the exception that caused the Stream to crash is not raised

"},{"location":"stream/#changing-consumer-behavior","title":"Changing consumer behavior","text":"

Most of the time you will only set the topic and the group_id for the consumer, but sometimes you might want more control over it, for example changing the policy for resetting offsets on OffsetOutOfRange errors or the session timeout. To do this, you have to use the same kwargs as the aiokafka consumer API:

# The consumer session times out after 500 ms without heartbeats\n# On OffsetOutOfRange errors, the offset will move to the oldest available message (\u2018earliest\u2019)\n\n@stream_engine.stream(\"local--kstream\", group_id=\"de-my-partition\", session_timeout_ms=500, auto_offset_reset=\"earliest\")\nasync def stream(cr: ConsumerRecord):\n    print(f\"Event consumed: headers: {cr.headers}, payload: {cr.value}\")\n
"},{"location":"stream/#manual-commit","title":"Manual commit","text":"

When processing more sensitive data, and you want to be sure that the kafka offset is committed once you have done your tasks, you can use the enable_auto_commit=False mode of the Consumer.

Manual commit example
@stream_engine.stream(\"local--kstream\", group_id=\"de-my-partition\", enable_auto_commit=False)\nasync def stream(cr: ConsumerRecord, stream: Stream):\n    print(f\"Event consumed: headers: {cr.headers}, payload: {cr.value}\")\n\n    # We need to make sure that the payload was stored before committing the kafka offset\n    await store_in_database(cr.value)\n    await stream.commit()  # You need to commit!!!\n

Note

This is a tradeoff from at-most-once to at-least-once delivery; to achieve exactly-once you will need to save offsets in the destination database and validate those yourself.
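
A rough sketch of that idea, assuming hypothetical get_stored_offset and store_in_database helpers that persist the payload together with its topic, partition and offset in a single transaction:

from kstreams import ConsumerRecord, Stream\n\n\n@stream_engine.stream(\"local--kstream\", group_id=\"de-my-partition\", enable_auto_commit=False)\nasync def stream(cr: ConsumerRecord, stream: Stream):\n    # skip events whose offset was already stored (e.g. redelivered after a rebalance)\n    last_offset = await get_stored_offset(cr.topic, cr.partition)\n    if last_offset is not None and cr.offset <= last_offset:\n        return\n\n    # store the payload and the offset atomically, then commit to Kafka\n    await store_in_database(cr.value, topic=cr.topic, partition=cr.partition, offset=cr.offset)\n    await stream.commit()\n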

"},{"location":"stream/#yield-from-stream","title":"Yield from stream","text":"

Sometimes it is useful to yield values from a stream so you can consume events at your own pace, or because you want to return results to the frontend (SSE example). If you use the yield keyword inside a coroutine, it will be \"transformed\" into an asynchronous generator function, meaning that inside there is an async generator that can be consumed.

Consuming an async generator is simple: you just use the async for clause. Because consuming events only happens inside the for loop, you have to make sure that the Stream has been started properly before entering it, and properly stopped after leaving the async for.

To facilitate this, we provide a context manager that takes care of the starting/stopping process.

Yield example
# Create your stream\n@stream_engine.stream(\"local--kstream\")\nasync def stream(cr: ConsumerRecord, stream: Stream):\n    yield cr.value\n\n\n# Consume the stream:\nasync with stream as stream_flow:  # Use the context manager\n    async for value in stream_flow:\n        ...\n        # do something with value (cr.value)\n

Note

If for some reason you interrupt the \"async for\" in the async generator, the Stream will stop consuming events, meaning that the lag will increase.

Note

Yield from a stream only works with the typing approach

"},{"location":"stream/#get-many","title":"Get many","text":"

Get a batch of events from the assigned TopicPartition.

Prefetched events are returned in batches by topic-partition. If messages are not available in the prefetched buffer, this method waits up to timeout_ms milliseconds.

Attributes:

Name Type Description partitions List[TopicPartition] | None

The partitions to fetch messages from. If no partition is specified, then all subscribed partitions will be used

timeout_ms int | None

milliseconds spent waiting if data is not available in the buffer. If 0, returns immediately with any records that are available currently in the buffer, else returns empty. Must not be negative.

max_records int | None

The number of records to fetch. If timeout_ms was defined and reached, and the fetched records have not reached max_records, then it returns immediately with any records that are currently available in the buffer

Returns:

Type Description Dict[TopicPartition, List[ConsumerRecord]]

Topic to list of records

Example

@stream_engine.stream(topic, ...)\nasync def stream(stream: Stream):\n    while True:\n        data = await stream.getmany(max_records=5)\n        print(data)\n
Source code in kstreams/streams.py
async def getmany(\n    self,\n    partitions: typing.Optional[typing.List[TopicPartition]] = None,\n    timeout_ms: int = 0,\n    max_records: typing.Optional[int] = None,\n) -> typing.Dict[TopicPartition, typing.List[ConsumerRecord]]:\n    \"\"\"\n    Get a batch of events from the assigned TopicPartition.\n\n    Prefetched events are returned in batches by topic-partition.\n    If messages is not available in the prefetched buffer this method waits\n    `timeout_ms` milliseconds.\n\n    Attributes:\n        partitions List[TopicPartition] | None: The partitions that need\n            fetching message. If no one partition specified then all\n            subscribed partitions will be used\n        timeout_ms int | None: milliseconds spent waiting if\n            data is not available in the buffer. If 0, returns immediately\n            with any records that are available currently in the buffer,\n            else returns empty. Must not be negative.\n        max_records int | None: The amount of records to fetch.\n            if `timeout_ms` was defined and reached and the fetched records\n            has not reach `max_records` then returns immediately\n            with any records that are available currently in the buffer\n\n    Returns:\n        Topic to list of records\n\n    !!! Example\n        ```python\n        @stream_engine.stream(topic, ...)\n        async def stream(stream: Stream):\n            while True:\n                data = await stream.getmany(max_records=5)\n                print(data)\n        ```\n    \"\"\"\n    partitions = partitions or []\n    return await self.consumer.getmany(  # type: ignore\n        *partitions, timeout_ms=timeout_ms, max_records=max_records\n    )\n

Warning

This approach does not work with Dependency Injection.

"},{"location":"stream/#rebalance-listener","title":"Rebalance Listener","text":"

For some cases you will need a RebalanceListener, so that when partitions are assigned to or revoked from the stream, different actions can be performed.

"},{"location":"stream/#use-cases","title":"Use cases","text":"
  • Cleanup or custom state save on the start of a rebalance operation
  • Saving offsets in a custom store when a partition is revoked
  • Load state or warm up a cache on completion of a successful partition re-assignment (a sketch of a custom listener follows this list).
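
A minimal sketch of a custom listener covering these use cases. It assumes that RebalanceListener and TopicPartition can be imported from kstreams and that the listener is attached through the rebalance_listener argument listed in the Stream attributes; the bodies only print what a real implementation would act on:

import typing\n\nfrom kstreams import ConsumerRecord, RebalanceListener, TopicPartition\n\n\nclass MyRebalanceListener(RebalanceListener):\n    async def on_partitions_revoked(self, revoked: typing.Set[TopicPartition]) -> None:\n        # e.g. save offsets in a custom store before the partitions are taken away\n        print(f\"Partitions revoked from {self.stream}: {revoked}\")\n\n    async def on_partitions_assigned(self, assigned: typing.Set[TopicPartition]) -> None:\n        # e.g. warm up a cache for the newly assigned partitions\n        print(f\"Partitions assigned to {self.stream}: {assigned}\")\n\n\n@stream_engine.stream(\n    \"local--kstreams\",\n    group_id=\"my-group-id\",\n    rebalance_listener=MyRebalanceListener(),\n)\nasync def processor(cr: ConsumerRecord) -> None:\n    print(cr.value)\n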
"},{"location":"stream/#metrics-rebalance-listener","title":"Metrics Rebalance Listener","text":"

Kstreams uses a default listener for all the streams to clean the metrics after a rebalance takes place.

"},{"location":"stream/#kstreams.MetricsRebalanceListener","title":"kstreams.MetricsRebalanceListener","text":"Source code in kstreams/rebalance_listener.py
class MetricsRebalanceListener(RebalanceListener):\n    async def on_partitions_revoked(self, revoked: typing.Set[TopicPartition]) -> None:\n        \"\"\"\n        Coroutine to be called *before* a rebalance operation starts and\n        *after* the consumer stops fetching data.\n\n        This will method will clean up the `Prometheus` metrics\n\n        Attributes:\n            revoked Set[TopicPartitions]: Partitions that were assigned\n                to the consumer on the last rebalance\n        \"\"\"\n        # lock all asyncio Tasks so no new metrics will be added to the Monitor\n        if revoked and self.engine is not None:\n            async with asyncio.Lock():\n                if self.stream is not None and self.stream.consumer is not None:\n                    self.engine.monitor.clean_stream_consumer_metrics(\n                        self.stream.consumer\n                    )\n\n    async def on_partitions_assigned(\n        self, assigned: typing.Set[TopicPartition]\n    ) -> None:\n        \"\"\"\n        Coroutine to be called *after* partition re-assignment completes\n        and *before* the consumer starts fetching data again.\n\n        This method will start the `Prometheus` metrics\n\n        Attributes:\n            assigned Set[TopicPartition]: Partitions assigned to the\n                consumer (may include partitions that were previously assigned)\n        \"\"\"\n        # lock all asyncio Tasks so no new metrics will be added to the Monitor\n        if assigned and self.engine is not None:\n            async with asyncio.Lock():\n                if self.stream is not None:\n                    self.stream.seek_to_initial_offsets()\n
"},{"location":"stream/#kstreams.MetricsRebalanceListener.on_partitions_assigned","title":"on_partitions_assigned(assigned) async","text":"

Coroutine to be called after partition re-assignment completes and before the consumer starts fetching data again.

This method will start the Prometheus metrics

Attributes:

Name Type Description assigned Set[TopicPartition]

Partitions assigned to the consumer (may include partitions that were previously assigned)

Source code in kstreams/rebalance_listener.py
async def on_partitions_assigned(\n    self, assigned: typing.Set[TopicPartition]\n) -> None:\n    \"\"\"\n    Coroutine to be called *after* partition re-assignment completes\n    and *before* the consumer starts fetching data again.\n\n    This method will start the `Prometheus` metrics\n\n    Attributes:\n        assigned Set[TopicPartition]: Partitions assigned to the\n            consumer (may include partitions that were previously assigned)\n    \"\"\"\n    # lock all asyncio Tasks so no new metrics will be added to the Monitor\n    if assigned and self.engine is not None:\n        async with asyncio.Lock():\n            if self.stream is not None:\n                self.stream.seek_to_initial_offsets()\n
"},{"location":"stream/#kstreams.MetricsRebalanceListener.on_partitions_revoked","title":"on_partitions_revoked(revoked) async","text":"

Coroutine to be called before a rebalance operation starts and after the consumer stops fetching data.

This method will clean up the Prometheus metrics

Attributes:

Name Type Description revoked Set[TopicPartitions]

Partitions that were assigned to the consumer on the last rebalance

Source code in kstreams/rebalance_listener.py
async def on_partitions_revoked(self, revoked: typing.Set[TopicPartition]) -> None:\n    \"\"\"\n    Coroutine to be called *before* a rebalance operation starts and\n    *after* the consumer stops fetching data.\n\n    This will method will clean up the `Prometheus` metrics\n\n    Attributes:\n        revoked Set[TopicPartitions]: Partitions that were assigned\n            to the consumer on the last rebalance\n    \"\"\"\n    # lock all asyncio Tasks so no new metrics will be added to the Monitor\n    if revoked and self.engine is not None:\n        async with asyncio.Lock():\n            if self.stream is not None and self.stream.consumer is not None:\n                self.engine.monitor.clean_stream_consumer_metrics(\n                    self.stream.consumer\n                )\n
"},{"location":"stream/#manual-commit_1","title":"Manual Commit","text":"

If manual commit is enabled, you might want to use the ManualCommitRebalanceListener. This rebalance listener will call commit before the stream partitions are revoked, to avoid the error CommitFailedError and duplicate message delivery after a rebalance. See the code example with manual commit

Note

ManualCommitRebalanceListener also includes the MetricsRebalanceListener functionality.
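For illustration, a minimal sketch of wiring this listener into a stream with manual commit could look like the following (the engine instance, topic name and the assumption that consumer kwargs such as enable_auto_commit are forwarded to the underlying consumer are not part of the official example):

from kstreams import ConsumerRecord, ManualCommitRebalanceListener, Stream

# `stream_engine` is assumed to exist, e.g. created with kstreams.create_engine(...)
@stream_engine.stream(
    "local--kstreams",
    group_id="my-group",
    enable_auto_commit=False,  # manual commit; kwarg assumed to be passed to the consumer
    rebalance_listener=ManualCommitRebalanceListener(),
)
async def my_stream(cr: ConsumerRecord, stream: Stream):
    ...  # process the event
    await stream.commit()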

"},{"location":"stream/#kstreams.ManualCommitRebalanceListener","title":"kstreams.ManualCommitRebalanceListener","text":"Source code in kstreams/rebalance_listener.py
class ManualCommitRebalanceListener(MetricsRebalanceListener):\n    async def on_partitions_revoked(self, revoked: typing.Set[TopicPartition]) -> None:\n        \"\"\"\n        Coroutine to be called *before* a rebalance operation starts and\n        *after* the consumer stops fetching data.\n\n        If manual commit is enabled, `commit` is called before the consumers\n        partitions are revoked to prevent the error `CommitFailedError`\n        and duplicate message delivery after a rebalance.\n\n        Attributes:\n            revoked Set[TopicPartitions]: Partitions that were assigned\n                to the consumer on the last rebalance\n        \"\"\"\n        if (\n            revoked\n            and self.stream is not None\n            and self.stream.consumer is not None\n            and not self.stream.consumer._enable_auto_commit\n        ):\n            logger.info(\n                f\"Manual commit enabled for stream {self.stream}. \"\n                \"Performing `commit` before revoking partitions\"\n            )\n            async with asyncio.Lock():\n                await self.stream.commit()\n\n            await super().on_partitions_revoked(revoked=revoked)\n
"},{"location":"stream/#kstreams.ManualCommitRebalanceListener.on_partitions_revoked","title":"on_partitions_revoked(revoked) async","text":"

Coroutine to be called before a rebalance operation starts and after the consumer stops fetching data.

If manual commit is enabled, commit is called before the consumers partitions are revoked to prevent the error CommitFailedError and duplicate message delivery after a rebalance.

Attributes:

Name Type Description revoked Set[TopicPartitions]

Partitions that were assigned to the consumer on the last rebalance

Source code in kstreams/rebalance_listener.py
async def on_partitions_revoked(self, revoked: typing.Set[TopicPartition]) -> None:\n    \"\"\"\n    Coroutine to be called *before* a rebalance operation starts and\n    *after* the consumer stops fetching data.\n\n    If manual commit is enabled, `commit` is called before the consumers\n    partitions are revoked to prevent the error `CommitFailedError`\n    and duplicate message delivery after a rebalance.\n\n    Attributes:\n        revoked Set[TopicPartitions]: Partitions that were assigned\n            to the consumer on the last rebalance\n    \"\"\"\n    if (\n        revoked\n        and self.stream is not None\n        and self.stream.consumer is not None\n        and not self.stream.consumer._enable_auto_commit\n    ):\n        logger.info(\n            f\"Manual commit enabled for stream {self.stream}. \"\n            \"Performing `commit` before revoking partitions\"\n        )\n        async with asyncio.Lock():\n            await self.stream.commit()\n\n        await super().on_partitions_revoked(revoked=revoked)\n
"},{"location":"stream/#custom-rebalance-listener","title":"Custom Rebalance Listener","text":"

If you want to define a custom RebalanceListener, it has to inherit from kstreams.RebalanceListener.

Note

It is also possible to inherit from ManualCommitRebalanceListener and MetricsRebalanceListener
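For example, a sketch of such a subclass (the offset store helper below is purely hypothetical) can keep the built-in commit-before-revocation behaviour by delegating to super():

import typing

from kstreams import ManualCommitRebalanceListener, TopicPartition


class MyRebalanceListener(ManualCommitRebalanceListener):
    async def on_partitions_revoked(
        self, revoked: typing.Set[TopicPartition]
    ) -> None:
        # hypothetical helper: persist the revoked partitions/offsets in a custom store
        await save_offsets_to_custom_store(revoked)

        # keep the manual commit and metrics cleanup from the parent classes
        await super().on_partitions_revoked(revoked=revoked)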

"},{"location":"stream/#kstreams.RebalanceListener","title":"kstreams.RebalanceListener","text":"

A callback interface that the user can implement to trigger custom actions when partitions are assigned to or revoked from the Stream.

Example

from kstreams import RebalanceListener, TopicPartition\nfrom .resource import stream_engine\n\n\nclass MyRebalanceListener(RebalanceListener):\n\n    async def on_partitions_revoked(\n        self, revoked: Set[TopicPartition]\n    ) -> None:\n        # Do something with the revoked partitions\n        # or with the Stream\n        print(self.stream)\n\n    async def on_partitions_assigned(\n        self, assigned: Set[TopicPartition]\n    ) -> None:\n        # Do something with the assigned partitions\n        # or with the Stream\n        print(self.stream)\n\n\n@stream_engine.stream(topic, rebalance_listener=MyRebalanceListener())\nasync def my_stream(stream: Stream):\n    async for event in stream:\n        ...\n
Source code in kstreams/rebalance_listener.py
class RebalanceListener(ConsumerRebalanceListener):\n    \"\"\"\n    A callback interface that the user can implement to trigger custom actions\n    when the set of partitions are assigned or revoked to the `Stream`.\n\n    !!! Example\n        ```python\n        from kstreams import RebalanceListener, TopicPartition\n        from .resource import stream_engine\n\n\n        class MyRebalanceListener(RebalanceListener):\n\n            async def on_partitions_revoked(\n                self, revoked: Set[TopicPartition]\n            ) -> None:\n                # Do something with the revoked partitions\n                # or with the Stream\n                print(self.stream)\n\n            async def on_partitions_assigned(\n                self, assigned: Set[TopicPartition]\n            ) -> None:\n                # Do something with the assigned partitions\n                # or with the Stream\n                print(self.stream)\n\n\n        @stream_engine.stream(topic, rebalance_listener=MyRebalanceListener())\n        async def my_stream(stream: Stream):\n            async for event in stream:\n                ...\n        ```\n    \"\"\"\n\n    def __init__(self) -> None:\n        self.stream: typing.Optional[\"Stream\"] = None\n        # engine added so it can react on rebalance events\n        self.engine: typing.Optional[\"StreamEngine\"] = None\n\n    async def on_partitions_revoked(self, revoked: typing.Set[TopicPartition]) -> None:\n        \"\"\"\n        Coroutine to be called *before* a rebalance operation starts and\n        *after* the consumer stops fetching data.\n\n        If you are using manual commit you have to commit all consumed offsets\n        here, to avoid duplicate message delivery after rebalance is finished.\n\n        Use cases:\n            - cleanup or custom state save on the start of a rebalance operation\n            - saving offsets in a custom store\n\n        Attributes:\n            revoked Set[TopicPartitions]: Partitions that were assigned\n                to the consumer on the last rebalance\n\n        !!! note\n            The `Stream` is available using `self.stream`\n        \"\"\"\n        ...  # pragma: no cover\n\n    async def on_partitions_assigned(\n        self, assigned: typing.Set[TopicPartition]\n    ) -> None:\n        \"\"\"\n        Coroutine to be called *after* partition re-assignment completes\n        and *before* the consumer starts fetching data again.\n\n        It is guaranteed that all the processes in a consumer group will\n        execute their `on_partitions_revoked` callback before any instance\n        executes its `on_partitions_assigned` callback.\n\n        Use cases:\n            - Load a state or cache warmup on completion of a successful\n            partition re-assignment.\n\n        Attributes:\n            assigned Set[TopicPartition]: Partitions assigned to the\n                consumer (may include partitions that were previously assigned)\n\n        !!! note\n            The `Stream` is available using `self.stream`\n        \"\"\"\n        ...  # pragma: no cover\n
"},{"location":"stream/#kstreams.RebalanceListener.on_partitions_assigned","title":"on_partitions_assigned(assigned) async","text":"

Coroutine to be called after partition re-assignment completes and before the consumer starts fetching data again.

It is guaranteed that all the processes in a consumer group will execute their on_partitions_revoked callback before any instance executes its on_partitions_assigned callback.

Use cases
  • Load a state or cache warmup on completion of a successful partition re-assignment.

Attributes:

Name Type Description assigned Set[TopicPartition]

Partitions assigned to the consumer (may include partitions that were previously assigned)

Note

The Stream is available using self.stream

Source code in kstreams/rebalance_listener.py
async def on_partitions_assigned(\n    self, assigned: typing.Set[TopicPartition]\n) -> None:\n    \"\"\"\n    Coroutine to be called *after* partition re-assignment completes\n    and *before* the consumer starts fetching data again.\n\n    It is guaranteed that all the processes in a consumer group will\n    execute their `on_partitions_revoked` callback before any instance\n    executes its `on_partitions_assigned` callback.\n\n    Use cases:\n        - Load a state or cache warmup on completion of a successful\n        partition re-assignment.\n\n    Attributes:\n        assigned Set[TopicPartition]: Partitions assigned to the\n            consumer (may include partitions that were previously assigned)\n\n    !!! note\n        The `Stream` is available using `self.stream`\n    \"\"\"\n    ...  # pragma: no cover\n
"},{"location":"stream/#kstreams.RebalanceListener.on_partitions_revoked","title":"on_partitions_revoked(revoked) async","text":"

Coroutine to be called before a rebalance operation starts and after the consumer stops fetching data.

If you are using manual commit you have to commit all consumed offsets here, to avoid duplicate message delivery after rebalance is finished.

Use cases
  • cleanup or custom state save on the start of a rebalance operation
  • saving offsets in a custom store

Attributes:

Name Type Description revoked Set[TopicPartitions]

Partitions that were assigned to the consumer on the last rebalance

Note

The Stream is available using self.stream

Source code in kstreams/rebalance_listener.py
async def on_partitions_revoked(self, revoked: typing.Set[TopicPartition]) -> None:\n    \"\"\"\n    Coroutine to be called *before* a rebalance operation starts and\n    *after* the consumer stops fetching data.\n\n    If you are using manual commit you have to commit all consumed offsets\n    here, to avoid duplicate message delivery after rebalance is finished.\n\n    Use cases:\n        - cleanup or custom state save on the start of a rebalance operation\n        - saving offsets in a custom store\n\n    Attributes:\n        revoked Set[TopicPartitions]: Partitions that were assigned\n            to the consumer on the last rebalance\n\n    !!! note\n        The `Stream` is available using `self.stream`\n    \"\"\"\n    ...  # pragma: no cover\n
"},{"location":"test_client/","title":"Testing","text":"

To test streams and producers, or to perform e2e tests, you can make use of the test_utils.TestStreamClient.

The TestStreamClient aims to emulate the kafka behaviour as much as possible using asyncio.Queue. This is excellent because you can easily test your code without spinning up kafka, but it comes with some limitations. It is not possible to know beforehand how many topics exist, how many partitions per topic exist, the replication factor, current offsets, etc. So, the test client will create topics, partitions, assignments, etc. at runtime. Each Stream in your application will be assigned 3 partitions per topic by default (0, 1 and 2) in the test environment.

With the test client you can:

  • Send events so you won't need to mock the producer
  • Call the consumer code; the client will make sure that all the events are consumed before leaving the async context
"},{"location":"test_client/#using-teststreamclient","title":"Using TestStreamClient","text":"

Import TestStreamClient.

Create a TestStreamClient by passing the engine instance to it.

Create functions with a name that starts with test_ (this is the standard pytest convention).

Use the TestStreamClient object the same way as you do with the engine.

Write simple assert statements with the standard Python expressions that you need to check (again, standard pytest).

"},{"location":"test_client/#example","title":"Example","text":"

Let's assume that you have the following code example. The goal is to store all the consumed events in an EventStore for future analysis.

# example.py\nimport aiorun\nimport asyncio\nimport typing\nfrom dataclasses import dataclass, field\n\nfrom kstreams import ConsumerRecord, create_engine\nfrom kstreams.streams import Stream\n\ntopic = \"local--kstreams\"\n\nstream_engine = create_engine(title=\"my-stream-engine\")\n\n\n@dataclass\nclass EventStore:\n    \"\"\"\n    Store events in memory\n    \"\"\"\n    events: typing.List[ConsumerRecord] = field(default_factory=list)\n\n    def add(self, event: ConsumerRecord) -> None:\n        self.events.append(event)\n\n    @property\n    def total(self):\n        return len(self.events)\n\n\nevent_store = EventStore()\n\n\n@stream_engine.stream(topic, group_id=\"example-group\")\nasync def consume(cr: ConsumerRecord):\n    event_store.add(cr)\n\n\nasync def produce():\n    payload = b'{\"message\": \"Hello world!\"}'\n\n    for _ in range(5):\n        await stream_engine.send(topic, value=payload, key=\"1\")\n        await asyncio.sleep(2)\n\n\nasync def start():\n    await stream_engine.start()\n    await produce()\n\n\nasync def shutdown(loop):\n    await stream_engine.stop()\n\n\ndef main():\n    aiorun.run(start(), stop_on_unhandled_errors=True, shutdown_callback=shutdown)\n

Then you could have a test_stream.py file to test the code; you need to instantiate the TestStreamClient with the engine:

# test_stream.py\nimport pytest\nfrom kstreams.test_utils import TestStreamClient\n\nfrom example import stream_engine, event_store\n\nclient = TestStreamClient(stream_engine)\n\n\n@pytest.mark.asyncio\nasync def test_add_event_on_consume():\n    \"\"\"\n    Produce some events and check that the EventStore is updated.\n    \"\"\"\n    topic = \"local--kstreams\"  # Use the same topic as the stream\n    event = b'{\"message\": \"Hello world!\"}'\n\n    async with client:\n        metadata = await client.send(topic, value=event, key=\"1\")  # send the event with the test client\n        current_offset = metadata.offset\n        assert metadata.topic == topic\n\n        # send another event and check that the offset was incremented\n        metadata = await client.send(topic, value=b'{\"message\": \"Hello world!\"}', key=\"1\")\n        assert metadata.offset == current_offset + 1\n\n    # check that the event_store has 2 events stored\n    assert event_store.total == 2\n

Note

Notice that the produce coroutine is not used to send events in the test case. The TestStreamClient.send coroutine is used instead. This allows you to test streams without having producer code in your application.

"},{"location":"test_client/#testing-the-commit","title":"Testing the Commit","text":"

In some cases your stream will commit; in this situation, checking the committed partitions can be useful.

import pytest\nfrom kstreams.test_utils import TestStreamClient\nfrom kstreams import ConsumerRecord, Stream, TopicPartition\n\nfrom .example import produce, stream_engine\n\ntopic_name = \"local--kstreams-marcos\"\nvalue = b'{\"message\": \"Hello world!\"}'\nname = \"my-stream\"\nkey = \"1\"\npartition = 2\ntp = TopicPartition(\n    topic=topic_name,\n    partition=partition,\n)\ntotal_events = 10\n\n@stream_engine.stream(topic_name, name=name)\nasync def my_stream(cr: ConsumerRecord, stream: Stream):\n    # commit every time that an event arrives\n    await stream.commit({tp: cr.offset})\n\n\n# test the code\nclient = TestStreamClient(stream_engine)\n\n@pytest.mark.asyncio\nasync def test_consumer_commit(stream_engine: StreamEngine):\n    async with client:\n        for _ in range(0, total_events):\n            await client.send(topic_name, partition=partition, value=value, key=key)\n\n        # check that everything was commited\n        stream = stream_engine.get_stream(name)\n        assert (await stream.committed(tp)) == total_events\n
"},{"location":"test_client/#e2e-test","title":"E2E test","text":"

In the previous code example the application produces to and consumes from the same topic, so TestStreamClient.send is not needed because engine.send is already producing. In those situations you can just use your producer code and check that certain code was called.

# test_example.py\nimport pytest\nfrom unittest.mock import patch\n\nfrom kstreams.test_utils import TestStreamClient\n\nfrom .example import produce, stream_engine\n\nclient = TestStreamClient(stream_engine)\n\n\n@pytest.mark.asyncio\nasync def test_e2e_example():\n    \"\"\"\n    Test that events are produced by the engine and consumed by the streams\n    \"\"\"\n    with patch(\"example.on_consume\") as on_consume, patch(\"example.on_produce\") as on_produce:\n        async with client:\n            await produce()\n\n    assert on_produce.call_count == 5\n    assert on_consume.call_count == 5\n
"},{"location":"test_client/#producer-only","title":"Producer only","text":"

In some scenarios, your application will only produce events and other applications will consume them, but you want to make sure that the event was produced properly and that the topic contains that event.

# producer_example.py\nfrom kstreams import create_engine\nimport aiorun\nimport asyncio\n\nstream_engine = create_engine(title=\"my-stream-engine\")\n\n\nasync def produce(topic: str, value: bytes, key: str):\n    # This could be a complicated function or something like a FastAPI view\n    await stream_engine.send(topic, value=value, key=key)\n\n\nasync def start():\n    await stream_engine.start()\n    await produce()\n\n\nasync def shutdown(loop):\n    await stream_engine.stop()\n\n\ndef main():\n    aiorun.run(start(), stop_on_unhandled_errors=True, shutdown_callback=shutdown)\n

Then you could have a test_producer_example.py file to test the code:

# test_producer_example.py\nimport pytest\nfrom kstreams.test_utils import TestStreamClient\n\nfrom producer_example import stream_engine, produce\n\nclient = TestStreamClient(stream_engine)\n\n\n@pytest.mark.asyncio\nasync def test_event_produced():\n    topic_name = \"local--kstreams\"\n    value = b'{\"message\": \"Hello world!\"}'\n    key = \"1\"\n\n    async with client:\n        await produce(topic=topic_name ,value=value, key=key) # use the produce code to send events\n\n        # check that the event was placed in a topic in a proper way\n        consumer_record = await client.get_event(topic_name=topic_name)\n\n        assert consumer_record.value == value\n        assert consumer_record.key == key\n

Note

Even though the previous example uses a simple produce function, it shows what to do when the producer code is encapsulated in other functions, for example a FastAPI view. In that case you don't want to use client.send directly; just call the function that contains stream_engine.send(...)

"},{"location":"test_client/#defining-extra-topics","title":"Defining extra topics","text":"

For some use cases it is required to produce an event to a topic (target topic) after an event was consumed (source topic). We are in control of the source topic because it has a stream associated with it and we want to consume events from it; however, we might not be in control of the target topic.

How can we consume an event from the target topic, which has no stream associated with it and will only be created when a send is reached? The answer is to pre-define the extra topics before the test cycle starts. Let's take a look at an example:

Let's imagine that we have the following code:

from kstreams import ConsumerRecord\n\nfrom .engine import stream_engine\n\n\n@stream_engine.stream(\"source-topic\", name=name)\nasync def consume(cr: ConsumerRecord) -> None:\n    # do something, for example save to db\n    await save_to_db(cr)\n\n    # then produce the event to the `target topic`\n    await stream_engine.send(\"target-topic\", value=cr.value, key=cr.key, headers=cr.headers)\n

Here we can test two things:

  1. Sending an event to the source-topic and checking that the event has been consumed and saved to the DB
  2. Checking that the event was sent to the target-topic

Testing point 1 is straightforward:

import pytest\nfrom kstreams.test_utils import TestStreamClient\n\nfrom .engine import stream_engine\n\n\nclient = TestStreamClient(stream_engine)\nvalue = b'{\"message\": \"Hello world!\"}'\nkey = \"my-key\"\n\nasync with client:\n    # produce to the topic that has a stream\n    await client.send(\"source-topic\", value=value, key=key)\n\n    # check that the event was saved to the DB\n    assert await db.get(...)\n

However, testing point 2 requires more effort, as the TestStreamClient is not aware of the target topic until it reaches the send inside the consume coroutine. If we try to get the target topic event inside the async with context we will get an error:

async with client:\n    # produce to the topic that has a stream\n    await client.send(\"source-topic\", value=value, key=key)\n\n    ...\n    # Let's check if it was received by the target topic\n    event = await client.get_event(topic_name=\"target-topic\")\n\n\nValueError: You might be trying to get the topic target-topic outside the `client async context` or trying to get an event from an empty topic target-topic. Make sure that the code is inside the async contextand the topic has events.\n

We could work around this with a delay (await asyncio.sleep(...)) inside the async with context to give the TestStreamClient time to create the topic; however, if the business logic inside the consume is slow we need to add more delay, and it then becomes a race condition.

To solve it properly, we can tell the TestStreamClient which extra topics we need during the test cycle.

import pytest\nfrom kstreams.test_utils import TestStreamClient\n\nfrom .engine import stream_engine\n\n\n# tell the client to create the extra topics\nclient = TestStreamClient(stream_engine, topics=[\"target-topic\"])\nvalue = b'{\"message\": \"Hello world!\"}'\nkey = \"my-key\"\n\nasync with client:\n    # produce to the topic that has a stream\n    await client.send(\"source-topic\", value=value, key=key)\n\n    # check that the event was saved to the DB\n    assert await db.get(...)\n\n    # Let's check if it was received by the target topic\n    event = await client.get_event(topic_name=\"target-topic\")\n    assert event.value == value\n    assert event.key == key\n
"},{"location":"test_client/#topics-subscribed-by-pattern","title":"Topics subscribed by pattern","text":"

When a Stream uses pattern subscription it is not possible to know beforehand how many topics the Stream will consume from. To solve this problem, the topics must be pre-defined using the extra topics feature of the TestStreamClient:

In the following example we have a Stream that will consume from topics that match the regular expression ^dev--customer-.*$, for example dev--customer-invoice and dev--customer-profile.

# app.py\nfrom kstreams import ConsumerRecord, create_engine\n\nstream_engine = create_engine(title=\"my-stream-engine\")\n\n\n@stream_engine.stream(topics=\"^dev--customer-.*$\", subscribe_by_pattern=True)\nasync def stream(cr: ConsumerRecord):\n    if cr.topic == customer_invoice_topic:\n        assert cr.value == invoice_event\n    elif cr.topic == customer_profile_topic:\n        assert cr.value == profile_event\n    else:\n        raise ValueError(f\"Invalid topic {cr.topic}\")\n

Then, to test our Stream, we need to pre-define the topics:

# test_stream.py\nimport asyncio\n\nimport pytest\nfrom kstreams.test_utils import TestStreamClient, TopicManager\n\nfrom app import stream_engine\n\n\n@pytest.mark.asyncio\nasync def test_consume_events_topics_by_pattern():\n    \"\"\"\n    This test shows the possibility to subscribe to multiple topics using a pattern\n    \"\"\"\n    customer_invoice_topic = \"dev--customer-invoice\"\n    customer_profile_topic = \"dev--customer-profile\"\n\n    client = TestStreamClient(\n        stream_engine, topics=[customer_invoice_topic, customer_profile_topic]\n    )\n\n    async with client:\n        await client.send(customer_invoice_topic, value=b\"invoice-1\", key=\"1\")\n        await client.send(customer_profile_topic, value=b\"profile-1\", key=\"1\")\n\n        # give some time to consume all the events\n        await asyncio.sleep(0.1)\n        assert TopicManager.all_messages_consumed()\n
"},{"location":"test_client/#disabling-monitoring-during-testing","title":"Disabling monitoring during testing","text":"

Monitoring streams and producers is vital for streaming applications, but it requires extra effort. Sometimes during testing, monitoring is not required as we only want to focus on testing the business logic. To disable monitoring during testing, use:

client = TestStreamClient(stream_engine, monitoring_enabled=False)\n
"},{"location":"utils/","title":"Utils","text":"

Utility functions

"},{"location":"utils/#kstreams.utils","title":"kstreams.utils","text":""},{"location":"utils/#kstreams.utils.create_ssl_context","title":"create_ssl_context(*, cafile=None, capath=None, cadata=None, certfile=None, keyfile=None, password=None, crlfile=None)","text":"

Wrapper of aiokafka.helpers.create_ssl_context with typehints.

Parameters:

Name Type Description Default cafile Optional[str]

Certificate Authority file path containing certificates used to sign broker certificates

None capath Optional[str]

Same as cafile, but points to a directory containing several CA certificates

None cadata Union[str, bytes, None]

Same as cafile, but instead contains already read data in either ASCII or bytes format

None certfile Optional[str]

optional filename of file in PEM format containing the client certificate, as well as any CA certificates needed to establish the certificate's authenticity

None keyfile Optional[str]

optional filename containing the client private key.

None password Optional[str]

optional password to be used when loading the certificate chain

None Source code in kstreams/utils.py
def create_ssl_context(\n    *,\n    cafile: Optional[str] = None,\n    capath: Optional[str] = None,\n    cadata: Union[str, bytes, None] = None,\n    certfile: Optional[str] = None,\n    keyfile: Optional[str] = None,\n    password: Optional[str] = None,\n    crlfile: Any = None,\n):\n    \"\"\"Wrapper of [aiokafka.helpers.create_ssl_context](\n        https://aiokafka.readthedocs.io/en/stable/api.html#helpers\n    )\n    with typehints.\n\n    Arguments:\n        cafile: Certificate Authority file path containing certificates\n            used to sign broker certificates\n        capath: Same as `cafile`, but points to a directory containing\n            several CA certificates\n        cadata: Same as `cafile`, but instead contains already\n            read data in either ASCII or bytes format\n        certfile: optional filename of file in PEM format containing\n            the client certificate, as well as any CA certificates needed to\n            establish the certificate's authenticity\n        keyfile: optional filename containing the client private key.\n        password: optional password to be used when loading the\n            certificate chain\n\n    \"\"\"\n    return aiokafka_create_ssl_context(\n        cafile=cafile,\n        capath=capath,\n        cadata=cadata,\n        certfile=certfile,\n        keyfile=keyfile,\n        password=password,\n        crlfile=crlfile,\n    )\n
"},{"location":"utils/#kstreams.utils.create_ssl_context_from_mem","title":"create_ssl_context_from_mem(*, certdata, keydata, password=None, cadata=None)","text":"

Create an SSL context from data in memory.

This makes it easy to read the certificates from environment variables; usually the data is loaded from env variables.

Parameters:

Name Type Description Default cadata Optional[str]

certificates used to sign broker certificates provided as unicode str

None certdata str

the client certificate, as well as any CA certificates needed to establish the certificate's authenticity provided as unicode str

required keydata str

the client private key provided as unicode str

required password Optional[str]

optional password to be used when loading the certificate chain

None Source code in kstreams/utils.py
def create_ssl_context_from_mem(\n    *,\n    certdata: str,\n    keydata: str,\n    password: Optional[str] = None,\n    cadata: Optional[str] = None,\n) -> Optional[ssl.SSLContext]:\n    \"\"\"Create a SSL context from data on memory.\n\n    This makes it easy to read the certificates from environmental variables\n    Usually the data is loaded from env variables.\n\n    Arguments:\n        cadata: certificates used to sign broker certificates provided as unicode str\n        certdata: the client certificate, as well as any CA certificates needed to\n            establish the certificate's authenticity provided as unicode str\n        keydata: the client private key provided as unicode str\n        password: optional password to be used when loading the\n            certificate chain\n    \"\"\"\n    with contextlib.ExitStack() as stack:\n        cert_file = stack.enter_context(NamedTemporaryFile(suffix=\".crt\"))\n        key_file = stack.enter_context(NamedTemporaryFile(suffix=\".key\"))\n\n        # expecting unicode data, writing it as bytes to files as utf-8\n        cert_file.write(certdata.encode(\"utf-8\"))\n        cert_file.flush()\n\n        key_file.write(keydata.encode(\"utf-8\"))\n        key_file.flush()\n\n        ssl_context = ssl.create_default_context(cadata=cadata)\n        ssl_context.load_cert_chain(\n            cert_file.name, keyfile=key_file.name, password=password\n        )\n        return ssl_context\n    return None\n
"}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Kstreams","text":"

kstreams is a library/micro framework to use with kafka. It has a simple kafka streams implementation that gives certain guarantees; see below.

"},{"location":"#requirements","title":"Requirements","text":"

python 3.8+

"},{"location":"#installation","title":"Installation","text":"
pip install kstreams\n

You will need a worker; we recommend aiorun

pip install aiorun\n
"},{"location":"#usage","title":"Usage","text":"
import aiorun\nfrom kstreams import create_engine, ConsumerRecord\n\n\nstream_engine = create_engine(title=\"my-stream-engine\")\n\n@stream_engine.stream(\"local--kstreams\")\nasync def consume(cr: ConsumerRecord):\n    print(f\"Event consumed: headers: {cr.headers}, payload: {cr.value}\")\n\n\nasync def produce():\n    payload = b'{\"message\": \"Hello world!\"}'\n\n    for i in range(5):\n        metadata = await stream_engine.send(\"local--kstreams\", value=payload)\n        print(f\"Message sent: {metadata}\")\n\n\nasync def start():\n    await stream_engine.start()\n    await produce()\n\n\nasync def shutdown(loop):\n    await stream_engine.stop()\n\n\nif __name__ == \"__main__\":\n    aiorun.run(start(), stop_on_unhandled_errors=True, shutdown_callback=shutdown)\n
"},{"location":"#kafka-configuration","title":"Kafka configuration","text":"

Configure kafka using the kafka backend provided.

"},{"location":"#development","title":"Development","text":"

This repo requires the use of poetry instead of pip. Note: if you want to have the virtualenv in the same path as the project, you should first run poetry config --local virtualenvs.in-project true

To install the dependencies just execute:

poetry install\n

Then you can activate the virtualenv with

poetry shell\n

Run test:

./scripts/test\n

Run code linting (black and isort):

./scripts/lint\n
"},{"location":"#commit-messages","title":"Commit messages","text":"

The use of commitizen is recommended. Commitizen is part of the dev dependencies.

cz commit\n
"},{"location":"backends/","title":"Backends","text":"

The main idea of a backend is to supply the configuration needed to create a connection with the broker.

kstreams currently has support for Kafka as a backend.

"},{"location":"backends/#kstreams.backends.kafka.Kafka","title":"kstreams.backends.kafka.Kafka","text":"

The Kafka backend validates the given attributes.

It uses pydantic internally.

Attributes:

Name Type Description bootstrap_servers List[str]

kafka list of hostname:port

security_protocol SecurityProtocol

Protocol used to communicate with brokers

ssl_context Optional[SSLContext]

a python std ssl.SSLContext instance, you can generate it with create_ssl_context or create_ssl_context_from_mem

sasl_mechanism SaslMechanism

Authentication mechanism when security_protocol is configured for SASL_PLAINTEXT or SASL_SSL

sasl_plain_username Optional[str]

username for sasl PLAIN authentication

sasl_plain_password Optional[str]

password for sasl PLAIN authentication

sasl_oauth_token_provider Optional[str]

Token provider used with the OAUTHBEARER sasl_mechanism

Raises:

Type Description ValidationError

a pydantic.ValidationError exception

"},{"location":"backends/#kstreams.backends.kafka.Kafka--plaintext","title":"PLAINTEXT","text":"

Example

from kstreams.backends.kafka import Kafka\nfrom kstreams import create_engine, Stream\n\nbackend = Kafka(bootstrap_servers=[\"localhost:9092\"])\nstream_engine = create_engine(title=\"my-stream-engine\", backend=backend)\n
"},{"location":"backends/#kstreams.backends.kafka.Kafka--ssl","title":"SSL","text":"

Example

Create SSL context
import ssl\n\nfrom kstreams.backends.kafka import Kafka\nfrom kstreams import create_engine, utils, Stream\n\n\ndef get_ssl_context() -> ssl.SSLContext:\n    return utils.create_ssl_context(\n        cafile=\"certificate-authority-file-path\",\n        capath=\"points-to-directory-with-several-ca-certificates\",\n        cadata=\"same-as-cafile-but-ASCII-or-bytes-format\",\n        certfile=\"client-certificate-file-name\",\n        keyfile=\"client-private-key-file-name\",\n        password=\"password-to-load-certificate-chain\",\n    )\n\nbackend = Kafka(\n    bootstrap_servers=[\"localhost:9094\"],\n    security_protocol=\"SSL\",\n    ssl_context=get_ssl_context(),\n)\n\nstream_engine = create_engine(title=\"my-stream-engine\", backend=backend)\n

Note

Check create ssl context util

Example

Create SSL context from memory
import ssl\n\nfrom kstreams.backends.kafka import Kafka\nfrom kstreams import create_engine, utils, Stream\n\n\ndef get_ssl_context() -> ssl.SSLContext:\n    return utils.create_ssl_context_from_mem(\n        cadata=\"ca-certificates-as-unicode\",\n        certdata=\"client-certificate-as-unicode\",\n        keydata=\"client-private-key-as-unicode\",\n        password=\"optional-password-to-load-certificate-chain\",\n    )\n\nbackend = Kafka(\n    bootstrap_servers=[\"localhost:9094\"],\n    security_protocol=\"SSL\",\n    ssl_context=get_ssl_context(),\n)\n\nstream_engine = create_engine(title=\"my-stream-engine\", backend=backend)\n

Note

Check create ssl context from memory util
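The validator in the source below also covers the SASL protocols. A rough sketch for SASL_PLAINTEXT with the PLAIN mechanism (the broker address and credentials are placeholders; plain strings are assumed to coerce to the enums, as in the SSL examples above) could be:

from kstreams.backends.kafka import Kafka
from kstreams import create_engine

backend = Kafka(
    bootstrap_servers=["localhost:9093"],
    security_protocol="SASL_PLAINTEXT",
    sasl_mechanism="PLAIN",
    # both username and password are required for the PLAIN mechanism,
    # otherwise a ValidationError is raised
    sasl_plain_username="my-user",
    sasl_plain_password="my-password",
)

stream_engine = create_engine(title="my-stream-engine", backend=backend)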

Source code in kstreams/backends/kafka.py
class Kafka(BaseModel):\n    \"\"\"\n    The `Kafka` backend validates the given attributes.\n\n    It uses pydantic internally.\n\n    Attributes:\n        bootstrap_servers: kafka list of `hostname:port`\n        security_protocol: Protocol used to communicate with brokers\n        ssl_context: a python std `ssl.SSLContext` instance, you can generate\n            it with `create_ssl_context`\n            or `create_ssl_context_from_mem`\n        sasl_mechanism: Authentication mechanism when `security_protocol` is configured\n            for `SASL_PLAINTEXT` or `SASL_SSL`\n        sasl_plain_username: username for sasl PLAIN authentication\n        sasl_plain_password: password for sasl PLAIN authentication\n        sasl_oauth_token_provider: smth\n\n    Raises:\n        ValidationError: a `pydantic.ValidationError` exception\n\n    ## PLAINTEXT\n\n    !!! Example\n        ```python\n        from kstreams.backends.kafka import Kafka\n        from kstreams import create_engine, Stream\n\n        backend = Kafka(bootstrap_servers=[\"localhost:9092\"])\n        stream_engine = create_engine(title=\"my-stream-engine\", backend=backend)\n        ```\n\n    ## SSL\n\n    !!! Example\n        ```python title=\"Create SSL context\"\n        import ssl\n\n        from kstreams.backends.kafka import Kafka\n        from kstreams import create_engine, utils, Stream\n\n\n        def get_ssl_context() -> ssl.SSLContext:\n            return utils.create_ssl_context(\n                cafile=\"certificate-authority-file-path\",\n                capath=\"points-to-directory-with-several-ca-certificates\",\n                cadata=\"same-as-cafile-but-ASCII-or-bytes-format\",\n                certfile=\"client-certificate-file-name\",\n                keyfile=\"client-private-key-file-name\",\n                password=\"password-to-load-certificate-chain\",\n            )\n\n        backend = Kafka(\n            bootstrap_servers=[\"localhost:9094\"],\n            security_protocol=\"SSL\",\n            ssl_context=get_ssl_context(),\n        )\n\n        stream_engine = create_engine(title=\"my-stream-engine\", backend=backend)\n        ```\n\n        !!! note\n            Check [create ssl context util](https://kpn.github.io/kstreams/utils/#kstreams.utils.create_ssl_context)\n\n    !!! Example\n        ```python title=\"Create SSL context from memory\"\n        import ssl\n\n        from kstreams.backends.kafka import Kafka\n        from kstreams import create_engine, utils, Stream\n\n\n        def get_ssl_context() -> ssl.SSLContext:\n            return utils.create_ssl_context_from_mem(\n                cadata=\"ca-certificates-as-unicode\",\n                certdata=\"client-certificate-as-unicode\",\n                keydata=\"client-private-key-as-unicode\",\n                password=\"optional-password-to-load-certificate-chain\",\n            )\n\n        backend = Kafka(\n            bootstrap_servers=[\"localhost:9094\"],\n            security_protocol=\"SSL\",\n            ssl_context=get_ssl_context(),\n        )\n\n        stream_engine = create_engine(title=\"my-stream-engine\", backend=backend)\n        ```\n\n        !!! 
note\n            Check [create ssl context from memerory util](https://kpn.github.io/kstreams/utils/#kstreams.utils.create_ssl_context_from_mem)\n    \"\"\"\n\n    bootstrap_servers: List[str] = [\"localhost:9092\"]\n    security_protocol: SecurityProtocol = SecurityProtocol.PLAINTEXT\n\n    ssl_context: Optional[ssl.SSLContext] = None\n\n    sasl_mechanism: SaslMechanism = SaslMechanism.PLAIN\n    sasl_plain_username: Optional[str] = None\n    sasl_plain_password: Optional[str] = None\n    sasl_oauth_token_provider: Optional[str] = None\n    model_config = ConfigDict(arbitrary_types_allowed=True, use_enum_values=True)\n\n    @model_validator(mode=\"after\")\n    @classmethod\n    def protocols_validation(cls, values):\n        security_protocol = values.security_protocol\n\n        if security_protocol == SecurityProtocol.PLAINTEXT:\n            return values\n        elif security_protocol == SecurityProtocol.SSL:\n            if values.ssl_context is None:\n                raise ValueError(\"`ssl_context` is required\")\n            return values\n        elif security_protocol == SecurityProtocol.SASL_PLAINTEXT:\n            if values.sasl_mechanism is SaslMechanism.OAUTHBEARER:\n                # We don't perform a username and password check if OAUTHBEARER\n                return values\n            if (\n                values.sasl_mechanism is SaslMechanism.PLAIN\n                and values.sasl_plain_username is None\n            ):\n                raise ValueError(\n                    \"`sasl_plain_username` is required when using SASL_PLAIN\"\n                )\n            if (\n                values.sasl_mechanism is SaslMechanism.PLAIN\n                and values.sasl_plain_password is None\n            ):\n                raise ValueError(\n                    \"`sasl_plain_password` is required when using SASL_PLAIN\"\n                )\n            return values\n        elif security_protocol == SecurityProtocol.SASL_SSL:\n            if values.ssl_context is None:\n                raise ValueError(\"`ssl_context` is required\")\n            if (\n                values.sasl_mechanism is SaslMechanism.PLAIN\n                and values.sasl_plain_username is None\n            ):\n                raise ValueError(\n                    \"`sasl_plain_username` is required when using SASL_PLAIN\"\n                )\n            if (\n                values.sasl_mechanism is SaslMechanism.PLAIN\n                and values.sasl_plain_password is None\n            ):\n                raise ValueError(\n                    \"`sasl_plain_password` is required when using SASL_PLAIN\"\n                )\n            return values\n
"},{"location":"engine/","title":"StreamEngine","text":""},{"location":"engine/#kstreams.engine.StreamEngine","title":"kstreams.engine.StreamEngine","text":"

Attributes:

Name Type Description backend Kafka

Backend to connect to. Default Kafka

consumer_class Consumer

The consumer class to use when instantiating a consumer. Default kstreams.Consumer

producer_class Producer

The producer class to use when instantiating the producer. Default kstreams.Producer

monitor PrometheusMonitor

Prometheus monitor that holds the metrics

title str | None

Engine name

serializer Serializer | None

Serializer to use when an event is produced.

deserializer Deserializer | None

Deserializer to be used when an event is consumed. If provided, it will be used in all Stream instances as a general one. To override it, you can provide a different deserializer per Stream.

Example

Usage
import kstreams\n\nstream_engine = kstreams.create_engine(\n    title=\"my-stream-engine\"\n)\n\n@kstreams.stream(\"local--hello-world\", group_id=\"example-group\")\nasync def consume(stream: kstreams.ConsumerRecord) -> None:\n    print(f\"showing bytes: {cr.value}\")\n\n\nawait stream_engine.start()\n
Source code in kstreams/engine.py
class StreamEngine:\n    \"\"\"\n    Attributes:\n        backend kstreams.backends.Kafka: Backend to connect. Default `Kafka`\n        consumer_class kstreams.Consumer: The consumer class to use when\n            instanciate a consumer. Default kstreams.Consumer\n        producer_class kstreams.Producer: The producer class to use when\n            instanciate the producer. Default kstreams.Producer\n        monitor kstreams.PrometheusMonitor: Prometheus monitor that holds\n            the [metrics](https://kpn.github.io/kstreams/metrics/)\n        title str | None: Engine name\n        serializer kstreams.serializers.Serializer | None: Serializer to\n            use when an event is produced.\n        deserializer kstreams.serializers.Deserializer | None: Deserializer\n            to be used when an event is consumed.\n            If provided it will be used in all Streams instances as a general one.\n            To override it per Stream, you can provide one per Stream\n\n    !!! Example\n        ```python title=\"Usage\"\n        import kstreams\n\n        stream_engine = kstreams.create_engine(\n            title=\"my-stream-engine\"\n        )\n\n        @kstreams.stream(\"local--hello-world\", group_id=\"example-group\")\n        async def consume(stream: kstreams.ConsumerRecord) -> None:\n            print(f\"showing bytes: {cr.value}\")\n\n\n        await stream_engine.start()\n        ```\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        backend: Kafka,\n        consumer_class: typing.Type[Consumer],\n        producer_class: typing.Type[Producer],\n        monitor: PrometheusMonitor,\n        title: typing.Optional[str] = None,\n        deserializer: Deprecated[typing.Optional[Deserializer]] = None,\n        serializer: typing.Optional[Serializer] = None,\n        on_startup: typing.Optional[EngineHooks] = None,\n        on_stop: typing.Optional[EngineHooks] = None,\n        after_startup: typing.Optional[EngineHooks] = None,\n        after_stop: typing.Optional[EngineHooks] = None,\n    ) -> None:\n        self.title = title\n        self.backend = backend\n        self.consumer_class = consumer_class\n        self.producer_class = producer_class\n        self.deserializer = deserializer\n        self.serializer = serializer\n        self.monitor = monitor\n        self._producer: typing.Optional[typing.Type[Producer]] = None\n        self._streams: typing.List[Stream] = []\n        self._on_startup = [] if on_startup is None else list(on_startup)\n        self._on_stop = [] if on_stop is None else list(on_stop)\n        self._after_startup = [] if after_startup is None else list(after_startup)\n        self._after_stop = [] if after_stop is None else list(after_stop)\n\n    async def send(\n        self,\n        topic: str,\n        value: typing.Any = None,\n        key: typing.Any = None,\n        partition: typing.Optional[int] = None,\n        timestamp_ms: typing.Optional[int] = None,\n        headers: typing.Optional[Headers] = None,\n        serializer: typing.Optional[Serializer] = None,\n        serializer_kwargs: typing.Optional[typing.Dict] = None,\n    ):\n        \"\"\"\n        Attributes:\n            topic str: Topic name to send the event to\n            value Any: Event value\n            key str | None: Event key\n            partition int | None: Topic partition\n            timestamp_ms int | None: Event timestamp in miliseconds\n            headers Dict[str, str] | None: Event headers\n            serializer 
kstreams.serializers.Serializer | None: Serializer to\n                encode the event\n            serializer_kwargs Dict[str, Any] | None: Serializer kwargs\n        \"\"\"\n        if self._producer is None:\n            raise EngineNotStartedException()\n\n        serializer = serializer or self.serializer\n\n        # serialize only when value and serializer are present\n        if value is not None and serializer is not None:\n            value = await serializer.serialize(\n                value, headers=headers, serializer_kwargs=serializer_kwargs\n            )\n\n        encoded_headers = None\n        if headers is not None:\n            encoded_headers = encode_headers(headers)\n\n        fut = await self._producer.send(\n            topic,\n            value=value,\n            key=key,\n            partition=partition,\n            timestamp_ms=timestamp_ms,\n            headers=encoded_headers,\n        )\n        metadata: RecordMetadata = await fut\n        self.monitor.add_topic_partition_offset(\n            topic, metadata.partition, metadata.offset\n        )\n\n        return metadata\n\n    async def start(self) -> None:\n        # Execute on_startup hooks\n        await execute_hooks(self._on_startup)\n\n        # add the producer and streams to the Monitor\n        self.monitor.add_producer(self._producer)\n        self.monitor.add_streams(self._streams)\n\n        await self.start_producer()\n        await self.start_streams()\n\n        # Execute after_startup hooks\n        await execute_hooks(self._after_startup)\n\n    def on_startup(\n        self,\n        func: typing.Callable[[], typing.Any],\n    ) -> typing.Callable[[], typing.Any]:\n        \"\"\"\n        A list of callables to run before the engine starts.\n        Handler are callables that do not take any arguments, and may be either\n        standard functions, or async functions.\n\n        Attributes:\n            func typing.Callable[[], typing.Any]: Func to callable before engine starts\n\n        !!! Example\n            ```python title=\"Engine before startup\"\n\n            import kstreams\n\n            stream_engine = kstreams.create_engine(\n                title=\"my-stream-engine\"\n            )\n\n            @stream_engine.on_startup\n            async def init_db() -> None:\n                print(\"Initializing Database Connections\")\n                await init_db()\n\n\n            @stream_engine.on_startup\n            async def start_background_task() -> None:\n                print(\"Some background task\")\n            ```\n        \"\"\"\n        self._on_startup.append(func)\n        return func\n\n    def on_stop(\n        self,\n        func: typing.Callable[[], typing.Any],\n    ) -> typing.Callable[[], typing.Any]:\n        \"\"\"\n        A list of callables to run before the engine stops.\n        Handler are callables that do not take any arguments, and may be either\n        standard functions, or async functions.\n\n        Attributes:\n            func typing.Callable[[], typing.Any]: Func to callable before engine stops\n\n        !!! 
Example\n            ```python title=\"Engine before stops\"\n\n            import kstreams\n\n            stream_engine = kstreams.create_engine(\n                title=\"my-stream-engine\"\n            )\n\n            @stream_engine.on_stop\n            async def close_db() -> None:\n                print(\"Closing Database Connections\")\n                await db_close()\n            ```\n        \"\"\"\n        self._on_stop.append(func)\n        return func\n\n    def after_startup(\n        self,\n        func: typing.Callable[[], typing.Any],\n    ) -> typing.Callable[[], typing.Any]:\n        \"\"\"\n        A list of callables to run after the engine starts.\n        Handler are callables that do not take any arguments, and may be either\n        standard functions, or async functions.\n\n        Attributes:\n            func typing.Callable[[], typing.Any]: Func to callable after engine starts\n\n        !!! Example\n            ```python title=\"Engine after startup\"\n\n            import kstreams\n\n            stream_engine = kstreams.create_engine(\n                title=\"my-stream-engine\"\n            )\n\n            @stream_engine.after_startup\n            async def after_startup() -> None:\n                print(\"Set pod as healthy\")\n                await mark_healthy_pod()\n            ```\n        \"\"\"\n        self._after_startup.append(func)\n        return func\n\n    def after_stop(\n        self,\n        func: typing.Callable[[], typing.Any],\n    ) -> typing.Callable[[], typing.Any]:\n        \"\"\"\n        A list of callables to run after the engine stops.\n        Handler are callables that do not take any arguments, and may be either\n        standard functions, or async functions.\n\n        Attributes:\n            func typing.Callable[[], typing.Any]: Func to callable after engine stops\n\n        !!! 
Example\n            ```python title=\"Engine after stops\"\n\n            import kstreams\n\n            stream_engine = kstreams.create_engine(\n                title=\"my-stream-engine\"\n            )\n\n            @stream_engine.after_stop\n            async def after_stop() -> None:\n                print(\"Finishing backgrpund tasks\")\n            ```\n        \"\"\"\n        self._after_stop.append(func)\n        return func\n\n    async def stop(self) -> None:\n        # Execute on_startup hooks\n        await execute_hooks(self._on_stop)\n\n        await self.monitor.stop()\n        await self.stop_producer()\n        await self.stop_streams()\n\n        # Execute after_startup hooks\n        await execute_hooks(self._after_stop)\n\n    async def stop_producer(self):\n        if self._producer is not None:\n            await self._producer.stop()\n        logger.info(\"Producer has STOPPED....\")\n\n    async def start_producer(self, **kwargs) -> None:\n        if self.producer_class is None:\n            return None\n        config = {**self.backend.model_dump(), **kwargs}\n        self._producer = self.producer_class(**config)\n        if self._producer is None:\n            return None\n        await self._producer.start()\n\n    async def start_streams(self) -> None:\n        # Only start the Streams that are not async_generators\n        streams = [\n            stream\n            for stream in self._streams\n            if not inspect.isasyncgenfunction(stream.func)\n        ]\n\n        await self._start_streams_on_background_mode(streams)\n\n    async def _start_streams_on_background_mode(\n        self, streams: typing.List[Stream]\n    ) -> None:\n        # start all the streams\n        for stream in streams:\n            asyncio.create_task(stream.start())\n\n        # start monitoring\n        asyncio.create_task(self.monitor.start())\n\n    async def stop_streams(self) -> None:\n        for stream in self._streams:\n            await stream.stop()\n        logger.info(\"Streams have STOPPED....\")\n\n    async def clean_streams(self):\n        await self.stop_streams()\n        self._streams = []\n\n    def exist_stream(self, name: str) -> bool:\n        stream = self.get_stream(name)\n        return True if stream is not None else False\n\n    def get_stream(self, name: str) -> typing.Optional[Stream]:\n        stream = next((stream for stream in self._streams if stream.name == name), None)\n\n        return stream\n\n    def add_stream(\n        self, stream: Stream, error_policy: typing.Optional[StreamErrorPolicy] = None\n    ) -> None:\n        \"\"\"\n        Add a stream to the engine.\n\n        This method registers a new stream with the engine, setting up necessary\n        configurations and handlers. 
If a stream with the same name already exists,\n        a DuplicateStreamException is raised.\n\n        Args:\n            stream: The stream to be added.\n            error_policy: An optional error policy to be applied to the stream.\n                You should probably set directly when instanciating a Stream, not here.\n\n        Raises:\n            DuplicateStreamException: If a stream with the same name already exists.\n\n        Notes:\n            - If the stream does not have a deserializer, the engine's deserializer\n              is assigned to it.\n            - If the stream does not have a rebalance listener, a default\n              MetricsRebalanceListener is assigned.\n            - The stream's UDF handler is set up with the provided function and\n              engine's send method.\n            - If the stream's UDF handler type is not NO_TYPING, a middleware stack\n              is built for the stream's function.\n        \"\"\"\n        if self.exist_stream(stream.name):\n            raise DuplicateStreamException(name=stream.name)\n\n        if error_policy is not None:\n            stream.error_policy = error_policy\n\n        stream.backend = self.backend\n        if stream.deserializer is None:\n            stream.deserializer = self.deserializer\n        self._streams.append(stream)\n\n        if stream.rebalance_listener is None:\n            # set the stream to the listener to it will be available\n            # when the callbacks are called\n            stream.rebalance_listener = MetricsRebalanceListener()\n\n        stream.rebalance_listener.stream = stream\n        stream.rebalance_listener.engine = self\n\n        stream.udf_handler = UdfHandler(\n            next_call=stream.func,\n            send=self.send,\n            stream=stream,\n        )\n\n        # NOTE: When `no typing` support is deprecated this check can\n        # be removed\n        if stream.udf_handler.type != UDFType.NO_TYPING:\n            stream.func = self._build_stream_middleware_stack(stream=stream)\n\n    def _build_stream_middleware_stack(self, *, stream: Stream) -> NextMiddlewareCall:\n        assert stream.udf_handler, \"UdfHandler can not be None\"\n\n        middlewares = stream.get_middlewares(self)\n        next_call = stream.udf_handler\n        for middleware, options in reversed(middlewares):\n            next_call = middleware(\n                next_call=next_call, send=self.send, stream=stream, **options\n            )\n        return next_call\n\n    async def remove_stream(self, stream: Stream) -> None:\n        consumer = stream.consumer\n        self._streams.remove(stream)\n        await stream.stop()\n\n        if consumer is not None:\n            self.monitor.clean_stream_consumer_metrics(consumer=consumer)\n\n    def stream(\n        self,\n        topics: typing.Union[typing.List[str], str],\n        *,\n        name: typing.Optional[str] = None,\n        deserializer: Deprecated[typing.Optional[Deserializer]] = None,\n        initial_offsets: typing.Optional[typing.List[TopicPartitionOffset]] = None,\n        rebalance_listener: typing.Optional[RebalanceListener] = None,\n        middlewares: typing.Optional[typing.List[Middleware]] = None,\n        subscribe_by_pattern: bool = False,\n        error_policy: StreamErrorPolicy = StreamErrorPolicy.STOP,\n        **kwargs,\n    ) -> typing.Callable[[StreamFunc], Stream]:\n        def decorator(func: StreamFunc) -> Stream:\n            stream_from_func = stream_func(\n                topics,\n                
name=name,\n                deserializer=deserializer,\n                initial_offsets=initial_offsets,\n                rebalance_listener=rebalance_listener,\n                middlewares=middlewares,\n                subscribe_by_pattern=subscribe_by_pattern,\n                **kwargs,\n            )(func)\n            self.add_stream(stream_from_func, error_policy=error_policy)\n\n            return stream_from_func\n\n        return decorator\n
"},{"location":"engine/#kstreams.engine.StreamEngine.send","title":"send(topic, value=None, key=None, partition=None, timestamp_ms=None, headers=None, serializer=None, serializer_kwargs=None) async","text":"

Attributes:

Name Type Description topic str

Topic name to send the event to

value Any

Event value

key str | None

Event key

partition int | None

Topic partition

timestamp_ms int | None

Event timestamp in milliseconds

headers Dict[str, str] | None

Event headers

serializer Serializer | None

Serializer to encode the event

serializer_kwargs Dict[str, Any] | None

Serializer kwargs

Source code in kstreams/engine.py
async def send(\n    self,\n    topic: str,\n    value: typing.Any = None,\n    key: typing.Any = None,\n    partition: typing.Optional[int] = None,\n    timestamp_ms: typing.Optional[int] = None,\n    headers: typing.Optional[Headers] = None,\n    serializer: typing.Optional[Serializer] = None,\n    serializer_kwargs: typing.Optional[typing.Dict] = None,\n):\n    \"\"\"\n    Attributes:\n        topic str: Topic name to send the event to\n        value Any: Event value\n        key str | None: Event key\n        partition int | None: Topic partition\n        timestamp_ms int | None: Event timestamp in miliseconds\n        headers Dict[str, str] | None: Event headers\n        serializer kstreams.serializers.Serializer | None: Serializer to\n            encode the event\n        serializer_kwargs Dict[str, Any] | None: Serializer kwargs\n    \"\"\"\n    if self._producer is None:\n        raise EngineNotStartedException()\n\n    serializer = serializer or self.serializer\n\n    # serialize only when value and serializer are present\n    if value is not None and serializer is not None:\n        value = await serializer.serialize(\n            value, headers=headers, serializer_kwargs=serializer_kwargs\n        )\n\n    encoded_headers = None\n    if headers is not None:\n        encoded_headers = encode_headers(headers)\n\n    fut = await self._producer.send(\n        topic,\n        value=value,\n        key=key,\n        partition=partition,\n        timestamp_ms=timestamp_ms,\n        headers=encoded_headers,\n    )\n    metadata: RecordMetadata = await fut\n    self.monitor.add_topic_partition_offset(\n        topic, metadata.partition, metadata.offset\n    )\n\n    return metadata\n
"},{"location":"engine/#kstreams.engine.StreamEngine.on_startup","title":"on_startup(func)","text":"

A list of callables to run before the engine starts. Handlers are callables that do not take any arguments and may be either standard functions or async functions.

Attributes:

Name Type Description func Callable[[], Any]

Callable to run before the engine starts

Example

Engine before startup
import kstreams\n\nstream_engine = kstreams.create_engine(\n    title=\"my-stream-engine\"\n)\n\n@stream_engine.on_startup\nasync def init_db() -> None:\n    print(\"Initializing Database Connections\")\n    await init_db()\n\n\n@stream_engine.on_startup\nasync def start_background_task() -> None:\n    print(\"Some background task\")\n
Source code in kstreams/engine.py
def on_startup(\n    self,\n    func: typing.Callable[[], typing.Any],\n) -> typing.Callable[[], typing.Any]:\n    \"\"\"\n    A list of callables to run before the engine starts.\n    Handler are callables that do not take any arguments, and may be either\n    standard functions, or async functions.\n\n    Attributes:\n        func typing.Callable[[], typing.Any]: Func to callable before engine starts\n\n    !!! Example\n        ```python title=\"Engine before startup\"\n\n        import kstreams\n\n        stream_engine = kstreams.create_engine(\n            title=\"my-stream-engine\"\n        )\n\n        @stream_engine.on_startup\n        async def init_db() -> None:\n            print(\"Initializing Database Connections\")\n            await init_db()\n\n\n        @stream_engine.on_startup\n        async def start_background_task() -> None:\n            print(\"Some background task\")\n        ```\n    \"\"\"\n    self._on_startup.append(func)\n    return func\n
"},{"location":"engine/#kstreams.engine.StreamEngine.on_stop","title":"on_stop(func)","text":"

A list of callables to run before the engine stops. Handlers are callables that do not take any arguments and may be either standard functions or async functions.

Attributes:

Name Type Description func Callable[[], Any]

Callable to run before the engine stops

Example

Engine before stops
import kstreams\n\nstream_engine = kstreams.create_engine(\n    title=\"my-stream-engine\"\n)\n\n@stream_engine.on_stop\nasync def close_db() -> None:\n    print(\"Closing Database Connections\")\n    await db_close()\n
Source code in kstreams/engine.py
def on_stop(\n    self,\n    func: typing.Callable[[], typing.Any],\n) -> typing.Callable[[], typing.Any]:\n    \"\"\"\n    A list of callables to run before the engine stops.\n    Handler are callables that do not take any arguments, and may be either\n    standard functions, or async functions.\n\n    Attributes:\n        func typing.Callable[[], typing.Any]: Func to callable before engine stops\n\n    !!! Example\n        ```python title=\"Engine before stops\"\n\n        import kstreams\n\n        stream_engine = kstreams.create_engine(\n            title=\"my-stream-engine\"\n        )\n\n        @stream_engine.on_stop\n        async def close_db() -> None:\n            print(\"Closing Database Connections\")\n            await db_close()\n        ```\n    \"\"\"\n    self._on_stop.append(func)\n    return func\n
"},{"location":"engine/#kstreams.engine.StreamEngine.after_startup","title":"after_startup(func)","text":"

A list of callables to run after the engine starts. Handlers are callables that do not take any arguments and may be either standard functions or async functions.

Attributes:

Name Type Description func Callable[[], Any]

Callable to run after the engine starts

Example

Engine after startup
import kstreams\n\nstream_engine = kstreams.create_engine(\n    title=\"my-stream-engine\"\n)\n\n@stream_engine.after_startup\nasync def after_startup() -> None:\n    print(\"Set pod as healthy\")\n    await mark_healthy_pod()\n
Source code in kstreams/engine.py
def after_startup(\n    self,\n    func: typing.Callable[[], typing.Any],\n) -> typing.Callable[[], typing.Any]:\n    \"\"\"\n    A list of callables to run after the engine starts.\n    Handler are callables that do not take any arguments, and may be either\n    standard functions, or async functions.\n\n    Attributes:\n        func typing.Callable[[], typing.Any]: Func to callable after engine starts\n\n    !!! Example\n        ```python title=\"Engine after startup\"\n\n        import kstreams\n\n        stream_engine = kstreams.create_engine(\n            title=\"my-stream-engine\"\n        )\n\n        @stream_engine.after_startup\n        async def after_startup() -> None:\n            print(\"Set pod as healthy\")\n            await mark_healthy_pod()\n        ```\n    \"\"\"\n    self._after_startup.append(func)\n    return func\n
"},{"location":"engine/#kstreams.engine.StreamEngine.after_stop","title":"after_stop(func)","text":"

A list of callables to run after the engine stops. Handlers are callables that do not take any arguments and may be either standard functions or async functions.

Attributes:

Name Type Description func Callable[[], Any]

Callable to run after the engine stops

Example

Engine after stops
import kstreams\n\nstream_engine = kstreams.create_engine(\n    title=\"my-stream-engine\"\n)\n\n@stream_engine.after_stop\nasync def after_stop() -> None:\n    print(\"Finishing background tasks\")\n
Source code in kstreams/engine.py
def after_stop(\n    self,\n    func: typing.Callable[[], typing.Any],\n) -> typing.Callable[[], typing.Any]:\n    \"\"\"\n    A list of callables to run after the engine stops.\n    Handler are callables that do not take any arguments, and may be either\n    standard functions, or async functions.\n\n    Attributes:\n        func typing.Callable[[], typing.Any]: Func to callable after engine stops\n\n    !!! Example\n        ```python title=\"Engine after stops\"\n\n        import kstreams\n\n        stream_engine = kstreams.create_engine(\n            title=\"my-stream-engine\"\n        )\n\n        @stream_engine.after_stop\n        async def after_stop() -> None:\n            print(\"Finishing backgrpund tasks\")\n        ```\n    \"\"\"\n    self._after_stop.append(func)\n    return func\n
"},{"location":"engine/#kstreams.engine.StreamEngine.add_stream","title":"add_stream(stream, error_policy=None)","text":"

Add a stream to the engine.

This method registers a new stream with the engine, setting up necessary configurations and handlers. If a stream with the same name already exists, a DuplicateStreamException is raised.

Parameters:

Name Type Description Default stream Stream

The stream to be added.

required error_policy Optional[StreamErrorPolicy]

An optional error policy to be applied to the stream. You should probably set it directly when instantiating a Stream, not here.

None

Raises:

Type Description DuplicateStreamException

If a stream with the same name already exists.

Notes
  • If the stream does not have a deserializer, the engine's deserializer is assigned to it.
  • If the stream does not have a rebalance listener, a default MetricsRebalanceListener is assigned.
  • The stream's UDF handler is set up with the provided function and engine's send method.
  • If the stream's UDF handler type is not NO_TYPING, a middleware stack is built for the stream's function.
Source code in kstreams/engine.py
def add_stream(\n    self, stream: Stream, error_policy: typing.Optional[StreamErrorPolicy] = None\n) -> None:\n    \"\"\"\n    Add a stream to the engine.\n\n    This method registers a new stream with the engine, setting up necessary\n    configurations and handlers. If a stream with the same name already exists,\n    a DuplicateStreamException is raised.\n\n    Args:\n        stream: The stream to be added.\n        error_policy: An optional error policy to be applied to the stream.\n            You should probably set directly when instanciating a Stream, not here.\n\n    Raises:\n        DuplicateStreamException: If a stream with the same name already exists.\n\n    Notes:\n        - If the stream does not have a deserializer, the engine's deserializer\n          is assigned to it.\n        - If the stream does not have a rebalance listener, a default\n          MetricsRebalanceListener is assigned.\n        - The stream's UDF handler is set up with the provided function and\n          engine's send method.\n        - If the stream's UDF handler type is not NO_TYPING, a middleware stack\n          is built for the stream's function.\n    \"\"\"\n    if self.exist_stream(stream.name):\n        raise DuplicateStreamException(name=stream.name)\n\n    if error_policy is not None:\n        stream.error_policy = error_policy\n\n    stream.backend = self.backend\n    if stream.deserializer is None:\n        stream.deserializer = self.deserializer\n    self._streams.append(stream)\n\n    if stream.rebalance_listener is None:\n        # set the stream to the listener to it will be available\n        # when the callbacks are called\n        stream.rebalance_listener = MetricsRebalanceListener()\n\n    stream.rebalance_listener.stream = stream\n    stream.rebalance_listener.engine = self\n\n    stream.udf_handler = UdfHandler(\n        next_call=stream.func,\n        send=self.send,\n        stream=stream,\n    )\n\n    # NOTE: When `no typing` support is deprecated this check can\n    # be removed\n    if stream.udf_handler.type != UDFType.NO_TYPING:\n        stream.func = self._build_stream_middleware_stack(stream=stream)\n
"},{"location":"getting_started/","title":"Getting Started","text":"

You can start using kstreams with simple producers and consumers and/or integrate it with any async framework like FastAPI

"},{"location":"getting_started/#simple-consumer-and-producer","title":"Simple consumer and producer","text":"Simple use case
import asyncio\nfrom kstreams import create_engine, ConsumerRecord\n\nstream_engine = create_engine(title=\"my-stream-engine\")\n\n\n@stream_engine.stream(\"local--py-stream\", group_id=\"de-my-partition\")\nasync def consume(cr: ConsumerRecord):\n    print(f\"Event consumed: headers: {cr.headers}, payload: {cr.value}\")\n\n\nasync def produce():\n    payload = b'{\"message\": \"Hello world!\"}'\n\n    for i in range(5):\n        metadata = await stream_engine.send(\"local--py-stream\", value=payload, key=\"1\")\n        print(f\"Message sent: {metadata}\")\n        await asyncio.sleep(5)\n\n\nasync def start():\n    await stream_engine.start()\n    await produce()\n\n\nasync def shutdown():\n    await stream_engine.stop()\n\n\nif __name__ == \"__main__\":\n    loop = asyncio.get_event_loop()\n    try:\n        loop.run_until_complete(start())\n        loop.run_forever()\n    finally:\n        loop.run_until_complete(shutdown())\n        loop.close()\n

(This script is complete, it should run \"as is\")

"},{"location":"getting_started/#recommended-usage","title":"Recommended usage","text":"

In the previous example you can see some boilerplate regarding how to start the program. We recommend using aiorun, so you won't have to worry about setting signal handlers, shutdown callbacks, graceful shutdown, and closing the event loop.

Usage with aiorun
import asyncio\nimport aiorun\nfrom kstreams import create_engine, ConsumerRecord\n\nstream_engine = create_engine(title=\"my-stream-engine\")\n\n\n@stream_engine.stream(\"local--py-stream\", group_id=\"de-my-partition\")\nasync def consume(cr: ConsumerRecord):\n    print(f\"Event consumed: headers: {cr.headers}, payload: {cr.value}\")\n\n\nasync def produce():\n    payload = b'{\"message\": \"Hello world!\"}'\n\n    for i in range(5):\n        metadata = await stream_engine.send(\"local--py-stream\", value=payload, key=\"1\")\n        print(f\"Message sent: {metadata}\")\n        await asyncio.sleep(5)\n\n\nasync def start():\n    await stream_engine.start()\n    await produce()\n\n\nasync def shutdown(loop):\n    await stream_engine.stop()\n\n\nif __name__ == \"__main__\":\n    aiorun.run(start(), stop_on_unhandled_errors=True, shutdown_callback=shutdown)\n

(This script is complete, it should run \"as is\")

"},{"location":"getting_started/#fastapi","title":"FastAPI","text":"

The following code example shows how kstreams can be integrated with any async framework like FastAPI. The full example can be found here

First, we need to create an engine:

Create the StreamEngine
# streaming.engine.py\nfrom kstreams import create_engine\n\nstream_engine = create_engine(\n    title=\"my-stream-engine\",\n)\n

Define the streams:

Application stream
# streaming.streams.py\nfrom .engine import stream_engine\nfrom kstreams import ConsumerRecord\n\n\n@stream_engine.stream(\"local--kstream\")\nasync def stream(cr: ConsumerRecord):\n    print(f\"Event consumed: headers: {cr.headers}, payload: {cr.value}\")\n

Create the FastAPI:

FastAPI
# app.py\nfrom fastapi import FastAPI\nfrom starlette.responses import Response\nfrom starlette_prometheus import PrometheusMiddleware, metrics\n\nfrom .streaming.streams import stream_engine\n\napp = FastAPI()\n\n@app.on_event(\"startup\")\nasync def startup_event():\n    await stream_engine.start()\n\n@app.on_event(\"shutdown\")\nasync def shutdown_event():\n    await stream_engine.stop()\n\n\n@app.get(\"/events\")\nasync def post_produce_event() -> Response:\n    payload = '{\"message\": \"hello world!\"}'\n\n    metadata = await stream_engine.send(\n        \"local--kstream\",\n        value=payload.encode(),\n    )\n    msg = (\n        f\"Produced event on topic: {metadata.topic}, \"\n        f\"part: {metadata.partition}, offset: {metadata.offset}\"\n    )\n\n    return Response(msg)\n\n\napp.add_middleware(PrometheusMiddleware, filter_unhandled_paths=True)\napp.add_api_route(\"/metrics\", metrics)\n
"},{"location":"getting_started/#changing-kafka-settings","title":"Changing Kafka settings","text":"

To modify the settings of a cluster, like the servers, refer to the backends docs
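As a minimal sketch (assuming a locally reachable cluster and leaving every other backend option at its default), a custom backend with your own servers can be passed to create_engine:

from kstreams import backends, create_engine

# hypothetical broker addresses; replace them with your own cluster
backend = backends.Kafka(bootstrap_servers=["kafka-1:9092", "kafka-2:9092"])

stream_engine = create_engine(
    title="my-stream-engine",
    backend=backend,
)

The backends docs cover the remaining options, such as the security settings.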

"},{"location":"large_project_structure/","title":"Large Projects","text":"

If you have a large project with multiple streams, we recommend the following project structure:

\u251c\u2500\u2500 my-project\n\u2502   \u251c\u2500\u2500 my_project\n\u2502   \u2502\u00a0\u00a0 \u251c\u2500\u2500 __init__.py\n\u2502   \u2502\u00a0\u00a0 \u251c\u2500\u2500 app.py\n\u2502   \u2502\u00a0\u00a0 \u251c\u2500\u2500 resources.py\n\u2502   \u2502\u00a0\u00a0 \u251c\u2500\u2500 streams.py\n\u2502   \u2502\u00a0\u00a0 \u2514\u2500\u2500 streams_roster.py\n\u2502   \u2502\u2500\u2500 tests\n\u2502   \u2502   \u251c\u2500\u2500 __init__.py\n\u2502   \u2502   \u251c\u2500\u2500 conftest.py\n\u2502   \u2502\u2500\u2500 pyproject.toml\n\u2502   \u2502\u2500\u2500 README.md\n
  • The file my_project/resources.py contains the creation of the StreamEngine
  • The file my_project/app.py contains the entrypoint of your program
  • The file my_project/streams.py contains all the Streams

A full project example ready to use can be found here

Note

This is just a recommendation; there are many ways to structure your project

"},{"location":"large_project_structure/#resources","title":"Resources","text":"

This Python module contains any global resources that will be used later in the application, for example DB connections or the StreamEngine. Typically we will have the following:

from kstreams import backends, create_engine\n\nbackend = backends.Kafka(\n    bootstrap_servers=[\"localhost:9092\"],\n    security_protocol=backends.kafka.SecurityProtocol.PLAINTEXT,\n)\n\nstream_engine = create_engine(\n    title=\"my-stream-engine\",\n    backend=backend,\n)\n

Then later stream_engine can be reused to start the application.

"},{"location":"large_project_structure/#streams","title":"Streams","text":"

When starting your project you can have any number of Streams, each with its handler, for example in a streams.py module. All of the Streams will run next to each other, and because they are in the same project it is easy to share common code. However, this comes with a scalability downside, as it is not possible to take advantage of Kafka and scale up Streams individually. In future versions the StreamEngine will be able to select which Stream(s) should run to mitigate this issue. Typically, your streams.py will look like:

from kstreams import Stream\n\nfrom .streams_roster import stream_roster, stream_two_roster\n\n\nmy_stream = Stream(\n    \"local--hello-world\",\n    func=stream_roster,\n    config={\n        \"group_id\": \"example-group\",\n    },\n    ...\n)\n\nmy_second_stream = Stream(\n    \"local--hello-world-2\",\n    func=stream_two_roster,\n    config={\n        \"group_id\": \"example-group-2\",\n    },\n    ...\n)\n\n...\n

and streams_roster.py contains all the coroutines that will be executed when an event arrives

import logging\n\nfrom kstreams import ConsumerRecord, Send, Stream\n\nlogger = logging.getLogger(__name__)\n\n\nasync def stream_roster(cr: ConsumerRecord, send: Send) -> None:\n    logger.info(f\"showing bytes: {cr.value}\")\n    value = f\"Event confirmed. {cr.value}\"\n\n    await send(\n        \"another-topic-to-wink\",\n        value=value.encode(),\n        key=\"1\",\n    )\n\n\nasync def stream_two_roster(cr: ConsumerRecord, send: Send, stream: Stream) -> None:\n    ...\n

It is worth noting three things:

  • We separate the Stream from its coroutine to be able to test the business logic easily
  • If you need to produce events inside a Stream add the send coroutine using dependency-injection
  • We are not using StreamEngine at all to avoid circular import errors
"},{"location":"large_project_structure/#application","title":"Application","text":"

The entrypoint is usually in app.py. The module contains the import of stream_engine, its hooks, and the streams to be added to the engine:

import aiorun\nimport asyncio\nimport logging\n\nfrom kstreams.stream_utils import StreamErrorPolicy\n\nfrom .resources import stream_engine\nfrom .streams import my_stream, my_second_stream\n\nlogger = logging.getLogger(__name__)\n\n\n# hooks\n@stream_engine.after_startup\nasync def init_events():\n    await stream_engine.send(\"local--hello-world\", value=\"Hi Kstreams!\")\n\n\n# add the stream to the stream_engine\nstream_engine.add_stream(my_stream, error_policy=StreamErrorPolicy.RESTART)\nstream_engine.add_stream(my_second_stream, error_policy=StreamErrorPolicy.STOP_ENGINE)\n\n\nasync def start():\n    await stream_engine.start()\n\n\nasync def stop(loop: asyncio.AbstractEventLoop):\n    await stream_engine.stop()\n\n\ndef main():\n    logging.basicConfig(level=logging.INFO)\n    logger.info(\"Starting application...\")\n    aiorun.run(start(), stop_on_unhandled_errors=True, shutdown_callback=stop)\n

To run it we recommend aiorun. It can also be run with asyncio directly, but aiorun takes care of all the boilerplate for us.

"},{"location":"large_project_structure/#tests","title":"Tests","text":"

In this module you test your application using the TestStreamClient, usually provided as a fixture thanks to pytest. The package pytest-asyncio is also needed to test async code.

# conftest.py\nimport pytest\n\nfrom kstreams.test_utils import TestStreamClient\n\nfrom my_project.resources import stream_engine\n\n\n@pytest.fixture\ndef stream_client():\n    return TestStreamClient(stream_engine=stream_engine)\n

then you can test your streams

# test_app.py\nimport pytest\n\n\n@pytest.mark.asyncio\nasync def test_my_stream(stream_client):\n    topic = \"local--hello-world\"  # Use the same topic as the stream\n    event = b'{\"message\": \"Hello world!\"}'\n\n    async with stream_client:\n        metadata = await stream_client.send(topic, value=event, key=\"1\")\n        assert metadata.topic == topic\n
"},{"location":"metrics/","title":"Metrics","text":"

Metrics are generated by prometheus_client. You are responsible for setting up a webserver to expose them.
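For a worker without an HTTP framework, a minimal sketch could rely on the standalone server that prometheus_client ships with (the port is only an example):

from prometheus_client import start_http_server

# Expose the default prometheus_client registry over HTTP so Prometheus can
# scrape the gauges listed below; call this before starting the engine.
start_http_server(8080)

If you run kstreams inside an ASGI application, mounting a metrics route (as in the FastAPI example of the Getting Started section) works as well.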

"},{"location":"metrics/#metrics","title":"Metrics","text":""},{"location":"metrics/#producer","title":"Producer","text":"
  • topic_partition_offsets: Gauge of offsets per topic/partition
"},{"location":"metrics/#consumer","title":"Consumer","text":"
  • consumer_committed: Gauge of the consumer committed offset per topic/partition in a consumer group
  • consumer_position: Gauge of consumer current position per topic/partition in a consumer group
  • consumer_highwater: Gauge of consumer highwater per topic/partition in a consumer group
  • consumer_lag: Gauge of current consumer lag per topic/partition in a consumer group calculated with the last committed offset
  • position_lag: Gauge of current consumer position_lag per topic/partition in a consumer group calculated using the consumer position
"},{"location":"middleware/","title":"Middleware","text":"

Kstreams allows you to include middlewares for adding behavior to streams.

A middleware is a callable that works with every ConsumerRecord (CR) before and after it is processed by a specific stream. Middlewares also have access to the stream and send function.

  • It takes each CR that arrives at a Kafka topic.
  • Then it can do something to the CR or run any needed code.
  • Then it passes the CR to be processed by another callable (other middleware or stream).
  • Once the CR is processed by the stream, the chain is \"completed\".
  • If there is code after the self.next_call(cr) then it will be executed.

Kstreams middlewares follow this protocol:

Bases: Protocol

Source code in kstreams/middleware/middleware.py
class MiddlewareProtocol(typing.Protocol):\n    next_call: types.NextMiddlewareCall\n    send: types.Send\n    stream: \"Stream\"\n\n    def __init__(\n        self,\n        *,\n        next_call: types.NextMiddlewareCall,\n        send: types.Send,\n        stream: \"Stream\",\n        **kwargs: typing.Any,\n    ) -> None: ...  #  pragma: no cover\n\n    async def __call__(\n        self, cr: types.ConsumerRecord\n    ) -> typing.Any: ...  #  pragma: no cover\n

Note

The __call__ method can return anything, so previous calls can use the returned value. Make sure that the line return await self.next_call(cr) is in your method.

Warning

Middlewares only work with the new Dependency Injection approach

"},{"location":"middleware/#creating-a-middleware","title":"Creating a middleware","text":"

To create a middleware you have to create a class that inherits from BaseMiddleware. Then, the method async def __call__ must be defined. Let's consider that we want to save the CR to elastic before it is processed:

import typing\n\nfrom kstreams import ConsumerRecord, middleware\n\nasync def save_to_elastic(cr: ConsumerRecord) -> None:\n    ...\n\n\nclass ElasticMiddleware(middleware.BaseMiddleware):\n    async def __call__(self, cr: ConsumerRecord) -> typing.Any:\n        # save to elastic before calling the next\n        await save_to_elastic(cr)\n\n        # the next call could be another middleware\n        return await self.next_call(cr)\n

Then, we have to include the middleware:

from kstreams import ConsumerRecord, middleware\n\nfrom .engine import stream_engine\n\n\nmiddlewares = [middleware.Middleware(ElasticMiddleware)]\n\n@stream_engine.stream(\"kstreams-topic\", middlewares=middlewares)\nasync def processor(cr: ConsumerRecord):\n    ...\n

Note

The Middleware concept also applies for async generators (yield from a stream)
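For instance, a rough sketch of such a stream, reusing the ElasticMiddleware defined above and assuming the async generator style in which the handler yields values instead of returning them:

from kstreams import ConsumerRecord, middleware

from .engine import stream_engine

# ElasticMiddleware is the class defined in the example above
middlewares = [middleware.Middleware(ElasticMiddleware)]


@stream_engine.stream("kstreams-topic", middlewares=middlewares)
async def processor(cr: ConsumerRecord):
    # the middleware chain still runs for every ConsumerRecord before it
    # reaches this handler; the handler simply yields instead of returning
    yield cr.value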

"},{"location":"middleware/#adding-extra-configuration-to-middlewares","title":"Adding extra configuration to middlewares","text":"

If you want to provide extra configuration to a middleware, you should override the __init__ method with the extra options as keyword arguments and then call super().__init__(**kwargs)

Let's consider that we want to send an event to a specific topic when a ValueError is raised inside a stream (Dead Letter Queue)

from kstreams import ConsumerRecord, types, Stream, middleware\n\n\nclass DLQMiddleware(middleware.BaseMiddleware):\n    def __init__(self, *, topic: str, **kwargs) -> None:\n        super().__init__(**kwargs)\n        self.topic = topic\n\n    async def __call__(self, cr: ConsumerRecord):\n        try:\n            return await self.next_call(cr)\n        except ValueError:\n            await self.send(self.topic, key=cr.key, value=cr.value)\n\n\n# Create the middlewares\nmiddlewares = [\n    middleware.Middleware(\n        DLQMiddleware, topic=\"kstreams-dlq-topic\"\n    )\n]\n\n@stream_engine.stream(\"kstreams-topic\", middlewares=middlewares)\nasync def processor(cr: ConsumerRecord):\n    if cr.value == b\"joker\":\n        raise ValueError(\"Joker received...\")\n
"},{"location":"middleware/#default-middleware","title":"Default Middleware","text":"

This is always the first Middleware in the middleware stack to catch any exception that might occur. Any exception raised when consuming events that is not handled by the end user will be handled by this ExceptionMiddleware, which executes the error_policy that was established.

Source code in kstreams/middleware/middleware.py
class ExceptionMiddleware(BaseMiddleware):\n    \"\"\"\n    This is always the first Middleware in the middleware stack\n    to catch any exception that might occur. Any exception raised\n    when consuming events that is not handled by the end user\n    will be handled by this ExceptionMiddleware executing the\n    policy_error that was stablished.\n    \"\"\"\n\n    def __init__(\n        self, *, engine: \"StreamEngine\", error_policy: StreamErrorPolicy, **kwargs\n    ) -> None:\n        super().__init__(**kwargs)\n        self.engine = engine\n        self.error_policy = error_policy\n\n    async def __call__(self, cr: types.ConsumerRecord) -> typing.Any:\n        try:\n            return await self.next_call(cr)\n        except Exception as exc:\n            logger.exception(\n                \"Unhandled error occurred while listening to the stream. \"\n                f\"Stream consuming from topics {self.stream.topics} CRASHED!!! \\n\\n \"\n            )\n            if sys.version_info >= (3, 11):\n                exc.add_note(f\"Handler: {self.stream.func}\")\n                exc.add_note(f\"Topics: {self.stream.topics}\")\n\n            await self.cleanup_policy(exc)\n\n    async def cleanup_policy(self, exc: Exception) -> None:\n        \"\"\"\n        Execute cleanup policy according to the Stream configuration.\n\n        At this point we are inside the asyncio.Lock `is_processing`\n        as an event is being processed and an exeption has occured.\n        The Lock must be released to stop the Stream\n        (which must happen for any policy), then before re-raising\n        the exception the Lock must be acquire again to continue the processing\n\n        Exception and policies:\n\n            - STOP: The exception is re-raised as the Stream will be stopped\n              and the end user will deal with it\n\n            - STOP_ENGINE: The exception is re-raised as the Engine will be stopped\n              (all Streams and Producer) and the end user will deal with it\n\n            - RESTART: The exception is not re-raised as the Stream\n              will recover and continue the processing. The logger.exception\n              from __call__ will record that something went wrong\n\n            - STOP_APPLICATION: The exception is not re-raised as the entire\n              application will be stopped. This is only useful when using kstreams\n              with another library like FastAPI. 
The logger.exception\n              from __call__ will record that something went wrong\n\n        Args:\n            exc (Exception): Any Exception that causes the Stream to crash\n\n        Raises:\n            exc: Exception is the policy is `STOP` or `STOP_ENGINE`\n        \"\"\"\n        self.stream.is_processing.release()\n\n        if self.error_policy == StreamErrorPolicy.RESTART:\n            await self.stream.stop()\n            await self.stream.start()\n        elif self.error_policy == StreamErrorPolicy.STOP:\n            await self.stream.stop()\n            # acquire `is_processing` Lock again to resume processing\n            # and avoid `RuntimeError: Lock is not acquired.`\n            await self.stream.is_processing.acquire()\n            raise exc\n        elif self.error_policy == StreamErrorPolicy.STOP_ENGINE:\n            await self.engine.stop()\n            # acquire `is_processing` Lock again to resume processing\n            # and avoid `RuntimeError: Lock is not acquired.`\n            await self.stream.is_processing.acquire()\n            raise exc\n        else:\n            # STOP_APPLICATION\n            await self.engine.stop()\n            await self.stream.is_processing.acquire()\n            signal.raise_signal(signal.SIGTERM)\n
"},{"location":"middleware/#kstreams.middleware.middleware.ExceptionMiddleware.cleanup_policy","title":"cleanup_policy(exc) async","text":"

Execute cleanup policy according to the Stream configuration.

At this point we are inside the asyncio.Lock is_processing, as an event is being processed and an exception has occurred. The Lock must be released to stop the Stream (which must happen for any policy); then, before re-raising the exception, the Lock must be acquired again to continue processing.

Exception and policies:

- STOP: The exception is re-raised as the Stream will be stopped\n  and the end user will deal with it\n\n- STOP_ENGINE: The exception is re-raised as the Engine will be stopped\n  (all Streams and Producer) and the end user will deal with it\n\n- RESTART: The exception is not re-raised as the Stream\n  will recover and continue the processing. The logger.exception\n  from __call__ will record that something went wrong\n\n- STOP_APPLICATION: The exception is not re-raised as the entire\n  application will be stopped. This is only useful when using kstreams\n  with another library like FastAPI. The logger.exception\n  from __call__ will record that something went wrong\n

Parameters:

Name Type Description Default exc Exception

Any Exception that causes the Stream to crash

required

Raises:

Type Description exc

Exception if the policy is STOP or STOP_ENGINE

Source code in kstreams/middleware/middleware.py
async def cleanup_policy(self, exc: Exception) -> None:\n    \"\"\"\n    Execute cleanup policy according to the Stream configuration.\n\n    At this point we are inside the asyncio.Lock `is_processing`\n    as an event is being processed and an exeption has occured.\n    The Lock must be released to stop the Stream\n    (which must happen for any policy), then before re-raising\n    the exception the Lock must be acquire again to continue the processing\n\n    Exception and policies:\n\n        - STOP: The exception is re-raised as the Stream will be stopped\n          and the end user will deal with it\n\n        - STOP_ENGINE: The exception is re-raised as the Engine will be stopped\n          (all Streams and Producer) and the end user will deal with it\n\n        - RESTART: The exception is not re-raised as the Stream\n          will recover and continue the processing. The logger.exception\n          from __call__ will record that something went wrong\n\n        - STOP_APPLICATION: The exception is not re-raised as the entire\n          application will be stopped. This is only useful when using kstreams\n          with another library like FastAPI. The logger.exception\n          from __call__ will record that something went wrong\n\n    Args:\n        exc (Exception): Any Exception that causes the Stream to crash\n\n    Raises:\n        exc: Exception is the policy is `STOP` or `STOP_ENGINE`\n    \"\"\"\n    self.stream.is_processing.release()\n\n    if self.error_policy == StreamErrorPolicy.RESTART:\n        await self.stream.stop()\n        await self.stream.start()\n    elif self.error_policy == StreamErrorPolicy.STOP:\n        await self.stream.stop()\n        # acquire `is_processing` Lock again to resume processing\n        # and avoid `RuntimeError: Lock is not acquired.`\n        await self.stream.is_processing.acquire()\n        raise exc\n    elif self.error_policy == StreamErrorPolicy.STOP_ENGINE:\n        await self.engine.stop()\n        # acquire `is_processing` Lock again to resume processing\n        # and avoid `RuntimeError: Lock is not acquired.`\n        await self.stream.is_processing.acquire()\n        raise exc\n    else:\n        # STOP_APPLICATION\n        await self.engine.stop()\n        await self.stream.is_processing.acquire()\n        signal.raise_signal(signal.SIGTERM)\n
"},{"location":"middleware/#middleware-chain","title":"Middleware chain","text":"

It is possible to add as many middlewares as you want to split and reuse business logic; however, the downside is extra complexity and the code might become slower. The middleware order is important, as middlewares are evaluated in the order in which they were placed in the stream.

In the following example we are adding three middlewares in this order: DLQMiddleware, ElasticMiddleware, and S3Middleware. The chain execution will be:

sequenceDiagram\n    autonumber\n    ExceptionMiddleware->>DLQMiddleware: \n    Note left of ExceptionMiddleware: Event received\n    alt No Processing Error\n    DLQMiddleware->>ElasticMiddleware: \n    Note right of ElasticMiddleware: Store CR on Elastic\n    ElasticMiddleware->>S3Middleware: \n    Note right of S3Middleware: Store CR on S3\n    S3Middleware->>Stream: \n    Note right of Stream: CR processed\n    Stream-->>S3Middleware: \n    S3Middleware-->>ElasticMiddleware: \n    ElasticMiddleware-->>DLQMiddleware: \n    DLQMiddleware-->>ExceptionMiddleware: \n    end
Multiple middlewares example
from kstreams import ConsumerRecord, Stream, middleware\n\n\nclass DLQMiddleware(middleware.BaseMiddleware):\n    async def __call__(self, cr: ConsumerRecord):\n        try:\n            return await self.next_call(cr)\n        except ValueError:\n            await dlq(cr.value)\n\n\nclass ElasticMiddleware(middleware.BaseMiddleware):\n    async def __call__(self, cr: ConsumerRecord):\n        await save_to_elastic(cr.value)\n        return await self.next_call(cr)\n\n\nclass S3Middleware(middleware.BaseMiddleware):\n    async def __call__(self, cr: ConsumerRecord):\n        await backup_to_s3(cr.value)\n        return await self.next_call(cr)\n\n\nmiddlewares = [\n    middleware.Middleware(DLQMiddleware),\n    middleware.Middleware(ElasticMiddleware),\n    middleware.Middleware(S3Middleware),\n]\n\n\n@stream_engine.stream(\"kstreams-topic\", middlewares=middlewares)\nasync def processor(cr: ConsumerRecord):\n    if cr.value == event_2:\n        raise ValueError(\"Error from stream...\")\n    await save_to_db(cr.value)\n

Note

In the example we can see that the cr will always be saved to elastic and s3, regardless of any error

"},{"location":"middleware/#executing-code-after-the-cr-was-processed","title":"Executing Code after the CR was processed","text":"

As mentioned in the introduction, it is possible to execute code after the CR is handled. To do this, we need to place code after next_call is called:

Execute code after CR is handled
from kstreams import ConsumerRecord, Stream, middleware\n\n\nclass DLQMiddleware(middleware.BaseMiddleware):\n    async def __call__(self, cr: ConsumerRecord):\n        try:\n            return await self.next_call(cr)\n        except ValueError:\n            await dlq(cr.value)\n\n\nclass ElasticMiddleware(middleware.BaseMiddleware):\n    async def __call__(self, cr: ConsumerRecord):\n        result = await self.next_call(cr)\n        # This will be called after the whole chain has finished\n        await save_to_elastic(cr.value)\n        return result\n\n\nmiddlewares = [\n    middleware.Middleware(DLQMiddleware),\n    middleware.Middleware(ElasticMiddleware),\n]\n\n\n@stream_engine.stream(\"kstreams-topic\", middlewares=middlewares)\nasync def processor(cr: ConsumerRecord):\n    if cr.value == event_2:\n        raise ValueError(\"Error from stream...\")\n    await save_to_db(cr.value)\n

Note

In the example we can see that the event is saved to elastic only if there is no error

"},{"location":"middleware/#deserialization","title":"Deserialization","text":"

To deserialize bytes into a different structure, like a dict, middlewares are the preferred way to do it. Examples:

Source code in examples/dataclasses-avroschema-example/dataclasses_avroschema_example/middlewares.py
class AvroDeserializerMiddleware(middleware.BaseMiddleware):\n    def __init__(self, *, model: AvroModel, **kwargs) -> None:\n        super().__init__(**kwargs)\n        self.model = model\n\n    async def __call__(self, cr: ConsumerRecord):\n        \"\"\"\n        Deserialize a payload to an AvroModel\n        \"\"\"\n        if cr.value is not None:\n            data = self.model.deserialize(cr.value)\n            cr.value = data\n        return await self.next_call(cr)\n
Source code in examples/confluent-example/confluent_example/middlewares.py
class ConfluentMiddlewareDeserializer(\n    middleware.BaseMiddleware, AsyncAvroMessageSerializer\n):\n    def __init__(\n        self,\n        *,\n        schema_registry_client: AsyncSchemaRegistryClient,\n        reader_schema: Optional[schema.AvroSchema] = None,\n        return_record_name: bool = False,\n        **kwargs,\n    ):\n        super().__init__(**kwargs)\n        self.schemaregistry_client = schema_registry_client\n        self.reader_schema = reader_schema\n        self.return_record_name = return_record_name\n        self.id_to_decoder_func: Dict = {}\n        self.id_to_writers: Dict = {}\n\n    async def __call__(self, cr: ConsumerRecord):\n        \"\"\"\n        Deserialize the event to a dict\n        \"\"\"\n        data = await self.decode_message(cr.value)\n        cr.value = data\n        return await self.next_call(cr)\n
"},{"location":"monitoring/","title":"Monitoring","text":"

This page discusses how to monitor your application using the Kafka metrics that are accessible in Prometheus.

Before we begin, it's crucial to note that Kafka itself makes a number of useful metrics available, including metrics for the cluster, brokers, and clients (producers and consumers).

This means that we can quickly add some graphs to our dashboards by utilizing the already-exposed metrics.

Kstreams includes a collection of metrics. See Metrics Docs for more information.

"},{"location":"monitoring/#kstreams.PrometheusMonitor","title":"kstreams.PrometheusMonitor","text":"

Metrics monitor to keep track of Producers and Consumers.

Attributes: metrics_scrape_time float: Number of seconds that the monitor will wait until the next scrape iteration

Source code in kstreams/prometheus/monitor.py
class PrometheusMonitor:\n    \"\"\"\n    Metrics monitor to keep track of Producers and Consumers.\n\n     Attributes:\n        metrics_scrape_time float: Amount of seconds that the monitor\n            will wait until next scrape iteration\n    \"\"\"\n\n    # Producer metrics\n    MET_OFFSETS = Gauge(\n        \"topic_partition_offsets\", \"help producer offsets\", [\"topic\", \"partition\"]\n    )\n\n    # Consumer metrics\n    MET_COMMITTED = Gauge(\n        \"consumer_committed\",\n        \"help consumer committed\",\n        [\"topic\", \"partition\", \"consumer_group\"],\n    )\n    MET_POSITION = Gauge(\n        \"consumer_position\",\n        \"help consumer position\",\n        [\"topic\", \"partition\", \"consumer_group\"],\n    )\n    MET_HIGHWATER = Gauge(\n        \"consumer_highwater\",\n        \"help consumer highwater\",\n        [\"topic\", \"partition\", \"consumer_group\"],\n    )\n    MET_LAG = Gauge(\n        \"consumer_lag\",\n        \"help consumer lag calculated using the last commited offset\",\n        [\"topic\", \"partition\", \"consumer_group\"],\n    )\n    MET_POSITION_LAG = Gauge(\n        \"position_lag\",\n        \"help consumer position lag calculated using the consumer position\",\n        [\"topic\", \"partition\", \"consumer_group\"],\n    )\n\n    def __init__(self, metrics_scrape_time: float = 3):\n        self.metrics_scrape_time = metrics_scrape_time\n        self.running = False\n        self._producer = None\n        self._streams: List[Stream] = []\n\n    async def start(self) -> None:\n        self.running = True\n        logger.info(\"Starting Prometheus Monitoring started...\")\n        await self._metrics_task()\n\n    async def stop(self) -> None:\n        self.running = False\n        self._clean_consumer_metrics()\n        logger.info(\"Prometheus Monitoring stopped...\")\n\n    def add_topic_partition_offset(\n        self, topic: str, partition: int, offset: int\n    ) -> None:\n        self.MET_OFFSETS.labels(topic=topic, partition=partition).set(offset)\n\n    def _add_consumer_metrics(self, metrics_dict: MetricsType):\n        for topic_partition, partitions_metadata in metrics_dict.items():\n            group_id = partitions_metadata[\"group_id\"]\n            position = partitions_metadata[\"position\"]\n            committed = partitions_metadata[\"committed\"]\n            highwater = partitions_metadata[\"highwater\"]\n            lag = partitions_metadata[\"lag\"]\n            position_lag = partitions_metadata[\"position_lag\"]\n\n            self.MET_COMMITTED.labels(\n                topic=topic_partition.topic,\n                partition=topic_partition.partition,\n                consumer_group=group_id,\n            ).set(committed or 0)\n            self.MET_POSITION.labels(\n                topic=topic_partition.topic,\n                partition=topic_partition.partition,\n                consumer_group=group_id,\n            ).set(position or -1)\n            self.MET_HIGHWATER.labels(\n                topic=topic_partition.topic,\n                partition=topic_partition.partition,\n                consumer_group=group_id,\n            ).set(highwater or 0)\n            self.MET_LAG.labels(\n                topic=topic_partition.topic,\n                partition=topic_partition.partition,\n                consumer_group=group_id,\n            ).set(lag or 0)\n            self.MET_POSITION_LAG.labels(\n                topic=topic_partition.topic,\n                partition=topic_partition.partition,\n            
    consumer_group=group_id,\n            ).set(position_lag or 0)\n\n    def _clean_consumer_metrics(self) -> None:\n        \"\"\"\n        This method should be called when a rebalance takes place\n        to clean all consumers metrics. When the rebalance finishes\n        new metrics will be generated per consumer based on the\n        consumer assigments\n        \"\"\"\n        self.MET_LAG.clear()\n        self.MET_POSITION_LAG.clear()\n        self.MET_COMMITTED.clear()\n        self.MET_POSITION.clear()\n        self.MET_HIGHWATER.clear()\n\n    def clean_stream_consumer_metrics(self, consumer: Consumer) -> None:\n        topic_partitions = consumer.assignment()\n        group_id = consumer._group_id\n        for topic_partition in topic_partitions:\n            topic = topic_partition.topic\n            partition = topic_partition.partition\n\n            metrics_found = False\n            for sample in list(self.MET_LAG.collect())[0].samples:\n                if {\n                    \"topic\": topic,\n                    \"partition\": str(partition),\n                    \"consumer_group\": group_id,\n                } == sample.labels:\n                    metrics_found = True\n\n            if metrics_found:\n                self.MET_LAG.remove(topic, partition, group_id)\n                self.MET_POSITION_LAG.remove(topic, partition, group_id)\n                self.MET_COMMITTED.remove(topic, partition, group_id)\n                self.MET_POSITION.remove(topic, partition, group_id)\n                self.MET_HIGHWATER.remove(topic, partition, group_id)\n            else:\n                logger.debug(\n                    \"Metrics for consumer with group-id: \"\n                    f\"{consumer._group_id} not found\"\n                )\n\n    def add_producer(self, producer):\n        self._producer = producer\n\n    def add_streams(self, streams):\n        self._streams = streams\n\n    async def generate_consumer_metrics(self, consumer: Consumer):\n        \"\"\"\n        Generate Consumer Metrics for Prometheus\n\n        Format:\n            {\n                \"topic-1\": {\n                    \"1\": (\n                        [topic-1, partition-number, 'group-id-1'],\n                        committed, position, highwater, lag, position_lag\n                    )\n                    \"2\": (\n                        [topic-1, partition-number, 'group-id-1'],\n                        committed, position, highwater, lag, position_lag\n                    )\n                },\n                ...\n                \"topic-n\": {\n                    \"1\": (\n                        [topic-n, partition-number, 'group-id-n'],\n                        committed, position, highwater, lag, position_lag\n                    )\n                    \"2\": (\n                        [topic-n, partition-number, 'group-id-n'],\n                        committed, position, highwater, lag, position_lag\n                    )\n                }\n            }\n        \"\"\"\n        metrics: MetricsType = DefaultDict(dict)\n\n        topic_partitions = consumer.assignment()\n\n        for topic_partition in topic_partitions:\n            committed = await consumer.committed(topic_partition) or 0\n            position = await consumer.position(topic_partition)\n            highwater = consumer.highwater(topic_partition)\n\n            lag = position_lag = None\n            if highwater:\n                lag = highwater - committed\n                position_lag = highwater - 
position\n\n            metrics[topic_partition] = {\n                \"group_id\": consumer._group_id,\n                \"committed\": committed,\n                \"position\": position,\n                \"highwater\": highwater,\n                \"lag\": lag,\n                \"position_lag\": position_lag,\n            }\n\n        self._add_consumer_metrics(metrics)\n\n    async def _metrics_task(self) -> None:\n        \"\"\"\n        Task that runs in `backgroud` to generate\n        consumer metrics.\n\n        When self.running is False the task will finish and it\n        will be safe to stop consumers and producers.\n        \"\"\"\n        while self.running:\n            await asyncio.sleep(self.metrics_scrape_time)\n            for stream in self._streams:\n                if stream.consumer is not None:\n                    try:\n                        await self.generate_consumer_metrics(stream.consumer)\n                    except RuntimeError:\n                        logger.debug(\n                            f\"Metrics for stream {stream.name} can not be generated \"\n                            \"probably because it has been removed\"\n                        )\n
"},{"location":"monitoring/#kstreams.PrometheusMonitor.generate_consumer_metrics","title":"generate_consumer_metrics(consumer) async","text":"

Generate Consumer Metrics for Prometheus

Format

{ \"topic-1\": { \"1\": ( [topic-1, partition-number, 'group-id-1'], committed, position, highwater, lag, position_lag ) \"2\": ( [topic-1, partition-number, 'group-id-1'], committed, position, highwater, lag, position_lag ) }, ... \"topic-n\": { \"1\": ( [topic-n, partition-number, 'group-id-n'], committed, position, highwater, lag, position_lag ) \"2\": ( [topic-n, partition-number, 'group-id-n'], committed, position, highwater, lag, position_lag ) } }

Source code in kstreams/prometheus/monitor.py
async def generate_consumer_metrics(self, consumer: Consumer):\n    \"\"\"\n    Generate Consumer Metrics for Prometheus\n\n    Format:\n        {\n            \"topic-1\": {\n                \"1\": (\n                    [topic-1, partition-number, 'group-id-1'],\n                    committed, position, highwater, lag, position_lag\n                )\n                \"2\": (\n                    [topic-1, partition-number, 'group-id-1'],\n                    committed, position, highwater, lag, position_lag\n                )\n            },\n            ...\n            \"topic-n\": {\n                \"1\": (\n                    [topic-n, partition-number, 'group-id-n'],\n                    committed, position, highwater, lag, position_lag\n                )\n                \"2\": (\n                    [topic-n, partition-number, 'group-id-n'],\n                    committed, position, highwater, lag, position_lag\n                )\n            }\n        }\n    \"\"\"\n    metrics: MetricsType = DefaultDict(dict)\n\n    topic_partitions = consumer.assignment()\n\n    for topic_partition in topic_partitions:\n        committed = await consumer.committed(topic_partition) or 0\n        position = await consumer.position(topic_partition)\n        highwater = consumer.highwater(topic_partition)\n\n        lag = position_lag = None\n        if highwater:\n            lag = highwater - committed\n            position_lag = highwater - position\n\n        metrics[topic_partition] = {\n            \"group_id\": consumer._group_id,\n            \"committed\": committed,\n            \"position\": position,\n            \"highwater\": highwater,\n            \"lag\": lag,\n            \"position_lag\": position_lag,\n        }\n\n    self._add_consumer_metrics(metrics)\n
"},{"location":"monitoring/#consumer-metrics","title":"Consumer Metrics","text":"

We advise including the consumer_lag in your application's grafana dashboard.

consumer_lag will show you how far your consumers are lagging behind the published events in the topic they are reading. For instance, if you have a single consumer and another team is producing millions of events, the consumer might not be able to handle them in time (where "in time" is defined by you, e.g. "a message should be consumed within an hour of being received").

Based on the lag, you will have to develop your own alerts. An alert should be pushed to Slack if you experience more than a particular amount of lag.

You will require your consumer_group name in order to design a basic dashboard using the consumer_lag.

We could add a query in Grafana like this:

sum(kafka_consumer_group_ConsumerLagMetrics_Value{topic =~ \"YOUR_OWN_TOPIC_NAME\", groupId =~\"YOUR_CONSUMER_GROUP\", name=\"SumOffsetLag\"}) by (topic)\n

Remember to replace YOUR_CONSUMER_GROUP and YOUR_OWN_TOPIC_NAME with your consumer_group and topic respectively \u2b06\ufe0f

"},{"location":"monitoring/#producer-metrics","title":"Producer Metrics","text":"

If you have producers, it's a good idea to monitor the growth of Log End Offset (LEO).

The increase in LEO indicates the number of events produced in the last N minutes.

If you know that events should occur every N minutes, you can trigger alerts if no events occur because this metric will tell you whether or not events occurred.

We could add a query in Grafana like this, where N is 10m:

sum(max(increase(kafka_log_Log_Value{name=\"LogEndOffset\", topic =~ \"TOPIC_NAME\"}[10m])) by (partition, topic)) by (topic)\n

Remember to modify TOPIC_NAME to the name of the topic you want to track \u2b06\ufe0f

"},{"location":"monitoring/#custom-business-metrics","title":"Custom Business Metrics","text":"

One benefit of Prometheus is that you can design your own custom metrics.

Scenario: Consider an event-based ordering system. Assume you receive X orders daily and ship Y orders daily. Most likely, you will create a dashboard using this data.

Fortunately, we can create our own custom metrics by using the Prometheus Python client.

You can construct a variety of metrics with Prometheus:

  • Gauge
  • Counter
  • Histogram
  • Summary

You can read more about them on the Prometheus metric types page.

In our scenario, we will most likely want a Counter for orders received and a Counter for orders shipped.

from prometheus_client import Counter\nfrom kstreams import PrometheusMonitor\n\nclass MyAppPrometheusMonitor(PrometheusMonitor):\n    def __init__(self):\n        super().__init__() # initialize kstream metrics\n        self.orders_received = Counter('orders_received', 'Amount of orders received')\n        self.orders_shipped = Counter('orders_shipped', 'Amount of orders shipped')\n\n    def increase_received(self, amount: int = 1):\n        self.orders_received.inc(amount)\n\n    def increase_shipped(self, amount: int = 1):\n        self.orders_shipped.inc(amount)\n

In our kstreams app, we can:

stream_engine = create_engine(title=\"my-engine\", monitor=MyAppPrometheusMonitor())\n\n@stream_engine.stream(\"my-special-orders\")\nasync def consume_orders_received(cr: ConsumerRecord):\n    if cr.value.status == \"NEW\":\n        stream_engine.monitor.increase_received()\n    elif cr.value.status == \"SHIPPED\":\n        stream_engine.monitor.increase_shipped()\n

Your app's prometheus would display this data, which you might utilize to build a stylish \u2728dashboard\u2728 interface.

For further details, see the Prometheus python client documentation.

"},{"location":"serialization/","title":"Serialization","text":"

Kafka's job is to move bytes from producer to consumers, through a topic.

By default, this is what kstreams does.

from kstreams import Stream\n\nfrom .streams_roster import stream_roster\n\nmy_stream = Stream(\n    \"local--hello-world\",\n    func=stream_roster,\n    config={\n        \"group_id\": \"example-group\",\n    },\n)\n

As you can see the ConsumerRecord's value is bytes.

In order to keep your code pythonic, we provide a mechanism to serialize/deserialize these bytes, into something more useful. This way, you can work with other data structures, like a dict or dataclasses.

Sometimes it is easier to work with a dict in your app, give it to kstreams, and let it transform it into bytes to be delivered to Kafka. For this situation, you need to implement kstreams.serializers.Serializer.
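As a minimal sketch (reusing the JsonSerializer shown in the Serializer protocol section below, and assuming a stream_engine created as in the previous examples), the serializer can be passed per call to send, which accepts a serializer argument:

import json
from typing import Dict, Optional


class JsonSerializer:
    async def serialize(
        self,
        payload: dict,
        headers: Optional[Dict[str, str]] = None,
        serializer_kwargs: Optional[Dict] = None,
    ) -> bytes:
        # Return the UTF-8 encoded JSON payload
        return json.dumps(payload).encode()


async def produce_event() -> None:
    # stream_engine is the engine created earlier; the dict is turned into
    # bytes by the serializer before being delivered to Kafka
    await stream_engine.send(
        "local--hello-world",
        value={"message": "Hello world!"},
        serializer=JsonSerializer(),
    )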

The other situation is when you consume from Kafka (or other brokers). Instead of dealing with bytes, you may want to receive in your function the dict ready to be used. For those cases, we need to use middleware. For example, we can implement a JsonMiddleware:

import json\n\nfrom kstreams import middleware, ConsumerRecord\n\n\nclass JsonDeserializerMiddleware(middleware.BaseMiddleware):\n    async def __call__(self, cr: ConsumerRecord):\n        if cr.value is not None:\n            data = json.loads(cr.value.decode())\n            cr.value = data\n        return await self.next_call(cr)\n

It is also possible to use kstreams.serializers.Deserializer for deserialization, but it will be deprecated.

Warning

kstreams.serializers.Deserializer will be deprecated, use middlewares instead

"},{"location":"serialization/#kstreams.serializers.Serializer","title":"kstreams.serializers.Serializer","text":"

Protocol used by the Stream to serialize.

A Protocol is similar to features in other languages, like an interface or a trait.

End users should provide their own class implementing this protocol.

For example a JsonSerializer

from typing import Optional, Dict\nimport json\n\nclass JsonSerializer:\n\n    async def serialize(\n        self,\n        payload: dict,\n        headers: Optional[Dict[str, str]] = None,\n        serializer_kwargs: Optional[Dict] = None,\n    ) -> bytes:\n        \"\"\"Return UTF-8 encoded payload\"\"\"\n        value = json.dumps(payload)\n        return value.encode()\n

Notice that you don't need to inherit anything, you just have to comply with the Protocol.

Source code in kstreams/serializers.py
class Serializer(Protocol):\n    \"\"\"Protocol used by the Stream to serialize.\n\n    A Protocol is similar to other languages features like an interface or a trait.\n\n    End users should provide their own class implementing this protocol.\n\n    For example a `JsonSerializer`\n\n    ```python\n    from typing import Optional, Dict\n    import json\n\n    class JsonSerializer:\n\n        async def serialize(\n            self,\n            payload: dict,\n            headers: Optional[Dict[str, str]] = None,\n            serializer_kwargs: Optional[Dict] = None,\n        ) -> bytes:\n            \\\"\"\"Return UTF-8 encoded payload\\\"\"\"\n            value = json.dumps(payload)\n            return value.encode()\n    ```\n\n    Notice that you don't need to inherit anything,\n    you just have to comply with the Protocol.\n    \"\"\"\n\n    async def serialize(\n        self,\n        payload: Any,\n        headers: Optional[Headers] = None,\n        serializer_kwargs: Optional[Dict] = None,\n    ) -> bytes:\n        \"\"\"\n        Implement this method to deserialize the data received from the topic.\n        \"\"\"\n        ...\n
"},{"location":"serialization/#kstreams.serializers.Serializer.serialize","title":"serialize(payload, headers=None, serializer_kwargs=None) async","text":"

Implement this method to serialize the payload into the bytes that will be sent to the topic.

Source code in kstreams/serializers.py
async def serialize(\n    self,\n    payload: Any,\n    headers: Optional[Headers] = None,\n    serializer_kwargs: Optional[Dict] = None,\n) -> bytes:\n    \"\"\"\n    Implement this method to deserialize the data received from the topic.\n    \"\"\"\n    ...\n
"},{"location":"serialization/#kstreams.serializers.Deserializer","title":"kstreams.serializers.Deserializer","text":"

Protocol used by the Stream to deserialize.

A Protocol is similar to features in other languages, like an interface or a trait.

End users should provide their own class implementing this protocol.

For example a JsonDeserializer

import json\nfrom kstreams import ConsumerRecord\n\nclass JsonDeserializer:\n\n    async def deserialize(\n        self, consumer_record: ConsumerRecord, **kwargs\n    ) -> ConsumerRecord:\n        data = json.loads(consumer_record.value.decode())\n        consumer_record.value = data\n        return consumer_record\n
Source code in kstreams/serializers.py
class Deserializer(Protocol):\n    \"\"\"Protocol used by the Stream to deserialize.\n\n    A Protocol is similar to other languages features like an interface or a trait.\n\n    End users should provide their own class implementing this protocol.\n\n    For example a `JsonDeserializer`\n\n    ```python\n    import json\n    from kstreams import ConsumerRecord\n\n    class JsonDeserializer:\n\n        async def deserialize(\n            self, consumer_record: ConsumerRecord, **kwargs\n        ) -> ConsumerRecord:\n            data = json.loads(consumer_record.value.decode())\n            consumer_record.value = data\n            return consumer_record\n    ```\n    \"\"\"\n\n    async def deserialize(\n        self, consumer_record: ConsumerRecord, **kwargs\n    ) -> ConsumerRecord:\n        \"\"\"\n        Implement this method to deserialize the data received from the topic.\n        \"\"\"\n        ...\n
"},{"location":"serialization/#kstreams.serializers.Deserializer.deserialize","title":"deserialize(consumer_record, **kwargs) async","text":"

Implement this method to deserialize the data received from the topic.

Source code in kstreams/serializers.py
async def deserialize(\n    self, consumer_record: ConsumerRecord, **kwargs\n) -> ConsumerRecord:\n    \"\"\"\n    Implement this method to deserialize the data received from the topic.\n    \"\"\"\n    ...\n
"},{"location":"serialization/#usage","title":"Usage","text":"

Once you have written your serializer or deserializer, there are two ways of using them: in a generic fashion (engine level) or per stream.

"},{"location":"serialization/#initialize-the-engine-with-your-serializers","title":"Initialize the engine with your serializers","text":"

By doing this, all the streams will use these serializers by default.

stream_engine = create_engine(\n    title=\"my-stream-engine\",\n    serializer=JsonSerializer(),\n)\n
"},{"location":"serialization/#initilize-streams-with-a-deserializer-and-produce-events-with-serializers","title":"Initilize streams with a deserializer and produce events with serializers","text":"
from kstreams import middleware, ConsumerRecord\n\n\n@stream_engine.stream(topic, middlewares=[middleware.Middleware(JsonDeserializerMiddleware)])\nasync def hello_stream(cr: ConsumerRecord):\n    # remember event.value is now a dict\n    print(cr.value[\"message\"])\n    save_to_db(cr)\n
await stream_engine.send(\n    topic,\n    value={\"message\": \"test\"},\n    headers={\"content-type\": consts.APPLICATION_JSON},\n    key=\"1\",\n)\n
"},{"location":"stream/","title":"Streams","text":"

A Stream in kstreams is an extension of AIOKafkaConsumer

Consuming can be done using kstreams.Stream. You only need to decorate a coroutine with @stream_engine.stream. The decorator has the same API as the aiokafka consumer at initialization; in other words, it accepts the same args and kwargs that the aiokafka consumer accepts.

"},{"location":"stream/#kstreams.streams.Stream","title":"kstreams.streams.Stream","text":"

Attributes:

Name Type Description name Optional[str]

Stream name. Default is a generated uuid4

topics List[str]

List of topics to consume

subscribe_by_pattern bool

Whether to subscribe to topics by pattern

backend Kafka

backend kstreams.backends.kafka.Kafka: Backend to connect. Default Kafka

func Callable[[Stream], Awaitable[Any]]

Coroutine function or generator to be called when an event arrives

config Dict[str, Any]

Stream configuration. Here all the properties can be passed in the dictionary

deserializer Deserializer

Deserializer to be used when an event is consumed

initial_offsets List[TopicPartitionOffset]

List of TopicPartitionOffset that will seek the initial offsets to

rebalance_listener RebalanceListener

Listener callbacks for when partitions are assigned or revoked

"},{"location":"stream/#kstreams.streams.Stream--subscribe-to-a-topic","title":"Subscribe to a topic","text":"

Example

import aiorun\nfrom kstreams import create_engine, ConsumerRecord\n\nstream_engine = create_engine(title=\"my-stream-engine\")\n\n\n@stream_engine.stream(\"local--kstreams\", group_id=\"my-group-id\")\nasync def stream(cr: ConsumerRecord) -> None:\n    print(f\"Event consumed: headers: {cr.headers}, payload: {cr.value}\")\n\n\nasync def start():\n    await stream_engine.start()\n\n\nasync def shutdown(loop):\n    await stream_engine.stop()\n\n\nif __name__ == \"__main__\":\n    aiorun.run(\n        start(),\n        stop_on_unhandled_errors=True,\n        shutdown_callback=shutdown\n    )\n
"},{"location":"stream/#kstreams.streams.Stream--subscribe-to-multiple-topics","title":"Subscribe to multiple topics","text":"

Consuming from multiple topics using one stream is possible. A List[str] of topics must be provided.

Example

import aiorun\nfrom kstreams import create_engine, ConsumerRecord\n\nstream_engine = create_engine(title=\"my-stream-engine\")\n\n\n@stream_engine.stream(\n    [\"local--kstreams\", \"local--hello-world\"],\n    group_id=\"my-group-id\",\n)\nasync def consume(cr: ConsumerRecord) -> None:\n    print(f\"Event from {cr.topic}: headers: {cr.headers}, payload: {cr.value}\")\n
"},{"location":"stream/#kstreams.streams.Stream--subscribe-to-topics-by-pattern","title":"Subscribe to topics by pattern","text":"

In the following example the stream will subscribe to any topic that matches the regex ^dev--customer-.*, for example dev--customer-invoice or dev--customer-profile. The subscribe_by_pattern flag must be set to True.

Example

import aiorun\nfrom kstreams import create_engine, ConsumerRecord\n\nstream_engine = create_engine(title=\"my-stream-engine\")\n\n\n@stream_engine.stream(\n    topics=\"^dev--customer-.*$\",\n    subscribe_by_pattern=True,\n    group_id=\"my-group-id\",\n)\nasync def stream(cr: ConsumerRecord) -> None:\n    if cr.topic == \"dev--customer-invoice\":\n        print(\"Event from topic dev--customer-invoice\")\n    elif cr.topic == \"dev--customer-profile\":\n        print(\"Event from topic dev--customer-profile\")\n    else:\n        raise ValueError(f\"Invalid topic {cr.topic}\")\n\n\nasync def start():\n    await stream_engine.start()\n\nasync def shutdown(loop):\n    await stream_engine.stop()\n\n\nif __name__ == \"__main__\":\n    aiorun.run(\n        start(),\n        stop_on_unhandled_errors=True,\n        shutdown_callback=shutdown\n    )\n
"},{"location":"stream/#dependency-injection","title":"Dependency Injection","text":"

The old way to iterate over a stream is with the async for _ in stream loop. The iterable approach works, but in most cases end users are interested only in the ConsumerRecord; for this reason it is possible to remove the async for loop by using proper type hints. The available type hints are:

  • ConsumerRecord: The aiokafka ConsumerRecord that will be received every time that a new event is in the Stream
  • Stream: The Stream object that is subscribed to the topic/s. Useful when manual commit is enabled or when other Stream operations are needed
  • Send: Coroutine to produce events. The same as stream_engine.send(...)

If you use type hints, then every time a new event is in the stream, the coroutine function defined by the end user will be awaited with the specified types.

ConsumerRecord / ConsumerRecord and Stream / ConsumerRecord, Stream and Send / Old fashion
@stream_engine.stream(topic)\nasync def my_stream(cr: ConsumerRecord):\n    print(cr.value)\n
@stream_engine.stream(topic, enable_auto_commit=False)\nasync def my_stream(cr: ConsumerRecord, stream: Stream):\n    print(cr.value)\n    await stream.commit()\n
@stream_engine.stream(topic, enable_auto_commit=False)\nasync def my_stream(cr: ConsumerRecord, stream: Stream, send: Send):\n    print(cr.value)\n    await stream.commit()\n    await send(\"sink-to-elastic-topic\", value=cr.value)\n
@stream_engine.stream(topic)\nasync def consume(stream):  # you can specify the type but it will be the same result\n    async for cr in stream:\n        print(cr.value)\n        # you can do something with the stream as well!!\n

Note

The type arguments can be in any order. This might change in the future.

Warning

It is still possible to use the async for in loop, but it might be removed in the future. Migrate to the typing approach

"},{"location":"stream/#creating-a-stream-instance","title":"Creating a Stream instance","text":"

If for any reason you need to create Stream instances directly, you can do it without using the decorator stream_engine.stream.

Stream instance
import aiorun\nfrom kstreams import create_engine, Stream, ConsumerRecord\n\nstream_engine = create_engine(title=\"my-stream-engine\")\n\n\nclass MyDeserializer:\n\n    async def deserialize(self, consumer_record: ConsumerRecord, **kwargs):\n        return consumer_record.value.decode()\n\n\nasync def stream(cr: ConsumerRecord) -> None:\n    print(f\"Event consumed: headers: {cr.headers}, payload: {cr.value}\")\n\n\nstream = Stream(\n    \"local--kstreams\",\n    name=\"my-stream\",\n    func=stream,  # coroutine or async generator\n    deserializer=MyDeserializer(),\n)\n# add the stream to the engine\nstream_engine.add_stream(stream)\n\n\nasync def start():\n    await stream_engine.start()\n    await produce()\n\n\nasync def shutdown(loop):\n    await stream_engine.stop()\n\n\nif __name__ == \"__main__\":\n    aiorun.run(start(), stop_on_unhandled_errors=True, shutdown_callback=shutdown)\n
"},{"location":"stream/#removing-a-stream-from-the-engine","title":"Removing a stream from the engine","text":"Removing stream
stream_engine.remove_stream(stream)\n
"},{"location":"stream/#starting-the-stream-with-initial-offsets","title":"Starting the stream with initial offsets","text":"

If you want to start your consumption from certain offsets, you can include that in your stream instantiation.

Use case: This feature is useful if one wants to manage their own offsets, rather than committing consumed offsets to Kafka. When an application manages its own offsets and tries to start a stream, we start the stream using the initial offsets as defined in the database.

If you try to seek on a partition or topic that is not assigned to your stream, the code will ignore the seek and print out a warning. For example, if you have two consumers that are consuming from different partitions, and you try to seek for all of the partitions on each consumer, each consumer will seek for the partitions it has been assigned, and it will print out a warning log for the ones it was not assigned.

If you try to seek on offsets that are not yet present on your partition, the consumer will revert to the auto_offset_reset config. There will not be a warning, so be aware of this.

Also be aware that when your application restarts, it will most likely trigger the initial_offsets again. This means that setting initial_offsets to a hardcoded number might not get the results you expect.

Initial Offsets from Database
from kstreams import Stream, structs\n\n\ntopic_name = \"local--kstreams\"\ndb_table = ExampleDatabase()\ninitial_offset = structs.TopicPartitionOffset(topic=topic_name, partition=0, offset=db_table.offset)\n\n\nasync def my_stream(stream: Stream):\n    ...\n\n\nstream = Stream(\n    topic_name,\n    name=\"my-stream\",\n    func=my_stream,  # coroutine or async generator\n    deserializer=MyDeserializer(),\n    initial_offsets=[initial_offset],\n)\n
"},{"location":"stream/#stream-crashing","title":"Stream crashing","text":"

If your stream crashes for any reason, event consumption is stopped, meaning that no events will be consumed from the topic. However, it is possible to set one of four error policies per stream:

  • StreamErrorPolicy.STOP (default): Stop the Stream when an exception occurs. The exception is raised after the stream is properly stopped.
  • StreamErrorPolicy.RESTART: Stop and restart the Stream when an exception occurs. The event that caused the exception is skipped. The exception is NOT raised because the application should continue working; however, logger.exception() is used to alert the user.
  • StreamErrorPolicy.STOP_ENGINE: Stop the StreamEngine when an exception occurs. The exception is raised after ALL the Streams were properly stopped.
  • StreamErrorPolicy.STOP_APPLICATION: Stop the StreamEngine when an exception occurs and raise signal.SIGTERM. Useful when using kstreams with other libraries such as FastAPI.

In the following example, the StreamErrorPolicy.RESTART error policy is specified. If the Stream crashes with a ValueError exception, it is restarted:

from kstreams import create_engine, ConsumerRecord\nfrom kstreams.stream_utils import StreamErrorPolicy\n\nstream_engine = create_engine(title=\"my-stream-engine\")\n\n\n@stream_engine.stream(\n    \"local--hello-world\",\n    group_id=\"example-group\",\n    error_policy=StreamErrorPolicy.RESTART\n)\nasync def stream(cr: ConsumerRecord) -> None:\n    if cr.key == b\"error\":\n        # Stream will be restarted after the ValueError is raised\n        raise ValueError(\"error....\")\n\n    print(f\"Event consumed. Payload {cr.value}\")\n

We can see the logs:

ValueError: error....\nINFO:aiokafka.consumer.group_coordinator:LeaveGroup request succeeded\nINFO:aiokafka.consumer.consumer:Unsubscribed all topics or patterns and assigned partitions\nINFO:kstreams.streams:Stream consuming from topics ['local--hello-world'] has stopped!!! \n\n\nINFO:kstreams.middleware.middleware:Restarting stream <kstreams.streams.Stream object at 0x102d44050>\nINFO:aiokafka.consumer.subscription_state:Updating subscribed topics to: frozenset({'local--hello-world'})\n...\nINFO:aiokafka.consumer.group_coordinator:Setting newly assigned partitions {TopicPartition(topic='local--hello-world', partition=0)} for group example-group\n

Note

If you are using aiorun with stop_on_unhandled_errors=True and the error_policy is StreamErrorPolicy.RESTART, then the application will NOT stop, as the exception that caused the Stream to crash is not raised.

"},{"location":"stream/#changing-consumer-behavior","title":"Changing consumer behavior","text":"

Most of the time you will only set the topic and the group_id for the consumer, but sometimes you might want more control over it, for example changing the policy for resetting offsets on OffsetOutOfRange errors or the session timeout. To do this, use the same kwargs as the aiokafka consumer API.

# The consumer session times out if no heartbeat is received within 500 ms\n# On OffsetOutOfRange errors, the offset will move to the oldest available message (\u2018earliest\u2019)\n\n@stream_engine.stream(\"local--kstream\", group_id=\"de-my-partition\", session_timeout_ms=500, auto_offset_reset=\"earliest\")\nasync def stream(cr: ConsumerRecord):\n    print(f\"Event consumed: headers: {cr.headers}, payload: {cr.value}\")\n
"},{"location":"stream/#manual-commit","title":"Manual commit","text":"

When processing sensitive data and you want to be sure that the Kafka offset is committed only once your tasks are done, you can use the consumer's enable_auto_commit=False mode.

Manual commit example
@stream_engine.stream(\"local--kstream\", group_id=\"de-my-partition\", enable_auto_commit=False)\nasync def stream(cr: ConsumerRecord, stream: Stream):\n    print(f\"Event consumed: headers: {cr.headers}, payload: {cr.value}\")\n\n    # We need to make sure that the payload was stored before committing the kafka offset\n    await store_in_database(cr.value)\n    await stream.commit()  # You need to commit!!!\n

Note

This is a tradeoff from at-most-once to at-least-once delivery; to achieve exactly-once you will need to save offsets in the destination database and validate those yourself.
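
A minimal sketch of that idea (get_last_offset and save_with_offset are hypothetical helpers backed by your destination database, not part of kstreams):

@stream_engine.stream(\"local--kstream\", group_id=\"de-my-partition\", enable_auto_commit=False)\nasync def stream(cr: ConsumerRecord, stream: Stream):\n    # skip events whose offset was already processed (deduplication)\n    if cr.offset <= await get_last_offset(cr.topic, cr.partition):\n        return\n\n    # store the payload and its offset in the same database transaction\n    await save_with_offset(cr.value, cr.topic, cr.partition, cr.offset)\n    await stream.commit()\n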

"},{"location":"stream/#yield-from-stream","title":"Yield from stream","text":"

Sometimes it is useful to yield values from a stream, so you can consume events at your own pace or return results to the frontend (SSE example). If you use the yield keyword inside a coroutine, it will be transformed into an asynchronous generator function, meaning that inside there is an async generator that can be consumed.

Consuming an async generator is simple: you just use the async for in clause. Because events are only consumed inside the for loop, you have to make sure that the Stream has been started properly before entering it and stopped properly after leaving the async for in.

To facilitate the process, we have a context manager that takes care of the starting/stopping process.

Yield example
# Create your stream\n@stream_engine.stream(\"local--kstream\")\nasync def stream(cr: ConsumerRecord, stream: Stream):\n    yield cr.value\n\n\n# Consume the stream:\nasync with stream as stream_flow:  # Use the context manager\n    async for value in stream_flow:\n        ...\n        # do something with value (cr.value)\n

Note

If for some reason you interrupt the "async for in" in the async generator, the Stream will stop consuming events, meaning that the lag will increase.

Note

Yield from a stream only works with the typing approach

"},{"location":"stream/#get-many","title":"Get many","text":"

Get a batch of events from the assigned TopicPartition.

Prefetched events are returned in batches by topic-partition. If messages are not available in the prefetched buffer, this method waits timeout_ms milliseconds.

Attributes:

Name Type Description partitions List[TopicPartition] | None

The partitions to fetch messages from. If no partition is specified, then all subscribed partitions will be used

timeout_ms int | None

milliseconds spent waiting if data is not available in the buffer. If 0, returns immediately with any records that are available currently in the buffer, else returns empty. Must not be negative.

max_records int | None

The amount of records to fetch. If timeout_ms is defined and reached, and the fetched records have not reached max_records, it returns immediately with any records that are currently available in the buffer

Returns:

Type Description Dict[TopicPartition, List[ConsumerRecord]]

Topic to list of records

Example

@stream_engine.stream(topic, ...)\nasync def stream(stream: Stream):\n    while True:\n        data = await stream.getmany(max_records=5)\n        print(data)\n
Source code in kstreams/streams.py
async def getmany(\n    self,\n    partitions: typing.Optional[typing.List[TopicPartition]] = None,\n    timeout_ms: int = 0,\n    max_records: typing.Optional[int] = None,\n) -> typing.Dict[TopicPartition, typing.List[ConsumerRecord]]:\n    \"\"\"\n    Get a batch of events from the assigned TopicPartition.\n\n    Prefetched events are returned in batches by topic-partition.\n    If messages is not available in the prefetched buffer this method waits\n    `timeout_ms` milliseconds.\n\n    Attributes:\n        partitions List[TopicPartition] | None: The partitions that need\n            fetching message. If no one partition specified then all\n            subscribed partitions will be used\n        timeout_ms int | None: milliseconds spent waiting if\n            data is not available in the buffer. If 0, returns immediately\n            with any records that are available currently in the buffer,\n            else returns empty. Must not be negative.\n        max_records int | None: The amount of records to fetch.\n            if `timeout_ms` was defined and reached and the fetched records\n            has not reach `max_records` then returns immediately\n            with any records that are available currently in the buffer\n\n    Returns:\n        Topic to list of records\n\n    !!! Example\n        ```python\n        @stream_engine.stream(topic, ...)\n        async def stream(stream: Stream):\n            while True:\n                data = await stream.getmany(max_records=5)\n                print(data)\n        ```\n    \"\"\"\n    partitions = partitions or []\n    return await self.consumer.getmany(  # type: ignore\n        *partitions, timeout_ms=timeout_ms, max_records=max_records\n    )\n

Warning

This approach does not work with Dependency Injection.

"},{"location":"stream/#rebalance-listener","title":"Rebalance Listener","text":"

In some cases you will need a RebalanceListener so that different actions can be performed when partitions are assigned to or revoked from the stream.

"},{"location":"stream/#use-cases","title":"Use cases","text":"
  • Cleanup or custom state save on the start of a rebalance operation
  • Saving offsets in a custom store when a partition is revoked
  • Load a state or cache warmup on completion of a successful partition re-assignment.
"},{"location":"stream/#metrics-rebalance-listener","title":"Metrics Rebalance Listener","text":"

Kstreams uses a default listener for all the streams to clean up the metrics after a rebalance takes place.

"},{"location":"stream/#kstreams.MetricsRebalanceListener","title":"kstreams.MetricsRebalanceListener","text":"Source code in kstreams/rebalance_listener.py
class MetricsRebalanceListener(RebalanceListener):\n    async def on_partitions_revoked(self, revoked: typing.Set[TopicPartition]) -> None:\n        \"\"\"\n        Coroutine to be called *before* a rebalance operation starts and\n        *after* the consumer stops fetching data.\n\n        This will method will clean up the `Prometheus` metrics\n\n        Attributes:\n            revoked Set[TopicPartitions]: Partitions that were assigned\n                to the consumer on the last rebalance\n        \"\"\"\n        # lock all asyncio Tasks so no new metrics will be added to the Monitor\n        if revoked and self.engine is not None:\n            async with asyncio.Lock():\n                if self.stream is not None and self.stream.consumer is not None:\n                    self.engine.monitor.clean_stream_consumer_metrics(\n                        self.stream.consumer\n                    )\n\n    async def on_partitions_assigned(\n        self, assigned: typing.Set[TopicPartition]\n    ) -> None:\n        \"\"\"\n        Coroutine to be called *after* partition re-assignment completes\n        and *before* the consumer starts fetching data again.\n\n        This method will start the `Prometheus` metrics\n\n        Attributes:\n            assigned Set[TopicPartition]: Partitions assigned to the\n                consumer (may include partitions that were previously assigned)\n        \"\"\"\n        # lock all asyncio Tasks so no new metrics will be added to the Monitor\n        if assigned and self.engine is not None:\n            async with asyncio.Lock():\n                if self.stream is not None:\n                    self.stream.seek_to_initial_offsets()\n
"},{"location":"stream/#kstreams.MetricsRebalanceListener.on_partitions_assigned","title":"on_partitions_assigned(assigned) async","text":"

Coroutine to be called after partition re-assignment completes and before the consumer starts fetching data again.

This method will start the Prometheus metrics

Attributes:

Name Type Description assigned Set[TopicPartition]

Partitions assigned to the consumer (may include partitions that were previously assigned)

Source code in kstreams/rebalance_listener.py
async def on_partitions_assigned(\n    self, assigned: typing.Set[TopicPartition]\n) -> None:\n    \"\"\"\n    Coroutine to be called *after* partition re-assignment completes\n    and *before* the consumer starts fetching data again.\n\n    This method will start the `Prometheus` metrics\n\n    Attributes:\n        assigned Set[TopicPartition]: Partitions assigned to the\n            consumer (may include partitions that were previously assigned)\n    \"\"\"\n    # lock all asyncio Tasks so no new metrics will be added to the Monitor\n    if assigned and self.engine is not None:\n        async with asyncio.Lock():\n            if self.stream is not None:\n                self.stream.seek_to_initial_offsets()\n
"},{"location":"stream/#kstreams.MetricsRebalanceListener.on_partitions_revoked","title":"on_partitions_revoked(revoked) async","text":"

Coroutine to be called before a rebalance operation starts and after the consumer stops fetching data.

This method will clean up the Prometheus metrics

Attributes:

Name Type Description revoked Set[TopicPartitions]

Partitions that were assigned to the consumer on the last rebalance

Source code in kstreams/rebalance_listener.py
async def on_partitions_revoked(self, revoked: typing.Set[TopicPartition]) -> None:\n    \"\"\"\n    Coroutine to be called *before* a rebalance operation starts and\n    *after* the consumer stops fetching data.\n\n    This will method will clean up the `Prometheus` metrics\n\n    Attributes:\n        revoked Set[TopicPartitions]: Partitions that were assigned\n            to the consumer on the last rebalance\n    \"\"\"\n    # lock all asyncio Tasks so no new metrics will be added to the Monitor\n    if revoked and self.engine is not None:\n        async with asyncio.Lock():\n            if self.stream is not None and self.stream.consumer is not None:\n                self.engine.monitor.clean_stream_consumer_metrics(\n                    self.stream.consumer\n                )\n
"},{"location":"stream/#manual-commit_1","title":"Manual Commit","text":"

If manual commit is enabled, you might want to use the ManualCommitRebalanceListener. This rebalance listener will call commit before the stream partitions are revoked, to avoid the error CommitFailedError and duplicate message delivery after a rebalance. See the code example with manual commit; a minimal usage sketch is also shown below.
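
A minimal sketch of attaching the listener to a stream with manual commit enabled (the topic and group names here are assumptions):

from kstreams import ConsumerRecord, ManualCommitRebalanceListener, Stream\n\n\n@stream_engine.stream(\n    \"local--kstream\",\n    group_id=\"my-group\",\n    enable_auto_commit=False,\n    rebalance_listener=ManualCommitRebalanceListener(),\n)\nasync def stream(cr: ConsumerRecord, stream: Stream):\n    print(cr.value)\n    await stream.commit()\n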

Note

ManualCommitRebalanceListener also includes the MetricsRebalanceListener functionality.

"},{"location":"stream/#kstreams.ManualCommitRebalanceListener","title":"kstreams.ManualCommitRebalanceListener","text":"Source code in kstreams/rebalance_listener.py
class ManualCommitRebalanceListener(MetricsRebalanceListener):\n    async def on_partitions_revoked(self, revoked: typing.Set[TopicPartition]) -> None:\n        \"\"\"\n        Coroutine to be called *before* a rebalance operation starts and\n        *after* the consumer stops fetching data.\n\n        If manual commit is enabled, `commit` is called before the consumers\n        partitions are revoked to prevent the error `CommitFailedError`\n        and duplicate message delivery after a rebalance.\n\n        Attributes:\n            revoked Set[TopicPartitions]: Partitions that were assigned\n                to the consumer on the last rebalance\n        \"\"\"\n        if (\n            revoked\n            and self.stream is not None\n            and self.stream.consumer is not None\n            and not self.stream.consumer._enable_auto_commit\n        ):\n            logger.info(\n                f\"Manual commit enabled for stream {self.stream}. \"\n                \"Performing `commit` before revoking partitions\"\n            )\n            async with asyncio.Lock():\n                await self.stream.commit()\n\n            await super().on_partitions_revoked(revoked=revoked)\n
"},{"location":"stream/#kstreams.ManualCommitRebalanceListener.on_partitions_revoked","title":"on_partitions_revoked(revoked) async","text":"

Coroutine to be called before a rebalance operation starts and after the consumer stops fetching data.

If manual commit is enabled, commit is called before the consumers partitions are revoked to prevent the error CommitFailedError and duplicate message delivery after a rebalance.

Attributes:

Name Type Description revoked Set[TopicPartitions]

Partitions that were assigned to the consumer on the last rebalance

Source code in kstreams/rebalance_listener.py
async def on_partitions_revoked(self, revoked: typing.Set[TopicPartition]) -> None:\n    \"\"\"\n    Coroutine to be called *before* a rebalance operation starts and\n    *after* the consumer stops fetching data.\n\n    If manual commit is enabled, `commit` is called before the consumers\n    partitions are revoked to prevent the error `CommitFailedError`\n    and duplicate message delivery after a rebalance.\n\n    Attributes:\n        revoked Set[TopicPartitions]: Partitions that were assigned\n            to the consumer on the last rebalance\n    \"\"\"\n    if (\n        revoked\n        and self.stream is not None\n        and self.stream.consumer is not None\n        and not self.stream.consumer._enable_auto_commit\n    ):\n        logger.info(\n            f\"Manual commit enabled for stream {self.stream}. \"\n            \"Performing `commit` before revoking partitions\"\n        )\n        async with asyncio.Lock():\n            await self.stream.commit()\n\n        await super().on_partitions_revoked(revoked=revoked)\n
"},{"location":"stream/#custom-rebalance-listener","title":"Custom Rebalance Listener","text":"

If you want to define a custom RebalanceListener, it has to inherit from kstreams.RebalanceListener.

Note

It is also possible to inherit from ManualCommitRebalanceListener and MetricsRebalanceListener

"},{"location":"stream/#kstreams.RebalanceListener","title":"kstreams.RebalanceListener","text":"

A callback interface that the user can implement to trigger custom actions when the set of partitions is assigned to or revoked from the Stream.

Example

from kstreams import RebalanceListener, TopicPartition\nfrom .resource import stream_engine\n\n\nclass MyRebalanceListener(RebalanceListener):\n\n    async def on_partitions_revoked(\n        self, revoked: Set[TopicPartition]\n    ) -> None:\n        # Do something with the revoked partitions\n        # or with the Stream\n        print(self.stream)\n\n    async def on_partitions_assigned(\n        self, assigned: Set[TopicPartition]\n    ) -> None:\n        # Do something with the assigned partitions\n        # or with the Stream\n        print(self.stream)\n\n\n@stream_engine.stream(topic, rebalance_listener=MyRebalanceListener())\nasync def my_stream(stream: Stream):\n    async for event in stream:\n        ...\n
Source code in kstreams/rebalance_listener.py
class RebalanceListener(ConsumerRebalanceListener):\n    \"\"\"\n    A callback interface that the user can implement to trigger custom actions\n    when the set of partitions are assigned or revoked to the `Stream`.\n\n    !!! Example\n        ```python\n        from kstreams import RebalanceListener, TopicPartition\n        from .resource import stream_engine\n\n\n        class MyRebalanceListener(RebalanceListener):\n\n            async def on_partitions_revoked(\n                self, revoked: Set[TopicPartition]\n            ) -> None:\n                # Do something with the revoked partitions\n                # or with the Stream\n                print(self.stream)\n\n            async def on_partitions_assigned(\n                self, assigned: Set[TopicPartition]\n            ) -> None:\n                # Do something with the assigned partitions\n                # or with the Stream\n                print(self.stream)\n\n\n        @stream_engine.stream(topic, rebalance_listener=MyRebalanceListener())\n        async def my_stream(stream: Stream):\n            async for event in stream:\n                ...\n        ```\n    \"\"\"\n\n    def __init__(self) -> None:\n        self.stream: typing.Optional[\"Stream\"] = None\n        # engine added so it can react on rebalance events\n        self.engine: typing.Optional[\"StreamEngine\"] = None\n\n    async def on_partitions_revoked(self, revoked: typing.Set[TopicPartition]) -> None:\n        \"\"\"\n        Coroutine to be called *before* a rebalance operation starts and\n        *after* the consumer stops fetching data.\n\n        If you are using manual commit you have to commit all consumed offsets\n        here, to avoid duplicate message delivery after rebalance is finished.\n\n        Use cases:\n            - cleanup or custom state save on the start of a rebalance operation\n            - saving offsets in a custom store\n\n        Attributes:\n            revoked Set[TopicPartitions]: Partitions that were assigned\n                to the consumer on the last rebalance\n\n        !!! note\n            The `Stream` is available using `self.stream`\n        \"\"\"\n        ...  # pragma: no cover\n\n    async def on_partitions_assigned(\n        self, assigned: typing.Set[TopicPartition]\n    ) -> None:\n        \"\"\"\n        Coroutine to be called *after* partition re-assignment completes\n        and *before* the consumer starts fetching data again.\n\n        It is guaranteed that all the processes in a consumer group will\n        execute their `on_partitions_revoked` callback before any instance\n        executes its `on_partitions_assigned` callback.\n\n        Use cases:\n            - Load a state or cache warmup on completion of a successful\n            partition re-assignment.\n\n        Attributes:\n            assigned Set[TopicPartition]: Partitions assigned to the\n                consumer (may include partitions that were previously assigned)\n\n        !!! note\n            The `Stream` is available using `self.stream`\n        \"\"\"\n        ...  # pragma: no cover\n
"},{"location":"stream/#kstreams.RebalanceListener.on_partitions_assigned","title":"on_partitions_assigned(assigned) async","text":"

Coroutine to be called after partition re-assignment completes and before the consumer starts fetching data again.

It is guaranteed that all the processes in a consumer group will execute their on_partitions_revoked callback before any instance executes its on_partitions_assigned callback.

Use cases
  • Load a state or cache warmup on completion of a successful partition re-assignment.

Attributes:

Name Type Description assigned Set[TopicPartition]

Partitions assigned to the consumer (may include partitions that were previously assigned)

Note

The Stream is available using self.stream

Source code in kstreams/rebalance_listener.py
async def on_partitions_assigned(\n    self, assigned: typing.Set[TopicPartition]\n) -> None:\n    \"\"\"\n    Coroutine to be called *after* partition re-assignment completes\n    and *before* the consumer starts fetching data again.\n\n    It is guaranteed that all the processes in a consumer group will\n    execute their `on_partitions_revoked` callback before any instance\n    executes its `on_partitions_assigned` callback.\n\n    Use cases:\n        - Load a state or cache warmup on completion of a successful\n        partition re-assignment.\n\n    Attributes:\n        assigned Set[TopicPartition]: Partitions assigned to the\n            consumer (may include partitions that were previously assigned)\n\n    !!! note\n        The `Stream` is available using `self.stream`\n    \"\"\"\n    ...  # pragma: no cover\n
"},{"location":"stream/#kstreams.RebalanceListener.on_partitions_revoked","title":"on_partitions_revoked(revoked) async","text":"

Coroutine to be called before a rebalance operation starts and after the consumer stops fetching data.

If you are using manual commit you have to commit all consumed offsets here, to avoid duplicate message delivery after rebalance is finished.

Use cases
  • cleanup or custom state save on the start of a rebalance operation
  • saving offsets in a custom store

Attributes:

Name Type Description revoked Set[TopicPartitions]

Partitions that were assigned to the consumer on the last rebalance

Note

The Stream is available using self.stream

Source code in kstreams/rebalance_listener.py
async def on_partitions_revoked(self, revoked: typing.Set[TopicPartition]) -> None:\n    \"\"\"\n    Coroutine to be called *before* a rebalance operation starts and\n    *after* the consumer stops fetching data.\n\n    If you are using manual commit you have to commit all consumed offsets\n    here, to avoid duplicate message delivery after rebalance is finished.\n\n    Use cases:\n        - cleanup or custom state save on the start of a rebalance operation\n        - saving offsets in a custom store\n\n    Attributes:\n        revoked Set[TopicPartitions]: Partitions that were assigned\n            to the consumer on the last rebalance\n\n    !!! note\n        The `Stream` is available using `self.stream`\n    \"\"\"\n    ...  # pragma: no cover\n
"},{"location":"test_client/","title":"Testing","text":"

To test streams and producers or perform e2e tests you can make use of the test_utils.TestStreamClient.

The TestStreamClient aims to emulate the Kafka behaviour as much as possible using asyncio.Queue. This is excellent because you can test your code quite easily without spinning up Kafka, but it comes with some limitations. It is not possible to know beforehand how many topics exist, how many partitions per topic exist, the replication factor, current offsets, etc. So, the test client will create topics, partitions, assignments, etc. at runtime. Each Stream in your application will be assigned 3 partitions per topic by default (0, 1 and 2) in the test environment.

With the test client you can:

  • Send events so you won't need to mock the producer
  • Call the consumer code, then the client will make sure that all the events are consumed before leaving the async context
"},{"location":"test_client/#using-teststreamclient","title":"Using TestStreamClient","text":"

Import TestStreamClient.

Create a TestStreamClient by passing the engine instance to it.

Create functions with a name that starts with test_ (this is standard pytest convention).

Use the TestStreamClient object the same way as you do with the engine.

Write simple assert statements with the standard Python expressions that you need to check (again, standard pytest).

"},{"location":"test_client/#example","title":"Example","text":"

Let's assume that you have the following code example. The goal is to store all the consumed events in an EventStore for future analysis.

# example.py\nimport aiorun\nimport asyncio\nimport typing\nfrom dataclasses import dataclass, field\n\nfrom kstreams import ConsumerRecord, create_engine\nfrom kstreams.streams import Stream\n\ntopic = \"local--kstreams\"\n\nstream_engine = create_engine(title=\"my-stream-engine\")\n\n\n@dataclass\nclass EventStore:\n    \"\"\"\n    Store events in memory\n    \"\"\"\n    events: typing.List[ConsumerRecord] = field(default_factory=list)\n\n    def add(self, event: ConsumerRecord) -> None:\n        self.events.append(event)\n\n    @property\n    def total(self):\n        return len(self.events)\n\n\nevent_store = EventStore()\n\n\n@stream_engine.stream(topic, group_id=\"example-group\")\nasync def consume(cr: ConsumerRecord):\n    event_store.add(cr)\n\n\nasync def produce():\n    payload = b'{\"message\": \"Hello world!\"}'\n\n    for _ in range(5):\n        await stream_engine.send(topic, value=payload, key=\"1\")\n        await asyncio.sleep(2)\n\n\nasync def start():\n    await stream_engine.start()\n    await produce()\n\n\nasync def shutdown(loop):\n    await stream_engine.stop()\n\n\ndef main():\n    aiorun.run(start(), stop_on_unhandled_errors=True, shutdown_callback=shutdown)\n

Then you could have a test_stream.py file to test the code. You need to instantiate the TestStreamClient with the engine:

# test_stream.py\nimport pytest\nfrom kstreams.test_utils import TestStreamClient\n\nfrom example import stream_engine, event_store\n\nclient = TestStreamClient(stream_engine)\n\n\n@pytest.mark.asyncio\nasync def test_add_event_on_consume():\n    \"\"\"\n    Produce some events and check that the EventStore is updated.\n    \"\"\"\n    topic = \"local--kstreams\"  # Use the same topic as the stream\n    event = b'{\"message\": \"Hello world!\"}'\n\n    async with client:\n        metadata = await client.send(topic, value=event, key=\"1\")  # send the event with the test client\n        current_offset = metadata.offset\n        assert metadata.topic == topic\n\n        # send another event and check that the offset was incremented\n        metadata = await client.send(topic, value=b'{\"message\": \"Hello world!\"}', key=\"1\")\n        assert metadata.offset == current_offset + 1\n\n    # check that the event_store has 2 events stored\n    assert event_store.total == 2\n

Note

Notice that the produce coroutine is not used to send events in the test case. The TestStreamClient.send coroutine is used instead. This allows you to test streams without having producer code in your application.

"},{"location":"test_client/#testing-the-commit","title":"Testing the Commit","text":"

In some cases your stream will commit; in this situation, checking the committed offsets can be useful.

import pytest\nfrom kstreams.test_utils import TestStreamClient\nfrom kstreams import ConsumerRecord, Stream, TopicPartition\n\nfrom .example import produce, stream_engine\n\ntopic_name = \"local--kstreams-marcos\"\nvalue = b'{\"message\": \"Hello world!\"}'\nname = \"my-stream\"\nkey = \"1\"\npartition = 2\ntp = TopicPartition(\n    topic=topic_name,\n    partition=partition,\n)\ntotal_events = 10\n\n@stream_engine.stream(topic_name, name=name)\nasync def my_stream(cr: ConsumerRecord, stream: Stream):\n    # commit every time that an event arrives\n    await stream.commit({tp: cr.offset})\n\n\n# test the code\nclient = TestStreamClient(stream_engine)\n\n@pytest.mark.asyncio\nasync def test_consumer_commit():\n    async with client:\n        for _ in range(0, total_events):\n            await client.send(topic_name, partition=partition, value=value, key=key)\n\n        # check that everything was committed\n        stream = stream_engine.get_stream(name)\n        assert (await stream.committed(tp)) == total_events\n
"},{"location":"test_client/#e2e-test","title":"E2E test","text":"

In the previous code example the application produces to and consumes from the same topic, so TestStreamClient.send is not needed because engine.send is producing. For those situations you can just use your producer code and check that certain code was called.

# test_example.py\nimport pytest\nfrom unittest.mock import patch\n\nfrom kstreams.test_utils import TestStreamClient\n\nfrom .example import produce, stream_engine\n\nclient = TestStreamClient(stream_engine)\n\n\n@pytest.mark.asyncio\nasync def test_e2e_example():\n    \"\"\"\n    Test that events are produced by the engine and consumed by the streams\n    \"\"\"\n    with patch(\"example.on_consume\") as on_consume, patch(\"example.on_produce\") as on_produce:\n        async with client:\n            await produce()\n\n    assert on_produce.call_count == 5\n    assert on_consume.call_count == 5\n
"},{"location":"test_client/#producer-only","title":"Producer only","text":"

In some scenarios, your application will only produce events and other applications will consume them, but you want to make sure that the event was produced in a proper way and that the topic contains that event.

# producer_example.py\nfrom kstreams import create_engine\nimport aiorun\nimport asyncio\n\nstream_engine = create_engine(title=\"my-stream-engine\")\n\n\nasync def produce(topic: str, value: bytes, key: str):\n    # This could be a complicated function or something like a FastAPI view\n    await stream_engine.send(topic, value=value, key=key)\n\n\nasync def start():\n    await stream_engine.start()\n\n\nasync def shutdown(loop):\n    await stream_engine.stop()\n\n\ndef main():\n    aiorun.run(start(), stop_on_unhandled_errors=True, shutdown_callback=shutdown)\n

Then you could have a test_producer_example.py file to test the code:

# test_producer_example.py\nimport pytest\nfrom kstreams.test_utils import TestStreamClient\n\nfrom producer_example import stream_engine, produce\n\nclient = TestStreamClient(stream_engine)\n\n\n@pytest.mark.asyncio\nasync def test_event_produced():\n    topic_name = \"local--kstreams\"\n    value = b'{\"message\": \"Hello world!\"}'\n    key = \"1\"\n\n    async with client:\n        await produce(topic=topic_name, value=value, key=key)  # use the produce code to send events\n\n        # check that the event was placed in a topic in a proper way\n        consumer_record = await client.get_event(topic_name=topic_name)\n\n        assert consumer_record.value == value\n        assert consumer_record.key == key\n

Note

Even though the previous example is using a simple produce function, it shows what to do when the producer code is encapsulated in other functions, for example a FastAPI view. In that case you don't want to use client.send directly; just call the function that contains stream_engine.send(...).

"},{"location":"test_client/#defining-extra-topics","title":"Defining extra topics","text":"

For some use cases it is required to produce an event to a topic (the target topic) after an event was consumed from another topic (the source topic). We are in control of the source topic because it has a stream associated with it and we want to consume events from it; however, we might not be in control of the target topic.

How can we consume an event from the target topic, which has no stream associated with it and will be created only when a send is reached? The answer is to predefine the extra topics before the test cycle has started. Let's take a look at an example:

Let's imagine that we have the following code:

from kstreams import ConsumerRecord\n\nfrom .engine import stream_engine\n\n\n@stream_engine.stream(\"source-topic\")\nasync def consume(cr: ConsumerRecord) -> None:\n    # do something, for example save to db\n    await save_to_db(cr)\n\n    # then produce the event to the `target topic`\n    await stream_engine.send(\"target-topic\", value=cr.value, key=cr.key, headers=cr.headers)\n

Here we can test two things:

  1. Sending an event to the source-topic and checking that the event has been consumed and saved to the DB
  2. Checking that the event was sent to the target-topic

Testing point 1 is straightforward:

import pytest\nfrom kstreams.test_utils import TestStreamClient\n\nfrom .engine import stream_engine\n\n\nclient = TestStreamClient(stream_engine)\nvalue = b'{\"message\": \"Hello world!\"}'\nkey = \"my-key\"\n\nasync with client:\n    # produce to the topic that has a stream\n    await client.send(\"source-topic\", value=value, key=key)\n\n    # check that the event was saved to the DB\n    assert await db.get(...)\n

However, testing point 2 takes more effort, as the TestStreamClient is not aware of the target topic until it reaches the send inside the consume coroutine. If we try to get the target-topic event inside the async with context, we will get an error:

async with client:\n    # produce to the topic that has a stream\n    await client.send(\"source-topic\", value=value, key=key)\n\n    ...\n    # Let's check if it was received by the target topic\n    event = await client.get_event(topic_name=\"target-topic\")\n\n\nValueError: You might be trying to get the topic target-topic outside the `client async context` or trying to get an event from an empty topic target-topic. Make sure that the code is inside the async contextand the topic has events.\n

We could solve this with a delay (await asyncio.sleep(...)) inside the async with context to give the TestStreamClient time to create the topic; however, if the business logic inside consume is slow we need to add more delay, and it becomes a race condition.

To solve it properly, we can tell the TestStreamClient which extra topics we need during the test cycle.

import pytest\nfrom kstreams.test_utils import TestStreamClient\n\nfrom .engine import stream_engine\n\n\n# tell the client to create the extra topics\nclient = TestStreamClient(stream_engine, topics=[\"target-topic\"])\nvalue = b'{\"message\": \"Hello world!\"}'\nkey = \"my-key\"\n\nasync with client:\n    # produce to the topic that has a stream\n    await client.send(\"source-topic\", value=value, key=key)\n\n    # check that the event was saved to the DB\n    assert await db.get(...)\n\n    # Let's check if it was received by the target topic\n    event = await client.get_event(topic_name=\"target-topic\")\n    assert event.value == value\n    assert event.key == key\n
"},{"location":"test_client/#topics-subscribed-by-pattern","title":"Topics subscribed by pattern","text":"

When a Stream is using pattern subscription it is not possible to know beforehand how many topics the Stream will consume from. To solve this problem, the topics must be predefined using the extra topics feature of the TestStreamClient:

In the following example we have a Stream that will consume from topics that match the regular expression ^dev--customer-.*$, for example dev--customer-invoice and dev--customer-profile.

# app.py\nfrom kstreams import ConsumerRecord, create_engine\n\nstream_engine = create_engine(title=\"my-stream-engine\")\n\n\n@stream_engine.stream(topics=\"^dev--customer-.*$\", subscribe_by_pattern=True)\nasync def stream(cr: ConsumerRecord):\n    if cr.topic == customer_invoice_topic:\n        assert cr.value == invoice_event\n    elif cr.topic == customer_profile_topic:\n        assert cr.value == profile_event\n    else:\n        raise ValueError(f\"Invalid topic {cr.topic}\")\n

Then, to test our Stream, we need to predefine the topics:

# test_stream.py\nimport asyncio\n\nimport pytest\nfrom kstreams.test_utils import TestStreamClient, TopicManager  # assuming TopicManager is exported here\n\nfrom app import stream_engine\n\n\n@pytest.mark.asyncio\nasync def test_consume_events_topics_by_pattern():\n    \"\"\"\n    This test shows the possibility to subscribe to multiple topics using a pattern\n    \"\"\"\n    customer_invoice_topic = \"dev--customer-invoice\"\n    customer_profile_topic = \"dev--customer-profile\"\n\n    client = TestStreamClient(\n        stream_engine, topics=[customer_invoice_topic, customer_profile_topic]\n    )\n\n    async with client:\n        await client.send(customer_invoice_topic, value=b\"invoice-1\", key=\"1\")\n        await client.send(customer_profile_topic, value=b\"profile-1\", key=\"1\")\n\n        # give some time to consume all the events\n        await asyncio.sleep(0.1)\n        assert TopicManager.all_messages_consumed()\n
"},{"location":"test_client/#disabling-monitoring-during-testing","title":"Disabling monitoring during testing","text":"

Monitoring streams and producers is vital for streaming applications, but it requires extra effort. Sometimes during testing, monitoring is not required, as we only want to focus on testing the business logic. In order to disable monitoring during testing, use:

client = TestStreamClient(stream_engine, monitoring_enabled=False)\n
"},{"location":"utils/","title":"Utils","text":"

Utility functions

"},{"location":"utils/#kstreams.utils","title":"kstreams.utils","text":""},{"location":"utils/#kstreams.utils.create_ssl_context","title":"create_ssl_context(*, cafile=None, capath=None, cadata=None, certfile=None, keyfile=None, password=None, crlfile=None)","text":"

Wrapper of aiokafka.helpers.create_ssl_context with typehints.

Parameters:

Name Type Description Default cafile Optional[str]

Certificate Authority file path containing certificates used to sign broker certificates

None capath Optional[str]

Same as cafile, but points to a directory containing several CA certificates

None cadata Union[str, bytes, None]

Same as cafile, but instead contains already read data in either ASCII or bytes format

None certfile Optional[str]

optional filename of file in PEM format containing the client certificate, as well as any CA certificates needed to establish the certificate's authenticity

None keyfile Optional[str]

optional filename containing the client private key.

None password Optional[str]

optional password to be used when loading the certificate chain

None Source code in kstreams/utils.py
def create_ssl_context(\n    *,\n    cafile: Optional[str] = None,\n    capath: Optional[str] = None,\n    cadata: Union[str, bytes, None] = None,\n    certfile: Optional[str] = None,\n    keyfile: Optional[str] = None,\n    password: Optional[str] = None,\n    crlfile: Any = None,\n):\n    \"\"\"Wrapper of [aiokafka.helpers.create_ssl_context](\n        https://aiokafka.readthedocs.io/en/stable/api.html#helpers\n    )\n    with typehints.\n\n    Arguments:\n        cafile: Certificate Authority file path containing certificates\n            used to sign broker certificates\n        capath: Same as `cafile`, but points to a directory containing\n            several CA certificates\n        cadata: Same as `cafile`, but instead contains already\n            read data in either ASCII or bytes format\n        certfile: optional filename of file in PEM format containing\n            the client certificate, as well as any CA certificates needed to\n            establish the certificate's authenticity\n        keyfile: optional filename containing the client private key.\n        password: optional password to be used when loading the\n            certificate chain\n\n    \"\"\"\n    return aiokafka_create_ssl_context(\n        cafile=cafile,\n        capath=capath,\n        cadata=cadata,\n        certfile=certfile,\n        keyfile=keyfile,\n        password=password,\n        crlfile=crlfile,\n    )\n
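
A usage sketch (the certificate file paths are assumptions):

from kstreams.utils import create_ssl_context\n\nssl_context = create_ssl_context(\n    cafile=\"./certs/ca.pem\",\n    certfile=\"./certs/client.crt\",\n    keyfile=\"./certs/client.key\",\n)\n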
"},{"location":"utils/#kstreams.utils.create_ssl_context_from_mem","title":"create_ssl_context_from_mem(*, certdata, keydata, password=None, cadata=None)","text":"

Create an SSL context from data in memory.

This makes it easy to read the certificates from environment variables. Usually the data is loaded from env variables.

Parameters:

Name Type Description Default cadata Optional[str]

certificates used to sign broker certificates provided as unicode str

None certdata str

the client certificate, as well as any CA certificates needed to establish the certificate's authenticity provided as unicode str

required keydata str

the client private key provided as unicode str

required password Optional[str]

optional password to be used when loading the certificate chain

None Source code in kstreams/utils.py
def create_ssl_context_from_mem(\n    *,\n    certdata: str,\n    keydata: str,\n    password: Optional[str] = None,\n    cadata: Optional[str] = None,\n) -> Optional[ssl.SSLContext]:\n    \"\"\"Create a SSL context from data on memory.\n\n    This makes it easy to read the certificates from environmental variables\n    Usually the data is loaded from env variables.\n\n    Arguments:\n        cadata: certificates used to sign broker certificates provided as unicode str\n        certdata: the client certificate, as well as any CA certificates needed to\n            establish the certificate's authenticity provided as unicode str\n        keydata: the client private key provided as unicode str\n        password: optional password to be used when loading the\n            certificate chain\n    \"\"\"\n    with contextlib.ExitStack() as stack:\n        cert_file = stack.enter_context(NamedTemporaryFile(suffix=\".crt\"))\n        key_file = stack.enter_context(NamedTemporaryFile(suffix=\".key\"))\n\n        # expecting unicode data, writing it as bytes to files as utf-8\n        cert_file.write(certdata.encode(\"utf-8\"))\n        cert_file.flush()\n\n        key_file.write(keydata.encode(\"utf-8\"))\n        key_file.flush()\n\n        ssl_context = ssl.create_default_context(cadata=cadata)\n        ssl_context.load_cert_chain(\n            cert_file.name, keyfile=key_file.name, password=password\n        )\n        return ssl_context\n    return None\n
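
A usage sketch that reads the certificate data from environment variables (the variable names are assumptions):

import os\n\nfrom kstreams.utils import create_ssl_context_from_mem\n\nssl_context = create_ssl_context_from_mem(\n    certdata=os.environ[\"KAFKA_CLIENT_CERT\"],\n    keydata=os.environ[\"KAFKA_CLIENT_KEY\"],\n)\n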
"}]} \ No newline at end of file diff --git a/pr-preview/pr-248/stream/index.html b/pr-preview/pr-248/stream/index.html index a37fcabd..e2a4b0d2 100644 --- a/pr-preview/pr-248/stream/index.html +++ b/pr-preview/pr-248/stream/index.html @@ -1711,10 +1711,7 @@

Get many

Source code in kstreams/streams.py -
294
-295
-296
-297
+              
297
 298
 299
 300
@@ -1752,7 +1749,10 @@ 

Get many

332 333 334 -335
async def getmany(
+335
+336
+337
+338
async def getmany(
     self,
     partitions: typing.Optional[typing.List[TopicPartition]] = None,
     timeout_ms: int = 0,