From ad13a0f7d76d21769dfaef310f907d3447d13680 Mon Sep 17 00:00:00 2001 From: David Chelimsky Date: Fri, 29 Mar 2019 12:42:55 -0500 Subject: [PATCH] return to early conversion to ByteBuffer This eliminates the need to read InputStream more than once, but still leaves us requiring them to fit in memory. Fixes #67. --- CHANGES.md | 7 +++- src/cognitect/aws/client.clj | 3 +- src/cognitect/aws/shape.clj | 1 - src/cognitect/aws/util.clj | 56 +++++++++++----------------- test/src/cognitect/aws/util_test.clj | 19 ++++++---- 5 files changed, 42 insertions(+), 44 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 8c2ce36d..3444a0ea 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,8 +1,13 @@ # aws-api 0.8 +## DEV + +* read input-stream once [#67](https://github.com/cognitect-labs/aws-api/issues/67) + ## 0.8.280 / 2019-03-25 -* support endpoint-override map [#59](https://github.com/cognitect-labs/aws-api/issues/59), [#61](https://github.com/cognitect-labs/aws-api/issues/61), [#64](https://github.com/cognitect-labs/aws-api/issues/64) +* support `:endpoint-override` map [#59](https://github.com/cognitect-labs/aws-api/issues/59), [#61](https://github.com/cognitect-labs/aws-api/issues/61), [#64](https://github.com/cognitect-labs/aws-api/issues/64) + * DEPRECATED support for `:endpoint-override` string * only parse json response body when output-shape is specified [#66](https://github.com/cognitect-labs/aws-api/issues/66) ## 0.8.273 / 2019-03-01 diff --git a/src/cognitect/aws/client.clj b/src/cognitect/aws/client.clj index 0124e4d7..1905adda 100644 --- a/src/cognitect/aws/client.clj +++ b/src/cognitect/aws/client.clj @@ -69,10 +69,11 @@ http-request (sign-http-request service region (credentials/fetch credentials) (-> (build-http-request service op-map) (with-endpoint endpoint) + (update :body util/->bbuf) ((partial interceptors/modify-http-request service op-map))))] (swap! 
result-meta assoc :http-request http-request) (http/submit http-client - (update http-request :body util/->bbuf) + http-request (a/chan 1 (map #(with-meta (handle-http-response service op-map %) (assoc @result-meta diff --git a/src/cognitect/aws/shape.clj b/src/cognitect/aws/shape.clj index 65af1b81..a5b02e93 100644 --- a/src/cognitect/aws/shape.clj +++ b/src/cognitect/aws/shape.clj @@ -399,7 +399,6 @@ [nodes] (-> nodes first :content first)) -;; TODO (dchelimsky 2017-04-22) validate enum membership? (defmethod xml-parse* "string" [_ nodes] (or (data nodes) "")) (defmethod xml-parse* "character" [_ nodes] (or (data nodes) "")) (defmethod xml-parse* "boolean" [_ nodes] (= (data nodes) "true")) diff --git a/src/cognitect/aws/util.clj b/src/cognitect/aws/util.clj index 77f968d7..33810c25 100644 --- a/src/cognitect/aws/util.clj +++ b/src/cognitect/aws/util.clj @@ -67,34 +67,19 @@ [^bytes bytes] (String. (Hex/encodeHex bytes true))) -(defn ^bytes input-stream->byte-array - "Copies is to a byte-array, leaving the input-stream's mark intact. - - is must support .mark and .reset (e.g. BufferedInputStream)" - [^InputStream is] - (when-not (.markSupported is) - (throw (ex-info "InputStream does not support .mark and .reset." {:class (class is)}))) - (with-open [os (java.io.ByteArrayOutputStream.)] - (.mark is 0) - (io/copy is os) - (let [res (.toByteArray os)] - (.reset is) - res))) - (defn sha-256 "Returns the sha-256 hash of data, which can be a byte-array, an input-stream, or nil, in which case returns the sha-256 of the empty string." [data] - (cond (instance? InputStream data) - (sha-256 (input-stream->byte-array data)) - (string? data) - (sha-256 (.getBytes ^String data "UTF-8")) - :else - (let [digest (MessageDigest/getInstance "SHA-256")] - (when data - (.update digest data 0 (alength ^bytes data))) - (.digest digest)))) + (if (string? 
data) + (sha-256 (.getBytes ^String data "UTF-8")) + (let [digest (MessageDigest/getInstance "SHA-256")] + (when data + (if (instance? ByteBuffer data) + (.update digest ^ByteBuffer data) + (.update digest ^bytes data))) + (.digest digest)))) (defn hmac-sha-256 [key ^String data] @@ -102,6 +87,10 @@ (.init mac (SecretKeySpec. key "HmacSHA256")) (.doFinal mac (.getBytes data "UTF-8")))) +(defn ^bytes input-stream->byte-array [is] + (doto (byte-array (.available ^InputStream is)) + (#(.read ^InputStream is %)))) + (defn bbuf->bytes [^ByteBuffer bbuf] (when bbuf @@ -131,9 +120,12 @@ String (->bbuf [s] (->bbuf (.getBytes s "UTF-8"))) - java.io.InputStream + InputStream (->bbuf [is] (->bbuf (input-stream->byte-array is))) + ByteBuffer + (->bbuf [bb] bb) + nil (->bbuf [_])) @@ -221,8 +213,8 @@ (class (byte-array 0)) (base64-encode [ba] (.encodeToString (Base64/getEncoder) ba)) - java.io.InputStream - (base64-encode [is] (base64-encode (input-stream->byte-array is))) + ByteBuffer + (base64-encode [bb] (base64-encode (.array bb))) java.lang.String (base64-encode [s] (base64-encode (.getBytes s)))) @@ -244,18 +236,14 @@ (def ^Charset UTF8 (Charset/forName "UTF-8")) -(defn md5 - "returns hash as byte array" - [data] - (let [ba (cond - (bytes? data) data - (string? data) (.getBytes ^String data UTF8) - (instance? 
java.io.InputStream data) (input-stream->byte-array data)) +(defn ^bytes md5 + "returns an MD5 hash of the content of bb as a byte array" + [^ByteBuffer bb] + (let [ba (.array bb) hasher (MessageDigest/getInstance "MD5")] (.update hasher ^bytes ba) (.digest hasher))) - (defn gen-idempotency-token [] (UUID/randomUUID)) diff --git a/test/src/cognitect/aws/util_test.clj b/test/src/cognitect/aws/util_test.clj index ece9d123..bfe62734 100644 --- a/test/src/cognitect/aws/util_test.clj +++ b/test/src/cognitect/aws/util_test.clj @@ -1,21 +1,26 @@ (ns cognitect.aws.util-test (:require [clojure.test :refer :all] [clojure.java.io :as io] - [cognitect.aws.util :as util])) + [cognitect.aws.util :as util]) + (:import [java.nio ByteBuffer] + [java.util Random] + [java.util Arrays])) (deftest test-input-stream->byte-array (is (= "hi" (slurp (util/input-stream->byte-array (io/input-stream (.getBytes "hi")))))) - (testing "resets input-stream so it can be read again" - (let [stream (io/input-stream (.getBytes "hi"))] - (is (= (seq (.getBytes "hi")) (seq (util/input-stream->byte-array stream)))) - (is (= "hi" (slurp stream)))))) + (testing "works with a 1mb array" + (let [input (byte-array (int (Math/pow 2 20))) + rng (Random.) + _ (.nextBytes rng input) + output (util/input-stream->byte-array (io/input-stream input))] + (is (Arrays/equals ^bytes input ^bytes output))))) (deftest test-sha-256 (testing "returns sha for empty string if given nil" (is (= (seq (util/sha-256 nil)) (seq (util/sha-256 "")) (seq (util/sha-256 (.getBytes "")))))) - (testing "accepts string, byte array, or input stream" + (testing "accepts string, byte array, or ByteBuffer" (is (= (seq (util/sha-256 "hi")) (seq (util/sha-256 (.getBytes "hi"))) - (seq (util/sha-256 (io/input-stream (.getBytes "hi")))))))) + (seq (util/sha-256 (ByteBuffer/wrap (.getBytes "hi"))))))))