From bd5693164e7331df5f14186fd002e72e5203d7ee Mon Sep 17 00:00:00 2001 From: Aaron Gabriel Neyer Date: Tue, 12 Apr 2022 15:42:04 -0600 Subject: [PATCH] feat: track invocation id for retry metrics (#741) * feat: track invocation id for retry metrics * woops, fix the context * slight adjustment to stringify * old tests pass * lint * adjust based on new python-cloud-core changes * updated cloud core, and all working * test that invocation id changes between calls * lint and fix test name Co-authored-by: Anthonios Partheniou --- google/cloud/storage/_helpers.py | 7 +- google/cloud/storage/_http.py | 2 + setup.py | 2 +- tests/unit/test__helpers.py | 2 + tests/unit/test__http.py | 13 +- tests/unit/test_blob.py | 336 +++++++++++++++++-------------- tests/unit/test_client.py | 81 +++++--- 7 files changed, 267 insertions(+), 176 deletions(-) diff --git a/google/cloud/storage/_helpers.py b/google/cloud/storage/_helpers.py index 30866c8a3..cc85525d8 100644 --- a/google/cloud/storage/_helpers.py +++ b/google/cloud/storage/_helpers.py @@ -21,6 +21,7 @@ from hashlib import md5 import os from urllib.parse import urlsplit +from uuid import uuid4 from google import resumable_media from google.auth import environment_vars @@ -584,6 +585,10 @@ def _api_core_retry_to_resumable_media_retry(retry, num_retries=None): return resumable_media.RetryStrategy(max_retries=0) +def _get_invocation_id(): + return "gccl-invocation-id/" + str(uuid4()) + + def _get_default_headers( user_agent, content_type="application/json; charset=UTF-8", @@ -600,7 +605,7 @@ def _get_default_headers( "Accept": "application/json", "Accept-Encoding": "gzip, deflate", "User-Agent": user_agent, - "x-goog-api-client": user_agent, + "X-Goog-API-Client": f"{user_agent} {_get_invocation_id()}", "content-type": content_type, "x-upload-content-type": x_upload_content_type or content_type, } diff --git a/google/cloud/storage/_http.py b/google/cloud/storage/_http.py index 0ade1525f..9b29f6280 100644 --- a/google/cloud/storage/_http.py +++ b/google/cloud/storage/_http.py @@ -18,6 +18,7 @@ from google.cloud import _http from google.cloud.storage import __version__ +from google.cloud.storage import _helpers class Connection(_http.JSONConnection): @@ -59,6 +60,7 @@ def __init__(self, client, client_info=None, api_endpoint=None): def api_request(self, *args, **kwargs): retry = kwargs.pop("retry", None) + kwargs["extra_api_info"] = _helpers._get_invocation_id() call = functools.partial(super(Connection, self).api_request, *args, **kwargs) if retry: # If this is a ConditionalRetryPolicy, check conditions. diff --git a/setup.py b/setup.py index 3f5b157c1..af6c97fb2 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ dependencies = [ "google-auth >= 1.25.0, < 3.0dev", "google-api-core >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", - "google-cloud-core >= 1.6.0, < 3.0dev", + "google-cloud-core >= 2.3.0, < 3.0dev", "google-resumable-media >= 2.3.2", "requests >= 2.18.0, < 3.0.0dev", "protobuf", diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 6c8770576..dbe0055df 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -19,6 +19,8 @@ from google.cloud.storage.retry import DEFAULT_RETRY from google.cloud.storage.retry import DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED +GCCL_INVOCATION_TEST_CONST = "gccl-invocation-id/test-invocation-123" + class Test__get_storage_host(unittest.TestCase): @staticmethod diff --git a/tests/unit/test__http.py b/tests/unit/test__http.py index fcdb5d1a7..890fd1352 100644 --- a/tests/unit/test__http.py +++ b/tests/unit/test__http.py @@ -13,9 +13,13 @@ # limitations under the License. import unittest +from unittest.mock import patch import mock +from google.cloud.storage import _helpers +from tests.unit.test__helpers import GCCL_INVOCATION_TEST_CONST + class TestConnection(unittest.TestCase): @staticmethod @@ -44,12 +48,17 @@ def test_extra_headers(self): conn = self._make_one(client) req_data = "hey-yoooouuuuu-guuuuuyyssss" - result = conn.api_request("GET", "/rainbow", data=req_data, expect_json=False) + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + result = conn.api_request( + "GET", "/rainbow", data=req_data, expect_json=False + ) self.assertEqual(result, data) expected_headers = { "Accept-Encoding": "gzip", - base_http.CLIENT_INFO_HEADER: conn.user_agent, + base_http.CLIENT_INFO_HEADER: f"{conn.user_agent} {GCCL_INVOCATION_TEST_CONST}", "User-Agent": conn.user_agent, } expected_uri = conn.build_api_url("/rainbow") diff --git a/tests/unit/test_blob.py b/tests/unit/test_blob.py index f48b4a1e2..8c86c002e 100644 --- a/tests/unit/test_blob.py +++ b/tests/unit/test_blob.py @@ -21,11 +21,13 @@ import tempfile import unittest import http.client +from unittest.mock import patch from urllib.parse import urlencode import mock import pytest +from google.cloud.storage import _helpers from google.cloud.storage._helpers import _get_default_headers from google.cloud.storage.retry import ( DEFAULT_RETRY, @@ -33,6 +35,7 @@ ) from google.cloud.storage.retry import DEFAULT_RETRY_IF_ETAG_IN_JSON from google.cloud.storage.retry import DEFAULT_RETRY_IF_GENERATION_SPECIFIED +from tests.unit.test__helpers import GCCL_INVOCATION_TEST_CONST def _make_credentials(): @@ -2234,17 +2237,23 @@ def test__get_upload_arguments(self): blob.content_disposition = "inline" content_type = "image/jpeg" - info = blob._get_upload_arguments(client, content_type) + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + info = blob._get_upload_arguments(client, content_type) headers, object_metadata, new_content_type = info header_key_value = "W3BYd0AscEBAQWZCZnJSM3gtMmIyU0NIUiwuP1l3Uk8=" header_key_hash_value = "G0++dxF4q5rG4o9kE8gvEKn15RH6wLm0wXV1MgAlXOg=" - expected_headers = { - **_get_default_headers(client._connection.user_agent, content_type), - "X-Goog-Encryption-Algorithm": "AES256", - "X-Goog-Encryption-Key": header_key_value, - "X-Goog-Encryption-Key-Sha256": header_key_hash_value, - } + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + expected_headers = { + **_get_default_headers(client._connection.user_agent, content_type), + "X-Goog-Encryption-Algorithm": "AES256", + "X-Goog-Encryption-Key": header_key_value, + "X-Goog-Encryption-Key-Sha256": header_key_hash_value, + } self.assertEqual(headers, expected_headers) expected_metadata = { "contentDisposition": blob.content_disposition, @@ -2313,20 +2322,23 @@ def _do_multipart_success( expected_timeout = timeout timeout_kwarg = {"timeout": timeout} - response = blob._do_multipart_upload( - client, - stream, - content_type, - size, - num_retries, - predefined_acl, - if_generation_match, - if_generation_not_match, - if_metageneration_match, - if_metageneration_not_match, - retry=retry, - **timeout_kwarg - ) + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + response = blob._do_multipart_upload( + client, + stream, + content_type, + size, + num_retries, + predefined_acl, + if_generation_match, + if_generation_not_match, + if_metageneration_match, + if_metageneration_not_match, + retry=retry, + **timeout_kwarg + ) # Clean up the get_api_base_url_for_mtls mock. if mtls: @@ -2387,11 +2399,14 @@ def _do_multipart_success( + data_read + b"\r\n--==0==--" ) - headers = _get_default_headers( - client._connection.user_agent, - b'multipart/related; boundary="==0=="', - "application/xml", - ) + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + headers = _get_default_headers( + client._connection.user_agent, + b'multipart/related; boundary="==0=="', + "application/xml", + ) client._http.request.assert_called_once_with( "POST", upload_url, data=payload, headers=headers, timeout=expected_timeout ) @@ -2578,23 +2593,25 @@ def _initiate_resumable_helper( else: expected_timeout = timeout timeout_kwarg = {"timeout": timeout} - - upload, transport = blob._initiate_resumable_upload( - client, - stream, - content_type, - size, - num_retries, - extra_headers=extra_headers, - chunk_size=chunk_size, - predefined_acl=predefined_acl, - if_generation_match=if_generation_match, - if_generation_not_match=if_generation_not_match, - if_metageneration_match=if_metageneration_match, - if_metageneration_not_match=if_metageneration_not_match, - retry=retry, - **timeout_kwarg - ) + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + upload, transport = blob._initiate_resumable_upload( + client, + stream, + content_type, + size, + num_retries, + extra_headers=extra_headers, + chunk_size=chunk_size, + predefined_acl=predefined_acl, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + retry=retry, + **timeout_kwarg + ) # Clean up the get_api_base_url_for_mtls mock. if mtls: @@ -2634,18 +2651,21 @@ def _initiate_resumable_helper( upload_url += "?" + urlencode(qs_params) self.assertEqual(upload.upload_url, upload_url) - if extra_headers is None: - self.assertEqual( - upload._headers, - _get_default_headers(client._connection.user_agent, content_type), - ) - else: - expected_headers = { - **_get_default_headers(client._connection.user_agent, content_type), - **extra_headers, - } - self.assertEqual(upload._headers, expected_headers) - self.assertIsNot(upload._headers, expected_headers) + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + if extra_headers is None: + self.assertEqual( + upload._headers, + _get_default_headers(client._connection.user_agent, content_type), + ) + else: + expected_headers = { + **_get_default_headers(client._connection.user_agent, content_type), + **extra_headers, + } + self.assertEqual(upload._headers, expected_headers) + self.assertIsNot(upload._headers, expected_headers) self.assertFalse(upload.finished) if chunk_size is None: if blob_chunk_size is None: @@ -2684,9 +2704,13 @@ def _initiate_resumable_helper( # Check the mocks. blob._get_writable_metadata.assert_called_once_with() payload = json.dumps(object_metadata).encode("utf-8") - expected_headers = _get_default_headers( - client._connection.user_agent, x_upload_content_type=content_type - ) + + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + expected_headers = _get_default_headers( + client._connection.user_agent, x_upload_content_type=content_type + ) if size is not None: expected_headers["x-upload-content-length"] = str(size) if extra_headers is not None: @@ -2932,18 +2956,25 @@ def _do_resumable_helper( # Create mocks to be checked for doing transport. resumable_url = "http://test.invalid?upload_id=and-then-there-was-1" - headers1 = { - **_get_default_headers(USER_AGENT, content_type), - "location": resumable_url, - } - headers2 = { - **_get_default_headers(USER_AGENT, content_type), - "range": "bytes=0-{:d}".format(CHUNK_SIZE - 1), - } - headers3 = _get_default_headers(USER_AGENT, content_type) - transport, responses = self._make_resumable_transport( - headers1, headers2, headers3, total_bytes, data_corruption=data_corruption - ) + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + headers1 = { + **_get_default_headers(USER_AGENT, content_type), + "location": resumable_url, + } + headers2 = { + **_get_default_headers(USER_AGENT, content_type), + "range": "bytes=0-{:d}".format(CHUNK_SIZE - 1), + } + headers3 = _get_default_headers(USER_AGENT, content_type) + transport, responses = self._make_resumable_transport( + headers1, + headers2, + headers3, + total_bytes, + data_corruption=data_corruption, + ) # Create some mock arguments and call the method under test. client = mock.Mock(_http=transport, _connection=_Connection, spec=["_http"]) @@ -2963,66 +2994,70 @@ def _do_resumable_helper( expected_timeout = timeout timeout_kwarg = {"timeout": timeout} - response = blob._do_resumable_upload( - client, - stream, - content_type, - size, - num_retries, - predefined_acl, - if_generation_match, - if_generation_not_match, - if_metageneration_match, - if_metageneration_not_match, - retry=retry, - **timeout_kwarg - ) + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): - # Check the returned values. - self.assertIs(response, responses[2]) - self.assertEqual(stream.tell(), total_bytes) + response = blob._do_resumable_upload( + client, + stream, + content_type, + size, + num_retries, + predefined_acl, + if_generation_match, + if_generation_not_match, + if_metageneration_match, + if_metageneration_not_match, + retry=retry, + **timeout_kwarg + ) - # Check the mocks. - call0 = self._do_resumable_upload_call0( - client, - blob, - content_type, - size=size, - predefined_acl=predefined_acl, - if_generation_match=if_generation_match, - if_generation_not_match=if_generation_not_match, - if_metageneration_match=if_metageneration_match, - if_metageneration_not_match=if_metageneration_not_match, - timeout=expected_timeout, - ) - call1 = self._do_resumable_upload_call1( - client, - blob, - content_type, - data, - resumable_url, - size=size, - predefined_acl=predefined_acl, - if_generation_match=if_generation_match, - if_generation_not_match=if_generation_not_match, - if_metageneration_match=if_metageneration_match, - if_metageneration_not_match=if_metageneration_not_match, - timeout=expected_timeout, - ) - call2 = self._do_resumable_upload_call2( - client, - blob, - content_type, - data, - resumable_url, - total_bytes, - predefined_acl=predefined_acl, - if_generation_match=if_generation_match, - if_generation_not_match=if_generation_not_match, - if_metageneration_match=if_metageneration_match, - if_metageneration_not_match=if_metageneration_not_match, - timeout=expected_timeout, - ) + # Check the returned values. + self.assertIs(response, responses[2]) + self.assertEqual(stream.tell(), total_bytes) + + # Check the mocks. + call0 = self._do_resumable_upload_call0( + client, + blob, + content_type, + size=size, + predefined_acl=predefined_acl, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + timeout=expected_timeout, + ) + call1 = self._do_resumable_upload_call1( + client, + blob, + content_type, + data, + resumable_url, + size=size, + predefined_acl=predefined_acl, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + timeout=expected_timeout, + ) + call2 = self._do_resumable_upload_call2( + client, + blob, + content_type, + data, + resumable_url, + total_bytes, + predefined_acl=predefined_acl, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + timeout=expected_timeout, + ) self.assertEqual(transport.request.mock_calls, [call0, call1, call2]) def test__do_resumable_upload_with_custom_timeout(self): @@ -3574,19 +3609,21 @@ def _create_resumable_upload_session_helper( else: expected_timeout = timeout timeout_kwarg = {"timeout": timeout} - - new_url = blob.create_resumable_upload_session( - content_type=content_type, - size=size, - origin=origin, - client=client, - if_generation_match=if_generation_match, - if_generation_not_match=if_generation_not_match, - if_metageneration_match=if_metageneration_match, - if_metageneration_not_match=if_metageneration_not_match, - retry=retry, - **timeout_kwarg - ) + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + new_url = blob.create_resumable_upload_session( + content_type=content_type, + size=size, + origin=origin, + client=client, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + retry=retry, + **timeout_kwarg + ) # Check the returned value and (lack of) side-effect. self.assertEqual(new_url, resumable_url) @@ -3612,13 +3649,16 @@ def _create_resumable_upload_session_helper( upload_url += "?" + urlencode(qs_params) payload = b'{"name": "blob-name"}' - expected_headers = { - **_get_default_headers( - client._connection.user_agent, x_upload_content_type=content_type - ), - "x-upload-content-length": str(size), - "x-upload-content-type": content_type, - } + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + expected_headers = { + **_get_default_headers( + client._connection.user_agent, x_upload_content_type=content_type + ), + "x-upload-content-length": str(size), + "x-upload-content-type": content_type, + } if origin is not None: expected_headers["Origin"] = origin transport.request.assert_called_once_with( diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 2292c6acd..6a97d8d41 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -16,6 +16,7 @@ import http.client import io import json +from unittest.mock import patch import mock import pytest import re @@ -29,9 +30,10 @@ from google.cloud.storage._helpers import STORAGE_EMULATOR_ENV_VAR from google.cloud.storage._helpers import _get_default_headers +from google.cloud.storage import _helpers from google.cloud.storage.retry import DEFAULT_RETRY from google.cloud.storage.retry import DEFAULT_RETRY_IF_GENERATION_SPECIFIED - +from tests.unit.test__helpers import GCCL_INVOCATION_TEST_CONST from . import _read_local_json _SERVICE_ACCOUNT_JSON = _read_local_json("url_signer_v4_test_account.json") @@ -1570,15 +1572,17 @@ def test_download_blob_to_file_with_failure(self): blob._do_download.side_effect = grmp_response file_obj = io.BytesIO() - with self.assertRaises(exceptions.NotFound): - client.download_blob_to_file(blob, file_obj) - - self.assertEqual(file_obj.tell(), 0) + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + with self.assertRaises(exceptions.NotFound): + client.download_blob_to_file(blob, file_obj) - headers = { - **_get_default_headers(client._connection.user_agent), - "accept-encoding": "gzip", - } + self.assertEqual(file_obj.tell(), 0) + headers = { + **_get_default_headers(client._connection.user_agent), + "accept-encoding": "gzip", + } blob._do_download.assert_called_once_with( client._http, file_obj, @@ -1604,15 +1608,20 @@ def test_download_blob_to_file_with_uri(self): blob._get_download_url = mock.Mock() blob._do_download = mock.Mock() - with mock.patch( - "google.cloud.storage.client.Blob.from_string", return_value=blob + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST ): - client.download_blob_to_file("gs://bucket_name/path/to/object", file_obj) + with mock.patch( + "google.cloud.storage.client.Blob.from_string", return_value=blob + ): + client.download_blob_to_file( + "gs://bucket_name/path/to/object", file_obj + ) - headers = { - **_get_default_headers(client._connection.user_agent), - "accept-encoding": "gzip", - } + headers = { + **_get_default_headers(client._connection.user_agent), + "accept-encoding": "gzip", + } blob._do_download.assert_called_once_with( client._http, file_obj, @@ -1704,14 +1713,16 @@ def _download_blob_to_file_helper( blob._CHUNK_SIZE_MULTIPLE = 1 blob.chunk_size = 3 blob._do_download = mock.Mock() - file_obj = io.BytesIO() - if raw_download: - client.download_blob_to_file( - blob, file_obj, raw_download=True, **extra_kwargs - ) - else: - client.download_blob_to_file(blob, file_obj, **extra_kwargs) + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + if raw_download: + client.download_blob_to_file( + blob, file_obj, raw_download=True, **extra_kwargs + ) + else: + client.download_blob_to_file(blob, file_obj, **extra_kwargs) expected_retry = extra_kwargs.get("retry", DEFAULT_RETRY) if ( @@ -1734,7 +1745,11 @@ def _download_blob_to_file_helper( if_etag_not_match = [if_etag_not_match] headers["If-None-Match"] = ", ".join(if_etag_not_match) - headers = {**_get_default_headers(client._connection.user_agent), **headers} + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + headers = {**_get_default_headers(client._connection.user_agent), **headers} + blob._do_download.assert_called_once_with( client._http, file_obj, @@ -1760,6 +1775,24 @@ def test_download_blob_to_file_wo_chunks_w_raw(self): def test_download_blob_to_file_w_chunks_w_raw(self): self._download_blob_to_file_helper(use_chunks=True, raw_download=True) + def test_download_blob_have_different_uuid(self): + from google.cloud.storage.blob import Blob + + project = "PROJECT" + credentials = _make_credentials(project=project) + client = self._make_one(credentials=credentials) + blob = mock.create_autospec(Blob) + blob._encryption_key = None + blob._do_download = mock.Mock() + file_obj = io.BytesIO() + client.download_blob_to_file(blob, file_obj) + client.download_blob_to_file(blob, file_obj) + + self.assertNotEqual( + blob._do_download.call_args_list[0][0][3]["X-Goog-API-Client"], + blob._do_download.call_args_list[1][0][3]["X-Goog-API-Client"], + ) + def test_list_blobs_w_defaults_w_bucket_obj(self): from google.cloud.storage.bucket import Bucket from google.cloud.storage.bucket import _blobs_page_start