From 6138e1127f752f0cc5eeefcaecb50a2097f3a963 Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Wed, 7 Jun 2023 10:19:33 -0600 Subject: [PATCH] feat: enable retries by default, add docs --- CHANGELOG.md | 1 + docs/usage.rst | 153 ++++++++++++++++++++++++----------- pystac_client/stac_api_io.py | 7 ++ 3 files changed, 116 insertions(+), 45 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 536679ab..7a8fbd35 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Support for fetching and merging a selection of queryables [#511](https://github.com/stac-utils/pystac-client/pull/511) - Better error messages for the CLI [#531](https://github.com/stac-utils/pystac-client/pull/531) - `Modifiable` to our public API [#534](https://github.com/stac-utils/pystac-client/pull/534) +- `max_retries` parameter to `StacApiIO` [#532](https://github.com/stac-utils/pystac-client/pull/532) ### Changed diff --git a/docs/usage.rst b/docs/usage.rst index 92d07722..c14021dd 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -117,6 +117,114 @@ there are no ``"conformsTo"`` uris set at all. But they can be explicitly set: Note, updating ``"conformsTo"`` does not change what the server supports, it just changes PySTAC client's understanding of what the server supports. +Configuring retry behavior +-------------------------- + +By default, **pystac-client** will retry requests that fail DNS lookup or have timeouts. +If you'd like to configure this behavior, e.g. to retry on some ``50x`` responses, you can configure the StacApiIO's session: + +.. 
code-block:: python + + from requests.adapters import HTTPAdapter + from urllib3 import Retry + + from pystac_client import Client + from pystac_client.stac_api_io import StacApiIO + + retry = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504]) + stac_api_io = StacApiIO() + stac_api_io.session.mount("https://", HTTPAdapter(max_retries=retry)) + client = Client.open( + "https://planetarycomputer.microsoft.com/api/stac/v1", stac_io=stac_api_io + ) + +Automatically modifying results +------------------------------- + +Some systems, like the `Microsoft Planetary Computer <https://planetarycomputer.microsoft.com/>`__, +have public STAC metadata but require some `authentication <https://planetarycomputer.microsoft.com/docs/concepts/sas/>`__ +to access the actual assets. + +``pystac-client`` provides a ``modifier`` keyword that can automatically +modify the STAC objects returned by the STAC API. + +.. code-block:: python + + >>> from pystac_client import Client + >>> import planetary_computer, requests + >>> catalog = Client.open( + ... 'https://planetarycomputer.microsoft.com/api/stac/v1', + ... modifier=planetary_computer.sign_inplace, + ... ) + >>> item = next(catalog.get_collection("sentinel-2-l2a").get_all_items()) + >>> requests.head(item.assets["B02"].href).status_code + 200 + +Without the modifier, we would have received a 404 error because the asset +is in a private storage container. + +``pystac-client`` expects that the ``modifier`` callable modifies the result +object in-place and returns no result. A warning is emitted if your +``modifier`` returns a non-None result that is not the same object as the +input. + +Here's an example of creating your own modifier. +Because :py:class:`~pystac_client.Modifiable` is a union, the modifier function must handle a few different types of input objects, and care must be taken to ensure that you are modifying the input object (rather than a copy). +Simplifying this interface is a space for future improvement. + +.. 
code-block:: python + + import urllib.parse + + import pystac + + from pystac_client import Client, Modifiable + + + def modifier(modifiable: Modifiable) -> None: + if isinstance(modifiable, dict): + if modifiable["type"] == "FeatureCollection": + new_features = list() + for item_dict in modifiable["features"]: + modifier(item_dict) + new_features.append(item_dict) + modifiable["features"] = new_features + else: + stac_object = pystac.read_dict(modifiable) + modifier(stac_object) + modifiable.update(stac_object.to_dict()) + else: + for key, asset in modifiable.assets.items(): + url = urllib.parse.urlparse(asset.href) + if not url.query: + asset.href = urllib.parse.urlunparse(url._replace(query="foo=bar")) + modifiable.assets[key] = asset + + + client = Client.open( + "https://planetarycomputer.microsoft.com/api/stac/v1", modifier=modifier + ) + item_search = client.search(collections=["landsat-c2-l2"], max_items=1) + item = next(item_search.items()) + asset = item.assets["red"] + assert urllib.parse.urlparse(asset.href).query == "foo=bar" + + +Using custom certificates +------------------------- + +If you need to use custom certificates in your ``pystac-client`` requests, you can +customize the :class:`StacApiIO` instance before +creating your :class:`Client`. + +.. code-block:: python + + >>> from pystac_client.stac_api_io import StacApiIO + >>> from pystac_client.client import Client + >>> stac_api_io = StacApiIO() + >>> stac_api_io.session.verify = "/path/to/certfile" + >>> client = Client.open("https://planetarycomputer.microsoft.com/api/stac/v1", stac_io=stac_api_io) + CollectionClient ++++++++++++++++ @@ -307,51 +415,6 @@ descending sort and a ``+`` prefix or no prefix means an ascending sort. ] ... ) -Automatically modifying results -------------------------------- - -Some systems, like the `Microsoft Planetary Computer <https://planetarycomputer.microsoft.com/>`__, -have public STAC metadata but require some `authentication <https://planetarycomputer.microsoft.com/docs/concepts/sas/>`__ -to access the actual assets. 
- -``pystac-client`` provides a ``modifier`` keyword that can automatically -modify the STAC objects returned by the STAC API. - -.. code-block:: python - - >>> from pystac_client import Client - >>> import planetary_computer, requests - >>> catalog = Client.open( - ... 'https://planetarycomputer.microsoft.com/api/stac/v1', - ... modifier=planetary_computer.sign_inplace, - ... ) - >>> item = next(catalog.get_collection("sentinel-2-l2a").get_all_items()) - >>> requests.head(item.assets["B02"].href).status_code - 200 - -Without the modifier, we would have received a 404 error because the asset -is in a private storage container. - -``pystac-client`` expects that the ``modifier`` callable modifies the result -object in-place and returns no result. A warning is emitted if your -``modifier`` returns a non-None result that is not the same object as the -input. - -Using custom certificates -------------------------- - -If you need to use custom certificates in your ``pystac-client`` requests, you can -customize the :class:`StacApiIO` instance before -creating your :class:`Client`. - -.. 
code-block:: python - - >>> from pystac_client.stac_api_io import StacApiIO - >>> from pystac_client.client import Client - >>> stac_api_io = StacApiIO() - >>> stac_api_io.session.verify = "/path/to/certfile" - >>> client = Client.open("https://planetarycomputer.microsoft.com/api/stac/v1", stac_io=stac_api_io) - Loading data ++++++++++++ diff --git a/pystac_client/stac_api_io.py b/pystac_client/stac_api_io.py index ef4586bb..b5592434 100644 --- a/pystac_client/stac_api_io.py +++ b/pystac_client/stac_api_io.py @@ -25,6 +25,7 @@ ) from pystac.stac_io import DefaultStacIO from requests import Request, Session +from requests.adapters import HTTPAdapter from typing_extensions import TypeAlias import pystac_client @@ -49,6 +50,7 @@ def __init__( parameters: Optional[Dict[str, Any]] = None, request_modifier: Optional[Callable[[Request], Union[Request, None]]] = None, timeout: Timeout = None, + max_retries: Optional[int] = 5, ): """Initialize class for API IO @@ -69,6 +71,8 @@ def __init__( timeout: Optional float or (float, float) tuple following the semantics defined by `Requests `__. + max_retries: The number of times to retry requests. Set to ``None`` to + disable retries. Return: StacApiIO : StacApiIO instance @@ -87,6 +91,9 @@ def __init__( ) self.session = Session() + if max_retries: + self.session.mount("http://", HTTPAdapter(max_retries=max_retries)) + self.session.mount("https://", HTTPAdapter(max_retries=max_retries)) self.timeout = timeout self.update( headers=headers, parameters=parameters, request_modifier=request_modifier