Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support --access-token parameter for CDSE #62

Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion LICENSE.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
MIT License

Copyright (c) 2018-2020 Scott Staniewicz
Copyright (c) 2024 Luc Hermitte, CS Group, support for double authentication on CDSE
Copyright (c) 2024 Luc Hermitte, CS Group, refactor authentication on CDSE

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ Options:
--force-asf Force the downloader to search ASF instead
of ESA.
--debug Set logging level to DEBUG
--cdse-access-token TEXT Copernicus Data Space Ecosystem access-
token. The access token can be generated
beforehand. See https://documentation.datasp
ace.copernicus.eu/APIs/Token.html
--cdse-user TEXT Copernicus Data Space Ecosystem username. If
not provided the program asks for it
--cdse-password TEXT Copernicus Data Space Ecosystem password. If
Expand Down
8 changes: 8 additions & 0 deletions eof/cli.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
CLI tool for downloading Sentinel 1 EOF files
"""

from __future__ import annotations

import logging
Expand Down Expand Up @@ -66,6 +67,11 @@
is_flag=True,
help="Set logging level to DEBUG",
)
@click.option(
"--cdse-access-token",
help="Copernicus Data Space Ecosystem access-token. "
"The access token can be generated beforehand. See https://documentation.dataspace.copernicus.eu/APIs/Token.html",
)
@click.option(
"--cdse-user",
help="Copernicus Data Space Ecosystem username. "
Expand Down Expand Up @@ -120,6 +126,7 @@ def cli(
debug: bool,
asf_user: str = "",
asf_password: str = "",
cdse_access_token: Optional[str] = None,
cdse_user: str = "",
cdse_password: str = "",
cdse_2fa_token: str = "",
Expand Down Expand Up @@ -154,6 +161,7 @@ def cli(
force_asf=force_asf,
asf_user=asf_user,
asf_password=asf_password,
cdse_access_token=cdse_access_token,
cdse_user=cdse_user,
cdse_password=cdse_password,
cdse_2fa_token=cdse_2fa_token,
Expand Down
100 changes: 47 additions & 53 deletions eof/dataspace_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,32 +32,39 @@ class DataspaceClient:
T1 = timedelta(seconds=60)

def __init__(
self,
username: str = "",
password: str = "",
token_2fa: str = "",
netrc_file: Optional[Filename] = None,
self,
access_token: Optional[str] = None,
username: str = "",
password: str = "",
token_2fa: str = "",
netrc_file: Optional[Filename] = None,
):
if not (username and password):
logger.debug("Get credentials form netrc")
try:
username, password = get_netrc_credentials(DATASPACE_HOST, netrc_file)
except FileNotFoundError:
logger.warning("No netrc file found.")
except ValueError as e:
if DATASPACE_HOST not in e.args[0]:
raise e
logger.warning(
f"No CDSE credentials found in netrc file {netrc_file!r}. Please create one using {SIGNUP_URL}"
)

self._username = username
self._password = password
self._token_2fa = token_2fa
self._netrc_file = netrc_file

self._access_token = access_token
if not access_token:
if not (username and password):
logger.debug(f"Get credentials form netrc ({netrc_file!r})")
try:
username, password = get_netrc_credentials(DATASPACE_HOST, netrc_file)
self._access_token = get_access_token(username, password, token_2fa)
except FileNotFoundError:
logger.warning("No netrc file found.")
except ValueError as e:
if DATASPACE_HOST not in e.args[0]:
raise e
logger.warning(
f"No CDSE credentials found in netrc file {netrc_file!r}. Please create one using {SIGNUP_URL}"
)
except Exception as e:
logger.warning(f"Error: {str(e)}")

# Obtain an access token the download request from the provided credentials

def __bool__(self):
"""Tells whether the object has been correctly initialized"""
return bool(self._access_token)

@staticmethod
def query_orbit(
self,
t0: datetime,
t1: datetime,
satellite_id: str,
Expand All @@ -75,8 +82,8 @@ def query_orbit(
# range
return query_orbit_file_service(query)

@staticmethod
def query_orbit_for_product(
self,
product,
orbit_type: str = "precise",
t0_margin: timedelta = T0,
Expand All @@ -85,16 +92,16 @@ def query_orbit_for_product(
if isinstance(product, str):
product = S1Product(product)

return self.query_orbit_by_dt(
return DataspaceClient.query_orbit_by_dt(
[product.start_time],
[product.mission],
orbit_type=orbit_type,
t0_margin=t0_margin,
t1_margin=t1_margin,
)

@staticmethod
def query_orbit_by_dt(
self,
orbit_dts,
missions,
orbit_type: str = "precise",
Expand Down Expand Up @@ -126,7 +133,7 @@ def query_orbit_by_dt(
for dt, mission in zip(orbit_dts, missions):
# Only check for precise orbits if that is what we want
if orbit_type == "precise":
products = self.query_orbit(
products = DataspaceClient.query_orbit(
dt - t0_margin,
dt + t1_margin,
# dt - timedelta(seconds=T_ORBIT + 60),
Expand All @@ -148,7 +155,7 @@ def query_orbit_by_dt(
all_results.append(result)
else:
# try with RESORB
products = self.query_orbit(
products = DataspaceClient.query_orbit(
dt - timedelta(seconds=T_ORBIT + 60),
dt + timedelta(seconds=60),
mission,
Expand Down Expand Up @@ -177,17 +184,13 @@ def download_all(
self,
query_results: list[dict],
output_directory: Filename,
netrc_file : Optional[Filename] = None,
max_workers: int = 3,
):
"""Download all the specified orbit products."""
return download_all(
query_results,
output_directory=output_directory,
username=self._username,
password=self._password,
token_2fa=self._token_2fa,
netrc_file=netrc_file,
access_token=self._access_token,
max_workers=max_workers,
)

Expand Down Expand Up @@ -224,6 +227,7 @@ def _construct_orbit_file_query(
query_template = (
"startswith(Name,'{mission_id}') and contains(Name,'{orbit_type}') "
"and ContentDate/Start lt '{start_time}' and ContentDate/End gt '{stop_time}'"
# " and productType eq {orbit_type}"
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this redundant because the current query template has contains(Name,'{orbit_type}')? or was this a mistake to comment it out?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIRC, this was something I wanted to test but I didn't find/take the time to do it. So far you were using contains(Name,'{orbit_type}') and while reading the doc, I wondered whether productType eq {orbit_type} would have been enough.

I shouldn't have committed it, but forgot about it.

)

# Format the query template using the values we were provided
Expand Down Expand Up @@ -287,18 +291,12 @@ def query_orbit_file_service(query: str) -> list[dict]:
return query_results


def get_access_token(username, password, token_2fa, netrc_file) -> Optional[str]:
def get_access_token(username, password, token_2fa) -> str:
"""Get an access token for the Copernicus Data Space Ecosystem (CDSE) API.

Code from https://documentation.dataspace.copernicus.eu/APIs/Token.html
"""
if not (username and password):
logger.debug("Get credentials form netrc")
try:
username, password = get_netrc_credentials(DATASPACE_HOST, netrc_file)
except FileNotFoundError:
logger.warning("No netrc file found.")
return None
assert username and password, "Username and password values are expected!"
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
assert username and password, "Username and password values are expected!"
if not (username and password):
raise ValueError("Username and password values are expected!")

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've considered username and password to be set as part of the function contract: if this precondition is not valid, then there is a programming error, and it's up to us to prevent this situation from happening.

Given that username and password being set is a postcondition of get_netrc_credentials() (the function cannot return None values), which is called just before, everything is fine so far. Perfect DbC in action -- well... almost, if we ignore that I forgot to document the contracts in the docstring of each function, and that I should have tested for them to not be empty as well. ^^'

I usually dislike wide contracts that check every single thing that is not meant to happen and throw logic errors if it does. That has a tendency to complicate source code: we add a lot of dead code that can/should never execute.

Here is the fixed code and docstring. If you really prefer wide contracts to narrow ones, I'll throw then -- an AssertionError would still make more sense IMO.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My mistake. The 3 scenarios weren't correctly handled. This should be better now.


data = {
"client_id": "cdse-public",
Expand All @@ -313,18 +311,17 @@ def get_access_token(username, password, token_2fa, netrc_file) -> Optional[str]
r = requests.post(AUTH_URL, data=data)
r.raise_for_status()
except Exception as err:
raise RuntimeError(f"Access token creation failed. Reason: {str(err)}")
raise RuntimeError(f"CDSE access token creation failed. Reason: {str(err)}")

# Parse the access token from the response
try:
access_token = r.json()["access_token"]
return access_token
except KeyError:
raise RuntimeError(
'Failed to parsed expected field "access_token" from authentication response.'
'Failed to parse expected field "access_token" from CDSE authentication response.'
)

return access_token


def download_orbit_file(
request_url, output_directory, orbit_file_name, access_token
Expand Down Expand Up @@ -382,17 +379,14 @@ def download_orbit_file(
if chunk:
outfile.write(chunk)

logger.info(f"Orbit file downloaded to {output_orbit_file_path}")
logger.info(f"Orbit file downloaded to {output_orbit_file_path!r}")
return output_orbit_file_path


def download_all(
query_results: list[dict],
output_directory: Filename,
username: str = "",
password: str = "",
token_2fa: str = "",
netrc_file: Optional[Filename] = None,
access_token: Optional[str],
max_workers: int = 3,
) -> list[Path]:
"""Download all the specified orbit products.
Expand All @@ -414,14 +408,14 @@ def download_all(
Note that >4 connections will result in a HTTP 429 Error

"""
if not access_token:
raise RuntimeError("Invalid CDSE access token. Aborting.")
downloaded_paths: list[Path] = []
# Select an appropriate orbit file from the list returned from the query
# orbit_file_name, orbit_file_request_id = select_orbit_file(
# query_results, start_time, stop_time
# )
# Obtain an access token the download request from the provided credentials

access_token = get_access_token(username, password, token_2fa, netrc_file)
output_names = []
download_urls = []
for query_result in query_results:
Expand Down
14 changes: 12 additions & 2 deletions eof/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

See parsers for Sentinel file naming description
"""

from __future__ import annotations

import glob
Expand Down Expand Up @@ -51,6 +52,7 @@ def download_eofs(
force_asf: bool = False,
asf_user: str = "",
asf_password: str = "",
cdse_access_token: Optional[str] = None,
cdse_user: str = "",
cdse_password: str = "",
cdse_2fa_token: str = "",
Expand Down Expand Up @@ -93,8 +95,14 @@ def download_eofs(

# First, check that Scihub isn't having issues
if not force_asf:
client = DataspaceClient(username=cdse_user, password=cdse_password, token_2fa=cdse_2fa_token, netrc_file=netrc_file)
if client._username and client._password:
client = DataspaceClient(
access_token=cdse_access_token,
username=cdse_user,
password=cdse_password,
token_2fa=cdse_2fa_token,
netrc_file=netrc_file,
)
if client:
# try to search on scihub
if sentinel_file:
query = client.query_orbit_for_product(
Expand Down Expand Up @@ -212,6 +220,7 @@ def main(
force_asf: bool = False,
asf_user: str = "",
asf_password: str = "",
cdse_access_token: Optional[str] = None,
cdse_user: str = "",
cdse_password: str = "",
cdse_2fa_token: str = "",
Expand Down Expand Up @@ -258,6 +267,7 @@ def main(
force_asf=force_asf,
asf_user=asf_user,
asf_password=asf_password,
cdse_access_token=cdse_access_token,
cdse_user=cdse_user,
cdse_password=cdse_password,
cdse_2fa_token=cdse_2fa_token,
Expand Down
Loading