This package is useful for those who want to extract large volumes of data from DataStream's Public API.
See Usage to learn how to use this package.
Note: DataStream's Custom Download tool is another option that allows users to download CSV data from across datasets in a particular DataStream hub using basic filters. This tool has fewer filtering options than the API, but works well for basic searches. You can find it via 'Explore Data' in the header menu from any DataStream regional hub.
- Python 3.9+
pip install git+https://github.com/datastreamapp/datastream-py
Add the following to your requirements.txt file:
datastream-py @ git+https://github.com/datastreamapp/datastream-py@main
Then, run pip install -r requirements.txt
Available methods:
set_api_key
metadata
locations
observations
records
See API documentation for query string values and structure.
Returns a generator object that is iterable.
from datastream_py import set_api_key, locations
set_api_key('xxxxxxxxxx')
results = locations({
'$select': 'Id,DOI,Name,Latitude,Longitude',
'$filter': "DOI eq '10.25976/xxxx-xx00'",
'$top': 10000
})
for location in results:
print(location)
from datastream_py import set_api_key, locations
set_api_key('xxxxxxxxxx')
results = locations({
'$select': 'Id,DOI,Name,Latitude,Longitude',
'$filter': "DOI in ('10.25976/xxxx-xx00', '10.25976/xxxx-xx11', '10.25976/xxxx-xx22')",
'$top': 10000
})
for location in results:
print(location)
Returns a generator object that is iterable.
from datastream_py import set_api_key, observations
set_api_key('xxxxxxxxxx')
results = observations({
'$select': 'DOI,ActivityType,ActivityMediaName,ActivityStartDate,ActivityStartTime,SampleCollectionEquipmentName,CharacteristicName,MethodSpeciation,ResultSampleFraction,ResultValue,ResultUnit,ResultValueType',
'$filter': "DOI in ('10.25976/xxxx-xx00', '10.25976/xxxx-xx11', '10.25976/xxxx-xx22') and CharacteristicName in ('Temperature, water', 'pH')",
'$top': 10000
})
for observation in results:
print(observation)
Returns a generator object that is iterable.
from datastream_py import set_api_key, records
set_api_key('xxxxxxxxxx')
results = records({
'$select': 'DOI,ActivityType,ActivityMediaName,ActivityStartDate,ActivityStartTime,SampleCollectionEquipmentName,CharacteristicName,MethodSpeciation,ResultSampleFraction,ResultValue,ResultUnit,ResultValueType',
'$filter': "DOI eq '10.25976/xxxx-xx00'",
'$top': 10000
})
for record in results:
print(record)
Returns a generator object that is iterable.
from datastream_py import set_api_key, metadata
set_api_key('xxxxxxxxxx')
results = list(metadata({
'$select': 'DOI,Version,DatasetName',
'$filter': "DOI eq '10.25976/xxxx-xx00'"
}))
print(results)
Pass $count as true to get the count of results.
See URL parameters for more details.
from datastream_py import set_api_key, observations
set_api_key('xxxxxxxxxx')
count = observations({
'$filter': "DOI eq '10.25976/xxxx-xx00'",
'$count': 'true'
})
print(count)
from requests import exceptions
from datastream_py import set_api_key, observations
set_api_key('xxxxxxxxxx')
try:
count = observations({
'$filter': "DOI eq '10.25976/xxxx-xx00'",
'$count': 'true'
})
print(count)
except exceptions.HTTPError as e: # From requests package
print(e)
if e.response.status_code == 400:
print(e.response.json())