Skip to content

Commit

Permalink
Add data.europa.eu support (#87)
Browse files Browse the repository at this point in the history
  • Loading branch information
micafer authored Sep 29, 2024
1 parent 2d746da commit 8c0b048
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 1 deletion.
2 changes: 1 addition & 1 deletion datahugger/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from urllib.parse import urlparse

import requests
from jsonpath_ng import parse
from jsonpath_ng.ext import parse
from scitree import scitree
from tqdm import tqdm

Expand Down
2 changes: 2 additions & 0 deletions datahugger/config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from datahugger.services import ArXivDataset
from datahugger.services import DataDryadDataset
from datahugger.services import DataEuropaDataset
from datahugger.services import DataOneDataset
from datahugger.services import DataverseDataset
from datahugger.services import DjehutyDataset
Expand Down Expand Up @@ -117,6 +118,7 @@
"trolling.uit.no": DataverseDataset,
"www.sodha.be": DataverseDataset,
"www.uni-hildesheim.de": DataverseDataset,
"data.europa.eu": DataEuropaDataset,
}

# regexp lookup
Expand Down
17 changes: 17 additions & 0 deletions datahugger/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,23 @@ def _get_attr_hash_type(self, record):
return self._get_attr_attr(record, self.ATTR_HASH_JSONPATH).split(":")[0]


class DataEuropaDataset(DatasetDownloader):
"""Downloader for European data repository."""

# Pattern matched against a data.europa.eu dataset URL; the part after
# "/data/datasets/" is captured in the named group "record_id".
# NOTE(review): `.+` is greedy, so anything following the identifier
# (trailing slash, query string such as "?locale=en") is captured as part
# of record_id as well — confirm the API tolerates that.
REGEXP_ID = r"data\.europa\.eu\/data\/datasets\/(?P<record_id>.+)"

# the base entry point of the REST API
API_URL = "https://data.europa.eu/api/hub/repo/"

# Template of the per-dataset metadata URL, with placeholders for the API
# base URL and the record id.
# presumably formatted by DatasetDownloader with API_URL and the captured
# record_id — verify against base.py
API_URL_META = "{api_url}datasets/{record_id}"
# JSONPath filter expression: selects the entries of the top-level "@graph"
# member whose "@type" equals "dcat:Distribution".
# NOTE(review): the [?(...)] filter syntax looks like it needs the
# jsonpath_ng.ext parser rather than the plain jsonpath_ng one — confirm.
META_FILES_JSONPATH = '$.@graph[?(@.@type == "dcat:Distribution")]'

# paths to file attributes
# presumably evaluated relative to each entry matched by META_FILES_JSONPATH
# — TODO confirm. The quoted segments are field names containing a colon.
ATTR_FILE_LINK_JSONPATH = "'dcat:accessURL'.@id"
ATTR_NAME_JSONPATH = "'dct:title'"
ATTR_SIZE_JSONPATH = "'dcat:byteSize'.@value"


class SeaNoeDataset(DatasetDownloader):
"""Downloader for SeaNoe publication."""

Expand Down
4 changes: 4 additions & 0 deletions tests/test_repositories.toml
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,10 @@ files = "AA_age.tab"
location = "https://github.com/j535d165/cbsodata"
files = "cbsodata-main/README.md"

[[dataeuropa]]
location = "https://data.europa.eu/data/datasets/65e092e4009f18f050b14216"
files = "consolidation-wattzhub-schema-irve-dynamic-20240918-033000.csv"

[[seanoe]]
location = "https://doi.org/10.17882/101042"
files = "111609.xlsx"

0 comments on commit 8c0b048

Please sign in to comment.