Skip to content

Commit

Permalink
Fix DWD DMO again
Browse files Browse the repository at this point in the history
  • Loading branch information
gutzbenj committed Nov 2, 2023
1 parent 1074c91 commit 946ac0d
Show file tree
Hide file tree
Showing 5 changed files with 192 additions and 71 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ Changelog
Development
***********

- Fix DWD DMO again

0.65.0 (24.10.2023)
*******************

Expand Down
125 changes: 125 additions & 0 deletions tests/provider/dwd/dmo/test_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2023, earthobservations developers.
# Distributed under the MIT License. See LICENSE for more info.
import datetime as dt

import polars as pl
import pytest

from wetterdienst.provider.dwd.dmo import DwdDmoRequest
from wetterdienst.provider.dwd.dmo.api import add_date_from_filename


@pytest.fixture
def df_files_january():
    """Provide file date strings for day 31 (hours 00 and 12), used by the start-of-year test."""
    # Each entry is the raw "date_str" extracted from a file name: day, hour, minute.
    date_strings = ["310000", "311200"]
    return pl.DataFrame({"date_str": date_strings})


@pytest.fixture
def df_files_two_month():
    """Provide file date strings that wrap from day 31 to days 01 and 02 (two months)."""
    # The jump from day 31 back to day 01 is what marks the month boundary.
    date_strings = ["311200", "010000", "011200", "020000"]
    return pl.DataFrame({"date_str": date_strings})


@pytest.fixture
def df_files_end_of_month():
    """Provide file date strings for day 31 (hours 00 and 12), used by the start-of-month test."""
    date_strings = ["310000", "311200"]
    return pl.DataFrame({"date_str": date_strings})


@pytest.mark.remote
def test_dwd_dmo_stations(default_settings):
    """Check column-wise extremes and name-length ordering of the full DMO ICON station list."""
    # Acquire data.
    request = DwdDmoRequest(parameter="icon", dmo_type="icon", settings=default_settings)
    given_df = request.all().df
    assert not given_df.is_empty()

    # Column-wise maxima over all stations.
    expected_max = {
        "from_date": None,
        "height": 4670.0,
        "icao_id": "ZYTX",
        "latitude": 79.59,
        "longitude": 179.2,
        "name": "ZWOENITZ",
        "state": None,
        "station_id": "Z949",
        "to_date": None,
    }
    assert given_df.select(pl.all().max()).to_dicts()[0] == expected_max

    # Column-wise minima over all stations.
    expected_min = {
        "from_date": None,
        "height": -350.0,
        "icao_id": "AFDU",
        "latitude": -78.27,
        "longitude": -176.1,
        "name": "16N55W",
        "state": None,
        "station_id": "01001",
        "to_date": None,
    }
    assert given_df.select(pl.all().min()).to_dicts()[0] == expected_min

    # Sort by the character count of the name and compare the shortest and longest names.
    station_names_sorted = given_df.sort(pl.col("name").str.n_chars()).get_column("name").to_list()
    shortest_names = station_names_sorted[:5]
    longest_names = station_names_sorted[-5:]
    assert shortest_names == ["ELM", "PAU", "SAL", "AUE", "HOF"]
    assert longest_names == [
        "MÜNSINGEN-APFELSTETT",
        "VILLINGEN-SCHWENNING",
        "WEINGARTEN BEI RAVEN",
        "LONDON WEATHER CENT.",
        "QUITO/MARISCAL SUCRE",
    ]


def test_add_date_from_filename(df_files_two_month):
    """Files spanning a month boundary: day 31 maps to October, days 1 and 2 to November."""
    current_date = dt.datetime(2021, 11, 15)
    expected_dates = [
        dt.datetime(2021, 10, 31, 12, 0),
        dt.datetime(2021, 11, 1, 0, 0),
        dt.datetime(2021, 11, 1, 12, 0),
        dt.datetime(2021, 11, 2, 0, 0),
    ]
    result = add_date_from_filename(df_files_two_month, current_date)
    assert result.get_column("date").to_list() == expected_dates


def test_add_date_from_filename_early_in_month(df_files_end_of_month):
    """Shortly after a month starts, day-31 files must be dated to the previous month."""
    current_date = dt.datetime(2021, 11, 1, 2)
    expected_dates = [
        dt.datetime(2021, 10, 31, 0, 0, 0),
        dt.datetime(2021, 10, 31, 12, 0, 0),
    ]
    result = add_date_from_filename(df_files_end_of_month, current_date)
    assert result.get_column("date").to_list() == expected_dates


def test_add_date_from_filename_early_in_year(df_files_january):
    """Shortly after a year starts, day-31 files must be dated to December of the previous year."""
    current_date = dt.datetime(2021, 1, 1, 1, 1, 1)
    expected_dates = [
        dt.datetime(2020, 12, 31, 0, 0, 0),
        dt.datetime(2020, 12, 31, 12, 0, 0),
    ]
    result = add_date_from_filename(df_files_january, current_date)
    assert result.get_column("date").to_list() == expected_dates


def test_add_date_from_filename_too_few_dates():
    """A dataframe with fewer than two rows is rejected with a ValueError."""
    single_file = pl.DataFrame({"date_str": ["311200"]})
    current_date = dt.datetime(2021, 1, 1, 1, 1, 1)
    with pytest.raises(ValueError):
        add_date_from_filename(single_file, current_date)
43 changes: 0 additions & 43 deletions tests/provider/dwd/dmo/test_api_stations.py

This file was deleted.

2 changes: 1 addition & 1 deletion tests/ui/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@
"dwd",
"dmo",
f"--parameter=icon --resolution=icon "
f"--date={datetime.strftime(datetime.today() + timedelta(days=6), '%Y-%m-%d')}",
f"--date={datetime.strftime(datetime.today() + timedelta(days=2), '%Y-%m-%d')}",
"10488",
"DRESDEN",
),
Expand Down
91 changes: 64 additions & 27 deletions wetterdienst/provider/dwd/dmo/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -978,6 +978,53 @@ class DwdDmoStationGroup(Enum):
ALL_STATIONS = "all_stations"


class DwdDmoLeadTime(Enum):
    """
    Lead time variants of DWD DMO forecast files.

    The member value is converted to a string and matched against the remote
    file URLs to select the files of the requested lead time.
    """

    # NOTE(review): values are presumably the forecast horizon in hours - confirm against DWD docs
    SHORT = 78
    LONG = 168


def add_date_from_filename(df: pl.DataFrame, current_date: dt.datetime) -> pl.DataFrame:
    """
    Add a date column to the dataframe based on the date string taken from the filename.

    The date string only carries day and hour (first two and next two characters), so
    year and month are reconstructed from ``current_date``:

    - within the first 3 hours of a month the files are assumed to still belong to the
      previous month
    - if the days span more than 20 days, the files cross a month boundary and days
      greater than 25 are assigned to the month before the reference month

    Stepping back a month rolls the year back as well when crossing January.

    :param df: Dataframe with a ``date_str`` column (day, hour, ... as string)
    :param current_date: Current date without timezone
    :return: Dataframe with the original columns plus a ``date`` column
    :raises ValueError: if the dataframe has fewer than 2 rows
    """
    if len(df) < 2:
        raise ValueError("Dataframe must have at least 2 dates")
    # get month and year from current date
    year = current_date.year
    month = current_date.month
    # if current date is in the first 3 hours of the month, use previous month
    # total_seconds() is required here: timedelta.seconds drops the day component and
    # would make this check fire in the early hours of every day of the month
    month_start = current_date.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
    hours_since_month_start = (current_date - month_start).total_seconds() / 60 / 60
    if hours_since_month_start < 3:
        # stepping back from January lands in December of the previous year
        year, month = (year - 1, 12) if month == 1 else (year, month - 1)
    # month (and year) preceding the reference month, used when the files span two months
    previous_year, previous_month = (year - 1, 12) if month == 1 else (year, month - 1)
    df = df.with_columns(
        [
            pl.col("date_str").map_elements(lambda s: s[:2]).cast(int).alias("day"),
            pl.col("date_str").map_elements(lambda s: s[2:4]).cast(int).alias("hour"),
        ]
    )
    days_difference = df.get_column("day").max() - df.get_column("day").min()
    if days_difference > 20:
        # a large spread of days means the files wrap around a month boundary:
        # high day numbers belong to the previous month, low ones to the reference month
        in_previous_month = pl.col("day") > 25
        df = df.with_columns(
            [
                pl.when(in_previous_month).then(pl.lit(previous_year)).otherwise(pl.lit(year)).alias("year"),
                pl.when(in_previous_month).then(pl.lit(previous_month)).otherwise(pl.lit(month)).alias("month"),
            ]
        )
    else:
        df = df.with_columns(
            [
                pl.lit(year).alias("year"),
                pl.lit(month).alias("month"),
            ]
        )
    return df.select(
        [
            pl.all().exclude(["year", "month", "day", "hour"]),
            pl.struct(["year", "month", "day", "hour"])
            .map_elements(lambda s: dt.datetime(s["year"], s["month"], s["day"], s["hour"]))
            .alias("date"),
        ]
    )


class DwdDmoValues(TimeseriesValues):
"""
Fetch DWD DMO data.
Expand Down Expand Up @@ -1222,30 +1269,17 @@ def get_url_for_date(self, url: str, date: Union[dt.datetime, DwdForecastDate])
"""
urls = list_remote_files_fsspec(url, self.sr.stations.settings, CacheExpiry.NO_CACHE)
df = pl.DataFrame({"url": urls})
now = dt.datetime.utcnow()
hours_since_month_start = (now - now.replace(day=1, hour=1, minute=1, second=1)).seconds / 60 / 60
if hours_since_month_start < 3:
now = now.replace(month=now.month - 1)
last_date = (
df.select(
pl.col("url")
.str.split("/")
.list.last()
.str.split("_")
.list.last()
.map_elements(lambda s: s[:-4])
.alias("date")
)
.with_columns(pl.col("date").cast(int).alias("date_int"))
.filter(pl.col("date_int").eq(pl.col("date_int").max()))
.get_column("date")
.to_list()[0]
df = df.filter(pl.col("url").str.contains(str(self.sr.stations.lead_time.value)))
df = df.with_columns(
pl.col("url")
.str.split("/")
.list.last()
.str.split("_")
.list.last()
.map_elements(lambda s: s[:-4])
.alias("date_str")
)
day, hour = last_date[:2], last_date[2:4]
last_date = f"{now.date().isoformat()[:-3]}-{day} {hour}:00"
last_date = dt.datetime.fromisoformat(last_date)
dates = [last_date - dt.timedelta(hours=i * 12) for i in range(len(df))]
df = df.with_columns(pl.Series(dates[::-1]).alias("date"))
df = add_date_from_filename(df, dt.datetime.utcnow())
if date == DwdForecastDate.LATEST:
date = df.get_column("date").max()
df = df.filter(pl.col("date").eq(date))
Expand Down Expand Up @@ -1335,7 +1369,8 @@ def __init__(
end_issue: Optional[Union[str, dt.datetime]] = None,
start_date: Optional[Union[str, dt.datetime]] = None,
end_date: Optional[Union[str, dt.datetime]] = None,
station_group: Optional[Union[str, DwdDmoStationGroup]] = None,
station_group: Optional[Union[str, DwdDmoStationGroup]] = DwdDmoStationGroup.SINGLE_STATIONS,
lead_time: Optional[Union[str, DwdDmoLeadTime]] = DwdDmoLeadTime.SHORT,
settings: Optional[Settings] = None,
) -> None:
"""
Expand All @@ -1348,9 +1383,11 @@ def __init__(
:param end_date: end date
"""
self.dmo_type = parse_enumeration_from_template(dmo_type, DwdDmoType)
self.station_group = (
parse_enumeration_from_template(station_group, DwdDmoStationGroup) or DwdDmoStationGroup.SINGLE_STATIONS
)
self.station_group = parse_enumeration_from_template(station_group, DwdDmoStationGroup)
if self.dmo_type == DwdDmoType.ICON_EU:
self.lead_time = DwdDmoLeadTime.SHORT
else:
self.lead_time = parse_enumeration_from_template(lead_time, DwdDmoLeadTime)

super().__init__(
parameter=parameter,
Expand Down

0 comments on commit 946ac0d

Please sign in to comment.