From 3353f330748602003e6d6773bbc5167919ac3473 Mon Sep 17 00:00:00 2001 From: lambert-p Date: Tue, 10 Dec 2024 12:00:29 +0000 Subject: [PATCH 1/2] Remove long name if standard name present (#2059) * Remove long name if standard name present * Remove long_name if standard_name present * Remove long_name if standard_name is present * Remove long_name if standard_name present * Replace long_name if standard_name present * Correct formatting and Contribution list * Added Name to mail map and rerun ruff * edit .mailmap * assert message edited --- .mailmap | 1 + CONTRIBUTING.md | 1 + improver/standardise.py | 10 ++++++++++ improver_tests/standardise/test_StandardiseMetadata.py | 8 ++++++++ 4 files changed, 20 insertions(+) diff --git a/.mailmap b/.mailmap index 4a4ca333f4..ca636e36af 100644 --- a/.mailmap +++ b/.mailmap @@ -29,6 +29,7 @@ Meabh NicGuidhir <43375279+neilCrosswaite@users.noreply.github.com> Paul Abernethy Peter Jordan <52462411+mo-peterjordan@users.noreply.github.com> +Phoebe Lambert Sam Griffiths <122271903+SamGriffithsMO@users.noreply.github.com> Shafiat Dewan <87321907+ShafiatDewan@users.noreply.github.com> <87321907+ShafiatDewan@users.noreply.github.com> Shubhendra Singh Chauhan diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8dd2566698..9e8f748d1d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -59,6 +59,7 @@ below: - Caroline Jones (Met Office, UK) - Peter Jordan (Met Office, UK) - Bruno P. Kinoshita (NIWA, NZ) + - Phoebe Lambert (Met Office, UK) - Lucy Liu (Bureau of Meteorology, Australia) - Daniel Mentiplay (Bureau of Meteorology, Australia) - Stephen Moseley (Met Office, UK) diff --git a/improver/standardise.py b/improver/standardise.py index d1a655a964..4937fea071 100644 --- a/improver/standardise.py +++ b/improver/standardise.py @@ -236,6 +236,15 @@ def _discard_redundant_cell_methods(cube: Cube) -> None: cube.cell_methods = updated_cms + @staticmethod + def _remove_long_name_if_standard_name(cube: Cube) -> None: + """ + Remove the long_name attribute from cubes if the cube also has a standard_name defined + """ + + if cube.standard_name and cube.long_name: + cube.long_name = None + def process(self, cube: Cube) -> Cube: """ Perform compulsory and user-configurable metadata adjustments. The @@ -269,6 +278,7 @@ def process(self, cube: Cube) -> Cube: if self._attributes_dict: amend_attributes(cube, self._attributes_dict) self._discard_redundant_cell_methods(cube) + self._remove_long_name_if_standard_name(cube) # this must be done after unit conversion as if the input is an integer # field, unit conversion outputs the new data as float64 diff --git a/improver_tests/standardise/test_StandardiseMetadata.py b/improver_tests/standardise/test_StandardiseMetadata.py index a54f45113b..971b1f33b3 100644 --- a/improver_tests/standardise/test_StandardiseMetadata.py +++ b/improver_tests/standardise/test_StandardiseMetadata.py @@ -292,6 +292,14 @@ def test_air_temperature_status_flag_coord_without_realization(self): self.assertArrayEqual(result.data, target.data) self.assertEqual(result.coords(), target.coords()) + def test_long_name_removed(self): + cube = self.cube.copy() + cube.long_name = "kittens" + result = StandardiseMetadata().process(cube) + assert ( + result.long_name is None + ), "long_name removal expected, but long_name is not None" + if __name__ == "__main__": unittest.main() From 3c02114865e328eb60be7a556a4eb623a733a72d Mon Sep 17 00:00:00 2001 From: mspelman07 <99179165+mspelman07@users.noreply.github.com> Date: Tue, 10 Dec 2024 15:44:31 +0000 Subject: [PATCH 2/2] Update copy_attribute plugin to include copying history attribute (#2055) * Update copy_attribute plugin to include copying history attribute * remove print statement * fixes following rebase and fixing codecov missing lines * Add in review suggestions * formatting --- improver/utilities/copy_metadata.py | 106 ++++++++++--- .../copy_metadata/test_CopyMetadata.py | 140 ++++++++++++++---- 2 files changed, 199 insertions(+), 47 deletions(-) diff --git a/improver/utilities/copy_metadata.py b/improver/utilities/copy_metadata.py index 3cb120cb7a..90248e2c08 100644 --- a/improver/utilities/copy_metadata.py +++ b/improver/utilities/copy_metadata.py @@ -4,6 +4,7 @@ # See LICENSE in the root of the repository for full licensing details. from typing import List, Union +from dateutil import parser as dparser from iris.cube import Cube, CubeList from improver import BasePlugin @@ -31,19 +32,89 @@ def __init__(self, attributes: List = [], aux_coord: List = []): self.attributes = attributes self.aux_coord = aux_coord - def process(self, *cubes: Union[Cube, CubeList]) -> Union[Cube, CubeList]: + @staticmethod + def get_most_recent_history(datelist: list) -> list: + """ + Gets the most recent history attribute from the list of provided dates. + + Args: + datelist: + A list of dates to find the most recent calue from. + + Returns: + The most recent history attribute. + """ + prev_time = None + + for date in datelist: + new_time = dparser.parse(date, fuzzy=True) + if not prev_time: + prev_time = new_time + str_time = date + elif new_time > prev_time: + prev_time = new_time + str_time = date + + return str_time + + def find_common_attributes(self, cubes: CubeList, attributes: List) -> dict: + """ + Find the common attribute values between the cubes. If the attribute is history, the most recent + value will be returned. + + Args: + cubes: + A list of template cubes to extract common attributes from. + attributes: + A list of attributes to be copied. + Returns: + A dictionary of common attributes. + Raises: + ValueError: If the attribute is not found in any of the template cubes + ValueError: If the attribute has different values in the provided template cubes. + """ + common_attributes = {} + for attribute in attributes: + attribute_value = [ + cube.attributes.get(attribute) + for cube in cubes + if cube.attributes.get(attribute) is not None + ] + if attribute == "history": + # We expect the history attribute to differ between cubes, so we will only keep the most recent one + common_attributes[attribute] = self.get_most_recent_history( + attribute_value + ) + elif len(attribute_value) == 0: + raise ValueError( + f"Attribute {attribute} not found in any of the template cubes" + ) + elif any(attr != attribute_value[0] for attr in attribute_value): + raise ValueError( + f"Attribute {attribute} has different values in the provided template cubes" + ) + else: + common_attributes[attribute] = attribute_value[0] + + return common_attributes + + def process(self, *cubes: Union[Cube, CubeList]) -> Cube: """ Copy attribute or auxilary coordinate values from template_cube to cube, - overwriting any existing values. + overwriting any existing values. If the history attribute is present in + the list of requested attributes, the most recent value will be used. If an + auxilary coordinate needs to be copied then all template cubes must have the + auxilary coordinate present. Operation is performed in-place on provided inputs. Args: cubes: - Source cube(s) to be updated. Final cube provided represents the template_cube. + List of cubes. First cube provided represents the cube to be updated. All + other cubes are treated as template cubes. Returns: - Updated cube(s). + A cube with attributes copied from the template cubes """ cubes_proc = as_cubelist(*cubes) @@ -51,18 +122,17 @@ def process(self, *cubes: Union[Cube, CubeList]) -> Union[Cube, CubeList]: raise RuntimeError( f"At least two cubes are required for this operation, got {len(cubes_proc)}" ) - template_cube = cubes_proc.pop() - - for cube in cubes_proc: - new_attributes = {k: template_cube.attributes[k] for k in self.attributes} - amend_attributes(cube, new_attributes) - for coord in self.aux_coord: - # If coordinate is already present in the cube, remove it - if cube.coords(coord): - cube.remove_coord(coord) - cube.add_aux_coord( - template_cube.coord(coord), - data_dims=template_cube.coord_dims(coord=coord), - ) + cube = cubes_proc.pop(0) + template_cubes = cubes_proc + new_attributes = self.find_common_attributes(template_cubes, self.attributes) + amend_attributes(cube, new_attributes) - return cubes_proc if len(cubes_proc) > 1 else cubes_proc[0] + for coord in self.aux_coord: + # If coordinate is already present in the cube, remove it + if cube.coords(coord): + cube.remove_coord(coord) + cube.add_aux_coord( + template_cubes[0].coord(coord), + data_dims=template_cubes[0].coord_dims(coord=coord), + ) + return cube diff --git a/improver_tests/utilities/copy_metadata/test_CopyMetadata.py b/improver_tests/utilities/copy_metadata/test_CopyMetadata.py index e9156c0f8d..21170ba610 100644 --- a/improver_tests/utilities/copy_metadata/test_CopyMetadata.py +++ b/improver_tests/utilities/copy_metadata/test_CopyMetadata.py @@ -6,7 +6,7 @@ import pytest from iris.coords import AuxCoord -from iris.cube import Cube, CubeList +from iris.cube import Cube from improver.utilities.copy_metadata import CopyMetadata @@ -17,45 +17,92 @@ class HaltExecution(Exception): @patch("improver.utilities.copy_metadata.as_cubelist") def test_as_cubelist_called(mock_as_cubelist): + """Test that the as_cubelist function is called.""" mock_as_cubelist.side_effect = HaltExecution try: CopyMetadata(["attribA", "attribB"])( - sentinel.cube0, sentinel.cube1, sentinel.template_cube + sentinel.cube0, sentinel.template_cube1, sentinel.template_cube2 ) except HaltExecution: pass mock_as_cubelist.assert_called_once_with( - sentinel.cube0, sentinel.cube1, sentinel.template_cube + sentinel.cube0, sentinel.template_cube1, sentinel.template_cube2 ) -def test_copy_attributes_multi_input(): +@pytest.mark.parametrize("history", [False, True]) +def test_copy_attributes_multi_input(history): """ - Test the copy_attributes function for multiple input cubes. + Test the copy_attributes function for multiple input template cubes. Demonstrates copying attributes from the template cube to the input cubes and also demonstrates the attributes on the templates cube that aren't specified in the attributes list are indeed ignored. + Also demonstrates that the most recent history attribute is copied correctly if + present on multiple template cubes. + Note how we are verifying the object IDs, since CubeAttributes is an in-place operation. """ attributes = ["attribA", "attribB"] cube0 = Cube([0], attributes={"attribA": "valueA", "attribB": "valueB"}) - cube1 = Cube([0], attributes={"attribA": "valueAA", "attribB": "valueBB"}) template_cube = Cube( - [0], attributes={"attribA": "tempA", "attribB": "tempB", "attribC": "tempC"} + [0], + attributes={ + "attribA": "tempA", + "attribB": "tempB", + "attribC": "tempC", + "history": "2024-11-25T00:00:00Z", + }, + ) + template_cube_2 = Cube( + [0], + attributes={ + "attribA": "tempA", + "attribC": "tempC", + "history": "2024-11-25T01:43:15Z", + }, + ) + if history: + attributes.append("history") + + plugin = CopyMetadata(attributes) + result = plugin.process(cube0, template_cube, template_cube_2) + assert isinstance(result, Cube) + assert result.attributes["attribA"] == "tempA" + assert result.attributes["attribB"] == "tempB" + assert "attribC" not in result.attributes + assert result == cube0 # Checks cube has been altered in-place + if history: + assert result.attributes["history"] == "2024-11-25T01:43:15Z" + else: + assert "history" not in result.attributes + + +def test_copy_attributes_one_history_attribute(): + """Test that the history attribute is copied correctly if only one template cube has a history attribute.""" + attributes = ["attribA", "attribB", "history"] + cube0 = Cube([0], attributes={"attribA": "valueA", "attribB": "valueB"}) + template_cube = Cube( + [0], + attributes={ + "attribA": "tempA", + "attribB": "tempB", + "attribC": "tempC", + "history": "2024-11-25T00:00:00Z", + }, ) + template_cube_2 = Cube([0], attributes={"attribA": "tempA", "attribC": "tempC"}) plugin = CopyMetadata(attributes) - result = plugin.process(cube0, cube1, template_cube) - assert type(result) is CubeList - for res in result: - assert res.attributes["attribA"] == "tempA" - assert res.attributes["attribB"] == "tempB" - assert "attribC" not in res.attributes - assert id(result[0]) == id(cube0) - assert id(result[1]) == id(cube1) + result = plugin.process(cube0, template_cube_2, template_cube) + assert isinstance(result, Cube) + assert result.attributes["attribA"] == "tempA" + assert result.attributes["attribB"] == "tempB" + assert "attribC" not in result.attributes + assert result == cube0 # Checks cube has been altered in-place + assert result.attributes["history"] == "2024-11-25T00:00:00Z" def test_copy_attributes_single_input(): @@ -72,12 +119,12 @@ def test_copy_attributes_single_input(): plugin = CopyMetadata(attributes) result = plugin.process(cube0, template_cube) - assert type(result) is Cube + assert isinstance(result, Cube) assert result.attributes["attribA"] == "tempA" assert result.attributes["attribB"] == "tempB" assert result.attributes["attribD"] == "valueD" assert "attribC" not in result.attributes - assert id(result) == id(cube0) + assert result == cube0 # Checks cube has been altered in-place @pytest.mark.parametrize("cubelist", [True, False]) @@ -101,21 +148,56 @@ def test_auxiliary_coord_modification(cubelist): cube = Cube(data, aux_coords_and_dims=[(dummy_aux_coord_0, 0)]) # Create the cube with the auxiliary coordinates - template_cube = Cube( + template_cubes = Cube( data, aux_coords_and_dims=[(dummy_aux_coord_0_temp, 0), (dummy_aux_coord_1_temp, 0)], ) - cubes = cube if cubelist: - cubes = [cube, cube] - + template_cubes = [template_cubes, template_cubes] plugin = CopyMetadata(aux_coord=auxiliary_coord) - result = plugin.process(cubes, template_cube) - if cubelist: - for res in result: - assert res.coord("dummy_0 status_flag") == dummy_aux_coord_0_temp - assert res.coord("dummy_1 status_flag") == dummy_aux_coord_1_temp - else: - assert result.coord("dummy_0 status_flag") == dummy_aux_coord_0_temp - assert result.coord("dummy_1 status_flag") == dummy_aux_coord_1_temp + result = plugin.process(cube, template_cubes) + assert result.coord("dummy_0 status_flag") == dummy_aux_coord_0_temp + assert result.coord("dummy_1 status_flag") == dummy_aux_coord_1_temp + + +def test_copy_attributes_multi_input_mismatching_attributes(): + """Test that an error is raised if the template cubes have mismatching attribute values.""" + attributes = ["attribA", "attribB"] + cube0 = Cube([0], attributes={"attribA": "valueA", "attribB": "valueB"}) + template_cube = Cube( + [0], attributes={"attribA": "tempA", "attribB": "tempB", "attribC": "tempC"} + ) + template_cube_2 = Cube([0], attributes={"attribA": "temp2A", "attribC": "tempC"}) + + plugin = CopyMetadata(attributes) + with pytest.raises( + ValueError, + match="Attribute attribA has different values in the provided template cubes", + ): + plugin.process(cube0, template_cube_2, template_cube) + + +def test_copy_attributes_multi_input_missing_attributes(): + """Test that an error is raised if a requested attribute is not present on any of the template cubes.""" + attributes = ["attribA", "attribB"] + cube0 = Cube([0], attributes={"attribA": "valueA", "attribB": "valueB"}) + template_cube = Cube([0], attributes={"attribB": "tempB", "attribC": "tempC"}) + template_cube_2 = Cube([0], attributes={"attribC": "tempC"}) + plugin = CopyMetadata(attributes) + with pytest.raises( + ValueError, match="Attribute attribA not found in any of the template cubes" + ): + plugin.process(cube0, template_cube_2, template_cube) + + +def test_copy_attributes_missing_inputs(): + """Test that an error is raised if the number of input cubes is less than 2.""" + attributes = ["attribA", "attribB"] + cube0 = Cube([0]) + + plugin = CopyMetadata(attributes) + with pytest.raises( + RuntimeError, match="At least two cubes are required for this operation, got 1" + ): + plugin.process(cube0)