Merge pull request #122 from wmo-im/add-metadata-id-to-reports

align ETS and KPI report formats, add UUID and metadata to reports
wmo-im · Aug 27, 2024 · bfd2644 · bfd2644
2 parents 5a70a09 + f118672
commit bfd2644
Show file tree

Hide file tree

Showing 4 changed files with 102 additions and 64 deletions.
diff --git a/pywcmp/ets.py b/pywcmp/ets.py
@@ -3,7 +3,7 @@
 # Authors: Tom Kralidis <[email protected]>
 #          Ján Osuský <[email protected]>
 #
-# Copyright (c) 2023 Tom Kralidis
+# Copyright (c) 2024 Tom Kralidis
 # Copyright (c) 2022 Government of Canada
 # Copyright (c) 2020 IBL Software Engineering spol. s r. o.
 #
@@ -81,7 +81,7 @@ def validate(ctx, file_or_url, logfile, verbosity,
         ctx.exit(1)
 
     click.echo(json.dumps(results, indent=4))
-    ctx.exit(results['ets-report']['summary']['FAILED'])
+    ctx.exit(results['summary']['FAILED'])
 
 
 ets.add_command(validate)
diff --git a/pywcmp/wcmp2/ets.py b/pywcmp/wcmp2/ets.py
@@ -29,6 +29,7 @@
 import json
 import logging
 from pathlib import Path
+import uuid
 
 from jsonschema.validators import Draft202012Validator
 
@@ -67,7 +68,6 @@ def __init__(self, data: dict):
 
         self.test_id = None
         self.record = data
-        self.report = []
 
         self.th = TopicHierarchy(tables=get_userdir())
 
@@ -77,8 +77,10 @@ def run_tests(self, fail_on_schema_validation=False):
         results = []
         tests = []
         ets_report = {
+            'id': str(uuid.uuid4()),
+            'report_type': 'ets',
             'summary': {},
-            'generated-by': f'pywcmp {pywcmp.__version__} (https://github.com/wmo-im/pywcmp)'  # noqa
+            'generated_by': f'pywcmp {pywcmp.__version__} (https://github.com/wmo-im/pywcmp)'  # noqa
         }
 
         for f in dir(WMOCoreMetadataProfileTestSuite2):
@@ -106,10 +108,9 @@ def run_tests(self, fail_on_schema_validation=False):
 
         ets_report['tests'] = results
         ets_report['datetime'] = get_current_datetime_rfc3339()
+        ets_report['metadata_id'] = self.record['id']
 
-        return {
-            'ets-report': ets_report
-        }
+        return ets_report
 
     def test_requirement_validation(self):
         """

diff --git a/pywcmp/wcmp2/kpi.py b/pywcmp/wcmp2/kpi.py
@@ -2,7 +2,7 @@
 #
 # Authors: Tom Kralidis <[email protected]>
 #
-# Copyright (c) 2023 Tom Kralidis
+# Copyright (c) 2024 Tom Kralidis
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
@@ -28,6 +28,7 @@
 import logging
 import mimetypes
 import re
+import uuid
 
 from bs4 import BeautifulSoup
 
@@ -41,6 +42,18 @@
 ROUND = 3
 
 
+def gen_test_id(test_id: str) -> str:
+    """
+    Convenience function to build a test identifier as a URI
+
+    :param test_id: test suite identifier
+
+    :returns: test identifier as URI
+    """
+
+    return f'http://wis.wmo.int/spec/wcmp/2/kpi/core/{test_id}'
+
+
 class WMOCoreMetadataProfileKeyPerformanceIndicators:
     """Key Performance Indicators for WMO Core Metadata Profile"""
 
@@ -70,18 +83,19 @@ def kpi_title(self) -> tuple:
         """
         Implements KPI for Good quality title
 
-        :returns: `tuple` of KPI name, achieved score, total score,
+        :returns: `tuple` of KPI id, title, total score, achieved score,
                   and comments
         """
 
         total = 8
         score = 0
         comments = []
 
-        name = 'KPI: Good quality title'
+        id_ = gen_test_id('good_quality_title')
+        title = 'Good quality title'
         acronym_regex = r'\b([A-Z]{2,}\d*)\b'
 
-        LOGGER.info(f'Running {name}')
+        LOGGER.info(f'Running {title}')
 
         title = self.data['properties']['title']
 
@@ -146,23 +160,24 @@ def kpi_title(self) -> tuple:
         else:
             comments.append(f'Title contains spelling errors {misspelled}')
 
-        return name, total, score, comments
+        return id_, title, total, score, comments
 
     def kpi_description(self) -> tuple:
         """
         Implements KPI for Good quality description
 
-        :returns: `tuple` of KPI name, achieved score, total score,
+        :returns: `tuple` of KPI id, title, total score, achieved score,
                   and comments
         """
 
         total = 4
         score = 0
         comments = []
 
-        name = 'KPI: Good quality description'
+        id_ = gen_test_id('good_quality_description')
+        title = ': Good quality description'
 
-        LOGGER.info(f'Running {name}')
+        LOGGER.info(f'Running {title}')
 
         description = self.data['properties']['description']
 
@@ -198,13 +213,13 @@ def kpi_description(self) -> tuple:
         else:
             comments.append(f'Description contains spelling errors {misspelled}')  # noqa
 
-        return name, total, score, comments
+        return id_, title, total, score, comments
 
     def kpi_time_intervals(self) -> tuple:
         """
         Implements KPI for Time intervals
 
-        :returns: `tuple` of KPI name, achieved score, total score,
+        :returns: `tuple` of KPI id, title, total score, achieved score,
                   and comments
         """
 
@@ -214,15 +229,16 @@ def kpi_time_intervals(self) -> tuple:
         score = 0
         comments = []
 
-        name = 'KPI: Time intervals'
+        id_ = gen_test_id('time_intervals')
+        title = 'Time intervals'
 
-        LOGGER.info(f'Running {name}')
+        LOGGER.info(f'Running {title}')
 
         time_ = self.data.get('time')
         if time_ is None:
             msg = 'time is null; no KPI check'
             LOGGER.debug(msg)
-            return name, 0, 0, [msg]
+            return id_, title, 0, 0, [msg]
 
         interval = self.data['time'].get('interval')
 
@@ -258,13 +274,13 @@ def kpi_time_intervals(self) -> tuple:
             else:
                 comments.append('No temporal resolution found')
 
-        return name, total, score, comments
+        return id_, title, total, score, comments
 
     def kpi_graphic_overview(self) -> tuple:
         """
         Implements KPI for Graphic overview for metadata records
 
-        :returns: `tuple` of KPI name, achieved score, total score,
+        :returns: `tuple` of KPI id, title, total score, achieved score,
                   and comments
         """
 
@@ -282,9 +298,10 @@ def kpi_graphic_overview(self) -> tuple:
             'image/webp'
         ]
 
-        name = 'KPI: Graphic overview for metadata records'
+        id_ = gen_test_id('graphic_overview_for_metadata_records')
+        title = 'Graphic overview for metadata records'
 
-        LOGGER.info(f'Running {name}')
+        LOGGER.info(f'Running {title}')
 
         for link in self.data['links']:
             if link.get('rel') == 'preview':
@@ -308,13 +325,13 @@ def kpi_graphic_overview(self) -> tuple:
                 else:
                     comments.append(f"URL not accessible: {link['href']}")
 
-        return name, total, score, comments
+        return id_, title, total, score, comments
 
     def kpi_links_health(self) -> tuple:
         """
         Implements KPI for Links health
 
-        :returns: `tuple` of KPI name, achieved score, total score,
+        :returns: `tuple` of KPI id, title, total score, achieved score,
                   and comments
         """
 
@@ -324,7 +341,8 @@ def kpi_links_health(self) -> tuple:
         score = 0
         comments = []
 
-        name = 'KPI: Links health'
+        id_ = gen_test_id('links_health')
+        title = 'Links health'
 
         valid_link_mime_types = list(mimetypes.types_map.values())
         valid_link_mime_types.extend([
@@ -333,7 +351,7 @@ def kpi_links_health(self) -> tuple:
             'text/turtle'
         ])
 
-        LOGGER.info(f'Running {name}')
+        LOGGER.info(f'Running {title}')
 
         LOGGER.debug('Assembling all links')
 
@@ -384,13 +402,13 @@ def kpi_links_health(self) -> tuple:
                 else:
                     comments.append(f"invalid link type {link_type}")
 
-        return name, total, score, comments
+        return id_, title, total, score, comments
 
     def kpi_contacts(self) -> tuple:
         """
         Implements KPI for Contacts
 
-        :returns: `tuple` of KPI name, achieved score, total score,
+        :returns: `tuple` of KPI id, title, total score, achieved score,
                   and comments
         """
 
@@ -401,9 +419,10 @@ def kpi_contacts(self) -> tuple:
         score = 0
         comments = []
 
-        name = 'KPI: Contacts'
+        id_ = gen_test_id('contacts')
+        title = 'Contacts'
 
-        LOGGER.info(f'Running {name}')
+        LOGGER.info(f'Running {title}')
 
         for contact in self.data['properties']['contacts']:
             if 'host' in contact['roles']:
@@ -429,13 +448,13 @@ def kpi_contacts(self) -> tuple:
             else:
                 comments.append('No host contact email found')
 
-        return name, total, score, comments
+        return id_, title, total, score, comments
 
     def kpi_pids(self) -> tuple:
         """
         Implements KPI for Persistent identifiers
 
-        :returns: `tuple` of KPI name, achieved score, total score,
+        :returns: `tuple` of KPI id, title, total score, achieved score,
                   and comments
         """
 
@@ -445,9 +464,10 @@ def kpi_pids(self) -> tuple:
         score = 0
         comments = []
 
-        name = 'KPI: Persistent identifiers'
+        id_ = gen_test_id('persistent_identifiers')
+        title = 'Persistent identifiers'
 
-        LOGGER.info(f'Running {name}')
+        LOGGER.info(f'Running {title}')
 
         if 'externalIds' in self.data['properties']:
             total = 3
@@ -467,7 +487,7 @@ def kpi_pids(self) -> tuple:
                 score += 1
                 break
 
-        return name, total, score, comments
+        return id_, title, total, score, comments
 
     def evaluate(self, kpi: str = None) -> dict:
         """
@@ -498,38 +518,42 @@ def evaluate(self, kpi: str = None) -> dict:
 
         LOGGER.info(f'Evaluating KPIs: {kpis_to_run}')
 
-        results = {}
+        results = {
+            'id': str(uuid.uuid4()),
+            'report_type': 'kpi',
+            'metadata_id': self.identifier,
+            'datetime': get_current_datetime_rfc3339(),
+            'generated_by': f'pywcmp {pywcmp.__version__} (https://github.com/wmo-im/pywcmp)',  # noqa
+            'tests': []
+        }
 
         for kpi in kpis_to_run:
             LOGGER.debug(f'Running {kpi}')
             result = getattr(self, kpi)()
             LOGGER.debug(f'Raw result: {result}')
             LOGGER.debug('Calculating result')
             try:
-                percentage = round(float((result[2] / result[1]) * 100), ROUND)
+                percentage = round(float((result[3] / result[2]) * 100), ROUND)
             except ZeroDivisionError:
                 percentage = None
 
-            results[kpi] = {
-                'name': result[0],
-                'total': result[1],
-                'score': result[2],
-                'comments': result[3],
+            results['tests'].append({
+                'id': result[0],
+                'title': result[1],
+                'total': result[2],
+                'score': result[3],
+                'comments': result[4],
                 'percentage': percentage
-            }
-            LOGGER.debug(f'{kpi}: {result[1]} / {result[2]} = {percentage}')
+            })
+            LOGGER.debug(f'{kpi}: {result[2]} / {result[3]} = {percentage}')
 
         LOGGER.debug('Calculating total results')
         results['summary'] = generate_summary(results)
         # this total summary needs extra elements
-        results['summary']['identifier'] = self.identifier
         overall_grade = 'F'
         overall_grade = calculate_grade(results['summary']['percentage'])
         results['summary']['grade'] = overall_grade
 
-        results['datetime'] = get_current_datetime_rfc3339()
-        results['generated-by'] = f'pywcmp {pywcmp.__version__} (https://github.com/wmo-im/pywcmp)'  # noqa
-
         return results
 
 
@@ -542,9 +566,14 @@ def generate_summary(results: dict) -> dict:
     :returns: `dict` of summary report
     """
 
-    sum_total = sum(v['total'] for v in results.values())
-    sum_score = sum(v['score'] for v in results.values())
-    comments = {k: v['comments'] for k, v in results.items() if v['comments']}
+    sum_total = sum(v['total'] for v in results['tests'])
+    sum_score = sum(v['score'] for v in results['tests'])
+    comments = {}
+
+    for test in results['tests']:
+        if test['comments']:
+            for k, v in test.items():
+                comments[k] = v
 
     try:
         sum_percentage = round(float((sum_score / sum_total) * 100), ROUND)