Merge pull request #440 from ecmwf-ifs/nabr-fix-string-line-breaks

JoinableStringList: Do not break lines within quoted strings
ecmwf-ifs · Nov 19, 2024 · d2df088 · d2df088
2 parents 00d4290 + 6414f60
commit d2df088
Show file tree

Hide file tree

Showing 4 changed files with 67 additions and 23 deletions.
diff --git a/cmake/loki_transform_helpers.cmake b/cmake/loki_transform_helpers.cmake
@@ -132,7 +132,7 @@ macro( _loki_transform_env_setup )
     set( _LOKI_TRANSFORM_ENV )
     set( _LOKI_TRANSFORM_PATH )
 
-    if( TARGET clawfc AND ${_PAR_FRONTEND} STREQUAL "omni" )
+    if( TARGET clawfc AND "${_PAR_FRONTEND}" STREQUAL "omni" )
         # Ugly hack but I don't have a better solution: We need to add F_FRONT
         # (which is installed in the same directory as clawfc) to the PATH, if
         # OMNI is used as a frontend. Hence we have to update the environment in the below
@@ -142,7 +142,7 @@ macro( _loki_transform_env_setup )
         list( APPEND _LOKI_TRANSFORM_PATH ${_CLAWFC_LOCATION} )
     endif()
 
-    if( _PAR_OUTPATH AND (${_PAR_FRONTEND} STREQUAL "omni" OR ${_PAR_FRONTEND} STREQUAL "ofp") )
+    if( _PAR_OUTPATH AND ("${_PAR_FRONTEND}" STREQUAL "omni" OR "${_PAR_FRONTEND}" STREQUAL "ofp") )
         # With pre-processing, we may end up having a race condition on the preprocessed
         # source files in parallel builds. Ensuring we use the outpath of the call to Loki
         # should ensure in most cases that parallel builds write to different directories

diff --git a/loki/backend/tests/test_fgen.py b/loki/backend/tests/test_fgen.py
@@ -90,6 +90,32 @@ def test_fgen_literal_list_linebreak(frontend, tmp_path):
     assert all(len(line) < 132 for line in body_lines)
 
 
+@pytest.mark.parametrize('frontend', available_frontends())
+def test_character_list_linebreak(frontend, tmp_path):
+    fcode = """
+module some_mod
+  implicit none
+  character(len=*), parameter :: IceModelName(0:5) = (/ 'Monochromatic         ', &
+       &                                                'Fu-IFS                ', &
+       &                                                'Baran-EXPERIMENTAL    ', &
+       &                                                'Baran2016             ', &
+       &                                                'Baran2017-EXPERIMENTAL', &
+       &                                                'Yi                    ' /)
+end module some_mod
+    """
+    module = Module.from_source(fcode, frontend=frontend, xmods=[tmp_path])
+    generated_fcode = module.to_fortran()
+    for ice_model_name in (
+        "'Monochromatic         '",
+        "'Fu-IFS                '",
+        "'Baran-EXPERIMENTAL    '",
+        "'Baran2016             '",
+        "'Baran2017-EXPERIMENTAL'",
+        "'Yi                    '"
+    ):
+        assert ice_model_name in generated_fcode
+
+
 @pytest.mark.parametrize('frontend', available_frontends())
 def test_fgen_data_stmt(frontend):
     """

diff --git a/loki/tools/strings.py b/loki/tools/strings.py
@@ -18,6 +18,7 @@ def truncate_string(string, length=16, continuation='...'):
     """
     Truncates a string to have a maximum given number of characters and indicates the
     truncation by continuation characters '...'.
+
     This is used, for example, in the representation strings of IR nodes.
     """
     if len(string) > length:
@@ -27,26 +28,34 @@ def truncate_string(string, length=16, continuation='...'):
 
 class JoinableStringList:
     """
-    Helper class that takes a list of items and joins them into a long string,
-    when converting the object to a string using custom separators.
+    Helper class that takes a list of :data:`items` and joins them into a long
+    string, when converting the object to a string using custom separator :data:`sep`.
     Long lines are wrapped automatically.
 
-    The behaviour is essentially the same as `sep.join(items)` but with the
-    automatic wrapping of long lines. `items` can contain st
-
-    :param items: the list (or tuple) of items (that can be instances of
-                  `str` or `JoinableStringList`) that is to be joined.
-    :type items: list or tuple
-    :param str sep: the separator to be inserted between consecutive items.
-    :param int width: the line width after which long lines should be wrapped.
-    :param cont: the line continuation string to be inserted on a line break.
-    :type cont: (str, str) or str
-    :param bool separable: an indicator whether this can be split up to fill
-                           lines or should stay as a unit (this is for cosmetic
-                           purposes only, as too long lines will be wrapped
-                           in any case).
+    The behaviour is essentially the same as ``sep.join(items)`` but with the
+    automatic wrapping of long lines. :data:`items` can contain strings as well
+    as other instances of :class:`JoinableStringList`.
+
+    Parameters
+    ----------
+    items : list of str or :any:`JoinableStringList`
+        The list (or tuple) of items that should be joined into a string.
+    sep : str
+        The separator to be inserted between consecutive items.
+    width : int
+        The line width after which long lines should be wrapped.
+    cont : (str, str) or str
+        The line continuation string to be inserted on a line break, optionally
+        separated as end-of-line and beginning-of-next-line strings
+    separable : bool
+        An indicator whether this object can be split up to fill
+        lines or should stay as a unit (this is for cosmetic
+        purposes only, as too long lines will be wrapped in any case).
     """
 
+    _pattern_quoted_string = re.compile(r'(?:\'.*?\')|(?:".*?")')
+    _pattern_chunk_separator = re.compile(r'(\s|\)(?!%)|\n)')
+
     def __init__(self, items, sep, width, cont, separable=True):
         super().__init__()
 
@@ -107,18 +116,23 @@ def _add_item_to_line(self, line, item):
         # The new item does not fit onto a line at all and it is not a JoinableStringList
         # where the first item fits onto a line, or for which we know how to split it:
         # let's try our best by splitting the string
-        # TODO: This is not safe for strings currently and may still exceed
-        #       the line limit if the chunks are too big! Safest option would
-        #       be to have expression mapper etc. all return JoinableStringList instances
-        #       and accept violations for the remaining cases.
         if isinstance(item, str):
             item_str = item
         elif isinstance(item, type(self)):
             # We simply join up the items here to avoid that any line continuations are introduced
             item_str = item.sep.join(str(i) for i in item.items)
         else:
             item_str = str(item)
-        chunk_list = re.split(r'(\s|\)(?!%)|\n)', item_str)  # split on ' ', ')' (unless followed by '%') and '\n'
+
+        chunk_list = []
+        offset = 0
+        for string_match in self._pattern_quoted_string.finditer(item_str):
+            if string_match.start() > offset:
+                chunk_list += self._pattern_chunk_separator.split(item_str[offset:string_match.start()])
+            chunk_list += [string_match[0]]
+            offset = string_match.end()
+        if offset < len(item_str):
+            chunk_list += self._pattern_chunk_separator.split(item_str[offset:])
 
         # First, add as much as possible to the previous line
         next_chunk = 0

diff --git a/loki/tools/tests/test_tools.py b/loki/tools/tests/test_tools.py
@@ -126,6 +126,10 @@ def test_is_subset_raises(a, b):
      ' ', 8, '\n', 'Hello \nworld!'),
     (('Hello', JoinableStringList(['w', 'o', 'r', 'l', 'd', '!'], '', 8, '\n', separable=True)),
      ' ', 8, '\n', 'Hello w\norld!'),
+    (["'Long word   '", '"list with    "', '"several entries   "', "'that may have'",
+      "' trailing whitespace '", "'characters     '"], ', ', 20, '\n',
+     ("'Long word   ', \n\"list with    \", \n\"several entries   \"\n, 'that may have', \n"
+      "' trailing whitespace '\n, 'characters     '"))
 ])
 def test_joinable_string_list(items, sep, width, cont, ref):
     """