From ae41c5034ad074228290407c0272c8b8035974f4 Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Mon, 13 Apr 2026 01:57:40 +1000 Subject: [PATCH 1/7] #390 Start to add support for linemarker. --- src/fparser/two/C99Preprocessor.py | 46 +++++++++++++++++++ src/fparser/two/tests/test_c99preprocessor.py | 13 ++++++ 2 files changed, 59 insertions(+) diff --git a/src/fparser/two/C99Preprocessor.py b/src/fparser/two/C99Preprocessor.py index 3b42a211..9a9e9204 100644 --- a/src/fparser/two/C99Preprocessor.py +++ b/src/fparser/two/C99Preprocessor.py @@ -57,6 +57,7 @@ "Cpp_Macro_Stmt", "Cpp_Undef_Stmt", "Cpp_Line_Stmt", + "Cpp_Linemarker_Stmt", "Cpp_Error_Stmt", "Cpp_Warning_Stmt", "Cpp_Null_Stmt", @@ -649,6 +650,51 @@ def tostr(self): return "{0} {1}".format(*self.items) +class Cpp_Linemarker_Stmt(WORDClsBase): # Linemarker + """ + Linemarker + + linemarker-stmt is # digit-sequence [ "s-char-sequence" ] [digit ...] + """ + + subclass_names = [] + use_names = ["", "Cpp_Pp_Tokens"] + + _pattern = pattern.Pattern("", + r"^\s*#", + value="#") + + @staticmethod + def match(string): + """Implements the matching for a linemarker. + The right hand side of the directive is not matched any further + but simply kept as a string. + + :param str string: the string to match with as a line statement. + + :return: a tuple of size 1 with the right hand side as a string, \ + or `None` if there is no match. + :rtype: (`str`) or `NoneType` + + """ + if not string: + return None + return WORDClsBase.match( + Cpp_Linemarker_Stmt._pattern, + Cpp_Pp_Tokens, + string, + colons=False, + require_cls=True, + ) + + def tostr(self): + """ + :return: this linemarker as a string. 
+ :rtype: str + """ + return "{0} {1}".format(*self.items) + + class Cpp_Error_Stmt(WORDClsBase): # 6.10.5 Error directive """ C99 6.10.5 Error directive diff --git a/src/fparser/two/tests/test_c99preprocessor.py b/src/fparser/two/tests/test_c99preprocessor.py index 921796a6..606af895 100644 --- a/src/fparser/two/tests/test_c99preprocessor.py +++ b/src/fparser/two/tests/test_c99preprocessor.py @@ -57,6 +57,7 @@ Cpp_Macro_Identifier_List, Cpp_Undef_Stmt, Cpp_Line_Stmt, + Cpp_Linemarker_Stmt, Cpp_Error_Stmt, Cpp_Warning_Stmt, Cpp_Null_Stmt, @@ -451,6 +452,18 @@ def test_incorrect_line_stmt(line): assert "Cpp_Line_Stmt: '{0}'".format(line) in str(excinfo.value) +@pytest.mark.usefixtures("f2003_create") +@pytest.mark.parametrize("line_ref", + [('# 123 "file"', '# 123 "file"'), + (' # 123 "file" ', '# 123 "file"'), + ('# 123 "file" 1 3', '# 123 "file" 1 3')]) +def test_linemarker_statement(line_ref): + """Test that #line is recognized""" + line, ref = line_ref + result = Cpp_Linemarker_Stmt(line) + assert str(result) == ref + + @pytest.mark.usefixtures("f2003_create") @pytest.mark.parametrize("line", ["#error MSG", " # error MSG "]) def test_error_statement_with_msg(line): From 5e124e56c765709bdaf5c2212abd4ce18ffcdbc8 Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Mon, 13 Apr 2026 13:00:21 +1000 Subject: [PATCH 2/7] #390 Improve linemarker handling to support detection of invalid markers. --- src/fparser/two/C99Preprocessor.py | 22 ++++++++++++++---- src/fparser/two/tests/test_c99preprocessor.py | 23 +++++++++++++++---- 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/src/fparser/two/C99Preprocessor.py b/src/fparser/two/C99Preprocessor.py index 9a9e9204..b2ab25b3 100644 --- a/src/fparser/two/C99Preprocessor.py +++ b/src/fparser/two/C99Preprocessor.py @@ -32,7 +32,11 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-"""C99 Preprocessor Syntax Rules.""" +"""C99 Preprocessor Syntax Rules. It also supports linemarker statements +(which are technically not preprocessor directives, but are very close +in their syntax, i.e. starting with `#`) + +""" # Author: Balthasar Reuter # Based on previous work by Martin Schlipf (https://github.com/martin-schlipf) @@ -658,11 +662,11 @@ class Cpp_Linemarker_Stmt(WORDClsBase): # Linemarker """ subclass_names = [] - use_names = ["", "Cpp_Pp_Tokens"] + use_names = ["Cpp_Pp_Tokens"] - _pattern = pattern.Pattern("", - r"^\s*#", - value="#") + # The match method will check that it is a valid linemarker, i.e. + # it has a line number, and file name in double quotes. + _pattern = pattern.Pattern("", r"^\s*#", value="#") @staticmethod def match(string): @@ -679,6 +683,14 @@ def match(string): """ if not string: return None + + # We can't fully rely on WORDClsBase, since it can't easily + # test if there is a line number following (it returns + # `value` for a match, but can't insert the matched line number + # in this value). 
+ if not re.match(r"^\s*#\s+[0-9]+\s+\".*\"", string): + return + return WORDClsBase.match( Cpp_Linemarker_Stmt._pattern, Cpp_Pp_Tokens, diff --git a/src/fparser/two/tests/test_c99preprocessor.py b/src/fparser/two/tests/test_c99preprocessor.py index 606af895..7ed23629 100644 --- a/src/fparser/two/tests/test_c99preprocessor.py +++ b/src/fparser/two/tests/test_c99preprocessor.py @@ -453,17 +453,30 @@ def test_incorrect_line_stmt(line): @pytest.mark.usefixtures("f2003_create") -@pytest.mark.parametrize("line_ref", - [('# 123 "file"', '# 123 "file"'), - (' # 123 "file" ', '# 123 "file"'), - ('# 123 "file" 1 3', '# 123 "file" 1 3')]) -def test_linemarker_statement(line_ref): +@pytest.mark.parametrize( + "line_ref", + [ + ('# 123 "file"', '# 123 "file"'), + (' # 123 "file" ', '# 123 "file"'), + ('# 123 "file" 1 3', '# 123 "file" 1 3'), + ], +) +def test_linemarker(line_ref): """Test that #line is recognized""" line, ref = line_ref result = Cpp_Linemarker_Stmt(line) assert str(result) == ref +@pytest.mark.usefixtures("f2003_create") +@pytest.mark.parametrize("line", ["# abc", '# "bla"', "# 123 'wrong_quotes'"]) +def test_incorrect_linemarker(line): + """Test that incorrectly formed #line statements raise exception""" + with pytest.raises(NoMatchError) as excinfo: + _ = Cpp_Linemarker_Stmt(line) + assert "Cpp_Linemarker_Stmt: '{0}'".format(line) in str(excinfo.value) + + @pytest.mark.usefixtures("f2003_create") @pytest.mark.parametrize("line", ["#error MSG", " # error MSG "]) def test_error_statement_with_msg(line): From 7e482a225b2f7bf6f234f19fc0fa8e311c6a3976 Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Thu, 16 Apr 2026 22:41:05 +1000 Subject: [PATCH 3/7] #390 Simplified parameterization. 
--- src/fparser/two/tests/test_c99preprocessor.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/fparser/two/tests/test_c99preprocessor.py b/src/fparser/two/tests/test_c99preprocessor.py index 7ed23629..232491f4 100644 --- a/src/fparser/two/tests/test_c99preprocessor.py +++ b/src/fparser/two/tests/test_c99preprocessor.py @@ -454,16 +454,15 @@ def test_incorrect_line_stmt(line): @pytest.mark.usefixtures("f2003_create") @pytest.mark.parametrize( - "line_ref", + "line, ref", [ ('# 123 "file"', '# 123 "file"'), (' # 123 "file" ', '# 123 "file"'), ('# 123 "file" 1 3', '# 123 "file" 1 3'), ], ) -def test_linemarker(line_ref): +def test_linemarker(line, ref): """Test that #line is recognized""" - line, ref = line_ref result = Cpp_Linemarker_Stmt(line) assert str(result) == ref From 88a74ee0617eb3c543a3e453acb01e8672402b6c Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Thu, 16 Apr 2026 23:50:29 +1000 Subject: [PATCH 4/7] #390 Fix incorrect linemarker specification in comment. --- src/fparser/two/C99Preprocessor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fparser/two/C99Preprocessor.py b/src/fparser/two/C99Preprocessor.py index b2ab25b3..2f81d17b 100644 --- a/src/fparser/two/C99Preprocessor.py +++ b/src/fparser/two/C99Preprocessor.py @@ -658,7 +658,7 @@ class Cpp_Linemarker_Stmt(WORDClsBase): # Linemarker """ Linemarker - linemarker-stmt is # digit-sequence [ "s-char-sequence" ] [digit ...] + linemarker-stmt is # digit-sequence "s-char-sequence" [digit ...] """ subclass_names = [] From 8eb39542df43bc4ba10815250197ca5b3c6a1261 Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Thu, 16 Apr 2026 23:53:32 +1000 Subject: [PATCH 5/7] #390 Remove need for using an additional regex to verify the linemarker syntax. Extend match function to be able to return the fully matched string. 
--- src/fparser/two/C99Preprocessor.py | 16 ++++++---------- src/fparser/two/tests/test_c99preprocessor.py | 2 +- src/fparser/two/utils.py | 7 ++++++- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/fparser/two/C99Preprocessor.py b/src/fparser/two/C99Preprocessor.py index 2f81d17b..6fab5e62 100644 --- a/src/fparser/two/C99Preprocessor.py +++ b/src/fparser/two/C99Preprocessor.py @@ -666,7 +666,7 @@ class Cpp_Linemarker_Stmt(WORDClsBase): # Linemarker # The match method will check that it is a valid linemarker, i.e. # it has a line number, and file name in double quotes. - _pattern = pattern.Pattern("", r"^\s*#", value="#") + _pattern = pattern.Pattern("", r"^\s*#\s+\d+\s+\".*\".*$") @staticmethod def match(string): @@ -684,27 +684,23 @@ def match(string): if not string: return None - # We can't fully rely on WORDClsBase, since it can't easily - # test if there is a line number following (it returns - # `value` for a match, but can't insert the matched line number - # in this value). - if not re.match(r"^\s*#\s+[0-9]+\s+\".*\"", string): - return - return WORDClsBase.match( Cpp_Linemarker_Stmt._pattern, Cpp_Pp_Tokens, string, colons=False, - require_cls=True, + require_cls=False, ) def tostr(self): """ + Returns the line marker as string. Note that fparser accepts + spaces before the `#`, but it should remove the spaces, hence + we lstrip the result :return: this linemarker as a string. 
:rtype: str """ - return "{0} {1}".format(*self.items) + return self.items[0].lstrip() class Cpp_Error_Stmt(WORDClsBase): # 6.10.5 Error directive diff --git a/src/fparser/two/tests/test_c99preprocessor.py b/src/fparser/two/tests/test_c99preprocessor.py index 232491f4..00d55ad3 100644 --- a/src/fparser/two/tests/test_c99preprocessor.py +++ b/src/fparser/two/tests/test_c99preprocessor.py @@ -457,7 +457,7 @@ def test_incorrect_line_stmt(line): "line, ref", [ ('# 123 "file"', '# 123 "file"'), - (' # 123 "file" ', '# 123 "file"'), + (' # 123 "file"', '# 123 "file"'), ('# 123 "file" 1 3', '# 123 "file" 1 3'), ], ) diff --git a/src/fparser/two/utils.py b/src/fparser/two/utils.py index 7a179d2e..48a99653 100644 --- a/src/fparser/two/utils.py +++ b/src/fparser/two/utils.py @@ -1819,7 +1819,12 @@ def match(keyword, cls, string, colons=False, require_cls=False): if my_match is None: return None line = string[len(my_match.group()) :] - pattern_value = keyword.value + # If no constant return value is defined, + # return the matched string + if keyword.value: + pattern_value = keyword.value + else: + pattern_value = my_match.group() if not line: if require_cls: From 9afd20039c9d61709822441b808aa0f822b27a63 Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Mon, 20 Apr 2026 17:53:00 +1000 Subject: [PATCH 6/7] #499 Added initial try to avoid the look-ahead and break the exponential scaling. 
--- src/fparser/two/utils.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/fparser/two/utils.py b/src/fparser/two/utils.py index 48a99653..d39963df 100644 --- a/src/fparser/two/utils.py +++ b/src/fparser/two/utils.py @@ -325,6 +325,7 @@ def import_now(): Else_If_Stmt, Else_Stmt, End_If_Stmt, + Label_Do_Stmt, Masked_Elsewhere_Stmt, Elsewhere_Stmt, End_Where_Stmt, @@ -750,6 +751,21 @@ def match( i += 1 continue + from fparser.two.Fortran2003 import Continue_Stmt, End_Do, End_Do_Stmt, Label_Do_Stmt + if (startcls and + isinstance(content[start_idx], Label_Do_Stmt) and + hasattr(obj, "get_end_label") and + content[start_idx].get_start_label() == obj.get_end_label() + and endcls is End_Do and + not isinstance(obj, (End_Do_Stmt, Continue_Stmt))): + if table_name: + SYMBOL_TABLES.exit_scope() + obj.restore_reader(reader) + for obj in reversed(content): + obj.restore_reader(reader) + return None + + # We got a match for this class had_match = True content.append(obj) From 269153c1456c6456c4be0f7c0dfa7fdffadc60ed Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Tue, 21 Apr 2026 15:21:52 +1000 Subject: [PATCH 7/7] #499 Simplified and documented code. 
--- src/fparser/two/utils.py | 43 +++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/src/fparser/two/utils.py b/src/fparser/two/utils.py index d39963df..d418a9cf 100644 --- a/src/fparser/two/utils.py +++ b/src/fparser/two/utils.py @@ -325,7 +325,6 @@ def import_now(): Else_If_Stmt, Else_Stmt, End_If_Stmt, - Label_Do_Stmt, Masked_Elsewhere_Stmt, Elsewhere_Stmt, End_Where_Stmt, @@ -337,7 +336,12 @@ def import_now(): Comment, Include_Stmt, add_comments_includes_directives, + Continue_Stmt, + End_Do, + End_Do_Stmt, + Label_Do_Stmt, ) + from fparser.two import C99Preprocessor DynamicImport.Else_If_Stmt = Else_If_Stmt @@ -357,6 +361,10 @@ def import_now(): DynamicImport.add_comments_includes_directives = ( add_comments_includes_directives ) + DynamicImport.Continue_Stmt = Continue_Stmt + DynamicImport.End_Do = End_Do + DynamicImport.End_Do_Stmt = End_Do_Stmt + DynamicImport.Label_Do_Stmt = Label_Do_Stmt di = DynamicImport() @@ -751,21 +759,38 @@ def match( i += 1 continue - from fparser.two.Fortran2003 import Continue_Stmt, End_Do, End_Do_Stmt, Label_Do_Stmt - if (startcls and - isinstance(content[start_idx], Label_Do_Stmt) and - hasattr(obj, "get_end_label") and - content[start_idx].get_start_label() == obj.get_end_label() - and endcls is End_Do and - not isinstance(obj, (End_Do_Stmt, Continue_Stmt))): + # The grammar contains an exponential scaling behaviour for + # non-blocked labelled loop statements. The parser will try + # to find a match for a blocked do statement, but will ignore + # the fact that there is a non-blocking label, e.g.: + # do 10 i=1, 10 + # 10 a(i) = 1 + # It will try to find a `10 enddo` or `10 continue` statement, + # ignoring the fact that the label 10 indicates that it is not + # a blocked loop. Full details in ticket 499. 
+ # In order to avoid that, we identify if we are looking for a + # labelled loop which is blocked (endcls=End_Do), and have + # neither an `End_Do` nor a `Continue`, which has the same + # label: in this case we can abort looking (which will then + # trigger the caller to test for the next rule, which is a + # non-blocked loop). This breaks the exponential behaviour + # in case of non-blocked loops (since the parser won't look + # ahead till the end of the file). + if (startcls is di.Label_Do_Stmt and endcls is di.End_Do and + hasattr(obj, "get_end_label") and + (content[start_idx].get_start_label() == + obj.get_end_label()) and + not isinstance(obj, (di.End_Do_Stmt, di.Continue_Stmt))): if table_name: SYMBOL_TABLES.exit_scope() + # We need to put the just read statement back: obj.restore_reader(reader) + # ... and then also restore all previously read content for obj in reversed(content): obj.restore_reader(reader) + # ... before we abort. return None - # We got a match for this class had_match = True content.append(obj)