From 3479f1d2221f9ab65fe33e5fcb85f9764f4f7c9c Mon Sep 17 00:00:00 2001 From: VsevolodX Date: Wed, 10 Jun 2026 20:45:27 -0700 Subject: [PATCH 1/2] update: remove comments --- dist/js/shared/str.d.ts | 2 +- dist/js/shared/str.js | 9 ++++++--- src/js/shared/str.js | 9 ++++++--- src/py/mat3ra/utils/string.py | 10 ++++++++-- 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/dist/js/shared/str.d.ts b/dist/js/shared/str.d.ts index 1c52214..d3f91f9 100644 --- a/dist/js/shared/str.d.ts +++ b/dist/js/shared/str.d.ts @@ -7,7 +7,7 @@ export function removeNewLinesAndExtraSpaces(str: any): any; export function randomAlphanumeric(length: number): string; export function toFixedLocale(number: any, precision: any): any; /** - * @summary Removes lines started with # character. Shebang (#!) is excluded. + * @summary Removes comments from a given source code text based on the specified programming language. * @param text {String} text to remove comments from. * @param language {String} programming language of the text. * @return {String} diff --git a/dist/js/shared/str.js b/dist/js/shared/str.js index f2075a6..fdf859a 100644 --- a/dist/js/shared/str.js +++ b/dist/js/shared/str.js @@ -40,16 +40,19 @@ function toFixedLocale(number, precision) { } exports.toFixedLocale = toFixedLocale; /** - * @summary Removes lines started with # character. Shebang (#!) is excluded. + * @summary Removes comments from a given source code text based on the specified programming language. * @param text {String} text to remove comments from. * @param language {String} programming language of the text. * @return {String} */ function removeCommentsFromSourceCode(text, language = "shell") { + var _a; const regexList = { - shell: /^(\s+)?#(?!!).*$/gm, + shell: /#(?!!).*$/gm, + fortran: /!.*$/gm, + python: /#.*$/gm, }; - return text.replace(regexList[language], ""); + return text.replace((_a = regexList[language]) !== null && _a !== void 0 ? _a : regexList.shell, ""); } exports.removeCommentsFromSourceCode = removeCommentsFromSourceCode; /** diff --git a/src/js/shared/str.js b/src/js/shared/str.js index ab8c707..058b0a1 100644 --- a/src/js/shared/str.js +++ b/src/js/shared/str.js @@ -37,18 +37,21 @@ export function toFixedLocale(number, precision) { } /** - * @summary Removes lines started with # character. Shebang (#!) is excluded. + * @summary Removes comments from a given source code text based on the specified programming language. * @param text {String} text to remove comments from. * @param language {String} programming language of the text. * @return {String} */ export function removeCommentsFromSourceCode(text, language = "shell") { const regexList = { - shell: /^(\s+)?#(?!!).*$/gm, + shell: /#(?!!).*$/gm, + fortran: /!.*$/gm, + python: /#.*$/gm, }; - return text.replace(regexList[language], ""); + return text.replace(regexList[language] ?? regexList.shell, ""); } + /** * @summary Removes empty lines from a given string. * @param string {String} string to remove empty lines from. diff --git a/src/py/mat3ra/utils/string.py b/src/py/mat3ra/utils/string.py index 4d75756..6bb4ccd 100644 --- a/src/py/mat3ra/utils/string.py +++ b/src/py/mat3ra/utils/string.py @@ -60,8 +60,14 @@ def snake_to_camel(snake_case_str: str) -> str: def remove_comments_from_source_code(text: str, language: str = "shell") -> str: - """Removes lines starting with # (except shebang).""" - return re.sub(r"^(\s+)?#(?!!).*$", "", text, flags=re.MULTILINE) + """Removes comments from source code based on the language. + TODO: consider preserving values enclosed in quotes + """ + if language == "fortran": + return re.sub(r"!.*$", "", text, flags=re.MULTILINE) + if language == "python": + return re.sub(r"#.*$", "", text, flags=re.MULTILINE) + return re.sub(r"#(?!!).*$", "", text, flags=re.MULTILINE) def remove_empty_lines_from_string(text: str) -> str: From b6c81668854f0a298c252ffd88d21fc9bb0fd6fa Mon Sep 17 00:00:00 2001 From: VsevolodX Date: Wed, 10 Jun 2026 20:47:47 -0700 Subject: [PATCH 2/2] update: adjust test --- src/js/shared/str.js | 1 - tests/py/unit/test_hash.py | 7 ++----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/js/shared/str.js b/src/js/shared/str.js index 058b0a1..bff0800 100644 --- a/src/js/shared/str.js +++ b/src/js/shared/str.js @@ -51,7 +51,6 @@ export function removeCommentsFromSourceCode(text, language = "shell") { return text.replace(regexList[language] ?? regexList.shell, ""); } - /** * @summary Removes empty lines from a given string. * @param string {String} string to remove empty lines from. diff --git a/tests/py/unit/test_hash.py b/tests/py/unit/test_hash.py index f8430cf..79661d4 100644 --- a/tests/py/unit/test_hash.py +++ b/tests/py/unit/test_hash.py @@ -39,16 +39,13 @@ class Model(BaseModel): def test_remove_timestampable_keys(): - assert remove_timestampable_keys( - {"a": 1, "createdAt": "x", "updatedAt": "y", "removedAt": "z"} - ) == {"a": 1} + assert remove_timestampable_keys({"a": 1, "createdAt": "x", "updatedAt": "y", "removedAt": "z"}) == {"a": 1} def test_comment_and_empty_line_stripping_matches_js(): text = "# comment\n\nx=1\n # indented\n#!/bin/bash\n echo hi # inline\n" without_comments = remove_comments_from_source_code(text) assert "#!/" in without_comments # shebang preserved - assert "echo hi # inline" in without_comments # inline comment preserved assert "comment" not in without_comments - assert remove_empty_lines_from_string(without_comments) == "x=1\n#!/bin/bash\n echo hi # inline" + assert remove_empty_lines_from_string(without_comments) == "x=1\n#!/bin/bash\n echo hi"