From 18d785addd525b8902c12c9255c29adbf0d76d66 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Wed, 3 Jun 2026 20:15:00 +0800 Subject: [PATCH 01/19] SOF-7915: export physical constants via python interface --- dist/js/shared/constants.d.ts | 1 + dist/js/shared/constants.js | 36 +++++++++++++++---------------- package.json | 3 ++- scripts/build_constants.js | 37 ++++++++++++++++++++++++++++++++ src/py/mat3ra/utils/constants.py | 30 ++++++++++++++++++++++++++ src/shared/constants.yaml | 27 +++++++++++++++++++++++ 6 files changed, 114 insertions(+), 20 deletions(-) create mode 100644 scripts/build_constants.js create mode 100644 src/py/mat3ra/utils/constants.py create mode 100644 src/shared/constants.yaml diff --git a/dist/js/shared/constants.d.ts b/dist/js/shared/constants.d.ts index ba6274f..a30c0f9 100644 --- a/dist/js/shared/constants.d.ts +++ b/dist/js/shared/constants.d.ts @@ -26,5 +26,6 @@ declare namespace _default { export { tolerance }; export { units }; export { ATOMIC_COORD_UNITS }; + export { HASH_TOLERANCE }; } export default _default; diff --git a/dist/js/shared/constants.js b/dist/js/shared/constants.js index 9e6133e..65ef4ab 100644 --- a/dist/js/shared/constants.js +++ b/dist/js/shared/constants.js @@ -1,37 +1,35 @@ "use strict"; +// This file is autogenerated from src/shared/constants.yaml +// DO NOT EDIT DIRECTLY! Edit above YAML file and run 'npm run build:constants'. Object.defineProperty(exports, "__esModule", { value: true }); exports.HASH_TOLERANCE = exports.ATOMIC_COORD_UNITS = exports.units = exports.tolerance = exports.coefficients = void 0; exports.coefficients = { - EV_TO_RY: 0.0734986176, - BOHR_TO_ANGSTROM: 0.52917721092, - ANGSTROM_TO_BOHR: 1 / 0.52917721092, - EV_A_TO_RY_BOHR: 1 / 25.71104309541616, + "EV_TO_RY": 0.0734986444, + "BOHR_TO_ANGSTROM": 0.52917721054, + "ANGSTROM_TO_BOHR": 1.8897261259077822, + "EV_A_TO_RY_BOHR": 0.0388938075966032 }; exports.tolerance = { - // in crystal coordinates - length: 0.01, - lengthAngstrom: 0.001, - pointsDistance: 0.001, + "length": 0.01, + "lengthAngstrom": 0.001, + "pointsDistance": 0.001 }; exports.units = { - bohr: "bohr", - angstrom: "angstrom", - degree: "degree", - radian: "radian", - alat: "alat", + "bohr": "bohr", + "angstrom": "angstrom", + "degree": "degree", + "radian": "radian", + "alat": "alat" }; -/** - * @summary Coordinates units for a material's basis. - */ exports.ATOMIC_COORD_UNITS = { - crystal: "crystal", - cartesian: "cartesian", + "crystal": "crystal", + "cartesian": "cartesian" }; -// Only 3 digits will be considered for lattice and basis params for hashing exports.HASH_TOLERANCE = 3; exports.default = { coefficients: exports.coefficients, tolerance: exports.tolerance, units: exports.units, ATOMIC_COORD_UNITS: exports.ATOMIC_COORD_UNITS, + HASH_TOLERANCE: exports.HASH_TOLERANCE, }; diff --git a/package.json b/package.json index c082ad1..c6dd82f 100644 --- a/package.json +++ b/package.json @@ -9,7 +9,8 @@ "prepare": "husky install || exit 0", "prettier": "prettier --check src/js tests/js", "test": "nyc --reporter=text mocha --recursive --bail tests/js", - "transpile": "tsc -p tsconfig-transpile.json" + "transpile": "tsc -p tsconfig-transpile.json", + "build:constants": "node scripts/build_constants.js" }, "author": "Exabyte Inc.", "license": "ISC", diff --git a/scripts/build_constants.js b/scripts/build_constants.js new file mode 100644 index 0000000..1814e3e --- /dev/null +++ b/scripts/build_constants.js @@ -0,0 +1,37 @@ +const fs = require('fs'); +const path = require('path'); +const yaml = require('js-yaml'); + +// Define paths relative to the project root +const yamlPath = path.join(process.cwd(), 'src/shared/constants.yaml'); +const pyPath = path.join(process.cwd(), 'src/py/mat3ra/utils/constants.py'); + +try { + // Read and parse the YAML file + const rawData = fs.readFileSync(yamlPath, 'utf-8'); + const constants = yaml.load(rawData); + + let pyContent = `# This file is autogenerated from src/shared/constants.yaml\n` + + `# DO NOT EDIT DIRECTLY! Edit above YAML file and run` + + `'npm run build:constants'.\n`; + + for (const key of Object.keys(constants)) { + // In Python, standard practice is to use UPPERCASE for constants + const pyKey = key.toUpperCase(); + + // Stringify the object, but replace JS native types with Python native types + let pyValue = JSON.stringify(constants[key], null, 4) + .replace(/: true/g, ': True') + .replace(/: false/g, ': False') + .replace(/: null/g, ': None'); + + pyContent += `\n${pyKey} = ${pyValue}\n`; + } + + fs.mkdirSync(path.dirname(pyPath), { recursive: true }); + fs.writeFileSync(pyPath, pyContent, 'utf-8'); + console.log(`✅ Successfully generated Python constants at: ${pyPath}`); +} catch (error) { + console.error("❌ Failed to build constants.js:", error.message); + process.exit(1); +} diff --git a/src/py/mat3ra/utils/constants.py b/src/py/mat3ra/utils/constants.py new file mode 100644 index 0000000..4071836 --- /dev/null +++ b/src/py/mat3ra/utils/constants.py @@ -0,0 +1,30 @@ +# This file is autogenerated from src/shared/constants.yaml +# DO NOT EDIT DIRECTLY! Edit above YAML file and run'npm run build:constants'. + +COEFFICIENTS = { + "EV_TO_RY": 0.0734986444, + "BOHR_TO_ANGSTROM": 0.52917721054, + "ANGSTROM_TO_BOHR": 1.8897261259077822, + "EV_A_TO_RY_BOHR": 0.0388938075966032 +} + +TOLERANCE = { + "length": 0.01, + "lengthAngstrom": 0.001, + "pointsDistance": 0.001 +} + +UNITS = { + "bohr": "bohr", + "angstrom": "angstrom", + "degree": "degree", + "radian": "radian", + "alat": "alat" +} + +ATOMIC_COORD_UNITS = { + "crystal": "crystal", + "cartesian": "cartesian" +} + +HASH_TOLERANCE = 3 diff --git a/src/shared/constants.yaml b/src/shared/constants.yaml new file mode 100644 index 0000000..5852c8f --- /dev/null +++ b/src/shared/constants.yaml @@ -0,0 +1,27 @@ +# https://physics.nist.gov/cuu/Constants/ (CODATA 2022) +coefficients: + EV_TO_RY: 0.0734986444 # 1 / 13.605693122990 + BOHR_TO_ANGSTROM: 0.529177210544 + ANGSTROM_TO_BOHR: 1.8897261259077822 # 1 / 0.529177210544 + EV_A_TO_RY_BOHR: 0.0388938075966032 # 0.529177210544 / 13.605693122990 + +tolerance: + # in crystal coordinates + length: 0.01 + lengthAngstrom: 0.001 + pointsDistance: 0.001 + +units: + bohr: "bohr" + angstrom: "angstrom" + degree: "degree" + radian: "radian" + alat: "alat" + +# Coordinates units for a material's basis. +ATOMIC_COORD_UNITS: + crystal: "crystal" + cartesian: "cartesian" + +# Only 3 digits will be considered for lattice and basis params for hashing +HASH_TOLERANCE: 3 From ae720893946cac1448c3881e34fc039fecbb7981 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Wed, 3 Jun 2026 20:20:34 +0800 Subject: [PATCH 02/19] build js constants from the same yaml source of truth --- dist/js/shared/constants.js | 2 +- scripts/build_constants.js | 25 +++++++++++++++++++++ src/js/shared/constants.js | 37 ++++++++++++++++---------------- src/py/mat3ra/utils/constants.py | 2 +- 4 files changed, 45 insertions(+), 21 deletions(-) diff --git a/dist/js/shared/constants.js b/dist/js/shared/constants.js index 65ef4ab..6b7d56b 100644 --- a/dist/js/shared/constants.js +++ b/dist/js/shared/constants.js @@ -5,7 +5,7 @@ Object.defineProperty(exports, "__esModule", { value: true }); exports.HASH_TOLERANCE = exports.ATOMIC_COORD_UNITS = exports.units = exports.tolerance = exports.coefficients = void 0; exports.coefficients = { "EV_TO_RY": 0.0734986444, - "BOHR_TO_ANGSTROM": 0.52917721054, + "BOHR_TO_ANGSTROM": 0.529177210544, "ANGSTROM_TO_BOHR": 1.8897261259077822, "EV_A_TO_RY_BOHR": 0.0388938075966032 }; diff --git a/scripts/build_constants.js b/scripts/build_constants.js index 1814e3e..fe0de59 100644 --- a/scripts/build_constants.js +++ b/scripts/build_constants.js @@ -4,6 +4,7 @@ const yaml = require('js-yaml'); // Define paths relative to the project root const yamlPath = path.join(process.cwd(), 'src/shared/constants.yaml'); +const jsPath = path.join(process.cwd(), 'src/js/shared/constants.js'); const pyPath = path.join(process.cwd(), 'src/py/mat3ra/utils/constants.py'); try { @@ -11,6 +12,30 @@ try { const rawData = fs.readFileSync(yamlPath, 'utf-8'); const constants = yaml.load(rawData); + // Build JS content + let jsContent = `// This file is autogenerated from src/shared/constants.yaml\n` + + `// DO NOT EDIT DIRECTLY! Edit above YAML file and run ` + + `'npm run build:constants'.\n\n`; + + // Dynamically generate individual exports for every top-level key + const keys = Object.keys(constants); + for (const key of keys) { + jsContent += `export const ${key} = ${JSON.stringify(constants[key], null, 4)};\n\n`; + } + + // Dynamically generate the default export block + jsContent += `export default {\n`; + for (const key of keys) { + jsContent += ` ${key},\n`; + } + jsContent += `};\n`; + + fs.mkdirSync(path.dirname(jsPath), { recursive: true }); + fs.writeFileSync(jsPath, jsContent, 'utf-8'); + + console.log(`✅ Successfully generated JS constants at: ${jsPath}`); + + // Build Python content let pyContent = `# This file is autogenerated from src/shared/constants.yaml\n` + `# DO NOT EDIT DIRECTLY! Edit above YAML file and run` + `'npm run build:constants'.\n`; diff --git a/src/js/shared/constants.js b/src/js/shared/constants.js index 53b0cdf..a23afb3 100644 --- a/src/js/shared/constants.js +++ b/src/js/shared/constants.js @@ -1,34 +1,32 @@ +// This file is autogenerated from src/shared/constants.yaml +// DO NOT EDIT DIRECTLY! Edit above YAML file and run 'npm run build:constants'. + export const coefficients = { - EV_TO_RY: 0.0734986176, - BOHR_TO_ANGSTROM: 0.52917721092, - ANGSTROM_TO_BOHR: 1 / 0.52917721092, - EV_A_TO_RY_BOHR: 1 / 25.71104309541616, + "EV_TO_RY": 0.0734986444, + "BOHR_TO_ANGSTROM": 0.529177210544, + "ANGSTROM_TO_BOHR": 1.8897261259077822, + "EV_A_TO_RY_BOHR": 0.0388938075966032 }; export const tolerance = { - // in crystal coordinates - length: 0.01, - lengthAngstrom: 0.001, - pointsDistance: 0.001, + "length": 0.01, + "lengthAngstrom": 0.001, + "pointsDistance": 0.001 }; export const units = { - bohr: "bohr", - angstrom: "angstrom", - degree: "degree", - radian: "radian", - alat: "alat", + "bohr": "bohr", + "angstrom": "angstrom", + "degree": "degree", + "radian": "radian", + "alat": "alat" }; -/** - * @summary Coordinates units for a material's basis. - */ export const ATOMIC_COORD_UNITS = { - crystal: "crystal", - cartesian: "cartesian", + "crystal": "crystal", + "cartesian": "cartesian" }; -// Only 3 digits will be considered for lattice and basis params for hashing export const HASH_TOLERANCE = 3; export default { @@ -36,4 +34,5 @@ export default { tolerance, units, ATOMIC_COORD_UNITS, + HASH_TOLERANCE, }; diff --git a/src/py/mat3ra/utils/constants.py b/src/py/mat3ra/utils/constants.py index 4071836..7454874 100644 --- a/src/py/mat3ra/utils/constants.py +++ b/src/py/mat3ra/utils/constants.py @@ -3,7 +3,7 @@ COEFFICIENTS = { "EV_TO_RY": 0.0734986444, - "BOHR_TO_ANGSTROM": 0.52917721054, + "BOHR_TO_ANGSTROM": 0.529177210544, "ANGSTROM_TO_BOHR": 1.8897261259077822, "EV_A_TO_RY_BOHR": 0.0388938075966032 } From 4da44a1b6ffd1dade4c7b2df8e49439789fe9612 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Wed, 3 Jun 2026 20:58:00 +0800 Subject: [PATCH 03/19] add build rule to pre-commit --- .husky/pre-commit | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/.husky/pre-commit b/.husky/pre-commit index 0dd790f..f01cf1d 100755 --- a/.husky/pre-commit +++ b/.husky/pre-commit @@ -1,16 +1,18 @@ #!/bin/sh . "$(dirname "$0")/_/husky.sh" -#### Below is an example of how to rebuild JS and PY assets if JSON assets have changed. -# SRC_PATTERN="\.json$" -# if git diff --cached --name-only | grep --quiet -E "$SRC_PATTERN" -# then -# echo "JSON assets changed. Running build scripts." -# echo "Re-building JS and PY assets using JS script." -# npm run build:js-and-python-modules -# fi +SRC_PATTERN="constants\.json$" +if git diff --cached --name-only | grep --quiet -E "$SRC_PATTERN" +then + echo "JSON assets changed. Running build scripts." + # Run your custom script to generate constants.py and constants.js + npm run build:constants + + # Stage the newly generated files + git add src/py/mat3ra/utils/constants.py + git add src/js/shared/constants.js +fi -# Automatically lint staged files in pre-commit hook npm run transpile npx lint-staged git add dist/js From 3a0c91bae49b39a926891c25dddd3b264eb33144 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Wed, 3 Jun 2026 20:59:01 +0800 Subject: [PATCH 04/19] chore: pin actions to commit hash --- .github/workflows/cicd.yml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index af60802..96cdff4 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -15,12 +15,12 @@ jobs: steps: - name: Checkout this repository - uses: actions/checkout@v4 + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 with: lfs: true - name: Checkout actions repository - uses: actions/checkout@v4 + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 with: repository: Exabyte-io/actions token: ${{ secrets.BOT_GITHUB_TOKEN }} @@ -43,12 +43,12 @@ jobs: steps: - name: Checkout this repository - uses: actions/checkout@v4 + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 with: lfs: true - name: Checkout actions repository - uses: actions/checkout@v4 + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 with: repository: Exabyte-io/actions token: ${{ secrets.BOT_GITHUB_TOKEN }} @@ -70,12 +70,12 @@ jobs: steps: - name: Checkout this repository - uses: actions/checkout@v4 + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 with: lfs: true - name: Checkout actions repository - uses: actions/checkout@v4 + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 with: repository: Exabyte-io/actions token: ${{ secrets.BOT_GITHUB_TOKEN }} @@ -102,10 +102,10 @@ jobs: steps: - name: Checkout this repository - uses: actions/checkout@v4 + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 - name: Checkout actions repository - uses: actions/checkout@v4 + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 with: repository: Exabyte-io/actions token: ${{ secrets.BOT_GITHUB_TOKEN }} @@ -128,12 +128,12 @@ jobs: steps: - name: Checkout this repository - uses: actions/checkout@v4 + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 with: lfs: true - name: Checkout actions repository - uses: actions/checkout@v4 + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 with: repository: Exabyte-io/actions token: ${{ secrets.BOT_GITHUB_TOKEN }} From 0582b104400fc2464aeebb126213acdec7d2fce9 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Wed, 3 Jun 2026 23:50:33 +0800 Subject: [PATCH 05/19] add comment removal for fortran lang --- src/py/mat3ra/utils/string.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/py/mat3ra/utils/string.py b/src/py/mat3ra/utils/string.py index 4d75756..6a6346c 100644 --- a/src/py/mat3ra/utils/string.py +++ b/src/py/mat3ra/utils/string.py @@ -60,7 +60,12 @@ def snake_to_camel(snake_case_str: str) -> str: def remove_comments_from_source_code(text: str, language: str = "shell") -> str: - """Removes lines starting with # (except shebang).""" + """Removes comments from source code based on the language.""" + if language == "fortran": + # Removes inline and full-line comments starting with ! or # + return re.sub(r"[!#].*$", "", text, flags=re.MULTILINE) + + # Default (shell): Removes lines starting with # (except shebang) return re.sub(r"^(\s+)?#(?!!).*$", "", text, flags=re.MULTILINE) From 1574df23cb9b0f249ee589e2c338546c4b83a97a Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Thu, 4 Jun 2026 17:33:16 +0800 Subject: [PATCH 06/19] add tests for remove_comments_from_source_code --- tests/py/unit/test_string.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/py/unit/test_string.py b/tests/py/unit/test_string.py index 286b7a4..fc969a5 100644 --- a/tests/py/unit/test_string.py +++ b/tests/py/unit/test_string.py @@ -1,4 +1,5 @@ from mat3ra.utils import string as utils +from mat3ra.utils.string import remove_comments_from_source_code def test_snake_to_camel(): @@ -15,3 +16,30 @@ def test_camel_to_snake(): """ print(utils.camel_to_snake("testCamelToSnake")) assert utils.camel_to_snake("TestCamelToSnake") == "test_camel_to_snake" + + +def test_remove_comments_from_espresso_input(): + espresso_input = """! This is a Quantum ESPRESSO input block +&SYSTEM + ibrav = 2 + celldm(1) = 10.26 + nat = 2 + ntyp = 1 + ecutwfc = 40 # this is an inline comment + ecutrho = 200 +/""" + + cleaned_input = remove_comments_from_source_code(espresso_input, language="fortran") + cleaned_lines = cleaned_input.splitlines() + + # Check that actual code is preserved + assert "&SYSTEM" in cleaned_lines + assert any("ibrav = 2" in line for line in cleaned_lines) + assert any("celldm(1) = 10.26" in line for line in cleaned_lines) + assert any("nat = 2" in line for line in cleaned_lines) + assert any("ntyp = 1" in line for line in cleaned_lines) + assert any("ecutwfc = 40" in line for line in cleaned_lines) + assert any("ecutrho = 200" in line for line in cleaned_lines) + # Check that comments are removed + assert not any("This is a Quantum ESPRESSO input block" in line for line in cleaned_lines) + assert not any("this is an inline comment" in line for line in cleaned_lines) From 6750302ecc103b48f479721e7367ac8ed9efb866 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Mon, 8 Jun 2026 17:54:22 +0800 Subject: [PATCH 07/19] remove inline comments for shell --- src/py/mat3ra/utils/string.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/py/mat3ra/utils/string.py b/src/py/mat3ra/utils/string.py index 6a6346c..5c68b75 100644 --- a/src/py/mat3ra/utils/string.py +++ b/src/py/mat3ra/utils/string.py @@ -60,13 +60,15 @@ def snake_to_camel(snake_case_str: str) -> str: def remove_comments_from_source_code(text: str, language: str = "shell") -> str: - """Removes comments from source code based on the language.""" + """Removes comments from source code based on the language. + TODO: consider preserving values enclosed in quotes + """ if language == "fortran": # Removes inline and full-line comments starting with ! or # return re.sub(r"[!#].*$", "", text, flags=re.MULTILINE) - # Default (shell): Removes lines starting with # (except shebang) - return re.sub(r"^(\s+)?#(?!!).*$", "", text, flags=re.MULTILINE) + # Default (shell): removes inline and full-line # comments (except shebang) + return re.sub(r"#(?!!).*$", "", text, flags=re.MULTILINE) def remove_empty_lines_from_string(text: str) -> str: From 3a80497b01536ba4621b9abb583dd08c2e7864cb Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Mon, 8 Jun 2026 17:55:33 +0800 Subject: [PATCH 08/19] add test for removal of comments from bash script --- tests/py/unit/test_string.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/py/unit/test_string.py b/tests/py/unit/test_string.py index fc969a5..e094281 100644 --- a/tests/py/unit/test_string.py +++ b/tests/py/unit/test_string.py @@ -18,6 +18,25 @@ def test_camel_to_snake(): assert utils.camel_to_snake("TestCamelToSnake") == "test_camel_to_snake" +def test_remove_comments_from_bash_script(): + bash_script = """#!/bin/bash +# This is a bash script header comment +export JOB_NAME="pw_scf" # inline comment +echo "Starting calculation" +""" + + cleaned_script = remove_comments_from_source_code(bash_script) + cleaned_lines = cleaned_script.splitlines() + + # Check that actual code is preserved + assert cleaned_lines[0] == "#!/bin/bash" + assert any('export JOB_NAME="pw_scf"' in line for line in cleaned_lines) + assert any('echo "Starting calculation"' in line for line in cleaned_lines) + # Check that comments are removed + assert not any("bash script header comment" in line for line in cleaned_lines) + assert not any("inline comment" in line for line in cleaned_lines) + + def test_remove_comments_from_espresso_input(): espresso_input = """! This is a Quantum ESPRESSO input block &SYSTEM From 84c792635929268fb646450a08086b6b5d8e70d6 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Tue, 9 Jun 2026 19:28:04 +0800 Subject: [PATCH 09/19] remove inline comments from bash js interface --- dist/js/shared/str.d.ts | 2 +- dist/js/shared/str.js | 4 ++-- src/js/shared/str.js | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dist/js/shared/str.d.ts b/dist/js/shared/str.d.ts index 1c52214..bcda43c 100644 --- a/dist/js/shared/str.d.ts +++ b/dist/js/shared/str.d.ts @@ -7,7 +7,7 @@ export function removeNewLinesAndExtraSpaces(str: any): any; export function randomAlphanumeric(length: number): string; export function toFixedLocale(number: any, precision: any): any; /** - * @summary Removes lines started with # character. Shebang (#!) is excluded. + * @summary Removes full-line and inline comments starting with #. Shebang (#!) is excluded. * @param text {String} text to remove comments from. * @param language {String} programming language of the text. * @return {String} diff --git a/dist/js/shared/str.js b/dist/js/shared/str.js index f2075a6..a47b9db 100644 --- a/dist/js/shared/str.js +++ b/dist/js/shared/str.js @@ -40,14 +40,14 @@ function toFixedLocale(number, precision) { } exports.toFixedLocale = toFixedLocale; /** - * @summary Removes lines started with # character. Shebang (#!) is excluded. + * @summary Removes full-line and inline comments starting with #. Shebang (#!) is excluded. * @param text {String} text to remove comments from. * @param language {String} programming language of the text. * @return {String} */ function removeCommentsFromSourceCode(text, language = "shell") { const regexList = { - shell: /^(\s+)?#(?!!).*$/gm, + shell: /#(?!!).*$/gm, }; return text.replace(regexList[language], ""); } diff --git a/src/js/shared/str.js b/src/js/shared/str.js index ab8c707..b5dd7f9 100644 --- a/src/js/shared/str.js +++ b/src/js/shared/str.js @@ -37,14 +37,14 @@ export function toFixedLocale(number, precision) { } /** - * @summary Removes lines started with # character. Shebang (#!) is excluded. + * @summary Removes full-line and inline comments starting with #. Shebang (#!) is excluded. * @param text {String} text to remove comments from. * @param language {String} programming language of the text. * @return {String} */ export function removeCommentsFromSourceCode(text, language = "shell") { const regexList = { - shell: /^(\s+)?#(?!!).*$/gm, + shell: /#(?!!).*$/gm, }; return text.replace(regexList[language], ""); } From ba15d30843314bc7c86833d52b83966be8cd9442 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Tue, 9 Jun 2026 19:28:48 +0800 Subject: [PATCH 10/19] add js tests for comment removal --- tests/js/str.tests.ts | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/js/str.tests.ts b/tests/js/str.tests.ts index 7330af0..6eb9b85 100644 --- a/tests/js/str.tests.ts +++ b/tests/js/str.tests.ts @@ -6,6 +6,7 @@ import { numberFormat, numberPad, numberPadArray, + removeCommentsFromSourceCode, renderTemplateString, renderTemplateStringWithEval, } from "../../src/js/shared/str"; @@ -77,6 +78,30 @@ describe("Test string template expansion with eval", () => { }); }); +describe("removeCommentsFromSourceCode", () => { + it("should remove comments from bash script", () => { + const bashScript = `#!/bin/bash +# This is a bash script header comment +export JOB_NAME="pw_scf" # inline comment +echo "Starting calculation" +`; + + const cleanedScript = removeCommentsFromSourceCode(bashScript); + const cleanedLines = cleanedScript.split("\n"); + + // Check that actual code is preserved + expect(cleanedLines[0]).to.equal("#!/bin/bash"); + expect(cleanedLines.some((line) => line.includes('export JOB_NAME="pw_scf"'))).to.be + .true; + expect(cleanedLines.some((line) => line.includes('echo "Starting calculation"'))).to.be + .true; + // Check that comments are removed + expect(cleanedLines.some((line) => line.includes("bash script header comment"))).to.be + .false; + expect(cleanedLines.some((line) => line.includes("inline comment"))).to.be.false; + }); +}); + describe("createSafeFilename", () => { it("should convert to lowercase, replace special chars with underscores, and trim", () => { expect(createSafeFilename("My File Name!")).to.equal("my_file_name"); From 0bf4e4ac61763c420b4006619b79e189dfc239a3 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Tue, 9 Jun 2026 19:29:40 +0800 Subject: [PATCH 11/19] fix py test --- tests/py/unit/test_hash.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/py/unit/test_hash.py b/tests/py/unit/test_hash.py index f8430cf..832ef70 100644 --- a/tests/py/unit/test_hash.py +++ b/tests/py/unit/test_hash.py @@ -45,10 +45,10 @@ def test_remove_timestampable_keys(): def test_comment_and_empty_line_stripping_matches_js(): - text = "# comment\n\nx=1\n # indented\n#!/bin/bash\n echo hi # inline\n" + text = "# comment\n\nx=1\n # indented\n#!/bin/bash\necho hi # inline\n" without_comments = remove_comments_from_source_code(text) assert "#!/" in without_comments # shebang preserved - assert "echo hi # inline" in without_comments # inline comment preserved + assert "echo hi # inline" not in without_comments # inline comment removed assert "comment" not in without_comments - assert remove_empty_lines_from_string(without_comments) == "x=1\n#!/bin/bash\n echo hi # inline" + assert remove_empty_lines_from_string(without_comments) == "x=1\n#!/bin/bash\necho hi" From 52a8f38ed612caf2431754c4cfcbe39e99e35cd6 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Wed, 10 Jun 2026 19:59:24 +0800 Subject: [PATCH 12/19] separate regex dict for espresso and fortran for comment removal --- src/py/mat3ra/utils/string.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/py/mat3ra/utils/string.py b/src/py/mat3ra/utils/string.py index 5c68b75..07d6ce0 100644 --- a/src/py/mat3ra/utils/string.py +++ b/src/py/mat3ra/utils/string.py @@ -63,12 +63,12 @@ def remove_comments_from_source_code(text: str, language: str = "shell") -> str: """Removes comments from source code based on the language. TODO: consider preserving values enclosed in quotes """ - if language == "fortran": - # Removes inline and full-line comments starting with ! or # - return re.sub(r"[!#].*$", "", text, flags=re.MULTILINE) - - # Default (shell): removes inline and full-line # comments (except shebang) - return re.sub(r"#(?!!).*$", "", text, flags=re.MULTILINE) + patterns = { + "espresso": r"[!#].*$", # ! or # comments + "fortran": r"!.*$", # ! comments only + "shell": r"#(?!!).*$", # # comments (except shebang) + } + return re.sub(patterns.get(language, patterns["shell"]), "", text, flags=re.MULTILINE) def remove_empty_lines_from_string(text: str) -> str: From cb8af818a4f88580b061c39708479b80c6283e8f Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Wed, 10 Jun 2026 20:00:22 +0800 Subject: [PATCH 13/19] add separate test for fortran comment removal --- tests/py/unit/test_string.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/tests/py/unit/test_string.py b/tests/py/unit/test_string.py index e094281..323e7cb 100644 --- a/tests/py/unit/test_string.py +++ b/tests/py/unit/test_string.py @@ -48,7 +48,7 @@ def test_remove_comments_from_espresso_input(): ecutrho = 200 /""" - cleaned_input = remove_comments_from_source_code(espresso_input, language="fortran") + cleaned_input = remove_comments_from_source_code(espresso_input, language="espresso") cleaned_lines = cleaned_input.splitlines() # Check that actual code is preserved @@ -59,6 +59,28 @@ def test_remove_comments_from_espresso_input(): assert any("ntyp = 1" in line for line in cleaned_lines) assert any("ecutwfc = 40" in line for line in cleaned_lines) assert any("ecutrho = 200" in line for line in cleaned_lines) - # Check that comments are removed + # Check that ! and # comments are removed assert not any("This is a Quantum ESPRESSO input block" in line for line in cleaned_lines) assert not any("this is an inline comment" in line for line in cleaned_lines) + + +def test_remove_comments_from_fortran_source(): + fortran_source = """! This is a fortran header comment +program test + integer :: i ! inline fortran comment + i = 5 # not a fortran comment marker +end program +""" + + cleaned_source = remove_comments_from_source_code(fortran_source, language="fortran") + cleaned_lines = cleaned_source.splitlines() + + # Check that actual code is preserved + assert any("program test" in line for line in cleaned_lines) + assert any("integer :: i" in line for line in cleaned_lines) + assert any("i = 5 # not a fortran comment marker" in line for line in cleaned_lines) + assert any("end program" in line for line in cleaned_lines) + # Check that only ! comments are removed + assert not any("This is a fortran header comment" in line for line in cleaned_lines) + assert not any("inline fortran comment" in line for line in cleaned_lines) + assert any("not a fortran comment marker" in line for line in cleaned_lines) From 78ac542866d5eb6808e5aea1c20574bc168416c2 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Wed, 10 Jun 2026 20:04:56 +0800 Subject: [PATCH 14/19] todo comment --- src/py/mat3ra/utils/string.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/py/mat3ra/utils/string.py b/src/py/mat3ra/utils/string.py index 07d6ce0..7a8b065 100644 --- a/src/py/mat3ra/utils/string.py +++ b/src/py/mat3ra/utils/string.py @@ -61,7 +61,10 @@ def snake_to_camel(snake_case_str: str) -> str: def remove_comments_from_source_code(text: str, language: str = "shell") -> str: """Removes comments from source code based on the language. - TODO: consider preserving values enclosed in quotes + TODO: consider preserving values enclosed in quotes. support for following cases: + url="https://www.example.com/about#company" + message = "Hello, world!" + var = 2 # it's a comment """ patterns = { "espresso": r"[!#].*$", # ! or # comments From 3644c601f27ff7256c9da4afa4b8fe473db1c77a Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Fri, 12 Jun 2026 20:16:43 +0800 Subject: [PATCH 15/19] add a regex search util --- src/py/mat3ra/utils/regex.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/py/mat3ra/utils/regex.py b/src/py/mat3ra/utils/regex.py index e935253..293091d 100644 --- a/src/py/mat3ra/utils/regex.py +++ b/src/py/mat3ra/utils/regex.py @@ -1,4 +1,5 @@ import re +from typing import Union, Any def convert_js_flags_to_python(flags: str) -> int: @@ -22,3 +23,35 @@ def convert_js_flags_to_python(flags: str) -> int: # Note: JavaScript 'y' flag (sticky) has no direct equivalent in Python. return python_flags + + +def regex_search(content: str, pattern: Union[str, re.Pattern], flags: int = 0, find_all: bool = False) -> Any: + """ + Regex search utility using finditer. + + Args: + content: The content to search in. + pattern: The pattern to search for. + flags: The regex flags to use for the search. + find_all: Whether to return all matches or just the first one. + + Returns: + If find_all=True: Returns a generator iterator for all matches. + If find_all=False: Returns the first match object or None. + """ + compiled_pattern = re.compile(pattern, flags) if isinstance(pattern, str) else pattern + match_iterator = compiled_pattern.finditer(content) + + # multi-match mode + if find_all: + return match_iterator + + # single-match mode: return the first occurrence + match = next(match_iterator, None) + if not match: + return None + + # if group_index is not None: + # return match.group(group_index) + + return match From c88d62c22bbd4203875183194677af03476e0f1a Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Fri, 12 Jun 2026 20:54:17 +0800 Subject: [PATCH 16/19] add a regex search by schema --- src/py/mat3ra/utils/regex.py | 53 +++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/src/py/mat3ra/utils/regex.py b/src/py/mat3ra/utils/regex.py index 293091d..de4eb70 100644 --- a/src/py/mat3ra/utils/regex.py +++ b/src/py/mat3ra/utils/regex.py @@ -1,5 +1,5 @@ import re -from typing import Union, Any +from typing import Union, Any, Dict, Optional def convert_js_flags_to_python(flags: str) -> int: @@ -55,3 +55,54 @@ def regex_search(content: str, pattern: Union[str, re.Pattern], flags: int = 0, # return match.group(group_index) return match + +def regex_search_by_schema( + content: str, + schema: Dict[str, Any], + param_replacements: Optional[Dict[str, str]] = None, + find_all: bool = False +) -> Any: + """ + Executes a regex search using a configuration schema block. + The schema is based on the regex repo: + "namelist_block": { + "regex": "&{{BLOCK_NAME}}\\s*([\\s\\S]*?)\\/", + "flags": ["i"], + "params": { + "BLOCK_NAME": [ + "CONTROL", + "SYSTEM", + "ELECTRONS", + "IONS", + "CELL", + "FCP", + "RISM" + ] + } + } + Handles schemas that completely omit the 'flags' key. + """ + regex_pattern = schema["regex"] + + # handle template variable injections (e.g., {{BLOCK_NAME}}) + if param_replacements: + for placeholder, value in param_replacements.items(): + # Validates that the provided value matches allowed parameters in schema + allowed_params = schema.get("params", {}).get(placeholder, []) + if allowed_params and value not in allowed_params: + raise ValueError( + f"Value '{value}' is not an allowed parameter for '{placeholder}'. " + f"Expected one of: {allowed_params}" + ) + regex_pattern = regex_pattern.replace(f"{{{{{placeholder}}}}}", value) + + # handle the flags key (and default to 0 if missing or empty) + schema_flags = schema.get("flags", []) + python_flags = convert_js_flags_to_python("".join(schema_flags)) + + return regex_search( + pattern=regex_pattern, + content=content, + flags=python_flags, + find_all=find_all + ) From 01b7a69314fac1b6202d9263089c6962dadc85fb Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Sat, 13 Jun 2026 12:31:00 +0800 Subject: [PATCH 17/19] remove param_replacements as it is moved to regex lib --- src/py/mat3ra/utils/regex.py | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/src/py/mat3ra/utils/regex.py b/src/py/mat3ra/utils/regex.py index de4eb70..7efa76c 100644 --- a/src/py/mat3ra/utils/regex.py +++ b/src/py/mat3ra/utils/regex.py @@ -1,5 +1,5 @@ import re -from typing import Union, Any, Dict, Optional +from typing import Union, Any, Dict def convert_js_flags_to_python(flags: str) -> int: @@ -59,7 +59,6 @@ def regex_search(content: str, pattern: Union[str, re.Pattern], flags: int = 0, def regex_search_by_schema( content: str, schema: Dict[str, Any], - param_replacements: Optional[Dict[str, str]] = None, find_all: bool = False ) -> Any: """ @@ -67,35 +66,12 @@ def regex_search_by_schema( The schema is based on the regex repo: "namelist_block": { "regex": "&{{BLOCK_NAME}}\\s*([\\s\\S]*?)\\/", - "flags": ["i"], - "params": { - "BLOCK_NAME": [ - "CONTROL", - "SYSTEM", - "ELECTRONS", - "IONS", - "CELL", - "FCP", - "RISM" - ] - } + "flags": ["i"] } Handles schemas that completely omit the 'flags' key. """ regex_pattern = schema["regex"] - # handle template variable injections (e.g., {{BLOCK_NAME}}) - if param_replacements: - for placeholder, value in param_replacements.items(): - # Validates that the provided value matches allowed parameters in schema - allowed_params = schema.get("params", {}).get(placeholder, []) - if allowed_params and value not in allowed_params: - raise ValueError( - f"Value '{value}' is not an allowed parameter for '{placeholder}'. " - f"Expected one of: {allowed_params}" - ) - regex_pattern = regex_pattern.replace(f"{{{{{placeholder}}}}}", value) - # handle the flags key (and default to 0 if missing or empty) schema_flags = schema.get("flags", []) python_flags = convert_js_flags_to_python("".join(schema_flags)) From 4ad29b7081b2c613dbde3a63427bc273adf2507e Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Fri, 19 Jun 2026 17:43:54 +0800 Subject: [PATCH 18/19] remove espress as language for comment removal remove espresso comments by chaining fortran and python comments --- src/py/mat3ra/utils/string.py | 1 - tests/py/unit/test_string.py | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/py/mat3ra/utils/string.py b/src/py/mat3ra/utils/string.py index cda64b1..02d3bec 100644 --- a/src/py/mat3ra/utils/string.py +++ b/src/py/mat3ra/utils/string.py @@ -67,7 +67,6 @@ def remove_comments_from_source_code(text: str, language: str = "shell") -> str: var = 2 # it's a comment """ patterns = { - "espresso": r"[!#].*$", # ! or # comments "fortran": r"!.*$", # ! comments only "python": r"#.*$", "shell": r"#(?!!).*$", # # comments (except shebang) diff --git a/tests/py/unit/test_string.py b/tests/py/unit/test_string.py index 323e7cb..2a82c6e 100644 --- a/tests/py/unit/test_string.py +++ b/tests/py/unit/test_string.py @@ -48,7 +48,9 @@ def test_remove_comments_from_espresso_input(): ecutrho = 200 /""" - cleaned_input = remove_comments_from_source_code(espresso_input, language="espresso") + cleaned_input = remove_comments_from_source_code( + remove_comments_from_source_code(espresso_input, language="fortran"), language="python" + ) cleaned_lines = cleaned_input.splitlines() # Check that actual code is preserved From 5675929ed0b7bc9c7c574d8080cc82d41e74d24d Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Fri, 19 Jun 2026 18:12:40 +0800 Subject: [PATCH 19/19] chore: bump gh actions --- .github/workflows/cicd.yml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 96cdff4..e5fe6ca 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -15,12 +15,12 @@ jobs: steps: - name: Checkout this repository - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: lfs: true - name: Checkout actions repository - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: repository: Exabyte-io/actions token: ${{ secrets.BOT_GITHUB_TOKEN }} @@ -43,12 +43,12 @@ jobs: steps: - name: Checkout this repository - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: lfs: true - name: Checkout actions repository - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: repository: Exabyte-io/actions token: ${{ secrets.BOT_GITHUB_TOKEN }} @@ -70,12 +70,12 @@ jobs: steps: - name: Checkout this repository - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: lfs: true - name: Checkout actions repository - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: repository: Exabyte-io/actions token: ${{ secrets.BOT_GITHUB_TOKEN }} @@ -102,10 +102,10 @@ jobs: steps: - name: Checkout this repository - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 - name: Checkout actions repository - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: repository: Exabyte-io/actions token: ${{ secrets.BOT_GITHUB_TOKEN }} @@ -128,12 +128,12 @@ jobs: steps: - name: Checkout this repository - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: lfs: true - name: Checkout actions repository - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: repository: Exabyte-io/actions token: ${{ secrets.BOT_GITHUB_TOKEN }}