From 5df8e62253709c8d0410d3c1b06f17ed1820f4d4 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Sat, 23 May 2026 15:50:08 +0800 Subject: [PATCH 01/19] SOF-7915: refactor espresso structure parser to use pymatgen ASE parser is limited to ibrav==0, requires intermediate POSCAR generation. --- express/parsers/structure.py | 55 +++++++++--------------------------- pyproject.toml | 8 +++--- 2 files changed, 17 insertions(+), 46 deletions(-) diff --git a/express/parsers/structure.py b/express/parsers/structure.py index b1b6c3e6..dfcda6d8 100644 --- a/express/parsers/structure.py +++ b/express/parsers/structure.py @@ -1,9 +1,8 @@ -import io import json import pymatgen as mg from pymatgen.core.structure import Structure -from ase.io import read, write +from pymatgen.io.pwscf import PWInput from jarvis.core.atoms import Atoms from jarvis.io.vasp.inputs import Poscar @@ -38,22 +37,16 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.structure_string = kwargs.get("structure_string") self.structure_format = kwargs.get("structure_format") + self.cell_type = kwargs.get("cell_type", "original") # original, primitive or conventional - # convert espresso input into poscar + # convert espresso input into pymatgen.core.structure if self.structure_format == "espresso-in": - self.structure_format = "poscar" - self.structure_string = self.espresso_input_to_poscar(self.structure_string) - + self.structure = PWInput.from_str(self.structure_string).structure # convert jarvis-db-entry JSON into poscar - if self.structure_format == "jarvis-db-entry": - self.structure_format = "poscar" + elif self.structure_format == "jarvis-db-entry": self.structure_string = self.jarvis_db_entry_json_to_poscar(self.structure_string) - - # cell_type is either original, primitive or conventional - self.cell_type = kwargs.get("cell_type", "original") - - # Initialize structure class - if self.structure_format == "pymatgen.core.structure": 
+ self.structure = Structure.from_str(self.structure_string, "poscar") + elif self.structure_format == "pymatgen.core.structure": structure_as_dict = json.loads(self.structure_string) self.structure = Structure.from_dict(structure_as_dict) else: @@ -63,7 +56,7 @@ def __init__(self, *args, **kwargs): self.structure = STRUCTURE_MAP[self.cell_type](self.structure) # keep only one atom inside the basis in order to have the original lattice type - self.lattice_only_structure = self.structure.copy() # deepcopy + self.lattice_only_structure = self.structure.copy() # deepcopy self.lattice_only_structure.remove_sites(range(1, len(self.structure.sites))) def lattice_vectors(self): @@ -178,16 +171,15 @@ def basis(self): for i, site in enumerate(self.structure.sites): if not site.is_ordered: raise ValueError( - f"Disordered site at {site.frac_coords.tolist()} with " - f"occupancy {site.species} is not supported." + f"Disordered site at {site.frac_coords.tolist()} with " + f"occupancy {site.species} is not supported." ) # Use specie.symbol to strip oxidation state (e.g. "Li0+" → "Li", "O2-" → "O") elements.append({"id": i, "value": site.specie.symbol}) - coordinates.append({ - "id": i, - "value": self._round(site.frac_coords.tolist(), PRECISION_MAP["coordinates_crystal"]) - }) + coordinates.append( + {"id": i, "value": self._round(site.frac_coords.tolist(), PRECISION_MAP["coordinates_crystal"])} + ) return {"units": "crystal", "elements": elements, "coordinates": coordinates} def space_group_symbol(self): @@ -240,27 +232,6 @@ def atomic_constraints(self): """ return self.structure.site_properties.get("selective_dynamics") - def espresso_input_to_poscar(self, espresso_input): - """ - Extracts structure from espresso input file and returns in poscar format. 
- - Args: - espresso_input (str): input file content - - Returns: - str: poscar - """ - input_ = io.StringIO() - input_.write(espresso_input) - input_.seek(0) - atoms = read(input_, format="espresso-in") - output_ = io.StringIO() - write(output_, atoms, format="vasp", vasp5=True) - content = output_.getvalue() - input_.close() - output_.close() - return content - def jarvis_db_entry_json_to_poscar(self, jarvis_db_entry_json_str): """ Extracts structure from jarvis atoms dictionary and returns in poscar format. diff --git a/pyproject.toml b/pyproject.toml index cd333d3b..c1893fd2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,10 +15,10 @@ classifiers = [ "Topic :: Software Development", ] dependencies = [ - "pymatgen>=2023.8.10", - "ase>=3.17.0", - "mat3ra-esse>=2026.3.25.post0", - "jarvis-tools>=2023.12.12", + "pymatgen>=2025.10.7", + "ase>=3.28.0", + "mat3ra-esse>=2026.5.18.post3", + "jarvis-tools>=2026.4.2", # To avoid module 'numpy.linalg._umath_linalg' has no attribute '_ilp64' in Colab "numpy>=1.24.4,<2", ] From 9829d63f49183fdc0a5b4a2a1d4f9c1ddc8c9e85 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Sat, 23 May 2026 16:27:27 +0800 Subject: [PATCH 02/19] use qe-tools to parse structure from espresso-in --- express/parsers/structure.py | 10 ++++++++-- pyproject.toml | 5 +++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/express/parsers/structure.py b/express/parsers/structure.py index dfcda6d8..e42b6539 100644 --- a/express/parsers/structure.py +++ b/express/parsers/structure.py @@ -2,7 +2,7 @@ import pymatgen as mg from pymatgen.core.structure import Structure -from pymatgen.io.pwscf import PWInput +from qe_tools.parsers import PwInputFile from jarvis.core.atoms import Atoms from jarvis.io.vasp.inputs import Poscar @@ -41,7 +41,13 @@ def __init__(self, *args, **kwargs): # convert espresso input into pymatgen.core.structure if self.structure_format == "espresso-in": - self.structure = 
PWInput.from_str(self.structure_string).structure + parsed = PwInputFile(self.structure_string) + self.structure = Structure( + lattice=parsed.structure["cell"], + species=parsed.structure["atom_names"], + coords=parsed.structure["positions"], + coords_are_cartesian=True, + ) # convert jarvis-db-entry JSON into poscar elif self.structure_format == "jarvis-db-entry": self.structure_string = self.jarvis_db_entry_json_to_poscar(self.structure_string) diff --git a/pyproject.toml b/pyproject.toml index c1893fd2..b97682c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,12 +15,13 @@ classifiers = [ "Topic :: Software Development", ] dependencies = [ - "pymatgen>=2025.10.7", "ase>=3.28.0", - "mat3ra-esse>=2026.5.18.post3", "jarvis-tools>=2026.4.2", + "mat3ra-esse>=2026.5.18.post3", # To avoid module 'numpy.linalg._umath_linalg' has no attribute '_ilp64' in Colab "numpy>=1.24.4,<2", + "pymatgen>=2025.10.7", + "qe-tools==2.3.0", ] [project.optional-dependencies] From 1a1177d139108098379dd6d0ded55c01d8e7b5db Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Sat, 23 May 2026 17:29:22 +0800 Subject: [PATCH 03/19] add tests for espresso-in parser --- tests/.gitattributes | 1 + tests/fixtures/structural/references.py | 22 ++++++ .../structural/test-005/pw_si_ibrav0.in | 3 + .../structural/test-005/pw_si_ibrav2.in | 3 + tests/unit/parsers/test_structure_parser.py | 70 ++++++++++++++++++- 5 files changed, 96 insertions(+), 3 deletions(-) create mode 100644 tests/fixtures/structural/test-005/pw_si_ibrav0.in create mode 100644 tests/fixtures/structural/test-005/pw_si_ibrav2.in diff --git a/tests/.gitattributes b/tests/.gitattributes index 816d8cd1..9dc05008 100644 --- a/tests/.gitattributes +++ b/tests/.gitattributes @@ -5,3 +5,4 @@ fixtures/vasp/** filter=lfs diff=lfs merge=lfs -text fixtures/data.py filter=lfs diff=lfs merge=lfs -text fixtures/pyML/test-001/*.JSON filter=lfs diff=lfs merge=lfs -text *.cif filter=lfs diff=lfs 
merge=lfs -text +fixtures/structural/test-005/*.in filter=lfs diff=lfs merge=lfs -text diff --git a/tests/fixtures/structural/references.py b/tests/fixtures/structural/references.py index d49591bc..6d502f7b 100644 --- a/tests/fixtures/structural/references.py +++ b/tests/fixtures/structural/references.py @@ -29,3 +29,25 @@ {"id": 8, "value": [0.333333330, 0.666666670, 0.666666670]}, ], } + +# ibrav=0 silicon with explicit CELL_PARAMETERS +SI_IBRAV0_BASIS = { + "units": "crystal", + "elements": [{"id": 0, "value": "Si"}, {"id": 1, "value": "Si"}], + "coordinates": [ + {"id": 0, "value": [0.0, 0.0, 0.0]}, + {"id": 1, "value": [0.25, 0.25, 0.25]}, + ], +} + +# ibrav=2 with FCC lattice, qe-tools produces symmetry equivalent but non-identical basis +SI_IBRAV2_BASIS = { + "units": "crystal", + "elements": [{"id": 0, "value": "Si"}, {"id": 1, "value": "Si"}], + "coordinates": [ + {"id": 0, "value": [0.0, 0.0, 0.0]}, + {"id": 1, "value": [-0.25, 0.75, -0.25]}, + ], +} + +SI_PRIMITIVE_LATTICE_A = 3.867 # Angstrom diff --git a/tests/fixtures/structural/test-005/pw_si_ibrav0.in b/tests/fixtures/structural/test-005/pw_si_ibrav0.in new file mode 100644 index 00000000..6598c17f --- /dev/null +++ b/tests/fixtures/structural/test-005/pw_si_ibrav0.in @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e67b5144b55e13e136098191e0c30ccb660f9436dd688dc276f8ceaf829622f +size 628 diff --git a/tests/fixtures/structural/test-005/pw_si_ibrav2.in b/tests/fixtures/structural/test-005/pw_si_ibrav2.in new file mode 100644 index 00000000..f33c51cf --- /dev/null +++ b/tests/fixtures/structural/test-005/pw_si_ibrav2.in @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4448b4dd80296a2dda8fe82ed2aa54e12c785d7c0047f1dc2b8632bb05c4605 +size 590 diff --git a/tests/unit/parsers/test_structure_parser.py b/tests/unit/parsers/test_structure_parser.py index e77631ad..0a37781b 100644 --- a/tests/unit/parsers/test_structure_parser.py +++ 
b/tests/unit/parsers/test_structure_parser.py @@ -1,7 +1,7 @@ import os from tests.unit import UnitTestBase -from tests.fixtures.structural.references import LI_CIF_BASIS +from tests.fixtures.structural.references import LI_CIF_BASIS, SI_IBRAV0_BASIS, SI_IBRAV2_BASIS, SI_PRIMITIVE_LATTICE_A from express.parsers.structure import StructureParser LI_CIF_PATH = os.path.join(os.path.dirname(__file__), "..", "..", "fixtures", "structural", "test-004", "Li.cif") @@ -10,6 +10,14 @@ os.path.dirname(__file__), "..", "..", "fixtures", "structural", "test-004", "SrLaCoO4.cif" ) +SI_IBRAV0_PATH = os.path.join( + os.path.dirname(__file__), "..", "..", "fixtures", "structural", "test-005", "pw_si_ibrav0.in" +) + +SI_IBRAV2_PATH = os.path.join( + os.path.dirname(__file__), "..", "..", "fixtures", "structural", "test-005", "pw_si_ibrav2.in" +) + def _read_file(path): with open(path) as f: @@ -46,12 +54,12 @@ class DisorderedStructureParserTest(UnitTestBase): when basis() is called on a structure with disordered (mixed-occupancy) sites. The SrLaCoO4 CIF has Sr2+ and La3+ sharing the same Wyckoff 4e site - with occupancy 0.5 each — a canonical disordered case. + with occupancy 0.5 each, a disordered case. """ def setUp(self): super().setUp() - # Parsing itself succeeds — pymatgen can load disordered structures. + # Parsing itself succeeds as pymatgen can load disordered structures. # The error is raised lazily when basis() is called. self.parser = StructureParser( structure_string=_read_file(DISORDERED_CIF_PATH), @@ -72,3 +80,59 @@ def test_basis_raises_for_disordered_site(self): self.assertIn("is not supported", error) self.assertIn("occupancy", error) self.assertIn("0.361", error) + + +class EspressoInIbrav0StructureParserTest(UnitTestBase): + """Tests that StructureParser correctly parses espresso-in files with ibrav=0 + and explicit CELL_PARAMETERS. 
+ The primitive lattice constant is 3.867 Angstrom.""" + + def setUp(self): + super().setUp() + self.parser = StructureParser( + structure_string=_read_file(SI_IBRAV0_PATH), + structure_format="espresso-in", + ) + + def tearDown(self): + super().tearDown() + + def test_basis(self): + self.assertDeepAlmostEqual(self.parser.basis(), SI_IBRAV0_BASIS, places=5) + + def test_formula(self): + self.assertEqual(self.parser.formula(), "Si2") + + def test_lattice_bravais_type(self): + self.assertEqual(self.parser.lattice_bravais()["type"], "FCC") + + def test_lattice_parameter_a(self): + self.assertAlmostEqual(self.parser.lattice_bravais()["a"], SI_PRIMITIVE_LATTICE_A, places=3) + + +class EspressoInIbrav2StructureParserTest(UnitTestBase): + """Tests that StructureParser correctly parses espresso-in files with ibrav=2 + (FCC) defined via celldm(1), without explicit CELL_PARAMETERS. + The primitive lattice constant is 3.867 Angstrom.""" + + def setUp(self): + super().setUp() + self.parser = StructureParser( + structure_string=_read_file(SI_IBRAV2_PATH), + structure_format="espresso-in", + ) + + def tearDown(self): + super().tearDown() + + def test_basis(self): + self.assertDeepAlmostEqual(self.parser.basis(), SI_IBRAV2_BASIS, places=5) + + def test_formula(self): + self.assertEqual(self.parser.formula(), "Si2") + + def test_lattice_bravais_type(self): + self.assertEqual(self.parser.lattice_bravais()["type"], "FCC") + + def test_lattice_parameter_a(self): + self.assertAlmostEqual(self.parser.lattice_bravais()["a"], SI_PRIMITIVE_LATTICE_A, places=3) From 9bac1b54daf282c2a8269c4e08a320660ab69cf9 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Thu, 28 May 2026 21:13:55 +0800 Subject: [PATCH 04/19] implement qe pw input parser using made --- .../parsers/apps/espresso/pw_input_file.py | 166 ++++++++++++++++++ express/parsers/structure.py | 2 +- pyproject.toml | 5 +- tests/fixtures/structural/references.py | 13 +- 
tests/unit/parsers/test_structure_parser.py | 6 +- 5 files changed, 172 insertions(+), 20 deletions(-) create mode 100644 express/parsers/apps/espresso/pw_input_file.py diff --git a/express/parsers/apps/espresso/pw_input_file.py b/express/parsers/apps/espresso/pw_input_file.py new file mode 100644 index 00000000..f106aa85 --- /dev/null +++ b/express/parsers/apps/espresso/pw_input_file.py @@ -0,0 +1,166 @@ +import math +import re +from typing import List, Optional, Tuple + +from mat3ra.esse.models.properties_directory.structural.lattice import LatticeSchema +from mat3ra.made.cell.primitive_cell import get_primitive_lattice_vectors_from_config + +BOHR_TO_ANGSTROM = 0.529177210903 + +# Maps QE ibrav codes → made/esse Bravais type strings +IBRAV_TO_LATTICE_TYPE = { + 1: "CUB", + 2: "FCC", + 3: "BCC", -3: "BCC", + 4: "HEX", + 5: "RHL", -5: "RHL", + 6: "TET", + 7: "BCT", + 8: "ORC", + 9: "ORCC", -9: "ORCC", + 10: "ORCF", + 11: "ORCI", + 12: "MCL", -12: "MCL", + 13: "MCLC", + 14: "TRI", +} + + +def _strip_comments(text: str) -> str: + text = re.sub(r"!.*$", "", text, flags=re.MULTILINE) + text = re.sub(r"#.*$", "", text, flags=re.MULTILINE) + return text + + +def _parse_namelist(text: str, name: str) -> dict: + """Extract key=value pairs from a Fortran namelist block &NAME ... 
/""" + match = re.search(rf"&{name}\s*([\s\S]*?)\/", text, re.IGNORECASE) + if not match: + return {} + block = match.group(1) + result = {} + # Regular key=value pairs + for k, v in re.findall(r"(\w+)\s*=\s*([^,\n/=]+)", block): + result[k.strip().lower()] = v.strip() + # celldm(N) with explicit index + for n, v in re.findall(r"celldm\s*\(\s*(\d+)\s*\)\s*=\s*([^,\n/]+)", block, re.IGNORECASE): + result[f"celldm{n}"] = v.strip() + return result + + +def _get_cell_from_ibrav(system: dict) -> List[List[float]]: + ibrav = int(system.get("ibrav", 0)) + lattice_type = IBRAV_TO_LATTICE_TYPE.get(ibrav) + if lattice_type is None: + raise ValueError(f"Unsupported ibrav={ibrav}") + + has_celldm = "celldm1" in system + + if has_celldm: + a = float(system["celldm1"]) * BOHR_TO_ANGSTROM + b = a * float(system.get("celldm2", 1)) + c = a * float(system.get("celldm3", 1)) + # celldm(4,5,6) are cosines → convert to degrees + alpha = math.degrees(math.acos(float(system.get("celldm4", 0)))) + beta = math.degrees(math.acos(float(system.get("celldm5", 0)))) + gamma = math.degrees(math.acos(float(system.get("celldm6", 0)))) + else: + a = float(system.get("a", 1)) + b = float(system.get("b", a)) + c = float(system.get("c", a)) + alpha = math.degrees(math.acos(float(system["cosbc"]))) if "cosbc" in system else float(system.get("alpha", 90)) + beta = math.degrees(math.acos(float(system["cosac"]))) if "cosac" in system else float(system.get("beta", 90)) + gamma = math.degrees(math.acos(float(system["cosab"]))) if "cosab" in system else float(system.get("gamma", 90)) + + lattice_config = LatticeSchema(type=lattice_type, a=a, b=b, c=c, alpha=alpha, beta=beta, gamma=gamma) + + return get_primitive_lattice_vectors_from_config(lattice_config) + + +def _parse_cell_parameters(text: str, celldm1_angstrom: Optional[float]) -> List[List[float]]: + match = re.search( + r"CELL_PARAMETERS\s*[{(]?\s*(\w+)\s*[)}]?\s*\n" + r"((?:[ \t]*[-\d.eEdD+]+[ \t]+[-\d.eEdD+]+[ \t]+[-\d.eEdD+]+[ \t]*\n?){3})", + text, 
re.IGNORECASE, + ) + if not match: + raise ValueError("CELL_PARAMETERS card not found") + units = match.group(1).lower() + rows = [list(map(float, line.split())) for line in match.group(2).strip().splitlines()] + if units == "bohr": + rows = [[v * BOHR_TO_ANGSTROM for v in row] for row in rows] + elif units == "alat": + if not celldm1_angstrom: + raise ValueError("alat units require celldm(1)") + rows = [[v * celldm1_angstrom for v in row] for row in rows] + return rows # angstrom: use as-is + + +def _parse_atomic_positions( + text: str, cell: List[List[float]], celldm1_angstrom: Optional[float] +) -> Tuple[List[str], List[List[float]]]: + match = re.search( + r"ATOMIC_POSITIONS\s*[{(]?\s*(\w+)\s*[)}]?\s*\n" + r"((?:[ \t]*\w+[ \t]+[-\d.eEdD+]+[ \t]+[-\d.eEdD+]+[ \t]+[-\d.eEdD+]+.*\n?)+)", + text, re.IGNORECASE, + ) + if not match: + raise ValueError("ATOMIC_POSITIONS card not found") + units = match.group(1).lower() + names, positions = [], [] + for line in match.group(2).strip().splitlines(): + parts = line.split() + if len(parts) < 4: + continue + symbol = parts[0] + coords = list(map(float, parts[1:4])) + if units == "crystal": + # fractional → Cartesian: coords_cart[j] = sum_i frac[i] * cell[i][j] + coords = [ + sum(coords[i] * cell[i][j] for i in range(3)) + for j in range(3) + ] + elif units == "bohr": + coords = [v * BOHR_TO_ANGSTROM for v in coords] + elif units == "alat": + if not celldm1_angstrom: + raise ValueError("alat units require celldm(1)") + coords = [v * celldm1_angstrom for v in coords] + names.append(symbol) + positions.append(coords) + return names, positions + + +class PwInputFile: + """ + QE pw.x input parser. + Uses get_primitive_lattice_vectors_from_config() from mat3ra.made for ibrav != 0. 
+ + self.structure dict keys match qe-tools PwInputFile.structure: + cell - 3x3 list of lists (Angstrom) + atom_names - list of element symbols + positions - list of Cartesian coords (Angstrom) + """ + + def __init__(self, input_text: str): + text = _strip_comments(input_text) + system = _parse_namelist(text, "SYSTEM") + ibrav = int(system.get("ibrav", 0)) + + celldm1_angstrom = ( + float(system["celldm1"]) * BOHR_TO_ANGSTROM if "celldm1" in system else None + ) + + cell = ( + _parse_cell_parameters(text, celldm1_angstrom) + if ibrav == 0 + else _get_cell_from_ibrav(system) # ← delegates to made + ) + + atom_names, positions = _parse_atomic_positions(text, cell, celldm1_angstrom) + + self.structure = { + "cell": cell, + "atom_names": atom_names, + "positions": positions, + } diff --git a/express/parsers/structure.py b/express/parsers/structure.py index e42b6539..a4c231d7 100644 --- a/express/parsers/structure.py +++ b/express/parsers/structure.py @@ -2,7 +2,7 @@ import pymatgen as mg from pymatgen.core.structure import Structure -from qe_tools.parsers import PwInputFile +from express.parsers.apps.espresso.pw_input_file import PwInputFile from jarvis.core.atoms import Atoms from jarvis.io.vasp.inputs import Poscar diff --git a/pyproject.toml b/pyproject.toml index 2ab645f1..dc3a238d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,14 +15,11 @@ classifiers = [ "Topic :: Software Development", ] dependencies = [ - "pymatgen>=2023.8.10", - "ase>=3.17.0", "mat3ra-esse>=2026.5.27.post0", + "mat3ra-made @ git+https://github.com/exabyte-io/made.git@f80df56b6b404ce1d5482e6d850e60f10041a8f5", "jarvis-tools>=2023.12.12", # To avoid module 'numpy.linalg._umath_linalg' has no attribute '_ilp64' in Colab "numpy>=1.24.4,<2", - "pymatgen>=2025.10.7", - "qe-tools==2.3.0", ] [project.optional-dependencies] diff --git a/tests/fixtures/structural/references.py b/tests/fixtures/structural/references.py index 6d502f7b..6d26ed96 100644 --- a/tests/fixtures/structural/references.py 
+++ b/tests/fixtures/structural/references.py @@ -30,8 +30,7 @@ ], } -# ibrav=0 silicon with explicit CELL_PARAMETERS -SI_IBRAV0_BASIS = { +SI_IBRAV_BASIS = { "units": "crystal", "elements": [{"id": 0, "value": "Si"}, {"id": 1, "value": "Si"}], "coordinates": [ @@ -40,14 +39,4 @@ ], } -# ibrav=2 with FCC lattice, qe-tools produces symmetry equivalent but non-identical basis -SI_IBRAV2_BASIS = { - "units": "crystal", - "elements": [{"id": 0, "value": "Si"}, {"id": 1, "value": "Si"}], - "coordinates": [ - {"id": 0, "value": [0.0, 0.0, 0.0]}, - {"id": 1, "value": [-0.25, 0.75, -0.25]}, - ], -} - SI_PRIMITIVE_LATTICE_A = 3.867 # Angstrom diff --git a/tests/unit/parsers/test_structure_parser.py b/tests/unit/parsers/test_structure_parser.py index 0a37781b..969f269a 100644 --- a/tests/unit/parsers/test_structure_parser.py +++ b/tests/unit/parsers/test_structure_parser.py @@ -1,7 +1,7 @@ import os from tests.unit import UnitTestBase -from tests.fixtures.structural.references import LI_CIF_BASIS, SI_IBRAV0_BASIS, SI_IBRAV2_BASIS, SI_PRIMITIVE_LATTICE_A +from tests.fixtures.structural.references import LI_CIF_BASIS, SI_IBRAV_BASIS, SI_PRIMITIVE_LATTICE_A from express.parsers.structure import StructureParser LI_CIF_PATH = os.path.join(os.path.dirname(__file__), "..", "..", "fixtures", "structural", "test-004", "Li.cif") @@ -98,7 +98,7 @@ def tearDown(self): super().tearDown() def test_basis(self): - self.assertDeepAlmostEqual(self.parser.basis(), SI_IBRAV0_BASIS, places=5) + self.assertDeepAlmostEqual(self.parser.basis(), SI_IBRAV_BASIS, places=5) def test_formula(self): self.assertEqual(self.parser.formula(), "Si2") @@ -126,7 +126,7 @@ def tearDown(self): super().tearDown() def test_basis(self): - self.assertDeepAlmostEqual(self.parser.basis(), SI_IBRAV2_BASIS, places=5) + self.assertDeepAlmostEqual(self.parser.basis(), SI_IBRAV_BASIS, places=5) def test_formula(self): self.assertEqual(self.parser.formula(), "Si2") From e141f3cb088b3da597ba57c8d14db373650e216b Mon Sep 17 
00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Thu, 28 May 2026 21:18:12 +0800 Subject: [PATCH 05/19] fix made[tools] dependency --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index dc3a238d..ef94061e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ classifiers = [ ] dependencies = [ "mat3ra-esse>=2026.5.27.post0", - "mat3ra-made @ git+https://github.com/exabyte-io/made.git@f80df56b6b404ce1d5482e6d850e60f10041a8f5", + "mat3ra-made[tools] @ git+https://github.com/exabyte-io/made.git@f80df56b6b404ce1d5482e6d850e60f10041a8f5", "jarvis-tools>=2023.12.12", # To avoid module 'numpy.linalg._umath_linalg' has no attribute '_ilp64' in Colab "numpy>=1.24.4,<2", ] From 52a13cc167b7464e0f1c343ed6204cb7e4bc35e5 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Thu, 28 May 2026 22:04:41 +0800 Subject: [PATCH 06/19] bump made --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ef94061e..0978a7e2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ classifiers = [ ] dependencies = [ "mat3ra-esse>=2026.5.27.post0", - "mat3ra-made[tools] @ git+https://github.com/exabyte-io/made.git@f80df56b6b404ce1d5482e6d850e60f10041a8f5", + "mat3ra-made[tools] @ git+https://github.com/exabyte-io/made.git@190beb96c97d066396a5433b4a640442c49a73f9", "jarvis-tools>=2023.12.12", # To avoid module 'numpy.linalg._umath_linalg' has no attribute '_ilp64' in Colab "numpy>=1.24.4,<2", ] From 94555a98b61f531ed8008748a735ae102ccc37f5 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Wed, 3 Jun 2026 23:35:28 +0800 Subject: [PATCH 07/19] use BOHR_TO_ANGSTROM constant via utils --- express/parsers/apps/espresso/pw_input_file.py | 11 +++++------ pyproject.toml | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git 
a/express/parsers/apps/espresso/pw_input_file.py b/express/parsers/apps/espresso/pw_input_file.py index f106aa85..b1781f87 100644 --- a/express/parsers/apps/espresso/pw_input_file.py +++ b/express/parsers/apps/espresso/pw_input_file.py @@ -4,8 +4,7 @@ from mat3ra.esse.models.properties_directory.structural.lattice import LatticeSchema from mat3ra.made.cell.primitive_cell import get_primitive_lattice_vectors_from_config - -BOHR_TO_ANGSTROM = 0.529177210903 +from mat3ra.utils.constants import COEFFICIENTS # Maps QE ibrav codes → made/esse Bravais type strings IBRAV_TO_LATTICE_TYPE = { @@ -57,7 +56,7 @@ def _get_cell_from_ibrav(system: dict) -> List[List[float]]: has_celldm = "celldm1" in system if has_celldm: - a = float(system["celldm1"]) * BOHR_TO_ANGSTROM + a = float(system["celldm1"]) * COEFFICIENTS["BOHR_TO_ANGSTROM"] b = a * float(system.get("celldm2", 1)) c = a * float(system.get("celldm3", 1)) # celldm(4,5,6) are cosines → convert to degrees @@ -88,7 +87,7 @@ def _parse_cell_parameters(text: str, celldm1_angstrom: Optional[float]) -> List units = match.group(1).lower() rows = [list(map(float, line.split())) for line in match.group(2).strip().splitlines()] if units == "bohr": - rows = [[v * BOHR_TO_ANGSTROM for v in row] for row in rows] + rows = [[v * COEFFICIENTS["BOHR_TO_ANGSTROM"] for v in row] for row in rows] elif units == "alat": if not celldm1_angstrom: raise ValueError("alat units require celldm(1)") @@ -121,7 +120,7 @@ def _parse_atomic_positions( for j in range(3) ] elif units == "bohr": - coords = [v * BOHR_TO_ANGSTROM for v in coords] + coords = [v * COEFFICIENTS["BOHR_TO_ANGSTROM"] for v in coords] elif units == "alat": if not celldm1_angstrom: raise ValueError("alat units require celldm(1)") @@ -148,7 +147,7 @@ def __init__(self, input_text: str): ibrav = int(system.get("ibrav", 0)) celldm1_angstrom = ( - float(system["celldm1"]) * BOHR_TO_ANGSTROM if "celldm1" in system else None + float(system["celldm1"]) * COEFFICIENTS["BOHR_TO_ANGSTROM"] if 
"celldm1" in system else None ) cell = ( diff --git a/pyproject.toml b/pyproject.toml index 0978a7e2..00546194 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ classifiers = [ ] dependencies = [ "mat3ra-esse>=2026.5.27.post0", - "mat3ra-made[tools] @ git+https://github.com/exabyte-io/made.git@190beb96c97d066396a5433b4a640442c49a73f9", + "mat3ra-made[tools] @ git+https://github.com/exabyte-io/made.git@e5fa168b942f73623fed986dde7a5ba55c6f9821", "jarvis-tools>=2023.12.12", # To avoid module 'numpy.linalg._umath_linalg' has no attribute '_ilp64' in Colab "numpy>=1.24.4,<2", From a93bb057d85fb3cf15de7d4fdc3dbbe891be4b81 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Thu, 4 Jun 2026 00:00:29 +0800 Subject: [PATCH 08/19] replace strip_comments with remove_comments_from_source_code from mat3ra-utils --- express/parsers/apps/espresso/pw_input_file.py | 9 ++------- pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/express/parsers/apps/espresso/pw_input_file.py b/express/parsers/apps/espresso/pw_input_file.py index b1781f87..e55efbdb 100644 --- a/express/parsers/apps/espresso/pw_input_file.py +++ b/express/parsers/apps/espresso/pw_input_file.py @@ -5,6 +5,7 @@ from mat3ra.esse.models.properties_directory.structural.lattice import LatticeSchema from mat3ra.made.cell.primitive_cell import get_primitive_lattice_vectors_from_config from mat3ra.utils.constants import COEFFICIENTS +from mat3ra.utils.string import remove_comments_from_source_code # Maps QE ibrav codes → made/esse Bravais type strings IBRAV_TO_LATTICE_TYPE = { @@ -25,12 +26,6 @@ } -def _strip_comments(text: str) -> str: - text = re.sub(r"!.*$", "", text, flags=re.MULTILINE) - text = re.sub(r"#.*$", "", text, flags=re.MULTILINE) - return text - - def _parse_namelist(text: str, name: str) -> dict: """Extract key=value pairs from a Fortran namelist block &NAME ... 
/""" match = re.search(rf"&{name}\s*([\s\S]*?)\/", text, re.IGNORECASE) @@ -142,7 +137,7 @@ class PwInputFile: """ def __init__(self, input_text: str): - text = _strip_comments(input_text) + text = remove_comments_from_source_code(input_text, language="fortran") system = _parse_namelist(text, "SYSTEM") ibrav = int(system.get("ibrav", 0)) diff --git a/pyproject.toml b/pyproject.toml index 00546194..c3f02310 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ classifiers = [ ] dependencies = [ "mat3ra-esse>=2026.5.27.post0", - "mat3ra-made[tools] @ git+https://github.com/exabyte-io/made.git@e5fa168b942f73623fed986dde7a5ba55c6f9821", + "mat3ra-made[tools] @ git+https://github.com/exabyte-io/made.git@9f24bff67b63dc5d4c55c0c89509e72743ad1258", "jarvis-tools>=2023.12.12", # To avoid module 'numpy.linalg._umath_linalg' has no attribute '_ilp64' in Colab "numpy>=1.24.4,<2", From b019cab728dbacdae17ef5072f817db1f57474f9 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Sat, 6 Jun 2026 00:30:36 +0800 Subject: [PATCH 09/19] refactor after moving parsers to parsers repo --- .../parsers/apps/espresso/pw_input_file.py | 95 ++----------------- pyproject.toml | 3 +- 2 files changed, 11 insertions(+), 87 deletions(-) diff --git a/express/parsers/apps/espresso/pw_input_file.py b/express/parsers/apps/espresso/pw_input_file.py index e55efbdb..4c8ecab6 100644 --- a/express/parsers/apps/espresso/pw_input_file.py +++ b/express/parsers/apps/espresso/pw_input_file.py @@ -1,11 +1,11 @@ import math -import re -from typing import List, Optional, Tuple +from typing import List from mat3ra.esse.models.properties_directory.structural.lattice import LatticeSchema from mat3ra.made.cell.primitive_cell import get_primitive_lattice_vectors_from_config from mat3ra.utils.constants import COEFFICIENTS from mat3ra.utils.string import remove_comments_from_source_code +from mat3ra.parsers.applications.espresso.pw_x.stdin.parser import 
EspressoPwxStdinParser # Maps QE ibrav codes → made/esse Bravais type strings IBRAV_TO_LATTICE_TYPE = { @@ -26,22 +26,6 @@ } -def _parse_namelist(text: str, name: str) -> dict: - """Extract key=value pairs from a Fortran namelist block &NAME ... /""" - match = re.search(rf"&{name}\s*([\s\S]*?)\/", text, re.IGNORECASE) - if not match: - return {} - block = match.group(1) - result = {} - # Regular key=value pairs - for k, v in re.findall(r"(\w+)\s*=\s*([^,\n/=]+)", block): - result[k.strip().lower()] = v.strip() - # celldm(N) with explicit index - for n, v in re.findall(r"celldm\s*\(\s*(\d+)\s*\)\s*=\s*([^,\n/]+)", block, re.IGNORECASE): - result[f"celldm{n}"] = v.strip() - return result - - def _get_cell_from_ibrav(system: dict) -> List[List[float]]: ibrav = int(system.get("ibrav", 0)) lattice_type = IBRAV_TO_LATTICE_TYPE.get(ibrav) @@ -71,87 +55,26 @@ def _get_cell_from_ibrav(system: dict) -> List[List[float]]: return get_primitive_lattice_vectors_from_config(lattice_config) -def _parse_cell_parameters(text: str, celldm1_angstrom: Optional[float]) -> List[List[float]]: - match = re.search( - r"CELL_PARAMETERS\s*[{(]?\s*(\w+)\s*[)}]?\s*\n" - r"((?:[ \t]*[-\d.eEdD+]+[ \t]+[-\d.eEdD+]+[ \t]+[-\d.eEdD+]+[ \t]*\n?){3})", - text, re.IGNORECASE, - ) - if not match: - raise ValueError("CELL_PARAMETERS card not found") - units = match.group(1).lower() - rows = [list(map(float, line.split())) for line in match.group(2).strip().splitlines()] - if units == "bohr": - rows = [[v * COEFFICIENTS["BOHR_TO_ANGSTROM"] for v in row] for row in rows] - elif units == "alat": - if not celldm1_angstrom: - raise ValueError("alat units require celldm(1)") - rows = [[v * celldm1_angstrom for v in row] for row in rows] - return rows # angstrom: use as-is - - -def _parse_atomic_positions( - text: str, cell: List[List[float]], celldm1_angstrom: Optional[float] -) -> Tuple[List[str], List[List[float]]]: - match = re.search( - r"ATOMIC_POSITIONS\s*[{(]?\s*(\w+)\s*[)}]?\s*\n" - r"((?:[ \t]*\w+[ 
\t]+[-\d.eEdD+]+[ \t]+[-\d.eEdD+]+[ \t]+[-\d.eEdD+]+.*\n?)+)", - text, re.IGNORECASE, - ) - if not match: - raise ValueError("ATOMIC_POSITIONS card not found") - units = match.group(1).lower() - names, positions = [], [] - for line in match.group(2).strip().splitlines(): - parts = line.split() - if len(parts) < 4: - continue - symbol = parts[0] - coords = list(map(float, parts[1:4])) - if units == "crystal": - # fractional → Cartesian: coords_cart[j] = sum_i frac[i] * cell[i][j] - coords = [ - sum(coords[i] * cell[i][j] for i in range(3)) - for j in range(3) - ] - elif units == "bohr": - coords = [v * COEFFICIENTS["BOHR_TO_ANGSTROM"] for v in coords] - elif units == "alat": - if not celldm1_angstrom: - raise ValueError("alat units require celldm(1)") - coords = [v * celldm1_angstrom for v in coords] - names.append(symbol) - positions.append(coords) - return names, positions - - class PwInputFile: - """ - QE pw.x input parser. - Uses get_primitive_lattice_vectors_from_config() from mat3ra.made for ibrav != 0. 
- - self.structure dict keys match qe-tools PwInputFile.structure: - cell - 3x3 list of lists (Angstrom) - atom_names - list of element symbols - positions - list of Cartesian coords (Angstrom) - """ - def __init__(self, input_text: str): text = remove_comments_from_source_code(input_text, language="fortran") - system = _parse_namelist(text, "SYSTEM") + parser = EspressoPwxStdinParser(text) + + system = parser.get_namelist_as_dict("SYSTEM") ibrav = int(system.get("ibrav", 0)) celldm1_angstrom = ( float(system["celldm1"]) * COEFFICIENTS["BOHR_TO_ANGSTROM"] if "celldm1" in system else None ) + # Delegate to the new parser method if ibrav == 0, else build from ibrav cell = ( - _parse_cell_parameters(text, celldm1_angstrom) + parser.get_card_cell_parameters(celldm1_angstrom) if ibrav == 0 - else _get_cell_from_ibrav(system) # ← delegates to made + else _get_cell_from_ibrav(system) ) - atom_names, positions = _parse_atomic_positions(text, cell, celldm1_angstrom) + atom_names, positions = parser.get_card_atomic_positions(cell, celldm1_angstrom) self.structure = { "cell": cell, diff --git a/pyproject.toml b/pyproject.toml index c3f02310..c4fb6923 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,8 @@ classifiers = [ ] dependencies = [ "mat3ra-esse>=2026.5.27.post0", - "mat3ra-made[tools] @ git+https://github.com/exabyte-io/made.git@9f24bff67b63dc5d4c55c0c89509e72743ad1258", + "mat3ra-made[tools] @ git+https://github.com/exabyte-io/made.git@97cd8a9a9f9898446ebffc5881919ef236849a1b", + "mat3ra-parsers @ git+https://github.com/exabyte-io/parsers.git@d850566958a32b323ee345a93c6d57d4caf9a359", "jarvis-tools>=2023.12.12", # To avoid module 'numpy.linalg._umath_linalg' has no attribute '_ilp64' in Colab "numpy>=1.24.4,<2", From 9e2b20eb7d736b8412d60f81bda27183900ff423 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Fri, 12 Jun 2026 22:15:53 +0800 Subject: [PATCH 10/19] use updated parser to get_namelist --- 
express/parsers/apps/espresso/pw_input_file.py | 14 ++++---------- pyproject.toml | 4 ++-- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/express/parsers/apps/espresso/pw_input_file.py b/express/parsers/apps/espresso/pw_input_file.py index 4c8ecab6..624c235f 100644 --- a/express/parsers/apps/espresso/pw_input_file.py +++ b/express/parsers/apps/espresso/pw_input_file.py @@ -60,19 +60,13 @@ def __init__(self, input_text: str): text = remove_comments_from_source_code(input_text, language="fortran") parser = EspressoPwxStdinParser(text) - system = parser.get_namelist_as_dict("SYSTEM") + system = parser.get_namelist("SYSTEM") ibrav = int(system.get("ibrav", 0)) - celldm1_angstrom = ( - float(system["celldm1"]) * COEFFICIENTS["BOHR_TO_ANGSTROM"] if "celldm1" in system else None - ) + celldm1_angstrom = float(system["celldm1"]) * COEFFICIENTS["BOHR_TO_ANGSTROM"] if "celldm1" in system else None - # Delegate to the new parser method if ibrav == 0, else build from ibrav - cell = ( - parser.get_card_cell_parameters(celldm1_angstrom) - if ibrav == 0 - else _get_cell_from_ibrav(system) - ) + # Delegate crystal lattice calculation based on ibrav value + cell = parser.get_card_cell_parameters(celldm1_angstrom) if ibrav == 0 else _get_cell_from_ibrav(system) atom_names, positions = parser.get_card_atomic_positions(cell, celldm1_angstrom) diff --git a/pyproject.toml b/pyproject.toml index c4fb6923..038f99e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,8 +16,8 @@ classifiers = [ ] dependencies = [ "mat3ra-esse>=2026.5.27.post0", - "mat3ra-made[tools] @ git+https://github.com/exabyte-io/made.git@97cd8a9a9f9898446ebffc5881919ef236849a1b", - "mat3ra-parsers @ git+https://github.com/exabyte-io/parsers.git@d850566958a32b323ee345a93c6d57d4caf9a359", + "mat3ra-made[tools] @ git+https://github.com/exabyte-io/made.git@774c0c2c54c5f175291ea026411de1ee8405d825", + "mat3ra-parsers @ 
git+https://github.com/exabyte-io/parsers.git@6f8601a2bdd3aac155f9743b2c53dfc73c4c69a4", "jarvis-tools>=2023.12.12", # To avoid module 'numpy.linalg._umath_linalg' has no attribute '_ilp64' in Colab "numpy>=1.24.4,<2", From 7be6036fd661b8b9011f5eed6a862d002f86124c Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Sat, 13 Jun 2026 13:12:41 +0800 Subject: [PATCH 11/19] refactor parser changes --- express/parsers/apps/espresso/pw_input_file.py | 4 ++-- pyproject.toml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/express/parsers/apps/espresso/pw_input_file.py b/express/parsers/apps/espresso/pw_input_file.py index 624c235f..b6d10897 100644 --- a/express/parsers/apps/espresso/pw_input_file.py +++ b/express/parsers/apps/espresso/pw_input_file.py @@ -5,7 +5,7 @@ from mat3ra.made.cell.primitive_cell import get_primitive_lattice_vectors_from_config from mat3ra.utils.constants import COEFFICIENTS from mat3ra.utils.string import remove_comments_from_source_code -from mat3ra.parsers.applications.espresso.pw_x.stdin.parser import EspressoPwxStdinParser +from mat3ra.parsers.applications.espresso.pwin import EspressoPwinParser # Maps QE ibrav codes → made/esse Bravais type strings IBRAV_TO_LATTICE_TYPE = { @@ -58,7 +58,7 @@ def _get_cell_from_ibrav(system: dict) -> List[List[float]]: class PwInputFile: def __init__(self, input_text: str): text = remove_comments_from_source_code(input_text, language="fortran") - parser = EspressoPwxStdinParser(text) + parser = EspressoPwinParser(text) system = parser.get_namelist("SYSTEM") ibrav = int(system.get("ibrav", 0)) diff --git a/pyproject.toml b/pyproject.toml index e7f22491..2b08459e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,8 +16,8 @@ classifiers = [ ] dependencies = [ "mat3ra-esse>=2026.6.12", - "mat3ra-made[tools] @ git+https://github.com/exabyte-io/made.git@774c0c2c54c5f175291ea026411de1ee8405d825", - "mat3ra-parsers @ 
git+https://github.com/exabyte-io/parsers.git@6f8601a2bdd3aac155f9743b2c53dfc73c4c69a4", + "mat3ra-made[tools] @ git+https://github.com/exabyte-io/made.git@1b18d807cea0d34b4551baa1da6cc2827298193b", + "mat3ra-parsers @ git+https://github.com/exabyte-io/parsers.git@63f0bf323035a1c9b294494af3accdb0333baf38", "jarvis-tools>=2023.12.12", # To avoid module 'numpy.linalg._umath_linalg' has no attribute '_ilp64' in Colab "numpy>=1.24.4,<2", From 04c1f910f2135aef9411464753e5a3156e484697 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Sat, 13 Jun 2026 13:44:18 +0800 Subject: [PATCH 12/19] bump parser --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2b08459e..0859a1ec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ classifiers = [ dependencies = [ "mat3ra-esse>=2026.6.12", "mat3ra-made[tools] @ git+https://github.com/exabyte-io/made.git@1b18d807cea0d34b4551baa1da6cc2827298193b", - "mat3ra-parsers @ git+https://github.com/exabyte-io/parsers.git@63f0bf323035a1c9b294494af3accdb0333baf38", + "mat3ra-parsers @ git+https://github.com/exabyte-io/parsers.git@372a7fd31ee27ada9216221a970fce608878a5a8", "jarvis-tools>=2023.12.12", # To avoid module 'numpy.linalg._umath_linalg' has no attribute '_ilp64' in Colab "numpy>=1.24.4,<2", From 13da531b83402c10f23835b7a4347c5d7f4f02aa Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Sat, 13 Jun 2026 15:42:12 +0800 Subject: [PATCH 13/19] bump parsers --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0859a1ec..1bd2ade0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ classifiers = [ dependencies = [ "mat3ra-esse>=2026.6.12", "mat3ra-made[tools] @ git+https://github.com/exabyte-io/made.git@1b18d807cea0d34b4551baa1da6cc2827298193b", - "mat3ra-parsers @ 
git+https://github.com/exabyte-io/parsers.git@372a7fd31ee27ada9216221a970fce608878a5a8", + "mat3ra-parsers @ git+https://github.com/exabyte-io/parsers.git@0edfda94b01e357aafc8851873c13d7b688857c1", "jarvis-tools>=2023.12.12", # To avoid module 'numpy.linalg._umath_linalg' has no attribute '_ilp64' in Colab "numpy>=1.24.4,<2", From c8a5252f893c664bb813c49a7fae78c6c9a4f24d Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Sat, 13 Jun 2026 21:49:07 +0800 Subject: [PATCH 14/19] update parsers --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1bd2ade0..3dab5fb1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ classifiers = [ dependencies = [ "mat3ra-esse>=2026.6.12", "mat3ra-made[tools] @ git+https://github.com/exabyte-io/made.git@1b18d807cea0d34b4551baa1da6cc2827298193b", - "mat3ra-parsers @ git+https://github.com/exabyte-io/parsers.git@0edfda94b01e357aafc8851873c13d7b688857c1", + "mat3ra-parsers @ git+https://github.com/exabyte-io/parsers.git@988851514c1a4ca387def281ee7a63b4f70435f8", "jarvis-tools>=2023.12.12", # To avoid module 'numpy.linalg._umath_linalg' has no attribute '_ilp64' in Colab "numpy>=1.24.4,<2", From c0be22841062adba27c54b5d72df0a389cb2c57a Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Sat, 13 Jun 2026 22:17:02 +0800 Subject: [PATCH 15/19] update parsers --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3dab5fb1..2e9d5afc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ classifiers = [ dependencies = [ "mat3ra-esse>=2026.6.12", "mat3ra-made[tools] @ git+https://github.com/exabyte-io/made.git@1b18d807cea0d34b4551baa1da6cc2827298193b", - "mat3ra-parsers @ git+https://github.com/exabyte-io/parsers.git@988851514c1a4ca387def281ee7a63b4f70435f8", + "mat3ra-parsers @ 
git+https://github.com/exabyte-io/parsers.git@b50bb00e41be4682c8420288fbd0f731808cdbb8", "jarvis-tools>=2023.12.12", # To avoid module 'numpy.linalg._umath_linalg' has no attribute '_ilp64' in Colab "numpy>=1.24.4,<2", From 01565af4f5f1090b055c806cd52968ea3c979c50 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Sat, 20 Jun 2026 00:55:31 +0800 Subject: [PATCH 16/19] update parser class --- express/parsers/apps/espresso/pw_input_file.py | 4 ++-- pyproject.toml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/express/parsers/apps/espresso/pw_input_file.py b/express/parsers/apps/espresso/pw_input_file.py index b6d10897..624c235f 100644 --- a/express/parsers/apps/espresso/pw_input_file.py +++ b/express/parsers/apps/espresso/pw_input_file.py @@ -5,7 +5,7 @@ from mat3ra.made.cell.primitive_cell import get_primitive_lattice_vectors_from_config from mat3ra.utils.constants import COEFFICIENTS from mat3ra.utils.string import remove_comments_from_source_code -from mat3ra.parsers.applications.espresso.pwin import EspressoPwinParser +from mat3ra.parsers.applications.espresso.pw_x.stdin.parser import EspressoPwxStdinParser # Maps QE ibrav codes → made/esse Bravais type strings IBRAV_TO_LATTICE_TYPE = { @@ -58,7 +58,7 @@ def _get_cell_from_ibrav(system: dict) -> List[List[float]]: class PwInputFile: def __init__(self, input_text: str): text = remove_comments_from_source_code(input_text, language="fortran") - parser = EspressoPwinParser(text) + parser = EspressoPwxStdinParser(text) system = parser.get_namelist("SYSTEM") ibrav = int(system.get("ibrav", 0)) diff --git a/pyproject.toml b/pyproject.toml index 2e9d5afc..a13a1a21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,8 +16,8 @@ classifiers = [ ] dependencies = [ "mat3ra-esse>=2026.6.12", - "mat3ra-made[tools] @ git+https://github.com/exabyte-io/made.git@1b18d807cea0d34b4551baa1da6cc2827298193b", - "mat3ra-parsers @ 
git+https://github.com/exabyte-io/parsers.git@b50bb00e41be4682c8420288fbd0f731808cdbb8", + "mat3ra-made[tools] @ git+https://github.com/exabyte-io/made.git@bdfa37a451f0364cee1602bcec723fb878fdfc0a", + "mat3ra-parsers @ git+https://github.com/exabyte-io/parsers.git@30182b7f967024d31ff03dc39dcbdc33faf89722", "jarvis-tools>=2023.12.12", # To avoid module 'numpy.linalg._umath_linalg' has no attribute '_ilp64' in Colab "numpy>=1.24.4,<2", From 59ea4b1404eee695a88999a1b2eccdf7570bc1e5 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Sat, 20 Jun 2026 00:59:45 +0800 Subject: [PATCH 17/19] remove espresso comments by chaining fortran and python comments --- express/parsers/apps/espresso/pw_input_file.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/express/parsers/apps/espresso/pw_input_file.py b/express/parsers/apps/espresso/pw_input_file.py index 624c235f..d96c289a 100644 --- a/express/parsers/apps/espresso/pw_input_file.py +++ b/express/parsers/apps/espresso/pw_input_file.py @@ -57,7 +57,9 @@ def _get_cell_from_ibrav(system: dict) -> List[List[float]]: class PwInputFile: def __init__(self, input_text: str): - text = remove_comments_from_source_code(input_text, language="fortran") + text = remove_comments_from_source_code( + remove_comments_from_source_code(input_text, language="fortran"), language="python" + ) parser = EspressoPwxStdinParser(text) system = parser.get_namelist("SYSTEM") From 26d8d618b18ff1277372eed8b36d1ec9c273d6f0 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Sat, 20 Jun 2026 01:00:03 +0800 Subject: [PATCH 18/19] chore: bump gh actions/checkout --- .github/workflows/cicd.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 698755f8..5696ef99 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -16,12 +16,12 @@ jobs: steps: - 
name: Checkout this repository - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: lfs: true - name: Checkout actions repository - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: repository: Exabyte-io/actions token: ${{ secrets.BOT_GITHUB_TOKEN }} @@ -43,12 +43,12 @@ jobs: steps: - name: Checkout this repository - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: lfs: true - name: Checkout actions repository - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: repository: Exabyte-io/actions token: ${{ secrets.BOT_GITHUB_TOKEN }} @@ -71,13 +71,13 @@ jobs: steps: - name: Checkout this repository - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: lfs: true fetch-tags: true - name: Checkout actions repository - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: repository: Exabyte-io/actions token: ${{ secrets.BOT_GITHUB_TOKEN }} From 6980e3af8fb940a7c51907f656897855d16ce79d Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Sat, 20 Jun 2026 10:59:08 +0800 Subject: [PATCH 19/19] update parsers lib --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a13a1a21..7d94a56b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ classifiers = [ dependencies = [ "mat3ra-esse>=2026.6.12", "mat3ra-made[tools] @ 
git+https://github.com/exabyte-io/made.git@bdfa37a451f0364cee1602bcec723fb878fdfc0a", - "mat3ra-parsers @ git+https://github.com/exabyte-io/parsers.git@30182b7f967024d31ff03dc39dcbdc33faf89722", + "mat3ra-parsers @ git+https://github.com/exabyte-io/parsers.git@09184533d422d47945073849106bdf1ac5c878ac", "jarvis-tools>=2023.12.12", # To avoid module 'numpy.linalg._umath_linalg' has no attribute '_ilp64' in Colab "numpy>=1.24.4,<2",