diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 698755f8..5696ef99 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -16,12 +16,12 @@ jobs: steps: - name: Checkout this repository - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: lfs: true - name: Checkout actions repository - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: repository: Exabyte-io/actions token: ${{ secrets.BOT_GITHUB_TOKEN }} @@ -43,12 +43,12 @@ jobs: steps: - name: Checkout this repository - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: lfs: true - name: Checkout actions repository - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: repository: Exabyte-io/actions token: ${{ secrets.BOT_GITHUB_TOKEN }} @@ -71,13 +71,13 @@ jobs: steps: - name: Checkout this repository - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: lfs: true fetch-tags: true - name: Checkout actions repository - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: repository: Exabyte-io/actions token: ${{ secrets.BOT_GITHUB_TOKEN }} diff --git a/express/parsers/apps/espresso/pw_input_file.py b/express/parsers/apps/espresso/pw_input_file.py new file mode 100644 index 00000000..d96c289a --- /dev/null +++ b/express/parsers/apps/espresso/pw_input_file.py @@ -0,0 +1,79 @@ +import math +from typing import List + +from 
# Maps Quantum ESPRESSO ibrav codes to the Bravais lattice type strings given to LatticeSchema.
# A negative code is listed next to its positive counterpart and maps to the same type.
IBRAV_TO_LATTICE_TYPE = {
    1: "CUB",
    2: "FCC",
    3: "BCC",
    -3: "BCC",
    4: "HEX",
    5: "RHL",
    -5: "RHL",
    6: "TET",
    7: "BCT",
    8: "ORC",
    9: "ORCC",
    -9: "ORCC",
    10: "ORCF",
    11: "ORCI",
    12: "MCL",
    -12: "MCL",
    13: "MCLC",
    14: "TRI",
}

# Largest overshoot beyond [-1, 1] that is treated as rounding noise in a cosine value.
_COSINE_TOLERANCE = 1e-9


def _cosine_to_degrees(cosine, name: str) -> float:
    """Convert the cosine of a cell angle into the angle in degrees.

    A value that leaves [-1, 1] by no more than ``_COSINE_TOLERANCE`` is clamped, so a
    rounded input such as ``1.0000000001`` does not fail inside ``math.acos``.

    Args:
        cosine: Cosine as read from the SYSTEM namelist (anything ``float()`` accepts).
        name: Namelist key the value was read from; used only in the error message.

    Returns:
        The angle in degrees.

    Raises:
        ValueError: If the value is NaN or lies outside [-1, 1] by more than the tolerance.
    """
    value = float(cosine)
    # Written as "not <=" so that NaN (for which every comparison is False) is rejected too.
    if not abs(value) <= 1.0 + _COSINE_TOLERANCE:
        raise ValueError(f"{name}={value} is not a valid cosine, expected a value in [-1, 1]")
    return math.degrees(math.acos(max(-1.0, min(1.0, value))))


def _get_cell_from_ibrav(system: dict) -> List[List[float]]:
    """Build the lattice vectors for a SYSTEM namelist that uses a non-zero ibrav.

    Lengths and angles are read either from the ``celldm1`` .. ``celldm6`` keys (used
    whenever ``celldm1`` is present) or from ``a``/``b``/``c`` with ``cosbc``/``cosac``/``cosab``.
    They are packed into a ``LatticeSchema`` and handed to
    ``get_primitive_lattice_vectors_from_config``.

    Args:
        system: Mapping of SYSTEM namelist keys to values.

    Returns:
        Whatever ``get_primitive_lattice_vectors_from_config`` returns for the lattice.

    Raises:
        ValueError: If ``ibrav`` has no entry in ``IBRAV_TO_LATTICE_TYPE`` (this includes
            a missing ibrav, which is read as 0), or if a cosine value is not within [-1, 1].
    """
    ibrav = int(system.get("ibrav", 0))
    lattice_type = IBRAV_TO_LATTICE_TYPE.get(ibrav)
    if lattice_type is None:
        raise ValueError(f"Unsupported ibrav={ibrav}")

    # NOTE(review): celldm4 always feeds alpha here, while cosab feeds gamma below. The QE
    # input description defines celldm(4) as cos(ab) for ibrav=12/13 and as cos(bc) only for
    # ibrav=14 - confirm this mapping matches what LatticeSchema expects for MCL/MCLC.
    if "celldm1" in system:
        # celldm1 is scaled by the Bohr-to-Angstrom factor; celldm2 and celldm3 are ratios to it.
        a = float(system["celldm1"]) * COEFFICIENTS["BOHR_TO_ANGSTROM"]
        b = a * float(system.get("celldm2", 1))
        c = a * float(system.get("celldm3", 1))
        # celldm4..6 are cosines; a missing one defaults to 0, i.e. 90 degrees.
        alpha = _cosine_to_degrees(system.get("celldm4", 0), "celldm4")
        beta = _cosine_to_degrees(system.get("celldm5", 0), "celldm5")
        gamma = _cosine_to_degrees(system.get("celldm6", 0), "celldm6")
    else:
        # No unit conversion on this path - presumably these lengths are already in Angstrom.
        a = float(system.get("a", 1))
        b = float(system.get("b", a))
        c = float(system.get("c", a))
        # A cosine key wins over the plain angle key; with neither, the angle is 90 degrees.
        if "cosbc" in system:
            alpha = _cosine_to_degrees(system["cosbc"], "cosbc")
        else:
            alpha = float(system.get("alpha", 90))
        if "cosac" in system:
            beta = _cosine_to_degrees(system["cosac"], "cosac")
        else:
            beta = float(system.get("beta", 90))
        if "cosab" in system:
            gamma = _cosine_to_degrees(system["cosab"], "cosab")
        else:
            gamma = float(system.get("gamma", 90))

    lattice_config = LatticeSchema(type=lattice_type, a=a, b=b, c=c, alpha=alpha, beta=beta, gamma=gamma)

    return get_primitive_lattice_vectors_from_config(lattice_config)
class PwInputFile:
    """Structure data extracted from the text of a Quantum ESPRESSO pw.x input file."""

    def __init__(self, input_text: str):
        """Parse ``input_text`` and store the result in ``self.structure``.

        ``self.structure`` is a dict with the keys ``"cell"``, ``"atom_names"`` and
        ``"positions"``. The cell comes from the CELL_PARAMETERS card when ibrav is 0
        (or absent) and is otherwise derived from the SYSTEM namelist.

        Args:
            input_text: Full content of the input file.
        """
        # Two comment-stripping passes: Fortran-style first, then Python-style.
        text = remove_comments_from_source_code(
            remove_comments_from_source_code(input_text, language="fortran"), language="python"
        )
        parser = EspressoPwxStdinParser(text)

        system = parser.get_namelist("SYSTEM")
        ibrav = int(system.get("ibrav", 0))

        alat_angstrom = self._get_alat_angstrom(system)

        # Delegate crystal lattice calculation based on ibrav value
        cell = parser.get_card_cell_parameters(alat_angstrom) if ibrav == 0 else _get_cell_from_ibrav(system)

        atom_names, positions = parser.get_card_atomic_positions(cell, alat_angstrom)

        self.structure = {
            "cell": cell,
            "atom_names": atom_names,
            "positions": positions,
        }

    @staticmethod
    def _get_alat_angstrom(system: dict):
        """Return the lattice parameter used to scale alat-based cards, or None if absent.

        ``celldm1`` is preferred and scaled by the Bohr-to-Angstrom factor. When it is
        missing, ``a`` is used as-is, the same key ``_get_cell_from_ibrav`` reads as the
        lattice parameter, so inputs that specify ``A`` instead of ``celldm(1)`` no longer
        pass ``None`` to the card parsers.
        """
        if "celldm1" in system:
            return float(system["celldm1"]) * COEFFICIENTS["BOHR_TO_ANGSTROM"]
        if "a" in system:
            # presumably already in Angstrom, as in the QE input description - TODO confirm
            return float(system["a"])
        return None
coords_are_cartesian=True, + ) # convert jarvis-db-entry JSON into poscar - if self.structure_format == "jarvis-db-entry": - self.structure_format = "poscar" + elif self.structure_format == "jarvis-db-entry": self.structure_string = self.jarvis_db_entry_json_to_poscar(self.structure_string) - - # cell_type is either original, primitive or conventional - self.cell_type = kwargs.get("cell_type", "original") - - # Initialize structure class - if self.structure_format == "pymatgen.core.structure": + self.structure = Structure.from_str(self.structure_string, "poscar") + elif self.structure_format == "pymatgen.core.structure": structure_as_dict = json.loads(self.structure_string) self.structure = Structure.from_dict(structure_as_dict) else: @@ -63,7 +62,7 @@ def __init__(self, *args, **kwargs): self.structure = STRUCTURE_MAP[self.cell_type](self.structure) # keep only one atom inside the basis in order to have the original lattice type - self.lattice_only_structure = self.structure.copy() # deepcopy + self.lattice_only_structure = self.structure.copy() # deepcopy self.lattice_only_structure.remove_sites(range(1, len(self.structure.sites))) def lattice_vectors(self): @@ -178,16 +177,15 @@ def basis(self): for i, site in enumerate(self.structure.sites): if not site.is_ordered: raise ValueError( - f"Disordered site at {site.frac_coords.tolist()} with " - f"occupancy {site.species} is not supported." + f"Disordered site at {site.frac_coords.tolist()} with " + f"occupancy {site.species} is not supported." ) # Use specie.symbol to strip oxidation state (e.g. 
"Li0+" → "Li", "O2-" → "O") elements.append({"id": i, "value": site.specie.symbol}) - coordinates.append({ - "id": i, - "value": self._round(site.frac_coords.tolist(), PRECISION_MAP["coordinates_crystal"]) - }) + coordinates.append( + {"id": i, "value": self._round(site.frac_coords.tolist(), PRECISION_MAP["coordinates_crystal"])} + ) return {"units": "crystal", "elements": elements, "coordinates": coordinates} def space_group_symbol(self): @@ -240,27 +238,6 @@ def atomic_constraints(self): """ return self.structure.site_properties.get("selective_dynamics") - def espresso_input_to_poscar(self, espresso_input): - """ - Extracts structure from espresso input file and returns in poscar format. - - Args: - espresso_input (str): input file content - - Returns: - str: poscar - """ - input_ = io.StringIO() - input_.write(espresso_input) - input_.seek(0) - atoms = read(input_, format="espresso-in") - output_ = io.StringIO() - write(output_, atoms, format="vasp", vasp5=True) - content = output_.getvalue() - input_.close() - output_.close() - return content - def jarvis_db_entry_json_to_poscar(self, jarvis_db_entry_json_str): """ Extracts structure from jarvis atoms dictionary and returns in poscar format. 
diff --git a/pyproject.toml b/pyproject.toml index 89653c2f..7d94a56b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,9 +15,9 @@ classifiers = [ "Topic :: Software Development", ] dependencies = [ - "pymatgen>=2023.8.10", - "ase>=3.17.0", "mat3ra-esse>=2026.6.12", + "mat3ra-made[tools] @ git+https://github.com/exabyte-io/made.git@bdfa37a451f0364cee1602bcec723fb878fdfc0a", + "mat3ra-parsers @ git+https://github.com/exabyte-io/parsers.git@09184533d422d47945073849106bdf1ac5c878ac", "jarvis-tools>=2023.12.12", # To avoid module 'numpy.linalg._umath_linalg' has no attribute '_ilp64' in Colab "numpy>=1.24.4,<2", diff --git a/tests/.gitattributes b/tests/.gitattributes index 816d8cd1..9dc05008 100644 --- a/tests/.gitattributes +++ b/tests/.gitattributes @@ -5,3 +5,4 @@ fixtures/vasp/** filter=lfs diff=lfs merge=lfs -text fixtures/data.py filter=lfs diff=lfs merge=lfs -text fixtures/pyML/test-001/*.JSON filter=lfs diff=lfs merge=lfs -text *.cif filter=lfs diff=lfs merge=lfs -text +fixtures/structural/test-005/*.in filter=lfs diff=lfs merge=lfs -text diff --git a/tests/fixtures/structural/references.py b/tests/fixtures/structural/references.py index d49591bc..6d26ed96 100644 --- a/tests/fixtures/structural/references.py +++ b/tests/fixtures/structural/references.py @@ -29,3 +29,14 @@ {"id": 8, "value": [0.333333330, 0.666666670, 0.666666670]}, ], } + +SI_IBRAV_BASIS = { + "units": "crystal", + "elements": [{"id": 0, "value": "Si"}, {"id": 1, "value": "Si"}], + "coordinates": [ + {"id": 0, "value": [0.0, 0.0, 0.0]}, + {"id": 1, "value": [0.25, 0.25, 0.25]}, + ], +} + +SI_PRIMITIVE_LATTICE_A = 3.867 # Angstrom diff --git a/tests/fixtures/structural/test-005/pw_si_ibrav0.in b/tests/fixtures/structural/test-005/pw_si_ibrav0.in new file mode 100644 index 00000000..6598c17f --- /dev/null +++ b/tests/fixtures/structural/test-005/pw_si_ibrav0.in @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:4e67b5144b55e13e136098191e0c30ccb660f9436dd688dc276f8ceaf829622f +size 628 diff --git a/tests/fixtures/structural/test-005/pw_si_ibrav2.in b/tests/fixtures/structural/test-005/pw_si_ibrav2.in new file mode 100644 index 00000000..f33c51cf --- /dev/null +++ b/tests/fixtures/structural/test-005/pw_si_ibrav2.in @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4448b4dd80296a2dda8fe82ed2aa54e12c785d7c0047f1dc2b8632bb05c4605 +size 590 diff --git a/tests/unit/parsers/test_structure_parser.py b/tests/unit/parsers/test_structure_parser.py index e77631ad..969f269a 100644 --- a/tests/unit/parsers/test_structure_parser.py +++ b/tests/unit/parsers/test_structure_parser.py @@ -1,7 +1,7 @@ import os from tests.unit import UnitTestBase -from tests.fixtures.structural.references import LI_CIF_BASIS +from tests.fixtures.structural.references import LI_CIF_BASIS, SI_IBRAV_BASIS, SI_PRIMITIVE_LATTICE_A from express.parsers.structure import StructureParser LI_CIF_PATH = os.path.join(os.path.dirname(__file__), "..", "..", "fixtures", "structural", "test-004", "Li.cif") @@ -10,6 +10,14 @@ os.path.dirname(__file__), "..", "..", "fixtures", "structural", "test-004", "SrLaCoO4.cif" ) +SI_IBRAV0_PATH = os.path.join( + os.path.dirname(__file__), "..", "..", "fixtures", "structural", "test-005", "pw_si_ibrav0.in" +) + +SI_IBRAV2_PATH = os.path.join( + os.path.dirname(__file__), "..", "..", "fixtures", "structural", "test-005", "pw_si_ibrav2.in" +) + def _read_file(path): with open(path) as f: @@ -46,12 +54,12 @@ class DisorderedStructureParserTest(UnitTestBase): when basis() is called on a structure with disordered (mixed-occupancy) sites. The SrLaCoO4 CIF has Sr2+ and La3+ sharing the same Wyckoff 4e site - with occupancy 0.5 each — a canonical disordered case. + with occupancy 0.5 each, a disordered case. """ def setUp(self): super().setUp() - # Parsing itself succeeds — pymatgen can load disordered structures. 
class _SiEspressoInChecks:
    """Checks shared by the espresso-in parser tests; subclasses set ``fixture_path``.

    Kept as a plain mixin (not a test case itself) so it is only run through its subclasses.
    """

    # Path of the pw.x input fixture to parse; overridden by each concrete test class.
    fixture_path = None

    def setUp(self):
        super().setUp()
        fixture_text = _read_file(self.fixture_path)
        self.parser = StructureParser(
            structure_string=fixture_text,
            structure_format="espresso-in",
        )

    def test_basis(self):
        basis = self.parser.basis()
        self.assertDeepAlmostEqual(basis, SI_IBRAV_BASIS, places=5)

    def test_formula(self):
        formula = self.parser.formula()
        self.assertEqual(formula, "Si2")

    def test_lattice_bravais_type(self):
        lattice = self.parser.lattice_bravais()
        self.assertEqual(lattice["type"], "FCC")

    def test_lattice_parameter_a(self):
        lattice = self.parser.lattice_bravais()
        self.assertAlmostEqual(lattice["a"], SI_PRIMITIVE_LATTICE_A, places=3)


class EspressoInIbrav0StructureParserTest(_SiEspressoInChecks, UnitTestBase):
    """StructureParser reads an espresso-in file that uses ibrav=0 with an explicit
    CELL_PARAMETERS card.
    The primitive lattice constant is 3.867 Angstrom."""

    fixture_path = SI_IBRAV0_PATH


class EspressoInIbrav2StructureParserTest(_SiEspressoInChecks, UnitTestBase):
    """StructureParser reads an espresso-in file that uses ibrav=2 (FCC) defined via
    celldm(1), with no CELL_PARAMETERS card.
    The primitive lattice constant is 3.867 Angstrom."""

    fixture_path = SI_IBRAV2_PATH