From 5ea19997dfcd9dac910025c3d82b2ee5208405e8 Mon Sep 17 00:00:00 2001 From: Fausto Milletari Date: Mon, 8 Jun 2026 18:26:24 +0000 Subject: [PATCH] Write _atom_site.occupancy in mmCIF output (#331) Biotite only emits an _atom_site column when the corresponding annotation is present on the AtomArray. Our writers set b_factor but not occupancy, so the occupancy column was omitted entirely. This broke downstream parsers that require it (e.g. BioPython's MMCIFParser, which raises KeyError: '_atom_site.occupancy'). Set occupancy=1.0 on atoms in ProteinChain.atom_array / atom_array_no_insertions (also covers ProteinComplex, which reuses chain.atom_array) and on the manually-built AtomArray in MolecularComplex.to_mmcif. Co-Authored-By: Claude Opus 4.8 (1M context) --- esm/utils/structure/molecular_complex.py | 4 ++++ esm/utils/structure/protein_chain.py | 2 ++ 2 files changed, 6 insertions(+) diff --git a/esm/utils/structure/molecular_complex.py b/esm/utils/structure/molecular_complex.py index dc679d83..16c8d73d 100644 --- a/esm/utils/structure/molecular_complex.py +++ b/esm/utils/structure/molecular_complex.py @@ -870,6 +870,10 @@ def to_mmcif(self) -> str: atom_array.atom_name = np.array(atom_names, dtype="U4") atom_array.add_annotation("b_factor", dtype=float) atom_array.b_factor = atom_bfactors + atom_array.add_annotation("occupancy", dtype=float) + atom_array.occupancy = np.ones( + n_atoms, dtype=np.float32 + ) # Necessary for BioPython MMCIFParser atom_array.add_annotation("entity_id", dtype=int) atom_array.entity_id = atom_entity_ids diff --git a/esm/utils/structure/protein_chain.py b/esm/utils/structure/protein_chain.py index 08f39df2..a4872d93 100644 --- a/esm/utils/structure/protein_chain.py +++ b/esm/utils/structure/protein_chain.py @@ -227,6 +227,7 @@ def atom_array(self) -> bs.AtomArray: atom_name=residue_constants.atom_types[i], element=residue_constants.atom_types[i][0], b_factor=float(b_factor), + occupancy=1.0, # Necessary for BioPython MMCIFParser ) atoms.append(atom) return bs.array(atoms) @@ -262,6 +263,7 @@ def atom_array_no_insertions(self) -> bs.AtomArray: atom_name=residue_constants.atom_types[i], element=residue_constants.atom_types[i][0], b_factor=float(b_factor), + occupancy=1.0, # Necessary for BioPython MMCIFParser ) atoms.append(atom) return bs.array(atoms)