-
Notifications
You must be signed in to change notification settings - Fork 12
[WIP] Handle hex36 indices #663
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
0aa83f3
ceba9b1
425be62
b5ea786
83ba020
a220ced
4ee605b
b86b2da
042f0a6
1d27e15
a89bc19
9c3ce0a
ca4766b
9662b3e
8fd337a
840ac35
5950398
4ef38a7
ffd7883
f3a3964
f74750c
5fae83e
f3cda78
a547062
b731143
bb142d8
38d2d7d
ac062bb
20a5c08
3e9237f
027fbe7
7cbc27c
acd737e
ebbbca7
a76ee19
dcfb355
6ad338d
3832453
4bcea73
fbffc98
58ee35b
9592af6
5323908
0461a33
793ced4
dc36336
f18b9d2
c1ab249
ac085a6
6310b98
1ca2ef4
154ba94
2d7cd19
1a3490f
22aeb9c
c9d04fd
04ed4fd
66ed92f
47d50cd
77f411b
20d0dcd
07ccc81
c766096
07bced8
7a8966e
ec13568
bd5989c
aea6f83
e2f5931
07f0387
32c3055
99782b7
fb12f90
78b10c1
52a8d16
f16a508
35aa82e
7ce298f
511de64
d4e5948
66eda24
00de01b
216f3b4
8d5166b
0a4a5a7
882716d
b0e5c95
36d3225
133811b
d473138
e0019d3
b766d5c
436d3cf
0812fdf
5e8918b
a0e46e2
2186d47
e9892a4
73ebdc5
28fce52
313d302
907448c
ecc6219
2a52600
eeda36f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,46 @@ | ||
| from io import StringIO | ||
|
|
||
| from gufe.vendor.pdb_file.pdbstructure import PdbStructure, _parse_atom_index | ||
|
|
||
|
|
||
| def test_hex_conect_parsing(): | ||
| pdb_snippet = """ATOM 99999 C LIG A 1 0.000 0.000 0.000 1.00 0.00 C | ||
| ATOM A000F N LIG A 2 1.000 0.000 0.000 1.00 0.00 N | ||
| ATOM A000G O LIG A 3 0.000 1.000 0.000 1.00 0.00 O | ||
| CONECT99999A000FA000G""" | ||
|
|
||
| f = StringIO(pdb_snippet) | ||
| pdb = PdbStructure(f, load_all_models=True) | ||
|
|
||
| # Collect all atom serial numbers, including Maestro-style | ||
| atom_serials = [atom.serial_number for atom in pdb.iter_atoms(use_all_models=True)] | ||
|
|
||
| # There should be 3 atoms | ||
| assert len(atom_serials) == 3 | ||
|
|
||
| # All serial numbers should be integers and unique | ||
| for serial in atom_serials: | ||
| assert isinstance(serial, int) | ||
| assert len(set(atom_serials)) == 3 | ||
|
|
||
| # Convert the known Maestro-style indices to integers | ||
| a000f_serial = _parse_atom_index("A000F") | ||
| a000g_serial = _parse_atom_index("A000G") | ||
| assert a000f_serial in atom_serials | ||
| assert a000g_serial in atom_serials | ||
|
|
||
| # Check that CONECT records refer to the correct integers | ||
| conects = pdb._current_model.connects | ||
| assert len(conects) == 1 | ||
| central, bonded1, bonded2 = conects[0] | ||
| assert central == 99999 | ||
| # The bonded atoms match the converted Maestro-style serials | ||
| assert bonded1 in [a000f_serial, a000g_serial] | ||
| assert bonded2 in [a000f_serial, a000g_serial] | ||
| assert bonded1 != bonded2 | ||
|
|
||
| known_hex_serials = ["A000G", "A000F"] | ||
| for hex_serial in known_hex_serials: | ||
| idx = _parse_atom_index(hex_serial) | ||
| assert isinstance(idx, int) | ||
| assert idx >= 100000 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -165,13 +165,16 @@ def _load(self, input_stream): | |
| if command == "ATOM " or command == "HETATM": | ||
| self._add_atom(Atom(pdb_line, self, self.extraParticleIdentifier)) | ||
| elif command == "CONECT": | ||
| atoms = [_parse_atom_index(pdb_line[6:11])] | ||
| for pos in (11, 16, 21, 26): | ||
| try: | ||
| atoms.append(_parse_atom_index(pdb_line[pos : pos + 5])) | ||
| except: | ||
| pass | ||
| self._current_model.connects.append(atoms) | ||
| try: | ||
| atoms = [_parse_atom_index(pdb_line[6:11])] | ||
| for pos in (11, 16, 21, 26): | ||
| try: | ||
| atoms.append(_parse_atom_index(pdb_line[pos : pos + 5])) | ||
| except: | ||
| pass | ||
| self._current_model.connects.append(atoms) | ||
| except: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should catch an exception here |
||
| pass | ||
| # Notice MODEL punctuation, for the next level of detail | ||
| # in the structure->model->chain->residue->atom->position hierarchy | ||
| elif pdb_line[:5] == "MODEL": | ||
|
|
@@ -1060,12 +1063,27 @@ def __str__(self): | |
| return str(self.position) | ||
|
|
||
|
|
||
| def _parse_atom_index(index): | ||
| """Parse the string containing an atom index, which might be either decimal or hex.""" | ||
| try: | ||
| return int(index) | ||
| except: | ||
| return int(index, 16) - 0xA0000 + 100000 | ||
| def _parse_atom_index(index: str) -> int: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd rather this function raise a custom exception that we then catch where we call it, instead of using a bare `except` |
||
| """ | ||
| Parse an atom serial index from a PDB file, supporting: | ||
| - Decimal numbers (e.g., 12345) | ||
| - Standard hex (0–9, A–F) | ||
| - Maestro-style extended hex (letters beyond F, e.g., A000G) | ||
| """ | ||
| index = index.strip() | ||
|
|
||
| # Try decimal, then hex, then Maestro base36 | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. MH: check for a G to see which base we are using, then use the same base for the entire file, and we should be fine.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just to add a bit more from Slack: it might also be nice to have the parsing function take a base as an argument, so that if a user supplies a base, we can just use that value. We can then use another function to guess the base if the user doesn't give us one, and pass that base to the parsing function. |
||
| for base in (10, 16, 36): | ||
| try: | ||
| val = int(index, base) | ||
| if val >= 0xA0000: | ||
| val = val - 0xA0000 + 100000 | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we add some |
||
| return val | ||
| except ValueError: | ||
| continue | ||
|
|
||
| raise ValueError(f"Unable to parse atom index: '{index}'") | ||
|
|
||
|
|
||
|
|
||
| # run module directly for testing | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should catch an exception here