Skip to content
Open
49 changes: 26 additions & 23 deletions source_modelling/srf.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,7 @@ class SrfFile:


points : pd.DataFrame
A list of SrfPoint objects representing the points in the SRF
file. The columns of the points dataframe are:
A dataframe of the points (subfaults) in the SRF file. The columns are:

- lon: longitude of the patch.
- lat: latitude of the patch.
Expand All @@ -198,11 +197,15 @@ class SrfFile:
- area: area of the patch (in cm^2).
- tinit: initial rupture time for this patch (in seconds).
- dt: the timestep for all slipt columns (in seconds).
- vs: shear-wave velocity at the patch (in cm/s). Version 2.0 only.
- den: density at the patch (in g/cm^3). Version 2.0 only.
- rake: local rake.
- slip: total slip.
- slip: total slip (in cm).
- rise: total rise time (in seconds), computed as nt * dt.

The final two columns are computed from the SRF and are not saved to
disk. See the linked documentation on the SRF format for more details.
The vs and den columns are only present when version is "2.0". The
rise column is computed from the SRF and is not written to disk. See
the linked documentation on the SRF format for more details.

slipt1_array : csr_array
A sparse array containing the slip for each point and at each timestep, where
Expand Down Expand Up @@ -234,8 +237,12 @@ def from_file(cls, srf_ffp: Path | str) -> Self:
"""
with open(srf_ffp, mode="r", encoding="utf-8") as srf_file_handle:
version = srf_file_handle.readline().strip()
if version not in {"1.0", "2.0"}:
raise parse_utils.ParseError(f"Unsupported SRF version: {version}")

plane_count_line = srf_file_handle.readline().strip()
while plane_count_line.startswith("#"): # genslip writes comments after the version line
plane_count_line = srf_file_handle.readline().strip()
plane_count_match = re.match(PLANE_COUNT_RE, plane_count_line)
if not plane_count_match:
raise parse_utils.ParseError(
Expand Down Expand Up @@ -274,24 +281,17 @@ def from_file(cls, srf_ffp: Path | str) -> Self:
position = srf_file_handle.tell()

points_metadata, slipt1_array = srf_parser.parse_srf( # type: ignore
str(srf_ffp), position, point_count
str(srf_ffp), position, point_count, version == "2.0"
)

columns = ["lon", "lat", "dep", "stk", "dip", "area", "tinit", "dt"]
Comment thread
AndrewRidden-Harper marked this conversation as resolved.
if version == "2.0":
columns += ["vs", "den"]
columns += ["rake", "slip", "rise"]

points_df = pd.DataFrame(
points_metadata.reshape((-1, 11)),
columns=[
"lon",
"lat",
"dep",
"stk",
"dip",
"area",
"tinit",
"dt",
"rake",
"slip",
"rise",
],
points_metadata.reshape((-1, len(columns))),
columns=columns,
)

return cls(
Expand All @@ -312,7 +312,7 @@ def write_srf(self, srf_ffp: str | Path) -> None:
"""

with open(srf_ffp, mode="w", encoding="utf-8") as srf_file_handle:
srf_file_handle.write("1.0\n")
srf_file_handle.write(f"{self.version}\n")
srf_file_handle.write(f"PLANE {len(self.header)}\n")
# Cannot use self.header.to_string because the newline separating headers is significant!
# This is ok because the number of headers is typically very small (< 100)
Expand Down Expand Up @@ -365,7 +365,7 @@ def write_sw4_hdf5(
) # ty: ignore[invalid-assignment]

# Build POINTS structured array
points_data = np.zeros(len(self.points), dtype=SW4_POINTS_DTYPE)
points_data: np.ndarray = np.zeros(len(self.points), dtype=SW4_POINTS_DTYPE)
assert SW4_POINTS_DTYPE.names is not None
for field in SW4_POINTS_DTYPE.names:
if field in _SW4_POINTS_EXTERNAL_FIELDS:
Expand All @@ -374,7 +374,10 @@ def write_sw4_hdf5(
"slip" if field == "SLIP1" else field.lower()
].values.astype(SW4_POINTS_DTYPE[field].type) # ty: ignore

points_data["NT1"] = np.diff(self.slipt1_array.indptr).astype(np.int32) # ty: ignore[invalid-assignment]
points_data["NT1"] = np.diff(self.slipt1_array.indptr).astype(np.int32)
if self.version == "2.0": # vs/den are mandatory in 2.0; missing columns will fail loudly
points_data["VS"] = self.points["vs"].to_numpy().astype(np.float32)
points_data["DEN"] = self.points["den"].to_numpy().astype(np.float32)

with h5py.File(output_ffp, "w") as h5file:
h5file.attrs.create("VERSION", np.float32(self.version))
Expand Down
28 changes: 25 additions & 3 deletions source_modelling/srf_parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ use std::io::BufWriter;
use std::io::Error;
use std::io::Write;

/// Number of per-point metadata quantities returned for each SRF version
/// (version 2.0 adds vs and den).
const NUM_QUANTITIES_V1: usize = 11;
const NUM_QUANTITIES_V2: usize = 13;

#[derive(Default)]
struct SparseMatrix {
row_ptr: Vec<i64>,
Expand Down Expand Up @@ -67,9 +72,17 @@ fn parse_value<T: lexical_core::FromLexical>(
fn read_srf_points(
data: &[u8],
point_count: usize,
read_vs_den: bool,
) -> Result<(Vec<f32>, SparseMatrix), lexical_core::Error> {
let mut index: usize = 0;
let mut metadata = Vec::with_capacity(point_count * 11);
let mut metadata = Vec::with_capacity(
point_count
* if read_vs_den {
NUM_QUANTITIES_V2
} else {
NUM_QUANTITIES_V1
},
);
let mut slipt1 = SparseMatrix::default();

for _ in 0..point_count {
Expand Down Expand Up @@ -97,6 +110,11 @@ fn read_srf_points(
let dt = parse_value::<f32>(data, &mut index)?;
metadata.push(dt);

if read_vs_den {
metadata.push(parse_value::<f32>(data, &mut index)?);
metadata.push(parse_value::<f32>(data, &mut index)?);
}

let rake = parse_value::<f32>(data, &mut index)?;
metadata.push(rake);

Expand Down Expand Up @@ -130,11 +148,12 @@ fn parse_srf(
file_path: &str,
offset: usize,
num_points: usize,
read_vs_den: bool,
) -> PyResult<(Py<PyAny>, Py<PyAny>)> {
let file = File::open(file_path).or_else(marshall_os_error)?;
let mmap = unsafe { MmapOptions::new().map(&file) }.or_else(marshall_os_error)?;
let (metadata, sparse_matrix) =
read_srf_points(&mmap[offset..], num_points).or_else(marshall_value_error)?;
read_srf_points(&mmap[offset..], num_points, read_vs_den).or_else(marshall_value_error)?;

let metadata_array = PyArray1::from_vec(py, metadata);

Expand All @@ -159,7 +178,10 @@ fn write_srf_points(
let row_array = row_ptr.as_slice()?;
let data_array = data.as_slice()?;
let mut buffer = [0u8; BUFFER_SIZE];
let summary_length = 8;
// Data line 1 gets every point column except the trailing three (hence the
// - 3): rake and slip are written on line 2, and the derived rise column is
// not written at all. So summary_length is 8 for v1.0 and 10 (vs, den) for v2.0.
let summary_length = metadata_array.shape()[1] - 3;
Comment thread
AndrewRidden-Harper marked this conversation as resolved.
for (i, row) in metadata_array.outer_iter().enumerate() {
// Write all but last element
for v in row.iter().take(summary_length) {
Expand Down
Binary file added tests/data/test_v2.srf.gz
Binary file not shown.
2 changes: 2 additions & 0 deletions tests/srfs/bad_plane.srf
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
1.0
not a plane header
11 changes: 11 additions & 0 deletions tests/srfs/point_source_v2.srf
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
2.0
PLANE 1
172.0000 -43.0000 2 1 2.00 1.00
45 80 0.00 0.00 0.50
POINTS 2
172.0000 -43.0000 0.5000 45 80 1.00000e+10 0.0000 1.00000e-01 3.50000e+05 2.70000e+00
90 10.00 2 0.00 0 0.00 0
0.00000e+00 5.00000e+00
172.0100 -43.0100 0.5000 45 80 1.00000e+10 0.1000 1.00000e-01 3.60000e+05 2.80000e+00
90 12.00 2 0.00 0 0.00 0
0.00000e+00 6.00000e+00
190 changes: 190 additions & 0 deletions tests/test_srf.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import gzip
import tempfile
from pathlib import Path

Expand Down Expand Up @@ -367,6 +368,9 @@ def test_junk_srfs():
with pytest.raises(parse_utils.ParseError):
srf.read_srf(SRF_DIR / "no_points.srf")

with pytest.raises(parse_utils.ParseError):
srf.read_srf(SRF_DIR / "bad_plane.srf")


def test_writing_christchurch():
"""Check that writing a copy an SRF produces an SRF with the same values."""
Expand Down Expand Up @@ -562,3 +566,189 @@ def test_sw4_hdf5_read_write(tmp_path: Path):
# Unused slip components stay zero
for zero_field in ("SLIP2", "NT2", "SLIP3", "NT3"):
assert points[zero_field] == pytest.approx(0)


def test_read_srf_v2():
"""Read a minimal hand-written version 2.0 SRF and verify every parsed value.

The 2-point source file is small enough that all the expected values here,
including the complete slip-rate sparse-matrix structure, can be checked by
eye against the file. See test_read_real_srf_v2 for the complementary test
on a real (genslip-generated) version 2.0 SRF.
"""
srf_v2 = srf.read_srf(SRF_DIR / "point_source_v2.srf")
assert srf_v2.version == "2.0"
assert len(srf_v2.points) == 2
assert "vs" in srf_v2.points
assert "den" in srf_v2.points
assert srf_v2.points["vs"].tolist() == pytest.approx([3.5e5, 3.6e5])
assert srf_v2.points["den"].tolist() == pytest.approx([2.7, 2.8])
assert srf_v2.points.iloc[0].to_dict() == pytest.approx(
{
"lon": 172.0,
"lat": -43.0,
"dep": 0.5,
"stk": 45,
"dip": 80,
"area": 1.0e10,
"tinit": 0.0,
"dt": 0.1,
"vs": 3.5e5,
"den": 2.7,
"rake": 90,
"slip": 10.0,
"rise": 0.2,
}
)
# 2 points (rows) x 3 time-step columns. Columns are 0-indexed, so the column
# count is (highest filled column index) + 1 = 2 + 1 = 3 (columns 0, 1, 2).
assert srf_v2.slipt1_array.shape == (2, 3)
# the stored values, row by row; each point starts with a slip-rate of 0.0.
assert srf_v2.slipt1_array.data.tolist() == pytest.approx([0.0, 5.0, 0.0, 6.0])
# each sample's column = floor(tinit / dt) + offset, where offset is the
# sample's index within its point's slip-rate function (0 to nt1 - 1):
# point 0 (floor(0.0/0.1)=0) fills cols 0,1; point 1 (floor(0.1/0.1)=1) fills cols 1,2.
assert srf_v2.slipt1_array.indices.tolist() == [0, 1, 1, 2]
# row boundaries into data/indices: nt1 = 2 per point, so cuts at 0, 2, 4.
assert srf_v2.slipt1_array.indptr.tolist() == [0, 2, 4]


def test_read_real_srf_v2(tmp_path: Path):
"""Read a real genslip-generated version 2.0 SRF end to end.

Complements the hand-verifiable test_read_srf_v2 by covering what only a
real file exercises: the comment lines genslip writes after the version
line, and a full-size (2601-point) rupture. The expected values below are
spot checks transcribed from the first, middle and last point blocks of
the file. Because the parser consumes the file as one sequential token
stream, a correct last point implies it stayed aligned through every
preceding block.
"""
srf_ffp = tmp_path / "test_v2.srf"
srf_ffp.write_bytes(
gzip.decompress((Path(__file__).parent / "data" / "test_v2.srf.gz").read_bytes())
)
real_srf = srf.read_srf(srf_ffp)
assert real_srf.version == "2.0"
assert real_srf.header.iloc[0].to_dict() == pytest.approx(
{
"elon": 176.514603,
"elat": -38.006092,
"nstk": 51,
"ndip": 51,
"len": 5.0699,
"wid": 5.0699,
"stk": 240,
"dip": 88,
"dtop": 0.0,
"shyp": 0.0,
"dhyp": 2.5350,
}
)
assert len(real_srf.points) == 2601
assert real_srf.points.iloc[0].to_dict() == pytest.approx(
{
"lon": 176.539108,
"lat": -37.994919,
"dep": 4.96747e-02,
"stk": 240,
"dip": 88,
"area": 9.88234e07,
"tinit": 5.815377,
"dt": 5.0e-03,
"vs": 3.8e04,
"den": 1.81,
"rake": -16,
"slip": 94.2758,
"rise": 64 * 5.0e-03,
}
)
assert real_srf.points.iloc[1300].to_dict() == pytest.approx(
{
"lon": 176.514099,
"lat": -38.005402,
"dep": 2.53341,
"stk": 240,
"dip": 88,
"area": 9.88234e07,
"tinit": 9.254894e-02,
"dt": 5.0e-03,
"vs": 2.28e05,
"den": 2.40,
"rake": -15,
"slip": 49.4048,
"rise": 59 * 5.0e-03,
}
)
assert real_srf.points.iloc[2600].to_dict() == pytest.approx(
{
"lon": 176.489090,
"lat": -38.015869,
"dep": 5.01714,
"stk": 240,
"dip": 88,
"area": 9.88234e07,
"tinit": 2.567641,
"dt": 5.0e-03,
"vs": 3.6e05,
"den": 2.72,
"rake": -7,
"slip": 15.8105,
"rise": 3 * 5.0e-03,
}
)
# the first point's slip-rate function holds nt1 = 64 samples
assert np.diff(real_srf.slipt1_array.indptr)[0] == 64
assert real_srf.slipt1_array.data[:3] == pytest.approx([0.0, 9.69786, 21.3934])
# the last point's slip-rate function holds nt1 = 3 samples
assert np.diff(real_srf.slipt1_array.indptr)[-1] == 3
assert real_srf.slipt1_array.data[-3:] == pytest.approx([0.0, 3.16209e03, 0.0])


def test_read_srf_v1_has_no_vs_den():
"""Regression: version 1.0 SRFs must not gain vs/den columns."""
christchurch_srf = srf.read_srf(SRF_DIR / "3468575.srf")
assert "vs" not in christchurch_srf.points
assert "den" not in christchurch_srf.points
assert christchurch_srf.points.shape[1] == 11


def test_unsupported_version_srf(tmp_path: Path):
"""An otherwise-valid SRF whose version is neither 1.0 nor 2.0 is rejected."""
bad_srf = tmp_path / "v9.srf"
bad_srf.write_text(
"9.0\n"
"PLANE 1\n"
" 172.0 -43.0 1 1 1.0 1.0\n"
" 45 80 0.0 0.0 0.5\n"
"POINTS 1\n"
" 172.0 -43.0 0.5 45 80 1.0e10 0.0 0.1\n"
" 90 10.0 1 0.0 0 0.0 0\n"
" 0.0\n"
)
with pytest.raises(parse_utils.ParseError):
srf.read_srf(bad_srf)


def test_write_read_srf_v2(tmp_path: Path):
"""Check that writing a version 2.0 SRF round-trips, including vs/den."""
srf_v2 = srf.read_srf(SRF_DIR / "point_source_v2.srf")
out = tmp_path / "roundtrip_v2.srf"
srf.write_srf(out, srf_v2)
reread = srf.read_srf(out)
assert reread.version == "2.0"
assert srf_v2.header.equals(reread.header)
assert srf_v2.points.equals(reread.points)
assert (srf_v2.slip != reread.slip).nnz == 0


def test_sw4_hdf5_v2(tmp_path: Path):
"""Test that write_sw4_hdf5 writes vs/den for a version 2.0 SRF."""
srf_v2 = srf.read_srf(SRF_DIR / "point_source_v2.srf")
out = tmp_path / "v2.h5"
srf_v2.write_sw4_hdf5(out)
with h5py.File(out, "r") as h5file:
assert h5file.attrs["VERSION"] == np.float32(2.0)
points = h5file["POINTS"]
assert points["VS"] == pytest.approx(srf_v2.points["vs"].to_numpy())
assert points["DEN"] == pytest.approx(srf_v2.points["den"].to_numpy())
Loading