diff --git a/source_modelling/srf.py b/source_modelling/srf.py index 8102c23c..ea57f5bc 100644 --- a/source_modelling/srf.py +++ b/source_modelling/srf.py @@ -187,8 +187,7 @@ class SrfFile: points : pd.DataFrame - A list of SrfPoint objects representing the points in the SRF - file. The columns of the points dataframe are: + A dataframe of the points (subfaults) in the SRF file. The columns are: - lon: longitude of the patch. - lat: latitude of the patch. @@ -198,11 +197,15 @@ class SrfFile: - area: area of the patch (in cm^2). - tinit: initial rupture time for this patch (in seconds). - dt: the timestep for all slipt columns (in seconds). + - vs: shear-wave velocity at the patch (in cm/s). Version 2.0 only. + - den: density at the patch (in g/cm^3). Version 2.0 only. - rake: local rake. - - slip: total slip. + - slip: total slip (in cm). + - rise: total rise time (in seconds), computed as nt * dt. - The final two columns are computed from the SRF and are not saved to - disk. See the linked documentation on the SRF format for more details. + The vs and den columns are only present when version is "2.0". The + rise column is computed from the SRF and is not written to disk. See + the linked documentation on the SRF format for more details. slipt1_array : csr_array A sparse array containing the slip for each point and at each timestep, where @@ -234,8 +237,12 @@ def from_file(cls, srf_ffp: Path | str) -> Self: """ with open(srf_ffp, mode="r", encoding="utf-8") as srf_file_handle: version = srf_file_handle.readline().strip() + if version not in {"1.0", "2.0"}: + raise parse_utils.ParseError(f"Unsupported SRF version: {version}") plane_count_line = srf_file_handle.readline().strip() + while plane_count_line.startswith("#"): # genslip writes comments after the version line + plane_count_line = srf_file_handle.readline().strip() plane_count_match = re.match(PLANE_COUNT_RE, plane_count_line) if not plane_count_match: raise parse_utils.ParseError( @@ -274,24 +281,17 @@ def from_file(cls, srf_ffp: Path | str) -> Self: position = srf_file_handle.tell() points_metadata, slipt1_array = srf_parser.parse_srf( # type: ignore - str(srf_ffp), position, point_count + str(srf_ffp), position, point_count, version == "2.0" ) + columns = ["lon", "lat", "dep", "stk", "dip", "area", "tinit", "dt"] + if version == "2.0": + columns += ["vs", "den"] + columns += ["rake", "slip", "rise"] + points_df = pd.DataFrame( - points_metadata.reshape((-1, 11)), - columns=[ - "lon", - "lat", - "dep", - "stk", - "dip", - "area", - "tinit", - "dt", - "rake", - "slip", - "rise", - ], + points_metadata.reshape((-1, len(columns))), + columns=columns, ) return cls( @@ -312,7 +312,7 @@ def write_srf(self, srf_ffp: str | Path) -> None: """ with open(srf_ffp, mode="w", encoding="utf-8") as srf_file_handle: - srf_file_handle.write("1.0\n") + srf_file_handle.write(f"{self.version}\n") srf_file_handle.write(f"PLANE {len(self.header)}\n") # Cannot use self.header.to_string because the newline separating headers is significant! # This is ok because the number of headers is typically very small (< 100) @@ -365,7 +365,7 @@ def write_sw4_hdf5( ) # ty: ignore[invalid-assignment] # Build POINTS structured array - points_data = np.zeros(len(self.points), dtype=SW4_POINTS_DTYPE) + points_data: np.ndarray = np.zeros(len(self.points), dtype=SW4_POINTS_DTYPE) assert SW4_POINTS_DTYPE.names is not None for field in SW4_POINTS_DTYPE.names: if field in _SW4_POINTS_EXTERNAL_FIELDS: @@ -374,7 +374,10 @@ def write_sw4_hdf5( "slip" if field == "SLIP1" else field.lower() ].values.astype(SW4_POINTS_DTYPE[field].type) # ty: ignore - points_data["NT1"] = np.diff(self.slipt1_array.indptr).astype(np.int32) # ty: ignore[invalid-assignment] + points_data["NT1"] = np.diff(self.slipt1_array.indptr).astype(np.int32) + if self.version == "2.0": # vs/den are mandatory in 2.0; missing columns will fail loudly + points_data["VS"] = self.points["vs"].to_numpy().astype(np.float32) + points_data["DEN"] = self.points["den"].to_numpy().astype(np.float32) with h5py.File(output_ffp, "w") as h5file: h5file.attrs.create("VERSION", np.float32(self.version)) diff --git a/source_modelling/srf_parser/src/lib.rs b/source_modelling/srf_parser/src/lib.rs index 821553f2..f7578898 100644 --- a/source_modelling/srf_parser/src/lib.rs +++ b/source_modelling/srf_parser/src/lib.rs @@ -14,6 +14,11 @@ use std::io::BufWriter; use std::io::Error; use std::io::Write; +/// Number of per-point metadata quantities returned for each SRF version +/// (version 2.0 adds vs and den). +const NUM_QUANTITIES_V1: usize = 11; +const NUM_QUANTITIES_V2: usize = 13; + #[derive(Default)] struct SparseMatrix { row_ptr: Vec, @@ -67,9 +72,17 @@ fn parse_value( fn read_srf_points( data: &[u8], point_count: usize, + read_vs_den: bool, ) -> Result<(Vec, SparseMatrix), lexical_core::Error> { let mut index: usize = 0; - let mut metadata = Vec::with_capacity(point_count * 11); + let mut metadata = Vec::with_capacity( + point_count + * if read_vs_den { + NUM_QUANTITIES_V2 + } else { + NUM_QUANTITIES_V1 + }, + ); let mut slipt1 = SparseMatrix::default(); for _ in 0..point_count { @@ -97,6 +110,11 @@ fn read_srf_points( let dt = parse_value::(data, &mut index)?; metadata.push(dt); + if read_vs_den { + metadata.push(parse_value::(data, &mut index)?); + metadata.push(parse_value::(data, &mut index)?); + } + let rake = parse_value::(data, &mut index)?; metadata.push(rake); @@ -130,11 +148,12 @@ fn parse_srf( file_path: &str, offset: usize, num_points: usize, + read_vs_den: bool, ) -> PyResult<(Py, Py)> { let file = File::open(file_path).or_else(marshall_os_error)?; let mmap = unsafe { MmapOptions::new().map(&file) }.or_else(marshall_os_error)?; let (metadata, sparse_matrix) = - read_srf_points(&mmap[offset..], num_points).or_else(marshall_value_error)?; + read_srf_points(&mmap[offset..], num_points, read_vs_den).or_else(marshall_value_error)?; let metadata_array = PyArray1::from_vec(py, metadata); @@ -159,7 +178,10 @@ fn write_srf_points( let row_array = row_ptr.as_slice()?; let data_array = data.as_slice()?; let mut buffer = [0u8; BUFFER_SIZE]; - let summary_length = 8; + // Data line 1 gets every point column except the trailing three (hence the + // - 3): rake and slip are written on line 2, and the derived rise column is + // not written at all. So summary_length is 8 for v1.0 and 10 (vs, den) for v2.0. + let summary_length = metadata_array.shape()[1] - 3; for (i, row) in metadata_array.outer_iter().enumerate() { // Write all but last element for v in row.iter().take(summary_length) { diff --git a/tests/data/test_v2.srf.gz b/tests/data/test_v2.srf.gz new file mode 100644 index 00000000..79af9e8a Binary files /dev/null and b/tests/data/test_v2.srf.gz differ diff --git a/tests/srfs/bad_plane.srf b/tests/srfs/bad_plane.srf new file mode 100644 index 00000000..ced0cae2 --- /dev/null +++ b/tests/srfs/bad_plane.srf @@ -0,0 +1,2 @@ +1.0 +not a plane header diff --git a/tests/srfs/point_source_v2.srf b/tests/srfs/point_source_v2.srf new file mode 100644 index 00000000..55040880 --- /dev/null +++ b/tests/srfs/point_source_v2.srf @@ -0,0 +1,11 @@ +2.0 +PLANE 1 + 172.0000 -43.0000 2 1 2.00 1.00 + 45 80 0.00 0.00 0.50 +POINTS 2 + 172.0000 -43.0000 0.5000 45 80 1.00000e+10 0.0000 1.00000e-01 3.50000e+05 2.70000e+00 + 90 10.00 2 0.00 0 0.00 0 + 0.00000e+00 5.00000e+00 + 172.0100 -43.0100 0.5000 45 80 1.00000e+10 0.1000 1.00000e-01 3.60000e+05 2.80000e+00 + 90 12.00 2 0.00 0 0.00 0 + 0.00000e+00 6.00000e+00 diff --git a/tests/test_srf.py b/tests/test_srf.py index da0a19f7..13fe99e0 100644 --- a/tests/test_srf.py +++ b/tests/test_srf.py @@ -1,3 +1,4 @@ +import gzip import tempfile from pathlib import Path @@ -367,6 +368,9 @@ def test_junk_srfs(): with pytest.raises(parse_utils.ParseError): srf.read_srf(SRF_DIR / "no_points.srf") + with pytest.raises(parse_utils.ParseError): + srf.read_srf(SRF_DIR / "bad_plane.srf") + def test_writing_christchurch(): """Check that writing a copy an SRF produces an SRF with the same values.""" @@ -562,3 +566,189 @@ def test_sw4_hdf5_read_write(tmp_path: Path): # Unused slip components stay zero for zero_field in ("SLIP2", "NT2", "SLIP3", "NT3"): assert points[zero_field] == pytest.approx(0) + + +def test_read_srf_v2(): + """Read a minimal hand-written version 2.0 SRF and verify every parsed value. + + The 2-point source file is small enough that all the expected values here, + including the complete slip-rate sparse-matrix structure, can be checked by + eye against the file. See test_read_real_srf_v2 for the complementary test + on a real (genslip-generated) version 2.0 SRF. + """ + srf_v2 = srf.read_srf(SRF_DIR / "point_source_v2.srf") + assert srf_v2.version == "2.0" + assert len(srf_v2.points) == 2 + assert "vs" in srf_v2.points + assert "den" in srf_v2.points + assert srf_v2.points["vs"].tolist() == pytest.approx([3.5e5, 3.6e5]) + assert srf_v2.points["den"].tolist() == pytest.approx([2.7, 2.8]) + assert srf_v2.points.iloc[0].to_dict() == pytest.approx( + { + "lon": 172.0, + "lat": -43.0, + "dep": 0.5, + "stk": 45, + "dip": 80, + "area": 1.0e10, + "tinit": 0.0, + "dt": 0.1, + "vs": 3.5e5, + "den": 2.7, + "rake": 90, + "slip": 10.0, + "rise": 0.2, + } + ) + # 2 points (rows) x 3 time-step columns. Columns are 0-indexed, so the column + # count is (highest filled column index) + 1 = 2 + 1 = 3 (columns 0, 1, 2). + assert srf_v2.slipt1_array.shape == (2, 3) + # the stored values, row by row; each point starts with a slip-rate of 0.0. + assert srf_v2.slipt1_array.data.tolist() == pytest.approx([0.0, 5.0, 0.0, 6.0]) + # each sample's column = floor(tinit / dt) + offset, where offset is the + # sample's index within its point's slip-rate function (0 to nt1 - 1): + # point 0 (floor(0.0/0.1)=0) fills cols 0,1; point 1 (floor(0.1/0.1)=1) fills cols 1,2. + assert srf_v2.slipt1_array.indices.tolist() == [0, 1, 1, 2] + # row boundaries into data/indices: nt1 = 2 per point, so cuts at 0, 2, 4. + assert srf_v2.slipt1_array.indptr.tolist() == [0, 2, 4] + + +def test_read_real_srf_v2(tmp_path: Path): + """Read a real genslip-generated version 2.0 SRF end to end. + + Complements the hand-verifiable test_read_srf_v2 by covering what only a + real file exercises: the comment lines genslip writes after the version + line, and a full-size (2601-point) rupture. The expected values below are + spot checks transcribed from the first, middle and last point blocks of + the file. Because the parser consumes the file as one sequential token + stream, a correct last point implies it stayed aligned through every + preceding block. + """ + srf_ffp = tmp_path / "test_v2.srf" + srf_ffp.write_bytes( + gzip.decompress((Path(__file__).parent / "data" / "test_v2.srf.gz").read_bytes()) + ) + real_srf = srf.read_srf(srf_ffp) + assert real_srf.version == "2.0" + assert real_srf.header.iloc[0].to_dict() == pytest.approx( + { + "elon": 176.514603, + "elat": -38.006092, + "nstk": 51, + "ndip": 51, + "len": 5.0699, + "wid": 5.0699, + "stk": 240, + "dip": 88, + "dtop": 0.0, + "shyp": 0.0, + "dhyp": 2.5350, + } + ) + assert len(real_srf.points) == 2601 + assert real_srf.points.iloc[0].to_dict() == pytest.approx( + { + "lon": 176.539108, + "lat": -37.994919, + "dep": 4.96747e-02, + "stk": 240, + "dip": 88, + "area": 9.88234e07, + "tinit": 5.815377, + "dt": 5.0e-03, + "vs": 3.8e04, + "den": 1.81, + "rake": -16, + "slip": 94.2758, + "rise": 64 * 5.0e-03, + } + ) + assert real_srf.points.iloc[1300].to_dict() == pytest.approx( + { + "lon": 176.514099, + "lat": -38.005402, + "dep": 2.53341, + "stk": 240, + "dip": 88, + "area": 9.88234e07, + "tinit": 9.254894e-02, + "dt": 5.0e-03, + "vs": 2.28e05, + "den": 2.40, + "rake": -15, + "slip": 49.4048, + "rise": 59 * 5.0e-03, + } + ) + assert real_srf.points.iloc[2600].to_dict() == pytest.approx( + { + "lon": 176.489090, + "lat": -38.015869, + "dep": 5.01714, + "stk": 240, + "dip": 88, + "area": 9.88234e07, + "tinit": 2.567641, + "dt": 5.0e-03, + "vs": 3.6e05, + "den": 2.72, + "rake": -7, + "slip": 15.8105, + "rise": 3 * 5.0e-03, + } + ) + # the first point's slip-rate function holds nt1 = 64 samples + assert np.diff(real_srf.slipt1_array.indptr)[0] == 64 + assert real_srf.slipt1_array.data[:3] == pytest.approx([0.0, 9.69786, 21.3934]) + # the last point's slip-rate function holds nt1 = 3 samples + assert np.diff(real_srf.slipt1_array.indptr)[-1] == 3 + assert real_srf.slipt1_array.data[-3:] == pytest.approx([0.0, 3.16209e03, 0.0]) + + +def test_read_srf_v1_has_no_vs_den(): + """Regression: version 1.0 SRFs must not gain vs/den columns.""" + christchurch_srf = srf.read_srf(SRF_DIR / "3468575.srf") + assert "vs" not in christchurch_srf.points + assert "den" not in christchurch_srf.points + assert christchurch_srf.points.shape[1] == 11 + + +def test_unsupported_version_srf(tmp_path: Path): + """An otherwise-valid SRF whose version is neither 1.0 nor 2.0 is rejected.""" + bad_srf = tmp_path / "v9.srf" + bad_srf.write_text( + "9.0\n" + "PLANE 1\n" + " 172.0 -43.0 1 1 1.0 1.0\n" + " 45 80 0.0 0.0 0.5\n" + "POINTS 1\n" + " 172.0 -43.0 0.5 45 80 1.0e10 0.0 0.1\n" + " 90 10.0 1 0.0 0 0.0 0\n" + " 0.0\n" + ) + with pytest.raises(parse_utils.ParseError): + srf.read_srf(bad_srf) + + +def test_write_read_srf_v2(tmp_path: Path): + """Check that writing a version 2.0 SRF round-trips, including vs/den.""" + srf_v2 = srf.read_srf(SRF_DIR / "point_source_v2.srf") + out = tmp_path / "roundtrip_v2.srf" + srf.write_srf(out, srf_v2) + reread = srf.read_srf(out) + assert reread.version == "2.0" + assert srf_v2.header.equals(reread.header) + assert srf_v2.points.equals(reread.points) + assert (srf_v2.slip != reread.slip).nnz == 0 + + +def test_sw4_hdf5_v2(tmp_path: Path): + """Test that write_sw4_hdf5 writes vs/den for a version 2.0 SRF.""" + srf_v2 = srf.read_srf(SRF_DIR / "point_source_v2.srf") + out = tmp_path / "v2.h5" + srf_v2.write_sw4_hdf5(out) + with h5py.File(out, "r") as h5file: + assert h5file.attrs["VERSION"] == np.float32(2.0) + points = h5file["POINTS"] + assert points["VS"] == pytest.approx(srf_v2.points["vs"].to_numpy()) + assert points["DEN"] == pytest.approx(srf_v2.points["den"].to_numpy())