ucgmsim · AndrewRidden-Harper · Jun 3, 2026 · Jun 3, 2026 · Jun 3, 2026 · Jun 4, 2026
diff --git a/source_modelling/srf.py b/source_modelling/srf.py
@@ -187,8 +187,7 @@ class SrfFile:
 
 
     points : pd.DataFrame
-        A list of SrfPoint objects representing the points in the SRF
-        file. The columns of the points dataframe are:
+        A dataframe of the points (subfaults) in the SRF file. The columns are:
 
         - lon: longitude of the patch.
         - lat: latitude of the patch.
@@ -198,11 +197,15 @@ class SrfFile:
         - area: area of the patch (in cm^2).
         - tinit: initial rupture time for this patch (in seconds).
         - dt: the timestep for all slipt columns (in seconds).
+        - vs: shear-wave velocity at the patch (in cm/s). Version 2.0 only.
+        - den: density at the patch (in g/cm^3). Version 2.0 only.
         - rake: local rake.
-        - slip: total slip.
+        - slip: total slip (in cm).
+        - rise: total rise time (in seconds), computed as nt * dt.
 
-        The final two columns are computed from the SRF and are not saved to
-        disk. See the linked documentation on the SRF format for more details.
+        The vs and den columns are only present when version is "2.0". The
+        rise column is computed from the SRF and is not written to disk. See
+        the linked documentation on the SRF format for more details.
 
     slipt1_array : csr_array
         A sparse array containing the slip for each point and at each timestep, where
@@ -234,8 +237,12 @@ def from_file(cls, srf_ffp: Path | str) -> Self:
         """
         with open(srf_ffp, mode="r", encoding="utf-8") as srf_file_handle:
             version = srf_file_handle.readline().strip()
+            if version not in {"1.0", "2.0"}:
+                raise parse_utils.ParseError(f"Unsupported SRF version: {version}")
 
             plane_count_line = srf_file_handle.readline().strip()
+            while plane_count_line.startswith("#"):  # genslip writes comments after the version line
+                plane_count_line = srf_file_handle.readline().strip()
             plane_count_match = re.match(PLANE_COUNT_RE, plane_count_line)
             if not plane_count_match:
                 raise parse_utils.ParseError(
@@ -274,24 +281,17 @@ def from_file(cls, srf_ffp: Path | str) -> Self:
             position = srf_file_handle.tell()
 
         points_metadata, slipt1_array = srf_parser.parse_srf(  # type: ignore
-            str(srf_ffp), position, point_count
+            str(srf_ffp), position, point_count, version == "2.0"
         )
 
+        columns = ["lon", "lat", "dep", "stk", "dip", "area", "tinit", "dt"]
+        if version == "2.0":
+            columns += ["vs", "den"]
+        columns += ["rake", "slip", "rise"]
+
         points_df = pd.DataFrame(
-            points_metadata.reshape((-1, 11)),
-            columns=[
-                "lon",
-                "lat",
-                "dep",
-                "stk",
-                "dip",
-                "area",
-                "tinit",
-                "dt",
-                "rake",
-                "slip",
-                "rise",
-            ],
+            points_metadata.reshape((-1, len(columns))),
+            columns=columns,
         )
 
         return cls(
@@ -312,7 +312,7 @@ def write_srf(self, srf_ffp: str | Path) -> None:
         """
 
         with open(srf_ffp, mode="w", encoding="utf-8") as srf_file_handle:
-            srf_file_handle.write("1.0\n")
+            srf_file_handle.write(f"{self.version}\n")
             srf_file_handle.write(f"PLANE {len(self.header)}\n")
             # Cannot use self.header.to_string because the newline separating headers is significant!
             # This is ok because the number of headers is typically very small (< 100)
@@ -365,7 +365,7 @@ def write_sw4_hdf5(
             )  # ty: ignore[invalid-assignment]
 
         # Build POINTS structured array
-        points_data = np.zeros(len(self.points), dtype=SW4_POINTS_DTYPE)
+        points_data: np.ndarray = np.zeros(len(self.points), dtype=SW4_POINTS_DTYPE)
         assert SW4_POINTS_DTYPE.names is not None
         for field in SW4_POINTS_DTYPE.names:
             if field in _SW4_POINTS_EXTERNAL_FIELDS:
@@ -374,7 +374,10 @@ def write_sw4_hdf5(
                 "slip" if field == "SLIP1" else field.lower()
             ].values.astype(SW4_POINTS_DTYPE[field].type)  # ty: ignore
 
-        points_data["NT1"] = np.diff(self.slipt1_array.indptr).astype(np.int32)  # ty: ignore[invalid-assignment]
+        points_data["NT1"] = np.diff(self.slipt1_array.indptr).astype(np.int32)
+        if self.version == "2.0":  # vs/den are mandatory in 2.0; missing columns will fail loudly
+            points_data["VS"] = self.points["vs"].to_numpy().astype(np.float32)
+            points_data["DEN"] = self.points["den"].to_numpy().astype(np.float32)
 
         with h5py.File(output_ffp, "w") as h5file:
             h5file.attrs.create("VERSION", np.float32(self.version))

diff --git a/source_modelling/srf_parser/src/lib.rs b/source_modelling/srf_parser/src/lib.rs
@@ -14,6 +14,11 @@ use std::io::BufWriter;
 use std::io::Error;
 use std::io::Write;
 
+/// Number of per-point metadata quantities returned for each SRF version
+/// (version 2.0 adds vs and den).
+const NUM_QUANTITIES_V1: usize = 11;
+const NUM_QUANTITIES_V2: usize = 13;
+
 #[derive(Default)]
 struct SparseMatrix {
     row_ptr: Vec<i64>,
@@ -67,9 +72,17 @@ fn parse_value<T: lexical_core::FromLexical>(
 fn read_srf_points(
     data: &[u8],
     point_count: usize,
+    read_vs_den: bool,
 ) -> Result<(Vec<f32>, SparseMatrix), lexical_core::Error> {
     let mut index: usize = 0;
-    let mut metadata = Vec::with_capacity(point_count * 11);
+    let mut metadata = Vec::with_capacity(
+        point_count
+            * if read_vs_den {
+                NUM_QUANTITIES_V2
+            } else {
+                NUM_QUANTITIES_V1
+            },
+    );
     let mut slipt1 = SparseMatrix::default();
 
     for _ in 0..point_count {
@@ -97,6 +110,11 @@ fn read_srf_points(
         let dt = parse_value::<f32>(data, &mut index)?;
         metadata.push(dt);
 
+        if read_vs_den {
+            metadata.push(parse_value::<f32>(data, &mut index)?);
+            metadata.push(parse_value::<f32>(data, &mut index)?);
+        }
+
         let rake = parse_value::<f32>(data, &mut index)?;
         metadata.push(rake);
 
@@ -130,11 +148,12 @@ fn parse_srf(
     file_path: &str,
     offset: usize,
     num_points: usize,
+    read_vs_den: bool,
 ) -> PyResult<(Py<PyAny>, Py<PyAny>)> {
     let file = File::open(file_path).or_else(marshall_os_error)?;
     let mmap = unsafe { MmapOptions::new().map(&file) }.or_else(marshall_os_error)?;
     let (metadata, sparse_matrix) =
-        read_srf_points(&mmap[offset..], num_points).or_else(marshall_value_error)?;
+        read_srf_points(&mmap[offset..], num_points, read_vs_den).or_else(marshall_value_error)?;
 
     let metadata_array = PyArray1::from_vec(py, metadata);
 
@@ -159,7 +178,10 @@ fn write_srf_points(
     let row_array = row_ptr.as_slice()?;
     let data_array = data.as_slice()?;
     let mut buffer = [0u8; BUFFER_SIZE];
-    let summary_length = 8;
+    // Data line 1 gets every point column except the trailing three (hence the
+    // - 3): rake and slip are written on line 2, and the derived rise column is
+    // not written at all. So summary_length is 8 for v1.0 and 10 (vs, den) for v2.0.
+    let summary_length = metadata_array.shape()[1] - 3;
     for (i, row) in metadata_array.outer_iter().enumerate() {
         // Write all but last element
         for v in row.iter().take(summary_length) {

diff --git a/tests/data/test_v2.srf.gz b/tests/data/test_v2.srf.gz
diff --git a/tests/srfs/bad_plane.srf b/tests/srfs/bad_plane.srf
@@ -0,0 +1,2 @@
+1.0
+not a plane header
diff --git a/tests/srfs/point_source_v2.srf b/tests/srfs/point_source_v2.srf
@@ -0,0 +1,11 @@
+2.0
+PLANE 1
+  172.0000  -43.0000    2     1     2.00     1.00
+  45   80     0.00     0.00     0.50
+POINTS 2
+  172.0000  -43.0000    0.5000   45   80  1.00000e+10     0.0000  1.00000e-01  3.50000e+05  2.70000e+00
+  90    10.00     2     0.00      0     0.00      0
+  0.00000e+00  5.00000e+00
+  172.0100  -43.0100    0.5000   45   80  1.00000e+10     0.1000  1.00000e-01  3.60000e+05  2.80000e+00
+  90    12.00     2     0.00      0     0.00      0
+  0.00000e+00  6.00000e+00
diff --git a/tests/test_srf.py b/tests/test_srf.py
@@ -1,3 +1,4 @@
+import gzip
 import tempfile
 from pathlib import Path
 
@@ -367,6 +368,9 @@ def test_junk_srfs():
     with pytest.raises(parse_utils.ParseError):
         srf.read_srf(SRF_DIR / "no_points.srf")
 
+    with pytest.raises(parse_utils.ParseError):
+        srf.read_srf(SRF_DIR / "bad_plane.srf")
+
 
 def test_writing_christchurch():
     """Check that writing a copy an SRF produces an SRF with the same values."""
@@ -562,3 +566,189 @@ def test_sw4_hdf5_read_write(tmp_path: Path):
         # Unused slip components stay zero
         for zero_field in ("SLIP2", "NT2", "SLIP3", "NT3"):
             assert points[zero_field] == pytest.approx(0)
+
+
+def test_read_srf_v2():
+    """Read a minimal hand-written version 2.0 SRF and verify every parsed value.
+
+    The 2-point source file is small enough that all the expected values here,
+    including the complete slip-rate sparse-matrix structure, can be checked by
+    eye against the file. See test_read_real_srf_v2 for the complementary test
+    on a real (genslip-generated) version 2.0 SRF.
+    """
+    srf_v2 = srf.read_srf(SRF_DIR / "point_source_v2.srf")
+    assert srf_v2.version == "2.0"
+    assert len(srf_v2.points) == 2
+    assert "vs" in srf_v2.points
+    assert "den" in srf_v2.points
+    assert srf_v2.points["vs"].tolist() == pytest.approx([3.5e5, 3.6e5])
+    assert srf_v2.points["den"].tolist() == pytest.approx([2.7, 2.8])
+    assert srf_v2.points.iloc[0].to_dict() == pytest.approx(
+        {
+            "lon": 172.0,
+            "lat": -43.0,
+            "dep": 0.5,
+            "stk": 45,
+            "dip": 80,
+            "area": 1.0e10,
+            "tinit": 0.0,
+            "dt": 0.1,
+            "vs": 3.5e5,
+            "den": 2.7,
+            "rake": 90,
+            "slip": 10.0,
+            "rise": 0.2,
+        }
+    )
+    # 2 points (rows) x 3 time-step columns. Columns are 0-indexed, so the column
+    # count is (highest filled column index) + 1 = 2 + 1 = 3 (columns 0, 1, 2).
+    assert srf_v2.slipt1_array.shape == (2, 3)
+    # the stored values, row by row; each point starts with a slip-rate of 0.0.
+    assert srf_v2.slipt1_array.data.tolist() == pytest.approx([0.0, 5.0, 0.0, 6.0])
+    # each sample's column = floor(tinit / dt) + offset, where offset is the
+    # sample's index within its point's slip-rate function (0 to nt1 - 1):
+    # point 0 (floor(0.0/0.1)=0) fills cols 0,1; point 1 (floor(0.1/0.1)=1) fills cols 1,2.
+    assert srf_v2.slipt1_array.indices.tolist() == [0, 1, 1, 2]
+    # row boundaries into data/indices: nt1 = 2 per point, so cuts at 0, 2, 4.
+    assert srf_v2.slipt1_array.indptr.tolist() == [0, 2, 4]
+
+
+def test_read_real_srf_v2(tmp_path: Path):
+    """Read a real genslip-generated version 2.0 SRF end to end.
+
+    Complements the hand-verifiable test_read_srf_v2 by covering what only a
+    real file exercises: the comment lines genslip writes after the version
+    line, and a full-size (2601-point) rupture. The expected values below are
+    spot checks transcribed from the first, middle and last point blocks of
+    the file. Because the parser consumes the file as one sequential token
+    stream, a correct last point implies it stayed aligned through every
+    preceding block.
+    """
+    srf_ffp = tmp_path / "test_v2.srf"
+    srf_ffp.write_bytes(
+        gzip.decompress((Path(__file__).parent / "data" / "test_v2.srf.gz").read_bytes())
+    )
+    real_srf = srf.read_srf(srf_ffp)
+    assert real_srf.version == "2.0"
+    assert real_srf.header.iloc[0].to_dict() == pytest.approx(
+        {
+            "elon": 176.514603,
+            "elat": -38.006092,
+            "nstk": 51,
+            "ndip": 51,
+            "len": 5.0699,
+            "wid": 5.0699,
+            "stk": 240,
+            "dip": 88,
+            "dtop": 0.0,
+            "shyp": 0.0,
+            "dhyp": 2.5350,
+        }
+    )
+    assert len(real_srf.points) == 2601
+    assert real_srf.points.iloc[0].to_dict() == pytest.approx(
+        {
+            "lon": 176.539108,
+            "lat": -37.994919,
+            "dep": 4.96747e-02,
+            "stk": 240,
+            "dip": 88,
+            "area": 9.88234e07,
+            "tinit": 5.815377,
+            "dt": 5.0e-03,
+            "vs": 3.8e04,
+            "den": 1.81,
+            "rake": -16,
+            "slip": 94.2758,
+            "rise": 64 * 5.0e-03,
+        }
+    )
+    assert real_srf.points.iloc[1300].to_dict() == pytest.approx(
+        {
+            "lon": 176.514099,
+            "lat": -38.005402,
+            "dep": 2.53341,
+            "stk": 240,
+            "dip": 88,
+            "area": 9.88234e07,
+            "tinit": 9.254894e-02,
+            "dt": 5.0e-03,
+            "vs": 2.28e05,
+            "den": 2.40,
+            "rake": -15,
+            "slip": 49.4048,
+            "rise": 59 * 5.0e-03,
+        }
+    )
+    assert real_srf.points.iloc[2600].to_dict() == pytest.approx(
+        {
+            "lon": 176.489090,
+            "lat": -38.015869,
+            "dep": 5.01714,
+            "stk": 240,
+            "dip": 88,
+            "area": 9.88234e07,
+            "tinit": 2.567641,
+            "dt": 5.0e-03,
+            "vs": 3.6e05,
+            "den": 2.72,
+            "rake": -7,
+            "slip": 15.8105,
+            "rise": 3 * 5.0e-03,
+        }
+    )
+    # the first point's slip-rate function holds nt1 = 64 samples
+    assert np.diff(real_srf.slipt1_array.indptr)[0] == 64
+    assert real_srf.slipt1_array.data[:3] == pytest.approx([0.0, 9.69786, 21.3934])
+    # the last point's slip-rate function holds nt1 = 3 samples
+    assert np.diff(real_srf.slipt1_array.indptr)[-1] == 3
+    assert real_srf.slipt1_array.data[-3:] == pytest.approx([0.0, 3.16209e03, 0.0])
+
+
+def test_read_srf_v1_has_no_vs_den():
+    """Regression: version 1.0 SRFs must not gain vs/den columns."""
+    christchurch_srf = srf.read_srf(SRF_DIR / "3468575.srf")
+    assert "vs" not in christchurch_srf.points
+    assert "den" not in christchurch_srf.points
+    assert christchurch_srf.points.shape[1] == 11
+
+
+def test_unsupported_version_srf(tmp_path: Path):
+    """An otherwise-valid SRF whose version is neither 1.0 nor 2.0 is rejected."""
+    bad_srf = tmp_path / "v9.srf"
+    bad_srf.write_text(
+        "9.0\n"
+        "PLANE 1\n"
+        "  172.0  -43.0   1   1   1.0   1.0\n"
+        "  45   80   0.0   0.0   0.5\n"
+        "POINTS 1\n"
+        "  172.0  -43.0   0.5   45   80   1.0e10   0.0   0.1\n"
+        "  90   10.0   1   0.0   0   0.0   0\n"
+        "  0.0\n"
+    )
+    with pytest.raises(parse_utils.ParseError):
+        srf.read_srf(bad_srf)
+
+
+def test_write_read_srf_v2(tmp_path: Path):
+    """Check that writing a version 2.0 SRF round-trips, including vs/den."""
+    srf_v2 = srf.read_srf(SRF_DIR / "point_source_v2.srf")
+    out = tmp_path / "roundtrip_v2.srf"
+    srf.write_srf(out, srf_v2)
+    reread = srf.read_srf(out)
+    assert reread.version == "2.0"
+    assert srf_v2.header.equals(reread.header)
+    assert srf_v2.points.equals(reread.points)
+    assert (srf_v2.slip != reread.slip).nnz == 0
+
+
+def test_sw4_hdf5_v2(tmp_path: Path):
+    """Test that write_sw4_hdf5 writes vs/den for a version 2.0 SRF."""
+    srf_v2 = srf.read_srf(SRF_DIR / "point_source_v2.srf")
+    out = tmp_path / "v2.h5"
+    srf_v2.write_sw4_hdf5(out)
+    with h5py.File(out, "r") as h5file:
+        assert h5file.attrs["VERSION"] == np.float32(2.0)
+        points = h5file["POINTS"]
+        assert points["VS"] == pytest.approx(srf_v2.points["vs"].to_numpy())
+        assert points["DEN"] == pytest.approx(srf_v2.points["den"].to_numpy())