From 75be7e395a7621d24fb47417425aebd67fe6b3b7 Mon Sep 17 00:00:00 2001
From: andrewklayk <andrew.klyachkin@gmail.com>
Date: Mon, 25 May 2026 13:52:32 +0200
Subject: [PATCH] hamiltonian pinn experiments

---
 ...structurePreservingPINN_2D_trainableACS.py | 758 +++++++++++++++++
 ...ePreservingPINN_2D_trainableACS_pytorch.py | 279 +++++++
 ...ePreservingPINN_CamassaHolm_trainableH1.py | 729 +++++++++++++++++
 ...ingPINN_CamassaHolm_trainableH1_pytorch.py | 389 +++++++++
 ...structurePreservingPINN_kdv_trainableH1.py | 769 ++++++++++++++++++
 ...ePreservingPINN_kdv_trainableH1_pytorch.py | 573 +++++++++++++
 6 files changed, 3497 insertions(+)
 create mode 100644 experiments/structure_preserving_pinns/structurePreservingPINN_2D_trainableACS.py
 create mode 100644 experiments/structure_preserving_pinns/structurePreservingPINN_2D_trainableACS_pytorch.py
 create mode 100644 experiments/structure_preserving_pinns/structurePreservingPINN_CamassaHolm_trainableH1.py
 create mode 100644 experiments/structure_preserving_pinns/structurePreservingPINN_CamassaHolm_trainableH1_pytorch.py
 create mode 100644 experiments/structure_preserving_pinns/structurePreservingPINN_kdv_trainableH1.py
 create mode 100644 experiments/structure_preserving_pinns/structurePreservingPINN_kdv_trainableH1_pytorch.py

diff --git a/experiments/structure_preserving_pinns/structurePreservingPINN_2D_trainableACS.py b/experiments/structure_preserving_pinns/structurePreservingPINN_2D_trainableACS.py
new file mode 100644
index 0000000..82beaa6
--- /dev/null
+++ b/experiments/structure_preserving_pinns/structurePreservingPINN_2D_trainableACS.py
@@ -0,0 +1,758 @@
+import numpy as np
+from sympy import N
+import tensorflow as tf
+from time import time
+import matplotlib.pyplot as plt
+
+# TF_USE_LEGACY_KERAS=True
+
+DTYPE = np.float32
+Nx = 10 #100
+Ny = 10 #100
+Nt = 10 #50
+N_collocation = Nx*Ny*Nt
+d_in = 3
+
+xMin = 0.0
+xMax = 8.0
+yMin = 0.0
+yMax = 8.0
+tMax = 50.
+d_in = 2
+
+def choose_width_depth(N_collocation=N_collocation, overparam_factor=3.0, d_in=d_in, d_out=1):
+    """
+    Returns a single (width, depth) pair for a mildly overparameterized PINN.
+    
+    N_collocation : Nx * Ny * Nt
+    overparam_factor : how many times larger the model than data (default 3x)
+    """
+    
+    N_target = int(overparam_factor * N_collocation)
+
+    # For 3rd-order PDEs like ZK:
+    # depth 5–7 is stable. We fix depth=6 (good compromise).
+    depth = 8
+
+    # Parameter formula:
+    # N = (depth-1) w^2 + (d_in + depth + d_out) w + d_out
+    a = depth - 1
+    b = d_in + depth + d_out
+    c = d_out - N_target
+
+    # Solve quadratic for width
+    width = int((-b + np.sqrt(b*b - 4*a*c)) / (2*a))
+
+    return width, depth
+
+width, depth = choose_width_depth()
+dx = (xMax - xMin) / (Nx - 1)
+dy = (yMax - yMin) / (Ny - 1)
+dt = tMax / (Nt - 1)
+h = np.max([dx, dy, dt])
+
+lambdas = [1., 1., 1.] #[1.7, 0.2, 1.4] #for cs  #[1., 0.5, 1.]  # [1.7, 0.2, 1.4] for cs  #
+lambdas = tf.Variable(lambdas, trainable=False, name='lambdas', dtype=DTYPE)
+
+cheb_par = tf.Variable(0.5, trainable=True, name='cheb_par', dtype=DTYPE)
+
+x = np.linspace(xMin, xMax, Nx).reshape((-1, 1)).astype(DTYPE)
+y = np.linspace(yMin, yMax, Ny).reshape((-1, 1)).astype(DTYPE)
+t = np.linspace(0, tMax, Nt).reshape((-1, 1)).astype(DTYPE)
+x_grid, y_grid, t_grid = np.meshgrid(x, y, t, indexing='ij')
+x_train = x_grid.flatten(); x_train = tf.convert_to_tensor(x_train); x_train = tf.expand_dims(x_train, axis=-1)
+y_train = y_grid.flatten(); y_train = tf.convert_to_tensor(y_train); y_train = tf.expand_dims(y_train, axis=-1)
+t_train = t_grid.flatten(); t_train = tf.convert_to_tensor(t_train); t_train = tf.expand_dims(t_train, axis=-1)
+xyt_train = tf.concat([x_train, y_train, t_train], axis=-1)
+
+save_fig = True
+
+# Define the initial condition
+def u_0(x, y):
+    ##1
+    epsilon = 0.01
+    theta = 0.
+    y1 = 0.
+    y2 = 0.
+    c1 = 0.45
+    c2 = 0.25
+    x1 = 2.5
+    x2 = 3.3
+    out = 3*c1/(tf.math.cosh(0.5*tf.sqrt(c1/epsilon)*((x-x1)*tf.math.cos(theta) + (y-y1)*tf.math.sin(theta))))**2 
+    + 3*c2/(tf.math.cosh(0.5*tf.sqrt(c2/epsilon)*((x-x2)*tf.math.cos(theta) + (y-y2)*tf.math.sin(theta))))**2
+    ##2
+    # epsilon = 0.01
+    # theta = 0.
+    # y1 = 4.
+    # c1 = 1.
+    # x1 = 2.5
+    # out = 3*c1/(tf.math.cosh(0.5*tf.sqrt(c1/epsilon)*((x-x1)*tf.math.cos(theta) + (y-y1)*tf.math.sin(theta))))**2
+
+    return out
+ # mpmath for sech
+
+# def periodic_boundary_conditions(model, Nbc=2000):
+#     x = tf.random.uniform((Nbc,1), xMin, xMax)
+#     y = tf.random.uniform((Nbc,1), yMin, yMax)
+#     t = tf.random.uniform((Nbc,1), 0, tMax)
+
+#     xL = tf.ones_like(x)*xMin; xR = tf.ones_like(x)*xMax
+#     yL = tf.ones_like(y)*yMin; yR = tf.ones_like(y)*yMax
+
+#     uLx = model(tf.concat([xL,y,t],1))
+#     uRx = model(tf.concat([xR,y,t],1))
+#     uLy = model(tf.concat([x,yL,t],1))
+#     uRy = model(tf.concat([x,yR,t],1))
+
+#     return tf.reduce_mean((uLx-uRx)**2 + (uLy-uRy)**2)
+
+
+def periodic_boundary_conditions(model, Nbc=2000):
+
+    # Random boundary sampling (correct choice)
+    x = tf.random.uniform((Nbc,1), xMin, xMax)
+    y = tf.random.uniform((Nbc,1), yMin, yMax)
+    t = tf.random.uniform((Nbc,1), 0.0, tMax)
+
+    xL = tf.ones_like(x) * xMin
+    xR = tf.ones_like(x) * xMax
+    yL = tf.ones_like(y) * yMin
+    yR = tf.ones_like(y) * yMax
+
+    with tf.GradientTape(persistent=True) as tape:
+        tape.watch([xL, xR, yL, yR])
+
+        uLx = model(tf.concat([xL, y, t], 1))
+        uRx = model(tf.concat([xR, y, t], 1))
+
+        uLy = model(tf.concat([x, yL, t], 1))
+        uRy = model(tf.concat([x, yR, t], 1))
+
+    # First derivatives
+    uxL = tape.gradient(uLx, xL)
+    uxR = tape.gradient(uRx, xR)
+
+    uyL = tape.gradient(uLy, yL)
+    uyR = tape.gradient(uRy, yR)
+
+    del tape
+
+    # Enforce periodicity of values AND derivatives
+    loss = tf.reduce_mean(
+        (uLx - uRx)**2 +
+        (uLy - uRy)**2 +
+        (uxL - uxR)**2 +
+        (uyL - uyR)**2
+    )
+
+    return loss
+
+
+
+def H(u, u_x, u_y):
+    return tf.reduce_sum((tf.pow(u_x,2) + tf.pow(u_y,2))/2.0-tf.pow(u,3)/6.0, axis=[0,1]) * dx*dy
+
+
+def linear_loss_function(tensors, weights):
+    """
+    Computes the sum of the input tensors.
+    
+    Args:
+        tensors (list of tf.Tensor): List of tensors to compute the sum.
+        
+    Returns:
+        tf.Tensor: The sum of the input tensors.
+    """
+    # weights = weights / tf.reduce_sum(weights)  # Normalize weights
+    # stack_tensor = tf.stack(tf.multiply(tensors, weights))
+    # stack_tensor = tf.stack([w * t for w, t in zip(weights, tensors)], axis=0)   
+    stacked = tf.stack(tensors, axis=0)   # shape (n_losses,)
+    weights = weights / tf.reduce_sum(weights)
+    loss = tf.reduce_sum(weights * stacked)  
+    loss_type = 'ls'
+    return loss, loss_type
+
+
+def chebyshev_loss_function(tensors, weights):
+    """
+    Computes the max of the input tensors.
+    
+    Args:
+        tensors (list of tf.Tensor): List of tensors to compute the log-sum-exp.
+        
+    Returns:
+        tf.Tensor: The maximum of the input tensors.
+    """
+    # weights = weights / tf.reduce_sum(weights)  # Normalize weights
+    # stack_tensor = tf.stack(tf.multiply(tensors, weights))
+    # stack_tensor = tf.stack([w * t for w, t in zip(weights, tensors)], axis=0)
+    stacked = tf.stack(tensors, axis=0)
+    loss = tf.reduce_max(weights*stacked)
+    loss_type = 'cs'
+    return loss, loss_type
+
+
+def smooth_chebyshev_loss_function(mu, tensors, weights):
+    """
+    Computes the log of the sum of the exponentials of the input tensors.
+    
+    Args:
+        tensors (list of tf.Tensor): List of tensors to compute the log-sum-exp.
+        
+    Returns:
+        tf.Tensor: The log-sum-exp of the input tensors.
+    """
+    weights = weights / tf.reduce_sum(weights)  # Normalize weights
+    # stack_tensor = tf.stack(tf.multiply(tensors, weights))
+    # stack_tensor = tf.stack([w * t for w, t in zip(weights, tensors)], axis=0)
+    stacked = tf.stack(tensors, axis=0)
+    exp_sum = tf.reduce_sum(tf.math.exp(stacked/mu), axis=0)
+    loss = mu*tf.math.log(exp_sum)
+    loss_type = 'scs'
+    return loss, loss_type
+
+
+def augmentedChebyshev_loss_function(tensors, weights):
+    """
+    Computes the log of the sum of the exponentials of the input tensors.
+    
+    Args:
+        tensors (list of tf.Tensor): List of tensors to compute the log-sum-exp.
+        weights (list of tf.Tensor): List of weights for each tensor.
+    Returns:
+        tf.Tensor: The log-sum-exp of the input tensors.
+    """
+    # weights = weights / tf.reduce_sum(weights)  # Normalize weights
+    loss_type = 'acs'
+    par = tf.sigmoid(cheb_par)  # par is between 0 and 1
+    return par*chebyshev_loss_function(tensors, weights)[0] + (1-par)*linear_loss_function(tensors, weights)[0], loss_type
+
+
+class FourierFeatures(tf.keras.layers.Layer):
+    def __init__(self, n_modes=5):
+        super().__init__()
+        self.n_modes = n_modes
+
+    def call(self, inputs):
+        x = inputs[:, 0:1]
+        y = inputs[:, 1:2]
+        t = inputs[:, 2:3]
+
+        features = [t]
+
+        for k in range(1, self.n_modes + 1):
+            features.append(tf.sin(2*np.pi*k*(x - xMin)/(xMax-xMin)))
+            features.append(tf.cos(2*np.pi*k*(x - xMin)/(xMax-xMin)))
+            features.append(tf.sin(2*np.pi*k*(y - yMin)/(yMax-yMin)))
+            features.append(tf.cos(2*np.pi*k*(y - yMin)/(yMax-yMin)))
+
+        return tf.concat(features, axis=1)
+    
+def PINNModel(num_hidden_layers=depth, num_neurons_per_layer=width):  # 8,80 OK (# 8,40  # 10,40)  
+    xyt_input = tf.keras.Input(shape=(3,))
+    output_u = FourierFeatures(n_modes=4)(xyt_input)
+    for _ in range(num_hidden_layers):
+        output_u = tf.keras.layers.Dense(num_neurons_per_layer,
+                                         activation='tanh',  # tanh
+                                         kernel_initializer='glorot_uniform',  # glorot_normal
+                                         )(output_u)
+
+    output_u = tf.keras.layers.Dense(units=1,
+                                     activation='linear',  # mish
+                                     kernel_initializer='glorot_uniform',  # glorot_normal
+                                     )(output_u)
+    
+    return tf.keras.Model(inputs=xyt_input, outputs=output_u)  #tf.keras.Model(inputs=[x_input, t_input], outputs=output_u)
+
+
+# def PINNModel(num_hidden_layers=depth, num_neurons_per_layer=width):  # 8,80 OK (# 8,40  # 10,40)
+#     xyt_input = tf.keras.Input(shape=(3,))
+#     output_u = xyt_input
+#     for _ in range(num_hidden_layers):
+#         output_u = tf.keras.layers.Dense(num_neurons_per_layer,
+#                                          activation='tanh',  # tanh
+#                                          kernel_initializer='glorot_uniform',  # glorot_normal
+#                                          )(output_u)
+
+#     output_u = tf.keras.layers.Dense(units=1,
+#                                      activation='linear',  # mish
+#                                      kernel_initializer='glorot_uniform',  # glorot_normal
+#                                      )(output_u)
+    
+#     # Define the initial condition
+#     # x_input = tf.reshape(xt_input[:, 0], shape=[-1, 1])
+#     # t_input = tf.reshape(xt_input[:, 1], shape=[-1, 1])
+#     # initial_u = u_0(x_input)
+#     # output_u = tf.where(tf.equal(t_input, 0), initial_u, output_u)
+
+#     return tf.keras.Model(inputs=xyt_input, outputs=output_u)  #tf.keras.Model(inputs=[x_input, t_input], outputs=output_u)
+
+
+@tf.function
+def custom_loss(inputs, model):
+    xyt = inputs
+    x, y, t = xyt[:, 0:1], xyt[:, 1:2], xyt[:, 2:3]
+    # zeros = tf.zeros_like(x)
+
+    with tf.GradientTape(persistent=True) as tape:
+        tape.watch(t)
+        tape.watch(x)
+        tape.watch(y)
+        with tf.GradientTape(persistent=True) as tape2:            
+            tape2.watch(x)
+            tape2.watch(y)
+            with tf.GradientTape(persistent=True) as tape3:
+                tape3.watch(t)
+                tape3.watch(x)
+                tape3.watch(y)
+                u_model = model(tf.concat([x,y,t], axis=1))
+            u_x = tape3.gradient(u_model, x)
+            u_y = tape3.gradient(u_model, y)
+            u_t = tape3.gradient(u_model, t)
+        u_xx = tape2.gradient(u_x, x)
+        u_xy = tape2.gradient(u_x, y)
+    u_xxx = tape.gradient(u_xx, x)
+    u_xyy = tape.gradient(u_xy, y)
+    del tape, tape2, tape3
+      
+    
+    # v = -nu*u_x
+    # phi_t = Vprime(u_model) - nu*u_xx - Vprime(u_model_0) + nu*u_0_xx
+    # w = -nu * u_xx + phi_t/2. - Vprime(u_model)
+    
+    # Compute the components of loss function
+    pde_loss = tf.reduce_mean((u_t + u_model * u_x + u_xxx + u_xyy) ** 2)
+    
+    # x_ic = tf.random.uniform((Nx*Ny,1), xMin, xMax)
+    # y_ic = tf.random.uniform((Nx*Ny,1), yMin, yMax)
+    x_ic = tf.expand_dims(tf.linspace(xMin, xMax, Nx*Ny), axis=-1)  # For grid sampling
+    y_ic = tf.expand_dims(tf.linspace(yMin, yMax, Nx*Ny), axis=-1)  # For grid sampling
+    t_ic = tf.zeros_like(x_ic)
+    u_ic = u_0(x_ic, y_ic)  # Initial condition
+    t_ic = tf.zeros_like(x_ic)  # t=0 for initial condition
+    u_ic_pred = model(tf.concat([x_ic, y_ic, t_ic], axis=1))  # Predicted initial condition
+    data_fitting_loss_0 = tf.reduce_mean((u_ic_pred - u_ic) ** 2)
+    data_fitting_loss_l_r = periodic_boundary_conditions(model)
+
+    # Combine the components of the loss functions
+    # loss, loss_type = linear_loss_function([pde_loss, data_fitting_loss_0, data_fitting_loss_l_r], tf.exp(lambdas))
+    # loss, loss_type = linear_loss_function([pde_loss, data_fitting_loss_0, data_fitting_loss_l_r], lambdas)
+    # loss, loss_type = chebyshev_loss_function([pde_loss, data_fitting_loss_0, data_fitting_loss_l_r], tf.exp(lambdas))
+    # loss, loss_type = chebyshev_loss_function([pde_loss, data_fitting_loss_0, data_fitting_loss_l_r], lambdas)
+    # loss, loss_type = smooth_chebyshev_loss_function(.1, [pde_loss, data_fitting_loss_0, data_fitting_loss_l_r], lambdas)
+    loss, loss_type = augmentedChebyshev_loss_function([pde_loss, data_fitting_loss_0, data_fitting_loss_l_r], lambdas)
+   
+    # S_loss = S(u_model, v, w)
+    H_loss = H(tf.reshape(u_model, shape=[Nx, Ny, Nt]), tf.reshape(u_x, shape=[Nx, Ny, Nt]), tf.reshape(u_y, shape=[Nx, Ny, Nt]))
+    # beta = 1e-3
+    # data_fitting_loss = loss = beta*tf.math.log(tf.math.exp(data_fitting_loss_weight_0 * data_fitting_loss_0 / beta) 
+    #                                             + tf.math.exp(data_fitting_loss_weight_l * data_fitting_loss_l / beta) 
+    #                                             + tf.math.exp(data_fitting_loss_weight_r * data_fitting_loss_r / beta))
+    # loss = beta*tf.math.log(tf.math.exp(pde_loss_weight * pde_loss / beta) 
+    #                         + tf.math.exp(data_fitting_loss_weight_0 * data_fitting_loss_0 / beta) 
+    #                         + tf.math.exp(data_fitting_loss_weight_l * data_fitting_loss_l / beta) 
+    #                         + tf.math.exp(data_fitting_loss_weight_r * data_fitting_loss_r / beta))
+    # data_fitting_loss = tf.math.reduce_max(tf.constant([data_fitting_loss_weight_0 * data_fitting_loss_0, 
+    #                                                     data_fitting_loss_weight_l * data_fitting_loss_l, 
+    #                                                     data_fitting_loss_weight_r * data_fitting_loss_r]))
+    # loss = tf.math.reduce_max(tf.constant([pde_loss_weight * pde_loss, 
+    #                                        data_fitting_loss_weight_0 * data_fitting_loss_0, 
+    #                                        data_fitting_loss_weight_l * data_fitting_loss_l, 
+    #                                        data_fitting_loss_weight_r * data_fitting_loss_r]))
+
+    return loss, loss_type, pde_loss, data_fitting_loss_0, data_fitting_loss_l_r, H_loss#, S_loss
+
+
+# Create the PINN model
+model = PINNModel()
+model.summary()
+
+epochs = 500  # 5000  # 1000
+# # Compile the model
+# model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
+#               loss=lambda y_true, y_pred: custom_loss([x_train, t_train, theta_train], model)[1])
+
+# Create the optimizer with a smaller learning rate
+# learning_rate = 1e-3  # 1e-4
+# learning_rate_type = 'constant'
+# learning_rate = tf.keras.optimizers.schedules.PiecewiseConstantDecay([10, 100], [1e-1, 5e-2, 1e-2])  #OK
+# learning_rate = tf.keras.optimizers.schedules.PiecewiseConstantDecay([100, 300], [1e-2, 1e-3, 1e-4])
+learning_rate = tf.keras.optimizers.schedules.PolynomialDecay(
+    initial_learning_rate=1e-2,
+    decay_steps=epochs,
+    end_learning_rate=1e-4,
+    power=3.,
+    cycle=False,
+    name= 'PolynomialDecay'
+)
+# learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(
+#     initial_learning_rate=1e-3,
+#     decay_steps=50, # 100
+#     decay_rate=0.9,
+#     staircase=False,
+#     name='ExponentialDecay'
+# )
+# learning_rate = tf.keras.optimizers.schedules.CosineDecay(
+#     initial_learning_rate=1e-3,
+#     decay_steps=1000,
+#     alpha=0.0,
+#     warmup_target=None,
+#     warmup_steps=0,
+#     name='CosineDecay'
+# )
+learning_rate_type = learning_rate.name
+
+trainable = model.trainable_variables
+if lambdas.trainable:
+    trainable += [lambdas]
+    
+if cheb_par.trainable:
+    trainable += [cheb_par]
+
+# optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=1e-08, amsgrad=True)
+# optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9, nesterov=True)
+optimizer = tf.keras.optimizers.Nadam(learning_rate=learning_rate, beta_1=0.8, beta_2=0.9, epsilon=1e-07)
+# optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate, rho=0.9, momentum=0.0, epsilon=1e-07, centered=False)
+
+# Training loop
+losses = []
+pde_losses = []
+data_fitting_losses_0 = []
+data_fitting_losses_l_r = []
+delta_gradients = []
+# S_losses_min = []
+# S_losses_max = []
+H_losses_min = []
+H_losses_max = []
+H_losses_mean = []
+H_losses_std = []
+H_losses_abs_error = []
+H_losses_rel_error = []
+lambdas_values = []
+lambdas_values.append(lambdas.numpy())
+cheb_par_values = []
+cheb_par_values.append(cheb_par.numpy())
+
+
+# Convert data to tensor because tf.GradientTape() can only watch tensor and not numpy arrays
+inputs = xyt_train
+stop = False
+# Start timer
+t0 = time()
+for epoch in range(epochs):
+    if not stop:
+        # print("# STARTING EPOCH", epoch + 1)
+
+        # Create a LearningRateScheduler to update the learning rate
+        # current_lr = scheduler(epoch, learning_rate)
+        # tf.keras.backend.set_value(optimizer.lr, current_lr)
+
+        with tf.GradientTape() as tape:
+            loss, loss_type, pde_loss, data_fitting_loss_0, data_fitting_loss_l_r, H_loss = custom_loss(inputs, model)
+
+        # print("Computing gradients")
+        gradients = tape.gradient(loss, trainable)
+        # print(gradients[-1])
+        # print("Applying gradients")
+        optimizer.apply_gradients(zip(gradients, trainable))
+        # print("Appending losses")
+        losses.append(loss.numpy())
+        pde_losses.append(pde_loss.numpy())
+        data_fitting_losses_0.append(data_fitting_loss_0.numpy())
+        data_fitting_losses_l_r.append(data_fitting_loss_l_r.numpy())
+        # param_values.append((trainable[-1]).numpy())
+        # delta_gradients.append((gradients[-1]).numpy())
+        # S_loss_min = tf.reduce_min(S_loss)
+        # S_loss_max = tf.reduce_max(S_loss)
+        # S_losses_min.append(S_loss_min.numpy())
+        # S_losses_max.append(S_loss_max.numpy())
+        H_loss_min = tf.reduce_min(H_loss)
+        H_loss_max = tf.reduce_max(H_loss)
+        H_losses_min.append(H_loss_min.numpy())
+        H_losses_max.append(H_loss_max.numpy())
+        H_loss_mean = tf.reduce_mean(H_loss)
+        H_loss_std = tf.math.reduce_std(H_loss)
+        H_losses_mean.append(H_loss_mean.numpy())
+        H_losses_std.append(H_loss_std.numpy())
+        # lambdas_values.append((trainable[-1]).numpy())
+        
+        H0 = H_loss[0].numpy()
+        Hf = H_loss[-1].numpy()
+        H_abs_error = tf.abs(Hf - H0)
+        H_losses_abs_error.append(H_abs_error.numpy())
+        H_rel_error = H_abs_error / tf.abs((H0 + 1e-16))
+        H_losses_rel_error.append(H_rel_error.numpy())
+
+        # # Print S_loss, H_loss
+        # print(f"S_loss at epoch {epoch + 1}: {S_loss.numpy()}")
+        # print(f"H_loss at epoch {epoch + 1}: {H_loss.numpy()}")
+        
+        if len(losses) > 1 and not lambdas.trainable:# and False:
+        # SoftAdaptive weights update
+            # num1 = tf.math.exp(pde_losses[-1] - pde_losses[-2])
+            # num2 = tf.math.exp(data_fitting_losses_0[-1] - data_fitting_losses_0[-2])
+            # num3 = tf.math.exp(data_fitting_losses_l_r[-1] - data_fitting_losses_l_r[-2])  
+            num = tf.nn.softmax([pde_losses[-1] - pde_losses[-2], data_fitting_losses_0[-1] - data_fitting_losses_0[-2], data_fitting_losses_l_r[-1] - data_fitting_losses_l_r[-2]])
+            num1 = num[0]
+            num2 = num[1]
+            num3 = num[2]
+            den  = num1 + num2 + num3
+
+            new_lambdas = tf.stack([num1 / den, num2 / den, num3 / den])
+            lambdas.assign(new_lambdas)
+            # lambdas_values.append((lambdas).numpy())
+        
+        if cheb_par.trainable:
+            cheb_par_values.append(cheb_par.numpy())
+            
+        del tape
+
+        if epoch % 100 == 0 or epoch == epochs - 1:
+            print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.numpy()}")
+
+        if len(losses) > 2 and np.abs(losses[-1] - losses[-2]) / np.abs(losses[-2]) < 1e-8:
+            stop = True
+
+print(f"Loss type: {loss_type}")
+print(f"Hamiltonian mean: {H_loss_mean.numpy()}")
+print(f"Hamiltonian standard deviation: {H_loss_std.numpy()}")
+print(f"Hamiltonian maximum: {H_loss_max.numpy()}")
+print(f"Hamiltonian minimum: {H_loss_min.numpy()}")
+print(f"Hamiltonian absolute error: {H_abs_error.numpy()}")
+print(f"Hamiltonian relative error: {H_rel_error.numpy()}")
+# Print computation time
+print('\nComputation time: {} seconds'.format(time() - t0))
+
+
+def generate_save_fig_string(type, epochs, learning_rate_type, loss_type):
+    """
+    Generates a string for saving figures that includes the number of epochs and the type of learning rate.
+    
+    Args:
+        epochs (int): The number of epochs.
+        learning_rate_type (str): The type of learning rate.
+        
+    Returns:
+        str: The generated string for saving figures.
+    """
+    return f"./results/{type}_epochs_{epochs}_lr_{learning_rate_type}_{loss_type}.png"
+
+# Plot the loss history
+plt.semilogy(losses, label='Total Loss')
+plt.semilogy(pde_losses, label='PDE Loss')
+plt.semilogy(data_fitting_losses_0, label='Initial Conditions Loss')
+plt.semilogy(data_fitting_losses_l_r, label='Periodic Boundary Conditions Loss')
+plt.xlabel('Epoch')
+plt.ylabel('Loss')
+plt.title('Loss Contributions')
+plt.legend()
+plt.grid()
+
+if save_fig:
+    save_fig_string = generate_save_fig_string('loss', epochs, learning_rate_type, loss_type)
+    # save png
+    plt.savefig(save_fig_string, dpi=300)
+plt.show()
+    # # save pdf
+    # plt.savefig('../results/' + 'loss.pdf', dpi=300)
+
+# # Evaluate the function
+# x_eval = np.linspace(x_train[0].numpy(), x_train[-1].numpy(), 100).reshape((-1, 1)).astype(np.float32)
+# y_eval = np.linspace(y_train[0].numpy(), y_train[-1].numpy(), 100).reshape((-1, 1)).astype(np.float32)
+# t_eval = np.linspace(t_train[0].numpy(), t_train[-1].numpy(), 100).reshape((-1, 1)).astype(np.float32)
+# inputs_eval = [x_eval, y_eval, t_eval]
+
+# # Plot the parameters over epochs
+# plt.plot(S_losses_min, label='S_loss_min')
+# plt.plot(S_losses_max, label='S_loss_max')
+# plt.xlabel('Epoch')
+# plt.ylabel('Multisymplectic Constant')
+# plt.title('Multisymplectic Constant over epochs')
+# plt.legend()
+# plt.grid()
+# 
+# if save_fig:    
+#     save_fig_string = generate_save_fig_string('S_loss', epochs, learning_rate_type, loss_type)
+#     # save png
+#     plt.savefig(save_fig_string, dpi=300)
+#     # # save pdf
+#     # plt.savefig('../results/' + 'S_loss.pdf', dpi=300)
+    
+
+# Plot the Hamiltonian over epochs
+plt.plot(H_losses_min, label='H_loss_min')
+plt.plot(H_losses_max, label='H_loss_max')
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian')
+plt.title('Hamiltonian over epochs')
+plt.legend()
+plt.grid()
+
+if save_fig:
+    save_fig_string = generate_save_fig_string('H_loss', epochs, learning_rate_type, loss_type)
+    # save png
+    plt.savefig(save_fig_string, dpi=300)
+plt.show()
+    # # save pdf
+    # plt.savefig('../results/' + 'H_loss.pdf', dpi=300)
+
+# Plot the average Hamiltonian over epochs with standard deviation
+H_losses_mean = np.array(H_losses_mean)
+H_losses_std = np.array(H_losses_std)
+H_losses_abs_error = np.array(H_losses_abs_error)
+H_losses_rel_error = np.array(H_losses_rel_error)
+
+plt.plot(H_losses_mean)
+plt.fill_between(range(len(H_losses_mean)), H_losses_mean - H_losses_std, H_losses_mean + H_losses_std, alpha=0.2)
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian mean')
+plt.title('Hamiltonian mean over epochs with standard deviation')
+# plt.legend()
+plt.grid()
+
+if save_fig:
+    save_fig_string = generate_save_fig_string('H_loss_mean', epochs, learning_rate_type, loss_type)
+    # save png
+    plt.savefig(save_fig_string, dpi=300)
+plt.show()
+    # # save pdf
+    # plt.savefig('../results/' + 'H_loss_std.pdf', dpi=300)
+
+# Plot the standard deviation of the Hamiltonian over epochs
+plt.plot(H_losses_std)
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian std')
+plt.title('Hamiltonian standard deviation over epochs')
+# plt.legend()
+plt.grid()
+
+if save_fig:
+    save_fig_string = generate_save_fig_string('H_loss_std', epochs, learning_rate_type, loss_type)
+    # save png
+    plt.savefig(save_fig_string, dpi=300)
+plt.show()
+    # # save pdf
+    # plt.savefig('../results/' + 'H_loss_std.pdf', dpi=300)
+
+
+# Plot the absolute error of the Hamiltonian over epochs
+plt.plot(H_losses_abs_error)    
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian absolute error')
+plt.title('Hamiltonian absolute error over epochs')
+# plt.legend()
+plt.grid()
+
+if save_fig:
+    save_fig_string = generate_save_fig_string('H_loss_abs_error', epochs, learning_rate_type, loss_type)
+    # save png
+    plt.savefig(save_fig_string, dpi=300)
+plt.show()
+    # # save pdf
+    # plt.savefig('../results/' + 'H_loss_rel_error.pdf', dpi=300)
+
+    
+# Plot the relative error of the Hamiltonian over epochs
+plt.plot(H_losses_rel_error)    
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian relative error')
+plt.title('Hamiltonian relative error over epochs')
+# plt.legend()
+plt.grid()
+
+if save_fig:
+    save_fig_string = generate_save_fig_string('H_loss_rel_error', epochs, learning_rate_type, loss_type)
+    # save png
+    plt.savefig(save_fig_string, dpi=300)
+plt.show()
+    # # save pdf
+    # plt.savefig('../results/' + 'H_loss_rel_error.pdf', dpi=300)
+
+
+# Plot the Chebyshev parameter over epochs
+if cheb_par.trainable:
+    plt.plot(tf.sigmoid(cheb_par_values))
+    plt.xlabel('Epoch')
+    plt.ylabel('Chebyshev parameter')
+    plt.title('Chebyshev parameter over epochs')
+    plt.grid()
+    
+    if save_fig:
+        save_fig_string = generate_save_fig_string('cheb_par', epochs, learning_rate_type, loss_type)
+        # save png
+        plt.savefig(save_fig_string, dpi=300)
+plt.show()
+        # # save pdf
+        # plt.savefig('../results/' + 'cheb_par.pdf', dpi=300)    
+
+
+import pandas as pd
+df = pd.DataFrame()
+
+df['total_loss'] = losses
+df['pde_loss'] = pde_losses
+df['data_fitting_loss_0'] = data_fitting_losses_0
+df['data_fitting_loss_l_r'] = data_fitting_losses_l_r
+df['H_loss_min'] = H_losses_min
+df['H_loss_max'] = H_losses_max
+df['H_loss_mean'] = H_losses_mean
+df['H_loss_std'] = H_losses_std
+df['H_loss_abs_error'] = H_losses_abs_error
+df['H_loss_rel_error'] = H_losses_rel_error
+# df['cheb_par'] = cheb_par_values
+
+df.to_csv('./results/2D/training_history.csv', index=False)
+# from mpl_toolkits.mplot3d import Axes3D
+
+# # Set up meshgrid
+# N = 600
+# tspace = np.linspace(0, 2, N + 1)
+# xspace = np.linspace(0, 2, N + 1)
+# yspace = np.linspace(0, 2, N + 1)
+# T, X , Y= np.meshgrid(tspace, xspace, yspace)
+# XYTgrid = np.vstack([X.flatten(),Y.flatten(),T.flatten()]).T
+
+# # Determine predictions of u(t, x)
+# u_pred = model(tf.cast(XYTgrid,DTYPE))
+
+# # Reshape upred
+# U = u_pred.numpy().reshape(N+1,N+1,N+1)
+
+# # Surface plot of solution u(t,x)
+# fig = plt.figure(figsize=(9,6))
+# ax = fig.add_subplot(111, projection='3d')
+# ax.plot_surface(X, Y, U, cmap='viridis')
+# ax.view_init(35,35)
+# ax.set_xlabel('$x$')
+# ax.set_ylabel('$y$')
+# ax.set_zlabel('$u_\\theta(x,y,t)$')
+# ax.set_title('Solution to KdV equation')
+# if save_fig:
+#     save_fig_string = generate_save_fig_string('sol', epochs, learning_rate_type, loss_type)
+#     # save png
+#     plt.savefig(save_fig_string, dpi=300)
+#     # # save pdf
+#     # plt.savefig('../results/' + 'solution.pdf', dpi=300)
+
+# # Extract the components of lambdas over epochs
+# lambda_1 = [l[0] for l in lambdas_values]
+# lambda_2 = [l[1] for l in lambdas_values]
+# lambda_3 = [l[2] for l in lambdas_values]
+
+# # Plot the components of lambdas
+# plt.figure(figsize=(10, 6))
+# plt.plot(lambda_1, label='$\lambda_1$', color='r')
+# plt.plot(lambda_2, label='$\lambda_2$', color='g')
+# plt.plot(lambda_3, label='$\lambda_3$', color='b')
+# plt.xlabel('Epochs')
+# plt.ylabel('Weights Values')
+# plt.title('Evolution of weight components over training')
+# plt.legend()
+# plt.grid()
+# 
+
+# # Save the plot if required
+# if save_fig:
+#     save_fig_string = generate_save_fig_string('lambdas', epochs, learning_rate_type, loss_type)
+#     plt.savefig(save_fig_string, dpi=300)
+
diff --git a/experiments/structure_preserving_pinns/structurePreservingPINN_2D_trainableACS_pytorch.py b/experiments/structure_preserving_pinns/structurePreservingPINN_2D_trainableACS_pytorch.py
new file mode 100644
index 0000000..e2350db
--- /dev/null
+++ b/experiments/structure_preserving_pinns/structurePreservingPINN_2D_trainableACS_pytorch.py
@@ -0,0 +1,279 @@
+import numpy as np
+import torch
+import torch.nn as nn
+from time import time
+import matplotlib.pyplot as plt
+
+DTYPE = torch.float32
+torch.set_default_dtype(DTYPE)
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+Nx, Ny, Nt = 10, 10, 10
+N_collocation = Nx*Ny*Nt
+d_in = 3
+
+xMin, xMax = 0.0, 8.0
+yMin, yMax = 0.0, 8.0
+tMax = 50.
+
+def choose_width_depth(N_collocation=N_collocation, overparam_factor=3.0, d_in=d_in, d_out=1):
+    N_target = int(overparam_factor * N_collocation)
+    depth = 8
+    a, b = depth - 1, d_in + depth + d_out
+    c = d_out - N_target
+    width = int((-b + np.sqrt(b*b - 4*a*c)) / (2*a))
+    return width, depth
+
+width, depth = choose_width_depth()
+dx = (xMax - xMin) / (Nx - 1)
+dy = (yMax - yMin) / (Ny - 1)
+dt = tMax / (Nt - 1)
+h = np.max([dx, dy, dt])
+
+lambdas = torch.tensor([1., 1., 1.], dtype=DTYPE, device=device, requires_grad=False)
+cheb_par = torch.tensor(0.5, dtype=DTYPE, device=device, requires_grad=True)
+
+x = torch.linspace(xMin, xMax, Nx, dtype=DTYPE, device=device).reshape(-1, 1)
+y = torch.linspace(yMin, yMax, Ny, dtype=DTYPE, device=device).reshape(-1, 1)
+t = torch.linspace(0, tMax, Nt, dtype=DTYPE, device=device).reshape(-1, 1)
+y_grid, x_grid, t_grid = torch.meshgrid(y.flatten(), x.flatten(), t.flatten(), indexing='ij')
+x_train = x_grid.flatten().reshape(-1, 1)
+y_train = y_grid.flatten().reshape(-1, 1)
+t_train = t_grid.flatten().reshape(-1, 1)
+xyt_train = torch.stack([x_train.flatten(), y_train.flatten(), t_train.flatten()], dim=1)
+
+save_fig = True
+
+def u_0(x, y):
+    epsilon = 0.01
+    c1, c2 = 0.45, 0.25
+    x1, x2 = 2.5, 3.3
+    y1 = 0.
+    out = 3*c1/(torch.cosh(0.5*torch.sqrt(torch.tensor(c1/epsilon))*((x-x1)**2 + (y-y1)**2)**0.5))**2 
+    out += 3*c2/(torch.cosh(0.5*torch.sqrt(torch.tensor(c2/epsilon))*((x-x2)**2 + (y-y1)**2)**0.5))**2
+    return out
+
+def periodic_boundary_conditions(model, Nbc=2000):
+    x = torch.rand(Nbc, 1, device=device) * (xMax - xMin) + xMin
+    y = torch.rand(Nbc, 1, device=device) * (yMax - yMin) + yMin
+    t = torch.rand(Nbc, 1, device=device) * tMax
+    
+    xL = torch.full_like(x, xMin)
+    xR = torch.full_like(x, xMax)
+    yL = torch.full_like(y, yMin)
+    yR = torch.full_like(y, yMax)
+    
+    uLx = model(torch.cat([xL, y, t], 1))
+    uRx = model(torch.cat([xR, y, t], 1))
+    uLy = model(torch.cat([x, yL, t], 1))
+    uRy = model(torch.cat([x, yR, t], 1))
+    
+    loss = torch.mean((uLx - uRx)**2 + (uLy - uRy)**2)
+    return loss
+
+def H(u, u_x, u_y):
+    return torch.sum((u_x**2 + u_y**2)/2 - u**3/6) * dx * dy
+
+def linear_loss_function(tensors, weights):
+    stacked = torch.stack(tensors)
+    weights = weights / torch.sum(weights)
+    loss = torch.sum(weights * stacked)
+    return loss, 'ls'
+
+def chebyshev_loss_function(tensors, weights):
+    stacked = torch.stack(tensors)
+    loss = torch.max(weights * stacked)
+    return loss, 'cs'
+
+def augmentedChebyshev_loss_function(tensors, weights):
+    par = torch.sigmoid(cheb_par)
+    ls = linear_loss_function(tensors, weights)[0]
+    cs = chebyshev_loss_function(tensors, weights)[0]
+    return par*cs + (1-par)*ls, 'acs'
+
+class FourierFeatures(nn.Module):
+    def __init__(self, n_modes=5):
+        super().__init__()
+        self.n_modes = n_modes
+    
+    def forward(self, inputs):
+        x, y, t = inputs[:, 0:1], inputs[:, 1:2], inputs[:, 2:3]
+        features = [t]
+        for k in range(1, self.n_modes + 1):
+            features.append(torch.sin(2*np.pi*k*(x - xMin)/(xMax-xMin)))
+            features.append(torch.cos(2*np.pi*k*(x - xMin)/(xMax-xMin)))
+            features.append(torch.sin(2*np.pi*k*(y - yMin)/(yMax-yMin)))
+            features.append(torch.cos(2*np.pi*k*(y - yMin)/(yMax-yMin)))
+        return torch.cat(features, dim=1)
+
+class PINNModel(nn.Module):
+    def __init__(self, num_hidden_layers=depth, num_neurons_per_layer=width):
+        super().__init__()
+        self.ff = FourierFeatures(n_modes=4)
+        layers = []
+        # input_dim = 3 + 4 * 2 * 4
+        input_dim = 17
+        for _ in range(num_hidden_layers):
+            layers.append(nn.Linear(input_dim, num_neurons_per_layer))
+            layers.append(nn.Tanh())
+            input_dim = num_neurons_per_layer
+        layers.append(nn.Linear(input_dim, 1))
+        self.net = nn.Sequential(*layers)
+    
+    def forward(self, x):
+        x = self.ff(x)
+        return self.net(x)
+
+def custom_loss(inputs, model):
+    x, y, t = inputs[:, 0:1], inputs[:, 1:2], inputs[:, 2:3]
+    x.requires_grad_(True)
+    y.requires_grad_(True)
+    t.requires_grad_(True)
+    u_model = model(torch.cat([x, y, t], dim=1))
+    
+    u_t = torch.autograd.grad(u_model.sum(), t, create_graph=True)[0]
+    u_x = torch.autograd.grad(u_model.sum(), x, create_graph=True)[0]
+    u_y = torch.autograd.grad(u_model.sum(), y, create_graph=True)[0]
+    
+    u_xx = torch.autograd.grad(u_x.sum(), x, create_graph=True)[0]
+    u_yy = torch.autograd.grad(u_y.sum(), y, create_graph=True)[0]
+    
+    u_xxx = torch.autograd.grad(u_xx.sum(), x, create_graph=True)[0]
+    u_xyy = torch.autograd.grad(u_y.sum(), y, create_graph=True)[0]
+    
+    pde_loss = torch.mean((u_t + u_model * u_x + u_xxx + u_xyy) ** 2)
+    
+    x_ic = torch.linspace(xMin, xMax, Nx*Ny).reshape(-1, 1).to(device)
+    y_ic = torch.linspace(yMin, yMax, Nx*Ny).reshape(-1, 1).to(device)
+    t_ic = torch.zeros_like(x_ic)
+    u_ic = u_0(x_ic, y_ic)
+    u_ic_pred = model(torch.cat([x_ic, y_ic, t_ic], dim=1))
+    data_fitting_loss_0 = torch.mean((u_ic_pred - u_ic) ** 2)
+    
+    data_fitting_loss_l_r = periodic_boundary_conditions(model)
+    loss, loss_type = augmentedChebyshev_loss_function([pde_loss, data_fitting_loss_0, data_fitting_loss_l_r], lambdas)
+    
+    H_loss = H(u_model.reshape(Nx, Ny, Nt), u_x.reshape(Nx, Ny, Nt), u_y.reshape(Nx, Ny, Nt))
+    
+    return loss, loss_type, pde_loss, data_fitting_loss_0, data_fitting_loss_l_r, H_loss
+
+model = PINNModel().to(device)
+epochs = 1000
+lr_schedule = torch.optim.lr_scheduler.PolynomialLR(
+    torch.optim.Adam(model.parameters(), lr=1e-2),
+    total_iters=epochs, power=3.0
+)
+optimizer = lr_schedule.optimizer
+
+losses, pde_losses, data_losses_0, bc_losses = [], [], [], []
+H_losses_min, H_losses_max, H_losses_mean, H_losses_std = [], [], [], []
+H_losses_abs_error, H_losses_rel_error = [], []
+t0 = time()
+
+for epoch in range(epochs):
+    optimizer.zero_grad()
+    loss, loss_type, pde_loss, data_loss_0, bc_loss, H_loss = custom_loss(xyt_train, model)
+    loss.backward()
+    optimizer.step()
+    lr_schedule.step()
+    
+    with torch.no_grad():
+        losses.append(loss.item())
+        pde_losses.append(pde_loss.item())
+        data_losses_0.append(data_loss_0.item())
+        bc_losses.append(bc_loss.item())
+        
+        H_loss_min = torch.min(H_loss).item()
+        H_loss_max = torch.max(H_loss).item()
+        H_losses_min.append(H_loss_min)
+        H_losses_max.append(H_loss_max)
+        H_loss_mean = torch.mean(H_loss).item()
+        H_loss_std = torch.std(H_loss).item()
+        H_losses_mean.append(H_loss_mean)
+        H_losses_std.append(H_loss_std)
+        
+        if epoch % 100 == 0 or epoch == epochs - 1:
+            print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item():.6e}")
+
+print(f'\nComputation time: {time() - t0:.2f}s')
+
+plt.figure(figsize=(10, 6))
+plt.semilogy(losses, label='Total Loss')
+plt.semilogy(pde_losses, label='PDE Loss')
+plt.semilogy(data_losses_0, label='Initial Conditions Loss')
+plt.semilogy(bc_losses, label='Periodic Boundary Conditions Loss')
+plt.xlabel('Epoch')
+plt.ylabel('Loss')
+plt.title('Loss Contributions')
+plt.legend()
+plt.grid()
+plt.savefig('./results/2D_loss.png', dpi=300) if save_fig else None
+plt.show()
+
+plt.figure(figsize=(10, 6))
+plt.plot(H_losses_min, label='H_loss_min')
+plt.plot(H_losses_max, label='H_loss_max')
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian')
+plt.title('Hamiltonian over epochs')
+plt.legend()
+plt.grid()
+plt.savefig('./results/2D_H_minmax.png', dpi=300) if save_fig else None
+plt.show()
+
+H_losses_mean_arr = np.array(H_losses_mean)
+H_losses_std_arr = np.array(H_losses_std)
+plt.figure(figsize=(10, 6))
+plt.plot(H_losses_mean_arr)
+plt.fill_between(range(len(H_losses_mean_arr)), H_losses_mean_arr - H_losses_std_arr, H_losses_mean_arr + H_losses_std_arr, alpha=0.2)
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian mean')
+plt.title('Hamiltonian mean over epochs with standard deviation')
+plt.grid()
+plt.savefig('./results/2D_H_mean_std.png', dpi=300) if save_fig else None
+plt.show()
+
+plt.figure(figsize=(10, 6))
+plt.plot(H_losses_std_arr)
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian std')
+plt.title('Hamiltonian standard deviation over epochs')
+plt.grid()
+plt.savefig('./results/2D_H_std.png', dpi=300) if save_fig else None
+plt.show()
+
+if cheb_par.requires_grad:
+    plt.figure(figsize=(10, 6))
+    plt.plot(torch.sigmoid(cheb_par).detach())
+    plt.xlabel('Epoch')
+    plt.ylabel('Chebyshev parameter')
+    plt.title('Chebyshev parameter over epochs')
+    plt.grid()
+    plt.savefig('./results/2D_cheb_par.png', dpi=300) if save_fig else None
+    plt.show()
+
+from mpl_toolkits.mplot3d import Axes3D
+N = 100
+xspace = torch.linspace(xMin, xMax, N, dtype=DTYPE, device=device)
+yspace = torch.linspace(yMin, yMax, N, dtype=DTYPE, device=device)
+tspace_val = torch.tensor(tMax, dtype=DTYPE, device=device)
+X_grid, Y_grid = torch.meshgrid(xspace, yspace, indexing='ij')
+T_grid = torch.full_like(X_grid, tMax)
+XYTgrid = torch.stack([X_grid.flatten(), Y_grid.flatten(), T_grid.flatten()], dim=1)
+
+with torch.no_grad():
+    u_pred = model(XYTgrid)
+U = u_pred.reshape(N, N)
+
+X_np = X_grid.cpu().numpy()
+Y_np = Y_grid.cpu().numpy()
+U_np = U.cpu().numpy()
+
+fig = plt.figure(figsize=(9, 6))
+ax = fig.add_subplot(111, projection='3d')
+ax.plot_surface(X_np, Y_np, U_np, cmap='viridis')
+ax.set_xlabel('$x$')
+ax.set_ylabel('$y$')
+ax.set_zlabel('$u(x,y,t)$')
+ax.set_title('2D PDE Solution')
+plt.savefig('./results/2D_solution.png', dpi=300) if save_fig else None
+plt.show()
diff --git a/experiments/structure_preserving_pinns/structurePreservingPINN_CamassaHolm_trainableH1.py b/experiments/structure_preserving_pinns/structurePreservingPINN_CamassaHolm_trainableH1.py
new file mode 100644
index 0000000..55ba1d3
--- /dev/null
+++ b/experiments/structure_preserving_pinns/structurePreservingPINN_CamassaHolm_trainableH1.py
@@ -0,0 +1,729 @@
+import numpy as np
+import tensorflow as tf
+from time import time
+import matplotlib.pyplot as plt
+
+# TF_USE_LEGACY_KERAS=True
+
+DTYPE = np.float32
+# Nx = 50
+# Nt = 50
+# N_collocation = Nx*Nt
+
+xMin = -np.pi
+xMax = np.pi
+tMax = 5. # 10.
+d_in = 2
+
+def Nx_from_arch(width, depth, fac=2.0, d_in=2, d_out=1):
+    """
+    Given a PINN architecture (width, depth) and an overparam factor fac,
+    compute Nx = Nt such that:
+    
+        Nx * Nt  ≈ N_params / fac,
+        Nx = Nt,
+        
+    where N_params is the number of trainable parameters.
+    
+    Parameters
+    ----------
+    width : int
+        Number of neurons per hidden layer.
+    depth : int
+        Number of hidden layers.
+    fac : float
+        Over-parameterization factor. Typical values: fac = 2 or 3.
+    d_in : int
+        Input dimension (usually 2: x,t).
+    d_out : int
+        Output dimension (usually 1: u).
+        
+    Returns
+    -------
+    Nx : int
+    Nt : int
+    Ntheta : int
+        Total number of trainable parameters.
+    Ncoll_target : int
+        Target number of collocation points = Ntheta/fac.
+    """
+
+    # Parameter count
+    Ntheta = (d_in + 1) * width \
+             + (depth - 1) * (width * width + width) \
+             + d_out * (width + 1)
+
+    # Target collocation count
+    Ncoll_target = int(Ntheta / fac)
+
+    # Square grid Nx = Nt
+    Nx = int(np.sqrt(Ncoll_target))
+    Nt = Nx
+
+    return Nx, Nt, Ntheta, Ncoll_target
+
+width = 80
+depth = 4
+
+Nx, Nt, Ntheta, Ncoll = Nx_from_arch(width=width, depth=depth, fac=10.)
+
+def h_from_NxNt(Nx, Nt, xMin, xMax, tMax):
+    """
+    Compute dx, dt, and h from Nx, Nt and the domain extents.
+    h is defined as max(dx, dt).
+
+    Returns
+    -------
+    dx : float
+    dt : float
+    h  : float
+    """
+
+    Lx = xMax - xMin
+    Lt = tMax
+
+    dx = Lx / (Nx - 1)
+    dt = Lt / (Nt - 1)
+
+    h = max(dx, dt)
+
+    return dx, dt, h
+
+dx, dt, h = h_from_NxNt(Nx, Nt, xMin, xMax, tMax)
+
+lambdas = [1., 1., 1.]
+lambdas = tf.Variable(lambdas, trainable=False, name='lambdas', dtype=DTYPE)
+do_training = True
+cheb_par = tf.Variable(0.5, trainable=False, name='cheb_par', dtype=DTYPE)
+
+x = np.linspace(xMin, xMax, Nx).reshape((-1, 1)).astype(DTYPE)
+t = np.linspace(0, tMax, Nt).reshape((-1, 1)).astype(DTYPE)
+
+x_train = tf.expand_dims(tf.convert_to_tensor(x.flatten()), axis=-1)
+t_train = tf.expand_dims(tf.convert_to_tensor(t.flatten()), axis=-1)
+
+save_fig = True
+
+# Define the initial condition
+def u_0(x):
+    return 0.2+0.1*tf.math.cos(2 * x)
+
+
+def u_0_x(x):
+    return -0.2*tf.math.sin(2 * x)
+
+
+def periodic_boundary_conditions(model, Nbc=2000):
+
+    # Random boundary sampling (correct choice)
+    x = tf.random.uniform((Nbc,1), xMin, xMax)
+    t = tf.random.uniform((Nbc,1), 0.0, tMax)
+
+    xL = tf.ones_like(x) * xMin
+    xR = tf.ones_like(x) * xMax
+
+    with tf.GradientTape(persistent=True) as tape:
+        tape.watch([xL, xR])
+
+        uLx = model(tf.concat([xL, t], 1))
+        uRx = model(tf.concat([xR, t], 1))
+
+    # First derivatives
+    uxL = tape.gradient(uLx, xL)
+    uxR = tape.gradient(uRx, xR)
+
+    del tape
+
+    # Enforce periodicity of values AND derivatives
+    loss = tf.reduce_mean(
+        (uLx - uRx)**2 +
+        (uxL - uxR)**2 
+    )
+
+    return loss
+
+
+# def H(u, u_x, dx):
+#     return tf.reduce_sum(tf.pow(u, 3)+u*tf.pow(u_x, 2), axis = -1) * dx
+#     # return tf.reduce_sum((tf.pow(u, 2)+tf.pow(u_x, 2))/2, axis = -1) * dx
+    
+def ch_density(u, u_x):
+    return tf.pow(u, 3) + u * tf.pow(u_x, 2)
+
+
+# @tf.function
+def H(u, u_x, dx, density_fn=ch_density, axis=-1):
+    """
+    Boole’s rule (8th order) along 'axis' for uniform grid with spacing dx.
+    Requires (N-1) % 4 == 0. Otherwise uses Boole on the largest prefix and trapezoid on remainder.
+    """
+    f = density_fn(u, u_x)   # [..., N]
+    n = tf.shape(f)[axis]
+
+    # Trapezoid as a fallback on short tails
+    def _trap_rem(rem):
+        # rem: [..., M] contiguous tail; integrate with trapezoid
+        return tf.reduce_sum(0.5*(rem[..., 1:] + rem[..., :-1]), axis=-1) * tf.cast(dx, f.dtype)
+
+    # Degenerate
+    if tf.less_equal(n, 1):
+        return tf.reduce_sum(f, axis=axis) * dx
+
+    # Largest prefix with (n1-1) % 4 == 0
+    n1 = n - ((n - 1) % 4)
+    # Boole constant for uniform spacing: 2*dx/45
+    c = (2.0 * dx) / 45.0
+
+    # Indices for prefix
+    idx_prefix = tf.range(n1)
+    f0   = tf.gather(f, idx_prefix[0::4], axis=axis)      # 0,4,8,...
+    f1   = tf.gather(f, idx_prefix[1::4], axis=axis)      # 1,5,9,...
+    f2   = tf.gather(f, idx_prefix[2::4], axis=axis)      # 2,6,10,...
+    f3   = tf.gather(f, idx_prefix[3::4], axis=axis)      # 3,7,11,...
+    f4   = tf.gather(f, idx_prefix[4::4], axis=axis)      # 4,8,12,... (last block end)
+
+    # Weighted sum across blocks
+    # Boole's block weights per 5 nodes: [7, 32, 12, 32, 7]
+    # Aggregate across all blocks by summing slices
+    s = 7.0  * tf.reduce_sum(f0, axis=axis)
+    s += 32.0 * tf.reduce_sum(f1, axis=axis)
+    s += 12.0 * tf.reduce_sum(f2, axis=axis)
+    s += 32.0 * tf.reduce_sum(f3, axis=axis)
+    s += 7.0  * tf.reduce_sum(f4, axis=axis)
+
+    boole_part = c * s
+
+    # Tail remainder
+    if tf.equal(n1, n):
+        return boole_part
+
+    rem = tf.gather(f, tf.range(n1-1, n), axis=axis)  # nodes: n1-1 .. n-1
+    tail = _trap_rem(rem)
+    return boole_part + tail    
+
+
+def linear_loss_function(tensors, weights):
+    """
+    Computes the sum of the input tensors.
+    
+    Args:
+        tensors (list of tf.Tensor): List of tensors to compute the sum.
+        
+    Returns:
+        tf.Tensor: The sum of the input tensors.
+    """
+    # weights = weights / tf.reduce_sum(weights)  # Normalize weights
+    # stack_tensor = tf.stack(tf.multiply(tensors, weights))
+    # stack_tensor = tf.stack([w * t for w, t in zip(weights, tensors)], axis=0)   
+    stacked = tf.stack(tensors, axis=0)   # shape (n_losses,)
+    weights = weights / tf.reduce_sum(weights)
+    loss = tf.reduce_sum(weights * stacked)  
+    loss_type = 'ls'
+    return loss, loss_type
+
+
+def chebyshev_loss_function(tensors, weights):
+    """
+    Computes the max of the input tensors.
+    
+    Args:
+        tensors (list of tf.Tensor): List of tensors to compute the log-sum-exp.
+        
+    Returns:
+        tf.Tensor: The maximum of the input tensors.
+    """
+    # weights = weights / tf.reduce_sum(weights)  # Normalize weights
+    # stack_tensor = tf.stack(tf.multiply(tensors, weights))
+    # stack_tensor = tf.stack([w * t for w, t in zip(weights, tensors)], axis=0)
+    stacked = tf.stack(tensors, axis=0)
+    loss = tf.reduce_max(weights*stacked)
+    loss_type = 'cs'
+    return loss, loss_type
+
+
+def smooth_chebyshev_loss_function(mu, tensors, weights):
+    """
+    Computes the log of the sum of the exponentials of the input tensors.
+    
+    Args:
+        tensors (list of tf.Tensor): List of tensors to compute the log-sum-exp.
+        
+    Returns:
+        tf.Tensor: The log-sum-exp of the input tensors.
+    """
+    weights = weights / tf.reduce_sum(weights)  # Normalize weights
+    # stack_tensor = tf.stack(tf.multiply(tensors, weights))
+    # stack_tensor = tf.stack([w * t for w, t in zip(weights, tensors)], axis=0)
+    stacked = tf.stack(tensors, axis=0)
+    exp_sum = tf.reduce_sum(tf.math.exp(stacked/mu), axis=0)
+    loss = mu*tf.math.log(exp_sum)
+    loss_type = 'scs'
+    return loss, loss_type
+
+
+def augmentedChebyshev_loss_function(tensors, weights):
+    """
+    Computes the log of the sum of the exponentials of the input tensors.
+    
+    Args:
+        tensors (list of tf.Tensor): List of tensors to compute the log-sum-exp.
+        
+    Returns:
+        tf.Tensor: The log-sum-exp of the input tensors.
+    """
+    # weights = weights / tf.reduce_sum(weights)  # Normalize weights
+    loss_type = 'acs'
+    par = tf.sigmoid(cheb_par)  # par is between 0 and 1
+    return par*chebyshev_loss_function(tensors, weights)[0] + (1-par)*linear_loss_function(tensors, weights)[0], loss_type
+
+
+def sigmoid_centered(x):
+    return 2*tf.nn.sigmoid(.5*x) - 1
+
+def PINNModel(num_hidden_layers=depth, num_neurons_per_layer=width):  # 8,40 
+    xt_input = tf.keras.Input(shape=(2,))
+    output_u = xt_input
+    for _ in range(num_hidden_layers):
+        output_u = tf.keras.layers.Dense(num_neurons_per_layer,
+                                         activation='gelu',  # tanh
+                                         kernel_initializer='glorot_normal', #'glorot_uniform',  # glorot_normal
+                                         )(output_u)
+
+    output_u = tf.keras.layers.Dense(units=1,
+                                     activation='linear',
+                                     kernel_initializer='glorot_normal', #'glorot_uniform',  # glorot_normal
+                                     )(output_u)
+
+    return tf.keras.Model(inputs=xt_input, outputs=output_u)  #tf.keras.Model(inputs=[x_input, t_input], outputs=output_u)
+
+
+def lambda_grad(epoch,
+                start=1000,
+                lam_max=1e-0,
+                kappa=1e-3):
+    epoch = tf.cast(epoch, tf.float32)
+    return lam_max * (1.0 - tf.exp(-kappa * tf.maximum(epoch - start, 0.0)))
+
+
+
+# @tf.function
+def custom_loss(inputs, model):
+    x, t = inputs[:, 0:1], inputs[:, 1:2]
+
+    with tf.GradientTape(persistent=True) as outerTape:
+        outerTape.watch(x)
+        with tf.GradientTape(persistent=True) as tape:
+            tape.watch(t)
+            tape.watch(x)
+            with tf.GradientTape(persistent=False) as tape2:            
+                tape2.watch(x)
+                tape2.watch(t)
+                with tf.GradientTape(persistent=True) as tape3:
+                    tape3.watch(x)
+                    tape3.watch(t)
+                    u_model = model(tf.stack([x[:, 0], t[:, 0]], axis=1))
+                u_x = tape3.gradient(u_model, x)
+                u_t = tape3.gradient(u_model, t)
+            u_xx = tape2.gradient(u_x, x)     
+        u_xxt = tape.gradient(u_xx, t)   
+        u_xxx = tape.gradient(u_xx, x) 
+
+        # === Camassa–Holm residual ===
+        r = (
+            u_t
+            - u_xxt
+            + 3.0 * u_model * u_x
+            - 2.0 * u_x * u_xx
+            - u_model * u_xxx
+        )
+    r_x = outerTape.gradient(r, x)
+
+    # Clean up
+    del tape, tape2, tape3, outerTape
+    
+    lam = lambda_grad(epoch)
+
+    # === H1 norm of residual ===
+    pde_loss_L2 = tf.reduce_mean(tf.square(r))
+    pde_loss_grad = tf.reduce_mean(tf.square(r_x))
+    pde_loss_H1 = pde_loss_L2 + lam * pde_loss_grad
+
+    # === Initial condition ===
+    ic_mask = tf.where(tf.abs(t) < 1e-6)
+    x_ic = tf.gather(x, ic_mask[:, 0])
+    u_ic = u_0(x_ic)
+    t_ic = tf.zeros_like(x_ic)
+    u_ic_pred = model(tf.concat([x_ic, t_ic], axis=1))
+    data_fitting_loss_0 = tf.reduce_mean(tf.square(u_ic_pred - u_ic))
+
+    # === Periodic boundary conditions ===
+    data_fitting_loss_l_r = periodic_boundary_conditions(model)
+
+    # === Chebyshev aggregation ===
+    loss, loss_type = chebyshev_loss_function(
+        [pde_loss_H1, data_fitting_loss_0, data_fitting_loss_l_r],
+        lambdas
+    )
+
+    # === Hamiltonian (monitor only) ===
+    H_loss = H(
+        tf.reshape(u_model, shape=[Nt, Nx]),
+        tf.reshape(u_x, shape=[Nt, Nx]),
+        dx
+    )
+
+    return (
+        loss,
+        loss_type,
+        pde_loss_H1,
+        data_fitting_loss_0,
+        data_fitting_loss_l_r,
+        H_loss,
+    )
+    
+
+# Create the PINN model
+model = PINNModel()
+model.summary()
+
+epochs = 1000  # 3000  # 5000  # 2000
+# # Compile the model
+# model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
+#               loss=lambda y_true, y_pred: custom_loss([x_train, t_train, theta_train], model)[1])
+
+# Create the optimizer with a smaller learning rate
+# learning_rate = 1e-3  # 1e-4
+# learning_rate_type = 'constant'
+# learning_rate = tf.keras.optimizers.schedules.PiecewiseConstantDecay([10, 100], [1e-1, 5e-2, 1e-2])  #OK
+# learning_rate = tf.keras.optimizers.schedules.PiecewiseConstantDecay([100, 300], [1e-2, 1e-3, 1e-4])
+# learning_rate = tf.keras.optimizers.schedules.PolynomialDecay(
+#     initial_learning_rate=1e-3,
+#     decay_steps=epochs,
+#     end_learning_rate=1e-5,
+#     power=2.,
+#     cycle=False,  # True    
+#     name='PolynomialDecay'
+# )
+# learning_rate_type = 'polynomialDecay'
+learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(
+    initial_learning_rate=1e-2,
+    decay_steps=epochs, # 100
+    decay_rate=0.9,
+    staircase=False,
+    name='ExponentialDecay'
+)
+learning_rate_type = 'exponentialDecay'
+# learning_rate = tf.keras.optimizers.schedules.CosineDecay(
+#     initial_learning_rate=1e-3,
+#     decay_steps=1000,
+#     alpha=0.0,
+#     name='CosineDecay',
+#     warmup_target=None,
+#     warmup_steps=0
+# )
+# learning_rate_type = 'cosineDecay'
+
+trainable = model.trainable_variables 
+if lambdas.trainable:
+    trainable += [lambdas]
+    
+if cheb_par.trainable:
+    trainable += [cheb_par]
+
+# optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=1e-08, amsgrad=True)
+# optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9, nesterov=True)
+optimizer = tf.keras.optimizers.Nadam(learning_rate=learning_rate, beta_1=0.8, beta_2=0.9, epsilon=1e-07)
+# optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate, rho=0.9, momentum=0.0, epsilon=1e-07, centered=False)
+
+# Training loop
+losses = []
+pde_losses = []
+data_fitting_losses_0 = []
+data_fitting_losses_l_r = []
+delta_gradients = []
+H_losses_min = []
+H_losses_max = []
+H_losses_mean = []
+H_losses_std = []
+H_losses_abs_error = []
+H_losses_rel_error = []
+lambdas_values = []
+lambdas_values.append(lambdas.numpy())
+cheb_par_values = []
+cheb_par_values.append(cheb_par.numpy())
+
+# Convert data to tensor because tf.GradientTape() can only watch tensor and not numpy arrays
+x_train = tf.convert_to_tensor(x_train)
+t_train = tf.convert_to_tensor(t_train)
+x_grid, t_grid = np.meshgrid(x.flatten(), t.flatten())
+inputs = tf.convert_to_tensor(np.vstack([x_grid.flatten(), t_grid.flatten()]).T)
+stop = False
+# Start timer
+t0 = time()
+for epoch in range(epochs):
+    if not stop:
+        # print("# STARTING EPOCH", epoch + 1)
+
+        # Create a LearningRateScheduler to update the learning rate
+        # current_lr = scheduler(epoch, learning_rate)
+        # tf.keras.backend.set_value(optimizer.lr, current_lr)
+
+        with tf.GradientTape() as tape:
+            loss, loss_type, pde_loss, data_fitting_loss_0, data_fitting_loss_l_r, H_loss = custom_loss(inputs, model)
+
+        # print("Computing gradients")
+        gradients = tape.gradient(loss, trainable)
+        # print(gradients[-1])
+        # print("Applying gradients")
+        optimizer.apply_gradients(zip(gradients, trainable))
+        # print("Appending losses")
+        losses.append(loss.numpy())
+        pde_losses.append(pde_loss.numpy())
+        data_fitting_losses_0.append(data_fitting_loss_0.numpy())
+        data_fitting_losses_l_r.append(data_fitting_loss_l_r.numpy())
+        H_loss_min = tf.reduce_min(H_loss)
+        H_loss_max = tf.reduce_max(H_loss)
+        H_losses_min.append(H_loss_min.numpy())
+        H_losses_max.append(H_loss_max.numpy())
+        H_loss_mean = tf.reduce_mean(H_loss)
+        H_loss_std = tf.math.reduce_std(H_loss)
+        H_losses_mean.append(H_loss_mean.numpy())
+        H_losses_std.append(H_loss_std.numpy())
+        
+        H0 = H(u_0(x_grid), u_0_x(x_grid), dx)  # H0 = H_loss[0].numpy()
+        Hf = H_loss.numpy()
+        H_abs_error = tf.abs(Hf - H0)
+        H_losses_abs_error.append(tf.reduce_max(H_abs_error).numpy())
+        H_rel_error = H_abs_error / tf.abs((H0 + 1e-16))
+        H_losses_rel_error.append(H_rel_error[-1].numpy())
+        
+        # lambdas_values.append((trainable[-1]).numpy())
+        if len(losses) > 1 and not lambdas.trainable and do_training:
+            # SoftAdaptive weights update
+            num1 = tf.math.exp(pde_losses[-1] - pde_losses[-2])
+            num2 = tf.math.exp(data_fitting_losses_0[-1] - data_fitting_losses_0[-2])
+            num3 = tf.math.exp(data_fitting_losses_l_r[-1] - data_fitting_losses_l_r[-2])
+            den  = num1 + num2 + num3
+
+            new_lambdas = tf.stack([num1 / den, num2 / den, num3 / den])
+            lambdas.assign(new_lambdas)
+            lambdas_values.append((lambdas).numpy())
+            
+        if cheb_par.trainable:
+            cheb_par_values.append(cheb_par.numpy())
+
+        del tape
+
+        if epoch % 100 == 0 or epoch == epochs - 1:
+            print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.numpy()}")
+
+        if len(losses) > 2 and np.abs(losses[-1] - losses[-2]) / np.abs(losses[-2]) < 1e-8:
+            stop = True
+
+print(f"Loss type: {loss_type}")
+print(f"Hamiltonian mean: {H_loss_mean.numpy()}")
+print(f"Hamiltonian standard deviation: {H_loss_std.numpy()}")
+print(f"Hamiltonian maximum: {H_loss_max.numpy()}")
+print(f"Hamiltonian minimum: {H_loss_min.numpy()}")
+# print(f"Hamiltonian absolute error: {H_abs_error.numpy()}")
+# print(f"Hamiltonian relative error: {H_rel_error.numpy()}")
+print(f"Hamitonian relative error: {H_rel_error[-1].numpy()}")
+# Print computation time
+print('\nComputation time: {} seconds'.format(time() - t0))
+
+
+def generate_save_fig_string(type, epochs, learning_rate_type, loss_type):
+    """
+    Generates a string for saving figures that includes the number of epochs and the type of learning rate.
+    
+    Args:
+        epochs (int): The number of epochs.
+        learning_rate_type (str): The type of learning rate.
+        
+    Returns:
+        str: The generated string for saving figures.
+    """
+    return f"./results/{type}_epochs_{epochs}_lr_{learning_rate_type}_{loss_type}.png"
+
+# Plot the loss history
+plt.semilogy(losses, label='Total Loss')
+plt.semilogy(pde_losses, label='PDE Loss')
+plt.semilogy(data_fitting_losses_0, label='Initial Conditions Loss')
+plt.semilogy(data_fitting_losses_l_r, label='Boundary Conditions Loss')
+plt.xlabel('Epoch')
+plt.ylabel('Loss')
+plt.title('Loss Contributions')
+plt.legend()
+plt.grid()
+
+if save_fig:
+    save_fig_string = generate_save_fig_string('loss', epochs, learning_rate_type, loss_type)
+    # save png
+    plt.savefig(save_fig_string, dpi=300)
+plt.show()
+    # # save pdf
+    # plt.savefig('../results/' + 'loss.pdf', dpi=300)
+
+# Evaluate the function
+x_eval = np.linspace(x_train[0].numpy(), x_train[-1].numpy(), 100).reshape((-1, 1)).astype(np.float32)
+t_eval = np.linspace(t_train[0].numpy(), t_train[-1].numpy(), 100).reshape((-1, 1)).astype(np.float32)
+inputs_eval = [x_eval, t_eval]
+
+# Plot the Hamiltonian over epochs
+plt.plot(H_losses_min, label='min H')
+plt.plot(H_losses_max, label='max H')
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian')
+plt.title('Hamiltonian over epochs')
+plt.legend()
+plt.grid()
+
+if save_fig:
+    save_fig_string = generate_save_fig_string('H_loss', epochs, learning_rate_type, loss_type)
+    # save png
+    plt.savefig(save_fig_string, dpi=300)
+plt.show()
+    # # save pdf
+    # plt.savefig('../results/' + 'H_loss.pdf', dpi=300)    
+    
+# Plot the average Hamiltonian over epochs with standard deviation
+H_losses_mean = np.array(H_losses_mean)
+H_losses_std = np.array(H_losses_std)
+H_losses_rel_error = np.array(H_losses_rel_error)
+H_losses_rel_error = np.array(H_losses_rel_error)
+
+plt.plot(H_losses_mean)
+plt.fill_between(range(len(H_losses_mean)), H_losses_mean - H_losses_std, H_losses_mean + H_losses_std, alpha=0.2)
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian mean')
+plt.title('Hamiltonian mean over epochs with standard deviation')
+# plt.legend()
+plt.grid()
+
+if save_fig:
+    save_fig_string = generate_save_fig_string('H_loss_mean', epochs, learning_rate_type, loss_type)
+    # save png
+    plt.savefig(save_fig_string, dpi=300)
+plt.show()
+    # # save pdf
+    # plt.savefig('../results/' + 'H_loss_std.pdf', dpi=300)
+
+# Plot the standard deviation of the Hamiltonian over epochs
+plt.plot(H_losses_std)
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian std')
+plt.title('Hamiltonian standard deviation over epochs')
+# plt.legend()
+plt.grid()
+
+if save_fig:
+    save_fig_string = generate_save_fig_string('H_loss_std', epochs, learning_rate_type, loss_type)
+    # save png
+    plt.savefig(save_fig_string, dpi=300)
+plt.show()
+    # # save pdf
+    # plt.savefig('../results/' + 'H_loss_std.pdf', dpi=300)
+    
+# Plot the absolute error of the Hamiltonian over epochs
+plt.plot(H_losses_abs_error)    
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian absolute error')
+plt.title('Hamiltonian absolute error over epochs')
+# plt.legend()
+plt.grid()
+
+if save_fig:
+    save_fig_string = generate_save_fig_string('H_loss_abs_error', epochs, learning_rate_type, loss_type)
+    # save png
+    plt.savefig(save_fig_string, dpi=300)
+plt.show()
+    # # save pdf
+    # plt.savefig('../results/' + 'H_loss_rel_error.pdf', dpi=300)
+    
+
+# Plot the relative error of the Hamiltonian over epochs
+plt.plot(H_losses_rel_error)    
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian relative error')
+plt.title('Hamiltonian relative error over epochs')
+# plt.legend()
+plt.grid()
+
+if save_fig:
+    save_fig_string = generate_save_fig_string('H_loss_rel_error', epochs, learning_rate_type, loss_type)
+    # save png
+    plt.savefig(save_fig_string, dpi=300)
+plt.show()
+    # # save pdf
+    # plt.savefig('../results/' + 'H_loss_rel_error.pdf', dpi=300)
+
+
+# Plot the Chebyshev parameter over epochs
+if cheb_par.trainable:
+    plt.plot(tf.sigmoid(cheb_par_values))
+    plt.xlabel('Epoch')
+    plt.ylabel('Chebyshev parameter')
+    plt.title('Chebyshev parameter over epochs')
+    plt.grid()
+    if save_fig:
+        save_fig_string = generate_save_fig_string('cheb_par', epochs, learning_rate_type, loss_type)
+        # save png
+        plt.savefig(save_fig_string, dpi=300)
+plt.show()
+        # # save pdf
+        # plt.savefig('../results/' + 'cheb_par.pdf', dpi=300)    
+    
+    
+from mpl_toolkits.mplot3d import Axes3D
+
+# Set up meshgrid
+N = 600
+tspace = np.linspace(0, tMax, N + 1)
+xspace = np.linspace(xMin, xMax, N + 1)
+T, X = np.meshgrid(tspace, xspace)
+XTgrid = np.vstack([X.flatten(),T.flatten()]).T
+
+# Determine predictions of u(t, x)
+u_pred = model(tf.cast(XTgrid,DTYPE))
+
+# Reshape upred
+U = u_pred.numpy().reshape(N+1,N+1)
+
+# Surface plot of solution u(t,x)
+fig = plt.figure(figsize=(9,6))
+ax = fig.add_subplot(111, projection='3d')
+ax.plot_surface(X, T, U, cmap='viridis')
+ax.view_init(35,35)
+ax.set_xlabel('$x$')
+ax.set_ylabel('$t$')
+ax.set_zlabel('$u_\\theta(x,t)$')
+ax.set_title('Solution to Camassa-Holm equation')
+ax.set_box_aspect(None, zoom=0.85)
+
+if save_fig:
+    save_fig_string = generate_save_fig_string('sol', epochs, learning_rate_type, loss_type)
+    # save png
+    plt.savefig(save_fig_string, dpi=300)
+plt.show()
+    # # save pdf
+    # plt.savefig('../results/' + 'solution.pdf', dpi=300)
+
+
+
+
+import pandas as pd
+df = pd.DataFrame()
+
+df['total_loss'] = losses
+df['pde_loss'] = pde_losses
+df['data_fitting_loss_0'] = data_fitting_losses_0
+df['data_fitting_loss_l_r'] = data_fitting_losses_l_r
+df['H_loss_min'] = H_losses_min
+df['H_loss_max'] = H_losses_max
+df['H_loss_mean'] = H_losses_mean
+df['H_loss_std'] = H_losses_std
+df['H_loss_abs_error'] = H_losses_abs_error
+df['H_loss_rel_error'] = H_losses_rel_error
+# df['cheb_par'] = cheb_par_values
+
+df.to_csv('./results/camassa/training_history.csv', index=False)
\ No newline at end of file
diff --git a/experiments/structure_preserving_pinns/structurePreservingPINN_CamassaHolm_trainableH1_pytorch.py b/experiments/structure_preserving_pinns/structurePreservingPINN_CamassaHolm_trainableH1_pytorch.py
new file mode 100644
index 0000000..915aedc
--- /dev/null
+++ b/experiments/structure_preserving_pinns/structurePreservingPINN_CamassaHolm_trainableH1_pytorch.py
@@ -0,0 +1,389 @@
+import numpy as np
+import torch
+import torch.nn as nn
+from time import time
+import matplotlib.pyplot as plt
+
+from humancompatible.train.dual_optim import ALM, iALM
+
+DTYPE = torch.float32
+torch.set_default_dtype(DTYPE)
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+xMin, xMax = -np.pi, np.pi
+tMax = 5.
+d_in = 2
+
+def Nx_from_arch(width, depth, fac=2.0, d_in=2, d_out=1):
+    Ntheta = (d_in + 1) * width + (depth - 1) * (width * width + width) + d_out * (width + 1)
+    Ncoll_target = int(Ntheta / fac)
+    Nx = int(np.sqrt(Ncoll_target))
+    Nt = Nx
+    return Nx, Nt, Ntheta, Ncoll_target
+
+width, depth = 80, 4
+Nx, Nt, Ntheta, Ncoll = Nx_from_arch(width=width, depth=depth, fac=10.)
+
+dx = (xMax - xMin) / (Nx - 1)
+dt = tMax / (Nt - 1)
+h = max(dx, dt)
+
+lambdas = torch.tensor([1., 1., 1.], dtype=DTYPE, device=device, requires_grad=False)
+do_training = True
+cheb_par = torch.tensor(0.5, dtype=DTYPE, device=device, requires_grad=False)
+
+x = torch.linspace(xMin, xMax, Nx, dtype=DTYPE, device=device).reshape(-1, 1)
+t = torch.linspace(0, tMax, Nt, dtype=DTYPE, device=device).reshape(-1, 1)
+x_train = x.reshape(-1, 1)
+t_train = t.reshape(-1, 1)
+t_grid, x_grid = torch.meshgrid(t.flatten(), x.flatten(), indexing='ij')
+inputs = torch.stack([x_grid.flatten(), t_grid.flatten()], dim=1)
+
+save_fig = True
+
+def u_0(x):
+    return 0.2 + 0.1 * torch.cos(2 * x)
+
+def u_0_x(x):
+    return -0.2 * torch.sin(2 * x)
+
+def periodic_boundary_conditions(model, Nbc=2000):
+    x = torch.rand(Nbc, 1, device=device) * (xMax - xMin) + xMin
+    t = torch.rand(Nbc, 1, device=device) * tMax
+    
+    xL = torch.full_like(x, xMin)
+    xR = torch.full_like(x, xMax)
+    
+    uLx = model(torch.cat([xL, t], 1))
+    uRx = model(torch.cat([xR, t], 1))
+    
+    loss = torch.mean((uLx - uRx)**2)
+    return loss
+
+def ch_density(u, u_x):
+    return u**3 + u * u_x**2
+
+def H(u, u_x, dx, density_fn=ch_density):
+    f = density_fn(u, u_x)
+    return torch.sum(f) * dx
+
+def linear_loss_function(tensors, weights):
+    stacked = torch.stack(tensors)
+    weights = weights / torch.sum(weights)
+    loss = torch.sum(weights * stacked)
+    return loss, 'ls'
+
+def chebyshev_loss_function(tensors, weights):
+    stacked = torch.stack(tensors)
+    loss = torch.max(weights * stacked)
+    return loss, 'cs'
+
+### MODEL ###
+
+# @torch.compile
+class PINNModel(nn.Module):
+    def __init__(self, num_hidden_layers=depth, num_neurons_per_layer=width):
+        super().__init__()
+        layers = []
+        in_dim = 2
+        for _ in range(num_hidden_layers):
+            layers.append(nn.Linear(in_dim, num_neurons_per_layer))
+            layers.append(nn.GELU())
+            in_dim = num_neurons_per_layer
+        layers.append(nn.Linear(in_dim, 1))
+        self.net = nn.Sequential(*layers)
+    
+    def forward(self, x):
+        return self.net(x)
+
+def lambda_grad(epoch,
+                start=1000,
+                lam_max=1e-0,
+                kappa=1e-3):
+    epoch = float(epoch)
+    return lam_max * (1.0 - np.exp(-kappa * max(epoch - start, 0.0)))
+
+
+##### UNCONSTRAINED LOSS FUNCTION WITH H1 REGULARIZATION #####
+
+
+# @torch.compile
+def custom_loss(inputs, model, epoch):
+    x, t = inputs[:, 0:1], inputs[:, 1:2]
+    x.requires_grad_(True)
+    t.requires_grad_(True)
+    
+    u_model = model(torch.cat([x, t], dim=1))
+    
+    u_t = torch.autograd.grad(u_model.sum(), t, create_graph=True)[0]
+    u_x = torch.autograd.grad(u_model.sum(), x, create_graph=True)[0]
+    
+    u_xx = torch.autograd.grad(u_x.sum(), x, create_graph=True)[0]
+    u_xxt = torch.autograd.grad(u_xx.sum(), t, create_graph=True)[0]
+    u_xxx = torch.autograd.grad(u_xx.sum(), x, create_graph=True)[0]
+    
+    r = u_t - u_xxt + 3.0 * u_model * u_x - 2.0 * u_x * u_xx - u_model * u_xxx
+    r_x = torch.autograd.grad(r.sum(), x, create_graph=True)[0]
+    
+    pde_loss_L2 = torch.mean(torch.square(r))
+    pde_loss_grad = torch.mean(torch.square(r_x))
+
+    lam = lambda_grad(epoch)
+    pde_loss_H1 = pde_loss_L2 + lam * pde_loss_grad
+    
+    ic_mask = torch.abs(t) < 1e-6
+    x_ic = x[ic_mask[:, 0]]
+    u_ic = u_0(x_ic)
+    t_ic = torch.zeros_like(x_ic)
+    u_ic_pred = model(torch.cat([x_ic, t_ic], axis=1))
+    data_fitting_loss_0 = torch.mean(torch.square(u_ic_pred - u_ic))
+    
+    data_fitting_loss_l_r = periodic_boundary_conditions(model)
+    
+    loss, loss_type = chebyshev_loss_function(
+        [pde_loss_H1, data_fitting_loss_0, data_fitting_loss_l_r],
+        lambdas
+    )
+    
+    H_loss = H(u_model.reshape(Nt, Nx), u_x.reshape(Nt, Nx), dx)
+    
+    return loss, loss_type, pde_loss_L2, data_fitting_loss_0, data_fitting_loss_l_r, H_loss
+
+
+#### LOSS FUNCTION WITH H1 CONSTRAINT ####
+
+def lagrangian_loss(inputs, model, dual_opt, epoch):
+    x, t = inputs[:, 0:1], inputs[:, 1:2]
+    x.requires_grad_(True)
+    t.requires_grad_(True)
+    
+    u_model = model(torch.cat([x, t], dim=1))
+    
+    u_t = torch.autograd.grad(u_model.sum(), t, create_graph=True)[0]
+    u_x = torch.autograd.grad(u_model.sum(), x, create_graph=True)[0]
+    
+    u_xx = torch.autograd.grad(u_x.sum(), x, create_graph=True)[0]
+    u_xxt = torch.autograd.grad(u_xx.sum(), t, create_graph=True)[0]
+    u_xxx = torch.autograd.grad(u_xx.sum(), x, create_graph=True)[0]
+    
+    r = u_t - u_xxt + 3.0 * u_model * u_x - 2.0 * u_x * u_xx - u_model * u_xxx
+    r_x = torch.autograd.grad(r.sum(), x, create_graph=True)[0]
+    
+    pde_loss_L2 = torch.mean(torch.square(r))
+    pde_loss_grad = torch.mean(torch.square(r_x))
+
+    lam = lambda_grad(epoch)
+    pde_loss_H1 = pde_loss_L2 + lam * pde_loss_grad
+    
+    ic_mask = torch.abs(t) < 1e-6
+    x_ic = x[ic_mask[:, 0]]
+    u_ic = u_0(x_ic)
+    t_ic = torch.zeros_like(x_ic)
+    u_ic_pred = model(torch.cat([x_ic, t_ic], axis=1))
+    data_fitting_loss_0 = torch.mean(torch.square(u_ic_pred - u_ic))
+    
+    data_fitting_loss_l_r = periodic_boundary_conditions(model)
+    
+    loss, loss_type = chebyshev_loss_function(
+        [pde_loss_H1, data_fitting_loss_0, data_fitting_loss_l_r],
+        lambdas
+    )
+
+    # constraint
+    H0 = H(u_0(x_grid.flatten().reshape(-1, 1)), u_0_x(x_grid.flatten().reshape(-1, 1)), dx)
+
+    Hf = H(u_model.reshape(Nt, Nx), u_x.reshape(Nt, Nx), dx)
+    H_constraint = torch.abs(Hf - H0)/torch.abs(H0)
+
+    eps = 5/(epoch+1)
+    H_constraint = torch.max(H_constraint - eps, torch.zeros_like(H_constraint)).unsqueeze(0)
+
+    loss = dual_opt.forward_update(loss, H_constraint)
+    
+    return loss, loss_type, pde_loss_L2, data_fitting_loss_0, data_fitting_loss_l_r, Hf
+
+
+####### TRAINING LOOP #######
+
+
+model = PINNModel().to(device)
+epochs = 1000
+
+optimizer = torch.optim.NAdam(model.parameters(), lr=1e-2, betas=(0.8, 0.9), eps=1e-07)
+lr_schedule = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9**(1/epochs))
+
+losses, pde_losses, data_losses_0, bc_losses = [], [], [], []
+H_losses_min, H_losses_max, H_losses_mean, H_losses_std = [], [], [], []
+H_losses_abs_error, H_losses_rel_error = [], []
+t0 = time()
+
+
+# dual_opt = ALM(m=1, lr=5e-5, dual_range=(0.,100.), device=device, ctol=1e-3, penalty=0.)
+dual_opt = iALM(m=1, lr=0.1, beta=0.01, sigma=1.0001, gamma=1., dual_range=(0.,10.), ctol=1e-3)
+
+for epoch in range(epochs):
+    optimizer.zero_grad()
+    # loss, loss_type, pde_loss, data_loss_0, bc_loss, H_loss = custom_loss(inputs, model, epoch)
+
+    loss, loss_type, pde_loss, data_loss_0, bc_loss, H_loss = lagrangian_loss(inputs, model, dual_opt, epoch)
+    loss.backward()
+    optimizer.step()
+    
+    
+    if epoch % 1 == 0:
+        lr_schedule.step()
+    
+    with torch.no_grad():
+
+        losses.append(loss.item())
+        pde_losses.append(pde_loss.item())
+        data_losses_0.append(data_loss_0.item())
+        bc_losses.append(bc_loss.item())
+
+        H_loss_min = torch.min(H_loss).item()
+        H_loss_max = torch.max(H_loss).item()
+        H_losses_min.append(H_loss_min)
+        H_losses_max.append(H_loss_max)
+        H_loss_mean = torch.mean(H_loss).item()
+        H_loss_std = torch.std(H_loss).item()
+        H_losses_mean.append(H_loss_mean)
+        H_losses_std.append(H_loss_std)
+        
+        H0 = H(u_0(x_grid.flatten().reshape(-1, 1)), u_0_x(x_grid.flatten().reshape(-1, 1)), dx)
+        Hf = H_loss.detach()
+        H_abs_error = torch.abs(Hf - H0)
+        H_losses_abs_error.append(torch.max(H_abs_error).item())
+        H_rel_error = H_abs_error / (torch.abs(H0) + 1e-16)
+        if isinstance(H_rel_error, torch.Tensor):
+            H_rel_error = H_rel_error.item() if H_rel_error.numel() == 1 else H_rel_error.max().item()
+        H_losses_rel_error.append(H_rel_error)
+        
+        if epoch % 100 == 0 or epoch == epochs - 1:
+            print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item():.6e}")
+
+        # lambdas_values.append((trainable[-1]).numpy())
+        if len(losses) > 1:
+            # SoftAdaptive weights update
+            num1 = np.exp(pde_losses[-1] - pde_losses[-2])
+            num2 = np.exp(data_losses_0[-1] - data_losses_0[-2])
+            num3 = np.exp(bc_losses[-1] - bc_losses[-2])
+            den  = num1 + num2 + num3
+
+            new_lambdas = torch.tensor([num1 / den, num2 / den, num3 / den])
+            lambdas = new_lambdas
+            # lambdas_values.append((lambdas).numpy())
+
+print(f'\nComputation time: {time() - t0:.2f}s')
+print(f"Loss type: {loss_type}")
+print(f"Hamiltonian mean: {H_loss_mean}")
+print(f"Hamiltonian std: {H_loss_std}")
+print(f"Hamiltonian max: {H_loss_max}")
+print(f"Hamiltonian min: {H_loss_min}")
+
+plt.figure(figsize=(10, 6))
+plt.semilogy(losses, label='Total Loss')
+plt.semilogy(pde_losses, label='PDE Loss')
+plt.semilogy(data_losses_0, label='Initial Conditions Loss')
+plt.semilogy(bc_losses, label='Boundary Conditions Loss')
+plt.xlabel('Epoch')
+plt.ylabel('Loss')
+plt.title('Loss Contributions')
+plt.legend()
+plt.grid()
+plt.savefig('./results/ch_loss.png', dpi=300) if save_fig else None
+plt.show()
+
+plt.figure(figsize=(10, 6))
+plt.plot(H_losses_min, label='min H')
+plt.plot(H_losses_max, label='max H')
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian')
+plt.title('Hamiltonian over epochs')
+plt.legend()
+plt.grid()
+plt.savefig('./results/ch_H_loss.png', dpi=300) if save_fig else None
+plt.show()
+
+H_losses_mean_arr = np.array(H_losses_mean)
+H_losses_std_arr = np.array(H_losses_std)
+plt.figure(figsize=(10, 6))
+plt.plot(H_losses_mean_arr)
+plt.fill_between(range(len(H_losses_mean_arr)), H_losses_mean_arr - H_losses_std_arr, H_losses_mean_arr + H_losses_std_arr, alpha=0.2)
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian mean')
+plt.title('Hamiltonian mean over epochs with standard deviation')
+plt.grid()
+plt.savefig('./results/ch_H_loss_mean.png', dpi=300) if save_fig else None
+plt.show()
+
+plt.figure(figsize=(10, 6))
+plt.plot(H_losses_std_arr)
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian std')
+plt.title('Hamiltonian standard deviation over epochs')
+plt.grid()
+plt.savefig('./results/ch_H_loss_std.png', dpi=300) if save_fig else None
+plt.show()
+
+H_losses_abs_error = np.array(H_losses_abs_error)
+plt.figure(figsize=(10, 6))
+plt.plot(H_losses_abs_error)
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian absolute error')
+plt.title('Hamiltonian absolute error over epochs')
+plt.grid()
+plt.savefig('./results/ch_H_loss_abs_error.png', dpi=300) if save_fig else None
+plt.show()
+
+H_losses_rel_error = np.array(H_losses_rel_error)
+plt.figure(figsize=(10, 6))
+plt.plot(H_losses_rel_error)
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian relative error')
+plt.title('Hamiltonian relative error over epochs')
+plt.grid()
+plt.savefig('./results/ch_H_loss_rel_error.png', dpi=300) if save_fig else None
+plt.show()
+
+N = 600
+tspace = torch.linspace(0, tMax, N + 1, dtype=DTYPE, device=device)
+xspace = torch.linspace(xMin, xMax, N + 1, dtype=DTYPE, device=device)
+T_grid, X_grid = torch.meshgrid(tspace, xspace, indexing='ij')
+XTgrid = torch.stack([X_grid.flatten(), T_grid.flatten()], dim=1)
+
+with torch.no_grad():
+    u_pred = model(XTgrid)
+U = u_pred.reshape(N+1, N+1)
+
+X_np = X_grid.cpu().numpy()
+T_np = T_grid.cpu().numpy()
+U_np = U.cpu().numpy()
+
+from mpl_toolkits.mplot3d import Axes3D
+fig = plt.figure(figsize=(9, 6))
+ax = fig.add_subplot(111, projection='3d')
+ax.plot_surface(X_np, T_np, U_np, cmap='viridis')
+ax.set_xlabel('$x$')
+ax.set_ylabel('$t$')
+ax.set_zlabel('$u(x,t)$')
+ax.set_title('Camassa-Holm equation')
+plt.savefig('./results/ch_solution.png', dpi=300) if save_fig else None
+plt.show()
+
+
+import pandas as pd
+df = pd.DataFrame()
+
+df['total_loss'] = losses
+df['pde_loss'] = pde_losses
+df['data_fitting_loss_0'] = data_losses_0
+df['data_fitting_loss_l_r'] = bc_losses
+df['H_loss_min'] = H_losses_min
+df['H_loss_max'] = H_losses_max
+df['H_loss_mean'] = H_losses_mean
+df['H_loss_std'] = H_losses_std
+df['H_loss_abs_error'] = H_losses_abs_error
+df['H_loss_rel_error'] = H_losses_rel_error
+# df['cheb_par'] = cheb_par_values
+
+df.to_csv('./results/camassa/torch_training_history.csv', index=False)
\ No newline at end of file
diff --git a/experiments/structure_preserving_pinns/structurePreservingPINN_kdv_trainableH1.py b/experiments/structure_preserving_pinns/structurePreservingPINN_kdv_trainableH1.py
new file mode 100644
index 0000000..781639e
--- /dev/null
+++ b/experiments/structure_preserving_pinns/structurePreservingPINN_kdv_trainableH1.py
@@ -0,0 +1,769 @@
+import numpy as np
+from sympy import false, per
+import tensorflow as tf
+from time import time
+import matplotlib.pyplot as plt
+
+# TF_USE_LEGACY_KERAS=True
+
+DTYPE = np.float32
+# Nx = 100
+# Nt = 100
+# N_collocation = Nx*Nt
+
+type = 1 # 1 for KdV, 2
+
+if type == 1:
+    nu = -0.022**2
+    alpha = -0.5
+    rho = 0.
+    xMin = 0.
+    xMax = 2.
+    tMax = 5.  # 10.
+elif type == 2:
+    nu = -1.
+    alpha = -3.
+    rho = 0.
+    xMin = -20.
+    xMax = 20.
+    tMax = 100.  # 4.
+
+
+def Nx_from_arch(width, depth, fac=1.5, d_in=2, d_out=1):
+    """
+    Given a PINN architecture (width, depth) and an overparam factor fac,
+    compute Nx = Nt such that:
+    
+        Nx * Nt  ≈ N_params / fac,
+        Nx = Nt,
+        
+    where N_params is the number of trainable parameters.
+    
+    Parameters
+    ----------
+    width : int
+        Number of neurons per hidden layer.
+    depth : int
+        Number of hidden layers.
+    fac : float
+        Over-parameterization factor. Typical values: fac = 2 or 3.
+    d_in : int
+        Input dimension (usually 2: x,t).
+    d_out : int
+        Output dimension (usually 1: u).
+        
+    Returns
+    -------
+    Nx : int
+    Nt : int
+    Ntheta : int
+        Total number of trainable parameters.
+    Ncoll_target : int
+        Target number of collocation points = Ntheta/fac.
+    """
+
+    # Parameter count
+    Ntheta = (d_in + 1) * width \
+             + (depth - 1) * (width * width + width) \
+             + d_out * (width + 1)
+
+    # Target collocation count
+    Ncoll_target = int(Ntheta / fac)
+
+    # Square grid Nx = Nt
+    Nx = int(np.sqrt(Ncoll_target))
+    Nt = Nx
+
+    return Nx, Nt, Ntheta, Ncoll_target
+
+
+width = 80
+depth = 4
+
+Nx, Nt, Ntheta, Ncoll = Nx_from_arch(width=width, depth=depth, fac=10.)
+
+def h_from_NxNt(Nx, Nt, xMin, xMax, tMax):
+    """
+    Compute dx, dt, and h from Nx, Nt and the domain extents.
+    h is defined as max(dx, dt).
+
+    Returns
+    -------
+    dx : float
+    dt : float
+    h  : float
+    """
+
+    Lx = xMax - xMin
+    Lt = tMax
+
+    dx = Lx / (Nx - 1)
+    dt = Lt / (Nt - 1)
+
+    h = max(dx, dt)
+
+    return dx, dt, h
+
+dx, dt, h = h_from_NxNt(Nx, Nt, xMin, xMax, tMax)
+    
+x = np.linspace(xMin, xMax, Nx).reshape((-1, 1)).astype(DTYPE)
+t = np.linspace(0, tMax, Nt).reshape((-1, 1)).astype(DTYPE)
+
+x_train = tf.expand_dims(tf.convert_to_tensor(x.flatten()), axis=-1)
+t_train = tf.expand_dims(tf.convert_to_tensor(t.flatten()), axis=-1)
+
+lambdas = [1., 1., 1.]
+lambdas = tf.Variable(lambdas, trainable=False, name='lambdas', dtype=DTYPE)
+do_training = False
+cheb_par = tf.Variable(0.5, trainable=False, name='cheb_par', dtype=DTYPE)
+
+save_fig = True
+
+# Define the initial condition
+def u_0(x):
+    if type == 1:
+        return tf.math.cos(np.pi * x)
+    elif type == 2:
+        return 6./(tf.math.cosh(x)**2)
+    
+
+def u_0_x(x):
+    if type == 1:
+        return -np.pi*tf.math.sin(np.pi * x)
+    elif type == 2:
+        return -12.*tf.math.sinh(x)/(tf.math.cosh(x)**3)
+
+
+def periodic_bc(model, x, t):
+    xL = tf.ones_like(x) * xMin
+    xR = tf.ones_like(x) * xMax
+    uL = model(tf.concat([xL, t], axis=1))
+    uR = model(tf.concat([xR, t], axis=1))
+    return tf.reduce_mean((uL - uR)**2)
+
+
+def V(u):
+    return alpha*tf.pow(u, 3)/3 + rho*tf.pow(u, 2)/2
+
+
+def kdv_density(u, u_x):
+    return V(u)-nu*tf.pow(u_x, 2)/2
+
+# @tf.function
+def H(u, u_x, dx, density_fn=kdv_density, axis=-1):
+    """
+    Boole’s rule (8th order) along 'axis' for uniform grid with spacing dx.
+    Requires (N-1) % 4 == 0. Otherwise uses Boole on the largest prefix and trapezoid on remainder.
+    """
+    f = density_fn(u, u_x)   # [..., N]
+    n = tf.shape(f)[axis]
+
+    # Trapezoid as a fallback on short tails
+    def _trap_rem(rem):
+        # rem: [..., M] contiguous tail; integrate with trapezoid
+        return tf.reduce_sum(0.5*(rem[..., 1:] + rem[..., :-1]), axis=-1) * tf.cast(dx, f.dtype)
+
+    # Degenerate
+    if tf.less_equal(n, 1):
+        return tf.reduce_sum(f, axis=axis) * dx
+
+    # Largest prefix with (n1-1) % 4 == 0
+    n1 = n - ((n - 1) % 4)
+    # Boole constant for uniform spacing: 2*dx/45
+    c = (2.0 * dx) / 45.0
+
+    # Indices for prefix
+    idx_prefix = tf.range(n1)
+    f0   = tf.gather(f, idx_prefix[0::4], axis=axis)      # 0,4,8,...
+    f1   = tf.gather(f, idx_prefix[1::4], axis=axis)      # 1,5,9,...
+    f2   = tf.gather(f, idx_prefix[2::4], axis=axis)      # 2,6,10,...
+    f3   = tf.gather(f, idx_prefix[3::4], axis=axis)      # 3,7,11,...
+    f4   = tf.gather(f, idx_prefix[4::4], axis=axis)      # 4,8,12,... (last block end)
+
+    # Weighted sum across blocks
+    # Boole's block weights per 5 nodes: [7, 32, 12, 32, 7]
+    # Aggregate across all blocks by summing slices
+    s = 7.0  * tf.reduce_sum(f0, axis=axis)
+    s += 32.0 * tf.reduce_sum(f1, axis=axis)
+    s += 12.0 * tf.reduce_sum(f2, axis=axis)
+    s += 32.0 * tf.reduce_sum(f3, axis=axis)
+    s += 7.0  * tf.reduce_sum(f4, axis=axis)
+
+    boole_part = c * s
+
+    # Tail remainder
+    if tf.equal(n1, n):
+        return boole_part
+
+    rem = tf.gather(f, tf.range(n1-1, n), axis=axis)  # nodes: n1-1 .. n-1
+    tail = _trap_rem(rem)
+    return boole_part + tail
+
+
+def linear_loss_function(tensors, weights):
+    """
+    Computes the sum of the input tensors.
+    
+    Args:
+        tensors (list of tf.Tensor): List of tensors to compute the sum.
+        
+    Returns:
+        tf.Tensor: The sum of the input tensors.
+    """
+    # weights = weights / tf.reduce_sum(weights)  # Normalize weights
+    # stack_tensor = tf.stack(tf.multiply(tensors, weights))
+    # stack_tensor = tf.stack([w * t for w, t in zip(weights, tensors)], axis=0)   
+    stacked = tf.stack(tensors, axis=0)   # shape (n_losses,)
+    weights = weights / tf.reduce_sum(weights)
+    loss = tf.reduce_sum(weights * stacked)  
+    loss_type = 'ls'
+    return loss, loss_type
+
+
+def chebyshev_loss_function(tensors, weights):
+    """
+    Computes the max of the input tensors.
+    
+    Args:
+        tensors (list of tf.Tensor): List of tensors to compute the log-sum-exp.
+        
+    Returns:
+        tf.Tensor: The maximum of the input tensors.
+    """
+    # weights = weights / tf.reduce_sum(weights)  # Normalize weights
+    # stack_tensor = tf.stack(tf.multiply(tensors, weights))
+    # stack_tensor = tf.stack([w * t for w, t in zip(weights, tensors)], axis=0)
+    stacked = tf.stack(tensors, axis=0)
+    loss = tf.reduce_max(weights*stacked)
+    loss_type = 'cs'
+    return loss, loss_type
+
+
+def smooth_chebyshev_loss_function(mu, tensors, weights):
+    """
+    Computes the log of the sum of the exponentials of the input tensors.
+    
+    Args:
+        tensors (list of tf.Tensor): List of tensors to compute the log-sum-exp.
+        
+    Returns:
+        tf.Tensor: The log-sum-exp of the input tensors.
+    """
+    weights = weights / tf.reduce_sum(weights)  # Normalize weights
+    # stack_tensor = tf.stack(tf.multiply(tensors, weights))
+    # stack_tensor = tf.stack([w * t for w, t in zip(weights, tensors)], axis=0)
+    stacked = tf.stack(tensors, axis=0)
+    exp_sum = tf.reduce_sum(tf.math.exp(stacked/mu), axis=0)
+    loss = mu*tf.math.log(exp_sum)
+    loss_type = 'scs'
+    return loss, loss_type
+
+
+def augmentedChebyshev_loss_function(tensors, weights):
+    """
+    Computes the log of the sum of the exponentials of the input tensors.
+    
+    Args:
+        tensors (list of tf.Tensor): List of tensors to compute the log-sum-exp.
+        
+    Returns:
+        tf.Tensor: The log-sum-exp of the input tensors.
+    """
+    # weights = weights / tf.reduce_sum(weights)  # Normalize weights
+    loss_type = 'acs'
+    par = tf.sigmoid(cheb_par)  # par is between 0 and 1
+    return par*chebyshev_loss_function(tensors, weights)[0] + (1-par)*linear_loss_function(tensors, weights)[0], loss_type
+
+
+def sigmoid_centered(x):
+    return 2*tf.nn.sigmoid(x) - 1
+
+
+def PINNModel(num_hidden_layers=depth, num_neurons_per_layer=width):  # 8,40
+    xt_input = tf.keras.Input(shape=(2,))
+    output_u = xt_input
+    for _ in range(num_hidden_layers):
+        output_u = tf.keras.layers.Dense(num_neurons_per_layer,
+                                         activation=sigmoid_centered,  # mish
+                                         kernel_initializer='glorot_uniform',  # glorot_normal
+                                        #  kernel_constraint=tf.keras.constraints.UnitNorm(axis=0)
+                                        #  lora_rank=10
+                                         )(output_u)
+
+    output_u = tf.keras.layers.Dense(units=1,
+                                     activation='linear',
+                                     kernel_initializer='glorot_uniform',  # glorot_normal
+                                    #  kernel_constraint=tf.keras.constraints.UnitNorm(axis=0)
+                                    #  lora_rank=10
+                                     )(output_u)
+    
+    return tf.keras.Model(inputs=xt_input, outputs=output_u)  #tf.keras.Model(inputs=[x_input, t_input], outputs=output_u)
+
+
+def lambda_grad(epoch,
+                start=1000,
+                lam_max=1e-0,
+                kappa=1e-3):
+    epoch = tf.cast(epoch, tf.float32)
+    return lam_max * (1.0 - tf.exp(-kappa * tf.maximum(epoch - start, 0.0)))
+
+
+@tf.function
+def grad_L2_fft_batch(r, L):
+    """
+    r : shape (Nt, Nx)
+    returns : shape (Nt,)
+    """
+    r = tf.cast(r, tf.complex64)
+    Nx = tf.shape(r)[-1]
+
+    k_pos = tf.range(0, Nx//2 + 1, dtype=tf.float32)
+    k_neg = tf.range(-Nx//2 + 1, 0, dtype=tf.float32)
+    k = tf.concat([k_pos, k_neg], axis=0)
+    k = (2.0 * tf.constant(np.pi) / L) * k
+    k = tf.cast(k, tf.complex64)
+
+    r_hat = tf.signal.fft(r)
+
+    grad_energy = tf.reduce_sum(tf.abs(1j * k * r_hat)**2, axis=-1)
+
+    dx = L / tf.cast(Nx, tf.float32)
+    return tf.math.real(grad_energy) * dx
+
+
+@tf.function
+def H1_norm_fft_batch(r, L):
+    """
+    Compute ||r||_{H^1}^2 for each time slice.
+    
+    r : shape (Nt, Nx)
+    returns : shape (Nt,)
+    """
+    r = tf.cast(r, tf.complex64)
+    Nx = tf.shape(r)[-1]
+
+    k_pos = tf.range(0, Nx//2 + 1, dtype=tf.float32)
+    k_neg = tf.range(-Nx//2 + 1, 0, dtype=tf.float32)
+    k = tf.concat([k_pos, k_neg], axis=0)
+    k = (2.0 * tf.constant(np.pi) / L) * k
+    k = tf.cast(k, tf.complex64)
+
+    r_hat = tf.signal.fft(r)
+    weight = 1.0 + tf.abs(k)**2
+
+    H1_sq = tf.reduce_sum(weight * tf.abs(r_hat)**2, axis=-1)
+
+    dx = L / tf.cast(Nx, tf.float32)
+    return tf.math.real(H1_sq) * dx
+
+
+# @tf.function
+def custom_loss(inputs, model, epoch):
+    x, t = inputs[:, 0:1], inputs[:, 1:2]
+
+    with tf.GradientTape(persistent=True) as tape:
+        tape.watch(t)
+        tape.watch(x)
+        with tf.GradientTape(persistent=True) as tape2:            
+            tape2.watch(x)
+            tape2.watch(t)
+            with tf.GradientTape(persistent=True) as tape3:
+                tape3.watch(x)
+                tape3.watch(t)
+                u_model = model(tf.concat([x, t], axis=1))
+            u_x = tape3.gradient(u_model, x)
+            u_t = tape3.gradient(u_model, t)
+        u_xx = tape2.gradient(u_x, x)  
+    u_xxx = tape.gradient(u_xx, x)
+    u_squared_x = 2*u_model*u_x
+    r = u_t - alpha * u_squared_x - rho*u_x - nu*u_xxx  
+    del tape, tape2, tape3  
+    
+    # === PDE residual loss (stabilized, consistent) ===
+    pde_loss_L2 = tf.reduce_mean(tf.square(r))
+
+    r_grid = tf.reshape(r, [Nt, Nx])
+    L = xMax - xMin
+
+    pde_loss_grad = tf.reduce_mean(
+        grad_L2_fft_batch(r_grid, L)
+    )
+
+    # mesh-scaled stabilization parameter
+    lam = 0.01 * (dx**2) * tf.minimum(1.0, epoch / 1000.0)
+
+    pde_loss_H1 = pde_loss_L2 + lam * pde_loss_grad
+    
+    # === Initial condition ===
+    ic_mask = tf.where(tf.abs(t) < 1e-6)
+    x_ic = tf.gather(x, ic_mask[:, 0])
+    u_ic = u_0(x_ic)
+    t_ic = tf.zeros_like(x_ic)
+    u_ic_pred = model(tf.concat([x_ic, t_ic], axis=1))
+    data_fitting_loss_0 = tf.reduce_mean(tf.square(u_ic_pred - u_ic))
+
+    # === Periodic BC ===
+    data_fitting_loss_l_r = periodic_bc(model, x, t)
+
+    # === Chebyshev aggregation ===
+    # loss, loss_type = chebyshev_loss_function(
+    #     [pde_loss_H1, data_fitting_loss_0, data_fitting_loss_l_r],
+    #     lambdas
+    # )
+    # loss, loss_type = augmentedChebyshev_loss_function(
+    #     [pde_loss_H1, data_fitting_loss_0, data_fitting_loss_l_r],
+    #     lambdas
+    # )
+    loss, loss_type = linear_loss_function(
+        [pde_loss_H1, data_fitting_loss_0, data_fitting_loss_l_r],
+        lambdas
+    )
+
+    # === Hamiltonian (monitor only) ===
+    H_loss = H(
+        tf.reshape(u_model, shape=[Nt, Nx]),
+        tf.reshape(u_x, shape=[Nt, Nx]),
+        dx
+    )
+
+    return (
+        loss,
+        loss_type,
+        pde_loss_H1,
+        data_fitting_loss_0,
+        data_fitting_loss_l_r,
+        H_loss,
+    )
+
+
+# Create the PINN model
+model = PINNModel()
+model.summary()
+
+epochs = 5000  # 1000, 2000, 5000
+# # Compile the model
+# model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
+#               loss=lambda y_true, y_pred: custom_loss([x_train, t_train, theta_train], model)[1])
+
+# Create the optimizer with a smaller learning rate
+# learning_rate = 1e-3  # 1e-4
+# learning_rate_type = 'constant'
+# learning_rate = tf.keras.optimizers.schedules.PiecewiseConstantDecay([10, 100], [1e-1, 5e-2, 1e-2])  #OK
+# learning_rate = tf.keras.optimizers.schedules.PiecewiseConstantDecay([100, 300], [1e-2, 1e-3, 1e-4])
+# learning_rate = tf.keras.optimizers.schedules.PolynomialDecay(
+#     initial_learning_rate=1e-3,
+#     decay_steps=epochs,
+#     end_learning_rate=1e-5,
+#     power=2.,
+#     cycle=False,
+#     name='PolynomialDecay'
+# )
+# learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(
+#     initial_learning_rate=1e-3,
+#     decay_steps=epochs, # 100
+#     decay_rate=0.9, # 0.9
+#     staircase=False,
+#     name='ExponentialDecay'
+# )
+# learning_rate_type = 'exponentialDecay'
+learning_rate = tf.keras.optimizers.schedules.CosineDecay(
+    initial_learning_rate=1e-4,
+    decay_steps=1000,
+    alpha=0.5,
+    name='CosineDecay',
+    warmup_target=None,
+    warmup_steps=100
+)
+learning_rate_type = 'cosineDecay'
+# param_values = [delta.numpy()]
+trainable = model.trainable_variables 
+if lambdas.trainable:
+    trainable += [lambdas]
+    
+if cheb_par.trainable:
+    trainable += [cheb_par]
+
+# optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=1e-08, amsgrad=True)
+# optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9, nesterov=True)
+optimizer = tf.keras.optimizers.AdamW(learning_rate=learning_rate, beta_1=0.8, beta_2=0.9, epsilon=1e-07)
+# optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate, rho=0.9, momentum=0.0, epsilon=1e-07, centered=False)
+
+# Training loop
+losses = []
+pde_losses = []
+data_fitting_losses_0 = []
+data_fitting_losses_l_r = []
+delta_gradients = []
+# S_losses_min = []
+# S_losses_max = []
+H_losses_min = []
+H_losses_max = []
+H_losses_mean = []
+H_losses_std = []
+H_losses_abs_error = []
+H_losses_rel_error = []
+lambdas_values = []
+lambdas_values.append(lambdas.numpy())
+cheb_par_values = []
+cheb_par_values.append(cheb_par.numpy())
+
+# Convert data to tensor because tf.GradientTape() can only watch tensor and not numpy arrays
+x_train = tf.convert_to_tensor(x_train)
+t_train = tf.convert_to_tensor(t_train)
+x_grid, t_grid = np.meshgrid(x.flatten(), t.flatten())
+inputs = tf.convert_to_tensor(np.vstack([x_grid.flatten(), t_grid.flatten()]).T)
+stop = False
+# Start timer
+t0 = time()
+for epoch in range(epochs):
+    if not stop:
+        # print("# STARTING EPOCH", epoch + 1)
+
+        with tf.GradientTape() as tape:
+            loss, loss_type, pde_loss, data_fitting_loss_0, data_fitting_loss_l_r, H_loss = custom_loss(inputs, model, epoch)
+
+        # print("Computing gradients")
+        gradients = tape.gradient(loss, trainable)
+        # print(gradients[-1])
+        # print("Applying gradients")
+        optimizer.apply_gradients(zip(gradients, trainable))
+        # print("Appending losses")
+        losses.append(loss.numpy())
+        pde_losses.append(pde_loss.numpy())
+        data_fitting_losses_0.append(data_fitting_loss_0.numpy())
+        data_fitting_losses_l_r.append(data_fitting_loss_l_r.numpy())
+        H_loss_min = tf.reduce_min(H_loss)
+        H_loss_max = tf.reduce_max(H_loss)
+        H_losses_min.append(H_loss_min.numpy())
+        H_losses_max.append(H_loss_max.numpy())
+        H_loss_mean = tf.reduce_mean(H_loss)
+        H_loss_std = tf.math.reduce_std(H_loss)
+        H_losses_mean.append(H_loss_mean.numpy())
+        H_losses_std.append(H_loss_std.numpy())
+        
+        H0 = H(u_0(x_grid), u_0_x(x_grid), dx)  # H0 = H_loss[0].numpy()
+        Hf = H_loss.numpy()
+        H_abs_error = tf.abs(Hf - H0)
+        H_losses_abs_error.append(tf.reduce_max(H_abs_error).numpy())
+        H_rel_error = H_abs_error / tf.abs((H0 + 1e-16))
+        H_losses_rel_error.append(H_rel_error[-1].numpy())
+
+        # # Print S_loss, H_loss
+        # print(f"S_loss at epoch {epoch + 1}: {S_loss.numpy()}")
+        # print(f"H_loss at epoch {epoch + 1}: {H_loss.numpy()}")
+        
+        if len(losses) > 1 and not lambdas.trainable and do_training:
+            # SoftAdaptive weights update
+            # num1 = tf.math.exp(tf.experimental.numpy.cbrt(pde_losses[-1] - pde_losses[-2]))
+            # num2 = tf.math.exp(tf.experimental.numpy.cbrt(data_fitting_losses_0[-1] - data_fitting_losses_0[-2]))
+            # num3 = tf.math.exp(tf.experimental.numpy.cbrt(data_fitting_losses_l_r[-1] - data_fitting_losses_l_r[-2]))   
+            num1 = tf.math.exp((pde_losses[-1] - pde_losses[-2]))
+            num2 = tf.math.exp((data_fitting_losses_0[-1] - data_fitting_losses_0[-2]))
+            num3 = tf.math.exp((data_fitting_losses_l_r[-1] - data_fitting_losses_l_r[-2]))   
+            den  = num1 + num2 + num3
+
+            new_lambdas = tf.stack([num1 / den, num2 / den, num3 / den])
+            lambdas.assign(new_lambdas)
+            lambdas_values.append((lambdas).numpy())
+            
+        if cheb_par.trainable:
+            cheb_par_values.append(cheb_par.numpy())
+        
+        del tape
+
+        if epoch % 100 == 0 or epoch == epochs - 1:
+            print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.numpy()}")
+
+        # if len(losses) > 2 and np.abs(losses[-1] - losses[-2]) / np.abs(losses[-2]) < 1e-8:
+        #     stop = True
+            
+print(f"Loss type: {loss_type}")
+print(f"Hamiltonian mean: {H_loss_mean.numpy()}")
+print(f"Hamiltonian standard deviation: {H_loss_std.numpy()}")
+print(f"Hamiltonian maximum: {H_loss_max.numpy()}")
+print(f"Hamiltonian minimum: {H_loss_min.numpy()}")
+# print(f"Hamiltonian absolute error: {H_abs_error.numpy()}")
+# print(f"Hamiltonian relative error: {H_rel_error.numpy()}")
+print(f"Hamitonian relative error: {H_rel_error[-1].numpy()}")
+# Print computation time
+print('\nComputation time: {} seconds'.format(time() - t0))
+
+import pandas as pd
+
+df = pd.DataFrame()
+df['epoch'] = range(1, epochs + 1)
+
+def generate_save_fig_string(type, epochs, learning_rate_type, loss_type):
+    """
+    Generates a string for saving figures that includes the number of epochs and the type of learning rate.
+    
+    Args:
+        epochs (int): The number of epochs.
+        learning_rate_type (str): The type of learning rate.
+        
+    Returns:
+        str: The generated string for saving figures.
+    """
+    return f"./results/{type}_epochs_{epochs}_lr_{learning_rate_type}_{loss_type}.png"
+
+# Plot the loss history
+plt.semilogy(losses, label='Total Loss')
+plt.semilogy(pde_losses, label='PDE Loss')
+plt.semilogy(data_fitting_losses_0, label='Initial Conditions Loss')
+plt.semilogy(data_fitting_losses_l_r, label='Periodic Boundary Conditions Loss')
+plt.xlabel('Epoch')
+plt.ylabel('Loss')
+plt.title('Loss Contributions')
+plt.legend()
+plt.grid()
+
+df['total_loss'] = losses
+df['pde_loss'] = pde_losses
+df['data_fitting_loss_0'] = data_fitting_losses_0
+df['data_fitting_loss_l_r'] = data_fitting_losses_l_r
+df['H_loss_min'] = H_losses_min
+df['H_loss_max'] = H_losses_max
+df['H_loss_mean'] = H_losses_mean
+df['H_loss_std'] = H_losses_std
+df['H_loss_abs_error'] = H_losses_abs_error
+df['H_loss_rel_error'] = H_losses_rel_error
+# df['cheb_par'] = cheb_par_values
+
+df.to_csv('./results/kdv/training_history.csv', index=False)
+
+
+if save_fig:
+    save_fig_string = generate_save_fig_string('loss', epochs, learning_rate_type, loss_type)
+    # save png
+    plt.savefig(save_fig_string, dpi=300)
+plt.show()
+
+# Evaluate the function
+x_eval = np.linspace(x_train[0].numpy(), x_train[-1].numpy(), 100).reshape((-1, 1)).astype(np.float32)
+t_eval = np.linspace(t_train[0].numpy(), t_train[-1].numpy(), 100).reshape((-1, 1)).astype(np.float32)
+inputs_eval = [x_eval, t_eval]
+    
+
+# Plot the Hamiltonian over epochs
+plt.plot(H_losses_min, label='H_loss_min')
+plt.plot(H_losses_max, label='H_loss_max')
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian')
+plt.title('Hamiltonian over epochs')
+plt.legend()
+plt.grid()
+
+if save_fig:
+    save_fig_string = generate_save_fig_string('H_loss', epochs, learning_rate_type, loss_type)
+    # save png
+    plt.savefig(save_fig_string, dpi=300)
+plt.show()
+    
+# Plot the average Hamiltonian over epochs with standard deviation
+H_losses_mean = np.array(H_losses_mean)
+H_losses_std = np.array(H_losses_std)
+H_losses_abs_error = np.array(H_losses_abs_error)
+H_losses_rel_error = np.array(H_losses_rel_error)
+
+plt.plot(H_losses_mean)
+plt.fill_between(range(len(H_losses_mean)), H_losses_mean - H_losses_std, H_losses_mean + H_losses_std, alpha=0.2)
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian mean')
+plt.title('Hamiltonian mean over epochs with standard deviation')
+# plt.legend()
+plt.grid()
+
+if save_fig:
+    save_fig_string = generate_save_fig_string('H_loss_mean', epochs, learning_rate_type, loss_type)
+    # save png
+    plt.savefig(save_fig_string, dpi=300)
+plt.show()
+    
+# Plot the standard deviation of the Hamiltonian over epochs
+plt.plot(H_losses_std)
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian std')
+plt.title('Hamiltonian standard deviation over epochs')
+# plt.legend()
+plt.grid()
+
+if save_fig:
+    save_fig_string = generate_save_fig_string('H_loss_std', epochs, learning_rate_type, loss_type)
+    # save png
+    plt.savefig(save_fig_string, dpi=300)
+plt.show()
+    
+# Plot the absolute error of the Hamiltonian over epochs
+plt.plot(H_losses_abs_error)    
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian absolute error')
+plt.title('Hamiltonian absolute error over epochs')
+# plt.legend()
+plt.grid()
+
+if save_fig:
+    save_fig_string = generate_save_fig_string('H_loss_abs_error', epochs, learning_rate_type, loss_type)
+    # save png
+    plt.savefig(save_fig_string, dpi=300)
+plt.show()
+    
+# Plot the relative error of the Hamiltonian over epochs
+plt.plot(H_losses_rel_error)
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian relative error')
+plt.title('Hamiltonian relative error over epochs')
+# plt.legend()
+plt.grid()
+
+if save_fig:
+    save_fig_string = generate_save_fig_string('H_loss_rel_error', epochs, learning_rate_type, loss_type)
+    # save png
+    plt.savefig(save_fig_string, dpi=300)
+plt.show()
+    
+    
+# Plot the Chebyshev parameter over epochs
+if cheb_par.trainable:
+    plt.plot(tf.sigmoid(cheb_par_values))
+    plt.xlabel('Epoch')
+    plt.ylabel('Chebyshev parameter')
+    plt.title('Chebyshev parameter over epochs')
+    plt.grid()
+    
+    if save_fig:
+        save_fig_string = generate_save_fig_string('cheb_par', epochs, learning_rate_type, loss_type)
+        # save png
+        plt.savefig(save_fig_string, dpi=300)
+plt.show()
+
+from mpl_toolkits.mplot3d import Axes3D
+
+# Set up meshgrid
+N = 600
+tspace = np.linspace(0, tMax, N + 1)
+xspace = np.linspace(xMin, xMax, N + 1)
+T, X = np.meshgrid(tspace, xspace)
+XTgrid = np.vstack([X.flatten(),T.flatten()]).T
+
+# Determine predictions of u(t, x)
+u_pred = model(tf.cast(XTgrid,DTYPE))
+
+# Reshape upred
+U = u_pred.numpy().reshape(N+1,N+1)
+
+# Surface plot of solution u(t,x)
+fig = plt.figure(figsize=(9,6))
+ax = fig.add_subplot(111, projection='3d')
+ax.plot_surface(X, T, U, cmap='viridis')
+ax.view_init(35,35)
+ax.set_xlabel('$x$')
+ax.set_ylabel('$t$')
+ax.set_zlabel('$u_(x,t)$')
+ax.set_title('Solution to KdV equation')
+ax.set_box_aspect(None, zoom=0.85)
+
+if save_fig:
+    save_fig_string = generate_save_fig_string('sol', epochs, learning_rate_type, loss_type)
+    # save png
+    plt.savefig(save_fig_string, dpi=300)
+plt.show()
\ No newline at end of file
diff --git a/experiments/structure_preserving_pinns/structurePreservingPINN_kdv_trainableH1_pytorch.py b/experiments/structure_preserving_pinns/structurePreservingPINN_kdv_trainableH1_pytorch.py
new file mode 100644
index 0000000..4a028ea
--- /dev/null
+++ b/experiments/structure_preserving_pinns/structurePreservingPINN_kdv_trainableH1_pytorch.py
@@ -0,0 +1,573 @@
+import numpy as np
+import torch
+import torch.nn as nn
+from time import time
+import matplotlib.pyplot as plt
+from humancompatible.train.dual_optim import ALM
+
+DTYPE = torch.float32
+torch.set_default_dtype(DTYPE)
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+type_pde = 1
+if type_pde == 1:
+    nu, alpha, rho = -0.022**2, -0.5, 0.
+    xMin, xMax, tMax = 0., 2., 5.
+elif type_pde == 2:
+    nu, alpha, rho = -1., -3., 0.
+    xMin, xMax, tMax = -20., 20., 100.
+
+def Nx_from_arch(width, depth, fac=1.5, d_in=2, d_out=1):
+    Ntheta = (d_in + 1) * width + (depth - 1) * (width * width + width) + d_out * (width + 1)
+    Ncoll_target = int(Ntheta / fac)
+    Nx = int(np.sqrt(Ncoll_target))
+    Nt = Nx
+    return Nx, Nt, Ntheta, Ncoll_target
+
+width, depth = 80, 4
+Nx, Nt, Ntheta, Ncoll = Nx_from_arch(width=width, depth=depth, fac=10.)
+
+dx = (xMax - xMin) / (Nx - 1)
+dt = tMax / (Nt - 1)
+h = max(dx, dt)
+
+lambdas = torch.tensor([1., 1., 1.], dtype=DTYPE, device=device, requires_grad=False)
+# do_training = False
+cheb_par = torch.tensor(0.5, dtype=DTYPE, device=device, requires_grad=False)
+
+x = torch.linspace(xMin, xMax, Nx, dtype=DTYPE, device=device).reshape(-1, 1)
+t = torch.linspace(0, tMax, Nt, dtype=DTYPE, device=device).reshape(-1, 1)
+x_train = x.reshape(-1, 1)
+t_train = t.reshape(-1, 1)
+# t_grid, x_grid = torch.meshgrid(t.flatten(), x.flatten(), indexing='ij')
+x_grid, t_grid = torch.meshgrid(x.flatten(), t.flatten(), indexing='xy')
+inputs = torch.stack([x_grid.flatten(), t_grid.flatten()], dim=1)
+
+save_fig = True
+
+def u_0(x):
+    if type_pde == 1:
+        return torch.cos(np.pi * x)
+    elif type_pde == 2:
+        return 6. / (torch.cosh(x)**2)
+
+def u_0_x(x):
+    if type_pde == 1:
+        return -np.pi * torch.sin(np.pi * x)
+    elif type_pde == 2:
+        return -12. * torch.sinh(x) / (torch.cosh(x)**3)
+
+def periodic_bc(model, x, t):
+    xL = torch.full_like(x, xMin)
+    xR = torch.full_like(x, xMax)
+    uL = model(torch.cat([xL, t], dim=1))
+    uR = model(torch.cat([xR, t], dim=1))
+    return torch.mean((uL - uR)**2)
+
+def V(u):
+    return alpha * (u**3) / 3 + (rho * u**2) / 2
+
+def kdv_density(u, u_x):
+    return V(u) - nu * torch.pow(u_x, 2) / 2
+
+
+def H(u, u_x, dx, density_fn=kdv_density, axis=-1):
+    """
+    Boole’s rule (8th order) along `axis` for a uniform grid with spacing dx.
+    Requires (N-1) % 4 == 0. Otherwise applies Boole on the largest valid
+    prefix and trapezoid rule on the remainder.
+    """
+    f = density_fn(u, u_x)  # [..., N]
+    n = f.shape[axis]
+
+    # Normalize negative axis
+    axis = axis % f.ndim
+
+    def _trap_rem(rem):
+        """
+        rem: contiguous tail segment integrated with trapezoid rule
+        """
+        left = rem.narrow(-1, 0, rem.shape[-1] - 1)
+        right = rem.narrow(-1, 1, rem.shape[-1] - 1)
+        return torch.sum(0.5 * (left + right), dim=-1) * dx
+
+    # Degenerate case
+    if n <= 1:
+        return torch.sum(f, dim=axis) * dx
+
+    # Largest prefix satisfying (n1 - 1) % 4 == 0
+    n1 = n - ((n - 1) % 4)
+
+    # Boole constant
+    c = (2.0 * dx) / 45.0
+
+    # Build index slices
+    idx_prefix = torch.arange(n1, device=f.device)
+
+    f0 = torch.index_select(f, axis, idx_prefix[0::4])
+    f1 = torch.index_select(f, axis, idx_prefix[1::4])
+    f2 = torch.index_select(f, axis, idx_prefix[2::4])
+    f3 = torch.index_select(f, axis, idx_prefix[3::4])
+    f4 = torch.index_select(f, axis, idx_prefix[4::4])
+
+    # Weighted Boole sum
+    s = 7.0 * torch.sum(f0, dim=axis)
+    s += 32.0 * torch.sum(f1, dim=axis)
+    s += 12.0 * torch.sum(f2, dim=axis)
+    s += 32.0 * torch.sum(f3, dim=axis)
+    s += 7.0 * torch.sum(f4, dim=axis)
+
+    boole_part = c * s
+
+    # No remainder
+    if n1 == n:
+        return boole_part
+
+    # Tail remainder
+    rem_idx = torch.arange(n1 - 1, n, device=f.device)
+    rem = torch.index_select(f, axis, rem_idx)
+
+    tail = _trap_rem(rem)
+
+    return boole_part + tail
+
+def linear_loss_function(tensors, weights):
+    stacked = torch.stack(tensors)
+    weights = weights / torch.sum(weights)
+    loss = torch.sum(weights * stacked)
+    return loss, 'ls'
+
+def chebyshev_loss_function(tensors, weights):
+    stacked = torch.stack(tensors)
+    loss = torch.max(weights * stacked)
+    return loss, 'cs'
+
+def sigmoid_centered(x):
+    return 2 * torch.sigmoid(x) - 1
+
+class PINNModel(nn.Module):
+    def __init__(self, num_hidden_layers=depth, num_neurons_per_layer=width):
+        super().__init__()
+        layers = []
+        in_dim = 2
+        for _ in range(num_hidden_layers):
+            layers.append(nn.Linear(in_dim, num_neurons_per_layer))
+            layers.append(nn.Tanh())
+            in_dim = num_neurons_per_layer
+        layers.append(nn.Linear(in_dim, 1))
+        self.net = nn.Sequential(*layers)
+    
+    def forward(self, x):
+        return self.net(x)
+
+
+def lambda_grad(epoch,
+                start=1000,
+                lam_max=1e0,
+                kappa=1e-3):
+    epoch = torch.as_tensor(epoch, dtype=torch.float32)
+    return lam_max * (
+        1.0 - torch.exp(-kappa * torch.clamp(epoch - start, min=0.0))
+    )
+
+
+def grad_L2_fft_batch(r, L):
+    """
+    r : shape (Nt, Nx)
+    returns : shape (Nt,)
+    """
+    r = r.to(torch.complex64)
+    Nx = r.shape[-1]
+
+    device = r.device
+
+    k_pos = torch.arange(0, Nx // 2 + 1,
+                         dtype=torch.float32,
+                         device=device)
+
+    k_neg = torch.arange(-Nx // 2 + 1, 0,
+                         dtype=torch.float32,
+                         device=device)
+
+    k = torch.cat([k_pos, k_neg], dim=0)
+    k = (2.0 * np.pi / L) * k
+    k = k.to(torch.complex64)
+
+    r_hat = torch.fft.fft(r, dim=-1)
+
+    grad_energy = torch.sum(torch.abs(1j * k * r_hat) ** 2, dim=-1)
+
+    dx = L / float(Nx)
+
+    return torch.real(grad_energy) * dx
+
+
+def H1_norm_fft_batch(r, L):
+    """
+    Compute ||r||_{H^1}^2 for each time slice.
+
+    r : shape (Nt, Nx)
+    returns : shape (Nt,)
+    """
+    r = r.to(torch.complex64)
+    Nx = r.shape[-1]
+
+    device = r.device
+
+    k_pos = torch.arange(0, Nx // 2 + 1,
+                         dtype=torch.float32,
+                         device=device)
+
+    k_neg = torch.arange(-Nx // 2 + 1, 0,
+                         dtype=torch.float32,
+                         device=device)
+
+    k = torch.cat([k_pos, k_neg], dim=0)
+    k = (2.0 * np.pi / L) * k
+    k = k.to(torch.complex64)
+
+    r_hat = torch.fft.fft(r, dim=-1)
+
+    weight = 1.0 + torch.abs(k) ** 2
+
+    H1_sq = torch.sum(weight * torch.abs(r_hat) ** 2, dim=-1)
+
+    dx = L / float(Nx)
+
+    return torch.real(H1_sq) * dx
+
+
+def custom_loss(inputs, model, epoch):
+    """
+    Assumes the following globals/functions exist:
+
+        alpha, rho, nu
+        Nt, Nx
+        dx, xMin, xMax
+        lambdas
+
+        u_0(...)
+        periodic_bc(...)
+        linear_loss_function(...)
+        H(...)
+    """
+
+    x = inputs[:, 0:1].clone().detach().requires_grad_(True)
+    t = inputs[:, 1:2].clone().detach().requires_grad_(True)
+
+    xt = torch.cat([x, t], dim=1)
+
+    # Forward pass
+    u_model = model(xt)
+
+    # First derivatives
+    u_x = torch.autograd.grad(
+        u_model,
+        x,
+        grad_outputs=torch.ones_like(u_model),
+        create_graph=True,
+        retain_graph=True,
+    )[0]
+
+    u_t = torch.autograd.grad(
+        u_model,
+        t,
+        grad_outputs=torch.ones_like(u_model),
+        create_graph=True,
+        retain_graph=True,
+    )[0]
+
+    # Second derivative
+    u_xx = torch.autograd.grad(
+        u_x,
+        x,
+        grad_outputs=torch.ones_like(u_x),
+        create_graph=True,
+        retain_graph=True,
+    )[0]
+
+    # Third derivative
+    u_xxx = torch.autograd.grad(
+        u_xx,
+        x,
+        grad_outputs=torch.ones_like(u_xx),
+        create_graph=True,
+        retain_graph=True,
+    )[0]
+
+    # PDE residual
+    u_squared_x = 2 * u_model * u_x
+
+    r = (
+        u_t
+        - alpha * u_squared_x
+        - rho * u_x
+        - nu * u_xxx
+    )
+
+    # === PDE residual loss ===
+    pde_loss_L2 = torch.mean(r ** 2)
+
+    r_grid = r.reshape(Nt, Nx)
+
+    L = xMax - xMin
+
+    pde_loss_grad = torch.mean(
+        grad_L2_fft_batch(r_grid, L)
+    )
+
+    # mesh-scaled stabilization parameter
+    lam = 0.01 * (dx ** 2) * min(1.0, float(epoch) / 1000.0)
+
+    pde_loss_H1 = pde_loss_L2 + lam * pde_loss_grad
+
+    # === Initial condition ===
+    ic_mask = torch.where(torch.abs(t) < 1e-6)[0]
+
+    x_ic = x[ic_mask]
+
+    u_ic = u_0(x_ic)
+
+    t_ic = torch.zeros_like(x_ic)
+
+    u_ic_pred = model(torch.cat([x_ic, t_ic], dim=1))
+
+    data_fitting_loss_0 = torch.mean(
+        (u_ic_pred - u_ic) ** 2
+    )
+
+    # === Periodic BC ===
+    data_fitting_loss_l_r = periodic_bc(model, x, t)
+
+    # === Aggregated loss ===
+    loss, loss_type = linear_loss_function(
+        [
+            pde_loss_H1,
+            data_fitting_loss_0,
+            data_fitting_loss_l_r,
+        ],
+        lambdas
+    )
+
+    # === Hamiltonian (monitor only) ===
+    # breakpoint()
+    H_loss = H(
+        u_model.reshape(Nt, Nx),
+        u_x.reshape(Nt, Nx),
+        dx
+    )
+
+    return (
+        loss,
+        loss_type,
+        pde_loss_H1,
+        data_fitting_loss_0,
+        data_fitting_loss_l_r,
+        H_loss,
+    )
+
+def lagrangian_loss(inputs, model, dual_opt):
+    x, t = inputs[:, 0:1], inputs[:, 1:2]
+    x.requires_grad_(True)
+    t.requires_grad_(True)
+    
+    u_model = model(torch.cat([x, t], dim=1))
+    
+    u_t = torch.autograd.grad(u_model.sum(), t, create_graph=True)[0]
+    u_x = torch.autograd.grad(u_model.sum(), x, create_graph=True)[0]
+    
+    u_xx = torch.autograd.grad(u_x.sum(), x, create_graph=True)[0]
+    u_xxx = torch.autograd.grad(u_xx.sum(), x, create_graph=True)[0]
+    
+    u_squared_x = 2 * u_model * u_x
+    r = u_t - alpha * u_squared_x - rho * u_x - nu * u_xxx
+    
+    pde_loss_L2 = torch.mean(torch.square(r))
+    
+    # constraint: pde_loss_L2 = 0 or <= eps
+
+    ic_mask = torch.abs(t) < 1e-6
+    x_ic = x[ic_mask[:, 0]]
+    u_ic = u_0(x_ic)
+    t_ic = torch.zeros_like(x_ic)
+    u_ic_pred = model(torch.cat([x_ic, t_ic], axis=1))
+    data_fitting_loss_0 = torch.mean((u_ic_pred - u_ic) ** 2) # IC loss
+    
+    data_fitting_loss_l_r = periodic_bc(model, x, t) # BC loss
+
+    # ask: what's H_loss here, and should we use it in a constraint
+    H_loss = H(u_model.reshape(Nt, Nx), u_x.reshape(Nt, Nx), dx)
+    
+    data_fitting_loss = 0.5 * data_fitting_loss_0 + 0.5 * data_fitting_loss_l_r
+
+    lagr = dual_opt.forward_update(data_fitting_loss, pde_loss_L2.unsqueeze(0))
+    loss_type = 'ls'
+
+    return lagr, loss_type, pde_loss_L2, data_fitting_loss_0, data_fitting_loss_l_r, H_loss
+
+
+
+model = PINNModel().to(device)
+epochs = 5000
+
+optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
+
+dual_opt = ALM(m=1, lr=1e-3, dual_range=(0.,10.), device=device)
+
+lr_schedule = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
+    optimizer,
+    T_0=100,
+    T_mult=1,
+    eta_min=0.5*1e-4
+)
+
+losses, pde_losses, data_losses_0, bc_losses = [], [], [], []
+H_losses_min, H_losses_max, H_losses_mean, H_losses_std = [], [], [], []
+H_losses_abs_error, H_losses_rel_error = [], []
+t0 = time()
+
+for epoch in range(epochs):
+    optimizer.zero_grad()
+    loss, loss_type, pde_loss, data_loss_0, bc_loss, H_loss = custom_loss(inputs, model, epoch)
+    loss.backward()
+    optimizer.step()
+    lr_schedule.step()
+    
+    with torch.no_grad():
+        losses.append(loss.item())
+        pde_losses.append(pde_loss.item())
+        data_losses_0.append(data_loss_0.item())
+        bc_losses.append(bc_loss.item())
+        
+        H_loss_min = torch.min(H_loss).item()
+        H_loss_max = torch.max(H_loss).item()
+        H_losses_min.append(H_loss_min)
+        H_losses_max.append(H_loss_max)
+        H_loss_mean = torch.mean(H_loss).item()
+        H_loss_std = torch.std(H_loss).item()
+        H_losses_mean.append(H_loss_mean)
+        H_losses_std.append(H_loss_std)
+        
+        H0 = H(u_0(x_grid.flatten().reshape(-1, 1)).reshape(Nt, Nx), u_0_x(x_grid.flatten().reshape(-1, 1)).reshape(Nt, Nx), dx)
+        Hf = H_loss.detach()
+        # breakpoint()
+        H_abs_error = torch.abs(Hf - H0)
+        H_losses_abs_error.append(torch.max(H_abs_error).item())
+        H_rel_error = H_abs_error / (torch.abs(H0) + 1e-16)
+        if isinstance(H_rel_error, torch.Tensor):
+            H_rel_error = H_rel_error.item() if H_rel_error.numel() == 1 else H_rel_error.max().item()
+        H_losses_rel_error.append(H_rel_error)
+
+        if epoch > 1:
+            # SoftAdaptive weights update
+            # num1 = tf.math.exp(tf.experimental.numpy.cbrt(pde_losses[-1] - pde_losses[-2]))
+            # num2 = tf.math.exp(tf.experimental.numpy.cbrt(data_fitting_losses_0[-1] - data_fitting_losses_0[-2]))
+            # num3 = tf.math.exp(tf.experimental.numpy.cbrt(data_fitting_losses_l_r[-1] - data_fitting_losses_l_r[-2]))   
+            num1 = np.exp((pde_losses[-1] - pde_losses[-2]))
+            num2 = np.exp((data_losses_0[-1] - data_losses_0[-2]))
+            num3 = np.exp((bc_losses[-1] - bc_losses[-2]))   
+            den  = num1 + num2 + num3
+
+            new_lambdas = torch.tensor([num1 / den, num2 / den, num3 / den])
+            lambdas = new_lambdas
+
+        if epoch % 100 == 0 or epoch == epochs - 1:
+            print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item():.6e}")
+
+print(f'\nComputation time: {time() - t0:.2f}s')
+print(f"Loss type: {loss_type}")
+print(f"Hamiltonian mean: {H_loss_mean}")
+print(f"Hamiltonian std: {H_loss_std}")
+print(f"Hamiltonian max: {H_loss_max}")
+print(f"Hamiltonian min: {H_loss_min}")
+
+plt.figure(figsize=(10, 6))
+plt.semilogy(losses, label='Total Loss')
+plt.semilogy(pde_losses, label='PDE Loss')
+plt.semilogy(data_losses_0, label='Initial Conditions Loss')
+plt.semilogy(bc_losses, label='Periodic Boundary Conditions Loss')
+plt.xlabel('Epoch')
+plt.ylabel('Loss')
+plt.title('Loss Contributions')
+plt.legend()
+plt.grid()
+plt.savefig('./results/kdv_loss.png', dpi=300) if save_fig else None
+plt.show()
+
+plt.figure(figsize=(10, 6))
+plt.plot(H_losses_min, label='H_loss_min')
+plt.plot(H_losses_max, label='H_loss_max')
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian')
+plt.title('Hamiltonian over epochs')
+plt.legend()
+plt.grid()
+plt.savefig('./results/kdv_H_loss.png', dpi=300) if save_fig else None
+plt.show()
+
+H_losses_mean_arr = np.array(H_losses_mean)
+H_losses_std_arr = np.array(H_losses_std)
+plt.figure(figsize=(10, 6))
+plt.plot(H_losses_mean_arr)
+plt.fill_between(range(len(H_losses_mean_arr)), H_losses_mean_arr - H_losses_std_arr, H_losses_mean_arr + H_losses_std_arr, alpha=0.2)
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian mean')
+plt.title('Hamiltonian mean over epochs with standard deviation')
+plt.grid()
+plt.savefig('./results/kdv_H_loss_mean.png', dpi=300) if save_fig else None
+plt.show()
+
+plt.figure(figsize=(10, 6))
+plt.plot(H_losses_std_arr)
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian std')
+plt.title('Hamiltonian standard deviation over epochs')
+plt.grid()
+plt.savefig('./results/kdv_H_loss_std.png', dpi=300) if save_fig else None
+plt.show()
+
+H_losses_abs_error = np.array(H_losses_abs_error)
+plt.figure(figsize=(10, 6))
+plt.plot(H_losses_abs_error)
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian absolute error')
+plt.title('Hamiltonian absolute error over epochs')
+plt.grid()
+plt.savefig('./results/kdv_H_loss_abs_error.png', dpi=300) if save_fig else None
+plt.show()
+
+H_losses_rel_error = np.array(H_losses_rel_error)
+plt.figure(figsize=(10, 6))
+plt.plot(H_losses_rel_error)
+plt.xlabel('Epoch')
+plt.ylabel('Hamiltonian relative error')
+plt.title('Hamiltonian relative error over epochs')
+plt.grid()
+plt.savefig('./results/kdv_H_loss_rel_error.png', dpi=300) if save_fig else None
+plt.show()
+
+N = 600
+tspace = torch.linspace(0, tMax, N + 1, dtype=DTYPE, device=device)
+xspace = torch.linspace(xMin, xMax, N + 1, dtype=DTYPE, device=device)
+T_grid, X_grid = torch.meshgrid(tspace, xspace, indexing='ij')
+XTgrid = torch.stack([X_grid.flatten(), T_grid.flatten()], dim=1)
+
+with torch.no_grad():
+    u_pred = model(XTgrid)
+U = u_pred.reshape(N+1, N+1)
+
+X_np = X_grid.cpu().numpy()
+T_np = T_grid.cpu().numpy()
+U_np = U.cpu().numpy()
+
+from mpl_toolkits.mplot3d import Axes3D
+fig = plt.figure(figsize=(9, 6))
+ax = fig.add_subplot(111, projection='3d')
+ax.plot_surface(X_np, T_np, U_np, cmap='viridis')
+ax.set_xlabel('$x$')
+ax.set_ylabel('$t$')
+ax.set_zlabel('$u(x,t)$')
+ax.set_title('KdV equation')
+ax.set_box_aspect(None, zoom=0.85)
+plt.savefig('./results/kdv_solution.png', dpi=300) if save_fig else None
+plt.show()
\ No newline at end of file