From 75be7e395a7621d24fb47417425aebd67fe6b3b7 Mon Sep 17 00:00:00 2001 From: andrewklayk Date: Mon, 25 May 2026 13:52:32 +0200 Subject: [PATCH] hamiltonian pinn experiments --- ...structurePreservingPINN_2D_trainableACS.py | 758 +++++++++++++++++ ...ePreservingPINN_2D_trainableACS_pytorch.py | 279 +++++++ ...ePreservingPINN_CamassaHolm_trainableH1.py | 729 +++++++++++++++++ ...ingPINN_CamassaHolm_trainableH1_pytorch.py | 389 +++++++++ ...structurePreservingPINN_kdv_trainableH1.py | 769 ++++++++++++++++++ ...ePreservingPINN_kdv_trainableH1_pytorch.py | 573 +++++++++++++ 6 files changed, 3497 insertions(+) create mode 100644 experiments/structure_preserving_pinns/structurePreservingPINN_2D_trainableACS.py create mode 100644 experiments/structure_preserving_pinns/structurePreservingPINN_2D_trainableACS_pytorch.py create mode 100644 experiments/structure_preserving_pinns/structurePreservingPINN_CamassaHolm_trainableH1.py create mode 100644 experiments/structure_preserving_pinns/structurePreservingPINN_CamassaHolm_trainableH1_pytorch.py create mode 100644 experiments/structure_preserving_pinns/structurePreservingPINN_kdv_trainableH1.py create mode 100644 experiments/structure_preserving_pinns/structurePreservingPINN_kdv_trainableH1_pytorch.py diff --git a/experiments/structure_preserving_pinns/structurePreservingPINN_2D_trainableACS.py b/experiments/structure_preserving_pinns/structurePreservingPINN_2D_trainableACS.py new file mode 100644 index 0000000..82beaa6 --- /dev/null +++ b/experiments/structure_preserving_pinns/structurePreservingPINN_2D_trainableACS.py @@ -0,0 +1,758 @@ +import numpy as np +from sympy import N +import tensorflow as tf +from time import time +import matplotlib.pyplot as plt + +# TF_USE_LEGACY_KERAS=True + +DTYPE = np.float32 +Nx = 10 #100 +Ny = 10 #100 +Nt = 10 #50 +N_collocation = Nx*Ny*Nt +d_in = 3 + +xMin = 0.0 +xMax = 8.0 +yMin = 0.0 +yMax = 8.0 +tMax = 50. +d_in = 2 + +def choose_width_depth(N_collocation=N_collocation, overparam_factor=3.0, d_in=d_in, d_out=1): + """ + Returns a single (width, depth) pair for a mildly overparameterized PINN. + + N_collocation : Nx * Ny * Nt + overparam_factor : how many times larger the model than data (default 3x) + """ + + N_target = int(overparam_factor * N_collocation) + + # For 3rd-order PDEs like ZK: + # depth 5–7 is stable. We fix depth=6 (good compromise). + depth = 8 + + # Parameter formula: + # N = (depth-1) w^2 + (d_in + depth + d_out) w + d_out + a = depth - 1 + b = d_in + depth + d_out + c = d_out - N_target + + # Solve quadratic for width + width = int((-b + np.sqrt(b*b - 4*a*c)) / (2*a)) + + return width, depth + +width, depth = choose_width_depth() +dx = (xMax - xMin) / (Nx - 1) +dy = (yMax - yMin) / (Ny - 1) +dt = tMax / (Nt - 1) +h = np.max([dx, dy, dt]) + +lambdas = [1., 1., 1.] #[1.7, 0.2, 1.4] #for cs #[1., 0.5, 1.] # [1.7, 0.2, 1.4] for cs # +lambdas = tf.Variable(lambdas, trainable=False, name='lambdas', dtype=DTYPE) + +cheb_par = tf.Variable(0.5, trainable=True, name='cheb_par', dtype=DTYPE) + +x = np.linspace(xMin, xMax, Nx).reshape((-1, 1)).astype(DTYPE) +y = np.linspace(yMin, yMax, Ny).reshape((-1, 1)).astype(DTYPE) +t = np.linspace(0, tMax, Nt).reshape((-1, 1)).astype(DTYPE) +x_grid, y_grid, t_grid = np.meshgrid(x, y, t, indexing='ij') +x_train = x_grid.flatten(); x_train = tf.convert_to_tensor(x_train); x_train = tf.expand_dims(x_train, axis=-1) +y_train = y_grid.flatten(); y_train = tf.convert_to_tensor(y_train); y_train = tf.expand_dims(y_train, axis=-1) +t_train = t_grid.flatten(); t_train = tf.convert_to_tensor(t_train); t_train = tf.expand_dims(t_train, axis=-1) +xyt_train = tf.concat([x_train, y_train, t_train], axis=-1) + +save_fig = True + +# Define the initial condition +def u_0(x, y): + ##1 + epsilon = 0.01 + theta = 0. + y1 = 0. + y2 = 0. + c1 = 0.45 + c2 = 0.25 + x1 = 2.5 + x2 = 3.3 + out = 3*c1/(tf.math.cosh(0.5*tf.sqrt(c1/epsilon)*((x-x1)*tf.math.cos(theta) + (y-y1)*tf.math.sin(theta))))**2 + + 3*c2/(tf.math.cosh(0.5*tf.sqrt(c2/epsilon)*((x-x2)*tf.math.cos(theta) + (y-y2)*tf.math.sin(theta))))**2 + ##2 + # epsilon = 0.01 + # theta = 0. + # y1 = 4. + # c1 = 1. + # x1 = 2.5 + # out = 3*c1/(tf.math.cosh(0.5*tf.sqrt(c1/epsilon)*((x-x1)*tf.math.cos(theta) + (y-y1)*tf.math.sin(theta))))**2 + + return out + # mpmath for sech + +# def periodic_boundary_conditions(model, Nbc=2000): +# x = tf.random.uniform((Nbc,1), xMin, xMax) +# y = tf.random.uniform((Nbc,1), yMin, yMax) +# t = tf.random.uniform((Nbc,1), 0, tMax) + +# xL = tf.ones_like(x)*xMin; xR = tf.ones_like(x)*xMax +# yL = tf.ones_like(y)*yMin; yR = tf.ones_like(y)*yMax + +# uLx = model(tf.concat([xL,y,t],1)) +# uRx = model(tf.concat([xR,y,t],1)) +# uLy = model(tf.concat([x,yL,t],1)) +# uRy = model(tf.concat([x,yR,t],1)) + +# return tf.reduce_mean((uLx-uRx)**2 + (uLy-uRy)**2) + + +def periodic_boundary_conditions(model, Nbc=2000): + + # Random boundary sampling (correct choice) + x = tf.random.uniform((Nbc,1), xMin, xMax) + y = tf.random.uniform((Nbc,1), yMin, yMax) + t = tf.random.uniform((Nbc,1), 0.0, tMax) + + xL = tf.ones_like(x) * xMin + xR = tf.ones_like(x) * xMax + yL = tf.ones_like(y) * yMin + yR = tf.ones_like(y) * yMax + + with tf.GradientTape(persistent=True) as tape: + tape.watch([xL, xR, yL, yR]) + + uLx = model(tf.concat([xL, y, t], 1)) + uRx = model(tf.concat([xR, y, t], 1)) + + uLy = model(tf.concat([x, yL, t], 1)) + uRy = model(tf.concat([x, yR, t], 1)) + + # First derivatives + uxL = tape.gradient(uLx, xL) + uxR = tape.gradient(uRx, xR) + + uyL = tape.gradient(uLy, yL) + uyR = tape.gradient(uRy, yR) + + del tape + + # Enforce periodicity of values AND derivatives + loss = tf.reduce_mean( + (uLx - uRx)**2 + + (uLy - uRy)**2 + + (uxL - uxR)**2 + + (uyL - uyR)**2 + ) + + return loss + + + +def H(u, u_x, u_y): + return tf.reduce_sum((tf.pow(u_x,2) + tf.pow(u_y,2))/2.0-tf.pow(u,3)/6.0, axis=[0,1]) * dx*dy + + +def linear_loss_function(tensors, weights): + """ + Computes the sum of the input tensors. + + Args: + tensors (list of tf.Tensor): List of tensors to compute the sum. + + Returns: + tf.Tensor: The sum of the input tensors. + """ + # weights = weights / tf.reduce_sum(weights) # Normalize weights + # stack_tensor = tf.stack(tf.multiply(tensors, weights)) + # stack_tensor = tf.stack([w * t for w, t in zip(weights, tensors)], axis=0) + stacked = tf.stack(tensors, axis=0) # shape (n_losses,) + weights = weights / tf.reduce_sum(weights) + loss = tf.reduce_sum(weights * stacked) + loss_type = 'ls' + return loss, loss_type + + +def chebyshev_loss_function(tensors, weights): + """ + Computes the max of the input tensors. + + Args: + tensors (list of tf.Tensor): List of tensors to compute the log-sum-exp. + + Returns: + tf.Tensor: The maximum of the input tensors. + """ + # weights = weights / tf.reduce_sum(weights) # Normalize weights + # stack_tensor = tf.stack(tf.multiply(tensors, weights)) + # stack_tensor = tf.stack([w * t for w, t in zip(weights, tensors)], axis=0) + stacked = tf.stack(tensors, axis=0) + loss = tf.reduce_max(weights*stacked) + loss_type = 'cs' + return loss, loss_type + + +def smooth_chebyshev_loss_function(mu, tensors, weights): + """ + Computes the log of the sum of the exponentials of the input tensors. + + Args: + tensors (list of tf.Tensor): List of tensors to compute the log-sum-exp. + + Returns: + tf.Tensor: The log-sum-exp of the input tensors. + """ + weights = weights / tf.reduce_sum(weights) # Normalize weights + # stack_tensor = tf.stack(tf.multiply(tensors, weights)) + # stack_tensor = tf.stack([w * t for w, t in zip(weights, tensors)], axis=0) + stacked = tf.stack(tensors, axis=0) + exp_sum = tf.reduce_sum(tf.math.exp(stacked/mu), axis=0) + loss = mu*tf.math.log(exp_sum) + loss_type = 'scs' + return loss, loss_type + + +def augmentedChebyshev_loss_function(tensors, weights): + """ + Computes the log of the sum of the exponentials of the input tensors. + + Args: + tensors (list of tf.Tensor): List of tensors to compute the log-sum-exp. + weights (list of tf.Tensor): List of weights for each tensor. + Returns: + tf.Tensor: The log-sum-exp of the input tensors. + """ + # weights = weights / tf.reduce_sum(weights) # Normalize weights + loss_type = 'acs' + par = tf.sigmoid(cheb_par) # par is between 0 and 1 + return par*chebyshev_loss_function(tensors, weights)[0] + (1-par)*linear_loss_function(tensors, weights)[0], loss_type + + +class FourierFeatures(tf.keras.layers.Layer): + def __init__(self, n_modes=5): + super().__init__() + self.n_modes = n_modes + + def call(self, inputs): + x = inputs[:, 0:1] + y = inputs[:, 1:2] + t = inputs[:, 2:3] + + features = [t] + + for k in range(1, self.n_modes + 1): + features.append(tf.sin(2*np.pi*k*(x - xMin)/(xMax-xMin))) + features.append(tf.cos(2*np.pi*k*(x - xMin)/(xMax-xMin))) + features.append(tf.sin(2*np.pi*k*(y - yMin)/(yMax-yMin))) + features.append(tf.cos(2*np.pi*k*(y - yMin)/(yMax-yMin))) + + return tf.concat(features, axis=1) + +def PINNModel(num_hidden_layers=depth, num_neurons_per_layer=width): # 8,80 OK (# 8,40 # 10,40) + xyt_input = tf.keras.Input(shape=(3,)) + output_u = FourierFeatures(n_modes=4)(xyt_input) + for _ in range(num_hidden_layers): + output_u = tf.keras.layers.Dense(num_neurons_per_layer, + activation='tanh', # tanh + kernel_initializer='glorot_uniform', # glorot_normal + )(output_u) + + output_u = tf.keras.layers.Dense(units=1, + activation='linear', # mish + kernel_initializer='glorot_uniform', # glorot_normal + )(output_u) + + return tf.keras.Model(inputs=xyt_input, outputs=output_u) #tf.keras.Model(inputs=[x_input, t_input], outputs=output_u) + + +# def PINNModel(num_hidden_layers=depth, num_neurons_per_layer=width): # 8,80 OK (# 8,40 # 10,40) +# xyt_input = tf.keras.Input(shape=(3,)) +# output_u = xyt_input +# for _ in range(num_hidden_layers): +# output_u = tf.keras.layers.Dense(num_neurons_per_layer, +# activation='tanh', # tanh +# kernel_initializer='glorot_uniform', # glorot_normal +# )(output_u) + +# output_u = tf.keras.layers.Dense(units=1, +# activation='linear', # mish +# kernel_initializer='glorot_uniform', # glorot_normal +# )(output_u) + +# # Define the initial condition +# # x_input = tf.reshape(xt_input[:, 0], shape=[-1, 1]) +# # t_input = tf.reshape(xt_input[:, 1], shape=[-1, 1]) +# # initial_u = u_0(x_input) +# # output_u = tf.where(tf.equal(t_input, 0), initial_u, output_u) + +# return tf.keras.Model(inputs=xyt_input, outputs=output_u) #tf.keras.Model(inputs=[x_input, t_input], outputs=output_u) + + +@tf.function +def custom_loss(inputs, model): + xyt = inputs + x, y, t = xyt[:, 0:1], xyt[:, 1:2], xyt[:, 2:3] + # zeros = tf.zeros_like(x) + + with tf.GradientTape(persistent=True) as tape: + tape.watch(t) + tape.watch(x) + tape.watch(y) + with tf.GradientTape(persistent=True) as tape2: + tape2.watch(x) + tape2.watch(y) + with tf.GradientTape(persistent=True) as tape3: + tape3.watch(t) + tape3.watch(x) + tape3.watch(y) + u_model = model(tf.concat([x,y,t], axis=1)) + u_x = tape3.gradient(u_model, x) + u_y = tape3.gradient(u_model, y) + u_t = tape3.gradient(u_model, t) + u_xx = tape2.gradient(u_x, x) + u_xy = tape2.gradient(u_x, y) + u_xxx = tape.gradient(u_xx, x) + u_xyy = tape.gradient(u_xy, y) + del tape, tape2, tape3 + + + # v = -nu*u_x + # phi_t = Vprime(u_model) - nu*u_xx - Vprime(u_model_0) + nu*u_0_xx + # w = -nu * u_xx + phi_t/2. - Vprime(u_model) + + # Compute the components of loss function + pde_loss = tf.reduce_mean((u_t + u_model * u_x + u_xxx + u_xyy) ** 2) + + # x_ic = tf.random.uniform((Nx*Ny,1), xMin, xMax) + # y_ic = tf.random.uniform((Nx*Ny,1), yMin, yMax) + x_ic = tf.expand_dims(tf.linspace(xMin, xMax, Nx*Ny), axis=-1) # For grid sampling + y_ic = tf.expand_dims(tf.linspace(yMin, yMax, Nx*Ny), axis=-1) # For grid sampling + t_ic = tf.zeros_like(x_ic) + u_ic = u_0(x_ic, y_ic) # Initial condition + t_ic = tf.zeros_like(x_ic) # t=0 for initial condition + u_ic_pred = model(tf.concat([x_ic, y_ic, t_ic], axis=1)) # Predicted initial condition + data_fitting_loss_0 = tf.reduce_mean((u_ic_pred - u_ic) ** 2) + data_fitting_loss_l_r = periodic_boundary_conditions(model) + + # Combine the components of the loss functions + # loss, loss_type = linear_loss_function([pde_loss, data_fitting_loss_0, data_fitting_loss_l_r], tf.exp(lambdas)) + # loss, loss_type = linear_loss_function([pde_loss, data_fitting_loss_0, data_fitting_loss_l_r], lambdas) + # loss, loss_type = chebyshev_loss_function([pde_loss, data_fitting_loss_0, data_fitting_loss_l_r], tf.exp(lambdas)) + # loss, loss_type = chebyshev_loss_function([pde_loss, data_fitting_loss_0, data_fitting_loss_l_r], lambdas) + # loss, loss_type = smooth_chebyshev_loss_function(.1, [pde_loss, data_fitting_loss_0, data_fitting_loss_l_r], lambdas) + loss, loss_type = augmentedChebyshev_loss_function([pde_loss, data_fitting_loss_0, data_fitting_loss_l_r], lambdas) + + # S_loss = S(u_model, v, w) + H_loss = H(tf.reshape(u_model, shape=[Nx, Ny, Nt]), tf.reshape(u_x, shape=[Nx, Ny, Nt]), tf.reshape(u_y, shape=[Nx, Ny, Nt])) + # beta = 1e-3 + # data_fitting_loss = loss = beta*tf.math.log(tf.math.exp(data_fitting_loss_weight_0 * data_fitting_loss_0 / beta) + # + tf.math.exp(data_fitting_loss_weight_l * data_fitting_loss_l / beta) + # + tf.math.exp(data_fitting_loss_weight_r * data_fitting_loss_r / beta)) + # loss = beta*tf.math.log(tf.math.exp(pde_loss_weight * pde_loss / beta) + # + tf.math.exp(data_fitting_loss_weight_0 * data_fitting_loss_0 / beta) + # + tf.math.exp(data_fitting_loss_weight_l * data_fitting_loss_l / beta) + # + tf.math.exp(data_fitting_loss_weight_r * data_fitting_loss_r / beta)) + # data_fitting_loss = tf.math.reduce_max(tf.constant([data_fitting_loss_weight_0 * data_fitting_loss_0, + # data_fitting_loss_weight_l * data_fitting_loss_l, + # data_fitting_loss_weight_r * data_fitting_loss_r])) + # loss = tf.math.reduce_max(tf.constant([pde_loss_weight * pde_loss, + # data_fitting_loss_weight_0 * data_fitting_loss_0, + # data_fitting_loss_weight_l * data_fitting_loss_l, + # data_fitting_loss_weight_r * data_fitting_loss_r])) + + return loss, loss_type, pde_loss, data_fitting_loss_0, data_fitting_loss_l_r, H_loss#, S_loss + + +# Create the PINN model +model = PINNModel() +model.summary() + +epochs = 500 # 5000 # 1000 +# # Compile the model +# model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), +# loss=lambda y_true, y_pred: custom_loss([x_train, t_train, theta_train], model)[1]) + +# Create the optimizer with a smaller learning rate +# learning_rate = 1e-3 # 1e-4 +# learning_rate_type = 'constant' +# learning_rate = tf.keras.optimizers.schedules.PiecewiseConstantDecay([10, 100], [1e-1, 5e-2, 1e-2]) #OK +# learning_rate = tf.keras.optimizers.schedules.PiecewiseConstantDecay([100, 300], [1e-2, 1e-3, 1e-4]) +learning_rate = tf.keras.optimizers.schedules.PolynomialDecay( + initial_learning_rate=1e-2, + decay_steps=epochs, + end_learning_rate=1e-4, + power=3., + cycle=False, + name= 'PolynomialDecay' +) +# learning_rate = tf.keras.optimizers.schedules.ExponentialDecay( +# initial_learning_rate=1e-3, +# decay_steps=50, # 100 +# decay_rate=0.9, +# staircase=False, +# name='ExponentialDecay' +# ) +# learning_rate = tf.keras.optimizers.schedules.CosineDecay( +# initial_learning_rate=1e-3, +# decay_steps=1000, +# alpha=0.0, +# warmup_target=None, +# warmup_steps=0, +# name='CosineDecay' +# ) +learning_rate_type = learning_rate.name + +trainable = model.trainable_variables +if lambdas.trainable: + trainable += [lambdas] + +if cheb_par.trainable: + trainable += [cheb_par] + +# optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=1e-08, amsgrad=True) +# optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9, nesterov=True) +optimizer = tf.keras.optimizers.Nadam(learning_rate=learning_rate, beta_1=0.8, beta_2=0.9, epsilon=1e-07) +# optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate, rho=0.9, momentum=0.0, epsilon=1e-07, centered=False) + +# Training loop +losses = [] +pde_losses = [] +data_fitting_losses_0 = [] +data_fitting_losses_l_r = [] +delta_gradients = [] +# S_losses_min = [] +# S_losses_max = [] +H_losses_min = [] +H_losses_max = [] +H_losses_mean = [] +H_losses_std = [] +H_losses_abs_error = [] +H_losses_rel_error = [] +lambdas_values = [] +lambdas_values.append(lambdas.numpy()) +cheb_par_values = [] +cheb_par_values.append(cheb_par.numpy()) + + +# Convert data to tensor because tf.GradientTape() can only watch tensor and not numpy arrays +inputs = xyt_train +stop = False +# Start timer +t0 = time() +for epoch in range(epochs): + if not stop: + # print("# STARTING EPOCH", epoch + 1) + + # Create a LearningRateScheduler to update the learning rate + # current_lr = scheduler(epoch, learning_rate) + # tf.keras.backend.set_value(optimizer.lr, current_lr) + + with tf.GradientTape() as tape: + loss, loss_type, pde_loss, data_fitting_loss_0, data_fitting_loss_l_r, H_loss = custom_loss(inputs, model) + + # print("Computing gradients") + gradients = tape.gradient(loss, trainable) + # print(gradients[-1]) + # print("Applying gradients") + optimizer.apply_gradients(zip(gradients, trainable)) + # print("Appending losses") + losses.append(loss.numpy()) + pde_losses.append(pde_loss.numpy()) + data_fitting_losses_0.append(data_fitting_loss_0.numpy()) + data_fitting_losses_l_r.append(data_fitting_loss_l_r.numpy()) + # param_values.append((trainable[-1]).numpy()) + # delta_gradients.append((gradients[-1]).numpy()) + # S_loss_min = tf.reduce_min(S_loss) + # S_loss_max = tf.reduce_max(S_loss) + # S_losses_min.append(S_loss_min.numpy()) + # S_losses_max.append(S_loss_max.numpy()) + H_loss_min = tf.reduce_min(H_loss) + H_loss_max = tf.reduce_max(H_loss) + H_losses_min.append(H_loss_min.numpy()) + H_losses_max.append(H_loss_max.numpy()) + H_loss_mean = tf.reduce_mean(H_loss) + H_loss_std = tf.math.reduce_std(H_loss) + H_losses_mean.append(H_loss_mean.numpy()) + H_losses_std.append(H_loss_std.numpy()) + # lambdas_values.append((trainable[-1]).numpy()) + + H0 = H_loss[0].numpy() + Hf = H_loss[-1].numpy() + H_abs_error = tf.abs(Hf - H0) + H_losses_abs_error.append(H_abs_error.numpy()) + H_rel_error = H_abs_error / tf.abs((H0 + 1e-16)) + H_losses_rel_error.append(H_rel_error.numpy()) + + # # Print S_loss, H_loss + # print(f"S_loss at epoch {epoch + 1}: {S_loss.numpy()}") + # print(f"H_loss at epoch {epoch + 1}: {H_loss.numpy()}") + + if len(losses) > 1 and not lambdas.trainable:# and False: + # SoftAdaptive weights update + # num1 = tf.math.exp(pde_losses[-1] - pde_losses[-2]) + # num2 = tf.math.exp(data_fitting_losses_0[-1] - data_fitting_losses_0[-2]) + # num3 = tf.math.exp(data_fitting_losses_l_r[-1] - data_fitting_losses_l_r[-2]) + num = tf.nn.softmax([pde_losses[-1] - pde_losses[-2], data_fitting_losses_0[-1] - data_fitting_losses_0[-2], data_fitting_losses_l_r[-1] - data_fitting_losses_l_r[-2]]) + num1 = num[0] + num2 = num[1] + num3 = num[2] + den = num1 + num2 + num3 + + new_lambdas = tf.stack([num1 / den, num2 / den, num3 / den]) + lambdas.assign(new_lambdas) + # lambdas_values.append((lambdas).numpy()) + + if cheb_par.trainable: + cheb_par_values.append(cheb_par.numpy()) + + del tape + + if epoch % 100 == 0 or epoch == epochs - 1: + print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.numpy()}") + + if len(losses) > 2 and np.abs(losses[-1] - losses[-2]) / np.abs(losses[-2]) < 1e-8: + stop = True + +print(f"Loss type: {loss_type}") +print(f"Hamiltonian mean: {H_loss_mean.numpy()}") +print(f"Hamiltonian standard deviation: {H_loss_std.numpy()}") +print(f"Hamiltonian maximum: {H_loss_max.numpy()}") +print(f"Hamiltonian minimum: {H_loss_min.numpy()}") +print(f"Hamiltonian absolute error: {H_abs_error.numpy()}") +print(f"Hamiltonian relative error: {H_rel_error.numpy()}") +# Print computation time +print('\nComputation time: {} seconds'.format(time() - t0)) + + +def generate_save_fig_string(type, epochs, learning_rate_type, loss_type): + """ + Generates a string for saving figures that includes the number of epochs and the type of learning rate. + + Args: + epochs (int): The number of epochs. + learning_rate_type (str): The type of learning rate. + + Returns: + str: The generated string for saving figures. + """ + return f"./results/{type}_epochs_{epochs}_lr_{learning_rate_type}_{loss_type}.png" + +# Plot the loss history +plt.semilogy(losses, label='Total Loss') +plt.semilogy(pde_losses, label='PDE Loss') +plt.semilogy(data_fitting_losses_0, label='Initial Conditions Loss') +plt.semilogy(data_fitting_losses_l_r, label='Periodic Boundary Conditions Loss') +plt.xlabel('Epoch') +plt.ylabel('Loss') +plt.title('Loss Contributions') +plt.legend() +plt.grid() + +if save_fig: + save_fig_string = generate_save_fig_string('loss', epochs, learning_rate_type, loss_type) + # save png + plt.savefig(save_fig_string, dpi=300) +plt.show() + # # save pdf + # plt.savefig('../results/' + 'loss.pdf', dpi=300) + +# # Evaluate the function +# x_eval = np.linspace(x_train[0].numpy(), x_train[-1].numpy(), 100).reshape((-1, 1)).astype(np.float32) +# y_eval = np.linspace(y_train[0].numpy(), y_train[-1].numpy(), 100).reshape((-1, 1)).astype(np.float32) +# t_eval = np.linspace(t_train[0].numpy(), t_train[-1].numpy(), 100).reshape((-1, 1)).astype(np.float32) +# inputs_eval = [x_eval, y_eval, t_eval] + +# # Plot the parameters over epochs +# plt.plot(S_losses_min, label='S_loss_min') +# plt.plot(S_losses_max, label='S_loss_max') +# plt.xlabel('Epoch') +# plt.ylabel('Multisymplectic Constant') +# plt.title('Multisymplectic Constant over epochs') +# plt.legend() +# plt.grid() +# +# if save_fig: +# save_fig_string = generate_save_fig_string('S_loss', epochs, learning_rate_type, loss_type) +# # save png +# plt.savefig(save_fig_string, dpi=300) +# # # save pdf +# # plt.savefig('../results/' + 'S_loss.pdf', dpi=300) + + +# Plot the Hamiltonian over epochs +plt.plot(H_losses_min, label='H_loss_min') +plt.plot(H_losses_max, label='H_loss_max') +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian') +plt.title('Hamiltonian over epochs') +plt.legend() +plt.grid() + +if save_fig: + save_fig_string = generate_save_fig_string('H_loss', epochs, learning_rate_type, loss_type) + # save png + plt.savefig(save_fig_string, dpi=300) +plt.show() + # # save pdf + # plt.savefig('../results/' + 'H_loss.pdf', dpi=300) + +# Plot the average Hamiltonian over epochs with standard deviation +H_losses_mean = np.array(H_losses_mean) +H_losses_std = np.array(H_losses_std) +H_losses_abs_error = np.array(H_losses_abs_error) +H_losses_rel_error = np.array(H_losses_rel_error) + +plt.plot(H_losses_mean) +plt.fill_between(range(len(H_losses_mean)), H_losses_mean - H_losses_std, H_losses_mean + H_losses_std, alpha=0.2) +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian mean') +plt.title('Hamiltonian mean over epochs with standard deviation') +# plt.legend() +plt.grid() + +if save_fig: + save_fig_string = generate_save_fig_string('H_loss_mean', epochs, learning_rate_type, loss_type) + # save png + plt.savefig(save_fig_string, dpi=300) +plt.show() + # # save pdf + # plt.savefig('../results/' + 'H_loss_std.pdf', dpi=300) + +# Plot the standard deviation of the Hamiltonian over epochs +plt.plot(H_losses_std) +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian std') +plt.title('Hamiltonian standard deviation over epochs') +# plt.legend() +plt.grid() + +if save_fig: + save_fig_string = generate_save_fig_string('H_loss_std', epochs, learning_rate_type, loss_type) + # save png + plt.savefig(save_fig_string, dpi=300) +plt.show() + # # save pdf + # plt.savefig('../results/' + 'H_loss_std.pdf', dpi=300) + + +# Plot the absolute error of the Hamiltonian over epochs +plt.plot(H_losses_abs_error) +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian absolute error') +plt.title('Hamiltonian absolute error over epochs') +# plt.legend() +plt.grid() + +if save_fig: + save_fig_string = generate_save_fig_string('H_loss_abs_error', epochs, learning_rate_type, loss_type) + # save png + plt.savefig(save_fig_string, dpi=300) +plt.show() + # # save pdf + # plt.savefig('../results/' + 'H_loss_rel_error.pdf', dpi=300) + + +# Plot the relative error of the Hamiltonian over epochs +plt.plot(H_losses_rel_error) +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian relative error') +plt.title('Hamiltonian relative error over epochs') +# plt.legend() +plt.grid() + +if save_fig: + save_fig_string = generate_save_fig_string('H_loss_rel_error', epochs, learning_rate_type, loss_type) + # save png + plt.savefig(save_fig_string, dpi=300) +plt.show() + # # save pdf + # plt.savefig('../results/' + 'H_loss_rel_error.pdf', dpi=300) + + +# Plot the Chebyshev parameter over epochs +if cheb_par.trainable: + plt.plot(tf.sigmoid(cheb_par_values)) + plt.xlabel('Epoch') + plt.ylabel('Chebyshev parameter') + plt.title('Chebyshev parameter over epochs') + plt.grid() + + if save_fig: + save_fig_string = generate_save_fig_string('cheb_par', epochs, learning_rate_type, loss_type) + # save png + plt.savefig(save_fig_string, dpi=300) +plt.show() + # # save pdf + # plt.savefig('../results/' + 'cheb_par.pdf', dpi=300) + + +import pandas as pd +df = pd.DataFrame() + +df['total_loss'] = losses +df['pde_loss'] = pde_losses +df['data_fitting_loss_0'] = data_fitting_losses_0 +df['data_fitting_loss_l_r'] = data_fitting_losses_l_r +df['H_loss_min'] = H_losses_min +df['H_loss_max'] = H_losses_max +df['H_loss_mean'] = H_losses_mean +df['H_loss_std'] = H_losses_std +df['H_loss_abs_error'] = H_losses_abs_error +df['H_loss_rel_error'] = H_losses_rel_error +# df['cheb_par'] = cheb_par_values + +df.to_csv('./results/2D/training_history.csv', index=False) +# from mpl_toolkits.mplot3d import Axes3D + +# # Set up meshgrid +# N = 600 +# tspace = np.linspace(0, 2, N + 1) +# xspace = np.linspace(0, 2, N + 1) +# yspace = np.linspace(0, 2, N + 1) +# T, X , Y= np.meshgrid(tspace, xspace, yspace) +# XYTgrid = np.vstack([X.flatten(),Y.flatten(),T.flatten()]).T + +# # Determine predictions of u(t, x) +# u_pred = model(tf.cast(XYTgrid,DTYPE)) + +# # Reshape upred +# U = u_pred.numpy().reshape(N+1,N+1,N+1) + +# # Surface plot of solution u(t,x) +# fig = plt.figure(figsize=(9,6)) +# ax = fig.add_subplot(111, projection='3d') +# ax.plot_surface(X, Y, U, cmap='viridis') +# ax.view_init(35,35) +# ax.set_xlabel('$x$') +# ax.set_ylabel('$y$') +# ax.set_zlabel('$u_\\theta(x,y,t)$') +# ax.set_title('Solution to KdV equation') +# if save_fig: +# save_fig_string = generate_save_fig_string('sol', epochs, learning_rate_type, loss_type) +# # save png +# plt.savefig(save_fig_string, dpi=300) +# # # save pdf +# # plt.savefig('../results/' + 'solution.pdf', dpi=300) + +# # Extract the components of lambdas over epochs +# lambda_1 = [l[0] for l in lambdas_values] +# lambda_2 = [l[1] for l in lambdas_values] +# lambda_3 = [l[2] for l in lambdas_values] + +# # Plot the components of lambdas +# plt.figure(figsize=(10, 6)) +# plt.plot(lambda_1, label='$\lambda_1$', color='r') +# plt.plot(lambda_2, label='$\lambda_2$', color='g') +# plt.plot(lambda_3, label='$\lambda_3$', color='b') +# plt.xlabel('Epochs') +# plt.ylabel('Weights Values') +# plt.title('Evolution of weight components over training') +# plt.legend() +# plt.grid() +# + +# # Save the plot if required +# if save_fig: +# save_fig_string = generate_save_fig_string('lambdas', epochs, learning_rate_type, loss_type) +# plt.savefig(save_fig_string, dpi=300) + diff --git a/experiments/structure_preserving_pinns/structurePreservingPINN_2D_trainableACS_pytorch.py b/experiments/structure_preserving_pinns/structurePreservingPINN_2D_trainableACS_pytorch.py new file mode 100644 index 0000000..e2350db --- /dev/null +++ b/experiments/structure_preserving_pinns/structurePreservingPINN_2D_trainableACS_pytorch.py @@ -0,0 +1,279 @@ +import numpy as np +import torch +import torch.nn as nn +from time import time +import matplotlib.pyplot as plt + +DTYPE = torch.float32 +torch.set_default_dtype(DTYPE) +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') +Nx, Ny, Nt = 10, 10, 10 +N_collocation = Nx*Ny*Nt +d_in = 3 + +xMin, xMax = 0.0, 8.0 +yMin, yMax = 0.0, 8.0 +tMax = 50. + +def choose_width_depth(N_collocation=N_collocation, overparam_factor=3.0, d_in=d_in, d_out=1): + N_target = int(overparam_factor * N_collocation) + depth = 8 + a, b = depth - 1, d_in + depth + d_out + c = d_out - N_target + width = int((-b + np.sqrt(b*b - 4*a*c)) / (2*a)) + return width, depth + +width, depth = choose_width_depth() +dx = (xMax - xMin) / (Nx - 1) +dy = (yMax - yMin) / (Ny - 1) +dt = tMax / (Nt - 1) +h = np.max([dx, dy, dt]) + +lambdas = torch.tensor([1., 1., 1.], dtype=DTYPE, device=device, requires_grad=False) +cheb_par = torch.tensor(0.5, dtype=DTYPE, device=device, requires_grad=True) + +x = torch.linspace(xMin, xMax, Nx, dtype=DTYPE, device=device).reshape(-1, 1) +y = torch.linspace(yMin, yMax, Ny, dtype=DTYPE, device=device).reshape(-1, 1) +t = torch.linspace(0, tMax, Nt, dtype=DTYPE, device=device).reshape(-1, 1) +y_grid, x_grid, t_grid = torch.meshgrid(y.flatten(), x.flatten(), t.flatten(), indexing='ij') +x_train = x_grid.flatten().reshape(-1, 1) +y_train = y_grid.flatten().reshape(-1, 1) +t_train = t_grid.flatten().reshape(-1, 1) +xyt_train = torch.stack([x_train.flatten(), y_train.flatten(), t_train.flatten()], dim=1) + +save_fig = True + +def u_0(x, y): + epsilon = 0.01 + c1, c2 = 0.45, 0.25 + x1, x2 = 2.5, 3.3 + y1 = 0. + out = 3*c1/(torch.cosh(0.5*torch.sqrt(torch.tensor(c1/epsilon))*((x-x1)**2 + (y-y1)**2)**0.5))**2 + out += 3*c2/(torch.cosh(0.5*torch.sqrt(torch.tensor(c2/epsilon))*((x-x2)**2 + (y-y1)**2)**0.5))**2 + return out + +def periodic_boundary_conditions(model, Nbc=2000): + x = torch.rand(Nbc, 1, device=device) * (xMax - xMin) + xMin + y = torch.rand(Nbc, 1, device=device) * (yMax - yMin) + yMin + t = torch.rand(Nbc, 1, device=device) * tMax + + xL = torch.full_like(x, xMin) + xR = torch.full_like(x, xMax) + yL = torch.full_like(y, yMin) + yR = torch.full_like(y, yMax) + + uLx = model(torch.cat([xL, y, t], 1)) + uRx = model(torch.cat([xR, y, t], 1)) + uLy = model(torch.cat([x, yL, t], 1)) + uRy = model(torch.cat([x, yR, t], 1)) + + loss = torch.mean((uLx - uRx)**2 + (uLy - uRy)**2) + return loss + +def H(u, u_x, u_y): + return torch.sum((u_x**2 + u_y**2)/2 - u**3/6) * dx * dy + +def linear_loss_function(tensors, weights): + stacked = torch.stack(tensors) + weights = weights / torch.sum(weights) + loss = torch.sum(weights * stacked) + return loss, 'ls' + +def chebyshev_loss_function(tensors, weights): + stacked = torch.stack(tensors) + loss = torch.max(weights * stacked) + return loss, 'cs' + +def augmentedChebyshev_loss_function(tensors, weights): + par = torch.sigmoid(cheb_par) + ls = linear_loss_function(tensors, weights)[0] + cs = chebyshev_loss_function(tensors, weights)[0] + return par*cs + (1-par)*ls, 'acs' + +class FourierFeatures(nn.Module): + def __init__(self, n_modes=5): + super().__init__() + self.n_modes = n_modes + + def forward(self, inputs): + x, y, t = inputs[:, 0:1], inputs[:, 1:2], inputs[:, 2:3] + features = [t] + for k in range(1, self.n_modes + 1): + features.append(torch.sin(2*np.pi*k*(x - xMin)/(xMax-xMin))) + features.append(torch.cos(2*np.pi*k*(x - xMin)/(xMax-xMin))) + features.append(torch.sin(2*np.pi*k*(y - yMin)/(yMax-yMin))) + features.append(torch.cos(2*np.pi*k*(y - yMin)/(yMax-yMin))) + return torch.cat(features, dim=1) + +class PINNModel(nn.Module): + def __init__(self, num_hidden_layers=depth, num_neurons_per_layer=width): + super().__init__() + self.ff = FourierFeatures(n_modes=4) + layers = [] + # input_dim = 3 + 4 * 2 * 4 + input_dim = 17 + for _ in range(num_hidden_layers): + layers.append(nn.Linear(input_dim, num_neurons_per_layer)) + layers.append(nn.Tanh()) + input_dim = num_neurons_per_layer + layers.append(nn.Linear(input_dim, 1)) + self.net = nn.Sequential(*layers) + + def forward(self, x): + x = self.ff(x) + return self.net(x) + +def custom_loss(inputs, model): + x, y, t = inputs[:, 0:1], inputs[:, 1:2], inputs[:, 2:3] + x.requires_grad_(True) + y.requires_grad_(True) + t.requires_grad_(True) + u_model = model(torch.cat([x, y, t], dim=1)) + + u_t = torch.autograd.grad(u_model.sum(), t, create_graph=True)[0] + u_x = torch.autograd.grad(u_model.sum(), x, create_graph=True)[0] + u_y = torch.autograd.grad(u_model.sum(), y, create_graph=True)[0] + + u_xx = torch.autograd.grad(u_x.sum(), x, create_graph=True)[0] + u_yy = torch.autograd.grad(u_y.sum(), y, create_graph=True)[0] + + u_xxx = torch.autograd.grad(u_xx.sum(), x, create_graph=True)[0] + u_xyy = torch.autograd.grad(u_y.sum(), y, create_graph=True)[0] + + pde_loss = torch.mean((u_t + u_model * u_x + u_xxx + u_xyy) ** 2) + + x_ic = torch.linspace(xMin, xMax, Nx*Ny).reshape(-1, 1).to(device) + y_ic = torch.linspace(yMin, yMax, Nx*Ny).reshape(-1, 1).to(device) + t_ic = torch.zeros_like(x_ic) + u_ic = u_0(x_ic, y_ic) + u_ic_pred = model(torch.cat([x_ic, y_ic, t_ic], dim=1)) + data_fitting_loss_0 = torch.mean((u_ic_pred - u_ic) ** 2) + + data_fitting_loss_l_r = periodic_boundary_conditions(model) + loss, loss_type = augmentedChebyshev_loss_function([pde_loss, data_fitting_loss_0, data_fitting_loss_l_r], lambdas) + + H_loss = H(u_model.reshape(Nx, Ny, Nt), u_x.reshape(Nx, Ny, Nt), u_y.reshape(Nx, Ny, Nt)) + + return loss, loss_type, pde_loss, data_fitting_loss_0, data_fitting_loss_l_r, H_loss + +model = PINNModel().to(device) +epochs = 1000 +lr_schedule = torch.optim.lr_scheduler.PolynomialLR( + torch.optim.Adam(model.parameters(), lr=1e-2), + total_iters=epochs, power=3.0 +) +optimizer = lr_schedule.optimizer + +losses, pde_losses, data_losses_0, bc_losses = [], [], [], [] +H_losses_min, H_losses_max, H_losses_mean, H_losses_std = [], [], [], [] +H_losses_abs_error, H_losses_rel_error = [], [] +t0 = time() + +for epoch in range(epochs): + optimizer.zero_grad() + loss, loss_type, pde_loss, data_loss_0, bc_loss, H_loss = custom_loss(xyt_train, model) + loss.backward() + optimizer.step() + lr_schedule.step() + + with torch.no_grad(): + losses.append(loss.item()) + pde_losses.append(pde_loss.item()) + data_losses_0.append(data_loss_0.item()) + bc_losses.append(bc_loss.item()) + + H_loss_min = torch.min(H_loss).item() + H_loss_max = torch.max(H_loss).item() + H_losses_min.append(H_loss_min) + H_losses_max.append(H_loss_max) + H_loss_mean = torch.mean(H_loss).item() + H_loss_std = torch.std(H_loss).item() + H_losses_mean.append(H_loss_mean) + H_losses_std.append(H_loss_std) + + if epoch % 100 == 0 or epoch == epochs - 1: + print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item():.6e}") + +print(f'\nComputation time: {time() - t0:.2f}s') + +plt.figure(figsize=(10, 6)) +plt.semilogy(losses, label='Total Loss') +plt.semilogy(pde_losses, label='PDE Loss') +plt.semilogy(data_losses_0, label='Initial Conditions Loss') +plt.semilogy(bc_losses, label='Periodic Boundary Conditions Loss') +plt.xlabel('Epoch') +plt.ylabel('Loss') +plt.title('Loss Contributions') +plt.legend() +plt.grid() +plt.savefig('./results/2D_loss.png', dpi=300) if save_fig else None +plt.show() + +plt.figure(figsize=(10, 6)) +plt.plot(H_losses_min, label='H_loss_min') +plt.plot(H_losses_max, label='H_loss_max') +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian') +plt.title('Hamiltonian over epochs') +plt.legend() +plt.grid() +plt.savefig('./results/2D_H_minmax.png', dpi=300) if save_fig else None +plt.show() + +H_losses_mean_arr = np.array(H_losses_mean) +H_losses_std_arr = np.array(H_losses_std) +plt.figure(figsize=(10, 6)) +plt.plot(H_losses_mean_arr) +plt.fill_between(range(len(H_losses_mean_arr)), H_losses_mean_arr - H_losses_std_arr, H_losses_mean_arr + H_losses_std_arr, alpha=0.2) +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian mean') +plt.title('Hamiltonian mean over epochs with standard deviation') +plt.grid() +plt.savefig('./results/2D_H_mean_std.png', dpi=300) if save_fig else None +plt.show() + +plt.figure(figsize=(10, 6)) +plt.plot(H_losses_std_arr) +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian std') +plt.title('Hamiltonian standard deviation over epochs') +plt.grid() +plt.savefig('./results/2D_H_std.png', dpi=300) if save_fig else None +plt.show() + +if cheb_par.requires_grad: + plt.figure(figsize=(10, 6)) + plt.plot(torch.sigmoid(cheb_par).detach()) + plt.xlabel('Epoch') + plt.ylabel('Chebyshev parameter') + plt.title('Chebyshev parameter over epochs') + plt.grid() + plt.savefig('./results/2D_cheb_par.png', dpi=300) if save_fig else None + plt.show() + +from mpl_toolkits.mplot3d import Axes3D +N = 100 +xspace = torch.linspace(xMin, xMax, N, dtype=DTYPE, device=device) +yspace = torch.linspace(yMin, yMax, N, dtype=DTYPE, device=device) +tspace_val = torch.tensor(tMax, dtype=DTYPE, device=device) +X_grid, Y_grid = torch.meshgrid(xspace, yspace, indexing='ij') +T_grid = torch.full_like(X_grid, tMax) +XYTgrid = torch.stack([X_grid.flatten(), Y_grid.flatten(), T_grid.flatten()], dim=1) + +with torch.no_grad(): + u_pred = model(XYTgrid) +U = u_pred.reshape(N, N) + +X_np = X_grid.cpu().numpy() +Y_np = Y_grid.cpu().numpy() +U_np = U.cpu().numpy() + +fig = plt.figure(figsize=(9, 6)) +ax = fig.add_subplot(111, projection='3d') +ax.plot_surface(X_np, Y_np, U_np, cmap='viridis') +ax.set_xlabel('$x$') +ax.set_ylabel('$y$') +ax.set_zlabel('$u(x,y,t)$') +ax.set_title('2D PDE Solution') +plt.savefig('./results/2D_solution.png', dpi=300) if save_fig else None +plt.show() diff --git a/experiments/structure_preserving_pinns/structurePreservingPINN_CamassaHolm_trainableH1.py b/experiments/structure_preserving_pinns/structurePreservingPINN_CamassaHolm_trainableH1.py new file mode 100644 index 0000000..55ba1d3 --- /dev/null +++ b/experiments/structure_preserving_pinns/structurePreservingPINN_CamassaHolm_trainableH1.py @@ -0,0 +1,729 @@ +import numpy as np +import tensorflow as tf +from time import time +import matplotlib.pyplot as plt + +# TF_USE_LEGACY_KERAS=True + +DTYPE = np.float32 +# Nx = 50 +# Nt = 50 +# N_collocation = Nx*Nt + +xMin = -np.pi +xMax = np.pi +tMax = 5. # 10. +d_in = 2 + +def Nx_from_arch(width, depth, fac=2.0, d_in=2, d_out=1): + """ + Given a PINN architecture (width, depth) and an overparam factor fac, + compute Nx = Nt such that: + + Nx * Nt ≈ N_params / fac, + Nx = Nt, + + where N_params is the number of trainable parameters. + + Parameters + ---------- + width : int + Number of neurons per hidden layer. + depth : int + Number of hidden layers. + fac : float + Over-parameterization factor. Typical values: fac = 2 or 3. + d_in : int + Input dimension (usually 2: x,t). + d_out : int + Output dimension (usually 1: u). + + Returns + ------- + Nx : int + Nt : int + Ntheta : int + Total number of trainable parameters. + Ncoll_target : int + Target number of collocation points = Ntheta/fac. + """ + + # Parameter count + Ntheta = (d_in + 1) * width \ + + (depth - 1) * (width * width + width) \ + + d_out * (width + 1) + + # Target collocation count + Ncoll_target = int(Ntheta / fac) + + # Square grid Nx = Nt + Nx = int(np.sqrt(Ncoll_target)) + Nt = Nx + + return Nx, Nt, Ntheta, Ncoll_target + +width = 80 +depth = 4 + +Nx, Nt, Ntheta, Ncoll = Nx_from_arch(width=width, depth=depth, fac=10.) + +def h_from_NxNt(Nx, Nt, xMin, xMax, tMax): + """ + Compute dx, dt, and h from Nx, Nt and the domain extents. + h is defined as max(dx, dt). + + Returns + ------- + dx : float + dt : float + h : float + """ + + Lx = xMax - xMin + Lt = tMax + + dx = Lx / (Nx - 1) + dt = Lt / (Nt - 1) + + h = max(dx, dt) + + return dx, dt, h + +dx, dt, h = h_from_NxNt(Nx, Nt, xMin, xMax, tMax) + +lambdas = [1., 1., 1.] +lambdas = tf.Variable(lambdas, trainable=False, name='lambdas', dtype=DTYPE) +do_training = True +cheb_par = tf.Variable(0.5, trainable=False, name='cheb_par', dtype=DTYPE) + +x = np.linspace(xMin, xMax, Nx).reshape((-1, 1)).astype(DTYPE) +t = np.linspace(0, tMax, Nt).reshape((-1, 1)).astype(DTYPE) + +x_train = tf.expand_dims(tf.convert_to_tensor(x.flatten()), axis=-1) +t_train = tf.expand_dims(tf.convert_to_tensor(t.flatten()), axis=-1) + +save_fig = True + +# Define the initial condition +def u_0(x): + return 0.2+0.1*tf.math.cos(2 * x) + + +def u_0_x(x): + return -0.2*tf.math.sin(2 * x) + + +def periodic_boundary_conditions(model, Nbc=2000): + + # Random boundary sampling (correct choice) + x = tf.random.uniform((Nbc,1), xMin, xMax) + t = tf.random.uniform((Nbc,1), 0.0, tMax) + + xL = tf.ones_like(x) * xMin + xR = tf.ones_like(x) * xMax + + with tf.GradientTape(persistent=True) as tape: + tape.watch([xL, xR]) + + uLx = model(tf.concat([xL, t], 1)) + uRx = model(tf.concat([xR, t], 1)) + + # First derivatives + uxL = tape.gradient(uLx, xL) + uxR = tape.gradient(uRx, xR) + + del tape + + # Enforce periodicity of values AND derivatives + loss = tf.reduce_mean( + (uLx - uRx)**2 + + (uxL - uxR)**2 + ) + + return loss + + +# def H(u, u_x, dx): +# return tf.reduce_sum(tf.pow(u, 3)+u*tf.pow(u_x, 2), axis = -1) * dx +# # return tf.reduce_sum((tf.pow(u, 2)+tf.pow(u_x, 2))/2, axis = -1) * dx + +def ch_density(u, u_x): + return tf.pow(u, 3) + u * tf.pow(u_x, 2) + + +# @tf.function +def H(u, u_x, dx, density_fn=ch_density, axis=-1): + """ + Boole’s rule (8th order) along 'axis' for uniform grid with spacing dx. + Requires (N-1) % 4 == 0. Otherwise uses Boole on the largest prefix and trapezoid on remainder. + """ + f = density_fn(u, u_x) # [..., N] + n = tf.shape(f)[axis] + + # Trapezoid as a fallback on short tails + def _trap_rem(rem): + # rem: [..., M] contiguous tail; integrate with trapezoid + return tf.reduce_sum(0.5*(rem[..., 1:] + rem[..., :-1]), axis=-1) * tf.cast(dx, f.dtype) + + # Degenerate + if tf.less_equal(n, 1): + return tf.reduce_sum(f, axis=axis) * dx + + # Largest prefix with (n1-1) % 4 == 0 + n1 = n - ((n - 1) % 4) + # Boole constant for uniform spacing: 2*dx/45 + c = (2.0 * dx) / 45.0 + + # Indices for prefix + idx_prefix = tf.range(n1) + f0 = tf.gather(f, idx_prefix[0::4], axis=axis) # 0,4,8,... + f1 = tf.gather(f, idx_prefix[1::4], axis=axis) # 1,5,9,... + f2 = tf.gather(f, idx_prefix[2::4], axis=axis) # 2,6,10,... + f3 = tf.gather(f, idx_prefix[3::4], axis=axis) # 3,7,11,... + f4 = tf.gather(f, idx_prefix[4::4], axis=axis) # 4,8,12,... (last block end) + + # Weighted sum across blocks + # Boole's block weights per 5 nodes: [7, 32, 12, 32, 7] + # Aggregate across all blocks by summing slices + s = 7.0 * tf.reduce_sum(f0, axis=axis) + s += 32.0 * tf.reduce_sum(f1, axis=axis) + s += 12.0 * tf.reduce_sum(f2, axis=axis) + s += 32.0 * tf.reduce_sum(f3, axis=axis) + s += 7.0 * tf.reduce_sum(f4, axis=axis) + + boole_part = c * s + + # Tail remainder + if tf.equal(n1, n): + return boole_part + + rem = tf.gather(f, tf.range(n1-1, n), axis=axis) # nodes: n1-1 .. n-1 + tail = _trap_rem(rem) + return boole_part + tail + + +def linear_loss_function(tensors, weights): + """ + Computes the sum of the input tensors. + + Args: + tensors (list of tf.Tensor): List of tensors to compute the sum. + + Returns: + tf.Tensor: The sum of the input tensors. + """ + # weights = weights / tf.reduce_sum(weights) # Normalize weights + # stack_tensor = tf.stack(tf.multiply(tensors, weights)) + # stack_tensor = tf.stack([w * t for w, t in zip(weights, tensors)], axis=0) + stacked = tf.stack(tensors, axis=0) # shape (n_losses,) + weights = weights / tf.reduce_sum(weights) + loss = tf.reduce_sum(weights * stacked) + loss_type = 'ls' + return loss, loss_type + + +def chebyshev_loss_function(tensors, weights): + """ + Computes the max of the input tensors. + + Args: + tensors (list of tf.Tensor): List of tensors to compute the log-sum-exp. + + Returns: + tf.Tensor: The maximum of the input tensors. + """ + # weights = weights / tf.reduce_sum(weights) # Normalize weights + # stack_tensor = tf.stack(tf.multiply(tensors, weights)) + # stack_tensor = tf.stack([w * t for w, t in zip(weights, tensors)], axis=0) + stacked = tf.stack(tensors, axis=0) + loss = tf.reduce_max(weights*stacked) + loss_type = 'cs' + return loss, loss_type + + +def smooth_chebyshev_loss_function(mu, tensors, weights): + """ + Computes the log of the sum of the exponentials of the input tensors. + + Args: + tensors (list of tf.Tensor): List of tensors to compute the log-sum-exp. + + Returns: + tf.Tensor: The log-sum-exp of the input tensors. + """ + weights = weights / tf.reduce_sum(weights) # Normalize weights + # stack_tensor = tf.stack(tf.multiply(tensors, weights)) + # stack_tensor = tf.stack([w * t for w, t in zip(weights, tensors)], axis=0) + stacked = tf.stack(tensors, axis=0) + exp_sum = tf.reduce_sum(tf.math.exp(stacked/mu), axis=0) + loss = mu*tf.math.log(exp_sum) + loss_type = 'scs' + return loss, loss_type + + +def augmentedChebyshev_loss_function(tensors, weights): + """ + Computes the log of the sum of the exponentials of the input tensors. + + Args: + tensors (list of tf.Tensor): List of tensors to compute the log-sum-exp. + + Returns: + tf.Tensor: The log-sum-exp of the input tensors. + """ + # weights = weights / tf.reduce_sum(weights) # Normalize weights + loss_type = 'acs' + par = tf.sigmoid(cheb_par) # par is between 0 and 1 + return par*chebyshev_loss_function(tensors, weights)[0] + (1-par)*linear_loss_function(tensors, weights)[0], loss_type + + +def sigmoid_centered(x): + return 2*tf.nn.sigmoid(.5*x) - 1 + +def PINNModel(num_hidden_layers=depth, num_neurons_per_layer=width): # 8,40 + xt_input = tf.keras.Input(shape=(2,)) + output_u = xt_input + for _ in range(num_hidden_layers): + output_u = tf.keras.layers.Dense(num_neurons_per_layer, + activation='gelu', # tanh + kernel_initializer='glorot_normal', #'glorot_uniform', # glorot_normal + )(output_u) + + output_u = tf.keras.layers.Dense(units=1, + activation='linear', + kernel_initializer='glorot_normal', #'glorot_uniform', # glorot_normal + )(output_u) + + return tf.keras.Model(inputs=xt_input, outputs=output_u) #tf.keras.Model(inputs=[x_input, t_input], outputs=output_u) + + +def lambda_grad(epoch, + start=1000, + lam_max=1e-0, + kappa=1e-3): + epoch = tf.cast(epoch, tf.float32) + return lam_max * (1.0 - tf.exp(-kappa * tf.maximum(epoch - start, 0.0))) + + + +# @tf.function +def custom_loss(inputs, model): + x, t = inputs[:, 0:1], inputs[:, 1:2] + + with tf.GradientTape(persistent=True) as outerTape: + outerTape.watch(x) + with tf.GradientTape(persistent=True) as tape: + tape.watch(t) + tape.watch(x) + with tf.GradientTape(persistent=False) as tape2: + tape2.watch(x) + tape2.watch(t) + with tf.GradientTape(persistent=True) as tape3: + tape3.watch(x) + tape3.watch(t) + u_model = model(tf.stack([x[:, 0], t[:, 0]], axis=1)) + u_x = tape3.gradient(u_model, x) + u_t = tape3.gradient(u_model, t) + u_xx = tape2.gradient(u_x, x) + u_xxt = tape.gradient(u_xx, t) + u_xxx = tape.gradient(u_xx, x) + + # === Camassa–Holm residual === + r = ( + u_t + - u_xxt + + 3.0 * u_model * u_x + - 2.0 * u_x * u_xx + - u_model * u_xxx + ) + r_x = outerTape.gradient(r, x) + + # Clean up + del tape, tape2, tape3, outerTape + + lam = lambda_grad(epoch) + + # === H1 norm of residual === + pde_loss_L2 = tf.reduce_mean(tf.square(r)) + pde_loss_grad = tf.reduce_mean(tf.square(r_x)) + pde_loss_H1 = pde_loss_L2 + lam * pde_loss_grad + + # === Initial condition === + ic_mask = tf.where(tf.abs(t) < 1e-6) + x_ic = tf.gather(x, ic_mask[:, 0]) + u_ic = u_0(x_ic) + t_ic = tf.zeros_like(x_ic) + u_ic_pred = model(tf.concat([x_ic, t_ic], axis=1)) + data_fitting_loss_0 = tf.reduce_mean(tf.square(u_ic_pred - u_ic)) + + # === Periodic boundary conditions === + data_fitting_loss_l_r = periodic_boundary_conditions(model) + + # === Chebyshev aggregation === + loss, loss_type = chebyshev_loss_function( + [pde_loss_H1, data_fitting_loss_0, data_fitting_loss_l_r], + lambdas + ) + + # === Hamiltonian (monitor only) === + H_loss = H( + tf.reshape(u_model, shape=[Nt, Nx]), + tf.reshape(u_x, shape=[Nt, Nx]), + dx + ) + + return ( + loss, + loss_type, + pde_loss_H1, + data_fitting_loss_0, + data_fitting_loss_l_r, + H_loss, + ) + + +# Create the PINN model +model = PINNModel() +model.summary() + +epochs = 1000 # 3000 # 5000 # 2000 +# # Compile the model +# model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), +# loss=lambda y_true, y_pred: custom_loss([x_train, t_train, theta_train], model)[1]) + +# Create the optimizer with a smaller learning rate +# learning_rate = 1e-3 # 1e-4 +# learning_rate_type = 'constant' +# learning_rate = tf.keras.optimizers.schedules.PiecewiseConstantDecay([10, 100], [1e-1, 5e-2, 1e-2]) #OK +# learning_rate = tf.keras.optimizers.schedules.PiecewiseConstantDecay([100, 300], [1e-2, 1e-3, 1e-4]) +# learning_rate = tf.keras.optimizers.schedules.PolynomialDecay( +# initial_learning_rate=1e-3, +# decay_steps=epochs, +# end_learning_rate=1e-5, +# power=2., +# cycle=False, # True +# name='PolynomialDecay' +# ) +# learning_rate_type = 'polynomialDecay' +learning_rate = tf.keras.optimizers.schedules.ExponentialDecay( + initial_learning_rate=1e-2, + decay_steps=epochs, # 100 + decay_rate=0.9, + staircase=False, + name='ExponentialDecay' +) +learning_rate_type = 'exponentialDecay' +# learning_rate = tf.keras.optimizers.schedules.CosineDecay( +# initial_learning_rate=1e-3, +# decay_steps=1000, +# alpha=0.0, +# name='CosineDecay', +# warmup_target=None, +# warmup_steps=0 +# ) +# learning_rate_type = 'cosineDecay' + +trainable = model.trainable_variables +if lambdas.trainable: + trainable += [lambdas] + +if cheb_par.trainable: + trainable += [cheb_par] + +# optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=1e-08, amsgrad=True) +# optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9, nesterov=True) +optimizer = tf.keras.optimizers.Nadam(learning_rate=learning_rate, beta_1=0.8, beta_2=0.9, epsilon=1e-07) +# optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate, rho=0.9, momentum=0.0, epsilon=1e-07, centered=False) + +# Training loop +losses = [] +pde_losses = [] +data_fitting_losses_0 = [] +data_fitting_losses_l_r = [] +delta_gradients = [] +H_losses_min = [] +H_losses_max = [] +H_losses_mean = [] +H_losses_std = [] +H_losses_abs_error = [] +H_losses_rel_error = [] +lambdas_values = [] +lambdas_values.append(lambdas.numpy()) +cheb_par_values = [] +cheb_par_values.append(cheb_par.numpy()) + +# Convert data to tensor because tf.GradientTape() can only watch tensor and not numpy arrays +x_train = tf.convert_to_tensor(x_train) +t_train = tf.convert_to_tensor(t_train) +x_grid, t_grid = np.meshgrid(x.flatten(), t.flatten()) +inputs = tf.convert_to_tensor(np.vstack([x_grid.flatten(), t_grid.flatten()]).T) +stop = False +# Start timer +t0 = time() +for epoch in range(epochs): + if not stop: + # print("# STARTING EPOCH", epoch + 1) + + # Create a LearningRateScheduler to update the learning rate + # current_lr = scheduler(epoch, learning_rate) + # tf.keras.backend.set_value(optimizer.lr, current_lr) + + with tf.GradientTape() as tape: + loss, loss_type, pde_loss, data_fitting_loss_0, data_fitting_loss_l_r, H_loss = custom_loss(inputs, model) + + # print("Computing gradients") + gradients = tape.gradient(loss, trainable) + # print(gradients[-1]) + # print("Applying gradients") + optimizer.apply_gradients(zip(gradients, trainable)) + # print("Appending losses") + losses.append(loss.numpy()) + pde_losses.append(pde_loss.numpy()) + data_fitting_losses_0.append(data_fitting_loss_0.numpy()) + data_fitting_losses_l_r.append(data_fitting_loss_l_r.numpy()) + H_loss_min = tf.reduce_min(H_loss) + H_loss_max = tf.reduce_max(H_loss) + H_losses_min.append(H_loss_min.numpy()) + H_losses_max.append(H_loss_max.numpy()) + H_loss_mean = tf.reduce_mean(H_loss) + H_loss_std = tf.math.reduce_std(H_loss) + H_losses_mean.append(H_loss_mean.numpy()) + H_losses_std.append(H_loss_std.numpy()) + + H0 = H(u_0(x_grid), u_0_x(x_grid), dx) # H0 = H_loss[0].numpy() + Hf = H_loss.numpy() + H_abs_error = tf.abs(Hf - H0) + H_losses_abs_error.append(tf.reduce_max(H_abs_error).numpy()) + H_rel_error = H_abs_error / tf.abs((H0 + 1e-16)) + H_losses_rel_error.append(H_rel_error[-1].numpy()) + + # lambdas_values.append((trainable[-1]).numpy()) + if len(losses) > 1 and not lambdas.trainable and do_training: + # SoftAdaptive weights update + num1 = tf.math.exp(pde_losses[-1] - pde_losses[-2]) + num2 = tf.math.exp(data_fitting_losses_0[-1] - data_fitting_losses_0[-2]) + num3 = tf.math.exp(data_fitting_losses_l_r[-1] - data_fitting_losses_l_r[-2]) + den = num1 + num2 + num3 + + new_lambdas = tf.stack([num1 / den, num2 / den, num3 / den]) + lambdas.assign(new_lambdas) + lambdas_values.append((lambdas).numpy()) + + if cheb_par.trainable: + cheb_par_values.append(cheb_par.numpy()) + + del tape + + if epoch % 100 == 0 or epoch == epochs - 1: + print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.numpy()}") + + if len(losses) > 2 and np.abs(losses[-1] - losses[-2]) / np.abs(losses[-2]) < 1e-8: + stop = True + +print(f"Loss type: {loss_type}") +print(f"Hamiltonian mean: {H_loss_mean.numpy()}") +print(f"Hamiltonian standard deviation: {H_loss_std.numpy()}") +print(f"Hamiltonian maximum: {H_loss_max.numpy()}") +print(f"Hamiltonian minimum: {H_loss_min.numpy()}") +# print(f"Hamiltonian absolute error: {H_abs_error.numpy()}") +# print(f"Hamiltonian relative error: {H_rel_error.numpy()}") +print(f"Hamitonian relative error: {H_rel_error[-1].numpy()}") +# Print computation time +print('\nComputation time: {} seconds'.format(time() - t0)) + + +def generate_save_fig_string(type, epochs, learning_rate_type, loss_type): + """ + Generates a string for saving figures that includes the number of epochs and the type of learning rate. + + Args: + epochs (int): The number of epochs. + learning_rate_type (str): The type of learning rate. + + Returns: + str: The generated string for saving figures. + """ + return f"./results/{type}_epochs_{epochs}_lr_{learning_rate_type}_{loss_type}.png" + +# Plot the loss history +plt.semilogy(losses, label='Total Loss') +plt.semilogy(pde_losses, label='PDE Loss') +plt.semilogy(data_fitting_losses_0, label='Initial Conditions Loss') +plt.semilogy(data_fitting_losses_l_r, label='Boundary Conditions Loss') +plt.xlabel('Epoch') +plt.ylabel('Loss') +plt.title('Loss Contributions') +plt.legend() +plt.grid() + +if save_fig: + save_fig_string = generate_save_fig_string('loss', epochs, learning_rate_type, loss_type) + # save png + plt.savefig(save_fig_string, dpi=300) +plt.show() + # # save pdf + # plt.savefig('../results/' + 'loss.pdf', dpi=300) + +# Evaluate the function +x_eval = np.linspace(x_train[0].numpy(), x_train[-1].numpy(), 100).reshape((-1, 1)).astype(np.float32) +t_eval = np.linspace(t_train[0].numpy(), t_train[-1].numpy(), 100).reshape((-1, 1)).astype(np.float32) +inputs_eval = [x_eval, t_eval] + +# Plot the Hamiltonian over epochs +plt.plot(H_losses_min, label='min H') +plt.plot(H_losses_max, label='max H') +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian') +plt.title('Hamiltonian over epochs') +plt.legend() +plt.grid() + +if save_fig: + save_fig_string = generate_save_fig_string('H_loss', epochs, learning_rate_type, loss_type) + # save png + plt.savefig(save_fig_string, dpi=300) +plt.show() + # # save pdf + # plt.savefig('../results/' + 'H_loss.pdf', dpi=300) + +# Plot the average Hamiltonian over epochs with standard deviation +H_losses_mean = np.array(H_losses_mean) +H_losses_std = np.array(H_losses_std) +H_losses_rel_error = np.array(H_losses_rel_error) +H_losses_rel_error = np.array(H_losses_rel_error) + +plt.plot(H_losses_mean) +plt.fill_between(range(len(H_losses_mean)), H_losses_mean - H_losses_std, H_losses_mean + H_losses_std, alpha=0.2) +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian mean') +plt.title('Hamiltonian mean over epochs with standard deviation') +# plt.legend() +plt.grid() + +if save_fig: + save_fig_string = generate_save_fig_string('H_loss_mean', epochs, learning_rate_type, loss_type) + # save png + plt.savefig(save_fig_string, dpi=300) +plt.show() + # # save pdf + # plt.savefig('../results/' + 'H_loss_std.pdf', dpi=300) + +# Plot the standard deviation of the Hamiltonian over epochs +plt.plot(H_losses_std) +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian std') +plt.title('Hamiltonian standard deviation over epochs') +# plt.legend() +plt.grid() + +if save_fig: + save_fig_string = generate_save_fig_string('H_loss_std', epochs, learning_rate_type, loss_type) + # save png + plt.savefig(save_fig_string, dpi=300) +plt.show() + # # save pdf + # plt.savefig('../results/' + 'H_loss_std.pdf', dpi=300) + +# Plot the absolute error of the Hamiltonian over epochs +plt.plot(H_losses_abs_error) +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian absolute error') +plt.title('Hamiltonian absolute error over epochs') +# plt.legend() +plt.grid() + +if save_fig: + save_fig_string = generate_save_fig_string('H_loss_abs_error', epochs, learning_rate_type, loss_type) + # save png + plt.savefig(save_fig_string, dpi=300) +plt.show() + # # save pdf + # plt.savefig('../results/' + 'H_loss_rel_error.pdf', dpi=300) + + +# Plot the relative error of the Hamiltonian over epochs +plt.plot(H_losses_rel_error) +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian relative error') +plt.title('Hamiltonian relative error over epochs') +# plt.legend() +plt.grid() + +if save_fig: + save_fig_string = generate_save_fig_string('H_loss_rel_error', epochs, learning_rate_type, loss_type) + # save png + plt.savefig(save_fig_string, dpi=300) +plt.show() + # # save pdf + # plt.savefig('../results/' + 'H_loss_rel_error.pdf', dpi=300) + + +# Plot the Chebyshev parameter over epochs +if cheb_par.trainable: + plt.plot(tf.sigmoid(cheb_par_values)) + plt.xlabel('Epoch') + plt.ylabel('Chebyshev parameter') + plt.title('Chebyshev parameter over epochs') + plt.grid() + if save_fig: + save_fig_string = generate_save_fig_string('cheb_par', epochs, learning_rate_type, loss_type) + # save png + plt.savefig(save_fig_string, dpi=300) +plt.show() + # # save pdf + # plt.savefig('../results/' + 'cheb_par.pdf', dpi=300) + + +from mpl_toolkits.mplot3d import Axes3D + +# Set up meshgrid +N = 600 +tspace = np.linspace(0, tMax, N + 1) +xspace = np.linspace(xMin, xMax, N + 1) +T, X = np.meshgrid(tspace, xspace) +XTgrid = np.vstack([X.flatten(),T.flatten()]).T + +# Determine predictions of u(t, x) +u_pred = model(tf.cast(XTgrid,DTYPE)) + +# Reshape upred +U = u_pred.numpy().reshape(N+1,N+1) + +# Surface plot of solution u(t,x) +fig = plt.figure(figsize=(9,6)) +ax = fig.add_subplot(111, projection='3d') +ax.plot_surface(X, T, U, cmap='viridis') +ax.view_init(35,35) +ax.set_xlabel('$x$') +ax.set_ylabel('$t$') +ax.set_zlabel('$u_\\theta(x,t)$') +ax.set_title('Solution to Camassa-Holm equation') +ax.set_box_aspect(None, zoom=0.85) + +if save_fig: + save_fig_string = generate_save_fig_string('sol', epochs, learning_rate_type, loss_type) + # save png + plt.savefig(save_fig_string, dpi=300) +plt.show() + # # save pdf + # plt.savefig('../results/' + 'solution.pdf', dpi=300) + + + + +import pandas as pd +df = pd.DataFrame() + +df['total_loss'] = losses +df['pde_loss'] = pde_losses +df['data_fitting_loss_0'] = data_fitting_losses_0 +df['data_fitting_loss_l_r'] = data_fitting_losses_l_r +df['H_loss_min'] = H_losses_min +df['H_loss_max'] = H_losses_max +df['H_loss_mean'] = H_losses_mean +df['H_loss_std'] = H_losses_std +df['H_loss_abs_error'] = H_losses_abs_error +df['H_loss_rel_error'] = H_losses_rel_error +# df['cheb_par'] = cheb_par_values + +df.to_csv('./results/camassa/training_history.csv', index=False) \ No newline at end of file diff --git a/experiments/structure_preserving_pinns/structurePreservingPINN_CamassaHolm_trainableH1_pytorch.py b/experiments/structure_preserving_pinns/structurePreservingPINN_CamassaHolm_trainableH1_pytorch.py new file mode 100644 index 0000000..915aedc --- /dev/null +++ b/experiments/structure_preserving_pinns/structurePreservingPINN_CamassaHolm_trainableH1_pytorch.py @@ -0,0 +1,389 @@ +import numpy as np +import torch +import torch.nn as nn +from time import time +import matplotlib.pyplot as plt + +from humancompatible.train.dual_optim import ALM, iALM + +DTYPE = torch.float32 +torch.set_default_dtype(DTYPE) +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + +xMin, xMax = -np.pi, np.pi +tMax = 5. +d_in = 2 + +def Nx_from_arch(width, depth, fac=2.0, d_in=2, d_out=1): + Ntheta = (d_in + 1) * width + (depth - 1) * (width * width + width) + d_out * (width + 1) + Ncoll_target = int(Ntheta / fac) + Nx = int(np.sqrt(Ncoll_target)) + Nt = Nx + return Nx, Nt, Ntheta, Ncoll_target + +width, depth = 80, 4 +Nx, Nt, Ntheta, Ncoll = Nx_from_arch(width=width, depth=depth, fac=10.) + +dx = (xMax - xMin) / (Nx - 1) +dt = tMax / (Nt - 1) +h = max(dx, dt) + +lambdas = torch.tensor([1., 1., 1.], dtype=DTYPE, device=device, requires_grad=False) +do_training = True +cheb_par = torch.tensor(0.5, dtype=DTYPE, device=device, requires_grad=False) + +x = torch.linspace(xMin, xMax, Nx, dtype=DTYPE, device=device).reshape(-1, 1) +t = torch.linspace(0, tMax, Nt, dtype=DTYPE, device=device).reshape(-1, 1) +x_train = x.reshape(-1, 1) +t_train = t.reshape(-1, 1) +t_grid, x_grid = torch.meshgrid(t.flatten(), x.flatten(), indexing='ij') +inputs = torch.stack([x_grid.flatten(), t_grid.flatten()], dim=1) + +save_fig = True + +def u_0(x): + return 0.2 + 0.1 * torch.cos(2 * x) + +def u_0_x(x): + return -0.2 * torch.sin(2 * x) + +def periodic_boundary_conditions(model, Nbc=2000): + x = torch.rand(Nbc, 1, device=device) * (xMax - xMin) + xMin + t = torch.rand(Nbc, 1, device=device) * tMax + + xL = torch.full_like(x, xMin) + xR = torch.full_like(x, xMax) + + uLx = model(torch.cat([xL, t], 1)) + uRx = model(torch.cat([xR, t], 1)) + + loss = torch.mean((uLx - uRx)**2) + return loss + +def ch_density(u, u_x): + return u**3 + u * u_x**2 + +def H(u, u_x, dx, density_fn=ch_density): + f = density_fn(u, u_x) + return torch.sum(f) * dx + +def linear_loss_function(tensors, weights): + stacked = torch.stack(tensors) + weights = weights / torch.sum(weights) + loss = torch.sum(weights * stacked) + return loss, 'ls' + +def chebyshev_loss_function(tensors, weights): + stacked = torch.stack(tensors) + loss = torch.max(weights * stacked) + return loss, 'cs' + +### MODEL ### + +# @torch.compile +class PINNModel(nn.Module): + def __init__(self, num_hidden_layers=depth, num_neurons_per_layer=width): + super().__init__() + layers = [] + in_dim = 2 + for _ in range(num_hidden_layers): + layers.append(nn.Linear(in_dim, num_neurons_per_layer)) + layers.append(nn.GELU()) + in_dim = num_neurons_per_layer + layers.append(nn.Linear(in_dim, 1)) + self.net = nn.Sequential(*layers) + + def forward(self, x): + return self.net(x) + +def lambda_grad(epoch, + start=1000, + lam_max=1e-0, + kappa=1e-3): + epoch = float(epoch) + return lam_max * (1.0 - np.exp(-kappa * max(epoch - start, 0.0))) + + +##### UNCONSTRAINED LOSS FUNCTION WITH H1 REGULARIZATION ##### + + +# @torch.compile +def custom_loss(inputs, model, epoch): + x, t = inputs[:, 0:1], inputs[:, 1:2] + x.requires_grad_(True) + t.requires_grad_(True) + + u_model = model(torch.cat([x, t], dim=1)) + + u_t = torch.autograd.grad(u_model.sum(), t, create_graph=True)[0] + u_x = torch.autograd.grad(u_model.sum(), x, create_graph=True)[0] + + u_xx = torch.autograd.grad(u_x.sum(), x, create_graph=True)[0] + u_xxt = torch.autograd.grad(u_xx.sum(), t, create_graph=True)[0] + u_xxx = torch.autograd.grad(u_xx.sum(), x, create_graph=True)[0] + + r = u_t - u_xxt + 3.0 * u_model * u_x - 2.0 * u_x * u_xx - u_model * u_xxx + r_x = torch.autograd.grad(r.sum(), x, create_graph=True)[0] + + pde_loss_L2 = torch.mean(torch.square(r)) + pde_loss_grad = torch.mean(torch.square(r_x)) + + lam = lambda_grad(epoch) + pde_loss_H1 = pde_loss_L2 + lam * pde_loss_grad + + ic_mask = torch.abs(t) < 1e-6 + x_ic = x[ic_mask[:, 0]] + u_ic = u_0(x_ic) + t_ic = torch.zeros_like(x_ic) + u_ic_pred = model(torch.cat([x_ic, t_ic], axis=1)) + data_fitting_loss_0 = torch.mean(torch.square(u_ic_pred - u_ic)) + + data_fitting_loss_l_r = periodic_boundary_conditions(model) + + loss, loss_type = chebyshev_loss_function( + [pde_loss_H1, data_fitting_loss_0, data_fitting_loss_l_r], + lambdas + ) + + H_loss = H(u_model.reshape(Nt, Nx), u_x.reshape(Nt, Nx), dx) + + return loss, loss_type, pde_loss_L2, data_fitting_loss_0, data_fitting_loss_l_r, H_loss + + +#### LOSS FUNCTION WITH H1 CONSTRAINT #### + +def lagrangian_loss(inputs, model, dual_opt, epoch): + x, t = inputs[:, 0:1], inputs[:, 1:2] + x.requires_grad_(True) + t.requires_grad_(True) + + u_model = model(torch.cat([x, t], dim=1)) + + u_t = torch.autograd.grad(u_model.sum(), t, create_graph=True)[0] + u_x = torch.autograd.grad(u_model.sum(), x, create_graph=True)[0] + + u_xx = torch.autograd.grad(u_x.sum(), x, create_graph=True)[0] + u_xxt = torch.autograd.grad(u_xx.sum(), t, create_graph=True)[0] + u_xxx = torch.autograd.grad(u_xx.sum(), x, create_graph=True)[0] + + r = u_t - u_xxt + 3.0 * u_model * u_x - 2.0 * u_x * u_xx - u_model * u_xxx + r_x = torch.autograd.grad(r.sum(), x, create_graph=True)[0] + + pde_loss_L2 = torch.mean(torch.square(r)) + pde_loss_grad = torch.mean(torch.square(r_x)) + + lam = lambda_grad(epoch) + pde_loss_H1 = pde_loss_L2 + lam * pde_loss_grad + + ic_mask = torch.abs(t) < 1e-6 + x_ic = x[ic_mask[:, 0]] + u_ic = u_0(x_ic) + t_ic = torch.zeros_like(x_ic) + u_ic_pred = model(torch.cat([x_ic, t_ic], axis=1)) + data_fitting_loss_0 = torch.mean(torch.square(u_ic_pred - u_ic)) + + data_fitting_loss_l_r = periodic_boundary_conditions(model) + + loss, loss_type = chebyshev_loss_function( + [pde_loss_H1, data_fitting_loss_0, data_fitting_loss_l_r], + lambdas + ) + + # constraint + H0 = H(u_0(x_grid.flatten().reshape(-1, 1)), u_0_x(x_grid.flatten().reshape(-1, 1)), dx) + + Hf = H(u_model.reshape(Nt, Nx), u_x.reshape(Nt, Nx), dx) + H_constraint = torch.abs(Hf - H0)/torch.abs(H0) + + eps = 5/(epoch+1) + H_constraint = torch.max(H_constraint - eps, torch.zeros_like(H_constraint)).unsqueeze(0) + + loss = dual_opt.forward_update(loss, H_constraint) + + return loss, loss_type, pde_loss_L2, data_fitting_loss_0, data_fitting_loss_l_r, Hf + + +####### TRAINING LOOP ####### + + +model = PINNModel().to(device) +epochs = 1000 + +optimizer = torch.optim.NAdam(model.parameters(), lr=1e-2, betas=(0.8, 0.9), eps=1e-07) +lr_schedule = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9**(1/epochs)) + +losses, pde_losses, data_losses_0, bc_losses = [], [], [], [] +H_losses_min, H_losses_max, H_losses_mean, H_losses_std = [], [], [], [] +H_losses_abs_error, H_losses_rel_error = [], [] +t0 = time() + + +# dual_opt = ALM(m=1, lr=5e-5, dual_range=(0.,100.), device=device, ctol=1e-3, penalty=0.) +dual_opt = iALM(m=1, lr=0.1, beta=0.01, sigma=1.0001, gamma=1., dual_range=(0.,10.), ctol=1e-3) + +for epoch in range(epochs): + optimizer.zero_grad() + # loss, loss_type, pde_loss, data_loss_0, bc_loss, H_loss = custom_loss(inputs, model, epoch) + + loss, loss_type, pde_loss, data_loss_0, bc_loss, H_loss = lagrangian_loss(inputs, model, dual_opt, epoch) + loss.backward() + optimizer.step() + + + if epoch % 1 == 0: + lr_schedule.step() + + with torch.no_grad(): + + losses.append(loss.item()) + pde_losses.append(pde_loss.item()) + data_losses_0.append(data_loss_0.item()) + bc_losses.append(bc_loss.item()) + + H_loss_min = torch.min(H_loss).item() + H_loss_max = torch.max(H_loss).item() + H_losses_min.append(H_loss_min) + H_losses_max.append(H_loss_max) + H_loss_mean = torch.mean(H_loss).item() + H_loss_std = torch.std(H_loss).item() + H_losses_mean.append(H_loss_mean) + H_losses_std.append(H_loss_std) + + H0 = H(u_0(x_grid.flatten().reshape(-1, 1)), u_0_x(x_grid.flatten().reshape(-1, 1)), dx) + Hf = H_loss.detach() + H_abs_error = torch.abs(Hf - H0) + H_losses_abs_error.append(torch.max(H_abs_error).item()) + H_rel_error = H_abs_error / (torch.abs(H0) + 1e-16) + if isinstance(H_rel_error, torch.Tensor): + H_rel_error = H_rel_error.item() if H_rel_error.numel() == 1 else H_rel_error.max().item() + H_losses_rel_error.append(H_rel_error) + + if epoch % 100 == 0 or epoch == epochs - 1: + print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item():.6e}") + + # lambdas_values.append((trainable[-1]).numpy()) + if len(losses) > 1: + # SoftAdaptive weights update + num1 = np.exp(pde_losses[-1] - pde_losses[-2]) + num2 = np.exp(data_losses_0[-1] - data_losses_0[-2]) + num3 = np.exp(bc_losses[-1] - bc_losses[-2]) + den = num1 + num2 + num3 + + new_lambdas = torch.tensor([num1 / den, num2 / den, num3 / den]) + lambdas = new_lambdas + # lambdas_values.append((lambdas).numpy()) + +print(f'\nComputation time: {time() - t0:.2f}s') +print(f"Loss type: {loss_type}") +print(f"Hamiltonian mean: {H_loss_mean}") +print(f"Hamiltonian std: {H_loss_std}") +print(f"Hamiltonian max: {H_loss_max}") +print(f"Hamiltonian min: {H_loss_min}") + +plt.figure(figsize=(10, 6)) +plt.semilogy(losses, label='Total Loss') +plt.semilogy(pde_losses, label='PDE Loss') +plt.semilogy(data_losses_0, label='Initial Conditions Loss') +plt.semilogy(bc_losses, label='Boundary Conditions Loss') +plt.xlabel('Epoch') +plt.ylabel('Loss') +plt.title('Loss Contributions') +plt.legend() +plt.grid() +plt.savefig('./results/ch_loss.png', dpi=300) if save_fig else None +plt.show() + +plt.figure(figsize=(10, 6)) +plt.plot(H_losses_min, label='min H') +plt.plot(H_losses_max, label='max H') +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian') +plt.title('Hamiltonian over epochs') +plt.legend() +plt.grid() +plt.savefig('./results/ch_H_loss.png', dpi=300) if save_fig else None +plt.show() + +H_losses_mean_arr = np.array(H_losses_mean) +H_losses_std_arr = np.array(H_losses_std) +plt.figure(figsize=(10, 6)) +plt.plot(H_losses_mean_arr) +plt.fill_between(range(len(H_losses_mean_arr)), H_losses_mean_arr - H_losses_std_arr, H_losses_mean_arr + H_losses_std_arr, alpha=0.2) +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian mean') +plt.title('Hamiltonian mean over epochs with standard deviation') +plt.grid() +plt.savefig('./results/ch_H_loss_mean.png', dpi=300) if save_fig else None +plt.show() + +plt.figure(figsize=(10, 6)) +plt.plot(H_losses_std_arr) +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian std') +plt.title('Hamiltonian standard deviation over epochs') +plt.grid() +plt.savefig('./results/ch_H_loss_std.png', dpi=300) if save_fig else None +plt.show() + +H_losses_abs_error = np.array(H_losses_abs_error) +plt.figure(figsize=(10, 6)) +plt.plot(H_losses_abs_error) +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian absolute error') +plt.title('Hamiltonian absolute error over epochs') +plt.grid() +plt.savefig('./results/ch_H_loss_abs_error.png', dpi=300) if save_fig else None +plt.show() + +H_losses_rel_error = np.array(H_losses_rel_error) +plt.figure(figsize=(10, 6)) +plt.plot(H_losses_rel_error) +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian relative error') +plt.title('Hamiltonian relative error over epochs') +plt.grid() +plt.savefig('./results/ch_H_loss_rel_error.png', dpi=300) if save_fig else None +plt.show() + +N = 600 +tspace = torch.linspace(0, tMax, N + 1, dtype=DTYPE, device=device) +xspace = torch.linspace(xMin, xMax, N + 1, dtype=DTYPE, device=device) +T_grid, X_grid = torch.meshgrid(tspace, xspace, indexing='ij') +XTgrid = torch.stack([X_grid.flatten(), T_grid.flatten()], dim=1) + +with torch.no_grad(): + u_pred = model(XTgrid) +U = u_pred.reshape(N+1, N+1) + +X_np = X_grid.cpu().numpy() +T_np = T_grid.cpu().numpy() +U_np = U.cpu().numpy() + +from mpl_toolkits.mplot3d import Axes3D +fig = plt.figure(figsize=(9, 6)) +ax = fig.add_subplot(111, projection='3d') +ax.plot_surface(X_np, T_np, U_np, cmap='viridis') +ax.set_xlabel('$x$') +ax.set_ylabel('$t$') +ax.set_zlabel('$u(x,t)$') +ax.set_title('Camassa-Holm equation') +plt.savefig('./results/ch_solution.png', dpi=300) if save_fig else None +plt.show() + + +import pandas as pd +df = pd.DataFrame() + +df['total_loss'] = losses +df['pde_loss'] = pde_losses +df['data_fitting_loss_0'] = data_losses_0 +df['data_fitting_loss_l_r'] = bc_losses +df['H_loss_min'] = H_losses_min +df['H_loss_max'] = H_losses_max +df['H_loss_mean'] = H_losses_mean +df['H_loss_std'] = H_losses_std +df['H_loss_abs_error'] = H_losses_abs_error +df['H_loss_rel_error'] = H_losses_rel_error +# df['cheb_par'] = cheb_par_values + +df.to_csv('./results/camassa/torch_training_history.csv', index=False) \ No newline at end of file diff --git a/experiments/structure_preserving_pinns/structurePreservingPINN_kdv_trainableH1.py b/experiments/structure_preserving_pinns/structurePreservingPINN_kdv_trainableH1.py new file mode 100644 index 0000000..781639e --- /dev/null +++ b/experiments/structure_preserving_pinns/structurePreservingPINN_kdv_trainableH1.py @@ -0,0 +1,769 @@ +import numpy as np +from sympy import false, per +import tensorflow as tf +from time import time +import matplotlib.pyplot as plt + +# TF_USE_LEGACY_KERAS=True + +DTYPE = np.float32 +# Nx = 100 +# Nt = 100 +# N_collocation = Nx*Nt + +type = 1 # 1 for KdV, 2 + +if type == 1: + nu = -0.022**2 + alpha = -0.5 + rho = 0. + xMin = 0. + xMax = 2. + tMax = 5. # 10. +elif type == 2: + nu = -1. + alpha = -3. + rho = 0. + xMin = -20. + xMax = 20. + tMax = 100. # 4. + + +def Nx_from_arch(width, depth, fac=1.5, d_in=2, d_out=1): + """ + Given a PINN architecture (width, depth) and an overparam factor fac, + compute Nx = Nt such that: + + Nx * Nt ≈ N_params / fac, + Nx = Nt, + + where N_params is the number of trainable parameters. + + Parameters + ---------- + width : int + Number of neurons per hidden layer. + depth : int + Number of hidden layers. + fac : float + Over-parameterization factor. Typical values: fac = 2 or 3. + d_in : int + Input dimension (usually 2: x,t). + d_out : int + Output dimension (usually 1: u). + + Returns + ------- + Nx : int + Nt : int + Ntheta : int + Total number of trainable parameters. + Ncoll_target : int + Target number of collocation points = Ntheta/fac. + """ + + # Parameter count + Ntheta = (d_in + 1) * width \ + + (depth - 1) * (width * width + width) \ + + d_out * (width + 1) + + # Target collocation count + Ncoll_target = int(Ntheta / fac) + + # Square grid Nx = Nt + Nx = int(np.sqrt(Ncoll_target)) + Nt = Nx + + return Nx, Nt, Ntheta, Ncoll_target + + +width = 80 +depth = 4 + +Nx, Nt, Ntheta, Ncoll = Nx_from_arch(width=width, depth=depth, fac=10.) + +def h_from_NxNt(Nx, Nt, xMin, xMax, tMax): + """ + Compute dx, dt, and h from Nx, Nt and the domain extents. + h is defined as max(dx, dt). + + Returns + ------- + dx : float + dt : float + h : float + """ + + Lx = xMax - xMin + Lt = tMax + + dx = Lx / (Nx - 1) + dt = Lt / (Nt - 1) + + h = max(dx, dt) + + return dx, dt, h + +dx, dt, h = h_from_NxNt(Nx, Nt, xMin, xMax, tMax) + +x = np.linspace(xMin, xMax, Nx).reshape((-1, 1)).astype(DTYPE) +t = np.linspace(0, tMax, Nt).reshape((-1, 1)).astype(DTYPE) + +x_train = tf.expand_dims(tf.convert_to_tensor(x.flatten()), axis=-1) +t_train = tf.expand_dims(tf.convert_to_tensor(t.flatten()), axis=-1) + +lambdas = [1., 1., 1.] +lambdas = tf.Variable(lambdas, trainable=False, name='lambdas', dtype=DTYPE) +do_training = False +cheb_par = tf.Variable(0.5, trainable=False, name='cheb_par', dtype=DTYPE) + +save_fig = True + +# Define the initial condition +def u_0(x): + if type == 1: + return tf.math.cos(np.pi * x) + elif type == 2: + return 6./(tf.math.cosh(x)**2) + + +def u_0_x(x): + if type == 1: + return -np.pi*tf.math.sin(np.pi * x) + elif type == 2: + return -12.*tf.math.sinh(x)/(tf.math.cosh(x)**3) + + +def periodic_bc(model, x, t): + xL = tf.ones_like(x) * xMin + xR = tf.ones_like(x) * xMax + uL = model(tf.concat([xL, t], axis=1)) + uR = model(tf.concat([xR, t], axis=1)) + return tf.reduce_mean((uL - uR)**2) + + +def V(u): + return alpha*tf.pow(u, 3)/3 + rho*tf.pow(u, 2)/2 + + +def kdv_density(u, u_x): + return V(u)-nu*tf.pow(u_x, 2)/2 + +# @tf.function +def H(u, u_x, dx, density_fn=kdv_density, axis=-1): + """ + Boole’s rule (8th order) along 'axis' for uniform grid with spacing dx. + Requires (N-1) % 4 == 0. Otherwise uses Boole on the largest prefix and trapezoid on remainder. + """ + f = density_fn(u, u_x) # [..., N] + n = tf.shape(f)[axis] + + # Trapezoid as a fallback on short tails + def _trap_rem(rem): + # rem: [..., M] contiguous tail; integrate with trapezoid + return tf.reduce_sum(0.5*(rem[..., 1:] + rem[..., :-1]), axis=-1) * tf.cast(dx, f.dtype) + + # Degenerate + if tf.less_equal(n, 1): + return tf.reduce_sum(f, axis=axis) * dx + + # Largest prefix with (n1-1) % 4 == 0 + n1 = n - ((n - 1) % 4) + # Boole constant for uniform spacing: 2*dx/45 + c = (2.0 * dx) / 45.0 + + # Indices for prefix + idx_prefix = tf.range(n1) + f0 = tf.gather(f, idx_prefix[0::4], axis=axis) # 0,4,8,... + f1 = tf.gather(f, idx_prefix[1::4], axis=axis) # 1,5,9,... + f2 = tf.gather(f, idx_prefix[2::4], axis=axis) # 2,6,10,... + f3 = tf.gather(f, idx_prefix[3::4], axis=axis) # 3,7,11,... + f4 = tf.gather(f, idx_prefix[4::4], axis=axis) # 4,8,12,... (last block end) + + # Weighted sum across blocks + # Boole's block weights per 5 nodes: [7, 32, 12, 32, 7] + # Aggregate across all blocks by summing slices + s = 7.0 * tf.reduce_sum(f0, axis=axis) + s += 32.0 * tf.reduce_sum(f1, axis=axis) + s += 12.0 * tf.reduce_sum(f2, axis=axis) + s += 32.0 * tf.reduce_sum(f3, axis=axis) + s += 7.0 * tf.reduce_sum(f4, axis=axis) + + boole_part = c * s + + # Tail remainder + if tf.equal(n1, n): + return boole_part + + rem = tf.gather(f, tf.range(n1-1, n), axis=axis) # nodes: n1-1 .. n-1 + tail = _trap_rem(rem) + return boole_part + tail + + +def linear_loss_function(tensors, weights): + """ + Computes the sum of the input tensors. + + Args: + tensors (list of tf.Tensor): List of tensors to compute the sum. + + Returns: + tf.Tensor: The sum of the input tensors. + """ + # weights = weights / tf.reduce_sum(weights) # Normalize weights + # stack_tensor = tf.stack(tf.multiply(tensors, weights)) + # stack_tensor = tf.stack([w * t for w, t in zip(weights, tensors)], axis=0) + stacked = tf.stack(tensors, axis=0) # shape (n_losses,) + weights = weights / tf.reduce_sum(weights) + loss = tf.reduce_sum(weights * stacked) + loss_type = 'ls' + return loss, loss_type + + +def chebyshev_loss_function(tensors, weights): + """ + Computes the max of the input tensors. + + Args: + tensors (list of tf.Tensor): List of tensors to compute the log-sum-exp. + + Returns: + tf.Tensor: The maximum of the input tensors. + """ + # weights = weights / tf.reduce_sum(weights) # Normalize weights + # stack_tensor = tf.stack(tf.multiply(tensors, weights)) + # stack_tensor = tf.stack([w * t for w, t in zip(weights, tensors)], axis=0) + stacked = tf.stack(tensors, axis=0) + loss = tf.reduce_max(weights*stacked) + loss_type = 'cs' + return loss, loss_type + + +def smooth_chebyshev_loss_function(mu, tensors, weights): + """ + Computes the log of the sum of the exponentials of the input tensors. + + Args: + tensors (list of tf.Tensor): List of tensors to compute the log-sum-exp. + + Returns: + tf.Tensor: The log-sum-exp of the input tensors. + """ + weights = weights / tf.reduce_sum(weights) # Normalize weights + # stack_tensor = tf.stack(tf.multiply(tensors, weights)) + # stack_tensor = tf.stack([w * t for w, t in zip(weights, tensors)], axis=0) + stacked = tf.stack(tensors, axis=0) + exp_sum = tf.reduce_sum(tf.math.exp(stacked/mu), axis=0) + loss = mu*tf.math.log(exp_sum) + loss_type = 'scs' + return loss, loss_type + + +def augmentedChebyshev_loss_function(tensors, weights): + """ + Computes the log of the sum of the exponentials of the input tensors. + + Args: + tensors (list of tf.Tensor): List of tensors to compute the log-sum-exp. + + Returns: + tf.Tensor: The log-sum-exp of the input tensors. + """ + # weights = weights / tf.reduce_sum(weights) # Normalize weights + loss_type = 'acs' + par = tf.sigmoid(cheb_par) # par is between 0 and 1 + return par*chebyshev_loss_function(tensors, weights)[0] + (1-par)*linear_loss_function(tensors, weights)[0], loss_type + + +def sigmoid_centered(x): + return 2*tf.nn.sigmoid(x) - 1 + + +def PINNModel(num_hidden_layers=depth, num_neurons_per_layer=width): # 8,40 + xt_input = tf.keras.Input(shape=(2,)) + output_u = xt_input + for _ in range(num_hidden_layers): + output_u = tf.keras.layers.Dense(num_neurons_per_layer, + activation=sigmoid_centered, # mish + kernel_initializer='glorot_uniform', # glorot_normal + # kernel_constraint=tf.keras.constraints.UnitNorm(axis=0) + # lora_rank=10 + )(output_u) + + output_u = tf.keras.layers.Dense(units=1, + activation='linear', + kernel_initializer='glorot_uniform', # glorot_normal + # kernel_constraint=tf.keras.constraints.UnitNorm(axis=0) + # lora_rank=10 + )(output_u) + + return tf.keras.Model(inputs=xt_input, outputs=output_u) #tf.keras.Model(inputs=[x_input, t_input], outputs=output_u) + + +def lambda_grad(epoch, + start=1000, + lam_max=1e-0, + kappa=1e-3): + epoch = tf.cast(epoch, tf.float32) + return lam_max * (1.0 - tf.exp(-kappa * tf.maximum(epoch - start, 0.0))) + + +@tf.function +def grad_L2_fft_batch(r, L): + """ + r : shape (Nt, Nx) + returns : shape (Nt,) + """ + r = tf.cast(r, tf.complex64) + Nx = tf.shape(r)[-1] + + k_pos = tf.range(0, Nx//2 + 1, dtype=tf.float32) + k_neg = tf.range(-Nx//2 + 1, 0, dtype=tf.float32) + k = tf.concat([k_pos, k_neg], axis=0) + k = (2.0 * tf.constant(np.pi) / L) * k + k = tf.cast(k, tf.complex64) + + r_hat = tf.signal.fft(r) + + grad_energy = tf.reduce_sum(tf.abs(1j * k * r_hat)**2, axis=-1) + + dx = L / tf.cast(Nx, tf.float32) + return tf.math.real(grad_energy) * dx + + +@tf.function +def H1_norm_fft_batch(r, L): + """ + Compute ||r||_{H^1}^2 for each time slice. + + r : shape (Nt, Nx) + returns : shape (Nt,) + """ + r = tf.cast(r, tf.complex64) + Nx = tf.shape(r)[-1] + + k_pos = tf.range(0, Nx//2 + 1, dtype=tf.float32) + k_neg = tf.range(-Nx//2 + 1, 0, dtype=tf.float32) + k = tf.concat([k_pos, k_neg], axis=0) + k = (2.0 * tf.constant(np.pi) / L) * k + k = tf.cast(k, tf.complex64) + + r_hat = tf.signal.fft(r) + weight = 1.0 + tf.abs(k)**2 + + H1_sq = tf.reduce_sum(weight * tf.abs(r_hat)**2, axis=-1) + + dx = L / tf.cast(Nx, tf.float32) + return tf.math.real(H1_sq) * dx + + +# @tf.function +def custom_loss(inputs, model, epoch): + x, t = inputs[:, 0:1], inputs[:, 1:2] + + with tf.GradientTape(persistent=True) as tape: + tape.watch(t) + tape.watch(x) + with tf.GradientTape(persistent=True) as tape2: + tape2.watch(x) + tape2.watch(t) + with tf.GradientTape(persistent=True) as tape3: + tape3.watch(x) + tape3.watch(t) + u_model = model(tf.concat([x, t], axis=1)) + u_x = tape3.gradient(u_model, x) + u_t = tape3.gradient(u_model, t) + u_xx = tape2.gradient(u_x, x) + u_xxx = tape.gradient(u_xx, x) + u_squared_x = 2*u_model*u_x + r = u_t - alpha * u_squared_x - rho*u_x - nu*u_xxx + del tape, tape2, tape3 + + # === PDE residual loss (stabilized, consistent) === + pde_loss_L2 = tf.reduce_mean(tf.square(r)) + + r_grid = tf.reshape(r, [Nt, Nx]) + L = xMax - xMin + + pde_loss_grad = tf.reduce_mean( + grad_L2_fft_batch(r_grid, L) + ) + + # mesh-scaled stabilization parameter + lam = 0.01 * (dx**2) * tf.minimum(1.0, epoch / 1000.0) + + pde_loss_H1 = pde_loss_L2 + lam * pde_loss_grad + + # === Initial condition === + ic_mask = tf.where(tf.abs(t) < 1e-6) + x_ic = tf.gather(x, ic_mask[:, 0]) + u_ic = u_0(x_ic) + t_ic = tf.zeros_like(x_ic) + u_ic_pred = model(tf.concat([x_ic, t_ic], axis=1)) + data_fitting_loss_0 = tf.reduce_mean(tf.square(u_ic_pred - u_ic)) + + # === Periodic BC === + data_fitting_loss_l_r = periodic_bc(model, x, t) + + # === Chebyshev aggregation === + # loss, loss_type = chebyshev_loss_function( + # [pde_loss_H1, data_fitting_loss_0, data_fitting_loss_l_r], + # lambdas + # ) + # loss, loss_type = augmentedChebyshev_loss_function( + # [pde_loss_H1, data_fitting_loss_0, data_fitting_loss_l_r], + # lambdas + # ) + loss, loss_type = linear_loss_function( + [pde_loss_H1, data_fitting_loss_0, data_fitting_loss_l_r], + lambdas + ) + + # === Hamiltonian (monitor only) === + H_loss = H( + tf.reshape(u_model, shape=[Nt, Nx]), + tf.reshape(u_x, shape=[Nt, Nx]), + dx + ) + + return ( + loss, + loss_type, + pde_loss_H1, + data_fitting_loss_0, + data_fitting_loss_l_r, + H_loss, + ) + + +# Create the PINN model +model = PINNModel() +model.summary() + +epochs = 5000 # 1000, 2000, 5000 +# # Compile the model +# model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), +# loss=lambda y_true, y_pred: custom_loss([x_train, t_train, theta_train], model)[1]) + +# Create the optimizer with a smaller learning rate +# learning_rate = 1e-3 # 1e-4 +# learning_rate_type = 'constant' +# learning_rate = tf.keras.optimizers.schedules.PiecewiseConstantDecay([10, 100], [1e-1, 5e-2, 1e-2]) #OK +# learning_rate = tf.keras.optimizers.schedules.PiecewiseConstantDecay([100, 300], [1e-2, 1e-3, 1e-4]) +# learning_rate = tf.keras.optimizers.schedules.PolynomialDecay( +# initial_learning_rate=1e-3, +# decay_steps=epochs, +# end_learning_rate=1e-5, +# power=2., +# cycle=False, +# name='PolynomialDecay' +# ) +# learning_rate = tf.keras.optimizers.schedules.ExponentialDecay( +# initial_learning_rate=1e-3, +# decay_steps=epochs, # 100 +# decay_rate=0.9, # 0.9 +# staircase=False, +# name='ExponentialDecay' +# ) +# learning_rate_type = 'exponentialDecay' +learning_rate = tf.keras.optimizers.schedules.CosineDecay( + initial_learning_rate=1e-4, + decay_steps=1000, + alpha=0.5, + name='CosineDecay', + warmup_target=None, + warmup_steps=100 +) +learning_rate_type = 'cosineDecay' +# param_values = [delta.numpy()] +trainable = model.trainable_variables +if lambdas.trainable: + trainable += [lambdas] + +if cheb_par.trainable: + trainable += [cheb_par] + +# optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=1e-08, amsgrad=True) +# optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9, nesterov=True) +optimizer = tf.keras.optimizers.AdamW(learning_rate=learning_rate, beta_1=0.8, beta_2=0.9, epsilon=1e-07) +# optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate, rho=0.9, momentum=0.0, epsilon=1e-07, centered=False) + +# Training loop +losses = [] +pde_losses = [] +data_fitting_losses_0 = [] +data_fitting_losses_l_r = [] +delta_gradients = [] +# S_losses_min = [] +# S_losses_max = [] +H_losses_min = [] +H_losses_max = [] +H_losses_mean = [] +H_losses_std = [] +H_losses_abs_error = [] +H_losses_rel_error = [] +lambdas_values = [] +lambdas_values.append(lambdas.numpy()) +cheb_par_values = [] +cheb_par_values.append(cheb_par.numpy()) + +# Convert data to tensor because tf.GradientTape() can only watch tensor and not numpy arrays +x_train = tf.convert_to_tensor(x_train) +t_train = tf.convert_to_tensor(t_train) +x_grid, t_grid = np.meshgrid(x.flatten(), t.flatten()) +inputs = tf.convert_to_tensor(np.vstack([x_grid.flatten(), t_grid.flatten()]).T) +stop = False +# Start timer +t0 = time() +for epoch in range(epochs): + if not stop: + # print("# STARTING EPOCH", epoch + 1) + + with tf.GradientTape() as tape: + loss, loss_type, pde_loss, data_fitting_loss_0, data_fitting_loss_l_r, H_loss = custom_loss(inputs, model, epoch) + + # print("Computing gradients") + gradients = tape.gradient(loss, trainable) + # print(gradients[-1]) + # print("Applying gradients") + optimizer.apply_gradients(zip(gradients, trainable)) + # print("Appending losses") + losses.append(loss.numpy()) + pde_losses.append(pde_loss.numpy()) + data_fitting_losses_0.append(data_fitting_loss_0.numpy()) + data_fitting_losses_l_r.append(data_fitting_loss_l_r.numpy()) + H_loss_min = tf.reduce_min(H_loss) + H_loss_max = tf.reduce_max(H_loss) + H_losses_min.append(H_loss_min.numpy()) + H_losses_max.append(H_loss_max.numpy()) + H_loss_mean = tf.reduce_mean(H_loss) + H_loss_std = tf.math.reduce_std(H_loss) + H_losses_mean.append(H_loss_mean.numpy()) + H_losses_std.append(H_loss_std.numpy()) + + H0 = H(u_0(x_grid), u_0_x(x_grid), dx) # H0 = H_loss[0].numpy() + Hf = H_loss.numpy() + H_abs_error = tf.abs(Hf - H0) + H_losses_abs_error.append(tf.reduce_max(H_abs_error).numpy()) + H_rel_error = H_abs_error / tf.abs((H0 + 1e-16)) + H_losses_rel_error.append(H_rel_error[-1].numpy()) + + # # Print S_loss, H_loss + # print(f"S_loss at epoch {epoch + 1}: {S_loss.numpy()}") + # print(f"H_loss at epoch {epoch + 1}: {H_loss.numpy()}") + + if len(losses) > 1 and not lambdas.trainable and do_training: + # SoftAdaptive weights update + # num1 = tf.math.exp(tf.experimental.numpy.cbrt(pde_losses[-1] - pde_losses[-2])) + # num2 = tf.math.exp(tf.experimental.numpy.cbrt(data_fitting_losses_0[-1] - data_fitting_losses_0[-2])) + # num3 = tf.math.exp(tf.experimental.numpy.cbrt(data_fitting_losses_l_r[-1] - data_fitting_losses_l_r[-2])) + num1 = tf.math.exp((pde_losses[-1] - pde_losses[-2])) + num2 = tf.math.exp((data_fitting_losses_0[-1] - data_fitting_losses_0[-2])) + num3 = tf.math.exp((data_fitting_losses_l_r[-1] - data_fitting_losses_l_r[-2])) + den = num1 + num2 + num3 + + new_lambdas = tf.stack([num1 / den, num2 / den, num3 / den]) + lambdas.assign(new_lambdas) + lambdas_values.append((lambdas).numpy()) + + if cheb_par.trainable: + cheb_par_values.append(cheb_par.numpy()) + + del tape + + if epoch % 100 == 0 or epoch == epochs - 1: + print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.numpy()}") + + # if len(losses) > 2 and np.abs(losses[-1] - losses[-2]) / np.abs(losses[-2]) < 1e-8: + # stop = True + +print(f"Loss type: {loss_type}") +print(f"Hamiltonian mean: {H_loss_mean.numpy()}") +print(f"Hamiltonian standard deviation: {H_loss_std.numpy()}") +print(f"Hamiltonian maximum: {H_loss_max.numpy()}") +print(f"Hamiltonian minimum: {H_loss_min.numpy()}") +# print(f"Hamiltonian absolute error: {H_abs_error.numpy()}") +# print(f"Hamiltonian relative error: {H_rel_error.numpy()}") +print(f"Hamitonian relative error: {H_rel_error[-1].numpy()}") +# Print computation time +print('\nComputation time: {} seconds'.format(time() - t0)) + +import pandas as pd + +df = pd.DataFrame() +df['epoch'] = range(1, epochs + 1) + +def generate_save_fig_string(type, epochs, learning_rate_type, loss_type): + """ + Generates a string for saving figures that includes the number of epochs and the type of learning rate. + + Args: + epochs (int): The number of epochs. + learning_rate_type (str): The type of learning rate. + + Returns: + str: The generated string for saving figures. + """ + return f"./results/{type}_epochs_{epochs}_lr_{learning_rate_type}_{loss_type}.png" + +# Plot the loss history +plt.semilogy(losses, label='Total Loss') +plt.semilogy(pde_losses, label='PDE Loss') +plt.semilogy(data_fitting_losses_0, label='Initial Conditions Loss') +plt.semilogy(data_fitting_losses_l_r, label='Periodic Boundary Conditions Loss') +plt.xlabel('Epoch') +plt.ylabel('Loss') +plt.title('Loss Contributions') +plt.legend() +plt.grid() + +df['total_loss'] = losses +df['pde_loss'] = pde_losses +df['data_fitting_loss_0'] = data_fitting_losses_0 +df['data_fitting_loss_l_r'] = data_fitting_losses_l_r +df['H_loss_min'] = H_losses_min +df['H_loss_max'] = H_losses_max +df['H_loss_mean'] = H_losses_mean +df['H_loss_std'] = H_losses_std +df['H_loss_abs_error'] = H_losses_abs_error +df['H_loss_rel_error'] = H_losses_rel_error +# df['cheb_par'] = cheb_par_values + +df.to_csv('./results/kdv/training_history.csv', index=False) + + +if save_fig: + save_fig_string = generate_save_fig_string('loss', epochs, learning_rate_type, loss_type) + # save png + plt.savefig(save_fig_string, dpi=300) +plt.show() + +# Evaluate the function +x_eval = np.linspace(x_train[0].numpy(), x_train[-1].numpy(), 100).reshape((-1, 1)).astype(np.float32) +t_eval = np.linspace(t_train[0].numpy(), t_train[-1].numpy(), 100).reshape((-1, 1)).astype(np.float32) +inputs_eval = [x_eval, t_eval] + + +# Plot the Hamiltonian over epochs +plt.plot(H_losses_min, label='H_loss_min') +plt.plot(H_losses_max, label='H_loss_max') +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian') +plt.title('Hamiltonian over epochs') +plt.legend() +plt.grid() + +if save_fig: + save_fig_string = generate_save_fig_string('H_loss', epochs, learning_rate_type, loss_type) + # save png + plt.savefig(save_fig_string, dpi=300) +plt.show() + +# Plot the average Hamiltonian over epochs with standard deviation +H_losses_mean = np.array(H_losses_mean) +H_losses_std = np.array(H_losses_std) +H_losses_abs_error = np.array(H_losses_abs_error) +H_losses_rel_error = np.array(H_losses_rel_error) + +plt.plot(H_losses_mean) +plt.fill_between(range(len(H_losses_mean)), H_losses_mean - H_losses_std, H_losses_mean + H_losses_std, alpha=0.2) +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian mean') +plt.title('Hamiltonian mean over epochs with standard deviation') +# plt.legend() +plt.grid() + +if save_fig: + save_fig_string = generate_save_fig_string('H_loss_mean', epochs, learning_rate_type, loss_type) + # save png + plt.savefig(save_fig_string, dpi=300) +plt.show() + +# Plot the standard deviation of the Hamiltonian over epochs +plt.plot(H_losses_std) +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian std') +plt.title('Hamiltonian standard deviation over epochs') +# plt.legend() +plt.grid() + +if save_fig: + save_fig_string = generate_save_fig_string('H_loss_std', epochs, learning_rate_type, loss_type) + # save png + plt.savefig(save_fig_string, dpi=300) +plt.show() + +# Plot the absolute error of the Hamiltonian over epochs +plt.plot(H_losses_abs_error) +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian absolute error') +plt.title('Hamiltonian absolute error over epochs') +# plt.legend() +plt.grid() + +if save_fig: + save_fig_string = generate_save_fig_string('H_loss_abs_error', epochs, learning_rate_type, loss_type) + # save png + plt.savefig(save_fig_string, dpi=300) +plt.show() + +# Plot the relative error of the Hamiltonian over epochs +plt.plot(H_losses_rel_error) +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian relative error') +plt.title('Hamiltonian relative error over epochs') +# plt.legend() +plt.grid() + +if save_fig: + save_fig_string = generate_save_fig_string('H_loss_rel_error', epochs, learning_rate_type, loss_type) + # save png + plt.savefig(save_fig_string, dpi=300) +plt.show() + + +# Plot the Chebyshev parameter over epochs +if cheb_par.trainable: + plt.plot(tf.sigmoid(cheb_par_values)) + plt.xlabel('Epoch') + plt.ylabel('Chebyshev parameter') + plt.title('Chebyshev parameter over epochs') + plt.grid() + + if save_fig: + save_fig_string = generate_save_fig_string('cheb_par', epochs, learning_rate_type, loss_type) + # save png + plt.savefig(save_fig_string, dpi=300) +plt.show() + +from mpl_toolkits.mplot3d import Axes3D + +# Set up meshgrid +N = 600 +tspace = np.linspace(0, tMax, N + 1) +xspace = np.linspace(xMin, xMax, N + 1) +T, X = np.meshgrid(tspace, xspace) +XTgrid = np.vstack([X.flatten(),T.flatten()]).T + +# Determine predictions of u(t, x) +u_pred = model(tf.cast(XTgrid,DTYPE)) + +# Reshape upred +U = u_pred.numpy().reshape(N+1,N+1) + +# Surface plot of solution u(t,x) +fig = plt.figure(figsize=(9,6)) +ax = fig.add_subplot(111, projection='3d') +ax.plot_surface(X, T, U, cmap='viridis') +ax.view_init(35,35) +ax.set_xlabel('$x$') +ax.set_ylabel('$t$') +ax.set_zlabel('$u_(x,t)$') +ax.set_title('Solution to KdV equation') +ax.set_box_aspect(None, zoom=0.85) + +if save_fig: + save_fig_string = generate_save_fig_string('sol', epochs, learning_rate_type, loss_type) + # save png + plt.savefig(save_fig_string, dpi=300) +plt.show() \ No newline at end of file diff --git a/experiments/structure_preserving_pinns/structurePreservingPINN_kdv_trainableH1_pytorch.py b/experiments/structure_preserving_pinns/structurePreservingPINN_kdv_trainableH1_pytorch.py new file mode 100644 index 0000000..4a028ea --- /dev/null +++ b/experiments/structure_preserving_pinns/structurePreservingPINN_kdv_trainableH1_pytorch.py @@ -0,0 +1,573 @@ +import numpy as np +import torch +import torch.nn as nn +from time import time +import matplotlib.pyplot as plt +from humancompatible.train.dual_optim import ALM + +DTYPE = torch.float32 +torch.set_default_dtype(DTYPE) +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + +type_pde = 1 +if type_pde == 1: + nu, alpha, rho = -0.022**2, -0.5, 0. + xMin, xMax, tMax = 0., 2., 5. +elif type_pde == 2: + nu, alpha, rho = -1., -3., 0. + xMin, xMax, tMax = -20., 20., 100. + +def Nx_from_arch(width, depth, fac=1.5, d_in=2, d_out=1): + Ntheta = (d_in + 1) * width + (depth - 1) * (width * width + width) + d_out * (width + 1) + Ncoll_target = int(Ntheta / fac) + Nx = int(np.sqrt(Ncoll_target)) + Nt = Nx + return Nx, Nt, Ntheta, Ncoll_target + +width, depth = 80, 4 +Nx, Nt, Ntheta, Ncoll = Nx_from_arch(width=width, depth=depth, fac=10.) + +dx = (xMax - xMin) / (Nx - 1) +dt = tMax / (Nt - 1) +h = max(dx, dt) + +lambdas = torch.tensor([1., 1., 1.], dtype=DTYPE, device=device, requires_grad=False) +# do_training = False +cheb_par = torch.tensor(0.5, dtype=DTYPE, device=device, requires_grad=False) + +x = torch.linspace(xMin, xMax, Nx, dtype=DTYPE, device=device).reshape(-1, 1) +t = torch.linspace(0, tMax, Nt, dtype=DTYPE, device=device).reshape(-1, 1) +x_train = x.reshape(-1, 1) +t_train = t.reshape(-1, 1) +# t_grid, x_grid = torch.meshgrid(t.flatten(), x.flatten(), indexing='ij') +x_grid, t_grid = torch.meshgrid(x.flatten(), t.flatten(), indexing='xy') +inputs = torch.stack([x_grid.flatten(), t_grid.flatten()], dim=1) + +save_fig = True + +def u_0(x): + if type_pde == 1: + return torch.cos(np.pi * x) + elif type_pde == 2: + return 6. / (torch.cosh(x)**2) + +def u_0_x(x): + if type_pde == 1: + return -np.pi * torch.sin(np.pi * x) + elif type_pde == 2: + return -12. * torch.sinh(x) / (torch.cosh(x)**3) + +def periodic_bc(model, x, t): + xL = torch.full_like(x, xMin) + xR = torch.full_like(x, xMax) + uL = model(torch.cat([xL, t], dim=1)) + uR = model(torch.cat([xR, t], dim=1)) + return torch.mean((uL - uR)**2) + +def V(u): + return alpha * (u**3) / 3 + (rho * u**2) / 2 + +def kdv_density(u, u_x): + return V(u) - nu * torch.pow(u_x, 2) / 2 + + +def H(u, u_x, dx, density_fn=kdv_density, axis=-1): + """ + Boole’s rule (8th order) along `axis` for a uniform grid with spacing dx. + Requires (N-1) % 4 == 0. Otherwise applies Boole on the largest valid + prefix and trapezoid rule on the remainder. + """ + f = density_fn(u, u_x) # [..., N] + n = f.shape[axis] + + # Normalize negative axis + axis = axis % f.ndim + + def _trap_rem(rem): + """ + rem: contiguous tail segment integrated with trapezoid rule + """ + left = rem.narrow(-1, 0, rem.shape[-1] - 1) + right = rem.narrow(-1, 1, rem.shape[-1] - 1) + return torch.sum(0.5 * (left + right), dim=-1) * dx + + # Degenerate case + if n <= 1: + return torch.sum(f, dim=axis) * dx + + # Largest prefix satisfying (n1 - 1) % 4 == 0 + n1 = n - ((n - 1) % 4) + + # Boole constant + c = (2.0 * dx) / 45.0 + + # Build index slices + idx_prefix = torch.arange(n1, device=f.device) + + f0 = torch.index_select(f, axis, idx_prefix[0::4]) + f1 = torch.index_select(f, axis, idx_prefix[1::4]) + f2 = torch.index_select(f, axis, idx_prefix[2::4]) + f3 = torch.index_select(f, axis, idx_prefix[3::4]) + f4 = torch.index_select(f, axis, idx_prefix[4::4]) + + # Weighted Boole sum + s = 7.0 * torch.sum(f0, dim=axis) + s += 32.0 * torch.sum(f1, dim=axis) + s += 12.0 * torch.sum(f2, dim=axis) + s += 32.0 * torch.sum(f3, dim=axis) + s += 7.0 * torch.sum(f4, dim=axis) + + boole_part = c * s + + # No remainder + if n1 == n: + return boole_part + + # Tail remainder + rem_idx = torch.arange(n1 - 1, n, device=f.device) + rem = torch.index_select(f, axis, rem_idx) + + tail = _trap_rem(rem) + + return boole_part + tail + +def linear_loss_function(tensors, weights): + stacked = torch.stack(tensors) + weights = weights / torch.sum(weights) + loss = torch.sum(weights * stacked) + return loss, 'ls' + +def chebyshev_loss_function(tensors, weights): + stacked = torch.stack(tensors) + loss = torch.max(weights * stacked) + return loss, 'cs' + +def sigmoid_centered(x): + return 2 * torch.sigmoid(x) - 1 + +class PINNModel(nn.Module): + def __init__(self, num_hidden_layers=depth, num_neurons_per_layer=width): + super().__init__() + layers = [] + in_dim = 2 + for _ in range(num_hidden_layers): + layers.append(nn.Linear(in_dim, num_neurons_per_layer)) + layers.append(nn.Tanh()) + in_dim = num_neurons_per_layer + layers.append(nn.Linear(in_dim, 1)) + self.net = nn.Sequential(*layers) + + def forward(self, x): + return self.net(x) + + +def lambda_grad(epoch, + start=1000, + lam_max=1e0, + kappa=1e-3): + epoch = torch.as_tensor(epoch, dtype=torch.float32) + return lam_max * ( + 1.0 - torch.exp(-kappa * torch.clamp(epoch - start, min=0.0)) + ) + + +def grad_L2_fft_batch(r, L): + """ + r : shape (Nt, Nx) + returns : shape (Nt,) + """ + r = r.to(torch.complex64) + Nx = r.shape[-1] + + device = r.device + + k_pos = torch.arange(0, Nx // 2 + 1, + dtype=torch.float32, + device=device) + + k_neg = torch.arange(-Nx // 2 + 1, 0, + dtype=torch.float32, + device=device) + + k = torch.cat([k_pos, k_neg], dim=0) + k = (2.0 * np.pi / L) * k + k = k.to(torch.complex64) + + r_hat = torch.fft.fft(r, dim=-1) + + grad_energy = torch.sum(torch.abs(1j * k * r_hat) ** 2, dim=-1) + + dx = L / float(Nx) + + return torch.real(grad_energy) * dx + + +def H1_norm_fft_batch(r, L): + """ + Compute ||r||_{H^1}^2 for each time slice. + + r : shape (Nt, Nx) + returns : shape (Nt,) + """ + r = r.to(torch.complex64) + Nx = r.shape[-1] + + device = r.device + + k_pos = torch.arange(0, Nx // 2 + 1, + dtype=torch.float32, + device=device) + + k_neg = torch.arange(-Nx // 2 + 1, 0, + dtype=torch.float32, + device=device) + + k = torch.cat([k_pos, k_neg], dim=0) + k = (2.0 * np.pi / L) * k + k = k.to(torch.complex64) + + r_hat = torch.fft.fft(r, dim=-1) + + weight = 1.0 + torch.abs(k) ** 2 + + H1_sq = torch.sum(weight * torch.abs(r_hat) ** 2, dim=-1) + + dx = L / float(Nx) + + return torch.real(H1_sq) * dx + + +def custom_loss(inputs, model, epoch): + """ + Assumes the following globals/functions exist: + + alpha, rho, nu + Nt, Nx + dx, xMin, xMax + lambdas + + u_0(...) + periodic_bc(...) + linear_loss_function(...) + H(...) + """ + + x = inputs[:, 0:1].clone().detach().requires_grad_(True) + t = inputs[:, 1:2].clone().detach().requires_grad_(True) + + xt = torch.cat([x, t], dim=1) + + # Forward pass + u_model = model(xt) + + # First derivatives + u_x = torch.autograd.grad( + u_model, + x, + grad_outputs=torch.ones_like(u_model), + create_graph=True, + retain_graph=True, + )[0] + + u_t = torch.autograd.grad( + u_model, + t, + grad_outputs=torch.ones_like(u_model), + create_graph=True, + retain_graph=True, + )[0] + + # Second derivative + u_xx = torch.autograd.grad( + u_x, + x, + grad_outputs=torch.ones_like(u_x), + create_graph=True, + retain_graph=True, + )[0] + + # Third derivative + u_xxx = torch.autograd.grad( + u_xx, + x, + grad_outputs=torch.ones_like(u_xx), + create_graph=True, + retain_graph=True, + )[0] + + # PDE residual + u_squared_x = 2 * u_model * u_x + + r = ( + u_t + - alpha * u_squared_x + - rho * u_x + - nu * u_xxx + ) + + # === PDE residual loss === + pde_loss_L2 = torch.mean(r ** 2) + + r_grid = r.reshape(Nt, Nx) + + L = xMax - xMin + + pde_loss_grad = torch.mean( + grad_L2_fft_batch(r_grid, L) + ) + + # mesh-scaled stabilization parameter + lam = 0.01 * (dx ** 2) * min(1.0, float(epoch) / 1000.0) + + pde_loss_H1 = pde_loss_L2 + lam * pde_loss_grad + + # === Initial condition === + ic_mask = torch.where(torch.abs(t) < 1e-6)[0] + + x_ic = x[ic_mask] + + u_ic = u_0(x_ic) + + t_ic = torch.zeros_like(x_ic) + + u_ic_pred = model(torch.cat([x_ic, t_ic], dim=1)) + + data_fitting_loss_0 = torch.mean( + (u_ic_pred - u_ic) ** 2 + ) + + # === Periodic BC === + data_fitting_loss_l_r = periodic_bc(model, x, t) + + # === Aggregated loss === + loss, loss_type = linear_loss_function( + [ + pde_loss_H1, + data_fitting_loss_0, + data_fitting_loss_l_r, + ], + lambdas + ) + + # === Hamiltonian (monitor only) === + # breakpoint() + H_loss = H( + u_model.reshape(Nt, Nx), + u_x.reshape(Nt, Nx), + dx + ) + + return ( + loss, + loss_type, + pde_loss_H1, + data_fitting_loss_0, + data_fitting_loss_l_r, + H_loss, + ) + +def lagrangian_loss(inputs, model, dual_opt): + x, t = inputs[:, 0:1], inputs[:, 1:2] + x.requires_grad_(True) + t.requires_grad_(True) + + u_model = model(torch.cat([x, t], dim=1)) + + u_t = torch.autograd.grad(u_model.sum(), t, create_graph=True)[0] + u_x = torch.autograd.grad(u_model.sum(), x, create_graph=True)[0] + + u_xx = torch.autograd.grad(u_x.sum(), x, create_graph=True)[0] + u_xxx = torch.autograd.grad(u_xx.sum(), x, create_graph=True)[0] + + u_squared_x = 2 * u_model * u_x + r = u_t - alpha * u_squared_x - rho * u_x - nu * u_xxx + + pde_loss_L2 = torch.mean(torch.square(r)) + + # constraint: pde_loss_L2 = 0 or <= eps + + ic_mask = torch.abs(t) < 1e-6 + x_ic = x[ic_mask[:, 0]] + u_ic = u_0(x_ic) + t_ic = torch.zeros_like(x_ic) + u_ic_pred = model(torch.cat([x_ic, t_ic], axis=1)) + data_fitting_loss_0 = torch.mean((u_ic_pred - u_ic) ** 2) # IC loss + + data_fitting_loss_l_r = periodic_bc(model, x, t) # BC loss + + # ask: what's H_loss here, and should we use it in a constraint + H_loss = H(u_model.reshape(Nt, Nx), u_x.reshape(Nt, Nx), dx) + + data_fitting_loss = 0.5 * data_fitting_loss_0 + 0.5 * data_fitting_loss_l_r + + lagr = dual_opt.forward_update(data_fitting_loss, pde_loss_L2.unsqueeze(0)) + loss_type = 'ls' + + return lagr, loss_type, pde_loss_L2, data_fitting_loss_0, data_fitting_loss_l_r, H_loss + + + +model = PINNModel().to(device) +epochs = 5000 + +optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4) + +dual_opt = ALM(m=1, lr=1e-3, dual_range=(0.,10.), device=device) + +lr_schedule = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts( + optimizer, + T_0=100, + T_mult=1, + eta_min=0.5*1e-4 +) + +losses, pde_losses, data_losses_0, bc_losses = [], [], [], [] +H_losses_min, H_losses_max, H_losses_mean, H_losses_std = [], [], [], [] +H_losses_abs_error, H_losses_rel_error = [], [] +t0 = time() + +for epoch in range(epochs): + optimizer.zero_grad() + loss, loss_type, pde_loss, data_loss_0, bc_loss, H_loss = custom_loss(inputs, model, epoch) + loss.backward() + optimizer.step() + lr_schedule.step() + + with torch.no_grad(): + losses.append(loss.item()) + pde_losses.append(pde_loss.item()) + data_losses_0.append(data_loss_0.item()) + bc_losses.append(bc_loss.item()) + + H_loss_min = torch.min(H_loss).item() + H_loss_max = torch.max(H_loss).item() + H_losses_min.append(H_loss_min) + H_losses_max.append(H_loss_max) + H_loss_mean = torch.mean(H_loss).item() + H_loss_std = torch.std(H_loss).item() + H_losses_mean.append(H_loss_mean) + H_losses_std.append(H_loss_std) + + H0 = H(u_0(x_grid.flatten().reshape(-1, 1)).reshape(Nt, Nx), u_0_x(x_grid.flatten().reshape(-1, 1)).reshape(Nt, Nx), dx) + Hf = H_loss.detach() + # breakpoint() + H_abs_error = torch.abs(Hf - H0) + H_losses_abs_error.append(torch.max(H_abs_error).item()) + H_rel_error = H_abs_error / (torch.abs(H0) + 1e-16) + if isinstance(H_rel_error, torch.Tensor): + H_rel_error = H_rel_error.item() if H_rel_error.numel() == 1 else H_rel_error.max().item() + H_losses_rel_error.append(H_rel_error) + + if epoch > 1: + # SoftAdaptive weights update + # num1 = tf.math.exp(tf.experimental.numpy.cbrt(pde_losses[-1] - pde_losses[-2])) + # num2 = tf.math.exp(tf.experimental.numpy.cbrt(data_fitting_losses_0[-1] - data_fitting_losses_0[-2])) + # num3 = tf.math.exp(tf.experimental.numpy.cbrt(data_fitting_losses_l_r[-1] - data_fitting_losses_l_r[-2])) + num1 = np.exp((pde_losses[-1] - pde_losses[-2])) + num2 = np.exp((data_losses_0[-1] - data_losses_0[-2])) + num3 = np.exp((bc_losses[-1] - bc_losses[-2])) + den = num1 + num2 + num3 + + new_lambdas = torch.tensor([num1 / den, num2 / den, num3 / den]) + lambdas = new_lambdas + + if epoch % 100 == 0 or epoch == epochs - 1: + print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item():.6e}") + +print(f'\nComputation time: {time() - t0:.2f}s') +print(f"Loss type: {loss_type}") +print(f"Hamiltonian mean: {H_loss_mean}") +print(f"Hamiltonian std: {H_loss_std}") +print(f"Hamiltonian max: {H_loss_max}") +print(f"Hamiltonian min: {H_loss_min}") + +plt.figure(figsize=(10, 6)) +plt.semilogy(losses, label='Total Loss') +plt.semilogy(pde_losses, label='PDE Loss') +plt.semilogy(data_losses_0, label='Initial Conditions Loss') +plt.semilogy(bc_losses, label='Periodic Boundary Conditions Loss') +plt.xlabel('Epoch') +plt.ylabel('Loss') +plt.title('Loss Contributions') +plt.legend() +plt.grid() +plt.savefig('./results/kdv_loss.png', dpi=300) if save_fig else None +plt.show() + +plt.figure(figsize=(10, 6)) +plt.plot(H_losses_min, label='H_loss_min') +plt.plot(H_losses_max, label='H_loss_max') +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian') +plt.title('Hamiltonian over epochs') +plt.legend() +plt.grid() +plt.savefig('./results/kdv_H_loss.png', dpi=300) if save_fig else None +plt.show() + +H_losses_mean_arr = np.array(H_losses_mean) +H_losses_std_arr = np.array(H_losses_std) +plt.figure(figsize=(10, 6)) +plt.plot(H_losses_mean_arr) +plt.fill_between(range(len(H_losses_mean_arr)), H_losses_mean_arr - H_losses_std_arr, H_losses_mean_arr + H_losses_std_arr, alpha=0.2) +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian mean') +plt.title('Hamiltonian mean over epochs with standard deviation') +plt.grid() +plt.savefig('./results/kdv_H_loss_mean.png', dpi=300) if save_fig else None +plt.show() + +plt.figure(figsize=(10, 6)) +plt.plot(H_losses_std_arr) +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian std') +plt.title('Hamiltonian standard deviation over epochs') +plt.grid() +plt.savefig('./results/kdv_H_loss_std.png', dpi=300) if save_fig else None +plt.show() + +H_losses_abs_error = np.array(H_losses_abs_error) +plt.figure(figsize=(10, 6)) +plt.plot(H_losses_abs_error) +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian absolute error') +plt.title('Hamiltonian absolute error over epochs') +plt.grid() +plt.savefig('./results/kdv_H_loss_abs_error.png', dpi=300) if save_fig else None +plt.show() + +H_losses_rel_error = np.array(H_losses_rel_error) +plt.figure(figsize=(10, 6)) +plt.plot(H_losses_rel_error) +plt.xlabel('Epoch') +plt.ylabel('Hamiltonian relative error') +plt.title('Hamiltonian relative error over epochs') +plt.grid() +plt.savefig('./results/kdv_H_loss_rel_error.png', dpi=300) if save_fig else None +plt.show() + +N = 600 +tspace = torch.linspace(0, tMax, N + 1, dtype=DTYPE, device=device) +xspace = torch.linspace(xMin, xMax, N + 1, dtype=DTYPE, device=device) +T_grid, X_grid = torch.meshgrid(tspace, xspace, indexing='ij') +XTgrid = torch.stack([X_grid.flatten(), T_grid.flatten()], dim=1) + +with torch.no_grad(): + u_pred = model(XTgrid) +U = u_pred.reshape(N+1, N+1) + +X_np = X_grid.cpu().numpy() +T_np = T_grid.cpu().numpy() +U_np = U.cpu().numpy() + +from mpl_toolkits.mplot3d import Axes3D +fig = plt.figure(figsize=(9, 6)) +ax = fig.add_subplot(111, projection='3d') +ax.plot_surface(X_np, T_np, U_np, cmap='viridis') +ax.set_xlabel('$x$') +ax.set_ylabel('$t$') +ax.set_zlabel('$u(x,t)$') +ax.set_title('KdV equation') +ax.set_box_aspect(None, zoom=0.85) +plt.savefig('./results/kdv_solution.png', dpi=300) if save_fig else None +plt.show() \ No newline at end of file