"""
Multivariate derivative computation for pydelt.
This module provides classes for computing multivariate derivatives including:
- Gradient (∇f): For scalar functions of multiple variables
- Jacobian (∂f/∂x): For vector-valued functions
- Hessian (∂²f/∂x²): Second derivatives for scalar functions
- Laplacian (∇²f): Trace of Hessian for scalar functions
The module supports both traditional interpolation methods and neural networks
with automatic differentiation for superior performance in high dimensions.
"""
import numpy as np
from typing import List, Tuple, Dict, Union, Optional, Callable, Any
from .interpolation import BaseInterpolator, SplineInterpolator, LlaInterpolator
import warnings
try:
import torch
import torch.nn as nn
PYTORCH_AVAILABLE = True
except ImportError:
PYTORCH_AVAILABLE = False
try:
import tensorflow as tf
TENSORFLOW_AVAILABLE = True
except ImportError:
TENSORFLOW_AVAILABLE = False
[docs]
class MultivariateDerivatives:
"""
Compute multivariate derivatives using traditional interpolation methods.
This class provides methods to compute gradient, Jacobian, Hessian, and Laplacian
for multivariate functions using fitted interpolators. It works by fitting separate
1D interpolators for each output-input dimension pair and computing derivatives
along each dimension independently.
Key Features:
- Gradient computation for scalar functions
- Jacobian matrix computation for vector-valued functions
- Hessian matrix computation for scalar functions (diagonal elements only)
- Laplacian computation for scalar functions
- Support for any interpolator from the pydelt library
Limitations:
- Mixed partial derivatives are approximated as zero
- Assumes separable dependencies between input dimensions
- Best suited for functions where each output depends primarily on one input
- **Critical Point Smoothing**: Interpolation methods smooth out sharp mathematical
features, leading to non-zero gradients at points where they should be zero
- **Boundary Effects**: Edge artifacts can distort derivative calculations
- **Regularization Impact**: Smoothing parameters blur sharp transitions and critical points
- **Finite Sampling**: Cannot capture infinitely sharp transitions or discontinuities
**Example of Critical Point Issue:**
For f(x,y) = (x-y)², the mathematical gradient is zero along x=y line,
especially at corners like (-3,-3) and (3,3). However, numerical interpolation
will give non-zero gradients everywhere due to smoothing effects.
**Mitigation Strategies:**
- Use higher resolution sampling near critical points
- Reduce smoothing parameters (but beware of overfitting)
- Validate against analytical solutions when available
- Consider neural network methods with automatic differentiation for exact derivatives
For exact mixed partial derivatives and better handling of critical points,
consider using NeuralNetworkMultivariateDerivatives with automatic differentiation.
Example:
>>> from pydelt.interpolation import SplineInterpolator
>>> mv = MultivariateDerivatives(SplineInterpolator, smoothing=0.1)
>>> mv.fit(input_data, output_data)
>>> gradient_func = mv.gradient()
>>> grad = gradient_func(test_points)
Args:
interpolator_class: The interpolation class to use (e.g., SplineInterpolator, LlaInterpolator)
**interpolator_kwargs: Keyword arguments to pass to the interpolator constructor
"""
def __init__(self, interpolator_class: type = SplineInterpolator, **interpolator_kwargs):
self.interpolator_class = interpolator_class
self.interpolator_kwargs = interpolator_kwargs
self.interpolators = None
self.input_data = None
self.output_data = None
self.n_inputs = None
self.n_outputs = None
self.fitted = False
[docs]
def fit(self, input_data: np.ndarray, output_data: np.ndarray):
"""
Fit interpolators for multivariate derivative computation.
This method fits separate 1D interpolators for each output-input dimension pair.
For each combination of output dimension i and input dimension j, an interpolator
is fitted using input_data[:, j] as x-values and output_data[:, i] as y-values.
The data is automatically sorted and duplicate x-values are handled by averaging
the corresponding y-values to ensure proper interpolation.
Args:
input_data: Input data of shape (n_samples, n_input_dims)
Must be 2D array where each row is a sample and each column
is an input dimension.
output_data: Output data of shape (n_samples,) for scalar functions or
(n_samples, n_output_dims) for vector-valued functions.
For scalar functions, can be 1D array.
Returns:
Self for method chaining
Raises:
ValueError: If input_data is not 2D or if input/output sample counts don't match
Note:
After fitting, the interpolators are stored in self.interpolators as a nested list
where interpolators[i][j] contains the interpolator for output i, input j.
"""
input_data = np.asarray(input_data)
output_data = np.asarray(output_data)
# Handle 1D input data by reshaping to 2D
if input_data.ndim == 1:
input_data = input_data.reshape(-1, 1)
elif input_data.ndim != 2:
raise ValueError(f"Input data must be 1D or 2D. Got shape {input_data.shape}")
self.n_samples, self.n_inputs = input_data.shape
# Handle scalar vs vector output
if output_data.ndim == 1:
self.n_outputs = 1
output_data = output_data.reshape(-1, 1)
else:
self.n_outputs = output_data.shape[1]
if output_data.shape[0] != self.n_samples:
raise ValueError(f"Input and output data must have same number of samples. "
f"Got {self.n_samples} and {output_data.shape[0]}")
# Store data for derivative computation
self.input_data = input_data.copy()
self.output_data = output_data.copy()
# Create interpolators for each output-input pair
self.interpolators = []
for output_dim in range(self.n_outputs):
output_interpolators = []
for input_dim in range(self.n_inputs):
# Create interpolator for this output-input pair
interpolator = self.interpolator_class(**self.interpolator_kwargs)
# Fit interpolator using input dimension as x and output as y
# Sort by input dimension to ensure proper interpolation
x_values = input_data[:, input_dim]
y_values = output_data[:, output_dim]
# Sort data by x_values for proper interpolation
sort_indices = np.argsort(x_values)
x_sorted = x_values[sort_indices]
y_sorted = y_values[sort_indices]
# Remove duplicate x values by averaging y values
unique_x, inverse_indices = np.unique(x_sorted, return_inverse=True)
unique_y = np.zeros_like(unique_x)
for i in range(len(unique_x)):
mask = inverse_indices == i
unique_y[i] = np.mean(y_sorted[mask])
try:
interpolator.fit(unique_x, unique_y)
output_interpolators.append(interpolator)
except Exception as e:
warnings.warn(f"Failed to fit interpolator for output {output_dim}, input {input_dim}: {e}")
# Create a dummy interpolator that returns zeros
class DummyInterpolator:
def differentiate(self, order=1):
def dummy_func(x):
return np.zeros_like(x)
return dummy_func
output_interpolators.append(DummyInterpolator())
self.interpolators.append(output_interpolators)
self.fitted = True
return self
[docs]
def gradient(self, eval_points: Optional[np.ndarray] = None) -> Callable:
"""
Compute the gradient (∇f) for scalar functions.
The gradient is computed by fitting separate 1D interpolators for each input dimension
and evaluating their first derivatives. This approach works well for functions where
each output depends primarily on one input dimension.
Args:
eval_points: Points at which to evaluate the gradient. If None, uses training points.
Returns:
Callable function that takes input points and returns gradient vectors.
For single evaluation point:
- Input: array of shape (n_input_dims,) or (1, n_input_dims)
- Output: array of shape (n_input_dims,)
For multiple evaluation points:
- Input: array of shape (n_points, n_input_dims)
- Output: array of shape (n_points, n_input_dims)
Raises:
ValueError: If the function is not scalar (n_outputs != 1)
RuntimeError: If the model has not been fitted
Note:
This method assumes that partial derivatives can be approximated by fitting
1D interpolators along each input dimension independently. Mixed partial
derivatives are not computed with this approach.
**Critical Point Warning:**
Numerical interpolation smooths out sharp mathematical features. For functions
with critical points (where gradient should be zero), the computed gradient
may be non-zero due to smoothing effects. Always validate against analytical
solutions when possible, especially near minima, maxima, and saddle points.
"""
if not self.fitted:
raise RuntimeError("MultivariateDerivatives must be fit before computing gradient.")
if self.n_outputs != 1:
raise ValueError(f"Gradient is only defined for scalar functions. Got {self.n_outputs} outputs.")
def gradient_func(query_points: np.ndarray) -> np.ndarray:
"""
Evaluate gradient at query points.
Args:
query_points: Points of shape (n_points, n_input_dims)
Returns:
Gradient vectors of shape (n_points, n_input_dims)
"""
query_points = np.asarray(query_points)
if query_points.ndim == 1:
query_points = query_points.reshape(1, -1)
n_points = query_points.shape[0]
gradients = np.zeros((n_points, self.n_inputs))
# Compute partial derivative with respect to each input dimension
for input_dim in range(self.n_inputs):
interpolator = self.interpolators[0][input_dim] # First (and only) output
derivative_func = interpolator.differentiate(order=1)
# Evaluate derivative at the corresponding input dimension
input_values = query_points[:, input_dim]
try:
partial_derivatives = derivative_func(input_values)
# Handle scalar output from derivative function
if np.isscalar(partial_derivatives):
partial_derivatives = np.array([partial_derivatives])
gradients[:, input_dim] = partial_derivatives
except Exception as e:
warnings.warn(f"Could not compute partial derivative for dimension {input_dim}: {e}")
gradients[:, input_dim] = 0.0
# For single point, return shape (n_inputs,), for multiple points (n_points, n_inputs)
return gradients[0] if n_points == 1 else gradients
return gradient_func
[docs]
def jacobian(self, eval_points: Optional[np.ndarray] = None) -> Callable:
"""
Compute the Jacobian matrix (∂f/∂x) for vector-valued functions.
The Jacobian matrix contains all first-order partial derivatives of a vector-valued
function. Element (i,j) represents ∂f_i/∂x_j. This is computed by fitting separate
1D interpolators for each output-input dimension pair.
Args:
eval_points: Points at which to evaluate the Jacobian. If None, uses training points.
Returns:
Callable function that takes input points and returns Jacobian matrices.
For single evaluation point:
- Input: array of shape (n_input_dims,) or (1, n_input_dims)
- Output: array of shape (n_outputs, n_inputs)
For multiple evaluation points:
- Input: array of shape (n_points, n_input_dims)
- Output: array of shape (n_points, n_outputs, n_inputs)
Raises:
RuntimeError: If the model has not been fitted
Note:
For scalar functions (n_outputs=1), the Jacobian is equivalent to the gradient
transposed. Mixed partial derivatives are not computed with traditional
interpolation methods.
"""
if not self.fitted:
raise RuntimeError("MultivariateDerivatives must be fit before computing Jacobian.")
def jacobian_func(query_points: np.ndarray) -> np.ndarray:
"""
Evaluate Jacobian at query points.
Args:
query_points: Points of shape (n_points, n_input_dims)
Returns:
Jacobian matrices of shape (n_points, n_outputs, n_inputs)
"""
query_points = np.asarray(query_points)
if query_points.ndim == 1:
query_points = query_points.reshape(1, -1)
n_points = query_points.shape[0]
jacobians = np.zeros((n_points, self.n_outputs, self.n_inputs))
# Compute partial derivatives for each output with respect to each input
for output_dim in range(self.n_outputs):
for input_dim in range(self.n_inputs):
interpolator = self.interpolators[output_dim][input_dim]
derivative_func = interpolator.differentiate(order=1)
# Evaluate derivative at the corresponding input dimension
input_values = query_points[:, input_dim]
try:
partial_derivatives = derivative_func(input_values)
# Handle scalar output from derivative function
if np.isscalar(partial_derivatives):
partial_derivatives = np.array([partial_derivatives])
jacobians[:, output_dim, input_dim] = partial_derivatives
except Exception as e:
warnings.warn(f"Could not compute partial derivative for output {output_dim}, input {input_dim}: {e}")
jacobians[:, output_dim, input_dim] = 0.0
# For single point, return shape (n_outputs, n_inputs), for multiple points (n_points, n_outputs, n_inputs)
return jacobians[0] if n_points == 1 else jacobians
return jacobian_func
[docs]
def hessian(self, eval_points: Optional[np.ndarray] = None) -> Callable:
"""
Compute the Hessian matrix (∂²f/∂x²) for scalar functions.
The Hessian matrix contains all second-order partial derivatives of a scalar function.
Element (i,j) represents ∂²f/∂x_i∂x_j. For traditional interpolation methods, only
diagonal elements (pure second derivatives) are computed; off-diagonal elements
(mixed partials) are approximated as zero.
Args:
eval_points: Points at which to evaluate the Hessian. If None, uses training points.
Returns:
Callable function that takes input points and returns Hessian matrices.
For single evaluation point:
- Input: array of shape (n_input_dims,) or (1, n_input_dims)
- Output: array of shape (n_inputs, n_inputs)
For multiple evaluation points:
- Input: array of shape (n_points, n_input_dims)
- Output: array of shape (n_points, n_inputs, n_inputs)
Raises:
ValueError: If the function is not scalar (n_outputs != 1)
RuntimeError: If the model has not been fitted
Warning:
Traditional interpolation methods cannot compute mixed partial derivatives
(off-diagonal elements). These are set to zero, which may not be accurate
for functions with significant cross-dependencies. Consider using neural
network methods with automatic differentiation for exact mixed partials.
"""
if not self.fitted:
raise RuntimeError("MultivariateDerivatives must be fit before computing Hessian.")
if self.n_outputs != 1:
raise ValueError(f"Hessian is only defined for scalar functions. Got {self.n_outputs} outputs.")
def hessian_func(query_points: np.ndarray) -> np.ndarray:
"""
Evaluate Hessian at query points.
Args:
query_points: Points of shape (n_points, n_input_dims)
Returns:
Hessian matrices of shape (n_points, n_inputs, n_inputs)
"""
query_points = np.asarray(query_points)
if query_points.ndim == 1:
query_points = query_points.reshape(1, -1)
n_points = query_points.shape[0]
hessians = np.zeros((n_points, self.n_inputs, self.n_inputs))
# Compute second partial derivatives (diagonal elements)
for input_dim in range(self.n_inputs):
interpolator = self.interpolators[0][input_dim] # First (and only) output
try:
second_derivative_func = interpolator.differentiate(order=2)
input_values = query_points[:, input_dim]
partial_derivatives = second_derivative_func(input_values)
# Handle scalar output from derivative function
if np.isscalar(partial_derivatives):
partial_derivatives = np.array([partial_derivatives])
hessians[:, input_dim, input_dim] = partial_derivatives
except Exception as e:
warnings.warn(f"Could not compute second derivative for dimension {input_dim}: {e}")
hessians[:, input_dim, input_dim] = 0.0
# Mixed partial derivatives (off-diagonal elements)
# Note: For traditional interpolation methods, mixed partials are approximated as zero
# This is a limitation that neural networks with automatic differentiation can overcome
# For single point, return shape (n_inputs, n_inputs), for multiple points (n_points, n_inputs, n_inputs)
return hessians[0] if n_points == 1 else hessians
return hessian_func
[docs]
def laplacian(self, eval_points: Optional[np.ndarray] = None) -> Callable:
"""
Compute the Laplacian (∇²f = tr(H)) for scalar functions.
The Laplacian is the trace (sum of diagonal elements) of the Hessian matrix,
representing the sum of all pure second partial derivatives: ∇²f = ∂²f/∂x₁² + ∂²f/∂x₂² + ...
This is a scalar measure of the "curvature" of the function.
Args:
eval_points: Points at which to evaluate the Laplacian. If None, uses training points.
Returns:
Callable function that takes input points and returns Laplacian values.
For single evaluation point:
- Input: array of shape (n_input_dims,) or (1, n_input_dims)
- Output: scalar value
For multiple evaluation points:
- Input: array of shape (n_points, n_input_dims)
- Output: array of shape (n_points,)
Raises:
ValueError: If the function is not scalar (n_outputs != 1)
RuntimeError: If the model has not been fitted
Note:
Since mixed partial derivatives are not computed in traditional interpolation
methods, this Laplacian only includes pure second derivatives. For functions
with significant cross-dependencies, consider neural network methods.
"""
if not self.fitted:
raise RuntimeError("MultivariateDerivatives must be fit before computing Laplacian.")
if self.n_outputs != 1:
raise ValueError(f"Laplacian is only defined for scalar functions. Got {self.n_outputs} outputs.")
def laplacian_func(query_points: np.ndarray) -> np.ndarray:
"""
Evaluate Laplacian at query points.
Args:
query_points: Points of shape (n_points, n_input_dims)
Returns:
Laplacian values of shape (n_points,)
"""
query_points = np.asarray(query_points)
if query_points.ndim == 1:
query_points = query_points.reshape(1, -1)
n_points = query_points.shape[0]
laplacians = np.zeros(n_points)
# Laplacian is the trace of the Hessian (sum of diagonal elements)
for input_dim in range(self.n_inputs):
interpolator = self.interpolators[0][input_dim] # First (and only) output
try:
second_derivative_func = interpolator.differentiate(order=2)
input_values = query_points[:, input_dim]
partial_derivatives = second_derivative_func(input_values)
# Handle scalar output from derivative function
if np.isscalar(partial_derivatives):
partial_derivatives = np.array([partial_derivatives])
laplacians += partial_derivatives
except Exception as e:
warnings.warn(f"Could not compute second derivative for dimension {input_dim}: {e}")
# For single point, return scalar, for multiple points return array
return laplacians[0] if n_points == 1 else laplacians
return laplacian_func
[docs]
class NeuralNetworkMultivariateDerivatives:
"""
Neural network-based multivariate derivatives with automatic differentiation.
This class provides true multivariate derivative computation using automatic
differentiation, which is superior to traditional methods for high-dimensional
problems and can compute exact mixed partial derivatives.
Args:
framework: Deep learning framework to use ('pytorch' or 'tensorflow')
hidden_layers: List of hidden layer sizes
epochs: Number of training epochs
learning_rate: Learning rate for optimization
"""
def __init__(self,
framework: str = 'pytorch',
hidden_layers: List[int] = [64, 32],
epochs: int = 1000,
learning_rate: float = 0.01):
if framework == 'pytorch' and not PYTORCH_AVAILABLE:
raise ImportError("PyTorch is required for neural network derivatives. Install with: pip install torch")
elif framework == 'tensorflow' and not TENSORFLOW_AVAILABLE:
raise ImportError("TensorFlow is required for neural network derivatives. Install with: pip install tensorflow")
self.framework = framework
self.hidden_layers = hidden_layers
self.epochs = epochs
self.learning_rate = learning_rate
self.model = None
self.input_data = None
self.output_data = None
self.n_inputs = None
self.n_outputs = None
self.fitted = False
# Normalization parameters
self.input_mean = None
self.input_std = None
self.output_mean = None
self.output_std = None
[docs]
def fit(self, input_data: np.ndarray, output_data: np.ndarray) -> 'NeuralNetworkMultivariateDerivatives':
"""
Fit the neural network model.
Args:
input_data: Input data of shape (n_samples, n_input_dims)
output_data: Output data of shape (n_samples,) for scalar functions or
(n_samples, n_output_dims) for vector functions
Returns:
Self for method chaining
"""
input_data = np.asarray(input_data)
output_data = np.asarray(output_data)
if input_data.ndim == 1:
input_data = input_data.reshape(-1, 1)
if output_data.ndim == 1:
output_data = output_data.reshape(-1, 1)
self.input_data = input_data
self.output_data = output_data
self.n_inputs = input_data.shape[1]
self.n_outputs = output_data.shape[1]
# Normalize data for better training
self.input_mean = np.mean(input_data, axis=0)
self.input_std = np.std(input_data, axis=0) + 1e-8 # Avoid division by zero
self.output_mean = np.mean(output_data, axis=0)
self.output_std = np.std(output_data, axis=0) + 1e-8
input_normalized = (input_data - self.input_mean) / self.input_std
output_normalized = (output_data - self.output_mean) / self.output_std
if self.framework == 'pytorch':
self._fit_pytorch(input_normalized, output_normalized)
elif self.framework == 'tensorflow':
self._fit_tensorflow(input_normalized, output_normalized)
else:
raise ValueError(f"Unknown framework: {self.framework}")
self.fitted = True
return self
def _fit_pytorch(self, input_data: np.ndarray, output_data: np.ndarray):
"""Fit PyTorch model."""
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
# Create neural network architecture
layers = []
prev_size = self.n_inputs
for hidden_size in self.hidden_layers:
layers.append(nn.Linear(prev_size, hidden_size))
layers.append(nn.ReLU())
prev_size = hidden_size
layers.append(nn.Linear(prev_size, self.n_outputs))
self.model = nn.Sequential(*layers)
# Prepare data
X = torch.tensor(input_data, dtype=torch.float32)
y = torch.tensor(output_data, dtype=torch.float32)
dataset = TensorDataset(X, y)
dataloader = DataLoader(dataset, batch_size=min(32, len(input_data)), shuffle=True)
# Train model
optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
loss_fn = nn.MSELoss()
self.model.train()
for epoch in range(self.epochs):
for batch_X, batch_y in dataloader:
optimizer.zero_grad()
outputs = self.model(batch_X)
loss = loss_fn(outputs, batch_y)
loss.backward()
optimizer.step()
self.model.eval()
def _fit_tensorflow(self, input_data: np.ndarray, output_data: np.ndarray):
"""Fit TensorFlow model."""
import tensorflow as tf
# Create neural network architecture
layers = [tf.keras.layers.Input(shape=(self.n_inputs,))]
for hidden_size in self.hidden_layers:
layers.append(tf.keras.layers.Dense(hidden_size, activation='relu'))
layers.append(tf.keras.layers.Dense(self.n_outputs))
self.model = tf.keras.Sequential(layers)
# Compile and train
self.model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate),
loss='mse')
self.model.fit(input_data, output_data, epochs=self.epochs, verbose=0)
[docs]
def gradient(self) -> Callable:
"""
Compute gradient using automatic differentiation.
Returns:
Callable function that takes input points and returns gradient vectors
"""
if not self.fitted:
raise RuntimeError("NeuralNetworkMultivariateDerivatives must be fit before computing gradient.")
if self.n_outputs != 1:
raise ValueError(f"Gradient is only defined for scalar functions. Got {self.n_outputs} outputs.")
if self.framework == 'pytorch':
return self._pytorch_gradient()
elif self.framework == 'tensorflow':
return self._tensorflow_gradient()
def _pytorch_gradient(self) -> Callable:
"""PyTorch gradient computation."""
import torch
def gradient_func(query_points: np.ndarray) -> np.ndarray:
query_points = np.asarray(query_points)
if query_points.ndim == 1:
query_points = query_points.reshape(1, -1)
# Normalize input
query_normalized = (query_points - self.input_mean) / self.input_std
query_tensor = torch.tensor(query_normalized, dtype=torch.float32, requires_grad=True)
# Forward pass
output = self.model(query_tensor)
# Compute gradients
gradients = []
for i in range(query_tensor.shape[0]):
grad = torch.autograd.grad(output[i], query_tensor, create_graph=True, retain_graph=True)[0]
gradients.append(grad[i].detach().numpy())
gradients = np.array(gradients)
# Denormalize gradients
gradients = gradients * (self.output_std / self.input_std)
return gradients.squeeze() if gradients.shape[0] == 1 else gradients
return gradient_func
def _tensorflow_gradient(self) -> Callable:
"""TensorFlow gradient computation."""
import tensorflow as tf
def gradient_func(query_points: np.ndarray) -> np.ndarray:
query_points = np.asarray(query_points)
if query_points.ndim == 1:
query_points = query_points.reshape(1, -1)
# Normalize input
query_normalized = (query_points - self.input_mean) / self.input_std
query_tensor = tf.Variable(query_normalized, dtype=tf.float32)
with tf.GradientTape() as tape:
output = self.model(query_tensor)
# Compute gradients
gradients = tape.gradient(output, query_tensor).numpy()
# Denormalize gradients
gradients = gradients * (self.output_std / self.input_std)
return gradients.squeeze() if gradients.shape[0] == 1 else gradients
return gradient_func
[docs]
def jacobian(self) -> Callable:
"""
Compute Jacobian using automatic differentiation.
Returns:
Callable function that takes input points and returns Jacobian matrices
"""
if not self.fitted:
raise RuntimeError("NeuralNetworkMultivariateDerivatives must be fit before computing Jacobian.")
if self.framework == 'pytorch':
return self._pytorch_jacobian()
elif self.framework == 'tensorflow':
return self._tensorflow_jacobian()
def _pytorch_jacobian(self) -> Callable:
"""PyTorch Jacobian computation."""
import torch
def jacobian_func(query_points: np.ndarray) -> np.ndarray:
query_points = np.asarray(query_points)
if query_points.ndim == 1:
query_points = query_points.reshape(1, -1)
# Normalize input
query_normalized = (query_points - self.input_mean) / self.input_std
query_tensor = torch.tensor(query_normalized, dtype=torch.float32, requires_grad=True)
# Forward pass
output = self.model(query_tensor)
# Compute Jacobian
jacobians = []
for i in range(query_tensor.shape[0]):
point_jacobian = []
for j in range(self.n_outputs):
grad = torch.autograd.grad(output[i, j], query_tensor,
create_graph=True, retain_graph=True)[0]
point_jacobian.append(grad[i].detach().numpy())
jacobians.append(np.array(point_jacobian))
jacobians = np.array(jacobians)
# Denormalize Jacobian
jacobians = jacobians * (self.output_std.reshape(1, -1, 1) / self.input_std.reshape(1, 1, -1))
return jacobians.squeeze() if jacobians.shape[0] == 1 else jacobians
return jacobian_func
def _tensorflow_jacobian(self) -> Callable:
"""TensorFlow Jacobian computation."""
import tensorflow as tf
def jacobian_func(query_points: np.ndarray) -> np.ndarray:
query_points = np.asarray(query_points)
if query_points.ndim == 1:
query_points = query_points.reshape(1, -1)
# Normalize input
query_normalized = (query_points - self.input_mean) / self.input_std
query_tensor = tf.Variable(query_normalized, dtype=tf.float32)
with tf.GradientTape() as tape:
output = self.model(query_tensor)
# Compute Jacobian
jacobians = []
for i in range(self.n_outputs):
grad = tape.gradient(output[:, i], query_tensor)
jacobians.append(grad.numpy())
jacobians = np.array(jacobians).transpose(1, 0, 2) # (n_points, n_outputs, n_inputs)
# Denormalize Jacobian
jacobians = jacobians * (self.output_std.reshape(1, -1, 1) / self.input_std.reshape(1, 1, -1))
return jacobians.squeeze() if jacobians.shape[0] == 1 else jacobians
return jacobian_func
[docs]
def hessian(self) -> Callable:
"""
Compute Hessian using automatic differentiation.
Returns:
Callable function that takes input points and returns Hessian matrices
"""
if not self.fitted:
raise RuntimeError("NeuralNetworkMultivariateDerivatives must be fit before computing Hessian.")
if self.n_outputs != 1:
raise ValueError(f"Hessian is only defined for scalar functions. Got {self.n_outputs} outputs.")
if self.framework == 'pytorch':
return self._pytorch_hessian()
elif self.framework == 'tensorflow':
return self._tensorflow_hessian()
def _pytorch_hessian(self) -> Callable:
"""PyTorch Hessian computation."""
import torch
def hessian_func(query_points: np.ndarray) -> np.ndarray:
query_points = np.asarray(query_points)
if query_points.ndim == 1:
query_points = query_points.reshape(1, -1)
# Normalize input
query_normalized = (query_points - self.input_mean) / self.input_std
query_tensor = torch.tensor(query_normalized, dtype=torch.float32, requires_grad=True)
# Forward pass
output = self.model(query_tensor)
# Compute Hessian
hessians = []
for i in range(query_tensor.shape[0]):
# First derivatives
first_grad = torch.autograd.grad(output[i], query_tensor,
create_graph=True, retain_graph=True)[0]
# Second derivatives
hessian_matrix = []
for j in range(self.n_inputs):
second_grad = torch.autograd.grad(first_grad[i, j], query_tensor,
create_graph=True, retain_graph=True)[0]
hessian_matrix.append(second_grad[i].detach().numpy())
hessians.append(np.array(hessian_matrix))
hessians = np.array(hessians)
# Denormalize Hessian
hessians = hessians * (self.output_std / (self.input_std.reshape(1, -1) * self.input_std.reshape(-1, 1)))
return hessians.squeeze() if hessians.shape[0] == 1 else hessians
return hessian_func
def _tensorflow_hessian(self) -> Callable:
"""TensorFlow Hessian computation."""
import tensorflow as tf
def hessian_func(query_points: np.ndarray) -> np.ndarray:
query_points = np.asarray(query_points)
if query_points.ndim == 1:
query_points = query_points.reshape(1, -1)
# Normalize input
query_normalized = (query_points - self.input_mean) / self.input_std
query_tensor = tf.Variable(query_normalized, dtype=tf.float32)
with tf.GradientTape() as tape2:
with tf.GradientTape() as tape1:
output = self.model(query_tensor)
first_grad = tape1.gradient(output, query_tensor)
# Compute Hessian
hessians = []
for i in range(self.n_inputs):
second_grad = tape2.gradient(first_grad[:, i], query_tensor)
hessians.append(second_grad.numpy())
hessians = np.array(hessians).transpose(1, 0, 2) # (n_points, n_inputs, n_inputs)
# Denormalize Hessian
hessians = hessians * (self.output_std / (self.input_std.reshape(1, 1, -1) * self.input_std.reshape(1, -1, 1)))
return hessians.squeeze() if hessians.shape[0] == 1 else hessians
return hessian_func
[docs]
def laplacian(self) -> Callable:
"""
Compute Laplacian using automatic differentiation.
Returns:
Callable function that takes input points and returns Laplacian values
"""
if not self.fitted:
raise RuntimeError("NeuralNetworkMultivariateDerivatives must be fit before computing Laplacian.")
if self.n_outputs != 1:
raise ValueError(f"Laplacian is only defined for scalar functions. Got {self.n_outputs} outputs.")
def laplacian_func(query_points: np.ndarray) -> np.ndarray:
hessian_func = self.hessian()
hessian_matrices = hessian_func(query_points)
if hessian_matrices.ndim == 2: # Single point
return np.trace(hessian_matrices)
else: # Multiple points
return np.array([np.trace(h) for h in hessian_matrices])
return laplacian_func