---
title: Losses
keywords: fastai
sidebar: home_sidebar
summary: "A selection of loss functions I've used with this dataset."
description: "A selection of loss functions I've used with this dataset."
nb_path: "05_losses.ipynb"
---
{% raw %}
{% endraw %} {% raw %}
{% endraw %} {% raw %}
import math
import tempfile
import warnings

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

import gaitplotlib.core as gpl
from llamass.core import unpack_body_models, npz_paths

# `transforms` (used below for Rotation conversions and SMPLHForwardKinematics)
# is assumed to be imported from this repository's rotation utilities.
body_model = gpl.init_body_model("neutral")
fk_engine = transforms.SMPLHForwardKinematics()
{% endraw %}

## Axis-Angle Cosine Distance

Cosine similarity between rotation vectors.

This is probably not a correct distance metric for rotations. This blog makes an argument that there's only one correct distance metric for rotations: the angular distance between unit quaternions. But this one is quick to evaluate on rotation vectors.

There are also implementations of this correct distance metric using rotation matrices in Python here.
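
For reference, here's a sketch of that quaternion-based angular distance. This is only an illustration, written with scipy (which isn't otherwise used in this notebook):

{% raw %}
import numpy as np
from scipy.spatial.transform import Rotation as R

def quat_angular_distance(rotvec_a, rotvec_b):
    qa = R.from_rotvec(rotvec_a).as_quat()  # unit quaternions, (x, y, z, w)
    qb = R.from_rotvec(rotvec_b).as_quat()
    dot = np.abs(np.sum(qa * qb, axis=-1))  # abs handles the quaternion double cover
    return 2.0 * np.arccos(np.clip(dot, 0.0, 1.0))  # relative rotation angle
{% endraw %}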

{% raw %}

aa_cosine[source]

aa_cosine(out, target)

{% endraw %} {% raw %}
def aa_cosine(out, target):
    "Cosine distance between axis-angle vectors, plus a penalty on the rotation angle difference."
    if out.ndim == 2:
        b, d = out.size()
    elif out.ndim == 3:
        b, f, d = out.size()
        assert f == 1, f'{out.size()}'
    j = d//3
    out, target = out.view(b, j, 3), target.view(b, j, 3)
    def theta(x, eps=1e-6):
        # the rotation angle is the norm of the axis-angle vector
        return torch.sqrt(torch.clamp(torch.sum(x**2, 2, keepdim=True), eps, 2*math.pi))
    theta_a = theta(out)
    theta_b = theta(target)
    cosine_sim = F.cosine_similarity(out, target, dim=2)
    cosine_sim_loss = 1. - cosine_sim
    cosine_angle_diff = 1. - torch.cos(theta_a - theta_b)
    return cosine_sim_loss + cosine_angle_diff[:,:,0]
{% endraw %}
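
A quick smoke test (shapes here are hypothetical; 21 body joints, as used elsewhere in this notebook):

{% raw %}
out, target = torch.randn(2, 21 * 3), torch.randn(2, 21 * 3)
assert aa_cosine(out, target).shape == (2, 21)  # one loss value per joint
{% endraw %}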

## VPoser

The loss function used by the VPoser VAE in the SMPL-X paper.

In the tests below, this appears to produce NaN gradients on the sample AMASS data.

{% raw %}

class GeodesicLossR[source]

GeodesicLossR(reduction='batchmean') :: Module

Computes the geodesic (angular) distance between batches of rotation matrices.

{% endraw %} {% raw %}
class GeodesicLossR(nn.Module):
    def __init__(self, reduction='batchmean'):
        super(GeodesicLossR, self).__init__()

        self.reduction = reduction
        self.eps = 1e-6

    # batch geodesic loss for rotation matrices
    def bgdR(self, m1, m2):
        assert m1.ndim == m2.ndim, \
            f"Rotation matrices ndim must be equal but was {m1.ndim} {m2.ndim}"
        for m in [m1, m2]:
            assert m.size(-1) == 3 and m.size(-2) == 3, \
                f"Trailing 2 dimensions must be 3x3 rotation matrices {m.size()}"
        if m1.ndim == 2:
            # ndim 2 must be single rotation matrix
            m1 = m1.view(1, 3, 3)
            m2 = m2.view(1, 3, 3)
        elif m1.ndim > 3:
            m1 = m1.view(-1, 3, 3)
            m2 = m2.view(-1, 3, 3)
        batch = m1.shape[0]
        m = torch.bmm(m1, m2.transpose(1, 2))  # batch*3*3

        cos = (m[:, 0, 0] + m[:, 1, 1] + m[:, 2, 2] - 1) / 2
        cos = torch.clamp(cos, -1.0, 1.0)  # numerical safety before acos

        return torch.acos(cos)

    def forward(self, ypred, ytrue):
        theta = self.bgdR(ypred,ytrue)
        if self.reduction == 'mean':
            return torch.mean(theta)
        elif self.reduction == 'batchmean':
            return torch.mean(torch.sum(theta, dim=theta.shape[1:]))
        elif self.reduction == 'none':
            return theta
        else:
            raise NotImplementedError(f"Reduction {self.reduction} not known")
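
# Sanity check: identical rotations give zero geodesic distance. Note that
# acos has unbounded gradient as its argument approaches +/-1, which may be
# the source of the NaN gradients observed in the tests below.
identity = torch.eye(3).repeat(8, 1, 1)
assert GeodesicLossR(reduction='mean')(identity, identity).item() < 1e-3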
{% endraw %} {% raw %}

class ContinuousRotReprDecoder[source]

ContinuousRotReprDecoder() :: Module

Decodes the 6D continuous rotation representation into 3×3 rotation matrices via Gram–Schmidt orthonormalization.

{% endraw %} {% raw %}
class ContinuousRotReprDecoder(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, module_input):
        b, d = module_input.size()
        assert d % 6 == 0
        # treat every 6 values as two 3D vectors defining a rotation
        reshaped_input = module_input.view(-1, 3, 2)

        # Gram-Schmidt: normalize the first vector
        b1 = F.normalize(reshaped_input[:, :, 0], dim=1)

        # make the second vector orthogonal to the first, then normalize
        dot_prod = torch.sum(b1 * reshaped_input[:, :, 1], dim=1, keepdim=True)
        b2 = F.normalize(reshaped_input[:, :, 1] - dot_prod * b1, dim=-1)
        # the third basis vector is the cross product of the first two
        b3 = torch.cross(b1, b2, dim=1)

        return torch.stack([b1, b2, b3], dim=-1).view(b, -1, 3, 3)
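
# Sanity check (hypothetical shapes): decode random 6D features for 21 joints
# and verify the outputs are orthonormal rotation matrices.
rotmats = ContinuousRotReprDecoder()(torch.randn(4, 21 * 6))
eye = torch.eye(3).expand(4, 21, 3, 3)
assert torch.allclose(rotmats @ rotmats.transpose(-1, -2), eye, atol=1e-5)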
{% endraw %} {% raw %}

class ForwardKinematicLoss[source]

ForwardKinematicLoss(body_model) :: Module

Must be initialized with an SMPL-like body_model.

{% endraw %} {% raw %}
class ForwardKinematicLoss(nn.Module):
    "Must be initialized with an SMPL-like `body_model`."
    def __init__(self, body_model):
        super().__init__()
        self.bm = body_model
        self.geodesic_loss = GeodesicLossR(reduction="mean")
        
    def kinematics(self, aa_out, pose_target):
        with torch.no_grad():
            bm_orig = self.bm(pose_body=pose_target)
        bm_rec = self.bm(pose_body=aa_out.contiguous())
        return bm_orig, bm_rec
{% endraw %} {% raw %}

class VPoserLikelihood[source]

VPoserLikelihood(body_model) :: ForwardKinematicLoss

Must be initialized with an SMPL-like body_model.

{% endraw %} {% raw %}
class VPoserLikelihood(ForwardKinematicLoss):
    def forward(
        self,
        dec_out,
        aa_out,
        pose_target,
        pose_target_rotmat,
        bm_orig=None,
        bm_rec=None,
        loss_rec_wt=torch.tensor(4),
        loss_matrot_wt=torch.tensor(2),
        loss_jtr_wt=torch.tensor(2),
        callback=None
    ):
        """
        Default settings for loss weights taken from:
        https://github.com/nghorbani/human_body_prior/blob/master/src/human_body_prior/train/V02_05/V02_05.yaml
        Inputs:
            - dec_out: output of network as rotation matrix, shape (batch, frames, joints, 3, 3)
            - aa_out: output of network as axis-angle vectors, shape (batch, frames, joints, 3)
            - pose_target: target as axis-angle vectors, shape (batch, frames, joints, 3)
            - pose_target_rotmat: target as rotation matrix, shape (batch, frames, joints, 3, 3)
        """
        l1_loss = torch.nn.L1Loss(reduction="mean")

        # cast decoder output to aa
        bs, f, d = pose_target.size()
        
        # forward kinematics
        if bm_orig is None or bm_rec is None:
            bm_orig, bm_rec = self.kinematics(aa_out.view(bs*f, -1), pose_target.view(bs*f, d))

        # Reconstruction loss - L1 on the output mesh
        v2v = l1_loss(bm_rec.v, bm_orig.v)

        # Geodesic loss between rotation matrices
        matrot_loss = self.geodesic_loss(
            dec_out.view(-1, 3, 3),
            pose_target_rotmat.view(-1, 3, 3)
        )
        # L1 Loss on joint positions
        jtr_loss = l1_loss(bm_rec.Jtr, bm_orig.Jtr)

        # apply weights to make weighted loss
        weighted_loss = (
            loss_matrot_wt * matrot_loss + loss_rec_wt * v2v + loss_jtr_wt * jtr_loss
        )


        # log results
        with torch.no_grad():
            unweighted_loss = matrot_loss + v2v + jtr_loss
            if callback is not None:
                callback(all_univariate_tensors_in(locals()))

        return weighted_loss
{% endraw %} {% raw %}
with tempfile.TemporaryDirectory() as tmpdirname:
    unpack_body_models("sample_data/", tmpdirname, 1, verify=False)
    for npz_path in npz_paths(tmpdirname):
        cdata = np.load(npz_path)
        _poses = torch.tensor(cdata['poses'][::6, 3:66], dtype=torch.float, requires_grad=True)
        n, d = _poses.size()
        j = d//3
        poses = _poses.reshape(n, 1, j*3)
        vposer_loss = VPoserLikelihood(body_model)
        pred, target = poses[:-1], poses[1:]
        n = pred.size(0)
        pred_rotmat = transforms.Rotation.from_rotvec(pred.reshape(-1, 3)).as_matrix().view(n, -1)
        target_rotmat = transforms.Rotation.from_rotvec(target.reshape(-1, 3)).as_matrix().view(n, -1)
        loss = vposer_loss(pred_rotmat, pred, target, target_rotmat)
        # loss = poses.sum()
        loss.backward()
        try:
            assert not torch.any(torch.isnan(_poses.grad)), "gradient contains NaNs"
            assert torch.abs(_poses.grad).max() > 1e-6, "gradient is zero"
        except AssertionError as e:
            warnings.warn(str(e))
        break
torch.any(torch.isnan(_poses.grad))
tensor(False)
{% endraw %}

## Discretized Euler Angles

Intuitively, an analog-to-digital converter (ADC) applied to Euler angles. Discretizing allows use of an NLL loss, which can be useful for training a neural network.

{% raw %}

discretize[source]

discretize(x, nquant, eps=1e-06, dither=False, zero_centered=True)

{% endraw %} {% raw %}

class DiscretizedEulerLoss[source]

DiscretizedEulerLoss(nquant, dither=False, zero_centered=True) :: Module

An NLL loss on Euler angles discretized into `nquant` bins.

{% endraw %} {% raw %}
def discretize(x, nquant, eps=1e-6, dither=False, zero_centered=True):
    if zero_centered:
        x = x + math.pi
    m = math.pi*2
    assert x.max() < m
    x = x/m  # scale to [0, 1)
    x = x*nquant
    if dither:
        d = 2.*(torch.rand_like(x)-0.5)
        x = torch.clamp(x+d, 0, nquant-eps)
    return torch.floor(x).long()  # bin into nquant levels

class DiscretizedEulerLoss(nn.Module):
    def __init__(self, nquant, dither=False, zero_centered=True):
        super().__init__()
        self.nquant, self.dither, self.zero_centered = nquant, dither, zero_centered

    def forward(self, out, target):
        assert out.size(-1) == self.nquant, f'trailing dimension should hold logits {out.size()}'
        target = discretize(target, self.nquant, dither=self.dither, zero_centered=self.zero_centered)
        return F.nll_loss(out.view(-1, self.nquant), target.view(-1))
{% endraw %}
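
A minimal usage sketch (shapes here are hypothetical): the loss expects log-probabilities over `nquant` bins in the trailing dimension, so raw network outputs should go through a log-softmax first.

{% raw %}
nquant = 16
loss_fn = DiscretizedEulerLoss(nquant)
# 8 frames of 21 joints' Euler angles in (-pi, pi)
target = (torch.rand(8, 21 * 3) - 0.5) * 2 * (math.pi - 1e-3)
logits = torch.randn(8, 21 * 3, nquant)
log_probs = F.log_softmax(logits, dim=-1)  # F.nll_loss expects log-probabilities
loss = loss_fn(log_probs, target)
{% endraw %}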

## SPL Loss Functions

Loss functions adapted from the SPL repository.

To do:

  • euler_angle_mse is an inaccurate name, it's actually a pairwise distance
  • Geodesic needs better documentation

In the standard evaluation on Human3.6M and AMASS the following loss functions are recommended:

  • Euler angle
  • Geodesic joint angle (calculated above in the VPoser loss)
  • Absolute position error (calculated above in the VPoser loss)
  • PCK: "percentage of joints lying within a spherical threshold $\rho$ around the target joint position", typically reported as an AUC while varying the $\rho$ threshold
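
Concretely, for $J$ joints with predicted positions $\hat{p}_j$ and targets $p_j$, PCK at a threshold $\rho$ is $\frac{1}{J}\sum_{j=1}^{J} \mathbf{1}[\lVert \hat{p}_j - p_j \rVert_2 \le \rho]$, matching the `pck` implementation below.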

The tests below clone the SPL repository for its reference implementations and require `cv2` (OpenCV). It's also necessary to have unpacked body model files with gaitplotlib as described here.

{% raw %}
# I don't want to install tensorflow just to install their whole package for testing
!git clone https://github.com/eth-ait/spl.git 2>/dev/null
import sys
from pathlib import Path
common_path = "./spl/common"
if common_path not in sys.path:
    sys.path.append(common_path)
from conversions import (is_valid_rotmat, rotmat2euler, aa2rotmat, get_closest_rotmat, sparse_to_full, local_rot_to_global)
import cv2
{% endraw %}

### SPL Implementations

Included for testing.

{% raw %}
"""
SPL: training and evaluation of neural networks with a structured prediction layer.
Copyright (C) 2019 ETH Zurich, Emre Aksan, Manuel Kaufmann

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.
"""
def euler_diff(predictions, targets):
    """
    Computes the Euler angle error as in previous work, following
    https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/translate.py#L207
    Args:
        predictions: np array of predicted joint angles represented as rotation matrices, i.e. in shape
          (..., n_joints, 3, 3)
        targets: np array of same shape as `predictions`
    Returns:
        The Euler angle error an np array of shape (..., )
    """
    assert predictions.shape[-1] == 3 and predictions.shape[-2] == 3
    assert targets.shape[-1] == 3 and targets.shape[-2] == 3
    n_joints = predictions.shape[-3]

    ori_shape = predictions.shape[:-3]
    preds = np.reshape(predictions, [-1, 3, 3])
    targs = np.reshape(targets, [-1, 3, 3])

    euler_preds = rotmat2euler(preds)  # (N, 3)
    euler_targs = rotmat2euler(targs)  # (N, 3)

    # reshape to (-1, n_joints*3) to be consistent with previous work
    euler_preds = np.reshape(euler_preds, [-1, n_joints*3])
    euler_targs = np.reshape(euler_targs, [-1, n_joints*3])

    # l2 error on euler angles
    idx_to_use = np.where(np.std(euler_targs, 0) > 1e-4)[0]
    euc_error = np.power(euler_targs[:, idx_to_use] - euler_preds[:, idx_to_use], 2)
    euc_error = np.sqrt(np.sum(euc_error, axis=1))  # (-1, ...)

    # reshape to original
    return np.reshape(euc_error, ori_shape)

def angle_diff(predictions, targets):
    """
    Computes the angular distance between the target and predicted rotations. We define this as the angle that is
    required to rotate one rotation into the other. This essentially computes || log(R_diff) || where R_diff is the
    difference rotation between prediction and target.
    Args:
        predictions: np array of predicted joint angles represented as rotation matrices, i.e. in shape
          (..., n_joints, 3, 3)
        targets: np array of same shape as `predictions`
    Returns:
        The geodesic distance for each joint as an np array of shape (..., n_joints)
    """
    assert predictions.shape[-1] == predictions.shape[-2] == 3
    assert targets.shape[-1] == targets.shape[-2] == 3

    ori_shape = predictions.shape[:-2]
    preds = np.reshape(predictions, [-1, 3, 3])
    targs = np.reshape(targets, [-1, 3, 3])

    # compute R1 * R2.T, if prediction and target match, this will be the identity matrix
    r = np.matmul(preds, np.transpose(targs, [0, 2, 1]))

    # convert `r` to angle-axis representation and extract the angle, which is our measure of difference between
    # the predicted and target orientations
    angles = []
    for i in range(r.shape[0]):
        aa, _ = cv2.Rodrigues(r[i])
        angles.append(np.linalg.norm(aa))
    angles = np.array(angles)

    return np.reshape(angles, ori_shape)

def positional(predictions, targets):
    """
    Computes the Euclidean distance between joints in 3D space.
    Args:
        predictions: np array of predicted 3D joint positions in format (..., n_joints, 3)
        targets: np array of same shape as `predictions`
    Returns:
        The Euclidean distance for each joint as an np array of shape (..., n_joints)
    """
    return np.sqrt(np.sum((predictions - targets) ** 2, axis=-1))

def pck(predictions, targets, thresh):
    """
    Percentage of correct keypoints.
    Args:
        predictions: np array of predicted 3D joint positions in format (..., n_joints, 3)
        targets: np array of same shape as `predictions`
        thresh: radius within which a predicted joint has to lie.
    Returns:
        Percentage of correct keypoints at the given threshold level, stored in a np array of shape (..., len(threshs))
    """
    dist = np.sqrt(np.sum((predictions - targets) ** 2, axis=-1))
    pck = np.mean(np.array(dist <= thresh, dtype=np.float32), axis=-1)
    return pck
{% endraw %}

### Euler Angle

I don't know what Euler angle convention this corresponds to, but it matches the implementation used in prior papers.

{% raw %}

euler_angle_mse[source]

euler_angle_mse(predictions, targets, n_joints=21)

Inputs predictions and targets are assumed to be rotation matrices

{% endraw %} {% raw %}
def euler_angle_mse(predictions, targets, n_joints=21):
    "Inputs predictions and targets are assumed to be rotation matrices"
    predictions = transforms.Rotation.from_matrix(predictions).as_euler()
    targets = transforms.Rotation.from_matrix(targets).as_euler()
    
    predictions = predictions.view(-1, n_joints*3)
    targets = targets.view(-1, n_joints*3)
    
    # l2 error on euler angles
    #idx_to_use = np.where(np.std(euler_targs, 0) > 1e-4)[0]
    mask = (torch.std(targets, 0) > 1e-4).float().view(1, -1)
    euc_error = torch.square(targets*mask - predictions*mask)
    euc_error = torch.sqrt(torch.sum(euc_error, 1))  # (-1, ...)
    return euc_error
{% endraw %} {% raw %}
with tempfile.TemporaryDirectory() as tmpdirname:
    unpack_body_models("sample_data/", tmpdirname, 1, verify=False)
    for npz_path in npz_paths(tmpdirname):
        cdata = np.load(npz_path)
        poses = torch.tensor(cdata['poses'][:, 3:66]).float()
        n, d = poses.size()
        j = d//3
        poses = poses.view(n, j, 3)
        # convert poses to rotation matrices
        rotmats = transforms.Rotation.from_rotvec(poses).as_matrix().reshape(n, j, 3, 3)
        outputs = rotmats[:-1:6]
        targets = rotmats[1::6]
        loss = euler_diff(outputs.numpy(), targets.numpy())
        _loss = euler_angle_mse(outputs.reshape(-1, 3, 3), targets.reshape(-1, 3, 3))
        assert np.abs(loss - _loss.numpy()).max() < 1e-4
        print(outputs.shape, targets.shape, loss.shape)
torch.Size([100, 21, 3, 3]) torch.Size([100, 21, 3, 3]) (100,)
torch.Size([39, 21, 3, 3]) torch.Size([39, 21, 3, 3]) (39,)
{% endraw %}

### Geodesic

This is based on the geodesic loss calculated in the combined VPoser loss above.

{% raw %}

class GeodesicLossSPL[source]

GeodesicLossSPL(reduction='none') :: GeodesicLossR

A `GeodesicLossR` whose `bgdR` matches the SPL `angle_diff` reference implementation.

{% endraw %} {% raw %}
class GeodesicLossSPL(GeodesicLossR):
    def __init__(self, reduction='none'):
        super().__init__(reduction=reduction)
        
    def bgdR(self, m1, m2):
        assert m1.ndim == m2.ndim, \
            f"Rotation matrices ndim must be equal but was {m1.ndim} {m2.ndim}"
        for m in [m1, m2]:
            assert m.size(-1) == 3 and m.size(-2) == 3, \
                f"Trailing 2 dimensions must be 3x3 rotation matrices {m.size()}"
        if m1.ndim == 2:
            # ndim 2 must be single rotation matrix
            m1 = m1.view(1, 3, 3)
            m2 = m2.view(1, 3, 3)
        elif m1.ndim > 3:
            m1 = m1.view(-1, 3, 3)
            m2 = m2.view(-1, 3, 3)
        batch = m1.shape[0]
        m = torch.bmm(m1, m2.transpose(1, 2))  # batch*3*3

        aa = transforms.Rotation.from_matrix(m).as_rotvec().view(batch, 3)
        angles = torch.linalg.norm(aa, dim=1)
        
        return angles
{% endraw %} {% raw %}
with tempfile.TemporaryDirectory() as tmpdirname:
    unpack_body_models("sample_data/", tmpdirname, 1, verify=False)
    for npz_path in npz_paths(tmpdirname):
        cdata = np.load(npz_path)
        poses = torch.tensor(cdata['poses'][:, 3:66])
        n, d = poses.size()
        j = d//3
        poses = poses.view(n, j, 3)
        # convert poses to rotation matrices
        rotmats = transforms.Rotation.from_rotvec(poses).as_matrix().view(n, j, 3, 3)
        outputs = rotmats[:-1:6]
        targets = rotmats[1::6]
        loss = angle_diff(outputs.numpy(), targets.numpy())
        geodesic_loss = GeodesicLossSPL()
        _loss = geodesic_loss(outputs.reshape(-1,3,3), targets.reshape(-1,3,3))
        assert np.allclose(loss, _loss.numpy().reshape(*loss.shape))
        print(outputs.shape, targets.shape, loss.shape)
torch.Size([100, 21, 3, 3]) torch.Size([100, 21, 3, 3]) (100, 21)
torch.Size([39, 21, 3, 3]) torch.Size([39, 21, 3, 3]) (39, 21)
{% endraw %}

### Positional

Based on the VPoser loss above; this has to contain a body model to do forward kinematics, estimating the joint positions given a fixed body model. (Although betas are often provided with AMASS examples, so it may be more correct to use those.)

{% raw %}

class PositionalLossSPL[source]

PositionalLossSPL(body_model) :: ForwardKinematicLoss

Must be initialized with an SMPL-like body_model.

{% endraw %} {% raw %}
class PositionalLossSPL(ForwardKinematicLoss):
    def forward(
        self,
        aa_out=None,
        pose_target=None,
        bm_orig=None,
        bm_rec=None,
        positions=None,
        target_positions=None
    ):
        for p in [positions, target_positions]:
            if p is not None:
                assert p.ndim == 3
                assert p.size(-1) == 3, "final dim must contain 3D locations"
        if pose_target is not None:
            if pose_target.ndim == 3:
                bs, f, d = pose_target.size()
                n = bs*f
                # n_joints isn't defined in this scope; check divisibility instead
                assert d % 3 == 0, f"pose dim must be a multiple of 3, got {d}"
            elif pose_target.ndim == 2:
                n, d = pose_target.size()

        # forward kinematics
        no_bm_output = bm_orig is None or bm_rec is None
        no_positions = positions is None or target_positions is None
        if no_bm_output and no_positions:
            bm_orig, bm_rec = self.kinematics(aa_out.reshape(n, d), pose_target.reshape(n, d))
            positions = bm_rec.Jtr
            target_positions = bm_orig.Jtr
            
        return torch.sqrt(torch.square(positions - target_positions).sum(2))
{% endraw %} {% raw %}
with tempfile.TemporaryDirectory() as tmpdirname:
    unpack_body_models("sample_data/", tmpdirname, 1, verify=False)
    for npz_path in npz_paths(tmpdirname):
        cdata = np.load(npz_path)
        _poses = torch.tensor(cdata['poses'][::6, 3:66], dtype=torch.float, requires_grad=True)
        n, d = _poses.size()
        j = d//3
        poses = _poses.view(n, j, 3)
        poses = poses.view(-1, j*3)
        # convert poses to rotation matrices
        with torch.no_grad():
            positions = body_model(pose_body=poses).Jtr
        outputs = positions[:-1]
        targets = positions[1:]
        loss = positional(outputs.numpy(), targets.numpy())
        positional_loss_spl = PositionalLossSPL(body_model)
        _loss = positional_loss_spl(poses[:-1], poses[1:])
        assert np.allclose(loss, _loss.detach().numpy())
        _loss.mean().backward()
        assert not torch.any(torch.isnan(_poses.grad)), "Gradient contains NaNs"
        assert torch.abs(_poses.grad).max() > 1e-6, "gradient is zero"
        print(outputs.shape, targets.shape, loss.shape)
torch.Size([100, 52, 3]) torch.Size([100, 52, 3]) (100, 52)
torch.Size([39, 52, 3]) torch.Size([39, 52, 3]) (39, 52)
{% endraw %} {% raw %}
with tempfile.TemporaryDirectory() as tmpdirname:
    unpack_body_models("sample_data/", tmpdirname, 1, verify=False)
    for npz_path in npz_paths(tmpdirname):
        cdata = np.load(npz_path)
        _poses = torch.tensor(cdata['poses'][::6, :3*24], dtype=torch.float, requires_grad=True)
        n, d = _poses.size()
        j = d//3
        poses = _poses.view(n, j, 3)
        poses = poses.view(-1, j*3)
        # convert poses to rotation matrices
        positions = fk_engine.from_aa(poses)
        outputs = positions[:-1]
        targets = positions[1:]
        loss = positional(outputs.detach().numpy(), targets.detach().numpy())
        positional_loss_spl = PositionalLossSPL(None)
        _loss = positional_loss_spl(positions=outputs, target_positions=targets)
        assert np.allclose(loss, _loss.detach().numpy())
        _loss.mean().backward()
        assert not torch.any(torch.isnan(_poses.grad)), "Gradient contains NaNs"
        assert torch.abs(_poses.grad).max() > 1e-6, "gradient is zero"
        print(outputs.shape, targets.shape, loss.shape)
torch.Size([100, 24, 3]) torch.Size([100, 24, 3]) (100, 24)
torch.Size([39, 24, 3]) torch.Size([39, 24, 3]) (39, 24)
{% endraw %}

### PCK

{% raw %}

class PCK_SPL[source]

PCK_SPL(body_model) :: ForwardKinematicLoss

Must be initialized with an SMPL-like body_model.

{% endraw %} {% raw %}
class PCK_SPL(ForwardKinematicLoss):
    def forward(
        self,
        aa_out=None,
        pose_target=None,
        positions=None,
        target_positions=None,
        thresh=None,
        bm_orig=None,
        bm_rec=None,
        n_joints=21
    ):
        assert thresh is not None
        for p in [positions, target_positions]:
            if p is not None:
                assert p.ndim == 3
                assert p.size(-1) == 3, "final dim must contain 3D locations"
        if pose_target is not None:
            if pose_target.ndim == 3:
                bs, f, d = pose_target.size()
                n = bs*f
                assert d == n_joints*3
            elif pose_target.ndim == 2:
                n, d = pose_target.size()

        # forward kinematics
        no_bm_output = bm_orig is None or bm_rec is None
        no_positions = positions is None or target_positions is None
        if no_bm_output and no_positions:
            bm_orig, bm_rec = self.kinematics(aa_out.reshape(n, d), pose_target.reshape(n, d))
            positions = bm_rec.Jtr
            target_positions = bm_orig.Jtr
            
        # percentage of coordinates in the ball defined by thresh around a joint
        n, d, _ = positions.size()
        dist = torch.sqrt(torch.square(positions - target_positions).sum(2))
        return torch.mean((dist <= thresh).float(), 1)
{% endraw %} {% raw %}
pck_thresholds = [0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.15, 0.2, 0.3]

with tempfile.TemporaryDirectory() as tmpdirname:
    unpack_body_models("sample_data/", tmpdirname, 1, verify=False)
    for npz_path in npz_paths(tmpdirname):
        cdata = np.load(npz_path)
        poses = torch.tensor(cdata['poses'][:, 3:66]).float()
        n, d = poses.size()
        j = d//3
        poses = poses.view(n, j, 3)
        poses = poses[::6].view(-1, j*3)
        # convert poses to rotation matrices
        with torch.no_grad():
            positions = body_model(pose_body=poses).Jtr
        outputs = positions[:-1]
        targets = positions[1:]
        _pck = PCK_SPL(body_model)
        for thresh in pck_thresholds:
            loss = pck(outputs.numpy(), targets.numpy(), thresh)
            _loss = _pck(poses[:-1], poses[1:], thresh=thresh)
            assert np.allclose(loss, _loss)
{% endraw %}

### PCK AUC

Area under the curve (AUC) for PCK values.

{% raw %}

calculate_auc[source]

calculate_auc(pck_values, pck_thresholds, target_length)

Calculate area under a curve (AUC) metric for PCK.

If the sequence length is shorter, we ignore some of the high-tolerance PCK values in order to have less saturated AUC.

Args: pck_values (list): PCK values. pck_thresholds (list): PCK threshold values. target_length (int): determines for which time-step we calculate AUC.

{% endraw %} {% raw %}
"""
SPL: training and evaluation of neural networks with a structured prediction layer.
Copyright (C) 2019 ETH Zurich, Emre Aksan, Manuel Kaufmann

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.
"""

def calculate_auc(pck_values, pck_thresholds, target_length):
    """Calculate area under a curve (AUC) metric for PCK.

    If the sequence length is shorter, we ignore some of the high-tolerance PCK values in order to have less
    saturated AUC.
    Args:
        pck_values (list): PCK values.
        pck_thresholds (list): PCK threshold values.
        target_length (int): determines for which time-step we calculate AUC.
    Returns:
    """
    # Due to the saturation effect, we consider a limited number of PCK thresholds in AUC calculation.
    if target_length < 6:
        n_pck = 6
    elif target_length < 12:
        n_pck = 7
    elif target_length < 18:
        n_pck = 8
    else:
        n_pck = len(pck_thresholds)

    norm_factor = np.diff(pck_thresholds[:n_pck]).sum()
    auc_values = []
    for i in range(n_pck - 1):
        auc = (pck_values[i] + pck_values[i + 1]) / 2 * (pck_thresholds[i + 1] - pck_thresholds[i])
        auc_values.append(auc)
    return np.array(auc_values).sum() / norm_factor
{% endraw %} {% raw %}
pck_thresholds = [0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.15, 0.2, 0.3]

with tempfile.TemporaryDirectory() as tmpdirname:
    unpack_body_models("sample_data/", tmpdirname, 1, verify=False)
    for npz_path in npz_paths(tmpdirname):
        cdata = np.load(npz_path)
        _poses = torch.tensor(cdata['poses'][::6, :3*24], dtype=torch.float, requires_grad=True)
        n, d = _poses.size()
        j = d//3
        poses = _poses.view(n, j, 3)
        poses = poses.view(-1, j*3)
        # convert poses to rotation matrices
        positions = fk_engine.from_aa(poses)
        outputs = positions[:-1]
        targets = positions[1:]
        _pck = PCK_SPL(None)
        
        pck_vals = []
        for thresh in pck_thresholds:
            loss = pck(outputs.detach().numpy(), targets.detach().numpy(), thresh)
            pck_vals.append(loss)
            _loss = _pck(positions=outputs, target_positions=targets, thresh=thresh)
            assert np.allclose(loss, _loss)
        pck_vals = [np.mean(v) for v in pck_vals]
        print(f"AUC: {calculate_auc(pck_vals, pck_thresholds, 6)}")
AUC: 0.9904558266569304
AUC: 0.5497117626587016
{% endraw %}
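
`calculate_auc` is a trapezoidal-rule integral over the PCK curve, normalized by the threshold range it covers. A quick check with the thresholds above: a perfect PCK curve of all ones integrates to exactly 1.

{% raw %}
perfect = [1.0] * len(pck_thresholds)
assert abs(calculate_auc(perfect, pck_thresholds, target_length=6) - 1.0) < 1e-8
{% endraw %}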

## Logging Utilities

Utility functions for logging losses during training.

{% raw %}

all_univariate_tensors_in[source]

all_univariate_tensors_in(d)

Utility function for logging with a callback function

{% endraw %} {% raw %}
def all_univariate_tensors_in(d):
    "Utility function for logging with a callback function"
    def is_univariate_tensor(x):
        return isinstance(x, torch.Tensor) and x.nelement() == 1

    return {k: v for k, v in d.items() if is_univariate_tensor(v)}
{% endraw %}
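
A sketch of how this pairs with the `callback` argument in `VPoserLikelihood` above; the callback name here is hypothetical:

{% raw %}
def log_scalars(scalars):
    # e.g. forward these to a tensorboard writer; here we just print them
    print({k: float(v) for k, v in scalars.items()})

mixed = {"loss": torch.tensor(0.5), "batch": torch.ones(4), "step": 3}
log_scalars(all_univariate_tensors_in(mixed))  # only "loss" survives the filter
{% endraw %}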