Source code for ott.tools.gaussian_mixture.gaussian_mixture_pair

# Copyright OTT-JAX
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any

import jax
import jax.numpy as jnp

from ott.geometry import costs, geometry, pointcloud
from ott.problems.linear import linear_problem
from ott.solvers.linear import sinkhorn
from ott.tools.gaussian_mixture import gaussian_mixture

__all__ = ["GaussianMixturePair"]



[docs]
@jax.tree_util.register_pytree_node_class
class GaussianMixturePair:
  """Coupled pair of Gaussian mixture models.

  Includes methods used in estimating an optimal pairing between GMM components
  using the Wasserstein-like method described in :cite:`delon:20`,
  as well as a generalization that allows for the reweighting of components.

  :cite:`delon:20` propose fitting a pair of GMMs to a pair
  of point clouds in such a way that the sum of the log likelihood of the
  points minus a weighted penalty involving a Wasserstein-like distance between
  the GMMs is maximized. Their proposed algorithm involves using EM in which a
  balanced Sinkhorn algorithm is used to estimate a coupling between the GMMs
  at each step of EM.

  Our generalization of this algorithm allows for a mismatch between the
  marginals of the coupling and the GMM component weights. This mismatch can be
  interpreted as components being reweighted rather than being transported.
  We penalize reweighting with a generalized KL-divergence penalty, and we give
  the option to use the unbalanced Sinkhorn algorithm rather than the balanced
  to compute the divergence between GMMs.
  """

  def __init__(
      self,
      gmm0: gaussian_mixture.GaussianMixture,
      gmm1: gaussian_mixture.GaussianMixture,
      epsilon: float = 1e-2,
      tau: float = 1.0,
      lock_gmm1: bool = False,
  ):
    """Constructor.

    When fitting a pair of coupled GMMs with *no* reweighting of components
    using the algorithm in :cite:`delon:20`, set tau = 1. The coupling between
    components will be determined via the balanced Sinkhorn algorithm.

    When fitting a pair of coupled GMMs in which reweighting of components is
    allowed, set tau to a value in (0, 1). The resulting coupling will penalize
    the generalized KL divergence between the coupling's marginals and the GMM
    component weights with a weight of rho = epsilon tau / (1 - tau).

    Args:
      gmm0: first GMM in the pair
      gmm1: second GMM in the pair
      epsilon: regularization weight to use for the Sinkhorn algorithm
      tau: encodes the weight, rho, to use for the generalized KL divergence
        between the coupling's marginals and GMM component weights as
        rho = epsilon tau / (1 - tau)
      lock_gmm1: indicates whether the parameters of gmm1 should be modified
        during optimization
    """  # noqa: D401
    self._gmm0 = gmm0
    self._gmm1 = gmm1
    self._epsilon = epsilon
    self._tau = tau
    self._lock_gmm1 = lock_gmm1

  @property
  def dtype(self):
    """Data type of the pair, taken from the ``dtype`` of :attr:`gmm0`."""
    return self.gmm0.dtype

  @property
  def gmm0(self) -> gaussian_mixture.GaussianMixture:
    """First GMM in the pair, as passed to the constructor."""
    return self._gmm0

  @property
  def gmm1(self) -> gaussian_mixture.GaussianMixture:
    """Second GMM in the pair, as passed to the constructor."""
    return self._gmm1

  @property
  def epsilon(self) -> float:
    """Regularization weight used for the Sinkhorn algorithm."""
    return self._epsilon

  @property
  def tau(self) -> float:
    """Parameter encoding the KL penalty weight :attr:`rho`.

    The relation is rho = epsilon tau / (1 - tau).
    """
    return self._tau

  @property
  def rho(self):  # noqa: D102
    return self.epsilon * self.tau / (1.0 - self.tau)

  @property
  def lock_gmm1(self) -> bool:
    """Whether :attr:`gmm1` is locked.

    When true, :meth:`tree_flatten` places :attr:`gmm1` in the auxiliary data
    rather than among the children.
    """
    return self._lock_gmm1


[docs]
  def get_bures_geometry(self) -> pointcloud.PointCloud:
    """Get a Bures Geometry for the two GMMs.

    Every component is encoded as a single point: its mean concatenated with
    its covariance matrix flattened along the last two axes. The components of
    :attr:`gmm0` form ``x`` and those of :attr:`gmm1` form ``y``.

    Returns:
      A point cloud with a Bures cost function and :attr:`epsilon` as
      regularization.
    """
    loc0, loc1 = self.gmm0.loc, self.gmm1.loc
    # The dimension is read from the first GMM and used for both mixtures.
    dimension = loc0.shape[-1]

    def _flatten(cov):
      # Merge the two trailing matrix axes into one of size dimension ** 2.
      return cov.reshape(cov.shape[:-2] + (dimension * dimension,))

    points0 = jnp.concatenate([loc0, _flatten(self.gmm0.covariance)], axis=-1)
    points1 = jnp.concatenate([loc1, _flatten(self.gmm1.covariance)], axis=-1)
    return pointcloud.PointCloud(
        x=points0,
        y=points1,
        cost_fn=costs.Bures(dimension=dimension),
        epsilon=self.epsilon,
    )



[docs]
  def get_cost_matrix(self) -> jnp.ndarray:
    """Get matrix of :math:`W_2^2` costs between all pairs of components.

    Returns:
      The ``cost_matrix`` of the geometry built by :meth:`get_bures_geometry`.
    """
    bures_geometry = self.get_bures_geometry()
    return bures_geometry.cost_matrix



[docs]
  def get_sinkhorn(
      self, cost_matrix: jnp.ndarray, **kwargs: Any
  ) -> sinkhorn.SinkhornOutput:
    """Get the output of Sinkhorn's method for a given cost matrix.

    Args:
      cost_matrix: cost between the components of the two GMMs, e.g. as
        returned by :meth:`get_cost_matrix`.
      kwargs: keyword arguments forwarded to the Sinkhorn solver constructor.

    Returns:
      The solver output for the problem whose marginals are the component
      weights of :attr:`gmm0` and :attr:`gmm1`, with :attr:`tau` used for both
      sides.
    """
    # The cost matrix is expensive to compute, so it is wrapped in a plain
    # Geometry instead of going through the PointCloud from
    # get_bures_geometry, which would compute it again.
    weights0 = self.gmm0.component_weights
    weights1 = self.gmm1.component_weights
    problem = linear_problem.LinearProblem(
        geometry.Geometry(cost_matrix=cost_matrix, epsilon=self.epsilon),
        a=weights0,
        b=weights1,
        tau_a=self.tau,
        tau_b=self.tau,
    )
    solver = sinkhorn.Sinkhorn(**kwargs)
    return solver(problem)



[docs]
  def get_normalized_sinkhorn_coupling(
      self,
      sinkhorn_output: sinkhorn.SinkhornOutput,
  ) -> jnp.ndarray:
    """Get the normalized coupling matrix for the specified Sinkhorn output.

    The coupling is divided by the sum of all its entries.

    Args:
      sinkhorn_output: Sinkhorn algorithm output as returned by
        :meth:`get_sinkhorn`.

    Returns:
      A coupling matrix that tells how much of the mass of each component of
      :attr:`gmm0` is mapped to each component of :attr:`gmm1`.
    """
    coupling = sinkhorn_output.matrix
    total_mass = jnp.sum(coupling)
    return coupling / total_mass


  def tree_flatten(self):
    """Method used by jax.tree_util to flatten a GaussianMixturePair.

    We control the subset of parameters that we will optimize in fit_gmm_pair
    by selectively placing them in either children (the parameters to optimize)
    or aux_data (the parameters to leave alone).

    Returns:
      A tuple of child pytrees and a dict of auxiliary data.
    """  # noqa: D401
    children = [self.gmm0]
    aux_data = {
        "epsilon": self.epsilon,
        "tau": self.tau,
        "lock_gmm1": self.lock_gmm1
    }
    if self.lock_gmm1:
      aux_data["gmm1"] = self.gmm1
    else:
      children.append(self.gmm1)
    return tuple(children), aux_data

  @classmethod
  def tree_unflatten(cls, aux_data, children):
    """Method used by jax.tree_util to unflatten a GaussianMixturePair.

    tree_flatten controls which parameters get optimized by placing them in
    either children or aux_data; here we invert the process.

    Args:
      aux_data: auxiliary data that is passed to the constructor as kwargs
      children: child pytrees passed to the constructor as args

    Returns:
      A GaussianMixturePair.
    """  # noqa: D401
    children = list(children)
    if "gmm1" in aux_data:
      gmm1 = aux_data.pop("gmm1")
      children.insert(1, gmm1)
    return cls(*children, **aux_data)

  def __repr__(self):
    class_name = type(self).__name__
    children, aux = self.tree_flatten()
    return "{}({})".format(
        class_name, ", ".join([repr(c) for c in children] +
                              [f"{k}: {repr(v)}" for k, v in aux.items()])
    )

  def __hash__(self):
    return jax.tree_util.tree_flatten(self).__hash__()

  def __eq__(self, other):
    return jax.tree_util.tree_flatten(self) == jax.tree_util.tree_flatten(other)