Source code for ott.solvers.linear.discrete_barycenter

# Copyright OTT-JAX
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import functools
from typing import NamedTuple, Optional, Sequence

import jax
import jax.numpy as jnp

from ott.geometry import geometry
from ott.math import fixed_point_loop
from ott.problems.linear import barycenter_problem
from ott.solvers.linear import sinkhorn

__all__ = ["SinkhornBarycenterOutput", "FixedBarycenter"]


[docs] class SinkhornBarycenterOutput(NamedTuple): # noqa: D101 f: jnp.ndarray g: jnp.ndarray histogram: jnp.ndarray errors: jnp.ndarray
[docs] @jax.tree_util.register_pytree_node_class class FixedBarycenter: """A Wasserstein barycenter solver for histograms on a common geometry. This solver uses a variant of the :class:`~ott.solvers.linear.sinkhorn.Sinkhorn` algorithm proposed in :cite:`janati:20a` to compute the barycenter of various measures supported on the same (common to all) geometry. The geometry is assumed to be either symmetric, or to describe costs between a set of points and another. In that case all reference measures have support on the first measure, whereas the barycenter is supported on the second. Args: threshold: convergence threshold. The algorithm stops when the marginal violations of all transport plans computed for that barycenter go below that threshold. norm_error: norm used to compute marginal deviation. inner_iterations: number of iterations run before recomputing errors. min_iterations: number of iterations run without checking whether termination criterion is true. max_iterations: maximal number of iterations. lse_mode: sets computations in kernel (``False``) or log-sum-exp mode. debiased: uses debiasing correction to avoid blur due to entropic regularization. """ def __init__( self, threshold: float = 1e-2, norm_error: int = 1, inner_iterations: float = 10, min_iterations: int = 0, max_iterations: int = 2000, lse_mode: bool = True, debiased: bool = False ): self.threshold = threshold self.norm_error = norm_error self.inner_iterations = inner_iterations self.min_iterations = min_iterations self.max_iterations = max_iterations self.lse_mode = lse_mode self.debiased = debiased def __call__( self, fixed_bp: barycenter_problem.FixedBarycenterProblem, dual_initialization: Optional[jnp.ndarray] = None, ) -> SinkhornBarycenterOutput: """Solve barycenter problem, possibly using clever initialization. Args: fixed_bp: Fixed barycenter problem. dual_initialization: Initial value for the g_v potential/scalings, one for each of the histograms described in ``fixed_bp``. If ``None``, use initialization from :cite:`cuturi:15`, eq. 3.6. Returns: The barycenter. """ geom = fixed_bp.geom a = fixed_bp.a num_a, num_b = geom.shape weights = fixed_bp.weights if dual_initialization is None: # initialization strategy from :cite:`cuturi:15`, (3.6). dual_initialization = geom.apply_cost(a.T, axis=0).T dual_initialization -= jnp.average( dual_initialization, weights=weights, axis=0 )[jnp.newaxis, :] if self.debiased and not geom.is_symmetric: raise ValueError("Geometry must be symmetric to use debiased option.") norm_error = (self.norm_error,) return _discrete_barycenter( geom, a, weights, dual_initialization, self.threshold, norm_error, self.inner_iterations, self.min_iterations, self.max_iterations, self.lse_mode, self.debiased, num_a, num_b ) def tree_flatten(self): # noqa: D102 aux = vars(self).copy() aux.pop("threshold") return [ self.threshold, ], aux @classmethod def tree_unflatten(cls, aux_data, children): # noqa: D102 return cls(**aux_data, threshold=children[0])
@functools.partial(jax.jit, static_argnums=(5, 6, 7, 8, 9, 10, 11, 12)) def _discrete_barycenter( geom: geometry.Geometry, a: jnp.ndarray, weights: jnp.ndarray, dual_initialization: jnp.ndarray, threshold: float, norm_error: Sequence[int], inner_iterations: int, min_iterations: int, max_iterations: int, lse_mode: bool, debiased: bool, num_a: int, num_b: int ) -> SinkhornBarycenterOutput: """Jit'able function to compute discrete barycenters.""" if lse_mode: f_u = jnp.zeros_like(a) g_v = dual_initialization else: f_u = jnp.ones_like(a) g_v = geom.scaling_from_potential(dual_initialization) # d below is as described in https://arxiv.org/abs/2006.02575. Note that # d should be considered to be equal to eps log(d) with those notations # if running in log-sum-exp mode. d = jnp.zeros((num_b,)) if lse_mode else jnp.ones((num_b,)) if lse_mode: parallel_update = jax.vmap( lambda f, g, marginal, iter: geom. update_potential(f, g, jnp.log(marginal), axis=1), in_axes=[0, 0, 0, None] ) parallel_apply = jax.vmap( lambda f_, g_, eps_: geom. apply_lse_kernel(f_, g_, eps_, vec=None, axis=0)[0], in_axes=[0, 0, None] ) else: parallel_update = jax.vmap( lambda f, g, marginal, iter: geom.update_scaling(g, marginal, axis=1), in_axes=[0, 0, 0, None] ) parallel_apply = jax.vmap( lambda f_, g_, eps_: geom.apply_kernel(f_, eps_, axis=0), in_axes=[0, 0, None] ) errors_fn = jax.vmap( functools.partial( sinkhorn.marginal_error, geom=geom, axis=1, norm_error=norm_error, lse_mode=lse_mode ), in_axes=[0, 0, 0] ) errors = -jnp.ones((max_iterations // inner_iterations + 1, len(norm_error))) const = (geom, a, weights) def cond_fn(iteration, const, state): # pylint: disable=unused-argument errors = state[0] return jnp.logical_or( iteration == 0, errors[iteration // inner_iterations - 1, 0] > threshold ) def body_fn(iteration, const, state, compute_error): geom, a, weights = const errors, d, f_u, g_v = state eps = geom._epsilon.at(iteration) # pylint: disable=protected-access f_u = parallel_update(f_u, g_v, a, iteration) # kernel_f_u stands for K times potential u if running in scaling mode, # eps log K exp f / eps in lse mode. kernel_f_u = parallel_apply(f_u, g_v, eps) # b below is the running estimate for the barycenter if running in scaling # mode, eps log b if running in lse mode. if lse_mode: b = jnp.average(kernel_f_u, weights=weights, axis=0) else: b = jnp.prod(kernel_f_u ** weights[:, jnp.newaxis], axis=0) if debiased: if lse_mode: b += d d = 0.5 * ( d + geom.update_potential( jnp.zeros((num_a,)), d, b / eps, iteration=iteration, axis=0 ) ) else: b *= d d = jnp.sqrt(d * geom.update_scaling(d, b, iteration=iteration, axis=0)) if lse_mode: g_v = b[jnp.newaxis, :] - kernel_f_u else: g_v = b[jnp.newaxis, :] / kernel_f_u # re-compute error if compute_error is True, else set to inf. err = jnp.where( jnp.logical_and(compute_error, iteration >= min_iterations), jnp.mean(errors_fn(f_u, g_v, a)), jnp.inf ) errors = errors.at[iteration // inner_iterations, :].set(err) return errors, d, f_u, g_v state = (errors, d, f_u, g_v) state = fixed_point_loop.fixpoint_iter_backprop( cond_fn, body_fn, min_iterations, max_iterations, inner_iterations, const, state ) errors, d, f_u, g_v = state kernel_f_u = parallel_apply(f_u, g_v, geom.epsilon) if lse_mode: b = jnp.average(kernel_f_u, weights=weights, axis=0) else: b = jnp.prod(kernel_f_u ** weights[:, jnp.newaxis], axis=0) if debiased: if lse_mode: b += d else: b *= d if lse_mode: b = jnp.exp(b / geom.epsilon) return SinkhornBarycenterOutput(f_u, g_v, b, errors)