From a2a9903955d0d2565e6e9ae9fd1d13c91e2da889 Mon Sep 17 00:00:00 2001
From: Xiaosu Zhu
Date: Fri, 1 Nov 2019 09:17:25 +0800
Subject: [PATCH] Add files via upload

---
 TripletLoss.py | 191 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 191 insertions(+)
 create mode 100644 TripletLoss.py

diff --git a/TripletLoss.py b/TripletLoss.py
new file mode 100644
index 0000000..9a88f00
--- /dev/null
+++ b/TripletLoss.py
@@ -0,0 +1,191 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn
+
+
+def pairwise_distance(feature, squared=False):
+    """Computes the pairwise distance matrix with numerical stability.
+    output[i, j] = || feature[i, :] - feature[j, :] ||_2
+    Args:
+        feature: 2-D Tensor of size [number of data, feature dimension].
+        squared: Boolean, whether or not to square the pairwise distances.
+    Returns:
+        pairwise_distances: 2-D Tensor of size [number of data, number of data].
+    """
+    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b, computed for all pairs at once.
+    pairwise_distances_squared = math_ops.add(
+        math_ops.reduce_sum(math_ops.square(feature), axis=[1], keepdims=True),
+        math_ops.reduce_sum(
+            math_ops.square(array_ops.transpose(feature)),
+            axis=[0],
+            keepdims=True)) - 2.0 * math_ops.matmul(
+                feature, array_ops.transpose(feature))
+
+    # Deal with numerical inaccuracies. Set small negatives to zero.
+    pairwise_distances_squared = math_ops.maximum(pairwise_distances_squared, 0.0)
+    # Get the mask where the zero distances are at.
+    error_mask = math_ops.less_equal(pairwise_distances_squared, 0.0)
+
+    # Optionally take the sqrt; 1e-16 is added under the zero entries so the
+    # gradient of sqrt stays finite at zero.
+    if squared:
+        pairwise_distances = pairwise_distances_squared
+    else:
+        pairwise_distances = math_ops.sqrt(
+            pairwise_distances_squared
+            + math_ops.cast(error_mask, dtypes.float32) * 1e-16)
+
+    # Undo conditionally adding 1e-16.
+    pairwise_distances = math_ops.multiply(
+        pairwise_distances,
+        math_ops.cast(math_ops.logical_not(error_mask), dtypes.float32))
+
+    num_data = array_ops.shape(feature)[0]
+    # Explicitly set diagonals to zero.
+    mask_offdiagonals = array_ops.ones_like(pairwise_distances) - array_ops.diag(
+        array_ops.ones([num_data]))
+    pairwise_distances = math_ops.multiply(pairwise_distances, mask_offdiagonals)
+    return pairwise_distances
+
+
+def pairwise_cosine_distance(feature):
+    """Computes the pairwise cosine distance matrix.
+    output[i, j] = 1 - cos(feature[i, :], feature[j, :])
+    Args:
+        feature: 2-D Tensor of size [number of data, feature dimension].
+    Returns:
+        dist: 2-D Tensor of size [number of data, number of data].
+    """
+    # Normalize each row to unit length.
+    normalized = nn.l2_normalize(feature, axis=1)
+
+    # With unit-length rows, the Gram matrix normalized @ normalized^T holds
+    # the pairwise cosine similarities.
+    prod = math_ops.matmul(normalized, normalized,
+                           adjoint_b=True)  # transpose the second matrix
+
+    dist = 1 - prod
+    return dist
+
+
+def _build_multilabel_adjacency(labels):
+    """Builds the pairwise adjacency matrix for multi-label samples.
+    Two samples are assumed similar if their label vectors share at least one
+    concept and dissimilar if they share none, so in labels @ labels.T a zero
+    entry marks a dissimilar pair and a positive entry a similar pair.
+    Args:
+        labels: 2-D multi-hot label `Tensor` of size [batch_size, class_num].
+    Returns:
+        adjacency: 2-D Boolean `Tensor` of size [batch_size, batch_size].
+    """
+    adj = labels @ array_ops.transpose(labels)
+    return math_ops.greater(adj, 0)
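+# A worked illustration of the adjacency above (editorial sketch; the label
+# values are made up):
+#     labels = [[1, 0, 1],
+#               [1, 1, 0],
+#               [0, 0, 1]]
+#     labels @ labels.T = [[2, 1, 1],
+#                          [1, 2, 0],
+#                          [1, 0, 1]]
+# Every pair sharing a concept gets a positive count, while samples 1 and 2
+# share nothing, so entry (1, 2) stays 0 and the pair is marked dissimilar.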
+
+
+def masked_maximum(data, mask, dim=1):
+    """Computes the axis wise maximum over chosen elements.
+    Args:
+        data: 2-D float `Tensor` of size [n, m].
+        mask: 2-D float `Tensor` of 0/1 values of size [n, m].
+        dim: The dimension over which to compute the maximum.
+    Returns:
+        masked_maximums: 2-D `Tensor`.
+            The maximized dimension is of size 1 after the operation.
+    """
+    # Shift by the per-axis minimum so masked-out entries (multiplied by 0)
+    # can never beat a kept entry, then undo the shift.
+    axis_minimums = math_ops.reduce_min(data, dim, keepdims=True)
+    masked_maximums = math_ops.reduce_max(
+        math_ops.multiply(data - axis_minimums, mask), dim,
+        keepdims=True) + axis_minimums
+    return masked_maximums
+
+
+def masked_minimum(data, mask, dim=1):
+    """Computes the axis wise minimum over chosen elements.
+    Args:
+        data: 2-D float `Tensor` of size [n, m].
+        mask: 2-D float `Tensor` of 0/1 values of size [n, m].
+        dim: The dimension over which to compute the minimum.
+    Returns:
+        masked_minimums: 2-D `Tensor`.
+            The minimized dimension is of size 1 after the operation.
+    """
+    # Mirror image of masked_maximum: shift by the per-axis maximum instead.
+    axis_maximums = math_ops.reduce_max(data, dim, keepdims=True)
+    masked_minimums = math_ops.reduce_min(
+        math_ops.multiply(data - axis_maximums, mask), dim,
+        keepdims=True) + axis_maximums
+    return masked_minimums
+
+
+def triplet_semihard_loss_multilabel(labels, embeddings, use_cos=False, margin=1.0):
+    """Computes the triplet loss with semi-hard negative mining.
+    The loss encourages the positive distance (between a pair of embeddings
+    that share at least one label) to be smaller than the negative distances.
+    For each positive pair it mines the smallest negative distance in the
+    mini-batch that is still greater than the positive distance (the
+    semi-hard negative); if no such negative exists, it uses the largest
+    negative distance instead.
+    See: https://arxiv.org/abs/1503.03832.
+    Args:
+        labels: 2-D multi-hot label `Tensor` of size [batch_size, class_num].
+        embeddings: 2-D float `Tensor` of embedding vectors. Embeddings
+            should be l2 normalized.
+        use_cos: Boolean; if True, use pairwise cosine distance, otherwise
+            squared l2 distance.
+        margin: Float, margin term in the loss definition.
+    Returns:
+        triplet_loss: tf.float32 scalar.
+    """
+    # Build pairwise distance matrix.
+    pdist_matrix = (pairwise_cosine_distance(embeddings) if use_cos
+                    else pairwise_distance(embeddings, squared=True))
+    # Build pairwise binary adjacency matrix.
+    adjacency = _build_multilabel_adjacency(labels)
+    # Invert so we can select negatives only.
+    adjacency_not = math_ops.logical_not(adjacency)
+
+    # Note: this requires a statically known batch size.
+    batch_size = labels.get_shape().as_list()[0]
+
+    # Compute the mask: entry (c * batch_size + a, j) is True iff j is a
+    # negative of anchor a with d(a, j) > d(a, c).
+    pdist_matrix_tile = array_ops.tile(pdist_matrix, [batch_size, 1])
+    mask = math_ops.logical_and(
+        array_ops.tile(adjacency_not, [batch_size, 1]),
+        math_ops.greater(
+            pdist_matrix_tile, array_ops.reshape(
+                array_ops.transpose(pdist_matrix), [-1, 1])))
+    # mask_final[a, c] is True iff a semi-hard negative exists for the
+    # anchor-positive pair (a, c).
+    mask_final = array_ops.reshape(
+        math_ops.greater(
+            math_ops.reduce_sum(
+                math_ops.cast(mask, dtype=dtypes.float32), 1, keepdims=True),
+            0.0), [batch_size, batch_size])
+    mask_final = array_ops.transpose(mask_final)
+
+    adjacency_not = math_ops.cast(adjacency_not, dtype=dtypes.float32)
+    mask = math_ops.cast(mask, dtype=dtypes.float32)
+
+    # negatives_outside: smallest D_an where D_an > D_ap.
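+    # (Editorial note:) masked_minimum runs per row of the tiled matrix, so
+    # entry (c * batch_size + a) holds min_j { d(a, j) : j negative of a,
+    # d(a, j) > d(a, c) }. The reshape puts this at [c, a] and the transpose
+    # below moves it to [a, c], aligning it with pdist_matrix[a, c] = D_ap.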
+    negatives_outside = array_ops.reshape(
+        masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size])
+    negatives_outside = array_ops.transpose(negatives_outside)
+
+    # negatives_inside: largest D_an, the fallback when no semi-hard
+    # negative exists for a pair.
+    negatives_inside = array_ops.tile(
+        masked_maximum(pdist_matrix, adjacency_not), [1, batch_size])
+    semi_hard_negatives = array_ops.where(
+        mask_final, negatives_outside, negatives_inside)
+
+    # Hinge term: margin + D_ap - D_an, clipped at zero below.
+    loss_mat = math_ops.add(margin, pdist_matrix - semi_hard_negatives)
+
+    # All positive pairs except each sample paired with itself. Assumes every
+    # sample has at least one label, so the diagonal of adjacency is True and
+    # subtracting the identity exactly removes the self-pairs.
+    mask_positives = math_ops.cast(
+        adjacency, dtype=dtypes.float32) - array_ops.diag(
+            array_ops.ones([batch_size]))
+
+    # In lifted-struct the authors multiply the upper triangular by 0.5; in
+    # semi-hard we take all positive pairs except the diagonal. Note the
+    # division below assumes the batch contains at least one positive pair.
+    num_positives = math_ops.reduce_sum(mask_positives)
+
+    triplet_loss = math_ops.truediv(
+        math_ops.reduce_sum(
+            math_ops.maximum(
+                math_ops.multiply(loss_mat, mask_positives), 0.0)),
+        num_positives,
+        name='triplet_semihard_loss')
+
+    return triplet_loss
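+
+
+if __name__ == '__main__':
+    # Editorial smoke test, not part of the original upload: a minimal
+    # sketch assuming TensorFlow 1.x graph mode; the shapes, seed, margin,
+    # and label values below are made up for illustration.
+    import numpy as np
+    import tensorflow as tf
+
+    batch_size, dim, class_num = 8, 32, 5
+    embeddings = tf.placeholder(tf.float32, [batch_size, dim])
+    labels = tf.placeholder(tf.float32, [batch_size, class_num])
+    loss = triplet_semihard_loss_multilabel(
+        labels, tf.nn.l2_normalize(embeddings, axis=1),
+        use_cos=True, margin=0.2)
+
+    rng = np.random.RandomState(0)
+    multi_hot = (rng.rand(batch_size, class_num) > 0.5).astype(np.float32)
+    # Guarantee every sample carries at least one label (see the assumption
+    # on the adjacency diagonal above).
+    multi_hot[np.arange(batch_size),
+              rng.randint(class_num, size=batch_size)] = 1.0
+
+    with tf.Session() as sess:
+        print(sess.run(loss, feed_dict={
+            embeddings: rng.randn(batch_size, dim).astype(np.float32),
+            labels: multi_hot}))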