def layer_rotation(current_params, init_params):
    """
    Measure how far each named parameter has rotated from its initial value.

    Each pair of tensors is flattened to 1-D and compared with cosine
    similarity; the reported value is the cosine distance ``1 - similarity``.

    Args:
        current_params: iterable of ``(name, tensor)`` pairs holding the
            current parameters.
        init_params: iterable of ``(name, tensor)`` pairs holding the initial
            parameters, in the same order as ``current_params``.

    Returns:
        list: one ``(name, distance)`` tuple per zipped pair, where
        ``distance`` is a Python float.

    Raises:
        AssertionError: if the two names of a zipped pair differ.

    Note:
        The iterables are combined with ``zip``, so trailing entries of the
        longer one are silently ignored.

    Example:
        >>> import netharn as nh
        >>> model = nh.models.ToyNet2d()
        >>> model2 = nh.models.ToyNet2d()
        >>> init_params = _get_named_params(model)
        >>> current_params = _get_named_params(model2)
        >>> ret = layer_rotation(current_params, init_params)
    """
    ret = []
    for (n1, p1), (n2, p2) in zip(current_params, init_params):
        assert n1 == n2, "{} vs {}".format(n1, n2)
        # Flatten so tensors of any rank are compared as plain vectors.
        sim = torch.cosine_similarity(p1.reshape(-1), p2.reshape(-1), dim=0).item()
        dist = 1.0 - sim
        ret.append((n1, dist))
    return ret
def forward(self, sentence_features: Iterable[Dict[str, Tensor]], labels: Tensor):
    """
    Score a pair of sentences by the cosine similarity of their embeddings.

    Every element of ``sentence_features`` is passed through ``self.model``
    and its ``'sentence_embedding'`` entry is kept. Exactly two embeddings
    are expected.

    Args:
        sentence_features: iterable of two feature dicts accepted by
            ``self.model``.
        labels: target similarities, flattened with ``view(-1)`` before the
            comparison; may be ``None`` to skip the loss.

    Returns:
        The MSE loss between the cosine similarities and ``labels`` when
        ``labels`` is given; otherwise the tuple ``(reps, output)`` with the
        list of embeddings and the cosine similarities.

    Raises:
        ValueError: if ``sentence_features`` does not yield exactly two items
            (from the tuple unpacking).
    """
    reps = [self.model(features)['sentence_embedding'] for features in sentence_features]
    rep_a, rep_b = reps
    output = torch.cosine_similarity(rep_a, rep_b)

    if labels is None:
        return reps, output

    # The criterion is only needed on the labelled path, so build it here
    # instead of on every call.
    loss_fct = nn.MSELoss()
    return loss_fct(output, labels.view(-1))
def cluster(data, k, temp, num_iter, init=None, cluster_temp=5):
    '''
    pytorch (differentiable) implementation of soft k-means clustering.

    Rows of ``data`` are L2-normalised, then ``num_iter`` rounds of soft
    assignment (softmax over ``cluster_temp * <x, mu>``) and
    responsibility-weighted mean updates are applied to the centres.

    Args:
        data: 2-D tensor with one row per point.
        k: number of clusters.
        temp: unused by this function; kept for interface compatibility.
        num_iter: number of centre-update iterations.
        init: optional initial centres (one row per cluster). When ``None``
            a k-means++ initialisation from scikit-learn is used.
        cluster_temp: multiplier applied to the similarities before the
            softmax.

    Returns:
        ``(mu, r, dist)``: the centres, the soft assignments of every point,
        and the point/centre dot products computed with the final centres.
        NOTE(review): when ``init`` is ``None`` and ``num_iter == 0`` only the
        freshly initialised centres are returned (a bare tensor, not a
        tuple); callers must handle both shapes.
    '''
    # normalize x so it lies on the unit sphere (broadcast division avoids
    # materialising an n x n diagonal matrix)
    data = data / torch.norm(data, p=2, dim=1, keepdim=True)
    # use kmeans++ initialization if nothing is provided
    if init is None:
        # .cpu() so the numpy conversion also works for non-CPU tensors
        data_np = data.detach().cpu().numpy()
        norm = (data_np**2).sum(axis=1)
        # NOTE(review): `k_means_._k_init` is a private scikit-learn helper;
        # confirm the installed version still provides it (newer releases
        # expose a public `sklearn.cluster.kmeans_plusplus`).
        init = sklearn.cluster.k_means_._k_init(data_np, k, norm, sklearn.utils.check_random_state(None))
        init = torch.tensor(init, dtype=data.dtype, device=data.device, requires_grad=True)
        if num_iter == 0:
            return init
    mu = init
    for _ in range(num_iter):
        # similarities between all data points and cluster centers
        dist = data @ mu.t()
        # cluster responsibilities via softmax
        r = torch.softmax(cluster_temp * dist, 1)
        # total responsibility of each cluster
        cluster_r = r.sum(dim=0)
        # sum of points in each cluster weighted by responsibility
        cluster_mean = r.t() @ data
        # update cluster means
        mu = cluster_mean / cluster_r.unsqueeze(1)
    dist = data @ mu.t()
    r = torch.softmax(cluster_temp * dist, 1)
    return mu, r, dist
def forward_gmmn(self, visual_features, semantic_features, class_id, words, metrics):
    """
    Compute the generator loss, optionally with a distillation term.

    The base loss is ``mmd`` between the visual features and the generated
    semantic features, configured by ``self.gmmn_config["mmd"]``. When
    ``self.gmmn_config["old_mmd"]`` is set and ``self._old_word_embeddings``
    exists, a second term ties ``semantic_features`` to the output of
    ``self._old_word_embeddings(words)`` (computed without gradients).

    Args:
        visual_features: features passed to ``mmd`` as ``real``.
        semantic_features: generated features passed to ``mmd`` as ``fake``.
        class_id: class index; the extra term is skipped for
            ``class_id >= self._n_classes - self._task_size`` unless
            ``old_mmd["apply_unseen"]`` is truthy.
        words: input given to ``self._old_word_embeddings``.
        metrics: mapping whose ``"old"`` entry is incremented by the value
            of the extra term.

    Returns:
        The base loss, plus the weighted distillation term when it applies.

    Raises:
        ValueError: if ``old_mmd["type"]`` is not one of ``"mmd"``, ``"kl"``,
            ``"l2"`` or ``"cosine"``.
    """
    loss = mmd(real=visual_features, fake=semantic_features, **self.gmmn_config["mmd"])

    old_config = self.gmmn_config.get("old_mmd")
    if not old_config or self._old_word_embeddings is None:
        return loss

    old_unseen_limit = self._n_classes - self._task_size
    if not old_config.get("apply_unseen", False) and class_id >= old_unseen_limit:
        return loss

    # The old embeddings act as a fixed target: no gradient flows into them.
    with torch.no_grad():
        old_semantic_features = self._old_word_embeddings(words)

    factor = old_config["factor"]
    _type = old_config.get("type", "mmd")
    if _type == "mmd":
        old_loss = factor * mmd(
            real=old_semantic_features, fake=semantic_features, **self.gmmn_config["mmd"]
        )
    elif _type == "kl":
        # NOTE(review): F.kl_div expects its first argument as
        # log-probabilities; confirm semantic_features are in that form.
        old_loss = factor * F.kl_div(
            semantic_features, old_semantic_features, reduction="batchmean"
        )
    elif _type == "l2":
        # Reduce the per-sample distances to a scalar: `.item()` below and
        # the returned loss both need a single-element tensor.
        old_loss = factor * torch.pairwise_distance(
            semantic_features, old_semantic_features, p=2
        ).mean()
    elif _type == "cosine":
        # Same scalar reduction as the "l2" branch.
        old_loss = factor * (
            1 - torch.cosine_similarity(semantic_features, old_semantic_features)
        ).mean()
    else:
        raise ValueError(f"Unknown distillation: {_type}.")

    if self.gmmn_config.get("scheduled"):
        old_loss = old_loss * math.sqrt(self._n_classes / self._task_size)

    metrics["old"] += old_loss.item()
    return loss + old_loss
def semantic_regularization(
    features, targets, similarity_matrix, margin=None, aggreg="mean", factor=1.0, metric="cosine"
):
    """
    Regularize features of samples that belong to different classes.

    For every sample one negative (a sample with a different target) is drawn
    at random; the resulting unordered pairs are de-duplicated and scored
    according to ``metric``:

    * ``"cosine"``: hinge ``max(cos(left, right) - margin, 0)``. The margin is
      the constant ``margin`` when given, otherwise
      ``similarity_matrix[target_left, target_right]``.
    * ``"gor"``: dot product of the two feature vectors.
    * ``"snr"``: mean over pairs of ``var(right) / var(left - right)``,
      variances taken along the feature axis.

    Args:
        features: 2-D tensor, one row per sample.
        targets: 1-D tensor of class labels aligned with ``features``.
        similarity_matrix: class-by-class matrix, only read for the
            ``"cosine"`` metric when ``margin`` is ``None``.
        margin: optional constant margin for the ``"cosine"`` metric.
        aggreg: aggregation mode forwarded to ``_aggreg``.
        factor: multiplier applied to the result.
        metric: ``"cosine"``, ``"gor"`` or ``"snr"``.

    Returns:
        The regularization value scaled by ``factor``.

    Raises:
        ValueError: if ``targets`` holds fewer than two distinct classes, so
            no negative pair can be formed.
        NotImplementedError: if ``metric`` is unknown.
    """
    np_targets = targets.cpu().numpy()

    # Fail with a clear message instead of an opaque error from
    # np.random.choice on an empty candidate list.
    if np.unique(np_targets).size < 2:
        raise ValueError(
            "semantic_regularization needs at least two distinct targets to build negative pairs."
        )

    pairs = set()
    for index, target in enumerate(np_targets):
        neg_indexes = np.where(np_targets != target)[0]
        neg_index = int(np.random.choice(neg_indexes))
        # Store pairs with the smaller index first so (i, j) and (j, i) collapse.
        pairs.add((min(index, neg_index), max(index, neg_index)))

    # Sorted so the pair order is reproducible for a fixed random seed.
    pair_indexes = torch.tensor(sorted(pairs)).long()
    left_indexes = pair_indexes[..., 0]
    right_indexes = pair_indexes[..., 1]

    left = features[left_indexes]
    right = features[right_indexes]

    if metric == "cosine":
        similarities = F.cosine_similarity(left, right)

        if margin is not None:
            margins = torch.ones_like(similarities) * margin
        else:
            margins = similarity_matrix[targets[left_indexes], targets[right_indexes]]

        hinges = torch.clamp(similarities - margins, min=0.)

        return factor * _aggreg(hinges, aggreg, features_dim=features.shape[1])
    elif metric == "gor":
        similarities = torch.sum(torch.mul(left, right), 1)
        return factor * _aggreg(similarities, aggreg, features_dim=features.shape[1])
    elif metric == "snr":
        noise = left - right
        var_noise = noise.var(dim=1, unbiased=True)
        var_anchor = right.var(dim=1, unbiased=True)

        dist = torch.mean(var_anchor / var_noise)
        return factor * dist
    else:
        raise NotImplementedError(f"Unknown metric: {metric}.")
def _pair_distance(a, b, distance_type="l2"):
    """
    Return the row-wise distance between two batches of vectors.

    Args:
        a: first tensor.
        b: second tensor, paired row by row with ``a``.
        distance_type: one of ``"l2"`` (Euclidean), ``"l2squared"`` (squared
            Euclidean), ``"l1"`` (Manhattan) or ``"cosine"``
            (``1 - cosine_similarity``).

    Returns:
        Tensor with one distance per pair.

    Raises:
        ValueError: if ``distance_type`` is not supported.
    """
    if distance_type == "l2":
        return F.pairwise_distance(a, b, p=2)
    if distance_type == "l2squared":
        return torch.pow(F.pairwise_distance(a, b, p=2), 2)
    if distance_type == "l1":
        return F.pairwise_distance(a, b, p=1)
    if distance_type == "cosine":
        return 1 - torch.cosine_similarity(a, b)

    raise ValueError("Unknown distance type {}.".format(distance_type))