Python source code examples: torch.cdist()
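torch.cdist(x1, x2, p=2.0) computes the pairwise p-norm distance between the rows of x1 and x2: for inputs of shape (m, f) and (n, f) it returns an (m, n) distance matrix, and batched (3-D) inputs are supported. A minimal sketch before the collected examples (the tensors here are illustrative only):

import torch

a = torch.tensor([[0.0, 0.0], [3.0, 4.0]])  # shape (2, 2)
b = torch.tensor([[0.0, 0.0]])              # shape (1, 2)

d2 = torch.cdist(a, b)       # Euclidean (p=2): tensor([[0.], [5.]])
d1 = torch.cdist(a, b, p=1)  # Manhattan (p=1): tensor([[0.], [7.]])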
Example 1
def euclidean_distances(X, Z=None):
    """Computes the pairwise Euclidean distances between the sample matrices *X* and *Z*.

    Parameters
    ----------
    X: torch tensor of shape (n_samples_1, n_features)
    Z: torch tensor of shape (n_samples_2, n_features)

    Returns
    -------
    D: torch tensor of shape (n_samples_1, n_samples_2),
        the distances matrix.
    """
    # check_pairwise_X_Z is a helper from the source project; given the
    # Z=None default, it presumably validates the inputs and substitutes
    # X for Z when Z is omitted.
    X, Z = check_pairwise_X_Z(X, Z)
    return torch.cdist(X, Z)
Example 2
def test_knn():
    x = th.randn(8, 3)
    kg = dgl.nn.KNNGraph(3)
    d = th.cdist(x, x)

    def check_knn(g, x, start, end):
        for v in range(start, end):
            src, _ = g.in_edges(v)
            src = set(src.numpy())
            i = v - start
            src_ans = set(th.topk(d[start:end, start:end][i], 3, largest=False)[1].numpy() + start)
            assert src == src_ans

    g = kg(x)
    check_knn(g, x, 0, 8)

    g = kg(x.view(2, 4, 3))
    check_knn(g, x, 0, 4)
    check_knn(g, x, 4, 8)

    kg = dgl.nn.SegmentedKNNGraph(3)
    g = kg(x, [3, 5])
    check_knn(g, x, 0, 3)
    check_knn(g, x, 3, 8)
Example 3
def order_points(pts):
    # Reorders each set of four corner points as
    # (top-left, top-right, bottom-right, bottom-left).
    pts_reorder = []
    for pt in pts:
        idx = torch.argsort(pt[:, 0])
        xSorted = pt[idx, :]

        leftMost = xSorted[:2, :]
        rightMost = xSorted[2:, :]

        leftMost = leftMost[torch.argsort(leftMost[:, 1]), :]
        (tl, bl) = leftMost

        # The right-most point farthest from the top-left corner is the
        # bottom-right one; np.newaxis assumes `import numpy as np`.
        D = torch.cdist(tl[np.newaxis], rightMost)[0]
        (br, tr) = rightMost[torch.argsort(D, descending=True), :]
        pts_reorder.append(torch.stack([tl, tr, br, bl]))

    return torch.stack(pts_reorder)
Example 4
def _euclidian(x, y):
    """
    Helper function to calculate the Euclidean distance between torch tensors x and y: sqrt(|x - y|**2).
    Based on torch.cdist

    Parameters
    ----------
    x : torch.tensor
        2D tensor of size m x f
    y : torch.tensor
        2D tensor of size n x f

    Returns
    -------
    torch.tensor
        2D tensor of size m x n
    """
    return torch.cdist(x, y)
Example 5
def _gaussian(x, y, sigma=1.0):
    """
    Helper function to calculate the Gaussian distance between torch tensors x and y: exp(-|x - y|**2 / (2 * sigma**2)).
    Based on torch.cdist

    Parameters
    ----------
    x : torch.tensor
        2D tensor of size m x f
    y : torch.tensor
        2D tensor of size n x f
    sigma : float, default=1.0
        scaling factor for the Gaussian kernel

    Returns
    -------
    torch.tensor
        2D tensor of size m x n
    """
    d2 = _euclidian(x, y) ** 2
    result = torch.exp(-d2 / (2 * sigma * sigma))
    return result
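A short usage sketch for the two helpers above (values illustrative; assumes `import torch`):

x = torch.randn(5, 16)          # 5 samples, 16 features
y = torch.randn(3, 16)          # 3 samples, 16 features
K = _gaussian(x, y, sigma=2.0)  # (5, 3) kernel matrix; entries lie in (0, 1],
                                # and identical rows map to exactly 1.0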
Example 6
def _sample(self, features: Tensor, labels: List[int]) -> TTripletsIds:
    """
    This method samples the hardest triplets inside the batch.

    Args:
        features: has the shape of [batch_size, feature_size]
        labels: labels of the samples in the batch

    Returns:
        the batch of the triplets in the order below:
        (anchor, positive, negative)
    """
    assert features.shape[0] == len(labels)

    if self._need_norm:
        features = normalize(samples=features)

    dist_mat = torch.cdist(x1=features, x2=features, p=2)

    ids_anchor, ids_pos, ids_neg = self._sample_from_distmat(
        distmat=dist_mat, labels=labels
    )

    return ids_anchor, ids_pos, ids_neg
Example 7
def batched_l1_dist(a, b):
    res = th.cdist(a, b, p=1)
    return res
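torch.cdist accepts batched 3-D inputs, which is what makes this helper "batched". A quick shape check (sizes illustrative; assumes the snippet's `import torch as th`):

a = th.randn(4, 10, 16)      # batch of 4 sets of 10 vectors of dim 16
b = th.randn(4, 20, 16)      # batch of 4 sets of 20 vectors of dim 16
res = batched_l1_dist(a, b)  # pairwise L1 distances, shape (4, 10, 20)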
Example 8
def score_emb(self, s_emb, p_emb, o_emb, combine: str):
    n = p_emb.size(0)
    if combine == "spo":
        out = -F.pairwise_distance(s_emb + p_emb, o_emb, p=self._norm)
    elif combine == "sp_":
        out = -torch.cdist(s_emb + p_emb, o_emb, p=self._norm)
    elif combine == "_po":
        out = -torch.cdist(o_emb - p_emb, s_emb, p=self._norm)
    else:
        out = super().score_emb(s_emb, p_emb, o_emb, combine)
    return out.view(n, -1)
Example 9
def assign_by_euclidian_at_k(X, T, k):
    """
    X : [nb_samples x nb_features], e.g. 100 x 64 (embeddings)
    T : target labels corresponding to the samples in X
    k : for each sample, assign target labels of k nearest points
    """
    distances = torch.cdist(X, X)

    # Get the k nearest points, skipping index 0 (each sample itself,
    # at distance zero).
    indices = distances.topk(k + 1, largest=False)[1][:, 1: k + 1]
    return np.array([[T[i] for i in ii] for ii in indices])
Example 10
def forward(self, X, T):
    P = F.normalize(self.proxies, p=2, dim=-1) * self.scaling_p
    X = F.normalize(X, p=2, dim=-1) * self.scaling_x
    D = torch.cdist(X, P) ** 2
    T = binarize_and_smooth_labels(T, len(P), self.smoothing_const)
    # note that, compared to Proxy-NCA, the positive is included in the denominator
    loss = torch.sum(-T * F.log_softmax(-D, -1), -1)
    return loss.mean()
Example 11
def compute_indices(inputs_orig, codebook):
    bi = []
    SZ = 10000  # process inputs in chunks of 10000 rows to bound memory use
    for i in range(0, inputs_orig.size(0), SZ):
        inputs = inputs_orig[i:i + SZ]
        # N x K distances between the chunk and the codebook entries
        distances_matrix = torch.cdist(inputs, codebook)
        # N x 1 index of the nearest codebook entry
        indic = torch.min(distances_matrix, dim=-1)[1].unsqueeze(1)
        bi.append(indic)
    return torch.cat(bi, dim=0)
Example 12
def cdist(X, Y=None, quadratic_expansion=False):
    if quadratic_expansion:
        return _dist(X, Y, _euclidian_fast)
    else:
        return _dist(X, Y, _euclidian)
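_dist and _euclidian_fast are defined elsewhere in that project and not shown here. A quadratic-expansion distance typically uses the identity |x - y|**2 = |x|**2 + |y|**2 - 2*x.y, trading exactness for a single matrix multiply; a hypothetical sketch of such a helper (the name and the clamp are assumptions, not the project's code):

def _euclidian_fast_sketch(x, y):
    # Squared norms of each row, arranged to broadcast to an (m, n) grid.
    x_sq = (x ** 2).sum(dim=1, keepdim=True)    # (m, 1)
    y_sq = (y ** 2).sum(dim=1, keepdim=True).T  # (1, n)
    d2 = x_sq + y_sq - 2.0 * (x @ y.T)          # (m, n) squared distances
    # Clamp at 0 before sqrt: rounding can make d2 slightly negative.
    return torch.clamp(d2, min=0.0).sqrt()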
Example 13
def squared_euclidian_distance(a, b):
    return torch.cdist(a, b) ** 2
Example 14
def cluster(self, lr=0.5, max_iter_p=10, max_iter_h=3000, lr_decay=200, early_stop=-1):
    """ Compute Wasserstein clustering.

    Args:
        lr (float): GD learning rate
        max_iter_p (int): max num of iterations of clustering
        max_iter_h (int): max num of updates of h
        lr_decay (float): learning rate decay

    Returns:
        idx (pytorch Tensor): assignment of e to p
        pred_label_e (pytorch Tensor): labels of e that come from the nearest p

    See Also
    --------
    update_p : update p
    update_map : compute optimal transportation
    """
    e_idx, pred_label_e = None, None
    for iter_p in range(max_iter_p):
        dist = torch.cdist(self.data_p, self.data_e) ** 2
        e_idx, pred_label_e = self.update_map(dist, max_iter_h, lr=lr, lr_decay=lr_decay, early_stop=early_stop)
        if self.update_p(e_idx, iter_p):
            break
    return e_idx, pred_label_e
Example 15
def cluster(self, reg_type=0, reg=0.01, lr=0.5, max_iter_p=10, max_iter_h=3000, lr_decay=200, early_stop=-1):
    """ Compute Wasserstein clustering.

    Args:
        reg_type (int): specify the regularization term, 0 means no regularization
        reg (float): regularization weight
        lr (float): GD learning rate
        max_iter_p (int): max num of iterations of clustering
        max_iter_h (int): max num of updates of h
        lr_decay (int): learning rate decay interval

    See Also
    --------
    update_p : update p
    update_map : compute optimal transportation
    """
    self.data_p.requires_grad_(True)
    e_idx, pred_label_e = None, None
    for iter_p in range(max_iter_p):
        dist = torch.cdist(self.data_p, self.data_e) ** 2
        e_idx, pred_label_e = self.update_map(dist, max_iter_h, lr=lr, lr_decay=lr_decay, early_stop=early_stop)
        # reg = reg / 20 * (20 - iter_p)
        # reg /= 1
        if self.update_p(e_idx, iter_p, reg_type, reg):
            break
    return e_idx, pred_label_e
Example 16
def __init__(self, data, sampling='unisquare', label=None, weight_p=None, thres=1e-5, ratio=100, verbose=True, device='cpu'):
    """ Set up parameters.

    Args:
        data (pytorch Tensor): initial coordinates of p
        label (pytorch Tensor): labels of p
        weight_p (pytorch Tensor): weights of p
        thres (float): threshold to break loops
        ratio (float): ratio of the num of e to the num of p

    Attributes:
        thres (float): threshold to break loops
        lr (float): learning rate
        verbose (bool): console output verbosity flag
        data_p (pytorch FloatTensor): coordinates of p
        label_p (pytorch IntTensor): labels of p
        mass_p (pytorch FloatTensor): mass of the clusters of p
        weight_p (pytorch FloatTensor): Dirac measure of p
    """
    if not isinstance(data, torch.Tensor):
        raise Exception('input is not a pytorch tensor')
    if label is not None and not isinstance(label, torch.Tensor):
        raise Exception('label is not a pytorch tensor')
    if weight_p is not None and not isinstance(weight_p, torch.Tensor):
        raise Exception('weight_p is not a pytorch tensor')

    self.data_p = data
    self.data_p_original = self.data_p.clone()
    num_p = data.shape[0]

    self.label_p = label
    self.weight_p = weight_p if weight_p is not None else torch.ones(num_p).double().to(device) / num_p

    self.thres = thres
    self.verbose = verbose
    self.ratio = ratio
    self.device = device

    utils.assert_boundary(self.data_p)

    num_e = int(self.ratio * num_p)
    dim = self.data_p.shape[1]
    self.data_e, _ = utils.random_sample(num_e, dim, sampling=sampling)
    self.data_e = torch.from_numpy(self.data_e).double().to(self.device)
    self.dist = torch.cdist(self.data_p, self.data_e, p=2).double().to(self.device) ** 2
Example 17
def cluster(self, lr=0.5, max_iter_p=10, max_iter_h=3000, lr_decay=200, early_stop=-1, beta=0, reg=0.):
    """ Compute Wasserstein clustering.

    Args:
        lr (float): GD learning rate
        lr_decay (float): learning rate decay
        max_iter_p (int): max num of iterations of clustering
        max_iter_h (int): max num of updates of h
        reg (float): regularization weight for regularized k-means

    Returns:
        idx (pytorch Tensor): assignment of e to p
        pred_label_e (pytorch Tensor): labels of e that come from the nearest p

    See Also
    --------
    update_p : update p
    update_map : compute optimal transportation
    """
    e_idx_return, pred_label_e_return = [], []
    n = len(self.data_e)
    dhss = []
    e_idxss = []
    for iter_p in range(max_iter_p):
        e_idx, pred_label_e = [], []
        for i in range(n):
            # if self.verbose:
            print("solving marginal #" + str(i))
            dist = (torch.cdist(self.data_p, self.data_e[i]) ** 2).double().to(self.device)
            idx, pred_label, dhs, e_idxs = self.update_map(i, dist, max_iter_h, lr=lr, lr_decay=lr_decay, beta=beta, early_stop=early_stop, reg=reg)
            dhss.append(dhs)
            e_idxss.append(e_idxs)
            e_idx.append(idx)
            pred_label_e.append(pred_label)
        if self.update_p(e_idx, iter_p):
            e_idx_return, pred_label_e_return = e_idx, pred_label_e
            break
        if iter_p == max_iter_p - 1:
            e_idx_return, pred_label_e_return = e_idx, pred_label_e

    output = dict()
    output['idx'] = e_idx_return
    output['pred_label_e'] = pred_label_e_return
    output['dhss'] = dhss
    output['idxs'] = e_idxss

    # compute WD
    wd = 0
    for e_idx, data_e, weight_e in zip(e_idx_return, self.data_e, self.weight_e):
        tmp = self.data_p[e_idx, :]
        tmp -= data_e
        tmp = tmp ** 2
        wd += torch.sum(torch.sum(tmp, dim=1) * weight_e)
    output['wd'] = 2 * wd
    return output
Example 18
def cluster(self, reg_type=0, reg=0.01, lr=0.5, max_iter_p=10, max_iter_h=3000, lr_decay=200, early_stop=-1, beta=0):
    """ Compute Wasserstein clustering.

    Args:
        reg_type (int): specify the regularization term, 0 means no regularization
        reg (float): regularization weight
        max_iter_p (int): max num of iterations of clustering
        max_iter_h (int): max num of updates of h
        lr (float): GD learning rate
        lr_decay (float): learning rate decay

    Returns:
        idx (pytorch Tensor): assignment of e to p
        pred_label_e (pytorch Tensor): labels of e that come from the nearest p

    See Also
    --------
    update_p : update p
    update_map : compute optimal transportation
    """
    e_idx_return, pred_label_e_return = [], []
    n = len(self.data_e)
    dhss = []
    e_idxss = []
    for iter_p in range(max_iter_p):
        e_idx, pred_label_e = [], []
        for i in range(n):
            # if self.verbose:
            print("solving marginal #" + str(i))
            dist = (torch.cdist(self.data_p, self.data_e[i]) ** 2).double().to(self.device)
            idx, pred_label, dhs, e_idxs = self.update_map(i, dist, max_iter_h, lr=lr, lr_decay=lr_decay, beta=beta, early_stop=early_stop)
            dhss.append(dhs)
            e_idxss.append(e_idxs)
            e_idx.append(idx)
            pred_label_e.append(pred_label)
        if self.update_p(e_idx, iter_p, reg=reg):
            e_idx_return, pred_label_e_return = e_idx, pred_label_e
            break
        if iter_p == max_iter_p - 1:
            e_idx_return, pred_label_e_return = e_idx, pred_label_e

    output = dict()
    output['idx'] = e_idx_return
    output['pred_label_e'] = pred_label_e_return
    output['dhss'] = dhss
    output['idxs'] = e_idxss

    # compute WD
    wd = 0
    for e_idx, data_e, weight_e in zip(e_idx_return, self.data_e, self.weight_e):
        tmp = self.data_p[e_idx, :]
        tmp -= data_e
        tmp = tmp ** 2
        wd += torch.sum(torch.sum(tmp, dim=1) * weight_e)
    output['wd'] = 2 * wd
    return output
Example 19
def cluster(self, lr=0.5, max_iter_p=10, max_iter_h=3000, lr_decay=200, early_stop=-1, beta=0):
    """ Compute Wasserstein clustering.

    Args:
        lr (float): GD learning rate
        lr_decay (float): learning rate decay
        max_iter_p (int): max num of iterations of clustering
        max_iter_h (int): max num of updates of h

    Returns:
        idx (pytorch Tensor): assignment of e to p
        pred_label_e (pytorch Tensor): labels of e that come from the nearest p

    See Also
    --------
    update_p : update p
    update_map : compute optimal transportation
    """
    dhss = []
    e_idxss = []
    # one learning rate per marginal, scaled inversely by its mass mismatch with p
    lrs = [lr / torch.abs(weight - self.weight_p_sum) for weight in self.weight_e_sum]
    e_idx_return, pred_label_e_return = [], []
    n = len(self.data_e)
    for iter_p in range(max_iter_p):
        e_idx, pred_label_e = [], []
        for i in range(n):
            lr = lrs[i]
            # if self.verbose:
            print("solving marginal #" + str(i))
            dist = torch.cdist(self.data_p, self.data_e[i]) ** 2
            idx, pred_label, dhs, e_idxs = self.update_map(i, dist, max_iter_h, lr=lr, lr_decay=lr_decay, beta=beta, early_stop=early_stop)
            dhss.append(dhs)
            e_idxss.append(e_idxs)
            e_idx.append(idx)
            pred_label_e.append(pred_label)
        if self.update_p(e_idx, iter_p):
            e_idx_return, pred_label_e_return = e_idx, pred_label_e
            break
        if iter_p == max_iter_p - 1:
            e_idx_return, pred_label_e_return = e_idx, pred_label_e

    output = dict()
    output['idx'] = e_idx_return
    output['pred_label_e'] = pred_label_e_return
    output['dhss'] = dhss
    output['idxs'] = e_idxss
    return output
Example 20
def cluster(self, reg_type=0, reg=0.01, lr=0.5, max_iter_p=10, max_iter_h=3000, lr_decay=200, early_stop=-1, beta=0):
    """ Compute Wasserstein clustering.

    Args:
        reg_type (int): specify the regularization term, 0 means no regularization
        reg (float): regularization weight
        max_iter_p (int): max num of iterations of clustering
        max_iter_h (int): max num of updates of h
        lr (float): GD learning rate
        lr_decay (float): learning rate decay

    Returns:
        output (dict): 'wd' maps to the computed Wasserstein distance

    See Also
    --------
    update_e : update e
    update_map : compute optimal transportation
    """
    e_idx_return = []
    n = len(self.data_e)
    for iter_p in range(max_iter_p):
        e_idx = []
        for i in range(n):
            # if self.verbose:
            print("solving marginal #" + str(i))
            dist = (torch.cdist(self.data_p, self.data_e[i]) ** 2).to(self.device)
            output = self.update_map(i, dist, max_iter_h, lr=lr, lr_decay=lr_decay, beta=beta, early_stop=early_stop)
            e_idx.append(output['e_idx'])
        # keep the latest assignments so the WD computation below uses them
        e_idx_return = e_idx
        if self.update_e(e_idx, iter_p):
            break

    output = dict()
    # compute WD
    wd = 0
    for e_idx, data_e, weight_e in zip(e_idx_return, self.data_e, self.weight_e):
        tmp = self.data_p[e_idx, :]
        tmp -= data_e
        tmp = tmp ** 2
        wd += torch.sum(torch.sum(tmp, dim=1) * weight_e)
    output['wd'] = 2 * wd
    return output