Python源码示例:torch.diagonal()
示例1
def _components(self) -> Dict[Tuple[str, str, str], Tuple[Tensor, Tensor]]:
states_per_measure = defaultdict(list)
for state_belief in self.state_beliefs:
for m, measure in enumerate(self.design.measures):
H = state_belief.H[:, m, :].data
m = H * state_belief.means.data
std = H * torch.diagonal(state_belief.covs.data, dim1=-2, dim2=-1).sqrt()
states_per_measure[measure].append((m, std))
out = {}
for measure, means_and_stds in states_per_measure.items():
means, stds = zip(*means_and_stds)
means = torch.stack(means).permute(1, 0, 2)
stds = torch.stack(stds).permute(1, 0, 2)
for s, (process_name, state_element) in enumerate(self.design.state_elements):
if ~torch.isclose(means[:, :, s].abs().max(), torch.zeros(1)):
out[(measure, process_name, state_element)] = (means[:, :, s], stds[:, :, s])
return out
示例2
def test_lkj_covariance_prior_log_prob(self, cuda=False):
device = torch.device("cuda") if cuda else torch.device("cpu")
sd_prior = SmoothedBoxPrior(exp(-1), exp(1))
if cuda:
sd_prior = sd_prior.cuda()
prior = LKJCovariancePrior(2, torch.tensor(0.5, device=device), sd_prior)
S = torch.eye(2, device=device)
self.assertAlmostEqual(prior.log_prob(S).item(), -3.59981, places=4)
S = torch.stack([S, torch.tensor([[1.0, 0.5], [0.5, 1]], device=S.device)])
self.assertTrue(approx_equal(prior.log_prob(S), torch.tensor([-3.59981, -3.45597], device=S.device)))
with self.assertRaises(ValueError):
prior.log_prob(torch.eye(3, device=device))
# For eta=1.0 log_prob is flat over all covariance matrices
prior = LKJCovariancePrior(2, torch.tensor(1.0, device=device), sd_prior)
marginal_sd = torch.diagonal(S, dim1=-2, dim2=-1).sqrt()
log_prob_expected = prior.correlation_prior.C + prior.sd_prior.log_prob(marginal_sd)
self.assertTrue(approx_equal(prior.log_prob(S), log_prob_expected))
示例3
def test_lkj_covariance_prior_log_prob_hetsd(self, cuda=False):
device = torch.device("cuda") if cuda else torch.device("cpu")
a = torch.tensor([exp(-1), exp(-2)], device=device)
b = torch.tensor([exp(1), exp(2)], device=device)
sd_prior = SmoothedBoxPrior(a, b)
prior = LKJCovariancePrior(2, torch.tensor(0.5, device=device), sd_prior)
S = torch.eye(2, device=device)
self.assertAlmostEqual(prior.log_prob(S).item(), -4.71958, places=4)
S = torch.stack([S, torch.tensor([[1.0, 0.5], [0.5, 1]], device=S.device)])
self.assertTrue(approx_equal(prior.log_prob(S), torch.tensor([-4.71958, -4.57574], device=S.device)))
with self.assertRaises(ValueError):
prior.log_prob(torch.eye(3, device=device))
# For eta=1.0 log_prob is flat over all covariance matrices
prior = LKJCovariancePrior(2, torch.tensor(1.0, device=device), sd_prior)
marginal_sd = torch.diagonal(S, dim1=-2, dim2=-1).sqrt()
log_prob_expected = prior.correlation_prior.C + prior.sd_prior.log_prob(marginal_sd)
self.assertTrue(approx_equal(prior.log_prob(S), log_prob_expected))
示例4
def _is_valid_correlation_matrix(Sigma, tol=1e-6):
"""Check if supplied matrix is a valid correlation matrix
A matrix is a valid correlation matrix if it is positive semidefinite, and
if all diagonal elements are equal to 1.
Args:
Sigma: A n x n correlation matrix, or a batch of b correlation matrices
with shape b x n x n
tol: The tolerance with which to check unit value of the diagonal elements
Returns:
True if Sigma is a valid correlation matrix, False otherwise (in batch
mode, all matrices in the batch need to be valid correlation matrices)
"""
pdef = torch.all(constraints.positive_definite.check(Sigma))
return pdef and all(torch.all(torch.abs(S.diag() - 1) < tol) for S in Sigma.view(-1, *Sigma.shape[-2:]))
示例5
def _is_valid_correlation_matrix_cholesky_factor(L, tol=1e-6):
"""Check if supplied matrix is a Cholesky factor of a valid correlation matrix
A matrix is a Cholesky fator of a valid correlation matrix if it is lower
triangular, has positive diagonal, and unit row-sum
Args:
L: A n x n lower-triangular matrix, or a batch of b lower-triangular
matrices with shape b x n x n
tol: The tolerance with which to check positivity of the diagonal and
unit-sum of the rows
Returns:
True if L is a Cholesky factor of a valid correlation matrix, False
otherwise (in batch mode, all matrices in the batch need to be
Cholesky factors of valid correlation matrices)
"""
unit_row_length = torch.all((torch.norm(L, dim=-1) - 1).abs() < tol)
return unit_row_length and torch.all(constraints.lower_cholesky.check(L))
示例6
def deprecate_task_noise_corr(state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs):
if prefix + "task_noise_corr_factor" in state_dict:
# Remove after 1.0
warnings.warn(
"Loading a deprecated parameterization of _MultitaskGaussianLikelihoodBase. Consider re-saving your model.",
OldVersionWarning,
)
# construct the task correlation matrix from the factors using the old parameterization
corr_factor = state_dict.pop(prefix + "task_noise_corr_factor").squeeze(0)
corr_diag = state_dict.pop(prefix + "task_noise_corr_diag").squeeze(0)
num_tasks, rank = corr_factor.shape[-2:]
M = corr_factor.matmul(corr_factor.transpose(-1, -2))
idx = torch.arange(M.shape[-1], dtype=torch.long, device=M.device)
M[..., idx, idx] += corr_diag
sem_inv = 1 / torch.diagonal(M, dim1=-2, dim2=-1).sqrt().unsqueeze(-1)
C = M * sem_inv.matmul(sem_inv.transpose(-1, -2))
# perform a Cholesky decomposition and extract the required entries
L = torch.cholesky(C)
tidcs = torch.tril_indices(num_tasks, rank)[:, 1:]
task_noise_corr = L[..., tidcs[0], tidcs[1]]
state_dict[prefix + "task_noise_corr"] = task_noise_corr
示例7
def lognorm_to_norm(mu: Tensor, Cov: Tensor) -> Tuple[Tensor, Tensor]:
"""Compute mean and covariance of a MVN from those of the associated log-MVN
If `Y` is log-normal with mean mu_ln and covariance Cov_ln, then
`X ~ N(mu_n, Cov_n)` with
Cov_n_{ij} = log(1 + Cov_ln_{ij} / (mu_ln_{i} * mu_n_{j}))
mu_n_{i} = log(mu_ln_{i}) - 0.5 * log(1 + Cov_ln_{ii} / mu_ln_{i}**2)
Args:
mu: A `batch_shape x n` mean vector of the log-Normal distribution.
Cov: A `batch_shape x n x n` covariance matrix of the log-Normal
distribution.
Returns:
A two-tuple containing:
- The `batch_shape x n` mean vector of the Normal distribution
- The `batch_shape x n x n` covariance matrix of the Normal distribution
"""
Cov_n = torch.log(1 + Cov / (mu.unsqueeze(-1) * mu.unsqueeze(-2)))
mu_n = torch.log(mu) - 0.5 * torch.diagonal(Cov_n, dim1=-1, dim2=-2)
return mu_n, Cov_n
示例8
def norm_to_lognorm(mu: Tensor, Cov: Tensor) -> Tuple[Tensor, Tensor]:
"""Compute mean and covariance of a log-MVN from its MVN sufficient statistics
If `X ~ N(mu, Cov)` and `Y = exp(X)`, then `Y` is log-normal with
mu_ln_{i} = exp(mu_{i} + 0.5 * Cov_{ii})
Cov_ln_{ij} = exp(mu_{i} + mu_{j} + 0.5 * (Cov_{ii} + Cov_{jj})) *
(exp(Cov_{ij}) - 1)
Args:
mu: A `batch_shape x n` mean vector of the Normal distribution.
Cov: A `batch_shape x n x n` covariance matrix of the Normal distribution.
Returns:
A two-tuple containing:
- The `batch_shape x n` mean vector of the log-Normal distribution.
- The `batch_shape x n x n` covariance matrix of the log-Normal
distribution.
"""
diag = torch.diagonal(Cov, dim1=-1, dim2=-2)
b = mu + 0.5 * diag
mu_ln = torch.exp(b)
Cov_ln = (torch.exp(Cov) - 1) * torch.exp(b.unsqueeze(-1) + b.unsqueeze(-2))
return mu_ln, Cov_ln
示例9
def exact_matrix_logarithm_trace(Fx, x):
"""
Computes slow-ass Tr(Ln(d(Fx)/dx))
:param Fx: output of f(x)
:param x: input
:return: Tr(Ln(I + df/dx))
"""
bs = Fx.size(0)
outVector = torch.sum(Fx, 0).view(-1)
outdim = outVector.size()[0]
indim = x.view(bs, -1).size()
jac = torch.empty([bs, outdim, indim[1]], dtype=torch.float)
# for each output Fx[i] compute d(Fx[i])/d(x)
for i in range(outdim):
zero_gradients(x)
jac[:, i, :] = torch.autograd.grad(outVector[i], x,
retain_graph=True)[0].view(bs, outdim)
jac = jac.cpu().numpy()
iden = np.eye(jac.shape[1])
log_jac = np.stack([logm(jac[i] + iden) for i in range(bs)])
trace_jac = np.diagonal(log_jac, axis1=1, axis2=2).sum(1)
return trace_jac
示例10
def power_series_full_jac_exact_trace(Fx, x, k):
"""
Fast-boi Tr(Ln(d(Fx)/dx)) using power-series approximation with full
jacobian and exact trace
:param Fx: output of f(x)
:param x: input
:param k: number of power-series terms to use
:return: Tr(Ln(I + df/dx))
"""
_, jac = compute_log_det(x, Fx)
jacPower = jac
summand = torch.zeros_like(jacPower)
for i in range(1, k+1):
if i > 1:
jacPower = torch.matmul(jacPower, jac)
if (i + 1) % 2 == 0:
summand += jacPower / (np.float(i))
else:
summand -= jacPower / (np.float(i))
trace = torch.diagonal(summand, dim1=1, dim2=2).sum(1)
return trace
示例11
def _apply_loss(d, d_gt):
"""
LOSS CALCULATION OF THE BATCH
Arguments:
----------
- d: Computed displacements
- d_gt: Ground truth displacements
Returns:
--------
- loss: calculate loss according to the specified loss function
"""
# Set all pixel entries to 0 whose displacement magnitude is bigger than 10px
pixel_thresh = 10
dispMagnitude = torch.sqrt(torch.pow(d_gt[:,:,0],2) + torch.pow(d_gt[:,:,1], 2)).unsqueeze(-1).expand(-1,-1,2)
idx = dispMagnitude > pixel_thresh
z = torch.zeros(dispMagnitude.shape)
d = torch.where(idx, z, d)
d_gt = torch.where(idx, z, d_gt)
# Calculate loss according to formula in paper
return torch.sum(torch.sqrt(torch.diagonal(torch.bmm(d - d_gt, (d-d_gt).permute(0,2,1)), dim1=-2, dim2=-1)), dim = 1)
示例12
def tobit_probs(mean: Tensor,
cov: Tensor,
lower: Optional[Tensor] = None,
upper: Optional[Tensor] = None) -> Tuple[Tensor, Tensor]:
# CDF not well behaved at tails, truncate
clamp = lambda z: torch.clamp(z, -5., 5.)
if upper is None:
upper = torch.empty_like(mean)
upper[:] = float('inf')
if lower is None:
lower = torch.empty_like(mean)
lower[:] = float('-inf')
std = torch.diagonal(cov, dim1=-2, dim2=-1)
probs_up = torch.zeros_like(mean)
is_cens_up = torch.isfinite(upper)
upper_z = (upper[is_cens_up] - mean[is_cens_up]) / std[is_cens_up]
probs_up[is_cens_up] = 1. - std_normal.cdf(clamp(upper_z))
probs_lo = torch.zeros_like(mean)
is_cens_lo = torch.isfinite(lower)
lower_z = (lower[is_cens_lo] - mean[is_cens_lo]) / std[is_cens_lo]
probs_lo[is_cens_lo] = std_normal.cdf(clamp(lower_z))
return probs_lo, probs_up
示例13
def forward(self, src, adj, tgt_seq, binary_tgt,return_attns=False,int_preds=False):
batch_size = src[0].size(0)
src_seq, src_pos = src
if self.decoder_type in ['sa_m','rnn_m']:
tgt_seq = tgt_seq[:, :-1]
enc_output, *enc_self_attns = self.encoder(src_seq, adj, src_pos,return_attns=return_attns)
dec_output, *dec_output2 = self.decoder(tgt_seq,src_seq,enc_output,return_attns=return_attns,int_preds=int_preds)
if self.decoder_type == 'rnn_m':
seq_logit = dec_output
elif self.decoder_type == 'mlp':
seq_logit = dec_output
else:
seq_logit = self.tgt_word_proj(dec_output)
if self.decoder_type == 'graph':
seq_logit = torch.diagonal(seq_logit,0,1,2)
if int_preds:
intermediate_preds = []
tgt_word_proj_copy = self.tgt_word_proj.linear.weight.data.detach().repeat(batch_size,1,1)
for int_idx,int_out in enumerate(dec_output2[0][:-1]):
int_out = torch.bmm(int_out,tgt_word_proj_copy.transpose(1,2))
intermediate_preds += [torch.diagonal(int_out,0,1,2)]
return seq_logit.view(-1, seq_logit.size(-1)),enc_output, intermediate_preds
elif return_attns:
return seq_logit.view(-1,seq_logit.size(-1)),enc_output,enc_self_attns,dec_output2
else:
return seq_logit.view(-1,seq_logit.size(-1)),enc_output,None
示例14
def generate_labels(pairs, rel_dict):
pool = ThreadPool(8)
def func(pair):
tmp = rel_dict.get((pair[0], pair[1]), [0])
out = np.zeros(cfg.MODEL.NUM_RELATIONS, dtype=np.int32)
if pair[0] != -1 and pair[1] != -1: # If pair = (-1,-1) then this pair is diagonal pair, we don't need the labels
out[tmp] = 1
return out
results = pool.map(func, pairs)
pool.close()
pool.join()
results = np.stack(results)
return results
示例15
def generate_gt_rel_labels(rois, roi_labels, roi_feats, pairs, roidb):
# rois and roi_labels has to be original
proposal_num = get_proposal_num(rois)
head_pointer = 0
rel_labels = []
for i in range(len(proposal_num)):
if proposal_num[i] == 0:
continue
tmp_rel_labels = torch.zeros(proposal_num[i], proposal_num[i], cfg.MODEL.NUM_RELATIONS).long()
tmp_rel_labels[:,:,0].fill_(1)
for rel, rel_ids in roidb[i]['gt_relationships'].items():
tmp_rel_labels[rel[0],rel[1],0] = 0
tmp_rel_labels[rel[0],rel[1]][rel_ids] = 1
# remove diagonals
torch.diagonal(tmp_rel_labels, dim1=0, dim2=1).fill_(0)
# Remove roi_label == 0 ones
remaining_index = roi_labels[head_pointer: head_pointer + proposal_num[i]] > 0
remaining_index = torch.from_numpy(remaining_index.astype('uint8'))
if remaining_index.sum() >= 0:
tmp_rel_labels = tmp_rel_labels[remaining_index][:, remaining_index]
rel_labels.append(tmp_rel_labels.reshape(-1, cfg.MODEL.NUM_RELATIONS))
head_pointer += proposal_num[i]
out = torch.cat(rel_labels, 0).numpy()
if out.shape[0] != pairs.shape[0]:
import pdb;pdb.set_trace()
return out
示例16
def test_multivariate_normal_batch_lazy(self, cuda=False):
device = torch.device("cuda") if cuda else torch.device("cpu")
for dtype in (torch.float, torch.double):
mean = torch.tensor([0, 1, 2], device=device, dtype=dtype).repeat(2, 1)
covmat = torch.diag(torch.tensor([1, 0.75, 1.5], device=device, dtype=dtype)).repeat(2, 1, 1)
covmat_chol = torch.cholesky(covmat)
mvn = MultivariateNormal(mean=mean, covariance_matrix=NonLazyTensor(covmat))
self.assertTrue(torch.is_tensor(mvn.covariance_matrix))
self.assertIsInstance(mvn.lazy_covariance_matrix, LazyTensor)
self.assertAllClose(mvn.variance, torch.diagonal(covmat, dim1=-2, dim2=-1))
self.assertAllClose(mvn._unbroadcasted_scale_tril, covmat_chol)
mvn_plus1 = mvn + 1
self.assertAllClose(mvn_plus1.mean, mvn.mean + 1)
self.assertAllClose(mvn_plus1.covariance_matrix, mvn.covariance_matrix)
self.assertAllClose(mvn_plus1._unbroadcasted_scale_tril, covmat_chol)
mvn_times2 = mvn * 2
self.assertAllClose(mvn_times2.mean, mvn.mean * 2)
self.assertAllClose(mvn_times2.covariance_matrix, mvn.covariance_matrix * 4)
self.assertAllClose(mvn_times2._unbroadcasted_scale_tril, covmat_chol * 2)
mvn_divby2 = mvn / 2
self.assertAllClose(mvn_divby2.mean, mvn.mean / 2)
self.assertAllClose(mvn_divby2.covariance_matrix, mvn.covariance_matrix / 4)
self.assertAllClose(mvn_divby2._unbroadcasted_scale_tril, covmat_chol / 2)
# TODO: Add tests for entropy, log_prob, etc. - this an issue b/c it
# uses using root_decomposition which is not very reliable
# self.assertTrue(torch.allclose(mvn.entropy(), 4.3157 * torch.ones(2)))
# self.assertTrue(
# torch.allclose(mvn.log_prob(torch.zeros(2, 3)), -4.8157 * torch.ones(2))
# )
# self.assertTrue(
# torch.allclose(mvn.log_prob(torch.zeros(2, 2, 3)), -4.8157 * torch.ones(2, 2))
# )
conf_lower, conf_upper = mvn.confidence_region()
self.assertAllClose(conf_lower, mvn.mean - 2 * mvn.stddev)
self.assertAllClose(conf_upper, mvn.mean + 2 * mvn.stddev)
self.assertTrue(mvn.sample().shape == torch.Size([2, 3]))
self.assertTrue(mvn.sample(torch.Size([2])).shape == torch.Size([2, 2, 3]))
self.assertTrue(mvn.sample(torch.Size([2, 4])).shape == torch.Size([2, 4, 2, 3]))
示例17
def log_prob(self, X):
# I'm sure this could be done more elegantly
logdetp = torch.logdet(X)
Kinvp = torch.matmul(self.K_inv, X)
trKinvp = torch.diagonal(Kinvp, dim1=-2, dim2=-1).sum(-1)
return self.C + 0.5 * (self.nu - self.n - 1) * logdetp - trKinvp
示例18
def log_prob(self, X):
logdetp = torch.logdet(X)
pinvK = torch.solve(self.K, X)[0]
trpinvK = torch.diagonal(pinvK, dim1=-2, dim2=-1).sum(-1) # trace in batch mode
return self.C - 0.5 * ((self.nu + 2 * self.n) * logdetp + trpinvK)
示例19
def log_prob(self, X):
if any(s != self.n for s in X.shape[-2:]):
raise ValueError("Cholesky factor is not of size n={}".format(self.n.item()))
if not _is_valid_correlation_matrix_cholesky_factor(X):
raise ValueError("Input is not a Cholesky factor of a valid correlation matrix")
log_diag_sum = torch.diagonal(X, dim1=-2, dim2=-1).log().sum(-1)
return self.C + (self.eta - 1) * 2 * log_diag_sum
示例20
def log_prob(self, X):
marginal_var = torch.diagonal(X, dim1=-2, dim2=-1)
if not torch.all(marginal_var >= 0):
raise ValueError("Variance(s) cannot be negative")
marginal_sd = marginal_var.sqrt()
sd_diag_mat = _batch_form_diag(1 / marginal_sd)
correlations = torch.matmul(torch.matmul(sd_diag_mat, X), sd_diag_mat)
log_prob_corr = self.correlation_prior.log_prob(correlations)
log_prob_sd = self.sd_prior.log_prob(marginal_sd)
return log_prob_corr + log_prob_sd
示例21
def _batch_form_diag(tsr):
"""Form diagonal matrices in batch mode."""
eye = torch.eye(tsr.shape[-1], dtype=tsr.dtype, device=tsr.device)
M = tsr.unsqueeze(-1).expand(tsr.shape + tsr.shape[-1:])
return eye * M
示例22
def __init__(
self,
num_tasks,
rank=0,
task_correlation_prior=None,
batch_shape=torch.Size(),
noise_prior=None,
noise_constraint=None,
):
"""
Args:
num_tasks (int): Number of tasks.
rank (int): The rank of the task noise covariance matrix to fit. If `rank` is set to 0,
then a diagonal covariance matrix is fit.
task_correlation_prior (:obj:`gpytorch.priors.Prior`): Prior to use over the task noise correlaton matrix.
Only used when `rank` > 0.
"""
if noise_constraint is None:
noise_constraint = GreaterThan(1e-4)
noise_covar = MultitaskHomoskedasticNoise(
num_tasks=num_tasks, noise_prior=noise_prior, noise_constraint=noise_constraint, batch_shape=batch_shape
)
super().__init__(
num_tasks=num_tasks,
noise_covar=noise_covar,
rank=rank,
task_correlation_prior=task_correlation_prior,
batch_shape=batch_shape,
)
self.register_parameter(name="raw_noise", parameter=torch.nn.Parameter(torch.zeros(*batch_shape, 1)))
self.register_constraint("raw_noise", noise_constraint)
示例23
def __init__(
self, num_tasks, rank=0, task_prior=None, batch_shape=torch.Size(), noise_prior=None, noise_constraint=None
):
"""
Args:
num_tasks (int): Number of tasks.
rank (int): The rank of the task noise covariance matrix to fit. If `rank` is set to 0,
then a diagonal covariance matrix is fit.
task_prior (:obj:`gpytorch.priors.Prior`): Prior to use over the task noise covariance matrix if
`rank` > 0, or a prior over the log of just the diagonal elements, if `rank` == 0.
"""
super(Likelihood, self).__init__()
if noise_constraint is None:
noise_constraint = GreaterThan(1e-4)
self.register_parameter(name="raw_noise", parameter=torch.nn.Parameter(torch.zeros(*batch_shape, 1)))
if rank == 0:
self.register_parameter(
name="raw_task_noises", parameter=torch.nn.Parameter(torch.zeros(*batch_shape, num_tasks))
)
if task_prior is not None:
raise RuntimeError("Cannot set a `task_prior` if rank=0")
else:
self.register_parameter(
name="task_noise_covar_factor", parameter=torch.nn.Parameter(torch.randn(*batch_shape, num_tasks, rank))
)
if task_prior is not None:
self.register_prior("MultitaskErrorCovariancePrior", task_prior, self._eval_covar_matrix)
self.num_tasks = num_tasks
self.rank = rank
self.register_constraint("raw_noise", noise_constraint)
示例24
def log_det_by_cholesky(matrix):
"""
Args:
matrix: matrix must be a positive define matrix.
shape [N, C, D, D].
Ref:
https://github.com/tensorflow/tensorflow/blob/r1.13/tensorflow/python/ops/linalg/linalg_impl.py
"""
# This uses the property that the log det(A) = 2 * sum(log(real(diag(C))))
# where C is the cholesky decomposition of A.
chol = torch.cholesky(matrix)
#return 2.0 * torch.sum(torch.log(torch.diagonal(chol, dim1=-2, dim2=-1) + 1e-6), dim=-1)
return 2.0 * torch.sum(torch.log(torch.diagonal(chol, dim1=-2, dim2=-1) + 1e-8), dim=-1)
示例25
def test_compute_linear_truncated_kernel_no_batch(self):
x1 = torch.tensor([[1, 0.1, 0.2], [2, 0.3, 0.4]])
x2 = torch.tensor([[3, 0.5, 0.6], [4, 0.7, 0.8]])
t_1 = torch.tensor([[0.3584, 0.1856], [0.2976, 0.1584]])
for nu, fidelity_dims in itertools.product({0.5, 1.5, 2.5}, ([2], [1, 2])):
kernel = LinearTruncatedFidelityKernel(
fidelity_dims=fidelity_dims, dimension=3, nu=nu
)
kernel.power = 1
n_fid = len(fidelity_dims)
if n_fid > 1:
active_dimsM = [0]
t_2 = torch.tensor([[0.4725, 0.2889], [0.4025, 0.2541]])
t_3 = torch.tensor([[0.1685, 0.0531], [0.1168, 0.0386]])
t = 1 + t_1 + t_2 + t_3
else:
active_dimsM = [0, 1]
t = 1 + t_1
matern_ker = MaternKernel(nu=nu, active_dims=active_dimsM)
matern_term = matern_ker(x1, x2).evaluate()
actual = t * matern_term
res = kernel(x1, x2).evaluate()
self.assertLess(torch.norm(res - actual), 1e-4)
# test diagonal mode
res_diag = kernel(x1, x2, diag=True)
self.assertLess(torch.norm(res_diag - actual.diag()), 1e-4)
# make sure that we error out if last_dim_is_batch=True
with self.assertRaises(NotImplementedError):
kernel(x1, x2, diag=True, last_dim_is_batch=True)
示例26
def tobit_adjustment(mean: Tensor,
cov: Tensor,
lower: Optional[Tensor] = None,
upper: Optional[Tensor] = None,
probs: Optional[Tuple[Tensor, Tensor]] = None) -> Tuple[Tensor, Tensor]:
assert cov.shape[-1] == cov.shape[-2] # symmetrical
if upper is None:
upper = torch.full_like(mean, float('inf'))
if lower is None:
lower = torch.full_like(mean, -float('inf'))
assert lower.shape == upper.shape == mean.shape
is_cens_up = torch.isfinite(upper)
is_cens_lo = torch.isfinite(lower)
if not is_cens_up.any() and not is_cens_lo.any():
return mean, cov
F1, F2 = _F1F2(mean, cov, lower, upper)
std = torch.diagonal(cov, dim1=-2, dim2=-1).sqrt()
sqrt_pi = pi ** .5
# prob censoring:
if probs is None:
prob_lo, prob_up = tobit_probs(mean=mean,
cov=cov,
lower=lower,
upper=upper)
else:
prob_lo, prob_up = probs
# adjust mean:
lower_adj = torch.zeros_like(mean)
lower_adj[is_cens_lo] = prob_lo[is_cens_lo] * lower[is_cens_lo]
upper_adj = torch.zeros_like(mean)
upper_adj[is_cens_up] = prob_up[is_cens_up] * upper[is_cens_up]
mean_if_uncens = mean + (sqrt(2. / pi) * F1) * std
mean_uncens_adj = (1. - prob_up - prob_lo) * mean_if_uncens
mean_adj = mean_uncens_adj + upper_adj + lower_adj
# adjust cov:
diag_adj = torch.zeros_like(mean)
for m in range(mean.shape[-1]):
diag_adj[..., m] = (1. + 2. / sqrt_pi * F2[..., m] - 2. / pi * (F1[..., m] ** 2)) * cov[..., m, m]
cov_adj = torch.diag_embed(diag_adj)
return mean_adj, cov_adj
示例27
def _F1F2(mean: Tensor,
cov: Tensor,
lower: Tensor,
upper: Tensor) -> Tuple[Tensor, Tensor]:
"""
See https://github.com/cossio/TruncatedNormal.jl/blob/5e72b6abc8f2ce7aed8147e629b4c8dd5040a8bd/notes/normal.pdf
"""
is_cens_up = torch.isfinite(upper)
is_cens_lo = torch.isfinite(lower)
std = torch.diagonal(cov, dim1=-2, dim2=-1).sqrt()
# mask out the infs before any gradients are being tracked:
alpha = torch.zeros_like(mean)
alpha[is_cens_lo] = (lower[is_cens_lo] - mean[is_cens_lo]) / std[is_cens_lo]
beta = torch.zeros_like(mean)
beta[is_cens_up] = (upper[is_cens_up] - mean[is_cens_up]) / std[is_cens_up]
# _F1F2_no_inf unstable for large z-scores, so use the lim(+/-inf) version for those as well
is_cens_up = is_cens_up & (beta.data < 4.)
is_cens_lo = is_cens_lo & (alpha.data > -4.)
is_cens_both = is_cens_up & is_cens_lo
#
sqrt_2 = 2. ** .5
x = alpha / sqrt_2
y = beta / sqrt_2
# uncensored
F1, F2 = torch.zeros_like(mean), torch.zeros_like(mean)
# censored both:
F1[is_cens_both], F2[is_cens_both] = _F1F2_no_inf(x[is_cens_both], y[is_cens_both])
# censored lower, uncensored upper:
F1[is_cens_lo & ~is_cens_up] = 1. / erfcx(x[is_cens_lo & ~is_cens_up])
F2[is_cens_lo & ~is_cens_up] = x[is_cens_lo & ~is_cens_up] / erfcx(x[is_cens_lo & ~is_cens_up])
# uncensored lower, censored upper:
F1[~is_cens_lo & is_cens_up] = -1. / erfcx(-y[~is_cens_lo & is_cens_up])
F2[~is_cens_lo & is_cens_up] = -y[~is_cens_lo & is_cens_up] / erfcx(-y[~is_cens_lo & is_cens_up])
return F1, F2
示例28
def marginal(self, function_dist, *params, **kwargs):
r"""
Adds the task noises to the diagonal of the covariance matrix of the supplied
:obj:`gpytorch.distributions.MultivariateNormal` or :obj:`gpytorch.distributions.MultitaskMultivariateNormal`,
in case of `rank` == 0. Otherwise, adds a rank `rank` covariance matrix to it.
To accomplish this, we form a new :obj:`gpytorch.lazy.KroneckerProductLazyTensor` between :math:`I_{n}`,
an identity matrix with size equal to the data and a (not necessarily diagonal) matrix containing the task
noises :math:`D_{t}`.
We also incorporate a shared `noise` parameter from the base
:class:`gpytorch.likelihoods.GaussianLikelihood` that we extend.
The final covariance matrix after this method is then :math:`K + D_{t} \otimes I_{n} + \sigma^{2}I_{nt}`.
Args:
function_dist (:obj:`gpytorch.distributions.MultitaskMultivariateNormal`): Random variable whose covariance
matrix is a :obj:`gpytorch.lazy.LazyTensor` we intend to augment.
Returns:
:obj:`gpytorch.distributions.MultitaskMultivariateNormal`: A new random variable whose covariance
matrix is a :obj:`gpytorch.lazy.LazyTensor` with :math:`D_{t} \otimes I_{n}` and :math:`\sigma^{2}I_{nt}`
added.
"""
mean, covar = function_dist.mean, function_dist.lazy_covariance_matrix
if self.rank == 0:
task_noises = self.raw_noise_constraint.transform(self.raw_task_noises)
task_var_lt = DiagLazyTensor(task_noises)
dtype, device = task_noises.dtype, task_noises.device
else:
task_noise_covar_factor = self.task_noise_covar_factor
task_var_lt = RootLazyTensor(task_noise_covar_factor)
dtype, device = task_noise_covar_factor.dtype, task_noise_covar_factor.device
eye_lt = DiagLazyTensor(
torch.ones(*covar.batch_shape, covar.size(-1) // self.num_tasks, dtype=dtype, device=device)
)
task_var_lt = task_var_lt.expand(*covar.batch_shape, *task_var_lt.matrix_shape)
covar_kron_lt = KroneckerProductLazyTensor(eye_lt, task_var_lt)
covar = covar + covar_kron_lt
noise = self.noise
covar = add_diag(covar, noise)
return function_dist.__class__(mean, covar)
示例29
def update_qparam(self, a_i, V_ji, R_ij):
# Out ← [?, B, C, 1, 1, F, F, K, K]
R_ij = R_ij * a_i # broadcast a_i 1->C, and R_ij (1,1,1,1)->(F,F,K,K), 1->batch
# Out ← [?, 1, C, 1, 1, F, F, 1, 1]
self.R_j = self.reduce_icaps(R_ij)
# Out ← [?, 1, C, 1, 1, F, F, 1, 1]
self.alpha_j = self.alpha0 + self.R_j
# self.alpha_j = torch.exp(self.alpha0) + self.R_j # when alpha's a param
self.kappa_j = self.kappa0 + self.R_j
self.nu_j = self.nu0 + self.R_j
# Out ← [?, 1, C, P*P, 1, F, F, 1, 1]
mu_j = (1./self.R_j) * self.reduce_icaps(R_ij * V_ji)
# Out ← [?, 1, C, P*P, 1, F, F, 1, 1]
# self.m_j = (1./self.kappa_j) * (self.R_j * mu_j + self.kappa0 * self.m0) # use this if self.m0 != 0
self.m_j = (1./self.kappa_j) * (self.R_j * mu_j) # priors removed for faster computation
if self.cov == 'diag':
# Out ← [?, 1, C, P*P, 1, F, F, 1, 1] (1./R_j) not needed because Psi_j calc
sigma_j = self.reduce_icaps(R_ij * (V_ji - mu_j).pow(2))
# Out ← [?, 1, C, P*P, 1, F, F, 1, 1]
# self.invPsi_j = self.Psi0 + sigma_j + (self.kappa0*self.R_j / self.kappa_j) \
# * (mu_j - self.m0).pow(2) # use this if m0 != 0 or kappa0 != 1
self.invPsi_j = self.Psi0 + sigma_j + (self.R_j / self.kappa_j) * (mu_j).pow(2) # priors removed for faster computation
# Out ← [?, 1, C, 1, 1, F, F, 1, 1] (-) sign as inv. Psi_j
self.lndet_Psi_j = -self.reduce_poses(torch.log(self.invPsi_j)) # log det of diag precision matrix
elif self.cov == 'full':
#[?, B, C, P*P, P*P, F, F, K, K]
sigma_j = self.reduce_icaps(
R_ij * (V_ji - mu_j) * (V_ji - mu_j).transpose(3,4))
# Out ← [?, 1, C, P*P, P*P, F, F, 1, 1] full cov, torch.inverse(self.Psi0)
self.invPsi_j = self.Psi0 + sigma_j + (self.kappa0*self.R_j / self.kappa_j) \
* (mu_j - self.m0) * (mu_j - self.m0).transpose(3,4)
# Out ← [?, 1, C, F, F, 1, 1 , P*P, P*P]
# needed for pytorch (*,n,n) dim requirements in .cholesky and .inverse
self.invPsi_j = self.invPsi_j.permute(0,1,2,5,6,7,8,3,4)
# Out ← [?, 1, 1, 1, C, F, F, 1, 1] (-) sign as inv. Psi_j
self.lndet_Psi_j = -2*torch.diagonal(torch.cholesky(
self.invPsi_j), dim1=-2, dim2=-1).log().sum(-1, keepdim=True)[...,None]
示例30
def lw_shrink(X_t):
"""
Estimates the shrunk Ledoit-Wolf covariance matrix
Args:
X_t: samples x variants
Returns:
shrunk_cov_t: shrunk covariance
shrinkage_t: shrinkage coefficient
Adapted from scikit-learn:
https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/covariance/shrunk_covariance_.py
"""
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if len(X_t.shape) == 2:
n_samples, n_features = X_t.shape # samples x variants
X_t = X_t - X_t.mean(0)
X2_t = X_t.pow(2)
emp_cov_trace_sum = X2_t.sum() / n_samples
delta_ = torch.mm(X_t.t(), X_t).pow(2).sum() / n_samples**2
beta_ = torch.mm(X2_t.t(), X2_t).sum()
beta = 1. / (n_features * n_samples) * (beta_ / n_samples - delta_)
delta = delta_ - 1. * emp_cov_trace_sum**2 / n_features
delta /= n_features
beta = torch.min(beta, delta)
shrinkage_t = 0 if beta == 0 else beta / delta
emp_cov_t = torch.mm(X_t.t(), X_t) / n_samples
mu_t = torch.trace(emp_cov_t) / n_features
shrunk_cov_t = (1. - shrinkage_t) * emp_cov_t
shrunk_cov_t.view(-1)[::n_features + 1] += shrinkage_t * mu_t # add to diagonal
else: # broadcast along first dimension
n_samples, n_features = X_t.shape[1:] # samples x variants
X_t = X_t - X_t.mean(1, keepdim=True)
X2_t = X_t.pow(2)
emp_cov_trace_sum = X2_t.sum([1,2]) / n_samples
delta_ = torch.matmul(torch.transpose(X_t, 1, 2), X_t).pow(2).sum([1,2]) / n_samples**2
beta_ = torch.matmul(torch.transpose(X2_t, 1, 2), X2_t).sum([1,2])
beta = 1. / (n_features * n_samples) * (beta_ / n_samples - delta_)
delta = delta_ - 1. * emp_cov_trace_sum**2 / n_features
delta /= n_features
beta = torch.min(beta, delta)
shrinkage_t = torch.where(beta==0, torch.zeros(beta.shape).to(device), beta/delta)
emp_cov_t = torch.matmul(torch.transpose(X_t, 1, 2), X_t) / n_samples
mu_t = torch.diagonal(emp_cov_t, dim1=1, dim2=2).sum(1) / n_features
shrunk_cov_t = (1 - shrinkage_t.reshape([shrinkage_t.shape[0], 1, 1])) * emp_cov_t
ix = torch.LongTensor(np.array([np.arange(0, n_features**2, n_features+1)+i*n_features**2 for i in range(X_t.shape[0])])).to(device)
shrunk_cov_t.view(-1)[ix] += (shrinkage_t * mu_t).unsqueeze(-1) # add to diagonal
return shrunk_cov_t, shrinkage_t