Python source code examples: tensorflow.python.ops.state_ops.assign()
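All of the examples below call `state_ops.assign()` (publicly exposed as `tf.compat.v1.assign`) to write a new value into a mutable variable inside a TF1-style graph. As a minimal sketch of the core behaviour these examples rely on (this snippet is not taken from any example below; it assumes TF1 graph mode):

# Minimal sketch of assign semantics in graph mode.
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

v = tf.Variable(0.0)
assign_op = tf.assign(v, 3.0)        # returns a tensor holding the new value
with tf.Session() as sess:
    sess.run(v.initializer)
    print(sess.run(assign_op))       # 3.0 -- the variable now holds the new value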
Example 1
def _apply_dense(self, grad, var):
    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
    beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)

    # the following equations given in [1]
    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    m_t = state_ops.assign(m, beta1_t * m + (1. - beta1_t) * grad,
                           use_locking=self._use_locking)
    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    v_t = state_ops.assign(v, beta2_t * v + (1. - beta2_t) * tf.square(grad),
                           use_locking=self._use_locking)
    v_prime = self.get_slot(var, "v_prime")
    v_t_prime = state_ops.assign(v_prime, tf.maximum(v_prime, v_t))

    var_update = state_ops.assign_sub(var,
                                      lr_t * m_t / (tf.sqrt(v_t_prime) + epsilon_t),
                                      use_locking=self._use_locking)
    return control_flow_ops.group(*[var_update, m_t, v_t, v_t_prime])

# keras Nadam update rule
Example 2
def _finish(self, update_ops, name_scope):
    # Update the power accumulators.
    with ops.control_dependencies(update_ops):
        with ops.colocate_with(self._iterations):
            update_beta1 = self._beta1_power.assign(
                self._beta1_power * self._beta1_t,
                use_locking=self._use_locking)
            update_beta2 = self._beta2_power.assign(
                self._beta2_power * self._beta2_t,
                use_locking=self._use_locking)
            t = self._iterations + 1.
            update_iterations = self._iterations.assign(t, use_locking=self._use_locking)
            momentum_cache_power = self._get_momentum_cache(self._schedule_decay_t, t)
            momentum_cache_t = self._beta1_t * (1. - 0.5 * momentum_cache_power)
            update_m_schedule = self._m_schedule.assign(
                self._m_schedule * momentum_cache_t,
                use_locking=self._use_locking)
    return control_flow_ops.group(
        *update_ops + [update_beta1, update_beta2] + [update_iterations, update_m_schedule],
        name=name_scope)
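For context, `_get_momentum_cache` is not shown above. In the Keras formulation of the Nadam momentum schedule it is typically a power of 0.96; the sketch below is an assumption about that helper, not code from the example, with illustrative values:

# Assumed Keras-style Nadam momentum schedule; beta1, schedule_decay and t are illustrative.
beta1, schedule_decay, t = 0.9, 0.004, 10.0
momentum_cache_power = 0.96 ** (t * schedule_decay)             # ~0.9984
momentum_cache_t = beta1 * (1.0 - 0.5 * momentum_cache_power)   # ~0.4507, the mu_t used above
print(momentum_cache_t)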
Example 3
def _update_t_cur_eta_t_v2(self, lr_t=None, var=None):  # tf.keras
    t_cur_update, eta_t_update = None, None  # in case not assigned

    # update `t_cur` if iterating last `(grad, var)`
    iteration_done = self._updates_processed == (self._updates_per_iter - 1)
    if iteration_done:
        t_cur_update = state_ops.assign_add(self.t_cur, 1,
                                            use_locking=self._use_locking)
        self._updates_processed = 0  # reset
    else:
        self._updates_processed += 1

    # Cosine annealing
    if self.use_cosine_annealing and iteration_done:
        # ensure eta_t is updated AFTER t_cur
        with ops.control_dependencies([t_cur_update]):
            eta_t_update = state_ops.assign(self.eta_t, _compute_eta_t(self),
                                            use_locking=self._use_locking)
        self.lr_t = lr_t * self.eta_t  # for external tracking

    return iteration_done, t_cur_update, eta_t_update
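The pattern worth noting here is ordering one assignment behind another with a control dependency, so the annealing factor is recomputed only after the step counter has been incremented. A minimal self-contained sketch of that pattern (TF1 graph mode assumed; the variable names are illustrative, not from the example):

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

t_cur = tf.Variable(0, dtype=tf.int64)
eta_t = tf.Variable(1.0)

step = tf.assign_add(t_cur, 1)                  # t_cur <- t_cur + 1
with tf.control_dependencies([step]):           # force the increment to run first
    eta_update = tf.assign(eta_t, 1.0 / tf.cast(t_cur, tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(eta_update))                 # 1.0, i.e. eta computed from t_cur == 1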
Example 4
def value(self):
    """Returns the last snapshot of this variable.

    You usually do not need to call this method as all ops that need the value
    of the variable call it automatically through a `convert_to_tensor()` call.

    Returns a `Tensor` which holds the value of the variable. You can not
    assign a new value to this tensor as it is not a reference to the variable.

    To avoid copies, if the consumer of the returned value is on the same device
    as the variable, this actually returns the live value of the variable, not
    a copy. Updates to the variable are seen by the consumer. If the consumer
    is on a different device it will get a copy of the variable.

    Returns:
      A `Tensor` containing the value of the variable.
    """
    return self._snapshot
Example 5
def _prepare_gramian(self, factors, gramian):
    """Helper function to create ops to prepare/calculate gramian.

    Args:
      factors: Variable or list of Variable representing (sharded) factors.
        Used to compute the updated corresponding gramian value.
      gramian: Variable storing the gramian calculated from the factors.

    Returns:
      An op that updates the gramian with the calculated value from the factors.
    """
    partial_gramians = []
    for f in factors:
        with ops.colocate_with(f):
            partial_gramians.append(math_ops.matmul(f, f, transpose_a=True))

    with ops.colocate_with(gramian):
        prep_gramian = state_ops.assign(gramian,
                                        math_ops.add_n(partial_gramians)).op

    return prep_gramian
Example 6
def scatter_update(cls, factor, indices, values, sharding_func, name=None):
    """Helper function for doing sharded scatter update."""
    assert isinstance(factor, list)
    if len(factor) == 1:
        with ops.colocate_with(factor[0]):
            # TODO(agarwal): assign instead of scatter update for full batch update.
            return state_ops.scatter_update(factor[0], indices, values,
                                            name=name).op
    else:
        num_shards = len(factor)
        assignments, new_ids = sharding_func(indices)
        assert assignments is not None
        assignments = math_ops.cast(assignments, dtypes.int32)
        sharded_ids = data_flow_ops.dynamic_partition(new_ids, assignments,
                                                      num_shards)
        sharded_values = data_flow_ops.dynamic_partition(values, assignments,
                                                         num_shards)
        updates = []
        for i in xrange(num_shards):
            updates.append(state_ops.scatter_update(factor[i], sharded_ids[i],
                                                    sharded_values[i]))
        return control_flow_ops.group(*updates, name=name)
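A quick standalone illustration of what `dynamic_partition` does in the sharded branch above (eager TF2, illustrative values only, not part of the example):

import tensorflow as tf

ids = tf.constant([7, 3, 9, 4])
assignments = tf.constant([1, 0, 1, 0])          # which shard each id goes to
shards = tf.dynamic_partition(ids, assignments, num_partitions=2)
print([s.numpy().tolist() for s in shards])      # [[3, 4], [7, 9]]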
Example 7
def batch_set_value(tuples):
    """Sets the values of many tensor variables at once.

    Arguments:
        tuples: a list of tuples `(tensor, value)`.
            `value` should be a Numpy array.
    """
    if tuples:
        assign_ops = []
        feed_dict = {}
        for x, value in tuples:
            value = np.asarray(value)
            tf_dtype = _convert_string_dtype(x.dtype.name.split('_')[0])
            if hasattr(x, '_assign_placeholder'):
                assign_placeholder = x._assign_placeholder
                assign_op = x._assign_op
            else:
                assign_placeholder = array_ops.placeholder(tf_dtype, shape=value.shape)
                assign_op = x.assign(assign_placeholder)
                x._assign_placeholder = assign_placeholder
                x._assign_op = assign_op
            assign_ops.append(assign_op)
            feed_dict[assign_placeholder] = value
        get_session().run(assign_ops, feed_dict=feed_dict)
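Typical usage of `batch_set_value` looks like the sketch below (a hedged example assuming the public Keras backend module `tensorflow.keras.backend`, not code from the snippet above). All values are pushed through cached placeholders in a single `Session.run`:

import numpy as np
from tensorflow.keras import backend as K

w = K.variable(np.zeros((2, 2)))
b = K.variable(np.zeros((2,)))
K.batch_set_value([(w, np.ones((2, 2))), (b, np.full((2,), 0.5))])
print(K.get_value(b))    # [0.5 0.5]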
Example 8
def __init__(self, inputs, outputs, updates=None):
    updates = updates or []
    if not isinstance(inputs, (list, tuple)):
        raise TypeError('`inputs` to a TensorFlow backend function '
                        'should be a list or tuple.')
    if not isinstance(outputs, (list, tuple)):
        raise TypeError('`outputs` of a TensorFlow backend function '
                        'should be a list or tuple.')
    if not isinstance(updates, (list, tuple)):
        raise TypeError('`updates` in a TensorFlow backend function '
                        'should be a list or tuple.')
    self.inputs = list(inputs)
    self.outputs = list(outputs)
    with ops.control_dependencies(self.outputs):
        updates_ops = []
        for update in updates:
            if isinstance(update, tuple):
                p, new_p = update
                updates_ops.append(state_ops.assign(p, new_p))
            else:
                # assumed already an op
                updates_ops.append(update)
        self.updates_op = control_flow_ops.group(*updates_ops)
Example 9
def record_variable_inits(self):
    """Context manager to record Variable initializations.

    Sets _in_variable_creation to True before a Variable is initialized.

    NOTE(keveman): This is used for recording the list of assign ops
    that are used to initialize variables. It relies on the fact that
    the constructor of Variable class creates exactly one assign op that is
    used for initializing the variable. Variable ops not created using the
    variables.Variable class are not added to _init_ops and hence not
    initialized automatically.
    """
    old_init = getattr(variables.Variable, '__init__')

    def record(*args, **kwargs):
        self._in_variable_creation = True
        old_init(*args, **kwargs)
        self._in_variable_creation = False

    setattr(variables.Variable, '__init__', record)
    yield
    setattr(variables.Variable, '__init__', old_init)
# pylint: enable=g-doc-return-or-yield
Example 10
def scatter_update(cls, factor, indices, values, sharding_func):
    """Helper function for doing sharded scatter update."""
    assert isinstance(factor, list)
    if len(factor) == 1:
        with ops.colocate_with(factor[0]):
            # TODO(agarwal): assign instead of scatter update for full batch update.
            return state_ops.scatter_update(factor[0], indices, values).op
    else:
        num_shards = len(factor)
        assignments, new_ids = sharding_func(indices)
        assert assignments is not None
        assignments = math_ops.cast(assignments, dtypes.int32)
        sharded_ids = data_flow_ops.dynamic_partition(new_ids, assignments,
                                                      num_shards)
        sharded_values = data_flow_ops.dynamic_partition(values, assignments,
                                                         num_shards)
        updates = []
        for i in xrange(num_shards):
            updates.append(
                state_ops.scatter_update(factor[i], sharded_ids[i],
                                         sharded_values[i]))
        return control_flow_ops.group(*updates)
Example 11
def _apply_dense(self, grad, var):
    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
    alpha_t = math_ops.cast(self._alpha_t, var.dtype.base_dtype)
    beta_t = math_ops.cast(self._beta_t, var.dtype.base_dtype)

    eps = 1e-7  # cap for moving average

    m = self.get_slot(var, "m")
    m_t = m.assign(tf.maximum(beta_t * m + eps, tf.abs(grad)))

    var_update = state_ops.assign_sub(var, lr_t * grad * tf.exp(
        tf.log(alpha_t) * tf.sign(grad) * tf.sign(m_t)))  # Update 'ref' by subtracting 'value'
    # Create an op that groups multiple operations.
    # When this op finishes, all ops in input have finished
    return control_flow_ops.group(*[var_update, m_t])
Example 12
def _apply_dense(self, grad, var):
    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
    beta_t = math_ops.cast(self._beta_t, var.dtype.base_dtype)
    alpha_t = math_ops.cast(self._alpha_t, var.dtype.base_dtype)

    eps = 1e-7  # cap for moving average

    m = self.get_slot(var, "m")
    m_t = m.assign(tf.maximum(beta_t * m + eps, tf.abs(grad)))

    var_update = state_ops.assign_sub(var, lr_t * grad * (1.0 + alpha_t * tf.sign(grad) * tf.sign(m_t)))
    # Create an op that groups multiple operations
    # When this op finishes, all ops in input have finished
    return control_flow_ops.group(*[var_update, m_t])
Example 13
def _apply_dense(self, grad, var):
    beta1_power = math_ops.cast(self._beta1_power, var.dtype.base_dtype)
    beta2_power = math_ops.cast(self._beta2_power, var.dtype.base_dtype)
    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
    beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
    lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))

    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    m_scaled_g_values = grad * (1 - beta1_t)
    m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values,
                           use_locking=self._use_locking)

    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    v_scaled_g_values = (grad * grad) * (1 - beta2_t)
    v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values,
                           use_locking=self._use_locking)

    # amsgrad
    vhat = self.get_slot(var, "vhat")
    vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
    v_sqrt = math_ops.sqrt(vhat_t)

    var_update = state_ops.assign_sub(var, lr * m_t / (v_sqrt + epsilon_t),
                                      use_locking=self._use_locking)
    return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
Example 14
def _resource_apply_dense(self, grad, var):
    var = var.handle
    beta1_power = math_ops.cast(self._beta1_power, grad.dtype.base_dtype)
    beta2_power = math_ops.cast(self._beta2_power, grad.dtype.base_dtype)
    lr_t = math_ops.cast(self._lr_t, grad.dtype.base_dtype)
    beta1_t = math_ops.cast(self._beta1_t, grad.dtype.base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, grad.dtype.base_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, grad.dtype.base_dtype)
    lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m").handle
    m_scaled_g_values = grad * (1 - beta1_t)
    m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values,
                           use_locking=self._use_locking)
    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v").handle
    v_scaled_g_values = (grad * grad) * (1 - beta2_t)
    v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values,
                           use_locking=self._use_locking)
    # amsgrad
    vhat = self.get_slot(var, "vhat").handle
    vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
    v_sqrt = math_ops.sqrt(vhat_t)
    var_update = state_ops.assign_sub(var, lr * m_t / (v_sqrt + epsilon_t),
                                      use_locking=self._use_locking)
    return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
Example 15
def _apply_sparse_shared(self, grad, var, indices, scatter_add):
    beta1_power = math_ops.cast(self._beta1_power, var.dtype.base_dtype)
    beta2_power = math_ops.cast(self._beta2_power, var.dtype.base_dtype)
    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
    beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
    lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    m_scaled_g_values = grad * (1 - beta1_t)
    m_t = state_ops.assign(m, m * beta1_t, use_locking=self._use_locking)
    with ops.control_dependencies([m_t]):
        m_t = scatter_add(m, indices, m_scaled_g_values)
    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    v_scaled_g_values = (grad * grad) * (1 - beta2_t)
    v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
    with ops.control_dependencies([v_t]):
        v_t = scatter_add(v, indices, v_scaled_g_values)
    # amsgrad
    vhat = self.get_slot(var, "vhat")
    vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
    v_sqrt = math_ops.sqrt(vhat_t)
    var_update = state_ops.assign_sub(var, lr * m_t / (v_sqrt + epsilon_t),
                                      use_locking=self._use_locking)
    return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
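In the sparse path above, the moment update m_t = beta1 * m + (1 - beta1) * g is split into two stages: first the whole slot is decayed by beta1 with `assign`, then only the rows touched by the sparse gradient receive the (1 - beta1) * g increment via `scatter_add`, with a control dependency enforcing the order. A small standalone sketch of that two-stage pattern (eager TF2, illustrative values only, not from the optimizer):

import tensorflow as tf

beta1 = 0.9
m = tf.Variable([1.0, 2.0, 3.0])
indices = tf.constant([0, 2])
grad = tf.constant([10.0, 10.0])     # sparse gradient touching rows 0 and 2

m.assign(m * beta1)                                              # stage 1: decay every row
m.scatter_add(tf.IndexedSlices(grad * (1 - beta1), indices))     # stage 2: add only touched rows
print(m.numpy())                                                 # [1.9 1.8 3.7]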
Example 16
def _finish(self, update_ops, name_scope):
    # Update the power accumulators.
    with ops.control_dependencies(update_ops):
        with ops.colocate_with(self._beta1_power):
            update_beta1 = self._beta1_power.assign(
                self._beta1_power * self._beta1_t,
                use_locking=self._use_locking)
            update_beta2 = self._beta2_power.assign(
                self._beta2_power * self._beta2_t,
                use_locking=self._use_locking)
    return control_flow_ops.group(*update_ops + [update_beta1, update_beta2],
                                  name=name_scope)
Example 17
def _update_t_cur_eta_t(self):  # keras
    self.updates.append(state_ops.assign_add(self.t_cur, 1))
    # Cosine annealing
    if self.use_cosine_annealing:
        # ensure eta_t is updated AFTER t_cur
        with ops.control_dependencies([self.updates[-1]]):
            self.updates.append(state_ops.assign(self.eta_t,
                                                 _compute_eta_t(self)))
Example 18
def _apply_dense(self, grad, var):
    beta1_power = math_ops.cast(self._beta1_power, var.dtype.base_dtype)
    beta2_power = math_ops.cast(self._beta2_power, var.dtype.base_dtype)
    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
    beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
    lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    m_scaled_g_values = grad * (1 - beta1_t)
    m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values, use_locking=self._use_locking)
    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    v_scaled_g_values = (grad * grad) * (1 - beta2_t)
    v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values, use_locking=self._use_locking)
    # amsgrad
    vhat = self.get_slot(var, "vhat")
    vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
    v_sqrt = math_ops.sqrt(vhat_t)
    var_update = state_ops.assign_sub(var, lr * m_t / (v_sqrt + epsilon_t), use_locking=self._use_locking)
    return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
Example 19
def _resource_apply_dense(self, grad, var):
    var = var.handle
    beta1_power = math_ops.cast(self._beta1_power, grad.dtype.base_dtype)
    beta2_power = math_ops.cast(self._beta2_power, grad.dtype.base_dtype)
    lr_t = math_ops.cast(self._lr_t, grad.dtype.base_dtype)
    beta1_t = math_ops.cast(self._beta1_t, grad.dtype.base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, grad.dtype.base_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, grad.dtype.base_dtype)
    lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m").handle
    m_scaled_g_values = grad * (1 - beta1_t)
    m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values, use_locking=self._use_locking)
    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v").handle
    v_scaled_g_values = (grad * grad) * (1 - beta2_t)
    v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values, use_locking=self._use_locking)
    # amsgrad
    vhat = self.get_slot(var, "vhat").handle
    vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
    v_sqrt = math_ops.sqrt(vhat_t)
    var_update = state_ops.assign_sub(var, lr * m_t / (v_sqrt + epsilon_t), use_locking=self._use_locking)
    return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
Example 20
def _apply_sparse_shared(self, grad, var, indices, scatter_add):
    beta1_power = math_ops.cast(self._beta1_power, var.dtype.base_dtype)
    beta2_power = math_ops.cast(self._beta2_power, var.dtype.base_dtype)
    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
    beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
    lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    m_scaled_g_values = grad * (1 - beta1_t)
    m_t = state_ops.assign(m, m * beta1_t, use_locking=self._use_locking)
    with ops.control_dependencies([m_t]):
        m_t = scatter_add(m, indices, m_scaled_g_values)
    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    v_scaled_g_values = (grad * grad) * (1 - beta2_t)
    v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
    with ops.control_dependencies([v_t]):
        v_t = scatter_add(v, indices, v_scaled_g_values)
    # amsgrad
    vhat = self.get_slot(var, "vhat")
    vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
    v_sqrt = math_ops.sqrt(vhat_t)
    var_update = state_ops.assign_sub(var, lr * m_t / (v_sqrt + epsilon_t), use_locking=self._use_locking)
    return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
Example 21
def _preconditioned_update(self, var, partitioned_grads,
                           diagonal_grad_update):
    """Computes the matrix preconditioned update.

    Args:
      var: Variable for which we are computing the preconditioned gradient.
      partitioned_grads: Partitioned gradients.
      diagonal_grad_update: Update as given by diagonal adagrad.

    Returns:
      scaled preconditioned gradient.
    """

    def _l2_norm(v):
        return tf.sqrt(tf.reduce_sum(tf.square(v)))

    precond_grad = self._compute_preconditioned_raw_grad(var, partitioned_grads)
    if self._momentum > 0.0:
        gbar = self.get_slot(var, "precond_grad_momentum")
        matrix_preconditioned_grad = state_ops.assign(
            gbar, gbar * self._momentum_tensor + precond_grad *
            (1.0 - self._momentum_tensor))
    else:
        matrix_preconditioned_grad = precond_grad

    # We use the direction from Shampoo while using the step size scale from
    # diagonal AdaGrad.
    precond_l2_norm = _l2_norm(matrix_preconditioned_grad)
    diagonal_l2_norm = _l2_norm(diagonal_grad_update)
    multiplier = tf.where(
        tf.greater(precond_l2_norm, 0.0),
        tf.maximum(diagonal_l2_norm, 1e-30) /
        (tf.maximum(precond_l2_norm, 1e-30)), 1.0)
    return matrix_preconditioned_grad * multiplier
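The `multiplier` above rescales the Shampoo-preconditioned gradient so that its step length matches the diagonal-AdaGrad step: direction from the matrix preconditioner, magnitude from the diagonal one. A tiny numeric sketch of that rescaling (illustrative values, the zero-norm guard is omitted, not code from the optimizer):

import tensorflow as tf

precond_grad = tf.constant([3.0, 4.0])   # Shampoo direction, norm 5
diag_update = tf.constant([0.6, 0.8])    # diagonal AdaGrad update, norm 1
multiplier = tf.norm(diag_update) / tf.maximum(tf.norm(precond_grad), 1e-30)
scaled = precond_grad * multiplier
print(scaled.numpy(), tf.norm(scaled).numpy())   # [0.6 0.8] 1.0 -- same direction, AdaGrad-sized step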
Example 22
def streaming_tp_fp_arrays(num_gbboxes, tp, fp,
                           metrics_collections=None,
                           updates_collections=None,
                           name=None):
    """Streaming computation of True and False Positive arrays.
    """
    with variable_scope.variable_scope(name, 'streaming_tp_fp',
                                       [num_gbboxes, tp, fp]):
        num_gbboxes = tf.cast(num_gbboxes, tf.int32)
        tp = tf.cast(tp, tf.bool)
        fp = tf.cast(fp, tf.bool)
        # Reshape TP and FP tensors and clean away 0 class values.
        tp = tf.reshape(tp, [-1])
        fp = tf.reshape(fp, [-1])

        # Local variables accumulating information over batches.
        v_num_objects = _create_local('v_num_gbboxes', shape=[], dtype=tf.int32)
        v_tp = _create_local('v_tp', shape=[0, ], dtype=tf.bool)
        v_fp = _create_local('v_fp', shape=[0, ], dtype=tf.bool)

        # Update operations.
        num_objects_op = state_ops.assign_add(v_num_objects,
                                              tf.reduce_sum(num_gbboxes))
        tp_op = state_ops.assign(v_tp, tf.concat([v_tp, tp], axis=0),
                                 validate_shape=False)
        fp_op = state_ops.assign(v_fp, tf.concat([v_fp, fp], axis=0),
                                 validate_shape=False)

        # Value and update ops.
        val = (v_num_objects, v_tp, v_fp)
        with ops.control_dependencies([num_objects_op, tp_op, fp_op]):
            update_op = (num_objects_op, tp_op, fp_op)
        return val, update_op
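The example above follows the usual tf.metrics pattern: keep running state in local variables, return `(value, update_op)`, run `update_op` once per batch and read the value at the end. A minimal self-contained counter built the same way (TF1 graph mode assumed; `_create_local` is internal, so a plain local-collection variable stands in for it here):

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

count = tf.Variable(0, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES])
update_op = tf.assign_add(count, 1)   # run once per batch
value = count                         # read after all batches

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    for _ in range(3):
        sess.run(update_op)
    print(sess.run(value))            # 3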
Example 23
def _ref(self):
    """Returns a reference to this variable.

    You usually do not need to call this method as all ops that need a reference
    to the variable call it automatically.

    Returns a `Tensor` which holds a reference to the variable. You can
    assign a new value to the variable by passing the tensor to an assign op.
    See @{tf.Variable.value} if you want to get the value of the
    variable.

    Returns:
      A `Tensor` that is a reference to the variable.
    """
    return self._variable
Example 24
def assign(self, value, use_locking=False):
    """Assigns a new value to the variable.

    This is essentially a shortcut for `assign(self, value)`.

    Args:
      value: A `Tensor`. The new value for this variable.
      use_locking: If `True`, use locking during the assignment.

    Returns:
      A `Tensor` that will hold the new value of this variable after
      the assignment has completed.
    """
    return state_ops.assign(self._variable, value, use_locking=use_locking)