Python源码示例:tensorflow.python.ops.state_ops.init_variable()
示例1
def _AddVariable(self, shape, dtype, name, initializer=None):
    """Fetch the variable stored under `name`, building it on first request.

    Args:
      shape: shape handed to tf.get_variable when the variable is created
      dtype: dtype handed to tf.get_variable when the variable is created
      name: key into self.variables, also used as the tf.get_variable name
      initializer: optional initializer; when it is not None an init op made
        by state_ops.init_variable is recorded in self.inits under `name`

    Returns:
      the entry of self.variables registered under `name`
    """
    if name not in self.variables:
        created = tf.get_variable(name, shape, dtype, initializer)
        self.variables[name] = created
        # Only variables with an explicit initializer get an entry in inits.
        if initializer is not None:
            self.inits[name] = state_ops.init_variable(created, initializer)
    return self.variables[name]
示例2
def _AddVariable(self, shape, dtype, name, initializer=None):
    """Fetch the variable stored under `name`, building it on first request.

    Args:
      shape: shape handed to tf.get_variable when the variable is created
      dtype: dtype handed to tf.get_variable when the variable is created
      name: key into self.variables, also used as the tf.get_variable name
      initializer: optional initializer; when it is not None an init op made
        by state_ops.init_variable is recorded in self.inits under `name`

    Returns:
      the entry of self.variables registered under `name`
    """
    if name not in self.variables:
        created = tf.get_variable(name, shape, dtype, initializer)
        self.variables[name] = created
        # Only variables with an explicit initializer get an entry in inits.
        if initializer is not None:
            self.inits[name] = state_ops.init_variable(created, initializer)
    return self.variables[name]
示例3
def _AddVariable(self, shape, dtype, name, initializer=None):
    """Fetch the variable stored under `name`, building it on first request.

    Args:
      shape: shape handed to tf.get_variable when the variable is created
      dtype: dtype handed to tf.get_variable when the variable is created
      name: key into self.variables, also used as the tf.get_variable name
      initializer: optional initializer; when it is not None an init op made
        by state_ops.init_variable is recorded in self.inits under `name`

    Returns:
      the entry of self.variables registered under `name`
    """
    if name not in self.variables:
        created = tf.get_variable(name, shape, dtype, initializer)
        self.variables[name] = created
        # Only variables with an explicit initializer get an entry in inits.
        if initializer is not None:
            self.inits[name] = state_ops.init_variable(created, initializer)
    return self.variables[name]
示例4
def _AddVariable(self, shape, dtype, name, initializer=None):
    """Fetch the variable stored under `name`, building it on first request.

    Args:
      shape: shape handed to tf.get_variable when the variable is created
      dtype: dtype handed to tf.get_variable when the variable is created
      name: key into self.variables, also used as the tf.get_variable name
      initializer: optional initializer; when it is not None an init op made
        by state_ops.init_variable is recorded in self.inits under `name`

    Returns:
      the entry of self.variables registered under `name`
    """
    if name not in self.variables:
        created = tf.get_variable(name, shape, dtype, initializer)
        self.variables[name] = created
        # Only variables with an explicit initializer get an entry in inits.
        if initializer is not None:
            self.inits[name] = state_ops.init_variable(created, initializer)
    return self.variables[name]
示例5
def _AddVariable(self, shape, dtype, name, initializer=None):
    """Fetch the variable stored under `name`, building it on first request.

    Args:
      shape: shape handed to tf.get_variable when the variable is created
      dtype: dtype handed to tf.get_variable when the variable is created
      name: key into self.variables, also used as the tf.get_variable name
      initializer: optional initializer; when it is not None an init op made
        by state_ops.init_variable is recorded in self.inits under `name`

    Returns:
      the entry of self.variables registered under `name`
    """
    if name not in self.variables:
        created = tf.get_variable(name, shape, dtype, initializer)
        self.variables[name] = created
        # Only variables with an explicit initializer get an entry in inits.
        if initializer is not None:
            self.inits[name] = state_ops.init_variable(created, initializer)
    return self.variables[name]
示例6
def _AddVariable(self, shape, dtype, name, initializer=None):
    """Fetch the variable stored under `name`, building it on first request.

    Args:
      shape: shape handed to tf.get_variable when the variable is created
      dtype: dtype handed to tf.get_variable when the variable is created
      name: key into self.variables, also used as the tf.get_variable name
      initializer: optional initializer; when it is not None an init op made
        by state_ops.init_variable is recorded in self.inits under `name`

    Returns:
      the entry of self.variables registered under `name`
    """
    if name not in self.variables:
        created = tf.get_variable(name, shape, dtype, initializer)
        self.variables[name] = created
        # Only variables with an explicit initializer get an entry in inits.
        if initializer is not None:
            self.inits[name] = state_ops.init_variable(created, initializer)
    return self.variables[name]
示例7
def _AddVariable(self, shape, dtype, name, initializer=None):
    """Fetch the variable stored under `name`, building it on first request.

    Args:
      shape: shape handed to tf.get_variable when the variable is created
      dtype: dtype handed to tf.get_variable when the variable is created
      name: key into self.variables, also used as the tf.get_variable name
      initializer: optional initializer; when it is not None an init op made
        by state_ops.init_variable is recorded in self.inits under `name`

    Returns:
      the entry of self.variables registered under `name`
    """
    if name not in self.variables:
        created = tf.get_variable(name, shape, dtype, initializer)
        self.variables[name] = created
        # Only variables with an explicit initializer get an entry in inits.
        if initializer is not None:
            self.inits[name] = state_ops.init_variable(created, initializer)
    return self.variables[name]
示例8
def _AddVariable(self, shape, dtype, name, initializer=None):
    """Fetch the variable stored under `name`, building it on first request.

    Args:
      shape: shape handed to tf.get_variable when the variable is created
      dtype: dtype handed to tf.get_variable when the variable is created
      name: key into self.variables, also used as the tf.get_variable name
      initializer: optional initializer; when it is not None an init op made
        by state_ops.init_variable is recorded in self.inits under `name`

    Returns:
      the entry of self.variables registered under `name`
    """
    if name not in self.variables:
        created = tf.get_variable(name, shape, dtype, initializer)
        self.variables[name] = created
        # Only variables with an explicit initializer get an entry in inits.
        if initializer is not None:
            self.inits[name] = state_ops.init_variable(created, initializer)
    return self.variables[name]
示例9
def _AddVariable(self, shape, dtype, name, initializer=None):
    """Fetch the variable stored under `name`, building it on first request.

    Args:
      shape: shape handed to tf.get_variable when the variable is created
      dtype: dtype handed to tf.get_variable when the variable is created
      name: key into self.variables, also used as the tf.get_variable name
      initializer: optional initializer; when it is not None an init op made
        by state_ops.init_variable is recorded in self.inits under `name`

    Returns:
      the entry of self.variables registered under `name`
    """
    if name not in self.variables:
        created = tf.get_variable(name, shape, dtype, initializer)
        self.variables[name] = created
        # Only variables with an explicit initializer get an entry in inits.
        if initializer is not None:
            self.inits[name] = state_ops.init_variable(created, initializer)
    return self.variables[name]
示例10
def _AddVariable(self, shape, dtype, name, initializer=None):
    """Fetch the variable stored under `name`, building it on first request.

    Args:
      shape: shape handed to tf.get_variable when the variable is created
      dtype: dtype handed to tf.get_variable when the variable is created
      name: key into self.variables, also used as the tf.get_variable name
      initializer: optional initializer; when it is not None an init op made
        by state_ops.init_variable is recorded in self.inits under `name`

    Returns:
      the entry of self.variables registered under `name`
    """
    if name not in self.variables:
        created = tf.get_variable(name, shape, dtype, initializer)
        self.variables[name] = created
        # Only variables with an explicit initializer get an entry in inits.
        if initializer is not None:
            self.inits[name] = state_ops.init_variable(created, initializer)
    return self.variables[name]
示例11
def _AddVariable(self, shape, dtype, name, initializer=None):
    """Fetch the variable stored under `name`, building it on first request.

    Args:
      shape: shape handed to tf.get_variable when the variable is created
      dtype: dtype handed to tf.get_variable when the variable is created
      name: key into self.variables, also used as the tf.get_variable name
      initializer: optional initializer; when it is not None an init op made
        by state_ops.init_variable is recorded in self.inits under `name`

    Returns:
      the entry of self.variables registered under `name`
    """
    if name not in self.variables:
        created = tf.get_variable(name, shape, dtype, initializer)
        self.variables[name] = created
        # Only variables with an explicit initializer get an entry in inits.
        if initializer is not None:
            self.inits[name] = state_ops.init_variable(created, initializer)
    return self.variables[name]
示例12
def _AddVariable(self, shape, dtype, name, initializer=None):
    """Fetch the variable stored under `name`, building it on first request.

    Args:
      shape: shape handed to tf.get_variable when the variable is created
      dtype: dtype handed to tf.get_variable when the variable is created
      name: key into self.variables, also used as the tf.get_variable name
      initializer: optional initializer; when it is not None an init op made
        by state_ops.init_variable is recorded in self.inits under `name`

    Returns:
      the entry of self.variables registered under `name`
    """
    if name not in self.variables:
        created = tf.get_variable(name, shape, dtype, initializer)
        self.variables[name] = created
        # Only variables with an explicit initializer get an entry in inits.
        if initializer is not None:
            self.inits[name] = state_ops.init_variable(created, initializer)
    return self.variables[name]
示例13
def _AddParam(self,
              shape,
              dtype,
              name,
              initializer=None,
              return_average=False):
    """Create (once) a model parameter together with its moving average.

    The first call for a given `name` builds the parameter, its optional init
    op, an ExponentialMovingAverage update op and the averaged variable; later
    calls only look the existing entries up. Which of the two variables is
    returned is selected by `return_average`.

    Args:
      shape: int list, tensor shape of the parameter to create
      dtype: tf.DataType, data type of the parameter
      name: string, name of the parameter in the TF graph
      initializer: optional initializer for the parameter
      return_average: if True return the moving average, otherwise return the
        parameter itself

    Returns:
      parameter or averaged parameter
    """
    avg_key = name + '_avg_var'
    if name not in self.params:
        step = tf.cast(self.GetStep(), tf.float32)
        # Parameters and their initializing ops live in a dedicated scope,
        # irrespective of the current scope (training or eval).
        with tf.name_scope(self._param_scope):
            param = tf.get_variable(name, shape, dtype, initializer)
            self.params[name] = param
            if initializer is not None:
                self.inits[name] = state_ops.init_variable(param, initializer)
            # A decay of exactly 1 selects a step-dependent decay instead of
            # the configured constant.
            if self._averaging_decay == 1:
                logging.info('Using vanilla averaging of parameters.')
                decay, num_updates = step / (step + 1.0), None
            else:
                decay, num_updates = self._averaging_decay, step
            ema = tf.train.ExponentialMovingAverage(decay=decay,
                                                    num_updates=num_updates)
            self._averaging[name + '_avg_update'] = ema.apply([param])
            averaged = ema.average(param)
            self.variables[avg_key] = averaged
            self.inits[name + '_avg_init'] = state_ops.init_variable(
                averaged, tf.zeros_initializer())
    if return_average:
        return self.variables[avg_key]
    return self.params[name]
示例14
def AddTraining(self,
                task_context,
                batch_size,
                learning_rate=0.1,
                decay_steps=4000,
                momentum=0.9,
                corpus_name='documents'):
    """Builds the training sub-graph and returns its named nodes.

    The gold reader, network and cost function nodes are merged into
    self.training, a momentum optimizer is attached to nodes['cost'], and the
    resulting op (optionally grouped with numeric checks and the
    parameter-averaging updates) is stored under nodes['train_op'].

    Args:
      task_context: file path from which to read the task context; forwarded
        to self._AddGoldReader
      batch_size: batch size, forwarded to the gold reader and cost function
      learning_rate: initial learning rate, forwarded to self._AddLearningRate
      decay_steps: decay interval, forwarded to self._AddLearningRate
      momentum: momentum argument of tf.train.MomentumOptimizer
      corpus_name: name of the task input, forwarded to self._AddGoldReader

    Returns:
      Dictionary of named training nodes (self.training).
    """
    with tf.name_scope('training'):
        nodes = self.training
        nodes.update(self._AddGoldReader(task_context, batch_size,
                                         corpus_name))
        nodes.update(self._BuildNetwork(nodes['feature_endpoints'],
                                        return_average=False))
        nodes.update(self._AddCostFunction(batch_size, nodes['gold_actions'],
                                           nodes['logits']))
        # Add the optimizer. dict.items() / list(dict.values()) work on both
        # Python 2 and 3; iteritems() does not exist on Python 3, and the
        # parameters are iterated several times below so a real list is kept.
        if self._only_train:
            trainable_params = [v
                                for k, v in self.params.items()
                                if k in self._only_train]
        else:
            trainable_params = list(self.params.values())
        lr = self._AddLearningRate(learning_rate, decay_steps)
        optimizer = tf.train.MomentumOptimizer(lr,
                                               momentum,
                                               use_locking=self._use_locking)
        train_op = optimizer.minimize(nodes['cost'],
                                      var_list=trainable_params)
        # Register every momentum slot so it is initialized and tracked like
        # the other variables.
        for param in trainable_params:
            slot = optimizer.get_slot(param, 'momentum')
            self.inits[slot.name] = state_ops.init_variable(
                slot, tf.zeros_initializer())
            self.variables[slot.name] = slot
        numerical_checks = [
            tf.check_numerics(param,
                              message='Parameter is not finite.')
            for param in trainable_params
            if param.dtype.base_dtype in [tf.float32, tf.float64]
        ]
        check_op = tf.group(*numerical_checks)
        avg_update_op = tf.group(*self._averaging.values())
        train_ops = [train_op]
        if self._check_parameters:
            train_ops.append(check_op)
        if self._use_averaging:
            train_ops.append(avg_update_op)
        nodes['train_op'] = tf.group(*train_ops, name='train_op')
    return nodes
示例15
def _AddParam(self,
              shape,
              dtype,
              name,
              initializer=None,
              return_average=False):
    """Create (once) a model parameter together with its moving average.

    The first call for a given `name` builds the parameter, its optional init
    op, an ExponentialMovingAverage update op and the averaged variable; later
    calls only look the existing entries up. Which of the two variables is
    returned is selected by `return_average`.

    Args:
      shape: int list, tensor shape of the parameter to create
      dtype: tf.DataType, data type of the parameter
      name: string, name of the parameter in the TF graph
      initializer: optional initializer for the parameter
      return_average: if True return the moving average, otherwise return the
        parameter itself

    Returns:
      parameter or averaged parameter
    """
    avg_key = name + '_avg_var'
    if name not in self.params:
        step = tf.cast(self.GetStep(), tf.float32)
        # Parameters and their initializing ops live in a dedicated scope,
        # irrespective of the current scope (training or eval).
        with tf.name_scope(self._param_scope):
            param = tf.get_variable(name, shape, dtype, initializer)
            self.params[name] = param
            if initializer is not None:
                self.inits[name] = state_ops.init_variable(param, initializer)
            # A decay of exactly 1 selects a step-dependent decay instead of
            # the configured constant.
            if self._averaging_decay == 1:
                logging.info('Using vanilla averaging of parameters.')
                decay, num_updates = step / (step + 1.0), None
            else:
                decay, num_updates = self._averaging_decay, step
            ema = tf.train.ExponentialMovingAverage(decay=decay,
                                                    num_updates=num_updates)
            self._averaging[name + '_avg_update'] = ema.apply([param])
            averaged = ema.average(param)
            self.variables[avg_key] = averaged
            self.inits[name + '_avg_init'] = state_ops.init_variable(
                averaged, tf.zeros_initializer())
    if return_average:
        return self.variables[avg_key]
    return self.params[name]
示例16
def AddTraining(self,
                task_context,
                batch_size,
                learning_rate=0.1,
                decay_steps=4000,
                momentum=0.9,
                corpus_name='documents'):
    """Builds the training sub-graph and returns its named nodes.

    The gold reader, network and cost function nodes are merged into
    self.training, a momentum optimizer is attached to nodes['cost'], and the
    resulting op (optionally grouped with numeric checks and the
    parameter-averaging updates) is stored under nodes['train_op'].

    Args:
      task_context: file path from which to read the task context; forwarded
        to self._AddGoldReader
      batch_size: batch size, forwarded to the gold reader and cost function
      learning_rate: initial learning rate, forwarded to self._AddLearningRate
      decay_steps: decay interval, forwarded to self._AddLearningRate
      momentum: momentum argument of tf.train.MomentumOptimizer
      corpus_name: name of the task input, forwarded to self._AddGoldReader

    Returns:
      Dictionary of named training nodes (self.training).
    """
    with tf.name_scope('training'):
        nodes = self.training
        nodes.update(self._AddGoldReader(task_context, batch_size,
                                         corpus_name))
        nodes.update(self._BuildNetwork(nodes['feature_endpoints'],
                                        return_average=False))
        nodes.update(self._AddCostFunction(batch_size, nodes['gold_actions'],
                                           nodes['logits']))
        # Add the optimizer. dict.items() / list(dict.values()) work on both
        # Python 2 and 3; iteritems() does not exist on Python 3, and the
        # parameters are iterated several times below so a real list is kept.
        if self._only_train:
            trainable_params = [v
                                for k, v in self.params.items()
                                if k in self._only_train]
        else:
            trainable_params = list(self.params.values())
        lr = self._AddLearningRate(learning_rate, decay_steps)
        optimizer = tf.train.MomentumOptimizer(lr,
                                               momentum,
                                               use_locking=self._use_locking)
        train_op = optimizer.minimize(nodes['cost'],
                                      var_list=trainable_params)
        # Register every momentum slot so it is initialized and tracked like
        # the other variables.
        for param in trainable_params:
            slot = optimizer.get_slot(param, 'momentum')
            self.inits[slot.name] = state_ops.init_variable(
                slot, tf.zeros_initializer())
            self.variables[slot.name] = slot
        numerical_checks = [
            tf.check_numerics(param,
                              message='Parameter is not finite.')
            for param in trainable_params
            if param.dtype.base_dtype in [tf.float32, tf.float64]
        ]
        check_op = tf.group(*numerical_checks)
        avg_update_op = tf.group(*self._averaging.values())
        train_ops = [train_op]
        if self._check_parameters:
            train_ops.append(check_op)
        if self._use_averaging:
            train_ops.append(avg_update_op)
        nodes['train_op'] = tf.group(*train_ops, name='train_op')
    return nodes
示例17
def _AddParam(self,
              shape,
              dtype,
              name,
              initializer=None,
              return_average=False):
    """Create (once) a model parameter together with its moving average.

    The first call for a given `name` builds the parameter, its optional init
    op, an ExponentialMovingAverage update op and the averaged variable; later
    calls only look the existing entries up. Which of the two variables is
    returned is selected by `return_average`.

    Args:
      shape: int list, tensor shape of the parameter to create
      dtype: tf.DataType, data type of the parameter
      name: string, name of the parameter in the TF graph
      initializer: optional initializer for the parameter
      return_average: if True return the moving average, otherwise return the
        parameter itself

    Returns:
      parameter or averaged parameter
    """
    avg_key = name + '_avg_var'
    if name not in self.params:
        step = tf.cast(self.GetStep(), tf.float32)
        # Parameters and their initializing ops live in a dedicated scope,
        # irrespective of the current scope (training or eval).
        with tf.name_scope(self._param_scope):
            param = tf.get_variable(name, shape, dtype, initializer)
            self.params[name] = param
            if initializer is not None:
                self.inits[name] = state_ops.init_variable(param, initializer)
            # A decay of exactly 1 selects a step-dependent decay instead of
            # the configured constant.
            if self._averaging_decay == 1:
                logging.info('Using vanilla averaging of parameters.')
                decay, num_updates = step / (step + 1.0), None
            else:
                decay, num_updates = self._averaging_decay, step
            ema = tf.train.ExponentialMovingAverage(decay=decay,
                                                    num_updates=num_updates)
            self._averaging[name + '_avg_update'] = ema.apply([param])
            averaged = ema.average(param)
            self.variables[avg_key] = averaged
            self.inits[name + '_avg_init'] = state_ops.init_variable(
                averaged, tf.zeros_initializer())
    if return_average:
        return self.variables[avg_key]
    return self.params[name]
示例18
def AddTraining(self,
                task_context,
                batch_size,
                learning_rate=0.1,
                decay_steps=4000,
                momentum=0.9,
                corpus_name='documents'):
    """Builds the training sub-graph and returns its named nodes.

    The gold reader, network and cost function nodes are merged into
    self.training, a momentum optimizer is attached to nodes['cost'], and the
    resulting op (optionally grouped with numeric checks and the
    parameter-averaging updates) is stored under nodes['train_op'].

    Args:
      task_context: file path from which to read the task context; forwarded
        to self._AddGoldReader
      batch_size: batch size, forwarded to the gold reader and cost function
      learning_rate: initial learning rate, forwarded to self._AddLearningRate
      decay_steps: decay interval, forwarded to self._AddLearningRate
      momentum: momentum argument of tf.train.MomentumOptimizer
      corpus_name: name of the task input, forwarded to self._AddGoldReader

    Returns:
      Dictionary of named training nodes (self.training).
    """
    with tf.name_scope('training'):
        nodes = self.training
        nodes.update(self._AddGoldReader(task_context, batch_size,
                                         corpus_name))
        nodes.update(self._BuildNetwork(nodes['feature_endpoints'],
                                        return_average=False))
        nodes.update(self._AddCostFunction(batch_size, nodes['gold_actions'],
                                           nodes['logits']))
        # Add the optimizer. dict.items() / list(dict.values()) work on both
        # Python 2 and 3; iteritems() does not exist on Python 3, and the
        # parameters are iterated several times below so a real list is kept.
        if self._only_train:
            trainable_params = [v
                                for k, v in self.params.items()
                                if k in self._only_train]
        else:
            trainable_params = list(self.params.values())
        lr = self._AddLearningRate(learning_rate, decay_steps)
        optimizer = tf.train.MomentumOptimizer(lr,
                                               momentum,
                                               use_locking=self._use_locking)
        train_op = optimizer.minimize(nodes['cost'],
                                      var_list=trainable_params)
        # Register every momentum slot so it is initialized and tracked like
        # the other variables.
        for param in trainable_params:
            slot = optimizer.get_slot(param, 'momentum')
            self.inits[slot.name] = state_ops.init_variable(
                slot, tf.zeros_initializer())
            self.variables[slot.name] = slot
        numerical_checks = [
            tf.check_numerics(param,
                              message='Parameter is not finite.')
            for param in trainable_params
            if param.dtype.base_dtype in [tf.float32, tf.float64]
        ]
        check_op = tf.group(*numerical_checks)
        avg_update_op = tf.group(*self._averaging.values())
        train_ops = [train_op]
        if self._check_parameters:
            train_ops.append(check_op)
        if self._use_averaging:
            train_ops.append(avg_update_op)
        nodes['train_op'] = tf.group(*train_ops, name='train_op')
    return nodes
示例19
def _AddParam(self,
              shape,
              dtype,
              name,
              initializer=None,
              return_average=False):
    """Create (once) a model parameter together with its moving average.

    The first call for a given `name` builds the parameter, its optional init
    op, an ExponentialMovingAverage update op and the averaged variable; later
    calls only look the existing entries up. Which of the two variables is
    returned is selected by `return_average`.

    Args:
      shape: int list, tensor shape of the parameter to create
      dtype: tf.DataType, data type of the parameter
      name: string, name of the parameter in the TF graph
      initializer: optional initializer for the parameter
      return_average: if True return the moving average, otherwise return the
        parameter itself

    Returns:
      parameter or averaged parameter
    """
    avg_key = name + '_avg_var'
    if name not in self.params:
        step = tf.cast(self.GetStep(), tf.float32)
        # Parameters and their initializing ops live in a dedicated scope,
        # irrespective of the current scope (training or eval).
        with tf.name_scope(self._param_scope):
            param = tf.get_variable(name, shape, dtype, initializer)
            self.params[name] = param
            if initializer is not None:
                self.inits[name] = state_ops.init_variable(param, initializer)
            # A decay of exactly 1 selects a step-dependent decay instead of
            # the configured constant.
            if self._averaging_decay == 1:
                logging.info('Using vanilla averaging of parameters.')
                decay, num_updates = step / (step + 1.0), None
            else:
                decay, num_updates = self._averaging_decay, step
            ema = tf.train.ExponentialMovingAverage(decay=decay,
                                                    num_updates=num_updates)
            self._averaging[name + '_avg_update'] = ema.apply([param])
            averaged = ema.average(param)
            self.variables[avg_key] = averaged
            # NOTE(review): tf.zeros_initializer is passed uncalled here;
            # presumably this targets a TensorFlow release where it is a
            # plain function of (shape, dtype) - confirm against the pinned
            # version.
            self.inits[name + '_avg_init'] = state_ops.init_variable(
                averaged, tf.zeros_initializer)
    if return_average:
        return self.variables[avg_key]
    return self.params[name]
示例20
def AddTraining(self,
                task_context,
                batch_size,
                learning_rate=0.1,
                decay_steps=4000,
                momentum=0.9,
                corpus_name='documents'):
    """Builds the training sub-graph and returns its named nodes.

    The gold reader, network and cost function nodes are merged into
    self.training, a momentum optimizer is attached to nodes['cost'], and the
    resulting op (optionally grouped with numeric checks and the
    parameter-averaging updates) is stored under nodes['train_op'].

    Args:
      task_context: file path from which to read the task context; forwarded
        to self._AddGoldReader
      batch_size: batch size, forwarded to the gold reader and cost function
      learning_rate: initial learning rate, forwarded to self._AddLearningRate
      decay_steps: decay interval, forwarded to self._AddLearningRate
      momentum: momentum argument of tf.train.MomentumOptimizer
      corpus_name: name of the task input, forwarded to self._AddGoldReader

    Returns:
      Dictionary of named training nodes (self.training).
    """
    with tf.name_scope('training'):
        nodes = self.training
        nodes.update(self._AddGoldReader(task_context, batch_size,
                                         corpus_name))
        nodes.update(self._BuildNetwork(nodes['feature_endpoints'],
                                        return_average=False))
        nodes.update(self._AddCostFunction(batch_size, nodes['gold_actions'],
                                           nodes['logits']))
        # Add the optimizer. dict.items() / list(dict.values()) work on both
        # Python 2 and 3; iteritems() does not exist on Python 3, and the
        # parameters are iterated several times below so a real list is kept.
        if self._only_train:
            trainable_params = [v
                                for k, v in self.params.items()
                                if k in self._only_train]
        else:
            trainable_params = list(self.params.values())
        lr = self._AddLearningRate(learning_rate, decay_steps)
        optimizer = tf.train.MomentumOptimizer(lr,
                                               momentum,
                                               use_locking=self._use_locking)
        train_op = optimizer.minimize(nodes['cost'],
                                      var_list=trainable_params)
        # Register every momentum slot so it is initialized and tracked like
        # the other variables.
        # NOTE(review): tf.zeros_initializer is passed uncalled here;
        # presumably this targets a TensorFlow release where it is a plain
        # function of (shape, dtype) - confirm against the pinned version.
        for param in trainable_params:
            slot = optimizer.get_slot(param, 'momentum')
            self.inits[slot.name] = state_ops.init_variable(
                slot, tf.zeros_initializer)
            self.variables[slot.name] = slot
        numerical_checks = [
            tf.check_numerics(param,
                              message='Parameter is not finite.')
            for param in trainable_params
            if param.dtype.base_dtype in [tf.float32, tf.float64]
        ]
        check_op = tf.group(*numerical_checks)
        avg_update_op = tf.group(*self._averaging.values())
        train_ops = [train_op]
        if self._check_parameters:
            train_ops.append(check_op)
        if self._use_averaging:
            train_ops.append(avg_update_op)
        nodes['train_op'] = tf.group(*train_ops, name='train_op')
    return nodes
示例21
def _AddParam(self,
              shape,
              dtype,
              name,
              initializer=None,
              return_average=False):
    """Create (once) a model parameter together with its moving average.

    The first call for a given `name` builds the parameter, its optional init
    op, an ExponentialMovingAverage update op and the averaged variable; later
    calls only look the existing entries up. Which of the two variables is
    returned is selected by `return_average`.

    Args:
      shape: int list, tensor shape of the parameter to create
      dtype: tf.DataType, data type of the parameter
      name: string, name of the parameter in the TF graph
      initializer: optional initializer for the parameter
      return_average: if True return the moving average, otherwise return the
        parameter itself

    Returns:
      parameter or averaged parameter
    """
    avg_key = name + '_avg_var'
    if name not in self.params:
        step = tf.cast(self.GetStep(), tf.float32)
        # Parameters and their initializing ops live in a dedicated scope,
        # irrespective of the current scope (training or eval).
        with tf.name_scope(self._param_scope):
            param = tf.get_variable(name, shape, dtype, initializer)
            self.params[name] = param
            if initializer is not None:
                self.inits[name] = state_ops.init_variable(param, initializer)
            # A decay of exactly 1 selects a step-dependent decay instead of
            # the configured constant.
            if self._averaging_decay == 1:
                logging.info('Using vanilla averaging of parameters.')
                decay, num_updates = step / (step + 1.0), None
            else:
                decay, num_updates = self._averaging_decay, step
            ema = tf.train.ExponentialMovingAverage(decay=decay,
                                                    num_updates=num_updates)
            self._averaging[name + '_avg_update'] = ema.apply([param])
            averaged = ema.average(param)
            self.variables[avg_key] = averaged
            # NOTE(review): tf.zeros_initializer is passed uncalled here;
            # presumably this targets a TensorFlow release where it is a
            # plain function of (shape, dtype) - confirm against the pinned
            # version.
            self.inits[name + '_avg_init'] = state_ops.init_variable(
                averaged, tf.zeros_initializer)
    if return_average:
        return self.variables[avg_key]
    return self.params[name]
示例22
def AddTraining(self,
                task_context,
                batch_size,
                learning_rate=0.1,
                decay_steps=4000,
                momentum=0.9,
                corpus_name='documents'):
    """Builds the training sub-graph and returns its named nodes.

    The gold reader, network and cost function nodes are merged into
    self.training, a momentum optimizer is attached to nodes['cost'], and the
    resulting op (optionally grouped with numeric checks and the
    parameter-averaging updates) is stored under nodes['train_op'].

    Args:
      task_context: file path from which to read the task context; forwarded
        to self._AddGoldReader
      batch_size: batch size, forwarded to the gold reader and cost function
      learning_rate: initial learning rate, forwarded to self._AddLearningRate
      decay_steps: decay interval, forwarded to self._AddLearningRate
      momentum: momentum argument of tf.train.MomentumOptimizer
      corpus_name: name of the task input, forwarded to self._AddGoldReader

    Returns:
      Dictionary of named training nodes (self.training).
    """
    with tf.name_scope('training'):
        nodes = self.training
        nodes.update(self._AddGoldReader(task_context, batch_size,
                                         corpus_name))
        nodes.update(self._BuildNetwork(nodes['feature_endpoints'],
                                        return_average=False))
        nodes.update(self._AddCostFunction(batch_size, nodes['gold_actions'],
                                           nodes['logits']))
        # Add the optimizer. dict.items() / list(dict.values()) work on both
        # Python 2 and 3; iteritems() does not exist on Python 3, and the
        # parameters are iterated several times below so a real list is kept.
        if self._only_train:
            trainable_params = [v
                                for k, v in self.params.items()
                                if k in self._only_train]
        else:
            trainable_params = list(self.params.values())
        lr = self._AddLearningRate(learning_rate, decay_steps)
        optimizer = tf.train.MomentumOptimizer(lr,
                                               momentum,
                                               use_locking=self._use_locking)
        train_op = optimizer.minimize(nodes['cost'],
                                      var_list=trainable_params)
        # Register every momentum slot so it is initialized and tracked like
        # the other variables.
        # NOTE(review): tf.zeros_initializer is passed uncalled here;
        # presumably this targets a TensorFlow release where it is a plain
        # function of (shape, dtype) - confirm against the pinned version.
        for param in trainable_params:
            slot = optimizer.get_slot(param, 'momentum')
            self.inits[slot.name] = state_ops.init_variable(
                slot, tf.zeros_initializer)
            self.variables[slot.name] = slot
        numerical_checks = [
            tf.check_numerics(param,
                              message='Parameter is not finite.')
            for param in trainable_params
            if param.dtype.base_dtype in [tf.float32, tf.float64]
        ]
        check_op = tf.group(*numerical_checks)
        avg_update_op = tf.group(*self._averaging.values())
        train_ops = [train_op]
        if self._check_parameters:
            train_ops.append(check_op)
        if self._use_averaging:
            train_ops.append(avg_update_op)
        nodes['train_op'] = tf.group(*train_ops, name='train_op')
    return nodes
示例23
def _AddParam(self,
              shape,
              dtype,
              name,
              initializer=None,
              return_average=False):
    """Create (once) a model parameter together with its moving average.

    The first call for a given `name` builds the parameter, its optional init
    op, an ExponentialMovingAverage update op and the averaged variable; later
    calls only look the existing entries up. Which of the two variables is
    returned is selected by `return_average`.

    Args:
      shape: int list, tensor shape of the parameter to create
      dtype: tf.DataType, data type of the parameter
      name: string, name of the parameter in the TF graph
      initializer: optional initializer for the parameter
      return_average: if True return the moving average, otherwise return the
        parameter itself

    Returns:
      parameter or averaged parameter
    """
    avg_key = name + '_avg_var'
    if name not in self.params:
        step = tf.cast(self.GetStep(), tf.float32)
        # Parameters and their initializing ops live in a dedicated scope,
        # irrespective of the current scope (training or eval).
        with tf.name_scope(self._param_scope):
            param = tf.get_variable(name, shape, dtype, initializer)
            self.params[name] = param
            if initializer is not None:
                self.inits[name] = state_ops.init_variable(param, initializer)
            # A decay of exactly 1 selects a step-dependent decay instead of
            # the configured constant.
            if self._averaging_decay == 1:
                logging.info('Using vanilla averaging of parameters.')
                decay, num_updates = step / (step + 1.0), None
            else:
                decay, num_updates = self._averaging_decay, step
            ema = tf.train.ExponentialMovingAverage(decay=decay,
                                                    num_updates=num_updates)
            self._averaging[name + '_avg_update'] = ema.apply([param])
            averaged = ema.average(param)
            self.variables[avg_key] = averaged
            self.inits[name + '_avg_init'] = state_ops.init_variable(
                averaged, tf.zeros_initializer())
    if return_average:
        return self.variables[avg_key]
    return self.params[name]
示例24
def AddTraining(self,
                task_context,
                batch_size,
                learning_rate=0.1,
                decay_steps=4000,
                momentum=0.9,
                corpus_name='documents'):
    """Builds the training sub-graph and returns its named nodes.

    The gold reader, network and cost function nodes are merged into
    self.training, a momentum optimizer is attached to nodes['cost'], and the
    resulting op (optionally grouped with numeric checks and the
    parameter-averaging updates) is stored under nodes['train_op'].

    Args:
      task_context: file path from which to read the task context; forwarded
        to self._AddGoldReader
      batch_size: batch size, forwarded to the gold reader and cost function
      learning_rate: initial learning rate, forwarded to self._AddLearningRate
      decay_steps: decay interval, forwarded to self._AddLearningRate
      momentum: momentum argument of tf.train.MomentumOptimizer
      corpus_name: name of the task input, forwarded to self._AddGoldReader

    Returns:
      Dictionary of named training nodes (self.training).
    """
    with tf.name_scope('training'):
        nodes = self.training
        nodes.update(self._AddGoldReader(task_context, batch_size,
                                         corpus_name))
        nodes.update(self._BuildNetwork(nodes['feature_endpoints'],
                                        return_average=False))
        nodes.update(self._AddCostFunction(batch_size, nodes['gold_actions'],
                                           nodes['logits']))
        # Add the optimizer. dict.items() / list(dict.values()) work on both
        # Python 2 and 3; iteritems() does not exist on Python 3, and the
        # parameters are iterated several times below so a real list is kept.
        if self._only_train:
            trainable_params = [v
                                for k, v in self.params.items()
                                if k in self._only_train]
        else:
            trainable_params = list(self.params.values())
        lr = self._AddLearningRate(learning_rate, decay_steps)
        optimizer = tf.train.MomentumOptimizer(lr,
                                               momentum,
                                               use_locking=self._use_locking)
        train_op = optimizer.minimize(nodes['cost'],
                                      var_list=trainable_params)
        # Register every momentum slot so it is initialized and tracked like
        # the other variables.
        for param in trainable_params:
            slot = optimizer.get_slot(param, 'momentum')
            self.inits[slot.name] = state_ops.init_variable(
                slot, tf.zeros_initializer())
            self.variables[slot.name] = slot
        numerical_checks = [
            tf.check_numerics(param,
                              message='Parameter is not finite.')
            for param in trainable_params
            if param.dtype.base_dtype in [tf.float32, tf.float64]
        ]
        check_op = tf.group(*numerical_checks)
        avg_update_op = tf.group(*self._averaging.values())
        train_ops = [train_op]
        if self._check_parameters:
            train_ops.append(check_op)
        if self._use_averaging:
            train_ops.append(avg_update_op)
        nodes['train_op'] = tf.group(*train_ops, name='train_op')
    return nodes
示例25
def _AddParam(self,
              shape,
              dtype,
              name,
              initializer=None,
              return_average=False):
    """Create (once) a model parameter together with its moving average.

    The first call for a given `name` builds the parameter, its optional init
    op, an ExponentialMovingAverage update op and the averaged variable; later
    calls only look the existing entries up. Which of the two variables is
    returned is selected by `return_average`.

    Args:
      shape: int list, tensor shape of the parameter to create
      dtype: tf.DataType, data type of the parameter
      name: string, name of the parameter in the TF graph
      initializer: optional initializer for the parameter
      return_average: if True return the moving average, otherwise return the
        parameter itself

    Returns:
      parameter or averaged parameter
    """
    avg_key = name + '_avg_var'
    if name not in self.params:
        step = tf.cast(self.GetStep(), tf.float32)
        # Parameters and their initializing ops live in a dedicated scope,
        # irrespective of the current scope (training or eval).
        with tf.name_scope(self._param_scope):
            param = tf.get_variable(name, shape, dtype, initializer)
            self.params[name] = param
            if initializer is not None:
                self.inits[name] = state_ops.init_variable(param, initializer)
            # A decay of exactly 1 selects a step-dependent decay instead of
            # the configured constant.
            if self._averaging_decay == 1:
                logging.info('Using vanilla averaging of parameters.')
                decay, num_updates = step / (step + 1.0), None
            else:
                decay, num_updates = self._averaging_decay, step
            ema = tf.train.ExponentialMovingAverage(decay=decay,
                                                    num_updates=num_updates)
            self._averaging[name + '_avg_update'] = ema.apply([param])
            averaged = ema.average(param)
            self.variables[avg_key] = averaged
            self.inits[name + '_avg_init'] = state_ops.init_variable(
                averaged, tf.zeros_initializer())
    if return_average:
        return self.variables[avg_key]
    return self.params[name]
示例26
def AddTraining(self,
                task_context,
                batch_size,
                learning_rate=0.1,
                decay_steps=4000,
                momentum=0.9,
                corpus_name='documents'):
    """Builds the training sub-graph and returns its named nodes.

    The gold reader, network and cost function nodes are merged into
    self.training, a momentum optimizer is attached to nodes['cost'], and the
    resulting op (optionally grouped with numeric checks and the
    parameter-averaging updates) is stored under nodes['train_op'].

    Args:
      task_context: file path from which to read the task context; forwarded
        to self._AddGoldReader
      batch_size: batch size, forwarded to the gold reader and cost function
      learning_rate: initial learning rate, forwarded to self._AddLearningRate
      decay_steps: decay interval, forwarded to self._AddLearningRate
      momentum: momentum argument of tf.train.MomentumOptimizer
      corpus_name: name of the task input, forwarded to self._AddGoldReader

    Returns:
      Dictionary of named training nodes (self.training).
    """
    with tf.name_scope('training'):
        nodes = self.training
        nodes.update(self._AddGoldReader(task_context, batch_size,
                                         corpus_name))
        nodes.update(self._BuildNetwork(nodes['feature_endpoints'],
                                        return_average=False))
        nodes.update(self._AddCostFunction(batch_size, nodes['gold_actions'],
                                           nodes['logits']))
        # Add the optimizer. dict.items() / list(dict.values()) work on both
        # Python 2 and 3; iteritems() does not exist on Python 3, and the
        # parameters are iterated several times below so a real list is kept.
        if self._only_train:
            trainable_params = [v
                                for k, v in self.params.items()
                                if k in self._only_train]
        else:
            trainable_params = list(self.params.values())
        lr = self._AddLearningRate(learning_rate, decay_steps)
        optimizer = tf.train.MomentumOptimizer(lr,
                                               momentum,
                                               use_locking=self._use_locking)
        train_op = optimizer.minimize(nodes['cost'],
                                      var_list=trainable_params)
        # Register every momentum slot so it is initialized and tracked like
        # the other variables.
        for param in trainable_params:
            slot = optimizer.get_slot(param, 'momentum')
            self.inits[slot.name] = state_ops.init_variable(
                slot, tf.zeros_initializer())
            self.variables[slot.name] = slot
        numerical_checks = [
            tf.check_numerics(param,
                              message='Parameter is not finite.')
            for param in trainable_params
            if param.dtype.base_dtype in [tf.float32, tf.float64]
        ]
        check_op = tf.group(*numerical_checks)
        avg_update_op = tf.group(*self._averaging.values())
        train_ops = [train_op]
        if self._check_parameters:
            train_ops.append(check_op)
        if self._use_averaging:
            train_ops.append(avg_update_op)
        nodes['train_op'] = tf.group(*train_ops, name='train_op')
    return nodes
示例27
def _AddParam(self,
              shape,
              dtype,
              name,
              initializer=None,
              return_average=False):
    """Create (once) a model parameter together with its moving average.

    The first call for a given `name` builds the parameter, its optional init
    op, an ExponentialMovingAverage update op and the averaged variable; later
    calls only look the existing entries up. Which of the two variables is
    returned is selected by `return_average`.

    Args:
      shape: int list, tensor shape of the parameter to create
      dtype: tf.DataType, data type of the parameter
      name: string, name of the parameter in the TF graph
      initializer: optional initializer for the parameter
      return_average: if True return the moving average, otherwise return the
        parameter itself

    Returns:
      parameter or averaged parameter
    """
    avg_key = name + '_avg_var'
    if name not in self.params:
        step = tf.cast(self.GetStep(), tf.float32)
        # Parameters and their initializing ops live in a dedicated scope,
        # irrespective of the current scope (training or eval).
        with tf.name_scope(self._param_scope):
            param = tf.get_variable(name, shape, dtype, initializer)
            self.params[name] = param
            if initializer is not None:
                self.inits[name] = state_ops.init_variable(param, initializer)
            # A decay of exactly 1 selects a step-dependent decay instead of
            # the configured constant.
            if self._averaging_decay == 1:
                logging.info('Using vanilla averaging of parameters.')
                decay, num_updates = step / (step + 1.0), None
            else:
                decay, num_updates = self._averaging_decay, step
            ema = tf.train.ExponentialMovingAverage(decay=decay,
                                                    num_updates=num_updates)
            self._averaging[name + '_avg_update'] = ema.apply([param])
            averaged = ema.average(param)
            self.variables[avg_key] = averaged
            self.inits[name + '_avg_init'] = state_ops.init_variable(
                averaged, tf.zeros_initializer())
    if return_average:
        return self.variables[avg_key]
    return self.params[name]
示例28
def AddTraining(self,
                task_context,
                batch_size,
                learning_rate=0.1,
                decay_steps=4000,
                momentum=0.9,
                corpus_name='documents'):
    """Builds the training sub-graph and returns its named nodes.

    The gold reader, network and cost function nodes are merged into
    self.training, a momentum optimizer is attached to nodes['cost'], and the
    resulting op (optionally grouped with numeric checks and the
    parameter-averaging updates) is stored under nodes['train_op'].

    Args:
      task_context: file path from which to read the task context; forwarded
        to self._AddGoldReader
      batch_size: batch size, forwarded to the gold reader and cost function
      learning_rate: initial learning rate, forwarded to self._AddLearningRate
      decay_steps: decay interval, forwarded to self._AddLearningRate
      momentum: momentum argument of tf.train.MomentumOptimizer
      corpus_name: name of the task input, forwarded to self._AddGoldReader

    Returns:
      Dictionary of named training nodes (self.training).
    """
    with tf.name_scope('training'):
        nodes = self.training
        nodes.update(self._AddGoldReader(task_context, batch_size,
                                         corpus_name))
        nodes.update(self._BuildNetwork(nodes['feature_endpoints'],
                                        return_average=False))
        nodes.update(self._AddCostFunction(batch_size, nodes['gold_actions'],
                                           nodes['logits']))
        # Add the optimizer. dict.items() / list(dict.values()) work on both
        # Python 2 and 3; iteritems() does not exist on Python 3, and the
        # parameters are iterated several times below so a real list is kept.
        if self._only_train:
            trainable_params = [v
                                for k, v in self.params.items()
                                if k in self._only_train]
        else:
            trainable_params = list(self.params.values())
        lr = self._AddLearningRate(learning_rate, decay_steps)
        optimizer = tf.train.MomentumOptimizer(lr,
                                               momentum,
                                               use_locking=self._use_locking)
        train_op = optimizer.minimize(nodes['cost'],
                                      var_list=trainable_params)
        # Register every momentum slot so it is initialized and tracked like
        # the other variables.
        for param in trainable_params:
            slot = optimizer.get_slot(param, 'momentum')
            self.inits[slot.name] = state_ops.init_variable(
                slot, tf.zeros_initializer())
            self.variables[slot.name] = slot
        numerical_checks = [
            tf.check_numerics(param,
                              message='Parameter is not finite.')
            for param in trainable_params
            if param.dtype.base_dtype in [tf.float32, tf.float64]
        ]
        check_op = tf.group(*numerical_checks)
        avg_update_op = tf.group(*self._averaging.values())
        train_ops = [train_op]
        if self._check_parameters:
            train_ops.append(check_op)
        if self._use_averaging:
            train_ops.append(avg_update_op)
        nodes['train_op'] = tf.group(*train_ops, name='train_op')
    return nodes
示例29
def _AddParam(self,
              shape,
              dtype,
              name,
              initializer=None,
              return_average=False):
    """Creates (once per name) a parameter and its moving-average shadow.

    On the first call for a given name the parameter is created with
    tf.get_variable and stored in self.params; if an initializer was given,
    an init op is stored in self.inits. An ExponentialMovingAverage is then
    attached to it: the update op goes to self._averaging under
    '<name>_avg_update', the averaged variable to self.variables under
    '<name>_avg_var', and its zero-init op to self.inits under
    '<name>_avg_init'. Later calls with the same name only look up what was
    registered.

    Args:
      shape: int list, tensor shape of the parameter to create
      dtype: tf.DataType, data type of the parameter
      name: string, name of the parameter in the TF graph
      initializer: optional initializer for the parameter
      return_average: if False, return the parameter; otherwise return its
        moving average

    Returns:
      parameter or averaged parameter
    """
    avg_var_key = name + '_avg_var'

    if name not in self.params:
        global_step = tf.cast(self.GetStep(), tf.float32)
        # Parameters and their initializing ops always live in the dedicated
        # parameter scope, whichever scope (training or eval) is active.
        with tf.name_scope(self._param_scope):
            variable = tf.get_variable(name, shape, dtype, initializer)
            self.params[name] = variable
            if initializer is not None:
                self.inits[name] = state_ops.init_variable(variable,
                                                           initializer)

            # A decay of exactly 1 selects a step-dependent decay of
            # step / (step + 1) with no num_updates; any other value is used
            # as the decay directly, with the step passed as num_updates.
            if self._averaging_decay == 1:
                logging.info('Using vanilla averaging of parameters.')
                decay = global_step / (global_step + 1.0)
                num_updates = None
            else:
                decay = self._averaging_decay
                num_updates = global_step
            averager = tf.train.ExponentialMovingAverage(
                decay=decay, num_updates=num_updates)

            self._averaging[name + '_avg_update'] = averager.apply([variable])
            shadow = averager.average(variable)
            self.variables[avg_var_key] = shadow
            self.inits[name + '_avg_init'] = state_ops.init_variable(
                shadow, tf.zeros_initializer)

    if return_average:
        return self.variables[avg_var_key]
    return self.params[name]
示例30
def AddTraining(self,
                task_context,
                batch_size,
                learning_rate=0.1,
                decay_steps=4000,
                momentum=0.9,
                corpus_name='documents'):
    """Builds a trainer to minimize the cross entropy cost function.

    As a side effect, each momentum slot created by the optimizer is stored
    in self.variables, and an op that zero-initializes it is stored in
    self.inits; both are keyed by the slot's name.

    Args:
      task_context: file path from which to read the task context
      batch_size: batch size to request from reader op
      learning_rate: initial value of the learning rate
      decay_steps: decay learning rate by 0.96 every this many steps
      momentum: momentum parameter used when training with momentum
      corpus_name: name of the task input to read parses from

    Returns:
      Dictionary of named training nodes (self.training, updated in place),
      including the grouped op under 'train_op'.
    """
    with tf.name_scope('training'):
        nodes = self.training
        nodes.update(
            self._AddGoldReader(task_context, batch_size, corpus_name))
        nodes.update(self._BuildNetwork(nodes['feature_endpoints'],
                                        return_average=False))
        nodes.update(self._AddCostFunction(batch_size,
                                           nodes['gold_actions'],
                                           nodes['logits']))

        # Add the optimizer.
        # dict.iteritems() is Python 2 only; items() works on both major
        # versions. The values() result is copied into a list because on
        # Python 3 it is a view, while it is used below as the optimizer's
        # var_list and iterated more than once.
        if self._only_train:
            trainable_params = [v
                                for k, v in self.params.items()
                                if k in self._only_train]
        else:
            trainable_params = list(self.params.values())
        lr = self._AddLearningRate(learning_rate, decay_steps)
        optimizer = tf.train.MomentumOptimizer(lr,
                                               momentum,
                                               use_locking=self._use_locking)
        train_op = optimizer.minimize(nodes['cost'],
                                      var_list=trainable_params)

        # Register every momentum accumulator together with its reset op.
        # NOTE(review): the initializer is passed uninstantiated here.
        # TensorFlow 1.0+ documents tf.zeros_initializer as something that
        # must be called first (tf.zeros_initializer()) - confirm which
        # TensorFlow version this code targets before changing it.
        for param in trainable_params:
            slot = optimizer.get_slot(param, 'momentum')
            self.inits[slot.name] = state_ops.init_variable(
                slot, tf.zeros_initializer)
            self.variables[slot.name] = slot

        # Only floating point parameters get a finite-ness check.
        numerical_checks = [
            tf.check_numerics(param, message='Parameter is not finite.')
            for param in trainable_params
            if param.dtype.base_dtype in [tf.float32, tf.float64]
        ]
        check_op = tf.group(*numerical_checks)
        avg_update_op = tf.group(*self._averaging.values())

        # Both optional ops are built unconditionally; the flags only decide
        # whether they run as part of 'train_op'.
        train_ops = [train_op]
        if self._check_parameters:
            train_ops.append(check_op)
        if self._use_averaging:
            train_ops.append(avg_update_op)
        nodes['train_op'] = tf.group(*train_ops, name='train_op')
    return nodes