Python source code examples: syntaxnet.util.check.Le()

Example 1 (unit test exercising check.Le)
def testCheckLe(self):
    check.Le(1, 2, 'foo')
    check.Le(1, 1, 'foo')
    with self.assertRaisesRegexp(ValueError, 'bar'):
      check.Le(1, 0, 'bar')
    with self.assertRaisesRegexp(RuntimeError, 'baz'):
      check.Le(1, -1, 'baz', RuntimeError) 
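
From the assertions above, check.Le(a, b, message, error_type) evidently succeeds when a <= b and otherwise raises error_type (ValueError by default) carrying the supplied message. A minimal sketch consistent with that behavior, and not necessarily the actual syntaxnet.util.check implementation, might be:

def Le(value, limit, message, error_type=ValueError):
  # Raise |error_type| carrying |message| unless value <= limit.
  if not value <= limit:
    raise error_type(message)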
Example 2 (maybe_apply_dropout: per-sequence dropout helper; the input rank is validated up front)
def maybe_apply_dropout(inputs, keep_prob, per_sequence, stride=None):
  """Applies dropout, if so configured, to an input tensor.

  The input may be rank 2 or 3 depending on whether the stride (i.e., batch
  size) has been incorporated into the shape.

  Args:
    inputs: [stride * num_steps, dim] or [stride, num_steps, dim] input tensor.
    keep_prob: Scalar probability of keeping each input element.  If >= 1.0, no
        dropout is performed.
    per_sequence: If true, sample the dropout mask once per sequence, instead of
        once per step.  Requires |stride| when true.
    stride: Scalar batch size.  Optional if |per_sequence| is false.

  Returns:
    [stride * num_steps, dim] or [stride, num_steps, dim] tensor, matching the
    shape of |inputs|, containing the masked or original inputs, depending on
    whether dropout was actually performed.
  """
  check.Ge(inputs.get_shape().ndims, 2, 'inputs must be rank 2 or 3')
  check.Le(inputs.get_shape().ndims, 3, 'inputs must be rank 2 or 3')
  flat = (inputs.get_shape().ndims == 2)

  if keep_prob >= 1.0:
    return inputs

  if not per_sequence:
    return tf.nn.dropout(inputs, keep_prob)

  check.NotNone(stride, 'per-sequence dropout requires stride')
  dim = inputs.get_shape().as_list()[-1]
  check.NotNone(dim, 'inputs must have static activation dimension, but have '
                'static shape %s' % inputs.get_shape().as_list())

  # If needed, restore the batch dimension to separate the sequences.
  inputs_sxnxd = tf.reshape(inputs, [stride, -1, dim]) if flat else inputs

  # Replace |num_steps| with 1 in |noise_shape|, so the dropout mask broadcasts
  # to all steps for a particular sequence.
  noise_shape = [stride, 1, dim]
  masked_sxnxd = tf.nn.dropout(inputs_sxnxd, keep_prob, noise_shape)

  # If needed, flatten out the batch dimension in the return value.
  return tf.reshape(masked_sxnxd, [-1, dim]) if flat else masked_sxnxd 
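
A usage sketch for the function above (assuming TF 1.x graph mode; the shapes and tensor names here are illustrative, not taken from the original code):

import tensorflow as tf

stride = 8      # number of sequences in the batch
num_steps = 20  # steps per sequence
dim = 64        # activation dimension

# Flattened activations of shape [stride * num_steps, dim].
activations = tf.random_normal([stride * num_steps, dim])

# One dropout mask is sampled per sequence and broadcast across its steps,
# rather than being resampled at every step.
dropped = maybe_apply_dropout(
    activations, keep_prob=0.7, per_sequence=True, stride=stride)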
Example 3 (a variant of maybe_apply_dropout that validates the input rank only when per-sequence dropout is actually applied)
def maybe_apply_dropout(inputs, keep_prob, per_sequence, stride=None):
  """Applies dropout, if so configured, to an input tensor.

  The input may be rank 2 or 3 depending on whether the stride (i.e., batch
  size) has been incorporated into the shape.

  Args:
    inputs: [stride * num_steps, dim] or [stride, num_steps, dim] input tensor.
    keep_prob: Scalar probability of keeping each input element.  If >= 1.0, no
        dropout is performed.
    per_sequence: If true, sample the dropout mask once per sequence, instead of
        once per step.  Requires |stride| when true.
    stride: Scalar batch size.  Optional if |per_sequence| is false.

  Returns:
    [stride * num_steps, dim] or [stride, num_steps, dim] tensor, matching the
    shape of |inputs|, containing the masked or original inputs, depending on
    whether dropout was actually performed.
  """

  if keep_prob >= 1.0:
    return inputs

  if not per_sequence:
    return tf.nn.dropout(inputs, keep_prob)

  # We only check the dims if we are applying per-sequence dropout
  check.Ge(inputs.get_shape().ndims, 2, 'inputs must be rank 2 or 3')
  check.Le(inputs.get_shape().ndims, 3, 'inputs must be rank 2 or 3')
  flat = (inputs.get_shape().ndims == 2)

  check.NotNone(stride, 'per-sequence dropout requires stride')
  dim = inputs.get_shape().as_list()[-1]
  check.NotNone(dim, 'inputs must have static activation dimension, but have '
                'static shape %s' % inputs.get_shape().as_list())

  # If needed, restore the batch dimension to separate the sequences.
  inputs_sxnxd = tf.reshape(inputs, [stride, -1, dim]) if flat else inputs

  # Replace |num_steps| with 1 in |noise_shape|, so the dropout mask broadcasts
  # to all steps for a particular sequence.
  noise_shape = [stride, 1, dim]
  masked_sxnxd = tf.nn.dropout(inputs_sxnxd, keep_prob, noise_shape)

  # If needed, flatten out the batch dimension in the return value.
  return tf.reshape(masked_sxnxd, [-1, dim]) if flat else masked_sxnxd 
Example 4 (apply_feature_id_dropout: check.Le bounds the length of the keep-probability list)
def apply_feature_id_dropout(ids, weights, channel):
  """Randomly perturbs a vector of feature IDs.

  Args:
    ids: Vector of feature IDs.
    weights: Vector of feature weights.
    channel: FixedFeatureChannel that extracted the |ids|.

  Returns:
    Copy of |ids| and |weights| where each ID is randomly replaced with
    |channel.dropout_id|, according to the probabilities in
    |channel.dropout_keep_probability|. The weights of dropped features are
    set to zero if |channel.dropout_id| equals |channel.vocabulary_size|.
  """
  check.Gt(
      len(channel.dropout_keep_probability), 0,
      'Channel {} dropout_keep_probability is empty'.format(channel.name))
  check.Le(
      len(channel.dropout_keep_probability), channel.vocabulary_size,
      'Channel {} dropout_keep_probability is too long'.format(channel.name))

  # Channel fields, converted from proto to constant tensor.
  dropout_id = tf.constant(
      channel.dropout_id, name='dropout_id', dtype=tf.int64)
  dropout_keep_probabilities = tf.constant(
      list(channel.dropout_keep_probability),
      name='dropout_keep_probability',
      dtype=tf.float32,
      shape=[channel.vocabulary_size])

  # The keep probabilities for the current batch of feature IDs.
  keep_probabilities = tf.gather(dropout_keep_probabilities, ids)

  # Draw random values and determine which IDs should be kept.
  shape = tf.shape(ids)
  noise = tf.random_uniform(shape)  # \in [0,1)^d
  should_keep = noise < keep_probabilities

  # Replace dropped IDs with the specified replacement ID.
  dropout_ids = tf.fill(shape, dropout_id)
  new_ids = tf.where(should_keep, ids, dropout_ids)
  if channel.dropout_id == channel.vocabulary_size:
    # Replace weights of dropped IDs with 0.
    zeros = tf.zeros(shape, dtype=tf.float32)
    new_weights = tf.where(should_keep, weights, zeros)
  else:
    new_weights = weights
  return new_ids, new_weights 
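
A usage sketch (assuming TF 1.x; FakeChannel below is a hypothetical stand-in for the FixedFeatureChannel proto and models only the fields this function reads):

import collections
import tensorflow as tf

FakeChannel = collections.namedtuple(
    'FakeChannel',
    ['name', 'vocabulary_size', 'dropout_id', 'dropout_keep_probability'])

channel = FakeChannel(
    name='words',
    vocabulary_size=4,
    # dropout_id == vocabulary_size, so dropped features also get zero weight.
    dropout_id=4,
    # Per-ID keep probabilities; must be non-empty and at most
    # vocabulary_size entries long, per the checks above.
    dropout_keep_probability=[1.0, 0.5, 0.5, 0.1])

ids = tf.constant([0, 1, 2, 3], dtype=tf.int64)
weights = tf.ones([4], dtype=tf.float32)
new_ids, new_weights = apply_feature_id_dropout(ids, weights, channel)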
Example 5 (a variant of maybe_apply_dropout that also accepts a precomputed dropout_mask and an op name)
def maybe_apply_dropout(inputs,
                        keep_prob,
                        per_sequence,
                        stride=None,
                        dropout_mask=None,
                        name=None):
  """Applies dropout, if so configured, to an input tensor.

  The input may be rank 2 or 3 depending on whether the stride (i.e., batch
  size) has been incorporated into the shape.

  Args:
    inputs: [stride * num_steps, dim] or [stride, num_steps, dim] input tensor.
    keep_prob: Scalar probability of keeping each input element.  If >= 1.0, no
        dropout is performed.
    per_sequence: If true, sample the dropout mask once per sequence, instead of
        once per step.  Either |stride| or |dropout_mask| must be set when true.
    stride: Scalar batch size.  Optional if |per_sequence| is false, or if
        |dropout_mask| is provided.
    dropout_mask: Precomputed dropout mask to apply to the |inputs|; must be
        broadcastable to |inputs|.  Optional if |per_sequence| is false, or if
        |stride| is provided.
    name: Optional name for the dropout operation, if dropout is applied.

  Returns:
    [stride * num_steps, dim] or [stride, num_steps, dim] tensor, matching the
    shape of |inputs|, containing the masked or original inputs, depending on
    whether dropout was actually performed.
  """
  if keep_prob >= 1.0:
    return inputs

  if not per_sequence:
    return tf.nn.dropout(inputs, keep_prob, name=name)

  if dropout_mask is not None:
    return tf.multiply(inputs, dropout_mask, name=name)

  # We only check the dims if we are applying per-sequence dropout
  check.Ge(inputs.get_shape().ndims, 2, 'inputs must be rank 2 or 3')
  check.Le(inputs.get_shape().ndims, 3, 'inputs must be rank 2 or 3')
  flat = (inputs.get_shape().ndims == 2)

  check.NotNone(stride, 'per-sequence dropout requires stride')
  dim = inputs.get_shape().as_list()[-1]
  check.NotNone(dim, 'inputs must have static activation dimension, but have '
                'static shape %s' % inputs.get_shape().as_list())

  # If needed, restore the batch dimension to separate the sequences.
  inputs_sxnxd = tf.reshape(inputs, [stride, -1, dim]) if flat else inputs

  # Replace |num_steps| with 1 in |noise_shape|, so the dropout mask broadcasts
  # to all steps for a particular sequence.
  noise_shape = [stride, 1, dim]
  masked_sxnxd = tf.nn.dropout(inputs_sxnxd, keep_prob, noise_shape, name=name)

  # If needed, flatten out the batch dimension in the return value.
  return tf.reshape(masked_sxnxd, [-1, dim]) if flat else masked_sxnxd 
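
When the same per-sequence dropout pattern must be shared across several call sites, a mask can be precomputed once and passed in via |dropout_mask|. One way to build such a mask (a sketch, assuming TF 1.x; tf.nn.dropout emits values of 0 or 1/keep_prob, so the mask carries the usual inverted-dropout scaling):

import tensorflow as tf

stride, num_steps, dim = 8, 20, 64
keep_prob = 0.7

# Shape [stride, 1, dim] so the mask broadcasts over the num_steps axis.
shared_mask = tf.nn.dropout(tf.ones([stride, 1, dim]), keep_prob)

hidden = tf.random_normal([stride, num_steps, dim])
dropped = maybe_apply_dropout(
    hidden, keep_prob, per_sequence=True, dropout_mask=shared_mask,
    name='hidden_dropout')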