Python source code examples: syntaxnet.util.check.Ge()

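The snippets below are usage examples of check.Ge() collected from the SyntaxNet/DRAGNN codebase; they assume the surrounding module's imports (typically import tensorflow as tf, from syntaxnet.util import check, and a tensor-array alias such as from tensorflow.python.ops import tensor_array_ops as ta for Example 1). As the unit test in Example 2 shows, check.Ge(lhs, rhs, message, error) passes when lhs >= rhs and otherwise raises ValueError, or a caller-supplied exception type, with the given message. A minimal sketch of the checker, consistent with that test but not necessarily identical to the real syntaxnet.util.check implementation:

def Ge(lhs, rhs, message=None, error=ValueError):
  """Raises |error| with |message| unless lhs >= rhs."""
  if not lhs >= rhs:
    raise error(message or '%r >= %r failed' % (lhs, rhs))

Ge(2, 1, 'foo')  # passes
Ge(1, 1, 'foo')  # passes: the comparison is non-strict
try:
  Ge(0, 1, 'bar')
except ValueError as e:
  print(e)  # prints 'bar'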
Example 1
def create_array(self, stride):
    """Creates a new tensor array to store this layer's activations.

    Arguments:
      stride: Possibly dynamic batch * beam size with which to initialize the
        tensor array

    Returns:
      TensorArray object
    """
    check.Ge(self.dim, 0, 'Cannot create array when dimension is dynamic')
    tensor_array = ta.TensorArray(
        dtype=tf.float32,
        size=0,
        dynamic_size=True,
        clear_after_read=False,
        infer_shape=False,
        name='%s_array' % self.name)

    # Start each array with all zeros. Special values will still be learned via
    # the extra embedding dimension stored for each linked feature channel.
    initial_value = tf.zeros([stride, self.dim])
    return tensor_array.write(0, initial_value) 
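Note: check.Ge here rejects a dynamic layer dimension before the zero-valued initial slice is sized; dynamic dimensions are presumably encoded as negative values (e.g. -1), so requiring self.dim >= 0 is sufficient.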
Example 2
def testCheckGe(self):
    check.Ge(2, 1, 'foo')
    check.Ge(1, 1, 'foo')
    with self.assertRaisesRegexp(ValueError, 'bar'):
      check.Ge(0, 1, 'bar')
    with self.assertRaisesRegexp(RuntimeError, 'baz'):
      check.Ge(-1, 1, 'baz', RuntimeError) 
Example 3
def calculate_segmentation_metrics(gold_corpus, annotated_corpus):
  """Calculate precision/recall/f1 based on gold and annotated sentences."""
  check.Eq(len(gold_corpus), len(annotated_corpus), 'Corpora are not aligned')
  num_gold_tokens = 0
  num_test_tokens = 0
  num_correct_tokens = 0
  def token_span(token):
    check.Ge(token.end, token.start)
    return (token.start, token.end)

  def ratio(numerator, denominator):
    check.Ge(numerator, 0)
    check.Ge(denominator, 0)
    if denominator > 0:
      return numerator / denominator
    elif numerator == 0:
      return 0.0  # map 0/0 to 0
    else:
      return float('inf')  # map x/0 to inf

  for gold_str, annotated_str in zip(gold_corpus, annotated_corpus):
    gold = sentence_pb2.Sentence()
    annotated = sentence_pb2.Sentence()
    gold.ParseFromString(gold_str)
    annotated.ParseFromString(annotated_str)
    check.Eq(gold.text, annotated.text, 'Text is not aligned')
    gold_spans = set()
    test_spans = set()
    for token in gold.token:
      check.NotIn(token_span(token), gold_spans, 'Duplicate token')
      gold_spans.add(token_span(token))
    for token in annotated.token:
      check.NotIn(token_span(token), test_spans, 'Duplicate token')
      test_spans.add(token_span(token))
    num_gold_tokens += len(gold_spans)
    num_test_tokens += len(test_spans)
    num_correct_tokens += len(gold_spans.intersection(test_spans))

  tf.logging.info('Total num documents: %d', len(annotated_corpus))
  tf.logging.info('Total gold tokens: %d', num_gold_tokens)
  tf.logging.info('Total test tokens: %d', num_test_tokens)
  precision = 100 * ratio(num_correct_tokens, num_test_tokens)
  recall = 100 * ratio(num_correct_tokens, num_gold_tokens)
  f1 = ratio(2 * precision * recall, precision + recall)
  tf.logging.info('Precision: %.2f%%', precision)
  tf.logging.info('Recall: %.2f%%', recall)
  tf.logging.info('F1: %.2f%%', f1)

  return round(precision, 2), round(recall, 2), round(f1, 2) 
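Because precision and recall are already expressed in percent, the final ratio call yields the F1 score, their harmonic mean, directly in percent. A quick standalone check of the arithmetic with hypothetical counts (8 correct tokens, 10 predicted, 9 gold):

precision = 100 * 8 / 10.0  # 80.0
recall = 100 * 8 / 9.0      # 88.888...
f1 = 2 * precision * recall / (precision + recall)
print(round(precision, 2), round(recall, 2), round(f1, 2))  # 80.0 88.89 84.21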
Example 4
def maybe_apply_dropout(inputs, keep_prob, per_sequence, stride=None):
  """Applies dropout, if so configured, to an input tensor.

  The input may be rank 2 or 3 depending on whether the stride (i.e., batch
  size) has been incorporated into the shape.

  Args:
    inputs: [stride * num_steps, dim] or [stride, num_steps, dim] input tensor.
    keep_prob: Scalar probability of keeping each input element.  If >= 1.0, no
        dropout is performed.
    per_sequence: If true, sample the dropout mask once per sequence, instead of
        once per step.  Requires |stride| when true.
    stride: Scalar batch size.  Optional if |per_sequence| is false.

  Returns:
    [stride * num_steps, dim] or [stride, num_steps, dim] tensor, matching the
    shape of |inputs|, containing the masked or original inputs, depending on
    whether dropout was actually performed.
  """
  check.Ge(inputs.get_shape().ndims, 2, 'inputs must be rank 2 or 3')
  check.Le(inputs.get_shape().ndims, 3, 'inputs must be rank 2 or 3')
  flat = (inputs.get_shape().ndims == 2)

  if keep_prob >= 1.0:
    return inputs

  if not per_sequence:
    return tf.nn.dropout(inputs, keep_prob)

  check.NotNone(stride, 'per-sequence dropout requires stride')
  dim = inputs.get_shape().as_list()[-1]
  check.NotNone(dim, 'inputs must have static activation dimension, but have '
                'static shape %s' % inputs.get_shape().as_list())

  # If needed, restore the batch dimension to separate the sequences.
  inputs_sxnxd = tf.reshape(inputs, [stride, -1, dim]) if flat else inputs

  # Replace |num_steps| with 1 in |noise_shape|, so the dropout mask broadcasts
  # to all steps for a particular sequence.
  noise_shape = [stride, 1, dim]
  masked_sxnxd = tf.nn.dropout(inputs_sxnxd, keep_prob, noise_shape)

  # If needed, flatten out the batch dimension in the return value.
  return tf.reshape(masked_sxnxd, [-1, dim]) if flat else masked_sxnxd 
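When per_sequence is true, a rank-2 ("flat") input is reshaped to [stride, num_steps, dim] so that noise_shape = [stride, 1, dim] broadcasts a single dropout mask across every step of each sequence. A hypothetical invocation, assuming the TF1-style APIs used above and illustrative shapes:

inputs = tf.zeros([4 * 7, 16])  # 4 sequences, 7 steps, 16 activations
outputs = maybe_apply_dropout(inputs, keep_prob=0.8, per_sequence=True, stride=4)
# outputs keeps the flat [28, 16] shape; each sequence shares one dropout mask.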
Example 5
def maybe_apply_dropout(inputs, keep_prob, per_sequence, stride=None):
  """Applies dropout, if so configured, to an input tensor.

  The input may be rank 2 or 3 depending on whether the stride (i.e., batch
  size) has been incorporated into the shape.

  Args:
    inputs: [stride * num_steps, dim] or [stride, num_steps, dim] input tensor.
    keep_prob: Scalar probability of keeping each input element.  If >= 1.0, no
        dropout is performed.
    per_sequence: If true, sample the dropout mask once per sequence, instead of
        once per step.  Requires |stride| when true.
    stride: Scalar batch size.  Optional if |per_sequence| is false.

  Returns:
    [stride * num_steps, dim] or [stride, num_steps, dim] tensor, matching the
    shape of |inputs|, containing the masked or original inputs, depending on
    whether dropout was actually performed.
  """

  if keep_prob >= 1.0:
    return inputs

  if not per_sequence:
    return tf.nn.dropout(inputs, keep_prob)

  # We only check the dims if we are applying per-sequence dropout
  check.Ge(inputs.get_shape().ndims, 2, 'inputs must be rank 2 or 3')
  check.Le(inputs.get_shape().ndims, 3, 'inputs must be rank 2 or 3')
  flat = (inputs.get_shape().ndims == 2)

  check.NotNone(stride, 'per-sequence dropout requires stride')
  dim = inputs.get_shape().as_list()[-1]
  check.NotNone(dim, 'inputs must have static activation dimension, but have '
                'static shape %s' % inputs.get_shape().as_list())

  # If needed, restore the batch dimension to separate the sequences.
  inputs_sxnxd = tf.reshape(inputs, [stride, -1, dim]) if flat else inputs

  # Replace |num_steps| with 1 in |noise_shape|, so the dropout mask broadcasts
  # to all steps for a particular sequence.
  noise_shape = [stride, 1, dim]
  masked_sxnxd = tf.nn.dropout(inputs_sxnxd, keep_prob, noise_shape)

  # If needed, flatten out the batch dimension in the return value.
  return tf.reshape(masked_sxnxd, [-1, dim]) if flat else masked_sxnxd 
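This variant matches Example 4 except that the rank checks run only once per-sequence dropout is actually requested; the early returns for keep_prob >= 1.0 and for per-step dropout therefore accept inputs of any rank.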
Example 6
def maybe_apply_dropout(inputs,
                        keep_prob,
                        per_sequence,
                        stride=None,
                        dropout_mask=None,
                        name=None):
  """Applies dropout, if so configured, to an input tensor.

  The input may be rank 2 or 3 depending on whether the stride (i.e., batch
  size) has been incorporated into the shape.

  Args:
    inputs: [stride * num_steps, dim] or [stride, num_steps, dim] input tensor.
    keep_prob: Scalar probability of keeping each input element.  If >= 1.0, no
        dropout is performed.
    per_sequence: If true, sample the dropout mask once per sequence, instead of
        once per step.  Either |stride| or |dropout_mask| must be set when true.
    stride: Scalar batch size.  Optional if |per_sequence| is false, or if
        |dropout_mask| is provided.
    dropout_mask: Precomputed dropout mask to apply to the |inputs|; must be
        broadcastable to |inputs|.  Optional if |per_sequence| is false, or if
        |stride| is provided.
    name: Optional name for the dropout operation, if dropout is applied.

  Returns:
    [stride * num_steps, dim] or [stride, num_steps, dim] tensor, matching the
    shape of |inputs|, containing the masked or original inputs, depending on
    whether dropout was actually performed.
  """
  if keep_prob >= 1.0:
    return inputs

  if not per_sequence:
    return tf.nn.dropout(inputs, keep_prob, name=name)

  if dropout_mask is not None:
    return tf.multiply(inputs, dropout_mask, name=name)

  # We only check the dims if we are applying per-sequence dropout
  check.Ge(inputs.get_shape().ndims, 2, 'inputs must be rank 2 or 3')
  check.Le(inputs.get_shape().ndims, 3, 'inputs must be rank 2 or 3')
  flat = (inputs.get_shape().ndims == 2)

  check.NotNone(stride, 'per-sequence dropout requires stride')
  dim = inputs.get_shape().as_list()[-1]
  check.NotNone(dim, 'inputs must have static activation dimension, but have '
                'static shape %s' % inputs.get_shape().as_list())

  # If needed, restore the batch dimension to separate the sequences.
  inputs_sxnxd = tf.reshape(inputs, [stride, -1, dim]) if flat else inputs

  # Replace |num_steps| with 1 in |noise_shape|, so the dropout mask broadcasts
  # to all steps for a particular sequence.
  noise_shape = [stride, 1, dim]
  masked_sxnxd = tf.nn.dropout(inputs_sxnxd, keep_prob, noise_shape, name=name)

  # If needed, flatten out the batch dimension in the return value.
  return tf.reshape(masked_sxnxd, [-1, dim]) if flat else masked_sxnxd 
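The dropout_mask short-circuit lets a caller sample one mask up front and reuse it across several calls; that path multiplies and returns before any rank or stride checks, so the mask only needs to be broadcastable to inputs. A hypothetical sketch that builds such a per-sequence mask with the usual inverted-dropout scaling (entries are 0 or 1/keep_prob):

mask = tf.nn.dropout(tf.ones([4, 1, 16]), 0.8)  # one mask per sequence
inputs_sxnxd = tf.zeros([4, 7, 16])
outputs = maybe_apply_dropout(inputs_sxnxd, 0.8, per_sequence=True,
                              dropout_mask=mask)
# Equivalent to tf.multiply(inputs_sxnxd, mask); shape stays [4, 7, 16].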