Python源码示例:syntaxnet.util.check.NotIn()

示例1
def testCheckNotIn(self):
    """Exercise check.NotIn with absent and present elements."""
    letters = ('a', 'b', 'c')
    mapping = {'a': 1, 'b': 2}

    # Absent element / absent key: neither call is expected to raise.
    check.NotIn('d', letters, 'foo')
    check.NotIn('c', mapping, 'bar')

    # Present element, no explicit error type: ValueError carrying the message.
    with self.assertRaisesRegexp(ValueError, 'bar'):
        check.NotIn('a', letters, 'bar')

    # Present key, explicit error type: that type carrying the message.
    with self.assertRaisesRegexp(RuntimeError, 'baz'):
        check.NotIn('b', mapping, 'baz', RuntimeError)
示例2
def testCheckNotIn(self):
    """Exercise check.NotIn with absent and present elements."""
    letters = ('a', 'b', 'c')
    mapping = {'a': 1, 'b': 2}

    # Absent element / absent key: neither call is expected to raise.
    check.NotIn('d', letters, 'foo')
    check.NotIn('c', mapping, 'bar')

    # Present element, no explicit error type: ValueError carrying the message.
    with self.assertRaisesRegexp(ValueError, 'bar'):
        check.NotIn('a', letters, 'bar')

    # Present key, explicit error type: that type carrying the message.
    with self.assertRaisesRegexp(RuntimeError, 'baz'):
        check.NotIn('b', mapping, 'baz', RuntimeError)
示例3
def testCheckNotIn(self):
    """Exercise check.NotIn with absent and present elements."""
    letters = ('a', 'b', 'c')
    mapping = {'a': 1, 'b': 2}

    # Absent element / absent key: neither call is expected to raise.
    check.NotIn('d', letters, 'foo')
    check.NotIn('c', mapping, 'bar')

    # Present element, no explicit error type: ValueError carrying the message.
    with self.assertRaisesRegexp(ValueError, 'bar'):
        check.NotIn('a', letters, 'bar')

    # Present key, explicit error type: that type carrying the message.
    with self.assertRaisesRegexp(RuntimeError, 'baz'):
        check.NotIn('b', mapping, 'baz', RuntimeError)
示例4
def testCheckNotIn(self):
    """Exercise check.NotIn with absent and present elements."""
    letters = ('a', 'b', 'c')
    mapping = {'a': 1, 'b': 2}

    # Absent element / absent key: neither call is expected to raise.
    check.NotIn('d', letters, 'foo')
    check.NotIn('c', mapping, 'bar')

    # Present element, no explicit error type: ValueError carrying the message.
    with self.assertRaisesRegexp(ValueError, 'bar'):
        check.NotIn('a', letters, 'bar')

    # Present key, explicit error type: that type carrying the message.
    with self.assertRaisesRegexp(RuntimeError, 'baz'):
        check.NotIn('b', mapping, 'baz', RuntimeError)
示例5
def testCheckNotIn(self):
    """Exercise check.NotIn with absent and present elements."""
    letters = ('a', 'b', 'c')
    mapping = {'a': 1, 'b': 2}

    # Absent element / absent key: neither call is expected to raise.
    check.NotIn('d', letters, 'foo')
    check.NotIn('c', mapping, 'bar')

    # Present element, no explicit error type: ValueError carrying the message.
    with self.assertRaisesRegexp(ValueError, 'bar'):
        check.NotIn('a', letters, 'bar')

    # Present key, explicit error type: that type carrying the message.
    with self.assertRaisesRegexp(RuntimeError, 'baz'):
        check.NotIn('b', mapping, 'baz', RuntimeError)
示例6
def testCheckNotIn(self):
    """Exercise check.NotIn with absent and present elements."""
    letters = ('a', 'b', 'c')
    mapping = {'a': 1, 'b': 2}

    # Absent element / absent key: neither call is expected to raise.
    check.NotIn('d', letters, 'foo')
    check.NotIn('c', mapping, 'bar')

    # Present element, no explicit error type: ValueError carrying the message.
    with self.assertRaisesRegexp(ValueError, 'bar'):
        check.NotIn('a', letters, 'bar')

    # Present key, explicit error type: that type carrying the message.
    with self.assertRaisesRegexp(RuntimeError, 'baz'):
        check.NotIn('b', mapping, 'baz', RuntimeError)
示例7
def testCheckNotIn(self):
    """Exercise check.NotIn with absent and present elements."""
    letters = ('a', 'b', 'c')
    mapping = {'a': 1, 'b': 2}

    # Absent element / absent key: neither call is expected to raise.
    check.NotIn('d', letters, 'foo')
    check.NotIn('c', mapping, 'bar')

    # Present element, no explicit error type: ValueError carrying the message.
    with self.assertRaisesRegexp(ValueError, 'bar'):
        check.NotIn('a', letters, 'bar')

    # Present key, explicit error type: that type carrying the message.
    with self.assertRaisesRegexp(RuntimeError, 'baz'):
        check.NotIn('b', mapping, 'baz', RuntimeError)
示例8
def testCheckNotIn(self):
    """Exercise check.NotIn with absent and present elements."""
    letters = ('a', 'b', 'c')
    mapping = {'a': 1, 'b': 2}

    # Absent element / absent key: neither call is expected to raise.
    check.NotIn('d', letters, 'foo')
    check.NotIn('c', mapping, 'bar')

    # Present element, no explicit error type: ValueError carrying the message.
    with self.assertRaisesRegexp(ValueError, 'bar'):
        check.NotIn('a', letters, 'bar')

    # Present key, explicit error type: that type carrying the message.
    with self.assertRaisesRegexp(RuntimeError, 'baz'):
        check.NotIn('b', mapping, 'baz', RuntimeError)
示例9
def testCheckNotIn(self):
    """Exercise check.NotIn with absent and present elements."""
    letters = ('a', 'b', 'c')
    mapping = {'a': 1, 'b': 2}

    # Absent element / absent key: neither call is expected to raise.
    check.NotIn('d', letters, 'foo')
    check.NotIn('c', mapping, 'bar')

    # Present element, no explicit error type: ValueError carrying the message.
    with self.assertRaisesRegexp(ValueError, 'bar'):
        check.NotIn('a', letters, 'bar')

    # Present key, explicit error type: that type carrying the message.
    with self.assertRaisesRegexp(RuntimeError, 'baz'):
        check.NotIn('b', mapping, 'baz', RuntimeError)
示例10
def calculate_segmentation_metrics(gold_corpus, annotated_corpus):
  """Score the token segmentation of annotated sentences against gold ones.

  Every corpus entry is parsed as a serialized `sentence_pb2.Sentence`. Tokens
  are identified by their (start, end) span; an annotated token counts as
  correct only when the same span occurs in the paired gold sentence.

  Args:
    gold_corpus: Sized iterable of serialized gold sentences.
    annotated_corpus: Sized iterable of serialized annotated sentences, paired
      by position with `gold_corpus`.

  Returns:
    A (precision, recall, f1) tuple of percentages, each rounded to two
    decimal places.

  Raises:
    An error from the `check` helpers when the corpora differ in length, when
    paired sentences differ in text, when a token ends before it starts, or
    when a span occurs twice within one sentence.
  """
  check.Eq(len(gold_corpus), len(annotated_corpus), 'Corpora are not aligned')

  def span_of(tok):
    # A token is keyed by its validated (start, end) offsets.
    check.Ge(tok.end, tok.start)
    return (tok.start, tok.end)

  def safe_div(top, bottom):
    check.Ge(top, 0)
    check.Ge(bottom, 0)
    if bottom > 0:
      # NOTE(review): relies on true division; under Python 2 this needs
      # `from __future__ import division` at file level -- confirm.
      return top / bottom
    # Zero denominator: map 0/0 to 0 and x/0 to inf.
    return 0.0 if top == 0 else float('inf')

  def unique_spans(tokens):
    # Collects the spans of one sentence, rejecting repeated spans.
    seen = set()
    for tok in tokens:
      span = span_of(tok)
      check.NotIn(span, seen, 'Duplicate token')
      seen.add(span)
    return seen

  gold_total = 0
  test_total = 0
  correct_total = 0
  for gold_bytes, annotated_bytes in zip(gold_corpus, annotated_corpus):
    gold_sentence = sentence_pb2.Sentence()
    gold_sentence.ParseFromString(gold_bytes)
    annotated_sentence = sentence_pb2.Sentence()
    annotated_sentence.ParseFromString(annotated_bytes)
    check.Eq(gold_sentence.text, annotated_sentence.text,
             'Text is not aligned')

    gold_spans = unique_spans(gold_sentence.token)
    test_spans = unique_spans(annotated_sentence.token)
    gold_total += len(gold_spans)
    test_total += len(test_spans)
    correct_total += len(gold_spans & test_spans)

  tf.logging.info('Total num documents: %d', len(annotated_corpus))
  tf.logging.info('Total gold tokens: %d', gold_total)
  tf.logging.info('Total test tokens: %d', test_total)

  # Precision and recall are already percentages, so F1 is one as well.
  precision = 100 * safe_div(correct_total, test_total)
  recall = 100 * safe_div(correct_total, gold_total)
  f1 = safe_div(2 * precision * recall, precision + recall)
  tf.logging.info('Precision: %.2f%%', precision)
  tf.logging.info('Recall: %.2f%%', recall)
  tf.logging.info('F1: %.2f%%', f1)

  return tuple(round(score, 2) for score in (precision, recall, f1))
示例11
def calculate_segmentation_metrics(gold_corpus, annotated_corpus):
  """Score the token segmentation of annotated sentences against gold ones.

  Every corpus entry is parsed as a serialized `sentence_pb2.Sentence`. Tokens
  are identified by their (start, end) span; an annotated token counts as
  correct only when the same span occurs in the paired gold sentence.

  Args:
    gold_corpus: Sized iterable of serialized gold sentences.
    annotated_corpus: Sized iterable of serialized annotated sentences, paired
      by position with `gold_corpus`.

  Returns:
    A (precision, recall, f1) tuple of percentages, each rounded to two
    decimal places.

  Raises:
    An error from the `check` helpers when the corpora differ in length, when
    paired sentences differ in text, when a token ends before it starts, or
    when a span occurs twice within one sentence.
  """
  check.Eq(len(gold_corpus), len(annotated_corpus), 'Corpora are not aligned')

  def span_of(tok):
    # A token is keyed by its validated (start, end) offsets.
    check.Ge(tok.end, tok.start)
    return (tok.start, tok.end)

  def safe_div(top, bottom):
    check.Ge(top, 0)
    check.Ge(bottom, 0)
    if bottom > 0:
      # NOTE(review): relies on true division; under Python 2 this needs
      # `from __future__ import division` at file level -- confirm.
      return top / bottom
    # Zero denominator: map 0/0 to 0 and x/0 to inf.
    return 0.0 if top == 0 else float('inf')

  def unique_spans(tokens):
    # Collects the spans of one sentence, rejecting repeated spans.
    seen = set()
    for tok in tokens:
      span = span_of(tok)
      check.NotIn(span, seen, 'Duplicate token')
      seen.add(span)
    return seen

  gold_total = 0
  test_total = 0
  correct_total = 0
  for gold_bytes, annotated_bytes in zip(gold_corpus, annotated_corpus):
    gold_sentence = sentence_pb2.Sentence()
    gold_sentence.ParseFromString(gold_bytes)
    annotated_sentence = sentence_pb2.Sentence()
    annotated_sentence.ParseFromString(annotated_bytes)
    check.Eq(gold_sentence.text, annotated_sentence.text,
             'Text is not aligned')

    gold_spans = unique_spans(gold_sentence.token)
    test_spans = unique_spans(annotated_sentence.token)
    gold_total += len(gold_spans)
    test_total += len(test_spans)
    correct_total += len(gold_spans & test_spans)

  tf.logging.info('Total num documents: %d', len(annotated_corpus))
  tf.logging.info('Total gold tokens: %d', gold_total)
  tf.logging.info('Total test tokens: %d', test_total)

  # Precision and recall are already percentages, so F1 is one as well.
  precision = 100 * safe_div(correct_total, test_total)
  recall = 100 * safe_div(correct_total, gold_total)
  f1 = safe_div(2 * precision * recall, precision + recall)
  tf.logging.info('Precision: %.2f%%', precision)
  tf.logging.info('Recall: %.2f%%', recall)
  tf.logging.info('F1: %.2f%%', f1)

  return tuple(round(score, 2) for score in (precision, recall, f1))
示例12
def calculate_segmentation_metrics(gold_corpus, annotated_corpus):
  """Score the token segmentation of annotated sentences against gold ones.

  Every corpus entry is parsed as a serialized `sentence_pb2.Sentence`. Tokens
  are identified by their (start, end) span; an annotated token counts as
  correct only when the same span occurs in the paired gold sentence.

  Args:
    gold_corpus: Sized iterable of serialized gold sentences.
    annotated_corpus: Sized iterable of serialized annotated sentences, paired
      by position with `gold_corpus`.

  Returns:
    A (precision, recall, f1) tuple of percentages, each rounded to two
    decimal places.

  Raises:
    An error from the `check` helpers when the corpora differ in length, when
    paired sentences differ in text, when a token ends before it starts, or
    when a span occurs twice within one sentence.
  """
  check.Eq(len(gold_corpus), len(annotated_corpus), 'Corpora are not aligned')

  def span_of(tok):
    # A token is keyed by its validated (start, end) offsets.
    check.Ge(tok.end, tok.start)
    return (tok.start, tok.end)

  def safe_div(top, bottom):
    check.Ge(top, 0)
    check.Ge(bottom, 0)
    if bottom > 0:
      # NOTE(review): relies on true division; under Python 2 this needs
      # `from __future__ import division` at file level -- confirm.
      return top / bottom
    # Zero denominator: map 0/0 to 0 and x/0 to inf.
    return 0.0 if top == 0 else float('inf')

  def unique_spans(tokens):
    # Collects the spans of one sentence, rejecting repeated spans.
    seen = set()
    for tok in tokens:
      span = span_of(tok)
      check.NotIn(span, seen, 'Duplicate token')
      seen.add(span)
    return seen

  gold_total = 0
  test_total = 0
  correct_total = 0
  for gold_bytes, annotated_bytes in zip(gold_corpus, annotated_corpus):
    gold_sentence = sentence_pb2.Sentence()
    gold_sentence.ParseFromString(gold_bytes)
    annotated_sentence = sentence_pb2.Sentence()
    annotated_sentence.ParseFromString(annotated_bytes)
    check.Eq(gold_sentence.text, annotated_sentence.text,
             'Text is not aligned')

    gold_spans = unique_spans(gold_sentence.token)
    test_spans = unique_spans(annotated_sentence.token)
    gold_total += len(gold_spans)
    test_total += len(test_spans)
    correct_total += len(gold_spans & test_spans)

  tf.logging.info('Total num documents: %d', len(annotated_corpus))
  tf.logging.info('Total gold tokens: %d', gold_total)
  tf.logging.info('Total test tokens: %d', test_total)

  # Precision and recall are already percentages, so F1 is one as well.
  precision = 100 * safe_div(correct_total, test_total)
  recall = 100 * safe_div(correct_total, gold_total)
  f1 = safe_div(2 * precision * recall, precision + recall)
  tf.logging.info('Precision: %.2f%%', precision)
  tf.logging.info('Recall: %.2f%%', recall)
  tf.logging.info('F1: %.2f%%', f1)

  return tuple(round(score, 2) for score in (precision, recall, f1))
示例13
def calculate_segmentation_metrics(gold_corpus, annotated_corpus):
  """Score the token segmentation of annotated sentences against gold ones.

  Every corpus entry is parsed as a serialized `sentence_pb2.Sentence`. Tokens
  are identified by their (start, end) span; an annotated token counts as
  correct only when the same span occurs in the paired gold sentence.

  Args:
    gold_corpus: Sized iterable of serialized gold sentences.
    annotated_corpus: Sized iterable of serialized annotated sentences, paired
      by position with `gold_corpus`.

  Returns:
    A (precision, recall, f1) tuple of percentages, each rounded to two
    decimal places.

  Raises:
    An error from the `check` helpers when the corpora differ in length, when
    paired sentences differ in text, when a token ends before it starts, or
    when a span occurs twice within one sentence.
  """
  check.Eq(len(gold_corpus), len(annotated_corpus), 'Corpora are not aligned')

  def span_of(tok):
    # A token is keyed by its validated (start, end) offsets.
    check.Ge(tok.end, tok.start)
    return (tok.start, tok.end)

  def safe_div(top, bottom):
    check.Ge(top, 0)
    check.Ge(bottom, 0)
    if bottom > 0:
      # NOTE(review): relies on true division; under Python 2 this needs
      # `from __future__ import division` at file level -- confirm.
      return top / bottom
    # Zero denominator: map 0/0 to 0 and x/0 to inf.
    return 0.0 if top == 0 else float('inf')

  def unique_spans(tokens):
    # Collects the spans of one sentence, rejecting repeated spans.
    seen = set()
    for tok in tokens:
      span = span_of(tok)
      check.NotIn(span, seen, 'Duplicate token')
      seen.add(span)
    return seen

  gold_total = 0
  test_total = 0
  correct_total = 0
  for gold_bytes, annotated_bytes in zip(gold_corpus, annotated_corpus):
    gold_sentence = sentence_pb2.Sentence()
    gold_sentence.ParseFromString(gold_bytes)
    annotated_sentence = sentence_pb2.Sentence()
    annotated_sentence.ParseFromString(annotated_bytes)
    check.Eq(gold_sentence.text, annotated_sentence.text,
             'Text is not aligned')

    gold_spans = unique_spans(gold_sentence.token)
    test_spans = unique_spans(annotated_sentence.token)
    gold_total += len(gold_spans)
    test_total += len(test_spans)
    correct_total += len(gold_spans & test_spans)

  tf.logging.info('Total num documents: %d', len(annotated_corpus))
  tf.logging.info('Total gold tokens: %d', gold_total)
  tf.logging.info('Total test tokens: %d', test_total)

  # Precision and recall are already percentages, so F1 is one as well.
  precision = 100 * safe_div(correct_total, test_total)
  recall = 100 * safe_div(correct_total, gold_total)
  f1 = safe_div(2 * precision * recall, precision + recall)
  tf.logging.info('Precision: %.2f%%', precision)
  tf.logging.info('Recall: %.2f%%', recall)
  tf.logging.info('F1: %.2f%%', f1)

  return tuple(round(score, 2) for score in (precision, recall, f1))
示例14
def calculate_segmentation_metrics(gold_corpus, annotated_corpus):
  """Score the token segmentation of annotated sentences against gold ones.

  Every corpus entry is parsed as a serialized `sentence_pb2.Sentence`. Tokens
  are identified by their (start, end) span; an annotated token counts as
  correct only when the same span occurs in the paired gold sentence.

  Args:
    gold_corpus: Sized iterable of serialized gold sentences.
    annotated_corpus: Sized iterable of serialized annotated sentences, paired
      by position with `gold_corpus`.

  Returns:
    A (precision, recall, f1) tuple of percentages, each rounded to two
    decimal places.

  Raises:
    An error from the `check` helpers when the corpora differ in length, when
    paired sentences differ in text, when a token ends before it starts, or
    when a span occurs twice within one sentence.
  """
  check.Eq(len(gold_corpus), len(annotated_corpus), 'Corpora are not aligned')

  def span_of(tok):
    # A token is keyed by its validated (start, end) offsets.
    check.Ge(tok.end, tok.start)
    return (tok.start, tok.end)

  def safe_div(top, bottom):
    check.Ge(top, 0)
    check.Ge(bottom, 0)
    if bottom > 0:
      # NOTE(review): relies on true division; under Python 2 this needs
      # `from __future__ import division` at file level -- confirm.
      return top / bottom
    # Zero denominator: map 0/0 to 0 and x/0 to inf.
    return 0.0 if top == 0 else float('inf')

  def unique_spans(tokens):
    # Collects the spans of one sentence, rejecting repeated spans.
    seen = set()
    for tok in tokens:
      span = span_of(tok)
      check.NotIn(span, seen, 'Duplicate token')
      seen.add(span)
    return seen

  gold_total = 0
  test_total = 0
  correct_total = 0
  for gold_bytes, annotated_bytes in zip(gold_corpus, annotated_corpus):
    gold_sentence = sentence_pb2.Sentence()
    gold_sentence.ParseFromString(gold_bytes)
    annotated_sentence = sentence_pb2.Sentence()
    annotated_sentence.ParseFromString(annotated_bytes)
    check.Eq(gold_sentence.text, annotated_sentence.text,
             'Text is not aligned')

    gold_spans = unique_spans(gold_sentence.token)
    test_spans = unique_spans(annotated_sentence.token)
    gold_total += len(gold_spans)
    test_total += len(test_spans)
    correct_total += len(gold_spans & test_spans)

  tf.logging.info('Total num documents: %d', len(annotated_corpus))
  tf.logging.info('Total gold tokens: %d', gold_total)
  tf.logging.info('Total test tokens: %d', test_total)

  # Precision and recall are already percentages, so F1 is one as well.
  precision = 100 * safe_div(correct_total, test_total)
  recall = 100 * safe_div(correct_total, gold_total)
  f1 = safe_div(2 * precision * recall, precision + recall)
  tf.logging.info('Precision: %.2f%%', precision)
  tf.logging.info('Recall: %.2f%%', recall)
  tf.logging.info('F1: %.2f%%', f1)

  return tuple(round(score, 2) for score in (precision, recall, f1))
示例15
def calculate_segmentation_metrics(gold_corpus, annotated_corpus):
  """Score the token segmentation of annotated sentences against gold ones.

  Every corpus entry is parsed as a serialized `sentence_pb2.Sentence`. Tokens
  are identified by their (start, end) span; an annotated token counts as
  correct only when the same span occurs in the paired gold sentence.

  Args:
    gold_corpus: Sized iterable of serialized gold sentences.
    annotated_corpus: Sized iterable of serialized annotated sentences, paired
      by position with `gold_corpus`.

  Returns:
    A (precision, recall, f1) tuple of percentages, each rounded to two
    decimal places.

  Raises:
    An error from the `check` helpers when the corpora differ in length, when
    paired sentences differ in text, when a token ends before it starts, or
    when a span occurs twice within one sentence.
  """
  check.Eq(len(gold_corpus), len(annotated_corpus), 'Corpora are not aligned')

  def span_of(tok):
    # A token is keyed by its validated (start, end) offsets.
    check.Ge(tok.end, tok.start)
    return (tok.start, tok.end)

  def safe_div(top, bottom):
    check.Ge(top, 0)
    check.Ge(bottom, 0)
    if bottom > 0:
      # NOTE(review): relies on true division; under Python 2 this needs
      # `from __future__ import division` at file level -- confirm.
      return top / bottom
    # Zero denominator: map 0/0 to 0 and x/0 to inf.
    return 0.0 if top == 0 else float('inf')

  def unique_spans(tokens):
    # Collects the spans of one sentence, rejecting repeated spans.
    seen = set()
    for tok in tokens:
      span = span_of(tok)
      check.NotIn(span, seen, 'Duplicate token')
      seen.add(span)
    return seen

  gold_total = 0
  test_total = 0
  correct_total = 0
  for gold_bytes, annotated_bytes in zip(gold_corpus, annotated_corpus):
    gold_sentence = sentence_pb2.Sentence()
    gold_sentence.ParseFromString(gold_bytes)
    annotated_sentence = sentence_pb2.Sentence()
    annotated_sentence.ParseFromString(annotated_bytes)
    check.Eq(gold_sentence.text, annotated_sentence.text,
             'Text is not aligned')

    gold_spans = unique_spans(gold_sentence.token)
    test_spans = unique_spans(annotated_sentence.token)
    gold_total += len(gold_spans)
    test_total += len(test_spans)
    correct_total += len(gold_spans & test_spans)

  tf.logging.info('Total num documents: %d', len(annotated_corpus))
  tf.logging.info('Total gold tokens: %d', gold_total)
  tf.logging.info('Total test tokens: %d', test_total)

  # Precision and recall are already percentages, so F1 is one as well.
  precision = 100 * safe_div(correct_total, test_total)
  recall = 100 * safe_div(correct_total, gold_total)
  f1 = safe_div(2 * precision * recall, precision + recall)
  tf.logging.info('Precision: %.2f%%', precision)
  tf.logging.info('Recall: %.2f%%', recall)
  tf.logging.info('F1: %.2f%%', f1)

  return tuple(round(score, 2) for score in (precision, recall, f1))
示例16
def calculate_segmentation_metrics(gold_corpus, annotated_corpus):
  """Score the token segmentation of annotated sentences against gold ones.

  Every corpus entry is parsed as a serialized `sentence_pb2.Sentence`. Tokens
  are identified by their (start, end) span; an annotated token counts as
  correct only when the same span occurs in the paired gold sentence.

  Args:
    gold_corpus: Sized iterable of serialized gold sentences.
    annotated_corpus: Sized iterable of serialized annotated sentences, paired
      by position with `gold_corpus`.

  Returns:
    A (precision, recall, f1) tuple of percentages, each rounded to two
    decimal places.

  Raises:
    An error from the `check` helpers when the corpora differ in length, when
    paired sentences differ in text, when a token ends before it starts, or
    when a span occurs twice within one sentence.
  """
  check.Eq(len(gold_corpus), len(annotated_corpus), 'Corpora are not aligned')

  def span_of(tok):
    # A token is keyed by its validated (start, end) offsets.
    check.Ge(tok.end, tok.start)
    return (tok.start, tok.end)

  def safe_div(top, bottom):
    check.Ge(top, 0)
    check.Ge(bottom, 0)
    if bottom > 0:
      # NOTE(review): relies on true division; under Python 2 this needs
      # `from __future__ import division` at file level -- confirm.
      return top / bottom
    # Zero denominator: map 0/0 to 0 and x/0 to inf.
    return 0.0 if top == 0 else float('inf')

  def unique_spans(tokens):
    # Collects the spans of one sentence, rejecting repeated spans.
    seen = set()
    for tok in tokens:
      span = span_of(tok)
      check.NotIn(span, seen, 'Duplicate token')
      seen.add(span)
    return seen

  gold_total = 0
  test_total = 0
  correct_total = 0
  for gold_bytes, annotated_bytes in zip(gold_corpus, annotated_corpus):
    gold_sentence = sentence_pb2.Sentence()
    gold_sentence.ParseFromString(gold_bytes)
    annotated_sentence = sentence_pb2.Sentence()
    annotated_sentence.ParseFromString(annotated_bytes)
    check.Eq(gold_sentence.text, annotated_sentence.text,
             'Text is not aligned')

    gold_spans = unique_spans(gold_sentence.token)
    test_spans = unique_spans(annotated_sentence.token)
    gold_total += len(gold_spans)
    test_total += len(test_spans)
    correct_total += len(gold_spans & test_spans)

  tf.logging.info('Total num documents: %d', len(annotated_corpus))
  tf.logging.info('Total gold tokens: %d', gold_total)
  tf.logging.info('Total test tokens: %d', test_total)

  # Precision and recall are already percentages, so F1 is one as well.
  precision = 100 * safe_div(correct_total, test_total)
  recall = 100 * safe_div(correct_total, gold_total)
  f1 = safe_div(2 * precision * recall, precision + recall)
  tf.logging.info('Precision: %.2f%%', precision)
  tf.logging.info('Recall: %.2f%%', recall)
  tf.logging.info('F1: %.2f%%', f1)

  return tuple(round(score, 2) for score in (precision, recall, f1))
示例17
def calculate_segmentation_metrics(gold_corpus, annotated_corpus):
  """Score the token segmentation of annotated sentences against gold ones.

  Every corpus entry is parsed as a serialized `sentence_pb2.Sentence`. Tokens
  are identified by their (start, end) span; an annotated token counts as
  correct only when the same span occurs in the paired gold sentence.

  Args:
    gold_corpus: Sized iterable of serialized gold sentences.
    annotated_corpus: Sized iterable of serialized annotated sentences, paired
      by position with `gold_corpus`.

  Returns:
    A (precision, recall, f1) tuple of percentages, each rounded to two
    decimal places.

  Raises:
    An error from the `check` helpers when the corpora differ in length, when
    paired sentences differ in text, when a token ends before it starts, or
    when a span occurs twice within one sentence.
  """
  check.Eq(len(gold_corpus), len(annotated_corpus), 'Corpora are not aligned')

  def span_of(tok):
    # A token is keyed by its validated (start, end) offsets.
    check.Ge(tok.end, tok.start)
    return (tok.start, tok.end)

  def safe_div(top, bottom):
    check.Ge(top, 0)
    check.Ge(bottom, 0)
    if bottom > 0:
      # NOTE(review): relies on true division; under Python 2 this needs
      # `from __future__ import division` at file level -- confirm.
      return top / bottom
    # Zero denominator: map 0/0 to 0 and x/0 to inf.
    return 0.0 if top == 0 else float('inf')

  def unique_spans(tokens):
    # Collects the spans of one sentence, rejecting repeated spans.
    seen = set()
    for tok in tokens:
      span = span_of(tok)
      check.NotIn(span, seen, 'Duplicate token')
      seen.add(span)
    return seen

  gold_total = 0
  test_total = 0
  correct_total = 0
  for gold_bytes, annotated_bytes in zip(gold_corpus, annotated_corpus):
    gold_sentence = sentence_pb2.Sentence()
    gold_sentence.ParseFromString(gold_bytes)
    annotated_sentence = sentence_pb2.Sentence()
    annotated_sentence.ParseFromString(annotated_bytes)
    check.Eq(gold_sentence.text, annotated_sentence.text,
             'Text is not aligned')

    gold_spans = unique_spans(gold_sentence.token)
    test_spans = unique_spans(annotated_sentence.token)
    gold_total += len(gold_spans)
    test_total += len(test_spans)
    correct_total += len(gold_spans & test_spans)

  tf.logging.info('Total num documents: %d', len(annotated_corpus))
  tf.logging.info('Total gold tokens: %d', gold_total)
  tf.logging.info('Total test tokens: %d', test_total)

  # Precision and recall are already percentages, so F1 is one as well.
  precision = 100 * safe_div(correct_total, test_total)
  recall = 100 * safe_div(correct_total, gold_total)
  f1 = safe_div(2 * precision * recall, precision + recall)
  tf.logging.info('Precision: %.2f%%', precision)
  tf.logging.info('Recall: %.2f%%', recall)
  tf.logging.info('F1: %.2f%%', f1)

  return tuple(round(score, 2) for score in (precision, recall, f1))
示例18
def calculate_segmentation_metrics(gold_corpus, annotated_corpus):
  """Score the token segmentation of annotated sentences against gold ones.

  Every corpus entry is parsed as a serialized `sentence_pb2.Sentence`. Tokens
  are identified by their (start, end) span; an annotated token counts as
  correct only when the same span occurs in the paired gold sentence.

  Args:
    gold_corpus: Sized iterable of serialized gold sentences.
    annotated_corpus: Sized iterable of serialized annotated sentences, paired
      by position with `gold_corpus`.

  Returns:
    A (precision, recall, f1) tuple of percentages, each rounded to two
    decimal places.

  Raises:
    An error from the `check` helpers when the corpora differ in length, when
    paired sentences differ in text, when a token ends before it starts, or
    when a span occurs twice within one sentence.
  """
  check.Eq(len(gold_corpus), len(annotated_corpus), 'Corpora are not aligned')

  def span_of(tok):
    # A token is keyed by its validated (start, end) offsets.
    check.Ge(tok.end, tok.start)
    return (tok.start, tok.end)

  def safe_div(top, bottom):
    check.Ge(top, 0)
    check.Ge(bottom, 0)
    if bottom > 0:
      # NOTE(review): relies on true division; under Python 2 this needs
      # `from __future__ import division` at file level -- confirm.
      return top / bottom
    # Zero denominator: map 0/0 to 0 and x/0 to inf.
    return 0.0 if top == 0 else float('inf')

  def unique_spans(tokens):
    # Collects the spans of one sentence, rejecting repeated spans.
    seen = set()
    for tok in tokens:
      span = span_of(tok)
      check.NotIn(span, seen, 'Duplicate token')
      seen.add(span)
    return seen

  gold_total = 0
  test_total = 0
  correct_total = 0
  for gold_bytes, annotated_bytes in zip(gold_corpus, annotated_corpus):
    gold_sentence = sentence_pb2.Sentence()
    gold_sentence.ParseFromString(gold_bytes)
    annotated_sentence = sentence_pb2.Sentence()
    annotated_sentence.ParseFromString(annotated_bytes)
    check.Eq(gold_sentence.text, annotated_sentence.text,
             'Text is not aligned')

    gold_spans = unique_spans(gold_sentence.token)
    test_spans = unique_spans(annotated_sentence.token)
    gold_total += len(gold_spans)
    test_total += len(test_spans)
    correct_total += len(gold_spans & test_spans)

  tf.logging.info('Total num documents: %d', len(annotated_corpus))
  tf.logging.info('Total gold tokens: %d', gold_total)
  tf.logging.info('Total test tokens: %d', test_total)

  # Precision and recall are already percentages, so F1 is one as well.
  precision = 100 * safe_div(correct_total, test_total)
  recall = 100 * safe_div(correct_total, gold_total)
  f1 = safe_div(2 * precision * recall, precision + recall)
  tf.logging.info('Precision: %.2f%%', precision)
  tf.logging.info('Recall: %.2f%%', recall)
  tf.logging.info('F1: %.2f%%', f1)

  return tuple(round(score, 2) for score in (precision, recall, f1))