Skip to content

Commit

Permalink
Mixture of non-prefixed and prefixed (B-, I-)
Browse files Browse the repository at this point in the history
huggingface#13493 does not seem to work well for the mixed case.
  • Loading branch information
KoichiYasuoka committed Sep 14, 2021
1 parent 3ab0185 commit a0e91ae
Showing 1 changed file with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion src/transformers/pipelines/token_classification.py
Expand Up @@ -106,6 +106,7 @@ def __init__(self, args_parser=TokenClassificationArgumentHandler(), *args, **kw

self._basic_tokenizer = BasicTokenizer(do_lower_case=False)
self._args_parser = args_parser
self._tag_policy = "I"

def _sanitize_parameters(
self,
Expand Down Expand Up @@ -401,13 +402,15 @@ def get_tag(self, entity_name: str) -> Tuple[str, str]:
if entity_name.startswith("B-"):
bi = "B"
tag = entity_name[2:]
self._tag_policy = "B"
elif entity_name.startswith("I-"):
bi = "I"
tag = entity_name[2:]
self._tag_policy = "B"
else:
# It's not in B-, I- format
# Default to I- for continuation.
bi = "I"
bi = self._tag_policy
tag = entity_name
return bi, tag

Expand Down

0 comments on commit a0e91ae

Please sign in to comment.