Source code for sgnlp.models.span_extraction.tokenization

from transformers import BertTokenizer


class RecconSpanExtractionTokenizer(BertTokenizer):
    """
    Constructs a Reccon Span Extraction tokenizer, derived from the Bert tokenizer.

    Args:
        vocab_file (:obj:`str`):
            Path to the vocabulary file.
        do_lower_case (:obj:`bool`, defaults to :obj:`False`):
            Whether or not to lowercase the input when tokenizing.
        **kwargs:
            Any additional keyword arguments; they are forwarded unchanged to
            the ``BertTokenizer`` constructor.

    Example::

        from sgnlp.models.span_extraction.tokenization import RecconSpanExtractionTokenizer

        tokenizer = RecconSpanExtractionTokenizer.from_pretrained("mrm8488/spanbert-finetuned-squadv2")
        text = "Our company's wei-ya is tomorrow night ! It's your first Chinese New Year in Taiwan--you must be excited !"
        inputs = tokenizer(text, return_tensors="pt")
    """

    def __init__(self, vocab_file: str, do_lower_case: bool = False, **kwargs) -> None:
        # The override adds no logic of its own: it fixes the default of
        # ``do_lower_case`` to False and hands everything to the parent class.
        super().__init__(vocab_file=vocab_file, do_lower_case=do_lower_case, **kwargs)