Source code for sgnlp.models.emotion_entailment.tokenization

from transformers import RobertaTokenizer


class RecconEmotionEntailmentTokenizer(RobertaTokenizer):
    """
    Constructs a Reccon Emotion Entailment tokenizer, derived from the RoBERTa tokenizer,
    using byte-level Byte-Pair-Encoding.

    Args:
        vocab_file (:obj:`str`):
            Path to the vocabulary file.
        merges_file (:obj:`str`):
            Path to the merges file.
        do_lower_case (:obj:`bool`, defaults to :obj:`False`):
            Whether or not to lowercase the input when tokenizing.

    Example::

        from sgnlp.models.emotion_entailment import RecconEmotionEntailmentTokenizer

        tokenizer = RecconEmotionEntailmentTokenizer.from_pretrained("roberta-base")
        text = (
            "surprise <SEP> Me ? You're the one who pulled out in front of me ! <SEP> "
            "Why don't you watch where you're going ? <SEP> Why don't you watch where "
            "you're going ? Me ? You're the one who pulled out in front of me !"
        )
        inputs = tokenizer(text, return_tensors="pt")
    """

    def __init__(
        self,
        vocab_file: str,
        merges_file: str,
        do_lower_case: bool = False,
        **kwargs
    ) -> None:
        super().__init__(
            vocab_file=vocab_file,
            merges_file=merges_file,
            do_lower_case=do_lower_case,
            **kwargs
        )
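

# Usage sketch (annotation only, not part of the original module): since the class
# merely forwards its arguments to RobertaTokenizer, the standard Hugging Face
# tokenizer API applies to its output. The "roberta-base" checkpoint name follows
# the docstring example; any compatible RoBERTa vocabulary should work.
#
#     tokenizer = RecconEmotionEntailmentTokenizer.from_pretrained("roberta-base")
#     inputs = tokenizer(
#         "surprise <SEP> Me ? You're the one who pulled out in front of me !",
#         return_tensors="pt",
#     )
#     print(inputs["input_ids"].shape)                 # torch.Size([1, seq_len])
#     print(inputs["attention_mask"].shape)            # same shape as input_ids
#     print(tokenizer.decode(inputs["input_ids"][0]))  # text wrapped in <s> ... </s>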