Source code for sgnlp.models.emotion_entailment.tokenization

from transformers import RobertaTokenizer


class RecconEmotionEntailmentTokenizer(RobertaTokenizer):
    """
    Constructs a Reccon Emotion Entailment tokenizer, derived from the RoBERTa tokenizer,
    using byte-level Byte-Pair-Encoding.

    Args:
        vocab_file (:obj:`str`):
            Path to the vocabulary file.
        merges_file (:obj:`str`):
            Path to the merges file.
        do_lower_case (:obj:`bool`, defaults to :obj:`False`):
            Whether or not to lowercase the input when tokenizing.

    Example::

        from sgnlp.models.emotion_entailment import RecconEmotionEntailmentTokenizer

        tokenizer = RecconEmotionEntailmentTokenizer.from_pretrained("roberta-base")
        text = (
            "surprise <SEP> Me ? You're the one who pulled out in front of me ! <SEP> "
            "Why don't you watch where you're going ? <SEP> Why don't you watch where "
            "you're going ? Me ? You're the one who pulled out in front of me !"
        )
        inputs = tokenizer(text, return_tensors="pt")
    """

    def __init__(
        self,
        vocab_file: str,
        merges_file: str,
        do_lower_case: bool = False,
        **kwargs
    ) -> None:
        super().__init__(
            vocab_file=vocab_file,
            merges_file=merges_file,
            do_lower_case=do_lower_case,
            **kwargs
        )
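

# Usage sketch (annotation only, not part of the original module): since the class
# merely forwards its arguments to RobertaTokenizer, the standard Hugging Face
# tokenizer API applies to its output. The "roberta-base" checkpoint name follows
# the docstring example; any compatible RoBERTa vocabulary should work.
#
#     tokenizer = RecconEmotionEntailmentTokenizer.from_pretrained("roberta-base")
#     inputs = tokenizer(
#         "surprise <SEP> Me ? You're the one who pulled out in front of me !",
#         return_tensors="pt",
#     )
#     print(inputs["input_ids"].shape)                 # torch.Size([1, seq_len])
#     print(inputs["attention_mask"].shape)            # same shape as input_ids
#     print(tokenizer.decode(inputs["input_ids"][0]))  # text wrapped in <s> ... </s>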