"""Source code for sgnlp.models.ufd.modeling."""

from dataclasses import dataclass
from typing import Optional, Tuple

import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import PreTrainedModel, XLMRobertaModel
from transformers.file_utils import ModelOutput


from .config import (
    UFDAdaptorGlobalConfig,
    UFDAdaptorDomainConfig,
    UFDCombineFeaturesMapConfig,
    UFDClassifierConfig,
)


@dataclass
class UFDModelOutput(ModelOutput):
    """
    Base class for outputs of UFD models.

    Args:
        loss (:obj:`torch.Tensor` of shape `(1,)`, `optional`, returned when :obj:`labels` is provided):
            Classification loss. Loss function used is dependent on what is specified in UFDConfig.
        logits (:obj:`torch.Tensor` of shape :obj:`(batch_size, 1)`):
            Classification scores.
    """

    loss: Optional[torch.Tensor] = None
    logits: torch.Tensor = None
class UFDAdaptorGlobalPreTrainedModel(PreTrainedModel):
    """
    The UFD Adaptor Global Pre-Trained Model used as base class for the derived
    Adaptor Global Model.

    This abstract super class declares the config class type and the weight
    initialization scheme for the UFD Adaptor Global model. It should not be
    used or instantiated directly; see :class:`UFDAdaptorGlobalModel` for usage.
    """

    config_class = UFDAdaptorGlobalConfig
    base_model_prefix = "UFDAdaptorGlobal"

    def _init_weights(self, module):
        # Linear layers: weights uniform in [-initrange, initrange], biases zeroed.
        if isinstance(module, nn.Linear):
            module.weight.data.uniform_(-self.config.initrange, self.config.initrange)
            module.bias.data.zero_()
class UFDAdaptorGlobalModel(UFDAdaptorGlobalPreTrainedModel):
    """
    The UFD Adaptor Global Model used for unsupervised training for global context.

    Inherits from :obj:`UFDAdaptorGlobalPreTrainedModel` for weights initialization
    and utility functions from the transformer :obj:`PreTrainedModel` class.

    Args:
        config (:class:`~UFDAdaptorGlobalConfig`):
            Model configuration class with all parameters required for the model.
            Initializing with a config file does not load the weights associated
            with the model, only the configuration. Use the :obj:`.from_pretrained`
            method to load the model weights.
    """

    def __init__(self, config):
        super().__init__(config)
        self.lin1 = nn.Linear(config.in_dim, config.dim_hidden)
        self.lin2 = nn.Linear(config.dim_hidden, config.out_dim)
        self.act = F.relu
        self.init_weights()

    def forward(
        self, input: torch.FloatTensor
    ) -> Tuple[torch.FloatTensor, torch.FloatTensor]:
        """
        Args:
            input (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, max_num_words)`):
                Word IDs of text.

        Returns:
            Tuple of (output features, local features).
        """
        # Residual connection: assumes in_dim == dim_hidden so shapes line up
        # — TODO confirm against the config defaults.
        local_features = self.act(self.lin1(input)) + input
        out = self.act(self.lin2(local_features)) + local_features
        return out, local_features
class UFDAdaptorDomainPreTrainedModel(PreTrainedModel):
    """
    The UFD Adaptor Domain Pre-Trained Model used as base class for the derived
    Adaptor Domain Model.

    This abstract super class declares the config class type and the weight
    initialization scheme for the UFD Adaptor Domain model. It should not be
    used or instantiated directly; see :class:`UFDAdaptorDomainModel` for usage.
    """

    config_class = UFDAdaptorDomainConfig
    base_model_prefix = "UFDAdaptorDomain"

    def _init_weights(self, module):
        # Linear layers: weights uniform in [-initrange, initrange], biases zeroed.
        if isinstance(module, nn.Linear):
            module.weight.data.uniform_(-self.config.initrange, self.config.initrange)
            module.bias.data.zero_()
class UFDAdaptorDomainModel(UFDAdaptorDomainPreTrainedModel):
    """
    The UFD Adaptor Domain Model used for unsupervised training for domain context.

    Inherits from :obj:`UFDAdaptorDomainPreTrainedModel` for weights initialization
    and utility functions from the transformer :obj:`PreTrainedModel` class.

    Args:
        config (:class:`~UFDAdaptorDomainConfig`):
            Model configuration class with all parameters required for the model.
            Initializing with a config file does not load the weights associated
            with the model, only the configuration. Use the :obj:`.from_pretrained`
            method to load the model weights.
    """

    def __init__(self, config):
        super().__init__(config)
        self.lin1 = nn.Linear(config.in_dim, config.dim_hidden)
        self.lin2 = nn.Linear(config.dim_hidden, config.out_dim)
        self.act = F.relu
        self.init_weights()

    def forward(
        self, input: torch.FloatTensor
    ) -> Tuple[torch.FloatTensor, torch.FloatTensor]:
        """
        Args:
            input (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, max_num_words)`):
                Word IDs of text.

        Returns:
            Tuple of (L2-normalized output features, local features).
        """
        local_features = self.lin1(self.act(input))
        projected = self.act(self.lin2(local_features))
        # L2-normalize each row so output features lie on the unit sphere.
        return F.normalize(projected, p=2, dim=1), local_features
class UFDCombineFeaturesMapPreTrainedModel(PreTrainedModel):
    """
    The UFD Combine Features Map Pre-Trained Model used as base class for the
    derived Combine Features Map Model.

    This abstract super class declares the config class type and the weight
    initialization scheme for the UFD Combine Features Map model. It should
    not be used or instantiated directly; see :class:`UFDCombineFeaturesMapModel`
    for usage.
    """

    config_class = UFDCombineFeaturesMapConfig
    base_model_prefix = "UFDCombineFeaturesMap"

    def _init_weights(self, module):
        # Linear layers: weights uniform in [-initrange, initrange], biases zeroed.
        if isinstance(module, nn.Linear):
            module.weight.data.uniform_(-self.config.initrange, self.config.initrange)
            module.bias.data.zero_()
class UFDCombineFeaturesMapModel(UFDCombineFeaturesMapPreTrainedModel):
    """
    The UFD Combine Features Map Model used for unsupervised training for
    global-to-domain mapping.

    Inherits from :obj:`UFDCombineFeaturesMapPreTrainedModel` for weights
    initialization and utility functions from the transformer
    :obj:`PreTrainedModel` class.

    Args:
        config (:class:`~UFDCombineFeaturesMapConfig`):
            Model configuration class with all parameters required for the model.
            Initializing with a config file does not load the weights associated
            with the model, only the configuration. Use the :obj:`.from_pretrained`
            method to load the model weights.
    """

    def __init__(self, config):
        super().__init__(config)
        # Maps the concatenated (global + domain) features back to embed_dim.
        self.fc = nn.Linear(2 * config.embed_dim, config.embed_dim)
        self.act = F.relu
        self.init_weights()

    def forward(self, input: torch.FloatTensor) -> torch.FloatTensor:
        """
        Args:
            input (:obj:`torch.FloatTensor` of shape
                :obj:`(batch_size, feature_size_of_both_adaptor_global_and_domain)`):
                Concatenated features from both adaptor global and adaptor
                domain models.
        """
        activated = self.act(input)
        return self.fc(activated)
class UFDClassifierPreTrainedModel(PreTrainedModel):
    """
    The UFD Classifier Pre-Trained Model used as base class for the derived
    Classifier Model.

    This abstract super class declares the config class type and the weight
    initialization scheme for the UFD Classifier model. It should not be used
    or instantiated directly; see :class:`UFDClassifierModel` for usage.
    """

    config_class = UFDClassifierConfig
    base_model_prefix = "UFDClassifier"

    def _init_weights(self, module):
        # Linear layers: weights uniform in [-initrange, initrange], biases zeroed.
        if isinstance(module, nn.Linear):
            module.weight.data.uniform_(-self.config.initrange, self.config.initrange)
            module.bias.data.zero_()
class UFDClassifierModel(UFDClassifierPreTrainedModel):
    """
    The UFD Classifier Model used for supervised training for source domain data.

    Inherits from :obj:`UFDClassifierPreTrainedModel` for weights initialization
    and utility functions from the transformer :obj:`PreTrainedModel` class.

    Args:
        config (:class:`~UFDClassifierConfig`):
            Model configuration class with all parameters required for the model.
            Initializing with a config file does not load the weights associated
            with the model, only the configuration. Use the :obj:`.from_pretrained`
            method to load the model weights.
    """

    def __init__(self, config):
        super().__init__(config)
        self.fc = nn.Linear(config.embed_dim, config.num_class)
        self.act = F.relu
        self.init_weights()

    def forward(self, input_tensor: torch.Tensor) -> torch.Tensor:
        """
        Args:
            input_tensor (torch.Tensor): features from UFDCombineFeaturesMapModel.

        Returns:
            torch.Tensor: classification logits of shape (batch_size, num_class).
        """
        return self.fc(self.act(input_tensor))
class UFDMaxDiscriminatorModel(nn.Module):
    """
    Max Discriminator Model used for unsupervised loss functions.

    Concatenates two feature vectors and projects them through a single
    linear layer (after ReLU) to produce one score per pair.

    Args:
        hidden_g (int): feature size of each input vector. Defaults to 1024.
        initrange (float): half-width of the uniform weight initialization.
            Defaults to 0.1.
    """

    def __init__(self, hidden_g=1024, initrange=0.1):
        super().__init__()
        self.l0 = nn.Linear(2 * hidden_g, 1)
        # Fix: the original called init_weights(initrange) AND repeated the
        # same uniform_/zero_ lines inline, initializing l0 twice. A single
        # init_weights call is sufficient and equivalent.
        self.init_weights(initrange)
        self.act = F.relu

    def init_weights(self, initrange):
        # Weights uniform in [-initrange, initrange]; bias zeroed.
        self.l0.weight.data.uniform_(-initrange, initrange)
        self.l0.bias.data.zero_()

    def forward(self, f_g, f_d):
        """Score the (f_g, f_d) pair: concat along dim 1, ReLU, project to 1."""
        h = torch.cat((f_g, f_d), dim=1)
        return self.l0(self.act(h))
class UFDMinDiscriminatorModel(nn.Module):
    """
    Min Discriminator Model used for unsupervised loss functions.

    Concatenates two feature vectors, projects them through a single linear
    layer (after ReLU), then L2-normalizes the resulting scores row-wise.

    Args:
        hidden_l (int): feature size of each input vector. Defaults to 1024.
        initrange (float): half-width of the uniform weight initialization.
            Defaults to 0.1.
    """

    def __init__(self, hidden_l=1024, initrange=0.1):
        super().__init__()
        self.l0 = nn.Linear(2 * hidden_l, 1)
        self.init_weights(initrange)
        self.act = F.relu

    def init_weights(self, initrange):
        # Weights uniform in [-initrange, initrange]; bias zeroed.
        self.l0.weight.data.uniform_(-initrange, initrange)
        self.l0.bias.data.zero_()

    def forward(self, f_g, f_d):
        """Score the (f_g, f_d) pair and L2-normalize along dim 1."""
        combined = torch.cat((f_g, f_d), dim=1)
        score = self.l0(self.act(combined))
        return F.normalize(score, p=2, dim=1)
class UFDDeepInfoMaxLossModel(nn.Module):
    """
    Main unsupervised deep info max loss model used for unsupervised training
    loss functions.

    Combines three softplus-based discriminator terms (two maximized, one
    minimized) weighted by alpha, delta and gamma respectively.

    Args:
        dim_hidden (int): hidden size passed to the discriminators. Defaults to 1024.
        initrange (float): uniform init half-width for discriminators. Defaults to 0.1.
        alpha (float): weight of the global feature term. Defaults to 0.3.
        beta (float): stored but unused in forward — kept for interface
            compatibility. Defaults to 1.
        gamma (float): weight of the min-discriminator term. Defaults to 0.2.
        delta (float): weight of the input/global term. Defaults to 1.
    """

    def __init__(
        self, dim_hidden=1024, initrange=0.1, alpha=0.3, beta=1, gamma=0.2, delta=1
    ):
        super().__init__()
        self.max_d = UFDMaxDiscriminatorModel(dim_hidden, initrange)
        self.min_d = UFDMinDiscriminatorModel(dim_hidden, initrange)
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.delta = delta

    def forward(self, x, x_n, f_g, fg_n, f_d, fd_n, y_g, y_d, yd_n):
        """Compute the weighted deep-info-max loss.

        NOTE(review): f_d and fd_n are accepted but never used here —
        presumably kept for signature compatibility with the training loop.
        """
        # Term A: agreement between global output y_g and global features f_g
        # versus shuffled negatives fg_n, weighted by alpha.
        joint = -F.softplus(-self.max_d(y_g, f_g)).mean()
        marginal = F.softplus(self.max_d(y_g, fg_n)).mean()
        global_a = (marginal - joint) * self.alpha

        # Term B: agreement between input x and global output y_g versus
        # negative inputs x_n, weighted by delta.
        joint = -F.softplus(-self.max_d(x, y_g)).mean()
        marginal = F.softplus(self.max_d(x_n, y_g)).mean()
        global_b = (marginal - joint) * self.delta

        # Term C: minimized agreement between domain output y_d and global
        # output y_g versus negatives yd_n, weighted by gamma (note reversed
        # sign relative to the max terms).
        joint = -F.softplus(-self.min_d(y_d, y_g)).mean()
        marginal = F.softplus(self.min_d(yd_n, y_g)).mean()
        local_b = (joint - marginal) * self.gamma

        return global_a + global_b + local_b
class UFDEmbeddingModel(XLMRobertaModel):
    """
    The UFD Embedding Model used to generate embeddings for model inputs.

    Inherits from :obj:`XLMRobertaModel` for weights initialization and utility
    functions from the transformer :obj:`PreTrainedModel` class.

    Args:
        config (:class:`~UFDEmbeddingConfig`):
            Model configuration class with all parameters required for the model.
            Initializing with a config file does not load the weights associated
            with the model, only the configuration. Use the :obj:`.from_pretrained`
            method to load the model weights.
    """

    def __init__(self, config):
        # Pure passthrough: all behavior comes from XLMRobertaModel.
        super().__init__(config)
class UFDModel(nn.Module):
    """
    The UFDModel used for running inferences.

    This model wraps the trained UFDAdaptorDomainModel, UFDAdaptorGlobalModel,
    UFDCombineFeaturesMapModel and the UFDClassifierModel. The forward pass
    executes a series of forward passes of these wrapped models in the sequence
    defined in the paper and research code.

    Args:
        adaptor_domain (UFDAdaptorDomainModel): trained adaptor domain model.
        adaptor_global (UFDAdaptorGlobalModel): trained adaptor global model.
        feature_maper (UFDCombineFeaturesMapModel): trained combine-features-map model.
        classifier (UFDClassifierModel): trained classifier model.
        loss_function (str): loss function key; only "crossentrophyloss"
            (sic — spelling kept for backward compatibility) is recognized.
    """

    def __init__(
        self,
        adaptor_domain: UFDAdaptorDomainModel,
        adaptor_global: UFDAdaptorGlobalModel,
        feature_maper: UFDCombineFeaturesMapModel,
        classifier: UFDClassifierModel,
        loss_function: str = "crossentrophyloss",
    ):
        super().__init__()
        self.adaptor_domain = adaptor_domain
        self.adaptor_global = adaptor_global
        self.feature_maper = feature_maper
        self.classifier = classifier
        self.loss_function = loss_function

    def forward(
        self, data_batch: torch.Tensor, labels: Optional[torch.Tensor] = None
    ) -> UFDModelOutput:
        """
        Args:
            data_batch (torch.Tensor): input tensor batch.
            labels (Optional[torch.Tensor], optional): list of labels. Defaults to None.

        Returns:
            UFDModelOutput: output UFDModelOutput instance with loss and logits.

        Example::

            from sgnlp.models.ufd import (
                UFDModelBuilder,
                UFDPreprocessor
            )
            model_builder = UFDModelBuilder()
            model_grp = model_builder.build_model_group()
            preprocessor = UFDPreprocessor()
            texts = ['dieser film ist wirklich gut!',
                     'Diese Fortsetzung ist nicht so gut wie die Vorgeschichte']
            text_feature = preprocessor(texts)
            ufd_model_output = model_grp['books_de_dvd'](**text_feature)
        """
        # Inference-only model: run the whole chain without tracking gradients.
        with torch.no_grad():
            global_feat, _ = self.adaptor_global(data_batch)
            domain_feat, _ = self.adaptor_domain(data_batch)
            features = torch.cat((global_feat, domain_feat), dim=1)
            logits = self.classifier(self.feature_maper(features))
            loss = None
            if labels is not None:
                if self.loss_function == "crossentrophyloss":
                    loss_fct = torch.nn.CrossEntropyLoss()
                    # Fix: CrossEntropyLoss expects (N, C) logits and (N,)
                    # class-index targets. The original `logits.view(-1)`
                    # flattened logits to 1-D, which raises a shape error for
                    # any batch with more than one class column.
                    loss = loss_fct(logits.view(-1, logits.size(-1)), labels.view(-1))
        return UFDModelOutput(loss=loss, logits=logits)