Source code for sgnlp.models.sentic_gcn.modeling

from dataclasses import dataclass
from typing import Dict, List, Optional

import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import PreTrainedModel, BertModel
from transformers.file_utils import ModelOutput

from .modules.dynamic_rnn import DynamicLSTM
from .modules.gcn import GraphConvolution
from .config import (
    SenticGCNConfig,
    SenticGCNBertConfig,
    SenticGCNEmbeddingConfig,
    SenticGCNBertEmbeddingConfig,
)
from .utils import build_embedding_matrix



@dataclass
class SenticGCNModelOutput(ModelOutput):
    """
    Base class for outputs of SenticGCNModel.

    Args:
        loss (:obj:`torch.Tensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided):
            Classification loss, typically cross entropy.
            The loss function used depends on what is specified in SenticGCNConfig.
        logits (:obj:`torch.Tensor` of shape :obj:`(batch_size, num_classes)`):
            Raw logits for each class. num_classes = 3 by default.
    """

    loss: Optional[torch.Tensor] = None
    logits: torch.Tensor = None

class SenticGCNPreTrainedModel(PreTrainedModel):
    """
    The SenticGCN Pre-Trained Model used as base class for derived SenticGCN Model.

    This model is the abstract super class for the SenticGCN Model which defines the config
    class types and weights initialization method. This class should not be used or instantiated
    directly, see SenticGCNModel class for usage.
    """

    config_class = SenticGCNConfig
    base_model_prefix = "senticgcn"

    def _init_weights(self, module: nn.Module) -> None:
        pass

class SenticGCNModel(SenticGCNPreTrainedModel):
    """
    The SenticGCN Model for aspect based sentiment analysis.

    This class inherits from :obj:`SenticGCNPreTrainedModel` for weights initialization and utility
    functions from the transformers :obj:`PreTrainedModel` class.

    Args:
        config (:obj:`~SenticGCNConfig`):
            Model configuration class with all parameters required for the model.
            Initializing with a config file does not load the weights associated with the model,
            only the configuration. Use the :obj:`.from_pretrained` method to load the model weights.
    """

    def __init__(self, config: SenticGCNConfig) -> None:
        super().__init__(config)
        self.text_lstm = DynamicLSTM(
            config.embed_dim,
            config.hidden_dim,
            num_layers=1,
            batch_first=True,
            bidirectional=True,
        )
        self.gc1 = GraphConvolution(2 * config.hidden_dim, 2 * config.hidden_dim)
        self.gc2 = GraphConvolution(2 * config.hidden_dim, 2 * config.hidden_dim)
        self.fc = nn.Linear(2 * config.hidden_dim, config.polarities_dim)
        self.text_embed_dropout = nn.Dropout(config.dropout)
        if config.loss_function == "cross_entropy":
            self.loss_function = nn.CrossEntropyLoss()

    def position_weight(
        self, x: torch.Tensor, aspect_double_idx: torch.Tensor, text_len: torch.Tensor, aspect_len: torch.Tensor
    ) -> torch.Tensor:
        batch_size, seq_len = x.shape[0], x.shape[1]
        aspect_double_idx = aspect_double_idx.cpu().numpy()
        text_len = text_len.cpu().numpy()
        aspect_len = aspect_len.cpu().numpy()
        weight = [[] for _ in range(batch_size)]
        for i in range(batch_size):
            context_len = text_len[i] - aspect_len[i]
            for j in range(aspect_double_idx[i, 0]):
                weight[i].append(1 - (aspect_double_idx[i, 0] - j) / context_len)
            for j in range(aspect_double_idx[i, 0], aspect_double_idx[i, 1] + 1):
                weight[i].append(0)
            for j in range(aspect_double_idx[i, 1] + 1, text_len[i]):
                # Parenthesization fixed: the full offset from the aspect span is divided
                # by context_len, mirroring the left-context weights above.
                weight[i].append(1 - (j - aspect_double_idx[i, 1]) / context_len)
            for j in range(text_len[i], seq_len):
                weight[i].append(0)
        weight = torch.tensor(weight, dtype=torch.float).unsqueeze(2).to(x.device)
        return weight * x

    def mask(self, x: torch.Tensor, aspect_double_idx: torch.Tensor) -> torch.Tensor:
        batch_size, seq_len = x.shape[0], x.shape[1]
        aspect_double_idx = aspect_double_idx.cpu().numpy()
        mask = [[] for _ in range(batch_size)]
        for i in range(batch_size):
            for j in range(aspect_double_idx[i, 0]):
                mask[i].append(0)
            for j in range(aspect_double_idx[i, 0], aspect_double_idx[i, 1] + 1):
                mask[i].append(1)
            for j in range(aspect_double_idx[i, 1] + 1, seq_len):
                mask[i].append(0)
        mask = torch.tensor(mask, dtype=torch.float).unsqueeze(2).to(x.device)
        return mask * x

    def forward(self, inputs: List[torch.Tensor], labels: Optional[torch.Tensor] = None) -> SenticGCNModelOutput:
        text_indices, aspect_indices, left_indices, text_embeddings, adj = inputs
        text_len = torch.sum(text_indices != 0, dim=-1)
        aspect_len = torch.sum(aspect_indices != 0, dim=-1)
        left_len = torch.sum(left_indices != 0, dim=-1)
        aspect_double_idx = torch.cat([left_len.unsqueeze(1), (left_len + aspect_len - 1).unsqueeze(1)], dim=1)
        text = self.text_embed_dropout(text_embeddings)
        text_out, (_, _) = self.text_lstm(text, text_len)
        x = F.relu(
            self.gc1(
                self.position_weight(text_out, aspect_double_idx, text_len, aspect_len),
                adj,
            )
        )
        x = F.relu(self.gc2(self.position_weight(x, aspect_double_idx, text_len, aspect_len), adj))
        # Zero out non-aspect positions before attention; without this call the
        # mask method defined above would be dead code.
        x = self.mask(x, aspect_double_idx)
        alpha_mat = torch.matmul(x, text_out.transpose(1, 2))
        alpha = F.softmax(alpha_mat.sum(1, keepdim=True), dim=2)
        x = torch.matmul(alpha, text_out).squeeze(1)  # batch_size x 2 * hidden_dim
        logits = self.fc(x)

        loss = self.loss_function(logits, labels) if labels is not None else None
        return SenticGCNModelOutput(loss=loss, logits=logits)
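
# Hedged usage sketch (editor's addition, not part of the original module, and never
# called at import time). It illustrates the tensor shapes SenticGCNModel.forward
# expects; in the real pipeline these inputs come from the SenticGCN preprocessor,
# and the config values and dummy data below are illustrative assumptions.
def _demo_senticgcn_forward() -> None:
    config = SenticGCNConfig(embed_dim=300, hidden_dim=300, polarities_dim=3, dropout=0.3)
    model = SenticGCNModel(config)
    model.eval()

    batch_size, seq_len = 2, 10
    text_indices = torch.randint(1, 100, (batch_size, seq_len))  # non-zero => real tokens
    aspect_indices = torch.zeros(batch_size, seq_len, dtype=torch.long)
    aspect_indices[:, :2] = 1  # a two-token aspect term
    left_indices = torch.zeros(batch_size, seq_len, dtype=torch.long)
    left_indices[:, :3] = 1  # three context tokens to the left of the aspect
    text_embeddings = torch.randn(batch_size, seq_len, config.embed_dim)
    adj = torch.eye(seq_len).repeat(batch_size, 1, 1)  # stand-in dependency graph

    with torch.no_grad():
        output = model([text_indices, aspect_indices, left_indices, text_embeddings, adj])
    assert output.logits.shape == (batch_size, config.polarities_dim)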

@dataclass
class SenticGCNBertModelOutput(ModelOutput):
    """
    Base class for outputs of SenticGCNBertModel.

    Args:
        loss (:obj:`torch.Tensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided):
            Classification loss, typically cross entropy.
            The loss function used depends on what is specified in SenticGCNBertConfig.
        logits (:obj:`torch.Tensor` of shape :obj:`(batch_size, num_classes)`):
            Raw logits for each class. num_classes = 3 by default.
    """

    loss: Optional[torch.Tensor] = None
    logits: torch.Tensor = None

class SenticGCNBertPreTrainedModel(PreTrainedModel):
    """
    The SenticGCNBert Pre-Trained Model used as base class for derived SenticGCNBert Model.

    This model is the abstract super class for the SenticGCNBert Model which defines the config
    class types and weights initialization method. This class should not be used or instantiated
    directly, see SenticGCNBertModel class for usage.
    """

    config_class = SenticGCNBertConfig
    base_model_prefix = "senticgcnbert"

    def _init_weights(self, module: nn.Module) -> None:
        pass

class SenticGCNBertModel(SenticGCNBertPreTrainedModel):
    """
    The SenticGCNBert Model for aspect based sentiment analysis.

    This class inherits from :obj:`SenticGCNBertPreTrainedModel` for weights initialization and utility
    functions from the transformers :obj:`PreTrainedModel` class.

    Args:
        config (:obj:`~SenticGCNBertConfig`):
            Model configuration class with all parameters required for the model.
            Initializing with a config file does not load the weights associated with the model,
            only the configuration. Use the :obj:`.from_pretrained` method to load the model weights.
    """

    def __init__(self, config: SenticGCNBertConfig) -> None:
        super().__init__(config)
        self.gc1 = GraphConvolution(config.hidden_dim, config.hidden_dim)
        self.gc2 = GraphConvolution(config.hidden_dim, config.hidden_dim)
        self.gc3 = GraphConvolution(config.hidden_dim, config.hidden_dim)
        self.fc = nn.Linear(config.hidden_dim, config.polarities_dim)
        self.text_embed_dropout = nn.Dropout(config.dropout)
        self.max_seq_len = config.max_seq_len
        # Instantiate the loss module from the config string; assigning the raw string
        # (as before) would make the `self.loss_function(...)` call in forward fail.
        if config.loss_function == "cross_entropy":
            self.loss_function = nn.CrossEntropyLoss()

    def position_weight(
        self, x: torch.Tensor, aspect_double_idx: torch.Tensor, text_len: torch.Tensor, aspect_len: torch.Tensor
    ) -> torch.Tensor:
        batch_size, seq_len = x.shape[0], x.shape[1]
        aspect_double_idx = aspect_double_idx.cpu().numpy()
        text_len = text_len.cpu().numpy()
        aspect_len = aspect_len.cpu().numpy()
        weight = [[] for _ in range(batch_size)]
        for i in range(batch_size):
            context_len = text_len[i] - aspect_len[i]
            for j in range(aspect_double_idx[i, 0]):
                weight[i].append(1 - (aspect_double_idx[i, 0] - j) / context_len)
            for j in range(aspect_double_idx[i, 0], min(aspect_double_idx[i, 1] + 1, self.max_seq_len)):
                weight[i].append(0)
            for j in range(aspect_double_idx[i, 1] + 1, text_len[i]):
                weight[i].append(1 - (j - aspect_double_idx[i, 1]) / context_len)
            for j in range(text_len[i], seq_len):
                weight[i].append(0)
        # dtype pinned to float32 so the weights match x instead of promoting to float64
        weight = torch.tensor(weight, dtype=torch.float).unsqueeze(2).to(x.device)
        return weight * x

    def mask(self, x: torch.Tensor, aspect_double_idx: torch.Tensor) -> torch.Tensor:
        batch_size, seq_len = x.shape[0], x.shape[1]
        aspect_double_idx = aspect_double_idx.cpu().numpy()
        mask = [[] for _ in range(batch_size)]
        for i in range(batch_size):
            for j in range(aspect_double_idx[i, 0]):
                mask[i].append(0)
            for j in range(aspect_double_idx[i, 0], min(aspect_double_idx[i, 1] + 1, self.max_seq_len)):
                mask[i].append(1)
            for j in range(min(aspect_double_idx[i, 1] + 1, self.max_seq_len), seq_len):
                mask[i].append(0)
        mask = torch.tensor(mask, dtype=torch.float).unsqueeze(2).to(x.device)
        return mask * x

    def forward(self, inputs: List[torch.Tensor], labels: Optional[torch.Tensor] = None) -> SenticGCNBertModelOutput:
        text_indices, aspect_indices, left_indices, text_embeddings, adj = inputs
        text_len = torch.sum(text_indices != 0, dim=-1)
        aspect_len = torch.sum(aspect_indices != 0, dim=-1)
        left_len = torch.sum(left_indices != 0, dim=-1)
        aspect_double_idx = torch.cat([left_len.unsqueeze(1), (left_len + aspect_len - 1).unsqueeze(1)], dim=1)
        text_out = text_embeddings
        x = F.relu(self.gc1(self.position_weight(text_out, aspect_double_idx, text_len, aspect_len), adj))
        x = F.relu(self.gc2(self.position_weight(x, aspect_double_idx, text_len, aspect_len), adj))
        x = F.relu(self.gc3(self.position_weight(x, aspect_double_idx, text_len, aspect_len), adj))
        x = self.mask(x, aspect_double_idx)
        alpha_mat = torch.matmul(x, text_out.transpose(1, 2))
        alpha = F.softmax(alpha_mat.sum(1, keepdim=True), dim=2)
        x = torch.matmul(alpha, text_out).squeeze(1)  # batch_size x hidden_dim
        logits = self.fc(x)

        loss = self.loss_function(logits, labels) if labels is not None else None
        return SenticGCNBertModelOutput(loss=loss, logits=logits)
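
# Hedged usage sketch (editor's addition, not part of the original module, and never
# called at import time). For the BERT variant, `text_embeddings` are hidden states
# produced by a BERT encoder (e.g. SenticGCNBertEmbeddingModel below), so hidden_dim
# should match the encoder's hidden size. All config values and dummy data below are
# illustrative assumptions.
def _demo_senticgcn_bert_forward() -> None:
    config = SenticGCNBertConfig(hidden_dim=768, max_seq_len=85, polarities_dim=3, dropout=0.3)
    model = SenticGCNBertModel(config)
    model.eval()

    batch_size, seq_len = 2, 85
    text_indices = torch.zeros(batch_size, seq_len, dtype=torch.long)
    text_indices[:, :12] = 1  # 12 real tokens, rest padding
    aspect_indices = torch.zeros(batch_size, seq_len, dtype=torch.long)
    aspect_indices[:, :2] = 1  # a two-token aspect term
    left_indices = torch.zeros(batch_size, seq_len, dtype=torch.long)
    left_indices[:, :4] = 1  # four context tokens to the left of the aspect
    text_embeddings = torch.randn(batch_size, seq_len, config.hidden_dim)  # BERT hidden states
    adj = torch.eye(seq_len).repeat(batch_size, 1, 1)  # stand-in dependency graph

    with torch.no_grad():
        output = model([text_indices, aspect_indices, left_indices, text_embeddings, adj])
    assert output.logits.shape == (batch_size, config.polarities_dim)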

class SenticGCNEmbeddingPreTrainedModel(PreTrainedModel):
    """
    The SenticGCN Embedding Pre-Trained Model used as base class for derived SenticGCN Embedding Model.

    This model is the abstract super class for the SenticGCN Embedding Model which defines the config
    class types and weights initialization method. This class should not be used or instantiated
    directly, see SenticGCNEmbeddingModel class for usage.
    """

    config_class = SenticGCNEmbeddingConfig
    base_model_prefix = "senticgcnembedding"

    def _init_weights(self, module: nn.Module) -> None:
        pass

class SenticGCNEmbeddingModel(SenticGCNEmbeddingPreTrainedModel):
    """
    The SenticGCN Embedding Model used to generate embeddings for model inputs.
    By default, the embeddings are generated from the glove.840B.300d embeddings.

    This class inherits from :obj:`SenticGCNEmbeddingPreTrainedModel` for weights initialization and utility
    functions from the transformers :obj:`PreTrainedModel` class.

    This class can also be constructed via the SenticGCNEmbeddingModel.build_embedding_model class method.

    Args:
        config (:obj:`~SenticGCNEmbeddingConfig`):
            Model configuration class with all parameters required for the model.
            Initializing with a config file does not load the weights associated with the model,
            only the configuration. Use the :obj:`.from_pretrained` method to load the model weights.
    """

    def __init__(self, config: SenticGCNEmbeddingConfig) -> None:
        super().__init__(config)
        self.vocab_size = config.vocab_size
        self.embed = nn.Embedding(config.vocab_size, config.embed_dim)

    def forward(self, token_ids: torch.Tensor) -> torch.Tensor:
        """
        Encode input token ids using the word embedding layer.

        Args:
            token_ids (torch.Tensor): Tensor of token ids with shape (batch_size, num_words)

        Returns:
            torch.Tensor: Tensor of embeddings with shape (batch_size, num_words, embed_dim)
        """
        return self.embed(token_ids)

    @classmethod
    def build_embedding_model(
        cls,
        word_vec_file_path: str,
        vocab: Dict[str, int],
        embed_dim: int = 300,
    ):
        """
        Helper class method to construct the embedding model from a file containing word vectors
        (i.e. GloVe) and a vocab dictionary.

        Args:
            word_vec_file_path (str): file path to the word vectors
            vocab (Dict[str, int]): vocab dictionary with words as keys and indices as values
            embed_dim (int, optional): the embedding dimension. Defaults to 300.

        Returns:
            SenticGCNEmbeddingModel: an instance of SenticGCNEmbeddingModel
        """
        embedding_matrix = build_embedding_matrix(
            word_vec_file_path=word_vec_file_path, vocab=vocab, embed_dim=embed_dim
        )
        embedding_tensor = torch.tensor(embedding_matrix, dtype=torch.float)
        sentic_embed_config = SenticGCNEmbeddingConfig(vocab_size=len(vocab), embed_dim=embed_dim)
        senticgcn_embed = cls(sentic_embed_config)
        senticgcn_embed.embed.weight.data.copy_(embedding_tensor)
        return senticgcn_embed
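
# Hedged usage sketch (editor's addition, not part of the original module, and never
# called at import time). It constructs a small embedding model directly from a config
# rather than from a GloVe file, then embeds a dummy batch; the vocab size and
# dimensions are illustrative assumptions.
def _demo_senticgcn_embedding() -> None:
    embed_config = SenticGCNEmbeddingConfig(vocab_size=100, embed_dim=300)
    embed_model = SenticGCNEmbeddingModel(embed_config)

    token_ids = torch.randint(0, 100, (2, 10))  # (batch_size, num_words)
    embeddings = embed_model(token_ids)
    assert embeddings.shape == (2, 10, 300)  # (batch_size, num_words, embed_dim)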

class SenticGCNBertEmbeddingModel(BertModel):
    """
    The SenticGCN Bert Embedding Model used to generate embeddings for model inputs.

    This class inherits from :obj:`BertModel` for weights initialization and utility
    functions from the transformers :obj:`PreTrainedModel` class.

    Args:
        config (:obj:`~SenticGCNBertEmbeddingConfig`):
            Model configuration class with all parameters required for the model.
            Initializing with a config file does not load the weights associated with the model,
            only the configuration. Use the :obj:`.from_pretrained` method to load the model weights.
    """

    def __init__(self, config: SenticGCNBertEmbeddingConfig) -> None:
        super().__init__(config)
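
# Hedged usage sketch (editor's addition, not part of the original module, and never
# called at import time). Since SenticGCNBertEmbeddingModel subclasses transformers'
# BertModel, its forward signature is BertModel's. The checkpoint name below is an
# assumption; any BERT checkpoint compatible with the config would do.
def _demo_senticgcn_bert_embedding() -> None:
    embed_model = SenticGCNBertEmbeddingModel.from_pretrained("bert-base-uncased")
    embed_model.eval()

    input_ids = torch.randint(1, 1000, (2, 85))
    attention_mask = torch.ones_like(input_ids)
    with torch.no_grad():
        outputs = embed_model(input_ids=input_ids, attention_mask=attention_mask)
    # last_hidden_state is what feeds SenticGCNBertModel as `text_embeddings`.
    assert outputs.last_hidden_state.shape == (2, 85, embed_model.config.hidden_size)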