# model.py - MoodLens Model Architecture
import torch
import torch.nn as nn
from transformers import AutoModel


class MultiTaskEventClassifier(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.config = config

        # Load pre-trained transformer
        self.transformer = AutoModel.from_pretrained(config['model_name'])

        # Freeze embedding parameters so they stay fixed during fine-tuning
        for param in self.transformer.embeddings.parameters():
            param.requires_grad = False

        # Attention for text features
        self.text_attention = nn.MultiheadAttention(
            embed_dim=self.transformer.config.hidden_size,
            num_heads=config['num_attention_heads'],
            dropout=config['dropout_rate'],
            batch_first=True
        )

        # Shared feature extractor feeding all task heads
        self.shared_layer = nn.Sequential(
            nn.Linear(self.transformer.config.hidden_size, config['hidden_dim']),
            nn.LayerNorm(config['hidden_dim']),
            nn.GELU(),
            nn.Dropout(config['dropout_rate'])
        )

        # Task-specific classifiers
        self.event_type_classifier = self._make_classifier(
            config['hidden_dim'], config['num_classes']['event_type'])
        self.event_group_classifier = self._make_classifier(
            config['hidden_dim'], config['num_classes']['event_group'])
        self.emotion_classifier = self._make_classifier(
            config['hidden_dim'], config['num_classes']['emotion'])
        self.tense_classifier = self._make_classifier(
            config['hidden_dim'], config['num_classes']['tense'])
        self.sarcasm_classifier = self._make_classifier(
            config['hidden_dim'], 2)  # binary: sarcastic / not sarcastic

        # Regression heads (sigmoid output, so targets are expected in [0, 1])
        self.sentiment_regressor = self._make_regressor(config['hidden_dim'])
        self.certainty_regressor = self._make_regressor(config['hidden_dim'])

    def _make_classifier(self, input_dim, num_classes):
        return nn.Sequential(
            nn.Linear(input_dim, input_dim // 2),
            nn.LayerNorm(input_dim // 2),
            nn.GELU(),
            nn.Dropout(self.config['dropout_rate']),
            nn.Linear(input_dim // 2, num_classes)
        )

    def _make_regressor(self, input_dim):
        return nn.Sequential(
            nn.Linear(input_dim, input_dim // 2),
            nn.LayerNorm(input_dim // 2),
            nn.GELU(),
            nn.Dropout(self.config['dropout_rate']),
            nn.Linear(input_dim // 2, 1),
            nn.Sigmoid()
        )

    def forward(self, input_ids, attention_mask):
        # Contextual token embeddings from the transformer
        encoder_outputs = self.transformer(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        hidden_state = encoder_outputs.last_hidden_state

        # Self-attention over tokens; key_padding_mask is True at padding positions
        attended, _ = self.text_attention(
            hidden_state, hidden_state, hidden_state,
            key_padding_mask=~attention_mask.bool()
        )

        # Masked mean pooling over tokens; clamp guards against division by
        # zero on all-padding rows
        mask = attention_mask.unsqueeze(-1).float()
        text_features = (attended * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)

        # Shared features feed every task head
        shared_features = self.shared_layer(text_features)

        # Predictions from each task head
        return {
            'event_type': self.event_type_classifier(shared_features),
            'event_group': self.event_group_classifier(shared_features),
            'emotion': self.emotion_classifier(shared_features),
            'tense': self.tense_classifier(shared_features),
            'sarcasm': self.sarcasm_classifier(shared_features),
            'sentiment_valence': self.sentiment_regressor(shared_features).squeeze(-1),
            'certainty': self.certainty_regressor(shared_features).squeeze(-1),
        }
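

# ---------------------------------------------------------------------------
# Minimal smoke test: a sketch showing how the model is instantiated and run.
# The config values below (checkpoint name, head count, class counts) are
# illustrative assumptions, not values from the original training run;
# substitute your own. Note that the transformer hidden size must be
# divisible by `num_attention_heads`.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    config = {
        'model_name': 'bert-base-uncased',  # assumed checkpoint; hidden_size=768
        'num_attention_heads': 8,           # 768 / 8 = 96, valid head dimension
        'dropout_rate': 0.1,
        'hidden_dim': 256,
        'num_classes': {                    # hypothetical class counts
            'event_type': 10,
            'event_group': 5,
            'emotion': 7,
            'tense': 3,
        },
    }
    model = MultiTaskEventClassifier(config)
    model.eval()

    # Dummy batch: 2 sequences of length 16, with padding in the second row
    input_ids = torch.randint(0, model.transformer.config.vocab_size, (2, 16))
    attention_mask = torch.ones(2, 16, dtype=torch.long)
    attention_mask[1, 10:] = 0  # mark the tail of row 1 as padding

    with torch.no_grad():
        preds = model(input_ids, attention_mask)

    # Classifier heads -> (2, num_classes); regression heads -> (2,)
    for name, tensor in preds.items():
        print(f'{name}: {tuple(tensor.shape)}')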