added deepsad base code

2024-06-28 07:42:12 +02:00
parent 2eb1bf2e05
commit 914bb020d0
57 changed files with 4974 additions and 0 deletions
--- a/Deep-SAD-PyTorch/src/base/__init__.py
+++ b/Deep-SAD-PyTorch/src/base/__init__.py
@@ -0,0 +1,5 @@
+from .base_dataset import *
+from .torchvision_dataset import *
+from .odds_dataset import *
+from .base_net import *
+from .base_trainer import *
--- a/Deep-SAD-PyTorch/src/base/base_dataset.py
+++ b/Deep-SAD-PyTorch/src/base/base_dataset.py
@@ -0,0 +1,26 @@
+from abc import ABC, abstractmethod
+from torch.utils.data import DataLoader
+
+
+class BaseADDataset(ABC):
+    """Abstract base for anomaly detection datasets exposing a train and a test split."""
+
+    def __init__(self, root: str):
+        super().__init__()
+
+        # Root path to the data.
+        self.root = root
+
+        # Binary labelling: 0 is normal, 1 is outlier.
+        self.n_classes = 2
+        # Tuples with the original class labels that define the normal / outlier
+        # class; both are initialised to None here.
+        self.normal_classes = None
+        self.outlier_classes = None
+
+        # Both splits must be of type torch.utils.data.Dataset once assigned.
+        self.train_set = None
+        self.test_set = None
+
+    @abstractmethod
+    def loaders(self, batch_size: int, shuffle_train=True, shuffle_test=False, num_workers: int = 0) -> (
+            DataLoader, DataLoader):
+        """Return torch.utils.data.DataLoader objects for train_set and test_set.
+
+        Concrete subclasses must implement this; the declared return is a
+        (train loader, test loader) pair.
+        """
+
+    def __repr__(self):
+        # Represent the dataset by the name of its concrete class.
+        return type(self).__name__
--- a/Deep-SAD-PyTorch/src/base/base_net.py
+++ b/Deep-SAD-PyTorch/src/base/base_net.py
@@ -0,0 +1,26 @@
+import logging
+import torch.nn as nn
+import numpy as np
+
+
+class BaseNet(nn.Module):
+    """Base class for all neural networks."""
+
+    def __init__(self):
+        super().__init__()
+        # Logger named after the concrete subclass.
+        self.logger = logging.getLogger(self.__class__.__name__)
+        self.rep_dim = None  # representation dimensionality, i.e. dim of the code layer or last layer
+
+    def forward(self, *input):
+        """
+        Forward pass logic
+        :return: Network output
+        :raises NotImplementedError: always, on this base class
+        """
+        raise NotImplementedError
+
+    def summary(self):
+        """Log the number of trainable parameters, then the module itself, at INFO level."""
+        # numel() returns a plain Python int for every parameter, including 0-dim
+        # ones; np.prod(p.size()) yields the float 1.0 for an empty shape, which
+        # turned the logged total into a float.
+        params = sum(p.numel() for p in self.parameters() if p.requires_grad)
+        self.logger.info('Trainable parameters: {}'.format(params))
+        self.logger.info(self)
--- a/Deep-SAD-PyTorch/src/base/base_trainer.py
+++ b/Deep-SAD-PyTorch/src/base/base_trainer.py
@@ -0,0 +1,34 @@
+from abc import ABC, abstractmethod
+from .base_dataset import BaseADDataset
+from .base_net import BaseNet
+
+
+class BaseTrainer(ABC):
+    """Abstract trainer that stores its settings and declares train/test."""
+
+    def __init__(self, optimizer_name: str, lr: float, n_epochs: int, lr_milestones: tuple, batch_size: int,
+                 weight_decay: float, device: str, n_jobs_dataloader: int):
+        super().__init__()
+
+        # Every constructor argument is stored unchanged under the same name.
+        # Optimizer-related settings.
+        self.optimizer_name = optimizer_name
+        self.lr = lr
+        self.lr_milestones = lr_milestones
+        self.weight_decay = weight_decay
+
+        # Loop-related settings.
+        self.n_epochs = n_epochs
+        self.batch_size = batch_size
+
+        # Execution-related settings.
+        self.device = device
+        self.n_jobs_dataloader = n_jobs_dataloader
+
+    @abstractmethod
+    def train(self, dataset: BaseADDataset, net: BaseNet) -> BaseNet:
+        """
+        Train the given network on the train_set of dataset; to be implemented by subclasses.
+        :return: Trained net
+        """
+
+    @abstractmethod
+    def test(self, dataset: BaseADDataset, net: BaseNet):
+        """
+        Evaluate the test_set of dataset on the given network; to be implemented by subclasses.
+        """
--- a/Deep-SAD-PyTorch/src/base/odds_dataset.py
+++ b/Deep-SAD-PyTorch/src/base/odds_dataset.py
@@ -0,0 +1,110 @@
+from pathlib import Path
+from torch.utils.data import Dataset
+from scipy.io import loadmat
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
+from torchvision.datasets.utils import download_url
+
+import os
+import torch
+import numpy as np
+
+
+class ODDSDataset(Dataset):
+    """
+    ODDSDataset class for datasets from Outlier Detection DataSets (ODDS): http://odds.cs.stonybrook.edu/
+
+    Dataset class with additional targets for the semi-supervised setting and modification of __getitem__ method
+    to also return the semi-supervised target as well as the index of a data sample.
+
+    The samples with y == 0 and the samples with y == 1 are each split 60/40 into train/test, then the features
+    are standardized and min-max scaled with scalers fitted on the training part only.
+    """
+
+    urls = {
+        'arrhythmia': 'https://www.dropbox.com/s/lmlwuspn1sey48r/arrhythmia.mat?dl=1',
+        'cardio': 'https://www.dropbox.com/s/galg3ihvxklf0qi/cardio.mat?dl=1',
+        'satellite': 'https://www.dropbox.com/s/dpzxp8jyr9h93k5/satellite.mat?dl=1',
+        'satimage-2': 'https://www.dropbox.com/s/hckgvu9m6fs441p/satimage-2.mat?dl=1',
+        'shuttle': 'https://www.dropbox.com/s/mk8ozgisimfn3dw/shuttle.mat?dl=1',
+        'thyroid': 'https://www.dropbox.com/s/bih0e15a0fukftb/thyroid.mat?dl=1'
+    }
+
+    def __init__(self, root: str, dataset_name: str, train=True, random_state=None, download=False):
+        """
+        Args:
+            root: Directory that holds (or receives) ``<dataset_name>.mat``; a leading ``~`` is expanded.
+            dataset_name: Base name of the .mat file; used as key into ``urls`` when downloading.
+            train: If True expose the training split, otherwise the test split.
+            random_state: Passed to both ``train_test_split`` calls.
+            download: If True, call ``download()`` before loading the file.
+        """
+        # Zero-argument super() follows the full MRO; super(Dataset, self) started the lookup after Dataset.
+        super().__init__()
+
+        self.classes = [0, 1]
+
+        # Plain (str, bytes) check instead of the private torch._six.string_classes helper, which is not
+        # available in current PyTorch releases.
+        if isinstance(root, (str, bytes)):
+            root = os.path.expanduser(root)
+        self.root = Path(root)
+        self.dataset_name = dataset_name
+        self.train = train  # training set or test set
+        self.file_name = self.dataset_name + '.mat'
+        self.data_file = self.root / self.file_name
+
+        if download:
+            self.download()
+
+        mat = loadmat(self.data_file)
+        X = mat['X']
+        y = mat['y'].ravel()
+        idx_norm = y == 0
+        idx_out = y == 1
+
+        # 60% data for training and 40% for testing; keep outlier ratio
+        X_train_norm, X_test_norm, y_train_norm, y_test_norm = train_test_split(X[idx_norm], y[idx_norm],
+                                                                                test_size=0.4,
+                                                                                random_state=random_state)
+        X_train_out, X_test_out, y_train_out, y_test_out = train_test_split(X[idx_out], y[idx_out],
+                                                                            test_size=0.4,
+                                                                            random_state=random_state)
+        X_train = np.concatenate((X_train_norm, X_train_out))
+        X_test = np.concatenate((X_test_norm, X_test_out))
+        y_train = np.concatenate((y_train_norm, y_train_out))
+        y_test = np.concatenate((y_test_norm, y_test_out))
+
+        # Standardize data (per feature Z-normalization, i.e. zero-mean and unit variance)
+        scaler = StandardScaler().fit(X_train)
+        X_train_stand = scaler.transform(X_train)
+        X_test_stand = scaler.transform(X_test)
+
+        # Scale to range [0,1]; fitted on the training part, so test values may fall outside that range
+        minmax_scaler = MinMaxScaler().fit(X_train_stand)
+        X_train_scaled = minmax_scaler.transform(X_train_stand)
+        X_test_scaled = minmax_scaler.transform(X_test_stand)
+
+        if self.train:
+            self.data = torch.tensor(X_train_scaled, dtype=torch.float32)
+            self.targets = torch.tensor(y_train, dtype=torch.int64)
+        else:
+            self.data = torch.tensor(X_test_scaled, dtype=torch.float32)
+            self.targets = torch.tensor(y_test, dtype=torch.int64)
+
+        # Semi-supervised targets start out as all zeros, same shape and dtype as targets.
+        self.semi_targets = torch.zeros_like(self.targets)
+
+    def __getitem__(self, index):
+        """
+        Args:
+            index (int): Index
+
+        Returns:
+            tuple: (sample, target, semi_target, index)
+        """
+        sample, target, semi_target = self.data[index], int(self.targets[index]), int(self.semi_targets[index])
+
+        return sample, target, semi_target, index
+
+    def __len__(self):
+        """Return the number of samples in the selected split."""
+        return len(self.data)
+
+    def _check_exists(self):
+        """Return True if the dataset's .mat file is already present under root."""
+        return os.path.exists(self.data_file)
+
+    def download(self):
+        """Download the ODDS dataset if it doesn't exist in root already."""
+
+        if self._check_exists():
+            return
+
+        # download file
+        download_url(self.urls[self.dataset_name], self.root, self.file_name)
+
+        print('Done!')
--- a/Deep-SAD-PyTorch/src/base/torchvision_dataset.py
+++ b/Deep-SAD-PyTorch/src/base/torchvision_dataset.py
@@ -0,0 +1,17 @@
+from .base_dataset import BaseADDataset
+from torch.utils.data import DataLoader
+
+
+class TorchvisionDataset(BaseADDataset):
+    """Dataset base for data that is already implemented in torchvision.datasets."""
+
+    def __init__(self, root: str):
+        super().__init__(root)
+
+    def loaders(self, batch_size: int, shuffle_train=True, shuffle_test=False, num_workers: int = 0) -> (
+            DataLoader, DataLoader):
+        """Create DataLoader objects over train_set and test_set.
+
+        The train loader drops a trailing incomplete batch (drop_last=True);
+        the test loader keeps it.
+
+        :return: (train_loader, test_loader)
+        """
+        # Settings common to both loaders.
+        shared = {'batch_size': batch_size, 'num_workers': num_workers}
+
+        train_loader = DataLoader(self.train_set, shuffle=shuffle_train, drop_last=True, **shared)
+        test_loader = DataLoader(self.test_set, shuffle=shuffle_test, drop_last=False, **shared)
+        return train_loader, test_loader