added deepsad base code
Deep-SAD-PyTorch/src/base/__init__.py (Normal file, 5 lines)
@@ -0,0 +1,5 @@
from .base_dataset import *
from .torchvision_dataset import *
from .odds_dataset import *
from .base_net import *
from .base_trainer import *
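With these re-exports in place, callers can pull the base classes straight from the package. A minimal usage sketch (assuming the src/ directory is on sys.path so the package imports as `base`):

# Hypothetical usage sketch; the import path is an assumption, not part of this commit
from base import BaseADDataset, BaseNet, BaseTrainer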
Deep-SAD-PyTorch/src/base/base_dataset.py (Normal file, 26 lines)
@@ -0,0 +1,26 @@
from abc import ABC, abstractmethod
from torch.utils.data import DataLoader


class BaseADDataset(ABC):
    """Anomaly detection dataset base class."""

    def __init__(self, root: str):
        super().__init__()
        self.root = root  # root path to data

        self.n_classes = 2  # 0: normal, 1: outlier
        self.normal_classes = None  # tuple with original class labels that define the normal class
        self.outlier_classes = None  # tuple with original class labels that define the outlier class

        self.train_set = None  # must be of type torch.utils.data.Dataset
        self.test_set = None  # must be of type torch.utils.data.Dataset

    @abstractmethod
    def loaders(self, batch_size: int, shuffle_train=True, shuffle_test=False, num_workers: int = 0) -> (
            DataLoader, DataLoader):
        """Implement data loaders of type torch.utils.data.DataLoader for train_set and test_set."""
        pass

    def __repr__(self):
        return self.__class__.__name__
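For orientation, a minimal sketch of a concrete subclass; the name `ToyADDataset`, the random data, and the import path are illustrative assumptions, not part of this commit:

import torch
from torch.utils.data import DataLoader, TensorDataset

from base import BaseADDataset  # assumes src/ is on sys.path


class ToyADDataset(BaseADDataset):
    """Hypothetical in-memory dataset: random features, all labeled normal."""

    def __init__(self, root: str):
        super().__init__(root)
        X = torch.randn(100, 8)                  # 100 samples, 8 features
        y = torch.zeros(100, dtype=torch.int64)  # all normal (label 0)
        self.train_set = TensorDataset(X, y)
        self.test_set = TensorDataset(X, y)

    def loaders(self, batch_size: int, shuffle_train=True, shuffle_test=False, num_workers: int = 0):
        train_loader = DataLoader(self.train_set, batch_size=batch_size, shuffle=shuffle_train,
                                  num_workers=num_workers)
        test_loader = DataLoader(self.test_set, batch_size=batch_size, shuffle=shuffle_test,
                                 num_workers=num_workers)
        return train_loader, test_loader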
Deep-SAD-PyTorch/src/base/base_net.py (Normal file, 26 lines)
@@ -0,0 +1,26 @@
import logging
import torch.nn as nn
import numpy as np


class BaseNet(nn.Module):
    """Base class for all neural networks."""

    def __init__(self):
        super().__init__()
        self.logger = logging.getLogger(self.__class__.__name__)
        self.rep_dim = None  # representation dimensionality, i.e. dim of the code layer or last layer

    def forward(self, *input):
        """
        Forward pass logic
        :return: Network output
        """
        raise NotImplementedError

    def summary(self):
        """Network summary."""
        net_parameters = filter(lambda p: p.requires_grad, self.parameters())
        params = sum([np.prod(p.size()) for p in net_parameters])
        self.logger.info('Trainable parameters: {}'.format(params))
        self.logger.info(self)
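A minimal sketch of how a concrete network plugs into `BaseNet`; the `ToyMLP` name and the layer sizes are hypothetical:

import logging
import torch
import torch.nn as nn

from base import BaseNet  # assumes src/ is on sys.path

logging.basicConfig(level=logging.INFO)


class ToyMLP(BaseNet):
    """Hypothetical two-layer encoder with an 8-dimensional code layer."""

    def __init__(self):
        super().__init__()
        self.rep_dim = 8
        self.fc1 = nn.Linear(32, 16)
        self.fc2 = nn.Linear(16, self.rep_dim)

    def forward(self, x):
        return self.fc2(torch.relu(self.fc1(x)))


net = ToyMLP()
net.summary()  # logs the trainable parameter count and the module structure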
Deep-SAD-PyTorch/src/base/base_trainer.py (Normal file, 34 lines)
@@ -0,0 +1,34 @@
from abc import ABC, abstractmethod
from .base_dataset import BaseADDataset
from .base_net import BaseNet


class BaseTrainer(ABC):
    """Trainer base class."""

    def __init__(self, optimizer_name: str, lr: float, n_epochs: int, lr_milestones: tuple, batch_size: int,
                 weight_decay: float, device: str, n_jobs_dataloader: int):
        super().__init__()
        self.optimizer_name = optimizer_name
        self.lr = lr
        self.n_epochs = n_epochs
        self.lr_milestones = lr_milestones
        self.batch_size = batch_size
        self.weight_decay = weight_decay
        self.device = device
        self.n_jobs_dataloader = n_jobs_dataloader

    @abstractmethod
    def train(self, dataset: BaseADDataset, net: BaseNet) -> BaseNet:
        """
        Implement train method that trains the given network using the train_set of dataset.
        :return: Trained net
        """
        pass

    @abstractmethod
    def test(self, dataset: BaseADDataset, net: BaseNet):
        """
        Implement test method that evaluates the test_set of dataset on the given network.
        """
        pass
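To make the contract concrete, a hypothetical skeleton of a subclass; the `ToyTrainer` name and the loop body are placeholders, not the Deep SAD objective:

import torch.optim as optim

from base import BaseADDataset, BaseNet, BaseTrainer  # assumes src/ is on sys.path


class ToyTrainer(BaseTrainer):
    """Hypothetical trainer skeleton illustrating the abstract interface."""

    def train(self, dataset: BaseADDataset, net: BaseNet) -> BaseNet:
        train_loader, _ = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
        optimizer = optim.Adam(net.parameters(), lr=self.lr, weight_decay=self.weight_decay)
        net = net.to(self.device)
        for epoch in range(self.n_epochs):
            for inputs, *_ in train_loader:
                optimizer.zero_grad()
                outputs = net(inputs.to(self.device))
                loss = outputs.pow(2).sum(dim=1).mean()  # placeholder loss, not the Deep SAD objective
                loss.backward()
                optimizer.step()
        return net

    def test(self, dataset: BaseADDataset, net: BaseNet):
        _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
        net = net.to(self.device).eval()
        # scoring logic would go here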
Deep-SAD-PyTorch/src/base/odds_dataset.py (Normal file, 110 lines)
@@ -0,0 +1,110 @@
from pathlib import Path
from torch.utils.data import Dataset
from scipy.io import loadmat
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from torchvision.datasets.utils import download_url

import os
import torch
import numpy as np


class ODDSDataset(Dataset):
    """
    ODDSDataset class for datasets from Outlier Detection DataSets (ODDS): http://odds.cs.stonybrook.edu/

    Dataset class with additional targets for the semi-supervised setting and a modified __getitem__ method
    that also returns the semi-supervised target as well as the index of a data sample.
    """

    urls = {
        'arrhythmia': 'https://www.dropbox.com/s/lmlwuspn1sey48r/arrhythmia.mat?dl=1',
        'cardio': 'https://www.dropbox.com/s/galg3ihvxklf0qi/cardio.mat?dl=1',
        'satellite': 'https://www.dropbox.com/s/dpzxp8jyr9h93k5/satellite.mat?dl=1',
        'satimage-2': 'https://www.dropbox.com/s/hckgvu9m6fs441p/satimage-2.mat?dl=1',
        'shuttle': 'https://www.dropbox.com/s/mk8ozgisimfn3dw/shuttle.mat?dl=1',
        'thyroid': 'https://www.dropbox.com/s/bih0e15a0fukftb/thyroid.mat?dl=1'
    }

    def __init__(self, root: str, dataset_name: str, train=True, random_state=None, download=False):
        super().__init__()

        self.classes = [0, 1]

        if isinstance(root, str):  # torch._six.string_classes was removed in newer PyTorch; a plain str check is equivalent
            root = os.path.expanduser(root)
        self.root = Path(root)
        self.dataset_name = dataset_name
        self.train = train  # training set or test set
        self.file_name = self.dataset_name + '.mat'
        self.data_file = self.root / self.file_name

        if download:
            self.download()

        mat = loadmat(self.data_file)
        X = mat['X']
        y = mat['y'].ravel()
        idx_norm = y == 0
        idx_out = y == 1

        # 60% data for training and 40% for testing; keep outlier ratio
        X_train_norm, X_test_norm, y_train_norm, y_test_norm = train_test_split(X[idx_norm], y[idx_norm],
                                                                                test_size=0.4,
                                                                                random_state=random_state)
        X_train_out, X_test_out, y_train_out, y_test_out = train_test_split(X[idx_out], y[idx_out],
                                                                            test_size=0.4,
                                                                            random_state=random_state)
        X_train = np.concatenate((X_train_norm, X_train_out))
        X_test = np.concatenate((X_test_norm, X_test_out))
        y_train = np.concatenate((y_train_norm, y_train_out))
        y_test = np.concatenate((y_test_norm, y_test_out))

        # Standardize data (per feature Z-normalization, i.e. zero-mean and unit variance)
        scaler = StandardScaler().fit(X_train)
        X_train_stand = scaler.transform(X_train)
        X_test_stand = scaler.transform(X_test)

        # Scale to range [0,1]
        minmax_scaler = MinMaxScaler().fit(X_train_stand)
        X_train_scaled = minmax_scaler.transform(X_train_stand)
        X_test_scaled = minmax_scaler.transform(X_test_stand)

        if self.train:
            self.data = torch.tensor(X_train_scaled, dtype=torch.float32)
            self.targets = torch.tensor(y_train, dtype=torch.int64)
        else:
            self.data = torch.tensor(X_test_scaled, dtype=torch.float32)
            self.targets = torch.tensor(y_test, dtype=torch.int64)

        self.semi_targets = torch.zeros_like(self.targets)

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (sample, target, semi_target, index)
        """
        sample, target, semi_target = self.data[index], int(self.targets[index]), int(self.semi_targets[index])

        return sample, target, semi_target, index

    def __len__(self):
        return len(self.data)

    def _check_exists(self):
        return os.path.exists(self.data_file)

    def download(self):
        """Download the ODDS dataset if it doesn't exist in root already."""

        if self._check_exists():
            return

        # download file
        download_url(self.urls[self.dataset_name], self.root, self.file_name)

        print('Done!')
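Typical usage, assuming the .mat file can be downloaded to ./data (the root path and dataset choice are illustrative):

from torch.utils.data import DataLoader

from base import ODDSDataset  # assumes src/ is on sys.path

dataset = ODDSDataset(root='./data', dataset_name='thyroid', train=True, random_state=42, download=True)
loader = DataLoader(dataset, batch_size=128, shuffle=True)
for sample, target, semi_target, index in loader:
    # each batch carries the sample, its label, its semi-supervised target, and its index
    print(sample.shape, target.shape, semi_target.shape, index.shape)
    break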
Deep-SAD-PyTorch/src/base/torchvision_dataset.py (Normal file, 17 lines)
@@ -0,0 +1,17 @@
from .base_dataset import BaseADDataset
from torch.utils.data import DataLoader


class TorchvisionDataset(BaseADDataset):
    """TorchvisionDataset class for datasets already implemented in torchvision.datasets."""

    def __init__(self, root: str):
        super().__init__(root)

    def loaders(self, batch_size: int, shuffle_train=True, shuffle_test=False, num_workers: int = 0) -> (
            DataLoader, DataLoader):
        train_loader = DataLoader(dataset=self.train_set, batch_size=batch_size, shuffle=shuffle_train,
                                  num_workers=num_workers, drop_last=True)
        test_loader = DataLoader(dataset=self.test_set, batch_size=batch_size, shuffle=shuffle_test,
                                 num_workers=num_workers, drop_last=False)
        return train_loader, test_loader
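A hypothetical concrete subclass wrapping torchvision.datasets.MNIST; the class name and the normal/outlier split shown here are illustrative assumptions, not part of this commit. The `loaders` method is inherited from `TorchvisionDataset` above:

import torchvision.transforms as transforms
from torchvision.datasets import MNIST

from base import TorchvisionDataset  # assumes src/ is on sys.path


class ToyMNISTDataset(TorchvisionDataset):
    """Hypothetical wrapper: digit 0 as the normal class, all other digits as outliers."""

    def __init__(self, root: str):
        super().__init__(root)
        self.normal_classes = (0,)
        self.outlier_classes = tuple(range(1, 10))
        transform = transforms.ToTensor()
        self.train_set = MNIST(root=self.root, train=True, transform=transform, download=True)
        self.test_set = MNIST(root=self.root, train=False, transform=transform, download=True)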