black formatted files before changes
This commit is contained in:
@@ -12,8 +12,16 @@ import numpy as np
|
||||
|
||||
class CIFAR10_Dataset(TorchvisionDataset):
|
||||
|
||||
def __init__(self, root: str, normal_class: int = 5, known_outlier_class: int = 3, n_known_outlier_classes: int = 0,
|
||||
ratio_known_normal: float = 0.0, ratio_known_outlier: float = 0.0, ratio_pollution: float = 0.0):
|
||||
def __init__(
|
||||
self,
|
||||
root: str,
|
||||
normal_class: int = 5,
|
||||
known_outlier_class: int = 3,
|
||||
n_known_outlier_classes: int = 0,
|
||||
ratio_known_normal: float = 0.0,
|
||||
ratio_known_outlier: float = 0.0,
|
||||
ratio_pollution: float = 0.0,
|
||||
):
|
||||
super().__init__(root)
|
||||
|
||||
# Define normal and outlier classes
|
||||
@@ -28,28 +36,48 @@ class CIFAR10_Dataset(TorchvisionDataset):
|
||||
elif n_known_outlier_classes == 1:
|
||||
self.known_outlier_classes = tuple([known_outlier_class])
|
||||
else:
|
||||
self.known_outlier_classes = tuple(random.sample(self.outlier_classes, n_known_outlier_classes))
|
||||
self.known_outlier_classes = tuple(
|
||||
random.sample(self.outlier_classes, n_known_outlier_classes)
|
||||
)
|
||||
|
||||
# CIFAR-10 preprocessing: feature scaling to [0, 1]
|
||||
transform = transforms.ToTensor()
|
||||
target_transform = transforms.Lambda(lambda x: int(x in self.outlier_classes))
|
||||
|
||||
# Get train set
|
||||
train_set = MyCIFAR10(root=self.root, train=True, transform=transform, target_transform=target_transform,
|
||||
download=True)
|
||||
train_set = MyCIFAR10(
|
||||
root=self.root,
|
||||
train=True,
|
||||
transform=transform,
|
||||
target_transform=target_transform,
|
||||
download=True,
|
||||
)
|
||||
|
||||
# Create semi-supervised setting
|
||||
idx, _, semi_targets = create_semisupervised_setting(np.array(train_set.targets), self.normal_classes,
|
||||
self.outlier_classes, self.known_outlier_classes,
|
||||
ratio_known_normal, ratio_known_outlier, ratio_pollution)
|
||||
train_set.semi_targets[idx] = torch.tensor(semi_targets) # set respective semi-supervised labels
|
||||
idx, _, semi_targets = create_semisupervised_setting(
|
||||
np.array(train_set.targets),
|
||||
self.normal_classes,
|
||||
self.outlier_classes,
|
||||
self.known_outlier_classes,
|
||||
ratio_known_normal,
|
||||
ratio_known_outlier,
|
||||
ratio_pollution,
|
||||
)
|
||||
train_set.semi_targets[idx] = torch.tensor(
|
||||
semi_targets
|
||||
) # set respective semi-supervised labels
|
||||
|
||||
# Subset train_set to semi-supervised setup
|
||||
self.train_set = Subset(train_set, idx)
|
||||
|
||||
# Get test set
|
||||
self.test_set = MyCIFAR10(root=self.root, train=False, transform=transform, target_transform=target_transform,
|
||||
download=True)
|
||||
self.test_set = MyCIFAR10(
|
||||
root=self.root,
|
||||
train=False,
|
||||
transform=transform,
|
||||
target_transform=target_transform,
|
||||
download=True,
|
||||
)
|
||||
|
||||
|
||||
class MyCIFAR10(CIFAR10):
|
||||
@@ -71,7 +99,11 @@ class MyCIFAR10(CIFAR10):
|
||||
Returns:
|
||||
tuple: (image, target, semi_target, index)
|
||||
"""
|
||||
img, target, semi_target = self.data[index], self.targets[index], int(self.semi_targets[index])
|
||||
img, target, semi_target = (
|
||||
self.data[index],
|
||||
self.targets[index],
|
||||
int(self.semi_targets[index]),
|
||||
)
|
||||
|
||||
# doing this so that it is consistent with all other datasets
|
||||
# to return a PIL Image
|
||||
|
||||
@@ -11,8 +11,16 @@ import random
|
||||
|
||||
class FashionMNIST_Dataset(TorchvisionDataset):
|
||||
|
||||
def __init__(self, root: str, normal_class: int = 0, known_outlier_class: int = 1, n_known_outlier_classes: int = 0,
|
||||
ratio_known_normal: float = 0.0, ratio_known_outlier: float = 0.0, ratio_pollution: float = 0.0):
|
||||
def __init__(
|
||||
self,
|
||||
root: str,
|
||||
normal_class: int = 0,
|
||||
known_outlier_class: int = 1,
|
||||
n_known_outlier_classes: int = 0,
|
||||
ratio_known_normal: float = 0.0,
|
||||
ratio_known_outlier: float = 0.0,
|
||||
ratio_pollution: float = 0.0,
|
||||
):
|
||||
super().__init__(root)
|
||||
|
||||
# Define normal and outlier classes
|
||||
@@ -27,28 +35,48 @@ class FashionMNIST_Dataset(TorchvisionDataset):
|
||||
elif n_known_outlier_classes == 1:
|
||||
self.known_outlier_classes = tuple([known_outlier_class])
|
||||
else:
|
||||
self.known_outlier_classes = tuple(random.sample(self.outlier_classes, n_known_outlier_classes))
|
||||
self.known_outlier_classes = tuple(
|
||||
random.sample(self.outlier_classes, n_known_outlier_classes)
|
||||
)
|
||||
|
||||
# FashionMNIST preprocessing: feature scaling to [0, 1]
|
||||
transform = transforms.ToTensor()
|
||||
target_transform = transforms.Lambda(lambda x: int(x in self.outlier_classes))
|
||||
|
||||
# Get train set
|
||||
train_set = MyFashionMNIST(root=self.root, train=True, transform=transform, target_transform=target_transform,
|
||||
download=True)
|
||||
train_set = MyFashionMNIST(
|
||||
root=self.root,
|
||||
train=True,
|
||||
transform=transform,
|
||||
target_transform=target_transform,
|
||||
download=True,
|
||||
)
|
||||
|
||||
# Create semi-supervised setting
|
||||
idx, _, semi_targets = create_semisupervised_setting(train_set.targets.cpu().data.numpy(), self.normal_classes,
|
||||
self.outlier_classes, self.known_outlier_classes,
|
||||
ratio_known_normal, ratio_known_outlier, ratio_pollution)
|
||||
train_set.semi_targets[idx] = torch.tensor(semi_targets) # set respective semi-supervised labels
|
||||
idx, _, semi_targets = create_semisupervised_setting(
|
||||
train_set.targets.cpu().data.numpy(),
|
||||
self.normal_classes,
|
||||
self.outlier_classes,
|
||||
self.known_outlier_classes,
|
||||
ratio_known_normal,
|
||||
ratio_known_outlier,
|
||||
ratio_pollution,
|
||||
)
|
||||
train_set.semi_targets[idx] = torch.tensor(
|
||||
semi_targets
|
||||
) # set respective semi-supervised labels
|
||||
|
||||
# Subset train_set to semi-supervised setup
|
||||
self.train_set = Subset(train_set, idx)
|
||||
|
||||
# Get test set
|
||||
self.test_set = MyFashionMNIST(root=self.root, train=False, transform=transform,
|
||||
target_transform=target_transform, download=True)
|
||||
self.test_set = MyFashionMNIST(
|
||||
root=self.root,
|
||||
train=False,
|
||||
transform=transform,
|
||||
target_transform=target_transform,
|
||||
download=True,
|
||||
)
|
||||
|
||||
|
||||
class MyFashionMNIST(FashionMNIST):
|
||||
@@ -70,11 +98,15 @@ class MyFashionMNIST(FashionMNIST):
|
||||
Returns:
|
||||
tuple: (image, target, semi_target, index)
|
||||
"""
|
||||
img, target, semi_target = self.data[index], int(self.targets[index]), int(self.semi_targets[index])
|
||||
img, target, semi_target = (
|
||||
self.data[index],
|
||||
int(self.targets[index]),
|
||||
int(self.semi_targets[index]),
|
||||
)
|
||||
|
||||
# doing this so that it is consistent with all other datasets
|
||||
# to return a PIL Image
|
||||
img = Image.fromarray(img.numpy(), mode='L')
|
||||
img = Image.fromarray(img.numpy(), mode="L")
|
||||
|
||||
if self.transform is not None:
|
||||
img = self.transform(img)
|
||||
|
||||
@@ -4,51 +4,83 @@ from .cifar10 import CIFAR10_Dataset
|
||||
from .odds import ODDSADDataset
|
||||
|
||||
|
||||
def load_dataset(dataset_name, data_path, normal_class, known_outlier_class, n_known_outlier_classes: int = 0,
|
||||
ratio_known_normal: float = 0.0, ratio_known_outlier: float = 0.0, ratio_pollution: float = 0.0,
|
||||
random_state=None):
|
||||
def load_dataset(
|
||||
dataset_name,
|
||||
data_path,
|
||||
normal_class,
|
||||
known_outlier_class,
|
||||
n_known_outlier_classes: int = 0,
|
||||
ratio_known_normal: float = 0.0,
|
||||
ratio_known_outlier: float = 0.0,
|
||||
ratio_pollution: float = 0.0,
|
||||
random_state=None,
|
||||
):
|
||||
"""Loads the dataset."""
|
||||
|
||||
implemented_datasets = ('mnist', 'fmnist', 'cifar10',
|
||||
'arrhythmia', 'cardio', 'satellite', 'satimage-2', 'shuttle', 'thyroid')
|
||||
implemented_datasets = (
|
||||
"mnist",
|
||||
"fmnist",
|
||||
"cifar10",
|
||||
"arrhythmia",
|
||||
"cardio",
|
||||
"satellite",
|
||||
"satimage-2",
|
||||
"shuttle",
|
||||
"thyroid",
|
||||
)
|
||||
assert dataset_name in implemented_datasets
|
||||
|
||||
dataset = None
|
||||
|
||||
if dataset_name == 'mnist':
|
||||
dataset = MNIST_Dataset(root=data_path,
|
||||
normal_class=normal_class,
|
||||
known_outlier_class=known_outlier_class,
|
||||
n_known_outlier_classes=n_known_outlier_classes,
|
||||
ratio_known_normal=ratio_known_normal,
|
||||
ratio_known_outlier=ratio_known_outlier,
|
||||
ratio_pollution=ratio_pollution)
|
||||
if dataset_name == "mnist":
|
||||
dataset = MNIST_Dataset(
|
||||
root=data_path,
|
||||
normal_class=normal_class,
|
||||
known_outlier_class=known_outlier_class,
|
||||
n_known_outlier_classes=n_known_outlier_classes,
|
||||
ratio_known_normal=ratio_known_normal,
|
||||
ratio_known_outlier=ratio_known_outlier,
|
||||
ratio_pollution=ratio_pollution,
|
||||
)
|
||||
|
||||
if dataset_name == 'fmnist':
|
||||
dataset = FashionMNIST_Dataset(root=data_path,
|
||||
normal_class=normal_class,
|
||||
known_outlier_class=known_outlier_class,
|
||||
n_known_outlier_classes=n_known_outlier_classes,
|
||||
ratio_known_normal=ratio_known_normal,
|
||||
ratio_known_outlier=ratio_known_outlier,
|
||||
ratio_pollution=ratio_pollution)
|
||||
if dataset_name == "fmnist":
|
||||
dataset = FashionMNIST_Dataset(
|
||||
root=data_path,
|
||||
normal_class=normal_class,
|
||||
known_outlier_class=known_outlier_class,
|
||||
n_known_outlier_classes=n_known_outlier_classes,
|
||||
ratio_known_normal=ratio_known_normal,
|
||||
ratio_known_outlier=ratio_known_outlier,
|
||||
ratio_pollution=ratio_pollution,
|
||||
)
|
||||
|
||||
if dataset_name == 'cifar10':
|
||||
dataset = CIFAR10_Dataset(root=data_path,
|
||||
normal_class=normal_class,
|
||||
known_outlier_class=known_outlier_class,
|
||||
n_known_outlier_classes=n_known_outlier_classes,
|
||||
ratio_known_normal=ratio_known_normal,
|
||||
ratio_known_outlier=ratio_known_outlier,
|
||||
ratio_pollution=ratio_pollution)
|
||||
if dataset_name == "cifar10":
|
||||
dataset = CIFAR10_Dataset(
|
||||
root=data_path,
|
||||
normal_class=normal_class,
|
||||
known_outlier_class=known_outlier_class,
|
||||
n_known_outlier_classes=n_known_outlier_classes,
|
||||
ratio_known_normal=ratio_known_normal,
|
||||
ratio_known_outlier=ratio_known_outlier,
|
||||
ratio_pollution=ratio_pollution,
|
||||
)
|
||||
|
||||
if dataset_name in ('arrhythmia', 'cardio', 'satellite', 'satimage-2', 'shuttle', 'thyroid'):
|
||||
dataset = ODDSADDataset(root=data_path,
|
||||
dataset_name=dataset_name,
|
||||
n_known_outlier_classes=n_known_outlier_classes,
|
||||
ratio_known_normal=ratio_known_normal,
|
||||
ratio_known_outlier=ratio_known_outlier,
|
||||
ratio_pollution=ratio_pollution,
|
||||
random_state=random_state)
|
||||
if dataset_name in (
|
||||
"arrhythmia",
|
||||
"cardio",
|
||||
"satellite",
|
||||
"satimage-2",
|
||||
"shuttle",
|
||||
"thyroid",
|
||||
):
|
||||
dataset = ODDSADDataset(
|
||||
root=data_path,
|
||||
dataset_name=dataset_name,
|
||||
n_known_outlier_classes=n_known_outlier_classes,
|
||||
ratio_known_normal=ratio_known_normal,
|
||||
ratio_known_outlier=ratio_known_outlier,
|
||||
ratio_pollution=ratio_pollution,
|
||||
random_state=random_state,
|
||||
)
|
||||
|
||||
return dataset
|
||||
|
||||
@@ -11,8 +11,16 @@ import random
|
||||
|
||||
class MNIST_Dataset(TorchvisionDataset):
|
||||
|
||||
def __init__(self, root: str, normal_class: int = 0, known_outlier_class: int = 1, n_known_outlier_classes: int = 0,
|
||||
ratio_known_normal: float = 0.0, ratio_known_outlier: float = 0.0, ratio_pollution: float = 0.0):
|
||||
def __init__(
|
||||
self,
|
||||
root: str,
|
||||
normal_class: int = 0,
|
||||
known_outlier_class: int = 1,
|
||||
n_known_outlier_classes: int = 0,
|
||||
ratio_known_normal: float = 0.0,
|
||||
ratio_known_outlier: float = 0.0,
|
||||
ratio_pollution: float = 0.0,
|
||||
):
|
||||
super().__init__(root)
|
||||
|
||||
# Define normal and outlier classes
|
||||
@@ -27,28 +35,48 @@ class MNIST_Dataset(TorchvisionDataset):
|
||||
elif n_known_outlier_classes == 1:
|
||||
self.known_outlier_classes = tuple([known_outlier_class])
|
||||
else:
|
||||
self.known_outlier_classes = tuple(random.sample(self.outlier_classes, n_known_outlier_classes))
|
||||
self.known_outlier_classes = tuple(
|
||||
random.sample(self.outlier_classes, n_known_outlier_classes)
|
||||
)
|
||||
|
||||
# MNIST preprocessing: feature scaling to [0, 1]
|
||||
transform = transforms.ToTensor()
|
||||
target_transform = transforms.Lambda(lambda x: int(x in self.outlier_classes))
|
||||
|
||||
# Get train set
|
||||
train_set = MyMNIST(root=self.root, train=True, transform=transform, target_transform=target_transform,
|
||||
download=True)
|
||||
train_set = MyMNIST(
|
||||
root=self.root,
|
||||
train=True,
|
||||
transform=transform,
|
||||
target_transform=target_transform,
|
||||
download=True,
|
||||
)
|
||||
|
||||
# Create semi-supervised setting
|
||||
idx, _, semi_targets = create_semisupervised_setting(train_set.targets.cpu().data.numpy(), self.normal_classes,
|
||||
self.outlier_classes, self.known_outlier_classes,
|
||||
ratio_known_normal, ratio_known_outlier, ratio_pollution)
|
||||
train_set.semi_targets[idx] = torch.tensor(semi_targets) # set respective semi-supervised labels
|
||||
idx, _, semi_targets = create_semisupervised_setting(
|
||||
train_set.targets.cpu().data.numpy(),
|
||||
self.normal_classes,
|
||||
self.outlier_classes,
|
||||
self.known_outlier_classes,
|
||||
ratio_known_normal,
|
||||
ratio_known_outlier,
|
||||
ratio_pollution,
|
||||
)
|
||||
train_set.semi_targets[idx] = torch.tensor(
|
||||
semi_targets
|
||||
) # set respective semi-supervised labels
|
||||
|
||||
# Subset train_set to semi-supervised setup
|
||||
self.train_set = Subset(train_set, idx)
|
||||
|
||||
# Get test set
|
||||
self.test_set = MyMNIST(root=self.root, train=False, transform=transform, target_transform=target_transform,
|
||||
download=True)
|
||||
self.test_set = MyMNIST(
|
||||
root=self.root,
|
||||
train=False,
|
||||
transform=transform,
|
||||
target_transform=target_transform,
|
||||
download=True,
|
||||
)
|
||||
|
||||
|
||||
class MyMNIST(MNIST):
|
||||
@@ -70,11 +98,15 @@ class MyMNIST(MNIST):
|
||||
Returns:
|
||||
tuple: (image, target, semi_target, index)
|
||||
"""
|
||||
img, target, semi_target = self.data[index], int(self.targets[index]), int(self.semi_targets[index])
|
||||
img, target, semi_target = (
|
||||
self.data[index],
|
||||
int(self.targets[index]),
|
||||
int(self.semi_targets[index]),
|
||||
)
|
||||
|
||||
# doing this so that it is consistent with all other datasets
|
||||
# to return a PIL Image
|
||||
img = Image.fromarray(img.numpy(), mode='L')
|
||||
img = Image.fromarray(img.numpy(), mode="L")
|
||||
|
||||
if self.transform is not None:
|
||||
img = self.transform(img)
|
||||
|
||||
@@ -8,8 +8,16 @@ import torch
|
||||
|
||||
class ODDSADDataset(BaseADDataset):
|
||||
|
||||
def __init__(self, root: str, dataset_name: str, n_known_outlier_classes: int = 0, ratio_known_normal: float = 0.0,
|
||||
ratio_known_outlier: float = 0.0, ratio_pollution: float = 0.0, random_state=None):
|
||||
def __init__(
|
||||
self,
|
||||
root: str,
|
||||
dataset_name: str,
|
||||
n_known_outlier_classes: int = 0,
|
||||
ratio_known_normal: float = 0.0,
|
||||
ratio_known_outlier: float = 0.0,
|
||||
ratio_pollution: float = 0.0,
|
||||
random_state=None,
|
||||
):
|
||||
super().__init__(root)
|
||||
|
||||
# Define normal and outlier classes
|
||||
@@ -23,25 +31,58 @@ class ODDSADDataset(BaseADDataset):
|
||||
self.known_outlier_classes = (1,)
|
||||
|
||||
# Get train set
|
||||
train_set = ODDSDataset(root=self.root, dataset_name=dataset_name, train=True, random_state=random_state,
|
||||
download=True)
|
||||
train_set = ODDSDataset(
|
||||
root=self.root,
|
||||
dataset_name=dataset_name,
|
||||
train=True,
|
||||
random_state=random_state,
|
||||
download=True,
|
||||
)
|
||||
|
||||
# Create semi-supervised setting
|
||||
idx, _, semi_targets = create_semisupervised_setting(train_set.targets.cpu().data.numpy(), self.normal_classes,
|
||||
self.outlier_classes, self.known_outlier_classes,
|
||||
ratio_known_normal, ratio_known_outlier, ratio_pollution)
|
||||
train_set.semi_targets[idx] = torch.tensor(semi_targets) # set respective semi-supervised labels
|
||||
idx, _, semi_targets = create_semisupervised_setting(
|
||||
train_set.targets.cpu().data.numpy(),
|
||||
self.normal_classes,
|
||||
self.outlier_classes,
|
||||
self.known_outlier_classes,
|
||||
ratio_known_normal,
|
||||
ratio_known_outlier,
|
||||
ratio_pollution,
|
||||
)
|
||||
train_set.semi_targets[idx] = torch.tensor(
|
||||
semi_targets
|
||||
) # set respective semi-supervised labels
|
||||
|
||||
# Subset train_set to semi-supervised setup
|
||||
self.train_set = Subset(train_set, idx)
|
||||
|
||||
# Get test set
|
||||
self.test_set = ODDSDataset(root=self.root, dataset_name=dataset_name, train=False, random_state=random_state)
|
||||
self.test_set = ODDSDataset(
|
||||
root=self.root,
|
||||
dataset_name=dataset_name,
|
||||
train=False,
|
||||
random_state=random_state,
|
||||
)
|
||||
|
||||
def loaders(self, batch_size: int, shuffle_train=True, shuffle_test=False, num_workers: int = 0) -> (
|
||||
DataLoader, DataLoader):
|
||||
train_loader = DataLoader(dataset=self.train_set, batch_size=batch_size, shuffle=shuffle_train,
|
||||
num_workers=num_workers, drop_last=True)
|
||||
test_loader = DataLoader(dataset=self.test_set, batch_size=batch_size, shuffle=shuffle_test,
|
||||
num_workers=num_workers, drop_last=False)
|
||||
def loaders(
|
||||
self,
|
||||
batch_size: int,
|
||||
shuffle_train=True,
|
||||
shuffle_test=False,
|
||||
num_workers: int = 0,
|
||||
) -> (DataLoader, DataLoader):
|
||||
train_loader = DataLoader(
|
||||
dataset=self.train_set,
|
||||
batch_size=batch_size,
|
||||
shuffle=shuffle_train,
|
||||
num_workers=num_workers,
|
||||
drop_last=True,
|
||||
)
|
||||
test_loader = DataLoader(
|
||||
dataset=self.test_set,
|
||||
batch_size=batch_size,
|
||||
shuffle=shuffle_test,
|
||||
num_workers=num_workers,
|
||||
drop_last=False,
|
||||
)
|
||||
return train_loader, test_loader
|
||||
|
||||
@@ -2,10 +2,17 @@ import torch
|
||||
import numpy as np
|
||||
|
||||
|
||||
def create_semisupervised_setting(labels, normal_classes, outlier_classes, known_outlier_classes,
|
||||
ratio_known_normal, ratio_known_outlier, ratio_pollution):
|
||||
def create_semisupervised_setting(
|
||||
labels,
|
||||
normal_classes,
|
||||
outlier_classes,
|
||||
known_outlier_classes,
|
||||
ratio_known_normal,
|
||||
ratio_known_outlier,
|
||||
ratio_pollution,
|
||||
):
|
||||
"""
|
||||
Create a semi-supervised data setting.
|
||||
Create a semi-supervised data setting.
|
||||
:param labels: np.array with labels of all dataset samples
|
||||
:param normal_classes: tuple with normal class labels
|
||||
:param outlier_classes: tuple with anomaly class labels
|
||||
@@ -17,15 +24,31 @@ def create_semisupervised_setting(labels, normal_classes, outlier_classes, known
|
||||
"""
|
||||
idx_normal = np.argwhere(np.isin(labels, normal_classes)).flatten()
|
||||
idx_outlier = np.argwhere(np.isin(labels, outlier_classes)).flatten()
|
||||
idx_known_outlier_candidates = np.argwhere(np.isin(labels, known_outlier_classes)).flatten()
|
||||
idx_known_outlier_candidates = np.argwhere(
|
||||
np.isin(labels, known_outlier_classes)
|
||||
).flatten()
|
||||
|
||||
n_normal = len(idx_normal)
|
||||
|
||||
# Solve system of linear equations to obtain respective number of samples
|
||||
a = np.array([[1, 1, 0, 0],
|
||||
[(1-ratio_known_normal), -ratio_known_normal, -ratio_known_normal, -ratio_known_normal],
|
||||
[-ratio_known_outlier, -ratio_known_outlier, -ratio_known_outlier, (1-ratio_known_outlier)],
|
||||
[0, -ratio_pollution, (1-ratio_pollution), 0]])
|
||||
a = np.array(
|
||||
[
|
||||
[1, 1, 0, 0],
|
||||
[
|
||||
(1 - ratio_known_normal),
|
||||
-ratio_known_normal,
|
||||
-ratio_known_normal,
|
||||
-ratio_known_normal,
|
||||
],
|
||||
[
|
||||
-ratio_known_outlier,
|
||||
-ratio_known_outlier,
|
||||
-ratio_known_outlier,
|
||||
(1 - ratio_known_outlier),
|
||||
],
|
||||
[0, -ratio_pollution, (1 - ratio_pollution), 0],
|
||||
]
|
||||
)
|
||||
b = np.array([n_normal, 0, 0, 0])
|
||||
x = np.linalg.solve(a, b)
|
||||
|
||||
@@ -41,9 +64,13 @@ def create_semisupervised_setting(labels, normal_classes, outlier_classes, known
|
||||
perm_known_outlier = np.random.permutation(len(idx_known_outlier_candidates))
|
||||
|
||||
idx_known_normal = idx_normal[perm_normal[:n_known_normal]].tolist()
|
||||
idx_unlabeled_normal = idx_normal[perm_normal[n_known_normal:n_known_normal+n_unlabeled_normal]].tolist()
|
||||
idx_unlabeled_normal = idx_normal[
|
||||
perm_normal[n_known_normal : n_known_normal + n_unlabeled_normal]
|
||||
].tolist()
|
||||
idx_unlabeled_outlier = idx_outlier[perm_outlier[:n_unlabeled_outlier]].tolist()
|
||||
idx_known_outlier = idx_known_outlier_candidates[perm_known_outlier[:n_known_outlier]].tolist()
|
||||
idx_known_outlier = idx_known_outlier_candidates[
|
||||
perm_known_outlier[:n_known_outlier]
|
||||
].tolist()
|
||||
|
||||
# Get original class labels
|
||||
labels_known_normal = labels[idx_known_normal].tolist()
|
||||
@@ -53,14 +80,32 @@ def create_semisupervised_setting(labels, normal_classes, outlier_classes, known
|
||||
|
||||
# Get semi-supervised setting labels
|
||||
semi_labels_known_normal = np.ones(n_known_normal).astype(np.int32).tolist()
|
||||
semi_labels_unlabeled_normal = np.zeros(n_unlabeled_normal).astype(np.int32).tolist()
|
||||
semi_labels_unlabeled_outlier = np.zeros(n_unlabeled_outlier).astype(np.int32).tolist()
|
||||
semi_labels_unlabeled_normal = (
|
||||
np.zeros(n_unlabeled_normal).astype(np.int32).tolist()
|
||||
)
|
||||
semi_labels_unlabeled_outlier = (
|
||||
np.zeros(n_unlabeled_outlier).astype(np.int32).tolist()
|
||||
)
|
||||
semi_labels_known_outlier = (-np.ones(n_known_outlier).astype(np.int32)).tolist()
|
||||
|
||||
# Create final lists
|
||||
list_idx = idx_known_normal + idx_unlabeled_normal + idx_unlabeled_outlier + idx_known_outlier
|
||||
list_labels = labels_known_normal + labels_unlabeled_normal + labels_unlabeled_outlier + labels_known_outlier
|
||||
list_semi_labels = (semi_labels_known_normal + semi_labels_unlabeled_normal + semi_labels_unlabeled_outlier
|
||||
+ semi_labels_known_outlier)
|
||||
list_idx = (
|
||||
idx_known_normal
|
||||
+ idx_unlabeled_normal
|
||||
+ idx_unlabeled_outlier
|
||||
+ idx_known_outlier
|
||||
)
|
||||
list_labels = (
|
||||
labels_known_normal
|
||||
+ labels_unlabeled_normal
|
||||
+ labels_unlabeled_outlier
|
||||
+ labels_known_outlier
|
||||
)
|
||||
list_semi_labels = (
|
||||
semi_labels_known_normal
|
||||
+ semi_labels_unlabeled_normal
|
||||
+ semi_labels_unlabeled_outlier
|
||||
+ semi_labels_known_outlier
|
||||
)
|
||||
|
||||
return list_idx, list_labels, list_semi_labels
|
||||
|
||||
Reference in New Issue
Block a user