# -*- coding: utf-8 -*-
# LIBTwinSVM: A Library for Twin Support Vector Machines
# Developers: Mir, A. and Mahdi Rahbar
# License: GNU General Public License v3.0
"""
In this module, multi-class schemes such as One-vs-One and One-vs-All are
implemented.
"""
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.validation import check_X_y, check_is_fitted, check_array
from sklearn.utils import column_or_1d
from sklearn.base import clone
import numpy as np
class OneVsOneClassifier(BaseEstimator, ClassifierMixin):
    """
    Multi-class classification using One-vs-One scheme

    One binary classifier is trained for every unordered pair of classes,
    i.e. ``n_classes * (n_classes - 1) / 2`` classifiers in total. A test
    sample is assigned to the class that wins the most pairwise votes.

    The :class:`OneVsOneClassifier` is scikit-learn compatible, which means
    scikit-learn tools such as `cross_val_score <https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.cross_val_score.html>`_
    and `GridSearchCV <https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html>`_
    can be used for an instance of :class:`OneVsOneClassifier`

    Parameters
    ----------
    estimator : estimator object
        An estimator object implementing `fit` and `predict`. It must also
        expose a ``clf_name`` string attribute, which is read in ``__init__``.

    Attributes
    ----------
    clf_name : str
        Name of the classifier (``'OVO-'`` followed by the name of the binary
        estimator).

    classes_ : array, shape (n_classes,)
        Sorted unique class labels seen during :meth:`fit`.

    bin_clf_ : list
        Stores instances of each binary classifier (clones of ``estimator``),
        ordered by class pair (0, 1), (0, 2), ..., (n_classes-2, n_classes-1).

    shape_fit_ : tuple
        Shape of the training data passed to :meth:`fit`.
    """

    def __init__(self, estimator):

        self.estimator = estimator
        self.clf_name = 'OVO-' + estimator.clf_name

    def _validate_targets(self, y):
        """
        Validates labels for training and testing classifier

        Sets ``classes_`` and returns the labels encoded as integer indices
        into ``classes_``.

        Raises
        ------
        ValueError
            If fewer than two distinct classes are present.
        """

        y_ = column_or_1d(y, warn=True)
        check_classification_targets(y)
        self.classes_, y = np.unique(y_, return_inverse=True)

        if len(self.classes_) < 2:
            raise ValueError(
                "The number of classes has to be greater than one; got %d"
                " class" % len(self.classes_))

        # The builtin ``int`` is used because the ``np.int`` alias was removed
        # in NumPy 1.24.
        return np.asarray(y, dtype=int)

    def _validate_for_predict(self, X):
        """
        Checks that the classifier is already trained and also test samples are
        valid

        Raises
        ------
        ValueError
            If the number of features of ``X`` differs from the training data.
        """

        check_is_fitted(self, ['bin_clf_'])
        X = check_array(X, dtype=np.float64)

        n_features = X.shape[1]

        if n_features != self.shape_fit_[1]:
            raise ValueError("X.shape[1] = %d should be equal to %d,"
                             "the number of features of training samples" %
                             (n_features, self.shape_fit_[1]))

        return X

    def _class_pairs(self):
        """
        Returns the (i, j) class-index pairs, i < j, in the order in which the
        binary classifiers are stored in ``bin_clf_``.
        """

        n_classes = self.classes_.size

        return [(i, j) for i in range(n_classes)
                for j in range(i + 1, n_classes)]

    def fit(self, X, y):
        """
        It fits the OVO-classifier model according to the given training data.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training feature vectors, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, shape(n_samples,)
            Target values or class labels.

        Returns
        -------
        self : object
        """

        X, y = check_X_y(X, y, dtype=np.float64)
        y = self._validate_targets(y)

        # Allocate n(n-1)/2 binary classifiers. The estimator is cloned so
        # that every sub-problem gets its own independent model.
        pairs = self._class_pairs()
        self.bin_clf_ = [clone(self.estimator) for _ in pairs]

        for clf, (i, j) in zip(self.bin_clf_, pairs):

            # Break multi-class problem into a binary problem
            mask = (y == i) | (y == j)

            # For binary classification, labels must be {-1, +1}
            # i-th class -> +1 and j-th class -> -1
            sub_prob_y_i_j = np.where(y[mask] == i, 1, -1)

            clf.fit(X[mask], sub_prob_y_i_j)

        self.shape_fit_ = X.shape

        return self

    def predict(self, X):
        """
        Performs classification on samples in X using the OVO-classifier model.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Feature vectors of test data.

        Returns
        -------
        y_pred : array, shape (n_samples,)
            Predicted class labels of test data. Ties are resolved in favour
            of the class that comes first in ``classes_``.
        """

        X = self._validate_for_predict(X)

        # Initialize votes
        votes = np.zeros((X.shape[0], self.classes_.size), dtype=int)

        # Each binary classifier labels the whole batch in one call instead of
        # being invoked once per test sample.
        for clf, (i, j) in zip(self.bin_clf_, self._class_pairs()):

            is_class_i = np.asarray(clf.predict(X)).ravel() == 1

            # +1 is a vote for the i-th class, anything else for the j-th.
            votes[is_class_i, i] += 1
            votes[~is_class_i, j] += 1

        # Labels of test samples based on max-win strategy
        max_votes = np.argmax(votes, axis=1)

        return self.classes_.take(np.asarray(max_votes, dtype=int))
class OneVsAllClassifier(BaseEstimator, ClassifierMixin):
    """
    Multi-class classification using One-vs-All scheme

    One binary classifier is trained per class, separating the samples of
    that class (+1) from the samples of all other classes (-1).

    Parameters
    ----------
    estimator : estimator object
        An estimator object implementing `fit` and `decision_function`. It
        must also expose a ``clf_name`` string attribute, which is read in
        ``__init__``.

    Attributes
    ----------
    clf_name : str
        Name of the classifier (``'OVA-'`` followed by the name of the binary
        estimator).

    classes_ : array, shape (n_classes,)
        Sorted unique class labels seen during :meth:`fit`.

    bin_clf_ : list
        Stores instances of each binary classifier (clones of ``estimator``);
        the i-th entry is the classifier of the i-th class in ``classes_``.

    shape_fit_ : tuple
        Shape of the training data passed to :meth:`fit`.
    """

    def __init__(self, estimator):

        self.estimator = estimator
        self.clf_name = 'OVA-' + estimator.clf_name

    def _validate_targets(self, y):
        """
        Validates labels for training and testing classifier

        Sets ``classes_`` and returns the labels encoded as integer indices
        into ``classes_``.

        Raises
        ------
        ValueError
            If fewer than two distinct classes are present.
        """

        y_ = column_or_1d(y, warn=True)
        check_classification_targets(y)
        self.classes_, y = np.unique(y_, return_inverse=True)

        if len(self.classes_) < 2:
            raise ValueError(
                "The number of classes has to be greater than one; got %d"
                " class" % len(self.classes_))

        # The builtin ``int`` is used because the ``np.int`` alias was removed
        # in NumPy 1.24.
        return np.asarray(y, dtype=int)

    def _validate_for_predict(self, X):
        """
        Checks that the classifier is already trained and also test samples are
        valid

        Raises
        ------
        ValueError
            If the number of features of ``X`` differs from the training data.
        """

        check_is_fitted(self, ['bin_clf_'])
        X = check_array(X, dtype=np.float64)

        n_features = X.shape[1]

        if n_features != self.shape_fit_[1]:
            raise ValueError("X.shape[1] = %d should be equal to %d,"
                             "the number of features of training samples" %
                             (n_features, self.shape_fit_[1]))

        return X

    def fit(self, X, y):
        """
        It fits the OVA-classifier model according to the given training data.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training feature vectors, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, shape(n_samples,)
            Target values or class labels.

        Returns
        -------
        self : object
        """

        X, y = check_X_y(X, y, dtype=np.float64)
        y = self._validate_targets(y)

        # Allocate n binary classifiers
        # Note that an estimator should be cloned for training a multi-class
        # method
        self.bin_clf_ = [clone(self.estimator)
                         for _ in range(self.classes_.size)]

        for i, clf in enumerate(self.bin_clf_):

            # For binary classification, labels must be {-1, +1}
            # i-th class -> +1 and other classes -> -1
            mat_y_i = np.where(y == i, 1, -1)

            clf.fit(X, mat_y_i)

        self.shape_fit_ = X.shape

        return self

    def predict(self, X):
        """
        Performs classification on samples in X using the OVA-classifier model.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Feature vectors of test data.

        Returns
        -------
        test_labels : array, shape (n_samples,)
            Predicted class labels of test data. Ties are resolved in favour
            of the class that comes first in ``classes_``.
        """

        X = self._validate_for_predict(X)

        pred = np.zeros((X.shape[0], self.classes_.size), dtype=np.float64)

        # One batch call per binary classifier instead of one call per sample.
        # NOTE(review): assumes ``decision_function`` returns an array of shape
        # (n_samples, 2) whose column 1 is a distance-like score for the +1
        # (i-th) class, smaller meaning closer -- confirm against the binary
        # estimator.
        for j, clf in enumerate(self.bin_clf_):

            pred[:, j] = np.asarray(clf.decision_function(X))[:, 1]

        # The class with the smallest score wins.
        test_labels = np.argmin(pred, axis=1)

        return self.classes_.take(np.asarray(test_labels, dtype=int))
def mc_clf_no_params(bin_clfs):
    """
    It calculates number of parameters for a multi-class model.

    Parameters
    ----------
    bin_clfs : list
        Instances of binary TSVM-based estimators. Each one must expose the
        attributes ``w1`` and ``w2`` with a ``shape`` tuple.

    Returns
    -------
    int
        Number of parameters of a multi-class model; 0 when ``bin_clfs`` is
        empty.
    """

    # Per binary classifier: length of both weight vectors plus 2
    # (presumably the two bias terms of the hyperplanes -- confirm against
    # the binary estimator).
    return sum(clf.w1.shape[0] + clf.w2.shape[0] + 2 for clf in bin_clfs)