# Source code for mc_scheme

# -*- coding: utf-8 -*-

# LIBTwinSVM: A Library for Twin Support Vector Machines
# Developers: Mir, A. and Mahdi Rahbar
# License: GNU General Public License v3.0

"""
In this module, multi-class schemes such as One-vs-One and One-vs-All are
implemented.
"""

from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.validation import check_X_y, check_is_fitted, check_array
from sklearn.utils import column_or_1d
from sklearn.base import clone
import numpy as np


class OneVsOneClassifier(BaseEstimator, ClassifierMixin):
    """
    Multi-class classification using One-vs-One scheme

    The :class:`OneVsOneClassifier` is scikit-learn compatible, which means
    scikit-learn tools such as `cross_val_score <https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.cross_val_score.html>`_
    and `GridSearchCV <https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html>`_
    can be used for an instance of :class:`OneVsOneClassifier`

    Parameters
    ----------
    estimator : estimator object
        An estimator object implementing `fit` and `predict`. It must also
        expose a ``clf_name`` string attribute, which is read in ``__init__``.

    Attributes
    ----------
    clf_name : str
        Name of the classifier.

    bin_clf_ : list
        Stores instances of each binary :class:`TSVM` classifier, ordered by
        class pair (0, 1), (0, 2), ..., (n - 2, n - 1).

    classes_ : array, shape (n_classes,)
        Sorted unique class labels seen during :meth:`fit`.

    shape_fit_ : tuple
        Shape of the training matrix passed to :meth:`fit`.
    """

    def __init__(self, estimator):

        self.estimator = estimator
        self.clf_name = 'OVO-' + estimator.clf_name

    def _validate_targets(self, y):
        """
        Validates labels for training and testing classifier

        Stores the sorted unique labels in ``classes_`` and returns ``y``
        encoded as integer indices into ``classes_``.

        Raises
        ------
        ValueError
            If fewer than two distinct classes are present.
        """

        y_ = column_or_1d(y, warn=True)
        check_classification_targets(y)
        self.classes_, y = np.unique(y_, return_inverse=True)

        if len(self.classes_) < 2:

            raise ValueError(
                "The number of classes has to be greater than one; got %d"
                " class" % len(self.classes_))

        # ``np.int`` was only an alias of the builtin ``int`` and has been
        # removed from NumPy, so the builtin is used directly.
        return np.asarray(y, dtype=int)

    def _validate_for_predict(self, X):
        """
        Checks that the classifier is already trained and also test samples
        are valid

        Raises
        ------
        ValueError
            If the number of features of ``X`` differs from the number of
            features seen during :meth:`fit`.
        """

        check_is_fitted(self, ['bin_clf_'])
        X = check_array(X, dtype=np.float64)

        n_features = X.shape[1]

        if n_features != self.shape_fit_[1]:

            raise ValueError("X.shape[1] = %d should be equal to %d,"
                             "the number of features of training samples" %
                             (n_features, self.shape_fit_[1]))

        return X

    def fit(self, X, y):
        """
        It fits the OVO-classifier model according to the given training data.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
           Training feature vectors, where n_samples is the number of samples
           and n_features is the number of features.

        y : array-like, shape(n_samples,)
            Target values or class labels.

        Returns
        -------
        self : object
        """

        X, y = check_X_y(X, y, dtype=np.float64)
        y = self._validate_targets(y)

        n_classes = self.classes_.size

        # One binary problem per unordered pair of classes: n(n-1)/2 in total.
        class_pairs = [(i, j) for i in range(n_classes)
                       for j in range(i + 1, n_classes)]

        # The estimator is cloned so every sub-problem gets its own model.
        bin_clfs = [clone(self.estimator) for _ in class_pairs]

        for clf, (i, j) in zip(bin_clfs, class_pairs):

            # Break multi-class problem into a binary problem
            pair_mask = (y == i) | (y == j)

            # For binary classification, labels must be {-1, +1}
            # i-th class -> +1 and j-th class -> -1
            pair_y = np.where(y[pair_mask] == i, 1, -1)

            clf.fit(X[pair_mask], pair_y)

        # Fitted state is published only after every sub-model has trained.
        self.bin_clf_ = bin_clfs
        self.shape_fit_ = X.shape

        return self

    def predict(self, X):
        """
        Performs classification on samples in X using the OVO-classifier model.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Feature vectors of test data.

        Returns
        -------
        y_pred : array, shape (n_samples,)
            Predicted class labels of test data.
        """

        X = self._validate_for_predict(X)

        n_classes = self.classes_.size

        # Initialize votes
        votes = np.zeros((X.shape[0], n_classes), dtype=int)

        p = 0

        for i in range(n_classes):

            for j in range(i + 1, n_classes):

                # Each binary model is queried once for the whole batch
                # instead of once per sample.
                wins_i = np.asarray(self.bin_clf_[p].predict(X)).ravel() == 1

                # +1 is a vote for class i, anything else a vote for class j.
                votes[wins_i, i] += 1
                votes[~wins_i, j] += 1

                p += 1

        # Labels of test samples based on max-win strategy; argmax resolves
        # ties in favour of the lowest class index.
        max_votes = np.argmax(votes, axis=1)

        return self.classes_.take(max_votes)
class OneVsAllClassifier(BaseEstimator, ClassifierMixin):
    """
    Multi-class classification using One-vs-All scheme

    Parameters
    ----------
    estimator : estimator object
        An estimator object implementing `fit` and `decision_function`. It
        must also expose a ``clf_name`` string attribute, which is read in
        ``__init__``.

    Attributes
    ----------
    clf_name : str
        Name of the classifier.

    bin_clf_ : list
        Stores instances of each binary :class:`TSVM` classifier, one per
        class and in the order of ``classes_``.

    classes_ : array, shape (n_classes,)
        Sorted unique class labels seen during :meth:`fit`.

    shape_fit_ : tuple
        Shape of the training matrix passed to :meth:`fit`.
    """

    def __init__(self, estimator):

        self.estimator = estimator
        self.clf_name = 'OVA-' + estimator.clf_name

    def _validate_targets(self, y):
        """
        Validates labels for training and testing classifier

        Stores the sorted unique labels in ``classes_`` and returns ``y``
        encoded as integer indices into ``classes_``.

        Raises
        ------
        ValueError
            If fewer than two distinct classes are present.
        """

        y_ = column_or_1d(y, warn=True)
        check_classification_targets(y)
        self.classes_, y = np.unique(y_, return_inverse=True)

        if len(self.classes_) < 2:

            raise ValueError(
                "The number of classes has to be greater than one; got %d"
                " class" % len(self.classes_))

        # ``np.int`` was only an alias of the builtin ``int`` and has been
        # removed from NumPy, so the builtin is used directly.
        return np.asarray(y, dtype=int)

    def _validate_for_predict(self, X):
        """
        Checks that the classifier is already trained and also test samples
        are valid

        Raises
        ------
        ValueError
            If the number of features of ``X`` differs from the number of
            features seen during :meth:`fit`.
        """

        check_is_fitted(self, ['bin_clf_'])
        X = check_array(X, dtype=np.float64)

        n_features = X.shape[1]

        if n_features != self.shape_fit_[1]:

            raise ValueError("X.shape[1] = %d should be equal to %d,"
                             "the number of features of training samples" %
                             (n_features, self.shape_fit_[1]))

        return X

    def fit(self, X, y):
        """
        It fits the OVA-classifier model according to the given training data.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
           Training feature vectors, where n_samples is the number of samples
           and n_features is the number of features.

        y : array-like, shape(n_samples,)
            Target values or class labels.

        Returns
        -------
        self : object
        """

        X, y = check_X_y(X, y, dtype=np.float64)
        y = self._validate_targets(y)

        # Allocate n binary classifiers
        # Note that an estimator should be cloned for training a multi-class
        # method
        bin_clfs = [clone(self.estimator) for _ in range(self.classes_.size)]

        for i, clf in enumerate(bin_clfs):

            # For binary classification, labels must be {-1, +1}
            # i-th class -> +1 and other class -> -1
            mat_y_i = np.where(y == i, 1, -1)

            clf.fit(X, mat_y_i)

        # Fitted state is published only after every sub-model has trained.
        self.bin_clf_ = bin_clfs
        self.shape_fit_ = X.shape

        return self

    def predict(self, X):
        """
        Performs classification on samples in X using the OVA-classifier model.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Feature vectors of test data.

        Returns
        -------
        test_labels : array, shape (n_samples,)
            Predicted class labels of test data.
        """

        X = self._validate_for_predict(X)

        pred = np.zeros((X.shape[0], self.classes_.size), dtype=np.float64)

        for j, clf in enumerate(self.bin_clf_):

            # Each binary model is queried once for the whole batch instead
            # of once per sample.
            # NOTE(review): column 1 of decision_function is presumably the
            # distance to the hyperplane of the +1 (i.e. j-th) class -
            # confirm against the estimator's implementation.
            pred[:, j] = np.asarray(clf.decision_function(X))[:, 1]

        # The class whose binary model reports the smallest value wins.
        test_labels = np.argmin(pred, axis=1)

        return self.classes_.take(test_labels)
def mc_clf_no_params(bin_clfs):
    """
    It calculates number of parameters for a multi-class model.

    For every binary estimator the count is the number of rows of its ``w1``
    and ``w2`` attributes plus 2.

    Parameters
    ----------
    bin_clfs : list
        Instances of binary TSVM-based estimators. Each one must expose
        ``w1`` and ``w2`` attributes that have a ``shape``.

    Returns
    -------
    int
        Number of parameters of a multi-class model; 0 for an empty list.
    """

    # NOTE(review): the constant 2 presumably accounts for the two bias
    # terms of a twin SVM - confirm against the estimator definition.
    return sum(clf.w1.shape[0] + clf.w2.shape[0] + 2 for clf in bin_clfs)