# -*- coding: utf-8 -*-
# LIBTwinSVM: A Library for Twin Support Vector Machines
# Developers: Mir, A. and Mahdi Rahbar
# License: GNU General Public License v3.0
"""
In this module, Standard TwinSVM and Least Squares TwinSVM estimators are
defined.
"""
from sklearn.base import BaseEstimator
from sklearn.utils.validation import check_X_y
from libtsvm.optimizer import clipdcd
import numpy as np
[docs]class BaseTSVM(BaseEstimator):
"""
Base class for TSVM-based estimators
Parameters
----------
kernel : str
Type of the kernel function which is either 'linear' or 'RBF'.
rect_kernel : float
Percentage of training samples for Rectangular kernel.
C1 : float
Penalty parameter of first optimization problem.
C2 : float
Penalty parameter of second optimization problem.
gamma : float
Parameter of the RBF kernel function.
Attributes
----------
mat_C_t : array-like, shape = [n_samples, n_samples]
A matrix that contains kernel values.
cls_name : str
Name of the classifier.
w1 : array-like, shape=[n_features]
Weight vector of class +1's hyperplane.
b1 : float
Bias of class +1's hyperplane.
w2 : array-like, shape=[n_features]
Weight vector of class -1's hyperplane.
b2 : float
Bias of class -1's hyperplane.
"""
def __init__(self, kernel, rect_kernel, C1, C2, gamma):
self.C1 = C1
self.C2 = C2
self.gamma = gamma
self.kernel = kernel
self.rect_kernel = rect_kernel
self.mat_C_t = None
self.clf_name = None
# Two hyperplanes attributes
self.w1, self.b1, self.w2, self.b2 = None, None, None, None
self.check_clf_params()
[docs] def check_clf_params(self):
"""
Checks whether the estimator's input parameters are valid.
"""
if not(self.kernel in ['linear', 'RBF']):
raise ValueError("\"%s\" is an invalid kernel. \"linear\" and"
" \"RBF\" values are valid." % self.kernel)
[docs] def get_params_names(self):
"""
For retrieving the names of hyper-parameters of the TSVM-based
estimator.
Returns
-------
parameters : list of str, {['C1', 'C2'], ['C1', 'C2', 'gamma']}
Returns the names of the hyperparameters which are same as
the class' attributes.
"""
return ['C1', 'C2'] if self.kernel == 'linear' else ['C1', 'C2',
'gamma']
[docs] def fit(self, X, y):
"""
It fits a TSVM-based estimator.
THIS METHOD SHOULD BE IMPLEMENTED IN CHILD CLASS.
Parameters
----------
X : array-like, shape (n_samples, n_features)
Training feature vectors, where n_samples is the number of samples
and n_features is the number of features.
y : array-like, shape(n_samples,)
Target values or class labels.
"""
pass # Impelement fit method in child class
[docs] def predict(self, X):
"""
Performs classification on samples in X using the TSVM-based model.
Parameters
----------
X : array-like, shape (n_samples, n_features)
Feature vectors of test data.
Returns
-------
array, shape (n_samples,)
Predicted class lables of test data.
"""
# Assign data points to class +1 or -1 based on distance from
# hyperplanes
return 2 * np.argmin(self.decision_function(X), axis=1) - 1
[docs] def decision_function(self, X):
"""
Computes distance of test samples from both non-parallel hyperplanes
Parameters
----------
X : array-like, shape (n_samples, n_features)
Returns
-------
array-like, shape(n_samples, 2)
distance from both hyperplanes.
"""
# dist = np.zeros((X.shape[0], 2), dtype=np.float64)
#
# kernel_f = {'linear': lambda i: X[i, :],
# 'RBF': lambda i: rbf_kernel(X[i, :], self.mat_C_t,
# self.gamma)}
#
# for i in range(X.shape[0]):
#
# # Prependicular distance of data pint i from hyperplanes
# dist[i, 1] = np.abs(np.dot(kernel_f[self.kernel](i), self.w1) \
# + self.b1)
#
# dist[i, 0] = np.abs(np.dot(kernel_f[self.kernel](i), self.w2) \
# + self.b2)
#
# return dist
kernel_f = {'linear': lambda: X, 'RBF': lambda: rbf_kernel(X,
self.mat_C_t, self.gamma)}
return np.column_stack((np.abs(np.dot(kernel_f[self.kernel](), self.w2)
+ self.b2),
np.abs(np.dot(kernel_f[self.kernel](),
self.w1) + self.b1)))
[docs]class TSVM(BaseTSVM):
"""
Standard Twin Support Vector Machine for binary classification.
It inherits attributes of :class:`BaseTSVM`.
Parameters
----------
kernel : str, optional (default='linear')
Type of the kernel function which is either 'linear' or 'RBF'.
rect_kernel : float, optional (default=1.0)
Percentage of training samples for Rectangular kernel.
C1 : float, optional (default=1.0)
Penalty parameter of first optimization problem.
C2 : float, optional (default=1.0)
Penalty parameter of second optimization problem.
gamma : float, optional (default=1.0)
Parameter of the RBF kernel function.
"""
def __init__(self, kernel='linear', rect_kernel=1, C1=2**0, C2=2**0,
gamma=2**0):
super(TSVM, self).__init__(kernel, rect_kernel, C1, C2, gamma)
self.clf_name = 'TSVM'
# @profile
[docs] def fit(self, X_train, y_train):
"""
It fits the binary TwinSVM model according to the given training data.
Parameters
----------
X_train : array-like, shape (n_samples, n_features)
Training feature vectors, where n_samples is the number of samples
and n_features is the number of features.
y_train : array-like, shape(n_samples,)
Target values or class labels.
"""
X_train = np.array(X_train, dtype=np.float64) if isinstance(X_train,
list) else X_train
y_train = np.array(y_train) if isinstance(y_train, list) else y_train
# Matrix A or class 1 samples
mat_A = X_train[y_train == 1]
# Matrix B or class -1 data
mat_B = X_train[y_train == -1]
# Vectors of ones
mat_e1 = np.ones((mat_A.shape[0], 1))
mat_e2 = np.ones((mat_B.shape[0], 1))
if self.kernel == 'linear': # Linear kernel
mat_H = np.column_stack((mat_A, mat_e1))
mat_G = np.column_stack((mat_B, mat_e2))
elif self.kernel == 'RBF': # Non-linear
# class 1 & class -1
mat_C = np.row_stack((mat_A, mat_B))
self.mat_C_t = np.transpose(mat_C)[:, :int(mat_C.shape[0] *
self.rect_kernel)]
mat_H = np.column_stack((rbf_kernel(mat_A, self.mat_C_t,
self.gamma), mat_e1))
mat_G = np.column_stack((rbf_kernel(mat_B, self.mat_C_t,
self.gamma), mat_e2))
mat_H_t = np.transpose(mat_H)
mat_G_t = np.transpose(mat_G)
# Compute inverses:
# Regulariztion term used for ill-possible condition
reg_term = 2 ** float(-7)
mat_H_H = np.linalg.inv(np.dot(mat_H_t, mat_H) + (reg_term *
np.identity(mat_H.shape[1])))
# Wolfe dual problem of class 1
mat_dual1 = np.dot(np.dot(mat_G, mat_H_H), mat_G_t)
# Obtaining Lagrange multipliers using ClipDCD optimizer
alpha_d1 = clipdcd.optimize(mat_dual1, self.C1).reshape(mat_dual1.shape[0], 1)
# Obtain hyperplanes
hyper_p_1 = -1 * np.dot(np.dot(mat_H_H, mat_G_t), alpha_d1)
# Free memory
del mat_dual1, mat_H_H
mat_G_G = np.linalg.inv(np.dot(mat_G_t, mat_G) + (reg_term *
np.identity(mat_G.shape[1])))
# Wolfe dual problem of class -1
mat_dual2 = np.dot(np.dot(mat_H, mat_G_G), mat_H_t)
alpha_d2 = clipdcd.optimize(mat_dual2, self.C2).reshape(mat_dual2.shape[0], 1)
hyper_p_2 = np.dot(np.dot(mat_G_G, mat_H_t), alpha_d2)
# Class 1
self.w1 = hyper_p_1[:hyper_p_1.shape[0] - 1, :]
self.b1 = hyper_p_1[-1, :]
# Class -1
self.w2 = hyper_p_2[:hyper_p_2.shape[0] - 1, :]
self.b2 = hyper_p_2[-1, :]
[docs]class LSTSVM(BaseTSVM):
"""
Least Squares Twin Support Vector Machine (LSTSVM) for binary
classification. It inherits attributes of :class:`BaseTSVM`.
Parameters
----------
kernel : str, optional (default='linear')
Type of the kernel function which is either 'linear' or 'RBF'.
rect_kernel : float, optional (default=1.0)
Percentage of training samples for Rectangular kernel.
C1 : float, optional (default=1.0)
Penalty parameter of first optimization problem.
C2 : float, optional (default=1.0)
Penalty parameter of second optimization problem.
gamma : float, optional (default=1.0)
Parameter of the RBF kernel function.
mem_optimize : boolean, optional (default=False)
If it's True, it optimizes the memory consumption siginificantly.
However, the memory optimization increases the CPU time.
"""
def __init__(self, kernel='linear', rect_kernel=1, C1=2**0, C2=2**0,
gamma=2**0, mem_optimize=False):
super(LSTSVM, self).__init__(kernel, rect_kernel, C1, C2, gamma)
self.mem_optimize = mem_optimize
self.clf_name = 'LSTSVM'
# @profile
[docs] def fit(self, X, y):
"""
It fits the binary Least Squares TwinSVM model according to the given
training data.
Parameters
----------
X : array-like, shape (n_samples, n_features)
Training feature vectors, where n_samples is the number of samples
and n_features is the number of features.
y : array-like, shape(n_samples,)
Target values or class labels.
"""
X = np.array(X, dtype=np.float64) if isinstance(X, list) else X
y = np.array(y) if isinstance(y, list) else y
# Matrix A or class 1 data
mat_A = X[y == 1]
# Matrix B or class -1 data
mat_B = X[y == -1]
# Vectors of ones
mat_e1 = np.ones((mat_A.shape[0], 1))
mat_e2 = np.ones((mat_B.shape[0], 1))
# Regularization term used for ill-possible condition
reg_term = 2 ** float(-7)
if self.kernel == 'linear':
mat_H = np.column_stack((mat_A, mat_e1))
mat_G = np.column_stack((mat_B, mat_e2))
elif self.kernel == 'RBF':
# class 1 & class -1
mat_C = np.row_stack((mat_A, mat_B))
self.mat_C_t = np.transpose(mat_C)[:, :int(mat_C.shape[0] *
self.rect_kernel)]
mat_H = np.column_stack((rbf_kernel(mat_A, self.mat_C_t,
self.gamma), mat_e1))
mat_G = np.column_stack((rbf_kernel(mat_B, self.mat_C_t,
self.gamma), mat_e2))
mat_H_t = np.transpose(mat_H)
mat_G_t = np.transpose(mat_G)
if self.mem_optimize:
inv_p_1 = np.linalg.inv((np.dot(mat_G_t, mat_G) + (1 / self.C1) \
* np.dot(mat_H_t,mat_H)) + (reg_term * np.identity(mat_H.shape[1])))
# Determine parameters of two non-parallel hyperplanes
hyper_p_1 = -1 * np.dot(inv_p_1, np.dot(mat_G_t, mat_e2))
# Free memory
del inv_p_1
inv_p_2 = np.linalg.inv((np.dot(mat_H_t,mat_H) + (1 / self.C2) \
* np.dot(mat_G_t, mat_G)) + (reg_term * np.identity(mat_H.shape[1])))
hyper_p_2 = np.dot(inv_p_2, np.dot(mat_H_t, mat_e1))
else:
stabilizer = reg_term * np.identity(mat_H.shape[1])
mat_G_G_t = np.dot(mat_G_t, mat_G)
mat_H_H_t = np.dot(mat_H_t,mat_H)
inv_p_1 = np.linalg.inv((mat_G_G_t + (1 / self.C1) * mat_H_H_t) \
+ stabilizer)
# Determine parameters of two non-parallel hyperplanes
hyper_p_1 = -1 * np.dot(inv_p_1, np.dot(mat_G_t, mat_e2))
# Free memory
del inv_p_1
inv_p_2 = np.linalg.inv((mat_H_H_t + (1 / self.C2) * mat_G_G_t) \
+ stabilizer)
hyper_p_2 = np.dot(inv_p_2, np.dot(mat_H_t, mat_e1))
self.w1 = hyper_p_1[:hyper_p_1.shape[0] - 1, :]
self.b1 = hyper_p_1[-1, :]
self.w2 = hyper_p_2[:hyper_p_2.shape[0] - 1, :]
self.b2 = hyper_p_2[-1, :]
# @profile
# def fit_2(self, X, y):
# """
# It fits the binary Least Squares TwinSVM model according to the given
# training data.
#
# Parameters
# ----------
# X : array-like, shape (n_samples, n_features)
# Training feature vectors, where n_samples is the number of samples
# and n_features is the number of features.
#
# y : array-like, shape(n_samples,)
# Target values or class labels.
# """
#
# X, y = check_X_y(X, y, dtype=np.float64)
#
# # Matrix A or class 1 data
# mat_A = X[y == 1]
#
# # Matrix B or class -1 data
# mat_B = X[y == -1]
#
# # Vectors of ones
# mat_e1 = np.ones((mat_A.shape[0], 1))
# mat_e2 = np.ones((mat_B.shape[0], 1))
#
# # Regularization term used for ill-possible condition
# reg_term = 2 ** float(-7)
#
# if self.kernel == 'linear':
#
# mat_H = np.column_stack((mat_A, mat_e1))
# mat_G = np.column_stack((mat_B, mat_e2))
#
# mat_H_t = np.transpose(mat_H)
# mat_G_t = np.transpose(mat_G)
#
# stabilizer = reg_term * np.identity(mat_H.shape[1])
#
# # Determine parameters of two non-parallel hyperplanes
# hyper_p_1 = -1 * np.dot(np.linalg.inv((np.dot(mat_G_t, mat_G) +
# (1 / self.C1) * np.dot(mat_H_t,mat_H)) + stabilizer),
# np.dot(mat_G_t, mat_e2))
#
# self.w1 = hyper_p_1[:hyper_p_1.shape[0] - 1, :]
# self.b1 = hyper_p_1[-1, :]
#
# hyper_p_2 = np.dot(np.linalg.inv((np.dot(mat_H_t, mat_H) + (1 / self.C2)
# * np.dot(mat_G_t, mat_G)) + stabilizer), np.dot(mat_H_t, mat_e1))
#
# self.w2 = hyper_p_2[:hyper_p_2.shape[0] - 1, :]
# self.b2 = hyper_p_2[-1, :]
#
# elif self.kernel == 'RBF':
#
# # class 1 & class -1
# mat_C = np.row_stack((mat_A, mat_B))
#
# self.mat_C_t = np.transpose(mat_C)[:, :int(mat_C.shape[0] *
# self.rect_kernel)]
#
# mat_H = np.column_stack((rbf_kernel(mat_A, self.mat_C_t,
# self.gamma), mat_e1))
#
# mat_G = np.column_stack((rbf_kernel(mat_B, self.mat_C_t,
# self.gamma), mat_e2))
#
# mat_H_t = np.transpose(mat_H)
# mat_G_t = np.transpose(mat_G)
#
# mat_I_H = np.identity(mat_H.shape[0]) # (m_1 x m_1)
# mat_I_G = np.identity(mat_G.shape[0]) # (m_2 x m_2)
#
# mat_I = np.identity(mat_G.shape[1]) # (n x n)
#
# # Determine parameters of hypersurfaces # Using SMW formula
# if mat_A.shape[0] < mat_B.shape[0]:
#
# y = (1 / reg_term) * (mat_I - np.dot(np.dot(mat_G_t, \
# np.linalg.inv((reg_term * mat_I_G) + np.dot(mat_G, mat_G_t))),
# mat_G))
#
# mat_H_y = np.dot(mat_H, y)
# mat_y_Ht = np.dot(y, mat_H_t)
# mat_H_y_Ht = np.dot(mat_H_y, mat_H_t)
#
# h_surf1_inv = np.linalg.inv(self.C1 * mat_I_H + mat_H_y_Ht)
# h_surf2_inv = np.linalg.inv((mat_I_H / self.C2) + mat_H_y_Ht)
#
# hyper_surf1 = np.dot(-1 * (y - np.dot(np.dot(mat_y_Ht, h_surf1_inv),
# mat_H_y)), np.dot(mat_G_t, mat_e2))
#
# hyper_surf2 = np.dot(self.C2 * (y - np.dot(np.dot(mat_y_Ht,
# h_surf2_inv), mat_H_y)), np.dot(mat_H_t, mat_e1))
#
# # Parameters of hypersurfaces
# self.w1 = hyper_surf1[:hyper_surf1.shape[0] - 1, :]
# self.b1 = hyper_surf1[-1, :]
#
# self.w2 = hyper_surf2[:hyper_surf2.shape[0] - 1, :]
# self.b2 = hyper_surf2[-1, :]
#
# else:
#
# z = (1 / reg_term) * (mat_I - np.dot(np.dot(mat_H_t, \
# np.linalg.inv(reg_term * mat_I_H + np.dot(mat_H, mat_H_t))),
# mat_H))
#
# mat_G_z = np.dot(mat_G, z)
# mat_z_Gt = np.dot(z, mat_G_t)
# mat_G_y_Gt = np.dot(mat_G_z, mat_G_t)
#
# g_surf1_inv = np.linalg.inv((mat_I_G / self.C1) + mat_G_y_Gt)
# g_surf2_inv = np.linalg.inv(self.C2 * mat_I_G + mat_G_y_Gt)
#
# hyper_surf1 = np.dot(-self.C1 * (z - np.dot(np.dot(mat_z_Gt,
# g_surf1_inv), mat_G_z)), np.dot(mat_G_t, mat_e2))
#
# hyper_surf2 = np.dot((z - np.dot(np.dot(mat_z_Gt, g_surf2_inv),
# mat_G_z)), np.dot(mat_H_t, mat_e1))
#
# self.w1 = hyper_surf1[:hyper_surf1.shape[0] - 1, :]
# self.b1 = hyper_surf1[-1, :]
#
# self.w2 = hyper_surf2[:hyper_surf2.shape[0] - 1, :]
# self.b2 = hyper_surf2[-1, :]
[docs]def rbf_kernel(x, y, u):
"""
It transforms samples into higher dimension using Gaussian (RBF) kernel.
Parameters
----------
x, y : array-like, shape (n_features,)
A feature vector or sample.
u : float
Parameter of the RBF kernel function.
Returns
-------
float
Value of kernel matrix for feature vector x and y.
"""
return np.exp(-2 * u) * np.exp(2 * u * np.dot(x, y))
# GPU implementation of estimators ############################################
#
# try:
#
# from cupy import prof
# import cupy as cp
#
# def GPU_rbf_kernel(x, y, u):
# """
# It transforms samples into higher dimension using Gaussian (RBF)
# kernel.
#
# Parameters
# ----------
# x, y : array-like, shape (n_features,)
# A feature vector or sample.
#
# u : float
# Parameter of the RBF kernel function.
#
# Returns
# -------
# float
# Value of kernel matrix for feature vector x and y.
# """
#
# return cp.exp(-2 * u) * cp.exp(2 * u * cp.dot(x, y))
#
# class GPU_LSTSVM():
# """
# GPU implementation of least squares twin support vector machine
#
# Parameters
# ----------
# kernel : str
# Type of the kernel function which is either 'linear' or 'RBF'.
#
# rect_kernel : float
# Percentage of training samples for Rectangular kernel.
#
# C1 : float
# Penalty parameter of first optimization problem.
#
# C2 : float
# Penalty parameter of second optimization problem.
#
# gamma : float
# Parameter of the RBF kernel function.
#
# Attributes
# ----------
# mat_C_t : array-like, shape = [n_samples, n_samples]
# A matrix that contains kernel values.
#
# cls_name : str
# Name of the classifier.
#
# w1 : array-like, shape=[n_features]
# Weight vector of class +1's hyperplane.
#
# b1 : float
# Bias of class +1's hyperplane.
#
# w2 : array-like, shape=[n_features]
# Weight vector of class -1's hyperplane.
#
# b2 : float
# Bias of class -1's hyperplane.
# """
#
# def __init__(self, kernel='linear', rect_kernel=1, C1=2**0, C2=2**0,
# gamma=2**0):
#
# self.C1 = C1
# self.C2 = C2
# self.gamma = gamma
# self.kernel = kernel
# self.rect_kernel = rect_kernel
# self.mat_C_t = None
# self.clf_name = None
#
# # Two hyperplanes attributes
# self.w1, self.b1, self.w2, self.b2 = None, None, None, None
#
# #@prof.TimeRangeDecorator()
# def fit(self, X, y):
# """
# It fits the binary Least Squares TwinSVM model according to
# the given
# training data.
#
# Parameters
# ----------
# X : array-like, shape (n_samples, n_features)
# Training feature vectors, where n_samples is the number of
# samples
# and n_features is the number of features.
#
# y : array-like, shape(n_samples,)
# Target values or class labels.
# """
#
# X = cp.asarray(X)
# y = cp.asarray(y)
#
# # Matrix A or class 1 data
# mat_A = X[y == 1]
#
# # Matrix B or class -1 data
# mat_B = X[y == -1]
#
# # Vectors of ones
# mat_e1 = cp.ones((mat_A.shape[0], 1))
# mat_e2 = cp.ones((mat_B.shape[0], 1))
#
# mat_H = cp.column_stack((mat_A, mat_e1))
# mat_G = cp.column_stack((mat_B, mat_e2))
#
# mat_H_t = cp.transpose(mat_H)
# mat_G_t = cp.transpose(mat_G)
#
# # Determine parameters of two non-parallel hyperplanes
# hyper_p_1 = -1 * cp.dot(cp.linalg.inv(cp.dot(mat_G_t, mat_G) + \
# (1 / self.C1) * cp.dot(mat_H_t, mat_H)), \
# cp.dot(mat_G_t, mat_e2))
#
# self.w1 = hyper_p_1[:hyper_p_1.shape[0] - 1, :]
# self.b1 = hyper_p_1[-1, :]
#
# hyper_p_2 = cp.dot(cp.linalg.inv(cp.dot(mat_H_t, mat_H) +
# (1 / self.C2) * cp.dot(mat_G_t, mat_G)), cp.dot(mat_H_t, mat_e1))
#
# self.w2 = hyper_p_2[:hyper_p_2.shape[0] - 1, :]
# self.b2 = hyper_p_2[-1, :]
#
# #@prof.TimeRangeDecorator()
# def predict(self, X):
# """
# Performs classification on samples in X using the Least Squares
# TwinSVM model.
#
# Parameters
# ----------
# X_test : array-like, shape (n_samples, n_features)
# Feature vectors of test data.
#
# Returns
# -------
# output : array, shape (n_samples,)
# Predicted class lables of test data.
#
# """
#
# X = cp.asarray(X)
#
# # Calculate prependicular distances for new data points
# # prepen_distance = cp.zeros((X.shape[0], 2))
# #
# # kernel_f = {'linear': lambda i: X[i, :] ,
# # 'RBF': lambda i: GPU_rbf_kernel(X[i, :],
# self.mat_C_t, self.gamma)}
# #
# # for i in range(X.shape[0]):
# #
# # # Prependicular distance of data pint i from hyperplanes
# # prepen_distance[i, 1] = cp.abs(cp.dot(kernel_f[self.kernel](i),
# self.w1) + self.b1)[0]
# #
# # prepen_distance[i, 0] = cp.abs(cp.dot(kernel_f[self.kernel](i), self.w2) + self.b2)[0]
#
# dist = cp.column_stack((cp.abs(cp.dot(X, self.w2) + self.b2),
# cp.abs(cp.dot(X, self.w1) + self.b1)))
#
#
#
# # Assign data points to class +1 or -1 based on distance from
# hyperplanes
# output = 2 * cp.argmin(dist, axis=1) - 1
#
# return cp.asnumpy(output)
#
# except ImportError:
#
# print("Cannot run GPU implementation. Install CuPy package.")
##############################################################################
if __name__ == '__main__':
pass
# from preprocess import read_data
# from sklearn.model_selection import train_test_split
# from sklearn.metrics import accuracy_score
#
# X, y, filename = read_data('../dataset/australian.csv')
#
# x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
#
# tsvm_model = TSVM('linear', 0.25, 0.5)
#
# tsvm_model.fit(x_train, y_train)
# pred = tsvm_model.predict(x_test)
#
# print("Accuracy: %.2f" % (accuracy_score(y_test, pred) * 100))