# Source code for model
# -*- coding: utf-8 -*-
# LIBTwinSVM: A Library for Twin Support Vector Machines
# Developers: Mir, A. and Mahdi Rahbar
# License: GNU General Public License v3.0
"""
This module models the dataset and the user's input as classes and functions.
"""
from datetime import datetime
from libtsvm.estimators import TSVM, LSTSVM
from libtsvm.mc_scheme import OneVsAllClassifier, OneVsOneClassifier
class DataInfo:
    """
    Plain container for the characteristics of a dataset.

    Every constructor argument is stored unchanged on the instance under
    the same name.

    Parameters
    ----------
    no_samples : int
        Number of samples in dataset.

    no_features : int
        Number of features in dataset.

    no_class : int
        Number of classes in dataset.

    class_labels : array-like
        Unique class labels.

    header_names : list
        Name of every feature in dataset.
    """

    def __init__(self, no_samples, no_features, no_class, class_labels,
                 header_names):

        # Size of the dataset.
        self.no_samples, self.no_features = no_samples, no_features

        # Information about the target classes.
        self.no_class, self.class_labels = no_class, class_labels

        # Names of the feature columns.
        self.header_names = header_names
class UserInput:
    """
    It encapsulates a user's input.

    All attributes are initialised to neutral defaults by ``__init__`` and
    are expected to be filled in by the caller before the helper methods of
    this class are used.

    Attributes
    ----------
    X_train : array-like, shape (n_samples, n_features)
        Training feature vectors, where n_samples is the number of
        samples and n_features is the number of features.

    y_train : array-like, shape(n_samples,)
        Target values or class labels.

    data_filename : str
        The filename of a user's dataset.

    clf_type : str, {'tsvm', 'lstsvm'}
        Type of the classifier.

    class_type : str, {'binary', 'multiclass'}
        Type of classification problem.

    mc_scheme : str, {'ova', 'ovo'}
        The multi-class strategy.

    result_path : str
        Path for saving classification results.

    save_clf_results : boolean (default=True)
        Whether to save the classification results or not.

    save_best_model : boolean (default=False)
        Whether to save the best fitted model or not.

    log_file : boolean (default=False)
        Whether to create a log file or not.

    kernel_type : str, {'linear', 'RBF'}
        Type of the kernel function.

    rect_kernel : float (default=1.0)
        Percentage of training samples for Rectangular kernel.

    test_method_tuple : tuple
        A two-element tuple which contains type of evaluation method
        ('CV' or 't_t_split') and its parameter.

    step_size : float (default=1.0)
        Step size for generating search elements.

    C1_range : tuple
        Lower and upper bound for C1 penalty parameter.
        example: (-4, 5), first element is lower bound and second element is
        upper bound.

    C2_range : tuple
        Lower and upper bound for C2 penalty parameter.

    u_range : tuple
        Lower and upper bound for gamma parameter.

    C1 : float (default=1.0)
        The penalty parameter.

    C2 : float (default=1.0)
        The penalty parameter.

    u : float (default=1.0)
        The parameter of the RBF kernel function.

    input_complete : boolean (default=False)
        Whether all the required inputs are set.

    linear_db : boolean (default=False)
        Whether to plot decision boundary or not.

    fig_save : boolean (default=False)
        Whether to save the figure or not.

    fig_dpi : int
        DPI of the figure. It determines the quality of the output image.

    fig_save_path : str
        The path at which a figure will be saved.

    pre_trained_model : object
        A pre-trained TSVM-based classifier.

    save_pred : boolean (default=False)
        Whether to save predicted labels of test samples in a file or not.

    save_pred_path : str
        The path at which the file of predicted labels will be saved.
    """

    def __init__(self):

        # Data
        self.X_train, self.y_train = None, None
        self.data_filename = ''

        # Classify
        self.clf_type = None
        self.class_type = None
        self.mc_scheme = None
        self.result_path = ''
        self.save_clf_results = True
        self.save_best_model = False
        self.log_file = False
        self.kernel_type = None
        self.rect_kernel = 1.0
        self.test_method_tuple = None
        self.step_size = 1.0

        # Lower and upper bounds of hyper-parameters
        self.C1_range = None
        self.C2_range = None
        self.u_range = None

        # Whether all the input variables are inserted or not.
        self.input_complete = False

        # Visualization
        self.C1 = 1.0
        self.C2 = 1.0
        self.u = 1.0
        self.linear_db = False  # Linear decision boundary
        self.fig_save = False
        self.fig_dpi = None
        self.fig_save_path = None

        # Model
        self.pre_trained_model = None
        self.save_pred = False
        self.save_pred_path = ''

    def _get_kernel_selection(self):
        """
        It returns the name of the user's selected kernel function.

        Returns
        -------
        str
            'Linear' when ``kernel_type`` is 'linear'; otherwise
            'Gaussian (RBF)' when ``rect_kernel`` equals 1.0, else a
            'Rectangular' label that includes the percentage of samples.
        """

        if self.kernel_type == 'linear':
            return 'Linear'

        if self.rect_kernel == 1.0:
            return 'Gaussian (RBF)'

        # Multiplying a binary float by 100 can leave representation noise
        # (0.29 * 100 == 28.999999999999996). Rounding to two decimals keeps
        # the usual "50.0" style while hiding that noise from the user.
        percent = round(self.rect_kernel * 100, 2)

        return 'Rectangular (%s%% of samples)' % (percent,)

    def _get_eval_method(self):
        """
        It returns the name of the user's selected evaluation method.

        Returns
        -------
        str or None
            Name of evaluation method. ``None`` is returned when the first
            element of ``test_method_tuple`` is neither 'CV' nor
            't_t_split'.
        """

        method = self.test_method_tuple[0]

        if method == 'CV':
            return "%d-Fold cross-validation" % self.test_method_tuple[1]

        if method == 't_t_split':
            # '%d' truncates, so 100 - 0.3 * 100 (69.99999999999999) used to
            # be shown as 69%. Round the test share to a whole percent first
            # and derive the train share from it so both always add to 100.
            test_percent = int(round(self.test_method_tuple[1] * 100))

            return "Train/Test split (%d%%/%d%%)" % (100 - test_percent,
                                                     test_percent)

        return None

    def _get_mc_scheme(self):
        """
        It returns type of multi-class classification.

        Returns
        -------
        str or None
            'Binary' for a binary problem, or the name of the multi-class
            strategy for a multiclass problem. ``None`` when ``class_type``
            or ``mc_scheme`` holds an unrecognised value.
        """

        if self.class_type == 'binary':
            return "Binary"

        if self.class_type == 'multiclass':
            return {'ova': "One-vs-All",
                    'ovo': "One-vs-One"}.get(self.mc_scheme)

        return None

    def _get_clf_name(self):
        """
        It returns the name of the user's selected classifier.

        Returns
        -------
        str or None
            'TSVM' or 'LSTSVM', prefixed with 'OVA-' or 'OVO-' for a
            multiclass problem. ``None`` when ``class_type`` or
            ``mc_scheme`` holds an unrecognised value.

        Raises
        ------
        ValueError
            If a name has to be built but ``clf_type`` is neither 'tsvm' nor
            'lstsvm'.
        """

        # Work out the multi-class prefix first; an unrecognised combination
        # yields no name at all, regardless of the classifier type.
        if self.class_type == 'binary':
            prefix = ''
        elif self.class_type == 'multiclass':
            prefix = {'ova': 'OVA-', 'ovo': 'OVO-'}.get(self.mc_scheme)
        else:
            prefix = None

        if prefix is None:
            return None

        clf = {'tsvm': 'TSVM', 'lstsvm': 'LSTSVM'}.get(self.clf_type)

        if clf is None:
            # Fail with a clear message instead of an UnboundLocalError.
            raise ValueError("Unknown classifier type: %r"
                             % (self.clf_type,))

        return prefix + clf

    def get_current_selection(self):
        """
        It returns a user's current selection for confirmation.

        Returns
        -------
        str
            A multi-line summary of the dataset, classifier, kernel,
            evaluation method, search ranges and saving options.

        Raises
        ------
        RuntimeError
            If ``input_complete`` is not set.
        """

        if not self.input_complete:
            raise RuntimeError("input_complete has not been set yet! "
                               "Check out UserInput Class Docs.")

        def yes_no(flag):
            return 'Yes' if flag else 'No'

        # The gamma range is only relevant for the RBF kernel.
        if self.kernel_type == 'RBF':
            u_param = " | u: 2^%d to 2^%d" % (self.u_range[0],
                                              self.u_range[-1])
        else:
            u_param = ''

        if self.clf_type == 'tsvm':
            clf = "Standard TwinSVM"
        else:
            clf = "LeastSquares TwinSVM"

        template = ("Dataset: %s\nClassifier: %s\nKernel: %s\n"
                    "Multi-class scheme: %s\nEvaluation method: %s\n"
                    "Range of parameters for grid search: (step:%.2f)\nC1: 2^%d to 2^%d |"
                    "C2: 2^%d to 2^%d%s\n"
                    "---------------------------------------------------------------\n"
                    "Results' path: %s\nSave Classification Results: %s\n"
                    "Save Best Model: %s\nSave Log File: %s")

        return template % (self.data_filename, clf,
                           self._get_kernel_selection(),
                           self._get_mc_scheme(), self._get_eval_method(),
                           self.step_size,
                           self.C1_range[0], self.C1_range[1],
                           self.C2_range[0], self.C2_range[1], u_param,
                           self.result_path,
                           yes_no(self.save_clf_results),
                           yes_no(self.save_best_model),
                           yes_no(self.log_file))

    def get_selected_clf(self):
        """
        It returns the classifier that is selected by user.

        The base estimator is built from ``kernel_type`` and
        ``rect_kernel``. For a multiclass problem it is wrapped in the
        multi-class classifier chosen by ``mc_scheme``.

        Returns
        -------
        clf_obj : object
            An estimator object. ``None`` is returned for a non-multiclass
            problem when ``clf_type`` is neither 'tsvm' nor 'lstsvm'.
        """

        clf_obj = None

        if self.clf_type == 'tsvm':
            clf_obj = TSVM(self.kernel_type, self.rect_kernel)
        elif self.clf_type == 'lstsvm':
            clf_obj = LSTSVM(self.kernel_type, self.rect_kernel)

        if self.class_type == 'multiclass':
            if self.mc_scheme == 'ova':
                clf_obj = OneVsAllClassifier(clf_obj)
            elif self.mc_scheme == 'ovo':
                clf_obj = OneVsOneClassifier(clf_obj)

        return clf_obj

    def get_clf_params(self):
        """
        It returns hyper-parameters of the classifier in a dictionary.

        Returns
        -------
        dict or None
            Hyper-parameters of the classifier: 'C1' and 'C2', plus 'gamma'
            for the RBF kernel. ``None`` when ``kernel_type`` is neither
            'linear' nor 'RBF'.
        """

        if self.kernel_type == 'linear':
            return {'C1': self.C1, 'C2': self.C2}

        if self.kernel_type == 'RBF':
            return {'C1': self.C1, 'C2': self.C2, 'gamma': self.u}

        return None

    def get_fig_name(self):
        """
        Returns the figure's name based on the user's selection for saving
        a file.

        Returns
        -------
        str
            Name made of the classifier name, kernel type, dataset filename
            and the current local date and time.
        """

        timestamp = datetime.now().strftime('%Y-%m-%d %H-%M')

        return "Plot_%s_%s_%s_%s" % (self._get_clf_name(), self.kernel_type,
                                     self.data_filename, timestamp)

    def validate_step_size(self):
        """
        Checks whether step size for generating search elements are valid or
        not.

        The step size is valid when it is strictly smaller than the width of
        the C1 and C2 ranges and, for the RBF kernel only, of the u range.

        Returns
        -------
        boolean
            Whether step size is valid or not.
        """

        ranges = [self.C1_range, self.C2_range]

        # The gamma range only matters when the RBF kernel is selected.
        if self.kernel_type == 'RBF':
            ranges.append(self.u_range)

        return all(self.step_size < abs(bounds[1] - bounds[0])
                   for bounds in ranges)