## This file is part of MLPY.
## Feature Ranking module based on Recursive Feature Elimination (RFE)
## and Recursive Forward Selection (RFS) methods.

## This code is written by Davide Albanese, <albanese@fbk.eu>.
##(C) 2007 Fondazione Bruno Kessler - Via Santa Croce 77, 38100 Trento, ITALY.

## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program.  If not, see <http://www.gnu.org/licenses/>.

__all__ = ['Ranking']

from numpy import *
import math


def project(elem):
    """
    Linearly rescale an array onto the interval [0, 1].

    Input

      * *elem* - [numpy ndarray] values to rescale

    Output

      * [numpy ndarray float] (elem - min) / (max - min): the smallest
        element maps to 0.0 and the largest to 1.0.  When every element
        is equal (max == min) an array of zeros is returned, because the
        ratio would otherwise be 0/0 = NaN.

    Raises TypeError if *elem* is not a numpy ndarray.
    """

    if not isinstance(elem, ndarray):
        raise TypeError('project() argument must be numpy ndarray')

    m = elem.min()
    D = float(elem.max() - m)

    # A zero spread cannot be normalised; all-NaN output would silently
    # poison every comparison made on the result by the callers.
    if D == 0.0:
        return zeros(elem.shape, dtype=float)

    return (elem - m) / D


def Entropy(pj):
    """
    Return the base-2 entropy, -sum(p * log2(p)), of the values in *pj*.

    Entries equal to 0.0 are skipped (their contribution is taken as 0).
    """
    total = 0.0
    for prob in pj:
        if prob == 0.0:
            # log(0) is undefined; a zero entry contributes nothing
            continue
        total -= prob * math.log(prob, 2)
    return total


def onestep(R):
    """
    One-step Recursive Feature Elimination.

    Return the indices of all the elements of R, ordered from the
    largest weight to the smallest one.

    Raises TypeError if R is not a numpy ndarray.

    See:
    I. Guyon, J. Weston, S.Barnhill, V. Vapnik.
    Gene selection for cancer classification using
    support vector machines.
    Machine Learning, (46):389-422, 2002.
    """

    if isinstance(R, ndarray):
        ascending = R.argsort()
        # reverse the ascending order to get the largest weight first
        return ascending[::-1]

    raise TypeError('onestep() argument must be numpy ndarray')


def rfe(R):
    """
    Recursive Feature Elimination.

    Return the index of the smallest weight in R, i.e. the single
    feature to discard at this step.

    Raises TypeError if R is not a numpy ndarray.

    See:
    I. Guyon, J. Weston, S.Barnhill, V. Vapnik.
    Gene selection for cancer classification using
    support vector machines.
    Machine Learning, (46):389-422, 2002.
    """

    if isinstance(R, ndarray):
        return R.argmin()

    raise TypeError('rfe() argument must be numpy ndarray')


def bisrfe(R):
    """
    Bis Recursive Feature Elimination.

    Return the indices of the features to discard at this step: with
    the indices of R ordered from the largest weight to the smallest,
    everything from position n/2 (rounded down) onwards is returned,
    n being the length of R.

    Raises TypeError if R is not a numpy ndarray.

    See:
    I. Guyon, J. Weston, S.Barnhill, V. Vapnik.
    Gene selection for cancer classification using
    support vector machines.
    Machine Learning, (46):389-422, 2002.
    """

    if not isinstance(R, ndarray):
        raise TypeError('bisrfe() argument must be numpy ndarray')

    descending = R.argsort()[::-1]
    half = descending.shape[0] // 2
    return descending[half:]


def sqrtrfe(R):
    """
    Sqrt Recursive Feature Elimination.

    Return the indices of the features to discard at this step: with
    the indices of R ordered from the largest weight to the smallest,
    everything from position int(n - sqrt(n)) onwards is returned,
    n being the length of R.

    Raises TypeError if R is not a numpy ndarray.

    See:
    I. Guyon, J. Weston, S.Barnhill, V. Vapnik.
    Gene selection for cancer classification using
    support vector machines.
    Machine Learning, (46):389-422, 2002.
    """

    if not isinstance(R, ndarray):
        raise TypeError('sqrtrfe() argument must be numpy ndarray')

    descending = R.argsort()[::-1]
    n = descending.shape[0]
    first_discarded = int(n - math.sqrt(n))
    return descending[first_discarded:]


def erfe(R):
    """
    Entropy-based Recursive Feature Elimination.

    Return the indices of the features to discard at this step, chosen
    according to the entropy of the weights distribution.

    The weights are first rescaled with project() and binned into
    int(sqrt(n)) equal bins over [0, 1], n being the length of R.

      * If the entropy of the bin frequencies is above
        0.5 * log2(sqrt(n)) and the mean rescaled weight is above 0.2,
        the features whose rescaled weight is <= 1 / sqrt(n) are
        returned.
      * Otherwise L = ln(rescaled weight + 1) is computed and A is the
        number of features with L below its mean.  If at least A / 2
        features have L == 0 those are returned; if not, the threshold
        (starting from the mean of L) is halved until at most A / 2
        features fall below it, and those features are returned.

    Raises TypeError if R is not a numpy ndarray.

    See:
    C. Furlanello, M. Serafini, S. Merler, and G. Jurman.
    Advances in Neural Network Research: IJCNN 2003.
    An accelerated procedure for recursive feature ranking
    on microarray data.
    Elsevier, 2003.
    """

    if not isinstance(R, ndarray):
        raise TypeError('erfe() argument must be numpy ndarray')

    bins = math.sqrt(R.shape[0])
    Ht = 0.5 * math.log(bins, 2)
    Mt = 0.2
    pw = project(R)
    M = pw.mean()

    # Compute the relative frequencies.  histogram() requires an integer
    # number of bins, so the (generally fractional) square root is
    # truncated here; the thresholds keep using the exact value.
    pj = histogram(pw, int(bins), range=(0.0, 1.0))[0] / float(pw.size)

    # Compute entropy
    H = Entropy(pj)

    if H > Ht and M > Mt:
        # Return the indices s.t. pw = [0, 1/bins]
        return where(pw <= (1.0 / bins))[0]

    # Compute L[i] = ln(pw[i] + 1); L[i] == 0 exactly where pw[i] == 0
    L = log(pw + 1.0)
    M = L.mean()

    # Compute A = #{L[i] < M} and half A
    A = where(L < M)[0].shape[0]
    hA = 0.5 * A

    # If #(L[i]==0.0) >= hA return indices where L==0.0
    iszero = where(L == 0.0)[0]
    if iszero.shape[0] >= hA:
        return iszero

    while True:
        M = 0.5 * M
        # Compute B = #{L[i] < M}
        idx = where(L < M)[0]
        # Stop iteration when B <= (0.5 * A)
        if idx.shape[0] <= hA:
            break

    return idx


def rfs(R):
    """
    Recursive Forward Selection.

    Return the index of the largest weight in R, i.e. the single
    feature to select at this step.

    Raises TypeError if R is not a numpy ndarray.
    """

    if not isinstance(R, ndarray):
        # The message must name this function, not rfe().
        raise TypeError('rfs() argument must be numpy ndarray')

    return argmax(R)



class Ranking:
    """
    Ranking class based on Recursive Feature Elimination (RFE) and
    Recursive Forward Selection (RFS) methods.

    Example:

    >>> from numpy import *
    >>> from mlpy import *
    >>> x = array([[1.1, 2.1, 3.1, -1.0],  # first sample
    ...            [1.2, 2.2, 3.2, 1.0],   # second sample
    ...            [1.3, 2.3, 3.3, -1.0]]) # third sample
    >>> y = array([1, -1, 1])              # classes
    >>> myrank = Ranking()                 # initialize ranking class
    >>> mysvm = Svm()                      # initialize svm class
    >>> myrank.compute(x, y, mysvm)        # compute feature ranking
    array([3, 1, 2, 0])
    """

    RFE_METHODS   = ['rfe', 'bisrfe', 'sqrtrfe', 'erfe']
    RFS_METHODS   = ['rfs']
    OTHER_METHODS = ['onestep']


    def __init__(self, method='rfe', lastsinglesteps = 0):
        """
        Initialize Ranking class.

        Input

          * *method* - [string] method ('onestep', 'rfe', 'bisrfe', 'sqrtrfe', 'erfe', 'rfs')
          * *lastsinglesteps* - [integer] with 'bisrfe', 'sqrtrfe' and
            'erfe', once the number of remaining features is not
            greater than this value the features are discarded one at
            a time, as 'rfe' does

        Raises ValueError if *method* is not one of the supported names.
        """

        if method not in self.RFE_METHODS + self.RFS_METHODS + self.OTHER_METHODS:
            raise ValueError("Method '%s' is not supported." % method)

        self.__method = method
        self.__lastsinglesteps = lastsinglesteps
        self.__weights = None


    def __compute_rfe(self, x, y, debug):
        """
        Rank the features by repeatedly discarding the least useful ones.

        At each step the weights are recomputed on the surviving
        columns of x and the features picked by the configured method
        are removed.  Features discarded later are placed before the
        ones discarded earlier in the returned array.
        """
        loc_x = x.copy()
        glo_idx = arange(x.shape[1], dtype = int)
        tot_disc = arange(0, dtype = int)

        while glo_idx.shape[0] > 1:
            R = self.__weights(loc_x, y)

            if self.__method == 'onestep':
                loc_disc = onestep(R)
            elif (self.__method == 'rfe'
                  or loc_x.shape[1] <= self.__lastsinglesteps):
                # plain rfe, or the last single steps of a faster method
                loc_disc = rfe(R)
            elif self.__method == 'sqrtrfe':
                loc_disc = sqrtrfe(R)
            elif self.__method == 'bisrfe':
                loc_disc = bisrfe(R)
            elif self.__method == 'erfe':
                loc_disc = erfe(R)

            # A step that discards nothing would leave the loop spinning
            # forever on the same features: drop a single one instead.
            if asarray(loc_disc).size == 0:
                loc_disc = rfe(R)

            loc_x = delete(loc_x, loc_disc, 1) # remove local discarded from local x
            glo_disc = glo_idx[loc_disc] # project local discarded into global discarded

            # remove discarded from global indices
            glo_bool = ones(glo_idx.shape[0], dtype = bool)
            glo_bool[loc_disc] = False
            glo_idx = glo_idx[glo_bool]

            if debug:
                print("%d features remaining" % glo_idx.shape[0])

            tot_disc = r_[glo_disc, tot_disc]

        # a single survivor is the best ranked feature
        if glo_idx.shape[0] == 1:
            tot_disc = r_[glo_idx, tot_disc]

        return tot_disc


    def __compute_rfs(self, x, y, debug):
        """
        Rank the features by repeatedly selecting the most useful one.

        At each step the weights are recomputed on the columns of x not
        selected yet and the feature picked by rfs() is appended to the
        returned array, so features are listed in selection order.
        """
        loc_x = x.copy()
        glo_idx = arange(x.shape[1], dtype = int)
        tot_sel = arange(0, dtype = int)

        while glo_idx.shape[0] > 1:
            R = self.__weights(loc_x, y)

            if self.__method == 'rfs':
                loc_sel = rfs(R)

            loc_x = delete(loc_x, loc_sel, 1) # remove local selected from local x
            glo_sel = glo_idx[loc_sel] # project local selected into global selected

            # remove selected from global indices
            glo_bool = ones(glo_idx.shape[0], dtype = bool)
            glo_bool[loc_sel] = False
            glo_idx = glo_idx[glo_bool]

            if debug:
                print("%d features remaining" % glo_idx.shape[0])

            tot_sel = r_[tot_sel, glo_sel]

        # the only feature left is ranked last
        if glo_idx.shape[0] == 1:
            tot_sel = r_[tot_sel, glo_idx]

        return tot_sel


    def compute(self, x, y, w, debug = False):
        """
        Compute the feature ranking.

        Input

          * *x*     - [2D numpy array float] (sample x feature) training data
          * *y*     - [1D numpy array integer] (1 or -1) classes
          * *w*     - object (e.g. classifier) with weights() method
          * *debug* - [bool] show remaining number of feature at each step (True or False)

        Output

          * *feature ranking* - [1D numpy array integer] ranked feature indexes

        Raises ValueError if *w* has no weights attribute.
        """

        try:
            self.__weights = w.weights
        except AttributeError as e:
            raise ValueError(e)

        if self.__method in self.RFE_METHODS + self.OTHER_METHODS:
            return self.__compute_rfe(x, y, debug)
        elif self.__method in self.RFS_METHODS:
            return self.__compute_rfs(x, y, debug)
        
