import math
import statistics
import warnings

import numpy as np
from hmmlearn.hmm import GaussianHMM
from sklearn.model_selection import KFold
from asl_utils import combine_sequences


class ModelSelector(object):
    '''Base class for model selection (strategy design pattern).
    '''

    def __init__(self, all_word_sequences: dict, all_word_Xlengths: dict, this_word: str,
                 n_constant=3,
                 min_n_components=2, max_n_components=10,
                 random_state=14, verbose=False):
        self.words = all_word_sequences
        self.hwords = all_word_Xlengths
        self.sequences = all_word_sequences[this_word]
        self.X, self.lengths = all_word_Xlengths[this_word]
        self.this_word = this_word
        self.n_constant = n_constant
        self.min_n_components = min_n_components
        self.max_n_components = max_n_components
        self.random_state = random_state
        self.verbose = verbose
        self.n_components = range(self.min_n_components, self.max_n_components + 1)

    def select(self):
        raise NotImplementedError

    def base_model(self, num_states):
        # with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        # warnings.filterwarnings("ignore", category=RuntimeWarning)
        try:
            hmm_model = GaussianHMM(n_components=num_states, covariance_type="diag", n_iter=1000,
                                    random_state=self.random_state, verbose=False).fit(self.X, self.lengths)
            if self.verbose:
                print("Model created for {} with {} states".format(self.this_word, num_states))
            return hmm_model
        except Exception:
            if self.verbose:
                print("failure on {} with {} states".format(self.this_word, num_states))
            return None


class SelectorConstant(ModelSelector):
    """Select the model with value self.n_constant.
    """

    def select(self):
        """Select based on n_constant value.

        :return: GaussianHMM object
        """
        best_num_components = self.n_constant
        return self.base_model(best_num_components)


class SelectorBIC(ModelSelector):
    """Select the model with the lowest Bayesian Information Criterion (BIC)
    score -- http://www2.imm.dtu.dk/courses/02433/doc/ch6_slides.pdf

    Bayesian information criterion: BIC = -2 * logL + p * logN
    """

    def select(self):
        """Select the best model for self.this_word based on BIC score
        for n between self.min_n_components and self.max_n_components.

        :return: GaussianHMM object
        """
        warnings.filterwarnings("ignore", category=DeprecationWarning)

        bic_scores = []
        candidates = []
        for n in self.n_components:
            # BIC = -2 * logL + p * logN
            # logL = log likelihood of the fitted model
            # p = number of free parameters
            # N = number of data points
            try:
                model = self.base_model(n)
                log_l = model.score(self.X, self.lengths)
                # Free parameters of a diag-covariance GaussianHMM with n
                # states and f features: n*(n-1) transition probabilities,
                # n-1 initial probabilities, and 2*n*f means and variances
                p = n ** 2 + 2 * n * model.n_features - 1
                # N is the number of data points, not the number of states
                bic_scores.append(-2 * log_l + p * math.log(len(self.X)))
                candidates.append(n)
            except Exception:
                # Skip component counts that fail to train or score
                continue
        # Lower BIC is better, so take the argmin
        states = candidates[np.argmin(bic_scores)] if bic_scores else self.n_constant
        return self.base_model(states)
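
# Worked example of the free-parameter count used by SelectorBIC above (an
# illustrative note, not project code): for a diag-covariance GaussianHMM
# with n = 3 states and f = 2 features,
#     transitions n*(n-1) = 6, initial probs n-1 = 2, means/variances 2*n*f = 12,
# so p = 6 + 2 + 12 = 20 = n**2 + 2*n*f - 1.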

class SelectorDIC(ModelSelector):
    """Select best model based on Discriminative Information Criterion

    Biem, Alain. "A model selection criterion for classification: Application
    to hmm topology optimization." Document Analysis and Recognition, 2003.
    Proceedings. Seventh International Conference on. IEEE, 2003.
    http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.58.6208

    DIC = log(P(X(i))) - 1/(M-1) * SUM(log(P(X(all but i))))
    """

    def select(self):
        warnings.filterwarnings("ignore", category=DeprecationWarning)

        # "All but i" in the DIC formula refers to the other words in the
        # training set, so score each candidate model against them
        other_words = [self.hwords[word] for word in self.hwords if word != self.this_word]
        dic_scores = []
        candidates = []
        for n_component in self.n_components:
            try:
                model = self.base_model(n_component)
                # log(P(X(i))): evidence for the word being modeled
                log_l = model.score(self.X, self.lengths)
                # 1/(M-1) * SUM(log(P(X(all but i)))): average evidence the
                # same model assigns to the competing words
                anti_log_l = np.mean([model.score(other_X, other_lengths)
                                      for other_X, other_lengths in other_words])
                dic_scores.append(log_l - anti_log_l)
                candidates.append(n_component)
            except Exception:
                continue
        # Higher DIC is better: the model explains its own word well and the
        # competing words poorly
        states = candidates[np.argmax(dic_scores)] if dic_scores else self.n_constant
        return self.base_model(states)


class SelectorCV(ModelSelector):
    """Select best model based on average log-likelihood of cross-validation folds.
    """

    def select(self):
        warnings.filterwarnings("ignore", category=DeprecationWarning)

        # KFold needs at least two sequences; fall back to the constant
        # number of states when this word has too few examples to split
        if len(self.sequences) < 2:
            return self.base_model(self.n_constant)
        # Save reference to 'KFold' in variable as shown in notebook
        split_method = KFold(n_splits=min(3, len(self.sequences)))

        mean_scores = []
        candidates = []
        for n_component in self.n_components:
            try:
                fold_scores = []
                for train_idx, test_idx in split_method.split(self.sequences):
                    # Train on the training folds only...
                    train_X, train_lengths = combine_sequences(train_idx, self.sequences)
                    model = GaussianHMM(n_components=n_component, covariance_type="diag",
                                        n_iter=1000, random_state=self.random_state,
                                        verbose=False).fit(train_X, train_lengths)
                    # ...and score on the held-out test fold
                    test_X, test_lengths = combine_sequences(test_idx, self.sequences)
                    fold_scores.append(model.score(test_X, test_lengths))
                # Compute mean of all fold scores
                mean_scores.append(np.mean(fold_scores))
                candidates.append(n_component)
            except Exception:
                continue
        states = candidates[np.argmax(mean_scores)] if mean_scores else self.n_constant
        # Refit on all of the word's data with the best-scoring state count
        return self.base_model(states)
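
# ---------------------------------------------------------------------------
# Minimal smoke test (an illustrative sketch, not part of the project code):
# builds two synthetic "words" with random 2-feature observation sequences in
# the same dict layout the selectors expect, then runs each selector on one
# of them. combine_sequences still comes from asl_utils, so this only runs
# inside the project environment.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    rng = np.random.RandomState(14)

    def make_word(num_seqs, offset):
        # Each sequence is a short list of 2-feature frames shifted by offset
        seqs = [(rng.randn(rng.randint(5, 10), 2) + offset).tolist()
                for _ in range(num_seqs)]
        X = np.vstack([np.asarray(s) for s in seqs])
        lengths = [len(s) for s in seqs]
        return seqs, (X, lengths)

    words, hwords = {}, {}
    for word, offset in [("HELLO", 0.0), ("WORLD", 3.0)]:
        words[word], hwords[word] = make_word(6, offset)

    for selector_cls in (SelectorConstant, SelectorBIC, SelectorDIC, SelectorCV):
        model = selector_cls(words, hwords, "HELLO",
                             min_n_components=2, max_n_components=4).select()
        n = model.n_components if model is not None else None
        print("{}: selected a model with {} states".format(selector_cls.__name__, n))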