import pandas as pd
import os
import maad
from maad import sound, features
from maad.util import (date_parser, plot_correlation_map, 
                       plot_features_map, plot_features, false_Color_Spectro)
from datetime import datetime
import matplotlib.pyplot as plt
import librosa as lib
from librosa import display
import numpy as np
from plotnine import *
import soundfile as sf
import seaborn as sns
from scipy.stats import shapiro, kstest



def acoustic_indices():
    """Compute all temporal and spectral alpha acoustic indices for every
    recording listed in filename.txt (2-min, 4-channel files) and write one
    CSV of scalar indices plus one CSV of per-frequency-bin indices per
    channel.
    """
    # Reference list of the spectral indices scikit-maad can compute
    # (all_spectral_alpha_indices computes them regardless of this list).
    SPECTRAL_FEATURES=['MEANf','VARf','SKEWf','KURTf','NBPEAKS','LEQf',
    'ENRf','BGNf','SNRf','Hf', 'EAS','ECU','ECV','EPS','EPS_KURT','EPS_SKEW','ACI',
    'NDSI','rBA','AnthroEnergy','BioEnergy','BI','ROU','ADI','AEI','LFC','MFC','HFC',
    'ACTspFract','ACTspCount','ACTspMean', 'EVNspFract','EVNspMean','EVNspCount',
    'TFSD','H_Havrda','H_Renyi','H_pairedShannon', 'H_gamma', 'H_GiniSimpson','RAOQ',
    'AGI','ROItotal','ROIcover']

    # Reference list of the temporal indices (all_temporal_alpha_indices).
    TEMPORAL_FEATURES=['ZCR','MEANt', 'VARt', 'SKEWt', 'KURTt',
    'LEQt','BGNt', 'SNRt','MED', 'Ht','ACTtFraction', 'ACTtCount',
    'ACTtMean','EVNtFraction', 'EVNtMean', 'EVNtCount']

    base = '/nfs/NAS6/SABIOD/SITE/greenpraxis/morvan_april_22/recorder5_morvan_pristine/merged/'
    out = '/nfs/NAS6/SABIOD/SITE/greenpraxis/morvan_april_22/results/'

    # test first on a small subset (e.g. 5 recordings)
    df = pd.read_csv(base + 'filename.txt', sep=";", names=['file'])  # 2 min files, 4 channels
    # Recording start time is encoded in the first 15 chars of the filename
    # (YYYYmmdd_HHMMSS).
    df['Date'] = pd.to_datetime(df['file'].str.slice(start=0, stop=15), format='%Y%m%d_%H%M%S')

    S = -35         # Sensitivity microphone: -35 dBV (SM4) / -18 dBV (Audiomoth)
    G = 26 + 16     # Amplification gain (26 dB SM4 preamplifier + 16 dB)

    for i in range(4):  # 4 channels
        rows = []       # one row of scalar indices per readable recording
        bin_rows = []   # one row of per-frequency-bin indices per recording

        for index, row in df.iterrows():
            # Load the original sound (24 bits) and get the sampling frequency fs
            try:
                wave, fs = sf.read(base + row['file'])
            except (FileNotFoundError, ValueError, RuntimeError):
                # Missing or truncated file (e.g. no EOF): drop the row so the
                # Date column rebuilt below stays aligned with the indices.
                df.drop(index, inplace=True)
                continue

            # dB_threshold and rejectDuration are used to select audio events.
            df_audio_ind = features.all_temporal_alpha_indices(
                wave[:, i], fs,
                gain=G, sensibility=S,
                dB_threshold=3, rejectDuration=0.01,
                verbose=False, display=False)

            Sxx_power, tn, fn, ext = sound.spectrogram(
                wave[:, i], fs, window='hanning', nperseg=2048,
                noverlap=2048 // 2, verbose=False, display=False, savefig=None)
            df_spec_ind, df_spec_ind_per_bin = features.all_spectral_alpha_indices(
                Sxx_power, tn, fn,
                flim_low=[0, 1500],
                flim_mid=[1500, 10000],
                flim_hi=[10000, 128000],
                gain=G, sensitivity=S,
                verbose=False,
                R_compatible='soundecology',
                mask_param1=6, mask_param2=0.5,
                display=False)

            rows.append(pd.concat([df_audio_ind, df_spec_ind], axis=1))
            bin_rows.append(df_spec_ind_per_bin)

        # DataFrame.append was removed in pandas 2.0: accumulate per-file rows
        # in lists and concatenate once.
        df_indices = pd.concat(rows, ignore_index=True) if rows else pd.DataFrame()
        df_indices_per_bin = pd.concat(bin_rows, ignore_index=True) if bin_rows else pd.DataFrame()

        # Rebuild the Date column AFTER unreadable files were dropped so dates
        # line up row-for-row with the indices (a snapshot taken before the
        # loop would shift every date following a dropped file).
        dates = pd.DataFrame(df['Date']).reset_index(drop=True)
        df_indices2 = pd.concat([dates, df_indices], axis=1)
        df_indices_per_bin2 = pd.concat([dates, df_indices_per_bin], axis=1)

        df_indices2.to_csv(out + 'indices_recorder5_morvan_pristine_channel' + str(i + 1) + '.csv',
                           sep=";", date_format='%Y-%m-%d %H:%M:%S')
        df_indices_per_bin2.to_csv(out + 'indices_bin_recorder5_morvan_pristine_channel' + str(i + 1) + '.csv',
                                   sep=";", date_format='%Y-%m-%d %H:%M:%S')


# graphical display
def fcspectro():
    """Render a long-duration false-colour spectrogram from the per-bin
    spectral indices previously computed and saved as CSV.

    Three *_per_bin indices are mapped onto the RGB channels; the figure is
    saved next to the results (the extension and the index names are appended
    automatically by scikit-maad).
    """
    # Per-frequency-bin spectral indices table.
    df = pd.read_csv("/nfs/NAS6/SABIOD/SITE/greenpraxis/morvan_april_22/results/indices_per_bin_1dayrec1.csv", sep=";")

    # Only *_per_bin columns can be displayed -- e.g. MEANt, VARt, SKEWt,
    # KURTt, LEQf, ENRf, BGNf, SNRf, Ht, ACI, ROU, ACTspFract, ACTspCount,
    # EVNspFract, EVNspMean, EVNspCount, AGI (see scikit-maad docs,
    # "acoustic features", for definitions).
    rgb_indices = [
        'EVNspCount_per_bin',  # number of events per second per bin
        'ACI_per_bin',
        'Ht_per_bin',
    ]
    fcs, triplet = false_Color_Spectro(df,
                                       indices=rgb_indices,
                                       reverseLUT=False,
                                       unit='days',
                                       permut=False,
                                       display=True,
                                       figsize=(10,5),
                                       savefig='/nfs/NAS6/SABIOD/SITE/greenpraxis/morvan_april_22/results/morvan_1dayrec1_day')

def round_to_multiple(number, multiple):
    """Round *number* to the nearest integer multiple of *multiple*.

    Uses built-in round(), so exact halves follow banker's rounding
    (round-half-to-even).
    """
    ratio = number / multiple
    return round(ratio) * multiple

def temporal_sampling():
    """Cut one recording into consecutive 5-second chunks, compute the
    per-frequency-bin spectral indices on each chunk, and save the table
    (one row set per chunk, tagged with its starting time) as CSV.
    """
    y, sr = sf.read('/nfs/NAS6/SABIOD/SITE/greenpraxis/morvan_april_22/recorder1_epiry_treatment/1day_subset/20220406_224800UTC_V12.wav')
    y_mono = y[:, 0]  # keep channel 1 only
    dur = lib.get_duration(y=y_mono, sr=sr)

    # Chunk start times in seconds: 0, 5, 10, ... — the upper limit is the
    # total duration rounded to the nearest multiple of 5 so only whole
    # 5-second chunks are produced.
    starts = np.arange(0, round_to_multiple(dur, 5), 5)

    bin_rows = []     # per-bin index rows, one batch per chunk
    time_start = []   # chunk start time (s), one entry per chunk

    for j in starts:
        j = int(j)
        # BUG FIX: every chunk now spans 5 s. Previously only the LAST chunk
        # used (j+5)*sr while the others took (j+1)*sr, i.e. 1 s of audio.
        # Slicing clips at the array end, so no bounds check is needed.
        chunk = y_mono[j * sr:(j + 5) * sr]

        Sxx_power, tn, fn, ext = maad.sound.spectrogram(chunk, sr, window='hanning',
                                                        nperseg=2048, noverlap=2048 // 2,
                                                        verbose=False, display=False,
                                                        savefig=None)
        _, df_spec_ind_per_bin = features.all_spectral_alpha_indices(Sxx_power,
                                                                     tn, fn,
                                                                     flim_low=[0, 1500],
                                                                     flim_mid=[1500, 8000],
                                                                     flim_hi=[8000, 20000],
                                                                     verbose=False,
                                                                     R_compatible='soundecology',
                                                                     display=False)
        time_start.append(j)
        bin_rows.append(df_spec_ind_per_bin)

    # DataFrame.append was removed in pandas 2.0; concatenate once, and build
    # the time column once after the loop instead of on every iteration.
    df_indices_per_bin = pd.concat(bin_rows, ignore_index=True) if bin_rows else pd.DataFrame()
    time = pd.DataFrame(time_start, columns=['starting_time'])
    df = pd.concat([time, df_indices_per_bin], axis=1)
    df.to_csv('/nfs/NAS6/SABIOD/SITE/greenpraxis/morvan_april_22/results/indices_per_bin_1dayrec1_20220406_224800UTC_V12.csv', sep=';')
   
def graph():
    """Scatter-plot the per-bin event count against chunk starting time
    from the CSV produced by temporal_sampling()."""
    data = pd.read_csv('/nfs/NAS6/SABIOD/SITE/greenpraxis/morvan_april_22/results/indices_per_bin_1dayrec1_20220406_224800UTC_V12.csv', sep=';')
    plot = ggplot(data) + geom_point(aes(x=data.starting_time, y=data.EVNspCount_per_bin))
    print(plot)

def date_issue():
    """Scan the recording list for gaps longer than 11 minutes between two
    consecutive files and save the offending files together with the gap
    length (in minutes) to a CSV.

    The timestamp is parsed from a fixed slice of each filename
    (characters -26 to -11, format YYYYmmdd_HHMMSS).
    """
    df = pd.read_csv('/nfs/NAS6/SABIOD/SITE/greenpraxis/morvan_april_22/recorder1_epiry_treatment/filename.txt', sep=';')
    fmt = '%Y%m%d_%H%M%S'
    problem = []
    name = []
    for i in range(len(df) - 1):
        # Parse each timestamp once instead of twice per comparison.
        t_cur = pd.to_datetime(df.iloc[i, 0][-26:-11], format=fmt)
        t_next = pd.to_datetime(df.iloc[i + 1, 0][-26:-11], format=fmt)
        gap_min = (t_next - t_cur).total_seconds() / 60
        if gap_min > 11:
            name.append(df.iloc[i + 1, 0])
            problem.append(gap_min)
    # BUG FIX: pd.concat() cannot take plain Python lists (TypeError) and
    # would have stacked them into one column anyway; build the two-column
    # frame directly instead.
    a = pd.DataFrame({'file_after_break': name, 'break_interval_in_min': problem})
    a.to_csv('/nfs/NAS6/SABIOD/SITE/greenpraxis/morvan_april_22/results/discontinuities_rec1_april22.csv', sep=';')


def ai_display():
    """Assemble the channel-1 acoustic indices of the five recorders, attach
    corrected timestamps, compute hourly per-site means, min-max normalize
    each index, and box-plot a subset (Hf, BI) for the daytime period.

    Returns
    -------
    df : DataFrame          raw indices of all sites, one row per recording
    hourlymeans : DataFrame per-hour/per-site means with a day-period label
    all_df : DataFrame      hourly means with min-max normalized indices
    sub : DataFrame         long-format subset (Hf, BI) used for the plot
    """
    base = "/nfs/NAS4/SABIOD/SITE/greenpraxis/morvan_april_22/"

    dfind1 = pd.read_csv(base + "results/indices_recorder1_epiry_treatment_channel1.csv", sep=";")
    dfind2 = pd.read_csv(base + "results/indices_recorder2_epiry_control_channel1.csv", sep=";")
    dfind3 = pd.read_csv(base + "results/indices_recorder3_tannay_treatment_channel1.csv", sep=";")
    dfind4 = pd.read_csv(base + "results/indices_recorder4_tannay_control_channel1.csv", sep=";")
    dfind5 = pd.read_csv(base + "results/indices_recorder5_morvan_pristine_channel1.csv", sep=";")

    # Drop the unnamed CSV row-index column and tag each table with its site
    # (French labels are used directly as plot categories below).
    site_labels = ['Epiry traitement', 'Epiry témoin', 'Tannay traitement',
                   'Tannay témoin', ' Témoin\nhors zone ferroviaire']
    frames = [dfind1, dfind2, dfind3, dfind4, dfind5]
    for frame, label in zip(frames, site_labels):
        frame.drop(['Unnamed: 0'], inplace=True, axis=1)
        frame['site'] = label

    ## correct datetime
    # Recorders 1, 2 and 4: only every other filename matches a computed-index
    # row (hence the ::2 stride). Recorders 3 and 5 ran in continuous mode but
    # indices were not computed on their last files, so nrows trims the list
    # (caution: fragile — a better solution is to align on filenames).
    dftime1 = pd.read_csv(base + "recorder1_epiry_treatment/correct_dates2.txt", sep=";", names=['filename'])
    redftime1 = dftime1.iloc[::2, :].reset_index(drop=True)
    dftime2 = pd.read_csv(base + "recorder2_epiry_control/correct_dates2.txt", sep=";", names=['filename'])
    redftime2 = dftime2.iloc[::2, :].reset_index(drop=True)
    dftime3 = pd.read_csv(base + "recorder3_tannay_treatment/correct_dates2.txt", sep=";", names=['filename'], nrows=2659)
    dftime3.reset_index(drop=True, inplace=True)
    dftime4 = pd.read_csv(base + "recorder4_tannay_control/correct_dates2.txt", sep=";", names=['filename'])
    redftime4 = dftime4.iloc[::2, :].reset_index(drop=True)
    dftime5 = pd.read_csv(base + "recorder5_morvan_pristine/correct_dates2.txt", sep=";", names=['filename'], nrows=2520)
    dftime5.reset_index(drop=True, inplace=True)

    # Replace the (possibly misaligned) Date column with the corrected filenames.
    df1 = pd.concat([dfind1, redftime1], axis=1).drop('Date', axis=1)
    df2 = pd.concat([dfind2, redftime2], axis=1).drop('Date', axis=1)
    df3 = pd.concat([dfind3, dftime3], axis=1).drop('Date', axis=1)
    df4 = pd.concat([dfind4, redftime4], axis=1).drop('Date', axis=1)
    df5 = pd.concat([dfind5, dftime5], axis=1).drop('Date', axis=1)

    df = pd.concat([df1, df2, df3, df4, df5], axis=0)

    # Hourly means per site. numeric_only=True keeps pandas>=2 from raising a
    # TypeError on the string columns (filename, site) that pandas 1.x used
    # to drop silently.
    df['datetime'] = pd.to_datetime(df['filename'].str.slice(start=0, stop=15), format='%Y%m%d_%H%M%S')
    df['hour'] = df['datetime'].dt.hour
    hourlymeans = df.groupby(['hour', 'site']).mean(numeric_only=True)
    hourlymeans.reset_index(level=['hour', 'site'], inplace=True)

    # Day-period labels. Hours 22-23 and 0-5 keep the 'nuit' default (the
    # original `>=22 & <=5` mask was always empty, so dropping it is a no-op).
    hourlymeans['period'] = 'nuit'
    hourlymeans.loc[(hourlymeans["hour"] >= 6) & (hourlymeans["hour"] <= 9), 'period'] = "aube"
    hourlymeans.loc[(hourlymeans["hour"] >= 10) & (hourlymeans["hour"] <= 18), 'period'] = "journée"
    hourlymeans.loc[(hourlymeans["hour"] >= 19) & (hourlymeans["hour"] <= 21), 'period'] = "crépuscule"

    # Min-max normalization per acoustic index (works column by column), then
    # melt to long format so indices can be facet-plotted.
    only_ind = hourlymeans.iloc[:, 2:62]
    normalized_df = (only_ind - only_ind.min()) / (only_ind.max() - only_ind.min())
    all_df = pd.concat([hourlymeans[['site', 'hour', 'period']], normalized_df], axis=1)
    id_hour = ['hour', 'site', 'period']
    piv_hour = all_df.melt(id_vars=id_hour, var_name="acoustic_index")

    # Subset of indices for better visualization. copy() so the columns added
    # below do not trigger SettingWithCopyWarning on a filtered view.
    sub = piv_hour[(piv_hour["acoustic_index"] == 'Hf') | (piv_hour["acoustic_index"] == 'BI')].copy()
    sub['valeur normalisée'] = sub['value']

    # Colour the out-of-railway control site differently.
    # Series.append was removed in pandas 2.0 -> build with pd.concat.
    rep2 = pd.Series(['0']).repeat(len(sub) - 1)
    rep3 = pd.concat([rep2, pd.Series(['1'])], ignore_index=True)
    sub['type'] = pd.Categorical(rep3)
    sub.loc[sub['site'] == ' Témoin\nhors zone ferroviaire', 'type'] = '1'
    sub.iloc[-1, -1] = '0'  # only needed if the last row is not site == ' Témoin' (note the leading space)
    color_dict = {'0': 'grey', '1': 'white'}

    k = ggplot(sub[(sub['period']=='journée')]) + geom_boxplot(aes(x='site',y='valeur normalisée', fill='type')) + facet_wrap('acoustic_index',ncol=1) + theme_classic() + theme(axis_text = element_text(size = 24)) + theme(axis_title = element_text(size = 24)) + theme(strip_text_x = element_text(size = 24)) + scale_fill_manual(values=color_dict) + scale_x_discrete(limits=[' Témoin\nhors zone ferroviaire','Epiry témoin','Epiry traitement','Tannay témoin','Tannay traitement'])
    print(k)

    return df, hourlymeans, all_df, sub

def stat():
    """Run a Shapiro-Wilk normality test on the BI index across all sites.

    Returns the combined raw-index DataFrame from ai_display().
    Note: none of the indices selected in `sub` looked normally distributed
    in histogram exploration.
    """
    df, hourlymeans, all_norm, sub = ai_display()
    # BUG FIX: the test result was computed but silently discarded; print the
    # (statistic, p-value) pair so the check is actually visible.
    print(shapiro(df['BI']))
    return df

# Pipeline entry points — uncomment the stage to run:
# acoustic_indices()
# fcspectro()
# temporal_sampling()
# graph()
# date_issue()
if __name__ == "__main__":
    # Guarded so importing this module does not trigger the heavy NFS reads
    # and plotting performed by ai_display().
    df, hourlymeans, normalized, sub = ai_display()
    # df = stat()