import pandas as pd
import numpy as np
import tensorflow as tf
import keras
# Loading Dataset with pandas
df = pd.read_csv('vitalsign_dataset')
df
icu = pd.read_pickle("patient_list.pkl")
icu
vital_signs = ['HF', 'sys', 'map', 'dia', 'SpO2', 'Ta']
# Work on one vital sign at a time; vital_signs[0] selects 'HF'
df = df.loc[df['Vitalsign']==vital_signs[0], :].reset_index(drop=True)
# Cast columns to the expected dtypes
df['value_as_number'] = df['value_as_number'].astype(float)
df['artefact'] = df['artefact'].astype(int)
# Convert 'measurement_datetime' to datetime if it's not already
df['measurement_datetime'] = pd.to_datetime(df['measurement_datetime'])
# Express each measurement as seconds since that patient's first measurement
df['measurement_datetime_seconds'] = df.groupby('person_id')['measurement_datetime'].transform(lambda x: (x - x.min()).dt.total_seconds())
# Sort the dataframe by 'measurement_datetime'
df = df.sort_values(by=['person_id', 'measurement_datetime_seconds'])
df['value_as_number_diff'] = df.groupby('person_id')['value_as_number'].diff(1)
# Compute the difference in 'measurement_datetime' for each 'person_id' in seconds
df['time_diff'] = df.groupby('person_id')['measurement_datetime_seconds'].diff()
# Compute the rate of change of 'Vitalsign' (derivative)
df['value_as_number_rate'] = df['value_as_number_diff'] / df['time_diff']
# Replace any inf/nan values with forward fill
df = df.replace([np.inf, -np.inf], np.nan)
df = df.ffill()  # fillna(method='ffill') is deprecated in recent pandas
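# Note: forward fill cannot impute each patient's very first diff/rate rows
# (nothing precedes them). A minimal sketch of one option; filling with 0 is
# an assumption, not part of the original pipeline:
df[['value_as_number_diff', 'time_diff', 'value_as_number_rate']] = (
    df[['value_as_number_diff', 'time_diff', 'value_as_number_rate']].fillna(0)
)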
from sklearn.model_selection import train_test_split
# Assuming df is your entire dataframe
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['person_id'])
train_df, valid_df = train_test_split(train_df, test_size=0.25, random_state=42, stratify=train_df['person_id']) # 0.25 x 0.8 = 0.2
train_df = train_df.sort_values(by=['person_id', 'measurement_datetime_seconds'])
test_df = test_df.sort_values(by=['person_id', 'measurement_datetime_seconds'])
valid_df = valid_df.sort_values(by=['person_id', 'measurement_datetime_seconds'])
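# Note: a row-level split stratified on 'person_id' puts rows from the same
# patient into train, validation, and test, so windows from one patient can
# leak across splits. If patient-level separation is wanted, a grouped split
# is one alternative (a sketch under that assumption, not the split used above):
from sklearn.model_selection import GroupShuffleSplit
gss = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, test_idx = next(gss.split(df, groups=df['person_id']))
train_df_g, test_df_g = df.iloc[train_idx], df.iloc[test_idx]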
from sklearn.preprocessing import MinMaxScaler, StandardScaler
# Define sequence length
sequence_length = 12
# Create sequences
def create_sequences(df, sequence_length):
    data_sequences = []
    label_sequences = []
    person_ids = df['person_id'].unique()
    # Exclude 'artefact' and 'person_id' columns from features
    features = df.columns.difference(['artefact', 'person_id'])
    for id in person_ids:
        person_data = df[df['person_id'] == id]
        for i in range(len(person_data) - sequence_length):
            # Get the feature values for the current sequence
            seq_data = person_data[features].iloc[i:i+sequence_length].values
            data_sequences.append(seq_data)
            # Get the label for the current sequence
            seq_label = person_data['artefact'].iloc[i+sequence_length]
            label_sequences.append(seq_label)
    return np.array(data_sequences), np.array(label_sequences)
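# Quick sanity check of the windowing on a hypothetical two-patient toy frame
# (names and values are illustrative only, not from the dataset):
toy = pd.DataFrame({
    'person_id': [1]*8 + [2]*8,
    'value': np.arange(16, dtype=float),
    'artefact': [0]*15 + [1],
})
x_toy, y_toy = create_sequences(toy, sequence_length=4)
print(x_toy.shape, y_toy.shape)  # (8, 4, 1) and (8,): 8-4=4 windows per patient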
# Normalize numerical features
numerical_features = ['value_as_number', 'measurement_datetime_seconds', 'value_as_number_rate']
scaler = StandardScaler() # or use MinMaxScaler()
# Fit on the training data
scaler.fit(train_df[numerical_features])
# Transform the training and validation data
train_df_normalized = train_df.copy()
valid_df_normalized = valid_df.copy()
test_df_normalized = test_df.copy()
train_df_normalized[numerical_features] = scaler.transform(train_df[numerical_features])
valid_df_normalized[numerical_features] = scaler.transform(valid_df[numerical_features])
test_df_normalized[numerical_features] = scaler.transform(test_df[numerical_features])
# Create sequences
x_train, y_train = create_sequences(train_df_normalized, sequence_length)
x_valid, y_valid = create_sequences(valid_df_normalized, sequence_length)
x_test, y_test = create_sequences(test_df_normalized, sequence_length)
x_valid.shape
(13399, 12, 3)
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import LSTM, Dropout, Dense, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
def define_model(num_neurons=64, history_length=5, input_size=3, number_of_outputs=1):
    """
    Defines an LSTM model for sequence prediction.

    Args:
        num_neurons (int): The number of neurons in each LSTM layer.
        history_length (int): The history length of the input sequences.
        input_size (int): The number of features in each input sequence.
        number_of_outputs (int): The number of output units.

    Returns:
        Model: The defined LSTM model.
    """
    model = Sequential()
    model.add(LSTM(num_neurons, input_shape=(history_length, input_size), return_sequences=True))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(LSTM(num_neurons))
    model.add(Dense(number_of_outputs, activation='sigmoid'))
    return model
# Model configuration
model = define_model(num_neurons=64, history_length=sequence_length, input_size=3, number_of_outputs=1)
# 'lr' is deprecated in Keras optimizers; use 'learning_rate'
model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0001),
              metrics=['accuracy', tf.keras.metrics.TruePositives(), tf.keras.metrics.TrueNegatives(),
                       tf.keras.metrics.FalsePositives(), tf.keras.metrics.FalseNegatives(), tf.keras.metrics.AUC()])
# Define callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=5)
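# Note: the training log below shows only ~2% positive labels, so the model
# can score high accuracy by predicting the majority class. One common remedy
# is class weighting; a sketch (passing it to model.fit is an assumption, the
# original run trains unweighted):
from sklearn.utils.class_weight import compute_class_weight
weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
class_weight = dict(zip(np.unique(y_train), weights))
# e.g. model.fit(..., class_weight=class_weight)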
# Fit the model
history = model.fit(
    x_train, y_train,
    validation_data=(x_valid, y_valid),
    epochs=150,
    batch_size=128,
    callbacks=[early_stopping]
)
Epoch 1/150
334/334 [==============================] - 5s 8ms/step - loss: 0.1090 - accuracy: 0.9743 - true_positives_9: 27.0000 - true_negatives_9: 41585.0000 - false_positives_9: 181.0000 - false_negatives_9: 918.0000 - auc_9: 0.6783 - val_loss: 0.0948 - val_accuracy: 0.9796 - val_true_positives_9: 1.0000 - val_true_negatives_9: 13124.0000 - val_false_positives_9: 1.0000 - val_false_negatives_9: 273.0000 - val_auc_9: 0.6701
Epoch 2/150
334/334 [==============================] - 2s 6ms/step - loss: 0.0923 - accuracy: 0.9782 - true_positives_9: 36.0000 - true_negatives_9: 41745.0000 - false_positives_9: 21.0000 - false_negatives_9: 909.0000 - auc_9: 0.7532 - val_loss: 0.0942 - val_accuracy: 0.9793 - val_true_positives_9: 1.0000 - val_true_negatives_9: 13121.0000 - val_false_positives_9: 4.0000 - val_false_negatives_9: 273.0000 - val_auc_9: 0.7022
Epoch 3/150
334/334 [==============================] - 2s 7ms/step - loss: 0.0910 - accuracy: 0.9782 - true_positives_9: 30.0000 - true_negatives_9: 41750.0000 - false_positives_9: 16.0000 - false_negatives_9: 915.0000 - auc_9: 0.7618 - val_loss: 0.0924 - val_accuracy: 0.9795 - val_true_positives_9: 1.0000 - val_true_negatives_9: 13123.0000 - val_false_positives_9: 2.0000 - val_false_negatives_9: 273.0000 - val_auc_9: 0.7126
Epoch 4/150
334/334 [==============================] - 2s 7ms/step - loss: 0.0898 - accuracy: 0.9785 - true_positives_9: 42.0000 - true_negatives_9: 41751.0000 - false_positives_9: 15.0000 - false_negatives_9: 903.0000 - auc_9: 0.7686 - val_loss: 0.0933 - val_accuracy: 0.9790 - val_true_positives_9: 1.0000 - val_true_negatives_9: 13117.0000 - val_false_positives_9: 8.0000 - val_false_negatives_9: 273.0000 - val_auc_9: 0.7224
Epoch 5/150
334/334 [==============================] - 2s 7ms/step - loss: 0.0888 - accuracy: 0.9784 - true_positives_9: 45.0000 - true_negatives_9: 41742.0000 - false_positives_9: 24.0000 - false_negatives_9: 900.0000 - auc_9: 0.7722 - val_loss: 0.0928 - val_accuracy: 0.9793 - val_true_positives_9: 3.0000 - val_true_negatives_9: 13118.0000 - val_false_positives_9: 7.0000 - val_false_negatives_9: 271.0000 - val_auc_9: 0.7094
Epoch 6/150
334/334 [==============================] - 2s 7ms/step - loss: 0.0886 - accuracy: 0.9786 - true_positives_9: 54.0000 - true_negatives_9: 41744.0000 - false_positives_9: 22.0000 - false_negatives_9: 891.0000 - auc_9: 0.7835 - val_loss: 0.0921 - val_accuracy: 0.9792 - val_true_positives_9: 0.0000e+00 - val_true_negatives_9: 13120.0000 - val_false_positives_9: 5.0000 - val_false_negatives_9: 274.0000 - val_auc_9: 0.7178
Epoch 7/150
334/334 [==============================] - 2s 7ms/step - loss: 0.0877 - accuracy: 0.9786 - true_positives_9: 51.0000 - true_negatives_9: 41747.0000 - false_positives_9: 19.0000 - false_negatives_9: 894.0000 - auc_9: 0.7845 - val_loss: 0.0919 - val_accuracy: 0.9793 - val_true_positives_9: 1.0000 - val_true_negatives_9: 13121.0000 - val_false_positives_9: 4.0000 - val_false_negatives_9: 273.0000 - val_auc_9: 0.7086
Epoch 8/150
334/334 [==============================] - 2s 7ms/step - loss: 0.0872 - accuracy: 0.9787 - true_positives_9: 58.0000 - true_negatives_9: 41743.0000 - false_positives_9: 23.0000 - false_negatives_9: 887.0000 - auc_9: 0.7867 - val_loss: 0.0920 - val_accuracy: 0.9795 - val_true_positives_9: 0.0000e+00 - val_true_negatives_9: 13124.0000 - val_false_positives_9: 1.0000 - val_false_negatives_9: 274.0000 - val_auc_9: 0.7002
Epoch 9/150
334/334 [==============================] - 2s 7ms/step - loss: 0.0864 - accuracy: 0.9790 - true_positives_9: 68.0000 - true_negatives_9: 41745.0000 - false_positives_9: 21.0000 - false_negatives_9: 877.0000 - auc_9: 0.7912 - val_loss: 0.0911 - val_accuracy: 0.9793 - val_true_positives_9: 1.0000 - val_true_negatives_9: 13121.0000 - val_false_positives_9: 4.0000 - val_false_negatives_9: 273.0000 - val_auc_9: 0.7237
Epoch 10/150
334/334 [==============================] - 2s 7ms/step - loss: 0.0859 - accuracy: 0.9788 - true_positives_9: 61.0000 - true_negatives_9: 41746.0000 - false_positives_9: 20.0000 - false_negatives_9: 884.0000 - auc_9: 0.7936 - val_loss: 0.0937 - val_accuracy: 0.9792 - val_true_positives_9: 1.0000 - val_true_negatives_9: 13119.0000 - val_false_positives_9: 6.0000 - val_false_negatives_9: 273.0000 - val_auc_9: 0.7088
Epoch 11/150
334/334 [==============================] - 2s 7ms/step - loss: 0.0852 - accuracy: 0.9790 - true_positives_9: 72.0000 - true_negatives_9: 41740.0000 - false_positives_9: 26.0000 - false_negatives_9: 873.0000 - auc_9: 0.7943 - val_loss: 0.0951 - val_accuracy: 0.9784 - val_true_positives_9: 1.0000 - val_true_negatives_9: 13108.0000 - val_false_positives_9: 17.0000 - val_false_negatives_9: 273.0000 - val_auc_9: 0.7092
Epoch 12/150
334/334 [==============================] - 2s 7ms/step - loss: 0.0853 - accuracy: 0.9794 - true_positives_9: 84.0000 - true_negatives_9: 41746.0000 - false_positives_9: 20.0000 - false_negatives_9: 861.0000 - auc_9: 0.7913 - val_loss: 0.0929 - val_accuracy: 0.9789 - val_true_positives_9: 1.0000 - val_true_negatives_9: 13115.0000 - val_false_positives_9: 10.0000 - val_false_negatives_9: 273.0000 - val_auc_9: 0.7190
Epoch 13/150
334/334 [==============================] - 2s 7ms/step - loss: 0.0842 - accuracy: 0.9794 - true_positives_9: 91.0000 - true_negatives_9: 41740.0000 - false_positives_9: 26.0000 - false_negatives_9: 854.0000 - auc_9: 0.7941 - val_loss: 0.0926 - val_accuracy: 0.9791 - val_true_positives_9: 1.0000 - val_true_negatives_9: 13118.0000 - val_false_positives_9: 7.0000 - val_false_negatives_9: 273.0000 - val_auc_9: 0.7221
Epoch 14/150
334/334 [==============================] - 2s 7ms/step - loss: 0.0836 - accuracy: 0.9795 - true_positives_9: 95.0000 - true_negatives_9: 41741.0000 - false_positives_9: 25.0000 - false_negatives_9: 850.0000 - auc_9: 0.7984 - val_loss: 0.0931 - val_accuracy: 0.9785 - val_true_positives_9: 1.0000 - val_true_negatives_9: 13110.0000 - val_false_positives_9: 15.0000 - val_false_negatives_9: 273.0000 - val_auc_9: 0.7167
X_test = x_test
X_train = x_train
neg_neg = 0
neg_pos = 0
pos_neg = 0
pos_pos = 0
counter = 0
X_train_len = X_train.shape[0]*X_train.shape[1]
# Note: df_sign, batch_size, station, sign, and get_group are assumed to be
# defined earlier in the notebook (per-sign dataframe, batch size, ward filter).
for batch_index in range(X_test.shape[0]):
    label_curve = []
    predicted_curve = []
    main_curve = []
    prob_pos = []
    preds = model.predict(X_test[batch_index,:,:,:])
    for i in range(X_test.shape[1]):
        patient_id = df_sign.iloc[i+X_train_len+batch_index*batch_size, 1]
        if get_group(patient_id, icu)==station or station=='ALL':
            pred = preds[i]
            label = y_test[batch_index,i]
            main_curve.append(X_test[batch_index, i, -1, 0])
            label_curve.append(label)
            if label>0:
                counter+=1
            if label==0:
                if pred<0.5:
                    neg_neg+=1
                else:
                    neg_pos+=1
            else:
                if pred<0.5:
                    pos_neg+=1
                else:
                    pos_pos+=1
            predicted_curve.append(pred)
    prob_pos.append(preds.flatten())
sum_neg = neg_neg + neg_pos
sum_pos = pos_neg + pos_pos
print(neg_neg, neg_pos, pos_neg, pos_pos)
print(neg_neg/sum_neg, pos_pos/sum_pos)
print((neg_neg+ pos_pos) / ((sum_neg+ sum_pos)))
32/32 [==============================] - 0s 1ms/step
32/32 [==============================] - 0s 1ms/step
32/32 [==============================] - 0s 1ms/step
3036 6 10 20
0.9980276134122288 0.6666666666666666
0.9947916666666666
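# Cross-check of the hand-tallied counts with sklearn, assuming the filtered
# labels and probabilities were also collected into flat arrays (hypothetical
# names y_flat / p_flat, not defined in the original notebook):
from sklearn.metrics import confusion_matrix
tn, fp, fn, tp = confusion_matrix(y_flat, (p_flat >= 0.5).astype(int)).ravel()
print(tn, fp, fn, tp)                  # should match neg_neg, neg_pos, pos_neg, pos_pos
print(tn / (tn + fp), tp / (tp + fn))  # specificity, sensitivity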
X_test.shape
(3, 1024, 7, 6)
y_test = y_test.flatten()
y_test.shape
(3072,)
prob_pos = []
for batch_index in range(X_test.shape[0]):
    prob_pos.append(model.predict(X_test[batch_index,:,:,:]))
prob_pos_train = []
for batch_index in range(x_train.shape[0]):
    prob_pos_train.append(model.predict(x_train[batch_index,:,:,:]))
32/32 [==============================] - 0s 1ms/step
(progress line repeated for each remaining batch)
prob_pos = np.array(prob_pos).flatten()
prob_pos_train = np.array(prob_pos_train).flatten()
import matplotlib.pyplot as plt
# Create a color array based on TP, FP, TN, FN
colors = []
for i in range(len(label_curve)):
    label = label_curve[i]
    pred = predicted_curve[i]
    if label > 0 and pred >= 0.5:  # TP: actual positive, predicted positive
        colors.append('green')
    elif label == 0 and pred < 0.5:  # TN: actual negative, predicted negative
        colors.append('blue')
    elif label == 0 and pred >= 0.5:  # FP: actual negative, predicted positive
        colors.append('red')
    else:  # FN: actual positive, predicted negative
        colors.append('orange')
# Convert colors to a numpy array
colors = np.array(colors)
# Create a scatter plot of main_curve with color-coding based on prediction correctness
plt.scatter(range(len(main_curve)), main_curve, c=colors, s=15) # s parameter controls the size of dots
plt.plot(range(len(main_curve)), main_curve, color='black', linewidth=0.5) # plot the line
# Title and labels
plt.title('Example Signal with Prediction Outcomes')
plt.xlabel('Time Step (index)')
plt.ylabel('Example Signal')
# Setting the x limit
plt.xlim([400, 600])
# Legend
plt.scatter([], [], color='green', label='True Positive', s=15)
plt.scatter([], [], color='blue', label='True Negative', s=15)
plt.scatter([], [], color='red', label='False Positive', s=15)
plt.scatter([], [], color='orange', label='False Negative', s=15)
plt.legend()
plt.savefig('Example_plot_'+station+'_'+sign+'.png')
plt.show()
from sklearn.linear_model import LogisticRegression
raw_probabilities = prob_pos_train.reshape(-1, 1)
# Fit the Logistic Regression to calibrate the probabilities
lr = LogisticRegression().fit(raw_probabilities, y_train.flatten())
calibrated_probabilities = lr.predict_proba(prob_pos.reshape(-1, 1)) # Calibrate probabilities
prob_pos = calibrated_probabilities
prob_pos = prob_pos[:,1]
y_pred = (prob_pos > 0.5).astype(int)
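# One way to quantify what the Platt-style recalibration above buys is the
# Brier score of the calibrated probabilities (lower is better); a minimal sketch:
from sklearn.metrics import brier_score_loss
print("Brier score (calibrated):", brier_score_loss(y_test, prob_pos))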
import seaborn as sns
sns.set_style("white")
from sklearn.calibration import calibration_curve
fraction_of_positives, mean_predicted_value = calibration_curve(y_test, prob_pos, n_bins=10)
from sklearn.metrics import roc_curve, roc_auc_score
# Calculate the ROC curve points
fpr, tpr, thresholds = roc_curve(y_test, prob_pos)
# Calculate the AUROC score
auroc = roc_auc_score(y_test, prob_pos)
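# With on the order of 1% positive windows (see the confusion counts above),
# AUROC can look flattering; average precision (area under the precision-recall
# curve) is a complementary summary. A sketch:
from sklearn.metrics import average_precision_score
auprc = average_precision_score(y_test, prob_pos)
print("AUPRC:", auprc, "positive rate:", y_test.mean())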
# Figure 1 for Calibration curve and Histogram
fig1 = plt.figure(figsize=(10, 8)) # Set the overall figure size
gs1 = plt.GridSpec(2, 1, height_ratios=[1, 1]) # Set height ratios to 1:1 for equal vertical size
# Calibration curve
ax1 = plt.subplot(gs1[0])
ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
ax1.plot(mean_predicted_value, fraction_of_positives, "s-", label='LSTM classifier', color='blue')
ax1.set_xlim([0, 1]) # Set x range to 0 to 1
ax1.set_ylim([0, 1]) # Set y range to 0 to 1
ax1.set_ylabel("Fraction of positives")
ax1.set_title('Calibration plot of LSTM classifier for '+sign+' ('+station+')')
ax1.legend(loc="upper left")
ax1.xaxis.tick_top() # Move x-axis labels to top
ax1.xaxis.set_label_position('top') # Move x-axis label to top
# Position legend of plot 1 on x-axis
handles, labels = ax1.get_legend_handles_labels()
ax1.legend(handles, labels, ncol=2)
# Histogram as an "inset" plot
ax2 = plt.subplot(gs1[1], sharex=ax1) # Sharing x-axis with first plot
hist, bins = np.histogram(prob_pos, bins=10, range=(0, 1))
width = (bins[1] - bins[0]) * 0.8
centers = (bins[:-1] + bins[1:]) / 2
ax2.bar(centers, hist, align='center', width=width, alpha=0.5, color='green', label='Histogram')
ax2.set_xlabel("Predicted probability") # Add x-axis label
ax2.set_ylabel("Number of cases")
ax2.yaxis.set_major_locator(plt.MaxNLocator(5))
plt.subplots_adjust(hspace=0)
plt.savefig('Calibration_Histogram_'+station+'_'+sign+'.png')
plt.show()
import numpy as np
from sklearn.metrics import roc_auc_score, recall_score, precision_score
from sklearn.utils import resample
# Define the number of bootstrap samples
n_bootstrap = 2000
# Create empty lists to store bootstrapped metrics
sensitivity_values = []
specificity_values = []
ppv_values = []
npv_values = []
auroc_values = []
# Perform bootstrap resampling
for _ in range(n_bootstrap):
    # Sample with replacement from the predictions
    indices = resample(range(len(y_pred)), replace=True)
    bootstrap_y_test = y_test[indices]
    bootstrap_y_pred = y_pred[indices]
    bootstrap_prob_pos = prob_pos[indices]
    # Calculate metrics for the bootstrap sample
    sensitivity = recall_score(bootstrap_y_test, bootstrap_y_pred)
    specificity = recall_score(bootstrap_y_test, bootstrap_y_pred, pos_label=0)
    ppv = precision_score(bootstrap_y_test, bootstrap_y_pred)
    npv = precision_score(bootstrap_y_test, bootstrap_y_pred, pos_label=0)
    auroc = roc_auc_score(bootstrap_y_test, bootstrap_prob_pos)
    # Append the metric values to the lists
    sensitivity_values.append(sensitivity)
    specificity_values.append(specificity)
    ppv_values.append(ppv)
    npv_values.append(npv)
    auroc_values.append(auroc)
# Calculate confidence intervals
confidence_interval = 0.95
alpha = (1 - confidence_interval) / 2
lower_percentile = alpha * 100
upper_percentile = (1 - alpha) * 100
# Calculate confidence intervals for each metric
sensitivity_ci = np.percentile(sensitivity_values, [lower_percentile, upper_percentile])
specificity_ci = np.percentile(specificity_values, [lower_percentile, upper_percentile])
ppv_ci = np.percentile(ppv_values, [lower_percentile, upper_percentile])
npv_ci = np.percentile(npv_values, [lower_percentile, upper_percentile])
auroc_ci = np.percentile(auroc_values, [lower_percentile, upper_percentile])
# Point estimates on the full test set (the loop variables hold only the last bootstrap sample)
sensitivity = recall_score(y_test, y_pred)
specificity = recall_score(y_test, y_pred, pos_label=0)
ppv = precision_score(y_test, y_pred)
npv = precision_score(y_test, y_pred, pos_label=0)
auroc = roc_auc_score(y_test, prob_pos)
with open("metrics_"+station+"_"+sign+".txt", "w") as file:
    file.write("Sensitivity: {:.3f} ({:.3f}-{:.3f})\n".format(sensitivity, sensitivity_ci[0], sensitivity_ci[1]))
    file.write("Specificity: {:.3f} ({:.3f}-{:.3f})\n".format(specificity, specificity_ci[0], specificity_ci[1]))
    file.write("Positive Predictive Value (PPV): {:.3f} ({:.3f}-{:.3f})\n".format(ppv, ppv_ci[0], ppv_ci[1]))
    file.write("Negative Predictive Value (NPV): {:.3f} ({:.3f}-{:.3f})\n".format(npv, npv_ci[0], npv_ci[1]))
    file.write("Area Under the ROC Curve (AUROC): {:.3f} ({:.3f}-{:.3f})".format(auroc, auroc_ci[0], auroc_ci[1]))
from sklearn.metrics import roc_curve, auc
from numpy import interp
n_bootstraps = 2000
rng_seed = 42 # control reproducibility
bootstrapped_tprs = []
bootstrapped_auc = []
base_fpr = np.linspace(0, 1, 101) # Fix the FPR values for interpolation
rng = np.random.RandomState(rng_seed)
for i in range(n_bootstraps):
    # Bootstrap by sampling with replacement on the prediction indices
    indices = rng.randint(0, len(prob_pos), len(prob_pos))
    if len(np.unique(y_test[indices])) < 2:
        # We need at least one positive and one negative sample for ROC AUC
        # to be defined: reject the sample
        continue
    fpr_boot, tpr_boot, _ = roc_curve(y_test[indices], prob_pos[indices])
    bootstrapped_auc.append(auc(fpr_boot, tpr_boot))
    # Interpolate TPR for the fixed FPR values
    tpr_boot_interp = interp(base_fpr, fpr_boot, tpr_boot)
    bootstrapped_tprs.append(tpr_boot_interp)
# Convert list to a 2D array
bootstrapped_tprs = np.array(bootstrapped_tprs)
# Compute the lower and upper bound of the ROC curve
tprs_lower = np.percentile(bootstrapped_tprs, 2.5, axis=0)
tprs_upper = np.percentile(bootstrapped_tprs, 97.5, axis=0)
# Compute the lower and upper bound of the ROC AUC
auc_lower = np.percentile(bootstrapped_auc, 2.5)
auc_upper = np.percentile(bootstrapped_auc, 97.5)
print("Confidence interval for the ROC curve: [{:0.3f} - {:0.3}]".format(auc_lower, auc_upper))
Confidence interval for the ROC curve: [0.840 - 0.979]
fig2 = plt.figure(figsize=(10, 6))
specificity = 1 - base_fpr # Calculate Specificity (1 - False Positive Rate)
ax3 = plt.subplot(1, 1, 1)
ax3.plot([1, 0], [0, 1], 'k--')  # chance diagonal (specificity on the x-axis)
ax3.plot(specificity, np.median(bootstrapped_tprs, axis=0), label=r'ROC (AUC = %0.2f $\pm$ %0.2f)' % (auroc, (auc_upper-auc_lower)/2), color='blue')
ax3.fill_between(specificity, tprs_lower, tprs_upper, color='grey', alpha=0.2)
ax3.set_xlabel('Specificity (True Negative Rate)')
ax3.set_ylabel('Sensitivity (True Positive Rate)')
ax3.set_title('Receiver Operating Characteristic for '+sign+ ' ('+station+')')
ax3.legend(loc='lower right')
ax3.set_xlim([1, 0]) # Invert x-axis limits
ax3.set_ylim([0, 1])
ax3.set_xticks([1,0.8,0.6,0.4,0.2,0]) # Set x-axis tick locations
ax3.set_xticklabels([1,0.8,0.6,0.4,0.2,0]) # Set x-axis tick labels
plt.tight_layout()
plt.savefig('ROC_Curve_'+station+'_'+sign+'.png')
plt.show()