import pandas as pd
import numpy as np
import tensorflow as tf
import keras
# Loading Dataset with pandas
df = pd.read_csv('vitalsign_dataset')
df
icu = pd.read_pickle("patient_list.pkl")
icu
vital_signs = ['HF', 'sys', 'map', 'dia', 'SpO2', 'Ta']
# Work on one vital sign at a time; vital_signs[0] selects 'HF'
df = df.loc[df['Vitalsign']==vital_signs[0], :].reset_index(drop=True)
# Cast columns to the expected dtypes
df['value_as_number'] = df['value_as_number'].astype(float)
df['artefact'] = df['artefact'].astype(int)
# Convert 'measurement_datetime' to datetime if it's not already
df['measurement_datetime'] = pd.to_datetime(df['measurement_datetime'])
# Express each measurement as seconds since that patient's first measurement
df['measurement_datetime_seconds'] = df.groupby('person_id')['measurement_datetime'].transform(lambda x: (x - x.min()).dt.total_seconds())
# Sort the dataframe by 'measurement_datetime'
df = df.sort_values(by=['person_id', 'measurement_datetime_seconds'])
df['value_as_number_diff'] = df.groupby('person_id')['value_as_number'].diff(1)
# Compute the difference in 'measurement_datetime' for each 'person_id' in seconds
df['time_diff'] = df.groupby('person_id')['measurement_datetime_seconds'].diff()
# Compute the rate of change of 'Vitalsign' (derivative)
df['value_as_number_rate'] = df['value_as_number_diff'] / df['time_diff']
# Replace any inf/nan values with forward fill
df = df.replace([np.inf, -np.inf], np.nan)
df = df.ffill()  # fillna(method='ffill') is deprecated in recent pandas
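# Note: forward fill cannot impute each patient's very first diff/rate rows
# (nothing precedes them). A minimal sketch of one option; filling with 0 is
# an assumption, not part of the original pipeline:
df[['value_as_number_diff', 'time_diff', 'value_as_number_rate']] = (
    df[['value_as_number_diff', 'time_diff', 'value_as_number_rate']].fillna(0)
)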
from sklearn.model_selection import train_test_split
# Assuming df is your entire dataframe
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['person_id'])
train_df, valid_df = train_test_split(train_df, test_size=0.25, random_state=42, stratify=train_df['person_id']) # 0.25 x 0.8 = 0.2
train_df = train_df.sort_values(by=['person_id', 'measurement_datetime_seconds'])
test_df = test_df.sort_values(by=['person_id', 'measurement_datetime_seconds'])
valid_df = valid_df.sort_values(by=['person_id', 'measurement_datetime_seconds'])
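# Note: a row-level split stratified on 'person_id' puts rows from the same
# patient into train, validation, and test, so windows from one patient can
# leak across splits. If patient-level separation is wanted, a grouped split
# is one alternative (a sketch under that assumption, not the split used above):
from sklearn.model_selection import GroupShuffleSplit
gss = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, test_idx = next(gss.split(df, groups=df['person_id']))
train_df_g, test_df_g = df.iloc[train_idx], df.iloc[test_idx]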
from sklearn.preprocessing import MinMaxScaler, StandardScaler
# Define sequence length
sequence_length = 12
# Create sequences
def create_sequences(df, sequence_length):
    data_sequences = []
    label_sequences = []
    person_ids = df['person_id'].unique()
    # Exclude 'artefact' and 'person_id' columns from features
    features = df.columns.difference(['artefact', 'person_id'])
    for id in person_ids:
        person_data = df[df['person_id'] == id]
        for i in range(len(person_data) - sequence_length):
            # Get the feature values for the current sequence
            seq_data = person_data[features].iloc[i:i+sequence_length].values
            data_sequences.append(seq_data)
            # Get the label for the current sequence
            seq_label = person_data['artefact'].iloc[i+sequence_length]
            label_sequences.append(seq_label)
    return np.array(data_sequences), np.array(label_sequences)
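# Quick sanity check of the windowing on a hypothetical two-patient toy frame
# (names and values are illustrative only, not from the dataset):
toy = pd.DataFrame({
    'person_id': [1]*8 + [2]*8,
    'value': np.arange(16, dtype=float),
    'artefact': [0]*15 + [1],
})
x_toy, y_toy = create_sequences(toy, sequence_length=4)
print(x_toy.shape, y_toy.shape)  # (8, 4, 1) and (8,): 8-4=4 windows per patient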
# Normalize numerical features
numerical_features = ['value_as_number', 'measurement_datetime_seconds', 'value_as_number_rate']
scaler = StandardScaler() # or use MinMaxScaler()
# Fit on the training data
scaler.fit(train_df[numerical_features])
# Transform the training and validation data
train_df_normalized = train_df.copy()
valid_df_normalized = valid_df.copy()
test_df_normalized = test_df.copy()
train_df_normalized[numerical_features] = scaler.transform(train_df[numerical_features])
valid_df_normalized[numerical_features] = scaler.transform(valid_df[numerical_features])
test_df_normalized[numerical_features] = scaler.transform(test_df[numerical_features])
# Create sequences
x_train, y_train = create_sequences(train_df_normalized, sequence_length)
x_valid, y_valid = create_sequences(valid_df_normalized, sequence_length)
x_test, y_test = create_sequences(test_df_normalized, sequence_length)
x_valid.shape
(13399, 12, 3)
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import LSTM, Dropout, Dense, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
def define_model(num_neurons=64, history_length=5, input_size=3, number_of_outputs=1):
    """
    Defines an LSTM model for sequence prediction.

    Args:
        num_neurons (int): The number of neurons in each LSTM layer.
        history_length (int): The history length of the input sequences.
        input_size (int): The number of features in each input sequence.
        number_of_outputs (int): The number of output units.

    Returns:
        Model: The defined LSTM model.
    """
    model = Sequential()
    model.add(LSTM(num_neurons, input_shape=(history_length, input_size), return_sequences=True))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(LSTM(num_neurons))
    model.add(Dense(number_of_outputs, activation='sigmoid'))
    return model
# Model configuration
model = define_model(num_neurons=64, history_length=sequence_length, input_size=3, number_of_outputs=1)
# 'lr' is deprecated in Keras optimizers; use 'learning_rate'
model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0001),
              metrics=['accuracy', tf.keras.metrics.TruePositives(), tf.keras.metrics.TrueNegatives(),
                       tf.keras.metrics.FalsePositives(), tf.keras.metrics.FalseNegatives(), tf.keras.metrics.AUC()])
# Define callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=5)
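# Note: the training log below shows only ~2% positive labels, so the model
# can score high accuracy by predicting the majority class. One common remedy
# is class weighting; a sketch (passing it to model.fit is an assumption, the
# original run trains unweighted):
from sklearn.utils.class_weight import compute_class_weight
weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
class_weight = dict(zip(np.unique(y_train), weights))
# e.g. model.fit(..., class_weight=class_weight)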
# Fit the model
history = model.fit(
    x_train, y_train,
    validation_data=(x_valid, y_valid),
    epochs=150,
    batch_size=128,
    callbacks=[early_stopping]
)
Epoch 1/150
334/334 [==============================] - 5s 8ms/step - loss: 0.1090 - accuracy: 0.9743 - true_positives_9: 27.0000 - true_negatives_9: 41585.0000 - false_positives_9: 181.0000 - false_negatives_9: 918.0000 - auc_9: 0.6783 - val_loss: 0.0948 - val_accuracy: 0.9796 - val_true_positives_9: 1.0000 - val_true_negatives_9: 13124.0000 - val_false_positives_9: 1.0000 - val_false_negatives_9: 273.0000 - val_auc_9: 0.6701
Epoch 2/150
334/334 [==============================] - 2s 6ms/step - loss: 0.0923 - accuracy: 0.9782 - true_positives_9: 36.0000 - true_negatives_9: 41745.0000 - false_positives_9: 21.0000 - false_negatives_9: 909.0000 - auc_9: 0.7532 - val_loss: 0.0942 - val_accuracy: 0.9793 - val_true_positives_9: 1.0000 - val_true_negatives_9: 13121.0000 - val_false_positives_9: 4.0000 - val_false_negatives_9: 273.0000 - val_auc_9: 0.7022
Epoch 3/150
334/334 [==============================] - 2s 7ms/step - loss: 0.0910 - accuracy: 0.9782 - true_positives_9: 30.0000 - true_negatives_9: 41750.0000 - false_positives_9: 16.0000 - false_negatives_9: 915.0000 - auc_9: 0.7618 - val_loss: 0.0924 - val_accuracy: 0.9795 - val_true_positives_9: 1.0000 - val_true_negatives_9: 13123.0000 - val_false_positives_9: 2.0000 - val_false_negatives_9: 273.0000 - val_auc_9: 0.7126
Epoch 4/150
334/334 [==============================] - 2s 7ms/step - loss: 0.0898 - accuracy: 0.9785 - true_positives_9: 42.0000 - true_negatives_9: 41751.0000 - false_positives_9: 15.0000 - false_negatives_9: 903.0000 - auc_9: 0.7686 - val_loss: 0.0933 - val_accuracy: 0.9790 - val_true_positives_9: 1.0000 - val_true_negatives_9: 13117.0000 - val_false_positives_9: 8.0000 - val_false_negatives_9: 273.0000 - val_auc_9: 0.7224
Epoch 5/150
334/334 [==============================] - 2s 7ms/step - loss: 0.0888 - accuracy: 0.9784 - true_positives_9: 45.0000 - true_negatives_9: 41742.0000 - false_positives_9: 24.0000 - false_negatives_9: 900.0000 - auc_9: 0.7722 - val_loss: 0.0928 - val_accuracy: 0.9793 - val_true_positives_9: 3.0000 - val_true_negatives_9: 13118.0000 - val_false_positives_9: 7.0000 - val_false_negatives_9: 271.0000 - val_auc_9: 0.7094
Epoch 6/150
334/334 [==============================] - 2s 7ms/step - loss: 0.0886 - accuracy: 0.9786 - true_positives_9: 54.0000 - true_negatives_9: 41744.0000 - false_positives_9: 22.0000 - false_negatives_9: 891.0000 - auc_9: 0.7835 - val_loss: 0.0921 - val_accuracy: 0.9792 - val_true_positives_9: 0.0000e+00 - val_true_negatives_9: 13120.0000 - val_false_positives_9: 5.0000 - val_false_negatives_9: 274.0000 - val_auc_9: 0.7178
Epoch 7/150
334/334 [==============================] - 2s 7ms/step - loss: 0.0877 - accuracy: 0.9786 - true_positives_9: 51.0000 - true_negatives_9: 41747.0000 - false_positives_9: 19.0000 - false_negatives_9: 894.0000 - auc_9: 0.7845 - val_loss: 0.0919 - val_accuracy: 0.9793 - val_true_positives_9: 1.0000 - val_true_negatives_9: 13121.0000 - val_false_positives_9: 4.0000 - val_false_negatives_9: 273.0000 - val_auc_9: 0.7086
Epoch 8/150
334/334 [==============================] - 2s 7ms/step - loss: 0.0872 - accuracy: 0.9787 - true_positives_9: 58.0000 - true_negatives_9: 41743.0000 - false_positives_9: 23.0000 - false_negatives_9: 887.0000 - auc_9: 0.7867 - val_loss: 0.0920 - val_accuracy: 0.9795 - val_true_positives_9: 0.0000e+00 - val_true_negatives_9: 13124.0000 - val_false_positives_9: 1.0000 - val_false_negatives_9: 274.0000 - val_auc_9: 0.7002
Epoch 9/150
334/334 [==============================] - 2s 7ms/step - loss: 0.0864 - accuracy: 0.9790 - true_positives_9: 68.0000 - true_negatives_9: 41745.0000 - false_positives_9: 21.0000 - false_negatives_9: 877.0000 - auc_9: 0.7912 - val_loss: 0.0911 - val_accuracy: 0.9793 - val_true_positives_9: 1.0000 - val_true_negatives_9: 13121.0000 - val_false_positives_9: 4.0000 - val_false_negatives_9: 273.0000 - val_auc_9: 0.7237
Epoch 10/150
334/334 [==============================] - 2s 7ms/step - loss: 0.0859 - accuracy: 0.9788 - true_positives_9: 61.0000 - true_negatives_9: 41746.0000 - false_positives_9: 20.0000 - false_negatives_9: 884.0000 - auc_9: 0.7936 - val_loss: 0.0937 - val_accuracy: 0.9792 - val_true_positives_9: 1.0000 - val_true_negatives_9: 13119.0000 - val_false_positives_9: 6.0000 - val_false_negatives_9: 273.0000 - val_auc_9: 0.7088
Epoch 11/150
334/334 [==============================] - 2s 7ms/step - loss: 0.0852 - accuracy: 0.9790 - true_positives_9: 72.0000 - true_negatives_9: 41740.0000 - false_positives_9: 26.0000 - false_negatives_9: 873.0000 - auc_9: 0.7943 - val_loss: 0.0951 - val_accuracy: 0.9784 - val_true_positives_9: 1.0000 - val_true_negatives_9: 13108.0000 - val_false_positives_9: 17.0000 - val_false_negatives_9: 273.0000 - val_auc_9: 0.7092
Epoch 12/150
334/334 [==============================] - 2s 7ms/step - loss: 0.0853 - accuracy: 0.9794 - true_positives_9: 84.0000 - true_negatives_9: 41746.0000 - false_positives_9: 20.0000 - false_negatives_9: 861.0000 - auc_9: 0.7913 - val_loss: 0.0929 - val_accuracy: 0.9789 - val_true_positives_9: 1.0000 - val_true_negatives_9: 13115.0000 - val_false_positives_9: 10.0000 - val_false_negatives_9: 273.0000 - val_auc_9: 0.7190
Epoch 13/150
334/334 [==============================] - 2s 7ms/step - loss: 0.0842 - accuracy: 0.9794 - true_positives_9: 91.0000 - true_negatives_9: 41740.0000 - false_positives_9: 26.0000 - false_negatives_9: 854.0000 - auc_9: 0.7941 - val_loss: 0.0926 - val_accuracy: 0.9791 - val_true_positives_9: 1.0000 - val_true_negatives_9: 13118.0000 - val_false_positives_9: 7.0000 - val_false_negatives_9: 273.0000 - val_auc_9: 0.7221
Epoch 14/150
334/334 [==============================] - 2s 7ms/step - loss: 0.0836 - accuracy: 0.9795 - true_positives_9: 95.0000 - true_negatives_9: 41741.0000 - false_positives_9: 25.0000 - false_negatives_9: 850.0000 - auc_9: 0.7984 - val_loss: 0.0931 - val_accuracy: 0.9785 - val_true_positives_9: 1.0000 - val_true_negatives_9: 13110.0000 - val_false_positives_9: 15.0000 - val_false_negatives_9: 273.0000 - val_auc_9: 0.7167
X_test = x_test
X_train = x_train
neg_neg = 0
neg_pos = 0
pos_neg = 0
pos_pos = 0
counter = 0
X_train_len = X_train.shape[0]*X_train.shape[1]
# Note: df_sign, batch_size, station, sign, and get_group are assumed to be
# defined earlier in the notebook (per-sign dataframe, batch size, ward filter).
for batch_index in range(X_test.shape[0]):
    label_curve = []
    predicted_curve = []
    main_curve = []
    prob_pos = []
    preds = model.predict(X_test[batch_index,:,:,:])
    for i in range(X_test.shape[1]):
        patient_id = df_sign.iloc[i+X_train_len+batch_index*batch_size, 1]
        if get_group(patient_id, icu)==station or station=='ALL':
            pred = preds[i]
            label = y_test[batch_index,i]
            main_curve.append(X_test[batch_index, i, -1, 0])
            label_curve.append(label)
            if label>0:
                counter+=1
            if label==0:
                if pred<0.5:
                    neg_neg+=1
                else:
                    neg_pos+=1
            else:
                if pred<0.5:
                    pos_neg+=1
                else:
                    pos_pos+=1
            predicted_curve.append(pred)
    prob_pos.append(preds.flatten())
sum_neg = neg_neg + neg_pos
sum_pos = pos_neg + pos_pos
print(neg_neg, neg_pos, pos_neg, pos_pos)
print(neg_neg/sum_neg, pos_pos/sum_pos)
print((neg_neg+ pos_pos) / ((sum_neg+ sum_pos)))
32/32 [==============================] - 0s 1ms/step
32/32 [==============================] - 0s 1ms/step
32/32 [==============================] - 0s 1ms/step
3036 6 10 20
0.9980276134122288 0.6666666666666666
0.9947916666666666
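# Cross-check of the hand-tallied counts with sklearn, assuming the filtered
# labels and probabilities were also collected into flat arrays (hypothetical
# names y_flat / p_flat, not defined in the original notebook):
from sklearn.metrics import confusion_matrix
tn, fp, fn, tp = confusion_matrix(y_flat, (p_flat >= 0.5).astype(int)).ravel()
print(tn, fp, fn, tp)                  # should match neg_neg, neg_pos, pos_neg, pos_pos
print(tn / (tn + fp), tp / (tp + fn))  # specificity, sensitivity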
X_test.shape
(3, 1024, 7, 6)
y_test = y_test.flatten()
y_test.shape
(3072,)
prob_pos = []
for batch_index in range(X_test.shape[0]):
    prob_pos.append(model.predict(X_test[batch_index,:,:,:]))
prob_pos_train = []
for batch_index in range(x_train.shape[0]):
    prob_pos_train.append(model.predict(x_train[batch_index,:,:,:]))
32/32 [==============================] - 0s 1ms/step
(progress line repeated for each remaining batch)
prob_pos = np.array(prob_pos).flatten()
prob_pos_train = np.array(prob_pos_train).flatten()
import matplotlib.pyplot as plt
# Create a color array based on TP, FP, TN, FN
colors = []
for i in range(len(label_curve)):
    label = label_curve[i]
    pred = predicted_curve[i]
    if label > 0 and pred >= 0.5:  # TP: actual positive, predicted positive
        colors.append('green')
    elif label == 0 and pred < 0.5:  # TN: actual negative, predicted negative
        colors.append('blue')
    elif label == 0 and pred >= 0.5:  # FP: actual negative, predicted positive
        colors.append('red')
    else:  # FN: actual positive, predicted negative
        colors.append('orange')
# Convert colors to a numpy array
colors = np.array(colors)
# Create a scatter plot of main_curve with color-coding based on prediction correctness
plt.scatter(range(len(main_curve)), main_curve, c=colors, s=15) # s parameter controls the size of dots
plt.plot(range(len(main_curve)), main_curve, color='black', linewidth=0.5) # plot the line
# Title and labels
plt.title('Example Signal with Prediction Outcomes')
plt.xlabel('Time Step (index)')
plt.ylabel('Example Signal')
# Setting the x limit
plt.xlim([400, 600])
# Legend
plt.scatter([], [], color='green', label='True Positive', s=15)
plt.scatter([], [], color='blue', label='True Negative', s=15)
plt.scatter([], [], color='red', label='False Positive', s=15)
plt.scatter([], [], color='orange', label='False Negative', s=15)
plt.legend()
plt.savefig('Example_plot_'+station+'_'+sign+'.png')
plt.show()
from sklearn.linear_model import LogisticRegression
raw_probabilities = prob_pos_train.reshape(-1, 1)
# Fit the Logistic Regression to calibrate the probabilities
lr = LogisticRegression().fit(raw_probabilities, y_train.flatten())
calibrated_probabilities = lr.predict_proba(prob_pos.reshape(-1, 1)) # Calibrate probabilities
prob_pos = calibrated_probabilities
prob_pos = prob_pos[:,1]
y_pred = (prob_pos > 0.5).astype(int)
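# One way to quantify what the Platt-style recalibration above buys is the
# Brier score of the calibrated probabilities (lower is better); a minimal sketch:
from sklearn.metrics import brier_score_loss
print("Brier score (calibrated):", brier_score_loss(y_test, prob_pos))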
import seaborn as sns
sns.set_style("white")
from sklearn.calibration import calibration_curve
fraction_of_positives, mean_predicted_value = calibration_curve(y_test, prob_pos, n_bins=10)
from sklearn.metrics import roc_curve, roc_auc_score
# Calculate the ROC curve points
fpr, tpr, thresholds = roc_curve(y_test, prob_pos)
# Calculate the AUROC score
auroc = roc_auc_score(y_test, prob_pos)
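# With on the order of 1% positive windows (see the confusion counts above),
# AUROC can look flattering; average precision (area under the precision-recall
# curve) is a complementary summary. A sketch:
from sklearn.metrics import average_precision_score
auprc = average_precision_score(y_test, prob_pos)
print("AUPRC:", auprc, "positive rate:", y_test.mean())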
# Figure 1 for Calibration curve and Histogram
fig1 = plt.figure(figsize=(10, 8)) # Set the overall figure size
gs1 = plt.GridSpec(2, 1, height_ratios=[1, 1]) # Set height ratios to 1:1 for equal vertical size
# Calibration curve
ax1 = plt.subplot(gs1[0])
ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
ax1.plot(mean_predicted_value, fraction_of_positives, "s-", label='LSTM classifier', color='blue')
ax1.set_xlim([0, 1]) # Set x range to 0 to 1
ax1.set_ylim([0, 1]) # Set y range to 0 to 1
ax1.set_ylabel("Fraction of positives")
ax1.set_title('Calibration plot of LSTM classifier for '+sign+' ('+station+')')
ax1.legend(loc="upper left")
ax1.xaxis.tick_top() # Move x-axis labels to top
ax1.xaxis.set_label_position('top') # Move x-axis label to top
# Position legend of plot 1 on x-axis
handles, labels = ax1.get_legend_handles_labels()
ax1.legend(handles, labels, ncol=2)
# Histogram as an "inset" plot
ax2 = plt.subplot(gs1[1], sharex=ax1) # Sharing x-axis with first plot
hist, bins = np.histogram(prob_pos, bins=10, range=(0, 1))
width = (bins[1] - bins[0]) * 0.8
centers = (bins[:-1] + bins[1:]) / 2
ax2.bar(centers, hist, align='center', width=width, alpha=0.5, color='green', label='Histogram')
ax2.set_xlabel("Predicted probability") # Add x-axis label
ax2.set_ylabel("Number of cases")
ax2.yaxis.set_major_locator(plt.MaxNLocator(5))
plt.subplots_adjust(hspace=0)
plt.savefig('Calibration_Histogram_'+station+'_'+sign+'.png')
plt.show()
import numpy as np
from sklearn.metrics import roc_auc_score, recall_score, precision_score
from sklearn.utils import resample
# Define the number of bootstrap samples
n_bootstrap = 2000
# Create empty lists to store bootstrapped metrics
sensitivity_values = []
specificity_values = []
ppv_values = []
npv_values = []
auroc_values = []
# Perform bootstrap resampling
for _ in range(n_bootstrap):
    # Sample with replacement from the predictions
    indices = resample(range(len(y_pred)), replace=True)
    bootstrap_y_test = y_test[indices]
    bootstrap_y_pred = y_pred[indices]
    bootstrap_prob_pos = prob_pos[indices]
    # Calculate metrics for the bootstrap sample
    sensitivity = recall_score(bootstrap_y_test, bootstrap_y_pred)
    specificity = recall_score(bootstrap_y_test, bootstrap_y_pred, pos_label=0)
    ppv = precision_score(bootstrap_y_test, bootstrap_y_pred)
    npv = precision_score(bootstrap_y_test, bootstrap_y_pred, pos_label=0)
    auroc = roc_auc_score(bootstrap_y_test, bootstrap_prob_pos)
    # Append the metric values to the lists
    sensitivity_values.append(sensitivity)
    specificity_values.append(specificity)
    ppv_values.append(ppv)
    npv_values.append(npv)
    auroc_values.append(auroc)
# Calculate confidence intervals
confidence_interval = 0.95
alpha = (1 - confidence_interval) / 2
lower_percentile = alpha * 100
upper_percentile = (1 - alpha) * 100
# Calculate confidence intervals for each metric
sensitivity_ci = np.percentile(sensitivity_values, [lower_percentile, upper_percentile])
specificity_ci = np.percentile(specificity_values, [lower_percentile, upper_percentile])
ppv_ci = np.percentile(ppv_values, [lower_percentile, upper_percentile])
npv_ci = np.percentile(npv_values, [lower_percentile, upper_percentile])
auroc_ci = np.percentile(auroc_values, [lower_percentile, upper_percentile])
# Point estimates on the full test set (the loop variables hold only the last bootstrap sample)
sensitivity = recall_score(y_test, y_pred)
specificity = recall_score(y_test, y_pred, pos_label=0)
ppv = precision_score(y_test, y_pred)
npv = precision_score(y_test, y_pred, pos_label=0)
auroc = roc_auc_score(y_test, prob_pos)
with open("metrics_"+station+"_"+sign+".txt", "w") as file:
    file.write("Sensitivity: {:.3f} ({:.3f}-{:.3f})\n".format(sensitivity, sensitivity_ci[0], sensitivity_ci[1]))
    file.write("Specificity: {:.3f} ({:.3f}-{:.3f})\n".format(specificity, specificity_ci[0], specificity_ci[1]))
    file.write("Positive Predictive Value (PPV): {:.3f} ({:.3f}-{:.3f})\n".format(ppv, ppv_ci[0], ppv_ci[1]))
    file.write("Negative Predictive Value (NPV): {:.3f} ({:.3f}-{:.3f})\n".format(npv, npv_ci[0], npv_ci[1]))
    file.write("Area Under the ROC Curve (AUROC): {:.3f} ({:.3f}-{:.3f})".format(auroc, auroc_ci[0], auroc_ci[1]))
from sklearn.metrics import roc_curve, auc
from numpy import interp
n_bootstraps = 2000
rng_seed = 42 # control reproducibility
bootstrapped_tprs = []
bootstrapped_auc = []
base_fpr = np.linspace(0, 1, 101) # Fix the FPR values for interpolation
rng = np.random.RandomState(rng_seed)
for i in range(n_bootstraps):
    # Bootstrap by sampling with replacement on the prediction indices
    indices = rng.randint(0, len(prob_pos), len(prob_pos))
    if len(np.unique(y_test[indices])) < 2:
        # We need at least one positive and one negative sample for ROC AUC
        # to be defined: reject the sample
        continue
    fpr_boot, tpr_boot, _ = roc_curve(y_test[indices], prob_pos[indices])
    bootstrapped_auc.append(auc(fpr_boot, tpr_boot))
    # Interpolate TPR for the fixed FPR values
    tpr_boot_interp = interp(base_fpr, fpr_boot, tpr_boot)
    bootstrapped_tprs.append(tpr_boot_interp)
# Convert list to a 2D array
bootstrapped_tprs = np.array(bootstrapped_tprs)
# Compute the lower and upper bound of the ROC curve
tprs_lower = np.percentile(bootstrapped_tprs, 2.5, axis=0)
tprs_upper = np.percentile(bootstrapped_tprs, 97.5, axis=0)
# Compute the lower and upper bound of the ROC AUC
auc_lower = np.percentile(bootstrapped_auc, 2.5)
auc_upper = np.percentile(bootstrapped_auc, 97.5)
print("Confidence interval for the ROC curve: [{:0.3f} - {:0.3}]".format(auc_lower, auc_upper))
Confidence interval for the ROC curve: [0.840 - 0.979]
fig2 = plt.figure(figsize=(10, 6))
specificity = 1 - base_fpr # Calculate Specificity (1 - False Positive Rate)
ax3 = plt.subplot(1, 1, 1)
ax3.plot([1, 0], [0, 1], 'k--')  # chance diagonal (specificity on the x-axis)
ax3.plot(specificity, np.median(bootstrapped_tprs, axis=0), label=r'ROC (AUC = %0.2f $\pm$ %0.2f)' % (auroc, (auc_upper-auc_lower)/2), color='blue')
ax3.fill_between(specificity, tprs_lower, tprs_upper, color='grey', alpha=0.2)
ax3.set_xlabel('Specificity (True Negative Rate)')
ax3.set_ylabel('Sensitivity (True Positive Rate)')
ax3.set_title('Receiver Operating Characteristic for '+sign+ ' ('+station+')')
ax3.legend(loc='lower right')
ax3.set_xlim([1, 0]) # Invert x-axis limits
ax3.set_ylim([0, 1])
ax3.set_xticks([1,0.8,0.6,0.4,0.2,0]) # Set x-axis tick locations
ax3.set_xticklabels([1,0.8,0.6,0.4,0.2,0]) # Set x-axis tick labels
plt.tight_layout()
plt.savefig('ROC_Curve_'+station+'_'+sign+'.png')
plt.show()