from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
# NOTE(review): `keras.src` looks like Keras' internal namespace; the public
# entry point is presumably `keras.saving.load_model` -- confirm and switch.
from keras.src.saving import load_model


# Machine-specific input/output locations.
MODEL_PATH = Path("C:/Users/Marco/autoencoder_model.keras")
DATA_PATH = Path("C:/Users/Marco/test_data.npy")
ERRORS_PATH = Path("C:/Users/Marco/reconstruction_errors.npy")

# Significance level below which normality of the errors is rejected.
SIGNIFICANCE_LEVEL = 0.05
# Percentile used as the threshold when the errors are not normal.
FALLBACK_PERCENTILE = 95
# Number of standard deviations above the mean used when they are normal.
N_SIGMAS = 3


def compute_reconstruction_errors(data, reconstructions):
    """Return the mean absolute reconstruction error of every sample.

    Args:
        data: Array whose first axis indexes the samples.
        reconstructions: Model output for ``data``; must have the same shape.

    Returns:
        A 1-D array with one error value per sample (for inputs with at
        least two dimensions).

    Raises:
        ValueError: If the two arrays do not have the same shape.
    """
    data = np.asarray(data)
    reconstructions = np.asarray(reconstructions)
    if data.shape != reconstructions.shape:
        # Without this check NumPy would silently broadcast the operands and
        # produce meaningless errors.
        raise ValueError(
            f"Shape mismatch: data {data.shape} vs "
            f"reconstructions {reconstructions.shape}"
        )
    # Reduce over every non-sample axis so that image/sequence inputs
    # (ndim > 2) also yield one scalar per sample. For 2-D input this is
    # exactly `axis=1`.
    feature_axes = tuple(range(1, data.ndim))
    return np.mean(np.abs(data - reconstructions), axis=feature_axes)


def plot_error_histogram(errors, mu, std):
    """Show a density histogram of ``errors`` with a fitted normal curve.

    Also marks the ``mu + 2 * std`` and ``mu + 3 * std`` candidate
    thresholds. Blocks until the figure window is closed.
    """
    plt.figure(figsize=(10, 5))
    plt.hist(errors, bins=30, alpha=0.7, color="blue", edgecolor="black", density=True)

    # Draw the normal pdf across the x-range chosen by the histogram.
    xmin, xmax = plt.xlim()
    x = np.linspace(xmin, xmax, 100)
    p = stats.norm.pdf(x, mu, std)
    plt.plot(x, p, "r", linewidth=2, label=f"Normal Dist (mean={mu:.4f}, stdDeviation={std:.4f})")

    plt.axvline(mu + 3 * std, color="red", linestyle="dashed", linewidth=2, label="mean+ 3deviations Threshold")
    plt.axvline(mu + 2 * std, color="orange", linestyle="dashed", linewidth=2, label="mean + 2deviations Threshold")

    plt.title("Histogram of Reconstruction Errors")
    plt.xlabel("Reconstruction Error")
    plt.ylabel("Density")
    plt.legend()
    plt.grid()
    plt.show()


def normality_pvalues(errors):
    """Return ``(shapiro_p, ks_p)`` for the hypothesis that errors are normal.

    NOTE: the Kolmogorov-Smirnov test is run against a normal distribution
    whose mean and standard deviation are estimated from the same sample,
    which biases its p-value upward (it rejects less often than it should).
    SciPy also warns that the Shapiro-Wilk p-value may be inaccurate for
    samples larger than 5000. Treat both as rough indicators.
    """
    mu, std = np.mean(errors), np.std(errors)
    shapiro_p = stats.shapiro(errors).pvalue
    ks_p = stats.kstest(errors, "norm", args=(mu, std)).pvalue
    return shapiro_p, ks_p


def select_threshold(errors, shapiro_p, ks_p, alpha=SIGNIFICANCE_LEVEL):
    """Pick an anomaly threshold based on the normality test results.

    Args:
        errors: 1-D array of per-sample reconstruction errors.
        shapiro_p: p-value of the Shapiro-Wilk test.
        ks_p: p-value of the Kolmogorov-Smirnov test.
        alpha: Significance level; normality is rejected when either
            p-value is below it.

    Returns:
        ``(threshold, is_normal)``. The threshold is ``mean + 3 * std`` when
        normality is not rejected, otherwise the 95th percentile.
    """
    if shapiro_p < alpha or ks_p < alpha:
        return np.percentile(errors, FALLBACK_PERCENTILE), False
    return np.mean(errors) + N_SIGMAS * np.std(errors), True


def main():
    """Load the model and data, analyse the errors and save them to disk."""
    autoencoder = load_model(MODEL_PATH)
    val_data = np.load(DATA_PATH)
    reconstructions = autoencoder.predict(val_data)

    errors = compute_reconstruction_errors(val_data, reconstructions)
    mu, std = np.mean(errors), np.std(errors)
    plot_error_histogram(errors, mu, std)

    # test for normality
    shapiro_p, ks_p = normality_pvalues(errors)
    print(f"Shapiro-Wilk Test p-value: {shapiro_p:.4f}")
    print(f"Kolmogorov-Smirnov Test p-value: {ks_p:.4f}")

    # Decision Rule: If p value < 0.05, then the data is not normally distributed
    threshold, is_normal = select_threshold(errors, shapiro_p, ks_p)
    if not is_normal:
        print("Since the reconstruction errors don't follow a normal distribution we have to use te percentile based approach.")
        print(f"* Using 95th Percentile Threshold: {threshold:.4f}")
    else:
        print("The reconstruction errors follow a normal distribution, therefore mean + 3deviations is a valid thresholding method.")
        print(f"* Using Mean + 3 Std Dev Threshold: {threshold:.4f}")

    np.save(ERRORS_PATH, errors)
    print("Reconstruction errors saved to reconstruction_errors.npy")


if __name__ == "__main__":
    main()
0 commit comments