import numpy as np
import kaldi_native_fbank as knf
import matplotlib.pyplot as plt
from scipy.io import wavfile


def compute_fbank(samples: np.ndarray, sample_rate: int) -> np.ndarray:
    opts = knf.FbankOptions()
    opts.frame_opts.dither = 0.0
    opts.frame_opts.samp_freq = sample_rate
    opts.frame_opts.snip_edges = True
    opts.frame_opts.frame_length_ms = 25.0
    opts.frame_opts.frame_shift_ms = 10.0
    opts.mel_opts.num_bins = 80
    opts.mel_opts.debug_mel = False

    fbank = knf.OnlineFbank(opts)
    fbank.accept_waveform(sample_rate, samples.tolist())
    fbank.input_finished()

    num_frames = fbank.num_frames_ready
    features = []
    for i in range(num_frames):
        frame = fbank.get_frame(i)
        features.append(frame)
    features = np.stack(features, axis=0)

    # Apply Cepstral Mean Normalization (CMN)
    features = features - np.mean(features, axis=0)

    return features


def main():
    # Load the wave file
    file_path = "../testdata/jfk_f32le.wav"  # Specify the path to your wave file
    sample_rate, samples = wavfile.read(file_path)

    # Ensure samples are in the correct format
    samples = samples.astype(np.float32)
    if len(samples.shape) > 1:
        samples = samples[:, 0]  # Take the first channel if stereo

    # Compute filterbank features
    features = compute_fbank(samples, sample_rate)
    features = features.T
    np.savez("./testdata/kaldi_native_fbank_jfk.npz", features=features)
    print("Python Mel spectrogram shape:", features.shape)

    # Plot the spectrogram
    plt.figure(figsize=(10, 4))
    plt.imshow(features, aspect="auto", origin="lower", cmap="viridis")
    plt.title("Mel Spectrogram")
    plt.ylabel("Mel Filter Banks")
    plt.xlabel("Frame (Time)")
    plt.colorbar(label="Magnitude (dB)")
    plt.show()


if __name__ == "__main__":
    main()