// Audio anomaly detection sketch: I2S microphone in, one-pixel NeoPixel status LED out.
//
// What the surviving text shows:
//   - i2s_init() installs the I2S driver on I2S_NUM_0 as an RX master with 32-bit
//     slots, left channel only, at SAMPLE_RATE, then assigns the BCLK / WS / data-in
//     pins and zeroes the DMA buffer.
//   - make_hann() fills win[] with a Hann window of length N_FFT.
//   - calc_band_edges() maps N_BANDS+1 log-spaced frequencies between 50 Hz and
//     SAMPLE_RATE/2 to FFT bin indices, clamps them to [1, N_FFT/2] and then pushes
//     each edge above its predecessor.
//   - capture_frame_blocking() calls i2s_read() in 256-sample chunks until N_FFT
//     samples have been gathered; it returns false if i2s_read() does not return ESP_OK.
//   - The feature extractor writes FEAT_DIM values; the last two are a spectral
//     centroid and the frequency at which cumulative vReal[k]^2 first reaches 85 %
//     of the total. The first N_BANDS slots are described by the FEAT_DIM comment
//     as band log-energies (their computation is not visible here).
//   - OnlineMeanStd keeps a running mean and M2 accumulator; std() returns 1.0 until
//     at least two values have been added.
//   - KMeans holds K_CLUSTERS centroids of FEAT_DIM floats; fit() runs
//     init_plus_plus() first when the model has not been initialized.
//   - online_led() shows red when anomaly > dist_median + ANOM_MULT * dist_mad,
//     green otherwise.
//   - loop() stores BOOTSTRAP_W raw feature vectors before building the model; once
//     running, non-anomalous frames update centroids with alpha = 0.05.
//
// NOTE(review): this copy of the file looks damaged and is not expected to compile:
//   1. Line breaks have been collapsed, so every `//` comment below now extends to
//      the end of its physical line and hides the code that follows it.
//   2. Text starting at a `<` that is directly followed by an identifier appears to
//      have been stripped (as if by an HTML tag filter): the header names after
//      `#include`, several `for` loop conditions and bodies, and the declarations
//      of feat_mean / feat_std / dist_median / dist_mad are missing.
//   3. The text ends mid-statement inside loop().
//   Restore the sketch from an intact copy before changing behavior.
// NOTE(review): fit() allocates its assignment buffer with malloc(); the matching
//   free() and a null check are not visible in this copy - confirm in the intact source.
#include #include #include "driver/i2s.h" #include // ===== Sparrow audio pins (ICS-43434 I2S mic) ===== static const int I2S_WS_PIN = 20; // LRCLK / WS static const int I2S_SCK_PIN = 18; // BCLK static const int I2S_SD_PIN = 19; // DOUT from mic // ===== LED ===== static const int NEOPIXEL_PIN = 3; Adafruit_NeoPixel pixel(1, NEOPIXEL_PIN, NEO_GRB + NEO_KHZ800); // ===== Audio / DSP params ===== static const int SAMPLE_RATE = 16000; // 16 kHz mic rate static const int N_FFT = 1024; // 64 ms window static const int HOP_SAMPLES = N_FFT/2; // 50% overlap (optional) static const int N_BANDS = 16; // log-spaced bands up to Nyquist static const int BOOTSTRAP_W = 40; // ~2–3 s of audio given overlap static const int K_CLUSTERS = 4; // small K for "normal" modes static const float ANOM_MULT = 3.0f; // thresh = median + ANOM_MULT*MAD // ===== Buffers ===== static int16_t pcm[N_FFT]; static float win[N_FFT]; static double vReal[N_FFT]; static double vImag[N_FFT]; arduinoFFT FFT = arduinoFFT(vReal, vImag, N_FFT, SAMPLE_RATE); // ===== Feature vector: 16 band log-energies + 2 spectral stats (optional) ===== static const int FEAT_DIM = N_BANDS + 2; // ===== Simple utilities ===== static inline int16_t convert_24_to_16(int32_t s32) { // 24-bit sample left-justified in 32-bit slot -> scale down return (int16_t)(s32 >> 11); // tweak shift if amplitude looks off } void i2s_init() { i2s_config_t cfg = { .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX), .sample_rate = SAMPLE_RATE, .bits_per_sample = I2S_BITS_PER_SAMPLE_32BIT, .channel_format = I2S_CHANNEL_FMT_ONLY_LEFT, .communication_format = I2S_COMM_FORMAT_STAND_I2S, .intr_alloc_flags = 0, .dma_buf_count = 4, .dma_buf_len = 1024, .use_apll = false, .tx_desc_auto_clear = false, .fixed_mclk = 0 }; i2s_pin_config_t pins = { .bck_io_num = I2S_SCK_PIN, .ws_io_num = I2S_WS_PIN, .data_out_num = I2S_PIN_NO_CHANGE, .data_in_num = I2S_SD_PIN }; i2s_driver_install(I2S_NUM_0, &cfg, 0, nullptr); i2s_set_pin(I2S_NUM_0, &pins); 
i2s_zero_dma_buffer(I2S_NUM_0); } void make_hann() { for (int i = 0; i < N_FFT; i++) { win[i] = 0.5f * (1.0f - cosf(2.0f * PI * i / (N_FFT - 1))); } } // Log-spaced frequency edges for bands void calc_band_edges(int (&edges)[N_BANDS+1]) { float fmin = 50.0f; // ignore DC / very low float fmax = SAMPLE_RATE / 2.0f; // Nyquist for (int b=0; b<=N_BANDS; b++) { float t = (float)b / (float)N_BANDS; float f = fmin * powf(fmax / fmin, t); int bin = (int)roundf(f * N_FFT / SAMPLE_RATE); if (bin < 1) bin = 1; if (bin > N_FFT/2) bin = N_FFT/2; edges[b] = bin; } // ensure strictly increasing for (int b=1; b<=N_BANDS; b++) if (edges[b] <= edges[b-1]) edges[b] = edges[b-1] + 1; edges[N_BANDS] = min(edges[N_BANDS], N_FFT/2); } bool capture_frame_blocking() { // Read N_FFT mono samples int filled = 0; while (filled < N_FFT) { int32_t tmp[256]; size_t br = 0; if (i2s_read(I2S_NUM_0, (void*)tmp, sizeof(tmp), &br, portMAX_DELAY) != ESP_OK) return false; int got = br / sizeof(int32_t); for (int i=0; i FFT for (int i=0; i0) ? (float)(num/den) : 0.0f; double cumulative = 0.0, target = 0.85 * den; float rolloff = 0.0f; for (int k=1; k<=N_FFT/2; k++) { cumulative += vReal[k]*vReal[k]; if (cumulative >= target) { rolloff = (float)k * SAMPLE_RATE / N_FFT; break; } } out[N_BANDS] = centroid; out[N_BANDS+1] = rolloff; } // ===== Stats helpers ===== struct OnlineMeanStd { double mean=0, M2=0; int n=0; void add(double x){ n++; double d=x-mean; mean += d/n; M2 += d*(x-mean); } double std() const { return (n>1) ? 
sqrt(M2/(n-1)) : 1.0; } }; // ===== K-means (from scratch) on standardized features ===== struct KMeans { int k; float centroids[K_CLUSTERS][FEAT_DIM]; bool initialized=false; KMeans(int kk):k(kk){} static float dist2(const float *a, const float *b){ float d=0; for(int i=0;i= r) break; } if (idx>=n) idx=n-1; memcpy(centroids[c], &X[idx*FEAT_DIM], sizeof(float)*FEAT_DIM); } initialized=true; } void fit(const float *X, int n, int iters=10){ if(!initialized) init_plus_plus(X,n); // assignment buffer int *as = (int*)malloc(sizeof(int)*n); for(int it=0; it0){ for(int j=0;j 1e-6f) ? feat_std[i] : 1.0f; z[i] = (x[i] - feat_mean[i]) / s; } } void online_led(float anomaly){ // green normal, red anomaly bool isAnom = (anomaly > (dist_median + ANOM_MULT * dist_mad)); pixel.setPixelColor(0, pixel.Color(isAnom ? 255:0, isAnom ? 0:255, 0)); pixel.show(); } void setup() { Serial.begin(115200); delay(200); pixel.begin(); pixel.clear(); pixel.show(); i2s_init(); make_hann(); Serial.println("Audio anomaly (no SDK) — bootstrapping normal audio..."); } void loop() { static bool model_ready=false; static int collected=0; static float Xbuf[BOOTSTRAP_W * FEAT_DIM]; // feature windows static float Zbuf[BOOTSTRAP_W * FEAT_DIM]; // standardized features (after we learn stats) static float Dbuf[BOOTSTRAP_W]; // distances for MAD // Capture one frame (with optional hop/overlap) if (!capture_frame_blocking()) return; // DSP → features float feat[FEAT_DIM]; extract_features(feat); if (!model_ready) { // Accumulate bootstrap features (raw) if (collected < BOOTSTRAP_W) { memcpy(&Xbuf[collected*FEAT_DIM], feat, sizeof(float)*FEAT_DIM); collected++; Serial.printf("bootstrap %d/%d\n", collected, BOOTSTRAP_W); } if (collected >= BOOTSTRAP_W) { // Compute per-feature mean/std for (int j=0;j thresh); // gentle online centroid update on non-anomalous frames if (!isAnom) { const float alpha = 0.05f; for (int j=0;j