#include <Arduino.h> #include <Adafruit_NeoPixel.h> #include "driver/i2s.h" #include <arduinoFFT.h> // ===== Sparrow audio pins (ICS-43434 I2S mic) ===== static const int I2S_WS_PIN = 20; // LRCLK / WS static const int I2S_SCK_PIN = 18; // BCLK static const int I2S_SD_PIN = 19; // DOUT from mic // ===== LED ===== static const int NEOPIXEL_PIN = 3; Adafruit_NeoPixel pixel(1, NEOPIXEL_PIN, NEO_GRB + NEO_KHZ800); // ===== Audio / DSP params ===== static const int SAMPLE_RATE = 16000; // 16 kHz mic rate static const int N_FFT = 1024; // 64 ms window static const int HOP_SAMPLES = N_FFT/2; // 50% overlap (optional) static const int N_BANDS = 16; // log-spaced bands up to Nyquist static const int BOOTSTRAP_W = 40; // ~2–3 s of audio given overlap static const int K_CLUSTERS = 4; // small K for "normal" modes static const float ANOM_MULT = 3.0f; // thresh = median + ANOM_MULT*MAD // ===== Buffers ===== static int16_t pcm[N_FFT]; static float win[N_FFT]; static double vReal[N_FFT]; static double vImag[N_FFT]; arduinoFFT FFT = arduinoFFT(vReal, vImag, N_FFT, SAMPLE_RATE); // ===== Feature vector: 16 band log-energies + 2 spectral stats (optional) ===== static const int FEAT_DIM = N_BANDS + 2; // ===== Simple utilities ===== static inline int16_t convert_24_to_16(int32_t s32) { // 24-bit sample left-justified in 32-bit slot -> scale down return (int16_t)(s32 >> 11); // tweak shift if amplitude looks off } void i2s_init() { i2s_config_t cfg = { .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX), .sample_rate = SAMPLE_RATE, .bits_per_sample = I2S_BITS_PER_SAMPLE_32BIT, .channel_format = I2S_CHANNEL_FMT_ONLY_LEFT, .communication_format = I2S_COMM_FORMAT_STAND_I2S, .intr_alloc_flags = 0, .dma_buf_count = 4, .dma_buf_len = 1024, .use_apll = false, .tx_desc_auto_clear = false, .fixed_mclk = 0 }; i2s_pin_config_t pins = { .bck_io_num = I2S_SCK_PIN, .ws_io_num = I2S_WS_PIN, .data_out_num = I2S_PIN_NO_CHANGE, .data_in_num = I2S_SD_PIN }; i2s_driver_install(I2S_NUM_0, &cfg, 0, nullptr); i2s_set_pin(I2S_NUM_0, &pins); i2s_zero_dma_buffer(I2S_NUM_0); } void make_hann() { for (int i = 0; i < N_FFT; i++) { win[i] = 0.5f * (1.0f - cosf(2.0f * PI * i / (N_FFT - 1))); } } // Log-spaced frequency edges for bands void calc_band_edges(int (&edges)[N_BANDS+1]) { float fmin = 50.0f; // ignore DC / very low float fmax = SAMPLE_RATE / 2.0f; // Nyquist for (int b=0; b<=N_BANDS; b++) { float t = (float)b / (float)N_BANDS; float f = fmin * powf(fmax / fmin, t); int bin = (int)roundf(f * N_FFT / SAMPLE_RATE); if (bin < 1) bin = 1; if (bin > N_FFT/2) bin = N_FFT/2; edges[b] = bin; } // ensure strictly increasing for (int b=1; b<=N_BANDS; b++) if (edges[b] <= edges[b-1]) edges[b] = edges[b-1] + 1; edges[N_BANDS] = min(edges[N_BANDS], N_FFT/2); } bool capture_frame_blocking() { // Read N_FFT mono samples int filled = 0; while (filled < N_FFT) { int32_t tmp[256]; size_t br = 0; if (i2s_read(I2S_NUM_0, (void*)tmp, sizeof(tmp), &br, portMAX_DELAY) != ESP_OK) return false; int got = br / sizeof(int32_t); for (int i=0; i<got && filled < N_FFT; i++) { pcm[filled++] = convert_24_to_16(tmp[i]); } } return true; } // Compute feature vector into out[FEAT_DIM] void extract_features(float *out) { // window -> FFT for (int i=0; i<N_FFT; i++) { vReal[i] = (double)((float)pcm[i] * win[i]); vImag[i] = 0.0; } FFT.Windowing(FFT_WIN_TYP_RECTANGLE, FFT_FORWARD); // already applied Hann, so RECT here FFT.Compute(FFT_FORWARD); FFT.ComplexToMagnitude(); // magnitude spectrum is in vReal[0..N_FFT/2] int edges[N_BANDS+1]; calc_band_edges(edges); // band log-energies for (int b=0; b<N_BANDS; b++) { int s = edges[b], e = edges[b+1]; double sum = 0.0; for (int k=s; k<e; k++) { double m = vReal[k]; sum += m*m; // power } float energy = (float)sum + 1e-9f; out[b] = logf(energy); } // spectral centroid & rolloff (0.85) double num=0.0, den=0.0; for (int k=1; k<=N_FFT/2; k++) { double f = (double)k * SAMPLE_RATE / N_FFT; double p = vReal[k]*vReal[k]; num += f * p; den += p; } float centroid = (den>0) ? (float)(num/den) : 0.0f; double cumulative = 0.0, target = 0.85 * den; float rolloff = 0.0f; for (int k=1; k<=N_FFT/2; k++) { cumulative += vReal[k]*vReal[k]; if (cumulative >= target) { rolloff = (float)k * SAMPLE_RATE / N_FFT; break; } } out[N_BANDS] = centroid; out[N_BANDS+1] = rolloff; } // ===== Stats helpers ===== struct OnlineMeanStd { double mean=0, M2=0; int n=0; void add(double x){ n++; double d=x-mean; mean += d/n; M2 += d*(x-mean); } double std() const { return (n>1) ? sqrt(M2/(n-1)) : 1.0; } }; // ===== K-means (from scratch) on standardized features ===== struct KMeans { int k; float centroids[K_CLUSTERS][FEAT_DIM]; bool initialized=false; KMeans(int kk):k(kk){} static float dist2(const float *a, const float *b){ float d=0; for(int i=0;i<FEAT_DIM;i++){ float t=a[i]-b[i]; d+=t*t; } return d; } int assign(const float *x) const { int best=0; float bd=dist2(x, centroids[0]); for(int c=1;c<k;c++){ float d=dist2(x,centroids[c]); if(d<bd){bd=d; best=c;} } return best; } void init_plus_plus(const float *X, int n){ // pick first at random int pick = random(n); memcpy(centroids[0], &X[pick*FEAT_DIM], sizeof(float)*FEAT_DIM); // others by D^2 for(int c=1;c<k;c++){ // compute distances double sumD=0; for(int i=0;i<n;i++){ float bestd = dist2(&X[i*FEAT_DIM], centroids[0]); for(int j=1;j<c;j++){ float d=dist2(&X[i*FEAT_DIM], centroids[j]); if(d<bestd) bestd=d; } sumD += bestd; } double r = (random(0,1000000)/1000000.0) * sumD; double acc=0; int idx=0; for(; idx<n; idx++){ float bestd = dist2(&X[idx*FEAT_DIM], centroids[0]); for(int j=1;j<c;j++){ float d=dist2(&X[idx*FEAT_DIM], centroids[j]); if(d<bestd) bestd=d; } acc += bestd; if(acc >= r) break; } if (idx>=n) idx=n-1; memcpy(centroids[c], &X[idx*FEAT_DIM], sizeof(float)*FEAT_DIM); } initialized=true; } void fit(const float *X, int n, int iters=10){ if(!initialized) init_plus_plus(X,n); // assignment buffer int *as = (int*)malloc(sizeof(int)*n); for(int it=0; it<iters; it++){ // assign for(int i=0;i<n;i++) as[i]=assign(&X[i*FEAT_DIM]); // recompute float sums[K_CLUSTERS][FEAT_DIM]; int cnt[K_CLUSTERS]; for(int c=0;c<k;c++){ memset(sums[c],0,sizeof(sums[c])); cnt[c]=0; } for(int i=0;i<n;i++){ int c=as[i]; cnt[c]++; for(int j=0;j<FEAT_DIM;j++) sums[c][j]+=X[i*FEAT_DIM+j]; } for(int c=0;c<k;c++){ if(cnt[c]>0){ for(int j=0;j<FEAT_DIM;j++) centroids[c][j]=sums[c][j]/cnt[c]; } } } free(as); } }; static KMeans kmeans(K_CLUSTERS); // Standardization (z-score) learned from bootstrap static float feat_mean[FEAT_DIM], feat_std[FEAT_DIM]; // Anomaly scaling using distances in bootstrap static float dist_median=0.0f, dist_mad=1.0f; float median_of(float *arr, int n){ // simple partial sort for(int i=0;i<n-1;i++){ for(int j=i+1;j<n;j++) if(arr[j]<arr[i]) { float t=arr[i]; arr[i]=arr[j]; arr[j]=t; } } return (n%2) ? arr[n/2] : 0.5f*(arr[n/2-1]+arr[n/2]); } void standardize(const float *x, float *z){ for(int i=0;i<FEAT_DIM;i++){ float s = (feat_std[i] > 1e-6f) ? feat_std[i] : 1.0f; z[i] = (x[i] - feat_mean[i]) / s; } } void online_led(float anomaly){ // green normal, red anomaly bool isAnom = (anomaly > (dist_median + ANOM_MULT * dist_mad)); pixel.setPixelColor(0, pixel.Color(isAnom ? 255:0, isAnom ? 0:255, 0)); pixel.show(); } void setup() { Serial.begin(115200); delay(200); pixel.begin(); pixel.clear(); pixel.show(); i2s_init(); make_hann(); Serial.println("Audio anomaly (no SDK) — bootstrapping normal audio..."); } void loop() { static bool model_ready=false; static int collected=0; static float Xbuf[BOOTSTRAP_W * FEAT_DIM]; // feature windows static float Zbuf[BOOTSTRAP_W * FEAT_DIM]; // standardized features (after we learn stats) static float Dbuf[BOOTSTRAP_W]; // distances for MAD // Capture one frame (with optional hop/overlap) if (!capture_frame_blocking()) return; // DSP → features float feat[FEAT_DIM]; extract_features(feat); if (!model_ready) { // Accumulate bootstrap features (raw) if (collected < BOOTSTRAP_W) { memcpy(&Xbuf[collected*FEAT_DIM], feat, sizeof(float)*FEAT_DIM); collected++; Serial.printf("bootstrap %d/%d\n", collected, BOOTSTRAP_W); } if (collected >= BOOTSTRAP_W) { // Compute per-feature mean/std for (int j=0;j<FEAT_DIM;j++){ OnlineMeanStd s; for(int i=0;i<BOOTSTRAP_W;i++) s.add(Xbuf[i*FEAT_DIM+j]); feat_mean[j] = (float)s.mean; feat_std[j] = (float)s.std(); } // Standardize and fit K-means for (int i=0;i<BOOTSTRAP_W;i++){ standardize(&Xbuf[i*FEAT_DIM], &Zbuf[i*FEAT_DIM]); } kmeans.init_plus_plus(Zbuf, BOOTSTRAP_W); kmeans.fit(Zbuf, BOOTSTRAP_W, 12); // Distances to nearest centroid on bootstrap → median/MAD for (int i=0;i<BOOTSTRAP_W;i++){ int c = kmeans.assign(&Zbuf[i*FEAT_DIM]); float d = KMeans::dist2(&Zbuf[i*FEAT_DIM], kmeans.centroids[c]); Dbuf[i] = sqrtf(d); } // median float tmp[BOOTSTRAP_W]; memcpy(tmp, Dbuf, sizeof(tmp)); dist_median = median_of(tmp, BOOTSTRAP_W); // MAD = median(|d - median|) for (int i=0;i<BOOTSTRAP_W;i++) tmp[i] = fabsf(Dbuf[i] - dist_median); dist_mad = median_of(tmp, BOOTSTRAP_W) + 1e-6f; model_ready = true; Serial.printf("Model ready. median=%.3f MAD=%.3f (thresh≈%.3f)\n", dist_median, dist_mad, dist_median + ANOM_MULT*dist_mad); } } else { // Standardize current feature, compute anomaly float z[FEAT_DIM]; standardize(feat, z); int c = kmeans.assign(z); float d = sqrtf(KMeans::dist2(z, kmeans.centroids[c])); float thresh = dist_median + ANOM_MULT * dist_mad; bool isAnom = (d > thresh); // gentle online centroid update on non-anomalous frames if (!isAnom) { const float alpha = 0.05f; for (int j=0;j<FEAT_DIM;j++) { kmeans.centroids[c][j] = (1-alpha)*kmeans.centroids[c][j] + alpha*z[j]; } } Serial.printf("dist=%.3f thresh≈%.3f %s\n", d, thresh, isAnom?"ANOMALY":"ok"); online_led(d); } // crude 50% overlap: reuse last half by shifting; here we just sleep to approximate hop // delay((HOP_SAMPLES * 1000) / SAMPLE_RATE); }