# Recent changes

import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
 
# Input recordings: one CSV per class (label 0 and label 1).
IDLE_CSV = "raw_data_label0.csv"
SHAKE_CSV = "raw_data_label1.csv"

# Sliding-window geometry, expressed in samples.
WINDOW_SIZE = 50                # 0.5s window @ ~100 Hz
STEP_SIZE = WINDOW_SIZE // 2    # 50% overlap -> 25 samples

# Destination of the generated C++ header.
MODEL_HEADER_OUT = "include/model.h"
 
def load_and_merge(idle_csv=None, shake_csv=None):
    """Load the idle and shake recordings into one labelled DataFrame.

    Each recording is sorted by ``timestamp_ms`` on its own and the two are
    then concatenated, idle first. The recordings are deliberately NOT sorted
    together: if their timestamp ranges overlap (for example when both
    captures start counting from zero), a global sort would interleave idle
    and shake samples, and every sliding window built afterwards would mix
    two unrelated signals.

    Args:
        idle_csv: Path of the idle recording. Defaults to ``IDLE_CSV``.
        shake_csv: Path of the shake recording. Defaults to ``SHAKE_CSV``.

    Returns:
        A DataFrame with a fresh 0..n-1 index containing all idle rows
        (``label`` 0) followed by all shake rows (``label`` 1), each group
        in ascending ``timestamp_ms`` order.
    """
    if idle_csv is None:
        idle_csv = IDLE_CSV
    if shake_csv is None:
        shake_csv = SHAKE_CSV

    recordings = []
    for path, label in ((idle_csv, 0), (shake_csv, 1)):
        rec = pd.read_csv(path)
        # The file a row came from decides its class; overwrite whatever
        # label column the CSV may already carry.
        rec["label"] = label
        # Stable sort keeps rows with equal timestamps in file order.
        rec = rec.sort_values(by="timestamp_ms", kind="mergesort")
        recordings.append(rec)

    return pd.concat(recordings, ignore_index=True)
 
def extract_features(df, window_size=None, step_size=None):
    """Slide a window over the samples and compute per-window features.

    For every window the acceleration magnitude ``sqrt(ax^2 + ay^2 + az^2)``
    is summarised by its mean, standard deviation and peak-to-peak range.
    The window label is the rounded mean of the sample labels; an exact
    50/50 tie resolves to 0 because ``round`` rounds halves to even.

    Args:
        df: DataFrame with columns ``ax_g``, ``ay_g``, ``az_g`` and ``label``.
        window_size: Samples per window. Defaults to ``WINDOW_SIZE``.
        step_size: Samples between window starts. Defaults to ``STEP_SIZE``.

    Returns:
        A DataFrame with columns ``mean_mag``, ``std_mag``, ``p2p_mag`` and
        ``label``, one row per window. The columns are present even when
        ``df`` is shorter than one window and no rows are produced.

    Raises:
        ValueError: If ``window_size`` or ``step_size`` is not positive.
    """
    if window_size is None:
        window_size = WINDOW_SIZE
    if step_size is None:
        step_size = STEP_SIZE
    if window_size <= 0 or step_size <= 0:
        raise ValueError(
            "window_size and step_size must be positive, got %r and %r"
            % (window_size, step_size)
        )

    columns = ["mean_mag", "std_mag", "p2p_mag", "label"]

    ax = df["ax_g"].to_numpy()
    ay = df["ay_g"].to_numpy()
    az = df["az_g"].to_numpy()
    lbl = df["label"].to_numpy()

    mag = np.sqrt(ax**2 + ay**2 + az**2)

    rows = []
    # Only full windows are used; the range is empty when the input is
    # shorter than a single window.
    for start in range(0, len(df) - window_size + 1, step_size):
        stop = start + window_size
        win_mag = mag[start:stop]
        win_lbl = lbl[start:stop]

        rows.append({
            "mean_mag": np.mean(win_mag),
            "std_mag":  np.std(win_mag),
            "p2p_mag":  np.max(win_mag) - np.min(win_mag),
            # majority/avg label in this window
            "label":    int(round(np.mean(win_lbl))),
        })

    # Passing the column list keeps the schema stable for an empty result,
    # so callers can always select the feature columns.
    return pd.DataFrame(rows, columns=columns)
 
def main():
    """Train the shake classifier and export its parameters as a C++ header.

    Loads and windows the recordings, fits a logistic regression on the
    three magnitude features, prints a classification report together with
    the learned weights and bias, and writes them (plus the window
    parameters) to ``MODEL_HEADER_OUT``.

    Raises:
        ValueError: If the recordings yield no feature windows at all.
    """
    # Local import: only this entry point needs to create directories.
    from pathlib import Path

    df = load_and_merge()
    feats = extract_features(df)
    if feats.empty:
        raise ValueError(
            "No feature windows produced: %d samples loaded, but a window "
            "needs %d" % (len(df), WINDOW_SIZE)
        )

    X = feats[["mean_mag", "std_mag", "p2p_mag"]].to_numpy()
    y = feats["label"].to_numpy()

    clf = LogisticRegression()
    clf.fit(X, y)

    # NOTE: the report is computed on the same windows the model was fitted
    # on, so it measures fit quality rather than generalisation.
    y_pred = clf.predict(X)
    print(classification_report(y, y_pred))

    w = clf.coef_[0]      # [w1, w2, w3]
    b = clf.intercept_[0] # bias

    print("Weights:", w)
    print("Bias:", b)

    # Create the output directory first; open() does not create missing
    # parents and would otherwise fail with FileNotFoundError.
    header_path = Path(MODEL_HEADER_OUT)
    header_path.parent.mkdir(parents=True, exist_ok=True)

    with open(header_path, "w") as f:
        f.write("// Auto-generated by train_model.py\n")
        f.write("#pragma once\n\n")
        f.write("struct ShakeModel {\n")
        # W0 is the bias; W1..W3 follow the feature order used for X.
        f.write("    static constexpr float W0 = %.8ff;\n" % b)
        f.write("    static constexpr float W1 = %.8ff;\n" % w[0])
        f.write("    static constexpr float W2 = %.8ff;\n" % w[1])
        f.write("    static constexpr float W3 = %.8ff;\n" % w[2])
        f.write("};\n\n")
        f.write("// Feature extraction params\n")
        f.write("#define WINDOW_SIZE %d\n" % WINDOW_SIZE)
        f.write("#define STEP_SIZE   %d\n" % STEP_SIZE)
 
# Run the training pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()