From 4bd2ac78ed8d8887857eacfa10a30070b634b174 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 19 Sep 2025 05:37:54 +1000 Subject: [PATCH] sprint2:NLP baselines (BoW/TF-IDF + LogReg/SVM) --- .../Emotional_baseline/emotional_baseline.py | 384 ++++++++++++++++++ .../out_final/confusion_matrix.png | Bin 0 -> 20786 bytes .../out_final/feature_preview.txt | 85 ++++ .../out_final/lexicon_preview.csv | 6 + .../out_final/settings.json | 23 ++ .../out_final/sprint2_cv_results.csv | 21 + .../out_final/sprint2_cv_summary.csv | 5 + .../out_final/sprint2_report.md | 64 +++ .../out_final/sprint2_results.csv | 7 + 9 files changed, 595 insertions(+) create mode 100644 AI Guardian/Emotional_baseline/emotional_baseline.py create mode 100644 AI Guardian/Emotional_baseline/out_final/confusion_matrix.png create mode 100644 AI Guardian/Emotional_baseline/out_final/feature_preview.txt create mode 100644 AI Guardian/Emotional_baseline/out_final/lexicon_preview.csv create mode 100644 AI Guardian/Emotional_baseline/out_final/settings.json create mode 100644 AI Guardian/Emotional_baseline/out_final/sprint2_cv_results.csv create mode 100644 AI Guardian/Emotional_baseline/out_final/sprint2_cv_summary.csv create mode 100644 AI Guardian/Emotional_baseline/out_final/sprint2_report.md create mode 100644 AI Guardian/Emotional_baseline/out_final/sprint2_results.csv diff --git a/AI Guardian/Emotional_baseline/emotional_baseline.py b/AI Guardian/Emotional_baseline/emotional_baseline.py new file mode 100644 index 00000000..2b333d46 --- /dev/null +++ b/AI Guardian/Emotional_baseline/emotional_baseline.py @@ -0,0 +1,384 @@ +#!/usr/bin/env python3 +# emotional_baseline.py +""" +Sprint 2 — NLP Feature Extraction & Baseline Training + +- Strips label tokens twice (custom preprocessor + stopwords) +- Deduplicates texts after scrubbing +- Group-aware split (GroupShuffleSplit) + GroupKFold CV +- Features: BoW(1), TF-IDF(1–2), TF-IDF(1–3), optional lexicon counts +- Models: Logistic Regression & Linear SVM (class_weight="balanced") +- Tripwire: raises if any label token appears in vocabulary +- Saves: sprint2_results.csv, sprint2_cv_results.csv, sprint2_cv_summary.csv, + confusion_matrix.png, feature_preview.txt, (optional) lexicon_preview.csv, + sprint2_report.md, settings.json +""" + +from __future__ import annotations +import argparse, json, re, sys, warnings +from collections import Counter +from pathlib import Path +from typing import Callable, Iterable, List, Tuple + +import numpy as np +import pandas as pd +from scipy.sparse import csr_matrix, hstack + +from sklearn.model_selection import GroupShuffleSplit, GroupKFold +from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer, ENGLISH_STOP_WORDS +from sklearn.linear_model import LogisticRegression +from sklearn.svm import LinearSVC +from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix +import sklearn + +import matplotlib +matplotlib.use("Agg") +import matplotlib.pyplot as plt + +warnings.filterwarnings("ignore", category=UserWarning) +SEED = 42 + +# ---------- tiny lexicons ---------- +POS_WORDS = { + "good","great","excellent","happy","calm","relief","improved","better", + "comfortable","normal","stable","smiling","cooperative","ok","fine","relaxed" +} +NEG_WORDS = { + "bad","sad","angry","pain","fever","cough","dizzy","tired","weak","fatigue","anxious", + "vomit","nausea","diarrhea","bleeding","distress","agitated","worse","uncomfortable", + "sick","breathless","headache","crying","depressed" +} +EMOTION_LEX = { + "anger": {"angry","annoyed","furious","irate","mad","rage","frustrated"}, + "joy": {"joy","happy","pleased","delighted","glad","cheerful","smiling"}, + "sadness": {"sad","down","unhappy","depressed","blue","tearful","crying"}, + "fear": {"afraid","scared","fear","anxious","terrified","nervous","worried"}, + "surprise":{"surprised","startled","amazed","shocked","astonished","wow"}, + "disgust": {"disgust","disgusted","gross","nauseous","repulsed","sickened"}, +} +EMOTION_KEYS = list(EMOTION_LEX.keys()) + +# ---------- leakage prevention ---------- +DEFAULT_LABEL_WORDS = { + "normal","normally","normality", + "sick","sickness", + "uncomfortable","uncomfort","comfort","comfortable", +} + +def make_label_scrubber(extra_words: str | None) -> tuple[Callable[[str], str], list[str]]: + words = set(DEFAULT_LABEL_WORDS) + if extra_words: + for w in re.split(r"[, ]+", extra_words.strip()): + if w: + words.add(w.lower()) + pat = re.compile(r"\b(" + "|".join(map(re.escape, sorted(words))) + r")\b") + def strip_label_words(text: str) -> str: + return pat.sub(" ", str(text).lower()) + return strip_label_words, sorted(words) + +def assert_no_label_tokens(vec, label_tokens): + vocab = set(vec.get_feature_names_out()) + leaks = sorted(vocab.intersection(set(label_tokens))) + if leaks: + raise RuntimeError(f"Label leakage detected in vocabulary: {leaks}") + +# ---------- feature helpers ---------- +def build_lexicon_features(texts: Iterable[str]) -> csr_matrix: + rows = [] + for t in texts: + toks = re.findall(r"[a-zA-Z']+", str(t).lower()) + tok_count = max(len(toks), 1) + bag = Counter(toks) + pos = sum(bag.get(w, 0) for w in POS_WORDS) + neg = sum(bag.get(w, 0) for w in NEG_WORDS) + emo_counts = [sum(bag.get(w, 0) for w in EMOTION_LEX[k]) for k in EMOTION_KEYS] + emo_rates = [c / tok_count for c in emo_counts] + rows.append([pos, neg, pos/tok_count, neg/tok_count] + emo_counts + emo_rates) + return csr_matrix(np.asarray(rows, dtype=float)) + +def plot_confusion(y_true, y_pred, labels, out_path: Path): + cm = confusion_matrix(y_true, y_pred, labels=labels) + plt.figure(figsize=(6, 5)) + plt.imshow(cm, interpolation="nearest") + plt.title("Confusion Matrix (best model)") + plt.xticks(range(len(labels)), labels, rotation=45, ha="right") + plt.yticks(range(len(labels)), labels) + for i in range(cm.shape[0]): + for j in range(cm.shape[1]): + plt.text(j, i, cm[i, j], ha="center", va="center") + plt.xlabel("Predicted"); plt.ylabel("True") + plt.tight_layout(); plt.savefig(out_path); plt.close() + +def preview_vectorizer(vec, X_sample, out_txt: Path, header: str): + fn = np.array(vec.get_feature_names_out()) + if X_sample.shape[0] == 0: return + nz = X_sample[0].nonzero()[1] + vals = X_sample[0, nz].toarray().ravel() + pairs = sorted(zip(fn[nz], vals), key=lambda x: -x[1])[:25] + with open(out_txt, "a", encoding="utf-8") as f: + f.write(f"\n--- {header} ---\n") + for w, v in pairs: + f.write(f"{w}: {v:.4f}\n") + f.write(f"Vocab size: {len(fn)}\n") + +def df_to_md(df: pd.DataFrame) -> str: + try: + return df.to_markdown(index=False) # needs 'tabulate'; falls back if missing + except Exception: + return df.to_string(index=False) + +# ---------- training ---------- +def fit_and_score(Xtr, Xte, ytr, yte, feature_name: str): + rows = [] + for name, model in [ + ("LogReg", LogisticRegression(max_iter=2000, class_weight="balanced", solver="lbfgs")), + ("LinearSVM", LinearSVC(class_weight="balanced")), + ]: + model.fit(Xtr, ytr) + pred = model.predict(Xte) + rows.append({ + "Features": feature_name, "Model": name, + "Accuracy": accuracy_score(yte, pred), + "F1_macro": f1_score(yte, pred, average="macro"), + "_pred": pred, "_model": model, + }) + return rows + +def run_group_cv(texts: pd.Series, labels: pd.Series, groups: pd.Series, + vec_builders: List[tuple[str, Callable[[], object]]], + with_lexicon: bool, n_splits: int, preproc: Callable[[str], str], + stripped_tokens: list[str]) -> pd.DataFrame: + uniq = int(pd.Series(groups).nunique()) + n_splits = max(2, min(n_splits, uniq)) + gkf = GroupKFold(n_splits=n_splits) + rows = [] + for feat_name, make_vec in vec_builders: + for fold, (tr, te) in enumerate(gkf.split(texts, labels, groups=groups), 1): + Xtr_text, Xte_text = texts.iloc[tr], texts.iloc[te] + ytr, yte = labels.iloc[tr], labels.iloc[te] + vec = make_vec() + Xtr = vec.fit_transform(Xtr_text) + assert_no_label_tokens(vec, stripped_tokens) + Xte = vec.transform(Xte_text) + if with_lexicon: + Xtr = hstack([Xtr, build_lexicon_features(Xtr_text.apply(preproc))]).tocsr() + Xte = hstack([Xte, build_lexicon_features(Xte_text.apply(preproc))]).tocsr() + for name, model in [ + ("LogReg", LogisticRegression(max_iter=2000, class_weight="balanced", solver="lbfgs")), + ("LinearSVM", LinearSVC(class_weight="balanced")), + ]: + model.fit(Xtr, ytr) + pred = model.predict(Xte) + rows.append({ + "Fold": fold, "Features": feat_name, "Model": name, + "Accuracy": accuracy_score(yte, pred), + "F1_macro": f1_score(yte, pred, average="macro"), + }) + return pd.DataFrame(rows) + +# ---------- main ---------- +def main(): + p = argparse.ArgumentParser() + p.add_argument("--csv", required=True, help="Path to dataset CSV") + p.add_argument("--text_col", default="text", help="Text column") + p.add_argument("--label_col", default="label", help="Label column") + p.add_argument("--group_col", default="", help="Optional grouping column (e.g., patient_id)") + p.add_argument("--test_size", type=float, default=0.2) + p.add_argument("--min_df", type=int, default=2) + p.add_argument("--n_splits", type=int, default=5) + p.add_argument("--with_lexicon", action="store_true") + p.add_argument("--extra_label_words", default="", help="Comma/space separated extra tokens to strip") + p.add_argument("--out_dir", default="sprint2_out") + args = p.parse_args() + + out = Path(args.out_dir); out.mkdir(parents=True, exist_ok=True) + + # Load & basic checks + df = pd.read_csv(args.csv) + if args.text_col not in df.columns or args.label_col not in df.columns: + print(f"Columns present: {list(df.columns)}", file=sys.stderr) + sys.exit(f"Missing text_col='{args.text_col}' or label_col='{args.label_col}' in CSV") + + keep = [args.text_col, args.label_col] + if args.group_col and args.group_col in df.columns: + keep.append(args.group_col) + data = df[keep].copy() + + data[args.text_col] = data[args.text_col].fillna("").astype(str) + data = data[data[args.label_col].notna()] + data[args.label_col] = data[args.label_col].astype(str) + if data[args.label_col].nunique() < 2: + sys.exit("Need at least 2 label classes.") + + # Scrubber + stopwords (LIST, not set) + strip_label_words, stripped_set = make_label_scrubber(args.extra_label_words) + custom_stop = list(ENGLISH_STOP_WORDS.union(stripped_set)) + + # Normalize for dedup & grouping + data["_text_norm"] = data[args.text_col].apply(strip_label_words) + before = len(data) + data = data.drop_duplicates(subset=["_text_norm"]).reset_index(drop=True) + print(f"Deduped texts: {before} -> {len(data)}") + + # Groups + if args.group_col and args.group_col in data.columns: + groups = data[args.group_col].astype(str) + group_note = f"groups by '{args.group_col}'" + else: + groups = data["_text_norm"] + group_note = "groups by normalized text" + groups = groups.astype("category").cat.codes + + # Group-aware single split + gss = GroupShuffleSplit(n_splits=1, test_size=args.test_size, random_state=SEED) + tr_idx, te_idx = next(gss.split(data[args.text_col], data[args.label_col], groups=groups)) + X_tr, X_te = data[args.text_col].iloc[tr_idx], data[args.text_col].iloc[te_idx] + y_tr, y_te = data[args.label_col].iloc[tr_idx], data[args.label_col].iloc[te_idx] + + # Vectorizers + bow = CountVectorizer(preprocessor=strip_label_words, lowercase=False, + stop_words=custom_stop, ngram_range=(1,1), min_df=args.min_df) + tfidf12 = TfidfVectorizer(preprocessor=strip_label_words, lowercase=False, + stop_words=custom_stop, ngram_range=(1,2), min_df=args.min_df) + tfidf13 = TfidfVectorizer(preprocessor=strip_label_words, lowercase=False, + stop_words=custom_stop, ngram_range=(1,3), min_df=args.min_df) + + # Feature preview file + feat_prev = out / "feature_preview.txt" + feat_prev.write_text("Sample nonzero features for first test doc\n", encoding="utf-8") + + results = [] + + # A) BoW + Xtr = bow.fit_transform(X_tr) + assert_no_label_tokens(bow, stripped_set) + Xte = bow.transform(X_te) + preview_vectorizer(bow, Xte[:1], feat_prev, "BoW (1-gram)") + if args.with_lexicon: + Xtr = hstack([Xtr, build_lexicon_features(X_tr.apply(strip_label_words))]).tocsr() + Xte = hstack([Xte, build_lexicon_features(X_te.apply(strip_label_words))]).tocsr() + results += fit_and_score(Xtr, Xte, y_tr, y_te, f"BoW(1){' + lex' if args.with_lexicon else ''}") + + # B) TF-IDF (1–2) + Xtr = tfidf12.fit_transform(X_tr) + assert_no_label_tokens(tfidf12, stripped_set) + Xte = tfidf12.transform(X_te) + preview_vectorizer(tfidf12, Xte[:1], feat_prev, "TF-IDF (1–2)") + if args.with_lexicon: + Xtr = hstack([Xtr, build_lexicon_features(X_tr.apply(strip_label_words))]).tocsr() + Xte = hstack([Xte, build_lexicon_features(X_te.apply(strip_label_words))]).tocsr() + results += fit_and_score(Xtr, Xte, y_tr, y_te, f"TFIDF(1–2){' + lex' if args.with_lexicon else ''}") + + # C) TF-IDF (1–3) + Xtr = tfidf13.fit_transform(X_tr) + assert_no_label_tokens(tfidf13, stripped_set) + Xte = tfidf13.transform(X_te) + preview_vectorizer(tfidf13, Xte[:1], feat_prev, "TF-IDF (1–3)") + if args.with_lexicon: + Xtr = hstack([Xtr, build_lexicon_features(X_tr.apply(strip_label_words))]).tocsr() + Xte = hstack([Xte, build_lexicon_features(X_te.apply(strip_label_words))]).tocsr() + results += fit_and_score(Xtr, Xte, y_tr, y_te, f"TFIDF(1–3){' + lex' if args.with_lexicon else ''}") + + # Save single-split results + res_df = pd.DataFrame([{ + "Features": r["Features"], "Model": r["Model"], + "Accuracy": r["Accuracy"], "F1_macro": r["F1_macro"] + } for r in results]).sort_values("F1_macro", ascending=False) + res_df.to_csv(out / "sprint2_results.csv", index=False) + + # Best confusion matrix + report snippet + best = max(results, key=lambda r: r["F1_macro"]) + labels_sorted = sorted(data[args.label_col].unique()) + print("\n=== Baseline Results (single split; sorted by Macro-F1) ===") + print(res_df.to_string(index=False)) + print(f"\nBest: {best['Features']} + {best['Model']} | " + f"Accuracy={best['Accuracy']:.3f} | F1_macro={best['F1_macro']:.3f}\n") + print("=== Classification Report (Best) ===") + print(classification_report(y_te, best["_pred"], labels=labels_sorted, zero_division=0)) + plot_confusion(y_te, best["_pred"], labels_sorted, out / "confusion_matrix.png") + + # Optional lexicon preview + if args.with_lexicon and X_te.shape[0] > 0: + lex = build_lexicon_features(X_te[:5].apply(strip_label_words)) + cols = ["pos_count","neg_count","pos_rate","neg_rate"] + \ + [f"{k}_count" for k in EMOTION_KEYS] + [f"{k}_rate" for k in EMOTION_KEYS] + pd.DataFrame(np.asarray(lex.todense()), columns=cols).to_csv(out / "lexicon_preview.csv", index=False) + + # ---- 5-fold GroupKFold CV ---- + vec_builders = [ + ("TFIDF(1–2)" + (" + lex" if args.with_lexicon else ""), + lambda: TfidfVectorizer(preprocessor=strip_label_words, lowercase=False, + stop_words=custom_stop, ngram_range=(1,2), min_df=args.min_df)), + ("TFIDF(1–3)" + (" + lex" if args.with_lexicon else ""), + lambda: TfidfVectorizer(preprocessor=strip_label_words, lowercase=False, + stop_words=custom_stop, ngram_range=(1,3), min_df=args.min_df)), + ] + cv_df = run_group_cv( + texts=data[args.text_col].reset_index(drop=True), + labels=data[args.label_col].reset_index(drop=True), + groups=pd.Series(groups).reset_index(drop=True), + vec_builders=vec_builders, + with_lexicon=args.with_lexicon, + n_splits=args.n_splits, + preproc=strip_label_words, + stripped_tokens=stripped_set + ) + cv_df.to_csv(out / "sprint2_cv_results.csv", index=False) + cv_agg = (cv_df + .groupby(["Features","Model"], as_index=False) + .agg(Accuracy_mean=("Accuracy","mean"), + Accuracy_std =("Accuracy","std"), + F1_mean =("F1_macro","mean"), + F1_std =("F1_macro","std")) + .sort_values(["F1_mean"], ascending=False)) + cv_agg.to_csv(out / "sprint2_cv_summary.csv", index=False) + + # Markdown report + md = [] + md.append("# Sprint 2: Basic NLP Features & Baselines (Leakage-Safe)\n") + md.append(f"- **CSV:** {Path(args.csv).name}\n") + md.append(f"- **Text column:** `{args.text_col}` | **Label column:** `{args.label_col}`\n") + md.append(f"- **Groups:** {group_note}\n") + md.append(f"- **Dedup:** exact duplicates removed after scrubbing\n") + md.append(f"- **Leakage guard:** stripped tokens {sorted(set(stripped_set))}\n") + md.append("\n## Feature extraction\n") + md.append(f"- BoW (1-gram), min_df={args.min_df}\n") + md.append("- TF-IDF with bigrams & trigrams (captures phrases like *not happy*, *very tired*).\n") + if args.with_lexicon: + md.append("- Lexicon counts: pos/neg + six emotions (anger, joy, sadness, fear, surprise, disgust).\n") + md.append("\n## Baseline models\n- Logistic Regression (balanced)\n- Linear SVM (balanced)\n") + md.append("\n## Single-split results (sorted by Macro-F1)\n\n") + md.append(df_to_md(res_df)); md.append("\n\n") + md.append(f"**Best (single split):** {best['Features']} + {best['Model']} \n") + md.append(f"Accuracy: **{best['Accuracy']:.3f}** | Macro-F1: **{best['F1_macro']:.3f}**\n") + md.append("\n**Confusion matrix** saved to `confusion_matrix.png`.\n") + md.append("\n## 5-fold CV (GroupKFold; mean ± std)\n\n") + show = cv_agg.assign( + Accuracy=lambda d: d["Accuracy_mean"].round(3).astype(str) + " ± " + d["Accuracy_std"].round(3).astype(str), + F1_macro=lambda d: d["F1_mean"].round(3).astype(str) + " ± " + d["F1_std"].round(3).astype(str) + )[["Features","Model","Accuracy","F1_macro"]] + md.append(df_to_md(show)); md.append("\n") + md.append("\n## Environment\n") + md.append(f"- python: `{sys.executable}`\n") + md.append(f"- numpy: {np.__version__} | pandas: {pd.__version__} | sklearn: {sklearn.__version__}\n") + (out / "sprint2_report.md").write_text("\n".join(md), encoding="utf-8") + + # Settings for reproducibility + settings = { + "csv": str(Path(args.csv)), + "text_col": args.text_col, + "label_col": args.label_col, + "group_col": args.group_col if args.group_col else None, + "test_size": args.test_size, + "min_df": args.min_df, + "n_splits": args.n_splits, + "with_lexicon": bool(args.with_lexicon), + "extra_label_words": args.extra_label_words, + "stripped_tokens_effective": stripped_set, + "seed": SEED, + } + (out / "settings.json").write_text(json.dumps(settings, indent=2), encoding="utf-8") + print(f"\nAll artifacts written to: {out.resolve()}\n") + +if __name__ == "__main__": + main() diff --git a/AI Guardian/Emotional_baseline/out_final/confusion_matrix.png b/AI Guardian/Emotional_baseline/out_final/confusion_matrix.png new file mode 100644 index 0000000000000000000000000000000000000000..b141def9673cf0e29e6a5f4d0fccbaca21aa38b7 GIT binary patch literal 20786 zcmeIacTiQ^*CmPpvv3WlDB&s~5(EJe$ttEp5F|=g2};f)nQKA;14l`c4CF(SoH3Gv z5+#e05l|#Z?y>xR{q@&Xuj+Nz>weW;-G5jXX`j86U=;-J+ zQ_h}LrK4MUfsSs){u(9tnoBL7yz%f^|} z(WyjIP99gk9zNRXe0_VerzN^0Xym^IpKtlCe|RPS9}`{8kN?89q>*o)evR*?F|XK2NB7$F|ML&@E)LX240jb3 z_-^Of$Ii}9zk2oH()@VK;HQ^NJ9o}Jw03LHwo25^Nx5UxB8TTAS9f>*d!EepTuGO*F0XpzNQY&aj)cWtxdOMd2IWX|e4; zr`v64maXabM_H)R?|c~XtM{2jN}ER)BAiB=&B`C`m2e*GVC@`B&W~6OnBFbv{yQyW z^d5Jl3hu&+#yGQvQEMlivDoANO7#wP(s8OW@@B8SSMcg)Cs7|i&N8Xm;j!?uOsB-N zsJ|v`=Al)|nTKrojziS`>d=?r;tq+nJ#VvZj27pncrT{SS9Pq01h7o3%lEUwE zM0sf5-y_z%F#D@-baYg&IoHkzmseH@7FaxE@p~vPYI3qcnWmj#y8QXb(p0L~qR~jL zKLe}8V1u%3)9;_(n6_{4wTzN-NjGndfByUTZ%NfN_V(%gRjMi~k4Eb%KEE;wKtbt zII_veOJ)X8sV~-V+kccXhN4ySPu$|XPKzYV#Ar@;qCt`ChRvG;Q&Us-?b|n5&hc*YyLPoHr9 z<>|TEfhds~P6d4WV@|W_@2$1jzXs|?XsN}j580$s+@i1t!~d*hQ#pI~?u{Ea9G4eo zEGBv@giDXkZx_(JP~be?{8ZE;yw0#~-9?>Ikt?<8C7z4w&yKBdadR7a{m&|92FfBr ze*+_$H)Szja49-*umc%;@)K)&`eppykq!Fvge4kxb=3V}s&3j@IuN5sVTH^M- zDz`T<9lrMc0!7PEa@Cqmk2sllRC@&(KgA9;Cx@+ExiZtHTSnBXO?_!;sWMnF>huE^ z9!guwtn3(}P^U-R<9yg$&w6@#RtE9~yUvUyZ`>)Mmy;66 zr}{oN$Kj_%iWT z<1k*~3>|8S+hgP}?lk;F8%Hp}xGl%V1`+?~pTE$4Vc~kc@sgVfYH_81?h zimeR^YTO(Z{<}phTgMB=i$$o4J#TL-1qfVaSe;$fFnB;h&k-LXS z4Q@BWBip*u+;_)8b6%~K@9sS@aKO8N~U*nzOy8+Sha;I}gjadkAmgbOE+2LoRmUgSPWvAk8yh1Fourdh$*-T^c=FUK@%i`f--qlpxc=s>Fapy_MsT1ms^rq5e!(?S z0|SFt#ZY0>@1I|pe(+_Ab(0mh~G_sj&(GqkQnVw5`pA~M; zv01KaTV_d&mi{_-zpTB;Ipw@|$E_j_a>ne1~_l)vd$_wb-bW6b1OXi?ZyZvAvc3)5CPiJ}uB8@rpRWe<1 zb9XOXzka>Y{oSJW**M^h+xagXlKabbX?EZ~(#SnOKatX14e2HdY6+?_B}oHFxkc84 zxdAEq1voSa3;S#?Ztk#-Jcm7$+K=Y`LRV_Tg|N7>Hr>Uw+116725zIGM%tQpS}bs; z8b%+Ys)Rmz^j8D2ljEYQnwk=}xq*Ia4?Vw;Kaa{Y?;%8XY~-qrc~Tf+is#29DV2vp z^IfNgdDUVS@5t?R`PHv6HQdyYr4g@kykv1Y%t}-vQJsRw!C?)KRgOwQ0Env1%#&J| zc1iBGk58P%II@_4VQ~`bTAW46^q~aDDZ8`z31zA|*7{6*nu@seCp=OnF20A2O_BBr zOSpl7A*i6BKuTI#^!ksB5#kP~uIH6sf?Nm#;825{nR3%*N7DwGV4Tfcd8 z&Bwa-c3rI$y}aAzsiczYywNn$22TxKQ6u>#R0< z#jns=;M|g=oiX{djGbm&K}}SPt96>UZp)-7Jv|)YIK^MLyg2Sf3i_JOyB!hsj#;NY!W zw}$74%)3fB^jB|=^0SDLa#5u=7JJNZsw;~>|I*mMqOng6MZ(gfWUs|H{u5sq?3|so z-rd`}cA8P8lhV=-Ieq{9Gk=6}^JWc8OUpac-^v~;P?I0_=^)oMe0g(o3M*%U zA}{t`>`E<*i>s?SqG3p85((6+p}nxHVx`#DJ$q*wke}yf##51^7DpFRQxIsJ z`uUC&6mjODsH1jY^Q~}fHmqA`dnW^R=!D1IWZu{jg1m=@%T>FJJ(_ZC3?n3+&+%!c zH14q}d>*S5#=C$2X;k)5&%$dJ-$!^3AD+BZ_aUo(?#!7p-9Ier($dmOlKBJ##@O?| zuTS59I?Y@1>X+C52$v25M{267s^00VsHnif3E&i0o{1E-(9D+!6SHYXjwOe+eOP{8 z<#V3H6(lL0l(NoFeMYGt-oKJEl67;s(rm~g1Hwtj(lu0v2+83-mOq=YZrFa{*@{(b z;(>RD+p>1>_9B?@rm>Zcc{5T6e^Iy?IY2IA(XMl49Gur z>!~N}Xkg{NwAJM0eJ@|WY+2OIoPLBXR{ZJvyPqDqbR*JjOk5*D^;DhAd=hd=TyL*2 zN|@5}Y@L^5x5rEZo|RW4@d?sm%;4bFyu7>w;ByJICta>12a)nd0<{cYC8wb6;N#>x zsVqJ9gp>@I>({TM0rI_fkJ2*!?V)UQPPZp*$3dL~`}UncPI@YA;>$^(t(cydv2pYc zL4zdn;6J{+*|2{7C*Yt&E`^}0lY^g-8nZ`VEoiqTEPKVA^4sAG;LwC!Lta)u+V~VA zWR&8@b04_cqBdN7?t4a^dgj&hv%h~En|C!vojQGb*oWQoBYqqxW@A8l4HU#rYRb|v zcvQ(Vk5aftS0?T`Vrg5eTtHIrZZYd5oIiqa8{^OWK782n2mX{kV5K32(vX5(r(3UR z#%AaD-f(?Vn7I5%mA{g#VUj%vQOe%n%dEY8g^o-om{WJz`+B!E88HAwmqt5tV# zr{ic_)5()3cL@p#ej6XRLC&^Z!^mcd$LRYW{Pd|gLcl~mh~ErpP6?T|8XIMeAgNDI zweHN1MIXqV-H>IWr53O9+|eq(R{K`zt_-t={c0JeO5}Pt4j1=vaMW-|xl_md6R^Bt zfN-HFZf{_=v9TejVYoTjVuyfU$fZ3kk*BWu3tlXbF$-@JKKT2)o0o^;`pVuXY! z`ksu@;@@BW97oNrlr82hN9num=rPRe3iU?46pE1#6z=Nkip5beeSde0DartI?Jm+H zo7G3(Tfb@3VQP5Y_6Rh)%t(*yc=p3wf%DJv8_(H2$JBNe-h(8L)b*epp4fUeDDI3ce32K~kq-EZjB6oDPdcM1L_So5Gx z=Gh!&8^;|zx;8~7NUtWS9M->c1Bxz`>Nzk;(;W-?(WGUVAm_s{hgc+?omTMUWF-B> z>p#B^1CklmPRUKbW#OkQzrp7D`x0VQ36R;=iKn#-WzzBI*HZ&bqbu%t)6;1qZ9P3~ zMx7X_8($FT%)NtJj`R5Rpx5ATvC_RK2puDIgPxA=qR7e}badV|xePnf`O!Y3g76(W z^dmMUax>j;uhL!QfXx0H2oBrxP2k$zi+igiJQuFtVRxga>v$Rx5>m3bUeK6r#YSz- zHjNiv?K&5ZuW%o!Ol%7*oTWWmm8R@V&C)bnHqz>M);EbNDqUN-pKQTnF#U9bYb6G;-0ygWp5Fi{?>_jYqx#8x$mTI>^NA>-&4==mtR+sDs!@ zHY_nf_E#x#ogysBZc+0=fTmg}R;lYMR&AN*k;ZXh6kkyg4io2OShY)MHzrV{AB7Kf zM_M!+p6{AxNN`Nhq{(gyXYc8^Xy|yK7 z%nq$ya7jtY5$le4&{QKh!JXwXe=SfzUk62_S)@+tcMJJ zb1%Jn_eyU!eLYPLJix&r=n`i!L(QQ+`SK#smTZbXi%o7BX{Z7M0;thjx#S1mG0HZg zN8_YOI*&Di$=c4wCO~;A?hs4R2wDglDl0V_fU+iBTrJOG;9O4ESZBe=t1`C7oJG@Z z7K!Iy-XU0okVe$WdIiA*^zyY!Xtva`BoR42pq~KW&A^ci-S})sQB+jFeEG$%Ywqp~ zO42*E-)yv5NYQtrPA=ZKG1MlRx!!SUzPP?CQRkEHsYKRhYGClBt)I7~{(j+_O5;6a z1_=7yr?DkWyy|D2B@K(+CBXVjk9E?1{II=}ON~Yq;XU(!rLDj@H6vN&`3dH?O;Ig- zc3g8>SL8lBG1Nvt|K361K-Z}zt%&w@>(+e);;=SfZGqATxN~huUw6Lf%ZKA}SyyMd zlIIe=H}o%$jLXcvl~Rv?7RSIMUo6+uV|u+(Hz;Wn2P;Ls`p~``1>5HT%JK7ddN}_p z+OaCV*Y;?*L&DPcx={dduY#`3r4|c^qbZT=#V)?4^T6lsmT}MZ{N1w_AVD9EmzJvP zT-BC^Km@}uIEsO&y98hH$QOk!9plGt z=W1IAg{=&fvy%u-kv_*ADEC|-af8@RkGt(3_P(`D1Z^x~-+LE`BAny6+w}KOs0V}= zY|oj&rpJS80I3%dO>;j8kE}uIO?lrtSxDY5gEk5?+4>r;t; zFs#Kjn7$!Co~9tT{}rR1Mu{uFZ%Izqw9}1Yy0~9V+oaso^rW-TITncRa1kXDFVxNs-{w`$qirqzxKCO$iM+CDuo zc9t(MfA!zG3PKe+$@CIlONF3`_4I3{Zz^;B&(c@^2TC>n3tp$?QApDZlw#qxijJ;TGl<~=js^jyU~D*@ zot>|^H6?0*Z4%h2Nv@KCtAI?3M$Nc#b!K|H8Jx483be#yI~XsV^49f(%mB*$#z9vE; zIEBe!S}N(afT=SG8Su)eED^j`iU1=Wow1s>#6?Oyl$~S%W*xNS1t3k)t9%=#;?s@< z_pQ==ik>Pj|CVJcavyop{<^5;91liznWTNEe6NzeUp=8-)<9QtyAi{%*kTeVSGnPF z-UQ!Dv%YIi+U2)6>~xtNuF+CBqk=iPn9gIsq`+wIRPsad*9E7(r?=%vedeg4@JUriQCF=R#nt*TE&}@y;hrkT(08*7TCaZw$Cx=G4w{O_9F-N^VP9- zwo2z)FCf{_6mqYyeDdtm+QO9PdzLDnK&=gVERlGVH)TQ~g~5wk`Qkmt9EIKl8Xm!E ztkQ0!=8m_hSsV&$lY$Lghw?|!KePlL)E%=GZ7mBn^h_X{76`S8U%%AQoMjAbp()|=PShrOxu-6fX% zXAZnc^+RoPOtB6U-+)DE{&s^W7i40d8u$I+`!a0*h*HRCSAd<)4=}BHY6d3hsf1G^ zO(AdKeQ7E9nRC0PT{FP}#DTII1gDTY_WTXOPW920xi2>v)eEj!eJCw82bIM<$Hm2! z011GgvE~&0uu*08vzjmb{R?{|QaTm7zq<9VtI`Z?FlAM6r3fj+fpw9Gc=*OMH6 zZRTPwZXTg`Ic6P29sJI-Zu*Nac76|29@utysy(!ZH@GC!k5A6@2fBmG`V};Q(jn`D zygVML<%wFUk@CR;9*dGa*|`M;7C?JCprL~V4O75WZQgzO>>e>|#g@937BRIE3!qwT zY|Q38N8*p)T;tX1j4d8+&q*afu<-4>cQ#AMJNi2K#ps{1_%=}bJ}uX^-oyWzo;x+~ zv|sjT+27Qp@%Q8FsNN>6-ef!8-rh(9KCTQCOQls+XhMYsvjOh?;AS?d3y8h?LYH6l zQ$&V?jE4NjJN3Qw*PRhUAUP3Y4Z6Qhy74_wByR7DL2pb<*Rpl9aHy*mWKsUvANOnW z$7+4e6--V}X63gQxwZJ5j|t6QU&H$$AbQ(RHzw!VyNM?qZJo?=*CS#i(=A_5{P^)P zqfUla?5Fr-YTc7p@xGx>$?CJ1EyzD? zd_r}YYgDTE>^+{HeM9}pbWu`?TR$DoTj`sdf4E~cqKQ@e2y!E2)@Rs3lM%JK@yut> z{`LTy=j-dMiXFgNjh^XpQ^wJ4fKMPOWgJ`w$W=zNnov5lirr@$p=6Jw%;01Mz3Wl= z)nAk4IHcN?r2XrhRCs|-u7+aI*UE0~Padl$V8fyzCMF>fLrKRukQ8ES&c;{xK78N z?T?e1be=(R=R0tqys584B)xZ?CWQCxOiVEhtSPluuK)b7Gop&Wib^^~9?`({F?4ia zYOAO#ETGry7PpT^W$GP-sY zv<~80rMaW{2ro+3n)F_hD0_xMjnG*iPt6lfQNov+&7FmoC9sg%elut=+K5HH*Kh9*w{C=PQsDU^FUYrSZ@t z{{?4GnoN4{=JByND`@6@qjwF-l5_e}^JOZvH&C#|KFi9v(}Ys?fcMI~Qr;i+d>h=- z!>C@e$=e!YbDyaiGp$bIw;t~*+AZN2zyDl#{O8XX&~kkbYB{wo-|gSgOnLVPRUR6( zurKbV#NcrKBg*%8Ngq-jnQu!qDlYZ$8DHq{AECJ`&uAX|AU{$0_>9c$4Lq4jl3c8k ziBB@F=&Kw|U&*k=)NU=K!D3xLOh4)TBS9@hTi2rGDlvTfNFSkgDezniTjHUJ2Iid_ zx5M7Z6wZim-O5=-4NFa>DZJ75-I63roIGe)huR1`1((%XDpm!|`wLL+6?JvWT=Gvo zq9ip;bwi9JdNJ}V{hBq2puB78KnP3{2inCqH;BIpgnA}Y=Kw*T{OEGwFCyB;#MG1q z3RR}pvZqy7p*B%6jve!6a~<3pA>*M_-z0Yv6^_aY4M==dbB!ep{w8S7pTHnD7P&b` z$a)zBnN+0n4;V#>S_Bba5PU08Q?&@!1n-Nml+W^f_pZHsaYKl7 z0KUY9N1RoF2O9G7mEGSH+zivVni%(TjXRDSOlVTK>ZvnXubKbR)qnP!W=D&{c9n$m z4tIAweTFgB)k$H890ygTr#~CDLTC{ zS_(g9xVxkz_FM!n#rOVwXsO?otVT##^%E?PMEoVz|D zixUEXRl@NsG}o#h3!8QwI%ZVr{|QQ3Ah$AGmUZXc9S$RKa}AJXQV54hD3tg2w&6Ap zZ)TTSxKrl^ob~nFw}!>e_7t&wE+a3W*wRn;6qQED4mWyTkuN=}@)uRY;}XG0#4 zB?67xVX*jDN|qMq3HG5z1G`h8b=HHxBhZIZPqa1sghB=X8SW4}(O=`%&`wmYDJ+_# z&DynVtJ0GkN2t+k(ym9B^&!^55qJ(R$rd<+nEv|fi{1`q<^(uo6sW{u0Zjyaq>JnK zS7pLd!7xs?jHH&NAZ#RB=4&O#2O)cC7#sIGMJ;dPIDK{SkSTcD-HleqFOtyh_h9x9r^fJd}%%F7cKSS&0-YqNmRX zQ~=cBd?)%aPDDLKW1HOjkj*Ko@Ln`z>5u>DNVcSbF%r|O%gVNDk45JQmF&a@*e`&( zIVqq3DJ`jxnQFa!ZrzH9!;d*Tw7-nqs}U`5vbQ(gKsV_Wk#Y2k-4`>G)-bTdBjHId zgDn%*@Y4Kdn#ZFezCG^oP}?_0Rq^e^|Kd+)A~S6R7v{$+XB=(v=I0CYIXn7xy_vfi z78d3&G}N}_eto2_2WL>6g@Hn~*}q?6$z^N4(0;l0toF0I(*xO=R$XJ+3nD++-=jfp zjHYZ8HLE|FVOpaK$E98m{Q>ne zQ?#v)+Vtv;O)+~PfA-P#;}AMuamCZQ>)ZoBwXZ*o>}RGPyQw@nR~w%4-sntVj+-*` zHc`2U1~?JJ_uhpGlE-Ul zL8staGMoF4<5%Xv)sZ!B7wn3)a&d9t5fv5HZwGxaIX_-v(OVt>A7e15Rcm@*{!BNF zgYaH*Qm)iRHX&#=;E5fXE?QoipZZlNO9R&z>S@_Z`G0`5Ka<627uih@-aBKoUDEEx zuWKD&Y1~5e>IsSA_rr!KB0H(kM(4+43Ji;{LNCYCEF-@&{jdpX5p7 znQhaZ0$Wz&beTP^omVe6jkI-0$mDlOOALb_WJ{tBBQ!_AH`K$g=g>J1n^HSGry39- zK$QHk@&a(Tj7q{-M36jecCM43&Jem0SsC=JKKiuJqcZd}xBerQn8DC}`Gbbzq??%D zl2(=X$R&nmb`qiReyPCEq7Llclx?NU#1}NXyh~0_j(8=8p&byvG&1=M%H#dwXEF@j zf1Vt+bBMJV_AUz)`MIxtg6i3p z+V@$+Rgn|(#ujm&5=gRv1M9q{WfDZ?1A>C7Bhye@5{P{ObbmbBow=5hrI?v+FGDn7 z`J-iwblx=W{<>`sz4`b32b?b)sRw}m+e!U?$@r~;AdbDqebzG8gPkS@<6-+TX8->y zBmQ>^sMYV9=;?AQcX3eU&!-#f>1}#+9}q@Qf2Flt+mgb(EYoz_rn&>^@h{Ijx;nDDYb9c*&P97qx-Hvsjr}jW zudYxXK$g?M1TWZk{BOsOCeB32N|r zUbti8&ARTwYt~28?9On^iduIZJ!n|02dCzWy?h(b%FBbMdQ6<|i!j&&T2mciF$Sv1 z&6VibWaJMdi9$;p>hYS%I>cs9jHeX)G2Pb;tNFh$r14iVr|V=}CctC{dT1T^0@At@ zUl14oiLc=fcMhrmHFCVXb+fpI>fr;i0Kbrty;7Yf}-dtY?d zn||S4{nE{n>2^CLOe8a<^2&WHSFIAY>5@8i@?<>xK66tohJCh2p8yb1w73hnxw#dn z57?x6(c4zq+ik8&e?q11qhP8+C+*@t240I!(#s3K2qh!4^xJ4^tWy^t$H^`xx8;ZK zt=F;#MjyS0vnWQPM9;z{D3N-y>T%+>)9K z7gao*xld)iiUCDpfYNx82HDuyKo${y=Xx!HeylFcXk-9@D8I!^a~A&s_|3_>qL!Zz z5ogji?sHE#iJ=46(g+ncX~bkle@o9aGu?6sVm1W8=vM`I)Gj0cS+o-OeSCaS$9(TS zcyQz%#Nb^8~`9gLGl))>n&6u%@flvv(V*Rvv3+ljKWta1|WC@wc(KL@EJsNU1;co>9sHCv9M&jWnc9@B+tO<49}w2ww0vaMs#Sy($M>}0L~_nF(CdyW z+eijl;BLfh(P`pXhB1MY^2cL_YZ2KWLjtPUz)b5-iMLFaAC8;8-rxUFT(X5%HTo`G zU|Dc?k&_`C@p3Ssw;gWCbedPo^zfo`c-4tf^5Vm&R@N7H(KZdK)feyeMX% z?Y+7FZ4-2rsfAy4f7&Pfa_`U(&(N^X6qeiR%j*>|e_P;I$V|wKmoIU_p0C3hIbgZr z=+h>*kEw*=h2(O8Ktp69e8dyZnEAgAkQWF(c>n(7+d#h}IP~Ta=`i(z2d23iET`pDkBdrgss1yXkL@bJ`nMM*eSB2kwew(j_xXA{J)eHm#& zvFrhB4WMi_jE_poON&Zq5nIu^(<)2ZVIa3eVzt6+^0e^fyoo=3qAg_NcTfpJ<2!O9os-ESmWTCLp7Pk|`4K`(b3xLBr(h zn)>N(Q0t0-xXdu?;JAxoWa%Y(~QNn1Z8W_-xx`)$g^21E?b zzB>iS!Z)#^Q^s6QUO~YKhWwh4A(}9=pcQ;y7Ui9!Tih{>ec^;PA!bw1+C%4&n7D2qk`LStJqu0_e_GE)19dO_dwGa7pGUIV1%4|-5eE^&w zRoB~pFJM=~3cuPLf<5FJJ_ zsXbCqwadtH@Z(;>3xVbK_4jWi;}D>n$dbZK{E6eC)C+92DoFVhft_AwH);{jE|$q@ zcESX?UkPbb6`a5n&ancuE!&Dlo*ve7Qss!n^5aL9omvkqKg6s(@DD$yRqZ*z$WS4p zJJ|PIK0aYszqpZ|%*?+?R0A4gfFKVJnetTEZ51a#7PYZQ#(%(0McnN641fVIW|$4 zXCipdVzezQf4q2JPhT}VX!@sd%Iv)t#7+fbinxdX#uAA&8d%NzbJ}w@x9>tzg2V-5Nq?70mgH|ZAu)iN=qIyKs^Rl1A(2F&@Go=`{T6SjfE1(};3<%2g|^y)`%+rU$^ zb902ag~a2PiQY`5+!Xus%Wcp;j@0^=yZz) zvW^fLetc_%aeBSd-DmBaQVk73=U&UkwCYJ@*pHaIq%q3;KK4*%E*^0M-dy7?)5UFw za?eHA5AWX(<_$y=;#Od2c-{t++Gx56Pl`lfLz)KScEB_(;GwMt%f8hqm;BG;AUk0} zK(8nwy8<{&Oia`hZavjd7KM$1+-mQX@LHN1>w`nv02)`88~}wN{s07!gA3dcGIN6* z$kGZBP)K)y>iqSs?>~FK(ZtfTGRJo!;pV%~l7VUTTbT$JG7gL2pZJk0@o~>ze?3F$ zL}AZQH~tcd7*0V~6p!#~%XgxYnNMrdF`o9{^&Iy|0^k;3M-$B1&8U^=lA6$7Y9w7a z58fOV?>vsd6j&T{l=)gYZ_)@wMX?88qO+t<({p2b3Y`J`u#f(#!FUZ~Q4N_c6V`!E zGeA1udbf5=9X)y1(h4Zb{NuBKfRW-5tXGkXcp<1m`A@<8_0@&nKl8!mkp>Q#a|Fz4 zAM)Ih7?1BL#-vO|dCnz}T}N4n#{>&;@RH#?bQ6Oh(sgoepH~JQ9Qm5~Png{66WLBb zz8tTKl-7ma7j_bAG;*3*N3IcMjzv7r;!t@Vebj*$sdH3b}dPlIOsIv*ai|!OT37 zG=}Rhp;ENqQi(!FrDR_HtJ^x;5ppy!;~tN+So`?2l#~?FR7jPdCrw42)GvQ{ zwj#E5IP{i3$Es~fdDQn;+9NCYN3_W>FDhg-3UtgyZTNXxFxqz5{kQG@>{U6awfdxN zU^0d14_6Tggx!38;`Vk-m%;N(cx){>O|&eN7zhQ`t3kcKyjNl0#l5;Tj7TAyyg)D> zz@QDp5jz2k`snp71uHGFeXBgslex|f#gTC#R%ustbiJ;<0cS{4h>yYm--lLN>>4`G zFZ+ti>ya$a0!)OFDRQhmudpz+BGbDD?%rsOMqx_DsU9@zu;oJ_7P7F&IJfWI8OF5r zQ(z17dIJ>kytK<-rhiP)eQbJno8%%mcN!x4R)3i$XPz`u*yvkcep5ozlJIG1-iX0 zNw#(9x#?aXDc!}jfB!H%*$qYyrDMsTAVvLwrer1x!bY&;)Y|5kFJBVFDv z{ifWwLtxx&+v|&IhM*TP8Ai4raub>2A{7K~-FoCnLN-&hyc%ae>@;iyx_W`c@s?>! z+`nQaopl%{Rsj7l&XWkpK8#(mHz=ztD?3B-xrw&x;#7-9iKhpdS|vx>kG5t>oG^UW z+HK`cXfzuEJ4mSpdVd06Nvc6n_#)jgZ=!+8!TFhSE#j9()*wNPs*)nWY|H%T zqqs&@HxaQE(sKK^ZMn3XS4v>2xFaX8b&Z)w?>MwGeUu};fDU5_0a#0Ze&s--5 zi6zYfb%q)ZJG}(}DcY(>GKMNYuD_Je+t&&y)uv9WNmBn!QEkZ4&cKR^$W5|`%AK8t;jkq9;gt6tKG=A zW=vpV$6mU>SHZ>&q7Ggz(w5ZYzPTu1UH`$sZ;DmgL*OpDhGs}lf3O)|+3-PQl{2G2 z157#>5kK$`-n+Aj`2`q0(+e`d=vdNo1V#R{l712}jp#wn#VTI}J|Kerz6m0Z+?V;M z7q^jc3Csi$D#d>kGp725*U#f|AvX;S*m%qCMSu$*z4o2Fq9G3KIl3!JxggF(phN=0 zfXejrxNK+8>(UuJw1n}fVx|uoilR-)Vm2vgo3yEu;1bM{GHNr{tuTI%&o?$AeIBkb z2eO1r&44SRoekJW$|~vVA#IbwiE(Cs(mdci5q(z4dBA#03h(Taa)0B?V?JTAh|w z*WwWns(kpTK;|z|X!P`W9!=TOvC^dnWR5rftI;L!lptcrwF47~Z8Km}J0$Db2AI1_ZAHL=#?;yn%s2S`0rLefoY3?HR8sS(>9E!-c;Y0&9z$_wnu)j>(BC!4Wcx zgVQkuM{6a-8lLQbtAh4-~1Sg_9k|1PXr!1Rp0|*L#>#aC)X*E_n04DHZ@}eZ1e+>ljXjhRg zZ6-s`PYc!?*x|rt|irq|NM1&;5* zGy~rSV3P*YFF5`fFUJBzXH%wGgK$%@%IXeXDLsAI_trB?Pd5QdO;>vd*R>mzNC5d6?n0RfV>SJOa z$CC7R@TwD`4L#^F$!p)w=G5X9BE0*3L|OzKI^rbX6D3ZU zb7KoXT`LuhCwP4>3Q^Dg^Z44IUJwBO0LIIS=!U=|poz3Tq&{IzR%Us@0i)jLXa|UW zgM<#yW&(ISu?}*uKyPw-V917~b0lU&kPJYjDh^ndeV;t;_#F5$DipH)Ji@}N1h)}9 zipFOMuM!Bt++F^)(L?~W1i)X?jSwF(RJ7=QlV#x<5F8s`I zhdLsLy&<$@OY%?P#M9JPV4!@wm&5i~_wtgMemi+V58i`y3FIXxrTy@V>*YIEzmLht$Png&FbA+9C{T&r7U@=Uo}}~q>px$dkdq_sn*~Wo zA$T3w0SXyl!{r04_jnb1B%}A`6%sm+rn=g(26VS3n$1WAg`*P&qxtn&Q997y8O9FN z*D?{7gyC4eiJ6b^eW4t6MsKejD>fd>{(=!pm14@EyG>W8Nm!=vnr^mb0A~@V3`s>G-3PiQJ_HS% z$;CYq9`5dBR%-}VugRZT#1Z`@c^L>H(m6?y17)g%H%k#Y2as4H-}Prb!9k!#+Dg1i z&{M`h{~5+Z26Gbdi_V;!yUZ(fY#soyyYcEH*@aCjeoMlo`5RQjB*f7X^oZ&(BNNva z(OQ9CG{KB>l40=OQrFYSTXD$i9Z2&L=`r0}qBjm>bqtyYGrVm9Z@Tfv!00oaR0fFhc6Blp%CvzoGkpKVy literal 0 HcmV?d00001 diff --git a/AI Guardian/Emotional_baseline/out_final/feature_preview.txt b/AI Guardian/Emotional_baseline/out_final/feature_preview.txt new file mode 100644 index 00000000..3ac69772 --- /dev/null +++ b/AI Guardian/Emotional_baseline/out_final/feature_preview.txt @@ -0,0 +1,85 @@ +Sample nonzero features for first test doc + +--- BoW (1-gram) --- +patient: 3.0000 +oxygen: 2.0000 +20mg: 1.0000 +2l: 1.0000 +85: 1.0000 +92: 1.0000 +administered: 1.0000 +alert: 1.0000 +bpm: 1.0000 +breathing: 1.0000 +cannula: 1.0000 +currently: 1.0000 +drink: 1.0000 +encouraged: 1.0000 +furosemide: 1.0000 +heart: 1.0000 +nasal: 1.0000 +rate: 1.0000 +remains: 1.0000 +responsive: 1.0000 +reveal: 1.0000 +saturation: 1.0000 +signs: 1.0000 +supplemental: 1.0000 +tolerated: 1.0000 +Vocab size: 242 + +--- TF-IDF (1–2) --- +currently breathing: 0.2249 +responsive encouraged: 0.2249 +2l nasal: 0.2119 +85 bpm: 0.2119 +administered patient: 0.2119 +rate 85: 0.2119 +breathing: 0.2019 +saturation 92: 0.2019 +85: 0.1936 +92: 0.1807 +alert responsive: 0.1807 +responsive: 0.1807 +furosemide 20mg: 0.1706 +oxygen 2l: 0.1623 +20mg administered: 0.1554 +cannula: 0.1554 +drink water: 0.1554 +encouraged drink: 0.1554 +nasal: 0.1554 +nasal cannula: 0.1554 +supplemental: 0.1554 +supplemental oxygen: 0.1554 +2l: 0.1523 +drink: 0.1466 +encouraged: 0.1371 +Vocab size: 745 + +--- TF-IDF (1–3) --- +alert responsive encouraged: 0.1876 +currently breathing: 0.1876 +responsive encouraged: 0.1876 +2l nasal: 0.1767 +2l nasal cannula: 0.1767 +85 bpm: 0.1767 +administered patient: 0.1767 +heart rate 85: 0.1767 +oxygen 2l nasal: 0.1767 +patient alert responsive: 0.1767 +rate 85: 0.1767 +rate 85 bpm: 0.1767 +breathing: 0.1683 +oxygen saturation 92: 0.1683 +saturation 92: 0.1683 +85: 0.1615 +92: 0.1506 +alert responsive: 0.1506 +furosemide 20mg administered: 0.1506 +responsive: 0.1506 +furosemide 20mg: 0.1422 +oxygen 2l: 0.1354 +supplemental oxygen 2l: 0.1354 +20mg administered: 0.1296 +cannula: 0.1296 +Vocab size: 1331 diff --git a/AI Guardian/Emotional_baseline/out_final/lexicon_preview.csv b/AI Guardian/Emotional_baseline/out_final/lexicon_preview.csv new file mode 100644 index 00000000..1c169df9 --- /dev/null +++ b/AI Guardian/Emotional_baseline/out_final/lexicon_preview.csv @@ -0,0 +1,6 @@ +pos_count,neg_count,pos_rate,neg_rate,anger_count,joy_count,sadness_count,fear_count,surprise_count,disgust_count,anger_rate,joy_rate,sadness_rate,fear_rate,surprise_rate,disgust_rate +0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +1.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +1.0,0.0,0.02564102564102564,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +1.0,1.0,0.025,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +1.0,1.0,0.025,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 diff --git a/AI Guardian/Emotional_baseline/out_final/settings.json b/AI Guardian/Emotional_baseline/out_final/settings.json new file mode 100644 index 00000000..27d4969f --- /dev/null +++ b/AI Guardian/Emotional_baseline/out_final/settings.json @@ -0,0 +1,23 @@ +{ + "csv": "New AI spreadsheet - Sheet1.csv", + "text_col": "nursingNote", + "label_col": "state", + "group_col": null, + "test_size": 0.2, + "min_df": 2, + "n_splits": 5, + "with_lexicon": true, + "extra_label_words": "", + "stripped_tokens_effective": [ + "comfort", + "comfortable", + "normal", + "normality", + "normally", + "sick", + "sickness", + "uncomfort", + "uncomfortable" + ], + "seed": 42 +} \ No newline at end of file diff --git a/AI Guardian/Emotional_baseline/out_final/sprint2_cv_results.csv b/AI Guardian/Emotional_baseline/out_final/sprint2_cv_results.csv new file mode 100644 index 00000000..ce676994 --- /dev/null +++ b/AI Guardian/Emotional_baseline/out_final/sprint2_cv_results.csv @@ -0,0 +1,21 @@ +Fold,Features,Model,Accuracy,F1_macro +1,TFIDF(1–2) + lex,LogReg,0.975,0.9463203463203463 +1,TFIDF(1–2) + lex,LinearSVM,0.975,0.9463203463203463 +2,TFIDF(1–2) + lex,LogReg,0.975,0.9274853801169591 +2,TFIDF(1–2) + lex,LinearSVM,0.975,0.9274853801169591 +3,TFIDF(1–2) + lex,LogReg,0.975,0.9270440251572327 +3,TFIDF(1–2) + lex,LinearSVM,0.975,0.9270440251572327 +4,TFIDF(1–2) + lex,LogReg,0.95,0.9538461538461539 +4,TFIDF(1–2) + lex,LinearSVM,0.975,0.9775910364145659 +5,TFIDF(1–2) + lex,LogReg,1.0,1.0 +5,TFIDF(1–2) + lex,LinearSVM,1.0,1.0 +1,TFIDF(1–3) + lex,LogReg,0.975,0.9463203463203463 +1,TFIDF(1–3) + lex,LinearSVM,0.975,0.9463203463203463 +2,TFIDF(1–3) + lex,LogReg,0.975,0.9274853801169591 +2,TFIDF(1–3) + lex,LinearSVM,0.975,0.9274853801169591 +3,TFIDF(1–3) + lex,LogReg,0.975,0.9270440251572327 +3,TFIDF(1–3) + lex,LinearSVM,0.975,0.9270440251572327 +4,TFIDF(1–3) + lex,LogReg,0.95,0.9538461538461539 +4,TFIDF(1–3) + lex,LinearSVM,0.975,0.9775910364145659 +5,TFIDF(1–3) + lex,LogReg,1.0,1.0 +5,TFIDF(1–3) + lex,LinearSVM,1.0,1.0 diff --git a/AI Guardian/Emotional_baseline/out_final/sprint2_cv_summary.csv b/AI Guardian/Emotional_baseline/out_final/sprint2_cv_summary.csv new file mode 100644 index 00000000..1672ac68 --- /dev/null +++ b/AI Guardian/Emotional_baseline/out_final/sprint2_cv_summary.csv @@ -0,0 +1,5 @@ +Features,Model,Accuracy_mean,Accuracy_std,F1_mean,F1_std +TFIDF(1–2) + lex,LinearSVM,0.9800000000000001,0.011180339887498959,0.9556881576018208,0.03219826993246655 +TFIDF(1–3) + lex,LinearSVM,0.9800000000000001,0.011180339887498959,0.9556881576018208,0.03219826993246655 +TFIDF(1–2) + lex,LogReg,0.975,0.017677669529663705,0.9509391810881385,0.029823679075897343 +TFIDF(1–3) + lex,LogReg,0.975,0.017677669529663705,0.9509391810881385,0.029823679075897343 diff --git a/AI Guardian/Emotional_baseline/out_final/sprint2_report.md b/AI Guardian/Emotional_baseline/out_final/sprint2_report.md new file mode 100644 index 00000000..38550baf --- /dev/null +++ b/AI Guardian/Emotional_baseline/out_final/sprint2_report.md @@ -0,0 +1,64 @@ +# Sprint 2: Basic NLP Features & Baselines (Leakage-Safe) + +- **CSV:** New AI spreadsheet - Sheet1.csv + +- **Text column:** `nursingNote` | **Label column:** `state` + +- **Groups:** groups by normalized text + +- **Dedup:** exact duplicates removed after scrubbing + +- **Leakage guard:** stripped tokens ['comfort', 'comfortable', 'normal', 'normality', 'normally', 'sick', 'sickness', 'uncomfort', 'uncomfortable'] + + +## Feature extraction + +- BoW (1-gram), min_df=2 + +- TF-IDF with bigrams & trigrams (captures phrases like *not happy*, *very tired*). + +- Lexicon counts: pos/neg + six emotions (anger, joy, sadness, fear, surprise, disgust). + + +## Baseline models +- Logistic Regression (balanced) +- Linear SVM (balanced) + + +## Single-split results (sorted by Macro-F1) + + + Features Model Accuracy F1_macro + BoW(1) + lex LogReg 1.000 1.000000 + BoW(1) + lex LinearSVM 1.000 1.000000 +TFIDF(1–2) + lex LinearSVM 0.950 0.906020 +TFIDF(1–3) + lex LinearSVM 0.950 0.906020 +TFIDF(1–3) + lex LogReg 0.925 0.884162 +TFIDF(1–2) + lex LogReg 0.900 0.822402 + + + +**Best (single split):** BoW(1) + lex + LogReg + +Accuracy: **1.000** | Macro-F1: **1.000** + + +**Confusion matrix** saved to `confusion_matrix.png`. + + +## 5-fold CV (GroupKFold; mean ± std) + + + Features Model Accuracy F1_macro +TFIDF(1–2) + lex LinearSVM 0.98 ± 0.011 0.956 ± 0.032 +TFIDF(1–3) + lex LinearSVM 0.98 ± 0.011 0.956 ± 0.032 +TFIDF(1–2) + lex LogReg 0.975 ± 0.018 0.951 ± 0.03 +TFIDF(1–3) + lex LogReg 0.975 ± 0.018 0.951 ± 0.03 + + + +## Environment + +- python: `/opt/anaconda3/envs/sprint2nlp/bin/python` + +- numpy: 2.3.3 | pandas: 2.3.2 | sklearn: 1.7.2 diff --git a/AI Guardian/Emotional_baseline/out_final/sprint2_results.csv b/AI Guardian/Emotional_baseline/out_final/sprint2_results.csv new file mode 100644 index 00000000..8896f517 --- /dev/null +++ b/AI Guardian/Emotional_baseline/out_final/sprint2_results.csv @@ -0,0 +1,7 @@ +Features,Model,Accuracy,F1_macro +BoW(1) + lex,LogReg,1.0,1.0 +BoW(1) + lex,LinearSVM,1.0,1.0 +TFIDF(1–2) + lex,LinearSVM,0.95,0.906020066889632 +TFIDF(1–3) + lex,LinearSVM,0.95,0.906020066889632 +TFIDF(1–3) + lex,LogReg,0.925,0.8841623785020012 +TFIDF(1–2) + lex,LogReg,0.9,0.8224019167415394