Skip to content

Commit

Permalink
add analysis on mean sample position
Browse files Browse the repository at this point in the history
  • Loading branch information
sappelhoff committed Jan 31, 2022
1 parent 4f6987a commit 59fbe99
Showing 1 changed file with 295 additions and 0 deletions.
295 changes: 295 additions & 0 deletions analysis_behavior.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,301 @@
"print(f\"mode {scipy.stats.mode(df_nsamples['n_samples'])[0][0]:.1f}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Compute mean sample position for each number / probability"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Trials consisting of a single sample have no defined \"relative\" sample position\n",
"# (it could equally be 0 or 1), so they must be dropped for this analysis.\n",
"# Count how many AF/AV trials that affects (one row per subject / task / trial).\n",
"(\n",
"    df.loc[df[\"task\"].isin([\"AF\", \"AV\"])]\n",
"    .drop_duplicates([\"subject\", \"task\", \"trial\"])[\"n_samples\"]\n",
"    .lt(2)\n",
"    .sum()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# compute mean normalized sample position per subject, control condition, and outcome value\n",
"_tasks = [\"AF\", \"AV\"]\n",
"_dfs = []\n",
"for sub in np.unique(df[\"subject\"]):\n",
"    for task in _tasks:\n",
"\n",
"        if task_not_present_for_subject(sub, task):\n",
"            continue\n",
"\n",
"        # trials with a single sample are excluded: their relative position is\n",
"        # undefined and n_samples - 1 would be zero\n",
"        _data = df[\n",
"            (df[\"task\"] == task) & (df[\"n_samples\"] > 1) & (df[\"subject\"] == sub)\n",
"        ]\n",
"\n",
"        # min-max normalize the sample index within each trial:\n",
"        # z = (x - x.min) / (x.max - x.min)\n",
"        # samples are zero-indexed, so x.min = 0 and x.max = n_samples - 1\n",
"        # --> first sample = 0.0, last sample = 1.0\n",
"        _df = pd.DataFrame(\n",
"            {\n",
"                \"sample_pos_norm\": _data[\"sample\"].to_numpy(dtype=float)\n",
"                / (_data[\"n_samples\"].to_numpy(dtype=float) - 1),\n",
"                \"value\": _data[\"outcome\"].to_numpy(dtype=float),\n",
"            }\n",
"        )\n",
"        _df[\"control\"] = {\"AF\": \"partial\", \"AV\": \"full\"}[task]\n",
"        _df[\"subject\"] = sub\n",
"        _dfs.append(_df)\n",
"\n",
"df_values = pd.concat(_dfs)\n",
"df_values = df_values.astype({\"value\": int})\n",
"\n",
"# form means in each subj (one row per subject / control / value)\n",
"df_values = df_values.groupby([\"subject\", \"control\", \"value\"]).mean().reset_index()\n",
"df_values"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# For each task, trial, option, outcome ...\n",
"# --> compute the \"probability\" (frequency) of that outcome occurring\n",
"data = df[df[\"task\"].isin([\"AF\", \"AV\"])][\n",
"    [\"subject\", \"task\", \"trial\", \"n_samples\", \"sample\", \"action\", \"outcome\"]\n",
"]\n",
"data = data[data[\"n_samples\"] > 1]\n",
"\n",
"# go through each subj / task / trial separately\n",
"grps = []\n",
"for meta, grp in data.groupby([\"subject\", \"task\", \"trial\"]):\n",
"\n",
"    grp = grp.copy()\n",
"    # default for rows that are not covered by any action group below\n",
"    grp[\"outcome_freq\"] = 0.0\n",
"\n",
"    # go through each option (left/right) separately\n",
"    for action, _grp in grp.groupby(\"action\"):\n",
"        # calculate \"experienced probability\" (frequency)\n",
"        # specific for the option (left/right)\n",
"        freqs = _grp[\"outcome\"].value_counts() / len(_grp)\n",
"        # compare with a tolerance: a sum of float fractions need not be exactly 1\n",
"        assert np.isclose(freqs.sum(), 1.0)\n",
"\n",
"        # write the frequencies back ONLY to the rows of this option; otherwise an\n",
"        # outcome that occurs under both options would end up with the frequency\n",
"        # of whichever option happens to be processed last\n",
"        is_action = (grp[\"action\"] == action).to_numpy()\n",
"        grp.loc[is_action, \"outcome_freq\"] = (\n",
"            grp.loc[is_action, \"outcome\"].map(freqs).to_numpy()\n",
"        )\n",
"\n",
"    grps.append(grp)\n",
"\n",
"_data = pd.concat(grps)\n",
"\n",
"# sanity check that we only added a frequency column without changing anything else\n",
"pd.testing.assert_frame_equal(\n",
"    data,\n",
"    _data[[\"subject\", \"task\", \"trial\", \"n_samples\", \"sample\", \"action\", \"outcome\"]],\n",
")\n",
"data = _data.copy()\n",
"del _data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
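"# bin the frequencies using nbins evenly spaced bin edges between 0 and 1\n",
"# e.g., nbins=5 --> edges array([0. , 0.25, 0.5 , 0.75, 1. ])\n",
"# (the code below uses nbins=6, i.e., edges at 0, 0.2, 0.4, 0.6, 0.8, 1.0)\n",
"# np.digitize then assigns the following bin indices (shown for nbins=5):\n",
"# if right=False (left edge included, right edge excluded):\n",
"# bin 1 = [0, 0.25)\n",
"# bin 2 = [0.25, 0.5)\n",
"# bin 3 = [0.5, 0.75)\n",
"# bin 4 = [0.75, 1.0)\n",
"# bin 5 = [1.0, infinity)\n",
"#\n",
"# if right=True (left edge excluded, right edge included):\n",
"# bin 1 = (0, 0.25]\n",
"# bin 2 = (0.25, 0.5]\n",
"# bin 3 = (0.5, 0.75]\n",
"# bin 4 = (0.75, 1.0]\n",
"# bin 5 = empty (would only hold values > 1.0)\n",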
"nbins = 6\n",
"bins = np.linspace(0, 1, num=nbins)\n",
"\n",
"# variant 1: bins that include their left edge, stored on `data` itself\n",
"data[\"outcome_freq_bin\"] = np.digitize(data[\"outcome_freq\"], bins, right=False)\n",
"\n",
"# variant 2: bins that include their right edge, stored on an independent copy\n",
"datacopy = data.assign(\n",
"    outcome_freq_bin=np.digitize(data[\"outcome_freq\"], bins, right=True)\n",
")\n",
"\n",
"\n",
"# sanity check bins (see above)\n",
"# ... no frequency is zero or negative\n",
"assert data[\"outcome_freq\"].min() > 0\n",
"# ... with right=False, the last bin holds only frequencies of 1.0\n",
"_last_bin_freqs = data.loc[data[\"outcome_freq_bin\"] == nbins, \"outcome_freq\"]\n",
"assert np.allclose(_last_bin_freqs.to_numpy(), 1.0)\n",
"# ... with right=True, the last bin stays empty\n",
"assert (datacopy[\"outcome_freq_bin\"] == nbins).sum() == 0"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Which one to pick, right-open or right-closed bins? --> use the right-closed variant (right=True)\n",
"data = datacopy"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# compute mean normalized sample position per subject, control condition, and frequency bin\n",
"_tasks = [\"AF\", \"AV\"]\n",
"_dfs = []\n",
"for sub in np.unique(df[\"subject\"]):\n",
"    for task in _tasks:\n",
"\n",
"        if task_not_present_for_subject(sub, task):\n",
"            continue\n",
"\n",
"        # NOTE: `data` was restricted to trials with n_samples > 1 when it was\n",
"        # created, so n_samples - 1 below is expected to be non-zero\n",
"        _data = data[(data[\"task\"] == task) & (data[\"subject\"] == sub)]\n",
"\n",
"        # min-max normalize the sample index within each trial:\n",
"        # z = (x - x.min) / (x.max - x.min)\n",
"        # samples are zero-indexed, so x.min = 0 and x.max = n_samples - 1\n",
"        # --> first sample = 0.0, last sample = 1.0\n",
"        _df = pd.DataFrame(\n",
"            {\n",
"                \"sample_pos_norm\": _data[\"sample\"].to_numpy(dtype=float)\n",
"                / (_data[\"n_samples\"].to_numpy(dtype=float) - 1),\n",
"                \"freq_bin\": _data[\"outcome_freq_bin\"].to_numpy(dtype=float),\n",
"            }\n",
"        )\n",
"        _df[\"control\"] = {\"AF\": \"partial\", \"AV\": \"full\"}[task]\n",
"        _df[\"subject\"] = sub\n",
"        _dfs.append(_df)\n",
"\n",
"df_freqs = pd.concat(_dfs)\n",
"df_freqs = df_freqs.astype({\"freq_bin\": int})\n",
"\n",
"# form means in each subj (one row per subject / control / freq_bin)\n",
"# NOTE: subj 20 doesn't have samples with low outcome_freq (i.e., empty first bin)\n",
"df_freqs = df_freqs.groupby([\"subject\", \"control\", \"freq_bin\"]).mean().reset_index()\n",
"df_freqs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Two panels: mean normalized sample position by sample value (left)\n",
"# and by binned within-option outcome frequency (right).\n",
"with sns.plotting_context(\"talk\"):\n",
"    fig, axs = plt.subplots(1, 2, figsize=(8, 4))\n",
"    ax1, ax2 = axs\n",
"\n",
"    ci = 68\n",
"    scale = 0.75\n",
"    dodge = True\n",
"    # settings shared by both panels\n",
"    _shared_kws = dict(\n",
"        y=\"sample_pos_norm\", hue=\"control\", ci=ci, scale=scale, dodge=dodge\n",
"    )\n",
"    # df_values and df_freqs already hold one mean per subject / control / level,\n",
"    # so they are plotted as they are\n",
"    sns.pointplot(data=df_values, x=\"value\", ax=ax1, **_shared_kws)\n",
"    sns.pointplot(data=df_freqs, x=\"freq_bin\", ax=ax2, **_shared_kws)\n",
"\n",
"    # reference line at the middle of a trial\n",
"    ax1.axhline(0.5, c=\"black\", lw=0.5, ls=\"--\", zorder=0)\n",
"    ax2.axhline(0.5, c=\"black\", lw=0.5, ls=\"--\", zorder=0)\n",
"\n",
"    # label each frequency bin with its center, derived from the bin edges so\n",
"    # that the labels stay correct if nbins changes (nbins=6 --> 0.1, 0.3, ..., 0.9)\n",
"    _bin_centers = (bins[:-1] + bins[1:]) / 2\n",
"\n",
"    ax1.set(ylim=(0.4, 0.6), xlabel=\"sample value\")\n",
"    ax2.set(\n",
"        ylim=(0.4, 0.6),\n",
"        xlabel=\"frequency (binned)\\nwithin option\",\n",
"        xticklabels=[f\"{_center:.2g}\" for _center in _bin_centers],\n",
"    )\n",
"    ax1.set_ylabel(\"mean sample position\\n(normalized by trial length)\")\n",
"    # NOTE(review): the legend removed here is created again by ax2.legend(...)\n",
"    # below -- confirm whether the right panel is meant to show a legend at all\n",
"    ax2.get_legend().remove()\n",
"    ax2.set_ylabel(\"mean sample position\\n(normalized by trial length)\")\n",
"    ax1.legend(frameon=False, title=None)\n",
"    ax2.legend(frameon=False, title=None)\n",
"\n",
"\n",
"fig.tight_layout()\n",
"sns.despine(fig)\n",
"fig"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# slope of \"sample value\":\n",
"# regress mean normalized sample position on sample value, separately per control\n",
"for control in [\"partial\", \"full\"]:\n",
"    _d = df_values.loc[df_values[\"control\"] == control]\n",
"    _fit = pingouin.regression.linear_regression(\n",
"        X=_d[\"value\"].to_numpy(), y=_d[\"sample_pos_norm\"].to_numpy()\n",
"    )\n",
"    print(control)\n",
"    display(_fit.round(4))\n",
"    print(\"------\\n\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# scatter of df_values with a linear fit (red line), one panel per control condition\n",
"grid = sns.lmplot(\n",
"    data=df_values,\n",
"    col=\"control\",\n",
"    x=\"value\",\n",
"    y=\"sample_pos_norm\",\n",
"    x_jitter=0.25,\n",
"    y_jitter=0.025,\n",
"    line_kws=dict(color=\"red\"),\n",
")\n",
"grid.fig.suptitle(\"sample values\\nindividual dots are jittered\", y=1.1)\n",
"grid.fig"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down

0 comments on commit 59fbe99

Please sign in to comment.