From 54447655d01bf7c60f809b597b7be6ef9a91bfdc Mon Sep 17 00:00:00 2001 From: Kellyn Montgomery <32572763+kellynm@users.noreply.github.com> Date: Thu, 24 Mar 2022 15:20:41 -0400 Subject: [PATCH] Use case updates (#182) * Add contamination rate variability scenarios * Plot updates, work in progress * Clean up unused code for plots Co-authored-by: Kellyn Montgomery --- .../data/consignment_scenarios.csv | 25 +- .../data/contamination_rate_estimation.csv | 8 +- .../simulation_use_cases.ipynb | 654 ++++++++++++++---- 3 files changed, 535 insertions(+), 152 deletions(-) diff --git a/examples/Montgomery_2021/data/consignment_scenarios.csv b/examples/Montgomery_2021/data/consignment_scenarios.csv index cad6fe2e..26b04a53 100644 --- a/examples/Montgomery_2021/data/consignment_scenarios.csv +++ b/examples/Montgomery_2021/data/consignment_scenarios.csv @@ -8,10 +8,21 @@ consignment scenario 6,High,box hypergeometric 0.1,input_file,aqim_box_insp_unit consignment scenario 7,Low,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,box,beta,"[0.0727, 26.8544]",random,,,,boxes,hypergeometric,0.1,random,1 consignment scenario 8,Mid,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,box,beta,"[0.0081, 2.9838]",random,,,,boxes,hypergeometric,0.1,random,1 consignment scenario 9,High,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,box,beta,"[0.0029, 1.0742]",random,,,,boxes,hypergeometric,0.1,random,1 -consignment scenario 10,Small clusters,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,box,beta,"[0.0091, 3.3769]",clustered,random,2,,boxes,hypergeometric,0.1,random,1 -consignment scenario 11,Large clusters,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,box,beta,"[0.0091, 3.3769]",clustered,random,5,,boxes,hypergeometric,0.1,random,1 -consignment scenario 12,Continuous clusters,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,item,beta,"[0.0091, 3.3769]",clustered,continuous,100,,boxes,hypergeometric,0.1,random,1 -consignment scenario 13,Large clusters,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,item,beta,"[0.0091, 3.3769]",clustered,random,400,400,boxes,hypergeometric,0.1,random,1 -consignment scenario 14,Small clusters,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,item,beta,"[0.0091, 3.3769]",clustered,random,25,25,boxes,hypergeometric,0.1,random,1 -consignment scenario 15,Random,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,item,beta,"[0.0091, 3.3769]",random,,,,boxes,hypergeometric,0.1,random,1 -consignment scenario 16,Random,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,box,beta,"[0.0091, 3.3769]",random,,,,boxes,hypergeometric,0.1,random,1 +consignment scenario 10,Continuous clusters,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,item,beta,"[0.0091, 3.3769]",clustered,continuous,25,25,boxes,hypergeometric,0.1,random,1 +consignment scenario 11,Continuous clusters,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,item,beta,"[0.0091, 3.3769]",clustered,continuous,50,50,boxes,hypergeometric,0.1,random,1 +consignment scenario 12,Continuous clusters,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,item,beta,"[0.0091, 3.3769]",clustered,continuous,75,75,boxes,hypergeometric,0.1,random,1 +consignment scenario 13,Continuous clusters,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,item,beta,"[0.0091, 3.3769]",clustered,continuous,100,100,boxes,hypergeometric,0.1,random,1 +consignment scenario 14,Continuous clusters,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,item,beta,"[0.0091, 3.3769]",clustered,continuous,125,125,boxes,hypergeometric,0.1,random,1 +consignment scenario 15,Continuous clusters,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,item,beta,"[0.0091, 3.3769]",clustered,continuous,150,150,boxes,hypergeometric,0.1,random,1 +consignment scenario 16,Continuous clusters,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,item,beta,"[0.0091, 3.3769]",clustered,continuous,175,175,boxes,hypergeometric,0.1,random,1 +consignment scenario 17,Continuous clusters,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,item,beta,"[0.0091, 3.3769]",clustered,continuous,200,200,boxes,hypergeometric,0.1,random,1 +consignment scenario 18,Continuous clusters,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,item,beta,"[0.0091, 3.3769]",clustered,continuous,225,225,boxes,hypergeometric,0.1,random,1 +consignment scenario 19,Continuous clusters,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,item,beta,"[0.0091, 3.3769]",clustered,continuous,250,250,boxes,hypergeometric,0.1,random,1 +consignment scenario 20,Continuous clusters,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,item,beta,"[0.0091, 3.3769]",clustered,continuous,275,275,boxes,hypergeometric,0.1,random,1 +consignment scenario 21,Continuous clusters,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,item,beta,"[0.0091, 3.3769]",clustered,continuous,300,300,boxes,hypergeometric,0.1,random,1 +consignment scenario 22,Continuous clusters,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,item,beta,"[0.0091, 3.3769]",clustered,continuous,325,325,boxes,hypergeometric,0.1,random,1 +consignment scenario 23,Continuous clusters,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,item,beta,"[0.0091, 3.3769]",clustered,continuous,350,350,boxes,hypergeometric,0.1,random,1 +consignment scenario 24,Continuous clusters,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,item,beta,"[0.0091, 3.3769]",clustered,continuous,375,375,boxes,hypergeometric,0.1,random,1 +consignment scenario 25,Continuous clusters,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,item,beta,"[0.0091, 3.3769]",clustered,continuous,400,400,boxes,hypergeometric,0.1,random,1 +consignment scenario 26,Random items,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,item,beta,"[0.0091, 3.3769]",random,,,,boxes,hypergeometric,0.1,random,1 +consignment scenario 27,Random boxes,box hypergeometric 0.1,input_file,aqim_box_insp_unit.csv,,,200,box,beta,"[0.0091, 3.3769]",random,,,,boxes,hypergeometric,0.1,random,1 diff --git a/examples/Montgomery_2021/data/contamination_rate_estimation.csv b/examples/Montgomery_2021/data/contamination_rate_estimation.csv index f0f5601b..10ed1800 100644 --- a/examples/Montgomery_2021/data/contamination_rate_estimation.csv +++ b/examples/Montgomery_2021/data/contamination_rate_estimation.csv @@ -1,8 +1,8 @@ name,consignment name,inspection name,consignment/generation_method,consignment/input_file/file_name,contamination/contamination_unit,contamination/contamination_rate/distribution,contamination/contamination_rate/parameters,contamination/arrangement,contamination/clustered/distribution,contamination/clustered/contaminated_units_per_cluster,contamination/clustered/random/cluster_item_width,inspection/unit,inspection/sample_strategy,inspection/hypergeometric/detection_level,inspection/selection_strategy,inspection/within_box_proportion,inspection/cluster/cluster_selection -rate scenario 1 box,mean 0.0027 sd 0.0282 box,box hypergeometric 0.1 random,input_file,aqim_box_insp_unit.csv,box,beta,"[0.0091, 3.3769]",random,,,,boxes,hypergeometric,0.1,random,1, -rate scenario 1 stem random,mean 0.0027 sd 0.0282 item random,box hypergeometric 0.1 random,input_file,aqim_box_insp_unit.csv,item,beta,"[0.0091, 3.3769]",random,,,,boxes,hypergeometric,0.1,random,1, -rate scenario 1 stem clustered random,mean 0.0027 sd 0.0282 item clustered random,box hypergeometric 0.1 random,input_file,aqim_box_insp_unit.csv,item,beta,"[0.0091, 3.3769]",clustered,random,40,40,boxes,hypergeometric,0.1,random,1, -rate scenario 1 stem clustered continuous,mean 0.0027 sd 0.0282 item clustered cont,box hypergeometric 0.1 random,input_file,aqim_box_insp_unit.csv,item,beta,"[0.0091, 3.3769]",clustered,continuous,40,,boxes,hypergeometric,0.1,random,1, +rate scenario 1 box,mean 0.0027 sd 0.0282 box,box hypergeometric 0.1 random,input_file,aqim_box_insp_unit.csv,box,beta,"[0.009, 3.3062]",random,,,,boxes,hypergeometric,0.1,random,1, +rate scenario 1 stem random,mean 0.0027 sd 0.0282 item random,box hypergeometric 0.1 random,input_file,aqim_box_insp_unit.csv,item,beta,"[0.009, 3.3062]",random,,,,boxes,hypergeometric,0.1,random,1, +rate scenario 1 stem clustered random,mean 0.0027 sd 0.0282 item clustered random,box hypergeometric 0.1 random,input_file,aqim_box_insp_unit.csv,item,beta,"[0.009, 3.3062]",clustered,random,40,40,boxes,hypergeometric,0.1,random,1, +rate scenario 1 stem clustered continuous,mean 0.0027 sd 0.0282 item clustered cont,box hypergeometric 0.1 random,input_file,aqim_box_insp_unit.csv,item,beta,"[0.009, 3.3062]",clustered,continuous,40,,boxes,hypergeometric,0.1,random,1, rate scenario 2 box,mean 0.00188 sd 0.019 box,box hypergeometric 0.1 random,input_file,aqim_box_insp_unit.csv,box,beta,"[0.0098, 5.1882]",random,,,,boxes,hypergeometric,0.1,random,1, rate scenario 2 stem random,mean 0.00188 sd 0.019 item random,box hypergeometric 0.1 random,input_file,aqim_box_insp_unit.csv,item,beta,"[0.0098, 5.1882]",random,,,,boxes,hypergeometric,0.1,random,1, rate scenario 2 stem clustered random,mean 0.00188 sd 0.019 item clustered random,box hypergeometric 0.1 random,input_file,aqim_box_insp_unit.csv,item,beta,"[0.0098, 5.1882]",clustered,random,40,40,boxes,hypergeometric,0.1,random,1, diff --git a/examples/Montgomery_2021/simulation_use_cases.ipynb b/examples/Montgomery_2021/simulation_use_cases.ipynb index feca1cad..b5e4a7ae 100644 --- a/examples/Montgomery_2021/simulation_use_cases.ipynb +++ b/examples/Montgomery_2021/simulation_use_cases.ipynb @@ -18,10 +18,9 @@ "outputs": [], "source": [ "import pandas as pd\n", - "import re\n", - "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "import matplotlib.patches as mpatches\n", + "from matplotlib.lines import Line2D\n", "import numpy as np\n", "\n", "%matplotlib inline" @@ -41,6 +40,18 @@ "resultsdir.mkdir(exist_ok=True)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Set colors for plots\n", + "dark_blue = \"#1f78b4ff\"\n", + "light_blue = \"#6db8dfff\"\n", + "green = \"#72c12cff\"" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -114,7 +125,6 @@ " \"intercepted\",\n", " \"total_missed_contaminants\",\n", " \"total_intercepted_contaminants\",\n", - " \"avg_boxes_opened_completion\",\n", " ],\n", ")" ] @@ -135,8 +145,10 @@ "outputs": [], "source": [ "# Format dataframe\n", - "column_names = [\"consignment name\", \"inspection name\", \"beta parameters\", \"contaminant arrangement\", \"cluster distribution\", \"infested boxes per cluster\", \"simulated contamination rate (mean)\", \"failure rate\"]\n", - "df_contamination_pretty = df_fitted.iloc[:,[1,2,3,4,5,6,7,13]].copy()\n", + "column_names = ([\"scenario name\", \"consignment name\", \"inspection name\", \"beta parameters\", \"contaminant arrangement\", \n", + " \"cluster distribution\", \"infested units per cluster\", \"mean contamination rate\", \"missed\", \"intercepted\", \n", + " \"missed contaminants\", \"intercepted contaminants\", \"failure rate\"])\n", + "df_contamination_pretty = df_fitted.copy()\n", "\n", "df_contamination_pretty.columns = column_names\n", "df_contamination_pretty.iloc[:,6] = df_contamination_pretty.iloc[:,6].round(decimals=4)\n", @@ -151,7 +163,7 @@ "outputs": [], "source": [ "# Save results to csv\n", - "df_contamination_pretty.to_csv(resultsdir / \"contamination_rate_results.csv\")" + "#df_contamination_pretty.to_csv(resultsdir / \"contamination_rate_results.csv\")" ] }, { @@ -164,6 +176,15 @@ "#df_contamination_pretty = pd.read_csv(resultsdir / \"contamination_rate_results.csv\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_contamination_pretty" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -221,13 +242,19 @@ " \"max_intercepted_contamination_rate\",\n", " \"avg_intercepted_contamination_rate\",\n", " \"avg_boxes_opened_completion\",\n", + " \"pct_boxes_opened_completion\",\n", " \"avg_boxes_opened_detection\",\n", + " \"pct_boxes_opened_detection\",\n", " \"avg_items_inspected_completion\",\n", + " \"pct_items_inspected_completion\",\n", " \"avg_items_inspected_detection\",\n", + " \"pct_items_inspected_detection\",\n", " \"false_neg\",\n", " \"intercepted\",\n", " \"total_missed_contaminants\",\n", " \"total_intercepted_contaminants\",\n", + " \"num_boxes\",\n", + " \"num_items\",\n", " ],\n", ")" ] @@ -241,7 +268,8 @@ "df_inspections['failure rate'] = df_inspections[\"intercepted\"] / num_consignments\n", "contaminated_consignments = df_inspections[\"false_neg\"] + df_inspections[\"intercepted\"]\n", "df_inspections[\"interception rate\"] = df_inspections[\"intercepted\"] / contaminated_consignments\n", - "df_inspections[\"% missed contaminants\"] = (df_inspections[\"total_missed_contaminants\"] / (df_inspections[\"total_missed_contaminants\"] + df_inspections[\"total_intercepted_contaminants\"])) * 100" + "df_inspections[\"% missed contaminants\"] = ((df_inspections[\"total_missed_contaminants\"] / (df_inspections[\"total_missed_contaminants\"] +\n", + " df_inspections[\"total_intercepted_contaminants\"])) * 100)" ] }, { @@ -250,8 +278,13 @@ "metadata": {}, "outputs": [], "source": [ - "# Format dataframe\n", - "column_names = [\"name\", \"inspection unit\", \"sample strategy\", \"sample parameter\", \"selection strategy\", \"cluster selection\", \"avg contamination rate\", \"max missed contamination rate\", \"avg missed contamination rate\", \"max intercepted contamination rate\", \"avg intercepted contamination rate\", \"boxes opened completion\", \"boxes opened detection\", \"items inspected completion\", \"items inspected detection\", \"missed\", \"intercepted\", \"missed contaminants\", \"intercepted contamininants\", \"failure rate\", \"interception rate\", \"% missed contaminants\", \"sample size method\", \"selection method\"]" + "column_names = ([\"name\", \"inspection unit\", \"sample strategy\", \"sample parameter\", \"selection strategy\", \"cluster selection\", \n", + " \"avg contamination rate\", \"max missed contamination rate\", \"avg missed contamination rate\", \"max intercepted contamination rate\", \n", + " \"avg intercepted contamination rate\", \"avg boxes opened per inspection completion\", \"pct boxes opened per simulation completion\", \n", + " \"avg boxes opened per inspection detection\", \"pct boxes opened per simulation detection\", \"avg items inspected per inspection completion\", \n", + " \"pct items inspected per simulation completion\", \"avg items inspected per inspection detection\", \"pct items inspected per simulation detection\", \n", + " \"missed\", \"intercepted\", \"missed contaminants\", \"intercepted contaminants\", \"total boxes\", \"total items\", \"failure rate\", \n", + " \"interception rate\", \"% missed contaminants\", \"sample size method\", \"selection method\"])" ] }, { @@ -260,20 +293,61 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "df_inspections_pretty = df_inspections.loc[:, df_inspections.columns != 'inspection/hypergeometric/detection_level'].copy()\n", "hypergeometric_parameters = df_inspections.iloc[[0,1,2,3,4,5,6,7,12,13,14,15],4]\n", "df_inspections_pretty.iloc[[0,1,2,3,4,5,6,7,12,13,14,15],3] = hypergeometric_parameters\n", - "df_inspections_pretty.iloc[:,3] = df_inspections_pretty.iloc[:,3].astype(str)\n", - "\n", - "df_inspections_pretty[\"sample size method\"] = df_inspections_pretty[['inspection/sample_strategy', 'inspection/proportion/value']].agg(' '.join, axis=1)\n", - "df_inspections_pretty[\"selection method\"] = df_inspections_pretty[[\"inspection/unit\", 'inspection/selection_strategy', 'inspection/cluster/cluster_selection']].agg(' '.join, axis=1)\n", - "\n", - "df_inspections_pretty.columns = column_names\n", + "df_inspections_pretty.iloc[:,3] = df_inspections_pretty.iloc[:,3].astype(str)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_inspections_pretty[\"sample size method\"] = df_inspections_pretty[['inspection/sample_strategy', 'inspection/proportion/value']].agg(' '.join, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_inspections_pretty.iloc[:,5].fillna(\"\", inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_inspections_pretty[\"selection method\"] = (df_inspections_pretty[[\"inspection/unit\", 'inspection/selection_strategy', \n", + " 'inspection/cluster/cluster_selection']].agg(' '.join, axis=1))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_inspections_pretty.columns = column_names" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "df_inspections_pretty.iloc[:,6:11] = df_inspections_pretty.iloc[:,6:11].round(decimals=4)\n", - "df_inspections_pretty.iloc[:,11:19] = df_inspections_pretty.iloc[:,11:19].astype(int)\n", - "df_inspections_pretty.iloc[:,19:22] = df_inspections_pretty.iloc[:,19:22].round(decimals=4)\n", - "df_inspections_pretty.iloc[:18,[0,1,21,22,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]]\n", + "df_inspections_pretty.iloc[:,[11,13,15,17]] = df_inspections_pretty.iloc[:,[11,13,15,17]].astype(int)\n", + "df_inspections_pretty.iloc[:,[12,14,16,18]] = df_inspections_pretty.iloc[:,[12,14,16,18]].round(decimals=2)\n", + "df_inspections_pretty.iloc[:,19:25] = df_inspections_pretty.iloc[:,19:25].astype(int)\n", + "df_inspections_pretty.iloc[:,25:27] = df_inspections_pretty.iloc[:,25:27].round(decimals=4)\n", + "df_inspections_pretty.iloc[:,27] = df_inspections_pretty.iloc[:,27].round(decimals=2)\n", "df_inspections_pretty" ] }, @@ -284,7 +358,7 @@ "outputs": [], "source": [ "# Save results to csv\n", - "df_inspections_pretty.to_csv(resultsdir / \"inspection_scenario_results.csv\")" + "#df_inspections_pretty.to_csv(resultsdir / \"inspection_scenario_results.csv\")" ] }, { @@ -303,10 +377,10 @@ "metadata": {}, "outputs": [], "source": [ - "colors = {\"hypergeometric 0.1\": \"#1f78b4\", \"hypergeometric 0.05\": \"#a6cee3\", \"proportion 0.02\":\"#b2df8a\"}\n", - "patch_1 = mpatches.Patch(color=\"#a6cee3\", label=\"hypergeometric 0.05\")\n", - "patch_2 = mpatches.Patch(color=\"#1f78b4\", label=\"hypergeometric 0.1\")\n", - "patch_3 = mpatches.Patch(color=\"#b2df8a\", label=\"proportion 0.02\")" + "colors = {\"hypergeometric 0.1\": dark_blue, \"hypergeometric 0.05\": light_blue, \"proportion 0.02\":green}\n", + "patch_1 = mpatches.Patch(color=light_blue, label=\"hypergeometric 0.05\")\n", + "patch_2 = mpatches.Patch(color=dark_blue, label=\"hypergeometric 0.1\")\n", + "patch_3 = mpatches.Patch(color=green, label=\"proportion 0.02\")" ] }, { @@ -315,42 +389,100 @@ "metadata": {}, "outputs": [], "source": [ - "plt.figure(figsize=(18, 18), dpi=150)\n", - "plt.subplot(221)\n", - "plt.subplots_adjust(wspace=0.65, left=0.1,right=0.9, top=0.93, bottom=0.05, hspace=0.16)\n", - "plt.barh(df_inspections_pretty[\"name\"], df_inspections_pretty[\"interception rate\"], color=df_inspections_pretty['sample size method'].replace(colors))\n", - "plt.title(\"Interception Rate\", fontsize=24)\n", - "plt.ylabel(\"inspection method\", fontsize=20)\n", - "plt.xlabel(\"rate\", fontsize=20)\n", - "plt.yticks(ticks=np.arange(18),labels=df_inspections_pretty[\"selection method\"], fontsize=20)\n", - "plt.xticks(fontsize=18)\n", - "plt.subplot(222)\n", - "plt.barh(df_inspections_pretty[\"name\"], df_inspections_pretty[\"avg missed contamination rate\"], color=df_inspections_pretty['sample size method'].replace(colors))\n", - "plt.title(\"Avg. Missed Contamination Rate\", fontsize=24)\n", - "plt.xlabel(\"rates\", fontsize=20)\n", - "plt.yticks(ticks=np.arange(18),labels=df_inspections_pretty[\"selection method\"], fontsize=20)\n", - "plt.xticks(ticks=[0,0.01,0.02,0.03], fontsize=18)\n", - "\n", - "plt.subplot(223)\n", - "plt.subplots_adjust(wspace=0.65,left=0.22,right=0.95)\n", - "plt.barh(df_inspections_pretty[\"name\"], df_inspections_pretty[\"boxes opened completion\"], color=df_inspections_pretty['sample size method'].replace(colors))\n", - "plt.title(\"Boxes Opened per Consignment\", fontsize=24)\n", + "inspected_per_interception = ((df_inspections_pretty['pct items inspected per simulation completion'] * 0.01 * df_inspections_pretty[\"total items\"]) /\n", + " (df_inspections_pretty[\"intercepted\"]))\n", + "opened_per_interception = ((df_inspections_pretty['pct boxes opened per simulation completion'] * 0.01 * df_inspections_pretty[\"total boxes\"]) /\n", + " df_inspections_pretty[\"intercepted\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(15,7))\n", + "plt.subplot(121)\n", + "plt.subplots_adjust(wspace=0.05,left=0.24,right=0.97, bottom=0.16, top=0.92)\n", + "plt.barh(df_inspections_pretty[\"name\"], inspected_per_interception, color=df_inspections_pretty['sample size method'].replace(colors))\n", + "plt.title(\"Items Inspected per Interception\", fontsize=22)\n", "plt.ylabel(\"inspection method\", fontsize=20)\n", - "plt.xlabel(\"boxes\", fontsize=20)\n", - "plt.yticks(ticks=np.arange(18),labels=df_inspections_pretty[\"selection method\"], fontsize=20)\n", + "plt.xlabel(\"items inspected / \\n intercepted consignment\", fontsize=20)\n", + "plt.yticks(ticks=np.arange(18),labels=df_inspections_pretty[\"selection method\"].str.strip(), fontsize=20)\n", "plt.xticks(fontsize=18)\n", - "plt.subplot(224)\n", - "plt.barh(df_inspections_pretty[\"name\"], df_inspections_pretty[\"items inspected completion\"], color=df_inspections_pretty['sample size method'].replace(colors))\n", - "plt.title(\"Items Inspected per Consignment\", fontsize=24)\n", - "plt.xlabel(\"items\", fontsize=20)\n", - "plt.legend(handles=[patch_1,patch_2,patch_3], loc = \"lower right\", fontsize=20)\n", - "plt.yticks(ticks=np.arange(18),labels=df_inspections_pretty[\"selection method\"], fontsize=20)\n", + "plt.legend(handles=[patch_1,patch_2,patch_3], loc = \"lower right\", fontsize=18)\n", + "plt.subplot(122)\n", + "plt.barh(df_inspections_pretty[\"name\"], opened_per_interception, color=df_inspections_pretty['sample size method'].replace(colors))\n", + "plt.title(\"Boxes Opened per Interception\", fontsize=22)\n", + "plt.xlabel(\"boxes opened / \\n intercepted consignment\", fontsize=20)\n", + "plt.yticks(ticks=[])\n", "plt.xticks(fontsize=18)\n", - "plt.suptitle(\"Inspection Scenarios\", fontsize=28)\n", "plt.savefig(resultsdir / \"inspection_scenario_plots.png\")\n", + "\n", "plt.show()\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def rand_jitter(arr):\n", + " stdev = .01 * (max(arr) - min(arr))\n", + " return arr + np.random.randn(len(arr)) * stdev" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "box_marker = Line2D([0], [0], marker='s', color='w', label='box inspection unit', markerfacecolor='dimgray', markersize=9)\n", + "item_marker = Line2D([0], [0], marker='^', color='w', label='item inspection unit', markerfacecolor='dimgray', markersize=11)\n", + "random_selection = mpatches.Patch(fc=\"w\", fill=False, edgecolor='none', linewidth=0, label=\"random selection\")\n", + "convenience_selection = mpatches.Patch(fc=\"w\", fill=False, edgecolor='none', linewidth=0, label=\"convenience selection\")\n", + "clusterrandom_selection = mpatches.Patch(fc=\"w\", fill=False, edgecolor='none', linewidth=0, label=\"cluster random selection\")\n", + "clusterinterval_selection = mpatches.Patch(fc=\"w\", fill=False, edgecolor='none', linewidth=0, label=\"cluster interval selection\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(5)\n", + "\n", + "names = list(df_inspections_pretty[\"name\"].values)\n", + "selection = list(df_inspections_pretty[\"selection strategy\"].values)\n", + "\n", + "x = df_inspections_pretty[\"missed contaminants\"].divide(df_inspections_pretty[\"intercepted contaminants\"]+df_inspections_pretty[\"missed contaminants\"])\n", + "y = df_inspections_pretty[\"pct items inspected per simulation completion\"] * 0.01\n", + "\n", + "x = rand_jitter(x)\n", + "\n", + "is_box = df_inspections_pretty['inspection unit'] == \"boxes\"\n", + "is_item = df_inspections_pretty['inspection unit'] == \"items\"\n", + "\n", + "x_item = x[is_item]\n", + "x_box = x[is_box]\n", + "y_item = y[is_item]\n", + "y_box = y[is_box]\n", + "\n", + "plt.figure(figsize=(4.5,2.5), dpi=250)\n", + "plt.subplots_adjust(left=0.1, bottom=0.18, right=0.97, top=0.95)\n", + "plt.scatter(x_item, y_item, s=12**2, alpha=0.8, marker=\"^\", c=df_inspections_pretty['sample size method'][is_item].replace(colors))\n", + "plt.scatter(x_box, y_box, s=12**2, alpha=0.8, marker=\"s\", c=df_inspections_pretty['sample size method'][is_box].replace(colors))\n", + "plt.xlabel(\"proportion of contaminants missed\", size=8)\n", + "plt.ylabel(\"proportion of items inspected\", size=8)\n", + "plt.tick_params(axis='both', which='major', labelsize=8)\n", + "plt.ylim(-0.04, 0.66)\n", + "plt.legend(handles=[box_marker,item_marker,random_selection,convenience_selection,clusterrandom_selection,clusterinterval_selection,patch_1,patch_2,patch_3], loc = \"upper right\", fontsize=7)\n", + "plt.savefig(resultsdir / \"inspection_scenario_scatter.png\")" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -428,7 +560,7 @@ "metadata": {}, "outputs": [], "source": [ - "df_consignments_10M = save_scenario_result_to_pandas(\n", + "df_cargo_10M = save_scenario_result_to_pandas(\n", " air_scenario_results+maritime_scenario_results+dtc_scenario_results,\n", " config_columns=[\n", " \"name\",\n", @@ -474,11 +606,12 @@ "metadata": {}, "outputs": [], "source": [ - "df_consignments_10M['failure rate'] = df_consignments_10M[\"intercepted\"] / num_consignments\n", - "contaminated_consignments = df_consignments_10M[\"false_neg\"] + df_consignments_10M[\"intercepted\"]\n", - "df_consignments_10M[\"interception rate\"] = df_consignments_10M[\"intercepted\"] / contaminated_consignments\n", - "df_consignments_10M[\"contaminated_consignments\"] = contaminated_consignments\n", - "df_consignments_10M[\"% missed contaminants\"] = (df_consignments_10M[\"total_missed_contaminants\"] / (df_consignments_10M[\"total_missed_contaminants\"] + df_consignments_10M[\"total_intercepted_contaminants\"])) * 100" + "df_cargo_10M['failure rate'] = df_cargo_10M[\"intercepted\"] / num_consignments\n", + "contaminated_consignments = df_cargo_10M[\"false_neg\"] + df_cargo_10M[\"intercepted\"]\n", + "df_cargo_10M[\"interception rate\"] = df_cargo_10M[\"intercepted\"] / contaminated_consignments\n", + "df_cargo_10M[\"contaminated_consignments\"] = contaminated_consignments\n", + "df_cargo_10M[\"% missed contaminants\"] = ((df_cargo_10M[\"total_missed_contaminants\"] / \n", + " (df_cargo_10M[\"total_missed_contaminants\"] + df_cargo_10M[\"total_intercepted_contaminants\"])) * 100)" ] }, { @@ -488,8 +621,12 @@ "outputs": [], "source": [ "# Format dataframe \n", - "column_names = [\"name\", \"consignment name\", \"items per box\", \"contamination unit\", \"contamination parameters\", \"contaminant arrangement\", \"cluster distribution\", \"contaminated units per cluster\", \"cluster width\", \"avg contamination rate\", \"avg missed contamination rate\", \"avg intercepted contamination rate\", \"avg boxes opened per inspection\", \"pct boxes opened per simulation\", \"avg items inspected per inspection\", \"pct items inspected per simulation\", \"missed contaminants\", \"intercepted contamininants\", \"total boxes\", \"total items\", \"interception rate\", \"contaminated_consignments\", \"% missed contaminants\"]\n", - "df_consignments_pretty_10M = df_consignments_10M" + "column_names = ([\"name\", \"consignment name\", \"items per box\", \"contamination unit\", \"contamination parameters\", \n", + " \"contaminant arrangement\", \"cluster distribution\", \"contaminated units per cluster\", \"cluster width\", \"avg contamination rate\", \n", + " \"avg missed contamination rate\", \"avg intercepted contamination rate\", \"avg boxes opened per inspection\", \n", + " \"pct boxes opened per simulation\", \"avg items inspected per inspection\", \"pct items inspected per simulation\", \"missed contaminants\", \n", + " \"intercepted contamininants\", \"total boxes\", \"total items\", \"interception rate\", \"contaminated_consignments\", \"% missed contaminants\"])\n", + "df_cargo_10M_pretty = df_cargo_10M" ] }, { @@ -498,10 +635,10 @@ "metadata": {}, "outputs": [], "source": [ - "df_consignments_pretty_10M.iloc[:,12:35] = df_consignments_pretty_10M.iloc[:,12:35].round(decimals=3)\n", - "df_consignments_pretty_10M = df_consignments_pretty_10M.iloc[:,[0,1,4,5,7,8,9,10,11,12,14,16,17,18,21,22,27,28,29,30,32,33,34]]\n", - "df_consignments_pretty_10M.columns = column_names\n", - "df_consignments_pretty_10M" + "df_cargo_10M_pretty.iloc[:,12:35] = df_cargo_10M_pretty.iloc[:,12:35].round(decimals=3)\n", + "df_cargo_10M_pretty = df_cargo_10M_pretty.iloc[:,[0,1,4,5,7,8,9,10,11,12,14,16,17,18,21,22,27,28,29,30,32,33,34]]\n", + "df_cargo_10M_pretty.columns = column_names\n", + "df_cargo_10M_pretty" ] }, { @@ -511,7 +648,7 @@ "outputs": [], "source": [ "# Save results to csv\n", - "df_consignments_pretty_10M.to_csv(resultsdir / \"cargoconfig_scenario_10M_results.csv\")" + "#df_cargo_10M_pretty.to_csv(resultsdir / \"cargoconfig_scenario_10M_results.csv\")" ] }, { @@ -521,7 +658,7 @@ "outputs": [], "source": [ "# If loading results from saved csv, uncomment and run this cell.\n", - "#df_consignments_pretty_10M = pd.read_csv(resultsdir / \"cargoconfig_scenario_10M_results.csv\")" + "#df_cargo_10M_pretty = pd.read_csv(resultsdir / \"cargoconfig_scenario_10M_results.csv\")" ] }, { @@ -533,31 +670,31 @@ "plt.figure(figsize=(16, 5), dpi=300)\n", "plt.subplot(221)\n", "plt.subplots_adjust(bottom=0.14,top=0.81, left=0.2, right=0.97, wspace=0.65, hspace=1)\n", - "plt.barh(df_consignments_pretty_10M[\"name\"], df_consignments_pretty_10M[\"interception rate\"], color=\"#b2df8a\")\n", + "plt.barh(df_cargo_10M_pretty[\"name\"], df_cargo_10M_pretty[\"interception rate\"], color=green)\n", "plt.title(\"Interception Rate\", fontsize=24)\n", "plt.xlabel(\"rate\", fontsize=20)\n", "plt.ylabel(\"cargo type\", fontsize=18, labelpad=10)\n", - "plt.yticks(ticks=np.arange(3),labels=df_consignments_pretty_10M[\"consignment name\"], fontsize=20)\n", + "plt.yticks(ticks=np.arange(3),labels=df_cargo_10M_pretty[\"consignment name\"], fontsize=20)\n", "plt.xticks(ticks=[0.0,0.2,0.4,0.6,0.8],fontsize=18)\n", "plt.subplot(222)\n", - "plt.barh(df_consignments_pretty_10M[\"name\"], df_consignments_pretty_10M[\"avg missed contamination rate\"], color=\"#b2df8a\")\n", + "plt.barh(df_cargo_10M_pretty[\"name\"], df_cargo_10M_pretty[\"avg missed contamination rate\"], color=green)\n", "plt.title(\"Avg. Missed Contamination Rate\", fontsize=24)\n", "plt.xlabel(\"rate\", fontsize=20)\n", - "plt.yticks(ticks=np.arange(3),labels=df_consignments_pretty_10M[\"consignment name\"],fontsize=20)\n", + "plt.yticks(ticks=np.arange(3),labels=df_cargo_10M_pretty[\"consignment name\"],fontsize=20)\n", "plt.xticks(ticks=[0,0.001,0.002,0.003,0.004], fontsize=18)\n", "\n", "plt.subplot(223)\n", - "plt.barh(df_consignments_pretty_10M[\"name\"], df_consignments_pretty_10M[\"avg items inspected per inspection\"], color=\"#b2df8a\")\n", + "plt.barh(df_cargo_10M_pretty[\"name\"], df_cargo_10M_pretty[\"avg items inspected per inspection\"], color=green)\n", "plt.title(\"Items Inspected per Consignment\", fontsize=24)\n", "plt.xlabel(\"items\", fontsize=20)\n", "plt.ylabel(\"cargo type\", fontsize=18, labelpad=10)\n", - "plt.yticks(ticks=np.arange(3),labels=df_consignments_pretty_10M[\"consignment name\"], fontsize=20)\n", + "plt.yticks(ticks=np.arange(3),labels=df_cargo_10M_pretty[\"consignment name\"], fontsize=20)\n", "plt.xticks(ticks=[0,4000,8000,12000,16000], fontsize=18)\n", "plt.subplot(224)\n", - "plt.barh(df_consignments_pretty_10M[\"name\"], df_consignments_pretty_10M[\"pct items inspected per simulation\"], color=\"#b2df8a\")\n", + "plt.barh(df_cargo_10M_pretty[\"name\"], df_cargo_10M_pretty[\"pct items inspected per simulation\"], color=green)\n", "plt.title(\"% Items Inspected per Scenario\", fontsize=24)\n", "plt.xlabel(\"% items\", fontsize=20)\n", - "plt.yticks(ticks=np.arange(3),labels=df_consignments_pretty_10M[\"consignment name\"], fontsize=20)\n", + "plt.yticks(ticks=np.arange(3),labels=df_cargo_10M_pretty[\"consignment name\"], fontsize=20)\n", "plt.xticks(fontsize=18)\n", "\n", "plt.suptitle(\"Cargo Packaging Scenarios\", fontsize=28)\n", @@ -565,11 +702,52 @@ "plt.show()" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "colors = {\"Direct-to-consumer\": green, \"Air\": light_blue, \"Maritime\":dark_blue}\n", + "patch_1 = mpatches.Patch(color=green, label=\"Direct-to-consumer\")\n", + "patch_2 = mpatches.Patch(color=light_blue, label=\"Air\")\n", + "patch_3 = mpatches.Patch(color=dark_blue, label=\"Maritime\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(9)\n", + "\n", + "x_missed_cont = df_cargo_10M_pretty['intercepted contamininants'].divide(df_cargo_10M_pretty[\"intercepted contamininants\"]+df_cargo_10M_pretty[\"missed contaminants\"])\n", + "x_int_rate = df_cargo_10M_pretty['interception rate']\n", + "y = df_cargo_10M_pretty['pct items inspected per simulation'] * 0.01\n", + "\n", + "plt.figure(figsize=(7,2), dpi=250)\n", + "plt.subplot(121)\n", + "plt.scatter(x_int_rate, y, s=7**2, alpha=1, marker=\"o\", c=df_cargo_10M_pretty['consignment name'].replace(colors))\n", + "plt.xlabel(\"interception rate\", size=9)\n", + "plt.ylabel(\"proportion of\\nitems inspected\", size=9)\n", + "plt.tick_params(axis='both', which='major', labelsize=9)\n", + "\n", + "plt.subplot(122)\n", + "plt.scatter(x_missed_cont, y, s=7**2, alpha=1, marker=\"o\", c=df_cargo_10M_pretty['consignment name'].replace(colors))\n", + "plt.xlabel(\"proportion of contaminants intercepted\", size=9)\n", + "plt.tick_params(axis='both', which='major', labelsize=9)\n", + "plt.xticks(ticks=[0.98,0.985,0.99, 0.995])\n", + "plt.legend(handles=[patch_1,patch_2,patch_3], loc = \"upper right\", fontsize=8)\n", + "\n", + "plt.savefig(resultsdir / \"cargo_config_scenario_10M_scatter.png\", bbox_inches='tight')" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Run contaminant arrangement and rate variability scenarios" + "### Run contaminant arrangement scenarios" ] }, { @@ -579,9 +757,9 @@ "outputs": [], "source": [ "num_consignments = 3313\n", - "consignment_scenario_results = run_scenarios(\n", + "arrangement_scenario_results = run_scenarios(\n", " config=basic_config,\n", - " scenario_table=consignment_scenario_table[3:9],\n", + " scenario_table=consignment_scenario_table[9:],\n", " seed=42,\n", " num_simulations=100,\n", " num_consignments=num_consignments,\n", @@ -595,8 +773,8 @@ "metadata": {}, "outputs": [], "source": [ - "df_consignments = save_scenario_result_to_pandas(\n", - " consignment_scenario_results,\n", + "df_arrangement = save_scenario_result_to_pandas(\n", + " arrangement_scenario_results,\n", " config_columns=[\n", " \"name\",\n", " \"consignment name\",\n", @@ -639,11 +817,12 @@ "metadata": {}, "outputs": [], "source": [ - "df_consignments['failure rate'] = df_consignments[\"intercepted\"] / num_consignments\n", - "contaminated_consignments = df_consignments[\"false_neg\"] + df_consignments[\"intercepted\"]\n", - "df_consignments[\"interception rate\"] = df_consignments[\"intercepted\"] / contaminated_consignments\n", - "df_consignments[\"contaminated_consignments\"] = contaminated_consignments\n", - "df_consignments[\"% missed contaminants\"] = (df_consignments[\"total_missed_contaminants\"] / (df_consignments[\"total_missed_contaminants\"] + df_consignments[\"total_intercepted_contaminants\"])) * 100" + "df_arrangement['failure rate'] = df_arrangement[\"intercepted\"] / num_consignments\n", + "contaminated_consignments = df_arrangement[\"false_neg\"] + df_arrangement[\"intercepted\"]\n", + "df_arrangement[\"interception rate\"] = df_arrangement[\"intercepted\"] / contaminated_consignments\n", + "df_arrangement[\"contaminated_consignments\"] = contaminated_consignments\n", + "df_arrangement[\"% missed contaminants\"] = ((df_arrangement[\"total_missed_contaminants\"] / \n", + " (df_arrangement[\"total_missed_contaminants\"] + df_arrangement[\"total_intercepted_contaminants\"])) * 100)" ] }, { @@ -653,8 +832,12 @@ "outputs": [], "source": [ "# Format dataframe \n", - "column_names = [\"name\", \"consignment name\", \"items per box\", \"contamination unit\", \"contamination parameters\", \"contaminant arrangement\", \"cluster distribution\", \"contaminated units per cluster\", \"cluster width\", \"avg contamination rate\", \"avg missed contamination rate\", \"avg intercepted contamination rate\", \"avg boxes opened per inspection\", \"pct box opened per simulation\", \"avg items inspected per inspection\", \"pct items inspected per simulation\", \"missed contaminants\", \"intercepted contamininants\", \"interception rate\", \"contaminated_consignments\", \"% missed contaminants\"]\n", - "df_consignments_pretty = df_consignments" + "column_names = ([\"name\", \"consignment name\", \"items per box\", \"contamination unit\", \"contamination parameters\", \n", + " \"contaminant arrangement\", \"cluster distribution\", \"contaminated units per cluster\", \"cluster width\", \"avg contamination rate\", \n", + " \"avg missed contamination rate\", \"avg intercepted contamination rate\", \"avg boxes opened per inspection\", \n", + " \"pct box opened per simulation\", \"avg items inspected per inspection\", \"pct items inspected per simulation\", \n", + " \"missed contaminants\", \"intercepted contaminants\", \"interception rate\", \"contaminated_consignments\", \"% missed contaminants\"])\n", + "df_arrangement_pretty = df_arrangement" ] }, { @@ -663,14 +846,13 @@ "metadata": {}, "outputs": [], "source": [ - "df_consignments_pretty.iloc[:,12:17] = df_consignments_pretty.iloc[:,12:17].round(decimals=4)\n", - "df_consignments_pretty.iloc[:,17:29] = df_consignments_pretty.iloc[:,17:29].astype(int)\n", - "df_consignments_pretty.iloc[:,29:31] = df_consignments_pretty.iloc[:,29:31].round(decimals=4)\n", - "df_consignments_pretty.iloc[:,[31]] = df_consignments_pretty.iloc[:,[31]].astype(int)\n", - "df_consignments_pretty.iloc[:,[32]] = df_consignments_pretty.iloc[:,[32]].round(decimals=4)\n", - "df_consignments_pretty = df_consignments_pretty.iloc[:,[0,1,4,5,7,8,9,10,11,12,14,16,17,18,21,22,27,28,30,31,32]]\n", - "df_consignments_pretty.columns = column_names\n", - "df_consignments_pretty" + "df_arrangement_pretty.iloc[:,12:17] = df_arrangement_pretty.iloc[:,12:17].round(decimals=4)\n", + "df_arrangement_pretty.iloc[:,17:29] = df_arrangement_pretty.iloc[:,17:29].astype(int)\n", + "df_arrangement_pretty.iloc[:,29:31] = df_arrangement_pretty.iloc[:,29:31].round(decimals=4)\n", + "df_arrangement_pretty.iloc[:,[31]] = df_arrangement_pretty.iloc[:,[31]].astype(int)\n", + "df_arrangement_pretty.iloc[:,[32]] = df_arrangement_pretty.iloc[:,[32]].round(decimals=4)\n", + "df_arrangement_pretty = df_arrangement_pretty.iloc[:,[0,1,4,5,7,8,9,10,11,12,14,16,17,18,21,22,27,28,30,31,32]]\n", + "df_arrangement_pretty.columns = column_names" ] }, { @@ -680,7 +862,7 @@ "outputs": [], "source": [ "# Save results to csv\n", - "df_consignments_pretty.to_csv(resultsdir / \"consignment_scenario_results.csv\")" + "#df_arrangement_pretty.to_csv(resultsdir / \"arrangement_scenario_results.csv\")" ] }, { @@ -690,7 +872,7 @@ "outputs": [], "source": [ "# If loading results from saved csv, uncomment and run this chunk.\n", - "#df_consignments_pretty = pd.read_csv(resultsdir / \"consignment_scenario_results.csv\")" + "#df_arrangement_pretty = pd.read_csv(resultsdir / \"arrangement_scenario_results.csv\")" ] }, { @@ -699,8 +881,9 @@ "metadata": {}, "outputs": [], "source": [ - "df_contamination_rate_scenarios = df_consignments_pretty.loc[0:5,:]\n", - "df_contaminant_arrangement_scenarios = df_consignments_pretty.loc[9:,:]" + "item_cluster_cont = df_arrangement_pretty[(df_arrangement_pretty[\"cluster distribution\"]==\"continuous\") & (df_arrangement_pretty[\"contamination unit\"]==\"item\")]\n", + "rand_arrangement_item = df_arrangement_pretty[(df_arrangement_pretty[\"contaminant arrangement\"]==\"random\") & (df_arrangement_pretty[\"contamination unit\"]==\"item\")]\n", + "#rand_arrangement_box = df_arrangement_pretty[(df_arrangement_pretty[\"contaminant arrangement\"]==\"random\") & (df_arrangement_pretty[\"contamination unit\"]==\"box\")]" ] }, { @@ -709,30 +892,190 @@ "metadata": {}, "outputs": [], "source": [ - "colors = {\"item\": \"#b2df8a\", \"box\": \"#1f78b4\"}\n", - "patch_1 = mpatches.Patch(color=\"#b2df8a\", label=\"item\")\n", - "patch_2 = mpatches.Patch(color=\"#1f78b4\", label=\"box\")\n", + "pd.concat([rand_arrangement_item[\"interception rate\"], item_cluster_cont[\"interception rate\"]])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x = pd.concat([rand_arrangement_item[\"missed contaminants\"], item_cluster_cont[\"missed contaminants\"]]).reset_index(drop=True)\n", + "x = x.divide(pd.concat([rand_arrangement_item[\"intercepted contaminants\"], item_cluster_cont[\"intercepted contaminants\"]]).reset_index(drop=True) + x)\n", + "y = pd.concat([pd.Series(0), item_cluster_cont[\"contaminated units per cluster\"]])\n", "\n", - "plt.figure(figsize=(16, 4), dpi=300)\n", + "x_interception_rate = pd.concat([rand_arrangement_item[\"interception rate\"], item_cluster_cont[\"interception rate\"]]).reset_index(drop=True)\n", + "x_missed_rate = pd.concat([rand_arrangement_item[\"avg missed contamination rate\"], item_cluster_cont[\"avg missed contamination rate\"]]).reset_index(drop=True)\n", + "\n", + "plt.figure(figsize=(7,2), dpi=250)\n", "plt.subplot(121)\n", - "plt.subplots_adjust(bottom=0.25,top=0.77, left=0.08, right=0.97)\n", - "plt.barh(df_contamination_rate_scenarios[\"name\"], df_contamination_rate_scenarios[\"interception rate\"], color=df_contamination_rate_scenarios['contamination unit'].replace(colors))\n", - "plt.title(\"Interception Rate\", fontsize=24)\n", - "plt.xlabel(\"rate\", fontsize=18)\n", - "plt.ylabel(\"rate variability\", fontsize=18, labelpad=10)\n", - "plt.yticks(ticks=np.arange(6),labels=df_contamination_rate_scenarios[\"consignment name\"], fontsize=20)\n", - "plt.xticks(fontsize=18)\n", + "plt.scatter(x, y, s=4**2, alpha=1, marker=\"o\")\n", + "plt.ylabel(\"cluster size\", size=9)\n", + "plt.xlabel(\"proportion of contaminants missed\", size=9)\n", + "plt.tick_params(axis='both', which='major', labelsize=7)\n", + "plt.ticklabel_format(axis=\"both\", style=\"plain\")\n", + "\n", "plt.subplot(122)\n", - "plt.barh(df_contamination_rate_scenarios[\"name\"], df_contamination_rate_scenarios[\"avg missed contamination rate\"], color=df_contamination_rate_scenarios['contamination unit'].replace(colors))\n", - "plt.title(\"Avg. Missed Contamination Rate\", fontsize=24)\n", - "plt.xlabel(\"rate\", fontsize=18)\n", - "plt.yticks(ticks=np.arange(6),labels=df_contamination_rate_scenarios[\"consignment name\"],fontsize=20)\n", - "plt.xticks(ticks=[0,0.002,0.004,0.006], fontsize=18)\n", - "plt.suptitle(\"Contamination Rate Variability Scenarios\", fontsize=28)\n", - "plt.legend(handles=[patch_2,patch_1], loc = \"lower right\", fontsize=20, borderpad=0.2, labelspacing=0.2)\n", + "plt.scatter(x_interception_rate, y, s=4**2, alpha=1, marker=\"o\")\n", + "plt.xlabel(\"interception rate\", size=9)\n", + "plt.tick_params(axis='both', which='major', labelsize=9)\n", + "plt.ticklabel_format(axis=\"both\", style=\"plain\")\n", "\n", - "plt.savefig(resultsdir / \"rate_variability_scenario_plots.png\")\n", - "plt.show()" + "plt.savefig(resultsdir / \"cluster_scenario_scatter.png\", bbox_inches='tight')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Run rate variability scenarios" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_consignments = 3313\n", + "variability_scenario_results = run_scenarios(\n", + " config=basic_config,\n", + " scenario_table=consignment_scenario_table[3:9],\n", + " seed=42,\n", + " num_simulations=100,\n", + " num_consignments=num_consignments,\n", + " detailed=False,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_variability = save_scenario_result_to_pandas(\n", + " variability_scenario_results,\n", + " config_columns=[\n", + " \"name\",\n", + " \"consignment name\",\n", + " \"consignment/parameter_based/boxes/min\",\n", + " \"consignment/parameter_based/boxes/max\",\n", + " \"consignment/items_per_box/default\",\n", + " \"contamination/contamination_unit\",\n", + " \"contamination/contamination_rate/distribution\",\n", + " \"contamination/contamination_rate/parameters\",\n", + " \"contamination/arrangement\",\n", + " \"contamination/clustered/distribution\",\n", + " \"contamination/clustered/contaminated_units_per_cluster\",\n", + " \"contamination/clustered/random/cluster_item_width\",\n", + " ],\n", + " result_columns=[\n", + " \"true_contamination_rate\",\n", + " \"max_missed_contamination_rate\",\n", + " \"avg_missed_contamination_rate\",\n", + " \"max_intercepted_contamination_rate\",\n", + " \"avg_intercepted_contamination_rate\",\n", + " \"avg_boxes_opened_completion\",\n", + " \"pct_boxes_opened_completion\",\n", + " \"avg_boxes_opened_detection\",\n", + " \"pct_boxes_opened_detection\",\n", + " \"avg_items_inspected_completion\",\n", + " \"pct_items_inspected_completion\",\n", + " \"avg_items_inspected_detection\",\n", + " \"pct_items_inspected_detection\",\n", + " \"false_neg\",\n", + " \"intercepted\",\n", + " \"total_missed_contaminants\",\n", + " \"total_intercepted_contaminants\",\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_variability['failure rate'] = df_variability[\"intercepted\"] / num_consignments\n", + "contaminated_consignments = df_variability[\"false_neg\"] + df_variability[\"intercepted\"]\n", + "df_variability[\"interception rate\"] = df_variability[\"intercepted\"] / contaminated_consignments\n", + "df_variability[\"contaminated_consignments\"] = contaminated_consignments\n", + "df_variability[\"% missed contaminants\"] = ((df_variability[\"total_missed_contaminants\"] / \n", + " (df_variability[\"total_missed_contaminants\"] + df_variability[\"total_intercepted_contaminants\"])) * 100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Format dataframe \n", + "column_names = ([\"name\", \"consignment name\", \"items per box\", \"contamination unit\", \"contamination parameters\", \n", + " \"contaminant arrangement\", \"cluster distribution\", \"contaminated units per cluster\", \"cluster width\", \"avg contamination rate\", \n", + " \"avg missed contamination rate\", \"avg intercepted contamination rate\", \"avg boxes opened per inspection\", \n", + " \"pct box opened per simulation\", \"avg items inspected per inspection\", \"pct items inspected per simulation\", \n", + " \"missed contaminants\", \"intercepted contaminants\", \"interception rate\", \"contaminated_consignments\", \"% missed contaminants\"])\n", + "df_variability_pretty = df_variability" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_variability_pretty.iloc[:,12:17] = df_variability_pretty.iloc[:,12:17].round(decimals=4)\n", + "df_variability_pretty.iloc[:,17:29] = df_variability_pretty.iloc[:,17:29].astype(int)\n", + "df_variability_pretty.iloc[:,29:31] = df_variability_pretty.iloc[:,29:31].round(decimals=4)\n", + "df_variability_pretty.iloc[:,[31]] = df_variability_pretty.iloc[:,[31]].astype(int)\n", + "df_variability_pretty.iloc[:,[32]] = df_variability_pretty.iloc[:,[32]].round(decimals=4)\n", + "df_variability_pretty = df_variability_pretty.iloc[:,[0,1,4,5,7,8,9,10,11,12,14,16,17,18,21,22,27,28,30,31,32]]\n", + "df_variability_pretty.columns = column_names" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Save results to csv\n", + "#df_variability_pretty.to_csv(resultsdir / \"ratevariability_scenario_results.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# If loading results from saved csv, uncomment and run this chunk.\n", + "#df_variability_pretty = pd.read_csv(resultsdir / \"ratevariability_scenario_results.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "variability_box_unit = df_variability_pretty[df_variability_pretty[\"contamination unit\"] == \"box\"]\n", + "variability_item_unit = df_variability_pretty[df_variability_pretty[\"contamination unit\"] == \"item\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rate_variability_dict = {\"Low\":0.01, \"Mid\":0.03, \"High\":0.05}\n", + "box_marker = Line2D([0], [0], marker='s', color=\"w\", label='box contamination unit', markerfacecolor=dark_blue, markersize=8)\n", + "item_marker = Line2D([0], [0], marker='^', color=\"w\", label='item contamination unit', markerfacecolor=green, markersize=9)" ] }, { @@ -741,30 +1084,33 @@ "metadata": {}, "outputs": [], "source": [ - "colors = {\"item\": \"#b2df8a\", \"box\": \"#1f78b4\"}\n", - "patch_1 = mpatches.Patch(color=\"#b2df8a\", label=\"item\")\n", - "patch_2 = mpatches.Patch(color=\"#1f78b4\", label=\"box\")\n", + "y_box = variability_box_unit[\"consignment name\"].replace(rate_variability_dict)\n", + "y_item = variability_item_unit[\"consignment name\"].replace(rate_variability_dict)\n", + "\n", + "x_missed_cont_box = variability_box_unit[\"missed contaminants\"] / variability_box_unit[\"intercepted contaminants\"]\n", + "x_interception_rate_box = variability_box_unit[\"interception rate\"]\n", "\n", - "plt.figure(figsize=(16, 3.2), dpi=300)\n", + "x_missed_cont_item = variability_item_unit[\"missed contaminants\"] / variability_item_unit[\"intercepted contaminants\"]\n", + "x_interception_rate_item = variability_item_unit[\"interception rate\"]\n", + "\n", + "plt.figure(figsize=(7,2), dpi=250)\n", "plt.subplot(121)\n", - "plt.subplots_adjust(bottom=0.25,top=0.72, left=0.18, right=0.97, wspace=0.4)\n", - "plt.barh(df_contaminant_arrangement_scenarios[\"name\"], df_contaminant_arrangement_scenarios[\"interception rate\"], color=df_contaminant_arrangement_scenarios['contamination unit'].replace(colors))\n", - "plt.title(\"Interception Rate\", fontsize=24)\n", - "plt.xlabel(\"rate\", fontsize=18)\n", - "plt.ylabel(\"contaminant \\narrangement\", fontsize=18, labelpad=10)\n", - "plt.yticks(ticks=np.arange(4),labels=df_contaminant_arrangement_scenarios[\"consignment name\"], fontsize=20)\n", - "plt.xticks(fontsize=18)\n", + "plt.scatter(x_missed_cont_box, y_box, s=7**2, alpha=0.8, marker=\"s\", c=dark_blue)\n", + "plt.scatter(x_missed_cont_item, y_item, s=7**2, alpha=0.8, marker=\"^\", c=green)\n", + "plt.ylabel(\"contamination rate\\nstandard deviation\", size=9)\n", + "plt.xlabel(\"proportion of contaminants missed\", size=9)\n", + "plt.tick_params(axis='both', which='major', labelsize=9)\n", + "plt.ticklabel_format(axis=\"both\", style=\"plain\")\n", + "plt.legend(handles=[box_marker,item_marker], loc = \"upper right\", fontsize=8)\n", + "\n", "plt.subplot(122)\n", - "plt.barh(df_contaminant_arrangement_scenarios[\"name\"], df_contaminant_arrangement_scenarios[\"avg missed contamination rate\"], color=df_contaminant_arrangement_scenarios['contamination unit'].replace(colors))\n", - "plt.title(\"Avg. Missed Contamination Rate\", fontsize=24)\n", - "plt.xlabel(\"rate\", fontsize=18)\n", - "plt.yticks(ticks=np.arange(4),labels=df_contaminant_arrangement_scenarios[\"consignment name\"],fontsize=20)\n", - "plt.xticks(ticks=[0,0.002,0.004,0.006], fontsize=18)\n", - "plt.suptitle(\"Contaminant Arrangement Scenarios\", fontsize=28)\n", - "plt.legend(handles=[patch_2,patch_1], loc=\"right\", fontsize=20, borderpad=0.2, labelspacing=0.2)\n", + "plt.scatter(x_interception_rate_box, y_box, s=7**2, alpha=0.8, marker=\"s\", c=dark_blue)\n", + "plt.scatter(x_interception_rate_item, y_item, s=7**2, alpha=0.8, marker=\"^\", c=green)\n", + "plt.xlabel(\"interception rate\", size=9)\n", + "plt.tick_params(axis='both', which='major', labelsize=9)\n", + "plt.ticklabel_format(axis=\"both\", style=\"plain\")\n", "\n", - "plt.savefig(resultsdir / \"contaminant_arrangement_scenarios_plots.png\")\n", - "plt.show()" + "plt.savefig(resultsdir / \"ratevariability_scenario_scatter.png\", bbox_inches='tight')" ] }, { @@ -772,7 +1118,33 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "colors = {\"item\": green, \"box\": dark_blue}\n", + "patch_1 = mpatches.Patch(color=green, label=\"item contamination\")\n", + "patch_2 = mpatches.Patch(color=\"#1f78b4\", label=\"box contamination\")\n", + "\n", + "prop_missed_cont = df_variability_pretty[\"missed contaminants\"] / df_variability_pretty[\"intercepted contaminants\"]\n", + "\n", + "plt.figure(figsize=(16, 4), dpi=300)\n", + "plt.subplot(121)\n", + "plt.subplots_adjust(bottom=0.25,top=0.77, left=0.08, right=0.97)\n", + "plt.barh(df_variability_pretty[\"name\"].replace(rate_variability_dict), df_variability_pretty[\"interception rate\"], color=df_variability_pretty['contamination unit'].replace(colors))\n", + "plt.title(\"Interception Rate\", fontsize=24)\n", + "plt.xlabel(\"rate\", fontsize=18)\n", + "plt.ylabel(\"contamination rate\\nvariability\", fontsize=18, labelpad=10)\n", + "plt.yticks(ticks=np.arange(6),labels=df_variability_pretty[\"consignment name\"], fontsize=20)\n", + "plt.xticks(fontsize=18)\n", + "plt.subplot(122)\n", + "plt.barh(df_variability_pretty[\"name\"].replace(rate_variability_dict), prop_missed_cont, color=df_variability_pretty['contamination unit'].replace(colors))\n", + "plt.title(\"Proportion of Contaminants Missed\", fontsize=24)\n", + "plt.xlabel(\"proportion\", fontsize=18)\n", + "plt.yticks(ticks=np.arange(6),labels=df_variability_pretty[\"consignment name\"],fontsize=20)\n", + "plt.xticks(fontsize=18)\n", + "plt.legend(handles=[patch_2,patch_1], loc = \"lower right\", fontsize=20, borderpad=0.2, labelspacing=0.2)\n", + "\n", + "plt.savefig(resultsdir / \"rate_variability_scenario_plots.png\")\n", + "plt.show()" + ] } ], "metadata": { @@ -791,7 +1163,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.9" + "version": "3.10.0" } }, "nbformat": 4,