diff --git a/docs/f-21-12/notebooks/geom_violin.ipynb b/docs/f-21-12/notebooks/geom_violin.ipynb
index c1f244d0778..470d205d035 100644
--- a/docs/f-21-12/notebooks/geom_violin.ipynb
+++ b/docs/f-21-12/notebooks/geom_violin.ipynb
@@ -12,7 +12,7 @@
" \n",
" \n",
@@ -24,7 +24,6 @@
}
],
"source": [
- "import numpy as np\n",
"import pandas as pd\n",
"\n",
"from lets_plot import *\n",
@@ -33,113 +32,32 @@
]
},
{
- "cell_type": "markdown",
+ "cell_type": "code",
+ "execution_count": 2,
"metadata": {},
+ "outputs": [],
"source": [
- "## Test datasets"
+ "DRAW_QUANTILES = [.25, .5, .75]"
]
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 3,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " sepal_length | \n",
- " sepal_width | \n",
- " petal_length | \n",
- " petal_width | \n",
- " species | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 5.1 | \n",
- " 3.5 | \n",
- " 1.4 | \n",
- " 0.2 | \n",
- " setosa | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 4.9 | \n",
- " 3.0 | \n",
- " 1.4 | \n",
- " 0.2 | \n",
- " setosa | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 4.7 | \n",
- " 3.2 | \n",
- " 1.3 | \n",
- " 0.2 | \n",
- " setosa | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 4.6 | \n",
- " 3.1 | \n",
- " 1.5 | \n",
- " 0.2 | \n",
- " setosa | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 5.0 | \n",
- " 3.6 | \n",
- " 1.4 | \n",
- " 0.2 | \n",
- " setosa | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " sepal_length sepal_width petal_length petal_width species\n",
- "0 5.1 3.5 1.4 0.2 setosa\n",
- "1 4.9 3.0 1.4 0.2 setosa\n",
- "2 4.7 3.2 1.3 0.2 setosa\n",
- "3 4.6 3.1 1.5 0.2 setosa\n",
- "4 5.0 3.6 1.4 0.2 setosa"
- ]
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "iris_df = pd.read_csv(\"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/iris.csv\")\n",
- "\n",
- "iris_df.head()"
+ "def plot_matrix(plots=(), width=400, height=300, columns=2):\n",
+ "    \"\"\"Show `plots` in a grid of `columns` columns; each cell is `width` x `height`.\"\"\"\n",
+ "    bunch = GGBunch()\n",
+ "    for i, plot in enumerate(plots):\n",
+ "        row, column = divmod(i, columns)  # fill the grid row by row\n",
+ "        bunch.add_plot(plot, column * width, row * height, width, height)\n",
+ "    return bunch.show()"
]
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
@@ -163,307 +81,361 @@
" \n",
" \n",
" | \n",
- " species | \n",
- " sepal_length | \n",
- " weight | \n",
+ " Unnamed: 0 | \n",
+ " manufacturer | \n",
+ " model | \n",
+ " displ | \n",
+ " year | \n",
+ " cyl | \n",
+ " trans | \n",
+ " drv | \n",
+ " cty | \n",
+ " hwy | \n",
+ " fl | \n",
+ " class | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
- " setosa | \n",
- " 4.300000 | \n",
- " 0.222676 | \n",
+ " 1 | \n",
+ " audi | \n",
+ " a4 | \n",
+ " 1.8 | \n",
+ " 1999 | \n",
+ " 4 | \n",
+ " auto(l5) | \n",
+ " f | \n",
+ " 18 | \n",
+ " 29 | \n",
+ " p | \n",
+ " compact | \n",
"
\n",
" \n",
" | 1 | \n",
- " setosa | \n",
- " 4.302935 | \n",
- " 0.228662 | \n",
+ " 2 | \n",
+ " audi | \n",
+ " a4 | \n",
+ " 1.8 | \n",
+ " 1999 | \n",
+ " 4 | \n",
+ " manual(m5) | \n",
+ " f | \n",
+ " 21 | \n",
+ " 29 | \n",
+ " p | \n",
+ " compact | \n",
"
\n",
" \n",
" | 2 | \n",
- " setosa | \n",
- " 4.305871 | \n",
- " 0.234639 | \n",
+ " 3 | \n",
+ " audi | \n",
+ " a4 | \n",
+ " 2.0 | \n",
+ " 2008 | \n",
+ " 4 | \n",
+ " manual(m6) | \n",
+ " f | \n",
+ " 20 | \n",
+ " 31 | \n",
+ " p | \n",
+ " compact | \n",
"
\n",
" \n",
" | 3 | \n",
- " setosa | \n",
- " 4.308806 | \n",
- " 0.240684 | \n",
+ " 4 | \n",
+ " audi | \n",
+ " a4 | \n",
+ " 2.0 | \n",
+ " 2008 | \n",
+ " 4 | \n",
+ " auto(av) | \n",
+ " f | \n",
+ " 21 | \n",
+ " 30 | \n",
+ " p | \n",
+ " compact | \n",
"
\n",
" \n",
" | 4 | \n",
- " setosa | \n",
- " 4.311742 | \n",
- " 0.246886 | \n",
+ " 5 | \n",
+ " audi | \n",
+ " a4 | \n",
+ " 2.8 | \n",
+ " 1999 | \n",
+ " 6 | \n",
+ " auto(l5) | \n",
+ " f | \n",
+ " 16 | \n",
+ " 26 | \n",
+ " p | \n",
+ " compact | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " species sepal_length weight\n",
- "0 setosa 4.300000 0.222676\n",
- "1 setosa 4.302935 0.228662\n",
- "2 setosa 4.305871 0.234639\n",
- "3 setosa 4.308806 0.240684\n",
- "4 setosa 4.311742 0.246886"
+ " Unnamed: 0 manufacturer model displ year cyl trans drv cty hwy \\\n",
+ "0 1 audi a4 1.8 1999 4 auto(l5) f 18 29 \n",
+ "1 2 audi a4 1.8 1999 4 manual(m5) f 21 29 \n",
+ "2 3 audi a4 2.0 2008 4 manual(m6) f 20 31 \n",
+ "3 4 audi a4 2.0 2008 4 auto(av) f 21 30 \n",
+ "4 5 audi a4 2.8 1999 6 auto(l5) f 16 26 \n",
+ "\n",
+ " fl class \n",
+ "0 p compact \n",
+ "1 p compact \n",
+ "2 p compact \n",
+ "3 p compact \n",
+ "4 p compact "
]
},
- "execution_count": 3,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "def construct_violin_df(df, xname, yname, n=512):\n",
- " from functools import reduce\n",
- "\n",
- " from scipy.stats import gaussian_kde\n",
+ "mpg_df = pd.read_csv(\"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv\")\n",
"\n",
- " def get_weights(values):\n",
- " def nrd0_bw(kde):\n",
- " iqr = np.quantile(kde.dataset, .75) - np.quantile(kde.dataset, .25)\n",
- " std = np.std(kde.dataset)\n",
- " size = kde.dataset.size\n",
- " if iqr > 0:\n",
- " return .9 * min(std, iqr / 1.34) * (size ** -.2)\n",
- " if std > 0:\n",
- " return .9 * std * (size ** -.2)\n",
- "\n",
- " yrange = np.linspace(values.min(), values.max(), n)\n",
- "\n",
- " return {yname: yrange, 'weight': gaussian_kde(values, bw_method=nrd0_bw)(yrange)}\n",
- "\n",
- " def reducer(agg_df, xval):\n",
- " weights = get_weights(df[df[xname] == xval][yname])\n",
- " y = weights[yname]\n",
- " x = [xval] * y.size\n",
- " w = weights['weight']\n",
- "\n",
- " return pd.concat([agg_df, pd.DataFrame({xname: x, yname: y, 'weight': w})], ignore_index=True)\n",
- "\n",
- " return reduce(reducer, df[xname], pd.DataFrame(columns=[xname, yname, 'weight']))\n",
- "\n",
- "violin_df = construct_violin_df(iris_df, 'species', 'sepal_length')\n",
- "violin_df.head()"
+ "mpg_df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Minimalistic example"
]
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " v | \n",
- " c1 | \n",
- " c2 | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 0.496714 | \n",
- " A | \n",
- " b | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " -0.138264 | \n",
- " B | \n",
- " b | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 0.647689 | \n",
- " A | \n",
- " a | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 1.523030 | \n",
- " A | \n",
- " a | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " -0.234153 | \n",
- " C | \n",
- " a | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
+ " \n",
+ " "
],
"text/plain": [
- " v c1 c2\n",
- "0 0.496714 A b\n",
- "1 -0.138264 B b\n",
- "2 0.647689 A a\n",
- "3 1.523030 A a\n",
- "4 -0.234153 C a"
+ ""
]
},
- "execution_count": 4,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "size = 100\n",
- "np.random.seed(42)\n",
- "random_df = pd.DataFrame({\n",
- " 'v': np.random.normal(size=size),\n",
- " 'c1': np.random.choice(['A', 'B', 'C'], size=size),\n",
- " 'c2': np.random.choice(['a', 'b'], size=size)\n",
- "})\n",
- "\n",
- "random_df.head()"
+ "ggplot(mpg_df, aes(y='hwy')) + geom_violin() + ggtitle(\"Simplest example\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Comparison of geoms"
]
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " v | \n",
- " c1 | \n",
- " c2 | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 0.496714 | \n",
- " A | \n",
- " b | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " -0.138264 | \n",
- " NaN | \n",
- " b | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " NaN | \n",
- " A | \n",
- " a | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 1.523030 | \n",
- " A | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " -0.234153 | \n",
- " C | \n",
- " a | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " v c1 c2\n",
- "0 0.496714 A b\n",
- "1 -0.138264 NaN b\n",
- "2 NaN A a\n",
- "3 1.523030 A NaN\n",
- "4 -0.234153 C a"
+ " \n",
+ " "
]
},
- "execution_count": 5,
"metadata": {},
- "output_type": "execute_result"
+ "output_type": "display_data"
}
],
"source": [
- "def mask(p=.1, seed=42):\n",
- " np.random.seed(seed)\n",
- " return np.random.choice([True, False], random_df.shape[0], p=[p, 1 - p])\n",
- "\n",
- "nullable_df = random_df.copy()\n",
- "nullable_df.loc[mask(seed=1), 'v'] = np.nan\n",
- "nullable_df.loc[mask(seed=2), 'c1'] = np.nan\n",
- "nullable_df.loc[mask(seed=6), 'c2'] = np.nan\n",
+ "p_d = ggplot(mpg_df) + \\\n",
+ " geom_density(aes(x='hwy', fill='drv'), color='black', alpha=.5) + \\\n",
+ " facet_grid(x='drv') + \\\n",
+ " coord_flip() + \\\n",
+ " ggtitle(\"geom_density()\")\n",
+ "p_v = ggplot(mpg_df, aes(x=as_discrete('drv', order=1), y='hwy')) + \\\n",
+ " geom_violin(aes(fill='drv'), alpha=.5) + \\\n",
+ " ggtitle(\"geom_violin()\")\n",
"\n",
- "nullable_df.head()"
+ "plot_matrix([p_d, p_v])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "## Minimalistic example"
+ "## Original parameters"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### `draw_quantiles`"
]
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- " \n",
+ " \n",
" "
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 6,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "ggplot(random_df, aes(y='v')) + geom_violin() + ggtitle(\"Simplest example\")"
+ "tests = [\n",
+ " {'draw_quantiles': None}, # default\n",
+ " {'draw_quantiles': [.05, .5, .95]}, # all correct\n",
+ " {'draw_quantiles': (1/3, .5, 2/3)}, # strange, but correct\n",
+ " {'draw_quantiles': [.25]}, # only one\n",
+ " {'draw_quantiles': []}, # empty\n",
+ " {'draw_quantiles': [0, .5, 1]}, # include borders\n",
+ " {'draw_quantiles': [-1, .5, 2], 'skip': True}, # beyond borders\n",
+ " {'draw_quantiles': ['0.25', '0.5', '0.75'], 'skip': True}, # invalid values\n",
+ " {'draw_quantiles': [True, False], 'skip': True}, # totally invalid values\n",
+ " {'draw_quantiles': 0.5, 'skip': True}, # wrong parameter type\n",
+ " {'draw_quantiles': True, 'skip': True}, # another wrong parameter type\n",
+ " {'draw_quantiles': '0.25', 'skip': True}, # even worse parameter type\n",
+ " {'draw_quantiles': object(), 'skip': True}, # totally wrong parameter type\n",
+ "]\n",
+ "\n",
+ "ggplot(mpg_df, aes('drv', 'hwy')) + \\\n",
+ " geom_violin(draw_quantiles=DRAW_QUANTILES) + \\\n",
+ " ggtitle(\"draw_quantiles={0}\".format(DRAW_QUANTILES))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "## Comparison of geoms"
+ "### `scale`"
]
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- " \n",
+ " \n",
" "
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 9,
+ "execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "ggplot(random_df, aes(x='c1', y='v')) + \\\n",
- " geom_violin(aes(fill='c2'), tooltips=layer_tooltips().line('^x')\n",
- " .line('category|@c2')\n",
- " .line('v|@v')\n",
- " .line('@|@..density..')\n",
- " .line('count|@..count..')\n",
- " .line('scaled|@..scaled..')) + \\\n",
+ "ggplot(mpg_df, aes(x='drv', y='hwy')) + \\\n",
+ " geom_violin(aes(group='year', fill=as_discrete('year')), \\\n",
+ " draw_quantiles=DRAW_QUANTILES, \\\n",
+ " tooltips=layer_tooltips().line('^x')\n",
+ " .line('year|@year')\n",
+ " .line('hwy|@hwy')\n",
+ " .line('violinwidth|@..violinwidth..')\n",
+ " .line('density|@..density..')\n",
+ " .line('count|@..count..')\n",
+ " .line('scaled|@..scaled..')) + \\\n",
" ggtitle(\"Grouping and tooltips\")"
]
},
@@ -894,52 +954,62 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "## `coord_flip()`"
+ "## Facets"
]
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- " \n",
+ " \n",
" "
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 10,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "ggplot(iris_df, aes('species', 'sepal_length')) + \\\n",
- " geom_violin() + \\\n",
- " coord_flip() + \\\n",
- " ggtitle(\"Use coord_flip()\")"
+ "ggplot(mpg_df, aes(x='drv', y='hwy')) + \\\n",
+ " geom_violin(aes(fill=as_discrete('year')), draw_quantiles=DRAW_QUANTILES) + \\\n",
+ " facet_grid(y='year')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "## \"identity\" statistic"
+ "## `coord_flip()`"
]
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- " \n",
+ " \n",
" "
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 11,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "ggplot(violin_df, aes('species', 'sepal_length')) + \\\n",
- " geom_violin(aes(weight='weight'), stat='identity') + \\\n",
- " ggtitle(\"Use 'identity' statistic\")"
+ "ggplot(mpg_df, aes('drv', 'hwy')) + \\\n",
+ " geom_violin(draw_quantiles=DRAW_QUANTILES) + \\\n",
+ " coord_flip() + \\\n",
+ " ggtitle(\"Use coord_flip()\")"
]
},
{
@@ -1040,41 +1113,35 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- " \n",
+ " \n",
" "
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 12,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "ggplot(random_df, aes(as_discrete('c1', order=-1), 'v')) + \\\n",
- " geom_violin(aes(color='c1', fill='c1'), alpha=.5, size=2, \\\n",
+ "ggplot(mpg_df, aes(as_discrete('drv', order=-1), 'hwy')) + \\\n",
+ " geom_violin(aes(color='drv', fill='drv'), alpha=.5, size=2, \\\n",
+ " n=8, draw_quantiles=DRAW_QUANTILES,\n",
" sampling=sampling_group_systematic(2)) + \\\n",
- " facet_grid(x='c2') + \\\n",
- " scale_y_continuous(breaks=list(np.linspace(-3, 3, 9))) + \\\n",
+ " scale_y_continuous(breaks=list(range(12, 29, 2))) + \\\n",
" scale_color_brewer(type='qual', palette='Set1') + \\\n",
" scale_fill_brewer(type='qual', palette='Set1') + \\\n",
- " ylim(-3, 3) + \\\n",
- " coord_fixed(ratio=.5) + \\\n",
+ " ylim(12, 28) + \\\n",
+ " coord_fixed(ratio=.2) + \\\n",
" theme_grey() + \\\n",
" ggtitle(\"Some additional aesthetics, parameters and layers\")"
]
},
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Dataset with NaN's"
- ]
- },
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- " \n",
+ " \n",
" "
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 13,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "ggplot(nullable_df, aes('c1', 'v')) + geom_violin()"
+ "# Note: quartiles for the violin need not be equal to the quartiles for the boxplot!\n",
+ "# See the last paragraph here: https://stackoverflow.com/a/36036821/11771414\n",
+ "quartiles = [1/4, 2/4, 3/4]\n",
+ "ggplot(mpg_df, aes(x='drv', y='hwy')) + \\\n",
+ " geom_violin(draw_quantiles=quartiles) + \\\n",
+ " geom_boxplot(width=.1)"
]
}
],
diff --git a/plot-base-portable/src/commonTest/kotlin/jetbrains/datalore/plot/base/stat/YDensityStatTest.kt b/plot-base-portable/src/commonTest/kotlin/jetbrains/datalore/plot/base/stat/YDensityStatTest.kt
new file mode 100644
index 00000000000..0beb80ab2a8
--- /dev/null
+++ b/plot-base-portable/src/commonTest/kotlin/jetbrains/datalore/plot/base/stat/YDensityStatTest.kt
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2022. JetBrains s.r.o.
+ * Use of this source code is governed by the MIT license that can be found in the LICENSE file.
+ */
+
+package jetbrains.datalore.plot.base.stat
+
+import jetbrains.datalore.base.gcommon.collect.ClosedRange
+import jetbrains.datalore.plot.base.DataFrame
+import jetbrains.datalore.plot.base.StatContext
+import jetbrains.datalore.plot.base.data.TransformVar
+import kotlin.test.Test
+import kotlin.test.assertEquals
+import kotlin.test.assertTrue
+
+class YDensityStatTest {
+ private fun statContext(d: DataFrame): StatContext {
+ return SimpleStatContext(d)
+ }
+
+ private fun dataFrame(dataMap: Map>): DataFrame {
+     // Copy every (variable, values) entry of the map into a fresh DataFrame.
+     val dfBuilder = DataFrame.Builder()
+     dataMap.forEach { (variable, values) -> dfBuilder.put(variable, values) }
+
+     return dfBuilder.build()
+ }
+
+ private fun filteredDataFrame(df: DataFrame, variable: DataFrame.Variable, filterFun: (Double?) -> Boolean): DataFrame {
+     // Collect the row indices whose value of `variable` satisfies the predicate, then select those rows.
+     val matchingRows = df.getNumeric(variable)
+         .mapIndexedNotNull { rowIndex, value -> if (filterFun(value)) rowIndex else null }
+
+     return df.selectIndices(matchingRows)
+ }
+
+ private fun yDensityStat(scale: YDensityStat.Scale? = null): YDensityStat {
+ return YDensityStat(
+ scale = scale ?: YDensityStat.DEF_SCALE,
+ bandWidth = null,
+ bandWidthMethod = DensityStat.DEF_BW,
+ adjust = DensityStat.DEF_ADJUST,
+ kernel = DensityStat.DEF_KERNEL,
+ n = DensityStat.DEF_N,
+ fullScanMax = DensityStat.DEF_FULL_SCAN_MAX
+ )
+ }
+
+ private fun checkStatVar(statDf: DataFrame, variable: DataFrame.Variable) {
+ assertTrue(statDf.has(variable), "Has var " + variable.name)
+ }
+
+ private fun checkStatVarAndValuesDomain(statDf: DataFrame, variable: DataFrame.Variable, expectedValuesDomain: Set) {
+     checkStatVar(statDf, variable) // the variable must be present before its values are compared
+     assertEquals(expectedValuesDomain, statDf.getNumeric(variable).toSet(), "Unique values of var " + variable.name) // kotlin.test order: expected first, then actual
+ }
+
+ private fun checkStatVarAndValuesRange(statDf: DataFrame, variable: DataFrame.Variable, expectedValuesRange: ClosedRange) {
+ checkStatVar(statDf, variable)
+ val actualMinValue = statDf.getNumeric(variable).minByOrNull { it!! }!!
+ assertEquals(expectedValuesRange.lowerEnd, actualMinValue, "Min value of var " + variable.name)
+ val actualMaxValue = statDf.getNumeric(variable).maxByOrNull { it!! }!!
+ assertEquals(expectedValuesRange.upperEnd, actualMaxValue, "Max value of var " + variable.name)
+ }
+
+ private fun checkStatVarAndMaxValue(statDf: DataFrame, variable: DataFrame.Variable, expectedMaxValue: Double) {
+ checkStatVar(statDf, variable)
+ val actualMaxValue = statDf.getNumeric(variable).maxByOrNull { it!! }!!
+ assertEquals(expectedMaxValue, actualMaxValue, "Max value of var " + variable.name)
+ }
+
+ private fun checkStatVarAndMaxLimit(statDf: DataFrame, variable: DataFrame.Variable, expectedMaxLimit: Double) {
+ checkStatVar(statDf, variable)
+ val actualMaxValue = statDf.getNumeric(variable).maxByOrNull { it!! }!!
+ assertTrue(expectedMaxLimit - actualMaxValue > 0, "Max value of var " + variable.name + " limited")
+ }
+
+ @Test
+ fun emptyDataFrame() {
+ val df = dataFrame(emptyMap())
+ val stat = yDensityStat()
+ val statDf = stat.normalize(stat.apply(df, statContext(df)))
+
+ checkStatVarAndValuesDomain(statDf, Stats.X, emptySet())
+ checkStatVarAndValuesDomain(statDf, Stats.Y, emptySet())
+ checkStatVarAndValuesDomain(statDf, Stats.VIOLIN_WIDTH, emptySet())
+ }
+
+ @Test
+ fun oneElementDataFrame() {
+ val yValue = 3.14
+ val df = dataFrame(mapOf(
+ TransformVar.Y to listOf(yValue)
+ ))
+ val stat = yDensityStat()
+ val statDf = stat.normalize(stat.apply(df, statContext(df)))
+
+ checkStatVarAndValuesDomain(statDf, Stats.X, setOf(0.0))
+ checkStatVarAndMaxValue(statDf, Stats.VIOLIN_WIDTH, 1.0)
+ }
+
+ @Test
+ fun twoElementsInDataFrame() {
+ val y = listOf(2.71, 3.14)
+ val df = dataFrame(mapOf(
+ TransformVar.Y to y
+ ))
+ val stat = yDensityStat()
+ val statDf = stat.normalize(stat.apply(df, statContext(df)))
+
+ checkStatVarAndValuesDomain(statDf, Stats.X, setOf(0.0))
+ checkStatVarAndValuesRange(statDf, Stats.Y, ClosedRange(2.71, 3.14))
+ checkStatVarAndMaxValue(statDf, Stats.VIOLIN_WIDTH, 1.0)
+ }
+
+ @Test
+ fun withNanValues() {
+ val x = listOf(null, 4.0, 3.0, 3.0, 1.0, 1.0, 2.0, 2.0)
+ val y = listOf(3.0, null, 2.0, 3.0, 0.0, 1.0, 1.0, 2.0)
+ val df = dataFrame(mapOf(
+ TransformVar.X to x,
+ TransformVar.Y to y
+ ))
+ val stat = yDensityStat()
+ val statDf = stat.normalize(stat.apply(df, statContext(df)))
+
+ checkStatVarAndValuesDomain(statDf, Stats.X, setOf(1.0, 2.0, 3.0))
+ checkStatVarAndValuesRange(statDf, Stats.Y, ClosedRange(0.0, 3.0))
+ checkStatVarAndMaxValue(statDf, Stats.VIOLIN_WIDTH, 1.0)
+ }
+
+ @Test
+ fun changeScales() {
+ val x = listOf(0.0, 0.0, 0.0, 0.0, 1.0, 1.0)
+ val y = listOf(0.0, 1.0, 2.0, 3.0, 0.0, 1.0)
+ val df = dataFrame(mapOf(
+ TransformVar.X to x,
+ TransformVar.Y to y
+ ))
+
+ for (scale in YDensityStat.Scale.values()) {
+ val stat = yDensityStat(scale = scale)
+ val statDf = stat.normalize(stat.apply(df, statContext(df)))
+ val statDf0 = filteredDataFrame(statDf, Stats.X) { it == 0.0 }
+ val statDf1 = filteredDataFrame(statDf, Stats.X) { it == 1.0 }
+
+ checkStatVarAndValuesDomain(statDf, Stats.X, setOf(0.0, 1.0))
+ checkStatVarAndValuesRange(statDf0, Stats.Y, ClosedRange(0.0, 3.0))
+ checkStatVarAndValuesRange(statDf1, Stats.Y, ClosedRange(0.0, 1.0))
+ when (scale) {
+ YDensityStat.Scale.AREA -> {
+ checkStatVarAndMaxLimit(statDf0, Stats.VIOLIN_WIDTH, 0.5)
+ checkStatVarAndMaxValue(statDf1, Stats.VIOLIN_WIDTH, 1.0)
+ }
+ YDensityStat.Scale.COUNT -> {
+ checkStatVarAndMaxLimit(statDf0, Stats.VIOLIN_WIDTH, 0.5)
+ checkStatVarAndMaxValue(statDf1, Stats.VIOLIN_WIDTH, 0.5)
+ }
+ YDensityStat.Scale.WIDTH -> {
+ checkStatVarAndMaxValue(statDf0, Stats.VIOLIN_WIDTH, 1.0)
+ checkStatVarAndMaxValue(statDf1, Stats.VIOLIN_WIDTH, 1.0)
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Violin.kt b/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Violin.kt
index c160546d4ac..ba868a2d33b 100644
--- a/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Violin.kt
+++ b/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Violin.kt
@@ -14,14 +14,6 @@ class Violin {
basic(),
withNan(),
withGroups(),
-
-// TODO: Move this to tests
-// data132Violin(),
-// data132ViolinDiscrete(),
-// data132ViolinDefaultN(),
-// data132ViolinIdentity(),
-// data132Boxplot(),
-// data123Violin(),
)
}
@@ -101,162 +93,4 @@ class Violin {
return HashMap(parsePlotSpec(spec))
}
-
- private fun data132Violin(): MutableMap {
- val spec = "{" +
- " 'kind': 'plot'," +
- " 'data' : {'x': [1, 3, 2]," +
- " 'y': [2, 0, 1]" +
- " }," +
- " 'mapping': {" +
- " 'x': 'x'," +
- " 'y': 'y'" +
- " }," +
- " 'ggtitle': {" +
- " 'text': 'x=[1, 3, 2]'" +
- " }," +
- " 'layers': [" +
- " {" +
- " 'geom': 'violin'," +
- " 'n': 3" +
- " }" +
- " ]" +
- "}"
-
- return HashMap(parsePlotSpec(spec))
-
- }
-
- private fun data132ViolinDiscrete(): MutableMap {
- val spec = "{" +
- " 'kind': 'plot'," +
- " 'data' : {'x': [1, 3, 2]," +
- " 'y': [2, 0, 1]" +
- " }," +
- " 'mapping': {" +
- " 'x': 'x'," +
- " 'y': 'y'" +
- " }," +
- " 'ggtitle': {" +
- " 'text': 'x=[1, 3, 2] and discrete'" +
- " }," +
- " 'layers': [" +
- " {" +
- " 'geom': 'violin'," +
- " 'n': 3" +
- " }" +
- " ]," +
- " 'scales': [" +
- " {" +
- " 'aesthetic': 'x'," +
- " 'discrete': true" +
- " }" +
- " ]" +
- "}"
-
- return HashMap(parsePlotSpec(spec))
-
- }
-
- private fun data132ViolinDefaultN(): MutableMap {
- val spec = "{" +
- " 'kind': 'plot'," +
- " 'data' : {'x': [1, 3, 2]," +
- " 'y': [2, 0, 1]" +
- " }," +
- " 'mapping': {" +
- " 'x': 'x'," +
- " 'y': 'y'" +
- " }," +
- " 'ggtitle': {" +
- " 'text': 'x=[1, 3, 2], default n'" +
- " }," +
- " 'layers': [" +
- " {" +
- " 'geom': 'violin'" +
- " }" +
- " ]" +
- "}"
-
- return HashMap(parsePlotSpec(spec))
-
- }
-
- private fun data132ViolinIdentity(): MutableMap {
- val spec = "{" +
- " 'kind': 'plot'," +
- " 'data' : {'x': [1, 1, 1, 3, 3, 3, 2, 2, 2]," +
- " 'y': [4, 3, 2, 5, 4, 3, 3, 2, 1]," +
- " 'vw': [0, 1, 0, 0, 1, 0, 0, 1, 0]" +
- " }," +
- " 'mapping': {" +
- " 'x': 'x'," +
- " 'y': 'y'" +
- " }," +
- " 'ggtitle': {" +
- " 'text': 'x=[1, 3, 2], stat=identity'" +
- " }," +
- " 'layers': [" +
- " {" +
- " 'geom': 'violin'," +
- " 'mapping': {" +
- " 'violinwidth': 'vw'" +
- " }," +
- " 'stat': 'identity'" +
- " }" +
- " ]" +
- "}"
-
- return HashMap(parsePlotSpec(spec))
-
- }
-
- private fun data132Boxplot(): MutableMap {
- val spec = "{" +
- " 'kind': 'plot'," +
- " 'data' : {'x': [1, 3, 2]," +
- " 'y': [2, 0, 1]" +
- " }," +
- " 'mapping': {" +
- " 'x': 'x'," +
- " 'y': 'y'" +
- " }," +
- " 'ggtitle': {" +
- " 'text': 'x=[1, 3, 2], geom=boxplot'" +
- " }," +
- " 'layers': [" +
- " {" +
- " 'geom': 'boxplot'" +
- " }" +
- " ]" +
- "}"
-
- return HashMap(parsePlotSpec(spec))
-
- }
-
- private fun data123Violin(): MutableMap {
- val spec = "{" +
- " 'kind': 'plot'," +
- " 'data' : {'x': [1, 2, 3]," +
- " 'y': [2, 1, 0]" +
- " }," +
- " 'mapping': {" +
- " 'x': 'x'," +
- " 'y': 'y'" +
- " }," +
- " 'ggtitle': {" +
- " 'text': 'x=[1, 2, 3]'" +
- " }," +
- " 'layers': [" +
- " {" +
- " 'geom': 'violin'," +
- " 'n': 3" +
- " }" +
- " ]" +
- "}"
-
- return HashMap(parsePlotSpec(spec))
-
- }
}
\ No newline at end of file
diff --git a/python-package/lets_plot/plot/geom.py b/python-package/lets_plot/plot/geom.py
index 53f2c25b1b5..8102783719c 100644
--- a/python-package/lets_plot/plot/geom.py
+++ b/python-package/lets_plot/plot/geom.py
@@ -2788,6 +2788,136 @@ def geom_boxplot(mapping=None, *, data=None, stat=None, position=None, show_lege
def geom_violin(mapping=None, *, data=None, stat=None, position=None, show_legend=None, sampling=None, tooltips=None,
**other_args):
+ """
+ A violin plot is a mirrored density plot with additional grouping, as in a boxplot.
+
+ Parameters
+ ----------
+ mapping : `FeatureSpec`
+ Set of aesthetic mappings created by `aes()` function.
+ Aesthetic mappings describe the way that variables in the data are
+ mapped to plot "aesthetics".
+ data : dict or `DataFrame`
+ The data to be displayed in this layer. If None, the default, the data
+ is inherited from the plot data as specified in the call to ggplot.
+ stat : str, default='ydensity'
+ The statistical transformation to use on the data for this layer, as a string.
+ position : str or `FeatureSpec`
+ Position adjustment, either as a string ('identity', 'stack', 'dodge', ...),
+ or the result of a call to a position adjustment function.
+ show_legend : bool, default=True
+ False - do not show legend for this layer.
+ sampling : `FeatureSpec`
+ Result of the call to the `sampling_xxx()` function.
+ Value None (or 'none') will disable sampling for this layer.
+ tooltips : `layer_tooltips`
+ Result of the call to the `layer_tooltips()` function.
+ Specifies appearance, style and content.
+ draw_quantiles : list of float
+ Draw horizontal lines at the given quantiles of the density estimate.
+ scale : {'area', 'count', 'width'}, default='area'
+ If 'area', all violins have the same area.
+ If 'count', areas are scaled proportionally to the number of observations.
+ If 'width', all violins have the same maximum width.
+ other_args
+ Other arguments passed on to the layer.
+ These are often aesthetics settings used to set an aesthetic to a fixed value,
+ like color='red', fill='blue', size=3 or shape=21.
+ They may also be parameters to the paired geom/stat.
+
+ Returns
+ -------
+ `LayerSpec`
+ Geom object specification.
+
+ Notes
+ -----
+ Computed variables:
+
+ - ..violinwidth.. : density scaled for the violin plot, according to area, counts or to a constant maximum width (mapped by default).
+ - ..density.. : density estimate.
+ - ..count.. : density * number of points.
+ - ..scaled.. : density estimate, scaled to maximum of 1.
+
+ `geom_violin()` understands the following aesthetics mappings:
+
+ - x : x-axis coordinates.
+ - y : y-axis coordinates.
+ - alpha : transparency level of a layer. Understands numbers between 0 and 1.
+ - color (colour) : color of the geometry lines. Can be continuous or discrete. For a continuous value this will be a color gradient between two colors.
+ - fill : color of the geometry filling.
+ - size : line width.
+ - linetype : type of the border line. Codes and names: 0 = 'blank', 1 = 'solid', 2 = 'dashed', 3 = 'dotted', 4 = 'dotdash', 5 = 'longdash', 6 = 'twodash'.
+ - weight : used by 'ydensity' stat to compute weighted density.
+
+ Examples
+ --------
+ .. jupyter-execute::
+ :linenos:
+ :emphasize-lines: 9
+
+ import numpy as np
+ from lets_plot import *
+ LetsPlot.setup_html()
+ n = 100
+ np.random.seed(42)
+ x = np.random.choice(['a', 'b', 'c'], size=n)
+ y = np.random.normal(size=n)
+ ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + \\
+ geom_violin()
+
+ |
+
+ .. jupyter-execute::
+ :linenos:
+ :emphasize-lines: 9
+
+ import numpy as np
+ from lets_plot import *
+ LetsPlot.setup_html()
+ n = 100
+ np.random.seed(42)
+ x = np.random.choice(['a', 'b', 'b', 'c'], size=n)
+ y = np.random.normal(size=n)
+ ggplot({'x': x, 'y': y}, aes('x', 'y')) + \\
+ geom_violin(scale='count', draw_quantiles=[.25, .5, .75])
+
+ |
+
+ .. jupyter-execute::
+ :linenos:
+ :emphasize-lines: 10
+
+ import numpy as np
+ from lets_plot import *
+ LetsPlot.setup_html()
+ n = 3
+ np.random.seed(42)
+ x = ['a'] * n + ['b'] * n + ['c'] * n
+ y = 3 * list(range(n))
+ vw = np.random.uniform(size=3*n)
+ ggplot({'x': x, 'y': y, 'vw': vw}, aes('x', 'y')) + \\
+ geom_violin(aes(violinwidth='vw', fill='x'), stat='identity')
+
+ |
+ .. jupyter-execute::
+ :linenos:
+ :emphasize-lines: 10-11
+
+ import numpy as np
+ import pandas as pd
+ from lets_plot import *
+ LetsPlot.setup_html()
+ n, m = 100, 5
+ np.random.seed(42)
+ df = pd.DataFrame({'x%s' % i: np.random.normal(size=n) \\
+ for i in range(1, m + 1)})
+ ggplot(df.melt(), aes('variable', 'value')) + \\
+ geom_violin(aes(color='variable', fill='variable'), \\
+ size=2, alpha=.5, scale='width') + \\
+ geom_boxplot(aes(fill='variable'), width=.2)
+
+ """
return _geom('violin',
mapping=mapping,
data=data,