Skip to content

Commit

Permalink
Deploying to gh-pages from @ 012b692 🚀
Browse files Browse the repository at this point in the history
  • Loading branch information
github-merge-queue[bot] committed May 8, 2024
1 parent c238319 commit 9c3a475
Show file tree
Hide file tree
Showing 30 changed files with 3,469 additions and 2,119 deletions.
121 changes: 121 additions & 0 deletions _modules/arkouda/dataframe.html
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,127 @@ <h1>Source code for arkouda.dataframe</h1><div class="highlight"><pre>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_return_agg_dataframe</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">gb</span><span class="o">.</span><span class="n">size</span><span class="p">(),</span> <span class="s2">&quot;size&quot;</span><span class="p">,</span> <span class="n">sort_index</span><span class="o">=</span><span class="n">sort_index</span><span class="p">)</span></div>


<div class="viewcode-block" id="DataFrameGroupBy.sample">
<a class="viewcode-back" href="../../autoapi/arkouda/index.html#arkouda.dataframe.DataFrameGroupBy.sample">[docs]</a>
<span class="k">def</span> <span class="nf">sample</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">n</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">frac</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">replace</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">weights</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return a random sample from each group. You can either specify the number of elements</span>
<span class="sd"> or the fraction of elements to be sampled. random_state can be used for reproducibility.</span>

<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> n: int, optional</span>
<span class="sd"> Number of items to return for each group.</span>
<span class="sd"> Cannot be used with frac and must be no larger than</span>
<span class="sd"> the smallest group unless replace is True.</span>
<span class="sd"> Default is one if frac is None.</span>

<span class="sd"> frac: float, optional</span>
<span class="sd"> Fraction of items to return for each group. Cannot be used with n.</span>

<span class="sd"> replace: bool, default False</span>
<span class="sd"> Allow or disallow sampling of the same row more than once.</span>

<span class="sd"> weights: pdarray, optional</span>
<span class="sd"> Default None results in equal probability weighting.</span>
<span class="sd"> If passed a pdarray, then values must have the same length as the underlying DataFrame</span>
<span class="sd"> and will be used as sampling probabilities after normalization within each group.</span>
<span class="sd"> Weights must be non-negative with at least one positive element within each group.</span>

<span class="sd"> random_state: int or ak.random.Generator, optional</span>
<span class="sd"> If int, seed for random number generator.</span>
<span class="sd"> If ak.random.Generator, use as given.</span>

<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> DataFrame</span>
<span class="sd"> A new DataFrame containing items randomly sampled from each group,</span>
<span class="sd"> sorted according to the grouped columns.</span>

<span class="sd"> Examples</span>
<span class="sd"> --------</span>

<span class="sd"> &gt;&gt;&gt; import arkouda as ak</span>
<span class="sd"> &gt;&gt;&gt; ak.connect()</span>
<span class="sd"> &gt;&gt;&gt; df = ak.DataFrame({&quot;A&quot;:[3,1,2,1,2,3],&quot;B&quot;:[3,4,5,6,7,8]})</span>
<span class="sd"> &gt;&gt;&gt; display(df)</span>
<span class="sd"> +----+-----+-----+</span>
<span class="sd"> | | A | B |</span>
<span class="sd"> +====+=====+=====+</span>
<span class="sd"> | 0 | 3 | 3 |</span>
<span class="sd"> +----+-----+-----+</span>
<span class="sd"> | 1 | 1 | 4 |</span>
<span class="sd"> +----+-----+-----+</span>
<span class="sd"> | 2 | 2 | 5 |</span>
<span class="sd"> +----+-----+-----+</span>
<span class="sd"> | 3 | 1 | 6 |</span>
<span class="sd"> +----+-----+-----+</span>
<span class="sd"> | 4 | 2 | 7 |</span>
<span class="sd"> +----+-----+-----+</span>
<span class="sd"> | 5 | 3 | 8 |</span>
<span class="sd"> +----+-----+-----+</span>

<span class="sd"> &gt;&gt;&gt; df.groupby(&quot;A&quot;).sample(random_state=6)</span>
<span class="sd"> +----+-----+-----+</span>
<span class="sd"> | | A | B |</span>
<span class="sd"> +====+=====+=====+</span>
<span class="sd"> | 3 | 1 | 6 |</span>
<span class="sd"> +----+-----+-----+</span>
<span class="sd"> | 4 | 2 | 7 |</span>
<span class="sd"> +----+-----+-----+</span>
<span class="sd"> | 5 | 3 | 8 |</span>
<span class="sd"> +----+-----+-----+</span>

<span class="sd"> &gt;&gt;&gt; df.groupby(&quot;A&quot;).sample(frac=0.5, random_state=3, weights=ak.array([1,1,1,0,0,0]))</span>
<span class="sd"> +----+-----+-----+</span>
<span class="sd"> | | A | B |</span>
<span class="sd"> +====+=====+=====+</span>
<span class="sd"> | 1 | 1 | 4 |</span>
<span class="sd"> +----+-----+-----+</span>
<span class="sd"> | 2 | 2 | 5 |</span>
<span class="sd"> +----+-----+-----+</span>
<span class="sd"> | 0 | 3 | 3 |</span>
<span class="sd"> +----+-----+-----+</span>

<span class="sd"> &gt;&gt;&gt; df.groupby(&quot;A&quot;).sample(n=3, replace=True, random_state=ak.random.default_rng(7))</span>
<span class="sd"> +----+-----+-----+</span>
<span class="sd"> | | A | B |</span>
<span class="sd"> +====+=====+=====+</span>
<span class="sd"> | 1 | 1 | 4 |</span>
<span class="sd"> +----+-----+-----+</span>
<span class="sd"> | 3 | 1 | 6 |</span>
<span class="sd"> +----+-----+-----+</span>
<span class="sd"> | 1 | 1 | 4 |</span>
<span class="sd"> +----+-----+-----+</span>
<span class="sd"> | 4 | 2 | 7 |</span>
<span class="sd"> +----+-----+-----+</span>
<span class="sd"> | 4 | 2 | 7 |</span>
<span class="sd"> +----+-----+-----+</span>
<span class="sd"> | 4 | 2 | 7 |</span>
<span class="sd"> +----+-----+-----+</span>
<span class="sd"> | 0 | 3 | 3 |</span>
<span class="sd"> +----+-----+-----+</span>
<span class="sd"> | 5 | 3 | 8 |</span>
<span class="sd"> +----+-----+-----+</span>
<span class="sd"> | 5 | 3 | 8 |</span>
<span class="sd"> +----+-----+-----+</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="p">[</span>
<span class="bp">self</span><span class="o">.</span><span class="n">gb</span><span class="o">.</span><span class="n">sample</span><span class="p">(</span>
<span class="n">values</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">values</span><span class="p">,</span>
<span class="n">n</span><span class="o">=</span><span class="n">n</span><span class="p">,</span>
<span class="n">frac</span><span class="o">=</span><span class="n">frac</span><span class="p">,</span>
<span class="n">replace</span><span class="o">=</span><span class="n">replace</span><span class="p">,</span>
<span class="n">weights</span><span class="o">=</span><span class="n">weights</span><span class="p">,</span>
<span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span><span class="p">,</span>
<span class="n">return_indices</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
<span class="n">permute_samples</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
<span class="p">)</span>
<span class="p">]</span></div>


<span class="k">def</span> <span class="nf">_return_agg_series</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">values</span><span class="p">,</span> <span class="n">sort_index</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">as_index</span> <span class="ow">is</span> <span class="kc">True</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">gb_key_names</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
Expand Down
Loading

0 comments on commit 9c3a475

Please sign in to comment.