Skip to content

Commit

Permalink
Deploying to gh-pages from @ 5a92374 🚀
Browse files Browse the repository at this point in the history
  • Loading branch information
narugo1992 committed Jul 21, 2024
1 parent 8986bd5 commit e65ec13
Show file tree
Hide file tree
Showing 937 changed files with 737 additions and 704 deletions.
2 changes: 1 addition & 1 deletion dev/chunkedcommit/.buildinfo
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 4f3c2a59e92ab017ca6f9028ff4c1f93
config: 9cebdb16c6bbddcc6ab01a1bfac1b275
tags: 645f666f9bcd5a90fca523b33c5a78b7
Binary file modified dev/chunkedcommit/.doctrees/api_doc/archive/index.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/cache/delete.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/cache/index.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/config/index.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/config/meta.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/entry/base.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/entry/cli.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/entry/dispatch.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/entry/download.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/entry/index.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/entry/ls.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/entry/ls_repo.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/entry/upload.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/entry/whoami.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/index/fetch.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/index/index.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/index/make.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/index/validate.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/operate/base.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/operate/download.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/operate/index.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/operate/upload.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/operate/validate.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/repository/base.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/repository/clone.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/repository/index.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/repository/rollback.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/utils/binary.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/utils/download.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/utils/index.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/utils/path.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/utils/tqdm_.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/api_doc/utils/walk.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/environment.pickle
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/index.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/information/environment.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/information/environment.result.doctree
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@
"execution_count": 1,
"metadata": {
"execution": {
"iopub.execute_input": "2024-07-21T06:41:20.650363Z",
"iopub.status.busy": "2024-07-21T06:41:20.650167Z",
"iopub.status.idle": "2024-07-21T06:41:21.811771Z",
"shell.execute_reply": "2024-07-21T06:41:21.811134Z"
"iopub.execute_input": "2024-07-21T07:01:17.464096Z",
"iopub.status.busy": "2024-07-21T07:01:17.463535Z",
"iopub.status.idle": "2024-07-21T07:01:18.629735Z",
"shell.execute_reply": "2024-07-21T07:01:18.628955Z"
},
"pycharm": {
"name": "#%%\n"
Expand All @@ -53,7 +53,7 @@
"text": [
"CPU Brand: AMD EPYC 7763 64-Core Processor\n",
"CPU Count: 4\n",
"CPU Freq: 3086.9095 MHz\n",
"CPU Freq: 2938.7805 MHz\n",
"Memory Size: 15.606 GiB\n",
"Has CUDA: No\n"
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@
"execution_count": 1,
"metadata": {
"execution": {
"iopub.execute_input": "2024-07-21T06:12:48.438134Z",
"iopub.status.busy": "2024-07-21T06:12:48.437738Z",
"iopub.status.idle": "2024-07-21T06:12:49.608988Z",
"shell.execute_reply": "2024-07-21T06:12:49.608237Z"
"iopub.execute_input": "2024-07-21T06:31:39.044488Z",
"iopub.status.busy": "2024-07-21T06:31:39.043994Z",
"iopub.status.idle": "2024-07-21T06:31:40.217926Z",
"shell.execute_reply": "2024-07-21T06:31:40.217154Z"
},
"pycharm": {
"name": "#%%\n"
Expand All @@ -53,7 +53,7 @@
"text": [
"CPU Brand: AMD EPYC 7763 64-Core Processor\n",
"CPU Count: 4\n",
"CPU Freq: 2936.629 MHz\n",
"CPU Freq: 2921.47025 MHz\n",
"Memory Size: 15.606 GiB\n",
"Has CUDA: No\n"
]
Expand Down
Binary file modified dev/chunkedcommit/.doctrees/tutorials/installation/index.doctree
Binary file not shown.
Binary file modified dev/chunkedcommit/.doctrees/tutorials/quick_start/index.doctree
Binary file not shown.
40 changes: 33 additions & 7 deletions dev/chunkedcommit/_modules/hfutils/operate/upload.html
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,15 @@ <h1>Source code for hfutils.operate.upload</h1><div class="highlight"><pre>
<span class="kn">import</span> <span class="nn">math</span>
<span class="kn">import</span> <span class="nn">os.path</span>
<span class="kn">import</span> <span class="nn">re</span>
<span class="kn">import</span> <span class="nn">time</span>
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Optional</span><span class="p">,</span> <span class="n">List</span>

<span class="kn">from</span> <span class="nn">hbutils.string</span> <span class="kn">import</span> <span class="n">plural_word</span>
<span class="kn">from</span> <span class="nn">huggingface_hub</span> <span class="kn">import</span> <span class="n">CommitOperationAdd</span><span class="p">,</span> <span class="n">CommitOperationDelete</span>

<span class="kn">from</span> <span class="nn">.base</span> <span class="kn">import</span> <span class="n">RepoTypeTyping</span><span class="p">,</span> <span class="n">get_hf_client</span><span class="p">,</span> <span class="n">list_files_in_repository</span><span class="p">,</span> <span class="n">_IGNORE_PATTERN_UNSET</span>
<span class="kn">from</span> <span class="nn">..archive</span> <span class="kn">import</span> <span class="n">get_archive_type</span><span class="p">,</span> <span class="n">archive_pack</span>
<span class="kn">from</span> <span class="nn">..utils</span> <span class="kn">import</span> <span class="n">walk_files</span><span class="p">,</span> <span class="n">TemporaryDirectory</span>
<span class="kn">from</span> <span class="nn">..utils</span> <span class="kn">import</span> <span class="n">walk_files</span><span class="p">,</span> <span class="n">TemporaryDirectory</span><span class="p">,</span> <span class="n">tqdm</span>


<div class="viewcode-block" id="upload_file_to_file"><a class="viewcode-back" href="../../../api_doc/operate/upload.html#hfutils.operate.upload.upload_file_to_file">[docs]</a><span class="k">def</span> <span class="nf">upload_file_to_file</span><span class="p">(</span><span class="n">local_file</span><span class="p">,</span> <span class="n">repo_id</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">file_in_repo</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
Expand Down Expand Up @@ -176,11 +178,14 @@ <h1>Source code for hfutils.operate.upload</h1><div class="highlight"><pre>
<span class="n">_PATH_SEP</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">&#39;[/</span><span class="se">\\</span><span class="s1">]+&#39;</span><span class="p">)</span>


<div class="viewcode-block" id="upload_directory_as_directory"><a class="viewcode-back" href="../../../api_doc/operate/upload.html#hfutils.operate.upload.upload_directory_as_directory">[docs]</a><span class="k">def</span> <span class="nf">upload_directory_as_directory</span><span class="p">(</span><span class="n">local_directory</span><span class="p">,</span> <span class="n">repo_id</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">path_in_repo</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
<span class="n">repo_type</span><span class="p">:</span> <span class="n">RepoTypeTyping</span> <span class="o">=</span> <span class="s1">&#39;dataset&#39;</span><span class="p">,</span> <span class="n">revision</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">&#39;main&#39;</span><span class="p">,</span>
<span class="n">message</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">time_suffix</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="n">clear</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="n">ignore_patterns</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">_IGNORE_PATTERN_UNSET</span><span class="p">,</span>
<span class="n">hf_token</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">operation_chunk_size</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
<div class="viewcode-block" id="upload_directory_as_directory"><a class="viewcode-back" href="../../../api_doc/operate/upload.html#hfutils.operate.upload.upload_directory_as_directory">[docs]</a><span class="k">def</span> <span class="nf">upload_directory_as_directory</span><span class="p">(</span>
<span class="n">local_directory</span><span class="p">,</span> <span class="n">repo_id</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">path_in_repo</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
<span class="n">repo_type</span><span class="p">:</span> <span class="n">RepoTypeTyping</span> <span class="o">=</span> <span class="s1">&#39;dataset&#39;</span><span class="p">,</span> <span class="n">revision</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">&#39;main&#39;</span><span class="p">,</span>
<span class="n">message</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">time_suffix</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="n">clear</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="n">ignore_patterns</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">_IGNORE_PATTERN_UNSET</span><span class="p">,</span>
<span class="n">hf_token</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">operation_chunk_size</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">upload_timespan</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">5.0</span><span class="p">,</span>
<span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Upload a local directory and its files to a specified path in a Hugging Face repository.</span>

Expand All @@ -207,6 +212,8 @@ <h1>Source code for hfutils.operate.upload</h1><div class="highlight"><pre>
<span class="sd"> :param operation_chunk_size: Chunk size of the operations. All the operations will be</span>
<span class="sd"> separated into multiple commits when this is set.</span>
<span class="sd"> :type operation_chunk_size: Optional[int]</span>
<span class="sd"> :param upload_timespan: Minimum time interval, in seconds, between the starts of consecutive commits when chunked uploading is enabled.</span>
<span class="sd"> :type upload_timespan: float</span>

<span class="sd"> .. note::</span>
<span class="sd"> When `operation_chunk_size` is set, multiple commits will be created. When some commits failed,</span>
Expand All @@ -215,6 +222,11 @@ <h1>Source code for hfutils.operate.upload</h1><div class="highlight"><pre>
<span class="sd"> .. warning::</span>
<span class="sd"> When `operation_chunk_size` is set, multiple commits will be created. But HuggingFace&#39;s repository</span>
<span class="sd"> API cannot guarantee the atomicity of your data, so **this function is not thread-safe**.</span>

<span class="sd"> .. note::</span>
<span class="sd"> The rate limit of HuggingFace repository commit creation is approximately 120 commits / hour.</span>
<span class="sd"> So if you have a large number of chunks to create, please set `upload_timespan` to a value</span>
<span class="sd"> no less than `30.0` to make sure your upload will not be rate-limited.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">hf_client</span> <span class="o">=</span> <span class="n">get_hf_client</span><span class="p">(</span><span class="n">hf_token</span><span class="p">)</span>
<span class="k">if</span> <span class="n">clear</span><span class="p">:</span>
Expand Down Expand Up @@ -252,16 +264,30 @@ <h1>Source code for hfutils.operate.upload</h1><div class="highlight"><pre>
<span class="n">revision</span><span class="o">=</span><span class="n">revision</span>
<span class="p">)[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">commit_id</span>

<span class="n">last_upload_at</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">try</span><span class="p">:</span>
<span class="k">for</span> <span class="n">chunk_id</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">math</span><span class="o">.</span><span class="n">ceil</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">operations</span><span class="p">)</span> <span class="o">/</span> <span class="n">operation_chunk_size</span><span class="p">))):</span>
<span class="k">for</span> <span class="n">chunk_id</span> <span class="ow">in</span> <span class="n">tqdm</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">math</span><span class="o">.</span><span class="n">ceil</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">operations</span><span class="p">)</span> <span class="o">/</span> <span class="n">operation_chunk_size</span><span class="p">))),</span>
<span class="n">desc</span><span class="o">=</span><span class="s1">&#39;Chunked Commits&#39;</span><span class="p">):</span>
<span class="n">operation_chunk</span> <span class="o">=</span> <span class="n">operations</span><span class="p">[</span><span class="n">chunk_id</span> <span class="o">*</span> <span class="n">operation_chunk_size</span><span class="p">:(</span><span class="n">chunk_id</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">operation_chunk_size</span><span class="p">]</span>

<span class="c1"># sleep for the given time</span>
<span class="k">if</span> <span class="n">last_upload_at</span><span class="p">:</span>
<span class="n">sleep_time</span> <span class="o">=</span> <span class="n">last_upload_at</span> <span class="o">+</span> <span class="n">upload_timespan</span> <span class="o">-</span> <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span>
<span class="k">if</span> <span class="n">sleep_time</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;Sleep for </span><span class="si">{</span><span class="n">sleep_time</span><span class="si">:</span><span class="s1">.1f</span><span class="si">}</span><span class="s1">s due to the timespan limitation ...&#39;</span><span class="p">)</span>
<span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="n">sleep_time</span><span class="p">)</span>

<span class="n">last_upload_at</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span>
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;Uploading chunk #</span><span class="si">{</span><span class="n">chunk_id</span><span class="si">}</span><span class="s1">, with </span><span class="si">{</span><span class="n">plural_word</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">operation_chunk</span><span class="p">),</span><span class="w"> </span><span class="s2">&quot;operation&quot;</span><span class="p">)</span><span class="si">}</span><span class="s1"> ...&#39;</span><span class="p">)</span>
<span class="n">hf_client</span><span class="o">.</span><span class="n">create_commit</span><span class="p">(</span>
<span class="n">repo_id</span><span class="o">=</span><span class="n">repo_id</span><span class="p">,</span>
<span class="n">repo_type</span><span class="o">=</span><span class="n">repo_type</span><span class="p">,</span>
<span class="n">revision</span><span class="o">=</span><span class="n">revision</span><span class="p">,</span>
<span class="n">operations</span><span class="o">=</span><span class="n">operation_chunk</span><span class="p">,</span>
<span class="n">commit_message</span><span class="o">=</span><span class="sa">f</span><span class="s1">&#39;[Chunk #</span><span class="si">{</span><span class="n">chunk_id</span><span class="si">}</span><span class="s1">] </span><span class="si">{</span><span class="n">commit_message</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">,</span>
<span class="p">)</span>


<span class="k">except</span> <span class="ne">Exception</span><span class="p">:</span>
<span class="kn">from</span> <span class="nn">..repository</span> <span class="kn">import</span> <span class="n">hf_hub_rollback</span>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@
"execution_count": 1,
"metadata": {
"execution": {
"iopub.execute_input": "2024-07-21T06:12:48.438134Z",
"iopub.status.busy": "2024-07-21T06:12:48.437738Z",
"iopub.status.idle": "2024-07-21T06:12:49.608988Z",
"shell.execute_reply": "2024-07-21T06:12:49.608237Z"
"iopub.execute_input": "2024-07-21T06:31:39.044488Z",
"iopub.status.busy": "2024-07-21T06:31:39.043994Z",
"iopub.status.idle": "2024-07-21T06:31:40.217926Z",
"shell.execute_reply": "2024-07-21T06:31:40.217154Z"
},
"pycharm": {
"name": "#%%\n"
Expand All @@ -53,7 +53,7 @@
"text": [
"CPU Brand: AMD EPYC 7763 64-Core Processor\n",
"CPU Count: 4\n",
"CPU Freq: 2936.629 MHz\n",
"CPU Freq: 2921.47025 MHz\n",
"Memory Size: 15.606 GiB\n",
"Has CUDA: No\n"
]
Expand Down
Loading

0 comments on commit e65ec13

Please sign in to comment.