From 3083a4c8f5da70e2c6055122df479f398039682a Mon Sep 17 00:00:00 2001 From: len-sla <68053910+len-sla@users.noreply.github.com> Date: Thu, 25 Mar 2021 11:31:45 +0100 Subject: [PATCH] Update finetuning-English-GPT2-language-Polish-HuggingFace-fastaiv2.ipynb --- ...ning-English-GPT2-language-Polish-HuggingFace-fastaiv2.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/finetuning-English-GPT2-language-Polish-HuggingFace-fastaiv2.ipynb b/finetuning-English-GPT2-language-Polish-HuggingFace-fastaiv2.ipynb index 81d004a..dc6f77b 100644 --- a/finetuning-English-GPT2-language-Polish-HuggingFace-fastaiv2.ipynb +++ b/finetuning-English-GPT2-language-Polish-HuggingFace-fastaiv2.ipynb @@ -1 +1 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.7"},"colab":{"name":"finetuning-English-GPT2-any-language-Polish-HuggingFace-fastaiv2.ipynb","provenance":[{"file_id":"1n7sol-CvBSblO33ScpgQQIfy5v_VJVQb","timestamp":1597923568454},{"file_id":"1qrhSZ4nBKgv2sz_-OjzEsiuWYHUyOeq9","timestamp":1597489060569},{"file_id":"1d_tsC-i3804eHIBJsoy6QNZ6Jm4JtgJ-","timestamp":1597391589678}],"collapsed_sections":["D18pRY2C9NUf","eAs4xPsR9NUj","Z6oUdKay9NUv","kQrp3wcB9NVL","DAvDfxhq9NVQ","6NUQ_V299NVR","7bKWxnx19NVT","i2frlbhv9NVU","j6WB2Qyy9NVX","CmKm8BETBpB2","P-2JYQnD9N6F","NBx8vCFkBtDu","Zad8TrPNybqT","GeSgUiiz9NZq"],"toc_visible":true},"accelerator":"GPU","widgets":{"application/vnd.jupyter.widget-state+json":{"7f0767ff35a344b0b6da17083132427e":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","state":{"_view_name":"HBoxView","_dom_classes":[],"_model_name":"HBoxModel","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_view_count":null,"_view_module_version
":"1.5.0","box_style":"","layout":"IPY_MODEL_596851de3e714ab8942a72f69b18a63c","_model_module":"@jupyter-widgets/controls","children":["IPY_MODEL_f0a9328a838041c0979b2b709b0b42a0","IPY_MODEL_156da666d81b4026941d5acb5b487128"]}},"596851de3e714ab8942a72f69b18a63c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"f0a9328a838041c0979b2b709b0b42a0":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","state":{"_view_name":"ProgressView","style":"IPY_MODEL_87da6b072be3484b98e01b534d1a751a","_dom_classes":[],"description":"Downloading: 
100%","_model_name":"FloatProgressModel","bar_style":"success","max":1042301,"_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":1042301,"_view_count":null,"_view_module_version":"1.5.0","orientation":"horizontal","min":0,"description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_1ae0380be03140089589c6d485e7c1d0"}},"156da666d81b4026941d5acb5b487128":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","state":{"_view_name":"HTMLView","style":"IPY_MODEL_f8b5835af84c482092be9d849cb9bd02","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":" 1.04M/1.04M [00:14<00:00, 73.6kB/s]","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_60b9f541de2648f08b7ad6257f32499b"}},"87da6b072be3484b98e01b534d1a751a":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","state":{"_view_name":"StyleView","_model_name":"ProgressStyleModel","description_width":"initial","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","bar_color":null,"_model_module":"@jupyter-widgets/controls"}},"1ae0380be03140089589c6d485e7c1d0":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_h
eight":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"f8b5835af84c482092be9d849cb9bd02":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"60b9f541de2648f08b7ad6257f32499b":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"7d5d97ff327f4d6aa5b44fc7656f07d8":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","state":{"_view_name":"HBoxView","_dom_classes":[],"_model_name":"HBoxModel","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_view_c
ount":null,"_view_module_version":"1.5.0","box_style":"","layout":"IPY_MODEL_c7f7c2744375405298d3b95a6aa2b361","_model_module":"@jupyter-widgets/controls","children":["IPY_MODEL_bdfee3f0cab649989a96841b3a487635","IPY_MODEL_8e3b1e20656248c8a9d92c26028d5026"]}},"c7f7c2744375405298d3b95a6aa2b361":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"bdfee3f0cab649989a96841b3a487635":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","state":{"_view_name":"ProgressView","style":"IPY_MODEL_e99dd500836c4156a91e7a75b18a683d","_dom_classes":[],"description":"Downloading: 
100%","_model_name":"FloatProgressModel","bar_style":"success","max":456318,"_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":456318,"_view_count":null,"_view_module_version":"1.5.0","orientation":"horizontal","min":0,"description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_63d976528be746609f4910ca4b73bfc8"}},"8e3b1e20656248c8a9d92c26028d5026":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","state":{"_view_name":"HTMLView","style":"IPY_MODEL_91552ff8f58d4563b60b8a1764704aa8","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":" 456k/456k [00:02<00:00, 200kB/s]","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_cb3db61b459f4082a5c2aca136564999"}},"e99dd500836c4156a91e7a75b18a683d":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","state":{"_view_name":"StyleView","_model_name":"ProgressStyleModel","description_width":"initial","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","bar_color":null,"_model_module":"@jupyter-widgets/controls"}},"63d976528be746609f4910ca4b73bfc8":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height
":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"91552ff8f58d4563b60b8a1764704aa8":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"cb3db61b459f4082a5c2aca136564999":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"0a3b0c8f447543899950a6d2ff342a57":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","state":{"_view_name":"HBoxView","_dom_classes":[],"_model_name":"HBoxModel","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_view_count"
:null,"_view_module_version":"1.5.0","box_style":"","layout":"IPY_MODEL_b428499e94534b6ebdc7be8f4adea734","_model_module":"@jupyter-widgets/controls","children":["IPY_MODEL_18501534dc6a496b99e7017805acd781","IPY_MODEL_75c7bd71ca2b4f3b93f53e2355e2856d"]}},"b428499e94534b6ebdc7be8f4adea734":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"18501534dc6a496b99e7017805acd781":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","state":{"_view_name":"ProgressView","style":"IPY_MODEL_edc9e07a4bc14e4cb9ba9dde5a75d5aa","_dom_classes":[],"description":"Downloading: 
100%","_model_name":"FloatProgressModel","bar_style":"success","max":1355256,"_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":1355256,"_view_count":null,"_view_module_version":"1.5.0","orientation":"horizontal","min":0,"description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_75e507e0573244a8a0c1fb9f7704322e"}},"75c7bd71ca2b4f3b93f53e2355e2856d":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","state":{"_view_name":"HTMLView","style":"IPY_MODEL_c098c5b96cb2449688d1516eae241690","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":" 1.36M/1.36M [00:09<00:00, 140kB/s]","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_40b5ac5ade1b46e4b04b5eee03840cf6"}},"edc9e07a4bc14e4cb9ba9dde5a75d5aa":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","state":{"_view_name":"StyleView","_model_name":"ProgressStyleModel","description_width":"initial","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","bar_color":null,"_model_module":"@jupyter-widgets/controls"}},"75e507e0573244a8a0c1fb9f7704322e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_he
ight":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"c098c5b96cb2449688d1516eae241690":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"40b5ac5ade1b46e4b04b5eee03840cf6":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"007c3da396954f70a2906377d4792428":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","state":{"_view_name":"HBoxView","_dom_classes":[],"_model_name":"HBoxModel","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_view_co
unt":null,"_view_module_version":"1.5.0","box_style":"","layout":"IPY_MODEL_ef838f3ace384c62b4cea145ae3f1def","_model_module":"@jupyter-widgets/controls","children":["IPY_MODEL_e8e2ce51cc244407b612edabe6cb9347","IPY_MODEL_3a81e636511542028f37db75c58b0df3"]}},"ef838f3ace384c62b4cea145ae3f1def":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"e8e2ce51cc244407b612edabe6cb9347":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","state":{"_view_name":"ProgressView","style":"IPY_MODEL_001f30a0328645bcbae474bd0ecb784c","_dom_classes":[],"description":"Downloading: 
100%","_model_name":"FloatProgressModel","bar_style":"success","max":665,"_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":665,"_view_count":null,"_view_module_version":"1.5.0","orientation":"horizontal","min":0,"description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_8af84301f2c4416fa727bfd613b7ba9c"}},"3a81e636511542028f37db75c58b0df3":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","state":{"_view_name":"HTMLView","style":"IPY_MODEL_a0a0092604a5494b9e5f3947cfe4fd8e","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":" 665/665 [00:22<00:00, 29.5B/s]","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_de4dbaa8e418452cad8237812ce45aef"}},"001f30a0328645bcbae474bd0ecb784c":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","state":{"_view_name":"StyleView","_model_name":"ProgressStyleModel","description_width":"initial","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","bar_color":null,"_model_module":"@jupyter-widgets/controls"}},"8af84301f2c4416fa727bfd613b7ba9c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"
align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"a0a0092604a5494b9e5f3947cfe4fd8e":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"de4dbaa8e418452cad8237812ce45aef":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"9df88e6e1226457e9b58d2171abc5b0b":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","state":{"_view_name":"HBoxView","_dom_classes":[],"_model_name":"HBoxModel","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_view_count":null,"_
view_module_version":"1.5.0","box_style":"","layout":"IPY_MODEL_54f9353d6516478b8b20ba8a92a14086","_model_module":"@jupyter-widgets/controls","children":["IPY_MODEL_5f357509b9c74c239bf9a50fe3e795e0","IPY_MODEL_22a330ae10f44a98a5e2210e1e0f83fe"]}},"54f9353d6516478b8b20ba8a92a14086":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"5f357509b9c74c239bf9a50fe3e795e0":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","state":{"_view_name":"ProgressView","style":"IPY_MODEL_cce461c343214f4b8ee48873d9e92722","_dom_classes":[],"description":"Downloading: 
100%","_model_name":"FloatProgressModel","bar_style":"success","max":548118077,"_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":548118077,"_view_count":null,"_view_module_version":"1.5.0","orientation":"horizontal","min":0,"description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_de5bdfa89ab84e27928938d29eb3beef"}},"22a330ae10f44a98a5e2210e1e0f83fe":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","state":{"_view_name":"HTMLView","style":"IPY_MODEL_05763c86c54a4c2ebff74747363eb1f8","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":" 548M/548M [00:21<00:00, 25.3MB/s]","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_ed5f9c349cc7448db86872a7a0bcaeab"}},"cce461c343214f4b8ee48873d9e92722":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","state":{"_view_name":"StyleView","_model_name":"ProgressStyleModel","description_width":"initial","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","bar_color":null,"_model_module":"@jupyter-widgets/controls"}},"de5bdfa89ab84e27928938d29eb3beef":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max
_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"05763c86c54a4c2ebff74747363eb1f8":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"ed5f9c349cc7448db86872a7a0bcaeab":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}}}}},"cells":[{"cell_type":"markdown","metadata":{"id":"D18pRY2C9NUf"},"source":["# Faster than training from scratch \n","# Fine-tuning the English GPT-2 in any language with Hugging Face and fastai v2 \n","\n","> Tutorial on how to use fastai v2 over Hugging 
Face's Transformers and Tokenizers libraries to fine-tune an English pre-trained transformer-based language model (GPT-2) to any language other than English"]},{"cell_type":"markdown","metadata":{"id":"C-Cx5Xrk9NUh"},"source":["This notebook is based on the work of Pierre Guillou (https://www.linkedin.com/in/pierreguillou)\n","\n","Other resources used:\n","---\n","\n","\n","- Post on Medium: [Faster than training from scratch - Fine-tuning the English GPT-2 in any language with Hugging Face and fastai v2 (practical case with Portuguese)](https://medium.com/@pierre_guillou/faster-than-training-from-scratch-fine-tuning-the-english-gpt-2-in-any-language-with-hugging-f2ec05c98787)\n","- Fast notebook: [finetuning-English-GPT2-any-language-Portuguese-HuggingFace-fastaiv2_FAST.ipynb](https://github.com/piegu/fastai-projects/blob/master/finetuning-English-GPT2-any-language-Portuguese-HuggingFace-fastaiv2_FAST.ipynb)\n","- Hugging Face model page of [GPorTuguese-2](https://huggingface.co/pierreguillou/gpt2-small-portuguese): a language model for Portuguese text generation (and more NLP tasks...)\n","- Other posts on Medium in the GPT-2 series: \n"," - [NLP & fastai | GPT-2](https://medium.com/@pierre_guillou/nlp-fastai-gpt-2-16ee145a4a28)\n"," - [Byte-level BPE, an universal tokenizer but...](https://medium.com/@pierre_guillou/byte-level-bpe-an-universal-tokenizer-but-aff932332ffe)"]},{"cell_type":"markdown","metadata":{"id":"eAs4xPsR9NUj"},"source":["## Overview"]},{"cell_type":"markdown","metadata":{"id":"TOQ1ZSE99NUl"},"source":["In this tutorial, instead of training from scratch, we will see how to fine-tune in just over a day, on one GPU and with a little more than 1GB of training data an English pre-trained [transformer](https://arxiv.org/abs/1706.03762)-based language model to any other language. 
\n","\n","As a practical case, we fine-tune to Portuguese the [English pre-trained GPT-2](https://github.com/openai/gpt-2) by wrapping the [Transformers](https://github.com/huggingface/transformers) and [Tokenizers](https://github.com/huggingface/tokenizers) libraries of Hugging Face into [fastai v2](https://github.com/fastai/fastai2). We thus create a new language model: [GPorTuguese-2](https://huggingface.co/pierreguillou/gpt2-small-portuguese), a language model for Portuguese text generation (and more NLP tasks...)."]},{"cell_type":"markdown","metadata":{"id":"MXu6MQF-9NUn"},"source":["![The 3 main steps of fine-tuning the English GPT-2 to Portuguese with Hugging Face and fastai v2 (image edited from fast.ai NLP)](images/GPT2_tf_ft_approach.png \"The 3 main steps of fine-tuning the English GPT-2 to Portuguese with Hugging Face and fastai v2 (image edited from fast.ai NLP)\")"]},{"cell_type":"markdown","metadata":{"heading_collapsed":true,"id":"kQrp3wcB9NVL"},"source":["## About the choice of GPT-2"]},{"cell_type":"markdown","metadata":{"hidden":true,"id":"heoybdvR9NVM"},"source":["In order to demonstrate the feasibility of fine-tuning Hugging Face models via fastai v2, we had to choose an emblematic model of the [Transformer revolution](https://arxiv.org/abs/1706.03762) in the NLP since 2017.\n","\n","Thus, between the GPT-2 and [BERT](https://github.com/google-research/bert) models, we chose the GPT-2 model because it has strongly influenced minds beyond the circle of Deep Learning specialists in early 2019 by [writing texts of a quality level close to that of humans](https://openai.com/blog/better-language-models/#samples). 
Although today it is \"exceeded\" in number of parameters and performance by more recent models like BART, T5 and of course GPT-3 (175 billion parameters!), it remains a reference and a model used in research and applications.\n","For those who want to better understand how GPT-2 works, read the following posts:\n","- [The Illustrated GPT-2 (Visualizing Transformer Language Models)](http://jalammar.github.io/illustrated-gpt2/)\n","- [NLP & fastai | GPT-2](https://medium.com/@pierre_guillou/nlp-fastai-gpt-2-16ee145a4a28)\n","\n","**About the version of GPT-2**\n","\n","There are 3 versions of the GPT-2 model (look at the [transformers documentation](https://huggingface.co/transformers/pretrained_models.html) for more details). Here, **we use the small version**, the one with the smallest number of weights (124 million, not 117 million as written in the original paper) but you can change the model used by changing the content of `pretrained_weights` (if it's not a GPT2 model, you'll need to change the classes used for the model and the tokenizer of course)."]},{"cell_type":"markdown","metadata":{"hidden":true,"id":"guzLJ_st9NVO"},"source":["**More about GPT-2**"]},{"cell_type":"markdown","metadata":{"hidden":true,"id":"vuex9WUD9NVP"},"source":["Source: https://huggingface.co/transformers/model_doc/gpt2.html"]},{"cell_type":"markdown","metadata":{"hidden":true,"id":"FxAZPPFo9NVQ"},"source":["> OpenAI GPT-2 model was proposed in [Language Models are Unsupervised Multitask Learners](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf) by Alec Radford*, Jeffrey Wu*, Rewon Child, David Luan, Dario Amodei** and Ilya Sutskever**. It’s a causal (unidirectional) transformer pre-trained using language modeling on a very large corpus of ~40 GB of text data.\n","\n","> The abstract from the paper is the following: *GPT-2 is a large transformer-based language model with 1.5 billion parameters, trained on a dataset[1] of 8 million web pages. 
GPT-2 is trained with a simple objective: predict the next word, given all of the previous words within some text. The diversity of the dataset causes this simple goal to contain naturally occurring demonstrations of many tasks across diverse domains. GPT-2 is a direct scale-up of GPT, with more than 10X the parameters and trained on more than 10X the amount of data.*\n","\n","> Tips:\n","> - GPT-2 is a model with absolute position embeddings so it’s usually advised to pad the inputs on the right rather than the left.\n","> - GPT-2 was trained with a causal language modeling (CLM) objective and is therefore powerful at predicting the next token in a sequence. Leveraging this feature allows GPT-2 to generate syntactically coherent text as it can be observed in the run_generation.py example script.\n","> - The PyTorch models can take the past as input, which is the previously computed key/value attention pairs. Using this past value prevents the model from re-computing pre-computed values in the context of text generation. See [reusing the past in generative models](https://huggingface.co/transformers/quickstart.html#using-the-past) for more information on the usage of this argument.\n","\n","> [Write With Transformer](https://transformer.huggingface.co/doc/gpt2-large) is a webapp created and hosted by Hugging Face showcasing the generative capabilities of several models. 
GPT-2 is one of them and is available in five different sizes: small, medium, large, xl and a distilled version of the small checkpoint: distilgpt-2.\n","\n",">The original code can be found [here](https://openai.com/blog/better-language-models/)."]},{"cell_type":"markdown","metadata":{"heading_collapsed":true,"id":"DAvDfxhq9NVQ"},"source":["## References"]},{"cell_type":"markdown","metadata":{"heading_collapsed":true,"hidden":true,"id":"6NUQ_V299NVR"},"source":["### GPT-2"]},{"cell_type":"markdown","metadata":{"hidden":true,"id":"n1mGmvkE9NVS"},"source":["- Understanding\n"," - [Better Language Models and Their Implications](https://openai.com/blog/better-language-models/) (OpenAI, 02/14/2019)\n"," - [The Illustrated GPT-2 (Visualizing Transformer Language Models)](http://jalammar.github.io/illustrated-gpt2/)\n"," - [The Annotated GPT-2](https://amaarora.github.io/2020/02/18/annotatedGPT2.html)\n"," - [Understanding the GPT-2 Source Code](https://medium.com/analytics-vidhya/understanding-the-gpt-2-source-code-part-1-4481328ee10b)\n"," - [How To Make Custom AI-Generated Text With GPT-2](https://minimaxir.com/2019/09/howto-gpt2/)\n","- Online Apps\n"," - [Write With Transformer (distilgpt2-small, gpt2small, gpt2medium, gpt2large)](https://transformer.huggingface.co/doc/gpt2-large)\n"," - [Write With DistilGPT-2](https://transformer.huggingface.co/model/distil-gpt2)\n"," - [Generate custom text from an AI using GPT-2 (using the 117M default model)](https://minimaxir.com/apps/gpt2-small/)\n"," - [Allen GPT2 Large Demo](https://demo.allennlp.org/next-token-lm?text=AllenNLP%20is)\n","- Other papers: [The Annotated Transformer](https://nlp.seas.harvard.edu/2018/04/03/attention.html), [Layer Normalization](https://arxiv.org/abs/1607.06450)"]},{"cell_type":"markdown","metadata":{"heading_collapsed":true,"hidden":true,"id":"7bKWxnx19NVT"},"source":["### Datasets in Portuguese"]},{"cell_type":"markdown","metadata":{"hidden":true,"id":"BcPILDYB9NVU"},"source":["- 
Wikipedia\n"," - (fastai): code from [Vietnamese ULMFiT from scratch](https://github.com/fastai/course-nlp/blob/master/nn-vietnamese.ipynb)\n"," - (Hugging Face): [code from nlp](https://huggingface.co/nlp/viewer/?dataset=wikipedia&config=20200501.pt)\n","- [OSCAR corpus](https://traces1.inria.fr/oscar/): code from [Find a Dataset](https://colab.research.google.com/github/huggingface/blog/blob/master/notebooks/01_how_to_train.ipynb#scrollTo=oK7PPVm2XBgr)"]},{"cell_type":"markdown","metadata":{"heading_collapsed":true,"hidden":true,"id":"i2frlbhv9NVU"},"source":["### Hugging Face"]},{"cell_type":"markdown","metadata":{"hidden":true,"id":"9h14XyS59NVW"},"source":["- Dataset\n"," - [nlp](https://github.com/huggingface/nlp)\n"," - [Colab tutorial](https://colab.research.google.com/github/huggingface/nlp/blob/master/notebooks/Overview.ipynb)\n"," - [Online dataset explorer](https://huggingface.co/nlp/viewer)\n","- Tokenizers\n"," - [Tokenizers](https://github.com/huggingface/tokenizers) (github)\n"," - Source code\n"," - [Source code for transformers.tokenization_gpt2](https://huggingface.co/transformers/_modules/transformers/tokenization_gpt2.html)\n"," - [Source code for transformers.tokenization_utils_base](https://huggingface.co/transformers/_modules/transformers/tokenization_utils_base.html)\n"," - [Source code for transformers.tokenization_utils](https://huggingface.co/transformers/_modules/transformers/tokenization_utils.html)\n"," - [Source code for transformers.tokenization_utils_fast](https://huggingface.co/transformers/_modules/transformers/tokenization_utils_fast.html)\n"," - [classmethod from_pretrained()](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.PreTrainedTokenizer.from_pretrained): Instantiate a PreTrainedTokenizer (or a derived class) from a predefined tokenizer.\n"," - [Source code for transformers.tokenization_gpt2](https://huggingface.co/transformers/_modules/transformers/tokenization_gpt2.html)\n"," - [Hugging Face 
Tutorials - Training Tokenizer](https://www.kaggle.com/funtowiczmo/hugging-face-tutorials-training-tokenizer)\n"," - [Hugging Face Introduces Tokenizers](https://medium.com/dair-ai/hugging-face-introduces-tokenizers-d792482db360)\n"," - How to train a new language model from scratch using Transformers and Tokenizers (05/15/2020): [blog post](https://huggingface.co/blog/how-to-train) & [colab notebook](https://colab.research.google.com/github/huggingface/blog/blob/master/notebooks/01_how_to_train.ipynb)\n"," - [HuggingFace Tokenizers Cheat Sheet](https://www.kaggle.com/debanga/huggingface-tokenizers-cheat-sheet)\n"," - [Tokenizers: How machines read](https://blog.floydhub.com/tokenization-nlp/) (01/28/2020)\n"," - [Byte Pair Encoding](https://leimao.github.io/blog/Byte-Pair-Encoding/) (07/19/2019)\n"," - [What is a tokenizer?](https://docs.rs/tokenizers/0.10.1/tokenizers/#what-is-a-tokenizer)\n","- Transformers\n"," - [Transformers](https://huggingface.co/transformers/) de Hugging Face & [Transformers github](https://github.com/huggingface/transformers)\n"," - [Glossary](https://huggingface.co/transformers/glossary.html)\n"," - [OpenAI GPT2](https://huggingface.co/transformers/model_doc/gpt2.html#openai-gpt2)\n"," - Source code\n"," - [Source code for transformers.modeling_gpt2](https://huggingface.co/transformers/_modules/transformers/modeling_gpt2.html)\n"," - [Source code for transformers.configuration_gpt2](https://huggingface.co/transformers/_modules/transformers/configuration_gpt2.html)\n"," - [DistilBERT](https://medium.com/huggingface/distilbert-8cf3380435b5), [DistilGPT2](https://huggingface.co/distilgpt2) & [Download Model: distilgpt2](https://huggingface.co/distilgpt2)\n"," - [Train a GPT-2 Text-Generating Model w/ GPU For Free](https://colab.research.google.com/drive/1VLG8e7YSEwypxU-noRNhsv5dW4NfTGce#scrollTo=H7LoMj4GA4n_) (colab notebook, 11/10/2019)\n"," - How to generate text: using different decoding methods for language generation with Transformers 
(03/18/2020, Hugging Face): [blog post](https://huggingface.co/blog/how-to-generate) and [colab notebook](https://colab.research.google.com/github/huggingface/blog/blob/master/notebooks/02_how_to_generate.ipynb) "]},{"cell_type":"markdown","metadata":{"hidden":true,"id":"j6WB2Qyy9NVX"},"source":["### Pytorch, fastai & Transformers (Hugging Face)"]},{"cell_type":"markdown","metadata":{"hidden":true,"id":"FcYAwd0A9NVY"},"source":["- [Sequence-to-Sequence Modeling with nn.Transformer and TorchText](https://pytorch.org/tutorials/beginner/transformer_tutorial.html#sequence-to-sequence-modeling-with-nn-transformer-and-torchtext)\n","- [Fastai v2](https://dev.fast.ai) (Deep Learning library on PyTorch) & [Hugging face](https://huggingface.co/)\n","- [blurr](https://ohmeow.github.io/blurr/): a library that integrates huggingface transformers with version 2 of the fastai framework\n","- fastai v2\n"," - Integration of the GPT2 model into fastai v2: code from [Tutorial - Transformers](https://dev.fast.ai/tutorial.transformers) and [10_nlp.ipynb](https://github.com/fastai/fastbook/blob/master/10_nlp.ipynb) (how to fine-tune an NLP model with fastai v2)\n"," - FastHugs\n"," - [FastHugs in the fastai forum](https://forums.fast.ai/t/fasthugs-fastai-v2-and-huggingface-transformers/63681)\n"," - [FastHugs: Language Modelling with Tranformers and Fastai](https://www.ntentional.com/nlp/transformers/training%20technique/classification/2020/04/24/fasthugs_language_model.html) (04/24/2020, fastai v2)\n"," - [FastHugs: Sequence Classification with Transformers and Fastai](https://www.ntentional.com/nlp/training%20technique/classification/2020/04/17/fasthugs_seq_classification.html) (04/17/2020, fastai v2)\n","- fastai v1\n"," - [A Tutorial to Fine-Tuning BERT with Fast AI](http://mlexplained.com/2019/05/13/a-tutorial-to-fine-tuning-bert-with-fast-ai/) (05/15/2019, fastai v1)\n"," - [Fastai integration with BERT: Multi-label text classification identifying toxicity in 
texts](https://medium.com/@abhikjha/fastai-integration-with-bert-a0a66b1cecbe) (07/17/2019, fastai v1)\n"," - [When Pytorch-transformers meets Fastai (w/ Google Colab)](https://towardsdatascience.com/best-of-two-worlds-pytorch-transformers-meets-fastai-5fd51ef34b0f) (08/26/2019, fastai v1)\n"," - [Using RoBERTa with Fastai for NLP](https://medium.com/analytics-vidhya/using-roberta-with-fastai-for-nlp-7ed3fed21f6c) (09/02/2019, fastai v1)\n"," - [RoBERTa with Fastai](https://www.kaggle.com/abhikjha/roberta-with-fastai) (11/14/2019, fastai v1)\n"," - [Fastai with 🤗Transformers (BERT, RoBERTa, XLNet, XLM, DistilBERT)](https://towardsdatascience.com/fastai-with-transformers-bert-roberta-xlnet-xlm-distilbert-4f41ee18ecb2) (11/27/2019, fastai v1): A tutorial to implement state-of-the-art NLP models with Fastai for Sentiment Analysis ([notebook](https://www.kaggle.com/maroberti/fastai-with-transformers-bert-roberta))\n"," - [RoBERTa (fastai, HuggingFace 🤗Transformers)](https://www.kaggle.com/melissarajaram/roberta-fastai-huggingface-transformers/execution) (01/17/2020, fastai v1)"]},{"cell_type":"markdown","metadata":{"id":"6ITOliM_9NVa"},"source":["## Main coding steps to fine-tune a Hugging Face language model with fastai v2"]},{"cell_type":"markdown","metadata":{"id":"dKtXMsgj9NVa"},"source":["The 6 main steps detailed below can be summarized in 3 main ones:\n","\n","1. **Initialization & download** (download of Portuguese Wikipedia and GPT-2 English pre-trained model and tokenizer)\n","2. **GPT-2 tokenizer with a Portuguese vocab** (train a GPT-2 tokenizer with a vocab in Portuguese, wrap it into a fastai v2 tokenizer and update the embeddings matrix of the GPT-2 English pre-trained model according to the new Portuguese vocab: keep the embeddings vectors of the common tokens between English and Portuguese vocabs)\n","3. 
**Fine-tune on Portuguese Wikipedia the GPT-2 model with fastai v2 training functionalities**"]},{"cell_type":"code","metadata":{"id":"_XZCyJqS9pJt"},"source":["# extra small thing to setup drives paths etc written "],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"VAK6Bn0Aw4_u"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"VI_AUR8K9ncO","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616061296952,"user_tz":-60,"elapsed":30851,"user":{"displayName":"Mark Lina","photoUrl":"","userId":"17651129667533642938"}},"outputId":"c1d01012-4596-4c52-ef71-bf2e32ea8d76"},"source":["#start by mounting google drive\n","from google.colab import drive, files\n","drive.mount('/content/gdrive', force_remount=True)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Mounted at /content/gdrive\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"yzT-SC9hmTKG","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1615837934244,"user_tz":-60,"elapsed":167861,"user":{"displayName":"Mark Lina","photoUrl":"","userId":"17651129667533642938"}},"outputId":"36c1fb8b-6b9e-4fb1-be39-8b68fdfbfa23"},"source":["# need to instal fastai 2 etc before \n","!pip install -q git+https://github.com/fastai/fastai\n","!pip install -q git+https://github.com/fastai/fastcore\n","!pip install -q iterative-stratification"],"execution_count":null,"outputs":[{"output_type":"stream","text":["\u001b[K |████████████████████████████████| 61kB 3.0MB/s \n","\u001b[K |████████████████████████████████| 12.8MB 326kB/s \n","\u001b[K |████████████████████████████████| 776.8MB 21kB/s \n","\u001b[?25h Building wheel for fastai (setup.py) ... \u001b[?25l\u001b[?25hdone\n","\u001b[31mERROR: torchtext 0.9.0 has requirement torch==1.8.0, but you'll have torch 1.7.1 which is incompatible.\u001b[0m\n"," Building wheel for fastcore (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"g-Zna9nuQE8C","executionInfo":{"status":"ok","timestamp":1616061331938,"user_tz":-60,"elapsed":582,"user":{"displayName":"Mark Lina","photoUrl":"","userId":"17651129667533642938"}},"outputId":"ec9b1082-42d2-4773-ece4-ad60c5f567f7"},"source":["cd /content/gdrive/MyDrive/fastai"],"execution_count":null,"outputs":[{"output_type":"stream","text":["/content/gdrive/MyDrive/fastai\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"a4VVvLIhQBjv"},"source":["from nlputilsfastai import * # augumented py file ---> from fastai.basics import * # was fastai2"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":309},"id":"6prc1OyVI221","executionInfo":{"status":"ok","timestamp":1615837583603,"user_tz":-60,"elapsed":2993,"user":{"displayName":"Mark Lina","photoUrl":"","userId":"17651129667533642938"}},"outputId":"d9fc6e69-482f-496f-e52c-f49b41a3694b"},"source":["# !pip install fastcore==1.3.8"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Collecting fastcore==1.3.8\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/26/53/d79c0f942f8bb44903108462541130b53fc7b4d744b1b5df9127b0b524d6/fastcore-1.3.8-py3-none-any.whl (48kB)\n","\r\u001b[K |██████▉ | 10kB 19.8MB/s eta 0:00:01\r\u001b[K |█████████████▋ | 20kB 25.6MB/s eta 0:00:01\r\u001b[K |████████████████████▍ | 30kB 23.5MB/s eta 0:00:01\r\u001b[K |███████████████████████████▏ | 40kB 26.4MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 51kB 5.8MB/s \n","\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from fastcore==1.3.8) (20.9)\n","Requirement already satisfied: pip in /usr/local/lib/python3.7/dist-packages (from fastcore==1.3.8) (19.3.1)\n","Requirement already satisfied: pyparsing>=2.0.2 in 
/usr/local/lib/python3.7/dist-packages (from packaging->fastcore==1.3.8) (2.4.7)\n","Installing collected packages: fastcore\n"," Found existing installation: fastcore 1.3.20\n"," Uninstalling fastcore-1.3.20:\n"," Successfully uninstalled fastcore-1.3.20\n","Successfully installed fastcore-1.3.8\n"],"name":"stdout"},{"output_type":"display_data","data":{"application/vnd.colab-display-data+json":{"pip_warning":{"packages":["fastcore"]}}},"metadata":{"tags":[]}}]},{"cell_type":"code","metadata":{"id":"qMQ9fAujMD74"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"qBjPMyIYTHjr"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"CmKm8BETBpB2"},"source":["# 1. Installing required libraries and mounting google drive"]},{"cell_type":"code","metadata":{"id":"nqp4kpUG9tsV","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616332781966,"user_tz":-60,"elapsed":20545,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"e8c9b1b7-5c52-43dc-d8ef-a486e3e82649"},"source":["#start by mounting google drive\n","from google.colab import drive, files\n","drive.mount('/content/gdrive', force_remount=True)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Mounted at /content/gdrive\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"M2y5n_KU99lq","executionInfo":{"status":"ok","timestamp":1616332996058,"user_tz":-60,"elapsed":234624,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"29073dbc-db55-408b-ddfa-112eb14dcd1e"},"source":["# need to instal fastai 2 etc before \n","%%time\n","!pip install -q git+https://github.com/fastai/fastai\n","!pip install -q git+https://github.com/fastai/fastcore\n","!pip install -q iterative-stratification\n","!pip install --upgrade 
tables"],"execution_count":null,"outputs":[{"output_type":"stream","text":["\u001b[K |████████████████████████████████| 61kB 3.3MB/s \n","\u001b[K |████████████████████████████████| 12.8MB 251kB/s \n","\u001b[K |████████████████████████████████| 776.8MB 22kB/s \n","\u001b[?25h Building wheel for fastai (setup.py) ... \u001b[?25l\u001b[?25hdone\n","\u001b[31mERROR: torchtext 0.9.0 has requirement torch==1.8.0, but you'll have torch 1.7.1 which is incompatible.\u001b[0m\n"," Building wheel for fastcore (setup.py) ... \u001b[?25l\u001b[?25hdone\n","Collecting tables\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/0f/cb/4097be890a773af95343389faa8c283b0d9ff606f144227a548461dcbdd5/tables-3.6.1-cp37-cp37m-manylinux1_x86_64.whl (4.3MB)\n","\u001b[K |████████████████████████████████| 4.3MB 5.6MB/s \n","\u001b[?25hRequirement already satisfied, skipping upgrade: numexpr>=2.6.2 in /usr/local/lib/python3.7/dist-packages (from tables) (2.7.3)\n","Requirement already satisfied, skipping upgrade: numpy>=1.9.3 in /usr/local/lib/python3.7/dist-packages (from tables) (1.19.5)\n","Installing collected packages: tables\n"," Found existing installation: tables 3.4.4\n"," Uninstalling tables-3.4.4:\n"," Successfully uninstalled tables-3.4.4\n","Successfully installed tables-3.6.1\n","CPU times: user 1.43 s, sys: 391 ms, total: 1.82 s\n","Wall time: 3min 34s\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"P-2JYQnD9N6F"},"source":["# 2. 
Initialization"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"A7Z0xlgv-TvJ","executionInfo":{"status":"ok","timestamp":1616332996060,"user_tz":-60,"elapsed":145493,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"c5558794-aff1-427a-de12-8f3214654f4c"},"source":["cd /content/gdrive/MyDrive/fastai"],"execution_count":null,"outputs":[{"output_type":"stream","text":["/content/gdrive/MyDrive/fastai\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"nbpresent":{"id":"151cd18f-76e3-440f-a8c7-ffa5c6b5da01"},"id":"RsJYkiK99N6G"},"source":["# from fastai2.text.all import *\n","# from nlputils_fastai2 import * \n","\n","from fastai.text.all import *\n","from nlputilsfastai import * \n","\n","%reload_ext autoreload\n","%autoreload 2\n","%matplotlib inline"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"lHjl3W7HBdej","executionInfo":{"status":"ok","timestamp":1616333001350,"user_tz":-60,"elapsed":150772,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"7d34a178-4200-4013-d710-77808b9fd9c5"},"source":["gpu = 0\n","torch.cuda.set_device(gpu)\n","print(f'cuda device: {torch.cuda.current_device()}')\n","print(f'cuda device name: {torch.cuda.get_device_name(gpu)}')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["cuda device: 0\n","cuda device name: Tesla K80\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"gK0-GZGhC4nF","executionInfo":{"status":"ok","timestamp":1616333001351,"user_tz":-60,"elapsed":150770,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"cef0df9a-bdc3-4902-d73f-3154389b6e60"},"source":["!nvidia-smi"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Sun Mar 21 13:23:20 2021 
\n","+-----------------------------------------------------------------------------+\n","| NVIDIA-SMI 460.56 Driver Version: 460.32.03 CUDA Version: 11.2 |\n","|-------------------------------+----------------------+----------------------+\n","| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n","| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n","| | | MIG M. |\n","|===============================+======================+======================|\n","| 0 Tesla K80 Off | 00000000:00:04.0 Off | 0 |\n","| N/A 44C P8 31W / 149W | 3MiB / 11441MiB | 0% Default |\n","| | | N/A |\n","+-------------------------------+----------------------+----------------------+\n"," \n","+-----------------------------------------------------------------------------+\n","| Processes: |\n","| GPU GI CI PID Type Process name GPU Memory |\n","| ID ID Usage |\n","|=============================================================================|\n","| No running processes found |\n","+-----------------------------------------------------------------------------+\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"pZIeiQm9Cya8"},"source":["Load standard snipet to prevent random disconnects\n","This cell runs JS code to automatic reconnect to runtime."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"T-N9y4S6C1I5","executionInfo":{"status":"ok","timestamp":1616333001352,"user_tz":-60,"elapsed":150763,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"e8c3979a-173f-456f-fa89-94e36b4b0810"},"source":["import IPython\n","from google.colab import output\n","\n","display(IPython.display.Javascript('''\n"," function ClickConnect(){\n"," btn = document.querySelector(\"colab-connect-button\")\n"," if (btn != null){\n"," console.log(\"Click colab-connect-button\"); \n"," btn.click() \n"," }\n"," \n"," btn = document.getElementById('ok')\n"," if (btn != null){\n"," 
console.log(\"Click reconnect\"); \n"," btn.click() \n"," }\n"," }\n"," \n","setInterval(ClickConnect,60000)\n","'''))\n","\n","print(\"Done.\")"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"application/javascript":["\n"," function ClickConnect(){\n"," btn = document.querySelector(\"colab-connect-button\")\n"," if (btn != null){\n"," console.log(\"Click colab-connect-button\"); \n"," btn.click() \n"," }\n"," \n"," btn = document.getElementById('ok')\n"," if (btn != null){\n"," console.log(\"Click reconnect\"); \n"," btn.click() \n"," }\n"," }\n"," \n","setInterval(ClickConnect,60000)\n"],"text/plain":[""]},"metadata":{"tags":[]}},{"output_type":"stream","text":["Done.\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"o2qnh-a79N6H","executionInfo":{"status":"ok","timestamp":1616333001353,"user_tz":-60,"elapsed":150759,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"3933bc50-99a5-4778-c065-79d6c19e8926"},"source":["# Get config of fastai2 paths\n","config = Config()\n","config.d"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["{'archive_path': '/root/.fastai/archive',\n"," 'data_path': '/root/.fastai/data',\n"," 'model_path': '/root/.fastai/models',\n"," 'storage_path': '/tmp',\n"," 'version': 2}"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"markdown","metadata":{"nbpresent":{"id":"cf070ab7-babb-4cf0-a315-401f65461dc8"},"id":"5pL4tfG49N6I"},"source":["This will create a `{lang}wiki` folder, containing a `{lang}wiki` text file with the wikipedia contents (for other languages, replace `{lang}` with the appropriate code from the [list of wikipedias](https://meta.wikimedia.org/wiki/List_of_Wikipedias))."]},{"cell_type":"code","metadata":{"id":"2ShRUXWj_NoG"},"source":["# setup new path_data and create the corresponding folder\n","lang = 'pl'\n","name = 
f'{lang}wiki'\n","data_path = config['data_path']\n","path_data = data_path/name\n","path_data.mkdir(exist_ok=True, parents=True)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"NykPqucXA0hF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616333001355,"user_tz":-60,"elapsed":150755,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"1b2d0fb5-61f5-401b-f2c3-783c56c9c517"},"source":["cd /content/gdrive/MyDrive/fastai"],"execution_count":null,"outputs":[{"output_type":"stream","text":["/content/gdrive/MyDrive/fastai\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"9YmkrjvBDPPr","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616333001356,"user_tz":-60,"elapsed":150752,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"02fdd4b6-5ce1-41a8-d688-25c5960eb098"},"source":["data_path, path_data"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(Path('/root/.fastai/data'), Path('/root/.fastai/data/plwiki'))"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"egNzIEWtylqk"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"NBx8vCFkBtDu"},"source":["# 3. 
Loading a previously prepared, scraped wiki file (~1 GB) for a particular language\n","For that purpose, another notebook was used: [wiki download](https://github.com/len-sla/other/blob/main/wiki_download.ipynb)"]},{"cell_type":"code","metadata":{"id":"pyZnd8Srze_Z"},"source":["!cp /content/gdrive/MyDrive/fastai/all_texts_plwiki.csv /root/.fastai/data/plwiki\n","!cp /content/gdrive/MyDrive/fastai/all_texts_plwiki.txt /root/.fastai/data/plwiki"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"N-2ccH-IAuUC","executionInfo":{"status":"ok","timestamp":1616333093899,"user_tz":-60,"elapsed":230141,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"9d93dfcb-8fc4-4351-bbaf-3a7a9329e9cf"},"source":["!du -hs {'/content/gdrive/MyDrive/fastai/all_texts_plwiki.csv'}"],"execution_count":null,"outputs":[{"output_type":"stream","text":["1.1G\t/content/gdrive/MyDrive/fastai/all_texts_plwiki.csv\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"fdlfLPzP_3Z0","executionInfo":{"status":"ok","timestamp":1616333132791,"user_tz":-60,"elapsed":269026,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"44366af8-2e01-45d1-e611-c5248f99f27f"},"source":["df = pd.read_csv('/content/gdrive/MyDrive/fastai/all_texts_plwiki.csv')\n","df.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
text
0Henry Wager Halleck (ur. 16 stycznia 1815, zm. 9 stycznia 1872) – amerykański wojskowy, naukowiec i prawnik, oficer United States Army.\\n\\n, znany pod – obraźliwym później – przydomkiem „Old Brains”, brał czynny udział w dziele przyłączenia Kalifornii jako stanu. Z powodzeniem praktykował jako prawnik i deweloper. Na początku wojny secesyjnej, był naczelnym dowódcą Armii Unii na zachodnim teatrze działań, a jednocześnie – przez prawie dwa lata – głównodowodzącym wszystkich armii USA. „Awansował” na szefa sztabu armii, gdy generał-porucznik Ulysses Grant, były podkomendny Hallecka na zachod...
1Kościół Najświętszej Marii Panny (\"in summo\") w Poznaniu – zabytkowy gotycki kościół na Ostrowie Tumskim wraz z resztkami wczesnopiastowskiego palatium.\\n\\nW dzisiejszym kształcie powstał w połowie XV wieku, jednak jego historia rozpoczyna się około 965 roku, gdy po przybyciu Dobrawy wzniesiono na Ostrowie Tumskim kaplicę zamkową. W dokumentach kościół Najświętszej Marii Panny pod swoim dzisiejszym wezwaniem pojawia się po raz pierwszy w 1247. \\n\\nWedług najnowszych badań prawdopodobnie pod prezbiterium znajdują się fundamenty rotundy pełniącej funkcję kaplicy, pewnym jest natomiast istnie...
2Gieorgij Andriejewicz Mołczanow (ros. Георгий Андреевич Молчанов, ur. 3 kwietnia 1897 w Charkowie, zm. 9 października 1937 w miejscu egzekucji Kommunarka) – funkcjonariusz radzieckiej policji politycznej, komisarz bezpieczeństwa państwowego II rangi, ludowy komisarz spraw wewnętrznych Białoruskiej SRR (1936-1937).\\n\\nUrodzony w rodzinie rosyjskiej. Do 1917 uczył się w szkole handlowej w Charkowie, od listopada 1917 do czerwca 1918 był żołnierzem i członkiem sztabu Głównodowodzącego Wojsk Południa Rosji Antonowa-Owsiejenki, później pracował w sztabie Frontu Wschodniego. \\n\\nOd grudnia 1917 ...
3José Manuel Durão Barroso (wym. []; ur. 23 marca 1956 w Lizbonie) – portugalski polityk, prawnik i nauczyciel akademicki. W latach 1992–1995 minister spraw zagranicznych w rządzie Aníbal Cavaco Silvy, od 1999 do 2004 przewodniczący Partii Socjaldemokratycznej. Premier Portugalii od 6 kwietnia 2002 do 17 lipca 2004. Od 22 listopada 2004 do 31 października 2014 przewodniczący Komisji Europejskiej.\\n\\nUkończył prawo na Uniwersytecie Lizbońskim, a także studia europejskie na Uniwersytecie Genewskim, na którym uzyskał również magisterium w zakresie nauk politycznych. Pracował jako nauczyciel ak...
4Laodika I (gr. \"Λαοδίκη\", \"Laodíkē\") (zm. po 242 p.n.e.) – córka Achajosa Starszego z dynastii Seleucydów, brata Antiocha I Sotera, pierwsza żona brata stryjecznego Antiocha II Theosa, króla państwa Seleucydów, syna Antiocha I Sotera.\\n\\nW czasie II wojny syryjskiej (258-248 p.n.e.) jej mąż Antioch II Theos, jako sprzymierzeniec Macedonii walczył przeciwko Egiptowi. W wyniku tej wojny Antioch II zawarł porozumienie z królem Egiptu Ptolemeuszem II Filadelfem w r. 250 p.n.e. Miał się wyprzeć żony Laodiki I i wspólnych z nią dzieci, a poślubić jego córkę Berenikę oraz zdeklarować się uczynić ...
\n","
"],"text/plain":[" text\n","0 Henry Wager Halleck (ur. 16 stycznia 1815, zm. 9 stycznia 1872) – amerykański wojskowy, naukowiec i prawnik, oficer United States Army.\\n\\n, znany pod – obraźliwym później – przydomkiem „Old Brains”, brał czynny udział w dziele przyłączenia Kalifornii jako stanu. Z powodzeniem praktykował jako prawnik i deweloper. Na początku wojny secesyjnej, był naczelnym dowódcą Armii Unii na zachodnim teatrze działań, a jednocześnie – przez prawie dwa lata – głównodowodzącym wszystkich armii USA. „Awansował” na szefa sztabu armii, gdy generał-porucznik Ulysses Grant, były podkomendny Hallecka na zachod...\n","1 Kościół Najświętszej Marii Panny (\"in summo\") w Poznaniu – zabytkowy gotycki kościół na Ostrowie Tumskim wraz z resztkami wczesnopiastowskiego palatium.\\n\\nW dzisiejszym kształcie powstał w połowie XV wieku, jednak jego historia rozpoczyna się około 965 roku, gdy po przybyciu Dobrawy wzniesiono na Ostrowie Tumskim kaplicę zamkową. W dokumentach kościół Najświętszej Marii Panny pod swoim dzisiejszym wezwaniem pojawia się po raz pierwszy w 1247. \\n\\nWedług najnowszych badań prawdopodobnie pod prezbiterium znajdują się fundamenty rotundy pełniącej funkcję kaplicy, pewnym jest natomiast istnie...\n","2 Gieorgij Andriejewicz Mołczanow (ros. Георгий Андреевич Молчанов, ur. 3 kwietnia 1897 w Charkowie, zm. 9 października 1937 w miejscu egzekucji Kommunarka) – funkcjonariusz radzieckiej policji politycznej, komisarz bezpieczeństwa państwowego II rangi, ludowy komisarz spraw wewnętrznych Białoruskiej SRR (1936-1937).\\n\\nUrodzony w rodzinie rosyjskiej. Do 1917 uczył się w szkole handlowej w Charkowie, od listopada 1917 do czerwca 1918 był żołnierzem i członkiem sztabu Głównodowodzącego Wojsk Południa Rosji Antonowa-Owsiejenki, później pracował w sztabie Frontu Wschodniego. \\n\\nOd grudnia 1917 ...\n","3 José Manuel Durão Barroso (wym. []; ur. 23 marca 1956 w Lizbonie) – portugalski polityk, prawnik i nauczyciel akademicki. 
W latach 1992–1995 minister spraw zagranicznych w rządzie Aníbal Cavaco Silvy, od 1999 do 2004 przewodniczący Partii Socjaldemokratycznej. Premier Portugalii od 6 kwietnia 2002 do 17 lipca 2004. Od 22 listopada 2004 do 31 października 2014 przewodniczący Komisji Europejskiej.\\n\\nUkończył prawo na Uniwersytecie Lizbońskim, a także studia europejskie na Uniwersytecie Genewskim, na którym uzyskał również magisterium w zakresie nauk politycznych. Pracował jako nauczyciel ak...\n","4 Laodika I (gr. \"Λαοδίκη\", \"Laodíkē\") (zm. po 242 p.n.e.) – córka Achajosa Starszego z dynastii Seleucydów, brata Antiocha I Sotera, pierwsza żona brata stryjecznego Antiocha II Theosa, króla państwa Seleucydów, syna Antiocha I Sotera.\\n\\nW czasie II wojny syryjskiej (258-248 p.n.e.) jej mąż Antioch II Theos, jako sprzymierzeniec Macedonii walczył przeciwko Egiptowi. W wyniku tej wojny Antioch II zawarł porozumienie z królem Egiptu Ptolemeuszem II Filadelfem w r. 250 p.n.e. Miał się wyprzeć żony Laodiki I i wspólnych z nią dzieci, a poślubić jego córkę Berenikę oraz zdeklarować się uczynić ..."]},"metadata":{"tags":[]},"execution_count":14}]},{"cell_type":"markdown","metadata":{"id":"Zad8TrPNybqT"},"source":["# 4. 
Loading the ready Polish tokenizer (previously prepared)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"S90iGQ1i1Dhi","executionInfo":{"status":"ok","timestamp":1616333142614,"user_tz":-60,"elapsed":278526,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"08db3d0f-a3f6-435e-fc73-789c972f9970"},"source":["%%time\n","!pip install transformers\n","!pip freeze | grep transformers"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Collecting transformers\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/ed/d5/f4157a376b8a79489a76ce6cfe147f4f3be1e029b7144fa7b8432e8acb26/transformers-4.4.2-py3-none-any.whl (2.0MB)\n","\u001b[K |████████████████████████████████| 2.0MB 4.2MB/s \n","\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from transformers) (20.9)\n","Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.0.12)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n","Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.19.5)\n","Requirement already satisfied: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from transformers) (3.7.2)\n","Collecting sacremoses\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)\n","\u001b[K |████████████████████████████████| 890kB 38.8MB/s \n","\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.41.1)\n","Collecting tokenizers<0.11,>=0.10.1\n","\u001b[?25l 
Downloading https://files.pythonhosted.org/packages/71/23/2ddc317b2121117bf34dd00f5b0de194158f2a44ee2bf5e47c7166878a97/tokenizers-0.10.1-cp37-cp37m-manylinux2010_x86_64.whl (3.2MB)\n","\u001b[K |████████████████████████████████| 3.2MB 39.6MB/s \n","\u001b[?25hRequirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->transformers) (2.4.7)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2020.12.5)\n","Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n","Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n","Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n","Requirement already satisfied: typing-extensions>=3.6.4; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from importlib-metadata; python_version < \"3.8\"->transformers) (3.7.4.3)\n","Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata; python_version < \"3.8\"->transformers) (3.4.1)\n","Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.15.0)\n","Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n","Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.0.1)\n","Building wheels for collected packages: sacremoses\n"," Building wheel for sacremoses (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n"," Created wheel for sacremoses: filename=sacremoses-0.0.43-cp37-none-any.whl size=893262 sha256=f5a5b523d9fb1ed0c922ce2644030484b0826823cca49a932f1d58ed343f2c13\n"," Stored in directory: /root/.cache/pip/wheels/29/3c/fd/7ce5c3f0666dab31a50123635e6fb5e19ceb42ce38d4e58f45\n","Successfully built sacremoses\n","Installing collected packages: sacremoses, tokenizers, transformers\n","Successfully installed sacremoses-0.0.43 tokenizers-0.10.1 transformers-4.4.2\n","transformers==4.4.2\n","CPU times: user 39.9 ms, sys: 117 ms, total: 157 ms\n","Wall time: 8.29 s\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","referenced_widgets":["7f0767ff35a344b0b6da17083132427e","596851de3e714ab8942a72f69b18a63c","f0a9328a838041c0979b2b709b0b42a0","156da666d81b4026941d5acb5b487128","87da6b072be3484b98e01b534d1a751a","1ae0380be03140089589c6d485e7c1d0","f8b5835af84c482092be9d849cb9bd02","60b9f541de2648f08b7ad6257f32499b","7d5d97ff327f4d6aa5b44fc7656f07d8","c7f7c2744375405298d3b95a6aa2b361","bdfee3f0cab649989a96841b3a487635","8e3b1e20656248c8a9d92c26028d5026","e99dd500836c4156a91e7a75b18a683d","63d976528be746609f4910ca4b73bfc8","91552ff8f58d4563b60b8a1764704aa8","cb3db61b459f4082a5c2aca136564999","0a3b0c8f447543899950a6d2ff342a57","b428499e94534b6ebdc7be8f4adea734","18501534dc6a496b99e7017805acd781","75c7bd71ca2b4f3b93f53e2355e2856d","edc9e07a4bc14e4cb9ba9dde5a75d5aa","75e507e0573244a8a0c1fb9f7704322e","c098c5b96cb2449688d1516eae241690","40b5ac5ade1b46e4b04b5eee03840cf6"]},"id":"x8q1Ck6J9N6W","executionInfo":{"status":"ok","timestamp":1616333150282,"user_tz":-60,"elapsed":286189,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"f8ba0d38-d5f2-412f-a82e-6044395387cc"},"source":["%%time\n","from transformers import GPT2TokenizerFast\n","\n","pretrained_weights = 'gpt2'\n","tokenizer_en = 
GPT2TokenizerFast.from_pretrained(pretrained_weights)"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"application/vnd.jupyter.widget-view+json":{"model_id":"7f0767ff35a344b0b6da17083132427e","version_minor":0,"version_major":2},"text/plain":["HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1042301.0, style=ProgressStyle(descript…"]},"metadata":{"tags":[]}},{"output_type":"stream","text":["\n"],"name":"stdout"},{"output_type":"display_data","data":{"application/vnd.jupyter.widget-view+json":{"model_id":"7d5d97ff327f4d6aa5b44fc7656f07d8","version_minor":0,"version_major":2},"text/plain":["HBox(children=(FloatProgress(value=0.0, description='Downloading', max=456318.0, style=ProgressStyle(descripti…"]},"metadata":{"tags":[]}},{"output_type":"stream","text":["\n"],"name":"stdout"},{"output_type":"display_data","data":{"application/vnd.jupyter.widget-view+json":{"model_id":"0a3b0c8f447543899950a6d2ff342a57","version_minor":0,"version_major":2},"text/plain":["HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1355256.0, style=ProgressStyle(descript…"]},"metadata":{"tags":[]}},{"output_type":"stream","text":["\n","CPU times: user 665 ms, sys: 123 ms, total: 788 ms\n","Wall time: 9.07 s\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"25v40IRy01Ye"},"source":["# To correct the warning about token_pad (GPT2TokenizerFast), run the following code\n","# source: https://github.com/huggingface/transformers/issues/2648#issuecomment-616177044\n","tokenizer_en.pad_token = tokenizer_en.eos_token"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"IErVmjWa01Vl"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"scrolled":true,"colab":{"base_uri":"https://localhost:8080/"},"id":"LSV4oDRI9N6W","executionInfo":{"status":"ok","timestamp":1616333150290,"user_tz":-60,"elapsed":286187,"user":{"displayName":"Marek 
Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"5978839c-4c53-486d-bbbe-b6c38938e687"},"source":["# source: https://huggingface.co/transformers/_modules/transformers/tokenization_utils_fast.html\n","\n","print('---------- vocab ----------')\n","print()\n","\n","print('vocab_files_names:',tokenizer_en.vocab_files_names)\n","print()\n","\n","for k,v in tokenizer_en.pretrained_vocab_files_map.items():\n"," print(k)\n"," for kk,vv in v.items():\n"," print('- ',kk,':',vv)\n"," print()\n"," \n","print('vocab_size:',tokenizer_en.vocab_size)\n","print()\n","#print(tokenizer_en.get_vocab())\n","\n","num = 50\n","print(f'First {num} items of the vocab: {dict(itertools.islice(tokenizer_en.get_vocab().items(), 20))}')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["---------- vocab ----------\n","\n","vocab_files_names: {'vocab_file': 'vocab.json', 'merges_file': 'merges.txt', 'tokenizer_file': 'tokenizer.json'}\n","\n","vocab_file\n","- gpt2 : https://huggingface.co/gpt2/resolve/main/vocab.json\n","- gpt2-medium : https://huggingface.co/gpt2-medium/resolve/main/vocab.json\n","- gpt2-large : https://huggingface.co/gpt2-large/resolve/main/vocab.json\n","- gpt2-xl : https://huggingface.co/gpt2-xl/resolve/main/vocab.json\n","- distilgpt2 : https://huggingface.co/distilgpt2/resolve/main/vocab.json\n","\n","merges_file\n","- gpt2 : https://huggingface.co/gpt2/resolve/main/merges.txt\n","- gpt2-medium : https://huggingface.co/gpt2-medium/resolve/main/merges.txt\n","- gpt2-large : https://huggingface.co/gpt2-large/resolve/main/merges.txt\n","- gpt2-xl : https://huggingface.co/gpt2-xl/resolve/main/merges.txt\n","- distilgpt2 : https://huggingface.co/distilgpt2/resolve/main/merges.txt\n","\n","tokenizer_file\n","- gpt2 : https://huggingface.co/gpt2/resolve/main/tokenizer.json\n","- gpt2-medium : https://huggingface.co/gpt2-medium/resolve/main/tokenizer.json\n","- gpt2-large : 
https://huggingface.co/gpt2-large/resolve/main/tokenizer.json\n","- gpt2-xl : https://huggingface.co/gpt2-xl/resolve/main/tokenizer.json\n","- distilgpt2 : https://huggingface.co/distilgpt2/resolve/main/tokenizer.json\n","\n","vocab_size: 50257\n","\n","First 50 items of the vocab: {'Ġtopple': 49377, 'Ġblocked': 10226, '258': 25600, 'Ġaddicted': 28357, 'Ġdrum': 13026, 'It': 1026, 'ought': 2917, 'rib': 822, 'Ġhomemade': 24584, 'Ġnoises': 26782, 'ÙĨ': 23338, 'Ġprinces': 42676, 'Ġconvenience': 15607, 'Ġearthquake': 16295, 'vim': 31124, 'ĠJefferson': 15375, 'ãģĻ': 33623, 'Ġrebellious': 43860, 'adoes': 46368, 'anton': 23026}\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"vspJiJsr01P8","executionInfo":{"status":"ok","timestamp":1616333154292,"user_tz":-60,"elapsed":290187,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"0515893a-3a1a-4e74-d5fe-681528af233e"},"source":["!pip install tokenizers\n","!pip freeze | grep tokenizers"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Requirement already satisfied: tokenizers in /usr/local/lib/python3.7/dist-packages (0.10.1)\n","tokenizers==0.10.1\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"2om1-fULxN6Y"},"source":["# creating directory for tokenizer\n","ByteLevelBPE_tokenizer_pl_rep = 'ByteLevelBPE_tokenizer_pl'\n","path_to_ByteLevelBPE_tokenizer_pl_rep = path_data/ByteLevelBPE_tokenizer_pl_rep\n","if not (path_to_ByteLevelBPE_tokenizer_pl_rep).exists():\n"," path_to_ByteLevelBPE_tokenizer_pl_rep.mkdir(exist_ok=True, parents=True)\n","# ByteLevelBPE_tokenizer_pl.save_model(str(path_to_ByteLevelBPE_tokenizer_pl_rep))"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"beVl5rWQ16to","executionInfo":{"status":"ok","timestamp":1616333154297,"user_tz":-60,"elapsed":290183,"user":{"displayName":"Marek 
Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"b42f3b97-56f9-4e01-d9e0-806c8eadda4b"},"source":["ls /root/.fastai/data/plwiki -all"],"execution_count":null,"outputs":[{"output_type":"stream","text":["total 2147980\n","drwxr-xr-x 3 root root 4096 Mar 21 13:25 \u001b[0m\u001b[01;34m.\u001b[0m/\n","drwxr-xr-x 3 root root 4096 Mar 21 13:23 \u001b[01;34m..\u001b[0m/\n","-rw------- 1 root root 1101183658 Mar 21 13:23 all_texts_plwiki.csv\n","-rw------- 1 root root 1098323868 Mar 21 13:24 all_texts_plwiki.txt\n","drwxr-xr-x 2 root root 4096 Mar 21 13:25 \u001b[01;34mByteLevelBPE_tokenizer_pl\u001b[0m/\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"OXbGsBhBxN3P"},"source":["#copying previiously created pl okenizer ( saving ~30min fro preparing that)\n","!cp /content/gdrive/MyDrive/fastai/vocab.json /root/.fastai/data/plwiki/ByteLevelBPE_tokenizer_pl\n","!cp /content/gdrive/MyDrive/fastai/merges.txt /root/.fastai/data/plwiki/ByteLevelBPE_tokenizer_pl"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"VS0TEzJy3qBy"},"source":["from tokenizers.implementations import ByteLevelBPETokenizer\n","ByteLevelBPE_tokenizer_pl = ByteLevelBPETokenizer(\n"," \"/root/.fastai/data/plwiki/ByteLevelBPE_tokenizer_pl/vocab.json\",\n"," \"/root/.fastai/data/plwiki/ByteLevelBPE_tokenizer_pl/merges.txt\",\n",")"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"aB9cX5nV4kOv"},"source":["Testing if it is working"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Vwo-Baa4xN0w","executionInfo":{"status":"ok","timestamp":1616333156963,"user_tz":-60,"elapsed":292842,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"765f231b-748a-44b5-b293-908ad041078e"},"source":["# Get vocab as a list\n","ByteLevelBPE_tokenizer_pl_vocab = ByteLevelBPE_tokenizer_pl.get_vocab() \n","ByteLevelBPE_tokenizer_pl_vocab_ls = [k for k, v in 
sorted(ByteLevelBPE_tokenizer_pl_vocab.items(), key=lambda item: item[1])]\n","len(ByteLevelBPE_tokenizer_pl_vocab_ls),ByteLevelBPE_tokenizer_pl_vocab_ls[:5]"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(50257, ['<|endoftext|>', '!', '\"', '#', '$'])"]},"metadata":{"tags":[]},"execution_count":24}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"1S_KAGDsxNyA","executionInfo":{"status":"ok","timestamp":1616333156965,"user_tz":-60,"elapsed":292840,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"da5042b8-6e01-423d-c718-eb69f8cf7761"},"source":["text = \"Taki mały tekst dla sprawdzenia .\"\n","output = ByteLevelBPE_tokenizer_pl.encode(text)\n","print('\\n splitting by tokens\\n ')\n","print(output.ids,)\n","print(output.tokens)\n","print(output.offsets)\n","\n","back_to_text = ByteLevelBPE_tokenizer_pl.decode(ByteLevelBPE_tokenizer_pl.encode(text).ids)\n","\n","print('\\ninput text:', text)\n","print('tokens ids:', output.ids)\n","print('back to text:', back_to_text)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["\n"," splitting by tokens\n"," \n","[5565, 335, 10120, 7591, 624, 1877, 1054, 4461]\n","['Ta', 'ki', 'ĠmaÅĤy', 'Ġtekst', 'Ġdla', 'Ġspraw', 'dzenia', 'Ġ.']\n","[(0, 2), (2, 4), (4, 9), (9, 15), (15, 19), (19, 25), (25, 31), (31, 33)]\n","\n","input text: Taki mały tekst dla sprawdzenia .\n","tokens ids: [5565, 335, 10120, 7591, 624, 1877, 1054, 4461]\n","back to text: Taki mały tekst dla sprawdzenia .\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"pnsLlsZVOf-_"},"source":[""]},{"cell_type":"markdown","metadata":{"id":"BYgcVlYB9NYd"},"source":["# 5. 
Create a fastai tokenizer and update the embeddings matrix of the GPT-2 English pre-trained model"]},{"cell_type":"markdown","metadata":{"id":"2Fmobv0m9NYd"},"source":["Now let's see how we can use fastai v2 to fine-tune this model on Wikipedia in Polish, using all the fastai v2 training utilities.\n","\n","We will follow these 2 steps:"]},{"cell_type":"markdown","metadata":{"id":"mD9vUIko9NYd"},"source":["- 4.1) **GPT2TokenizerFast (imported GPT-2 tokenizer) --> fastai Tokenizer**: to process the data to train a model, we need to build a fastai tokenizer from the GPT-2 tokenizer with vocab in Polish.\n","- 4.2) **Change vocab embeddings (wte matrix) in the GPT-2 pre-trained model to adapt to the Polish vocab**: as the vocab embedding matrix (wte) of the pre-trained GPT-2 model corresponds to the English vocabulary, we'll keep the embedding vectors of the common tokens between the English and Polish vocab."]},{"cell_type":"markdown","metadata":{"id":"hHrKpeRo9NYe"},"source":[" First, we import all the text utilities:"]},{"cell_type":"code","metadata":{"id":"63BrGAAX9NYe"},"source":["from fastai.text.all import *"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"dhfoTxlr9NYh"},"source":["#### 4.1 GPT2TokenizerFast (imported GPT-2 tokenizer) --> fastai Tokenizer"]},{"cell_type":"markdown","metadata":{"id":"wj-Y5lue9NYh"},"source":["*(text from Sylvain Gugger Transformers Tutorial)* To process this data to train a model, we need to build a `Transform` that will be applied lazily. 
In a fastai `Transform` you can define:\n","- an `encodes` method that is applied when you call the transform (a bit like the `forward` method in a `nn.Module`)\n","- a `decodes` method that is applied when you call the [decode](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.PreTrainedTokenizer.decode) method of the transform, if you need to decode anything for showing purposes (like converting ids to a text here)\n","- a `setups` method that sets some inner state of the `Transform` (not needed here)"]},{"cell_type":"code","metadata":{"id":"cjTS3O4W9NYi"},"source":["class TransformersTokenizer(Transform):\n"," def __init__(self, tokenizer): self.tokenizer = tokenizer\n"," def encodes(self, x): \n"," toks = self.tokenizer.tokenize(x)\n"," return tensor(self.tokenizer.convert_tokens_to_ids(toks))\n"," def decodes(self, x): return TitledStr(self.tokenizer.decode(x.cpu().numpy()))"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"BQrK6KdF9NYk"},"source":["Two comments on the code above:\n","- in `encodes` we don't use the [tokenizer.encode](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.PreTrainedTokenizer.encode) method since it does some additional preprocessing for the model after tokenizing and numericalizing (the part throwing a warning before). Here we don't need any post-processing so it's fine to skip it and we use the [tokenizer.tokenize](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.PreTrainedTokenizer.tokenize) method followed by the [tokenizer.convert_tokens_to_ids](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.PreTrainedTokenizer.convert_tokens_to_ids) one.\n","- in `decodes` we return a `TitledStr` object and not just a plain string. 
That's a fastai class that adds a `show` method to the string, which will allow us to use all the fastai show methods."]},{"cell_type":"markdown","metadata":{"id":"rg1SBjFg9NYl"},"source":["##### Tokenizers"]},{"cell_type":"markdown","metadata":{"id":"7eT6Ao209qhy"},"source":["ENGLISH"]},{"cell_type":"code","metadata":{"id":"BQkp9fdc9NYl","colab":{"base_uri":"https://localhost:8080/","height":154,"referenced_widgets":["007c3da396954f70a2906377d4792428","ef838f3ace384c62b4cea145ae3f1def","e8e2ce51cc244407b612edabe6cb9347","3a81e636511542028f37db75c58b0df3","001f30a0328645bcbae474bd0ecb784c","8af84301f2c4416fa727bfd613b7ba9c","a0a0092604a5494b9e5f3947cfe4fd8e","de4dbaa8e418452cad8237812ce45aef","9df88e6e1226457e9b58d2171abc5b0b","54f9353d6516478b8b20ba8a92a14086","5f357509b9c74c239bf9a50fe3e795e0","22a330ae10f44a98a5e2210e1e0f83fe","cce461c343214f4b8ee48873d9e92722","de5bdfa89ab84e27928938d29eb3beef","05763c86c54a4c2ebff74747363eb1f8","ed5f9c349cc7448db86872a7a0bcaeab"]},"executionInfo":{"status":"ok","timestamp":1616333190116,"user_tz":-60,"elapsed":310762,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"125aa0b8-a0b9-4199-9ce9-5f0ece033486"},"source":["%%time\n","# Load the GPT2 tokenizer in English\n","from transformers import GPT2TokenizerFast, GPT2LMHeadModel\n","pretrained_weights = 'gpt2'\n","tokenizer_en = GPT2TokenizerFast.from_pretrained(pretrained_weights)\n","model_en = GPT2LMHeadModel.from_pretrained(pretrained_weights)\n","\n","# To correct the warning about token_pad (GPT2TokenizerFast), run the following code\n","# source: https://github.com/huggingface/transformers/issues/2648#issuecomment-616177044\n","tokenizer_en.pad_token = 
tokenizer_en.eos_token"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"application/vnd.jupyter.widget-view+json":{"model_id":"007c3da396954f70a2906377d4792428","version_minor":0,"version_major":2},"text/plain":["HBox(children=(FloatProgress(value=0.0, description='Downloading', max=665.0, style=ProgressStyle(description_…"]},"metadata":{"tags":[]}},{"output_type":"stream","text":["\n"],"name":"stdout"},{"output_type":"display_data","data":{"application/vnd.jupyter.widget-view+json":{"model_id":"9df88e6e1226457e9b58d2171abc5b0b","version_minor":0,"version_major":2},"text/plain":["HBox(children=(FloatProgress(value=0.0, description='Downloading', max=548118077.0, style=ProgressStyle(descri…"]},"metadata":{"tags":[]}},{"output_type":"stream","text":["\n","CPU times: user 18.4 s, sys: 2.41 s, total: 20.8 s\n","Wall time: 32.7 s\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"Xvucwww09s0a"},"source":["POLISH"]},{"cell_type":"code","metadata":{"id":"oEDiYVJy9NYo"},"source":["# Get the path to ByteLevelBPE_tokenizer_pt config files\n","ByteLevelBPE_tokenizer_pl_rep = 'ByteLevelBPE_tokenizer_pl'\n","path_to_ByteLevelBPE_tokenizer_pl_rep = path_data/ByteLevelBPE_tokenizer_pl_rep\n","\n","# import the pre-trained GPT2TokenizerFast tokenizer with the tokenizer_pt config files\n","tokenizer_pl = GPT2TokenizerFast.from_pretrained(\n"," str(path_to_ByteLevelBPE_tokenizer_pl_rep), \n"," pad_token='<|endoftext|>')\n","\n","# Get sequence length max of 1024\n","tokenizer_pl.model_max_length = 1024"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"1lSq0oP0xNvY"},"source":["tokenizer_pl.model_max_length = 1024"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"GeSgUiiz9NZq"},"source":["##### Sample (this allows us to quickly test our code======================)"]},{"cell_type":"markdown","metadata":{"id":"ZVD6ZrCs9NZq"},"source":["- train: 80%\n","- val = 
20%"]},{"cell_type":"code","metadata":{"id":"w-Lbo-bB9NZr"},"source":["df_sample = df[:1000]\n","\n","num = int(0.8*len(df_sample))\n","\n","idxs = np.random.randint(0, len(df_sample), len(df_sample))\n","idxs_train = idxs[:num]\n","idxs_val = idxs[num:]"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"xCAWqMOW9NZt"},"source":["We gather all texts in one numpy array (since it will be easier to use this way with fastai):"]},{"cell_type":"code","metadata":{"id":"lfmfeaZ49NZt","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616065678540,"user_tz":-60,"elapsed":668,"user":{"displayName":"Mark Lina","photoUrl":"","userId":"17651129667533642938"}},"outputId":"78d9d2ca-4773-47ba-bad4-5f33ef3792a5"},"source":["%%time\n","all_texts = np.concatenate([df_sample.iloc[idxs_train].text.values, df_sample.iloc[idxs_val].text.values])"],"execution_count":null,"outputs":[{"output_type":"stream","text":["CPU times: user 2.63 ms, sys: 0 ns, total: 2.63 ms\n","Wall time: 5.8 ms\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"zIi7JXj49NZv","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616065685751,"user_tz":-60,"elapsed":669,"user":{"displayName":"Mark Lina","photoUrl":"","userId":"17651129667533642938"}},"outputId":"d97cbb6b-e1ec-4f0c-ae38-7db39ee2e5c3"},"source":["%%time\n","splits = [list(idxs_train), list(idxs_val)]\n","tls = TfmdLists(all_texts, TransformersTokenizer(tokenizer_pl), splits=splits, dl_type=LMDataLoader)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["CPU times: user 8.48 ms, sys: 858 µs, total: 9.34 ms\n","Wall time: 10.4 ms\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"MgCWf26z9NZz"},"source":["We specify `dl_type=LMDataLoader` for when we will convert this `TfmdLists` to `DataLoaders`: we will use an `LMDataLoader` since we have a language modeling problem, not the usual fastai 
`TfmdDL`."]},{"cell_type":"markdown","metadata":{"id":"Nl7vLTd6wOis"},"source":[""]},{"cell_type":"markdown","metadata":{"id":"KRmicfJ99NZz"},"source":["##### All data"]},{"cell_type":"code","metadata":{"id":"71luExLxufk-"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"7hU8wH-kwU6q"},"source":["# Taking 50% of the df to be able to train one cycle in 5h"]},{"cell_type":"markdown","metadata":{"id":"1H1SGwyLuqdI"},"source":["When I tried to train a model on this df in a reasonable time, it turned out that it would take more than 10h (so this is an attempt to take 50% of the df to make it quicker)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":300},"id":"kyLrg_bi3NCG","executionInfo":{"status":"ok","timestamp":1616333191409,"user_tz":-60,"elapsed":1275,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"e78b42b1-166b-4150-be3a-686bd85d81fb"},"source":["# df1= df.sample(frac=0.5)\n","# df.sample(3)"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
text
80080Józef Krukowski (ur. 4 stycznia 1936 w Sułowcu) – ksiądz katolicki, teolog, kanonista, profesor zwyczajny, pierwszy dziekan reaktywowanego Wydziału Prawa Kanonicznego i Świeckiego na Katolickim Uniwersytecie Lubelskim. Specjalista z zakresu kanonicznego prawa małżeńskiego, stosunków Państwo-Kościół, prawa konkordatowego i prawa wyznaniowego. Wieloletni wykładowca KUL i UKSW. \\n\\nEdukację podstawową odbył w 1949 w Radecznicy. W 1953 ukończył Liceum Biskupie w Lublinie i wstąpił do Wyższego Seminarium Duchownego w Lublinie. Otrzymał święcenia kapłańskie. Na Wydziale Teologii Katolickiego Uni...
20629Rezydencja Piękna Nova – apartamentowiec na Śródmieściu Południowym w Warszawie, który stanowi połączenie kamienicy stylizowanej na XIX-wieczną architekturę z nowoczesną zabudową. Powstał w latach 2008-2010 według projektu pracowni Andrzeja Bulandy i Włodzimierza Muchy.\\n\\nNiegdyś w miejscu dzisiejszego apartamentowca znajdował się pałacyk będący siedzibą Ambasady Niemiec. W 1939 w wyniku bombardowania Warszawy w czasie II wojny światowej przez Luftwaffe budynek spłonął, a po wojnie go zburzono. Wycięto też drzewa. W okresie powojennym działka stanowiła zaplecze kamieniarskie budowy poblis...
73159Inintimajos lub Inintimeos, właśc. Tyberiusz Juliusz Inintimajos Filokajsar Filoromajos Eusebes (gr.: \"Τιβέριος Ἰούλιος Iνινθιμηος Φιλόκαισαρ Φιλορώμαίος Eυσεbής\", \"Tibérios Ioúlios Ininthimēos Filókaisar Filorṓmaíos Eusebḗs\") (zm. 239) – król Bosporu z dynastii Asandrydów od 234 do swej śmierci. Prawdopodobnie młodszy syn króla Bosporu Tyberiusza Juliusza Sauromatesa III Filokajsara Filoromajosa Eusebesa i nieznanej z imienia królowej.\\n\\nInintimajos prawdopodobnie odziedziczył imię po przodkach ze strony matki. Ze strony ojca miał perskiego, greckiego, rzymskiego, trackiego oraz prawdopo...
\n","
"],"text/plain":[" text\n","80080 Józef Krukowski (ur. 4 stycznia 1936 w Sułowcu) – ksiądz katolicki, teolog, kanonista, profesor zwyczajny, pierwszy dziekan reaktywowanego Wydziału Prawa Kanonicznego i Świeckiego na Katolickim Uniwersytecie Lubelskim. Specjalista z zakresu kanonicznego prawa małżeńskiego, stosunków Państwo-Kościół, prawa konkordatowego i prawa wyznaniowego. Wieloletni wykładowca KUL i UKSW. \\n\\nEdukację podstawową odbył w 1949 w Radecznicy. W 1953 ukończył Liceum Biskupie w Lublinie i wstąpił do Wyższego Seminarium Duchownego w Lublinie. Otrzymał święcenia kapłańskie. Na Wydziale Teologii Katolickiego Uni...\n","20629 Rezydencja Piękna Nova – apartamentowiec na Śródmieściu Południowym w Warszawie, który stanowi połączenie kamienicy stylizowanej na XIX-wieczną architekturę z nowoczesną zabudową. Powstał w latach 2008-2010 według projektu pracowni Andrzeja Bulandy i Włodzimierza Muchy.\\n\\nNiegdyś w miejscu dzisiejszego apartamentowca znajdował się pałacyk będący siedzibą Ambasady Niemiec. W 1939 w wyniku bombardowania Warszawy w czasie II wojny światowej przez Luftwaffe budynek spłonął, a po wojnie go zburzono. Wycięto też drzewa. W okresie powojennym działka stanowiła zaplecze kamieniarskie budowy poblis...\n","73159 Inintimajos lub Inintimeos, właśc. Tyberiusz Juliusz Inintimajos Filokajsar Filoromajos Eusebes (gr.: \"Τιβέριος Ἰούλιος Iνινθιμηος Φιλόκαισαρ Φιλορώμαίος Eυσεbής\", \"Tibérios Ioúlios Ininthimēos Filókaisar Filorṓmaíos Eusebḗs\") (zm. 239) – król Bosporu z dynastii Asandrydów od 234 do swej śmierci. Prawdopodobnie młodszy syn króla Bosporu Tyberiusza Juliusza Sauromatesa III Filokajsara Filoromajosa Eusebesa i nieznanej z imienia królowej.\\n\\nInintimajos prawdopodobnie odziedziczył imię po przodkach ze strony matki. 
Ze strony ojca miał perskiego, greckiego, rzymskiego, trackiego oraz prawdopo..."]},"metadata":{"tags":[]},"execution_count":30}]},{"cell_type":"markdown","metadata":{"id":"iQyvLoEGzSKb"},"source":["SAVE"]},{"cell_type":"code","metadata":{"id":"U_65aMKYzEH2"},"source":["# df1.to_csv('/content/gdrive/MyDrive/fastai/05_1pl-wiki.csv', index= False)"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Cwf-Virbzco-"},"source":["*LOAD* the 500MB file to speed up training"]},{"cell_type":"code","metadata":{"id":"Qz4Tm8eszeiO"},"source":["df = pd.read_csv('/content/gdrive/MyDrive/fastai/05_1pl-wiki.csv')"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":467},"id":"DrwL89hr5KDL","executionInfo":{"status":"ok","timestamp":1616333214579,"user_tz":-60,"elapsed":24428,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"59bb80f4-8000-490e-d7cc-e0ba0714c04e"},"source":["df.sample(5)"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
text
76544Just Dance 2014 – piąta gra z serii Just Dance, stworzona i wydana przez firmę Ubisoft. Została wydana na konsole siódmej generacji w październiku 2013 roku i jako tytuł startowy na PlayStation 4 i Xbox One. Gra została oficjalnie zapowiedziana na konferencji prasowej Ubisoftu na E3 2013. To pierwsza gra z serii, która zawiera w tytule rok, zamiast kolejnego numeru gry.\\n\\nGra obsługuje do sześciu graczy jednocześnie w wersji na Xbox One oraz maksymalnie czterech graczy w przypadku innych konsol. Wszystkie wersje zawierają tryb \"Klasyczny\" znany z poprzednich gier z serii. W trybie \"On-Sta...
69921Eugenia Róża Pragierowa z domu Berke (ur. 14 lipca 1888 w Kaliszu, zm. 5 maja 1964 w Warszawie) – prawnik, działaczka polskiego ruchu socjalistycznego, feministka, polityk Polskiej Partii Socjalistycznej i Polskiej Zjednoczonej Partii Robotniczej, członek Polskiego Komitetu Obrońców Pokoju w 1949 roku.\\n\\nPrawdopodobnie pochodziła z rodziny żydowskiej. Córka Henryka i Antoni. Uczyła się w kaliskim gimnazjum żeńskim. Uczestniczka strajków szkolnych w 1905, aresztowana, przebywała w więzieniu łącznie siedem miesięcy.\\n\\nStudiowała historię na Uniwersytecie Jagiellońskim, a w latach 1908–1911...
45974Wojna stuletnia – nazwa nadana przez XIX-wiecznych historyków serii konfliktów zbrojnych, które toczyły się przez 116 lat (z przerwami) w XIV i XV wieku między Anglią a Francją.\\n\\nPodstawową przyczyną konfliktu był spór feudalny wokół statusu króla angielskiego wobec francuskiego w kontekście posiadania przez tego pierwszego lenn w Gujennie, narastający od traktatu paryskiego z 1259, który na pewien czas uregulował owe stosunki. Drugą przyczyną, która nabrała znaczenia jednak dopiero w późniejszej fazie wojny, był konflikt sukcesyjny, powstały po śmierci w 1328 ostatniego z najstarszej li...
98686Szczepan Walkowski (ur. 20 listopada 1912 w Wieruszowie, zm. 8 czerwca 1969 w Ezpeleta w Argentynie) – ksiądz kapelan pilot Polskich Sił Powietrznych w Wielkiej Brytanii, uczestnik II wojny światowej, kawaler Orderu Odrodzenia Polski.\\n\\nUrodził się w rodzinie Stanisława i Eleonory z d. Wolna. Absolwent gimnazjum w Kępnie i Częstochowskiego Seminarium Duchownego w Krakowie. Studiował też teologię na Uniwersytecie Jagiellońskim. 25 czerwca 1939 otrzymał święcenia kapłańskie. Wikariusz w parafii Lututów. Po ataku niemieckim we wrześniu 1939 przedostał się przez Rumunię i Jugosławię do Włoch....
32167Roy Herbert Thomson (ur. 5 czerwca 1894 w Toronto, zm. 4 sierpnia 1976 w Londynie) – kanadyjski magnat prasowy, inwestor, założyciel imperium medialnego The Thomson Corporation (dzisiaj jako Thomson Reuters) i wielu innych przedsiębiorstw.\\n\\nPochodzący z wiejskiej Szkocji ród Thomsonów jest głęboko osadzony w najnowszej historii Kanady. Pierwszym historycznie osadnikiem w osadzie Scarborough (później dzielnica Toronto) był farmer David Thomson. Jego bratanek miał dziesięcioro dzieci, a wśród nich Herberta Thomsona, golibrodę w torontońskim Hotelu Grosvenor. W jego rodzinie przyszedł na św...
\n","
"],"text/plain":[" text\n","76544 Just Dance 2014 – piąta gra z serii Just Dance, stworzona i wydana przez firmę Ubisoft. Została wydana na konsole siódmej generacji w październiku 2013 roku i jako tytuł startowy na PlayStation 4 i Xbox One. Gra została oficjalnie zapowiedziana na konferencji prasowej Ubisoftu na E3 2013. To pierwsza gra z serii, która zawiera w tytule rok, zamiast kolejnego numeru gry.\\n\\nGra obsługuje do sześciu graczy jednocześnie w wersji na Xbox One oraz maksymalnie czterech graczy w przypadku innych konsol. Wszystkie wersje zawierają tryb \"Klasyczny\" znany z poprzednich gier z serii. W trybie \"On-Sta...\n","69921 Eugenia Róża Pragierowa z domu Berke (ur. 14 lipca 1888 w Kaliszu, zm. 5 maja 1964 w Warszawie) – prawnik, działaczka polskiego ruchu socjalistycznego, feministka, polityk Polskiej Partii Socjalistycznej i Polskiej Zjednoczonej Partii Robotniczej, członek Polskiego Komitetu Obrońców Pokoju w 1949 roku.\\n\\nPrawdopodobnie pochodziła z rodziny żydowskiej. Córka Henryka i Antoni. Uczyła się w kaliskim gimnazjum żeńskim. Uczestniczka strajków szkolnych w 1905, aresztowana, przebywała w więzieniu łącznie siedem miesięcy.\\n\\nStudiowała historię na Uniwersytecie Jagiellońskim, a w latach 1908–1911...\n","45974 Wojna stuletnia – nazwa nadana przez XIX-wiecznych historyków serii konfliktów zbrojnych, które toczyły się przez 116 lat (z przerwami) w XIV i XV wieku między Anglią a Francją.\\n\\nPodstawową przyczyną konfliktu był spór feudalny wokół statusu króla angielskiego wobec francuskiego w kontekście posiadania przez tego pierwszego lenn w Gujennie, narastający od traktatu paryskiego z 1259, który na pewien czas uregulował owe stosunki. Drugą przyczyną, która nabrała znaczenia jednak dopiero w późniejszej fazie wojny, był konflikt sukcesyjny, powstały po śmierci w 1328 ostatniego z najstarszej li...\n","98686 Szczepan Walkowski (ur. 20 listopada 1912 w Wieruszowie, zm. 
8 czerwca 1969 w Ezpeleta w Argentynie) – ksiądz kapelan pilot Polskich Sił Powietrznych w Wielkiej Brytanii, uczestnik II wojny światowej, kawaler Orderu Odrodzenia Polski.\\n\\nUrodził się w rodzinie Stanisława i Eleonory z d. Wolna. Absolwent gimnazjum w Kępnie i Częstochowskiego Seminarium Duchownego w Krakowie. Studiował też teologię na Uniwersytecie Jagiellońskim. 25 czerwca 1939 otrzymał święcenia kapłańskie. Wikariusz w parafii Lututów. Po ataku niemieckim we wrześniu 1939 przedostał się przez Rumunię i Jugosławię do Włoch....\n","32167 Roy Herbert Thomson (ur. 5 czerwca 1894 w Toronto, zm. 4 sierpnia 1976 w Londynie) – kanadyjski magnat prasowy, inwestor, założyciel imperium medialnego The Thomson Corporation (dzisiaj jako Thomson Reuters) i wielu innych przedsiębiorstw.\\n\\nPochodzący z wiejskiej Szkocji ród Thomsonów jest głęboko osadzony w najnowszej historii Kanady. Pierwszym historycznie osadnikiem w osadzie Scarborough (później dzielnica Toronto) był farmer David Thomson. Jego bratanek miał dziesięcioro dzieci, a wśród nich Herberta Thomsona, golibrodę w torontońskim Hotelu Grosvenor. 
W jego rodzinie przyszedł na św..."]},"metadata":{"tags":[]},"execution_count":33}]},{"cell_type":"markdown","metadata":{"id":"4oGbWUBh9NZ0"},"source":["- train: 80%\n","- val = 20%"]},{"cell_type":"code","metadata":{"id":"tHYnpj3E9NZ0"},"source":["num = int(0.8*len(df))\n","\n","idxs = np.random.randint(0, len(df), len(df))\n","idxs_train = idxs[:num]\n","idxs_val = idxs[num:]\n","\n","# save idxs train and valid\n","torch.save(idxs_train, path_data/'idxs_train.pl')\n","torch.save(idxs_val, path_data/'idxs_val.pl')"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Cmuxe717LVQr"},"source":["SAVING"]},{"cell_type":"code","metadata":{"id":"EspHnFC6CeMg"},"source":["!cp /root/.fastai/data/plwiki/idxs_train.pl /content/gdrive/MyDrive/fastai\n","!cp /root/.fastai/data/plwiki/idxs_val.pl /content/gdrive/MyDrive/fastai"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"v1yY6gJQLUAL"},"source":["LOADING"]},{"cell_type":"code","metadata":{"id":"vxtkyVDbK87W"},"source":["!cp /content/gdrive/MyDrive/fastai/idxs_train.pl /root/.fastai/data/plwiki\n","!cp /content/gdrive/MyDrive/fastai/idxs_val.pl /root/.fastai/data/plwiki"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"miw2it8R9NZ3"},"source":["# load idxs train and valid\n","idxs_train = torch.load(path_data/'idxs_train.pl')\n","idxs_val = torch.load(path_data/'idxs_val.pl')"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"zzVoS2Zy9NZ5"},"source":["We gather all texts in one numpy array (since it will be easier to use this way with fastai):"]},{"cell_type":"code","metadata":{"id":"si2TssSh9NZ5","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616333496696,"user_tz":-60,"elapsed":979,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"5dce2eef-fcd8-4579-ea83-172ac7365b32"},"source":["%%time\n","all_texts = 
np.concatenate([df.iloc[idxs_train].text.values, df.iloc[idxs_val].text.values])"],"execution_count":null,"outputs":[{"output_type":"stream","text":["CPU times: user 28.3 ms, sys: 44.6 ms, total: 72.9 ms\n","Wall time: 72.9 ms\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"JwYj28ON9NZ7","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616333499808,"user_tz":-60,"elapsed":873,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"02d80251-064d-447c-de25-b8124fd7d2ee"},"source":["%%time\n","splits = [list(idxs_train), list(idxs_val)]\n","tls = TfmdLists(all_texts, TransformersTokenizer(tokenizer_pl), splits=splits, dl_type=LMDataLoader)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Token indices sequence length is longer than the specified maximum sequence length for this model (2174 > 1024). Running this sequence through the model will result in indexing errors\n"],"name":"stderr"},{"output_type":"stream","text":["CPU times: user 91.2 ms, sys: 9.93 ms, total: 101 ms\n","Wall time: 101 ms\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"-YI7gfnh9NZ-"},"source":["We specify `dl_type=LMDataLoader` for when we will convert this `TfmdLists` to `DataLoaders`: we will use an `LMDataLoader` since we have a language modeling problem, not the usual fastai `TfmdDL`."]},{"cell_type":"markdown","metadata":{"id":"qlao5xNg9NZ-"},"source":["##### Check datasets"]},{"cell_type":"markdown","metadata":{"id":"spFg6v8j9NZ-"},"source":["In a `TfmdLists` you can access to the elements of the training or validation set quite easily:"]},{"cell_type":"code","metadata":{"id":"prVLO67E9NaA","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616333504547,"user_tz":-60,"elapsed":876,"user":{"displayName":"Marek 
Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"70a9542e-7811-410a-b957-3e1c04bbd15a"},"source":["tls.train[0],tls.valid[0]"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(tensor([39020, 685, 2526, ..., 859, 9016, 12]),\n"," tensor([ 28, 19903, 30, 15583, 19903, 30, 199, 18704, 2944, 562,\n"," 441, 15587, 11590, 2446, 7100, 25190, 4910, 24809, 5189, 18436,\n"," 14, 1978, 524, 830, 11590, 2446, 4910, 24809, 365, 8171,\n"," 5142, 389, 3921, 2601, 409, 604, 2, 343, 8500, 497,\n"," 18135, 260, 1465, 7060, 332, 2197, 1279, 1878, 14, 5272,\n"," 32828, 315, 5130, 4612, 332, 1441, 1279, 1878, 14, 11276,\n"," 1943, 32958, 8645, 389, 39, 789, 1312, 17592, 713, 9446,\n"," 3372, 289, 357, 1900, 522, 14, 51, 14, 19489, 538,\n"," 14, 19489, 6862, 389, 18704, 2944, 562, 2, 311, 15838,\n"," 9584, 1878, 388, 14, 44448, 2208, 734, 6702, 902, 8717,\n"," 42250, 332, 5631, 260, 1878, 14, 9291, 11299, 263, 5418,\n"," 1103, 389, 48, 3217, 15482, 1549, 2, 474, 25027, 10668,\n"," 23906, 1213, 954, 6039, 18335, 26532, 8603, 13271, 1903, 260,\n"," 14767, 2524, 703, 26, 476, 30428, 40665, 17936, 369, 996,\n"," 476, 30428, 15321, 1873, 338, 467, 478, 476, 30428, 15321,\n"," 388, 849, 3190, 260, 3580, 1903, 930, 82, 14, 954,\n"," 6039, 18335, 13465, 8603, 13271, 1903, 260, 28563, 4910, 24809,\n"," 4181, 4890, 260, 4039, 476, 36764, 360, 12954, 467, 311,\n"," 2446, 389, 18704, 2944, 562, 713, 199, 199, 17820, 18436,\n"," 5248, 21426, 5852, 289, 2446, 260, 3580, 2588, 12, 32705,\n"," 537, 263, 2246, 8965, 26353, 31407, 308, 14, 397, 23884,\n"," 263, 792, 16801, 11741, 605, 12, 4343, 48137, 463, 22644,\n"," 7910, 286, 282, 35843, 792, 2831, 3213, 12, 15952, 332,\n"," 260, 2006, 388, 289, 10384, 1801, 1239, 263, 48044, 405,\n"," 50, 9662, 352, 12, 9651, 359, 286, 11542, 14362, 42501,\n"," 1008, 14, 1626, 3210, 2006, 10126, 5010, 3213, 286, 6961,\n"," 274, 38639, 12, 334, 1234, 21159, 3177, 592, 2159, 263,\n"," 7748, 6599, 2624, 
2277, 29241, 84, 266, 286, 39084, 2908,\n"," 89, 5333, 33795, 1796, 14, 309, 3379, 3873, 5555, 10184,\n"," 15609, 38707, 2277, 349, 11, 45, 6377, 6735, 18796, 1153,\n"," 260, 18981, 260, 4618, 2492, 14, 199, 199, 51, 745,\n"," 296, 21820, 532, 306, 389, 18704, 2944, 562, 2, 9511,\n"," 332, 260, 4428, 1588, 12, 21761, 263, 1156, 7147, 21820,\n"," 628, 260, 2442, 21971, 14, 10940, 7327, 12533, 541, 18265,\n"," 87, 624, 33675, 792, 1720, 286, 38455, 7022, 7106, 7790,\n"," 21316, 12, 4982, 5189, 18436, 2913, 8279, 1739, 14, 651,\n"," 29901, 387, 792, 2446, 45357, 306, 17546, 28744, 12, 422,\n"," 1166, 1571, 15447, 2092, 14, 5680, 2944, 562, 461, 1812,\n"," 41826, 13582, 29372, 12, 2194, 5629, 2907, 312, 12, 18419,\n"," 1187, 504, 286, 22206, 577, 24501, 14, 10579, 302, 5418,\n"," 12391, 306, 29025, 21971, 478, 17546, 28744, 14, 37087, 306,\n"," 5418, 1303, 7624, 443, 9711, 14417, 41533, 3005, 11055, 1087,\n"," 1763, 14, 5680, 2944, 562, 1249, 1167, 5098, 260, 3041,\n"," 1189, 1318, 1878, 465, 12, 2045, 519, 2053, 289, 372,\n"," 40908, 715, 14063, 29560, 335, 5130, 826, 14974, 289, 1441,\n"," 1279, 14, 17376, 13292, 260, 3527, 20091, 17304, 365, 9590,\n"," 9560, 12, 1762, 29445, 12, 533, 461, 5832, 2711, 18449,\n"," 460, 263, 937, 5189, 18436, 12, 3406, 2620, 7324, 22033,\n"," 35867, 39910, 5504, 39443, 298, 25190, 14, 199, 199, 2173,\n"," 4859, 1196, 17746, 1103, 421, 789, 1312, 17592, 12, 2228,\n"," 325, 1200, 1549, 478, 6531, 462, 2601, 42239, 14, 4225,\n"," 15013, 648, 22957, 45477, 306, 6240, 3062, 294, 12, 2045,\n"," 24716, 332, 263, 1446, 11462, 12, 2728, 5189, 18436, 15952,\n"," 332, 10384, 1756, 4417, 2446, 14, 199, 199, 47335, 359,\n"," 6943, 12, 533, 4158, 5189, 18436, 939, 12473, 16759, 866,\n"," 5418, 12, 7839, 5676, 1501, 18410, 9439, 1070, 286, 1873,\n"," 827, 466, 12, 530, 372, 2712, 22639, 274, 792, 43346,\n"," 21316, 14, 9446, 9769, 830, 11299, 389, 421, 789, 1312,\n"," 17592, 389, 12, 40583, 12838, 40192, 13, 32715, 10232, 263,\n"," 37157, 41826, 277, 
286, 1073, 14, 37608, 263, 2084, 35536,\n"," 392, 34827, 389, 34437, 6377, 4910, 24809, 12, 2314, 266,\n"," 35137, 13288, 12391, 306, 372, 1705, 19090, 32776, 390, 2353,\n"," 18514, 18658, 25190, 14, 25965, 10232, 12, 334, 7803, 11299,\n"," 524, 2228, 325, 1200, 1549, 14, 37718, 274, 2923, 402,\n"," 286, 389, 25118, 2966, 10909, 2, 5189, 18436, 12, 571,\n"," 625, 320, 3397, 283, 443, 8562, 14, 350, 4415, 474,\n"," 14471, 260, 2068, 14767, 289, 18335, 26532, 8603, 13271, 1903,\n"," 286, 4013, 2394, 263, 1341, 12, 260, 592, 26532, 12954,\n"," 14, 34987, 28839, 5418, 12, 524, 389, 3062, 294, 389,\n"," 14, 1069, 28416, 4995, 286, 3785, 11299, 524, 389, 6531,\n"," 462, 2601, 42239, 713, 5189, 18436, 5402, 11114, 28302, 433,\n"," 1737, 389, 34437, 6377, 4802, 347, 334, 46534, 298, 1010,\n"," 2244, 556, 356, 496, 524, 21873, 1325, 15, 438, 278,\n"," 1152, 438, 273, 1535, 469, 78, 24565, 27787, 3462, 37025,\n"," 1310, 1300, 450, 1010, 4357, 77, 12, 533, 372, 11033,\n"," 77, 811, 15907, 342, 260, 413, 31008, 1325, 15, 14483,\n"," 438, 32540, 18190, 11012, 1010, 22237, 28702, 1092, 286, 9236,\n"," 1850, 28271, 394, 22911, 524, 389, 8448, 42090, 1301, 34464,\n"," 713, 5189, 18436, 2214, 314, 12, 533, 524, 792, 17952,\n"," 521, 10232, 263, 5418, 14, 43769, 274, 18686, 12, 2055,\n"," 18558, 286, 263, 2055, 29735, 332, 35037, 14, 199, 199,\n"," 199, 37439, 7290, 41516, 532]))"]},"metadata":{"tags":[]},"execution_count":40}]},{"cell_type":"markdown","metadata":{"id":"NprBhqyf9NaG"},"source":["They are not the same. 
We can see the shapes are different:"]},{"cell_type":"code","metadata":{"id":"y_nIjjcT9NaH","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616234915402,"user_tz":-60,"elapsed":4168,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"b87f1837-da08-4ad5-8851-e895f001429b"},"source":["tls.tfms(tls.train.items[0]).shape, tls.tfms(tls.valid.items[0]).shape"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(torch.Size([468]), torch.Size([402]))"]},"metadata":{"tags":[]},"execution_count":33}]},{"cell_type":"markdown","metadata":{"id":"g3OpL9Zt9NaI"},"source":["And we can have a look at both decodes using `show_at`:"]},{"cell_type":"code","metadata":{"scrolled":true,"id":"865QmFWs9NaJ"},"source":["show_at(tls.train, 0)"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"06TWBdZL9NaL"},"source":["#### 5.2 fastai v2 Dataloaders"]},{"cell_type":"markdown","metadata":{"id":"V9QUUcJ89NaL"},"source":["*(text from Sylvain Gugger Transformers Tutorial)* The fastai v2 library expects the data to be assembled in a `DataLoaders` object (something that has a training and validation dataloader). We can get one by using the `dataloaders` method. We just have to specify a batch size and a sequence length. 
\n","\n","Since the GPT-2 model was trained with sequences of size 1024, we use this sequence length (it's a stateless model, so it will change the perplexity if we use less)."]},{"cell_type":"code","metadata":{"id":"5LWheGOlvIHF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616237390858,"user_tz":-60,"elapsed":915842,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"95c4012d-6322-46c3-c9ce-7ab306a5b20a"},"source":["# %%time\n","# bs,sl = 6,1024\n","# dls = tls.dataloaders(bs=bs, seq_len=sl)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Token indices sequence length is longer than the specified maximum sequence length for this model (4097 > 1024). Running this sequence through the model will result in indexing errors\n"],"name":"stderr"},{"output_type":"stream","text":["CPU times: user 13min 39s, sys: 1min 35s, total: 15min 14s\n","Wall time: 15min 15s\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"4yje5bBSLMMt"},"source":["To avoid problems like the one above and running out of GPU RAM, the batch size needs to be decreased (from 6 to 2):"]},{"cell_type":"code","metadata":{"id":"3wKo4BNVHb3S","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616334015758,"user_tz":-60,"elapsed":501984,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"c7c5f255-096e-49da-f03e-be36b3b8a837"},"source":["%%time\n","bs,sl = 2,1024\n","dls = tls.dataloaders(bs=bs, seq_len=sl)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["CPU times: user 8min 5s, sys: 9.01 s, total: 8min 14s\n","Wall time: 8min 20s\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"eEuli0s5DKXx"},"source":["It went well with bs, sl = 2, 1024"]},{"cell_type":"markdown","metadata":{"id":"QjhygHGL9Naw"},"source":["# 6.2 Learner"]},{"cell_type":"markdown","metadata":{"id":"FQ5AW-oy9Naw"},"source":["*(text from 
Sylvain Gugger Transformers Tutorial)* Now, we are ready to create our `Learner`, which is a fastai object grouping data, model and loss function and handles model training or inference. Since we are in a language model setting, we pass accuracy and perplexity as metrics, and we need to use the callback we just defined. Lastly, we use mixed precision to save every bit of memory we can (and if you have a modern GPU, it will also make training faster)."]},{"cell_type":"code","metadata":{"id":"C7ANaSLS9Nax"},"source":["# Learner: basic class for handling the training loop\n","# source: https://dev.fast.ai/learner#Learner\n","learn = Learner(dls, model_en, loss_func=CrossEntropyLossFlat(),\n"," splitter = splitter,\n"," cbs=[DropOutput], \n"," metrics=[accuracy, Perplexity()]).to_fp16()"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"MH_-M0Uk9Naz","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616334677490,"user_tz":-60,"elapsed":868,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"7a5a285f-c180-4b98-9047-8b3c5d0000f5"},"source":["# Check the number of parameters groups and the hyperparameters values\n","learn.create_opt()\n","print(f'number of parameters groups: {len(learn.opt.param_groups)}')\n","\n","# ... 
and the list of learning rates (before they are updated by the optimizer in fit_one_cycle())\n","for i,h in enumerate(learn.opt.hypers):\n"," print(i,h)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["number of parameters groups: 4\n","0 {'wd': 0.01, 'sqr_mom': 0.99, 'lr': 0.001, 'mom': 0.9, 'eps': 1e-05}\n","1 {'wd': 0.01, 'sqr_mom': 0.99, 'lr': 0.001, 'mom': 0.9, 'eps': 1e-05}\n","2 {'wd': 0.01, 'sqr_mom': 0.99, 'lr': 0.001, 'mom': 0.9, 'eps': 1e-05}\n","3 {'wd': 0.01, 'sqr_mom': 0.99, 'lr': 0.001, 'mom': 0.9, 'eps': 1e-05}\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"8sCPQ_ui9Na3"},"source":["- Loss = 9.50\n","- accuracy = 0.074\n","- perplexity = 13303.82"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":72},"id":"8X8heciU7pIn","executionInfo":{"status":"ok","timestamp":1616342819960,"user_tz":-60,"elapsed":8138398,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"a4185400-ac36-4f51-a266-241b6f233a33"},"source":["%%time\n","# loss, accuracy, Perplexity() of validation dataset\n","learn.validate()"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/html":[""],"text/plain":[""]},"metadata":{"tags":[]}},{"output_type":"stream","text":["CPU times: user 1h 22min 15s, sys: 53min 26s, total: 2h 15min 42s\n","Wall time: 2h 15min 37s\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":["(#3) [9.495806694030762,0.07362030446529388,13303.822265625]"]},"metadata":{"tags":[]},"execution_count":49}]},{"cell_type":"markdown","metadata":{"id":"hEEq5pLsDcHs"},"source":["For the 1GB file, the results are:"]},{"cell_type":"code","metadata":{"id":"8zuhDngwvamp","colab":{"base_uri":"https://localhost:8080/","height":72},"executionInfo":{"status":"ok","timestamp":1616164560259,"user_tz":-60,"elapsed":15660926,"user":{"displayName":"Marek 
Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"5f30e595-7eca-4885-de1b-5c94ce353d77"},"source":["# %%time\n","# # loss, accuracy, Perplexity() of validation dataset\n","# learn.validate()"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/html":[""],"text/plain":[""]},"metadata":{"tags":[]}},{"output_type":"stream","text":["CPU times: user 2h 28min 1s, sys: 1h 53min 5s, total: 4h 21min 7s\n","Wall time: 4h 20min 59s\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":["(#3) [9.487800598144531,0.0741734430193901,13197.736328125]"]},"metadata":{"tags":[]},"execution_count":64}]},{"cell_type":"markdown","metadata":{"id":"WprBe3MM9Na3"},"source":["Now that we have a `Learner`, we will use during training all the **fine-tuning techniques** seen for classification model training (see the notebook [10_nlp.ipynb](https://github.com/fastai/fastbook/blob/master/10_nlp.ipynb) about \"NLP Deep Dive: RNNs\") to take advantage of the **Transfer Learning** of the GPT-2 pre-trained embeddings and model from Hugging Face Transformers:\n","- **learning rate finder** (method that helps finding the best learning rate to train the model)\n","- **Mixed precision training** (some of the operations will be done in FP16, others in FP32 in order to speed up the training)\n","- **gradual unfreezing** (the model has 4 layers groups created by our method `splitter` : the embedding one and the 3 groups of 4 decoder blocks each)\n","- **1cycle policy** with the method [fit_one_cycle()](https://dev.fast.ai/callback.schedule#Learner.fit_one_cycle) (The 1cycle policy was introduced by Leslie N. Smith et al. in Super-Convergence: Very Fast Training of Neural Networks Using Large Learning Rates. 
It schedules the learning rate with a cosine annealing from `lr_max/div` to `lr_max` then `lr_max/div_final` (pass an array to `lr_max` if you want to use differential learning rates) and the momentum with cosine annealing according to the values in `moms`. The first phase takes `pct_start` of the training. You can optionally pass additional `cbs` and `reset_opt`.)\n","- **differential learning rates** (each layers group with a learning rate different: the biggest one for the embeddings group, and the smallest one for the first 4 decoder blocks)"]},{"cell_type":"markdown","metadata":{"id":"yEmA-zFH9Na3"},"source":["##### 6.2.1 Freeze all layers but the last layers group (do not freeze `wte`, `wpe` embeddings matrices and last `LayerNorm`)"]},{"cell_type":"code","metadata":{"id":"sL3Ama1F9Na4","colab":{"base_uri":"https://localhost:8080/","height":1000},"executionInfo":{"status":"ok","timestamp":1616342821161,"user_tz":-60,"elapsed":8129374,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"5da0ca90-7da2-4433-bf02-986ef9fa00fc"},"source":["learn.freeze()\n","learn.summary()"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/html":[""],"text/plain":[""]},"metadata":{"tags":[]}},{"output_type":"execute_result","data":{"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"},"text/plain":["GPT2LMHeadModel (Input shape: 2)\n","============================================================================\n","Layer (type) Output Shape Param # Trainable \n","============================================================================\n"," 2 x 1024 x 768 \n","Embedding 38597376 True \n","Embedding 786432 True \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 2304 \n","Conv1D 1771776 False \n","Conv1D 590592 False \n","Dropout \n","Dropout \n","LayerNorm 1536 True 
\n","____________________________________________________________________________\n"," 2 x 1024 x 3072 \n","Conv1D 2362368 False \n","____________________________________________________________________________\n"," 2 x 1024 x 768 \n","Conv1D 2360064 False \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 2304 \n","Conv1D 1771776 False \n","Conv1D 590592 False \n","Dropout \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 3072 \n","Conv1D 2362368 False \n","____________________________________________________________________________\n"," 2 x 1024 x 768 \n","Conv1D 2360064 False \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 2304 \n","Conv1D 1771776 False \n","Conv1D 590592 False \n","Dropout \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 3072 \n","Conv1D 2362368 False \n","____________________________________________________________________________\n"," 2 x 1024 x 768 \n","Conv1D 2360064 False \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 2304 \n","Conv1D 1771776 False \n","Conv1D 590592 False \n","Dropout \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 3072 \n","Conv1D 2362368 False \n","____________________________________________________________________________\n"," 2 x 1024 x 768 \n","Conv1D 2360064 False \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 2304 \n","Conv1D 1771776 False \n","Conv1D 590592 False \n","Dropout \n","Dropout \n","LayerNorm 1536 True 
\n","____________________________________________________________________________\n"," 2 x 1024 x 3072 \n","Conv1D 2362368 False \n","____________________________________________________________________________\n"," 2 x 1024 x 768 \n","Conv1D 2360064 False \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 2304 \n","Conv1D 1771776 False \n","Conv1D 590592 False \n","Dropout \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 3072 \n","Conv1D 2362368 False \n","____________________________________________________________________________\n"," 2 x 1024 x 768 \n","Conv1D 2360064 False \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 2304 \n","Conv1D 1771776 False \n","Conv1D 590592 False \n","Dropout \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 3072 \n","Conv1D 2362368 False \n","____________________________________________________________________________\n"," 2 x 1024 x 768 \n","Conv1D 2360064 False \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 2304 \n","Conv1D 1771776 False \n","Conv1D 590592 False \n","Dropout \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 3072 \n","Conv1D 2362368 False \n","____________________________________________________________________________\n"," 2 x 1024 x 768 \n","Conv1D 2360064 False \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 2304 \n","Conv1D 1771776 False \n","Conv1D 590592 False \n","Dropout \n","Dropout \n","LayerNorm 1536 True 
\n","____________________________________________________________________________\n"," 2 x 1024 x 3072 \n","Conv1D 2362368 False \n","____________________________________________________________________________\n"," 2 x 1024 x 768 \n","Conv1D 2360064 False \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 2304 \n","Conv1D 1771776 False \n","Conv1D 590592 False \n","Dropout \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 3072 \n","Conv1D 2362368 False \n","____________________________________________________________________________\n"," 2 x 1024 x 768 \n","Conv1D 2360064 False \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 2304 \n","Conv1D 1771776 False \n","Conv1D 590592 False \n","Dropout \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 3072 \n","Conv1D 2362368 False \n","____________________________________________________________________________\n"," 2 x 1024 x 768 \n","Conv1D 2360064 False \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 2304 \n","Conv1D 1771776 False \n","Conv1D 590592 False \n","Dropout \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 3072 \n","Conv1D 2362368 False \n","____________________________________________________________________________\n"," 2 x 1024 x 768 \n","Conv1D 2360064 False \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 50257 \n","Linear 38597376 True 
\n","____________________________________________________________________________\n","\n","Total params: 163,037,184\n","Total trainable params: 78,019,584\n","Total non-trainable params: 85,017,600\n","\n","Optimizer used: \n","Loss function: FlattenedLoss of CrossEntropyLoss()\n","\n","Model frozen up to parameter group #3\n","\n","Callbacks:\n"," - TrainEvalCallback\n"," - DropOutput\n"," - MixedPrecision\n"," - Recorder\n"," - ProgressCallback"]},"metadata":{"tags":[]},"execution_count":50}]},{"cell_type":"markdown","metadata":{"id":"I7VLvLdc9Na6"},"source":["The `learn.summary()` method gives almost the right numbers. In fact, it counts the weights of the wte matrix (vocab embeddings) twice, because they are duplicated in the weights of the output linear layer.\n","\n","The real numbers are:\n","- Total params: 163,037,184 - 38,597,376 = **124,439,808** (about 124 million)\n","- Total trainable params: 78,019,584 - 38,597,376 = **39,422,208** (about 39 million)\n","- Total non-trainable params: **85,017,600** (about 85 million)"]},{"cell_type":"markdown","metadata":{"id":"NFsAJMTEISbC"},"source":["SAVE (first time)"]},{"cell_type":"code","metadata":{"id":"0d9Sr2KSVbWt"},"source":["learn.save(path_data/'GPT2_pl_before_lr_find_bs_sl_2_1024')\n","!cp /root/.fastai/data/plwiki/GPT2_pl_before_lr_find_bs_sl_2_1024.pth /content/gdrive/MyDrive/fastai"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"nvojtjn1GgI4"},"source":["LOAD"]},{"cell_type":"code","metadata":{"id":"bJ38DTzvGlM0"},"source":["!cp /root/.fastai/data/plwiki/GPT2_pl_before_lr_find_bs_sl_2_1024.pth /root/.fastai/data/plwiki/"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"4v84AvLnvczm"},"source":[""]},{"cell_type":"markdown","metadata":{"id":"Q-UUlHqMjwto"},"source":["# 1st 
attempt\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":403},"id":"OZMMifcQuHeQ","executionInfo":{"status":"error","timestamp":1616360451299,"user_tz":-60,"elapsed":14548284,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"81fd1afd-0d78-473e-8f6b-d680ec9dd97f"},"source":["learn.fit_one_cycle(1, 0.1)"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/html":["\n","
\n"," \n"," \n"," 0.00% [0/1 00:00<00:00]\n","
\n"," \n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
epochtrain_lossvalid_lossaccuracyperplexitytime

\n","\n","

\n"," \n"," \n"," 18.58% [8027/43206 4:02:23<17:42:19 nan]\n","
\n"," "],"text/plain":[""]},"metadata":{"tags":[]}},{"output_type":"error","ename":"KeyboardInterrupt","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mlearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_one_cycle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0.1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/callback/schedule.py\u001b[0m in \u001b[0;36mfit_one_cycle\u001b[0;34m(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt)\u001b[0m\n\u001b[1;32m 110\u001b[0m scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),\n\u001b[1;32m 111\u001b[0m 'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}\n\u001b[0;32m--> 112\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_epoch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcbs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mParamScheduler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscheds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mL\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcbs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreset_opt\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mreset_opt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwd\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mwd\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 113\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 114\u001b[0m \u001b[0;31m# 
Cell\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/learner.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, n_epoch, lr, wd, cbs, reset_opt)\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_hypers\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlr\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlr\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlr\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mlr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 211\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_epoch\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mn_epoch\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 212\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_with_events\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_do_fit\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'fit'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCancelFitException\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_end_cleanup\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 213\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 214\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_end_cleanup\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdl\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mxb\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0myb\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpred\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/learner.py\u001b[0m in \u001b[0;36m_with_events\u001b[0;34m(self, f, event_type, ex, final)\u001b[0m\n\u001b[1;32m 158\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_with_events\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mevent_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfinal\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnoop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 160\u001b[0;31m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'before_{event_type}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 161\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m:\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'after_cancel_{event_type}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'after_{event_type}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mfinal\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/learner.py\u001b[0m in \u001b[0;36m_do_fit\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 201\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mepoch\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_epoch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 202\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mepoch\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mepoch\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 203\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_with_events\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_do_epoch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'epoch'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCancelEpochException\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 204\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 205\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_epoch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlr\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwd\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcbs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mreset_opt\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/learner.py\u001b[0m in \u001b[0;36m_with_events\u001b[0;34m(self, f, event_type, ex, final)\u001b[0m\n\u001b[1;32m 158\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_with_events\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mevent_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfinal\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnoop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 160\u001b[0;31m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'before_{event_type}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 161\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'after_cancel_{event_type}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'after_{event_type}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mfinal\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/learner.py\u001b[0m in \u001b[0;36m_do_epoch\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 195\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 196\u001b[0m \u001b[0;32mdef\u001b[0m 
\u001b[0m_do_epoch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 197\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_do_epoch_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 198\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_do_epoch_validate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/learner.py\u001b[0m in \u001b[0;36m_do_epoch_train\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_do_epoch_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 188\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdl\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 189\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_with_events\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mall_batches\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'train'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCancelTrainException\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 190\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 191\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_do_epoch_validate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mds_idx\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mdl\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/learner.py\u001b[0m in \u001b[0;36m_with_events\u001b[0;34m(self, f, event_type, ex, final)\u001b[0m\n\u001b[1;32m 158\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_with_events\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mevent_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfinal\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnoop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 160\u001b[0;31m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'before_{event_type}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 161\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'after_cancel_{event_type}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'after_{event_type}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mfinal\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/learner.py\u001b[0m in \u001b[0;36mall_batches\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[0;32mdef\u001b[0m 
\u001b[0mall_batches\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_iter\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdl\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 166\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mo\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdl\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mone_batch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mo\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 167\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 168\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_do_one_batch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/learner.py\u001b[0m in \u001b[0;36mone_batch\u001b[0;34m(self, i, b)\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[0mb_on_device\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtuple\u001b[0m\u001b[0;34m(\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdevice\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdevice\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0me\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mb\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m\"to\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdevice\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 184\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_split\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb_on_device\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 185\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_with_events\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_do_one_batch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'batch'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCancelBatchException\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 186\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_do_epoch_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/learner.py\u001b[0m in \u001b[0;36m_with_events\u001b[0;34m(self, f, event_type, ex, final)\u001b[0m\n\u001b[1;32m 158\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_with_events\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mevent_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfinal\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnoop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 160\u001b[0;31m 
\u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'before_{event_type}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 161\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'after_cancel_{event_type}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'after_{event_type}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mfinal\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/learner.py\u001b[0m in \u001b[0;36m_do_one_batch\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 175\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtraining\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0myb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;32mreturn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 176\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'before_backward'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 177\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloss_grad\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 178\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_with_events\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'step'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCancelStepException\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 179\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mzero_grad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph)\u001b[0m\n\u001b[1;32m 218\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 219\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 220\u001b[0;31m create_graph=create_graph)\n\u001b[0m\u001b[1;32m 221\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 222\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/overrides.py\u001b[0m in \u001b[0;36mhandle_torch_function\u001b[0;34m(public_api, relevant_args, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1058\u001b[0m \u001b[0;31m# Use `public_api` instead of `implementation` so 
__torch_function__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1059\u001b[0m \u001b[0;31m# implementations can do equality/identity comparisons.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1060\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0moverloaded_arg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__torch_function__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpublic_api\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtypes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1061\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1062\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mNotImplemented\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/torch_core.py\u001b[0m in \u001b[0;36m__torch_function__\u001b[0;34m(self, func, types, args, kwargs)\u001b[0m\n\u001b[1;32m 327\u001b[0m \u001b[0mconvert\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_torch_handled\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_opt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mconvert\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mtypes\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTensor\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 329\u001b[0;31m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__torch_function__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtypes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 330\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mconvert\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconvert\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mres\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 331\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mres\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mTensorBase\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_meta\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mas_copy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/tensor.py\u001b[0m in \u001b[0;36m__torch_function__\u001b[0;34m(cls, func, types, args, kwargs)\u001b[0m\n\u001b[1;32m 993\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 994\u001b[0m \u001b[0;32mwith\u001b[0m 
\u001b[0m_C\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDisableTorchFunction\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 995\u001b[0;31m \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 996\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_convert\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mret\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 997\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph)\u001b[0m\n\u001b[1;32m 219\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 220\u001b[0m create_graph=create_graph)\n\u001b[0;32m--> 221\u001b[0;31m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 222\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 223\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables)\u001b[0m\n\u001b[1;32m 130\u001b[0m Variable._execution_engine.run_backward(\n\u001b[1;32m 131\u001b[0m \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 132\u001b[0;31m allow_unreachable=True) # allow_unreachable flag\n\u001b[0m\u001b[1;32m 133\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 134\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mKeyboardInterrupt\u001b[0m: "]}]},{"cell_type":"markdown","metadata":{"id":"ahH6fionvuVH"},"source":[""]},{"cell_type":"markdown","metadata":{"id":"HRnjCdBCvrKH"},"source":["This takes a long time: about 4 hours for roughly 20% of one epoch."]},{"cell_type":"code","metadata":{"id":"aDgnwvokD9Xj"},"source":["# learn.fit_one_cycle(1, 5e-1)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"EJcMQOqgkA8M"},"source":["learn.save(path_data/'GPT2_pl_before_lr_find_bs_sl_2_1024_5e_1')\n","!cp /root/.fastai/data/plwiki/GPT2_pl_before_lr_find_bs_sl_2_1024_5e_1.pth /content/gdrive/MyDrive/fastai"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":265},"id":"TD7HC2sNkDx5","executionInfo":{"status":"ok","timestamp":1616360472147,"user_tz":-60,"elapsed":1601,"user":{"displayName":"Marek 
Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"62e03a97-6bec-4619-e893-68f688aa48da"},"source":["learn.recorder.plot_loss()"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[],"needs_background":"light"}}]}]} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.7"},"colab":{"name":"finetuning-English-GPT2-any-language-Polish-HuggingFace-fastaiv2.ipynb","provenance":[{"file_id":"1n7sol-CvBSblO33ScpgQQIfy5v_VJVQb","timestamp":1597923568454},{"file_id":"1qrhSZ4nBKgv2sz_-OjzEsiuWYHUyOeq9","timestamp":1597489060569},{"file_id":"1d_tsC-i3804eHIBJsoy6QNZ6Jm4JtgJ-","timestamp":1597391589678}],"collapsed_sections":["D18pRY2C9NUf","eAs4xPsR9NUj","Z6oUdKay9NUv","kQrp3wcB9NVL","DAvDfxhq9NVQ","6NUQ_V299NVR","7bKWxnx19NVT","i2frlbhv9NVU","j6WB2Qyy9NVX","CmKm8BETBpB2","P-2JYQnD9N6F","NBx8vCFkBtDu","Zad8TrPNybqT","GeSgUiiz9NZq"],"toc_visible":true},"accelerator":"GPU","widgets":{"application/vnd.jupyter.widget-state+json":{"7f0767ff35a344b0b6da17083132427e":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","state":{"_view_name":"HBoxView","_dom_classes":[],"_model_name":"HBoxModel","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.5.0","box_style":"","layout":"IPY_MODEL_596851de3e714ab8942a72f69b18a63c","_model_module":"@jupyter-widgets/controls","children":["IPY_MODEL_f0a9328a838041c0979b2b709b0b42a0","IPY_MODEL_156da666d81b4026941d5acb5b487128"]}},"596851de3e714ab8942a72f69b18a63c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"
_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"f0a9328a838041c0979b2b709b0b42a0":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","state":{"_view_name":"ProgressView","style":"IPY_MODEL_87da6b072be3484b98e01b534d1a751a","_dom_classes":[],"description":"Downloading: 100%","_model_name":"FloatProgressModel","bar_style":"success","max":1042301,"_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":1042301,"_view_count":null,"_view_module_version":"1.5.0","orientation":"horizontal","min":0,"description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_1ae0380be03140089589c6d485e7c1d0"}},"156da666d81b4026941d5acb5b487128":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","state":{"_view_name":"HTMLView","style":"IPY_MODEL_f8b5835af84c482092be9d849cb9bd02","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":" 1.04M/1.04M [00:14<00:00, 
73.6kB/s]","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_60b9f541de2648f08b7ad6257f32499b"}},"87da6b072be3484b98e01b534d1a751a":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","state":{"_view_name":"StyleView","_model_name":"ProgressStyleModel","description_width":"initial","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","bar_color":null,"_model_module":"@jupyter-widgets/controls"}},"1ae0380be03140089589c6d485e7c1d0":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"f8b5835af84c482092be9d849cb9bd02":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"60b9f541de2648f08b7ad6257f3249
9b":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"7d5d97ff327f4d6aa5b44fc7656f07d8":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","state":{"_view_name":"HBoxView","_dom_classes":[],"_model_name":"HBoxModel","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.5.0","box_style":"","layout":"IPY_MODEL_c7f7c2744375405298d3b95a6aa2b361","_model_module":"@jupyter-widgets/controls","children":["IPY_MODEL_bdfee3f0cab649989a96841b3a487635","IPY_MODEL_8e3b1e20656248c8a9d92c26028d5026"]}},"c7f7c2744375405298d3b95a6aa2b361":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":
null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"bdfee3f0cab649989a96841b3a487635":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","state":{"_view_name":"ProgressView","style":"IPY_MODEL_e99dd500836c4156a91e7a75b18a683d","_dom_classes":[],"description":"Downloading: 100%","_model_name":"FloatProgressModel","bar_style":"success","max":456318,"_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":456318,"_view_count":null,"_view_module_version":"1.5.0","orientation":"horizontal","min":0,"description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_63d976528be746609f4910ca4b73bfc8"}},"8e3b1e20656248c8a9d92c26028d5026":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","state":{"_view_name":"HTMLView","style":"IPY_MODEL_91552ff8f58d4563b60b8a1764704aa8","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":" 456k/456k [00:02<00:00, 
200kB/s]","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_cb3db61b459f4082a5c2aca136564999"}},"e99dd500836c4156a91e7a75b18a683d":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","state":{"_view_name":"StyleView","_model_name":"ProgressStyleModel","description_width":"initial","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","bar_color":null,"_model_module":"@jupyter-widgets/controls"}},"63d976528be746609f4910ca4b73bfc8":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"91552ff8f58d4563b60b8a1764704aa8":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"cb3db61b459f4082a5c2aca13656499
9":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"0a3b0c8f447543899950a6d2ff342a57":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","state":{"_view_name":"HBoxView","_dom_classes":[],"_model_name":"HBoxModel","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.5.0","box_style":"","layout":"IPY_MODEL_b428499e94534b6ebdc7be8f4adea734","_model_module":"@jupyter-widgets/controls","children":["IPY_MODEL_18501534dc6a496b99e7017805acd781","IPY_MODEL_75c7bd71ca2b4f3b93f53e2355e2856d"]}},"b428499e94534b6ebdc7be8f4adea734":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":n
ull,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"18501534dc6a496b99e7017805acd781":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","state":{"_view_name":"ProgressView","style":"IPY_MODEL_edc9e07a4bc14e4cb9ba9dde5a75d5aa","_dom_classes":[],"description":"Downloading: 100%","_model_name":"FloatProgressModel","bar_style":"success","max":1355256,"_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":1355256,"_view_count":null,"_view_module_version":"1.5.0","orientation":"horizontal","min":0,"description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_75e507e0573244a8a0c1fb9f7704322e"}},"75c7bd71ca2b4f3b93f53e2355e2856d":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","state":{"_view_name":"HTMLView","style":"IPY_MODEL_c098c5b96cb2449688d1516eae241690","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":" 1.36M/1.36M [00:09<00:00, 
140kB/s]","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_40b5ac5ade1b46e4b04b5eee03840cf6"}},"edc9e07a4bc14e4cb9ba9dde5a75d5aa":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","state":{"_view_name":"StyleView","_model_name":"ProgressStyleModel","description_width":"initial","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","bar_color":null,"_model_module":"@jupyter-widgets/controls"}},"75e507e0573244a8a0c1fb9f7704322e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"c098c5b96cb2449688d1516eae241690":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"40b5ac5ade1b46e4b04b5eee03840cf
6":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"007c3da396954f70a2906377d4792428":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","state":{"_view_name":"HBoxView","_dom_classes":[],"_model_name":"HBoxModel","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.5.0","box_style":"","layout":"IPY_MODEL_ef838f3ace384c62b4cea145ae3f1def","_model_module":"@jupyter-widgets/controls","children":["IPY_MODEL_e8e2ce51cc244407b612edabe6cb9347","IPY_MODEL_3a81e636511542028f37db75c58b0df3"]}},"ef838f3ace384c62b4cea145ae3f1def":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":n
ull,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"e8e2ce51cc244407b612edabe6cb9347":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","state":{"_view_name":"ProgressView","style":"IPY_MODEL_001f30a0328645bcbae474bd0ecb784c","_dom_classes":[],"description":"Downloading: 100%","_model_name":"FloatProgressModel","bar_style":"success","max":665,"_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":665,"_view_count":null,"_view_module_version":"1.5.0","orientation":"horizontal","min":0,"description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_8af84301f2c4416fa727bfd613b7ba9c"}},"3a81e636511542028f37db75c58b0df3":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","state":{"_view_name":"HTMLView","style":"IPY_MODEL_a0a0092604a5494b9e5f3947cfe4fd8e","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":" 665/665 [00:22<00:00, 
29.5B/s]","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_de4dbaa8e418452cad8237812ce45aef"}},"001f30a0328645bcbae474bd0ecb784c":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","state":{"_view_name":"StyleView","_model_name":"ProgressStyleModel","description_width":"initial","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","bar_color":null,"_model_module":"@jupyter-widgets/controls"}},"8af84301f2c4416fa727bfd613b7ba9c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"a0a0092604a5494b9e5f3947cfe4fd8e":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"de4dbaa8e418452cad8237812ce45ae
f":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"9df88e6e1226457e9b58d2171abc5b0b":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","state":{"_view_name":"HBoxView","_dom_classes":[],"_model_name":"HBoxModel","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.5.0","box_style":"","layout":"IPY_MODEL_54f9353d6516478b8b20ba8a92a14086","_model_module":"@jupyter-widgets/controls","children":["IPY_MODEL_5f357509b9c74c239bf9a50fe3e795e0","IPY_MODEL_22a330ae10f44a98a5e2210e1e0f83fe"]}},"54f9353d6516478b8b20ba8a92a14086":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":n
ull,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"5f357509b9c74c239bf9a50fe3e795e0":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","state":{"_view_name":"ProgressView","style":"IPY_MODEL_cce461c343214f4b8ee48873d9e92722","_dom_classes":[],"description":"Downloading: 100%","_model_name":"FloatProgressModel","bar_style":"success","max":548118077,"_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":548118077,"_view_count":null,"_view_module_version":"1.5.0","orientation":"horizontal","min":0,"description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_de5bdfa89ab84e27928938d29eb3beef"}},"22a330ae10f44a98a5e2210e1e0f83fe":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","state":{"_view_name":"HTMLView","style":"IPY_MODEL_05763c86c54a4c2ebff74747363eb1f8","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":" 548M/548M [00:21<00:00, 
25.3MB/s]","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_ed5f9c349cc7448db86872a7a0bcaeab"}},"cce461c343214f4b8ee48873d9e92722":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","state":{"_view_name":"StyleView","_model_name":"ProgressStyleModel","description_width":"initial","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","bar_color":null,"_model_module":"@jupyter-widgets/controls"}},"de5bdfa89ab84e27928938d29eb3beef":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"05763c86c54a4c2ebff74747363eb1f8":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"ed5f9c349cc7448db86872a7a0bcae
ab":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}}}}},"cells":[{"cell_type":"markdown","metadata":{"id":"D18pRY2C9NUf"},"source":["# Faster than training from scratch \n","# Fine-tuning the English GPT-2 in any language with Hugging Face and fastai v2 \n","\n","> Tutorial on how to use fastai v2 over Hugging Face's Transformers and Tokenizers libraries to fine-tune an English pre-trained transformer-based language model (GPT-2) to any language other than English"]},{"cell_type":"markdown","metadata":{"id":"C-Cx5Xrk9NUh"},"source":["Notebook is based on work of Pierre Guillou (https://www.linkedin.com/in/pierreguillou)\n","\n","Other resources used:\n","---\n","\n","\n","- Post in medium: [Faster than training from scratch - Fine-tuning the English GPT-2 in any language with Hugging Face and fastai v2 (practical case with Portuguese)](https://medium.com/@pierre_guillou/faster-than-training-from-scratch-fine-tuning-the-english-gpt-2-in-any-language-with-hugging-f2ec05c98787)\n","- Fast notebook: 
[finetuning-English-GPT2-any-language-Portuguese-HuggingFace-fastaiv2_FAST.ipynb](https://github.com/piegu/fastai-projects/blob/master/finetuning-English-GPT2-any-language-Portuguese-HuggingFace-fastaiv2_FAST.ipynb)\n","- Hugging face model page of [GPorTuguese-2](https://huggingface.co/pierreguillou/gpt2-small-portuguese): a language model for Portuguese text generation (and more NLP tasks...)\n","- Other posts in medium of the GPT-2 series: \n"," - [NLP & fastai | GPT-2](https://medium.com/@pierre_guillou/nlp-fastai-gpt-2-16ee145a4a28)\n"," - [Byte-level BPE, an universal tokenizer but...](https://medium.com/@pierre_guillou/byte-level-bpe-an-universal-tokenizer-but-aff932332ffe)"]},{"cell_type":"markdown","metadata":{"id":"eAs4xPsR9NUj"},"source":["## Overview"]},{"cell_type":"markdown","metadata":{"id":"TOQ1ZSE99NUl"},"source":["In this tutorial, instead of training from scratch, we will see how to fine-tune in just over a day, on one GPU and with a little more than 1GB of training data an English pre-trained [transformer](https://arxiv.org/abs/1706.03762)-based language model to any other language. \n","\n","As a practical case, we fine-tune to Portuguese the [English pre-trained GPT-2](https://github.com/openai/gpt-2) by wrapping the [Transformers](https://github.com/huggingface/transformers) and [Tokenizers](https://github.com/huggingface/tokenizers) libraries of Hugging Face into [fastai v2](https://github.com/fastai/fastai2). 
We thus create a new language model: [GPorTuguese-2](https://huggingface.co/pierreguillou/gpt2-small-portuguese), a language model for Portuguese text generation (and more NLP tasks...)."]},{"cell_type":"markdown","metadata":{"id":"MXu6MQF-9NUn"},"source":["![The 3 main steps of fine-tuning the English GPT-2 to Portuguese with Hugging Face and fastai v2 (image edited from fast.ai NLP)](images/GPT2_tf_ft_approach.png \"The 3 main steps of fine-tuning the English GPT-2 to Portuguese with Hugging Face and fastai v2 (image edited from fast.ai NLP)\")"]},{"cell_type":"markdown","metadata":{"heading_collapsed":true,"id":"kQrp3wcB9NVL"},"source":["## About the choice of GPT-2"]},{"cell_type":"markdown","metadata":{"hidden":true,"id":"heoybdvR9NVM"},"source":["In order to demonstrate the feasibility of fine-tuning Hugging Face models via fastai v2, we had to choose an emblematic model of the [Transformer revolution](https://arxiv.org/abs/1706.03762) in NLP since 2017.\n","\n","Thus, between the GPT-2 and [BERT](https://github.com/google-research/bert) models, we chose the GPT-2 model because it strongly influenced minds beyond the circle of Deep Learning specialists in early 2019 by [writing texts of a quality level close to that of humans](https://openai.com/blog/better-language-models/#samples). 
Today \"exceeded\" in number of parameters and performance by more recent models like BART, T5 and of course GPT-3 (175 billion parameters!), it remains a reference and a model used in research and applications.\n","For those who want to better understand how GPT-2 works, read the following posts:\n","- [The Illustrated GPT-2 (Visualizing Transformer Language Models)](http://jalammar.github.io/illustrated-gpt2/)\n","- [NLP & fastai | GPT-2](https://medium.com/@pierre_guillou/nlp-fastai-gpt-2-16ee145a4a28)\n","\n","**About the version of GPT-2**\n","\n","There are 3 versions of the GPT-2 model (look at the [transformers documentation](https://huggingface.co/transformers/pretrained_models.html) for more details). Here, **we use the small version**, the one with the smallest number of weights (124 million, not 117 as written in the original paper) but you can change the model used by changing the content of `pretrained_weights` (if it's not a GPT2 model, you'll need to change the classes used for the model and the tokenizer of course)."]},{"cell_type":"markdown","metadata":{"hidden":true,"id":"guzLJ_st9NVO"},"source":["**More about GPT-2**"]},{"cell_type":"markdown","metadata":{"hidden":true,"id":"vuex9WUD9NVP"},"source":["Source: https://huggingface.co/transformers/model_doc/gpt2.html"]},{"cell_type":"markdown","metadata":{"hidden":true,"id":"FxAZPPFo9NVQ"},"source":["> OpenAI GPT-2 model was proposed in [Language Models are Unsupervised Multitask Learners](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf) by Alec Radford*, Jeffrey Wu*, Rewon Child, David Luan, Dario Amodei** and Ilya Sutskever**. It’s a causal (unidirectional) transformer pre-trained using language modeling on a very large corpus of ~40 GB of text data.\n","\n","> The abstract from the paper is the following: *GPT-2 is a large transformer-based language model with 1.5 billion parameters, trained on a dataset[1] of 8 million web pages. 
GPT-2 is trained with a simple objective: predict the next word, given all of the previous words within some text. The diversity of the dataset causes this simple goal to contain naturally occurring demonstrations of many tasks across diverse domains. GPT-2 is a direct scale-up of GPT, with more than 10X the parameters and trained on more than 10X the amount of data.*\n","\n","> Tips:\n","> - GPT-2 is a model with absolute position embeddings so it’s usually advised to pad the inputs on the right rather than the left.\n","> - GPT-2 was trained with a causal language modeling (CLM) objective and is therefore powerful at predicting the next token in a sequence. Leveraging this feature allows GPT-2 to generate syntactically coherent text as it can be observed in the run_generation.py example script.\n","> - The PyTorch models can take the past as input, which is the previously computed key/value attention pairs. Using this past value prevents the model from re-computing pre-computed values in the context of text generation. See [reusing the past in generative models](https://huggingface.co/transformers/quickstart.html#using-the-past) for more information on the usage of this argument.\n","\n","> [Write With Transformer](https://transformer.huggingface.co/doc/gpt2-large) is a webapp created and hosted by Hugging Face showcasing the generative capabilities of several models. 
GPT-2 is one of them and is available in five different sizes: small, medium, large, xl and a distilled version of the small checkpoint: distilgpt-2.\n","\n",">The original code can be found [here](https://openai.com/blog/better-language-models/)."]},{"cell_type":"markdown","metadata":{"heading_collapsed":true,"id":"DAvDfxhq9NVQ"},"source":["## References"]},{"cell_type":"markdown","metadata":{"heading_collapsed":true,"hidden":true,"id":"6NUQ_V299NVR"},"source":["### GPT-2"]},{"cell_type":"markdown","metadata":{"hidden":true,"id":"n1mGmvkE9NVS"},"source":["- Understanding\n"," - [Better Language Models and Their Implications](https://openai.com/blog/better-language-models/) (OpenAI, 02/14/2019)\n"," - [The Illustrated GPT-2 (Visualizing Transformer Language Models)](http://jalammar.github.io/illustrated-gpt2/)\n"," - [The Annotated GPT-2](https://amaarora.github.io/2020/02/18/annotatedGPT2.html)\n"," - [Understanding the GPT-2 Source Code](https://medium.com/analytics-vidhya/understanding-the-gpt-2-source-code-part-1-4481328ee10b)\n"," - [How To Make Custom AI-Generated Text With GPT-2](https://minimaxir.com/2019/09/howto-gpt2/)\n","- Online Apps\n"," - [Write With Transformer (distilgpt2-small, gpt2small, gpt2medium, gpt2large)](https://transformer.huggingface.co/doc/gpt2-large)\n"," - [Write With DistilGPT-2](https://transformer.huggingface.co/model/distil-gpt2)\n"," - [Generate custom text from an AI using GPT-2 (using the 117M default model)](https://minimaxir.com/apps/gpt2-small/)\n"," - [Allen GPT2 Large Demo](https://demo.allennlp.org/next-token-lm?text=AllenNLP%20is)\n","- Other papers: [The Annotated Transformer](https://nlp.seas.harvard.edu/2018/04/03/attention.html), [Layer Normalization](https://arxiv.org/abs/1607.06450)"]},{"cell_type":"markdown","metadata":{"heading_collapsed":true,"hidden":true,"id":"7bKWxnx19NVT"},"source":["### Datasets in Portuguese"]},{"cell_type":"markdown","metadata":{"hidden":true,"id":"BcPILDYB9NVU"},"source":["- 
Wikipedia\n"," - (fastai): code from [Vietnamese ULMFiT from scratch](https://github.com/fastai/course-nlp/blob/master/nn-vietnamese.ipynb)\n"," - (Hugging Face): [code from nlp](https://huggingface.co/nlp/viewer/?dataset=wikipedia&config=20200501.pt)\n","- [OSCAR corpus](https://traces1.inria.fr/oscar/): code from [Find a Dataset](https://colab.research.google.com/github/huggingface/blog/blob/master/notebooks/01_how_to_train.ipynb#scrollTo=oK7PPVm2XBgr)"]},{"cell_type":"markdown","metadata":{"heading_collapsed":true,"hidden":true,"id":"i2frlbhv9NVU"},"source":["### Hugging Face"]},{"cell_type":"markdown","metadata":{"hidden":true,"id":"9h14XyS59NVW"},"source":["- Dataset\n"," - [nlp](https://github.com/huggingface/nlp)\n"," - [Colab tutorial](https://colab.research.google.com/github/huggingface/nlp/blob/master/notebooks/Overview.ipynb)\n"," - [Online dataset explorer](https://huggingface.co/nlp/viewer)\n","- Tokenizers\n"," - [Tokenizers](https://github.com/huggingface/tokenizers) (github)\n"," - Source code\n"," - [Source code for transformers.tokenization_gpt2](https://huggingface.co/transformers/_modules/transformers/tokenization_gpt2.html)\n"," - [Source code for transformers.tokenization_utils_base](https://huggingface.co/transformers/_modules/transformers/tokenization_utils_base.html)\n"," - [Source code for transformers.tokenization_utils](https://huggingface.co/transformers/_modules/transformers/tokenization_utils.html)\n"," - [Source code for transformers.tokenization_utils_fast](https://huggingface.co/transformers/_modules/transformers/tokenization_utils_fast.html)\n"," - [classmethod from_pretrained()](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.PreTrainedTokenizer.from_pretrained): Instantiate a PreTrainedTokenizer (or a derived class) from a predefined tokenizer.\n"," - [Source code for transformers.tokenization_gpt2](https://huggingface.co/transformers/_modules/transformers/tokenization_gpt2.html)\n"," - [Hugging Face 
Tutorials - Training Tokenizer](https://www.kaggle.com/funtowiczmo/hugging-face-tutorials-training-tokenizer)\n"," - [Hugging Face Introduces Tokenizers](https://medium.com/dair-ai/hugging-face-introduces-tokenizers-d792482db360)\n"," - How to train a new language model from scratch using Transformers and Tokenizers (05/15/2020): [blog post](https://huggingface.co/blog/how-to-train) & [colab notebook](https://colab.research.google.com/github/huggingface/blog/blob/master/notebooks/01_how_to_train.ipynb)\n"," - [HuggingFace Tokenizers Cheat Sheet](https://www.kaggle.com/debanga/huggingface-tokenizers-cheat-sheet)\n"," - [Tokenizers: How machines read](https://blog.floydhub.com/tokenization-nlp/) (01/28/2020)\n"," - [Byte Pair Encoding](https://leimao.github.io/blog/Byte-Pair-Encoding/) (07/19/2019)\n"," - [What is a tokenizer?](https://docs.rs/tokenizers/0.10.1/tokenizers/#what-is-a-tokenizer)\n","- Transformers\n"," - [Transformers](https://huggingface.co/transformers/) by Hugging Face & [Transformers github](https://github.com/huggingface/transformers)\n"," - [Glossary](https://huggingface.co/transformers/glossary.html)\n"," - [OpenAI GPT2](https://huggingface.co/transformers/model_doc/gpt2.html#openai-gpt2)\n"," - Source code\n"," - [Source code for transformers.modeling_gpt2](https://huggingface.co/transformers/_modules/transformers/modeling_gpt2.html)\n"," - [Source code for transformers.configuration_gpt2](https://huggingface.co/transformers/_modules/transformers/configuration_gpt2.html)\n"," - [DistilBERT](https://medium.com/huggingface/distilbert-8cf3380435b5), [DistilGPT2](https://huggingface.co/distilgpt2) & [Download Model: distilgpt2](https://huggingface.co/distilgpt2)\n"," - [Train a GPT-2 Text-Generating Model w/ GPU For Free](https://colab.research.google.com/drive/1VLG8e7YSEwypxU-noRNhsv5dW4NfTGce#scrollTo=H7LoMj4GA4n_) (colab notebook, 11/10/2019)\n"," - How to generate text: using different decoding methods for language generation with Transformers 
(03/18/2020, Hugging Face): [blog post](https://huggingface.co/blog/how-to-generate) and [colab notebook](https://colab.research.google.com/github/huggingface/blog/blob/master/notebooks/02_how_to_generate.ipynb) "]},{"cell_type":"markdown","metadata":{"hidden":true,"id":"j6WB2Qyy9NVX"},"source":["### Pytorch, fastai & Transformers (Hugging Face)"]},{"cell_type":"markdown","metadata":{"hidden":true,"id":"FcYAwd0A9NVY"},"source":["- [Sequence-to-Sequence Modeling with nn.Transformer and TorchText](https://pytorch.org/tutorials/beginner/transformer_tutorial.html#sequence-to-sequence-modeling-with-nn-transformer-and-torchtext)\n","- [Fastai v2](https://dev.fast.ai) (Deep Learning library on PyTorch) & [Hugging face](https://huggingface.co/)\n","- [blurr](https://ohmeow.github.io/blurr/): a library that integrates huggingface transformers with version 2 of the fastai framework\n","- fastai v2\n"," - Integration of the GPT2 model into fastai v2: code from [Tutorial - Transformers](https://dev.fast.ai/tutorial.transformers) and [10_nlp.ipynb](https://github.com/fastai/fastbook/blob/master/10_nlp.ipynb) (how to fine-tune an NLP model with fastai v2)\n"," - FastHugs\n"," - [FastHugs in the fastai forum](https://forums.fast.ai/t/fasthugs-fastai-v2-and-huggingface-transformers/63681)\n"," - [FastHugs: Language Modelling with Tranformers and Fastai](https://www.ntentional.com/nlp/transformers/training%20technique/classification/2020/04/24/fasthugs_language_model.html) (04/24/2020, fastai v2)\n"," - [FastHugs: Sequence Classification with Transformers and Fastai](https://www.ntentional.com/nlp/training%20technique/classification/2020/04/17/fasthugs_seq_classification.html) (04/17/2020, fastai v2)\n","- fastai v1\n"," - [A Tutorial to Fine-Tuning BERT with Fast AI](http://mlexplained.com/2019/05/13/a-tutorial-to-fine-tuning-bert-with-fast-ai/) (05/15/2019, fastai v1)\n"," - [Fastai integration with BERT: Multi-label text classification identifying toxicity in 
texts](https://medium.com/@abhikjha/fastai-integration-with-bert-a0a66b1cecbe) (07/17/2019, fastai v1)\n"," - [When Pytorch-transformers meets Fastai (w/ Google Colab)](https://towardsdatascience.com/best-of-two-worlds-pytorch-transformers-meets-fastai-5fd51ef34b0f) (08/26/2019, fastai v1)\n"," - [Using RoBERTa with Fastai for NLP](https://medium.com/analytics-vidhya/using-roberta-with-fastai-for-nlp-7ed3fed21f6c) (09/02/2019, fastai v1)\n"," - [RoBERTa with Fastai](https://www.kaggle.com/abhikjha/roberta-with-fastai) (11/14/2019, fastai v1)\n"," - [Fastai with 🤗Transformers (BERT, RoBERTa, XLNet, XLM, DistilBERT)](https://towardsdatascience.com/fastai-with-transformers-bert-roberta-xlnet-xlm-distilbert-4f41ee18ecb2) (11/27/2019, fastai v1): A tutorial to implement state-of-the-art NLP models with Fastai for Sentiment Analysis ([notebook](https://www.kaggle.com/maroberti/fastai-with-transformers-bert-roberta))\n"," - [RoBERTa (fastai, HuggingFace 🤗Transformers)](https://www.kaggle.com/melissarajaram/roberta-fastai-huggingface-transformers/execution) (01/17/2020, fastai v1)"]},{"cell_type":"markdown","metadata":{"id":"6ITOliM_9NVa"},"source":["## Main coding steps to fine-tune a Hugging Face language model with fastai v2"]},{"cell_type":"markdown","metadata":{"id":"dKtXMsgj9NVa"},"source":["The 6 main steps detailed below can be summarized in 3 main ones:\n","\n","1. **Initialization & download** (download of Portuguese Wikipedia and GPT-2 English pre-trained model and tokenizer)\n","2. **GPT-2 tokenizer with a Portuguese vocab** (train a GPT-2 tokenizer with a vocab in Portuguese, wrap it into a fastai v2 tokenizer and update the embeddings matrix of the GPT-2 English pre-trained model according to the new Portuguese vocab: keep the embeddings vectors of the common tokens between English and Portuguese vocabs)\n","3. 
**Fine-tune on Portuguese Wikipedia the GPT-2 model with fastai v2 training functionalities**"]},{"cell_type":"code","metadata":{"id":"_XZCyJqS9pJt"},"source":["# extra small thing to setup drives paths etc written "],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"VAK6Bn0Aw4_u"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"VI_AUR8K9ncO","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616061296952,"user_tz":-60,"elapsed":30851,"user":{"displayName":"Mark Lina","photoUrl":"","userId":"17651129667533642938"}},"outputId":"c1d01012-4596-4c52-ef71-bf2e32ea8d76"},"source":["#start by mounting google drive\n","from google.colab import drive, files\n","drive.mount('/content/gdrive', force_remount=True)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Mounted at /content/gdrive\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"yzT-SC9hmTKG","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1615837934244,"user_tz":-60,"elapsed":167861,"user":{"displayName":"Mark Lina","photoUrl":"","userId":"17651129667533642938"}},"outputId":"36c1fb8b-6b9e-4fb1-be39-8b68fdfbfa23"},"source":["# need to instal fastai 2 etc before \n","!pip install -q git+https://github.com/fastai/fastai\n","!pip install -q git+https://github.com/fastai/fastcore\n","!pip install -q iterative-stratification"],"execution_count":null,"outputs":[{"output_type":"stream","text":["\u001b[K |████████████████████████████████| 61kB 3.0MB/s \n","\u001b[K |████████████████████████████████| 12.8MB 326kB/s \n","\u001b[K |████████████████████████████████| 776.8MB 21kB/s \n","\u001b[?25h Building wheel for fastai (setup.py) ... \u001b[?25l\u001b[?25hdone\n","\u001b[31mERROR: torchtext 0.9.0 has requirement torch==1.8.0, but you'll have torch 1.7.1 which is incompatible.\u001b[0m\n"," Building wheel for fastcore (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"g-Zna9nuQE8C","executionInfo":{"status":"ok","timestamp":1616061331938,"user_tz":-60,"elapsed":582,"user":{"displayName":"Mark Lina","photoUrl":"","userId":"17651129667533642938"}},"outputId":"ec9b1082-42d2-4773-ece4-ad60c5f567f7"},"source":["cd /content/gdrive/MyDrive/fastai"],"execution_count":null,"outputs":[{"output_type":"stream","text":["/content/gdrive/MyDrive/fastai\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"a4VVvLIhQBjv"},"source":["from nlputilsfastai import * # augumented py file ---> from fastai.basics import * # was fastai2"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":309},"id":"6prc1OyVI221","executionInfo":{"status":"ok","timestamp":1615837583603,"user_tz":-60,"elapsed":2993,"user":{"displayName":"Mark Lina","photoUrl":"","userId":"17651129667533642938"}},"outputId":"d9fc6e69-482f-496f-e52c-f49b41a3694b"},"source":["# !pip install fastcore==1.3.8"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Collecting fastcore==1.3.8\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/26/53/d79c0f942f8bb44903108462541130b53fc7b4d744b1b5df9127b0b524d6/fastcore-1.3.8-py3-none-any.whl (48kB)\n","\r\u001b[K |██████▉ | 10kB 19.8MB/s eta 0:00:01\r\u001b[K |█████████████▋ | 20kB 25.6MB/s eta 0:00:01\r\u001b[K |████████████████████▍ | 30kB 23.5MB/s eta 0:00:01\r\u001b[K |███████████████████████████▏ | 40kB 26.4MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 51kB 5.8MB/s \n","\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from fastcore==1.3.8) (20.9)\n","Requirement already satisfied: pip in /usr/local/lib/python3.7/dist-packages (from fastcore==1.3.8) (19.3.1)\n","Requirement already satisfied: pyparsing>=2.0.2 in 
/usr/local/lib/python3.7/dist-packages (from packaging->fastcore==1.3.8) (2.4.7)\n","Installing collected packages: fastcore\n"," Found existing installation: fastcore 1.3.20\n"," Uninstalling fastcore-1.3.20:\n"," Successfully uninstalled fastcore-1.3.20\n","Successfully installed fastcore-1.3.8\n"],"name":"stdout"},{"output_type":"display_data","data":{"application/vnd.colab-display-data+json":{"pip_warning":{"packages":["fastcore"]}}},"metadata":{"tags":[]}}]},{"cell_type":"code","metadata":{"id":"qMQ9fAujMD74"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"qBjPMyIYTHjr"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"CmKm8BETBpB2"},"source":["# 1. Installing required libraries and mounting google drive"]},{"cell_type":"code","metadata":{"id":"nqp4kpUG9tsV","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616332781966,"user_tz":-60,"elapsed":20545,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"e8c9b1b7-5c52-43dc-d8ef-a486e3e82649"},"source":["#start by mounting google drive\n","from google.colab import drive, files\n","drive.mount('/content/gdrive', force_remount=True)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Mounted at /content/gdrive\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"M2y5n_KU99lq","executionInfo":{"status":"ok","timestamp":1616332996058,"user_tz":-60,"elapsed":234624,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"29073dbc-db55-408b-ddfa-112eb14dcd1e"},"source":["# need to instal fastai 2 etc before \n","%%time\n","!pip install -q git+https://github.com/fastai/fastai\n","!pip install -q git+https://github.com/fastai/fastcore\n","!pip install -q iterative-stratification\n","!pip install --upgrade 
tables"],"execution_count":null,"outputs":[{"output_type":"stream","text":["\u001b[K |████████████████████████████████| 61kB 3.3MB/s \n","\u001b[K |████████████████████████████████| 12.8MB 251kB/s \n","\u001b[K |████████████████████████████████| 776.8MB 22kB/s \n","\u001b[?25h Building wheel for fastai (setup.py) ... \u001b[?25l\u001b[?25hdone\n","\u001b[31mERROR: torchtext 0.9.0 has requirement torch==1.8.0, but you'll have torch 1.7.1 which is incompatible.\u001b[0m\n"," Building wheel for fastcore (setup.py) ... \u001b[?25l\u001b[?25hdone\n","Collecting tables\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/0f/cb/4097be890a773af95343389faa8c283b0d9ff606f144227a548461dcbdd5/tables-3.6.1-cp37-cp37m-manylinux1_x86_64.whl (4.3MB)\n","\u001b[K |████████████████████████████████| 4.3MB 5.6MB/s \n","\u001b[?25hRequirement already satisfied, skipping upgrade: numexpr>=2.6.2 in /usr/local/lib/python3.7/dist-packages (from tables) (2.7.3)\n","Requirement already satisfied, skipping upgrade: numpy>=1.9.3 in /usr/local/lib/python3.7/dist-packages (from tables) (1.19.5)\n","Installing collected packages: tables\n"," Found existing installation: tables 3.4.4\n"," Uninstalling tables-3.4.4:\n"," Successfully uninstalled tables-3.4.4\n","Successfully installed tables-3.6.1\n","CPU times: user 1.43 s, sys: 391 ms, total: 1.82 s\n","Wall time: 3min 34s\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"P-2JYQnD9N6F"},"source":["# 2. 
Initialization"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"A7Z0xlgv-TvJ","executionInfo":{"status":"ok","timestamp":1616332996060,"user_tz":-60,"elapsed":145493,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"c5558794-aff1-427a-de12-8f3214654f4c"},"source":["cd /content/gdrive/MyDrive/fastai"],"execution_count":null,"outputs":[{"output_type":"stream","text":["/content/gdrive/MyDrive/fastai\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"nbpresent":{"id":"151cd18f-76e3-440f-a8c7-ffa5c6b5da01"},"id":"RsJYkiK99N6G"},"source":["# from fastai2.text.all import *\n","# from nlputils_fastai2 import * \n","\n","from fastai.text.all import *\n","from nlputilsfastai import * \n","\n","%reload_ext autoreload\n","%autoreload 2\n","%matplotlib inline"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"lHjl3W7HBdej","executionInfo":{"status":"ok","timestamp":1616333001350,"user_tz":-60,"elapsed":150772,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"7d34a178-4200-4013-d710-77808b9fd9c5"},"source":["gpu = 0\n","torch.cuda.set_device(gpu)\n","print(f'cuda device: {torch.cuda.current_device()}')\n","print(f'cuda device name: {torch.cuda.get_device_name(gpu)}')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["cuda device: 0\n","cuda device name: Tesla K80\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"gK0-GZGhC4nF","executionInfo":{"status":"ok","timestamp":1616333001351,"user_tz":-60,"elapsed":150770,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"cef0df9a-bdc3-4902-d73f-3154389b6e60"},"source":["!nvidia-smi"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Sun Mar 21 13:23:20 2021 
\n","+-----------------------------------------------------------------------------+\n","| NVIDIA-SMI 460.56 Driver Version: 460.32.03 CUDA Version: 11.2 |\n","|-------------------------------+----------------------+----------------------+\n","| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n","| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n","| | | MIG M. |\n","|===============================+======================+======================|\n","| 0 Tesla K80 Off | 00000000:00:04.0 Off | 0 |\n","| N/A 44C P8 31W / 149W | 3MiB / 11441MiB | 0% Default |\n","| | | N/A |\n","+-------------------------------+----------------------+----------------------+\n"," \n","+-----------------------------------------------------------------------------+\n","| Processes: |\n","| GPU GI CI PID Type Process name GPU Memory |\n","| ID ID Usage |\n","|=============================================================================|\n","| No running processes found |\n","+-----------------------------------------------------------------------------+\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"pZIeiQm9Cya8"},"source":["Load standard snipet to prevent random disconnects\n","This cell runs JS code to automatic reconnect to runtime."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"T-N9y4S6C1I5","executionInfo":{"status":"ok","timestamp":1616333001352,"user_tz":-60,"elapsed":150763,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"e8c3979a-173f-456f-fa89-94e36b4b0810"},"source":["import IPython\n","from google.colab import output\n","\n","display(IPython.display.Javascript('''\n"," function ClickConnect(){\n"," btn = document.querySelector(\"colab-connect-button\")\n"," if (btn != null){\n"," console.log(\"Click colab-connect-button\"); \n"," btn.click() \n"," }\n"," \n"," btn = document.getElementById('ok')\n"," if (btn != null){\n"," 
console.log(\"Click reconnect\"); \n"," btn.click() \n"," }\n"," }\n"," \n","setInterval(ClickConnect,60000)\n","'''))\n","\n","print(\"Done.\")"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"application/javascript":["\n"," function ClickConnect(){\n"," btn = document.querySelector(\"colab-connect-button\")\n"," if (btn != null){\n"," console.log(\"Click colab-connect-button\"); \n"," btn.click() \n"," }\n"," \n"," btn = document.getElementById('ok')\n"," if (btn != null){\n"," console.log(\"Click reconnect\"); \n"," btn.click() \n"," }\n"," }\n"," \n","setInterval(ClickConnect,60000)\n"],"text/plain":[""]},"metadata":{"tags":[]}},{"output_type":"stream","text":["Done.\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"o2qnh-a79N6H","executionInfo":{"status":"ok","timestamp":1616333001353,"user_tz":-60,"elapsed":150759,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"3933bc50-99a5-4778-c065-79d6c19e8926"},"source":["# Get config of fastai2 paths\n","config = Config()\n","config.d"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["{'archive_path': '/root/.fastai/archive',\n"," 'data_path': '/root/.fastai/data',\n"," 'model_path': '/root/.fastai/models',\n"," 'storage_path': '/tmp',\n"," 'version': 2}"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"markdown","metadata":{"nbpresent":{"id":"cf070ab7-babb-4cf0-a315-401f65461dc8"},"id":"5pL4tfG49N6I"},"source":["This will create a `{lang}wiki` folder, containing a `{lang}wiki` text file with the wikipedia contents (for other languages, replace `{lang}` with the appropriate code from the [list of wikipedias](https://meta.wikimedia.org/wiki/List_of_Wikipedias))."]},{"cell_type":"code","metadata":{"id":"2ShRUXWj_NoG"},"source":["# setup new path_data and create the corresponding folder\n","lang = 'pl'\n","name = 
f'{lang}wiki'\n","data_path = config['data_path']\n","path_data = data_path/name\n","path_data.mkdir(exist_ok=True, parents=True)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"NykPqucXA0hF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616333001355,"user_tz":-60,"elapsed":150755,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"1b2d0fb5-61f5-401b-f2c3-783c56c9c517"},"source":["cd /content/gdrive/MyDrive/fastai"],"execution_count":null,"outputs":[{"output_type":"stream","text":["/content/gdrive/MyDrive/fastai\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"9YmkrjvBDPPr","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616333001356,"user_tz":-60,"elapsed":150752,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"02fdd4b6-5ce1-41a8-d688-25c5960eb098"},"source":["data_path, path_data"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(Path('/root/.fastai/data'), Path('/root/.fastai/data/plwiki'))"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"egNzIEWtylqk"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"NBx8vCFkBtDu"},"source":["# 3. 
Loading the previously prepared scraped wiki file (~1 GB) for a particular language\n","For that purpose, another notebook was used: [wiki download](https://github.com/len-sla/other/blob/main/wiki_download.ipynb)"]},{"cell_type":"code","metadata":{"id":"pyZnd8Srze_Z"},"source":["!cp /content/gdrive/MyDrive/fastai/all_texts_plwiki.csv /root/.fastai/data/plwiki\n","!cp /content/gdrive/MyDrive/fastai/all_texts_plwiki.txt /root/.fastai/data/plwiki"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"N-2ccH-IAuUC","executionInfo":{"status":"ok","timestamp":1616333093899,"user_tz":-60,"elapsed":230141,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"9d93dfcb-8fc4-4351-bbaf-3a7a9329e9cf"},"source":["!du -hs {'/content/gdrive/MyDrive/fastai/all_texts_plwiki.csv'}"],"execution_count":null,"outputs":[{"output_type":"stream","text":["1.1G\t/content/gdrive/MyDrive/fastai/all_texts_plwiki.csv\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"fdlfLPzP_3Z0","executionInfo":{"status":"ok","timestamp":1616333132791,"user_tz":-60,"elapsed":269026,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"44366af8-2e01-45d1-e611-c5248f99f27f"},"source":["df = pd.read_csv('/content/gdrive/MyDrive/fastai/all_texts_plwiki.csv')\n","df.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
text
0Henry Wager Halleck (ur. 16 stycznia 1815, zm. 9 stycznia 1872) – amerykański wojskowy, naukowiec i prawnik, oficer United States Army.\\n\\n, znany pod – obraźliwym później – przydomkiem „Old Brains”, brał czynny udział w dziele przyłączenia Kalifornii jako stanu. Z powodzeniem praktykował jako prawnik i deweloper. Na początku wojny secesyjnej, był naczelnym dowódcą Armii Unii na zachodnim teatrze działań, a jednocześnie – przez prawie dwa lata – głównodowodzącym wszystkich armii USA. „Awansował” na szefa sztabu armii, gdy generał-porucznik Ulysses Grant, były podkomendny Hallecka na zachod...
1Kościół Najświętszej Marii Panny (\"in summo\") w Poznaniu – zabytkowy gotycki kościół na Ostrowie Tumskim wraz z resztkami wczesnopiastowskiego palatium.\\n\\nW dzisiejszym kształcie powstał w połowie XV wieku, jednak jego historia rozpoczyna się około 965 roku, gdy po przybyciu Dobrawy wzniesiono na Ostrowie Tumskim kaplicę zamkową. W dokumentach kościół Najświętszej Marii Panny pod swoim dzisiejszym wezwaniem pojawia się po raz pierwszy w 1247. \\n\\nWedług najnowszych badań prawdopodobnie pod prezbiterium znajdują się fundamenty rotundy pełniącej funkcję kaplicy, pewnym jest natomiast istnie...
2Gieorgij Andriejewicz Mołczanow (ros. Георгий Андреевич Молчанов, ur. 3 kwietnia 1897 w Charkowie, zm. 9 października 1937 w miejscu egzekucji Kommunarka) – funkcjonariusz radzieckiej policji politycznej, komisarz bezpieczeństwa państwowego II rangi, ludowy komisarz spraw wewnętrznych Białoruskiej SRR (1936-1937).\\n\\nUrodzony w rodzinie rosyjskiej. Do 1917 uczył się w szkole handlowej w Charkowie, od listopada 1917 do czerwca 1918 był żołnierzem i członkiem sztabu Głównodowodzącego Wojsk Południa Rosji Antonowa-Owsiejenki, później pracował w sztabie Frontu Wschodniego. \\n\\nOd grudnia 1917 ...
3José Manuel Durão Barroso (wym. []; ur. 23 marca 1956 w Lizbonie) – portugalski polityk, prawnik i nauczyciel akademicki. W latach 1992–1995 minister spraw zagranicznych w rządzie Aníbal Cavaco Silvy, od 1999 do 2004 przewodniczący Partii Socjaldemokratycznej. Premier Portugalii od 6 kwietnia 2002 do 17 lipca 2004. Od 22 listopada 2004 do 31 października 2014 przewodniczący Komisji Europejskiej.\\n\\nUkończył prawo na Uniwersytecie Lizbońskim, a także studia europejskie na Uniwersytecie Genewskim, na którym uzyskał również magisterium w zakresie nauk politycznych. Pracował jako nauczyciel ak...
4Laodika I (gr. \"Λαοδίκη\", \"Laodíkē\") (zm. po 242 p.n.e.) – córka Achajosa Starszego z dynastii Seleucydów, brata Antiocha I Sotera, pierwsza żona brata stryjecznego Antiocha II Theosa, króla państwa Seleucydów, syna Antiocha I Sotera.\\n\\nW czasie II wojny syryjskiej (258-248 p.n.e.) jej mąż Antioch II Theos, jako sprzymierzeniec Macedonii walczył przeciwko Egiptowi. W wyniku tej wojny Antioch II zawarł porozumienie z królem Egiptu Ptolemeuszem II Filadelfem w r. 250 p.n.e. Miał się wyprzeć żony Laodiki I i wspólnych z nią dzieci, a poślubić jego córkę Berenikę oraz zdeklarować się uczynić ...
\n","
"],"text/plain":[" text\n","0 Henry Wager Halleck (ur. 16 stycznia 1815, zm. 9 stycznia 1872) – amerykański wojskowy, naukowiec i prawnik, oficer United States Army.\\n\\n, znany pod – obraźliwym później – przydomkiem „Old Brains”, brał czynny udział w dziele przyłączenia Kalifornii jako stanu. Z powodzeniem praktykował jako prawnik i deweloper. Na początku wojny secesyjnej, był naczelnym dowódcą Armii Unii na zachodnim teatrze działań, a jednocześnie – przez prawie dwa lata – głównodowodzącym wszystkich armii USA. „Awansował” na szefa sztabu armii, gdy generał-porucznik Ulysses Grant, były podkomendny Hallecka na zachod...\n","1 Kościół Najświętszej Marii Panny (\"in summo\") w Poznaniu – zabytkowy gotycki kościół na Ostrowie Tumskim wraz z resztkami wczesnopiastowskiego palatium.\\n\\nW dzisiejszym kształcie powstał w połowie XV wieku, jednak jego historia rozpoczyna się około 965 roku, gdy po przybyciu Dobrawy wzniesiono na Ostrowie Tumskim kaplicę zamkową. W dokumentach kościół Najświętszej Marii Panny pod swoim dzisiejszym wezwaniem pojawia się po raz pierwszy w 1247. \\n\\nWedług najnowszych badań prawdopodobnie pod prezbiterium znajdują się fundamenty rotundy pełniącej funkcję kaplicy, pewnym jest natomiast istnie...\n","2 Gieorgij Andriejewicz Mołczanow (ros. Георгий Андреевич Молчанов, ur. 3 kwietnia 1897 w Charkowie, zm. 9 października 1937 w miejscu egzekucji Kommunarka) – funkcjonariusz radzieckiej policji politycznej, komisarz bezpieczeństwa państwowego II rangi, ludowy komisarz spraw wewnętrznych Białoruskiej SRR (1936-1937).\\n\\nUrodzony w rodzinie rosyjskiej. Do 1917 uczył się w szkole handlowej w Charkowie, od listopada 1917 do czerwca 1918 był żołnierzem i członkiem sztabu Głównodowodzącego Wojsk Południa Rosji Antonowa-Owsiejenki, później pracował w sztabie Frontu Wschodniego. \\n\\nOd grudnia 1917 ...\n","3 José Manuel Durão Barroso (wym. []; ur. 23 marca 1956 w Lizbonie) – portugalski polityk, prawnik i nauczyciel akademicki. 
W latach 1992–1995 minister spraw zagranicznych w rządzie Aníbal Cavaco Silvy, od 1999 do 2004 przewodniczący Partii Socjaldemokratycznej. Premier Portugalii od 6 kwietnia 2002 do 17 lipca 2004. Od 22 listopada 2004 do 31 października 2014 przewodniczący Komisji Europejskiej.\\n\\nUkończył prawo na Uniwersytecie Lizbońskim, a także studia europejskie na Uniwersytecie Genewskim, na którym uzyskał również magisterium w zakresie nauk politycznych. Pracował jako nauczyciel ak...\n","4 Laodika I (gr. \"Λαοδίκη\", \"Laodíkē\") (zm. po 242 p.n.e.) – córka Achajosa Starszego z dynastii Seleucydów, brata Antiocha I Sotera, pierwsza żona brata stryjecznego Antiocha II Theosa, króla państwa Seleucydów, syna Antiocha I Sotera.\\n\\nW czasie II wojny syryjskiej (258-248 p.n.e.) jej mąż Antioch II Theos, jako sprzymierzeniec Macedonii walczył przeciwko Egiptowi. W wyniku tej wojny Antioch II zawarł porozumienie z królem Egiptu Ptolemeuszem II Filadelfem w r. 250 p.n.e. Miał się wyprzeć żony Laodiki I i wspólnych z nią dzieci, a poślubić jego córkę Berenikę oraz zdeklarować się uczynić ..."]},"metadata":{"tags":[]},"execution_count":14}]},{"cell_type":"markdown","metadata":{"id":"Zad8TrPNybqT"},"source":["# 4. 
Loading ready polish tokenizer( previosuly prepared)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"S90iGQ1i1Dhi","executionInfo":{"status":"ok","timestamp":1616333142614,"user_tz":-60,"elapsed":278526,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"08db3d0f-a3f6-435e-fc73-789c972f9970"},"source":["%%time\n","!pip install transformers\n","!pip freeze | grep transformers"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Collecting transformers\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/ed/d5/f4157a376b8a79489a76ce6cfe147f4f3be1e029b7144fa7b8432e8acb26/transformers-4.4.2-py3-none-any.whl (2.0MB)\n","\u001b[K |████████████████████████████████| 2.0MB 4.2MB/s \n","\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from transformers) (20.9)\n","Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.0.12)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n","Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.19.5)\n","Requirement already satisfied: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from transformers) (3.7.2)\n","Collecting sacremoses\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)\n","\u001b[K |████████████████████████████████| 890kB 38.8MB/s \n","\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.41.1)\n","Collecting tokenizers<0.11,>=0.10.1\n","\u001b[?25l 
Downloading https://files.pythonhosted.org/packages/71/23/2ddc317b2121117bf34dd00f5b0de194158f2a44ee2bf5e47c7166878a97/tokenizers-0.10.1-cp37-cp37m-manylinux2010_x86_64.whl (3.2MB)\n","\u001b[K |████████████████████████████████| 3.2MB 39.6MB/s \n","\u001b[?25hRequirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->transformers) (2.4.7)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2020.12.5)\n","Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n","Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n","Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n","Requirement already satisfied: typing-extensions>=3.6.4; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from importlib-metadata; python_version < \"3.8\"->transformers) (3.7.4.3)\n","Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata; python_version < \"3.8\"->transformers) (3.4.1)\n","Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.15.0)\n","Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n","Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.0.1)\n","Building wheels for collected packages: sacremoses\n"," Building wheel for sacremoses (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n"," Created wheel for sacremoses: filename=sacremoses-0.0.43-cp37-none-any.whl size=893262 sha256=f5a5b523d9fb1ed0c922ce2644030484b0826823cca49a932f1d58ed343f2c13\n"," Stored in directory: /root/.cache/pip/wheels/29/3c/fd/7ce5c3f0666dab31a50123635e6fb5e19ceb42ce38d4e58f45\n","Successfully built sacremoses\n","Installing collected packages: sacremoses, tokenizers, transformers\n","Successfully installed sacremoses-0.0.43 tokenizers-0.10.1 transformers-4.4.2\n","transformers==4.4.2\n","CPU times: user 39.9 ms, sys: 117 ms, total: 157 ms\n","Wall time: 8.29 s\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","referenced_widgets":["7f0767ff35a344b0b6da17083132427e","596851de3e714ab8942a72f69b18a63c","f0a9328a838041c0979b2b709b0b42a0","156da666d81b4026941d5acb5b487128","87da6b072be3484b98e01b534d1a751a","1ae0380be03140089589c6d485e7c1d0","f8b5835af84c482092be9d849cb9bd02","60b9f541de2648f08b7ad6257f32499b","7d5d97ff327f4d6aa5b44fc7656f07d8","c7f7c2744375405298d3b95a6aa2b361","bdfee3f0cab649989a96841b3a487635","8e3b1e20656248c8a9d92c26028d5026","e99dd500836c4156a91e7a75b18a683d","63d976528be746609f4910ca4b73bfc8","91552ff8f58d4563b60b8a1764704aa8","cb3db61b459f4082a5c2aca136564999","0a3b0c8f447543899950a6d2ff342a57","b428499e94534b6ebdc7be8f4adea734","18501534dc6a496b99e7017805acd781","75c7bd71ca2b4f3b93f53e2355e2856d","edc9e07a4bc14e4cb9ba9dde5a75d5aa","75e507e0573244a8a0c1fb9f7704322e","c098c5b96cb2449688d1516eae241690","40b5ac5ade1b46e4b04b5eee03840cf6"]},"id":"x8q1Ck6J9N6W","executionInfo":{"status":"ok","timestamp":1616333150282,"user_tz":-60,"elapsed":286189,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"f8ba0d38-d5f2-412f-a82e-6044395387cc"},"source":["%%time\n","from transformers import GPT2TokenizerFast\n","\n","pretrained_weights = 'gpt2'\n","tokenizer_en = 
GPT2TokenizerFast.from_pretrained(pretrained_weights)"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"application/vnd.jupyter.widget-view+json":{"model_id":"7f0767ff35a344b0b6da17083132427e","version_minor":0,"version_major":2},"text/plain":["HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1042301.0, style=ProgressStyle(descript…"]},"metadata":{"tags":[]}},{"output_type":"stream","text":["\n"],"name":"stdout"},{"output_type":"display_data","data":{"application/vnd.jupyter.widget-view+json":{"model_id":"7d5d97ff327f4d6aa5b44fc7656f07d8","version_minor":0,"version_major":2},"text/plain":["HBox(children=(FloatProgress(value=0.0, description='Downloading', max=456318.0, style=ProgressStyle(descripti…"]},"metadata":{"tags":[]}},{"output_type":"stream","text":["\n"],"name":"stdout"},{"output_type":"display_data","data":{"application/vnd.jupyter.widget-view+json":{"model_id":"0a3b0c8f447543899950a6d2ff342a57","version_minor":0,"version_major":2},"text/plain":["HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1355256.0, style=ProgressStyle(descript…"]},"metadata":{"tags":[]}},{"output_type":"stream","text":["\n","CPU times: user 665 ms, sys: 123 ms, total: 788 ms\n","Wall time: 9.07 s\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"25v40IRy01Ye"},"source":["# To correct the warning about token_pad (GPT2TokenizerFast), run the following code\n","# source: https://github.com/huggingface/transformers/issues/2648#issuecomment-616177044\n","tokenizer_en.pad_token = tokenizer_en.eos_token"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"IErVmjWa01Vl"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"scrolled":true,"colab":{"base_uri":"https://localhost:8080/"},"id":"LSV4oDRI9N6W","executionInfo":{"status":"ok","timestamp":1616333150290,"user_tz":-60,"elapsed":286187,"user":{"displayName":"Marek 
Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"5978839c-4c53-486d-bbbe-b6c38938e687"},"source":["# source: https://huggingface.co/transformers/_modules/transformers/tokenization_utils_fast.html\n","\n","print('---------- vocab ----------')\n","print()\n","\n","print('vocab_files_names:',tokenizer_en.vocab_files_names)\n","print()\n","\n","for k,v in tokenizer_en.pretrained_vocab_files_map.items():\n"," print(k)\n"," for kk,vv in v.items():\n"," print('- ',kk,':',vv)\n"," print()\n"," \n","print('vocab_size:',tokenizer_en.vocab_size)\n","print()\n","#print(tokenizer_en.get_vocab())\n","\n","num = 50\n","print(f'First {num} items of the vocab: {dict(itertools.islice(tokenizer_en.get_vocab().items(), 20))}')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["---------- vocab ----------\n","\n","vocab_files_names: {'vocab_file': 'vocab.json', 'merges_file': 'merges.txt', 'tokenizer_file': 'tokenizer.json'}\n","\n","vocab_file\n","- gpt2 : https://huggingface.co/gpt2/resolve/main/vocab.json\n","- gpt2-medium : https://huggingface.co/gpt2-medium/resolve/main/vocab.json\n","- gpt2-large : https://huggingface.co/gpt2-large/resolve/main/vocab.json\n","- gpt2-xl : https://huggingface.co/gpt2-xl/resolve/main/vocab.json\n","- distilgpt2 : https://huggingface.co/distilgpt2/resolve/main/vocab.json\n","\n","merges_file\n","- gpt2 : https://huggingface.co/gpt2/resolve/main/merges.txt\n","- gpt2-medium : https://huggingface.co/gpt2-medium/resolve/main/merges.txt\n","- gpt2-large : https://huggingface.co/gpt2-large/resolve/main/merges.txt\n","- gpt2-xl : https://huggingface.co/gpt2-xl/resolve/main/merges.txt\n","- distilgpt2 : https://huggingface.co/distilgpt2/resolve/main/merges.txt\n","\n","tokenizer_file\n","- gpt2 : https://huggingface.co/gpt2/resolve/main/tokenizer.json\n","- gpt2-medium : https://huggingface.co/gpt2-medium/resolve/main/tokenizer.json\n","- gpt2-large : 
https://huggingface.co/gpt2-large/resolve/main/tokenizer.json\n","- gpt2-xl : https://huggingface.co/gpt2-xl/resolve/main/tokenizer.json\n","- distilgpt2 : https://huggingface.co/distilgpt2/resolve/main/tokenizer.json\n","\n","vocab_size: 50257\n","\n","First 50 items of the vocab: {'Ġtopple': 49377, 'Ġblocked': 10226, '258': 25600, 'Ġaddicted': 28357, 'Ġdrum': 13026, 'It': 1026, 'ought': 2917, 'rib': 822, 'Ġhomemade': 24584, 'Ġnoises': 26782, 'ÙĨ': 23338, 'Ġprinces': 42676, 'Ġconvenience': 15607, 'Ġearthquake': 16295, 'vim': 31124, 'ĠJefferson': 15375, 'ãģĻ': 33623, 'Ġrebellious': 43860, 'adoes': 46368, 'anton': 23026}\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"vspJiJsr01P8","executionInfo":{"status":"ok","timestamp":1616333154292,"user_tz":-60,"elapsed":290187,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"0515893a-3a1a-4e74-d5fe-681528af233e"},"source":["!pip install tokenizers\n","!pip freeze | grep tokenizers"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Requirement already satisfied: tokenizers in /usr/local/lib/python3.7/dist-packages (0.10.1)\n","tokenizers==0.10.1\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"2om1-fULxN6Y"},"source":["# creating directory for tokenizer\n","ByteLevelBPE_tokenizer_pl_rep = 'ByteLevelBPE_tokenizer_pl'\n","path_to_ByteLevelBPE_tokenizer_pl_rep = path_data/ByteLevelBPE_tokenizer_pl_rep\n","if not (path_to_ByteLevelBPE_tokenizer_pl_rep).exists():\n"," path_to_ByteLevelBPE_tokenizer_pl_rep.mkdir(exist_ok=True, parents=True)\n","# ByteLevelBPE_tokenizer_pl.save_model(str(path_to_ByteLevelBPE_tokenizer_pl_rep))"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"beVl5rWQ16to","executionInfo":{"status":"ok","timestamp":1616333154297,"user_tz":-60,"elapsed":290183,"user":{"displayName":"Marek 
Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"b42f3b97-56f9-4e01-d9e0-806c8eadda4b"},"source":["ls /root/.fastai/data/plwiki -all"],"execution_count":null,"outputs":[{"output_type":"stream","text":["total 2147980\n","drwxr-xr-x 3 root root 4096 Mar 21 13:25 \u001b[0m\u001b[01;34m.\u001b[0m/\n","drwxr-xr-x 3 root root 4096 Mar 21 13:23 \u001b[01;34m..\u001b[0m/\n","-rw------- 1 root root 1101183658 Mar 21 13:23 all_texts_plwiki.csv\n","-rw------- 1 root root 1098323868 Mar 21 13:24 all_texts_plwiki.txt\n","drwxr-xr-x 2 root root 4096 Mar 21 13:25 \u001b[01;34mByteLevelBPE_tokenizer_pl\u001b[0m/\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"OXbGsBhBxN3P"},"source":["#copying previiously created pl okenizer ( saving ~30min fro preparing that)\n","!cp /content/gdrive/MyDrive/fastai/vocab.json /root/.fastai/data/plwiki/ByteLevelBPE_tokenizer_pl\n","!cp /content/gdrive/MyDrive/fastai/merges.txt /root/.fastai/data/plwiki/ByteLevelBPE_tokenizer_pl"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"VS0TEzJy3qBy"},"source":["from tokenizers.implementations import ByteLevelBPETokenizer\n","ByteLevelBPE_tokenizer_pl = ByteLevelBPETokenizer(\n"," \"/root/.fastai/data/plwiki/ByteLevelBPE_tokenizer_pl/vocab.json\",\n"," \"/root/.fastai/data/plwiki/ByteLevelBPE_tokenizer_pl/merges.txt\",\n",")"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"aB9cX5nV4kOv"},"source":["Testing if it is working"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Vwo-Baa4xN0w","executionInfo":{"status":"ok","timestamp":1616333156963,"user_tz":-60,"elapsed":292842,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"765f231b-748a-44b5-b293-908ad041078e"},"source":["# Get vocab as a list\n","ByteLevelBPE_tokenizer_pl_vocab = ByteLevelBPE_tokenizer_pl.get_vocab() \n","ByteLevelBPE_tokenizer_pl_vocab_ls = [k for k, v in 
sorted(ByteLevelBPE_tokenizer_pl_vocab.items(), key=lambda item: item[1])]\n","len(ByteLevelBPE_tokenizer_pl_vocab_ls),ByteLevelBPE_tokenizer_pl_vocab_ls[:5]"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(50257, ['<|endoftext|>', '!', '\"', '#', '$'])"]},"metadata":{"tags":[]},"execution_count":24}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"1S_KAGDsxNyA","executionInfo":{"status":"ok","timestamp":1616333156965,"user_tz":-60,"elapsed":292840,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"da5042b8-6e01-423d-c718-eb69f8cf7761"},"source":["text = \"Taki mały tekst dla sprawdzenia .\"\n","output = ByteLevelBPE_tokenizer_pl.encode(text)\n","print('\\n splitting by tokens\\n ')\n","print(output.ids,)\n","print(output.tokens)\n","print(output.offsets)\n","\n","back_to_text = ByteLevelBPE_tokenizer_pl.decode(ByteLevelBPE_tokenizer_pl.encode(text).ids)\n","\n","print('\\ninput text:', text)\n","print('tokens ids:', output.ids)\n","print('back to text:', back_to_text)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["\n"," splitting by tokens\n"," \n","[5565, 335, 10120, 7591, 624, 1877, 1054, 4461]\n","['Ta', 'ki', 'ĠmaÅĤy', 'Ġtekst', 'Ġdla', 'Ġspraw', 'dzenia', 'Ġ.']\n","[(0, 2), (2, 4), (4, 9), (9, 15), (15, 19), (19, 25), (25, 31), (31, 33)]\n","\n","input text: Taki mały tekst dla sprawdzenia .\n","tokens ids: [5565, 335, 10120, 7591, 624, 1877, 1054, 4461]\n","back to text: Taki mały tekst dla sprawdzenia .\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"pnsLlsZVOf-_"},"source":[""]},{"cell_type":"markdown","metadata":{"id":"BYgcVlYB9NYd"},"source":["# 5. 
Create a fastai tokenizer and update the embeddings matrix of the GPT-2 English pre-trained model"]},{"cell_type":"markdown","metadata":{"id":"2Fmobv0m9NYd"},"source":["Now let's see how we can use fastai v2 to fine-tune this model on Wikipedia in Polish, using all the fastai v2 training utilities.\n","\n","We will follow these 2 steps:"]},{"cell_type":"markdown","metadata":{"id":"mD9vUIko9NYd"},"source":["- 4.1) **GPT2TokenizerFast (imported GPT-2 tokenizer) --> fastai Tokenizer**: to process the data to train a model, we need to build a fastai tokenizer from the GPT-2 tokenizer with vocab in Polish.\n","- 4.2) **Change vocab embeddings (wte matrix) in the GPT-2 pre-trained model to adapt to the Polish vocab**: as the vocab embedding matrix (wte) of the pre-trained GPT-2 model corresponds to the English vocabulary, we'll keep the embeddings vectors of the common tokens between the English and Polish vocab."]},{"cell_type":"markdown","metadata":{"id":"hHrKpeRo9NYe"},"source":[" First, we import all the text utilities:"]},{"cell_type":"code","metadata":{"id":"63BrGAAX9NYe"},"source":["from fastai.text.all import *"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"dhfoTxlr9NYh"},"source":["#### 4.1 GPT2TokenizerFast (imported GPT-2 tokenizer) --> fastai Tokenizer"]},{"cell_type":"markdown","metadata":{"id":"wj-Y5lue9NYh"},"source":["*(text from Sylvain Gugger Transformers Tutorial)* To process this data to train a model, we need to build a `Transform` that will be applied lazily. 
In a fastai `Transform` you can define:\n","- an `encodes` method that is applied when you call the transform (a bit like the `forward` method in a `nn.Module`)\n","- a `decodes` method that is applied when you call the [decode](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.PreTrainedTokenizer.decode) method of the transform, if you need to decode anything for showing purposes (like converting ids to a text here)\n","- a `setups` method that sets some inner state of the `Transform` (not needed here)"]},{"cell_type":"code","metadata":{"id":"cjTS3O4W9NYi"},"source":["class TransformersTokenizer(Transform):\n"," def __init__(self, tokenizer): self.tokenizer = tokenizer\n"," def encodes(self, x): \n"," toks = self.tokenizer.tokenize(x)\n"," return tensor(self.tokenizer.convert_tokens_to_ids(toks))\n"," def decodes(self, x): return TitledStr(self.tokenizer.decode(x.cpu().numpy()))"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"BQrK6KdF9NYk"},"source":["Two comments on the code above:\n","- in `encodes` we don't use the [tokenizer.encode](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.PreTrainedTokenizer.encode) method since it does some additional preprocessing for the model after tokenizing and numericalizing (the part throwing a warning before). Here we don't need any post-processing so it's fine to skip it and we use the [tokenizer.tokenize](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.PreTrainedTokenizer.tokenize) method followed by the [tokenizer.convert_tokens_to_ids](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.PreTrainedTokenizer.convert_tokens_to_ids) one.\n","- in `decodes` we return a `TitledStr` object and not just a plain string. 
That's a fastai class that adds a `show` method to the string, which will allow us to use all the fastai show methods."]},{"cell_type":"markdown","metadata":{"id":"rg1SBjFg9NYl"},"source":["##### Tokenizers"]},{"cell_type":"markdown","metadata":{"id":"7eT6Ao209qhy"},"source":["ENGLISH"]},{"cell_type":"code","metadata":{"id":"BQkp9fdc9NYl","colab":{"base_uri":"https://localhost:8080/","height":154,"referenced_widgets":["007c3da396954f70a2906377d4792428","ef838f3ace384c62b4cea145ae3f1def","e8e2ce51cc244407b612edabe6cb9347","3a81e636511542028f37db75c58b0df3","001f30a0328645bcbae474bd0ecb784c","8af84301f2c4416fa727bfd613b7ba9c","a0a0092604a5494b9e5f3947cfe4fd8e","de4dbaa8e418452cad8237812ce45aef","9df88e6e1226457e9b58d2171abc5b0b","54f9353d6516478b8b20ba8a92a14086","5f357509b9c74c239bf9a50fe3e795e0","22a330ae10f44a98a5e2210e1e0f83fe","cce461c343214f4b8ee48873d9e92722","de5bdfa89ab84e27928938d29eb3beef","05763c86c54a4c2ebff74747363eb1f8","ed5f9c349cc7448db86872a7a0bcaeab"]},"executionInfo":{"status":"ok","timestamp":1616333190116,"user_tz":-60,"elapsed":310762,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"125aa0b8-a0b9-4199-9ce9-5f0ece033486"},"source":["%%time\n","# Load the GPT2 tokenizer in English\n","from transformers import GPT2TokenizerFast, GPT2LMHeadModel\n","pretrained_weights = 'gpt2'\n","tokenizer_en = GPT2TokenizerFast.from_pretrained(pretrained_weights)\n","model_en = GPT2LMHeadModel.from_pretrained(pretrained_weights)\n","\n","# To correct the warning about token_pad (GPT2TokenizerFast), run the following code\n","# source: https://github.com/huggingface/transformers/issues/2648#issuecomment-616177044\n","tokenizer_en.pad_token = 
tokenizer_en.eos_token"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"application/vnd.jupyter.widget-view+json":{"model_id":"007c3da396954f70a2906377d4792428","version_minor":0,"version_major":2},"text/plain":["HBox(children=(FloatProgress(value=0.0, description='Downloading', max=665.0, style=ProgressStyle(description_…"]},"metadata":{"tags":[]}},{"output_type":"stream","text":["\n"],"name":"stdout"},{"output_type":"display_data","data":{"application/vnd.jupyter.widget-view+json":{"model_id":"9df88e6e1226457e9b58d2171abc5b0b","version_minor":0,"version_major":2},"text/plain":["HBox(children=(FloatProgress(value=0.0, description='Downloading', max=548118077.0, style=ProgressStyle(descri…"]},"metadata":{"tags":[]}},{"output_type":"stream","text":["\n","CPU times: user 18.4 s, sys: 2.41 s, total: 20.8 s\n","Wall time: 32.7 s\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"Xvucwww09s0a"},"source":["POLISH"]},{"cell_type":"code","metadata":{"id":"oEDiYVJy9NYo"},"source":["# Get the path to ByteLevelBPE_tokenizer_pt config files\n","ByteLevelBPE_tokenizer_pl_rep = 'ByteLevelBPE_tokenizer_pl'\n","path_to_ByteLevelBPE_tokenizer_pl_rep = path_data/ByteLevelBPE_tokenizer_pl_rep\n","\n","# import the pre-trained GPT2TokenizerFast tokenizer with the tokenizer_pt config files\n","tokenizer_pl = GPT2TokenizerFast.from_pretrained(\n"," str(path_to_ByteLevelBPE_tokenizer_pl_rep), \n"," pad_token='<|endoftext|>')\n","\n","# Get sequence length max of 1024\n","tokenizer_pl.model_max_length = 1024"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"1lSq0oP0xNvY"},"source":["tokenizer_pl.model_max_length = 1024"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"GeSgUiiz9NZq"},"source":["##### Sample (this allows us to quickly test our code======================)"]},{"cell_type":"markdown","metadata":{"id":"ZVD6ZrCs9NZq"},"source":["- train: 80%\n","- val = 
20%"]},{"cell_type":"code","metadata":{"id":"w-Lbo-bB9NZr"},"source":["df_sample = df[:1000]\n","\n","num = int(0.8*len(df_sample))\n","\n","idxs = np.random.randint(0, len(df_sample), len(df_sample))\n","idxs_train = idxs[:num]\n","idxs_val = idxs[num:]"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"xCAWqMOW9NZt"},"source":["We gather all texts in one numpy array (since it will be easier to use this way with fastai):"]},{"cell_type":"code","metadata":{"id":"lfmfeaZ49NZt","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616065678540,"user_tz":-60,"elapsed":668,"user":{"displayName":"Mark Lina","photoUrl":"","userId":"17651129667533642938"}},"outputId":"78d9d2ca-4773-47ba-bad4-5f33ef3792a5"},"source":["%%time\n","all_texts = np.concatenate([df_sample.iloc[idxs_train].text.values, df_sample.iloc[idxs_val].text.values])"],"execution_count":null,"outputs":[{"output_type":"stream","text":["CPU times: user 2.63 ms, sys: 0 ns, total: 2.63 ms\n","Wall time: 5.8 ms\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"zIi7JXj49NZv","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616065685751,"user_tz":-60,"elapsed":669,"user":{"displayName":"Mark Lina","photoUrl":"","userId":"17651129667533642938"}},"outputId":"d97cbb6b-e1ec-4f0c-ae38-7db39ee2e5c3"},"source":["%%time\n","splits = [list(idxs_train), list(idxs_val)]\n","tls = TfmdLists(all_texts, TransformersTokenizer(tokenizer_pl), splits=splits, dl_type=LMDataLoader)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["CPU times: user 8.48 ms, sys: 858 µs, total: 9.34 ms\n","Wall time: 10.4 ms\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"MgCWf26z9NZz"},"source":["We specify `dl_type=LMDataLoader` for when we will convert this `TfmdLists` to `DataLoaders`: we will use an `LMDataLoader` since we have a language modeling problem, not the usual fastai 
`TfmdDL`."]},{"cell_type":"markdown","metadata":{"id":"Nl7vLTd6wOis"},"source":[""]},{"cell_type":"markdown","metadata":{"id":"KRmicfJ99NZz"},"source":["##### All data"]},{"cell_type":"code","metadata":{"id":"71luExLxufk-"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"7hU8wH-kwU6q"},"source":["# Taking 50% of the df to be able to train one cycle in 5h"]},{"cell_type":"markdown","metadata":{"id":"1H1SGwyLuqdI"},"source":["When I tried to train a model on this df in a reasonable time, it turned out that it would take more than 10h (hence this attempt to use 50% of the df to make it quicker)."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":300},"id":"kyLrg_bi3NCG","executionInfo":{"status":"ok","timestamp":1616333191409,"user_tz":-60,"elapsed":1275,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"e78b42b1-166b-4150-be3a-686bd85d81fb"},"source":["# df1= df.sample(frac=0.5)\n","# df.sample(3)"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
text
80080Józef Krukowski (ur. 4 stycznia 1936 w Sułowcu) – ksiądz katolicki, teolog, kanonista, profesor zwyczajny, pierwszy dziekan reaktywowanego Wydziału Prawa Kanonicznego i Świeckiego na Katolickim Uniwersytecie Lubelskim. Specjalista z zakresu kanonicznego prawa małżeńskiego, stosunków Państwo-Kościół, prawa konkordatowego i prawa wyznaniowego. Wieloletni wykładowca KUL i UKSW. \\n\\nEdukację podstawową odbył w 1949 w Radecznicy. W 1953 ukończył Liceum Biskupie w Lublinie i wstąpił do Wyższego Seminarium Duchownego w Lublinie. Otrzymał święcenia kapłańskie. Na Wydziale Teologii Katolickiego Uni...
20629Rezydencja Piękna Nova – apartamentowiec na Śródmieściu Południowym w Warszawie, który stanowi połączenie kamienicy stylizowanej na XIX-wieczną architekturę z nowoczesną zabudową. Powstał w latach 2008-2010 według projektu pracowni Andrzeja Bulandy i Włodzimierza Muchy.\\n\\nNiegdyś w miejscu dzisiejszego apartamentowca znajdował się pałacyk będący siedzibą Ambasady Niemiec. W 1939 w wyniku bombardowania Warszawy w czasie II wojny światowej przez Luftwaffe budynek spłonął, a po wojnie go zburzono. Wycięto też drzewa. W okresie powojennym działka stanowiła zaplecze kamieniarskie budowy poblis...
73159Inintimajos lub Inintimeos, właśc. Tyberiusz Juliusz Inintimajos Filokajsar Filoromajos Eusebes (gr.: \"Τιβέριος Ἰούλιος Iνινθιμηος Φιλόκαισαρ Φιλορώμαίος Eυσεbής\", \"Tibérios Ioúlios Ininthimēos Filókaisar Filorṓmaíos Eusebḗs\") (zm. 239) – król Bosporu z dynastii Asandrydów od 234 do swej śmierci. Prawdopodobnie młodszy syn króla Bosporu Tyberiusza Juliusza Sauromatesa III Filokajsara Filoromajosa Eusebesa i nieznanej z imienia królowej.\\n\\nInintimajos prawdopodobnie odziedziczył imię po przodkach ze strony matki. Ze strony ojca miał perskiego, greckiego, rzymskiego, trackiego oraz prawdopo...
\n","
"],"text/plain":[" text\n","80080 Józef Krukowski (ur. 4 stycznia 1936 w Sułowcu) – ksiądz katolicki, teolog, kanonista, profesor zwyczajny, pierwszy dziekan reaktywowanego Wydziału Prawa Kanonicznego i Świeckiego na Katolickim Uniwersytecie Lubelskim. Specjalista z zakresu kanonicznego prawa małżeńskiego, stosunków Państwo-Kościół, prawa konkordatowego i prawa wyznaniowego. Wieloletni wykładowca KUL i UKSW. \\n\\nEdukację podstawową odbył w 1949 w Radecznicy. W 1953 ukończył Liceum Biskupie w Lublinie i wstąpił do Wyższego Seminarium Duchownego w Lublinie. Otrzymał święcenia kapłańskie. Na Wydziale Teologii Katolickiego Uni...\n","20629 Rezydencja Piękna Nova – apartamentowiec na Śródmieściu Południowym w Warszawie, który stanowi połączenie kamienicy stylizowanej na XIX-wieczną architekturę z nowoczesną zabudową. Powstał w latach 2008-2010 według projektu pracowni Andrzeja Bulandy i Włodzimierza Muchy.\\n\\nNiegdyś w miejscu dzisiejszego apartamentowca znajdował się pałacyk będący siedzibą Ambasady Niemiec. W 1939 w wyniku bombardowania Warszawy w czasie II wojny światowej przez Luftwaffe budynek spłonął, a po wojnie go zburzono. Wycięto też drzewa. W okresie powojennym działka stanowiła zaplecze kamieniarskie budowy poblis...\n","73159 Inintimajos lub Inintimeos, właśc. Tyberiusz Juliusz Inintimajos Filokajsar Filoromajos Eusebes (gr.: \"Τιβέριος Ἰούλιος Iνινθιμηος Φιλόκαισαρ Φιλορώμαίος Eυσεbής\", \"Tibérios Ioúlios Ininthimēos Filókaisar Filorṓmaíos Eusebḗs\") (zm. 239) – król Bosporu z dynastii Asandrydów od 234 do swej śmierci. Prawdopodobnie młodszy syn króla Bosporu Tyberiusza Juliusza Sauromatesa III Filokajsara Filoromajosa Eusebesa i nieznanej z imienia królowej.\\n\\nInintimajos prawdopodobnie odziedziczył imię po przodkach ze strony matki. 
Ze strony ojca miał perskiego, greckiego, rzymskiego, trackiego oraz prawdopo..."]},"metadata":{"tags":[]},"execution_count":30}]},{"cell_type":"markdown","metadata":{"id":"iQyvLoEGzSKb"},"source":["SAVE"]},{"cell_type":"code","metadata":{"id":"U_65aMKYzEH2"},"source":["# df1.to_csv('/content/gdrive/MyDrive/fastai/05_1pl-wiki.csv', index= False)"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Cwf-Virbzco-"},"source":["*LOAD* 500MB to speed up training"]},{"cell_type":"code","metadata":{"id":"Qz4Tm8eszeiO"},"source":["df = pd.read_csv('/content/gdrive/MyDrive/fastai/05_1pl-wiki.csv')"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":467},"id":"DrwL89hr5KDL","executionInfo":{"status":"ok","timestamp":1616333214579,"user_tz":-60,"elapsed":24428,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"59bb80f4-8000-490e-d7cc-e0ba0714c04e"},"source":["df.sample(5)"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
text
76544Just Dance 2014 – piąta gra z serii Just Dance, stworzona i wydana przez firmę Ubisoft. Została wydana na konsole siódmej generacji w październiku 2013 roku i jako tytuł startowy na PlayStation 4 i Xbox One. Gra została oficjalnie zapowiedziana na konferencji prasowej Ubisoftu na E3 2013. To pierwsza gra z serii, która zawiera w tytule rok, zamiast kolejnego numeru gry.\\n\\nGra obsługuje do sześciu graczy jednocześnie w wersji na Xbox One oraz maksymalnie czterech graczy w przypadku innych konsol. Wszystkie wersje zawierają tryb \"Klasyczny\" znany z poprzednich gier z serii. W trybie \"On-Sta...
69921Eugenia Róża Pragierowa z domu Berke (ur. 14 lipca 1888 w Kaliszu, zm. 5 maja 1964 w Warszawie) – prawnik, działaczka polskiego ruchu socjalistycznego, feministka, polityk Polskiej Partii Socjalistycznej i Polskiej Zjednoczonej Partii Robotniczej, członek Polskiego Komitetu Obrońców Pokoju w 1949 roku.\\n\\nPrawdopodobnie pochodziła z rodziny żydowskiej. Córka Henryka i Antoni. Uczyła się w kaliskim gimnazjum żeńskim. Uczestniczka strajków szkolnych w 1905, aresztowana, przebywała w więzieniu łącznie siedem miesięcy.\\n\\nStudiowała historię na Uniwersytecie Jagiellońskim, a w latach 1908–1911...
45974Wojna stuletnia – nazwa nadana przez XIX-wiecznych historyków serii konfliktów zbrojnych, które toczyły się przez 116 lat (z przerwami) w XIV i XV wieku między Anglią a Francją.\\n\\nPodstawową przyczyną konfliktu był spór feudalny wokół statusu króla angielskiego wobec francuskiego w kontekście posiadania przez tego pierwszego lenn w Gujennie, narastający od traktatu paryskiego z 1259, który na pewien czas uregulował owe stosunki. Drugą przyczyną, która nabrała znaczenia jednak dopiero w późniejszej fazie wojny, był konflikt sukcesyjny, powstały po śmierci w 1328 ostatniego z najstarszej li...
98686Szczepan Walkowski (ur. 20 listopada 1912 w Wieruszowie, zm. 8 czerwca 1969 w Ezpeleta w Argentynie) – ksiądz kapelan pilot Polskich Sił Powietrznych w Wielkiej Brytanii, uczestnik II wojny światowej, kawaler Orderu Odrodzenia Polski.\\n\\nUrodził się w rodzinie Stanisława i Eleonory z d. Wolna. Absolwent gimnazjum w Kępnie i Częstochowskiego Seminarium Duchownego w Krakowie. Studiował też teologię na Uniwersytecie Jagiellońskim. 25 czerwca 1939 otrzymał święcenia kapłańskie. Wikariusz w parafii Lututów. Po ataku niemieckim we wrześniu 1939 przedostał się przez Rumunię i Jugosławię do Włoch....
32167Roy Herbert Thomson (ur. 5 czerwca 1894 w Toronto, zm. 4 sierpnia 1976 w Londynie) – kanadyjski magnat prasowy, inwestor, założyciel imperium medialnego The Thomson Corporation (dzisiaj jako Thomson Reuters) i wielu innych przedsiębiorstw.\\n\\nPochodzący z wiejskiej Szkocji ród Thomsonów jest głęboko osadzony w najnowszej historii Kanady. Pierwszym historycznie osadnikiem w osadzie Scarborough (później dzielnica Toronto) był farmer David Thomson. Jego bratanek miał dziesięcioro dzieci, a wśród nich Herberta Thomsona, golibrodę w torontońskim Hotelu Grosvenor. W jego rodzinie przyszedł na św...
\n","
"],"text/plain":[" text\n","76544 Just Dance 2014 – piąta gra z serii Just Dance, stworzona i wydana przez firmę Ubisoft. Została wydana na konsole siódmej generacji w październiku 2013 roku i jako tytuł startowy na PlayStation 4 i Xbox One. Gra została oficjalnie zapowiedziana na konferencji prasowej Ubisoftu na E3 2013. To pierwsza gra z serii, która zawiera w tytule rok, zamiast kolejnego numeru gry.\\n\\nGra obsługuje do sześciu graczy jednocześnie w wersji na Xbox One oraz maksymalnie czterech graczy w przypadku innych konsol. Wszystkie wersje zawierają tryb \"Klasyczny\" znany z poprzednich gier z serii. W trybie \"On-Sta...\n","69921 Eugenia Róża Pragierowa z domu Berke (ur. 14 lipca 1888 w Kaliszu, zm. 5 maja 1964 w Warszawie) – prawnik, działaczka polskiego ruchu socjalistycznego, feministka, polityk Polskiej Partii Socjalistycznej i Polskiej Zjednoczonej Partii Robotniczej, członek Polskiego Komitetu Obrońców Pokoju w 1949 roku.\\n\\nPrawdopodobnie pochodziła z rodziny żydowskiej. Córka Henryka i Antoni. Uczyła się w kaliskim gimnazjum żeńskim. Uczestniczka strajków szkolnych w 1905, aresztowana, przebywała w więzieniu łącznie siedem miesięcy.\\n\\nStudiowała historię na Uniwersytecie Jagiellońskim, a w latach 1908–1911...\n","45974 Wojna stuletnia – nazwa nadana przez XIX-wiecznych historyków serii konfliktów zbrojnych, które toczyły się przez 116 lat (z przerwami) w XIV i XV wieku między Anglią a Francją.\\n\\nPodstawową przyczyną konfliktu był spór feudalny wokół statusu króla angielskiego wobec francuskiego w kontekście posiadania przez tego pierwszego lenn w Gujennie, narastający od traktatu paryskiego z 1259, który na pewien czas uregulował owe stosunki. Drugą przyczyną, która nabrała znaczenia jednak dopiero w późniejszej fazie wojny, był konflikt sukcesyjny, powstały po śmierci w 1328 ostatniego z najstarszej li...\n","98686 Szczepan Walkowski (ur. 20 listopada 1912 w Wieruszowie, zm. 
8 czerwca 1969 w Ezpeleta w Argentynie) – ksiądz kapelan pilot Polskich Sił Powietrznych w Wielkiej Brytanii, uczestnik II wojny światowej, kawaler Orderu Odrodzenia Polski.\\n\\nUrodził się w rodzinie Stanisława i Eleonory z d. Wolna. Absolwent gimnazjum w Kępnie i Częstochowskiego Seminarium Duchownego w Krakowie. Studiował też teologię na Uniwersytecie Jagiellońskim. 25 czerwca 1939 otrzymał święcenia kapłańskie. Wikariusz w parafii Lututów. Po ataku niemieckim we wrześniu 1939 przedostał się przez Rumunię i Jugosławię do Włoch....\n","32167 Roy Herbert Thomson (ur. 5 czerwca 1894 w Toronto, zm. 4 sierpnia 1976 w Londynie) – kanadyjski magnat prasowy, inwestor, założyciel imperium medialnego The Thomson Corporation (dzisiaj jako Thomson Reuters) i wielu innych przedsiębiorstw.\\n\\nPochodzący z wiejskiej Szkocji ród Thomsonów jest głęboko osadzony w najnowszej historii Kanady. Pierwszym historycznie osadnikiem w osadzie Scarborough (później dzielnica Toronto) był farmer David Thomson. Jego bratanek miał dziesięcioro dzieci, a wśród nich Herberta Thomsona, golibrodę w torontońskim Hotelu Grosvenor. 
W jego rodzinie przyszedł na św..."]},"metadata":{"tags":[]},"execution_count":33}]},{"cell_type":"markdown","metadata":{"id":"4oGbWUBh9NZ0"},"source":["- train: 80%\n","- val = 20%"]},{"cell_type":"code","metadata":{"id":"tHYnpj3E9NZ0"},"source":["num = int(0.8*len(df))\n","\n","# shuffle the row indices WITHOUT replacement so that train and valid are disjoint\n","# (randint sampled with replacement: duplicated rows leaked between the two splits)\n","idxs = np.random.permutation(len(df))\n","idxs_train = idxs[:num]\n","idxs_val = idxs[num:]\n","\n","# save idxs train and valid\n","torch.save(idxs_train, path_data/'idxs_train.pl')\n","torch.save(idxs_val, path_data/'idxs_val.pl')"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Cmuxe717LVQr"},"source":["SAVING"]},{"cell_type":"code","metadata":{"id":"EspHnFC6CeMg"},"source":["!cp /root/.fastai/data/plwiki/idxs_train.pl /content/gdrive/MyDrive/fastai\n","!cp /root/.fastai/data/plwiki/idxs_val.pl /content/gdrive/MyDrive/fastai"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"v1yY6gJQLUAL"},"source":["LOADING"]},{"cell_type":"code","metadata":{"id":"vxtkyVDbK87W"},"source":["!cp /content/gdrive/MyDrive/fastai/idxs_train.pl /root/.fastai/data/plwiki\n","!cp /content/gdrive/MyDrive/fastai/idxs_val.pl /root/.fastai/data/plwiki"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"miw2it8R9NZ3"},"source":["# load idxs train and valid\n","idxs_train = torch.load(path_data/'idxs_train.pl')\n","idxs_val = torch.load(path_data/'idxs_val.pl')"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"zzVoS2Zy9NZ5"},"source":["We gather all texts in one numpy array (since it will be easier to use this way with fastai):"]},{"cell_type":"code","metadata":{"id":"si2TssSh9NZ5","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616333496696,"user_tz":-60,"elapsed":979,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"5dce2eef-fcd8-4579-ea83-172ac7365b32"},"source":["%%time\n","all_texts = 
np.concatenate([df.iloc[idxs_train].text.values, df.iloc[idxs_val].text.values])"],"execution_count":null,"outputs":[{"output_type":"stream","text":["CPU times: user 28.3 ms, sys: 44.6 ms, total: 72.9 ms\n","Wall time: 72.9 ms\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"JwYj28ON9NZ7","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616333499808,"user_tz":-60,"elapsed":873,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"02d80251-064d-447c-de25-b8124fd7d2ee"},"source":["%%time\n","splits = [list(idxs_train), list(idxs_val)]\n","tls = TfmdLists(all_texts, TransformersTokenizer(tokenizer_pl), splits=splits, dl_type=LMDataLoader)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Token indices sequence length is longer than the specified maximum sequence length for this model (2174 > 1024). Running this sequence through the model will result in indexing errors\n"],"name":"stderr"},{"output_type":"stream","text":["CPU times: user 91.2 ms, sys: 9.93 ms, total: 101 ms\n","Wall time: 101 ms\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"-YI7gfnh9NZ-"},"source":["We specify `dl_type=LMDataLoader` for when we will convert this `TfmdLists` to `DataLoaders`: we will use an `LMDataLoader` since we have a language modeling problem, not the usual fastai `TfmdDL`."]},{"cell_type":"markdown","metadata":{"id":"qlao5xNg9NZ-"},"source":["##### Check datasets"]},{"cell_type":"markdown","metadata":{"id":"spFg6v8j9NZ-"},"source":["In a `TfmdLists` you can access to the elements of the training or validation set quite easily:"]},{"cell_type":"code","metadata":{"id":"prVLO67E9NaA","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616333504547,"user_tz":-60,"elapsed":876,"user":{"displayName":"Marek 
Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"70a9542e-7811-410a-b957-3e1c04bbd15a"},"source":["tls.train[0],tls.valid[0]"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(tensor([39020, 685, 2526, ..., 859, 9016, 12]),\n"," tensor([ 28, 19903, 30, 15583, 19903, 30, 199, 18704, 2944, 562,\n"," 441, 15587, 11590, 2446, 7100, 25190, 4910, 24809, 5189, 18436,\n"," 14, 1978, 524, 830, 11590, 2446, 4910, 24809, 365, 8171,\n"," 5142, 389, 3921, 2601, 409, 604, 2, 343, 8500, 497,\n"," 18135, 260, 1465, 7060, 332, 2197, 1279, 1878, 14, 5272,\n"," 32828, 315, 5130, 4612, 332, 1441, 1279, 1878, 14, 11276,\n"," 1943, 32958, 8645, 389, 39, 789, 1312, 17592, 713, 9446,\n"," 3372, 289, 357, 1900, 522, 14, 51, 14, 19489, 538,\n"," 14, 19489, 6862, 389, 18704, 2944, 562, 2, 311, 15838,\n"," 9584, 1878, 388, 14, 44448, 2208, 734, 6702, 902, 8717,\n"," 42250, 332, 5631, 260, 1878, 14, 9291, 11299, 263, 5418,\n"," 1103, 389, 48, 3217, 15482, 1549, 2, 474, 25027, 10668,\n"," 23906, 1213, 954, 6039, 18335, 26532, 8603, 13271, 1903, 260,\n"," 14767, 2524, 703, 26, 476, 30428, 40665, 17936, 369, 996,\n"," 476, 30428, 15321, 1873, 338, 467, 478, 476, 30428, 15321,\n"," 388, 849, 3190, 260, 3580, 1903, 930, 82, 14, 954,\n"," 6039, 18335, 13465, 8603, 13271, 1903, 260, 28563, 4910, 24809,\n"," 4181, 4890, 260, 4039, 476, 36764, 360, 12954, 467, 311,\n"," 2446, 389, 18704, 2944, 562, 713, 199, 199, 17820, 18436,\n"," 5248, 21426, 5852, 289, 2446, 260, 3580, 2588, 12, 32705,\n"," 537, 263, 2246, 8965, 26353, 31407, 308, 14, 397, 23884,\n"," 263, 792, 16801, 11741, 605, 12, 4343, 48137, 463, 22644,\n"," 7910, 286, 282, 35843, 792, 2831, 3213, 12, 15952, 332,\n"," 260, 2006, 388, 289, 10384, 1801, 1239, 263, 48044, 405,\n"," 50, 9662, 352, 12, 9651, 359, 286, 11542, 14362, 42501,\n"," 1008, 14, 1626, 3210, 2006, 10126, 5010, 3213, 286, 6961,\n"," 274, 38639, 12, 334, 1234, 21159, 3177, 592, 2159, 263,\n"," 7748, 6599, 2624, 
2277, 29241, 84, 266, 286, 39084, 2908,\n"," 89, 5333, 33795, 1796, 14, 309, 3379, 3873, 5555, 10184,\n"," 15609, 38707, 2277, 349, 11, 45, 6377, 6735, 18796, 1153,\n"," 260, 18981, 260, 4618, 2492, 14, 199, 199, 51, 745,\n"," 296, 21820, 532, 306, 389, 18704, 2944, 562, 2, 9511,\n"," 332, 260, 4428, 1588, 12, 21761, 263, 1156, 7147, 21820,\n"," 628, 260, 2442, 21971, 14, 10940, 7327, 12533, 541, 18265,\n"," 87, 624, 33675, 792, 1720, 286, 38455, 7022, 7106, 7790,\n"," 21316, 12, 4982, 5189, 18436, 2913, 8279, 1739, 14, 651,\n"," 29901, 387, 792, 2446, 45357, 306, 17546, 28744, 12, 422,\n"," 1166, 1571, 15447, 2092, 14, 5680, 2944, 562, 461, 1812,\n"," 41826, 13582, 29372, 12, 2194, 5629, 2907, 312, 12, 18419,\n"," 1187, 504, 286, 22206, 577, 24501, 14, 10579, 302, 5418,\n"," 12391, 306, 29025, 21971, 478, 17546, 28744, 14, 37087, 306,\n"," 5418, 1303, 7624, 443, 9711, 14417, 41533, 3005, 11055, 1087,\n"," 1763, 14, 5680, 2944, 562, 1249, 1167, 5098, 260, 3041,\n"," 1189, 1318, 1878, 465, 12, 2045, 519, 2053, 289, 372,\n"," 40908, 715, 14063, 29560, 335, 5130, 826, 14974, 289, 1441,\n"," 1279, 14, 17376, 13292, 260, 3527, 20091, 17304, 365, 9590,\n"," 9560, 12, 1762, 29445, 12, 533, 461, 5832, 2711, 18449,\n"," 460, 263, 937, 5189, 18436, 12, 3406, 2620, 7324, 22033,\n"," 35867, 39910, 5504, 39443, 298, 25190, 14, 199, 199, 2173,\n"," 4859, 1196, 17746, 1103, 421, 789, 1312, 17592, 12, 2228,\n"," 325, 1200, 1549, 478, 6531, 462, 2601, 42239, 14, 4225,\n"," 15013, 648, 22957, 45477, 306, 6240, 3062, 294, 12, 2045,\n"," 24716, 332, 263, 1446, 11462, 12, 2728, 5189, 18436, 15952,\n"," 332, 10384, 1756, 4417, 2446, 14, 199, 199, 47335, 359,\n"," 6943, 12, 533, 4158, 5189, 18436, 939, 12473, 16759, 866,\n"," 5418, 12, 7839, 5676, 1501, 18410, 9439, 1070, 286, 1873,\n"," 827, 466, 12, 530, 372, 2712, 22639, 274, 792, 43346,\n"," 21316, 14, 9446, 9769, 830, 11299, 389, 421, 789, 1312,\n"," 17592, 389, 12, 40583, 12838, 40192, 13, 32715, 10232, 263,\n"," 37157, 41826, 277, 
286, 1073, 14, 37608, 263, 2084, 35536,\n"," 392, 34827, 389, 34437, 6377, 4910, 24809, 12, 2314, 266,\n"," 35137, 13288, 12391, 306, 372, 1705, 19090, 32776, 390, 2353,\n"," 18514, 18658, 25190, 14, 25965, 10232, 12, 334, 7803, 11299,\n"," 524, 2228, 325, 1200, 1549, 14, 37718, 274, 2923, 402,\n"," 286, 389, 25118, 2966, 10909, 2, 5189, 18436, 12, 571,\n"," 625, 320, 3397, 283, 443, 8562, 14, 350, 4415, 474,\n"," 14471, 260, 2068, 14767, 289, 18335, 26532, 8603, 13271, 1903,\n"," 286, 4013, 2394, 263, 1341, 12, 260, 592, 26532, 12954,\n"," 14, 34987, 28839, 5418, 12, 524, 389, 3062, 294, 389,\n"," 14, 1069, 28416, 4995, 286, 3785, 11299, 524, 389, 6531,\n"," 462, 2601, 42239, 713, 5189, 18436, 5402, 11114, 28302, 433,\n"," 1737, 389, 34437, 6377, 4802, 347, 334, 46534, 298, 1010,\n"," 2244, 556, 356, 496, 524, 21873, 1325, 15, 438, 278,\n"," 1152, 438, 273, 1535, 469, 78, 24565, 27787, 3462, 37025,\n"," 1310, 1300, 450, 1010, 4357, 77, 12, 533, 372, 11033,\n"," 77, 811, 15907, 342, 260, 413, 31008, 1325, 15, 14483,\n"," 438, 32540, 18190, 11012, 1010, 22237, 28702, 1092, 286, 9236,\n"," 1850, 28271, 394, 22911, 524, 389, 8448, 42090, 1301, 34464,\n"," 713, 5189, 18436, 2214, 314, 12, 533, 524, 792, 17952,\n"," 521, 10232, 263, 5418, 14, 43769, 274, 18686, 12, 2055,\n"," 18558, 286, 263, 2055, 29735, 332, 35037, 14, 199, 199,\n"," 199, 37439, 7290, 41516, 532]))"]},"metadata":{"tags":[]},"execution_count":40}]},{"cell_type":"markdown","metadata":{"id":"NprBhqyf9NaG"},"source":["They are not the same. 
We can see the shapes are different:"]},{"cell_type":"code","metadata":{"id":"y_nIjjcT9NaH","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616234915402,"user_tz":-60,"elapsed":4168,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"b87f1837-da08-4ad5-8851-e895f001429b"},"source":["tls.tfms(tls.train.items[0]).shape, tls.tfms(tls.valid.items[0]).shape"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(torch.Size([468]), torch.Size([402]))"]},"metadata":{"tags":[]},"execution_count":33}]},{"cell_type":"markdown","metadata":{"id":"g3OpL9Zt9NaI"},"source":["And we can have a look at both decodes using `show_at`:"]},{"cell_type":"code","metadata":{"scrolled":true,"id":"865QmFWs9NaJ"},"source":["show_at(tls.train, 0)"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"06TWBdZL9NaL"},"source":["#### 5.2 fastai v2 Dataloaders"]},{"cell_type":"markdown","metadata":{"id":"V9QUUcJ89NaL"},"source":["*(text from Sylvain Gugger Transformers Tutorial)* The fastai v2 library expects the data to be assembled in a `DataLoaders` object (something that has a training and validation dataloader). We can get one by using the `dataloaders` method. We just have to specify a batch size and a sequence length. 
\n","\n","Since the GPT-2 model was trained with sequences of size 1024, we use this sequence length (it's a stateless model, so it will change the perplexity if we use less)."]},{"cell_type":"code","metadata":{"id":"5LWheGOlvIHF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616237390858,"user_tz":-60,"elapsed":915842,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"95c4012d-6322-46c3-c9ce-7ab306a5b20a"},"source":["# %%time\n","# bs,sl = 6,1024\n","# dls = tls.dataloaders(bs=bs, seq_len=sl)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Token indices sequence length is longer than the specified maximum sequence length for this model (4097 > 1024). Running this sequence through the model will result in indexing errors\n"],"name":"stderr"},{"output_type":"stream","text":["CPU times: user 13min 39s, sys: 1min 35s, total: 15min 14s\n","Wall time: 15min 15s\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"4yje5bBSLMMt"},"source":["To avoid problems like the one above, and to avoid running out of GPU RAM, the batch size needs to be decreased:"]},{"cell_type":"code","metadata":{"id":"3wKo4BNVHb3S","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616334015758,"user_tz":-60,"elapsed":501984,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"c7c5f255-096e-49da-f03e-be36b3b8a837"},"source":["%%time\n","bs,sl = 2,1024\n","dls = tls.dataloaders(bs=bs, seq_len=sl)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["CPU times: user 8min 5s, sys: 9.01 s, total: 8min 14s\n","Wall time: 8min 20s\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"eEuli0s5DKXx"},"source":["It went well with bs=2, sl=1024 (2 x 1024)"]},{"cell_type":"markdown","metadata":{"id":"QjhygHGL9Naw"},"source":["# 6.2 Learner"]},{"cell_type":"markdown","metadata":{"id":"FQ5AW-oy9Naw"},"source":["*(text from 
Sylvain Gugger Transformers Tutorial)* Now, we are ready to create our `Learner`, which is a fastai object grouping data, model and loss function and handles model training or inference. Since we are in a language model setting, we pass accuracy and perplexity as metrics, and we need to use the callback we just defined. Lastly, we use mixed precision to save every bit of memory we can (and if you have a modern GPU, it will also make training faster)."]},{"cell_type":"code","metadata":{"id":"C7ANaSLS9Nax"},"source":["# Learner: basic class for handling the training loop\n","# source: https://dev.fast.ai/learner#Learner\n","learn = Learner(dls, model_en, loss_func=CrossEntropyLossFlat(),\n"," splitter = splitter,\n"," cbs=[DropOutput], \n"," metrics=[accuracy, Perplexity()]).to_fp16()"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"MH_-M0Uk9Naz","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616334677490,"user_tz":-60,"elapsed":868,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"7a5a285f-c180-4b98-9047-8b3c5d0000f5"},"source":["# Check the number of parameters groups and the hyperparameters values\n","learn.create_opt()\n","print(f'number of parameters groups: {len(learn.opt.param_groups)}')\n","\n","# ... 
and the list of Learning Rates (before they are updated by the Optimizer in the function fit_one_cycle())\n","for i,h in enumerate(learn.opt.hypers):\n"," print(i,h)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["number of parameters groups: 4\n","0 {'wd': 0.01, 'sqr_mom': 0.99, 'lr': 0.001, 'mom': 0.9, 'eps': 1e-05}\n","1 {'wd': 0.01, 'sqr_mom': 0.99, 'lr': 0.001, 'mom': 0.9, 'eps': 1e-05}\n","2 {'wd': 0.01, 'sqr_mom': 0.99, 'lr': 0.001, 'mom': 0.9, 'eps': 1e-05}\n","3 {'wd': 0.01, 'sqr_mom': 0.99, 'lr': 0.001, 'mom': 0.9, 'eps': 1e-05}\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"8sCPQ_ui9Na3"},"source":["- Loss = 9.95\n","- accuracy = 0.099\n","- perplexity = 20950.94"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":72},"id":"8X8heciU7pIn","executionInfo":{"status":"ok","timestamp":1616342819960,"user_tz":-60,"elapsed":8138398,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"a4185400-ac36-4f51-a266-241b6f233a33"},"source":["%%time\n","# loss, accuracy, Perplexity() of validation dataset\n","learn.validate()"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/html":[""],"text/plain":[""]},"metadata":{"tags":[]}},{"output_type":"stream","text":["CPU times: user 1h 22min 15s, sys: 53min 26s, total: 2h 15min 42s\n","Wall time: 2h 15min 37s\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":["(#3) [9.495806694030762,0.07362030446529388,13303.822265625]"]},"metadata":{"tags":[]},"execution_count":49}]},{"cell_type":"markdown","metadata":{"id":"hEEq5pLsDcHs"},"source":["For the 1GB file the results are:"]},{"cell_type":"code","metadata":{"id":"8zuhDngwvamp","colab":{"base_uri":"https://localhost:8080/","height":72},"executionInfo":{"status":"ok","timestamp":1616164560259,"user_tz":-60,"elapsed":15660926,"user":{"displayName":"Marek 
Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"5f30e595-7eca-4885-de1b-5c94ce353d77"},"source":["# %%time\n","# # loss, accuracy, Perplexity() of validation dataset\n","# learn.validate()"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/html":[""],"text/plain":[""]},"metadata":{"tags":[]}},{"output_type":"stream","text":["CPU times: user 2h 28min 1s, sys: 1h 53min 5s, total: 4h 21min 7s\n","Wall time: 4h 20min 59s\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":["(#3) [9.487800598144531,0.0741734430193901,13197.736328125]"]},"metadata":{"tags":[]},"execution_count":64}]},{"cell_type":"markdown","metadata":{"id":"WprBe3MM9Na3"},"source":["Now that we have a `Learner`, we will use during training all the **fine-tuning techniques** seen for classification model training (see the notebook [10_nlp.ipynb](https://github.com/fastai/fastbook/blob/master/10_nlp.ipynb) about \"NLP Deep Dive: RNNs\") to take advantage of the **Transfer Learning** of the GPT-2 pre-trained embeddings and model from Hugging Face Transformers:\n","- **learning rate finder** (method that helps finding the best learning rate to train the model)\n","- **Mixed precision training** (some of the operations will be done in FP16, others in FP32 in order to speed up the training)\n","- **gradual unfreezing** (the model has 4 layers groups created by our method `splitter` : the embedding one and the 3 groups of 4 decoder blocks each)\n","- **1cycle policy** with the method [fit_one_cycle()](https://dev.fast.ai/callback.schedule#Learner.fit_one_cycle) (The 1cycle policy was introduced by Leslie N. Smith et al. in Super-Convergence: Very Fast Training of Neural Networks Using Large Learning Rates. 
It schedules the learning rate with a cosine annealing from `lr_max/div` to `lr_max` then `lr_max/div_final` (pass an array to `lr_max` if you want to use differential learning rates) and the momentum with cosine annealing according to the values in `moms`. The first phase takes `pct_start` of the training. You can optionally pass additional `cbs` and `reset_opt`.)\n","- **differential learning rates** (each layers group with a learning rate different: the biggest one for the embeddings group, and the smallest one for the first 4 decoder blocks)"]},{"cell_type":"markdown","metadata":{"id":"yEmA-zFH9Na3"},"source":["##### 6.2.1 Freeze all layers but the last layers group (do not freeze `wte`, `wpe` embeddings matrices and last `LayerNorm`)"]},{"cell_type":"code","metadata":{"id":"sL3Ama1F9Na4","colab":{"base_uri":"https://localhost:8080/","height":1000},"executionInfo":{"status":"ok","timestamp":1616342821161,"user_tz":-60,"elapsed":8129374,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"5da0ca90-7da2-4433-bf02-986ef9fa00fc"},"source":["learn.freeze()\n","learn.summary()"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/html":[""],"text/plain":[""]},"metadata":{"tags":[]}},{"output_type":"execute_result","data":{"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"},"text/plain":["GPT2LMHeadModel (Input shape: 2)\n","============================================================================\n","Layer (type) Output Shape Param # Trainable \n","============================================================================\n"," 2 x 1024 x 768 \n","Embedding 38597376 True \n","Embedding 786432 True \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 2304 \n","Conv1D 1771776 False \n","Conv1D 590592 False \n","Dropout \n","Dropout \n","LayerNorm 1536 True 
\n","____________________________________________________________________________\n"," 2 x 1024 x 3072 \n","Conv1D 2362368 False \n","____________________________________________________________________________\n"," 2 x 1024 x 768 \n","Conv1D 2360064 False \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 2304 \n","Conv1D 1771776 False \n","Conv1D 590592 False \n","Dropout \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 3072 \n","Conv1D 2362368 False \n","____________________________________________________________________________\n"," 2 x 1024 x 768 \n","Conv1D 2360064 False \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 2304 \n","Conv1D 1771776 False \n","Conv1D 590592 False \n","Dropout \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 3072 \n","Conv1D 2362368 False \n","____________________________________________________________________________\n"," 2 x 1024 x 768 \n","Conv1D 2360064 False \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 2304 \n","Conv1D 1771776 False \n","Conv1D 590592 False \n","Dropout \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 3072 \n","Conv1D 2362368 False \n","____________________________________________________________________________\n"," 2 x 1024 x 768 \n","Conv1D 2360064 False \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 2304 \n","Conv1D 1771776 False \n","Conv1D 590592 False \n","Dropout \n","Dropout \n","LayerNorm 1536 True 
\n","____________________________________________________________________________\n"," 2 x 1024 x 3072 \n","Conv1D 2362368 False \n","____________________________________________________________________________\n"," 2 x 1024 x 768 \n","Conv1D 2360064 False \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 2304 \n","Conv1D 1771776 False \n","Conv1D 590592 False \n","Dropout \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 3072 \n","Conv1D 2362368 False \n","____________________________________________________________________________\n"," 2 x 1024 x 768 \n","Conv1D 2360064 False \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 2304 \n","Conv1D 1771776 False \n","Conv1D 590592 False \n","Dropout \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 3072 \n","Conv1D 2362368 False \n","____________________________________________________________________________\n"," 2 x 1024 x 768 \n","Conv1D 2360064 False \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 2304 \n","Conv1D 1771776 False \n","Conv1D 590592 False \n","Dropout \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 3072 \n","Conv1D 2362368 False \n","____________________________________________________________________________\n"," 2 x 1024 x 768 \n","Conv1D 2360064 False \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 2304 \n","Conv1D 1771776 False \n","Conv1D 590592 False \n","Dropout \n","Dropout \n","LayerNorm 1536 True 
\n","____________________________________________________________________________\n"," 2 x 1024 x 3072 \n","Conv1D 2362368 False \n","____________________________________________________________________________\n"," 2 x 1024 x 768 \n","Conv1D 2360064 False \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 2304 \n","Conv1D 1771776 False \n","Conv1D 590592 False \n","Dropout \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 3072 \n","Conv1D 2362368 False \n","____________________________________________________________________________\n"," 2 x 1024 x 768 \n","Conv1D 2360064 False \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 2304 \n","Conv1D 1771776 False \n","Conv1D 590592 False \n","Dropout \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 3072 \n","Conv1D 2362368 False \n","____________________________________________________________________________\n"," 2 x 1024 x 768 \n","Conv1D 2360064 False \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 2304 \n","Conv1D 1771776 False \n","Conv1D 590592 False \n","Dropout \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 3072 \n","Conv1D 2362368 False \n","____________________________________________________________________________\n"," 2 x 1024 x 768 \n","Conv1D 2360064 False \n","Dropout \n","LayerNorm 1536 True \n","____________________________________________________________________________\n"," 2 x 1024 x 50257 \n","Linear 38597376 True 
\n","____________________________________________________________________________\n","\n","Total params: 163,037,184\n","Total trainable params: 78,019,584\n","Total non-trainable params: 85,017,600\n","\n","Optimizer used: \n","Loss function: FlattenedLoss of CrossEntropyLoss()\n","\n","Model frozen up to parameter group #3\n","\n","Callbacks:\n"," - TrainEvalCallback\n"," - DropOutput\n"," - MixedPrecision\n"," - Recorder\n"," - ProgressCallback"]},"metadata":{"tags":[]},"execution_count":50}]},{"cell_type":"markdown","metadata":{"id":"I7VLvLdc9Na6"},"source":["The `learn.summary()` method gives almost the right numbers. In fact, it counts twice the weights of the wte matrix (vocab embeddings) because they are duplicated in the weights of the output linear layer.\n","\n","The real numbers are:\n","- Total params: 163,037,184 - 38,597,376 = **124,439,808** (about 124 million)\n","- Total trainable params: 78,019,584 - 38,597,376 = **39,422,208** (about 39 million)\n","- Total non-trainable params: **85,017,600** (about 85 million)"]},{"cell_type":"markdown","metadata":{"id":"NFsAJMTEISbC"},"source":["SAVE (first time)"]},{"cell_type":"code","metadata":{"id":"0d9Sr2KSVbWt"},"source":["learn.save(path_data/'GPT2_pl_before_lr_find_bs_sl_2_1024')\n","!cp /root/.fastai/data/plwiki/GPT2_pl_before_lr_find_bs_sl_2_1024.pth /content/gdrive/MyDrive/fastai"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"nvojtjn1GgI4"},"source":["LOAD"]},{"cell_type":"code","metadata":{"id":"bJ38DTzvGlM0"},"source":["# restore the checkpoint saved on Google Drive into the local fastai data folder\n","!cp /content/gdrive/MyDrive/fastai/GPT2_pl_before_lr_find_bs_sl_2_1024.pth /root/.fastai/data/plwiki/"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"4v84AvLnvczm"},"source":[""]},{"cell_type":"markdown","metadata":{"id":"Q-UUlHqMjwto"},"source":["# 1st 
attempt\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":403},"id":"OZMMifcQuHeQ","executionInfo":{"status":"error","timestamp":1616360451299,"user_tz":-60,"elapsed":14548284,"user":{"displayName":"Marek Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"81fd1afd-0d78-473e-8f6b-d680ec9dd97f"},"source":["learn.fit_one_cycle(1, 0.1)"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/html":["\n","
\n"," \n"," \n"," 0.00% [0/1 00:00<00:00]\n","
\n"," \n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
epochtrain_lossvalid_lossaccuracyperplexitytime

\n","\n","

\n"," \n"," \n"," 18.58% [8027/43206 4:02:23<17:42:19 nan]\n","
\n"," "],"text/plain":[""]},"metadata":{"tags":[]}},{"output_type":"error","ename":"KeyboardInterrupt","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mlearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_one_cycle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0.1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/callback/schedule.py\u001b[0m in \u001b[0;36mfit_one_cycle\u001b[0;34m(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt)\u001b[0m\n\u001b[1;32m 110\u001b[0m scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),\n\u001b[1;32m 111\u001b[0m 'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}\n\u001b[0;32m--> 112\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_epoch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcbs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mParamScheduler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscheds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mL\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcbs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreset_opt\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mreset_opt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwd\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mwd\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 113\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 114\u001b[0m \u001b[0;31m# 
Cell\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/learner.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, n_epoch, lr, wd, cbs, reset_opt)\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_hypers\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlr\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlr\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlr\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mlr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 211\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_epoch\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mn_epoch\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 212\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_with_events\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_do_fit\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'fit'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCancelFitException\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_end_cleanup\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 213\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 214\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_end_cleanup\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdl\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mxb\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0myb\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpred\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/learner.py\u001b[0m in \u001b[0;36m_with_events\u001b[0;34m(self, f, event_type, ex, final)\u001b[0m\n\u001b[1;32m 158\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_with_events\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mevent_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfinal\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnoop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 160\u001b[0;31m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'before_{event_type}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 161\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m:\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'after_cancel_{event_type}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'after_{event_type}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mfinal\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/learner.py\u001b[0m in \u001b[0;36m_do_fit\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 201\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mepoch\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_epoch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 202\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mepoch\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mepoch\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 203\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_with_events\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_do_epoch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'epoch'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCancelEpochException\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 204\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 205\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_epoch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlr\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwd\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcbs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mreset_opt\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/learner.py\u001b[0m in \u001b[0;36m_with_events\u001b[0;34m(self, f, event_type, ex, final)\u001b[0m\n\u001b[1;32m 158\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_with_events\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mevent_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfinal\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnoop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 160\u001b[0;31m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'before_{event_type}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 161\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'after_cancel_{event_type}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'after_{event_type}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mfinal\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/learner.py\u001b[0m in \u001b[0;36m_do_epoch\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 195\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 196\u001b[0m \u001b[0;32mdef\u001b[0m 
\u001b[0m_do_epoch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 197\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_do_epoch_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 198\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_do_epoch_validate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/learner.py\u001b[0m in \u001b[0;36m_do_epoch_train\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_do_epoch_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 188\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdl\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 189\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_with_events\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mall_batches\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'train'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCancelTrainException\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 190\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 191\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_do_epoch_validate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mds_idx\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mdl\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/learner.py\u001b[0m in \u001b[0;36m_with_events\u001b[0;34m(self, f, event_type, ex, final)\u001b[0m\n\u001b[1;32m 158\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_with_events\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mevent_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfinal\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnoop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 160\u001b[0;31m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'before_{event_type}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 161\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'after_cancel_{event_type}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'after_{event_type}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mfinal\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/learner.py\u001b[0m in \u001b[0;36mall_batches\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[0;32mdef\u001b[0m 
\u001b[0mall_batches\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_iter\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdl\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 166\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mo\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdl\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mone_batch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mo\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 167\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 168\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_do_one_batch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/learner.py\u001b[0m in \u001b[0;36mone_batch\u001b[0;34m(self, i, b)\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[0mb_on_device\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtuple\u001b[0m\u001b[0;34m(\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdevice\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdevice\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0me\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mb\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m\"to\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdevice\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 184\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_split\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb_on_device\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 185\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_with_events\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_do_one_batch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'batch'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCancelBatchException\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 186\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_do_epoch_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/learner.py\u001b[0m in \u001b[0;36m_with_events\u001b[0;34m(self, f, event_type, ex, final)\u001b[0m\n\u001b[1;32m 158\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_with_events\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mevent_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfinal\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnoop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 160\u001b[0;31m 
\u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'before_{event_type}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 161\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mex\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'after_cancel_{event_type}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'after_{event_type}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mfinal\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/learner.py\u001b[0m in \u001b[0;36m_do_one_batch\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 175\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtraining\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0myb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;32mreturn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 176\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'before_backward'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 177\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloss_grad\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 178\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_with_events\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'step'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCancelStepException\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 179\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mzero_grad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph)\u001b[0m\n\u001b[1;32m 218\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 219\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 220\u001b[0;31m create_graph=create_graph)\n\u001b[0m\u001b[1;32m 221\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 222\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/overrides.py\u001b[0m in \u001b[0;36mhandle_torch_function\u001b[0;34m(public_api, relevant_args, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1058\u001b[0m \u001b[0;31m# Use `public_api` instead of `implementation` so 
__torch_function__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1059\u001b[0m \u001b[0;31m# implementations can do equality/identity comparisons.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1060\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0moverloaded_arg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__torch_function__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpublic_api\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtypes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1061\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1062\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mNotImplemented\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/fastai/torch_core.py\u001b[0m in \u001b[0;36m__torch_function__\u001b[0;34m(self, func, types, args, kwargs)\u001b[0m\n\u001b[1;32m 327\u001b[0m \u001b[0mconvert\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_torch_handled\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_opt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mconvert\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mtypes\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTensor\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 329\u001b[0;31m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__torch_function__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtypes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 330\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mconvert\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconvert\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mres\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 331\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mres\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mTensorBase\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_meta\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mas_copy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/tensor.py\u001b[0m in \u001b[0;36m__torch_function__\u001b[0;34m(cls, func, types, args, kwargs)\u001b[0m\n\u001b[1;32m 993\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 994\u001b[0m \u001b[0;32mwith\u001b[0m 
\u001b[0m_C\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDisableTorchFunction\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 995\u001b[0;31m \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 996\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_convert\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mret\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 997\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph)\u001b[0m\n\u001b[1;32m 219\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 220\u001b[0m create_graph=create_graph)\n\u001b[0;32m--> 221\u001b[0;31m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 222\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 223\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables)\u001b[0m\n\u001b[1;32m 130\u001b[0m Variable._execution_engine.run_backward(\n\u001b[1;32m 131\u001b[0m \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 132\u001b[0;31m allow_unreachable=True) # allow_unreachable flag\n\u001b[0m\u001b[1;32m 133\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 134\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mKeyboardInterrupt\u001b[0m: "]}]},{"cell_type":"markdown","metadata":{"id":"ahH6fionvuVH"},"source":[""]},{"cell_type":"markdown","metadata":{"id":"HRnjCdBCvrKH"},"source":["takes quite a lot of time to 4h for 20%"]},{"cell_type":"code","metadata":{"id":"aDgnwvokD9Xj"},"source":["# learn.fit_one_cycle(1, 5e-1)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"EJcMQOqgkA8M"},"source":["learn.save(path_data/'GPT2_pl_before_lr_find_bs_sl_2_1024_5e_1')\n","!cp /root/.fastai/data/plwiki/GPT2_pl_before_lr_find_bs_sl_2_1024_5e_1.pth /content/gdrive/MyDrive/fastai"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":265},"id":"TD7HC2sNkDx5","executionInfo":{"status":"ok","timestamp":1616360472147,"user_tz":-60,"elapsed":1601,"user":{"displayName":"Marek 
Leszczynski","photoUrl":"","userId":"05664549655810509768"}},"outputId":"62e03a97-6bec-4619-e893-68f688aa48da"},"source":["learn.recorder.plot_loss()"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[],"needs_background":"light"}}]}]}