asraf-patoary
diff --git a/‎.gitignore
Lines changed: 2 additions & 1 deletion b/‎.gitignore
Lines changed: 2 additions & 1 deletion
diff --git a/‎.travis.yml
Lines changed: 1 addition & 1 deletion b/‎.travis.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎README.md
Lines changed: 50 additions & 11 deletions b/‎README.md
Lines changed: 50 additions & 11 deletions
diff --git a/‎bnltk_downloads/__init__.py
Lines changed: 1 addition & 1 deletion b/‎bnltk_downloads/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎bnltk_downloads/file_downloader.py
Lines changed: 52 additions & 53 deletions b/‎bnltk_downloads/file_downloader.py
Lines changed: 52 additions & 53 deletions
diff --git a/‎pos_tagger/__init__.py
Lines changed: 0 additions & 1 deletion b/‎pos_tagger/__init__.py
Lines changed: 0 additions & 1 deletion
@@ -1,2 +1,3 @@
 .DS_Store
-__pycache__
+__pycache__
+myenv
@@ -1,6 +1,6 @@
 language: python 
 
 python:
-  - 3.6
+  - 3.10
 
 script: pytest 
@@ -1,13 +1,17 @@
 # BNLTK
 [![Build Status](https://travis-ci.org/ashwoolford/bnltk.svg?branch=master)](https://travis-ci.org/ashwoolford/bnltk)
 [![License: MIT](https://img.shields.io/badge/License-MIT-brightgreen.svg)](https://opensource.org/licenses/MIT)
+[![Downloads](https://static.pepy.tech/badge/bnltk)](https://pepy.tech/project/bnltk)
 
 
-BNLTK(Bangla Natural Language Processing Toolkit) is open-source python package for Bengali Natural Language Processing. It includes modules for Tokenization, Stemming, Parts of speech tagging. I'm looking forward to helping form contributors to make this look far better than this.
+
+BNLTK (Bangla Natural Language Processing Toolkit) is an open-source Python package for Bengali Natural Language Processing. It includes modules for tokenization, stemming, and parts-of-speech tagging.
 
 ## Installation
 
+```
 pip install bnltk 
+```
 
 ## Usage
 
@@ -16,33 +20,68 @@ pip install bnltk
 ```
 from bnltk.tokenize import Tokenizers
 t = Tokenizers()
-print(t.bn_word_tokenizer(' আমার সোনার বাংলা । '))		
+print(t.bn_word_tokenizer('আজ আবহাওয়া খুব ভালো।'))
+# ["আজ", "আবহাওয়া", "খুব", "ভালো", "।"]
 ```
 
 ### Stemmer
 
 ```
 from bnltk.stemmer import BanglaStemmer
 bn_stemmer = BanglaStemmer()
-print(bn_stemmer.stem('খেয়েছিলো'))
+print(bn_stemmer.stem('হেসেছিলেন'))
+# হাসা
 ```
 
 ### Parts of Speech Tagger
 
-For using the Parts of Tagger you need to download some data files as follows:
-
+To use the Parts of Speech Tagger, first download the pretrained model's weights. Our trained model achieves an accuracy of 96%.
 ```
 from bnltk.bnltk_downloads import DataFiles
 DataFiles().download()	
 ```
-After successfully downloading the files, then you can use this module.
+After successfully downloading the files, you can use this module as follows:
 
 ```
 from bnltk.pos_tagger import PosTagger
 
-p_tagger = PosTagger()    
-p_tagger.loader()
-sentences = 'দুশ্চিন্তার কোন কারণই নাই'
-print(p_tagger.tagger(sentences))  
-
+p_tagger = PosTagger()
+print(p_tagger.tagger('দুশ্চিন্তার কোন কারণই নাই'))  
+# [('দুশ্চিন্তার', 'NC'), ('কোন', 'JQ'), ('কারণই', 'NC'), ('নাই', 'VM')]
 ```
+
+Description of the POS tag set
+
+| Categories            | Types                 |
+|-----------------------|-----------------------|
+| Noun (N)              | Common (NC)           |
+|                       | Proper (NP)           |
+|                       | Verbal (NV)           |
+|                       | Spatio-temporal (NST) |
+| Pronoun (P)           | Pronominal (PPR)      |
+|                       | Reflexive (PRF)       |
+|                       | Reciprocal (PRC)      |
+|                       | Relative (PRL)        |
+|                       | Wh (PWH)              |
+|                       |                       |
+| Nominal Modifier (J)  | Adjectives (JJ)       |
+|                       | Quantifiers (JQ)      |
+| Demonstratives (D)    | Absolutive (DAB)      |
+|                       | Relative (DRL)        |
+|                       | Wh (DWH)              |
+| Adverb (A)            | Manner (AMN)          |
+|                       | Location (ALC)        |
+| Participle (L)        | Relative (LRL)        |
+|                       | Verbal (LV)           |
+| Postposition (PP)     |                       |
+| Particles (C)         | Coordinating (CCD)    |
+|                       | Subordinating (CSB)   |
+|                       | Classifier (CCL)      |
+|                       | Interjection (CIN)    |
+|                       | Others (CX)           |
+| Punctuations (PU)     |                       |
+| Residual (RD)         | Foreign Word (RDF)    |
+|                       | Symbol (RDS)          |
+|                       | Other (RDX)           |
+
+
@@ -1 +1 @@
-from .file_downloader import DataFiles
+from .file_downloader import DataFiles
@@ -1,71 +1,70 @@
 # Bangla Natural Language Toolkit: DataFiles Downloader
 #
-# Copyright (C) 2019 BNLTK Project
-# Author: Ashraf Hossain <asrafhossain197@gmail.com>
+# Copyright (C) 2019-2024 BNLTK Project
+# Author: Asraf Patoary <asrafhossain197@gmail.com>
 
-from requests import get  # to make GET request
+from requests import get
 import platform
 import getpass
 import os
 import sys
 
 
 class DataFiles:
-	def __init__(self):
-		pass
+    def __init__(self):
+        pass
 
-	def downloader(self, url, file_name, tag):
-		if not os.path.exists(file_name):
-				    # open in binary mode
-		    with open(file_name, "wb") as file:
-		        # get request
-		        print("Downloading....../"+tag)
-		        response = get(url, stream=True)
-		        # write to file
-		        #file.write(response.content)
-		        
-		        
-		        total_length = response.headers.get('content-length')
+    def downloader(self, url, file_name, tag):
+        if not os.path.exists(file_name):
+            with open(file_name, "wb") as file:
+                print("Downloading....../" + tag)
+                response = get(url, stream=True)
+                total_length = response.headers.get("content-length")
 
-		        if total_length is None: # no content length header
-		            file.write(response.content)
-		        else:
-		            dl = 0
-		            total_length = int(total_length)
-		            for data in response.iter_content(chunk_size=4096):
-		                dl += len(data)
-		                file.write(data)
-		                done = int(50 * dl / total_length)
-		                sys.stdout.write("\r[%s%s]" % ('=' * done, ' ' * (50-done)) )    
-		                sys.stdout.flush()
-		else:
-			print(tag + 'is already exists!!')           	
+                if total_length is None:
+                    file.write(response.content)
+                else:
+                    dl = 0
+                    total_length = int(total_length)
+                    for data in response.iter_content(chunk_size=4096):
+                        dl += len(data)
+                        file.write(data)
+                        done = int(50 * dl / total_length)
+                        sys.stdout.write("\r[%s%s]" % ("=" * done, " " * (50 - done)))
+                        sys.stdout.flush()
+        else:
+            print(tag + "is already exists!!")
 
-		     
+    def download(self):
+        file_name = None
+        tag1 = "bn_tagged_mod.txt"
+        tag2 = "pos_tagger.weights.h5"
 
-	def download(self):
-		file_name = None
-		tag1 = 'bn_tagged_mod.txt'
-		tag2 = 'keras_mlp_bangla.h5'
+        print("platform.system() ", platform.system())
 
-		if platform.system() == 'Windows':
-		    file_name = "C:\\Users\\"+getpass.getuser()
-		else:
-		    file_name = "/Users/"+getpass.getuser()
-		#print(file_name)
-		url = 'https://firebasestorage.googleapis.com/v0/b/diu-question.appspot.com/o/nlp_data%2Fbn_tagged_mod.txt?alt=media&token=00f383a3-f913-480b-85c1-971dd8fd6dd9'
-		url2 = 'https://firebasestorage.googleapis.com/v0/b/diu-question.appspot.com/o/nlp_data%2Fkeras_mlp_bangla.h5?alt=media&token=4146c1b0-1e4d-4f9e-8b2f-7e3519106a40'
+        if platform.system() == "Windows":
+            file_name = "C:\\Users\\" + getpass.getuser()
+        elif platform.system() == "Linux":
+            file_name = "/home/" + getpass.getuser()
+        elif platform.system() == "Darwin":
+            file_name = "/Users/" + getpass.getuser()
+        else:
+            raise Exception("Unable to detect OS")
 
+        corpus_url = "https://firebasestorage.googleapis.com/v0/b/diu-question.appspot.com/o/nlp_data%2Fbn_tagged_mod.txt?alt=media&token=00f383a3-f913-480b-85c1-971dd8fd6dd9"
+        saved_weights_url = "https://firebasestorage.googleapis.com/v0/b/diu-question.appspot.com/o/nlp_data%2Fpos_tagger.weights.h5?alt=media&token=2251eedd-dfaf-4572-9bce-b4d293cce980"
 
-		try:  
-		    os.makedirs(file_name+'/bnltk_data/pos_data')
-		except OSError:  
-		    print ("Creation of the directory failed or exists")
-		else:  
-		    pass   
-
-		self.downloader(url, file_name+'/bnltk_data/pos_data/bn_tagged_mod.txt', tag1) 
-		print()
-		self.downloader(url2, file_name+'/bnltk_data/pos_data/keras_mlp_bangla.h5', tag2) 
-		print('Done!')
+        try:
+            os.makedirs(file_name + "/bnltk_data/pos_data")
+        except OSError:
+            print("Creation of the directory failed or exists")
 
+        self.downloader(
+            corpus_url, file_name + "/bnltk_data/pos_data/bn_tagged_mod.txt", tag1
+        )
+        self.downloader(
+            saved_weights_url,
+            file_name + "/bnltk_data/pos_data/pos_tagger.weights.h5",
+            tag2,
+        )
+        print("Done!")
@@ -1,2 +1 @@
 from .bn_pos_tagger import PosTagger
-# h
-Original file line number
+Diff line change
@@ @@ -1,6 +1,6 @@ @@
 language: python
 python:
 -  - 3.6
 +  - 3.10
 script: pytest
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-from .file_downloader import DataFiles`
	`1`	`+from .file_downloader import DataFiles`
Original file line number	Diff line number	Diff line change
`@@ -1,2 +1 @@`
`1`	`1`	`from .bn_pos_tagger import PosTagger`
`2`		`-# h`