From f0d0fad5a9c72d6b9c94ccf368bb735d476cb8f3 Mon Sep 17 00:00:00 2001 From: Vishal Venkat Raghavan Date: Thu, 8 Feb 2024 14:00:25 +0100 Subject: [PATCH 01/78] Citation extractions by Vishal for code review --- cellar/cellar_extractor/citations.py | 39 ++++++++++++++++++++++++++++ cellar/cellar_extractor/para.py | 39 ++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 cellar/cellar_extractor/citations.py create mode 100644 cellar/cellar_extractor/para.py diff --git a/cellar/cellar_extractor/citations.py b/cellar/cellar_extractor/citations.py new file mode 100644 index 0000000..bfe43b1 --- /dev/null +++ b/cellar/cellar_extractor/citations.py @@ -0,0 +1,39 @@ +import requests +from bs4 import BeautifulSoup + +def get_citations_from_celex_id(celex)->list:#Get citations(Celex ID) from a website by providing celex ID in the function upon calling and reutrns a list of citations if exists else it returns an empty list + website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/ALL/?uri=CELEX:{celex}").text + parser=BeautifulSoup(website,'lxml') + div=parser.find_all('div',class_="panel-body") + citations=[] + for divs in div: + if divs!=None: + dl=divs.find('dl',class_="NMetadata") + if dl!=None: + dt=dl.find_all('dt') + for dls in dl: + if "cited" in dls.text.lower(): + + + temp=dls.find_all_next('dd') + for dd in temp: + if dd!=None: + li=dd.find_all('li') + for mentions in li: + if mentions!=None: + a=mentions.find('a') + if a!=None: + + citations.append(a.text) + # print(a.text) + # print(citations) + filtered=[] + for splits in citations: + if len(splits.split(" "))<2: + filtered.append(splits) + + return filtered + + +sample=get_citations_from_celex_id("61962CJ0026") +print(sample) \ No newline at end of file diff --git a/cellar/cellar_extractor/para.py b/cellar/cellar_extractor/para.py new file mode 100644 index 0000000..5324f28 --- /dev/null +++ b/cellar/cellar_extractor/para.py @@ -0,0 +1,39 @@ +import requests +from bs4 
import BeautifulSoup + +def get_para_citations_from_celex_id(celex)->list:#Get paragraph citations from a website by providing celex ID in the function upon calling and reutrns a list of citations if exists else it returns an empty list + website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/ALL/?uri=CELEX:{celex}").text + parser=BeautifulSoup(website,'lxml') + div=parser.find_all('div',class_="panel-body") + citations=[] + for divs in div: + if divs!=None: + dl=divs.find('dl',class_="NMetadata") + if dl!=None: + dt=dl.find_all('dt') + for dls in dl: + if "cited" in dls.text.lower(): + + + temp=dls.find_all_next('dd') + for dd in temp: + if dd!=None: + li=dd.find_all('li') + for mentions in li: + if mentions!=None: + if "p" in mentions.text.lower().split(" "): + + + # print(mentions.text) + citations.append(mentions.text) + # print(a.text) + # print(citations) + filtered=[] + for splits in citations: + + filtered.append(splits.split(":")[1]) + + return filtered + +sample=get_para_citations_from_celex_id("61962CJ0026") +print(sample) \ No newline at end of file From 7868c30a4c0b6db610129db65ea2acddff4db166 Mon Sep 17 00:00:00 2001 From: Vishal Venkat Raghavan Date: Thu, 8 Feb 2024 14:05:46 +0100 Subject: [PATCH 02/78] cellar --- cellar/cellar_extractor/citations.py | 39 ---------------------------- cellar/cellar_extractor/para.py | 39 ---------------------------- 2 files changed, 78 deletions(-) delete mode 100644 cellar/cellar_extractor/citations.py delete mode 100644 cellar/cellar_extractor/para.py diff --git a/cellar/cellar_extractor/citations.py b/cellar/cellar_extractor/citations.py deleted file mode 100644 index bfe43b1..0000000 --- a/cellar/cellar_extractor/citations.py +++ /dev/null @@ -1,39 +0,0 @@ -import requests -from bs4 import BeautifulSoup - -def get_citations_from_celex_id(celex)->list:#Get citations(Celex ID) from a website by providing celex ID in the function upon calling and reutrns a list of citations if exists else it returns an 
empty list - website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/ALL/?uri=CELEX:{celex}").text - parser=BeautifulSoup(website,'lxml') - div=parser.find_all('div',class_="panel-body") - citations=[] - for divs in div: - if divs!=None: - dl=divs.find('dl',class_="NMetadata") - if dl!=None: - dt=dl.find_all('dt') - for dls in dl: - if "cited" in dls.text.lower(): - - - temp=dls.find_all_next('dd') - for dd in temp: - if dd!=None: - li=dd.find_all('li') - for mentions in li: - if mentions!=None: - a=mentions.find('a') - if a!=None: - - citations.append(a.text) - # print(a.text) - # print(citations) - filtered=[] - for splits in citations: - if len(splits.split(" "))<2: - filtered.append(splits) - - return filtered - - -sample=get_citations_from_celex_id("61962CJ0026") -print(sample) \ No newline at end of file diff --git a/cellar/cellar_extractor/para.py b/cellar/cellar_extractor/para.py deleted file mode 100644 index 5324f28..0000000 --- a/cellar/cellar_extractor/para.py +++ /dev/null @@ -1,39 +0,0 @@ -import requests -from bs4 import BeautifulSoup - -def get_para_citations_from_celex_id(celex)->list:#Get paragraph citations from a website by providing celex ID in the function upon calling and reutrns a list of citations if exists else it returns an empty list - website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/ALL/?uri=CELEX:{celex}").text - parser=BeautifulSoup(website,'lxml') - div=parser.find_all('div',class_="panel-body") - citations=[] - for divs in div: - if divs!=None: - dl=divs.find('dl',class_="NMetadata") - if dl!=None: - dt=dl.find_all('dt') - for dls in dl: - if "cited" in dls.text.lower(): - - - temp=dls.find_all_next('dd') - for dd in temp: - if dd!=None: - li=dd.find_all('li') - for mentions in li: - if mentions!=None: - if "p" in mentions.text.lower().split(" "): - - - # print(mentions.text) - citations.append(mentions.text) - # print(a.text) - # print(citations) - filtered=[] - for splits in citations: - - 
filtered.append(splits.split(":")[1]) - - return filtered - -sample=get_para_citations_from_celex_id("61962CJ0026") -print(sample) \ No newline at end of file From 905d223a8e32f1437415a655ac9ab66f77022da6 Mon Sep 17 00:00:00 2001 From: Vishal Venkat Raghavan Date: Tue, 27 Feb 2024 13:46:27 +0100 Subject: [PATCH 03/78] Updated code for extraction --- .../cellar_extractor/operative_extraction.py | 270 ++++++++++++++++++ cellar/cellar_extractor/output.py | 50 ++++ cellar/cellar_extractor/test_output.py | 14 + 3 files changed, 334 insertions(+) create mode 100644 cellar/cellar_extractor/operative_extraction.py create mode 100644 cellar/cellar_extractor/output.py create mode 100644 cellar/cellar_extractor/test_output.py diff --git a/cellar/cellar_extractor/operative_extraction.py b/cellar/cellar_extractor/operative_extraction.py new file mode 100644 index 0000000..c2247e2 --- /dev/null +++ b/cellar/cellar_extractor/operative_extraction.py @@ -0,0 +1,270 @@ +import requests +from bs4 import BeautifulSoup +import unittest +# class ECLI(): +# ecli:str +# def __init__(self,ecli): +# self.ecli=ecli +class Analyzer(): + celex:str + def __init__(self,celex): + self.celex=celex + + + def html_page_structure_one(self)->list: + website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + parser=BeautifulSoup(website,'lxml') + div=parser.find_all('table') + one=[] + for divs in div: + table=divs.find('table') + if table!=None: + p=table.find_all('p',class_="coj-normal") + for x in p: + span=x.find_all('span',class_="coj-bold") + for y in span: + if x!=None and y!=None: + # print(span.text) + one.append(y.text) + return one + + + def html_page_structure_two(self)->list: + website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + parser=BeautifulSoup(website,'lxml') + p=parser.find_all('p') + two=[] + for para in p: + # print(para) + span=para.find('span') + if span!=None: + # 
print(span.text) + if "operative" in span.text.lower(): + normal=span.find_all_next('p',class_="normal") + for op in normal: + # print(op.text) + two.append(op.text) + return two + + def structure_three(self)->list: + website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + parser=BeautifulSoup(website,'lxml') + table=parser.find_all('table') + three=[] + for tables in table: + interior=tables.find_all('table') + for interiors in interior: + if interiors!=None: + p=interiors.find_all('p',class_="coj-normal") + for x in p: + span=x.find_all('span',class_="coj-bold") + for y in span: + if x!=None and y!=None: + # print(span.text) + three.append(y.text) + return three + + + + def structure_four(self)->list: + website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + parser=BeautifulSoup(website,'lxml') + p=parser.find_all('p') + four=[] + for para in p: + # print(para) + span=para.find('span') + if span!=None: + # print(span.text) + if "operative" in span.text.lower(): + normal=span.find_all_next('table') + for op in normal: + tbody=op.find('tbody') + new_p=tbody.find_all('p',class_="oj-normal") + + + for subsequent in new_p: + if subsequent!=None: + # print(subsequent.text) + four.append(subsequent.text) + + + return four + + def structure_five(self)->list: + website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + parser=BeautifulSoup(website,'lxml') + p=parser.find_all('p') + five=[] + for para in p: + # print(para) + span=para.find('span') + if span!=None: + # print(span.text) + if "operative" in span.text.lower(): + normal=span.find_all_next('table') + for op in normal: + tbody=op.find('tbody') + new_p=tbody.find_all('p',class_="normal") + + + for subsequent in new_p: + if subsequent!=None: + # print(subsequent.text) + five.append(subsequent.text) + + + return five + def structure_six(self)->list: + 
website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + parser=BeautifulSoup(website,'lxml') + div=parser.find_all('h2') + six=[] + for h2 in div: + # print(h2.text) + if h2.text=="Operative part": + operatives=h2.find_all_next('p') + for operative in operatives: + # print(operative.text) + six.append(operative.text) + return six + def structure_seven(self)->list: + website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + parser=BeautifulSoup(website,'lxml') + div=parser.find_all('table') + seven=[] + for divs in div: + table=divs.find_all('tbody') + for tables in table: + if tables!=None: + p=tables.find_all('tr') + for x in p: + if x!=None: + td=x.find_all('td') + for y in td: + if y!=None: + p=y.find_all('p',class_="normal") + for all in p: + if all!=None: + span=all.find_all('span',class_="bold") + for spans in span: + # print(spans.text) + seven.append(spans.text) + return seven + def structure_eight(self)->list: + website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + parser=BeautifulSoup(website,'lxml') + tbody=parser.find_all('tbody') + eight=[] + for all in tbody: + if all!=None: + tr=all.find_all('tr') + for trs in tr: + if trs!=None: + # print(trs) + + p=parser.find_all('p',class_="normal") + for paras in p: + if paras!=None: + if "on those grounds" in paras.text.lower(): + + span=paras.find_all_next('span',class_="bold") + for spans in span: + if spans!=None: + eight.append(spans.text) + # print(spans.text) + + return eight + def structure_nine(self)->list: + website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + parser=BeautifulSoup(website,'lxml') + nine=[] + div=parser.find_all('p') + for divs in div: + if divs!=None: + if "on those grounds" in divs.text.lower(): + b=divs.find_all_next('b') + for bolds in b: + # 
print(bolds.text) + nine.append(bolds.text) + return nine + def structure_eleven(self)->list: + website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + parser=BeautifulSoup(website,'lxml') + bold = parser.find_all('b') + + eleven=[] + + # print(website) + + for b in bold: + if b!=None: + if "operative part" in b.text.lower(): + table=b.find_all_next('p') + for tables in table: + if tables!=None: + eleven.append(tables.text) + # print(tables.text) + + + + return eleven + def structure_ten(self): + website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + parser=BeautifulSoup(website,'lxml') + appender=[] + for string in parser.stripped_strings: + # print(string) + appender.append(string) + # print(appender) + + found = False + afterGrounds = [] + + for x in appender: + + if "on those grounds" in x.lower(): + found = True + # print("True") + + if found: + if len(x.split(" "))>3: + afterGrounds.append(x) + return afterGrounds + def __call__(self)->list: + one:list + one=self.html_page_structure_one() + if len(one)==0 or len(one)=="\n": + one=self.html_page_structure_two() + if len(one)==0 or one[0]=="\n": + one=self.structure_three() + if len(one)==0 or one[0]=="\n": + one=self.structure_four() + if len(one)==0 or one[0]=="\n": + one=self.structure_five() + if len(one)==0 or one[0]=="\n": + one=self.structure_six() + if len(one)==0 or one[0]=="\n": + one=self.structure_seven() + if len(one)==0 or one[0]=="\n": + one=self.structure_eight() + if len(one)==0 or one[0]=="\n": + one=self.structure_nine() + if len(one)==0 or one[0]=="\n": + one=self.structure_ten() + if len(one)==0 or one[0]=="\n": + one=self.structure_eleven() + return one + + + + + + +instance=Analyzer("61980CJ0027") +x=instance() +if x!=None: + print(x) + + + diff --git a/cellar/cellar_extractor/output.py b/cellar/cellar_extractor/output.py new file mode 100644 index 0000000..2d5e07e --- 
/dev/null +++ b/cellar/cellar_extractor/output.py @@ -0,0 +1,50 @@ + +# from typing import Any +from operative_extractions import Analyzer +import csv +import json + +class Writing(): + + instance:str + x:str + def __init__(self, celex:str): + self.celex = celex + self.instance = Analyzer(self.celex) + self.x = self.instance() + + + def to_csv(self): + file=open("csv/output.csv","a+") + writer=csv.writer(file) + + if self.x!=None: + writer.writerow([self.celex,self.x]) + + def to_json(self): + if self.x!=None: + data={'Celex':self.celex,"Operative part":self.x} + file=open('json/data.json', 'a+') + json.dump(data,file) + file.close() + def to_txt(self): + + + if self.x!=None: + file=open(f"txt/{self.celex}.txt","a") + for w in self.x: + + file.write(w+"\n") + file.close() + def __call__(self): + self.to_csv() + # self.to_json() + # self.to_txt() + + + + + +# example=Writing("62018CA0390") +# example() + diff --git a/cellar/cellar_extractor/test_output.py b/cellar/cellar_extractor/test_output.py new file mode 100644 index 0000000..7e836bd --- /dev/null +++ b/cellar/cellar_extractor/test_output.py @@ -0,0 +1,14 @@ +import csv +file=open("gijs_202310_node_list.tsv","r") +reader=csv.reader(file) +from output import Writing +testing=[] +for row in reader: + for rows in row: + if "Id" not in rows: + testing.append(rows.split("\t")[0]) + +for all in testing: + instance=Writing(all) + instance() + # print(all) \ No newline at end of file From 36cc2394da9aae5b43d73f4bde45ee6ce0a8c5ae Mon Sep 17 00:00:00 2001 From: Vishal Venkat Raghavan Date: Wed, 6 Mar 2024 11:08:21 +0100 Subject: [PATCH 04/78] Updated code --- cellar/cellar_extractor/Testing_file.py | 50 ++++ .../cellar_extractor/operative_extraction.py | 243 ++++++++++++++---- cellar/cellar_extractor/output.py | 50 ---- cellar/cellar_extractor/test_output.py | 14 - 4 files changed, 236 insertions(+), 121 deletions(-) delete mode 100644 cellar/cellar_extractor/output.py delete mode 100644 
cellar/cellar_extractor/test_output.py diff --git a/cellar/cellar_extractor/Testing_file.py b/cellar/cellar_extractor/Testing_file.py index d2d81b3..a8a0805 100644 --- a/cellar/cellar_extractor/Testing_file.py +++ b/cellar/cellar_extractor/Testing_file.py @@ -14,6 +14,56 @@ from eurlex_scraping import * from cellar import * from sparql import * +import unittest +from operative_extraction import Analyzer +# from test import testing +import random +import csv +file=open("gijs_202310_node_list.tsv","r") +reader=csv.reader(file) +no_of_test_cases=30 +testing=[] +for row in reader: + for rows in row: + if "Id" not in rows: + testing.append(rows.split("\t")[0]) +class Test(unittest.TestCase): + """ + class for unittesing operative part , it checks whether the list returns null value or has some value. + """ + ids:list + def __init__(self,ids): + self.ids=ids + + def test_for_celex_id(self): + """ + Main function which runs the unittest Testcase . + """ + count_fail:int + count_pass=0 + for id in self.ids: + test_output=Analyzer(id) + test_instance=test_output() + + # self.assertFalse(len(test_instance)<=1) + + try: + self.assertTrue(test_instance[0],f"{id} is not empty and has operative part") + count_pass+=1 + print(f"{id} ---> PASSED.") + except: + print(f"{id} ---> FAILED.") + print(f"Passed {count_pass}/{len(self.ids)} times") + # print(len(self.ids)-count,"were passed successfully") + +new_list=[] +for w in range(no_of_test_cases): + randomized=random.randint(0,len(testing)-1) + new_list.append(testing[randomized]) + + +instance=Test(new_list) +instance.test_for_celex_id() diff --git a/cellar/cellar_extractor/operative_extraction.py b/cellar/cellar_extractor/operative_extraction.py index c2247e2..72f471c 100644 --- a/cellar/cellar_extractor/operative_extraction.py +++ b/cellar/cellar_extractor/operative_extraction.py @@ -1,52 +1,66 @@ import requests from bs4 import BeautifulSoup import unittest -# class ECLI(): -# ecli:str -# def __init__(self,ecli): -# 
self.ecli=ecli +from operative_extraction import Analyzer +import csv +import json class Analyzer(): - celex:str - def __init__(self,celex): + """ + This class returns a list of the operative part for a given celex id . Celex id is initialized through a constructor. + """ + celex:str # declare celex as a string + def __init__(self,celex):# Initialize Celex id as a constructor , passed when calling the class self.celex=celex def html_page_structure_one(self)->list: + """ + This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a nested + table structure . The relevant text lies inside the coj-bold class of the span tag. + """ website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text parser=BeautifulSoup(website,'lxml') - div=parser.find_all('table') + div=parser.find_all('table') # Find all tables tag from the website one=[] for divs in div: - table=divs.find('table') + table=divs.find('table') # Find each nested table within the table if table!=None: - p=table.find_all('p',class_="coj-normal") + p=table.find_all('p',class_="coj-normal") # Find all p under the nested table with the coj-normal class for x in p: - span=x.find_all('span',class_="coj-bold") + span=x.find_all('span',class_="coj-bold")# Span class of coj-bold under the p tag for y in span: if x!=None and y!=None: - # print(span.text) - one.append(y.text) + + one.append(y.text)#append text from span onto a list return one def html_page_structure_two(self)->list: + """ + This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a paragraph + (p) structure . The relevant text lies inside the normal class of the p tag which comes after the keyword operative of the previous span tag. 
+ """ website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text parser=BeautifulSoup(website,'lxml') p=parser.find_all('p') two=[] for para in p: - # print(para) + span=para.find('span') if span!=None: - # print(span.text) + if "operative" in span.text.lower(): normal=span.find_all_next('p',class_="normal") for op in normal: - # print(op.text) + two.append(op.text) return two def structure_three(self)->list: + """ + This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a nested + table structure . The relevant text lies inside the coj-bold class of the span tag. + """ website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text parser=BeautifulSoup(website,'lxml') table=parser.find_all('table') @@ -60,22 +74,26 @@ def structure_three(self)->list: span=x.find_all('span',class_="coj-bold") for y in span: if x!=None and y!=None: - # print(span.text) + three.append(y.text) return three def structure_four(self)->list: + """ + This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a paragraph + (p) structure . The relevant text lies inside the p tag which comes after the keyword operative of the previous span tag. 
+ """ website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text parser=BeautifulSoup(website,'lxml') p=parser.find_all('p') four=[] for para in p: - # print(para) + span=para.find('span') if span!=None: - # print(span.text) + if "operative" in span.text.lower(): normal=span.find_all_next('table') for op in normal: @@ -85,22 +103,27 @@ def structure_four(self)->list: for subsequent in new_p: if subsequent!=None: - # print(subsequent.text) + four.append(subsequent.text) return four def structure_five(self)->list: + + """ + This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a paragraph + (p) structure . The relevant text lies inside the normal class of the p tag which comes after the keyword operative of the previous span tag. + """ website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text parser=BeautifulSoup(website,'lxml') p=parser.find_all('p') five=[] for para in p: - # print(para) + span=para.find('span') if span!=None: - # print(span.text) + if "operative" in span.text.lower(): normal=span.find_all_next('table') for op in normal: @@ -110,12 +133,17 @@ def structure_five(self)->list: for subsequent in new_p: if subsequent!=None: - # print(subsequent.text) + five.append(subsequent.text) return five def structure_six(self)->list: + """ + This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a h2 + (header) structure . The relevant text lies inside thee p tag which comes after the keyword operative part of the respective h2 tag. 
+ """ + website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text parser=BeautifulSoup(website,'lxml') div=parser.find_all('h2') @@ -125,35 +153,48 @@ def structure_six(self)->list: if h2.text=="Operative part": operatives=h2.find_all_next('p') for operative in operatives: - # print(operative.text) + six.append(operative.text) return six def structure_seven(self)->list: + """ + This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a table + (table) structure . The relevant text lies inside the span tag which comes after the p tag , with the class name=normal. + """ website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text parser=BeautifulSoup(website,'lxml') div=parser.find_all('table') seven=[] for divs in div: + # find tbody within the table table=divs.find_all('tbody') for tables in table: if tables!=None: + # find tr within the tbody p=tables.find_all('tr') for x in p: if x!=None: + # find td within the tr td=x.find_all('td') for y in td: if y!=None: p=y.find_all('p',class_="normal") for all in p: if all!=None: + # find operative part within the span span=all.find_all('span',class_="bold") for spans in span: - # print(spans.text) + # APpend it into a list and return the list when the function is called seven.append(spans.text) return seven def structure_eight(self)->list: + """ + This function retreives operative part from documents of the respected celex id's .The text is extracted from the span tag nested inside + the tbody tag.Returns a list as output. 
+ """ website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text parser=BeautifulSoup(website,'lxml') + tbody=parser.find_all('tbody') eight=[] for all in tbody: @@ -161,7 +202,7 @@ def structure_eight(self)->list: tr=all.find_all('tr') for trs in tr: if trs!=None: - # print(trs) + p=parser.find_all('p',class_="normal") for paras in p: @@ -172,10 +213,14 @@ def structure_eight(self)->list: for spans in span: if spans!=None: eight.append(spans.text) - # print(spans.text) + return eight def structure_nine(self)->list: + """ + This function retreives operative part from documents of the respected celex id's .The operative part is under the bold(b) + tag after the p tag where the keywords "on those grounds" exist. + """ website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text parser=BeautifulSoup(website,'lxml') nine=[] @@ -189,13 +234,17 @@ def structure_nine(self)->list: nine.append(bolds.text) return nine def structure_eleven(self)->list: + """ + This function retreives operative part from documents of the respected celex id's .The operative part is under the paragraph(p) + tag after the b tag where the keywords "operative part" exist. + """ website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text parser=BeautifulSoup(website,'lxml') bold = parser.find_all('b') eleven=[] - # print(website) + for b in bold: if b!=None: @@ -204,19 +253,24 @@ def structure_eleven(self)->list: for tables in table: if tables!=None: eleven.append(tables.text) - # print(tables.text) + return eleven def structure_ten(self): + """ + This function retreives operative part from documents of the respected celex id's Since the ocntent is preloaded using js/client s + server side functions , the text from the current page is retrieved and the operative part is scraped after the occurence of the phrase + "On those grounds". 
+ """ website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text parser=BeautifulSoup(website,'lxml') appender=[] for string in parser.stripped_strings: - # print(string) + appender.append(string) - # print(appender) + found = False afterGrounds = [] @@ -225,46 +279,121 @@ def structure_ten(self): if "on those grounds" in x.lower(): found = True - # print("True") + if found: if len(x.split(" "))>3: afterGrounds.append(x) return afterGrounds def __call__(self)->list: + """ + This inbuilt __call__ function loops through all the methods in the class `Analyzer` and returns the list , with values of the operative part . + """ + + container=[self.html_page_structure_one(),self.html_page_structure_two(),self.structure_three(),self.structure_four(),self.structure_five(), + self.structure_six(),self.structure_seven(),self.structure_eight(),self.structure_nine(),self.structure_ten(),self.structure_eleven()] + + + one:list - one=self.html_page_structure_one() - if len(one)==0 or len(one)=="\n": - one=self.html_page_structure_two() - if len(one)==0 or one[0]=="\n": - one=self.structure_three() - if len(one)==0 or one[0]=="\n": - one=self.structure_four() - if len(one)==0 or one[0]=="\n": - one=self.structure_five() - if len(one)==0 or one[0]=="\n": - one=self.structure_six() - if len(one)==0 or one[0]=="\n": - one=self.structure_seven() - if len(one)==0 or one[0]=="\n": - one=self.structure_eight() - if len(one)==0 or one[0]=="\n": - one=self.structure_nine() - if len(one)==0 or one[0]=="\n": - one=self.structure_ten() - if len(one)==0 or one[0]=="\n": - one=self.structure_eleven() - return one + for funcs in range(len(container)): + + one=container[funcs] + + if one: + if (len(one)!=0 or one[0]!="\n"): + print("here") + return one + + + + + + + # one=self.html_page_structure_one() + # if len(one)==0 or len(one)=="\n": + # one=self.html_page_structure_two() + # if len(one)==0 or one[0]=="\n": + # one=self.structure_three() + 
# if len(one)==0 or one[0]=="\n": + # one=self.structure_four() + # if len(one)==0 or one[0]=="\n": + # one=self.structure_five() + # if len(one)==0 or one[0]=="\n": + # one=self.structure_six() + # if len(one)==0 or one[0]=="\n": + # one=self.structure_seven() + # if len(one)==0 or one[0]=="\n": + # one=self.structure_eight() + # if len(one)==0 or one[0]=="\n": + # one=self.structure_nine() + # if len(one)==0 or one[0]=="\n": + # one=self.structure_ten() + # if len(one)==0 or one[0]=="\n": + # one=self.structure_eleven() + -instance=Analyzer("61980CJ0027") -x=instance() -if x!=None: - print(x) +# instance=Analyzer("61962CJ0026") +# x=instance() +# if x!=None: +# print(x) + + +class Writing(): + """ + This class has different methods , for the purpose of writing the operative part into different file formats.(Csv,txt,json) + """ + + instance:str + x:str + parameter:str + def __init__(self, celex:str): + self.celex = celex + self.instance = Analyzer(self.celex) + self.x = self.instance() + + + + def to_csv(self): + file=open("csv/output.csv","a+") + writer=csv.writer(file) + + if self.x!=None: + writer.writerow([self.celex,self.x]) + + def to_json(self): + if self.x!=None: + data={'Celex':self.celex,"Operative part":self.x} + file=open('json/data.json', 'a+') + json.dump(data,file) + file.close() + def to_txt(self): + + + if self.x!=None: + file=open(f"txt/{self.celex}.txt","a") + for w in self.x: + + file.write(w+"\n") + file.close() +#Sample code for reading celex id's froma tsv file +file=open("gijs_202310_node_list.tsv","r") +reader=csv.reader(file) +from output import Writing +testing=[] +for row in reader: + for rows in row: + if "Id" not in rows: + testing.append(rows.split("\t")[0]) +for all in testing: + instance=Writing(all) + instance.to_csv() + print(all) diff --git a/cellar/cellar_extractor/output.py b/cellar/cellar_extractor/output.py deleted file mode 100644 index 2d5e07e..0000000 --- a/cellar/cellar_extractor/output.py +++ /dev/null @@ -1,50 +0,0 @@ 
- -# from typing import Any -from operative_extractions import Analyzer -import csv -import json - -class Writing(): - - instance:str - x:str - def __init__(self, celex:str): - self.celex = celex - self.instance = Analyzer(self.celex) - self.x = self.instance() - - - def to_csv(self): - file=open("csv/output.csv","a+") - writer=csv.writer(file) - - if self.x!=None: - writer.writerow([self.celex,self.x]) - - def to_json(self): - if self.x!=None: - data={'Celex':self.celex,"Operative part":self.x} - file=open('json/data.json', 'a+') - json.dump(data,file) - file.close() - def to_txt(self): - - - if self.x!=None: - file=open(f"txt/{self.celex}.txt","a") - for w in self.x: - - file.write(w+"\n") - file.close() - def __call__(self): - self.to_csv() - # self.to_json() - # self.to_txt() - - - - - -# example=Writing("62018CA0390") -# example() - diff --git a/cellar/cellar_extractor/test_output.py b/cellar/cellar_extractor/test_output.py deleted file mode 100644 index 7e836bd..0000000 --- a/cellar/cellar_extractor/test_output.py +++ /dev/null @@ -1,14 +0,0 @@ -import csv -file=open("gijs_202310_node_list.tsv","r") -reader=csv.reader(file) -from output import Writing -testing=[] -for row in reader: - for rows in row: - if "Id" not in rows: - testing.append(rows.split("\t")[0]) - -for all in testing: - instance=Writing(all) - instance() - # print(all) \ No newline at end of file From c42ca982a7cdcd43e32704be8c19a989e21fc78a Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Thu, 21 Mar 2024 12:48:33 +0400 Subject: [PATCH 05/78] Update Testing_file.py --- cellar/cellar_extractor/Testing_file.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cellar/cellar_extractor/Testing_file.py b/cellar/cellar_extractor/Testing_file.py index a8a0805..f743aee 100644 --- a/cellar/cellar_extractor/Testing_file.py +++ b/cellar/cellar_extractor/Testing_file.py @@ -61,19 +61,20 @@ def test_for_celex_id(self): 
randomized=random.randint(0,len(testing)-1) new_list.append(testing[randomized]) - -instance=Test(new_list) -instance.test_for_celex_id() + if __name__ == '__main__': celex = "62004CJ0292" + + instance=Test([celex]) + instance.test_for_celex_id() site = get_entire_page(celex) text = get_full_text_from_html(site) cits = get_citations_with_extra_info(text) print(cits) data,d2 = get_cellar_extra(sd='2023-01-01',max_ecli=100,save_file='n') nodes_edges = get_nodes_and_edges_lists(data) - pass \ No newline at end of file + pass From 7cb83e4c55e0b2b1f3fedcec4b3292043c6d05ee Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Thu, 21 Mar 2024 12:49:38 +0400 Subject: [PATCH 06/78] Delete cellar/cellar_extractor/operative_extraction.py --- .../cellar_extractor/operative_extraction.py | 399 ------------------ 1 file changed, 399 deletions(-) delete mode 100644 cellar/cellar_extractor/operative_extraction.py diff --git a/cellar/cellar_extractor/operative_extraction.py b/cellar/cellar_extractor/operative_extraction.py deleted file mode 100644 index 72f471c..0000000 --- a/cellar/cellar_extractor/operative_extraction.py +++ /dev/null @@ -1,399 +0,0 @@ -import requests -from bs4 import BeautifulSoup -import unittest -from operative_extraction import Analyzer -import csv -import json -class Analyzer(): - """ - This class returns a list of the operative part for a given celex id . Celex id is initialized through a constructor. - """ - celex:str # declare celex as a string - def __init__(self,celex):# Initialize Celex id as a constructor , passed when calling the class - self.celex=celex - - - def html_page_structure_one(self)->list: - """ - This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a nested - table structure . The relevant text lies inside the coj-bold class of the span tag. 
- """ - website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text - parser=BeautifulSoup(website,'lxml') - div=parser.find_all('table') # Find all tables tag from the website - one=[] - for divs in div: - table=divs.find('table') # Find each nested table within the table - if table!=None: - p=table.find_all('p',class_="coj-normal") # Find all p under the nested table with the coj-normal class - for x in p: - span=x.find_all('span',class_="coj-bold")# Span class of coj-bold under the p tag - for y in span: - if x!=None and y!=None: - - one.append(y.text)#append text from span onto a list - return one - - - def html_page_structure_two(self)->list: - """ - This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a paragraph - (p) structure . The relevant text lies inside the normal class of the p tag which comes after the keyword operative of the previous span tag. - """ - website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text - parser=BeautifulSoup(website,'lxml') - p=parser.find_all('p') - two=[] - for para in p: - - span=para.find('span') - if span!=None: - - if "operative" in span.text.lower(): - normal=span.find_all_next('p',class_="normal") - for op in normal: - - two.append(op.text) - return two - - def structure_three(self)->list: - """ - This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a nested - table structure . The relevant text lies inside the coj-bold class of the span tag. 
- """ - website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text - parser=BeautifulSoup(website,'lxml') - table=parser.find_all('table') - three=[] - for tables in table: - interior=tables.find_all('table') - for interiors in interior: - if interiors!=None: - p=interiors.find_all('p',class_="coj-normal") - for x in p: - span=x.find_all('span',class_="coj-bold") - for y in span: - if x!=None and y!=None: - - three.append(y.text) - return three - - - - def structure_four(self)->list: - """ - This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a paragraph - (p) structure . The relevant text lies inside the p tag which comes after the keyword operative of the previous span tag. - """ - website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text - parser=BeautifulSoup(website,'lxml') - p=parser.find_all('p') - four=[] - for para in p: - - span=para.find('span') - if span!=None: - - if "operative" in span.text.lower(): - normal=span.find_all_next('table') - for op in normal: - tbody=op.find('tbody') - new_p=tbody.find_all('p',class_="oj-normal") - - - for subsequent in new_p: - if subsequent!=None: - - four.append(subsequent.text) - - - return four - - def structure_five(self)->list: - - """ - This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a paragraph - (p) structure . The relevant text lies inside the normal class of the p tag which comes after the keyword operative of the previous span tag. 
- """ - website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text - parser=BeautifulSoup(website,'lxml') - p=parser.find_all('p') - five=[] - for para in p: - - span=para.find('span') - if span!=None: - - if "operative" in span.text.lower(): - normal=span.find_all_next('table') - for op in normal: - tbody=op.find('tbody') - new_p=tbody.find_all('p',class_="normal") - - - for subsequent in new_p: - if subsequent!=None: - - five.append(subsequent.text) - - - return five - def structure_six(self)->list: - """ - This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a h2 - (header) structure . The relevant text lies inside thee p tag which comes after the keyword operative part of the respective h2 tag. - """ - - website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text - parser=BeautifulSoup(website,'lxml') - div=parser.find_all('h2') - six=[] - for h2 in div: - # print(h2.text) - if h2.text=="Operative part": - operatives=h2.find_all_next('p') - for operative in operatives: - - six.append(operative.text) - return six - def structure_seven(self)->list: - """ - This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a table - (table) structure . The relevant text lies inside the span tag which comes after the p tag , with the class name=normal. 
- """ - website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text - parser=BeautifulSoup(website,'lxml') - div=parser.find_all('table') - seven=[] - for divs in div: - # find tbody within the table - table=divs.find_all('tbody') - for tables in table: - if tables!=None: - # find tr within the tbody - p=tables.find_all('tr') - for x in p: - if x!=None: - # find td within the tr - td=x.find_all('td') - for y in td: - if y!=None: - p=y.find_all('p',class_="normal") - for all in p: - if all!=None: - # find operative part within the span - span=all.find_all('span',class_="bold") - for spans in span: - # APpend it into a list and return the list when the function is called - seven.append(spans.text) - return seven - def structure_eight(self)->list: - """ - This function retreives operative part from documents of the respected celex id's .The text is extracted from the span tag nested inside - the tbody tag.Returns a list as output. - """ - website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text - parser=BeautifulSoup(website,'lxml') - - tbody=parser.find_all('tbody') - eight=[] - for all in tbody: - if all!=None: - tr=all.find_all('tr') - for trs in tr: - if trs!=None: - - - p=parser.find_all('p',class_="normal") - for paras in p: - if paras!=None: - if "on those grounds" in paras.text.lower(): - - span=paras.find_all_next('span',class_="bold") - for spans in span: - if spans!=None: - eight.append(spans.text) - - - return eight - def structure_nine(self)->list: - """ - This function retreives operative part from documents of the respected celex id's .The operative part is under the bold(b) - tag after the p tag where the keywords "on those grounds" exist. 
- """ - website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text - parser=BeautifulSoup(website,'lxml') - nine=[] - div=parser.find_all('p') - for divs in div: - if divs!=None: - if "on those grounds" in divs.text.lower(): - b=divs.find_all_next('b') - for bolds in b: - # print(bolds.text) - nine.append(bolds.text) - return nine - def structure_eleven(self)->list: - """ - This function retreives operative part from documents of the respected celex id's .The operative part is under the paragraph(p) - tag after the b tag where the keywords "operative part" exist. - """ - website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text - parser=BeautifulSoup(website,'lxml') - bold = parser.find_all('b') - - eleven=[] - - - - for b in bold: - if b!=None: - if "operative part" in b.text.lower(): - table=b.find_all_next('p') - for tables in table: - if tables!=None: - eleven.append(tables.text) - - - - - return eleven - def structure_ten(self): - """ - This function retreives operative part from documents of the respected celex id's Since the ocntent is preloaded using js/client s - server side functions , the text from the current page is retrieved and the operative part is scraped after the occurence of the phrase - "On those grounds". - """ - website=requests.get(f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text - parser=BeautifulSoup(website,'lxml') - appender=[] - for string in parser.stripped_strings: - - appender.append(string) - - - found = False - afterGrounds = [] - - for x in appender: - - if "on those grounds" in x.lower(): - found = True - - - if found: - if len(x.split(" "))>3: - afterGrounds.append(x) - return afterGrounds - def __call__(self)->list: - """ - This inbuilt __call__ function loops through all the methods in the class `Analyzer` and returns the list , with values of the operative part . 
- """ - - container=[self.html_page_structure_one(),self.html_page_structure_two(),self.structure_three(),self.structure_four(),self.structure_five(), - self.structure_six(),self.structure_seven(),self.structure_eight(),self.structure_nine(),self.structure_ten(),self.structure_eleven()] - - - - one:list - for funcs in range(len(container)): - - one=container[funcs] - - if one: - if (len(one)!=0 or one[0]!="\n"): - print("here") - return one - - - - - - - # one=self.html_page_structure_one() - # if len(one)==0 or len(one)=="\n": - # one=self.html_page_structure_two() - # if len(one)==0 or one[0]=="\n": - # one=self.structure_three() - # if len(one)==0 or one[0]=="\n": - # one=self.structure_four() - # if len(one)==0 or one[0]=="\n": - # one=self.structure_five() - # if len(one)==0 or one[0]=="\n": - # one=self.structure_six() - # if len(one)==0 or one[0]=="\n": - # one=self.structure_seven() - # if len(one)==0 or one[0]=="\n": - # one=self.structure_eight() - # if len(one)==0 or one[0]=="\n": - # one=self.structure_nine() - # if len(one)==0 or one[0]=="\n": - # one=self.structure_ten() - # if len(one)==0 or one[0]=="\n": - # one=self.structure_eleven() - - - - - - - -# instance=Analyzer("61962CJ0026") -# x=instance() -# if x!=None: -# print(x) - - -class Writing(): - """ - This class has different methods , for the purpose of writing the operative part into different file formats.(Csv,txt,json) - """ - - instance:str - x:str - parameter:str - def __init__(self, celex:str): - self.celex = celex - self.instance = Analyzer(self.celex) - self.x = self.instance() - - - - def to_csv(self): - file=open("csv/output.csv","a+") - writer=csv.writer(file) - - if self.x!=None: - writer.writerow([self.celex,self.x]) - - def to_json(self): - if self.x!=None: - data={'Celex':self.celex,"Operative part":self.x} - file=open('json/data.json', 'a+') - json.dump(data,file) - file.close() - def to_txt(self): - - - if self.x!=None: - file=open(f"txt/{self.celex}.txt","a") - for w in 
self.x: - - file.write(w+"\n") - file.close() -#Sample code for reading celex id's froma tsv file - -file=open("gijs_202310_node_list.tsv","r") -reader=csv.reader(file) -from output import Writing -testing=[] -for row in reader: - for rows in row: - if "Id" not in rows: - testing.append(rows.split("\t")[0]) -for all in testing: - instance=Writing(all) - instance.to_csv() - print(all) - - From d5e1aa376b6cf4762b5919ddf3d5a34b6edd5291 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Thu, 21 Mar 2024 12:49:55 +0400 Subject: [PATCH 07/78] Add files via upload --- .../cellar_extractor/operative_extractions.py | 367 ++++++++++++++++++ 1 file changed, 367 insertions(+) create mode 100644 cellar/cellar_extractor/operative_extractions.py diff --git a/cellar/cellar_extractor/operative_extractions.py b/cellar/cellar_extractor/operative_extractions.py new file mode 100644 index 0000000..deb0b22 --- /dev/null +++ b/cellar/cellar_extractor/operative_extractions.py @@ -0,0 +1,367 @@ + +import requests +from bs4 import BeautifulSoup + +import csv +import json + + +class Analyzer(): + """ + This class returns a list of the operative part for a given celex id . Celex id is initialized through a constructor. + """ + celex: str # declare celex as a string + + def __init__(self, celex): # Initialize Celex id as a constructor , passed when calling the class + self.celex = celex + + def html_page_structure_one(self) -> list: + """ + This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a nested + table structure . The relevant text lies inside the coj-bold class of the span tag. 
+ """ + website = requests.get( + f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + parser = BeautifulSoup(website, 'lxml') + div = parser.find_all('table') # Find all tables tag from the website + one = [] + for divs in div: + # Find each nested table within the table + table = divs.find('table') + if table != None: + # Find all p under the nested table with the coj-normal class + p = table.find_all('p', class_="coj-normal") + for x in p: + # Span class of coj-bold under the p tag + span = x.find_all('span', class_="coj-bold") + for y in span: + if x != None and y != None: + + # append text from span onto a list + one.append(y.text) + return one + + def html_page_structure_two(self) -> list: + """ + This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a paragraph + (p) structure . The relevant text lies inside the normal class of the p tag which comes after the keyword operative of the previous span tag. + """ + website = requests.get( + f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + parser = BeautifulSoup(website, 'lxml') + p = parser.find_all('p') + two = [] + for para in p: + + span = para.find('span') + if span != None: + + if "operative" in span.text.lower(): + normal = span.find_all_next('p', class_="normal") + for op in normal: + + two.append(op.text) + return two + + def structure_three(self) -> list: + """ + This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a nested + table structure . The relevant text lies inside the coj-bold class of the span tag. 
+ """ + website = requests.get( + f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + parser = BeautifulSoup(website, 'lxml') + table = parser.find_all('table') + three = [] + for tables in table: + interior = tables.find_all('table') + for interiors in interior: + if interiors != None: + p = interiors.find_all('p', class_="coj-normal") + for x in p: + span = x.find_all('span', class_="coj-bold") + for y in span: + if x != None and y != None: + + three.append(y.text) + return three + + def structure_four(self) -> list: + """ + This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a paragraph + (p) structure . The relevant text lies inside the p tag which comes after the keyword operative of the previous span tag. + """ + website = requests.get( + f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + parser = BeautifulSoup(website, 'lxml') + p = parser.find_all('p') + four = [] + for para in p: + + span = para.find('span') + if span != None: + + if "operative" in span.text.lower(): + normal = span.find_all_next('table') + for op in normal: + tbody = op.find('tbody') + new_p = tbody.find_all('p', class_="oj-normal") + + for subsequent in new_p: + if subsequent != None: + + four.append(subsequent.text) + + return four + + def structure_five(self) -> list: + """ + This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a paragraph + (p) structure . The relevant text lies inside the normal class of the p tag which comes after the keyword operative of the previous span tag. 
+ """ + website = requests.get( + f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + parser = BeautifulSoup(website, 'lxml') + p = parser.find_all('p') + five = [] + for para in p: + + span = para.find('span') + if span != None: + + if "operative" in span.text.lower(): + normal = span.find_all_next('table') + for op in normal: + tbody = op.find('tbody') + new_p = tbody.find_all('p', class_="normal") + + for subsequent in new_p: + if subsequent != None: + + five.append(subsequent.text) + + return five + + def structure_six(self) -> list: + """ + This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a h2 + (header) structure . The relevant text lies inside thee p tag which comes after the keyword operative part of the respective h2 tag. + """ + + website = requests.get( + f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + parser = BeautifulSoup(website, 'lxml') + div = parser.find_all('h2') + six = [] + for h2 in div: + # print(h2.text) + if h2.text == "Operative part": + operatives = h2.find_all_next('p') + for operative in operatives: + + six.append(operative.text) + return six + + def structure_seven(self) -> list: + """ + This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a table + (table) structure . The relevant text lies inside the span tag which comes after the p tag , with the class name=normal. 
+ """ + website = requests.get( + f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + parser = BeautifulSoup(website, 'lxml') + div = parser.find_all('table') + seven = [] + for divs in div: + # find tbody within the table + table = divs.find_all('tbody') + for tables in table: + if tables != None: + # find tr within the tbody + p = tables.find_all('tr') + for x in p: + if x != None: + # find td within the tr + td = x.find_all('td') + for y in td: + if y != None: + p = y.find_all('p', class_="normal") + for all in p: + if all != None: + # find operative part within the span + span = all.find_all( + 'span', class_="bold") + for spans in span: + # APpend it into a list and return the list when the function is called + seven.append(spans.text) + return seven + + def structure_eight(self) -> list: + """ + This function retreives operative part from documents of the respected celex id's .The text is extracted from the span tag nested inside + the tbody tag.Returns a list as output. + """ + website = requests.get( + f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + parser = BeautifulSoup(website, 'lxml') + + tbody = parser.find_all('tbody') + eight = [] + for all in tbody: + if all != None: + tr = all.find_all('tr') + for trs in tr: + if trs != None: + + p = parser.find_all('p', class_="normal") + for paras in p: + if paras != None: + if "on those grounds" in paras.text.lower(): + + span = paras.find_all_next( + 'span', class_="bold") + for spans in span: + if spans != None: + eight.append(spans.text) + + return eight + + def structure_nine(self) -> list: + """ + This function retreives operative part from documents of the respected celex id's .The operative part is under the bold(b) + tag after the p tag where the keywords "on those grounds" exist. 
+ """ + website = requests.get( + f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + parser = BeautifulSoup(website, 'lxml') + nine = [] + div = parser.find_all('p') + for divs in div: + if divs != None: + if "on those grounds" in divs.text.lower(): + b = divs.find_all_next('b') + for bolds in b: + # print(bolds.text) + nine.append(bolds.text) + return nine + + def structure_eleven(self) -> list: + """ + This function retreives operative part from documents of the respected celex id's .The operative part is under the paragraph(p) + tag after the b tag where the keywords "operative part" exist. + """ + website = requests.get( + f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + parser = BeautifulSoup(website, 'lxml') + bold = parser.find_all('b') + + eleven = [] + + for b in bold: + if b != None: + if "operative part" in b.text.lower(): + table = b.find_all_next('p') + for tables in table: + if tables != None: + eleven.append(tables.text) + + return eleven + + def structure_ten(self): + """ + This function retreives operative part from documents of the respected celex id's Since the ocntent is preloaded using js/client s + server side functions , the text from the current page is retrieved and the operative part is scraped after the occurence of the phrase + "On those grounds". 
+ """ + website = requests.get( + f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + parser = BeautifulSoup(website, 'lxml') + appender = [] + for string in parser.stripped_strings: + + appender.append(string) + + found = False + afterGrounds = [] + + for x in appender: + + if "on those grounds" in x.lower(): + found = True + + if found: + if len(x.split(" ")) > 3: + afterGrounds.append(x) + return afterGrounds + + def __call__(self) -> list: + """ + This inbuilt __call__ function loops through all the methods in the class `Analyzer` and returns the list , with values of the operative part . + """ + + container = [self.html_page_structure_one(), self.html_page_structure_two(), self.structure_three(), self.structure_four(), self.structure_five(), + self.structure_six(), self.structure_seven(), self.structure_eight(), self.structure_nine(), self.structure_ten(), self.structure_eleven()] + + one: list + for funcs in range(len(container)): + + one = container[funcs] + + if one: + if (len(one) != 0 or one[0] != "\n"): + print("here") + return one + + + + +class Writing(): + """ + This class has different methods , for the purpose of writing the operative part into different file formats.(Csv,txt,json) + """ + + instance: str + x: str + parameter: str + + def __init__(self, celex: str): + self.celex = celex + self.instance = Analyzer(self.celex) + self.x = self.instance() + + def to_csv(self): + file = open("csv/output.csv", "a+") + writer = csv.writer(file) + + if self.x != None: + writer.writerow([self.celex, self.x]) + + def to_json(self): + if self.x != None: + data = {'Celex': self.celex, "Operative part": self.x} + file = open('json/data.json', 'a+') + json.dump(data, file) + file.close() + + def to_txt(self): + + if self.x != None: + file = open(f"txt/{self.celex}.txt", "a") + for w in self.x: + + file.write(w+"\n") + file.close() +# Sample code for reading celex id's froma tsv file + + +file = 
open("gijs_202310_node_list.tsv", "r") +reader = csv.reader(file) +testing = [] +for row in reader: + for rows in row: + if "Id" not in rows: + testing.append(rows.split("\t")[0]) +for all in testing: + instance = Writing(all) + instance.to_csv() + print(all) From 8d754a774273cc96a871962c1386d73276775195 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Thu, 21 Mar 2024 12:51:18 +0400 Subject: [PATCH 08/78] Delete cellar/cellar_extractor/Testing_file.py --- cellar/cellar_extractor/Testing_file.py | 80 ------------------------- 1 file changed, 80 deletions(-) delete mode 100644 cellar/cellar_extractor/Testing_file.py diff --git a/cellar/cellar_extractor/Testing_file.py b/cellar/cellar_extractor/Testing_file.py deleted file mode 100644 index f743aee..0000000 --- a/cellar/cellar_extractor/Testing_file.py +++ /dev/null @@ -1,80 +0,0 @@ -""" - -This file is purely a testing file for trying out separate parts of code, testing if everything works and such. -Can be also used to develop future code. - - - -""" - -from nodes_and_edges import get_nodes_and_edges -from os.path import join -from json_to_csv import read_csv -import time -from eurlex_scraping import * -from cellar import * -from sparql import * -import unittest -from operative_extraction import Analyzer -# from test import testing -import random -import csv -file=open("gijs_202310_node_list.tsv","r") -reader=csv.reader(file) -no_of_test_cases=30 -testing=[] -for row in reader: - for rows in row: - if "Id" not in rows: - testing.append(rows.split("\t")[0]) -class Test(unittest.TestCase): - """ - class for unittesing operative part , it checks whether the list returns null value or has some value. - """ - ids:list - def __init__(self,ids): - self.ids=ids - - def test_for_celex_id(self): - """ - Main function which runs the unittest Testcase . 
- """ - count_fail:int - count_pass=0 - for id in self.ids: - test_output=Analyzer(id) - test_instance=test_output() - - # self.assertFalse(len(test_instance)<=1) - - try: - self.assertTrue(test_instance[0],f"{id} is not empty and has operative part") - count_pass+=1 - print(f"{id} ---> PASSED.") - except: - print(f"{id} ---> FAILED.") - print(f"Passed {count_pass}/{len(self.ids)} times") - # print(len(self.ids)-count,"were passed successfully") - -new_list=[] -for w in range(no_of_test_cases): - randomized=random.randint(0,len(testing)-1) - new_list.append(testing[randomized]) - - - - - - -if __name__ == '__main__': - celex = "62004CJ0292" - - instance=Test([celex]) - instance.test_for_celex_id() - site = get_entire_page(celex) - text = get_full_text_from_html(site) - cits = get_citations_with_extra_info(text) - print(cits) - data,d2 = get_cellar_extra(sd='2023-01-01',max_ecli=100,save_file='n') - nodes_edges = get_nodes_and_edges_lists(data) - pass From a6fd5b8a09092003c57ac4b1af5034c603f7e3e0 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Thu, 21 Mar 2024 12:51:31 +0400 Subject: [PATCH 09/78] Add files via upload --- cellar/cellar_extractor/Testing_file.py | 80 +++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 cellar/cellar_extractor/Testing_file.py diff --git a/cellar/cellar_extractor/Testing_file.py b/cellar/cellar_extractor/Testing_file.py new file mode 100644 index 0000000..9418533 --- /dev/null +++ b/cellar/cellar_extractor/Testing_file.py @@ -0,0 +1,80 @@ +""" + +This file is purely a testing file for trying out separate parts of code, testing if everything works and such. +Can be also used to develop future code. 
+ + + +""" + +from nodes_and_edges import get_nodes_and_edges +from os.path import join +from json_to_csv import read_csv +import time +from eurlex_scraping import * +from cellar import * +from sparql import * +import unittest +from operative_extraction import Analyzer +# from test import testing +import random +import csv +file=open("gijs_202310_node_list.tsv","r") +reader=csv.reader(file) +no_of_test_cases=30 +testing=[] +for row in reader: + for rows in row: + if "Id" not in rows: + testing.append(rows.split("\t")[0]) +class Test(unittest.TestCase): + """ + class for unittesing operative part , it checks whether the list returns null value or has some value. + """ + ids:list + def __init__(self,ids): + self.ids=ids + + def test_for_celex_id(self): + """ + Main function which runs the unittest Testcase . + """ + count_fail:int + count_pass=0 + for id in self.ids: + test_output=Analyzer(id) + test_instance=test_output() + + # self.assertFalse(len(test_instance)<=1) + + try: + self.assertTrue(test_instance[0],f"{id} is not empty and has operative part") + count_pass+=1 + print(f"{id} ---> PASSED.") + except: + print(f"{id} ---> FAILED.") + print(f"Passed {count_pass}/{len(self.ids)} times") + # print(len(self.ids)-count,"were passed successfully") + +new_list=[] +for w in range(no_of_test_cases): + randomized=random.randint(0,len(testing)-1) + new_list.append(testing[randomized]) + + + + + + +if __name__ == '__main__': + celex = "62004CJ0292" + + instance=Test([celex]) + instance.test_for_celex_id() + site = get_entire_page(celex) + text = get_full_text_from_html(site) + cits = get_citations_with_extra_info(text) + print(cits) + data,d2 = get_cellar_extra(sd='2023-01-01',max_ecli=100,save_file='n') + nodes_edges = get_nodes_and_edges_lists(data) + pass \ No newline at end of file From 3664a9c249033bdd0dd65907220c2e47c70b51b1 Mon Sep 17 00:00:00 2001 From: shashankmc Date: Thu, 28 Mar 2024 08:35:08 +0100 Subject: [PATCH 10/78] Added doc string to method extra_cellar 
--- .../cellar_extractor/cellar_extra_extract.py | 31 +++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/cellar/cellar_extractor/cellar_extra_extract.py b/cellar/cellar_extractor/cellar_extra_extract.py index 4b6c0eb..81c2764 100644 --- a/cellar/cellar_extractor/cellar_extra_extract.py +++ b/cellar/cellar_extractor/cellar_extra_extract.py @@ -4,17 +4,44 @@ def extra_cellar(data=None, filepath=None, threads=10, username="", password=""): + """ + Extracts information from a cellar dataset. + + Args: + data (pandas.DataFrame, optional): The input dataset. If not provided, + it will be read from the specified filepath. + filepath (str, optional): The path to the input dataset file. If provided, + the data will be read from this file. + threads (int, optional): The number of threads to use for parallel + processing. Default is 10. + username (str, optional): The username for accessing a separate + webservice. Default is an empty string. + password (str, optional): The password for accessing a separate + webservice. Default is an empty string. + + Returns: + tuple: A tuple containing the modified dataset and a JSON object. + + If `data` is not provided, the dataset will be read from the specified + `filepath`. + + If `username` and `password` are provided, the function will add + citations using a separate webservice. + + The function will add sections to the dataset using the specified + number of `threads`. If `filepath` is provided, + the modified dataset will be saved to the same file. Otherwise, the + modified dataset and a JSON object will be returned. + """ if data is None: data = read_csv(filepath) if filepath: if username !="" and password !="": add_citations_separate_webservice(data, username, password) - #print("Citations successfully added. 
The rest of additional extraction will now happen.") add_sections(data, threads, filepath.replace(".csv", "_fulltext.json")) data.to_csv(filepath, index=False) else: if username != "" and password != "": add_citations_separate_webservice(data, username, password) - #print("Citations successfully added. The rest of additional extraction will now happen.") json = add_sections(data, threads) return data, json From 16af4bbd0aff80f80fc1f5eab094d065ccb5db70 Mon Sep 17 00:00:00 2001 From: shashankmc Date: Thu, 28 Mar 2024 08:36:05 +0100 Subject: [PATCH 11/78] Added doc string and linted code for cellar_queries file --- cellar/cellar_extractor/cellar_queries.py | 24 ++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/cellar/cellar_extractor/cellar_queries.py b/cellar/cellar_extractor/cellar_queries.py index 6c74d16..b0f9e95 100644 --- a/cellar/cellar_extractor/cellar_queries.py +++ b/cellar/cellar_extractor/cellar_queries.py @@ -48,18 +48,23 @@ def get_all_eclis(starting_date=None, ending_date=None): return eclis -def get_raw_cellar_metadata(eclis, get_labels=True, force_readable_cols=True, force_readable_vals=False): +def get_raw_cellar_metadata(eclis, get_labels=True, force_readable_cols=True, + force_readable_vals=False): """Gets cellar metadata :param eclis: The ECLIs for which to retrieve metadata :type eclis: list[str] - :param get_labels: Flag to get human-readable labels for the properties, defaults to True + :param get_labels: Flag to get human-readable labels for the properties, + defaults to True :type get_labels: bool, optional - :param force_readable_cols: Flag to remove any non-labelled properties from the resulting dict, defaults to True + :param force_readable_cols: Flag to remove any non-labelled properties + from the resulting dict, defaults to True :type force_readable_cols: bool, optional - :param force_readable_vals: Flag to remove any non-labelled values from the resulting dict, defaults to False + :param 
force_readable_vals: Flag to remove any non-labelled values from + the resulting dict, defaults to False :type force_readable_vals: bool, optional - :return: Dictionary containing metadata. Top-level keys are ECLIs, second level are property names + :return: Dictionary containing metadata. Top-level keys are ECLIs, second + level are property names :rtype: Dict[str, Dict[str, list[str]]] """ @@ -100,8 +105,8 @@ def get_raw_cellar_metadata(eclis, get_labels=True, force_readable_cols=True, fo for ecli in eclis: metadata[ecli] = {} - # Take each triple, check which source doc it belongs to, key/value pair into its dict derived from the p and o in - # the query + # Take each triple, check which source doc it belongs to, key/value pair + # into its dict derived from the p and o in the query for res in ret['results']['bindings']: ecli = res['ecli']['value'] # We only want cdm predicates @@ -125,8 +130,9 @@ def get_raw_cellar_metadata(eclis, get_labels=True, force_readable_cols=True, fo else: val = res['o']['value'] - # We store the values for each property in a list. For some properties this is not necessary, - # but if a property can be assigned multiple times, this is important. Notable, for example is citations.b + # We store the values for each property in a list. For some properties + # this is not necessary, but if a property can be assigned multiple + # times, this is important. Notable, for example is citations. 
if key in metadata[ecli]: metadata[ecli][key].append(val) else: From 4e07a793326c380785a7d2873f95a3cf9087b72f Mon Sep 17 00:00:00 2001 From: shashankmc Date: Thu, 28 Mar 2024 08:38:07 +0100 Subject: [PATCH 12/78] Linted code, added encoding for opening files, and removed unused libraries from cellar.py file --- cellar/cellar_extractor/cellar.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/cellar/cellar_extractor/cellar.py b/cellar/cellar_extractor/cellar.py index 24c4a67..354cd5f 100644 --- a/cellar/cellar_extractor/cellar.py +++ b/cellar/cellar_extractor/cellar.py @@ -1,14 +1,15 @@ import json import os -from os.path import join + from datetime import datetime from pathlib import Path + +import time from tqdm import tqdm from cellar_extractor.cellar_queries import get_all_eclis, get_raw_cellar_metadata from cellar_extractor.json_to_csv import json_to_csv_main, json_to_csv_returning from cellar_extractor.cellar_extra_extract import extra_cellar from cellar_extractor.nodes_and_edges import get_nodes_and_edges -import time def get_cellar(ed=None, save_file='y', max_ecli=100, sd="2022-05-01", file_format='csv'): if not ed: @@ -38,18 +39,18 @@ def get_cellar(ed=None, save_file='y', max_ecli=100, sd="2022-05-01", file_forma json_to_csv_main(all_eclis, file_path) else: file_path = os.path.join('data', file_name + '.json') - with open(file_path, "w") as f: + with open(file_path, "w", encoding="utf-8") as f: json.dump(all_eclis, f) else: if file_format == 'csv': df = json_to_csv_returning(all_eclis) return df - else: - return all_eclis + return all_eclis print("\n--- DONE ---") -def get_cellar_extra(ed=None, save_file='y', max_ecli=100, sd="2022-05-01", threads=10, username="", password=""): +def get_cellar_extra(ed=None, save_file='y', max_ecli=100, sd="2022-05-01", + threads=10, username="", password=""): if not ed: ed = datetime.now().isoformat(timespec='seconds') data = get_cellar(ed=ed, save_file='n', max_ecli=max_ecli, 
sd=sd, file_format='csv') @@ -62,14 +63,16 @@ def get_cellar_extra(ed=None, save_file='y', max_ecli=100, sd="2022-05-01", thre file_path = os.path.join('data', file_name + '.csv') if save_file == 'y': Path('data').mkdir(parents=True, exist_ok=True) - extra_cellar(data = data ,filepath=file_path, threads=threads, username=username, password=password) + extra_cellar(data = data ,filepath=file_path, threads=threads, + username=username, password=password) print("\n--- DONE ---") else: - data,json = extra_cellar(data= data, threads = threads, username= username,password=password) + data,json_data = extra_cellar(data= data, threads = threads, + username= username,password=password) print("\n--- DONE ---") - return data,json + return data,json_data def get_nodes_and_edges_lists(df = None): if df is None: From 52436730ca4803bad99f233757cdd674538edf69 Mon Sep 17 00:00:00 2001 From: shashankmc Date: Thu, 28 Mar 2024 08:39:16 +0100 Subject: [PATCH 13/78] Linted code and moved doc strings under methods rather than above for citations_adder.py --- cellar/cellar_extractor/citations_adder.py | 168 ++++++++++----------- 1 file changed, 77 insertions(+), 91 deletions(-) diff --git a/cellar/cellar_extractor/citations_adder.py b/cellar/cellar_extractor/citations_adder.py index 99de07d..bc49bb7 100644 --- a/cellar/cellar_extractor/citations_adder.py +++ b/cellar/cellar_extractor/citations_adder.py @@ -4,39 +4,39 @@ from io import StringIO from os.path import dirname, abspath import pandas as pd -from cellar_extractor.sparql import get_citations_csv, get_cited, get_citing, run_eurlex_webservice_query +from cellar_extractor.sparql import (get_citations_csv, get_cited, + get_citing, run_eurlex_webservice_query) from cellar_extractor.eurlex_scraping import extract_dictionary_from_webservice_query from tqdm import tqdm sys.path.append(dirname(dirname(dirname(dirname(abspath(__file__)))))) -""" -Method used by separate threads for the multi-threading method of adding citations to the 
dataframe -Sends a query which returns a csv file containing the the celex identifiers of cited works for each case. -Works with multi-case queries, at_once is the variable deciding for how many cases are used with each query. -""" - def execute_citations(csv_list, citations): + """ + Method used by separate threads for the multi-threading method of adding + citations to the dataframe. Sends a query which returns a csv file + containing the the celex identifiers of cited works for each case. Works + with multi-case queries, at_once is the variable deciding for how many + cases are used with each query. + """ at_once = 1000 for i in range(0, len(citations), at_once): new_csv = get_citations_csv(citations[i:(i + at_once)]) csv_list.append(StringIO(new_csv)) -""" -This method replaces replaces the column with citations. - -Old column -> links to cited works -New column -> celex identifiers of cited works - -It uses multithreading, which is very much recommended. -Uses a query to get the citations in a csv format from the endpoint. * +def add_citations(data, threads): + """ + This method replaces replaces the column with citations. -* More details in the query method. -""" + Old column -> links to cited works + New column -> celex identifiers of cited works + It uses multithreading, which is very much recommended. + Uses a query to get the citations in a csv format from the endpoint. * -def add_citations(data, threads): + * More details in the query method. + """ name = "WORK CITES WORK. CI / CJ" celex = data.loc[:, "CELEX IDENTIFIER"] @@ -67,15 +67,14 @@ def add_citations(data, threads): citations.sort_index(inplace=True) data.insert(1, name, citations) - -""" -Method used by separate threads for the multi-threading method of adding citations to the dataframe -Sends a query which returns a csv file containing the the celex identifiers of cited works for each case. 
-Works with multi-case queries, at_once is the variable deciding for how many cases are used with each query. -""" - - def execute_citations_separate(cited_list, citing_list, citations): + """ + Method used by separate threads for the multi-threading method of + adding citations to the dataframe. Sends a query which returns a csv + file containing the the celex identifiers of cited works for each case. + Works with multi-case queries, at_once is the variable deciding for + how many cases are used with each query. + """ at_once = 1000 for i in range(0, len(citations), at_once): new_cited = get_cited(citations[i:(i + at_once)], 1) @@ -83,17 +82,12 @@ def execute_citations_separate(cited_list, citing_list, citations): cited_list.append(StringIO(new_cited)) citing_list.append(StringIO(new_citing)) - -""" - -Method used by separate threads for the multi-threading method of adding citations to the dataframe -Uses the eurlex webservices. -Also used for the single-thread approach. - -""" - - def execute_citations_webservice(dictionary_list, celexes, username, password): + """ + Method used by separate threads for the multi-threading method of + adding citations to the dataframe. Uses the eurlex webservices. + Also used for the single-thread approach. 
+ """ at_once = 100 success=0 retry=0 @@ -102,7 +96,8 @@ def execute_citations_webservice(dictionary_list, celexes, username, password): normal_celex, contains_celex = clean_celex(celexes) def process_queries(link, celex): nonlocal success,retry - for i in tqdm(range(0, len(celex), at_once), colour="GREEN", position=0, leave=True, maxinterval=10000): + for i in tqdm(range(0, len(celex), at_once), colour="GREEN", + position=0, leave=True, maxinterval=10000): curr_celex = celex[i:(i + at_once)] input=" OR ".join(curr_celex) query = link % (str(input)) @@ -111,7 +106,8 @@ def process_queries(link, celex): response = run_eurlex_webservice_query(query, username, password) if response.status_code == 500 and "WS_WS_CALLS_IDLE_INTERVAL" not in response.text: perc=i*100/len(celexes) - print(f"Limit of web service usage reached! Citations collection will stop here at {perc} % of citations downloaded." + print(f"Limit of web service usage reached! Citations collection\ + will stop here at {perc} % of citations downloaded." f"\nThere were {success} successful queries and {retry} retries") return elif "0" in response.text: @@ -133,16 +129,13 @@ def process_queries(link, celex): process_queries(base_contains_query,contains_celex) -""" - -Method used to separate celex id's when there are multiple pointing to the same document. -On top of that, separates celex id's with '(' and ')', these brackets are keywords for the webservice query. -After separated, a different query is ran for the normal celexes, and those with brackets. - -""" - - def clean_celex(celex): + """ + Method used to separate celex id's when there are multiple pointing to the same document. + On top of that, separates celex id's with '(' and ')', these brackets are keywords for the + webservice query. After separated, a different query is ran for the normal celexes, and + those with brackets. 
+ """ normal_list = list() contains_list = list() for c1 in celex: @@ -162,16 +155,14 @@ def clean_celex(celex): normal_list.append(c1) return normal_list, contains_list - -""" - -Method used for creation of a dictionary of documents citing the document. -Uses the dictionary of documents cited by the document. -Output will more than likely be bigger than the input dictionary, as it will also include treaties and other documents, -which are not being extracted by the cellar extractor. - -""" def allowed_id(id): + """ + Method used for creation of a dictionary of documents citing the document. + Uses the dictionary of documents cited by the document. + Output will more than likely be bigger than the input dictionary, + as it will also include treaties and other documents, + which are not being extracted by the cellar extractor. + """ if id != "": return id[0] == 8 or id[0] == 6 else: @@ -189,17 +180,13 @@ def reverse_citing_dict(citing): cited[c] = k return cited - -""" - -Method used to add the dictionaries to the dataframe. -Used by the citations adding from the eurlex webservices. -Implements checks, for whether the document whose data we want to add exists in the original dataframe. - -""" - - def add_dictionary_to_df(df, dictionary, column_title): + """ + Method used to add the dictionaries to the dataframe. + Used by the citations adding from the eurlex webservices. + Implements checks, for whether the document whose data we want to add + exists in the original dataframe. + """ column = pd.Series([], dtype='string') celex = df.loc[:, "CELEX IDENTIFIER"] for k in dictionary: @@ -210,29 +197,28 @@ def add_dictionary_to_df(df, dictionary, column_title): df.insert(1, column_title, column) -""" -Main method for citations adding via eurlex webservices. 
- -Old column -> links to cited works -New columns -> celex identifiers of cited works and works citing current work - -""" - - def add_citations_separate_webservice(data, username, password): + """ + Main method for citations adding via eurlex webservices. + Old column -> links to cited works + New columns -> celex identifiers of cited works and works citing current work + """ celex = data.loc[:, "CELEX IDENTIFIER"] query = " SELECT CI, DN WHERE DN = 62019CJ0668" response = run_eurlex_webservice_query(query, username, password) if response.status_code == 500 : if "WS_MAXIMUM_NB_OF_WS_CALLS" in response.text: - print("Maximum number of calls to the eurlex webservices reached! The code will skip the citations download.") + print("Maximum number of calls to the eurlex webservices reached!\ + The code will skip the citations download.") return else: - print("Incorrect username and password for eurlex webservices! (The account login credentials and webservice) " + print("Incorrect username and password for eurlex webservices!\ + (The account login credentials and webservice) " "login credentials are different)") sys.exit(2) elif response.status_code == 403: - print("Webservice connection was blocked, eurlex might be going through maintenance right now.") + print("Webservice connection was blocked, eurlex might be going\ + through maintenance right now.") sys.exit(2) else: print("Webservice connection was successful!") @@ -243,27 +229,26 @@ def add_citations_separate_webservice(data, username, password): for d in dictionary_list: citing_dict.update(d) print("Webservice extraction finished, the rest of extraction will now happen.") - time.sleep(1) # It seemed to print out the length of dictionary wrong, even when it was equal to 1000. + time.sleep(1) # It seemed to print out the length of dictionary wrong, + # even when it was equal to 1000. 
cited_dict = reverse_citing_dict(citing_dict) add_dictionary_to_df(data, citing_dict, "citing") add_dictionary_to_df(data, cited_dict, "cited_by") +def add_citations_separate(data, threads): + """ + This method replaces replaces the column with citations. -""" -This method replaces replaces the column with citations. - -Old column -> links to cited works -New column -> celex identifiers of cited works - -It uses multithreading, which is very much recommended. -Uses a query to get the citations in a csv format from the endpoint. * + Old column -> links to cited works + New column -> celex identifiers of cited works -* More details in the query method. -""" + It uses multithreading, which is very much recommended. + Uses a query to get the citations in a csv format from the endpoint. * + * More details in the query method. + """ -def add_citations_separate(data, threads): celex = data.loc[:, "CELEX IDENTIFIER"] length = celex.size if length > 100: # to avoid getting problems with small files @@ -276,7 +261,8 @@ def add_citations_separate(data, threads): for i in range(0, length, at_once_threads): curr_celex = celex[i:(i + at_once_threads)] - t = threading.Thread(target=execute_citations_separate, args=(cited_csv, citing_csv, curr_celex)) + t = threading.Thread(target=execute_citations_separate, + args=(cited_csv, citing_csv, curr_celex)) threads.append(t) for t in threads: From 7db0f6adb680f2420c6921aa3625b067dd0b83cc Mon Sep 17 00:00:00 2001 From: shashankmc Date: Thu, 28 Mar 2024 08:40:06 +0100 Subject: [PATCH 14/78] Linted code for csv_extractor.py --- cellar/cellar_extractor/csv_extractor.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/cellar/cellar_extractor/csv_extractor.py b/cellar/cellar_extractor/csv_extractor.py index 23ee71c..e4612ad 100644 --- a/cellar/cellar_extractor/csv_extractor.py +++ b/cellar/cellar_extractor/csv_extractor.py @@ -2,12 +2,11 @@ import argparse from cellar_extractor.json_to_csv import read_csv -""" 
-Method takes in a dataframe and returns a dataframe with only *number* of data rows. -""" - - def extract_rows(data, number): + """ + Method takes in a dataframe and returns a dataframe with only *number* of data rows. + """ + try: output = data[1:number] except Exception: @@ -24,7 +23,7 @@ def extract_rows(data, number): print("") print("EXTRACTION FROM CSV FILE IN DATA PROCESSED DIR STARTED") print("") - csv_files = (glob.glob(DIR_DATA_RAW + "/" + "*.csv")) + csv_files = glob.glob(DIR_DATA_RAW + "/" + "*.csv") print(f"FOUND {len(csv_files)} CSV FILES") for i in range(len(csv_files)): From 2ddf0a64b7b3346b00bcc581ee3427341c6f6f12 Mon Sep 17 00:00:00 2001 From: shashankmc Date: Thu, 28 Mar 2024 08:41:37 +0100 Subject: [PATCH 15/78] Linted code, changed conditional statements for PEP8 conformity, and changed variable names that were similar to inbuilt ones for eurlex_scraping --- cellar/cellar_extractor/eurlex_scraping.py | 231 ++++++++------------- 1 file changed, 86 insertions(+), 145 deletions(-) diff --git a/cellar/cellar_extractor/eurlex_scraping.py b/cellar/cellar_extractor/eurlex_scraping.py index 944097e..d825ccd 100644 --- a/cellar/cellar_extractor/eurlex_scraping.py +++ b/cellar/cellar_extractor/eurlex_scraping.py @@ -1,8 +1,9 @@ -from bs4 import BeautifulSoup -import requests import time -import xmltodict import re +import requests +import xmltodict + +from bs4 import BeautifulSoup LINK_SUMMARY_INF = 'https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:cIdHere&from=EN' LINK_SUMJURE = 'https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:cIdHere_SUM&from=EN' @@ -17,31 +18,25 @@ def is_code(word): return word.replace(".", "0").replace("-", "0")[1:].isdigit() - -""" -Wrapped method for requests.get(). -After 10 retries, it gives up and returns a "404" string. -""" - - def response_wrapper(link, num=1): + """ + Wrapped method for requests.get(). + After 10 retries, it gives up and returns a "404" string. 
+ """ if num == 10: return "404" try: - response = requests.get(link) + response = requests.get(link, timeout=60) return response except Exception: time.sleep(0.5 * num) return response_wrapper(link, num + 1) - -""" -This method returns the html of a summary page. -Cellar specific, works for celex id's starting a 6 and 8. -""" - - def get_summary_html(celex): + """ + This method returns the html of a summary page. + Cellar specific, works for celex id's starting a 6 and 8. + """ if celex == celex: # nan check if ";" in celex: idss = celex.split(";") @@ -82,18 +77,16 @@ def get_summary_html(celex): else: return "No summary available" - -""" -Method used to extract the summary from a html page. -Cellar specific, uses get_words_from_keywords. -Currently only walking for celex id's starting with a 6 ( EU cases). -""" - - def get_summary_from_html(html, starting): + """ + Method used to extract the summary from a html page. + Cellar specific, uses get_words_from_keywords. + Currently only walking for celex id's starting with a 6 ( EU cases). + # This method turns the html code from the summary page into text # It has different cases depending on the first character of the CELEX ID # Should only be used for summaries extraction + """ text = get_full_text_from_html(html) if starting == "8": return "No summary available" @@ -107,17 +100,14 @@ def get_summary_from_html(html, starting): return text return text - -""" -Method used to extract the keywords from a html page. -Cellar specific, uses get_words_from_keywords. -""" - - def get_keywords_from_html(html, starting): + """ + Method used to extract the keywords from a html page. + Cellar specific, uses get_words_from_keywords. 
# This method turns the html code from the summary page into text # It has different cases depending on the first character of the CELEX ID # Should only be used for summaries extraction + """ text = get_full_text_from_html(html) if starting == "8": text = "No keywords available" @@ -125,22 +115,18 @@ def get_keywords_from_html(html, starting): elif starting == "6": return get_words_from_keywords(text) - -""" - -Method used for citations extraction from eurlex webservices. -It reads the SOAP response from the webservices, and adds values to the dictionary based on the results. -Dictionary is using the celex id of a work as key and a list of celex id's of works cited as value. - -""" - - def extract_dictionary_from_webservice_query(response): + """ + Method used for citations extraction from eurlex webservices. + It reads the SOAP response from the webservices, and adds values to the + dictionary based on the results. Dictionary is using the celex id of a + work as key and a list of celex id's of works cited as value. + """ text = response.text read = xmltodict.parse(text) results = read['S:Envelope']['S:Body']['searchResults']['result'] dictionary = dict() - if type(results) == list: + if isinstance(results, list): for result in results: celex, citing = extract_citations_from_soap(result) dictionary[celex] = citing @@ -149,16 +135,11 @@ def extract_dictionary_from_webservice_query(response): dictionary[celex] = citing return dictionary - -""" - -Method used for citations extraction from eurlex webservices. -Reads the individual celex id and documents cited from a single result. - -""" - - def extract_citations_from_soap(results): + """ + Method used for citations extraction from eurlex webservices. + Reads the individual celex id and documents cited from a single result. 
+ """ main_content = results['content']['NOTICE']['WORK'] celex = main_content['ID_CELEX'].get('VALUE') try: @@ -166,7 +147,7 @@ def extract_citations_from_soap(results): except KeyError: return celex, "" citing_list = list() - if type(citing) == list: + if isinstance(citing, list): for cited in citing: celex_of_citation = get_citation_celex(cited) if celex_of_citation != "": @@ -175,20 +156,16 @@ def extract_citations_from_soap(results): else: return celex, get_citation_celex(citing) - -""" - -Method used for citations extraction from eurlex webservices. -Goes thru all of the different id's of the document cited, and returns the one that is a celex id. - -""" - - def get_citation_celex(cited): + """ + Method used for citations extraction from eurlex webservices. + Goes thru all of the different id's of the document cited, + and returns the one that is a celex id. + """ identifiers = cited['SAMEAS'] - if type(identifiers) == list: - for id in identifiers: - ident = id['URI']['IDENTIFIER'] + if isinstance(identifiers, list): + for _id in identifiers: + ident = _id['URI']['IDENTIFIER'] if is_celex_id(ident): return ident else: @@ -197,31 +174,22 @@ def get_citation_celex(cited): return ident return "" - -""" - -Method checking if the id passed is a celex id, using regex. - -""" - - -def is_celex_id(id): - if id is None: +def is_celex_id(_id): + """ + Method checking if the id passed is a celex id, using regex. + """ + if _id is None: return False - if prog.match(id): + if prog.match(_id): return True else: return False - -""" -This method tries to extract only they keywords from a part of html page containing it. -They keywords on the page are always separated by " - " or other types of dashes. - -""" - - def get_words_from_keywords_em(text): + """ + This method tries to extract only they keywords from a part of html page containing it. + They keywords on the page are always separated by " - " or other types of dashes. 
+ """ lines = text.split(sep="\n") returner = set() for line in lines: @@ -242,15 +210,10 @@ def get_words_from_keywords_em(text): returner.update(line.split(sep=" - ")) return ";".join(returner) - -""" - -One of the methods used to extract keywords from summary text. - -""" - - def get_words_from_keywords(text): + """ + One of the methods used to extract keywords from summary text. + """ if "Keywords" in text: try: index = text.find("Keywords") @@ -269,15 +232,12 @@ def get_words_from_keywords(text): text = text[:index] return get_words_from_keywords_em(text) - -""" - This method turns the html code from the summary page into text. - It has different cases depending on the first character of the CELEX ID. - Universal method, also replaces all "," with "_". -""" - - def get_full_text_from_html(html_text): + """ + This method turns the html code from the summary page into text. + It has different cases depending on the first character of the CELEX ID. + Universal method, also replaces all "," with "_". + """ # This method turns the html code from the summary page into text # It has different cases depending on the first character of the CELEX ID # Should only be used for summaries extraction @@ -294,15 +254,12 @@ def get_full_text_from_html(html_text): text = text.replace(",", "_") return text - -""" -This method is a wrapped for the get_html_by_celex_id method imported from eurlex. -Sometimes thew websites do not load because of too many connections at once, -this method waits a bit and tries again for up to 5 tries. -""" - - def get_html_text_by_celex_id(id): + """ + This method is a wrapped for the get_html_by_celex_id method imported from eurlex. + Sometimes thew websites do not load because of too many connections at once, + this method waits a bit and tries again for up to 5 tries. 
+ """ link = "https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:cIdHere&from=EN" final = id if id == id: # nan check @@ -321,14 +278,12 @@ def get_html_text_by_celex_id(id): else: return html.text - -""" -This method gets the page containing all document details for extracting the subject matter and -the case law directory codes. Uses the celex identifier of a case. -""" - - def get_entire_page(celex): + """ + This method gets the page containing all document details for extracting + the subject matter and + the case law directory codes. Uses the celex identifier of a case. + """ link = 'https://eur-lex.europa.eu/legal-content/EN/ALL/?uri=CELEX:cIdHere' if celex == celex: # nan check if ";" in celex: @@ -353,14 +308,11 @@ def get_entire_page(celex): except Exception: return "No data available" - -""" -This Method gets the subject matter from a fragment of code containing them. -Used for extracting subject matter for cellar cases only. -""" - - def get_subject(text): + """ + This Method gets the subject matter from a fragment of code containing them. + Used for extracting subject matter for cellar cases only. + """ try: index_matter = text.index("Subject matter:") try: @@ -375,14 +327,11 @@ def get_subject(text): subject = "" return subject - -""" -This Method extracts all eurovocs, from a fragment containing them. -Used for extracting eurovoc for cellar cases. -""" - - def get_eurovoc(text): + """ + This Method extracts all eurovocs, from a fragment containing them. + Used for extracting eurovoc for cellar cases. + """ try: start = text.find("EUROVOC") try: @@ -408,14 +357,11 @@ def get_eurovoc(text): except Exception: return "" - -""" -Method for getting all of the case directory codes for each cellar case. -Extracts them from a string containing the eurlex website containing all document information. -""" - - def get_codes(text): + """ + Method for getting all of the case directory codes for each cellar case. 
+ Extracts them from a string containing the eurlex website containing all document information. + """ try: index_codes = text.index("Case law directory code:") index_end = text.index("Miscellaneous information") @@ -431,11 +377,8 @@ def get_codes(text): index_start = indexes[x] getting_ending = extracting[index_start:] words_here = getting_ending.split() - for words in words_here: - if words is not words_here[0]: - if is_code(words): ending = getting_ending[2:].find(words) done = True @@ -444,14 +387,12 @@ def get_codes(text): code_text = getting_ending[:ending] else: code_text = getting_ending - codes_result.append(code_text.replace("\n", "")) code = ";".join(codes_result) except Exception: code = "" return code - def get_advocate_or_judge(text, phrase): """ :param text: full text of the info page of a case from eur-lex website @@ -475,7 +416,8 @@ def get_advocate_or_judge(text, phrase): def get_case_affecting(text): """ :param text: full text of the info page of a case from eur-lex website - :return: The celex id's of case affecting listed + entire string data with more information about the case affecting + :return: The celex id's of case affecting listed + entire string data with + more information about the case affecting """ phrase = 'Case affecting:' try: @@ -523,6 +465,5 @@ def get_citations_with_extra_info(text): data_list.append(fixed_line) else: return ";".join(data_list) - except: - return '' + return '' \ No newline at end of file From c355d00a9c38cc31e3ee6dbf45b16bd53b6f65c5 Mon Sep 17 00:00:00 2001 From: shashankmc Date: Thu, 28 Mar 2024 08:42:40 +0100 Subject: [PATCH 16/78] Linted code, corrected variable names that are similar to inbuilt references for fulltext_saving --- cellar/cellar_extractor/fulltext_saving.py | 115 ++++++++++----------- 1 file changed, 57 insertions(+), 58 deletions(-) diff --git a/cellar/cellar_extractor/fulltext_saving.py b/cellar/cellar_extractor/fulltext_saving.py index 44af01c..1a9eaa8 100644 --- 
a/cellar/cellar_extractor/fulltext_saving.py +++ b/cellar/cellar_extractor/fulltext_saving.py @@ -1,22 +1,22 @@ -import pandas as pd +import json import threading +import time +import pandas as pd from cellar_extractor.eurlex_scraping import * -import json from tqdm import tqdm -import time - -""" -This is the method executed by individual threads by the add_sections method. - -The big dataset is divided in parts, each thread gets its portion of work to do. -They add their portions of columns to corresponding lists, -after all the threads are done the individual parts are put together. -""" -def execute_sections_threads(celex, eclis, start, list_sum, list_key, list_full, list_codes, list_eurovoc, list_adv, - list_judge, list_affecting_id, list_affecting_str,list_citations_extra, progress_bar): - sum = pd.Series([], dtype='string') +def execute_sections_threads(celex, eclis, start, list_sum, list_key, list_full, + list_codes, list_eurovoc, list_adv, list_judge, + list_affecting_id, list_affecting_str,list_citations_extra, + progress_bar): + """ + This is the method executed by individual threads by the add_sections method. + The big dataset is divided in parts, each thread gets its portion of work to do. + They add their portions of columns to corresponding lists, + after all the threads are done the individual parts are put together. 
+ """ + _sum = pd.Series([], dtype='string') key = pd.Series([], dtype='string') full = list() case_codes = pd.Series([], dtype='string') @@ -28,34 +28,34 @@ def execute_sections_threads(celex, eclis, start, list_sum, list_key, list_full, citations_extra = pd.Series([], dtype='string') for i in range(len(celex)): j = start + i - id = celex[j] + _id = celex[j] ecli = eclis[j] - html = get_html_text_by_celex_id(id) + html = get_html_text_by_celex_id(_id) if html != "404": text = get_full_text_from_html(html) json_text = { - 'celex': str(id), + 'celex': str(_id), 'ecli': ecli, 'text': text } full.append(json_text) else: json_text = { - 'celex': str(id), + 'celex': str(_id), 'ecli': ecli, 'text': "" } full.append(json_text) - summary = get_summary_html(id) + summary = get_summary_html(_id) if summary != "No summary available": text = get_keywords_from_html(summary, id[0]) text2 = get_summary_from_html(summary, id[0]) key[j] = text - sum[j] = text2 + _sum[j] = text2 else: key[j] = "" - sum[j] = "" - entire_page = get_entire_page(id) + _sum[j] = "" + entire_page = get_entire_page(_id) text = get_full_text_from_html(entire_page) if entire_page != "No data available": code = get_codes(text) @@ -82,7 +82,7 @@ def execute_sections_threads(celex, eclis, start, list_sum, list_key, list_full, citations_extra[j] = citation_extra progress_bar.update(1) - list_sum.append(sum) + list_sum.append(_sum) list_key.append(key) list_full.append(full) list_codes.append(case_codes) @@ -93,51 +93,53 @@ def execute_sections_threads(celex, eclis, start, list_sum, list_key, list_full, list_affecting_str.append(affecting_str) list_citations_extra.append(citations_extra) -""" -This method adds the following sections to a pandas dataframe, as separate columns: - -Full Text -Case law directory codes -Keywords -Summary -Advocate General -Judge Rapporteur -Case affecting (CELEX ID) -Case affecting string (entire str with more info) - -Method is cellar-specific, scraping html from 
https://eur-lex.europa.eu/homepage.html. -It operates with multiple threads, using that feature is recommended as it speeds up the entire process. -""" +def add_sections(data, threads, json_filepath=None): + """ + This method adds the following sections to a pandas dataframe, as separate columns: + Full Text + Case law directory codes + Keywords + Summary + Advocate General + Judge Rapporteur + Case affecting (CELEX ID) + Case affecting string (entire str with more info) -def add_sections(data, threads, json_filepath=None): + Method is cellar-specific, scraping html from https://eur-lex.europa.eu/homepage.html. + It operates with multiple threads, using that feature is recommended as it speeds up + the entire process. + """ celex = data.loc[:, 'CELEX IDENTIFIER'] eclis = data.loc[:, 'ECLI'] length = celex.size time.sleep(1) - bar = tqdm(total=length, colour="GREEN", miniters=int(length/100), position=0, leave=True, maxinterval=10000) + _bar = tqdm(total=length, colour="GREEN", miniters=int(length/100), + position=0, leave=True, maxinterval=10000) if length > threads: # to avoid getting problems with small files at_once_threads = int(length / threads) else: at_once_threads = length threads = [] - list_sum = list() - list_key = list() - list_full = list() - list_codes = list() - list_eurovoc = list() - list_adv = list() - list_judge = list() - list_affecting_id = list() - list_affecting_str = list() - list_citations_extra = list() + list_sum = [] + list_key = [] + list_full = [] + list_codes = [] + list_eurovoc = [] + list_adv = [] + list_judge = [] + list_affecting_id = [] + list_affecting_str = [] + list_citations_extra = [] for i in range(0, length, at_once_threads): curr_celex = celex[i:(i + at_once_threads)] curr_ecli = eclis[i:(i + at_once_threads)] t = threading.Thread(target=execute_sections_threads, args=( - curr_celex, curr_ecli, i, list_sum, list_key, list_full, list_codes, list_eurovoc, - list_adv, list_judge, list_affecting_id, 
list_affecting_str,list_citations_extra, bar)) + curr_celex, curr_ecli, i, list_sum, list_key, list_full, + list_codes, list_eurovoc, list_adv, list_judge, + list_affecting_id, list_affecting_str, + list_citations_extra, _bar)) threads.append(t) for t in threads: t.start() @@ -164,13 +166,10 @@ def add_sections(data, threads, json_filepath=None): json_file.extend(l) return json_file - -""" -Used for adding columns easier to a dataframe for add_sections(). -""" - - def add_column_frow_list(data, name, list): + """ + Used for adding columns easier to a dataframe for add_sections(). + """ column = pd.Series([], dtype='string') for l in list: column = column.append(l) From acd033e9aec63d2738ec1ba58434362067f5c5a1 Mon Sep 17 00:00:00 2001 From: shashankmc Date: Thu, 28 Mar 2024 08:43:12 +0100 Subject: [PATCH 17/78] Linted code for json_to_csv file --- cellar/cellar_extractor/json_to_csv.py | 55 +++++++++++++------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/cellar/cellar_extractor/json_to_csv.py b/cellar/cellar_extractor/json_to_csv.py index 1a1e62e..8b2e301 100644 --- a/cellar/cellar_extractor/json_to_csv.py +++ b/cellar/cellar_extractor/json_to_csv.py @@ -1,40 +1,44 @@ import csv import re +import sys import warnings +from io import StringIO from bs4 import BeautifulSoup -import sys import pandas as pd -from io import StringIO + warnings.filterwarnings("ignore") -X = ['WORK IS CREATED BY AGENT (AU)', 'CASE LAW COMMENTED BY AGENT', 'CASE LAW HAS A TYPE OF PROCEDURE', - 'LEGAL RESOURCE USES ORIGINALLY LANGUAGE', 'CASE LAW USES LANGUAGE OF PROCEDURE', - 'CASE LAW HAS A JUDICIAL PROCEDURE TYPE', 'WORK HAS RESOURCE TYPE', 'LEGAL RESOURCE BASED ON TREATY CONCEPT', +X = ['WORK IS CREATED BY AGENT (AU)', 'CASE LAW COMMENTED BY AGENT', + 'CASE LAW HAS A TYPE OF PROCEDURE', 'LEGAL RESOURCE USES ORIGINALLY LANGUAGE', + 'CASE LAW USES LANGUAGE OF PROCEDURE', 'CASE LAW HAS A JUDICIAL PROCEDURE TYPE', + 'WORK HAS RESOURCE TYPE', 'LEGAL RESOURCE BASED ON 
TREATY CONCEPT', 'CASE LAW ORIGINATES IN COUNTRY OR USES A ROLE QUALIFIER', 'CASE LAW ORIGINATES IN COUNTRY', - 'CASE LAW DELIVERED BY COURT FORMATION', 'LEGAL RESOURCE IS ABOUT SUBJECT MATTER', 'RELATED JOURNAL ARTICLE', - 'CASE LAW DELIVERED BY ADVOCATE GENERAL', 'CASE LAW DELIVERED BY JUDGE', 'ECLI', - 'CASE LAW INTERPRETS LEGAL RESOURCE', 'NATIONAL JUDGEMENT', 'DATE_CREATION_LEGACY', 'DATETIME NEGOTIATION', - 'SEQUENCE OF VALUES', 'DATE OF REQUEST FOR AN OPINION', 'CELEX IDENTIFIER', 'SECTOR IDENTIFIER', - 'NATURAL NUMBER (CELEX)', 'TYPE OF LEGAL RESOURCE', 'YEAR OF THE LEGAL RESOURCE', 'WORK CITES WORK. CI / CJ', - 'LEGACY DATE OF CREATION OF WORK', 'DATE OF DOCUMENT', 'IDENTIFIER OF DOCUMENT', 'WORK VERSION', + 'CASE LAW DELIVERED BY COURT FORMATION', 'LEGAL RESOURCE IS ABOUT SUBJECT MATTER', + 'RELATED JOURNAL ARTICLE', 'CASE LAW DELIVERED BY ADVOCATE GENERAL', + 'CASE LAW DELIVERED BY JUDGE', 'ECLI', 'CASE LAW INTERPRETS LEGAL RESOURCE', + 'NATIONAL JUDGEMENT', 'DATE_CREATION_LEGACY', 'DATETIME NEGOTIATION', + 'SEQUENCE OF VALUES', 'DATE OF REQUEST FOR AN OPINION', 'CELEX IDENTIFIER', + 'SECTOR IDENTIFIER', 'NATURAL NUMBER (CELEX)', 'TYPE OF LEGAL RESOURCE', + 'YEAR OF THE LEGAL RESOURCE', 'WORK CITES WORK. 
CI / CJ', 'LEGACY DATE OF CREATION OF WORK', + 'DATE OF DOCUMENT', 'IDENTIFIER OF DOCUMENT', 'WORK VERSION', 'LAST CMR MODIFICATION DATE', 'CASE LAW HAS CONCLUSIONS'] Y = ['LEGAL RESOURCE HAS TYPE OF ACT', 'WORK HAS RESOURCE TYPE', 'CASE LAW ORIGINATES IN COUNTRY', 'LEGAL RESOURCE IS ABOUT SUBJECT MATTER', 'ECLI', 'REFERENCE TO PROVISIONS OF NATIONAL LAW', - 'PUBLICATION REFERENCE OF COURT DECISION', 'CELEX IDENTIFIER', 'LOCAL IDENTIFIER', 'SECTOR IDENTIFIER', - 'TYPE OF LEGAL RESOURCE', 'YEAR OF THE LEGAL RESOURCE', 'WORK IS CREATED BY AGENT (AU)', - 'LEGACY DATE OF CREATION OF WORK', 'DATE OF DOCUMENT', 'IDENTIFIER OF DOCUMENT', 'WORK TITLE', 'CMR CREATION DATE', - 'LAST CMR MODIFICATION DATE', 'CASE LAW DELIVERED BY NATIONAL COURT', 'REFERENCE TO A EUROPEAN ACT IN FREE TEXT', + 'PUBLICATION REFERENCE OF COURT DECISION', 'CELEX IDENTIFIER', 'LOCAL IDENTIFIER', + 'SECTOR IDENTIFIER', 'TYPE OF LEGAL RESOURCE', 'YEAR OF THE LEGAL RESOURCE', + 'WORK IS CREATED BY AGENT (AU)', 'LEGACY DATE OF CREATION OF WORK', 'DATE OF DOCUMENT', + 'IDENTIFIER OF DOCUMENT', 'WORK TITLE', 'CMR CREATION DATE', + 'LAST CMR MODIFICATION DATE', 'CASE LAW DELIVERED BY NATIONAL COURT', + 'REFERENCE TO A EUROPEAN ACT IN FREE TEXT', 'CASE LAW BASED ON A LEGAL INSTRUMENT', 'PARTIES OF THE CASE LAW'] COLS = set(X + Y) COLS = sorted(COLS) -""" -Method used after the json to csv conversion, to save the file in the processed directory. -""" - - def create_csv(filepath, encoding="UTF8", data=None): + """ + Method used after the json to csv conversion, to save the file in the processed directory. + """ if data != "": csv_file = open(filepath, 'w', encoding=encoding) csv_writer = csv.writer(csv_file) @@ -42,14 +46,11 @@ def create_csv(filepath, encoding="UTF8", data=None): csv_writer.writerows(data) csv_file.close() - -""" -Method used to transform the json file received from cellar_extraction to a csv file. -Cellar specific, sets specific columns with names defined at the beginning of file as COLS. 
-""" - - def json_to_csv(json_data): + """ + Method used to transform the json file received from cellar_extraction to a csv file. + Cellar specific, sets specific columns with names defined at the beginning of file as COLS. + """ final_data = [] for data in json_data: ecli_data = json_data[data] From f541100e1a822b1450f163b637a0ec598568cc9e Mon Sep 17 00:00:00 2001 From: shashankmc Date: Thu, 28 Mar 2024 08:43:44 +0100 Subject: [PATCH 18/78] Linted code, changed for loop to use enumerate rather than range and len. --- cellar/cellar_extractor/nodes_and_edges.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cellar/cellar_extractor/nodes_and_edges.py b/cellar/cellar_extractor/nodes_and_edges.py index 9578c36..843883e 100644 --- a/cellar/cellar_extractor/nodes_and_edges.py +++ b/cellar/cellar_extractor/nodes_and_edges.py @@ -1,10 +1,13 @@ import pandas as pd + def extract_containing_subject_matter(df,phrase): returner = df[df["LEGAL RESOURCE IS ABOUT SUBJECT MATTER"].str.contains(phrase, na=False)] return returner + def get_df_with_celexes(df,celexes): returner = df[df['CELEX IDENTIFIER'].isin(celexes)] return returner + def get_edges_list(df): extraction = df[['CELEX IDENTIFIER','citing']] extraction.reset_index(inplace=True) @@ -12,7 +15,7 @@ def get_edges_list(df): vals = extraction['citing'] nodes = set() edges = list() - for i in range(len(keys)): + for i in enumerate(keys): k = keys[i] val = vals[i] if val == val: @@ -24,6 +27,7 @@ def get_edges_list(df): else: pass return edges, list(nodes) + def get_nodes_and_edges(df): edges, nodes = get_edges_list(df) #nodes = get_df_with_celexes(df,celexes) From 14ffecc3604982ab9465eb2377d8daae25aefd24 Mon Sep 17 00:00:00 2001 From: shashankmc Date: Thu, 28 Mar 2024 08:45:04 +0100 Subject: [PATCH 19/78] Linted code, changed conditions from != None to is not None for code conformity, changed variable names to avoid inbuilt references. 
--- .../cellar_extractor/operative_extractions.py | 238 +++++++++--------- 1 file changed, 112 insertions(+), 126 deletions(-) diff --git a/cellar/cellar_extractor/operative_extractions.py b/cellar/cellar_extractor/operative_extractions.py index deb0b22..1bb5f43 100644 --- a/cellar/cellar_extractor/operative_extractions.py +++ b/cellar/cellar_extractor/operative_extractions.py @@ -1,155 +1,145 @@ - -import requests -from bs4 import BeautifulSoup - import csv import json - +import requests +from bs4 import BeautifulSoup class Analyzer(): """ - This class returns a list of the operative part for a given celex id . Celex id is initialized through a constructor. + This class returns a list of the operative part for a given celex id. + Celex id is initialized through a constructor. """ celex: str # declare celex as a string - - def __init__(self, celex): # Initialize Celex id as a constructor , passed when calling the class + url: str # declare url as a string + def __init__(self, celex): + # Initialize Celex id as a constructor, passed when calling the class self.celex = celex - + self.url = "https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A\ + {self.celex}&from=EN" def html_page_structure_one(self) -> list: """ - This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a nested + This function retreives operative part from documents of the respected celex id's. + This function scrapes/parse the operative part from a nested table structure . The relevant text lies inside the coj-bold class of the span tag. 
""" - website = requests.get( - f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + website = requests.get(self.url.replace("{self.celex}", self.celex), timeout=60).text parser = BeautifulSoup(website, 'lxml') div = parser.find_all('table') # Find all tables tag from the website one = [] for divs in div: # Find each nested table within the table table = divs.find('table') - if table != None: + if table is not None: # Find all p under the nested table with the coj-normal class p = table.find_all('p', class_="coj-normal") for x in p: # Span class of coj-bold under the p tag span = x.find_all('span', class_="coj-bold") for y in span: - if x != None and y != None: - + if x is not None and y is not None: # append text from span onto a list one.append(y.text) return one def html_page_structure_two(self) -> list: """ - This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a paragraph - (p) structure . The relevant text lies inside the normal class of the p tag which comes after the keyword operative of the previous span tag. + This function retreives operative part from documents of the respected celex id's. + This function scrapes/parse the operative part from a paragraph + (p) structure . The relevant text lies inside the normal class of the p tag which + comes after the keyword operative of the previous span tag. 
""" - website = requests.get( - f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + website = requests.get(self.url.replace("{self.celex}", self.celex), timeout=60).text parser = BeautifulSoup(website, 'lxml') p = parser.find_all('p') two = [] for para in p: - span = para.find('span') - if span != None: - + if span is not None: if "operative" in span.text.lower(): normal = span.find_all_next('p', class_="normal") for op in normal: - two.append(op.text) return two def structure_three(self) -> list: """ - This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a nested - table structure . The relevant text lies inside the coj-bold class of the span tag. + This function retreives operative part from documents of the respected celex id's. + This function scrapes/parse the operative part from a nested + table structure. The relevant text lies inside the coj-bold class of the span tag. """ - website = requests.get( - f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + website = requests.get(self.url.replace("{self.celex}", self.celex), timeout=60).text parser = BeautifulSoup(website, 'lxml') table = parser.find_all('table') three = [] for tables in table: interior = tables.find_all('table') for interiors in interior: - if interiors != None: + if interiors is not None: p = interiors.find_all('p', class_="coj-normal") for x in p: span = x.find_all('span', class_="coj-bold") for y in span: - if x != None and y != None: - + if x is not None and y is not None: three.append(y.text) return three def structure_four(self) -> list: """ - This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a paragraph - (p) structure . The relevant text lies inside the p tag which comes after the keyword operative of the previous span tag. 
+ This function retreives operative part from documents of the respected celex id's. + This function scrapes/parse the operative part from a paragraph + (p) structure . The relevant text lies inside the p tag which comes after the + keyword operative of the previous span tag. """ - website = requests.get( - f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + website = requests.get(self.url.replace("{self.celex}", self.celex), timeout=60).text parser = BeautifulSoup(website, 'lxml') p = parser.find_all('p') four = [] for para in p: - span = para.find('span') - if span != None: - + if span is not None: if "operative" in span.text.lower(): normal = span.find_all_next('table') for op in normal: tbody = op.find('tbody') new_p = tbody.find_all('p', class_="oj-normal") - for subsequent in new_p: - if subsequent != None: - + if subsequent is not None: four.append(subsequent.text) - return four def structure_five(self) -> list: """ - This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a paragraph - (p) structure . The relevant text lies inside the normal class of the p tag which comes after the keyword operative of the previous span tag. + This function retreives operative part from documents of the respected celex id's. + This function scrapes/parse the operative part from a paragraph + (p) structure. The relevant text lies inside the normal class of the p tag which + comes after the keyword operative of the previous span tag. 
""" - website = requests.get( - f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + website = requests.get(self.url.replace("{self.celex}", self.celex), timeout=60).text parser = BeautifulSoup(website, 'lxml') p = parser.find_all('p') five = [] for para in p: span = para.find('span') - if span != None: - + if span is not None: if "operative" in span.text.lower(): normal = span.find_all_next('table') for op in normal: tbody = op.find('tbody') new_p = tbody.find_all('p', class_="normal") - for subsequent in new_p: - if subsequent != None: - + if subsequent is not None: five.append(subsequent.text) return five def structure_six(self) -> list: """ - This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a h2 - (header) structure . The relevant text lies inside thee p tag which comes after the keyword operative part of the respective h2 tag. + This function retreives operative part from documents of the respected celex id's. + This function scrapes/parse the operative part from a h2 (header) structure. + The relevant text lies inside thee p tag which comes after the keyword operative + part of the respective h2 tag. """ - - website = requests.get( - f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + website = requests.get(self.url.replace("{self.celex}", self.celex), timeout=60).text parser = BeautifulSoup(website, 'lxml') div = parser.find_all('h2') six = [] @@ -164,11 +154,12 @@ def structure_six(self) -> list: def structure_seven(self) -> list: """ - This function retreives operative part from documents of the respected celex id's . This function scrapes/parse the operative part from a table - (table) structure . The relevant text lies inside the span tag which comes after the p tag , with the class name=normal. + This function retreives operative part from documents of the respected celex id's. 
+ This function scrapes/parse the operative part from a table + (table) structure. The relevant text lies inside the span tag which comes after + the p tag , with the class name=normal. """ - website = requests.get( - f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + website = requests.get(self.url.replace("{self.celex}", self.celex), timeout=60).text parser = BeautifulSoup(website, 'lxml') div = parser.find_all('table') seven = [] @@ -176,68 +167,66 @@ def structure_seven(self) -> list: # find tbody within the table table = divs.find_all('tbody') for tables in table: - if tables != None: + if tables is not None: # find tr within the tbody p = tables.find_all('tr') for x in p: - if x != None: + if x is not None: # find td within the tr td = x.find_all('td') for y in td: - if y != None: + if y is not None: p = y.find_all('p', class_="normal") - for all in p: - if all != None: + for _all in p: + if _all is not None: # find operative part within the span - span = all.find_all( + span = _all.find_all( 'span', class_="bold") for spans in span: - # APpend it into a list and return the list when the function is called + # Append it into a list and return the + # list when the function is called seven.append(spans.text) return seven def structure_eight(self) -> list: """ - This function retreives operative part from documents of the respected celex id's .The text is extracted from the span tag nested inside + This function retreives operative part from documents of the respected celex id's. + The text is extracted from the span tag nested inside the tbody tag.Returns a list as output. 
""" - website = requests.get( - f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + website = requests.get(self.url.replace("{self.celex}", self.celex), timeout=60).text parser = BeautifulSoup(website, 'lxml') tbody = parser.find_all('tbody') eight = [] - for all in tbody: - if all != None: - tr = all.find_all('tr') + for _all in tbody: + if _all is not None: + tr = _all.find_all('tr') for trs in tr: - if trs != None: - + if trs is not None: p = parser.find_all('p', class_="normal") for paras in p: - if paras != None: + if paras is not None: if "on those grounds" in paras.text.lower(): - span = paras.find_all_next( 'span', class_="bold") for spans in span: - if spans != None: + if spans is not None: eight.append(spans.text) - return eight def structure_nine(self) -> list: """ - This function retreives operative part from documents of the respected celex id's .The operative part is under the bold(b) + This function retreives operative part from documents of the respected celex id's. + The operative part is under the bold(b) tag after the p tag where the keywords "on those grounds" exist. """ - website = requests.get( - f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + website = requests.get(self.url.replace("{self.celex}", self.celex), timeout=60).text parser = BeautifulSoup(website, 'lxml') nine = [] div = parser.find_all('p') for divs in div: - if divs != None: + if divs is not None: if "on those grounds" in divs.text.lower(): b = divs.find_all_next('b') for bolds in b: @@ -247,34 +236,34 @@ def structure_nine(self) -> list: def structure_eleven(self) -> list: """ - This function retreives operative part from documents of the respected celex id's .The operative part is under the paragraph(p) + This function retreives operative part from documents of the respected celex id's. + The operative part is under the paragraph(p) tag after the b tag where the keywords "operative part" exist. 
""" - website = requests.get( - f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + website = requests.get(self.url.replace("{self.celex}", self.celex), timeout=60).text parser = BeautifulSoup(website, 'lxml') bold = parser.find_all('b') eleven = [] for b in bold: - if b != None: + if b is not None: if "operative part" in b.text.lower(): - table = b.find_all_next('p') - for tables in table: - if tables != None: - eleven.append(tables.text) - + tables = b.find_all_next('p') + for table in tables: + if table is not None: + eleven.append(table.text) return eleven def structure_ten(self): """ - This function retreives operative part from documents of the respected celex id's Since the ocntent is preloaded using js/client s - server side functions , the text from the current page is retrieved and the operative part is scraped after the occurence of the phrase + This function retreives operative part from documents of the respected celex id's. + Since the ocntent is preloaded using js/clients + server side functions , the text from the current page is retrieved and the + operative part is scraped after the occurence of the phrase "On those grounds". 
""" - website = requests.get( - f"https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A{self.celex}&from=EN").text + website = requests.get(self.url.replace("{self.celex}", self.celex), timeout=60).text parser = BeautifulSoup(website, 'lxml') appender = [] for string in parser.stripped_strings: @@ -282,7 +271,7 @@ def structure_ten(self): appender.append(string) found = False - afterGrounds = [] + after_grounds = [] for x in appender: @@ -291,33 +280,32 @@ def structure_ten(self): if found: if len(x.split(" ")) > 3: - afterGrounds.append(x) - return afterGrounds + after_grounds.append(x) + return after_grounds def __call__(self) -> list: """ - This inbuilt __call__ function loops through all the methods in the class `Analyzer` and returns the list , with values of the operative part . + This inbuilt __call__ function loops through all the methods in the class + `Analyzer` and returns the list , with values of the operative part . """ - container = [self.html_page_structure_one(), self.html_page_structure_two(), self.structure_three(), self.structure_four(), self.structure_five(), - self.structure_six(), self.structure_seven(), self.structure_eight(), self.structure_nine(), self.structure_ten(), self.structure_eleven()] + container = [self.html_page_structure_one(), self.html_page_structure_two(), + self.structure_three(), self.structure_four(), self.structure_five(), + self.structure_six(), self.structure_seven(), self.structure_eight(), + self.structure_nine(), self.structure_ten(), self.structure_eleven()] one: list - for funcs in range(len(container)): - + for funcs in enumerate(container): one = container[funcs] - if one: if (len(one) != 0 or one[0] != "\n"): print("here") return one - - - - + return None class Writing(): """ - This class has different methods , for the purpose of writing the operative part into different file formats.(Csv,txt,json) + This class has different methods, for the purpose of writing the operative part + into different file 
formats.(Csv,txt,json) """ instance: str @@ -330,38 +318,36 @@ def __init__(self, celex: str): self.x = self.instance() def to_csv(self): - file = open("csv/output.csv", "a+") - writer = csv.writer(file) - - if self.x != None: + _file = open("csv/output.csv", "a+", encoding="utf-8") + writer = csv.writer(_file) + if self.x is not None: writer.writerow([self.celex, self.x]) def to_json(self): - if self.x != None: + if self.x is not None: data = {'Celex': self.celex, "Operative part": self.x} - file = open('json/data.json', 'a+') - json.dump(data, file) - file.close() + _file = open('json/data.json', 'a+', encoding='utf-8') + json.dump(data, _file) + _file.close() def to_txt(self): - if self.x != None: - file = open(f"txt/{self.celex}.txt", "a") + if self.x is not None: + _file = open(f"txt/{self.celex}.txt", "a", encoding="utf-8") for w in self.x: - - file.write(w+"\n") - file.close() + _file.write(w+"\n") + _file.close() # Sample code for reading celex id's froma tsv file -file = open("gijs_202310_node_list.tsv", "r") +file = open("gijs_202310_node_list.tsv", "r", encoding="utf-8") reader = csv.reader(file) testing = [] for row in reader: for rows in row: if "Id" not in rows: testing.append(rows.split("\t")[0]) -for all in testing: - instance = Writing(all) +for _all in testing: + instance = Writing(_all) instance.to_csv() - print(all) + print(_all) From 2886aef8687c0dc20c8bf8d966342de80f532bc0 Mon Sep 17 00:00:00 2001 From: shashankmc Date: Thu, 28 Mar 2024 08:45:22 +0100 Subject: [PATCH 20/78] code linting for sparql file --- cellar/cellar_extractor/sparql.py | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/cellar/cellar_extractor/sparql.py b/cellar/cellar_extractor/sparql.py index de989ce..c41a49c 100644 --- a/cellar/cellar_extractor/sparql.py +++ b/cellar/cellar_extractor/sparql.py @@ -1,5 +1,6 @@ from SPARQLWrapper import SPARQLWrapper, JSON, CSV, POST import requests + def 
run_eurlex_webservice_query(query_input,username,password): target = "https://eur-lex.europa.eu/EURLexWebService?wsdl" query = ''' @@ -22,15 +23,12 @@ def run_eurlex_webservice_query(query_input,username,password): ''' % (username, password,query_input) return requests.request("POST", target, data=query, allow_redirects=True) - -""" -Method acquired from a different law and tech project for getting the citations of a source_celex. -Unlike get_citations_csv, only works for one source celex at once. Returns a set containing all the works cited by -the source celex. -""" - def get_citations(source_celex, cites_depth=1, cited_depth=1): """ + Method acquired from a different law and tech project for getting the citations of a + source_celex. + Unlike get_citations_csv, only works for one source celex at once. Returns a set + containing all the works cited by the source celex. Gets all the citations one to X steps away. Hops can be specified as either the source document citing another (defined by `cites_depth`) or another document citing it (`cited_depth`). Any numbers higher than 1 denote that new source document @@ -69,18 +67,16 @@ def get_citations(source_celex, cites_depth=1, cited_depth=1): for bind in ret['results']['bindings']: target = bind['name2']['value'] targets.add(target) - targets = set([el for el in list(targets)]) # Filters the list. Filter type: '3'=legislation, '6'=case law. - + # Filters the list. Filter type: '3'=legislation, '6'=case law. + targets = set([el for el in list(targets)]) return targets - -""" -Method sending a query to the endpoint, which asks for cited works for each celex. -The celex variable in the method is a list of all the celex identifiers of the cases we need the citations of. -The query returns a csv, containing all of the data needed.""" - - def get_citations_csv(celex): + """ + Method sending a query to the endpoint, which asks for cited works for each celex. 
+ The celex variable in the method is a list of all the celex identifiers of the + cases we need the citations of. + The query returns a csv, containing all of the data needed.""" endpoint = 'https://publications.europa.eu/webapi/rdf/sparql' input_celex = '", "'.join(celex) query = ''' From 6ad2cab2cce17b6b3c1281260c635757e078a8f1 Mon Sep 17 00:00:00 2001 From: shashankmc Date: Thu, 28 Mar 2024 08:45:50 +0100 Subject: [PATCH 21/78] Code linting for Testing file --- cellar/cellar_extractor/Testing_file.py | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/cellar/cellar_extractor/Testing_file.py b/cellar/cellar_extractor/Testing_file.py index 9418533..bf932a8 100644 --- a/cellar/cellar_extractor/Testing_file.py +++ b/cellar/cellar_extractor/Testing_file.py @@ -1,10 +1,6 @@ """ - This file is purely a testing file for trying out separate parts of code, testing if everything works and such. Can be also used to develop future code. - - - """ from nodes_and_edges import get_nodes_and_edges @@ -27,11 +23,14 @@ for rows in row: if "Id" not in rows: testing.append(rows.split("\t")[0]) + class Test(unittest.TestCase): """ - class for unittesing operative part , it checks whether the list returns null value or has some value. + class for unittesing operative part , it checks whether the list returns null value + or has some value. 
""" ids:list + def __init__(self,ids): self.ids=ids @@ -48,24 +47,19 @@ def test_for_celex_id(self): # self.assertFalse(len(test_instance)<=1) try: - self.assertTrue(test_instance[0],f"{id} is not empty and has operative part") + self.assertTrue(test_instance[0],f"{id} is not empty and has operative part") count_pass+=1 print(f"{id} ---> PASSED.") except: - print(f"{id} ---> FAILED.") - print(f"Passed {count_pass}/{len(self.ids)} times") - # print(len(self.ids)-count,"were passed successfully") + print(f"{id} ---> FAILED.") + print(f"Passed {count_pass}/{len(self.ids)} times") + # print(len(self.ids)-count,"were passed successfully") new_list=[] for w in range(no_of_test_cases): randomized=random.randint(0,len(testing)-1) new_list.append(testing[randomized]) - - - - - if __name__ == '__main__': celex = "62004CJ0292" From db8688902e5d1928d81cc3f6ac5e6274bd603a78 Mon Sep 17 00:00:00 2001 From: shashankmc Date: Thu, 28 Mar 2024 08:46:55 +0100 Subject: [PATCH 22/78] Update gitignore file to not consider DS_Store files and venv directories --- .gitignore | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index aa30a5f..06a87e2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -venv +.venv* .idea data rechtspraak/rechtspraak_extractor/tests/data @@ -20,4 +20,6 @@ rechtspraak.zip build.bat echr_extractor-whl.zip echr_extractor-whl -echr_extractor.egg-info \ No newline at end of file +echr_extractor.egg-info + +.*DS_Store \ No newline at end of file From b1f24ee8e02a56fe37726caac6e4426fadf07b3d Mon Sep 17 00:00:00 2001 From: Vishal Venkat Raghavan Date: Wed, 10 Apr 2024 10:54:46 +0200 Subject: [PATCH 23/78] Unittests for operative part --- tests.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tests.py b/tests.py index 4732cdb..8c36e70 100644 --- a/tests.py +++ b/tests.py @@ -1,4 +1,5 @@ from cellar_extractor import * +from 
extraction_libraries.cellar.cellar_extractor.operative_extractions import Writing def cellar_csv_n(): get_cellar(save_file='n', file_format='csv', sd='2022-01-01', max_ecli=100) @@ -70,3 +71,39 @@ def test_cellar_json_n(): assert True except Exception: assert False, "Downloading cellar as json failed." + + +# from operative_extractions import Analyzer,Writing + +import random +import csv +import json + +celex_store=["61983CJ0207","61988CJ0360","62005CJ0168","62008CJ0484","62010CJ0014","62005CJ0343","62000CJ0154"] + +celex:str +choice=random.randint(0,len(celex_store)) +celex=celex_store[choice] +def operative_part_csv(celex)->csv: + + csv_store=Writing(celex) + csv_store.to_csv() + if csv_store.to_csv(): + assert True + else: + assert False +def operative_part_json(celex)->json: + json_store=Writing(celex) + json_store.to_json() + if json_store.to_json(): + assert True + else: + assert False + +def operative_part_txt(celex): + txt_store=Writing(celex) + txt_store.to_txt() + if txt_store.to_txt(): + assert True + else: + assert False \ No newline at end of file From e88255c0a14ea8e19c6e7fdf161ca6dd3b0fb490 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Thu, 11 Apr 2024 12:20:12 +0200 Subject: [PATCH 24/78] Update operative_extractions.py --- .../cellar_extractor/operative_extractions.py | 38 +++++++++++-------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/cellar/cellar_extractor/operative_extractions.py b/cellar/cellar_extractor/operative_extractions.py index 1bb5f43..a0e3610 100644 --- a/cellar/cellar_extractor/operative_extractions.py +++ b/cellar/cellar_extractor/operative_extractions.py @@ -311,6 +311,25 @@ class Writing(): instance: str x: str parameter: str + txt = "txt" + json="json" + csv="csv" + txt_dir = os.path.join(current, txt) + csv_dir = os.path.join(current, csv) + json_dir = os.path.join(current, json) + + if not os.path.exists(txt_dir): + os.makedirs(txt_dir) + if not 
os.path.exists(csv_dir): + os.makedirs(csv_dir) + + if not os.path.exists(json_dir): + os.makedirs(json_dir) + + def __init__(self, celex: str): + self.celex = celex + self.instance = Analyzer(self.celex) + self.x = self.instance() def __init__(self, celex: str): self.celex = celex @@ -318,7 +337,7 @@ def __init__(self, celex: str): self.x = self.instance() def to_csv(self): - _file = open("csv/output.csv", "a+", encoding="utf-8") + _file = open("output.csv", "a+", encoding="utf-8") writer = csv.writer(_file) if self.x is not None: writer.writerow([self.celex, self.x]) @@ -326,7 +345,7 @@ def to_csv(self): def to_json(self): if self.x is not None: data = {'Celex': self.celex, "Operative part": self.x} - _file = open('json/data.json', 'a+', encoding='utf-8') + _file = open('data.json', 'a+', encoding='utf-8') json.dump(data, _file) _file.close() @@ -337,17 +356,4 @@ def to_txt(self): for w in self.x: _file.write(w+"\n") _file.close() -# Sample code for reading celex id's froma tsv file - - -file = open("gijs_202310_node_list.tsv", "r", encoding="utf-8") -reader = csv.reader(file) -testing = [] -for row in reader: - for rows in row: - if "Id" not in rows: - testing.append(rows.split("\t")[0]) -for _all in testing: - instance = Writing(_all) - instance.to_csv() - print(_all) + From 09865ff1173314b534c5717b176f1fd9d02e9d7a Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Thu, 11 Apr 2024 12:20:42 +0200 Subject: [PATCH 25/78] Update __init__.py --- cellar/cellar_extractor/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cellar/cellar_extractor/__init__.py b/cellar/cellar_extractor/__init__.py index 39184aa..ab2ae68 100644 --- a/cellar/cellar_extractor/__init__.py +++ b/cellar/cellar_extractor/__init__.py @@ -2,5 +2,6 @@ from cellar_extractor.cellar import get_cellar_extra from cellar_extractor.cellar import get_nodes_and_edges_lists from cellar_extractor.cellar import filter_subject_matter +from 
cellar_extractor.operative_extractions import Analyzer,Writing import logging -logging.basicConfig(level=logging.INFO) \ No newline at end of file +logging.basicConfig(level=logging.INFO) From 0d7c77cc670f70dea57632eef342cd49d50d7b3b Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Thu, 11 Apr 2024 12:26:46 +0200 Subject: [PATCH 26/78] Update tests.py --- tests.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests.py b/tests.py index 8c36e70..be3cef4 100644 --- a/tests.py +++ b/tests.py @@ -1,5 +1,4 @@ from cellar_extractor import * -from extraction_libraries.cellar.cellar_extractor.operative_extractions import Writing def cellar_csv_n(): get_cellar(save_file='n', file_format='csv', sd='2022-01-01', max_ecli=100) @@ -73,7 +72,6 @@ def test_cellar_json_n(): assert False, "Downloading cellar as json failed." -# from operative_extractions import Analyzer,Writing import random import csv @@ -106,4 +104,4 @@ def operative_part_txt(celex): if txt_store.to_txt(): assert True else: - assert False \ No newline at end of file + assert False From 01f7d84f78a6de415bba49c9bde884d929047a09 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Thu, 11 Apr 2024 12:29:06 +0200 Subject: [PATCH 27/78] Update tests.py --- tests.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests.py b/tests.py index be3cef4..c71e4ea 100644 --- a/tests.py +++ b/tests.py @@ -105,3 +105,20 @@ def operative_part_txt(celex): assert True else: assert False + + def test_for_celex_id(celex): + + count_fail:int + count_pass=0 + for id in self.ids: + test_output=Analyzer(celex) + test_instance=test_output() + + + try: + assert True + + + except: + assert False + From 642dbe31150486dc4bf144cb459d5324da2ce910 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Thu, 11 Apr 2024 12:29:33 +0200 Subject: [PATCH 28/78] Delete 
cellar/cellar_extractor/Testing_file.py --- cellar/cellar_extractor/Testing_file.py | 75 ------------------------- 1 file changed, 75 deletions(-) delete mode 100644 cellar/cellar_extractor/Testing_file.py diff --git a/cellar/cellar_extractor/Testing_file.py b/cellar/cellar_extractor/Testing_file.py deleted file mode 100644 index 4d0372a..0000000 --- a/cellar/cellar_extractor/Testing_file.py +++ /dev/null @@ -1,75 +0,0 @@ -""" -This file is purely a testing file for trying out separate parts of code, testing if everything works and such. -Can be also used to develop future code. -""" - -from nodes_and_edges import get_nodes_and_edges -from os.path import join -from json_to_csv import read_csv -import time -from eurlex_scraping import * -from cellar import * -from sparql import * -import unittest -from operative_extraction import Analyzer -# from test import testing -import random -import csv -file=open("gijs_202310_node_list.tsv","r") -reader=csv.reader(file) -no_of_test_cases=30 -testing=[] -for row in reader: - for rows in row: - if "Id" not in rows: - testing.append(rows.split("\t")[0]) - -class Test(unittest.TestCase): - """ - class for unittesing operative part , it checks whether the list returns null value - or has some value. - """ - ids:list - - def __init__(self,ids): - self.ids=ids - - def test_for_celex_id(self): - """ - Main function which runs the unittest Testcase . 
- """ - count_fail:int - count_pass=0 - for id in self.ids: - test_output=Analyzer(id) - test_instance=test_output() - - # self.assertFalse(len(test_instance)<=1) - - try: - self.assertTrue(test_instance[0],f"{id} is not empty and has operative part") - count_pass+=1 - print(f"{id} ---> PASSED.") - except: - print(f"{id} ---> FAILED.") - print(f"Passed {count_pass}/{len(self.ids)} times") - # print(len(self.ids)-count,"were passed successfully") - -new_list=[] -for w in range(no_of_test_cases): - randomized=random.randint(0,len(testing)-1) - new_list.append(testing[randomized]) - -if __name__ == '__main__': - celex = "62004CJ0292" - - instance=Test([celex]) - instance.test_for_celex_id() - site = get_entire_page(celex) - text = get_full_text_from_html(site) - cits = get_citations_with_extra_info(text) - print(cits) - data = get_cellar(sd='2023-01-01',max_ecli=100,save_file='n') - d3 = filter_subject_matter(data, "prices") - b=2 - pass \ No newline at end of file From 8db810d59cf5d235d7a488138b46f9012b46ea9f Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Thu, 11 Apr 2024 12:33:09 +0200 Subject: [PATCH 29/78] Update operative_extractions.py --- cellar/cellar_extractor/operative_extractions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cellar/cellar_extractor/operative_extractions.py b/cellar/cellar_extractor/operative_extractions.py index a0e3610..4c46124 100644 --- a/cellar/cellar_extractor/operative_extractions.py +++ b/cellar/cellar_extractor/operative_extractions.py @@ -1,6 +1,7 @@ import csv import json import requests +import os from bs4 import BeautifulSoup class Analyzer(): From 3c32aaadadcd4cdcdbadf2a19a488e92d30ef7c8 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Thu, 11 Apr 2024 12:36:44 +0200 Subject: [PATCH 30/78] Update tests.py --- tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests.py b/tests.py index c71e4ea..861c701 100644 --- 
a/tests.py +++ b/tests.py @@ -106,7 +106,7 @@ def operative_part_txt(celex): else: assert False - def test_for_celex_id(celex): +def test_for_operative_part(celex): count_fail:int count_pass=0 From 28858a6ea9a3699dd92db70847d6aedd848c63f5 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Thu, 11 Apr 2024 12:45:46 +0200 Subject: [PATCH 31/78] Update tests.py --- tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests.py b/tests.py index 861c701..08def79 100644 --- a/tests.py +++ b/tests.py @@ -110,7 +110,7 @@ def test_for_operative_part(celex): count_fail:int count_pass=0 - for id in self.ids: + for id in celex_store: test_output=Analyzer(celex) test_instance=test_output() From 98ea00cf27540f6f8350ec5dfa630d2745e6ddf9 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Thu, 11 Apr 2024 12:52:25 +0200 Subject: [PATCH 32/78] Update tests.py --- tests.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/tests.py b/tests.py index 08def79..f56f41a 100644 --- a/tests.py +++ b/tests.py @@ -86,24 +86,27 @@ def operative_part_csv(celex)->csv: csv_store=Writing(celex) csv_store.to_csv() - if csv_store.to_csv(): - assert True - else: - assert False + try: + if csv_store.to_csv(): + assert True + except Exception: + assert False def operative_part_json(celex)->json: json_store=Writing(celex) json_store.to_json() - if json_store.to_json(): - assert True - else: + try: + if json_store.to_json(): + assert True + except Exception: assert False def operative_part_txt(celex): txt_store=Writing(celex) txt_store.to_txt() - if txt_store.to_txt(): - assert True - else: + try: + if txt_store.to_txt(): + assert True + except Exception: assert False def test_for_operative_part(celex): @@ -111,14 +114,16 @@ def test_for_operative_part(celex): count_fail:int count_pass=0 for id in celex_store: - test_output=Analyzer(celex) - 
test_instance=test_output() + try: + test_output=Analyzer(celex) + test_instance=test_output() assert True - except: + except Exception: assert False - + + From 8cae2fa368e901b4c5aa82d65e9b849b66161542 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Thu, 11 Apr 2024 15:32:53 +0200 Subject: [PATCH 33/78] Update tests.py --- tests.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests.py b/tests.py index f56f41a..92ddfcd 100644 --- a/tests.py +++ b/tests.py @@ -125,5 +125,9 @@ def test_for_operative_part(celex): except Exception: assert False - + +operative_part_txt(celex) +operative_part_csv(celex) +operative_part_json(celex) +test_for_operative_part(celex) From 8503d8d23a836875e09e3c5b821ce13f6dd15df6 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Thu, 11 Apr 2024 15:34:18 +0200 Subject: [PATCH 34/78] Update tests.py From 87271b8181dd968feba98ebd017c26495311f919 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Thu, 11 Apr 2024 15:39:53 +0200 Subject: [PATCH 35/78] Update tests.py --- tests.py | 46 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/tests.py b/tests.py index 92ddfcd..0f0ab69 100644 --- a/tests.py +++ b/tests.py @@ -109,7 +109,7 @@ def operative_part_txt(celex): except Exception: assert False -def test_for_operative_part(celex): +def for_operative_part(celex): count_fail:int count_pass=0 @@ -126,8 +126,44 @@ def test_for_operative_part(celex): except Exception: assert False -operative_part_txt(celex) -operative_part_csv(celex) -operative_part_json(celex) -test_for_operative_part(celex) + +def test_operative_part_txt(): + celex_store=["61983CJ0207","61988CJ0360","62005CJ0168","62008CJ0484","62010CJ0014","62005CJ0343","62000CJ0154"] + celex:str + choice=random.randint(0,len(celex_store)) + celex=celex_store[choice] + if operative_part_txt(celex): + 
assert True + except Exception: + assert False +def test_operative_part_json(): + celex_store=["61983CJ0207","61988CJ0360","62005CJ0168","62008CJ0484","62010CJ0014","62005CJ0343","62000CJ0154"] + celex:str + choice=random.randint(0,len(celex_store)) + celex=celex_store[choice] + if operative_part_json(celex): + assert True + except Exception: + assert False +def test_operative_part_csv(): + celex_store=["61983CJ0207","61988CJ0360","62005CJ0168","62008CJ0484","62010CJ0014","62005CJ0343","62000CJ0154"] + celex:str + choice=random.randint(0,len(celex_store)) + celex=celex_store[choice] + if operative_part_csv(celex): + assert True + except Exception: + assert False + +def test_for_operative_part(): + celex_store=["61983CJ0207","61988CJ0360","62005CJ0168","62008CJ0484","62010CJ0014","62005CJ0343","62000CJ0154"] + celex:str + choice=random.randint(0,len(celex_store)) + celex=celex_store[choice] + if test_for_operative_part(celex): + assert True + except Exception: + assert False + + From c3daf22eaa7099094ed090e8fe33d85d8d8afdb1 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Thu, 11 Apr 2024 15:42:22 +0200 Subject: [PATCH 36/78] Update tests.py --- tests.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tests.py b/tests.py index 0f0ab69..844f9b9 100644 --- a/tests.py +++ b/tests.py @@ -1,5 +1,7 @@ from cellar_extractor import * - +import random +import csv +import json def cellar_csv_n(): get_cellar(save_file='n', file_format='csv', sd='2022-01-01', max_ecli=100) @@ -73,15 +75,9 @@ def test_cellar_json_n(): -import random -import csv -import json -celex_store=["61983CJ0207","61988CJ0360","62005CJ0168","62008CJ0484","62010CJ0014","62005CJ0343","62000CJ0154"] -celex:str -choice=random.randint(0,len(celex_store)) -celex=celex_store[choice] + def operative_part_csv(celex)->csv: csv_store=Writing(celex) From 428f466bab6210c3d424c3db3de09a770f5b32ab Mon Sep 17 00:00:00 2001 From: venvis 
<127123047+venvis@users.noreply.github.com> Date: Thu, 11 Apr 2024 16:27:50 +0200 Subject: [PATCH 37/78] Update tests.py --- tests.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/tests.py b/tests.py index 844f9b9..7296857 100644 --- a/tests.py +++ b/tests.py @@ -128,8 +128,9 @@ def test_operative_part_txt(): celex:str choice=random.randint(0,len(celex_store)) celex=celex_store[choice] - if operative_part_txt(celex): - assert True + try: + if operative_part_txt(celex): + assert True except Exception: assert False def test_operative_part_json(): @@ -137,8 +138,9 @@ def test_operative_part_json(): celex:str choice=random.randint(0,len(celex_store)) celex=celex_store[choice] - if operative_part_json(celex): - assert True + try: + if operative_part_json(celex): + assert True except Exception: assert False def test_operative_part_csv(): @@ -146,8 +148,9 @@ def test_operative_part_csv(): celex:str choice=random.randint(0,len(celex_store)) celex=celex_store[choice] - if operative_part_csv(celex): - assert True + try: + if operative_part_csv(celex): + assert True except Exception: assert False @@ -156,8 +159,9 @@ def test_for_operative_part(): celex:str choice=random.randint(0,len(celex_store)) celex=celex_store[choice] - if test_for_operative_part(celex): - assert True + try: + if test_for_operative_part(celex): + assert True except Exception: assert False From b75fe86a7d68ec1caf33c05d5d551f29f541ddab Mon Sep 17 00:00:00 2001 From: shashankmc Date: Wed, 17 Apr 2024 14:04:00 +0200 Subject: [PATCH 38/78] Updated variable name from current to current_dir --- cellar/cellar_extractor/operative_extractions.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/cellar/cellar_extractor/operative_extractions.py b/cellar/cellar_extractor/operative_extractions.py index 4c46124..44c3942 100644 --- a/cellar/cellar_extractor/operative_extractions.py +++ b/cellar/cellar_extractor/operative_extractions.py @@ -231,7 
+231,6 @@ def structure_nine(self) -> list: if "on those grounds" in divs.text.lower(): b = divs.find_all_next('b') for bolds in b: - # print(bolds.text) nine.append(bolds.text) return nine @@ -308,16 +307,15 @@ class Writing(): This class has different methods, for the purpose of writing the operative part into different file formats.(Csv,txt,json) """ - instance: str x: str parameter: str - txt = "txt" - json="json" - csv="csv" - txt_dir = os.path.join(current, txt) - csv_dir = os.path.join(current, csv) - json_dir = os.path.join(current, json) + + current_dir = os.getcwd() + + txt_dir = os.path.join(current_dir, "txt") + csv_dir = os.path.join(current_dir, "csv") + json_dir = os.path.join(current_dir, "json") if not os.path.exists(txt_dir): os.makedirs(txt_dir) @@ -351,7 +349,6 @@ def to_json(self): _file.close() def to_txt(self): - if self.x is not None: _file = open(f"txt/{self.celex}.txt", "a", encoding="utf-8") for w in self.x: From 2537752bbc1b9f6ef384c95cc26f5b160a200cdb Mon Sep 17 00:00:00 2001 From: shashankmc Date: Wed, 17 Apr 2024 14:04:40 +0200 Subject: [PATCH 39/78] Removed conditional statements and updated assert False statements --- tests.py | 77 +++++++++++++++++++++----------------------------------- 1 file changed, 29 insertions(+), 48 deletions(-) diff --git a/tests.py b/tests.py index 7296857..f19d69b 100644 --- a/tests.py +++ b/tests.py @@ -73,55 +73,37 @@ def test_cellar_json_n(): except Exception: assert False, "Downloading cellar as json failed." 
- - - - - def operative_part_csv(celex)->csv: - csv_store=Writing(celex) - csv_store.to_csv() try: - if csv_store.to_csv(): - assert True + csv_store.to_csv() + assert True except Exception: - assert False + assert False, "Downloading and storing as csv failed for operative part" + def operative_part_json(celex)->json: json_store=Writing(celex) - json_store.to_json() try: - if json_store.to_json(): - assert True + json_store.to_json() + assert True except Exception: - assert False + assert False, "Downloading and storing as json failed for operative part" def operative_part_txt(celex): txt_store=Writing(celex) - txt_store.to_txt() try: - if txt_store.to_txt(): - assert True + txt_store.to_txt() + assert True except Exception: - assert False + assert False, "Downloading and storing as txt failed for operative part" def for_operative_part(celex): - - count_fail:int - count_pass=0 - for id in celex_store: - - - - try: - test_output=Analyzer(celex) - test_instance=test_output() - assert True - - - except Exception: - assert False - + try: + test_output=Analyzer(id) + test_output() + assert True + except Exception: + assert False, "Cannot extract for celex" def test_operative_part_txt(): celex_store=["61983CJ0207","61988CJ0360","62005CJ0168","62008CJ0484","62010CJ0014","62005CJ0343","62000CJ0154"] @@ -129,30 +111,31 @@ def test_operative_part_txt(): choice=random.randint(0,len(celex_store)) celex=celex_store[choice] try: - if operative_part_txt(celex): - assert True + operative_part_txt(celex) + assert True except Exception: - assert False + assert False, "Cannot extract operative text" + def test_operative_part_json(): celex_store=["61983CJ0207","61988CJ0360","62005CJ0168","62008CJ0484","62010CJ0014","62005CJ0343","62000CJ0154"] celex:str choice=random.randint(0,len(celex_store)) celex=celex_store[choice] try: - if operative_part_json(celex): - assert True + operative_part_json(celex) + assert True except Exception: - assert False + assert False, "Cannot extract 
operative text" def test_operative_part_csv(): celex_store=["61983CJ0207","61988CJ0360","62005CJ0168","62008CJ0484","62010CJ0014","62005CJ0343","62000CJ0154"] celex:str choice=random.randint(0,len(celex_store)) celex=celex_store[choice] try: - if operative_part_csv(celex): - assert True + operative_part_csv(celex): + assert True except Exception: - assert False + assert False, "Cannot extract operative text" def test_for_operative_part(): celex_store=["61983CJ0207","61988CJ0360","62005CJ0168","62008CJ0484","62010CJ0014","62005CJ0343","62000CJ0154"] @@ -160,10 +143,8 @@ def test_for_operative_part(): choice=random.randint(0,len(celex_store)) celex=celex_store[choice] try: - if test_for_operative_part(celex): - assert True + test_for_operative_part(celex) + assert True except Exception: - assert False - - + assert False, "Cannot extract operative part" From f82a23f143da0db27ab1330e26cb8b291a2acdb3 Mon Sep 17 00:00:00 2001 From: shashankmc Date: Wed, 17 Apr 2024 14:06:25 +0200 Subject: [PATCH 40/78] Removed unnecessary colon --- tests.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests.py b/tests.py index f19d69b..01548e0 100644 --- a/tests.py +++ b/tests.py @@ -132,7 +132,7 @@ def test_operative_part_csv(): choice=random.randint(0,len(celex_store)) celex=celex_store[choice] try: - operative_part_csv(celex): + operative_part_csv(celex) assert True except Exception: assert False, "Cannot extract operative text" @@ -146,5 +146,5 @@ def test_for_operative_part(): test_for_operative_part(celex) assert True except Exception: - assert False, "Cannot extract operative part" - + assert False, "Cannot extract operative part" + \ No newline at end of file From 106675f940f2ab7a9d852c5ff6479da9a7608177 Mon Sep 17 00:00:00 2001 From: shashankmc Date: Wed, 17 Apr 2024 14:56:49 +0200 Subject: [PATCH 41/78] Removed duplicate __init__ method, reordered import libraries --- cellar/cellar_extractor/operative_extractions.py | 6 +----- 1 file changed, 1 
insertion(+), 5 deletions(-) diff --git a/cellar/cellar_extractor/operative_extractions.py b/cellar/cellar_extractor/operative_extractions.py index 44c3942..b018d9b 100644 --- a/cellar/cellar_extractor/operative_extractions.py +++ b/cellar/cellar_extractor/operative_extractions.py @@ -1,7 +1,7 @@ import csv import json -import requests import os +import requests from bs4 import BeautifulSoup class Analyzer(): @@ -330,10 +330,6 @@ def __init__(self, celex: str): self.instance = Analyzer(self.celex) self.x = self.instance() - def __init__(self, celex: str): - self.celex = celex - self.instance = Analyzer(self.celex) - self.x = self.instance() def to_csv(self): _file = open("output.csv", "a+", encoding="utf-8") From 0dfd3860c1fed94cf0bd84711a3ffd0c3b86ae06 Mon Sep 17 00:00:00 2001 From: shashankmc Date: Wed, 17 Apr 2024 14:57:52 +0200 Subject: [PATCH 42/78] Corrected methods being called, extra import from operative_extractions --- tests.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tests.py b/tests.py index 01548e0..3557110 100644 --- a/tests.py +++ b/tests.py @@ -1,7 +1,9 @@ -from cellar_extractor import * -import random import csv -import json +import json +import random +from cellar_extractor import * +from cellar_extractor.operative_extractions import * + def cellar_csv_n(): get_cellar(save_file='n', file_format='csv', sd='2022-01-01', max_ecli=100) @@ -73,7 +75,7 @@ def test_cellar_json_n(): except Exception: assert False, "Downloading cellar as json failed." 
-def operative_part_csv(celex)->csv: +def operative_part_csv(celex): csv_store=Writing(celex) try: csv_store.to_csv() @@ -81,7 +83,7 @@ def operative_part_csv(celex)->csv: except Exception: assert False, "Downloading and storing as csv failed for operative part" -def operative_part_json(celex)->json: +def operative_part_json(celex): json_store=Writing(celex) try: json_store.to_json() @@ -99,7 +101,7 @@ def operative_part_txt(celex): def for_operative_part(celex): try: - test_output=Analyzer(id) + test_output=Analyzer(celex) test_output() assert True except Exception: @@ -126,6 +128,7 @@ def test_operative_part_json(): assert True except Exception: assert False, "Cannot extract operative text" + def test_operative_part_csv(): celex_store=["61983CJ0207","61988CJ0360","62005CJ0168","62008CJ0484","62010CJ0014","62005CJ0343","62000CJ0154"] celex:str @@ -143,7 +146,7 @@ def test_for_operative_part(): choice=random.randint(0,len(celex_store)) celex=celex_store[choice] try: - test_for_operative_part(celex) + for_operative_part(celex) assert True except Exception: assert False, "Cannot extract operative part" From 55b117841065454c09c3c35dfb8e0575987cde5b Mon Sep 17 00:00:00 2001 From: shashankmc Date: Wed, 17 Apr 2024 16:03:56 +0200 Subject: [PATCH 43/78] Changed from enumerate to range and len --- cellar/cellar_extractor/operative_extractions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cellar/cellar_extractor/operative_extractions.py b/cellar/cellar_extractor/operative_extractions.py index b018d9b..bd894cb 100644 --- a/cellar/cellar_extractor/operative_extractions.py +++ b/cellar/cellar_extractor/operative_extractions.py @@ -16,6 +16,7 @@ def __init__(self, celex): self.celex = celex self.url = "https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A\ {self.celex}&from=EN" + def html_page_structure_one(self) -> list: """ This function retreives operative part from documents of the respected celex id's. 
@@ -295,13 +296,14 @@ def __call__(self) -> list: self.structure_nine(), self.structure_ten(), self.structure_eleven()] one: list - for funcs in enumerate(container): + for funcs in range(len(container)): one = container[funcs] if one: if (len(one) != 0 or one[0] != "\n"): print("here") return one return None + class Writing(): """ This class has different methods, for the purpose of writing the operative part From abcd5efd3ff0ac1c97867e91cc0a7140f8b54423 Mon Sep 17 00:00:00 2001 From: shashankmc Date: Wed, 17 Apr 2024 16:08:25 +0200 Subject: [PATCH 44/78] Updated setup.py file for finding operative_extractions --- cellar/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellar/setup.py b/cellar/setup.py index eec4dce..adfab07 100644 --- a/cellar/setup.py +++ b/cellar/setup.py @@ -9,7 +9,7 @@ setup( name='cellar_extractor', - packages=find_packages(include=['cellar_extractor']), + packages=find_packages(include=['cellar_extractor', 'cellar_extractor.operative_extractions']), version='1.0.61', description='Library for extracting cellar data', author='LawTech Lab', From 25cdcf1ec0b4ea7af44fbad3f9104f068f7aba05 Mon Sep 17 00:00:00 2001 From: shashankmc Date: Wed, 17 Apr 2024 16:11:12 +0200 Subject: [PATCH 45/78] Correcting path to include everything under cellar_extractor in setup.py file --- cellar/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellar/setup.py b/cellar/setup.py index adfab07..e20f64f 100644 --- a/cellar/setup.py +++ b/cellar/setup.py @@ -9,7 +9,7 @@ setup( name='cellar_extractor', - packages=find_packages(include=['cellar_extractor', 'cellar_extractor.operative_extractions']), + packages=find_packages(include=['cellar_extractor.*']), version='1.0.61', description='Library for extracting cellar data', author='LawTech Lab', From cd6e55e3949510a987e8bd2c896dbeead9cdf05b Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 17 Apr 2024 17:01:49 +0200 Subject: 
[PATCH 46/78] F string changes and url changes for eurex website --- .../cellar_extractor/operative_extractions.py | 36 +++++++++---------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/cellar/cellar_extractor/operative_extractions.py b/cellar/cellar_extractor/operative_extractions.py index bd894cb..c2edad4 100644 --- a/cellar/cellar_extractor/operative_extractions.py +++ b/cellar/cellar_extractor/operative_extractions.py @@ -9,13 +9,13 @@ class Analyzer(): This class returns a list of the operative part for a given celex id. Celex id is initialized through a constructor. """ - celex: str # declare celex as a string - url: str # declare url as a string + celex: str="" # declare celex as a string + # declare url as a string + url:str="" def __init__(self, celex): # Initialize Celex id as a constructor, passed when calling the class self.celex = celex - self.url = "https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A\ - {self.celex}&from=EN" + self.url = f"https://eur-lex.europa.eu/legal-content/EN/ALL/?uri=CELEX%3A{self.celex}&from=EN" def html_page_structure_one(self) -> list: """ @@ -23,7 +23,7 @@ def html_page_structure_one(self) -> list: This function scrapes/parse the operative part from a nested table structure . The relevant text lies inside the coj-bold class of the span tag. """ - website = requests.get(self.url.replace("{self.celex}", self.celex), timeout=60).text + website = requests.get(self.url, timeout=60).text parser = BeautifulSoup(website, 'lxml') div = parser.find_all('table') # Find all tables tag from the website one = [] @@ -49,7 +49,7 @@ def html_page_structure_two(self) -> list: (p) structure . The relevant text lies inside the normal class of the p tag which comes after the keyword operative of the previous span tag. 
""" - website = requests.get(self.url.replace("{self.celex}", self.celex), timeout=60).text + website = requests.get(self.url, timeout=60).text parser = BeautifulSoup(website, 'lxml') p = parser.find_all('p') two = [] @@ -68,7 +68,7 @@ def structure_three(self) -> list: This function scrapes/parse the operative part from a nested table structure. The relevant text lies inside the coj-bold class of the span tag. """ - website = requests.get(self.url.replace("{self.celex}", self.celex), timeout=60).text + website = requests.get(self.url, timeout=60).text parser = BeautifulSoup(website, 'lxml') table = parser.find_all('table') three = [] @@ -91,7 +91,7 @@ def structure_four(self) -> list: (p) structure . The relevant text lies inside the p tag which comes after the keyword operative of the previous span tag. """ - website = requests.get(self.url.replace("{self.celex}", self.celex), timeout=60).text + website = requests.get(self.url, timeout=60).text parser = BeautifulSoup(website, 'lxml') p = parser.find_all('p') four = [] @@ -115,7 +115,7 @@ def structure_five(self) -> list: (p) structure. The relevant text lies inside the normal class of the p tag which comes after the keyword operative of the previous span tag. """ - website = requests.get(self.url.replace("{self.celex}", self.celex), timeout=60).text + website = requests.get(self.url, timeout=60).text parser = BeautifulSoup(website, 'lxml') p = parser.find_all('p') five = [] @@ -141,7 +141,7 @@ def structure_six(self) -> list: The relevant text lies inside thee p tag which comes after the keyword operative part of the respective h2 tag. """ - website = requests.get(self.url.replace("{self.celex}", self.celex), timeout=60).text + website = requests.get(self.url, timeout=60).text parser = BeautifulSoup(website, 'lxml') div = parser.find_all('h2') six = [] @@ -161,7 +161,7 @@ def structure_seven(self) -> list: (table) structure. 
The relevant text lies inside the span tag which comes after the p tag , with the class name=normal. """ - website = requests.get(self.url.replace("{self.celex}", self.celex), timeout=60).text + website = requests.get(self.url, timeout=60).text parser = BeautifulSoup(website, 'lxml') div = parser.find_all('table') seven = [] @@ -196,7 +196,7 @@ def structure_eight(self) -> list: The text is extracted from the span tag nested inside the tbody tag.Returns a list as output. """ - website = requests.get(self.url.replace("{self.celex}", self.celex), timeout=60).text + website = requests.get(self.url, timeout=60).text parser = BeautifulSoup(website, 'lxml') tbody = parser.find_all('tbody') @@ -223,7 +223,7 @@ def structure_nine(self) -> list: The operative part is under the bold(b) tag after the p tag where the keywords "on those grounds" exist. """ - website = requests.get(self.url.replace("{self.celex}", self.celex), timeout=60).text + website = requests.get(self.url, timeout=60).text parser = BeautifulSoup(website, 'lxml') nine = [] div = parser.find_all('p') @@ -241,7 +241,7 @@ def structure_eleven(self) -> list: The operative part is under the paragraph(p) tag after the b tag where the keywords "operative part" exist. """ - website = requests.get(self.url.replace("{self.celex}", self.celex), timeout=60).text + website = requests.get(self.url, timeout=60).text parser = BeautifulSoup(website, 'lxml') bold = parser.find_all('b') @@ -264,7 +264,7 @@ def structure_ten(self): operative part is scraped after the occurence of the phrase "On those grounds". 
""" - website = requests.get(self.url.replace("{self.celex}", self.celex), timeout=60).text + website = requests.get(self.url, timeout=60).text parser = BeautifulSoup(website, 'lxml') appender = [] for string in parser.stripped_strings: @@ -300,10 +300,9 @@ def __call__(self) -> list: one = container[funcs] if one: if (len(one) != 0 or one[0] != "\n"): - print("here") + return one - return None - + class Writing(): """ This class has different methods, for the purpose of writing the operative part @@ -352,4 +351,3 @@ def to_txt(self): for w in self.x: _file.write(w+"\n") _file.close() - From d22849ee5bb05a50dd00b5a2e74628866363591b Mon Sep 17 00:00:00 2001 From: Shashank Date: Wed, 17 Apr 2024 17:13:20 +0200 Subject: [PATCH 47/78] Removed import cellar_extractor.operative_extractions --- tests.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests.py b/tests.py index 3557110..ce25a4d 100644 --- a/tests.py +++ b/tests.py @@ -2,7 +2,6 @@ import json import random from cellar_extractor import * -from cellar_extractor.operative_extractions import * def cellar_csv_n(): get_cellar(save_file='n', file_format='csv', sd='2022-01-01', max_ecli=100) @@ -150,4 +149,4 @@ def test_for_operative_part(): assert True except Exception: assert False, "Cannot extract operative part" - \ No newline at end of file + From d89aaec241af5b4ad60c4b7f0f0f3c43270776d3 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 24 Apr 2024 10:00:48 +0200 Subject: [PATCH 48/78] os module for tests.py --- tests.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests.py b/tests.py index ce25a4d..54d80a9 100644 --- a/tests.py +++ b/tests.py @@ -1,7 +1,9 @@ -import csv -import json +import sys +import os +sys.path.append(os.path.dirname(os.path.abspath(__file__))) import random -from cellar_extractor import * +from cellar.cellar_extractor.cellar import * +from cellar.cellar_extractor.operative_extractions import * def 
cellar_csv_n(): get_cellar(save_file='n', file_format='csv', sd='2022-01-01', max_ecli=100) From 9aaf2e36ef005ab8a11883084b1e2ec8a114b67f Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 24 Apr 2024 10:02:00 +0200 Subject: [PATCH 49/78] Include json & csv directory for outputs --- cellar/cellar_extractor/operative_extractions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cellar/cellar_extractor/operative_extractions.py b/cellar/cellar_extractor/operative_extractions.py index c2edad4..2414a1a 100644 --- a/cellar/cellar_extractor/operative_extractions.py +++ b/cellar/cellar_extractor/operative_extractions.py @@ -333,7 +333,7 @@ def __init__(self, celex: str): def to_csv(self): - _file = open("output.csv", "a+", encoding="utf-8") + _file = open("csv/output.csv", "a+", encoding="utf-8") writer = csv.writer(_file) if self.x is not None: writer.writerow([self.celex, self.x]) @@ -341,7 +341,7 @@ def to_csv(self): def to_json(self): if self.x is not None: data = {'Celex': self.celex, "Operative part": self.x} - _file = open('data.json', 'a+', encoding='utf-8') + _file = open('json/data.json', 'a+', encoding='utf-8') json.dump(data, _file) _file.close() From e33cc9cbed405889d1a019ca7827832584f6b8c7 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 24 Apr 2024 10:15:15 +0200 Subject: [PATCH 50/78] os configuration to include cellar/ for tests.py --- tests.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests.py b/tests.py index 54d80a9..e2fd2b7 100644 --- a/tests.py +++ b/tests.py @@ -1,9 +1,10 @@ import sys import os -sys.path.append(os.path.dirname(os.path.abspath(__file__))) +sys.path.append(os.path.join(os.path.dirname(__file__), 'cellar')) + import random -from cellar.cellar_extractor.cellar import * -from cellar.cellar_extractor.operative_extractions import * +from cellar_extractor.cellar import * +from 
cellar_extractor.operative_extractions import * def cellar_csv_n(): get_cellar(save_file='n', file_format='csv', sd='2022-01-01', max_ecli=100) From 483c4199c5bfc1bfb2c3d0253b58d24c063ed5a0 Mon Sep 17 00:00:00 2001 From: Shashank Date: Wed, 24 Apr 2024 10:31:55 +0200 Subject: [PATCH 51/78] Update tests.py Removed os and sys import. --- tests.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tests.py b/tests.py index e2fd2b7..e9eb21f 100644 --- a/tests.py +++ b/tests.py @@ -1,10 +1,5 @@ -import sys -import os -sys.path.append(os.path.join(os.path.dirname(__file__), 'cellar')) - import random -from cellar_extractor.cellar import * -from cellar_extractor.operative_extractions import * +from cellar_extractor import * def cellar_csv_n(): get_cellar(save_file='n', file_format='csv', sd='2022-01-01', max_ecli=100) From f8887d7c0d1704cb606b262f59a9f6e32805ec47 Mon Sep 17 00:00:00 2001 From: Shashank Date: Wed, 24 Apr 2024 10:33:12 +0200 Subject: [PATCH 52/78] Update setup.py Extended include to include operative_extractions. --- cellar/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cellar/setup.py b/cellar/setup.py index e20f64f..50bab1b 100644 --- a/cellar/setup.py +++ b/cellar/setup.py @@ -9,7 +9,7 @@ setup( name='cellar_extractor', - packages=find_packages(include=['cellar_extractor.*']), + packages=find_packages(include=['cellar_extractor', 'cellar_extractor.operative_extractions']), version='1.0.61', description='Library for extracting cellar data', author='LawTech Lab', @@ -23,4 +23,4 @@ "Bug Tracker": "https://github.com/maastrichtlawtech/extraction_libraries", "Build Source": "https://github.com/maastrichtlawtech/extraction_libraries", }, -) \ No newline at end of file +) From 56f7845b5239561d50c687201286901a174e8a16 Mon Sep 17 00:00:00 2001 From: Shashank Date: Wed, 24 Apr 2024 10:37:15 +0200 Subject: [PATCH 53/78] Change to len(celex_store)-1 to avoid index out of range error. 
--- tests.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests.py b/tests.py index e9eb21f..35000e9 100644 --- a/tests.py +++ b/tests.py @@ -107,7 +107,7 @@ def for_operative_part(celex): def test_operative_part_txt(): celex_store=["61983CJ0207","61988CJ0360","62005CJ0168","62008CJ0484","62010CJ0014","62005CJ0343","62000CJ0154"] celex:str - choice=random.randint(0,len(celex_store)) + choice=random.randint(0,len(celex_store)-1) celex=celex_store[choice] try: operative_part_txt(celex) @@ -118,7 +118,7 @@ def test_operative_part_txt(): def test_operative_part_json(): celex_store=["61983CJ0207","61988CJ0360","62005CJ0168","62008CJ0484","62010CJ0014","62005CJ0343","62000CJ0154"] celex:str - choice=random.randint(0,len(celex_store)) + choice=random.randint(0,len(celex_store)-1) celex=celex_store[choice] try: operative_part_json(celex) @@ -129,7 +129,7 @@ def test_operative_part_json(): def test_operative_part_csv(): celex_store=["61983CJ0207","61988CJ0360","62005CJ0168","62008CJ0484","62010CJ0014","62005CJ0343","62000CJ0154"] celex:str - choice=random.randint(0,len(celex_store)) + choice=random.randint(0,len(celex_store)-1) celex=celex_store[choice] try: operative_part_csv(celex) @@ -140,7 +140,7 @@ def test_operative_part_csv(): def test_for_operative_part(): celex_store=["61983CJ0207","61988CJ0360","62005CJ0168","62008CJ0484","62010CJ0014","62005CJ0343","62000CJ0154"] celex:str - choice=random.randint(0,len(celex_store)) + choice=random.randint(0,len(celex_store)-1) celex=celex_store[choice] try: for_operative_part(celex) From 19fccdc7261a87473e9cf595913d78c3e63a88b7 Mon Sep 17 00:00:00 2001 From: Shashank Date: Wed, 24 Apr 2024 10:42:15 +0200 Subject: [PATCH 54/78] Added additional import to test.py file --- tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests.py b/tests.py index 35000e9..c5ddfe3 100644 --- a/tests.py +++ b/tests.py @@ -1,5 +1,6 @@ import random from cellar_extractor import * +from 
cellar_extractor.operative_extractions import Analyzer, Writing def cellar_csv_n(): get_cellar(save_file='n', file_format='csv', sd='2022-01-01', max_ecli=100) From d57f01543929e0238ba6c25ef0f19180a8180b5c Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 1 May 2024 09:42:15 +0200 Subject: [PATCH 55/78] Create __init__.py --- cellar/__init__.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 cellar/__init__.py diff --git a/cellar/__init__.py b/cellar/__init__.py new file mode 100644 index 0000000..0901c57 --- /dev/null +++ b/cellar/__init__.py @@ -0,0 +1 @@ +from cellar_extractor import * From 8045e9c0f856944d9ec2f52f83edcc1b4e717946 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 1 May 2024 09:43:02 +0200 Subject: [PATCH 56/78] Shorten imports after adding __init__.py to cellar folder --- tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests.py b/tests.py index c5ddfe3..d89cec3 100644 --- a/tests.py +++ b/tests.py @@ -1,6 +1,6 @@ import random -from cellar_extractor import * -from cellar_extractor.operative_extractions import Analyzer, Writing +from cellar import * + def cellar_csv_n(): get_cellar(save_file='n', file_format='csv', sd='2022-01-01', max_ecli=100) From 26d305d328fed1acaba784ff31c0d0935693d9ba Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 1 May 2024 09:50:51 +0200 Subject: [PATCH 57/78] Add extra import command --- tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests.py b/tests.py index d89cec3..13118e9 100644 --- a/tests.py +++ b/tests.py @@ -1,6 +1,6 @@ import random from cellar import * - +from cellar.operative_extractions import * def cellar_csv_n(): get_cellar(save_file='n', file_format='csv', sd='2022-01-01', max_ecli=100) From 53343819cd92610abe285ec9470acdd1a46f11e9 Mon Sep 17 00:00:00 2001 From: venvis 
<127123047+venvis@users.noreply.github.com> Date: Wed, 1 May 2024 09:52:03 +0200 Subject: [PATCH 58/78] Update tests.py --- tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests.py b/tests.py index 13118e9..36c1f38 100644 --- a/tests.py +++ b/tests.py @@ -1,6 +1,6 @@ import random from cellar import * -from cellar.operative_extractions import * +from cellar import Analyzer , Writing def cellar_csv_n(): get_cellar(save_file='n', file_format='csv', sd='2022-01-01', max_ecli=100) From e56d32876b0cf438d030db836df6aa1c226a2f27 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 1 May 2024 09:54:07 +0200 Subject: [PATCH 59/78] Update __init__.py --- cellar/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cellar/__init__.py b/cellar/__init__.py index 0901c57..cb1893b 100644 --- a/cellar/__init__.py +++ b/cellar/__init__.py @@ -1 +1,2 @@ from cellar_extractor import * +from cellar_extractor.operative_extractions import Analyzer , Writing From dc9aba3cc81d1b0eba85dd8ead8ae8433cc295b6 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 1 May 2024 09:55:24 +0200 Subject: [PATCH 60/78] Update __init__.py --- cellar/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellar/__init__.py b/cellar/__init__.py index cb1893b..8e13fda 100644 --- a/cellar/__init__.py +++ b/cellar/__init__.py @@ -1,2 +1,2 @@ from cellar_extractor import * -from cellar_extractor.operative_extractions import Analyzer , Writing +from cellar_extractor import Analyzer , Writing From 1ca331be747978fa0742f15ebd7099520cbb3fbf Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 1 May 2024 09:56:51 +0200 Subject: [PATCH 61/78] Update __init__.py --- cellar/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellar/__init__.py b/cellar/__init__.py index 8e13fda..09fe356 100644 --- a/cellar/__init__.py 
+++ b/cellar/__init__.py @@ -1,2 +1,2 @@ from cellar_extractor import * -from cellar_extractor import Analyzer , Writing + From 75b2978ff84b6cd446841f6701cbfc1636e2dfd3 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 1 May 2024 09:57:07 +0200 Subject: [PATCH 62/78] Update tests.py --- tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests.py b/tests.py index 36c1f38..d89cec3 100644 --- a/tests.py +++ b/tests.py @@ -1,6 +1,6 @@ import random from cellar import * -from cellar import Analyzer , Writing + def cellar_csv_n(): get_cellar(save_file='n', file_format='csv', sd='2022-01-01', max_ecli=100) From e4c72364d5834a2b81d4b5ee526de68767783a54 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 1 May 2024 10:01:45 +0200 Subject: [PATCH 63/78] Update __init__.py --- cellar/cellar_extractor/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cellar/cellar_extractor/__init__.py b/cellar/cellar_extractor/__init__.py index ab2ae68..d07093e 100644 --- a/cellar/cellar_extractor/__init__.py +++ b/cellar/cellar_extractor/__init__.py @@ -2,6 +2,7 @@ from cellar_extractor.cellar import get_cellar_extra from cellar_extractor.cellar import get_nodes_and_edges_lists from cellar_extractor.cellar import filter_subject_matter -from cellar_extractor.operative_extractions import Analyzer,Writing +from cellar_extractor.operative_extractions import Analyzer +from cellar_extractor.operative_extractions import Writing import logging logging.basicConfig(level=logging.INFO) From a7455eefee7b65eb6fd9de897e6e3630787e6938 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 1 May 2024 10:09:13 +0200 Subject: [PATCH 64/78] Update tests.py --- tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests.py b/tests.py index d89cec3..cea29f8 100644 --- a/tests.py +++ b/tests.py @@ -1,5 +1,5 @@ import 
random -from cellar import * +from cellar_extractor import * def cellar_csv_n(): From 78b71406e61752a67e07a562914a94a7065b99a3 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 1 May 2024 10:16:39 +0200 Subject: [PATCH 65/78] add pip install -e --- .github/workflows/github-actions.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/github-actions.yml b/.github/workflows/github-actions.yml index ea25b00..497ebc4 100644 --- a/.github/workflows/github-actions.yml +++ b/.github/workflows/github-actions.yml @@ -17,7 +17,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install cellar-extractor + pip install -e cellar/cellar_extractor # pip install echr-extractor - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." - run: echo "🖥️ The workflow is now ready to test your code on the runner." From 8da60dcd359fc5bdf17de0d3b231bb39c4a6dfec Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 1 May 2024 10:18:26 +0200 Subject: [PATCH 66/78] Update github-actions.yml --- .github/workflows/github-actions.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/github-actions.yml b/.github/workflows/github-actions.yml index 497ebc4..d92fc29 100644 --- a/.github/workflows/github-actions.yml +++ b/.github/workflows/github-actions.yml @@ -17,7 +17,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -e cellar/cellar_extractor + pip install -e cellar/* # pip install echr-extractor - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." - run: echo "🖥️ The workflow is now ready to test your code on the runner." 
From 25caaf8c1d3baca810aae491b159800172de38ee Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 1 May 2024 10:20:01 +0200 Subject: [PATCH 67/78] Add pip install -e cellar/ --- .github/workflows/github-actions.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/github-actions.yml b/.github/workflows/github-actions.yml index d92fc29..29bcc92 100644 --- a/.github/workflows/github-actions.yml +++ b/.github/workflows/github-actions.yml @@ -17,7 +17,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -e cellar/* + pip install -e cellar/ # pip install echr-extractor - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." - run: echo "🖥️ The workflow is now ready to test your code on the runner." From c7bac514234f0090911851513db4ab5d2329d3ef Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 1 May 2024 10:36:00 +0200 Subject: [PATCH 68/78] Update README.md --- cellar/README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cellar/README.md b/cellar/README.md index acb6b80..2219773 100644 --- a/cellar/README.md +++ b/cellar/README.md @@ -37,6 +37,13 @@ Python 3.9 gijsvd + + + venvis +
+ venvis +
+ From 754805448c3ca2077b7c53a19da808b5dfccd0cc Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 1 May 2024 11:24:39 +0200 Subject: [PATCH 69/78] Update README.md --- cellar/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cellar/README.md b/cellar/README.md index 2219773..fc88880 100644 --- a/cellar/README.md +++ b/cellar/README.md @@ -66,6 +66,10 @@ Python 3.9 Allows the creation of a network graph of the citations. Can only be returned in-memory.
  • filter_subject_matter
  • Returns a dataframe of cases only containing a certain phrase in the column containing the subject of cases. +
  • Analyzer
  • + A class whose instance(declaration) when called returns a list of the all the text contained within the operative part for each European Case law case(En-English only). +
  • Writing
  • + A class which writes the text for the operative part for each European Case law case(En-English only) into csv,json and txt files(Generated upon initialization).
    From f15870cd7d036f099deee0a673a759ce7b9c796c Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 1 May 2024 11:27:40 +0200 Subject: [PATCH 70/78] Update README.md --- cellar/README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cellar/README.md b/cellar/README.md index fc88880..d4dddd5 100644 --- a/cellar/README.md +++ b/cellar/README.md @@ -69,7 +69,13 @@ Python 3.9
  • Analyzer
  • A class whose instance(declaration) when called returns a list of the all the text contained within the operative part for each European Case law case(En-English only).
  • Writing
  • - A class which writes the text for the operative part for each European Case law case(En-English only) into csv,json and txt files(Generated upon initialization). + A class which writes the text for the operative part for each European Case law case(En-English only) into csv,json and txt files(Generated upon initialization).
    + the Writing has three functions :
    +
      +
    • to_csv() - Writes the operative part along with celex id into a csv file
    • +
    • to_json() - Writes the operative part along with celex id into a json file
    • +
    • to_txt() - Writes the operative part along with celex id into a txt file
    • +

    From 15b7b04fe38a60895cb0feebf6613a428777a867 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 1 May 2024 11:28:04 +0200 Subject: [PATCH 71/78] Update README.md --- cellar/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellar/README.md b/cellar/README.md index d4dddd5..5a0177f 100644 --- a/cellar/README.md +++ b/cellar/README.md @@ -70,7 +70,7 @@ Python 3.9 A class whose instance(declaration) when called returns a list of the all the text contained within the operative part for each European Case law case(En-English only).
  • Writing
  • A class which writes the text for the operative part for each European Case law case(En-English only) into csv,json and txt files(Generated upon initialization).
    - the Writing has three functions :
    + the Writing class has three functions :

    • to_csv() - Writes the operative part along with celex id into a csv file
    • to_json() - Writes the operative part along with celex id into a json file
    • From 44bf4b84223a5884344922cabb47d25ef1a8d61f Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 1 May 2024 11:31:57 +0200 Subject: [PATCH 72/78] Update README.md --- cellar/README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cellar/README.md b/cellar/README.md index 5a0177f..c32c0c0 100644 --- a/cellar/README.md +++ b/cellar/README.md @@ -132,6 +132,15 @@ Python 3.9
    • phrase: string, required, default None
    • The phrase which has to be present in the subject matter of cases. Case insensitive.
    +
  • Analyzer
  • +
      +
    • celex: str, required
    • +
    +
  • Writing
  • +
      +
    • celex: str, required
    • +
    + From aa09b6f9e89902165b1980d9fd64c575cb09eaf1 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 1 May 2024 11:35:02 +0200 Subject: [PATCH 73/78] Update README.md --- cellar/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cellar/README.md b/cellar/README.md index c32c0c0..e4baac2 100644 --- a/cellar/README.md +++ b/cellar/README.md @@ -134,11 +134,11 @@ Python 3.9
  • Analyzer
    • -
    • celex: str, required
    • +
    • celex id: str, required
  • Writing
    • -
    • celex: str, required
    • +
    • celex id: str, required
    From 908d5e6b9714c1e30efd18e0552257a0e935f28a Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 1 May 2024 11:48:44 +0200 Subject: [PATCH 74/78] Update README.md --- cellar/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cellar/README.md b/cellar/README.md index e4baac2..b4f9d3b 100644 --- a/cellar/README.md +++ b/cellar/README.md @@ -135,10 +135,12 @@ Python 3.9
  • Analyzer
    • celex id: str, required
    • +
    • Pass as a constructor upon initializing the class
  • Writing
    • celex id: str, required
    • +
    • Pass as a constructor upon initializing the class
    From 3f00adbff2c651050d307c7915d0b235fd09a307 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 1 May 2024 11:51:46 +0200 Subject: [PATCH 75/78] Add for Analyzer and Writing Classes --- cellar/README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/cellar/README.md b/cellar/README.md index b4f9d3b..e54ab47 100644 --- a/cellar/README.md +++ b/cellar/README.md @@ -160,7 +160,22 @@ Below are examples for in-memory saving: df = cell.get_cellar(save_file='n', file_format='csv', sd='2022-01-01', max_ecli=1000) df,json = cell.get_cellar_extra(save_file='n', max_ecli=100, sd='2022-01-01', threads=10) ``` +```python +instance=Analyzer(celex_id:str) +output_list=instance() +print(output_list) +``` +

    Create a callback of the instance of the class initiated and pass a list as it's value.

    + +

    The Writing Class also takes a celex id , upon initializing the class , through the means of the constructor and writes the content of its operative part into different files , depending on the function called

    +```python +instance=Writing(celex_id:str) +output=instance.to_csv()#for csv +output=instance.to_txt()#for txt +output=instance.to_json()#for json + +``` ## License [![License: Apache 2.0](https://img.shields.io/github/license/maastrichtlawtech/extraction_libraries)](https://opensource.org/licenses/Apache-2.0) From c92fbef1abd7d119a7ec5aa49db64dccd679726d Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 1 May 2024 11:53:19 +0200 Subject: [PATCH 76/78] Add code for Analyzer class --- cellar/README.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/cellar/README.md b/cellar/README.md index e54ab47..455ef5e 100644 --- a/cellar/README.md +++ b/cellar/README.md @@ -160,17 +160,21 @@ Below are examples for in-memory saving: df = cell.get_cellar(save_file='n', file_format='csv', sd='2022-01-01', max_ecli=1000) df,json = cell.get_cellar_extra(save_file='n', max_ecli=100, sd='2022-01-01', threads=10) ``` +

    Create a callback of the instance of the class initiated and pass a list as it's value.

    + ```python -instance=Analyzer(celex_id:str) +import cellar_extractor as cell +instance=cell.Analyzer(celex_id:str) output_list=instance() -print(output_list) +print(output_list) # prints operative part of the Case as a list ``` -

    Create a callback of the instance of the class initiated and pass a list as it's value.

    +

    The Writing class also takes a CELEX id through its constructor upon initialization, and writes the content of the case's operative part into different file formats, depending on the function called.

    ```python -instance=Writing(celex_id:str) +import cellar_extractor as cell +instance=cell.Writing(celex_id:str) output=instance.to_csv()#for csv output=instance.to_txt()#for txt output=instance.to_json()#for json From 868b14c922a3bfe952a1b6d985588ba2199e4394 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 1 May 2024 13:01:27 +0200 Subject: [PATCH 77/78] Update README.md --- cellar/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellar/README.md b/cellar/README.md index 455ef5e..8b69508 100644 --- a/cellar/README.md +++ b/cellar/README.md @@ -147,7 +147,7 @@ Python 3.9 ## Examples -``` +```python import cellar_extractor as cell Below are examples for in-file saving: From 636865188e96e0cb309bb9b9b79a2ad7641b3d81 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 1 May 2024 13:51:34 +0200 Subject: [PATCH 78/78] Update README.md --- cellar/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellar/README.md b/cellar/README.md index 8b69508..f5d9d64 100644 --- a/cellar/README.md +++ b/cellar/README.md @@ -67,7 +67,7 @@ Python 3.9
  • filter_subject_matter
  • Returns a dataframe containing only those cases whose subject-matter column contains a given phrase.
  • Analyzer
  • - A class whose instance(declaration) when called returns a list of the all the text contained within the operative part for each European Case law case(En-English only). + A class whose instance (declaration), when called, returns a list of all the text contained within the operative part of each Court of Justice of the European Union (CJEU, formerly known as the European Court of Justice (ECJ)) judgement (English only).
  • Writing
  • A class which writes the text of the operative part of each European case law case (English only) into CSV, JSON, and TXT files (generated upon initialization).
    the Writing class has three functions :