diff --git a/Project_documentation.pdf b/Project_documentation.pdf new file mode 100644 index 00000000..de1166a3 Binary files /dev/null and b/Project_documentation.pdf differ diff --git a/data/blah.py b/data/blah.py new file mode 100644 index 00000000..e69de29b diff --git a/data/read2.py b/data/read2.py new file mode 100644 index 00000000..d6e63921 --- /dev/null +++ b/data/read2.py @@ -0,0 +1,33 @@ +#reading contents of the pdf using pdfplumber + +import pdfplumber + +# Use raw string format to avoid issues with backslashes +file_path = r"C:\Users\HP\OneDrive\Desktop\data_preprocessor\data\acetone-acs-l (1).pdf" + +# Open the PDF +with pdfplumber.open(file_path) as pdf: + # Iterate through each page of the PDF + for page_num, page in enumerate(pdf.pages): + # Extract text from the page + text = page.extract_text() + print(f"Text from page {page_num + 1}:") + print(text) + + # Extract tables from the page + tables = page.extract_tables() # Use extract_tables() instead of extract_table() + print(f"Tables from page {page_num + 1}:") + for table in tables: + for row in table: + print(row) + + # Extract images from the page + images = page.images # Use page.images to get a list of images on the page + if images: + print(f"Images on page {page_num + 1}:") + for image in images: + print(f"Image on page {page_num + 1}: {image}") + # Note: PDFPlumber does not extract the image data itself, only metadata like its position, width, height, etc. + # For extracting image data, you might need another library like Pillow to render the PDF into an image. + + diff --git a/data/read4.py b/data/read4.py new file mode 100644 index 00000000..60ecee4a --- /dev/null +++ b/data/read4.py @@ -0,0 +1,78 @@ +import pdfplumber +import json + +def process_main_heading(heading): + main_head=heading.strip() + return{heading.strip()} + +#def process_sub_heading1(heading): + # sub_head=heading.strip() + # return{heading.strip()} + +def process_paragraph(h,paragraph): + return{h : paragraph.strip()} + + + +#def process_paragraph(paragraph): + # return {"type": "paragraph", "content": paragraph.strip()} + +#def process_heading(heading): + #return {"type": "heading", "content": heading.strip()} + +def process_table(table): + return {"type": "table", "content": table} + + + +def detect_structure(text): + paragraphs=text.split('\n\n') #to split the paras ig + structured_content=[] #to store the structred content ...empty list is initialised + + for paragraph in paragraphs: + clean_paragraph = paragraph.strip() #to remove leading or trailing whitespaces + + if clean_paragraph: + + if len(clean_paragraph.split()) < 5: # include logic for headers of different font sizes + headd=clean_paragraph + structured_content.append(process_main_heading(clean_paragraph)) + + else: + structured_content.append(process_paragraph(headd,clean_paragraph)) + + return structured_content + + +def extract_tables(page): + tables = page.extract_tables() + table_list = [] + for table in tables: + table_list.append(process_table(table)) + return table_list + + +def main(): + #file_path=input(r"Enter the file path: ") + file_path = r"C:\Users\HP\OneDrive\Desktop\data_preprocessor\data\acetone-acs-l (1).pdf" + + document_structure=[] #empty list to storee doc structure?? + + with pdfplumber.open(file_path) as pdf: + + for page_num,page in enumerate(pdf.pages): + print(f"Processing page {page_num + 1}:") + + text=page.extract_text() + +# Convert the structure to JSON + json_output = json.dumps(document_structure, indent=4) + + # Save to file or print + with open('output.json', 'w') as json_file: + json_file.write(json_output) + + print("PDF content has been converted to JSON.") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/data/read5.py b/data/read5.py new file mode 100644 index 00000000..1fd1540f --- /dev/null +++ b/data/read5.py @@ -0,0 +1,62 @@ +import pdfplumber +import json + +def process_paragraph(paragraph): + return {"type": "paragraph", "content": paragraph.strip()} + +def process_heading(heading): + return {"type": "heading", "content": heading.strip()} + +def process_table(table): + return {"type": "table", "content": table} + +def detect_structure(text): + paragraphs = text.split('\n\n') + structured_content = [] + + for paragraph in paragraphs: + clean_paragraph = paragraph.strip() + if clean_paragraph: + if len(clean_paragraph.split()) < 5: + structured_content.append(process_heading(clean_paragraph)) + else: + structured_content.append(process_paragraph(clean_paragraph)) + + return structured_content + +def extract_tables(page): + tables = page.extract_tables() + table_list = [] + for table in tables: + table_list.append(process_table(table)) + return table_list + +def main(): + #filepath = input(r"Enter the file path: ") + filepath = r"C:\Users\HP\OneDrive\Desktop\data_preprocessor\data\acetone-acs-l (1).pdf" + + document_structure = [] + + with pdfplumber.open(filepath) as pdf: + for page_num, page in enumerate(pdf.pages): + print(f"Processing page {page_num + 1}:") + + text = page.extract_text() + if text: + structured_text = detect_structure(text) + document_structure.extend(structured_text) + + tables = extract_tables(page) + document_structure.extend(tables) + + # Convert the structure to JSON + json_output = json.dumps(document_structure, indent=4) + + # Save to file or print + with open('output.json', 'w') as json_file: + json_file.write(json_output) + + print("PDF content has been converted to JSON.") + +if __name__ == "__main__": + main() diff --git a/data/read6.py b/data/read6.py new file mode 100644 index 00000000..6f2fbd53 --- /dev/null +++ b/data/read6.py @@ -0,0 +1,77 @@ +import pdfplumber +import json + + + +def process_main_heading(heading): + return{heading.strip()} + +def process_sub_heading(subheading): + return{subheading.strip()} + +def process_paragraph(h,paragraph): + return{h : paragraph.strip()} + +def process_table(table): + return {"type": "table", "content": table} + + +def detect_structure(text): + paragraphs = text.split('\n\n') + structured_content = [] + + for paragraph in paragraphs: + clean_paragraph = paragraph.strip() + if clean_paragraph: + for element in clean_paragraph.extract_words(): # Each element contains text, font-size, bold, etc. + text = element["text"] + font_size = element["size"] # Access font size + is_bold = "Bold" in element["fontname"] + if len(clean_paragraph.split()) < 5 and is_bold: + headd=clean_paragraph + structured_content.append(process_main_heading(clean_paragraph)) + elif len(clean_paragraph.split())<5: + subhead=clean_paragraph + structured_content.append(process_sub_heading(clean_paragraph)) + + else: + structured_content.append(process_paragraph(headd,clean_paragraph)) + + return structured_content + +def extract_tables(page): + tables = page.extract_tables() + table_list = [] + for table in tables: + table_list.append(process_table(table)) + return table_list + +def main(): + #filepath = input(r"Enter the file path: ") + filepath = r"C:\Users\HP\OneDrive\Desktop\data_preprocessor\data\acetone-acs-l (1).pdf" + + document_structure = [] + + with pdfplumber.open(filepath) as pdf: + for page_num, page in enumerate(pdf.pages): + print(f"Processing page {page_num + 1}:") + + text = page.extract_text() + if text: + structured_text = detect_structure(text) + document_structure.extend(structured_text) + + tables = extract_tables(page) + document_structure.extend(tables) + + # Convert the structure to JSON + json_output = json.dumps(document_structure, indent=4) + + # Save to file or print + with open('output.json', 'w') as json_file: + json_file.write(json_output) + + print("PDF content has been converted to JSON.") + +if __name__ == "__main__": + main() diff --git a/data/read7.py b/data/read7.py new file mode 100644 index 00000000..a09594ef --- /dev/null +++ b/data/read7.py @@ -0,0 +1,82 @@ +import pdfplumber +import json +import re + +def process_main_heading(heading): + main_head=heading.strip() + return{heading.strip()} + +#def process_sub_heading1(heading): + # sub_head=heading.strip() + # return{heading.strip()} + +#def process_paragraph(h,paragraph): + # return{h : paragraph.strip()} + +def process_paragraph(paragraph): + return {"type": "paragraph", "content": paragraph.strip()} + +def process_heading(heading): + return {"type": "heading", "content": heading.strip()} + +def process_table(table): + return {"type": "table", "content": table} + +def detect_structure(text): + paragraphs=text.split('\n\n') #to split the paras ig + structured_content=[] #to store the structred content ...empty list is initialised + + sentences_per_paragraph = [paragraph.split('.') for paragraph in paragraphs] + cleaned_sentences_per_paragraph = [[sentence.strip() for sentence in sentences if sentence.strip()] for sentences in sentences_per_paragraph] + + for i, sentences in enumerate(cleaned_sentences_per_paragraph): + #print(f"Paragraph {i + 1}:") + for sentence in sentences: + match = re.search("ThermoFisher",sentence) or re.search("SCIENTIFIC",sentence) or re.search("SAFETY DATA SHEET",sentence) or re.search("Creation Date\s+\d+-[a-zA-Z]{3}-\d+\s+Revision Date\s+\d+-[a-zA-Z]{3}-\d+\s+Revision Number\s+\d",sentence) + print(f" {sentence}.") + +''' for paragraph in paragraphs: + clean_paragraph = paragraph.strip() #to remove leading or trailing whitespaces + + if clean_paragraph: + + if len(clean_paragraph.split()) < 5: # include logic for headers of different font sizes + headd=clean_paragraph + structured_content.append(process_main_heading(clean_paragraph)) + + else: + structured_content.append(process_paragraph(headd,clean_paragraph)) + + return structured_content''' + + +def extract_tables(page): + tables = page.extract_tables() + table_list = [] + for table in tables: + table_list.append(process_table(table)) + return table_list + + +def main(): + #file_path=input(r"Enter the file path: ") + file_path = r"C:\Users\HP\OneDrive\Desktop\data_preprocessor\data\acetone-acs-l (1).pdf" + + document_structure=[] #empty list to storee doc structure?? + + with pdfplumber.open(file_path) as pdf: + + for page_num,page in enumerate(pdf.pages): + print(f"Processing page {page_num + 1}:") + + text=page.extract_text() + + + + json_output = json.dumps(document_structure, indent=4) + with open('output.json', 'w') as json_file: + json_file.write(json_output) + print("PDF content has been converted to JSON.") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/data/read8.py b/data/read8.py new file mode 100644 index 00000000..2008eff0 --- /dev/null +++ b/data/read8.py @@ -0,0 +1,44 @@ +import pdfplumber + +def process_pdf_with_font_details(pdf_path): + with pdfplumber.open(pdf_path) as pdf: + for page in pdf.pages: + # Extract characters with their properties + chars = page.chars + + # Initialize a variable to hold the current word + current_word = "" + current_font_name = None + current_font_size = None + current_fill = None + + for char in chars: + text = char['text'] + font_size = char['size'] + font_name = char['fontname'] + fill_color = char.get('fill') # Get fill color + + # Check for spaces to identify word boundaries + if text.isspace(): + if current_word: + # Print the accumulated word with font details + print(f"'{current_word}' (Font size: {current_font_size}, Font style: {current_font_name}, Font color: {current_fill})") + current_word = "" + current_font_name = None + current_font_size = None + current_fill = None + else: + current_word += text + # Store the font details of the current character + current_font_name = font_name + current_font_size = font_size + current_fill = fill_color # Store the fill color + + # Print the last word if it exists + if current_word: + print(f"'{current_word}' (Font size: {current_font_size}, Font style: {current_font_name}, Font color: {current_fill})") + +# Example usage +pdf_path = r"C:\Users\HP\OneDrive\Desktop\data_preprocessor\data\acetone-acs-l (1).pdf" + +process_pdf_with_font_details(pdf_path) diff --git a/output.json b/output.json new file mode 100644 index 00000000..58a3876f --- /dev/null +++ b/output.json @@ -0,0 +1,311 @@ +[ + { + "type": "paragraph", + "content": "SAFETY DATA SHEET\nCreation Date 28-Apr-2009 Revision Date 13-Oct-2023 Revision Number 9\n1. Identification\nProduct Name Acetone\nCat No. : A9-4; A9-20; A9-200; A11-1; A11-4; A11-20; A11-200; A11S-4; A13-20;\nA13-200; A16F-1GAL; A16P-1GAL; A16P-4; A16S-4; A16S-20; A18-1;\nA18-4; A18-20; A18-20LC; A18-200; A18-200LC; A18-500; A18CU1300;\nA18FB-19; A18FB-50; A18FB-115; A18FB-200; A18P-4; A18POP-19;\nA18POPB-50; A18RB-19; A18RB-50; A18RB-115; A18RB-200;\nA18RS-28; A18RS-50; A18RS-115; A18RS-200; A18S-4; A18SK-4;\nA18SS-19; A18SS-28; A18SS-50; A18SS-115; A18SS-200; A19-1;\nA19-4; A19RS-115; A19RS-200; A40-4; A928-4; A929-1; A929-4;\nA929-4LC; A929RS-19; A929RS-50; A929RS-200; A929SK-4;\nA929SS-28; A929SS-50; A929SS-115; A929SS-200; A946-4; A946-4LC;\nA946FB-200; A946RB-19; A946RB-50; A946RB-115; A946RB-200;\nA949-1; A949-4; A949-4LC; A949CU-50; A949N-119; A949N-219;\nA949POP-19; A949RS-28; A949RS-50; A949RS-115; A949SK-1;\nA949SK-4; A949SS-19; A949SS-28; A949SS-50; A949SS-115;\nA949SS-200; BP2403-1; BP2403-4; BP2403-20; BP2403-RS200;\nBP2404-1; BP2404-4; BP2404-SK1; BP2404-SK4; HC300-1GAL;\nS70091; 22050131; 22050295; XXA9ET200LI; NC2396838\nCAS No 67-64-1\nSynonyms 2-Propanone; Dimethyl ketone; (Certified ACS, HPLC, OPTIMA, Histological,\nSpectranalyzed, NF/FCC/EP, Pesticide, Electronic, GC Resolv, SAFE-COTE)\nRecommended Use Laboratory chemicals.\nUses advised against Food, drug, pesticide or biocidal product use.\nDetails of the supplier of the safety data sheet\nCompany\nFisher Scientific Company\nOne Reagent Lane\nFair Lawn, NJ 07410\nTel: (201) 796-7100\nEmergency Telephone Number\nCHEMTREC\u00d2, Inside the USA: 800-424-9300\nCHEMTREC\u00d2, Outside the USA: 001-703-527-3887\n2. Hazard(s) identification\nClassification\n______________________________________________________________________________________________\nPage 1 / 9" + }, + { + "type": "paragraph", + "content": "Acetone Revision Date 13-Oct-2023\n______________________________________________________________________________________________\nThis chemical is considered hazardous by the 2012 OSHA Hazard Communication Standard (29 CFR 1910.1200)\nFlammable liquids Category 2\nSerious Eye Damage/Eye Irritation Category 2\nSpecific target organ toxicity (single exposure) Category 3\nTarget Organs - Central nervous system (CNS).\nSpecific target organ toxicity - (repeated exposure) Category 2\nLabel Elements\nSignal Word\nDanger\nHazard Statements\nHighly flammable liquid and vapor\nCauses serious eye irritation\nMay cause drowsiness or dizziness\nMay cause damage to organs through prolonged or repeated exposure\nPrecautionary Statements\nPrevention\nWash face, hands and any exposed skin thoroughly after handling\nDo not breathe dust/fume/gas/mist/vapors/spray\nUse only outdoors or in a well-ventilated area\nKeep away from heat/sparks/open flames/hot surfaces. - No smoking\nKeep container tightly closed\nGround/bond container and receiving equipment\nUse explosion-proof electrical/ventilating/lighting equipment\nUse only non-sparking tools\nTake precautionary measures against static discharge\nWear protective gloves/protective clothing/eye protection/face protection\nKeep cool\nResponse\nGet medical attention/advice if you feel unwell\nInhalation\nIF INHALED: Remove victim to fresh air and keep at rest in a position comfortable for breathing\nCall a POISON CENTER or doctor/physician if you feel unwell\nSkin\nIF ON SKIN (or hair): Take off immediately all contaminated clothing. Rinse skin with water/shower\nEyes\nIF IN EYES: Rinse cautiously with water for several minutes. Remove contact lenses, if present and easy to do. Continue rinsing\nIf eye irritation persists: Get medical advice/attention\nFire\nIn case of fire: Use CO2, dry chemical, or foam for extinction\nStorage\nStore in a well-ventilated place. Keep container tightly closed\nStore locked up\nDisposal\n______________________________________________________________________________________________\nPage 2 / 9" + }, + { + "type": "paragraph", + "content": "Acetone Revision Date 13-Oct-2023\n______________________________________________________________________________________________\nDispose of contents/container to an approved waste disposal plant\nHazards not otherwise classified (HNOC)\nRepeated exposure may cause skin dryness or cracking\n3. Composition/Information on Ingredients\nComponent CAS No Weight %\nAcetone 67-64-1 >95\n4. First-aid measures\nGeneral Advice If symptoms persist, call a physician.\nEye Contact Rinse immediately with plenty of water, also under the eyelids, for at least 15 minutes. Get\nmedical attention.\nSkin Contact Wash off immediately with plenty of water for at least 15 minutes. If skin irritation persists,\ncall a physician.\nInhalation Remove to fresh air. If not breathing, give artificial respiration. Get medical attention if\nsymptoms occur.\nIngestion Clean mouth with water and drink afterwards plenty of water.\nMost important symptoms and Difficulty in breathing. Symptoms of overexposure may be headache, dizziness, tiredness,\neffects nausea and vomiting: May cause pulmonary edema\nNotes to Physician Treat symptomatically\n5. Fire-fighting measures\nSuitable Extinguishing Media Water spray, carbon dioxide (CO2), dry chemical, alcohol-resistant foam. Water mist may\nbe used to cool closed containers.\nUnsuitable Extinguishing Media Water may be ineffective\nFlash Point -20 \u00b0C / -4 \u00b0F\nMethod - CC (closed cup)\nAutoignition Temperature 465 \u00b0C / 869 \u00b0F\nExplosion Limits\nUpper 12.8 vol %\nLower 2.5 vol %\nOxidizing Properties Not oxidising\nSensitivity to Mechanical ImpactNo information available\nSensitivity to Static Discharge No information available\nSpecific Hazards Arising from the Chemical\nFlammable. Risk of ignition. Containers may explode when heated. Vapors may form explosive mixtures with air. Vapors may\ntravel to source of ignition and flash back.\nHazardous Combustion Products\nCarbon monoxide (CO). Carbon dioxide (CO2). Formaldehyde. Methanol.\nProtective Equipment and Precautions for Firefighters\n______________________________________________________________________________________________\nPage 3 / 9" + }, + { + "type": "table", + "content": [ + [ + "Component", + "CAS No", + "Weight %" + ], + [ + "Acetone", + "67-64-1", + ">95" + ] + ] + }, + { + "type": "paragraph", + "content": "Acetone Revision Date 13-Oct-2023\n______________________________________________________________________________________________\nAs in any fire, wear self-contained breathing apparatus pressure-demand, MSHA/NIOSH (approved or equivalent) and full\nprotective gear.\nNFPA\nHealth Flammability Instability Physical hazards\n2 3 0 N/A\n6. Accidental release measures\nPersonal Precautions Use personal protective equipment as required. Ensure adequate ventilation. Remove all\nsources of ignition. Take precautionary measures against static discharges.\nEnvironmental Precautions Should not be released into the environment.\nMethods for Containment and CleanSoak up with inert absorbent material. Keep in suitable, closed containers for disposal.\nUp Remove all sources of ignition. Use spark-proof tools and explosion-proof equipment.\n7. Handling and storage\nHandling Do not get in eyes, on skin, or on clothing. Wear personal protective equipment/face\nprotection. Ensure adequate ventilation. Avoid ingestion and inhalation. Keep away from\nopen flames, hot surfaces and sources of ignition. Use only non-sparking tools. To avoid\nignition of vapors by static electricity discharge, all metal parts of the equipment must be\ngrounded. Take precautionary measures against static discharges.\nStorage. Flammables area. Keep containers tightly closed in a dry, cool and well-ventilated place.\nKeep away from heat, sparks and flame. Incompatible Materials. Strong oxidizing agents.\nStrong reducing agents. Strong bases. Peroxides. Halogenated compounds. Alkali metals.\nAmines.\n8. Exposure controls / personal protection\nExposure Guidelines\nComponent ACGIH TLV OSHA PEL NIOSH Mexico OEL (TWA)\nAcetone TWA: 250 ppm (Vacated) TWA: 750 ppm IDLH: 2500 ppm TWA: 500 ppm\nSTEL: 500 ppm (Vacated) TWA: 1800 mg/m3 TWA: 250 ppm STEL: 750 ppm\n(Vacated) STEL: 2400 TWA: 590 mg/m3\nmg/m3\n(Vacated) STEL: 1000 ppm\nTWA: 1000 ppm\nTWA: 2400 mg/m3\nLegend\nACGIH - American Conference of Governmental Industrial Hygienists\nOSHA - Occupational Safety and Health Administration\nNIOSH: NIOSH - National Institute for Occupational Safety and Health\nEngineering Measures Ensure adequate ventilation, especially in confined areas. Ensure that eyewash stations\nand safety showers are close to the workstation location. Use explosion-proof\nelectrical/ventilating/lighting equipment.\nPersonal Protective Equipment\nEye/face Protection Wear appropriate protective eyeglasses or chemical safety goggles as described by\nOSHA's eye and face protection regulations in 29 CFR 1910.133 or European Standard\nEN166.\nSkin and body protection Wear appropriate protective gloves and clothing to prevent skin exposure.\nRespiratory Protection Follow the OSHA respirator regulations found in 29 CFR 1910.134 or European Standard\n______________________________________________________________________________________________\nPage 4 / 9" + }, + { + "type": "table", + "content": [ + [ + "Component", + "ACGIH TLV", + "OSHA PEL", + "NIOSH", + "Mexico OEL (TWA)" + ], + [ + "Acetone", + "TWA: 250 ppm\nSTEL: 500 ppm", + "(Vacated) TWA: 750 ppm\n(Vacated) TWA: 1800 mg/m3\n(Vacated) STEL: 2400\nmg/m3\n(Vacated) STEL: 1000 ppm\nTWA: 1000 ppm\nTWA: 2400 mg/m3", + "IDLH: 2500 ppm\nTWA: 250 ppm\nTWA: 590 mg/m3", + "TWA: 500 ppm\nSTEL: 750 ppm" + ] + ] + }, + { + "type": "paragraph", + "content": "Acetone Revision Date 13-Oct-2023\n______________________________________________________________________________________________\nEN 149. Use a NIOSH/MSHA or European Standard EN 149 approved respirator if\nexposure limits are exceeded or if irritation or other symptoms are experienced.\nRecommended Filter type: low boiling organic solvent. Type AX. Brown. conforming to EN371.\nHygiene Measures Handle in accordance with good industrial hygiene and safety practice.\n9. Physical and chemical properties\nPhysical State Liquid\nAppearance Colorless\nOdor sweet\nOdor Threshold 19.8 ppm\npH 7\nMelting Point/Range -95 \u00b0C / -139 \u00b0F\nBoiling Point/Range 56 \u00b0C / 132.8 \u00b0F\nFlash Point -20 \u00b0C / -4 \u00b0F\nMethod - CC (closed cup)\nEvaporation Rate 5.6 (Butyl Acetate = 1.0)\nFlammability (solid,gas) Not applicable\nFlammability or explosive limits\nUpper 12.8 vol %\nLower 2.5 vol %\nVapor Pressure 247 mbar @ 20 \u00b0C\nVapor Density 2.0\nSpecific Gravity 0.790\nSolubility Soluble in water\nPartition coefficient; n-octanol/water No data available\nAutoignition Temperature 465 \u00b0C / 869 \u00b0F\nDecomposition Temperature > 4\u00b0C\nViscosity 0.32 mPa.s @ 20 \u00b0C\nMolecular Formula C3 H6 O\nMolecular Weight 58.08\nVOC Content(%) 100\nRefractive index 1.358 - 1.359\n10. Stability and reactivity\nReactive Hazard None known, based on information available\nStability Stable under normal conditions.\nConditions to Avoid Heat, flames and sparks. Incompatible products. Keep away from open flames, hot\nsurfaces and sources of ignition.\nIncompatible Materials Strong oxidizing agents, Strong reducing agents, Strong bases, Peroxides, Halogenated\ncompounds, Alkali metals, Amines\nHazardous Decomposition ProductsCarbon monoxide (CO), Carbon dioxide (CO2), Formaldehyde, Methanol\nHazardous Polymerization Hazardous polymerization does not occur.\nHazardous Reactions None under normal processing.\n11. Toxicological information\nAcute Toxicity\nProduct Information\nComponent Information\nComponent LD50 Oral LD50 Dermal LC50 Inhalation\n______________________________________________________________________________________________\nPage 5 / 9" + }, + { + "type": "table", + "content": [ + [ + "Component", + "LD50 Oral", + "LD50 Dermal", + "LC50 Inhalation" + ] + ] + }, + { + "type": "paragraph", + "content": "Acetone Revision Date 13-Oct-2023\n______________________________________________________________________________________________\nAcetone 5800 mg/kg ( Rat ) > 15800 mg/kg (rabbit) 76 mg/l, 4 h, (rat)\n> 7400 mg/kg (rat)\nToxicologically Synergistic Carbon tetrachloride; Chloroform; Trichloroethylene; Bromodichloromethane;\nProducts Dibromochloromethane; N-nitrosodimethylamine; 1,1,2-Trichloroethane; Styrene;\nAcetonitrile, 2,5-Hexanedione; Ethanol; 1,2-Dichlorobenzene\nDelayed and immediate effects as well as chronic effects from short and long-term exposure\nIrritation Irritating to eyes\nSensitization No information available\nCarcinogenicity The table below indicates whether each agency has listed any ingredient as a carcinogen.\nComponent CAS No IARC NTP ACGIH OSHA Mexico\nAcetone 67-64-1 Not listed Not listed Not listed Not listed Not listed\nMutagenic Effects No information available\nReproductive Effects No information available.\nDevelopmental Effects No information available.\nTeratogenicity No information available.\nSTOT - single exposure Central nervous system (CNS)\nSTOT - repeated exposure None known\nAspiration hazard No information available\nSymptoms / effects,both acute and Symptoms of overexposure may be headache, dizziness, tiredness, nausea and vomiting:\ndelayed May cause pulmonary edema\nEndocrine Disruptor Information No information available\nOther Adverse Effects The toxicological properties have not been fully investigated.\n12. Ecological information\nEcotoxicity\n.\nComponent Freshwater Algae Freshwater Fish Microtox Water Flea\nAcetone NOEC = 430 mg/l (algae; 96 Oncorhynchus mykiss: LC50 EC50 = 14500 mg/L/15 min EC50 = 8800 mg/L/48h\nh) = 5540 mg/l 96h EC50 = 12700 mg/L/48h\nAlburnus alburnus: LC50 = EC50 = 12600 mg/L/48h\n11000 mg/l 96h\nLeuciscus idus: LC50 =\n11300 mg/L/48h\nSalmo gairdneri: LC50 =\n6100 mg/L/24h\nPersistence and Degradability Persistence is unlikely based on information available.\nBioaccumulation/ Accumulation No information available.\nMobility Will likely be mobile in the environment due to its volatility.\nComponent log Pow\nAcetone -0.24\n13. Disposal considerations\nWaste Disposal Methods Chemical waste generators must determine whether a discarded chemical is classified as a\nhazardous waste. Chemical waste generators must also consult local, regional, and\nnational hazardous waste regulations to ensure complete and accurate classification.\n______________________________________________________________________________________________\nPage 6 / 9" + }, + { + "type": "table", + "content": [ + [ + "Acetone", + "5800 mg/kg ( Rat )", + "> 15800 mg/kg (rabbit)\n> 7400 mg/kg (rat)", + "76 mg/l, 4 h, (rat)" + ] + ] + }, + { + "type": "table", + "content": [ + [ + "Component", + "CAS No", + "IARC", + "NTP", + "ACGIH", + "OSHA", + "Mexico" + ], + [ + "Acetone", + "67-64-1", + "Not listed", + "Not listed", + "Not listed", + "Not listed", + "Not listed" + ] + ] + }, + { + "type": "table", + "content": [ + [ + "Component", + "Freshwater Algae", + "Freshwater Fish", + "Microtox", + "Water Flea" + ], + [ + "Acetone", + "NOEC = 430 mg/l (algae; 96\nh)", + "Oncorhynchus mykiss: LC50\n= 5540 mg/l 96h\nAlburnus alburnus: LC50 =\n11000 mg/l 96h\nLeuciscus idus: LC50 =\n11300 mg/L/48h\nSalmo gairdneri: LC50 =\n6100 mg/L/24h", + "EC50 = 14500 mg/L/15 min", + "EC50 = 8800 mg/L/48h\nEC50 = 12700 mg/L/48h\nEC50 = 12600 mg/L/48h" + ] + ] + }, + { + "type": "table", + "content": [ + [ + "Component", + "log Pow" + ], + [ + "Acetone", + "-0.24" + ] + ] + }, + { + "type": "paragraph", + "content": "Acetone Revision Date 13-Oct-2023\n______________________________________________________________________________________________\nComponent RCRA - U Series Wastes RCRA - P Series Wastes\nAcetone - 67-64-1 U002 -\n14. Transport information\nDOT\nUN-No UN1090\nProper Shipping Name ACETONE\nHazard Class 3\nPacking Group II\nTDG\nUN-No UN1090\nProper Shipping Name ACETONE\nHazard Class 3\nPacking Group II\nIATA\nUN-No UN1090\nProper Shipping Name ACETONE\nHazard Class 3\nPacking Group II\nIMDG/IMO\nUN-No UN1090\nProper Shipping Name ACETONE\nHazard Class 3\nPacking Group II\n15. Regulatory information\nUnited States of America Inventory\nComponent CAS No TSCA TSCA Inventory notification - TSCA - EPA Regulatory\nActive-Inactive Flags\nAcetone 67-64-1 X ACTIVE -\nLegend:\nTSCA US EPA (TSCA) - Toxic Substances Control Act, (40 CFR Part 710)\nX - Listed\n'-' - Not Listed\nTSCA - Per 40 CFR 751, Regulation of Certain Chemical Not applicable\nSubstances & Mixtures, Under TSCA Section 6(h) (PBT)\nTSCA 12(b) - Notices of Export Not applicable\nInternational Inventories\nCanada (DSL/NDSL), Europe (EINECS/ELINCS/NLP), Philippines (PICCS), Japan (ENCS), Japan (ISHL), Australia (AICS), China (IECSC), Korea\n(KECL).\nComponent CAS No DSL NDSL EINECS PICCS ENCS ISHL AICS IECSC KECL\nAcetone 67-64-1 X - 200-662-2 X X X X X KE-29367\nKECL - NIER number or KE number (http://ncis.nier.go.kr/en/main.do)\nU.S. Federal Regulations\nSARA 313 Not applicable\nSARA 311/312 Hazard Categories See section 2 for more information\nCWA (Clean Water Act) Not applicable\n______________________________________________________________________________________________\nPage 7 / 9" + }, + { + "type": "table", + "content": [ + [ + "Component", + "RCRA - U Series Wastes", + "RCRA - P Series Wastes" + ], + [ + "Acetone - 67-64-1", + "U002", + "-" + ] + ] + }, + { + "type": "table", + "content": [ + [ + "Component", + "CAS No", + "TSCA", + "TSCA Inventory notification -\nActive-Inactive", + "TSCA - EPA Regulatory\nFlags" + ], + [ + "Acetone", + "67-64-1", + "X", + "ACTIVE", + "-" + ] + ] + }, + { + "type": "table", + "content": [ + [ + "Component", + "CAS No", + "DSL", + "NDSL", + "EINECS", + "PICCS", + "ENCS", + "ISHL", + "AICS", + "IECSC", + "KECL" + ], + [ + "Acetone", + "67-64-1", + "X", + "-", + "200-662-2", + "X", + "X", + "X", + "X", + "X", + "KE-29367" + ] + ] + }, + { + "type": "paragraph", + "content": "Acetone Revision Date 13-Oct-2023\n______________________________________________________________________________________________\nClean Air Act Not applicable\nOSHA - Occupational Safety and Not applicable\nHealth Administration\nCERCLA This material, as supplied, contains one or more substances regulated as a hazardous\nsubstance under the Comprehensive Environmental Response Compensation and Liability\nAct (CERCLA) (40 CFR 302)\nComponent Hazardous Substances RQs CERCLA EHS RQs\nAcetone 5000 lb -\nCalifornia Proposition 65 This product does not contain any Proposition 65 chemicals.\nU.S. State Right-to-Know\nRegulations\nComponent Massachusetts New Jersey Pennsylvania Illinois Rhode Island\nAcetone X X X - X\nU.S. Department of Transportation\nReportable Quantity (RQ): Y\nDOT Marine Pollutant N\nDOT Severe Marine Pollutant N\nU.S. Department of Homeland This product does not contain any DHS chemicals.\nSecurity\nOther International Regulations\nMexico - Grade Serious risk, Grade 3\nAuthorisation/Restrictions according to EU REACH\nComponent CAS No REACH (1907/2006) - REACH (1907/2006) - REACH Regulation (EC\nAnnex XIV - Substances Annex XVII - Restrictions 1907/2006) article 59 -\nSubject to Authorization on Certain Dangerous Candidate List of\nSubstances Substances of Very High\nConcern (SVHC)\nAcetone 67-64-1 - Use restricted. See item -\n75.\n(see link for restriction\ndetails)\nREACH links\nhttps://echa.europa.eu/substances-restricted-under-reach\nSafety, health and environmental regulations/legislation specific for the substance or mixture\nComponent CAS No OECD HPV Persistent Organic Ozone Depletion Restriction of\nPollutant Potential Hazardous\nSubstances (RoHS)\nAcetone 67-64-1 Listed Not applicable Not applicable Not applicable\nContains component(s) that meet a 'definition' of per & poly fluoroalkyl substance (PFAS)?\nNot applicable\n______________________________________________________________________________________________\nPage 8 / 9" + }, + { + "type": "table", + "content": [ + [ + "Component", + "Hazardous Substances RQs", + "CERCLA EHS RQs" + ], + [ + "Acetone", + "5000 lb", + "-" + ] + ] + }, + { + "type": "table", + "content": [ + [ + "Component", + "Massachusetts", + "New Jersey", + "Pennsylvania", + "Illinois", + "Rhode Island" + ], + [ + "Acetone", + "X", + "X", + "X", + "-", + "X" + ] + ] + }, + { + "type": "table", + "content": [ + [ + "Component", + "CAS No", + "REACH (1907/2006) -\nAnnex XIV - Substances\nSubject to Authorization", + "REACH (1907/2006) -\nAnnex XVII - Restrictions\non Certain Dangerous\nSubstances", + "REACH Regulation (EC\n1907/2006) article 59 -\nCandidate List of\nSubstances of Very High\nConcern (SVHC)" + ], + [ + "Acetone", + "67-64-1", + "-", + "Use restricted. See item\n75.\n(see link for restriction\ndetails)", + "-" + ] + ] + }, + { + "type": "table", + "content": [ + [ + "Component", + "CAS No", + "OECD HPV", + "Persistent Organic\nPollutant", + "Ozone Depletion\nPotential", + "Restriction of\nHazardous\nSubstances (RoHS)" + ], + [ + "Acetone", + "67-64-1", + "Listed", + "Not applicable", + "Not applicable", + "Not applicable" + ] + ] + }, + { + "type": "paragraph", + "content": "Acetone Revision Date 13-Oct-2023\n______________________________________________________________________________________________\nOther International Regulations\nComponent CAS No Seveso III Directive Seveso III Directive Rotterdam Basel Convention\n(2012/18/EC) - (2012/18/EC) - Convention (PIC) (Hazardous Waste)\nQualifying QuantitiesQualifying Quantities\nfor Major Accident for Safety Report\nNotification Requirements\nAcetone 67-64-1 Not applicable Not applicable Not applicable Annex I - Y42\n16. Other information\nPrepared By Regulatory Affairs\nThermo Fisher Scientific\nEmail: EMSDS.RA@thermofisher.com\nCreation Date 28-Apr-2009\nRevision Date 13-Oct-2023\nPrint Date 13-Oct-2023\nRevision Summary This document has been updated to comply with the US OSHA HazCom 2012 Standard\nreplacing the current legislation under 29 CFR 1910.1200 to align with the Globally\nHarmonized System of Classification and Labeling of Chemicals (GHS).\nDisclaimer\nThe information provided in this Safety Data Sheet is correct to the best of our knowledge, information and belief at the\ndate of its publication. The information given is designed only as a guidance for safe handling, use, processing, storage,\ntransportation, disposal and release and is not to be considered a warranty or quality specification. The information\nrelates only to the specific material designated and may not be valid for such material used in combination with any other\nmaterials or in any process, unless specified in the text\nEnd of SDS\n______________________________________________________________________________________________\nPage 9 / 9" + }, + { + "type": "table", + "content": [ + [ + "Component", + "CAS No", + "Seveso III Directive\n(2012/18/EC) -\nQualifying Quantities\nfor Major Accident\nNotification", + "Seveso III Directive\n(2012/18/EC) -\nQualifying Quantities\nfor Safety Report\nRequirements", + "Rotterdam\nConvention (PIC)", + "Basel Convention\n(Hazardous Waste)" + ], + [ + "Acetone", + "67-64-1", + "Not applicable", + "Not applicable", + "Not applicable", + "Annex I - Y42" + ] + ] + } +] \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..69514bdd Binary files /dev/null and b/requirements.txt differ