trying python and datascience

divinedeveloper · Aug 18, 2017 · 531d793 · 531d793
commit 531d793
Show file tree

Hide file tree

Showing 12 changed files with 8,286 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,65 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+bin/
+build/
+develop-eggs/
+dist/
+eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.cache
+nosetests.xml
+coverage.xml
+
+# Translations
+*.mo
+
+# Mr Developer
+.mr.developer.cfg
+.project
+.pydevproject
+
+# Rope
+.ropeproject
+
+# Django stuff:
+*.log
+*.pot
+
+# Sphinx documentation
+docs/_build/
+
+.DS_Store
+db.sqlite3
+
+# IntelliJ IDE files
+.idea
+
+# Node Modules 
+core/node_modules/
+
+# Bundles
+core/static/js/bundles/
diff --git a/backup_ver1.py b/backup_ver1.py
@@ -0,0 +1,37 @@
+import os
+import time
+
+source = ['/home/intrior/notes']
+
+target_dir = '/home/intrior/backup'
+
+if not os.path.exists(target_dir):
+	os.mkdir(target_dir) # make directory
+
+today = target_dir + os.sep + time.strftime('%Y%m%d')
+
+now = time.strftime('%H%M%S')
+
+comment = raw_input('Enter a comment --> ')
+
+if len(comment) == 0:
+	target = today + os.sep + now + '.zip'
+else:
+	target = today + os.sep + now + '_' + \
+		comment.replace(' ', '_') + '.zip'
+
+if not os.path.exists(today):
+	os.mkdir(today)
+	print 'Successfully created directory', today
+
+
+zip_command = "zip -r {0} {1}".format(target, ' '.join(source))
+
+print "Zip command is:"
+print zip_command
+print "Running:"
+
+if os.system(zip_command) == 0:
+	print 'Successful backup to', target
+else:
+	print 'Backup FAILED'
diff --git a/consignee.csv b/consignee.csv
diff --git a/hello.py b/hello.py
@@ -0,0 +1,23 @@
+shoplist = ['apple', 'mango', 'carrot', 'banana']
+
+print 'I have', len(shoplist), 'items to purchase'
+
+print 'These items are:',
+for item in shoplist:
+	print item,
+
+print '\nI also have to buy rice.'
+shoplist.append('rice')
+
+print 'My shopping list is now', shoplist
+
+print 'I will sort my list now'
+shoplist.sort()
+print 'Sorted shopping list is', shoplist
+
+print 'The first item I will buy is', shoplist[0]
+olditem = shoplist[0]
+del shoplist[0]
+
+print 'I bought the', olditem
+print 'My shopping list is now', shoplist
diff --git a/key-match.py b/key-match.py
@@ -0,0 +1,124 @@
+import numpy as np # linear algebra
+import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
+import re
+import pprint
+
+
+#take user input
+consignee_names = raw_input('Enter consignee names separated with comma(,): ')
+list_of_consignee_names = map(str, consignee_names.split(''))
+
+print list_of_consignee_names
+
+# list_of_consignee_names.
+
+#if you get list of consignee as input
+#convert it to series 
+#extract the most recurring pattern from list of consigness and return it as key
+series_of_consignee_names = pd.Series(list_of_consignee_names)
+print series_of_consignee_names
+
+# pattern = r'.*' + re.escape(list_of_consignee_names[0]) + r'.*'
+
+# pattern = re.compile(r'.*({}).*', re.IGNORECASE)
+boom = lambda x:True if any(w in kw for w in x.split()) else False
+results = series_of_consignee_names.str.contains(pattern)
+
+
+
+# results = series_of_consignee_names.str.contains(pattern)
+
+pp = pprint.PrettyPrinter(indent=4)
+
+pp.pprint(dict([('key',list_of_consignee_names[0])]))
+
+print results
+
+
+
+
+
+# CENSEA INC, CENSEA INC, M/s CENSEA INC., M/S CENSEA INC., CENSEA INC., CENSEA Food Incorporated,BAYFRESH INC, BAY FRESH INC
+
+# ['CENSEA INC,','CENSEA INC','M/s CENSEA INC.,','M/S CENSEA INC.','CENSEA INC.,']
+
+# ['BAYFRESH INC','BAYFRESH INC, 3760 SEATON DR.,','BAYFRESH INC.']
+
+
+
+
+
+
+
+# consignee = pd.read_csv("/home/intrior/projects/trial/consignee.csv", na_values="NA")
+# # consignee = pd.read_csv("/home/intrior/projects/trial/consignee.csv", na_values="NA")
+
+# consignee_names_series = consignee['CONSINEE']
+
+# consignee_names_series = consignee_names_series.str.strip()
+
+# # print consignee_names_series
+
+# #create this regex instead of hardcoding it
+# # regex_pattern = re.compile(r'.*censea.*', flags=re.IGNORECASE)
+
+
+# text_to_search = text_to_search.strip()
+
+# # text_to_search = 'india'
+# regex_pattern = r".*" + re.escape(text_to_search) + r".*"
+# # regex_pattern = r"\w\s" + re.escape(text_to_search) + r"\w"
+
+# results = consignee_names_series[consignee_names_series.str.findall(regex_pattern, flags=re.IGNORECASE).str.len() > 0]
+# # results = consignee_names_series.str.findall(r'.*censea.*', flags=re.IGNORECASE)
+
+# pp = pprint.PrettyPrinter(indent=4)
+# pp.pprint(dict([(text_to_search,dict([('matches', results.drop_duplicates().values.tolist()), ('total', len(results))]))]))
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# print results
+# print len(results)
+
+
+
+# sliced_results = results.str.slice()
+# print sliced_results
+
+#this what gives results as we want
+# y[y.str.findall(r'.*censea.*', flags=re.IGNORECASE).str.len() > 0]
+
+
+# this works from csv data
+# y.str.findall(r'.*censea.*', flags=re.IGNORECASE)
+
+# works for find all
+# regex_pat = re.compile(r'.*censea.*', flags=re.IGNORECASE)
+# results=y.str.findall(regex_pat)
+
+# gives string s as non list
+# slie_series = results.str.slice()
+
+
+# returns just list of all censea only
+# regex_pat = re.compile(r'censea', flags=re.IGNORECASE)
+
+# only beginning with censea
+# regex_pat = re.compile(r'^censea', flags=re.IGNORECASE)
+
diff --git a/match-algo.py b/match-algo.py
@@ -0,0 +1,78 @@
+import numpy as np # linear algebra
+import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
+import re
+import pprint
+
+
+consignee = pd.read_csv("/home/intrior/projects/trial/consignee.csv", na_values="NA")
+# consignee = pd.read_csv("/home/intrior/projects/trial/consignee.csv", na_values="NA")
+
+consignee_names_series = consignee['CONSINEE']
+
+consignee_names_series = consignee_names_series.str.strip()
+
+# print consignee_names_series
+
+#create this regex instread of hardcoding it
+# regex_pattern = re.compile(r'.*censea.*', flags=re.IGNORECASE)
+
+#take user input
+text_to_search = raw_input('Enter consignee name to search: ')
+
+text_to_search = text_to_search.strip()
+
+# text_to_search = 'india'
+# regex_pattern = r".*" + re.escape(text_to_search) + r".*"
+regex_pattern = r".* ?" + re.escape(text_to_search) + r".*"
+
+results = consignee_names_series[consignee_names_series.str.findall(regex_pattern, flags=re.IGNORECASE).str.len() > 0]
+# results = consignee_names_series.str.findall(r'.*censea.*', flags=re.IGNORECASE)
+
+pp = pprint.PrettyPrinter(indent=4)
+pp.pprint(dict([(text_to_search,dict([('matches', results.drop_duplicates().values.tolist()), ('total', len(results))]))]))
+
+
+
+
+
+# If a list of consignees is given as input:
+# convert it to a Series,
+# extract the most frequently recurring pattern from the list of consignees and return it as the key.
+
+
+
+
+
+
+
+
+
+# print results
+# print len(results)
+
+
+
+# sliced_results = results.str.slice()
+# print sliced_results
+
+#this what gives results as we want
+# y[y.str.findall(r'.*censea.*', flags=re.IGNORECASE).str.len() > 0]
+
+
+# this works from csv data
+# y.str.findall(r'.*censea.*', flags=re.IGNORECASE)
+
+# works for find all
+# regex_pat = re.compile(r'.*censea.*', flags=re.IGNORECASE)
+# results=y.str.findall(regex_pat)
+
+# gives string s as non list
+# slie_series = results.str.slice()
+
+
+# returns just list of all censea only
+# regex_pat = re.compile(r'censea', flags=re.IGNORECASE)
+
+# only beginning with censea
+# regex_pat = re.compile(r'^censea', flags=re.IGNORECASE)
+
diff --git a/more_lambda.py b/more_lambda.py
@@ -0,0 +1,5 @@
+points = [ { 'x' : 2, 'y' : 3 }, { 'x' : 4, 'y' : 1 } ]
+
+points.sort(key = lambda i : i['y'])
+
+print points
diff --git a/my_decorator.py b/my_decorator.py
@@ -0,0 +1,36 @@
+from time import sleep
+from functools import wraps
+import logging
+
+# Set up default logging so that messages sent to `log` are emitted.
+logging.basicConfig()
+# Logger the retry decorator uses for per-attempt and final failure reports.
+log = logging.getLogger("retry")
+
+def retry(f):
+	@wraps(f)
+	def wrapped_f(*args, **kwargs):
+		MAX_ATTEMPTS = 5
+		for attempt in range(1, MAX_ATTEMPTS + 1):
+			try:
+				return f(*args, **kwargs)
+			except:
+				log.exception("Attempt %s/%s failed : %s",attempt,	MAX_ATTEMPTS,(args, kwargs))
+			sleep(10 * attempt)
+		log.critical("All %s attempts failed : %s",	MAX_ATTEMPTS,(args, kwargs))
+	return wrapped_f
+
+counter = 0
+
+@retry
+def save_to_database(arg):
+	print "Write to a database or make a network call or etc."
+	print "This will be automatically retried if exception is thrown."
+	global counter
+	counter += 1
+	# This will throw an exception in the first call
+	# And will work fine in the second call (i.e. a retry)
+	if counter < 2:
+		raise ValueError(arg)
+
+
+if __name__ == '__main__':
+	save_to_database("Some bad value")