Skip to content

Commit

Permalink
trying python and datascience
Browse files Browse the repository at this point in the history
  • Loading branch information
divinedeveloper committed Aug 18, 2017
0 parents commit 531d793
Show file tree
Hide file tree
Showing 12 changed files with 8,286 additions and 0 deletions.
65 changes: 65 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

# C extensions
*.so

# Distribution / packaging
.Python
env/
bin/
build/
develop-eggs/
dist/
eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.cache
nosetests.xml
coverage.xml

# Translations
*.mo

# Mr Developer
.mr.developer.cfg
.project
.pydevproject

# Rope
.ropeproject

# Django stuff:
*.log
*.pot

# Sphinx documentation
docs/_build/

.DS_Store
db.sqlite3

# IntelliJ IDE files
.idea

# Node Modules
core/node_modules/

# Bundles
core/static/js/bundles/
37 changes: 37 additions & 0 deletions backup_ver1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import os
import subprocess
import time

try:
    read_line = raw_input  # Python 2: returns the typed line unevaluated
except NameError:
    read_line = input  # Python 3 renamed raw_input() to input()

# Directories that get archived.
source = ['/home/intrior/notes']

# Root backup directory; one sub-directory per day is created beneath it.
target_dir = '/home/intrior/backup'


def build_target(day_dir, stamp, comment):
    """Return the path of the zip archive to create.

    day_dir -- directory the archive is written into
    stamp   -- time stamp used as the base of the file name
    comment -- free text typed by the user; may be empty

    The result is ``<day_dir>/<stamp>.zip`` for an empty (or blank)
    comment and ``<day_dir>/<stamp>_<comment>.zip`` otherwise, with spaces
    and path separators in the comment replaced by underscores so the
    comment can never move the archive into another directory.
    """
    label = comment.strip()
    for unsafe in (' ', os.sep, os.altsep):
        if unsafe:  # os.altsep is None on platforms with one separator
            label = label.replace(unsafe, '_')
    name = stamp + '_' + label if label else stamp
    return os.path.join(day_dir, name + '.zip')


def main():
    """Ask for a comment, then zip every entry of ``source`` into today's
    sub-directory of ``target_dir`` and report whether zip succeeded."""
    if not os.path.exists(target_dir):
        os.mkdir(target_dir)  # make directory

    today = os.path.join(target_dir, time.strftime('%Y%m%d'))
    now = time.strftime('%H%M%S')

    comment = read_line('Enter a comment --> ')
    target = build_target(today, now, comment)

    if not os.path.exists(today):
        os.mkdir(today)
        print('Successfully created directory {0}'.format(today))

    # Pass the arguments as a list: no shell is involved, so characters in
    # the user's comment cannot be interpreted as shell syntax.
    zip_command = ['zip', '-r', target] + source

    print('Zip command is:')
    print(' '.join(zip_command))
    print('Running:')

    try:
        status = subprocess.call(zip_command)
    except OSError:
        # The zip executable could not be started; report it as a failure
        # just as a non-zero exit status would be.
        status = -1

    if status == 0:
        print('Successful backup to {0}'.format(target))
    else:
        print('Backup FAILED')


if __name__ == '__main__':
    main()
7,826 changes: 7,826 additions & 0 deletions consignee.csv

Large diffs are not rendered by default.

23 changes: 23 additions & 0 deletions hello.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Walk-through of basic list operations on a shopping list.
# Every print() call takes a single string so the output is the same on
# Python 2 (where the parentheses are just grouping) and Python 3.
shoplist = ['apple', 'mango', 'carrot', 'banana']

print('I have {0} items to purchase'.format(len(shoplist)))

# Show all items on one line, separated by single spaces.
print('These items are: ' + ' '.join(shoplist))

print('I also have to buy rice.')
shoplist.append('rice')

print('My shopping list is now {0}'.format(shoplist))

print('I will sort my list now')
shoplist.sort()  # sorts in place
print('Sorted shopping list is {0}'.format(shoplist))

print('The first item I will buy is {0}'.format(shoplist[0]))
# pop(0) removes the first element and hands it back in one step.
olditem = shoplist.pop(0)

print('I bought the {0}'.format(olditem))
print('My shopping list is now {0}'.format(shoplist))
124 changes: 124 additions & 0 deletions key-match.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import re
import pprint


try:
    read_line = raw_input  # Python 2: returns the typed line unevaluated
except NameError:
    read_line = input  # Python 3 renamed raw_input() to input()


def parse_consignee_names(raw):
    """Split a comma-separated string into a list of consignee names.

    Surrounding whitespace is removed from every name and empty entries
    (for example from a trailing comma) are dropped, so an empty string
    yields an empty list.
    """
    stripped = (name.strip() for name in raw.split(','))
    return [name for name in stripped if name]


def match_consignees(series_of_consignee_names, key):
    """Return a boolean Series marking the names that contain ``key``.

    ``key`` is matched literally (regex metacharacters are escaped) and
    case-insensitively anywhere inside each name.
    """
    pattern = re.escape(key)
    return series_of_consignee_names.str.contains(
        pattern, flags=re.IGNORECASE)


def main():
    """Read consignee names, then report which ones contain the first one.

    TODO: extract the most recurring pattern from the list of consignees
    and use it as the key; for now the first name entered is the key.
    """
    # take user input
    consignee_names = read_line(
        'Enter consignee names separated with comma(,): ')
    list_of_consignee_names = parse_consignee_names(consignee_names)
    print(list_of_consignee_names)

    if not list_of_consignee_names:
        # Nothing to index with [0] below, so stop instead of crashing.
        print('No consignee names entered')
        return

    # dtype=object keeps the .str accessor usable regardless of how pandas
    # would otherwise infer the dtype.
    series_of_consignee_names = pd.Series(
        list_of_consignee_names, dtype=object)
    print(series_of_consignee_names)

    key = list_of_consignee_names[0]
    results = match_consignees(series_of_consignee_names, key)

    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint({'key': key})

    print(results)


if __name__ == '__main__':
    main()





# CENSEA INC, CENSEA INC, M/s CENSEA INC., M/S CENSEA INC., CENSEA INC., CENSEA Food Incorporated,BAYFRESH INC, BAY FRESH INC

# ['CENSEA INC,','CENSEA INC','M/s CENSEA INC.,','M/S CENSEA INC.','CENSEA INC.,']

# ['BAYFRESH INC','BAYFRESH INC, 3760 SEATON DR.,','BAYFRESH INC.']







# consignee = pd.read_csv("/home/intrior/projects/trial/consignee.csv", na_values="NA")
# # consignee = pd.read_csv("/home/intrior/projects/trial/consignee.csv", na_values="NA")

# consignee_names_series = consignee['CONSINEE']

# consignee_names_series = consignee_names_series.str.strip()

# # print consignee_names_series

# #create this regex instead of hardcoding it
# # regex_pattern = re.compile(r'.*censea.*', flags=re.IGNORECASE)


# text_to_search = text_to_search.strip()

# # text_to_search = 'india'
# regex_pattern = r".*" + re.escape(text_to_search) + r".*"
# # regex_pattern = r"\w\s" + re.escape(text_to_search) + r"\w"

# results = consignee_names_series[consignee_names_series.str.findall(regex_pattern, flags=re.IGNORECASE).str.len() > 0]
# # results = consignee_names_series.str.findall(r'.*censea.*', flags=re.IGNORECASE)

# pp = pprint.PrettyPrinter(indent=4)
# pp.pprint(dict([(text_to_search,dict([('matches', results.drop_duplicates().values.tolist()), ('total', len(results))]))]))



















# print results
# print len(results)



# sliced_results = results.str.slice()
# print sliced_results

#this is what gives the results we want
# y[y.str.findall(r'.*censea.*', flags=re.IGNORECASE).str.len() > 0]


# this works from csv data
# y.str.findall(r'.*censea.*', flags=re.IGNORECASE)

# works for find all
# regex_pat = re.compile(r'.*censea.*', flags=re.IGNORECASE)
# results=y.str.findall(regex_pat)

# gives string s as non list
# slie_series = results.str.slice()


# returns just list of all censea only
# regex_pat = re.compile(r'censea', flags=re.IGNORECASE)

# only beginning with censea
# regex_pat = re.compile(r'^censea', flags=re.IGNORECASE)

78 changes: 78 additions & 0 deletions match-algo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import re
import pprint


# Default location of the consignee export read by main().
CONSIGNEE_CSV = "/home/intrior/projects/trial/consignee.csv"

try:
    read_line = raw_input  # Python 2: returns the typed line unevaluated
except NameError:
    read_line = input  # Python 3 renamed raw_input() to input()


def load_consignee_names(csv_path=CONSIGNEE_CSV):
    """Read ``csv_path`` and return its 'CONSINEE' column, whitespace-stripped.

    The literal string "NA" in the file is treated as a missing value.
    """
    consignee = pd.read_csv(csv_path, na_values="NA")
    return consignee['CONSINEE'].str.strip()


def search_consignees(consignee_names_series, text_to_search):
    """Return the entries of the Series that contain ``text_to_search``.

    The text is matched literally (regex metacharacters are escaped),
    case-insensitively, anywhere inside a name. Missing values never
    match. An empty search text matches every non-missing name.
    """
    # The pattern is built from the user's text instead of being hardcoded.
    regex_pattern = re.escape(text_to_search)
    # na=False turns missing names into "no match" so the result is a
    # clean boolean mask.
    mask = consignee_names_series.str.contains(
        regex_pattern, flags=re.IGNORECASE, na=False)
    return consignee_names_series[mask]


def summarize_matches(text_to_search, results):
    """Build the report ``{text: {'matches': [...], 'total': n}}``.

    'matches' lists each distinct matching name once, in order of first
    appearance; 'total' counts every matching row, duplicates included.
    """
    return {
        text_to_search: {
            'matches': results.drop_duplicates().tolist(),
            'total': len(results),
        },
    }


def main():
    """Load the consignee names, ask for a search text, print the report."""
    consignee_names_series = load_consignee_names()

    # take user input
    text_to_search = read_line('Enter consignee name to search: ').strip()

    results = search_consignees(consignee_names_series, text_to_search)

    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(summarize_matches(text_to_search, results))


if __name__ == '__main__':
    main()





#if you get list of consignee as input
#convert it to series
#extract the most recurring pattern from the list of consignees and return it as key









# print results
# print len(results)



# sliced_results = results.str.slice()
# print sliced_results

#this is what gives the results we want
# y[y.str.findall(r'.*censea.*', flags=re.IGNORECASE).str.len() > 0]


# this works from csv data
# y.str.findall(r'.*censea.*', flags=re.IGNORECASE)

# works for find all
# regex_pat = re.compile(r'.*censea.*', flags=re.IGNORECASE)
# results=y.str.findall(regex_pat)

# gives string s as non list
# slie_series = results.str.slice()


# returns just list of all censea only
# regex_pat = re.compile(r'censea', flags=re.IGNORECASE)

# only beginning with censea
# regex_pat = re.compile(r'^censea', flags=re.IGNORECASE)

5 changes: 5 additions & 0 deletions more_lambda.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Sort a list of point dictionaries by their 'y' coordinate.
points = [{'x': 2, 'y': 3}, {'x': 4, 'y': 1}]

# The key function picks the value each dictionary is ordered by.
points.sort(key=lambda point: point['y'])

# Single-argument print() gives the same output on Python 2 and Python 3.
print(points)
36 changes: 36 additions & 0 deletions my_decorator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from time import sleep
from functools import wraps
import logging

logging.basicConfig()
log = logging.getLogger("retry")


def retry(f=None, max_attempts=5, base_delay=10):
    """Decorator that calls ``f`` again when it raises an exception.

    Usable bare (``@retry``) or configured
    (``@retry(max_attempts=3, base_delay=1)``).

    f            -- the function to wrap; omitted in the configured form
    max_attempts -- how many times ``f`` is called at most
    base_delay   -- seconds slept after the first failure; the pause after
                    failed attempt ``n`` is ``base_delay * n``

    The wrapper returns whatever ``f`` returns on the first successful
    call. Each failure is logged with its traceback. If every attempt
    fails, a critical message is logged and the wrapper returns ``None``;
    the last exception is not re-raised.
    """
    if f is None:
        # Configured form: hand back a decorator that remembers the options.
        return lambda func: retry(func, max_attempts, base_delay)

    @wraps(f)
    def wrapped_f(*args, **kwargs):
        for attempt in range(1, max_attempts + 1):
            try:
                return f(*args, **kwargs)
            except Exception:
                # Exception (not a bare except) so KeyboardInterrupt and
                # SystemExit still stop the program instead of being retried.
                log.exception("Attempt %s/%s failed : %s",
                              attempt, max_attempts, (args, kwargs))
                # No point waiting after the final attempt: nothing follows.
                if attempt < max_attempts:
                    sleep(base_delay * attempt)
        log.critical("All %s attempts failed : %s",
                     max_attempts, (args, kwargs))
    return wrapped_f

# Number of times save_to_database() has been entered.
counter = 0


@retry
def save_to_database(arg):
    """Demo operation that fails on its first call and succeeds afterwards.

    Raises ValueError(arg) while the module-level ``counter`` is below 2,
    i.e. on the first call only, so that the ``retry`` decorator has
    something to retry.
    """
    global counter
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print("Write to a database or make a network call or etc.")
    print("This will be automatically retried if exception is thrown.")
    counter += 1
    # This will throw an exception in the first call
    # And will work fine in the second call (i.e. a retry)
    if counter < 2:
        raise ValueError(arg)


if __name__ == '__main__':
    save_to_database("Some bad value")
Loading

0 comments on commit 531d793

Please sign in to comment.