Skip to content

Commit

Permalink
Merge pull request #31 from timwangmusic/py2neo-deprecation
Browse files Browse the repository at this point in the history
  • Loading branch information
timwangmusic authored Mar 6, 2024
2 parents 050332d + 1823fa5 commit 69c2939
Show file tree
Hide file tree
Showing 6 changed files with 112 additions and 113 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7, 3.8, 3.9]
python-version: ['3.8', '3.9', '3.10', '3.11']

steps:
- uses: actions/checkout@v2
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
logs/
src/.DS_Store
*/__pycache__/
env/
19 changes: 9 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,27 +1,26 @@
# Auto-complete System
[![Build Status](https://travis-ci.org/weihesdlegend/Auto-complete-System.svg?branch=master)](https://travis-ci.org/weihesdlegend/Auto-complete-System)

Auto-complete system using Neo4j graph database for storing data and providing fault tolerance. Returns top suggestions to users.
# Autocomplete System
Autocomplete system that uses a Neo4j graph database to store data and provide fault tolerance. Returns top suggestions to users.

## **Feature Support**
* Restful search API for auto-completing any phrase in English and returns top suggestions. Auto-correct invalid user inputs.
* RESTful search API that auto-completes any English phrase and returns top suggestions.
* Autocorrect invalid user inputs.
* Delete inappropriate phrases.
* Build new servers from Neo4j databases.
* Use advanced logging techniques to track usage patterns and generate reports.
* Serialization and deserialization of servers for data exchange.

### How to use
* Python version >= 3.7
* Install `Python3 venv`
* Ensure Python version >= 3.8
* Create a Python virtual environment.
```
macOS/Linux
# macOS/Linux
sudo apt-get install python3-venv # If needed
python3 -m venv env
source env/bin/activate
Windows
# Windows
python -m venv env
```
* Run `pip3 install -r requirements.txt` to install the python3 requirements
* Run `pip3 install -r requirements.txt` to install the python3 requirements.
* Run `python service_flask.py` to start REST service.
* Run `python analytics.py` to generate usage reports.
8 changes: 3 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ pytest
redis
scikit-learn
numpy
Flask==1.1.4
py2neo==4.3.0
PyYAML==5.4
jinja2<3.1.0
markupsafe==2.0.1
Flask==3.0.2
# py2neo==4.3.0
PyYAML
9 changes: 5 additions & 4 deletions src/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
"""


from py2neo import Graph, Relationship
# from py2neo import Graph, Relationship


class Parent(Relationship):
pass
# class Parent(Relationship):
# pass


class DatabaseHandler:
def __init__(self, username='neo4j', password='admin', bolt=7687):
self.graph = Graph(user=username, password=password, bolt_port=bolt)
# self.graph = Graph(user=username, password=password, bolt_port=bolt)
return
186 changes: 93 additions & 93 deletions src/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from typing import List

# from nltk.corpus import words as en_corpus
from py2neo import Node
# from py2neo import Node
from src.trienode import TrieNode
from src.spell import Spell

Expand Down Expand Up @@ -49,7 +49,7 @@ def __init__(self, *, num_res_return: int = 10, root: TrieNode = None, connect_t
self.__class__.server_index += 1
if connect_to_db:
self.db = database.DatabaseHandler()
self._selector = self.db.graph.nodes # get node matcher
# self._selector = self.db.graph.nodes # get node matcher

if root is None:
self.__root = TrieNode(prefix='', is_word=False)
Expand Down Expand Up @@ -132,97 +132,97 @@ def top_results(self, num_results=10):
res = self.__root.top_results.most_common(num_results)
return [word for word, count in res]

def build_db(self):
"""
This method removes data from database and build new graph with in-memory data in application server.
:return: None
"""
self.db.graph.delete_all() # delete all existing nodes and relationships
queue = deque()
tx = self.db.graph.begin()
self.logger.info('Start updating database.')
node = Node('TrieNode', 'ROOT',
isword=False,
name='',
)
node['count'] = self.__root.total_counts()
tx.create(node) # create root in neo4j
queue.append((node, self.__root))
count = 0

while queue:
db_node, cur = queue.popleft()
for child in cur.children:
prefix = cur.children[child].prefix
db_node_child = Node('TrieNode',
name=prefix,
isword=cur.children[child].isWord,
count=cur.children[child].total_counts()
)
queue.append((db_node_child, cur.children[child]))
tx.create(db_node_child)
count += 1
tx.create(database.Parent(db_node, db_node_child))

tx.commit()
if not self.testing:
self.logger.info(f'Finished building database. Number of nodes created is {count}')

if self.testing and tx.finished():
self.logger.info('Transaction finished.')

def update_db(self):
"""
Update database with the latest application server usage
:return: None
"""
root = self.__root
g = self.db.graph

def dfs(node, parent):
"""update node info to database"""
if not node:
return
db_node = self._selector.match('TrieNode', name=node.prefix).first()
if not db_node:
tx = g.begin()
db_node = Node('TrieNode',
name=node.prefix,
isword=node.isWord,
count=node.total_counts())
tx.create(db_node)
parent_db_node = self._selector.match('TrieNode', name=parent.prefix).first()
tx.create(database.Parent(parent_db_node, db_node))
tx.commit()
else:
db_node['count'] = node.total_counts()
g.push(db_node)
for child in node.children:
dfs(node.children[child], node)

dfs(root, None)

def build_trie(self):
"""
This method builds trie server with TrieNode-labeled nodes from the database.
Improves run-time by only inserting complete words.
:return: None
"""
self.app_reset()
root = self._selector.match('ROOT').first()
graph = self.db.graph

def dfs(node):
prefix, isword, count = node['name'], node['isword'], node['count']
if isword:
self.__insert(prefix, isword=True, from_db=True, count=count)
# find all parent-children relationships
for rel in graph.match(nodes=[node], r_type=database.Parent):
if rel is not None:
dfs(rel.nodes[1])

dfs(root)
self.update_top_results()
# def build_db(self):
# """
# This method removes data from database and build new graph with in-memory data in application server.
# :return: None
# """
# self.db.graph.delete_all() # delete all existing nodes and relationships
# queue = deque()
# tx = self.db.graph.begin()
# self.logger.info('Start updating database.')
# node = Node('TrieNode', 'ROOT',
# isword=False,
# name='',
# )
# node['count'] = self.__root.total_counts()
# tx.create(node) # create root in neo4j
# queue.append((node, self.__root))
# count = 0
#
# while queue:
# db_node, cur = queue.popleft()
# for child in cur.children:
# prefix = cur.children[child].prefix
# db_node_child = Node('TrieNode',
# name=prefix,
# isword=cur.children[child].isWord,
# count=cur.children[child].total_counts()
# )
# queue.append((db_node_child, cur.children[child]))
# tx.create(db_node_child)
# count += 1
# tx.create(database.Parent(db_node, db_node_child))
#
# tx.commit()
# if not self.testing:
# self.logger.info(f'Finished building database. Number of nodes created is {count}')
#
# if self.testing and tx.finished():
# self.logger.info('Transaction finished.')

# def update_db(self):
# """
# Update database with the latest application server usage
# :return: None
# """
# root = self.__root
# g = self.db.graph
#
# def dfs(node, parent):
# """update node info to database"""
# if not node:
# return
# db_node = self._selector.match('TrieNode', name=node.prefix).first()
# if not db_node:
# tx = g.begin()
# db_node = Node('TrieNode',
# name=node.prefix,
# isword=node.isWord,
# count=node.total_counts())
# tx.create(db_node)
# parent_db_node = self._selector.match('TrieNode', name=parent.prefix).first()
# tx.create(database.Parent(parent_db_node, db_node))
# tx.commit()
# else:
# db_node['count'] = node.total_counts()
# g.push(db_node)
# for child in node.children:
# dfs(node.children[child], node)
#
# dfs(root, None)

# def build_trie(self):
# """
# This method builds trie server with TrieNode-labeled nodes from the database.
# Improves run-time by only inserting complete words.
# :return: None
# """
# self.app_reset()
# root = self._selector.match('ROOT').first()
# graph = self.db.graph
#
# def dfs(node):
# prefix, isword, count = node['name'], node['isword'], node['count']
# if isword:
# self.__insert(prefix, isword=True, from_db=True, count=count)
# # find all parent-children relationships
# for rel in graph.match(nodes=[node], r_type=database.Parent):
# if rel is not None:
# dfs(rel.nodes[1])
#
# dfs(root)
# self.update_top_results()

def __insert(self, word: str, *, isword: bool = True, count: int = 0, from_db: bool = False) -> TrieNode:
"""
Expand Down

0 comments on commit 69c2939

Please sign in to comment.