Skip to content

Commit

Permalink
Merge pull request #12 from rtgdk/python3
Browse files Browse the repository at this point in the history
Update tool to python3 and fix requirement differences, update README
  • Loading branch information
Umang Taneja authored Dec 8, 2020
2 parents bf3b441 + 538e3be commit 7d7d81a
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 24 deletions.
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
__pycache__/
*.py[cod]
*$py.class
*.py.bak

# C extensions
*.so
Expand Down Expand Up @@ -127,4 +128,11 @@ dmypy.json
# Pyre type checker
.pyre/

# IDEs
.project
.pydevproject
.idea
org.eclipse.core.resources.prefs
.vscode

# End of https://www.gitignore.io/api/python
32 changes: 30 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,38 @@ Ensure that you are using Python3 for installation of the tool.

* Install dependencies inside virtual environment
* `pip3 install -r requirements.txt`

* Install redis server on your local machine.

## Usage
First of all, before using the tool you will have to create a database of the SPDX license list; to do this, just run `python build_licenses.py`.
**For linux users**
* Use the command `sudo apt-get install redis-server` to install the redis server.

**For Mac users**

* Install the redis by running the command

`brew install redis`.
* If you want to run redis whenever your computer starts then run

`ln -sfv /usr/local/opt/redis/*.plist ~/Library/LaunchAgents`.

* To run the redis server use

`launchctl load ~/Library/LaunchAgents/homebrew.mxcl.redis.plist`.
* To test whether redis is working, run the command `redis-cli ping`. If it returns `PONG`, you are good to go.

**For Windows users**

* Download the redis server from [here](https://github.com/microsoftarchive/redis/releases) and install it.
* Make sure redis server is running and keep it running until you are done using the tool.

*Redis is used to store the license text of each license present on the SPDX license list. The very first time, it may take a while to build the licenses on the redis server.*

*SPDX License Matcher matches the license text input by the user (via the license submittal form) against the data present on redis to find duplicate and near matches.*


## Usage
To run the tool just use the command `python matcher.py -f <file-name> -t <threshold>`.
* `filename` is the file with the license text (if you don't provide the file, the tool will prompt you to add it).
* `threshold` is the minimum similarity score for a match; licenses scoring below it are not considered a match (optional).
Expand Down
7 changes: 3 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
redis==3.2.1
requests==2.22.0
jpype1==0.6.3
requests==2.24
jpype1==1.0.1
jellyfish==0.6.1
click==7.0
python-dotenv==0.13.0
futures; python_version == "2.7"
python-dotenv==0.14.0
6 changes: 3 additions & 3 deletions spdx_license_matcher/build_licenses.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
from dotenv import load_dotenv
import os

from normalize import normalize
from utils import compressStringToBytes
from spdx_license_matcher.normalize import normalize
from spdx_license_matcher.utils import compressStringToBytes

load_dotenv()

r = redis.StrictRedis(host=os.environ.get(key="SPDX_REDIS_HOST", failobj="localhost"), port=6379, db=0)
r = redis.StrictRedis(host=os.environ.get(key="SPDX_REDIS_HOST", default="localhost"), port=6379, db=0)


def get_url(url):
Expand Down
14 changes: 7 additions & 7 deletions spdx_license_matcher/computation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from normalize import normalize
from sorensen_dice import get_dice_coefficient
from utils import (checkTextStandardLicense, decompressBytesToString,
from spdx_license_matcher.normalize import normalize
from spdx_license_matcher.sorensen_dice import get_dice_coefficient
from spdx_license_matcher.utils import (checkTextStandardLicense, decompressBytesToString,
getListedLicense)

def get_close_matches(inputText, licenseData, threshold=0.9):
Expand All @@ -16,7 +16,7 @@ def get_close_matches(inputText, licenseData, threshold=0.9):
matches = {}
perfectMatches = {}
normalizedInputText = normalize(inputText)
for key in licenseData.keys():
for key in list(licenseData.keys()):
try:
licenseName = key.decode('utf-8')
normalizedLicenseText = decompressBytesToString(licenseData.get(key))
Expand All @@ -31,7 +31,7 @@ def get_close_matches(inputText, licenseData, threshold=0.9):
matches[licenseName] = score
if perfectMatches:
return perfectMatches
matches = {licenseName: score for licenseName, score in matches.items() if score >= threshold}
matches = {licenseName: score for licenseName, score in list(matches.items()) if score >= threshold}
return matches


Expand All @@ -49,8 +49,8 @@ def get_matching_string(matches, inputText):
matchingString = 'There is not enough confidence threshold for the text to match against the SPDX License database.'
return matchingString

elif all(score == 1.0 for score in matches.values()):
matchingString = 'The following license ID(s) match: ' + ", ".join(matches.keys())
elif all(score == 1.0 for score in list(matches.values())):
matchingString = 'The following license ID(s) match: ' + ", ".join(list(matches.keys()))
return matchingString

else:
Expand Down
14 changes: 7 additions & 7 deletions spdx_license_matcher/matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
import codecs
import redis

from build_licenses import build_spdx_licenses, is_keys_empty,get_url
from computation import get_close_matches, get_matching_string
from difference import generate_diff, get_similarity_percent
from utils import colors, get_spdx_license_text
from spdx_license_matcher.build_licenses import build_spdx_licenses, is_keys_empty,get_url
from spdx_license_matcher.computation import get_close_matches, get_matching_string
from spdx_license_matcher.difference import generate_diff, get_similarity_percent
from spdx_license_matcher.utils import colors, get_spdx_license_text

from dotenv import load_dotenv
import os
Expand All @@ -31,10 +31,10 @@ def matcher(text_file, threshold, build):
click.echo('Building SPDX License List. This may take a while...')
build_spdx_licenses()

r = redis.StrictRedis(host=os.environ.get(key="SPDX_REDIS_HOST", failobj="localhost"), port=6379, db=0)
keys = r.keys()
r = redis.StrictRedis(host=os.environ.get(key="SPDX_REDIS_HOST", default="localhost"), port=6379, db=0)
keys = list(r.keys())
values = r.mget(keys)
licenseData = dict(zip(keys, values))
licenseData = dict(list(zip(keys, values)))
matches = get_close_matches(inputText, licenseData, threshold)
matchingString = get_matching_string(matches, inputText)
if matchingString == '':
Expand Down
2 changes: 1 addition & 1 deletion spdx_license_matcher/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def normalize(licenseText):
licenseText = re.sub(BULLETS_NUMBERING_REGEX, " ", licenseText)

# To avoid the possibility of a non-match due to the same word being spelled differently.
for initial, final in VARIETAL_WORDS_SPELLING.items():
for initial, final in list(VARIETAL_WORDS_SPELLING.items()):
licenseText = licenseText.replace(initial, final)

# To avoid the possibility of a non-match due to different spacing of words, line breaks, or paragraphs.
Expand Down

0 comments on commit 7d7d81a

Please sign in to comment.