Merge pull request #9 from johnsonandjohnson/8-pip-install-broken
8 pip install broken
sara-von-hein-shaw authored Jan 11, 2023
2 parents a68fbe1 + 3f7324a commit 3fbf41f
Showing 7 changed files with 14 additions and 14 deletions.
dev_environment.yml (4 changes: 2 additions & 2 deletions)

@@ -7,8 +7,8 @@ dependencies:
   - pip
   - pycodestyle=2.5.0
   - python=3.7.*
-  - spacy=2.1.*
-  - spacy-model-en_core_web_sm=2.1.0
+  - spacy=3.4.*
+  - spacy-model-en_core_web_sm=3.4.*
   - pytest=4.4.*
   - pytest-xdist>=1.28.0
   - pytest-sugar=0.9.*
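With spacy and the model both pinned to the 3.4 line, a quick interpreter check confirms the rebuilt environment resolved consistently. A throwaway sketch, not part of the repo:

    import spacy
    import en_core_web_sm

    # The conda pins keep spacy and en_core_web_sm on the same 3.4.* minor
    # line; spaCy also validates this at load time against the model's
    # declared spacy_version constraint.
    nlp = en_core_web_sm.load()
    print("spaCy:", spacy.__version__)
    print("model:", nlp.meta["version"], "requires spaCy", nlp.meta["spacy_version"])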
nlprov/__init__.py (4 changes: 2 additions & 2 deletions)

@@ -2,12 +2,12 @@
 Copyright © 2020 Johnson & Johnson
 """

-import spacy
+import en_core_web_sm


 def get_spacy_nlp():
     try:
-        spacy_nlp = spacy.load('en_core_web_sm', disable=['parser', 'ner', 'textcat'])
+        spacy_nlp = en_core_web_sm.load(disable=['parser', 'ner', 'textcat'])
     except OSError:
         # We should tell the user explicitly what they need to do.
         raise Exception("Please run `python -m spacy download en_core_web_sm` locally.")
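Importing the model package and calling its load() avoids spaCy's name-based lookup, which is what raised OSError when the model arrived as a plain pip dependency rather than via spacy download. A minimal sketch of the two equivalent paths under spaCy 3 (the disable list mirrors the one above):

    import spacy
    import en_core_web_sm

    # Path taken by this change: the pip-installed model package exposes
    # load(), which forwards keyword overrides to spacy.load() internally.
    nlp = en_core_web_sm.load(disable=['parser', 'ner', 'textcat'])

    # Name-based path: equivalent when the package is importable, but this
    # is the call that fails with OSError when lookup cannot find a model.
    nlp = spacy.load('en_core_web_sm', disable=['parser', 'ner', 'textcat'])

    print(nlp.pipe_names)  # components left enabled after the disable list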
nlprov/preprocessing.py (2 changes: 1 addition & 1 deletion)

@@ -70,7 +70,7 @@ def preprocess_text(text: pd.Series,
     text = text.apply(lambda s: re.sub(regex, ' ', s))

     text = text.str.strip()
-    text = text.str.replace(r'\s+', ' ')
+    text = text.str.replace(r'\s+', ' ', regex=True)

     if eng_lang:
         text = text[text.apply(lambda t: langid.classify(t)[0]) == 'en']
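The added regex=True matters because pandas deprecated the implicit-regex default of Series.str.replace in 1.2 and flipped it to literal matching in 2.0, so without the flag newer pandas would search for the literal text '\s+'. A quick illustration:

    import pandas as pd

    s = pd.Series(['too   many    spaces'])

    # Explicit regex: runs of whitespace collapse to a single space.
    print(s.str.replace(r'\s+', ' ', regex=True)[0])   # 'too many spaces'

    # Literal matching (the pandas 2.x default): nothing matches and the
    # string comes back unchanged.
    print(s.str.replace(r'\s+', ' ', regex=False)[0])  # 'too   many    spaces'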
nlprov/test/test_preprocess.py (10 changes: 5 additions & 5 deletions)

@@ -224,7 +224,7 @@ def lemma_actual():

 @pytest.fixture
 def lemma_expected():
-    return pd.Series(data=["lemma need",
+    return pd.Series(data=["lemmas need",
                            "duck and cat and pony be not similar",
                            "normal string"])

@@ -238,10 +238,10 @@ def test_lemma(lemma_actual, lemma_expected):
 # Test data for the list of token list output (lemmas)
 @pytest.fixture
 def token_list_expected():
-    return pd.Series(data=[["lemma", "need"],
-                           ["duck", "and", "cat", "and", "pony", "be",
-                            "not", "similar"],
-                           ["normal", "string"]])
+    return pd.Series(data=[['lemmas', 'need'],
+                           ['duck', 'and', 'cat', 'and', 'pony', 'be',
+                            'not', 'similar'],
+                           ['normal', 'string']])


 # Test data for the list of token list output (no lemmas)
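The fixture strings change because spaCy 3 replaced the 2.x lookup lemmatizer with a rule-based one driven by the tagger and attribute ruler, and it lemmatizes some tokens differently. When fixtures like these drift, regenerating them from the pipeline's actual output is the simplest route; a sketch (the input string here is illustrative, not taken from the test file):

    import en_core_web_sm

    # parser/ner/textcat are not needed for lemmas; the tagger and
    # attribute ruler that feed the rule-based lemmatizer stay enabled.
    nlp = en_core_web_sm.load(disable=['parser', 'ner', 'textcat'])

    doc = nlp("lemmas needed")
    print([tok.lemma_ for tok in doc])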
nlprov/test/test_vectorize.py (4 changes: 2 additions & 2 deletions)

@@ -42,7 +42,7 @@ def test_count_vectorizer(vectorize_actual, count_dfm_expected,
     assert type(vec_obj) is CountVectorizer

     # Check original terms are included
-    vocab_set = set(vec_obj.get_feature_names())
+    vocab_set = set(vec_obj.get_feature_names_out())
     assert vocab_set == vocab_set_expected


@@ -64,7 +64,7 @@ def test_tfidf_vectorizer(vectorize_actual, tfidf_dfm_expected,
     assert type(vec_obj) is TfidfVectorizer

     # Check original terms are included
-    vocab_set = set(vec_obj.get_feature_names())
+    vocab_set = set(vec_obj.get_feature_names_out())
     assert vocab_set == vocab_set_expected
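get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2; its replacement get_feature_names_out() returns an ndarray of strings rather than a list, a difference the tests' set() wrapper absorbs. A minimal sketch:

    from sklearn.feature_extraction.text import CountVectorizer

    vec = CountVectorizer()
    vec.fit(['the cat sat', 'the dog sat'])

    # Replacement API: an ndarray of str, sorted by term.
    print(vec.get_feature_names_out())       # ['cat' 'dog' 'sat' 'the']
    print(set(vec.get_feature_names_out()))  # order-insensitive comparison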
requirements.txt (2 changes: 1 addition & 1 deletion)

@@ -1,5 +1,5 @@
 pandas>=1.0.0
-spacy>=2.1,<2.1.7
+spacy>=3.4
 nltk>=3.4.3
 langid>=1.1.6
 scikit-learn>=0.21.3
setup.py (2 changes: 1 addition & 1 deletion)

@@ -18,7 +18,7 @@
     packages=pkgs,
     install_requires=[
         'pandas>=1.0.0',
-        'spacy>=2.1.0,<2.1.7',
+        'spacy>=3.4',
         'nltk>=3.4.3',
         'langid>=1.1.6',
         'scikit-learn>=0.21.3'
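requirements.txt and setup.py now carry the same relaxed pin, and they have to be edited in tandem; a throwaway script (hypothetical, not part of this repo) can flag drift between the two:

    # Hypothetical helper: warn when install_requires in setup.py drifts
    # from requirements.txt. Run from the repository root.
    import re
    from pathlib import Path

    reqs = set(Path('requirements.txt').read_text().split())
    setup_src = Path('setup.py').read_text()
    for pin in re.findall(r"'([A-Za-z0-9_.-]+[<>=!][^']*)'", setup_src):
        if pin not in reqs:
            print('drift:', pin)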
