
Commit 9ce0c71

Merge pull request #222 from sassoftware/EDMMMX-13491-write-package-requirements-txt
feature: Added ability to create requirements.txt in create_requirements_json function
2 parents 1660da9 + 973fa82 commit 9ce0c71

4 files changed: +80 -11 lines changed

examples/pzmm_generate_complete_model_card.ipynb

Lines changed: 3 additions & 1 deletion

@@ -1716,12 +1716,14 @@
     ],
     "source": [
      "# Step 13: Generate requirements files\n",
-     "requirements_json = pzmm.JSONFiles.create_requirements_json(output_path)\n",
+     "requirements_json = pzmm.JSONFiles.create_requirements_json(output_path, create_requirements_txt=False)\n",
      "\n",
      "import json\n",
      "print(json.dumps(requirements_json, sort_keys=True, indent=4))\n",
      "\n",
      "for requirement in requirements_json:\n",
+     "    # Example: Replace sklearn with scikit-learn in requirements\n",
+     "    # (This is redundant in newer versions but shows how to modify package names)\n",
      "    if 'sklearn' in requirement['step']:\n",
      "        requirement['command'] = requirement[\"command\"].replace('sklearn', 'scikit-learn')\n",
      "        requirement['step'] = requirement['step'].replace('sklearn', 'scikit-learn')\n",

examples/pzmm_generate_requirements_json.ipynb

Lines changed: 7 additions & 3 deletions
@@ -14,16 +14,18 @@
    "id": "e9b8cb7c-1974-4af5-8992-d51f90fcfe5b",
    "metadata": {},
    "source": [
-    "# Automatic Generation of the requirements.json File\n",
+    "# Automatic Generation of the requirements.json or requirements.txt File\n",
     "In order to validate Python models within a container publishing destination, the Python packages that contain the modules used in the Python score code file and its score resource files must be installed in the run-time container. You can install the packages when you publish a Python model, or a decision that contains a Python model, to a container publishing destination by adding a `requirements.json` file with the package install statements to your model.\n",
     "\n",
     "This notebook provides an example execution and assessment of the create_requirements_json() function added in python-sasctl v1.8.0. The aim of this function is to help create the instructions (i.e., the `requirements.json` file) for a lightweight Python container in SAS Model Manager. Lightweight here means that the container only installs the packages found in the model's pickle files and Python scripts.\n",
     "\n",
+    "Additionally, the create_requirements_json() function provides an optional parameter, `create_requirements_txt`, which, when set to `True`, generates a requirements.txt file alongside the requirements.json file. By default, this option is set to `False`. The requirements.txt file is needed when deploying Python models to SAS Event Stream Processing, which requires this format for package installation in its environment. While SAS Model Manager continues to use the requirements.json format, adding the requirements.txt file ensures compatibility across both platforms.\n",
+    "\n",
     "### **User Warnings**\n",
     "The methods utilized in this function can determine package dependencies and versions from provided scripts and pickle files, but there are some stipulations to consider:\n",
     "\n",
     "1. If run outside of the development environment that the model was created in, the create_requirements_json() function **CANNOT** accurately determine the required package _versions_.\n",
-    "2. Not all Python packages have matching import and install names, so some of the packages added to the requirements.json file may be incorrectly named (e.g. `import sklearn` vs `pip install scikit-learn`).\n",
+    "2. Not all Python packages have matching import and install names, so some of the packages added to the requirements.json file may be incorrectly named (e.g. `import sklearn` vs `pip install scikit-learn`). Some major packages with differing import and install names are converted automatically.\n",
     "\n",
     "As such, it is recommended that the user check the requirements.json file for package name and version accuracy before deploying to a run-time container in SAS Model Manager."
    ]
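
The mismatch described in warning 2 can be seen directly with the standard library's importlib.metadata (an illustrative aside, assuming a typical environment where only the scikit-learn distribution is installed):

    import importlib.metadata

    # The distribution is published as "scikit-learn"...
    print(importlib.metadata.version("scikit-learn"))

    # ...but the module is imported as "sklearn", and no distribution
    # by that name exists in a typical environment
    try:
        importlib.metadata.version("sklearn")
    except importlib.metadata.PackageNotFoundError:
        print("no distribution named 'sklearn'")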
@@ -63,7 +65,7 @@
    "outputs": [],
    "source": [
     "model_dir = Path.cwd() / \"data/hmeqModels/DecisionTreeClassifier\"\n",
-    "requirements_json = pzmm.JSONFiles.create_requirements_json(model_dir)"
+    "requirements_json = pzmm.JSONFiles.create_requirements_json(model_dir, create_requirements_txt=False)"
    ]
   },
   {
@@ -145,6 +147,8 @@
    ],
    "source": [
     "for requirement in requirements_json:\n",
+    "    # Example: Replace sklearn with scikit-learn in requirements\n",
+    "    # (This is redundant in newer versions but shows how to modify package names)\n",
     "    if 'sklearn' in requirement['step']:\n",
     "        requirement['command'] = requirement[\"command\"].replace('sklearn', 'scikit-learn')\n",
     "        requirement['step'] = requirement['step'].replace('sklearn', 'scikit-learn')\n",

src/sasctl/pzmm/write_json_files.py

Lines changed: 59 additions & 4 deletions
@@ -1614,6 +1614,7 @@ def create_requirements_json(
         cls,
         model_path: Union[str, Path, None] = Path.cwd(),
         output_path: Union[str, Path, None] = None,
+        create_requirements_txt: bool = False,
     ) -> Union[dict, None]:
         """
         Searches the model directory for Python scripts and pickle files and
@@ -1636,14 +1637,22 @@ def create_requirements_json(
         environment.

         When provided with an output_path argument, this function outputs a JSON file
-        named "requirements.json". Otherwise, a list of dicts is returned.
+        named "requirements.json". If create_requirements_txt is True, it will also
+        create a requirements.txt file. Otherwise, a list of dicts is returned.
+
+        Note: The requirements.txt file is only created when both output_path and
+        create_requirements_txt are specified.

         Parameters
         ----------
         model_path : str or pathlib.Path, optional
             The path to a Python project, by default the current working directory.
         output_path : str or pathlib.Path, optional
             The path for the output requirements.json file. The default value is None.
+        create_requirements_txt : bool, optional
+            Whether to also create a requirements.txt file in addition to the
+            requirements.json file. This is useful for SAS Event Stream Processing
+            environments. The default value is False.

         Returns
         -------
@@ -1662,11 +1671,57 @@ def create_requirements_json(
         package_list = list(set(list(_flatten(package_list))))
         package_list = cls.remove_standard_library_packages(package_list)
         package_and_version = cls.get_local_package_version(package_list)
+
         # Identify packages with missing versions
         missing_package_versions = [
             item[0] for item in package_and_version if not item[1]
         ]

+        IMPORT_TO_INSTALL_MAPPING = {
+            # Data Science & ML Core
+            "sklearn": "scikit-learn",
+            "skimage": "scikit-image",
+            "cv2": "opencv-python",
+            "PIL": "Pillow",
+            # Data Formats & Parsing
+            "yaml": "PyYAML",
+            "bs4": "beautifulsoup4",
+            "docx": "python-docx",
+            "pptx": "python-pptx",
+            # Date & Time Utilities
+            "dateutil": "python-dateutil",
+            # Database Connectors
+            "MySQLdb": "MySQL-python",
+            "psycopg2": "psycopg2-binary",
+            # System & Platform
+            "win32api": "pywin32",
+            "win32com": "pywin32",
+            # Scientific Libraries
+            "Bio": "biopython",
+        }
+
+        # Map import names to their corresponding package installation names
+        package_and_version = [
+            (IMPORT_TO_INSTALL_MAPPING.get(name, name), version)
+            for name, version in package_and_version
+        ]
+
+        if create_requirements_txt:
+            requirements_txt = ""
+            if missing_package_versions:
+                requirements_txt += "# Warning- The existence and/or versions for the following packages could not be determined:\n"
+                requirements_txt += "# " + ", ".join(missing_package_versions) + "\n"
+
+            for package, version in package_and_version:
+                if version:
+                    requirements_txt += f"{package}=={version}\n"
+
+            if output_path:
+                with open(  # skipcq: PTC-W6004
+                    Path(output_path) / "requirements.txt", "w"
+                ) as file:
+                    file.write(requirements_txt)
+
         # Create a list of dicts related to each package or warning
         json_dicts = []
         if missing_package_versions:
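
Putting the new branch together, a generated requirements.txt would look roughly like this (package names and versions are hypothetical; the comment header appears only when some versions could not be determined):

    # Warning- The existence and/or versions for the following packages could not be determined:
    # some_local_module
    numpy==1.26.4
    scikit-learn==1.4.2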
@@ -1800,16 +1855,16 @@ def find_imports(file_path: Union[str, Path]) -> List[str]:
             file_text = file.read()
             # Parse the file to get the abstract syntax tree representation
             tree = ast.parse(file_text)
-            modules = []
+            modules = set()

             # Walk through each node in the ast to find import calls
             for node in ast.walk(tree):
                 # Determine parent module for `from * import *` calls
                 if isinstance(node, ast.ImportFrom):
-                    modules.append(node.module)
+                    modules.add(node.module.split(".")[0])
                 elif isinstance(node, ast.Import):
                     for name in node.names:
-                        modules.append(name.name)
+                        modules.add(name.name.split(".")[0])

             modules = list(set(modules))
             try:
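
The switch to a set plus split(".")[0] means submodule imports collapse to one top-level name. A standalone sketch of the effect (not part of the commit):

    import ast

    source = (
        "import sklearn.tree\n"
        "from sklearn.ensemble import RandomForestClassifier\n"
    )

    modules = set()
    for node in ast.walk(ast.parse(source)):
        if isinstance(node, ast.ImportFrom):
            modules.add(node.module.split(".")[0])
        elif isinstance(node, ast.Import):
            for name in node.names:
                modules.add(name.name.split(".")[0])

    print(modules)  # {'sklearn'}; the old code kept 'sklearn.tree' and 'sklearn.ensemble'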

tests/unit/test_write_json_files.py

Lines changed: 11 additions & 3 deletions
@@ -699,8 +699,9 @@ def test_create_requirements_json(change_dir):
     dtc = dtc.fit(x_train, y_train)
     with open(tmp_dir / "DecisionTreeClassifier.pickle", "wb") as pkl_file:
         pickle.dump(dtc, pkl_file)
-    jf.create_requirements_json(tmp_dir, Path(tmp_dir))
+    jf.create_requirements_json(tmp_dir, Path(tmp_dir), True)
     assert (Path(tmp_dir) / "requirements.json").exists()
+    assert (Path(tmp_dir) / "requirements.txt").exists()

     json_dict = jf.create_requirements_json(tmp_dir)
     expected = [
@@ -709,13 +710,20 @@ def test_create_requirements_json(change_dir):
             "command": f"pip install numpy=={np.__version__}",
         },
         {
-            "step": "install sklearn",
-            "command": f"pip install sklearn=={sk.__version__}",
+            "step": "install scikit-learn",
+            "command": f"pip install scikit-learn=={sk.__version__}",
         },
     ]
     unittest.TestCase.maxDiff = None
     unittest.TestCase().assertCountEqual(json_dict, expected)

+    # Verify requirements.txt content
+    with open(Path(tmp_dir) / "requirements.txt", "r") as file:
+        requirements_content = [line.strip() for line in file.readlines()]
+
+    assert f"numpy=={np.__version__}" in requirements_content
+    assert f"scikit-learn=={sk.__version__}" in requirements_content
+

 class TestAssessBiasHelpers(unittest.TestCase):
     md_1 = pd.DataFrame({"Value": [0], "Base": ["A"], "Compare": ["C"]})
