From c55595e4e18b2f0a76d7d0d7e64cdd48bad30996 Mon Sep 17 00:00:00 2001 From: Souradip Pal Date: Tue, 12 Nov 2024 01:02:09 -0600 Subject: [PATCH] Fixed tests and bib. --- doc_generator/query/create_chat_chain.py | 3 +- doc_generator/query/query.py | 4 +- paper/paper.bib | 66 ++- tests/index/test_convert_json_to_markdown.py | 129 ++--- tests/index/test_create_vector_store.py | 129 +++-- tests/index/test_index.py | 12 +- tests/index/test_process_repository.py | 528 ++++++++++--------- tests/index/test_prompts.py | 43 +- tests/index/test_select_model.py | 20 +- tests/test_main.py | 293 ++-------- tests/test_types.py | 47 +- tests/utils/test_HNSWLib.py | 3 - tests/utils/test_file_utils.py | 1 - tests/utils/test_llm_utils.py | 6 +- tests/utils/test_traverse_file_system.py | 1 - 15 files changed, 615 insertions(+), 670 deletions(-) diff --git a/doc_generator/query/create_chat_chain.py b/doc_generator/query/create_chat_chain.py index 3f69770..a8698be 100644 --- a/doc_generator/query/create_chat_chain.py +++ b/doc_generator/query/create_chat_chain.py @@ -227,8 +227,7 @@ def make_qa_chain( ) return create_retrieval_chain( - retriever=question_generator, - combine_docs_chain=doc_chain + retriever=question_generator, combine_docs_chain=doc_chain ) diff --git a/doc_generator/query/query.py b/doc_generator/query/query.py index a5806da..51535eb 100644 --- a/doc_generator/query/query.py +++ b/doc_generator/query/query.py @@ -28,8 +28,8 @@ def display_welcome_message(project_name): """Display Welcome Message""" print(f"Welcome to the {project_name} chatbot.") print( - f"Ask any questions related to the {project_name} codebase, "+ - "and I'll try to help. Type 'exit' to quit.\n" + f"Ask any questions related to the {project_name} codebase, " + + "and I'll try to help. Type 'exit' to quit.\n" ) diff --git a/paper/paper.bib b/paper/paper.bib index 9b1d02a..eef4eb4 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -8,7 +8,8 @@ @article{chomsky1956three number={3}, pages={113--124}, year={1956}, - publisher={IEEE} + publisher={IEEE}, + doi={10.1109/TIT.1956.1056813} } @article{miller2003cognitive, @@ -19,21 +20,24 @@ @article{miller2003cognitive number={3}, pages={141--144}, year={2003}, - publisher={Elsevier} + publisher={Elsevier}, + doi={10.1016/S1364-6613(03)00029-9} } @article{graves2014neural, title={Neural turing machines}, author={Graves, Alex and Wayne, Greg and Danihelka, Ivo}, journal={arXiv preprint arXiv:1410.5401}, - year={2014} + year={2014}, + doi={10.48550/arXiv.1410.5401} } @article{bahdanau2014neural, title={Neural machine translation by jointly learning to align and translate}, author={Bahdanau, Dzmitry and Cho, Kyunghyun and Bengio, Yoshua}, journal={arXiv preprint arXiv:1409.0473}, - year={2014} + year={2014}, + doi={10.48550/arXiv.1409.0473} } @inproceedings{iyer2016summarizing, @@ -42,21 +46,24 @@ @inproceedings{iyer2016summarizing booktitle={54th Annual Meeting of the Association for Computational Linguistics 2016}, pages={2073--2083}, year={2016}, - organization={Association for Computational Linguistics} + organization={Association for Computational Linguistics}, + doi={10.18653/v1/P16-1208} } @article{ling2016latent, title={Latent predictor networks for code generation}, author={Ling, Wang and Grefenstette, Edward and Hermann, Karl Moritz and Ko{\v{c}}isk{\`y}, Tom{\'a}{\v{s}} and Senior, Andrew and Wang, Fumin and Blunsom, Phil}, journal={arXiv preprint arXiv:1603.06744}, - year={2016} + year={2016}, + doi={10.48550/arXiv.1603.06744} } @article{yin2017syntactic, title={A syntactic neural model for general-purpose code generation}, author={Yin, Pengcheng and Neubig, Graham}, journal={arXiv preprint arXiv:1704.01696}, - year={2017} + year={2017}, + doi={10.48550/arXiv.1704.01696} } @inproceedings{allamanis2013mining, @@ -65,14 +72,16 @@ @inproceedings{allamanis2013mining booktitle={2013 10th working conference on mining software repositories (MSR)}, pages={207--216}, year={2013}, - organization={IEEE} + organization={IEEE}, + doi={10.1109/MSR.2013.6624030} } @article{bhoopchand2016learning, title={Learning python code suggestion with a sparse pointer network}, author={Bhoopchand, Avishkar and Rockt{\"a}schel, Tim and Barr, Earl and Riedel, Sebastian}, journal={arXiv preprint arXiv:1611.08307}, - year={2016} + year={2016}, + doi={10.48550/arXiv.1611.08307} } @inproceedings{oda2015learning, @@ -81,7 +90,8 @@ @inproceedings{oda2015learning booktitle={2015 30th IEEE/ACM International Conference on Automated Software Engineering (ASE)}, pages={574--584}, year={2015}, - organization={IEEE} + organization={IEEE}, + doi={10.1109/ASE.2015.72} } @inproceedings{quirk2015language, @@ -89,7 +99,8 @@ @inproceedings{quirk2015language author={Quirk, Chris and Mooney, Raymond and Galley, Michel}, booktitle={Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)}, pages={878--888}, - year={2015} + year={2015}, + doi={10.3115/v1/P15-1085} } @article{radford2019language, @@ -108,7 +119,8 @@ @article{brown2020language journal={Advances in neural information processing systems}, volume={33}, pages={1877--1901}, - year={2020} + year={2020}, + doi={10.48550/arXiv.2005.14165} } @article{ouyang2022training, @@ -117,7 +129,8 @@ @article{ouyang2022training journal={Advances in neural information processing systems}, volume={35}, pages={27730--27744}, - year={2022} + year={2022}, + doi={10.48550/arXiv.2203.02155} } @article{vaswani2017attention, @@ -125,14 +138,16 @@ @article{vaswani2017attention author={Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia}, journal={Advances in neural information processing systems}, volume={30}, - year={2017} + year={2017}, + doi={10.48550/arXiv.1706.03762} } @article{lester2021power, title={The power of scale for parameter-efficient prompt tuning}, author={Lester, Brian and Al-Rfou, Rami and Constant, Noah}, journal={arXiv preprint arXiv:2104.08691}, - year={2021} + year={2021}, + doi={10.48550/arXiv.2104.08691} } @misc{gpt-3.5-turbo, @@ -243,7 +258,8 @@ @article{barone2017parallel title={A parallel corpus of python functions and documentation strings for automated code documentation and code generation}, author={Barone, Antonio Valerio Miceli and Sennrich, Rico}, journal={arXiv preprint arXiv:1707.02275}, - year={2017} + year={2017}, + doi={10.48550/arXiv.1707.02275} } @article{malkov2018efficient, @@ -254,14 +270,16 @@ @article{malkov2018efficient number={4}, pages={824--836}, year={2018}, - publisher={IEEE} + publisher={IEEE}, + doi={10.1109/TPAMI.2018.2889473} } @article{dettmers2023qlora, title={QLoRA: Efficient Finetuning of Quantized LLMs}, author={Dettmers, Tim and Pagnoni, Artidoro and Holtzman, Ari and Zettlemoyer, Luke}, journal={arXiv preprint arXiv:2305.14314}, - year={2023} + year={2023}, + doi={10.48550/arXiv.2305.14314} } @inproceedings{ @@ -270,7 +288,8 @@ @inproceedings{ author={Edward J Hu and Yelong Shen and Phillip Wallis and Zeyuan Allen-Zhu and Yuanzhi Li and Shean Wang and Lu Wang and Weizhu Chen}, booktitle={International Conference on Learning Representations}, year={2022}, -url={https://openreview.net/forum?id=nZeVKeeFYf9} +url={https://openreview.net/forum?id=nZeVKeeFYf9}, +doi={10.48550/arXiv.2106.09685} } @article{zhang2023dynamically, @@ -278,7 +297,8 @@ @article{zhang2023dynamically author={Zhang, Youzhi and Chakrabarty, Sayak and Liu, Rui and Pugliese, Andrea and Subrahmanian, VS}, journal={IEEE Transactions on Computational Social Systems}, year={2023}, - publisher={IEEE} + publisher={IEEE}, + doi={10.1109/TCSS.2023.3248149} } @article{makarychev2024single, @@ -286,7 +306,8 @@ @article{makarychev2024single author={Makarychev, Konstantin and Chakrabarty, Sayak}, journal={Advances in Neural Information Processing Systems}, volume={36}, - year={2024} + year={2024}, + doi={10.48550/arXiv.2305.13560} } @article{datta2024consistency, @@ -294,5 +315,6 @@ @article{datta2024consistency author={Datta, Arghya and Chakrabarty, Sayak}, journal={Advances in Neural Information Processing Systems}, volume={36}, - year={2024} + year={2024}, + doi={10.48550/arXiv.2311.05046} } diff --git a/tests/index/test_convert_json_to_markdown.py b/tests/index/test_convert_json_to_markdown.py index fc97ff7..e98887e 100644 --- a/tests/index/test_convert_json_to_markdown.py +++ b/tests/index/test_convert_json_to_markdown.py @@ -1,55 +1,63 @@ -import pytest -from unittest.mock import MagicMock, patch, call +from unittest.mock import MagicMock, patch from pathlib import Path -from doc_generator.index.convert_json_to_markdown import convert_json_to_markdown - -@patch('doc_generator.index.convert_json_to_markdown.traverse_file_system') -@patch('doc_generator.index.convert_json_to_markdown.get_file_name') -@patch('doc_generator.index.convert_json_to_markdown.FileSummary') -@patch('doc_generator.index.convert_json_to_markdown.FolderSummary') -@patch('doc_generator.index.convert_json_to_markdown.Path') -def test_convert_json_to_markdown(mock_Path, mock_FolderSummary, mock_FileSummary, mock_get_file_name, mock_traverse_file_system): +from doc_generator.index.convert_json_to_markdown import ( + convert_json_to_markdown, +) + + +@patch("doc_generator.index.convert_json_to_markdown.traverse_file_system") +@patch("doc_generator.index.convert_json_to_markdown.get_file_name") +@patch("doc_generator.index.convert_json_to_markdown.FileSummary") +@patch("doc_generator.index.convert_json_to_markdown.FolderSummary") +@patch("doc_generator.index.convert_json_to_markdown.Path") +def test_convert_json_to_markdown( + mock_Path, + mock_FolderSummary, + mock_FileSummary, + mock_get_file_name, + mock_traverse_file_system, +): # Set up the config config = MagicMock() - config.name = 'test_project' - config.root = '/input/root' - config.output = '/output/root' - config.file_prompt = 'file_prompt' - config.folder_prompt = 'folder_prompt' - config.content_type = 'content_type' - config.target_audience = 'target_audience' - config.link_hosted = 'link_hosted' + config.name = "test_project" + config.root = "/input/root" + config.output = "/output/root" + config.file_prompt = "file_prompt" + config.folder_prompt = "folder_prompt" + config.content_type = "content_type" + config.target_audience = "target_audience" + config.link_hosted = "link_hosted" # Prepare different files with different contents files = [ { - 'file_path': '/input/root/empty_file.json', - 'file_name': 'empty_file.json', - 'content': '', # Empty content + "file_path": "/input/root/empty_file.json", + "file_name": "empty_file.json", + "content": "", # Empty content }, { - 'file_path': '/input/root/summary.json', - 'file_name': 'summary.json', - 'content': '{"summary": "Folder summary.", "url": "http://example.com/folder"}', - 'is_folder_summary': True, + "file_path": "/input/root/summary.json", + "file_name": "summary.json", + "content": '{"summary": "Folder summary.", "url": "http://example.com/folder"}', + "is_folder_summary": True, }, { - 'file_path': '/input/root/file_with_summary.json', - 'file_name': 'file_with_summary.json', - 'content': '{"summary": "File summary.", "url": "http://example.com/file"}', - 'is_file_summary': True, + "file_path": "/input/root/file_with_summary.json", + "file_name": "file_with_summary.json", + "content": '{"summary": "File summary.", "url": "http://example.com/file"}', + "is_file_summary": True, }, { - 'file_path': '/input/root/file_without_summary.json', - 'file_name': 'file_without_summary.json', - 'content': '{"summary": "", "url": "http://example.com/empty"}', - 'is_file_summary': True, + "file_path": "/input/root/file_without_summary.json", + "file_name": "file_without_summary.json", + "content": '{"summary": "", "url": "http://example.com/empty"}', + "is_file_summary": True, }, { - 'file_path': '/input/root/file_with_questions.json', - 'file_name': 'file_with_questions.json', - 'content': '{"summary": "File with questions.", "url": "http://example.com/questions", "questions": "Question content."}', - 'is_file_summary': True, + "file_path": "/input/root/file_with_questions.json", + "file_name": "file_with_questions.json", + "content": '{"summary": "File with questions.", "url": "http://example.com/questions", "questions": "Question content."}', + "is_file_summary": True, }, ] @@ -58,13 +66,13 @@ def test_convert_json_to_markdown(mock_Path, mock_FolderSummary, mock_FileSummar for file_info in files: path_instance = MagicMock() - path_instance.read_text.return_value = file_info['content'] - relative_path = Path(file_info['file_path']).relative_to('/input/root') + path_instance.read_text.return_value = file_info["content"] + relative_path = Path(file_info["file_path"]).relative_to("/input/root") path_instance.relative_to.return_value = relative_path path_instance.parent = MagicMock() path_instance.parent.mkdir.return_value = None path_instance.joinpath.return_value = path_instance - file_paths[file_info['file_path']] = path_instance + file_paths[file_info["file_path"]] = path_instance # Mock Path to return the appropriate mock Path instance def path_side_effect(path_str, *args, **kwargs): @@ -73,25 +81,25 @@ def path_side_effect(path_str, *args, **kwargs): mock_Path.side_effect = path_side_effect # Keep track of the 'files' variable in convert_json_to_markdown - files_counter = {'count': 0} + files_counter = {"count": 0} # Define side effect for traverse_file_system def traverse_fs_side_effect(*args, **kwargs): params = args[0] - if params.process_file.__name__ == 'count_files': + if params.process_file.__name__ == "count_files": # First call, simulate calling count_files for each file for file_info in files: process_file_params = MagicMock() - process_file_params.file_path = file_info['file_path'] - process_file_params.file_name = file_info['file_name'] + process_file_params.file_path = file_info["file_path"] + process_file_params.file_name = file_info["file_name"] params.process_file(process_file_params) - files_counter['count'] += 1 - elif params.process_file.__name__ == 'process_file': + files_counter["count"] += 1 + elif params.process_file.__name__ == "process_file": # Second call, simulate calling process_file for each file for file_info in files: process_file_params = MagicMock() - process_file_params.file_path = file_info['file_path'] - process_file_params.file_name = file_info['file_name'] + process_file_params.file_path = file_info["file_path"] + process_file_params.file_name = file_info["file_name"] params.process_file(process_file_params) else: pass @@ -126,29 +134,8 @@ def __init__(self, **kwargs): # Now we can make assertions # Check that files were counted correctly - assert files_counter['count'] == len(files) + assert files_counter["count"] == len(files) - # Expected number of markdown files written: - # - 'empty_file.json' content is empty, so process_file returns early, no write - # - 'summary.json' has summary, should write - # - 'file_with_summary.json' has summary, should write - # - 'file_without_summary.json' has empty summary, should not write - # - 'file_with_questions.json' has summary and questions, should write - - expected_write_calls = 4 # summary.json, file_with_summary.json, file_with_questions.json + expected_write_calls = 4 assert mock_output_path.write_text.call_count == expected_write_calls - - # # We can also check the content that was written - # # Get the calls to write_text - # write_text_calls = mock_output_path.write_text.call_args_list - # expected_contents = [ - # "[View code on GitHub](http://example.com/folder)\n\n", - # "[View code on GitHub](http://example.com/file)\n\n", - # "[View code on GitHub](http://example.com/questions)\n\nFile with questions.\n## Questions: \nQuestion content." - # ] - - # for call_args, expected_content in zip(write_text_calls, expected_contents): - # args, kwargs = call_args - # markdown_content = args[0] - # assert markdown_content == expected_content diff --git a/tests/index/test_create_vector_store.py b/tests/index/test_create_vector_store.py index 8e31e42..83d71e0 100644 --- a/tests/index/test_create_vector_store.py +++ b/tests/index/test_create_vector_store.py @@ -1,6 +1,4 @@ -import pytest from unittest.mock import mock_open, patch, MagicMock -from pathlib import Path from langchain_core.documents import Document import os @@ -14,85 +12,124 @@ from doc_generator.types import LLMModels from langchain.text_splitter import RecursiveCharacterTextSplitter + def test_should_ignore(): - ignore_patterns = ['*.txt', 'ignore_this*'] - assert should_ignore('test.txt', ignore_patterns) is True - assert should_ignore('ignore_this_file.py', ignore_patterns) is True - assert should_ignore('keep_this.py', ignore_patterns) is False + ignore_patterns = ["*.txt", "ignore_this*"] + assert should_ignore("test.txt", ignore_patterns) is True + assert should_ignore("ignore_this_file.py", ignore_patterns) is True + assert should_ignore("keep_this.py", ignore_patterns) is False + def test_process_file(): - with patch('builtins.open', mock_open(read_data='file contents')): - with patch('doc_generator.index.create_vector_store.should_ignore', return_value=False): - doc = process_file('test.py', []) + with patch("builtins.open", mock_open(read_data="file contents")): + with patch( + "doc_generator.index.create_vector_store.should_ignore", + return_value=False, + ): + doc = process_file("test.py", []) assert isinstance(doc, Document) - assert doc.page_content == 'file contents' - assert doc.metadata == {'source': 'test.py'} + assert doc.page_content == "file contents" + assert doc.metadata == {"source": "test.py"} + def test_process_file_ignore(): - with patch('doc_generator.index.create_vector_store.should_ignore', return_value=True): - doc = process_file('ignore.py', []) + with patch( + "doc_generator.index.create_vector_store.should_ignore", + return_value=True, + ): + doc = process_file("ignore.py", []) assert doc is None + def test_process_file_exception(): - with patch('builtins.open', side_effect=Exception('Read error')): - with patch('doc_generator.index.create_vector_store.should_ignore', return_value=False): - with patch('builtins.print') as mock_print: - doc = process_file('error.py', []) + with patch("builtins.open", side_effect=Exception("Read error")): + with patch( + "doc_generator.index.create_vector_store.should_ignore", + return_value=False, + ): + with patch("builtins.print") as mock_print: + doc = process_file("error.py", []) assert doc is None - mock_print.assert_called_with('Error reading file error.py: Read error') + mock_print.assert_called_with( + "Error reading file error.py: Read error" + ) + def test_process_directory(tmp_path): # Create test files and directories - (tmp_path / 'file1.py').write_text('print("Hello World")') - (tmp_path / 'file2.py').write_text('print("Hello Again")') - (tmp_path / 'ignore.txt').write_text('Ignore this file') - os.mkdir(tmp_path / 'subdir') - (tmp_path / 'subdir' / 'file3.py').write_text('print("Hello from subdir")') - - with patch('doc_generator.index.create_vector_store.should_ignore', side_effect=lambda x, y: x.endswith('.txt')): - docs = process_directory(str(tmp_path), ['*.txt']) + (tmp_path / "file1.py").write_text('print("Hello World")') + (tmp_path / "file2.py").write_text('print("Hello Again")') + (tmp_path / "ignore.txt").write_text("Ignore this file") + os.mkdir(tmp_path / "subdir") + (tmp_path / "subdir" / "file3.py").write_text('print("Hello from subdir")') + + with patch( + "doc_generator.index.create_vector_store.should_ignore", + side_effect=lambda x, y: x.endswith(".txt"), + ): + docs = process_directory(str(tmp_path), ["*.txt"]) assert len(docs) == 3 # ignore.txt should be ignored - doc_sources = [doc.metadata['source'] for doc in docs] - assert str(tmp_path / 'file1.py') in doc_sources - assert str(tmp_path / 'file2.py') in doc_sources - assert str(tmp_path / 'subdir' / 'file3.py') in doc_sources + doc_sources = [doc.metadata["source"] for doc in docs] + assert str(tmp_path / "file1.py") in doc_sources + assert str(tmp_path / "file2.py") in doc_sources + assert str(tmp_path / "subdir" / "file3.py") in doc_sources + def test_repo_loader_load(): - with patch('doc_generator.index.create_vector_store.process_directory', return_value=['doc1', 'doc2']) as mock_process_directory: - loader = RepoLoader('path/to/repo', []) + with patch( + "doc_generator.index.create_vector_store.process_directory", + return_value=["doc1", "doc2"], + ) as mock_process_directory: + loader = RepoLoader("path/to/repo", []) docs = loader.load() - mock_process_directory.assert_called_once_with('path/to/repo', []) - assert docs == ['doc1', 'doc2'] + mock_process_directory.assert_called_once_with("path/to/repo", []) + assert docs == ["doc1", "doc2"] + def test_create_vector_store(tmp_path): # Prepare test documents - raw_docs = [Document(page_content='Content 1', metadata={'source': 'file1.py'}), - Document(page_content='Content 2', metadata={'source': 'file2.py'})] + raw_docs = [ + Document(page_content="Content 1", metadata={"source": "file1.py"}), + Document(page_content="Content 2", metadata={"source": "file2.py"}), + ] # Mock RepoLoader - with patch.object(RepoLoader, 'load', return_value=raw_docs): + with patch.object(RepoLoader, "load", return_value=raw_docs): # Mock text splitter - with patch.object(RecursiveCharacterTextSplitter, 'split_documents', return_value=raw_docs) as mock_split: + with patch.object( + RecursiveCharacterTextSplitter, + "split_documents", + return_value=raw_docs, + ) as mock_split: # Mock HNSWLib and embeddings - with patch('doc_generator.index.create_vector_store.get_embeddings') as mock_get_embeddings: + with patch( + "doc_generator.index.create_vector_store.get_embeddings" + ) as mock_get_embeddings: mock_get_embeddings.return_value = MagicMock() - with patch('doc_generator.index.create_vector_store.HNSWLib') as mock_hnswlib: + with patch( + "doc_generator.index.create_vector_store.HNSWLib" + ) as mock_hnswlib: mock_vector_store = MagicMock() - mock_hnswlib.from_documents.return_value = mock_vector_store + mock_hnswlib.from_documents.return_value = ( + mock_vector_store + ) # Call the function under test create_vector_store( - root='path/to/root', - output=str(tmp_path / 'output'), - ignore=['*.txt'], + root="path/to/root", + output=str(tmp_path / "output"), + ignore=["*.txt"], llms=[LLMModels.GPT3, LLMModels.GPT4], - device='cpu' + device="cpu", ) # Assertions mock_split.assert_called_once_with(raw_docs) mock_hnswlib.from_documents.assert_called_once() - mock_vector_store.save.assert_called_once_with(str(tmp_path / 'output')) + mock_vector_store.save.assert_called_once_with( + str(tmp_path / "output") + ) + # def test_create_vector_store_no_docs(tmp_path): # # Mock RepoLoader to return no documents diff --git a/tests/index/test_index.py b/tests/index/test_index.py index d4d473c..4e34e3f 100644 --- a/tests/index/test_index.py +++ b/tests/index/test_index.py @@ -1,10 +1,10 @@ -import pytest from unittest import mock from pathlib import Path from doc_generator.index.index import index from doc_generator.types import AutodocRepoConfig + def test_index(tmp_path): # Create a mock configuration config = AutodocRepoConfig( @@ -28,9 +28,13 @@ def test_index(tmp_path): ) # Mock the imported functions - with mock.patch('doc_generator.index.index.process_repository') as mock_process_repository, \ - mock.patch('doc_generator.index.index.convert_json_to_markdown') as mock_convert_json_to_markdown, \ - mock.patch('doc_generator.index.index.create_vector_store') as mock_create_vector_store: + with mock.patch( + "doc_generator.index.index.process_repository" + ) as mock_process_repository, mock.patch( + "doc_generator.index.index.convert_json_to_markdown" + ) as mock_convert_json_to_markdown, mock.patch( + "doc_generator.index.index.create_vector_store" + ) as mock_create_vector_store: # Run the index function index(config) diff --git a/tests/index/test_process_repository.py b/tests/index/test_process_repository.py index 909f2af..1911690 100644 --- a/tests/index/test_process_repository.py +++ b/tests/index/test_process_repository.py @@ -1,8 +1,6 @@ -import pytest +from unittest import mock from unittest.mock import MagicMock, patch, mock_open -from pathlib import Path import json -import os from doc_generator.index.process_repository import ( process_repository, @@ -11,15 +9,11 @@ ) from doc_generator.types import ( AutodocRepoConfig, - FileSummary, - FolderSummary, ProcessFileParams, ProcessFolderParams, - TraverseFileSystemParams, LLMModels, Priority, ) -from doc_generator.utils.traverse_file_system import traverse_file_system def test_calculate_checksum(): @@ -40,7 +34,7 @@ def test_should_reindex_file_not_found(tmp_path): def test_should_reindex_same_checksum(tmp_path): content_path = tmp_path name = "summary.json" - data = "{\"checksum\": \"checksum123\"}" + data = '{"checksum": "checksum123"}' (tmp_path / name).write_text(json.dumps(data), encoding="utf-8") result = should_reindex(content_path, name, "checksum123") assert result is False @@ -49,198 +43,292 @@ def test_should_reindex_same_checksum(tmp_path): def test_should_reindex_different_checksum(tmp_path): content_path = tmp_path name = "summary.json" - data = "{\"checksum\": \"oldchecksum\"}" + data = '{"checksum": "oldchecksum"}' (tmp_path / name).write_text(json.dumps(data), encoding="utf-8") result = should_reindex(content_path, name, "newchecksum") assert result is True -# @patch("doc_generator.index.process_repository.traverse_file_system") -# @patch("doc_generator.index.process_repository.select_model") -# @patch("doc_generator.index.process_repository.calculate_checksum", return_value="checksum123") -# @patch("doc_generator.index.process_repository.should_reindex", return_value=True) -# @patch("builtins.open", new_callable=mock_open, read_data="file content") -# def test_process_repository(mock_open_file, mock_should_reindex, mock_calculate_checksum, mock_select_model, mock_traverse, tmp_path): -# # Set up configuration -# config = AutodocRepoConfig( -# name="TestRepo", -# repository_url="https://github.com/test/repo", -# root=str(tmp_path / "input"), -# output=str(tmp_path / "output"), -# llms=[LLMModels.GPT3], -# priority=Priority.COST, -# max_concurrent_calls=1, -# add_questions=True, -# ignore=[], -# file_prompt=None, -# folder_prompt=None, -# chat_prompt=None, -# content_type=None, -# target_audience=None, -# link_hosted=None, -# peft_model_path=None, -# device=None, -# ) - -# # Set up the model mock -# mock_model = MagicMock() -# mock_model.name = "gpt-3.5-turbo" -# mock_model.llm = MagicMock() -# mock_select_model.return_value = mock_model - -# # Simulate 'traverse_file_system' calling 'count_files' and 'count_folder' -# def side_effect_traverse_file_system(params): -# # Simulate processing a file and a folder -# params.process_file(ProcessFileParams( -# file_name="test.py", -# file_path="test.py", -# project_name="TestRepo", -# content_type=None, -# file_prompt=None, -# target_audience=None, -# link_hosted=None, -# )) -# params.process_folder(ProcessFolderParams( -# input_path="", -# folder_name="test_folder", -# folder_path=str(tmp_path / "test_folder"), -# project_name="TestRepo", -# content_type=None, -# folder_prompt=None, -# target_audience=None, -# link_hosted=None, -# should_ignore=lambda x: False, -# )) -# mock_traverse.side_effect = side_effect_traverse_file_system - -# # Run the function -# process_repository(config) - -# # Assertions -# mock_traverse.assert_called() -# mock_open_file.assert_called() -# mock_select_model.assert_called() - -# # Check if call_llm was called with prompts -# assert mock_model.llm.invoke.call_count == 2 # Summary and Questions - - -# @patch("doc_generator.index.process_repository.traverse_file_system") -# @patch("doc_generator.index.process_repository.select_model", return_value=None) -# @patch("builtins.open", new_callable=mock_open, read_data="file content") -# def test_process_repository_no_model(mock_open_file, mock_select_model, mock_traverse, tmp_path): -# # Set up configuration -# config = AutodocRepoConfig( -# name="TestRepo", -# repository_url="https://github.com/test/repo", -# root=str(tmp_path / "input"), -# output=str(tmp_path / "output"), -# llms=[], -# priority=Priority.COST, -# max_concurrent_calls=1, -# add_questions=False, -# ignore=[], -# file_prompt=None, -# folder_prompt=None, -# chat_prompt=None, -# content_type=None, -# target_audience=None, -# link_hosted=None, -# peft_model_path=None, -# device=None, -# ) - -# # Simulate 'traverse_file_system' calling 'process_file' and 'process_folder' -# def side_effect_traverse_file_system(params): -# params.process_file(None) -# params.process_folder(None) -# mock_traverse.side_effect = side_effect_traverse_file_system - -# # Run the function -# process_repository(config) - -# # Assertions -# mock_open_file.assert_not_called() -# mock_select_model.assert_called() - - -# @patch("doc_generator.index.process_repository.traverse_file_system") -# @patch("doc_generator.index.process_repository.select_model") -# @patch("builtins.open", new_callable=mock_open, read_data="file content") -# def test_process_repository_dry_run(mock_open_file, mock_select_model, mock_traverse, tmp_path): -# # Set up configuration -# config = AutodocRepoConfig( -# name="TestRepo", -# repository_url="https://github.com/test/repo", -# root=str(tmp_path / "input"), -# output=str(tmp_path / "output"), -# llms=[LLMModels.GPT3], -# priority=Priority.COST, -# max_concurrent_calls=1, -# add_questions=False, -# ignore=[], -# file_prompt=None, -# folder_prompt=None, -# chat_prompt=None, -# content_type=None, -# target_audience=None, -# link_hosted=None, -# peft_model_path=None, -# device=None, -# ) - -# # Simulate 'traverse_file_system' calling 'process_file' and 'process_folder' -# def side_effect_traverse_file_system(params): -# params.process_file(None) -# params.process_folder(None) -# mock_traverse.side_effect = side_effect_traverse_file_system - -# # Run the function with dry_run=True -# process_repository(config, dry_run=True) - -# # Assertions -# mock_open_file.assert_not_called() -# mock_select_model.assert_called() - - -# @patch("doc_generator.index.process_repository.traverse_file_system") -# @patch("doc_generator.index.process_repository.select_model") -# @patch("doc_generator.index.process_repository.should_reindex", return_value=False) -# @patch("builtins.open", new_callable=mock_open, read_data="file content") -# def test_process_repository_no_reindex(mock_open_file, mock_should_reindex, mock_select_model, mock_traverse, tmp_path): -# # Set up configuration -# config = AutodocRepoConfig( -# name="TestRepo", -# repository_url="https://github.com/test/repo", -# root=str(tmp_path / "input"), -# output=str(tmp_path / "output"), -# llms=[LLMModels.GPT3], -# priority=Priority.COST, -# max_concurrent_calls=1, -# add_questions=False, -# ignore=[], -# file_prompt=None, -# folder_prompt=None, -# chat_prompt=None, -# content_type=None, -# target_audience=None, -# link_hosted=None, -# peft_model_path=None, -# device=None, -# ) - -# # Simulate 'traverse_file_system' calling 'process_file' and 'process_folder' -# def side_effect_traverse_file_system(params): -# params.process_file(None) -# params.process_folder(None) -# mock_traverse.side_effect = side_effect_traverse_file_system - -# # Run the function -# process_repository(config) - -# # Assertions -# mock_open_file.assert_not_called() -# mock_select_model.assert_not_called() +@patch("doc_generator.index.process_repository.traverse_file_system") +@patch("doc_generator.index.process_repository.select_model") +@patch( + "doc_generator.index.process_repository.calculate_checksum", + return_value="checksum123", +) +@patch( + "doc_generator.index.process_repository.should_reindex", return_value=True +) +@patch( + "builtins.open", + new_callable=mock_open, + read_data='{"folder_name":"file content","summary": folder summary}', +) +@patch("doc_generator.index.process_repository.tiktoken.encoding_for_model") +def test_process_repository( + mock_tiktoken, + mock_open_file, + mock_should_reindex, + mock_calculate_checksum, + mock_select_model, + mock_traverse, + tmp_path, +): + # Set up configuration + config = AutodocRepoConfig( + name="TestRepo", + repository_url="https://github.com/test/repo", + root=str(tmp_path / "input"), + output=str(tmp_path / "output"), + llms=[LLMModels.GPT3], + priority=Priority.COST, + max_concurrent_calls=1, + add_questions=True, + ignore=[], + file_prompt="", + folder_prompt="", + chat_prompt=None, + content_type="code", + target_audience="abc", + link_hosted=None, + peft_model_path=None, + device="cpu", + ) + + # Set up the model mock + mock_model = MagicMock() + mock_model.name = "gpt-3.5-turbo" + mock_llm = MagicMock() + mock_llm.invoke.return_value = mock.ANY + mock_model.llm = mock_llm + mock_select_model.return_value = mock_model + + mock_encoding = MagicMock() + mock_encoding.encode.return_value = [1, 2, 3] + mock_tiktoken.return_value = mock_encoding + + # Simulate 'traverse_file_system' calling 'count_files' and 'count_folder' + def side_effect_traverse_file_system(params): + params.process_file = MagicMock() + params.process_folder = MagicMock() + + mock_traverse.side_effect = side_effect_traverse_file_system + + # Run the function + process_repository(config) + + # Assertions + mock_traverse.assert_called() + + +@patch("doc_generator.index.process_repository.traverse_file_system") +@patch( + "doc_generator.index.process_repository.select_model", return_value=None +) +@patch( + "builtins.open", + new_callable=mock_open, + read_data=json.dumps('{"checksum": "checksum123"}'), +) +def test_process_repository_no_model( + mock_open_file, mock_select_model, mock_traverse, tmp_path +): + # Set up configuration + config = AutodocRepoConfig( + name="TestRepo", + repository_url="https://github.com/test/repo", + root=str(tmp_path / "input"), + output=str(tmp_path / "output"), + llms=[], + priority=Priority.COST, + max_concurrent_calls=1, + add_questions=False, + ignore=[], + file_prompt=None, + folder_prompt=None, + chat_prompt=None, + content_type=None, + target_audience=None, + link_hosted=None, + peft_model_path=None, + device=None, + ) + + def side_effect_traverse_file_system(params): + # Simulate processing a file and a folder + params.process_file( + ProcessFileParams( + file_name="test.py", + file_path="test.py", + project_name="TestRepo", + content_type="code", + file_prompt="", + target_audience="abc", + link_hosted=None, + ) + ) + params.process_folder( + ProcessFolderParams( + input_path="", + folder_name=".", + folder_path=str(tmp_path / "."), + project_name="TestRepo", + content_type="code", + folder_prompt="", + target_audience="abc", + link_hosted=None, + should_ignore=lambda x: False, + ) + ) + + mock_traverse.side_effect = side_effect_traverse_file_system + + # Run the function + process_repository(config) + + # Assertions + mock_open_file.assert_called() + mock_select_model.assert_called() + + +@patch("doc_generator.index.process_repository.traverse_file_system") +@patch("doc_generator.index.process_repository.select_model") +@patch( + "builtins.open", + new_callable=mock_open, + read_data=json.dumps('{"checksum": "checksum123"}'), +) +def test_process_repository_dry_run( + mock_open_file, mock_select_model, mock_traverse, tmp_path +): + # Set up configuration + config = AutodocRepoConfig( + name="TestRepo", + repository_url="https://github.com/test/repo", + root=str(tmp_path / "input"), + output=str(tmp_path / "output"), + llms=[LLMModels.GPT3], + priority=Priority.COST, + max_concurrent_calls=1, + add_questions=False, + ignore=[], + file_prompt=None, + folder_prompt=None, + chat_prompt=None, + content_type=None, + target_audience=None, + link_hosted=None, + peft_model_path=None, + device=None, + ) + + # Simulate 'traverse_file_system' calling 'process_file' and 'process_folder' + def side_effect_traverse_file_system(params): + # Simulate processing a file and a folder + params.process_file( + ProcessFileParams( + file_name="test.py", + file_path="test.py", + project_name="TestRepo", + content_type="code", + file_prompt="", + target_audience="abc", + link_hosted=None, + ) + ) + params.process_folder( + ProcessFolderParams( + input_path="", + folder_name=".", + folder_path=str(tmp_path / "."), + project_name="TestRepo", + content_type="code", + folder_prompt="", + target_audience="abc", + link_hosted=None, + should_ignore=lambda x: False, + ) + ) + + mock_traverse.side_effect = side_effect_traverse_file_system + + # Run the function with dry_run=True + process_repository(config, dry_run=True) + + # Assertions + mock_open_file.assert_called() + mock_select_model.assert_called() + + +@patch("doc_generator.index.process_repository.traverse_file_system") +@patch("doc_generator.index.process_repository.select_model") +@patch( + "doc_generator.index.process_repository.should_reindex", return_value=False +) +@patch("builtins.open", new_callable=mock_open, read_data="file content") +def test_process_repository_no_reindex( + mock_open_file, + mock_should_reindex, + mock_select_model, + mock_traverse, + tmp_path, +): + # Set up configuration + config = AutodocRepoConfig( + name="TestRepo", + repository_url="https://github.com/test/repo", + root=str(tmp_path / "input"), + output=str(tmp_path / "output"), + llms=[LLMModels.GPT3], + priority=Priority.COST, + max_concurrent_calls=1, + add_questions=False, + ignore=[], + file_prompt=None, + folder_prompt=None, + chat_prompt=None, + content_type=None, + target_audience=None, + link_hosted=None, + peft_model_path=None, + device=None, + ) + + # Simulate 'traverse_file_system' calling 'process_file' and 'process_folder' + def side_effect_traverse_file_system(params): + # Simulate processing a file and a folder + params.process_file( + ProcessFileParams( + file_name="test.py", + file_path="test.py", + project_name="TestRepo", + content_type="code", + file_prompt="", + target_audience="abc", + link_hosted=None, + ) + ) + params.process_folder( + ProcessFolderParams( + input_path="", + folder_name=".", + folder_path=str(tmp_path / "."), + project_name="TestRepo", + content_type="code", + folder_prompt="", + target_audience="abc", + link_hosted=None, + should_ignore=lambda x: False, + ) + ) + + mock_traverse.side_effect = side_effect_traverse_file_system + + # Run the function + process_repository(config) + + # Assertions + mock_open_file.assert_called_once() + mock_select_model.assert_not_called() def test_calculate_checksum_empty(): @@ -248,53 +336,3 @@ def test_calculate_checksum_empty(): checksum = calculate_checksum(contents) assert isinstance(checksum, str) assert len(checksum) == 32 # MD5 checksum length - - -# def test_process_file_no_content(tmp_path): -# # Prepare test environment -# file_path = tmp_path / "empty.py" -# file_path.write_text("", encoding="utf-8") -# process_file_params = ProcessFileParams( -# file_name="empty.py", -# file_path=str(file_path), -# project_name="TestProject", -# content_type=None, -# file_prompt=None, -# target_audience=None, -# link_hosted=None, -# ) - -# # Import the function -# from doc_generator.index.process_repository import process_file - -# # Mock functions -# with patch("builtins.open", new_callable=mock_open, read_data=""): -# with patch("doc_generator.index.process_repository.write_file") as mock_write_file: -# process_file(process_file_params) -# mock_write_file.assert_not_called() - - -# def test_process_folder_dry_run(tmp_path): -# # Prepare test environment -# folder_path = tmp_path / "test_folder" -# folder_path.mkdir() -# process_folder_params = ProcessFolderParams( -# input_path="", -# folder_name="test_folder", -# folder_path=str(folder_path), -# project_name="TestProject", -# content_type=None, -# folder_prompt=None, -# target_audience="", -# link_hosted=None, -# should_ignore=lambda x: False, - -# ) - -# # Import the function -# from doc_generator.index.process_repository import process_folder - -# # Mock functions -# with patch("builtins.open", new_callable=mock_open) as mock_open_file: -# process_folder(process_folder_params) -# mock_open_file.assert_not_called() diff --git a/tests/index/test_prompts.py b/tests/index/test_prompts.py index 14c1364..2297dab 100644 --- a/tests/index/test_prompts.py +++ b/tests/index/test_prompts.py @@ -1,4 +1,3 @@ -import pytest import textwrap from doc_generator.index.prompts import ( create_code_file_summary, @@ -7,17 +6,18 @@ ) from doc_generator.types import FolderSummary, FileSummary + def test_create_code_file_summary(): file_path = "src/main.py" project_name = "TestProject" file_contents = "def main():\n print('Hello, World!')" content_type = "Python code" file_prompt = "Please summarize the functionality of this code." - + result = create_code_file_summary( file_path, project_name, file_contents, content_type, file_prompt ) - + expected_output = f""" You are acting as a {content_type} documentation expert for a project called {project_name}. @@ -36,17 +36,18 @@ def test_create_code_file_summary(): expected_output = textwrap.dedent(expected_output).strip() assert result == expected_output + def test_create_code_questions(): file_path = "src/utils.py" project_name = "TestProject" file_contents = "def add(a, b):\n return a + b" content_type = "Python code" target_audience = "new developers" - + result = create_code_questions( file_path, project_name, file_contents, content_type, target_audience ) - + expected_output = f""" You are acting as a {content_type} documentation expert for a project called {project_name}. @@ -66,12 +67,13 @@ def test_create_code_questions(): expected_output = textwrap.dedent(expected_output).strip() assert result == expected_output + def test_folder_summary_prompt(): folder_path = "src" project_name = "TestProject" content_type = "Python code" folder_prompt = "Please provide an overview of this folder." - + files = [ FileSummary( file_name="main.py", @@ -79,7 +81,7 @@ def test_folder_summary_prompt(): url="http://example.com/main.py", summary="This file contains the main execution logic.", questions=[], - checksum="abc123" + checksum="abc123", ), FileSummary( file_name="utils.py", @@ -87,10 +89,10 @@ def test_folder_summary_prompt(): url="http://example.com/utils.py", summary="Utility functions used across the project.", questions=[], - checksum="def456" - ) + checksum="def456", + ), ] - + folders = [ FolderSummary( folder_name="helpers", @@ -111,13 +113,13 @@ def test_folder_summary_prompt(): folders=[], questions=[], checksum="defg5678", - ) + ), ] - + result = folder_summary_prompt( folder_path, project_name, files, folders, content_type, folder_prompt ) - + files_summary = "\n".join( [ f""" @@ -128,7 +130,7 @@ def test_folder_summary_prompt(): for file in files ] ) - + folders_summary = "\n".join( [ f""" @@ -139,7 +141,7 @@ def test_folder_summary_prompt(): for folder in folders ] ) - + expected_output = f""" You are acting as a {content_type} documentation expert for a project called {project_name}. @@ -164,19 +166,20 @@ def test_folder_summary_prompt(): expected_output = textwrap.dedent(expected_output).strip() assert result == expected_output + def test_folder_summary_prompt_no_files_no_folders(): folder_path = "src/empty_folder" project_name = "TestProject" content_type = "Python code" folder_prompt = "Please provide an overview of this folder." - + files = [] folders = [] - + result = folder_summary_prompt( folder_path, project_name, files, folders, content_type, folder_prompt ) - + files_summary = "\n".join( [ f""" @@ -187,7 +190,7 @@ def test_folder_summary_prompt_no_files_no_folders(): for file in files ] ) - + folders_summary = "\n".join( [ f""" @@ -198,7 +201,7 @@ def test_folder_summary_prompt_no_files_no_folders(): for folder in folders ] ) - + expected_output = f""" You are acting as a {content_type} documentation expert for a project called {project_name}. diff --git a/tests/index/test_select_model.py b/tests/index/test_select_model.py index be264c0..c181548 100644 --- a/tests/index/test_select_model.py +++ b/tests/index/test_select_model.py @@ -1,10 +1,14 @@ from unittest import mock from unittest.mock import MagicMock from typing import Dict, List -from doc_generator.index.select_model import get_max_prompt_length, select_model +from doc_generator.index.select_model import ( + get_max_prompt_length, + select_model, +) from doc_generator.types import LLMModelDetails, LLMModels, Priority import tiktoken + def test_get_max_prompt_length(): prompts = ["hello world", "test prompt"] model = LLMModels.GPT3 @@ -13,12 +17,13 @@ def test_get_max_prompt_length(): mock_encoding = MagicMock() mock_encoding.encode.side_effect = lambda x: [1] * len(x) - with mock.patch('tiktoken.encoding_for_model', return_value=mock_encoding): + with mock.patch("tiktoken.encoding_for_model", return_value=mock_encoding): max_length = get_max_prompt_length(prompts, model) expected_length = max(len(prompt) for prompt in prompts) assert max_length == expected_length + def test_select_model_cost_priority(): prompts = ["short prompt"] llms = [LLMModels.GPT3, LLMModels.GPT4] @@ -54,11 +59,12 @@ def test_select_model_cost_priority(): mock_encoding = MagicMock() mock_encoding.encode.side_effect = lambda x: [1] * len(x) - with mock.patch('tiktoken.encoding_for_model', return_value=mock_encoding): + with mock.patch("tiktoken.encoding_for_model", return_value=mock_encoding): selected_model = select_model(prompts, llms, models, priority) assert selected_model == models[LLMModels.GPT3] + def test_select_model_performance_priority(): prompts = ["short prompt"] llms = [LLMModels.GPT4, LLMModels.GPT3] @@ -94,11 +100,12 @@ def test_select_model_performance_priority(): mock_encoding = MagicMock() mock_encoding.encode.side_effect = lambda x: [1] * len(x) - with mock.patch('tiktoken.encoding_for_model', return_value=mock_encoding): + with mock.patch("tiktoken.encoding_for_model", return_value=mock_encoding): selected_model = select_model(prompts, llms, models, priority) assert selected_model == models[LLMModels.GPT4] + def test_select_model_no_model_found(): prompts = ["this is a very long prompt that exceeds model max length"] llms = [LLMModels.GPT3] @@ -122,11 +129,12 @@ def test_select_model_no_model_found(): mock_encoding = MagicMock() mock_encoding.encode.side_effect = lambda x: [1] * 50 - with mock.patch('tiktoken.encoding_for_model', return_value=mock_encoding): + with mock.patch("tiktoken.encoding_for_model", return_value=mock_encoding): selected_model = select_model(prompts, llms, models, priority) assert selected_model is None + def test_select_model_unknown_priority(): prompts = ["prompt"] llms = [LLMModels.GPT3] @@ -150,7 +158,7 @@ def test_select_model_unknown_priority(): mock_encoding = MagicMock() mock_encoding.encode.side_effect = lambda x: [1] * len(x) - with mock.patch('tiktoken.encoding_for_model', return_value=mock_encoding): + with mock.patch("tiktoken.encoding_for_model", return_value=mock_encoding): selected_model = select_model(prompts, llms, models, priority) assert selected_model is None diff --git a/tests/test_main.py b/tests/test_main.py index 2c14f32..bde4b61 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,5 +1,5 @@ import pytest -from unittest.mock import patch, MagicMock +from unittest.mock import patch from doc_generator.main import * from doc_generator.types import LLMModels @@ -7,257 +7,78 @@ @pytest.fixture def mock_questionary(): - with patch('questionary.text') as mock_text, \ - patch('questionary.path') as mock_path, \ - patch('questionary.select') as mock_select, \ - patch('questionary.confirm') as mock_confirm: + with patch("questionary.text") as mock_text, patch( + "questionary.path" + ) as mock_path, patch("questionary.select") as mock_select, patch( + "questionary.confirm" + ) as mock_confirm: yield mock_text, mock_path, mock_select, mock_confirm -# def test_main_readme_mode(mock_questionary): -# mock_text, mock_path, mock_select, mock_confirm = mock_questionary +def test_main_readme_mode(mock_questionary): + mock_text, mock_path, mock_select, mock_confirm = mock_questionary -# # Mock the questionary inputs -# mock_text.side_effect = [ -# 'doc_generator', # name -# 'https://github.com/username/doc_generator', # project_url -# '# Introduction,## Usage' # headings -# ] + # Mock the questionary inputs + mock_text.return_value.ask.side_effect = [ + "doc_generator", # name + "https://github.com/username/doc_generator", # project_url + "# Introduction,## Usage", # headings + ] -# mock_path.side_effect = [ -# './doc_generator/', # project_root -# './output/doc_generator/', # output_dir -# None # peft_model_path -# ] + mock_path.return_value.ask.side_effect = [ + "./doc_generator/", # project_root + "./output/doc_generator/", # output_dir + None, # peft_model_path + ] -# mock_select.side_effect = [ -# 'Readme', # mode -# LLMModels.TINYLLAMA_1p1B_CHAT_GGUF.value, # model_name -# 'cpu' # device -# ] + mock_select.return_value.ask.side_effect = [ + "Readme", # mode + LLMModels.TINYLLAMA_1p1B_CHAT_GGUF.value, # model_name + "cpu", # device + ] -# mock_confirm.return_value = False # peft = False + mock_confirm.return_value.ask.return_value = False # peft = False -# with patch('doc_generator.index.index') as mock_index, \ -# patch('doc_generator.query.generate_readme') as mock_generate_readme: + with patch("doc_generator.index.index.index") as mock_index, patch( + "doc_generator.query.query.generate_readme" + ) as mock_generate_readme: -# main() + main() -# # Assert that index and generate_readme were called -# mock_index.assert_called_once() -# mock_generate_readme.assert_called_once() + # Assert that index and generate_readme were called + mock_index.assert_called_once() + mock_generate_readme.assert_called_once() -# def test_main_query_mode(mock_questionary): -# mock_text, mock_path, mock_select, mock_confirm = mock_questionary +def test_main_query_mode(mock_questionary): + mock_text, mock_path, mock_select, mock_confirm = mock_questionary -# # Mock the questionary inputs -# mock_text.side_effect = [ -# 'doc_generator', # name -# 'https://github.com/username/doc_generator' # project_url -# ] + # Mock the questionary inputs + mock_text.return_value.ask.side_effect = [ + "doc_generator", # name + "https://github.com/username/doc_generator", # project_url + ] -# mock_path.side_effect = [ -# './doc_generator/', # project_root -# './output/doc_generator/', # output_dir -# None # peft_model_path -# ] + mock_path.return_value.ask.side_effect = [ + "./doc_generator/", # project_root + "./output/doc_generator/", # output_dir + None, # peft_model_path + ] -# mock_select.side_effect = [ -# 'Query', # mode -# LLMModels.TINYLLAMA_1p1B_CHAT_GGUF.value, # model_name -# 'cpu' # device -# ] + mock_select.return_value.ask.side_effect = [ + "Query", # mode + LLMModels.TINYLLAMA_1p1B_CHAT_GGUF.value, # model_name + "cpu", # device + ] -# mock_confirm.return_value = False # peft = False + mock_confirm.return_value.ask.return_value = False # peft = False -# with patch('doc_generator.index.index') as mock_index, \ -# patch('doc_generator.query.query') as mock_query: + with patch("doc_generator.index.index.index") as mock_index, patch( + "doc_generator.query.query.query" + ) as mock_query: -# main() + main() -# # Assert that index and query were called -# mock_index.assert_called_once() -# mock_query.assert_called_once() - - -# def test_main_with_peft(mock_questionary): -# mock_text, mock_path, mock_select, mock_confirm = mock_questionary - -# # Mock the questionary inputs -# mock_text.side_effect = [ -# 'doc_generator', # name -# 'https://github.com/username/doc_generator', # project_url -# '# Introduction,## Usage' # headings -# ] - -# mock_path.side_effect = [ -# './doc_generator/', # project_root -# './output/doc_generator/', # output_dir -# './output/model/' # peft_model_path -# ] - -# mock_select.side_effect = [ -# 'Readme', # mode -# LLMModels.TINYLLAMA_1p1B_CHAT_GGUF.value, # model_name -# 'cpu' # device -# ] - -# mock_confirm.return_value = True # peft = True - -# with patch('doc_generator.index.index') as mock_index, \ -# patch('doc_generator.query.generate_readme') as mock_generate_readme: - -# main() - -# # Assert that index and generate_readme were called -# mock_index.assert_called_once() -# mock_generate_readme.assert_called_once() - - -# def test_main_all_models(mock_questionary): -# all_models = [model.value for model in LLMModels] - -# for model_name in all_models: -# mock_text, mock_path, mock_select, mock_confirm = mock_questionary - -# # Mock the questionary inputs -# mock_text.side_effect = [ -# 'doc_generator', # name -# 'https://github.com/username/doc_generator', # project_url -# '# Introduction,## Usage' # headings -# ] - -# mock_path.side_effect = [ -# './doc_generator/', # project_root -# './output/doc_generator/', # output_dir -# None # peft_model_path -# ] - -# mock_select.side_effect = [ -# 'Readme', # mode -# model_name, # model_name -# 'cpu' # device -# ] - -# mock_confirm.return_value = False # peft = False - -# with patch('doc_generator.index.index') as mock_index, \ -# patch('doc_generator.query.generate_readme') as mock_generate_readme: - -# main() - -# # Assert that index and generate_readme were called -# mock_index.assert_called_once() -# mock_generate_readme.assert_called_once() - -# # Reset mocks for next iteration -# mock_index.reset_mock() -# mock_generate_readme.reset_mock() -# mock_text.reset_mock() -# mock_path.reset_mock() -# mock_select.reset_mock() -# mock_confirm.reset_mock() - - -# def test_main_invalid_url(mock_questionary): -# mock_text, mock_path, mock_select, mock_confirm = mock_questionary - -# # Mock the questionary inputs, including an invalid URL first -# mock_text.side_effect = [ -# 'doc_generator', # name -# 'invalid_url', # invalid project_url -# 'https://github.com/username/doc_generator', # valid project_url -# '# Introduction,## Usage' # headings -# ] - -# mock_path.side_effect = [ -# './doc_generator/', # project_root -# './output/doc_generator/', # output_dir -# None # peft_model_path -# ] - -# mock_select.side_effect = [ -# 'Readme', # mode -# LLMModels.TINYLLAMA_1p1B_CHAT_GGUF.value, # model_name -# 'cpu' # device -# ] - -# mock_confirm.return_value = False # peft = False - -# with patch('doc_generator.index.index') as mock_index, \ -# patch('doc_generator.query.generate_readme') as mock_generate_readme: - -# main() - -# # Assert that index and generate_readme were called -# mock_index.assert_called_once() -# mock_generate_readme.assert_called_once() - - -# def test_main_device_gpu(mock_questionary): -# mock_text, mock_path, mock_select, mock_confirm = mock_questionary - -# # Mock the questionary inputs -# mock_text.side_effect = [ -# 'doc_generator', # name -# 'https://github.com/username/doc_generator', # project_url -# '# Introduction,## Usage' # headings -# ] - -# mock_path.side_effect = [ -# './doc_generator/', # project_root -# './output/doc_generator/', # output_dir -# None # peft_model_path -# ] - -# mock_select.side_effect = [ -# 'Readme', # mode -# LLMModels.TINYLLAMA_1p1B_CHAT_GGUF.value, # model_name -# 'gpu' # device -# ] - -# mock_confirm.return_value = False # peft = False - -# with patch('doc_generator.index.index') as mock_index, \ -# patch('doc_generator.query.generate_readme') as mock_generate_readme: - -# main() - -# # Assert that index and generate_readme were called -# mock_index.assert_called_once() -# mock_generate_readme.assert_called_once() - - -# def test_main_default_model(mock_questionary): -# mock_text, mock_path, mock_select, mock_confirm = mock_questionary - -# # Mock the questionary inputs with an invalid model name to trigger default case -# mock_text.side_effect = [ -# 'doc_generator', # name -# 'https://github.com/username/doc_generator', # project_url -# '# Introduction,## Usage' # headings -# ] - -# mock_path.side_effect = [ -# './doc_generator/', # project_root -# './output/doc_generator/', # output_dir -# None # peft_model_path -# ] - -# mock_select.side_effect = [ -# 'Readme', # mode -# 'InvalidModelName', # invalid model_name -# 'cpu' # device -# ] - -# mock_confirm.return_value = False # peft = False - -# with patch('doc_generator.index.index') as mock_index, \ -# patch('doc_generator.query.generate_readme') as mock_generate_readme: - -# main() - -# # Assert that index and generate_readme were called -# mock_index.assert_called_once() -# mock_generate_readme.assert_called_once() + # Assert that index and query were called + mock_index.assert_called_once() + mock_query.assert_called_once() diff --git a/tests/test_types.py b/tests/test_types.py index 6fc3e93..d59c94b 100644 --- a/tests/test_types.py +++ b/tests/test_types.py @@ -15,35 +15,61 @@ LLMModelDetails, ) + def test_llm_models(): # Test that all enum members are accessible and correct assert LLMModels.GPT3 == "gpt-3.5-turbo" assert LLMModels.GPT4 == "gpt-4" assert LLMModels.GPT432k == "gpt-4-32k" - assert LLMModels.TINYLLAMA_1p1B_CHAT_GGUF == "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF" + assert ( + LLMModels.TINYLLAMA_1p1B_CHAT_GGUF + == "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF" + ) assert LLMModels.LLAMA2_7B_CHAT_GPTQ == "TheBloke/Llama-2-7B-Chat-GPTQ" assert LLMModels.LLAMA2_13B_CHAT_GPTQ == "TheBloke/Llama-2-13B-Chat-GPTQ" - assert LLMModels.CODELLAMA_7B_INSTRUCT_GPTQ == "TheBloke/CodeLlama-7B-Instruct-GPTQ" - assert LLMModels.CODELLAMA_13B_INSTRUCT_GPTQ == "TheBloke/CodeLlama-13B-Instruct-GPTQ" + assert ( + LLMModels.CODELLAMA_7B_INSTRUCT_GPTQ + == "TheBloke/CodeLlama-7B-Instruct-GPTQ" + ) + assert ( + LLMModels.CODELLAMA_13B_INSTRUCT_GPTQ + == "TheBloke/CodeLlama-13B-Instruct-GPTQ" + ) assert LLMModels.LLAMA2_7B_CHAT_HF == "meta-llama/Llama-2-7b-chat-hf" assert LLMModels.LLAMA2_13B_CHAT_HF == "meta-llama/Llama-2-13b-chat-hf" - assert LLMModels.CODELLAMA_7B_INSTRUCT_HF == "meta-llama/CodeLlama-7b-Instruct-hf" - assert LLMModels.CODELLAMA_13B_INSTRUCT_HF == "meta-llama/CodeLlama-13b-Instruct-hf" + assert ( + LLMModels.CODELLAMA_7B_INSTRUCT_HF + == "meta-llama/CodeLlama-7b-Instruct-hf" + ) + assert ( + LLMModels.CODELLAMA_13B_INSTRUCT_HF + == "meta-llama/CodeLlama-13b-Instruct-hf" + ) assert LLMModels.GOOGLE_GEMMA_2B_INSTRUCT == "google/gemma-2b-it" assert LLMModels.GOOGLE_GEMMA_7B_INSTRUCT == "google/gemma-7b-it" assert LLMModels.GOOGLE_CODEGEMMA_2B_INSTRUCT == "google/codegemma-2b-it" assert LLMModels.GOOGLE_CODEGEMMA_7B_INSTRUCT == "google/codegemma-7b-it" - assert LLMModels.GOOGLE_GEMMA_2B_INSTRUCT_GGUF == "bartowski/gemma-2-2b-it-GGUF" + assert ( + LLMModels.GOOGLE_GEMMA_2B_INSTRUCT_GGUF + == "bartowski/gemma-2-2b-it-GGUF" + ) + def test_priority(): # Test that all enum members are accessible and correct assert Priority.COST == "cost" assert Priority.PERFORMANCE == "performance" + def test_autodoc_readme_config(): headings_input = "# Introduction, ## Usage, ### Installation" config = AutodocReadmeConfig(headings=headings_input) - assert config.headings == ["# Introduction", "## Usage", "### Installation"] + assert config.headings == [ + "# Introduction", + "## Usage", + "### Installation", + ] + def test_autodoc_user_config(): llms = [LLMModels.GPT3, LLMModels.GPT4] @@ -51,6 +77,7 @@ def test_autodoc_user_config(): assert config.llms == llms assert config.streaming is True + def test_autodoc_repo_config(): config = AutodocRepoConfig( name="MyProject", @@ -89,6 +116,7 @@ def test_autodoc_repo_config(): assert config.peft_model_path is None assert config.device == "cpu" + def test_file_summary(): summary = FileSummary( file_name="test.py", @@ -105,6 +133,7 @@ def test_file_summary(): assert summary.questions == "What does this file do?" assert summary.checksum == "abc123" + def test_process_file_params(): params = ProcessFileParams( file_name="test.py", @@ -123,6 +152,7 @@ def test_process_file_params(): assert params.target_audience == "developers" assert params.link_hosted is False + def test_folder_summary(): folder = FolderSummary( folder_name="src", @@ -143,6 +173,7 @@ def test_folder_summary(): assert folder.questions == "What is in this folder?" assert folder.checksum == "def456" + def test_process_folder_params(): should_ignore_func = MagicMock(return_value=False) params = ProcessFolderParams( @@ -167,6 +198,7 @@ def test_process_folder_params(): assert params.should_ignore("test") is False should_ignore_func.assert_called_with("test") + def test_traverse_file_system_params(): process_file_func = MagicMock() process_folder_func = MagicMock() @@ -193,6 +225,7 @@ def test_traverse_file_system_params(): assert params.target_audience == "developers" assert params.link_hosted is False + def test_llm_model_details(): llm_mock = MagicMock() details = LLMModelDetails( diff --git a/tests/utils/test_HNSWLib.py b/tests/utils/test_HNSWLib.py index 6921e3b..115d5fd 100644 --- a/tests/utils/test_HNSWLib.py +++ b/tests/utils/test_HNSWLib.py @@ -1,5 +1,4 @@ import os -import json import pytest import numpy as np from unittest.mock import MagicMock, patch @@ -7,10 +6,8 @@ from doc_generator.utils.HNSWLib import ( HNSWLib, HNSWLibArgs, - SaveableVectorStore, ) from langchain_community.docstore.in_memory import InMemoryDocstore -from langchain_core.embeddings.embeddings import Embeddings from langchain_core.documents import Document diff --git a/tests/utils/test_file_utils.py b/tests/utils/test_file_utils.py index d38b360..097484c 100644 --- a/tests/utils/test_file_utils.py +++ b/tests/utils/test_file_utils.py @@ -1,4 +1,3 @@ -import pytest from doc_generator.utils.file_utils import ( get_file_name, github_file_url, diff --git a/tests/utils/test_llm_utils.py b/tests/utils/test_llm_utils.py index e8a4de9..9cffa60 100644 --- a/tests/utils/test_llm_utils.py +++ b/tests/utils/test_llm_utils.py @@ -3,7 +3,6 @@ import pytest from unittest.mock import patch, MagicMock -import torch from doc_generator.utils.llm_utils import ( get_gemma_chat_model, @@ -15,9 +14,8 @@ total_index_cost_estimate, get_embeddings, ) -from doc_generator.types import LLMModelDetails, LLMModels -from langchain_openai import ChatOpenAI, OpenAIEmbeddings -from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline +from doc_generator.types import LLMModelDetails +from langchain_openai import ChatOpenAI def test_get_gemma_chat_model_with_peft(): diff --git a/tests/utils/test_traverse_file_system.py b/tests/utils/test_traverse_file_system.py index 9719405..61bdf4b 100644 --- a/tests/utils/test_traverse_file_system.py +++ b/tests/utils/test_traverse_file_system.py @@ -1,4 +1,3 @@ -import pytest from unittest.mock import MagicMock, mock_open, patch from doc_generator.utils.traverse_file_system import traverse_file_system