Skip to content

Commit

Permalink
llamaindex-cli to handle glob patterns correctly
Browse files Browse the repository at this point in the history
This change modifies llamaindex-cli so that its --files argument
handles glob patterns correctly.

To handle globs like the example given in the issue, the argument's
nargs value must be set to "+" so that argparse returns a list of
files or patterns.

Because argparse now returns a list, the way the files are
processed had to be restructured, along with the signature of
the function.

Fixes run-llama#11798

Signed-off-by: Eric Brown <eric_wade_brown@yahoo.com>
  • Loading branch information
ericwb committed Feb 24, 2025
1 parent 81d4b87 commit 421da93
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 9 deletions.
7 changes: 4 additions & 3 deletions docs/docs/getting_started/starter_tools/rag_cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,15 @@ After that, you can start using the tool:

```shell
$ llamaindex-cli rag -h
usage: llamaindex-cli rag [-h] [-q QUESTION] [-f FILES] [-c] [-v] [--clear] [--create-llama]
usage: llamaindex-cli rag [-h] [-q QUESTION] [-f FILES [FILES ...]] [-c] [-v] [--clear] [--create-llama]

options:
-h, --help show this help message and exit
-q QUESTION, --question QUESTION
The question you want to ask.
-f FILES, --files FILES
The name of the file or directory you want to ask a question about,such as "file.pdf".
-f, --files FILES [FILES ...]
The name of the file(s) or directory you want to ask a question about,such
as "file.pdf". Supports globs like "*.py".
-c, --chat If flag is present, opens a chat REPL.
-v, --verbose Whether to print out verbose information during execution.
--clear Clears out all currently embedded data.
Expand Down
16 changes: 10 additions & 6 deletions llama-index-cli/llama_index/cli/rag/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from argparse import ArgumentParser
from glob import iglob
from pathlib import Path
from typing import Any, Callable, Dict, Optional, Union, cast
from typing import Any, Callable, Dict, List, Optional, Union, cast

from llama_index.core import (
Settings,
Expand Down Expand Up @@ -176,7 +176,7 @@ def chat_engine_from_query_pipeline(

async def handle_cli(
self,
files: Optional[str] = None,
files: Optional[List[str]] = None,
question: Optional[str] = None,
chat: bool = False,
verbose: bool = False,
Expand Down Expand Up @@ -205,8 +205,11 @@ async def handle_cli(
if self.verbose:
print("Saving/Loading from persist_dir: ", self.persist_dir)
if files is not None:
expanded_files = []
for pattern in files:
expanded_files.extend(iglob(pattern, recursive=True))
documents = []
for _file in iglob(files, recursive=True):
for _file in expanded_files:
_file = os.path.abspath(_file)
if os.path.isdir(_file):
reader = SimpleDirectoryReader(
Expand All @@ -228,7 +231,7 @@ async def handle_cli(

# Append the `--files` argument to the history file
with open(f"{self.persist_dir}/{RAG_HISTORY_FILE_NAME}", "a") as f:
f.write(files + "\n")
f.write(str(files) + "\n")

if create_llama:
if shutil.which("npx") is None:
Expand Down Expand Up @@ -337,9 +340,10 @@ def add_parser_args(
"-f",
"--files",
type=str,
nargs="+",
help=(
"The name of the file or directory you want to ask a question about,"
'such as "file.pdf".'
"The name of the file(s) or directory you want to ask a question about,"
'such as "file.pdf". Supports globs like "*.py".'
),
)
parser.add_argument(
Expand Down
1 change: 1 addition & 0 deletions llama-index-cli/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ black = {extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"}
codespell = {extras = ["toml"], version = ">=v2.2.6"}
ipython = "8.10.0"
jupyter = "^1.0.0"
llama-index-vector-stores-chroma = "^0.4.1"
mypy = "0.991"
pre-commit = "3.2.0"
pylint = "2.15.10"
Expand Down
16 changes: 16 additions & 0 deletions llama-index-cli/tests/test_rag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import glob
from unittest import mock


from llama_index.cli import command_line
from llama_index.cli.rag import RagCLI


@mock.patch.object(RagCLI, "handle_cli", return_value="noop")
@mock.patch(
    "sys.argv",
    [
        "llamaindex-cli",
        "rag",
        "--files",
        # A quoted pattern reaches the CLI unexpanded. Passing one literally
        # also guarantees that ``--files`` (declared with nargs="+") always
        # receives at least one value, even when the glob below matches
        # nothing in the current working directory.
        "*.py",
        # An unquoted pattern is expanded by the shell into individual paths
        # before the CLI sees it; emulate that with an explicit expansion.
        *glob.glob("**/*.py", recursive=True),
    ],
)
def test_handle_cli_files(mock_handle_cli) -> None:
    """Check that ``rag --files`` accepts several values and runs the handler.

    ``RagCLI.handle_cli`` is patched out, so this only exercises argument
    parsing and dispatch: the command line mixes a literal glob pattern with
    already-expanded paths and must result in exactly one ``handle_cli`` call.
    """
    command_line.main()
    mock_handle_cli.assert_called_once()

0 comments on commit 421da93

Please sign in to comment.