Skip to content

Commit

Permalink
fix: update parameters in spider components (#3280)
Browse files (browse the repository at this point in the history)
* fix: make spider tool work

* upgrade spider-client

* [autofix.ci] apply automated fixes

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
  • Loading branch information
WilliamEspegren and autofix-ci[bot] authored Aug 12, 2024
1 parent 775d659 commit e09f157
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 11 deletions.
2 changes: 1 addition & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
-from spider.spider import Spider # type: ignore
+from spider.spider import Spider

from langflow.base.langchain_utilities.spider_constants import MODES
from langflow.custom import Component
@@ -59,7 +59,7 @@ class SpiderTool(Component):
advanced=True,
),
BoolInput(
-name="use_readability",
+name="readability",
display_name="Use Readability",
info="Use readability to pre-process the content for reading.",
advanced=True,
@@ -89,15 +89,15 @@ class SpiderTool(Component):

def crawl(self) -> list[Data]:
if self.params:
-parameters = self.params.data
+parameters = self.params["data"]
else:
parameters = {
-"limit": self.limit,
-"depth": self.depth,
-"blacklist": self.blacklist,
-"whitelist": self.whitelist,
-"use_readability": self.use_readability,
-"request_timeout": self.request_timeout,
+"limit": self.limit if self.limit else None,
+"depth": self.depth if self.depth else None,
+"blacklist": self.blacklist if self.blacklist else None,
+"whitelist": self.whitelist if self.whitelist else None,
+"readability": self.readability,
+"request_timeout": self.request_timeout if self.request_timeout else None,
"metadata": self.metadata,
"return_format": "markdown",
}
@@ -117,5 +117,10 @@ def crawl(self) -> list[Data]:
records = []

for record in result:
-records.append(Data(data={"content": record["content"], "url": record["url"]}))
+if self.metadata:
+records.append(
+Data(data={"content": record["content"], "url": record["url"], "metadata": record["metadata"]})
+)
+else:
+records.append(Data(data={"content": record["content"], "url": record["url"]}))
return records

0 comments on commit e09f157

Please sign in to comment.