+
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion lib/sycamore/sycamore/connectors/aryn/ArynReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,10 +150,16 @@ def _to_doc(self, doc: dict[str, Any]) -> dict[str, Any]:
client = self.Client.from_client_params(self._client_params)
aryn_client = client._client

doc_id = doc["doc_id"]
doc = aryn_client.get_doc(self._query_params.docset_id, doc["doc_id"])
elements = doc.get("elements", [])
document = Document(**doc)
document.data["elements"] = [create_element(**element) for element in elements]
document.doc_id = doc_id
document.data["elements"] = []
for json_element in elements:
element = create_element(**json_element)
element.data["doc_id"] = json_element["id"]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there ever a case where an element has no "id" key?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, elements always have an "id".

document.data["elements"].append(element)
return {"doc": Document.serialize(document)}

def execute(self, **kwargs) -> "Dataset":
Expand Down
5 changes: 3 additions & 2 deletions lib/sycamore/sycamore/transforms/extract_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def __init__(
llm: LLM,
schema_name: Optional[str] = None,
schema: Optional[Union[dict[str, str], Schema]] = None,
num_of_elements: int = 10,
num_of_elements: Optional[int] = None,
prompt_formatter: Callable[[list[Element]], str] = element_list_formatter,
):
super().__init__()
Expand Down Expand Up @@ -221,7 +221,8 @@ def as_llm_map(self, child: Optional[Node], **kwargs) -> Node:

if self._schema_name is not None:
prompt = prompt.set(entity=self._schema_name)
prompt = prompt.set(num_elements=self._num_of_elements)
if self._num_of_elements is not None:
prompt = prompt.set(num_elements=self._num_of_elements)
if self._prompt_formatter is not element_list_formatter:
prompt = prompt.set(prompt_formatter=self._prompt_formatter)

Expand Down
4 changes: 2 additions & 2 deletions lib/sycamore/sycamore/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -858,9 +858,9 @@ def aryn(
raise e
client_params = ArynWriterClientParams(aryn_url, aryn_api_key)
target_params = ArynWriterTargetParams(docset_id)
ds = ArynWriter(self.plan, client_params=client_params, target_params=target_params, **kwargs)
writer: Node = ArynWriter(self.plan, client_params=client_params, target_params=target_params, **kwargs)

return self._maybe_execute(ds, True)
return self._maybe_execute(writer, True)

def _maybe_execute(
self, node: Node, execute: bool, client: Optional[BaseDBWriter.Client] = None
Expand Down
Loading
点击 这是indexloc提供的php浏览器服务,不要输入任何密码和下载