+
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion lib/sycamore/sycamore/transforms/llm_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,17 @@ def _query_text_object(
.render(doc=object)
)
else:
object_name = "ELEMENT" if isinstance(object, Element) else "DOCUMENT"
if objectPrev and objectPrev.text_representation:
prompt = self._prompt + "\n" + objectPrev.text_representation + "\n\n" + object.text_representation
prompt = (
self._prompt
+ "\n"
+ f"{object_name} 1: \n\n"
+ objectPrev.text_representation
+ "\n\n"
+ f"{object_name} 2: \n"
+ object.text_representation
)
else:
prompt = self._prompt + "\n" + object.text_representation
prompt_kwargs = {"prompt": prompt}
Expand Down
16 changes: 5 additions & 11 deletions lib/sycamore/sycamore/transforms/merge_elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,11 @@ def should_merge(self, element1: Element, element2: Element) -> bool:
def merge(self, element1: Element, element2: Element) -> Element:
pass

@abstractmethod
def preprocess_element(self, element: Element) -> Element:
pass
return element

@abstractmethod
def postprocess_element(self, element: Element) -> Element:
pass
return element

@timetrace("mergeElem")
def merge_elements(self, document: Document) -> Document:
Expand Down Expand Up @@ -443,7 +441,7 @@ class TableMerger(ElementMerger):
Respond with only 'true' or 'false' based on your certainty that the second table is a continuation. \
Certainty is determined if either of the two conditions is true."

regex_pattern = r"table \d+"
regex_pattern = r"table \\d+"

merger = TableMerger(llm_prompt = prompt, llm=llm)

Expand Down Expand Up @@ -481,8 +479,10 @@ def merge_elements(self, document: Document) -> Document:
for element in table_elements[1:]:
if self.should_merge(new_table_elements[-1], element):
new_table_elements[-1] = self.merge(new_table_elements[-1], element)
new_table_elements[-1]["properties"]["table_continuation"] = True
else:
new_table_elements.append(element)
new_table_elements[-1]["properties"]["table_continuation"] = False
other_elements.extend(new_table_elements)
document.elements = other_elements
bbox_sort_document(document)
Expand Down Expand Up @@ -607,12 +607,6 @@ def process_llm_query(self, document):
llm_results = llm_query_agent.execute_query(document)
return llm_results

def preprocess_element(self, elem: Element) -> Element:
return elem

def postprocess_element(self, elem: Element) -> Element:
return elem


class Merge(SingleThreadUser, NonGPUUser, Map):
"""
Expand Down
Loading
点击 这是indexloc提供的php浏览器服务,不要输入任何密码和下载