这是indexloc提供的服务,不要输入任何密码
Skip to content

Commit be52ff8

Browse files
committed
Add searx.results module
Based on #1412
1 parent 50ec821 commit be52ff8

File tree

3 files changed

+198
-19
lines changed

3 files changed

+198
-19
lines changed

docs/src/searx.results.models.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
.. _results models:
2+
3+
====================
4+
searx.results.models
5+
====================
6+
7+
.. automodule:: searx.results.models
8+
:members:
9+
:undoc-members:

searx/results.py renamed to searx/results/__init__.py

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,13 @@
22
from collections import defaultdict
33
from operator import itemgetter
44
from threading import RLock
5-
from typing import List, NamedTuple, Set
5+
from typing import List, NamedTuple, Set, Optional
66
from urllib.parse import urlparse, unquote
77

88
from searx import logger
99
from searx.engines import engines
1010
from searx.metrics import histogram_observe, counter_add, count_error
11+
from . import models
1112

1213

1314
CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
@@ -55,7 +56,7 @@ def compare_urls(url_a, url_b):
5556
return unquote(path_a) == unquote(path_b)
5657

5758

58-
def merge_two_infoboxes(infobox1, infobox2):
59+
def merge_two_infoboxes(infobox1: models.Infobox, infobox2: models.Infobox):
5960
# get engines weights
6061
if hasattr(engines[infobox1['engine']], 'weight'):
6162
weight1 = engines[infobox1['engine']].weight
@@ -91,7 +92,7 @@ def merge_two_infoboxes(infobox1, infobox2):
9192

9293
infobox1['urls'] = urls1
9394

94-
if 'img_src' in infobox2:
95+
if infobox2.get('img_src') is not None:
9596
img1 = infobox1.get('img_src', None)
9697
img2 = infobox2.get('img_src')
9798
if img1 is None:
@@ -127,7 +128,7 @@ def merge_two_infoboxes(infobox1, infobox2):
127128
infobox1['content'] = content2
128129

129130

130-
def result_score(result):
131+
def result_score(result: models.Result):
131132
weight = 1.0
132133

133134
for result_engine in result['engines']:
@@ -173,18 +174,18 @@ class ResultContainer:
173174

174175
def __init__(self):
175176
super().__init__()
176-
self._merged_results = []
177-
self.infoboxes = []
178-
self.suggestions = set()
179-
self.answers = {}
180-
self.corrections = set()
177+
self._merged_results: List[models.MainResult] = []
178+
self.infoboxes: List[models.Infobox] = []
179+
self.suggestions: Set[models.Suggestion] = set()
180+
self.answers: Set[models.Answer] = {}
181+
self.corrections: Set[models.Correction] = set()
181182
self._number_of_results = []
182183
self.engine_data = defaultdict(dict)
183-
self._closed = False
184-
self.paging = False
184+
self._closed: bool = False
185+
self.paging: bool = False
185186
self.unresponsive_engines: Set[UnresponsiveEngine] = set()
186187
self.timings: List[Timing] = []
187-
self.redirect_url = None
188+
self.redirect_url: Optional[str] = None
188189
self.on_result = lambda _: True
189190
self._lock = RLock()
190191

@@ -234,7 +235,7 @@ def extend(self, engine_name, results):
234235
if not self.paging and standard_result_count > 0 and engine_name in engines and engines[engine_name].paging:
235236
self.paging = True
236237

237-
def _merge_infobox(self, infobox):
238+
def _merge_infobox(self, infobox: models.Infobox):
238239
add_infobox = True
239240
infobox_id = infobox.get('id', None)
240241
infobox['engines'] = set([infobox['engine']])
@@ -249,7 +250,7 @@ def _merge_infobox(self, infobox):
249250
if add_infobox:
250251
self.infoboxes.append(infobox)
251252

252-
def _is_valid_url_result(self, result, error_msgs):
253+
def _is_valid_url_result(self, result: models.UrlResult, error_msgs):
253254
if 'url' in result:
254255
if not isinstance(result['url'], str):
255256
logger.debug('result: invalid URL: %s', str(result))
@@ -269,7 +270,7 @@ def _is_valid_url_result(self, result, error_msgs):
269270

270271
return True
271272

272-
def _normalize_url_result(self, result):
273+
def _normalize_url_result(self, result: models.UrlResult):
273274
"""Return True if the result is valid"""
274275
result['parsed_url'] = urlparse(result['url'])
275276

@@ -290,7 +291,7 @@ def _normalize_url_result(self, result):
290291
if result.get('content'):
291292
result['content'] = WHITESPACE_REGEX.sub(' ', result['content'])
292293

293-
def __merge_url_result(self, result, position):
294+
def __merge_url_result(self, result: models.UrlResult, position: int):
294295
result['engines'] = set([result['engine']])
295296
with self._lock:
296297
duplicated = self.__find_duplicated_http_result(result)
@@ -302,7 +303,7 @@ def __merge_url_result(self, result, position):
302303
result['positions'] = [position]
303304
self._merged_results.append(result)
304305

305-
def __find_duplicated_http_result(self, result):
306+
def __find_duplicated_http_result(self, result: models.UrlResult):
306307
result_template = result.get('template')
307308
for merged_result in self._merged_results:
308309
if 'parsed_url' not in merged_result:
@@ -320,7 +321,7 @@ def __find_duplicated_http_result(self, result):
320321
return merged_result
321322
return None
322323

323-
def __merge_duplicated_http_result(self, duplicated, result, position):
324+
def __merge_duplicated_http_result(self, duplicated: models.UrlResult, result: models.UrlResult, position: int):
324325
# using content with more text
325326
if result_content_len(result.get('content', '')) > result_content_len(duplicated.get('content', '')):
326327
duplicated['content'] = result['content']
@@ -341,7 +342,7 @@ def __merge_duplicated_http_result(self, duplicated, result, position):
341342
duplicated['url'] = result['parsed_url'].geturl()
342343
duplicated['parsed_url'] = result['parsed_url']
343344

344-
def __merge_result_no_url(self, result, position):
345+
def __merge_result_no_url(self, result: models.KeyValueResult, position: int):
345346
result['engines'] = set([result['engine']])
346347
result['positions'] = [position]
347348
with self._lock:

searx/results/models.py

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
from typing import List, Dict, Set
2+
from typing_extensions import TypedDict, NotRequired, Required
3+
from urllib.parse import ParseResult
4+
from datetime import datetime
5+
6+
7+
__all__ = [
8+
'Result',
9+
'UrlResult',
10+
'Answer',
11+
'Correction',
12+
'Suggestion',
13+
'Infobox',
14+
'InfoboxUrl',
15+
'InfoboxImage',
16+
'InfoboxAttribute',
17+
'InfoboxRelatedTopic',
18+
]
19+
20+
21+
class Result(TypedDict):
22+
"""A result of any type"""
23+
24+
engine: str
25+
"""Internal field. DO NOT USE"""
26+
27+
engines: Set[str]
28+
"""Internal field. DO NOT USE"""
29+
30+
positions: List[int]
31+
"""Internal field. DO NOT USE"""
32+
33+
34+
class MainResult(Result):
35+
"""Result that is going to be displayed as a "main" result"""
36+
37+
template: NotRequired[str]
38+
"""Template to display the result. The default value is "default.html".
39+
see searx/templates/simple/result_templates"""
40+
41+
42+
class UrlResult(MainResult):
43+
"""Typical main result: a URL, a title and a short description"""
44+
45+
title: str
46+
"""Title of the result"""
47+
48+
url: str
49+
"""URL of the result"""
50+
51+
parsed_url: NotRequired[ParseResult]
52+
"""Engines don't have to set this value: it is automatically initialized from the url field.
53+
However, plugins have to manually update this field when they change the url field"""
54+
55+
content: NotRequired[str]
56+
iframe_src: NotRequired[str]
57+
audio_src: NotRequired[str]
58+
img_src: NotRequired[str]
59+
thumbnail: NotRequired[str]
60+
publishedDate: NotRequired[datetime]
61+
length: NotRequired[str]
62+
author: NotRequired[str]
63+
metadata: NotRequired[Dict]
64+
"""Dictionary to allow paging"""
65+
66+
67+
class KeyValueResult(MainResult):
68+
"""A set of key-value pairs to display, useful for the DB engines.
69+
70+
The template field must be "key-value.html"
71+
"""
72+
73+
74+
class Answer(Result):
75+
"""Answer item in the result list. The answer result item is used in
76+
the :origin:`results.html <searx/templates/simple/results.html>` template.
77+
An answer item is a dictionary type with dedicated keys and values."""
78+
79+
answer: Required[str]
80+
"""The answer string appended by the engine."""
81+
82+
url: NotRequired[str]
83+
"""A link that is related to the answer (e.g. the origin of the answer)."""
84+
85+
86+
class Correction(Result):
87+
"""Correction item in the result list. The correction result item is used in
88+
the :origin:`results.html <searx/templates/simple/results.html>` template.
89+
A correction item is a dictionary type with dedicated keys and values."""
90+
91+
url: str
92+
"""The SearXNG search URL for the correction term."""
93+
94+
title: str
95+
"""The 'correction' string appended by the engine."""
96+
97+
98+
class Suggestion(Result):
99+
"""Suggestion item in the result list. The suggestion result item is used in
100+
the :origin:`results.html <searx/templates/simple/results.html>` template.
101+
A suggestion item is a dictionary type with dedicated keys and values."""
102+
103+
suggestion: Required[str]
104+
"""The SearXNG search URL for the suggestion term."""
105+
106+
107+
class InfoboxUrl(TypedDict):
108+
"""A list of dictionaries with links shown in the infobox.
109+
A **url** item in the ``infobox.urls`` list is a dictionary
110+
"""
111+
112+
title: str
113+
url: str
114+
entity: str
115+
"""set by some engines but unused"""
116+
117+
official: bool
118+
"""set by some engines but unused (oscar)"""
119+
120+
121+
class InfoboxImage(TypedDict):
122+
src: str
123+
alt: str
124+
125+
126+
class InfoboxAttribute(TypedDict):
127+
"""An **attribute** item in the ``infobox.attributes`` list is a dictionary"""
128+
129+
label: str
130+
value: str
131+
image: InfoboxImage
132+
entity: str
133+
"""set by some engines but unused"""
134+
135+
136+
class InfoboxRelatedTopic(TypedDict):
137+
"""A **topic** item in the ``infobox.relatedTopics`` list is a dictionary"""
138+
139+
suggestion: str
140+
name: str
141+
"""set by some engines but unused"""
142+
143+
144+
class Infobox(Result):
145+
"""Infobox item in the result list. The infobox result item is used in the
146+
:origin:`infobox.html <searx/templates/simple/infobox.html>` template.
147+
An infobox item is a dictionary type with dedicated keys and values.
148+
"""
149+
150+
infobox: Required[str]
151+
"""Name of the infobox (mandatory)."""
152+
153+
id: str
154+
"""URL of the infobox. Will be used to merge infoboxes."""
155+
156+
content: str
157+
"""Content of the infobox (the description)"""
158+
159+
img_src: str
160+
"""URL of the image to show in the infobox"""
161+
162+
urls: List[InfoboxUrl]
163+
"""A list of dictionaries with links shown in the infobox."""
164+
165+
attributes: List[InfoboxAttribute]
166+
"""A list of dictionaries with attributes shown in the infobox"""
167+
168+
relatedTopics: List[InfoboxRelatedTopic]
169+
"""A list of dictionaries with related topics shown in the infobox"""

0 commit comments

Comments
 (0)