From da27f9c2e1a548f911244793251228d31e60c6e5 Mon Sep 17 00:00:00 2001 From: Minoru Osuka Date: Sat, 8 Sep 2018 16:54:58 +0900 Subject: [PATCH] Change CLI flags --- README.md | 595 +++- Versionfile | 2 +- cmd/blast/bulk.go | 6 +- cmd/blast/commands.go | 29 +- cmd/blast/delete.go | 5 +- cmd/blast/flags.go | 51 +- cmd/blast/get.go | 5 +- cmd/blast/join.go | 6 +- cmd/blast/leave.go | 6 +- cmd/blast/peers.go | 5 +- cmd/blast/put.go | 5 +- cmd/blast/search.go | 5 +- cmd/blast/snapshot.go | 5 +- cmd/blast/start.go | 136 +- docker-compose.yml | 12 +- etc/index_mapping.json | 2984 +++++++++++++++++ example/bulk_delete_request.json | 228 +- example/bulk_put_delete_request.json | 661 +++- example/bulk_put_request.json | 450 ++- example/doc_arwiki_1.json | 7 + example/doc_bgwiki_1.json | 7 + example/doc_cawiki_1.json | 7 + example/doc_cswiki_1.json | 7 + example/doc_dawiki_1.json | 7 + example/doc_dewiki_1.json | 7 + example/doc_elwiki_1.json | 7 + .../{enwiki_doc1.json => doc_enwiki_1.json} | 2 +- example/doc_eswiki_1.json | 7 + example/doc_fawiki_1.json | 7 + example/doc_fiwiki_1.json | 7 + example/doc_frwiki_1.json | 7 + example/doc_gawiki_1.json | 7 + example/doc_glwiki_1.json | 7 + example/doc_guwiki_1.json | 7 + example/doc_hiwiki_1.json | 7 + example/doc_huwiki_1.json | 7 + example/doc_hywiki_1.json | 7 + example/doc_idwiki_1.json | 7 + example/doc_itwiki_1.json | 7 + .../{jawiki_doc1.json => doc_jawiki_1.json} | 2 +- example/doc_knwiki_1.json | 7 + example/doc_kowiki_1.json | 7 + example/doc_mlwiki_1.json | 7 + example/doc_nlwiki_1.json | 7 + example/doc_nowiki_1.json | 7 + example/doc_pswiki_1.json | 7 + example/doc_ptwiki_1.json | 7 + example/doc_rowiki_1.json | 7 + example/doc_ruwiki_1.json | 7 + example/doc_svwiki_1.json | 7 + example/doc_tawiki_1.json | 7 + example/doc_tewiki_1.json | 7 + example/doc_thwiki_1.json | 7 + example/doc_trwiki_1.json | 7 + example/doc_zhwiki_1.json | 7 + example/ptwiki_doc1.json | 7 - ...earch_request.json => search_request.json} | 0 
example/wikipedia_index_mapping.json | 257 -- grpc/client/client.go | 17 +- grpc/server/server.go | 22 +- http/handler/get.go | 2 +- http/server/server.go | 29 +- index/bleve/index.go | 77 +- raft/raft.go | 29 +- service/service.go | 25 +- store/boltdb/store.go | 15 +- store/boltdb/store_test.go | 16 +- 67 files changed, 5229 insertions(+), 705 deletions(-) create mode 100644 etc/index_mapping.json create mode 100644 example/doc_arwiki_1.json create mode 100644 example/doc_bgwiki_1.json create mode 100644 example/doc_cawiki_1.json create mode 100644 example/doc_cswiki_1.json create mode 100644 example/doc_dawiki_1.json create mode 100644 example/doc_dewiki_1.json create mode 100644 example/doc_elwiki_1.json rename example/{enwiki_doc1.json => doc_enwiki_1.json} (78%) create mode 100644 example/doc_eswiki_1.json create mode 100644 example/doc_fawiki_1.json create mode 100644 example/doc_fiwiki_1.json create mode 100644 example/doc_frwiki_1.json create mode 100644 example/doc_gawiki_1.json create mode 100644 example/doc_glwiki_1.json create mode 100644 example/doc_guwiki_1.json create mode 100644 example/doc_hiwiki_1.json create mode 100644 example/doc_huwiki_1.json create mode 100644 example/doc_hywiki_1.json create mode 100644 example/doc_idwiki_1.json create mode 100644 example/doc_itwiki_1.json rename example/{jawiki_doc1.json => doc_jawiki_1.json} (54%) create mode 100644 example/doc_knwiki_1.json create mode 100644 example/doc_kowiki_1.json create mode 100644 example/doc_mlwiki_1.json create mode 100644 example/doc_nlwiki_1.json create mode 100644 example/doc_nowiki_1.json create mode 100644 example/doc_pswiki_1.json create mode 100644 example/doc_ptwiki_1.json create mode 100644 example/doc_rowiki_1.json create mode 100644 example/doc_ruwiki_1.json create mode 100644 example/doc_svwiki_1.json create mode 100644 example/doc_tawiki_1.json create mode 100644 example/doc_tewiki_1.json create mode 100644 example/doc_thwiki_1.json create mode 100644 
example/doc_trwiki_1.json create mode 100644 example/doc_zhwiki_1.json delete mode 100644 example/ptwiki_doc1.json rename example/{wikipedia_search_request.json => search_request.json} (100%) delete mode 100644 example/wikipedia_index_mapping.json diff --git a/README.md b/README.md index 1fe2e89..3eebc4d 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ Blast ====== -The Blast is a full text search and indexing server written in [Go](https://golang.org) built on top of the [Bleve](http://www.blevesearch.com), [Bolt](https://github.com/boltdb/bolt) and [Raft](https://github.com/hashicorp/raft). -Blast server provides functions through [gRPC](http://www.grpc.io) ([HTTP/2](https://en.wikipedia.org/wiki/HTTP/2) + [Protocol Buffers](https://developers.google.com/protocol-buffers/)) or traditional [RESTful](https://en.wikipedia.org/wiki/Representational_state_transfer) API ([HTTP/1.1](https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol) + [JSON](http://www.json.org)), and uses [Raft](https://en.wikipedia.org/wiki/Raft_(computer_science)) to achieve consensus across all the instances of the nodes, ensuring that every change made to the system is made to a quorum of nodes, or none at all. +Blast is a full text search and indexing server written in [Go](https://golang.org) and built on top of [Bleve](http://www.blevesearch.com). It provides functions through [gRPC](http://www.grpc.io) ([HTTP/2](https://en.wikipedia.org/wiki/HTTP/2) + [Protocol Buffers](https://developers.google.com/protocol-buffers/)) or traditional [RESTful](https://en.wikipedia.org/wiki/Representational_state_transfer) API ([HTTP/1.1](https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol) + [JSON](http://www.json.org)). +Blast uses the [Raft](https://en.wikipedia.org/wiki/Raft_(computer_science)) consensus algorithm to achieve consensus across all the instances of the nodes, ensuring that every change made to the system is made to a quorum of nodes, or none at all. 
Blast makes it easy for programmers to develop search applications with advanced features. ## Features @@ -149,11 +149,11 @@ Running a Blast node is easy. Start Blast node like so: $ ./bin/blast start --bind-addr=localhost:10000 \ --grpc-addr=localhost:10001 \ --http-addr=localhost:10002 \ - --node-id=node1 \ - --raft-dir=/tmp/blast/noade1/raft \ + --raft-node-id=node1 \ + --raft-dir=/tmp/blast/node1/raft \ --store-dir=/tmp/blast/node1/store \ --index-dir=/tmp/blast/node1/index \ - --index-mapping=./example/wikipedia_index_mapping.json + --index-mapping-file=./etc/index_mapping.json ``` ## Using Blast CLI @@ -165,7 +165,7 @@ You can now put, get, search and delete the document(s) via CLI. Putting a document is as following: ```bash -$ cat ./example/enwiki_doc1.json | xargs -0 ./bin/blast put --grpc-addr=localhost:10001 --pretty-print enwiki_doc1 +$ cat ./example/doc_enwiki_1.json | xargs -0 ./bin/blast put --grpc-addr=localhost:10001 --pretty-print enwiki_1 ``` You can see the result in JSON format. The result of the above command is: @@ -181,18 +181,18 @@ You can see the result in JSON format. The result of the above command is: Getting a document is as following: ```bash -$ ./bin/blast get --grpc-addr=localhost:10001 --pretty-print enwiki_doc1 +$ ./bin/blast get --grpc-addr=localhost:10001 --pretty-print enwiki_1 ``` You can see the result in JSON format. The result of the above command is: ```json { - "id": "enwiki_doc1", + "id": "enwiki_1", "fields": { "_type": "enwiki", "contributor": "unknown", - "text_en": "A search engine is an information retrieval system designed to help find information stored on a computer system. The search results are usually presented in a list and are commonly called hits. 
Search engines help to minimize the time required to find information and the amount of information which must be consulted, akin to other techniques for managing information overload.", + "text_en": "A search engine is an information retrieval system designed to help find information stored on a computer system. The search results are usually presented in a list and are commonly called hits. Search engines help to minimize the time required to find information and the amount of information which must be consulted, akin to other techniques for managing information overload. The most public, visible form of a search engine is a Web search engine which searches for information on the World Wide Web.", "timestamp": "2018-07-04T05:41:00Z", "title_en": "Search engine (computing)" }, @@ -205,7 +205,7 @@ You can see the result in JSON format. The result of the above command is: Deleting a document is as following: ``` -$ ./bin/blast delete --grpc-addr=localhost:10001 --pretty-print enwiki_doc1 +$ ./bin/blast delete --grpc-addr=localhost:10001 --pretty-print enwiki_1 ``` You can see the result in JSON format. The result of the above command is: @@ -228,7 +228,7 @@ You can see the result in JSON format. The result of the above command is: ```json { - "put_count": 3, + "put_count": 36, "success": true } ``` @@ -238,7 +238,7 @@ You can see the result in JSON format. The result of the above command is: Searching documents is as like following: ```bash -$ cat ./example/wikipedia_search_request.json | xargs -0 ./bin/blast search --grpc-addr=localhost:10001 --pretty-print +$ cat ./example/search_request.json | xargs -0 ./bin/blast search --grpc-addr=localhost:10001 --pretty-print ``` You can see the result in JSON format. The result of the above command is: @@ -253,16 +253,16 @@ You can see the result in JSON format. 
The result of the above command is: "other": 0, "terms": [ { - "count": 3, + "count": 12, "term": "unknown" } ], - "total": 3 + "total": 12 }, "Timestamp range": { "date_ranges": [ { - "count": 3, + "count": 12, "end": "2020-12-31T23:59:59Z", "name": "2011 - 2020", "start": "2011-01-01T00:00:00Z" @@ -271,7 +271,7 @@ You can see the result in JSON format. The result of the above command is: "field": "timestamp", "missing": 0, "other": 0, - "total": 3 + "total": 12 } }, "hits": [ @@ -279,12 +279,12 @@ You can see the result in JSON format. The result of the above command is: "fields": { "_type": "enwiki", "contributor": "unknown", - "text_en": "A search engine is an information retrieval system designed to help find information stored on a computer system. The search results are usually presented in a list and are commonly called hits. Search engines help to minimize the time required to find information and the amount of information which must be consulted, akin to other techniques for managing information overload.", + "text_en": "A search engine is an information retrieval system designed to help find information stored on a computer system. The search results are usually presented in a list and are commonly called hits. Search engines help to minimize the time required to find information and the amount of information which must be consulted, akin to other techniques for managing information overload. The most public, visible form of a search engine is a Web search engine which searches for information on the World Wide Web.", "timestamp": "2018-07-04T05:41:00Z", "title_en": "Search engine (computing)" }, - "id": "enwiki_doc1", - "index": "/tmp/blast/node0/index", + "id": "enwiki_1", + "index": "/tmp/blast/node1/index", "locations": { "text_en": { "search": [ @@ -305,6 +305,24 @@ You can see the result in JSON format. 
The result of the above command is: "end": 201, "pos": 33, "start": 195 + }, + { + "array_positions": null, + "end": 421, + "pos": 68, + "start": 415 + }, + { + "array_positions": null, + "end": 444, + "pos": 73, + "start": 438 + }, + { + "array_positions": null, + "end": 466, + "pos": 76, + "start": 458 } ] }, @@ -319,7 +337,196 @@ You can see the result in JSON format. The result of the above command is: ] } }, - "score": 0.18706386191354732, + "score": 0.633816551223398, + "sort": [ + "_score" + ] + }, + { + "fields": { + "_type": "arwiki", + "contributor": "unknown", + "text_ar": "محرك البحث (بالإنجليزية: Search engine) هو نظام لإسترجاع المعلومات صمم للمساعدة على البحث عن المعلومات المخزنة على أي نظام حاسوبي. تعرض نتائج البحث عادة على شكل قائمة لأماكن تواجد المعلومات ومرتبة وفق معايير معينة. تسمح محركات البحث باختصار مدة البحث والتغلب على مشكلة أحجام البيانات المتصاعدة (إغراق معلوماتي).", + "timestamp": "2018-03-25T18:04:00Z", + "title_ar": "محرك بحث" + }, + "id": "arwiki_1", + "index": "/tmp/blast/node1/index", + "locations": { + "text_ar": { + "search": [ + { + "array_positions": null, + "end": 51, + "pos": 4, + "start": 45 + } + ] + } + }, + "score": 0.2584513642405507, + "sort": [ + "_score" + ] + }, + { + "fields": { + "_type": "hiwiki", + "contributor": "unknown", + "text_hi": "ऐसे कम्प्यूटर प्रोग्राम खोजी इंजन (search engine) कहलाते हैं जो किसी कम्प्यूटर सिस्टम पर भण्डारित सूचना में से वांछित सूचना को ढूढ निकालते हैं। ये इंजन प्राप्त परिणामों को प्रायः एक सूची के रूप में प्रस्तुत करते हैं जिससे वांछित सूचना की प्रकृति और उसकी स्थिति का पता चलता है। खोजी इंजन किसी सूचना तक अपेक्षाकृत बहुत कम समय में पहुँचने में हमारी सहायता करते हैं। वे 'सूचना ओवरलोड' से भी हमे बचाते हैं। खोजी इंजन का सबसे प्रचलित रूप 'वेब खोजी इंजन' है जो वर्ल्ड वाइड वेब पर सूचना खोजने के लिये प्रयुक्त होता है।", + "timestamp": "2017-10-19T20:09:00Z", + "title_hi": "खोज इंजन" + }, + "id": "hiwiki_1", + "index": "/tmp/blast/node1/index", + "locations": { + "text_hi": { + "search": [ + { + 
"array_positions": null, + "end": 99, + "pos": 6, + "start": 93 + } + ] + } + }, + "score": 0.2315458066284217, + "sort": [ + "_score" + ] + }, + { + "fields": { + "_type": "itwiki", + "contributor": "unknown", + "text_it": "Nell'ambito delle tecnologie di Internet, un motore di ricerca (in inglese search engine) è un sistema automatico che, su richiesta, analizza un insieme di dati (spesso da esso stesso raccolti) e restituisce un indice dei contenuti disponibili[1] classificandoli in modo automatico in base a formule statistico-matematiche che ne indichino il grado di rilevanza data una determinata chiave di ricerca. Uno dei campi in cui i motori di ricerca trovano maggiore utilizzo è quello dell'information retrieval e nel web. I motori di ricerca più utilizzati nel 2017 sono stati: Google, Bing, Baidu, Qwant, Yandex, Ecosia, DuckDuckGo.", + "timestamp": "2018-07-16T12:20:00Z", + "title_it": "Motore di ricerca" + }, + "id": "itwiki_1", + "index": "/tmp/blast/node1/index", + "locations": { + "text_it": { + "search": [ + { + "array_positions": null, + "end": 81, + "pos": 12, + "start": 75 + } + ] + } + }, + "score": 0.22855799635019447, + "sort": [ + "_score" + ] + }, + { + "fields": { + "_type": "thwiki", + "contributor": "unknown", + "text_th": "เสิร์ชเอนจิน (search engine) หรือ โปรแกรมค้นหา คือ โปรแกรมที่ช่วยในการสืบค้นหาข้อมูล โดยเฉพาะข้อมูลบนอินเทอร์เน็ต โดยครอบคลุมทั้งข้อความ รูปภาพ ภาพเคลื่อนไหว เพลง ซอฟต์แวร์ แผนที่ ข้อมูลบุคคล กลุ่มข่าว และอื่น ๆ ซึ่งแตกต่างกันไปแล้วแต่โปรแกรมหรือผู้ให้บริการแต่ละราย. 
เสิร์ชเอนจินส่วนใหญ่จะค้นหาข้อมูลจากคำสำคัญ (คีย์เวิร์ด) ที่ผู้ใช้ป้อนเข้าไป จากนั้นก็จะแสดงรายการผลลัพธ์ที่มันคิดว่าผู้ใช้น่าจะต้องการขึ้นมา ในปัจจุบัน เสิร์ชเอนจินบางตัว เช่น กูเกิล จะบันทึกประวัติการค้นหาและการเลือกผลลัพธ์ของผู้ใช้ไว้ด้วย และจะนำประวัติที่บันทึกไว้นั้น มาช่วยกรองผลลัพธ์ในการค้นหาครั้งต่อ ๆ ไป", + "timestamp": "2016-06-18T11:06:00Z", + "title_th": "เสิร์ชเอนจิน" + }, + "id": "thwiki_1", + "index": "/tmp/blast/node1/index", + "locations": { + "text_th": { + "search": [ + { + "array_positions": null, + "end": 44, + "pos": 4, + "start": 38 + } + ] + } + }, + "score": 0.2018569611073604, + "sort": [ + "_score" + ] + }, + { + "fields": { + "_type": "zhwiki", + "contributor": "unknown", + "text_zh": "搜索引擎(英语:search engine)是一种信息检索系统,旨在协助搜索存储在计算机系统中的信息。搜索结果一般被称为“hits”,通常会以表单的形式列出。网络搜索引擎是最常见、公开的一种搜索引擎,其功能为搜索万维网上储存的信息.", + "timestamp": "2018-08-27T05:47:00Z", + "title_zh": "搜索引擎" + }, + "id": "zhwiki_1", + "index": "/tmp/blast/node1/index", + "locations": { + "text_zh": { + "search": [ + { + "array_positions": null, + "end": 30, + "pos": 5, + "start": 24 + } + ] + } + }, + "score": 0.19986816567601795, + "sort": [ + "_score" + ] + }, + { + "fields": { + "_type": "bgwiki", + "contributor": "unknown", + "text_bg": "Търсачка или търсеща машина (на английски: Web search engine) е специализиран софтуер за извличане на информация, съхранена в компютърна система или мрежа. Това може да е персонален компютър, Интернет, корпоративна мрежа и т.н. Без допълнителни уточнения, най-често под търсачка се разбира уеб(-)търсачка, която търси в Интернет. Други видове търсачки са корпоративните търсачки, които търсят в интранет мрежите, личните търсачки – за индивидуалните компютри и мобилните търсачки. В търсачката потребителят (търсещият) прави запитване за съдържание, отговарящо на определен критерий (обикновено такъв, който съдържа определени думи и фрази). В резултат се получават списък от точки, които отговарят, пълно или частично, на този критерий. 
Търсачките обикновено използват редовно подновявани индекси, за да оперират бързо и ефикасно. Някои търсачки също търсят в информацията, която е на разположение в нюзгрупите и други големи бази данни. За разлика от Уеб директориите, които се поддържат от хора редактори, търсачките оперират алгоритмично. Повечето Интернет търсачки са притежавани от различни корпорации.", + "timestamp": "2018-07-11T11:03:00Z", + "title_bg": "Търсачка" + }, + "id": "bgwiki_1", + "index": "/tmp/blast/node1/index", + "locations": { + "text_bg": { + "search": [ + { + "array_positions": null, + "end": 88, + "pos": 8, + "start": 82 + } + ] + } + }, + "score": 0.18275270089950202, + "sort": [ + "_score" + ] + }, + { + "fields": { + "_type": "idwiki", + "contributor": "unknown", + "text_id": "Mesin pencari web atau mesin telusur web (bahasa Inggris: web search engine) adalah program komputer yang dirancang untuk melakukan pencarian atas berkas-berkas yang tersimpan dalam layanan www, ftp, publikasi milis, ataupun news group dalam sebuah ataupun sejumlah komputer peladen dalam suatu jaringan. Mesin pencari merupakan perangkat penelusur informasi dari dokumen-dokumen yang tersedia. Hasil pencarian umumnya ditampilkan dalam bentuk daftar yang seringkali diurutkan menurut tingkat akurasi ataupun rasio pengunjung atas suatu berkas yang disebut sebagai hits. Informasi yang menjadi target pencarian bisa terdapat dalam berbagai macam jenis berkas seperti halaman situs web, gambar, ataupun jenis-jenis berkas lainnya. Beberapa mesin pencari juga diketahui melakukan pengumpulan informasi atas data yang tersimpan dalam suatu basis data ataupun direktori web. Sebagian besar mesin pencari dijalankan oleh perusahaan swasta yang menggunakan algoritme kepemilikan dan basis data tertutup, di antaranya yang paling populer adalah safari Google (MSN Search dan Yahoo!). 
Telah ada beberapa upaya menciptakan mesin pencari dengan sumber terbuka (open source), contohnya adalah Htdig, Nutch, Egothor dan OpenFTS.", + "timestamp": "2017-11-20T17:47:00Z", + "title_id": "Mesin pencari web" + }, + "id": "idwiki_1", + "index": "/tmp/blast/node1/index", + "locations": { + "text_id": { + "search": [ + { + "array_positions": null, + "end": 68, + "pos": 11, + "start": 62 + } + ] + } + }, + "score": 0.17060026115019133, "sort": [ "_score" ] @@ -328,12 +535,12 @@ You can see the result in JSON format. The result of the above command is: "fields": { "_type": "ptwiki", "contributor": "unknown", - "text_pt": "Motor de pesquisa (português europeu) ou ferramenta de busca (português brasileiro) ou buscador (em inglês: search engine) é um programa desenhado para procurar palavras-chave fornecidas pelo utilizador em documentos e bases de dados. No contexto da internet, um motor de pesquisa permite procurar palavras-chave em documentos alojados na world wide web, como aqueles que se encontram armazenados em websites.", - "timestamp": "2018-07-04T05:41:00Z", + "text_pt": "Motor de pesquisa (português europeu) ou ferramenta de busca (português brasileiro) ou buscador (em inglês: search engine) é um programa desenhado para procurar palavras-chave fornecidas pelo utilizador em documentos e bases de dados. No contexto da internet, um motor de pesquisa permite procurar palavras-chave em documentos alojados na world wide web, como aqueles que se encontram armazenados em websites. Os motores de busca surgiram logo após o aparecimento da Internet, com a intenção de prestar um serviço extremamente importante: a busca de qualquer informação na rede, apresentando os resultados de uma forma organizada, e também com a proposta de fazer isto de uma maneira rápida e eficiente. A partir deste preceito básico, diversas empresas se desenvolveram, chegando algumas a valer milhões de dólares. 
Entre as maiores empresas encontram-se o Google, o Yahoo, o Bing, o Lycos, o Cadê e, mais recentemente, a Amazon.com com o seu mecanismo de busca A9 porém inativo. Os buscadores se mostraram imprescindíveis para o fluxo de acesso e a conquista novos visitantes. Antes do advento da Web, havia sistemas para outros protocolos ou usos, como o Archie para sites FTP anônimos e o Veronica para o Gopher (protocolo de redes de computadores que foi desenhado para indexar repositórios de documentos na Internet, baseado-se em menus).", + "timestamp": "2017-11-09T14:38:00Z", "title_pt": "Motor de busca" }, - "id": "ptwiki_doc1", - "index": "/tmp/blast/node0/index", + "id": "ptwiki_1", + "index": "/tmp/blast/node1/index", "locations": { "text_pt": { "search": [ @@ -346,40 +553,40 @@ You can see the result in JSON format. The result of the above command is: ] } }, - "score": 0.09273589609475594, + "score": 0.1688012644026126, "sort": [ "_score" ] }, { "fields": { - "_type": "jawiki", + "_type": "frwiki", "contributor": "unknown", - "text_ja": "検索エンジン(けんさくエンジン、英語: search engine)は、狭義にはインターネットに存在する情報(ウェブページ、ウェブサイト、画像フみを提供していたウェブサイトそのものを検索エンジンと呼んだが、現在では様々なサービスが加わったポータルサイト化が進んだため、検索をサービスの一つとしてトを単に検索サイトと呼ぶことはなくなっている。広義には、インターネットに限定せず情報を検索するシステム全般を含む。", - "timestamp": "2018-05-30T00:52:00Z", - "title_ja": "検索エンジン" + "text_fr": "Un moteur de recherche est une application web permettant de trouver des ressources à partir d'une requête sous forme de mots. Les ressources peuvent être des pages web, des articles de forums Usenet, des images, des vidéos, des fichiers, etc. Certains sites web offrent un moteur de recherche comme principale fonctionnalité ; on appelle alors « moteur de recherche » le site lui-même. Ce sont des instruments de recherche sur le web sans intervention humaine, ce qui les distingue des annuaires. 
Ils sont basés sur des « robots », encore appelés « bots », « spiders «, « crawlers » ou « agents », qui parcourent les sites à intervalles réguliers et de façon automatique pour découvrir de nouvelles adresses (URL). Ils suivent les liens hypertextes qui relient les pages les unes aux autres, les uns après les autres. Chaque page identifiée est alors indexée dans une base de données, accessible ensuite par les internautes à partir de mots-clés. C'est par abus de langage qu'on appelle également « moteurs de recherche » des sites web proposant des annuaires de sites web : dans ce cas, ce sont des instruments de recherche élaborés par des personnes qui répertorient et classifient des sites web jugés dignes d'intérêt, et non des robots d'indexation. Les moteurs de recherche ne s'appliquent pas qu'à Internet : certains moteurs sont des logiciels installés sur un ordinateur personnel. Ce sont des moteurs dits « de bureau » qui combinent la recherche parmi les fichiers stockés sur le PC et la recherche parmi les sites Web — on peut citer par exemple Exalead Desktop, Google Desktop et Copernic Desktop Search, Windex Server, etc. On trouve également des métamoteurs, c'est-à-dire des sites web où une même recherche est lancée simultanément sur plusieurs moteurs de recherche, les résultats étant ensuite fusionnés pour être présentés à l'internaute. 
On peut citer dans cette catégorie Ixquick, Mamma, Kartoo, Framabee ou Lilo.", + "timestamp": "2018-05-30T15:15:00Z", + "title_fr": "Moteur de recherche" }, - "id": "jawiki_doc1", - "index": "/tmp/blast/node0/index", + "id": "frwiki_1", + "index": "/tmp/blast/node1/index", "locations": { - "text_ja": { + "text_fr": { "search": [ { "array_positions": null, - "end": 62, - "pos": 11, - "start": 56 + "end": 1662, + "pos": 253, + "start": 1656 } ] } }, - "score": 0.05875099443799347, + "score": 0.14418354794511895, "sort": [ "_score" ] } ], - "max_score": 0.18706386191354732, + "max_score": 0.633816551223398, "request": { "explain": false, "facets": { @@ -429,8 +636,8 @@ You can see the result in JSON format. The result of the above command is: "successful": 1, "total": 1 }, - "took": 318296, - "total_hits": 3 + "took": 571914, + "total_hits": 12 }, "success": true } @@ -446,7 +653,7 @@ Also you can do above commands via HTTP REST API that listened port 10001 (addre Putting a document via HTTP is as following: ```bash -$ curl -X PUT 'http://localhost:10002/rest/enwiki_doc1?pretty-print=true' -d @./example/enwiki_doc1.json +$ curl -X PUT 'http://localhost:10002/rest/enwiki_1?pretty-print=true' -d @./example/doc_enwiki_1.json ``` You can see the result in JSON format. The result of the above request is: @@ -462,18 +669,18 @@ You can see the result in JSON format. The result of the above request is: Getting a document via HTTP is as following: ```bash -$ curl -X GET 'http://localhost:10002/rest/enwiki_doc1?pretty-print=true' +$ curl -X GET 'http://localhost:10002/rest/enwiki_1?pretty-print=true' ``` You can see the result in JSON format. The result of the above request is: ```json { - "id": "enwiki_doc1", + "id": "enwiki_1", "fields": { "_type": "enwiki", "contributor": "unknown", - "text_en": "A search engine is an information retrieval system designed to help find information stored on a computer system. 
The search results are usually presented in a list and are commonly called hits. Search engines help to minimize the time required to find information and the amount of information which must be consulted, akin to other techniques for managing information overload.", + "text_en": "A search engine is an information retrieval system designed to help find information stored on a computer system. The search results are usually presented in a list and are commonly called hits. Search engines help to minimize the time required to find information and the amount of information which must be consulted, akin to other techniques for managing information overload. The most public, visible form of a search engine is a Web search engine which searches for information on the World Wide Web.", "timestamp": "2018-07-04T05:41:00Z", "title_en": "Search engine (computing)" }, @@ -486,7 +693,7 @@ You can see the result in JSON format. The result of the above request is: Deleting a document via HTTP is as following: ```bash -$ curl -X DELETE 'http://localhost:10002/rest/enwiki_doc1?pretty-print=true' +$ curl -X DELETE 'http://localhost:10002/rest/enwiki_1?pretty-print=true' ``` You can see the result in JSON format. The result of the above request is: @@ -509,7 +716,7 @@ You can see the result in JSON format. The result of the above command is: ```json { - "put_count": 3, + "put_count": 36, "success": true } ``` @@ -519,7 +726,7 @@ You can see the result in JSON format. The result of the above command is: Searching documents via HTTP is as following: ```bash -$ curl -X POST 'http://localhost:10002/rest/_search?pretty-print=true' -d @./example/wikipedia_search_request.json +$ curl -X POST 'http://localhost:10002/rest/_search?pretty-print=true' -d @./example/search_request.json ``` You can see the result in JSON format. The result of the above request is: @@ -534,16 +741,16 @@ You can see the result in JSON format. 
The result of the above request is: "other": 0, "terms": [ { - "count": 3, + "count": 12, "term": "unknown" } ], - "total": 3 + "total": 12 }, "Timestamp range": { "date_ranges": [ { - "count": 3, + "count": 12, "end": "2020-12-31T23:59:59Z", "name": "2011 - 2020", "start": "2011-01-01T00:00:00Z" @@ -552,7 +759,7 @@ You can see the result in JSON format. The result of the above request is: "field": "timestamp", "missing": 0, "other": 0, - "total": 3 + "total": 12 } }, "hits": [ @@ -560,12 +767,12 @@ You can see the result in JSON format. The result of the above request is: "fields": { "_type": "enwiki", "contributor": "unknown", - "text_en": "A search engine is an information retrieval system designed to help find information stored on a computer system. The search results are usually presented in a list and are commonly called hits. Search engines help to minimize the time required to find information and the amount of information which must be consulted, akin to other techniques for managing information overload.", + "text_en": "A search engine is an information retrieval system designed to help find information stored on a computer system. The search results are usually presented in a list and are commonly called hits. Search engines help to minimize the time required to find information and the amount of information which must be consulted, akin to other techniques for managing information overload. The most public, visible form of a search engine is a Web search engine which searches for information on the World Wide Web.", "timestamp": "2018-07-04T05:41:00Z", "title_en": "Search engine (computing)" }, - "id": "enwiki_doc1", - "index": "/tmp/blast/node0/index", + "id": "enwiki_1", + "index": "/tmp/blast/node1/index", "locations": { "text_en": { "search": [ @@ -586,6 +793,24 @@ You can see the result in JSON format. 
The result of the above request is: "end": 201, "pos": 33, "start": 195 + }, + { + "array_positions": null, + "end": 421, + "pos": 68, + "start": 415 + }, + { + "array_positions": null, + "end": 444, + "pos": 73, + "start": 438 + }, + { + "array_positions": null, + "end": 466, + "pos": 76, + "start": 458 } ] }, @@ -600,7 +825,196 @@ You can see the result in JSON format. The result of the above request is: ] } }, - "score": 0.18706386191354732, + "score": 0.633816551223398, + "sort": [ + "_score" + ] + }, + { + "fields": { + "_type": "arwiki", + "contributor": "unknown", + "text_ar": "محرك البحث (بالإنجليزية: Search engine) هو نظام لإسترجاع المعلومات صمم للمساعدة على البحث عن المعلومات المخزنة على أي نظام حاسوبي. تعرض نتائج البحث عادة على شكل قائمة لأماكن تواجد المعلومات ومرتبة وفق معايير معينة. تسمح محركات البحث باختصار مدة البحث والتغلب على مشكلة أحجام البيانات المتصاعدة (إغراق معلوماتي).", + "timestamp": "2018-03-25T18:04:00Z", + "title_ar": "محرك بحث" + }, + "id": "arwiki_1", + "index": "/tmp/blast/node1/index", + "locations": { + "text_ar": { + "search": [ + { + "array_positions": null, + "end": 51, + "pos": 4, + "start": 45 + } + ] + } + }, + "score": 0.2584513642405507, + "sort": [ + "_score" + ] + }, + { + "fields": { + "_type": "hiwiki", + "contributor": "unknown", + "text_hi": "ऐसे कम्प्यूटर प्रोग्राम खोजी इंजन (search engine) कहलाते हैं जो किसी कम्प्यूटर सिस्टम पर भण्डारित सूचना में से वांछित सूचना को ढूढ निकालते हैं। ये इंजन प्राप्त परिणामों को प्रायः एक सूची के रूप में प्रस्तुत करते हैं जिससे वांछित सूचना की प्रकृति और उसकी स्थिति का पता चलता है। खोजी इंजन किसी सूचना तक अपेक्षाकृत बहुत कम समय में पहुँचने में हमारी सहायता करते हैं। वे 'सूचना ओवरलोड' से भी हमे बचाते हैं। खोजी इंजन का सबसे प्रचलित रूप 'वेब खोजी इंजन' है जो वर्ल्ड वाइड वेब पर सूचना खोजने के लिये प्रयुक्त होता है।", + "timestamp": "2017-10-19T20:09:00Z", + "title_hi": "खोज इंजन" + }, + "id": "hiwiki_1", + "index": "/tmp/blast/node1/index", + "locations": { + "text_hi": { + "search": [ + { + 
"array_positions": null, + "end": 99, + "pos": 6, + "start": 93 + } + ] + } + }, + "score": 0.2315458066284217, + "sort": [ + "_score" + ] + }, + { + "fields": { + "_type": "itwiki", + "contributor": "unknown", + "text_it": "Nell'ambito delle tecnologie di Internet, un motore di ricerca (in inglese search engine) è un sistema automatico che, su richiesta, analizza un insieme di dati (spesso da esso stesso raccolti) e restituisce un indice dei contenuti disponibili[1] classificandoli in modo automatico in base a formule statistico-matematiche che ne indichino il grado di rilevanza data una determinata chiave di ricerca. Uno dei campi in cui i motori di ricerca trovano maggiore utilizzo è quello dell'information retrieval e nel web. I motori di ricerca più utilizzati nel 2017 sono stati: Google, Bing, Baidu, Qwant, Yandex, Ecosia, DuckDuckGo.", + "timestamp": "2018-07-16T12:20:00Z", + "title_it": "Motore di ricerca" + }, + "id": "itwiki_1", + "index": "/tmp/blast/node1/index", + "locations": { + "text_it": { + "search": [ + { + "array_positions": null, + "end": 81, + "pos": 12, + "start": 75 + } + ] + } + }, + "score": 0.22855799635019447, + "sort": [ + "_score" + ] + }, + { + "fields": { + "_type": "thwiki", + "contributor": "unknown", + "text_th": "เสิร์ชเอนจิน (search engine) หรือ โปรแกรมค้นหา คือ โปรแกรมที่ช่วยในการสืบค้นหาข้อมูล โดยเฉพาะข้อมูลบนอินเทอร์เน็ต โดยครอบคลุมทั้งข้อความ รูปภาพ ภาพเคลื่อนไหว เพลง ซอฟต์แวร์ แผนที่ ข้อมูลบุคคล กลุ่มข่าว และอื่น ๆ ซึ่งแตกต่างกันไปแล้วแต่โปรแกรมหรือผู้ให้บริการแต่ละราย. 
เสิร์ชเอนจินส่วนใหญ่จะค้นหาข้อมูลจากคำสำคัญ (คีย์เวิร์ด) ที่ผู้ใช้ป้อนเข้าไป จากนั้นก็จะแสดงรายการผลลัพธ์ที่มันคิดว่าผู้ใช้น่าจะต้องการขึ้นมา ในปัจจุบัน เสิร์ชเอนจินบางตัว เช่น กูเกิล จะบันทึกประวัติการค้นหาและการเลือกผลลัพธ์ของผู้ใช้ไว้ด้วย และจะนำประวัติที่บันทึกไว้นั้น มาช่วยกรองผลลัพธ์ในการค้นหาครั้งต่อ ๆ ไป", + "timestamp": "2016-06-18T11:06:00Z", + "title_th": "เสิร์ชเอนจิน" + }, + "id": "thwiki_1", + "index": "/tmp/blast/node1/index", + "locations": { + "text_th": { + "search": [ + { + "array_positions": null, + "end": 44, + "pos": 4, + "start": 38 + } + ] + } + }, + "score": 0.2018569611073604, + "sort": [ + "_score" + ] + }, + { + "fields": { + "_type": "zhwiki", + "contributor": "unknown", + "text_zh": "搜索引擎(英语:search engine)是一种信息检索系统,旨在协助搜索存储在计算机系统中的信息。搜索结果一般被称为“hits”,通常会以表单的形式列出。网络搜索引擎是最常见、公开的一种搜索引擎,其功能为搜索万维网上储存的信息.", + "timestamp": "2018-08-27T05:47:00Z", + "title_zh": "搜索引擎" + }, + "id": "zhwiki_1", + "index": "/tmp/blast/node1/index", + "locations": { + "text_zh": { + "search": [ + { + "array_positions": null, + "end": 30, + "pos": 5, + "start": 24 + } + ] + } + }, + "score": 0.19986816567601795, + "sort": [ + "_score" + ] + }, + { + "fields": { + "_type": "bgwiki", + "contributor": "unknown", + "text_bg": "Търсачка или търсеща машина (на английски: Web search engine) е специализиран софтуер за извличане на информация, съхранена в компютърна система или мрежа. Това може да е персонален компютър, Интернет, корпоративна мрежа и т.н. Без допълнителни уточнения, най-често под търсачка се разбира уеб(-)търсачка, която търси в Интернет. Други видове търсачки са корпоративните търсачки, които търсят в интранет мрежите, личните търсачки – за индивидуалните компютри и мобилните търсачки. В търсачката потребителят (търсещият) прави запитване за съдържание, отговарящо на определен критерий (обикновено такъв, който съдържа определени думи и фрази). В резултат се получават списък от точки, които отговарят, пълно или частично, на този критерий. 
Търсачките обикновено използват редовно подновявани индекси, за да оперират бързо и ефикасно. Някои търсачки също търсят в информацията, която е на разположение в нюзгрупите и други големи бази данни. За разлика от Уеб директориите, които се поддържат от хора редактори, търсачките оперират алгоритмично. Повечето Интернет търсачки са притежавани от различни корпорации.", + "timestamp": "2018-07-11T11:03:00Z", + "title_bg": "Търсачка" + }, + "id": "bgwiki_1", + "index": "/tmp/blast/node1/index", + "locations": { + "text_bg": { + "search": [ + { + "array_positions": null, + "end": 88, + "pos": 8, + "start": 82 + } + ] + } + }, + "score": 0.18275270089950202, + "sort": [ + "_score" + ] + }, + { + "fields": { + "_type": "idwiki", + "contributor": "unknown", + "text_id": "Mesin pencari web atau mesin telusur web (bahasa Inggris: web search engine) adalah program komputer yang dirancang untuk melakukan pencarian atas berkas-berkas yang tersimpan dalam layanan www, ftp, publikasi milis, ataupun news group dalam sebuah ataupun sejumlah komputer peladen dalam suatu jaringan. Mesin pencari merupakan perangkat penelusur informasi dari dokumen-dokumen yang tersedia. Hasil pencarian umumnya ditampilkan dalam bentuk daftar yang seringkali diurutkan menurut tingkat akurasi ataupun rasio pengunjung atas suatu berkas yang disebut sebagai hits. Informasi yang menjadi target pencarian bisa terdapat dalam berbagai macam jenis berkas seperti halaman situs web, gambar, ataupun jenis-jenis berkas lainnya. Beberapa mesin pencari juga diketahui melakukan pengumpulan informasi atas data yang tersimpan dalam suatu basis data ataupun direktori web. Sebagian besar mesin pencari dijalankan oleh perusahaan swasta yang menggunakan algoritme kepemilikan dan basis data tertutup, di antaranya yang paling populer adalah safari Google (MSN Search dan Yahoo!). 
Telah ada beberapa upaya menciptakan mesin pencari dengan sumber terbuka (open source), contohnya adalah Htdig, Nutch, Egothor dan OpenFTS.", + "timestamp": "2017-11-20T17:47:00Z", + "title_id": "Mesin pencari web" + }, + "id": "idwiki_1", + "index": "/tmp/blast/node1/index", + "locations": { + "text_id": { + "search": [ + { + "array_positions": null, + "end": 68, + "pos": 11, + "start": 62 + } + ] + } + }, + "score": 0.17060026115019133, "sort": [ "_score" ] @@ -609,12 +1023,12 @@ You can see the result in JSON format. The result of the above request is: "fields": { "_type": "ptwiki", "contributor": "unknown", - "text_pt": "Motor de pesquisa (português europeu) ou ferramenta de busca (português brasileiro) ou buscador (em inglês: search engine) é um programa desenhado para procurar palavras-chave fornecidas pelo utilizador em documentos e bases de dados. No contexto da internet, um motor de pesquisa permite procurar palavras-chave em documentos alojados na world wide web, como aqueles que se encontram armazenados em websites.", - "timestamp": "2018-07-04T05:41:00Z", + "text_pt": "Motor de pesquisa (português europeu) ou ferramenta de busca (português brasileiro) ou buscador (em inglês: search engine) é um programa desenhado para procurar palavras-chave fornecidas pelo utilizador em documentos e bases de dados. No contexto da internet, um motor de pesquisa permite procurar palavras-chave em documentos alojados na world wide web, como aqueles que se encontram armazenados em websites. Os motores de busca surgiram logo após o aparecimento da Internet, com a intenção de prestar um serviço extremamente importante: a busca de qualquer informação na rede, apresentando os resultados de uma forma organizada, e também com a proposta de fazer isto de uma maneira rápida e eficiente. A partir deste preceito básico, diversas empresas se desenvolveram, chegando algumas a valer milhões de dólares. 
Entre as maiores empresas encontram-se o Google, o Yahoo, o Bing, o Lycos, o Cadê e, mais recentemente, a Amazon.com com o seu mecanismo de busca A9 porém inativo. Os buscadores se mostraram imprescindíveis para o fluxo de acesso e a conquista novos visitantes. Antes do advento da Web, havia sistemas para outros protocolos ou usos, como o Archie para sites FTP anônimos e o Veronica para o Gopher (protocolo de redes de computadores que foi desenhado para indexar repositórios de documentos na Internet, baseado-se em menus).", + "timestamp": "2017-11-09T14:38:00Z", "title_pt": "Motor de busca" }, - "id": "ptwiki_doc1", - "index": "/tmp/blast/node0/index", + "id": "ptwiki_1", + "index": "/tmp/blast/node1/index", "locations": { "text_pt": { "search": [ @@ -627,40 +1041,40 @@ You can see the result in JSON format. The result of the above request is: ] } }, - "score": 0.09273589609475594, + "score": 0.1688012644026126, "sort": [ "_score" ] }, { "fields": { - "_type": "jawiki", + "_type": "frwiki", "contributor": "unknown", - "text_ja": "検索エンジン(けんさくエンジン、英語: search engine)は、狭義にはインターネットに存在する情報(ウェブページ、ウェブサイト、画像フム。インターネットの普及初期には、検索としての機能のみを提供していたウェブサイトそのものを検索エンジンと呼んだが、現在では様々なサービスが加わったポー情報を検索するシステム全般を含む。", - "timestamp": "2018-05-30T00:52:00Z", - "title_ja": "検索エンジン" + "text_fr": "Un moteur de recherche est une application web permettant de trouver des ressources à partir d'une requête sous forme de mots. Les ressources peuvent être des pages web, des articles de forums Usenet, des images, des vidéos, des fichiers, etc. Certains sites web offrent un moteur de recherche comme principale fonctionnalité ; on appelle alors « moteur de recherche » le site lui-même. Ce sont des instruments de recherche sur le web sans intervention humaine, ce qui les distingue des annuaires. 
Ils sont basés sur des « robots », encore appelés « bots », « spiders «, « crawlers » ou « agents », qui parcourent les sites à intervalles réguliers et de façon automatique pour découvrir de nouvelles adresses (URL). Ils suivent les liens hypertextes qui relient les pages les unes aux autres, les uns après les autres. Chaque page identifiée est alors indexée dans une base de données, accessible ensuite par les internautes à partir de mots-clés. C'est par abus de langage qu'on appelle également « moteurs de recherche » des sites web proposant des annuaires de sites web : dans ce cas, ce sont des instruments de recherche élaborés par des personnes qui répertorient et classifient des sites web jugés dignes d'intérêt, et non des robots d'indexation. Les moteurs de recherche ne s'appliquent pas qu'à Internet : certains moteurs sont des logiciels installés sur un ordinateur personnel. Ce sont des moteurs dits « de bureau » qui combinent la recherche parmi les fichiers stockés sur le PC et la recherche parmi les sites Web — on peut citer par exemple Exalead Desktop, Google Desktop et Copernic Desktop Search, Windex Server, etc. On trouve également des métamoteurs, c'est-à-dire des sites web où une même recherche est lancée simultanément sur plusieurs moteurs de recherche, les résultats étant ensuite fusionnés pour être présentés à l'internaute. 
On peut citer dans cette catégorie Ixquick, Mamma, Kartoo, Framabee ou Lilo.", + "timestamp": "2018-05-30T15:15:00Z", + "title_fr": "Moteur de recherche" }, - "id": "jawiki_doc1", - "index": "/tmp/blast/node0/index", + "id": "frwiki_1", + "index": "/tmp/blast/node1/index", "locations": { - "text_ja": { + "text_fr": { "search": [ { "array_positions": null, - "end": 62, - "pos": 11, - "start": 56 + "end": 1662, + "pos": 253, + "start": 1656 } ] } }, - "score": 0.05875099443799347, + "score": 0.14418354794511895, "sort": [ "_score" ] } ], - "max_score": 0.18706386191354732, + "max_score": 0.633816551223398, "request": { "explain": false, "facets": { @@ -710,8 +1124,8 @@ You can see the result in JSON format. The result of the above request is: "successful": 1, "total": 1 }, - "took": 332682, - "total_hits": 3 + "took": 1754440, + "total_hits": 12 }, "success": true } @@ -725,21 +1139,21 @@ Blast is easy to bring up the cluster. Blast node is already running, but that i $ ./bin/blast start --bind-addr=localhost:11000 \ --grpc-addr=localhost:11001 \ --http-addr=localhost:11002 \ - --node-id=node2 \ + --raft-node-id=node2 \ --raft-dir=/tmp/blast/node2/raft \ --store-dir=/tmp/blast/node2/store \ --index-dir=/tmp/blast/node2/index \ --peer-grpc-addr=localhost:10001 \ - --index-mapping=./example/wikipedia_index_mapping.json + --index-mapping-file=./etc/index_mapping.json $ ./bin/blast start --bind-addr=localhost:12000 \ --grpc-addr=localhost:12001 \ --http-addr=localhost:12002 \ - --node-id=node3 \ + --raft-node-id=node3 \ --raft-dir=/tmp/blast/node3/raft \ --store-dir=/tmp/blast/node3/store \ --index-dir=/tmp/blast/node3/index \ --peer-grpc-addr=localhost:10001 \ - --index-mapping=./example/wikipedia_index_mapping.json + --index-mapping-file=./etc/index_mapping.json ``` _Above example shows each Blast node running on the same host, so each node must listen on different ports. 
This would not be necessary if each node ran on a different host._ @@ -792,7 +1206,7 @@ This tells each new node to join the existing node. Once joined, each node now k Following command puts a document to node0: ```bash -$ cat ./example/enwiki_doc1.json | xargs -0 ./bin/blast put --grpc-addr=localhost:10001 --pretty-print enwiki_doc1 +$ cat ./example/doc_enwiki_1.json | xargs -0 ./bin/blast put --grpc-addr=localhost:10001 --pretty-print enwiki_1 ``` You can see the result in JSON format. The result of the above command is: @@ -806,18 +1220,18 @@ You can see the result in JSON format. The result of the above command is: So, you can get a document from node1 like following: ```bash -$ ./bin/blast get --grpc-addr=localhost:10001 --pretty-print enwiki_doc1 +$ ./bin/blast get --grpc-addr=localhost:10001 --pretty-print enwiki_1 ``` You can see the result in JSON format. The result of the above command is: ```json { - "id": "enwiki_doc1", + "id": "enwiki_1", "fields": { "_type": "enwiki", "contributor": "unknown", - "text_en": "A search engine is an information retrieval system designed to help find information stored on a computer system. The search results are usually presented in a list and are commonly called hits. Search engines help to minimize the time required to find information and the amount of information which must be consulted, akin to other techniques for managing information overload.", + "text_en": "A search engine is an information retrieval system designed to help find information stored on a computer system. The search results are usually presented in a list and are commonly called hits. Search engines help to minimize the time required to find information and the amount of information which must be consulted, akin to other techniques for managing information overload. 
The most public, visible form of a search engine is a Web search engine which searches for information on the World Wide Web.", "timestamp": "2018-07-04T05:41:00Z", "title_en": "Search engine (computing)" }, @@ -828,18 +1242,18 @@ You can see the result in JSON format. The result of the above command is: Also, you can get same document from node2 (localhost:11001) like following: ```bash -$ ./bin/blast get --grpc-addr=localhost:11001 --pretty-print enwiki_doc1 +$ ./bin/blast get --grpc-addr=localhost:11001 --pretty-print enwiki_1 ``` You can see the result in JSON format. The result of the above command is: ```json { - "id": "enwiki_doc1", + "id": "enwiki_1", "fields": { "_type": "enwiki", "contributor": "unknown", - "text_en": "A search engine is an information retrieval system designed to help find information stored on a computer system. The search results are usually presented in a list and are commonly called hits. Search engines help to minimize the time required to find information and the amount of information which must be consulted, akin to other techniques for managing information overload.", + "text_en": "A search engine is an information retrieval system designed to help find information stored on a computer system. The search results are usually presented in a list and are commonly called hits. Search engines help to minimize the time required to find information and the amount of information which must be consulted, akin to other techniques for managing information overload. The most public, visible form of a search engine is a Web search engine which searches for information on the World Wide Web.", "timestamp": "2018-07-04T05:41:00Z", "title_en": "Search engine (computing)" }, @@ -850,18 +1264,18 @@ You can see the result in JSON format. 
The result of the above command is: Lastly, you can get same document from node3 (localhost:12001) like following: ```bash -$ ./bin/blast get --grpc-addr=localhost:12001 --pretty-print enwiki_doc1 +$ ./bin/blast get --grpc-addr=localhost:12001 --pretty-print enwiki_1 ``` You can see the result in JSON format. The result of the above command is: ```json { - "id": "enwiki_doc1", + "id": "enwiki_1", "fields": { "_type": "enwiki", "contributor": "unknown", - "text_en": "A search engine is an information retrieval system designed to help find information stored on a computer system. The search results are usually presented in a list and are commonly called hits. Search engines help to minimize the time required to find information and the amount of information which must be consulted, akin to other techniques for managing information overload.", + "text_en": "A search engine is an information retrieval system designed to help find information stored on a computer system. The search results are usually presented in a list and are commonly called hits. Search engines help to minimize the time required to find information and the amount of information which must be consulted, akin to other techniques for managing information overload. 
The most public, visible form of a search engine is a Web search engine which searches for information on the World Wide Web.", "timestamp": "2018-07-04T05:41:00Z", "title_en": "Search engine (computing)" }, @@ -884,7 +1298,7 @@ $ make docker You can also use the Docker container image already registered in docker.io like so: ```bash -$ docker pull mosuka/blast:v0.2.0 +$ docker pull mosuka/blast:latest ``` See https://hub.docker.com/r/mosuka/blast/tags/ @@ -898,11 +1312,11 @@ $ docker run --rm --name blast1 \ -p 10000:10000 \ -p 10001:10001 \ -p 10002:10002 \ - mosuka/blast:v0.3.0 start \ + mosuka/blast:latest start \ --bind-addr=:10000 \ --grpc-addr=:10001 \ --http-addr=:10002 \ - --node-id=node1 + --raft-node-id=node1 ``` ### Running Blast cluster on Docker Compose @@ -924,4 +1338,5 @@ All nodes are stopped as follows: ```bash $ docker-compose stop +$ docker-compose rm ``` diff --git a/Versionfile b/Versionfile index 0d91a54..9e11b32 100644 --- a/Versionfile +++ b/Versionfile @@ -1 +1 @@ -0.3.0 +0.3.1 diff --git a/cmd/blast/bulk.go b/cmd/blast/bulk.go index 7b37c67..d0cdae0 100644 --- a/cmd/blast/bulk.go +++ b/cmd/blast/bulk.go @@ -27,11 +27,7 @@ import ( func bulk(c *cli.Context) { grpcAddr := c.String("grpc-addr") - maxSendMsgSize := c.Int("max-send-msg-size") - maxRecvMsgSize := c.Int("max-recv-msg-size") - batchSize := c.Int("batch-size") - prettyPrint := c.Bool("pretty-print") updateRequestsBytes := []byte(c.Args().Get(0)) @@ -45,7 +41,7 @@ func bulk(c *cli.Context) { } var grpcClient *client.GRPCClient - if grpcClient, err = client.NewGRPCClient(grpcAddr, maxSendMsgSize, maxRecvMsgSize); err != nil { + if grpcClient, err = client.NewGRPCClient(grpcAddr); err != nil { fmt.Fprintln(os.Stderr, err) return } diff --git a/cmd/blast/commands.go b/cmd/blast/commands.go index fffece3..7f7309c 100644 --- a/cmd/blast/commands.go +++ b/cmd/blast/commands.go @@ -22,22 +22,19 @@ var ( Name: "start", Usage: "Start a node", Flags: []cli.Flag{ - flNodeID, flBindAddr, 
flGRPCAddr, flHTTPAddr, + flRaftNodeID, flRaftDir, flRaftSnapshotCount, flRaftTimeout, flStoreDir, flIndexDir, - flIndexMapping, + flIndexMappingFile, flIndexType, flIndexKvstore, - flIndexKvconfig, flPeerGRPCAddr, - flMaxSendMsgSize, - flMaxRecvMsgSize, flLogLevel, flLogFile, flLogMaxSize, @@ -57,8 +54,6 @@ var ( Usage: "Get a document", Flags: []cli.Flag{ flGRPCAddr, - flMaxSendMsgSize, - flMaxRecvMsgSize, flPrettyPrint, }, ArgsUsage: "[id]", @@ -69,8 +64,6 @@ var ( Usage: "Put a document", Flags: []cli.Flag{ flGRPCAddr, - flMaxSendMsgSize, - flMaxRecvMsgSize, flPrettyPrint, }, ArgsUsage: "[id] [fields]", @@ -81,8 +74,6 @@ var ( Usage: "Delete a document", Flags: []cli.Flag{ flGRPCAddr, - flMaxSendMsgSize, - flMaxRecvMsgSize, flPrettyPrint, }, ArgsUsage: "[id]", @@ -93,8 +84,6 @@ var ( Usage: "Update documents in bulk", Flags: []cli.Flag{ flGRPCAddr, - flMaxSendMsgSize, - flMaxRecvMsgSize, flPrettyPrint, flBatchSize, }, @@ -106,8 +95,6 @@ var ( Usage: "Search documents", Flags: []cli.Flag{ flGRPCAddr, - flMaxSendMsgSize, - flMaxRecvMsgSize, flPrettyPrint, }, ArgsUsage: "[search request]", @@ -117,13 +104,11 @@ var ( Name: "join", Usage: "Join a node to the cluster", Flags: []cli.Flag{ - flNodeID, + flRaftNodeID, flBindAddr, flGRPCAddr, flHTTPAddr, flPeerGRPCAddr, - flMaxSendMsgSize, - flMaxRecvMsgSize, flPrettyPrint, }, Action: join, @@ -132,11 +117,9 @@ var ( Name: "leave", Usage: "Leave a node from the cluster", Flags: []cli.Flag{ - flNodeID, + flRaftNodeID, flBindAddr, flPeerGRPCAddr, - flMaxSendMsgSize, - flMaxRecvMsgSize, flPrettyPrint, }, Action: leave, @@ -146,8 +129,6 @@ var ( Usage: "Shows a list of peers in a cluster", Flags: []cli.Flag{ flGRPCAddr, - flMaxSendMsgSize, - flMaxRecvMsgSize, flPrettyPrint, }, Action: peers, @@ -157,8 +138,6 @@ var ( Usage: "Create snapshot", Flags: []cli.Flag{ flBindAddr, - flMaxSendMsgSize, - flMaxRecvMsgSize, flPrettyPrint, }, Action: snapshot, diff --git a/cmd/blast/delete.go b/cmd/blast/delete.go index 22c468a..b93847b 
100644 --- a/cmd/blast/delete.go +++ b/cmd/blast/delete.go @@ -27,9 +27,6 @@ import ( func delete(c *cli.Context) { grpcAddr := c.String("grpc-addr") - maxSendMsgSize := c.Int("max-send-msg-size") - maxRecvMsgSize := c.Int("max-recv-msg-size") - prettyPrint := c.Bool("pretty-print") id := c.Args().Get(0) @@ -42,7 +39,7 @@ func delete(c *cli.Context) { } var grpcClient *client.GRPCClient - if grpcClient, err = client.NewGRPCClient(grpcAddr, maxSendMsgSize, maxRecvMsgSize); err != nil { + if grpcClient, err = client.NewGRPCClient(grpcAddr); err != nil { fmt.Fprintln(os.Stderr, err) return } diff --git a/cmd/blast/flags.go b/cmd/blast/flags.go index 8a22134..bdec212 100644 --- a/cmd/blast/flags.go +++ b/cmd/blast/flags.go @@ -15,11 +15,11 @@ package main import ( - "math" "os" - "github.com/blevesearch/bleve/index/store/boltdb" - "github.com/blevesearch/bleve/index/upsidedown" + "github.com/mosuka/blast/index/bleve" + "github.com/mosuka/blast/raft" + "github.com/mosuka/blast/store/boltdb" "github.com/urfave/cli" ) @@ -43,66 +43,62 @@ var ( EnvVar: "BLAST_HTTP_ADDR", } - flNodeID = cli.StringFlag{ - Name: "node-id", - Value: "node1", + flRaftNodeID = cli.StringFlag{ + Name: "raft-node-id", + Value: raft.DefaultNodeID, Usage: "Node ID", EnvVar: "BLAST_NODE_ID", } flRaftDir = cli.StringFlag{ Name: "raft-dir", - Value: "./data/raft", + Value: raft.DefaultDir, Usage: "Raft data directory", EnvVar: "BLAST_RAFT_DIR", } flRaftSnapshotCount = cli.IntFlag{ Name: "raft-snapshot-count", - Value: 2, + Value: raft.DefaultSnapshotCount, Usage: "Raft snapshot count", EnvVar: "BLAST_RAFT_SNAPSHOT_COUNT", } flRaftTimeout = cli.StringFlag{ Name: "raft-timeout", - Value: "10s", + Value: raft.DefaultTimeout, Usage: "Raft timeout", EnvVar: "BLAST_RAFT_TIMEOUT", } flStoreDir = cli.StringFlag{ Name: "store-dir", - Value: "./data/store", + Value: boltdb.DefaultDir, Usage: "Store data directory", EnvVar: "BLAST_STORE_DIR", } flIndexDir = cli.StringFlag{ Name: "index-dir", - Value: 
"./data/index", + Value: bleve.DefaultDir, Usage: "Index data directory", EnvVar: "BLAST_INDEX_DIR", } - flIndexMapping = cli.StringFlag{ - Name: "index-mapping", - Usage: "Index mapping path", - EnvVar: "BLAST_INDEX_MAPPING", + flIndexMappingFile = cli.StringFlag{ + Name: "index-mapping-file", + Value: bleve.DefaultIndexMappingFile, + Usage: "Index mapping file", + EnvVar: "BLAST_INDEX_MAPPING_FILE", } flIndexType = cli.StringFlag{ Name: "index-type", - Value: upsidedown.Name, + Value: bleve.DefaultIndexType, Usage: "Index type", EnvVar: "BLAST_INDEX_TYPE", } flIndexKvstore = cli.StringFlag{ Name: "index-kvstore", - Value: boltdb.Name, + Value: bleve.DefaultKvstore, Usage: "Index Key-Value store", EnvVar: "BLAST_INDEX_KVSTORE", } - flIndexKvconfig = cli.StringFlag{ - Name: "index-kvconfig", - Usage: "Index Key-Value config", - EnvVar: "BLAST_INDEX_KVCONFIG", - } flPeerGRPCAddr = cli.StringFlag{ Name: "peer-grpc-addr", @@ -110,17 +106,6 @@ var ( EnvVar: "BLAST_PEER_GRPC_ADDR", } - flMaxSendMsgSize = cli.IntFlag{ - Name: "max-send-msg-size", - Value: math.MaxInt32, - Usage: "Max size of send message via gRPC", - } - flMaxRecvMsgSize = cli.IntFlag{ - Name: "max-recv-msg-size", - Value: math.MaxInt32, - Usage: "Max size of receive message via gRPC", - } - flLogLevel = cli.StringFlag{ Name: "log-level", Value: "INFO", diff --git a/cmd/blast/get.go b/cmd/blast/get.go index 70dd333..627dcdf 100644 --- a/cmd/blast/get.go +++ b/cmd/blast/get.go @@ -27,9 +27,6 @@ import ( func get(c *cli.Context) { grpcAddr := c.String("grpc-addr") - maxSendMsgSize := c.Int("max-send-msg-size") - maxRecvMsgSize := c.Int("max-recv-msg-size") - prettyPrint := c.Bool("pretty-print") id := c.Args().Get(0) @@ -42,7 +39,7 @@ func get(c *cli.Context) { } var grpcClient *client.GRPCClient - if grpcClient, err = client.NewGRPCClient(grpcAddr, maxSendMsgSize, maxRecvMsgSize); err != nil { + if grpcClient, err = client.NewGRPCClient(grpcAddr); err != nil { fmt.Fprintln(os.Stderr, err) return } diff 
--git a/cmd/blast/join.go b/cmd/blast/join.go index 18907ec..471d7e9 100644 --- a/cmd/blast/join.go +++ b/cmd/blast/join.go @@ -30,17 +30,13 @@ func join(c *cli.Context) { bindAddr := c.String("bind-addr") grpcAddr := c.String("grpc-addr") httpAddr := c.String("http-addr") - peerGRPCAddr := c.String("peer-grpc-addr") - maxSendMsgSize := c.Int("max-send-msg-size") - maxRecvMsgSize := c.Int("max-recv-msg-size") - prettyPrint := c.Bool("pretty-print") var err error var grpcClient *client.GRPCClient - if grpcClient, err = client.NewGRPCClient(peerGRPCAddr, maxSendMsgSize, maxRecvMsgSize); err != nil { + if grpcClient, err = client.NewGRPCClient(peerGRPCAddr); err != nil { fmt.Fprintln(os.Stderr, err) return } diff --git a/cmd/blast/leave.go b/cmd/blast/leave.go index c11aac5..55720bb 100644 --- a/cmd/blast/leave.go +++ b/cmd/blast/leave.go @@ -28,17 +28,13 @@ import ( func leave(c *cli.Context) { nodeID := c.String("node-id") bindAddr := c.String("bind-addr") - peerGRPCAddr := c.String("peer-grpc-addr") - maxSendMsgSize := c.Int("max-send-msg-size") - maxRecvMsgSize := c.Int("max-recv-msg-size") - prettyPrint := c.Bool("pretty-print") var err error var grpcClient *client.GRPCClient - if grpcClient, err = client.NewGRPCClient(peerGRPCAddr, maxSendMsgSize, maxRecvMsgSize); err != nil { + if grpcClient, err = client.NewGRPCClient(peerGRPCAddr); err != nil { fmt.Fprintln(os.Stderr, err) return } diff --git a/cmd/blast/peers.go b/cmd/blast/peers.go index 49ba40a..f66e558 100644 --- a/cmd/blast/peers.go +++ b/cmd/blast/peers.go @@ -27,15 +27,12 @@ import ( func peers(c *cli.Context) { grpcAddr := c.String("grpc-addr") - maxSendMsgSize := c.Int("max-send-msg-size") - maxRecvMsgSize := c.Int("max-recv-msg-size") - prettyPrint := c.Bool("pretty-print") var err error var grpcClient *client.GRPCClient - if grpcClient, err = client.NewGRPCClient(grpcAddr, maxSendMsgSize, maxRecvMsgSize); err != nil { + if grpcClient, err = client.NewGRPCClient(grpcAddr); err != nil { 
fmt.Fprintln(os.Stderr, err) return } diff --git a/cmd/blast/put.go b/cmd/blast/put.go index 7a2371c..f0b5817 100644 --- a/cmd/blast/put.go +++ b/cmd/blast/put.go @@ -28,9 +28,6 @@ import ( func put(c *cli.Context) { grpcAddr := c.String("grpc-addr") - maxSendMsgSize := c.Int("max-send-msg-size") - maxRecvMsgSize := c.Int("max-recv-msg-size") - prettyPrint := c.Bool("pretty-print") id := c.Args().Get(0) @@ -51,7 +48,7 @@ func put(c *cli.Context) { } var grpcClient *client.GRPCClient - if grpcClient, err = client.NewGRPCClient(grpcAddr, maxSendMsgSize, maxRecvMsgSize); err != nil { + if grpcClient, err = client.NewGRPCClient(grpcAddr); err != nil { fmt.Fprintln(os.Stderr, err) return } diff --git a/cmd/blast/search.go b/cmd/blast/search.go index 6f40677..86796fe 100644 --- a/cmd/blast/search.go +++ b/cmd/blast/search.go @@ -27,9 +27,6 @@ import ( func search(c *cli.Context) { grpcAddr := c.String("grpc-addr") - maxSendMsgSize := c.Int("max-send-msg-size") - maxRecvMsgSize := c.Int("max-recv-msg-size") - prettyPrint := c.Bool("pretty-print") searchRequest := []byte(c.Args().Get(0)) @@ -42,7 +39,7 @@ func search(c *cli.Context) { } var grpcClient *client.GRPCClient - if grpcClient, err = client.NewGRPCClient(grpcAddr, maxSendMsgSize, maxRecvMsgSize); err != nil { + if grpcClient, err = client.NewGRPCClient(grpcAddr); err != nil { fmt.Fprintln(os.Stderr, err) return } diff --git a/cmd/blast/snapshot.go b/cmd/blast/snapshot.go index a1893ac..ddd25c4 100644 --- a/cmd/blast/snapshot.go +++ b/cmd/blast/snapshot.go @@ -27,15 +27,12 @@ import ( func snapshot(c *cli.Context) { grpcAddr := c.String("grpc-addr") - maxSendMsgSize := c.Int("max-send-msg-size") - maxRecvMsgSize := c.Int("max-recv-msg-size") - prettyPrint := c.Bool("pretty-print") var err error var grpcClient *client.GRPCClient - if grpcClient, err = client.NewGRPCClient(grpcAddr, maxSendMsgSize, maxRecvMsgSize); err != nil { + if grpcClient, err = client.NewGRPCClient(grpcAddr); err != nil { 
fmt.Fprintln(os.Stderr, err) return } diff --git a/cmd/blast/start.go b/cmd/blast/start.go index 773b501..c8e88f3 100644 --- a/cmd/blast/start.go +++ b/cmd/blast/start.go @@ -15,16 +15,13 @@ package main import ( - "encoding/json" "fmt" - "io/ioutil" "log" "os" "os/signal" "syscall" "time" - "github.com/blevesearch/bleve/mapping" "github.com/hashicorp/raft" "github.com/mosuka/blast/grpc/client" grpcserver "github.com/mosuka/blast/grpc/server" @@ -41,11 +38,11 @@ import ( ) var logo = ` - ____ _ _ - | __ )| | __ _ ___| |_ - | _ \| |/ _' / __| __| The lightweight distributed - | |_) | | (_| \__ \ |_ indexing and search server. - |____/|_|\__,_|___/\__| version ` + version.Version + ` + ____ __ __ + / __ ) / /____ _ _____ / /_ + / __ \ / // __ '// ___// __/ The lightweight distributed + / /_/ // // /_/ /(__ )/ /_ indexing and search server. +/_.___//_/ \__,_//____/ \__/ version ` + version.Version + ` ` func start(c *cli.Context) { @@ -56,30 +53,28 @@ func start(c *cli.Context) { grpcAddr := c.String("grpc-addr") httpAddr := c.String("http-addr") - nodeID := c.String("node-id") + nodeID := c.String("raft-node-id") raftDir := c.String("raft-dir") - retainSnapshotCount := c.Int("retain-snapshot-count") + snapshotCount := c.Int("raft-snapshot-count") raftTimeout := c.String("raft-timeout") storeDir := c.String("store-dir") indexDir := c.String("index-dir") - indexMapping := c.String("index-mapping") + indexMappingFile := c.String("index-mapping-file") indexType := c.String("index-type") indexKvstore := c.String("index-kvstore") peerGRPCAddr := c.String("peer-grpc-addr") - maxSendMsgSize := c.Int("max-send-msg-size") - maxRecvMsgSize := c.Int("max-recv-msg-size") logLevel := c.String("log-level") - logFilename := c.String("log-filename") + logFilename := c.String("log-file") logMaxSize := c.Int("log-max-size") logMaxBackups := c.Int("log-max-backups") logMaxAge := c.Int("log-max-age") logCompress := c.Bool("log-compress") - httpAccessLogFilename := 
c.String("http-access-log-filename") + httpAccessLogFilename := c.String("http-access-log-file") httpAccessLogMaxSize := c.Int("http-access-log-max-size") httpAccessLogMaxBackups := c.Int("http-access-log-max-backups") httpAccessLogMaxAge := c.Int("http-access-log-max-age") @@ -88,60 +83,31 @@ func start(c *cli.Context) { var err error // Raft config - raftConfig := braft.DefaultConfig() - if nodeID != "" { - raftConfig.Config.LocalID = raft.ServerID(nodeID) - } - if raftDir != "" { - raftConfig.Path = raftDir - } - if retainSnapshotCount > 0 { - raftConfig.RetainSnapshotCount = retainSnapshotCount - } - if raftTimeout != "" { - if raftConfig.Timeout, err = time.ParseDuration(raftTimeout); err != nil { - fmt.Fprint(os.Stderr, errors.Wrap(err, "Failed to parse raft timeout")) - return - } + raftConfig := braft.DefaultRaftConfig() + raftConfig.Config.LocalID = raft.ServerID(nodeID) + raftConfig.Dir = raftDir + raftConfig.SnapshotCount = snapshotCount + raftConfig.Timeout, err = time.ParseDuration(raftTimeout) + if err != nil { + fmt.Fprint(os.Stderr, errors.Wrap(err, "Failed to parse raft timeout")) + return } // Store config - storeConfig := boltdb.DefaultConfig() - if storeDir != "" { - storeConfig.Path = storeDir - } + storeConfig := boltdb.DefaultStoreConfig() + storeConfig.Dir = storeDir // Index config - indexConfig := bleve.DefaultConfig() - if indexDir != "" { - indexConfig.Path = indexDir - } - if indexMapping != "" { - var imf *os.File - if imf, err = os.Open(indexMapping); err != nil { - fmt.Fprint(os.Stderr, errors.Wrap(err, "Failed to open index mapping")) - return - } - defer imf.Close() - - var imb []byte - if imb, err = ioutil.ReadAll(imf); err != nil { - fmt.Fprintln(os.Stderr, errors.Wrap(err, "Failed to read index mapping")) - return - } - - im := mapping.NewIndexMapping() - if err = json.Unmarshal(imb, &im); err != nil { - fmt.Fprintln(os.Stderr, errors.Wrap(err, "Failed to unmarshal index mapping")) + indexConfig := bleve.DefaultIndexConfig() + 
indexConfig.Dir = indexDir + indexConfig.IndexType = indexType + indexConfig.Kvstore = indexKvstore + if indexMappingFile != "" { + err = indexConfig.SetIndexMapping(indexMappingFile) + if err != nil { + fmt.Fprint(os.Stderr, errors.Wrap(err, "Failed to read index mapping file")) return } - indexConfig.IndexMapping = im - } - if indexType != "" { - indexConfig.IndexType = indexType - } - if indexKvstore != "" { - indexConfig.Kvstore = indexKvstore } // Create logger @@ -157,37 +123,36 @@ func start(c *cli.Context) { ) // Check bootstrap node - var bootstrap bool - bootstrap = peerGRPCAddr == "" || peerGRPCAddr == grpcAddr + bootstrap := peerGRPCAddr == "" || peerGRPCAddr == grpcAddr // Create Service - var svc *service.KVSService - if svc, err = service.NewKVSService(bindAddr, raftConfig, bootstrap, storeConfig, indexConfig); err != nil { + svc, err := service.NewKVSService(bindAddr, raftConfig, bootstrap, storeConfig, indexConfig) + if err != nil { fmt.Fprintln(os.Stderr, errors.Wrap(err, "Failed to create service")) return } svc.SetLogger(logger) // Start service - if err = svc.Start(); err != nil { + err = svc.Start() + defer svc.Stop() + if err != nil { fmt.Fprintln(os.Stderr, errors.Wrap(err, "Failed to start service")) return } - defer svc.Stop() // Create gRPC server - var grpcServer *grpcserver.GRPCServer - if grpcServer, err = grpcserver.NewGRPCServer(grpcAddr, svc); err != nil { + grpcServer, err := grpcserver.NewGRPCServer(grpcAddr, svc) + if err != nil { fmt.Fprintln(os.Stderr, errors.Wrap(err, "Failed to create gRPC Server")) return } - defer grpcServer.Stop() grpcServer.SetLogger(logger) - grpcServer.SetMaxSendMessageSize(maxSendMsgSize) - grpcServer.SetMaxReceiveMessageSize(maxRecvMsgSize) // Start gRPC server - if err = grpcServer.Start(); err != nil { + err = grpcServer.Start() + defer grpcServer.Stop() + if err != nil { fmt.Fprintln(os.Stderr, errors.Wrap(err, "Failed to start gRPC Server")) return } @@ -202,27 +167,25 @@ func start(c 
*cli.Context) { ) // Create HTTP server - var httpServer *httpserver.HTTPServer - if httpServer, err = httpserver.NewHTTPServer(httpAddr, grpcAddr); err != nil { + httpServer, err := httpserver.NewHTTPServer(httpAddr, grpcAddr) + if err != nil { fmt.Fprintln(os.Stderr, errors.Wrap(err, "Failed to initialize HTTP Server")) return } - defer httpServer.Stop() // Setup HTTP server httpServer.SetLogger(logger) httpServer.SetHTTPAccessLogger(httpAccessLogger) - httpServer.SetMaxSendMessageSize(maxSendMsgSize) - httpServer.SetMaxReceiveMessageSize(maxRecvMsgSize) // Start HTTP server - if err = httpServer.Start(); err != nil { + err = httpServer.Start() + defer httpServer.Stop() + if err != nil { fmt.Fprintln(os.Stderr, errors.Wrap(err, "Failed to start HTTP Server")) return } - var joinReq *protobuf.JoinRequest - joinReq = &protobuf.JoinRequest{ + joinReq := &protobuf.JoinRequest{ NodeId: nodeID, Address: bindAddr, Metadata: &protobuf.Metadata{ @@ -234,7 +197,8 @@ func start(c *cli.Context) { if bootstrap { // If node is bootstrap, put metadata into service. // Wait for leader detected - if _, err = svc.WaitForLeader(60 * time.Second); err != nil { + _, err = svc.WaitForLeader(60 * time.Second) + if err != nil { fmt.Fprintln(os.Stderr, errors.Wrap(err, "Failed to detect leader node")) return } @@ -243,12 +207,12 @@ func start(c *cli.Context) { svc.PutMetadata(joinReq) } else { // If node is not bootstrap, make the join request. 
- var grpcClient *client.GRPCClient - if grpcClient, err = client.NewGRPCClient(peerGRPCAddr, maxSendMsgSize, maxRecvMsgSize); err != nil { + grpcClient, err := client.NewGRPCClient(peerGRPCAddr) + defer grpcClient.Close() + if err != nil { fmt.Fprintln(os.Stderr, errors.New(err.Error())) return } - defer grpcClient.Close() grpcClient.Join(joinReq) } diff --git a/docker-compose.yml b/docker-compose.yml index 14974a6..074c6ed 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,7 +1,7 @@ version: '2' services: blast1: - image: mosuka/blast:v0.3.0 + image: mosuka/blast:latest restart: always ports: - 10000:10000 @@ -11,11 +11,11 @@ services: BLAST_BIND_ADDR: blast1:10000 BLAST_GRPC_ADDR: blast1:10001 BLAST_HTTP_ADDR: blast1:10002 - BLAST_NODE_ID: node1 + BLAST_RAFT_NODE_ID: node1 command: start blast2: - image: mosuka/blast:v0.3.0 + image: mosuka/blast:latest restart: always ports: - 11000:10000 @@ -25,12 +25,12 @@ services: BLAST_BIND_ADDR: blast2:10000 BLAST_GRPC_ADDR: blast2:10001 BLAST_HTTP_ADDR: blast2:10002 - BLAST_NODE_ID: node2 + BLAST_RAFT_NODE_ID: node2 BLAST_PEER_GRPC_ADDR: blast1:10001 command: start blast3: - image: mosuka/blast:v0.3.0 + image: mosuka/blast:latest restart: always ports: - 12000:10000 @@ -40,6 +40,6 @@ services: BLAST_BIND_ADDR: blast3:10000 BLAST_GRPC_ADDR: blast3:10001 BLAST_HTTP_ADDR: blast3:10002 - BLAST_NODE_ID: node3 + BLAST_RAFT_NODE_ID: node3 BLAST_PEER_GRPC_ADDR: blast1:10001 command: start diff --git a/etc/index_mapping.json b/etc/index_mapping.json new file mode 100644 index 0000000..d3d514f --- /dev/null +++ b/etc/index_mapping.json @@ -0,0 +1,2984 @@ +{ + "types": { + "arwiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_ar": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "ar", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "ar" + }, + "text_ar": { + "enabled": true, + 
"dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "ar", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "ar" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "ar" + }, + "bgwiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_bg": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "bg", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "bg" + }, + "text_bg": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "bg", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "bg" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + 
"fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "bg" + }, + "cawiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_ca": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "ca", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "ca" + }, + "text_ca": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "ca", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "ca" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "ca" + }, + "cswiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_cs": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "cs", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "cs" + }, + "text_cs": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "cs", + "store": true, + "index": true, + "include_term_vectors": 
true, + "include_in_all": true + } + ], + "default_analyzer": "cs" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "cs" + }, + "dawiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_da": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "da", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "da" + }, + "text_da": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "da", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "da" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + 
"include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "da" + }, + "dewiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_de": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "de", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "de" + }, + "text_de": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "de", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "de" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "de" + }, + "elwiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_el": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "el", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "el" + }, + "text_el": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "el", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "el" + }, + "contributor": { + "enabled": true, + "dynamic": 
true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "el" + }, + "enwiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_en": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "en", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "en" + }, + "text_en": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "en", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "en" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "en" + }, + "eswiki": { + "enabled": 
true, + "dynamic": true, + "properties": { + "title_es": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "es", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "es" + }, + "text_es": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "es", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "es" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "es" + }, + "fawiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_fa": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "fa", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "fa" + }, + "text_fa": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "fa", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "fa" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + 
"include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "fa" + }, + "fiwiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_fi": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "fi", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "fi" + }, + "text_fi": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "fi", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "fi" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "fi" + }, + "frwiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_fr": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + 
"analyzer": "fr", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "fr" + }, + "text_fr": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "fr", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "fr" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "fr" + }, + "gawiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_ga": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "ga", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "ga" + }, + "text_ga": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "ga", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "ga" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": 
[ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "ga" + }, + "glwiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_gl": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "gl", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "gl" + }, + "text_gl": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "gl", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "gl" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "gl" + }, + "guwiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_gu": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "in", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": 
"in" + }, + "text_gu": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "in", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "in" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "in" + }, + "hiwiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_hi": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "hi", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "hi" + }, + "text_hi": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "hi", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "hi" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + 
"enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "hi" + }, + "huwiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_hu": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "hu", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "hu" + }, + "text_hu": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "hu", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "hu" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "hu" + }, + "hywiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_hy": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "hy", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "hy" + }, + "text_hy": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "hy", + "store": true, + 
"index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "hy" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "hy" + }, + "idwiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_id": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "id", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "id" + }, + "text_id": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "id", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "id" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + 
"include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "id" + }, + "itwiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_it": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "it", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "it" + }, + "text_it": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "it", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "it" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "it" + }, + "jawiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_ja": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "ja", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "ja" + }, + "text_ja": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "ja", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "ja" + }, + "contributor": { + 
"enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "ja" + }, + "knwiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_kn": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "in", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "in" + }, + "text_kn": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "in", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "in" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "in" + }, + 
"kowiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_ko": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "cjk", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "cjk" + }, + "text_ko": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "cjk", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "cjk" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "cjk" + }, + "mlwiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_ml": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "in", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "in" + }, + "text_ml": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "in", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "in" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + 
"include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "in" + }, + "nlwiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_nl": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "nl", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "nl" + }, + "text_nl": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "nl", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "nl" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "nl" + }, + "nowiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_no": { + "enabled": true, + "dynamic": true, + 
"fields": [ + { + "type": "text", + "analyzer": "no", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "no" + }, + "text_no": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "no", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "no" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "no" + }, + "pswiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_ps": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "ckb", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "ckb" + }, + "text_ps": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "ckb", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "ckb" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + 
"enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "ckb" + }, + "ptwiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_pt": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "pt", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "pt" + }, + "text_pt": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "pt", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "pt" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "pt" + }, + "rowiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_ro": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "ro", + "store": true, + "index": true, + "include_term_vectors": true, + 
"include_in_all": true + } + ], + "default_analyzer": "ro" + }, + "text_ro": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "ro", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "ro" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "ro" + }, + "ruwiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_ru": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "ru", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "ru" + }, + "text_ru": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "ru", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "ru" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true 
+ } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "ru" + }, + "svwiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_sv": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "sv", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "sv" + }, + "text_sv": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "sv", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "sv" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "sv" + }, + "tawiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_ta": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "in", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "in" + }, + "text_ta": { + "enabled": true, + "dynamic": true, + "fields": [ + { + 
"type": "text", + "analyzer": "in", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "in" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "in" + }, + "tewiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_te": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "in", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "in" + }, + "text_te": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "in", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "in" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + 
"analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "in" + }, + "thwiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_th": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "th", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "th" + }, + "text_th": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "th", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "th" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "th" + }, + "trwiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_tr": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "tr", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "tr" + }, + "text_tr": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "tr", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + 
], + "default_analyzer": "tr" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + } + }, + "default_analyzer": "tr" + }, + "zhwiki": { + "enabled": true, + "dynamic": true, + "properties": { + "title_zh": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "cjk", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "cjk" + }, + "text_zh": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "cjk", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "cjk" + }, + "contributor": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + "default_analyzer": "keyword" + }, + "timestamp": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "datetime", + "store": true, + "index": true, + "include_in_all": true + } + ], + "default_analyzer": "" + }, + "_type": { + "enabled": true, + "dynamic": true, + "fields": [ + { + "type": "text", + "analyzer": "keyword", + "store": true, + "index": true, + "include_term_vectors": true, + "include_in_all": true + } + ], + 
"default_analyzer": "keyword" + } + }, + "default_analyzer": "cjk" + } + }, + "default_mapping": { + "enabled": true, + "dynamic": true, + "default_analyzer": "standard" + }, + "type_field": "_type", + "default_type": "_default", + "default_analyzer": "standard", + "default_datetime_parser": "dateTimeOptional", + "default_field": "_all", + "store_dynamic": true, + "index_dynamic": true, + "analysis": { + "analyzers": { + "bg": { + "type": "custom", + "char_filters": [], + "tokenizer": "unicode", + "token_filters": [ + "stop_bg" + ] + }, + "ca": { + "type": "custom", + "char_filters": [], + "tokenizer": "unicode", + "token_filters": [ + "stop_ca" + ] + }, + "cs": { + "type": "custom", + "char_filters": [], + "tokenizer": "unicode", + "token_filters": [ + "stop_cs" + ] + }, + "el": { + "type": "custom", + "char_filters": [], + "tokenizer": "unicode", + "token_filters": [ + "stop_el" + ] + }, + "ga": { + "type": "custom", + "char_filters": [], + "tokenizer": "unicode", + "token_filters": [ + "stop_ga" + ] + }, + "gl": { + "type": "custom", + "char_filters": [], + "tokenizer": "unicode", + "token_filters": [ + "stop_gl" + ] + }, + "in": { + "type": "custom", + "char_filters": [], + "tokenizer": "unicode", + "token_filters": [ + "normalize_in" + ] + }, + "hy": { + "type": "custom", + "char_filters": [], + "tokenizer": "unicode", + "token_filters": [ + "stop_hy" + ] + }, + "id": { + "type": "custom", + "char_filters": [], + "tokenizer": "unicode", + "token_filters": [ + "stop_id" + ] + } + }, + "char_filters": {}, + "tokenizers": {}, + "token_filters": {}, + "token_maps": { + "hw": { + "type": "custom", + "tokens": [ + "hello", + "there" + ] + } + } + } +} diff --git a/example/bulk_delete_request.json b/example/bulk_delete_request.json index b59289e..60cf55f 100644 --- a/example/bulk_delete_request.json +++ b/example/bulk_delete_request.json @@ -2,41 +2,217 @@ { "type": "DELETE", "document": { - "id": "enwiki_doc1", - "fields": { - "title_en": "Search engine 
(computing)", - "text_en": "A search engine is an information retrieval system designed to help find information stored on a computer system. The search results are usually presented in a list and are commonly called hits. Search engines help to minimize the time required to find information and the amount of information which must be consulted, akin to other techniques for managing information overload.", - "contributor": "unknown", - "timestamp": "2018-07-04T05:41:00Z", - "_type": "enwiki" - } + "id": "arwiki_1" } }, { "type": "DELETE", "document": { - "id": "ptwiki_doc1", - "fields": { - "title_pt": "Motor de busca", - "text_pt": "Motor de pesquisa (português europeu) ou ferramenta de busca (português brasileiro) ou buscador (em inglês: search engine) é um programa desenhado para procurar palavras-chave fornecidas pelo utilizador em documentos e bases de dados. No contexto da internet, um motor de pesquisa permite procurar palavras-chave em documentos alojados na world wide web, como aqueles que se encontram armazenados em websites.", - "contributor": "unknown", - "timestamp": "2018-07-04T05:41:00Z", - "_type": "ptwiki" - } - + "id": "bgwiki_1" } }, { "type": "DELETE", "document": { - "id": "jawiki_doc1", - "fields": { - "title_ja": "検索エンジン", - "text_ja": "検索エンジン(けんさくエンジン、英語: search engine)は、狭義にはインターネットに存在する情報(ウェブページ、ウェブサイト、画像ファイル、ネットニュースなど)を検索する機能およびそのプログラム。インターネットの普及初期には、検索としての機能のみを提供していたウェブサイトそのものを検索エンジンと呼んだが、現在では様々なサービスが加わったポータルサイト化が進んだため、検索をサービスの一つとして提供するウェブサイトを単に検索サイトと呼ぶことはなくなっている。広義には、インターネットに限定せず情報を検索するシステム全般を含む。", - "contributor": "unknown", - "timestamp": "2018-05-30T00:52:00Z", - "_type": "jawiki" - } + "id": "cawiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "cswiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "dawiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "dewiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "elwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": 
"enwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "eswiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "fawiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "fiwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "frwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "gawiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "glwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "guwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "hiwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "huwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "hywiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "idwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "itwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "jawiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "knwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "kowiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "mlwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "nlwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "nowiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "pswiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "ptwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "rowiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "ruwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "svwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "tawiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "tewiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "thwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "trwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "zhwiki_1" } } -] \ No newline at end of file +] diff --git 
a/example/bulk_put_delete_request.json b/example/bulk_put_delete_request.json index 7ac5bbe..313af3e 100644 --- a/example/bulk_put_delete_request.json +++ b/example/bulk_put_delete_request.json @@ -2,10 +2,101 @@ { "type": "PUT", "document": { - "id": "enwiki_doc1", + "id": "arwiki_1", + "fields": { + "title_ar": "محرك بحث", + "text_ar": "محرك البحث (بالإنجليزية: Search engine) هو نظام لإسترجاع المعلومات صمم للمساعدة على البحث عن المعلومات المخزنة على أي نظام حاسوبي. تعرض نتائج البحث عادة على شكل قائمة لأماكن تواجد المعلومات ومرتبة وفق معايير معينة. تسمح محركات البحث باختصار مدة البحث والتغلب على مشكلة أحجام البيانات المتصاعدة (إغراق معلوماتي).", + "contributor": "unknown", + "timestamp": "2018-03-25T18:04:00Z", + "_type": "arwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "bgwiki_1", + "fields": { + "title_bg": "Търсачка", + "text_bg": "Търсачка или търсеща машина (на английски: Web search engine) е специализиран софтуер за извличане на информация, съхранена в компютърна система или мрежа. Това може да е персонален компютър, Интернет, корпоративна мрежа и т.н. Без допълнителни уточнения, най-често под търсачка се разбира уеб(-)търсачка, която търси в Интернет. Други видове търсачки са корпоративните търсачки, които търсят в интранет мрежите, личните търсачки – за индивидуалните компютри и мобилните търсачки. В търсачката потребителят (търсещият) прави запитване за съдържание, отговарящо на определен критерий (обикновено такъв, който съдържа определени думи и фрази). В резултат се получават списък от точки, които отговарят, пълно или частично, на този критерий. Търсачките обикновено използват редовно подновявани индекси, за да оперират бързо и ефикасно. Някои търсачки също търсят в информацията, която е на разположение в нюзгрупите и други големи бази данни. За разлика от Уеб директориите, които се поддържат от хора редактори, търсачките оперират алгоритмично. 
Повечето Интернет търсачки са притежавани от различни корпорации.", + "contributor": "unknown", + "timestamp": "2018-07-11T11:03:00Z", + "_type": "bgwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "cawiki_1", + "fields": { + "title_ca": "Motor de cerca", + "text_ca": "Un motor de cerca o de recerca o bé cercador és un programa informàtic dissenyat per ajudar a trobar informació emmagatzemada en un sistema informàtic com ara una xarxa, Internet, un servidor o un ordinador personal. L'objectiu principal és el de trobar altres programes informàtics, pàgines web i documents, entre d'altres. A partir d'una determinada paraula o paraules o una determinada frase l'usuari demana un contingut sota un criteri determinat i retorna una llista de referències que compleixin aquest criteri. El procés es realitza a través de les metadades, vies per comunicar informació que utilitzen els motors per cada cerca. Els índex que utilitzen els cercadors sempre estan actualitzats a través d'un robot web per generar rapidesa i eficàcia en la recerca. Els directoris, en canvi, són gestionats per editors humans.", + "contributor": "unknown", + "timestamp": "2018-07-09T18:07:00Z", + "_type": "cawiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "cswiki_1", + "fields": { + "title_cs": "Vyhledávač", + "text_cs": "Vyhledávač je počítačový systém či program, který umožňuje uživateli zadat nějaký libovolný nebo specifikovaný vyhledávaný výraz a získat z velkého objemu dat informace, které jsou v souladu s tímto dotazem. Jako vyhledávač se označují i ​​webové stránky, jejichž hlavní funkcí je poskytování takového systému či programu. Jako internetový vyhledávač se označuje buď vyhledávač, na který se přistupuje přes internet, nebo vyhledávač, jehož zdrojem vyhledávání je internet (tj. WWW, Usenet apod.). Jako online vyhledávač se označuje vyhledávač, při jehož výkonu činnosti dochází k výměně dat v rámci nějaké počítačové sítě, nejčastěji to je internetový vyhledávač. 
Fulltextový vyhledávač je vyhedávač, který vykonává fulltextové vyhledávání.", + "contributor": "unknown", + "timestamp": "2017-11-10T21:59:00Z", + "_type": "cswiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "dawiki_1", + "fields": { + "title_da": "Søgemaskine", + "text_da": "En søgemaskine er en applikation til at hjælpe en bruger med at finde information. Det kan f.eks. være at finde filer med bestemte data (f.eks. ord), gemt i en computers hukommelse, for eksempel via World Wide Web (kaldes så en websøgemaskine). Ofte bruges søgemaskine fejlagtigt om linkkataloger eller Netguider.", + "contributor": "unknown", + "timestamp": "2017-09-04T01:54:00Z", + "_type": "dawiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "dewiki_1", + "fields": { + "title_de": "Suchmaschine", + "text_de": "Eine Suchmaschine ist ein Programm zur Recherche von Dokumenten, die in einem Computer oder einem Computernetzwerk wie z. B. dem World Wide Web gespeichert sind. Internet-Suchmaschinen haben ihren Ursprung in Information-Retrieval-Systemen. Sie erstellen einen Schlüsselwort-Index für die Dokumentbasis, um Suchanfragen über Schlüsselwörter mit einer nach Relevanz geordneten Trefferliste zu beantworten. Nach Eingabe eines Suchbegriffs liefert eine Suchmaschine eine Liste von Verweisen auf möglicherweise relevante Dokumente, meistens dargestellt mit Titel und einem kurzen Auszug des jeweiligen Dokuments. Dabei können verschiedene Suchverfahren Anwendung finden.", + "contributor": "unknown", + "timestamp": "2017-09-04T01:54:00Z", + "_type": "dewiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "elwiki_1", + "fields": { + "title_el": "Μηχανή αναζήτησης", + "text_el": "Μια μηχανή αναζήτησης είναι μια εφαρμογή που επιτρέπει την αναζήτηση κειμένων και αρχείων στο Διαδίκτυο. 
Αποτελείται από ένα πρόγραμμα υπολογιστή που βρίσκεται σε έναν ή περισσότερους υπολογιστές στους οποίους δημιουργεί μια βάση δεδομένων με τις πληροφορίες που συλλέγει από το διαδίκτυο, και το διαδραστικό περιβάλλον που εμφανίζεται στον τελικό χρήστη ο οποίος χρησιμοποιεί την εφαρμογή από άλλον υπολογιστή συνδεδεμένο στο διαδίκτυο. Οι μηχανές αναζήτησης αποτελούνται από 3 είδη λογισμικού, το spider software, το index software και το query software.", + "contributor": "unknown", + "timestamp": "2017-11-21T19:57:00Z", + "_type": "elwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "enwiki_1", "fields": { "title_en": "Search engine (computing)", - "text_en": "A search engine is an information retrieval system designed to help find information stored on a computer system. The search results are usually presented in a list and are commonly called hits. Search engines help to minimize the time required to find information and the amount of information which must be consulted, akin to other techniques for managing information overload.", + "text_en": "A search engine is an information retrieval system designed to help find information stored on a computer system. The search results are usually presented in a list and are commonly called hits. Search engines help to minimize the time required to find information and the amount of information which must be consulted, akin to other techniques for managing information overload. 
The most public, visible form of a search engine is a Web search engine which searches for information on the World Wide Web.", "contributor": "unknown", "timestamp": "2018-07-04T05:41:00Z", "_type": "enwiki" @@ -15,24 +106,166 @@ { "type": "PUT", "document": { - "id": "ptwiki_doc1", + "id": "eswiki_1", "fields": { - "title_pt": "Motor de busca", - "text_pt": "Motor de pesquisa (português europeu) ou ferramenta de busca (português brasileiro) ou buscador (em inglês: search engine) é um programa desenhado para procurar palavras-chave fornecidas pelo utilizador em documentos e bases de dados. No contexto da internet, um motor de pesquisa permite procurar palavras-chave em documentos alojados na world wide web, como aqueles que se encontram armazenados em websites.", + "title_es": "Motor de búsqueda", + "text_es": "Un motor de búsqueda o buscador es un sistema informático que busca archivos almacenados en servidores web gracias a su spider (también llamado araña web). Un ejemplo son los buscadores de Internet (algunos buscan únicamente en la web, pero otros lo hacen además en noticias, servicios como Gopher, FTP, etc.) cuando se pide información sobre algún tema. Las búsquedas se hacen con palabras clave o con árboles jerárquicos por temas; el resultado de la búsqueda «Página de resultados del buscador» es un listado de direcciones web en los que se mencionan temas relacionados con las palabras clave buscadas. Como operan de forma automática, los motores de búsqueda contienen generalmente más información que los directorios. 
Sin embargo, estos últimos también han de construirse a partir de búsquedas (no automatizadas) o bien a partir de avisos dados por los creadores de páginas.", "contributor": "unknown", - "timestamp": "2018-07-04T05:41:00Z", - "_type": "ptwiki" + "timestamp": "2018-08-30T11:30:00Z", + "_type": "eswiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "fawiki_1", + "fields": { + "title_fa": "موتور جستجو (پردازش)", + "text_fa": "موتور جستجو یا جویشگر، در فرهنگ رایانه، به طور عمومی به برنامه‌ای گفته می‌شود که کلمات کلیدی را در یک سند یا بانک اطلاعاتی جستجو می‌کند. در اینترنت به برنامه‌ای گفته می‌شود که کلمات کلیدی موجود در فایل‌ها و سندهای وب جهانی، گروه‌های خبری، منوهای گوفر و آرشیوهای FTP را جستجو می‌کند. جویشگرهای زیادی وجود دارند که امروزه از معروفترین و پراستفاده‌ترین آنها می‌توان به google و یاهو! جستجو اشاره کرد.", + "contributor": "unknown", + "timestamp": "2017-01-06T02:46:00Z", + "_type": "fawiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "fiwiki_1", + "fields": { + "title_fi": "Hakukone", + "text_fi": "Hakukone on web-pohjainen ohjelma, joka etsii jatkuvasti Internetistä (varsinkin Webistä) uusia sivuja eritellen ja liittäen ne hakemistoonsa erityisten hakusanojen mukaan. Näitä hyväksi käyttäen hakukone tulostaa käyttäjän syöttämiä hakusanoja lähimpänä olevat sivut. Analysointi tapahtuu käytännössä eri hakukoneissa erilaisilla menetelmillä.", + "contributor": "unknown", + "timestamp": "2017-10-04T14:33:00Z", + "_type": "fiwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "frwiki_1", + "fields": { + "title_fr": "Moteur de recherche", + "text_fr": "Un moteur de recherche est une application web permettant de trouver des ressources à partir d'une requête sous forme de mots. Les ressources peuvent être des pages web, des articles de forums Usenet, des images, des vidéos, des fichiers, etc. 
Certains sites web offrent un moteur de recherche comme principale fonctionnalité ; on appelle alors « moteur de recherche » le site lui-même. Ce sont des instruments de recherche sur le web sans intervention humaine, ce qui les distingue des annuaires. Ils sont basés sur des « robots », encore appelés « bots », « spiders «, « crawlers » ou « agents », qui parcourent les sites à intervalles réguliers et de façon automatique pour découvrir de nouvelles adresses (URL). Ils suivent les liens hypertextes qui relient les pages les unes aux autres, les uns après les autres. Chaque page identifiée est alors indexée dans une base de données, accessible ensuite par les internautes à partir de mots-clés. C'est par abus de langage qu'on appelle également « moteurs de recherche » des sites web proposant des annuaires de sites web : dans ce cas, ce sont des instruments de recherche élaborés par des personnes qui répertorient et classifient des sites web jugés dignes d'intérêt, et non des robots d'indexation. Les moteurs de recherche ne s'appliquent pas qu'à Internet : certains moteurs sont des logiciels installés sur un ordinateur personnel. Ce sont des moteurs dits « de bureau » qui combinent la recherche parmi les fichiers stockés sur le PC et la recherche parmi les sites Web — on peut citer par exemple Exalead Desktop, Google Desktop et Copernic Desktop Search, Windex Server, etc. On trouve également des métamoteurs, c'est-à-dire des sites web où une même recherche est lancée simultanément sur plusieurs moteurs de recherche, les résultats étant ensuite fusionnés pour être présentés à l'internaute. 
On peut citer dans cette catégorie Ixquick, Mamma, Kartoo, Framabee ou Lilo.", + "contributor": "unknown", + "timestamp": "2018-05-30T15:15:00Z", + "_type": "frwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "gawiki_1", + "fields": { + "title_ga": "Inneall cuardaigh", + "text_ga": "Acmhainn ar an ngréasán domhanda atá insroichte le brabhsálaí Gréasáin, a chabhraíonn leis an úsáideoir ionaid is eolas a aimsiú. Bíonn na hinnill cuardaigh (Yahoo, Lycos, Google, Ask Jeeves) ag cuardach tríd an ngréasán an t-am ar fad, ag tógáil innéacsanna ábhar éagsúla — mar shampla, ag aimsiú teidil, fotheidil, eochairfhocail is céadlínte cáipéisí. Uaidh sin, is féidir cuid mhaith cáipéisí éagsúla ar ábhar ar leith a aisghabháil. Déanann an cuardach leanúnach cinnte de go bhfuil na hinnéacsanna suas chun dáta. Mar sin féin, aisghabhann na hinnill an-chuid cháipéisí nach mbaineann le hábhar, agus tá an-iarracht ar siúl an t-am ar fad iad a fheabhsú.", + "contributor": "unknown", + "timestamp": "2013-10-27T18:17:00Z", + "_type": "gawiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "glwiki_1", + "fields": { + "title_gl": "Motor de busca", + "text_gl": "Un motor de busca ou buscador é un sistema informático que procura arquivos almacenados en servidores web, un exemplo son os buscadores de internet (algúns buscan só na Web pero outros buscan ademais en News, Gopher, FTP etc.) cando lles pedimos información sobre algún tema. As procuras fanse con palabras clave ou con árbores xerárquicas por temas; o resultado da procura é unha listaxe de direccións Web nas que se mencionan temas relacionados coas palabras clave buscadas.", + "contributor": "unknown", + "timestamp": "2016-10-31T13:33:00Z", + "_type": "glwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "guwiki_1", + "fields": { + "title_gu": "વેબ શોધ એન્જીન", + "text_gu": "વેબ શોધ એન્જિન એ વર્લ્ડ વાઈડ વેબ (World Wide Web) પર વિવિધ માહિતી શોધવા માટે ઉપયોગમાં લેવામાં આવે છે. 
શોધ લીસ્ટને સામાન્ય રીતે યાદીમાં દર્શાવવામાં આવે છે અને જેને સામાન્ય રીતે હીટ્સ કહેવામાં આવે છે. જે માહિતી મળે છે તેમાં વેબ પૃષ્ઠ (web page), છબીઓ, માહિતી અને અન્ય પ્રકારની ફાઈલો હોય છે. કેટલાક શોધ એન્જિનો ન્યુઝબુક, ડેટાબેઝ અને અન્ય પ્રકારની ઓપન ડીરેક્ટરી (open directories)ઓની વિગતો પણ આપે છે. વ્યકિતઓ દ્વારા દુરસ્ત થતી વેબ ડાયરેક્ટરીઝ (Web directories)થી અલગ રીતે, શોધ એન્જિન ઍલ્ગરિધમનો અથવા ઍલ્ગરિધમ (algorithmic) અને માનવીય બાબતોના મિક્ષણનો ઉપયોગ કરે છે.", + "contributor": "unknown", + "timestamp": "2013-04-04T19:28:00Z", + "_type": "guwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "hiwiki_1", + "fields": { + "title_hi": "खोज इंजन", + "text_hi": "ऐसे कम्प्यूटर प्रोग्राम खोजी इंजन (search engine) कहलाते हैं जो किसी कम्प्यूटर सिस्टम पर भण्डारित सूचना में से वांछित सूचना को ढूढ निकालते हैं। ये इंजन प्राप्त परिणामों को प्रायः एक सूची के रूप में प्रस्तुत करते हैं जिससे वांछित सूचना की प्रकृति और उसकी स्थिति का पता चलता है। खोजी इंजन किसी सूचना तक अपेक्षाकृत बहुत कम समय में पहुँचने में हमारी सहायता करते हैं। वे 'सूचना ओवरलोड' से भी हमे बचाते हैं। खोजी इंजन का सबसे प्रचलित रूप 'वेब खोजी इंजन' है जो वर्ल्ड वाइड वेब पर सूचना खोजने के लिये प्रयुक्त होता है।", + "contributor": "unknown", + "timestamp": "2017-10-19T20:09:00Z", + "_type": "hiwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "huwiki_1", + "fields": { + "title_hu": "Keresőmotor", + "text_hu": "A keresőmotor az informatikában egy program vagy alkalmazás, amely bizonyos feltételeknek (többnyire egy szónak vagy kifejezésnek) megfelelő információkat keres valamilyen számítógépes környezetben. Ez a cikk a World Wide Weben (és esetleg az internet más részein, például a Useneten) kereső alkalmazásokról szól, a keresőmotor kifejezés önmagában általában ezekre vonatkozik. 
Másfajta keresőmotorokra példák a vállalati keresőmotorok, amik egy intraneten, és a személyi keresőmotorok, amik egy személyi számítógép állományai között keresnek.", + "contributor": "unknown", + "timestamp": "2018-05-15T20:40:00Z", + "_type": "huwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "hywiki_1", + "fields": { + "title_hy": "Որոնողական համակարգ", + "text_hy": "Որոնողական համակարգը գործիք է, որը նախատեսված է համապատասխան բառերով Համաշխարհային ցանցում որոնումներ կատարելու համար։ Ստեղծված է համացանցում և FTP սերվերներում ինֆորմացիա փնտրելու համար։ Փնտրված արդյունքները ընդհանրապես ներկայացվում են արդյունքների ցանկում և սովորաբար կոչվում են նպատակակակետ, հիթ։ Ինֆորմացիան կարող է բաղկացած լինել վեբ էջերից, նկարներից, ինֆորմացիաներից և այլ տիպի ֆայլերից ու տվյալներից։ Այն կարող է օգտագործվել տարբեր տեսակի տեղեկատվություն որոնելու համար, ներառյալ՝ կայքեր, ֆորումներ, նկարներ, վիդեոներ, ֆայլեր և այլն։ Որոշ կայքեր արդեն իրենցից ներկայացնում են ինչ-որ որոնողական համակարգ, օրինակ՝ Dailymotion, YouTube և Google Videos ինտերնետում տեղադրված տեսահոլովակների որոնողական կայքեր են։ Որոնողական կայքը բաղկացած է \"ռոբոտներից\", որոնց անվանում են նաև bot, spider, crawler, որոնք ավտոմատ կերպով, առանց մարդկային միջամտության պարբերաբար հետազոտում են կայքերը։ Որոնողական կայքերը հետևում են հղումներին, որոնք կապված լինելով իրար հետ ինդեքսավորում է յուրաքանչյուր էջ տվյալների բազայում՝ հետագայում բանալի բառերի օգնությամբ դառնալով հասանելի ինտերնետից օգտվողների համար։ Սխալմամբ, որոնողական կայքեր են անվանում նաև այն կայքերը, որոնք իրենցից ներկայացնում են կայքային տեղեկատուներ։ Այս կայքերում ուշադրության արժանի կայքերը ցուցակագրվում և դասակարգվում են մարդկային ռեսուրսների շնորհիվ, այլ ոչ թե բոտերի կամ ռոբետների միջոցով։ Այդ կայքերից կարելի է նշել օրինակ՝ Yahoo!։ Yahoo!-ի որոնողական կայքը գտնվում է այստեղ։ Բոլոր որոնողական համակարգերը նախատեսված են ինտերնետում որոնում իրականացնելու համար, սակայն կան որոշ որոնողական համակարգերի տարատեսակներ, որոնք համակարգչային ծրագրեր են և 
հետևաբար տեղակայվում են համակարգչի մեջ։ Այս համակարգերը կոչվում են desktop։ Վերջիներս հնարավորություն են տալիս որոնելու թե համակարգչի մեջ կուտակված ֆայլեը, թե կայքերում տեղադրված ռեսուրսները։ Այդ ծրագրերից ամենահայտնիներն են՝ Exalead Desktop, Copernic Desktop Search Գոյություն ունեն նաև մետա-որոնողական համակարգեր, այսինքն կայքեր, որ նույն որոնումը կատարում են միաժամանակ տարբեր որոնողական կայքերի միջնորդությամբ։ Որոնման արդյունքները հետո դասակարգվում են որպեսզի ներկայացվեն օգտագործողին։ Մետա-որոնողական համակարգերի շարքից կարելի է թվարկել օրինակ՝ Mamma և Kartoo։", + "contributor": "unknown", + "timestamp": "2017-11-20T17:47:00Z", + "_type": "hywiki" } - } }, { "type": "PUT", "document": { - "id": "jawiki_doc1", + "id": "idwiki_1", + "fields": { + "title_id": "Mesin pencari web", + "text_id": "Mesin pencari web atau mesin telusur web (bahasa Inggris: web search engine) adalah program komputer yang dirancang untuk melakukan pencarian atas berkas-berkas yang tersimpan dalam layanan www, ftp, publikasi milis, ataupun news group dalam sebuah ataupun sejumlah komputer peladen dalam suatu jaringan. Mesin pencari merupakan perangkat penelusur informasi dari dokumen-dokumen yang tersedia. Hasil pencarian umumnya ditampilkan dalam bentuk daftar yang seringkali diurutkan menurut tingkat akurasi ataupun rasio pengunjung atas suatu berkas yang disebut sebagai hits. Informasi yang menjadi target pencarian bisa terdapat dalam berbagai macam jenis berkas seperti halaman situs web, gambar, ataupun jenis-jenis berkas lainnya. Beberapa mesin pencari juga diketahui melakukan pengumpulan informasi atas data yang tersimpan dalam suatu basis data ataupun direktori web. Sebagian besar mesin pencari dijalankan oleh perusahaan swasta yang menggunakan algoritme kepemilikan dan basis data tertutup, di antaranya yang paling populer adalah safari Google (MSN Search dan Yahoo!). 
Telah ada beberapa upaya menciptakan mesin pencari dengan sumber terbuka (open source), contohnya adalah Htdig, Nutch, Egothor dan OpenFTS.", + "contributor": "unknown", + "timestamp": "2017-11-20T17:47:00Z", + "_type": "idwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "itwiki_1", + "fields": { + "title_it": "Motore di ricerca", + "text_it": "Nell'ambito delle tecnologie di Internet, un motore di ricerca (in inglese search engine) è un sistema automatico che, su richiesta, analizza un insieme di dati (spesso da esso stesso raccolti) e restituisce un indice dei contenuti disponibili[1] classificandoli in modo automatico in base a formule statistico-matematiche che ne indichino il grado di rilevanza data una determinata chiave di ricerca. Uno dei campi in cui i motori di ricerca trovano maggiore utilizzo è quello dell'information retrieval e nel web. I motori di ricerca più utilizzati nel 2017 sono stati: Google, Bing, Baidu, Qwant, Yandex, Ecosia, DuckDuckGo.", + "contributor": "unknown", + "timestamp": "2018-07-16T12:20:00Z", + "_type": "itwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "jawiki_1", "fields": { "title_ja": "検索エンジン", - "text_ja": "検索エンジン(けんさくエンジン、英語: search engine)は、狭義にはインターネットに存在する情報(ウェブページ、ウェブサイト、画像ファイル、ネットニュースなど)を検索する機能およびそのプログラム。インターネットの普及初期には、検索としての機能のみを提供していたウェブサイトそのものを検索エンジンと呼んだが、現在では様々なサービスが加わったポータルサイト化が進んだため、検索をサービスの一つとして提供するウェブサイトを単に検索サイトと呼ぶことはなくなっている。広義には、インターネットに限定せず情報を検索するシステム全般を含む。", + "text_ja": "検索エンジン(けんさくエンジン、英語: search engine)は、狭義にはインターネットに存在する情報(ウェブページ、ウェブサイト、画像ファイル、ネットニュースなど)を検索する機能およびそのプログラム。インターネットの普及初期には、検索としての機能のみを提供していたウェブサイトそのものを検索エンジンと呼んだが、現在では様々なサービスが加わったポータルサイト化が進んだため、検索をサービスの一つとして提供するウェブサイトを単に検索サイトと呼ぶことはなくなっている。広義には、インターネットに限定せず情報を検索するシステム全般を含む。狭義の検索エンジンは、ロボット型検索エンジン、ディレクトリ型検索エンジン、メタ検索エンジンなどに分類される。広義の検索エンジンとしては、ある特定のウェブサイト内に登録されているテキスト情報の全文検索機能を備えたソフトウェア(全文検索システム)等がある。検索エンジンは、検索窓と呼ばれるボックスにキーワードを入力して検索をかけるもので、全文検索が可能なものと不可能なものとがある。検索サイトを一般に「検索エンジン」と呼ぶことはあるが、厳密には検索サイト自体は検索エンジンでない。", 
"contributor": "unknown", "timestamp": "2018-05-30T00:52:00Z", "_type": "jawiki" @@ -40,30 +273,414 @@ } }, { - "type": "DELETE", + "type": "PUT", + "document": { + "id": "knwiki_1", + "fields": { + "title_kn": "ಅಂತರ್ಜಾಲ ಹುಡುಕಾಟ ಯಂತ್ರ", + "text_kn": "ಅಂತರ್ಜಾಲ ಹುಡುಕಾಟ ಯಂತ್ರ ಎಂದರೆ World Wide Webನಲ್ಲಿ ಮಾಹಿತಿ ಹುಡುಕುವುದಕ್ಕಾಗಿ ವಿನ್ಯಾಸಗೊಳಿಸಲಾದ ಒಂದು ಸಾಧನ. ಹುಡುಕಾಟದ ಫಲಿತಾಂಶಗಳನ್ನು ಸಾಮಾನ್ಯವಾಗಿ ಒಂದು ಪಟ್ಟಿಯ ರೂಪದಲ್ಲಿ ಪ್ರಸ್ತುತಪಡಿಸಲಾಗುತ್ತದೆ ಮತ್ತು ಇವನ್ನು ’ಹಿಟ್ಸ್’ ಎಂದು ಕರೆಯಲಾಗುತ್ತದೆ. ಈ ಮಾಹಿತಿಯು ಅನೇಕ ಜಾಲ ಪುಟಗಳು, ಚಿತ್ರಗಳು, ಮಾಹಿತಿ ಹಾಗೂ ಇತರೆ ಕಡತಗಳನ್ನು ಹೊಂದಿರಬಹುದು. ಕೆಲವು ಹುಡುಕಾಟ ಯಂತ್ರಗಳು ಬೇರೆ ದತ್ತಸಂಚಯಗಳು ಅಥವಾ ಮುಕ್ತ ಮಾಹಿತಿ ಸೂಚಿಗಳಿಂದ ದತ್ತಾಂಶಗಳ ಗಣಿಗಾರಿಕೆ ಮಾಡಿ ಹೊರತೆಗೆಯುತ್ತವೆ. ಜಾಲ ಮಾಹಿತಿಸೂಚಿಗಳನ್ನು ಸಂಬಂಧಿಸಿದ ಸಂಪಾದಕರು ನಿರ್ವಹಿಸಿದರೆ, ಹುಡುಕಾಟ ಯಂತ್ರಗಳು ಗಣನಪದ್ಧತಿಯ ಮೂಲಕ ಅಥವಾ ಗಣನಪದ್ಧತಿ ಮತ್ತು ಮಾನವ ಹೂಡುವಳಿಯ ಮಿಶ್ರಣದ ಮುಖಾಂತರ ಕಾರ್ಯನಿರ್ವಹಿಸುತ್ತವೆ.", + "contributor": "unknown", + "timestamp": "2017-10-03T14:13:00Z", + "_type": "knwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "kowiki_1", + "fields": { + "title_cjk": "검색 엔진", + "text_cjk": "검색 엔진은 컴퓨터 시스템에 저장된 정보를 찾아주는 것을 도와주도록 설계된 정보 검색 시스템이다. 이러한 검색 결과는 목록으로 표현되는 것이 보통이다. 검색 엔진을 사용하면 정보를 찾는데 필요한 시간을 최소화할 수 있다. 가장 눈에 띄는 형태의 공용 검색 엔진으로는 웹 검색 엔진이 있으며 월드 와이드 웹에서 정보를 찾아준다.", + "contributor": "unknown", + "timestamp": "2017-11-19T12:50:00Z", + "_type": "kowiki" + } + } + }, + { + "type": "PUT", "document": { - "id": "ptwiki_doc1", + "id": "mlwiki_1", + "fields": { + "title_ml": "വെബ് സെർച്ച് എഞ്ചിൻ", + "text_ml": "വേൾഡ് വൈഡ് വെബ്ബിലുള്ള വിവരങ്ങൾ തിരയാനുള്ള ഒരു ഉപാധിയാണ്‌ വെബ് സെർച്ച് എഞ്ചിൻ അഥവാ സെർച്ച് എഞ്ചിൻ. തിരച്ചിൽ ഫലങ്ങൾ സാധാരണായായി ഒരു പട്ടികയായി നൽകുന്നു, തിരച്ചിൽ ഫലങ്ങളെ ഹിറ്റുകൾ എന്നാണ്‌ വിളിച്ചുവരുന്നത്[അവലംബം ആവശ്യമാണ്]. തിരച്ചിൽ ഫലങ്ങളിൽ വെബ് പേജുകൾ, ചിത്രങ്ങൾ, വിവരങ്ങൾ, വെബ്ബിലുള്ള മറ്റ് ഫയൽ തരങ്ങൾ എന്നിവ ഉൾപ്പെടാം. 
അൽഗോരിതങ്ങൾ ഉപയോഗിച്ചാണ് സെർച്ച് എഞ്ചിനുകൾ പ്രവർത്തിക്കുന്നത്.", + "contributor": "unknown", + "timestamp": "2010-05-05T15:06:00Z", + "_type": "mlwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "nlwiki_1", + "fields": { + "title_nl": "Zoekmachine", + "text_nl": "Een zoekmachine is een computerprogramma waarmee informatie kan worden gezocht in een bepaalde collectie; dit kan een bibliotheek, het internet, of een persoonlijke verzameling zijn. Zonder nadere aanduiding wordt meestal een webdienst bedoeld waarmee met behulp van vrije trefwoorden volledige tekst (full text) kan worden gezocht in het gehele wereldwijde web. In tegenstelling tot startpagina's of webgidsen is er geen of zeer weinig menselijke tussenkomst nodig; het bezoeken van de webpagina's en het sorteren van de rangschikkingen gebeurt met behulp van een algoritme. Google is wereldwijd de meest gebruikte zoekmachine, andere populaire zoekmachines zijn Yahoo!, Bing en Baidu.", + "contributor": "unknown", + "timestamp": "2018-05-07T11:05:00Z", + "_type": "nlwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "nowiki_1", + "fields": { + "title_no": "Søkemotor", + "text_no": "En søkemotor er en type programvare som leter frem informasjon fra Internett (nettsider eller andre nettressurser) eller begrenset til et datasystem, der informasjonen samsvarer med et gitt søk, og rangerer treffene etter hva den oppfatter som mest relevant. Typisk ligger søkemotoren tilgjengelig som et nettsted, der brukeren legger inn søkeord ev. sammen med filterinnstillinger, og treffene vises gjerne som klikkbare lenker. Søkemotoren kan enten gjøre søk på hele Internett (for eksempel Google, Bing, Kvasir og Yahoo!), innenfor et bestemt nettsted (for eksempel søk innenfor VGs nettavis), eller innenfor et bestemt tema (f.eks. Kelkoo, som søker etter priser på produkter, og Picsearch, som søker etter bilder). 
En bedrift kan også sette opp en intern bedrifts-søkemotor for å få enklere tilgang til alle dokumenter og databaser i bedriften.", + "contributor": "unknown", + "timestamp": "2018-02-05T14:15:00Z", + "_type": "nowiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "pswiki_1", + "fields": { + "title_ps": "انټرنټ لټوونکی ماشين", + "text_ps": "نټرنټ د معلوماتو يوه داسې پراخه نړۍ ده چې يوه پوله هم نه لري. هره ثانيه په زرگونو معلوماتي توکي په کې ورځای کېږي، خو بيا هم د ډکېدو کومه اندېښنه نه رامنځته کېږي. حيرانوونکې خبره بيا دا ده چې دغه ټول معلومات په داسې مهارت سره په دغه نړۍ کې ځای شوي دي، چې سړی يې د سترگو په رپ کې د نړۍ په هر گوټ کې ترلاسه کولای شي. د کيبورډ په يو دوو تڼيو زور کولو او د موږك په يو دوو کليکونو سره خپلو ټولو پوښتنو ته ځواب موندلای شئ. ټول معلومات په ځانگړو انټرنټ پاڼو کې خوندي وي، نو که سړي ته د يوې پاڼې پته معلومه وي نو سم له لاسه به دغه پاڼه د انټرنټ پاڼو په کتونکي پروگرام کې پرانيزي، خو که سړی بيا يو معلومات غواړي او د هغې پاڼې پته ورسره نه وي، چې دغه ځانگړي معلومات په كې ځای شوي دي، نو بيا سړی يوه داسې پياوړي ځواک ته اړتيا لري، چې د سترگو په رپ کې ټول انټرنټ چاڼ کړي او دغه ځانگړي معلومات راوباسي. له نېکه مرغه د دغه ځواک غم خوړل شوی دی او ډېرInternet Search Engine انټرنټ لټوونکي ماشينونه جوړ کړای شوي دي، چې په وړيا توگه ټول انټرنټ تر ثانيو هم په لږ وخت کې چاڼ کوي او زموږ د خوښې معلومات راښکاره کوي. دغو ماشينونو ته سړی يوه ځانگړې کليمه ورکوي او هغوی ټول انټرنټ په دغې وركړل شوې کلمې پسې لټوي او هر دقيق معلومات چې لاسته ورځي، نو د کمپيوټر پر پرده يې راښکاره کوي. د دغو ماشينونو په ډله کې يو پياوړی ماشين د Google په نوم دی. د نوموړي ماشين بنسټ په ١٩٩٨م کال کې د متحدو ايالاتو د Standford پوهنتون دوو محصلينو Larry Page او Sergey Brin کښېښود. د دغه ماشين خدمات سړی د www.google.com په انټرنټ پاڼه کې کارولای شي. نوموړی ماشين د نړۍ په گڼ شمېر ژبو باندې خدمات وړاندې کوي او داسې چټک او دقيق لټون کوي چې د انټرنټ نور ډېر غښتلي ماشينونه ورته گوته پر غاښ پاتې دي. 
گوگل په ټوله نړۍ کې کارول کېږي او تر نيمي ثانيي هم په لنډ وخت کې په ميليارډونو انټرنټ پاڼې چاڼ کوي او خپلو کاروونکو ته په پرتله ييزه توگه دقيق معلومات راباسي. گوگل په يوه ورځ کې څه كمُ ٢٠٠ ميليونه پوښتنې ځوابوي. دا ( گوگل) تورى خپله د يو امريکايي رياضيپوه د وراره له خوا په لومړي ځل د يوې لوبې لپاره کارول شوی و. هغه دغه تورى د يو سلو صفرونو ( 1000?.) غوندې لوی عدد ته د نوم په توگه کاراوه. دغه نوم د نوموړي شرکت د دغه توان ښكارندوى دى، چې په لنډ وخت کې په لويه کچه پوښتنو ته ځواب ورکوي او معلومات لټوي. سړی چې د گوگل چټکتيا او دقيقوالي ته ځير شي، نو دا پوښته راپورته کېږي چې د دې ماشين شا ته به څومره پرمختللي کمپيوټرونه او پياوړی تخنيک پټ وي. خو اصلاً د گوگل شا ته په يوه لوی جال کې د منځنۍ بيې کمپيوټرونه سره نښلول شوي دي . په دې توگه په زرگونو کمپيوټرونه هممهاله په کار بوخت وي، چې په ترڅ کې يې د معلوماتو لټول او چاڼ کول چټکتيا مومي. د يوې پوښتنې له اخيستلو څخه راواخله معلوماتو تر لټولو او بيا د دقيقوالي له مخې په يوه ځانگړي طرز بېرته کاروونکي يا پوښتونكي تر ښوولو پورې ټولې چارې د درېيو Software پروگرامونه په لاس کې دي، چې په دغه زرگونو کمپيوټرونو کې ځای پر ځای شوي دي.", + "contributor": "unknown", + "timestamp": "2015-12-15T18:53:00Z", + "_type": "pswiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "ptwiki_1", "fields": { "title_pt": "Motor de busca", - "text_pt": "Motor de pesquisa (português europeu) ou ferramenta de busca (português brasileiro) ou buscador (em inglês: search engine) é um programa desenhado para procurar palavras-chave fornecidas pelo utilizador em documentos e bases de dados. No contexto da internet, um motor de pesquisa permite procurar palavras-chave em documentos alojados na world wide web, como aqueles que se encontram armazenados em websites.", + "text_pt": "Motor de pesquisa (português europeu) ou ferramenta de busca (português brasileiro) ou buscador (em inglês: search engine) é um programa desenhado para procurar palavras-chave fornecidas pelo utilizador em documentos e bases de dados. 
No contexto da internet, um motor de pesquisa permite procurar palavras-chave em documentos alojados na world wide web, como aqueles que se encontram armazenados em websites. Os motores de busca surgiram logo após o aparecimento da Internet, com a intenção de prestar um serviço extremamente importante: a busca de qualquer informação na rede, apresentando os resultados de uma forma organizada, e também com a proposta de fazer isto de uma maneira rápida e eficiente. A partir deste preceito básico, diversas empresas se desenvolveram, chegando algumas a valer milhões de dólares. Entre as maiores empresas encontram-se o Google, o Yahoo, o Bing, o Lycos, o Cadê e, mais recentemente, a Amazon.com com o seu mecanismo de busca A9 porém inativo. Os buscadores se mostraram imprescindíveis para o fluxo de acesso e a conquista novos visitantes. Antes do advento da Web, havia sistemas para outros protocolos ou usos, como o Archie para sites FTP anônimos e o Veronica para o Gopher (protocolo de redes de computadores que foi desenhado para indexar repositórios de documentos na Internet, baseado-se em menus).", "contributor": "unknown", - "timestamp": "2018-07-04T05:41:00Z", + "timestamp": "2017-11-09T14:38:00Z", "_type": "ptwiki" } - } }, { - "type": "DELETE", + "type": "PUT", "document": { - "id": "jawiki_doc1", + "id": "rowiki_1", "fields": { - "title_ja": "検索エンジン", - "text_ja": "検索エンジン(けんさくエンジン、英語: search engine)は、狭義にはインターネットに存在する情報(ウェブページ、ウェブサイト、画像ファイル、ネットニュースなど)を検索する機能およびそのプログラム。インターネットの普及初期には、検索としての機能のみを提供していたウェブサイトそのものを検索エンジンと呼んだが、現在では様々なサービスが加わったポータルサイト化が進んだため、検索をサービスの一つとして提供するウェブサイトを単に検索サイトと呼ぶことはなくなっている。広義には、インターネットに限定せず情報を検索するシステム全般を含む。", + "title_ro": "Motor de căutare", + "text_ro": "Un motor de căutare este un program apelabil căutător, care accesează Internetul în mod automat și frecvent și care stochează titlul, cuvinte cheie și, parțial, chiar conținutul paginilor web într-o bază de date. 
În momentul în care un utilizator apelează la un motor de căutare pentru a găsi o informație, o anumită frază sau un cuvânt, motorul de căutare se va uita în această bază de date și, în funcție de anumite criterii de prioritate, va crea și afișa o listă de rezultate (engleză: hit list ).", "contributor": "unknown", - "timestamp": "2018-05-30T00:52:00Z", - "_type": "jawiki" + "timestamp": "2018-06-12T08:59:00Z", + "_type": "rowiki" } } + }, + { + "type": "PUT", + "document": { + "id": "ruwiki_1", + "fields": { + "title_ru": "Поисковая машина", + "text_ru": "Поисковая машина (поиско́вый движо́к) — комплекс программ, предназначенный для поиска информации. Обычно является частью поисковой системы. Основными критериями качества работы поисковой машины являются релевантность (степень соответствия запроса и найденного, т.е. уместность результата), полнота индекса, учёт морфологии языка.", + "contributor": "unknown", + "timestamp": "2017-03-22T01:16:00Z", + "_type": "ruwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "svwiki_1", + "fields": { + "title_sv": "Söktjänst", + "text_sv": "En söktjänst är en webbplats som gör det möjligt att söka efter innehåll på Internet. Söktjänsterna använder sökmotorer, även kallade sökrobotar, för att upptäcka, hämta in och indexera webbsidor.", + "contributor": "unknown", + "timestamp": "2018-08-16T22:13:00Z", + "_type": "svwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "tawiki_1", + "fields": { + "title_ta": "தேடுபொறி", + "text_ta": "தேடுபொறி அல்லது தேடற்பொறி என்பது ஒரு கணினி நிரலாகும். இது இணையத்தில் குவிந்து கிடக்கும் தகவல்களில் இருந்தோ கணினியில் இருக்கும் தகவல்களில் இருந்தோ நமக்குத் தேவையான தகவலைப்பெற உதவுகின்றது. பொதுவாகப் பாவனையாளர்கள் ஒரு விடயம் சம்பந்தமாகத் தேடுதலை ஒரு சொல்லை வைத்து தேடுவார்கள். தேடுபொறிகள் சுட்டிகளைப் பயன்படுத்தி விரைவான தேடலை மேற்கொள்ளும். தேடுபொறிகள் என்பது பொதுவாக இணையத் தேடுபொறிகளை அல்லது இணையத் தேடற்பொறிகளையே குறிக்கும். வேறுசில தேடுபொறிகள் உள்ளூர் வலையமைப்பை மாத்திரமே தேடும். 
இணைய தேடு பொறிகள் பல பில்லியன் பக்கங்களில் இருந்து நமக்குத் தேவையான மிகப் பொருத்தமான பக்கங்களைத் தேடித் தரும். வேறுசில தேடற்பொறிகள் செய்திக் குழுக்கள், தகவற்தளங்கள், திறந்த இணையத்தளங்களைப் பட்டியலிடும் DMOZ.org போன்ற இணையத் தளங்களைத் தேடும். மனிதர்களால் எழுதப்பட்ட இணையத் தளங்களைப் பட்டியலிடும் தளங்களைப் போன்றல்லாது தேடு பொறிகள் அல்காரிதங்களைப் பாவித்துத் தேடல்களை மேற்கொள்ளும். வேறு சில தேடற்பொறிகளோ தமது இடைமுகத்தை வழங்கினாலும் உண்மையில் வேறுசில தேடுபொறிகளே தேடலை மேற்கொள்ளும். ஆரம்ப காலத்தில் ASCII முறை வரியுருக்களை கொண்டே தேடு சொற்களை உள்ளிட முடிந்தது. தற்போது ஒருங்குறி எழுத்துக்குறிமுறையை பல தேடுபொறிகளும் ஆதரிப்பதால் ஆங்கிலத்தில் மட்டுமல்லாது உலக மொழிகள் அனைத்திலும் அவ்வம் மொழிப்பக்கங்களை தேடிப்பெறக்கூடியதாகவுள்ளது.", + "contributor": "unknown", + "timestamp": "2017-12-24T10:30:00Z", + "_type": "tawiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "tewiki_1", + "fields": { + "title_te": "వెబ్ శోధనా యంత్రం", + "text_te": "వెబ్ శోధన యంత్రం అనేది వరల్డ్ వైడ్ వెబ్/ప్రపంచ వ్యాప్త వెబ్లో సమాచారాన్ని శోదించటానికి తయారుచేసిన ఒక సాధనం. శోధన ఫలితాలు సాధారణంగా ఒక జాబితాలో ఇవ్వబడతాయి మరియు అవి సాధారణంగా హిట్స్ అని పిలువబడతాయి. ఆ సమాచారం వెబ్ పేజీలు, చిత్రాలు, సమాచారం మరియు ఇతర రకాలైన జాబితాలను కలిగి ఉంటుంది.కొన్ని శోధనా యంత్రాలు డేటా బేస్ లు లేదా ఓపెన్ డైరెక్టరీలలో అందుబాటులో ఉన్న సమాచారాన్ని కూడా వెలికితీస్తాయి. 
మానవ సంపాదకులచే నిర్వహించబడే క్రమపరిచిన వెబ్ డైరెక్టరీల లా కాకుండా, శోధనా యంత్రాలు సమస్య పరిష్కారానికి ఉపయోగించే ఒక క్రమ పద్దతి ద్వారా లేదా సమస్య పరిష్కారానికి ఉపయోగించే ఒక క్రమ పద్దతి మరియు మానవ శక్తిల మిశ్రమంతో పనిచేస్తాయి.", + "contributor": "unknown", + "timestamp": "2017-06-19T11:22:00Z", + "_type": "tewiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "thwiki_1", + "fields": { + "title_th": "เสิร์ชเอนจิน", + "text_th": "เสิร์ชเอนจิน (search engine) หรือ โปรแกรมค้นหา คือ โปรแกรมที่ช่วยในการสืบค้นหาข้อมูล โดยเฉพาะข้อมูลบนอินเทอร์เน็ต โดยครอบคลุมทั้งข้อความ รูปภาพ ภาพเคลื่อนไหว เพลง ซอฟต์แวร์ แผนที่ ข้อมูลบุคคล กลุ่มข่าว และอื่น ๆ ซึ่งแตกต่างกันไปแล้วแต่โปรแกรมหรือผู้ให้บริการแต่ละราย. เสิร์ชเอนจินส่วนใหญ่จะค้นหาข้อมูลจากคำสำคัญ (คีย์เวิร์ด) ที่ผู้ใช้ป้อนเข้าไป จากนั้นก็จะแสดงรายการผลลัพธ์ที่มันคิดว่าผู้ใช้น่าจะต้องการขึ้นมา ในปัจจุบัน เสิร์ชเอนจินบางตัว เช่น กูเกิล จะบันทึกประวัติการค้นหาและการเลือกผลลัพธ์ของผู้ใช้ไว้ด้วย และจะนำประวัติที่บันทึกไว้นั้น มาช่วยกรองผลลัพธ์ในการค้นหาครั้งต่อ ๆ ไป", + "contributor": "unknown", + "timestamp": "2016-06-18T11:06:00Z", + "_type": "thwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "trwiki_1", + "fields": { + "title_tr": "Arama motoru", + "text_tr": "Arama motoru, İnternet üzerinde bulunan içeriği aramak için kullanılan bir mekanizmadır. Üç bileşenden oluşur: web robotu, arama indeksi ve kullanıcı arabirimi. 
Ancak arama sonuçları genellikle sık tıklanan internet sayfalarından oluşan bir liste olarak verilmektedir.", + "contributor": "unknown", + "timestamp": "2018-03-13T17:37:00Z", + "_type": "trwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "zhwiki_1", + "fields": { + "title_zh": "搜索引擎", + "text_zh": "搜索引擎(英语:search engine)是一种信息检索系统,旨在协助搜索存储在计算机系统中的信息。搜索结果一般被称为“hits”,通常会以表单的形式列出。网络搜索引擎是最常见、公开的一种搜索引擎,其功能为搜索万维网上储存的信息.", + "contributor": "unknown", + "timestamp": "2018-08-27T05:47:00Z", + "_type": "zhwiki" + } + } + }, + { + "type": "DELETE", + "document": { + "id": "arwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "bgwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "cawiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "cswiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "dawiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "dewiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "elwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "enwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "eswiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "fawiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "fiwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "frwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "gawiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "glwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "guwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "hiwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "huwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "hywiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "idwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "itwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "jawiki_1" + } + }, + { + "type": "DELETE", + 
"document": { + "id": "knwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "kowiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "mlwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "nlwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "nowiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "pswiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "ptwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "rowiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "ruwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "svwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "tawiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "tewiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "thwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "trwiki_1" + } + }, + { + "type": "DELETE", + "document": { + "id": "zhwiki_1" + } } -] \ No newline at end of file +] diff --git a/example/bulk_put_request.json b/example/bulk_put_request.json index 8fd1821..54d019d 100644 --- a/example/bulk_put_request.json +++ b/example/bulk_put_request.json @@ -2,10 +2,101 @@ { "type": "PUT", "document": { - "id": "enwiki_doc1", + "id": "arwiki_1", + "fields": { + "title_ar": "محرك بحث", + "text_ar": "محرك البحث (بالإنجليزية: Search engine) هو نظام لإسترجاع المعلومات صمم للمساعدة على البحث عن المعلومات المخزنة على أي نظام حاسوبي. تعرض نتائج البحث عادة على شكل قائمة لأماكن تواجد المعلومات ومرتبة وفق معايير معينة. 
تسمح محركات البحث باختصار مدة البحث والتغلب على مشكلة أحجام البيانات المتصاعدة (إغراق معلوماتي).", + "contributor": "unknown", + "timestamp": "2018-03-25T18:04:00Z", + "_type": "arwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "bgwiki_1", + "fields": { + "title_bg": "Търсачка", + "text_bg": "Търсачка или търсеща машина (на английски: Web search engine) е специализиран софтуер за извличане на информация, съхранена в компютърна система или мрежа. Това може да е персонален компютър, Интернет, корпоративна мрежа и т.н. Без допълнителни уточнения, най-често под търсачка се разбира уеб(-)търсачка, която търси в Интернет. Други видове търсачки са корпоративните търсачки, които търсят в интранет мрежите, личните търсачки – за индивидуалните компютри и мобилните търсачки. В търсачката потребителят (търсещият) прави запитване за съдържание, отговарящо на определен критерий (обикновено такъв, който съдържа определени думи и фрази). В резултат се получават списък от точки, които отговарят, пълно или частично, на този критерий. Търсачките обикновено използват редовно подновявани индекси, за да оперират бързо и ефикасно. Някои търсачки също търсят в информацията, която е на разположение в нюзгрупите и други големи бази данни. За разлика от Уеб директориите, които се поддържат от хора редактори, търсачките оперират алгоритмично. Повечето Интернет търсачки са притежавани от различни корпорации.", + "contributor": "unknown", + "timestamp": "2018-07-11T11:03:00Z", + "_type": "bgwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "cawiki_1", + "fields": { + "title_ca": "Motor de cerca", + "text_ca": "Un motor de cerca o de recerca o bé cercador és un programa informàtic dissenyat per ajudar a trobar informació emmagatzemada en un sistema informàtic com ara una xarxa, Internet, un servidor o un ordinador personal. L'objectiu principal és el de trobar altres programes informàtics, pàgines web i documents, entre d'altres. 
A partir d'una determinada paraula o paraules o una determinada frase l'usuari demana un contingut sota un criteri determinat i retorna una llista de referències que compleixin aquest criteri. El procés es realitza a través de les metadades, vies per comunicar informació que utilitzen els motors per cada cerca. Els índex que utilitzen els cercadors sempre estan actualitzats a través d'un robot web per generar rapidesa i eficàcia en la recerca. Els directoris, en canvi, són gestionats per editors humans.", + "contributor": "unknown", + "timestamp": "2018-07-09T18:07:00Z", + "_type": "cawiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "cswiki_1", + "fields": { + "title_cs": "Vyhledávač", + "text_cs": "Vyhledávač je počítačový systém či program, který umožňuje uživateli zadat nějaký libovolný nebo specifikovaný vyhledávaný výraz a získat z velkého objemu dat informace, které jsou v souladu s tímto dotazem. Jako vyhledávač se označují i ​​webové stránky, jejichž hlavní funkcí je poskytování takového systému či programu. Jako internetový vyhledávač se označuje buď vyhledávač, na který se přistupuje přes internet, nebo vyhledávač, jehož zdrojem vyhledávání je internet (tj. WWW, Usenet apod.). Jako online vyhledávač se označuje vyhledávač, při jehož výkonu činnosti dochází k výměně dat v rámci nějaké počítačové sítě, nejčastěji to je internetový vyhledávač. Fulltextový vyhledávač je vyhedávač, který vykonává fulltextové vyhledávání.", + "contributor": "unknown", + "timestamp": "2017-11-10T21:59:00Z", + "_type": "cswiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "dawiki_1", + "fields": { + "title_da": "Søgemaskine", + "text_da": "En søgemaskine er en applikation til at hjælpe en bruger med at finde information. Det kan f.eks. være at finde filer med bestemte data (f.eks. ord), gemt i en computers hukommelse, for eksempel via World Wide Web (kaldes så en websøgemaskine). 
Ofte bruges søgemaskine fejlagtigt om linkkataloger eller Netguider.", + "contributor": "unknown", + "timestamp": "2017-09-04T01:54:00Z", + "_type": "dawiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "dewiki_1", + "fields": { + "title_de": "Suchmaschine", + "text_de": "Eine Suchmaschine ist ein Programm zur Recherche von Dokumenten, die in einem Computer oder einem Computernetzwerk wie z. B. dem World Wide Web gespeichert sind. Internet-Suchmaschinen haben ihren Ursprung in Information-Retrieval-Systemen. Sie erstellen einen Schlüsselwort-Index für die Dokumentbasis, um Suchanfragen über Schlüsselwörter mit einer nach Relevanz geordneten Trefferliste zu beantworten. Nach Eingabe eines Suchbegriffs liefert eine Suchmaschine eine Liste von Verweisen auf möglicherweise relevante Dokumente, meistens dargestellt mit Titel und einem kurzen Auszug des jeweiligen Dokuments. Dabei können verschiedene Suchverfahren Anwendung finden.", + "contributor": "unknown", + "timestamp": "2017-09-04T01:54:00Z", + "_type": "dewiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "elwiki_1", + "fields": { + "title_el": "Μηχανή αναζήτησης", + "text_el": "Μια μηχανή αναζήτησης είναι μια εφαρμογή που επιτρέπει την αναζήτηση κειμένων και αρχείων στο Διαδίκτυο. Αποτελείται από ένα πρόγραμμα υπολογιστή που βρίσκεται σε έναν ή περισσότερους υπολογιστές στους οποίους δημιουργεί μια βάση δεδομένων με τις πληροφορίες που συλλέγει από το διαδίκτυο, και το διαδραστικό περιβάλλον που εμφανίζεται στον τελικό χρήστη ο οποίος χρησιμοποιεί την εφαρμογή από άλλον υπολογιστή συνδεδεμένο στο διαδίκτυο. 
Οι μηχανές αναζήτησης αποτελούνται από 3 είδη λογισμικού, το spider software, το index software και το query software.", + "contributor": "unknown", + "timestamp": "2017-11-21T19:57:00Z", + "_type": "elwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "enwiki_1", "fields": { "title_en": "Search engine (computing)", - "text_en": "A search engine is an information retrieval system designed to help find information stored on a computer system. The search results are usually presented in a list and are commonly called hits. Search engines help to minimize the time required to find information and the amount of information which must be consulted, akin to other techniques for managing information overload.", + "text_en": "A search engine is an information retrieval system designed to help find information stored on a computer system. The search results are usually presented in a list and are commonly called hits. Search engines help to minimize the time required to find information and the amount of information which must be consulted, akin to other techniques for managing information overload. The most public, visible form of a search engine is a Web search engine which searches for information on the World Wide Web.", "contributor": "unknown", "timestamp": "2018-07-04T05:41:00Z", "_type": "enwiki" @@ -15,28 +106,365 @@ { "type": "PUT", "document": { - "id": "ptwiki_doc1", + "id": "eswiki_1", "fields": { - "title_pt": "Motor de busca", - "text_pt": "Motor de pesquisa (português europeu) ou ferramenta de busca (português brasileiro) ou buscador (em inglês: search engine) é um programa desenhado para procurar palavras-chave fornecidas pelo utilizador em documentos e bases de dados. 
No contexto da internet, um motor de pesquisa permite procurar palavras-chave em documentos alojados na world wide web, como aqueles que se encontram armazenados em websites.", + "title_es": "Motor de búsqueda", + "text_es": "Un motor de búsqueda o buscador es un sistema informático que busca archivos almacenados en servidores web gracias a su spider (también llamado araña web). Un ejemplo son los buscadores de Internet (algunos buscan únicamente en la web, pero otros lo hacen además en noticias, servicios como Gopher, FTP, etc.) cuando se pide información sobre algún tema. Las búsquedas se hacen con palabras clave o con árboles jerárquicos por temas; el resultado de la búsqueda «Página de resultados del buscador» es un listado de direcciones web en los que se mencionan temas relacionados con las palabras clave buscadas. Como operan de forma automática, los motores de búsqueda contienen generalmente más información que los directorios. Sin embargo, estos últimos también han de construirse a partir de búsquedas (no automatizadas) o bien a partir de avisos dados por los creadores de páginas.", "contributor": "unknown", - "timestamp": "2018-07-04T05:41:00Z", - "_type": "ptwiki" + "timestamp": "2018-08-30T11:30:00Z", + "_type": "eswiki" } - } }, { "type": "PUT", "document": { - "id": "jawiki_doc1", + "id": "fawiki_1", + "fields": { + "title_fa": "موتور جستجو (پردازش)", + "text_fa": "موتور جستجو یا جویشگر، در فرهنگ رایانه، به طور عمومی به برنامه‌ای گفته می‌شود که کلمات کلیدی را در یک سند یا بانک اطلاعاتی جستجو می‌کند. در اینترنت به برنامه‌ای گفته می‌شود که کلمات کلیدی موجود در فایل‌ها و سندهای وب جهانی، گروه‌های خبری، منوهای گوفر و آرشیوهای FTP را جستجو می‌کند. جویشگرهای زیادی وجود دارند که امروزه از معروفترین و پراستفاده‌ترین آنها می‌توان به google و یاهو! 
جستجو اشاره کرد.", + "contributor": "unknown", + "timestamp": "2017-01-06T02:46:00Z", + "_type": "fawiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "fiwiki_1", + "fields": { + "title_fi": "Hakukone", + "text_fi": "Hakukone on web-pohjainen ohjelma, joka etsii jatkuvasti Internetistä (varsinkin Webistä) uusia sivuja eritellen ja liittäen ne hakemistoonsa erityisten hakusanojen mukaan. Näitä hyväksi käyttäen hakukone tulostaa käyttäjän syöttämiä hakusanoja lähimpänä olevat sivut. Analysointi tapahtuu käytännössä eri hakukoneissa erilaisilla menetelmillä.", + "contributor": "unknown", + "timestamp": "2017-10-04T14:33:00Z", + "_type": "fiwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "frwiki_1", + "fields": { + "title_fr": "Moteur de recherche", + "text_fr": "Un moteur de recherche est une application web permettant de trouver des ressources à partir d'une requête sous forme de mots. Les ressources peuvent être des pages web, des articles de forums Usenet, des images, des vidéos, des fichiers, etc. Certains sites web offrent un moteur de recherche comme principale fonctionnalité ; on appelle alors « moteur de recherche » le site lui-même. Ce sont des instruments de recherche sur le web sans intervention humaine, ce qui les distingue des annuaires. Ils sont basés sur des « robots », encore appelés « bots », « spiders «, « crawlers » ou « agents », qui parcourent les sites à intervalles réguliers et de façon automatique pour découvrir de nouvelles adresses (URL). Ils suivent les liens hypertextes qui relient les pages les unes aux autres, les uns après les autres. Chaque page identifiée est alors indexée dans une base de données, accessible ensuite par les internautes à partir de mots-clés. 
C'est par abus de langage qu'on appelle également « moteurs de recherche » des sites web proposant des annuaires de sites web : dans ce cas, ce sont des instruments de recherche élaborés par des personnes qui répertorient et classifient des sites web jugés dignes d'intérêt, et non des robots d'indexation. Les moteurs de recherche ne s'appliquent pas qu'à Internet : certains moteurs sont des logiciels installés sur un ordinateur personnel. Ce sont des moteurs dits « de bureau » qui combinent la recherche parmi les fichiers stockés sur le PC et la recherche parmi les sites Web — on peut citer par exemple Exalead Desktop, Google Desktop et Copernic Desktop Search, Windex Server, etc. On trouve également des métamoteurs, c'est-à-dire des sites web où une même recherche est lancée simultanément sur plusieurs moteurs de recherche, les résultats étant ensuite fusionnés pour être présentés à l'internaute. On peut citer dans cette catégorie Ixquick, Mamma, Kartoo, Framabee ou Lilo.", + "contributor": "unknown", + "timestamp": "2018-05-30T15:15:00Z", + "_type": "frwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "gawiki_1", + "fields": { + "title_ga": "Inneall cuardaigh", + "text_ga": "Acmhainn ar an ngréasán domhanda atá insroichte le brabhsálaí Gréasáin, a chabhraíonn leis an úsáideoir ionaid is eolas a aimsiú. Bíonn na hinnill cuardaigh (Yahoo, Lycos, Google, Ask Jeeves) ag cuardach tríd an ngréasán an t-am ar fad, ag tógáil innéacsanna ábhar éagsúla — mar shampla, ag aimsiú teidil, fotheidil, eochairfhocail is céadlínte cáipéisí. Uaidh sin, is féidir cuid mhaith cáipéisí éagsúla ar ábhar ar leith a aisghabháil. Déanann an cuardach leanúnach cinnte de go bhfuil na hinnéacsanna suas chun dáta. 
Mar sin féin, aisghabhann na hinnill an-chuid cháipéisí nach mbaineann le hábhar, agus tá an-iarracht ar siúl an t-am ar fad iad a fheabhsú.", + "contributor": "unknown", + "timestamp": "2013-10-27T18:17:00Z", + "_type": "gawiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "glwiki_1", + "fields": { + "title_gl": "Motor de busca", + "text_gl": "Un motor de busca ou buscador é un sistema informático que procura arquivos almacenados en servidores web, un exemplo son os buscadores de internet (algúns buscan só na Web pero outros buscan ademais en News, Gopher, FTP etc.) cando lles pedimos información sobre algún tema. As procuras fanse con palabras clave ou con árbores xerárquicas por temas; o resultado da procura é unha listaxe de direccións Web nas que se mencionan temas relacionados coas palabras clave buscadas.", + "contributor": "unknown", + "timestamp": "2016-10-31T13:33:00Z", + "_type": "glwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "guwiki_1", + "fields": { + "title_gu": "વેબ શોધ એન્જીન", + "text_gu": "વેબ શોધ એન્જિન એ વર્લ્ડ વાઈડ વેબ (World Wide Web) પર વિવિધ માહિતી શોધવા માટે ઉપયોગમાં લેવામાં આવે છે. શોધ લીસ્ટને સામાન્ય રીતે યાદીમાં દર્શાવવામાં આવે છે અને જેને સામાન્ય રીતે હીટ્સ કહેવામાં આવે છે. જે માહિતી મળે છે તેમાં વેબ પૃષ્ઠ (web page), છબીઓ, માહિતી અને અન્ય પ્રકારની ફાઈલો હોય છે. કેટલાક શોધ એન્જિનો ન્યુઝબુક, ડેટાબેઝ અને અન્ય પ્રકારની ઓપન ડીરેક્ટરી (open directories)ઓની વિગતો પણ આપે છે. 
વ્યકિતઓ દ્વારા દુરસ્ત થતી વેબ ડાયરેક્ટરીઝ (Web directories)થી અલગ રીતે, શોધ એન્જિન ઍલ્ગરિધમનો અથવા ઍલ્ગરિધમ (algorithmic) અને માનવીય બાબતોના મિક્ષણનો ઉપયોગ કરે છે.", + "contributor": "unknown", + "timestamp": "2013-04-04T19:28:00Z", + "_type": "guwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "hiwiki_1", + "fields": { + "title_hi": "खोज इंजन", + "text_hi": "ऐसे कम्प्यूटर प्रोग्राम खोजी इंजन (search engine) कहलाते हैं जो किसी कम्प्यूटर सिस्टम पर भण्डारित सूचना में से वांछित सूचना को ढूढ निकालते हैं। ये इंजन प्राप्त परिणामों को प्रायः एक सूची के रूप में प्रस्तुत करते हैं जिससे वांछित सूचना की प्रकृति और उसकी स्थिति का पता चलता है। खोजी इंजन किसी सूचना तक अपेक्षाकृत बहुत कम समय में पहुँचने में हमारी सहायता करते हैं। वे 'सूचना ओवरलोड' से भी हमे बचाते हैं। खोजी इंजन का सबसे प्रचलित रूप 'वेब खोजी इंजन' है जो वर्ल्ड वाइड वेब पर सूचना खोजने के लिये प्रयुक्त होता है।", + "contributor": "unknown", + "timestamp": "2017-10-19T20:09:00Z", + "_type": "hiwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "huwiki_1", + "fields": { + "title_hu": "Keresőmotor", + "text_hu": "A keresőmotor az informatikában egy program vagy alkalmazás, amely bizonyos feltételeknek (többnyire egy szónak vagy kifejezésnek) megfelelő információkat keres valamilyen számítógépes környezetben. Ez a cikk a World Wide Weben (és esetleg az internet más részein, például a Useneten) kereső alkalmazásokról szól, a keresőmotor kifejezés önmagában általában ezekre vonatkozik. 
Másfajta keresőmotorokra példák a vállalati keresőmotorok, amik egy intraneten, és a személyi keresőmotorok, amik egy személyi számítógép állományai között keresnek.", + "contributor": "unknown", + "timestamp": "2018-05-15T20:40:00Z", + "_type": "huwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "hywiki_1", + "fields": { + "title_hy": "Որոնողական համակարգ", + "text_hy": "Որոնողական համակարգը գործիք է, որը նախատեսված է համապատասխան բառերով Համաշխարհային ցանցում որոնումներ կատարելու համար։ Ստեղծված է համացանցում և FTP սերվերներում ինֆորմացիա փնտրելու համար։ Փնտրված արդյունքները ընդհանրապես ներկայացվում են արդյունքների ցանկում և սովորաբար կոչվում են նպատակակակետ, հիթ։ Ինֆորմացիան կարող է բաղկացած լինել վեբ էջերից, նկարներից, ինֆորմացիաներից և այլ տիպի ֆայլերից ու տվյալներից։ Այն կարող է օգտագործվել տարբեր տեսակի տեղեկատվություն որոնելու համար, ներառյալ՝ կայքեր, ֆորումներ, նկարներ, վիդեոներ, ֆայլեր և այլն։ Որոշ կայքեր արդեն իրենցից ներկայացնում են ինչ-որ որոնողական համակարգ, օրինակ՝ Dailymotion, YouTube և Google Videos ինտերնետում տեղադրված տեսահոլովակների որոնողական կայքեր են։ Որոնողական կայքը բաղկացած է \"ռոբոտներից\", որոնց անվանում են նաև bot, spider, crawler, որոնք ավտոմատ կերպով, առանց մարդկային միջամտության պարբերաբար հետազոտում են կայքերը։ Որոնողական կայքերը հետևում են հղումներին, որոնք կապված լինելով իրար հետ ինդեքսավորում է յուրաքանչյուր էջ տվյալների բազայում՝ հետագայում բանալի բառերի օգնությամբ դառնալով հասանելի ինտերնետից օգտվողների համար։ Սխալմամբ, որոնողական կայքեր են անվանում նաև այն կայքերը, որոնք իրենցից ներկայացնում են կայքային տեղեկատուներ։ Այս կայքերում ուշադրության արժանի կայքերը ցուցակագրվում և դասակարգվում են մարդկային ռեսուրսների շնորհիվ, այլ ոչ թե բոտերի կամ ռոբետների միջոցով։ Այդ կայքերից կարելի է նշել օրինակ՝ Yahoo!։ Yahoo!-ի որոնողական կայքը գտնվում է այստեղ։ Բոլոր որոնողական համակարգերը նախատեսված են ինտերնետում որոնում իրականացնելու համար, սակայն կան որոշ որոնողական համակարգերի տարատեսակներ, որոնք համակարգչային ծրագրեր են և 
հետևաբար տեղակայվում են համակարգչի մեջ։ Այս համակարգերը կոչվում են desktop։ Վերջիներս հնարավորություն են տալիս որոնելու թե համակարգչի մեջ կուտակված ֆայլեը, թե կայքերում տեղադրված ռեսուրսները։ Այդ ծրագրերից ամենահայտնիներն են՝ Exalead Desktop, Copernic Desktop Search Գոյություն ունեն նաև մետա-որոնողական համակարգեր, այսինքն կայքեր, որ նույն որոնումը կատարում են միաժամանակ տարբեր որոնողական կայքերի միջնորդությամբ։ Որոնման արդյունքները հետո դասակարգվում են որպեսզի ներկայացվեն օգտագործողին։ Մետա-որոնողական համակարգերի շարքից կարելի է թվարկել օրինակ՝ Mamma և Kartoo։", + "contributor": "unknown", + "timestamp": "2017-11-20T17:47:00Z", + "_type": "hywiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "idwiki_1", + "fields": { + "title_id": "Mesin pencari web", + "text_id": "Mesin pencari web atau mesin telusur web (bahasa Inggris: web search engine) adalah program komputer yang dirancang untuk melakukan pencarian atas berkas-berkas yang tersimpan dalam layanan www, ftp, publikasi milis, ataupun news group dalam sebuah ataupun sejumlah komputer peladen dalam suatu jaringan. Mesin pencari merupakan perangkat penelusur informasi dari dokumen-dokumen yang tersedia. Hasil pencarian umumnya ditampilkan dalam bentuk daftar yang seringkali diurutkan menurut tingkat akurasi ataupun rasio pengunjung atas suatu berkas yang disebut sebagai hits. Informasi yang menjadi target pencarian bisa terdapat dalam berbagai macam jenis berkas seperti halaman situs web, gambar, ataupun jenis-jenis berkas lainnya. Beberapa mesin pencari juga diketahui melakukan pengumpulan informasi atas data yang tersimpan dalam suatu basis data ataupun direktori web. Sebagian besar mesin pencari dijalankan oleh perusahaan swasta yang menggunakan algoritme kepemilikan dan basis data tertutup, di antaranya yang paling populer adalah safari Google (MSN Search dan Yahoo!). 
Telah ada beberapa upaya menciptakan mesin pencari dengan sumber terbuka (open source), contohnya adalah Htdig, Nutch, Egothor dan OpenFTS.", + "contributor": "unknown", + "timestamp": "2017-11-20T17:47:00Z", + "_type": "idwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "itwiki_1", + "fields": { + "title_it": "Motore di ricerca", + "text_it": "Nell'ambito delle tecnologie di Internet, un motore di ricerca (in inglese search engine) è un sistema automatico che, su richiesta, analizza un insieme di dati (spesso da esso stesso raccolti) e restituisce un indice dei contenuti disponibili[1] classificandoli in modo automatico in base a formule statistico-matematiche che ne indichino il grado di rilevanza data una determinata chiave di ricerca. Uno dei campi in cui i motori di ricerca trovano maggiore utilizzo è quello dell'information retrieval e nel web. I motori di ricerca più utilizzati nel 2017 sono stati: Google, Bing, Baidu, Qwant, Yandex, Ecosia, DuckDuckGo.", + "contributor": "unknown", + "timestamp": "2018-07-16T12:20:00Z", + "_type": "itwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "jawiki_1", "fields": { "title_ja": "検索エンジン", - "text_ja": "検索エンジン(けんさくエンジン、英語: search engine)は、狭義にはインターネットに存在する情報(ウェブページ、ウェブサイト、画像ファイル、ネットニュースなど)を検索する機能およびそのプログラム。インターネットの普及初期には、検索としての機能のみを提供していたウェブサイトそのものを検索エンジンと呼んだが、現在では様々なサービスが加わったポータルサイト化が進んだため、検索をサービスの一つとして提供するウェブサイトを単に検索サイトと呼ぶことはなくなっている。広義には、インターネットに限定せず情報を検索するシステム全般を含む。", + "text_ja": "検索エンジン(けんさくエンジン、英語: search engine)は、狭義にはインターネットに存在する情報(ウェブページ、ウェブサイト、画像ファイル、ネットニュースなど)を検索する機能およびそのプログラム。インターネットの普及初期には、検索としての機能のみを提供していたウェブサイトそのものを検索エンジンと呼んだが、現在では様々なサービスが加わったポータルサイト化が進んだため、検索をサービスの一つとして提供するウェブサイトを単に検索サイトと呼ぶことはなくなっている。広義には、インターネットに限定せず情報を検索するシステム全般を含む。狭義の検索エンジンは、ロボット型検索エンジン、ディレクトリ型検索エンジン、メタ検索エンジンなどに分類される。広義の検索エンジンとしては、ある特定のウェブサイト内に登録されているテキスト情報の全文検索機能を備えたソフトウェア(全文検索システム)等がある。検索エンジンは、検索窓と呼ばれるボックスにキーワードを入力して検索をかけるもので、全文検索が可能なものと不可能なものとがある。検索サイトを一般に「検索エンジン」と呼ぶことはあるが、厳密には検索サイト自体は検索エンジンでない。", 
"contributor": "unknown", "timestamp": "2018-05-30T00:52:00Z", "_type": "jawiki" } } + }, + { + "type": "PUT", + "document": { + "id": "knwiki_1", + "fields": { + "title_kn": "ಅಂತರ್ಜಾಲ ಹುಡುಕಾಟ ಯಂತ್ರ", + "text_kn": "ಅಂತರ್ಜಾಲ ಹುಡುಕಾಟ ಯಂತ್ರ ಎಂದರೆ World Wide Webನಲ್ಲಿ ಮಾಹಿತಿ ಹುಡುಕುವುದಕ್ಕಾಗಿ ವಿನ್ಯಾಸಗೊಳಿಸಲಾದ ಒಂದು ಸಾಧನ. ಹುಡುಕಾಟದ ಫಲಿತಾಂಶಗಳನ್ನು ಸಾಮಾನ್ಯವಾಗಿ ಒಂದು ಪಟ್ಟಿಯ ರೂಪದಲ್ಲಿ ಪ್ರಸ್ತುತಪಡಿಸಲಾಗುತ್ತದೆ ಮತ್ತು ಇವನ್ನು ’ಹಿಟ್ಸ್’ ಎಂದು ಕರೆಯಲಾಗುತ್ತದೆ. ಈ ಮಾಹಿತಿಯು ಅನೇಕ ಜಾಲ ಪುಟಗಳು, ಚಿತ್ರಗಳು, ಮಾಹಿತಿ ಹಾಗೂ ಇತರೆ ಕಡತಗಳನ್ನು ಹೊಂದಿರಬಹುದು. ಕೆಲವು ಹುಡುಕಾಟ ಯಂತ್ರಗಳು ಬೇರೆ ದತ್ತಸಂಚಯಗಳು ಅಥವಾ ಮುಕ್ತ ಮಾಹಿತಿ ಸೂಚಿಗಳಿಂದ ದತ್ತಾಂಶಗಳ ಗಣಿಗಾರಿಕೆ ಮಾಡಿ ಹೊರತೆಗೆಯುತ್ತವೆ. ಜಾಲ ಮಾಹಿತಿಸೂಚಿಗಳನ್ನು ಸಂಬಂಧಿಸಿದ ಸಂಪಾದಕರು ನಿರ್ವಹಿಸಿದರೆ, ಹುಡುಕಾಟ ಯಂತ್ರಗಳು ಗಣನಪದ್ಧತಿಯ ಮೂಲಕ ಅಥವಾ ಗಣನಪದ್ಧತಿ ಮತ್ತು ಮಾನವ ಹೂಡುವಳಿಯ ಮಿಶ್ರಣದ ಮುಖಾಂತರ ಕಾರ್ಯನಿರ್ವಹಿಸುತ್ತವೆ.", + "contributor": "unknown", + "timestamp": "2017-10-03T14:13:00Z", + "_type": "knwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "kowiki_1", + "fields": { + "title_cjk": "검색 엔진", + "text_cjk": "검색 엔진은 컴퓨터 시스템에 저장된 정보를 찾아주는 것을 도와주도록 설계된 정보 검색 시스템이다. 이러한 검색 결과는 목록으로 표현되는 것이 보통이다. 검색 엔진을 사용하면 정보를 찾는데 필요한 시간을 최소화할 수 있다. 가장 눈에 띄는 형태의 공용 검색 엔진으로는 웹 검색 엔진이 있으며 월드 와이드 웹에서 정보를 찾아준다.", + "contributor": "unknown", + "timestamp": "2017-11-19T12:50:00Z", + "_type": "kowiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "mlwiki_1", + "fields": { + "title_ml": "വെബ് സെർച്ച് എഞ്ചിൻ", + "text_ml": "വേൾഡ് വൈഡ് വെബ്ബിലുള്ള വിവരങ്ങൾ തിരയാനുള്ള ഒരു ഉപാധിയാണ്‌ വെബ് സെർച്ച് എഞ്ചിൻ അഥവാ സെർച്ച് എഞ്ചിൻ. തിരച്ചിൽ ഫലങ്ങൾ സാധാരണായായി ഒരു പട്ടികയായി നൽകുന്നു, തിരച്ചിൽ ഫലങ്ങളെ ഹിറ്റുകൾ എന്നാണ്‌ വിളിച്ചുവരുന്നത്[അവലംബം ആവശ്യമാണ്]. തിരച്ചിൽ ഫലങ്ങളിൽ വെബ് പേജുകൾ, ചിത്രങ്ങൾ, വിവരങ്ങൾ, വെബ്ബിലുള്ള മറ്റ് ഫയൽ തരങ്ങൾ എന്നിവ ഉൾപ്പെടാം. 
അൽഗോരിതങ്ങൾ ഉപയോഗിച്ചാണ് സെർച്ച് എഞ്ചിനുകൾ പ്രവർത്തിക്കുന്നത്.", + "contributor": "unknown", + "timestamp": "2010-05-05T15:06:00Z", + "_type": "mlwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "nlwiki_1", + "fields": { + "title_nl": "Zoekmachine", + "text_nl": "Een zoekmachine is een computerprogramma waarmee informatie kan worden gezocht in een bepaalde collectie; dit kan een bibliotheek, het internet, of een persoonlijke verzameling zijn. Zonder nadere aanduiding wordt meestal een webdienst bedoeld waarmee met behulp van vrije trefwoorden volledige tekst (full text) kan worden gezocht in het gehele wereldwijde web. In tegenstelling tot startpagina's of webgidsen is er geen of zeer weinig menselijke tussenkomst nodig; het bezoeken van de webpagina's en het sorteren van de rangschikkingen gebeurt met behulp van een algoritme. Google is wereldwijd de meest gebruikte zoekmachine, andere populaire zoekmachines zijn Yahoo!, Bing en Baidu.", + "contributor": "unknown", + "timestamp": "2018-05-07T11:05:00Z", + "_type": "nlwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "nowiki_1", + "fields": { + "title_no": "Søkemotor", + "text_no": "En søkemotor er en type programvare som leter frem informasjon fra Internett (nettsider eller andre nettressurser) eller begrenset til et datasystem, der informasjonen samsvarer med et gitt søk, og rangerer treffene etter hva den oppfatter som mest relevant. Typisk ligger søkemotoren tilgjengelig som et nettsted, der brukeren legger inn søkeord ev. sammen med filterinnstillinger, og treffene vises gjerne som klikkbare lenker. Søkemotoren kan enten gjøre søk på hele Internett (for eksempel Google, Bing, Kvasir og Yahoo!), innenfor et bestemt nettsted (for eksempel søk innenfor VGs nettavis), eller innenfor et bestemt tema (f.eks. Kelkoo, som søker etter priser på produkter, og Picsearch, som søker etter bilder). 
En bedrift kan også sette opp en intern bedrifts-søkemotor for å få enklere tilgang til alle dokumenter og databaser i bedriften.", + "contributor": "unknown", + "timestamp": "2018-02-05T14:15:00Z", + "_type": "nowiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "pswiki_1", + "fields": { + "title_ps": "انټرنټ لټوونکی ماشين", + "text_ps": "نټرنټ د معلوماتو يوه داسې پراخه نړۍ ده چې يوه پوله هم نه لري. هره ثانيه په زرگونو معلوماتي توکي په کې ورځای کېږي، خو بيا هم د ډکېدو کومه اندېښنه نه رامنځته کېږي. حيرانوونکې خبره بيا دا ده چې دغه ټول معلومات په داسې مهارت سره په دغه نړۍ کې ځای شوي دي، چې سړی يې د سترگو په رپ کې د نړۍ په هر گوټ کې ترلاسه کولای شي. د کيبورډ په يو دوو تڼيو زور کولو او د موږك په يو دوو کليکونو سره خپلو ټولو پوښتنو ته ځواب موندلای شئ. ټول معلومات په ځانگړو انټرنټ پاڼو کې خوندي وي، نو که سړي ته د يوې پاڼې پته معلومه وي نو سم له لاسه به دغه پاڼه د انټرنټ پاڼو په کتونکي پروگرام کې پرانيزي، خو که سړی بيا يو معلومات غواړي او د هغې پاڼې پته ورسره نه وي، چې دغه ځانگړي معلومات په كې ځای شوي دي، نو بيا سړی يوه داسې پياوړي ځواک ته اړتيا لري، چې د سترگو په رپ کې ټول انټرنټ چاڼ کړي او دغه ځانگړي معلومات راوباسي. له نېکه مرغه د دغه ځواک غم خوړل شوی دی او ډېرInternet Search Engine انټرنټ لټوونکي ماشينونه جوړ کړای شوي دي، چې په وړيا توگه ټول انټرنټ تر ثانيو هم په لږ وخت کې چاڼ کوي او زموږ د خوښې معلومات راښکاره کوي. دغو ماشينونو ته سړی يوه ځانگړې کليمه ورکوي او هغوی ټول انټرنټ په دغې وركړل شوې کلمې پسې لټوي او هر دقيق معلومات چې لاسته ورځي، نو د کمپيوټر پر پرده يې راښکاره کوي. د دغو ماشينونو په ډله کې يو پياوړی ماشين د Google په نوم دی. د نوموړي ماشين بنسټ په ١٩٩٨م کال کې د متحدو ايالاتو د Standford پوهنتون دوو محصلينو Larry Page او Sergey Brin کښېښود. د دغه ماشين خدمات سړی د www.google.com په انټرنټ پاڼه کې کارولای شي. نوموړی ماشين د نړۍ په گڼ شمېر ژبو باندې خدمات وړاندې کوي او داسې چټک او دقيق لټون کوي چې د انټرنټ نور ډېر غښتلي ماشينونه ورته گوته پر غاښ پاتې دي. 
گوگل په ټوله نړۍ کې کارول کېږي او تر نيمي ثانيي هم په لنډ وخت کې په ميليارډونو انټرنټ پاڼې چاڼ کوي او خپلو کاروونکو ته په پرتله ييزه توگه دقيق معلومات راباسي. گوگل په يوه ورځ کې څه كمُ ٢٠٠ ميليونه پوښتنې ځوابوي. دا ( گوگل) تورى خپله د يو امريکايي رياضيپوه د وراره له خوا په لومړي ځل د يوې لوبې لپاره کارول شوی و. هغه دغه تورى د يو سلو صفرونو ( 1000?.) غوندې لوی عدد ته د نوم په توگه کاراوه. دغه نوم د نوموړي شرکت د دغه توان ښكارندوى دى، چې په لنډ وخت کې په لويه کچه پوښتنو ته ځواب ورکوي او معلومات لټوي. سړی چې د گوگل چټکتيا او دقيقوالي ته ځير شي، نو دا پوښته راپورته کېږي چې د دې ماشين شا ته به څومره پرمختللي کمپيوټرونه او پياوړی تخنيک پټ وي. خو اصلاً د گوگل شا ته په يوه لوی جال کې د منځنۍ بيې کمپيوټرونه سره نښلول شوي دي . په دې توگه په زرگونو کمپيوټرونه هممهاله په کار بوخت وي، چې په ترڅ کې يې د معلوماتو لټول او چاڼ کول چټکتيا مومي. د يوې پوښتنې له اخيستلو څخه راواخله معلوماتو تر لټولو او بيا د دقيقوالي له مخې په يوه ځانگړي طرز بېرته کاروونکي يا پوښتونكي تر ښوولو پورې ټولې چارې د درېيو Software پروگرامونه په لاس کې دي، چې په دغه زرگونو کمپيوټرونو کې ځای پر ځای شوي دي.", + "contributor": "unknown", + "timestamp": "2015-12-15T18:53:00Z", + "_type": "pswiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "ptwiki_1", + "fields": { + "title_pt": "Motor de busca", + "text_pt": "Motor de pesquisa (português europeu) ou ferramenta de busca (português brasileiro) ou buscador (em inglês: search engine) é um programa desenhado para procurar palavras-chave fornecidas pelo utilizador em documentos e bases de dados. No contexto da internet, um motor de pesquisa permite procurar palavras-chave em documentos alojados na world wide web, como aqueles que se encontram armazenados em websites. 
Os motores de busca surgiram logo após o aparecimento da Internet, com a intenção de prestar um serviço extremamente importante: a busca de qualquer informação na rede, apresentando os resultados de uma forma organizada, e também com a proposta de fazer isto de uma maneira rápida e eficiente. A partir deste preceito básico, diversas empresas se desenvolveram, chegando algumas a valer milhões de dólares. Entre as maiores empresas encontram-se o Google, o Yahoo, o Bing, o Lycos, o Cadê e, mais recentemente, a Amazon.com com o seu mecanismo de busca A9 porém inativo. Os buscadores se mostraram imprescindíveis para o fluxo de acesso e a conquista novos visitantes. Antes do advento da Web, havia sistemas para outros protocolos ou usos, como o Archie para sites FTP anônimos e o Veronica para o Gopher (protocolo de redes de computadores que foi desenhado para indexar repositórios de documentos na Internet, baseado-se em menus).", + "contributor": "unknown", + "timestamp": "2017-11-09T14:38:00Z", + "_type": "ptwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "rowiki_1", + "fields": { + "title_ro": "Motor de căutare", + "text_ro": "Un motor de căutare este un program apelabil căutător, care accesează Internetul în mod automat și frecvent și care stochează titlul, cuvinte cheie și, parțial, chiar conținutul paginilor web într-o bază de date. În momentul în care un utilizator apelează la un motor de căutare pentru a găsi o informație, o anumită frază sau un cuvânt, motorul de căutare se va uita în această bază de date și, în funcție de anumite criterii de prioritate, va crea și afișa o listă de rezultate (engleză: hit list ).", + "contributor": "unknown", + "timestamp": "2018-06-12T08:59:00Z", + "_type": "rowiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "ruwiki_1", + "fields": { + "title_ru": "Поисковая машина", + "text_ru": "Поисковая машина (поиско́вый движо́к) — комплекс программ, предназначенный для поиска информации. 
Обычно является частью поисковой системы. Основными критериями качества работы поисковой машины являются релевантность (степень соответствия запроса и найденного, т.е. уместность результата), полнота индекса, учёт морфологии языка.", + "contributor": "unknown", + "timestamp": "2017-03-22T01:16:00Z", + "_type": "ruwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "svwiki_1", + "fields": { + "title_sv": "Söktjänst", + "text_sv": "En söktjänst är en webbplats som gör det möjligt att söka efter innehåll på Internet. Söktjänsterna använder sökmotorer, även kallade sökrobotar, för att upptäcka, hämta in och indexera webbsidor.", + "contributor": "unknown", + "timestamp": "2018-08-16T22:13:00Z", + "_type": "svwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "tawiki_1", + "fields": { + "title_ta": "தேடுபொறி", + "text_ta": "தேடுபொறி அல்லது தேடற்பொறி என்பது ஒரு கணினி நிரலாகும். இது இணையத்தில் குவிந்து கிடக்கும் தகவல்களில் இருந்தோ கணினியில் இருக்கும் தகவல்களில் இருந்தோ நமக்குத் தேவையான தகவலைப்பெற உதவுகின்றது. பொதுவாகப் பாவனையாளர்கள் ஒரு விடயம் சம்பந்தமாகத் தேடுதலை ஒரு சொல்லை வைத்து தேடுவார்கள். தேடுபொறிகள் சுட்டிகளைப் பயன்படுத்தி விரைவான தேடலை மேற்கொள்ளும். தேடுபொறிகள் என்பது பொதுவாக இணையத் தேடுபொறிகளை அல்லது இணையத் தேடற்பொறிகளையே குறிக்கும். வேறுசில தேடுபொறிகள் உள்ளூர் வலையமைப்பை மாத்திரமே தேடும். இணைய தேடு பொறிகள் பல பில்லியன் பக்கங்களில் இருந்து நமக்குத் தேவையான மிகப் பொருத்தமான பக்கங்களைத் தேடித் தரும். வேறுசில தேடற்பொறிகள் செய்திக் குழுக்கள், தகவற்தளங்கள், திறந்த இணையத்தளங்களைப் பட்டியலிடும் DMOZ.org போன்ற இணையத் தளங்களைத் தேடும். மனிதர்களால் எழுதப்பட்ட இணையத் தளங்களைப் பட்டியலிடும் தளங்களைப் போன்றல்லாது தேடு பொறிகள் அல்காரிதங்களைப் பாவித்துத் தேடல்களை மேற்கொள்ளும். வேறு சில தேடற்பொறிகளோ தமது இடைமுகத்தை வழங்கினாலும் உண்மையில் வேறுசில தேடுபொறிகளே தேடலை மேற்கொள்ளும். ஆரம்ப காலத்தில் ASCII முறை வரியுருக்களை கொண்டே தேடு சொற்களை உள்ளிட முடிந்தது. 
தற்போது ஒருங்குறி எழுத்துக்குறிமுறையை பல தேடுபொறிகளும் ஆதரிப்பதால் ஆங்கிலத்தில் மட்டுமல்லாது உலக மொழிகள் அனைத்திலும் அவ்வம் மொழிப்பக்கங்களை தேடிப்பெறக்கூடியதாகவுள்ளது.", + "contributor": "unknown", + "timestamp": "2017-12-24T10:30:00Z", + "_type": "tawiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "tewiki_1", + "fields": { + "title_te": "వెబ్ శోధనా యంత్రం", + "text_te": "వెబ్ శోధన యంత్రం అనేది వరల్డ్ వైడ్ వెబ్/ప్రపంచ వ్యాప్త వెబ్లో సమాచారాన్ని శోదించటానికి తయారుచేసిన ఒక సాధనం. శోధన ఫలితాలు సాధారణంగా ఒక జాబితాలో ఇవ్వబడతాయి మరియు అవి సాధారణంగా హిట్స్ అని పిలువబడతాయి. ఆ సమాచారం వెబ్ పేజీలు, చిత్రాలు, సమాచారం మరియు ఇతర రకాలైన జాబితాలను కలిగి ఉంటుంది.కొన్ని శోధనా యంత్రాలు డేటా బేస్ లు లేదా ఓపెన్ డైరెక్టరీలలో అందుబాటులో ఉన్న సమాచారాన్ని కూడా వెలికితీస్తాయి. మానవ సంపాదకులచే నిర్వహించబడే క్రమపరిచిన వెబ్ డైరెక్టరీల లా కాకుండా, శోధనా యంత్రాలు సమస్య పరిష్కారానికి ఉపయోగించే ఒక క్రమ పద్దతి ద్వారా లేదా సమస్య పరిష్కారానికి ఉపయోగించే ఒక క్రమ పద్దతి మరియు మానవ శక్తిల మిశ్రమంతో పనిచేస్తాయి.", + "contributor": "unknown", + "timestamp": "2017-06-19T11:22:00Z", + "_type": "tewiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "thwiki_1", + "fields": { + "title_th": "เสิร์ชเอนจิน", + "text_th": "เสิร์ชเอนจิน (search engine) หรือ โปรแกรมค้นหา คือ โปรแกรมที่ช่วยในการสืบค้นหาข้อมูล โดยเฉพาะข้อมูลบนอินเทอร์เน็ต โดยครอบคลุมทั้งข้อความ รูปภาพ ภาพเคลื่อนไหว เพลง ซอฟต์แวร์ แผนที่ ข้อมูลบุคคล กลุ่มข่าว และอื่น ๆ ซึ่งแตกต่างกันไปแล้วแต่โปรแกรมหรือผู้ให้บริการแต่ละราย. 
เสิร์ชเอนจินส่วนใหญ่จะค้นหาข้อมูลจากคำสำคัญ (คีย์เวิร์ด) ที่ผู้ใช้ป้อนเข้าไป จากนั้นก็จะแสดงรายการผลลัพธ์ที่มันคิดว่าผู้ใช้น่าจะต้องการขึ้นมา ในปัจจุบัน เสิร์ชเอนจินบางตัว เช่น กูเกิล จะบันทึกประวัติการค้นหาและการเลือกผลลัพธ์ของผู้ใช้ไว้ด้วย และจะนำประวัติที่บันทึกไว้นั้น มาช่วยกรองผลลัพธ์ในการค้นหาครั้งต่อ ๆ ไป", + "contributor": "unknown", + "timestamp": "2016-06-18T11:06:00Z", + "_type": "thwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "trwiki_1", + "fields": { + "title_tr": "Arama motoru", + "text_tr": "Arama motoru, İnternet üzerinde bulunan içeriği aramak için kullanılan bir mekanizmadır. Üç bileşenden oluşur: web robotu, arama indeksi ve kullanıcı arabirimi. Ancak arama sonuçları genellikle sık tıklanan internet sayfalarından oluşan bir liste olarak verilmektedir.", + "contributor": "unknown", + "timestamp": "2018-03-13T17:37:00Z", + "_type": "trwiki" + } + } + }, + { + "type": "PUT", + "document": { + "id": "zhwiki_1", + "fields": { + "title_zh": "搜索引擎", + "text_zh": "搜索引擎(英语:search engine)是一种信息检索系统,旨在协助搜索存储在计算机系统中的信息。搜索结果一般被称为“hits”,通常会以表单的形式列出。网络搜索引擎是最常见、公开的一种搜索引擎,其功能为搜索万维网上储存的信息.", + "contributor": "unknown", + "timestamp": "2018-08-27T05:47:00Z", + "_type": "zhwiki" + } + } } -] \ No newline at end of file +] diff --git a/example/doc_arwiki_1.json b/example/doc_arwiki_1.json new file mode 100644 index 0000000..14b2358 --- /dev/null +++ b/example/doc_arwiki_1.json @@ -0,0 +1,7 @@ +{ + "title_ar": "محرك بحث", + "text_ar": "محرك البحث (بالإنجليزية: Search engine) هو نظام لإسترجاع المعلومات صمم للمساعدة على البحث عن المعلومات المخزنة على أي نظام حاسوبي. تعرض نتائج البحث عادة على شكل قائمة لأماكن تواجد المعلومات ومرتبة وفق معايير معينة. 
تسمح محركات البحث باختصار مدة البحث والتغلب على مشكلة أحجام البيانات المتصاعدة (إغراق معلوماتي).", + "contributor": "unknown", + "timestamp": "2018-03-25T18:04:00Z", + "_type": "arwiki" +} diff --git a/example/doc_bgwiki_1.json b/example/doc_bgwiki_1.json new file mode 100644 index 0000000..4e31eff --- /dev/null +++ b/example/doc_bgwiki_1.json @@ -0,0 +1,7 @@ +{ + "title_bg": "Търсачка", + "text_bg": "Търсачка или търсеща машина (на английски: Web search engine) е специализиран софтуер за извличане на информация, съхранена в компютърна система или мрежа. Това може да е персонален компютър, Интернет, корпоративна мрежа и т.н. Без допълнителни уточнения, най-често под търсачка се разбира уеб(-)търсачка, която търси в Интернет. Други видове търсачки са корпоративните търсачки, които търсят в интранет мрежите, личните търсачки – за индивидуалните компютри и мобилните търсачки. В търсачката потребителят (търсещият) прави запитване за съдържание, отговарящо на определен критерий (обикновено такъв, който съдържа определени думи и фрази). В резултат се получават списък от точки, които отговарят, пълно или частично, на този критерий. Търсачките обикновено използват редовно подновявани индекси, за да оперират бързо и ефикасно. Някои търсачки също търсят в информацията, която е на разположение в нюзгрупите и други големи бази данни. За разлика от Уеб директориите, които се поддържат от хора редактори, търсачките оперират алгоритмично. 
Повечето Интернет търсачки са притежавани от различни корпорации.", + "contributor": "unknown", + "timestamp": "2018-07-11T11:03:00Z", + "_type": "bgwiki" +} diff --git a/example/doc_cawiki_1.json b/example/doc_cawiki_1.json new file mode 100644 index 0000000..072029c --- /dev/null +++ b/example/doc_cawiki_1.json @@ -0,0 +1,7 @@ +{ + "title_ca": "Motor de cerca", + "text_ca": "Un motor de cerca o de recerca o bé cercador és un programa informàtic dissenyat per ajudar a trobar informació emmagatzemada en un sistema informàtic com ara una xarxa, Internet, un servidor o un ordinador personal. L'objectiu principal és el de trobar altres programes informàtics, pàgines web i documents, entre d'altres. A partir d'una determinada paraula o paraules o una determinada frase l'usuari demana un contingut sota un criteri determinat i retorna una llista de referències que compleixin aquest criteri. El procés es realitza a través de les metadades, vies per comunicar informació que utilitzen els motors per cada cerca. Els índex que utilitzen els cercadors sempre estan actualitzats a través d'un robot web per generar rapidesa i eficàcia en la recerca. Els directoris, en canvi, són gestionats per editors humans.", + "contributor": "unknown", + "timestamp": "2018-07-09T18:07:00Z", + "_type": "cawiki" +} diff --git a/example/doc_cswiki_1.json b/example/doc_cswiki_1.json new file mode 100644 index 0000000..8200dd9 --- /dev/null +++ b/example/doc_cswiki_1.json @@ -0,0 +1,7 @@ +{ + "title_cs": "Vyhledávač", + "text_cs": "Vyhledávač je počítačový systém či program, který umožňuje uživateli zadat nějaký libovolný nebo specifikovaný vyhledávaný výraz a získat z velkého objemu dat informace, které jsou v souladu s tímto dotazem. Jako vyhledávač se označují i ​​webové stránky, jejichž hlavní funkcí je poskytování takového systému či programu. 
Jako internetový vyhledávač se označuje buď vyhledávač, na který se přistupuje přes internet, nebo vyhledávač, jehož zdrojem vyhledávání je internet (tj. WWW, Usenet apod.). Jako online vyhledávač se označuje vyhledávač, při jehož výkonu činnosti dochází k výměně dat v rámci nějaké počítačové sítě, nejčastěji to je internetový vyhledávač. Fulltextový vyhledávač je vyhedávač, který vykonává fulltextové vyhledávání.", + "contributor": "unknown", + "timestamp": "2017-11-10T21:59:00Z", + "_type": "cswiki" +} diff --git a/example/doc_dawiki_1.json b/example/doc_dawiki_1.json new file mode 100644 index 0000000..eddd565 --- /dev/null +++ b/example/doc_dawiki_1.json @@ -0,0 +1,7 @@ +{ + "title_da": "Søgemaskine", + "text_da": "En søgemaskine er en applikation til at hjælpe en bruger med at finde information. Det kan f.eks. være at finde filer med bestemte data (f.eks. ord), gemt i en computers hukommelse, for eksempel via World Wide Web (kaldes så en websøgemaskine). Ofte bruges søgemaskine fejlagtigt om linkkataloger eller Netguider.", + "contributor": "unknown", + "timestamp": "2017-09-04T01:54:00Z", + "_type": "dawiki" +} diff --git a/example/doc_dewiki_1.json b/example/doc_dewiki_1.json new file mode 100644 index 0000000..c25ce46 --- /dev/null +++ b/example/doc_dewiki_1.json @@ -0,0 +1,7 @@ +{ + "title_de": "Suchmaschine", + "text_de": "Eine Suchmaschine ist ein Programm zur Recherche von Dokumenten, die in einem Computer oder einem Computernetzwerk wie z. B. dem World Wide Web gespeichert sind. Internet-Suchmaschinen haben ihren Ursprung in Information-Retrieval-Systemen. Sie erstellen einen Schlüsselwort-Index für die Dokumentbasis, um Suchanfragen über Schlüsselwörter mit einer nach Relevanz geordneten Trefferliste zu beantworten. Nach Eingabe eines Suchbegriffs liefert eine Suchmaschine eine Liste von Verweisen auf möglicherweise relevante Dokumente, meistens dargestellt mit Titel und einem kurzen Auszug des jeweiligen Dokuments. 
Dabei können verschiedene Suchverfahren Anwendung finden.", + "contributor": "unknown", + "timestamp": "2017-09-04T01:54:00Z", + "_type": "dewiki" +} diff --git a/example/doc_elwiki_1.json b/example/doc_elwiki_1.json new file mode 100644 index 0000000..b22943b --- /dev/null +++ b/example/doc_elwiki_1.json @@ -0,0 +1,7 @@ +{ + "title_el": "Μηχανή αναζήτησης", + "text_el": "Μια μηχανή αναζήτησης είναι μια εφαρμογή που επιτρέπει την αναζήτηση κειμένων και αρχείων στο Διαδίκτυο. Αποτελείται από ένα πρόγραμμα υπολογιστή που βρίσκεται σε έναν ή περισσότερους υπολογιστές στους οποίους δημιουργεί μια βάση δεδομένων με τις πληροφορίες που συλλέγει από το διαδίκτυο, και το διαδραστικό περιβάλλον που εμφανίζεται στον τελικό χρήστη ο οποίος χρησιμοποιεί την εφαρμογή από άλλον υπολογιστή συνδεδεμένο στο διαδίκτυο. Οι μηχανές αναζήτησης αποτελούνται από 3 είδη λογισμικού, το spider software, το index software και το query software.", + "contributor": "unknown", + "timestamp": "2017-11-21T19:57:00Z", + "_type": "elwiki" +} diff --git a/example/enwiki_doc1.json b/example/doc_enwiki_1.json similarity index 78% rename from example/enwiki_doc1.json rename to example/doc_enwiki_1.json index 80765e8..05b77a8 100644 --- a/example/enwiki_doc1.json +++ b/example/doc_enwiki_1.json @@ -1,6 +1,6 @@ { "title_en": "Search engine (computing)", - "text_en": "A search engine is an information retrieval system designed to help find information stored on a computer system. The search results are usually presented in a list and are commonly called hits. Search engines help to minimize the time required to find information and the amount of information which must be consulted, akin to other techniques for managing information overload.", + "text_en": "A search engine is an information retrieval system designed to help find information stored on a computer system. The search results are usually presented in a list and are commonly called hits. 
Search engines help to minimize the time required to find information and the amount of information which must be consulted, akin to other techniques for managing information overload. The most public, visible form of a search engine is a Web search engine which searches for information on the World Wide Web.", "contributor": "unknown", "timestamp": "2018-07-04T05:41:00Z", "_type": "enwiki" diff --git a/example/doc_eswiki_1.json b/example/doc_eswiki_1.json new file mode 100644 index 0000000..7d6fd9c --- /dev/null +++ b/example/doc_eswiki_1.json @@ -0,0 +1,7 @@ +{ + "title_es": "Motor de búsqueda", + "text_es": "Un motor de búsqueda o buscador es un sistema informático que busca archivos almacenados en servidores web gracias a su spider (también llamado araña web). Un ejemplo son los buscadores de Internet (algunos buscan únicamente en la web, pero otros lo hacen además en noticias, servicios como Gopher, FTP, etc.) cuando se pide información sobre algún tema. Las búsquedas se hacen con palabras clave o con árboles jerárquicos por temas; el resultado de la búsqueda «Página de resultados del buscador» es un listado de direcciones web en los que se mencionan temas relacionados con las palabras clave buscadas. Como operan de forma automática, los motores de búsqueda contienen generalmente más información que los directorios. Sin embargo, estos últimos también han de construirse a partir de búsquedas (no automatizadas) o bien a partir de avisos dados por los creadores de páginas.", + "contributor": "unknown", + "timestamp": "2018-08-30T11:30:00Z", + "_type": "eswiki" +} diff --git a/example/doc_fawiki_1.json b/example/doc_fawiki_1.json new file mode 100644 index 0000000..fcadc3d --- /dev/null +++ b/example/doc_fawiki_1.json @@ -0,0 +1,7 @@ +{ + "title_fa": "موتور جستجو (پردازش)", + "text_fa": "موتور جستجو یا جویشگر، در فرهنگ رایانه، به طور عمومی به برنامه‌ای گفته می‌شود که کلمات کلیدی را در یک سند یا بانک اطلاعاتی جستجو می‌کند. 
در اینترنت به برنامه‌ای گفته می‌شود که کلمات کلیدی موجود در فایل‌ها و سندهای وب جهانی، گروه‌های خبری، منوهای گوفر و آرشیوهای FTP را جستجو می‌کند. جویشگرهای زیادی وجود دارند که امروزه از معروفترین و پراستفاده‌ترین آنها می‌توان به google و یاهو! جستجو اشاره کرد.", + "contributor": "unknown", + "timestamp": "2017-01-06T02:46:00Z", + "_type": "fawiki" +} diff --git a/example/doc_fiwiki_1.json b/example/doc_fiwiki_1.json new file mode 100644 index 0000000..ec54eee --- /dev/null +++ b/example/doc_fiwiki_1.json @@ -0,0 +1,7 @@ +{ + "title_fi": "Hakukone", + "text_fi": "Hakukone on web-pohjainen ohjelma, joka etsii jatkuvasti Internetistä (varsinkin Webistä) uusia sivuja eritellen ja liittäen ne hakemistoonsa erityisten hakusanojen mukaan. Näitä hyväksi käyttäen hakukone tulostaa käyttäjän syöttämiä hakusanoja lähimpänä olevat sivut. Analysointi tapahtuu käytännössä eri hakukoneissa erilaisilla menetelmillä.", + "contributor": "unknown", + "timestamp": "2017-10-04T14:33:00Z", + "_type": "fiwiki" +} diff --git a/example/doc_frwiki_1.json b/example/doc_frwiki_1.json new file mode 100644 index 0000000..6f4f822 --- /dev/null +++ b/example/doc_frwiki_1.json @@ -0,0 +1,7 @@ +{ + "title_fr": "Moteur de recherche", + "text_fr": "Un moteur de recherche est une application web permettant de trouver des ressources à partir d'une requête sous forme de mots. Les ressources peuvent être des pages web, des articles de forums Usenet, des images, des vidéos, des fichiers, etc. Certains sites web offrent un moteur de recherche comme principale fonctionnalité ; on appelle alors « moteur de recherche » le site lui-même. Ce sont des instruments de recherche sur le web sans intervention humaine, ce qui les distingue des annuaires. Ils sont basés sur des « robots », encore appelés « bots », « spiders «, « crawlers » ou « agents », qui parcourent les sites à intervalles réguliers et de façon automatique pour découvrir de nouvelles adresses (URL). 
Ils suivent les liens hypertextes qui relient les pages les unes aux autres, les uns après les autres. Chaque page identifiée est alors indexée dans une base de données, accessible ensuite par les internautes à partir de mots-clés. C'est par abus de langage qu'on appelle également « moteurs de recherche » des sites web proposant des annuaires de sites web : dans ce cas, ce sont des instruments de recherche élaborés par des personnes qui répertorient et classifient des sites web jugés dignes d'intérêt, et non des robots d'indexation. Les moteurs de recherche ne s'appliquent pas qu'à Internet : certains moteurs sont des logiciels installés sur un ordinateur personnel. Ce sont des moteurs dits « de bureau » qui combinent la recherche parmi les fichiers stockés sur le PC et la recherche parmi les sites Web — on peut citer par exemple Exalead Desktop, Google Desktop et Copernic Desktop Search, Windex Server, etc. On trouve également des métamoteurs, c'est-à-dire des sites web où une même recherche est lancée simultanément sur plusieurs moteurs de recherche, les résultats étant ensuite fusionnés pour être présentés à l'internaute. On peut citer dans cette catégorie Ixquick, Mamma, Kartoo, Framabee ou Lilo.", + "contributor": "unknown", + "timestamp": "2018-05-30T15:15:00Z", + "_type": "frwiki" +} diff --git a/example/doc_gawiki_1.json b/example/doc_gawiki_1.json new file mode 100644 index 0000000..e639243 --- /dev/null +++ b/example/doc_gawiki_1.json @@ -0,0 +1,7 @@ +{ + "title_ga": "Inneall cuardaigh", + "text_ga": "Acmhainn ar an ngréasán domhanda atá insroichte le brabhsálaí Gréasáin, a chabhraíonn leis an úsáideoir ionaid is eolas a aimsiú. Bíonn na hinnill cuardaigh (Yahoo, Lycos, Google, Ask Jeeves) ag cuardach tríd an ngréasán an t-am ar fad, ag tógáil innéacsanna ábhar éagsúla — mar shampla, ag aimsiú teidil, fotheidil, eochairfhocail is céadlínte cáipéisí. Uaidh sin, is féidir cuid mhaith cáipéisí éagsúla ar ábhar ar leith a aisghabháil. 
Déanann an cuardach leanúnach cinnte de go bhfuil na hinnéacsanna suas chun dáta. Mar sin féin, aisghabhann na hinnill an-chuid cháipéisí nach mbaineann le hábhar, agus tá an-iarracht ar siúl an t-am ar fad iad a fheabhsú.", + "contributor": "unknown", + "timestamp": "2013-10-27T18:17:00Z", + "_type": "gawiki" +} diff --git a/example/doc_glwiki_1.json b/example/doc_glwiki_1.json new file mode 100644 index 0000000..f6b0feb --- /dev/null +++ b/example/doc_glwiki_1.json @@ -0,0 +1,7 @@ +{ + "title_gl": "Motor de busca", + "text_gl": "Un motor de busca ou buscador é un sistema informático que procura arquivos almacenados en servidores web, un exemplo son os buscadores de internet (algúns buscan só na Web pero outros buscan ademais en News, Gopher, FTP etc.) cando lles pedimos información sobre algún tema. As procuras fanse con palabras clave ou con árbores xerárquicas por temas; o resultado da procura é unha listaxe de direccións Web nas que se mencionan temas relacionados coas palabras clave buscadas.", + "contributor": "unknown", + "timestamp": "2016-10-31T13:33:00Z", + "_type": "glwiki" +} diff --git a/example/doc_guwiki_1.json b/example/doc_guwiki_1.json new file mode 100644 index 0000000..32deec2 --- /dev/null +++ b/example/doc_guwiki_1.json @@ -0,0 +1,7 @@ +{ + "title_gu": "વેબ શોધ એન્જીન", + "text_gu": "વેબ શોધ એન્જિન એ વર્લ્ડ વાઈડ વેબ (World Wide Web) પર વિવિધ માહિતી શોધવા માટે ઉપયોગમાં લેવામાં આવે છે. શોધ લીસ્ટને સામાન્ય રીતે યાદીમાં દર્શાવવામાં આવે છે અને જેને સામાન્ય રીતે હીટ્સ કહેવામાં આવે છે. જે માહિતી મળે છે તેમાં વેબ પૃષ્ઠ (web page), છબીઓ, માહિતી અને અન્ય પ્રકારની ફાઈલો હોય છે. કેટલાક શોધ એન્જિનો ન્યુઝબુક, ડેટાબેઝ અને અન્ય પ્રકારની ઓપન ડીરેક્ટરી (open directories)ઓની વિગતો પણ આપે છે. 
વ્યકિતઓ દ્વારા દુરસ્ત થતી વેબ ડાયરેક્ટરીઝ (Web directories)થી અલગ રીતે, શોધ એન્જિન ઍલ્ગરિધમનો અથવા ઍલ્ગરિધમ (algorithmic) અને માનવીય બાબતોના મિક્ષણનો ઉપયોગ કરે છે.", + "contributor": "unknown", + "timestamp": "2013-04-04T19:28:00Z", + "_type": "guwiki" +} diff --git a/example/doc_hiwiki_1.json b/example/doc_hiwiki_1.json new file mode 100644 index 0000000..9067a29 --- /dev/null +++ b/example/doc_hiwiki_1.json @@ -0,0 +1,7 @@ +{ + "title_hi": "खोज इंजन", + "text_hi": "ऐसे कम्प्यूटर प्रोग्राम खोजी इंजन (search engine) कहलाते हैं जो किसी कम्प्यूटर सिस्टम पर भण्डारित सूचना में से वांछित सूचना को ढूढ निकालते हैं। ये इंजन प्राप्त परिणामों को प्रायः एक सूची के रूप में प्रस्तुत करते हैं जिससे वांछित सूचना की प्रकृति और उसकी स्थिति का पता चलता है। खोजी इंजन किसी सूचना तक अपेक्षाकृत बहुत कम समय में पहुँचने में हमारी सहायता करते हैं। वे 'सूचना ओवरलोड' से भी हमे बचाते हैं। खोजी इंजन का सबसे प्रचलित रूप 'वेब खोजी इंजन' है जो वर्ल्ड वाइड वेब पर सूचना खोजने के लिये प्रयुक्त होता है।", + "contributor": "unknown", + "timestamp": "2017-10-19T20:09:00Z", + "_type": "hiwiki" +} diff --git a/example/doc_huwiki_1.json b/example/doc_huwiki_1.json new file mode 100644 index 0000000..7f2d784 --- /dev/null +++ b/example/doc_huwiki_1.json @@ -0,0 +1,7 @@ +{ + "title_hu": "Keresőmotor", + "text_hu": "A keresőmotor az informatikában egy program vagy alkalmazás, amely bizonyos feltételeknek (többnyire egy szónak vagy kifejezésnek) megfelelő információkat keres valamilyen számítógépes környezetben. Ez a cikk a World Wide Weben (és esetleg az internet más részein, például a Useneten) kereső alkalmazásokról szól, a keresőmotor kifejezés önmagában általában ezekre vonatkozik. 
Másfajta keresőmotorokra példák a vállalati keresőmotorok, amik egy intraneten, és a személyi keresőmotorok, amik egy személyi számítógép állományai között keresnek.", + "contributor": "unknown", + "timestamp": "2018-05-15T20:40:00Z", + "_type": "huwiki" +} diff --git a/example/doc_hywiki_1.json b/example/doc_hywiki_1.json new file mode 100644 index 0000000..007cbb6 --- /dev/null +++ b/example/doc_hywiki_1.json @@ -0,0 +1,7 @@ +{ + "title_hy": "Որոնողական համակարգ", + "text_hy": "Որոնողական համակարգը գործիք է, որը նախատեսված է համապատասխան բառերով Համաշխարհային ցանցում որոնումներ կատարելու համար։ Ստեղծված է համացանցում և FTP սերվերներում ինֆորմացիա փնտրելու համար։ Փնտրված արդյունքները ընդհանրապես ներկայացվում են արդյունքների ցանկում և սովորաբար կոչվում են նպատակակակետ, հիթ։ Ինֆորմացիան կարող է բաղկացած լինել վեբ էջերից, նկարներից, ինֆորմացիաներից և այլ տիպի ֆայլերից ու տվյալներից։ Այն կարող է օգտագործվել տարբեր տեսակի տեղեկատվություն որոնելու համար, ներառյալ՝ կայքեր, ֆորումներ, նկարներ, վիդեոներ, ֆայլեր և այլն։ Որոշ կայքեր արդեն իրենցից ներկայացնում են ինչ-որ որոնողական համակարգ, օրինակ՝ Dailymotion, YouTube և Google Videos ինտերնետում տեղադրված տեսահոլովակների որոնողական կայքեր են։ Որոնողական կայքը բաղկացած է \"ռոբոտներից\", որոնց անվանում են նաև bot, spider, crawler, որոնք ավտոմատ կերպով, առանց մարդկային միջամտության պարբերաբար հետազոտում են կայքերը։ Որոնողական կայքերը հետևում են հղումներին, որոնք կապված լինելով իրար հետ ինդեքսավորում է յուրաքանչյուր էջ տվյալների բազայում՝ հետագայում բանալի բառերի օգնությամբ դառնալով հասանելի ինտերնետից օգտվողների համար։ Սխալմամբ, որոնողական կայքեր են անվանում նաև այն կայքերը, որոնք իրենցից ներկայացնում են կայքային տեղեկատուներ։ Այս կայքերում ուշադրության արժանի կայքերը ցուցակագրվում և դասակարգվում են մարդկային ռեսուրսների շնորհիվ, այլ ոչ թե բոտերի կամ ռոբետների միջոցով։ Այդ կայքերից կարելի է նշել օրինակ՝ Yahoo!։ Yahoo!-ի որոնողական կայքը գտնվում է այստեղ։ Բոլոր որոնողական համակարգերը նախատեսված են ինտերնետում որոնում 
իրականացնելու համար, սակայն կան որոշ որոնողական համակարգերի տարատեսակներ, որոնք համակարգչային ծրագրեր են և հետևաբար տեղակայվում են համակարգչի մեջ։ Այս համակարգերը կոչվում են desktop։ Վերջիներս հնարավորություն են տալիս որոնելու թե համակարգչի մեջ կուտակված ֆայլեը, թե կայքերում տեղադրված ռեսուրսները։ Այդ ծրագրերից ամենահայտնիներն են՝ Exalead Desktop, Copernic Desktop Search Գոյություն ունեն նաև մետա-որոնողական համակարգեր, այսինքն կայքեր, որ նույն որոնումը կատարում են միաժամանակ տարբեր որոնողական կայքերի միջնորդությամբ։ Որոնման արդյունքները հետո դասակարգվում են որպեսզի ներկայացվեն օգտագործողին։ Մետա-որոնողական համակարգերի շարքից կարելի է թվարկել օրինակ՝ Mamma և Kartoo։", + "contributor": "unknown", + "timestamp": "2017-11-20T17:47:00Z", + "_type": "hywiki" +} diff --git a/example/doc_idwiki_1.json b/example/doc_idwiki_1.json new file mode 100644 index 0000000..7cf1b1e --- /dev/null +++ b/example/doc_idwiki_1.json @@ -0,0 +1,7 @@ +{ + "title_id": "Mesin pencari web", + "text_id": "Mesin pencari web atau mesin telusur web (bahasa Inggris: web search engine) adalah program komputer yang dirancang untuk melakukan pencarian atas berkas-berkas yang tersimpan dalam layanan www, ftp, publikasi milis, ataupun news group dalam sebuah ataupun sejumlah komputer peladen dalam suatu jaringan. Mesin pencari merupakan perangkat penelusur informasi dari dokumen-dokumen yang tersedia. Hasil pencarian umumnya ditampilkan dalam bentuk daftar yang seringkali diurutkan menurut tingkat akurasi ataupun rasio pengunjung atas suatu berkas yang disebut sebagai hits. Informasi yang menjadi target pencarian bisa terdapat dalam berbagai macam jenis berkas seperti halaman situs web, gambar, ataupun jenis-jenis berkas lainnya. Beberapa mesin pencari juga diketahui melakukan pengumpulan informasi atas data yang tersimpan dalam suatu basis data ataupun direktori web. 
Sebagian besar mesin pencari dijalankan oleh perusahaan swasta yang menggunakan algoritme kepemilikan dan basis data tertutup, di antaranya yang paling populer adalah safari Google (MSN Search dan Yahoo!). Telah ada beberapa upaya menciptakan mesin pencari dengan sumber terbuka (open source), contohnya adalah Htdig, Nutch, Egothor dan OpenFTS.", + "contributor": "unknown", + "timestamp": "2017-11-20T17:47:00Z", + "_type": "idwiki" +} diff --git a/example/doc_itwiki_1.json b/example/doc_itwiki_1.json new file mode 100644 index 0000000..31d777f --- /dev/null +++ b/example/doc_itwiki_1.json @@ -0,0 +1,7 @@ +{ + "title_it": "Motore di ricerca", + "text_it": "Nell'ambito delle tecnologie di Internet, un motore di ricerca (in inglese search engine) è un sistema automatico che, su richiesta, analizza un insieme di dati (spesso da esso stesso raccolti) e restituisce un indice dei contenuti disponibili[1] classificandoli in modo automatico in base a formule statistico-matematiche che ne indichino il grado di rilevanza data una determinata chiave di ricerca. Uno dei campi in cui i motori di ricerca trovano maggiore utilizzo è quello dell'information retrieval e nel web. 
I motori di ricerca più utilizzati nel 2017 sono stati: Google, Bing, Baidu, Qwant, Yandex, Ecosia, DuckDuckGo.", + "contributor": "unknown", + "timestamp": "2018-07-16T12:20:00Z", + "_type": "itwiki" +} diff --git a/example/jawiki_doc1.json b/example/doc_jawiki_1.json similarity index 54% rename from example/jawiki_doc1.json rename to example/doc_jawiki_1.json index d9c84b1..de75dca 100644 --- a/example/jawiki_doc1.json +++ b/example/doc_jawiki_1.json @@ -1,6 +1,6 @@ { "title_ja": "検索エンジン", - "text_ja": "検索エンジン(けんさくエンジン、英語: search engine)は、狭義にはインターネットに存在する情報(ウェブページ、ウェブサイト、画像ファイル、ネットニュースなど)を検索する機能およびそのプログラム。インターネットの普及初期には、検索としての機能のみを提供していたウェブサイトそのものを検索エンジンと呼んだが、現在では様々なサービスが加わったポータルサイト化が進んだため、検索をサービスの一つとして提供するウェブサイトを単に検索サイトと呼ぶことはなくなっている。広義には、インターネットに限定せず情報を検索するシステム全般を含む。", + "text_ja": "検索エンジン(けんさくエンジン、英語: search engine)は、狭義にはインターネットに存在する情報(ウェブページ、ウェブサイト、画像ファイル、ネットニュースなど)を検索する機能およびそのプログラム。インターネットの普及初期には、検索としての機能のみを提供していたウェブサイトそのものを検索エンジンと呼んだが、現在では様々なサービスが加わったポータルサイト化が進んだため、検索をサービスの一つとして提供するウェブサイトを単に検索サイトと呼ぶことはなくなっている。広義には、インターネットに限定せず情報を検索するシステム全般を含む。狭義の検索エンジンは、ロボット型検索エンジン、ディレクトリ型検索エンジン、メタ検索エンジンなどに分類される。広義の検索エンジンとしては、ある特定のウェブサイト内に登録されているテキスト情報の全文検索機能を備えたソフトウェア(全文検索システム)等がある。検索エンジンは、検索窓と呼ばれるボックスにキーワードを入力して検索をかけるもので、全文検索が可能なものと不可能なものとがある。検索サイトを一般に「検索エンジン」と呼ぶことはあるが、厳密には検索サイト自体は検索エンジンでない。", "contributor": "unknown", "timestamp": "2018-05-30T00:52:00Z", "_type": "jawiki" diff --git a/example/doc_knwiki_1.json b/example/doc_knwiki_1.json new file mode 100644 index 0000000..9bbdd86 --- /dev/null +++ b/example/doc_knwiki_1.json @@ -0,0 +1,7 @@ +{ + "title_kn": "ಅಂತರ್ಜಾಲ ಹುಡುಕಾಟ ಯಂತ್ರ", + "text_kn": "ಅಂತರ್ಜಾಲ ಹುಡುಕಾಟ ಯಂತ್ರ ಎಂದರೆ World Wide Webನಲ್ಲಿ ಮಾಹಿತಿ ಹುಡುಕುವುದಕ್ಕಾಗಿ ವಿನ್ಯಾಸಗೊಳಿಸಲಾದ ಒಂದು ಸಾಧನ. ಹುಡುಕಾಟದ ಫಲಿತಾಂಶಗಳನ್ನು ಸಾಮಾನ್ಯವಾಗಿ ಒಂದು ಪಟ್ಟಿಯ ರೂಪದಲ್ಲಿ ಪ್ರಸ್ತುತಪಡಿಸಲಾಗುತ್ತದೆ ಮತ್ತು ಇವನ್ನು ’ಹಿಟ್ಸ್’ ಎಂದು ಕರೆಯಲಾಗುತ್ತದೆ. ಈ ಮಾಹಿತಿಯು ಅನೇಕ ಜಾಲ ಪುಟಗಳು, ಚಿತ್ರಗಳು, ಮಾಹಿತಿ ಹಾಗೂ ಇತರೆ ಕಡತಗಳನ್ನು ಹೊಂದಿರಬಹುದು. 
ಕೆಲವು ಹುಡುಕಾಟ ಯಂತ್ರಗಳು ಬೇರೆ ದತ್ತಸಂಚಯಗಳು ಅಥವಾ ಮುಕ್ತ ಮಾಹಿತಿ ಸೂಚಿಗಳಿಂದ ದತ್ತಾಂಶಗಳ ಗಣಿಗಾರಿಕೆ ಮಾಡಿ ಹೊರತೆಗೆಯುತ್ತವೆ. ಜಾಲ ಮಾಹಿತಿಸೂಚಿಗಳನ್ನು ಸಂಬಂಧಿಸಿದ ಸಂಪಾದಕರು ನಿರ್ವಹಿಸಿದರೆ, ಹುಡುಕಾಟ ಯಂತ್ರಗಳು ಗಣನಪದ್ಧತಿಯ ಮೂಲಕ ಅಥವಾ ಗಣನಪದ್ಧತಿ ಮತ್ತು ಮಾನವ ಹೂಡುವಳಿಯ ಮಿಶ್ರಣದ ಮುಖಾಂತರ ಕಾರ್ಯನಿರ್ವಹಿಸುತ್ತವೆ.", + "contributor": "unknown", + "timestamp": "2017-10-03T14:13:00Z", + "_type": "knwiki" +} diff --git a/example/doc_kowiki_1.json b/example/doc_kowiki_1.json new file mode 100644 index 0000000..11bb762 --- /dev/null +++ b/example/doc_kowiki_1.json @@ -0,0 +1,7 @@ +{ + "title_cjk": "검색 엔진", + "text_cjk": "검색 엔진은 컴퓨터 시스템에 저장된 정보를 찾아주는 것을 도와주도록 설계된 정보 검색 시스템이다. 이러한 검색 결과는 목록으로 표현되는 것이 보통이다. 검색 엔진을 사용하면 정보를 찾는데 필요한 시간을 최소화할 수 있다. 가장 눈에 띄는 형태의 공용 검색 엔진으로는 웹 검색 엔진이 있으며 월드 와이드 웹에서 정보를 찾아준다.", + "contributor": "unknown", + "timestamp": "2017-11-19T12:50:00Z", + "_type": "kowiki" +} diff --git a/example/doc_mlwiki_1.json b/example/doc_mlwiki_1.json new file mode 100644 index 0000000..982dd0a --- /dev/null +++ b/example/doc_mlwiki_1.json @@ -0,0 +1,7 @@ +{ + "title_ml": "വെബ് സെർച്ച് എഞ്ചിൻ", + "text_ml": "വേൾഡ് വൈഡ് വെബ്ബിലുള്ള വിവരങ്ങൾ തിരയാനുള്ള ഒരു ഉപാധിയാണ്‌ വെബ് സെർച്ച് എഞ്ചിൻ അഥവാ സെർച്ച് എഞ്ചിൻ. തിരച്ചിൽ ഫലങ്ങൾ സാധാരണായായി ഒരു പട്ടികയായി നൽകുന്നു, തിരച്ചിൽ ഫലങ്ങളെ ഹിറ്റുകൾ എന്നാണ്‌ വിളിച്ചുവരുന്നത്[അവലംബം ആവശ്യമാണ്]. തിരച്ചിൽ ഫലങ്ങളിൽ വെബ് പേജുകൾ, ചിത്രങ്ങൾ, വിവരങ്ങൾ, വെബ്ബിലുള്ള മറ്റ് ഫയൽ തരങ്ങൾ എന്നിവ ഉൾപ്പെടാം. അൽഗോരിതങ്ങൾ ഉപയോഗിച്ചാണ് സെർച്ച് എഞ്ചിനുകൾ പ്രവർത്തിക്കുന്നത്.", + "contributor": "unknown", + "timestamp": "2010-05-05T15:06:00Z", + "_type": "mlwiki" +} diff --git a/example/doc_nlwiki_1.json b/example/doc_nlwiki_1.json new file mode 100644 index 0000000..5276fbc --- /dev/null +++ b/example/doc_nlwiki_1.json @@ -0,0 +1,7 @@ +{ + "title_nl": "Zoekmachine", + "text_nl": "Een zoekmachine is een computerprogramma waarmee informatie kan worden gezocht in een bepaalde collectie; dit kan een bibliotheek, het internet, of een persoonlijke verzameling zijn. 
Zonder nadere aanduiding wordt meestal een webdienst bedoeld waarmee met behulp van vrije trefwoorden volledige tekst (full text) kan worden gezocht in het gehele wereldwijde web. In tegenstelling tot startpagina's of webgidsen is er geen of zeer weinig menselijke tussenkomst nodig; het bezoeken van de webpagina's en het sorteren van de rangschikkingen gebeurt met behulp van een algoritme. Google is wereldwijd de meest gebruikte zoekmachine, andere populaire zoekmachines zijn Yahoo!, Bing en Baidu.", + "contributor": "unknown", + "timestamp": "2018-05-07T11:05:00Z", + "_type": "nlwiki" +} diff --git a/example/doc_nowiki_1.json b/example/doc_nowiki_1.json new file mode 100644 index 0000000..4f5ae40 --- /dev/null +++ b/example/doc_nowiki_1.json @@ -0,0 +1,7 @@ +{ + "title_no": "Søkemotor", + "text_no": "En søkemotor er en type programvare som leter frem informasjon fra Internett (nettsider eller andre nettressurser) eller begrenset til et datasystem, der informasjonen samsvarer med et gitt søk, og rangerer treffene etter hva den oppfatter som mest relevant. Typisk ligger søkemotoren tilgjengelig som et nettsted, der brukeren legger inn søkeord ev. sammen med filterinnstillinger, og treffene vises gjerne som klikkbare lenker. Søkemotoren kan enten gjøre søk på hele Internett (for eksempel Google, Bing, Kvasir og Yahoo!), innenfor et bestemt nettsted (for eksempel søk innenfor VGs nettavis), eller innenfor et bestemt tema (f.eks. Kelkoo, som søker etter priser på produkter, og Picsearch, som søker etter bilder). 
En bedrift kan også sette opp en intern bedrifts-søkemotor for å få enklere tilgang til alle dokumenter og databaser i bedriften.", + "contributor": "unknown", + "timestamp": "2018-02-05T14:15:00Z", + "_type": "nowiki" +} diff --git a/example/doc_pswiki_1.json b/example/doc_pswiki_1.json new file mode 100644 index 0000000..9ae0102 --- /dev/null +++ b/example/doc_pswiki_1.json @@ -0,0 +1,7 @@ +{ + "title_ps": "انټرنټ لټوونکی ماشين", + "text_ps": "نټرنټ د معلوماتو يوه داسې پراخه نړۍ ده چې يوه پوله هم نه لري. هره ثانيه په زرگونو معلوماتي توکي په کې ورځای کېږي، خو بيا هم د ډکېدو کومه اندېښنه نه رامنځته کېږي. حيرانوونکې خبره بيا دا ده چې دغه ټول معلومات په داسې مهارت سره په دغه نړۍ کې ځای شوي دي، چې سړی يې د سترگو په رپ کې د نړۍ په هر گوټ کې ترلاسه کولای شي. د کيبورډ په يو دوو تڼيو زور کولو او د موږك په يو دوو کليکونو سره خپلو ټولو پوښتنو ته ځواب موندلای شئ. ټول معلومات په ځانگړو انټرنټ پاڼو کې خوندي وي، نو که سړي ته د يوې پاڼې پته معلومه وي نو سم له لاسه به دغه پاڼه د انټرنټ پاڼو په کتونکي پروگرام کې پرانيزي، خو که سړی بيا يو معلومات غواړي او د هغې پاڼې پته ورسره نه وي، چې دغه ځانگړي معلومات په كې ځای شوي دي، نو بيا سړی يوه داسې پياوړي ځواک ته اړتيا لري، چې د سترگو په رپ کې ټول انټرنټ چاڼ کړي او دغه ځانگړي معلومات راوباسي. له نېکه مرغه د دغه ځواک غم خوړل شوی دی او ډېرInternet Search Engine انټرنټ لټوونکي ماشينونه جوړ کړای شوي دي، چې په وړيا توگه ټول انټرنټ تر ثانيو هم په لږ وخت کې چاڼ کوي او زموږ د خوښې معلومات راښکاره کوي. دغو ماشينونو ته سړی يوه ځانگړې کليمه ورکوي او هغوی ټول انټرنټ په دغې وركړل شوې کلمې پسې لټوي او هر دقيق معلومات چې لاسته ورځي، نو د کمپيوټر پر پرده يې راښکاره کوي. د دغو ماشينونو په ډله کې يو پياوړی ماشين د Google په نوم دی. د نوموړي ماشين بنسټ په ١٩٩٨م کال کې د متحدو ايالاتو د Standford پوهنتون دوو محصلينو Larry Page او Sergey Brin کښېښود. د دغه ماشين خدمات سړی د www.google.com په انټرنټ پاڼه کې کارولای شي. 
نوموړی ماشين د نړۍ په گڼ شمېر ژبو باندې خدمات وړاندې کوي او داسې چټک او دقيق لټون کوي چې د انټرنټ نور ډېر غښتلي ماشينونه ورته گوته پر غاښ پاتې دي. گوگل په ټوله نړۍ کې کارول کېږي او تر نيمي ثانيي هم په لنډ وخت کې په ميليارډونو انټرنټ پاڼې چاڼ کوي او خپلو کاروونکو ته په پرتله ييزه توگه دقيق معلومات راباسي. گوگل په يوه ورځ کې څه كمُ ٢٠٠ ميليونه پوښتنې ځوابوي. دا ( گوگل) تورى خپله د يو امريکايي رياضيپوه د وراره له خوا په لومړي ځل د يوې لوبې لپاره کارول شوی و. هغه دغه تورى د يو سلو صفرونو ( 1000?.) غوندې لوی عدد ته د نوم په توگه کاراوه. دغه نوم د نوموړي شرکت د دغه توان ښكارندوى دى، چې په لنډ وخت کې په لويه کچه پوښتنو ته ځواب ورکوي او معلومات لټوي. سړی چې د گوگل چټکتيا او دقيقوالي ته ځير شي، نو دا پوښته راپورته کېږي چې د دې ماشين شا ته به څومره پرمختللي کمپيوټرونه او پياوړی تخنيک پټ وي. خو اصلاً د گوگل شا ته په يوه لوی جال کې د منځنۍ بيې کمپيوټرونه سره نښلول شوي دي . په دې توگه په زرگونو کمپيوټرونه هممهاله په کار بوخت وي، چې په ترڅ کې يې د معلوماتو لټول او چاڼ کول چټکتيا مومي. د يوې پوښتنې له اخيستلو څخه راواخله معلوماتو تر لټولو او بيا د دقيقوالي له مخې په يوه ځانگړي طرز بېرته کاروونکي يا پوښتونكي تر ښوولو پورې ټولې چارې د درېيو Software پروگرامونه په لاس کې دي، چې په دغه زرگونو کمپيوټرونو کې ځای پر ځای شوي دي.", + "contributor": "unknown", + "timestamp": "2015-12-15T18:53:00Z", + "_type": "pswiki" +} diff --git a/example/doc_ptwiki_1.json b/example/doc_ptwiki_1.json new file mode 100644 index 0000000..affca77 --- /dev/null +++ b/example/doc_ptwiki_1.json @@ -0,0 +1,7 @@ +{ + "title_pt": "Motor de busca", + "text_pt": "Motor de pesquisa (português europeu) ou ferramenta de busca (português brasileiro) ou buscador (em inglês: search engine) é um programa desenhado para procurar palavras-chave fornecidas pelo utilizador em documentos e bases de dados. No contexto da internet, um motor de pesquisa permite procurar palavras-chave em documentos alojados na world wide web, como aqueles que se encontram armazenados em websites. 
Os motores de busca surgiram logo após o aparecimento da Internet, com a intenção de prestar um serviço extremamente importante: a busca de qualquer informação na rede, apresentando os resultados de uma forma organizada, e também com a proposta de fazer isto de uma maneira rápida e eficiente. A partir deste preceito básico, diversas empresas se desenvolveram, chegando algumas a valer milhões de dólares. Entre as maiores empresas encontram-se o Google, o Yahoo, o Bing, o Lycos, o Cadê e, mais recentemente, a Amazon.com com o seu mecanismo de busca A9 porém inativo. Os buscadores se mostraram imprescindíveis para o fluxo de acesso e a conquista novos visitantes. Antes do advento da Web, havia sistemas para outros protocolos ou usos, como o Archie para sites FTP anônimos e o Veronica para o Gopher (protocolo de redes de computadores que foi desenhado para indexar repositórios de documentos na Internet, baseado-se em menus).", + "contributor": "unknown", + "timestamp": "2017-11-09T14:38:00Z", + "_type": "ptwiki" +} diff --git a/example/doc_rowiki_1.json b/example/doc_rowiki_1.json new file mode 100644 index 0000000..4929dbc --- /dev/null +++ b/example/doc_rowiki_1.json @@ -0,0 +1,7 @@ +{ + "title_ro": "Motor de căutare", + "text_ro": "Un motor de căutare este un program apelabil căutător, care accesează Internetul în mod automat și frecvent și care stochează titlul, cuvinte cheie și, parțial, chiar conținutul paginilor web într-o bază de date. 
În momentul în care un utilizator apelează la un motor de căutare pentru a găsi o informație, o anumită frază sau un cuvânt, motorul de căutare se va uita în această bază de date și, în funcție de anumite criterii de prioritate, va crea și afișa o listă de rezultate (engleză: hit list ).", + "contributor": "unknown", + "timestamp": "2018-06-12T08:59:00Z", + "_type": "rowiki" +} diff --git a/example/doc_ruwiki_1.json b/example/doc_ruwiki_1.json new file mode 100644 index 0000000..5a9ec23 --- /dev/null +++ b/example/doc_ruwiki_1.json @@ -0,0 +1,7 @@ +{ + "title_ru": "Поисковая машина", + "text_ru": "Поисковая машина (поиско́вый движо́к) — комплекс программ, предназначенный для поиска информации. Обычно является частью поисковой системы. Основными критериями качества работы поисковой машины являются релевантность (степень соответствия запроса и найденного, т.е. уместность результата), полнота индекса, учёт морфологии языка.", + "contributor": "unknown", + "timestamp": "2017-03-22T01:16:00Z", + "_type": "ruwiki" +} diff --git a/example/doc_svwiki_1.json b/example/doc_svwiki_1.json new file mode 100644 index 0000000..07997b5 --- /dev/null +++ b/example/doc_svwiki_1.json @@ -0,0 +1,7 @@ +{ + "title_sv": "Söktjänst", + "text_sv": "En söktjänst är en webbplats som gör det möjligt att söka efter innehåll på Internet. Söktjänsterna använder sökmotorer, även kallade sökrobotar, för att upptäcka, hämta in och indexera webbsidor.", + "contributor": "unknown", + "timestamp": "2018-08-16T22:13:00Z", + "_type": "svwiki" +} diff --git a/example/doc_tawiki_1.json b/example/doc_tawiki_1.json new file mode 100644 index 0000000..563f0a6 --- /dev/null +++ b/example/doc_tawiki_1.json @@ -0,0 +1,7 @@ +{ + "title_ta": "தேடுபொறி", + "text_ta": "தேடுபொறி அல்லது தேடற்பொறி என்பது ஒரு கணினி நிரலாகும். இது இணையத்தில் குவிந்து கிடக்கும் தகவல்களில் இருந்தோ கணினியில் இருக்கும் தகவல்களில் இருந்தோ நமக்குத் தேவையான தகவலைப்பெற உதவுகின்றது. 
பொதுவாகப் பாவனையாளர்கள் ஒரு விடயம் சம்பந்தமாகத் தேடுதலை ஒரு சொல்லை வைத்து தேடுவார்கள். தேடுபொறிகள் சுட்டிகளைப் பயன்படுத்தி விரைவான தேடலை மேற்கொள்ளும். தேடுபொறிகள் என்பது பொதுவாக இணையத் தேடுபொறிகளை அல்லது இணையத் தேடற்பொறிகளையே குறிக்கும். வேறுசில தேடுபொறிகள் உள்ளூர் வலையமைப்பை மாத்திரமே தேடும். இணைய தேடு பொறிகள் பல பில்லியன் பக்கங்களில் இருந்து நமக்குத் தேவையான மிகப் பொருத்தமான பக்கங்களைத் தேடித் தரும். வேறுசில தேடற்பொறிகள் செய்திக் குழுக்கள், தகவற்தளங்கள், திறந்த இணையத்தளங்களைப் பட்டியலிடும் DMOZ.org போன்ற இணையத் தளங்களைத் தேடும். மனிதர்களால் எழுதப்பட்ட இணையத் தளங்களைப் பட்டியலிடும் தளங்களைப் போன்றல்லாது தேடு பொறிகள் அல்காரிதங்களைப் பாவித்துத் தேடல்களை மேற்கொள்ளும். வேறு சில தேடற்பொறிகளோ தமது இடைமுகத்தை வழங்கினாலும் உண்மையில் வேறுசில தேடுபொறிகளே தேடலை மேற்கொள்ளும். ஆரம்ப காலத்தில் ASCII முறை வரியுருக்களை கொண்டே தேடு சொற்களை உள்ளிட முடிந்தது. தற்போது ஒருங்குறி எழுத்துக்குறிமுறையை பல தேடுபொறிகளும் ஆதரிப்பதால் ஆங்கிலத்தில் மட்டுமல்லாது உலக மொழிகள் அனைத்திலும் அவ்வம் மொழிப்பக்கங்களை தேடிப்பெறக்கூடியதாகவுள்ளது.", + "contributor": "unknown", + "timestamp": "2017-12-24T10:30:00Z", + "_type": "tawiki" +} diff --git a/example/doc_tewiki_1.json b/example/doc_tewiki_1.json new file mode 100644 index 0000000..91929cd --- /dev/null +++ b/example/doc_tewiki_1.json @@ -0,0 +1,7 @@ +{ + "title_te": "వెబ్ శోధనా యంత్రం", + "text_te": "వెబ్ శోధన యంత్రం అనేది వరల్డ్ వైడ్ వెబ్/ప్రపంచ వ్యాప్త వెబ్లో సమాచారాన్ని శోదించటానికి తయారుచేసిన ఒక సాధనం. శోధన ఫలితాలు సాధారణంగా ఒక జాబితాలో ఇవ్వబడతాయి మరియు అవి సాధారణంగా హిట్స్ అని పిలువబడతాయి. ఆ సమాచారం వెబ్ పేజీలు, చిత్రాలు, సమాచారం మరియు ఇతర రకాలైన జాబితాలను కలిగి ఉంటుంది.కొన్ని శోధనా యంత్రాలు డేటా బేస్ లు లేదా ఓపెన్ డైరెక్టరీలలో అందుబాటులో ఉన్న సమాచారాన్ని కూడా వెలికితీస్తాయి. 
మానవ సంపాదకులచే నిర్వహించబడే క్రమపరిచిన వెబ్ డైరెక్టరీల లా కాకుండా, శోధనా యంత్రాలు సమస్య పరిష్కారానికి ఉపయోగించే ఒక క్రమ పద్దతి ద్వారా లేదా సమస్య పరిష్కారానికి ఉపయోగించే ఒక క్రమ పద్దతి మరియు మానవ శక్తిల మిశ్రమంతో పనిచేస్తాయి.", + "contributor": "unknown", + "timestamp": "2017-06-19T11:22:00Z", + "_type": "tewiki" +} diff --git a/example/doc_thwiki_1.json b/example/doc_thwiki_1.json new file mode 100644 index 0000000..f3aa444 --- /dev/null +++ b/example/doc_thwiki_1.json @@ -0,0 +1,7 @@ +{ + "title_th": "เสิร์ชเอนจิน", + "text_th": "เสิร์ชเอนจิน (search engine) หรือ โปรแกรมค้นหา คือ โปรแกรมที่ช่วยในการสืบค้นหาข้อมูล โดยเฉพาะข้อมูลบนอินเทอร์เน็ต โดยครอบคลุมทั้งข้อความ รูปภาพ ภาพเคลื่อนไหว เพลง ซอฟต์แวร์ แผนที่ ข้อมูลบุคคล กลุ่มข่าว และอื่น ๆ ซึ่งแตกต่างกันไปแล้วแต่โปรแกรมหรือผู้ให้บริการแต่ละราย. เสิร์ชเอนจินส่วนใหญ่จะค้นหาข้อมูลจากคำสำคัญ (คีย์เวิร์ด) ที่ผู้ใช้ป้อนเข้าไป จากนั้นก็จะแสดงรายการผลลัพธ์ที่มันคิดว่าผู้ใช้น่าจะต้องการขึ้นมา ในปัจจุบัน เสิร์ชเอนจินบางตัว เช่น กูเกิล จะบันทึกประวัติการค้นหาและการเลือกผลลัพธ์ของผู้ใช้ไว้ด้วย และจะนำประวัติที่บันทึกไว้นั้น มาช่วยกรองผลลัพธ์ในการค้นหาครั้งต่อ ๆ ไป", + "contributor": "unknown", + "timestamp": "2016-06-18T11:06:00Z", + "_type": "thwiki" +} diff --git a/example/doc_trwiki_1.json b/example/doc_trwiki_1.json new file mode 100644 index 0000000..b5e3ee6 --- /dev/null +++ b/example/doc_trwiki_1.json @@ -0,0 +1,7 @@ +{ + "title_tr": "Arama motoru", + "text_tr": "Arama motoru, İnternet üzerinde bulunan içeriği aramak için kullanılan bir mekanizmadır. Üç bileşenden oluşur: web robotu, arama indeksi ve kullanıcı arabirimi. 
Ancak arama sonuçları genellikle sık tıklanan internet sayfalarından oluşan bir liste olarak verilmektedir.", + "contributor": "unknown", + "timestamp": "2018-03-13T17:37:00Z", + "_type": "trwiki" +} diff --git a/example/doc_zhwiki_1.json b/example/doc_zhwiki_1.json new file mode 100644 index 0000000..4c5a761 --- /dev/null +++ b/example/doc_zhwiki_1.json @@ -0,0 +1,7 @@ +{ + "title_zh": "搜索引擎", + "text_zh": "搜索引擎(英语:search engine)是一种信息检索系统,旨在协助搜索存储在计算机系统中的信息。搜索结果一般被称为“hits”,通常会以表单的形式列出。网络搜索引擎是最常见、公开的一种搜索引擎,其功能为搜索万维网上储存的信息.", + "contributor": "unknown", + "timestamp": "2018-08-27T05:47:00Z", + "_type": "zhwiki" +} diff --git a/example/ptwiki_doc1.json b/example/ptwiki_doc1.json deleted file mode 100644 index a1a38b9..0000000 --- a/example/ptwiki_doc1.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "title_pt": "Motor de busca", - "text_pt": "Motor de pesquisa (português europeu) ou ferramenta de busca (português brasileiro) ou buscador (em inglês: search engine) é um programa desenhado para procurar palavras-chave fornecidas pelo utilizador em documentos e bases de dados. 
No contexto da internet, um motor de pesquisa permite procurar palavras-chave em documentos alojados na world wide web, como aqueles que se encontram armazenados em websites.", - "contributor": "unknown", - "timestamp": "2018-07-04T05:41:00Z", - "_type": "ptwiki" -} diff --git a/example/wikipedia_search_request.json b/example/search_request.json similarity index 100% rename from example/wikipedia_search_request.json rename to example/search_request.json diff --git a/example/wikipedia_index_mapping.json b/example/wikipedia_index_mapping.json deleted file mode 100644 index eb7fbae..0000000 --- a/example/wikipedia_index_mapping.json +++ /dev/null @@ -1,257 +0,0 @@ -{ - "types": { - "enwiki": { - "enabled": true, - "dynamic": true, - "properties": { - "title_en": { - "enabled": true, - "dynamic": true, - "fields": [ - { - "type": "text", - "analyzer": "en", - "store": true, - "index": true, - "include_term_vectors": true, - "include_in_all": true - } - ], - "default_analyzer": "" - }, - "text_en": { - "enabled": true, - "dynamic": true, - "fields": [ - { - "type": "text", - "analyzer": "en", - "store": true, - "index": true, - "include_term_vectors": true, - "include_in_all": true - } - ], - "default_analyzer": "" - }, - "contributor": { - "enabled": true, - "dynamic": true, - "fields": [ - { - "type": "text", - "analyzer": "keyword", - "store": true, - "index": true, - "include_term_vectors": true, - "include_in_all": true - } - ], - "default_analyzer": "" - }, - "timestamp": { - "enabled": true, - "dynamic": true, - "fields": [ - { - "type": "datetime", - "store": true, - "index": true, - "include_in_all": true - } - ], - "default_analyzer": "" - }, - "_type": { - "enabled": true, - "dynamic": true, - "fields": [ - { - "type": "text", - "analyzer": "keyword", - "store": true, - "index": true, - "include_term_vectors": true, - "include_in_all": true - } - ], - "default_analyzer": "" - } - }, - "default_analyzer": "" - }, - "ptwiki": { - "enabled": true, - "dynamic": 
true, - "properties": { - "title_pt": { - "enabled": true, - "dynamic": true, - "fields": [ - { - "type": "text", - "analyzer": "pt", - "store": true, - "index": true, - "include_term_vectors": true, - "include_in_all": true - } - ], - "default_analyzer": "" - }, - "text_pt": { - "enabled": true, - "dynamic": true, - "fields": [ - { - "type": "text", - "analyzer": "pt", - "store": true, - "index": true, - "include_term_vectors": true, - "include_in_all": true - } - ], - "default_analyzer": "" - }, - "contributor": { - "enabled": true, - "dynamic": true, - "fields": [ - { - "type": "text", - "analyzer": "keyword", - "store": true, - "index": true, - "include_term_vectors": true, - "include_in_all": true - } - ], - "default_analyzer": "" - }, - "timestamp": { - "enabled": true, - "dynamic": true, - "fields": [ - { - "type": "datetime", - "store": true, - "index": true, - "include_in_all": true - } - ], - "default_analyzer": "" - }, - "_type": { - "enabled": true, - "dynamic": true, - "fields": [ - { - "type": "text", - "analyzer": "keyword", - "store": true, - "index": true, - "include_term_vectors": true, - "include_in_all": true - } - ], - "default_analyzer": "" - } - }, - "default_analyzer": "" - }, - "jawiki": { - "enabled": true, - "dynamic": true, - "properties": { - "title_ja": { - "enabled": true, - "dynamic": true, - "fields": [ - { - "type": "text", - "analyzer": "ja", - "store": true, - "index": true, - "include_term_vectors": true, - "include_in_all": true - } - ], - "default_analyzer": "" - }, - "text_ja": { - "enabled": true, - "dynamic": true, - "fields": [ - { - "type": "text", - "analyzer": "ja", - "store": true, - "index": true, - "include_term_vectors": true, - "include_in_all": true - } - ], - "default_analyzer": "" - }, - "contributor": { - "enabled": true, - "dynamic": true, - "fields": [ - { - "type": "text", - "analyzer": "keyword", - "store": true, - "index": true, - "include_term_vectors": true, - "include_in_all": true - } - ], - 
"default_analyzer": "" - }, - "timestamp": { - "enabled": true, - "dynamic": true, - "fields": [ - { - "type": "datetime", - "store": true, - "index": true, - "include_in_all": true - } - ], - "default_analyzer": "" - }, - "_type": { - "enabled": true, - "dynamic": true, - "fields": [ - { - "type": "text", - "analyzer": "keyword", - "store": true, - "index": true, - "include_term_vectors": true, - "include_in_all": true - } - ], - "default_analyzer": "" - } - }, - "default_analyzer": "" - } - }, - "default_mapping": { - "enabled": true, - "dynamic": true, - "default_analyzer": "" - }, - "type_field": "_type", - "default_type": "_default", - "default_analyzer": "standard", - "default_datetime_parser": "dateTimeOptional", - "default_field": "_all", - "store_dynamic": true, - "index_dynamic": true, - "analysis": {} -} diff --git a/grpc/client/client.go b/grpc/client/client.go index 29b646a..c2f6ba7 100644 --- a/grpc/client/client.go +++ b/grpc/client/client.go @@ -16,6 +16,7 @@ package client import ( "context" + "math" "github.com/golang/protobuf/ptypes/empty" "github.com/mosuka/blast/protobuf" @@ -23,15 +24,13 @@ import ( ) type GRPCClient struct { - ctx context.Context - cancel context.CancelFunc - conn *grpc.ClientConn - client protobuf.KVSClient - maxCallSendMsgSize int - maxCallRecvMsgSize int + ctx context.Context + cancel context.CancelFunc + conn *grpc.ClientConn + client protobuf.KVSClient } -func NewGRPCClient(address string, maxCallSendMsgSize int, maxCallRecvMsgSize int) (*GRPCClient, error) { +func NewGRPCClient(address string) (*GRPCClient, error) { var err error // Connect context @@ -42,8 +41,8 @@ func NewGRPCClient(address string, maxCallSendMsgSize int, maxCallRecvMsgSize in dialOpts := []grpc.DialOption{ grpc.WithInsecure(), grpc.WithDefaultCallOptions( - grpc.MaxCallSendMsgSize(maxCallSendMsgSize), - grpc.MaxCallRecvMsgSize(maxCallRecvMsgSize), + grpc.MaxCallSendMsgSize(math.MaxInt32), + grpc.MaxCallRecvMsgSize(math.MaxInt32), ), } diff --git 
a/grpc/server/server.go b/grpc/server/server.go index 3c839b0..a844d75 100644 --- a/grpc/server/server.go +++ b/grpc/server/server.go @@ -16,7 +16,6 @@ package server import ( "log" - "math" "net" "github.com/mosuka/blast/logging" @@ -28,9 +27,6 @@ import ( type GRPCServer struct { grpcAddress string - maxSendMessageSize int - maxReceiveMessageSize int - logger *log.Logger server *grpc.Server @@ -40,11 +36,9 @@ type GRPCServer struct { func NewGRPCServer(gRPCAddress string, service *service.KVSService) (*GRPCServer, error) { return &GRPCServer{ - grpcAddress: gRPCAddress, - service: service, - maxSendMessageSize: math.MaxInt32, - maxReceiveMessageSize: math.MaxInt32, - logger: logging.DefaultLogger(), + grpcAddress: gRPCAddress, + service: service, + logger: logging.DefaultLogger(), }, nil } @@ -53,16 +47,6 @@ func (s *GRPCServer) SetLogger(logger *log.Logger) { return } -func (s *GRPCServer) SetMaxSendMessageSize(maxSendMessageSize int) { - s.maxSendMessageSize = maxSendMessageSize - return -} - -func (s *GRPCServer) SetMaxReceiveMessageSize(maxReceiveMessageSize int) { - s.maxReceiveMessageSize = maxReceiveMessageSize - return -} - func (s *GRPCServer) Start() error { var err error diff --git a/http/handler/get.go b/http/handler/get.go index b1a3937..e38a48f 100644 --- a/http/handler/get.go +++ b/http/handler/get.go @@ -17,12 +17,12 @@ package handler import ( "bytes" "encoding/json" + "fmt" "log" "net/http" "strconv" "time" - "fmt" "github.com/gorilla/mux" "github.com/mosuka/blast/grpc/client" "github.com/mosuka/blast/http/metrics" diff --git a/http/server/server.go b/http/server/server.go index 6b34ba9..d902465 100644 --- a/http/server/server.go +++ b/http/server/server.go @@ -16,7 +16,6 @@ package server import ( "log" - "math" "net" "net/http" @@ -33,22 +32,18 @@ type HTTPServer struct { listener net.Listener httpAccessLogger *log.Logger - grpcAddress string - grpcClient *client.GRPCClient - maxSendMessageSize int - maxReceiveMessageSize int + grpcAddress 
string + grpcClient *client.GRPCClient logger *log.Logger } func NewHTTPServer(httpAddress string, grpcAddress string) (*HTTPServer, error) { return &HTTPServer{ - httpAddress: httpAddress, - grpcAddress: grpcAddress, - maxSendMessageSize: math.MaxInt32, - maxReceiveMessageSize: math.MaxInt32, - logger: logging.DefaultLogger(), - httpAccessLogger: logging.DefaultLogger(), + httpAddress: httpAddress, + grpcAddress: grpcAddress, + logger: logging.DefaultLogger(), + httpAccessLogger: logging.DefaultLogger(), }, nil } @@ -62,20 +57,10 @@ func (s *HTTPServer) SetHTTPAccessLogger(logger *log.Logger) { return } -func (s *HTTPServer) SetMaxSendMessageSize(maxSendMessageSize int) { - s.maxSendMessageSize = maxSendMessageSize - return -} - -func (s *HTTPServer) SetMaxReceiveMessageSize(maxReceiveMessageSize int) { - s.maxReceiveMessageSize = maxReceiveMessageSize - return -} - func (s *HTTPServer) Start() error { var err error - if s.grpcClient, err = client.NewGRPCClient(s.grpcAddress, s.maxSendMessageSize, s.maxReceiveMessageSize); err != nil { + if s.grpcClient, err = client.NewGRPCClient(s.grpcAddress); err != nil { s.logger.Printf("[ERR] server: Failed to create gRPC client: %s", err.Error()) return err } diff --git a/index/bleve/index.go b/index/bleve/index.go index 45209ff..52bc3c2 100644 --- a/index/bleve/index.go +++ b/index/bleve/index.go @@ -15,6 +15,8 @@ package bleve import ( + "encoding/json" + "io/ioutil" "log" "os" @@ -25,27 +27,84 @@ import ( "github.com/mosuka/blast/logging" ) +const ( + DefaultDir = "./data/index" + DefaultIndexMappingFile = "" + DefaultIndexType = upsidedown.Name + DefaultKvstore = boltdb.Name +) + type IndexConfig struct { - Path string `json:"path,omitempty"` + Dir string `json:"dir,omitempty"` IndexMapping *mapping.IndexMappingImpl `json:"index_mapping,omitempty"` IndexType string `json:"index_type,omitempty"` Kvstore string `json:"kvstore,omitempty"` Kvconfig map[string]interface{} `json:"kvconfig,omitempty"` } -func DefaultConfig() 
*IndexConfig { +func DefaultIndexConfig() *IndexConfig { return &IndexConfig{ - Path: "./data/index", + Dir: DefaultDir, IndexMapping: mapping.NewIndexMapping(), - IndexType: upsidedown.Name, - Kvstore: boltdb.Name, + IndexType: DefaultIndexType, + Kvstore: DefaultKvstore, Kvconfig: map[string]interface{}{ "create_if_missing": true, - "error_if_exists": false, + "error_if_exists": true, }, } } +func (c *IndexConfig) SetIndexMapping(indexMappingFile string) error { + var err error + + f, err := os.Open(indexMappingFile) + if err != nil { + return err + } + defer f.Close() + + b, err := ioutil.ReadAll(f) + if err != nil { + return err + } + + err = json.Unmarshal(b, c.IndexMapping) + if err != nil { + return err + } + + return nil +} + +func NewIndexMapping(file string) (*mapping.IndexMappingImpl, error) { + var err error + + m := mapping.NewIndexMapping() + + if file == "" { + return m, nil + } + + f, err := os.Open(file) + if err != nil { + return nil, err + } + defer f.Close() + + b, err := ioutil.ReadAll(f) + if err != nil { + return nil, err + } + + err = json.Unmarshal(b, m) + if err != nil { + return nil, err + } + + return m, nil +} + type Index struct { index bleve.Index logger *log.Logger @@ -55,12 +114,12 @@ func NewIndex(config *IndexConfig) (*Index, error) { var err error var idx bleve.Index - if _, err = os.Stat(config.Path); os.IsNotExist(err) { - if idx, err = bleve.NewUsing(config.Path, config.IndexMapping, config.IndexType, config.Kvstore, config.Kvconfig); err != nil { + if _, err = os.Stat(config.Dir); os.IsNotExist(err) { + if idx, err = bleve.NewUsing(config.Dir, config.IndexMapping, config.IndexType, config.Kvstore, config.Kvconfig); err != nil { return nil, err } } else { - if idx, err = bleve.OpenUsing(config.Path, config.Kvconfig); err != nil { + if idx, err = bleve.OpenUsing(config.Dir, config.Kvconfig); err != nil { return nil, err } } diff --git a/raft/raft.go b/raft/raft.go index cc752e2..50ee5ff 100644 --- a/raft/raft.go +++ 
b/raft/raft.go @@ -20,21 +20,30 @@ import ( "github.com/hashicorp/raft" ) +const ( + DefaultDir = "./data/raft" + DefaultSnapshotCount = 1 + DefaultTimeout = "10s" + DefaultNodeID = "node1" +) + type RaftConfig struct { - Path string `json:"path,omitempty"` - RetainSnapshotCount int `json:"retain_snapshot_count,omitempty"` - Timeout time.Duration `json:"timeout,omitempty"` - Config *raft.Config `json:"config,omitempty"` + Dir string `json:"dir,omitempty"` + SnapshotCount int `json:"snapshot_count,omitempty"` + Timeout time.Duration `json:"timeout,omitempty"` + Config *raft.Config `json:"config,omitempty"` } -func DefaultConfig() *RaftConfig { +func DefaultRaftConfig() *RaftConfig { config := raft.DefaultConfig() - config.LocalID = "node0" + config.LocalID = DefaultNodeID + + timeoutDuration, _ := time.ParseDuration(DefaultTimeout) return &RaftConfig{ - Path: "./data/raft", - RetainSnapshotCount: 2, - Timeout: 10 * time.Second, - Config: config, + Dir: DefaultDir, + SnapshotCount: DefaultSnapshotCount, + Timeout: timeoutDuration, + Config: config, } } diff --git a/service/service.go b/service/service.go index 7efc979..a30f997 100644 --- a/service/service.go +++ b/service/service.go @@ -20,7 +20,6 @@ import ( "fmt" "io" "log" - "math" "net" "path/filepath" "sync" @@ -53,9 +52,6 @@ type KVSService struct { indexConfig *bleve.IndexConfig index *bleve.Index - maxSendMessageSize int - maxReceiveMessageSize int - mutex sync.Mutex metadata map[string]*protobuf.Metadata @@ -72,9 +68,6 @@ func NewKVSService(peerAddress string, raftConfig *braft.RaftConfig, bootstrap b storeConfig: storeConfig, indexConfig: indexConfig, - maxSendMessageSize: math.MaxInt32, - maxReceiveMessageSize: math.MaxInt32, - metadata: make(map[string]*protobuf.Metadata, 0), logger: logging.DefaultLogger(), @@ -125,14 +118,14 @@ func (s *KVSService) Start() error { // Create the snapshot store. This allows the Raft to truncate the log. 
var snapshots *raft.FileSnapshotStore - if snapshots, err = raft.NewFileSnapshotStoreWithLogger(s.raftConfig.Path, s.raftConfig.RetainSnapshotCount, s.logger); err != nil { - s.logger.Printf("[ERR] service: Failed to create snapshot store at %s: %v", s.raftConfig.Path, err) + if snapshots, err = raft.NewFileSnapshotStoreWithLogger(s.raftConfig.Dir, s.raftConfig.SnapshotCount, s.logger); err != nil { + s.logger.Printf("[ERR] service: Failed to create snapshot store at %s: %v", s.raftConfig.Dir, err) return err } - s.logger.Printf("[INFO] service: Snapshot store has been created at %s", s.raftConfig.Path) + s.logger.Printf("[INFO] service: Snapshot store has been created at %s", s.raftConfig.Dir) // Create the log store and stable store. - raftLogStore := filepath.Join(s.raftConfig.Path, "raft_log.db") + raftLogStore := filepath.Join(s.raftConfig.Dir, "raft_log.db") var logStore *raftboltdb.BoltStore if logStore, err = raftboltdb.NewBoltStore(raftLogStore); err != nil { s.logger.Printf("[ERR] service: Failed to create log store at %s: %v", raftLogStore, err) @@ -330,7 +323,7 @@ func (s *KVSService) Join(ctx context.Context, req *protobuf.JoinRequest) (*prot leaderGRPCAddress = s.metadata[leaderID].GrpcAddress var grpcClient *client.GRPCClient - if grpcClient, err = client.NewGRPCClient(leaderGRPCAddress, s.maxSendMessageSize, s.maxReceiveMessageSize); err != nil { + if grpcClient, err = client.NewGRPCClient(leaderGRPCAddress); err != nil { message := fmt.Sprintf("Failed to create gRPC client for %s", leaderGRPCAddress) s.logger.Printf("[ERR] service: %s: %v", message, err) resp.Success = false @@ -447,7 +440,7 @@ func (s *KVSService) Leave(ctx context.Context, req *protobuf.LeaveRequest) (*pr leaderGRPCAddress = s.metadata[leaderID].GrpcAddress var grpcClient *client.GRPCClient - if grpcClient, err = client.NewGRPCClient(leaderGRPCAddress, s.maxSendMessageSize, s.maxReceiveMessageSize); err != nil { + if grpcClient, err = client.NewGRPCClient(leaderGRPCAddress); err 
!= nil { message := "Failed to create gRPC client" s.logger.Printf("[ERR] service: %s: %v", message, err) resp.Success = false @@ -675,7 +668,7 @@ func (s *KVSService) Put(ctx context.Context, req *protobuf.PutRequest) (*protob leaderGRPCAddress = s.metadata[leaderID].GrpcAddress var grpcClient *client.GRPCClient - if grpcClient, err = client.NewGRPCClient(leaderGRPCAddress, s.maxSendMessageSize, s.maxReceiveMessageSize); err != nil { + if grpcClient, err = client.NewGRPCClient(leaderGRPCAddress); err != nil { message := "Failed to create gRPC client" s.logger.Printf("[ERR] service: %s: %v", message, err) resp.Success = false @@ -761,7 +754,7 @@ func (s *KVSService) Delete(ctx context.Context, req *protobuf.DeleteRequest) (* leaderGRPCAddress = s.metadata[leaderID].GrpcAddress var grpcClient *client.GRPCClient - if grpcClient, err = client.NewGRPCClient(leaderGRPCAddress, s.maxSendMessageSize, s.maxReceiveMessageSize); err != nil { + if grpcClient, err = client.NewGRPCClient(leaderGRPCAddress); err != nil { message := "Failed to create gRPC client" s.logger.Printf("[ERR] service: %s: %v", message, err) resp.Success = false @@ -847,7 +840,7 @@ func (s *KVSService) Bulk(ctx context.Context, req *protobuf.BulkRequest) (*prot leaderGRPCAddress = s.metadata[leaderID].GrpcAddress var grpcClient *client.GRPCClient - if grpcClient, err = client.NewGRPCClient(leaderGRPCAddress, s.maxSendMessageSize, s.maxReceiveMessageSize); err != nil { + if grpcClient, err = client.NewGRPCClient(leaderGRPCAddress); err != nil { message := "Failed to create gRPC client" s.logger.Printf("[ERR] service: %s: %v", message, err) resp.Success = false diff --git a/store/boltdb/store.go b/store/boltdb/store.go index 3a459c3..a93320a 100644 --- a/store/boltdb/store.go +++ b/store/boltdb/store.go @@ -27,18 +27,21 @@ import ( const ( Filename = "boltdb.db" Bucket = "DB" + + DefaultDir = "./data/store" + DefaultMode = os.FileMode(0600) ) type StoreConfig struct { - Path string `json:"path,omitempty"` 
+ Dir string `json:"dir,omitempty"` Mode os.FileMode `json:"mode,omitempty"` Options *bolt.Options `json:"options,omitempty"` } -func DefaultConfig() *StoreConfig { +func DefaultStoreConfig() *StoreConfig { return &StoreConfig{ - Path: "./data/store", - Mode: os.FileMode(0600), + Dir: DefaultDir, + Mode: DefaultMode, Options: bolt.DefaultOptions, } } @@ -52,13 +55,13 @@ func NewStore(config *StoreConfig) (*Store, error) { var err error // Create directory - if err := os.MkdirAll(config.Path, 0755); err != nil && !os.IsExist(err) { + if err := os.MkdirAll(config.Dir, 0755); err != nil && !os.IsExist(err) { return nil, fmt.Errorf("store path not accessible: %v", err) } // Open boltdb var db *bolt.DB - if db, err = bolt.Open(filepath.Join(config.Path, Filename), config.Mode, config.Options); err != nil { + if db, err = bolt.Open(filepath.Join(config.Dir, Filename), config.Mode, config.Options); err != nil { return nil, err } diff --git a/store/boltdb/store_test.go b/store/boltdb/store_test.go index a14867f..5be97bd 100644 --- a/store/boltdb/store_test.go +++ b/store/boltdb/store_test.go @@ -21,13 +21,13 @@ import ( ) func TestStore_Reader(t *testing.T) { - config := DefaultConfig() + config := DefaultStoreConfig() tmpDir, err := ioutil.TempDir("/tmp", "blast") if err != nil { t.Fatalf("unexpected error. %v", err) } - config.Path = tmpDir + config.Dir = tmpDir defer os.RemoveAll(tmpDir) @@ -61,13 +61,13 @@ func TestStore_Reader(t *testing.T) { } func TestStore_Writer(t *testing.T) { - config := DefaultConfig() + config := DefaultStoreConfig() tmpDir, err := ioutil.TempDir("/tmp", "blast") if err != nil { t.Fatalf("unexpected error. %v", err) } - config.Path = tmpDir + config.Dir = tmpDir defer os.RemoveAll(tmpDir) @@ -101,13 +101,13 @@ func TestStore_Writer(t *testing.T) { } func TestStore_Iterator(t *testing.T) { - config := DefaultConfig() + config := DefaultStoreConfig() tmpDir, err := ioutil.TempDir("/tmp", "blast") if err != nil { t.Fatalf("unexpected error. 
%v", err) } - config.Path = tmpDir + config.Dir = tmpDir defer os.RemoveAll(tmpDir) @@ -145,13 +145,13 @@ func TestStore_Iterator(t *testing.T) { } func TestStore_Bulker(t *testing.T) { - config := DefaultConfig() + config := DefaultStoreConfig() tmpDir, err := ioutil.TempDir("/tmp", "blast") if err != nil { t.Fatalf("unexpected error. %v", err) } - config.Path = tmpDir + config.Dir = tmpDir defer os.RemoveAll(tmpDir)