From 2b98466ad4bdefd466991c183a541685d7c0169b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 7 May 2026 20:03:59 +0000 Subject: [PATCH 1/7] Add Quickwit entry Quickwit (Rust, Tantivy-based) exposes an Elasticsearch-compatible REST API but no SQL endpoint, so each ClickBench query is hand-translated to ES DSL in queries.json. Loading goes through /api/v1/_elastic/hits/_bulk; querying through /_search. 19 of the 43 queries are not expressible in Quickwit's ES API (COUNT(DISTINCT), substring LIKE, scripted/runtime fields, REGEXP_REPLACE, ORDER BY on text fields) and are recorded as null. The remaining 24 queries were validated against a 1M-row sample on a single node. Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 2 +- quickwit/README.md | 58 +++++++++++++++ quickwit/benchmark.sh | 67 +++++++++++++++++ quickwit/index_config.yaml | 149 +++++++++++++++++++++++++++++++++++++ quickwit/load.py | 68 +++++++++++++++++ quickwit/queries.json | 43 +++++++++++ quickwit/queries.sql | 43 +++++++++++ quickwit/run.sh | 35 +++++++++ quickwit/template.json | 10 +++ 9 files changed, 474 insertions(+), 1 deletion(-) create mode 100644 quickwit/README.md create mode 100755 quickwit/benchmark.sh create mode 100644 quickwit/index_config.yaml create mode 100644 quickwit/load.py create mode 100644 quickwit/queries.json create mode 100644 quickwit/queries.sql create mode 100755 quickwit/run.sh create mode 100644 quickwit/template.json diff --git a/README.md b/README.md index 244f1560e1..631570d663 100644 --- a/README.md +++ b/README.md @@ -311,7 +311,7 @@ Please help us add more systems and run the benchmarks on more types of VMs: - [ ] MS SQL Server with Column Store Index (without publishing) - [ ] OceanBase - [ ] Planetscale (without publishing) -- [ ] Quickwit +- [x] Quickwit - [ ] Redshift Spectrum - [ ] Seafowl - [ ] ShitholeDB diff --git a/quickwit/README.md b/quickwit/README.md new file mode 100644 index 0000000000..bddbe0cbbc --- /dev/null +++ 
b/quickwit/README.md @@ -0,0 +1,58 @@ +# Quickwit + +[Quickwit](https://quickwit.io) is a Rust-based search engine for log analytics, built on top of [Tantivy](https://github.com/quickwit-oss/tantivy). It exposes an Elasticsearch-compatible REST API for ingestion and search, but does not implement an SQL endpoint, so this benchmark uses the native Elasticsearch query DSL directly. + +## Methodology + +Infrastructure: +- Single-node Quickwit 0.8.2 on AWS EC2 c6a.4xlarge + +Index configuration (`index_config.yaml`): +- All scalar fields declared with `fast: true` so they can participate in aggregations and sorts (Quickwit aggregations require fast fields). +- Keyword-like text fields use the `raw` tokenizer with the `raw` fast-field normalizer to mimic Elasticsearch's `keyword` mapping. +- `EventTime` is set as the index's timestamp field, providing time-based pruning. + +Ingestion (`load.py`): +- Reads `hits.json.gz` and streams NDJSON to the Elasticsearch-compatible bulk endpoint at `/api/v1/_elastic/hits/_bulk`. +- Quickwit's bulk endpoint only honors the `create` action, and rejects payloads >10MB, so batches are smaller than the Elasticsearch loader. + +Queries (`queries.json`): +- Each query in `queries.sql` is hand-translated to the Elasticsearch DSL on the corresponding line of `queries.json`, and submitted to `/api/v1/_elastic/hits/_search`. +- Timing is taken from the `took` field returned by Quickwit (milliseconds, engine-internal). +- Queries that are not expressible in Quickwit's DSL are recorded as `null`. + +## Unsupported queries + +Quickwit's aggregation and query model is narrower than Elasticsearch's. 
The following ClickBench queries cannot currently be expressed and are reported as `null`: + +| Q | Reason | +|----|-----------------------------------------------------------------------| +| 5 | `COUNT(DISTINCT)` — Quickwit has no `cardinality` aggregation | +| 6 | `COUNT(DISTINCT)` | +| 9 | `COUNT(DISTINCT)` | +| 10 | `COUNT(DISTINCT)` | +| 11 | `COUNT(DISTINCT)` | +| 12 | `COUNT(DISTINCT)` | +| 14 | `COUNT(DISTINCT)` | +| 19 | `extract(minute FROM …)` — no scripted/runtime fields | +| 21 | `LIKE '%…%'` — leading wildcards rejected, no `wildcard`/`regexp` | +| 22 | `LIKE '%…%'` | +| 23 | `COUNT(DISTINCT)` | +| 24 | `LIKE '%…%'` | +| 26 | `ORDER BY` on text field — not supported by the search backend | +| 27 | `ORDER BY` on text field | +| 28 | `AVG(length(URL))` — no scripted/runtime fields | +| 29 | `REGEXP_REPLACE` — not supported | +| 30 | `SUM(col + N)` — no scripted aggregations | +| 36 | `ClientIP - N` — no scripted aggregations | +| 40 | `CASE WHEN …` — no scripted/runtime fields | + +All other queries run through the native Elasticsearch DSL. + +## Running + +```bash +bash benchmark.sh +``` + +This installs Quickwit, creates the index, downloads `hits.json.gz`, ingests the data via the ES bulk API, and then runs `run.sh` to time each query three times, with the OS page cache dropped before each query (not between the three tries). 
diff --git a/quickwit/benchmark.sh b/quickwit/benchmark.sh new file mode 100755 index 0000000000..686defe222 --- /dev/null +++ b/quickwit/benchmark.sh @@ -0,0 +1,67 @@ +#!/bin/bash +set -e + +# Install prerequisites +sudo apt-get update -y +sudo apt-get install -y wget curl jq bc python3 python3-pip time + +pip3 install --user requests + +# Download Quickwit +QW_VERSION="0.8.2" +ARCH=$(uname -m) +wget --continue --progress=dot:giga \ + "https://github.com/quickwit-oss/quickwit/releases/download/v${QW_VERSION}/quickwit-v${QW_VERSION}-${ARCH}-unknown-linux-gnu.tar.gz" +tar xzf "quickwit-v${QW_VERSION}-${ARCH}-unknown-linux-gnu.tar.gz" +ln -sfn "quickwit-v${QW_VERSION}" quickwit + +# Start the server in the background. Quickwit defaults: REST on 7280, gRPC on 7281. +pushd quickwit >/dev/null +nohup ./quickwit run > ../quickwit.log 2>&1 & +QW_PID=$! +popd >/dev/null +echo "Quickwit started (PID $QW_PID)" + +# Wait for the server to come up. +for i in $(seq 1 60); do + if curl -sS -f http://localhost:7280/api/v1/version >/dev/null 2>&1; then + echo "Quickwit is ready" + break + fi + sleep 1 +done + +# Create the index from the YAML config. +curl -sS -X POST http://localhost:7280/api/v1/indexes \ + -H 'Content-Type: application/yaml' \ + --data-binary @index_config.yaml + +# Download the data +wget --continue --progress=dot:giga 'https://datasets.clickhouse.com/hits_compatible/hits.json.gz' + +START=$(date +%s) + +# Stream JSON directly into Quickwit via the Elasticsearch-compatible bulk API. +python3 load.py + +# Force any in-flight commits and wait for the data to become searchable. +# The default commit timeout in index_config.yaml is 30s, so wait a bit longer. +sleep 60 + +# Show stats. +curl -sS "http://localhost:7280/api/v1/indexes/hits/describe" | tee stats.json +echo + +END=$(date +%s) +echo "Load time: $((END - START))" + +# Data size on disk (single-node uses qwdata/ inside the install dir). 
+echo -n "Data size: " +du -sb quickwit/qwdata 2>/dev/null | awk '{print $1}' + +# Run queries +chmod +x run.sh +./run.sh + +# Stop Quickwit +kill "$QW_PID" 2>/dev/null || true diff --git a/quickwit/index_config.yaml b/quickwit/index_config.yaml new file mode 100644 index 0000000000..05b593a181 --- /dev/null +++ b/quickwit/index_config.yaml @@ -0,0 +1,149 @@ +version: 0.8 + +index_id: hits + +doc_mapping: + mode: strict + timestamp_field: EventTime + field_mappings: + - {name: WatchID, type: i64, indexed: true, fast: true} + - {name: JavaEnable, type: i64, indexed: true, fast: true} + - {name: Title, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: GoodEvent, type: i64, indexed: true, fast: true} + - name: EventTime + type: datetime + input_formats: ["%Y-%m-%d %H:%M:%S", "%Y-%m-%d", unix_timestamp, rfc3339] + output_format: unix_timestamp_secs + indexed: true + fast: true + fast_precision: seconds + - name: EventDate + type: datetime + input_formats: ["%Y-%m-%d %H:%M:%S", "%Y-%m-%d", unix_timestamp, rfc3339] + output_format: unix_timestamp_secs + indexed: true + fast: true + fast_precision: seconds + - {name: CounterID, type: i64, indexed: true, fast: true} + - {name: ClientIP, type: i64, indexed: true, fast: true} + - {name: RegionID, type: i64, indexed: true, fast: true} + - {name: UserID, type: i64, indexed: true, fast: true} + - {name: CounterClass, type: i64, indexed: true, fast: true} + - {name: OS, type: i64, indexed: true, fast: true} + - {name: UserAgent, type: i64, indexed: true, fast: true} + - {name: URL, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: Referer, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: IsRefresh, type: i64, indexed: true, fast: true} + - {name: RefererCategoryID, type: i64, indexed: true, fast: true} + - {name: RefererRegionID, type: i64, indexed: true, fast: true} + - {name: URLCategoryID, type: i64, indexed: true, fast: true} + - {name: URLRegionID, type: i64, indexed: true, fast: 
true} + - {name: ResolutionWidth, type: i64, indexed: true, fast: true} + - {name: ResolutionHeight, type: i64, indexed: true, fast: true} + - {name: ResolutionDepth, type: i64, indexed: true, fast: true} + - {name: FlashMajor, type: i64, indexed: true, fast: true} + - {name: FlashMinor, type: i64, indexed: true, fast: true} + - {name: FlashMinor2, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: NetMajor, type: i64, indexed: true, fast: true} + - {name: NetMinor, type: i64, indexed: true, fast: true} + - {name: UserAgentMajor, type: i64, indexed: true, fast: true} + - {name: UserAgentMinor, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: CookieEnable, type: i64, indexed: true, fast: true} + - {name: JavascriptEnable, type: i64, indexed: true, fast: true} + - {name: IsMobile, type: i64, indexed: true, fast: true} + - {name: MobilePhone, type: i64, indexed: true, fast: true} + - {name: MobilePhoneModel, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: Params, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: IPNetworkID, type: i64, indexed: true, fast: true} + - {name: TraficSourceID, type: i64, indexed: true, fast: true} + - {name: SearchEngineID, type: i64, indexed: true, fast: true} + - {name: SearchPhrase, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: AdvEngineID, type: i64, indexed: true, fast: true} + - {name: IsArtifical, type: i64, indexed: true, fast: true} + - {name: WindowClientWidth, type: i64, indexed: true, fast: true} + - {name: WindowClientHeight, type: i64, indexed: true, fast: true} + - {name: ClientTimeZone, type: i64, indexed: true, fast: true} + - name: ClientEventTime + type: datetime + input_formats: ["%Y-%m-%d %H:%M:%S", "%Y-%m-%d", unix_timestamp, rfc3339] + output_format: unix_timestamp_secs + indexed: true + fast: true + fast_precision: seconds + - {name: SilverlightVersion1, type: i64, indexed: true, fast: true} + - {name: SilverlightVersion2, type: i64, 
indexed: true, fast: true} + - {name: SilverlightVersion3, type: i64, indexed: true, fast: true} + - {name: SilverlightVersion4, type: i64, indexed: true, fast: true} + - {name: PageCharset, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: CodeVersion, type: i64, indexed: true, fast: true} + - {name: IsLink, type: i64, indexed: true, fast: true} + - {name: IsDownload, type: i64, indexed: true, fast: true} + - {name: IsNotBounce, type: i64, indexed: true, fast: true} + - {name: FUniqID, type: i64, indexed: true, fast: true} + - {name: OriginalURL, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: HID, type: i64, indexed: true, fast: true} + - {name: IsOldCounter, type: i64, indexed: true, fast: true} + - {name: IsEvent, type: i64, indexed: true, fast: true} + - {name: IsParameter, type: i64, indexed: true, fast: true} + - {name: DontCountHits, type: i64, indexed: true, fast: true} + - {name: WithHash, type: i64, indexed: true, fast: true} + - {name: HitColor, type: text, tokenizer: raw, fast: {normalizer: raw}} + - name: LocalEventTime + type: datetime + input_formats: ["%Y-%m-%d %H:%M:%S", "%Y-%m-%d", unix_timestamp, rfc3339] + output_format: unix_timestamp_secs + indexed: true + fast: true + fast_precision: seconds + - {name: Age, type: i64, indexed: true, fast: true} + - {name: Sex, type: i64, indexed: true, fast: true} + - {name: Income, type: i64, indexed: true, fast: true} + - {name: Interests, type: i64, indexed: true, fast: true} + - {name: Robotness, type: i64, indexed: true, fast: true} + - {name: RemoteIP, type: i64, indexed: true, fast: true} + - {name: WindowName, type: i64, indexed: true, fast: true} + - {name: OpenerName, type: i64, indexed: true, fast: true} + - {name: HistoryLength, type: i64, indexed: true, fast: true} + - {name: BrowserLanguage, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: BrowserCountry, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: SocialNetwork, type: text, 
tokenizer: raw, fast: {normalizer: raw}} + - {name: SocialAction, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: HTTPError, type: i64, indexed: true, fast: true} + - {name: SendTiming, type: i64, indexed: true, fast: true} + - {name: DNSTiming, type: i64, indexed: true, fast: true} + - {name: ConnectTiming, type: i64, indexed: true, fast: true} + - {name: ResponseStartTiming, type: i64, indexed: true, fast: true} + - {name: ResponseEndTiming, type: i64, indexed: true, fast: true} + - {name: FetchTiming, type: i64, indexed: true, fast: true} + - {name: SocialSourceNetworkID, type: i64, indexed: true, fast: true} + - {name: SocialSourcePage, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: ParamPrice, type: i64, indexed: true, fast: true} + - {name: ParamOrderID, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: ParamCurrency, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: ParamCurrencyID, type: i64, indexed: true, fast: true} + - {name: OpenstatServiceName, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: OpenstatCampaignID, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: OpenstatAdID, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: OpenstatSourceID, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: UTMSource, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: UTMMedium, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: UTMCampaign, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: UTMContent, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: UTMTerm, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: FromTag, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: HasGCLID, type: i64, indexed: true, fast: true} + - {name: RefererHash, type: i64, indexed: true, fast: true} + - {name: URLHash, type: i64, indexed: true, fast: true} + - {name: CLID, type: i64, indexed: 
true, fast: true} + + store_source: false + +indexing_settings: + commit_timeout_secs: 30 + merge_policy: + type: stable_log + merge_factor: 10 + max_merge_factor: 12 + +search_settings: + default_search_fields: [] diff --git a/quickwit/load.py b/quickwit/load.py new file mode 100644 index 0000000000..a5ea304585 --- /dev/null +++ b/quickwit/load.py @@ -0,0 +1,68 @@ +import gzip +import json +from itertools import islice + +import requests + +# Quickwit's _bulk endpoint accepts at most 10MB per request; keep batches +# small enough to stay under the limit comfortably. +BULK_SIZE = 2000 +QW_URL = "http://localhost:7280/api/v1/_elastic/hits/_bulk" +TOTAL_RECORDS = 99997497 + +# Quickwit only supports the "create" action of the Elasticsearch bulk API. +ACTION_META_BYTES = (json.dumps({"create": {"_index": "hits"}}) + "\n").encode("utf-8") +REQUEST_TIMEOUT = 120 + + +def build_body(docs): + parts = [] + for doc in docs: + parts.append(ACTION_META_BYTES) + parts.append(doc.encode("utf-8") if isinstance(doc, str) else doc) + return b"".join(parts) + + +def send_bulk(session, docs, batch_num): + # Quickwit's bulk endpoint requires a Content-Length header, so we have to + # buffer the body rather than streaming it. 
+ resp = session.post(QW_URL, data=build_body(docs), timeout=REQUEST_TIMEOUT) + if resp.status_code >= 300: + print( + f"\nSent batch {batch_num} ({len(docs)} docs) - Warning: HTTP {resp.status_code}: {resp.text[:300]}" + ) + return 0 + + body = resp.json() + if body.get("errors"): + items = body.get("items", []) + err = sum(1 for i in items if "error" in i.get("create", {})) + if err: + print(f"\nBatch {batch_num}: {err} item errors") + + return len(docs) + + +def main(): + total_docs = 0 + batch_num = 0 + + with requests.Session() as session: + session.headers.update({"Content-Type": "application/x-ndjson"}) + + with gzip.open("hits.json.gz", mode="rt", encoding="utf-8") as f: + print("Reading from hits.json.gz") + while True: + docs = list(islice(f, BULK_SIZE)) + if not docs: + break + batch_num += 1 + total_docs += send_bulk(session, docs, batch_num) + pct = (total_docs / TOTAL_RECORDS) * 100 if TOTAL_RECORDS else 0 + print(f" {pct:.2f}% ({total_docs}/{TOTAL_RECORDS})") + + print(f"\nTotal docs sent: {total_docs}") + + +if __name__ == "__main__": + main() diff --git a/quickwit/queries.json b/quickwit/queries.json new file mode 100644 index 0000000000..49842c60be --- /dev/null +++ b/quickwit/queries.json @@ -0,0 +1,43 @@ +{"size":0,"track_total_hits":true,"query":{"match_all":{}}} +{"size":0,"track_total_hits":true,"query":{"bool":{"must_not":[{"term":{"AdvEngineID":0}}]}}} +{"size":0,"track_total_hits":true,"aggs":{"sum_adv":{"sum":{"field":"AdvEngineID"}},"avg_res":{"avg":{"field":"ResolutionWidth"}}}} +{"size":0,"aggs":{"avg_user":{"avg":{"field":"UserID"}}}} +null +null +{"size":0,"aggs":{"min_date":{"min":{"field":"EventDate"}},"max_date":{"max":{"field":"EventDate"}}}} +{"size":0,"query":{"bool":{"must_not":[{"term":{"AdvEngineID":0}}]}},"aggs":{"by_adv":{"terms":{"field":"AdvEngineID","size":1000,"order":{"_count":"desc"}}}}} +null +null +null +null 
+{"size":0,"query":{"bool":{"must_not":[{"term":{"SearchPhrase":""}}]}},"aggs":{"sp":{"terms":{"field":"SearchPhrase","size":10,"order":{"_count":"desc"}}}}} +null +{"size":0,"query":{"bool":{"must_not":[{"term":{"SearchPhrase":""}}]}},"aggs":{"se":{"terms":{"field":"SearchEngineID","size":10,"order":{"_count":"desc"}},"aggs":{"sp":{"terms":{"field":"SearchPhrase","size":10,"order":{"_count":"desc"}}}}}}} +{"size":0,"aggs":{"u":{"terms":{"field":"UserID","size":10,"order":{"_count":"desc"}}}}} +{"size":0,"aggs":{"u":{"terms":{"field":"UserID","size":10,"order":{"_count":"desc"}},"aggs":{"sp":{"terms":{"field":"SearchPhrase","size":10,"order":{"_count":"desc"}}}}}}} +{"size":0,"aggs":{"u":{"terms":{"field":"UserID","size":10},"aggs":{"sp":{"terms":{"field":"SearchPhrase","size":10}}}}}} +null +{"size":10,"query":{"term":{"UserID":435090932899640449}}} +null +null +null +null +{"size":10,"query":{"bool":{"must_not":[{"term":{"SearchPhrase":""}}]}},"sort":[{"EventTime":"asc"}]} +null +null +null +null +null +{"size":0,"query":{"bool":{"must_not":[{"term":{"SearchPhrase":""}}]}},"aggs":{"se":{"terms":{"field":"SearchEngineID","size":10,"order":{"_count":"desc"}},"aggs":{"ip":{"terms":{"field":"ClientIP","size":10,"order":{"_count":"desc"}},"aggs":{"sumref":{"sum":{"field":"IsRefresh"}},"avgres":{"avg":{"field":"ResolutionWidth"}}}}}}}} +{"size":0,"query":{"bool":{"must_not":[{"term":{"SearchPhrase":""}}]}},"aggs":{"w":{"terms":{"field":"WatchID","size":10,"order":{"_count":"desc"}},"aggs":{"ip":{"terms":{"field":"ClientIP","size":10,"order":{"_count":"desc"}},"aggs":{"sumref":{"sum":{"field":"IsRefresh"}},"avgres":{"avg":{"field":"ResolutionWidth"}}}}}}}} +{"size":0,"aggs":{"w":{"terms":{"field":"WatchID","size":10,"order":{"_count":"desc"}},"aggs":{"ip":{"terms":{"field":"ClientIP","size":10,"order":{"_count":"desc"}},"aggs":{"sumref":{"sum":{"field":"IsRefresh"}},"avgres":{"avg":{"field":"ResolutionWidth"}}}}}}}} 
+{"size":0,"aggs":{"u":{"terms":{"field":"URL","size":10,"order":{"_count":"desc"}}}}} +{"size":0,"aggs":{"u":{"terms":{"field":"URL","size":10,"order":{"_count":"desc"}}}}} +null +{"size":0,"query":{"bool":{"filter":[{"term":{"CounterID":62}},{"range":{"EventDate":{"gte":"2013-07-01","lte":"2013-07-31"}}},{"term":{"DontCountHits":0}},{"term":{"IsRefresh":0}}],"must_not":[{"term":{"URL":""}}]}},"aggs":{"u":{"terms":{"field":"URL","size":10,"order":{"_count":"desc"}}}}} +{"size":0,"query":{"bool":{"filter":[{"term":{"CounterID":62}},{"range":{"EventDate":{"gte":"2013-07-01","lte":"2013-07-31"}}},{"term":{"DontCountHits":0}},{"term":{"IsRefresh":0}}],"must_not":[{"term":{"Title":""}}]}},"aggs":{"t":{"terms":{"field":"Title","size":10,"order":{"_count":"desc"}}}}} +{"size":0,"query":{"bool":{"filter":[{"term":{"CounterID":62}},{"range":{"EventDate":{"gte":"2013-07-01","lte":"2013-07-31"}}},{"term":{"IsRefresh":0}},{"term":{"IsDownload":0}}],"must_not":[{"term":{"IsLink":0}}]}},"aggs":{"u":{"terms":{"field":"URL","size":1010,"order":{"_count":"desc"}}}}} +null +{"size":0,"query":{"bool":{"filter":[{"term":{"CounterID":62}},{"range":{"EventDate":{"gte":"2013-07-01","lte":"2013-07-31"}}},{"term":{"IsRefresh":0}},{"bool":{"should":[{"term":{"TraficSourceID":-1}},{"term":{"TraficSourceID":6}}]}},{"term":{"RefererHash":3594120000172545465}}]}},"aggs":{"uh":{"terms":{"field":"URLHash","size":110,"order":{"_count":"desc"}},"aggs":{"ed":{"terms":{"field":"EventDate","size":110,"order":{"_count":"desc"}}}}}}} +{"size":0,"query":{"bool":{"filter":[{"term":{"CounterID":62}},{"range":{"EventDate":{"gte":"2013-07-01","lte":"2013-07-31"}}},{"term":{"IsRefresh":0}},{"term":{"DontCountHits":0}},{"term":{"URLHash":2868770270353813622}}]}},"aggs":{"w":{"terms":{"field":"WindowClientWidth","size":10010,"order":{"_count":"desc"}},"aggs":{"h":{"terms":{"field":"WindowClientHeight","size":10010,"order":{"_count":"desc"}}}}}}} 
+{"size":0,"query":{"bool":{"filter":[{"term":{"CounterID":62}},{"range":{"EventDate":{"gte":"2013-07-14","lte":"2013-07-15"}}},{"term":{"IsRefresh":0}},{"term":{"DontCountHits":0}}]}},"aggs":{"dt":{"date_histogram":{"field":"EventTime","fixed_interval":"1m"}}}} diff --git a/quickwit/queries.sql b/quickwit/queries.sql new file mode 100644 index 0000000000..7d093d057d --- /dev/null +++ b/quickwit/queries.sql @@ -0,0 +1,43 @@ +SELECT COUNT(*) FROM hits; +SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; +SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; +SELECT AVG(UserID) FROM hits; +SELECT COUNT(DISTINCT UserID) FROM hits; +SELECT COUNT(DISTINCT SearchPhrase) FROM hits; +SELECT MIN(EventDate), MAX(EventDate) FROM hits; +SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; +SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; +SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; +SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY 
COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; +SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID FROM hits WHERE UserID = 435090932899640449; +SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; +SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; +SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), 
SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; +SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), 
AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; +SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; +SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; +SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1010; +SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1010; +SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID 
IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 110; +SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10010; +SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 1010; diff --git a/quickwit/run.sh b/quickwit/run.sh new file mode 100755 index 0000000000..66cb7b20ac --- /dev/null +++ b/quickwit/run.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +TRIES=3 +SEARCH_URL="http://localhost:7280/api/v1/_elastic/hits/_search" + +while IFS= read -r QUERY; do + sync && echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null + + echo -n "[" + + for i in $(seq 1 $TRIES); do + if [ "$QUERY" = "null" ]; then + # Query is not expressible in Quickwit (e.g. cardinality, scripts, regex_replace). + echo -n "null" + else + START=$(date +%s.%N) + QW_RSP=$(curl -s -X POST "$SEARCH_URL" -H 'Content-Type: application/json' -d "$QUERY") + END=$(date +%s.%N) + + # Quickwit returns "took" in milliseconds (the engine-internal latency). + QW_TIME=$(echo "$QW_RSP" | jq -r 'if has("error") or has("status") then "null" else (.took | tostring) end') + + if [ "$QW_TIME" = "null" ] || [ -z "$QW_TIME" ]; then + echo -n "null" + else + # Convert ms -> seconds with 4-decimal precision. 
+ printf "%.4f" "$(echo "scale=4; $QW_TIME / 1000" | bc)" + fi + fi + + [ "$i" != "$TRIES" ] && echo -n ", " + done + + echo "]," +done < queries.json diff --git a/quickwit/template.json b/quickwit/template.json new file mode 100644 index 0000000000..022f7ad20b --- /dev/null +++ b/quickwit/template.json @@ -0,0 +1,10 @@ +{ + "system": "Quickwit", + "proprietary": "no", + "hardware": "cpu", + "tuned": "no", + "tags": [ + "Rust", + "search" + ] +} From 20c2f779f31b0927a41162b12ae5006c71cf6cf4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 7 May 2026 20:26:50 +0000 Subject: [PATCH 2/7] quickwit: install python3-requests via apt, drop pip3 Ubuntu 24.04 (the noble image used by run-benchmark.sh) refuses "pip3 install --user requests" under PEP 668's externally-managed environment, which aborted benchmark.sh after ~28s on c7a.metal-48xl. The python3-requests apt package is available and sufficient. Also drop the symlink "quickwit -> quickwit-v0.8.2" since the source directory is itself named "quickwit", and reference the versioned dir directly via $QW_DIR. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- quickwit/benchmark.sh | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/quickwit/benchmark.sh b/quickwit/benchmark.sh index 686defe222..f0327bb6ef 100755 --- a/quickwit/benchmark.sh +++ b/quickwit/benchmark.sh @@ -3,20 +3,18 @@ set -e # Install prerequisites sudo apt-get update -y -sudo apt-get install -y wget curl jq bc python3 python3-pip time - -pip3 install --user requests +sudo apt-get install -y wget curl jq bc python3 python3-requests # Download Quickwit QW_VERSION="0.8.2" ARCH=$(uname -m) +QW_DIR="quickwit-v${QW_VERSION}" wget --continue --progress=dot:giga \ - "https://github.com/quickwit-oss/quickwit/releases/download/v${QW_VERSION}/quickwit-v${QW_VERSION}-${ARCH}-unknown-linux-gnu.tar.gz" -tar xzf "quickwit-v${QW_VERSION}-${ARCH}-unknown-linux-gnu.tar.gz" -ln -sfn "quickwit-v${QW_VERSION}" quickwit + "https://github.com/quickwit-oss/quickwit/releases/download/v${QW_VERSION}/${QW_DIR}-${ARCH}-unknown-linux-gnu.tar.gz" +tar xzf "${QW_DIR}-${ARCH}-unknown-linux-gnu.tar.gz" # Start the server in the background. Quickwit defaults: REST on 7280, gRPC on 7281. -pushd quickwit >/dev/null +pushd "$QW_DIR" >/dev/null nohup ./quickwit run > ../quickwit.log 2>&1 & QW_PID=$! popd >/dev/null @@ -57,7 +55,7 @@ echo "Load time: $((END - START))" # Data size on disk (single-node uses qwdata/ inside the install dir). echo -n "Data size: " -du -sb quickwit/qwdata 2>/dev/null | awk '{print $1}' +du -sb "$QW_DIR/qwdata" 2>/dev/null | awk '{print $1}' # Run queries chmod +x run.sh From f0b24340856131b55e210699ba746107de54c3ce Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 8 May 2026 11:50:41 +0000 Subject: [PATCH 3/7] quickwit: switch to v0.9 nightly, use cardinality and wildcard Stable Quickwit 0.8.2 has neither the `cardinality` aggregation nor a `wildcard` query, so 19 of the 43 ClickBench queries had to be reported as null. 
The v0.9 line (still unreleased; we use the `v0.9.0-rc` Docker image) adds both, which lets us express 11 more queries (Q5/6/9/10/11/ 12/14/21/22/23/24). 8 queries still depend on scripted/runtime fields or text-field sort, neither of which v0.9 provides. Loading switches from the Elasticsearch-compatible bulk endpoint to `quickwit tool local-ingest`, fed by `zcat hits.json.gz` over stdin. v0.9's sharded ingest-v2 API caps single-node throughput to a few MB/s and stalls waiting for shards to scale; `local-ingest` builds splits directly on the configured storage and the running server picks them up at the next metastore poll. Co-Authored-By: Claude Opus 4.7 (1M context) --- quickwit/README.md | 31 +++++++------------- quickwit/benchmark.sh | 52 +++++++++++++++++++-------------- quickwit/load.py | 68 ------------------------------------------- quickwit/queries.json | 22 +++++++------- 4 files changed, 52 insertions(+), 121 deletions(-) delete mode 100644 quickwit/load.py diff --git a/quickwit/README.md b/quickwit/README.md index bddbe0cbbc..d987f12b32 100644 --- a/quickwit/README.md +++ b/quickwit/README.md @@ -5,16 +5,18 @@ ## Methodology Infrastructure: -- Single-node Quickwit 0.8.2 on AWS EC2 c6a.4xlarge +- Single-node Quickwit **v0.9.0-rc** (Docker `quickwit/quickwit:v0.9.0-rc`). + + Stable **0.8.2** is missing `cardinality`, `wildcard`, and several other features the benchmark relies on, so we use the v0.9 release candidate. The v0.9 line is still unreleased — as soon as a stable v0.9.x ships, bump `QW_IMAGE` in `benchmark.sh`. Index configuration (`index_config.yaml`): -- All scalar fields declared with `fast: true` so they can participate in aggregations and sorts (Quickwit aggregations require fast fields). +- All scalar fields declared with `fast: true` so they can participate in aggregations and sorts. - Keyword-like text fields use the `raw` tokenizer with the `raw` fast-field normalizer to mimic Elasticsearch's `keyword` mapping. 
- `EventTime` is set as the index's timestamp field, providing time-based pruning. -Ingestion (`load.py`): -- Reads `hits.json.gz` and streams NDJSON to the Elasticsearch-compatible bulk endpoint at `/api/v1/_elastic/hits/_bulk`. -- Quickwit's bulk endpoint only honors the `create` action, and rejects payloads >10MB, so batches are smaller than the Elasticsearch loader. +Ingestion (`benchmark.sh`): +- Streams `hits.json.gz` decompressed into `quickwit tool local-ingest`, which builds splits directly on local storage. We do **not** use the Elasticsearch bulk endpoint: v0.9's sharded ingest-v2 API caps single-node throughput to a few MB/s in our testing and stalls waiting for shards to scale. `local-ingest` bypasses the ingest pipeline entirely. +- The server picks up the new splits on its next metastore poll (default 30 s). Queries (`queries.json`): - Each query in `queries.sql` is hand-translated to the Elasticsearch DSL on the corresponding line of `queries.json`, and submitted to `/api/v1/_elastic/hits/_search`. @@ -23,23 +25,12 @@ Queries (`queries.json`): ## Unsupported queries -Quickwit's aggregation and query model is narrower than Elasticsearch's. 
The following ClickBench queries cannot currently be expressed and are reported as `null`: +The following ClickBench queries cannot currently be expressed in Quickwit's Elasticsearch-compatible DSL and are reported as `null`: | Q | Reason | |----|-----------------------------------------------------------------------| -| 5 | `COUNT(DISTINCT)` — Quickwit has no `cardinality` aggregation | -| 6 | `COUNT(DISTINCT)` | -| 9 | `COUNT(DISTINCT)` | -| 10 | `COUNT(DISTINCT)` | -| 11 | `COUNT(DISTINCT)` | -| 12 | `COUNT(DISTINCT)` | -| 14 | `COUNT(DISTINCT)` | | 19 | `extract(minute FROM …)` — no scripted/runtime fields | -| 21 | `LIKE '%…%'` — leading wildcards rejected, no `wildcard`/`regexp` | -| 22 | `LIKE '%…%'` | -| 23 | `COUNT(DISTINCT)` | -| 24 | `LIKE '%…%'` | -| 26 | `ORDER BY` on text field — not supported by the search backend | +| 26 | `ORDER BY` on text field — `sort by field on type text is currently not supported` | | 27 | `ORDER BY` on text field | | 28 | `AVG(length(URL))` — no scripted/runtime fields | | 29 | `REGEXP_REPLACE` — not supported | @@ -47,7 +38,7 @@ Quickwit's aggregation and query model is narrower than Elasticsearch's. The fol | 36 | `ClientIP - N` — no scripted aggregations | | 40 | `CASE WHEN …` — no scripted/runtime fields | -All other queries run through the native Elasticsearch DSL. +All other 35 queries run through the native Elasticsearch DSL, including `cardinality` (Q5/6/9/10/11/12/14) and `wildcard` (Q21/22/23/24). ## Running @@ -55,4 +46,4 @@ All other queries run through the native Elasticsearch DSL. bash benchmark.sh ``` -This installs Quickwit, creates the index, downloads `hits.json.gz`, ingests the data via the ES bulk API, and then runs `run.sh` to time each query three times with caches dropped between runs. +Installs Docker and Quickwit, creates the index, downloads `hits.json.gz`, runs `local-ingest`, then runs `run.sh` to time each query three times with caches dropped between runs. 
diff --git a/quickwit/benchmark.sh b/quickwit/benchmark.sh index f0327bb6ef..ae3c81a481 100755 --- a/quickwit/benchmark.sh +++ b/quickwit/benchmark.sh @@ -3,22 +3,24 @@ set -e # Install prerequisites sudo apt-get update -y -sudo apt-get install -y wget curl jq bc python3 python3-requests +sudo apt-get install -y wget curl jq bc docker.io +sudo systemctl start docker -# Download Quickwit -QW_VERSION="0.8.2" -ARCH=$(uname -m) -QW_DIR="quickwit-v${QW_VERSION}" -wget --continue --progress=dot:giga \ - "https://github.com/quickwit-oss/quickwit/releases/download/v${QW_VERSION}/${QW_DIR}-${ARCH}-unknown-linux-gnu.tar.gz" -tar xzf "${QW_DIR}-${ARCH}-unknown-linux-gnu.tar.gz" +# We use the Quickwit v0.9 release candidate. Stable v0.8.2 is missing +# `cardinality`, `wildcard`, and several other features the benchmark relies +# on; only the v0.9 line (still unreleased as of writing) provides them. +QW_IMAGE="quickwit/quickwit:v0.9.0-rc" +sudo docker pull "$QW_IMAGE" + +# Quickwit's data directory (shared between the server and the local-ingest +# container). +QW_DATA="$(pwd)/qwdata" +sudo rm -rf "$QW_DATA" +mkdir -p "$QW_DATA" # Start the server in the background. Quickwit defaults: REST on 7280, gRPC on 7281. -pushd "$QW_DIR" >/dev/null -nohup ./quickwit run > ../quickwit.log 2>&1 & -QW_PID=$! -popd >/dev/null -echo "Quickwit started (PID $QW_PID)" +sudo docker run -d --name qw --network host -v "$QW_DATA":/quickwit/qwdata "$QW_IMAGE" run +echo "Quickwit container started" # Wait for the server to come up. for i in $(seq 1 60); do @@ -39,12 +41,18 @@ wget --continue --progress=dot:giga 'https://datasets.clickhouse.com/hits_compat START=$(date +%s) -# Stream JSON directly into Quickwit via the Elasticsearch-compatible bulk API. -python3 load.py +# Use `quickwit tool local-ingest` instead of the Elasticsearch-compatible +# bulk endpoint. 
v0.9's sharded ingest-v2 API caps single-node throughput +# to a few MB/s and gets stuck waiting for shards to scale, while +# `local-ingest` builds splits directly and writes them to the index +# storage. The running server picks up new splits on its next metastore +# poll (default 30s). +zcat hits.json.gz | sudo docker run --rm -i --network host \ + -v "$QW_DATA":/quickwit/qwdata \ + "$QW_IMAGE" tool local-ingest --index hits -y -# Force any in-flight commits and wait for the data to become searchable. -# The default commit timeout in index_config.yaml is 30s, so wait a bit longer. -sleep 60 +# Wait long enough for the server to refresh its metastore view. +sleep 35 # Show stats. curl -sS "http://localhost:7280/api/v1/indexes/hits/describe" | tee stats.json @@ -53,13 +61,13 @@ echo END=$(date +%s) echo "Load time: $((END - START))" -# Data size on disk (single-node uses qwdata/ inside the install dir). +# Data size on disk. echo -n "Data size: " -du -sb "$QW_DIR/qwdata" 2>/dev/null | awk '{print $1}' +sudo du -sb "$QW_DATA" | awk '{print $1}' # Run queries chmod +x run.sh ./run.sh -# Stop Quickwit -kill "$QW_PID" 2>/dev/null || true +sudo docker stop qw 2>/dev/null || true +sudo docker rm qw 2>/dev/null || true diff --git a/quickwit/load.py b/quickwit/load.py deleted file mode 100644 index a5ea304585..0000000000 --- a/quickwit/load.py +++ /dev/null @@ -1,68 +0,0 @@ -import gzip -import json -from itertools import islice - -import requests - -# Quickwit's _bulk endpoint accepts at most 10MB per request; keep batches -# small enough to stay under the limit comfortably. -BULK_SIZE = 2000 -QW_URL = "http://localhost:7280/api/v1/_elastic/hits/_bulk" -TOTAL_RECORDS = 99997497 - -# Quickwit only supports the "create" action of the Elasticsearch bulk API. 
-ACTION_META_BYTES = (json.dumps({"create": {"_index": "hits"}}) + "\n").encode("utf-8") -REQUEST_TIMEOUT = 120 - - -def build_body(docs): - parts = [] - for doc in docs: - parts.append(ACTION_META_BYTES) - parts.append(doc.encode("utf-8") if isinstance(doc, str) else doc) - return b"".join(parts) - - -def send_bulk(session, docs, batch_num): - # Quickwit's bulk endpoint requires a Content-Length header, so we have to - # buffer the body rather than streaming it. - resp = session.post(QW_URL, data=build_body(docs), timeout=REQUEST_TIMEOUT) - if resp.status_code >= 300: - print( - f"\nSent batch {batch_num} ({len(docs)} docs) - Warning: HTTP {resp.status_code}: {resp.text[:300]}" - ) - return 0 - - body = resp.json() - if body.get("errors"): - items = body.get("items", []) - err = sum(1 for i in items if "error" in i.get("create", {})) - if err: - print(f"\nBatch {batch_num}: {err} item errors") - - return len(docs) - - -def main(): - total_docs = 0 - batch_num = 0 - - with requests.Session() as session: - session.headers.update({"Content-Type": "application/x-ndjson"}) - - with gzip.open("hits.json.gz", mode="rt", encoding="utf-8") as f: - print("Reading from hits.json.gz") - while True: - docs = list(islice(f, BULK_SIZE)) - if not docs: - break - batch_num += 1 - total_docs += send_bulk(session, docs, batch_num) - pct = (total_docs / TOTAL_RECORDS) * 100 if TOTAL_RECORDS else 0 - print(f" {pct:.2f}% ({total_docs}/{TOTAL_RECORDS})") - - print(f"\nTotal docs sent: {total_docs}") - - -if __name__ == "__main__": - main() diff --git a/quickwit/queries.json b/quickwit/queries.json index 49842c60be..b7b298d699 100644 --- a/quickwit/queries.json +++ b/quickwit/queries.json @@ -2,26 +2,26 @@ {"size":0,"track_total_hits":true,"query":{"bool":{"must_not":[{"term":{"AdvEngineID":0}}]}}} {"size":0,"track_total_hits":true,"aggs":{"sum_adv":{"sum":{"field":"AdvEngineID"}},"avg_res":{"avg":{"field":"ResolutionWidth"}}}} {"size":0,"aggs":{"avg_user":{"avg":{"field":"UserID"}}}} 
-null -null +{"size":0,"aggs":{"u":{"cardinality":{"field":"UserID"}}}} +{"size":0,"aggs":{"u":{"cardinality":{"field":"SearchPhrase"}}}} {"size":0,"aggs":{"min_date":{"min":{"field":"EventDate"}},"max_date":{"max":{"field":"EventDate"}}}} {"size":0,"query":{"bool":{"must_not":[{"term":{"AdvEngineID":0}}]}},"aggs":{"by_adv":{"terms":{"field":"AdvEngineID","size":1000,"order":{"_count":"desc"}}}}} -null -null -null -null +{"size":0,"aggs":{"r":{"terms":{"field":"RegionID","size":10,"order":{"u":"desc"}},"aggs":{"u":{"cardinality":{"field":"UserID"}}}}}} +{"size":0,"aggs":{"r":{"terms":{"field":"RegionID","size":10,"order":{"_count":"desc"}},"aggs":{"sumadv":{"sum":{"field":"AdvEngineID"}},"avgres":{"avg":{"field":"ResolutionWidth"}},"u":{"cardinality":{"field":"UserID"}}}}}} +{"size":0,"query":{"bool":{"must_not":[{"term":{"MobilePhoneModel":""}}]}},"aggs":{"m":{"terms":{"field":"MobilePhoneModel","size":10,"order":{"u":"desc"}},"aggs":{"u":{"cardinality":{"field":"UserID"}}}}}} +{"size":0,"query":{"bool":{"must_not":[{"term":{"MobilePhoneModel":""}}]}},"aggs":{"p":{"terms":{"field":"MobilePhone","size":10,"order":{"u":"desc"}},"aggs":{"u":{"cardinality":{"field":"UserID"}},"m":{"terms":{"field":"MobilePhoneModel","size":10,"order":{"u":"desc"}},"aggs":{"u":{"cardinality":{"field":"UserID"}}}}}}}} {"size":0,"query":{"bool":{"must_not":[{"term":{"SearchPhrase":""}}]}},"aggs":{"sp":{"terms":{"field":"SearchPhrase","size":10,"order":{"_count":"desc"}}}}} -null +{"size":0,"query":{"bool":{"must_not":[{"term":{"SearchPhrase":""}}]}},"aggs":{"sp":{"terms":{"field":"SearchPhrase","size":10,"order":{"u":"desc"}},"aggs":{"u":{"cardinality":{"field":"UserID"}}}}}} {"size":0,"query":{"bool":{"must_not":[{"term":{"SearchPhrase":""}}]}},"aggs":{"se":{"terms":{"field":"SearchEngineID","size":10,"order":{"_count":"desc"}},"aggs":{"sp":{"terms":{"field":"SearchPhrase","size":10,"order":{"_count":"desc"}}}}}}} 
{"size":0,"aggs":{"u":{"terms":{"field":"UserID","size":10,"order":{"_count":"desc"}}}}} {"size":0,"aggs":{"u":{"terms":{"field":"UserID","size":10,"order":{"_count":"desc"}},"aggs":{"sp":{"terms":{"field":"SearchPhrase","size":10,"order":{"_count":"desc"}}}}}}} {"size":0,"aggs":{"u":{"terms":{"field":"UserID","size":10},"aggs":{"sp":{"terms":{"field":"SearchPhrase","size":10}}}}}} null {"size":10,"query":{"term":{"UserID":435090932899640449}}} -null -null -null -null +{"size":0,"track_total_hits":true,"query":{"wildcard":{"URL":"*google*"}}} +{"size":0,"query":{"bool":{"filter":[{"wildcard":{"URL":"*google*"}}],"must_not":[{"term":{"SearchPhrase":""}}]}},"aggs":{"sp":{"terms":{"field":"SearchPhrase","size":10,"order":{"_count":"desc"}}}}} +{"size":0,"query":{"bool":{"filter":[{"wildcard":{"Title":"*Google*"}}],"must_not":[{"wildcard":{"URL":"*.google.*"}},{"term":{"SearchPhrase":""}}]}},"aggs":{"sp":{"terms":{"field":"SearchPhrase","size":10,"order":{"_count":"desc"}},"aggs":{"u":{"cardinality":{"field":"UserID"}}}}}} +{"size":10,"query":{"wildcard":{"URL":"*google*"}},"sort":[{"EventTime":"asc"}]} {"size":10,"query":{"bool":{"must_not":[{"term":{"SearchPhrase":""}}]}},"sort":[{"EventTime":"asc"}]} null null From b607f9415c0c151b5833c4428237c90c63f5fcb7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 8 May 2026 13:01:00 +0000 Subject: [PATCH 4/7] quickwit: throttle load progress, silence noisy commands, bump search timeout The cloud-init log uploaded after the run is constrained to <1 MiB. Two sources were chatty enough to risk hitting that limit on a 100M-row load: `local-ingest`'s per-second progress line and the apt/docker pull output. Throttle the former to one line per ~30 s with awk, and silence apt/docker-pull entirely. Also add node-config.yaml mounted on top of the image's default config to bump the searcher's per-request and per-leaf timeouts from 30 s to 600 s. 
Several high-cardinality nested aggregations (Q17/18/32/33) on the full dataset run longer than 30 s and were timing out. Co-Authored-By: Claude Opus 4.7 (1M context) --- quickwit/benchmark.sh | 41 +++++++++++++++++++++++++++------------ quickwit/node-config.yaml | 8 ++++++++ 2 files changed, 37 insertions(+), 12 deletions(-) create mode 100644 quickwit/node-config.yaml diff --git a/quickwit/benchmark.sh b/quickwit/benchmark.sh index ae3c81a481..8d438a2652 100755 --- a/quickwit/benchmark.sh +++ b/quickwit/benchmark.sh @@ -1,16 +1,18 @@ #!/bin/bash -set -e +set -eo pipefail -# Install prerequisites -sudo apt-get update -y -sudo apt-get install -y wget curl jq bc docker.io +export DEBIAN_FRONTEND=noninteractive + +# Install prerequisites quietly +sudo apt-get update -qq >/dev/null +sudo apt-get install -y -qq wget curl jq bc docker.io >/dev/null sudo systemctl start docker # We use the Quickwit v0.9 release candidate. Stable v0.8.2 is missing # `cardinality`, `wildcard`, and several other features the benchmark relies # on; only the v0.9 line (still unreleased as of writing) provides them. QW_IMAGE="quickwit/quickwit:v0.9.0-rc" -sudo docker pull "$QW_IMAGE" +sudo docker pull -q "$QW_IMAGE" >/dev/null # Quickwit's data directory (shared between the server and the local-ingest # container). @@ -19,7 +21,13 @@ sudo rm -rf "$QW_DATA" mkdir -p "$QW_DATA" # Start the server in the background. Quickwit defaults: REST on 7280, gRPC on 7281. -sudo docker run -d --name qw --network host -v "$QW_DATA":/quickwit/qwdata "$QW_IMAGE" run +# Mount node-config.yaml on top of the image's default config to bump the +# searcher timeouts (defaults are 30s, which is too low for some of the +# nested high-cardinality aggregations on the full 100M-row dataset). 
+sudo docker run -d --name qw --network host \ + -v "$QW_DATA":/quickwit/qwdata \ + -v "$(pwd)/node-config.yaml":/quickwit/config/quickwit.yaml \ + "$QW_IMAGE" run >/dev/null echo "Quickwit container started" # Wait for the server to come up. @@ -34,10 +42,11 @@ done # Create the index from the YAML config. curl -sS -X POST http://localhost:7280/api/v1/indexes \ -H 'Content-Type: application/yaml' \ - --data-binary @index_config.yaml + --data-binary @index_config.yaml | jq -r '.index_uid // .message' -# Download the data -wget --continue --progress=dot:giga 'https://datasets.clickhouse.com/hits_compatible/hits.json.gz' +# Download the data quietly (the dataset is ~14 GB; full progress would +# dominate the captured benchmark log). +wget --continue -q 'https://datasets.clickhouse.com/hits_compatible/hits.json.gz' START=$(date +%s) @@ -47,16 +56,24 @@ START=$(date +%s) # `local-ingest` builds splits directly and writes them to the index # storage. The running server picks up new splits on its next metastore # poll (default 30s). +# +# local-ingest emits a "Num docs ... Thrghput ... Time" progress line +# roughly once per second; we throttle that to once per ~30 seconds so +# the captured log stays compact, and pass the surrounding lines through +# unchanged. zcat hits.json.gz | sudo docker run --rm -i --network host \ -v "$QW_DATA":/quickwit/qwdata \ - "$QW_IMAGE" tool local-ingest --index hits -y + "$QW_IMAGE" tool local-ingest --index hits -y 2>&1 \ + | awk '/Num docs/ { n = systime(); if (n - last >= 30) { print; fflush(); last = n } next } + { print; fflush() }' # Wait long enough for the server to refresh its metastore view. sleep 35 # Show stats. 
-curl -sS "http://localhost:7280/api/v1/indexes/hits/describe" | tee stats.json -echo +curl -sS "http://localhost:7280/api/v1/indexes/hits/describe" \ + | jq '{num_published_docs, num_published_splits, size_published_splits}' \ + | tee stats.json END=$(date +%s) echo "Load time: $((END - START))" diff --git a/quickwit/node-config.yaml b/quickwit/node-config.yaml new file mode 100644 index 0000000000..d94c85a403 --- /dev/null +++ b/quickwit/node-config.yaml @@ -0,0 +1,8 @@ +version: 0.8 + +# Bump the per-request and leaf-search timeouts well above the 30s default — +# a few of the high-cardinality aggregations on the full 100M-row ClickBench +# dataset (e.g. WatchID + ClientIP nested terms) take longer than that. +searcher: + request_timeout_secs: 600 + leaf_request_timeout_secs: 600 From 09c88c01311172a5ec197639b77f0ac90bace872 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 8 May 2026 13:19:46 +0000 Subject: [PATCH 5/7] quickwit: drop in-process caches before each cold query MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ClickBench's run.sh convention drops the OS page cache before each query. For Quickwit that's not enough — its in-process caches (partial_request_cache, fast_field_cache, split_footer_cache, predicate_cache) survive `drop_caches`, and there's no cache-clear endpoint in the REST API. Without action, warm runs were consistently ~30× faster than cold runs because they were replaying memoized results. - Disable `partial_request_cache` in node-config.yaml. This is the per-split partial-result cache; keeping it on lets the engine short-circuit identical queries. - Leave `predicate_cache` at its default. It's a predicate-evaluation cache (analogous to ClickHouse's query condition cache), not a result cache. - Restart the Quickwit container in run.sh before each non-null query. 
This clears the remaining in-process caches (fast_field_cache, split_footer_cache, predicate_cache) so the first run is genuinely cold; the 2nd and 3rd runs benefit from caches re-warmed by run 1, matching ClickBench's cold/warm convention. Restart cycle is ~11s on this hardware, ~7 min total overhead across the 35 non-null queries. Co-Authored-By: Claude Opus 4.7 (1M context) --- quickwit/node-config.yaml | 15 ++++++++++++--- quickwit/run.sh | 17 +++++++++++++---- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/quickwit/node-config.yaml b/quickwit/node-config.yaml index d94c85a403..15bb302928 100644 --- a/quickwit/node-config.yaml +++ b/quickwit/node-config.yaml @@ -1,8 +1,17 @@ version: 0.8 -# Bump the per-request and leaf-search timeouts well above the 30s default — -# a few of the high-cardinality aggregations on the full 100M-row ClickBench -# dataset (e.g. WatchID + ClientIP nested terms) take longer than that. searcher: + # Bump the per-request and leaf-search timeouts well above the 30s default — + # a few of the high-cardinality aggregations on the full 100M-row ClickBench + # dataset (e.g. WatchID + ClientIP nested terms) take longer than that. request_timeout_secs: 600 leaf_request_timeout_secs: 600 + + # Disable the per-split partial result cache so warm runs don't replay a + # memoized answer. The other in-memory caches (fast_field_cache, + # split_footer_cache, predicate_cache) are data-level caches (analogous to + # ClickHouse's query condition cache) and are kept at their defaults; + # run.sh restarts the container before each query so they also start cold + # for the first run. 
+ partial_request_cache: + capacity: 0 diff --git a/quickwit/run.sh b/quickwit/run.sh index 66cb7b20ac..bfbf5f7c2c 100755 --- a/quickwit/run.sh +++ b/quickwit/run.sh @@ -4,26 +4,35 @@ TRIES=3 SEARCH_URL="http://localhost:7280/api/v1/_elastic/hits/_search" while IFS= read -r QUERY; do - sync && echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null + if [ "$QUERY" != "null" ]; then + # Restart Quickwit before each query to clear all in-process caches + # (fast_field_cache, split_footer_cache, predicate_cache). The result-style + # partial_request_cache is already disabled in node-config.yaml. + # Then drop the OS page cache. This makes the first + # run cold; runs 2 and 3 may benefit from caches re-warmed by run 1. + sudo docker restart qw >/dev/null + until curl -sS -f http://localhost:7280/api/v1/version >/dev/null 2>&1; do sleep 1; done + sync && echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null + fi echo -n "[" for i in $(seq 1 $TRIES); do if [ "$QUERY" = "null" ]; then - # Query is not expressible in Quickwit (e.g. cardinality, scripts, regex_replace). + # Query is not expressible in Quickwit (e.g. text-field sort, + # scripts, REGEXP_REPLACE). echo -n "null" else START=$(date +%s.%N) QW_RSP=$(curl -s -X POST "$SEARCH_URL" -H 'Content-Type: application/json' -d "$QUERY") END=$(date +%s.%N) - # Quickwit returns "took" in milliseconds (the engine-internal latency). + # Quickwit returns "took" in milliseconds (engine-internal latency). QW_TIME=$(echo "$QW_RSP" | jq -r 'if has("error") or has("status") then "null" else (.took | tostring) end') if [ "$QW_TIME" = "null" ] || [ -z "$QW_TIME" ]; then echo -n "null" else - # Convert ms -> seconds with 4-decimal precision. 
printf "%.4f" "$(echo "scale=4; $QW_TIME / 1000" | bc)" fi fi From f96317a72e17d90cc6e6d7191921c16942ed4c43 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 8 May 2026 17:47:26 +0200 Subject: [PATCH 6/7] Update node-config.yaml --- quickwit/node-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/quickwit/node-config.yaml b/quickwit/node-config.yaml index 15bb302928..19a8f8457c 100644 --- a/quickwit/node-config.yaml +++ b/quickwit/node-config.yaml @@ -1,11 +1,11 @@ version: 0.8 searcher: - # Bump the per-request and leaf-search timeouts well above the 30s default — + # Bump the per-request and leaf-search timeouts above the 30s default — # a few of the high-cardinality aggregations on the full 100M-row ClickBench # dataset (e.g. WatchID + ClientIP nested terms) take longer than that. - request_timeout_secs: 600 - leaf_request_timeout_secs: 600 + request_timeout_secs: 60 + leaf_request_timeout_secs: 60 # Disable the per-split partial result cache so warm runs don't replay a # memoized answer. 
The other in-memory caches (fast_field_cache, From 3216a6752518ca2943e18725acbd9df570af5d36 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 8 May 2026 18:09:49 +0000 Subject: [PATCH 7/7] Add results --- quickwit/results/20260508/c6a.2xlarge.json | 58 +++++++++++++++++++ quickwit/results/20260508/c6a.4xlarge.json | 58 +++++++++++++++++++ quickwit/results/20260508/c6a.metal.json | 58 +++++++++++++++++++ quickwit/results/20260508/c7a.metal-48xl.json | 58 +++++++++++++++++++ quickwit/results/20260508/c8g.4xlarge.json | 58 +++++++++++++++++++ quickwit/results/20260508/c8g.metal-48xl.json | 58 +++++++++++++++++++ 6 files changed, 348 insertions(+) create mode 100644 quickwit/results/20260508/c6a.2xlarge.json create mode 100644 quickwit/results/20260508/c6a.4xlarge.json create mode 100644 quickwit/results/20260508/c6a.metal.json create mode 100644 quickwit/results/20260508/c7a.metal-48xl.json create mode 100644 quickwit/results/20260508/c8g.4xlarge.json create mode 100644 quickwit/results/20260508/c8g.metal-48xl.json diff --git a/quickwit/results/20260508/c6a.2xlarge.json b/quickwit/results/20260508/c6a.2xlarge.json new file mode 100644 index 0000000000..d5f5984363 --- /dev/null +++ b/quickwit/results/20260508/c6a.2xlarge.json @@ -0,0 +1,58 @@ +{ + "system": "Quickwit", + "date": "2026-05-08", + "machine": "c6a.2xlarge", + "cluster_size": 1, + "proprietary": "no", + "hardware": "cpu", + "tuned": "no", + "tags": ["Rust","search"], + "load_time": 3403, + "data_size": 82647119927, + "result": [ + [0.033, 0.001, 0.001], + [0.161, 0.068, 0.068], + [0.405, 0.209, 0.211], + [1.963, 0.111, 0.111], + [2.579, 0.796, 0.811], + [1.495, 0.802, 0.866], + [0.273, 0.216, 0.217], + [0.144, 0.071, 0.07], + [4.023, 1.474, 1.518], + [4.668, 1.614, 1.569], + [2.378, 0.15, 0.15], + [2.627, 0.223, 0.231], + [1.029, 0.185, 0.201], + [null, null, null], + [null, null, null], + [2.559, 0.752, 0.704], + [null, null, null], + [null, null, null], + [null, null, null], + [0.076, 0.005, 
0.004], + [3.595, 2.23, 2.274], + [5.28, 2.281, 2.482], + [11.243, 4.755, 4.519], + [4.45, 1.915, 2.272], + [0.419, 0.104, 0.098], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [2.524, 0.914, 0.912], + [null, null, null], + [null, null, null], + [2.509, 0.436, 0.442], + [2.536, 0.434, 0.414], + [null, null, null], + [2.375, 0.034, 0.031], + [2.246, 0.026, 0.024], + [2.398, 0.056, 0.054], + [null, null, null], + [2.518, 0.896, 0.9], + [0.432, 0.04, 0.038], + [0.411, 0.049, 0.043] +] +} + diff --git a/quickwit/results/20260508/c6a.4xlarge.json b/quickwit/results/20260508/c6a.4xlarge.json new file mode 100644 index 0000000000..ec24f1253b --- /dev/null +++ b/quickwit/results/20260508/c6a.4xlarge.json @@ -0,0 +1,58 @@ +{ + "system": "Quickwit", + "date": "2026-05-08", + "machine": "c6a.4xlarge", + "cluster_size": 1, + "proprietary": "no", + "hardware": "cpu", + "tuned": "no", + "tags": ["Rust","search"], + "load_time": 2724, + "data_size": 74144537424, + "result": [ + [0.039, 0.001, 0.001], + [0.134, 0.052, 0.049], + [0.3, 0.147, 0.145], + [1.945, 0.08, 0.08], + [2.33, 0.515, 0.514], + [1.323, 0.574, 0.538], + [0.217, 0.149, 0.149], + [0.127, 0.052, 0.054], + [3.808, 1.166, 1.221], + [4.23, 1.028, 1.111], + [2.388, 0.113, 0.121], + [2.61, 0.178, 0.168], + [0.998, 0.139, 0.132], + [54.115, 52.265, 53.128], + [1.434, 0.343, 0.343], + [2.372, 0.544, 0.547], + [null, null, null], + [null, null, null], + [null, null, null], + [0.074, 0.005, 0.005], + [3.373, 1.842, 1.876], + [5.078, 1.794, 1.74], + [11.007, 2.991, 3.168], + [4.086, 1.456, 1.478], + [0.418, 0.081, 0.072], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [2.49, 0.829, 0.81], + [null, null, null], + [null, null, null], + [2.482, 0.299, 0.267], + [2.498, 0.281, 0.278], + [null, null, null], + [2.426, 0.031, 0.029], + [2.337, 0.025, 0.025], + [2.448, 0.055, 0.051], + [null, null, null], 
+ [3.359, 0.888, 0.886], + [0.456, 0.039, 0.036], + [0.441, 0.058, 0.04] +] +} + diff --git a/quickwit/results/20260508/c6a.metal.json b/quickwit/results/20260508/c6a.metal.json new file mode 100644 index 0000000000..9a353cda0a --- /dev/null +++ b/quickwit/results/20260508/c6a.metal.json @@ -0,0 +1,58 @@ +{ + "system": "Quickwit", + "date": "2026-05-08", + "machine": "c6a.metal", + "cluster_size": 1, + "proprietary": "no", + "hardware": "cpu", + "tuned": "no", + "tags": ["Rust","search"], + "load_time": 2580, + "data_size": 75886849124, + "result": [ + [0.048, 0.001, 0.001], + [0.117, 0.032, 0.03], + [0.282, 0.183, 0.13], + [1.935, 0.068, 0.069], + [2.243, 0.384, 0.382], + [1.189, 0.418, 0.417], + [0.211, 0.132, 0.13], + [0.15, 0.034, 0.033], + [3.587, 0.943, 0.964], + [4.087, 0.901, 0.852], + [2.369, 0.098, 0.094], + [2.567, 0.15, 0.151], + [0.95, 0.1, 0.102], + [51.568, 51.032, 49.702], + [1.346, 0.244, 0.233], + [2.232, 0.371, 0.405], + [null, 680.233, 358.013], + [null, 287.939, 339.255], + [null, null, null], + [0.083, 0.006, 0.005], + [3.039, 1.447, 1.44], + [4.699, 1.457, 1.511], + [10.677, 2.319, 2.37], + [3.917, 1.285, 1.337], + [0.391, 0.057, 0.056], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [2.221, 0.564, 0.577], + [792.323, null, 353.894], + [null, null, null], + [2.452, 0.225, 0.207], + [2.45, 0.213, 0.213], + [null, null, null], + [2.403, 0.029, 0.029], + [2.32, 0.024, 0.022], + [2.435, 0.055, 0.05], + [null, null, null], + [3.238, 0.806, 0.809], + [0.431, 0.038, 0.034], + [0.419, 0.04, 0.04] +] +} + diff --git a/quickwit/results/20260508/c7a.metal-48xl.json b/quickwit/results/20260508/c7a.metal-48xl.json new file mode 100644 index 0000000000..901dac6171 --- /dev/null +++ b/quickwit/results/20260508/c7a.metal-48xl.json @@ -0,0 +1,58 @@ +{ + "system": "Quickwit", + "date": "2026-05-08", + "machine": "c7a.metal-48xl", + "cluster_size": 1, + "proprietary": "no", + "hardware": "cpu", + 
"tuned": "no", + "tags": ["Rust","search"], + "load_time": 2333, + "data_size": 74234857136, + "result": [ + [0.053, 0.002, 0.002], + [0.142, 0.027, 0.026], + [0.403, 0.125, 0.194], + [1.952, 0.065, 0.065], + [2.163, 0.28, 0.28], + [1.131, 0.346, 0.348], + [0.244, 0.124, 0.123], + [0.172, 0.031, 0.03], + [3.521, 0.879, 0.866], + [3.944, 0.704, 0.727], + [2.376, 0.077, 0.08], + [2.596, 0.135, 0.129], + [0.98, 0.087, 0.086], + [54.644, 53.696, 51.587], + [1.364, 0.213, 0.208], + [2.212, 0.363, 0.355], + [606.124, null, 402.088], + [597.226, 596.798, 595.534], + [null, null, null], + [0.117, 0.006, 0.005], + [2.903, 1.197, 1.196], + [4.562, 1.216, 1.217], + [10.768, 2.249, 2.194], + [3.939, 1.196, 1.215], + [0.491, 0.049, 0.048], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [2.216, 0.524, 0.523], + [604.168, 601.291, 598.873], + [null, null, null], + [2.519, 0.197, 0.193], + [2.535, 0.197, 0.2], + [null, null, null], + [2.494, 0.029, 0.026], + [2.397, 0.021, 0.022], + [2.528, 0.048, 0.045], + [null, null, null], + [3.259, 0.785, 0.773], + [0.52, 0.036, 0.034], + [0.437, 0.036, 0.034] +] +} + diff --git a/quickwit/results/20260508/c8g.4xlarge.json b/quickwit/results/20260508/c8g.4xlarge.json new file mode 100644 index 0000000000..68ebd2f92c --- /dev/null +++ b/quickwit/results/20260508/c8g.4xlarge.json @@ -0,0 +1,58 @@ +{ + "system": "Quickwit", + "date": "2026-05-08", + "machine": "c8g.4xlarge", + "cluster_size": 1, + "proprietary": "no", + "hardware": "cpu", + "tuned": "no", + "tags": ["Rust","search"], + "load_time": 2390, + "data_size": 73990228369, + "result": [ + [0.031, 0.001, 0.001], + [0.104, 0.043, 0.043], + [0.239, 0.123, 0.117], + [1.922, 0.058, 0.059], + [2.163, 0.312, 0.324], + [1.197, 0.44, 0.446], + [0.165, 0.111, 0.113], + [0.098, 0.042, 0.042], + [3.547, 0.9, 0.902], + [3.937, 0.707, 0.725], + [2.368, 0.084, 0.084], + [2.58, 0.145, 0.139], + [0.964, 0.108, 0.105], + [38.944, 35.934, 
36.43], + [1.361, 0.254, 0.251], + [2.169, 0.328, 0.309], + [null, null, null], + [null, null, null], + [null, null, null], + [0.047, 0.004, 0.003], + [3.238, 1.457, 1.453], + [4.871, 1.457, 1.453], + [10.686, 2.344, 2.353], + [3.872, 1.162, 1.157], + [0.395, 0.06, 0.065], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [2.245, 0.525, 0.516], + [null, 839.555, null], + [null, null, null], + [2.43, 0.171, 0.168], + [2.435, 0.172, 0.169], + [null, null, null], + [2.424, 0.023, 0.022], + [2.329, 0.019, 0.018], + [2.452, 0.04, 0.038], + [null, null, null], + [3.114, 0.689, 0.693], + [0.457, 0.029, 0.029], + [0.438, 0.029, 0.027] +] +} + diff --git a/quickwit/results/20260508/c8g.metal-48xl.json b/quickwit/results/20260508/c8g.metal-48xl.json new file mode 100644 index 0000000000..4baec21074 --- /dev/null +++ b/quickwit/results/20260508/c8g.metal-48xl.json @@ -0,0 +1,58 @@ +{ + "system": "Quickwit", + "date": "2026-05-08", + "machine": "c8g.metal-48xl", + "cluster_size": 1, + "proprietary": "no", + "hardware": "cpu", + "tuned": "no", + "tags": ["Rust","search"], + "load_time": 2507, + "data_size": 75341136867, + "result": [ + [0.038, 0.001, 0.001], + [0.126, 0.044, 0.044], + [0.316, 0.117, 0.117], + [1.933, 0.061, 0.061], + [2.205, 0.335, 0.335], + [1.172, 0.393, 0.389], + [0.193, 0.117, 0.117], + [0.14, 0.045, 0.045], + [3.528, 0.891, 0.89], + [3.961, 0.736, 0.736], + [2.362, 0.086, 0.085], + [2.579, 0.138, 0.136], + [0.967, 0.099, 0.097], + [38.004, 34.929, 34.746], + [1.34, 0.221, 0.22], + [2.182, 0.337, 0.361], + [null, null, null], + [null, null, null], + [null, null, null], + [0.099, 0.003, 0.003], + [2.87, 1.212, 1.199], + [4.534, 1.2, 1.224], + [10.769, 2.272, 2.277], + [3.924, 1.197, 1.244], + [0.43, 0.059, 0.058], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [2.113, 0.43, 0.418], + [null, null, null], + [null, null, null], + 
[2.434, 0.169, 0.172], + [2.435, 0.17, 0.167], + [null, null, null], + [2.435, 0.023, 0.022], + [2.348, 0.019, 0.018], + [2.464, 0.039, 0.039], + [null, null, null], + [3.153, 0.844, 0.842], + [0.431, 0.03, 0.029], + [0.416, 0.028, 0.027] +] +} +