From 2b98466ad4bdefd466991c183a541685d7c0169b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 7 May 2026 20:03:59 +0000 Subject: [PATCH 1/7] Add Quickwit entry Quickwit (Rust, Tantivy-based) exposes an Elasticsearch-compatible REST API but no SQL endpoint, so each ClickBench query is hand-translated to ES DSL in queries.json. Loading goes through /api/v1/_elastic/hits/_bulk; querying through /_search. 19 of the 43 queries are not expressible in Quickwit's ES API (COUNT(DISTINCT), substring LIKE, scripted/runtime fields, REGEXP_REPLACE, ORDER BY on text fields) and are recorded as null. The remaining 24 queries were validated against a 1M-row sample on a single node. Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 2 +- quickwit/README.md | 58 +++++++++++++++ quickwit/benchmark.sh | 67 +++++++++++++++++ quickwit/index_config.yaml | 149 +++++++++++++++++++++++++++++++++++++ quickwit/load.py | 68 +++++++++++++++++ quickwit/queries.json | 43 +++++++++++ quickwit/queries.sql | 43 +++++++++++ quickwit/run.sh | 35 +++++++++ quickwit/template.json | 10 +++ 9 files changed, 474 insertions(+), 1 deletion(-) create mode 100644 quickwit/README.md create mode 100755 quickwit/benchmark.sh create mode 100644 quickwit/index_config.yaml create mode 100644 quickwit/load.py create mode 100644 quickwit/queries.json create mode 100644 quickwit/queries.sql create mode 100755 quickwit/run.sh create mode 100644 quickwit/template.json diff --git a/README.md b/README.md index 244f1560e1..631570d663 100644 --- a/README.md +++ b/README.md @@ -311,7 +311,7 @@ Please help us add more systems and run the benchmarks on more types of VMs: - [ ] MS SQL Server with Column Store Index (without publishing) - [ ] OceanBase - [ ] Planetscale (without publishing) -- [ ] Quickwit +- [x] Quickwit - [ ] Redshift Spectrum - [ ] Seafowl - [ ] ShitholeDB diff --git a/quickwit/README.md b/quickwit/README.md new file mode 100644 index 0000000000..bddbe0cbbc --- /dev/null +++ 
b/quickwit/README.md @@ -0,0 +1,58 @@ +# Quickwit + +[Quickwit](https://quickwit.io) is a Rust-based search engine for log analytics, built on top of [Tantivy](https://github.com/quickwit-oss/tantivy). It exposes an Elasticsearch-compatible REST API for ingestion and search, but does not implement an SQL endpoint, so this benchmark uses the native Elasticsearch query DSL directly. + +## Methodology + +Infrastructure: +- Single-node Quickwit 0.8.2 on AWS EC2 c6a.4xlarge + +Index configuration (`index_config.yaml`): +- All scalar fields declared with `fast: true` so they can participate in aggregations and sorts (Quickwit aggregations require fast fields). +- Keyword-like text fields use the `raw` tokenizer with the `raw` fast-field normalizer to mimic Elasticsearch's `keyword` mapping. +- `EventTime` is set as the index's timestamp field, providing time-based pruning. + +Ingestion (`load.py`): +- Reads `hits.json.gz` and streams NDJSON to the Elasticsearch-compatible bulk endpoint at `/api/v1/_elastic/hits/_bulk`. +- Quickwit's bulk endpoint only honors the `create` action, and rejects payloads >10MB, so batches are smaller than the Elasticsearch loader. + +Queries (`queries.json`): +- Each query in `queries.sql` is hand-translated to the Elasticsearch DSL on the corresponding line of `queries.json`, and submitted to `/api/v1/_elastic/hits/_search`. +- Timing is taken from the `took` field returned by Quickwit (milliseconds, engine-internal). +- Queries that are not expressible in Quickwit's DSL are recorded as `null`. + +## Unsupported queries + +Quickwit's aggregation and query model is narrower than Elasticsearch's. 
The following ClickBench queries cannot currently be expressed and are reported as `null`: + +| Q | Reason | +|----|-----------------------------------------------------------------------| +| 5 | `COUNT(DISTINCT)` — Quickwit has no `cardinality` aggregation | +| 6 | `COUNT(DISTINCT)` | +| 9 | `COUNT(DISTINCT)` | +| 10 | `COUNT(DISTINCT)` | +| 11 | `COUNT(DISTINCT)` | +| 12 | `COUNT(DISTINCT)` | +| 14 | `COUNT(DISTINCT)` | +| 19 | `extract(minute FROM …)` — no scripted/runtime fields | +| 21 | `LIKE '%…%'` — leading wildcards rejected, no `wildcard`/`regexp` | +| 22 | `LIKE '%…%'` | +| 23 | `COUNT(DISTINCT)` | +| 24 | `LIKE '%…%'` | +| 26 | `ORDER BY` on text field — not supported by the search backend | +| 27 | `ORDER BY` on text field | +| 28 | `AVG(length(URL))` — no scripted/runtime fields | +| 29 | `REGEXP_REPLACE` — not supported | +| 30 | `SUM(col + N)` — no scripted aggregations | +| 36 | `ClientIP - N` — no scripted aggregations | +| 40 | `CASE WHEN …` — no scripted/runtime fields | + +All other queries run through the native Elasticsearch DSL. + +## Running + +```bash +bash benchmark.sh +``` + +This installs Quickwit, creates the index, downloads `hits.json.gz`, ingests the data via the ES bulk API, and then runs `run.sh` to time each query three times, with the OS page cache dropped before each query (not between the three tries). 
diff --git a/quickwit/benchmark.sh b/quickwit/benchmark.sh new file mode 100755 index 0000000000..686defe222 --- /dev/null +++ b/quickwit/benchmark.sh @@ -0,0 +1,67 @@ +#!/bin/bash +set -e + +# Install prerequisites +sudo apt-get update -y +sudo apt-get install -y wget curl jq bc python3 python3-pip time + +pip3 install --user requests + +# Download Quickwit +QW_VERSION="0.8.2" +ARCH=$(uname -m) +wget --continue --progress=dot:giga \ + "https://github.com/quickwit-oss/quickwit/releases/download/v${QW_VERSION}/quickwit-v${QW_VERSION}-${ARCH}-unknown-linux-gnu.tar.gz" +tar xzf "quickwit-v${QW_VERSION}-${ARCH}-unknown-linux-gnu.tar.gz" +ln -sfn "quickwit-v${QW_VERSION}" quickwit + +# Start the server in the background. Quickwit defaults: REST on 7280, gRPC on 7281. +pushd quickwit >/dev/null +nohup ./quickwit run > ../quickwit.log 2>&1 & +QW_PID=$! +popd >/dev/null +echo "Quickwit started (PID $QW_PID)" + +# Wait for the server to come up. +for i in $(seq 1 60); do + if curl -sS -f http://localhost:7280/api/v1/version >/dev/null 2>&1; then + echo "Quickwit is ready" + break + fi + sleep 1 +done + +# Create the index from the YAML config. +curl -sS -X POST http://localhost:7280/api/v1/indexes \ + -H 'Content-Type: application/yaml' \ + --data-binary @index_config.yaml + +# Download the data +wget --continue --progress=dot:giga 'https://datasets.clickhouse.com/hits_compatible/hits.json.gz' + +START=$(date +%s) + +# Stream JSON directly into Quickwit via the Elasticsearch-compatible bulk API. +python3 load.py + +# Force any in-flight commits and wait for the data to become searchable. +# The default commit timeout in index_config.yaml is 30s, so wait a bit longer. +sleep 60 + +# Show stats. +curl -sS "http://localhost:7280/api/v1/indexes/hits/describe" | tee stats.json +echo + +END=$(date +%s) +echo "Load time: $((END - START))" + +# Data size on disk (single-node uses qwdata/ inside the install dir). 
+echo -n "Data size: " +du -sb quickwit/qwdata 2>/dev/null | awk '{print $1}' + +# Run queries +chmod +x run.sh +./run.sh + +# Stop Quickwit +kill "$QW_PID" 2>/dev/null || true diff --git a/quickwit/index_config.yaml b/quickwit/index_config.yaml new file mode 100644 index 0000000000..05b593a181 --- /dev/null +++ b/quickwit/index_config.yaml @@ -0,0 +1,149 @@ +version: 0.8 + +index_id: hits + +doc_mapping: + mode: strict + timestamp_field: EventTime + field_mappings: + - {name: WatchID, type: i64, indexed: true, fast: true} + - {name: JavaEnable, type: i64, indexed: true, fast: true} + - {name: Title, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: GoodEvent, type: i64, indexed: true, fast: true} + - name: EventTime + type: datetime + input_formats: ["%Y-%m-%d %H:%M:%S", "%Y-%m-%d", unix_timestamp, rfc3339] + output_format: unix_timestamp_secs + indexed: true + fast: true + fast_precision: seconds + - name: EventDate + type: datetime + input_formats: ["%Y-%m-%d %H:%M:%S", "%Y-%m-%d", unix_timestamp, rfc3339] + output_format: unix_timestamp_secs + indexed: true + fast: true + fast_precision: seconds + - {name: CounterID, type: i64, indexed: true, fast: true} + - {name: ClientIP, type: i64, indexed: true, fast: true} + - {name: RegionID, type: i64, indexed: true, fast: true} + - {name: UserID, type: i64, indexed: true, fast: true} + - {name: CounterClass, type: i64, indexed: true, fast: true} + - {name: OS, type: i64, indexed: true, fast: true} + - {name: UserAgent, type: i64, indexed: true, fast: true} + - {name: URL, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: Referer, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: IsRefresh, type: i64, indexed: true, fast: true} + - {name: RefererCategoryID, type: i64, indexed: true, fast: true} + - {name: RefererRegionID, type: i64, indexed: true, fast: true} + - {name: URLCategoryID, type: i64, indexed: true, fast: true} + - {name: URLRegionID, type: i64, indexed: true, fast: 
true} + - {name: ResolutionWidth, type: i64, indexed: true, fast: true} + - {name: ResolutionHeight, type: i64, indexed: true, fast: true} + - {name: ResolutionDepth, type: i64, indexed: true, fast: true} + - {name: FlashMajor, type: i64, indexed: true, fast: true} + - {name: FlashMinor, type: i64, indexed: true, fast: true} + - {name: FlashMinor2, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: NetMajor, type: i64, indexed: true, fast: true} + - {name: NetMinor, type: i64, indexed: true, fast: true} + - {name: UserAgentMajor, type: i64, indexed: true, fast: true} + - {name: UserAgentMinor, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: CookieEnable, type: i64, indexed: true, fast: true} + - {name: JavascriptEnable, type: i64, indexed: true, fast: true} + - {name: IsMobile, type: i64, indexed: true, fast: true} + - {name: MobilePhone, type: i64, indexed: true, fast: true} + - {name: MobilePhoneModel, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: Params, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: IPNetworkID, type: i64, indexed: true, fast: true} + - {name: TraficSourceID, type: i64, indexed: true, fast: true} + - {name: SearchEngineID, type: i64, indexed: true, fast: true} + - {name: SearchPhrase, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: AdvEngineID, type: i64, indexed: true, fast: true} + - {name: IsArtifical, type: i64, indexed: true, fast: true} + - {name: WindowClientWidth, type: i64, indexed: true, fast: true} + - {name: WindowClientHeight, type: i64, indexed: true, fast: true} + - {name: ClientTimeZone, type: i64, indexed: true, fast: true} + - name: ClientEventTime + type: datetime + input_formats: ["%Y-%m-%d %H:%M:%S", "%Y-%m-%d", unix_timestamp, rfc3339] + output_format: unix_timestamp_secs + indexed: true + fast: true + fast_precision: seconds + - {name: SilverlightVersion1, type: i64, indexed: true, fast: true} + - {name: SilverlightVersion2, type: i64, 
indexed: true, fast: true} + - {name: SilverlightVersion3, type: i64, indexed: true, fast: true} + - {name: SilverlightVersion4, type: i64, indexed: true, fast: true} + - {name: PageCharset, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: CodeVersion, type: i64, indexed: true, fast: true} + - {name: IsLink, type: i64, indexed: true, fast: true} + - {name: IsDownload, type: i64, indexed: true, fast: true} + - {name: IsNotBounce, type: i64, indexed: true, fast: true} + - {name: FUniqID, type: i64, indexed: true, fast: true} + - {name: OriginalURL, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: HID, type: i64, indexed: true, fast: true} + - {name: IsOldCounter, type: i64, indexed: true, fast: true} + - {name: IsEvent, type: i64, indexed: true, fast: true} + - {name: IsParameter, type: i64, indexed: true, fast: true} + - {name: DontCountHits, type: i64, indexed: true, fast: true} + - {name: WithHash, type: i64, indexed: true, fast: true} + - {name: HitColor, type: text, tokenizer: raw, fast: {normalizer: raw}} + - name: LocalEventTime + type: datetime + input_formats: ["%Y-%m-%d %H:%M:%S", "%Y-%m-%d", unix_timestamp, rfc3339] + output_format: unix_timestamp_secs + indexed: true + fast: true + fast_precision: seconds + - {name: Age, type: i64, indexed: true, fast: true} + - {name: Sex, type: i64, indexed: true, fast: true} + - {name: Income, type: i64, indexed: true, fast: true} + - {name: Interests, type: i64, indexed: true, fast: true} + - {name: Robotness, type: i64, indexed: true, fast: true} + - {name: RemoteIP, type: i64, indexed: true, fast: true} + - {name: WindowName, type: i64, indexed: true, fast: true} + - {name: OpenerName, type: i64, indexed: true, fast: true} + - {name: HistoryLength, type: i64, indexed: true, fast: true} + - {name: BrowserLanguage, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: BrowserCountry, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: SocialNetwork, type: text, 
tokenizer: raw, fast: {normalizer: raw}} + - {name: SocialAction, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: HTTPError, type: i64, indexed: true, fast: true} + - {name: SendTiming, type: i64, indexed: true, fast: true} + - {name: DNSTiming, type: i64, indexed: true, fast: true} + - {name: ConnectTiming, type: i64, indexed: true, fast: true} + - {name: ResponseStartTiming, type: i64, indexed: true, fast: true} + - {name: ResponseEndTiming, type: i64, indexed: true, fast: true} + - {name: FetchTiming, type: i64, indexed: true, fast: true} + - {name: SocialSourceNetworkID, type: i64, indexed: true, fast: true} + - {name: SocialSourcePage, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: ParamPrice, type: i64, indexed: true, fast: true} + - {name: ParamOrderID, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: ParamCurrency, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: ParamCurrencyID, type: i64, indexed: true, fast: true} + - {name: OpenstatServiceName, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: OpenstatCampaignID, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: OpenstatAdID, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: OpenstatSourceID, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: UTMSource, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: UTMMedium, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: UTMCampaign, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: UTMContent, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: UTMTerm, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: FromTag, type: text, tokenizer: raw, fast: {normalizer: raw}} + - {name: HasGCLID, type: i64, indexed: true, fast: true} + - {name: RefererHash, type: i64, indexed: true, fast: true} + - {name: URLHash, type: i64, indexed: true, fast: true} + - {name: CLID, type: i64, indexed: 
true, fast: true} + + store_source: false + +indexing_settings: + commit_timeout_secs: 30 + merge_policy: + type: stable_log + merge_factor: 10 + max_merge_factor: 12 + +search_settings: + default_search_fields: [] diff --git a/quickwit/load.py b/quickwit/load.py new file mode 100644 index 0000000000..a5ea304585 --- /dev/null +++ b/quickwit/load.py @@ -0,0 +1,68 @@ +import gzip +import json +from itertools import islice + +import requests + +# Quickwit's _bulk endpoint accepts at most 10MB per request; keep batches +# small enough to stay under the limit comfortably. +BULK_SIZE = 2000 +QW_URL = "http://localhost:7280/api/v1/_elastic/hits/_bulk" +TOTAL_RECORDS = 99997497 + +# Quickwit only supports the "create" action of the Elasticsearch bulk API. +ACTION_META_BYTES = (json.dumps({"create": {"_index": "hits"}}) + "\n").encode("utf-8") +REQUEST_TIMEOUT = 120 + + +def build_body(docs): + parts = [] + for doc in docs: + parts.append(ACTION_META_BYTES) + parts.append(doc.encode("utf-8") if isinstance(doc, str) else doc) + return b"".join(parts) + + +def send_bulk(session, docs, batch_num): + # Quickwit's bulk endpoint requires a Content-Length header, so we have to + # buffer the body rather than streaming it. 
+ resp = session.post(QW_URL, data=build_body(docs), timeout=REQUEST_TIMEOUT) + if resp.status_code >= 300: + print( + f"\nSent batch {batch_num} ({len(docs)} docs) - Warning: HTTP {resp.status_code}: {resp.text[:300]}" + ) + return 0 + + body = resp.json() + if body.get("errors"): + items = body.get("items", []) + err = sum(1 for i in items if "error" in i.get("create", {})) + if err: + print(f"\nBatch {batch_num}: {err} item errors") + + return len(docs) + + +def main(): + total_docs = 0 + batch_num = 0 + + with requests.Session() as session: + session.headers.update({"Content-Type": "application/x-ndjson"}) + + with gzip.open("hits.json.gz", mode="rt", encoding="utf-8") as f: + print("Reading from hits.json.gz") + while True: + docs = list(islice(f, BULK_SIZE)) + if not docs: + break + batch_num += 1 + total_docs += send_bulk(session, docs, batch_num) + pct = (total_docs / TOTAL_RECORDS) * 100 if TOTAL_RECORDS else 0 + print(f" {pct:.2f}% ({total_docs}/{TOTAL_RECORDS})") + + print(f"\nTotal docs sent: {total_docs}") + + +if __name__ == "__main__": + main() diff --git a/quickwit/queries.json b/quickwit/queries.json new file mode 100644 index 0000000000..49842c60be --- /dev/null +++ b/quickwit/queries.json @@ -0,0 +1,43 @@ +{"size":0,"track_total_hits":true,"query":{"match_all":{}}} +{"size":0,"track_total_hits":true,"query":{"bool":{"must_not":[{"term":{"AdvEngineID":0}}]}}} +{"size":0,"track_total_hits":true,"aggs":{"sum_adv":{"sum":{"field":"AdvEngineID"}},"avg_res":{"avg":{"field":"ResolutionWidth"}}}} +{"size":0,"aggs":{"avg_user":{"avg":{"field":"UserID"}}}} +null +null +{"size":0,"aggs":{"min_date":{"min":{"field":"EventDate"}},"max_date":{"max":{"field":"EventDate"}}}} +{"size":0,"query":{"bool":{"must_not":[{"term":{"AdvEngineID":0}}]}},"aggs":{"by_adv":{"terms":{"field":"AdvEngineID","size":1000,"order":{"_count":"desc"}}}}} +null +null +null +null 
+{"size":0,"query":{"bool":{"must_not":[{"term":{"SearchPhrase":""}}]}},"aggs":{"sp":{"terms":{"field":"SearchPhrase","size":10,"order":{"_count":"desc"}}}}} +null +{"size":0,"query":{"bool":{"must_not":[{"term":{"SearchPhrase":""}}]}},"aggs":{"se":{"terms":{"field":"SearchEngineID","size":10,"order":{"_count":"desc"}},"aggs":{"sp":{"terms":{"field":"SearchPhrase","size":10,"order":{"_count":"desc"}}}}}}} +{"size":0,"aggs":{"u":{"terms":{"field":"UserID","size":10,"order":{"_count":"desc"}}}}} +{"size":0,"aggs":{"u":{"terms":{"field":"UserID","size":10,"order":{"_count":"desc"}},"aggs":{"sp":{"terms":{"field":"SearchPhrase","size":10,"order":{"_count":"desc"}}}}}}} +{"size":0,"aggs":{"u":{"terms":{"field":"UserID","size":10},"aggs":{"sp":{"terms":{"field":"SearchPhrase","size":10}}}}}} +null +{"size":10,"query":{"term":{"UserID":435090932899640449}}} +null +null +null +null +{"size":10,"query":{"bool":{"must_not":[{"term":{"SearchPhrase":""}}]}},"sort":[{"EventTime":"asc"}]} +null +null +null +null +null +{"size":0,"query":{"bool":{"must_not":[{"term":{"SearchPhrase":""}}]}},"aggs":{"se":{"terms":{"field":"SearchEngineID","size":10,"order":{"_count":"desc"}},"aggs":{"ip":{"terms":{"field":"ClientIP","size":10,"order":{"_count":"desc"}},"aggs":{"sumref":{"sum":{"field":"IsRefresh"}},"avgres":{"avg":{"field":"ResolutionWidth"}}}}}}}} +{"size":0,"query":{"bool":{"must_not":[{"term":{"SearchPhrase":""}}]}},"aggs":{"w":{"terms":{"field":"WatchID","size":10,"order":{"_count":"desc"}},"aggs":{"ip":{"terms":{"field":"ClientIP","size":10,"order":{"_count":"desc"}},"aggs":{"sumref":{"sum":{"field":"IsRefresh"}},"avgres":{"avg":{"field":"ResolutionWidth"}}}}}}}} +{"size":0,"aggs":{"w":{"terms":{"field":"WatchID","size":10,"order":{"_count":"desc"}},"aggs":{"ip":{"terms":{"field":"ClientIP","size":10,"order":{"_count":"desc"}},"aggs":{"sumref":{"sum":{"field":"IsRefresh"}},"avgres":{"avg":{"field":"ResolutionWidth"}}}}}}}} 
+{"size":0,"aggs":{"u":{"terms":{"field":"URL","size":10,"order":{"_count":"desc"}}}}} +{"size":0,"aggs":{"u":{"terms":{"field":"URL","size":10,"order":{"_count":"desc"}}}}} +null +{"size":0,"query":{"bool":{"filter":[{"term":{"CounterID":62}},{"range":{"EventDate":{"gte":"2013-07-01","lte":"2013-07-31"}}},{"term":{"DontCountHits":0}},{"term":{"IsRefresh":0}}],"must_not":[{"term":{"URL":""}}]}},"aggs":{"u":{"terms":{"field":"URL","size":10,"order":{"_count":"desc"}}}}} +{"size":0,"query":{"bool":{"filter":[{"term":{"CounterID":62}},{"range":{"EventDate":{"gte":"2013-07-01","lte":"2013-07-31"}}},{"term":{"DontCountHits":0}},{"term":{"IsRefresh":0}}],"must_not":[{"term":{"Title":""}}]}},"aggs":{"t":{"terms":{"field":"Title","size":10,"order":{"_count":"desc"}}}}} +{"size":0,"query":{"bool":{"filter":[{"term":{"CounterID":62}},{"range":{"EventDate":{"gte":"2013-07-01","lte":"2013-07-31"}}},{"term":{"IsRefresh":0}},{"term":{"IsDownload":0}}],"must_not":[{"term":{"IsLink":0}}]}},"aggs":{"u":{"terms":{"field":"URL","size":1010,"order":{"_count":"desc"}}}}} +null +{"size":0,"query":{"bool":{"filter":[{"term":{"CounterID":62}},{"range":{"EventDate":{"gte":"2013-07-01","lte":"2013-07-31"}}},{"term":{"IsRefresh":0}},{"bool":{"should":[{"term":{"TraficSourceID":-1}},{"term":{"TraficSourceID":6}}]}},{"term":{"RefererHash":3594120000172545465}}]}},"aggs":{"uh":{"terms":{"field":"URLHash","size":110,"order":{"_count":"desc"}},"aggs":{"ed":{"terms":{"field":"EventDate","size":110,"order":{"_count":"desc"}}}}}}} +{"size":0,"query":{"bool":{"filter":[{"term":{"CounterID":62}},{"range":{"EventDate":{"gte":"2013-07-01","lte":"2013-07-31"}}},{"term":{"IsRefresh":0}},{"term":{"DontCountHits":0}},{"term":{"URLHash":2868770270353813622}}]}},"aggs":{"w":{"terms":{"field":"WindowClientWidth","size":10010,"order":{"_count":"desc"}},"aggs":{"h":{"terms":{"field":"WindowClientHeight","size":10010,"order":{"_count":"desc"}}}}}}} 
+{"size":0,"query":{"bool":{"filter":[{"term":{"CounterID":62}},{"range":{"EventDate":{"gte":"2013-07-14","lte":"2013-07-15"}}},{"term":{"IsRefresh":0}},{"term":{"DontCountHits":0}}]}},"aggs":{"dt":{"date_histogram":{"field":"EventTime","fixed_interval":"1m"}}}} diff --git a/quickwit/queries.sql b/quickwit/queries.sql new file mode 100644 index 0000000000..7d093d057d --- /dev/null +++ b/quickwit/queries.sql @@ -0,0 +1,43 @@ +SELECT COUNT(*) FROM hits; +SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; +SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; +SELECT AVG(UserID) FROM hits; +SELECT COUNT(DISTINCT UserID) FROM hits; +SELECT COUNT(DISTINCT SearchPhrase) FROM hits; +SELECT MIN(EventDate), MAX(EventDate) FROM hits; +SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; +SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; +SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; +SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY 
COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; +SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID FROM hits WHERE UserID = 435090932899640449; +SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; +SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; +SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), 
SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; +SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), 
AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; +SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; +SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; +SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1010; +SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1010; +SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID 
IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 110; +SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10010; +SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 1010; diff --git a/quickwit/run.sh b/quickwit/run.sh new file mode 100755 index 0000000000..66cb7b20ac --- /dev/null +++ b/quickwit/run.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +TRIES=3 +SEARCH_URL="http://localhost:7280/api/v1/_elastic/hits/_search" + +while IFS= read -r QUERY; do + sync && echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null + + echo -n "[" + + for i in $(seq 1 $TRIES); do + if [ "$QUERY" = "null" ]; then + # Query is not expressible in Quickwit (e.g. cardinality, scripts, regex_replace). + echo -n "null" + else + START=$(date +%s.%N) + QW_RSP=$(curl -s -X POST "$SEARCH_URL" -H 'Content-Type: application/json' -d "$QUERY") + END=$(date +%s.%N) + + # Quickwit returns "took" in milliseconds (the engine-internal latency). + QW_TIME=$(echo "$QW_RSP" | jq -r 'if has("error") or has("status") then "null" else (.took | tostring) end') + + if [ "$QW_TIME" = "null" ] || [ -z "$QW_TIME" ]; then + echo -n "null" + else + # Convert ms -> seconds with 4-decimal precision. 
+ printf "%.4f" "$(echo "scale=4; $QW_TIME / 1000" | bc)" + fi + fi + + [ "$i" != "$TRIES" ] && echo -n ", " + done + + echo "]," +done < queries.json diff --git a/quickwit/template.json b/quickwit/template.json new file mode 100644 index 0000000000..022f7ad20b --- /dev/null +++ b/quickwit/template.json @@ -0,0 +1,10 @@ +{ + "system": "Quickwit", + "proprietary": "no", + "hardware": "cpu", + "tuned": "no", + "tags": [ + "Rust", + "search" + ] +} From 20c2f779f31b0927a41162b12ae5006c71cf6cf4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 7 May 2026 20:26:50 +0000 Subject: [PATCH 2/7] quickwit: install python3-requests via apt, drop pip3 Ubuntu 24.04 (the noble image used by run-benchmark.sh) refuses "pip3 install --user requests" under PEP 668's externally-managed environment, which aborted benchmark.sh after ~28s on c7a.metal-48xl. The python3-requests apt package is available and sufficient. Also drop the symlink "quickwit -> quickwit-v0.8.2" since the source directory is itself named "quickwit", and reference the versioned dir directly via $QW_DIR. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- quickwit/benchmark.sh | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/quickwit/benchmark.sh b/quickwit/benchmark.sh index 686defe222..f0327bb6ef 100755 --- a/quickwit/benchmark.sh +++ b/quickwit/benchmark.sh @@ -3,20 +3,18 @@ set -e # Install prerequisites sudo apt-get update -y -sudo apt-get install -y wget curl jq bc python3 python3-pip time - -pip3 install --user requests +sudo apt-get install -y wget curl jq bc python3 python3-requests # Download Quickwit QW_VERSION="0.8.2" ARCH=$(uname -m) +QW_DIR="quickwit-v${QW_VERSION}" wget --continue --progress=dot:giga \ - "https://github.com/quickwit-oss/quickwit/releases/download/v${QW_VERSION}/quickwit-v${QW_VERSION}-${ARCH}-unknown-linux-gnu.tar.gz" -tar xzf "quickwit-v${QW_VERSION}-${ARCH}-unknown-linux-gnu.tar.gz" -ln -sfn "quickwit-v${QW_VERSION}" quickwit + "https://github.com/quickwit-oss/quickwit/releases/download/v${QW_VERSION}/${QW_DIR}-${ARCH}-unknown-linux-gnu.tar.gz" +tar xzf "${QW_DIR}-${ARCH}-unknown-linux-gnu.tar.gz" # Start the server in the background. Quickwit defaults: REST on 7280, gRPC on 7281. -pushd quickwit >/dev/null +pushd "$QW_DIR" >/dev/null nohup ./quickwit run > ../quickwit.log 2>&1 & QW_PID=$! popd >/dev/null @@ -57,7 +55,7 @@ echo "Load time: $((END - START))" # Data size on disk (single-node uses qwdata/ inside the install dir). echo -n "Data size: " -du -sb quickwit/qwdata 2>/dev/null | awk '{print $1}' +du -sb "$QW_DIR/qwdata" 2>/dev/null | awk '{print $1}' # Run queries chmod +x run.sh From f0b24340856131b55e210699ba746107de54c3ce Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 8 May 2026 11:50:41 +0000 Subject: [PATCH 3/7] quickwit: switch to v0.9 nightly, use cardinality and wildcard Stable Quickwit 0.8.2 has neither the `cardinality` aggregation nor a `wildcard` query, so 19 of the 43 ClickBench queries had to be reported as null. 
The v0.9 line (still unreleased; we use the `v0.9.0-rc` Docker image) adds both, which lets us express 11 more queries (Q5/6/9/10/11/ 12/14/21/22/23/24). 8 queries still depend on scripted/runtime fields or text-field sort, neither of which v0.9 provides. Loading switches from the Elasticsearch-compatible bulk endpoint to `quickwit tool local-ingest`, fed by `zcat hits.json.gz` over stdin. v0.9's sharded ingest-v2 API caps single-node throughput to a few MB/s and stalls waiting for shards to scale; `local-ingest` builds splits directly on the configured storage and the running server picks them up at the next metastore poll. Co-Authored-By: Claude Opus 4.7 (1M context) --- quickwit/README.md | 31 +++++++------------- quickwit/benchmark.sh | 52 +++++++++++++++++++-------------- quickwit/load.py | 68 ------------------------------------------- quickwit/queries.json | 22 +++++++------- 4 files changed, 52 insertions(+), 121 deletions(-) delete mode 100644 quickwit/load.py diff --git a/quickwit/README.md b/quickwit/README.md index bddbe0cbbc..d987f12b32 100644 --- a/quickwit/README.md +++ b/quickwit/README.md @@ -5,16 +5,18 @@ ## Methodology Infrastructure: -- Single-node Quickwit 0.8.2 on AWS EC2 c6a.4xlarge +- Single-node Quickwit **v0.9.0-rc** (Docker `quickwit/quickwit:v0.9.0-rc`). + + Stable **0.8.2** is missing `cardinality`, `wildcard`, and several other features the benchmark relies on, so we use the v0.9 release candidate. The v0.9 line is still unreleased — as soon as a stable v0.9.x ships, bump `QW_IMAGE` in `benchmark.sh`. Index configuration (`index_config.yaml`): -- All scalar fields declared with `fast: true` so they can participate in aggregations and sorts (Quickwit aggregations require fast fields). +- All scalar fields declared with `fast: true` so they can participate in aggregations and sorts. - Keyword-like text fields use the `raw` tokenizer with the `raw` fast-field normalizer to mimic Elasticsearch's `keyword` mapping. 
- `EventTime` is set as the index's timestamp field, providing time-based pruning. -Ingestion (`load.py`): -- Reads `hits.json.gz` and streams NDJSON to the Elasticsearch-compatible bulk endpoint at `/api/v1/_elastic/hits/_bulk`. -- Quickwit's bulk endpoint only honors the `create` action, and rejects payloads >10MB, so batches are smaller than the Elasticsearch loader. +Ingestion (`benchmark.sh`): +- Streams `hits.json.gz` decompressed into `quickwit tool local-ingest`, which builds splits directly on local storage. We do **not** use the Elasticsearch bulk endpoint: v0.9's sharded ingest-v2 API caps single-node throughput to a few MB/s in our testing and stalls waiting for shards to scale. `local-ingest` bypasses the ingest pipeline entirely. +- The server picks up the new splits on its next metastore poll (default 30 s). Queries (`queries.json`): - Each query in `queries.sql` is hand-translated to the Elasticsearch DSL on the corresponding line of `queries.json`, and submitted to `/api/v1/_elastic/hits/_search`. @@ -23,23 +25,12 @@ Queries (`queries.json`): ## Unsupported queries -Quickwit's aggregation and query model is narrower than Elasticsearch's. 
The following ClickBench queries cannot currently be expressed and are reported as `null`: +The following ClickBench queries cannot currently be expressed in Quickwit's Elasticsearch-compatible DSL and are reported as `null`: | Q | Reason | |----|-----------------------------------------------------------------------| -| 5 | `COUNT(DISTINCT)` — Quickwit has no `cardinality` aggregation | -| 6 | `COUNT(DISTINCT)` | -| 9 | `COUNT(DISTINCT)` | -| 10 | `COUNT(DISTINCT)` | -| 11 | `COUNT(DISTINCT)` | -| 12 | `COUNT(DISTINCT)` | -| 14 | `COUNT(DISTINCT)` | | 19 | `extract(minute FROM …)` — no scripted/runtime fields | -| 21 | `LIKE '%…%'` — leading wildcards rejected, no `wildcard`/`regexp` | -| 22 | `LIKE '%…%'` | -| 23 | `COUNT(DISTINCT)` | -| 24 | `LIKE '%…%'` | -| 26 | `ORDER BY` on text field — not supported by the search backend | +| 26 | `ORDER BY` on text field — `sort by field on type text is currently not supported` | | 27 | `ORDER BY` on text field | | 28 | `AVG(length(URL))` — no scripted/runtime fields | | 29 | `REGEXP_REPLACE` — not supported | @@ -47,7 +38,7 @@ Quickwit's aggregation and query model is narrower than Elasticsearch's. The fol | 36 | `ClientIP - N` — no scripted aggregations | | 40 | `CASE WHEN …` — no scripted/runtime fields | -All other queries run through the native Elasticsearch DSL. +All other 35 queries run through the native Elasticsearch DSL, including `cardinality` (Q5/6/9/10/11/12/14) and `wildcard` (Q21/22/23/24). ## Running @@ -55,4 +46,4 @@ All other queries run through the native Elasticsearch DSL. bash benchmark.sh ``` -This installs Quickwit, creates the index, downloads `hits.json.gz`, ingests the data via the ES bulk API, and then runs `run.sh` to time each query three times with caches dropped between runs. +Installs Docker and Quickwit, creates the index, downloads `hits.json.gz`, runs `local-ingest`, then runs `run.sh` to time each query three times with caches dropped between runs. 
diff --git a/quickwit/benchmark.sh b/quickwit/benchmark.sh index f0327bb6ef..ae3c81a481 100755 --- a/quickwit/benchmark.sh +++ b/quickwit/benchmark.sh @@ -3,22 +3,24 @@ set -e # Install prerequisites sudo apt-get update -y -sudo apt-get install -y wget curl jq bc python3 python3-requests +sudo apt-get install -y wget curl jq bc docker.io +sudo systemctl start docker -# Download Quickwit -QW_VERSION="0.8.2" -ARCH=$(uname -m) -QW_DIR="quickwit-v${QW_VERSION}" -wget --continue --progress=dot:giga \ - "https://github.com/quickwit-oss/quickwit/releases/download/v${QW_VERSION}/${QW_DIR}-${ARCH}-unknown-linux-gnu.tar.gz" -tar xzf "${QW_DIR}-${ARCH}-unknown-linux-gnu.tar.gz" +# We use the Quickwit v0.9 release candidate. Stable v0.8.2 is missing +# `cardinality`, `wildcard`, and several other features the benchmark relies +# on; only the v0.9 line (still unreleased as of writing) provides them. +QW_IMAGE="quickwit/quickwit:v0.9.0-rc" +sudo docker pull "$QW_IMAGE" + +# Quickwit's data directory (shared between the server and the local-ingest +# container). +QW_DATA="$(pwd)/qwdata" +sudo rm -rf "$QW_DATA" +mkdir -p "$QW_DATA" # Start the server in the background. Quickwit defaults: REST on 7280, gRPC on 7281. -pushd "$QW_DIR" >/dev/null -nohup ./quickwit run > ../quickwit.log 2>&1 & -QW_PID=$! -popd >/dev/null -echo "Quickwit started (PID $QW_PID)" +sudo docker run -d --name qw --network host -v "$QW_DATA":/quickwit/qwdata "$QW_IMAGE" run +echo "Quickwit container started" # Wait for the server to come up. for i in $(seq 1 60); do @@ -39,12 +41,18 @@ wget --continue --progress=dot:giga 'https://datasets.clickhouse.com/hits_compat START=$(date +%s) -# Stream JSON directly into Quickwit via the Elasticsearch-compatible bulk API. -python3 load.py +# Use `quickwit tool local-ingest` instead of the Elasticsearch-compatible +# bulk endpoint. 
v0.9's sharded ingest-v2 API caps single-node throughput +# to a few MB/s and gets stuck waiting for shards to scale, while +# `local-ingest` builds splits directly and writes them to the index +# storage. The running server picks up new splits on its next metastore +# poll (default 30s). +zcat hits.json.gz | sudo docker run --rm -i --network host \ + -v "$QW_DATA":/quickwit/qwdata \ + "$QW_IMAGE" tool local-ingest --index hits -y -# Force any in-flight commits and wait for the data to become searchable. -# The default commit timeout in index_config.yaml is 30s, so wait a bit longer. -sleep 60 +# Wait long enough for the server to refresh its metastore view. +sleep 35 # Show stats. curl -sS "http://localhost:7280/api/v1/indexes/hits/describe" | tee stats.json @@ -53,13 +61,13 @@ echo END=$(date +%s) echo "Load time: $((END - START))" -# Data size on disk (single-node uses qwdata/ inside the install dir). +# Data size on disk. echo -n "Data size: " -du -sb "$QW_DIR/qwdata" 2>/dev/null | awk '{print $1}' +sudo du -sb "$QW_DATA" | awk '{print $1}' # Run queries chmod +x run.sh ./run.sh -# Stop Quickwit -kill "$QW_PID" 2>/dev/null || true +sudo docker stop qw 2>/dev/null || true +sudo docker rm qw 2>/dev/null || true diff --git a/quickwit/load.py b/quickwit/load.py deleted file mode 100644 index a5ea304585..0000000000 --- a/quickwit/load.py +++ /dev/null @@ -1,68 +0,0 @@ -import gzip -import json -from itertools import islice - -import requests - -# Quickwit's _bulk endpoint accepts at most 10MB per request; keep batches -# small enough to stay under the limit comfortably. -BULK_SIZE = 2000 -QW_URL = "http://localhost:7280/api/v1/_elastic/hits/_bulk" -TOTAL_RECORDS = 99997497 - -# Quickwit only supports the "create" action of the Elasticsearch bulk API. 
-ACTION_META_BYTES = (json.dumps({"create": {"_index": "hits"}}) + "\n").encode("utf-8") -REQUEST_TIMEOUT = 120 - - -def build_body(docs): - parts = [] - for doc in docs: - parts.append(ACTION_META_BYTES) - parts.append(doc.encode("utf-8") if isinstance(doc, str) else doc) - return b"".join(parts) - - -def send_bulk(session, docs, batch_num): - # Quickwit's bulk endpoint requires a Content-Length header, so we have to - # buffer the body rather than streaming it. - resp = session.post(QW_URL, data=build_body(docs), timeout=REQUEST_TIMEOUT) - if resp.status_code >= 300: - print( - f"\nSent batch {batch_num} ({len(docs)} docs) - Warning: HTTP {resp.status_code}: {resp.text[:300]}" - ) - return 0 - - body = resp.json() - if body.get("errors"): - items = body.get("items", []) - err = sum(1 for i in items if "error" in i.get("create", {})) - if err: - print(f"\nBatch {batch_num}: {err} item errors") - - return len(docs) - - -def main(): - total_docs = 0 - batch_num = 0 - - with requests.Session() as session: - session.headers.update({"Content-Type": "application/x-ndjson"}) - - with gzip.open("hits.json.gz", mode="rt", encoding="utf-8") as f: - print("Reading from hits.json.gz") - while True: - docs = list(islice(f, BULK_SIZE)) - if not docs: - break - batch_num += 1 - total_docs += send_bulk(session, docs, batch_num) - pct = (total_docs / TOTAL_RECORDS) * 100 if TOTAL_RECORDS else 0 - print(f" {pct:.2f}% ({total_docs}/{TOTAL_RECORDS})") - - print(f"\nTotal docs sent: {total_docs}") - - -if __name__ == "__main__": - main() diff --git a/quickwit/queries.json b/quickwit/queries.json index 49842c60be..b7b298d699 100644 --- a/quickwit/queries.json +++ b/quickwit/queries.json @@ -2,26 +2,26 @@ {"size":0,"track_total_hits":true,"query":{"bool":{"must_not":[{"term":{"AdvEngineID":0}}]}}} {"size":0,"track_total_hits":true,"aggs":{"sum_adv":{"sum":{"field":"AdvEngineID"}},"avg_res":{"avg":{"field":"ResolutionWidth"}}}} {"size":0,"aggs":{"avg_user":{"avg":{"field":"UserID"}}}} 
-null -null +{"size":0,"aggs":{"u":{"cardinality":{"field":"UserID"}}}} +{"size":0,"aggs":{"u":{"cardinality":{"field":"SearchPhrase"}}}} {"size":0,"aggs":{"min_date":{"min":{"field":"EventDate"}},"max_date":{"max":{"field":"EventDate"}}}} {"size":0,"query":{"bool":{"must_not":[{"term":{"AdvEngineID":0}}]}},"aggs":{"by_adv":{"terms":{"field":"AdvEngineID","size":1000,"order":{"_count":"desc"}}}}} -null -null -null -null +{"size":0,"aggs":{"r":{"terms":{"field":"RegionID","size":10,"order":{"u":"desc"}},"aggs":{"u":{"cardinality":{"field":"UserID"}}}}}} +{"size":0,"aggs":{"r":{"terms":{"field":"RegionID","size":10,"order":{"_count":"desc"}},"aggs":{"sumadv":{"sum":{"field":"AdvEngineID"}},"avgres":{"avg":{"field":"ResolutionWidth"}},"u":{"cardinality":{"field":"UserID"}}}}}} +{"size":0,"query":{"bool":{"must_not":[{"term":{"MobilePhoneModel":""}}]}},"aggs":{"m":{"terms":{"field":"MobilePhoneModel","size":10,"order":{"u":"desc"}},"aggs":{"u":{"cardinality":{"field":"UserID"}}}}}} +{"size":0,"query":{"bool":{"must_not":[{"term":{"MobilePhoneModel":""}}]}},"aggs":{"p":{"terms":{"field":"MobilePhone","size":10,"order":{"u":"desc"}},"aggs":{"u":{"cardinality":{"field":"UserID"}},"m":{"terms":{"field":"MobilePhoneModel","size":10,"order":{"u":"desc"}},"aggs":{"u":{"cardinality":{"field":"UserID"}}}}}}}} {"size":0,"query":{"bool":{"must_not":[{"term":{"SearchPhrase":""}}]}},"aggs":{"sp":{"terms":{"field":"SearchPhrase","size":10,"order":{"_count":"desc"}}}}} -null +{"size":0,"query":{"bool":{"must_not":[{"term":{"SearchPhrase":""}}]}},"aggs":{"sp":{"terms":{"field":"SearchPhrase","size":10,"order":{"u":"desc"}},"aggs":{"u":{"cardinality":{"field":"UserID"}}}}}} {"size":0,"query":{"bool":{"must_not":[{"term":{"SearchPhrase":""}}]}},"aggs":{"se":{"terms":{"field":"SearchEngineID","size":10,"order":{"_count":"desc"}},"aggs":{"sp":{"terms":{"field":"SearchPhrase","size":10,"order":{"_count":"desc"}}}}}}} 
{"size":0,"aggs":{"u":{"terms":{"field":"UserID","size":10,"order":{"_count":"desc"}}}}} {"size":0,"aggs":{"u":{"terms":{"field":"UserID","size":10,"order":{"_count":"desc"}},"aggs":{"sp":{"terms":{"field":"SearchPhrase","size":10,"order":{"_count":"desc"}}}}}}} {"size":0,"aggs":{"u":{"terms":{"field":"UserID","size":10},"aggs":{"sp":{"terms":{"field":"SearchPhrase","size":10}}}}}} null {"size":10,"query":{"term":{"UserID":435090932899640449}}} -null -null -null -null +{"size":0,"track_total_hits":true,"query":{"wildcard":{"URL":"*google*"}}} +{"size":0,"query":{"bool":{"filter":[{"wildcard":{"URL":"*google*"}}],"must_not":[{"term":{"SearchPhrase":""}}]}},"aggs":{"sp":{"terms":{"field":"SearchPhrase","size":10,"order":{"_count":"desc"}}}}} +{"size":0,"query":{"bool":{"filter":[{"wildcard":{"Title":"*Google*"}}],"must_not":[{"wildcard":{"URL":"*.google.*"}},{"term":{"SearchPhrase":""}}]}},"aggs":{"sp":{"terms":{"field":"SearchPhrase","size":10,"order":{"_count":"desc"}},"aggs":{"u":{"cardinality":{"field":"UserID"}}}}}} +{"size":10,"query":{"wildcard":{"URL":"*google*"}},"sort":[{"EventTime":"asc"}]} {"size":10,"query":{"bool":{"must_not":[{"term":{"SearchPhrase":""}}]}},"sort":[{"EventTime":"asc"}]} null null From b607f9415c0c151b5833c4428237c90c63f5fcb7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 8 May 2026 13:01:00 +0000 Subject: [PATCH 4/7] quickwit: throttle load progress, silence noisy commands, bump search timeout The cloud-init log uploaded after the run is constrained to <1 MiB. Two sources were chatty enough to risk hitting that limit on a 100M-row load: `local-ingest`'s per-second progress line and the apt/docker pull output. Throttle the former to one line per ~30 s with awk, and silence apt/docker-pull entirely. Also add node-config.yaml mounted on top of the image's default config to bump the searcher's per-request and per-leaf timeouts from 30 s to 600 s. 
Several high-cardinality nested aggregations (Q17/18/32/33) on the full dataset run longer than 30 s and were timing out. Co-Authored-By: Claude Opus 4.7 (1M context) --- quickwit/benchmark.sh | 41 +++++++++++++++++++++++++++------------ quickwit/node-config.yaml | 8 ++++++++ 2 files changed, 37 insertions(+), 12 deletions(-) create mode 100644 quickwit/node-config.yaml diff --git a/quickwit/benchmark.sh b/quickwit/benchmark.sh index ae3c81a481..8d438a2652 100755 --- a/quickwit/benchmark.sh +++ b/quickwit/benchmark.sh @@ -1,16 +1,18 @@ #!/bin/bash -set -e +set -eo pipefail -# Install prerequisites -sudo apt-get update -y -sudo apt-get install -y wget curl jq bc docker.io +export DEBIAN_FRONTEND=noninteractive + +# Install prerequisites quietly +sudo apt-get update -qq >/dev/null +sudo apt-get install -y -qq wget curl jq bc docker.io >/dev/null sudo systemctl start docker # We use the Quickwit v0.9 release candidate. Stable v0.8.2 is missing # `cardinality`, `wildcard`, and several other features the benchmark relies # on; only the v0.9 line (still unreleased as of writing) provides them. QW_IMAGE="quickwit/quickwit:v0.9.0-rc" -sudo docker pull "$QW_IMAGE" +sudo docker pull -q "$QW_IMAGE" >/dev/null # Quickwit's data directory (shared between the server and the local-ingest # container). @@ -19,7 +21,13 @@ sudo rm -rf "$QW_DATA" mkdir -p "$QW_DATA" # Start the server in the background. Quickwit defaults: REST on 7280, gRPC on 7281. -sudo docker run -d --name qw --network host -v "$QW_DATA":/quickwit/qwdata "$QW_IMAGE" run +# Mount node-config.yaml on top of the image's default config to bump the +# searcher timeouts (defaults are 30s, which is too low for some of the +# nested high-cardinality aggregations on the full 100M-row dataset). 
+sudo docker run -d --name qw --network host \ + -v "$QW_DATA":/quickwit/qwdata \ + -v "$(pwd)/node-config.yaml":/quickwit/config/quickwit.yaml \ + "$QW_IMAGE" run >/dev/null echo "Quickwit container started" # Wait for the server to come up. @@ -34,10 +42,11 @@ done # Create the index from the YAML config. curl -sS -X POST http://localhost:7280/api/v1/indexes \ -H 'Content-Type: application/yaml' \ - --data-binary @index_config.yaml + --data-binary @index_config.yaml | jq -r '.index_uid // .message' -# Download the data -wget --continue --progress=dot:giga 'https://datasets.clickhouse.com/hits_compatible/hits.json.gz' +# Download the data quietly (the dataset is ~14 GB; full progress would +# dominate the captured benchmark log). +wget --continue -q 'https://datasets.clickhouse.com/hits_compatible/hits.json.gz' START=$(date +%s) @@ -47,16 +56,24 @@ START=$(date +%s) # `local-ingest` builds splits directly and writes them to the index # storage. The running server picks up new splits on its next metastore # poll (default 30s). +# +# local-ingest emits a "Num docs ... Thrghput ... Time" progress line +# roughly once per second; we throttle that to once per ~30 seconds so +# the captured log stays compact, and pass the surrounding lines through +# unchanged. zcat hits.json.gz | sudo docker run --rm -i --network host \ -v "$QW_DATA":/quickwit/qwdata \ - "$QW_IMAGE" tool local-ingest --index hits -y + "$QW_IMAGE" tool local-ingest --index hits -y 2>&1 \ + | awk '/Num docs/ { n = systime(); if (n - last >= 30) { print; fflush(); last = n } next } + { print; fflush() }' # Wait long enough for the server to refresh its metastore view. sleep 35 # Show stats. 
-curl -sS "http://localhost:7280/api/v1/indexes/hits/describe" | tee stats.json -echo +curl -sS "http://localhost:7280/api/v1/indexes/hits/describe" \ + | jq '{num_published_docs, num_published_splits, size_published_splits}' \ + | tee stats.json END=$(date +%s) echo "Load time: $((END - START))" diff --git a/quickwit/node-config.yaml b/quickwit/node-config.yaml new file mode 100644 index 0000000000..d94c85a403 --- /dev/null +++ b/quickwit/node-config.yaml @@ -0,0 +1,8 @@ +version: 0.8 + +# Bump the per-request and leaf-search timeouts well above the 30s default — +# a few of the high-cardinality aggregations on the full 100M-row ClickBench +# dataset (e.g. WatchID + ClientIP nested terms) take longer than that. +searcher: + request_timeout_secs: 600 + leaf_request_timeout_secs: 600 From 09c88c01311172a5ec197639b77f0ac90bace872 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 8 May 2026 13:19:46 +0000 Subject: [PATCH 5/7] quickwit: drop in-process caches before each cold query MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ClickBench's run.sh convention drops the OS page cache before each query. For Quickwit that's not enough — its in-process caches (partial_request_cache, fast_field_cache, split_footer_cache, predicate_cache) survive `drop_caches`, and there's no cache-clear endpoint in the REST API. Without action, warm runs were consistently ~30× faster than cold runs because they were replaying memoized results. - Disable `partial_request_cache` in node-config.yaml. This is the per-split partial-result cache; keeping it on lets the engine short-circuit identical queries. - Leave `predicate_cache` at its default. It's a predicate-evaluation cache (analogous to ClickHouse's query condition cache), not a result cache. - Restart the Quickwit container in run.sh before each non-null query. 
This clears the remaining in-process caches (fast_field_cache, split_footer_cache, predicate_cache) so the first run is genuinely cold; the 2nd and 3rd runs benefit from caches re-warmed by run 1, matching ClickBench's cold/warm convention. Restart cycle is ~11s on this hardware, ~7 min total overhead across the 35 non-null queries. Co-Authored-By: Claude Opus 4.7 (1M context) --- quickwit/node-config.yaml | 15 ++++++++++++--- quickwit/run.sh | 17 +++++++++++++---- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/quickwit/node-config.yaml b/quickwit/node-config.yaml index d94c85a403..15bb302928 100644 --- a/quickwit/node-config.yaml +++ b/quickwit/node-config.yaml @@ -1,8 +1,17 @@ version: 0.8 -# Bump the per-request and leaf-search timeouts well above the 30s default — -# a few of the high-cardinality aggregations on the full 100M-row ClickBench -# dataset (e.g. WatchID + ClientIP nested terms) take longer than that. searcher: + # Bump the per-request and leaf-search timeouts well above the 30s default — + # a few of the high-cardinality aggregations on the full 100M-row ClickBench + # dataset (e.g. WatchID + ClientIP nested terms) take longer than that. request_timeout_secs: 600 leaf_request_timeout_secs: 600 + + # Disable the per-split partial result cache so warm runs don't replay a + # memoized answer. The other in-memory caches (fast_field_cache, + # split_footer_cache, predicate_cache) are data-level caches (analogous to + # ClickHouse's query condition cache) and are kept at their defaults; + # run.sh restarts the container before each query so they also start cold + # for the first run. 
+ partial_request_cache: + capacity: 0 diff --git a/quickwit/run.sh b/quickwit/run.sh index 66cb7b20ac..bfbf5f7c2c 100755 --- a/quickwit/run.sh +++ b/quickwit/run.sh @@ -4,26 +4,35 @@ TRIES=3 SEARCH_URL="http://localhost:7280/api/v1/_elastic/hits/_search" while IFS= read -r QUERY; do - sync && echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null + if [ "$QUERY" != "null" ]; then + # Restart Quickwit before each query to clear all in-process caches + # (fast_field_cache, split_footer_cache, predicate_cache). The result-style + # partial_request_cache is already disabled in node-config.yaml. + # Then drop the OS page cache. This makes the first + # run cold; runs 2 and 3 may benefit from caches re-warmed by run 1. + sudo docker restart qw >/dev/null + until curl -sS -f http://localhost:7280/api/v1/version >/dev/null 2>&1; do sleep 1; done + sync && echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null + fi echo -n "[" for i in $(seq 1 $TRIES); do if [ "$QUERY" = "null" ]; then - # Query is not expressible in Quickwit (e.g. cardinality, scripts, regex_replace). + # Query is not expressible in Quickwit (e.g. text-field sort, + # scripts, REGEXP_REPLACE). echo -n "null" else START=$(date +%s.%N) QW_RSP=$(curl -s -X POST "$SEARCH_URL" -H 'Content-Type: application/json' -d "$QUERY") END=$(date +%s.%N) - # Quickwit returns "took" in milliseconds (the engine-internal latency). + # Quickwit returns "took" in milliseconds (engine-internal latency). QW_TIME=$(echo "$QW_RSP" | jq -r 'if has("error") or has("status") then "null" else (.took | tostring) end') if [ "$QW_TIME" = "null" ] || [ -z "$QW_TIME" ]; then echo -n "null" else - # Convert ms -> seconds with 4-decimal precision. 
printf "%.4f" "$(echo "scale=4; $QW_TIME / 1000" | bc)" fi fi From f96317a72e17d90cc6e6d7191921c16942ed4c43 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 8 May 2026 17:47:26 +0200 Subject: [PATCH 6/7] Update node-config.yaml --- quickwit/node-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/quickwit/node-config.yaml b/quickwit/node-config.yaml index 15bb302928..19a8f8457c 100644 --- a/quickwit/node-config.yaml +++ b/quickwit/node-config.yaml @@ -1,11 +1,11 @@ version: 0.8 searcher: - # Bump the per-request and leaf-search timeouts well above the 30s default — + # Bump the per-request and leaf-search timeouts above the 30s default — # a few of the high-cardinality aggregations on the full 100M-row ClickBench # dataset (e.g. WatchID + ClientIP nested terms) take longer than that. - request_timeout_secs: 600 - leaf_request_timeout_secs: 600 + request_timeout_secs: 60 + leaf_request_timeout_secs: 60 # Disable the per-split partial result cache so warm runs don't replay a # memoized answer. 
The other in-memory caches (fast_field_cache, From 3216a6752518ca2943e18725acbd9df570af5d36 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 8 May 2026 18:09:49 +0000 Subject: [PATCH 7/7] Add results --- quickwit/results/20260508/c6a.2xlarge.json | 58 +++++++++++++++++++ quickwit/results/20260508/c6a.4xlarge.json | 58 +++++++++++++++++++ quickwit/results/20260508/c6a.metal.json | 58 +++++++++++++++++++ quickwit/results/20260508/c7a.metal-48xl.json | 58 +++++++++++++++++++ quickwit/results/20260508/c8g.4xlarge.json | 58 +++++++++++++++++++ quickwit/results/20260508/c8g.metal-48xl.json | 58 +++++++++++++++++++ 6 files changed, 348 insertions(+) create mode 100644 quickwit/results/20260508/c6a.2xlarge.json create mode 100644 quickwit/results/20260508/c6a.4xlarge.json create mode 100644 quickwit/results/20260508/c6a.metal.json create mode 100644 quickwit/results/20260508/c7a.metal-48xl.json create mode 100644 quickwit/results/20260508/c8g.4xlarge.json create mode 100644 quickwit/results/20260508/c8g.metal-48xl.json diff --git a/quickwit/results/20260508/c6a.2xlarge.json b/quickwit/results/20260508/c6a.2xlarge.json new file mode 100644 index 0000000000..d5f5984363 --- /dev/null +++ b/quickwit/results/20260508/c6a.2xlarge.json @@ -0,0 +1,58 @@ +{ + "system": "Quickwit", + "date": "2026-05-08", + "machine": "c6a.2xlarge", + "cluster_size": 1, + "proprietary": "no", + "hardware": "cpu", + "tuned": "no", + "tags": ["Rust","search"], + "load_time": 3403, + "data_size": 82647119927, + "result": [ + [0.033, 0.001, 0.001], + [0.161, 0.068, 0.068], + [0.405, 0.209, 0.211], + [1.963, 0.111, 0.111], + [2.579, 0.796, 0.811], + [1.495, 0.802, 0.866], + [0.273, 0.216, 0.217], + [0.144, 0.071, 0.07], + [4.023, 1.474, 1.518], + [4.668, 1.614, 1.569], + [2.378, 0.15, 0.15], + [2.627, 0.223, 0.231], + [1.029, 0.185, 0.201], + [null, null, null], + [null, null, null], + [2.559, 0.752, 0.704], + [null, null, null], + [null, null, null], + [null, null, null], + [0.076, 0.005, 
0.004], + [3.595, 2.23, 2.274], + [5.28, 2.281, 2.482], + [11.243, 4.755, 4.519], + [4.45, 1.915, 2.272], + [0.419, 0.104, 0.098], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [2.524, 0.914, 0.912], + [null, null, null], + [null, null, null], + [2.509, 0.436, 0.442], + [2.536, 0.434, 0.414], + [null, null, null], + [2.375, 0.034, 0.031], + [2.246, 0.026, 0.024], + [2.398, 0.056, 0.054], + [null, null, null], + [2.518, 0.896, 0.9], + [0.432, 0.04, 0.038], + [0.411, 0.049, 0.043] +] +} + diff --git a/quickwit/results/20260508/c6a.4xlarge.json b/quickwit/results/20260508/c6a.4xlarge.json new file mode 100644 index 0000000000..ec24f1253b --- /dev/null +++ b/quickwit/results/20260508/c6a.4xlarge.json @@ -0,0 +1,58 @@ +{ + "system": "Quickwit", + "date": "2026-05-08", + "machine": "c6a.4xlarge", + "cluster_size": 1, + "proprietary": "no", + "hardware": "cpu", + "tuned": "no", + "tags": ["Rust","search"], + "load_time": 2724, + "data_size": 74144537424, + "result": [ + [0.039, 0.001, 0.001], + [0.134, 0.052, 0.049], + [0.3, 0.147, 0.145], + [1.945, 0.08, 0.08], + [2.33, 0.515, 0.514], + [1.323, 0.574, 0.538], + [0.217, 0.149, 0.149], + [0.127, 0.052, 0.054], + [3.808, 1.166, 1.221], + [4.23, 1.028, 1.111], + [2.388, 0.113, 0.121], + [2.61, 0.178, 0.168], + [0.998, 0.139, 0.132], + [54.115, 52.265, 53.128], + [1.434, 0.343, 0.343], + [2.372, 0.544, 0.547], + [null, null, null], + [null, null, null], + [null, null, null], + [0.074, 0.005, 0.005], + [3.373, 1.842, 1.876], + [5.078, 1.794, 1.74], + [11.007, 2.991, 3.168], + [4.086, 1.456, 1.478], + [0.418, 0.081, 0.072], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [2.49, 0.829, 0.81], + [null, null, null], + [null, null, null], + [2.482, 0.299, 0.267], + [2.498, 0.281, 0.278], + [null, null, null], + [2.426, 0.031, 0.029], + [2.337, 0.025, 0.025], + [2.448, 0.055, 0.051], + [null, null, null], 
+ [3.359, 0.888, 0.886], + [0.456, 0.039, 0.036], + [0.441, 0.058, 0.04] +] +} + diff --git a/quickwit/results/20260508/c6a.metal.json b/quickwit/results/20260508/c6a.metal.json new file mode 100644 index 0000000000..9a353cda0a --- /dev/null +++ b/quickwit/results/20260508/c6a.metal.json @@ -0,0 +1,58 @@ +{ + "system": "Quickwit", + "date": "2026-05-08", + "machine": "c6a.metal", + "cluster_size": 1, + "proprietary": "no", + "hardware": "cpu", + "tuned": "no", + "tags": ["Rust","search"], + "load_time": 2580, + "data_size": 75886849124, + "result": [ + [0.048, 0.001, 0.001], + [0.117, 0.032, 0.03], + [0.282, 0.183, 0.13], + [1.935, 0.068, 0.069], + [2.243, 0.384, 0.382], + [1.189, 0.418, 0.417], + [0.211, 0.132, 0.13], + [0.15, 0.034, 0.033], + [3.587, 0.943, 0.964], + [4.087, 0.901, 0.852], + [2.369, 0.098, 0.094], + [2.567, 0.15, 0.151], + [0.95, 0.1, 0.102], + [51.568, 51.032, 49.702], + [1.346, 0.244, 0.233], + [2.232, 0.371, 0.405], + [null, 680.233, 358.013], + [null, 287.939, 339.255], + [null, null, null], + [0.083, 0.006, 0.005], + [3.039, 1.447, 1.44], + [4.699, 1.457, 1.511], + [10.677, 2.319, 2.37], + [3.917, 1.285, 1.337], + [0.391, 0.057, 0.056], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [2.221, 0.564, 0.577], + [792.323, null, 353.894], + [null, null, null], + [2.452, 0.225, 0.207], + [2.45, 0.213, 0.213], + [null, null, null], + [2.403, 0.029, 0.029], + [2.32, 0.024, 0.022], + [2.435, 0.055, 0.05], + [null, null, null], + [3.238, 0.806, 0.809], + [0.431, 0.038, 0.034], + [0.419, 0.04, 0.04] +] +} + diff --git a/quickwit/results/20260508/c7a.metal-48xl.json b/quickwit/results/20260508/c7a.metal-48xl.json new file mode 100644 index 0000000000..901dac6171 --- /dev/null +++ b/quickwit/results/20260508/c7a.metal-48xl.json @@ -0,0 +1,58 @@ +{ + "system": "Quickwit", + "date": "2026-05-08", + "machine": "c7a.metal-48xl", + "cluster_size": 1, + "proprietary": "no", + "hardware": "cpu", + 
"tuned": "no", + "tags": ["Rust","search"], + "load_time": 2333, + "data_size": 74234857136, + "result": [ + [0.053, 0.002, 0.002], + [0.142, 0.027, 0.026], + [0.403, 0.125, 0.194], + [1.952, 0.065, 0.065], + [2.163, 0.28, 0.28], + [1.131, 0.346, 0.348], + [0.244, 0.124, 0.123], + [0.172, 0.031, 0.03], + [3.521, 0.879, 0.866], + [3.944, 0.704, 0.727], + [2.376, 0.077, 0.08], + [2.596, 0.135, 0.129], + [0.98, 0.087, 0.086], + [54.644, 53.696, 51.587], + [1.364, 0.213, 0.208], + [2.212, 0.363, 0.355], + [606.124, null, 402.088], + [597.226, 596.798, 595.534], + [null, null, null], + [0.117, 0.006, 0.005], + [2.903, 1.197, 1.196], + [4.562, 1.216, 1.217], + [10.768, 2.249, 2.194], + [3.939, 1.196, 1.215], + [0.491, 0.049, 0.048], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [2.216, 0.524, 0.523], + [604.168, 601.291, 598.873], + [null, null, null], + [2.519, 0.197, 0.193], + [2.535, 0.197, 0.2], + [null, null, null], + [2.494, 0.029, 0.026], + [2.397, 0.021, 0.022], + [2.528, 0.048, 0.045], + [null, null, null], + [3.259, 0.785, 0.773], + [0.52, 0.036, 0.034], + [0.437, 0.036, 0.034] +] +} + diff --git a/quickwit/results/20260508/c8g.4xlarge.json b/quickwit/results/20260508/c8g.4xlarge.json new file mode 100644 index 0000000000..68ebd2f92c --- /dev/null +++ b/quickwit/results/20260508/c8g.4xlarge.json @@ -0,0 +1,58 @@ +{ + "system": "Quickwit", + "date": "2026-05-08", + "machine": "c8g.4xlarge", + "cluster_size": 1, + "proprietary": "no", + "hardware": "cpu", + "tuned": "no", + "tags": ["Rust","search"], + "load_time": 2390, + "data_size": 73990228369, + "result": [ + [0.031, 0.001, 0.001], + [0.104, 0.043, 0.043], + [0.239, 0.123, 0.117], + [1.922, 0.058, 0.059], + [2.163, 0.312, 0.324], + [1.197, 0.44, 0.446], + [0.165, 0.111, 0.113], + [0.098, 0.042, 0.042], + [3.547, 0.9, 0.902], + [3.937, 0.707, 0.725], + [2.368, 0.084, 0.084], + [2.58, 0.145, 0.139], + [0.964, 0.108, 0.105], + [38.944, 35.934, 
36.43], + [1.361, 0.254, 0.251], + [2.169, 0.328, 0.309], + [null, null, null], + [null, null, null], + [null, null, null], + [0.047, 0.004, 0.003], + [3.238, 1.457, 1.453], + [4.871, 1.457, 1.453], + [10.686, 2.344, 2.353], + [3.872, 1.162, 1.157], + [0.395, 0.06, 0.065], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [2.245, 0.525, 0.516], + [null, 839.555, null], + [null, null, null], + [2.43, 0.171, 0.168], + [2.435, 0.172, 0.169], + [null, null, null], + [2.424, 0.023, 0.022], + [2.329, 0.019, 0.018], + [2.452, 0.04, 0.038], + [null, null, null], + [3.114, 0.689, 0.693], + [0.457, 0.029, 0.029], + [0.438, 0.029, 0.027] +] +} + diff --git a/quickwit/results/20260508/c8g.metal-48xl.json b/quickwit/results/20260508/c8g.metal-48xl.json new file mode 100644 index 0000000000..4baec21074 --- /dev/null +++ b/quickwit/results/20260508/c8g.metal-48xl.json @@ -0,0 +1,58 @@ +{ + "system": "Quickwit", + "date": "2026-05-08", + "machine": "c8g.metal-48xl", + "cluster_size": 1, + "proprietary": "no", + "hardware": "cpu", + "tuned": "no", + "tags": ["Rust","search"], + "load_time": 2507, + "data_size": 75341136867, + "result": [ + [0.038, 0.001, 0.001], + [0.126, 0.044, 0.044], + [0.316, 0.117, 0.117], + [1.933, 0.061, 0.061], + [2.205, 0.335, 0.335], + [1.172, 0.393, 0.389], + [0.193, 0.117, 0.117], + [0.14, 0.045, 0.045], + [3.528, 0.891, 0.89], + [3.961, 0.736, 0.736], + [2.362, 0.086, 0.085], + [2.579, 0.138, 0.136], + [0.967, 0.099, 0.097], + [38.004, 34.929, 34.746], + [1.34, 0.221, 0.22], + [2.182, 0.337, 0.361], + [null, null, null], + [null, null, null], + [null, null, null], + [0.099, 0.003, 0.003], + [2.87, 1.212, 1.199], + [4.534, 1.2, 1.224], + [10.769, 2.272, 2.277], + [3.924, 1.197, 1.244], + [0.43, 0.059, 0.058], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [2.113, 0.43, 0.418], + [null, null, null], + [null, null, null], + 
[2.434, 0.169, 0.172], + [2.435, 0.17, 0.167], + [null, null, null], + [2.435, 0.023, 0.022], + [2.348, 0.019, 0.018], + [2.464, 0.039, 0.039], + [null, null, null], + [3.153, 0.844, 0.842], + [0.431, 0.03, 0.029], + [0.416, 0.028, 0.027] +] +} +