Skip to content

Commit d2b3865

Browse files
committed
feat: THP and improved result collections experiment
1 parent 2debd93 commit d2b3865

17 files changed

Lines changed: 1171 additions & 456 deletions

File tree

Cargo.toml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ all-features = true
2121
cargo_asm = ["dep:rand", "dep:rand_chacha"]
2222
csv = ["dep:csv"]
2323
default = ["tracing", "fixed"]
24+
buffered_result_collection = ["dep:smallvec"]
25+
huge_pages = ["dep:libc"]
2426
leaf_view_chunked = []
2527
leaf_nta_prefetch = []
2628
modified_van_emde_boas = []
@@ -46,6 +48,7 @@ serde = [
4648
simd = []
4749
simulator = []
4850
rkyv_08 = ["dep:rkyv_08"]
51+
small_n_result_collectors = ["dep:smallvec"]
4952
test_utils = ["dep:rand", "dep:rand_chacha", "dep:rayon", "dep:fixed"]
5053
tracing = ["dep:tracing", "dep:tracing-subscriber"]
5154
unreliable_select_nth_unstable = []
@@ -90,6 +93,14 @@ features = ["laz-parallel"]
9093
version = "0.4"
9194
optional = true
9295

96+
[dependencies.libc]
97+
version = "0.2"
98+
optional = true
99+
100+
[dependencies.smallvec]
101+
version = "1"
102+
optional = true
103+
93104
[dependencies.kiddo_v5]
94105
package = "kiddo"
95106
path = "../kiddo-v5"
@@ -223,6 +234,11 @@ name = "v6_stem_strategies_focus"
223234
harness = false
224235
required-features = ["test_utils"]
225236

237+
[[bench]]
238+
name = "v6_result_collection_focus"
239+
harness = false
240+
required-features = ["test_utils"]
241+
226242
#[[example]]
227243
#name = "avx2-check"
228244
#path = "examples/avx2-check.rs"
benches/v6_result_collection_focus.rs

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
use codspeed_criterion_compat::{
2+
black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
3+
};
4+
use kiddo::dist::SquaredEuclidean;
5+
use kiddo::kd_tree::leaf_strategies::VecOfArenas;
6+
use kiddo::kd_tree::KdTree;
7+
use kiddo::stem_strategies::donnelly_2_pf::DonnellyPf;
8+
use rand::Rng;
9+
use rand::SeedableRng;
10+
use rand_chacha::ChaCha8Rng;
11+
use std::num::NonZeroUsize;
12+
13+
/// Number of coordinates per point; every point and query is a `[f64; K]`.
const K: usize = 3;
/// Const parameter passed to the leaf storage and tree types alongside `K`
/// (presumably the leaf bucket size -- confirm against the kiddo API).
const B: usize = 32;
/// Number of queries used when `KIDDO_BENCH_QUERIES` is unset or unparsable.
const DEFAULT_QUERY_COUNT: usize = 100;
/// Number of points used when `KIDDO_BENCH_POINTS` is unset or unparsable (2^24).
const DEFAULT_POINT_COUNT: usize = 1usize << 24;
/// Maximum result quantity used when `KIDDO_BENCH_MAX_QTY` is unset or unparsable.
const DEFAULT_MAX_QTY: usize = 16;
/// Maximum distance used when `KIDDO_BENCH_MAX_DIST` is unset or unparsable.
const DEFAULT_MAX_DIST: f64 = 0.0025;
/// Fixed RNG seed for point generation, so every run builds the same point set.
const POINT_SEED: u64 = 0x5eed_0000_0000_0301;
/// Fixed RNG seed for query generation, distinct from the point seed.
const QUERY_SEED: u64 = 0x5eed_0000_0000_0302;
21+
22+
/// Leaf storage for the benchmark tree: `VecOfArenas` instantiated with
/// `f64`, `u32`, `K` and `B` (presumably f64 coordinates and u32 item ids --
/// confirm against the kiddo API).
type ArenaLeaves = VecOfArenas<f64, u32, K, B>;
23+
/// The tree type under test: a `KdTree` using the `DonnellyPf<3, 64, 8, K>`
/// stem strategy together with `ArenaLeaves` leaf storage.
type DonnellyPfTree = KdTree<f64, u32, DonnellyPf<3, 64, 8, K>, ArenaLeaves, K, B>;
24+
25+
/// Reads the environment variable `var` and parses it as a `usize`.
///
/// Leading and trailing whitespace in the value is ignored, so a value such
/// as `"100 "` is accepted instead of silently falling back to the default.
///
/// Returns `default` when the variable is unset, is not valid Unicode, or
/// does not parse as a `usize` after trimming.
fn read_usize_env(var: &str, default: usize) -> usize {
    std::env::var(var)
        .ok()
        .and_then(|value| value.trim().parse::<usize>().ok())
        .unwrap_or(default)
}
31+
32+
/// Reads the environment variable `var` and parses it as an `f64`.
///
/// Leading and trailing whitespace in the value is ignored, so a value such
/// as `" 0.0025"` is accepted instead of silently falling back to the default.
///
/// Returns `default` when the variable is unset, is not valid Unicode, or
/// does not parse as an `f64` after trimming.
fn read_f64_env(var: &str, default: f64) -> f64 {
    std::env::var(var)
        .ok()
        .and_then(|value| value.trim().parse::<f64>().ok())
        .unwrap_or(default)
}
38+
39+
/// Generates `point_count` random `[f64; K]` points.
///
/// The generator is a `ChaCha8Rng` seeded with the fixed `POINT_SEED`, so the
/// same `point_count` always yields the same sequence of points.
fn build_points(point_count: usize) -> Vec<[f64; K]> {
    let mut point_rng = ChaCha8Rng::seed_from_u64(POINT_SEED);
    std::iter::repeat_with(|| point_rng.random::<[f64; K]>())
        .take(point_count)
        .collect()
}
43+
44+
/// Generates `query_count` random `[f64; K]` query points.
///
/// The generator is a `ChaCha8Rng` seeded with the fixed `QUERY_SEED`, so the
/// same `query_count` always yields the same sequence of queries.
fn build_queries(query_count: usize) -> Vec<[f64; K]> {
    let mut query_rng = ChaCha8Rng::seed_from_u64(QUERY_SEED);
    std::iter::repeat_with(|| query_rng.random::<[f64; K]>())
        .take(query_count)
        .collect()
}
48+
49+
fn run_sorted_nearest_n_within_queries(
50+
tree: &DonnellyPfTree,
51+
queries: &[[f64; K]],
52+
max_dist: f64,
53+
max_qty: NonZeroUsize,
54+
) -> (usize, u64, f64) {
55+
let mut checksum_len = 0usize;
56+
let mut checksum_item = 0u64;
57+
let mut checksum_dist = 0.0f64;
58+
59+
for query in queries {
60+
let results = tree.nearest_n_within::<SquaredEuclidean<f64>>(
61+
black_box(query),
62+
max_dist,
63+
max_qty,
64+
true,
65+
);
66+
checksum_len += results.len();
67+
68+
for result in results {
69+
checksum_item = checksum_item.wrapping_add(result.item as u64);
70+
checksum_dist += result.distance;
71+
}
72+
}
73+
74+
(checksum_len, checksum_item, checksum_dist)
75+
}
76+
77+
fn run_best_n_within_queries(
78+
tree: &DonnellyPfTree,
79+
queries: &[[f64; K]],
80+
max_dist: f64,
81+
max_qty: NonZeroUsize,
82+
) -> (usize, u64, f64) {
83+
let mut checksum_len = 0usize;
84+
let mut checksum_item = 0u64;
85+
let mut checksum_dist = 0.0f64;
86+
87+
for query in queries {
88+
let results =
89+
tree.best_n_within::<SquaredEuclidean<f64>>(black_box(query), max_dist, max_qty);
90+
checksum_len += results.len();
91+
92+
for result in results.into_vec() {
93+
checksum_item = checksum_item.wrapping_add(result.item as u64);
94+
checksum_dist += result.distance;
95+
}
96+
}
97+
98+
(checksum_len, checksum_item, checksum_dist)
99+
}
100+
101+
fn v6_result_collection_focus(c: &mut Criterion) {
102+
let query_count = read_usize_env("KIDDO_BENCH_QUERIES", DEFAULT_QUERY_COUNT);
103+
let point_count = read_usize_env("KIDDO_BENCH_POINTS", DEFAULT_POINT_COUNT);
104+
let max_qty =
105+
NonZeroUsize::new(read_usize_env("KIDDO_BENCH_MAX_QTY", DEFAULT_MAX_QTY)).unwrap();
106+
let max_dist = read_f64_env("KIDDO_BENCH_MAX_DIST", DEFAULT_MAX_DIST);
107+
108+
let points = build_points(point_count);
109+
let queries = build_queries(query_count);
110+
let tree: DonnellyPfTree = KdTree::new_from_slice(&points);
111+
112+
let mut group = c.benchmark_group("v6 result collection focus");
113+
group.throughput(Throughput::Elements(query_count as u64));
114+
115+
group.bench_function(
116+
BenchmarkId::new("sorted nearest_n_within / Donnelly PF", point_count),
117+
|b| {
118+
b.iter(|| {
119+
black_box(run_sorted_nearest_n_within_queries(
120+
&tree, &queries, max_dist, max_qty,
121+
))
122+
});
123+
},
124+
);
125+
126+
group.bench_function(
127+
BenchmarkId::new("best_n_within / Donnelly PF", point_count),
128+
|b| {
129+
b.iter(|| {
130+
black_box(run_best_n_within_queries(
131+
&tree, &queries, max_dist, max_qty,
132+
))
133+
});
134+
},
135+
);
136+
137+
group.finish();
138+
}
139+
140+
// Registers `v6_result_collection_focus` in a Criterion group named `benches`.
criterion_group!(benches, v6_result_collection_focus);
141+
// Expands to the entry point that runs the `benches` group; the bench target
// is declared with `harness = false`, so it supplies its own `main`.
criterion_main!(benches);

benches/v6_stem_strategies_focus.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@ use kiddo::kd_tree::leaf_strategies::VecOfArenas;
66
use kiddo::kd_tree::KdTree;
77
use kiddo::stem_strategies::donnelly_2_pf::DonnellyPf;
88
use kiddo::stem_strategies::eytzinger_pf_far::EytzingerPfFar;
9+
use kiddo::stem_strategies::Eytzinger;
910
#[cfg(all(
1011
feature = "simd",
1112
target_arch = "x86_64",
1213
any(target_feature = "avx2", target_feature = "avx512f")
1314
))]
1415
use kiddo::stem_strategies::{Block3, DonnellyMarkerSimd};
15-
use kiddo::stem_strategies::{Donnelly, Eytzinger};
1616
use rand::Rng;
1717
use rand::SeedableRng;
1818
use rand_chacha::ChaCha8Rng;

justfile

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,22 @@ bench-v6-stem-strategies-focus FEATURES='simd,test_utils,logging_off' POINTS='41
118118
RUSTFLAGS='-C target-cpu=native' \
119119
cargo criterion --bench v6_stem_strategies_focus --features {{FEATURES}}
120120

121+
bench-v6-stem-strategies-big FEATURES='simd,test_utils,logging_off' POINTS='16777216' QUERIES='10000':
122+
RUSTC_WRAPPER= \
123+
KIDDO_BENCH_POINTS={{POINTS}} \
124+
KIDDO_BENCH_QUERIES={{QUERIES}} \
125+
RUSTFLAGS='-C target-cpu=native' \
126+
cargo criterion --bench v6_stem_strategies_focus --features {{FEATURES}}
127+
128+
bench-v6-result-collection-focus FEATURES='simd,test_utils,logging_off' POINTS='16777216' QUERIES='100' MAX_QTY='16' MAX_DIST='0.0025':
129+
RUSTC_WRAPPER= \
130+
KIDDO_BENCH_POINTS={{POINTS}} \
131+
KIDDO_BENCH_QUERIES={{QUERIES}} \
132+
KIDDO_BENCH_MAX_QTY={{MAX_QTY}} \
133+
KIDDO_BENCH_MAX_DIST={{MAX_DIST}} \
134+
RUSTFLAGS='-C target-cpu=native' \
135+
cargo criterion --bench v6_result_collection_focus --features {{FEATURES}}
136+
121137
profile-v6-stem-exact-stats FEATURES='simd,test_utils,logging_off' POINTS='4194304' QUERIES='10000' REPEATS='1':
122138
RUSTC_WRAPPER= \
123139
KIDDO_PROFILE_POINTS={{POINTS}} \

0 commit comments

Comments
 (0)