Skip to content

Commit e0bce53

Browse files
committed
feat: introduce within_unsorted_visit, improve within_unsorted_iter
1 parent 1d34b5a commit e0bce53

3 files changed

Lines changed: 225 additions & 10 deletions

File tree

src/kd_tree/archived_query.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,11 @@ where
264264
self.nearest_n_within::<D>(query, max_dist, NonZeroUsize::MAX, true)
265265
}
266266

267-
/// Finds all points within a given distance of the query point, unsorted.
267+
/// Visits every point within a given distance of the query point, unsorted.
268+
///
269+
/// This is the lowest-overhead streaming range-query API for archived trees. It runs
270+
/// traversal and optimized leaf kernels, but routes each match directly to `visitor`
271+
/// instead of building a result collection.
268272
pub fn within_unsorted_visit<D, F>(&self, query: &[A; K], max_dist: D::Output, mut visitor: F)
269273
where
270274
D: KdTreeDistanceMetric<A, K>,
@@ -315,7 +319,10 @@ where
315319
results
316320
}
317321

318-
/// Returns an iterator over all points within a given distance, unsorted.
322+
/// Returns a streaming iterator over all points within a given distance, unsorted.
323+
///
324+
/// This avoids materializing the full result set returned by [`within_unsorted`](Self::within_unsorted).
325+
/// For the absolute lowest overhead, use [`within_unsorted_visit`](Self::within_unsorted_visit).
319326
pub fn within_unsorted_iter<D>(
320327
&self,
321328
query: &[A; K],

src/kd_tree/iter.rs

Lines changed: 202 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,17 @@ use crate::kd_tree::leaf_view::TlsLeafScratch;
33
use crate::kd_tree::leaf_view_chunked::nearest_n_within::{
44
nearest_n_within_with_query_wide, nearest_n_within_with_query_wide_arena,
55
};
6+
use crate::kd_tree::result_collection::ResultCollection;
67
use crate::traits_unified_2::{AxisUnified, Basics, LeafProjection, LeafStrategy};
78
use crate::{NearestNeighbour, StemStrategy};
9+
use std::mem::MaybeUninit;
810
use std::ptr::NonNull;
911

1012
use super::{KdTreeAccessor, StemLeafResolution};
1113

14+
const WITHIN_UNSORTED_ITER_INLINE_STACK_CAPACITY: usize = 64;
15+
const WITHIN_UNSORTED_ITER_INLINE_RESULT_CAPACITY: usize = 64;
16+
1217
/// Iterator over all point/item pairs in a kd-tree.
1318
pub struct KdTreeIter<'a, Tree, A, T, SS, LS, const K: usize, const B: usize>
1419
where
@@ -102,7 +107,193 @@ struct TraversalFrame<SS, O, const K: usize> {
102107
rd: O,
103108
}
104109

110+
struct InlineStack<T, const N: usize> {
111+
inline: [MaybeUninit<T>; N],
112+
inline_len: usize,
113+
spill: Vec<T>,
114+
}
115+
116+
impl<T, const N: usize> InlineStack<T, N> {
117+
#[inline]
118+
fn new() -> Self {
119+
Self {
120+
inline: [const { MaybeUninit::uninit() }; N],
121+
inline_len: 0,
122+
spill: Vec::new(),
123+
}
124+
}
125+
126+
#[inline]
127+
fn push(&mut self, value: T) {
128+
if self.spill.is_empty() && self.inline_len < N {
129+
unsafe { self.inline.get_unchecked_mut(self.inline_len) }.write(value);
130+
self.inline_len += 1;
131+
} else {
132+
self.spill.push(value);
133+
}
134+
}
135+
136+
#[inline]
137+
fn pop(&mut self) -> Option<T> {
138+
if let Some(value) = self.spill.pop() {
139+
return Some(value);
140+
}
141+
142+
if self.inline_len == 0 {
143+
None
144+
} else {
145+
self.inline_len -= 1;
146+
Some(unsafe {
147+
self.inline
148+
.get_unchecked(self.inline_len)
149+
.assume_init_read()
150+
})
151+
}
152+
}
153+
}
154+
155+
impl<T, const N: usize> Drop for InlineStack<T, N> {
156+
fn drop(&mut self) {
157+
while self.inline_len != 0 {
158+
self.inline_len -= 1;
159+
unsafe {
160+
self.inline
161+
.get_unchecked_mut(self.inline_len)
162+
.assume_init_drop()
163+
};
164+
}
165+
}
166+
}
167+
168+
struct InlineResultBuffer<E, const N: usize> {
169+
inline: [MaybeUninit<E>; N],
170+
inline_len: usize,
171+
spill: Vec<E>,
172+
}
173+
174+
impl<E, const N: usize> InlineResultBuffer<E, N> {
175+
#[inline]
176+
fn new() -> Self {
177+
Self {
178+
inline: [const { MaybeUninit::uninit() }; N],
179+
inline_len: 0,
180+
spill: Vec::new(),
181+
}
182+
}
183+
184+
#[inline]
185+
fn clear(&mut self) {
186+
while self.inline_len != 0 {
187+
self.inline_len -= 1;
188+
unsafe {
189+
self.inline
190+
.get_unchecked_mut(self.inline_len)
191+
.assume_init_drop()
192+
};
193+
}
194+
self.spill.clear();
195+
}
196+
197+
#[inline]
198+
fn len(&self) -> usize {
199+
self.inline_len + self.spill.len()
200+
}
201+
202+
#[inline]
203+
fn is_empty(&self) -> bool {
204+
self.len() == 0
205+
}
206+
}
207+
208+
impl<E: Copy, const N: usize> InlineResultBuffer<E, N> {
209+
#[inline]
210+
fn push(&mut self, value: E) {
211+
if self.spill.is_empty() && self.inline_len < N {
212+
unsafe { self.inline.get_unchecked_mut(self.inline_len) }.write(value);
213+
self.inline_len += 1;
214+
} else {
215+
self.spill.push(value);
216+
}
217+
}
218+
219+
#[inline]
220+
fn get(&self, idx: usize) -> E {
221+
if idx < self.inline_len {
222+
unsafe { self.inline.get_unchecked(idx).assume_init_read() }
223+
} else {
224+
unsafe { *self.spill.get_unchecked(idx - self.inline_len) }
225+
}
226+
}
227+
}
228+
229+
impl<E, const N: usize> Drop for InlineResultBuffer<E, N> {
230+
fn drop(&mut self) {
231+
self.clear();
232+
}
233+
}
234+
235+
impl<O, E, const N: usize> ResultCollection<O, E> for InlineResultBuffer<E, N>
236+
where
237+
O: AxisUnified<Coord = O>,
238+
E: Copy + Ord,
239+
{
240+
#[inline(always)]
241+
fn with_max_qty(_max_qty: usize) -> Self {
242+
Self::new()
243+
}
244+
245+
#[inline(always)]
246+
fn max_qty(&self) -> usize {
247+
usize::MAX
248+
}
249+
250+
#[inline(always)]
251+
fn len(&self) -> usize {
252+
InlineResultBuffer::len(self)
253+
}
254+
255+
#[inline(always)]
256+
fn add(&mut self, entry: E) {
257+
self.push(entry);
258+
}
259+
260+
#[inline(always)]
261+
fn threshold_distance(&self) -> Option<O> {
262+
None
263+
}
264+
265+
#[inline]
266+
fn into_vec(self) -> Vec<E> {
267+
self.into_vec_unsorted()
268+
}
269+
270+
#[inline]
271+
fn into_sorted_vec(self) -> Vec<E> {
272+
let mut result = self.into_vec_unsorted();
273+
result.sort_unstable();
274+
result
275+
}
276+
}
277+
278+
impl<E: Copy, const N: usize> InlineResultBuffer<E, N> {
279+
#[inline]
280+
fn into_vec_unsorted(mut self) -> Vec<E> {
281+
let mut result = Vec::with_capacity(self.len());
282+
for idx in 0..self.inline_len {
283+
result.push(unsafe { self.inline.get_unchecked(idx).assume_init_read() });
284+
}
285+
result.append(&mut self.spill);
286+
self.inline_len = 0;
287+
result
288+
}
289+
}
290+
105291
/// Lazy iterator returned by `within_unsorted_iter`.
292+
///
293+
/// This is the ergonomic streaming API for callers who want to avoid materializing the full
294+
/// result set. It keeps traversal state and per-leaf matches inline in the common case, spilling
295+
/// to heap allocation only if the tree depth or a single leaf's match count exceeds the inline
296+
/// capacities.
106297
pub struct WithinUnsortedIter<'a, Tree, A, T, SS, LS, D, const K: usize, const B: usize>
107298
where
108299
A: AxisUnified<Coord = A> + 'static,
@@ -117,8 +308,12 @@ where
117308
query: [A; K],
118309
query_wide: [D::Output; K],
119310
max_dist: D::Output,
120-
stack: Vec<TraversalFrame<SS, D::Output, K>>,
121-
leaf_results: Vec<NearestNeighbour<D::Output, T>>,
311+
stack:
312+
InlineStack<TraversalFrame<SS, D::Output, K>, WITHIN_UNSORTED_ITER_INLINE_STACK_CAPACITY>,
313+
leaf_results: InlineResultBuffer<
314+
NearestNeighbour<D::Output, T>,
315+
WITHIN_UNSORTED_ITER_INLINE_RESULT_CAPACITY,
316+
>,
122317
leaf_result_pos: usize,
123318
_phantom: std::marker::PhantomData<(T, LS, D)>,
124319
}
@@ -136,7 +331,7 @@ where
136331
{
137332
#[inline]
138333
pub(crate) fn new(tree: &'a Tree, query: &[A; K], max_dist: D::Output) -> Self {
139-
let mut stack = Vec::with_capacity((tree.max_stem_level().max(0) as usize) + 1);
334+
let mut stack = InlineStack::new();
140335
if tree.size() != 0 {
141336
let stems_ptr = NonNull::new(tree.stems().as_ptr() as *mut u8).unwrap();
142337
stack.push(TraversalFrame {
@@ -152,7 +347,7 @@ where
152347
query_wide: query.map(D::widen_coord),
153348
max_dist,
154349
stack,
155-
leaf_results: Vec::new(),
350+
leaf_results: InlineResultBuffer::new(),
156351
leaf_result_pos: 0,
157352
_phantom: std::marker::PhantomData,
158353
}
@@ -212,6 +407,8 @@ where
212407

213408
#[inline]
214409
fn load_next_non_empty_leaf(&mut self) -> bool {
410+
// TODO: specialize this traversal cursor for Donnelly Block SIMD so iterator traversal
411+
// follows the same block-at-once pruning path as the callback/materialized queries.
215412
while let Some(frame) = self.stack.pop() {
216413
if D::Output::cmp(frame.rd, self.max_dist) == std::cmp::Ordering::Greater {
217414
continue;
@@ -287,7 +484,7 @@ where
287484
fn next(&mut self) -> Option<Self::Item> {
288485
loop {
289486
if self.leaf_result_pos < self.leaf_results.len() {
290-
let result = unsafe { *self.leaf_results.get_unchecked(self.leaf_result_pos) };
487+
let result = self.leaf_results.get(self.leaf_result_pos);
291488
self.leaf_result_pos += 1;
292489
return Some(result);
293490
}

src/kd_tree/query/within_unsorted.rs

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,12 @@ where
6060

6161
/// Visits every point within a given distance of the query point, unsorted.
6262
///
63-
/// This avoids allocating a result collection and is the preferred API for
64-
/// high-throughput range-query consumers that can process matches immediately.
63+
/// This is the lowest-overhead streaming range-query API. It runs the normal
64+
/// traversal and optimized leaf kernels, but routes each match directly to
65+
/// `visitor` instead of building a result collection.
66+
///
67+
/// Prefer this over [`within_unsorted_iter`](Self::within_unsorted_iter) when callback
68+
/// style is acceptable and allocation/dispatch overhead matters.
6569
#[inline]
6670
pub fn within_unsorted_visit<D, F>(&self, query: &[A; K], max_dist: D::Output, mut visitor: F)
6771
where
@@ -114,7 +118,14 @@ where
114118
self.nearest_n_within::<D>(query, max_dist, NonZeroUsize::MAX, false)
115119
}
116120

117-
/// Returns an iterator over all points within a given distance, unsorted.
121+
/// Returns a streaming iterator over all points within a given distance, unsorted.
122+
///
123+
/// This avoids materializing the full result set returned by
124+
/// [`within_unsorted`](Self::within_unsorted). The iterator keeps traversal state and
125+
/// per-leaf matches inline in the common case, spilling to heap allocation only if the
126+
/// tree depth or a single leaf's match count exceeds the inline capacities.
127+
///
128+
/// For the absolute lowest overhead, use [`within_unsorted_visit`](Self::within_unsorted_visit).
118129
#[inline]
119130
pub fn within_unsorted_iter<D>(
120131
&self,

0 commit comments

Comments (0)