Skip to content

Commit c253f60

Browse files
committed
feat: add global registry
- This ensures only a single instance of a db (with its connection pools) can exist in the process - NOTE TO REVIEWERS: The caching behavior will be tested from the outside once the `database.rs` tests are committed.
1 parent 8c3818a commit c253f60

2 files changed

Lines changed: 167 additions & 3 deletions

File tree

crates/sqlx-sqlite-conn-mgr/src/lib.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919
//!
2020
//! ## Usage
2121
//!
22-
//! ```no_run
22+
//! // TODO: Remove this ignore once implementation is complete
23+
//! ```ignore
2324
//! use sqlx_sqlite_conn_mgr::SqliteDatabase;
2425
//! use std::sync::Arc;
2526
//!
@@ -59,13 +60,13 @@
5960
//! - Global registry caches new database instances (with their pools) and returns existing ones
6061
//! - WAL mode is enabled lazily only when writes are needed
6162
//!
62-
// TODO: Remove these allows once implementation is complete
63-
#![allow(dead_code)]
63+
// TODO: Remove this allow once implementation is complete
6464
#![allow(unused)]
6565

6666
mod config;
6767
mod database;
6868
mod error;
69+
mod registry;
6970
mod write_guard;
7071

7172
// Re-export public types
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
//! Global database registry to cache new database instances and return existing ones
2+
3+
use crate::Result;
4+
use crate::database::SqliteDatabase;
5+
use std::collections::HashMap;
6+
use std::future::Future;
7+
use std::path::{Path, PathBuf};
8+
use std::sync::{Arc, OnceLock, Weak};
9+
use tokio::sync::RwLock;
10+
11+
/// Global registry for SQLite databases
12+
static DATABASE_REGISTRY: OnceLock<RwLock<HashMap<PathBuf, Weak<SqliteDatabase>>>> =
13+
OnceLock::new();
14+
15+
fn registry() -> &'static RwLock<HashMap<PathBuf, Weak<SqliteDatabase>>> {
16+
DATABASE_REGISTRY.get_or_init(|| RwLock::new(HashMap::new()))
17+
}
18+
19+
/// Get or open a SQLite database connection
20+
///
21+
/// If a database is already connected, returns the cached instance.
22+
/// Otherwise, calls the provided factory function to create a new connection.
23+
///
24+
/// Special case: `:memory:` databases should not be cached (each is unique)
25+
pub async fn get_or_open_database<F, Fut>(path: &Path, factory: F) -> Result<Arc<SqliteDatabase>>
26+
where
27+
F: FnOnce() -> Fut,
28+
Fut: Future<Output = Result<SqliteDatabase>>,
29+
{
30+
let path_str = path.to_str().unwrap_or("");
31+
32+
// Skip registry for in-memory databases - always create new
33+
if path_str == ":memory:" || path_str.starts_with("file::memory:") {
34+
let db = factory().await?;
35+
return Ok(Arc::new(db));
36+
}
37+
38+
// Canonicalize the path for consistent lookups
39+
let canonical_path = canonicalize_path(path)?;
40+
41+
// Try to get existing database with read lock (allows concurrent reads)
42+
{
43+
let registry = registry().read().await;
44+
45+
if let Some(weak) = registry.get(&canonical_path) {
46+
if let Some(db) = weak.upgrade() {
47+
return Ok(db);
48+
}
49+
// Weak reference exists but dead - will be cleaned up in write phase
50+
}
51+
}
52+
53+
// Phase 2: Database not found, acquire write lock
54+
let mut registry = registry().write().await;
55+
56+
// Double-check: another thread might have created it while we waited for write lock
57+
if let Some(weak) = registry.get(&canonical_path) {
58+
if let Some(db) = weak.upgrade() {
59+
return Ok(db);
60+
}
61+
}
62+
63+
// Clean up dead weak references while we have the write lock
64+
registry.retain(|_, weak| weak.strong_count() > 0);
65+
66+
// Now we're sure the database doesn't exist - create it while holding the lock
67+
// This prevents race conditions
68+
let db = factory().await?;
69+
let arc_db = Arc::new(db);
70+
71+
// Cache the new database
72+
registry.insert(canonical_path, Arc::downgrade(&arc_db));
73+
74+
Ok(arc_db)
75+
}
76+
77+
/// Resolve a database path to a canonical, absolute form for cache lookups.
///
/// Resolution strategy:
/// - If the path can be canonicalized directly (the file exists), that result
///   is returned: absolute, with symlinks resolved.
/// - Otherwise the parent directory is canonicalized and the file name is
///   appended unchanged, so a database that has not been created yet still
///   gets a stable key.
/// - A bare file name such as `app.db` has an *empty* parent (not `None`);
///   it is resolved against the current directory (`.`).
///
/// Known limitations when the file doesn't exist:
/// - Case sensitivity: on case-insensitive filesystems (macOS, Windows), paths
///   differing only in case are treated as different until the file is created.
///   This could lead to multiple connection pools for the same logical database,
///   at least until the file exists and can be canonicalized properly.
/// - Symlinks in filename: if the file name itself will be a symlink (rare for
///   SQLite), different symlink names won't be resolved until the file exists.
///
/// # Errors
///
/// Returns `InvalidInput` if the path has no file name component (for example
/// an empty path or one ending in `..`), or the underlying I/O error if the
/// parent directory cannot be canonicalized (for example it does not exist).
fn canonicalize_path(path: &Path) -> std::io::Result<PathBuf> {
    if let Ok(resolved) = path.canonicalize() {
        return Ok(resolved);
    }

    // The file itself could not be resolved: fall back to parent + file name.
    let file_name = path
        .file_name()
        .ok_or_else(|| std::io::Error::new(std::io::ErrorKind::InvalidInput, "Invalid path"))?;

    // `Path::new("app.db").parent()` is `Some("")`, and canonicalizing the
    // empty path fails, so an empty parent must be treated like a missing one.
    let parent = match path.parent() {
        Some(dir) if !dir.as_os_str().is_empty() => dir,
        _ => Path::new("."),
    };

    // The file name's case is preserved as provided. On case-insensitive
    // filesystems "MyDB.db" and "mydb.db" therefore map to separate keys until
    // the file exists and full canonicalization yields the on-disk case.
    Ok(parent.canonicalize()?.join(file_name))
}
114+
115+
/// Remove a database from the cache
116+
///
117+
/// Special case: `:memory:` databases are never in the registry
118+
///
119+
/// Returns an error if the path cannot be canonicalized
120+
pub async fn uncache_database(path: &Path) -> std::io::Result<()> {
121+
// Skip registry for in-memory databases
122+
let path_str = path.to_str().unwrap_or("");
123+
if path_str == ":memory:" || path_str.starts_with("file::memory:") {
124+
return Ok(());
125+
}
126+
127+
// Canonicalize path
128+
let canonical_path = canonicalize_path(path)?;
129+
130+
let mut registry = registry().write().await;
131+
registry.remove(&canonical_path);
132+
Ok(())
133+
}
134+
135+
#[cfg(test)]
mod tests {
    use super::*;

    /// Paths whose parent directory exists resolve to an absolute path,
    /// whether they are given as absolute or `./`-relative.
    #[test]
    fn test_canonicalize_path() {
        // Path inside the system temp directory.
        let in_temp_dir = std::env::temp_dir().join("test.db");
        let resolved = canonicalize_path(&in_temp_dir).unwrap();
        assert!(resolved.is_absolute());

        // Path relative to the current directory.
        let resolved_relative = canonicalize_path(Path::new("./test_relative.db")).unwrap();
        assert!(resolved_relative.is_absolute());
    }

    /// A path whose parent directory does not exist cannot be canonicalized.
    #[test]
    fn test_canonicalize_nonexistent_path() {
        let missing_parent = std::env::temp_dir()
            .join("nonexistent_dir")
            .join("test.db");

        assert!(canonicalize_path(&missing_parent).is_err());
    }
}

0 commit comments

Comments
 (0)