Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions components/remote_settings/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,11 @@ impl<C: ApiClient> RemoteSettingsClient<C> {
Ok(())
}

/// Run periodic SQLite maintenance on this client's backing storage.
///
/// Takes the inner lock and delegates to the storage layer so SQLite can
/// reclaim pages freed by attachment cleanup (see the post-sync maintenance
/// pass in `service.rs`).
pub fn run_maintenance(&self) -> Result<()> {
    self.lock_inner()?.storage.run_maintenance()
}

pub fn reset_storage(&self) -> Result<()> {
trace!("{0}: reset local storage.", self.collection_name);
let mut inner = self.lock_inner()?;
Expand Down
208 changes: 207 additions & 1 deletion components/remote_settings/src/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ impl RemoteSettingsService {
.collect();
let bucket_name = inner.bucket_name.clone();

for client in inner.active_clients() {
let active_clients = inner.active_clients();
for client in &active_clients {
let client = &client.internal;
let collection_name = client.collection_name();
if let Some(client_last_modified) = client.get_last_modified_timestamp()? {
Expand All @@ -113,6 +114,19 @@ impl RemoteSettingsService {
client.sync()?;
}
}

// Run SQLite maintenance after sync so SQLite can reclaim pages freed by
// attachment cleanup and enable/use incremental auto-vacuum.
for client in &active_clients {
let client = &client.internal;
let collection_name = client.collection_name();

if synced_collections.contains(collection_name) {
trace!("running maintenance for collection: {collection_name}");
client.run_maintenance()?;
}
}

Ok(synced_collections.into_iter().collect())
}

Expand Down Expand Up @@ -212,3 +226,195 @@ struct ChangesCollection {
bucket: String,
last_modified: u64,
}

#[cfg(all(test, not(feature = "signatures")))]
mod test {
    use super::*;
    use mockito::{mock, Matcher};

    /// End-to-end check that post-sync maintenance compacts the collection DB.
    ///
    /// Flow:
    ///   1. a first sync pulls a record referencing a ~5 MiB attachment,
    ///   2. the attachment is downloaded so the DB file grows,
    ///   3. a second sync tombstones the record (removing the attachment row),
    ///   4. the DB file must shrink, and `PRAGMA auto_vacuum` must report
    ///      incremental mode, proving maintenance ran after the sync.
    ///
    /// The enclosing module is compiled only without the `signatures` feature,
    /// which is what lets the mocked changesets use empty `"signatures": []`
    /// metadata. (The previous extra `#[cfg(not(feature = "signatures"))]` on
    /// the test fn was redundant with the module-level gate and was removed.)
    #[test]
    fn test_sync_maintenance_shrinks_db_after_attachment_cleanup() -> Result<()> {
        use crate::RemoteSettingsRecord;
        use sha2::Digest;
        viaduct_dev::init_backend_dev();

        let collection = "cid";
        let temp_dir = tempfile::tempdir().expect("create temp dir");
        // The storage layer names each per-collection database `<collection>.sql`.
        let db_path = temp_dir.path().join(format!("{collection}.sql"));

        // 5 MiB of 'A' (0x41) — large enough that reclaiming it is clearly
        // visible in the file-size assertions below.
        let attachment_data = vec![0x41; 5 * 1024 * 1024];
        let attachment_hash = format!("{:x}", sha2::Sha256::digest(&attachment_data));

        // NOTE(review): this record advertises `attachments/big.bin`, while the
        // manual `get_attachment` call below fetches `attachments/big`. Confirm
        // the cleanup path keys off the record id (not the location) so this
        // mismatch is intentional rather than accidental.
        let attachment_record = format!(
            r#"{{
                "id": "record-with-attachment",
                "last_modified": 100,
                "attachment": {{
                    "filename": "big.bin",
                    "mimetype": "application/octet-stream",
                    "location": "attachments/big.bin",
                    "hash": "{attachment_hash}",
                    "size": {}
                }}
            }}"#,
            attachment_data.len()
        );

        // First sync creates a record that references the big attachment.
        let _changes_1 = mock("GET", "/v1/buckets/monitor/collections/changes/changeset")
            .match_query(Matcher::Any)
            .with_status(200)
            .with_header("content-type", "application/json")
            .with_body(format!(
                r#"{{
                    "timestamp": 100,
                    "changes": [
                        {{"collection": "{collection}", "bucket": "main", "last_modified": 100}}
                    ]
                }}"#
            ))
            .create();

        let _changeset_1 = mock(
            "GET",
            format!("/v1/buckets/main/collections/{collection}/changeset").as_str(),
        )
        .match_query(Matcher::Any)
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(format!(
            r#"{{
                "changes": [{attachment_record}],
                "timestamp": 100,
                "metadata": {{"bucket": "main", "signatures": []}}
            }}"#
        ))
        .create();

        let service = RemoteSettingsService::new(
            temp_dir.path().to_string_lossy().to_string(),
            RemoteSettingsConfig2 {
                server: Some(RemoteSettingsServer::Custom {
                    url: mockito::server_url(),
                }),
                ..Default::default()
            },
        );

        let client = service.make_client(collection.into());

        service.sync()?;

        // Mock attachment discovery (server root advertises the attachments
        // base URL) and the download itself.
        let _root = mock("GET", "/v1/")
            .with_status(200)
            .with_header("content-type", "application/json")
            .with_body(format!(
                r#"{{
                    "capabilities": {{
                        "attachments": {{
                            "base_url": "{}/"
                        }}
                    }}
                }}"#,
                mockito::server_url()
            ))
            .create();

        // Path matches `location: "attachments/big"` joined against the base URL above.
        let _attachment = mock("GET", "/attachments/big")
            .with_status(200)
            .with_body(attachment_data.clone())
            .create();

        // Store the large attachment so the DB becomes bloated.
        client.internal.get_attachment(&RemoteSettingsRecord {
            id: "record-with-attachment".to_string(),
            last_modified: 100,
            deleted: false,
            attachment: Some(crate::Attachment {
                filename: "big".to_string(),
                mimetype: "application/octet-stream".to_string(),
                location: "attachments/big".to_string(),
                hash: attachment_hash.clone(),
                size: attachment_data.len() as u64,
            }),
            fields: serde_json::Map::new(),
        })?;

        let size_with_attachment = std::fs::metadata(&db_path)
            .expect("db exists after first sync")
            .len();

        assert!(
            size_with_attachment > 4 * 1024 * 1024,
            "DB should contain the large attachment; size={size_with_attachment}"
        );

        // Drop first-sync mocks explicitly so mockito doesn't re-match the second sync's
        // changeset request against them. Mockito matches by registration order, so leftover
        // mocks for the same URL would shadow the second-sync mocks.
        drop(_changes_1);
        drop(_changeset_1);

        // Second sync tombstones the record. This deletes the attachment row, and
        // post-sync maintenance should compact the database.
        let _changes_2 = mock("GET", "/v1/buckets/monitor/collections/changes/changeset")
            .match_query(Matcher::Any)
            .with_status(200)
            .with_header("content-type", "application/json")
            .with_body(format!(
                r#"{{
                    "timestamp": 200,
                    "changes": [
                        {{"collection": "{collection}", "bucket": "main", "last_modified": 200}}
                    ]
                }}"#
            ))
            .create();

        let _changeset_2 = mock(
            "GET",
            format!("/v1/buckets/main/collections/{collection}/changeset").as_str(),
        )
        .match_query(Matcher::Any)
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(
            r#"{
                "changes": [
                    {
                        "id": "record-with-attachment",
                        "last_modified": 200,
                        "deleted": true
                    }
                ],
                "timestamp": 200,
                "metadata": {"bucket": "main", "signatures": []}
            }"#,
        )
        .create();

        service.sync()?;

        let size_after_cleanup_and_maintenance = std::fs::metadata(&db_path)
            .expect("db exists after second sync")
            .len();

        assert!(
            size_after_cleanup_and_maintenance < size_with_attachment,
            "maintenance should reclaim at least some space after deleting attachment; before={size_with_attachment}, after={size_after_cleanup_and_maintenance}"
        );

        // Sanity-check that maintenance enabled incremental auto-vacuum.
        // Per the SQLite docs, `PRAGMA auto_vacuum` returns 2 for INCREMENTAL.
        let conn = rusqlite::Connection::open(&db_path).expect("open collection db");
        let auto_vacuum: u32 = conn
            .query_row("PRAGMA auto_vacuum", [], |row| row.get(0))
            .expect("query auto_vacuum");

        assert_eq!(auto_vacuum, 2);

        Ok(())
    }
}
10 changes: 9 additions & 1 deletion components/remote_settings/src/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use serde_json;
use sha2::{Digest, Sha256};
use std::io;

use sql_support::{open_database::open_database_with_flags, ConnExt};
use sql_support::{open_database::open_database_with_flags, run_maintenance, ConnExt};

/// Internal storage type
///
Expand Down Expand Up @@ -344,6 +344,14 @@ impl Storage {
)?;
Ok(())
}

/// Run SQLite maintenance on the collection database, if a connection is open.
///
/// Delegates to `sql_support::run_maintenance`, which (per the sync code's
/// comments) lets SQLite reclaim freed pages via incremental auto-vacuum.
/// When the lazily-constructed connection has not been initialized, there is
/// nothing to compact, so this succeeds as a no-op.
pub fn run_maintenance(&mut self) -> Result<()> {
    match &self.conn {
        ConnectionCell::Initialized(conn) => {
            run_maintenance(conn)?;
            Ok(())
        }
        // No live connection — nothing to do.
        _ => Ok(()),
    }
}
}

/// Stores the SQLite connection, which is lazily constructed and can be closed/shutdown.
Expand Down