Skip to content

Commit a596bc5

Browse files
author
Antoni Marroig Campomar
committed
[IMP] fs_attachment_s3: Optimize clean up storages using mass deletion
1 parent 4ef067c commit a596bc5

7 files changed

Lines changed: 230 additions & 25 deletions

File tree

fs_attachment_s3/README.rst

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,3 @@
1-
.. image:: https://odoo-community.org/readme-banner-image
2-
:target: https://odoo-community.org/get-involved?utm_source=readme
3-
:alt: Odoo Community Association
4-
51
================
62
Fs Attachment S3
73
================
@@ -17,7 +13,7 @@ Fs Attachment S3
1713
.. |badge1| image:: https://img.shields.io/badge/maturity-Beta-yellow.png
1814
:target: https://odoo-community.org/page/development-status
1915
:alt: Beta
20-
.. |badge2| image:: https://img.shields.io/badge/license-AGPL--3-blue.png
16+
.. |badge2| image:: https://img.shields.io/badge/licence-AGPL--3-blue.png
2117
:target: http://www.gnu.org/licenses/agpl-3.0-standalone.html
2218
:alt: License: AGPL-3
2319
.. |badge3| image:: https://img.shields.io/badge/github-OCA%2Fstorage-lightgray.png?logo=github
@@ -155,6 +151,9 @@ Contributors
155151

156152
- Laurent Mignon laurent.mignon@acsone.eu (https://www.acsone.eu)
157153
- Stéphane Bidoul stephane.bidoul@acsone.eu (https://www.acsone.eu)
154+
- `APSL-Nagarro <https://apsl.tech>`__:
155+
156+
- Antoni Marroig <amarroig@apsl.net>
158157

159158
Other credits
160159
-------------
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
from . import fs_storage
22
from . import ir_attachment
3+
from . import fs_file_gc
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
# Copyright 2026 APSL-Nagarro Antoni Marroig
2+
# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl).
3+
4+
import gc
5+
import logging
6+
7+
from odoo import models
8+
9+
_logger = logging.getLogger(__name__)
10+
11+
12+
class FsFileGc(models.Model):
13+
_inherit = "fs.file.gc"
14+
15+
# S3 bulk delete limit is 1000 objects per request
16+
_GC_BATCH_SIZE = 1000
17+
18+
def _gc_files_unsafe(self) -> None:
19+
"""
20+
Overrides to optimize S3 storage using bulk deletes and
21+
delegates other storage types to the super() method.
22+
"""
23+
# 1. HEAVY LIFTING: Clean up S3 storages first using mass deletion
24+
self._gc_s3_bulk_delete()
25+
26+
# 2. DELEGATION: Call super() for any remaining storage types
27+
# Since S3 records have already been removed from fs_file_gc,
28+
# super() will only process remaining types (Filestore, SFTP, etc.)
29+
# without hitting timeouts.
30+
return super()._gc_files_unsafe()
31+
32+
def _gc_s3_bulk_delete(self):
33+
"""
34+
Specific logic for S3 using Boto3 Batch.
35+
Filters storages in memory since autovacuum_gc is not a stored field.
36+
"""
37+
# Search for all storages and filter by the computed field in Python
38+
all_storages = self.env["fs.storage"].search([])
39+
# We filter those with autovacuum active and verify it's an S3-based path
40+
storages_to_gc = all_storages.filtered(
41+
lambda s: s.autovacuum_gc and s.is_s3_storage
42+
)
43+
44+
for storage in storages_to_gc:
45+
code = storage.code
46+
47+
try:
48+
# directory_path usually contains the bucket name in S3 configurations
49+
bucket_name = storage.get_directory_path().strip("/")
50+
root_fs = storage._get_root_filesystem()
51+
52+
s3_client = root_fs.s3
53+
54+
if not s3_client or not bucket_name:
55+
_logger.warning(
56+
"GC: Could not retrieve S3 client or Bucket name for %s", code
57+
)
58+
continue
59+
60+
_logger.info(
61+
"GC: Starting optimized S3 cleanup for %s in bucket %s",
62+
code,
63+
bucket_name,
64+
)
65+
66+
while True:
67+
# Select the next batch of orphaned files
68+
self._cr.execute(
69+
"""
70+
SELECT store_fname
71+
FROM fs_file_gc
72+
WHERE fs_storage_code = %s
73+
AND NOT EXISTS (
74+
SELECT 1 FROM ir_attachment
75+
WHERE store_fname = fs_file_gc.store_fname
76+
)
77+
LIMIT %s
78+
""",
79+
(code, self._GC_BATCH_SIZE),
80+
)
81+
82+
rows = self._cr.fetchall()
83+
if not rows:
84+
break
85+
86+
fnames = [row[0] for row in rows]
87+
# Prepare the keys by removing the protocol prefix (e.g., s3://)
88+
objects_to_delete = [{"Key": f.partition("://")[2]} for f in fnames]
89+
90+
try:
91+
# Perform mass deletion (1 HTTP request per 1000 files)
92+
_logger.info(
93+
"GC: Sending bulk delete request to S3 (%s files)",
94+
len(objects_to_delete),
95+
)
96+
s3_client.delete_objects(
97+
Bucket=bucket_name, Delete={"Objects": objects_to_delete}
98+
)
99+
100+
# Mass delete from database
101+
self._cr.execute(
102+
"DELETE FROM fs_file_gc WHERE store_fname = ANY(%s)",
103+
(fnames,),
104+
)
105+
106+
# Commit per batch: releases locks and ensures progress is saved
107+
self._cr.commit() # pylint: disable=invalid-commit
108+
except Exception as e:
109+
_logger.error(
110+
"GC Error: Failed S3 delete_objects for %s: %s", code, e
111+
)
112+
# We break the loop for this storage
113+
# to prevent DB deletion if S3 fails
114+
break
115+
116+
# Explicit memory cleanup
117+
gc.collect()
118+
119+
except Exception as e:
120+
_logger.error(
121+
"GC Error: Could not initialize S3 storage %s: %s", code, e
122+
)
123+
continue
124+
125+
_logger.info("GC: Optimized S3 cleanup process finished.")
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
- Laurent Mignon <laurent.mignon@acsone.eu> (https://www.acsone.eu)
22
- Stéphane Bidoul <stephane.bidoul@acsone.eu> (https://www.acsone.eu)
3+
- [APSL-Nagarro](https://apsl.tech):
4+
- Antoni Marroig \<<amarroig@apsl.net>\>

fs_attachment_s3/static/description/index.html

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
<head>
44
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
55
<meta name="generator" content="Docutils: https://docutils.sourceforge.io/" />
6-
<title>README.rst</title>
6+
<title>Fs Attachment S3</title>
77
<style type="text/css">
88

99
/*
@@ -360,21 +360,16 @@
360360
</style>
361361
</head>
362362
<body>
363-
<div class="document">
363+
<div class="document" id="fs-attachment-s3">
364+
<h1 class="title">Fs Attachment S3</h1>
364365

365-
366-
<a class="reference external image-reference" href="https://odoo-community.org/get-involved?utm_source=readme">
367-
<img alt="Odoo Community Association" src="https://odoo-community.org/readme-banner-image" />
368-
</a>
369-
<div class="section" id="fs-attachment-s3">
370-
<h1>Fs Attachment S3</h1>
371366
<!-- !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
372367
!! This file is generated by oca-gen-addon-readme !!
373368
!! changes will be overwritten. !!
374369
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
375370
!! source digest: sha256:57a067d105c10ab10e37d943357bf9f6bc3c02a5a5bfd77737fd95c2c5b0ffa6
376371
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -->
377-
<p><a class="reference external image-reference" href="https://odoo-community.org/page/development-status"><img alt="Beta" src="https://img.shields.io/badge/maturity-Beta-yellow.png" /></a> <a class="reference external image-reference" href="http://www.gnu.org/licenses/agpl-3.0-standalone.html"><img alt="License: AGPL-3" src="https://img.shields.io/badge/license-AGPL--3-blue.png" /></a> <a class="reference external image-reference" href="https://github.com/OCA/storage/tree/17.0/fs_attachment_s3"><img alt="OCA/storage" src="https://img.shields.io/badge/github-OCA%2Fstorage-lightgray.png?logo=github" /></a> <a class="reference external image-reference" href="https://translation.odoo-community.org/projects/storage-17-0/storage-17-0-fs_attachment_s3"><img alt="Translate me on Weblate" src="https://img.shields.io/badge/weblate-Translate%20me-F47D42.png" /></a> <a class="reference external image-reference" href="https://runboat.odoo-community.org/builds?repo=OCA/storage&amp;target_branch=17.0"><img alt="Try me on Runboat" src="https://img.shields.io/badge/runboat-Try%20me-875A7B.png" /></a></p>
372+
<p><a class="reference external image-reference" href="https://odoo-community.org/page/development-status"><img alt="Beta" src="https://img.shields.io/badge/maturity-Beta-yellow.png" /></a> <a class="reference external image-reference" href="http://www.gnu.org/licenses/agpl-3.0-standalone.html"><img alt="License: AGPL-3" src="https://img.shields.io/badge/licence-AGPL--3-blue.png" /></a> <a class="reference external image-reference" href="https://github.com/OCA/storage/tree/17.0/fs_attachment_s3"><img alt="OCA/storage" src="https://img.shields.io/badge/github-OCA%2Fstorage-lightgray.png?logo=github" /></a> <a class="reference external image-reference" href="https://translation.odoo-community.org/projects/storage-17-0/storage-17-0-fs_attachment_s3"><img alt="Translate me on Weblate" src="https://img.shields.io/badge/weblate-Translate%20me-F47D42.png" /></a> <a class="reference external image-reference" href="https://runboat.odoo-community.org/builds?repo=OCA/storage&amp;target_branch=17.0"><img alt="Try me on Runboat" src="https://img.shields.io/badge/runboat-Try%20me-875A7B.png" /></a></p>
378373
<p>This module extends the functionality of
379374
<a class="reference external" href="https://github.com/OCA/storage/tree/16.0/fs_attachment">fs_attachment</a>
380375
to better support Amazon S3 storage. It includes features such as:</p>
@@ -407,7 +402,7 @@ <h1>Fs Attachment S3</h1>
407402
</ul>
408403
</div>
409404
<div class="section" id="configuration">
410-
<h2><a class="toc-backref" href="#toc-entry-1">Configuration</a></h2>
405+
<h1><a class="toc-backref" href="#toc-entry-1">Configuration</a></h1>
411406
<p>On the Odoo instance, go to <em>Settings</em> &gt; <em>Technical</em> &gt; <em>Storage</em> &gt; <em>File
412407
Storage</em>.</p>
413408
<p>When you create a new storage for s3 or modify an existing one, when you
@@ -472,11 +467,11 @@ <h2><a class="toc-backref" href="#toc-entry-1">Configuration</a></h2>
472467
directory name as bucket name, and the file path.</p>
473468
</div>
474469
<div class="section" id="changelog">
475-
<h2><a class="toc-backref" href="#toc-entry-2">Changelog</a></h2>
470+
<h1><a class="toc-backref" href="#toc-entry-2">Changelog</a></h1>
476471
<div class="section" id="section-1">
477-
<h3><a class="toc-backref" href="#toc-entry-3">17.0.1.2.0 (2025-10-22)</a></h3>
472+
<h2><a class="toc-backref" href="#toc-entry-3">17.0.1.2.0 (2025-10-22)</a></h2>
478473
<div class="section" id="features">
479-
<h4><a class="toc-backref" href="#toc-entry-4">Features</a></h4>
474+
<h3><a class="toc-backref" href="#toc-entry-4">Features</a></h3>
480475
<ul class="simple">
481476
<li>Adapt to handle {db_name} in directory_path.
482477
(<a class="reference external" href="https://github.com/OCA/storage/issues/db_name">#db_name</a>)</li>
@@ -485,38 +480,42 @@ <h4><a class="toc-backref" href="#toc-entry-4">Features</a></h4>
485480
</div>
486481
</div>
487482
<div class="section" id="bug-tracker">
488-
<h2><a class="toc-backref" href="#toc-entry-5">Bug Tracker</a></h2>
483+
<h1><a class="toc-backref" href="#toc-entry-5">Bug Tracker</a></h1>
489484
<p>Bugs are tracked on <a class="reference external" href="https://github.com/OCA/storage/issues">GitHub Issues</a>.
490485
In case of trouble, please check there if your issue has already been reported.
491486
If you spotted it first, help us to smash it by providing a detailed and welcomed
492487
<a class="reference external" href="https://github.com/OCA/storage/issues/new?body=module:%20fs_attachment_s3%0Aversion:%2017.0%0A%0A**Steps%20to%20reproduce**%0A-%20...%0A%0A**Current%20behavior**%0A%0A**Expected%20behavior**">feedback</a>.</p>
493488
<p>Do not contact contributors directly about support or help with technical issues.</p>
494489
</div>
495490
<div class="section" id="credits">
496-
<h2><a class="toc-backref" href="#toc-entry-6">Credits</a></h2>
491+
<h1><a class="toc-backref" href="#toc-entry-6">Credits</a></h1>
497492
<div class="section" id="authors">
498-
<h3><a class="toc-backref" href="#toc-entry-7">Authors</a></h3>
493+
<h2><a class="toc-backref" href="#toc-entry-7">Authors</a></h2>
499494
<ul class="simple">
500495
<li>ACSONE SA/NV</li>
501496
</ul>
502497
</div>
503498
<div class="section" id="contributors">
504-
<h3><a class="toc-backref" href="#toc-entry-8">Contributors</a></h3>
499+
<h2><a class="toc-backref" href="#toc-entry-8">Contributors</a></h2>
505500
<ul class="simple">
506501
<li>Laurent Mignon <a class="reference external" href="mailto:laurent.mignon&#64;acsone.eu">laurent.mignon&#64;acsone.eu</a> (<a class="reference external" href="https://www.acsone.eu">https://www.acsone.eu</a>)</li>
507502
<li>Stéphane Bidoul <a class="reference external" href="mailto:stephane.bidoul&#64;acsone.eu">stephane.bidoul&#64;acsone.eu</a> (<a class="reference external" href="https://www.acsone.eu">https://www.acsone.eu</a>)</li>
503+
<li><a class="reference external" href="https://apsl.tech">APSL-Nagarro</a>:<ul>
504+
<li>Antoni Marroig &lt;<a class="reference external" href="mailto:amarroig&#64;apsl.net">amarroig&#64;apsl.net</a>&gt;</li>
505+
</ul>
506+
</li>
508507
</ul>
509508
</div>
510509
<div class="section" id="other-credits">
511-
<h3><a class="toc-backref" href="#toc-entry-9">Other credits</a></h3>
510+
<h2><a class="toc-backref" href="#toc-entry-9">Other credits</a></h2>
512511
<p>The development of this module has been financially supported by:</p>
513512
<ul class="simple">
514513
<li>ACSONE SA/NV (<a class="reference external" href="https://www.acsone.eu">https://www.acsone.eu</a>)</li>
515514
<li>Alcyon Belux</li>
516515
</ul>
517516
</div>
518517
<div class="section" id="maintainers">
519-
<h3><a class="toc-backref" href="#toc-entry-10">Maintainers</a></h3>
518+
<h2><a class="toc-backref" href="#toc-entry-10">Maintainers</a></h2>
520519
<p>This module is maintained by the OCA.</p>
521520
<a class="reference external image-reference" href="https://odoo-community.org">
522521
<img alt="Odoo Community Association" src="https://odoo-community.org/logo.png" />
@@ -531,6 +530,5 @@ <h3><a class="toc-backref" href="#toc-entry-10">Maintainers</a></h3>
531530
</div>
532531
</div>
533532
</div>
534-
</div>
535533
</body>
536534
</html>

fs_attachment_s3/tests/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
from . import test_fs_attachment_s3
2+
from . import test_fs_file_gc
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# Copyright 2026 APSL-Nagarro Antoni Marroig
2+
# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl).
3+
4+
from types import SimpleNamespace
5+
from unittest.mock import MagicMock, patch
6+
7+
from .common import TestFSAttachmentS3Common
8+
9+
10+
class TestFsFileGcS3(TestFSAttachmentS3Common):
11+
def setUp(self):
12+
super().setUp()
13+
self.gc_file_model = self.env["fs.file.gc"]
14+
15+
def _mark_for_gc(self, *store_fnames):
16+
for store_fname in store_fnames:
17+
self.gc_file_model._mark_for_gc(store_fname)
18+
19+
def test_gc_s3_bulk_delete_removes_orphaned_files(self):
20+
orphan_1 = "s3tst://dir/sub/orphan_1.txt"
21+
orphan_2 = "s3tst://dir/sub/orphan_2.txt"
22+
referenced = self.fake_attachment_s3.store_fname
23+
self._mark_for_gc(orphan_1, orphan_2, referenced)
24+
25+
s3_client = MagicMock()
26+
root_fs = SimpleNamespace(s3=s3_client)
27+
28+
storage_class = type(self.s3_backend)
29+
with (
30+
patch.object(
31+
storage_class,
32+
"is_s3_storage",
33+
new=property(lambda storage: storage.code == self.s3_backend.code),
34+
),
35+
patch.object(storage_class, "_get_root_filesystem", return_value=root_fs),
36+
patch.object(type(self.env.cr), "commit", return_value=None),
37+
):
38+
self.gc_file_model._gc_s3_bulk_delete()
39+
40+
s3_client.delete_objects.assert_called_once()
41+
_, kwargs = s3_client.delete_objects.call_args
42+
self.assertEqual(kwargs["Bucket"], "test-bucket")
43+
self.assertCountEqual(
44+
kwargs["Delete"]["Objects"],
45+
[
46+
{"Key": "dir/sub/orphan_1.txt"},
47+
{"Key": "dir/sub/orphan_2.txt"},
48+
],
49+
)
50+
remaining_files = self.gc_file_model.search(
51+
[("store_fname", "in", [orphan_1, orphan_2, referenced])]
52+
).mapped("store_fname")
53+
self.assertNotIn(orphan_1, remaining_files)
54+
self.assertNotIn(orphan_2, remaining_files)
55+
self.assertIn(referenced, remaining_files)
56+
57+
def test_gc_s3_bulk_delete_keeps_rows_when_s3_delete_fails(self):
58+
orphan = "s3tst://dir/sub/orphan.txt"
59+
self._mark_for_gc(orphan)
60+
61+
s3_client = MagicMock()
62+
s3_client.delete_objects.side_effect = Exception("S3 is unavailable")
63+
root_fs = SimpleNamespace(s3=s3_client)
64+
65+
storage_class = type(self.s3_backend)
66+
with (
67+
patch.object(
68+
storage_class,
69+
"is_s3_storage",
70+
new=property(lambda storage: storage.code == self.s3_backend.code),
71+
),
72+
patch.object(storage_class, "_get_root_filesystem", return_value=root_fs),
73+
patch.object(type(self.env.cr), "commit", return_value=None),
74+
):
75+
self.gc_file_model._gc_s3_bulk_delete()
76+
77+
self.assertTrue(
78+
self.gc_file_model.search_count([("store_fname", "=", orphan)])
79+
)

0 commit comments

Comments
 (0)