Skip to content

Commit 16f2cca

Browse files
vinayakphegdekgeisz
authored andcommitted
HBASE-29261: Investigate flaw in backup deletion validation of PITR-critical backups and propose correct approach (apache#6922)
* improve the logic of backup deletion validation of PITR-critical backups * add new tests
1 parent ec00c4e commit 16f2cca

3 files changed

Lines changed: 292 additions & 156 deletions

File tree

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java

Lines changed: 93 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,9 @@
5656
import java.util.ArrayList;
5757
import java.util.List;
5858
import java.util.Map;
59+
import java.util.Optional;
5960
import java.util.Set;
6061
import java.util.concurrent.TimeUnit;
61-
import org.agrona.collections.MutableLong;
6262
import org.apache.commons.lang3.StringUtils;
6363
import org.apache.hadoop.conf.Configuration;
6464
import org.apache.hadoop.conf.Configured;
@@ -745,19 +745,20 @@ private void validatePITRBackupDeletion(String[] backupIds, boolean isForceDelet
745745
}
746746

747747
/**
748-
* Identifies tables that rely on the specified backup for PITR. If a table has no other valid
749-
* FULL backups that can facilitate recovery to all points within the PITR retention window, it
750-
* is added to the dependent list.
751-
* @param backupId The backup ID being evaluated.
752-
* @return List of tables dependent on the specified backup for PITR.
753-
* @throws IOException If backup metadata cannot be retrieved.
748+
* Identifies tables that rely on the specified backup for PITR (Point-In-Time Recovery). A
749+
* table is considered dependent on the backup if it does not have any other valid full backups
750+
* that can cover the PITR window enabled by the specified backup.
751+
* @param backupId The ID of the backup being evaluated for PITR coverage.
752+
* @return A list of tables that are dependent on the specified backup for PITR recovery.
753+
* @throws IOException If there is an error retrieving the backup metadata or backup system
754+
* table.
754755
*/
755756
private List<TableName> getTablesDependentOnBackupForPITR(String backupId) throws IOException {
756757
List<TableName> dependentTables = new ArrayList<>();
757758

758759
try (final BackupSystemTable backupSystemTable = new BackupSystemTable(conn)) {
760+
// Fetch the target backup's info using the backup ID
759761
BackupInfo targetBackup = backupSystemTable.readBackupInfo(backupId);
760-
761762
if (targetBackup == null) {
762763
throw new IOException("Backup info not found for backupId: " + backupId);
763764
}
@@ -767,104 +768,121 @@ private List<TableName> getTablesDependentOnBackupForPITR(String backupId) throw
767768
return List.of();
768769
}
769770

770-
// Retrieve the tables with continuous backup enabled and their start times
771+
// Retrieve the tables with continuous backup enabled along with their start times
771772
Map<TableName, Long> continuousBackupStartTimes =
772773
backupSystemTable.getContinuousBackupTableSet();
773774

774-
// Determine the PITR time window
775+
// Calculate the PITR window by fetching configuration and current time
775776
long pitrWindowDays = getConf().getLong(CONF_CONTINUOUS_BACKUP_PITR_WINDOW_DAYS,
776777
DEFAULT_CONTINUOUS_BACKUP_PITR_WINDOW_DAYS);
777778
long currentTime = EnvironmentEdgeManager.getDelegate().currentTime();
778-
final MutableLong pitrMaxStartTime =
779-
new MutableLong(currentTime - TimeUnit.DAYS.toMillis(pitrWindowDays));
779+
final long maxAllowedPITRTime = currentTime - TimeUnit.DAYS.toMillis(pitrWindowDays);
780780

781-
// For all tables, determine the earliest (minimum) continuous backup start time.
782-
// This represents the actual earliest point-in-time recovery (PITR) timestamp
783-
// that can be used, ensuring we do not go beyond the available backup data.
784-
long minContinuousBackupStartTime = currentTime;
781+
// Check each table associated with the target backup
785782
for (TableName table : targetBackup.getTableNames()) {
786-
minContinuousBackupStartTime = Math.min(minContinuousBackupStartTime,
787-
continuousBackupStartTimes.getOrDefault(table, currentTime));
788-
}
789-
790-
// The PITR max start time should be the maximum of the calculated minimum continuous backup
791-
// start time and the default PITR max start time (based on the configured window).
792-
// This ensures that PITR does not extend beyond what is practically possible.
793-
pitrMaxStartTime.set(Math.max(minContinuousBackupStartTime, pitrMaxStartTime.longValue()));
794-
795-
for (TableName table : targetBackup.getTableNames()) {
796-
// This backup is not necessary for this table since it doesn't have PITR enabled
783+
// Skip tables without continuous backup enabled
797784
if (!continuousBackupStartTimes.containsKey(table)) {
798785
continue;
799786
}
800-
if (
801-
!isValidPITRBackup(targetBackup, table, continuousBackupStartTimes,
802-
pitrMaxStartTime.longValue())
803-
) {
804-
continue; // This backup is not crucial for PITR of this table
787+
788+
// Calculate the PITR window this backup covers for the table
789+
Optional<Pair<Long, Long>> coveredPitrWindow = getCoveredPitrWindowForTable(targetBackup,
790+
continuousBackupStartTimes.get(table), maxAllowedPITRTime, currentTime);
791+
792+
// If this backup does not cover a valid PITR window for the table, skip
793+
if (coveredPitrWindow.isEmpty()) {
794+
continue;
805795
}
806796

807-
// Check if another valid full backup exists for this table
808-
List<BackupInfo> backupHistory = backupSystemTable.getBackupInfos(BackupState.COMPLETE);
809-
boolean hasAnotherValidBackup = backupHistory.stream()
810-
.anyMatch(backup -> !backup.getBackupId().equals(backupId) && isValidPITRBackup(backup,
811-
table, continuousBackupStartTimes, pitrMaxStartTime.longValue()));
797+
// Check if there is any other valid backup that can cover the PITR window
798+
List<BackupInfo> allBackups = backupSystemTable.getBackupInfos(BackupState.COMPLETE);
799+
boolean hasAnotherValidBackup =
800+
canAnyOtherBackupCover(allBackups, targetBackup, table, coveredPitrWindow.get(),
801+
continuousBackupStartTimes.get(table), maxAllowedPITRTime, currentTime);
812802

803+
// If no other valid backup exists, add the table to the dependent list
813804
if (!hasAnotherValidBackup) {
814805
dependentTables.add(table);
815806
}
816807
}
817808
}
809+
818810
return dependentTables;
819811
}
820812

821813
/**
822-
* Determines if a given backup is a valid candidate for Point-In-Time Recovery (PITR) for a
823-
* specific table. A valid backup ensures that recovery is possible to any point within the PITR
824-
* retention window. A backup qualifies if:
825-
* <ul>
826-
* <li>It is a FULL backup.</li>
827-
* <li>It contains the specified table.</li>
828-
* <li>Its completion timestamp is before the PITR retention window start time.</li>
829-
* <li>Its completion timestamp is on or after the table’s continuous backup start time.</li>
830-
* </ul>
831-
* @param backupInfo The backup information being evaluated.
832-
* @param tableName The table for which PITR validity is being checked.
833-
* @param continuousBackupTables A map of tables to their continuous backup start time.
834-
* @param pitrMaxStartTime The maximum allowed start timestamp for PITR eligibility.
835-
* @return {@code true} if the backup enables recovery to all valid points in time for the
836-
* table; {@code false} otherwise.
814+
* Calculates the PITR (Point-In-Time Recovery) window that the given backup enables for a
815+
* table.
816+
* @param backupInfo Metadata of the backup being evaluated.
817+
* @param continuousBackupStartTime When continuous backups started for the table.
818+
* @param maxAllowedPITRTime The earliest timestamp from which PITR is supported in the
819+
* cluster.
820+
* @param currentTime Current time.
821+
* @return Optional PITR window as a pair (start, end), or empty if backup is not useful for
822+
* PITR.
837823
*/
838-
private boolean isValidPITRBackup(BackupInfo backupInfo, TableName tableName,
839-
Map<TableName, Long> continuousBackupTables, long pitrMaxStartTime) {
840-
// Only FULL backups are mandatory for PITR
841-
if (!BackupType.FULL.equals(backupInfo.getType())) {
842-
return false;
843-
}
824+
private Optional<Pair<Long, Long>> getCoveredPitrWindowForTable(BackupInfo backupInfo,
825+
long continuousBackupStartTime, long maxAllowedPITRTime, long currentTime) {
844826

845-
// The backup must include the table to be relevant for PITR
846-
if (!backupInfo.getTableNames().contains(tableName)) {
847-
return false;
848-
}
827+
long backupStartTs = backupInfo.getStartTs();
828+
long backupEndTs = backupInfo.getCompleteTs();
829+
long effectiveStart = Math.max(continuousBackupStartTime, maxAllowedPITRTime);
849830

850-
// The backup must have been completed before the PITR retention window starts,
851-
// otherwise, it won't be helpful in cases where the recovery point is between
852-
// pitrMaxStartTime and the backup completion time.
853-
if (backupInfo.getCompleteTs() > pitrMaxStartTime) {
854-
return false;
831+
if (backupStartTs < continuousBackupStartTime) {
832+
return Optional.empty();
855833
}
856834

857-
// Retrieve the table's continuous backup start time
858-
long continuousBackupStartTime = continuousBackupTables.getOrDefault(tableName, 0L);
835+
return Optional.of(Pair.newPair(Math.max(backupEndTs, effectiveStart), currentTime));
836+
}
859837

860-
// The backup must have been started on or after the table’s continuous backup start time,
861-
// otherwise, it won't be helpful in few cases because we wouldn't have the WAL entries
862-
// between the backup start time and the continuous backup start time.
863-
if (backupInfo.getStartTs() < continuousBackupStartTime) {
864-
return false;
838+
/**
839+
* Checks if any backup (excluding the current backup) can cover the specified PITR window for
840+
* the given table. A backup can cover the PITR window if it fully encompasses the target time
841+
* range specified.
842+
* @param allBackups List of all backups available.
843+
* @param currentBackup The current backup that should not be considered for
844+
* coverage.
845+
* @param table The table for which we need to check backup coverage.
846+
* @param targetWindow A pair representing the target PITR window (start and end
847+
* times).
848+
* @param continuousBackupStartTime When continuous backups started for the table.
849+
* @param maxAllowedPITRTime The earliest timestamp from which PITR is supported in the
850+
* cluster.
851+
* @param currentTime Current time.
852+
* @return {@code true} if any backup (excluding the current one) fully covers the target PITR
853+
* window; {@code false} otherwise.
854+
*/
855+
private boolean canAnyOtherBackupCover(List<BackupInfo> allBackups, BackupInfo currentBackup,
856+
TableName table, Pair<Long, Long> targetWindow, long continuousBackupStartTime,
857+
long maxAllowedPITRTime, long currentTime) {
858+
859+
long targetStart = targetWindow.getFirst();
860+
long targetEnd = targetWindow.getSecond();
861+
862+
// Iterate through all backups (including the current one)
863+
for (BackupInfo backup : allBackups) {
864+
// Skip if the backup is not full or doesn't contain the table
865+
if (!BackupType.FULL.equals(backup.getType())) continue;
866+
if (!backup.getTableNames().contains(table)) continue;
867+
868+
// Skip the current backup itself
869+
if (backup.equals(currentBackup)) continue;
870+
871+
// Get the covered PITR window for this backup
872+
Optional<Pair<Long, Long>> coveredWindow = getCoveredPitrWindowForTable(backup,
873+
continuousBackupStartTime, maxAllowedPITRTime, currentTime);
874+
875+
if (coveredWindow.isPresent()) {
876+
Pair<Long, Long> covered = coveredWindow.get();
877+
878+
// The backup must fully cover the target window
879+
if (covered.getFirst() <= targetStart && covered.getSecond() >= targetEnd) {
880+
return true;
881+
}
882+
}
865883
}
866884

867-
return true;
885+
return false;
868886
}
869887

870888
@Override

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -862,6 +862,36 @@ public void addContinuousBackupTableSet(Set<TableName> tables, long startTimesta
862862
}
863863
}
864864

865+
/**
866+
* Removes tables from the global continuous backup set. Only removes entries that currently exist
867+
* in the backup system table.
868+
* @param tables set of tables to remove
869+
* @throws IOException if an error occurs while updating the backup system table
870+
*/
871+
public void removeContinuousBackupTableSet(Set<TableName> tables) throws IOException {
872+
if (LOG.isTraceEnabled()) {
873+
LOG.trace("Remove continuous backup table set from backup system table. tables ["
874+
+ StringUtils.join(tables, " ") + "]");
875+
}
876+
if (LOG.isDebugEnabled()) {
877+
tables.forEach(table -> LOG.debug("Removing: " + table));
878+
}
879+
880+
Map<TableName, Long> existingTables = getContinuousBackupTableSet();
881+
Set<TableName> toRemove =
882+
tables.stream().filter(existingTables::containsKey).collect(Collectors.toSet());
883+
884+
if (toRemove.isEmpty()) {
885+
LOG.debug("No matching tables found to remove from continuous backup set.");
886+
return;
887+
}
888+
889+
try (Table table = connection.getTable(tableName)) {
890+
Delete delete = createDeleteForContinuousBackupTableSet(toRemove);
891+
table.delete(delete);
892+
}
893+
}
894+
865895
/**
866896
* Deletes incremental backup set for a backup destination
867897
* @param backupRoot backup root
@@ -1230,7 +1260,7 @@ private Delete createDeleteForIncrBackupTableSet(String backupRoot) {
12301260
private Delete createDeleteForContinuousBackupTableSet(Set<TableName> tables) {
12311261
Delete delete = new Delete(rowkey(CONTINUOUS_BACKUP_SET));
12321262
for (TableName tableName : tables) {
1233-
delete.addColumn(META_FAMILY, Bytes.toBytes(tableName.getNameAsString()));
1263+
delete.addColumns(META_FAMILY, Bytes.toBytes(tableName.getNameAsString()));
12341264
}
12351265
return delete;
12361266
}

0 commit comments

Comments
 (0)