diff --git a/src/main/java/org/roda_project/commons_ip2/model/IPFileInterface.java b/src/main/java/org/roda_project/commons_ip2/model/IPFileInterface.java index c1a055d5..88b169da 100644 --- a/src/main/java/org/roda_project/commons_ip2/model/IPFileInterface.java +++ b/src/main/java/org/roda_project/commons_ip2/model/IPFileInterface.java @@ -9,9 +9,38 @@ */ public interface IPFileInterface extends Serializable { + /** + * Gets the relative folders for this file. + * + * @return list of relative folder names + */ List<String> getRelativeFolders(); + /** + * Gets the file name. + * + * @return the file name + */ String getFileName(); + /** + * Gets the path to the file. + * + * @return the file path + */ Path getPath(); + + /** + * Gets the pre-calculated checksum of the file, if available. + * + * @return the checksum value, or null if not set + */ + String getChecksum(); + + /** + * Gets the algorithm used for the pre-calculated checksum. + * + * @return the checksum algorithm (e.g., "SHA-256"), or null if not set + */ + String getChecksumAlgorithm(); } diff --git a/src/main/java/org/roda_project/commons_ip2/model/IPFileShallow.java b/src/main/java/org/roda_project/commons_ip2/model/IPFileShallow.java index 069cbb2d..144c733d 100644 --- a/src/main/java/org/roda_project/commons_ip2/model/IPFileShallow.java +++ b/src/main/java/org/roda_project/commons_ip2/model/IPFileShallow.java @@ -103,4 +103,16 @@ public String getFileName() { public Path getPath() { throw new UnsupportedOperationException("IPFileShallow does not support this method"); } + + @Override + public String getChecksum() { + // Taken from fileType when one is present; null otherwise, as IPFileInterface documents + return fileType != null ? fileType.getCHECKSUM() : null; + } + + @Override + public String getChecksumAlgorithm() { + // Taken from fileType when one is present; null otherwise, as IPFileInterface documents + return fileType != null ? fileType.getCHECKSUMTYPE() : null; + } }
diff --git a/src/main/java/org/roda_project/commons_ip2/model/impl/eark/EARKUtils.java b/src/main/java/org/roda_project/commons_ip2/model/impl/eark/EARKUtils.java index 6018b381..2ff1e035 100644 --- a/src/main/java/org/roda_project/commons_ip2/model/impl/eark/EARKUtils.java +++ b/src/main/java/org/roda_project/commons_ip2/model/impl/eark/EARKUtils.java @@ -17,8 +17,6 @@ import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.stream.Collectors; -import java.util.stream.Stream; import org.apache.commons.lang3.StringUtils; import org.roda_project.commons_ip.model.ParseException; @@ -92,7 +90,8 @@ protected void addDescriptiveMetadataToZipAndMETS(Map<String, ZipEntryInfo> zipE descriptiveFilePath = IPConstants.REPRESENTATIONS_FOLDER + representationId + IPConstants.ZIP_PATH_SEPARATOR + descriptiveFilePath; } - ZIPUtils.addMdRefFileToZip(zipEntries, file.getPath(), descriptiveFilePath, mdRef); + ZIPUtils.addMdRefFileToZip(zipEntries, file.getPath(), descriptiveFilePath, mdRef, + file.getChecksum(), file.getChecksumAlgorithm()); } } } @@ -114,7 +113,8 @@ protected void addPreservationMetadataToZipAndMETS(Map<String, ZipEntryInfo> zip preservationMetadataPath = IPConstants.REPRESENTATIONS_FOLDER + representationId + IPConstants.ZIP_PATH_SEPARATOR + preservationMetadataPath; } - ZIPUtils.addMdRefFileToZip(zipEntries, file.getPath(), preservationMetadataPath, mdRef); + ZIPUtils.addMdRefFileToZip(zipEntries, file.getPath(), preservationMetadataPath, mdRef, + file.getChecksum(), file.getChecksumAlgorithm()); } } } @@ -136,7 +136,8 @@ protected void addOtherMetadataToZipAndMETS(Map<String, ZipEntryInfo> zipEntries otherMetadataPath = IPConstants.REPRESENTATIONS_FOLDER + representationId + IPConstants.ZIP_PATH_SEPARATOR + otherMetadataPath; } - ZIPUtils.addMdRefFileToZip(zipEntries, file.getPath(), otherMetadataPath, mdRef); + ZIPUtils.addMdRefFileToZip(zipEntries, file.getPath(), otherMetadataPath, mdRef, + file.getChecksum(), file.getChecksumAlgorithm()); }
} } @@ -158,7 +159,8 @@ protected void addTechnicalMetadataToZipAndMETS(Map zipEnt technicalMetadataPath = IPConstants.REPRESENTATIONS_FOLDER + representationId + IPConstants.ZIP_PATH_SEPARATOR + technicalMetadataPath; } - ZIPUtils.addMdRefFileToZip(zipEntries, file.getPath(), technicalMetadataPath, mdRef); + ZIPUtils.addMdRefFileToZip(zipEntries, file.getPath(), technicalMetadataPath, mdRef, + file.getChecksum(), file.getChecksumAlgorithm()); } } } @@ -180,7 +182,8 @@ protected void addSourceMetadataToZipAndMETS(Map zipEntrie sourceMetadataPath = IPConstants.REPRESENTATIONS_FOLDER + representationId + IPConstants.ZIP_PATH_SEPARATOR + sourceMetadataPath; } - ZIPUtils.addMdRefFileToZip(zipEntries, file.getPath(), sourceMetadataPath, mdRef); + ZIPUtils.addMdRefFileToZip(zipEntries, file.getPath(), sourceMetadataPath, mdRef, + file.getChecksum(), file.getChecksumAlgorithm()); } } } @@ -202,7 +205,8 @@ protected void addRightsMetadataToZipAndMETS(Map zipEntrie rightsMetadataPath = IPConstants.REPRESENTATIONS_FOLDER + representationId + IPConstants.ZIP_PATH_SEPARATOR + rightsMetadataPath; } - ZIPUtils.addMdRefFileToZip(zipEntries, file.getPath(), rightsMetadataPath, mdRef); + ZIPUtils.addMdRefFileToZip(zipEntries, file.getPath(), rightsMetadataPath, mdRef, + file.getChecksum(), file.getChecksumAlgorithm()); } } } @@ -357,7 +361,8 @@ private void addRepresentationDataFilesToZipErmsAndMETS(IPInterface ip, Map zipEntries, Mets schemaFilePath = IPConstants.REPRESENTATIONS_FOLDER + representationId + IPConstants.ZIP_PATH_SEPARATOR + schemaFilePath; } - ZIPUtils.addFileTypeFileToZip(zipEntries, schema.getPath(), schemaFilePath, fileType); + ZIPUtils.addFileTypeFileToZip(zipEntries, schema.getPath(), schemaFilePath, fileType, + schema.getChecksum(), schema.getChecksumAlgorithm()); } } } @@ -486,7 +494,8 @@ protected void addDocumentationToZipAndMETS(Map zipEntries documentationFilePath = IPConstants.REPRESENTATIONS_FOLDER + representationId + IPConstants.ZIP_PATH_SEPARATOR + 
documentationFilePath; } - ZIPUtils.addFileTypeFileToZip(zipEntries, doc.getPath(), documentationFilePath, fileType); + ZIPUtils.addFileTypeFileToZip(zipEntries, doc.getPath(), documentationFilePath, fileType, + doc.getChecksum(), doc.getChecksumAlgorithm()); } } } @@ -547,7 +556,8 @@ protected void addSubmissionsToZipAndMETS(final Map zipEnt + ModelUtils.getFoldersFromList(submission.getRelativeFolders()) + submission.getFileName(); final FileType fileType = metsGenerator.addSubmissionFileToMETS(metsWrapper, submissionFilePath, submission.getPath()); - ZIPUtils.addFileTypeFileToZip(zipEntries, submission.getPath(), submissionFilePath, fileType); + ZIPUtils.addFileTypeFileToZip(zipEntries, submission.getPath(), submissionFilePath, fileType, + submission.getChecksum(), submission.getChecksumAlgorithm()); } } } diff --git a/src/main/java/org/roda_project/commons_ip2/model/impl/eark/out/writers/strategy/FolderWriteStrategy.java b/src/main/java/org/roda_project/commons_ip2/model/impl/eark/out/writers/strategy/FolderWriteStrategy.java index da6826d1..36c9d17e 100644 --- a/src/main/java/org/roda_project/commons_ip2/model/impl/eark/out/writers/strategy/FolderWriteStrategy.java +++ b/src/main/java/org/roda_project/commons_ip2/model/impl/eark/out/writers/strategy/FolderWriteStrategy.java @@ -8,6 +8,7 @@ import java.nio.file.Paths; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.HexFormat; import java.util.Map; import org.apache.commons.io.IOUtils; @@ -19,13 +20,13 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import jakarta.xml.bind.DatatypeConverter; /** * @author Miguel Guimarães */ public class FolderWriteStrategy implements WriteStrategy { private static final Logger LOGGER = LoggerFactory.getLogger(FolderWriteStrategy.class); + private static final int BUFFER_SIZE = 4096; private Path destinationPath; @Override @@ -54,7 +55,7 @@ public Path write(Map entries, SIP sip, String fileNameWit throw new
UnsupportedOperationException("Method not implemented"); } - private void writeToPath(final Map<String, ZipEntryInfo> entries, final Path path, String checksumAlgorithm) + private void writeToPath(final Map<String, ZipEntryInfo> entries, final Path path, final String checksumAlgorithm) throws IPException, InterruptedException { try { Files.createDirectories(path); @@ -62,11 +63,15 @@ private void writeToPath(final Map<String, ZipEntryInfo> entries, final Path pat if (Thread.interrupted()) { throw new InterruptedException(); } + // Save pre-calculated checksum BEFORE it gets overwritten + final String preCalculatedChecksum = zipEntryInfo.getChecksum(); + final String preCalculatedAlgorithm = zipEntryInfo.getChecksumAlgorithm(); + zipEntryInfo.setChecksum(checksumAlgorithm); zipEntryInfo.prepareEntryForZipping(); LOGGER.debug("Writing file {}", zipEntryInfo.getFilePath()); final Path outputPath = Paths.get(path.toString(), zipEntryInfo.getName()); - writeFileToPath(zipEntryInfo, outputPath, checksumAlgorithm); + writeFileToPath(zipEntryInfo, outputPath, checksumAlgorithm, preCalculatedChecksum, preCalculatedAlgorithm); } } catch (final IOException | NoSuchAlgorithmException e) { LOGGER.debug("Error in write method", e);
@@ -93,29 +98,58 @@ private Path getDirPath(final Path targetPath, final String name, String fallbac return path; } - private void writeFileToPath(final ZipEntryInfo zipEntryInfo, final Path outputPath, String checksumAlgorithm) + /** + * Copies the entry's source file to {@code outputPath} and passes the resulting + * checksum to {@code setChecksum}. The digest is computed during the copy unless a + * non-empty pre-calculated checksum for the requested algorithm was supplied. + * + * @param zipEntryInfo entry whose source file is copied + * @param outputPath destination file; missing parent directories are created + * @param checksumAlgorithm algorithm requested for the package being written + * @param preCalculatedChecksum caller-supplied checksum, may be null or empty + * @param preCalculatedAlgorithm algorithm of the caller-supplied checksum, may be null + * @throws IOException if the source cannot be read or the destination written + * @throws NoSuchAlgorithmException if {@code checksumAlgorithm} is not available + */ + private void writeFileToPath(final ZipEntryInfo zipEntryInfo, final Path outputPath, final String checksumAlgorithm, + final String preCalculatedChecksum, final String preCalculatedAlgorithm) throws IOException, NoSuchAlgorithmException { InputStream is = null; OutputStream os = null; try { is = Files.newInputStream(zipEntryInfo.getFilePath()); Files.createDirectories(outputPath.getParent()); os = Files.newOutputStream(outputPath); - final byte[] buffer = new byte[4096]; - final MessageDigest complete = MessageDigest.getInstance(checksumAlgorithm); - int numRead; - do { - numRead = is.read(buffer); - if (numRead > 0) { - complete.update(buffer, 0, numRead); - os.write(buffer, 0, numRead); - } - } while (numRead != -1); - - setChecksum(zipEntryInfo, DatatypeConverter.printHexBinary(complete.digest()), checksumAlgorithm); + // Check if file already has a pre-calculated checksum matching the requested algorithm + final boolean hasValidPreCalculatedChecksum = preCalculatedChecksum != null + && !preCalculatedChecksum.isEmpty() + && preCalculatedAlgorithm != null + && preCalculatedAlgorithm.equalsIgnoreCase(checksumAlgorithm); + + if (hasValidPreCalculatedChecksum) { + // File has pre-calculated checksum - just copy data without calculating + LOGGER.debug("Using pre-calculated checksum for file {}", zipEntryInfo.getFilePath()); + is.transferTo(os); + // Report the requested algorithm spelling, exactly as the computed branch does + setChecksum(zipEntryInfo, preCalculatedChecksum, checksumAlgorithm); + } else { + // Calculate checksum while copying + final byte[] buffer = new byte[BUFFER_SIZE]; + final MessageDigest complete = MessageDigest.getInstance(checksumAlgorithm); + int numRead; + do { + numRead = is.read(buffer); + if (numRead > 0) { + complete.update(buffer, 0, numRead); + os.write(buffer, 0, numRead); + } + } while (numRead != -1); + + setChecksum(zipEntryInfo, HexFormat.of().withUpperCase().formatHex(complete.digest()), checksumAlgorithm); + } } finally { IOUtils.closeQuietly(is); IOUtils.closeQuietly(os); diff --git a/src/main/java/org/roda_project/commons_ip2/utils/METSFileTypeZipEntryInfo.java b/src/main/java/org/roda_project/commons_ip2/utils/METSFileTypeZipEntryInfo.java index c099d634..10afe81c 100644 --- a/src/main/java/org/roda_project/commons_ip2/utils/METSFileTypeZipEntryInfo.java +++ b/src/main/java/org/roda_project/commons_ip2/utils/METSFileTypeZipEntryInfo.java @@ -15,15 +15,47 @@ public class METSFileTypeZipEntryInfo extends FileZipEntryInfo
{ private FileType metsFileType; - public METSFileTypeZipEntryInfo(String name, Path filePath) { + /** + * Constructor with name and file path. + * + * @param name the entry name + * @param filePath the file path + */ + public METSFileTypeZipEntryInfo(final String name, final Path filePath) { super(name, filePath); } - public METSFileTypeZipEntryInfo(String name, Path filePath, FileType metsFileType) { + /** + * Constructor with name, file path, and METS file type. + * + * @param name the entry name + * @param filePath the file path + * @param metsFileType the METS file type + */ + public METSFileTypeZipEntryInfo(final String name, final Path filePath, final FileType metsFileType) { super(name, filePath); this.setMetsFileType(metsFileType); } + /** + * Constructor with name, file path, METS file type, and pre-calculated checksum. + * + * @param name the entry name + * @param filePath the file path + * @param metsFileType the METS file type + * @param preCalculatedChecksum the pre-calculated checksum value + * @param checksumAlgorithm the checksum algorithm used + */ + public METSFileTypeZipEntryInfo(final String name, final Path filePath, final FileType metsFileType, + final String preCalculatedChecksum, final String checksumAlgorithm) { + super(name, filePath); + this.setMetsFileType(metsFileType); + if (preCalculatedChecksum != null && !preCalculatedChecksum.isEmpty()) { + this.setChecksum(preCalculatedChecksum); + this.setChecksumAlgorithm(checksumAlgorithm); + } + } + @Override public void prepareEntryForZipping() { // do nothing diff --git a/src/main/java/org/roda_project/commons_ip2/utils/METSMdRefZipEntryInfo.java b/src/main/java/org/roda_project/commons_ip2/utils/METSMdRefZipEntryInfo.java index 3f00980f..78f39fba 100644 --- a/src/main/java/org/roda_project/commons_ip2/utils/METSMdRefZipEntryInfo.java +++ b/src/main/java/org/roda_project/commons_ip2/utils/METSMdRefZipEntryInfo.java @@ -12,28 +12,85 @@ import 
org.roda_project.commons_ip.utils.FileZipEntryInfo; import org.roda_project.commons_ip2.mets_v1_12.beans.MdSecType.MdRef; +/** Zip entry info for METS MdRef elements. */ public class METSMdRefZipEntryInfo extends FileZipEntryInfo { + /** The METS MdRef element. */ private MdRef metsMdRef; - public METSMdRefZipEntryInfo(String name, Path filePath) { + /** + * Constructor. + * + * @param name + * the zip entry name + * @param filePath + * the file path + */ + public METSMdRefZipEntryInfo(final String name, final Path filePath) { super(name, filePath); } - public METSMdRefZipEntryInfo(String name, Path filePath, MdRef metsMdRef) { + /** + * Constructor with MdRef. + * + * @param name + * the zip entry name + * @param filePath + * the file path + * @param metsMdRef + * the METS MdRef + */ + public METSMdRefZipEntryInfo(final String name, final Path filePath, + final MdRef metsMdRef) { super(name, filePath); this.setMetsMdRef(metsMdRef); } + /** + * Constructor with pre-calculated checksum support. + * + * @param name + * the zip entry name + * @param filePath + * the file path + * @param metsMdRef + * the METS MdRef + * @param preCalculatedChecksum + * the pre-calculated checksum (may be null or empty) + * @param checksumAlgorithm + * the algorithm used for the pre-calculated checksum + */ + public METSMdRefZipEntryInfo(final String name, final Path filePath, + final MdRef metsMdRef, final String preCalculatedChecksum, + final String checksumAlgorithm) { + super(name, filePath); + this.setMetsMdRef(metsMdRef); + if (preCalculatedChecksum != null && !preCalculatedChecksum.isEmpty()) { + this.setChecksum(preCalculatedChecksum); + this.setChecksumAlgorithm(checksumAlgorithm); + } + } + @Override public void prepareEntryForZipping() { // do nothing } - public MdRef getMetsMdRef() { + /** + * Gets the METS MdRef. 
+ * + * @return the METS MdRef + */ + public final MdRef getMetsMdRef() { return metsMdRef; } - public void setMetsMdRef(MdRef metsMdRef) { + /** + * Sets the METS MdRef. + * + * @param metsMdRef + * the METS MdRef to set + */ + public final void setMetsMdRef(final MdRef metsMdRef) { this.metsMdRef = metsMdRef; } diff --git a/src/main/java/org/roda_project/commons_ip2/utils/ZIPUtils.java b/src/main/java/org/roda_project/commons_ip2/utils/ZIPUtils.java index 1f29d6af..1a7c1211 100644 --- a/src/main/java/org/roda_project/commons_ip2/utils/ZIPUtils.java +++ b/src/main/java/org/roda_project/commons_ip2/utils/ZIPUtils.java @@ -17,6 +17,7 @@ import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.HashMap; +import java.util.HexFormat; import java.util.Map; import java.util.Map.Entry; import java.util.Optional; @@ -27,7 +28,6 @@ import java.util.zip.ZipInputStream; import java.util.zip.ZipOutputStream; -import jakarta.xml.bind.DatatypeConverter; import org.apache.commons.io.IOUtils; import org.roda_project.commons_ip.model.ParseException; @@ -43,6 +43,7 @@ public final class ZIPUtils { private static final Logger LOGGER = LoggerFactory.getLogger(ZIPUtils.class); + private static final int BUFFER_SIZE = 4096; private ZIPUtils() { // do nothing @@ -92,12 +93,54 @@ public static Map addMdRefFileToZip(Map addMdRefFileToZip(final Map zipEntries, + final Path filePath, final String zipPath, final MdRef mdRef, final String preCalculatedChecksum, + final String checksumAlgorithm) throws IPException { + zipEntries.put(zipPath, new METSMdRefZipEntryInfo(zipPath, filePath, mdRef, + preCalculatedChecksum, checksumAlgorithm)); + return zipEntries; + } + public static Map addFileTypeFileToZip(Map zipEntries, Path filePath, String zipPath, FileType fileType) throws IPException { zipEntries.put(zipPath, new METSFileTypeZipEntryInfo(zipPath, filePath, fileType)); return zipEntries; } + /** + * Add a file to the zip entries with an optional 
pre-calculated checksum. + * When a pre-calculated checksum is provided and matches the SIP's checksum algorithm, + * the checksum will not be recalculated during zip creation. + * + * @param zipEntries the map of zip entries + * @param filePath the file path + * @param zipPath the path within the zip + * @param fileType the METS file type + * @param preCalculatedChecksum the pre-calculated checksum (may be null or empty) + * @param checksumAlgorithm the algorithm used for the pre-calculated checksum (may be null or empty) + * @return the updated map of zip entries + */ + public static Map addFileTypeFileToZip(final Map zipEntries, + final Path filePath, final String zipPath, final FileType fileType, final String preCalculatedChecksum, + final String checksumAlgorithm) throws IPException { + zipEntries.put(zipPath, new METSFileTypeZipEntryInfo(zipPath, filePath, fileType, + preCalculatedChecksum, checksumAlgorithm)); + return zipEntries; + } + public static Map addMETSFileToZip(Map zipEntries, Path filePath, String zipPath, Mets mets, boolean rootMETS, FileType fileType) throws IPException { zipEntries.put(zipPath, new METSZipEntryInfo(zipPath, filePath, mets, rootMETS, fileType)); @@ -130,6 +173,10 @@ public static void zip(Map files, OutputStream out, SIP si throw new InterruptedException(); } + // Save pre-calculated checksum BEFORE it gets overwritten + final String preCalculatedChecksum = file.getChecksum(); + final String preCalculatedAlgorithm = file.getChecksumAlgorithm(); + file.setChecksum(sip.getChecksum()); file.prepareEntryForZipping(); @@ -145,11 +192,24 @@ public static void zip(Map files, OutputStream out, SIP si zos.putNextEntry(entry); try (InputStream inputStream = Files.newInputStream(file.getFilePath());) { + // Check if file already has a pre-calculated checksum matching the SIP's algorithm + final boolean hasValidPreCalculatedChecksum = preCalculatedChecksum != null + && !preCalculatedChecksum.isEmpty() + && preCalculatedAlgorithm != null + 
&& preCalculatedAlgorithm.equalsIgnoreCase(sip.getChecksum()); + Map checksums; if (file instanceof METSZipEntryInfo metsEntry) { + // METS files always need checksum calculation (they are generated) checksums = calculateChecksums(Optional.of(zos), inputStream, metsChecksumAlgorithms); metsEntry.setChecksums(checksums); metsEntry.setSize(metsEntry.getFilePath().toFile().length()); + } else if (hasValidPreCalculatedChecksum) { + // File has pre-calculated checksum - just copy data without calculating + LOGGER.debug("Using pre-calculated checksum for file {}", file.getFilePath()); + copyWithoutChecksum(zos, inputStream); + checksums = new HashMap<>(); + checksums.put(sip.getChecksum(), preCalculatedChecksum); } else { checksums = calculateChecksums(Optional.of(zos), inputStream, nonMetsChecksumAlgorithms); } @@ -206,7 +266,8 @@ public static Map calculateChecksums(Optional z } while (numRead != -1); // generate hex versions of the digests - algorithms.forEach((alg, dig) -> values.put(alg, DatatypeConverter.printHexBinary(dig.digest()))); + final HexFormat hexFormat = HexFormat.of().withUpperCase(); + algorithms.forEach((alg, dig) -> values.put(alg, hexFormat.formatHex(dig.digest()))); return values; } @@ -252,4 +313,16 @@ public static void unzip(Path zip, final Path dest) throws IOException { } } + private static void copyWithoutChecksum(final ZipOutputStream zos, final InputStream inputStream) + throws IOException { + final byte[] buffer = new byte[BUFFER_SIZE]; + int numRead; + do { + numRead = inputStream.read(buffer); + if (numRead > 0) { + zos.write(buffer, 0, numRead); + } + } while (numRead != -1); + } + } diff --git a/src/test/java/org/roda_project/commons_ip2/model/eark/EARKSIPTest.java b/src/test/java/org/roda_project/commons_ip2/model/eark/EARKSIPTest.java index 50f485c6..7fef86f2 100644 --- a/src/test/java/org/roda_project/commons_ip2/model/eark/EARKSIPTest.java +++ b/src/test/java/org/roda_project/commons_ip2/model/eark/EARKSIPTest.java @@ -67,6 +67,8 
@@ */ public class EARKSIPTest { private static final String REPRESENTATION_STATUS_NORMALIZED = "NORMALIZED"; + private static final String SIP_CHECKSUM_TEST_ID = "SIP_CHECKSUM_TEST"; + private static final String REPRESENTATION_WITH_CHECKSUM = "representation_with_checksum"; private static final Logger LOGGER = LoggerFactory.getLogger(EARKSIPTest.class); @@ -864,4 +866,99 @@ private Path createFullEARKSIP_For_Test_Compliance220() throws IPException, Inte return zipSIP; } + /** + * Test that pre-calculated checksums are properly used during SIP generation. + * When a file has its checksum set before SIP creation, that checksum should be + * used instead of recalculating it, which is important for large files. + * + *

<p>This test uses a FAKE checksum to prove the library uses the pre-calculated + * value rather than calculating it. If the library were to calculate the checksum, + * it would not match our fake value.</p>

+ * + * @throws IPException if an error occurs during SIP creation + * @throws InterruptedException if the thread is interrupted + * @throws IOException if an I/O error occurs + * @throws ParserConfigurationException if a parser configuration error occurs + * @throws SAXException if a SAX parsing error occurs + * @throws ParseException if a parsing error occurs + * @throws NoSuchAlgorithmException if the checksum algorithm is not available + */ + @Test + public void testPreCalculatedChecksumSupport() throws IPException, InterruptedException, IOException, + ParserConfigurationException, SAXException, ParseException, NoSuchAlgorithmException { + LOGGER.info("Testing pre-calculated checksum support"); + + // 1) Create SIP with pre-calculated checksum + final SIP sip = new EARKSIP(SIP_CHECKSUM_TEST_ID, IPContentType.getMIXED(), IPContentInformationType.getMIXED(), + "2.1.0"); + sip.addCreatorSoftwareAgent("RODA Commons IP", "2.0.0"); + sip.setDescription("SIP with pre-calculated checksums"); + + // 2) Add descriptive metadata + final IPDescriptiveMetadata metadataDescriptiveDC = new IPDescriptiveMetadata( + new IPFile(Paths.get("src/test/resources/eark/metadata_descriptive_dc.xml")), + new MetadataType(MetadataTypeEnum.DC), null); + sip.addDescriptiveMetadata(metadataDescriptiveDC); + + // 3) Create a representation with a file that has a pre-calculated checksum + final IPRepresentation representation = new IPRepresentation(REPRESENTATION_WITH_CHECKSUM); + sip.addRepresentation(representation); + + // Use a FAKE checksum - this proves the library uses our value instead of calculating + // If the library calculated the checksum, it would NOT match this fake value + final String fakeChecksum = "AABBCCDD11223344556677889900AABBCCDD11223344556677889900AABBCCDD"; + + final Path testFilePath = Paths.get("src/test/resources/eark/documentation.pdf"); + final IPFile representationFile = new IPFile(testFilePath); + representationFile.setRenameTo("data_with_checksum.pdf"); + // 
Set the FAKE pre-calculated checksum + representationFile.setChecksum(fakeChecksum); + representationFile.setChecksumAlgorithm("SHA-256"); + representation.addFile(representationFile); + + // 4) Build SIP + final WriteStrategy writeStrategy = SIPBuilderUtils.getWriteStrategy(WriteStrategyEnum.ZIP, tempFolder); + final Path zipSIP = sip.build(writeStrategy); + + LOGGER.info("SIP built successfully with pre-calculated checksum at: {}", zipSIP); + + // 5) Extract and verify the fake checksum appears in the representation METS file + final Path extractDir = tempFolder.resolve("extracted_sip"); + Files.createDirectories(extractDir); + + // Extract the ZIP + try (java.util.zip.ZipInputStream zis = new java.util.zip.ZipInputStream(Files.newInputStream(zipSIP))) { + java.util.zip.ZipEntry entry; + while ((entry = zis.getNextEntry()) != null) { + final Path targetPath = extractDir.resolve(entry.getName()); + if (entry.isDirectory()) { + Files.createDirectories(targetPath); + } else { + Files.createDirectories(targetPath.getParent()); + Files.copy(zis, targetPath); + } + zis.closeEntry(); + } + } + + // Find and read the representation METS file + final Path representationMetsPath = extractDir.resolve(SIP_CHECKSUM_TEST_ID) + .resolve("representations") + .resolve(REPRESENTATION_WITH_CHECKSUM) + .resolve("METS.xml"); + + Assert.assertTrue("Representation METS file should exist", Files.exists(representationMetsPath)); + + final String metsContent = Files.readString(representationMetsPath); + + // Verify our FAKE checksum appears in the METS file + Assert.assertTrue( + "The METS file should contain our pre-calculated fake checksum, proving it was used instead of being calculated", + metsContent.contains(fakeChecksum)); + + LOGGER.info( + "SUCCESS: The fake checksum '{}' was found in the METS file, proving pre-calculated checksums work correctly", + fakeChecksum); + } + }