Use of gov.loc.repository.bagit.exceptions.InvalidBagitFileFormatException in project bagit-java by LibraryOfCongress.
Class BagitSuiteComplanceTest, method testInvalidOperatingSystemSpecificBags.
/**
 * Reads and validates every operating-system-specific invalid test case and
 * checks that each one is rejected.
 *
 * The Linux-only cases are used unless the test is executing on Windows, in
 * which case the Windows-only cases are used instead. Each rejection is
 * logged and tallied per exception class; the test passes only when the
 * number of rejections equals the number of test cases.
 */
@Test
public void testInvalidOperatingSystemSpecificBags() {
  List<Path> candidateDirs = visitor.getLinuxOnlyTestCases();
  final ConcurrentMap<Class<? extends Exception>, AtomicLong> failuresByType = new ConcurrentHashMap<>();
  if (TestUtils.isExecutingOnWindows()) {
    candidateDirs = visitor.getWindowsOnlyTestCases();
  }

  int rejected = 0;
  for (final Path candidateDir : candidateDirs) {
    try {
      final Bag parsedBag = reader.read(candidateDir);
      verifier.isValid(parsedBag, true);
    } catch (InvalidBagitFileFormatException | IOException | UnparsableVersionException | MissingPayloadManifestException | MissingBagitFileException | MissingPayloadDirectoryException | FileNotInPayloadDirectoryException | InterruptedException | MaliciousPathException | CorruptChecksumException | VerificationException | UnsupportedAlgorithmException e) {
      logger.info("Found invalid os specific bag with message: {}", e.getMessage());

      // one counter per concrete exception class, created on first sight
      final Class<? extends Exception> failureType = e.getClass();
      failuresByType.putIfAbsent(failureType, new AtomicLong(0));
      failuresByType.get(failureType).incrementAndGet();

      rejected++;
    }
  }

  assertEquals("every test case should throw an error", candidateDirs.size(), rejected);
  logger.debug("Count of all errors found in os specific invalid cases: {}", failuresByType);
}
Use of gov.loc.repository.bagit.exceptions.InvalidBagitFileFormatException in project bagit-java by LibraryOfCongress.
Class TagFileReader, method createFileFromManifest.
/**
 * Create the file and check it for various things, like starting with a *,
 * or trying to access a file outside the bag.
 *
 * A leading {@code *} is stripped with a warning. Paths containing a
 * backslash are rejected as badly formatted and paths containing {@code ~/}
 * are rejected as malicious. The remaining text is passed through
 * {@code PathUtils.decodeFilname} and then either converted from a
 * {@code file://} URI or resolved against {@code bagRootDir}. An empty
 * {@code path} resolves to the bag root itself.
 *
 * @param bagRootDir the root directory of the bag
 * @param path the path text as it appears in the tag file
 * @return the resolved file; normalized when resolved against {@code bagRootDir},
 *         exactly as produced by {@code Paths.get(URI)} for {@code file://} URIs
 *
 * @throws MaliciousPathException if the path contains {@code ~/} or resolves outside {@code bagRootDir}
 * @throws InvalidBagitFileFormatException if the path contains a backslash or is a {@code file://}
 *         URI that cannot be converted to a path
 */
static Path createFileFromManifest(final Path bagRootDir, final String path) throws MaliciousPathException, InvalidBagitFileFormatException {
  String fixedPath = path;
  // startsWith instead of charAt(0): an empty path must not raise StringIndexOutOfBoundsException
  if (path.startsWith("*")) {
    logger.warn(messages.getString("removing_asterisk"));
    // remove the * from the path
    fixedPath = path.substring(1);
  }

  if (path.contains("\\")) {
    final String formattedMessage = messages.getString("blackslash_used_as_path_separator_error");
    throw new InvalidBagitFileFormatException(MessageFormatter.format(formattedMessage, path).getMessage());
  }

  if (path.contains("~/")) {
    final String formattedMessage = messages.getString("malicious_path_error");
    throw new MaliciousPathException(MessageFormatter.format(formattedMessage, path).getMessage());
  }

  fixedPath = PathUtils.decodeFilname(fixedPath);
  Path file;
  if (fixedPath.startsWith("file://")) {
    try {
      file = Paths.get(new URI(fixedPath));
    } catch (URISyntaxException | IllegalArgumentException e) {
      // Paths.get(URI) signals an unusable URI (e.g. one with an authority component)
      // with an unchecked IllegalArgumentException; report it like a syntax error
      final String formattedMessage = messages.getString("invalid_url_format_error");
      throw new InvalidBagitFileFormatException(MessageFormatter.format(formattedMessage, path).getMessage(), e);
    }
  } else {
    file = bagRootDir.resolve(fixedPath).normalize();
  }

  // Compare absolute, normalized forms on both sides. Comparing against the raw
  // bagRootDir wrongly flags every entry as malicious when the root is given as
  // "." or "./bag", or is relative while the entry is an absolute file:// URI.
  final Path normalizedRoot = bagRootDir.toAbsolutePath().normalize();
  if (!file.toAbsolutePath().normalize().startsWith(normalizedRoot)) {
    final String formattedMessage = messages.getString("malicious_path_error");
    throw new MaliciousPathException(MessageFormatter.format(formattedMessage, file).getMessage());
  }

  return file;
}
Use of gov.loc.repository.bagit.exceptions.InvalidBagitFileFormatException in project bagit-java by LibraryOfCongress.
Class BagitTextFileReader, method readBagitTextFile.
/**
 * Parses the bagit.txt file, yielding the declared bag version together with
 * the character encoding declared for the tag files.
 *
 * The file is first checked for a byte order mark, then read as UTF-8
 * "key: value" pairs. When the declared version is the same as or newer than
 * {@code VERSION_1_0}, the raw lines are additionally run through the strict
 * line check.
 *
 * @param bagitFile the bagit.txt file
 * @return the bag {@link Version} and {@link Charset} encoding of the tag files
 *
 * @throws IOException if there is a problem reading a file. The file MUST be in UTF-8 encoding.
 * @throws UnparsableVersionException if there is a problem parsing the bagit version number
 * @throws InvalidBagMetadataException if the bagit.txt file does not conform to "key: value"
 * @throws InvalidBagitFileFormatException if the bagit.txt file does not conform to the bagit spec
 */
public static SimpleImmutableEntry<Version, Charset> readBagitTextFile(final Path bagitFile) throws IOException, UnparsableVersionException, InvalidBagMetadataException, InvalidBagitFileFormatException {
  logger.debug(messages.getString("reading_version_and_encoding"), bagitFile);
  throwErrorIfByteOrderMarkIsPresent(bagitFile);

  final List<SimpleImmutableEntry<String, String>> entries = KeyValueReader.readKeyValuesFromFile(bagitFile, ":", StandardCharsets.UTF_8);

  String versionText = null;
  Charset tagFileCharset = null;
  for (final SimpleImmutableEntry<String, String> entry : entries) {
    final String key = entry.getKey();
    if ("BagIt-Version".equals(key)) {
      versionText = entry.getValue();
      logger.debug(messages.getString("bagit_version"), versionText);
    } else if ("Tag-File-Character-Encoding".equals(key)) {
      tagFileCharset = Charset.forName(entry.getValue());
      logger.debug(messages.getString("tag_file_encoding"), tagFileCharset);
    }
  }

  // both entries are mandatory
  final boolean hasBothEntries = versionText != null && tagFileCharset != null;
  if (!hasBothEntries) {
    throw new InvalidBagitFileFormatException(messages.getString("invalid_bagit_text_file_error"));
  }

  final Version bagVersion = parseVersion(versionText);
  if (bagVersion.isSameOrNewer(VERSION_1_0)) {
    // re-read the raw lines for the strict line check applied from VERSION_1_0 onwards
    throwErrorIfLinesDoNotMatchStrict(Files.readAllLines(bagitFile, StandardCharsets.UTF_8));
  }

  return new SimpleImmutableEntry<>(bagVersion, tagFileCharset);
}
Use of gov.loc.repository.bagit.exceptions.InvalidBagitFileFormatException in project bagit-java by LibraryOfCongress.
Class FetchReader, method readFetch.
/**
 * Reads a fetch.txt file.
 *
 * Every line must match {@code FETCH_LINE_REGEX}, must not be blank, and must
 * split on whitespace into three fields: URL, length and path. The length is
 * either {@code -} (stored as {@code -1}) or a decimal number. The path is
 * resolved and checked through {@code TagFileReader.createFileFromManifest}.
 *
 * @param fetchFile the specific fetch file
 * @param encoding the encoding to read the file with
 * @param bagRootDir the root directory of the bag
 * @return a list of items to fetch
 *
 * @throws IOException if there is a problem reading a file, or a line's URL is malformed
 * @throws MaliciousPathException if the path was crafted to point outside the bag directory
 * @throws InvalidBagitFileFormatException if the fetch format does not follow the bagit specification
 */
@SuppressWarnings("PMD.AvoidInstantiatingObjectsInLoops")
public static List<FetchItem> readFetch(final Path fetchFile, final Charset encoding, final Path bagRootDir) throws IOException, MaliciousPathException, InvalidBagitFileFormatException {
  logger.info(messages.getString("reading_fetch_file"), fetchFile);
  final List<FetchItem> itemsToFetch = new ArrayList<>();

  try (final BufferedReader reader = Files.newBufferedReader(fetchFile, encoding)) {
    String line = reader.readLine();
    while (line != null) {
      final boolean wellFormed = line.matches(FETCH_LINE_REGEX) && !line.matches("\\s*");
      final String[] parts = wellFormed ? line.split("\\s+", 3) : new String[0];
      // also guards against a line that passes the regex but splits into fewer than
      // three fields, which would otherwise raise ArrayIndexOutOfBoundsException
      if (parts.length < 3) {
        throw new InvalidBagitFileFormatException(messages.getString("invalid_fetch_file_line_error").replace("{}", line));
      }

      final Path path = TagFileReader.createFileFromManifest(bagRootDir, parts[2]);

      long length;
      try {
        // parseLong, not decode: decode reads a leading 0 as octal and accepts hex,
        // which would silently misreport a length that is written in decimal
        length = "-".equals(parts[1]) ? -1 : Long.parseLong(parts[1]);
      } catch (NumberFormatException e) {
        throw new InvalidBagitFileFormatException(messages.getString("invalid_fetch_file_line_error").replace("{}", line), e);
      }

      final URL url = new URL(parts[0]);

      logger.debug(messages.getString("read_fetch_file_line"), url, length, parts[2], fetchFile);
      itemsToFetch.add(new FetchItem(url, length, path));

      line = reader.readLine();
    }
  }

  return itemsToFetch;
}
Use of gov.loc.repository.bagit.exceptions.InvalidBagitFileFormatException in project bagit-java by LibraryOfCongress.
Class BagitSuiteComplanceTest, method testInvalidBags.
/**
 * Reads and validates every generic invalid test case and checks that each
 * one is rejected.
 *
 * Each rejection is logged and tallied per exception class. A bag that
 * unexpectedly passes is reported through the logger so it can be identified
 * when the final count assertion fails.
 */
@Test
public void testInvalidBags() {
  int errorCount = 0;
  final ConcurrentMap<Class<? extends Exception>, AtomicLong> map = new ConcurrentHashMap<>();

  for (final Path invalidBagDir : visitor.getInvalidTestCases()) {
    try {
      final Bag bag = reader.read(invalidBagDir);
      verifier.isValid(bag, true);
      // reaching this line means the bag was accepted; report it through the logger
      logger.error("{} should have failed but didn't!", bag.getRootDir());
    } catch (InvalidBagitFileFormatException | IOException | UnparsableVersionException | MissingPayloadManifestException | MissingBagitFileException | MissingPayloadDirectoryException | FileNotInPayloadDirectoryException | InterruptedException | MaliciousPathException | CorruptChecksumException | VerificationException | UnsupportedAlgorithmException e) {
      // these are the generic invalid cases, not the os specific ones
      logger.info("Found invalid bag with message: {}", e.getMessage());
      map.putIfAbsent(e.getClass(), new AtomicLong(0));
      map.get(e.getClass()).incrementAndGet();
      errorCount++;
    }
  }

  assertEquals("every test case should throw an error", visitor.getInvalidTestCases().size(), errorCount);
  logger.debug("Count of all errors found in generic invalid cases: {}", map);
}
Aggregations