use of com.google.common.io.ByteSource in project druid by druid-io.
the class S3DataSegmentPuller method getSegmentFiles.
public FileUtils.FileCopyResult getSegmentFiles(final S3Coords s3Coords, final File outDir) throws SegmentLoadingException {
  log.info("Pulling index at path[%s] to outDir[%s]", s3Coords, outDir);
  if (!isObjectInBucket(s3Coords)) {
    throw new SegmentLoadingException("IndexFile[%s] does not exist.", s3Coords);
  }
  try {
    org.apache.commons.io.FileUtils.forceMkdir(outDir);
    final URI uri = URI.create(String.format("s3://%s/%s", s3Coords.bucket, s3Coords.path));
    // Wrap the S3 object in a ByteSource so downstream helpers can reopen the stream on retry.
    final ByteSource byteSource = new ByteSource() {
      @Override
      public InputStream openStream() throws IOException {
        try {
          return buildFileObject(uri, s3Client).openInputStream();
        } catch (ServiceException e) {
          // Surface recoverable S3 errors as IOException so the retry predicate can catch them.
          if (e.getCause() != null && S3Utils.S3RETRY.apply(e)) {
            throw new IOException("Recoverable exception", e);
          }
          throw Throwables.propagate(e);
        }
      }
    };
    if (CompressionUtils.isZip(s3Coords.path)) {
      final FileUtils.FileCopyResult result = CompressionUtils.unzip(byteSource, outDir, S3Utils.S3RETRY, true);
      log.info("Loaded %d bytes from [%s] to [%s]", result.size(), s3Coords.toString(), outDir.getAbsolutePath());
      return result;
    }
    if (CompressionUtils.isGz(s3Coords.path)) {
      final String fname = Files.getNameWithoutExtension(uri.getPath());
      final File outFile = new File(outDir, fname);
      final FileUtils.FileCopyResult result = CompressionUtils.gunzip(byteSource, outFile, S3Utils.S3RETRY);
      log.info("Loaded %d bytes from [%s] to [%s]", result.size(), s3Coords.toString(), outFile.getAbsolutePath());
      return result;
    }
    throw new IAE("Do not know how to load file type at [%s]", uri.toString());
  } catch (Exception e) {
    // Clean up the partially written output directory before propagating the failure.
    try {
      org.apache.commons.io.FileUtils.deleteDirectory(outDir);
    } catch (IOException ioe) {
      log.warn(ioe, "Failed to remove output directory [%s] for segment pulled from [%s]", outDir.getAbsolutePath(), s3Coords.toString());
    }
    throw new SegmentLoadingException(e, e.getMessage());
  }
}
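Stripped of the Druid-specific types, the core trick above is that a ByteSource is reopenable, so a failed copy can simply start over with a fresh stream. Below is a minimal, self-contained sketch of that shape; the flaky source and the hand-rolled retry loop are hypothetical stand-ins, not Druid's actual FileUtils.retryCopy.

import com.google.common.io.ByteSource;
import com.google.common.io.Files;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;

public class RetryableByteSourceSketch {
  private static int attempts = 0;

  // Hypothetical flaky open: the first call fails, later calls succeed.
  private static InputStream flakyOpen() throws IOException {
    if (attempts++ == 0) {
      throw new IOException("transient failure");
    }
    return new ByteArrayInputStream("payload".getBytes(StandardCharsets.UTF_8));
  }

  public static void main(String[] args) throws IOException {
    final ByteSource source = new ByteSource() {
      @Override
      public InputStream openStream() throws IOException {
        return flakyOpen(); // a fresh stream on every attempt
      }
    };
    final File out = File.createTempFile("sketch", ".bin");
    IOException last = null;
    for (int i = 0; i < 3; i++) { // crude stand-in for FileUtils.retryCopy
      try {
        source.copyTo(Files.asByteSink(out));
        last = null;
        break;
      } catch (IOException e) {
        last = e; // retry by reopening the source
      }
    }
    if (last != null) {
      throw last;
    }
    System.out.println("copied " + out.length() + " bytes");
  }
}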
use of com.google.common.io.ByteSource in project buck by facebook.
the class ZipStep method execute.
@Override
public StepExecutionResult execute(ExecutionContext context) {
  if (filesystem.exists(pathToZipFile)) {
    context.postEvent(ConsoleEvent.severe("Attempting to overwrite an existing zip: %s", pathToZipFile));
    return StepExecutionResult.ERROR;
  }
  // Since filesystem traversals can be non-deterministic, sort the entries we find into
  // a tree map before writing them out.
  final Map<String, Pair<CustomZipEntry, Optional<Path>>> entries = Maps.newTreeMap();
  FileVisitor<Path> pathFileVisitor = new SimpleFileVisitor<Path>() {
    private boolean isSkipFile(Path file) {
      return !paths.isEmpty() && !paths.contains(file);
    }

    private String getEntryName(Path path) {
      Path relativePath = junkPaths ? path.getFileName() : baseDir.relativize(path);
      return MorePaths.pathWithUnixSeparators(relativePath);
    }

    private CustomZipEntry getZipEntry(String entryName, final Path path, BasicFileAttributes attr) throws IOException {
      boolean isDirectory = filesystem.isDirectory(path);
      if (isDirectory) {
        entryName += "/";
      }
      CustomZipEntry entry = new CustomZipEntry(entryName);
      // We want deterministic ZIPs, so avoid mtimes.
      entry.setFakeTime();
      entry.setCompressionLevel(isDirectory ? ZipCompressionLevel.MIN_COMPRESSION_LEVEL.getValue() : compressionLevel.getValue());
      // If we're using STORED files, we must manually set the CRC, size, and compressed size.
      if (entry.getMethod() == ZipEntry.STORED && !isDirectory) {
        entry.setSize(attr.size());
        entry.setCompressedSize(attr.size());
        entry.setCrc(new ByteSource() {
          @Override
          public InputStream openStream() throws IOException {
            return filesystem.newFileInputStream(path);
          }
        }.hash(Hashing.crc32()).padToLong());
      }
      long externalAttributes = filesystem.getFileAttributesForZipEntry(path);
      LOG.verbose("Setting mode for entry %s path %s to 0x%08X", entryName, path, externalAttributes);
      entry.setExternalAttributes(externalAttributes);
      return entry;
    }

    @Override
    public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
      if (!isSkipFile(file)) {
        CustomZipEntry entry = getZipEntry(getEntryName(file), file, attrs);
        entries.put(entry.getName(), new Pair<>(entry, Optional.of(file)));
      }
      return FileVisitResult.CONTINUE;
    }

    @Override
    public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException {
      if (!dir.equals(baseDir) && !isSkipFile(dir)) {
        CustomZipEntry entry = getZipEntry(getEntryName(dir), dir, attrs);
        entries.put(entry.getName(), new Pair<>(entry, Optional.empty()));
      }
      return FileVisitResult.CONTINUE;
    }
  };
  try (BufferedOutputStream baseOut = new BufferedOutputStream(filesystem.newFileOutputStream(pathToZipFile));
       CustomZipOutputStream out = ZipOutputStreams.newOutputStream(baseOut, THROW_EXCEPTION)) {
    filesystem.walkRelativeFileTree(baseDir, pathFileVisitor);
    // Write the entries out using the iteration order of the tree map above.
    for (Pair<CustomZipEntry, Optional<Path>> entry : entries.values()) {
      out.putNextEntry(entry.getFirst());
      if (entry.getSecond().isPresent()) {
        try (InputStream input = filesystem.newFileInputStream(entry.getSecond().get())) {
          ByteStreams.copy(input, out);
        }
      }
      out.closeEntry();
    }
  } catch (IOException e) {
    context.logError(e, "Error creating zip file %s", pathToZipFile);
    return StepExecutionResult.ERROR;
  }
  return StepExecutionResult.SUCCESS;
}
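The STORED-entry branch is the interesting ByteSource use here: the ZIP format requires stored (uncompressed) entries to declare their CRC up front, and a lazily opened ByteSource hashed with Hashing.crc32() supplies it without buffering the whole file. A standalone sketch of just that step, using only Guava and the JDK; the file name is a placeholder:

import com.google.common.hash.Hashing;
import com.google.common.io.Files;

import java.io.File;
import java.io.IOException;
import java.util.zip.ZipEntry;

public class StoredEntryCrcSketch {
  public static void main(String[] args) throws IOException {
    File file = new File("some-file.txt"); // placeholder path; must exist
    ZipEntry entry = new ZipEntry("some-file.txt");
    entry.setMethod(ZipEntry.STORED);
    // STORED entries must declare size, compressed size, and CRC before being written.
    entry.setSize(file.length());
    entry.setCompressedSize(file.length());
    // hash() streams the file through the CRC-32 hash function; padToLong() widens the 32-bit result.
    entry.setCrc(Files.asByteSource(file).hash(Hashing.crc32()).padToLong());
    System.out.printf("crc32=%08x%n", entry.getCrc());
  }
}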
use of com.google.common.io.ByteSource in project buck by facebook.
the class HttpArtifactCacheBinaryProtocolTest method testStoreRequest.
@Test
public void testStoreRequest() throws IOException {
  final RuleKey ruleKey = new RuleKey("00000000010000000000008000000000");
  final RuleKey ruleKey2 = new RuleKey("90000000000000000000008000000005");
  final String data = "data";
  ImmutableMap<String, String> metadata = ImmutableMap.of("metaKey", "metaValue");
  HttpArtifactCacheBinaryProtocol.StoreRequest storeRequest = new HttpArtifactCacheBinaryProtocol.StoreRequest(
      ArtifactInfo.builder().addRuleKeys(ruleKey, ruleKey2).setMetadata(metadata).build(),
      new ByteSource() {
        @Override
        public InputStream openStream() throws IOException {
          return new ByteArrayInputStream(data.getBytes(Charsets.UTF_8));
        }
      });
  ByteArrayOutputStream storeRequestOutputStream = new ByteArrayOutputStream();
  storeRequest.write(storeRequestOutputStream);
  ByteArrayOutputStream storeRequestPayloadStream = new ByteArrayOutputStream();
  StoreResponseReadResult readStoreRequest = HttpArtifactCacheBinaryProtocol.readStoreRequest(
      new DataInputStream(new ByteArrayInputStream(storeRequestOutputStream.toByteArray())),
      storeRequestPayloadStream);
  assertThat(readStoreRequest.getRuleKeys(), Matchers.containsInAnyOrder(ruleKey, ruleKey2));
  assertThat(readStoreRequest.getMetadata(), Matchers.equalTo(metadata));
  assertThat(storeRequestPayloadStream.toByteArray(), Matchers.equalTo(data.getBytes(Charsets.UTF_8)));
}
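Since the payload in this test is already in memory, the anonymous class is equivalent to Guava's built-in ByteSource.wrap. A small sketch demonstrating the equivalence; class and variable names are invented for illustration:

import com.google.common.io.ByteSource;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

public class WrapSketch {
  public static void main(String[] args) throws IOException {
    final byte[] data = "data".getBytes(StandardCharsets.UTF_8);
    // Anonymous class, as in the test above.
    ByteSource anonymous = new ByteSource() {
      @Override
      public InputStream openStream() {
        return new ByteArrayInputStream(data);
      }
    };
    // Built-in equivalent.
    ByteSource wrapped = ByteSource.wrap(data);
    System.out.println(Arrays.equals(anonymous.read(), wrapped.read())); // true
  }
}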
use of com.google.common.io.ByteSource in project druid by druid-io.
the class CompressionUtils method unzip.
/**
 * Unzip the pulled file to an output directory. This is only expected to work on zips with lone
 * files, and is not intended for zips with directory structures.
 *
 * @param pulledFile The file to unzip
 * @param outDir     The directory in which to store the contents of the zip
 *
 * @return a FileCopyResult of the files which were written to disk
 *
 * @throws IOException if an I/O error occurs while reading or writing the zip contents
 */
public static FileUtils.FileCopyResult unzip(final File pulledFile, final File outDir) throws IOException {
  if (!(outDir.exists() && outDir.isDirectory())) {
    throw new ISE("outDir[%s] must exist and be a directory", outDir);
  }
  log.info("Unzipping file[%s] to [%s]", pulledFile, outDir);
  final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
  try (final ZipFile zipFile = new ZipFile(pulledFile)) {
    final Enumeration<? extends ZipEntry> enumeration = zipFile.entries();
    while (enumeration.hasMoreElements()) {
      final ZipEntry entry = enumeration.nextElement();
      // Each entry is exposed as a reopenable ByteSource so the copy can be retried.
      result.addFiles(FileUtils.retryCopy(new ByteSource() {
        @Override
        public InputStream openStream() throws IOException {
          return new BufferedInputStream(zipFile.getInputStream(entry));
        }
      }, new File(outDir, entry.getName()), FileUtils.IS_EXCEPTION, DEFAULT_RETRY_COUNT).getFiles());
    }
  }
  return result;
}
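Without Druid's retry and FileCopyResult plumbing, the per-entry pattern reduces to the following sketch. The paths are placeholders, and note it trusts entry names as-is, so real code should guard against entries escaping outDir ("zip slip"):

import com.google.common.io.ByteSource;
import com.google.common.io.Files;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.Enumeration;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

public class UnzipSketch {
  public static void main(String[] args) throws IOException {
    final File pulledFile = new File("segment.zip"); // placeholder
    final File outDir = new File("out");             // must already exist
    try (ZipFile zipFile = new ZipFile(pulledFile)) {
      Enumeration<? extends ZipEntry> entries = zipFile.entries();
      while (entries.hasMoreElements()) {
        final ZipEntry entry = entries.nextElement();
        // Reopenable per-entry source, as in the Druid method above.
        ByteSource source = new ByteSource() {
          @Override
          public InputStream openStream() throws IOException {
            return new BufferedInputStream(zipFile.getInputStream(entry));
          }
        };
        source.copyTo(Files.asByteSink(new File(outDir, entry.getName())));
      }
    }
  }
}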
use of com.google.common.io.ByteSource in project druid by druid-io.
the class StreamUtilsTest method testRetryExceptionOnFlush.
@Test
public void testRetryExceptionOnFlush() {
  final byte[] bytes = new byte[1 << 10];
  Random random = new Random(47831947819L);
  random.nextBytes(bytes);
  final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
  final AtomicLong outputFlushes = new AtomicLong(0);
  Assert.assertEquals(bytes.length, StreamUtils.retryCopy(new ByteSource() {
    @Override
    public InputStream openStream() throws IOException {
      return new ByteArrayInputStream(bytes);
    }
  }, new ByteSink() {
    @Override
    public OutputStream openStream() throws IOException {
      byteArrayOutputStream.reset();
      return new FilterOutputStream(byteArrayOutputStream) {
        @Override
        public void flush() throws IOException {
          // Fail the very first flush to force a retry; all later flushes succeed.
          if (outputFlushes.getAndIncrement() > 0) {
            out.flush();
          } else {
            throw new IOException("Test exception");
          }
        }
      };
    }
  }, FileUtils.IS_EXCEPTION, 10));
  // 2 closes and 2 manual flushes
  Assert.assertEquals(4, outputFlushes.get());
  Assert.assertArrayEquals(bytes, byteArrayOutputStream.toByteArray());
}
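The test's structure — a reopenable ByteSource paired with a ByteSink whose stream fails once — can be reproduced without Druid's StreamUtils. A sketch under that assumption, with a hand-rolled retry loop standing in for StreamUtils.retryCopy (it does not reproduce the exact flush-count bookkeeping above):

import com.google.common.io.ByteSink;
import com.google.common.io.ByteSource;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.atomic.AtomicLong;

public class RetrySinkSketch {
  public static void main(String[] args) throws IOException {
    final byte[] bytes = "payload".getBytes(StandardCharsets.UTF_8);
    final ByteArrayOutputStream target = new ByteArrayOutputStream();
    final AtomicLong opens = new AtomicLong(0);
    ByteSource source = ByteSource.wrap(bytes);
    ByteSink sink = new ByteSink() {
      @Override
      public OutputStream openStream() throws IOException {
        target.reset(); // a retry restarts the copy, so discard partial output
        if (opens.getAndIncrement() == 0) {
          throw new IOException("first attempt fails");
        }
        return target;
      }
    };
    IOException last = null;
    for (int i = 0; i < 10; i++) { // crude stand-in for StreamUtils.retryCopy
      try {
        source.copyTo(sink);
        last = null;
        break;
      } catch (IOException e) {
        last = e;
      }
    }
    if (last != null) {
      throw last;
    }
    System.out.println(new String(target.toByteArray(), StandardCharsets.UTF_8)); // payload
  }
}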