Use of org.apache.hadoop.io.compress.CompressionCodec in project jena by apache: class AbstractCompressedWholeFileQuadInputFormatTests, method getOutputStream.
@Override
protected OutputStream getOutputStream(File f) throws IOException {
    CompressionCodec codec = this.getCompressionCodec();
    // Configurable codecs must be given a Hadoop configuration before use
    if (codec instanceof Configurable) {
        ((Configurable) codec).setConf(this.prepareConfiguration());
    }
    FileOutputStream fileOutput = new FileOutputStream(f, false);
    // Wrap the raw file stream so the test data is written in compressed form
    return codec.createOutputStream(fileOutput);
}
Use of org.apache.hadoop.io.compress.CompressionCodec in project jena by apache: class AbstractCompressedWholeFileTripleInputFormatTests, method getOutputStream.
@Override
protected OutputStream getOutputStream(File f) throws IOException {
    CompressionCodec codec = this.getCompressionCodec();
    if (codec instanceof Configurable) {
        ((Configurable) codec).setConf(this.prepareConfiguration());
    }
    FileOutputStream fileOutput = new FileOutputStream(f, false);
    return codec.createOutputStream(fileOutput);
}
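The two Jena test helpers above exercise only the write path. As a complementary illustration, here is a minimal sketch of the matching read side; it assumes the same getCompressionCodec() and prepareConfiguration() helpers and simply wraps a FileInputStream with codec.createInputStream, the standard CompressionCodec API for reading back data written through createOutputStream.

// Hypothetical counterpart to getOutputStream above (not part of the Jena
// tests); assumes the same getCompressionCodec()/prepareConfiguration() helpers.
protected InputStream getInputStream(File f) throws IOException {
    CompressionCodec codec = this.getCompressionCodec();
    if (codec instanceof Configurable) {
        // Configurable codecs need a configuration on the read side as well
        ((Configurable) codec).setConf(this.prepareConfiguration());
    }
    FileInputStream fileInput = new FileInputStream(f);
    // createInputStream decompresses the bytes produced by createOutputStream
    return codec.createInputStream(fileInput);
}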
Use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop by apache: class ITestS3AInputStreamPerformance, method executeDecompression.
/**
 * Execute a decompression + line read with the given input policy.
 * @param readahead byte readahead
 * @param inputPolicy read policy
 * @throws IOException IO Problems
 */
private void executeDecompression(long readahead, S3AInputPolicy inputPolicy) throws IOException {
    CompressionCodecFactory factory = new CompressionCodecFactory(getConf());
    // Select the codec from the test data file's extension (e.g. ".gz")
    CompressionCodec codec = factory.getCodec(testData);
    long bytesRead = 0;
    int lines = 0;
    FSDataInputStream objectIn = openTestFile(inputPolicy, readahead);
    ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer();
    try (LineReader lineReader = new LineReader(codec.createInputStream(objectIn), getConf())) {
        Text line = new Text();
        int read;
        while ((read = lineReader.readLine(line)) > 0) {
            bytesRead += read;
            lines++;
        }
    } catch (EOFException eof) {
        // done
    }
    timer.end("Time to read %d lines [%d bytes expanded, %d raw] with readahead = %d",
            lines, bytesRead, testDataStatus.getLen(), readahead);
    logTimePerIOP("line read", timer, lines);
    logStreamStatistics();
}
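The same read pattern works outside the S3A test harness. The sketch below, which is not part of the Hadoop test above, shows the core idea against an arbitrary Hadoop FileSystem: let CompressionCodecFactory pick a codec from the file extension, wrap the stream only when a codec is found, and count lines with LineReader. The path /tmp/data.txt.gz is just a placeholder.

// Standalone sketch: count lines in a possibly compressed file; the path is
// an assumption for illustration.
Configuration conf = new Configuration();
Path input = new Path("/tmp/data.txt.gz");
FileSystem fs = FileSystem.get(conf);
CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(input);
long lines = 0;
InputStream in = fs.open(input);
if (codec != null) {
    // Only wrap the raw stream when the extension maps to a registered codec
    in = codec.createInputStream(in);
}
try (LineReader reader = new LineReader(in, conf)) {
    Text line = new Text();
    while (reader.readLine(line) > 0) {
        lines++;
    }
}
System.out.println("Read " + lines + " lines from " + input);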
Use of org.apache.hadoop.io.compress.CompressionCodec in project nifi by apache: class CreateHadoopSequenceFile, method onTrigger.
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    String mimeType = flowFile.getAttribute(CoreAttributes.MIME_TYPE.key());
    String packagingFormat = NOT_PACKAGED;
    if (null != mimeType) {
        switch (mimeType.toLowerCase()) {
            case "application/tar":
                packagingFormat = TAR_FORMAT;
                break;
            case "application/zip":
                packagingFormat = ZIP_FORMAT;
                break;
            case "application/flowfile-v3":
                packagingFormat = FLOWFILE_STREAM_FORMAT_V3;
                break;
            default:
                getLogger().warn("Cannot unpack {} because its mime.type attribute is set to '{}', which is not a format that can be unpacked", new Object[] { flowFile, mimeType });
        }
    }
    // Pick the writer that knows how to unpack the incoming packaging format
    final SequenceFileWriter sequenceFileWriter;
    switch (packagingFormat) {
        case TAR_FORMAT:
            sequenceFileWriter = new TarUnpackerSequenceFileWriter();
            break;
        case ZIP_FORMAT:
            sequenceFileWriter = new ZipUnpackerSequenceFileWriter();
            break;
        case FLOWFILE_STREAM_FORMAT_V3:
            sequenceFileWriter = new FlowFileStreamUnpackerSequenceFileWriter();
            break;
        default:
            sequenceFileWriter = new SequenceFileWriterImpl();
    }
    final Configuration configuration = getConfiguration();
    if (configuration == null) {
        getLogger().error("HDFS not configured properly");
        session.transfer(flowFile, RELATIONSHIP_FAILURE);
        context.yield();
        return;
    }
    final CompressionCodec codec = getCompressionCodec(context, configuration);
    final String value = context.getProperty(COMPRESSION_TYPE).getValue();
    final SequenceFile.CompressionType compressionType = value == null
            ? SequenceFile.CompressionType.valueOf(DEFAULT_COMPRESSION_TYPE)
            : SequenceFile.CompressionType.valueOf(value);
    final String fileName = flowFile.getAttribute(CoreAttributes.FILENAME.key()) + ".sf";
    flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), fileName);
    try {
        StopWatch stopWatch = new StopWatch(true);
        flowFile = sequenceFileWriter.writeSequenceFile(flowFile, session, configuration, compressionType, codec);
        session.getProvenanceReporter().modifyContent(flowFile, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
        session.transfer(flowFile, RELATIONSHIP_SUCCESS);
        getLogger().info("Transferred flowfile {} to {}", new Object[] { flowFile, RELATIONSHIP_SUCCESS });
    } catch (ProcessException e) {
        getLogger().error("Failed to create Sequence File. Transferring {} to 'failure'", new Object[] { flowFile }, e);
        session.transfer(flowFile, RELATIONSHIP_FAILURE);
    }
}
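The processor above delegates the actual writing to its SequenceFileWriter implementations, but the way a compression type and codec pair is handed to Hadoop is the standard SequenceFile.createWriter call. The sketch below illustrates that step in isolation; the output path, key/value classes, and choice of DefaultCodec are assumptions, not what NiFi uses internally.

// Illustrative only: writing a compressed SequenceFile with an explicit
// compression type and codec. Path, key/value types and codec are assumptions.
Configuration conf = new Configuration();
Path outputPath = new Path("/tmp/example.sf");
DefaultCodec codec = new DefaultCodec();
codec.setConf(conf);
SequenceFile.CompressionType compressionType = SequenceFile.CompressionType.BLOCK;
try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(outputPath),
        SequenceFile.Writer.keyClass(Text.class),
        SequenceFile.Writer.valueClass(BytesWritable.class),
        SequenceFile.Writer.compression(compressionType, codec))) {
    writer.append(new Text("key-1"), new BytesWritable(new byte[] { 1, 2, 3 }));
}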
Use of org.apache.hadoop.io.compress.CompressionCodec in project nifi by apache: class FetchHDFS, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final FileSystem hdfs = getFileSystem();
    final UserGroupInformation ugi = getUserGroupInformation();
    final String filenameValue = context.getProperty(FILENAME).evaluateAttributeExpressions(flowFile).getValue();
    final Path path;
    try {
        path = new Path(filenameValue);
    } catch (IllegalArgumentException e) {
        getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure", new Object[] { filenameValue, flowFile, e });
        flowFile = session.putAttribute(flowFile, "hdfs.failure.reason", e.getMessage());
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    final StopWatch stopWatch = new StopWatch(true);
    final FlowFile finalFlowFile = flowFile;
    // Perform the HDFS read as the configured Hadoop user
    ugi.doAs(new PrivilegedAction<Object>() {

        @Override
        public Object run() {
            InputStream stream = null;
            CompressionCodec codec = null;
            Configuration conf = getConfiguration();
            final CompressionCodecFactory compressionCodecFactory = new CompressionCodecFactory(conf);
            final CompressionType compressionType = CompressionType.valueOf(context.getProperty(COMPRESSION_CODEC).toString());
            final boolean inferCompressionCodec = compressionType == CompressionType.AUTOMATIC;
            if (inferCompressionCodec) {
                // AUTOMATIC: choose the codec from the file extension, if any matches
                codec = compressionCodecFactory.getCodec(path);
            } else if (compressionType != CompressionType.NONE) {
                codec = getCompressionCodec(context, getConfiguration());
            }
            FlowFile flowFile = finalFlowFile;
            final Path qualifiedPath = path.makeQualified(hdfs.getUri(), hdfs.getWorkingDirectory());
            try {
                final String outputFilename;
                final String originalFilename = path.getName();
                stream = hdfs.open(path, 16384);
                // Check if compression codec is defined (inferred or otherwise)
                if (codec != null) {
                    stream = codec.createInputStream(stream);
                    outputFilename = StringUtils.removeEnd(originalFilename, codec.getDefaultExtension());
                } else {
                    outputFilename = originalFilename;
                }
                flowFile = session.importFrom(stream, finalFlowFile);
                flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), outputFilename);
                stopWatch.stop();
                getLogger().info("Successfully received content from {} for {} in {}", new Object[] { qualifiedPath, flowFile, stopWatch.getDuration() });
                session.getProvenanceReporter().fetch(flowFile, qualifiedPath.toString(), stopWatch.getDuration(TimeUnit.MILLISECONDS));
                session.transfer(flowFile, REL_SUCCESS);
            } catch (final FileNotFoundException | AccessControlException e) {
                getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure", new Object[] { qualifiedPath, flowFile, e });
                flowFile = session.putAttribute(flowFile, "hdfs.failure.reason", e.getMessage());
                flowFile = session.penalize(flowFile);
                session.transfer(flowFile, REL_FAILURE);
            } catch (final IOException e) {
                getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to comms.failure", new Object[] { qualifiedPath, flowFile, e });
                flowFile = session.penalize(flowFile);
                session.transfer(flowFile, REL_COMMS_FAILURE);
            } finally {
                IOUtils.closeQuietly(stream);
            }
            return null;
        }
    });
}
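FetchHDFS combines two small CompressionCodec idioms worth seeing on their own: inferring the codec from the path when the compression type is AUTOMATIC, and stripping the codec's default extension from the output filename. The fragment below isolates just that logic; the example path is an assumption.

// Isolated sketch of the inference and renaming logic used above; the path
// is a made-up example.
Configuration conf = new Configuration();
Path path = new Path("/data/logs/events.json.bz2");
CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
String originalFilename = path.getName();
String outputFilename;
if (codec != null) {
    // getDefaultExtension() returns e.g. ".bz2", so "events.json.bz2" becomes "events.json"
    outputFilename = StringUtils.removeEnd(originalFilename, codec.getDefaultExtension());
} else {
    // No matching codec: keep the original name and read the bytes as-is
    outputFilename = originalFilename;
}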