Use of org.apache.hadoop.io.compress.CompressionCodec in project apex-malhar by apache.
The class AbstractFileOutputOperatorTest, method checkSnappyFile:
private void checkSnappyFile(File file, List<Long> offsets, int startVal, int totalWindows, int totalRecords) throws IOException {
    FileInputStream fis;
    InputStream gss = null;
    Configuration conf = new Configuration();
    CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(SnappyCodec.class, conf);
    CompressionInputStream snappyIs = null;
    BufferedReader br = null;
    int numWindows = 0;
    try {
        fis = new FileInputStream(file);
        gss = fis;
        long startOffset = 0;
        for (long offset : offsets) {
            // Skip initial case in case file is not yet created
            if (offset == 0) {
                continue;
            }
            // Each entry in offsets marks the end of one window's compressed block;
            // decompress only that byte range and verify its records.
            long limit = offset - startOffset;
            LimitInputStream lis = new LimitInputStream(gss, limit);
            snappyIs = codec.createInputStream(lis);
            br = new BufferedReader(new InputStreamReader(snappyIs));
            String eline = "" + (startVal + numWindows * 2);
            int count = 0;
            String line;
            while ((line = br.readLine()) != null) {
                Assert.assertEquals("File line", eline, line);
                ++count;
                if ((count % totalRecords) == 0) {
                    ++numWindows;
                    eline = "" + (startVal + numWindows * 2);
                }
            }
            startOffset = offset;
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close only the outermost stream that was actually opened.
        if (br != null) {
            br.close();
        } else if (snappyIs != null) {
            snappyIs.close();
        } else if (gss != null) {
            gss.close();
        }
    }
    Assert.assertEquals("Total", totalWindows, numWindows);
}
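The core pattern in this test, once the assertions are stripped away, is: instantiate the codec through ReflectionUtils so it picks up the Configuration, then wrap the raw input stream with codec.createInputStream() before reading. A minimal standalone sketch of that pattern (the file name data.snappy is a placeholder, and SnappyCodec needs the native Snappy library available at runtime):

    import java.io.BufferedReader;
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.InputStreamReader;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.compress.CompressionCodec;
    import org.apache.hadoop.io.compress.CompressionInputStream;
    import org.apache.hadoop.io.compress.SnappyCodec;
    import org.apache.hadoop.util.ReflectionUtils;

    public class SnappyReadSketch {
        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            // Instantiate the codec via ReflectionUtils so it receives the Configuration.
            CompressionCodec codec = ReflectionUtils.newInstance(SnappyCodec.class, conf);
            // Wrap the raw file stream in a decompressing stream and read line by line.
            try (FileInputStream fis = new FileInputStream("data.snappy"); // hypothetical file
                 CompressionInputStream in = codec.createInputStream(fis);
                 BufferedReader br = new BufferedReader(new InputStreamReader(in))) {
                String line;
                while ((line = br.readLine()) != null) {
                    System.out.println(line);
                }
            }
        }
    }

The test above additionally bounds each read with a LimitInputStream so every window's compressed block is decoded independently against the recorded offsets.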
Use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop-pcap by RIPE-NCC.
The class PcapInputFormat, method initPcapRecordReader:
public static PcapRecordReader initPcapRecordReader(Path path, long start, long length, TaskAttemptContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    FileSystem fs = path.getFileSystem(conf);
    FSDataInputStream baseStream = fs.open(path);
    DataInputStream stream = baseStream;
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(conf);
    final CompressionCodec codec = compressionCodecs.getCodec(path);
    if (codec != null) {
        stream = new DataInputStream(codec.createInputStream(stream));
    }
    PcapReader reader = initPcapReader(stream, conf);
    return new PcapRecordReader(reader, start, length, baseStream, stream, context);
}
Use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop-pcap by RIPE-NCC.
The class PcapInputFormat, method initPcapRecordReader (old mapred API overload):
public static PcapRecordReader initPcapRecordReader(Path path, long start, long length, Reporter reporter, Configuration conf) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    FSDataInputStream baseStream = fs.open(path);
    DataInputStream stream = baseStream;
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(conf);
    final CompressionCodec codec = compressionCodecs.getCodec(path);
    if (codec != null) {
        stream = new DataInputStream(codec.createInputStream(stream));
    }
    PcapReader reader = initPcapReader(stream, conf);
    return new PcapRecordReader(reader, start, length, baseStream, stream, reporter);
}
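Both overloads share the same detection step: CompressionCodecFactory maps the file extension (for example .gz or .bz2) to a registered codec, and the stream is only wrapped when a codec is found. A minimal sketch of that step in isolation (the helper name open is hypothetical):

    import java.io.DataInputStream;
    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.compress.CompressionCodec;
    import org.apache.hadoop.io.compress.CompressionCodecFactory;

    public class CodecDetectSketch {
        // Open a path, transparently decompressing it if its extension maps to a registered codec.
        static DataInputStream open(Path path, Configuration conf) throws IOException {
            FileSystem fs = path.getFileSystem(conf);
            FSDataInputStream raw = fs.open(path);
            CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
            if (codec == null) {
                return raw; // plain, uncompressed input (FSDataInputStream extends DataInputStream)
            }
            return new DataInputStream(codec.createInputStream(raw));
        }
    }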
Use of org.apache.hadoop.io.compress.CompressionCodec in project druid by druid-io.
The class Utils, method makePathAndOutputStream:
public static OutputStream makePathAndOutputStream(JobContext job, Path outputPath, boolean deleteExisting) throws IOException {
    OutputStream retVal;
    FileSystem fs = outputPath.getFileSystem(job.getConfiguration());
    Class<? extends CompressionCodec> codecClass;
    CompressionCodec codec = null;
    if (FileOutputFormat.getCompressOutput(job)) {
        codecClass = FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        outputPath = new Path(outputPath.toString() + codec.getDefaultExtension());
    }
    if (fs.exists(outputPath)) {
        if (deleteExisting) {
            fs.delete(outputPath, false);
        } else {
            throw new ISE("outputPath[%s] must not exist.", outputPath);
        }
    }
    if (FileOutputFormat.getCompressOutput(job)) {
        retVal = codec.createOutputStream(fs.create(outputPath, false));
    } else {
        retVal = fs.create(outputPath, false);
    }
    return retVal;
}
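The compression branch boils down to three calls: instantiate the configured codec class, append its default extension to the output path, and wrap the FileSystem output stream with codec.createOutputStream(). A reduced sketch assuming gzip as the codec (the helper name create is made up for illustration):

    import java.io.IOException;
    import java.io.OutputStream;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.compress.CompressionCodec;
    import org.apache.hadoop.io.compress.GzipCodec;
    import org.apache.hadoop.util.ReflectionUtils;

    public class CompressedOutputSketch {
        // Create a gzip-compressed output stream at basePath, with the codec's default extension appended.
        static OutputStream create(FileSystem fs, Path basePath, Configuration conf) throws IOException {
            CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
            Path outputPath = new Path(basePath.toString() + codec.getDefaultExtension()); // e.g. ".gz"
            return codec.createOutputStream(fs.create(outputPath, false));
        }
    }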
Use of org.apache.hadoop.io.compress.CompressionCodec in project presto by prestodb.
The class TestOrcPageSourceMemoryTracking, method createTestFile:
public static FileSplit createTestFile(String filePath, HiveOutputFormat<?, ?> outputFormat, @SuppressWarnings("deprecation") SerDe serDe, String compressionCodec, List<TestColumn> testColumns, int numRows) throws Exception {
    // filter out partition keys, which are not written to the file
    testColumns = ImmutableList.copyOf(filter(testColumns, not(TestColumn::isPartitionKey)));
    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", Joiner.on(',').join(transform(testColumns, TestColumn::getName)));
    tableProperties.setProperty("columns.types", Joiner.on(',').join(transform(testColumns, TestColumn::getType)));
    serDe.initialize(CONFIGURATION, tableProperties);
    JobConf jobConf = new JobConf();
    if (compressionCodec != null) {
        CompressionCodec codec = new CompressionCodecFactory(CONFIGURATION).getCodecByName(compressionCodec);
        jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
        jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
    }
    RecordWriter recordWriter = createRecordWriter(new Path(filePath), CONFIGURATION);
    try {
        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(
                ImmutableList.copyOf(transform(testColumns, TestColumn::getName)),
                ImmutableList.copyOf(transform(testColumns, TestColumn::getObjectInspector)));
        Object row = objectInspector.create();
        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
        for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
            for (int i = 0; i < testColumns.size(); i++) {
                Object writeValue = testColumns.get(i).getWriteValue();
                if (writeValue instanceof Slice) {
                    writeValue = ((Slice) writeValue).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), writeValue);
            }
            Writable record = serDe.serialize(row, objectInspector);
            recordWriter.write(record);
            if (rowNumber % STRIPE_ROWS == STRIPE_ROWS - 1) {
                flushStripe(recordWriter);
            }
        }
    } finally {
        recordWriter.close(false);
    }
    Path path = new Path(filePath);
    path.getFileSystem(CONFIGURATION).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}
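Here the codec is looked up by name rather than by file extension: CompressionCodecFactory.getCodecByName() accepts a short alias such as "gzip" or "snappy" or a fully qualified class name, and the resolved class is recorded in the JobConf for block-compressed output. A small sketch of just that lookup, assuming COMPRESS_CODEC and COMPRESS_TYPE are the mapreduce FileOutputFormat constants used in the snippet above:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.SequenceFile;
    import org.apache.hadoop.io.compress.CompressionCodec;
    import org.apache.hadoop.io.compress.CompressionCodecFactory;
    import org.apache.hadoop.mapred.JobConf;

    import static org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.COMPRESS_CODEC;
    import static org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.COMPRESS_TYPE;

    public class CodecByNameSketch {
        // Resolve a codec by alias or class name and record it in the job configuration.
        static void configure(JobConf jobConf, String codecName) {
            CompressionCodec codec = new CompressionCodecFactory(new Configuration()).getCodecByName(codecName);
            jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
            jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
        }
    }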