Use of org.apache.hadoop.io.compress.CompressionCodec in project druid by druid-io.
The class Utils, method openInputStream.
public static InputStream openInputStream(JobContext job, Path inputPath, final FileSystem fileSystem) throws IOException {
    if (!FileOutputFormat.getCompressOutput(job)) {
        return fileSystem.open(inputPath);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        inputPath = new Path(inputPath + codec.getDefaultExtension());
        return codec.createInputStream(fileSystem.open(inputPath));
    }
}
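A minimal caller sketch for the method above, assuming the druid Utils class is importable; the job setup, path, and output handling are illustrative, not taken from druid:

import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.mapreduce.Job;

public class OpenInputStreamExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);      // Job implements JobContext
        FileSystem fs = FileSystem.get(conf);
        // Pass the extension-less path: when compressed output is enabled,
        // openInputStream appends the codec's default extension (e.g. ".gz") itself.
        try (InputStream in = Utils.openInputStream(job, new Path("/tmp/job-output/part-0"), fs)) {
            IOUtils.copyBytes(in, System.out, 4096, false);
        }
    }
}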
Use of org.apache.hadoop.io.compress.CompressionCodec in project DataX by alibaba.
The class UnstructuredStorageReaderUtil, method readFromStream.
public static void readFromStream(InputStream inputStream, String context, Configuration readerSliceConfig, RecordSender recordSender, TaskPluginCollector taskPluginCollector) {
    String compress = readerSliceConfig.getString(Key.COMPRESS, null);
    if (StringUtils.isBlank(compress)) {
        compress = null;
    }
    String encoding = readerSliceConfig.getString(Key.ENCODING, Constant.DEFAULT_ENCODING);
    // handle blank encoding
    if (StringUtils.isBlank(encoding)) {
        LOG.warn(String.format("The configured encoding is [%s]; falling back to the default [%s]", encoding, Constant.DEFAULT_ENCODING));
        encoding = Constant.DEFAULT_ENCODING;
    }
    List<Configuration> column = readerSliceConfig.getListConfiguration(Key.COLUMN);
    // handle ["*"] -> [], null
    if (null != column && 1 == column.size() && "\"*\"".equals(column.get(0).toString())) {
        readerSliceConfig.set(Key.COLUMN, null);
        column = null;
    }
    BufferedReader reader = null;
    int bufferSize = readerSliceConfig.getInt(Key.BUFFER_SIZE, Constant.DEFAULT_BUFFER_SIZE);
    // compress logic
    try {
        if (null == compress) {
            reader = new BufferedReader(new InputStreamReader(inputStream, encoding), bufferSize);
        } else {
            // TODO compress
            if ("lzo_deflate".equalsIgnoreCase(compress)) {
                LzoInputStream lzoInputStream = new LzoInputStream(inputStream, new LzoDecompressor1x_safe());
                reader = new BufferedReader(new InputStreamReader(lzoInputStream, encoding));
            } else if ("lzo".equalsIgnoreCase(compress)) {
                LzoInputStream lzopInputStream = new ExpandLzopInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(lzopInputStream, encoding));
            } else if ("gzip".equalsIgnoreCase(compress)) {
                CompressorInputStream compressorInputStream = new GzipCompressorInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(compressorInputStream, encoding), bufferSize);
            } else if ("bzip2".equalsIgnoreCase(compress)) {
                CompressorInputStream compressorInputStream = new BZip2CompressorInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(compressorInputStream, encoding), bufferSize);
            } else if ("hadoop-snappy".equalsIgnoreCase(compress)) {
                CompressionCodec snappyCodec = new SnappyCodec();
                InputStream snappyInputStream = snappyCodec.createInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(snappyInputStream, encoding));
            } else if ("framing-snappy".equalsIgnoreCase(compress)) {
                InputStream snappyInputStream = new SnappyFramedInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(snappyInputStream, encoding));
            } else
            /* else if ("xz".equalsIgnoreCase(compress)) {
                CompressorInputStream compressorInputStream = new XZCompressorInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(compressorInputStream, encoding));
            } else if ("ar".equalsIgnoreCase(compress)) {
                ArArchiveInputStream arArchiveInputStream = new ArArchiveInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(arArchiveInputStream, encoding));
            } else if ("arj".equalsIgnoreCase(compress)) {
                ArjArchiveInputStream arjArchiveInputStream = new ArjArchiveInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(arjArchiveInputStream, encoding));
            } else if ("cpio".equalsIgnoreCase(compress)) {
                CpioArchiveInputStream cpioArchiveInputStream = new CpioArchiveInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(cpioArchiveInputStream, encoding));
            } else if ("dump".equalsIgnoreCase(compress)) {
                DumpArchiveInputStream dumpArchiveInputStream = new DumpArchiveInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(dumpArchiveInputStream, encoding));
            } else if ("jar".equalsIgnoreCase(compress)) {
                JarArchiveInputStream jarArchiveInputStream = new JarArchiveInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(jarArchiveInputStream, encoding));
            } else if ("tar".equalsIgnoreCase(compress)) {
                TarArchiveInputStream tarArchiveInputStream = new TarArchiveInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(tarArchiveInputStream, encoding));
            } */
if ("zip".equalsIgnoreCase(compress)) {
ZipCycleInputStream zipCycleInputStream = new ZipCycleInputStream(inputStream);
reader = new BufferedReader(new InputStreamReader(zipCycleInputStream, encoding), bufferSize);
} else {
throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.ILLEGAL_VALUE, String.format("仅支持 gzip, bzip2, zip, lzo, lzo_deflate, hadoop-snappy, framing-snappy" + "文件压缩格式 , 不支持您配置的文件压缩格式: [%s]", compress));
}
}
UnstructuredStorageReaderUtil.doReadFromStream(reader, context, readerSliceConfig, recordSender, taskPluginCollector);
} catch (UnsupportedEncodingException uee) {
throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.OPEN_FILE_WITH_CHARSET_ERROR, String.format("不支持的编码格式 : [%s]", encoding), uee);
} catch (NullPointerException e) {
throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.RUNTIME_EXCEPTION, "运行时错误, 请联系我们", e);
}/* catch (ArchiveException e) {
throw DataXException.asDataXException(
UnstructuredStorageReaderErrorCode.READ_FILE_IO_ERROR,
String.format("压缩文件流读取错误 : [%s]", context), e);
} */
catch (IOException e) {
throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.READ_FILE_IO_ERROR, String.format("流读取错误 : [%s]", context), e);
} finally {
IOUtils.closeQuietly(reader);
}
}
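As a point of comparison with the explicit compress-string dispatch above, Hadoop's own CompressionCodecFactory can resolve a CompressionCodec from the file extension instead; a minimal sketch (the helper class and path handling are illustrative, not part of DataX):

import java.io.IOException;
import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class CodecByExtension {
    // Opens a file and wraps it with the codec matching its extension (.gz, .bz2, .snappy, ...).
    public static InputStream open(Configuration conf, Path file) throws IOException {
        FileSystem fs = file.getFileSystem(conf);
        CompressionCodecFactory factory = new CompressionCodecFactory(conf);
        CompressionCodec codec = factory.getCodec(file);   // null if no registered codec matches
        InputStream raw = fs.open(file);
        // Fall back to the raw stream for uncompressed files.
        return codec == null ? raw : codec.createInputStream(raw);
    }
}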
Use of org.apache.hadoop.io.compress.CompressionCodec in project hbase by apache.
The class TestCompressionTest, method nativeCodecTest.
/**
* Verify CompressionTest.testCompression() on a native codec.
*/
private void nativeCodecTest(String codecName, String libName, String codecClassName) {
    if (isCompressionAvailable(codecClassName)) {
        try {
            if (libName != null) {
                System.loadLibrary(libName);
            }
            try {
                Configuration conf = new Configuration();
                CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(conf.getClassByName(codecClassName), conf);
                DataOutputBuffer compressedDataBuffer = new DataOutputBuffer();
                CompressionOutputStream deflateFilter = codec.createOutputStream(compressedDataBuffer);
                byte[] data = new byte[1024];
                DataOutputStream deflateOut = new DataOutputStream(new BufferedOutputStream(deflateFilter));
                deflateOut.write(data, 0, data.length);
                deflateOut.flush();
                deflateFilter.finish();
                // Codec class, codec nativelib and Hadoop nativelib with codec JNIs are present
                assertTrue(CompressionTest.testCompression(codecName));
            } catch (UnsatisfiedLinkError e) {
                // Hadoop nativelib does not have codec JNIs.
                // cannot assert the codec here because the current logic of
                // CompressionTest checks only classloading, not the codec usage.
                LOG.debug("No JNI for codec '" + codecName + "' " + e.getMessage());
            } catch (Exception e) {
                LOG.error(codecName, e);
            }
        } catch (UnsatisfiedLinkError e) {
            // nativelib is not available
            LOG.debug("Native lib not available: " + codecName);
            assertFalse(CompressionTest.testCompression(codecName));
        }
    } else {
        // Compression Codec class is not available
        LOG.debug("Codec class not available: " + codecName);
        assertFalse(CompressionTest.testCompression(codecName));
    }
}
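The heart of the check above is a compress-then-finish round trip through a CompressionCodec. A standalone sketch of the same pattern, using GzipCodec since it ships with Hadoop and works without a native library (the codec class name string is an assumption for illustration, not taken from the test):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.util.ReflectionUtils;

public class CodecRoundTrip {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(
                conf.getClassByName("org.apache.hadoop.io.compress.GzipCodec"), conf);

        byte[] data = "hello codec".getBytes("UTF-8");

        // Compress into an in-memory buffer.
        DataOutputBuffer compressed = new DataOutputBuffer();
        CompressionOutputStream out = codec.createOutputStream(compressed);
        out.write(data, 0, data.length);
        out.finish();
        out.close();

        // Decompress and verify the round trip.
        DataInputBuffer in = new DataInputBuffer();
        in.reset(compressed.getData(), compressed.getLength());
        CompressionInputStream decompressed = codec.createInputStream(in);
        byte[] restored = new byte[data.length];
        IOUtils.readFully(decompressed, restored, 0, restored.length);
        System.out.println(new String(restored, "UTF-8"));   // prints "hello codec"
    }
}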
Use of org.apache.hadoop.io.compress.CompressionCodec in project hive by apache.
The class Utilities, method getFileExtension.
/**
 * Based on compression option, output format, and configured output codec -
 * get extension for output file. Text files require an extension, whereas
 * others, like sequence files, do not.
 * <p>
 * The property <code>hive.output.file.extension</code> is used to determine
 * the extension - if set, it will override other logic for choosing an
 * extension.
 *
 * @param jc
 *          Job Configuration
 * @param isCompressed
 *          Whether the output file is compressed or not
 * @param hiveOutputFormat
 *          The output format, used to detect if the format is text
 * @return the required file extension (example: .gz)
 */
public static String getFileExtension(JobConf jc, boolean isCompressed, HiveOutputFormat<?, ?> hiveOutputFormat) {
    String extension = HiveConf.getVar(jc, HiveConf.ConfVars.OUTPUT_FILE_EXTENSION);
    if (!StringUtils.isEmpty(extension)) {
        return extension;
    }
    if ((hiveOutputFormat instanceof HiveIgnoreKeyTextOutputFormat) && isCompressed) {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(jc, DefaultCodec.class);
        CompressionCodec codec = ReflectionUtil.newInstance(codecClass, jc);
        return codec.getDefaultExtension();
    }
    return StringUtils.EMPTY;
}
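A small usage sketch for the method above, assuming the usual Hive package locations for Utilities and HiveIgnoreKeyTextOutputFormat; the file name is illustrative:

import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class FileExtensionExample {
    public static void main(String[] args) {
        JobConf jc = new JobConf();
        // Text output plus compression: the extension comes from the configured output codec.
        // With no codec configured, DefaultCodec is assumed, so this yields ".deflate".
        String ext = Utilities.getFileExtension(jc, true, new HiveIgnoreKeyTextOutputFormat());
        System.out.println("000000_0" + ext);   // e.g. 000000_0.deflate
    }
}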
Use of org.apache.hadoop.io.compress.CompressionCodec in project hive by apache.
The class Utilities, method createCompressedStream.
/**
 * Convert an output stream to a compressed output stream based on the codec configured in the
 * Job Configuration. The caller specifies directly whether the file is compressed or not.
 *
 * @param jc
 *          Job Configuration
 * @param out
 *          Output Stream to be converted into compressed output stream
 * @param isCompressed
 *          whether the output stream needs to be compressed or not
 * @return compressed output stream
 */
public static OutputStream createCompressedStream(JobConf jc, OutputStream out, boolean isCompressed) throws IOException {
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(jc, DefaultCodec.class);
        CompressionCodec codec = ReflectionUtil.newInstance(codecClass, jc);
        return codec.createOutputStream(out);
    } else {
        return out;
    }
}
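And a short writer sketch for createCompressedStream, again assuming org.apache.hadoop.hive.ql.exec.Utilities; the output path is illustrative:

import java.io.OutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.mapred.JobConf;

public class CompressedWriterExample {
    public static void main(String[] args) throws Exception {
        JobConf jc = new JobConf();
        // Pick gzip explicitly; without this, DefaultCodec (deflate) is used.
        // On Hadoop 2+ the older key "mapred.output.compression.codec" maps to the same setting.
        jc.set("mapreduce.output.fileoutputformat.compress.codec",
               "org.apache.hadoop.io.compress.GzipCodec");
        FileSystem fs = FileSystem.get(jc);
        Path target = new Path("/tmp/compressed-example.gz");
        // Closing the returned stream finishes the compressed data and closes the file.
        try (OutputStream out = Utilities.createCompressedStream(jc, fs.create(target), true)) {
            out.write("hello, compressed world\n".getBytes("UTF-8"));
        }
    }
}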