Use of io.cdap.plugin.format.charset.fixedlength.FixedLengthCharsetTransformingCodec in project hydrator-plugins by cdapio.
From the class CharsetTransformingLineRecordReader, method initialize.
/**
 * Initializes this reader for the supplied split. This is a simplified version of the base class
 * initialization, tailored to this use case: the file is always read through a
 * {@link FixedLengthCharsetTransformingCodec} built for the configured fixed length charset.
 *
 * <p>If wrapping the raw file stream fails (creating the decompressor, the split input stream or the
 * line reader), the raw stream is closed before the failure is propagated so the file handle is not
 * leaked.
 *
 * @param genericSplit File Split
 * @param context Execution context
 * @throws IOException if the underlying file or decompression operations fail.
 */
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
  FileSplit split = (FileSplit) genericSplit;
  Configuration job = context.getConfiguration();
  this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
  start = split.getStart();
  end = start + split.getLength();
  final Path file = split.getPath();

  // open the file and seek to the start of the split
  final FileSystem fs = file.getFileSystem(job);
  FSDataInputStream fileIn = fs.open(file);

  SplitCompressionInputStream cIn;
  try {
    SplittableCompressionCodec codec = new FixedLengthCharsetTransformingCodec(fixedLengthCharset);
    decompressor = codec.createDecompressor();
    cIn = codec.createInputStream(fileIn, decompressor, start, end,
                                  SplittableCompressionCodec.READ_MODE.CONTINUOUS);
    in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
  } catch (IOException | RuntimeException e) {
    // Until the line reader has been assigned to 'in', nothing else holds a reference to the raw
    // stream, so it has to be released here. A failure to close is attached to the original error
    // instead of masking it.
    try {
      fileIn.close();
    } catch (IOException closeFailure) {
      e.addSuppressed(closeFailure);
    }
    throw e;
  }

  // The codec may move the split boundaries; use the adjusted values from here on.
  start = cIn.getAdjustedStart();
  end = cIn.getAdjustedEnd();
  filePosition = cIn;

  // If this is not the first split, throw away the first line. NOTE(review): this mirrors Hadoop's
  // LineRecordReader, where every split except the last reads one extra line in the next() method;
  // that method is not visible here, so confirm the same holds for this reader.
  if (start != 0) {
    Text t = new Text();
    // At most 4096 bytes of the discarded line are kept in 't' (for the log message below); the number
    // of bytes consumed is added to 'start' so the position accounts for the skipped line.
    start += in.readLine(t, 4096, Integer.MAX_VALUE);
    LOG.info("Discarded line: " + t.toString());
  }
  this.pos = start;
}
Use of io.cdap.plugin.format.charset.fixedlength.FixedLengthCharsetTransformingCodec in project hydrator-plugins by cdapio.
From the class CharsetTransformingLineRecordReaderTest, method before.
/**
 * Prepares the fixture shared by the tests: a Hadoop configuration, a UTF-32 charset transforming codec
 * bound to that configuration, and a seekable stream over the test input encoded in that charset.
 *
 * @throws IOException if the number of available bytes cannot be read from the input stream.
 */
@Before
public void before() throws IOException {
  // The Compressed Split Line Reader gets a buffer size of 4096 bytes.
  // This ensures the buffer will consume all characters in the input stream if we allow it to.
  Configuration configuration = new Configuration();
  configuration.setInt("io.file.buffer.size", 4096);
  conf = configuration;

  // Codec under test, transforming from UTF-32 and configured with the settings above.
  fixedLengthCharset = FixedLengthCharset.UTF_32;
  FixedLengthCharsetTransformingCodec transformingCodec =
    new FixedLengthCharsetTransformingCodec(fixedLengthCharset);
  transformingCodec.setConf(configuration);
  codec = transformingCodec;

  // Encode the test input with the same charset and remember how many bytes the stream starts with.
  byte[] encodedInput = input.getBytes(fixedLengthCharset.getCharset());
  inputStream = new SeekableByteArrayInputStream(encodedInput);
  availableBytes = inputStream.available();
}
Aggregations