Use of org.apache.hadoop.hive.llap.io.encoded.SerDeEncodedDataReader.CacheWriter in project hive by apache.
The class SerDeEncodedDataReader, method processOneFileSplit.
public boolean processOneFileSplit(FileSplit split, long startTime,
    Ref<Integer> stripeIxRef, StripeData slice) throws IOException {
  LlapIoImpl.LOG.info("Processing one split {" + split.getPath() + ", "
      + split.getStart() + ", " + split.getLength() + "}");
  if (LlapIoImpl.CACHE_LOGGER.isTraceEnabled()) {
    LlapIoImpl.CACHE_LOGGER.trace("Cache data for the split is " + slice);
  }
  boolean[] splitIncludes = Arrays.copyOf(writerIncludes, writerIncludes.length);
  boolean hasAllData = slice != null
      && determineSplitIncludes(slice, splitIncludes, writerIncludes);
  // Three cases are handled below:
  // 1) All the data is in the cache. A single slice, no disk read, no cache puts.
  // 2) Some data is in the cache. A single slice, a disk read and a single cache put.
  // 3) No data is in the cache. Multiple slices, disk read and multiple cache puts.
  if (hasAllData) {
    // Everything comes from cache.
    CacheWriter.CacheStripeData csd = null;
    boolean result = processOneSlice(csd, splitIncludes, stripeIxRef.value, slice, startTime);
    ++stripeIxRef.value;
    return result;
  }
  boolean result = false;
  // This initializes currentFileRead.
  startReadSplitFromFile(split, splitIncludes, slice);
  try {
    if (slice != null) {
      // If we had a cache range already, we expect a single matching disk slice.
      Vectors vectors = currentFileRead.readNextSlice();
      if (!vectors.isSupported()) {
        // Not in VRB mode - the new cache data is ready, we should use it.
        CacheWriter cacheWriter = currentFileRead.getCacheWriter();
        assert cacheWriter.stripes.size() == 1;
        result = processOneSlice(
            cacheWriter.stripes.get(0), splitIncludes, stripeIxRef.value, slice, startTime);
      } else {
        // VRB mode - process the VRBs with cache data; the new cache data is coming later.
        result = processOneSlice(vectors, splitIncludes, stripeIxRef.value, slice, startTime);
      }
      assert null == currentFileRead.readNextSlice();
      ++stripeIxRef.value;
    } else {
      // All the data comes from disk. The reader may have split it into multiple slices.
      Vectors vectors = currentFileRead.readNextSlice();
      assert vectors != null;
      result = true;
      if (!vectors.isSupported()) {
        // Not in VRB mode - force the rest of the data through, then process every
        // stripe accumulated by the cache writer.
        while (currentFileRead.readNextSlice() != null) ;
        CacheWriter cacheWriter = currentFileRead.getCacheWriter();
        for (CacheWriter.CacheStripeData csd : cacheWriter.stripes) {
          if (!processOneSlice(csd, splitIncludes, stripeIxRef.value, null, startTime)) {
            result = false;
            break;
          }
          ++stripeIxRef.value;
        }
      } else {
        // VRB mode - process the VRBs with cache data; the new cache data is coming later.
        do {
          assert vectors.isSupported();
          if (!processOneSlice(vectors, splitIncludes, stripeIxRef.value, null, startTime)) {
            result = false;
            break;
          }
          ++stripeIxRef.value;
        } while ((vectors = currentFileRead.readNextSlice()) != null);
      }
    }
  } finally {
    cleanUpCurrentRead();
  }
  return result;
}
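Note that the stripe index is threaded through the method as a mutable Ref<Integer> holder rather than a return value, because each call may advance it by a variable number of slices. Below is a minimal, self-contained sketch (not Hive code) of that pattern; the Ref class, the slice counts, and the driver loop are hypothetical stand-ins.

// Sketch of the Ref<Integer> stripe-index pattern; all names here are
// hypothetical stand-ins, not Hive classes.
public class StripeIndexSketch {
  // Stand-in for the mutable holder type passed as stripeIxRef above.
  static class Ref<T> {
    T value;
    Ref(T value) { this.value = value; }
  }

  // Pretends to process one split that produced sliceCount slices; advances the
  // shared stripe index once per slice, as processOneFileSplit does.
  static boolean processOneSplit(int sliceCount, Ref<Integer> stripeIx) {
    for (int i = 0; i < sliceCount; ++i) {
      System.out.println("processing stripe #" + stripeIx.value);
      ++stripeIx.value;
    }
    return true;
  }

  public static void main(String[] args) {
    Ref<Integer> stripeIx = new Ref<>(0);
    int[] slicesPerSplit = { 1, 3, 2 }; // e.g. one cache hit, then multi-slice disk reads
    for (int slices : slicesPerSplit) {
      if (!processOneSplit(slices, stripeIx)) {
        break;
      }
    }
    System.out.println("total stripes: " + stripeIx.value);
  }
}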
Use of org.apache.hadoop.hive.llap.io.encoded.SerDeEncodedDataReader.CacheWriter in project hive by apache.
The class SerDeEncodedDataReader, method startReadSplitFromFile.
public void startReadSplitFromFile(
    FileSplit split, boolean[] splitIncludes, StripeData slice) throws IOException {
  boolean maySplitTheSplit = slice == null;
  ReaderWithOffsets offsetReader = null;
  @SuppressWarnings("rawtypes")
  RecordReader sourceReader = sourceInputFormat.getRecordReader(split, jobConf, reporter);
  try {
    offsetReader = createOffsetReader(sourceReader);
    // Null out sourceReader so the finally block does not close it after a successful handoff.
    sourceReader = null;
  } finally {
    if (sourceReader != null) {
      try {
        sourceReader.close();
      } catch (Exception ex) {
        LlapIoImpl.LOG.error("Failed to close source reader", ex);
      }
    }
  }
  maySplitTheSplit = maySplitTheSplit && offsetReader.hasOffsets();
  try {
    StructObjectInspector originalOi = (StructObjectInspector) getOiFromSerDe();
    List<Integer> splitColumnIds = OrcInputFormat.genIncludedColumnsReverse(
        schema, splitIncludes, false);
    // The file read writes to the writer, which writes to orcWriter, which writes to cacheWriter.
    EncodingWriter writer = VectorDeserializeOrcWriter.create(sourceInputFormat, sourceSerDe,
        parts, daemonConf, jobConf, split.getPath(), originalOi, splitColumnIds, splitIncludes,
        allocSize);
    // TODO: move this into ctor? EW would need to create CacheWriter then
    List<Integer> cwColIds = writer.isOnlyWritingIncludedColumns() ? splitColumnIds : columnIds;
    writer.init(new CacheWriter(bufferManager, cwColIds, splitIncludes,
        writer.isOnlyWritingIncludedColumns()), daemonConf, split.getPath());
    if (writer instanceof VectorDeserializeOrcWriter) {
      VectorDeserializeOrcWriter asyncWriter = (VectorDeserializeOrcWriter) writer;
      asyncWriter.startAsync(new AsyncCacheDataCallback());
      this.asyncWriters.add(asyncWriter);
    }
    currentFileRead = new FileReaderYieldReturn(
        offsetReader, split, writer, maySplitTheSplit, targetSliceRowCount);
  } finally {
    // The assignment is the last thing in the try block, so if it happened we assume success.
    if (currentFileRead != null) return;
    if (offsetReader == null) return;
    try {
      offsetReader.close();
    } catch (Exception ex) {
      LlapIoImpl.LOG.error("Failed to close source reader", ex);
    }
  }
}
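The method relies twice on the same cleanup idiom: a resource is created, ownership is handed off inside the try block, and the finally block closes the resource only if the handoff did not complete (sourceReader is nulled out once createOffsetReader succeeds, and offsetReader is closed only when currentFileRead was never assigned). Below is a minimal, self-contained sketch of that idiom, assuming hypothetical RawSource and Wrapper types rather than Hive classes.

import java.io.Closeable;
import java.io.IOException;

// Sketch of the ownership-handoff cleanup idiom; RawSource and Wrapper are
// hypothetical stand-ins, not Hive classes.
public class HandoffSketch {
  static class RawSource implements Closeable {
    @Override public void close() { System.out.println("raw source closed"); }
  }

  static class Wrapper implements Closeable {
    private final RawSource src;
    Wrapper(RawSource src) { this.src = src; }
    @Override public void close() { src.close(); }
  }

  static Wrapper open() throws IOException {
    RawSource raw = new RawSource();
    try {
      Wrapper wrapper = new Wrapper(raw); // the wrapper now owns the raw source
      raw = null; // handoff succeeded: prevent the finally block from closing it
      return wrapper;
    } finally {
      if (raw != null) {
        raw.close(); // wrapping failed: close the resource we still own
      }
    }
  }

  public static void main(String[] args) throws IOException {
    try (Wrapper w = open()) {
      System.out.println("using the wrapper");
    }
  }
}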