Use of org.apache.hadoop.hive.llap.cache.SerDeLowLevelCacheImpl.LlapSerDeDataBuffer in the Apache Hive project.
From the class SerDeEncodedDataReader, method processOneSlice.
/**
 * Processes one stripe slice whose columns come partly from a fresh disk read and partly
 * from previously cached data, hands the newly read slice to {@code cacheFileData}, and
 * sends the assembled batch to the consumer.
 *
 * @param diskData      data produced by the current read; {@code null} means every column
 *                      is taken from {@code cacheData}
 * @param splitIncludes per-column flags; when {@code diskData} is present, a flagged column
 *                      is populated from {@code diskData}, any other column from the cache
 * @param stripeIx      stripe index recorded in the derived stripe metadata
 * @param cacheData     previously cached data for this slice, or {@code null} if none
 * @param startTime     value passed to {@code recordReaderTime} if processing is stopped
 * @return {@code false} if {@code processStop()} reported a stop; otherwise whatever
 *         {@code sendEcbToConsumer} returns
 */
private boolean processOneSlice(CacheWriter.CacheStripeData diskData, boolean[] splitIncludes, int stripeIx, StripeData cacheData, long startTime) throws IOException, InterruptedException {
  logProcessOneSlice(stripeIx, diskData, cacheData);

  // Snapshot whatever the cache already holds for this slice.
  ColumnEncoding[] cachedEncodings = null;
  LlapSerDeDataBuffer[][][] cachedBuffers = null;
  long cachedRowCount = -1L;
  if (cacheData != null) {
    cachedEncodings = cacheData.getEncodings();
    cachedBuffers = cacheData.getData();
    cachedRowCount = cacheData.getRowCount();
  }

  // Derive the stripe metadata from the disk read when there is one, else from the cache alone.
  SerDeStripeMetadata stripeMetadata = new SerDeStripeMetadata(stripeIx);
  final boolean readFromDisk = diskData != null;
  StripeData sliceToCache = null;
  if (readFromDisk) {
    sliceToCache = createSliceToCache(diskData, cacheData);
    stripeMetadata.setEncodings(combineCacheAndWriterEncodings(cachedEncodings, diskData.encodings));
    stripeMetadata.setRowCount(diskData.rowCount);
  } else {
    stripeMetadata.setEncodings(Lists.newArrayList(cachedEncodings));
    stripeMetadata.setRowCount(cachedRowCount);
  }
  if (LlapIoImpl.LOG.isTraceEnabled()) {
    LlapIoImpl.LOG.trace("Derived stripe metadata for this split is " + stripeMetadata);
  }
  consumer.setStripeMetadata(stripeMetadata);

  OrcEncodedColumnBatch ecb;
  if (useObjectPools) {
    ecb = ECB_POOL.take();
  } else {
    ecb = new OrcEncodedColumnBatch();
  }
  ecb.init(fileKey, stripeMetadata.getStripeIx(), OrcEncodedColumnBatch.ALL_RGS, writerIncludes.length);

  // Column 0 is the root structure, so start at 1.
  for (int colIx = 1; colIx < writerIncludes.length; ++colIx) {
    if (writerIncludes[colIx]) {
      ecb.initColumn(colIx, OrcEncodedColumnBatch.MAX_DATA_STREAMS);
      if (readFromDisk && splitIncludes[colIx]) {
        // This column was read from disk.
        List<CacheWriter.CacheStreamData> colStreams = diskData.colStreams.get(colIx);
        // Called before the null check on purpose, exactly as the streams are handed over.
        LlapSerDeDataBuffer[][] colCacheData = createArrayToCache(sliceToCache, colIx, colStreams);
        // A null stream list is expected for struct columns (such as the root).
        if (colStreams != null) {
          for (Iterator<CacheWriter.CacheStreamData> it = colStreams.iterator(); it.hasNext();) {
            CacheWriter.CacheStreamData streamData = it.next();
            if (streamData.isSuppressed) {
              // Suppressed streams are dropped from the list and their buffers discarded.
              if (LlapIoImpl.LOG.isTraceEnabled()) {
                LlapIoImpl.LOG.trace("Removing a suppressed stream " + streamData.name);
              }
              it.remove();
              discardUncachedBuffers(streamData.data);
            } else {
              int streamIx = setStreamDataToCache(colCacheData, streamData);
              ColumnStreamData csd;
              if (useObjectPools) {
                csd = CSD_POOL.take();
              } else {
                csd = new ColumnStreamData();
              }
              csd.incRef();
              csd.setCacheBuffers(streamData.data);
              ecb.setStreamData(colIx, streamIx, csd);
            }
          }
        }
      } else {
        // Not read from disk for this split; take the column from the cached buffers.
        processColumnCacheData(cachedBuffers, ecb, colIx);
      }
    }
  }

  if (processStop()) {
    recordReaderTime(startTime);
    return false;
  }

  // The buffers are not locked here; for now this relies on the cache put to lock them
  // before they are sent over to the consumer.
  if (LlapIoImpl.CACHE_LOGGER.isTraceEnabled()) {
    LlapIoImpl.CACHE_LOGGER.trace("Data to cache from the read " + sliceToCache);
  }
  cacheFileData(sliceToCache);
  return sendEcbToConsumer(ecb, cacheData != null, diskData);
}
Use of org.apache.hadoop.hive.llap.cache.SerDeLowLevelCacheImpl.LlapSerDeDataBuffer in the Apache Hive project.
From the class SerDeEncodedDataReader, method processOneSlice.
/**
 * Unlike the other overload of processOneSlice, doesn't cache data.
 *
 * <p>Columns flagged in {@code splitIncludes} are populated from the column vectors in
 * {@code diskData}; the remaining included columns are populated from {@code cacheData}.
 *
 * @param diskData      vectors produced by the current read; must not be {@code null}
 * @param splitIncludes per-column flags selecting which columns come from {@code diskData}
 * @param stripeIx      stripe index recorded in the derived stripe metadata
 * @param cacheData     previously cached data for this slice, or {@code null} if none
 * @param startTime     value passed to {@code recordReaderTime} if processing is stopped
 * @return {@code true} when there is nothing to process; {@code false} if
 *         {@code processStop()} reported a stop; otherwise whatever
 *         {@code sendEcbToConsumer} returns
 * @throws AssertionError if {@code diskData} is {@code null}
 */
private boolean processOneSlice(Vectors diskData, boolean[] splitIncludes, int stripeIx, StripeData cacheData, long startTime) throws IOException, InterruptedException {
  if (diskData == null) {
    // A null disk read belongs to the other overload.
    throw new AssertionError();
  }
  logProcessOneSlice(stripeIx, diskData, cacheData);

  final boolean hasCacheData = cacheData != null;
  if (!hasCacheData && diskData.getRowCount() == 0) {
    // No cached data and no rows read: nothing to process.
    return true;
  }

  ColumnEncoding[] cachedEncodings = null;
  LlapSerDeDataBuffer[][][] cachedBuffers = null;
  if (hasCacheData) {
    cachedEncodings = cacheData.getEncodings();
    cachedBuffers = cacheData.getData();
    // The column count is not validated (-1) because vectors carry no encodings.
    validateCacheAndDisk(cacheData, diskData.getRowCount(), -1, diskData);
  }

  // Without cached encodings, fall back to an array of nulls sized by splitIncludes.
  ColumnEncoding[] stripeEncodings = cachedEncodings;
  if (stripeEncodings == null) {
    stripeEncodings = new ColumnEncoding[splitIncludes.length];
  }
  SerDeStripeMetadata stripeMetadata = new SerDeStripeMetadata(stripeIx);
  stripeMetadata.setEncodings(Arrays.asList(stripeEncodings));
  stripeMetadata.setRowCount(diskData.getRowCount());
  if (LlapIoImpl.LOG.isTraceEnabled()) {
    LlapIoImpl.LOG.trace("Derived stripe metadata for this split is " + stripeMetadata);
  }
  consumer.setStripeMetadata(stripeMetadata);

  OrcEncodedColumnBatch ecb;
  if (useObjectPools) {
    ecb = ECB_POOL.take();
  } else {
    ecb = new OrcEncodedColumnBatch();
  }
  ecb.init(fileKey, stripeMetadata.getStripeIx(), OrcEncodedColumnBatch.ALL_RGS, writerIncludes.length);

  // Column 0 is skipped: it has no vector after reading the text source.
  // Vectors in diskData are indexed densely, in the order of the columns read for this split.
  int nextVectorIx = 0;
  for (int colIx = 1; colIx < writerIncludes.length; ++colIx) {
    if (writerIncludes[colIx]) {
      if (splitIncludes[colIx]) {
        List<ColumnVector> colVectors = diskData.getVectors(nextVectorIx);
        ++nextVectorIx;
        if (LlapIoImpl.LOG.isTraceEnabled()) {
          LlapIoImpl.LOG.trace("Processing vectors for column " + colIx + ": " + colVectors);
        }
        ecb.initColumnWithVectors(colIx, colVectors);
      } else {
        ecb.initColumn(colIx, OrcEncodedColumnBatch.MAX_DATA_STREAMS);
        processColumnCacheData(cachedBuffers, ecb, colIx);
      }
    }
  }

  if (processStop()) {
    recordReaderTime(startTime);
    return false;
  }
  return sendEcbToConsumer(ecb, hasCacheData, null);
}
Aggregations