Example 1 with VectorRowBytesContainer

Use of org.apache.hadoop.hive.ql.exec.vector.rowbytescontainer.VectorRowBytesContainer in project hive by apache.

The class VectorPTFGroupBatches, method getSpillRowBytesContainer:

private VectorRowBytesContainer getSpillRowBytesContainer() throws HiveException {
    if (spillRowBytesContainer == null) {
        spillRowBytesContainer = new VectorRowBytesContainer(spillLocalDirs);
        if (bufferedBatchVectorSerializeRow == null) {
            bufferedBatchVectorSerializeRow = new VectorSerializeRow<LazyBinarySerializeWrite>(new LazyBinarySerializeWrite(bufferedColumnMap.length));
            // Serialize just the columns of a buffered batch, which has only the non-key inputs
            // and streamed column outputs.
            bufferedBatchVectorSerializeRow.init(bufferedTypeInfos);
            bufferedBatchVectorDeserializeRow = new VectorDeserializeRow<LazyBinaryDeserializeRead>(new LazyBinaryDeserializeRead(bufferedTypeInfos, /* useExternalBuffer */
            true));
            // Deserialize the fields into the *overflow* batch using the buffered batch column map.
            bufferedBatchVectorDeserializeRow.init(bufferedColumnMap);
        }
    }
    return spillRowBytesContainer;
}
Also used: VectorRowBytesContainer(org.apache.hadoop.hive.ql.exec.vector.rowbytescontainer.VectorRowBytesContainer) LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)
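
For orientation before the remaining examples, here is a minimal write-then-read sketch of the container lifecycle, using only the calls that appear in the examples on this page. The spill directory and payload bytes are made-up illustrations, and the class name RowBytesContainerLifecycleSketch is hypothetical; this is not code from the Hive project.

import org.apache.hadoop.hive.ql.exec.vector.rowbytescontainer.VectorRowBytesContainer;
import org.apache.hadoop.hive.serde2.ByteStream.Output;

public class RowBytesContainerLifecycleSketch {

    public static void main(String[] args) throws Exception {
        // Hypothetical spill directories; real callers pass the query's local scratch dirs.
        VectorRowBytesContainer container = new VectorRowBytesContainer(new String[] { "/tmp" });

        // Write side: append one row's serialized bytes to the Output, then mark the row done.
        // (The examples write via VectorSerializeRow; raw bytes stand in for that here.)
        Output output = container.getOuputForRowBytes();
        output.write(new byte[] { 1, 2, 3 }, 0, 3);
        container.finishRow();

        // Read side: switch the container to read mode and iterate the stored rows,
        // each exposed as a (bytes, offset, length) window.
        container.prepareForReading();
        while (container.readNext()) {
            byte[] bytes = container.currentBytes();
            int offset = container.currentOffset();
            int length = container.currentLength();
            System.out.println("row of " + length + " bytes at offset " + offset);
        }

        // Release memory and delete any backing spill file.
        container.clear();
    }
}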

Example 2 with VectorRowBytesContainer

Use of org.apache.hadoop.hive.ql.exec.vector.rowbytescontainer.VectorRowBytesContainer in project hive by apache.

The class VectorMapJoinGenerateResultOperator, method reProcessBigTable:

@Override
protected void reProcessBigTable(int partitionId) throws HiveException {
    if (LOG.isDebugEnabled()) {
        LOG.debug(CLASS_NAME + " reProcessBigTable enter...");
    }
    if (spillReplayBatch == null) {
        // The process method was not called -- no big table rows.
        return;
    }
    HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
    int rowCount = 0;
    int batchCount = 0;
    try {
        VectorRowBytesContainer bigTable = partition.getMatchfileRowBytesContainer();
        bigTable.prepareForReading();
        while (bigTable.readNext()) {
            rowCount++;
            byte[] bytes = bigTable.currentBytes();
            int offset = bigTable.currentOffset();
            int length = bigTable.currentLength();
            bigTableVectorDeserializeRow.setBytes(bytes, offset, length);
            try {
                bigTableVectorDeserializeRow.deserialize(spillReplayBatch, spillReplayBatch.size);
            } catch (Exception e) {
                throw new HiveException("\nDeserializeRead detail: " + bigTableVectorDeserializeRow.getDetailedReadPositionString(), e);
            }
            spillReplayBatch.size++;
            if (spillReplayBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
                // call process once we have a full batch
                process(spillReplayBatch, posBigTable);
                spillReplayBatch.reset();
                batchCount++;
            }
        }
        // Process the row batch that has fewer than DEFAULT_SIZE rows
        if (spillReplayBatch.size > 0) {
            process(spillReplayBatch, posBigTable);
            spillReplayBatch.reset();
            batchCount++;
        }
        bigTable.clear();
    } catch (Exception e) {
        LOG.info(CLASS_NAME + " reProcessBigTable exception! " + e);
        throw new HiveException(e);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug(CLASS_NAME + " reProcessBigTable exit! " + rowCount + " rows processed and " + batchCount + " batches processed");
    }
}
Also used: VectorRowBytesContainer(org.apache.hadoop.hive.ql.exec.vector.rowbytescontainer.VectorRowBytesContainer) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) HashPartition(org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer.HashPartition) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Example 3 with VectorRowBytesContainer

Use of org.apache.hadoop.hive.ql.exec.vector.rowbytescontainer.VectorRowBytesContainer in project hive by apache.

The class VectorPTFGroupBatches, method bufferGroupBatch:

public void bufferGroupBatch(VectorizedRowBatch batch) throws HiveException {
    try {
        // When we've buffered the max allowed, spill the oldest one to make space.
        if (currentBufferedBatchCount >= spillLimitBufferedBatchCount) {
            VectorRowBytesContainer rowBytesContainer = getSpillRowBytesContainer();
            if (!didSpillToDisk) {
                // UNDONE: Don't reuse for now.
                // rowBytesContainer.resetWrite();
                didSpillToDisk = true;
                spillRowCount = 0;
            }
            // Grab the oldest in-memory buffered batch and dump it to disk.
            VectorizedRowBatch oldestBufferedBatch = bufferedBatches.remove(0);
            final boolean selectedInUse = oldestBufferedBatch.selectedInUse;
            int[] selected = oldestBufferedBatch.selected;
            final int size = oldestBufferedBatch.size;
            for (int logicalIndex = 0; logicalIndex < size; logicalIndex++) {
                final int batchIndex = (selectedInUse ? selected[logicalIndex] : logicalIndex);
                Output output = rowBytesContainer.getOuputForRowBytes();
                bufferedBatchVectorSerializeRow.setOutputAppend(output);
                bufferedBatchVectorSerializeRow.serializeWrite(oldestBufferedBatch, batchIndex);
                rowBytesContainer.finishRow();
                spillRowCount++;
            }
            // Put the now-available buffered batch at the end.
            oldestBufferedBatch.reset();
            bufferedBatches.add(oldestBufferedBatch);
            currentBufferedBatchCount--;
        }
        final int bufferedColumnCount = bufferedColumnMap.length;
        if (allocatedBufferedBatchCount <= currentBufferedBatchCount) {
            VectorizedRowBatch newBatch = newBufferedBatch(batch);
            bufferedBatches.add(newBatch);
            allocatedBufferedBatchCount++;
        }
        VectorizedRowBatch bufferedBatch = bufferedBatches.get(currentBufferedBatchCount++);
        // Copy critical columns.
        final int size = batch.size;
        for (int i = 0; i < bufferedColumnCount; i++) {
            VectorizedBatchUtil.copyNonSelectedColumnVector(batch, bufferedColumnMap[i], bufferedBatch, i, size);
        }
        bufferedBatch.size = size;
    } catch (IOException e) {
        throw new HiveException(e);
    }
}
Also used: VectorRowBytesContainer(org.apache.hadoop.hive.ql.exec.vector.rowbytescontainer.VectorRowBytesContainer) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) IOException(java.io.IOException)
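
The policy in bufferGroupBatch reads more clearly in isolation: hold at most a fixed number of batches in memory, spill the oldest one to disk when the limit is hit, and recycle its slot for the incoming batch. Below is a simplified, standalone analogue of that policy; the class and field names are hypothetical, and plain lists stand in for VectorizedRowBatch and the row-bytes container.

import java.util.ArrayList;
import java.util.List;

// Simplified analogue of the bufferGroupBatch spill policy. Not Hive code.
public class BoundedBatchBufferSketch {

    private final int spillLimit; // plays the role of spillLimitBufferedBatchCount
    private final List<List<Integer>> buffered = new ArrayList<>();  // in-memory batches
    private final List<List<Integer>> spillFile = new ArrayList<>(); // stands in for the spill container

    public BoundedBatchBufferSketch(int spillLimit) {
        this.spillLimit = spillLimit;
    }

    public void buffer(List<Integer> batch) {
        if (buffered.size() >= spillLimit) {
            // At the limit: dump the oldest in-memory batch to "disk" so its slot can be reused.
            spillFile.add(new ArrayList<>(buffered.remove(0)));
        }
        // Copy the incoming batch; the caller is free to reuse its storage, just as
        // bufferGroupBatch copies column vectors out of the operator's batch.
        buffered.add(new ArrayList<>(batch));
    }
}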

Example 4 with VectorRowBytesContainer

Use of org.apache.hadoop.hive.ql.exec.vector.rowbytescontainer.VectorRowBytesContainer in project hive by apache.

The class VectorPTFGroupBatches, method forwardSpilledBatches:

private void forwardSpilledBatches(VectorPTFOperator vecPTFOperator, VectorizedRowBatch lastBatch) throws HiveException {
    overflowBatch.reset();
    copyPartitionAndOrderColumnsToOverflow(lastBatch);
    long spillRowsRead = 0;
    try {
        VectorRowBytesContainer rowBytesContainer = getSpillRowBytesContainer();
        rowBytesContainer.prepareForReading();
        while (rowBytesContainer.readNext()) {
            byte[] bytes = rowBytesContainer.currentBytes();
            int offset = rowBytesContainer.currentOffset();
            int length = rowBytesContainer.currentLength();
            bufferedBatchVectorDeserializeRow.setBytes(bytes, offset, length);
            try {
                bufferedBatchVectorDeserializeRow.deserialize(overflowBatch, overflowBatch.size);
            } catch (Exception e) {
                throw new HiveException("\nDeserializeRead detail: " + bufferedBatchVectorDeserializeRow.getDetailedReadPositionString(), e);
            }
            overflowBatch.size++;
            spillRowsRead++;
            if (overflowBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
                fillGroupResults(overflowBatch);
                vecPTFOperator.forward(overflowBatch, null);
                overflowBatch.reset();
                copyPartitionAndOrderColumnsToOverflow(lastBatch);
            }
        }
        // Process the row batch that has fewer than DEFAULT_SIZE rows
        if (overflowBatch.size > 0) {
            fillGroupResults(overflowBatch);
            vecPTFOperator.forward(overflowBatch, null);
            overflowBatch.reset();
            copyPartitionAndOrderColumnsToOverflow(lastBatch);
        }
        Preconditions.checkState(spillRowsRead == spillRowCount);
        // For now, throw away the file.
        releaseSpillRowBytesContainer();
    } catch (Exception e) {
        throw new HiveException(e);
    }
}
Also used: VectorRowBytesContainer(org.apache.hadoop.hive.ql.exec.vector.rowbytescontainer.VectorRowBytesContainer) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException)

Example 5 with VectorRowBytesContainer

Use of org.apache.hadoop.hive.ql.exec.vector.rowbytescontainer.VectorRowBytesContainer in project hive by apache.

The class VectorMapJoinGenerateResultOperator, method spillSerializeRow:

private void spillSerializeRow(VectorizedRowBatch batch, int batchIndex, VectorMapJoinHashTableResult hashTableResult) throws IOException {
    int partitionId = hashTableResult.spillPartitionId();
    HybridHashTableContainer ht = (HybridHashTableContainer) mapJoinTables[posSingleVectorMapJoinSmallTable];
    HashPartition hp = ht.getHashPartitions()[partitionId];
    VectorRowBytesContainer rowBytesContainer = hp.getMatchfileRowBytesContainer();
    Output output = rowBytesContainer.getOuputForRowBytes();
    // int offset = output.getLength();
    bigTableVectorSerializeRow.setOutputAppend(output);
    bigTableVectorSerializeRow.serializeWrite(batch, batchIndex);
    // int length = output.getLength() - offset;
    rowBytesContainer.finishRow();
// LOG.debug("spillSerializeRow spilled batchIndex " + batchIndex + ", length " + length);
}
Also used: VectorRowBytesContainer(org.apache.hadoop.hive.ql.exec.vector.rowbytescontainer.VectorRowBytesContainer) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) HashPartition(org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer.HashPartition) HybridHashTableContainer(org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer)
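
Example 5 is the write half of a round trip whose read half appears in Example 2. A minimal sketch of the full round trip for a single bigint column follows, assembled from the calls shown on this page; the single-column schema, the batch setup, and the /tmp spill directory are illustrative assumptions, not taken from the original operators.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow;
import org.apache.hadoop.hive.ql.exec.vector.VectorSerializeRow;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.rowbytescontainer.VectorRowBytesContainer;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class RowBytesRoundTripSketch {

    public static void main(String[] args) throws Exception {
        TypeInfo[] typeInfos = new TypeInfo[] { TypeInfoFactory.longTypeInfo };

        // Source batch with one long column and one row.
        VectorizedRowBatch batch = new VectorizedRowBatch(1);
        LongColumnVector col = new LongColumnVector();
        col.vector[0] = 42L;
        batch.cols[0] = col;
        batch.size = 1;

        // Write side, as in Example 5: serialize a row into the container.
        VectorSerializeRow<LazyBinarySerializeWrite> serializeRow =
            new VectorSerializeRow<LazyBinarySerializeWrite>(
                new LazyBinarySerializeWrite(typeInfos.length));
        serializeRow.init(typeInfos);

        VectorRowBytesContainer container = new VectorRowBytesContainer(new String[] { "/tmp" });
        Output output = container.getOuputForRowBytes();
        serializeRow.setOutputAppend(output);
        serializeRow.serializeWrite(batch, 0);
        container.finishRow();

        // Read side, as in Example 2: deserialize each stored row into a target batch,
        // mapping deserialized field 0 to batch column 0 (mirrors init(bufferedColumnMap)).
        VectorDeserializeRow<LazyBinaryDeserializeRead> deserializeRow =
            new VectorDeserializeRow<LazyBinaryDeserializeRead>(
                new LazyBinaryDeserializeRead(typeInfos, /* useExternalBuffer */ true));
        deserializeRow.init(new int[] { 0 });

        VectorizedRowBatch target = new VectorizedRowBatch(1);
        target.cols[0] = new LongColumnVector();

        container.prepareForReading();
        while (container.readNext()) {
            deserializeRow.setBytes(container.currentBytes(),
                container.currentOffset(), container.currentLength());
            deserializeRow.deserialize(target, target.size++);
        }
        container.clear();
    }
}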

Aggregations

VectorRowBytesContainer (org.apache.hadoop.hive.ql.exec.vector.rowbytescontainer.VectorRowBytesContainer) 6
IOException (java.io.IOException) 3
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException) 3
Output (org.apache.hadoop.hive.serde2.ByteStream.Output) 3
HashPartition (org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer.HashPartition) 2
HybridHashTableContainer (org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer) 1
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) 1
RandomByteArrayStream (org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.RandomByteArrayStream) 1
SerDeException (org.apache.hadoop.hive.serde2.SerDeException) 1
LazyBinaryDeserializeRead (org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead) 1
LazyBinarySerializeWrite (org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) 1