Use of org.apache.drill.exec.vector.complex.MapVector in project drill by apache.
Class HBaseRecordReader, method next().
@Override
public int next() {
  Stopwatch watch = Stopwatch.createStarted();
  if (rowKeyVector != null) {
    rowKeyVector.clear();
    rowKeyVector.allocateNew();
  }
  for (ValueVector v : familyVectorMap.values()) {
    v.clear();
    v.allocateNew();
  }
  int rowCount = 0;
  // Even if the memory allocated for the first row exceeds the allowed batch
  // maximum, the row is added anyway.
  do {
    Result result = null;
    final OperatorStats operatorStats = operatorContext == null ? null : operatorContext.getStats();
    try {
      if (operatorStats != null) {
        operatorStats.startWait();
      }
      try {
        result = resultScanner.next();
      } finally {
        if (operatorStats != null) {
          operatorStats.stopWait();
        }
      }
    } catch (IOException e) {
      throw new DrillRuntimeException(e);
    }
    if (result == null) {
      break;
    }
    // Parse the result and populate the value vectors.
    Cell[] cells = result.rawCells();
    if (rowKeyVector != null) {
      rowKeyVector.getMutator().setSafe(rowCount, cells[0].getRowArray(), cells[0].getRowOffset(), cells[0].getRowLength());
    }
    if (!rowKeyOnly) {
      for (final Cell cell : cells) {
        // Each HBase column family becomes a Drill MapVector ...
        final int familyOffset = cell.getFamilyOffset();
        final int familyLength = cell.getFamilyLength();
        final byte[] familyArray = cell.getFamilyArray();
        final MapVector mv = getOrCreateFamilyVector(new String(familyArray, familyOffset, familyLength), true);
        // ... and each qualifier becomes a NullableVarBinaryVector child of that map.
        final int qualifierOffset = cell.getQualifierOffset();
        final int qualifierLength = cell.getQualifierLength();
        final byte[] qualifierArray = cell.getQualifierArray();
        final NullableVarBinaryVector v = getOrCreateColumnVector(mv, new String(qualifierArray, qualifierOffset, qualifierLength));
        final int valueOffset = cell.getValueOffset();
        final int valueLength = cell.getValueLength();
        final byte[] valueArray = cell.getValueArray();
        v.getMutator().setSafe(rowCount, valueArray, valueOffset, valueLength);
      }
    }
    rowCount++;
  } while (canAddNewRow(rowCount));
  setOutputRowCount(rowCount);
  logger.debug("Took {} ms to get {} records", watch.elapsed(TimeUnit.MILLISECONDS), rowCount);
  return rowCount;
}
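The getOrCreateColumnVector helper called above is not part of this excerpt. A minimal sketch of what such a helper might look like, assuming the qualifier column type is Types.optional(MinorType.VARBINARY):

// Hypothetical sketch (not from the excerpt): lazily add a
// NullableVarBinaryVector child for an HBase qualifier to the family's
// MapVector. addOrGet returns an existing child or creates a new one; a
// change in the map's size signals that the new child needs allocation.
private NullableVarBinaryVector getOrCreateColumnVector(MapVector mv, String qualifierName) {
  int oldSize = mv.size();
  NullableVarBinaryVector v = mv.addOrGet(qualifierName,
      Types.optional(MinorType.VARBINARY), NullableVarBinaryVector.class);
  if (oldSize != mv.size()) {
    v.allocateNew();
  }
  return v;
}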
Use of org.apache.drill.exec.vector.complex.MapVector in project drill by apache.
Class HBaseRecordReader, method getOrCreateFamilyVector().
private MapVector getOrCreateFamilyVector(String familyName, boolean allocateOnCreate) {
  try {
    MapVector v = familyVectorMap.get(familyName);
    if (v == null) {
      // Lazily create and register a MapVector the first time a column family is seen.
      SchemaPath column = SchemaPath.getSimplePath(familyName);
      MaterializedField field = MaterializedField.create(column.getAsNamePart().getName(), COLUMN_FAMILY_TYPE);
      v = outputMutator.addField(field, MapVector.class);
      if (allocateOnCreate) {
        v.allocateNew();
      }
      getColumns().add(column);
      familyVectorMap.put(familyName, v);
    }
    return v;
  } catch (SchemaChangeException e) {
    throw new DrillRuntimeException(e);
  }
}
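The COLUMN_FAMILY_TYPE constant referenced above is also outside this excerpt; presumably each column family maps to a required MAP type, along the lines of:

// Assumed definition (not shown in the excerpt): a column family is a required MAP.
private static final MajorType COLUMN_FAMILY_TYPE = Types.required(MinorType.MAP);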
Use of org.apache.drill.exec.vector.complex.MapVector in project drill by apache.
Class ColumnBuilder, method buildSingleMap().
private ColumnState buildSingleMap(ContainerState parent, ColumnMetadata columnSchema) {
  final ProjectionFilter projFilter = parent.projection();
  final ProjResult projResult = projFilter.projection(columnSchema);
  final MapVector vector;
  final VectorState vectorState;
  if (projResult.isProjected) {
    // Vectors can be cached.
    assert columnSchema.tupleSchema().isEmpty();
    vector = new MapVector(columnSchema.schema(), parent.loader().allocator(), null);
    vectorState = new MapVectorState(vector, new NullVectorState());
  } else {
    // Unprojected map: no backing vector is created.
    vector = null;
    vectorState = new NullVectorState();
  }
  final TupleObjectWriter mapWriter = MapWriter.buildMap(columnSchema, vector, new ArrayList<>());
  final SingleMapState mapState = new SingleMapState(parent.loader(),
      parent.vectorCache().childCache(columnSchema.name()), projResult.mapFilter);
  return new MapColumnState(mapState, mapWriter, vectorState, parent.isVersioned());
}
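The constructor call above is the direct way to build a MapVector outside the result-set loader. A minimal standalone sketch, assuming a Drill BufferAllocator is available and using hypothetical field names:

// Minimal sketch: build a MapVector and lazily add one child vector.
static MapVector newStatsMap(BufferAllocator allocator) {
  MaterializedField mapField = MaterializedField.create("stats",
      Types.required(MinorType.MAP));
  // The third constructor argument is a CallBack; null means no container
  // is notified when children are added.
  MapVector map = new MapVector(mapField, allocator, null);
  // Children are created by name and type on first access.
  NullableBigIntVector rowCount = map.addOrGet("rowcount",
      Types.optional(MinorType.BIGINT), NullableBigIntVector.class);
  rowCount.allocateNew();
  return map;
}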
Use of org.apache.drill.exec.vector.complex.MapVector in project drill by apache.
Class UnpivotMapsRecordBatch, method buildOutputContainer().
private void buildOutputContainer() {
  dataSrcVecMap = Maps.newHashMap();
  copySrcVecMap = Maps.newHashMap();
  for (VectorWrapper<?> vw : incoming) {
    MaterializedField ds = vw.getField();
    String colName = vw.getField().getName();
    if (!mapFieldsNames.contains(colName)) {
      // Non-map columns are passed through by copy.
      MajorType mt = vw.getValueVector().getField().getType();
      MaterializedField mf = MaterializedField.create(colName, mt);
      container.add(TypeHelper.getNewVector(mf, oContext.getAllocator()));
      copySrcVecMap.put(mf, vw.getValueVector());
      continue;
    }
    // For map columns, the output column takes the type of the map's children.
    MapVector mapVector = (MapVector) vw.getValueVector();
    assert mapVector.getPrimitiveVectors().size() > 0;
    MajorType mt = mapVector.iterator().next().getField().getType();
    MaterializedField mf = MaterializedField.create(colName, mt);
    assert !dataSrcVecMap.containsKey(mf);
    container.add(TypeHelper.getNewVector(mf, oContext.getAllocator()));
    Map<String, ValueVector> m = Maps.newHashMap();
    dataSrcVecMap.put(mf, m);
    for (ValueVector vv : mapVector) {
      String fieldName = SchemaPath.getSimplePath(vv.getField().getName()).toString();
      if (!keyList.contains(fieldName)) {
        throw new UnsupportedOperationException("Unpivot data vector " + ds
            + " contains key " + fieldName + " not contained in key source!");
      }
      if (vv.getField().getType().getMinorType() == MinorType.MAP) {
        throw new UnsupportedOperationException("Unpivot of nested map is not supported!");
      }
      m.put(fieldName, vv);
    }
  }
  container.buildSchema(incoming.getSchema().getSelectionVectorMode());
}
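The method relies on MapVector being iterable over its immediate children, as the for-each loop above shows. A small illustrative sketch of that same traversal in isolation:

// Sketch: enumerate a MapVector's immediate children by name and minor type.
static void printChildren(MapVector map) {
  for (ValueVector vv : map) { // MapVector implements Iterable<ValueVector>
    MaterializedField field = vv.getField();
    System.out.println(field.getName() + " -> " + field.getType().getMinorType());
  }
}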
Use of org.apache.drill.exec.vector.complex.MapVector in project drill by apache.
Class StatisticsMergeBatch, method buildOutgoingRecordBatch().
/**
 * Prepares the outgoing container and populates the outgoing record batch
 * data. See the comments above the class definition, which describe the
 * incoming/outgoing batch schema.
 */
private IterOutcome buildOutgoingRecordBatch() {
  for (VectorWrapper<?> vw : container) {
    String outputStatName = vw.getField().getName();
    // Populate the `schema` and `computed` fields.
    if (outputStatName.equals(Statistic.SCHEMA)) {
      BigIntVector vv = (BigIntVector) vw.getValueVector();
      vv.allocateNewSafe();
      vv.getMutator().setSafe(0, schema);
    } else if (outputStatName.equals(Statistic.COMPUTED)) {
      GregorianCalendar cal = new GregorianCalendar(TimeZone.getTimeZone("UTC"));
      DateVector vv = (DateVector) vw.getValueVector();
      vv.allocateNewSafe();
      vv.getMutator().setSafe(0, cal.getTimeInMillis());
    } else {
      // Populate the rest of the merged statistics. Each statistic is a map
      // containing <COL_NAME, STATS_VALUE> pairs.
      MapVector vv = (MapVector) vw.getValueVector();
      for (MergedStatistic outputStat : mergedStatisticList) {
        if (outputStatName.equals(outputStat.getName())) {
          outputStat.setOutput(vv);
          vv.getMutator().setValueCount(columnsList.size());
          break;
        }
      }
    }
  }
  // The outgoing batch contains exactly one record.
  container.setValueCount(1);
  return IterOutcome.OK;
}
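The MergedStatistic implementations that receive the MapVector are not shown here. A plausible sketch of a setOutput that writes one merged value per column child, with a hypothetical `values` map standing in for the merged state:

// Hypothetical sketch: write one merged value per column into the
// statistic's MapVector. `values` (column name -> merged count, a
// Map<String, Long>) is assumed state, not from the excerpt.
public void setOutput(MapVector output) {
  int index = 0;
  for (ValueVector child : output) {
    BigIntVector vv = (BigIntVector) child;
    vv.allocateNewSafe();
    vv.getMutator().setSafe(index++, values.get(child.getField().getName()));
  }
}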