
Example 51 with MaterializedField

Use of org.apache.drill.exec.record.MaterializedField in project drill by apache.

The class ParquetRecordWriter, method newSchema.

private void newSchema() throws IOException {
    List<Type> types = Lists.newArrayList();
    for (MaterializedField field : batchSchema) {
        if (field.getPath().equalsIgnoreCase(WriterPrel.PARTITION_COMPARATOR_FIELD)) {
            continue;
        }
        types.add(getType(field));
    }
    schema = new MessageType("root", types);
    int initialBlockBufferSize = max(MINIMUM_BUFFER_SIZE, blockSize / this.schema.getColumns().size() / 5);
    pageStore = ColumnChunkPageWriteStoreExposer.newColumnChunkPageWriteStore(this.oContext, codecFactory.getCompressor(codec), schema);
    int initialPageBufferSize = max(MINIMUM_BUFFER_SIZE, min(pageSize + pageSize / 10, initialBlockBufferSize));
    store = new ColumnWriteStoreV1(pageStore, pageSize, initialPageBufferSize, enableDictionary, writerVersion, new ParquetDirectByteBufferAllocator(oContext));
    MessageColumnIO columnIO = new ColumnIOFactory(false).getColumnIO(this.schema);
    consumer = columnIO.getRecordWriter(store);
    setUp(schema, consumer);
}
Also used: PrimitiveType(org.apache.parquet.schema.PrimitiveType) GroupType(org.apache.parquet.schema.GroupType) MessageType(org.apache.parquet.schema.MessageType) MinorType(org.apache.drill.common.types.TypeProtos.MinorType) Type(org.apache.parquet.schema.Type) OriginalType(org.apache.parquet.schema.OriginalType) ColumnWriteStoreV1(org.apache.parquet.column.impl.ColumnWriteStoreV1) MaterializedField(org.apache.drill.exec.record.MaterializedField) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) ColumnIOFactory(org.apache.parquet.io.ColumnIOFactory)
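
For orientation, here is a minimal, self-contained sketch of the same pattern outside the writer: converting a couple of MaterializedField instances into a flat Parquet MessageType. The MaterializedFieldToParquetSketch class and its toParquetType helper are hypothetical names; the helper is a much simplified stand-in for the writer's private getType/getPrimitiveType logic and only covers two minor types. Field accessors follow the snippet above (getPath() returning the column name as a String).

import java.util.List;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Type;
import org.apache.parquet.schema.Type.Repetition;
import com.google.common.collect.Lists;

public class MaterializedFieldToParquetSketch {

    // Hypothetical, simplified stand-in for the writer's type conversion:
    // maps only two Drill minor types to Parquet primitives.
    private static Type toParquetType(MaterializedField field) {
        switch (field.getType().getMinorType()) {
            case INT:
                return new PrimitiveType(Repetition.REQUIRED, PrimitiveTypeName.INT32, field.getPath());
            case VARCHAR:
                return new PrimitiveType(Repetition.REQUIRED, PrimitiveTypeName.BINARY, field.getPath());
            default:
                throw new UnsupportedOperationException("Not covered in this sketch");
        }
    }

    public static void main(String[] args) {
        MaterializedField id = MaterializedField.create("id", Types.required(MinorType.INT));
        MaterializedField name = MaterializedField.create("name", Types.required(MinorType.VARCHAR));
        List<Type> types = Lists.newArrayList(toParquetType(id), toParquetType(name));
        // Same shape as newSchema(): a root MessageType built from the converted fields.
        MessageType schema = new MessageType("root", types);
        System.out.println(schema);
    }
}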

Example 52 with MaterializedField

Use of org.apache.drill.exec.record.MaterializedField in project drill by apache.

The class ParquetRecordWriter, method getType.

private Type getType(MaterializedField field) {
    MinorType minorType = field.getType().getMinorType();
    DataMode dataMode = field.getType().getMode();
    switch(minorType) {
        case MAP:
            List<Type> types = Lists.newArrayList();
            for (MaterializedField childField : field.getChildren()) {
                types.add(getType(childField));
            }
            return new GroupType(dataMode == DataMode.REPEATED ? Repetition.REPEATED : Repetition.OPTIONAL, field.getLastName(), types);
        case LIST:
            throw new UnsupportedOperationException("Unsupported type " + minorType);
        default:
            return getPrimitiveType(field);
    }
}
Also used: PrimitiveType(org.apache.parquet.schema.PrimitiveType) GroupType(org.apache.parquet.schema.GroupType) MessageType(org.apache.parquet.schema.MessageType) MinorType(org.apache.drill.common.types.TypeProtos.MinorType) Type(org.apache.parquet.schema.Type) OriginalType(org.apache.parquet.schema.OriginalType) DataMode(org.apache.drill.common.types.TypeProtos.DataMode) MaterializedField(org.apache.drill.exec.record.MaterializedField)
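
Below is a minimal sketch of just the MAP branch, assuming MaterializedField#addChild is available for assembling a nested field by hand (the snippet above only shows getChildren()). The MapFieldSketch class and the blanket VARCHAR-to-BINARY child mapping are illustrative only, not the production conversion.

import java.util.List;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Type;
import org.apache.parquet.schema.Type.Repetition;
import com.google.common.collect.Lists;

public class MapFieldSketch {

    public static void main(String[] args) {
        // A required MAP field "address" with two VARCHAR children, assembled by hand.
        MaterializedField address = MaterializedField.create("address", Types.required(MinorType.MAP));
        address.addChild(MaterializedField.create("city", Types.required(MinorType.VARCHAR)));
        address.addChild(MaterializedField.create("zip", Types.required(MinorType.VARCHAR)));

        // Mirrors the MAP case of getType(): a non-repeated map becomes an OPTIONAL
        // GroupType whose children are the converted child fields.
        List<Type> children = Lists.newArrayList();
        for (MaterializedField child : address.getChildren()) {
            children.add(new PrimitiveType(Repetition.REQUIRED, PrimitiveTypeName.BINARY, child.getLastName()));
        }
        GroupType group = new GroupType(Repetition.OPTIONAL, address.getLastName(), children);
        System.out.println(group);
    }
}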

Example 53 with MaterializedField

Use of org.apache.drill.exec.record.MaterializedField in project drill by apache.

The class VectorUtil, method showVectorAccessibleContent.

public static void showVectorAccessibleContent(VectorAccessible va, int[] columnWidths) {
    int width = 0;
    int columnIndex = 0;
    List<String> columns = Lists.newArrayList();
    List<String> formats = Lists.newArrayList();
    for (VectorWrapper<?> vw : va) {
        int columnWidth = getColumnWidth(columnWidths, columnIndex);
        width += columnWidth + 2;
        formats.add("| %-" + columnWidth + "s");
        MaterializedField field = vw.getValueVector().getField();
        columns.add(field.getPath() + "<" + field.getType().getMinorType() + "(" + field.getType().getMode() + ")" + ">");
        columnIndex++;
    }
    int rows = va.getRecordCount();
    System.out.println(rows + " row(s):");
    for (int row = 0; row < rows; row++) {
        // Print the column header every 50 rows.
        if (row % 50 == 0) {
            System.out.println(StringUtils.repeat("-", width + 1));
            columnIndex = 0;
            for (String column : columns) {
                int columnWidth = getColumnWidth(columnWidths, columnIndex);
                System.out.printf(formats.get(columnIndex), column.length() <= columnWidth ? column : column.substring(0, columnWidth - 1));
                columnIndex++;
            }
            System.out.printf("|\n");
            System.out.println(StringUtils.repeat("-", width + 1));
        }
        // column values
        columnIndex = 0;
        for (VectorWrapper<?> vw : va) {
            int columnWidth = getColumnWidth(columnWidths, columnIndex);
            Object o = vw.getValueVector().getAccessor().getObject(row);
            String cellString;
            if (o instanceof byte[]) {
                cellString = DrillStringUtils.toBinaryString((byte[]) o);
            } else {
                cellString = DrillStringUtils.escapeNewLines(String.valueOf(o));
            }
            System.out.printf(formats.get(columnIndex), cellString.length() <= columnWidth ? cellString : cellString.substring(0, columnWidth - 1));
            columnIndex++;
        }
        System.out.printf("|\n");
    }
    if (rows > 0) {
        System.out.println(StringUtils.repeat("-", width + 1));
    }
    for (VectorWrapper<?> vw : va) {
        vw.clear();
    }
}
Also used: MaterializedField(org.apache.drill.exec.record.MaterializedField)
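
The header labels are the only place this method touches MaterializedField, so that piece can be sketched in isolation. ColumnHeaderSketch and its label helper below are hypothetical names, and the column width of 30 is arbitrary; the label format itself is the one built in the loop above.

import java.util.List;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.record.MaterializedField;
import com.google.common.collect.Lists;

public class ColumnHeaderSketch {

    // Builds the same "name<TYPE(MODE)>" label that showVectorAccessibleContent
    // derives from each vector's MaterializedField.
    static String label(MaterializedField field) {
        return field.getPath() + "<" + field.getType().getMinorType()
                + "(" + field.getType().getMode() + ")" + ">";
    }

    public static void main(String[] args) {
        List<MaterializedField> fields = Lists.newArrayList(
                MaterializedField.create("employee_id", Types.required(MinorType.BIGINT)),
                MaterializedField.create("full_name", Types.optional(MinorType.VARCHAR)));
        for (MaterializedField field : fields) {
            System.out.printf("| %-30s", label(field));
        }
        System.out.printf("|%n");
    }
}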

Example 54 with MaterializedField

Use of org.apache.drill.exec.record.MaterializedField in project drill by apache.

The class HBaseRecordReader, method setup.

@Override
public void setup(OperatorContext context, OutputMutator output) throws ExecutionSetupException {
    this.operatorContext = context;
    this.outputMutator = output;
    familyVectorMap = new HashMap<>();
    try {
        hTable = connection.getTable(hbaseTableName);
        // Add top-level column-family map vectors to output in the order specified
        // when creating the reader (order of first appearance in the query).
        for (SchemaPath column : getColumns()) {
            if (column.equals(ROW_KEY_PATH)) {
                MaterializedField field = MaterializedField.create(column.getAsNamePart().getName(), ROW_KEY_TYPE);
                rowKeyVector = outputMutator.addField(field, VarBinaryVector.class);
            } else {
                getOrCreateFamilyVector(column.getRootSegment().getPath(), false);
            }
        }
        // Add map and child vectors for any HBase column families and/or HBase
        // columns that are requested (in order to avoid later creation of dummy
        // NullableIntVectors for them).
        final Set<Map.Entry<byte[], NavigableSet<byte[]>>> familiesEntries = hbaseScan.getFamilyMap().entrySet();
        for (Map.Entry<byte[], NavigableSet<byte[]>> familyEntry : familiesEntries) {
            final String familyName = new String(familyEntry.getKey(), StandardCharsets.UTF_8);
            final MapVector familyVector = getOrCreateFamilyVector(familyName, false);
            final Set<byte[]> children = familyEntry.getValue();
            if (null != children) {
                for (byte[] childNameBytes : children) {
                    final String childName = new String(childNameBytes, StandardCharsets.UTF_8);
                    getOrCreateColumnVector(familyVector, childName);
                }
            }
        }
        resultScanner = hTable.getScanner(hbaseScan);
    } catch (SchemaChangeException | IOException e) {
        throw new ExecutionSetupException(e);
    }
}
Also used: ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) NavigableSet(java.util.NavigableSet) MaterializedField(org.apache.drill.exec.record.MaterializedField) IOException(java.io.IOException) VarBinaryVector(org.apache.drill.exec.vector.VarBinaryVector) NullableVarBinaryVector(org.apache.drill.exec.vector.NullableVarBinaryVector) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) SchemaPath(org.apache.drill.common.expression.SchemaPath) HashMap(java.util.HashMap) Map(java.util.Map) MapVector(org.apache.drill.exec.vector.complex.MapVector)
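
As a minimal sketch of just the field-creation step: the row key becomes a VARBINARY-typed MaterializedField and each column family a MAP-typed one. The HBaseFieldSketch class is a hypothetical name, and Types.required(MinorType.VARBINARY) / Types.required(MinorType.MAP) are assumed here to stand in for the reader's ROW_KEY_TYPE and COLUMN_FAMILY_TYPE constants.

import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.record.MaterializedField;

public class HBaseFieldSketch {

    public static void main(String[] args) {
        // Row key: the field name comes from the column's name part, as in setup().
        SchemaPath rowKey = SchemaPath.getSimplePath("row_key");
        MaterializedField rowKeyField =
                MaterializedField.create(rowKey.getAsNamePart().getName(), Types.required(MinorType.VARBINARY));

        // Column family: one MAP-typed field per family name.
        SchemaPath family = SchemaPath.getSimplePath("f");
        MaterializedField familyField =
                MaterializedField.create(family.getAsNamePart().getName(), Types.required(MinorType.MAP));

        System.out.println(rowKeyField);
        System.out.println(familyField);
    }
}

Passing such fields to outputMutator.addField(field, VarBinaryVector.class) and addField(field, MapVector.class), as setup() does, is what actually materializes the vectors; the sketch stops at the field definitions.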

Example 55 with MaterializedField

Use of org.apache.drill.exec.record.MaterializedField in project drill by apache.

The class HBaseRecordReader, method getOrCreateFamilyVector.

private MapVector getOrCreateFamilyVector(String familyName, boolean allocateOnCreate) {
    try {
        MapVector v = familyVectorMap.get(familyName);
        if (v == null) {
            SchemaPath column = SchemaPath.getSimplePath(familyName);
            MaterializedField field = MaterializedField.create(column.getAsNamePart().getName(), COLUMN_FAMILY_TYPE);
            v = outputMutator.addField(field, MapVector.class);
            if (allocateOnCreate) {
                v.allocateNew();
            }
            getColumns().add(column);
            familyVectorMap.put(familyName, v);
        }
        return v;
    } catch (SchemaChangeException e) {
        throw new DrillRuntimeException(e);
    }
}
Also used: SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) SchemaPath(org.apache.drill.common.expression.SchemaPath) MaterializedField(org.apache.drill.exec.record.MaterializedField) DrillRuntimeException(org.apache.drill.common.exceptions.DrillRuntimeException) MapVector(org.apache.drill.exec.vector.complex.MapVector)
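
The same get-or-create shape can be sketched without an OutputMutator. FamilyFieldCacheSketch below is a hypothetical cache that only creates and memoizes the MaterializedField, not the MapVector, and again assumes a MAP major type for column families.

import java.util.HashMap;
import java.util.Map;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.record.MaterializedField;

public class FamilyFieldCacheSketch {

    // family name -> field, mirroring the familyVectorMap lookup above
    private final Map<String, MaterializedField> familyFieldMap = new HashMap<>();

    MaterializedField getOrCreateFamilyField(String familyName) {
        MaterializedField field = familyFieldMap.get(familyName);
        if (field == null) {
            SchemaPath column = SchemaPath.getSimplePath(familyName);
            field = MaterializedField.create(column.getAsNamePart().getName(),
                    Types.required(MinorType.MAP));
            familyFieldMap.put(familyName, field);
        }
        return field;
    }

    public static void main(String[] args) {
        FamilyFieldCacheSketch sketch = new FamilyFieldCacheSketch();
        MaterializedField first = sketch.getOrCreateFamilyField("f");
        // The second lookup returns the cached instance instead of creating a new field.
        System.out.println(first == sketch.getOrCreateFamilyField("f"));
    }
}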

Aggregations

MaterializedField (org.apache.drill.exec.record.MaterializedField): 67 usages
ValueVector (org.apache.drill.exec.vector.ValueVector): 29 usages
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 20 usages
Test (org.junit.Test): 18 usages
ExecTest (org.apache.drill.exec.ExecTest): 16 usages
MajorType (org.apache.drill.common.types.TypeProtos.MajorType): 13 usages
LogicalExpression (org.apache.drill.common.expression.LogicalExpression): 11 usages
ErrorCollector (org.apache.drill.common.expression.ErrorCollector): 9 usages
ErrorCollectorImpl (org.apache.drill.common.expression.ErrorCollectorImpl): 9 usages
TypedFieldId (org.apache.drill.exec.record.TypedFieldId): 9 usages
VectorContainer (org.apache.drill.exec.record.VectorContainer): 8 usages
IOException (java.io.IOException): 7 usages
BatchSchema (org.apache.drill.exec.record.BatchSchema): 7 usages
SchemaPath (org.apache.drill.common.expression.SchemaPath): 6 usages
NamedExpression (org.apache.drill.common.logical.data.NamedExpression): 6 usages
ValueVectorWriteExpression (org.apache.drill.exec.expr.ValueVectorWriteExpression): 6 usages
MinorType (org.apache.drill.common.types.TypeProtos.MinorType): 5 usages
NullableVarCharVector (org.apache.drill.exec.vector.NullableVarCharVector): 5 usages
DrillBuf (io.netty.buffer.DrillBuf): 4 usages
ArrayList (java.util.ArrayList): 4 usages