Search in sources :

Example 11 with PathSegment

use of org.apache.drill.common.expression.PathSegment in project drill by apache.

The method createFieldConverter of the class DrillParquetGroupConverter.

/**
 * Creates the converter for one Parquet field.
 *
 * <p>Primitive fields are delegated to {@code getConverterForType}. Group fields are
 * dispatched, in this order, on: logical list type, logical map type (only when the
 * {@code PARQUET_READER_ENABLE_MAP_SUPPORT_VALIDATOR} option is on), REPEATED repetition,
 * and finally the plain group case.
 *
 * @param skipRepeated when the field is a REPEATED group, selects the intermediate
 *                     converter bound to {@code baseWriter} instead of creating a new
 *                     list-of-map writer
 * @param fieldType    Parquet type of the field
 * @param name         field name, passed to the writer / converter factories
 * @param colNextChild remaining path below this field; may be {@code null}
 * @return converter for the field
 */
private Converter createFieldConverter(boolean skipRepeated, Type fieldType, String name, PathSegment colNextChild) {
    if (fieldType.isPrimitive()) {
        return getConverterForType(name, fieldType.asPrimitiveType());
    }

    // Walk down the path until the first named segment (or the end of the path).
    PathSegment namedChild = colNextChild;
    while (namedChild != null && !namedChild.isNamed()) {
        namedChild = namedChild.getChild();
    }

    // The nested converter receives either no columns or the single named child.
    Collection<SchemaPath> columns;
    if (namedChild == null) {
        columns = Collections.emptyList();
    } else {
        columns = Collections.singletonList(new SchemaPath(namedChild.getNameSegment()));
    }

    GroupType fieldGroupType = fieldType.asGroupType();

    if (ParquetReaderUtility.isLogicalListType(fieldGroupType)) {
        BaseWriter listWriter = getWriter(name, MapWriter::list, ListWriter::list);
        return new DrillParquetGroupConverter(mutator, listWriter, fieldGroupType, columns, options, containsCorruptedDates, true, converterName);
    }

    if (options.getOption(ExecConstants.PARQUET_READER_ENABLE_MAP_SUPPORT_VALIDATOR) && ParquetReaderUtility.isLogicalMapType(fieldGroupType)) {
        BaseWriter dictWriter = getWriter(name, MapWriter::dict, ListWriter::dict);
        return new DrillParquetMapGroupConverter(mutator, (DictWriter) dictWriter, fieldGroupType, options, containsCorruptedDates);
    }

    if (fieldType.isRepetition(Repetition.REPEATED)) {
        if (skipRepeated) {
            // No new writer here: the intermediate converter writes through baseWriter.
            return new DrillIntermediateParquetGroupConverter(mutator, baseWriter, fieldGroupType, columns, options, containsCorruptedDates, false, converterName);
        }
        BaseWriter repeatedMapWriter = getWriter(name, (m, s) -> m.list(s).map(), l -> l.list().map());
        return new DrillParquetGroupConverter(mutator, repeatedMapWriter, fieldGroupType, columns, options, containsCorruptedDates, false, converterName);
    }

    BaseWriter mapWriter = getWriter(name, MapWriter::map, ListWriter::map);
    return new DrillParquetGroupConverter(mutator, mapWriter, fieldGroupType, columns, options, containsCorruptedDates, false, converterName);
}
Also used : IntervalHolder(org.apache.drill.exec.expr.holders.IntervalHolder) PrimitiveType(org.apache.parquet.schema.PrimitiveType) VarDecimalHolder(org.apache.drill.exec.expr.holders.VarDecimalHolder) SingleMapWriter(org.apache.drill.exec.vector.complex.impl.SingleMapWriter) BiFunction(java.util.function.BiFunction) VarCharHolder(org.apache.drill.exec.expr.holders.VarCharHolder) ParquetReaderUtility(org.apache.drill.exec.store.parquet.ParquetReaderUtility) OutputMutator(org.apache.drill.exec.physical.impl.OutputMutator) VarCharWriter(org.apache.drill.exec.vector.complex.writer.VarCharWriter) PathSegment(org.apache.drill.common.expression.PathSegment) TimeStampWriter(org.apache.drill.exec.vector.complex.writer.TimeStampWriter) PrimitiveConverter(org.apache.parquet.io.api.PrimitiveConverter) DateHolder(org.apache.drill.exec.expr.holders.DateHolder) DrillBuf(io.netty.buffer.DrillBuf) BigIntHolder(org.apache.drill.exec.expr.holders.BigIntHolder) VarBinaryWriter(org.apache.drill.exec.vector.complex.writer.VarBinaryWriter) BigIntWriter(org.apache.drill.exec.vector.complex.writer.BigIntWriter) AbstractRepeatedMapWriter(org.apache.drill.exec.vector.complex.impl.AbstractRepeatedMapWriter) IntWriter(org.apache.drill.exec.vector.complex.writer.IntWriter) GroupType(org.apache.parquet.schema.GroupType) GroupConverter(org.apache.parquet.io.api.GroupConverter) VarDecimalWriter(org.apache.drill.exec.vector.complex.writer.VarDecimalWriter) Collection(java.util.Collection) SchemaPath(org.apache.drill.common.expression.SchemaPath) Float4Writer(org.apache.drill.exec.vector.complex.writer.Float4Writer) TimeWriter(org.apache.drill.exec.vector.complex.writer.TimeWriter) BaseWriter(org.apache.drill.exec.vector.complex.writer.BaseWriter) MapWriter(org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter) Binary(org.apache.parquet.io.api.Binary) Longs(org.apache.drill.shaded.guava.com.google.common.primitives.Longs) List(java.util.List) 
DYNAMIC_STAR(org.apache.drill.common.expression.SchemaPath.DYNAMIC_STAR) Optional(java.util.Optional) Type(org.apache.parquet.schema.Type) ExecConstants(org.apache.drill.exec.ExecConstants) DateWriter(org.apache.drill.exec.vector.complex.writer.DateWriter) BitHolder(org.apache.drill.exec.expr.holders.BitHolder) OptionManager(org.apache.drill.exec.server.options.OptionManager) Ints(org.apache.drill.shaded.guava.com.google.common.primitives.Ints) BitWriter(org.apache.drill.exec.vector.complex.writer.BitWriter) Float8Writer(org.apache.drill.exec.vector.complex.writer.Float8Writer) Converter(org.apache.parquet.io.api.Converter) ListWriter(org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter) Repetition(org.apache.parquet.schema.Type.Repetition) DrillRuntimeException(org.apache.drill.common.exceptions.DrillRuntimeException) LogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation) Function(java.util.function.Function) Supplier(java.util.function.Supplier) Float8Holder(org.apache.drill.exec.expr.holders.Float8Holder) VarBinaryHolder(org.apache.drill.exec.expr.holders.VarBinaryHolder) ArrayList(java.util.ArrayList) Float4Holder(org.apache.drill.exec.expr.holders.Float4Holder) NanoTimeUtils.getDateTimeValueFromBinary(org.apache.drill.exec.store.parquet.ParquetReaderUtility.NanoTimeUtils.getDateTimeValueFromBinary) TimeHolder(org.apache.drill.exec.expr.holders.TimeHolder) IntHolder(org.apache.drill.exec.expr.holders.IntHolder) DictWriter(org.apache.drill.exec.vector.complex.writer.BaseWriter.DictWriter) Iterator(java.util.Iterator) IntervalWriter(org.apache.drill.exec.vector.complex.writer.IntervalWriter) ParquetColumnMetadata(org.apache.drill.exec.store.parquet.columnreaders.ParquetColumnMetadata) Collections(java.util.Collections) TimeStampHolder(org.apache.drill.exec.expr.holders.TimeStampHolder) DateTimeConstants(org.joda.time.DateTimeConstants) SingleMapWriter(org.apache.drill.exec.vector.complex.impl.SingleMapWriter) 
AbstractRepeatedMapWriter(org.apache.drill.exec.vector.complex.impl.AbstractRepeatedMapWriter) MapWriter(org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter) BaseWriter(org.apache.drill.exec.vector.complex.writer.BaseWriter) GroupType(org.apache.parquet.schema.GroupType) SchemaPath(org.apache.drill.common.expression.SchemaPath) ListWriter(org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter) PrimitiveConverter(org.apache.parquet.io.api.PrimitiveConverter) GroupConverter(org.apache.parquet.io.api.GroupConverter) Converter(org.apache.parquet.io.api.Converter)

Example 12 with PathSegment

use of org.apache.drill.common.expression.PathSegment in project drill by apache.

The method convertSelectedColumnsToMessageType of the class DrillParquetReader.

/**
 * Builds a projection schema from the selected columns: each schema path is turned into
 * a Parquet type via {@code getSegmentType}, wrapped in a one-field message type, and
 * merged into the running projection with {@code union}.
 *
 * @param schema Parquet file schema
 * @param selectedSchemaPaths columns found in schema
 * @return projection schema, or {@code null} when {@code selectedSchemaPaths} is empty
 */
private static MessageType convertSelectedColumnsToMessageType(MessageType schema, Set<SchemaPath> selectedSchemaPaths) {
    String messageName = schema.getName();
    MessageType projection = null;
    for (SchemaPath schemaPath : selectedSchemaPaths) {
        // Flatten the segment chain (root first) into a list of path names.
        List<String> names = new ArrayList<>();
        PathSegment current = schemaPath.getRootSegment();
        do {
            names.add(current.getNameSegment().getPath());
            current = current.getChild();
        } while (current != null);

        String[] pathSegments = names.toArray(new String[0]);
        Type selected = getSegmentType(pathSegments, 0, schema);

        MessageType single = new MessageType(messageName, selected);
        projection = (projection == null) ? single : projection.union(single);
    }
    return projection;
}
Also used : GroupType(org.apache.parquet.schema.GroupType) MessageType(org.apache.parquet.schema.MessageType) Type(org.apache.parquet.schema.Type) SchemaPath(org.apache.drill.common.expression.SchemaPath) ArrayList(java.util.ArrayList) PathSegment(org.apache.drill.common.expression.PathSegment) MessageType(org.apache.parquet.schema.MessageType)

Example 13 with PathSegment

use of org.apache.drill.common.expression.PathSegment in project drill by axbaretto.

The method addSegment of the class ProjectionSetImpl.

/**
 * Records a name segment in this projection set and, when it is followed by another
 * name segment, recurses into the nested projection set registered under the same key.
 *
 * @param rootSegment segment to add; its lower-cased path is used as the key
 */
private void addSegment(NameSegment rootSegment) {
    final String key = rootSegment.getPath().toLowerCase();
    projection.add(key);

    final PathSegment next = rootSegment.getChild();
    // Stop at a leaf, and ignore the [x] array suffix.
    if (next == null || next.isArray()) {
        return;
    }

    // Look up the nested projection set for this key, creating it on first use.
    ProjectionSetImpl nested = mapProjections.get(key);
    if (nested == null) {
        nested = new ProjectionSetImpl();
        mapProjections.put(key, nested);
    }
    nested.addSegment((NameSegment) next);
}
Also used : PathSegment(org.apache.drill.common.expression.PathSegment)

Example 14 with PathSegment

use of org.apache.drill.common.expression.PathSegment in project drill by axbaretto.

The method ensureAtLeastOneField of the class BsonRecordReader.

/**
 * If nothing has been written yet, creates an empty integer field for the first
 * projected column so that some data can be returned for count purposes.
 *
 * <p>Nested maps are created for every leading segment of the first column's path;
 * descent stops at the end of the path or just before an array segment, and the integer
 * field is created there.
 *
 * @param writer complex writer whose root map receives the placeholder field
 */
public void ensureAtLeastOneField(ComplexWriter writer) {
    if (atLeastOneWrite) {
        return;
    }

    PathSegment segment = columns.get(0).getRootSegment();
    BaseWriter.MapWriter mapWriter = writer.rootAsMap();

    // Descend while the current segment has a non-array child, opening a map per level.
    PathSegment next = segment.getChild();
    while (next != null && !next.isArray()) {
        mapWriter = mapWriter.map(segment.getNameSegment().getPath());
        segment = next;
        next = segment.getChild();
    }

    mapWriter.integer(segment.getNameSegment().getPath());
}
Also used : BaseWriter(org.apache.drill.exec.vector.complex.writer.BaseWriter) SchemaPath(org.apache.drill.common.expression.SchemaPath) PathSegment(org.apache.drill.common.expression.PathSegment)

Example 15 with PathSegment

use of org.apache.drill.common.expression.PathSegment in project drill by axbaretto.

The method getFieldIdIfMatches of the class FieldIdUtil.

/**
 * Resolves a path segment chain against a vector, recursing into child vectors and
 * recording what it finds in {@code builder}.
 *
 * @param vector          vector the current segment is matched against
 * @param builder         builder that accumulates ids, intermediate/final types and the
 *                        remainder segment
 * @param addToBreadCrumb while {@code true}, intermediate types, child ordinals and the
 *                        remainder segment are recorded in the builder; it is switched
 *                        off locally once a remainder has been recorded
 * @param seg             current path segment, or {@code null} when the path is exhausted
 * @return the built field id, or {@code null} when the path cannot be matched
 *         (consecutive array segments on a repeated map, a named segment on a list
 *         vector, a missing child, or a complex path below a scalar)
 * @throws UnsupportedOperationException if the vector is neither an
 *         {@code AbstractContainerVector} nor a {@code ListVector} where one is required
 */
public static TypedFieldId getFieldIdIfMatches(ValueVector vector, TypedFieldId.Builder builder, boolean addToBreadCrumb, PathSegment seg) {
    if (vector instanceof RepeatedMapVector && seg != null && seg.isArray() && !seg.isLastPath()) {
        if (addToBreadCrumb) {
            addToBreadCrumb = false;
            builder.remainder(seg);
        }
        // skip the first array segment as there is no corresponding child vector.
        seg = seg.getChild();
        // A second consecutive array segment on a repeated map is not resolved here:
        // report "no match".
        if (seg.isArray()) {
            return null;
        }
    }
    // Path exhausted: the vector itself is the target.
    if (seg == null) {
        if (addToBreadCrumb) {
            builder.intermediateType(vector.getField().getType());
        }
        return builder.finalType(vector.getField().getType()).build();
    }
    if (seg.isArray()) {
        if (seg.isLastPath()) {
            // Trailing array segment: determine the final type from the container or list.
            MajorType type;
            if (vector instanceof AbstractContainerVector) {
                type = ((AbstractContainerVector) vector).getLastPathType();
            } else if (vector instanceof ListVector) {
                type = ((ListVector) vector).getDataVector().getField().getType();
                builder.listVector();
            } else {
                throw new UnsupportedOperationException("FieldIdUtil does not support vector of type " + vector.getField().getType());
            }
            // The path ends in an array segment: call withIndex() on the builder and use
            // the type resolved above as the final type.
            builder.withIndex().finalType(type);
            // only set remainder when it's the only array segment.
            if (addToBreadCrumb) {
                addToBreadCrumb = false;
                builder.remainder(seg);
            }
            return builder.build();
        } else {
            // Array segment in the middle of the path: record it as the remainder (once)
            // and fall through to the child lookup below.
            if (addToBreadCrumb) {
                addToBreadCrumb = false;
                builder.remainder(seg);
            }
        }
    } else {
        // A named segment cannot be matched against a list vector.
        if (vector instanceof ListVector) {
            return null;
        }
    }
    // Locate the child vector that corresponds to the current segment.
    ValueVector v;
    if (vector instanceof AbstractContainerVector) {
        // Array segments carry no name, so the lookup is done with a null name.
        VectorWithOrdinal vord = ((AbstractContainerVector) vector).getChildVectorWithOrdinal(seg.isArray() ? null : seg.getNameSegment().getPath());
        if (vord == null) {
            return null;
        }
        v = vord.vector;
        if (addToBreadCrumb) {
            builder.intermediateType(v.getField().getType());
            builder.addId(vord.ordinal);
        }
    } else if (vector instanceof ListVector) {
        v = ((ListVector) vector).getDataVector();
    } else {
        throw new UnsupportedOperationException("FieldIdUtil does not support vector of type " + vector.getField().getType());
    }
    // Complex children: continue with the rest of the path. Scalar children: finish here.
    if (v instanceof AbstractContainerVector) {
        // we're looking for a multi path.
        AbstractContainerVector c = (AbstractContainerVector) v;
        return getFieldIdIfMatches(c, builder, addToBreadCrumb, seg.getChild());
    } else if (v instanceof ListVector) {
        ListVector list = (ListVector) v;
        return getFieldIdIfMatches(list, builder, addToBreadCrumb, seg.getChild());
    } else if (v instanceof UnionVector) {
        return getFieldIdIfMatchesUnion((UnionVector) v, builder, addToBreadCrumb, seg.getChild());
    } else {
        if (seg.isNamed()) {
            if (addToBreadCrumb) {
                builder.intermediateType(v.getField().getType());
            }
            builder.finalType(v.getField().getType());
        } else {
            // Reached through an array segment: the final type is the child's type with
            // its mode forced to OPTIONAL.
            builder.finalType(v.getField().getType().toBuilder().setMode(DataMode.OPTIONAL).build());
        }
        if (seg.isLastPath()) {
            return builder.build();
        } else {
            // The only continuation accepted below a scalar child is a single trailing
            // array segment.
            PathSegment child = seg.getChild();
            if (child.isLastPath() && child.isArray()) {
                if (addToBreadCrumb) {
                    builder.remainder(child);
                }
                builder.withIndex();
                builder.finalType(v.getField().getType().toBuilder().setMode(DataMode.OPTIONAL).build());
                return builder.build();
            } else {
                logger.warn("You tried to request a complex type inside a scalar object or path or type is wrong.");
                return null;
            }
        }
    }
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) MajorType(org.apache.drill.common.types.TypeProtos.MajorType) PathSegment(org.apache.drill.common.expression.PathSegment)

Aggregations

PathSegment (org.apache.drill.common.expression.PathSegment): 32, SchemaPath (org.apache.drill.common.expression.SchemaPath): 14, BaseWriter (org.apache.drill.exec.vector.complex.writer.BaseWriter): 6, FieldSegment (org.ojai.FieldSegment): 6, ArrayList (java.util.ArrayList): 5, MajorType (org.apache.drill.common.types.TypeProtos.MajorType): 5, GroupType (org.apache.parquet.schema.GroupType): 5, Type (org.apache.parquet.schema.Type): 5, FieldPath (org.ojai.FieldPath): 5, Stack (java.util.Stack): 4, RexNode (org.apache.calcite.rex.RexNode): 4, MessageType (org.apache.parquet.schema.MessageType): 4, BitSet (java.util.BitSet): 3, ValueVector (org.apache.drill.exec.vector.ValueVector): 3, NameSegment (org.apache.drill.common.expression.PathSegment.NameSegment): 2, MinorType (org.apache.drill.common.types.TypeProtos.MinorType): 2, TypedFieldId (org.apache.drill.exec.record.TypedFieldId): 2, ColumnMetadata (org.apache.drill.exec.record.metadata.ColumnMetadata): 2, ListWriter (org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter): 2, MapWriter (org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter): 2