
Example 46 with SchemaPath

use of org.apache.drill.common.expression.SchemaPath in project drill by apache.

the class KuduScanBatchCreator method getBatch.

@Override
public ScanBatch getBatch(FragmentContext context, KuduSubScan subScan, List<RecordBatch> children) throws ExecutionSetupException {
    Preconditions.checkArgument(children.isEmpty());
    List<RecordReader> readers = Lists.newArrayList();
    List<SchemaPath> columns = null;
    for (KuduSubScan.KuduSubScanSpec scanSpec : subScan.getTabletScanSpecList()) {
        try {
            if ((columns = subScan.getColumns()) == null) {
                columns = GroupScan.ALL_COLUMNS;
            }
            readers.add(new KuduRecordReader(subScan.getStorageEngine().getClient(), scanSpec, columns, context));
        } catch (Exception e1) {
            throw new ExecutionSetupException(e1);
        }
    }
    return new ScanBatch(subScan, context, readers.iterator());
}
Also used : ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) SchemaPath(org.apache.drill.common.expression.SchemaPath) RecordReader(org.apache.drill.exec.store.RecordReader) ScanBatch(org.apache.drill.exec.physical.impl.ScanBatch)
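
The detail worth noting above is the projection default: a null column list from the sub-scan is replaced with GroupScan.ALL_COLUMNS before each KuduRecordReader is built. A minimal sketch of that defaulting, assuming only Drill's SchemaPath and GroupScan classes on the classpath; the class and method names here are illustrative, not part of Drill:

import java.util.Arrays;
import java.util.List;

import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.exec.physical.base.GroupScan;

public class ProjectionDefaultSketch {
    static List<SchemaPath> projectionOrAll(List<SchemaPath> requested) {
        // A null projection from the sub-scan means "read every column".
        return requested == null ? GroupScan.ALL_COLUMNS : requested;
    }

    public static void main(String[] args) {
        List<SchemaPath> cols = Arrays.asList(
            SchemaPath.getSimplePath("key"),
            SchemaPath.getCompoundPath("address", "city"));
        // Prints the two projected paths, then the "all columns" marker.
        System.out.println(projectionOrAll(cols));
        System.out.println(projectionOrAll(null));
    }
}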

Example 47 with SchemaPath

use of org.apache.drill.common.expression.SchemaPath in project drill by apache.

the class MongoRecordReader method transformColumns.

@Override
protected Collection<SchemaPath> transformColumns(Collection<SchemaPath> projectedColumns) {
    Set<SchemaPath> transformed = Sets.newLinkedHashSet();
    if (!isStarQuery()) {
        for (SchemaPath column : projectedColumns) {
            String fieldName = column.getRootSegment().getPath();
            transformed.add(column);
            this.fields.put(fieldName, Integer.valueOf(1));
        }
    } else {
        // Take all the fields including the _id
        this.fields.remove(DrillMongoConstants.ID);
        transformed.add(AbstractRecordReader.STAR_COLUMN);
    }
    return transformed;
}
Also used : SchemaPath(org.apache.drill.common.expression.SchemaPath)
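
What transformColumns() builds up as a side effect is effectively a MongoDB field-inclusion map: every projected column contributes its root field name with a value of 1. A self-contained sketch of just that reduction, assuming Drill is on the classpath; the class name and main method are illustrative only:

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.drill.common.expression.SchemaPath;

public class MongoProjectionSketch {
    static Map<String, Integer> toInclusionMap(List<SchemaPath> projected) {
        Map<String, Integer> fields = new LinkedHashMap<>();
        for (SchemaPath column : projected) {
            // Only the root segment matters to Mongo; `address`.`city` projects "address".
            fields.put(column.getRootSegment().getPath(), 1);
        }
        return fields;
    }

    public static void main(String[] args) {
        List<SchemaPath> projected = Arrays.asList(
            SchemaPath.getSimplePath("name"),
            SchemaPath.getCompoundPath("address", "city"));
        System.out.println(toInclusionMap(projected)); // {name=1, address=1}
    }
}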

Example 48 with SchemaPath

use of org.apache.drill.common.expression.SchemaPath in project drill by apache.

the class ParquetGroupScan method checkForPartitionColumn.

/**
   * When reading the very first footer, any column is a potential partition column. So for the first footer, we check
   * every column to see if it is single valued, and if so, add it to the list of potential partition columns. For the
   * remaining footers, we will not find any new partition columns, but we may discover that what was previously a
   * potential partition column now no longer qualifies, so it needs to be removed from the list.
   * @return whether column is a potential partition column
   */
private boolean checkForPartitionColumn(ColumnMetadata columnMetadata, boolean first) {
    SchemaPath schemaPath = SchemaPath.getCompoundPath(columnMetadata.getName());
    final PrimitiveTypeName primitiveType;
    final OriginalType originalType;
    if (this.parquetTableMetadata.hasColumnMetadata()) {
        primitiveType = this.parquetTableMetadata.getPrimitiveType(columnMetadata.getName());
        originalType = this.parquetTableMetadata.getOriginalType(columnMetadata.getName());
    } else {
        primitiveType = columnMetadata.getPrimitiveType();
        originalType = columnMetadata.getOriginalType();
    }
    if (first) {
        if (hasSingleValue(columnMetadata)) {
            partitionColTypeMap.put(schemaPath, getType(primitiveType, originalType));
            return true;
        } else {
            return false;
        }
    } else {
        if (!partitionColTypeMap.keySet().contains(schemaPath)) {
            return false;
        } else {
            if (!hasSingleValue(columnMetadata)) {
                partitionColTypeMap.remove(schemaPath);
                return false;
            }
            if (!getType(primitiveType, originalType).equals(partitionColTypeMap.get(schemaPath))) {
                partitionColTypeMap.remove(schemaPath);
                return false;
            }
        }
    }
    return true;
}
Also used : OriginalType(org.apache.parquet.schema.OriginalType) SchemaPath(org.apache.drill.common.expression.SchemaPath) PrimitiveTypeName(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName)
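
A simplified, standalone model of the rule the javadoc describes may make the flow easier to follow: the first footer can only add candidates, and every later footer can only confirm or evict them. This sketch uses plain strings instead of Drill's ColumnMetadata and type enums, so it illustrates the control flow only and is not Drill code:

import java.util.HashMap;
import java.util.Map;

public class PartitionCandidateSketch {
    private final Map<String, String> candidateTypes = new HashMap<>();

    boolean check(String column, String type, boolean singleValued, boolean first) {
        if (first) {
            if (singleValued) {
                candidateTypes.put(column, type);
                return true;
            }
            return false;
        }
        if (!candidateTypes.containsKey(column)) {
            return false;
        }
        if (!singleValued || !type.equals(candidateTypes.get(column))) {
            candidateTypes.remove(column);   // disqualified by a later footer
            return false;
        }
        return true;
    }

    public static void main(String[] args) {
        PartitionCandidateSketch s = new PartitionCandidateSketch();
        s.check("dir0", "VARCHAR", true, true);    // first footer: becomes a candidate
        s.check("dir0", "VARCHAR", false, false);  // later footer has two values: dropped
        System.out.println(s.candidateTypes);      // {}
    }
}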

Example 49 with SchemaPath

use of org.apache.drill.common.expression.SchemaPath in project drill by apache.

the class ParquetGroupScan method populatePruningVector.

public void populatePruningVector(ValueVector v, int index, SchemaPath column, String file) {
    String f = Path.getPathWithoutSchemeAndAuthority(new Path(file)).toString();
    MinorType type = getTypeForColumn(column).getMinorType();
    switch(type) {
        case INT:
            {
                NullableIntVector intVector = (NullableIntVector) v;
                Integer value = (Integer) partitionValueMap.get(f).get(column);
                intVector.getMutator().setSafe(index, value);
                return;
            }
        case SMALLINT:
            {
                NullableSmallIntVector smallIntVector = (NullableSmallIntVector) v;
                Integer value = (Integer) partitionValueMap.get(f).get(column);
                smallIntVector.getMutator().setSafe(index, value.shortValue());
                return;
            }
        case TINYINT:
            {
                NullableTinyIntVector tinyIntVector = (NullableTinyIntVector) v;
                Integer value = (Integer) partitionValueMap.get(f).get(column);
                tinyIntVector.getMutator().setSafe(index, value.byteValue());
                return;
            }
        case UINT1:
            {
                NullableUInt1Vector intVector = (NullableUInt1Vector) v;
                Integer value = (Integer) partitionValueMap.get(f).get(column);
                intVector.getMutator().setSafe(index, value.byteValue());
                return;
            }
        case UINT2:
            {
                NullableUInt2Vector intVector = (NullableUInt2Vector) v;
                Integer value = (Integer) partitionValueMap.get(f).get(column);
                intVector.getMutator().setSafe(index, (char) value.shortValue());
                return;
            }
        case UINT4:
            {
                NullableUInt4Vector intVector = (NullableUInt4Vector) v;
                Integer value = (Integer) partitionValueMap.get(f).get(column);
                intVector.getMutator().setSafe(index, value);
                return;
            }
        case BIGINT:
            {
                NullableBigIntVector bigIntVector = (NullableBigIntVector) v;
                Long value = (Long) partitionValueMap.get(f).get(column);
                bigIntVector.getMutator().setSafe(index, value);
                return;
            }
        case FLOAT4:
            {
                NullableFloat4Vector float4Vector = (NullableFloat4Vector) v;
                Float value = (Float) partitionValueMap.get(f).get(column);
                float4Vector.getMutator().setSafe(index, value);
                return;
            }
        case FLOAT8:
            {
                NullableFloat8Vector float8Vector = (NullableFloat8Vector) v;
                Double value = (Double) partitionValueMap.get(f).get(column);
                float8Vector.getMutator().setSafe(index, value);
                return;
            }
        case VARBINARY:
            {
                NullableVarBinaryVector varBinaryVector = (NullableVarBinaryVector) v;
                Object s = partitionValueMap.get(f).get(column);
                byte[] bytes;
                if (s instanceof Binary) {
                    bytes = ((Binary) s).getBytes();
                } else if (s instanceof String) {
                    bytes = ((String) s).getBytes();
                } else if (s instanceof byte[]) {
                    bytes = (byte[]) s;
                } else {
                    throw new UnsupportedOperationException("Unable to create column data for type: " + type);
                }
                varBinaryVector.getMutator().setSafe(index, bytes, 0, bytes.length);
                return;
            }
        case DECIMAL18:
            {
                NullableDecimal18Vector decimalVector = (NullableDecimal18Vector) v;
                Long value = (Long) partitionValueMap.get(f).get(column);
                decimalVector.getMutator().setSafe(index, value);
                return;
            }
        case DATE:
            {
                NullableDateVector dateVector = (NullableDateVector) v;
                Integer value = (Integer) partitionValueMap.get(f).get(column);
                dateVector.getMutator().setSafe(index, value * (long) DateTimeConstants.MILLIS_PER_DAY);
                return;
            }
        case TIME:
            {
                NullableTimeVector timeVector = (NullableTimeVector) v;
                Integer value = (Integer) partitionValueMap.get(f).get(column);
                timeVector.getMutator().setSafe(index, value);
                return;
            }
        case TIMESTAMP:
            {
                NullableTimeStampVector timeStampVector = (NullableTimeStampVector) v;
                Long value = (Long) partitionValueMap.get(f).get(column);
                timeStampVector.getMutator().setSafe(index, value);
                return;
            }
        case VARCHAR:
            {
                NullableVarCharVector varCharVector = (NullableVarCharVector) v;
                Object s = partitionValueMap.get(f).get(column);
                byte[] bytes;
                if (s instanceof String) {
                if (s instanceof String) {
                    // if the metadata was read from a JSON cache file it may be a string type
                    bytes = ((String) s).getBytes();
                } else if (s instanceof Binary) {
                    bytes = ((Binary) s).getBytes();
                } else if (s instanceof byte[]) {
                    bytes = (byte[]) s;
                } else {
                    throw new UnsupportedOperationException("Unable to create column data for type: " + type);
                }
                varCharVector.getMutator().setSafe(index, bytes, 0, bytes.length);
                return;
            }
        default:
            throw new UnsupportedOperationException("Unsupported type: " + type);
    }
}
Also used : NullableTinyIntVector(org.apache.drill.exec.vector.NullableTinyIntVector) NullableFloat8Vector(org.apache.drill.exec.vector.NullableFloat8Vector) NullableVarBinaryVector(org.apache.drill.exec.vector.NullableVarBinaryVector) NullableUInt1Vector(org.apache.drill.exec.vector.NullableUInt1Vector) NullableIntVector(org.apache.drill.exec.vector.NullableIntVector) NullableFloat4Vector(org.apache.drill.exec.vector.NullableFloat4Vector) NullableUInt4Vector(org.apache.drill.exec.vector.NullableUInt4Vector) MinorType(org.apache.drill.common.types.TypeProtos.MinorType) NullableDecimal18Vector(org.apache.drill.exec.vector.NullableDecimal18Vector) Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) ReadEntryWithPath(org.apache.drill.exec.store.dfs.ReadEntryWithPath) NullableDateVector(org.apache.drill.exec.vector.NullableDateVector) NullableTimeStampVector(org.apache.drill.exec.vector.NullableTimeStampVector) NullableSmallIntVector(org.apache.drill.exec.vector.NullableSmallIntVector) NullableUInt2Vector(org.apache.drill.exec.vector.NullableUInt2Vector) NullableVarCharVector(org.apache.drill.exec.vector.NullableVarCharVector) NullableBigIntVector(org.apache.drill.exec.vector.NullableBigIntVector) Binary(org.apache.parquet.io.api.Binary) NullableTimeVector(org.apache.drill.exec.vector.NullableTimeVector)
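
The VARBINARY and VARCHAR branches carry the one non-obvious coercion: a partition value may arrive as a parquet Binary (straight from footer statistics), as a String (when the metadata was read back from the JSON cache file), or as a raw byte[]. A minimal sketch of just that normalization, using only the parquet Binary class the method already depends on; the class name and the explicit UTF-8 charset are my choices, not Drill's:

import java.nio.charset.StandardCharsets;

import org.apache.parquet.io.api.Binary;

public class PartitionValueBytes {
    static byte[] toBytes(Object s) {
        if (s instanceof Binary) {
            return ((Binary) s).getBytes();
        } else if (s instanceof String) {
            return ((String) s).getBytes(StandardCharsets.UTF_8);
        } else if (s instanceof byte[]) {
            return (byte[]) s;
        }
        throw new UnsupportedOperationException("Unable to create column data for: " + s);
    }

    public static void main(String[] args) {
        System.out.println(toBytes("2016-12-01").length);                // 10
        System.out.println(toBytes(Binary.fromString("drill")).length);  // 5
    }
}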

Example 50 with SchemaPath

use of org.apache.drill.common.expression.SchemaPath in project drill by apache.

the class ParquetGroupScan method init.

private void init(MetadataContext metaContext) throws IOException {
    if (entries.size() == 1 && parquetTableMetadata == null) {
        Path p = Path.getPathWithoutSchemeAndAuthority(new Path(entries.get(0).getPath()));
        Path metaPath = null;
        if (fs.isDirectory(p)) {
            // Using the metadata file makes sense when querying a directory; otherwise
            // if querying a single file we can look up the metadata directly from the file
            metaPath = new Path(p, Metadata.METADATA_FILENAME);
        }
        if (metaPath != null && fs.exists(metaPath)) {
            usedMetadataCache = true;
            parquetTableMetadata = Metadata.readBlockMeta(fs, metaPath.toString(), metaContext, formatConfig);
        } else {
            parquetTableMetadata = Metadata.getParquetTableMetadata(fs, p.toString(), formatConfig);
        }
    } else {
        Path p = Path.getPathWithoutSchemeAndAuthority(new Path(selectionRoot));
        Path metaPath = new Path(p, Metadata.METADATA_FILENAME);
        if (fs.isDirectory(new Path(selectionRoot)) && fs.exists(metaPath)) {
            usedMetadataCache = true;
            if (parquetTableMetadata == null) {
                parquetTableMetadata = Metadata.readBlockMeta(fs, metaPath.toString(), metaContext, formatConfig);
            }
            if (fileSet != null) {
                parquetTableMetadata = removeUnneededRowGroups(parquetTableMetadata);
            }
        } else {
            final List<FileStatus> fileStatuses = Lists.newArrayList();
            for (ReadEntryWithPath entry : entries) {
                getFiles(entry.getPath(), fileStatuses);
            }
            parquetTableMetadata = Metadata.getParquetTableMetadata(fs, fileStatuses, formatConfig);
        }
    }
    if (fileSet == null) {
        fileSet = Sets.newHashSet();
        for (ParquetFileMetadata file : parquetTableMetadata.getFiles()) {
            fileSet.add(file.getPath());
        }
    }
    Map<String, DrillbitEndpoint> hostEndpointMap = Maps.newHashMap();
    for (DrillbitEndpoint endpoint : formatPlugin.getContext().getBits()) {
        hostEndpointMap.put(endpoint.getAddress(), endpoint);
    }
    rowGroupInfos = Lists.newArrayList();
    for (ParquetFileMetadata file : parquetTableMetadata.getFiles()) {
        int rgIndex = 0;
        for (RowGroupMetadata rg : file.getRowGroups()) {
            RowGroupInfo rowGroupInfo = new RowGroupInfo(file.getPath(), rg.getStart(), rg.getLength(), rgIndex, rg.getRowCount());
            EndpointByteMap endpointByteMap = new EndpointByteMapImpl();
            for (String host : rg.getHostAffinity().keySet()) {
                if (hostEndpointMap.containsKey(host)) {
                    endpointByteMap.add(hostEndpointMap.get(host), (long) (rg.getHostAffinity().get(host) * rg.getLength()));
                }
            }
            rowGroupInfo.setEndpointByteMap(endpointByteMap);
            rgIndex++;
            rowGroupInfos.add(rowGroupInfo);
        }
    }
    this.endpointAffinities = AffinityCreator.getAffinityMap(rowGroupInfos);
    columnValueCounts = Maps.newHashMap();
    this.rowCount = 0;
    boolean first = true;
    for (ParquetFileMetadata file : parquetTableMetadata.getFiles()) {
        for (RowGroupMetadata rowGroup : file.getRowGroups()) {
            long rowCount = rowGroup.getRowCount();
            for (ColumnMetadata column : rowGroup.getColumns()) {
                SchemaPath schemaPath = SchemaPath.getCompoundPath(column.getName());
                Long previousCount = columnValueCounts.get(schemaPath);
                if (previousCount != null) {
                    if (previousCount != GroupScan.NO_COLUMN_STATS) {
                        if (column.getNulls() != null) {
                            Long newCount = rowCount - column.getNulls();
                            columnValueCounts.put(schemaPath, columnValueCounts.get(schemaPath) + newCount);
                        }
                    }
                } else {
                    if (column.getNulls() != null) {
                        Long newCount = rowCount - column.getNulls();
                        columnValueCounts.put(schemaPath, newCount);
                    } else {
                        columnValueCounts.put(schemaPath, GroupScan.NO_COLUMN_STATS);
                    }
                }
                boolean partitionColumn = checkForPartitionColumn(column, first);
                if (partitionColumn) {
                    Map<SchemaPath, Object> map = partitionValueMap.get(file.getPath());
                    if (map == null) {
                        map = Maps.newHashMap();
                        partitionValueMap.put(file.getPath(), map);
                    }
                    Object value = map.get(schemaPath);
                    Object currentValue = column.getMaxValue();
                    if (value != null) {
                        if (value != currentValue) {
                            partitionColTypeMap.remove(schemaPath);
                        }
                    } else {
                        map.put(schemaPath, currentValue);
                    }
                } else {
                    partitionColTypeMap.remove(schemaPath);
                }
            }
            this.rowCount += rowGroup.getRowCount();
            first = false;
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) ReadEntryWithPath(org.apache.drill.exec.store.dfs.ReadEntryWithPath) ColumnMetadata(org.apache.drill.exec.store.parquet.Metadata.ColumnMetadata) FileStatus(org.apache.hadoop.fs.FileStatus) ParquetFileMetadata(org.apache.drill.exec.store.parquet.Metadata.ParquetFileMetadata) EndpointByteMap(org.apache.drill.exec.store.schedule.EndpointByteMap) RowGroupMetadata(org.apache.drill.exec.store.parquet.Metadata.RowGroupMetadata) DrillbitEndpoint(org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) EndpointByteMapImpl(org.apache.drill.exec.store.schedule.EndpointByteMapImpl)
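
One piece of init() that rewards a closer look is the column value count bookkeeping: the first row group seen for a column either seeds its non-null count or, when null statistics are missing, marks it with the NO_COLUMN_STATS sentinel, and a column once marked with the sentinel is never counted again. A plain-Java sketch of that accumulation, with a stand-in constant instead of GroupScan.NO_COLUMN_STATS; class and method names are illustrative:

import java.util.HashMap;
import java.util.Map;

public class ColumnValueCounts {
    static final long NO_COLUMN_STATS = -1;   // stand-in for GroupScan.NO_COLUMN_STATS

    final Map<String, Long> counts = new HashMap<>();

    void addRowGroup(String column, long rowCount, Long nulls) {
        Long previous = counts.get(column);
        if (previous == null) {
            // First row group for this column: count it, or poison it if stats are missing.
            counts.put(column, nulls == null ? NO_COLUMN_STATS : rowCount - nulls);
        } else if (previous != NO_COLUMN_STATS && nulls != null) {
            // Accumulate non-null values across row groups.
            counts.put(column, previous + (rowCount - nulls));
        }
    }

    public static void main(String[] args) {
        ColumnValueCounts c = new ColumnValueCounts();
        c.addRowGroup("id", 100, 0L);     // 100 non-null values
        c.addRowGroup("id", 50, 10L);     // plus 40 more
        c.addRowGroup("name", 100, null); // no stats: sentinel
        System.out.println(c.counts);     // id -> 140, name -> -1
    }
}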

Aggregations

SchemaPath (org.apache.drill.common.expression.SchemaPath): 74
Test (org.junit.Test): 23
FunctionImplementationRegistry (org.apache.drill.exec.expr.fn.FunctionImplementationRegistry): 17
FragmentContext (org.apache.drill.exec.ops.FragmentContext): 16
PhysicalPlan (org.apache.drill.exec.physical.PhysicalPlan): 15
FragmentRoot (org.apache.drill.exec.physical.base.FragmentRoot): 15
PhysicalPlanReader (org.apache.drill.exec.planner.PhysicalPlanReader): 15
LogicalExpression (org.apache.drill.common.expression.LogicalExpression): 12
ExecTest (org.apache.drill.exec.ExecTest): 12
BigIntVector (org.apache.drill.exec.vector.BigIntVector): 9
ExecutionSetupException (org.apache.drill.common.exceptions.ExecutionSetupException): 8
IntVector (org.apache.drill.exec.vector.IntVector): 8
Path (org.apache.hadoop.fs.Path): 8
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 7
ValueVector (org.apache.drill.exec.vector.ValueVector): 7
IOException (java.io.IOException): 6
ErrorCollector (org.apache.drill.common.expression.ErrorCollector): 6
TypedFieldId (org.apache.drill.exec.record.TypedFieldId): 6
ErrorCollectorImpl (org.apache.drill.common.expression.ErrorCollectorImpl): 5
FieldReference (org.apache.drill.common.expression.FieldReference): 5