
Example 56 with SchemaPath

Use of org.apache.drill.common.expression.SchemaPath in project drill by apache.

The class ParquetGroupScan, method populatePruningVector.

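// Writes the pruning (partition) value recorded for this column of this file into
// the output vector at the given row index, dispatching on the column's minor type.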
public void populatePruningVector(ValueVector v, int index, SchemaPath column, String file) {
    String f = Path.getPathWithoutSchemeAndAuthority(new Path(file)).toString();
    MinorType type = getTypeForColumn(column).getMinorType();
    switch(type) {
        case INT:
            {
                NullableIntVector intVector = (NullableIntVector) v;
                Integer value = (Integer) partitionValueMap.get(f).get(column);
                intVector.getMutator().setSafe(index, value);
                return;
            }
        case SMALLINT:
            {
                NullableSmallIntVector smallIntVector = (NullableSmallIntVector) v;
                Integer value = (Integer) partitionValueMap.get(f).get(column);
                smallIntVector.getMutator().setSafe(index, value.shortValue());
                return;
            }
        case TINYINT:
            {
                NullableTinyIntVector tinyIntVector = (NullableTinyIntVector) v;
                Integer value = (Integer) partitionValueMap.get(f).get(column);
                tinyIntVector.getMutator().setSafe(index, value.byteValue());
                return;
            }
        case UINT1:
            {
                NullableUInt1Vector intVector = (NullableUInt1Vector) v;
                Integer value = (Integer) partitionValueMap.get(f).get(column);
                intVector.getMutator().setSafe(index, value.byteValue());
                return;
            }
        case UINT2:
            {
                NullableUInt2Vector intVector = (NullableUInt2Vector) v;
                Integer value = (Integer) partitionValueMap.get(f).get(column);
                intVector.getMutator().setSafe(index, (char) value.shortValue());
                return;
            }
        case UINT4:
            {
                NullableUInt4Vector intVector = (NullableUInt4Vector) v;
                Integer value = (Integer) partitionValueMap.get(f).get(column);
                intVector.getMutator().setSafe(index, value);
                return;
            }
        case BIGINT:
            {
                NullableBigIntVector bigIntVector = (NullableBigIntVector) v;
                Long value = (Long) partitionValueMap.get(f).get(column);
                bigIntVector.getMutator().setSafe(index, value);
                return;
            }
        case FLOAT4:
            {
                NullableFloat4Vector float4Vector = (NullableFloat4Vector) v;
                Float value = (Float) partitionValueMap.get(f).get(column);
                float4Vector.getMutator().setSafe(index, value);
                return;
            }
        case FLOAT8:
            {
                NullableFloat8Vector float8Vector = (NullableFloat8Vector) v;
                Double value = (Double) partitionValueMap.get(f).get(column);
                float8Vector.getMutator().setSafe(index, value);
                return;
            }
        case VARBINARY:
            {
                NullableVarBinaryVector varBinaryVector = (NullableVarBinaryVector) v;
                Object s = partitionValueMap.get(f).get(column);
                byte[] bytes;
                if (s instanceof Binary) {
                    bytes = ((Binary) s).getBytes();
                } else if (s instanceof String) {
                    bytes = ((String) s).getBytes();
                } else if (s instanceof byte[]) {
                    bytes = (byte[]) s;
                } else {
                    throw new UnsupportedOperationException("Unable to create column data for type: " + type);
                }
                varBinaryVector.getMutator().setSafe(index, bytes, 0, bytes.length);
                return;
            }
        case DECIMAL18:
            {
                NullableDecimal18Vector decimalVector = (NullableDecimal18Vector) v;
                Long value = (Long) partitionValueMap.get(f).get(column);
                decimalVector.getMutator().setSafe(index, value);
                return;
            }
        case DATE:
            {
                NullableDateVector dateVector = (NullableDateVector) v;
                Integer value = (Integer) partitionValueMap.get(f).get(column);
                dateVector.getMutator().setSafe(index, value * (long) DateTimeConstants.MILLIS_PER_DAY);
                return;
            }
        case TIME:
            {
                NullableTimeVector timeVector = (NullableTimeVector) v;
                Integer value = (Integer) partitionValueMap.get(f).get(column);
                timeVector.getMutator().setSafe(index, value);
                return;
            }
        case TIMESTAMP:
            {
                NullableTimeStampVector timeStampVector = (NullableTimeStampVector) v;
                Long value = (Long) partitionValueMap.get(f).get(column);
                timeStampVector.getMutator().setSafe(index, value);
                return;
            }
        case VARCHAR:
            {
                NullableVarCharVector varCharVector = (NullableVarCharVector) v;
                Object s = partitionValueMap.get(f).get(column);
                byte[] bytes;
                if (s instanceof String) {
                    // if the metadata was read from a JSON cache file it may be a string type
                    bytes = ((String) s).getBytes();
                } else if (s instanceof Binary) {
                    bytes = ((Binary) s).getBytes();
                } else if (s instanceof byte[]) {
                    bytes = (byte[]) s;
                } else {
                    throw new UnsupportedOperationException("Unable to create column data for type: " + type);
                }
                varCharVector.getMutator().setSafe(index, bytes, 0, bytes.length);
                return;
            }
        default:
            throw new UnsupportedOperationException("Unsupported type: " + type);
    }
}
Also used: NullableTinyIntVector, NullableSmallIntVector, NullableIntVector, NullableBigIntVector, NullableUInt1Vector, NullableUInt2Vector, NullableUInt4Vector, NullableFloat4Vector, NullableFloat8Vector, NullableDecimal18Vector, NullableDateVector, NullableTimeVector, NullableTimeStampVector, NullableVarBinaryVector, NullableVarCharVector (org.apache.drill.exec.vector); MinorType (org.apache.drill.common.types.TypeProtos); SchemaPath (org.apache.drill.common.expression); ReadEntryWithPath (org.apache.drill.exec.store.dfs); Path (org.apache.hadoop.fs); Binary (org.apache.parquet.io.api)
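Every branch above looks the value up with partitionValueMap.get(f).get(column), which only works because SchemaPath has value-based equals()/hashCode() and can therefore key a HashMap. A minimal self-contained sketch of that property (the map contents are invented for illustration, not taken from the Drill sources):

import java.util.HashMap;
import java.util.Map;
import org.apache.drill.common.expression.SchemaPath;

public class SchemaPathKeySketch {
    public static void main(String[] args) {
        Map<SchemaPath, Object> partitionValues = new HashMap<>();
        partitionValues.put(SchemaPath.getSimplePath("dir0"), 1994);

        // An independently constructed, equal path retrieves the same entry.
        Object value = partitionValues.get(SchemaPath.getSimplePath("dir0"));
        System.out.println(value); // prints 1994
    }
}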

Example 57 with SchemaPath

Use of org.apache.drill.common.expression.SchemaPath in project drill by apache.

The class ParquetGroupScan, method init.

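// Loads Parquet table metadata (from the metadata cache file when one exists,
// otherwise by reading the file footers), then builds row-group affinity info,
// per-column value counts, and the candidate partition-column value map.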
private void init(MetadataContext metaContext) throws IOException {
    if (entries.size() == 1 && parquetTableMetadata == null) {
        Path p = Path.getPathWithoutSchemeAndAuthority(new Path(entries.get(0).getPath()));
        Path metaPath = null;
        if (fs.isDirectory(p)) {
            // Using the metadata file makes sense when querying a directory; otherwise
            // if querying a single file we can look up the metadata directly from the file
            metaPath = new Path(p, Metadata.METADATA_FILENAME);
        }
        if (metaPath != null && fs.exists(metaPath)) {
            usedMetadataCache = true;
            parquetTableMetadata = Metadata.readBlockMeta(fs, metaPath.toString(), metaContext, formatConfig);
        } else {
            parquetTableMetadata = Metadata.getParquetTableMetadata(fs, p.toString(), formatConfig);
        }
    } else {
        Path p = Path.getPathWithoutSchemeAndAuthority(new Path(selectionRoot));
        Path metaPath = new Path(p, Metadata.METADATA_FILENAME);
        if (fs.isDirectory(new Path(selectionRoot)) && fs.exists(metaPath)) {
            usedMetadataCache = true;
            if (parquetTableMetadata == null) {
                parquetTableMetadata = Metadata.readBlockMeta(fs, metaPath.toString(), metaContext, formatConfig);
            }
            if (fileSet != null) {
                parquetTableMetadata = removeUnneededRowGroups(parquetTableMetadata);
            }
        } else {
            final List<FileStatus> fileStatuses = Lists.newArrayList();
            for (ReadEntryWithPath entry : entries) {
                getFiles(entry.getPath(), fileStatuses);
            }
            parquetTableMetadata = Metadata.getParquetTableMetadata(fs, fileStatuses, formatConfig);
        }
    }
    if (fileSet == null) {
        fileSet = Sets.newHashSet();
        for (ParquetFileMetadata file : parquetTableMetadata.getFiles()) {
            fileSet.add(file.getPath());
        }
    }
    Map<String, DrillbitEndpoint> hostEndpointMap = Maps.newHashMap();
    for (DrillbitEndpoint endpoint : formatPlugin.getContext().getBits()) {
        hostEndpointMap.put(endpoint.getAddress(), endpoint);
    }
    rowGroupInfos = Lists.newArrayList();
    for (ParquetFileMetadata file : parquetTableMetadata.getFiles()) {
        int rgIndex = 0;
        for (RowGroupMetadata rg : file.getRowGroups()) {
            RowGroupInfo rowGroupInfo = new RowGroupInfo(file.getPath(), rg.getStart(), rg.getLength(), rgIndex, rg.getRowCount());
            EndpointByteMap endpointByteMap = new EndpointByteMapImpl();
            for (String host : rg.getHostAffinity().keySet()) {
                if (hostEndpointMap.containsKey(host)) {
                    endpointByteMap.add(hostEndpointMap.get(host), (long) (rg.getHostAffinity().get(host) * rg.getLength()));
                }
            }
            rowGroupInfo.setEndpointByteMap(endpointByteMap);
            rgIndex++;
            rowGroupInfos.add(rowGroupInfo);
        }
    }
    this.endpointAffinities = AffinityCreator.getAffinityMap(rowGroupInfos);
    columnValueCounts = Maps.newHashMap();
    this.rowCount = 0;
    boolean first = true;
    for (ParquetFileMetadata file : parquetTableMetadata.getFiles()) {
        for (RowGroupMetadata rowGroup : file.getRowGroups()) {
            long rowCount = rowGroup.getRowCount();
            for (ColumnMetadata column : rowGroup.getColumns()) {
                SchemaPath schemaPath = SchemaPath.getCompoundPath(column.getName());
                Long previousCount = columnValueCounts.get(schemaPath);
                if (previousCount != null) {
                    if (previousCount != GroupScan.NO_COLUMN_STATS) {
                        if (column.getNulls() != null) {
                            Long newCount = rowCount - column.getNulls();
                            columnValueCounts.put(schemaPath, columnValueCounts.get(schemaPath) + newCount);
                        }
                    }
                } else {
                    if (column.getNulls() != null) {
                        Long newCount = rowCount - column.getNulls();
                        columnValueCounts.put(schemaPath, newCount);
                    } else {
                        columnValueCounts.put(schemaPath, GroupScan.NO_COLUMN_STATS);
                    }
                }
                boolean partitionColumn = checkForPartitionColumn(column, first);
                if (partitionColumn) {
                    Map<SchemaPath, Object> map = partitionValueMap.get(file.getPath());
                    if (map == null) {
                        map = Maps.newHashMap();
                        partitionValueMap.put(file.getPath(), map);
                    }
                    Object value = map.get(schemaPath);
                    Object currentValue = column.getMaxValue();
                    if (value != null) {
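                        // Note: this is a reference comparison, not equals(); boxed values that
                        // are equal but not identical conservatively disqualify the column.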
                        if (value != currentValue) {
                            partitionColTypeMap.remove(schemaPath);
                        }
                    } else {
                        map.put(schemaPath, currentValue);
                    }
                } else {
                    partitionColTypeMap.remove(schemaPath);
                }
            }
            this.rowCount += rowGroup.getRowCount();
            first = false;
        }
    }
}
Also used: SchemaPath (org.apache.drill.common.expression); Path, FileStatus (org.apache.hadoop.fs); ReadEntryWithPath (org.apache.drill.exec.store.dfs); ColumnMetadata, ParquetFileMetadata, RowGroupMetadata (org.apache.drill.exec.store.parquet.Metadata); EndpointByteMap, EndpointByteMapImpl (org.apache.drill.exec.store.schedule); DrillbitEndpoint (org.apache.drill.exec.proto.CoordinationProtos)
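init() keys both columnValueCounts and partitionValueMap by paths built with SchemaPath.getCompoundPath(column.getName()), which takes one name segment per level of nesting. A short sketch contrasting the two factory methods (the column names are invented for illustration):

import org.apache.drill.common.expression.SchemaPath;

public class CompoundPathSketch {
    public static void main(String[] args) {
        // A top-level column is a single segment.
        SchemaPath flat = SchemaPath.getSimplePath("amount");

        // A nested column such as order.items.price gets one segment per level.
        SchemaPath nested = SchemaPath.getCompoundPath("order", "items", "price");

        System.out.println(flat.getRootSegment().getPath());   // amount
        System.out.println(nested.getRootSegment().getPath()); // order
    }
}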

Example 58 with SchemaPath

Use of org.apache.drill.common.expression.SchemaPath in project drill by apache.

The class ParquetPartitionDescriptor, method getIdIfValid.

@Override
public Integer getIdIfValid(String name) {
    SchemaPath schemaPath = SchemaPath.getSimplePath(name);
    int id = partitionColumns.indexOf(schemaPath);
    if (id == -1) {
        return null;
    }
    return id;
}
Also used: SchemaPath (org.apache.drill.common.expression)
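getIdIfValid depends on the same equality property: List.indexOf() locates a partition column by comparing a freshly built SchemaPath with equals(). A self-contained sketch of that pattern (the directory column names are invented):

import java.util.Arrays;
import java.util.List;
import org.apache.drill.common.expression.SchemaPath;

public class PartitionIdSketch {
    public static void main(String[] args) {
        List<SchemaPath> partitionColumns = Arrays.asList(
                SchemaPath.getSimplePath("dir0"),
                SchemaPath.getSimplePath("dir1"));

        System.out.println(partitionColumns.indexOf(SchemaPath.getSimplePath("dir1"))); // 1
        System.out.println(partitionColumns.indexOf(SchemaPath.getSimplePath("city"))); // -1
    }
}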

Example 59 with SchemaPath

Use of org.apache.drill.common.expression.SchemaPath in project drill by apache.

The class TestSimpleExternalSort, method validateResults.

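// Asserts that the BIGINT column "blue" is sorted in non-increasing order across
// every batch returned by the query, then reports the record and batch counts.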
private void validateResults(BufferAllocator allocator, List<QueryDataBatch> results) throws SchemaChangeException {
    long previousBigInt = Long.MAX_VALUE;
    int recordCount = 0;
    int batchCount = 0;
    for (QueryDataBatch b : results) {
        RecordBatchLoader loader = new RecordBatchLoader(allocator);
        if (b.getHeader().getRowCount() > 0) {
            batchCount++;
            loader.load(b.getHeader().getDef(), b.getData());
            @SuppressWarnings("resource") BigIntVector c1 = (BigIntVector) loader.getValueAccessorById(BigIntVector.class, loader.getValueVectorId(new SchemaPath("blue", ExpressionPosition.UNKNOWN)).getFieldIds()).getValueVector();
            BigIntVector.Accessor a1 = c1.getAccessor();
            for (int i = 0; i < c1.getAccessor().getValueCount(); i++) {
                recordCount++;
                assertTrue(String.format("%d >= %d", previousBigInt, a1.get(i)), previousBigInt >= a1.get(i));
                previousBigInt = a1.get(i);
            }
        }
        loader.clear();
        b.release();
    }
    System.out.println(String.format("Sorted %,d records in %d batches.", recordCount, batchCount));
}
Also used: QueryDataBatch (org.apache.drill.exec.rpc.user); RecordBatchLoader (org.apache.drill.exec.record); SchemaPath (org.apache.drill.common.expression); BigIntVector (org.apache.drill.exec.vector)
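This test builds its lookup key with the SchemaPath(String, ExpressionPosition) constructor rather than the getSimplePath factory used elsewhere. A small sketch, assuming (as the interchangeable usage across these examples suggests) that the source position does not participate in equality:

import org.apache.drill.common.expression.ExpressionPosition;
import org.apache.drill.common.expression.SchemaPath;

public class ConstructionSketch {
    public static void main(String[] args) {
        SchemaPath fromCtor = new SchemaPath("blue", ExpressionPosition.UNKNOWN);
        SchemaPath fromFactory = SchemaPath.getSimplePath("blue");

        // Equality should depend on the path segments, not the parse position.
        System.out.println(fromCtor.equals(fromFactory)); // expected: true
    }
}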

Example 60 with SchemaPath

Use of org.apache.drill.common.expression.SchemaPath in project drill by apache.

The class ExpressionTreeMaterializerTest, method testMaterializingLateboundTreeValidated.

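// Verifies that materializing a call to the unknown function "testFunc" records
// exactly one validation error and substitutes a TypedNullConstant for the call.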
@Test
public void testMaterializingLateboundTreeValidated(@Injectable final RecordBatch batch) throws SchemaChangeException {
    ErrorCollector ec = new ErrorCollector() {

        int errorCount = 0;

        @Override
        public void addGeneralError(ExpressionPosition expr, String s) {
            errorCount++;
        }

        @Override
        public void addUnexpectedArgumentType(ExpressionPosition expr, String name, MajorType actual, MajorType[] expected, int argumentIndex) {
            errorCount++;
        }

        @Override
        public void addUnexpectedArgumentCount(ExpressionPosition expr, int actual, Range<Integer> expected) {
            errorCount++;
        }

        @Override
        public void addUnexpectedArgumentCount(ExpressionPosition expr, int actual, int expected) {
            errorCount++;
        }

        @Override
        public void addNonNumericType(ExpressionPosition expr, MajorType actual) {
            errorCount++;
        }

        @Override
        public void addUnexpectedType(ExpressionPosition expr, int index, MajorType actual) {
            errorCount++;
        }

        @Override
        public void addExpectedConstantValue(ExpressionPosition expr, int actual, String s) {
            errorCount++;
        }

        @Override
        public boolean hasErrors() {
            return errorCount > 0;
        }

        @Override
        public String toErrorString() {
            return String.format("Found %s errors.", errorCount);
        }

        @Override
        public int getErrorCount() {
            return errorCount;
        }
    };
    new NonStrictExpectations() {

        {
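            // Resolve the field reference so materialization reaches function
            // resolution, where the unknown function name triggers the expected error.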
            batch.getValueVectorId(new SchemaPath("test", ExpressionPosition.UNKNOWN));
            result = new TypedFieldId(Types.required(MinorType.BIGINT), -5);
        }
    };
    new MockUp<RemoteFunctionRegistry>() {

        @Mock
        long getRegistryVersion() {
            return 0L;
        }
    };
    LogicalExpression functionCallExpr = new FunctionCall("testFunc", ImmutableList.of((LogicalExpression) new FieldReference("test", ExpressionPosition.UNKNOWN)), ExpressionPosition.UNKNOWN);
    LogicalExpression newExpr = ExpressionTreeMaterializer.materialize(functionCallExpr, batch, ec, registry);
    assertTrue(newExpr instanceof TypedNullConstant);
    assertEquals(1, ec.getErrorCount());
    System.out.println(ec.toErrorString());
}
Also used: FieldReference, ErrorCollector, TypedNullConstant, LogicalExpression, SchemaPath, FunctionCall, ExpressionPosition (org.apache.drill.common.expression); MajorType (org.apache.drill.common.types.TypeProtos); Range (com.google.common.collect); MockUp, NonStrictExpectations (mockit); ExecTest (org.apache.drill.exec); Test (org.junit)

Aggregations

SchemaPath (org.apache.drill.common.expression.SchemaPath): 74
Test (org.junit.Test): 23
FunctionImplementationRegistry (org.apache.drill.exec.expr.fn.FunctionImplementationRegistry): 17
FragmentContext (org.apache.drill.exec.ops.FragmentContext): 16
PhysicalPlan (org.apache.drill.exec.physical.PhysicalPlan): 15
FragmentRoot (org.apache.drill.exec.physical.base.FragmentRoot): 15
PhysicalPlanReader (org.apache.drill.exec.planner.PhysicalPlanReader): 15
LogicalExpression (org.apache.drill.common.expression.LogicalExpression): 12
ExecTest (org.apache.drill.exec.ExecTest): 12
BigIntVector (org.apache.drill.exec.vector.BigIntVector): 9
ExecutionSetupException (org.apache.drill.common.exceptions.ExecutionSetupException): 8
IntVector (org.apache.drill.exec.vector.IntVector): 8
Path (org.apache.hadoop.fs.Path): 8
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 7
ValueVector (org.apache.drill.exec.vector.ValueVector): 7
IOException (java.io.IOException): 6
ErrorCollector (org.apache.drill.common.expression.ErrorCollector): 6
TypedFieldId (org.apache.drill.exec.record.TypedFieldId): 6
ErrorCollectorImpl (org.apache.drill.common.expression.ErrorCollectorImpl): 5
FieldReference (org.apache.drill.common.expression.FieldReference): 5