
Example 1 with HiveColumnHandle

Use of io.prestosql.plugin.hive.HiveColumnHandle in project carbondata by apache.

From the class CarbondataPageSource, method getCarbonProjection.

/**
 * Builds a CarbonProjection from the given column handles.
 *
 * @param columns column handles to project; each is expected to be a HiveColumnHandle
 * @return a CarbonProjection containing the name of every column
 */
private CarbonProjection getCarbonProjection(List<? extends ColumnHandle> columns) {
    CarbonProjection carbonProjection = new CarbonProjection();
    // Convert all column handles to HiveColumnHandle and add each column name to the projection
    ImmutableList.Builder<HiveColumnHandle> handles = ImmutableList.builder();
    for (ColumnHandle handle : columns) {
        HiveColumnHandle hiveColumnHandle = Types.checkType(handle, HiveColumnHandle.class, "handle");
        handles.add(hiveColumnHandle);
        carbonProjection.addColumn(hiveColumnHandle.getName());
    }
    return carbonProjection;
}
Also used : HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) CarbonProjection(org.apache.carbondata.hadoop.CarbonProjection) ImmutableList(com.google.common.collect.ImmutableList)
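
Types.checkType narrows a generic ColumnHandle to the connector-specific HiveColumnHandle and fails fast with a descriptive message if a foreign handle slips through. A minimal, self-contained sketch of that narrowing pattern (a hypothetical stand-in shown only for illustration, not the project's actual helper):

// Hypothetical stand-in for Types.checkType, illustrating the narrow-and-cast pattern.
static <T> T checkType(Object value, Class<T> targetType, String name) {
    if (!targetType.isInstance(value)) {
        throw new IllegalArgumentException(String.format(
                "%s must be of type %s, found %s", name, targetType.getName(), value.getClass().getName()));
    }
    // Safe cast: the instanceof check above guarantees the target type.
    return targetType.cast(value);
}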

Example 2 with HiveColumnHandle

Use of io.prestosql.plugin.hive.HiveColumnHandle in project carbondata by apache.

From the class PrestoFilterUtil, method parseFilterExpression.

/**
 * Converts a Presto TupleDomain predicate into a CarbonData scan filter expression.
 *
 * @param originalConstraint the Presto TupleDomain over HiveColumnHandle columns
 * @return the combined filter Expression for all columns, or null if there is nothing to push down
 */
static Expression parseFilterExpression(TupleDomain<HiveColumnHandle> originalConstraint) {
    Domain domain;
    if (originalConstraint.isNone()) {
        return null;
    }
    // final expression for the table,
    // returned by the method after combining all the column filters (colValueExpression).
    Expression finalFilters = null;
    for (HiveColumnHandle cdch : originalConstraint.getDomains().get().keySet()) {
        // Build ColumnExpression for Expression(Carbondata)
        HiveType type = cdch.getHiveType();
        DataType coltype = spi2CarbondataTypeMapper(cdch);
        Expression colExpression = new ColumnExpression(cdch.getName(), coltype);
        domain = originalConstraint.getDomains().get().get(cdch);
        checkArgument(domain.getType().isOrderable(), "Domain type must be orderable");
        List<Object> singleValues = new ArrayList<>();
        // Combination of the rangeExpressions for a single column when that column has
        // multiple range filters; otherwise it equals the single rangeExpression.
        // The per-column results are combined to create finalFilters.
        Expression colValueExpression = null;
        for (Range range : domain.getValues().getRanges().getOrderedRanges()) {
            if (range.isSingleValue()) {
                Object value = convertDataByType(range.getLow().getValue(), type);
                singleValues.add(value);
            } else {
                // Generated for each range of the column (lessThan, greaterThan, ...);
                // a single column can have multiple ranges, which are OR-ed into colValueExpression.
                Expression rangeExpression = null;
                if (!range.getLow().isLowerUnbounded()) {
                    Object value = convertDataByType(range.getLow().getValue(), type);
                    switch(range.getLow().getBound()) {
                        case ABOVE:
                            rangeExpression = new GreaterThanExpression(colExpression, new LiteralExpression(value, coltype));
                            break;
                        case EXACTLY:
                            rangeExpression = new GreaterThanEqualToExpression(colExpression, new LiteralExpression(value, coltype));
                            break;
                        case BELOW:
                            throw new IllegalArgumentException("Low marker should never use BELOW bound");
                        default:
                            throw new AssertionError("Unhandled bound: " + range.getLow().getBound());
                    }
                }
                if (!range.getHigh().isUpperUnbounded()) {
                    Expression lessThanExpression;
                    Object value = convertDataByType(range.getHigh().getValue(), type);
                    switch(range.getHigh().getBound()) {
                        case ABOVE:
                            throw new IllegalArgumentException("High marker should never use ABOVE bound");
                        case EXACTLY:
                            lessThanExpression = new LessThanEqualToExpression(colExpression, new LiteralExpression(value, coltype));
                            break;
                        case BELOW:
                            lessThanExpression = new LessThanExpression(colExpression, new LiteralExpression(value, coltype));
                            break;
                        default:
                            throw new AssertionError("Unhandled bound: " + range.getHigh().getBound());
                    }
                    rangeExpression = (rangeExpression == null ? lessThanExpression : new AndExpression(rangeExpression, lessThanExpression));
                }
                colValueExpression = (colValueExpression == null ? rangeExpression : new OrExpression(colValueExpression, rangeExpression));
            }
        }
        // Single-value ranges become an equality (or IN) filter; OR it with any range
        // filters already collected for this column instead of discarding them.
        if (singleValues.size() == 1) {
            Expression equalTo = new EqualToExpression(colExpression, new LiteralExpression(singleValues.get(0), coltype));
            colValueExpression = (colValueExpression == null ? equalTo : new OrExpression(colValueExpression, equalTo));
        } else if (singleValues.size() > 1) {
            List<Expression> exs = singleValues.stream().map((a) -> new LiteralExpression(a, coltype)).collect(toList());
            Expression inExpression = new InExpression(colExpression, new ListExpression(exs));
            colValueExpression = (colValueExpression == null ? inExpression : new OrExpression(colValueExpression, inExpression));
        }
        if (colValueExpression != null) {
            finalFilters = (finalFilters == null ? colValueExpression : new AndExpression(finalFilters, colValueExpression));
        }
    }
    return finalFilters;
}
Also used : Expression(org.apache.carbondata.core.scan.expression.Expression) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) EqualToExpression(org.apache.carbondata.core.scan.expression.conditional.EqualToExpression) InExpression(org.apache.carbondata.core.scan.expression.conditional.InExpression) ListExpression(org.apache.carbondata.core.scan.expression.conditional.ListExpression) GreaterThanExpression(org.apache.carbondata.core.scan.expression.conditional.GreaterThanExpression) GreaterThanEqualToExpression(org.apache.carbondata.core.scan.expression.conditional.GreaterThanEqualToExpression) LessThanExpression(org.apache.carbondata.core.scan.expression.conditional.LessThanExpression) LessThanEqualToExpression(org.apache.carbondata.core.scan.expression.conditional.LessThanEqualToExpression) AndExpression(org.apache.carbondata.core.scan.expression.logical.AndExpression) OrExpression(org.apache.carbondata.core.scan.expression.logical.OrExpression) DataType(org.apache.carbondata.core.metadata.datatype.DataType) ArrayList(java.util.ArrayList) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) TupleDomain(io.prestosql.spi.predicate.TupleDomain) Domain(io.prestosql.spi.predicate.Domain) Range(io.prestosql.spi.predicate.Range) HiveType(io.prestosql.plugin.hive.HiveType) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle)
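
To make the range handling concrete, here is a minimal sketch of the Carbon expression tree parseFilterExpression would produce for a predicate such as id > 5 AND id <= 10 on an integer column. DataTypes.INT (from org.apache.carbondata.core.metadata.datatype.DataTypes) is assumed to be the mapped Carbon type; the expression classes are the same ones used in the method above.

// One column filter: the lower bound is exclusive (GreaterThan), the upper bound inclusive (LessThanEqualTo).
ColumnExpression idColumn = new ColumnExpression("id", DataTypes.INT);
Expression lowerBound = new GreaterThanExpression(idColumn, new LiteralExpression(5, DataTypes.INT));
Expression upperBound = new LessThanEqualToExpression(idColumn, new LiteralExpression(10, DataTypes.INT));
// rangeExpression for the single range; it also becomes the column's colValueExpression
// and, with only one column filtered, the method's finalFilters.
Expression rangeFilter = new AndExpression(lowerBound, upperBound);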

Example 3 with HiveColumnHandle

Use of io.prestosql.plugin.hive.HiveColumnHandle in project carbondata by apache.

From the class CarbondataSplitManager, method getSplits.

@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorTableHandle tableHandle, SplitSchedulingStrategy splitSchedulingStrategy) {
    HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
    SchemaTableName schemaTableName = hiveTableHandle.getSchemaTableName();
    carbonTableReader.setPrestoQueryId(session.getQueryId());
    // get table metadata
    SemiTransactionalHiveMetastore metastore = metastoreProvider.apply((HiveTransactionHandle) transactionHandle);
    Table table = metastore.getTable(new HiveIdentity(session), schemaTableName.getSchemaName(), schemaTableName.getTableName()).orElseThrow(() -> new TableNotFoundException(schemaTableName));
    if (!table.getStorage().getStorageFormat().getInputFormat().contains("carbon")) {
        return super.getSplits(transactionHandle, session, tableHandle, splitSchedulingStrategy);
    }
    // For the Hive metastore, get the table location from the catalog table's tablePath serde parameter.
    String location = table.getStorage().getSerdeParameters().get("tablePath");
    if (StringUtils.isEmpty(location)) {
        // In the file metastore case tablePath can be null, so fall back to the storage location.
        location = table.getStorage().getLocation();
    }
    List<PartitionSpec> filteredPartitions = new ArrayList<>();
    if (hiveTableHandle.getPartitionColumns().size() > 0 && hiveTableHandle.getPartitions().isPresent()) {
        List<String> colNames = hiveTableHandle.getPartitionColumns().stream().map(HiveColumnHandle::getName).collect(Collectors.toList());
        for (HivePartition partition : hiveTableHandle.getPartitions().get()) {
            filteredPartitions.add(new PartitionSpec(colNames, location + CarbonCommonConstants.FILE_SEPARATOR + partition.getPartitionId()));
        }
    }
    String queryId = String.valueOf(System.nanoTime());
    QueryStatistic statistic = new QueryStatistic();
    QueryStatisticsRecorder statisticRecorder = CarbonTimeStatisticsFactory.createDriverRecorder();
    statistic.addStatistics(QueryStatisticsConstants.BLOCK_ALLOCATION, System.currentTimeMillis());
    statisticRecorder.recordStatisticsForDriver(statistic, queryId);
    statistic = new QueryStatistic();
    carbonTableReader.setQueryId(queryId);
    TupleDomain<HiveColumnHandle> predicate = hiveTableHandle.getCompactEffectivePredicate();
    Configuration configuration = this.hdfsEnvironment.getConfiguration(new HdfsEnvironment.HdfsContext(session, schemaTableName.getSchemaName(), schemaTableName.getTableName()), new Path(location));
    configuration = carbonTableReader.updateS3Properties(configuration);
    for (Map.Entry<String, String> entry : table.getStorage().getSerdeParameters().entrySet()) {
        configuration.set(entry.getKey(), entry.getValue());
    }
    // set the hadoop configuration to thread local, so that FileFactory can use it.
    ThreadLocalSessionInfo.setConfigurationToCurrentThread(configuration);
    CarbonTableCacheModel cache = carbonTableReader.getCarbonCache(schemaTableName, location, configuration);
    Expression filters = PrestoFilterUtil.parseFilterExpression(predicate);
    try {
        List<CarbonLocalMultiBlockSplit> splits = carbonTableReader.getInputSplits(cache, filters, filteredPartitions, configuration);
        ImmutableList.Builder<ConnectorSplit> cSplits = ImmutableList.builder();
        long index = 0;
        for (CarbonLocalMultiBlockSplit split : splits) {
            index++;
            Properties properties = new Properties();
            for (Map.Entry<String, String> entry : table.getStorage().getSerdeParameters().entrySet()) {
                properties.setProperty(entry.getKey(), entry.getValue());
            }
            properties.setProperty("tablePath", cache.getCarbonTable().getTablePath());
            properties.setProperty("carbonSplit", split.getJsonString());
            properties.setProperty("queryId", queryId);
            properties.setProperty("index", String.valueOf(index));
            cSplits.add(new HiveSplit(schemaTableName.getSchemaName(), schemaTableName.getTableName(), schemaTableName.getTableName(), cache.getCarbonTable().getTablePath(), 0, 0, 0, 0, properties, new ArrayList<>(), getHostAddresses(split.getLocations()), OptionalInt.empty(), false, TableToPartitionMapping.empty(), Optional.empty(), false, Optional.empty()));
        }
        statisticRecorder.logStatisticsAsTableDriver();
        statistic.addStatistics(QueryStatisticsConstants.BLOCK_IDENTIFICATION, System.currentTimeMillis());
        statisticRecorder.recordStatisticsForDriver(statistic, queryId);
        statisticRecorder.logStatisticsAsTableDriver();
        return new FixedSplitSource(cSplits.build());
    } catch (Exception ex) {
        throw new RuntimeException(ex.getMessage(), ex);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) Properties(java.util.Properties) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) HdfsEnvironment(io.prestosql.plugin.hive.HdfsEnvironment) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) HiveTableHandle(io.prestosql.plugin.hive.HiveTableHandle) FixedSplitSource(io.prestosql.spi.connector.FixedSplitSource) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle) QueryStatistic(org.apache.carbondata.core.stats.QueryStatistic) Path(org.apache.hadoop.fs.Path) Table(io.prestosql.plugin.hive.metastore.Table) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) PartitionSpec(org.apache.carbondata.core.indexstore.PartitionSpec) HiveSplit(io.prestosql.plugin.hive.HiveSplit) Expression(org.apache.carbondata.core.scan.expression.Expression) CarbonTableCacheModel(org.apache.carbondata.presto.impl.CarbonTableCacheModel) QueryStatisticsRecorder(org.apache.carbondata.core.stats.QueryStatisticsRecorder) CarbonLocalMultiBlockSplit(org.apache.carbondata.presto.impl.CarbonLocalMultiBlockSplit) Map(java.util.Map) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) HivePartition(io.prestosql.plugin.hive.HivePartition)
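
Each Carbon multi-block split reaches the reader through the Properties attached to its HiveSplit. A small sketch of how those entries are assembled, using the same keys as in getSplits above (the helper name buildCarbonSplitProperties is illustrative and not part of the connector):

// Illustrative helper; the property keys mirror those set in getSplits above.
private static Properties buildCarbonSplitProperties(Properties serdeParameters,
        String tablePath, String carbonSplitJson, String queryId, long index) {
    Properties properties = new Properties();
    // Start from the table's serde parameters, then add the Carbon-specific entries.
    for (String key : serdeParameters.stringPropertyNames()) {
        properties.setProperty(key, serdeParameters.getProperty(key));
    }
    properties.setProperty("tablePath", tablePath);
    properties.setProperty("carbonSplit", carbonSplitJson);
    properties.setProperty("queryId", queryId);
    properties.setProperty("index", String.valueOf(index));
    return properties;
}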

Aggregations

HiveColumnHandle (io.prestosql.plugin.hive.HiveColumnHandle) 3
ImmutableList (com.google.common.collect.ImmutableList) 2
ArrayList (java.util.ArrayList) 2
Expression (org.apache.carbondata.core.scan.expression.Expression) 2
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList) 1
HdfsEnvironment (io.prestosql.plugin.hive.HdfsEnvironment) 1
HivePartition (io.prestosql.plugin.hive.HivePartition) 1
HiveSplit (io.prestosql.plugin.hive.HiveSplit) 1
HiveTableHandle (io.prestosql.plugin.hive.HiveTableHandle) 1
HiveType (io.prestosql.plugin.hive.HiveType) 1
HiveIdentity (io.prestosql.plugin.hive.authentication.HiveIdentity) 1
SemiTransactionalHiveMetastore (io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) 1
Table (io.prestosql.plugin.hive.metastore.Table) 1
ColumnHandle (io.prestosql.spi.connector.ColumnHandle) 1
ConnectorSplit (io.prestosql.spi.connector.ConnectorSplit) 1
FixedSplitSource (io.prestosql.spi.connector.FixedSplitSource) 1
SchemaTableName (io.prestosql.spi.connector.SchemaTableName) 1
TableNotFoundException (io.prestosql.spi.connector.TableNotFoundException) 1
Domain (io.prestosql.spi.predicate.Domain) 1
Range (io.prestosql.spi.predicate.Range) 1