Search in sources :

Example 1 with CarbonColumn

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by apache.

the class CarbondataSplitManager method parseFilterExpression.

/**
 * Converts a Presto {@code TupleDomain} predicate into a Carbon scan filter expression.
 *
 * <p>Each constrained column contributes one expression: the OR of its alternatives, where an
 * alternative is either the set of single values (an equality for one value, an IN list for
 * several) or one range, expressed as the AND of its lower and upper bound. The per-column
 * expressions are then AND-ed together.
 *
 * <p>A column is left out of the result when its domain is none/all, or when one of its ranges
 * cannot be expressed at all (so the column is effectively unconstrained).
 *
 * @param originalConstraint Presto predicate keyed by column handle
 * @param carbonTable table whose create-order columns are used to resolve each column handle
 * @return the combined filter expression, or {@code null} when there is nothing to push down or
 *         when a column handle does not match any column of the table
 */
public Expression parseFilterExpression(TupleDomain<ColumnHandle> originalConstraint, CarbonTable carbonTable) {
    ImmutableList.Builder<Expression> filters = ImmutableList.builder();
    // The column list does not depend on the handle being processed, so look it up once.
    List<CarbonColumn> ccols = carbonTable.getCreateOrderColumn(carbonTable.getFactTableName());
    for (ColumnHandle c : originalConstraint.getDomains().get().keySet()) {
        CarbondataColumnHandle cdch = (CarbondataColumnHandle) c;
        Type type = cdch.getColumnType();
        Optional<CarbonColumn> target = ccols.stream().filter(a -> a.getColName().equals(cdch.getColumnName())).findFirst();
        // Optional.get() throws on an empty Optional, so test for presence explicitly.
        if (!target.isPresent()) {
            return null;
        }
        CarbonColumn carbonColumn = target.get();
        DataType coltype = carbonColumn.getDataType();
        ColumnExpression colExpression = new ColumnExpression(cdch.getColumnName(), coltype);
        colExpression.setDimension(carbonColumn.isDimension());
        colExpression.setDimension(carbonTable.getDimensionByName(carbonTable.getFactTableName(), cdch.getColumnName()));
        colExpression.setCarbonColumn(carbonColumn);
        Domain domain = originalConstraint.getDomains().get().get(c);
        checkArgument(domain.getType().isOrderable(), "Domain type must be orderable");
        // NOTE(review): domain.isNullAllowed() is not consulted here; confirm how nulls are
        // expected to be handled for pushed-down filters.
        if (domain.getValues().isNone() || domain.getValues().isAll()) {
            // Nothing range-based to express for this column; without this guard an
            // all-values domain would trip the checkState below.
            continue;
        }
        List<Object> singleValues = new ArrayList<>();
        List<Expression> rangeAlternatives = new ArrayList<>();
        boolean unconstrained = false;
        for (Range range : domain.getValues().getRanges().getOrderedRanges()) {
            // Already checked
            checkState(!range.isAll());
            if (range.isSingleValue()) {
                singleValues.add(range.getLow().getValue());
                continue;
            }
            // Bounds of one range must hold together, so they are AND-ed.
            List<Expression> bounds = new ArrayList<>();
            if (!range.getLow().isLowerUnbounded()) {
                Object value = ConvertDataByType(range.getLow().getValue(), type);
                switch (range.getLow().getBound()) {
                    case ABOVE:
                        if (type == TimestampType.TIMESTAMP) {
                            // TODO: strict lower bounds on timestamp columns are not translated yet;
                            // the bound is dropped, which only widens the filter.
                        } else {
                            bounds.add(new GreaterThanExpression(colExpression, new LiteralExpression(value, coltype)));
                        }
                        break;
                    case EXACTLY:
                        bounds.add(new GreaterThanEqualToExpression(colExpression, new LiteralExpression(value, coltype)));
                        break;
                    case BELOW:
                        throw new IllegalArgumentException("Low marker should never use BELOW bound");
                    default:
                        throw new AssertionError("Unhandled bound: " + range.getLow().getBound());
                }
            }
            if (!range.getHigh().isUpperUnbounded()) {
                Object value = ConvertDataByType(range.getHigh().getValue(), type);
                switch (range.getHigh().getBound()) {
                    case ABOVE:
                        throw new IllegalArgumentException("High marker should never use ABOVE bound");
                    case EXACTLY:
                        bounds.add(new LessThanEqualToExpression(colExpression, new LiteralExpression(value, coltype)));
                        break;
                    case BELOW:
                        bounds.add(new LessThanExpression(colExpression, new LiteralExpression(value, coltype)));
                        break;
                    default:
                        throw new AssertionError("Unhandled bound: " + range.getHigh().getBound());
                }
            }
            if (bounds.isEmpty()) {
                // This range could not be expressed at all, so the OR over all alternatives
                // would accept everything: no filter can be emitted for this column.
                unconstrained = true;
            } else {
                rangeAlternatives.add(andAll(bounds));
            }
        }
        if (unconstrained) {
            continue;
        }
        // Single values and ranges are alternatives of the same domain, so they are OR-ed.
        List<Expression> alternatives = new ArrayList<>();
        if (singleValues.size() == 1) {
            Object only = singleValues.get(0);
            Object literal = coltype.equals(DataType.STRING) ? ((Slice) only).toStringUtf8() : only;
            alternatives.add(new EqualToExpression(colExpression, new LiteralExpression(literal, coltype)));
        } else if (singleValues.size() > 1) {
            List<Expression> exs = singleValues.stream()
                .<Expression>map(a -> new LiteralExpression(ConvertDataByType(a, type), coltype))
                .collect(Collectors.toList());
            alternatives.add(new InExpression(colExpression, new ListExpression(exs)));
        }
        alternatives.addAll(rangeAlternatives);
        if (!alternatives.isEmpty()) {
            filters.add(orAll(alternatives));
        }
    }
    List<Expression> tmp = filters.build();
    if (tmp.isEmpty()) {
        //no filter
        return null;
    }
    return andAll(tmp);
}

/** Folds a non-empty list of expressions into a left-deep chain of AND nodes. */
private static Expression andAll(List<Expression> expressions) {
    Expression combined = expressions.get(0);
    for (int i = 1; i < expressions.size(); i++) {
        combined = new AndExpression(combined, expressions.get(i));
    }
    return combined;
}

/** Folds a non-empty list of expressions into a left-deep chain of OR nodes. */
private static Expression orAll(List<Expression> expressions) {
    Expression combined = expressions.get(0);
    for (int i = 1; i < expressions.size(); i++) {
        combined = new OrExpression(combined, expressions.get(i));
    }
    return combined;
}
Also used : Slice(io.airlift.slice.Slice) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) CarbonLocalInputSplit(org.apache.carbondata.presto.impl.CarbonLocalInputSplit) ConnectorTransactionHandle(com.facebook.presto.spi.connector.ConnectorTransactionHandle) com.facebook.presto.spi.type(com.facebook.presto.spi.type) ArrayList(java.util.ArrayList) Inject(javax.inject.Inject) CarbonTableReader(org.apache.carbondata.presto.impl.CarbonTableReader) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ImmutableList(com.google.common.collect.ImmutableList) AndExpression(org.apache.carbondata.core.scan.expression.logical.AndExpression) Objects.requireNonNull(java.util.Objects.requireNonNull) CarbonTableCacheModel(org.apache.carbondata.presto.impl.CarbonTableCacheModel) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) Expression(org.apache.carbondata.core.scan.expression.Expression) CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) ConnectorSplitManager(com.facebook.presto.spi.connector.ConnectorSplitManager) DataType(org.apache.carbondata.core.metadata.datatype.DataType) Types.checkType(org.apache.carbondata.presto.Types.checkType) Range(com.facebook.presto.spi.predicate.Range) OrExpression(org.apache.carbondata.core.scan.expression.logical.OrExpression) Collectors(java.util.stream.Collectors) Preconditions.checkState(com.google.common.base.Preconditions.checkState) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) Domain(com.facebook.presto.spi.predicate.Domain) org.apache.carbondata.core.scan.expression.conditional(org.apache.carbondata.core.scan.expression.conditional) List(java.util.List) Optional(java.util.Optional) com.facebook.presto.spi(com.facebook.presto.spi) CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) 
ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) OrExpression(org.apache.carbondata.core.scan.expression.logical.OrExpression) AndExpression(org.apache.carbondata.core.scan.expression.logical.AndExpression) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) DataType(org.apache.carbondata.core.metadata.datatype.DataType) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) Range(com.facebook.presto.spi.predicate.Range) DataType(org.apache.carbondata.core.metadata.datatype.DataType) Types.checkType(org.apache.carbondata.presto.Types.checkType) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) AndExpression(org.apache.carbondata.core.scan.expression.logical.AndExpression) Expression(org.apache.carbondata.core.scan.expression.Expression) OrExpression(org.apache.carbondata.core.scan.expression.logical.OrExpression) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) Domain(com.facebook.presto.spi.predicate.Domain)

Example 2 with CarbonColumn

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by apache.

the class FilterUtil method updateIndexOfColumnExpression.

/**
 * Walks the expression tree and sets the column index on every leaf {@code ColumnExpression}.
 *
 * <p>A dimension column gets its own ordinal as index; any other column gets its ordinal
 * shifted by {@code dimOridnalMax}.
 *
 * @param exp root of the expression tree; {@code null} is ignored
 * @param dimOridnalMax offset added to the ordinal of non-dimension columns
 */
public static void updateIndexOfColumnExpression(Expression exp, int dimOridnalMax) {
    // Nothing to update for a missing expression.
    if (exp == null) {
        return;
    }
    List<Expression> children = exp.getChildren();
    boolean isLeaf = children == null || children.isEmpty();
    if (!isLeaf) {
        // Inner node: recurse into every child.
        for (Expression child : children) {
            updateIndexOfColumnExpression(child, dimOridnalMax);
        }
        return;
    }
    if (!(exp instanceof ColumnExpression)) {
        return;
    }
    ColumnExpression columnExpression = (ColumnExpression) exp;
    CarbonColumn carbonColumn = columnExpression.getCarbonColumn();
    int offset = carbonColumn.isDimension() ? 0 : dimOridnalMax;
    columnExpression.setColIndex(offset + carbonColumn.getOrdinal());
}
Also used : CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) Expression(org.apache.carbondata.core.scan.expression.Expression) TrueExpression(org.apache.carbondata.core.scan.expression.logical.TrueExpression) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) AndExpression(org.apache.carbondata.core.scan.expression.logical.AndExpression) ListExpression(org.apache.carbondata.core.scan.expression.conditional.ListExpression) InExpression(org.apache.carbondata.core.scan.expression.conditional.InExpression) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression)

Example 3 with CarbonColumn

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by apache.

the class LuceneDataMapFactoryBase method validateAndGetIndexedColumns.

/**
 * Validates the TEXT_COLUMNS property of a Lucene DataMap and returns the indexed column names.
 *
 * <p>Checks, in this order:
 * <ol>
 *   <li>the TEXT_COLUMNS property is present and not blank;</li>
 *   <li>no entry is empty after trimming;</li>
 *   <li>no entry appears twice (entries are compared trimmed and lower-cased);</li>
 *   <li>every entry names a column of the table;</li>
 *   <li>every such column has the String data type.</li>
 * </ol>
 *
 * @param dataMapSchema schema whose properties carry TEXT_COLUMNS
 * @param carbonTable table the columns are looked up in
 * @return the column names as reported by the table, in the order they were listed
 * @throws MalformedDataMapCommandException if any of the checks above fails
 */
private List<String> validateAndGetIndexedColumns(DataMapSchema dataMapSchema, CarbonTable carbonTable) throws MalformedDataMapCommandException {
    final String rawTextColumns = dataMapSchema.getProperties().get(TEXT_COLUMNS);
    if (rawTextColumns == null || StringUtils.isBlank(rawTextColumns)) {
        throw new MalformedDataMapCommandException("Lucene DataMap require proper TEXT_COLUMNS property.");
    }
    // A negative limit keeps trailing empty tokens, so an input such as "a," is rejected below.
    final String[] names = rawTextColumns.split(",", -1);
    for (int pos = 0; pos < names.length; pos++) {
        String normalized = names[pos].trim();
        names[pos] = normalized.toLowerCase();
    }
    for (int current = 0; current < names.length; current++) {
        String name = names[current];
        if (name.isEmpty()) {
            throw new MalformedDataMapCommandException("TEXT_COLUMNS contains illegal argument.");
        }
        // Compare only against later entries; earlier pairs were already checked.
        for (int later = current + 1; later < names.length; later++) {
            if (name.equals(names[later])) {
                throw new MalformedDataMapCommandException("TEXT_COLUMNS has duplicate columns :" + name);
            }
        }
    }
    List<String> indexedColumns = new ArrayList<>(names.length);
    for (String name : names) {
        CarbonColumn column = carbonTable.getColumnByName(carbonTable.getTableName(), name);
        if (column == null) {
            throw new MalformedDataMapCommandException("TEXT_COLUMNS: " + name + " does not exist in table. Please check create DataMap statement.");
        }
        if (column.getDataType() != DataTypes.STRING) {
            throw new MalformedDataMapCommandException("TEXT_COLUMNS only supports String column. Unsupported column: " + name + ", DataType: " + column.getDataType());
        }
        indexedColumns.add(column.getColName());
    }
    return indexedColumns;
}
Also used : CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) MalformedDataMapCommandException(org.apache.carbondata.common.exceptions.sql.MalformedDataMapCommandException) ArrayList(java.util.ArrayList)

Example 4 with CarbonColumn

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by apache.

the class FilterUtilTest method testgetFilterListForRS.

/**
 * Checks that {@code FilterUtil.getFilterListForRS} returns a {@code ColumnFilterInfo} for a
 * string column expression, with {@code ColumnExpression.getCarbonColumn()} and
 * {@code RowImpl.getVal(int)} replaced by fakes.
 */
@Test
public void testgetFilterListForRS() throws Exception {
    Expression filterExpression = new ColumnExpression("IMEI", DataTypes.STRING);
    // Not referenced again in this test; kept so the set of constructed objects is unchanged.
    ColumnExpression columnExpression = new ColumnExpression("IMEI", DataTypes.STRING);
    // Column handed out by the faked getCarbonColumn(): ordinal 1, third constructor argument -1.
    final CarbonColumn carbonColumn = new CarbonColumn(columnSchema, 1, -1);
    new MockUp<ColumnExpression>() {

        @Mock
        public CarbonColumn getCarbonColumn() {
            return carbonColumn;
        }
    };
    new MockUp<RowImpl>() {

        @Mock
        public Object getVal(int index) {
            return "test";
        }
    };
    Object filterInfo = FilterUtil.getFilterListForRS(filterExpression, CarbonCommonConstants.MEMBER_DEFAULT_VAL, 1);
    assertTrue(filterInfo instanceof ColumnFilterInfo);
}
Also used : CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) AndExpression(org.apache.carbondata.core.scan.expression.logical.AndExpression) Expression(org.apache.carbondata.core.scan.expression.Expression) ListExpression(org.apache.carbondata.core.scan.expression.conditional.ListExpression) TrueExpression(org.apache.carbondata.core.scan.expression.logical.TrueExpression) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) InExpression(org.apache.carbondata.core.scan.expression.conditional.InExpression) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) MockUp(mockit.MockUp) Test(org.junit.Test) AbstractDictionaryCacheTest(org.apache.carbondata.core.cache.dictionary.AbstractDictionaryCacheTest)

Example 5 with CarbonColumn

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by apache.

the class DataLoadProcessBuilder method createConfiguration.

/**
 * Builds the data-load configuration for the table referenced by the given load model.
 *
 * <p>Copies identifiers, header, load options, dictionary-server settings and sort information
 * from the model and table into a new configuration, registers the table with
 * {@code CarbonMetadata}, and assembles the data fields in this order: non-complex dimensions,
 * complex dimensions, then measures (skipping the "default_dummy_measure" placeholder).
 *
 * @param loadModel source of the table and of all load options
 * @return the populated configuration
 */
public static CarbonDataLoadConfiguration createConfiguration(CarbonLoadModel loadModel) {
    CarbonDataLoadConfiguration config = new CarbonDataLoadConfiguration();
    CarbonTable table = loadModel.getCarbonDataLoadSchema().getCarbonTable();
    config.setTableIdentifier(table.getAbsoluteTableIdentifier());
    config.setSchemaUpdatedTimeStamp(table.getTableLastUpdatedTime());
    config.setHeader(loadModel.getCsvHeaderColumns());
    config.setSegmentId(loadModel.getSegmentId());
    config.setTaskNo(loadModel.getTaskNo());

    // Load options. Several model values are comma-separated and only the second token is used.
    String[] complexDelimiters = new String[] { loadModel.getComplexDelimiterLevel1(), loadModel.getComplexDelimiterLevel2() };
    config.setDataLoadProperty(DataLoadProcessorConstants.COMPLEX_DELIMITERS, complexDelimiters);
    config.setDataLoadProperty(DataLoadProcessorConstants.SERIALIZATION_NULL_FORMAT, loadModel.getSerializationNullFormat().split(",")[1]);
    config.setDataLoadProperty(DataLoadProcessorConstants.FACT_TIME_STAMP, loadModel.getFactTimeStamp());
    config.setDataLoadProperty(DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ENABLE, loadModel.getBadRecordsLoggerEnable().split(",")[1]);
    config.setDataLoadProperty(DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ACTION, loadModel.getBadRecordsAction().split(",")[1]);
    config.setDataLoadProperty(DataLoadProcessorConstants.IS_EMPTY_DATA_BAD_RECORD, loadModel.getIsEmptyDataBadRecord().split(",")[1]);
    config.setDataLoadProperty(DataLoadProcessorConstants.SKIP_EMPTY_LINE, loadModel.getSkipEmptyLine());
    config.setDataLoadProperty(DataLoadProcessorConstants.FACT_FILE_PATH, loadModel.getFactFilePath());
    config.setDataLoadProperty(CarbonCommonConstants.LOAD_SORT_SCOPE, loadModel.getSortScope());
    config.setDataLoadProperty(CarbonCommonConstants.LOAD_BATCH_SORT_SIZE_INMB, loadModel.getBatchSortSizeInMb());
    config.setDataLoadProperty(CarbonCommonConstants.LOAD_GLOBAL_SORT_PARTITIONS, loadModel.getGlobalSortPartitions());
    config.setDataLoadProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORD_PATH, loadModel.getBadRecordsLocation());

    CarbonMetadata.getInstance().addCarbonTable(table);
    String tableName = table.getTableName();
    List<CarbonDimension> dimensions = table.getDimensionByTableName(tableName);
    List<CarbonMeasure> measures = table.getMeasureByTableName(tableName);

    // Field order: non-complex dimensions, then complex dimensions, then measures.
    List<DataField> dataFields = new ArrayList<>();
    List<DataField> complexDataFields = new ArrayList<>();
    for (CarbonColumn dimension : dimensions) {
        DataField field = new DataField(dimension);
        if (dimension.getDataType() == DataTypes.DATE) {
            field.setDateFormat(loadModel.getDateFormat());
        } else if (dimension.getDataType() == DataTypes.TIMESTAMP) {
            field.setTimestampFormat(loadModel.getTimestampformat());
        }
        List<DataField> destination = dimension.isComplex() ? complexDataFields : dataFields;
        destination.add(field);
    }
    dataFields.addAll(complexDataFields);
    for (CarbonColumn measure : measures) {
        // The dummy measure exists only when the table has no real measure; it is not loaded.
        if (measure.getColName().equals("default_dummy_measure")) {
            continue;
        }
        dataFields.add(new DataField(measure));
    }
    config.setDataFields(dataFields.toArray(new DataField[0]));
    config.setBucketingInfo(table.getBucketingInfo(tableName));

    // configuration for one pass load: dictionary server info
    config.setUseOnePass(loadModel.getUseOnePass());
    config.setDictionaryServerHost(loadModel.getDictionaryServerHost());
    config.setDictionaryServerPort(loadModel.getDictionaryServerPort());
    config.setDictionaryServerSecretKey(loadModel.getDictionaryServerSecretKey());
    config.setDictionaryEncryptServerSecure(loadModel.getDictionaryEncryptServerSecure());
    config.setDictionaryServiceProvider(loadModel.getDictionaryServiceProvider());

    config.setPreFetch(loadModel.isPreFetch());
    config.setNumberOfSortColumns(table.getNumberOfSortColumns());
    config.setNumberOfNoDictSortColumns(table.getNumberOfNoDictSortColumns());
    config.setDataWritePath(loadModel.getDataWritePath());
    setSortColumnInfo(table, loadModel, config);

    // Hive-partitioned tables are written with a single writing core.
    if (table.isHivePartitionTable()) {
        config.setWritingCoresCount((short) 1);
    }
    config.setTableSpec(new TableSpec(table));
    return config;
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) TableSpec(org.apache.carbondata.core.datastore.TableSpec) CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) ArrayList(java.util.ArrayList) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)

Aggregations

CarbonColumn (org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn)19 ArrayList (java.util.ArrayList)8 CarbonMeasure (org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure)7 CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)6 ColumnExpression (org.apache.carbondata.core.scan.expression.ColumnExpression)6 LiteralExpression (org.apache.carbondata.core.scan.expression.LiteralExpression)6 InExpression (org.apache.carbondata.core.scan.expression.conditional.InExpression)6 Expression (org.apache.carbondata.core.scan.expression.Expression)5 AndExpression (org.apache.carbondata.core.scan.expression.logical.AndExpression)5 AbstractDictionaryCacheTest (org.apache.carbondata.core.cache.dictionary.AbstractDictionaryCacheTest)4 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)4 ListExpression (org.apache.carbondata.core.scan.expression.conditional.ListExpression)4 TrueExpression (org.apache.carbondata.core.scan.expression.logical.TrueExpression)4 MockUp (mockit.MockUp)3 DataType (org.apache.carbondata.core.metadata.datatype.DataType)3 Test (org.junit.Test)3 BufferedReader (java.io.BufferedReader)2 FileReader (java.io.FileReader)2 HashSet (java.util.HashSet)2 Set (java.util.Set)2