Use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by Apache.
Class CarbondataSplitManager, method parseFilterExpression.
/**
 * Converts a Presto {@code TupleDomain} predicate into a Carbon scan filter expression.
 *
 * <p>Every constrained column contributes one expression: its single values (an equality or
 * an IN list) OR-ed with each of its ranges, where a range is the AND of its lower and upper
 * bound comparisons. The per-column expressions are then AND-ed together.
 *
 * <p>Columns whose value set is "all" are skipped, and so is a column that has a range for
 * which no bound comparison can be built (see {@code buildRangeExpression}).
 *
 * @param originalConstraint Presto predicate; its keys are cast to {@code CarbondataColumnHandle}
 * @param carbonTable table whose columns the predicate refers to
 * @return the combined filter, or {@code null} when there is nothing to push down: the
 *         constraint has no per-column domains, a referenced column is not found in the
 *         table, or no column produced a filter
 * @throws IllegalArgumentException if a domain type is not orderable or a range marker
 *         carries an impossible bound
 */
public Expression parseFilterExpression(TupleDomain<ColumnHandle> originalConstraint, CarbonTable carbonTable) {
  // Calling get() on an absent domain map would throw; treat it as "no filter" instead.
  if (!originalConstraint.getDomains().isPresent()) {
    return null;
  }
  // The column list does not depend on the handle being processed, so fetch it once.
  List<CarbonColumn> tableColumns = carbonTable.getCreateOrderColumn(carbonTable.getFactTableName());
  ImmutableList.Builder<Expression> filters = ImmutableList.builder();

  for (ColumnHandle c : originalConstraint.getDomains().get().keySet()) {
    CarbondataColumnHandle cdch = (CarbondataColumnHandle) c;
    Type type = cdch.getColumnType();
    Optional<CarbonColumn> target = tableColumns.stream()
        .filter(a -> a.getColName().equals(cdch.getColumnName()))
        .findFirst();
    // Optional.get() never returns null, it throws; test for presence explicitly.
    if (!target.isPresent()) {
      return null;
    }
    CarbonColumn carbonColumn = target.get();
    DataType coltype = carbonColumn.getDataType();

    ColumnExpression colExpression = new ColumnExpression(cdch.getColumnName(), coltype);
    colExpression.setDimension(carbonColumn.isDimension());
    colExpression.setDimension(carbonTable.getDimensionByName(carbonTable.getFactTableName(), cdch.getColumnName()));
    colExpression.setCarbonColumn(carbonColumn);

    Domain domain = originalConstraint.getDomains().get().get(c);
    checkArgument(domain.getType().isOrderable(), "Domain type must be orderable");
    // NOTE(review): domain.isNullAllowed() is not consulted, so IS NULL / IS NOT NULL parts of
    // the predicate are not translated - confirm the engine re-applies them after the scan.
    if (domain.getValues().isAll()) {
      // Every value is accepted: nothing to push down for this column.
      continue;
    }

    List<Object> singleValues = new ArrayList<>();
    List<Expression> rangeFilters = new ArrayList<>();
    boolean hasInexpressibleRange = false;
    for (Range range : domain.getValues().getRanges().getOrderedRanges()) {
      // Already checked
      checkState(!range.isAll());
      if (range.isSingleValue()) {
        singleValues.add(range.getLow().getValue());
        continue;
      }
      Expression rangeFilter = buildRangeExpression(range, type, colExpression, coltype);
      if (rangeFilter == null) {
        hasInexpressibleRange = true;
        break;
      }
      rangeFilters.add(rangeFilter);
    }
    if (hasInexpressibleRange) {
      // The ranges are alternatives; if one of them cannot be expressed at all, any filter
      // built from the others would wrongly exclude its rows, so push nothing for this column.
      continue;
    }

    List<Expression> alternatives = new ArrayList<>();
    if (singleValues.size() == 1) {
      // NOTE(review): unlike the IN-list branch, a lone non-string value is used without
      // ConvertDataByType; kept as in the original - confirm this asymmetry is intended.
      Object literal = coltype.equals(DataType.STRING)
          ? ((Slice) singleValues.get(0)).toStringUtf8()
          : singleValues.get(0);
      alternatives.add(new EqualToExpression(colExpression, new LiteralExpression(literal, coltype)));
    } else if (singleValues.size() > 1) {
      List<Expression> candidates = new ArrayList<>();
      for (Object singleValue : singleValues) {
        candidates.add(new LiteralExpression(ConvertDataByType(singleValue, type), coltype));
      }
      alternatives.add(new InExpression(colExpression, new ListExpression(candidates)));
    }
    alternatives.addAll(rangeFilters);

    if (!alternatives.isEmpty()) {
      // Single values and ranges of one column are alternatives, hence OR.
      Expression columnFilter = alternatives.get(0);
      for (int i = 1; i < alternatives.size(); i++) {
        columnFilter = new OrExpression(columnFilter, alternatives.get(i));
      }
      filters.add(columnFilter);
    }
  }

  List<Expression> columnFilters = filters.build();
  if (columnFilters.isEmpty()) {
    //no filter
    return null;
  }
  // Constraints on different columns must all hold, hence AND.
  Expression finalFilters = columnFilters.get(0);
  for (int i = 1; i < columnFilters.size(); i++) {
    finalFilters = new AndExpression(finalFilters, columnFilters.get(i));
  }
  return finalFilters;
}

/**
 * Builds the comparison for one non-single-value range: the lower bound AND the upper bound,
 * or whichever of the two can be expressed.
 *
 * @return the range expression, or {@code null} when neither bound yields a comparison
 */
private Expression buildRangeExpression(Range range, Type type, ColumnExpression colExpression, DataType coltype) {
  Expression lower = null;
  if (!range.getLow().isLowerUnbounded()) {
    Object value = ConvertDataByType(range.getLow().getValue(), type);
    switch (range.getLow().getBound()) {
      case ABOVE:
        if (type == TimestampType.TIMESTAMP) {
          //todo not now
        } else {
          lower = new GreaterThanExpression(colExpression, new LiteralExpression(value, coltype));
        }
        break;
      case EXACTLY:
        lower = new GreaterThanEqualToExpression(colExpression, new LiteralExpression(value, coltype));
        break;
      case BELOW:
        throw new IllegalArgumentException("Low marker should never use BELOW bound");
      default:
        throw new AssertionError("Unhandled bound: " + range.getLow().getBound());
    }
  }

  Expression upper = null;
  if (!range.getHigh().isUpperUnbounded()) {
    Object value = ConvertDataByType(range.getHigh().getValue(), type);
    switch (range.getHigh().getBound()) {
      case ABOVE:
        throw new IllegalArgumentException("High marker should never use ABOVE bound");
      case EXACTLY:
        upper = new LessThanEqualToExpression(colExpression, new LiteralExpression(value, coltype));
        break;
      case BELOW:
        upper = new LessThanExpression(colExpression, new LiteralExpression(value, coltype));
        break;
      default:
        throw new AssertionError("Unhandled bound: " + range.getHigh().getBound());
    }
  }

  if (lower == null) {
    return upper;
  }
  if (upper == null) {
    return lower;
  }
  return new AndExpression(lower, upper);
}
Use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by Apache.
Class FilterUtil, method updateIndexOfColumnExpression.
/**
 * Walks an expression tree and sets the column index on every leaf
 * {@code ColumnExpression}.
 *
 * <p>A dimension column gets its own ordinal as index; any other column gets
 * {@code dimOridnalMax} plus its ordinal.
 *
 * @param exp root of the expression tree; {@code null} is ignored
 * @param dimOridnalMax offset added to the ordinal of non-dimension columns
 */
public static void updateIndexOfColumnExpression(Expression exp, int dimOridnalMax) {
  // Nothing to update for a missing expression.
  if (exp == null) {
    return;
  }

  List<Expression> subExpressions = exp.getChildren();
  boolean isLeaf = subExpressions == null || subExpressions.size() == 0;
  if (!isLeaf) {
    // Inner node: recurse into every child.
    for (Expression subExpression : subExpressions) {
      updateIndexOfColumnExpression(subExpression, dimOridnalMax);
    }
    return;
  }

  // Leaf node: only column expressions carry an index.
  if (!(exp instanceof ColumnExpression)) {
    return;
  }
  ColumnExpression columnExpression = (ColumnExpression) exp;
  CarbonColumn carbonColumn = columnExpression.getCarbonColumn();
  int index = carbonColumn.isDimension()
      ? carbonColumn.getOrdinal()
      : dimOridnalMax + carbonColumn.getOrdinal();
  columnExpression.setColIndex(index);
}
Use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by Apache.
Class LuceneDataMapFactoryBase, method validateAndGetIndexedColumns.
/**
 * Validates the TEXT_COLUMNS property of a Lucene DataMap and returns the indexed columns.
 *
 * <p>Rules enforced, in this order:
 * <ol>
 *   <li>the TEXT_COLUMNS property must be present and not blank;</li>
 *   <li>no entry may be empty after trimming;</li>
 *   <li>no column may be listed twice (names are compared trimmed and lower-cased);</li>
 *   <li>every column must exist in the table;</li>
 *   <li>every column must be of String data type.</li>
 * </ol>
 *
 * @param dataMapSchema schema whose properties hold TEXT_COLUMNS
 * @param carbonTable table the columns are looked up in
 * @return the column names as reported by the table, in the order they were listed
 * @throws MalformedDataMapCommandException if any rule above is violated
 */
private List<String> validateAndGetIndexedColumns(DataMapSchema dataMapSchema, CarbonTable carbonTable) throws MalformedDataMapCommandException {
  String rawProperty = dataMapSchema.getProperties().get(TEXT_COLUMNS);
  if (rawProperty == null || StringUtils.isBlank(rawProperty)) {
    throw new MalformedDataMapCommandException("Lucene DataMap require proper TEXT_COLUMNS property.");
  }

  // Limit -1 keeps trailing empty entries so that "a,b," is rejected below.
  String[] names = rawProperty.split(",", -1);
  for (int idx = 0; idx < names.length; idx++) {
    names[idx] = names[idx].trim().toLowerCase();
  }

  // All names are normalised before this pass because each one is compared with the later ones.
  for (int current = 0; current < names.length; current++) {
    String name = names[current];
    if (name.isEmpty()) {
      throw new MalformedDataMapCommandException("TEXT_COLUMNS contains illegal argument.");
    }
    for (int later = current + 1; later < names.length; later++) {
      if (name.equals(names[later])) {
        throw new MalformedDataMapCommandException("TEXT_COLUMNS has duplicate columns :" + name);
      }
    }
  }

  List<String> indexedColumns = new ArrayList<String>(names.length);
  for (String name : names) {
    CarbonColumn column = carbonTable.getColumnByName(carbonTable.getTableName(), name);
    if (null == column) {
      throw new MalformedDataMapCommandException("TEXT_COLUMNS: " + name + " does not exist in table. Please check create DataMap statement.");
    }
    if (column.getDataType() != DataTypes.STRING) {
      throw new MalformedDataMapCommandException("TEXT_COLUMNS only supports String column. " + "Unsupported column: " + name + ", DataType: " + column.getDataType());
    }
    indexedColumns.add(column.getColName());
  }
  return indexedColumns;
}
Use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by Apache.
Class FilterUtilTest, method testgetFilterListForRS.
/**
 * Checks that {@code FilterUtil.getFilterListForRS} returns a {@code ColumnFilterInfo} for a
 * String column expression, with {@code ColumnExpression.getCarbonColumn} and
 * {@code RowImpl.getVal} replaced by mock-ups.
 */
@Test
public void testgetFilterListForRS() throws Exception {
  Expression imeiExpression = new ColumnExpression("IMEI", DataTypes.STRING);
  ColumnExpression unusedColumnExpression = new ColumnExpression("IMEI", DataTypes.STRING);
  String memberDefault = CarbonCommonConstants.MEMBER_DEFAULT_VAL;

  // Column built from the test's columnSchema with ordinal 1.
  final CarbonColumn mockedColumn = new CarbonColumn(columnSchema, 1, -1);

  // Every ColumnExpression reports the column created above.
  new MockUp<ColumnExpression>() {
    @Mock
    public CarbonColumn getCarbonColumn() {
      return mockedColumn;
    }
  };
  // Every row value reads as the string "test".
  new MockUp<RowImpl>() {
    @Mock
    public Object getVal(int index) {
      return "test";
    }
  };

  Object filterInfo = FilterUtil.getFilterListForRS(imeiExpression, memberDefault, 1);
  assertTrue(filterInfo instanceof ColumnFilterInfo);
}
Use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by Apache.
Class DataLoadProcessBuilder, method createConfiguration.
/**
 * Builds the data-load configuration for the table described by the given load model.
 *
 * <p>Copies the load options from the model into a new configuration, registers the table
 * with {@code CarbonMetadata}, and lays out the data fields in this order: non-complex
 * dimensions, complex dimensions, then measures (the {@code default_dummy_measure}
 * placeholder is left out).
 *
 * @param loadModel source of the table and of all load options
 * @return the populated configuration
 */
public static CarbonDataLoadConfiguration createConfiguration(CarbonLoadModel loadModel) {
  CarbonDataLoadConfiguration config = new CarbonDataLoadConfiguration();
  CarbonTable table = loadModel.getCarbonDataLoadSchema().getCarbonTable();

  AbsoluteTableIdentifier tableIdentifier = table.getAbsoluteTableIdentifier();
  config.setTableIdentifier(tableIdentifier);
  config.setSchemaUpdatedTimeStamp(table.getTableLastUpdatedTime());
  config.setHeader(loadModel.getCsvHeaderColumns());
  config.setSegmentId(loadModel.getSegmentId());
  config.setTaskNo(loadModel.getTaskNo());

  String[] complexDelimiters = new String[] { loadModel.getComplexDelimiterLevel1(), loadModel.getComplexDelimiterLevel2() };
  config.setDataLoadProperty(DataLoadProcessorConstants.COMPLEX_DELIMITERS, complexDelimiters);
  // The options below are stored as comma-separated text; only the second token is used.
  // NOTE(review): presumably "optionName,value" - confirm against where the model is filled.
  config.setDataLoadProperty(DataLoadProcessorConstants.SERIALIZATION_NULL_FORMAT, loadModel.getSerializationNullFormat().split(",")[1]);
  config.setDataLoadProperty(DataLoadProcessorConstants.FACT_TIME_STAMP, loadModel.getFactTimeStamp());
  config.setDataLoadProperty(DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ENABLE, loadModel.getBadRecordsLoggerEnable().split(",")[1]);
  config.setDataLoadProperty(DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ACTION, loadModel.getBadRecordsAction().split(",")[1]);
  config.setDataLoadProperty(DataLoadProcessorConstants.IS_EMPTY_DATA_BAD_RECORD, loadModel.getIsEmptyDataBadRecord().split(",")[1]);
  config.setDataLoadProperty(DataLoadProcessorConstants.SKIP_EMPTY_LINE, loadModel.getSkipEmptyLine());
  config.setDataLoadProperty(DataLoadProcessorConstants.FACT_FILE_PATH, loadModel.getFactFilePath());
  config.setDataLoadProperty(CarbonCommonConstants.LOAD_SORT_SCOPE, loadModel.getSortScope());
  config.setDataLoadProperty(CarbonCommonConstants.LOAD_BATCH_SORT_SIZE_INMB, loadModel.getBatchSortSizeInMb());
  config.setDataLoadProperty(CarbonCommonConstants.LOAD_GLOBAL_SORT_PARTITIONS, loadModel.getGlobalSortPartitions());
  config.setDataLoadProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORD_PATH, loadModel.getBadRecordsLocation());

  CarbonMetadata.getInstance().addCarbonTable(table);

  String tableName = table.getTableName();
  List<CarbonDimension> tableDimensions = table.getDimensionByTableName(tableName);
  List<CarbonMeasure> tableMeasures = table.getMeasureByTableName(tableName);

  // Non-complex dimensions go first; complex ones are collected apart and appended after them.
  List<DataField> fields = new ArrayList<>();
  List<DataField> complexFields = new ArrayList<>();
  for (CarbonColumn dimension : tableDimensions) {
    DataField field = new DataField(dimension);
    if (dimension.getDataType() == DataTypes.DATE) {
      field.setDateFormat(loadModel.getDateFormat());
    } else if (dimension.getDataType() == DataTypes.TIMESTAMP) {
      field.setTimestampFormat(loadModel.getTimestampformat());
    }
    List<DataField> destination = dimension.isComplex() ? complexFields : fields;
    destination.add(field);
  }
  fields.addAll(complexFields);

  for (CarbonColumn measure : tableMeasures) {
    // The dummy measure exists only because the table had no measure; it is not loaded.
    if (measure.getColName().equals("default_dummy_measure")) {
      continue;
    }
    fields.add(new DataField(measure));
  }
  config.setDataFields(fields.toArray(new DataField[fields.size()]));
  config.setBucketingInfo(table.getBucketingInfo(tableName));

  // One-pass load: dictionary server settings.
  config.setUseOnePass(loadModel.getUseOnePass());
  config.setDictionaryServerHost(loadModel.getDictionaryServerHost());
  config.setDictionaryServerPort(loadModel.getDictionaryServerPort());
  config.setDictionaryServerSecretKey(loadModel.getDictionaryServerSecretKey());
  config.setDictionaryEncryptServerSecure(loadModel.getDictionaryEncryptServerSecure());
  config.setDictionaryServiceProvider(loadModel.getDictionaryServiceProvider());

  config.setPreFetch(loadModel.isPreFetch());
  config.setNumberOfSortColumns(table.getNumberOfSortColumns());
  config.setNumberOfNoDictSortColumns(table.getNumberOfNoDictSortColumns());
  config.setDataWritePath(loadModel.getDataWritePath());
  setSortColumnInfo(table, loadModel, config);

  // Hive-partitioned tables are written with a single writing core.
  if (table.isHivePartitionTable()) {
    config.setWritingCoresCount((short) 1);
  }

  TableSpec spec = new TableSpec(table);
  config.setTableSpec(spec);
  return config;
}
Aggregations