use of org.apache.iceberg.expressions.Expression in project drill by apache.
the class IcebergPluginImplementor method canImplement.
@Override
public boolean canImplement(Filter filter) {
  RexNode condition = filter.getCondition();
  LogicalExpression logicalExpression = DrillOptiq.toDrill(
      new DrillParseContext(PrelUtil.getPlannerSettings(filter.getCluster().getPlanner())),
      filter.getInput(), condition);
  Expression expression = logicalExpression.accept(DrillExprToIcebergTranslator.INSTANCE, null);
  if (expression != null) {
    try {
      GroupScan scan = findGroupScan(filter);
      if (scan instanceof IcebergGroupScan) {
        IcebergGroupScan groupScan = (IcebergGroupScan) scan;
        // ensures that the expression is compatible with the table schema
        expression = Binder.bind(groupScan.getTableScan().schema().asStruct(), expression, true);
      } else {
        return false;
      }
    } catch (ValidationException e) {
      return false;
    }
  }
  return expression != null;
}
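The key step above is binding the translated expression against the table schema to decide whether the filter can be pushed down. A minimal standalone sketch of that check, using a made-up schema and column names (only Binder.bind and the Expressions factory come from the snippet):

import org.apache.iceberg.Schema;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.expressions.Binder;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.types.Types;

public class BindExample {
  static boolean canPushDown(Schema schema, Expression unbound) {
    try {
      // true = case-sensitive column resolution, mirroring the Drill code above
      Binder.bind(schema.asStruct(), unbound, true);
      return true;
    } catch (ValidationException e) {
      // missing column or incompatible type: the filter cannot be pushed down
      return false;
    }
  }

  public static void main(String[] args) {
    Schema schema = new Schema(
        Types.NestedField.required(1, "id", Types.LongType.get()),
        Types.NestedField.optional(2, "name", Types.StringType.get()));
    System.out.println(canPushDown(schema, Expressions.equal("id", 42L)));     // true
    System.out.println(canPushDown(schema, Expressions.equal("missing", 1)));  // false
  }
}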
use of org.apache.iceberg.expressions.Expression in project drill by apache.
the class IcebergGroupScan method initTableScan.
public static TableScan initTableScan(IcebergFormatPlugin formatPlugin, String path, LogicalExpression condition) {
  TableScan tableScan = new HadoopTables(formatPlugin.getFsConf()).load(path).newScan();
  Map<String, String> properties = formatPlugin.getConfig().getProperties();
  if (properties != null) {
    for (Map.Entry<String, String> entry : properties.entrySet()) {
      tableScan = tableScan.option(entry.getKey(), entry.getValue());
    }
  }
  if (condition != null) {
    Expression expression = condition.accept(DrillExprToIcebergTranslator.INSTANCE, null);
    tableScan = tableScan.filter(expression);
  }
  Snapshot snapshot = formatPlugin.getConfig().getSnapshot();
  if (snapshot != null) {
    tableScan = snapshot.apply(tableScan);
  }
  Boolean caseSensitive = formatPlugin.getConfig().getCaseSensitive();
  if (caseSensitive != null) {
    tableScan = tableScan.caseSensitive(caseSensitive);
  }
  Boolean includeColumnStats = formatPlugin.getConfig().getIncludeColumnStats();
  if (includeColumnStats != null && includeColumnStats) {
    tableScan = tableScan.includeColumnStats();
  }
  Boolean ignoreResiduals = formatPlugin.getConfig().getIgnoreResiduals();
  if (ignoreResiduals != null && ignoreResiduals) {
    tableScan = tableScan.ignoreResiduals();
  }
  return tableScan;
}
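Outside of Drill, the same pattern of loading a Hadoop-located table and configuring a scan can be sketched as follows. The table path, column name, and split size are made up for illustration; the API calls (HadoopTables, newScan, option, filter, includeColumnStats, ignoreResiduals) are the ones used in the snippet above.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.TableScan;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.hadoop.HadoopTables;
import org.apache.iceberg.io.CloseableIterable;

public class ScanExample {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Table table = new HadoopTables(conf).load("/tmp/warehouse/db/events"); // hypothetical path

    // Every TableScan method returns a new, immutable scan, so the result is reassigned each time.
    TableScan scan = table.newScan()
        .caseSensitive(false)
        .option(TableProperties.SPLIT_SIZE, String.valueOf(128L * 1024 * 1024))
        .filter(Expressions.greaterThanOrEqual("event_ts", 1700000000L))
        .includeColumnStats()
        .ignoreResiduals();

    try (CloseableIterable<FileScanTask> tasks = scan.planFiles()) {
      tasks.forEach(task -> System.out.println(task.file().path()));
    }
  }
}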
use of org.apache.iceberg.expressions.Expression in project hive by apache.
the class IcebergInputFormat method createTableScan.
private static TableScan createTableScan(Table table, Configuration conf) {
  TableScan scan = table.newScan()
      .caseSensitive(conf.getBoolean(InputFormatConfig.CASE_SENSITIVE, InputFormatConfig.CASE_SENSITIVE_DEFAULT));
  long snapshotId = conf.getLong(InputFormatConfig.SNAPSHOT_ID, -1);
  if (snapshotId != -1) {
    scan = scan.useSnapshot(snapshotId);
  }
  long asOfTime = conf.getLong(InputFormatConfig.AS_OF_TIMESTAMP, -1);
  if (asOfTime != -1) {
    scan = scan.asOfTime(asOfTime);
  }
  long splitSize = conf.getLong(InputFormatConfig.SPLIT_SIZE, 0);
  if (splitSize > 0) {
    scan = scan.option(TableProperties.SPLIT_SIZE, String.valueOf(splitSize));
  }
  // In case of LLAP-based execution we ask Iceberg not to combine multiple fileScanTasks into one split,
  // so that cache affinity can work and each file (split) is always executed/cached on the same LLAP daemon.
  MapWork mapWork = LlapHiveUtils.findMapWork((JobConf) conf);
  if (mapWork != null && mapWork.getCacheAffinity()) {
    // Iceberg splits logically consist of buckets, where the bucket size equals the openFileCost setting if the
    // files assigned to such a bucket are smaller. This is how Iceberg combines multiple files into one split,
    // so here we set the open file cost equal to the split size to avoid file combination.
    Long openFileCost = splitSize > 0 ? splitSize : TableProperties.SPLIT_SIZE_DEFAULT;
    scan = scan.option(TableProperties.SPLIT_OPEN_FILE_COST, String.valueOf(openFileCost));
  }
  String schemaStr = conf.get(InputFormatConfig.READ_SCHEMA);
  if (schemaStr != null) {
    // TableScan is immutable, so the projected scan must be reassigned
    scan = scan.project(SchemaParser.fromJson(schemaStr));
  }
  String[] selectedColumns = conf.getStrings(InputFormatConfig.SELECTED_COLUMNS);
  if (selectedColumns != null) {
    scan = scan.select(selectedColumns);
  }
  // TODO add a filter parser to get rid of Serialization
  Expression filter = SerializationUtil.deserializeFromBase64(conf.get(InputFormatConfig.FILTER_EXPRESSION));
  if (filter != null) {
    scan = scan.filter(filter);
  }
  return scan;
}
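The filter arrives in this method as a base64 string in the job Configuration and is restored with SerializationUtil. A minimal sketch of that round trip is below; the literal configuration key and the column names are illustrative stand-ins (the snippet uses the InputFormatConfig.FILTER_EXPRESSION constant), while SerializationUtil and the Expressions factory are the classes used above.

import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.util.SerializationUtil;

public class FilterConfExample {
  // Stand-in for the InputFormatConfig.FILTER_EXPRESSION key; the literal value here is illustrative only.
  private static final String FILTER_KEY = "iceberg.mr.filter.expression";

  public static void main(String[] args) {
    Expression filter = Expressions.and(
        Expressions.greaterThanOrEqual("event_ts", 1700000000L),
        Expressions.notNull("user_id"));

    // Producer side: serialize the unbound expression into the job configuration.
    Configuration conf = new Configuration();
    conf.set(FILTER_KEY, SerializationUtil.serializeToBase64(filter));

    // Reader side: restore it and hand it to TableScan.filter(...), as createTableScan does above.
    Expression restored = SerializationUtil.deserializeFromBase64(conf.get(FILTER_KEY));
    System.out.println(restored);
  }
}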
use of org.apache.iceberg.expressions.Expression in project hive by apache.
the class VectorizedReadUtils method handleIcebergProjection.
/**
 * Adjusts the jobConf so that column reorders and renames that might have happened since this ORC file was written
 * are properly mapped to the schema of the original file.
 * @param task - Iceberg task - required for obtaining the partition spec and the residual filter expression
 * @param job - JobConf instance to adjust
 * @param fileSchema - ORC file schema of the input file
 * @throws IOException - errors relating to accessing the ORC file
 */
public static void handleIcebergProjection(FileScanTask task, JobConf job, TypeDescription fileSchema) throws IOException {
  // We need to map with the current (i.e. current Hive table columns) full schema (without projections),
  // as OrcInputFormat will take care of the projections by the use of an include boolean array
  PartitionSpec spec = task.spec();
  Schema currentSchema = spec.schema();
  TypeDescription readOrcSchema;
  if (ORCSchemaUtil.hasIds(fileSchema)) {
    readOrcSchema = ORCSchemaUtil.buildOrcProjection(currentSchema, fileSchema);
  } else {
    Schema readSchemaForOriginalFile = currentSchema;
    // In case of migrated, originally partitioned tables, partition values are not present in the file
    if (spec.isPartitioned()) {
      readSchemaForOriginalFile = currentSchema.select(currentSchema.columns().stream()
          .filter(c -> !spec.identitySourceIds().contains(c.fieldId()))
          .map(c -> c.name())
          .collect(Collectors.toList()));
    }
    TypeDescription typeWithIds = ORCSchemaUtil.applyNameMapping(fileSchema, MappingUtil.create(currentSchema));
    readOrcSchema = ORCSchemaUtil.buildOrcProjection(readSchemaForOriginalFile, typeWithIds);
  }
  job.set(ColumnProjectionUtils.ORC_SCHEMA_STRING, readOrcSchema.toString());
  // Predicate pushdown needs to be adjusted too in case of column renames; we let Iceberg generate this into the job
  if (task.residual() != null) {
    Expression boundFilter = Binder.bind(currentSchema.asStruct(), task.residual(), false);
    // Note the use of the unshaded version of this class here (required for SARG deserialization later)
    org.apache.hadoop.hive.ql.io.sarg.SearchArgument sarg = ExpressionToOrcSearchArgument.convert(boundFilter, readOrcSchema);
    if (sarg != null) {
      job.unset(TableScanDesc.FILTER_EXPR_CONF_STR);
      job.unset(ConvertAstToSearchArg.SARG_PUSHDOWN);
      job.set(ConvertAstToSearchArg.SARG_PUSHDOWN, ConvertAstToSearchArg.sargToKryo(sarg));
    }
  }
}
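The else branch above relies on a name mapping: MappingUtil.create builds a field-name to field-id mapping from the current Iceberg schema, which is then applied to files written without Iceberg field ids (e.g. migrated ORC files). A small standalone sketch with a made-up schema:

import org.apache.iceberg.Schema;
import org.apache.iceberg.mapping.MappingUtil;
import org.apache.iceberg.mapping.NameMapping;
import org.apache.iceberg.mapping.NameMappingParser;
import org.apache.iceberg.types.Types;

public class NameMappingExample {
  public static void main(String[] args) {
    Schema currentSchema = new Schema(
        Types.NestedField.required(1, "id", Types.LongType.get()),
        Types.NestedField.optional(2, "name", Types.StringType.get()));

    // Derive the mapping from the current schema; readers use it to assign ids to id-less file columns.
    NameMapping mapping = MappingUtil.create(currentSchema);
    System.out.println(NameMappingParser.toJson(mapping));
  }
}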
use of org.apache.iceberg.expressions.Expression in project metacat by Netflix.
the class IcebergFilterGenerator method visit.
@Override
public Object visit(final ASTBETWEEN node, final Object data) {
  final Object value = node.jjtGetChild(0).jjtAccept(this, data);
  final Object startValue = node.jjtGetChild(1).jjtAccept(this, data);
  final Object endValue = node.jjtGetChild(2).jjtAccept(this, data);
  final Expression compare1 = createIcebergExpression(value, startValue, node.not ? Compare.LT : Compare.GTE);
  final Expression compare2 = createIcebergExpression(value, endValue, node.not ? Compare.GT : Compare.LTE);
  return (node.not) ? Expressions.or(compare1, compare2) : Expressions.and(compare1, compare2);
}
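The visitor above turns BETWEEN into a conjunction and NOT BETWEEN into the complementary disjunction. The same expressions, built directly with the Iceberg Expressions factory (the column name and bounds are made up for illustration):

import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;

public class BetweenExample {
  public static void main(String[] args) {
    // age BETWEEN 18 AND 65  ->  age >= 18 AND age <= 65
    Expression between = Expressions.and(
        Expressions.greaterThanOrEqual("age", 18),
        Expressions.lessThanOrEqual("age", 65));

    // age NOT BETWEEN 18 AND 65  ->  age < 18 OR age > 65
    Expression notBetween = Expressions.or(
        Expressions.lessThan("age", 18),
        Expressions.greaterThan("age", 65));

    System.out.println(between);
    System.out.println(notBetween);
  }
}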