use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.
the class HiveInputFormat method pushFilters.
public static void pushFilters(JobConf jobConf, TableScanOperator tableScan) {
// ensure filters are not set from previous pushFilters
jobConf.unset(TableScanDesc.FILTER_TEXT_CONF_STR);
jobConf.unset(TableScanDesc.FILTER_EXPR_CONF_STR);
Utilities.unsetSchemaEvolution(jobConf);
TableScanDesc scanDesc = tableScan.getConf();
if (scanDesc == null) {
return;
}
Utilities.addTableSchemaToConf(jobConf, tableScan);
// construct column name list and types for reference by filter push down
Utilities.setColumnNameList(jobConf, tableScan);
Utilities.setColumnTypeList(jobConf, tableScan);
// push down filters
ExprNodeGenericFuncDesc filterExpr = (ExprNodeGenericFuncDesc) scanDesc.getFilterExpr();
if (filterExpr == null) {
return;
}
String serializedFilterObj = scanDesc.getSerializedFilterObject();
String serializedFilterExpr = scanDesc.getSerializedFilterExpr();
boolean hasObj = serializedFilterObj != null, hasExpr = serializedFilterExpr != null;
if (!hasObj) {
Serializable filterObject = scanDesc.getFilterObject();
if (filterObject != null) {
serializedFilterObj = SerializationUtilities.serializeObject(filterObject);
}
}
if (serializedFilterObj != null) {
jobConf.set(TableScanDesc.FILTER_OBJECT_CONF_STR, serializedFilterObj);
}
if (!hasExpr) {
serializedFilterExpr = SerializationUtilities.serializeExpression(filterExpr);
}
String filterText = filterExpr.getExprString();
if (LOG.isDebugEnabled()) {
LOG.debug("Pushdown initiated with filterText = " + filterText + ", filterExpr = " + filterExpr + ", serializedFilterExpr = " + serializedFilterExpr + " (" + (hasExpr ? "desc" : "new") + ")" + (serializedFilterObj == null ? "" : (", serializedFilterObj = " + serializedFilterObj + " (" + (hasObj ? "desc" : "new") + ")")));
}
jobConf.set(TableScanDesc.FILTER_TEXT_CONF_STR, filterText);
jobConf.set(TableScanDesc.FILTER_EXPR_CONF_STR, serializedFilterExpr);
}
use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.
the class TestOrcSplitElimination method testFooterExternalCacheImpl.
private void testFooterExternalCacheImpl(boolean isPpd) throws IOException {
ObjectInspector inspector = createIO();
writeFile(inspector, testFilePath);
writeFile(inspector, testFilePath2);
GenericUDF udf = new GenericUDFOPEqualOrLessThan();
List<ExprNodeDesc> childExpr = Lists.newArrayList();
createTestSarg(inspector, udf, childExpr);
setupExternalCacheConfig(isPpd, testFilePath + "," + testFilePath2);
// Get the base values w/o cache.
conf.setBoolean(ConfVars.HIVE_ORC_MS_FOOTER_CACHE_ENABLED.varname, false);
OrcInputFormatForTest.clearLocalCache();
OrcInputFormat in0 = new OrcInputFormat();
InputSplit[] originals = in0.getSplits(conf, -1);
assertEquals(10, originals.length);
HashSet<FsWithHash> originalHs = new HashSet<>();
for (InputSplit original : originals) {
originalHs.add(new FsWithHash((FileSplit) original));
}
// Populate the cache.
conf.setBoolean(ConfVars.HIVE_ORC_MS_FOOTER_CACHE_ENABLED.varname, true);
OrcInputFormatForTest in = new OrcInputFormatForTest();
OrcInputFormatForTest.clearLocalCache();
OrcInputFormatForTest.caches.resetCounts();
OrcInputFormatForTest.caches.cache.clear();
InputSplit[] splits = in.getSplits(conf, -1);
// Puts, gets, hits, unused, unused.
@SuppressWarnings("static-access") AtomicInteger[] counts = { in.caches.putCount, isPpd ? in.caches.getByExprCount : in.caches.getCount, isPpd ? in.caches.getHitByExprCount : in.caches.getHitCount, isPpd ? in.caches.getCount : in.caches.getByExprCount, isPpd ? in.caches.getHitCount : in.caches.getHitByExprCount };
verifySplits(originalHs, splits);
verifyCallCounts(counts, 2, 2, 0);
assertEquals(2, OrcInputFormatForTest.caches.cache.size());
// Verify we can get from cache.
OrcInputFormatForTest.clearLocalCache();
OrcInputFormatForTest.caches.resetCounts();
splits = in.getSplits(conf, -1);
verifySplits(originalHs, splits);
verifyCallCounts(counts, 0, 2, 2);
// Verify ORC SARG still works.
OrcInputFormatForTest.clearLocalCache();
OrcInputFormatForTest.caches.resetCounts();
childExpr.set(1, new ExprNodeConstantDesc(5));
conf.set("hive.io.filter.expr.serialized", SerializationUtilities.serializeExpression(new ExprNodeGenericFuncDesc(inspector, udf, childExpr)));
splits = in.getSplits(conf, -1);
InputSplit[] filtered = { originals[0], originals[4], originals[5], originals[9] };
originalHs = new HashSet<>();
for (InputSplit original : filtered) {
originalHs.add(new FsWithHash((FileSplit) original));
}
verifySplits(originalHs, splits);
verifyCallCounts(counts, 0, 2, 2);
// Verify corrupted cache value gets replaced.
OrcInputFormatForTest.clearLocalCache();
OrcInputFormatForTest.caches.resetCounts();
Map.Entry<Long, MockExternalCaches.MockItem> e = OrcInputFormatForTest.caches.cache.entrySet().iterator().next();
Long key = e.getKey();
byte[] someData = new byte[8];
ByteBuffer toCorrupt = e.getValue().data;
System.arraycopy(toCorrupt.array(), toCorrupt.arrayOffset(), someData, 0, someData.length);
toCorrupt.putLong(0, 0L);
splits = in.getSplits(conf, -1);
verifySplits(originalHs, splits);
if (!isPpd) {
// Recovery is not implemented yet for PPD path.
ByteBuffer restored = OrcInputFormatForTest.caches.cache.get(key).data;
byte[] newData = new byte[someData.length];
System.arraycopy(restored.array(), restored.arrayOffset(), newData, 0, newData.length);
assertArrayEquals(someData, newData);
}
}
use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.
the class TestParquetRowGroupFilter method testRowGroupFilterTakeEffect.
@Test
public void testRowGroupFilterTakeEffect() throws Exception {
// define schema
columnNames = "intCol";
columnTypes = "int";
StructObjectInspector inspector = getObjectInspector(columnNames, columnTypes);
MessageType fileSchema = MessageTypeParser.parseMessageType("message hive_schema {\n" + " optional int32 intCol;\n" + "}\n");
conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "intCol");
conf.set("columns", "intCol");
conf.set("columns.types", "int");
// create Parquet file with specific data
Path testPath = writeDirect("RowGroupFilterTakeEffect", fileSchema, new DirectWriter() {
@Override
public void write(RecordConsumer consumer) {
for (int i = 0; i < 100; i++) {
consumer.startMessage();
consumer.startField("int", 0);
consumer.addInteger(i);
consumer.endField("int", 0);
consumer.endMessage();
}
}
});
// > 50
GenericUDF udf = new GenericUDFOPGreaterThan();
List<ExprNodeDesc> children = Lists.newArrayList();
ExprNodeColumnDesc columnDesc = new ExprNodeColumnDesc(Integer.class, "intCol", "T", false);
ExprNodeConstantDesc constantDesc = new ExprNodeConstantDesc(50);
children.add(columnDesc);
children.add(constantDesc);
ExprNodeGenericFuncDesc genericFuncDesc = new ExprNodeGenericFuncDesc(inspector, udf, children);
String searchArgumentStr = SerializationUtilities.serializeExpression(genericFuncDesc);
conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, searchArgumentStr);
ParquetRecordReaderWrapper recordReader = (ParquetRecordReaderWrapper) new MapredParquetInputFormat().getRecordReader(new FileSplit(testPath, 0, fileLength(testPath), (String[]) null), conf, null);
Assert.assertEquals("row group is not filtered correctly", 1, recordReader.getFiltedBlocks().size());
// > 100
constantDesc = new ExprNodeConstantDesc(100);
children.set(1, constantDesc);
genericFuncDesc = new ExprNodeGenericFuncDesc(inspector, udf, children);
searchArgumentStr = SerializationUtilities.serializeExpression(genericFuncDesc);
conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, searchArgumentStr);
recordReader = (ParquetRecordReaderWrapper) new MapredParquetInputFormat().getRecordReader(new FileSplit(testPath, 0, fileLength(testPath), (String[]) null), conf, null);
Assert.assertEquals("row group is not filtered correctly", 0, recordReader.getFiltedBlocks().size());
}
use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.
the class TestColumnPrunerProcCtx method testGetSelectNestedColPathsFromChildren6.
// Test select abs(root.col1.b) from table test(root struct<col1:struct<a:boolean,b:double>,
// col2:double>);
@Test
public void testGetSelectNestedColPathsFromChildren6() {
ColumnPrunerProcCtx ctx = new ColumnPrunerProcCtx(null);
ExprNodeDesc colDesc = new ExprNodeColumnDesc(col3Type, "root", "test", false);
ExprNodeDesc col1 = new ExprNodeFieldDesc(col1Type, colDesc, "col1", false);
ExprNodeDesc fieldDesc = new ExprNodeFieldDesc(TypeInfoFactory.doubleTypeInfo, col1, "b", false);
final List<FieldNode> paths = Arrays.asList(new FieldNode("_col0"));
GenericUDF udf = mock(GenericUDFBridge.class);
List<ExprNodeDesc> list = new ArrayList<>();
list.add(fieldDesc);
ExprNodeDesc funcDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.binaryTypeInfo, udf, "abs", list);
SelectOperator selectOperator = buildSelectOperator(Arrays.asList(funcDesc), paths);
List<FieldNode> groups = ctx.getSelectColsFromChildren(selectOperator, paths);
compareTestResults(groups, "root.col1.b");
}
use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.
the class TestColumnPrunerProcCtx method testGetSelectNestedColPathsFromChildren5.
// Test select named_struct from named_struct:struct<a:boolean,b:double>
@Test
public void testGetSelectNestedColPathsFromChildren5() {
ColumnPrunerProcCtx ctx = new ColumnPrunerProcCtx(null);
ExprNodeConstantDesc constADesc = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, "a");
ExprNodeConstantDesc constBDesc = new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, "b");
List<ExprNodeDesc> list = new ArrayList<>();
list.add(constADesc);
list.add(constBDesc);
GenericUDF udf = mock(GenericUDF.class);
ExprNodeDesc funcDesc = new ExprNodeGenericFuncDesc(col1Type, udf, "named_struct", list);
ExprNodeDesc fieldDesc = new ExprNodeFieldDesc(TypeInfoFactory.doubleTypeInfo, funcDesc, "foo", false);
final List<FieldNode> paths = Arrays.asList(new FieldNode("_col0"));
SelectOperator selectOperator = buildSelectOperator(Arrays.asList(fieldDesc), paths);
List<FieldNode> groups = ctx.getSelectColsFromChildren(selectOperator, paths);
// Return empty result since only constant Desc exists
assertEquals(0, groups.size());
}
Aggregations