Use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.
Class TestVectorIndex, method doIndexOnRandomDataType.
private boolean doIndexOnRandomDataType(Random random, boolean isList, String keyTypeName, String elementRootTypeName, boolean allowNulls, boolean isScalarIndex) throws Exception {
String elementTypeName = VectorRandomRowSource.getDecoratedTypeName(random, elementRootTypeName, SupportedTypes.ALL, /* allowedTypeNameSet */ null, /* depth */ 0, /* maxDepth */ 3);
TypeInfo elementTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(elementTypeName);
ObjectInspector elementObjectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(elementTypeInfo);
// ----------------------------------------------------------------------------------------------
final TypeInfo keyTypeInfo;
if (isList) {
keyTypeInfo = TypeInfoFactory.intTypeInfo;
} else {
keyTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(keyTypeName);
}
final ObjectInspector keyObjectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(keyTypeInfo);
Object exampleObject = (isList ? ((WritableIntObjectInspector) keyObjectInspector).create(0) : VectorRandomRowSource.randomWritable(random, keyTypeInfo, keyObjectInspector, DataTypePhysicalVariation.NONE, /* allowNull */ false));
WritableComparator writableComparator = WritableComparator.get((Class<? extends WritableComparable>) exampleObject.getClass());
final int allKeyCount = 10 + random.nextInt(10);
final int keyCount = 5 + random.nextInt(allKeyCount / 2);
List<Object> allKeyList = new ArrayList<Object>(allKeyCount);
Set<Object> allKeyTreeSet = new TreeSet<Object>(writableComparator);
int fillAllKeyCount = 0;
while (fillAllKeyCount < allKeyCount) {
Object object;
if (isList) {
WritableIntObjectInspector writableOI = (WritableIntObjectInspector) keyObjectInspector;
int index = random.nextInt(keyCount);
object = writableOI.create(index);
while (allKeyTreeSet.contains(object)) {
index = (random.nextBoolean() ? random.nextInt() : (random.nextBoolean() ? -1 : keyCount));
object = writableOI.create(index);
}
} else {
do {
object = VectorRandomRowSource.randomWritable(random, keyTypeInfo, keyObjectInspector, DataTypePhysicalVariation.NONE, /* allowNull */ false);
} while (allKeyTreeSet.contains(object));
}
allKeyList.add(object);
allKeyTreeSet.add(object);
fillAllKeyCount++;
}
List<Object> keyList = new ArrayList<Object>();
Set<Object> keyTreeSet = new TreeSet<Object>(writableComparator);
int fillKeyCount = 0;
while (fillKeyCount < keyCount) {
Object newKey = allKeyList.get(random.nextInt(allKeyCount));
if (keyTreeSet.contains(newKey)) {
continue;
}
keyList.add(newKey);
keyTreeSet.add(newKey);
fillKeyCount++;
}
// ----------------------------------------------------------------------------------------------
final TypeInfo typeInfo;
if (isList) {
ListTypeInfo listTypeInfo = new ListTypeInfo();
listTypeInfo.setListElementTypeInfo(elementTypeInfo);
typeInfo = listTypeInfo;
} else {
MapTypeInfo mapTypeInfo = new MapTypeInfo();
mapTypeInfo.setMapKeyTypeInfo(keyTypeInfo);
mapTypeInfo.setMapValueTypeInfo(elementTypeInfo);
typeInfo = mapTypeInfo;
}
final String typeName = typeInfo.getTypeName();
final ObjectInspector objectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo);
// ----------------------------------------------------------------------------------------------
GenerationSpec generationSpec = GenerationSpec.createSameType(typeInfo);
List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
List<String> columns = new ArrayList<String>();
List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
int columnNum = 1;
ExprNodeDesc keyColExpr;
if (!isScalarIndex) {
generationSpecList.add(GenerationSpec.createValueList(keyTypeInfo, keyList));
explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
String columnName = "col" + columnNum++;
columns.add(columnName);
keyColExpr = new ExprNodeColumnDesc(keyTypeInfo, columnName, "table", false);
} else {
Object scalarWritable = keyList.get(random.nextInt(keyCount));
final Object scalarObject = VectorRandomRowSource.getNonWritableObject(scalarWritable, keyTypeInfo, keyObjectInspector);
keyColExpr = new ExprNodeConstantDesc(keyTypeInfo, scalarObject);
}
/*
System.out.println("*DEBUG* typeName " + typeName);
System.out.println("*DEBUG* keyColExpr " + keyColExpr.toString());
System.out.println("*DEBUG* keyList " + keyList.toString());
System.out.println("*DEBUG* allKeyList " + allKeyList.toString());
*/
generationSpecList.add(GenerationSpec.createValueList(typeInfo, keyList));
explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
String columnName = "col" + columnNum++;
columns.add(columnName);
ExprNodeDesc listOrMapColExpr;
listOrMapColExpr = new ExprNodeColumnDesc(typeInfo, columnName, "table", false);
children.add(listOrMapColExpr);
children.add(keyColExpr);
VectorRandomRowSource rowSource = new VectorRandomRowSource();
rowSource.initGenerationSpecSchema(random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ allowNulls, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList);
String[] columnNames = columns.toArray(new String[0]);
Object[][] randomRows = rowSource.randomRows(100000);
VectorRandomBatchSource batchSource = VectorRandomBatchSource.createInterestingBatches(random, rowSource, randomRows, null);
final GenericUDF udf = new GenericUDFIndex();
ObjectInspector[] argumentOIs = new ObjectInspector[2];
argumentOIs[0] = objectInspector;
argumentOIs[1] = keyObjectInspector;
final ObjectInspector outputObjectInspector = udf.initialize(argumentOIs);
TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector);
ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(elementTypeInfo, udf, children);
final int rowCount = randomRows.length;
Object[][] resultObjectsArray = new Object[IndexTestMode.count][];
for (int i = 0; i < IndexTestMode.count; i++) {
Object[] resultObjects = new Object[rowCount];
resultObjectsArray[i] = resultObjects;
IndexTestMode indexTestMode = IndexTestMode.values()[i];
switch(indexTestMode) {
case ROW_MODE:
if (!doRowCastTest(typeInfo, columns, children, udf, exprDesc, randomRows, rowSource.rowStructObjectInspector(), elementObjectInspector, outputTypeInfo, resultObjects)) {
return false;
}
break;
case ADAPTOR:
case VECTOR_EXPRESSION:
if (!doVectorCastTest(typeInfo, columns, columnNames, rowSource.typeInfos(), rowSource.dataTypePhysicalVariations(), children, udf, exprDesc, indexTestMode, batchSource, exprDesc.getWritableObjectInspector(), outputTypeInfo, resultObjects)) {
return false;
}
break;
default:
throw new RuntimeException("Unexpected IF statement test mode " + indexTestMode);
}
}
for (int i = 0; i < rowCount; i++) {
// Row-mode is the expected value.
Object expectedResult = resultObjectsArray[0][i];
for (int v = 1; v < IndexTestMode.count; v++) {
Object vectorResult = resultObjectsArray[v][i];
IndexTestMode indexTestMode = IndexTestMode.values()[v];
if (expectedResult == null || vectorResult == null) {
if (expectedResult != null || vectorResult != null) {
Assert.fail("Row " + i + " sourceTypeName " + typeName + " " + indexTestMode + " result is NULL " + (vectorResult == null ? "YES" : "NO result " + vectorResult.toString()) + " does not match row-mode expected result is NULL " + (expectedResult == null ? "YES" : "NO result " + expectedResult.toString()) + " row values " + Arrays.toString(randomRows[i]) + " exprDesc " + exprDesc.toString());
}
} else {
if (!expectedResult.equals(vectorResult)) {
Assert.fail("Row " + i + " sourceTypeName " + typeName + " " + indexTestMode + " result " + vectorResult.toString() + " (" + vectorResult.getClass().getSimpleName() + ")" + " does not match row-mode expected result " + expectedResult.toString() + " (" + expectedResult.getClass().getSimpleName() + ")" + " row values " + Arrays.toString(randomRows[i]) + " exprDesc " + exprDesc.toString());
}
}
}
}
return true;
}
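For orientation, the following is a minimal, hypothetical sketch (not code taken from the test) of the core pattern exercised above: GenericUDFIndex wrapped in an ExprNodeGenericFuncDesc models an index expression such as arr[0]. The column name "arr", the list<string> element type, and the constant key 0 are assumptions chosen for illustration.

import java.util.Arrays;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIndex;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

private static ExprNodeGenericFuncDesc buildIndexExprSketch() {
    // Hypothetical list<string> column "arr" indexed by the constant 0, i.e. arr[0].
    ListTypeInfo listTypeInfo = new ListTypeInfo();
    listTypeInfo.setListElementTypeInfo(TypeInfoFactory.stringTypeInfo);
    ExprNodeDesc listCol = new ExprNodeColumnDesc(listTypeInfo, "arr", "table", false);
    ExprNodeDesc key = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 0);
    // The result type of arr[i] is the list's element type, here string.
    return new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFIndex(), Arrays.asList(listCol, key));
}

In the test, the same shape is produced dynamically: children holds the list or map column expression followed by either a key column or a key constant, and GenericUDFIndex.initialize derives the element output type.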
Use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.
Class TestVectorIfStatement, method doVectorIfTest.
private void doVectorIfTest(TypeInfo typeInfo, IfVariation ifVariation, List<String> columns, String[] columnNames, TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, List<ExprNodeDesc> children, IfStmtTestMode ifStmtTestMode, ColumnScalarMode columnScalarMode, VectorRandomBatchSource batchSource, Object[] resultObjects) throws Exception {
final boolean isFilter = ifVariation.isFilter;
GenericUDF udf;
switch(ifStmtTestMode) {
case VECTOR_EXPRESSION:
udf = new GenericUDFIf();
break;
case ADAPTOR_WHEN:
udf = new GenericUDFWhen();
break;
default:
throw new RuntimeException("Unexpected IF statement test mode " + ifStmtTestMode);
}
ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(typeInfo, udf, children);
String ifExprMode = (ifStmtTestMode != IfStmtTestMode.VECTOR_EXPRESSION ? "adaptor" : "good");
HiveConf hiveConf = new HiveConf();
hiveConf.setVar(HiveConf.ConfVars.HIVE_VECTORIZED_IF_EXPR_MODE, ifExprMode);
VectorizationContext vectorizationContext = new VectorizationContext("name", columns, Arrays.asList(typeInfos), Arrays.asList(dataTypePhysicalVariations), hiveConf);
VectorExpression vectorExpression = vectorizationContext.getVectorExpression(exprDesc, (isFilter ? VectorExpressionDescriptor.Mode.FILTER : VectorExpressionDescriptor.Mode.PROJECTION));
final TypeInfo outputTypeInfo;
final ObjectInspector objectInspector;
if (!isFilter) {
outputTypeInfo = vectorExpression.getOutputTypeInfo();
objectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(outputTypeInfo);
} else {
outputTypeInfo = null;
objectInspector = null;
}
if (ifStmtTestMode == IfStmtTestMode.VECTOR_EXPRESSION && vectorExpression instanceof VectorUDFAdaptor) {
System.out.println("*NO NATIVE VECTOR EXPRESSION* typeInfo " + typeInfo.toString() + " ifStmtTestMode " + ifStmtTestMode + " ifVariation " + ifVariation + " columnScalarMode " + columnScalarMode + " vectorExpression " + vectorExpression.toString());
}
String[] outputScratchTypeNames = vectorizationContext.getScratchColumnTypeNames();
DataTypePhysicalVariation[] outputDataTypePhysicalVariations = vectorizationContext.getScratchDataTypePhysicalVariations();
VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(columnNames, typeInfos, dataTypePhysicalVariations, /* dataColumnNums */ null, /* partitionColumnCount */ 0, /* virtualColumnCount */ 0, /* neededVirtualColumns */ null, outputScratchTypeNames, outputDataTypePhysicalVariations);
VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
// System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName());
/*
System.out.println(
"*DEBUG* typeInfo " + typeInfo.toString() +
" ifStmtTestMode " + ifStmtTestMode +
" ifVariation " + ifVariation +
" columnScalarMode " + columnScalarMode +
" vectorExpression " + vectorExpression.toString());
*/
VectorExtractRow resultVectorExtractRow = null;
Object[] scratchRow = null;
if (!isFilter) {
resultVectorExtractRow = new VectorExtractRow();
final int outputColumnNum = vectorExpression.getOutputColumnNum();
resultVectorExtractRow.init(new TypeInfo[] { outputTypeInfo }, new int[] { outputColumnNum });
scratchRow = new Object[1];
}
boolean copySelectedInUse = false;
int[] copySelected = new int[VectorizedRowBatch.DEFAULT_SIZE];
batchSource.resetBatchIteration();
int rowIndex = 0;
while (true) {
if (!batchSource.fillNextBatch(batch)) {
break;
}
final int originalBatchSize = batch.size;
if (isFilter) {
copySelectedInUse = batch.selectedInUse;
if (batch.selectedInUse) {
System.arraycopy(batch.selected, 0, copySelected, 0, originalBatchSize);
}
}
// In filter mode, the batch size can be made smaller.
vectorExpression.evaluate(batch);
if (!isFilter) {
extractResultObjects(batch, rowIndex, resultVectorExtractRow, scratchRow, objectInspector, resultObjects);
} else {
final int currentBatchSize = batch.size;
if (copySelectedInUse && batch.selectedInUse) {
int selectIndex = 0;
for (int i = 0; i < originalBatchSize; i++) {
final int originalBatchIndex = copySelected[i];
final boolean booleanResult;
if (selectIndex < currentBatchSize && batch.selected[selectIndex] == originalBatchIndex) {
booleanResult = true;
selectIndex++;
} else {
booleanResult = false;
}
resultObjects[rowIndex + i] = new BooleanWritable(booleanResult);
}
} else if (batch.selectedInUse) {
int selectIndex = 0;
for (int i = 0; i < originalBatchSize; i++) {
final boolean booleanResult;
if (selectIndex < currentBatchSize && batch.selected[selectIndex] == i) {
booleanResult = true;
selectIndex++;
} else {
booleanResult = false;
}
resultObjects[rowIndex + i] = new BooleanWritable(booleanResult);
}
} else if (currentBatchSize == 0) {
// Whole batch got zapped.
for (int i = 0; i < originalBatchSize; i++) {
resultObjects[rowIndex + i] = new BooleanWritable(false);
}
} else {
// Every row kept.
for (int i = 0; i < originalBatchSize; i++) {
resultObjects[rowIndex + i] = new BooleanWritable(true);
}
}
}
rowIndex += originalBatchSize;
}
}
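For context, here is a minimal, hypothetical sketch (illustrative names, not the test's generated data) of the kind of expression this method receives as children: an IF(predicate, thenValue, elseValue) built from GenericUDFIf and wrapped in ExprNodeGenericFuncDesc.

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

private static ExprNodeGenericFuncDesc buildIfExprSketch() {
    // Hypothetical IF(boolCol, 1, 0): a boolean predicate column followed by the THEN and ELSE constants.
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    children.add(new ExprNodeColumnDesc(TypeInfoFactory.booleanTypeInfo, "boolCol", "table", false));
    children.add(new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 1));
    children.add(new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 0));
    // The expression's output type matches the THEN/ELSE branches, here int.
    return new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, new GenericUDFIf(), children);
}

Note how the test toggles HiveConf.ConfVars.HIVE_VECTORIZED_IF_EXPR_MODE between "adaptor" and "good" so that VectorizationContext compiles the same exprDesc either through VectorUDFAdaptor or into a native vectorized IF expression.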
Use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.
Class TestVectorNegative, method doTests.
private void doTests(Random random, TypeInfo typeInfo) throws Exception {
String typeName = typeInfo.getTypeName();
PrimitiveCategory primitiveCategory1 = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
List<String> columns = new ArrayList<String>();
int columnNum = 1;
generationSpecList.add(GenerationSpec.createSameType(typeInfo));
explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
ExprNodeDesc col1Expr;
String columnName = "col" + (columnNum++);
col1Expr = new ExprNodeColumnDesc(typeInfo, columnName, "table", false);
columns.add(columnName);
List<ObjectInspector> objectInspectorList = new ArrayList<ObjectInspector>();
objectInspectorList.add(VectorRandomRowSource.getObjectInspector(typeInfo));
List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
children.add(col1Expr);
// ----------------------------------------------------------------------------------------------
String[] columnNames = columns.toArray(new String[0]);
VectorRandomRowSource rowSource = new VectorRandomRowSource();
rowSource.initGenerationSpecSchema(random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList);
Object[][] randomRows = rowSource.randomRows(100000);
VectorRandomBatchSource batchSource = VectorRandomBatchSource.createInterestingBatches(random, rowSource, randomRows, null);
GenericUDF genericUdf = new GenericUDFOPNegative();
ObjectInspector[] objectInspectors = objectInspectorList.toArray(new ObjectInspector[objectInspectorList.size()]);
ObjectInspector outputObjectInspector = null;
try {
outputObjectInspector = genericUdf.initialize(objectInspectors);
} catch (Exception e) {
Assert.fail(e.toString());
}
TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector);
ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(outputTypeInfo, genericUdf, children);
final int rowCount = randomRows.length;
Object[][] resultObjectsArray = new Object[NegativeTestMode.count][];
for (int i = 0; i < NegativeTestMode.count; i++) {
Object[] resultObjects = new Object[rowCount];
resultObjectsArray[i] = resultObjects;
NegativeTestMode negativeTestMode = NegativeTestMode.values()[i];
switch(negativeTestMode) {
case ROW_MODE:
doRowArithmeticTest(typeInfo, columns, children, exprDesc, randomRows, rowSource.rowStructObjectInspector(), outputTypeInfo, resultObjects);
break;
case ADAPTOR:
case VECTOR_EXPRESSION:
doVectorArithmeticTest(typeInfo, columns, columnNames, rowSource.typeInfos(), rowSource.dataTypePhysicalVariations(), children, exprDesc, negativeTestMode, batchSource, exprDesc.getWritableObjectInspector(), outputTypeInfo, resultObjects);
break;
default:
throw new RuntimeException("Unexpected Negative operator test mode " + negativeTestMode);
}
}
for (int i = 0; i < rowCount; i++) {
// Row-mode is the expected value.
Object expectedResult = resultObjectsArray[0][i];
for (int v = 1; v < NegativeTestMode.count; v++) {
Object vectorResult = resultObjectsArray[v][i];
if (expectedResult == null || vectorResult == null) {
if (expectedResult != null || vectorResult != null) {
Assert.fail("Row " + i + " typeName " + typeName + " outputTypeName " + outputTypeInfo.getTypeName() + " " + NegativeTestMode.values()[v] + " result is NULL " + (vectorResult == null) + " does not match row-mode expected result is NULL " + (expectedResult == null) + " row values " + Arrays.toString(randomRows[i]));
}
} else {
if (!expectedResult.equals(vectorResult)) {
Assert.fail("Row " + i + " typeName " + typeName + " outputTypeName " + outputTypeInfo.getTypeName() + " " + NegativeTestMode.values()[v] + " result " + vectorResult.toString() + " (" + vectorResult.getClass().getSimpleName() + ")" + " does not match row-mode expected result " + expectedResult.toString() + " (" + expectedResult.getClass().getSimpleName() + ")" + " row values " + Arrays.toString(randomRows[i]));
}
}
}
}
}
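As a compact reference, a hypothetical sketch (assumed int column "col1") of the unary negation expression this test builds: GenericUDFOPNegative is initialized against the argument's ObjectInspector to derive the output type, then wrapped in ExprNodeGenericFuncDesc, mirroring the initialize and getTypeInfoFromObjectInspector steps above.

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNegative;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

private static ExprNodeGenericFuncDesc buildNegativeExprSketch() throws Exception {
    GenericUDF negate = new GenericUDFOPNegative();
    // Initialize the UDF against the argument inspector to learn the output type of -(col1).
    ObjectInspector intOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(TypeInfoFactory.intTypeInfo);
    ObjectInspector outputOI = negate.initialize(new ObjectInspector[] { intOI });
    TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputOI);
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    children.add(new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "col1", "table", false));
    return new ExprNodeGenericFuncDesc(outputTypeInfo, negate, children);
}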
Use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.
Class TestOrcSplitElimination, method testFooterExternalCacheImpl.
private void testFooterExternalCacheImpl(boolean isPpd) throws IOException {
ObjectInspector inspector = createIO();
writeFile(inspector, testFilePath);
writeFile(inspector, testFilePath2);
GenericUDF udf = new GenericUDFOPEqualOrLessThan();
List<ExprNodeDesc> childExpr = Lists.newArrayList();
createTestSarg(inspector, udf, childExpr);
setupExternalCacheConfig(isPpd, testFilePath + "," + testFilePath2);
// Get the base values w/o cache.
conf.setBoolean(ConfVars.HIVE_ORC_MS_FOOTER_CACHE_ENABLED.varname, false);
OrcInputFormatForTest.clearLocalCache();
OrcInputFormat in0 = new OrcInputFormat();
InputSplit[] originals = in0.getSplits(conf, -1);
assertEquals(10, originals.length);
HashSet<FsWithHash> originalHs = new HashSet<>();
for (InputSplit original : originals) {
originalHs.add(new FsWithHash((FileSplit) original));
}
// Populate the cache.
conf.setBoolean(ConfVars.HIVE_ORC_MS_FOOTER_CACHE_ENABLED.varname, true);
OrcInputFormatForTest in = new OrcInputFormatForTest();
OrcInputFormatForTest.clearLocalCache();
OrcInputFormatForTest.caches.resetCounts();
OrcInputFormatForTest.caches.cache.clear();
InputSplit[] splits = in.getSplits(conf, -1);
// Puts, gets, hits, unused, unused.
@SuppressWarnings("static-access") AtomicInteger[] counts = { in.caches.putCount, isPpd ? in.caches.getByExprCount : in.caches.getCount, isPpd ? in.caches.getHitByExprCount : in.caches.getHitCount, isPpd ? in.caches.getCount : in.caches.getByExprCount, isPpd ? in.caches.getHitCount : in.caches.getHitByExprCount };
verifySplits(originalHs, splits);
verifyCallCounts(counts, 2, 2, 0);
assertEquals(2, OrcInputFormatForTest.caches.cache.size());
// Verify we can get from cache.
OrcInputFormatForTest.clearLocalCache();
OrcInputFormatForTest.caches.resetCounts();
splits = in.getSplits(conf, -1);
verifySplits(originalHs, splits);
verifyCallCounts(counts, 0, 2, 2);
// Verify ORC SARG still works.
OrcInputFormatForTest.clearLocalCache();
OrcInputFormatForTest.caches.resetCounts();
childExpr.set(1, new ExprNodeConstantDesc(5));
conf.set("hive.io.filter.expr.serialized", SerializationUtilities.serializeExpression(new ExprNodeGenericFuncDesc(inspector, udf, childExpr)));
splits = in.getSplits(conf, -1);
InputSplit[] filtered = { originals[0], originals[4], originals[5], originals[9] };
originalHs = new HashSet<>();
for (InputSplit original : filtered) {
originalHs.add(new FsWithHash((FileSplit) original));
}
verifySplits(originalHs, splits);
verifyCallCounts(counts, 0, 2, 2);
// Verify corrupted cache value gets replaced.
OrcInputFormatForTest.clearLocalCache();
OrcInputFormatForTest.caches.resetCounts();
Map.Entry<Long, MockExternalCaches.MockItem> e = OrcInputFormatForTest.caches.cache.entrySet().iterator().next();
Long key = e.getKey();
byte[] someData = new byte[8];
ByteBuffer toCorrupt = e.getValue().data;
System.arraycopy(toCorrupt.array(), toCorrupt.arrayOffset(), someData, 0, someData.length);
toCorrupt.putLong(0, 0L);
splits = in.getSplits(conf, -1);
verifySplits(originalHs, splits);
if (!isPpd) {
// Recovery is not implemented yet for PPD path.
ByteBuffer restored = OrcInputFormatForTest.caches.cache.get(key).data;
byte[] newData = new byte[someData.length];
System.arraycopy(restored.array(), restored.arrayOffset(), newData, 0, newData.length);
assertArrayEquals(someData, newData);
}
}
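For reference, a minimal, hypothetical sketch of the predicate push-down step this test relies on: build a comparison such as userid <= 100 (the column name and constant here are assumptions, not the values createTestSarg uses), wrap it in an ExprNodeGenericFuncDesc, and serialize it into the configuration key the ORC reader consults. The TypeInfo-based constructor is used here for clarity; the test's final call passes an ObjectInspector instead.

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

private static void pushDownSargSketch(Configuration conf) {
    // Hypothetical predicate: userid <= 100.
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    children.add(new ExprNodeColumnDesc(TypeInfoFactory.longTypeInfo, "userid", "T", false));
    children.add(new ExprNodeConstantDesc(100L));
    ExprNodeGenericFuncDesc predicate = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFOPEqualOrLessThan(), children);
    // Same conf key the test sets ("hive.io.filter.expr.serialized").
    conf.set("hive.io.filter.expr.serialized", SerializationUtilities.serializeExpression(predicate));
}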
Use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.
Class TestParquetRowGroupFilter, method testRowGroupFilterTakeEffect.
@Test
public void testRowGroupFilterTakeEffect() throws Exception {
// define schema
columnNames = "intCol";
columnTypes = "int";
StructObjectInspector inspector = getObjectInspector(columnNames, columnTypes);
MessageType fileSchema = MessageTypeParser.parseMessageType("message hive_schema {\n" + " optional int32 intCol;\n" + "}\n");
conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "intCol");
conf.set("columns", "intCol");
conf.set("columns.types", "int");
// create Parquet file with specific data
Path testPath = writeDirect("RowGroupFilterTakeEffect", fileSchema, new DirectWriter() {
@Override
public void write(RecordConsumer consumer) {
for (int i = 0; i < 100; i++) {
consumer.startMessage();
consumer.startField("int", 0);
consumer.addInteger(i);
consumer.endField("int", 0);
consumer.endMessage();
}
}
});
// > 50
GenericUDF udf = new GenericUDFOPGreaterThan();
List<ExprNodeDesc> children = Lists.newArrayList();
ExprNodeColumnDesc columnDesc = new ExprNodeColumnDesc(Integer.class, "intCol", "T", false);
ExprNodeConstantDesc constantDesc = new ExprNodeConstantDesc(50);
children.add(columnDesc);
children.add(constantDesc);
ExprNodeGenericFuncDesc genericFuncDesc = new ExprNodeGenericFuncDesc(inspector, udf, children);
String searchArgumentStr = SerializationUtilities.serializeExpression(genericFuncDesc);
conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, searchArgumentStr);
ParquetRecordReaderWrapper recordReader = (ParquetRecordReaderWrapper) new MapredParquetInputFormat().getRecordReader(new FileSplit(testPath, 0, fileLength(testPath), (String[]) null), conf, null);
Assert.assertEquals("row group is not filtered correctly", 1, recordReader.getFiltedBlocks().size());
// > 100
constantDesc = new ExprNodeConstantDesc(100);
children.set(1, constantDesc);
genericFuncDesc = new ExprNodeGenericFuncDesc(inspector, udf, children);
searchArgumentStr = SerializationUtilities.serializeExpression(genericFuncDesc);
conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, searchArgumentStr);
recordReader = (ParquetRecordReaderWrapper) new MapredParquetInputFormat().getRecordReader(new FileSplit(testPath, 0, fileLength(testPath), (String[]) null), conf, null);
Assert.assertEquals("row group is not filtered correctly", 0, recordReader.getFiltedBlocks().size());
}
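Finally, a small sketch of the same intCol > 50 predicate built with the TypeInfo-based ExprNodeGenericFuncDesc constructor and an explicit boolean output type; this is an illustrative alternative, since the test above passes the row StructObjectInspector to the ObjectInspector-based constructor. The push-down key is the same TableScanDesc.FILTER_EXPR_CONF_STR used in the test.

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

private static void pushDownParquetFilterSketch(Configuration conf) {
    // intCol > 50, typed explicitly as a boolean-valued expression.
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    children.add(new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "intCol", "T", false));
    children.add(new ExprNodeConstantDesc(50));
    ExprNodeGenericFuncDesc filter = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFOPGreaterThan(), children);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, SerializationUtilities.serializeExpression(filter));
}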