use of com.facebook.presto.common.type.VarcharType.VARCHAR in project presto by prestodb.
the class TestOrcSelectiveStreamReaders method testEmptyStrings.
/**
* This test tests SliceDirectSelectiveStreamReader for the case where all elements to read are empty strings. The output Block should be a valid VariableWidthBlock with an
* empty Slice. It is to simulate a problem seen in production. The state of SliceDirectSelectiveStreamReader to reproduce the problem is:
* - dataStream: null
* - presentStream: null
* - lengthStream: not null
* - filter: null
* - outputRequired: true
* - offsets array: non zeros
* The test issues two reads, the first one reads a non-empty string and populates non-zero offsets. The second one reads the empty string with the above conditions met.
*/
@Test
public void testEmptyStrings() throws Exception {
Type type = VARCHAR;
List<Type> types = ImmutableList.of(type);
List<List<?>> values = ImmutableList.of(ImmutableList.of("a", ""));
for (OrcTester.Format format : formats) {
if (!types.stream().allMatch(readType -> format.supportsType(readType))) {
return;
}
for (CompressionKind compression : compressions) {
TempFile tempFile = new TempFile();
writeOrcColumnsPresto(tempFile.getFile(), format, compression, Optional.empty(), types, values, new OrcWriterStats());
OrcPredicate orcPredicate = createOrcPredicate(types, values, DWRF, false);
Map<Integer, Type> includedColumns = IntStream.range(0, types.size()).boxed().collect(toImmutableMap(Function.identity(), types::get));
List<Integer> outputColumns = IntStream.range(0, types.size()).boxed().collect(toImmutableList());
OrcAggregatedMemoryContext systemMemoryUsage = new TestingHiveOrcAggregatedMemoryContext();
try (OrcSelectiveRecordReader recordReader = createCustomOrcSelectiveRecordReader(tempFile.getFile(), format.getOrcEncoding(), orcPredicate, types, 1, ImmutableMap.of(), ImmutableList.of(), ImmutableMap.of(), OrcTester.OrcReaderSettings.builder().build().getRequiredSubfields(), ImmutableMap.of(), ImmutableMap.of(), includedColumns, outputColumns, false, systemMemoryUsage, false)) {
assertEquals(recordReader.getReaderPosition(), 0);
assertEquals(recordReader.getFilePosition(), 0);
SelectiveStreamReader streamReader = recordReader.getStreamReaders()[0];
// Read the first non-empty element. Do not call streamReader.getBlock() to preserve the offsets array in SliceDirectSelectiveStreamReader.
int batchSize = min(recordReader.prepareNextBatch(), 1);
int[] positions = IntStream.range(0, batchSize).toArray();
streamReader.read(0, positions, batchSize);
recordReader.batchRead(batchSize);
// Read the second element: an empty string. Set the dataStream in SliceDirectSelectiveStreamReader to null to simulate the conditions causing the problem.
((SliceSelectiveStreamReader) streamReader).resetDataStream();
batchSize = min(recordReader.prepareNextBatch(), 1);
positions = IntStream.range(0, batchSize).toArray();
streamReader.read(0, positions, batchSize);
recordReader.batchRead(batchSize);
Block block = streamReader.getBlock(positions, batchSize);
List<?> expectedValues = ImmutableList.of("");
assertBlockEquals(type, block, expectedValues, 0);
assertEquals(recordReader.getReaderPosition(), 1);
assertEquals(recordReader.getFilePosition(), 1);
}
}
}
}
use of com.facebook.presto.common.type.VarcharType.VARCHAR in project presto by prestodb.
the class TestHashJoinOperator method testOuterJoinWithNullOnBothSidesAndFilterFunction.
@Test(dataProvider = "hashJoinTestValues")
public void testOuterJoinWithNullOnBothSidesAndFilterFunction(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) {
TaskContext taskContext = createTaskContext();
InternalJoinFilterFunction filterFunction = new TestInternalJoinFilterFunction(((leftPosition, leftPage, rightPosition, rightPage) -> ImmutableSet.of("a", "c").contains(VARCHAR.getSlice(rightPage.getBlock(0), rightPosition).toStringAscii())));
// build factory
RowPagesBuilder buildPages = rowPagesBuilder(buildHashEnabled, Ints.asList(0), ImmutableList.of(VARCHAR)).row("a").row((String) null).row((String) null).row("a").row("b");
BuildSideSetup buildSideSetup = setupBuildSide(parallelBuild, taskContext, Ints.asList(0), buildPages, Optional.of(filterFunction), false, SINGLE_STREAM_SPILLER_FACTORY);
JoinBridgeManager<PartitionedLookupSourceFactory> lookupSourceFactory = buildSideSetup.getLookupSourceFactoryManager();
// probe factory
List<Type> probeTypes = ImmutableList.of(VARCHAR);
RowPagesBuilder probePages = rowPagesBuilder(probeHashEnabled, Ints.asList(0), probeTypes);
List<Page> probeInput = probePages.row("a").row("b").row((String) null).row("c").build();
OperatorFactory joinOperatorFactory = probeOuterJoinOperatorFactory(lookupSourceFactory, probePages);
// build drivers and operators
instantiateBuildDrivers(buildSideSetup, taskContext);
buildLookupSource(buildSideSetup);
// expected
MaterializedResult expected = MaterializedResult.resultBuilder(taskContext.getSession(), concat(probeTypes, buildPages.getTypesWithoutHash())).row("a", "a").row("a", "a").row("b", null).row(null, null).row("c", null).build();
assertOperatorEquals(joinOperatorFactory, taskContext.addPipelineContext(0, true, true, false).addDriverContext(), probeInput, expected, true, getHashChannels(probePages, buildPages));
}
use of com.facebook.presto.common.type.VarcharType.VARCHAR in project presto by prestodb.
the class TestHashJoinOperator method testOuterJoinWithNullBuildAndFilterFunction.
@Test(dataProvider = "hashJoinTestValues")
public void testOuterJoinWithNullBuildAndFilterFunction(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) {
TaskContext taskContext = createTaskContext();
InternalJoinFilterFunction filterFunction = new TestInternalJoinFilterFunction(((leftPosition, leftPage, rightPosition, rightPage) -> ImmutableSet.of("a", "c").contains(VARCHAR.getSlice(rightPage.getBlock(0), rightPosition).toStringAscii())));
// build factory
List<Type> buildTypes = ImmutableList.of(VARCHAR);
RowPagesBuilder buildPages = rowPagesBuilder(buildHashEnabled, Ints.asList(0), ImmutableList.of(VARCHAR)).row("a").row((String) null).row((String) null).row("a").row("b");
BuildSideSetup buildSideSetup = setupBuildSide(parallelBuild, taskContext, Ints.asList(0), buildPages, Optional.of(filterFunction), false, SINGLE_STREAM_SPILLER_FACTORY);
JoinBridgeManager<PartitionedLookupSourceFactory> lookupSourceFactory = buildSideSetup.getLookupSourceFactoryManager();
// probe factory
List<Type> probeTypes = ImmutableList.of(VARCHAR);
RowPagesBuilder probePages = rowPagesBuilder(probeHashEnabled, Ints.asList(0), probeTypes);
List<Page> probeInput = probePages.row("a").row("b").row("c").build();
OperatorFactory joinOperatorFactory = probeOuterJoinOperatorFactory(lookupSourceFactory, probePages);
// build drivers and operators
instantiateBuildDrivers(buildSideSetup, taskContext);
buildLookupSource(buildSideSetup);
// expected
MaterializedResult expected = MaterializedResult.resultBuilder(taskContext.getSession(), concat(probeTypes, buildTypes)).row("a", "a").row("a", "a").row("b", null).row("c", null).build();
assertOperatorEquals(joinOperatorFactory, taskContext.addPipelineContext(0, true, true, false).addDriverContext(), probeInput, expected, true, getHashChannels(probePages, buildPages));
}
use of com.facebook.presto.common.type.VarcharType.VARCHAR in project presto by prestodb.
the class TestHashJoinOperator method testOuterJoinWithNullProbeAndFilterFunction.
@Test(dataProvider = "hashJoinTestValues")
public void testOuterJoinWithNullProbeAndFilterFunction(boolean parallelBuild, boolean probeHashEnabled, boolean buildHashEnabled) {
TaskContext taskContext = createTaskContext();
InternalJoinFilterFunction filterFunction = new TestInternalJoinFilterFunction(((leftPosition, leftPage, rightPosition, rightPage) -> VARCHAR.getSlice(rightPage.getBlock(0), rightPosition).toStringAscii().equals("a")));
// build factory
List<Type> buildTypes = ImmutableList.of(VARCHAR);
RowPagesBuilder buildPages = rowPagesBuilder(buildHashEnabled, Ints.asList(0), buildTypes).row("a").row("b").row("c");
BuildSideSetup buildSideSetup = setupBuildSide(parallelBuild, taskContext, Ints.asList(0), buildPages, Optional.of(filterFunction), false, SINGLE_STREAM_SPILLER_FACTORY);
JoinBridgeManager<PartitionedLookupSourceFactory> lookupSourceFactory = buildSideSetup.getLookupSourceFactoryManager();
// probe factory
List<Type> probeTypes = ImmutableList.of(VARCHAR);
RowPagesBuilder probePages = rowPagesBuilder(probeHashEnabled, Ints.asList(0), probeTypes);
List<Page> probeInput = probePages.row("a").row((String) null).row((String) null).row("a").row("b").build();
OperatorFactory joinOperatorFactory = probeOuterJoinOperatorFactory(lookupSourceFactory, probePages);
// build drivers and operators
instantiateBuildDrivers(buildSideSetup, taskContext);
buildLookupSource(buildSideSetup);
// expected
MaterializedResult expected = MaterializedResult.resultBuilder(taskContext.getSession(), concat(probeTypes, buildTypes)).row("a", "a").row(null, null).row(null, null).row("a", "a").row("b", null).build();
assertOperatorEquals(joinOperatorFactory, taskContext.addPipelineContext(0, true, true, false).addDriverContext(), probeInput, expected, true, getHashChannels(probePages, buildPages));
}
use of com.facebook.presto.common.type.VarcharType.VARCHAR in project presto by prestodb.
the class TestSignatureBinder method testFunction.
@Test
public void testFunction() {
Signature simple = functionSignature().returnType(parseTypeSignature("boolean")).argumentTypes(parseTypeSignature("function(integer,integer)")).build();
assertThat(simple).boundTo("integer").fails();
assertThat(simple).boundTo("function(integer,integer)").succeeds();
// TODO: Support coercion of return type of lambda
assertThat(simple).boundTo("function(integer,smallint)").withCoercion().fails();
assertThat(simple).boundTo("function(integer,bigint)").withCoercion().fails();
Signature applyTwice = functionSignature().returnType(parseTypeSignature("V")).argumentTypes(parseTypeSignature("T"), parseTypeSignature("function(T,U)"), parseTypeSignature("function(U,V)")).typeVariableConstraints(typeVariable("T"), typeVariable("U"), typeVariable("V")).build();
assertThat(applyTwice).boundTo("integer", "integer", "integer").fails();
assertThat(applyTwice).boundTo("integer", "function(integer,varchar)", "function(varchar,double)").produces(BoundVariables.builder().setTypeVariable("T", INTEGER).setTypeVariable("U", VARCHAR).setTypeVariable("V", DOUBLE).build());
assertThat(applyTwice).boundTo("integer", new TypeSignatureProvider(functionArgumentTypes -> TypeSignature.parseTypeSignature("function(integer,varchar)")), new TypeSignatureProvider(functionArgumentTypes -> TypeSignature.parseTypeSignature("function(varchar,double)"))).produces(BoundVariables.builder().setTypeVariable("T", INTEGER).setTypeVariable("U", VARCHAR).setTypeVariable("V", DOUBLE).build());
assertThat(applyTwice).boundTo(// pass function argument to non-function position of a function
new TypeSignatureProvider(functionArgumentTypes -> TypeSignature.parseTypeSignature("function(integer,varchar)")), new TypeSignatureProvider(functionArgumentTypes -> TypeSignature.parseTypeSignature("function(integer,varchar)")), new TypeSignatureProvider(functionArgumentTypes -> TypeSignature.parseTypeSignature("function(varchar,double)"))).fails();
assertThat(applyTwice).boundTo(new TypeSignatureProvider(functionArgumentTypes -> TypeSignature.parseTypeSignature("function(integer,varchar)")), // pass non-function argument to function position of a function
"integer", new TypeSignatureProvider(functionArgumentTypes -> TypeSignature.parseTypeSignature("function(varchar,double)"))).fails();
Signature flatMap = functionSignature().returnType(parseTypeSignature("array(T)")).argumentTypes(parseTypeSignature("array(T)"), parseTypeSignature("function(T, array(T))")).typeVariableConstraints(typeVariable("T")).build();
assertThat(flatMap).boundTo("array(integer)", "function(integer, array(integer))").produces(BoundVariables.builder().setTypeVariable("T", INTEGER).build());
Signature varargApply = functionSignature().returnType(parseTypeSignature("T")).argumentTypes(parseTypeSignature("T"), parseTypeSignature("function(T, T)")).typeVariableConstraints(typeVariable("T")).setVariableArity(true).build();
assertThat(varargApply).boundTo("integer", "function(integer, integer)", "function(integer, integer)", "function(integer, integer)").produces(BoundVariables.builder().setTypeVariable("T", INTEGER).build());
assertThat(varargApply).boundTo("integer", "function(integer, integer)", "function(integer, double)", "function(double, double)").fails();
Signature loop = functionSignature().returnType(parseTypeSignature("T")).argumentTypes(parseTypeSignature("T"), parseTypeSignature("function(T, T)")).typeVariableConstraints(typeVariable("T")).build();
assertThat(loop).boundTo("integer", new TypeSignatureProvider(paramTypes -> new FunctionType(paramTypes, BIGINT).getTypeSignature())).fails();
assertThat(loop).boundTo("integer", new TypeSignatureProvider(paramTypes -> new FunctionType(paramTypes, BIGINT).getTypeSignature())).withCoercion().produces(BoundVariables.builder().setTypeVariable("T", BIGINT).build());
// TODO: Support coercion of return type of lambda
assertThat(loop).withCoercion().boundTo("integer", new TypeSignatureProvider(paramTypes -> new FunctionType(paramTypes, SMALLINT).getTypeSignature())).fails();
// TODO: Support coercion of return type of lambda
// Without coercion support for return type of lambda, the return type of lambda must be `varchar(x)` to avoid need for coercions.
Signature varcharApply = functionSignature().returnType(parseTypeSignature("varchar")).argumentTypes(parseTypeSignature("varchar"), parseTypeSignature("function(varchar, varchar(x))", ImmutableSet.of("x"))).build();
assertThat(varcharApply).withCoercion().boundTo("varchar(10)", new TypeSignatureProvider(paramTypes -> new FunctionType(paramTypes, createVarcharType(1)).getTypeSignature())).succeeds();
Signature sortByKey = functionSignature().returnType(parseTypeSignature("array(T)")).argumentTypes(parseTypeSignature("array(T)"), parseTypeSignature("function(T,E)")).typeVariableConstraints(typeVariable("T"), orderableTypeParameter("E")).build();
assertThat(sortByKey).boundTo("array(integer)", new TypeSignatureProvider(paramTypes -> new FunctionType(paramTypes, VARCHAR).getTypeSignature())).produces(BoundVariables.builder().setTypeVariable("T", INTEGER).setTypeVariable("E", VARCHAR).build());
}
Aggregations