
Example 6 with BIGINT

Use of io.prestosql.spi.type.BigintType.BIGINT in the project hetu-core by openlookeng.

From the class TestPruneLimitColumns, method buildProjectedLimit:

private ProjectNode buildProjectedLimit(PlanBuilder planBuilder, Predicate<Symbol> projectionFilter) {
    Symbol a = planBuilder.symbol("a");
    Symbol b = planBuilder.symbol("b");
    // Keep only the symbols accepted by the filter, assign each retained symbol a
    // BIGINT variable reference, and project them over LIMIT 1 applied to VALUES (a, b).
    return planBuilder.project(
            Assignments.copyOf(Stream.of(a, b)
                    .filter(projectionFilter)
                    .collect(Collectors.toMap(v -> v, v -> planBuilder.variable(v.getName(), BIGINT)))),
            planBuilder.limit(1, planBuilder.values(a, b)));
}
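For context, here is a minimal standalone sketch of the same Stream/Collectors.toMap pattern the helper uses, with plain Strings standing in for Symbol and RowExpression. The class name ProjectionFilterSketch and the "::bigint" expression text are illustrative only, not part of hetu-core.

import java.util.Map;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class ProjectionFilterSketch {
    public static void main(String[] args) {
        // Hypothetical filter standing in for the Predicate<Symbol> passed to buildProjectedLimit.
        Predicate<String> projectionFilter = name -> name.equals("a");
        // Same shape as the helper above: keep only the names accepted by the filter
        // and map each retained name to the expression that will reference it.
        Map<String, String> assignments = Stream.of("a", "b")
                .filter(projectionFilter)
                .collect(Collectors.toMap(name -> name, name -> name + "::bigint"));
        System.out.println(assignments); // prints {a=a::bigint}
    }
}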

Example 7 with BIGINT

Use of io.prestosql.spi.type.BigintType.BIGINT in the project hetu-core by openlookeng.

From the class TestOrcReaderPositions, method testRowGroupSkipping:

@Test
public void testRowGroupSkipping() throws Exception {
    try (TempFile tempFile = new TempFile()) {
        // create single strip file with multiple row groups
        int rowCount = 142_000;
        createSequentialFile(tempFile.getFile(), rowCount);
        // test reading two row groups from middle of file
        OrcPredicate predicate = (numberOfRows, allColumnStatistics) -> {
            if (numberOfRows == rowCount) {
                return true;
            }
            IntegerStatistics stats = allColumnStatistics.get(new OrcColumnId(1)).getIntegerStatistics();
            return (stats.getMin() == 50_000) || (stats.getMin() == 60_000);
        };
        try (OrcRecordReader reader = createCustomOrcRecordReader(tempFile, predicate, BIGINT, MAX_BATCH_SIZE)) {
            assertEquals(reader.getFileRowCount(), rowCount);
            assertEquals(reader.getReaderRowCount(), rowCount);
            assertEquals(reader.getFilePosition(), 0);
            assertEquals(reader.getReaderPosition(), 0);
            long position = 50_000;
            while (true) {
                Page page = reader.nextPage();
                if (page == null) {
                    break;
                }
                page = page.getLoadedPage();
                Block block = page.getBlock(0);
                for (int i = 0; i < block.getPositionCount(); i++) {
                    assertEquals(BIGINT.getLong(block, i), position + i);
                }
                assertEquals(reader.getFilePosition(), position);
                assertEquals(reader.getReaderPosition(), position);
                position += page.getPositionCount();
            }
            assertEquals(position, 70_000);
            assertEquals(reader.getFilePosition(), rowCount);
            assertEquals(reader.getReaderPosition(), rowCount);
        }
    }
}
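The predicate above prunes row groups by their integer min/max statistics. The following standalone sketch models that pruning with a hypothetical RowGroupStats record; the 10,000-row group boundaries are an assumption consistent with the positions the test asserts, not values read from the ORC file itself.

import java.util.List;

public class RowGroupPruningSketch {
    // Hypothetical stand-in for the ORC IntegerStatistics consulted by the predicate.
    record RowGroupStats(long min, long max, long rowCount) {}

    static boolean shouldRead(RowGroupStats stats) {
        // Same condition as the test predicate: keep only the row groups whose
        // minimum value is 50_000 or 60_000, i.e. the groups covering rows 50_000..69_999.
        return stats.min() == 50_000 || stats.min() == 60_000;
    }

    public static void main(String[] args) {
        List<RowGroupStats> rowGroups = List.of(
                new RowGroupStats(0, 9_999, 10_000),
                new RowGroupStats(50_000, 59_999, 10_000),
                new RowGroupStats(60_000, 69_999, 10_000),
                new RowGroupStats(130_000, 141_999, 12_000));
        rowGroups.forEach(g -> System.out.println(g.min() + " -> " + (shouldRead(g) ? "read" : "skip")));
    }
}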

Example 8 with BIGINT

Use of io.prestosql.spi.type.BigintType.BIGINT in the project hetu-core by openlookeng.

From the class TestOrcReaderPositions, method testStripeSkipping:

@Test
public void testStripeSkipping() throws Exception {
    try (TempFile tempFile = new TempFile()) {
        createMultiStripeFile(tempFile.getFile());
        // test reading second and fourth stripes
        OrcPredicate predicate = (numberOfRows, allColumnStatistics) -> {
            if (numberOfRows == 100) {
                return true;
            }
            IntegerStatistics stats = allColumnStatistics.get(new OrcColumnId(1)).getIntegerStatistics();
            return ((stats.getMin() == 60) && (stats.getMax() == 117)) || ((stats.getMin() == 180) && (stats.getMax() == 237));
        };
        try (OrcRecordReader reader = createCustomOrcRecordReader(tempFile, predicate, BIGINT, MAX_BATCH_SIZE)) {
            assertEquals(reader.getFileRowCount(), 100);
            assertEquals(reader.getReaderRowCount(), 40);
            assertEquals(reader.getFilePosition(), 0);
            assertEquals(reader.getReaderPosition(), 0);
            // second stripe
            Page page = reader.nextPage().getLoadedPage();
            assertEquals(page.getPositionCount(), 20);
            assertEquals(reader.getReaderPosition(), 0);
            assertEquals(reader.getFilePosition(), 20);
            assertCurrentBatch(page, 1);
            // fourth stripe
            page = reader.nextPage().getLoadedPage();
            assertEquals(page.getPositionCount(), 20);
            assertEquals(reader.getReaderPosition(), 20);
            assertEquals(reader.getFilePosition(), 60);
            assertCurrentBatch(page, 3);
            page = reader.nextPage();
            assertNull(page);
            assertEquals(reader.getReaderPosition(), 40);
            assertEquals(reader.getFilePosition(), 100);
        }
    }
}
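A standalone sketch of how getFilePosition() and getReaderPosition() diverge when stripes are skipped. The Stripe record and the five-stripes-of-20-rows layout are assumptions chosen to match the assertions above, not the actual file metadata.

import java.util.List;

public class StripePositionSketch {
    // Hypothetical stripe descriptor: first row offset in the file, row count,
    // and whether the predicate above selected the stripe.
    record Stripe(long firstRow, int rows, boolean selected) {}

    public static void main(String[] args) {
        List<Stripe> stripes = List.of(
                new Stripe(0, 20, false),   // stripe 1: skipped
                new Stripe(20, 20, true),   // stripe 2: read
                new Stripe(40, 20, false),  // stripe 3: skipped
                new Stripe(60, 20, true),   // stripe 4: read
                new Stripe(80, 20, false)); // stripe 5: skipped
        long readerPosition = 0;
        for (Stripe stripe : stripes) {
            if (!stripe.selected()) {
                continue; // skipped stripes advance the file position but not the reader position
            }
            // The file position counts rows from the start of the file, including skipped stripes;
            // the reader position counts only rows the reader has actually produced.
            System.out.printf("filePosition=%d readerPosition=%d%n", stripe.firstRow(), readerPosition);
            readerPosition += stripe.rows();
        }
        System.out.println("final readerPosition=" + readerPosition); // 40, as asserted above
    }
}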

Example 9 with BIGINT

Use of io.prestosql.spi.type.BigintType.BIGINT in the project hetu-core by openlookeng.

From the class TestDetermineJoinDistributionType, method testReplicateLeftOuterJoin:

@Test
public void testReplicateLeftOuterJoin() {
    int aRows = 10_000;
    int bRows = 10;
    assertDetermineJoinDistributionType(new CostComparator(75, 10, 15))
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.AUTOMATIC.name())
            .overrideStats("valuesA", PlanNodeStatsEstimate.builder()
                    .setOutputRowCount(aRows)
                    .addSymbolStatistics(ImmutableMap.of(new Symbol("A1"), new SymbolStatsEstimate(0, 100, 0, 640000, 100)))
                    .build())
            .overrideStats("valuesB", PlanNodeStatsEstimate.builder()
                    .setOutputRowCount(bRows)
                    .addSymbolStatistics(ImmutableMap.of(new Symbol("B1"), new SymbolStatsEstimate(0, 100, 0, 640000, 100)))
                    .build())
            .on(p -> p.join(
                    LEFT,
                    p.values(new PlanNodeId("valuesA"), aRows, p.symbol("A1", BIGINT)),
                    p.values(new PlanNodeId("valuesB"), bRows, p.symbol("B1", BIGINT)),
                    ImmutableList.of(new JoinNode.EquiJoinClause(p.symbol("A1", BIGINT), p.symbol("B1", BIGINT))),
                    ImmutableList.of(p.symbol("A1", BIGINT), p.symbol("B1", BIGINT)),
                    Optional.empty()))
            .matches(join(
                    LEFT,
                    ImmutableList.of(equiJoinClause("A1", "B1")),
                    Optional.empty(),
                    Optional.of(REPLICATED),
                    values(ImmutableMap.of("A1", 0)),
                    values(ImmutableMap.of("B1", 0))));
}
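The rule picks REPLICATED here because broadcasting the 10-row build side is estimated to be cheaper than repartitioning both inputs. The sketch below is a toy cost comparison only; the formula and the worker count are assumptions for illustration and do not reproduce CostComparator.

public class BroadcastDecisionSketch {
    public static void main(String[] args) {
        long probeRows = 10_000; // valuesA
        long buildRows = 10;     // valuesB
        int workers = 4;         // hypothetical cluster size
        // Toy network-cost model: repartitioning shuffles both sides once,
        // while replication copies the build side to every worker.
        double partitionedCost = probeRows + buildRows;
        double replicatedCost = (double) buildRows * workers;
        String distribution = replicatedCost < partitionedCost ? "REPLICATED" : "PARTITIONED";
        System.out.println(distribution); // REPLICATED for these row counts
    }
}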

Example 10 with BIGINT

Use of io.prestosql.spi.type.BigintType.BIGINT in the project hetu-core by openlookeng.

From the class TestDetermineJoinDistributionType, method testFlipAndReplicateRightOuterJoin:

@Test
public void testFlipAndReplicateRightOuterJoin() {
    int aRows = 10;
    int bRows = 1_000_000;
    assertDetermineJoinDistributionType(new CostComparator(75, 10, 15))
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.AUTOMATIC.name())
            .overrideStats("valuesA", PlanNodeStatsEstimate.builder()
                    .setOutputRowCount(aRows)
                    .addSymbolStatistics(ImmutableMap.of(new Symbol("A1"), new SymbolStatsEstimate(0, 100, 0, 640000, 100)))
                    .build())
            .overrideStats("valuesB", PlanNodeStatsEstimate.builder()
                    .setOutputRowCount(bRows)
                    .addSymbolStatistics(ImmutableMap.of(new Symbol("B1"), new SymbolStatsEstimate(0, 100, 0, 640000, 100)))
                    .build())
            .on(p -> p.join(
                    RIGHT,
                    p.values(new PlanNodeId("valuesA"), aRows, p.symbol("A1", BIGINT)),
                    p.values(new PlanNodeId("valuesB"), bRows, p.symbol("B1", BIGINT)),
                    ImmutableList.of(new JoinNode.EquiJoinClause(p.symbol("A1", BIGINT), p.symbol("B1", BIGINT))),
                    ImmutableList.of(p.symbol("A1", BIGINT), p.symbol("B1", BIGINT)),
                    Optional.empty()))
            .matches(join(
                    LEFT,
                    ImmutableList.of(equiJoinClause("A1", "B1")),
                    Optional.empty(),
                    Optional.of(REPLICATED),
                    values(ImmutableMap.of("A1", 0)),
                    values(ImmutableMap.of("B1", 0))));
}
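Here the small input (10 rows) starts on the outer-preserved side of a RIGHT outer join, so the rule mirrors the join type before replicating the small side, which is why the expected plan asserts LEFT. A minimal sketch of the type mirroring; the JoinFlipSketch class and its enum are illustrative only, not hetu-core's JoinNode API.

public class JoinFlipSketch {
    enum JoinType { INNER, LEFT, RIGHT }

    // Mirror of a join type when its two inputs are exchanged:
    // RIGHT becomes LEFT and vice versa, INNER is symmetric.
    static JoinType mirror(JoinType type) {
        switch (type) {
            case LEFT:
                return JoinType.RIGHT;
            case RIGHT:
                return JoinType.LEFT;
            default:
                return type;
        }
    }

    public static void main(String[] args) {
        System.out.println(mirror(JoinType.RIGHT)); // LEFT, matching the expected plan above
    }
}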

Aggregations

BIGINT (io.prestosql.spi.type.BigintType.BIGINT): 62 usages
ImmutableList (com.google.common.collect.ImmutableList): 54 usages
Optional (java.util.Optional): 50 usages
Test (org.testng.annotations.Test): 49 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 45 usages
Symbol (io.prestosql.spi.plan.Symbol): 36 usages
List (java.util.List): 31 usages
Type (io.prestosql.spi.type.Type): 24 usages
PlanNodeId (io.prestosql.spi.plan.PlanNodeId): 21 usages
PlanMatchPattern.values (io.prestosql.sql.planner.assertions.PlanMatchPattern.values): 21 usages
ArrayList (java.util.ArrayList): 20 usages
Assignments (io.prestosql.spi.plan.Assignments): 19 usages
VARCHAR (io.prestosql.spi.type.VarcharType.VARCHAR): 19 usages
Assert.assertTrue (org.testng.Assert.assertTrue): 19 usages
Assert.assertEquals (org.testng.Assert.assertEquals): 18 usages
Metadata (io.prestosql.metadata.Metadata): 16 usages
DOUBLE (io.prestosql.spi.type.DoubleType.DOUBLE): 16 usages
BeforeClass (org.testng.annotations.BeforeClass): 16 usages
Assert.assertFalse (org.testng.Assert.assertFalse): 15 usages
MetadataManager.createTestMetadataManager (io.prestosql.metadata.MetadataManager.createTestMetadataManager): 14 usages