Search in sources :

Example 81 with Row

use of org.apache.beam.sdk.values.Row in project beam by apache.

the class BeamZetaSqlCalcRelTest method testSingleFieldAccess.

@Test
public void testSingleFieldAccess() throws IllegalAccessException {
    String sql = "SELECT Key FROM KeyValue";
    PCollection<Row> rows = compile(sql);
    final NodeGetter nodeGetter = new NodeGetter(rows);
    pipeline.traverseTopologically(nodeGetter);
    ParDo.MultiOutput<Row, Row> pardo = (ParDo.MultiOutput<Row, Row>) nodeGetter.producer.getTransform();
    PCollection<Row> input = (PCollection<Row>) Iterables.getOnlyElement(nodeGetter.producer.getInputs().values());
    DoFnSchemaInformation info = ParDo.getDoFnSchemaInformation(pardo.getFn(), input);
    FieldAccessDescriptor fieldAccess = info.getFieldAccessDescriptor();
    Assert.assertTrue(fieldAccess.referencesSingleField());
    Assert.assertEquals("Key", Iterables.getOnlyElement(fieldAccess.fieldNamesAccessed()));
    pipeline.run().waitUntilFinish();
}
Also used : PCollection(org.apache.beam.sdk.values.PCollection) FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) ParDo(org.apache.beam.sdk.transforms.ParDo) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Example 82 with Row

use of org.apache.beam.sdk.values.Row in project beam by apache.

the class StreamingSqlTest method runHopWindow.

@Test
public void runHopWindow() throws Exception {
    String sql = "SELECT f_long, COUNT(*) AS `getFieldCount`," + "  `window_start`, " + "  `window_end` " + " FROM HOP((select * from streaming_sql_test_table_a), descriptor(f_timestamp), " + " \"INTERVAL 30 MINUTE\", \"INTERVAL 1 HOUR\")" + " GROUP BY f_long, window_start, window_end";
    ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
    BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
    PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
    Schema resultType = Schema.builder().addInt64Field("f_long").addInt64Field("size").addDateTimeField("window_start").addDateTimeField("window_end").build();
    List<Row> expectedRows = Arrays.asList(Row.withSchema(resultType).addValues(1000L, 3L, parseTimestampWithUTCTimeZone("2017-01-01 00:30:00"), parseTimestampWithUTCTimeZone("2017-01-01 01:30:00")).build(), Row.withSchema(resultType).addValues(1000L, 3L, parseTimestampWithUTCTimeZone("2017-01-01 01:00:00"), parseTimestampWithUTCTimeZone("2017-01-01 02:00:00")).build(), Row.withSchema(resultType).addValues(4000L, 1L, parseTimestampWithUTCTimeZone("2017-01-01 01:30:00"), parseTimestampWithUTCTimeZone("2017-01-01 02:30:00")).build(), Row.withSchema(resultType).addValues(4000L, 1L, parseTimestampWithUTCTimeZone("2017-01-01 02:00:00"), parseTimestampWithUTCTimeZone("2017-01-01 03:00:00")).build());
    PAssert.that(stream).containsInAnyOrder(expectedRows);
    pipeline.run().waitUntilFinish();
}
Also used : BeamRelNode(org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode) Schema(org.apache.beam.sdk.schemas.Schema) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Example 83 with Row

use of org.apache.beam.sdk.values.Row in project beam by apache.

the class StreamingSqlTest method testZetaSQLBasicSlidingWindowing.

@Test
public void testZetaSQLBasicSlidingWindowing() {
    String sql = "SELECT " + "COUNT(*) as field_count, " + "HOP_START(\"INTERVAL 1 SECOND\", \"INTERVAL 2 SECOND\") as window_start, " + "HOP_END(\"INTERVAL 1 SECOND\", \"INTERVAL 2 SECOND\") as window_end " + "FROM window_test_table " + "GROUP BY HOP(ts, \"INTERVAL 1 SECOND\", \"INTERVAL 2 SECOND\");";
    ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
    BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
    PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
    final Schema schema = Schema.builder().addInt64Field("count_star").addDateTimeField("field1").addDateTimeField("field2").build();
    PAssert.that(stream).containsInAnyOrder(Row.withSchema(schema).addValues(2L, new DateTime(2018, 7, 1, 21, 26, 7, ISOChronology.getInstanceUTC()), new DateTime(2018, 7, 1, 21, 26, 9, ISOChronology.getInstanceUTC())).build(), Row.withSchema(schema).addValues(1L, new DateTime(2018, 7, 1, 21, 26, 5, ISOChronology.getInstanceUTC()), new DateTime(2018, 7, 1, 21, 26, 7, ISOChronology.getInstanceUTC())).build(), Row.withSchema(schema).addValues(2L, new DateTime(2018, 7, 1, 21, 26, 6, ISOChronology.getInstanceUTC()), new DateTime(2018, 7, 1, 21, 26, 8, ISOChronology.getInstanceUTC())).build(), Row.withSchema(schema).addValues(2L, new DateTime(2018, 7, 1, 21, 26, 8, ISOChronology.getInstanceUTC()), new DateTime(2018, 7, 1, 21, 26, 10, ISOChronology.getInstanceUTC())).build(), Row.withSchema(schema).addValues(1L, new DateTime(2018, 7, 1, 21, 26, 9, ISOChronology.getInstanceUTC()), new DateTime(2018, 7, 1, 21, 26, 11, ISOChronology.getInstanceUTC())).build());
    pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
}
Also used : BeamRelNode(org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode) Schema(org.apache.beam.sdk.schemas.Schema) Row(org.apache.beam.sdk.values.Row) DateTime(org.joda.time.DateTime) Test(org.junit.Test)

Example 84 with Row

use of org.apache.beam.sdk.values.Row in project beam by apache.

the class StreamingSqlTest method testZetaSQLNestedQueryFour.

@Test
public void testZetaSQLNestedQueryFour() {
    String sql = "SELECT t1.Value, TUMBLE_START('INTERVAL 1 SECOND') AS period_start, MIN(t2.Value) as" + " min_v FROM KeyValue AS t1 INNER JOIN BigTable AS t2 on t1.Key = t2.RowKey GROUP BY" + " t1.Value, TUMBLE(t2.ts, 'INTERVAL 1 SECOND')";
    ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
    BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
    PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
    PAssert.that(stream).containsInAnyOrder(Row.withSchema(Schema.builder().addStringField("value").addDateTimeField("min_v").addStringField("period_start").build()).addValues("KeyValue235", new DateTime(2018, 7, 1, 21, 26, 7, ISOChronology.getInstanceUTC()), "BigTable235").build());
    pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
}
Also used : BeamRelNode(org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode) Row(org.apache.beam.sdk.values.Row) DateTime(org.joda.time.DateTime) Test(org.junit.Test)

Example 85 with Row

use of org.apache.beam.sdk.values.Row in project beam by apache.

the class StreamingSqlTest method testTVFTumbleAggregation.

@Test
public void testTVFTumbleAggregation() {
    String sql = "SELECT COUNT(*) as field_count, " + "window_start " + "FROM TUMBLE((select * from KeyValue), descriptor(ts), 'INTERVAL 1 SECOND') " + "GROUP BY window_start";
    ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
    BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
    PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
    final Schema schema = Schema.builder().addInt64Field("field_count").addDateTimeField("window_start").build();
    PAssert.that(stream).containsInAnyOrder(Row.withSchema(schema).addValues(1L, new DateTime(2018, 7, 1, 21, 26, 7, ISOChronology.getInstanceUTC())).build(), Row.withSchema(schema).addValues(1L, new DateTime(2018, 7, 1, 21, 26, 6, ISOChronology.getInstanceUTC())).build());
    pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
}
Also used : BeamRelNode(org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode) Schema(org.apache.beam.sdk.schemas.Schema) Row(org.apache.beam.sdk.values.Row) DateTime(org.joda.time.DateTime) Test(org.junit.Test)

Aggregations

Row (org.apache.beam.sdk.values.Row)958 Test (org.junit.Test)879 Schema (org.apache.beam.sdk.schemas.Schema)566 ByteString (com.google.protobuf.ByteString)219 BeamRelNode (org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode)206 Matchers.containsString (org.hamcrest.Matchers.containsString)85 Category (org.junit.experimental.categories.Category)72 Value (com.google.zetasql.Value)66 List (java.util.List)49 FieldAccessDescriptor (org.apache.beam.sdk.schemas.FieldAccessDescriptor)49 DateTime (org.joda.time.DateTime)46 UsesSchema (org.apache.beam.sdk.testing.UsesSchema)43 DefaultSchema (org.apache.beam.sdk.schemas.annotations.DefaultSchema)36 PCollection (org.apache.beam.sdk.values.PCollection)36 BeamSqlEnv (org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv)35 FieldType (org.apache.beam.sdk.schemas.Schema.FieldType)33 ArrayList (java.util.ArrayList)29 BeamIOSourceRel (org.apache.beam.sdk.extensions.sql.impl.rel.BeamIOSourceRel)28 ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList)28 Ignore (org.junit.Ignore)27