Usage example of org.apache.beam.sdk.values.Row from the Apache Beam project: class BeamZetaSqlCalcRelTest, method testSingleFieldAccess.
@Test
public void testSingleFieldAccess() throws IllegalAccessException {
  // Project a single column; the calc should push field access down to just that column.
  String query = "SELECT Key FROM KeyValue";
  PCollection<Row> output = compile(query);

  // Walk the pipeline graph to locate the producing ParDo and its input.
  final NodeGetter nodeGetter = new NodeGetter(output);
  pipeline.traverseTopologically(nodeGetter);

  ParDo.MultiOutput<Row, Row> parDo =
      (ParDo.MultiOutput<Row, Row>) nodeGetter.producer.getTransform();
  PCollection<Row> parDoInput =
      (PCollection<Row>) Iterables.getOnlyElement(nodeGetter.producer.getInputs().values());

  // Inspect the schema information Beam derived for the DoFn.
  DoFnSchemaInformation schemaInfo = ParDo.getDoFnSchemaInformation(parDo.getFn(), parDoInput);
  FieldAccessDescriptor accessed = schemaInfo.getFieldAccessDescriptor();

  // Exactly one field is read, and it is the projected "Key" column.
  Assert.assertTrue(accessed.referencesSingleField());
  Assert.assertEquals("Key", Iterables.getOnlyElement(accessed.fieldNamesAccessed()));

  pipeline.run().waitUntilFinish();
}
Usage example of org.apache.beam.sdk.values.Row from the Apache Beam project: class StreamingSqlTest, method runHopWindow.
@Test
public void runHopWindow() throws Exception {
  // HOP TVF: 1-hour windows sliding every 30 minutes, grouped per f_long.
  String sql =
      "SELECT f_long, COUNT(*) AS `getFieldCount`,"
          + " `window_start`, "
          + " `window_end` "
          + " FROM HOP((select * from streaming_sql_test_table_a), descriptor(f_timestamp), "
          + " \"INTERVAL 30 MINUTE\", \"INTERVAL 1 HOUR\")"
          + " GROUP BY f_long, window_start, window_end";

  ZetaSQLQueryPlanner planner = new ZetaSQLQueryPlanner(config);
  BeamRelNode relNode = planner.convertToBeamRel(sql);
  PCollection<Row> result = BeamSqlRelUtils.toPCollection(pipeline, relNode);

  Schema outputSchema =
      Schema.builder()
          .addInt64Field("f_long")
          .addInt64Field("size")
          .addDateTimeField("window_start")
          .addDateTimeField("window_end")
          .build();

  // Each input element lands in two overlapping hop windows.
  List<Row> expected =
      Arrays.asList(
          Row.withSchema(outputSchema)
              .addValues(
                  1000L,
                  3L,
                  parseTimestampWithUTCTimeZone("2017-01-01 00:30:00"),
                  parseTimestampWithUTCTimeZone("2017-01-01 01:30:00"))
              .build(),
          Row.withSchema(outputSchema)
              .addValues(
                  1000L,
                  3L,
                  parseTimestampWithUTCTimeZone("2017-01-01 01:00:00"),
                  parseTimestampWithUTCTimeZone("2017-01-01 02:00:00"))
              .build(),
          Row.withSchema(outputSchema)
              .addValues(
                  4000L,
                  1L,
                  parseTimestampWithUTCTimeZone("2017-01-01 01:30:00"),
                  parseTimestampWithUTCTimeZone("2017-01-01 02:30:00"))
              .build(),
          Row.withSchema(outputSchema)
              .addValues(
                  4000L,
                  1L,
                  parseTimestampWithUTCTimeZone("2017-01-01 02:00:00"),
                  parseTimestampWithUTCTimeZone("2017-01-01 03:00:00"))
              .build());

  PAssert.that(result).containsInAnyOrder(expected);
  pipeline.run().waitUntilFinish();
}
Usage example of org.apache.beam.sdk.values.Row from the Apache Beam project: class StreamingSqlTest, method testZetaSQLBasicSlidingWindowing.
@Test
public void testZetaSQLBasicSlidingWindowing() {
  // Legacy HOP grouping syntax: 2-second windows sliding every 1 second.
  String sql =
      "SELECT "
          + "COUNT(*) as field_count, "
          + "HOP_START(\"INTERVAL 1 SECOND\", \"INTERVAL 2 SECOND\") as window_start, "
          + "HOP_END(\"INTERVAL 1 SECOND\", \"INTERVAL 2 SECOND\") as window_end "
          + "FROM window_test_table "
          + "GROUP BY HOP(ts, \"INTERVAL 1 SECOND\", \"INTERVAL 2 SECOND\");";

  ZetaSQLQueryPlanner planner = new ZetaSQLQueryPlanner(config);
  BeamRelNode relNode = planner.convertToBeamRel(sql);
  PCollection<Row> result = BeamSqlRelUtils.toPCollection(pipeline, relNode);

  final Schema outputSchema =
      Schema.builder()
          .addInt64Field("count_star")
          .addDateTimeField("field1")
          .addDateTimeField("field2")
          .build();

  // Window boundaries all fall on 2018-07-01 21:26:xx UTC; name each instant once.
  ISOChronology utc = ISOChronology.getInstanceUTC();
  DateTime s5 = new DateTime(2018, 7, 1, 21, 26, 5, utc);
  DateTime s6 = new DateTime(2018, 7, 1, 21, 26, 6, utc);
  DateTime s7 = new DateTime(2018, 7, 1, 21, 26, 7, utc);
  DateTime s8 = new DateTime(2018, 7, 1, 21, 26, 8, utc);
  DateTime s9 = new DateTime(2018, 7, 1, 21, 26, 9, utc);
  DateTime s10 = new DateTime(2018, 7, 1, 21, 26, 10, utc);
  DateTime s11 = new DateTime(2018, 7, 1, 21, 26, 11, utc);

  PAssert.that(result)
      .containsInAnyOrder(
          Row.withSchema(outputSchema).addValues(2L, s7, s9).build(),
          Row.withSchema(outputSchema).addValues(1L, s5, s7).build(),
          Row.withSchema(outputSchema).addValues(2L, s6, s8).build(),
          Row.withSchema(outputSchema).addValues(2L, s8, s10).build(),
          Row.withSchema(outputSchema).addValues(1L, s9, s11).build());

  pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
}
Usage example of org.apache.beam.sdk.values.Row from the Apache Beam project: class StreamingSqlTest, method testZetaSQLNestedQueryFour.
@Test
public void testZetaSQLNestedQueryFour() {
// Join KeyValue with BigTable on Key = RowKey, tumbling over t2.ts in 1-second
// windows, and take the MIN of t2.Value per (t1.Value, window) group.
String sql = "SELECT t1.Value, TUMBLE_START('INTERVAL 1 SECOND') AS period_start, MIN(t2.Value) as" + " min_v FROM KeyValue AS t1 INNER JOIN BigTable AS t2 on t1.Key = t2.RowKey GROUP BY" + " t1.Value, TUMBLE(t2.ts, 'INTERVAL 1 SECOND')";
ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config);
BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql);
PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);
// NOTE(review): the expected schema names look swapped relative to the query
// aliases — "min_v" is declared as the DateTime field and "period_start" as the
// String field, while the query's second column is period_start (a timestamp)
// and its third is min_v (a string). The TYPES line up positionally with the
// values supplied below (String, DateTime, String), so the assertion may pass
// on values alone — confirm whether Row equality here compares field names.
PAssert.that(stream).containsInAnyOrder(Row.withSchema(Schema.builder().addStringField("value").addDateTimeField("min_v").addStringField("period_start").build()).addValues("KeyValue235", new DateTime(2018, 7, 1, 21, 26, 7, ISOChronology.getInstanceUTC()), "BigTable235").build());
pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
}
Usage example of org.apache.beam.sdk.values.Row from the Apache Beam project: class StreamingSqlTest, method testTVFTumbleAggregation.
@Test
public void testTVFTumbleAggregation() {
  // TUMBLE TVF: count rows per 1-second fixed window over KeyValue.ts.
  String sql =
      "SELECT COUNT(*) as field_count, "
          + "window_start "
          + "FROM TUMBLE((select * from KeyValue), descriptor(ts), 'INTERVAL 1 SECOND') "
          + "GROUP BY window_start";

  ZetaSQLQueryPlanner planner = new ZetaSQLQueryPlanner(config);
  BeamRelNode relNode = planner.convertToBeamRel(sql);
  PCollection<Row> result = BeamSqlRelUtils.toPCollection(pipeline, relNode);

  final Schema outputSchema =
      Schema.builder().addInt64Field("field_count").addDateTimeField("window_start").build();

  // One row lands in each of the two 1-second windows starting at :06 and :07.
  ISOChronology utc = ISOChronology.getInstanceUTC();
  List<Row> expected =
      Arrays.asList(
          Row.withSchema(outputSchema)
              .addValues(1L, new DateTime(2018, 7, 1, 21, 26, 7, utc))
              .build(),
          Row.withSchema(outputSchema)
              .addValues(1L, new DateTime(2018, 7, 1, 21, 26, 6, utc))
              .build());

  PAssert.that(result).containsInAnyOrder(expected);
  pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
}
Aggregations