Search in sources :

Example 21 with Row

Use of org.apache.beam.sdk.values.Row in the beam project by apache.

From the class ZetaSQLPushDownTest, method initializeBeamTableProvider.

/**
 * Populates the static {@code tableProvider} with two tables, one obtained with
 * {@code PushDownOptions.PROJECT} and one with {@code PushDownOptions.BOTH}, and loads the
 * same two {@code BASIC_SCHEMA} rows into each of them.
 */
private static void initializeBeamTableProvider() {
    final Table tableWithProject = getTable("InMemoryTableProject", PushDownOptions.PROJECT);
    final Table tableWithBoth = getTable("InMemoryTableBoth", PushDownOptions.BOTH);

    // Identical fixture data is stored in both tables.
    final Row firstRow = row(BASIC_SCHEMA, 100L, 1L, "one", 100L);
    final Row secondRow = row(BASIC_SCHEMA, 200L, 2L, "two", 200L);
    final Row[] fixtureRows = { firstRow, secondRow };

    final Table[] tables = { tableWithProject, tableWithBoth };
    tableProvider = new TestTableProvider();
    // Register every table first, then fill them, in the same order.
    for (Table table : tables) {
        tableProvider.createTable(table);
    }
    for (Table table : tables) {
        tableProvider.addRows(table.getName(), fixtureRows);
    }
}
Also used : TestTableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.test.TestTableProvider) Table(org.apache.beam.sdk.extensions.sql.meta.Table) Row(org.apache.beam.sdk.values.Row)

Example 22 with Row

Use of org.apache.beam.sdk.values.Row in the beam project by apache.

From the class TextTableProviderTest, method testInvalidJson.

/**
 * Writes {@code INVALID_JSON_TEXT} to {@code test.json} in the temp folder, queries it through
 * a text table declared with the JSON format and a dead-letter file, and expects an empty
 * result plus the original text in the file(s) matching the dead-letter path.
 */
@Test
public void testInvalidJson() throws Exception {
    final File deadLetterFile = new File(tempFolder.getRoot(), "dead-letter-file");

    // Input file containing the unparseable payload.
    final byte[] payload = INVALID_JSON_TEXT.getBytes(Charsets.UTF_8);
    Files.write(tempFolder.newFile("test.json").toPath(), payload);

    final String query = "SELECT * FROM test";
    final String ddlTemplate =
        "CREATE EXTERNAL TABLE test %s TYPE text LOCATION '%s/*' "
            + "TBLPROPERTIES '{\"format\":\"json\", \"deadLetterFile\": \"%s\"}'";
    final String ddl =
        String.format(
            ddlTemplate, SQL_JSON_SCHEMA, tempFolder.getRoot(), deadLetterFile.getAbsoluteFile());

    final PCollection<Row> rows = pipeline.apply(SqlTransform.query(query).withDdlString(ddl));
    PAssert.that(rows).empty();
    pipeline.run();

    // The dead-letter output is looked up with a trailing wildcard on the absolute path.
    final NumberedShardedFile deadLetterOutput =
        new NumberedShardedFile(deadLetterFile.getAbsoluteFile() + "*");
    assertThat(
        deadLetterOutput.readFilesWithRetries(Sleeper.DEFAULT, BackOff.STOP_BACKOFF),
        containsInAnyOrder(INVALID_JSON_TEXT));
}
Also used : NumberedShardedFile(org.apache.beam.sdk.util.NumberedShardedFile) Row(org.apache.beam.sdk.values.Row) File(java.io.File) Test(org.junit.Test)

Example 23 with Row

Use of org.apache.beam.sdk.values.Row in the beam project by apache.

From the class BeamRowToStorageApiProto, method toProtoValue.

/**
 * Converts one Beam field value into its proto-side representation, dispatching on the Beam
 * type name.
 *
 * <ul>
 *   <li>{@code ROW}: delegates to {@code messageFromBeamRow} with the descriptor's message type.
 *   <li>{@code ARRAY} / {@code ITERABLE}: converts each element recursively, using the same
 *       field descriptor and the collection's element type, and returns the results as a list.
 *   <li>{@code MAP}: rejected.
 *   <li>anything else: delegates to {@code scalarToProtoValue}.
 * </ul>
 *
 * @param fieldDescriptor descriptor of the target proto field
 * @param beamFieldType Beam type of {@code value}
 * @param value the Beam value to convert
 * @return the converted value
 * @throws RuntimeException if a collection type reports no element type, or the type is a map
 */
private static Object toProtoValue(FieldDescriptor fieldDescriptor, FieldType beamFieldType, Object value) {
    switch (beamFieldType.getTypeName()) {
        case ROW: {
            return messageFromBeamRow(fieldDescriptor.getMessageType(), (Row) value);
        }
        case ARRAY: {
            List<Object> elements = (List<Object>) value;
            @Nullable FieldType elementType = beamFieldType.getCollectionElementType();
            if (elementType == null) {
                throw new RuntimeException("Unexpected null element type!");
            }
            return elements.stream()
                .map(element -> toProtoValue(fieldDescriptor, elementType, element))
                .collect(Collectors.toList());
        }
        case ITERABLE: {
            Iterable<Object> elements = (Iterable<Object>) value;
            @Nullable FieldType elementType = beamFieldType.getCollectionElementType();
            if (elementType == null) {
                throw new RuntimeException("Unexpected null element type!");
            }
            // Sequential (non-parallel) stream over the iterable.
            return StreamSupport.stream(elements.spliterator(), false)
                .map(element -> toProtoValue(fieldDescriptor, elementType, element))
                .collect(Collectors.toList());
        }
        case MAP: {
            throw new RuntimeException("Map types not supported by BigQuery.");
        }
        default: {
            return scalarToProtoValue(beamFieldType, value);
        }
    }
}
Also used : Type(com.google.protobuf.DescriptorProtos.FieldDescriptorProto.Type) Descriptor(com.google.protobuf.Descriptors.Descriptor) DynamicMessage(com.google.protobuf.DynamicMessage) BiFunction(java.util.function.BiFunction) LocalDateTime(java.time.LocalDateTime) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Bytes(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.Bytes) DescriptorValidationException(com.google.protobuf.Descriptors.DescriptorValidationException) Function(java.util.function.Function) BigDecimal(java.math.BigDecimal) SqlTypes(org.apache.beam.sdk.schemas.logicaltypes.SqlTypes) Map(java.util.Map) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) LocalTime(java.time.LocalTime) EnumerationType(org.apache.beam.sdk.schemas.logicaltypes.EnumerationType) StreamSupport(java.util.stream.StreamSupport) Row(org.apache.beam.sdk.values.Row) FileDescriptor(com.google.protobuf.Descriptors.FileDescriptor) Nullable(javax.annotation.Nullable) Field(org.apache.beam.sdk.schemas.Schema.Field) FieldDescriptorProto(com.google.protobuf.DescriptorProtos.FieldDescriptorProto) Lists(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists) ReadableInstant(org.joda.time.ReadableInstant) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) FieldDescriptor(com.google.protobuf.Descriptors.FieldDescriptor) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) Schema(org.apache.beam.sdk.schemas.Schema) TypeName(org.apache.beam.sdk.schemas.Schema.TypeName) ByteString(com.google.protobuf.ByteString) List(java.util.List) LogicalType(org.apache.beam.sdk.schemas.Schema.LogicalType) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting) FileDescriptorProto(com.google.protobuf.DescriptorProtos.FileDescriptorProto) LocalDate(java.time.LocalDate) 
Preconditions(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions) Label(com.google.protobuf.DescriptorProtos.FieldDescriptorProto.Label) Functions(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Functions) DescriptorProto(com.google.protobuf.DescriptorProtos.DescriptorProto) List(java.util.List) Nullable(javax.annotation.Nullable) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType)

Example 24 with Row

Use of org.apache.beam.sdk.values.Row in the beam project by apache.

From the class JdbcIOTest, method testReadRowsWithNumericFieldsWithExcessPrecision.

/**
 * Reads a single column produced by {@code CAST(1 AS NUMERIC(10, 2))} through
 * {@code JdbcIO.readRows()} and checks both the resulting schema (a non-nullable
 * fixed-precision numeric logical type) and the value, which is expected to be 1 at scale 2.
 */
@Test
public void testReadRowsWithNumericFieldsWithExcessPrecision() {
    final String query =
        String.format("SELECT CAST(1 AS NUMERIC(10, 2)) AS T1 FROM %s WHERE name = ?", READ_TABLE_NAME);
    final PCollection<Row> rows =
        pipeline.apply(
            JdbcIO.readRows()
                .withDataSourceConfiguration(DATA_SOURCE_CONFIGURATION)
                .withQuery(query)
                .withStatementPreparator(
                    statement -> statement.setString(1, TestRow.getNameForSeed(1))));

    // Schema check: one field "T1" carrying the fixed-precision logical type.
    final FieldType numericType =
        FieldType.logicalType(FixedPrecisionNumeric.of(NUMERIC.getName(), 10, 2)).withNullable(false);
    final Schema expectedSchema = Schema.of(Schema.Field.of("T1", numericType));
    assertEquals(expectedSchema, rows.getSchema());

    // Value check on the projected column.
    final PCollection<Row> output = rows.apply(Select.fieldNames("T1"));
    final BigDecimal expectedValue = BigDecimal.valueOf(1).setScale(2, RoundingMode.HALF_UP);
    final Row expectedRow = Row.withSchema(expectedSchema).addValues(expectedValue).build();
    PAssert.that(output).containsInAnyOrder(ImmutableList.of(expectedRow));

    pipeline.run();
}
Also used : Count(org.apache.beam.sdk.transforms.Count) ExpectedLogs(org.apache.beam.sdk.testing.ExpectedLogs) Arrays(java.util.Arrays) PipelineExecutionException(org.apache.beam.sdk.Pipeline.PipelineExecutionException) SerializableCoder(org.apache.beam.sdk.coders.SerializableCoder) Connection(java.sql.Connection) Time(java.sql.Time) Matchers.not(org.hamcrest.Matchers.not) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) Array(java.sql.Array) PoolableDataSourceProvider(org.apache.beam.sdk.io.jdbc.JdbcIO.PoolableDataSourceProvider) BigDecimal(java.math.BigDecimal) Matchers.closeTo(org.hamcrest.Matchers.closeTo) Create(org.apache.beam.sdk.transforms.Create) Wait(org.apache.beam.sdk.transforms.Wait) PoolingDataSource(org.apache.commons.dbcp2.PoolingDataSource) RoundingMode(java.math.RoundingMode) KvCoder(org.apache.beam.sdk.coders.KvCoder) NULL(java.sql.JDBCType.NULL) TimeZone(java.util.TimeZone) Timestamp(java.sql.Timestamp) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) UUID(java.util.UUID) PreparedStatement(java.sql.PreparedStatement) LogRecord(java.util.logging.LogRecord) TypeSafeMatcher(org.hamcrest.TypeSafeMatcher) StandardCharsets(java.nio.charset.StandardCharsets) Serializable(java.io.Serializable) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) Matchers.any(org.mockito.Matchers.any) List(java.util.List) PartitioningFn(org.apache.beam.sdk.io.jdbc.JdbcUtil.PartitioningFn) ParDo(org.apache.beam.sdk.transforms.ParDo) SerializableUtils(org.apache.beam.sdk.util.SerializableUtils) Assert.assertFalse(org.junit.Assert.assertFalse) TypeDescriptors(org.apache.beam.sdk.values.TypeDescriptors) ISOChronology(org.joda.time.chrono.ISOChronology) TestStream(org.apache.beam.sdk.testing.TestStream) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) DatabaseTestHelper.assertRowCount(org.apache.beam.sdk.io.common.DatabaseTestHelper.assertRowCount) 
Matchers.containsString(org.hamcrest.Matchers.containsString) Mockito.mock(org.mockito.Mockito.mock) KV(org.apache.beam.sdk.values.KV) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) BeforeClass(org.junit.BeforeClass) Assert.assertThrows(org.junit.Assert.assertThrows) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) NUMERIC(java.sql.JDBCType.NUMERIC) ArrayList(java.util.ArrayList) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) FixedPrecisionNumeric(org.apache.beam.sdk.io.jdbc.LogicalTypes.FixedPrecisionNumeric) Assert.assertSame(org.junit.Assert.assertSame) JDBCType(java.sql.JDBCType) SQLException(java.sql.SQLException) Calendar(java.util.Calendar) Charset(java.nio.charset.Charset) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) DataSource(javax.sql.DataSource) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Row(org.apache.beam.sdk.values.Row) ExpectedException(org.junit.rules.ExpectedException) Select(org.apache.beam.sdk.schemas.transforms.Select) Description(org.hamcrest.Description) PAssert(org.apache.beam.sdk.testing.PAssert) TestRow(org.apache.beam.sdk.io.common.TestRow) DataSourceConfiguration(org.apache.beam.sdk.io.jdbc.JdbcIO.DataSourceConfiguration) DateTime(org.joda.time.DateTime) Assert.assertTrue(org.junit.Assert.assertTrue) Mockito.times(org.mockito.Mockito.times) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) Schema(org.apache.beam.sdk.schemas.Schema) Mockito.verify(org.mockito.Mockito.verify) Date(java.sql.Date) LocalDate(org.joda.time.LocalDate) Rule(org.junit.Rule) Instant(org.joda.time.Instant) Statement(java.sql.Statement) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) Collections(java.util.Collections) DatabaseTestHelper(org.apache.beam.sdk.io.common.DatabaseTestHelper) Assert.assertEquals(org.junit.Assert.assertEquals) 
ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) Schema(org.apache.beam.sdk.schemas.Schema) Row(org.apache.beam.sdk.values.Row) TestRow(org.apache.beam.sdk.io.common.TestRow) Test(org.junit.Test)

Example 25 with Row

Use of org.apache.beam.sdk.values.Row in the beam project by apache.

From the class JdbcIOTest, method testGetPreparedStatementSetCallerForLogicalTypes.

/**
 * Exercises {@code JdbcUtil.getPreparedStatementSetCaller} for each JDBC logical type in the
 * test schema against a mocked {@link PreparedStatement}.
 *
 * <p>Each setter is invoked with the zero-based field index; the verifications expect the
 * statement parameter at that index plus one.
 */
@Test
public void testGetPreparedStatementSetCallerForLogicalTypes() throws Exception {
    FieldType fixedLengthStringType = LogicalTypes.fixedLengthString(JDBCType.VARCHAR, 4);
    Schema schema =
        Schema.builder()
            .addField("logical_date_col", LogicalTypes.JDBC_DATE_TYPE)
            .addField("logical_time_col", LogicalTypes.JDBC_TIME_TYPE)
            .addField("logical_time_with_tz_col", LogicalTypes.JDBC_TIMESTAMP_WITH_TIMEZONE_TYPE)
            .addField("logical_fixed_length_string_col", fixedLengthStringType)
            .addField("logical_fixed_length_string_nullable_col", fixedLengthStringType.withNullable(true))
            .addField("logical_uuid_col", LogicalTypes.JDBC_UUID_TYPE)
            .addField("logical_other_col", LogicalTypes.OTHER_AS_STRING_TYPE)
            .build();

    long epochMilli = 1558719710000L;
    DateTime timestampValue = new DateTime(epochMilli, ISOChronology.getInstanceUTC());
    // The time-of-day value must be less than the number of milliseconds in one day.
    DateTime timeValue = new DateTime(34567000L, ISOChronology.getInstanceUTC());
    Row row =
        Row.withSchema(schema)
            .addValues(
                timestampValue.withTimeAtStartOfDay(),
                timeValue,
                timestampValue,
                "Test",
                null,
                UUID.randomUUID(),
                "{}")
            .build();

    PreparedStatement psMocked = mock(PreparedStatement.class);

    // Types handed to the setter lookup, listed in schema field order.
    FieldType[] setterTypes = {
        LogicalTypes.JDBC_DATE_TYPE,
        LogicalTypes.JDBC_TIME_TYPE,
        LogicalTypes.JDBC_TIMESTAMP_WITH_TIMEZONE_TYPE,
        fixedLengthStringType,
        fixedLengthStringType.withNullable(true),
        LogicalTypes.JDBC_UUID_TYPE,
        LogicalTypes.OTHER_AS_STRING_TYPE
    };
    for (int i = 0; i < setterTypes.length; i++) {
        JdbcUtil.getPreparedStatementSetCaller(setterTypes[i])
            .set(row, psMocked, i, SchemaUtil.FieldWithIndex.of(schema.getField(i), i));
    }

    verify(psMocked, times(1)).setDate(1, new Date(row.getDateTime(0).getMillis()));
    verify(psMocked, times(1)).setTime(2, new Time(row.getDateTime(1).getMillis()));

    Calendar utcCalendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
    utcCalendar.setTimeInMillis(epochMilli);
    verify(psMocked, times(1))
        .setTimestamp(3, new Timestamp(utcCalendar.getTime().getTime()), utcCalendar);

    verify(psMocked, times(1)).setString(4, row.getString(3));
    verify(psMocked, times(1)).setString(5, row.getString(4));
    verify(psMocked, times(1)).setObject(6, row.getLogicalTypeValue(5, UUID.class));
    verify(psMocked, times(1)).setObject(7, row.getString(6), java.sql.Types.OTHER);
}
Also used : Schema(org.apache.beam.sdk.schemas.Schema) Calendar(java.util.Calendar) PreparedStatement(java.sql.PreparedStatement) Time(java.sql.Time) DateTime(org.joda.time.DateTime) Row(org.apache.beam.sdk.values.Row) TestRow(org.apache.beam.sdk.io.common.TestRow) UUID(java.util.UUID) Timestamp(java.sql.Timestamp) Date(java.sql.Date) LocalDate(org.joda.time.LocalDate) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) Test(org.junit.Test)

Aggregations

Row (org.apache.beam.sdk.values.Row)958 Test (org.junit.Test)879 Schema (org.apache.beam.sdk.schemas.Schema)566 ByteString (com.google.protobuf.ByteString)219 BeamRelNode (org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode)206 Matchers.containsString (org.hamcrest.Matchers.containsString)85 Category (org.junit.experimental.categories.Category)72 Value (com.google.zetasql.Value)66 List (java.util.List)49 FieldAccessDescriptor (org.apache.beam.sdk.schemas.FieldAccessDescriptor)49 DateTime (org.joda.time.DateTime)46 UsesSchema (org.apache.beam.sdk.testing.UsesSchema)43 DefaultSchema (org.apache.beam.sdk.schemas.annotations.DefaultSchema)36 PCollection (org.apache.beam.sdk.values.PCollection)36 BeamSqlEnv (org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv)35 FieldType (org.apache.beam.sdk.schemas.Schema.FieldType)33 ArrayList (java.util.ArrayList)29 BeamIOSourceRel (org.apache.beam.sdk.extensions.sql.impl.rel.BeamIOSourceRel)28 ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList)28 Ignore (org.junit.Ignore)27