Search in sources :

Example 31 with Row

use of org.apache.beam.sdk.values.Row in project beam by apache.

the class JdbcIOTest method testWriteWithoutPsWithNonNullableTableField.

/**
 * Writes schema-based rows (no explicit prepared-statement setter) into a table whose column
 * layout does not match the row schema and expects the write to fail.
 *
 * <p>The row schema carries {@code column_boolean} and {@code column_string}, while the table is
 * created with {@code column_boolean} and a {@code column_int INTEGER NOT NULL} column. The test
 * passes only if a {@link RuntimeException} escapes the method.
 *
 * @throws Exception if creating or dropping the test table fails
 */
@Test
public void testWriteWithoutPsWithNonNullableTableField() throws Exception {
    final int rowsToAdd = 10;
    Schema.Builder schemaBuilder = Schema.builder();
    schemaBuilder.addField(Schema.Field.of("column_boolean", Schema.FieldType.BOOLEAN));
    schemaBuilder.addField(Schema.Field.of("column_string", Schema.FieldType.STRING));
    Schema schema = schemaBuilder.build();
    String tableName = DatabaseTestHelper.getTestTableName("UT_WRITE");
    StringBuilder stmt = new StringBuilder("CREATE TABLE ");
    stmt.append(tableName);
    stmt.append(" (");
    stmt.append("column_boolean       BOOLEAN,");
    stmt.append("column_int           INTEGER NOT NULL");
    stmt.append(" )");
    DatabaseTestHelper.createTableWithStatement(DATA_SOURCE, stmt.toString());
    // Declare the expected failure before exercising the code under test. It is registered only
    // after the table has been created so that a setup failure is not mistaken for the expected
    // exception, and it no longer lives in the cleanup path where a failing deleteTable() would
    // skip it.
    thrown.expect(RuntimeException.class);
    try {
        ArrayList<Row> data = getRowsToWrite(rowsToAdd, schema);
        pipeline.apply(Create.of(data)).setRowSchema(schema).apply(JdbcIO.<Row>write().withDataSourceConfiguration(DATA_SOURCE_CONFIGURATION).withBatchSize(10L).withTable(tableName));
        pipeline.run();
    } finally {
        // Cleanup only: always drop the table, whether or not the run failed.
        DatabaseTestHelper.deleteTable(DATA_SOURCE, tableName);
    }
}
Also used : Schema(org.apache.beam.sdk.schemas.Schema) Matchers.containsString(org.hamcrest.Matchers.containsString) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) Row(org.apache.beam.sdk.values.Row) TestRow(org.apache.beam.sdk.io.common.TestRow) Test(org.junit.Test)

Example 32 with Row

use of org.apache.beam.sdk.values.Row in project beam by apache.

the class JdbcIOTest method testReadRowsWithDataSourceConfiguration.

/**
 * Reads rows via {@code JdbcIO.readRows()} using a data source configuration and a parameterized
 * query, then checks the schema reported by the resulting collection and the single expected
 * row after selecting the {@code NAME} and {@code ID} fields.
 */
@Test
public void testReadRowsWithDataSourceConfiguration() {
    String query = String.format("select name,id from %s where name = ?", READ_TABLE_NAME);
    PCollection<Row> rows =
        pipeline.apply(
            JdbcIO.readRows()
                .withDataSourceConfiguration(DATA_SOURCE_CONFIGURATION)
                .withQuery(query)
                // Binds the single "?" placeholder of the query above.
                .withStatementPreparator(
                    statement -> statement.setString(1, TestRow.getNameForSeed(1))));

    // Both columns are expected to come back as nullable fields.
    Schema.Field nameField =
        Schema.Field.of("NAME", LogicalTypes.variableLengthString(JDBCType.VARCHAR, 500))
            .withNullable(true);
    Schema.Field idField = Schema.Field.of("ID", Schema.FieldType.INT32).withNullable(true);
    Schema expectedSchema = Schema.of(nameField, idField);
    assertEquals(expectedSchema, rows.getSchema());

    PCollection<Row> output = rows.apply(Select.fieldNames("NAME", "ID"));
    Row expectedRow = Row.withSchema(expectedSchema).addValues("Testval1", 1).build();
    PAssert.that(output).containsInAnyOrder(ImmutableList.of(expectedRow));

    pipeline.run();
}
Also used : Count(org.apache.beam.sdk.transforms.Count) ExpectedLogs(org.apache.beam.sdk.testing.ExpectedLogs) Arrays(java.util.Arrays) PipelineExecutionException(org.apache.beam.sdk.Pipeline.PipelineExecutionException) SerializableCoder(org.apache.beam.sdk.coders.SerializableCoder) Connection(java.sql.Connection) Time(java.sql.Time) Matchers.not(org.hamcrest.Matchers.not) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) Array(java.sql.Array) PoolableDataSourceProvider(org.apache.beam.sdk.io.jdbc.JdbcIO.PoolableDataSourceProvider) BigDecimal(java.math.BigDecimal) Matchers.closeTo(org.hamcrest.Matchers.closeTo) Create(org.apache.beam.sdk.transforms.Create) Wait(org.apache.beam.sdk.transforms.Wait) PoolingDataSource(org.apache.commons.dbcp2.PoolingDataSource) RoundingMode(java.math.RoundingMode) KvCoder(org.apache.beam.sdk.coders.KvCoder) NULL(java.sql.JDBCType.NULL) TimeZone(java.util.TimeZone) Timestamp(java.sql.Timestamp) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) UUID(java.util.UUID) PreparedStatement(java.sql.PreparedStatement) LogRecord(java.util.logging.LogRecord) TypeSafeMatcher(org.hamcrest.TypeSafeMatcher) StandardCharsets(java.nio.charset.StandardCharsets) Serializable(java.io.Serializable) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) Matchers.any(org.mockito.Matchers.any) List(java.util.List) PartitioningFn(org.apache.beam.sdk.io.jdbc.JdbcUtil.PartitioningFn) ParDo(org.apache.beam.sdk.transforms.ParDo) SerializableUtils(org.apache.beam.sdk.util.SerializableUtils) Assert.assertFalse(org.junit.Assert.assertFalse) TypeDescriptors(org.apache.beam.sdk.values.TypeDescriptors) ISOChronology(org.joda.time.chrono.ISOChronology) TestStream(org.apache.beam.sdk.testing.TestStream) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) DatabaseTestHelper.assertRowCount(org.apache.beam.sdk.io.common.DatabaseTestHelper.assertRowCount) 
Matchers.containsString(org.hamcrest.Matchers.containsString) Mockito.mock(org.mockito.Mockito.mock) KV(org.apache.beam.sdk.values.KV) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) BeforeClass(org.junit.BeforeClass) Assert.assertThrows(org.junit.Assert.assertThrows) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) NUMERIC(java.sql.JDBCType.NUMERIC) ArrayList(java.util.ArrayList) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) FixedPrecisionNumeric(org.apache.beam.sdk.io.jdbc.LogicalTypes.FixedPrecisionNumeric) Assert.assertSame(org.junit.Assert.assertSame) JDBCType(java.sql.JDBCType) SQLException(java.sql.SQLException) Calendar(java.util.Calendar) Charset(java.nio.charset.Charset) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) DataSource(javax.sql.DataSource) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Row(org.apache.beam.sdk.values.Row) ExpectedException(org.junit.rules.ExpectedException) Select(org.apache.beam.sdk.schemas.transforms.Select) Description(org.hamcrest.Description) PAssert(org.apache.beam.sdk.testing.PAssert) TestRow(org.apache.beam.sdk.io.common.TestRow) DataSourceConfiguration(org.apache.beam.sdk.io.jdbc.JdbcIO.DataSourceConfiguration) DateTime(org.joda.time.DateTime) Assert.assertTrue(org.junit.Assert.assertTrue) Mockito.times(org.mockito.Mockito.times) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) Schema(org.apache.beam.sdk.schemas.Schema) Mockito.verify(org.mockito.Mockito.verify) Date(java.sql.Date) LocalDate(org.joda.time.LocalDate) Rule(org.junit.Rule) Instant(org.joda.time.Instant) Statement(java.sql.Statement) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) Collections(java.util.Collections) DatabaseTestHelper(org.apache.beam.sdk.io.common.DatabaseTestHelper) Assert.assertEquals(org.junit.Assert.assertEquals) 
ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) Schema(org.apache.beam.sdk.schemas.Schema) Row(org.apache.beam.sdk.values.Row) TestRow(org.apache.beam.sdk.io.common.TestRow) Test(org.junit.Test)

Example 33 with Row

use of org.apache.beam.sdk.values.Row in project beam by apache.

the class SqlBoundedSideInputJoin method expand.

/**
 * Joins the bid events of {@code events} with the configured side input through a SQL query and
 * converts the joined rows back into {@link Bid} objects.
 *
 * <p>Bids are filtered out of the event stream and turned into rows; the side input is given a
 * two-field schema ({@code id}, {@code extra}) and converted to rows as well. Both collections
 * are registered under the tags {@code "bid"} and {@code "side"} before the query is applied.
 *
 * @param events the incoming event collection
 * @return the query result converted to {@link Bid}
 */
@Override
public PCollection<Bid> expand(PCollection<Event> events) {
    PCollection<Event> bidEvents = events.apply(Filter.by(NexmarkQueryUtil.IS_BID));
    PCollection<Row> bids =
        bidEvents.apply(getName() + ".SelectEvent", new SelectEvent(Event.Type.BID));

    checkState(getSideInput() != null, "Configuration error: side input is null");

    // Anonymous subclasses are kept so the tags retain their element type information.
    TupleTag<Row> sideTag = new TupleTag<Row>("side") {
    };
    TupleTag<Row> bidTag = new TupleTag<Row>("bid") {
    };

    Schema schema =
        Schema.of(
            Schema.Field.of("id", Schema.FieldType.INT64),
            Schema.Field.of("extra", Schema.FieldType.STRING));

    // Attach the schema to the KV side input, with conversions in both directions, then turn
    // it into rows.
    PCollection<Row> sideRows =
        getSideInput()
            .setSchema(
                schema,
                TypeDescriptors.kvs(TypeDescriptors.longs(), TypeDescriptors.strings()),
                kv -> Row.withSchema(schema).addValues(kv.getKey(), kv.getValue()).build(),
                row -> KV.of(row.getInt64("id"), row.getString("extra")))
            .apply("SideToRows", Convert.toRows());

    PCollectionTuple joinInputs = PCollectionTuple.of(bidTag, bids).and(sideTag, sideRows);
    String sql = String.format(query, configuration.sideInputRowCount);
    return joinInputs
        .apply(SqlTransform.query(sql).withQueryPlannerClass(plannerClass))
        .apply("ResultToBid", Convert.fromRows(Bid.class));
}
Also used : NexmarkConfiguration(org.apache.beam.sdk.nexmark.NexmarkConfiguration) KV(org.apache.beam.sdk.values.KV) QueryPlanner(org.apache.beam.sdk.extensions.sql.impl.QueryPlanner) ZetaSQLQueryPlanner(org.apache.beam.sdk.extensions.sql.zetasql.ZetaSQLQueryPlanner) Bid(org.apache.beam.sdk.nexmark.model.Bid) SelectEvent(org.apache.beam.sdk.nexmark.model.sql.SelectEvent) PCollection(org.apache.beam.sdk.values.PCollection) SqlTransform(org.apache.beam.sdk.extensions.sql.SqlTransform) Schema(org.apache.beam.sdk.schemas.Schema) Convert(org.apache.beam.sdk.schemas.transforms.Convert) Filter(org.apache.beam.sdk.transforms.Filter) CalciteQueryPlanner(org.apache.beam.sdk.extensions.sql.impl.CalciteQueryPlanner) Event(org.apache.beam.sdk.nexmark.model.Event) Preconditions.checkState(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState) TupleTag(org.apache.beam.sdk.values.TupleTag) TypeDescriptors(org.apache.beam.sdk.values.TypeDescriptors) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) NexmarkQueryTransform(org.apache.beam.sdk.nexmark.queries.NexmarkQueryTransform) NexmarkQueryUtil(org.apache.beam.sdk.nexmark.queries.NexmarkQueryUtil) Row(org.apache.beam.sdk.values.Row) Schema(org.apache.beam.sdk.schemas.Schema) TupleTag(org.apache.beam.sdk.values.TupleTag) Row(org.apache.beam.sdk.values.Row) Bid(org.apache.beam.sdk.nexmark.model.Bid) SelectEvent(org.apache.beam.sdk.nexmark.model.sql.SelectEvent)

Example 34 with Row

use of org.apache.beam.sdk.values.Row in project beam by apache.

the class SqlTransformRunner method runUsingSqlTransform.

/**
 * This is the default method in BeamTpcds.main method. Runs the job using the
 * {@code SqlTransform.query()} method.
 *
 * <p>One pipeline is created per requested query name. Each pipeline applies the query to the
 * tables returned by {@code getTables}, maps every result row to its string form and writes the
 * strings as a single-shard {@code .txt} output under
 * {@code <resultsDirectory>/<dataSize>/<jobName>}. The pipelines are submitted to a fixed-size
 * thread pool and an execution summary is printed afterwards.
 *
 * @param args Command line arguments
 * @throws Exception if an error is propagated from option parsing, query or table loading, or
 *     from printing the execution summary
 */
public static void runUsingSqlTransform(String[] args) throws Exception {
    TpcdsOptions tpcdsOptions = PipelineOptionsFactory.fromArgs(args).withValidation().as(TpcdsOptions.class);
    String dataSize = TpcdsParametersReader.getAndCheckDataSize(tpcdsOptions);
    String[] queryNames = TpcdsParametersReader.getAndCheckQueryNames(tpcdsOptions);
    int nThreads = TpcdsParametersReader.getAndCheckTpcParallel(tpcdsOptions);
    // Using ExecutorService and CompletionService to fulfill multi-threading functionality
    ExecutorService executor = Executors.newFixedThreadPool(nThreads);
    CompletionService<TpcdsRunResult> completion = new ExecutorCompletionService<>(executor);
    // Make an array of pipelines, each pipeline is responsible for running a corresponding query.
    Pipeline[] pipelines = new Pipeline[queryNames.length];
    CSVFormat csvFormat = CSVFormat.MYSQL.withDelimiter('|').withNullString("");
    // Execute all queries, transform each result into strings and write them into a txt file
    // under the configured results directory.
    for (int i = 0; i < queryNames.length; i++) {
        // For each query, get a copy of pipelineOptions from command line arguments.
        TpcdsOptions tpcdsOptionsCopy = PipelineOptionsFactory.fromArgs(args).withValidation().as(TpcdsOptions.class);
        // Set a unique job name using the time stamp so that multiple different pipelines can run
        // together.
        tpcdsOptionsCopy.setJobName(queryNames[i] + "result" + System.currentTimeMillis());
        pipelines[i] = Pipeline.create(tpcdsOptionsCopy);
        String queryString = QueryReader.readQuery(queryNames[i]);
        PCollectionTuple tables = getTables(pipelines[i], csvFormat, queryNames[i]);
        try {
            tables.apply(SqlTransform.query(queryString)).apply(MapElements.into(TypeDescriptors.strings()).via(Row::toString)).apply(TextIO.write().to(tpcdsOptions.getResultsDirectory() + "/" + dataSize + "/" + pipelines[i].getOptions().getJobName()).withSuffix(".txt").withNumShards(1));
        } catch (Exception e) {
            // Pass the exception as the trailing argument so the stack trace goes through the
            // logger together with the query name instead of being dumped to stderr.
            LOG.error("{} failed to execute", queryNames[i], e);
        }
        // NOTE(review): the pipeline is still submitted when building it failed above; this
        // keeps the number of submitted tasks equal to queryNames.length, which is the count
        // passed to printExecutionSummary below.
        completion.submit(new TpcdsRun(pipelines[i]));
    }
    // No further tasks are submitted after this point.
    executor.shutdown();
    printExecutionSummary(completion, queryNames.length);
}
Also used : ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) IOException(java.io.IOException) Pipeline(org.apache.beam.sdk.Pipeline) ExecutorService(java.util.concurrent.ExecutorService) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) CSVFormat(org.apache.commons.csv.CSVFormat) Row(org.apache.beam.sdk.values.Row)

Example 35 with Row

use of org.apache.beam.sdk.values.Row in project beam by apache.

the class ProjectionPushdownOptimizerTest method testBranchedProjectionPushdown.

/**
 * Applies two consumers to the same source output, one accessing {@code foo} and one accessing
 * {@code bar}, then checks that after optimization the pipeline contains a source restricted
 * to {@code foo} and {@code bar} and no longer contains the original three-field source.
 */
@Test
public void testBranchedProjectionPushdown() {
    Pipeline pipeline = Pipeline.create();

    FieldAccessDescriptor allFields = FieldAccessDescriptor.withFieldNames("foo", "bar", "baz");
    SimpleSourceWithPushdown originalSource = new SimpleSourceWithPushdown(allFields);
    PCollection<Row> rows = pipeline.apply(originalSource);

    // Two independent branches off the same input, each reading a different field.
    FieldAccessDescriptor fooOnly = FieldAccessDescriptor.withFieldNames("foo");
    rows.apply(new FieldAccessTransform(fooOnly));
    FieldAccessDescriptor barOnly = FieldAccessDescriptor.withFieldNames("bar");
    rows.apply(new FieldAccessTransform(barOnly));

    FieldAccessDescriptor usedFields = FieldAccessDescriptor.withFieldNames("foo", "bar");
    SimpleSourceWithPushdown expectedSource = new SimpleSourceWithPushdown(usedFields);

    ProjectionPushdownOptimizer.optimize(pipeline);

    boolean hasExpectedSource = pipelineHasTransform(pipeline, expectedSource);
    Assert.assertTrue(hasExpectedSource);
    boolean hasOriginalSource = pipelineHasTransform(pipeline, originalSource);
    Assert.assertFalse(hasOriginalSource);
}
Also used : Row(org.apache.beam.sdk.values.Row) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Aggregations

Row (org.apache.beam.sdk.values.Row): 958; Test (org.junit.Test): 879; Schema (org.apache.beam.sdk.schemas.Schema): 566; ByteString (com.google.protobuf.ByteString): 219; BeamRelNode (org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode): 206; Matchers.containsString (org.hamcrest.Matchers.containsString): 85; Category (org.junit.experimental.categories.Category): 72; Value (com.google.zetasql.Value): 66; List (java.util.List): 49; FieldAccessDescriptor (org.apache.beam.sdk.schemas.FieldAccessDescriptor): 49; DateTime (org.joda.time.DateTime): 46; UsesSchema (org.apache.beam.sdk.testing.UsesSchema): 43; DefaultSchema (org.apache.beam.sdk.schemas.annotations.DefaultSchema): 36; PCollection (org.apache.beam.sdk.values.PCollection): 36; BeamSqlEnv (org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv): 35; FieldType (org.apache.beam.sdk.schemas.Schema.FieldType): 33; ArrayList (java.util.ArrayList): 29; BeamIOSourceRel (org.apache.beam.sdk.extensions.sql.impl.rel.BeamIOSourceRel): 28; ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList): 28; Ignore (org.junit.Ignore): 27