use of org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter in project hudi by apache.
the class TestS3EventsSource method testReadingFromSource.
/**
 * Exercises three consecutive Avro-format fetches through a {@code SourceFormatAdapter}
 * wrapping the source returned by {@code prepareCloudObjectSource()}.
 *
 * <ol>
 *   <li>After {@code generateMessageInQueue(null)}, a fetch without a checkpoint is expected
 *       to return an empty batch.</li>
 *   <li>After {@code generateMessageInQueue("1")}, a fetch without a checkpoint is expected
 *       to return exactly one record.</li>
 *   <li>After {@code generateMessageInQueue("2")}, a fetch resuming from the checkpoint of the
 *       previous batch is expected to return exactly one record whose {@code s3.object.key}
 *       is {@code "2.parquet"}.</li>
 * </ol>
 *
 * @throws IOException declared by the test signature; which helper (if any) raises it is not
 *     visible in this snippet
 */
@Test
public void testReadingFromSource() throws IOException {
  SourceFormatAdapter adapter = new SourceFormatAdapter(prepareCloudObjectSource());

  // Step 1: no checkpoint, and (presumably) nothing useful queued => no batch at all.
  generateMessageInQueue(null);
  InputBatch<JavaRDD<GenericRecord>> emptyFetch =
      adapter.fetchNewDataInAvroFormat(Option.empty(), Long.MAX_VALUE);
  assertEquals(Option.empty(), emptyFetch.getBatch());

  // Step 2: no checkpoint, one new message queued => a single record comes back.
  generateMessageInQueue("1");
  InputBatch<JavaRDD<GenericRecord>> firstFetch =
      adapter.fetchNewDataInAvroFormat(Option.empty(), Long.MAX_VALUE);
  JavaRDD<GenericRecord> firstRecords = firstFetch.getBatch().get();
  assertEquals(1, firstRecords.count());

  // Step 3: queue another message and resume from the checkpoint handed out by step 2.
  generateMessageInQueue("2");
  InputBatch<JavaRDD<GenericRecord>> secondFetch =
      adapter.fetchNewDataInAvroFormat(Option.of(firstFetch.getCheckpointForNextBatch()), Long.MAX_VALUE);
  JavaRDD<GenericRecord> secondRecords = secondFetch.getBatch().get();
  assertEquals(1, secondRecords.count());

  // Drill into the nested event record: s3 -> object -> key.
  GenericRecord event = secondRecords.rdd().first();
  GenericRecord s3Node = (GenericRecord) event.get("s3");
  GenericRecord objectNode = (GenericRecord) s3Node.get("object");
  assertEquals("2.parquet", objectNode.get("key").toString());
}
use of org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter in project hudi by apache.
the class TestSqlSource method testSqlSourceInvalidTable.
/**
 * Verifies that fetching in row format fails when the configured query references a table
 * that does not exist.
 *
 * <p>The query {@code select * from not_exist_sql_table} is set under {@code sqlSourceConfig},
 * and the fetch is expected to throw an {@code AnalysisException}.
 *
 * @throws IOException declared by the test signature; no statement visible here is known to
 *     raise it
 */
@Test
public void testSqlSourceInvalidTable() throws IOException {
  final String queryOnMissingTable = "select * from not_exist_sql_table";
  props.setProperty(sqlSourceConfig, queryOnMissingTable);

  sqlSource = new SqlSource(props, jsc, sparkSession, schemaProvider);
  sourceFormatAdapter = new SourceFormatAdapter(sqlSource);

  // The failure is expected from the fetch call itself, so only that call is inside the lambda.
  assertThrows(
      AnalysisException.class,
      () -> sourceFormatAdapter.fetchNewDataInRowFormat(Option.empty(), Long.MAX_VALUE));
}
use of org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter in project hudi by apache.
the class TestSqlSource method testSqlSourceZeroRecord.
/**
 * Verifies a row-format fetch when the configured query selects no rows.
 *
 * <p>The query filters {@code test_sql_table} with {@code where 1=0}; the resulting batch is
 * expected to be present and to contain zero rows.
 *
 * @throws IOException declared by the test signature; no statement visible here is known to
 *     raise it
 */
@Test
public void testSqlSourceZeroRecord() throws IOException {
  final String emptyResultQuery = "select * from test_sql_table where 1=0";
  props.setProperty(sqlSourceConfig, emptyResultQuery);

  sqlSource = new SqlSource(props, jsc, sparkSession, schemaProvider);
  sourceFormatAdapter = new SourceFormatAdapter(sqlSource);

  InputBatch<Dataset<Row>> rowBatch =
      sourceFormatAdapter.fetchNewDataInRowFormat(Option.empty(), Long.MAX_VALUE);
  // get() is called unconditionally: the batch must exist even though it holds no rows.
  Dataset<Row> rows = rowBatch.getBatch().get();
  assertEquals(0, rows.count());
}
use of org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter in project hudi by apache.
the class TestSqlSource method testSqlSourceAvroFormat.
/**
 * Verifies an Avro-format fetch and its conversion back to rows.
 *
 * <p>All of {@code test_sql_table} is selected and fetched as Avro records. The records are
 * then converted to a {@code Dataset<Row>} using the schema provider's source schema, and the
 * result is expected to hold 10000 rows (presumably the size of the table fixture).
 *
 * @throws IOException declared by the test signature; no statement visible here is known to
 *     raise it
 */
@Test
public void testSqlSourceAvroFormat() throws IOException {
  final String selectAllQuery = "select * from test_sql_table";
  props.setProperty(sqlSourceConfig, selectAllQuery);

  sqlSource = new SqlSource(props, jsc, sparkSession, schemaProvider);
  sourceFormatAdapter = new SourceFormatAdapter(sqlSource);

  // Fetch the data as Avro generic records.
  InputBatch<JavaRDD<GenericRecord>> avroBatch =
      sourceFormatAdapter.fetchNewDataInAvroFormat(Option.empty(), Long.MAX_VALUE);
  JavaRDD<GenericRecord> avroRecords = avroBatch.getBatch().get();

  // Convert Avro back to rows using the source schema, then check the row count.
  Dataset<Row> convertedRows = AvroConversionUtils.createDataFrame(
      JavaRDD.toRDD(avroRecords),
      schemaProvider.getSourceSchema().toString(),
      sparkSession);
  assertEquals(10000, convertedRows.count());
}
use of org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter in project hudi by apache.
the class TestSqlSource method testSqlSourceMoreRecordsThanSourceLimit.
/**
 * Verifies a row-format fetch when the source limit passed in is smaller than the number of
 * rows the query yields.
 *
 * <p>All of {@code test_sql_table} is selected and fetched with a source limit of 1000. The
 * assertion expects 10000 rows, i.e. the test expects the limit not to truncate the batch.
 *
 * @throws IOException declared by the test signature; no statement visible here is known to
 *     raise it
 */
@Test
public void testSqlSourceMoreRecordsThanSourceLimit() throws IOException {
  final String selectAllQuery = "select * from test_sql_table";
  final long sourceLimit = 1000;
  props.setProperty(sqlSourceConfig, selectAllQuery);

  sqlSource = new SqlSource(props, jsc, sparkSession, schemaProvider);
  sourceFormatAdapter = new SourceFormatAdapter(sqlSource);

  InputBatch<Dataset<Row>> rowBatch =
      sourceFormatAdapter.fetchNewDataInRowFormat(Option.empty(), sourceLimit);
  Dataset<Row> rows = rowBatch.getBatch().get();
  // The full result is expected even though sourceLimit is far below the row count.
  assertEquals(10000, rows.count());
}
Aggregations