Search in sources :

Example 1 with DataSetOptions

use of com.thinkbiganalytics.kylo.catalog.spi.DataSetOptions in project kylo by Teradata.

the class AbstractSparkDataSetProviderTest method readDeleteSourceFile.

/**
 * Checks the read path for a file-format source when the {@code keepSourceFile} option is {@code "FALSE"}.
 *
 * <p>The provider is expected to return the data set produced by replacing the {@code value} column via
 * {@code withColumn}, and the replacement column's expression must be a {@link ScalaUDF}.</p>
 */
@Test
@SuppressWarnings("unchecked")
public void readDeleteSourceFile() {
    isFileFormat = true;

    // Options: a text file whose source should not be kept after reading
    final DataSetOptions readOptions = new DataSetOptions();
    readOptions.setFormat("text");
    readOptions.setOption(KyloCatalogConstants.PATH_OPTION, "/mock/path/file.txt");
    readOptions.setOption("keepSourceFile", "FALSE");

    // Source data set: a single nullable string column named "value"
    final StructType valueSchema = DataTypes.createStructType(
        Collections.singletonList(DataTypes.createStructField("value", DataTypes.StringType, true)));
    dataSet = Mockito.mock(DataFrame.class);
    Mockito.when(dataSet.col("value")).thenReturn(new Column("value"));
    Mockito.when(dataSet.schema()).thenReturn(valueSchema);

    // Data set handed back once the "value" column has been replaced
    final DataFrame replacedDataSet = Mockito.mock(DataFrame.class);
    Mockito.when(dataSet.withColumn(Mockito.eq("value"), Mockito.any(Column.class))).thenReturn(replacedDataSet);

    // Exercise the provider
    final MockSparkDataSetProvider sparkProvider = new MockSparkDataSetProvider();
    final DataFrame result = sparkProvider.read(Mockito.mock(KyloCatalogClient.class), readOptions);

    // The result must be the replaced data set
    Assert.assertEquals(replacedDataSet, result);

    // The replacement column must be backed by a UDF
    final ArgumentCaptor<Column> columnCaptor = ArgumentCaptor.forClass(Column.class);
    Mockito.verify(dataSet).withColumn(Mockito.eq("value"), columnCaptor.capture());
    final Column replacement = columnCaptor.getValue();
    final boolean backedByUdf = replacement.expr() instanceof ScalaUDF;
    Assert.assertTrue("Expected new column to be a UDF", backedByUdf);
}
Also used : StructType(org.apache.spark.sql.types.StructType) Column(org.apache.spark.sql.Column) KyloCatalogClient(com.thinkbiganalytics.kylo.catalog.api.KyloCatalogClient) DataSetOptions(com.thinkbiganalytics.kylo.catalog.spi.DataSetOptions) DataFrame(org.apache.spark.sql.DataFrame) ScalaUDF(org.apache.spark.sql.catalyst.expressions.ScalaUDF) Test(org.junit.Test)

Example 2 with DataSetOptions

use of com.thinkbiganalytics.kylo.catalog.spi.DataSetOptions in project kylo by Teradata.

the class AbstractSparkDataSetProviderTest method read.

/**
 * Checks the plain read path: with only a format and a path configured, the data set returned by the provider
 * must not have had any methods invoked on it.
 */
@Test
@SuppressWarnings("unchecked")
public void read() {
    // Options: text format with a path and nothing else
    final DataSetOptions readOptions = new DataSetOptions();
    readOptions.setFormat("text");
    readOptions.setOption(KyloCatalogConstants.PATH_OPTION, "/mock/path/file.txt");

    // Source data set
    dataSet = Mockito.mock(DataFrame.class);

    // Exercise the provider and confirm the result was left untouched
    final KyloCatalogClient catalogClient = Mockito.mock(KyloCatalogClient.class);
    final MockSparkDataSetProvider sparkProvider = new MockSparkDataSetProvider();
    final DataFrame result = sparkProvider.read(catalogClient, readOptions);
    Mockito.verifyZeroInteractions(result);
}
Also used : KyloCatalogClient(com.thinkbiganalytics.kylo.catalog.api.KyloCatalogClient) DataSetOptions(com.thinkbiganalytics.kylo.catalog.spi.DataSetOptions) DataFrame(org.apache.spark.sql.DataFrame) Test(org.junit.Test)

Example 3 with DataSetOptions

use of com.thinkbiganalytics.kylo.catalog.spi.DataSetOptions in project kylo by Teradata.

the class DataSetUtilTest method getOptionDefined.

/**
 * Checks that {@code getOptionOrThrow} returns the stored value when the requested key has been set.
 */
@Test
public void getOptionDefined() {
    final DataSetOptions definedOptions = new DataSetOptions();
    definedOptions.setOption("key", "value");

    // No error message is supplied because the key is present
    Assert.assertEquals(
        "value",
        DataSetUtil.getOptionOrThrow(definedOptions, "key", null));
}
Also used : DataSetOptions(com.thinkbiganalytics.kylo.catalog.spi.DataSetOptions) Test(org.junit.Test)

Example 4 with DataSetOptions

use of com.thinkbiganalytics.kylo.catalog.spi.DataSetOptions in project kylo by Teradata.

the class AbstractJdbcDataSetProviderTest method getOverlapWithInvalid.

/**
 * Checks that a non-numeric {@code overlap} option causes a {@link KyloCatalogException}.
 */
@Test(expected = KyloCatalogException.class)
public void getOverlapWithInvalid() {
    // "a" cannot be interpreted as a number
    final DataSetOptions invalidOptions = new DataSetOptions();
    invalidOptions.setOption("overlap", "a");

    new MockJdbcDataSetProvider().getOverlap(invalidOptions);
}
Also used : DataSetOptions(com.thinkbiganalytics.kylo.catalog.spi.DataSetOptions) Test(org.junit.Test)

Example 5 with DataSetOptions

use of com.thinkbiganalytics.kylo.catalog.spi.DataSetOptions in project kylo by Teradata.

the class AbstractJdbcDataSetProviderTest method getOverlap.

/**
 * Verify parsing the overlap option.
 *
 * <p>Three cases are covered:</p>
 * <ul>
 *   <li>no {@code overlap} option set: the result is {@code null};</li>
 *   <li>{@code "60"}: the result is {@code 60000};</li>
 *   <li>{@code "-5"}: the result is {@code 5000}, i.e. the sign is dropped.</li>
 * </ul>
 *
 * <p>NOTE(review): the factor of 1000 looks like a seconds-to-milliseconds conversion; confirm against the
 * provider implementation.</p>
 */
@Test
public void getOverlap() {
    // Test null overlap
    final DataSetOptions options = new DataSetOptions();
    final MockJdbcDataSetProvider provider = new MockJdbcDataSetProvider();
    Assert.assertNull("Expected overlap to be null", provider.getOverlap(options));
    // Test non-null overlap
    // Long.valueOf replaces the deprecated Long(long) constructor; equality under assertEquals is unchanged
    options.setOption("overlap", "60");
    Assert.assertEquals(Long.valueOf(60000L), provider.getOverlap(options));
    // Test negative overlap: the expected value is positive
    options.setOption("overlap", "-5");
    Assert.assertEquals(Long.valueOf(5000L), provider.getOverlap(options));
}
Also used : DataSetOptions(com.thinkbiganalytics.kylo.catalog.spi.DataSetOptions) Test(org.junit.Test)

Aggregations

DataSetOptions (com.thinkbiganalytics.kylo.catalog.spi.DataSetOptions)21 Test (org.junit.Test)21 KyloCatalogClient (com.thinkbiganalytics.kylo.catalog.api.KyloCatalogClient)17 DataFrame (org.apache.spark.sql.DataFrame)16 File (java.io.File)2 Configuration (org.apache.hadoop.conf.Configuration)2 InvocationOnMock (org.mockito.invocation.InvocationOnMock)2 JdbcRelationProvider (com.thinkbiganalytics.kylo.catalog.spark.sources.jdbc.JdbcRelationProvider)1 Connection (java.sql.Connection)1 ResultSet (java.sql.ResultSet)1 Statement (java.sql.Statement)1 ArrayList (java.util.ArrayList)1 Nonnull (javax.annotation.Nonnull)1 Column (org.apache.spark.sql.Column)1 SQLConf (org.apache.spark.sql.SQLConf)1 SQLContext (org.apache.spark.sql.SQLContext)1 UnresolvedRelation (org.apache.spark.sql.catalyst.analysis.UnresolvedRelation)1 ScalaUDF (org.apache.spark.sql.catalyst.expressions.ScalaUDF)1 InsertIntoTable (org.apache.spark.sql.catalyst.plans.logical.InsertIntoTable)1 LogicalPlan (org.apache.spark.sql.catalyst.plans.logical.LogicalPlan)1