Search in sources:

Example 6 with DataSetOptions

use of com.thinkbiganalytics.kylo.catalog.spi.DataSetOptions in project kylo by Teradata.

the class AbstractJdbcDataSetProviderTest method writeWithoutUrl.

/**
 * Ensures that a write fails with {@link MissingOptionException} when the options carry a table name but no url.
 */
@Test(expected = MissingOptionException.class)
public void writeWithoutUrl() {
    // Only the table name is configured; the url option is deliberately left out
    final DataSetOptions tableOnlyOptions = new DataSetOptions();
    tableOnlyOptions.setOption("dbtable", "mytable");

    final MockJdbcDataSetProvider jdbcProvider = new MockJdbcDataSetProvider();
    final KyloCatalogClient catalogClient = Mockito.mock(KyloCatalogClient.class);
    final DataFrame dataFrame = Mockito.mock(DataFrame.class);

    // Expected to throw before anything is written
    jdbcProvider.write(catalogClient, tableOnlyOptions, dataFrame);
}
Also used : KyloCatalogClient(com.thinkbiganalytics.kylo.catalog.api.KyloCatalogClient) DataSetOptions(com.thinkbiganalytics.kylo.catalog.spi.DataSetOptions) DataFrame(org.apache.spark.sql.DataFrame) Test(org.junit.Test)

Example 7 with DataSetOptions

use of com.thinkbiganalytics.kylo.catalog.spi.DataSetOptions in project kylo by Teradata.

the class SparkDataSetContextTest method getPathsHighWaterMarkEmpty.

/**
 * Checks High Water Mark path resolution when the only input file is older than the stored mark.
 *
 * <p>The context is expected to return the single placeholder path {@code file:/dev/null} and to set the mark on the client to the same value it was given.</p>
 */
@Test
@SuppressWarnings("unchecked")
public void getPathsHighWaterMarkEmpty() throws IOException {
    final long now = System.currentTimeMillis();
    final String markName = "water.mark";
    final String markValue = Long.toString(now);

    // Client whose stored high water mark equals the current time
    final KyloCatalogClient catalogClient = Mockito.mock(KyloCatalogClient.class);
    Mockito.when(catalogClient.getHighWaterMarks()).thenReturn(Collections.singletonMap(markName, markValue));

    // Input file last modified one second before the stored mark
    final File staleFile = tempFolder.newFile("file.txt");
    final boolean timestampApplied = staleFile.setLastModified(now - 1000);
    Assert.assertTrue(timestampApplied);

    // Options that point at the file and name the high water mark
    final DataSetOptions dataSetOptions = new DataSetOptions();
    dataSetOptions.setFormat("mock");
    dataSetOptions.setOption(HighWaterMarkInputFormat.HIGH_WATER_MARK, markName);
    dataSetOptions.setPaths(Collections.singletonList(staleFile.getAbsolutePath()));

    // Delegate stubbed to report a file format and to supply an empty Hadoop configuration
    final SparkDataSetDelegate<DataFrame> sparkDelegate = Mockito.mock(SparkDataSetDelegate.class);
    Mockito.when(sparkDelegate.getHadoopConfiguration(Mockito.any(KyloCatalogClient.class))).thenReturn(new Configuration(false));
    Mockito.when(sparkDelegate.isFileFormat(Mockito.any(Class.class))).thenReturn(true);

    // Resolve the paths through the context under test
    final SparkDataSetContext<DataFrame> dataSetContext = new SparkDataSetContext<>(dataSetOptions, catalogClient, sparkDelegate);
    final List<String> resolvedPaths = dataSetContext.getPaths();

    Assert.assertNotNull("Expected paths to be non-null", resolvedPaths);
    Assert.assertEquals("file:/dev/null", resolvedPaths.get(0));
    Assert.assertEquals(1, resolvedPaths.size());
    Mockito.verify(catalogClient).setHighWaterMarks(Collections.singletonMap(markName, markValue));
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) KyloCatalogClient(com.thinkbiganalytics.kylo.catalog.api.KyloCatalogClient) DataSetOptions(com.thinkbiganalytics.kylo.catalog.spi.DataSetOptions) DataFrame(org.apache.spark.sql.DataFrame) File(java.io.File) Test(org.junit.Test)

Example 8 with DataSetOptions

use of com.thinkbiganalytics.kylo.catalog.spi.DataSetOptions in project kylo by Teradata.

the class SparkDataSetContextTest method getPathsHighWaterMark.

/**
 * Checks High Water Mark path resolution across three files with different modification times.
 *
 * <p>The stored mark is 60 seconds old. The files were modified 60 seconds ago, 30 seconds ago, and now. Only the middle file is expected in the result, and the mark is expected to move to that file's modification time.</p>
 */
@Test
@SuppressWarnings("unchecked")
public void getPathsHighWaterMark() throws IOException {
    final long now = System.currentTimeMillis();
    final String markName = "water.mark";

    // Client whose stored high water mark is 60 seconds in the past
    final KyloCatalogClient catalogClient = Mockito.mock(KyloCatalogClient.class);
    Mockito.when(catalogClient.getHighWaterMarks()).thenReturn(Collections.singletonMap(markName, Long.toString(now - 60000)));

    // Three input files: modified at the mark, 30 seconds ago, and just now
    final File atMarkFile = tempFolder.newFile("file1");
    Assert.assertTrue(atMarkFile.setLastModified(now - 60000));
    final File eligibleFile = tempFolder.newFile("file2");
    Assert.assertTrue(eligibleFile.setLastModified(now - 30000));
    final File freshFile = tempFolder.newFile("file3");
    Assert.assertTrue(freshFile.setLastModified(now));

    final List<String> candidatePaths = new ArrayList<>();
    candidatePaths.add(atMarkFile.getAbsolutePath());
    candidatePaths.add(eligibleFile.getAbsolutePath());
    candidatePaths.add(freshFile.getAbsolutePath());

    // Options naming the mark and bounding the accepted file age
    final DataSetOptions dataSetOptions = new DataSetOptions();
    dataSetOptions.setFormat("mock");
    dataSetOptions.setOption(HighWaterMarkInputFormat.HIGH_WATER_MARK, markName);
    dataSetOptions.setOption(HighWaterMarkInputFormat.MAX_FILE_AGE, "300000");
    dataSetOptions.setOption(HighWaterMarkInputFormat.MIN_FILE_AGE, "15000");
    dataSetOptions.setPaths(candidatePaths);

    // Delegate stubbed to report a file format and to supply an empty Hadoop configuration
    final SparkDataSetDelegate<DataFrame> sparkDelegate = Mockito.mock(SparkDataSetDelegate.class);
    Mockito.when(sparkDelegate.getHadoopConfiguration(Mockito.any(KyloCatalogClient.class))).thenReturn(new Configuration(false));
    Mockito.when(sparkDelegate.isFileFormat(Mockito.any(Class.class))).thenReturn(true);

    // Resolve the paths through the context under test
    final SparkDataSetContext<DataFrame> dataSetContext = new SparkDataSetContext<>(dataSetOptions, catalogClient, sparkDelegate);
    final List<String> resolvedPaths = dataSetContext.getPaths();

    Assert.assertNotNull("Expected paths to be non-null", resolvedPaths);
    Assert.assertEquals(eligibleFile.toURI().toString(), resolvedPaths.get(0));
    Assert.assertEquals(1, resolvedPaths.size());
    Mockito.verify(catalogClient).setHighWaterMarks(Collections.singletonMap(markName, Long.toString(eligibleFile.lastModified())));
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) KyloCatalogClient(com.thinkbiganalytics.kylo.catalog.api.KyloCatalogClient) ArrayList(java.util.ArrayList) DataSetOptions(com.thinkbiganalytics.kylo.catalog.spi.DataSetOptions) DataFrame(org.apache.spark.sql.DataFrame) File(java.io.File) Test(org.junit.Test)

Example 9 with DataSetOptions

use of com.thinkbiganalytics.kylo.catalog.spi.DataSetOptions in project kylo by Teradata.

the class SparkDataSetContextTest method getPaths.

/**
 * Checks that, with an unstubbed delegate, the path option and the paths list are reported separately and unchanged.
 */
@Test
@SuppressWarnings("unchecked")
public void getPaths() {
    final String optionPath = "/mock/path/file1.txt";
    final String listedPath = "/mock/path/file2.txt";

    // Options carrying one path as an option and a different one in the paths list
    final DataSetOptions dataSetOptions = new DataSetOptions();
    dataSetOptions.setFormat("mock");
    dataSetOptions.setOption(KyloCatalogConstants.PATH_OPTION, optionPath);
    dataSetOptions.setPaths(Collections.singletonList(listedPath));

    // Plain mocks: the delegate is not stubbed to report a file format
    final KyloCatalogClient catalogClient = Mockito.mock(KyloCatalogClient.class);
    final SparkDataSetDelegate<DataFrame> sparkDelegate = Mockito.mock(SparkDataSetDelegate.class);
    final SparkDataSetContext<DataFrame> dataSetContext = new SparkDataSetContext<>(dataSetOptions, catalogClient, sparkDelegate);

    Assert.assertEquals(optionPath, dataSetContext.getOption("path").get());
    Assert.assertEquals(Collections.singletonList(listedPath), dataSetContext.getPaths());
}
Also used : KyloCatalogClient(com.thinkbiganalytics.kylo.catalog.api.KyloCatalogClient) DataSetOptions(com.thinkbiganalytics.kylo.catalog.spi.DataSetOptions) DataFrame(org.apache.spark.sql.DataFrame) Test(org.junit.Test)

Example 10 with DataSetOptions

use of com.thinkbiganalytics.kylo.catalog.spi.DataSetOptions in project kylo by Teradata.

the class SparkDataSetContextTest method getPathsFileFormat.

/**
 * Checks that, when the delegate reports a file format, the path option is merged into the paths list.
 *
 * <p>The option path is expected first, followed by the listed path, and the {@code path} option itself is expected to be empty.</p>
 */
@Test
@SuppressWarnings("unchecked")
public void getPathsFileFormat() {
    final String optionPath = "/mock/path/file1.txt";
    final String listedPath = "/mock/path/file2.txt";

    // Options carrying one path as an option and a different one in the paths list
    final DataSetOptions dataSetOptions = new DataSetOptions();
    dataSetOptions.setFormat("text");
    dataSetOptions.setOption(KyloCatalogConstants.PATH_OPTION, optionPath);
    dataSetOptions.setPaths(Collections.singletonList(listedPath));

    // Delegate stubbed to treat every class as a file format
    final SparkDataSetDelegate<DataFrame> sparkDelegate = Mockito.mock(SparkDataSetDelegate.class);
    Mockito.when(sparkDelegate.isFileFormat(Mockito.any(Class.class))).thenReturn(true);

    // Resolve the paths through the context under test
    final KyloCatalogClient catalogClient = Mockito.mock(KyloCatalogClient.class);
    final SparkDataSetContext<DataFrame> dataSetContext = new SparkDataSetContext<>(dataSetOptions, catalogClient, sparkDelegate);
    final List<String> resolvedPaths = dataSetContext.getPaths();

    Assert.assertTrue("Expected path option to be empty", dataSetContext.getOption("path").isEmpty());
    Assert.assertNotNull("Expected paths to be non-null", resolvedPaths);
    Assert.assertEquals(optionPath, resolvedPaths.get(0));
    Assert.assertEquals(listedPath, resolvedPaths.get(1));
    Assert.assertEquals(2, resolvedPaths.size());
}
Also used : KyloCatalogClient(com.thinkbiganalytics.kylo.catalog.api.KyloCatalogClient) DataSetOptions(com.thinkbiganalytics.kylo.catalog.spi.DataSetOptions) DataFrame(org.apache.spark.sql.DataFrame) Test(org.junit.Test)

Aggregations

DataSetOptions (com.thinkbiganalytics.kylo.catalog.spi.DataSetOptions): 21; Test (org.junit.Test): 21; KyloCatalogClient (com.thinkbiganalytics.kylo.catalog.api.KyloCatalogClient): 17; DataFrame (org.apache.spark.sql.DataFrame): 16; File (java.io.File): 2; Configuration (org.apache.hadoop.conf.Configuration): 2; InvocationOnMock (org.mockito.invocation.InvocationOnMock): 2; JdbcRelationProvider (com.thinkbiganalytics.kylo.catalog.spark.sources.jdbc.JdbcRelationProvider): 1; Connection (java.sql.Connection): 1; ResultSet (java.sql.ResultSet): 1; Statement (java.sql.Statement): 1; ArrayList (java.util.ArrayList): 1; Nonnull (javax.annotation.Nonnull): 1; Column (org.apache.spark.sql.Column): 1; SQLConf (org.apache.spark.sql.SQLConf): 1; SQLContext (org.apache.spark.sql.SQLContext): 1; UnresolvedRelation (org.apache.spark.sql.catalyst.analysis.UnresolvedRelation): 1; ScalaUDF (org.apache.spark.sql.catalyst.expressions.ScalaUDF): 1; InsertIntoTable (org.apache.spark.sql.catalyst.plans.logical.InsertIntoTable): 1; LogicalPlan (org.apache.spark.sql.catalyst.plans.logical.LogicalPlan): 1