Example 71 with WorkUnitState

Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

From the class PartitionLevelWatermarkerTest, method testReadPreviousNullWatermarks:

@Test
public void testReadPreviousNullWatermarks() throws Exception {
    WorkUnitState previousWus = new WorkUnitState();
    previousWus.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_dataset_urn");
    previousWus.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
    SourceState state = new SourceState(new State(), Lists.newArrayList(previousWus));
    PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(state);
    Assert.assertEquals(watermarker.getPreviousWatermarks().size(), 0);
}
Also used : SourceState(org.apache.gobblin.configuration.SourceState) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State) Test(org.testng.annotations.Test)
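
For context on the property round-trip this test relies on: WorkUnitState extends State, so flags such as IS_WATERMARK_WORKUNIT_KEY are stored as string properties and parsed back on read. A minimal, self-contained sketch (the property key below is made up for illustration):

import org.apache.gobblin.configuration.WorkUnitState;

public class WatermarkFlagSketch {
    public static void main(String[] args) {
        WorkUnitState wus = new WorkUnitState();
        // setProp stores the value as a string; getPropAsBoolean parses it back.
        wus.setProp("example.is.watermark.workunit", true);
        System.out.println(wus.getPropAsBoolean("example.is.watermark.workunit"));
        // prints: true
    }
}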

Example 72 with WorkUnitState

Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

From the class TableLevelWatermarkerTest, method testPartitionWatermarks:

/**
 * Make sure that all partitions get the same previous high watermark (the table's watermark).
 */
@Test
public void testPartitionWatermarks() throws Exception {
    WorkUnitState previousWus = new WorkUnitState();
    previousWus.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_table");
    previousWus.setActualHighWatermark(new LongWatermark(100L));
    SourceState state = new SourceState(new State(), Lists.newArrayList(previousWus));
    TableLevelWatermarker watermarker = new TableLevelWatermarker(state);
    Table mockTable = mockTable("test_table");
    Assert.assertEquals(watermarker.getPreviousHighWatermark(mockTable), new LongWatermark(100L));
    Assert.assertEquals(watermarker.getPreviousHighWatermark(mockPartition(mockTable, ImmutableList.of("2015"))), new LongWatermark(100L));
    Assert.assertEquals(watermarker.getPreviousHighWatermark(mockPartition(mockTable, ImmutableList.of("2016"))), new LongWatermark(100L));
}
Also used : SourceState(org.apache.gobblin.configuration.SourceState) Table(org.apache.hadoop.hive.ql.metadata.Table) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State) LongWatermark(org.apache.gobblin.source.extractor.extract.LongWatermark) Test(org.testng.annotations.Test)
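
The assertions above depend on LongWatermark comparing by value rather than by reference. A minimal standalone sketch of that behavior:

import org.apache.gobblin.source.extractor.extract.LongWatermark;

public class LongWatermarkEqualitySketch {
    public static void main(String[] args) {
        LongWatermark a = new LongWatermark(100L);
        LongWatermark b = new LongWatermark(100L);
        // Two watermarks wrapping the same long value are equal,
        // which is what lets Assert.assertEquals succeed above.
        System.out.println(a.equals(b));   // prints: true
        System.out.println(a.getValue());  // prints: 100
    }
}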

Example 73 with WorkUnitState

Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

From the class HiveAvroToOrcConverterTest, method testNestedSchemaDDLandDML:

/**
 * Test nested DDL and DML generation.
 * @throws Exception
 */
@Test
public void testNestedSchemaDDLandDML() throws Exception {
    String dbName = "testdb";
    String tableName = "testtable";
    String tableSdLoc = "/tmp/testtable";
    this.hiveMetastoreTestUtils.getLocalMetastoreClient().dropDatabase(dbName, false, true, true);
    Table table = this.hiveMetastoreTestUtils.createTestAvroTable(dbName, tableName, tableSdLoc, Optional.<String>absent());
    Schema schema = ConversionHiveTestUtils.readSchemaFromJsonFile(resourceDir, "recordWithinRecordWithinRecord_nested.json");
    WorkUnitState wus = ConversionHiveTestUtils.createWus(dbName, tableName, 0);
    wus.getJobState().setProp("orc.table.flatten.schema", "false");
    try (HiveAvroToNestedOrcConverter converter = new HiveAvroToNestedOrcConverter()) {
        Config config = ConfigFactory.parseMap(ImmutableMap.<String, String>builder()
            .put("destinationFormats", "nestedOrc")
            .put("nestedOrc.destination.tableName", "testtable_orc_nested")
            .put("nestedOrc.destination.dbName", dbName)
            .put("nestedOrc.destination.dataPath", "file:/tmp/testtable_orc_nested")
            .build());
        ConvertibleHiveDataset cd = ConvertibleHiveDatasetTest.createTestConvertibleDataset(config);
        List<QueryBasedHiveConversionEntity> conversionEntities = Lists.newArrayList(converter.convertRecord(
            converter.convertSchema(schema, wus),
            new QueryBasedHiveConversionEntity(cd, new SchemaAwareHiveTable(table, schema)),
            wus));
        Assert.assertEquals(conversionEntities.size(), 1, "Only one query entity should be returned");
        QueryBasedHiveConversionEntity queryBasedHiveConversionEntity = conversionEntities.get(0);
        List<String> queries = queryBasedHiveConversionEntity.getQueries();
        Assert.assertEquals(queries.size(), 4, "Expected four queries in total (DDL statements plus one DML query)");
        // Ignore the part before the first bracket in the DDL and before the 'SELECT' clause in the DML,
        // because the staging table has a random name component.
        // Note: StringUtils.substringAfter(str, separator) takes the string to search as its first argument;
        // the DML is assumed to be the last query in the list.
        String actualDDLQuery = StringUtils.substringAfter(queries.get(0).trim(), "(");
        String actualDMLQuery = StringUtils.substringAfter(queries.get(queries.size() - 1).trim(), "SELECT");
        String expectedDDLQuery = StringUtils.substringAfter(ConversionHiveTestUtils.readQueryFromFile(resourceDir, "recordWithinRecordWithinRecord_nested.ddl"), "(");
        String expectedDMLQuery = StringUtils.substringAfter(ConversionHiveTestUtils.readQueryFromFile(resourceDir, "recordWithinRecordWithinRecord_nested.dml"), "SELECT");
        Assert.assertEquals(actualDDLQuery, expectedDDLQuery);
        Assert.assertEquals(actualDMLQuery, expectedDMLQuery);
    }
}
Also used : Table(org.apache.hadoop.hive.metastore.api.Table) SchemaAwareHiveTable(org.apache.gobblin.data.management.conversion.hive.entities.SchemaAwareHiveTable) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) Config(com.typesafe.config.Config) ConvertibleHiveDataset(org.apache.gobblin.data.management.conversion.hive.dataset.ConvertibleHiveDataset) Schema(org.apache.avro.Schema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) QueryBasedHiveConversionEntity(org.apache.gobblin.data.management.conversion.hive.entities.QueryBasedHiveConversionEntity) ConvertibleHiveDatasetTest(org.apache.gobblin.data.management.conversion.hive.dataset.ConvertibleHiveDatasetTest) Test(org.testng.annotations.Test)
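
A note on the normalization step above: StringUtils.substringAfter from Apache Commons Lang takes the string to search as its first argument and the separator as its second, so truncating a DDL statement at the first bracket looks like this (a standalone sketch with a made-up table name):

import org.apache.commons.lang3.StringUtils;

public class QueryNormalizationSketch {
    public static void main(String[] args) {
        // The staging table carries a random suffix, so only the part
        // after the first '(' is stable enough to compare.
        String ddl = "CREATE TABLE testtable_orc_nested_a1b2c3 (id int, name string)";
        System.out.println(StringUtils.substringAfter(ddl, "("));
        // prints: id int, name string)
    }
}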

Example 74 with WorkUnitState

Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

From the class HiveMaterializerTest, method getTaskContextForRun:

private TaskContext getTaskContextForRun(WorkUnit workUnit) {
    workUnit.setProp(ConfigurationKeys.JOB_ID_KEY, "job123");
    workUnit.setProp(ConfigurationKeys.TASK_ID_KEY, "task123");
    workUnit.setProp(HiveConverterUtils.HIVE_DATASET_DESTINATION_SKIP_SETGROUP, Boolean.toString(true));
    HiveTask.disableHiveWatermarker(workUnit);
    JobState jobState = new JobState("job", "job123");
    return new TaskContext(new WorkUnitState(workUnit, jobState));
}
Also used : TaskContext(org.apache.gobblin.runtime.TaskContext) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) JobState(org.apache.gobblin.runtime.JobState)
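
For context, a WorkUnitState built from a WorkUnit and a JobState resolves properties through a fallback chain; as I read the API, a key missing from the state and the work unit is looked up on the job state. A minimal sketch of that assumed behavior (the property key is made up):

import org.apache.gobblin.configuration.WorkUnitState;
import org.apache.gobblin.runtime.JobState;
import org.apache.gobblin.source.workunit.WorkUnit;

public class PropertyFallbackSketch {
    public static void main(String[] args) {
        JobState jobState = new JobState("job", "job123");
        jobState.setProp("example.key", "from-job");
        WorkUnitState wus = new WorkUnitState(WorkUnit.createEmpty(), jobState);
        // "example.key" is not set on the work unit or the state itself,
        // so the lookup should fall back to the job-level value.
        System.out.println(wus.getProp("example.key")); // expected: from-job
    }
}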

Example 75 with WorkUnitState

Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

From the class DeletingCopyDataPublisherTest, method testDeleteOnSource:

@Test
public void testDeleteOnSource() throws Exception {
    State state = getTestState("testDeleteOnSource");
    Path testMethodTempPath = new Path(testClassTempPath, "testDeleteOnSource");
    DeletingCopyDataPublisher copyDataPublisher = closer.register(new DeletingCopyDataPublisher(state));
    File outputDir = new File(testMethodTempPath.toString(), "task-output/jobid/1f042f494d1fe2198e0e71a17faa233f33b5099b");
    outputDir.mkdirs();
    outputDir.deleteOnExit();
    WorkUnitState wus = new WorkUnitState();
    CopyableDataset copyableDataset = new TestCopyableDataset(new Path("origin"));
    CopyableDatasetMetadata metadata = new CopyableDatasetMetadata(copyableDataset);
    CopyEntity cf = CopyableFileUtils.createTestCopyableFile(new Path(testMethodTempPath, "test.txt").toString());
    CopySource.serializeCopyableDataset(wus, metadata);
    CopySource.serializeCopyEntity(wus, cf);
    Assert.assertTrue(fs.exists(new Path(testMethodTempPath, "test.txt")));
    wus.setWorkingState(WorkingState.SUCCESSFUL);
    copyDataPublisher.publishData(ImmutableList.of(wus));
    Assert.assertFalse(fs.exists(new Path(testMethodTempPath, "test.txt")));
}
Also used : Path(org.apache.hadoop.fs.Path) TestCopyableDataset(org.apache.gobblin.data.management.copy.TestCopyableDataset) CopyableDataset(org.apache.gobblin.data.management.copy.CopyableDataset) CopyEntity(org.apache.gobblin.data.management.copy.CopyEntity) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State) WorkingState(org.apache.gobblin.configuration.WorkUnitState.WorkingState) CopyableDatasetMetadata(org.apache.gobblin.data.management.copy.CopyableDatasetMetadata) File(java.io.File) Test(org.testng.annotations.Test)
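
The publishData call above only removes origin files for work units marked SUCCESSFUL, so the setWorkingState call is what arms the deletion. A minimal sketch of the working-state round trip (assuming, as I believe, that a fresh WorkUnitState starts in PENDING):

import org.apache.gobblin.configuration.WorkUnitState;

public class WorkingStateSketch {
    public static void main(String[] args) {
        WorkUnitState wus = new WorkUnitState();
        System.out.println(wus.getWorkingState()); // expected: PENDING
        wus.setWorkingState(WorkUnitState.WorkingState.SUCCESSFUL);
        System.out.println(wus.getWorkingState()); // prints: SUCCESSFUL
    }
}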

Aggregations

WorkUnitState (org.apache.gobblin.configuration.WorkUnitState) 222
Test (org.testng.annotations.Test) 143
State (org.apache.gobblin.configuration.State) 48
SourceState (org.apache.gobblin.configuration.SourceState) 39
WorkUnit (org.apache.gobblin.source.workunit.WorkUnit) 39
Schema (org.apache.avro.Schema) 29
Path (org.apache.hadoop.fs.Path) 26
GenericRecord (org.apache.avro.generic.GenericRecord) 19
JsonObject (com.google.gson.JsonObject) 17
ArrayList (java.util.ArrayList) 16
File (java.io.File) 14
TaskState (org.apache.hadoop.mapreduce.v2.api.records.TaskState) 12
List (java.util.List) 11
Configuration (org.apache.hadoop.conf.Configuration) 11
IOException (java.io.IOException) 10
LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark) 10
Extract (org.apache.gobblin.source.workunit.Extract) 10
FileSystem (org.apache.hadoop.fs.FileSystem) 10
Closer (com.google.common.io.Closer) 8
JsonParser (com.google.gson.JsonParser) 8