Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.
From the class PartitionLevelWatermarkerTest, method testReadPreviousNullWatermarks.
@Test
public void testReadPreviousNullWatermarks() throws Exception {
  WorkUnitState previousWus = new WorkUnitState();
  previousWus.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_dataset_urn");
  previousWus.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
  SourceState state = new SourceState(new State(), Lists.newArrayList(previousWus));
  PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(state);
  Assert.assertEquals(watermarker.getPreviousWatermarks().size(), 0);
}
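For context, a minimal sketch of the WorkUnitState property API this test exercises. The key names are the real ones used above; the value and the getter calls are illustrative:

WorkUnitState wus = new WorkUnitState();
wus.setProp(ConfigurationKeys.DATASET_URN_KEY, "example_dataset_urn"); // properties are stored as strings
wus.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
String urn = wus.getProp(ConfigurationKeys.DATASET_URN_KEY); // read back as a string
boolean isWatermarkWorkUnit = wus.getPropAsBoolean(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY);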
Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.
From the class TableLevelWatermarkerTest, method testPartitionWatermarks.
/**
 * Make sure that all partitions get the same previous high watermark (the table's watermark).
 */
@Test
public void testPartitionWatermarks() throws Exception {
  WorkUnitState previousWus = new WorkUnitState();
  previousWus.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_table");
  previousWus.setActualHighWatermark(new LongWatermark(100L));
  SourceState state = new SourceState(new State(), Lists.newArrayList(previousWus));
  TableLevelWatermarker watermarker = new TableLevelWatermarker(state);
  Table mockTable = mockTable("test_table");
  Assert.assertEquals(watermarker.getPreviousHighWatermark(mockTable), new LongWatermark(100L));
  Assert.assertEquals(watermarker.getPreviousHighWatermark(mockPartition(mockTable, ImmutableList.of("2015"))), new LongWatermark(100L));
  Assert.assertEquals(watermarker.getPreviousHighWatermark(mockPartition(mockTable, ImmutableList.of("2016"))), new LongWatermark(100L));
}
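The same pattern in isolation: a hedged sketch (table name and watermark value are illustrative) of seeding a SourceState with a previous run's high watermark, which TableLevelWatermarker then reports for the table and every one of its partitions:

// Carry a table-level watermark from a previous run into a new SourceState.
WorkUnitState prev = new WorkUnitState();
prev.setProp(ConfigurationKeys.DATASET_URN_KEY, "example_table"); // the dataset URN identifies the table
prev.setActualHighWatermark(new LongWatermark(250L)); // high watermark reached by the prior run
TableLevelWatermarker watermarker = new TableLevelWatermarker(new SourceState(new State(), Lists.newArrayList(prev)));
// getPreviousHighWatermark(...) for "example_table", or any of its partitions, now returns new LongWatermark(250L)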
Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.
From the class HiveAvroToOrcConverterTest, method testNestedSchemaDDLandDML.
/**
 * Test nested DDL and DML generation.
 * @throws Exception
 */
@Test
public void testNestedSchemaDDLandDML() throws Exception {
  String dbName = "testdb";
  String tableName = "testtable";
  String tableSdLoc = "/tmp/testtable";
  this.hiveMetastoreTestUtils.getLocalMetastoreClient().dropDatabase(dbName, false, true, true);
  Table table = this.hiveMetastoreTestUtils.createTestAvroTable(dbName, tableName, tableSdLoc, Optional.<String>absent());
  Schema schema = ConversionHiveTestUtils.readSchemaFromJsonFile(resourceDir, "recordWithinRecordWithinRecord_nested.json");
  WorkUnitState wus = ConversionHiveTestUtils.createWus(dbName, tableName, 0);
  wus.getJobState().setProp("orc.table.flatten.schema", "false");
  try (HiveAvroToNestedOrcConverter converter = new HiveAvroToNestedOrcConverter()) {
    Config config = ConfigFactory.parseMap(ImmutableMap.<String, String>builder()
        .put("destinationFormats", "nestedOrc")
        .put("nestedOrc.destination.tableName", "testtable_orc_nested")
        .put("nestedOrc.destination.dbName", dbName)
        .put("nestedOrc.destination.dataPath", "file:/tmp/testtable_orc_nested")
        .build());
    ConvertibleHiveDataset cd = ConvertibleHiveDatasetTest.createTestConvertibleDataset(config);
    List<QueryBasedHiveConversionEntity> conversionEntities = Lists.newArrayList(converter.convertRecord(
        converter.convertSchema(schema, wus),
        new QueryBasedHiveConversionEntity(cd, new SchemaAwareHiveTable(table, schema)), wus));
    Assert.assertEquals(conversionEntities.size(), 1, "Only one query entity should be returned");
    QueryBasedHiveConversionEntity queryBasedHiveConversionEntity = conversionEntities.get(0);
    List<String> queries = queryBasedHiveConversionEntity.getQueries();
    Assert.assertEquals(queries.size(), 4, "4 queries (DDL and DML) should be returned");
    // Ignore everything before the first bracket in the DDL and before the SELECT clause in the DML,
    // because the staging table has a random name component.
    String actualDDLQuery = StringUtils.substringAfter(queries.get(0).trim(), "(");
    // The DML is assumed to be the final query in the list, after the DDL statements.
    String actualDMLQuery = StringUtils.substringAfter(queries.get(queries.size() - 1).trim(), "SELECT");
    String expectedDDLQuery = StringUtils.substringAfter(ConversionHiveTestUtils.readQueryFromFile(resourceDir, "recordWithinRecordWithinRecord_nested.ddl"), "(");
    String expectedDMLQuery = StringUtils.substringAfter(ConversionHiveTestUtils.readQueryFromFile(resourceDir, "recordWithinRecordWithinRecord_nested.dml"), "SELECT");
    Assert.assertEquals(actualDDLQuery, expectedDDLQuery);
    Assert.assertEquals(actualDMLQuery, expectedDMLQuery);
  }
}
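The Config passed to createTestConvertibleDataset above is a flat key/value map parsed by the typesafe-config library. A small illustrative sketch of building and reading one; the keys come from the test, the subset and the reads are illustrative:

Config config = ConfigFactory.parseMap(ImmutableMap.of(
    "destinationFormats", "nestedOrc",
    "nestedOrc.destination.dbName", "testdb"));
String formats = config.getString("destinationFormats"); // "nestedOrc"
boolean hasDataPath = config.hasPath("nestedOrc.destination.dataPath"); // false in this sketch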
Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.
From the class HiveMaterializerTest, method getTaskContextForRun.
private TaskContext getTaskContextForRun(WorkUnit workUnit) {
  workUnit.setProp(ConfigurationKeys.JOB_ID_KEY, "job123");
  workUnit.setProp(ConfigurationKeys.TASK_ID_KEY, "task123");
  workUnit.setProp(HiveConverterUtils.HIVE_DATASET_DESTINATION_SKIP_SETGROUP, Boolean.toString(true));
  HiveTask.disableHiveWatermarker(workUnit);
  JobState jobState = new JobState("job", "job123");
  return new TaskContext(new WorkUnitState(workUnit, jobState));
}
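A hypothetical call site for this helper within the same test class. WorkUnit.createEmpty() is the real factory method; the extra property is illustrative:

WorkUnit workUnit = WorkUnit.createEmpty();
workUnit.setProp(ConfigurationKeys.DATASET_URN_KEY, "example_dataset"); // any per-work-unit config goes here
TaskContext taskContext = getTaskContextForRun(workUnit); // wraps the work unit in a WorkUnitState bound to the job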
Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.
From the class DeletingCopyDataPublisherTest, method testDeleteOnSource.
@Test
public void testDeleteOnSource() throws Exception {
  State state = getTestState("testDeleteOnSource");
  Path testMethodTempPath = new Path(testClassTempPath, "testDeleteOnSource");
  DeletingCopyDataPublisher copyDataPublisher = closer.register(new DeletingCopyDataPublisher(state));
  File outputDir = new File(testMethodTempPath.toString(), "task-output/jobid/1f042f494d1fe2198e0e71a17faa233f33b5099b");
  outputDir.mkdirs();
  outputDir.deleteOnExit();
  WorkUnitState wus = new WorkUnitState();
  CopyableDataset copyableDataset = new TestCopyableDataset(new Path("origin"));
  CopyableDatasetMetadata metadata = new CopyableDatasetMetadata(copyableDataset);
  CopyEntity cf = CopyableFileUtils.createTestCopyableFile(new Path(testMethodTempPath, "test.txt").toString());
  CopySource.serializeCopyableDataset(wus, metadata);
  CopySource.serializeCopyEntity(wus, cf);
  Assert.assertTrue(fs.exists(new Path(testMethodTempPath, "test.txt")));
  wus.setWorkingState(WorkingState.SUCCESSFUL);
  copyDataPublisher.publishData(ImmutableList.of(wus));
  Assert.assertFalse(fs.exists(new Path(testMethodTempPath, "test.txt")));
}
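The publish step is gated on the work unit's working state: the publisher only deletes the origin file once the copy is marked SUCCESSFUL. A minimal sketch of that state transition (the assertion is illustrative):

WorkUnitState wus = new WorkUnitState();
// A fresh WorkUnitState typically starts in WorkingState.PENDING; publishers act on SUCCESSFUL units.
wus.setWorkingState(WorkUnitState.WorkingState.SUCCESSFUL);
Assert.assertEquals(wus.getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);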