use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.
the class CopyDataPublisher method groupByFileSet.
/**
 * Groups the supplied {@link WorkUnitState}s by the {@link CopyEntity.DatasetAndPartition} of the
 * {@link CopyEntity} serialized in each of them. The resulting {@link Multimap} is used to mark every
 * {@link WorkUnitState} of a group as {@link WorkUnitState.WorkingState#COMMITTED} once that group has been
 * published successfully.
 *
 * @param states the work unit states to group; each one is read for its serialized copy entity and for the
 *               {@link CopySource#SERIALIZED_COPYABLE_DATASET} property
 * @return a multimap from dataset-and-partition key to the work unit states that resolved to that key
 */
private static Multimap<CopyEntity.DatasetAndPartition, WorkUnitState> groupByFileSet(Collection<? extends WorkUnitState> states) {
  Multimap<CopyEntity.DatasetAndPartition, WorkUnitState> statesByFileSet = ArrayListMultimap.create();

  for (WorkUnitState state : states) {
    // The copy entity is deserialized first, then the dataset metadata it is resolved against.
    CopyEntity copyEntity = CopySource.deserializeCopyEntity(state);
    String serializedDataset = state.getProp(CopySource.SERIALIZED_COPYABLE_DATASET);
    CopyEntity.DatasetAndPartition key =
        copyEntity.getDatasetAndPartition(CopyableDatasetMetadata.deserialize(serializedDataset));

    statesByFileSet.put(key, state);
  }

  return statesByFileSet;
}
use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.
the class CopyDataPublisher method getCommitSequence.
/**
 * Collects the {@link CommitStep}s carried by the given work units and returns them ordered by priority.
 *
 * <p>Only work units whose copy entity class is assignable to {@code baseClass} are considered; each of
 * those is deserialized and cast to {@link CommitStepCopyEntity}. The entities are sorted by ascending
 * {@code getPriority()} value. The sort is stable, so entities with equal priority keep the order in which
 * they were encountered.
 *
 * @param workUnits the work unit states to inspect
 * @param baseClass the type a work unit's copy entity class must be assignable to in order to be included
 * @return the commit steps of the matching entities, in ascending priority order
 * @throws IOException declared by this method; presumably raised by the {@link CopySource} calls
 */
private static List<CommitStep> getCommitSequence(Collection<WorkUnitState> workUnits, Class<?> baseClass) throws IOException {
  List<CommitStepCopyEntity> matchingEntities = Lists.newArrayList();
  for (WorkUnitState workUnit : workUnits) {
    if (!baseClass.isAssignableFrom(CopySource.getCopyEntityClass(workUnit))) {
      continue;
    }
    matchingEntities.add((CommitStepCopyEntity) CopySource.deserializeCopyEntity(workUnit));
  }

  Collections.sort(matchingEntities, new Comparator<CommitStepCopyEntity>() {
    @Override
    public int compare(CommitStepCopyEntity left, CommitStepCopyEntity right) {
      return Integer.compare(left.getPriority(), right.getPriority());
    }
  });

  List<CommitStep> orderedSteps = Lists.newArrayList();
  for (CommitStepCopyEntity entity : matchingEntities) {
    orderedSteps.add(entity.getStep());
  }
  return orderedSteps;
}
use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.
the class PartitionLevelWatermarkerTest method testNoPreviousWatermarkWorkunits.
/**
 * Given two previous work unit states, only the one flagged with
 * {@link PartitionLevelWatermarker#IS_WATERMARK_WORKUNIT_KEY} is expected to show up in the previous
 * watermarks of a newly constructed {@link PartitionLevelWatermarker}.
 */
@Test
public void testNoPreviousWatermarkWorkunits() throws Exception {
  // Previous state explicitly flagged as a watermark workunit, carrying a keyed watermark.
  WorkUnitState flaggedState = new WorkUnitState();
  flaggedState.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_dataset_urn");
  flaggedState.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
  flaggedState.setActualHighWatermark(new MultiKeyValueLongWatermark(ImmutableMap.of("2015", 100L)));

  // Previous state without the flag, carrying a plain long watermark.
  WorkUnitState unflaggedState = new WorkUnitState();
  unflaggedState.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_dataset_urn2");
  unflaggedState.setActualHighWatermark(new LongWatermark(101L));

  SourceState sourceState = new SourceState(new State(), Lists.newArrayList(flaggedState, unflaggedState));
  PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(sourceState);

  // Exactly one dataset is expected, and it must be the flagged one.
  Assert.assertEquals(watermarker.getPreviousWatermarks().size(), 1);
  Assert.assertEquals(watermarker.getPreviousWatermarks().get("test_dataset_urn"), ImmutableMap.of("2015", 100L));
}
use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.
the class PartitionLevelWatermarkerTest method testStateStoreReadWrite.
/**
 * Round-trips a watermark: one {@link PartitionLevelWatermarker} records a partition watermark into a
 * work unit, and a second watermarker built from the resulting previous state is expected to report it.
 */
@Test
public void testStateStoreReadWrite() throws Exception {
  String dbName = "testStateStoreReadWrite";
  LocalHiveMetastoreTestUtils.getInstance().dropDatabaseIfExists(dbName);

  // First watermarker: starts from an empty source state and records one partition.
  PartitionLevelWatermarker writer = new PartitionLevelWatermarker(new SourceState());
  Table table = localTestTable(dbName, "table1", true);
  writer.onTableProcessBegin(table, 0L);
  long now = new DateTime().getMillis();
  writer.onPartitionProcessBegin(localTestPartition(table, ImmutableList.of("2016")), 0, now);

  // The list starts empty; the first element is read back right after this call.
  List<WorkUnit> generatedWorkUnits = Lists.newArrayList();
  writer.onGetWorkunitsEnd(generatedWorkUnits);

  @SuppressWarnings("deprecation")
  WorkUnitState previousState = new WorkUnitState(generatedWorkUnits.get(0));
  writer.setActualHighWatermark(previousState);

  // Second watermarker: built from the state produced above.
  SourceState sourceState = new SourceState(new State(), Lists.newArrayList(previousState));
  PartitionLevelWatermarker reader = new PartitionLevelWatermarker(sourceState);

  String expectedUrn = dbName + "@table1";
  Assert.assertEquals(reader.getPreviousWatermarks().size(), 1);
  Assert.assertEquals(reader.getPreviousWatermarks().get(expectedUrn), ImmutableMap.of("2016", now));
}
use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.
the class PartitionLevelWatermarkerTest method testDroppedPartitions.
/**
 * Starts from previous watermarks for partitions "2015-01" and "2015-02", then processes a partition
 * "2015" whose metastore parameters list those two as replaced. The expected high watermarks for the
 * dataset must then contain only the "2015" entry.
 */
@Test
public void testDroppedPartitions() throws Exception {
  WorkUnitState previousState = new WorkUnitState();
  previousState.setProp(ConfigurationKeys.DATASET_URN_KEY, "db@test_dataset_urn");
  previousState.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
  previousState.setActualHighWatermark(
      new MultiKeyValueLongWatermark(ImmutableMap.of("2015-01", 100L, "2015-02", 101L)));

  SourceState sourceState = new SourceState(new State(), Lists.newArrayList(previousState));
  PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(sourceState);

  Table table = mockTable("test_dataset_urn");
  Mockito.when(table.getPartitionKeys()).thenReturn(ImmutableList.of(new FieldSchema("year", "string", "")));

  // The yearly partition declares, via its parameters, that it replaces both monthly partitions.
  Partition yearlyPartition = mockPartition(table, ImmutableList.of("2015"));
  Mockito.when(yearlyPartition.getParameters()).thenReturn(
      ImmutableMap.of(AbstractAvroToOrcConverter.REPLACED_PARTITIONS_HIVE_METASTORE_KEY, "2015-01|2015-02"));

  watermarker.onPartitionProcessBegin(yearlyPartition, 0L, 0L);

  Assert.assertEquals(watermarker.getExpectedHighWatermarks().get("db@test_dataset_urn"), ImmutableMap.of("2015", 0L));
}
Aggregations