use of org.apache.gobblin.source.extractor.extract.LongWatermark in project incubator-gobblin by apache.
the class ConsoleWriterTest method writeEnvelope.
/**
 * Writes a single record through the given writer and checks that it gets acknowledged.
 *
 * <p>The record {@code content} is wrapped in an envelope whose callback is an acknowledgable
 * watermark built from {@code source} and {@code value}; after the write the watermark must
 * report itself as acked.
 *
 * @param consoleWriter writer under test
 * @param content record payload to write
 * @param source source name attached to the watermark
 * @param value numeric watermark value
 * @throws IOException declared for the {@code writeEnvelope} call on the writer
 */
private void writeEnvelope(ConsoleWriter consoleWriter, String content, String source, long value) throws IOException {
  CheckpointableWatermark checkpoint = new DefaultCheckpointableWatermark(source, new LongWatermark(value));
  AcknowledgableWatermark ackTracker = new AcknowledgableWatermark(checkpoint);

  // The ack tracker rides along as a callback on the envelope.
  RecordEnvelope<String> envelope =
      (RecordEnvelope<String>) new RecordEnvelope<>(content).addCallBack(ackTracker);

  consoleWriter.writeEnvelope(envelope);

  Assert.assertTrue(ackTracker.isAcked());
}
use of org.apache.gobblin.source.extractor.extract.LongWatermark in project incubator-gobblin by apache.
the class FineGrainedWatermarkTrackerTest method testWatermarkTracker.
/**
 * Single-threaded test of the watermark tracker.
 *
 * <p>Each of the 100 iterations tracks a random number of watermarks, acknowledges all of them
 * except a randomly chosen set of "holes", and then hands the tracker and the holes to
 * {@code verifyCommitables}. The same verification is repeated after a sweep to make sure
 * sweeping does not change the outcome.
 */
@Test
public static void testWatermarkTracker() {
  Random rng = new Random();
  Config emptyConfig = ConfigFactory.empty();

  int iteration = 0;
  while (iteration < 100) {
    FineGrainedWatermarkTracker tracker = new FineGrainedWatermarkTracker(emptyConfig);

    // Between 1 and 1000 watermarks, all tracked up front with increasing values.
    int total = 1 + rng.nextInt(1000);
    AcknowledgableWatermark[] tracked = new AcknowledgableWatermark[total];
    for (int idx = 0; idx < total; ++idx) {
      CheckpointableWatermark checkpoint = new DefaultCheckpointableWatermark("default", new LongWatermark(idx));
      tracked[idx] = new AcknowledgableWatermark(checkpoint);
      tracker.track(tracked[idx]);
    }

    // Pick random indices that will never be acknowledged (duplicates collapse in the set).
    int holeDraws = rng.nextInt(total);
    SortedSet<Integer> holes = new TreeSet<>();
    for (int remaining = holeDraws; remaining > 0; --remaining) {
      holes.add(rng.nextInt(total));
    }

    // Acknowledge everything that is not a hole.
    for (int idx = 0; idx < total; ++idx) {
      if (holes.contains(idx)) {
        continue;
      }
      tracked[idx].ack();
    }

    int lastIndex = total - 1;
    verifyCommitables(tracker, holes, lastIndex);

    // Sweeping must not have any side effects on correctness.
    tracker.sweep();
    verifyCommitables(tracker, holes, lastIndex);

    iteration++;
  }
}
use of org.apache.gobblin.source.extractor.extract.LongWatermark in project incubator-gobblin by apache.
the class MultiWriterWatermarkManagerTest method testFailingWatermarkStorage.
/**
 * Verifies behavior when every commit to watermark storage fails.
 *
 * <p>The storage mock throws the same {@link IOException} on each {@code commitWatermarks}
 * call. The manager is started, left running for 2.5 commit intervals and then closed. The
 * test expects at least three commit attempts and expects the commit status to expose the
 * thrown exception as the last commit exception.
 */
@Test
public void testFailingWatermarkStorage() throws IOException, InterruptedException {
  // Storage that fails on every commit attempt.
  IOException commitFailure = new IOException("Failed to write coz the programmer told me to");
  WatermarkStorage failingStorage = mock(WatermarkStorage.class);
  doThrow(commitFailure).when(failingStorage).commitWatermarks(any(Iterable.class));

  long interval = 1000;
  MultiWriterWatermarkManager manager =
      new MultiWriterWatermarkManager(failingStorage, interval, Optional.<Logger>absent());

  // A writer that always reports one committable watermark under the "default" source.
  CheckpointableWatermark committable = new DefaultCheckpointableWatermark("default", new LongWatermark(0));
  WatermarkAwareWriter writer = mock(WatermarkAwareWriter.class);
  when(writer.getCommittableWatermark()).thenReturn(Collections.singletonMap("default", committable));
  manager.registerWriter(writer);

  try {
    manager.start();
  } catch (Exception e) {
    Assert.fail("Should not throw exception", e);
  }

  // Sleep for 2.5 iterations of the commit interval.
  long sleepMillis = interval * 2 + (interval / 2);
  Thread.sleep(sleepMillis);
  manager.close();

  // 2 calls from iterations, 1 additional attempt due to close.
  int minimumCommitAttempts = 3;
  verify(failingStorage, atLeast(minimumCommitAttempts)).commitWatermarks(any(Iterable.class));
  Assert.assertEquals(manager.getCommitStatus().getLastCommitException(), commitFailure,
      "Testing tracking of failed exceptions");
}
use of org.apache.gobblin.source.extractor.extract.LongWatermark in project incubator-gobblin by apache.
the class ConfigBasedDatasetTest method testGetCopyableFilesHelper.
/**
 * Builds a {@link ConfigBasedDataset} over mocked replication end points on the local file
 * system and returns the copy entities it produces.
 *
 * <p>The source end point reports {@code sourceWatermark} as its watermark and the files
 * listed under {@code sourceDir}; the destination end point reports no watermark and the files
 * listed under {@code destinationDir}. When {@code isFilterEnabled} is set, the hidden-file
 * path filter is configured and is also applied to directories during listing.
 *
 * @param sourceDir directory used as the copy-from dataset path
 * @param destinationDir directory used as the copy-to dataset path and as the publish dir
 * @param sourceWatermark watermark value reported by the source end point
 * @param isFilterEnabled whether to configure the hidden-file path filter
 * @return the result of {@code getCopyableFiles} on the constructed dataset
 * @throws Exception if file listing or dataset evaluation fails
 */
public Collection<? extends CopyEntity> testGetCopyableFilesHelper(String sourceDir, String destinationDir, long sourceWatermark, boolean isFilterEnabled) throws Exception {
  FileSystem localFs = FileSystem.getLocal(new Configuration());
  URI localUri = localFs.getUri();

  Properties props = new Properties();
  props.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/publisher");

  // Default filter first; replaced below when the hidden-file filter is requested.
  PathFilter filter = DatasetUtils.instantiatePathFilter(props);
  boolean filterDirectories = false;
  if (isFilterEnabled) {
    props.setProperty(DatasetUtils.CONFIGURATION_KEY_PREFIX + "path.filter.class", "org.apache.gobblin.util.filters.HiddenFilter");
    props.setProperty(CopyConfiguration.APPLY_FILTER_TO_DIRECTORIES, "true");
    filter = DatasetUtils.instantiatePathFilter(props);
    String applyToDirs = props.getProperty(CopyConfiguration.APPLY_FILTER_TO_DIRECTORIES, "false");
    filterDirectories = Boolean.parseBoolean(applyToDirs);
  }

  CopyConfiguration copyConfiguration = CopyConfiguration
      .builder(FileSystem.getLocal(new Configuration()), props)
      .publishDir(new Path(destinationDir))
      .preserve(PreserveAttributes.fromMnemonicString("ugp"))
      .build();

  // Replication configuration: PULL mode with stubbed metadata.
  ReplicationMetaData metaData = Mockito.mock(ReplicationMetaData.class);
  Mockito.when(metaData.toString()).thenReturn("Mock Meta Data");
  ReplicationConfiguration replicationConfig = Mockito.mock(ReplicationConfiguration.class);
  Mockito.when(replicationConfig.getCopyMode()).thenReturn(ReplicationCopyMode.PULL);
  Mockito.when(replicationConfig.getMetaData()).thenReturn(metaData);

  // Source end point: has a watermark and the files found under sourceDir.
  Path sourcePath = new Path(sourceDir);
  HadoopFsEndPoint sourceEndPoint = Mockito.mock(HadoopFsEndPoint.class);
  Mockito.when(sourceEndPoint.getDatasetPath()).thenReturn(sourcePath);
  Mockito.when(sourceEndPoint.getFsURI()).thenReturn(localUri);
  ComparableWatermark sourceMark = new LongWatermark(sourceWatermark);
  Mockito.when(sourceEndPoint.getWatermark()).thenReturn(Optional.of(sourceMark));
  Mockito.when(sourceEndPoint.getFiles()).thenReturn(FileListUtils.listFilesRecursively(localFs, new Path(sourceDir), filter, filterDirectories));

  // Destination end point: no watermark, files found under destinationDir.
  Path destinationPath = new Path(destinationDir);
  HadoopFsEndPoint destinationEndPoint = Mockito.mock(HadoopFsEndPoint.class);
  Mockito.when(destinationEndPoint.getDatasetPath()).thenReturn(destinationPath);
  Mockito.when(destinationEndPoint.getFsURI()).thenReturn(localUri);
  Optional<ComparableWatermark> noWatermark = Optional.absent();
  Mockito.when(destinationEndPoint.getWatermark()).thenReturn(noWatermark);
  Mockito.when(destinationEndPoint.getFiles()).thenReturn(FileListUtils.listFilesRecursively(localFs, new Path(destinationDir), filter, filterDirectories));

  CopyRoute copyRoute = Mockito.mock(CopyRoute.class);
  Mockito.when(copyRoute.getCopyFrom()).thenReturn(sourceEndPoint);
  Mockito.when(copyRoute.getCopyTo()).thenReturn(destinationEndPoint);

  ConfigBasedDataset dataset = new ConfigBasedDataset(replicationConfig, props, copyRoute);
  return dataset.getCopyableFiles(localFs, copyConfiguration);
}
use of org.apache.gobblin.source.extractor.extract.LongWatermark in project incubator-gobblin by apache.
the class PartitionLevelWatermarkerTest method testGetPreviousHighWatermarkForPartition.
/**
 * Checks that per-partition high watermarks stored in a previous watermark work unit are
 * returned for the matching partitions.
 *
 * <p>The previous work unit state carries a multi-key watermark with entries "2015" -> 100 and
 * "2016" -> 101; the watermarker built from that state must return those values for the
 * partitions mocked with the corresponding value lists.
 */
@Test
public void testGetPreviousHighWatermarkForPartition() throws Exception {
  // Previous run: a watermark work unit for the dataset holding one entry per partition.
  WorkUnitState priorState = new WorkUnitState();
  priorState.setProp(ConfigurationKeys.DATASET_URN_KEY, "db@test_dataset_urn");
  priorState.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
  priorState.setActualHighWatermark(
      new MultiKeyValueLongWatermark(ImmutableMap.of("2015", 100L, "2016", 101L)));

  SourceState sourceState = new SourceState(new State(), Lists.newArrayList(priorState));
  PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(sourceState);

  Table datasetTable = mockTable("test_dataset_urn");
  Partition firstPartition = mockPartition(datasetTable, ImmutableList.of("2015"));
  Partition secondPartition = mockPartition(datasetTable, ImmutableList.of("2016"));

  Assert.assertEquals(watermarker.getPreviousHighWatermark(firstPartition), new LongWatermark(100L));
  Assert.assertEquals(watermarker.getPreviousHighWatermark(secondPartition), new LongWatermark(101L));
}
Aggregations