use of org.apache.gobblin.source.extractor.ComparableWatermark in project incubator-gobblin by apache.
the class ReplicaHadoopFsEndPoint method getWatermark.
/**
 * Returns the watermark recorded for this replica, attempting to load it on the first call only.
 *
 * <p>The first invocation sets {@code watermarkInitialized} and then looks for
 * {@code WATERMARK_FILE} under {@code rc.getPath()}. If the file exists and its content
 * deserializes to a {@link ComparableWatermark}, that value replaces {@code cachedWatermark}.
 * In every other case (file absent, deserialized value not comparable, I/O failure, malformed
 * content) {@code cachedWatermark} is returned as it was before the call. Because the flag is set
 * before the read, a failed attempt is not repeated on later calls.
 *
 * @return the current value of {@code cachedWatermark}
 */
@Override
public synchronized Optional<ComparableWatermark> getWatermark() {
  if (this.watermarkInitialized) {
    return this.cachedWatermark;
  }
  // Flag is raised before the read, so the lookup below runs at most once per instance.
  this.watermarkInitialized = true;
  try {
    Path metaData = new Path(rc.getPath(), WATERMARK_FILE);
    FileSystem fs = FileSystem.get(rc.getFsURI(), new Configuration());
    if (fs.exists(metaData)) {
      // Both the stream and the reader wrapping it are closed when this block exits.
      try (FSDataInputStream fin = fs.open(metaData);
          InputStreamReader reader = new InputStreamReader(fin, Charsets.UTF_8)) {
        String content = CharStreams.toString(reader);
        Watermark w = WatermarkMetadataUtil.deserialize(content);
        if (w instanceof ComparableWatermark) {
          this.cachedWatermark = Optional.of((ComparableWatermark) w);
        }
      }
    }
    // A replica cannot fall back to file time stamps: they differ from the original source's
    // time stamps, so without a usable watermark file the cached value is left untouched.
  } catch (IOException e) {
    // Pass the exception along so the actual I/O failure is visible in the log.
    log.warn("Can not find " + WATERMARK_FILE + " for replica " + this, e);
  } catch (WatermarkMetadataUtil.WatermarkMetadataMulFormatException e) {
    log.warn("Can not create watermark from " + WATERMARK_FILE + " for replica " + this, e);
  }
  return this.cachedWatermark;
}
use of org.apache.gobblin.source.extractor.ComparableWatermark in project incubator-gobblin by apache.
the class CopyRouteGeneratorTest method testCopyRouteGenerator.
/**
 * Verifies which end point each pull-route generator selects as the copy source.
 *
 * <p>Three candidate routes lead to the same destination: one from an unavailable replica with no
 * watermark, one from an available replica with the older watermark, and one from the source with
 * the newer watermark. The test expects the network-bandwidth generator to pull from the replica
 * and the latency generator to pull from the source.
 */
@Test
public void testCopyRouteGenerator() throws Exception {
  // Oct 1, 2016
  long replica1Watermark = 1475304606000L;
  // Oct 4, 2016
  long sourceWatermark = 1475604606000L;

  // Replica whose file system is unavailable and which reports no watermark.
  ReplicaHadoopFsEndPoint notAvailableReplica = Mockito.mock(ReplicaHadoopFsEndPoint.class);
  Optional<ComparableWatermark> noWatermark = Optional.absent();
  Mockito.when(notAvailableReplica.isFileSystemAvailable()).thenReturn(false);
  Mockito.when(notAvailableReplica.getWatermark()).thenReturn(noWatermark);

  // Available replica carrying the older watermark.
  ReplicaHadoopFsEndPoint replica1 = Mockito.mock(ReplicaHadoopFsEndPoint.class);
  ComparableWatermark replicaValue = new LongWatermark(replica1Watermark);
  Optional<ComparableWatermark> replicaOptional = Optional.of(replicaValue);
  Mockito.when(replica1.isFileSystemAvailable()).thenReturn(true);
  Mockito.when(replica1.getWatermark()).thenReturn(replicaOptional);

  // Available source carrying the newer watermark.
  SourceHadoopFsEndPoint source = Mockito.mock(SourceHadoopFsEndPoint.class);
  ComparableWatermark sourceValue = new LongWatermark(sourceWatermark);
  Optional<ComparableWatermark> sourceOptional = Optional.of(sourceValue);
  Mockito.when(source.isFileSystemAvailable()).thenReturn(true);
  Mockito.when(source.getWatermark()).thenReturn(sourceOptional);

  // Destination shared by all three candidate routes.
  ReplicaHadoopFsEndPoint copyToEndPoint = Mockito.mock(ReplicaHadoopFsEndPoint.class);
  Mockito.when(copyToEndPoint.isFileSystemAvailable()).thenReturn(true);

  CopyRoute fromUnavailable = new CopyRoute(notAvailableReplica, copyToEndPoint);
  CopyRoute fromReplica = new CopyRoute(replica1, copyToEndPoint);
  CopyRoute fromSource = new CopyRoute(source, copyToEndPoint);
  DataFlowTopology dataFlowTopology = new DataFlowTopology();
  dataFlowTopology.addDataFlowPath(
      new DataFlowTopology.DataFlowPath(ImmutableList.<CopyRoute>of(fromUnavailable, fromReplica, fromSource)));

  ReplicationConfiguration rc = Mockito.mock(ReplicationConfiguration.class);
  Mockito.when(rc.getCopyMode()).thenReturn(ReplicationCopyMode.PULL);
  Mockito.when(rc.getSource()).thenReturn(source);
  Mockito.when(rc.getReplicas()).thenReturn(ImmutableList.<EndPoint>of(notAvailableReplica, replica1, copyToEndPoint));
  Mockito.when(rc.getDataFlowToplogy()).thenReturn(dataFlowTopology);

  // Bandwidth-optimized generator: expected to pull from replica1.
  CopyRouteGeneratorOptimizedNetworkBandwidthForTest bandwidthGenerator =
      new CopyRouteGeneratorOptimizedNetworkBandwidthForTest();
  boolean bandwidthPicksReplica = bandwidthGenerator.getPullRoute(rc, copyToEndPoint).get().getCopyFrom().equals(replica1);
  Assert.assertTrue(bandwidthPicksReplica);
  int bandwidthWatermarkDiff = bandwidthGenerator.getPullRoute(rc, copyToEndPoint).get().getCopyFrom().getWatermark().get()
      .compareTo(new LongWatermark(replica1Watermark));
  Assert.assertTrue(bandwidthWatermarkDiff == 0);

  // Latency-optimized generator: expected to pull from the source.
  CopyRouteGeneratorOptimizedLatency latencyGenerator = new CopyRouteGeneratorOptimizedLatency();
  boolean latencyPicksSource = latencyGenerator.getPullRoute(rc, copyToEndPoint).get().getCopyFrom().equals(source);
  Assert.assertTrue(latencyPicksSource);
  int latencyWatermarkDiff = latencyGenerator.getPullRoute(rc, copyToEndPoint).get().getCopyFrom().getWatermark().get()
      .compareTo(new LongWatermark(sourceWatermark));
  Assert.assertTrue(latencyWatermarkDiff == 0);
}
use of org.apache.gobblin.source.extractor.ComparableWatermark in project incubator-gobblin by apache.
the class ConfigBasedDatasetTest method testGetCopyableFilesHelper.
/**
 * Builds a {@link ConfigBasedDataset} over mocked end points on the local file system and returns
 * the copy entities it produces.
 *
 * <p>The copy-from end point reports {@code sourceWatermark} and the files listed under
 * {@code sourceDir}; the copy-to end point reports no watermark and the files listed under
 * {@code destinationDir}.
 *
 * @param sourceDir directory listed as the copy-from dataset path
 * @param destinationDir directory listed as the copy-to dataset path and used as publish dir
 * @param sourceWatermark value wrapped in a {@link LongWatermark} for the copy-from end point
 * @param isFilterEnabled when true, configures the {@code HiddenFilter} path filter and applies it
 *        to directories as well
 * @return the result of {@code dataset.getCopyableFiles(...)} for the constructed dataset
 * @throws Exception if file-system access or dataset construction fails
 */
public Collection<? extends CopyEntity> testGetCopyableFilesHelper(String sourceDir, String destinationDir, long sourceWatermark, boolean isFilterEnabled) throws Exception {
  FileSystem localFs = FileSystem.getLocal(new Configuration());
  URI localUri = localFs.getUri();

  Properties props = new Properties();
  props.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/publisher");
  PathFilter filter = DatasetUtils.instantiatePathFilter(props);
  boolean filterDirectories = false;
  if (isFilterEnabled) {
    // Switch to the hidden-file filter and re-read the directory flag from the properties.
    props.setProperty(DatasetUtils.CONFIGURATION_KEY_PREFIX + "path.filter.class", "org.apache.gobblin.util.filters.HiddenFilter");
    props.setProperty(CopyConfiguration.APPLY_FILTER_TO_DIRECTORIES, "true");
    filter = DatasetUtils.instantiatePathFilter(props);
    filterDirectories = Boolean.parseBoolean(props.getProperty(CopyConfiguration.APPLY_FILTER_TO_DIRECTORIES, "false"));
  }

  Path sourcePath = new Path(sourceDir);
  Path destinationPath = new Path(destinationDir);

  CopyConfiguration copyConfiguration = CopyConfiguration
      .builder(FileSystem.getLocal(new Configuration()), props)
      .publishDir(destinationPath)
      .preserve(PreserveAttributes.fromMnemonicString("ugp"))
      .build();

  // Replication configuration in PULL mode with placeholder metadata.
  ReplicationMetaData metaData = Mockito.mock(ReplicationMetaData.class);
  Mockito.when(metaData.toString()).thenReturn("Mock Meta Data");
  ReplicationConfiguration replicationConfig = Mockito.mock(ReplicationConfiguration.class);
  Mockito.when(replicationConfig.getCopyMode()).thenReturn(ReplicationCopyMode.PULL);
  Mockito.when(replicationConfig.getMetaData()).thenReturn(metaData);

  // Copy-from end point: carries the supplied watermark and the files under sourceDir.
  HadoopFsEndPoint copyFrom = Mockito.mock(HadoopFsEndPoint.class);
  ComparableWatermark fromWatermark = new LongWatermark(sourceWatermark);
  Mockito.when(copyFrom.getDatasetPath()).thenReturn(sourcePath);
  Mockito.when(copyFrom.getFsURI()).thenReturn(localUri);
  Mockito.when(copyFrom.getWatermark()).thenReturn(Optional.of(fromWatermark));
  Mockito.when(copyFrom.getFiles()).thenReturn(FileListUtils.listFilesRecursively(localFs, sourcePath, filter, filterDirectories));

  // Copy-to end point: no watermark, files under destinationDir.
  HadoopFsEndPoint copyTo = Mockito.mock(HadoopFsEndPoint.class);
  Optional<ComparableWatermark> noWatermark = Optional.absent();
  Mockito.when(copyTo.getDatasetPath()).thenReturn(destinationPath);
  Mockito.when(copyTo.getFsURI()).thenReturn(localUri);
  Mockito.when(copyTo.getWatermark()).thenReturn(noWatermark);
  Mockito.when(copyTo.getFiles()).thenReturn(FileListUtils.listFilesRecursively(localFs, destinationPath, filter, filterDirectories));

  CopyRoute copyRoute = Mockito.mock(CopyRoute.class);
  Mockito.when(copyRoute.getCopyFrom()).thenReturn(copyFrom);
  Mockito.when(copyRoute.getCopyTo()).thenReturn(copyTo);

  ConfigBasedDataset dataset = new ConfigBasedDataset(replicationConfig, props, copyRoute);
  return dataset.getCopyableFiles(localFs, copyConfiguration);
}
use of org.apache.gobblin.source.extractor.ComparableWatermark in project incubator-gobblin by apache.
the class SourceHadoopFsEndPoint method getWatermark.
/**
 * Returns the source watermark: the largest modification time among the files listed under the
 * valid paths of this end point, wrapped in a {@link LongWatermark}.
 *
 * <p>On the first successful call the listed files are appended to {@code allFileStatus}, the
 * watermark is cached and {@code initialized} is set; later calls return the cached value. If no
 * file is present in {@code allFileStatus} the watermark value is {@code -1}. When listing fails
 * with an {@link IOException}, the error is logged, {@code initialized} stays false (so the next
 * call tries again) and the current {@code cachedWatermark} is returned.
 *
 * @return the cached watermark, or the prior value of {@code cachedWatermark} if listing failed
 */
@Override
public synchronized Optional<ComparableWatermark> getWatermark() {
  if (this.initialized) {
    return this.cachedWatermark;
  }
  try {
    FileSystem fs = FileSystem.get(rc.getFsURI(), new Configuration());
    Collection<Path> validPaths = ReplicationDataValidPathPicker.getValidPaths(this);

    // Stage the listing locally and commit it only once every path has been listed. Appending
    // directly to allFileStatus would leave partial results behind on an IOException, and the
    // retry on the next call would then add the same files a second time.
    // (Fully qualified names keep this method independent of the file's import list.)
    java.util.List<FileStatus> listed = new java.util.ArrayList<>();
    for (Path p : validPaths) {
      listed.addAll(FileListUtils.listFilesRecursively(fs, p, super.getPathFilter(), super.isApplyFilterToDirectories()));
    }
    this.allFileStatus.addAll(listed);

    // -1 is the watermark value when there are no files at all.
    long curTs = -1;
    for (FileStatus f : this.allFileStatus) {
      curTs = Math.max(curTs, f.getModificationTime());
    }

    ComparableWatermark result = new LongWatermark(curTs);
    this.cachedWatermark = Optional.of(result);
    this.initialized = true;
    return this.cachedWatermark;
  } catch (IOException e) {
    // Pass the exception along so the cause of the failure is visible in the log.
    log.error("Error while retrieve the watermark for " + this, e);
    return this.cachedWatermark;
  }
}
Aggregations