Use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by Apache.
Class CopySourceTest, method testCopySource.
/**
 * Runs {@link CopySource#getWorkunits} against the test dataset finder and checks the flattened
 * work units: their count must equal {@code TestCopyableDataset.FILE_COUNT}, each must deserialize
 * to a {@link CopyableFile} with the expected origin prefix and owner/permission, and all of them
 * must carry the same extract as the first work unit.
 */
@Test
public void testCopySource() throws Exception {
  SourceState sourceState = new SourceState();
  sourceState.setProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, "file:///");
  sourceState.setProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, "file:///");
  sourceState.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/target/dir");
  sourceState.setProp(DatasetUtils.DATASET_PROFILE_CLASS_KEY, TestCopyableDatasetFinder.class.getName());

  // Flatten before counting so that each entry corresponds to a single work unit.
  List<WorkUnit> flattened = JobLauncherUtils.flattenWorkUnits(new CopySource().getWorkunits(sourceState));
  Assert.assertEquals(flattened.size(), TestCopyableDataset.FILE_COUNT);

  // The first work unit's extract is the reference every other work unit is compared against.
  Extract referenceExtract = flattened.get(0).getExtract();
  for (WorkUnit unit : flattened) {
    CopyableFile copyableFile = (CopyableFile) CopySource.deserializeCopyEntity(unit);
    String originPath = copyableFile.getOrigin().getPath().toString();

    Assert.assertTrue(originPath.startsWith(TestCopyableDataset.ORIGIN_PREFIX));
    Assert.assertEquals(copyableFile.getDestinationOwnerAndPermission(), TestCopyableDataset.OWNER_AND_PERMISSION);
    Assert.assertEquals(unit.getExtract(), referenceExtract);
  }
}
Use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by Apache.
Class CopySourceTest, method testSubmitUnfulfilledRequestEvents.
/**
 * Checks that the request allocator created by {@link CopySource} records the file sets it could
 * not fit into the configured resource pool.
 *
 * <p>The state caps the copy at size {@code 50} and {@code 2} copy entities and asks for all
 * rejected requests to be stored. The private {@code createRequestAllocator} method is invoked
 * reflectively, requests are allocated, and the test then expects exactly two rejected file sets,
 * each for dataset URN {@code /test} with 5 entities and 50 bytes.
 *
 * @throws IOException declared for the file system and dataset finder setup calls
 * @throws NoSuchMethodException if {@code createRequestAllocator(CopyConfiguration, int)} is not declared on {@link CopySource}
 * @throws InvocationTargetException if the reflectively invoked method itself throws
 * @throws IllegalAccessException if the reflective invocation is not permitted
 */
@Test
public void testSubmitUnfulfilledRequestEvents() throws IOException, NoSuchMethodException, InvocationTargetException, IllegalAccessException {
  SourceState state = new SourceState();
  state.setProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, "file:///");
  state.setProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, "file:///");
  state.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/target/dir");
  state.setProp(DatasetUtils.DATASET_PROFILE_CLASS_KEY, TestCopyablePartitionableDatasedFinder.class.getCanonicalName());
  state.setProp(CopySource.MAX_CONCURRENT_LISTING_SERVICES, 2);
  state.setProp(CopyConfiguration.MAX_COPY_PREFIX + ".size", "50");
  state.setProp(CopyConfiguration.MAX_COPY_PREFIX + ".copyEntities", 2);
  // Locale.ROOT keeps the lower-cased config value independent of the JVM's default locale.
  state.setProp(CopyConfiguration.STORE_REJECTED_REQUESTS_KEY,
      RequestAllocatorConfig.StoreRejectedRequestsConfig.ALL.name().toLowerCase(java.util.Locale.ROOT));
  state.setProp(ConfigurationKeys.METRICS_CUSTOM_BUILDERS, "org.apache.gobblin.metrics.ConsoleEventReporterFactory");

  CopySource source = new CopySource();
  final FileSystem sourceFs = HadoopUtils.getSourceFileSystem(state);
  final FileSystem targetFs = HadoopUtils.getWriterFileSystem(state, 1, 0);
  int maxThreads = state.getPropAsInt(CopySource.MAX_CONCURRENT_LISTING_SERVICES, CopySource.DEFAULT_MAX_CONCURRENT_LISTING_SERVICES);
  final CopyConfiguration copyConfiguration = CopyConfiguration.builder(targetFs, state.getProperties()).build();

  MetricContext metricContext = Instrumented.getMetricContext(state, CopySource.class);
  EventSubmitter eventSubmitter = new EventSubmitter.Builder(metricContext, CopyConfiguration.COPY_PREFIX).build();

  DatasetsFinder<CopyableDatasetBase> datasetFinder = DatasetUtils.instantiateDatasetFinder(state.getProperties(), sourceFs, CopySource.DEFAULT_DATASET_PROFILE_CLASS_KEY, eventSubmitter, state);
  IterableDatasetFinder<CopyableDatasetBase> iterableDatasetFinder = datasetFinder instanceof IterableDatasetFinder ? (IterableDatasetFinder<CopyableDatasetBase>) datasetFinder : new IterableDatasetFinderImpl<>(datasetFinder);

  // The factory's output is filtered for nulls before it reaches the allocator.
  Iterator<CopyableDatasetRequestor> requestorIteratorWithNulls = Iterators.transform(iterableDatasetFinder.getDatasetsIterator(), new CopyableDatasetRequestor.Factory(targetFs, copyConfiguration, log));
  Iterator<CopyableDatasetRequestor> requestorIterator = Iterators.filter(requestorIteratorWithNulls, Predicates.<CopyableDatasetRequestor>notNull());

  // createRequestAllocator is looked up with getDeclaredMethod and made accessible for the test.
  Method m = CopySource.class.getDeclaredMethod("createRequestAllocator", CopyConfiguration.class, int.class);
  m.setAccessible(true);
  PriorityIterableBasedRequestAllocator<FileSet<CopyEntity>> allocator = (PriorityIterableBasedRequestAllocator<FileSet<CopyEntity>>) m.invoke(source, copyConfiguration, maxThreads);

  // The returned iterator is not inspected; the call is kept because it precedes the query for
  // rejected requests below, exactly as in the original sequence.
  allocator.allocateRequests(requestorIterator, copyConfiguration.getMaxToCopy());

  List<FileSet<CopyEntity>> fileSetList = allocator.getRequestsExceedingAvailableResourcePool();
  Assert.assertEquals(fileSetList.size(), 2);

  // Both rejected file sets are expected to look the same.
  for (FileSet<CopyEntity> fileSet : fileSetList) {
    Assert.assertEquals(fileSet.getDataset().getUrn(), "/test");
    Assert.assertEquals(fileSet.getTotalEntities(), 5);
    Assert.assertEquals(fileSet.getTotalSizeInBytes(), 50);
  }
}
Use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by Apache.
Class BackfillHiveSourceTest, method testWhitelist.
/**
 * Configures a two-entry partition whitelist on a {@link BackfillHiveSource} and checks
 * {@code shouldCreateWorkunit}: it must return true for mocked partitions whose complete name is
 * one of the whitelisted entries and false for a partition whose complete name is not listed.
 */
@Test
public void testWhitelist() throws Exception {
  SourceState sourceState = new SourceState();
  sourceState.setProp(BackfillHiveSource.BACKFILL_SOURCE_PARTITION_WHITELIST_KEY, "service@logEvent@datepartition=2016-08-04-00,service@logEvent@datepartition=2016-08-05-00");

  BackfillHiveSource source = new BackfillHiveSource();
  source.initBackfillHiveSource(sourceState);

  // Two partitions named in the whitelist ...
  Partition listedAug4 = Mockito.mock(Partition.class, Mockito.RETURNS_SMART_NULLS);
  Mockito.doReturn("service@logEvent@datepartition=2016-08-04-00").when(listedAug4).getCompleteName();

  Partition listedAug5 = Mockito.mock(Partition.class, Mockito.RETURNS_SMART_NULLS);
  Mockito.doReturn("service@logEvent@datepartition=2016-08-05-00").when(listedAug5).getCompleteName();

  // ... and one that is absent from it.
  Partition unlistedAug6 = Mockito.mock(Partition.class, Mockito.RETURNS_SMART_NULLS);
  Mockito.doReturn("service@logEvent@datepartition=2016-08-06-00").when(unlistedAug6).getCompleteName();

  Assert.assertTrue(source.shouldCreateWorkunit(listedAug4, new LongWatermark(0)));
  Assert.assertTrue(source.shouldCreateWorkunit(listedAug5, new LongWatermark(0)));
  Assert.assertFalse(source.shouldCreateWorkunit(unlistedAug6, new LongWatermark(0)));
}
Use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by Apache.
Class HiveSourceTest, method getTestState.
/**
 * Creates the {@link SourceState} used by the Hive source tests.
 *
 * <p>The state carries the job id {@code testJobId}, the table pattern {@code *}, and the given
 * database name under {@code hive.dataset.database}.
 *
 * @param dbName value to store under the {@code hive.dataset.database} property
 * @return a new state populated with the three properties above
 */
private static SourceState getTestState(String dbName) {
  final SourceState state = new SourceState();
  state.setProp(ConfigurationKeys.JOB_ID_KEY, "testJobId");
  state.setProp("hive.dataset.table.pattern", "*");
  state.setProp("hive.dataset.database", dbName);
  return state;
}
Use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by Apache.
Class HiveSourceTest, method testGetWorkUnitsForTable.
/**
 * Creates a single test Avro table in a fresh database and checks that the Hive source returns
 * exactly one work unit for it, carrying the expected database name, table name and table schema
 * URL.
 */
@Test
public void testGetWorkUnitsForTable() throws Exception {
  final String database = "testdb2";
  final String table = "testtable2";
  final String tableLocation = "/tmp/testtable2";

  // Drop the database before creating the table; presumably this clears leftovers from earlier
  // runs (the meaning of the boolean flags is defined by the metastore client, not shown here).
  this.hiveMetastoreTestUtils.getLocalMetastoreClient().dropDatabase(database, false, true, true);
  this.hiveMetastoreTestUtils.createTestAvroTable(database, table, tableLocation, Optional.<String>absent());

  List<WorkUnit> produced = hiveSource.getWorkunits(getTestState(database));

  // One workunit for the table, no dummy workunits
  Assert.assertEquals(produced.size(), 1);

  HiveWorkUnit hiveWorkUnit = new HiveWorkUnit(produced.get(0));
  Assert.assertEquals(hiveWorkUnit.getHiveDataset().getDbAndTable().getDb(), database);
  Assert.assertEquals(hiveWorkUnit.getHiveDataset().getDbAndTable().getTable(), table);
  Assert.assertEquals(hiveWorkUnit.getTableSchemaUrl(), new Path("/tmp/dummy"));
}
Aggregations