Use of org.apache.druid.indexer.HadoopIngestionSpec in project druid by druid-io: class MaterializedViewSupervisorTest, method testCheckSegmentsAndSubmitTasks.
@Test
public void testCheckSegmentsAndSubmitTasks() throws IOException {
  // Announce one base segment covering 2015-01-02/2015-01-03.
  Set<DataSegment> baseSegments = Sets.newHashSet(
      new DataSegment(
          "base",
          Intervals.of("2015-01-02T00Z/2015-01-03T00Z"),
          "2015-01-03",
          ImmutableMap.of(),
          ImmutableList.of("dim1", "dim2"),
          ImmutableList.of("m1"),
          new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null),
          9,
          1024
      )
  );
  indexerMetadataStorageCoordinator.announceHistoricalSegments(baseSegments);

  // test_task1 is reported as failed, test_task2 as still running.
  EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
  EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.absent()).anyTimes();
  EasyMock.expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.of()).anyTimes();
  EasyMock.expect(taskStorage.getStatus("test_task1"))
          .andReturn(Optional.of(TaskStatus.failure("test_task1", "Dummy task status failure err message")))
          .anyTimes();
  EasyMock.expect(taskStorage.getStatus("test_task2"))
          .andReturn(Optional.of(TaskStatus.running("test_task2")))
          .anyTimes();
  EasyMock.replay(taskStorage);

  Pair<Map<Interval, HadoopIndexTask>, Map<Interval, String>> runningTasksPair = supervisor.getRunningTasks();
  Map<Interval, HadoopIndexTask> runningTasks = runningTasksPair.lhs;
  Map<Interval, String> runningVersion = runningTasksPair.rhs;

  DataSchema dataSchema = new DataSchema("test_datasource", null, null, null, TransformSpec.NONE, objectMapper);
  HadoopIOConfig hadoopIOConfig = new HadoopIOConfig(new HashMap<>(), null, null);
  HadoopIngestionSpec spec = new HadoopIngestionSpec(dataSchema, hadoopIOConfig, null);

  HadoopIndexTask task1 = new HadoopIndexTask("test_task1", spec, null, null, null, objectMapper, null, null, null);
  runningTasks.put(Intervals.of("2015-01-01T00Z/2015-01-02T00Z"), task1);
  runningVersion.put(Intervals.of("2015-01-01T00Z/2015-01-02T00Z"), "test_version1");
  HadoopIndexTask task2 = new HadoopIndexTask("test_task2", spec, null, null, null, objectMapper, null, null, null);
  runningTasks.put(Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), task2);
  runningVersion.put(Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), "test_version2");

  supervisor.checkSegmentsAndSubmitTasks();

  // After the check, only the entry for the still-running test_task2 should remain.
  Map<Interval, HadoopIndexTask> expectedRunningTasks = new HashMap<>();
  Map<Interval, String> expectedRunningVersion = new HashMap<>();
  expectedRunningTasks.put(Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), task2);
  expectedRunningVersion.put(Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), "test_version2");
  Assert.assertEquals(expectedRunningTasks, runningTasks);
  Assert.assertEquals(expectedRunningVersion, runningVersion);
}
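For reference, the spec assembled in this test follows the usual three-part shape: a DataSchema, a HadoopIOConfig, and an optional tuning config. Below is a minimal sketch reusing the same constructor signatures shown above; the names my_datasource and my_task are illustrative, and passing null for the third argument leaves the tuning config to the spec's default.

// Illustrative sketch reusing the constructors from the test above.
DataSchema dataSchema = new DataSchema("my_datasource", null, null, null, TransformSpec.NONE, objectMapper); // illustrative name
HadoopIOConfig ioConfig = new HadoopIOConfig(new HashMap<>(), null, null);
HadoopIngestionSpec spec = new HadoopIngestionSpec(dataSchema, ioConfig, null); // null leaves the tuning config to the default
HadoopIndexTask task = new HadoopIndexTask("my_task", spec, null, null, null, objectMapper, null, null, null); // illustrative id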
Use of org.apache.druid.indexer.HadoopIngestionSpec in project druid by druid-io: class TaskSerdeTest, method testHadoopIndexTaskSerde.
@Test
public void testHadoopIndexTaskSerde() throws Exception {
  final HadoopIndexTask task = new HadoopIndexTask(
      null,
      new HadoopIngestionSpec(
          new DataSchema("foo", null, new AggregatorFactory[0],
              new UniformGranularitySpec(Granularities.DAY, null, ImmutableList.of(Intervals.of("2010-01-01/P1D"))),
              null, jsonMapper),
          new HadoopIOConfig(ImmutableMap.of("paths", "bar"), null, null),
          null
      ),
      null, null, "blah", jsonMapper, null, AuthTestUtils.TEST_AUTHORIZER_MAPPER, null
  );
  final String json = jsonMapper.writeValueAsString(task);
  final HadoopIndexTask task2 = (HadoopIndexTask) jsonMapper.readValue(json, Task.class);
  Assert.assertEquals("foo", task.getDataSource());
  Assert.assertEquals(task.getId(), task2.getId());
  Assert.assertEquals(task.getGroupId(), task2.getGroupId());
  Assert.assertEquals(task.getDataSource(), task2.getDataSource());
  Assert.assertEquals(task.getSpec().getTuningConfig().getJobProperties(),
      task2.getSpec().getTuningConfig().getJobProperties());
  Assert.assertEquals("blah", task.getClasspathPrefix());
  Assert.assertEquals("blah", task2.getClasspathPrefix());
}
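Worth noting: the spec above is created with a null tuning config, yet the job-property assertion reads a tuning config off both the original and the deserialized task, which suggests HadoopIngestionSpec supplies a default when none is given. A hedged sketch of that observation, using the test's own task and task2 variables:

// Hedged sketch: getTuningConfig() is expected to be non-null on both sides of the
// round trip even though null was passed to the spec, presumably a default config.
Assert.assertNotNull(task.getSpec().getTuningConfig());
Assert.assertNotNull(task2.getSpec().getTuningConfig());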
Use of org.apache.druid.indexer.HadoopIngestionSpec in project druid by druid-io: class HdfsDataSegmentPusherTest, method shouldMakeHDFSCompliantSegmentOutputPath.
@Test
public void shouldMakeHDFSCompliantSegmentOutputPath() {
  HadoopIngestionSpec schema;
  try {
    schema = objectMapper.readValue(
        "{\n"
        + "  \"dataSchema\": {\n"
        + "    \"dataSource\": \"source\",\n"
        + "    \"metricsSpec\": [],\n"
        + "    \"granularitySpec\": {\n"
        + "      \"type\": \"uniform\",\n"
        + "      \"segmentGranularity\": \"hour\",\n"
        + "      \"intervals\": [\"2012-07-10/P1D\"]\n"
        + "    }\n"
        + "  },\n"
        + "  \"ioConfig\": {\n"
        + "    \"type\": \"hadoop\",\n"
        + "    \"segmentOutputPath\": \"hdfs://server:9100/tmp/druid/datatest\"\n"
        + "  }\n"
        + "}",
        HadoopIngestionSpec.class
    );
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
  // DataSchema dataSchema = new DataSchema("dataSource", null, null, Gra)
  // schema = new HadoopIngestionSpec(dataSchema, ioConfig, HadoopTuningConfig.makeDefaultTuningConfig());
  HadoopDruidIndexerConfig cfg = new HadoopDruidIndexerConfig(
      schema.withTuningConfig(schema.getTuningConfig().withVersion("some:brand:new:version"))
  );
  Bucket bucket = new Bucket(4711, new DateTime(2012, 07, 10, 5, 30, ISOChronology.getInstanceUTC()), 4712);
  Path path = JobHelper.makeFileNamePath(
      new Path(cfg.getSchema().getIOConfig().getSegmentOutputPath()),
      new DistributedFileSystem(),
      new DataSegment(
          cfg.getSchema().getDataSchema().getDataSource(),
          cfg.getSchema().getDataSchema().getGranularitySpec().bucketInterval(bucket.time).get(),
          cfg.getSchema().getTuningConfig().getVersion(),
          null, null, null, new NumberedShardSpec(bucket.partitionNum, 5000), -1, 0
      ),
      JobHelper.INDEX_ZIP,
      hdfsDataSegmentPusher
  );
  Assert.assertEquals(
      "hdfs://server:9100/tmp/druid/datatest/source/20120710T050000.000Z_20120710T060000.000Z/some_brand_new_version/4712_index.zip",
      path.toString()
  );
  path = JobHelper.makeTmpPath(
      new Path(cfg.getSchema().getIOConfig().getSegmentOutputPath()),
      new DistributedFileSystem(),
      new DataSegment(
          cfg.getSchema().getDataSchema().getDataSource(),
          cfg.getSchema().getDataSchema().getGranularitySpec().bucketInterval(bucket.time).get(),
          cfg.getSchema().getTuningConfig().getVersion(),
          null, null, null, new NumberedShardSpec(bucket.partitionNum, 5000), -1, 0
      ),
      new TaskAttemptID("abc", 123, TaskType.REDUCE, 1, 0),
      hdfsDataSegmentPusher
  );
  Assert.assertEquals(
      "hdfs://server:9100/tmp/druid/datatest/source/20120710T050000.000Z_20120710T060000.000Z/some_brand_new_version/4712_index.zip.0",
      path.toString()
  );
}
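The two assertions also spell out the segment path layout: dataSource, then the bucket interval, then the version with ':' replaced by '_' to keep the name HDFS-compliant, then the partition number and the index file. A small illustrative sketch that reassembles the asserted value by hand (plain string handling, not a Druid API):

// Illustrative only: rebuilding the path asserted above from its parts.
String hdfsSafeVersion = "some:brand:new:version".replace(':', '_');
String expectedPath = "hdfs://server:9100/tmp/druid/datatest/source/"
    + "20120710T050000.000Z_20120710T060000.000Z/" + hdfsSafeVersion + "/4712_index.zip";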
Use of org.apache.druid.indexer.HadoopIngestionSpec in project druid by druid-io: class GranularityPathSpecTest, method testAddInputPath.
@Test
public void testAddInputPath() throws Exception {
  UserGroupInformation.setLoginUser(UserGroupInformation.createUserForTesting("test", new String[] { "testGroup" }));
  HadoopIngestionSpec spec = new HadoopIngestionSpec(
      new DataSchema("foo", null, new AggregatorFactory[0],
          new UniformGranularitySpec(Granularities.DAY, Granularities.MINUTE,
              ImmutableList.of(Intervals.of("2015-11-06T00:00Z/2015-11-07T00:00Z"))),
          null, jsonMapper),
      new HadoopIOConfig(null, null, null),
      DEFAULT_TUNING_CONFIG
  );
  granularityPathSpec.setDataGranularity(Granularities.HOUR);
  granularityPathSpec.setFilePattern(".*");
  granularityPathSpec.setInputFormat(TextInputFormat.class);
  Job job = Job.getInstance();
  String formatStr = "file:%s/%s;org.apache.hadoop.mapreduce.lib.input.TextInputFormat";

  // Build a partial hour-partitioned layout: only H=00, H=02 and H=05 exist.
  testFolder.newFolder("test", "y=2015", "m=11", "d=06", "H=00");
  testFolder.newFolder("test", "y=2015", "m=11", "d=06", "H=02");
  testFolder.newFolder("test", "y=2015", "m=11", "d=06", "H=05");
  testFolder.newFile("test/y=2015/m=11/d=06/H=00/file1");
  testFolder.newFile("test/y=2015/m=11/d=06/H=02/file2");
  testFolder.newFile("test/y=2015/m=11/d=06/H=05/file3");
  testFolder.newFile("test/y=2015/m=11/d=06/H=05/file4");

  granularityPathSpec.setInputPath(testFolder.getRoot().getPath() + "/test");
  granularityPathSpec.addInputPaths(HadoopDruidIndexerConfig.fromSpec(spec), job);
  String actual = job.getConfiguration().get("mapreduce.input.multipleinputs.dir.formats");
  String expected = Joiner.on(",").join(Lists.newArrayList(
      StringUtils.format(formatStr, testFolder.getRoot(), "test/y=2015/m=11/d=06/H=00/file1"),
      StringUtils.format(formatStr, testFolder.getRoot(), "test/y=2015/m=11/d=06/H=02/file2"),
      StringUtils.format(formatStr, testFolder.getRoot(), "test/y=2015/m=11/d=06/H=05/file3"),
      StringUtils.format(formatStr, testFolder.getRoot(), "test/y=2015/m=11/d=06/H=05/file4")
  ));
  Assert.assertEquals("Did not find expected input paths", expected, actual);
}
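Each entry in mapreduce.input.multipleinputs.dir.formats pairs one resolved file path with its input format class, separated by a semicolon, and only files that actually exist under the hourly y=/m=/d=/H= directories show up (hours with no directory contribute nothing). A sketch of how a single entry is assembled, reusing the test's format string:

// One entry of the expected configuration value: "<file URI>;<InputFormat class>".
String entry = StringUtils.format(
    "file:%s/%s;org.apache.hadoop.mapreduce.lib.input.TextInputFormat",
    testFolder.getRoot(), "test/y=2015/m=11/d=06/H=00/file1"
);
// Entries for every matching file are then comma-joined into the expected value above.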