Use of io.druid.segment.indexing.DataSchema in project druid by druid-io.
From the class TaskSerdeTest, method testHadoopIndexTaskSerde.
@Test
public void testHadoopIndexTaskSerde() throws Exception {
final HadoopIndexTask task = new HadoopIndexTask(
    null,
    new HadoopIngestionSpec(
        new DataSchema(
            "foo",
            null,
            new AggregatorFactory[0],
            new UniformGranularitySpec(Granularities.DAY, null, ImmutableList.of(new Interval("2010-01-01/P1D"))),
            jsonMapper
        ),
        new HadoopIOConfig(ImmutableMap.<String, Object>of("paths", "bar"), null, null),
        null
    ),
    null,
    null,
    "blah",
    jsonMapper,
    null
);
final String json = jsonMapper.writeValueAsString(task);
final HadoopIndexTask task2 = (HadoopIndexTask) jsonMapper.readValue(json, Task.class);
Assert.assertEquals("foo", task.getDataSource());
Assert.assertEquals(task.getId(), task2.getId());
Assert.assertEquals(task.getGroupId(), task2.getGroupId());
Assert.assertEquals(task.getDataSource(), task2.getDataSource());
Assert.assertEquals(task.getSpec().getTuningConfig().getJobProperties(), task2.getSpec().getTuningConfig().getJobProperties());
Assert.assertEquals("blah", task.getClasspathPrefix());
Assert.assertEquals("blah", task2.getClasspathPrefix());
}
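For context, the DataSchema constructor used above takes the datasource name, an optional parser map, the metric aggregators, a granularity spec, and a Jackson ObjectMapper. Below is a minimal standalone sketch; the plain DefaultObjectMapper is an assumption for illustration, since the test's jsonMapper is configured elsewhere.

// A minimal sketch of building a DataSchema on its own.
// Assumes a plain DefaultObjectMapper; a production mapper would have
// Druid's Jackson modules and injectable values registered.
ObjectMapper mapper = new DefaultObjectMapper();
DataSchema schema = new DataSchema(
    "foo",                    // datasource name
    null,                     // no parser map; rows are not parsed here
    new AggregatorFactory[0], // no metrics
    new UniformGranularitySpec(
        Granularities.DAY,
        null,
        ImmutableList.of(new Interval("2010-01-01/P1D"))
    ),
    mapper
);
String json = mapper.writeValueAsString(schema); // serializes like any Jackson bean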
Use of io.druid.segment.indexing.DataSchema in project druid by druid-io.
From the class TaskSerdeTest, method testRealtimeIndexTaskSerde.
@Test
public void testRealtimeIndexTaskSerde() throws Exception {
final RealtimeIndexTask task = new RealtimeIndexTask(
    null,
    new TaskResource("rofl", 2),
    new FireDepartment(
        new DataSchema(
            "foo",
            null,
            new AggregatorFactory[0],
            new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null),
            jsonMapper
        ),
        new RealtimeIOConfig(
            new LocalFirehoseFactory(new File("lol"), "rofl", null),
            new PlumberSchool() {
                @Override
                public Plumber findPlumber(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics) {
                    return null;
                }
            },
            null
        ),
        new RealtimeTuningConfig(1, new Period("PT10M"), null, null, null, null, 1, NoneShardSpec.instance(), indexSpec, null, 0, 0, true, null)
    ),
    null
);
final String json = jsonMapper.writeValueAsString(task);
// Just want to run the clock a bit to make sure the task id doesn't change
Thread.sleep(100);
final RealtimeIndexTask task2 = (RealtimeIndexTask) jsonMapper.readValue(json, Task.class);
Assert.assertEquals("foo", task.getDataSource());
Assert.assertEquals(2, task.getTaskResource().getRequiredCapacity());
Assert.assertEquals("rofl", task.getTaskResource().getAvailabilityGroup());
Assert.assertEquals(new Period("PT10M"), task.getRealtimeIngestionSchema().getTuningConfig().getWindowPeriod());
Assert.assertEquals(Granularities.HOUR, task.getRealtimeIngestionSchema().getDataSchema().getGranularitySpec().getSegmentGranularity());
Assert.assertTrue(task.getRealtimeIngestionSchema().getTuningConfig().isReportParseExceptions());
Assert.assertEquals(task.getId(), task2.getId());
Assert.assertEquals(task.getGroupId(), task2.getGroupId());
Assert.assertEquals(task.getDataSource(), task2.getDataSource());
Assert.assertEquals(task.getTaskResource().getRequiredCapacity(), task2.getTaskResource().getRequiredCapacity());
Assert.assertEquals(task.getTaskResource().getAvailabilityGroup(), task2.getTaskResource().getAvailabilityGroup());
Assert.assertEquals(task.getRealtimeIngestionSchema().getTuningConfig().getWindowPeriod(), task2.getRealtimeIngestionSchema().getTuningConfig().getWindowPeriod());
Assert.assertEquals(task.getRealtimeIngestionSchema().getDataSchema().getGranularitySpec().getSegmentGranularity(), task2.getRealtimeIngestionSchema().getDataSchema().getGranularitySpec().getSegmentGranularity());
}
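The anonymous PlumberSchool above exists only so the FireDepartment can be constructed; the serde test never runs the task, so findPlumber is never called. A named equivalent (a hypothetical helper, not part of the Druid codebase) would read:

// Hypothetical named stub, equivalent to the anonymous class above.
// It satisfies the PlumberSchool contract but never yields a usable Plumber.
class NoopPlumberSchool implements PlumberSchool {
    @Override
    public Plumber findPlumber(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics) {
        return null; // never invoked: the test only round-trips the task through JSON
    }
}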
Use of io.druid.segment.indexing.DataSchema in project druid by druid-io.
From the class TaskLifecycleTest, method testResumeTasks.
@Test
public void testResumeTasks() throws Exception {
final Task indexTask = new IndexTask(
    null,
    null,
    new IndexTask.IndexIngestionSpec(
        new DataSchema(
            "foo",
            null,
            new AggregatorFactory[] { new DoubleSumAggregatorFactory("met", "met") },
            new UniformGranularitySpec(Granularities.DAY, null, ImmutableList.of(new Interval("2010-01-01/P2D"))),
            mapper
        ),
        new IndexTask.IndexIOConfig(new MockFirehoseFactory(false), false, null),
        new IndexTask.IndexTuningConfig(10000, 10, null, null, indexSpec, null, false, null, null)
    ),
    null,
    MAPPER
);
final long startTime = System.currentTimeMillis();
// manually insert the task into TaskStorage, waiting for TaskQueue to sync from storage
taskQueue.start();
taskStorage.insert(indexTask, TaskStatus.running(indexTask.getId()));
while (tsqa.getStatus(indexTask.getId()).get().isRunnable()) {
if (System.currentTimeMillis() > startTime + 10 * 1000) {
throw new ISE("Where did the task go?!: %s", indexTask.getId());
}
Thread.sleep(100);
}
final TaskStatus status = taskStorage.getStatus(indexTask.getId()).get();
final List<DataSegment> publishedSegments = byIntervalOrdering.sortedCopy(mdc.getPublished());
final List<DataSegment> loggedSegments = byIntervalOrdering.sortedCopy(tsqa.getInsertedSegments(indexTask.getId()));
Assert.assertEquals("statusCode", TaskStatus.Status.SUCCESS, status.getStatusCode());
Assert.assertEquals("segments logged vs published", loggedSegments, publishedSegments);
Assert.assertEquals("num segments published", 2, mdc.getPublished().size());
Assert.assertEquals("num segments nuked", 0, mdc.getNuked().size());
Assert.assertEquals("segment1 datasource", "foo", publishedSegments.get(0).getDataSource());
Assert.assertEquals("segment1 interval", new Interval("2010-01-01/P1D"), publishedSegments.get(0).getInterval());
Assert.assertEquals("segment1 dimensions", ImmutableList.of("dim1", "dim2"), publishedSegments.get(0).getDimensions());
Assert.assertEquals("segment1 metrics", ImmutableList.of("met"), publishedSegments.get(0).getMetrics());
Assert.assertEquals("segment2 datasource", "foo", publishedSegments.get(1).getDataSource());
Assert.assertEquals("segment2 interval", new Interval("2010-01-02/P1D"), publishedSegments.get(1).getInterval());
Assert.assertEquals("segment2 dimensions", ImmutableList.of("dim1", "dim2"), publishedSegments.get(1).getDimensions());
Assert.assertEquals("segment2 metrics", ImmutableList.of("met"), publishedSegments.get(1).getMetrics());
}
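The loop above is a wait-with-deadline pattern: poll the task's status and fail loudly if it stays runnable past the timeout. Extracted as a standalone helper (a sketch; the test inlines this logic):

// A sketch of the polling pattern used above, pulled out as a helper.
// tsqa is the test's TaskStorageQueryAdapter; ISE is Druid's IllegalStateException variant.
static void waitForTaskToFinish(TaskStorageQueryAdapter tsqa, String taskId, long timeoutMillis) throws Exception {
    final long deadline = System.currentTimeMillis() + timeoutMillis;
    while (tsqa.getStatus(taskId).get().isRunnable()) {
        if (System.currentTimeMillis() > deadline) {
            throw new ISE("Where did the task go?!: %s", taskId);
        }
        Thread.sleep(100); // poll every 100 ms
    }
}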
Use of io.druid.segment.indexing.DataSchema in project druid by druid-io.
From the class RealtimeIndexTaskTest, method makeRealtimeTask.
private RealtimeIndexTask makeRealtimeTask(final String taskId, boolean reportParseExceptions, long handoffTimeout) {
ObjectMapper objectMapper = new DefaultObjectMapper();
DataSchema dataSchema = new DataSchema(
    "test_ds",
    null,
    new AggregatorFactory[] { new CountAggregatorFactory("rows"), new LongSumAggregatorFactory("met1", "met1") },
    new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null),
    objectMapper
);
RealtimeIOConfig realtimeIOConfig = new RealtimeIOConfig(new TestFirehoseFactory(), null, null);
RealtimeTuningConfig realtimeTuningConfig = new RealtimeTuningConfig(
    1000, new Period("P1Y"), new Period("PT10M"), null, null,
    new ServerTimeRejectionPolicyFactory(), null, null, null,
    buildV9Directly, 0, 0, reportParseExceptions, handoffTimeout
);
return new RealtimeIndexTask(taskId, null, new FireDepartment(dataSchema, realtimeIOConfig, realtimeTuningConfig), null) {
@Override
protected boolean isFirehoseDrainableByClosing(FirehoseFactory firehoseFactory) {
return true;
}
};
}
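Overriding isFirehoseDrainableByClosing to return true lets the tests stop the task gracefully by closing its firehose. A typical call site for this factory might look like the following; the id and timeout are hypothetical test values.

// Illustrative usage of makeRealtimeTask (argument values are made up):
RealtimeIndexTask task = makeRealtimeTask("realtime_test", true, 30000L);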
Use of io.druid.segment.indexing.DataSchema in project druid by druid-io.
From the class OrcIndexGeneratorJobTest, method setUp.
@Before
public void setUp() throws Exception {
mapper = HadoopDruidIndexerConfig.JSON_MAPPER;
mapper.registerSubtypes(new NamedType(HashBasedNumberedShardSpec.class, "hashed"));
dataRoot = temporaryFolder.newFolder("data");
outputRoot = temporaryFolder.newFolder("output");
File dataFile = writeDataToLocalOrcFile(dataRoot, data);
HashMap<String, Object> inputSpec = new HashMap<String, Object>();
inputSpec.put("paths", dataFile.getCanonicalPath());
inputSpec.put("type", "static");
inputSpec.put("inputFormat", "org.apache.hadoop.hive.ql.io.orc.OrcNewInputFormat");
config = new HadoopDruidIndexerConfig(
    new HadoopIngestionSpec(
        new DataSchema(
            dataSourceName,
            mapper.convertValue(inputRowParser, Map.class),
            aggs,
            new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(this.interval)),
            mapper
        ),
        new HadoopIOConfig(ImmutableMap.copyOf(inputSpec), null, outputRoot.getCanonicalPath()),
        new HadoopTuningConfig(
            outputRoot.getCanonicalPath(),
            null, null, null, null, null,
            false, false, false, false,
            // verifies that an explicitly set number of reducers is ignored
            ImmutableMap.of(JobContext.NUM_REDUCES, "0"),
            false, true, null, true, null, false, false
        )
    )
);
config.setShardSpecs(loadShardSpecs(shardInfoForEachSegment));
config = HadoopDruidIndexerConfig.fromSpec(config.getSchema());
}
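The three inputSpec entries describe a static ORC path spec. The same map can be built immutably in one expression, a sketch using the Guava ImmutableMap already in use for the ioConfig:

// Equivalent one-expression construction of the input spec above (java.util.Map).
Map<String, Object> inputSpec = ImmutableMap.<String, Object>of(
    "paths", dataFile.getCanonicalPath(),
    "type", "static",
    "inputFormat", "org.apache.hadoop.hive.ql.io.orc.OrcNewInputFormat"
);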