use of io.druid.segment.realtime.plumber.Plumber in project druid by druid-io.
the class TaskAnnouncementTest method testBackwardsCompatibleSerde.
/**
 * Serializes a {@link TaskStatus} and a {@link TaskAnnouncement} for the same task and checks
 * that each JSON form can be read back as either type.
 */
@Test
public void testBackwardsCompatibleSerde() throws Exception {
  // Build the task piece by piece; the plumber school is a stub that never supplies a plumber.
  final TaskResource resource = new TaskResource("rofl", 2);
  final DataSchema dataSchema =
      new DataSchema("foo", null, new AggregatorFactory[0], null, new DefaultObjectMapper());
  final LocalFirehoseFactory firehoseFactory = new LocalFirehoseFactory(new File("lol"), "rofl", null);
  final PlumberSchool nullPlumberSchool = new PlumberSchool() {
    @Override
    public Plumber findPlumber(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics) {
      return null;
    }
  };
  final RealtimeIOConfig ioConfig = new RealtimeIOConfig(firehoseFactory, nullPlumberSchool, null);
  final FireDepartment fireDepartment = new FireDepartment(dataSchema, ioConfig, null);
  final Task task = new RealtimeIndexTask("theid", resource, fireDepartment, null);

  final TaskStatus runningStatus = TaskStatus.running(task.getId());
  final TaskAnnouncement originalAnnouncement =
      TaskAnnouncement.create(task, runningStatus, TaskLocation.unknown());

  // Serialize both representations.
  final String serializedStatus = jsonMapper.writeValueAsString(runningStatus);
  final String serializedAnnouncement = jsonMapper.writeValueAsString(originalAnnouncement);

  // Read every JSON form back as every type (2 x 2 combinations).
  final TaskStatus statusReadFromStatus = jsonMapper.readValue(serializedStatus, TaskStatus.class);
  final TaskStatus statusReadFromAnnouncement = jsonMapper.readValue(serializedAnnouncement, TaskStatus.class);
  final TaskAnnouncement announcementReadFromStatus =
      jsonMapper.readValue(serializedStatus, TaskAnnouncement.class);
  final TaskAnnouncement announcementReadFromAnnouncement =
      jsonMapper.readValue(serializedAnnouncement, TaskAnnouncement.class);

  // The task id must survive every combination.
  Assert.assertEquals("theid", statusReadFromStatus.getId());
  Assert.assertEquals("theid", statusReadFromAnnouncement.getId());
  Assert.assertEquals("theid", announcementReadFromStatus.getTaskStatus().getId());
  Assert.assertEquals("theid", announcementReadFromAnnouncement.getTaskStatus().getId());

  // An announcement read from bare status JSON is expected to report the task id as its
  // availability group and a required capacity of 1; a real announcement keeps "rofl" / 2.
  Assert.assertEquals("theid", announcementReadFromStatus.getTaskResource().getAvailabilityGroup());
  Assert.assertEquals("rofl", announcementReadFromAnnouncement.getTaskResource().getAvailabilityGroup());
  Assert.assertEquals(1, announcementReadFromStatus.getTaskResource().getRequiredCapacity());
  Assert.assertEquals(2, announcementReadFromAnnouncement.getTaskResource().getRequiredCapacity());
}
use of io.druid.segment.realtime.plumber.Plumber in project druid by druid-io.
the class RealtimeManagerTest method setUp.
/**
 * Builds the fixtures used by the tests: three schemas, two test plumbers, and three
 * {@link RealtimeManager} instances. The third manager is wired with two pre-built fire chiefs
 * (partitions 0 and 1 of the "testing" datasource), which are started at the end of this method.
 */
@Before
public void setUp() throws Exception {
  // Input fed to both firehoses: a far-future row, a row that throws a parse error,
  // a null entry, and a row stamped with the current time.
  final List<TestInputRowHolder> inputRows = Arrays.asList(
      makeRow(new DateTime("9000-01-01").getMillis()),
      makeRow(new ParseException("parse error")),
      null,
      makeRow(new DateTime().getMillis())
  );

  ObjectMapper mapper = new DefaultObjectMapper();

  schema = new DataSchema(
      "test",
      null,
      new AggregatorFactory[] { new CountAggregatorFactory("rows") },
      new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null),
      mapper
  );
  schema2 = new DataSchema(
      "testV2",
      null,
      new AggregatorFactory[] { new CountAggregatorFactory("rows") },
      new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null),
      mapper
  );

  // Each school hands back whatever the corresponding field holds when it is asked;
  // the fields themselves are assigned further down.
  PlumberSchool firstPlumberSchool = new PlumberSchool() {
    @Override
    public Plumber findPlumber(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics) {
      return plumber;
    }
  };
  PlumberSchool secondPlumberSchool = new PlumberSchool() {
    @Override
    public Plumber findPlumber(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics) {
      return plumber2;
    }
  };

  // IO config backed by a (v1) firehose factory.
  RealtimeIOConfig firehoseV1Config = new RealtimeIOConfig(
      new FirehoseFactory() {
        @Override
        public Firehose connect(InputRowParser parser) throws IOException {
          return new TestFirehose(inputRows.iterator());
        }
      },
      firstPlumberSchool,
      null
  );

  // IO config backed by a v2 firehose factory; the v1 slot is left null.
  RealtimeIOConfig firehoseV2Config = new RealtimeIOConfig(
      null,
      secondPlumberSchool,
      new FirehoseFactoryV2() {
        @Override
        public FirehoseV2 connect(InputRowParser parser, Object arg1) throws IOException, ParseException {
          return new TestFirehoseV2(inputRows.iterator());
        }
      }
  );

  RealtimeTuningConfig sharedTuning = new RealtimeTuningConfig(
      1, new Period("P1Y"), null, null, null, null, null, null, null, null, 0, 0, null, null
  );

  plumber = new TestPlumber(
      new Sink(
          new Interval("0/P5000Y"),
          schema,
          sharedTuning.getShardSpec(),
          new DateTime().toString(),
          sharedTuning.getMaxRowsInMemory(),
          sharedTuning.isReportParseExceptions()
      )
  );
  realtimeManager = new RealtimeManager(
      Arrays.<FireDepartment>asList(new FireDepartment(schema, firehoseV1Config, sharedTuning)),
      null
  );

  plumber2 = new TestPlumber(
      new Sink(
          new Interval("0/P5000Y"),
          schema2,
          sharedTuning.getShardSpec(),
          new DateTime().toString(),
          sharedTuning.getMaxRowsInMemory(),
          sharedTuning.isReportParseExceptions()
      )
  );
  realtimeManager2 = new RealtimeManager(
      Arrays.<FireDepartment>asList(new FireDepartment(schema2, firehoseV2Config, sharedTuning)),
      null
  );

  // Two tuning configs that differ only in the linear shard spec's partition number.
  tuningConfig_0 = new RealtimeTuningConfig(
      1, new Period("P1Y"), null, null, null, null, null, new LinearShardSpec(0), null, null, 0, 0, null, null
  );
  tuningConfig_1 = new RealtimeTuningConfig(
      1, new Period("P1Y"), null, null, null, null, null, new LinearShardSpec(1), null, null, 0, 0, null, null
  );

  schema3 = new DataSchema(
      "testing",
      null,
      new AggregatorFactory[] { new CountAggregatorFactory("ignore") },
      new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null),
      mapper
  );

  FireDepartment partitionZeroDepartment = new FireDepartment(schema3, firehoseV1Config, tuningConfig_0);
  FireDepartment partitionOneDepartment = new FireDepartment(schema3, firehoseV2Config, tuningConfig_1);

  // Conglomerate that answers every lookup with the test's single factory.
  QueryRunnerFactoryConglomerate fixedConglomerate = new QueryRunnerFactoryConglomerate() {
    @Override
    public <T, QueryType extends Query<T>> QueryRunnerFactory<T, QueryType> findFactory(QueryType query) {
      return factory;
    }
  };

  // One count per chief; each chief counts down once its plumber has been initialized.
  chiefStartedLatch = new CountDownLatch(2);

  RealtimeManager.FireChief partitionZeroChief =
      new RealtimeManager.FireChief(partitionZeroDepartment, fixedConglomerate) {
        @Override
        public void run() {
          super.initPlumber();
          chiefStartedLatch.countDown();
        }
      };
  RealtimeManager.FireChief partitionOneChief =
      new RealtimeManager.FireChief(partitionOneDepartment, fixedConglomerate) {
        @Override
        public void run() {
          super.initPlumber();
          chiefStartedLatch.countDown();
        }
      };

  realtimeManager3 = new RealtimeManager(
      Arrays.asList(partitionZeroDepartment, partitionOneDepartment),
      fixedConglomerate,
      ImmutableMap.<String, Map<Integer, RealtimeManager.FireChief>>of(
          "testing",
          ImmutableMap.of(0, partitionZeroChief, 1, partitionOneChief)
      )
  );

  startFireChiefWithPartitionNum(partitionZeroChief, 0);
  startFireChiefWithPartitionNum(partitionOneChief, 1);
}
use of io.druid.segment.realtime.plumber.Plumber in project druid by druid-io.
the class DruidJsonValidatorTest method testTaskValidator.
/**
 * Writes a {@link RealtimeIndexTask} to a temporary JSON file and runs the "validator" command
 * against it with type "task".
 */
@Test
public void testTaskValidator() throws Exception {
  // Mapper with the firehose Jackson modules registered.
  final ObjectMapper mapper = new DefaultObjectMapper();
  for (final Module firehoseJacksonModule : new FirehoseModule().getJacksonModules()) {
    mapper.registerModule(firehoseJacksonModule);
  }

  final TaskResource resource = new TaskResource("rofl", 2);
  final DataSchema dataSchema = new DataSchema(
      "foo",
      null,
      new AggregatorFactory[0],
      new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null),
      mapper
  );
  final LocalFirehoseFactory firehoseFactory = new LocalFirehoseFactory(new File("lol"), "rofl", null);
  // Stub school: the test never needs an actual plumber.
  final PlumberSchool nullPlumberSchool = new PlumberSchool() {
    @Override
    public Plumber findPlumber(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics) {
      return null;
    }
  };
  final RealtimeIOConfig ioConfig = new RealtimeIOConfig(firehoseFactory, nullPlumberSchool, null);
  final RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(
      1, new Period("PT10M"), null, null, null, null, 1, NoneShardSpec.instance(), new IndexSpec(), null, 0, 0, true, null
  );
  final FireDepartment fireDepartment = new FireDepartment(dataSchema, ioConfig, tuningConfig);
  final RealtimeIndexTask task = new RealtimeIndexTask(null, resource, fireDepartment, null);

  // Dump the task to disk and hand the file to the validator command.
  File taskFile = temporaryFolder.newFile("test_task.json");
  mapper.writeValue(taskFile, task);
  parseCommand("validator", "-f", taskFile.getAbsolutePath(), "-t", "task").run();
}
use of io.druid.segment.realtime.plumber.Plumber in project druid by druid-io.
the class YeOldePlumberSchool method findPlumber.
/**
 * Creates a {@link Plumber} that writes everything into a single {@link Sink} covering this
 * school's {@code interval}. In-memory data is spilled to per-spill directories under
 * {@code tmpSegmentDir}; {@code finishJob()} merges the spills (when there is more than one)
 * and pushes the resulting segment through {@code dataSegmentPusher}.
 *
 * @param schema  data schema used to build the sink and, on merge, to supply rollup and aggregators
 * @param config  tuning config supplying the shard spec, max rows in memory, parse-exception
 *                reporting flag, index spec, and the choice of index merger
 * @param metrics metrics object whose row output count is incremented on each spill
 * @return a plumber that does not support queries and ignores rows outside the sink's interval
 */
@Override
public Plumber findPlumber(final DataSchema schema, final RealtimeTuningConfig config, final FireDepartmentMetrics metrics) {
  // There can be only one.
  final Sink theSink = new Sink(interval, schema, config.getShardSpec(), version, config.getMaxRowsInMemory(), config.isReportParseExceptions());
  // Temporary directory to hold spilled segments.
  final File persistDir = new File(tmpSegmentDir, theSink.getSegment().getIdentifier());
  // Set of spilled segments. Will be merged at the end.
  final Set<File> spilled = Sets.newHashSet();
  // IndexMerger implementation.
  final IndexMerger theIndexMerger = config.getBuildV9Directly() ? indexMergerV9 : indexMerger;

  return new Plumber() {
    /** No start-up work is needed; always returns {@code null}. */
    @Override
    public Object startJob() {
      return null;
    }

    /**
     * Adds a row to the single sink, persisting when the sink can no longer accept rows.
     *
     * @return the value returned by {@code Sink.add}, or -1 if the row's timestamp is
     *         outside the sink's interval
     */
    @Override
    public int add(InputRow row, Supplier<Committer> committerSupplier) throws IndexSizeExceededException {
      Sink sink = getSink(row.getTimestampFromEpoch());
      if (sink == null) {
        return -1;
      }
      final int numRows = sink.add(row);
      if (!sink.canAppendRow()) {
        persist(committerSupplier.get());
      }
      return numRows;
    }

    /** Returns the single sink if its interval contains {@code timestamp}, otherwise {@code null}. */
    private Sink getSink(long timestamp) {
      if (theSink.getInterval().contains(timestamp)) {
        return theSink;
      } else {
        return null;
      }
    }

    /** Querying is not supported by this plumber. */
    @Override
    public <T> QueryRunner<T> getQueryRunner(Query<T> query) {
      throw new UnsupportedOperationException("Don't query me, bro.");
    }

    /** Spills the current in-memory index (if there is one to swap out), then runs the committer. */
    @Override
    public void persist(Committer committer) {
      spillIfSwappable();
      committer.run();
    }

    /**
     * Merges the spilled indexes (when more than one exists), pushes the resulting segment,
     * and deletes the uploaded directory afterwards.
     *
     * Fails with an {@link IllegalStateException} (propagated from the catch block) if the
     * sink still has swappable data or nothing was ever spilled.
     */
    @Override
    public void finishJob() {
      // The segment we will upload
      File fileToUpload = null;
      try {
        // User should have persisted everything by now.
        Preconditions.checkState(!theSink.swappable(), "All data must be persisted before fininshing the job!");
        if (spilled.isEmpty()) {
          throw new IllegalStateException("Nothing indexed?");
        } else if (spilled.size() == 1) {
          // A single spill is uploaded as-is; no merge required.
          fileToUpload = Iterables.getOnlyElement(spilled);
        } else {
          List<QueryableIndex> indexes = Lists.newArrayList();
          for (final File oneSpill : spilled) {
            indexes.add(indexIO.loadIndex(oneSpill));
          }
          fileToUpload = new File(tmpSegmentDir, "merged");
          theIndexMerger.mergeQueryableIndex(indexes, schema.getGranularitySpec().isRollup(), schema.getAggregators(), fileToUpload, config.getIndexSpec());
        }
        // Map merged segment so we can extract dimensions
        final QueryableIndex mappedSegment = indexIO.loadIndex(fileToUpload);
        final DataSegment segmentToUpload = theSink.getSegment()
            .withDimensions(ImmutableList.copyOf(mappedSegment.getAvailableDimensions()))
            .withBinaryVersion(SegmentUtils.getVersionFromDir(fileToUpload));
        dataSegmentPusher.push(fileToUpload, segmentToUpload);
        log.info("Uploaded segment[%s]", segmentToUpload.getIdentifier());
      } catch (Exception e) {
        log.warn(e, "Failed to merge and upload");
        throw Throwables.propagate(e);
      } finally {
        // Clean up whichever directory was chosen for upload, whether or not the push succeeded.
        // A failure to delete is only logged so it cannot mask the primary outcome.
        try {
          if (fileToUpload != null) {
            log.info("Deleting Index File[%s]", fileToUpload);
            FileUtils.deleteDirectory(fileToUpload);
          }
        } catch (IOException e) {
          log.warn(e, "Error deleting directory[%s]", fileToUpload);
        }
      }
    }

    /**
     * If the sink has swappable data, swaps it out, persists it to a fresh spill directory,
     * and records that directory in {@code spilled}.
     */
    private void spillIfSwappable() {
      if (theSink.swappable()) {
        final FireHydrant indexToPersist = theSink.swap();
        final int rowsToPersist = indexToPersist.getIndex().size();
        final File dirToPersist = getSpillDir(indexToPersist.getCount());
        log.info("Spilling index[%d] with rows[%d] to: %s", indexToPersist.getCount(), rowsToPersist, dirToPersist);
        try {
          theIndexMerger.persist(indexToPersist.getIndex(), dirToPersist, config.getIndexSpec());
          indexToPersist.swapSegment(null);
          metrics.incrementRowOutputCount(rowsToPersist);
          spilled.add(dirToPersist);
        } catch (Exception e) {
          log.warn(e, "Failed to spill index[%d]", indexToPersist.getCount());
          throw Throwables.propagate(e);
        }
      }
    }

    /**
     * Returns the directory for spill number {@code n} under {@code persistDir}.
     *
     * The name is built by concatenation rather than {@code String.format("%d")} so that the
     * digits are always ASCII, regardless of the JVM's default locale.
     */
    private File getSpillDir(final int n) {
      return new File(persistDir, "spill" + n);
    }
  };
}
use of io.druid.segment.realtime.plumber.Plumber in project druid by druid-io.
the class TaskSerdeTest method testRealtimeIndexTaskSerde.
/**
 * Round-trips a {@link RealtimeIndexTask} through JSON and checks that the identifying fields,
 * resource requirements, window period and segment granularity are unchanged.
 */
@Test
public void testRealtimeIndexTaskSerde() throws Exception {
  final TaskResource resource = new TaskResource("rofl", 2);
  final DataSchema dataSchema = new DataSchema(
      "foo",
      null,
      new AggregatorFactory[0],
      new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null),
      jsonMapper
  );
  final LocalFirehoseFactory firehoseFactory = new LocalFirehoseFactory(new File("lol"), "rofl", null);
  // Stub school: serde does not need a working plumber.
  final PlumberSchool nullPlumberSchool = new PlumberSchool() {
    @Override
    public Plumber findPlumber(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics) {
      return null;
    }
  };
  final RealtimeIOConfig ioConfig = new RealtimeIOConfig(firehoseFactory, nullPlumberSchool, null);
  final RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(
      1, new Period("PT10M"), null, null, null, null, 1, NoneShardSpec.instance(), indexSpec, null, 0, 0, true, null
  );
  final FireDepartment fireDepartment = new FireDepartment(dataSchema, ioConfig, tuningConfig);
  final RealtimeIndexTask task = new RealtimeIndexTask(null, resource, fireDepartment, null);

  final String serializedTask = jsonMapper.writeValueAsString(task);

  // Just want to run the clock a bit to make sure the task id doesn't change
  Thread.sleep(100);

  final RealtimeIndexTask deserializedTask = (RealtimeIndexTask) jsonMapper.readValue(serializedTask, Task.class);

  // The original task carries the values it was constructed with.
  Assert.assertEquals("foo", task.getDataSource());
  Assert.assertEquals(2, task.getTaskResource().getRequiredCapacity());
  Assert.assertEquals("rofl", task.getTaskResource().getAvailabilityGroup());
  Assert.assertEquals(
      new Period("PT10M"),
      task.getRealtimeIngestionSchema().getTuningConfig().getWindowPeriod()
  );
  Assert.assertEquals(
      Granularities.HOUR,
      task.getRealtimeIngestionSchema().getDataSchema().getGranularitySpec().getSegmentGranularity()
  );
  Assert.assertTrue(task.getRealtimeIngestionSchema().getTuningConfig().isReportParseExceptions());

  // The round-tripped task matches the original field by field.
  Assert.assertEquals(task.getId(), deserializedTask.getId());
  Assert.assertEquals(task.getGroupId(), deserializedTask.getGroupId());
  Assert.assertEquals(task.getDataSource(), deserializedTask.getDataSource());
  Assert.assertEquals(
      task.getTaskResource().getRequiredCapacity(),
      deserializedTask.getTaskResource().getRequiredCapacity()
  );
  Assert.assertEquals(
      task.getTaskResource().getAvailabilityGroup(),
      deserializedTask.getTaskResource().getAvailabilityGroup()
  );
  Assert.assertEquals(
      task.getRealtimeIngestionSchema().getTuningConfig().getWindowPeriod(),
      deserializedTask.getRealtimeIngestionSchema().getTuningConfig().getWindowPeriod()
  );
  Assert.assertEquals(
      task.getRealtimeIngestionSchema().getDataSchema().getGranularitySpec().getSegmentGranularity(),
      deserializedTask.getRealtimeIngestionSchema().getDataSchema().getGranularitySpec().getSegmentGranularity()
  );
}
Aggregations