use of org.apache.druid.segment.indexing.RealtimeTuningConfig in project druid by apache.
the class TaskLifecycleTest method newRealtimeIndexTask.
private RealtimeIndexTask newRealtimeIndexTask() {
String taskId = StringUtils.format("rt_task_%s", System.currentTimeMillis());
DataSchema dataSchema = new DataSchema("test_ds", TestHelper.makeJsonMapper().convertValue(new MapInputRowParser(new TimeAndDimsParseSpec(new TimestampSpec(null, null, null), DimensionsSpec.EMPTY)), JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT), new AggregatorFactory[] { new LongSumAggregatorFactory("count", "rows") }, new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null), null, mapper);
RealtimeIOConfig realtimeIOConfig = new RealtimeIOConfig(new MockFirehoseFactory(), null);
RealtimeTuningConfig realtimeTuningConfig = new RealtimeTuningConfig(
    null, 1000, null, null, new Period("P1Y"),
    null, // default window period of 10 minutes
    null, // base persist dir ignored by Realtime Index task
    null, null, null, null, null, null, 0, 0, null, null, null, null, null);
FireDepartment fireDepartment = new FireDepartment(dataSchema, realtimeIOConfig, realtimeTuningConfig);
return new RealtimeIndexTask(taskId, new TaskResource(taskId, 1), fireDepartment, null);
}
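The twenty positional arguments make it hard to see that only two values differ from the defaults. A minimal restatement, pulling those two into named locals; the parameter meanings are inferred from the inline comments and from RealtimeTuningConfig's accessors (getMaxRowsInMemory and friends), so treat the names as assumptions rather than the declared signature:
// Assumed meanings: 2nd argument = maxRowsInMemory, 5th = intermediatePersistPeriod;
// every null/0 keeps the corresponding tuning option at its default.
final Integer maxRowsInMemory = 1000;
final Period intermediatePersistPeriod = new Period("P1Y");
RealtimeTuningConfig tuning = new RealtimeTuningConfig(
    null, maxRowsInMemory, null, null, intermediatePersistPeriod,
    null, null, null, null, null, null, null, null,
    0, 0, null, null, null, null, null);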
use of org.apache.druid.segment.indexing.RealtimeTuningConfig in project druid by apache.
the class DefaultOfflineAppenderatorFactoryTest method testBuild.
@Test
public void testBuild() throws IOException, SegmentNotWritableException {
Injector injector = Initialization.makeInjectorWithModules(GuiceInjectors.makeStartupInjector(), ImmutableList.<Module>of(new Module() {
    @Override
    public void configure(Binder binder) {
        binder.bindConstant().annotatedWith(Names.named("serviceName")).to("druid/tool");
        binder.bindConstant().annotatedWith(Names.named("servicePort")).to(9999);
        binder.bindConstant().annotatedWith(Names.named("tlsServicePort")).to(-1);
        binder.bind(DruidProcessingConfig.class).toInstance(new DruidProcessingConfig() {
            @Override
            public String getFormatString() {
                return "processing-%s";
            }
            @Override
            public int intermediateComputeSizeBytes() {
                return 100 * 1024 * 1024;
            }
            @Override
            public int getNumThreads() {
                return 1;
            }
            @Override
            public int columnCacheSizeBytes() {
                return 25 * 1024 * 1024;
            }
        });
        binder.bind(ColumnConfig.class).to(DruidProcessingConfig.class);
    }
}));
ObjectMapper objectMapper = injector.getInstance(ObjectMapper.class);
AppenderatorFactory defaultOfflineAppenderatorFactory = objectMapper.readerFor(AppenderatorFactory.class).readValue("{\"type\":\"offline\"}");
final Map<String, Object> parserMap = objectMapper.convertValue(new MapInputRowParser(new JSONParseSpec(new TimestampSpec("ts", "auto", null), DimensionsSpec.EMPTY, null, null, null)), Map.class);
DataSchema schema = new DataSchema("dataSourceName", parserMap, new AggregatorFactory[] { new CountAggregatorFactory("count"), new LongSumAggregatorFactory("met", "met") }, new UniformGranularitySpec(Granularities.MINUTE, Granularities.NONE, null), null, objectMapper);
RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(null, 75000, null, null, null, null, temporaryFolder.newFolder(), null, null, null, null, null, null, 0, 0, null, null, null, null, null);
Appenderator appenderator = defaultOfflineAppenderatorFactory.build(schema, tuningConfig, new FireDepartmentMetrics());
try {
Assert.assertEquals("dataSourceName", appenderator.getDataSource());
Assert.assertEquals(null, appenderator.startJob());
SegmentIdWithShardSpec identifier = new SegmentIdWithShardSpec("dataSourceName", Intervals.of("2000/2001"), "A", new LinearShardSpec(0));
Assert.assertEquals(0, ((AppenderatorImpl) appenderator).getRowsInMemory());
appenderator.add(identifier, StreamAppenderatorTest.ir("2000", "bar", 1), null);
Assert.assertEquals(1, ((AppenderatorImpl) appenderator).getRowsInMemory());
appenderator.add(identifier, StreamAppenderatorTest.ir("2000", "baz", 1), null);
Assert.assertEquals(2, ((AppenderatorImpl) appenderator).getRowsInMemory());
appenderator.close();
Assert.assertEquals(0, ((AppenderatorImpl) appenderator).getRowsInMemory());
} finally {
appenderator.close();
}
}
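Rows are attributed to an explicit SegmentIdWithShardSpec rather than routed by timestamp, which is why the test can pin everything to the 2000/2001 segment. A small sketch of targeting a second segment, reusing only the constructors and the add signature shown above (the interval and partition number are illustrative):
SegmentIdWithShardSpec otherSegment = new SegmentIdWithShardSpec(
    "dataSourceName",            // data source
    Intervals.of("2001/2002"),   // interval covered by the new segment (illustrative)
    "A",                         // version string
    new LinearShardSpec(1));     // partition number within that interval (illustrative)
appenderator.add(otherSegment, StreamAppenderatorTest.ir("2001", "qux", 1), null);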
use of org.apache.druid.segment.indexing.RealtimeTuningConfig in project druid by apache.
the class RealtimePlumberSchoolTest method setUp.
@Before
public void setUp() throws Exception {
tmpDir = FileUtils.createTempDir();
ObjectMapper jsonMapper = new DefaultObjectMapper();
schema = new DataSchema("test", jsonMapper.convertValue(new StringInputRowParser(new JSONParseSpec(new TimestampSpec("timestamp", "auto", null), DimensionsSpec.EMPTY, null, null, null), null), Map.class), new AggregatorFactory[] { new CountAggregatorFactory("rows") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null), null, jsonMapper);
schema2 = new DataSchema("test", jsonMapper.convertValue(new StringInputRowParser(new JSONParseSpec(new TimestampSpec("timestamp", "auto", null), DimensionsSpec.EMPTY, null, null, null), null), Map.class), new AggregatorFactory[] { new CountAggregatorFactory("rows") }, new UniformGranularitySpec(Granularities.YEAR, Granularities.NONE, null), null, jsonMapper);
announcer = EasyMock.createMock(DataSegmentAnnouncer.class);
announcer.announceSegment(EasyMock.anyObject());
EasyMock.expectLastCall().anyTimes();
segmentPublisher = EasyMock.createNiceMock(SegmentPublisher.class);
dataSegmentPusher = EasyMock.createNiceMock(DataSegmentPusher.class);
handoffNotifierFactory = EasyMock.createNiceMock(SegmentHandoffNotifierFactory.class);
handoffNotifier = EasyMock.createNiceMock(SegmentHandoffNotifier.class);
EasyMock.expect(handoffNotifierFactory.createSegmentHandoffNotifier(EasyMock.anyString())).andReturn(handoffNotifier).anyTimes();
EasyMock.expect(handoffNotifier.registerSegmentHandoffCallback(EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject())).andReturn(true).anyTimes();
emitter = EasyMock.createMock(ServiceEmitter.class);
EasyMock.replay(announcer, segmentPublisher, dataSegmentPusher, handoffNotifierFactory, handoffNotifier, emitter);
tuningConfig = new RealtimeTuningConfig(null, 1, null, null, null, null, null, new IntervalStartVersioningPolicy(), rejectionPolicy, null, null, null, null, 0, 0, false, null, null, null, null);
realtimePlumberSchool = new RealtimePlumberSchool(emitter, new DefaultQueryRunnerFactoryConglomerate(new HashMap<>()), dataSegmentPusher, announcer, segmentPublisher, handoffNotifierFactory, DirectQueryProcessingPool.INSTANCE, NoopJoinableFactory.INSTANCE, TestHelper.getTestIndexMergerV9(segmentWriteOutMediumFactory), TestHelper.getTestIndexIO(), MapCache.create(0), FireDepartmentTest.NO_CACHE_CONFIG, new CachePopulatorStats(), TestHelper.makeJsonMapper());
metrics = new FireDepartmentMetrics();
plumber = (RealtimePlumber) realtimePlumberSchool.findPlumber(schema, tuningConfig, metrics);
}
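All of the collaborators are EasyMock mocks put into replay mode before the plumber is created. A matching tear-down sketch, assuming the usual EasyMock record/replay/verify pattern (the actual test's cleanup may differ):
@After
public void tearDown() {
    // Check the expectations recorded in setUp; the nice mocks tolerate unrecorded calls,
    // while the announceSegment expectation on the strict announcer mock is re-checked here.
    EasyMock.verify(announcer, segmentPublisher, dataSegmentPusher, handoffNotifierFactory, handoffNotifier, emitter);
}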
use of org.apache.druid.segment.indexing.RealtimeTuningConfig in project druid by apache.
the class SinkTest method testDedup.
@Test
public void testDedup() throws Exception {
final DataSchema schema = new DataSchema("test", new TimestampSpec(null, null, null), DimensionsSpec.EMPTY, new AggregatorFactory[] { new CountAggregatorFactory("rows") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, null), null);
final Interval interval = Intervals.of("2013-01-01/2013-01-02");
final String version = DateTimes.nowUtc().toString();
RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(null, 100, null, null, new Period("P1Y"), null, null, null, null, null, null, null, null, 0, 0, null, null, null, null, "dedupColumn");
final Sink sink = new Sink(interval, schema, tuningConfig.getShardSpec(), version, tuningConfig.getAppendableIndexSpec(), tuningConfig.getMaxRowsInMemory(), tuningConfig.getMaxBytesInMemoryOrDefault(), true, tuningConfig.getDedupColumn());
int rows = sink.add(new MapBasedInputRow(DateTimes.of("2013-01-01"), ImmutableList.of("field", "dedupColumn"), ImmutableMap.of("field1", "value1", "dedupColumn", "v1")), false).getRowCount();
Assert.assertTrue(rows > 0);
// dedupColumn is null
rows = sink.add(new MapBasedInputRow(DateTimes.of("2013-01-01"), ImmutableList.of("field", "dedupColumn"), ImmutableMap.of("field1", "value2")), false).getRowCount();
Assert.assertTrue(rows > 0);
// dedupColumn is null
rows = sink.add(new MapBasedInputRow(DateTimes.of("2013-01-01"), ImmutableList.of("field", "dedupColumn"), ImmutableMap.of("field1", "value3")), false).getRowCount();
Assert.assertTrue(rows > 0);
rows = sink.add(new MapBasedInputRow(DateTimes.of("2013-01-01"), ImmutableList.of("field", "dedupColumn"), ImmutableMap.of("field1", "value4", "dedupColumn", "v2")), false).getRowCount();
Assert.assertTrue(rows > 0);
rows = sink.add(new MapBasedInputRow(DateTimes.of("2013-01-01"), ImmutableList.of("field", "dedupColumn"), ImmutableMap.of("field1", "value5", "dedupColumn", "v1")), false).getRowCount();
Assert.assertTrue(rows == -2);
}
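The add result encodes whether the row landed in the index: the first four adds return a positive row count, while the last one, which repeats the dedupColumn value "v1", returns -2. A sketch of how a caller could branch on that, reusing the constructs from the test (treating any negative count as "not added" is a generalization from the single -2 case exercised here):
MapBasedInputRow maybeDuplicate = new MapBasedInputRow(
    DateTimes.of("2013-01-01"),
    ImmutableList.of("field", "dedupColumn"),
    ImmutableMap.of("field1", "value6", "dedupColumn", "v2")); // "v2" already seen above
int rowCount = sink.add(maybeDuplicate, false).getRowCount();
if (rowCount < 0) {
    // with a dedup column configured, a negative count means the row was dropped as a duplicate
}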
use of org.apache.druid.segment.indexing.RealtimeTuningConfig in project hive by apache.
the class DruidOutputFormat method getHiveRecordWriter.
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath, Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
final int targetNumShardsPerGranularity = Integer.parseUnsignedInt(tableProperties.getProperty(Constants.DRUID_TARGET_SHARDS_PER_GRANULARITY, "0"));
final int maxPartitionSize = targetNumShardsPerGranularity > 0 ? -1 : HiveConf.getIntVar(jc, HiveConf.ConfVars.HIVE_DRUID_MAX_PARTITION_SIZE);
// If datasource is in the table properties, it is an INSERT/INSERT OVERWRITE as the datasource
// name was already persisted. Otherwise, it is a CT/CTAS and we need to get the name from the
// job properties that are set by configureOutputJobProperties in the DruidStorageHandler
final String dataSource = tableProperties.getProperty(Constants.DRUID_DATA_SOURCE) == null ? jc.get(Constants.DRUID_DATA_SOURCE) : tableProperties.getProperty(Constants.DRUID_DATA_SOURCE);
final String segmentDirectory = jc.get(DruidConstants.DRUID_SEGMENT_INTERMEDIATE_DIRECTORY);
final GranularitySpec granularitySpec = DruidStorageHandlerUtils.getGranularitySpec(jc, tableProperties);
final String columnNameProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMNS);
final String columnTypeProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMN_TYPES);
if (StringUtils.isEmpty(columnNameProperty) || StringUtils.isEmpty(columnTypeProperty)) {
throw new IllegalStateException(String.format("List of columns names [%s] or columns type [%s] is/are not present", columnNameProperty, columnTypeProperty));
}
ArrayList<String> columnNames = Lists.newArrayList(columnNameProperty.split(","));
if (!columnNames.contains(DruidConstants.DEFAULT_TIMESTAMP_COLUMN)) {
throw new IllegalStateException("Timestamp column (' " + DruidConstants.DEFAULT_TIMESTAMP_COLUMN + "') not specified in create table; list of columns is : " + tableProperties.getProperty(serdeConstants.LIST_COLUMNS));
}
ArrayList<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
Pair<List<DimensionSchema>, AggregatorFactory[]> dimensionsAndAggregates = DruidStorageHandlerUtils.getDimensionsAndAggregates(columnNames, columnTypes);
final InputRowParser inputRowParser = new MapInputRowParser(new TimeAndDimsParseSpec(new TimestampSpec(DruidConstants.DEFAULT_TIMESTAMP_COLUMN, "auto", null), new DimensionsSpec(dimensionsAndAggregates.lhs, Lists.newArrayList(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME, Constants.DRUID_SHARD_KEY_COL_NAME), null)));
Map<String, Object> inputParser = DruidStorageHandlerUtils.JSON_MAPPER.convertValue(inputRowParser, new TypeReference<Map<String, Object>>() {
});
final DataSchema dataSchema = new DataSchema(Preconditions.checkNotNull(dataSource, "Data source name is null"), inputParser, dimensionsAndAggregates.rhs, granularitySpec, null, DruidStorageHandlerUtils.JSON_MAPPER);
final String workingPath = jc.get(DruidConstants.DRUID_JOB_WORKING_DIRECTORY);
final String version = jc.get(DruidConstants.DRUID_SEGMENT_VERSION);
String basePersistDirectory = HiveConf.getVar(jc, HiveConf.ConfVars.HIVE_DRUID_BASE_PERSIST_DIRECTORY);
if (Strings.isNullOrEmpty(basePersistDirectory)) {
basePersistDirectory = System.getProperty("java.io.tmpdir");
}
Integer maxRowInMemory = HiveConf.getIntVar(jc, HiveConf.ConfVars.HIVE_DRUID_MAX_ROW_IN_MEMORY);
IndexSpec indexSpec = DruidStorageHandlerUtils.getIndexSpec(jc);
RealtimeTuningConfig realtimeTuningConfig = new RealtimeTuningConfig(maxRowInMemory, null, null, null, new File(basePersistDirectory, dataSource), new CustomVersioningPolicy(version), null, null, null, indexSpec, null, true, 0, 0, true, null, 0L, null, null);
LOG.debug(String.format("running with Data schema [%s] ", dataSchema));
return new DruidRecordWriter(dataSchema, realtimeTuningConfig, DruidStorageHandlerUtils.createSegmentPusherForDirectory(segmentDirectory, jc), maxPartitionSize, new Path(workingPath, SEGMENTS_DESCRIPTOR_DIR_NAME), finalOutPath.getFileSystem(jc));
}
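Everything the writer needs is pulled from the table or job properties, so the checks above only pass when those properties are populated. A minimal sketch of the relevant entries, assuming the default __time timestamp column; the concrete column names and types are illustrative:
Properties tableProperties = new Properties();
// the column list must include the Druid timestamp column or getHiveRecordWriter throws
tableProperties.setProperty(serdeConstants.LIST_COLUMNS, "__time,page,added");
tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "timestamp with local time zone,string,bigint");
// present for INSERT into an existing table; for CT/CTAS the name comes from the JobConf instead
tableProperties.setProperty(Constants.DRUID_DATA_SOURCE, "wikipedia");
// optional: a positive value disables the max-partition-size limit and targets N shards per granularity
tableProperties.setProperty(Constants.DRUID_TARGET_SHARDS_PER_GRANULARITY, "2");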