Use of org.apache.druid.segment.indexing.DataSchema in the druid project by druid-io.
From the class SinkTest, method testSwap.
/**
 * Checks the hydrant hand-over performed by {@code Sink.swap()}: the hydrant returned by the swap equals the
 * one that was current beforehand, the sink exposes a different current hydrant afterwards, and the sink's
 * iterator yields two elements once a second row has been added.
 */
@Test
public void testSwap() throws Exception {
  final DataSchema schema = new DataSchema(
      "test",
      new TimestampSpec(null, null, null),
      DimensionsSpec.EMPTY,
      new AggregatorFactory[] { new CountAggregatorFactory("rows") },
      new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, null),
      null
  );
  final Interval interval = Intervals.of("2013-01-01/2013-01-02");
  final String version = DateTimes.nowUtc().toString();
  RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(
      null, 100, null, null, new Period("P1Y"),
      null, null, null, null, null, null, null, null,
      0, 0,
      null, null, null, null, null
  );
  final Sink sink = new Sink(
      interval,
      schema,
      tuningConfig.getShardSpec(),
      version,
      tuningConfig.getAppendableIndexSpec(),
      tuningConfig.getMaxRowsInMemory(),
      tuningConfig.getMaxBytesInMemoryOrDefault(),
      true,
      tuningConfig.getDedupColumn()
  );

  // Minimal row stamped 2013-01-01 with no dimensions. It holds no state (every accessor builds its
  // result on each call), so the same instance is added both before and after the swap instead of
  // repeating the anonymous class twice.
  final InputRow row = new InputRow() {
    @Override
    public List<String> getDimensions() {
      return new ArrayList<>();
    }

    @Override
    public long getTimestampFromEpoch() {
      return DateTimes.of("2013-01-01").getMillis();
    }

    @Override
    public DateTime getTimestamp() {
      return DateTimes.of("2013-01-01");
    }

    @Override
    public List<String> getDimension(String dimension) {
      return new ArrayList<>();
    }

    @Override
    public Number getMetric(String metric) {
      return 0;
    }

    @Override
    public Object getRaw(String dimension) {
      return null;
    }

    @Override
    public int compareTo(Row o) {
      return 0;
    }
  };

  sink.add(row, false);

  // Capture the hydrant that received the first row before swapping it out.
  FireHydrant currHydrant = sink.getCurrHydrant();
  Assert.assertEquals(Intervals.of("2013-01-01/PT1M"), currHydrant.getIndex().getInterval());

  FireHydrant swapHydrant = sink.swap();

  // This second add happens after the swap, so it is expected to go to the new current hydrant.
  sink.add(row, false);

  Assert.assertEquals(currHydrant, swapHydrant);
  Assert.assertNotSame(currHydrant, sink.getCurrHydrant());
  Assert.assertEquals(Intervals.of("2013-01-01/PT1M"), sink.getCurrHydrant().getIndex().getInterval());
  Assert.assertEquals(2, Iterators.size(sink.iterator()));
}
Use of org.apache.druid.segment.indexing.DataSchema in the druid project by druid-io.
From the class SinkTest, method testDedup.
/**
 * Exercises a {@code Sink} configured with the dedup column {@code "dedupColumn"}: rows carrying a new
 * dedup value, or no dedup value at all, report a positive row count, while a row repeating an
 * already-seen dedup value reports {@code -2}.
 */
@Test
public void testDedup() throws Exception {
  final DataSchema schema = new DataSchema(
      "test",
      new TimestampSpec(null, null, null),
      DimensionsSpec.EMPTY,
      new AggregatorFactory[] { new CountAggregatorFactory("rows") },
      new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, null),
      null
  );
  final Interval interval = Intervals.of("2013-01-01/2013-01-02");
  final String version = DateTimes.nowUtc().toString();
  RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(
      null, 100, null, null, new Period("P1Y"),
      null, null, null, null, null, null, null, null,
      0, 0,
      null, null, null, null, "dedupColumn"
  );
  final Sink sink = new Sink(
      interval,
      schema,
      tuningConfig.getShardSpec(),
      version,
      tuningConfig.getAppendableIndexSpec(),
      tuningConfig.getMaxRowsInMemory(),
      tuningConfig.getMaxBytesInMemoryOrDefault(),
      true,
      tuningConfig.getDedupColumn()
  );

  // NOTE(review): the declared dimension is "field" while the event maps use the key "field1".
  // This looks like a typo but is left unchanged here; confirm before altering.

  // First occurrence of dedup value "v1".
  int rows = sink.add(
      new MapBasedInputRow(
          DateTimes.of("2013-01-01"),
          ImmutableList.of("field", "dedupColumn"),
          ImmutableMap.of("field1", "value1", "dedupColumn", "v1")
      ),
      false
  ).getRowCount();
  Assert.assertTrue(rows > 0);

  // dedupColumn is null
  rows = sink.add(
      new MapBasedInputRow(
          DateTimes.of("2013-01-01"),
          ImmutableList.of("field", "dedupColumn"),
          ImmutableMap.of("field1", "value2")
      ),
      false
  ).getRowCount();
  Assert.assertTrue(rows > 0);

  // dedupColumn is null
  rows = sink.add(
      new MapBasedInputRow(
          DateTimes.of("2013-01-01"),
          ImmutableList.of("field", "dedupColumn"),
          ImmutableMap.of("field1", "value3")
      ),
      false
  ).getRowCount();
  Assert.assertTrue(rows > 0);

  // First occurrence of dedup value "v2".
  rows = sink.add(
      new MapBasedInputRow(
          DateTimes.of("2013-01-01"),
          ImmutableList.of("field", "dedupColumn"),
          ImmutableMap.of("field1", "value4", "dedupColumn", "v2")
      ),
      false
  ).getRowCount();
  Assert.assertTrue(rows > 0);

  // Second occurrence of dedup value "v1".
  rows = sink.add(
      new MapBasedInputRow(
          DateTimes.of("2013-01-01"),
          ImmutableList.of("field", "dedupColumn"),
          ImmutableMap.of("field1", "value5", "dedupColumn", "v1")
      ),
      false
  ).getRowCount();
  // assertEquals (rather than assertTrue(rows == -2)) so a failure reports the actual row count.
  // -2 is presumably the sentinel the sink uses for a rejected duplicate; confirm against Sink.
  Assert.assertEquals(-2, rows);
}
Use of org.apache.druid.segment.indexing.DataSchema in the druid project by druid-io.
From the class UnifiedIndexerAppenderatorsManagerTest, method setup.
/**
 * Prepares the fixture: a replayed {@code AppenderatorConfig} mock and an appenderator obtained from
 * {@code manager.createClosedSegmentsOfflineAppenderatorForTask} for task id {@code "taskId"}.
 */
@Before
public void setup() {
  // The mock answers getMaxPendingPersists() with 0 and isSkipBytesInMemoryOverheadCheck() with false.
  appenderatorConfig = EasyMock.createMock(AppenderatorConfig.class);
  EasyMock.expect(appenderatorConfig.getMaxPendingPersists()).andReturn(0);
  EasyMock.expect(appenderatorConfig.isSkipBytesInMemoryOverheadCheck()).andReturn(false);
  EasyMock.replay(appenderatorConfig);

  // Schema for "myDataSource": millisecond "__time" timestamps, hourly segment and query granularity,
  // and no dimensions spec or aggregators supplied.
  final TimestampSpec timestampSpec = new TimestampSpec("__time", "millis", null);
  final UniformGranularitySpec hourlySpec = new UniformGranularitySpec(
      Granularities.HOUR,
      Granularities.HOUR,
      false,
      Collections.emptyList()
  );
  final DataSchema dataSchema = new DataSchema(
      "myDataSource",
      timestampSpec,
      null,
      null,
      hourlySpec,
      null
  );

  appenderator = manager.createClosedSegmentsOfflineAppenderatorForTask(
      "taskId",
      dataSchema,
      appenderatorConfig,
      new FireDepartmentMetrics(),
      new NoopDataSegmentPusher(),
      TestHelper.makeJsonMapper(),
      TestHelper.getTestIndexIO(),
      TestHelper.getTestIndexMergerV9(OnHeapMemorySegmentWriteOutMediumFactory.instance()),
      new NoopRowIngestionMeters(),
      new ParseExceptionHandler(new NoopRowIngestionMeters(), false, 0, 0),
      true
  );
}
Use of org.apache.druid.segment.indexing.DataSchema in the druid project by druid-io.
From the class FireDepartmentTest, method testSerde.
/**
 * Serializes a {@code FireDepartment} to JSON, reads it back, and checks that the data source name and
 * the base persist directory are the expected values.
 */
@Test
public void testSerde() throws Exception {
  ObjectMapper jsonMapper = new DefaultObjectMapper();
  // The mapper is registered as an injectable value of itself so that types requesting an
  // ObjectMapper during deserialization can receive it.
  jsonMapper.setInjectableValues(new InjectableValues.Std().addValue(ObjectMapper.class, jsonMapper));

  FireDepartment schema = new FireDepartment(
      new DataSchema(
          "foo",
          jsonMapper.convertValue(
              new StringInputRowParser(
                  new JSONParseSpec(
                      new TimestampSpec("timestamp", "auto", null),
                      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("dim1", "dim2"))),
                      null,
                      null,
                      null
                  ),
                  null
              ),
              Map.class
          ),
          new AggregatorFactory[] { new CountAggregatorFactory("count") },
          new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, null),
          null,
          jsonMapper
      ),
      new RealtimeIOConfig(
          null,
          new RealtimePlumberSchool(
              null,
              null,
              null,
              null,
              null,
              null,
              null,
              NoopJoinableFactory.INSTANCE,
              TestHelper.getTestIndexMergerV9(OffHeapMemorySegmentWriteOutMediumFactory.instance()),
              TestHelper.getTestIndexIO(),
              MapCache.create(0),
              NO_CACHE_CONFIG,
              new CachePopulatorStats(),
              TestHelper.makeJsonMapper()
          )
      ),
      RealtimeTuningConfig.makeDefaultTuningConfig(new File("/tmp/nonexistent"))
  );

  String json = jsonMapper.writeValueAsString(schema);
  FireDepartment newSchema = jsonMapper.readValue(json, FireDepartment.class);

  Assert.assertEquals(schema.getDataSchema().getDataSource(), newSchema.getDataSchema().getDataSource());
  Assert.assertEquals("/tmp/nonexistent", schema.getTuningConfig().getBasePersistDirectory().toString());
  // Also check the deserialized copy: asserting only on the original object would pass even if the
  // directory were lost in the round trip.
  // NOTE(review): assumes basePersistDirectory is a serialized JSON property of the tuning config; confirm.
  Assert.assertEquals("/tmp/nonexistent", newSchema.getTuningConfig().getBasePersistDirectory().toString());
}
Use of org.apache.druid.segment.indexing.DataSchema in the druid project by druid-io.
From the class IndexGeneratorJobTest, method setUp.
/**
 * Builds the per-test fixture: registers the shard spec subtypes on the shared JSON mapper, writes the
 * test data to a temporary file, and assembles the {@code HadoopDruidIndexerConfig} under test.
 */
@Before
public void setUp() throws Exception {
  mapper = HadoopDruidIndexerConfig.JSON_MAPPER;
  mapper.registerSubtypes(new NamedType(HashBasedNumberedShardSpec.class, "hashed"));
  mapper.registerSubtypes(new NamedType(SingleDimensionShardSpec.class, "single"));

  dataFile = temporaryFolder.newFile();
  tmpDir = temporaryFolder.newFolder();

  // Static path spec pointing at the data file; the input format is only set when one is configured.
  final Map<String, Object> pathSpec = new HashMap<>();
  pathSpec.put("paths", dataFile.getCanonicalPath());
  pathSpec.put("type", "static");
  if (inputFormatName != null) {
    pathSpec.put("inputFormat", inputFormatName);
  }

  // Sequence-file input goes through the dedicated writer; any other format gets plain lines.
  final boolean sequenceFileInput = SequenceFileInputFormat.class.getName().equals(inputFormatName);
  if (sequenceFileInput) {
    writeDataToLocalSequenceFile(dataFile, data);
  } else {
    FileUtils.writeLines(dataFile, data);
  }

  final DataSchema dataSchema = new DataSchema(
      datasourceName,
      mapper.convertValue(inputRowParser, Map.class),
      aggs,
      new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(this.interval)),
      null,
      mapper
  );
  final HadoopIOConfig ioConfig = new HadoopIOConfig(
      ImmutableMap.copyOf(pathSpec),
      null,
      tmpDir.getCanonicalPath()
  );
  final HadoopTuningConfig tuningConfig = new HadoopTuningConfig(
      tmpDir.getCanonicalPath(),
      null,
      null,
      null,
      null,
      null,
      null,
      maxRowsInMemory,
      maxBytesInMemory,
      true,
      false,
      false,
      false,
      // verifies that set num reducers is ignored
      ImmutableMap.of(MRJobConfig.NUM_REDUCES, "0"),
      false,
      useCombiner,
      null,
      null,
      forceExtendableShardSpecs,
      false,
      null,
      null,
      null,
      null,
      null
  );

  config = new HadoopDruidIndexerConfig(new HadoopIngestionSpec(dataSchema, ioConfig, tuningConfig));
  config.setShardSpecs(loadShardSpecs(partitionType, shardInfoForEachSegment));
  // Rebuild the config from its own schema after the shard specs have been set on it.
  config = HadoopDruidIndexerConfig.fromSpec(config.getSchema());
}
Aggregations