Use of org.apache.druid.data.input.InputRow in project druid by druid-io.
Class SegmentGenerator, method generateIncrementalIndex:
public IncrementalIndex generateIncrementalIndex(
    final DataSegment dataSegment,
    final GeneratorSchemaInfo schemaInfo,
    final Granularity granularity,
    final int numRows
) {
  // In case we need to generate hyperUniques.
  ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());
  final String dataHash = Hashing.sha256()
                                 .newHasher()
                                 .putString(dataSegment.getId().toString(), StandardCharsets.UTF_8)
                                 .putString(schemaInfo.toString(), StandardCharsets.UTF_8)
                                 .putString(granularity.toString(), StandardCharsets.UTF_8)
                                 .putInt(numRows)
                                 .hash()
                                 .toString();
  final DataGenerator dataGenerator = new DataGenerator(
      schemaInfo.getColumnSchemas(),
      dataSegment.getId().hashCode(), // use segment identifier hashCode as seed
      schemaInfo.getDataInterval(),
      numRows
  );
  final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withDimensionsSpec(schemaInfo.getDimensionsSpec())
      .withMetrics(schemaInfo.getAggsArray())
      .withRollup(schemaInfo.isWithRollup())
      .withQueryGranularity(granularity)
      .build();
  final List<InputRow> rows = new ArrayList<>();
  for (int i = 0; i < numRows; i++) {
    final InputRow row = dataGenerator.nextRow();
    rows.add(row);
    if ((i + 1) % 20000 == 0) {
      log.info("%,d/%,d rows generated for[%s].", i + 1, numRows, dataSegment);
    }
  }
  log.info("%,d/%,d rows generated for[%s].", numRows, numRows, dataSegment);
  return makeIncrementalIndex(dataSegment.getId(), dataHash, 0, rows, indexSchema);
}
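For comparison, here is a minimal sketch of building an InputRow by hand rather than through DataGenerator. It assumes the MapBasedInputRow implementation that ships with Druid; the dimension and metric names are hypothetical.

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.MapBasedInputRow;
import org.apache.druid.java.util.common.DateTimes;
import java.util.List;
import java.util.Map;

public class InputRowSketch {
  public static InputRow exampleRow() {
    // Hypothetical dimension names and values; MapBasedInputRow simply wraps an event map.
    final List<String> dimensions = ImmutableList.of("dimA", "dimB");
    final Map<String, Object> event =
        ImmutableMap.<String, Object>of("dimA", "foo", "dimB", "bar", "metric1", 12L);
    return new MapBasedInputRow(DateTimes.of("2013-01-01").getMillis(), dimensions, event);
  }
}

Rows built this way could be added to the rows list above in place of generated ones, for example when a benchmark needs deterministic hand-written data.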
Use of org.apache.druid.data.input.InputRow in project druid by druid-io.
Class SqlFirehoseTest, method testFirehoseStringParser:
@Test
public void testFirehoseStringParser() throws Exception {
  final TestCloseable closeable = new TestCloseable();
  List<Object> expectedResults = new ArrayList<>();
  for (Map<String, Object> map : inputs) {
    expectedResults.add(map.get("x"));
  }
  final List<JsonIterator<Map<String, Object>>> lineIterators =
      fileList.stream()
              .map(s -> new JsonIterator<Map<String, Object>>(TYPE_REF, s, closeable, objectMapper))
              .collect(Collectors.toList());
  final InputRowParser stringParser = TransformSpec.NONE.decorate(
      new StringInputRowParser(
          new TimeAndDimsParseSpec(
              new TimestampSpec("timestamp", "auto", null),
              new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("x")))
          ),
          Charset.defaultCharset().name()
      )
  );
  try (final SqlFirehose firehose = new SqlFirehose(lineIterators.iterator(), stringParser, closeable)) {
    final List<Object> results = new ArrayList<>();
    while (firehose.hasMore()) {
      final InputRow inputRow = firehose.nextRow();
      if (inputRow == null) {
        results.add(null);
      } else {
        results.add(inputRow.getDimension("x").get(0));
      }
    }
    Assert.assertEquals(expectedResults, results);
  }
}
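A note on the assertion: InputRow.getDimension always returns a List<String>, which is why the test indexes into it with get(0). A small sketch (assuming MapBasedInputRow and made-up values) illustrating single-value versus multi-value dimensions:

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.MapBasedInputRow;

public class DimensionAccessSketch {
  public static void main(String[] args) {
    // One single-value and one multi-value dimension, both read through getDimension(...).
    final InputRow row = new MapBasedInputRow(
        0L,
        ImmutableList.of("x", "tags"),
        ImmutableMap.<String, Object>of("x", "foo", "tags", ImmutableList.of("a", "b"))
    );
    System.out.println(row.getDimension("x"));    // [foo]
    System.out.println(row.getDimension("tags")); // [a, b]
  }
}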
Use of org.apache.druid.data.input.InputRow in project druid by druid-io.
Class SqlFirehoseTest, method testFirehose:
@Test
public void testFirehose() throws Exception {
  final TestCloseable closeable = new TestCloseable();
  List<Object> expectedResults = new ArrayList<>();
  for (Map<String, Object> map : inputs) {
    expectedResults.add(map.get("x"));
  }
  final List<JsonIterator<Map<String, Object>>> lineIterators =
      fileList.stream()
              .map(s -> new JsonIterator<Map<String, Object>>(TYPE_REF, s, closeable, objectMapper))
              .collect(Collectors.toList());
  try (final SqlFirehose firehose = new SqlFirehose(lineIterators.iterator(), parser, closeable)) {
    final List<Object> results = new ArrayList<>();
    while (firehose.hasMore()) {
      final InputRow inputRow = firehose.nextRow();
      if (inputRow == null) {
        results.add(null);
      } else {
        results.add(inputRow.getDimension("x").get(0));
      }
    }
    Assert.assertEquals(expectedResults, results);
  }
}
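Both tests drain the firehose with the same hasMore()/nextRow() loop. As a hedged sketch (the class and method names here are made up), that pattern could be pulled into a helper that also preserves nulls for rows the parser could not produce:

import org.apache.druid.data.input.Firehose;
import org.apache.druid.data.input.InputRow;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class FirehoseDrainSketch {
  // Collects the first value of one dimension from every remaining row in the firehose.
  public static List<Object> drainDimension(Firehose firehose, String dimension) throws IOException {
    final List<Object> values = new ArrayList<>();
    while (firehose.hasMore()) {
      final InputRow row = firehose.nextRow();
      values.add(row == null ? null : row.getDimension(dimension).get(0));
    }
    return values;
  }
}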
Use of org.apache.druid.data.input.InputRow in project druid by druid-io.
Class SinkTest, method testSwap:
@Test
public void testSwap() throws Exception {
  final DataSchema schema = new DataSchema(
      "test",
      new TimestampSpec(null, null, null),
      DimensionsSpec.EMPTY,
      new AggregatorFactory[]{new CountAggregatorFactory("rows")},
      new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, null),
      null
  );
  final Interval interval = Intervals.of("2013-01-01/2013-01-02");
  final String version = DateTimes.nowUtc().toString();
  RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(
      null, 100, null, null, new Period("P1Y"), null, null, null, null, null,
      null, null, null, 0, 0, null, null, null, null, null
  );
  final Sink sink = new Sink(
      interval,
      schema,
      tuningConfig.getShardSpec(),
      version,
      tuningConfig.getAppendableIndexSpec(),
      tuningConfig.getMaxRowsInMemory(),
      tuningConfig.getMaxBytesInMemoryOrDefault(),
      true,
      tuningConfig.getDedupColumn()
  );

  sink.add(new InputRow() {
    @Override
    public List<String> getDimensions() {
      return new ArrayList<>();
    }

    @Override
    public long getTimestampFromEpoch() {
      return DateTimes.of("2013-01-01").getMillis();
    }

    @Override
    public DateTime getTimestamp() {
      return DateTimes.of("2013-01-01");
    }

    @Override
    public List<String> getDimension(String dimension) {
      return new ArrayList<>();
    }

    @Override
    public Number getMetric(String metric) {
      return 0;
    }

    @Override
    public Object getRaw(String dimension) {
      return null;
    }

    @Override
    public int compareTo(Row o) {
      return 0;
    }
  }, false);

  FireHydrant currHydrant = sink.getCurrHydrant();
  Assert.assertEquals(Intervals.of("2013-01-01/PT1M"), currHydrant.getIndex().getInterval());

  FireHydrant swapHydrant = sink.swap();

  sink.add(new InputRow() {
    @Override
    public List<String> getDimensions() {
      return new ArrayList<>();
    }

    @Override
    public long getTimestampFromEpoch() {
      return DateTimes.of("2013-01-01").getMillis();
    }

    @Override
    public DateTime getTimestamp() {
      return DateTimes.of("2013-01-01");
    }

    @Override
    public List<String> getDimension(String dimension) {
      return new ArrayList<>();
    }

    @Override
    public Number getMetric(String metric) {
      return 0;
    }

    @Override
    public Object getRaw(String dimension) {
      return null;
    }

    @Override
    public int compareTo(Row o) {
      return 0;
    }
  }, false);

  Assert.assertEquals(currHydrant, swapHydrant);
  Assert.assertNotSame(currHydrant, sink.getCurrHydrant());
  Assert.assertEquals(Intervals.of("2013-01-01/PT1M"), sink.getCurrHydrant().getIndex().getInterval());
  Assert.assertEquals(2, Iterators.size(sink.iterator()));
}
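The two anonymous InputRow implementations in this test are identical. Where only a row with no dimensions and no metrics is needed, a shorter alternative is a MapBasedInputRow over an empty event, sketched below as a hypothetical substitute rather than the code SinkTest actually uses:

import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.MapBasedInputRow;
import org.apache.druid.java.util.common.DateTimes;
import java.util.Collections;

public class EmptyRowSketch {
  // A dimensionless, metric-less row timestamped 2013-01-01, playing the same role as the
  // anonymous InputRow above (hypothetical substitute, not the test's actual code).
  public static InputRow emptyRowAt20130101() {
    return new MapBasedInputRow(DateTimes.of("2013-01-01"), Collections.emptyList(), Collections.emptyMap());
  }
}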
Use of org.apache.druid.data.input.InputRow in project druid by druid-io.
Class OpenAndClosedSegmentsBatchAppenderatorDriverTest, method testIncrementalPush:
@Test(timeout = 5000L)
public void testIncrementalPush() throws Exception {
  Assert.assertNull(driver.startJob(null));
  int i = 0;
  for (InputRow row : ROWS) {
    Assert.assertTrue(driver.add(row, "dummy").isOk());
    checkSegmentStates(1, SegmentState.APPENDING);
    checkSegmentStates(i, SegmentState.PUSHED_AND_DROPPED);
    driver.pushAllAndClear(TIMEOUT);
    checkSegmentStates(0, SegmentState.APPENDING);
    checkSegmentStates(++i, SegmentState.PUSHED_AND_DROPPED);
  }
  final SegmentsAndCommitMetadata published = driver
      .publishAll(null, null, makeOkPublisher(), Function.identity())
      .get(TIMEOUT, TimeUnit.MILLISECONDS);
  Assert.assertEquals(
      ImmutableSet.of(
          new SegmentIdWithShardSpec(DATA_SOURCE, Intervals.of("2000/PT1H"), VERSION, new NumberedShardSpec(0, 0)),
          new SegmentIdWithShardSpec(DATA_SOURCE, Intervals.of("2000T01/PT1H"), VERSION, new NumberedShardSpec(0, 0)),
          new SegmentIdWithShardSpec(DATA_SOURCE, Intervals.of("2000T01/PT1H"), VERSION, new NumberedShardSpec(1, 0))
      ),
      published.getSegments().stream().map(SegmentIdWithShardSpec::fromDataSegment).collect(Collectors.toSet())
  );
  Assert.assertNull(published.getCommitMetadata());
}
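The ROWS fixture and helpers such as checkSegmentStates are defined elsewhere in the test class. As a hedged guess at the fixture's shape (dimension name and values are hypothetical), three rows like these, one in the 2000-01-01T00 hour and two in the 2000-01-01T01 hour, would line up with the three segment IDs asserted above:

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.MapBasedInputRow;
import org.apache.druid.java.util.common.DateTimes;
import java.util.List;

public class RowsFixtureSketch {
  // Hypothetical fixture: the second and third rows share an hour, so pushing after each add
  // yields two numbered partitions (0 and 1) for the 2000T01/PT1H interval.
  public static final List<InputRow> ROWS = ImmutableList.of(
      new MapBasedInputRow(DateTimes.of("2000-01-01T00:00:00Z"), ImmutableList.of("dim1"),
                           ImmutableMap.<String, Object>of("dim1", "a")),
      new MapBasedInputRow(DateTimes.of("2000-01-01T01:00:00Z"), ImmutableList.of("dim1"),
                           ImmutableMap.<String, Object>of("dim1", "b")),
      new MapBasedInputRow(DateTimes.of("2000-01-01T01:00:00Z"), ImmutableList.of("dim1"),
                           ImmutableMap.<String, Object>of("dim1", "c"))
  );
}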