Use of org.apache.druid.data.input.MapBasedInputRow in project druid by druid-io: class IncrementalIndexRowTypeBenchmark, method getFloatRow.
private MapBasedInputRow getFloatRow(long timestamp, int dimensionCount) {
  Random rng = ThreadLocalRandom.current();
  List<String> dimensionList = new ArrayList<>(dimensionCount);
  ImmutableMap.Builder<String, Object> builder = ImmutableMap.builder();
  for (int i = 0; i < dimensionCount; i++) {
    // Each dimension Dim_i holds a random float value.
    String dimName = StringUtils.format("Dim_%d", i);
    dimensionList.add(dimName);
    builder.put(dimName, rng.nextFloat());
  }
  return new MapBasedInputRow(timestamp, dimensionList, builder.build());
}
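For context, a MapBasedInputRow is just a timestamp, a dimension-name list, and an event map. A minimal read-back sketch (hypothetical usage, not part of the benchmark; getFloatRow is the method above):

MapBasedInputRow row = getFloatRow(System.currentTimeMillis(), 2);
long ts = row.getTimestampFromEpoch();   // the epoch millis passed to the constructor
List<String> dims = row.getDimensions(); // ["Dim_0", "Dim_1"]
Object raw = row.getRaw("Dim_0");        // the random Float stored in the event map

The same shape applies to getLongRow and getStringRow below; only the stored value type changes.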
Use of org.apache.druid.data.input.MapBasedInputRow in project hive by apache: class DruidRecordWriter, method write.
@Override
public void write(Writable w) throws IOException {
  DruidWritable record = (DruidWritable) w;
  final long timestamp = (long) record.getValue().get(DruidConstants.DEFAULT_TIMESTAMP_COLUMN);
  final int partitionNumber = Math.toIntExact(
      (long) record.getValue().getOrDefault(Constants.DRUID_SHARD_KEY_COL_NAME, -1L));
  final InputRow inputRow = new MapBasedInputRow(
      timestamp,
      dataSchema.getParser().getParseSpec().getDimensionsSpec().getDimensionNames(),
      record.getValue());
  try {
    if (partitionNumber != -1 && maxPartitionSize == -1) {
      /*
       * Case: the data is sorted by time plus an extra hashing dimension (see DRUID_SHARD_KEY_COL_NAME).
       * Use DRUID_SHARD_KEY_COL_NAME as the segment partition in addition to the time dimension,
       * so rows with the same DRUID_SHARD_KEY_COL_NAME and time interval end up in the same segment.
       */
      DateTime truncatedDateTime = segmentGranularity.bucketStart(DateTimes.utc(timestamp));
      final Interval interval = new Interval(truncatedDateTime, segmentGranularity.increment(truncatedDateTime));
      if (currentOpenSegment != null) {
        if (currentOpenSegment.getShardSpec().getPartitionNum() != partitionNumber
            || !currentOpenSegment.getInterval().equals(interval)) {
          pushSegments(ImmutableList.of(currentOpenSegment));
          currentOpenSegment = new SegmentIdWithShardSpec(
              dataSchema.getDataSource(),
              interval,
              tuningConfig.getVersioningPolicy().getVersion(interval),
              new LinearShardSpec(partitionNumber));
        }
      } else {
        currentOpenSegment = new SegmentIdWithShardSpec(
            dataSchema.getDataSource(),
            interval,
            tuningConfig.getVersioningPolicy().getVersion(interval),
            new LinearShardSpec(partitionNumber));
      }
      appenderator.add(currentOpenSegment, inputRow, committerSupplier::get);
    } else if (partitionNumber == -1 && maxPartitionSize != -1) {
      // Case: segments are partitioned by time and a maximum row count per segment (maxPartitionSize).
      appenderator.add(getSegmentIdentifierAndMaybePush(timestamp), inputRow, committerSupplier::get);
    } else {
      throw new IllegalArgumentException(String.format(
          "partitionNumber and maxPartitionSize should be mutually exclusive, got partitionNum [%s] and maxPartitionSize [%s]",
          partitionNumber, maxPartitionSize));
    }
  } catch (SegmentNotWritableException e) {
    throw new IOException(e);
  }
}
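The write path chooses between two mutually exclusive modes: an explicit shard key per row, or time-plus-row-count partitioning. The interval arithmetic both modes rely on is plain Druid granularity bucketing; a minimal sketch, assuming DAY granularity (Granularities.DAY is Druid's standard constant; the timestamp value is illustrative):

long ts = 1577883600000L; // 2020-01-01T13:00:00Z, illustrative
DateTime start = Granularities.DAY.bucketStart(DateTimes.utc(ts));            // 2020-01-01T00:00:00Z
Interval interval = new Interval(start, Granularities.DAY.increment(start));  // one full day

Every row whose timestamp falls in the same bucket (and, in the first mode, carries the same shard key) lands in the same open segment.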
Use of org.apache.druid.data.input.MapBasedInputRow in project druid by druid-io: class IncrementalIndexRowTypeBenchmark, method getLongRow.
private MapBasedInputRow getLongRow(long timestamp, int dimensionCount) {
  Random rng = ThreadLocalRandom.current();
  List<String> dimensionList = new ArrayList<>(dimensionCount);
  ImmutableMap.Builder<String, Object> builder = ImmutableMap.builder();
  for (int i = 0; i < dimensionCount; i++) {
    String dimName = StringUtils.format("Dim_%d", i);
    dimensionList.add(dimName);
    builder.put(dimName, rng.nextLong());
  }
  return new MapBasedInputRow(timestamp, dimensionList, builder.build());
}
Use of org.apache.druid.data.input.MapBasedInputRow in project druid by druid-io: class IncrementalIndexRowTypeBenchmark, method getStringRow.
private MapBasedInputRow getStringRow(long timestamp, int dimensionCount) {
  Random rng = ThreadLocalRandom.current();
  List<String> dimensionList = new ArrayList<>(dimensionCount);
  ImmutableMap.Builder<String, Object> builder = ImmutableMap.builder();
  for (int i = 0; i < dimensionCount; i++) {
    String dimName = StringUtils.format("Dim_%d", i);
    dimensionList.add(dimName);
    builder.put(dimName, String.valueOf(rng.nextLong()));
  }
  return new MapBasedInputRow(timestamp, dimensionList, builder.build());
}
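Taken together, the three benchmark generators differ only in the value type stored for each dimension (float, long, or the decimal-string form of a long), so the benchmark can isolate how the incremental index handles each dimension value type at ingest time.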
Use of org.apache.druid.data.input.MapBasedInputRow in project druid by druid-io: class HashBasedNumberedShardSpecTest, method testExtractKeys.
@Test
public void testExtractKeys() {
  final List<String> partitionDimensions1 = ImmutableList.of("visitor_id");
  final DateTime time = DateTimes.nowUtc();
  final InputRow inputRow = new MapBasedInputRow(
      time,
      ImmutableList.of("visitor_id", "cnt"),
      ImmutableMap.of("visitor_id", "v1", "cnt", 10));
  Assert.assertEquals(
      ImmutableList.of(Collections.singletonList("v1")),
      new HashPartitioner(objectMapper, HashPartitionFunction.MURMUR3_32_ABS, partitionDimensions1, 0 /* not used */)
          .extractKeys(time.getMillis(), inputRow));
  // empty list when partitionDimensions is null
  Assert.assertEquals(
      ImmutableList.of(
          time.getMillis(),
          ImmutableMap.of("cnt", Collections.singletonList(10), "visitor_id", Collections.singletonList("v1"))).toString(),
      new HashPartitioner(objectMapper, HashPartitionFunction.MURMUR3_32_ABS, ImmutableList.of(), 0 /* not used */)
          .extractKeys(time.getMillis(), inputRow).toString());
}
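The two assertions pin down the contract of extractKeys: with explicit partitionDimensions, the key is just the list of values for those dimensions (here, ["v1"]); with an empty dimension list, it falls back to the timestamp plus the entire row, each value wrapped in a singleton list.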