
Example 56 with MapBasedInputRow

use of org.apache.druid.data.input.MapBasedInputRow in project druid by druid-io.

the class IncrementalIndexRowTypeBenchmark method getFloatRow.

private MapBasedInputRow getFloatRow(long timestamp, int dimensionCount) {
    Random rng = ThreadLocalRandom.current();
    List<String> dimensionList = new ArrayList<>(dimensionCount);
    ImmutableMap.Builder<String, Object> builder = ImmutableMap.builder();
    // Build dimensionCount dimensions named Dim_0..Dim_{n-1}, each holding a random float.
    for (int i = 0; i < dimensionCount; i++) {
        String dimName = StringUtils.format("Dim_%d", i);
        dimensionList.add(dimName);
        builder.put(dimName, rng.nextFloat());
    }
    return new MapBasedInputRow(timestamp, dimensionList, builder.build());
}
Also used : Random(java.util.Random) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) ArrayList(java.util.ArrayList) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) ImmutableMap(com.google.common.collect.ImmutableMap)
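
For context, a row built this way can be read back through the generic InputRow interface; MapBasedInputRow coerces raw event values to strings when they are accessed as dimensions, while getRaw returns the underlying object. A minimal sketch of that behavior (the demo class name and the timestamp value are ours, purely illustrative; assumes a Druid dependency on the classpath):

import java.util.Arrays;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.MapBasedInputRow;

public class MapBasedInputRowDemo {
    public static void main(String[] args) {
        // Build a row with one string-valued and one float-valued dimension.
        InputRow row = new MapBasedInputRow(
            1609459200000L, // 2021-01-01T00:00:00Z, arbitrary example timestamp
            Arrays.asList("Dim_0", "Dim_1"),
            ImmutableMap.of("Dim_0", "hello", "Dim_1", 0.5f));

        System.out.println(row.getTimestampFromEpoch()); // 1609459200000
        System.out.println(row.getDimensions());         // [Dim_0, Dim_1]
        // getDimension(...) returns dimension values coerced to strings ...
        System.out.println(row.getDimension("Dim_1"));   // [0.5]
        // ... while getRaw(...) returns the original object from the event map.
        System.out.println(row.getRaw("Dim_1"));         // 0.5
    }
}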

Example 57 with MapBasedInputRow

use of org.apache.druid.data.input.MapBasedInputRow in project hive by apache.

the class DruidRecordWriter method write.

@Override
public void write(Writable w) throws IOException {
    DruidWritable record = (DruidWritable) w;
    final long timestamp = (long) record.getValue().get(DruidConstants.DEFAULT_TIMESTAMP_COLUMN);
    final int partitionNumber = Math.toIntExact(
        (long) record.getValue().getOrDefault(Constants.DRUID_SHARD_KEY_COL_NAME, -1L));
    final InputRow inputRow = new MapBasedInputRow(
        timestamp,
        dataSchema.getParser().getParseSpec().getDimensionsSpec().getDimensionNames(),
        record.getValue());
    try {
        if (partitionNumber != -1 && maxPartitionSize == -1) {
            /*
             * Case: data is sorted by time plus an extra hashing dimension (see DRUID_SHARD_KEY_COL_NAME).
             * Use DRUID_SHARD_KEY_COL_NAME as the segment partition in addition to the time dimension,
             * so rows with the same DRUID_SHARD_KEY_COL_NAME and time interval end up in the same segment.
             */
            DateTime truncatedDateTime = segmentGranularity.bucketStart(DateTimes.utc(timestamp));
            final Interval interval =
                new Interval(truncatedDateTime, segmentGranularity.increment(truncatedDateTime));
            if (currentOpenSegment != null) {
                if (currentOpenSegment.getShardSpec().getPartitionNum() != partitionNumber
                    || !currentOpenSegment.getInterval().equals(interval)) {
                    // Partition number or interval changed: push the open segment and start a new one.
                    pushSegments(ImmutableList.of(currentOpenSegment));
                    currentOpenSegment = new SegmentIdWithShardSpec(
                        dataSchema.getDataSource(),
                        interval,
                        tuningConfig.getVersioningPolicy().getVersion(interval),
                        new LinearShardSpec(partitionNumber));
                }
            } else {
                currentOpenSegment = new SegmentIdWithShardSpec(
                    dataSchema.getDataSource(),
                    interval,
                    tuningConfig.getVersioningPolicy().getVersion(interval),
                    new LinearShardSpec(partitionNumber));
            }
            appenderator.add(currentOpenSegment, inputRow, committerSupplier::get);
        } else if (partitionNumber == -1 && maxPartitionSize != -1) {
            // Case: partition segments by time and a maximum row count per segment (maxPartitionSize).
            appenderator.add(getSegmentIdentifierAndMaybePush(timestamp), inputRow, committerSupplier::get);
        } else {
            throw new IllegalArgumentException(String.format(
                "partitionNumber and maxPartitionSize should be mutually exclusive; "
                    + "got partitionNum [%s] and maxPartitionSize [%s]",
                partitionNumber, maxPartitionSize));
        }
    } catch (SegmentNotWritableException e) {
        throw new IOException(e);
    }
}
Also used : SegmentNotWritableException(org.apache.druid.segment.realtime.appenderator.SegmentNotWritableException) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) IOException(java.io.IOException) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) DateTime(org.joda.time.DateTime) DruidWritable(org.apache.hadoop.hive.druid.serde.DruidWritable) InputRow(org.apache.druid.data.input.InputRow) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) Interval(org.joda.time.Interval)
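
The interval computation is the key step above: segmentGranularity.bucketStart truncates the row timestamp to the start of its granularity bucket, and increment advances one bucket, so the pair bounds the segment's interval. A standalone sketch of the same calls (the demo class name and the HOUR granularity are our illustrative choices, not taken from the Hive code):

import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.joda.time.DateTime;
import org.joda.time.Interval;

public class SegmentIntervalDemo {
    public static void main(String[] args) {
        Granularity segmentGranularity = Granularities.HOUR;
        long timestamp = DateTimes.of("2021-01-01T10:42:30Z").getMillis();

        // Truncate to the start of the bucket, then step one bucket forward.
        DateTime start = segmentGranularity.bucketStart(DateTimes.utc(timestamp));
        Interval interval = new Interval(start, segmentGranularity.increment(start));

        // Prints 2021-01-01T10:00:00.000Z/2021-01-01T11:00:00.000Z
        System.out.println(interval);
    }
}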

Example 58 with MapBasedInputRow

use of org.apache.druid.data.input.MapBasedInputRow in project druid by druid-io.

the class IncrementalIndexRowTypeBenchmark method getLongRow.

private MapBasedInputRow getLongRow(long timestamp, int dimensionCount) {
    Random rng = ThreadLocalRandom.current();
    List<String> dimensionList = new ArrayList<>(dimensionCount);
    ImmutableMap.Builder<String, Object> builder = ImmutableMap.builder();
    // Same pattern as getFloatRow, but each dimension holds a random long.
    for (int i = 0; i < dimensionCount; i++) {
        String dimName = StringUtils.format("Dim_%d", i);
        dimensionList.add(dimName);
        builder.put(dimName, rng.nextLong());
    }
    return new MapBasedInputRow(timestamp, dimensionList, builder.build());
}
Also used : Random(java.util.Random) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) ArrayList(java.util.ArrayList) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 59 with MapBasedInputRow

use of org.apache.druid.data.input.MapBasedInputRow in project druid by druid-io.

the class IncrementalIndexRowTypeBenchmark method getStringRow.

private MapBasedInputRow getStringRow(long timestamp, int dimensionCount) {
    Random rng = ThreadLocalRandom.current();
    List<String> dimensionList = new ArrayList<>(dimensionCount);
    ImmutableMap.Builder<String, Object> builder = ImmutableMap.builder();
    // Same pattern again, but each dimension holds a random long rendered as a string.
    for (int i = 0; i < dimensionCount; i++) {
        String dimName = StringUtils.format("Dim_%d", i);
        dimensionList.add(dimName);
        builder.put(dimName, String.valueOf(rng.nextLong()));
    }
    return new MapBasedInputRow(timestamp, dimensionList, builder.build());
}
Also used : Random(java.util.Random) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) ArrayList(java.util.ArrayList) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) ImmutableMap(com.google.common.collect.ImmutableMap)
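
The three generator methods in this benchmark (float, long, and string rows) differ only in the value produced per dimension; keeping them separate plausibly keeps each benchmark path monomorphic, but they could in principle collapse into one helper. A hypothetical refactor sketch (the getRow name and the IntFunction parameter are ours, not from the benchmark; assumes the same imports as the methods above plus java.util.function.IntFunction):

private MapBasedInputRow getRow(long timestamp, int dimensionCount, IntFunction<Object> valueFn) {
    List<String> dimensionList = new ArrayList<>(dimensionCount);
    ImmutableMap.Builder<String, Object> builder = ImmutableMap.builder();
    for (int i = 0; i < dimensionCount; i++) {
        String dimName = StringUtils.format("Dim_%d", i);
        dimensionList.add(dimName);
        // Delegate value generation to the caller-supplied function.
        builder.put(dimName, valueFn.apply(i));
    }
    return new MapBasedInputRow(timestamp, dimensionList, builder.build());
}

// Call sites, e.g.:
//   getRow(ts, n, i -> ThreadLocalRandom.current().nextFloat());
//   getRow(ts, n, i -> ThreadLocalRandom.current().nextLong());
//   getRow(ts, n, i -> String.valueOf(ThreadLocalRandom.current().nextLong()));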

Example 60 with MapBasedInputRow

use of org.apache.druid.data.input.MapBasedInputRow in project druid by druid-io.

the class HashBasedNumberedShardSpecTest method testExtractKeys.

@Test
public void testExtractKeys() {
    final List<String> partitionDimensions1 = ImmutableList.of("visitor_id");
    final DateTime time = DateTimes.nowUtc();
    final InputRow inputRow = new MapBasedInputRow(
        time, ImmutableList.of("visitor_id", "cnt"), ImmutableMap.of("visitor_id", "v1", "cnt", 10));
    // Partitioning on "visitor_id" extracts only that dimension's values; the trailing 0 (numBuckets) is not used here.
    Assert.assertEquals(
        ImmutableList.of(Collections.singletonList("v1")),
        new HashPartitioner(objectMapper, HashPartitionFunction.MURMUR3_32_ABS, partitionDimensions1, 0)
            .extractKeys(time.getMillis(), inputRow));
    // With empty partitionDimensions (the fallback when null), the key is the timestamp plus all dimension values.
    Assert.assertEquals(
        ImmutableList.of(
            time.getMillis(),
            ImmutableMap.of("cnt", Collections.singletonList(10), "visitor_id", Collections.singletonList("v1"))).toString(),
        new HashPartitioner(objectMapper, HashPartitionFunction.MURMUR3_32_ABS, ImmutableList.of(), 0)
            .extractKeys(time.getMillis(), inputRow).toString());
}
Also used : InputRow(org.apache.druid.data.input.InputRow) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) DateTime(org.joda.time.DateTime) Test(org.junit.Test)
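
Once keys are extracted, HashPartitionFunction.MURMUR3_32_ABS maps each key to a bucket by hashing its JSON serialization with 32-bit murmur3 and taking the absolute value modulo the bucket count. A rough sketch of that mapping using Jackson and Guava (this approximates the idea only; Druid's exact serialization and hashing details may differ, and the bucketFor helper is our own):

import java.util.List;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.hash.Hashing;

// Hypothetical illustration of hash-based bucket assignment.
static int bucketFor(ObjectMapper objectMapper, List<Object> groupKey, int numBuckets) throws Exception {
    // Serialize the extracted group key, then hash and fold into [0, numBuckets).
    byte[] serialized = objectMapper.writeValueAsBytes(groupKey);
    int hash = Hashing.murmur3_32().hashBytes(serialized).asInt();
    return Math.abs(hash % numBuckets);
}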

Aggregations

MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow): 114 usages
Test (org.junit.Test): 77
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 46
IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex): 42
OnheapIncrementalIndex (org.apache.druid.segment.incremental.OnheapIncrementalIndex): 38
InputRow (org.apache.druid.data.input.InputRow): 31
File (java.io.File): 24
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 21
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 20
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 19
ArrayList (java.util.ArrayList): 17
HashMap (java.util.HashMap): 15
DateTime (org.joda.time.DateTime): 15
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 14
IncrementalIndexTest (org.apache.druid.segment.data.IncrementalIndexTest): 14
Interval (org.joda.time.Interval): 14
IOException (java.io.IOException): 13
DoubleDimensionSchema (org.apache.druid.data.input.impl.DoubleDimensionSchema): 13
IncrementalIndexSchema (org.apache.druid.segment.incremental.IncrementalIndexSchema): 12
ImmutableMap (com.google.common.collect.ImmutableMap): 11