Example 91 with InputRow

Use of org.apache.druid.data.input.InputRow in project druid by druid-io.

From the class ShardSpecsTest, method testShardSpecSelectionWithNullPartitionDimension.

@Test
public void testShardSpecSelectionWithNullPartitionDimension() {
    HashBucketShardSpec spec1 = new HashBucketShardSpec(0, 2, null, HashPartitionFunction.MURMUR3_32_ABS, jsonMapper);
    HashBucketShardSpec spec2 = new HashBucketShardSpec(1, 2, null, HashPartitionFunction.MURMUR3_32_ABS, jsonMapper);
    Map<Interval, List<BucketNumberedShardSpec<?>>> shardSpecMap = new HashMap<>();
    shardSpecMap.put(Intervals.of("2014-01-01T00:00:00.000Z/2014-01-02T00:00:00.000Z"), ImmutableList.of(spec1, spec2));
    ShardSpecs shardSpecs = new ShardSpecs(shardSpecMap, Granularities.HOUR);
    String visitorId = "visitorId";
    String clientType = "clientType";
    long timestamp1 = DateTimes.of("2014-01-01T00:00:00.000Z").getMillis();
    InputRow row1 = new MapBasedInputRow(timestamp1, Lists.newArrayList(visitorId, clientType), ImmutableMap.of(visitorId, "0", clientType, "iphone"));
    long timestamp2 = DateTimes.of("2014-01-01T00:30:20.456Z").getMillis();
    InputRow row2 = new MapBasedInputRow(timestamp2, Lists.newArrayList(visitorId, clientType), ImmutableMap.of(visitorId, "0", clientType, "iphone"));
    long timestamp3 = DateTimes.of("2014-01-01T10:10:20.456Z").getMillis();
    InputRow row3 = new MapBasedInputRow(timestamp3, Lists.newArrayList(visitorId, clientType), ImmutableMap.of(visitorId, "0", clientType, "iphone"));
    ShardSpec spec3 = shardSpecs.getShardSpec(Intervals.of("2014-01-01T00:00:00.000Z/2014-01-02T00:00:00.000Z"), row1);
    ShardSpec spec4 = shardSpecs.getShardSpec(Intervals.of("2014-01-01T00:00:00.000Z/2014-01-02T00:00:00.000Z"), row2);
    ShardSpec spec5 = shardSpecs.getShardSpec(Intervals.of("2014-01-01T00:00:00.000Z/2014-01-02T00:00:00.000Z"), row3);
    // Rows 1 and 2 truncate to the same hour and carry identical dimension values,
    // so they hash to the same bucket; row 3 falls in a different hour.
    Assert.assertSame(spec3, spec4);
    Assert.assertNotSame(spec3, spec5);
}
Also used : HashMap(java.util.HashMap) HashBucketShardSpec(org.apache.druid.timeline.partition.HashBucketShardSpec) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) BucketNumberedShardSpec(org.apache.druid.timeline.partition.BucketNumberedShardSpec) Interval(org.joda.time.Interval) Test(org.junit.Test)
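
For reference, a minimal standalone sketch of the MapBasedInputRow construction the test relies on. The classes are the same druid-core APIs imported above; only the printed checks are illustrative. With null partition dimensions, the hash covers the time bucket plus all dimension values, which is why two identical rows in the same hour select the same shard.

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.MapBasedInputRow;
import org.apache.druid.java.util.common.DateTimes;

public class InputRowSketch {
    public static void main(String[] args) {
        // Same shape as row1 above: millis timestamp, dimension ordering, raw event map.
        InputRow row = new MapBasedInputRow(
            DateTimes.of("2014-01-01T00:00:00.000Z").getMillis(),
            ImmutableList.of("visitorId", "clientType"),
            ImmutableMap.of("visitorId", "0", "clientType", "iphone"));
        System.out.println(row.getTimestamp());              // 2014-01-01T00:00:00.000Z
        System.out.println(row.getDimension("clientType"));  // [iphone]
    }
}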

Example 92 with InputRow

Use of org.apache.druid.data.input.InputRow in project druid by druid-io.

From the class IngestSegmentFirehoseFactoryTest, method testTransformSpec.

@Test
public void testTransformSpec() throws IOException {
    Assert.assertEquals(MAX_SHARD_NUMBER.longValue(), SEGMENT_SET.size());
    int rowcount = 0;
    final TransformSpec transformSpec = new TransformSpec(
        new SelectorDimFilter(ColumnHolder.TIME_COLUMN_NAME, "1", null),
        ImmutableList.of(new ExpressionTransform(METRIC_FLOAT_NAME, METRIC_FLOAT_NAME + " * 10", ExprMacroTable.nil())));
    int skipped = 0;
    try (final Firehose firehose = factory.connect(transformSpec.decorate(rowParser), TMP_DIR)) {
        while (firehose.hasMore()) {
            InputRow row = firehose.nextRow();
            if (row == null) {
                skipped++;
                continue;
            }
            Assert.assertArrayEquals(new String[] { DIM_NAME }, row.getDimensions().toArray());
            Assert.assertArrayEquals(new String[] { DIM_VALUE }, row.getDimension(DIM_NAME).toArray());
            Assert.assertEquals(METRIC_LONG_VALUE.longValue(), row.getMetric(METRIC_LONG_NAME).longValue());
            Assert.assertEquals(METRIC_FLOAT_VALUE * 10, row.getMetric(METRIC_FLOAT_NAME).floatValue(), METRIC_FLOAT_VALUE * 0.0001);
            ++rowcount;
        }
    }
    Assert.assertEquals(90, skipped);
    Assert.assertEquals((int) MAX_ROWS, rowcount);
}
Also used : SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) Firehose(org.apache.druid.data.input.Firehose) InputRow(org.apache.druid.data.input.InputRow) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) TransformSpec(org.apache.druid.segment.transform.TransformSpec) Test(org.junit.Test)
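
The decorated parser applies the transform row by row; the same behavior can be exercised directly by converting the TransformSpec into a Transformer. A hedged sketch, with met_float and dim as hypothetical stand-ins for the test's METRIC_FLOAT_NAME and DIM_NAME constants, and the filter omitted:

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.MapBasedInputRow;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.segment.transform.ExpressionTransform;
import org.apache.druid.segment.transform.TransformSpec;
import org.apache.druid.segment.transform.Transformer;

public class TransformSketch {
    public static void main(String[] args) {
        // No filter in this sketch; the test additionally filters on the __time column.
        TransformSpec spec = new TransformSpec(
            null,
            ImmutableList.of(new ExpressionTransform("met_float", "met_float * 10", ExprMacroTable.nil())));
        Transformer transformer = spec.toTransformer();
        InputRow in = new MapBasedInputRow(
            0L, ImmutableList.of("dim"), ImmutableMap.of("dim", "val", "met_float", 2.5));
        InputRow out = transformer.transform(in);  // null would mean a filter dropped the row
        System.out.println(out.getMetric("met_float"));  // 25.0
    }
}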

Example 93 with InputRow

Use of org.apache.druid.data.input.InputRow in project druid by druid-io.

From the class RecordSupplierInputSourceTest, method testRead.

@Test
public void testRead() throws IOException {
    final RandomCsvSupplier supplier = new RandomCsvSupplier();
    final InputSource inputSource = new RecordSupplierInputSource<>("topic", supplier, false);
    final List<String> colNames = IntStream.range(0, NUM_COLS).mapToObj(i -> StringUtils.format("col_%d", i)).collect(Collectors.toList());
    final InputFormat inputFormat = new CsvInputFormat(colNames, null, null, false, 0);
    final InputSourceReader reader = inputSource.reader(
        new InputRowSchema(
            new TimestampSpec("col_0", "auto", null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(colNames.subList(1, colNames.size()))),
            ColumnsFilter.all()),
        inputFormat,
        temporaryFolder.newFolder());
    int read = 0;
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        for (; read < NUM_ROWS && iterator.hasNext(); read++) {
            final InputRow inputRow = iterator.next();
            Assert.assertEquals(DateTimes.of(TIMESTAMP_STRING), inputRow.getTimestamp());
            Assert.assertEquals(NUM_COLS - 1, inputRow.getDimensions().size());
        }
    }
    Assert.assertEquals(NUM_ROWS, read);
    Assert.assertTrue(supplier.isClosed());
}
Also used : IntStream(java.util.stream.IntStream) StreamPartition(org.apache.druid.indexing.seekablestream.common.StreamPartition) RecordSupplier(org.apache.druid.indexing.seekablestream.common.RecordSupplier) OrderedPartitionableRecord(org.apache.druid.indexing.seekablestream.common.OrderedPartitionableRecord) Random(java.util.Random) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) ArrayList(java.util.ArrayList) InputSource(org.apache.druid.data.input.InputSource) Map(java.util.Map) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) InputSourceReader(org.apache.druid.data.input.InputSourceReader) CloseableIterator(org.apache.druid.java.util.common.parsers.CloseableIterator) Nullable(javax.annotation.Nullable) DateTimes(org.apache.druid.java.util.common.DateTimes) ColumnsFilter(org.apache.druid.data.input.ColumnsFilter) InputFormat(org.apache.druid.data.input.InputFormat) Collection(java.util.Collection) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) IOException(java.io.IOException) NotNull(javax.validation.constraints.NotNull) Maps(com.google.common.collect.Maps) Collectors(java.util.stream.Collectors) InputRow(org.apache.druid.data.input.InputRow) List(java.util.List) Rule(org.junit.Rule) ByteEntity(org.apache.druid.data.input.impl.ByteEntity) RandomStringUtils(org.apache.commons.lang3.RandomStringUtils) Assert(org.junit.Assert) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder)
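
The reader wiring is independent of the record supplier, so the same InputRowSchema and CsvInputFormat setup can be tried against druid's simpler InlineInputSource. A sketch with made-up column names and inline CSV standing in for the test's RandomCsvSupplier:

import java.io.File;
import com.google.common.collect.ImmutableList;
import org.apache.druid.data.input.ColumnsFilter;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowSchema;
import org.apache.druid.data.input.InputSourceReader;
import org.apache.druid.data.input.impl.CsvInputFormat;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.InlineInputSource;
import org.apache.druid.data.input.impl.TimestampSpec;
import org.apache.druid.java.util.common.parsers.CloseableIterator;

public class CsvReadSketch {
    public static void main(String[] args) throws Exception {
        InlineInputSource inputSource = new InlineInputSource("2014-01-01T00:00:00Z,a,b\n");
        CsvInputFormat format = new CsvInputFormat(
            ImmutableList.of("col_0", "col_1", "col_2"), null, null, false, 0);
        InputRowSchema schema = new InputRowSchema(
            new TimestampSpec("col_0", "auto", null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("col_1", "col_2"))),
            ColumnsFilter.all());
        InputSourceReader reader = inputSource.reader(schema, format, new File(System.getProperty("java.io.tmpdir")));
        try (CloseableIterator<InputRow> it = reader.read()) {
            while (it.hasNext()) {
                InputRow row = it.next();
                // 2014-01-01T00:00:00.000Z [col_1, col_2]
                System.out.println(row.getTimestamp() + " " + row.getDimensions());
            }
        }
    }
}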

Example 94 with InputRow

Use of org.apache.druid.data.input.InputRow in project druid by druid-io.

From the class StreamChunkParserTest, helper method parseAndAssertResult.

private void parseAndAssertResult(StreamChunkParser<ByteEntity> chunkParser) throws IOException {
    final String json = "{\"timestamp\": \"2020-01-01\", \"dim\": \"val\", \"met\": \"val2\"}";
    List<InputRow> parsedRows = chunkParser.parse(Collections.singletonList(new ByteEntity(json.getBytes(StringUtils.UTF8_STRING))), false);
    Assert.assertEquals(1, parsedRows.size());
    InputRow row = parsedRows.get(0);
    Assert.assertEquals(DateTimes.of("2020-01-01"), row.getTimestamp());
    Assert.assertEquals("val", Iterables.getOnlyElement(row.getDimension("dim")));
    Assert.assertEquals("val2", Iterables.getOnlyElement(row.getDimension("met")));
}
Also used : ByteEntity(org.apache.druid.data.input.impl.ByteEntity) InputRow(org.apache.druid.data.input.InputRow)
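
Internally the chunk parser hands each ByteEntity to a JSON InputFormat; the same payload can be parsed directly with JsonInputFormat. A sketch assuming the three-argument constructor of this Druid era (later versions add parameters), with the field names taken from the JSON above:

import java.io.File;
import org.apache.druid.data.input.ColumnsFilter;
import org.apache.druid.data.input.InputEntityReader;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowSchema;
import org.apache.druid.data.input.impl.ByteEntity;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.JsonInputFormat;
import org.apache.druid.data.input.impl.TimestampSpec;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.parsers.CloseableIterator;
import org.apache.druid.java.util.common.parsers.JSONPathSpec;

public class JsonParseSketch {
    public static void main(String[] args) throws Exception {
        String json = "{\"timestamp\": \"2020-01-01\", \"dim\": \"val\", \"met\": \"val2\"}";
        JsonInputFormat format = new JsonInputFormat(JSONPathSpec.DEFAULT, null, null);
        InputRowSchema schema = new InputRowSchema(
            new TimestampSpec("timestamp", "auto", null), DimensionsSpec.EMPTY, ColumnsFilter.all());
        InputEntityReader reader = format.createReader(
            schema, new ByteEntity(StringUtils.toUtf8(json)), new File(System.getProperty("java.io.tmpdir")));
        try (CloseableIterator<InputRow> it = reader.read()) {
            InputRow row = it.next();
            System.out.println(row.getTimestamp());       // 2020-01-01T00:00:00.000Z
            System.out.println(row.getDimension("dim"));  // [val]
        }
    }
}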

Example 95 with InputRow

Use of org.apache.druid.data.input.InputRow in project druid by druid-io.

From the class StreamChunkParserTest, method parseEmptyEndOfShard.

@Test
public void parseEmptyEndOfShard() throws IOException {
    final TrackingJsonInputFormat inputFormat = new TrackingJsonInputFormat(JSONPathSpec.DEFAULT, Collections.emptyMap());
    RowIngestionMeters mockRowIngestionMeters = Mockito.mock(RowIngestionMeters.class);
    final StreamChunkParser<ByteEntity> chunkParser = new StreamChunkParser<>(
        null,  // null legacy parser: the InputFormat path is used
        inputFormat,
        new InputRowSchema(TIMESTAMP_SPEC, DimensionsSpec.EMPTY, ColumnsFilter.all()),
        TransformSpec.NONE,
        temporaryFolder.newFolder(),
        row -> true,
        mockRowIngestionMeters,
        parseExceptionHandler);
    List<InputRow> parsedRows = chunkParser.parse(ImmutableList.of(), true);
    Assert.assertEquals(0, parsedRows.size());
    Mockito.verifyNoInteractions(mockRowIngestionMeters);
}
Also used : ByteEntity(org.apache.druid.data.input.impl.ByteEntity) InputRow(org.apache.druid.data.input.InputRow) InputRowSchema(org.apache.druid.data.input.InputRowSchema) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) NoopRowIngestionMeters(org.apache.druid.segment.incremental.NoopRowIngestionMeters) Test(org.junit.Test)
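
The verifyNoInteractions assertion pins down an early return: an empty end-of-shard chunk must short-circuit before any parsing or metering happens. A hypothetical simplification of that guard (not Druid's actual StreamChunkParser):

import java.util.Collections;
import java.util.List;
import java.util.function.Function;

final class EmptyChunkGuard {
    // parseChunk stands in for the real parse path, where metering happens per row.
    static <T, R> List<R> parse(List<T> chunk, boolean isEndOfShard, Function<List<T>, List<R>> parseChunk) {
        if (chunk.isEmpty() && isEndOfShard) {
            return Collections.emptyList();  // short-circuit: no rows, no meter updates
        }
        return parseChunk.apply(chunk);
    }
}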

Aggregations

InputRow (org.apache.druid.data.input.InputRow): 266
Test (org.junit.Test): 193
MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow): 57
InputEntityReader (org.apache.druid.data.input.InputEntityReader): 54
InputRowSchema (org.apache.druid.data.input.InputRowSchema): 52
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 52
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 49
ArrayList (java.util.ArrayList): 46
List (java.util.List): 37
ImmutableList (com.google.common.collect.ImmutableList): 33
JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec): 33
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 33
InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues): 29
File (java.io.File): 27
HadoopDruidIndexerConfig (org.apache.druid.indexer.HadoopDruidIndexerConfig): 27
JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec): 27
DateTime (org.joda.time.DateTime): 24
Map (java.util.Map): 23
IOException (java.io.IOException): 18
Interval (org.joda.time.Interval): 18