
Example 76 with InputRow

Use of io.druid.data.input.InputRow in project druid by druid-io.

The class ThriftInputRowParserTest, method serializationAndTest.

public void serializationAndTest(ThriftInputRowParser parser, byte[] bytes) throws TException {
    ByteBuffer buffer = ByteBuffer.wrap(bytes);
    InputRow row1 = parser.parse(buffer);
    assertTrue(row1.getDimension("title").get(0).equals("title"));
    InputRow row2 = parser.parse(new BytesWritable(bytes));
    assertTrue(row2.getDimension("lastName").get(0).equals("last"));
}
Also used: InputRow (io.druid.data.input.InputRow), BytesWritable (org.apache.hadoop.io.BytesWritable), ByteBuffer (java.nio.ByteBuffer)
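
A caller of serializationAndTest has to produce the byte[] payload itself. The sketch below is illustrative only: buildParser() and the Book Thrift struct are hypothetical placeholders, while TSerializer and TBinaryProtocol are the standard org.apache.thrift serialization APIs.

public void serializeAndVerify() throws Exception {
    // buildParser() is an assumed helper that wires up a ParseSpec for ThriftInputRowParser.
    ThriftInputRowParser parser = buildParser();
    // Book is a stand-in for whatever Thrift-generated class the parser targets.
    Book book = new Book().setTitle("title").setLastName("last");
    // Serialize the Thrift object to the binary payload the helper expects.
    byte[] bytes = new TSerializer(new TBinaryProtocol.Factory()).serialize(book);
    // Exercises both the ByteBuffer and the BytesWritable parse paths.
    serializationAndTest(parser, bytes);
}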

Example 77 with InputRow

Use of io.druid.data.input.InputRow in project druid by druid-io.

The class BatchDeltaIngestionTest, method testIngestion.

private void testIngestion(HadoopDruidIndexerConfig config, List<ImmutableMap<String, Object>> expectedRowsGenerated, WindowedDataSegment windowedDataSegment) throws Exception {
    IndexGeneratorJob job = new IndexGeneratorJob(config);
    JobHelper.runJobs(ImmutableList.<Jobby>of(job), config);
    File segmentFolder = new File(String.format("%s/%s/%s_%s/%s/0", config.getSchema().getIOConfig().getSegmentOutputPath(), config.getSchema().getDataSchema().getDataSource(), INTERVAL_FULL.getStart().toString(), INTERVAL_FULL.getEnd().toString(), config.getSchema().getTuningConfig().getVersion()));
    Assert.assertTrue(segmentFolder.exists());
    File descriptor = new File(segmentFolder, "descriptor.json");
    File indexZip = new File(segmentFolder, "index.zip");
    Assert.assertTrue(descriptor.exists());
    Assert.assertTrue(indexZip.exists());
    DataSegment dataSegment = MAPPER.readValue(descriptor, DataSegment.class);
    Assert.assertEquals("website", dataSegment.getDataSource());
    Assert.assertEquals(config.getSchema().getTuningConfig().getVersion(), dataSegment.getVersion());
    Assert.assertEquals(INTERVAL_FULL, dataSegment.getInterval());
    Assert.assertEquals("local", dataSegment.getLoadSpec().get("type"));
    Assert.assertEquals(indexZip.getCanonicalPath(), dataSegment.getLoadSpec().get("path"));
    Assert.assertEquals("host", dataSegment.getDimensions().get(0));
    Assert.assertEquals("visited_sum", dataSegment.getMetrics().get(0));
    Assert.assertEquals("unique_hosts", dataSegment.getMetrics().get(1));
    Assert.assertEquals(Integer.valueOf(9), dataSegment.getBinaryVersion());
    HashBasedNumberedShardSpec spec = (HashBasedNumberedShardSpec) dataSegment.getShardSpec();
    Assert.assertEquals(0, spec.getPartitionNum());
    Assert.assertEquals(1, spec.getPartitions());
    File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
    new LocalDataSegmentPuller().getSegmentFiles(dataSegment, tmpUnzippedSegmentDir);
    QueryableIndex index = INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
    StorageAdapter adapter = new QueryableIndexStorageAdapter(index);
    Firehose firehose = new IngestSegmentFirehose(ImmutableList.of(new WindowedStorageAdapter(adapter, windowedDataSegment.getInterval())), ImmutableList.of("host"), ImmutableList.of("visited_sum", "unique_hosts"), null, Granularities.NONE);
    List<InputRow> rows = Lists.newArrayList();
    while (firehose.hasMore()) {
        rows.add(firehose.nextRow());
    }
    verifyRows(expectedRowsGenerated, rows);
}
Also used: HashBasedNumberedShardSpec (io.druid.timeline.partition.HashBasedNumberedShardSpec), IngestSegmentFirehose (io.druid.segment.realtime.firehose.IngestSegmentFirehose), Firehose (io.druid.data.input.Firehose), QueryableIndexStorageAdapter (io.druid.segment.QueryableIndexStorageAdapter), StorageAdapter (io.druid.segment.StorageAdapter), WindowedStorageAdapter (io.druid.segment.realtime.firehose.WindowedStorageAdapter), DataSegment (io.druid.timeline.DataSegment), WindowedDataSegment (io.druid.indexer.hadoop.WindowedDataSegment), LocalDataSegmentPuller (io.druid.segment.loading.LocalDataSegmentPuller), QueryableIndex (io.druid.segment.QueryableIndex), InputRow (io.druid.data.input.InputRow), File (java.io.File)
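
The expectedRowsGenerated argument is a list of maps keyed the way the verification reads rows back (compare verifyRows in Example 78). One plausible entry, shown purely as an illustration with invented values:

// Illustrative shape only; the test supplies its own expected values.
List<ImmutableMap<String, Object>> expectedRowsGenerated = ImmutableList.of(
    ImmutableMap.<String, Object>of(
        "time", DateTime.parse("2014-10-22T00:00:00.000Z"),
        "host", ImmutableList.of("a.example.com"),
        "visited_sum", 100L,
        "unique_hosts", 1.0d));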

Example 78 with InputRow

Use of io.druid.data.input.InputRow in project druid by druid-io.

The class DatasourceRecordReaderTest, method verifyRows.

private void verifyRows(List<InputRow> actualRows) {
    List<ImmutableMap<String, Object>> expectedRows = ImmutableList.of(
        ImmutableMap.<String, Object>of(
            "time", DateTime.parse("2014-10-22T00:00:00.000Z"),
            "host", ImmutableList.of("a.example.com"),
            "visited_sum", 100L,
            "unique_hosts", 1.0d),
        ImmutableMap.<String, Object>of(
            "time", DateTime.parse("2014-10-22T01:00:00.000Z"),
            "host", ImmutableList.of("b.example.com"),
            "visited_sum", 150L,
            "unique_hosts", 1.0d),
        ImmutableMap.<String, Object>of(
            "time", DateTime.parse("2014-10-22T02:00:00.000Z"),
            "host", ImmutableList.of("c.example.com"),
            "visited_sum", 200L,
            "unique_hosts", 1.0d));
    Assert.assertEquals(expectedRows.size(), actualRows.size());
    for (int i = 0; i < expectedRows.size(); i++) {
        Map<String, Object> expected = expectedRows.get(i);
        InputRow actual = actualRows.get(i);
        Assert.assertEquals(ImmutableList.of("host"), actual.getDimensions());
        Assert.assertEquals(expected.get("time"), actual.getTimestamp());
        Assert.assertEquals(expected.get("host"), actual.getDimension("host"));
        Assert.assertEquals(expected.get("visited_sum"), actual.getLongMetric("visited_sum"));
        Assert.assertEquals((Double) expected.get("unique_hosts"), (Double) HyperUniquesAggregatorFactory.estimateCardinality(actual.getRaw("unique_hosts")), 0.001);
    }
}
Also used: InputRow (io.druid.data.input.InputRow), ImmutableMap (com.google.common.collect.ImmutableMap)
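
For comparison, a row that would pass the first iteration of this check can be built with MapBasedInputRow (used in Example 79). This is a hedged sketch assuming the DateTime-based constructor, not code from the test:

// Hedged sketch; unique_hosts is omitted because the real segment stores a HyperLogLogCollector there.
InputRow row = new MapBasedInputRow(
    DateTime.parse("2014-10-22T00:00:00.000Z"),
    ImmutableList.of("host"),
    ImmutableMap.<String, Object>of(
        "host", "a.example.com",
        "visited_sum", 100L));

With that row, getDimensions() is ["host"], getDimension("host") is ["a.example.com"], and getLongMetric("visited_sum") is 100L, matching the corresponding assertions in the loop.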

Example 79 with InputRow

Use of io.druid.data.input.InputRow in project druid by druid-io.

The class InputRowSerdeTest, method testSerde.

@Test
public void testSerde() {
    InputRow in = new MapBasedInputRow(timestamp, dims, event);
    AggregatorFactory[] aggregatorFactories = new AggregatorFactory[] {
        new DoubleSumAggregatorFactory("agg_non_existing", "agg_non_existing_in"),
        new DoubleSumAggregatorFactory("m1out", "m1"),
        new LongSumAggregatorFactory("m2out", "m2"),
        new HyperUniquesAggregatorFactory("m3out", "m3"),
        // Unparseable from String to Long
        new LongSumAggregatorFactory("unparseable", "m3")
    };
    // Ignore Unparseable aggregator
    byte[] data = InputRowSerde.toBytes(in, aggregatorFactories, false);
    InputRow out = InputRowSerde.fromBytes(data, aggregatorFactories);
    Assert.assertEquals(timestamp, out.getTimestampFromEpoch());
    Assert.assertEquals(dims, out.getDimensions());
    Assert.assertEquals(Collections.EMPTY_LIST, out.getDimension("dim_non_existing"));
    Assert.assertEquals(ImmutableList.of("d1v"), out.getDimension("d1"));
    Assert.assertEquals(ImmutableList.of("d2v1", "d2v2"), out.getDimension("d2"));
    Assert.assertEquals(0.0f, out.getFloatMetric("agg_non_existing"), 0.00001);
    Assert.assertEquals(5.0f, out.getFloatMetric("m1out"), 0.00001);
    Assert.assertEquals(100L, out.getLongMetric("m2out"));
    Assert.assertEquals(1, ((HyperLogLogCollector) out.getRaw("m3out")).estimateCardinality(), 0.001);
    Assert.assertEquals(0L, out.getLongMetric("unparseable"));
}
Also used: DoubleSumAggregatorFactory (io.druid.query.aggregation.DoubleSumAggregatorFactory), MapBasedInputRow (io.druid.data.input.MapBasedInputRow), InputRow (io.druid.data.input.InputRow), LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory), HyperUniquesAggregatorFactory (io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory), Test (org.junit.Test)
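
The assertions pin down what the timestamp, dims, and event fixtures must contain. One fixture set consistent with them, given here as an illustration rather than the test's actual field initializers:

// Illustrative fixture consistent with the assertions above; not the test's real initializers.
long timestamp = System.currentTimeMillis();
List<String> dims = ImmutableList.of("d1", "d2");
Map<String, Object> event = ImmutableMap.<String, Object>of(
    "d1", "d1v",
    "d2", ImmutableList.of("d2v1", "d2v2"),
    "m1", 5.0f,
    "m2", 100L,
    "m3", "m3v");  // not a number, so the "unparseable" LongSum metric deserializes to 0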

Example 80 with InputRow

Use of io.druid.data.input.InputRow in project druid by druid-io.

The class WikipediaIrcDecoder, method decodeMessage.

@Override
public InputRow decodeMessage(final DateTime timestamp, String channel, String msg) {
    final Map<String, String> dimensions = Maps.newHashMap();
    final Map<String, Float> metrics = Maps.newHashMap();
    Matcher m = pattern.matcher(msg);
    if (!m.matches()) {
        throw new IllegalArgumentException("Invalid input format");
    }
    Matcher shortname = shortnamePattern.matcher(channel);
    if (shortname.matches()) {
        dimensions.put("language", shortname.group(1));
    }
    String page = m.group(1);
    String pageUrl = page.replaceAll("\\s", "_");
    dimensions.put("page", pageUrl);
    String user = m.group(4);
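    // Edits by anonymous users carry the IP address as the user name; geolocate it to add continent/country/region/city dimensions.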
    Matcher ipMatch = ipPattern.matcher(user);
    boolean anonymous = ipMatch.matches();
    if (anonymous) {
        try {
            final InetAddress ip = InetAddress.getByName(ipMatch.group());
            final Omni lookup = geoLookup.omni(ip);
            dimensions.put("continent", lookup.getContinent().getName());
            dimensions.put("country", lookup.getCountry().getName());
            dimensions.put("region", lookup.getMostSpecificSubdivision().getName());
            dimensions.put("city", lookup.getCity().getName());
        } catch (UnknownHostException e) {
            log.error(e, "invalid ip [%s]", ipMatch.group());
        } catch (IOException e) {
            log.error(e, "error looking up geo ip");
        } catch (GeoIp2Exception e) {
            log.error(e, "error looking up geo ip");
        }
    }
    dimensions.put("user", user);
    final String flags = m.group(2);
    dimensions.put("unpatrolled", Boolean.toString(flags.contains("!")));
    dimensions.put("newPage", Boolean.toString(flags.contains("N")));
    dimensions.put("robot", Boolean.toString(flags.contains("B")));
    dimensions.put("anonymous", Boolean.toString(anonymous));
    String[] parts = page.split(":");
    if (parts.length > 1 && !parts[1].startsWith(" ")) {
        Map<String, String> channelNamespaces = namespaces.get(channel);
        if (channelNamespaces != null && channelNamespaces.containsKey(parts[0])) {
            dimensions.put("namespace", channelNamespaces.get(parts[0]));
        } else {
            dimensions.put("namespace", "wikipedia");
        }
    } else {
        dimensions.put("namespace", "article");
    }
    float delta = m.group(6) != null ? Float.parseFloat(m.group(6)) : 0;
    metrics.put("delta", delta);
    metrics.put("added", Math.max(delta, 0));
    metrics.put("deleted", Math.min(delta, 0));
    return new InputRow() {

        @Override
        public List<String> getDimensions() {
            return dimensionList;
        }

        @Override
        public long getTimestampFromEpoch() {
            return timestamp.getMillis();
        }

        @Override
        public DateTime getTimestamp() {
            return timestamp;
        }

        @Override
        public List<String> getDimension(String dimension) {
            final String value = dimensions.get(dimension);
            if (value != null) {
                return ImmutableList.of(value);
            } else {
                return ImmutableList.of();
            }
        }

        @Override
        public Object getRaw(String dimension) {
            return dimensions.get(dimension);
        }

        @Override
        public float getFloatMetric(String metric) {
            return metrics.get(metric);
        }

        @Override
        public long getLongMetric(String metric) {
            return new Float(metrics.get(metric)).longValue();
        }

        @Override
        public int compareTo(Row o) {
            return timestamp.compareTo(o.getTimestamp());
        }

        @Override
        public String toString() {
            return "WikipediaRow{" + "timestamp=" + timestamp + ", dimensions=" + dimensions + ", metrics=" + metrics + '}';
        }
    };
}
Also used: UnknownHostException (java.net.UnknownHostException), Matcher (java.util.regex.Matcher), IOException (java.io.IOException), GeoIp2Exception (com.maxmind.geoip2.exception.GeoIp2Exception), InputRow (io.druid.data.input.InputRow), Row (io.druid.data.input.Row), Omni (com.maxmind.geoip2.model.Omni), InetAddress (java.net.InetAddress)
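
Callers interact with the returned object purely through the InputRow interface. A minimal consumption sketch, assuming decoder is a configured WikipediaIrcDecoder and rawIrcLine is a message that matches its pattern (the pattern itself is not shown above):

// Minimal sketch; decoder and rawIrcLine are assumptions, and the channel name is only an example.
InputRow row = decoder.decodeMessage(DateTime.now(), "#en.wikipedia", rawIrcLine);
String page = row.getDimension("page").isEmpty() ? null : row.getDimension("page").get(0);
boolean anonymous = Boolean.parseBoolean(row.getDimension("anonymous").get(0));
float added = row.getFloatMetric("added");
long millis = row.getTimestampFromEpoch();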

Aggregations

InputRow (io.druid.data.input.InputRow): 81 usages
Test (org.junit.Test): 35 usages
MapBasedInputRow (io.druid.data.input.MapBasedInputRow): 24 usages
BenchmarkDataGenerator (io.druid.benchmark.datagen.BenchmarkDataGenerator): 22 usages
File (java.io.File): 18 usages
Setup (org.openjdk.jmh.annotations.Setup): 15 usages
HyperUniquesSerde (io.druid.query.aggregation.hyperloglog.HyperUniquesSerde): 14 usages
Firehose (io.druid.data.input.Firehose): 12 usages
OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex): 12 usages
IndexSpec (io.druid.segment.IndexSpec): 11 usages
ArrayList (java.util.ArrayList): 11 usages
IncrementalIndex (io.druid.segment.incremental.IncrementalIndex): 10 usages
DateTime (org.joda.time.DateTime): 10 usages
QueryableIndex (io.druid.segment.QueryableIndex): 9 usages
IOException (java.io.IOException): 9 usages
BenchmarkColumnSchema (io.druid.benchmark.datagen.BenchmarkColumnSchema): 8 usages
Interval (org.joda.time.Interval): 8 usages
ParseException (io.druid.java.util.common.parsers.ParseException): 7 usages
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 6 usages
DataSegment (io.druid.timeline.DataSegment): 5 usages