Use of io.druid.data.input.InputRow in project druid by druid-io.
In class ThriftInputRowParserTest, method serializationAndTest:
public void serializationAndTest(ThriftInputRowParser parser, byte[] bytes) throws TException {
  // Parse the serialized Thrift payload wrapped in a ByteBuffer.
  ByteBuffer buffer = ByteBuffer.wrap(bytes);
  InputRow row1 = parser.parse(buffer);
  assertTrue(row1.getDimension("title").get(0).equals("title"));
  // Parse the same payload wrapped in a Hadoop BytesWritable.
  InputRow row2 = parser.parse(new BytesWritable(bytes));
  assertTrue(row2.getDimension("lastName").get(0).equals("last"));
}
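A caller of this helper needs a configured parser and a serialized Thrift payload. The sketch below is an assumption, not the real test fixture: Book stands in for a Thrift-generated class with title and lastName fields, and parser is presumed to be a ThriftInputRowParser whose parse spec extracts those two dimensions; the TSerializer call is standard libthrift.
// Hypothetical caller; Book and the parser configuration are assumptions.
Book book = new Book();
book.setTitle("title");
book.setLastName("last");
byte[] bytes = new TSerializer(new TBinaryProtocol.Factory()).serialize(book);
serializationAndTest(parser, bytes);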
Use of io.druid.data.input.InputRow in project druid by druid-io.
In class BatchDeltaIngestionTest, method testIngestion:
private void testIngestion(
    HadoopDruidIndexerConfig config,
    List<ImmutableMap<String, Object>> expectedRowsGenerated,
    WindowedDataSegment windowedDataSegment
) throws Exception {
  IndexGeneratorJob job = new IndexGeneratorJob(config);
  JobHelper.runJobs(ImmutableList.<Jobby>of(job), config);
  // The index generator writes each segment under {outputPath}/{dataSource}/{start}_{end}/{version}/{partitionNum}.
  File segmentFolder = new File(
      String.format(
          "%s/%s/%s_%s/%s/0",
          config.getSchema().getIOConfig().getSegmentOutputPath(),
          config.getSchema().getDataSchema().getDataSource(),
          INTERVAL_FULL.getStart().toString(),
          INTERVAL_FULL.getEnd().toString(),
          config.getSchema().getTuningConfig().getVersion()
      )
  );
  Assert.assertTrue(segmentFolder.exists());
  File descriptor = new File(segmentFolder, "descriptor.json");
  File indexZip = new File(segmentFolder, "index.zip");
  Assert.assertTrue(descriptor.exists());
  Assert.assertTrue(indexZip.exists());
  // Check the segment metadata written to descriptor.json.
  DataSegment dataSegment = MAPPER.readValue(descriptor, DataSegment.class);
  Assert.assertEquals("website", dataSegment.getDataSource());
  Assert.assertEquals(config.getSchema().getTuningConfig().getVersion(), dataSegment.getVersion());
  Assert.assertEquals(INTERVAL_FULL, dataSegment.getInterval());
  Assert.assertEquals("local", dataSegment.getLoadSpec().get("type"));
  Assert.assertEquals(indexZip.getCanonicalPath(), dataSegment.getLoadSpec().get("path"));
  Assert.assertEquals("host", dataSegment.getDimensions().get(0));
  Assert.assertEquals("visited_sum", dataSegment.getMetrics().get(0));
  Assert.assertEquals("unique_hosts", dataSegment.getMetrics().get(1));
  Assert.assertEquals(Integer.valueOf(9), dataSegment.getBinaryVersion());
  HashBasedNumberedShardSpec spec = (HashBasedNumberedShardSpec) dataSegment.getShardSpec();
  Assert.assertEquals(0, spec.getPartitionNum());
  Assert.assertEquals(1, spec.getPartitions());
  // Pull the segment back, load it, and read its rows out through an IngestSegmentFirehose.
  File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
  new LocalDataSegmentPuller().getSegmentFiles(dataSegment, tmpUnzippedSegmentDir);
  QueryableIndex index = INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
  StorageAdapter adapter = new QueryableIndexStorageAdapter(index);
  Firehose firehose = new IngestSegmentFirehose(
      ImmutableList.of(new WindowedStorageAdapter(adapter, windowedDataSegment.getInterval())),
      ImmutableList.of("host"),
      ImmutableList.of("visited_sum", "unique_hosts"),
      null,
      Granularities.NONE
  );
  List<InputRow> rows = Lists.newArrayList();
  while (firehose.hasMore()) {
    rows.add(firehose.nextRow());
  }
  verifyRows(expectedRowsGenerated, rows);
}
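Firehose is Closeable in Druid, so the drain loop above could also be written with try-with-resources so the firehose is released once the rows are collected. A sketch of that variant (not how the test itself is written):
// Variant of the drain loop; closes the firehose when reading is done.
List<InputRow> rows = Lists.newArrayList();
try (Firehose fh = firehose) {
  while (fh.hasMore()) {
    rows.add(fh.nextRow());
  }
}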
Use of io.druid.data.input.InputRow in project druid by druid-io.
In class DatasourceRecordReaderTest, method verifyRows:
private void verifyRows(List<InputRow> actualRows) {
  List<ImmutableMap<String, Object>> expectedRows = ImmutableList.of(
      ImmutableMap.<String, Object>of("time", DateTime.parse("2014-10-22T00:00:00.000Z"), "host", ImmutableList.of("a.example.com"), "visited_sum", 100L, "unique_hosts", 1.0d),
      ImmutableMap.<String, Object>of("time", DateTime.parse("2014-10-22T01:00:00.000Z"), "host", ImmutableList.of("b.example.com"), "visited_sum", 150L, "unique_hosts", 1.0d),
      ImmutableMap.<String, Object>of("time", DateTime.parse("2014-10-22T02:00:00.000Z"), "host", ImmutableList.of("c.example.com"), "visited_sum", 200L, "unique_hosts", 1.0d)
  );
  Assert.assertEquals(expectedRows.size(), actualRows.size());
  for (int i = 0; i < expectedRows.size(); i++) {
    Map<String, Object> expected = expectedRows.get(i);
    InputRow actual = actualRows.get(i);
    Assert.assertEquals(ImmutableList.of("host"), actual.getDimensions());
    Assert.assertEquals(expected.get("time"), actual.getTimestamp());
    Assert.assertEquals(expected.get("host"), actual.getDimension("host"));
    Assert.assertEquals(expected.get("visited_sum"), actual.getLongMetric("visited_sum"));
    // unique_hosts is an HLL sketch; compare its cardinality estimate within a small tolerance.
    Assert.assertEquals((Double) expected.get("unique_hosts"), (Double) HyperUniquesAggregatorFactory.estimateCardinality(actual.getRaw("unique_hosts")), 0.001);
  }
}
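For reference, a row matching the first expected entry could be built directly with MapBasedInputRow. This is only a sketch of the expected shape: the rows checked here actually come out of DatasourceRecordReader, and the real unique_hosts column carries a HyperLogLogCollector rather than a plain number, so it is omitted below.
// Illustration only: the shape of a row that would satisfy the first set of assertions.
InputRow row = new MapBasedInputRow(
    DateTime.parse("2014-10-22T00:00:00.000Z"),
    ImmutableList.of("host"),
    ImmutableMap.<String, Object>of(
        "host", ImmutableList.of("a.example.com"),
        "visited_sum", 100L
        // unique_hosts omitted: in a real row it is an HLL collector, not a double
    )
);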
Use of io.druid.data.input.InputRow in project druid by druid-io.
In class InputRowSerdeTest, method testSerde:
@Test
public void testSerde() {
  InputRow in = new MapBasedInputRow(timestamp, dims, event);
  AggregatorFactory[] aggregatorFactories = new AggregatorFactory[] {
      new DoubleSumAggregatorFactory("agg_non_existing", "agg_non_existing_in"),
      new DoubleSumAggregatorFactory("m1out", "m1"),
      new LongSumAggregatorFactory("m2out", "m2"),
      new HyperUniquesAggregatorFactory("m3out", "m3"),
      // Unparseable from String to Long
      new LongSumAggregatorFactory("unparseable", "m3")
  };
  // Ignore Unparseable aggregator
  byte[] data = InputRowSerde.toBytes(in, aggregatorFactories, false);
  InputRow out = InputRowSerde.fromBytes(data, aggregatorFactories);
  Assert.assertEquals(timestamp, out.getTimestampFromEpoch());
  Assert.assertEquals(dims, out.getDimensions());
  Assert.assertEquals(Collections.EMPTY_LIST, out.getDimension("dim_non_existing"));
  Assert.assertEquals(ImmutableList.of("d1v"), out.getDimension("d1"));
  Assert.assertEquals(ImmutableList.of("d2v1", "d2v2"), out.getDimension("d2"));
  Assert.assertEquals(0.0f, out.getFloatMetric("agg_non_existing"), 0.00001);
  Assert.assertEquals(5.0f, out.getFloatMetric("m1out"), 0.00001);
  Assert.assertEquals(100L, out.getLongMetric("m2out"));
  Assert.assertEquals(1, ((HyperLogLogCollector) out.getRaw("m3out")).estimateCardinality(), 0.001);
  Assert.assertEquals(0L, out.getLongMetric("unparseable"));
}
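The timestamp, dims, and event fixtures are declared elsewhere in InputRowSerdeTest. Values consistent with the assertions above would look roughly like this; they are reconstructed from the asserts, not copied from the test, so the actual declarations may differ.
// Reconstructed fixture values (assumed, not the real test fields).
long timestamp = System.currentTimeMillis();
List<String> dims = ImmutableList.of("d1", "d2");
Map<String, Object> event = ImmutableMap.<String, Object>of(
    "d1", "d1v",                              // single-valued dimension
    "d2", ImmutableList.of("d2v1", "d2v2"),   // multi-valued dimension
    "m1", 5.0f,                               // summed into m1out
    "m2", 100L,                               // summed into m2out
    "m3", "m3v"                               // one distinct value for m3out; not a long, so "unparseable" stays 0
);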
Use of io.druid.data.input.InputRow in project druid by druid-io.
In class WikipediaIrcDecoder, method decodeMessage:
@Override
public InputRow decodeMessage(final DateTime timestamp, String channel, String msg) {
  final Map<String, String> dimensions = Maps.newHashMap();
  final Map<String, Float> metrics = Maps.newHashMap();
  Matcher m = pattern.matcher(msg);
  if (!m.matches()) {
    throw new IllegalArgumentException("Invalid input format");
  }
  Matcher shortname = shortnamePattern.matcher(channel);
  if (shortname.matches()) {
    dimensions.put("language", shortname.group(1));
  }
  String page = m.group(1);
  String pageUrl = page.replaceAll("\\s", "_");
  dimensions.put("page", pageUrl);
  String user = m.group(4);
  // Anonymous edits are attributed to an IP address; geo-locate those users.
  Matcher ipMatch = ipPattern.matcher(user);
  boolean anonymous = ipMatch.matches();
  if (anonymous) {
    try {
      final InetAddress ip = InetAddress.getByName(ipMatch.group());
      final Omni lookup = geoLookup.omni(ip);
      dimensions.put("continent", lookup.getContinent().getName());
      dimensions.put("country", lookup.getCountry().getName());
      dimensions.put("region", lookup.getMostSpecificSubdivision().getName());
      dimensions.put("city", lookup.getCity().getName());
    } catch (UnknownHostException e) {
      log.error(e, "invalid ip [%s]", ipMatch.group());
    } catch (IOException e) {
      log.error(e, "error looking up geo ip");
    } catch (GeoIp2Exception e) {
      log.error(e, "error looking up geo ip");
    }
  }
  dimensions.put("user", user);
  // The flags field encodes edit attributes: "!" unpatrolled, "N" new page, "B" bot edit.
  final String flags = m.group(2);
  dimensions.put("unpatrolled", Boolean.toString(flags.contains("!")));
  dimensions.put("newPage", Boolean.toString(flags.contains("N")));
  dimensions.put("robot", Boolean.toString(flags.contains("B")));
  dimensions.put("anonymous", Boolean.toString(anonymous));
  String[] parts = page.split(":");
  if (parts.length > 1 && !parts[1].startsWith(" ")) {
    Map<String, String> channelNamespaces = namespaces.get(channel);
    if (channelNamespaces != null && channelNamespaces.containsKey(parts[0])) {
      dimensions.put("namespace", channelNamespaces.get(parts[0]));
    } else {
      dimensions.put("namespace", "wikipedia");
    }
  } else {
    dimensions.put("namespace", "article");
  }
  // Split the byte delta into separate added/deleted metrics by sign.
  float delta = m.group(6) != null ? Float.parseFloat(m.group(6)) : 0;
  metrics.put("delta", delta);
  metrics.put("added", Math.max(delta, 0));
  metrics.put("deleted", Math.min(delta, 0));
  return new InputRow() {
    @Override
    public List<String> getDimensions() {
      return dimensionList;
    }

    @Override
    public long getTimestampFromEpoch() {
      return timestamp.getMillis();
    }

    @Override
    public DateTime getTimestamp() {
      return timestamp;
    }

    @Override
    public List<String> getDimension(String dimension) {
      final String value = dimensions.get(dimension);
      if (value != null) {
        return ImmutableList.of(value);
      } else {
        return ImmutableList.of();
      }
    }

    @Override
    public Object getRaw(String dimension) {
      return dimensions.get(dimension);
    }

    @Override
    public float getFloatMetric(String metric) {
      return metrics.get(metric);
    }

    @Override
    public long getLongMetric(String metric) {
      return new Float(metrics.get(metric)).longValue();
    }

    @Override
    public int compareTo(Row o) {
      return timestamp.compareTo(o.getTimestamp());
    }

    @Override
    public String toString() {
      return "WikipediaRow{" + "timestamp=" + timestamp + ", dimensions=" + dimensions + ", metrics=" + metrics + '}';
    }
  };
}
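The added and deleted metrics are simply the positive and negative parts of delta; for a negative delta the split works out as follows.
// Not part of the decoder; just illustrates the Math.max/Math.min split above.
float delta = -12f;
float added = Math.max(delta, 0);    // 0.0  (nothing added)
float deleted = Math.min(delta, 0);  // -12.0 (bytes removed)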