Use of org.apache.druid.data.input.impl.InputRowParser in project druid by druid-io.
Example from the class SqlFirehoseTest, method testFirehoseStringParser.
@Test
public void testFirehoseStringParser() throws Exception {
  final TestCloseable closeable = new TestCloseable();
  List<Object> expectedResults = new ArrayList<>();
  for (Map<String, Object> map : inputs) {
    expectedResults.add(map.get("x"));
  }
  final List<JsonIterator<Map<String, Object>>> lineIterators =
      fileList.stream()
              .map(s -> new JsonIterator<Map<String, Object>>(TYPE_REF, s, closeable, objectMapper))
              .collect(Collectors.toList());
  final InputRowParser stringParser = TransformSpec.NONE.decorate(
      new StringInputRowParser(
          new TimeAndDimsParseSpec(
              new TimestampSpec("timestamp", "auto", null),
              new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("x")))),
          Charset.defaultCharset().name()));
  try (final SqlFirehose firehose = new SqlFirehose(lineIterators.iterator(), stringParser, closeable)) {
    final List<Object> results = new ArrayList<>();
    while (firehose.hasMore()) {
      final InputRow inputRow = firehose.nextRow();
      if (inputRow == null) {
        results.add(null);
      } else {
        results.add(inputRow.getDimension("x").get(0));
      }
    }
    Assert.assertEquals(expectedResults, results);
  }
}
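For reference, a minimal sketch (not part of the test above; the timestamp and dimension values are illustrative assumptions) of how a decorated parser turns one map into an InputRow:

InputRowParser<Map<String, Object>> parser = TransformSpec.NONE.decorate(
    new MapInputRowParser(new TimeAndDimsParseSpec(
        new TimestampSpec("timestamp", "auto", null),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("x"))))));
InputRow row = parser.parseBatch(
    ImmutableMap.<String, Object>of("timestamp", "2000-01-01T00:00:00Z", "x", "foo")).get(0);
// row.getDimension("x") -> ["foo"], the same value the firehose loop above collects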
Use of org.apache.druid.data.input.impl.InputRowParser in project druid by druid-io.
Example from the class BaseParquetInputTest, method getAllRows.
static List<InputRow> getAllRows(String parserType, HadoopDruidIndexerConfig config)
    throws IOException, InterruptedException {
  Job job = Job.getInstance(new Configuration());
  config.intoConfiguration(job);
  File testFile = new File(((StaticPathSpec) config.getPathSpec()).getPaths());
  Path path = new Path(testFile.getAbsoluteFile().toURI());
  FileSplit split = new FileSplit(path, 0, testFile.length(), null);
  InputFormat inputFormat = ReflectionUtils.newInstance(
      INPUT_FORMAT_CLASSES.get(parserType),
      job.getConfiguration());
  TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
  try (RecordReader reader = inputFormat.createRecordReader(split, context)) {
    List<InputRow> records = new ArrayList<>();
    InputRowParser parser = config.getParser();
    reader.initialize(split, context);
    // nextKeyValue() both advances the reader and reports availability, so it
    // must be called exactly once per iteration; a second call inside the loop
    // body would silently skip every other record.
    while (reader.nextKeyValue()) {
      Object data = reader.getCurrentValue();
      records.add(((List<InputRow>) parser.parseBatch(data)).get(0));
    }
    return records;
  }
}
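INPUT_FORMAT_CLASSES is declared elsewhere in the test class and maps a parser type to the Hadoop InputFormat to instantiate. A plausible sketch of its shape (the key and the InputFormat class here are assumptions for illustration, not the test's actual declaration):

private static final Map<String, Class<? extends InputFormat>> INPUT_FORMAT_CLASSES =
    ImmutableMap.of("parquet", DruidParquetInputFormat.class);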
Use of org.apache.druid.data.input.impl.InputRowParser in project druid by druid-io.
Example from the class IngestSegmentFirehoseFactoryTest, method constructorFeeder.
@Parameterized.Parameters(name = "{0}")
public static Collection<Object[]> constructorFeeder() throws IOException {
  final IndexSpec indexSpec = new IndexSpec();
  final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
      .withMinTimestamp(JodaUtils.MIN_INSTANT)
      .withDimensionsSpec(ROW_PARSER)
      .withMetrics(
          new LongSumAggregatorFactory(METRIC_LONG_NAME, DIM_LONG_NAME),
          new DoubleSumAggregatorFactory(METRIC_FLOAT_NAME, DIM_FLOAT_NAME))
      .build();
  final IncrementalIndex index = new OnheapIncrementalIndex.Builder()
      .setIndexSchema(schema)
      .setMaxRowCount(MAX_ROWS * MAX_SHARD_NUMBER)
      .build();
  for (int i = 0; i < MAX_ROWS; ++i) {
    index.add(ROW_PARSER.parseBatch(buildRow((long) i)).get(0));
  }
  FileUtils.mkdirp(PERSIST_DIR);
  INDEX_MERGER_V9.persist(index, PERSIST_DIR, indexSpec, null);
  final CoordinatorClient cc = new CoordinatorClient(null, null) {
    @Override
    public Collection<DataSegment> fetchUsedSegmentsInDataSourceForIntervals(String dataSource, List<Interval> intervals) {
      return ImmutableSet.copyOf(SEGMENT_SET);
    }
  };
  SegmentHandoffNotifierFactory notifierFactory = EasyMock.createNiceMock(SegmentHandoffNotifierFactory.class);
  EasyMock.replay(notifierFactory);
  final SegmentCacheManagerFactory slf = new SegmentCacheManagerFactory(MAPPER);
  final RetryPolicyFactory retryPolicyFactory = new RetryPolicyFactory(new RetryPolicyConfig());
  Collection<Object[]> values = new ArrayList<>();
  for (InputRowParser parser : Arrays.<InputRowParser>asList(
      ROW_PARSER,
      new MapInputRowParser(new JSONParseSpec(
          new TimestampSpec(TIME_COLUMN, "auto", null),
          DimensionsSpec.builder()
                        .setDimensionExclusions(ImmutableList.of(DIM_FLOAT_NAME, DIM_LONG_NAME))
                        .build(),
          null,
          null,
          null)))) {
    for (List<String> dimNames : Arrays.<List<String>>asList(null, ImmutableList.of(DIM_NAME))) {
      for (List<String> metricNames : Arrays.<List<String>>asList(null, ImmutableList.of(METRIC_LONG_NAME, METRIC_FLOAT_NAME))) {
        for (Boolean wrapInCombining : Arrays.asList(false, true)) {
          final IngestSegmentFirehoseFactory isfFactory = new IngestSegmentFirehoseFactory(
              TASK.getDataSource(),
              Intervals.ETERNITY,
              null,
              new SelectorDimFilter(DIM_NAME, DIM_VALUE, null),
              dimNames,
              metricNames,
              null,
              INDEX_IO,
              cc,
              slf,
              retryPolicyFactory);
          final FirehoseFactory factory = wrapInCombining
              ? new CombiningFirehoseFactory(ImmutableList.of(isfFactory))
              : isfFactory;
          values.add(new Object[]{
              StringUtils.format(
                  "DimNames[%s]MetricNames[%s]ParserDimNames[%s]WrapInCombining[%s]",
                  dimNames == null ? "null" : "dims",
                  metricNames == null ? "null" : "metrics",
                  parser == ROW_PARSER ? "dims" : "null",
                  wrapInCombining),
              factory,
              parser
          });
        }
      }
    }
  }
  return values;
}
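The four nested loops (two parsers, two dimension lists, two metric lists, and two wrapping modes) produce 16 parameter tuples. Under @Parameterized.Parameters(name = "{0}"), the formatted string in the first slot becomes the display name of each test case, for example:

DimNames[dims]MetricNames[metrics]ParserDimNames[dims]WrapInCombining[false]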
Use of org.apache.druid.data.input.impl.InputRowParser in project druid by druid-io.
Example from the class OrcHadoopInputRowParserTest, method getAllRows.
private static List<InputRow> getAllRows(HadoopDruidIndexerConfig config) throws IOException {
  Job job = Job.getInstance(new Configuration());
  config.intoConfiguration(job);
  File testFile = new File(((StaticPathSpec) config.getPathSpec()).getPaths());
  Path path = new Path(testFile.getAbsoluteFile().toURI());
  FileSplit split = new FileSplit(path, 0, testFile.length(), new String[]{"host"});
  InputFormat<NullWritable, OrcStruct> inputFormat =
      ReflectionUtils.newInstance(OrcInputFormat.class, job.getConfiguration());
  RecordReader<NullWritable, OrcStruct> reader =
      inputFormat.getRecordReader(split, new JobConf(job.getConfiguration()), null);
  try {
    List<InputRow> records = new ArrayList<>();
    InputRowParser parser = config.getParser();
    final NullWritable key = reader.createKey();
    OrcStruct value = reader.createValue();
    while (reader.next(key, value)) {
      records.add(((List<InputRow>) parser.parseBatch(value)).get(0));
      // Re-create the value object so the OrcStruct just handed to the parser
      // is not overwritten when the reader fills in the next record.
      value = reader.createValue();
    }
    return records;
  } finally {
    reader.close();
  }
}
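This reader comes from the legacy org.apache.hadoop.mapred API: records are pulled into caller-supplied key/value objects via next(key, value), and the reader is not AutoCloseable, hence the explicit try/finally. For comparison, a sketch of the same loop against the newer mapreduce API (mirroring the Parquet example above; newApiInputFormat, newApiSplit, and context are assumed mapreduce-flavored equivalents, not part of this test):

try (org.apache.hadoop.mapreduce.RecordReader<NullWritable, OrcStruct> newReader =
         newApiInputFormat.createRecordReader(newApiSplit, context)) {
  newReader.initialize(newApiSplit, context);
  while (newReader.nextKeyValue()) {
    records.add(((List<InputRow>) parser.parseBatch(newReader.getCurrentValue())).get(0));
  }
}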
Use of org.apache.druid.data.input.impl.InputRowParser in project hive by apache.
Example from the class TestDruidRecordWriter, method testWrite.
// Test is failing due to a Guava dependency conflict; Druid 0.13.0 should depend less on Guava.
@Ignore
@Test
public void testWrite() throws IOException, SegmentLoadingException {
  final String dataSourceName = "testDataSource";
  final File segmentOutputDir = temporaryFolder.newFolder();
  final File workingDir = temporaryFolder.newFolder();
  Configuration config = new Configuration();
  final InputRowParser inputRowParser = new MapInputRowParser(new TimeAndDimsParseSpec(
      new TimestampSpec(DruidConstants.DEFAULT_TIMESTAMP_COLUMN, "auto", null),
      new DimensionsSpec(ImmutableList.of(new StringDimensionSchema("host")), null, null)));
  final Map<String, Object> parserMap = objectMapper.convertValue(
      inputRowParser,
      new TypeReference<Map<String, Object>>() {
      });
  DataSchema dataSchema = new DataSchema(
      dataSourceName,
      parserMap,
      new AggregatorFactory[]{
          new LongSumAggregatorFactory("visited_sum", "visited_sum"),
          new HyperUniquesAggregatorFactory("unique_hosts", "unique_hosts")
      },
      new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(INTERVAL_FULL)),
      null,
      objectMapper);
  IndexSpec indexSpec = new IndexSpec(new RoaringBitmapSerdeFactory(true), null, null, null);
  RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(
      null, null, null, null, temporaryFolder.newFolder(), null, null, null, null,
      indexSpec, null, null, 0, 0, null, null, 0L, null, null);
  LocalFileSystem localFileSystem = FileSystem.getLocal(config);
  DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig() {
    @Override
    public File getStorageDirectory() {
      return segmentOutputDir;
    }
  });
  Path segmentDescriptorPath =
      new Path(workingDir.getAbsolutePath(), DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME);
  DruidRecordWriter druidRecordWriter = new DruidRecordWriter(
      dataSchema, tuningConfig, dataSegmentPusher, 20, segmentDescriptorPath, localFileSystem);
  List<DruidWritable> druidWritables = expectedRows.stream()
      .map(input -> new DruidWritable(ImmutableMap.<String, Object>builder()
          .putAll(input)
          .put(
              Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME,
              Granularities.DAY
                  .bucketStart(new DateTime((long) input.get(DruidConstants.DEFAULT_TIMESTAMP_COLUMN)))
                  .getMillis())
          .build()))
      .collect(Collectors.toList());
  for (DruidWritable druidWritable : druidWritables) {
    druidRecordWriter.write(druidWritable);
  }
  druidRecordWriter.close(false);
  List<DataSegment> dataSegmentList = DruidStorageHandlerUtils.getCreatedSegments(segmentDescriptorPath, config);
  Assert.assertEquals(1, dataSegmentList.size());
  File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
  new LocalDataSegmentPuller().getSegmentFiles(dataSegmentList.get(0), tmpUnzippedSegmentDir);
  final QueryableIndex queryableIndex = DruidStorageHandlerUtils.INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
  QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(queryableIndex);
  Firehose firehose = new IngestSegmentFirehose(
      ImmutableList.of(new WindowedStorageAdapter(adapter, adapter.getInterval())),
      null,
      ImmutableList.of("host"),
      ImmutableList.of("visited_sum", "unique_hosts"),
      null);
  List<InputRow> rows = Lists.newArrayList();
  while (firehose.hasMore()) {
    rows.add(firehose.nextRow());
  }
  verifyRows(expectedRows, rows);
}
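To make the writable construction concrete, here is a minimal sketch of how a single input map becomes a DruidWritable (the host and metric values are illustrative assumptions; the column-name constants are the ones the test uses):

Map<String, Object> input = ImmutableMap.<String, Object>of(
    DruidConstants.DEFAULT_TIMESTAMP_COLUMN, 1546300800000L, // 2019-01-01T00:00:00Z
    "host", "a.example.com",
    "visited_sum", 190L);
DruidWritable writable = new DruidWritable(ImmutableMap.<String, Object>builder()
    .putAll(input)
    .put(
        Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME,
        Granularities.DAY
            .bucketStart(new DateTime((long) input.get(DruidConstants.DEFAULT_TIMESTAMP_COLUMN)))
            .getMillis())
    .build());
// The extra column carries the start of the day bucket the row falls into.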