Use of org.apache.druid.data.input.InputSourceReader in project druid by druid-io.
The class OssInputSourceTest, method testCompressedReader:
@Test
public void testCompressedReader() throws IOException
{
  // Mock listings for the prefix and the explicit URI, plus gzip-compressed GETs for both objects.
  EasyMock.reset(OSSCLIENT);
  expectListObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_COMPRESSED_URIS.get(0)), CONTENT);
  expectListObjects(EXPECTED_COMPRESSED_URIS.get(1), ImmutableList.of(EXPECTED_COMPRESSED_URIS.get(1)), CONTENT);
  expectGetObjectCompressed(EXPECTED_COMPRESSED_URIS.get(0));
  expectGetObjectCompressed(EXPECTED_COMPRESSED_URIS.get(1));
  EasyMock.replay(OSSCLIENT);

  OssInputSource inputSource = new OssInputSource(
      OSSCLIENT,
      INPUT_DATA_CONFIG,
      null,
      ImmutableList.of(PREFIXES.get(0), EXPECTED_COMPRESSED_URIS.get(1)),
      null,
      null
  );
  InputRowSchema someSchema = new InputRowSchema(
      new TimestampSpec("time", "auto", null),
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2"))),
      ColumnsFilter.all()
  );
  InputSourceReader reader = inputSource.reader(
      someSchema,
      new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0),
      temporaryFolder.newFolder()
  );

  CloseableIterator<InputRow> iterator = reader.read();
  while (iterator.hasNext()) {
    InputRow nextRow = iterator.next();
    Assert.assertEquals(NOW, nextRow.getTimestamp());
    Assert.assertEquals("hello", nextRow.getDimension("dim1").get(0));
    Assert.assertEquals("world", nextRow.getDimension("dim2").get(0));
  }
  EasyMock.verify(OSSCLIENT);
}
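The expectGetObjectCompressed helper is not shown above. A minimal sketch of the kind of fixture it needs, assuming the test gzips the same CONTENT bytes so the mocked client can serve them for a key ending in ".gz" (the helper's exact wiring and the gzipping shown here are assumptions, not taken from the snippet):

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.zip.GZIPOutputStream;

// Hypothetical fixture: gzip the CONTENT bytes so the mocked OSS client can
// return them for a ".gz" object key; the reader is then expected to
// decompress transparently, which is what the assertions above exercise.
private static byte[] gzipBytes(byte[] content) throws IOException
{
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  try (GZIPOutputStream gzip = new GZIPOutputStream(out)) {
    gzip.write(content);
  }
  return out.toByteArray();
}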
Use of org.apache.druid.data.input.InputSourceReader in project druid by druid-io.
The class GoogleCloudStorageInputSourceTest, method testReader:
@Test
public void testReader() throws IOException
{
  EasyMock.reset(STORAGE);
  EasyMock.reset(INPUT_DATA_CONFIG);

  // Mock one listed object per prefix and a GET for each resulting URI.
  addExpectedPrefixObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_URIS.get(0)));
  addExpectedGetObjectMock(EXPECTED_URIS.get(0));
  addExpectedPrefixObjects(PREFIXES.get(1), ImmutableList.of(EXPECTED_URIS.get(1)));
  addExpectedGetObjectMock(EXPECTED_URIS.get(1));
  EasyMock.expect(INPUT_DATA_CONFIG.getMaxListingLength()).andReturn(MAX_LISTING_LENGTH);
  EasyMock.replay(STORAGE);
  EasyMock.replay(INPUT_DATA_CONFIG);

  GoogleCloudStorageInputSource inputSource =
      new GoogleCloudStorageInputSource(STORAGE, INPUT_DATA_CONFIG, null, PREFIXES, null);
  InputRowSchema someSchema = new InputRowSchema(
      new TimestampSpec("time", "auto", null),
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2"))),
      ColumnsFilter.all()
  );
  InputSourceReader reader = inputSource.reader(
      someSchema,
      new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0),
      null
  );

  CloseableIterator<InputRow> iterator = reader.read();
  while (iterator.hasNext()) {
    InputRow nextRow = iterator.next();
    Assert.assertEquals(NOW, nextRow.getTimestamp());
    Assert.assertEquals("hello", nextRow.getDimension("dim1").get(0));
    Assert.assertEquals("world", nextRow.getDimension("dim2").get(0));
  }
}
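The assertions above only hold for particular fixture values. A minimal sketch of what NOW, CONTENT, and EXPECTED_URIS could look like, assuming one CSV line per object whose first field is the NOW timestamp in epoch millis (the exact paths and values are assumptions, for illustration only):

import java.net.URI;
import java.util.Arrays;
import java.util.List;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.StringUtils;
import org.joda.time.DateTime;

// Hypothetical fixtures: the "auto" timestamp format accepts epoch millis,
// so each parsed row's timestamp compares equal to NOW.
private static final DateTime NOW = DateTimes.nowUtc();
private static final byte[] CONTENT =
    StringUtils.toUtf8(StringUtils.format("%d,hello,world", NOW.getMillis()));
private static final List<URI> EXPECTED_URIS = Arrays.asList(
    URI.create("gs://foo/bar/file.csv"),
    URI.create("gs://bar/foo/file2.csv")
);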
Use of org.apache.druid.data.input.InputSourceReader in project druid by druid-io.
The class S3InputSourceTest, method testCompressedReader:
@Test
public void testCompressedReader() throws IOException
{
  // Mock listings for the prefix and the explicit URI, plus gzip-compressed GETs for both objects.
  EasyMock.reset(S3_CLIENT);
  expectListObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_COMPRESSED_URIS.get(0)), CONTENT);
  expectListObjects(EXPECTED_COMPRESSED_URIS.get(1), ImmutableList.of(EXPECTED_COMPRESSED_URIS.get(1)), CONTENT);
  expectGetObjectCompressed(EXPECTED_COMPRESSED_URIS.get(0));
  expectGetObjectCompressed(EXPECTED_COMPRESSED_URIS.get(1));
  EasyMock.replay(S3_CLIENT);

  S3InputSource inputSource = new S3InputSource(
      SERVICE,
      SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER,
      INPUT_DATA_CONFIG,
      null,
      ImmutableList.of(PREFIXES.get(0), EXPECTED_COMPRESSED_URIS.get(1)),
      null,
      null
  );
  InputRowSchema someSchema = new InputRowSchema(
      new TimestampSpec("time", "auto", null),
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2"))),
      ColumnsFilter.all()
  );
  InputSourceReader reader = inputSource.reader(
      someSchema,
      new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0),
      temporaryFolder.newFolder()
  );

  CloseableIterator<InputRow> iterator = reader.read();
  while (iterator.hasNext()) {
    InputRow nextRow = iterator.next();
    Assert.assertEquals(NOW, nextRow.getTimestamp());
    Assert.assertEquals("hello", nextRow.getDimension("dim1").get(0));
    Assert.assertEquals("world", nextRow.getDimension("dim2").get(0));
  }
  EasyMock.verify(S3_CLIENT);
}
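In all of the tests above, reader.read() returns a CloseableIterator, which also implements Closeable. Outside a short-lived test one would normally close it; a minimal consumption sketch:

// Minimal sketch: closing the iterator releases the underlying object streams.
try (CloseableIterator<InputRow> it = reader.read()) {
  while (it.hasNext()) {
    InputRow row = it.next();
    // use row.getTimestamp(), row.getDimension("dim1"), ...
  }
}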
Use of org.apache.druid.data.input.InputSourceReader in project druid by druid-io.
The class DruidInputSource, method fixedFormatReader:
@Override
protected InputSourceReader fixedFormatReader(InputRowSchema inputRowSchema, @Nullable File temporaryDirectory)
{
  final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(temporaryDirectory);
  final List<TimelineObjectHolder<String, DataSegment>> timeline = createTimeline();

  // Turn each timeline holder into one input entity per partition chunk.
  final Iterator<DruidSegmentInputEntity> entityIterator = FluentIterable
      .from(timeline)
      .transformAndConcat(holder -> {
        //noinspection ConstantConditions
        final PartitionHolder<DataSegment> partitionHolder = holder.getObject();
        //noinspection ConstantConditions
        return FluentIterable
            .from(partitionHolder)
            .transform(chunk -> new DruidSegmentInputEntity(segmentCacheManager, chunk.getObject(), holder.getInterval()));
      })
      .iterator();

  final DruidSegmentInputFormat inputFormat = new DruidSegmentInputFormat(indexIO, dimFilter);

  final InputRowSchema inputRowSchemaToUse;
  if (taskConfig.isIgnoreTimestampSpecForDruidInputSource()) {
    // Legacy compatibility mode; see https://github.com/apache/druid/pull/10267.
    LOG.warn(
        "Ignoring the provided timestampSpec and reading the __time column instead. To use timestampSpecs with "
        + "the 'druid' input source, set druid.indexer.task.ignoreTimestampSpecForDruidInputSource to false."
    );
    inputRowSchemaToUse = new InputRowSchema(
        new TimestampSpec(ColumnHolder.TIME_COLUMN_NAME, STANDARD_TIME_COLUMN_FORMATS.iterator().next(), null),
        inputRowSchema.getDimensionsSpec(),
        inputRowSchema.getColumnsFilter().plus(ColumnHolder.TIME_COLUMN_NAME)
    );
  } else {
    inputRowSchemaToUse = inputRowSchema;
  }

  if (ColumnHolder.TIME_COLUMN_NAME.equals(inputRowSchemaToUse.getTimestampSpec().getTimestampColumn())
      && !STANDARD_TIME_COLUMN_FORMATS.contains(inputRowSchemaToUse.getTimestampSpec().getTimestampFormat())) {
    // Slight chance the user did this intentionally, but not likely. Log a warning.
    LOG.warn(
        "The provided timestampSpec refers to the %s column without using format %s. If you wanted to read the "
        + "column as-is, switch formats.",
        inputRowSchemaToUse.getTimestampSpec().getTimestampColumn(),
        STANDARD_TIME_COLUMN_FORMATS
    );
  }

  return new InputEntityIteratingReader(inputRowSchemaToUse, inputFormat, entityIterator, temporaryDirectory);
}
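The legacy branch builds its fallback TimestampSpec from the first entry of STANDARD_TIME_COLUMN_FORMATS. A sketch of what that constant plausibly contains (its exact contents are an assumption here, not shown in the snippet):

import java.util.Set;
import com.google.common.collect.ImmutableSet;

// Assumed contents: "millis" matches how the __time column is stored, and
// "auto" lets the parser detect the format.
public static final Set<String> STANDARD_TIME_COLUMN_FORMATS = ImmutableSet.of("millis", "auto");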
Use of org.apache.druid.data.input.InputSourceReader in project druid by druid-io.
The class GeneratorInputSourceTest, method testReader:
@Test
public void testReader() throws IOException
{
  final long seed = 1024L;
  final long millis = DateTimes.nowUtc().getMillis();
  final int numConsecutiveTimestamps = 1000;
  final double timestampIncrement = 1.0;
  final int numRows = 1000;

  GeneratorInputSource inputSource = new GeneratorInputSource(
      "basic", null, numRows, 2, seed, millis, numConsecutiveTimestamps, timestampIncrement
  );
  // A DataGenerator built with the same seed and time parameters should emit the same rows.
  DataGenerator generator = new DataGenerator(
      GeneratorBasicSchemas.SCHEMA_MAP.get("basic").getColumnSchemas(),
      seed, millis, numConsecutiveTimestamps, timestampIncrement
  );

  InputSourceReader reader = inputSource.fixedFormatReader(null, null);
  CloseableIterator<InputRow> iterator = reader.read();

  InputRow first = iterator.next();
  InputRow generatorFirst = generator.nextRow();
  Assert.assertEquals(generatorFirst, first);
  Assert.assertTrue(iterator.hasNext());

  // Drain the remaining rows; 'i' starts at 1 because the first row was already consumed.
  int i;
  for (i = 1; iterator.hasNext(); i++) {
    iterator.next();
  }
  Assert.assertEquals(numRows, i);
}
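An equivalent way to drain and count the rows, written as a sketch against the same inputSource as above, that also closes the iterator when iteration finishes:

// Sketch: count all rows while ensuring the iterator is closed afterwards.
int count = 0;
try (CloseableIterator<InputRow> it = inputSource.fixedFormatReader(null, null).read()) {
  while (it.hasNext()) {
    it.next();
    count++;
  }
}
Assert.assertEquals(numRows, count);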