Search in sources :

Example 1 with InputSourceReader

use of org.apache.druid.data.input.InputSourceReader in project druid by druid-io.

the class S3InputSourceTest method testReader.

@Test
public void testReader() throws IOException {
    EasyMock.reset(S3_CLIENT);
    expectListObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_URIS.get(0)), CONTENT);
    expectListObjects(EXPECTED_URIS.get(1), ImmutableList.of(EXPECTED_URIS.get(1)), CONTENT);
    expectGetObject(EXPECTED_URIS.get(0));
    expectGetObject(EXPECTED_URIS.get(1));
    EasyMock.replay(S3_CLIENT);
    S3InputSource inputSource = new S3InputSource(SERVICE, SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER, INPUT_DATA_CONFIG, null, ImmutableList.of(PREFIXES.get(0), EXPECTED_URIS.get(1)), null, null);
    InputRowSchema someSchema = new InputRowSchema(new TimestampSpec("time", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2"))), ColumnsFilter.all());
    InputSourceReader reader = inputSource.reader(someSchema, new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0), temporaryFolder.newFolder());
    CloseableIterator<InputRow> iterator = reader.read();
    while (iterator.hasNext()) {
        InputRow nextRow = iterator.next();
        Assert.assertEquals(NOW, nextRow.getTimestamp());
        Assert.assertEquals("hello", nextRow.getDimension("dim1").get(0));
        Assert.assertEquals("world", nextRow.getDimension("dim2").get(0));
    }
    EasyMock.verify(S3_CLIENT);
}
Also used : InputSourceReader(org.apache.druid.data.input.InputSourceReader) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 2 with InputSourceReader

use of org.apache.druid.data.input.InputSourceReader in project druid by druid-io.

the class S3InputSourceTest method testReaderRetriesOnSdkClientExceptionButNeverSucceedsThenThrows.

@Test(expected = SdkClientException.class)
public void testReaderRetriesOnSdkClientExceptionButNeverSucceedsThenThrows() throws Exception {
    EasyMock.reset(S3_CLIENT);
    expectListObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_URIS.get(0)), CONTENT);
    expectSdkClientException(EXPECTED_URIS.get(0));
    EasyMock.replay(S3_CLIENT);
    S3InputSource inputSource = new S3InputSource(SERVICE, SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER, INPUT_DATA_CONFIG, null, ImmutableList.of(PREFIXES.get(0)), null, null, // only have three retries since they are slow
    3);
    InputRowSchema someSchema = new InputRowSchema(new TimestampSpec("time", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2"))), ColumnsFilter.all());
    InputSourceReader reader = inputSource.reader(someSchema, new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0), temporaryFolder.newFolder());
    CloseableIterator<InputRow> iterator = reader.read();
    while (iterator.hasNext()) {
        InputRow nextRow = iterator.next();
        Assert.assertEquals(NOW, nextRow.getTimestamp());
        Assert.assertEquals("hello", nextRow.getDimension("dim1").get(0));
        Assert.assertEquals("world", nextRow.getDimension("dim2").get(0));
    }
    EasyMock.verify(S3_CLIENT);
}
Also used : InputSourceReader(org.apache.druid.data.input.InputSourceReader) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 3 with InputSourceReader

use of org.apache.druid.data.input.InputSourceReader in project druid by druid-io.

the class RecordSupplierInputSourceTest method testRead.

@Test
public void testRead() throws IOException {
    final RandomCsvSupplier supplier = new RandomCsvSupplier();
    final InputSource inputSource = new RecordSupplierInputSource<>("topic", supplier, false);
    final List<String> colNames = IntStream.range(0, NUM_COLS).mapToObj(i -> StringUtils.format("col_%d", i)).collect(Collectors.toList());
    final InputFormat inputFormat = new CsvInputFormat(colNames, null, null, false, 0);
    final InputSourceReader reader = inputSource.reader(new InputRowSchema(new TimestampSpec("col_0", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(colNames.subList(1, colNames.size()))), ColumnsFilter.all()), inputFormat, temporaryFolder.newFolder());
    int read = 0;
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        for (; read < NUM_ROWS && iterator.hasNext(); read++) {
            final InputRow inputRow = iterator.next();
            Assert.assertEquals(DateTimes.of(TIMESTAMP_STRING), inputRow.getTimestamp());
            Assert.assertEquals(NUM_COLS - 1, inputRow.getDimensions().size());
        }
    }
    Assert.assertEquals(NUM_ROWS, read);
    Assert.assertTrue(supplier.isClosed());
}
Also used : IntStream(java.util.stream.IntStream) StreamPartition(org.apache.druid.indexing.seekablestream.common.StreamPartition) RecordSupplier(org.apache.druid.indexing.seekablestream.common.RecordSupplier) OrderedPartitionableRecord(org.apache.druid.indexing.seekablestream.common.OrderedPartitionableRecord) Random(java.util.Random) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) ArrayList(java.util.ArrayList) InputSource(org.apache.druid.data.input.InputSource) Map(java.util.Map) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) InputSourceReader(org.apache.druid.data.input.InputSourceReader) CloseableIterator(org.apache.druid.java.util.common.parsers.CloseableIterator) Nullable(javax.annotation.Nullable) DateTimes(org.apache.druid.java.util.common.DateTimes) ColumnsFilter(org.apache.druid.data.input.ColumnsFilter) InputFormat(org.apache.druid.data.input.InputFormat) Collection(java.util.Collection) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) IOException(java.io.IOException) NotNull(javax.validation.constraints.NotNull) Maps(com.google.common.collect.Maps) Collectors(java.util.stream.Collectors) InputRow(org.apache.druid.data.input.InputRow) List(java.util.List) Rule(org.junit.Rule) ByteEntity(org.apache.druid.data.input.impl.ByteEntity) RandomStringUtils(org.apache.commons.lang3.RandomStringUtils) Assert(org.junit.Assert) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) InputSource(org.apache.druid.data.input.InputSource) InputSourceReader(org.apache.druid.data.input.InputSourceReader) InputFormat(org.apache.druid.data.input.InputFormat) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 4 with InputSourceReader

use of org.apache.druid.data.input.InputSourceReader in project druid by druid-io.

the class OssInputSourceTest method testReader.

@Test
public void testReader() throws IOException {
    EasyMock.reset(OSSCLIENT);
    expectListObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_URIS.get(0)), CONTENT);
    expectListObjects(EXPECTED_URIS.get(1), ImmutableList.of(EXPECTED_URIS.get(1)), CONTENT);
    expectGetObject(EXPECTED_URIS.get(0));
    expectGetObject(EXPECTED_URIS.get(1));
    EasyMock.replay(OSSCLIENT);
    OssInputSource inputSource = new OssInputSource(OSSCLIENT, INPUT_DATA_CONFIG, null, ImmutableList.of(PREFIXES.get(0), EXPECTED_URIS.get(1)), null, null);
    InputRowSchema someSchema = new InputRowSchema(new TimestampSpec("time", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2"))), ColumnsFilter.all());
    InputSourceReader reader = inputSource.reader(someSchema, new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0), temporaryFolder.newFolder());
    CloseableIterator<InputRow> iterator = reader.read();
    while (iterator.hasNext()) {
        InputRow nextRow = iterator.next();
        Assert.assertEquals(NOW, nextRow.getTimestamp());
        Assert.assertEquals("hello", nextRow.getDimension("dim1").get(0));
        Assert.assertEquals("world", nextRow.getDimension("dim2").get(0));
    }
    EasyMock.verify(OSSCLIENT);
}
Also used : InputSourceReader(org.apache.druid.data.input.InputSourceReader) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 5 with InputSourceReader

use of org.apache.druid.data.input.InputSourceReader in project druid by druid-io.

the class GoogleCloudStorageInputSourceTest method testCompressedReader.

@Test
public void testCompressedReader() throws IOException {
    EasyMock.reset(STORAGE);
    EasyMock.reset(INPUT_DATA_CONFIG);
    addExpectedPrefixObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_COMPRESSED_URIS.get(0)));
    addExpectedGetCompressedObjectMock(EXPECTED_COMPRESSED_URIS.get(0));
    addExpectedPrefixObjects(PREFIXES.get(1), ImmutableList.of(EXPECTED_COMPRESSED_URIS.get(1)));
    addExpectedGetCompressedObjectMock(EXPECTED_COMPRESSED_URIS.get(1));
    EasyMock.expect(INPUT_DATA_CONFIG.getMaxListingLength()).andReturn(MAX_LISTING_LENGTH);
    EasyMock.replay(STORAGE);
    EasyMock.replay(INPUT_DATA_CONFIG);
    GoogleCloudStorageInputSource inputSource = new GoogleCloudStorageInputSource(STORAGE, INPUT_DATA_CONFIG, null, PREFIXES, null);
    InputRowSchema someSchema = new InputRowSchema(new TimestampSpec("time", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2"))), ColumnsFilter.all());
    InputSourceReader reader = inputSource.reader(someSchema, new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0), null);
    CloseableIterator<InputRow> iterator = reader.read();
    while (iterator.hasNext()) {
        InputRow nextRow = iterator.next();
        Assert.assertEquals(NOW, nextRow.getTimestamp());
        Assert.assertEquals("hello", nextRow.getDimension("dim1").get(0));
        Assert.assertEquals("world", nextRow.getDimension("dim2").get(0));
    }
}
Also used : InputSourceReader(org.apache.druid.data.input.InputSourceReader) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Aggregations

InputSourceReader (org.apache.druid.data.input.InputSourceReader)15 InputRow (org.apache.druid.data.input.InputRow)12 Test (org.junit.Test)12 InputRowSchema (org.apache.druid.data.input.InputRowSchema)10 TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)9 CsvInputFormat (org.apache.druid.data.input.impl.CsvInputFormat)8 DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)8 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)8 ArrayList (java.util.ArrayList)6 File (java.io.File)5 Map (java.util.Map)3 Row (org.apache.druid.data.input.Row)3 Collection (java.util.Collection)2 Collections (java.util.Collections)2 List (java.util.List)2 ThreadLocalRandom (java.util.concurrent.ThreadLocalRandom)2 Nullable (javax.annotation.Nullable)2 InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues)2 TimedShutoffInputSourceReader (org.apache.druid.data.input.impl.TimedShutoffInputSourceReader)2 JacksonInject (com.fasterxml.jackson.annotation.JacksonInject)1