Search in sources :

Example 1 with AvroRecordReader

Use of com.linkedin.pinot.core.data.readers.AvroRecordReader in project pinot by linkedin.

In the class AvroDataPublisherTest, method TestReadAvro:

/**
 * Reads the Avro test file through an {@link AvroRecordReader} and compares each record,
 * column by column, with the line at the same position in the JSON reference file.
 *
 * <p>Only {@code column3} and {@code column2} are declared in the schema handed to the reader.
 * The test finally checks that the JSON reference file contains 10001 lines.
 *
 * @throws Exception if any step of locating or reading the test data fails
 */
@Test
public void TestReadAvro() throws Exception {
    final String filePath = TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(AVRO_DATA));
    final String jsonPath = TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(JSON_DATA));
    final Schema schema = new Schema.SchemaBuilder()
        .addSingleValueDimension("column3", DataType.STRING)
        .addSingleValueDimension("column2", DataType.STRING)
        .build();
    final SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
    config.setFormat(FileFormat.AVRO);
    config.setInputFilePath(filePath);
    config.setSegmentVersion(SegmentVersion.v1);
    // NOTE(review): the reader is never released in this test; if AvroRecordReader exposes a
    // close() method, consider calling it in a finally block — confirm against the reader API.
    final AvroRecordReader avroDataPublisher = (AvroRecordReader) RecordReaderFactory.get(config);
    int cnt = 0;
    for (final String line : FileUtils.readLines(new File(jsonPath))) {
        final JSONObject obj = new JSONObject(line);
        // JSON lines that have no matching Avro record are still counted, just not compared.
        if (avroDataPublisher.hasNext()) {
            final GenericRow recordRow = avroDataPublisher.next();
            for (final String column : recordRow.getFieldNames()) {
                final String valueFromJson = obj.get(column).toString();
                final String valueFromAvro = recordRow.getValue(column).toString();
                // Rows 0 and 1 are read but not compared.
                // NOTE(review): the reason for skipping them is not evident here — confirm.
                if (cnt > 1) {
                    // TestNG order is (actual, expected): the Avro value is under test,
                    // the JSON value is the reference.
                    Assert.assertEquals(valueFromAvro, valueFromJson);
                }
            }
        }
        cnt++;
    }
    // cnt counts JSON lines, not Avro records.
    Assert.assertEquals(cnt, 10001);
}
Also used : GenericRow(com.linkedin.pinot.core.data.GenericRow) JSONObject(org.json.JSONObject) AvroRecordReader(com.linkedin.pinot.core.data.readers.AvroRecordReader) Schema(com.linkedin.pinot.common.data.Schema) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) File(java.io.File) Test(org.testng.annotations.Test)

Example 2 with AvroRecordReader

Use of com.linkedin.pinot.core.data.readers.AvroRecordReader in project pinot by linkedin.

In the class AvroDataPublisherTest, method TestReadMultiValueAvro:

/**
 * Reads every record of the multi-value Avro test file through an {@link AvroRecordReader},
 * using the schema extracted from the Avro file itself, and checks that each column value is
 * present and has the expected container type.
 *
 * <p>Single-value columns must be non-null. Multi-value columns must be a non-null
 * {@code Object[]} whose elements are all non-null. The test finally checks that exactly
 * 28949 records were read.
 *
 * @throws Exception if any step of locating or reading the test data fails
 */
@Test
public void TestReadMultiValueAvro() throws Exception {
    final String filePath = TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(AVRO_MULTI_DATA));
    final SegmentGeneratorConfig config = new SegmentGeneratorConfig(AvroUtils.extractSchemaFromAvro(new File(filePath)));
    config.setFormat(FileFormat.AVRO);
    config.setInputFilePath(filePath);
    config.setSegmentVersion(SegmentVersion.v1);
    final AvroRecordReader avroDataPublisher = (AvroRecordReader) RecordReaderFactory.get(config);
    int cnt = 0;
    while (avroDataPublisher.hasNext()) {
        final GenericRow recordRow = avroDataPublisher.next();
        for (final String column : recordRow.getFieldNames()) {
            final boolean singleValue = avroDataPublisher.getSchema().getFieldSpecFor(column).isSingleValueField();
            final Object valueFromAvro = recordRow.getValue(column);
            // Explicit checks replace the string that used to be built here and then discarded;
            // a null value or a wrong container type still fails the test, but with a clear message.
            Assert.assertNotNull(valueFromAvro, "null value for column " + column);
            if (!singleValue) {
                Assert.assertTrue(valueFromAvro instanceof Object[],
                    "multi-value column " + column + " is not an Object[]");
                for (final Object valueObject : (Object[]) valueFromAvro) {
                    Assert.assertNotNull(valueObject, "null element in multi-value column " + column);
                }
            }
        }
        cnt++;
    }
    // TestNG order is (actual, expected).
    Assert.assertEquals(cnt, 28949);
}
Also used : GenericRow(com.linkedin.pinot.core.data.GenericRow) AvroRecordReader(com.linkedin.pinot.core.data.readers.AvroRecordReader) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) JSONObject(org.json.JSONObject) File(java.io.File) Test(org.testng.annotations.Test)

Example 3 with AvroRecordReader

Use of com.linkedin.pinot.core.data.readers.AvroRecordReader in project pinot by linkedin.

In the class AvroDataPublisherTest, method TestReadPartialAvro:

/**
 * Reads the Avro test file through an {@link AvroRecordReader} built directly from a plain
 * field extractor whose schema declares only {@code column3} and {@code column2}, and checks
 * that each record exposes exactly those two fields with values matching the JSON reference
 * file.
 *
 * <p>The test finally checks that the JSON reference file contains 10001 lines.
 *
 * @throws Exception if any step of locating or reading the test data fails
 */
@Test
public void TestReadPartialAvro() throws Exception {
    final String filePath = TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(AVRO_DATA));
    final String jsonPath = TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(JSON_DATA));
    final Schema schema = new Schema.SchemaBuilder()
        .addSingleValueDimension("column3", DataType.STRING)
        .addSingleValueDimension("column2", DataType.STRING)
        .build();
    final SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
    config.setFormat(FileFormat.AVRO);
    config.setInputFilePath(filePath);
    config.setSegmentVersion(SegmentVersion.v1);
    final AvroRecordReader avroDataPublisher = new AvroRecordReader(FieldExtractorFactory.getPlainFieldExtractor(config), config.getInputFilePath());
    // One record is consumed and discarded before the comparison loop starts.
    // NOTE(review): the reason for dropping it is not evident here — confirm it is intentional.
    avroDataPublisher.next();
    int cnt = 0;
    for (final String line : FileUtils.readLines(new File(jsonPath))) {
        final JSONObject obj = new JSONObject(line);
        // JSON lines that have no matching Avro record are still counted, just not compared.
        if (avroDataPublisher.hasNext()) {
            final GenericRow recordRow = avroDataPublisher.next();
            // Only the two schema columns may come back from the reader.
            Assert.assertEquals(recordRow.getFieldNames().length, 2);
            for (final String column : recordRow.getFieldNames()) {
                final String valueFromJson = obj.get(column).toString();
                final String valueFromAvro = recordRow.getValue(column).toString();
                // Rows 0 and 1 are read but not compared.
                // NOTE(review): the reason for skipping them is not evident here — confirm.
                if (cnt > 1) {
                    Assert.assertEquals(valueFromAvro, valueFromJson);
                }
            }
        }
        cnt++;
    }
    // cnt counts JSON lines, not Avro records; TestNG order is (actual, expected).
    Assert.assertEquals(cnt, 10001);
}
Also used : GenericRow(com.linkedin.pinot.core.data.GenericRow) JSONObject(org.json.JSONObject) AvroRecordReader(com.linkedin.pinot.core.data.readers.AvroRecordReader) Schema(com.linkedin.pinot.common.data.Schema) ArrayList(java.util.ArrayList) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) File(java.io.File) Test(org.testng.annotations.Test)

Aggregations

GenericRow (com.linkedin.pinot.core.data.GenericRow): 3, AvroRecordReader (com.linkedin.pinot.core.data.readers.AvroRecordReader): 3, SegmentGeneratorConfig (com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig): 3, File (java.io.File): 3, JSONObject (org.json.JSONObject): 3, Test (org.testng.annotations.Test): 3, Schema (com.linkedin.pinot.common.data.Schema): 2, ArrayList (java.util.ArrayList): 1