Search in sources:

Example 11 with GenericRow

use of com.linkedin.pinot.core.data.GenericRow in project pinot by linkedin.

the class AvroDataPublisherTest method TestReadAvro.

/**
 * Reads the Avro fixture through an {@code AvroRecordReader} and compares it, line by line,
 * against the line-delimited JSON fixture.
 *
 * For every JSON line for which the reader still has a record, each field of the Avro row is
 * compared to the JSON value of the same name using their string forms. The first two lines
 * (counter values 0 and 1) are read but not compared. Finally the number of JSON lines is
 * asserted to be 10001.
 *
 * @throws Exception if a fixture cannot be located, read or parsed
 */
@Test
public void TestReadAvro() throws Exception {
    final ClassLoader resourceLoader = getClass().getClassLoader();
    final String avroFile = TestUtils.getFileFromResourceUrl(resourceLoader.getResource(AVRO_DATA));
    final String jsonFile = TestUtils.getFileFromResourceUrl(resourceLoader.getResource(JSON_DATA));

    // Only these two string dimensions are requested from the Avro file.
    final Schema twoColumnSchema = new Schema.SchemaBuilder()
        .addSingleValueDimension("column3", DataType.STRING)
        .addSingleValueDimension("column2", DataType.STRING)
        .build();

    final SegmentGeneratorConfig generatorConfig = new SegmentGeneratorConfig(twoColumnSchema);
    generatorConfig.setFormat(FileFormat.AVRO);
    generatorConfig.setInputFilePath(avroFile);
    generatorConfig.setSegmentVersion(SegmentVersion.v1);

    final AvroRecordReader reader = (AvroRecordReader) RecordReaderFactory.get(generatorConfig);

    int linesSeen = 0;
    for (String jsonLine : FileUtils.readLines(new File(jsonFile))) {
        final JSONObject expected = new JSONObject(jsonLine);
        if (reader.hasNext()) {
            final GenericRow actual = reader.next();
            for (String field : actual.getFieldNames()) {
                // Both string forms are always computed, even for the lines that are not compared.
                final String expectedText = expected.get(field).toString();
                final String actualText = actual.getValue(field).toString();
                if (linesSeen >= 2) {
                    Assert.assertEquals(expectedText, actualText);
                }
            }
        }
        linesSeen++;
    }

    // The counter tracks JSON lines, not Avro records.
    Assert.assertEquals(linesSeen, 10001);
}
Also used : GenericRow(com.linkedin.pinot.core.data.GenericRow) JSONObject(org.json.JSONObject) AvroRecordReader(com.linkedin.pinot.core.data.readers.AvroRecordReader) Schema(com.linkedin.pinot.common.data.Schema) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) File(java.io.File) Test(org.testng.annotations.Test)

Example 12 with GenericRow

use of com.linkedin.pinot.core.data.GenericRow in project pinot by linkedin.

the class AvroDataPublisherTest method TestReadMultiValueAvro.

/**
 * Reads every record of the multi-value Avro fixture and checks the total record count.
 *
 * The schema is extracted from the Avro file itself. While iterating, every value is checked to
 * be non-null; for columns the reader's schema reports as multi-value, the value must be an
 * {@code Object[]} and every element must be non-null.
 *
 * @throws Exception if the fixture cannot be located or read
 */
@Test
public void TestReadMultiValueAvro() throws Exception {
    final String filePath = TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(AVRO_MULTI_DATA));
    final SegmentGeneratorConfig config = new SegmentGeneratorConfig(AvroUtils.extractSchemaFromAvro(new File(filePath)));
    config.setFormat(FileFormat.AVRO);
    config.setInputFilePath(filePath);
    config.setSegmentVersion(SegmentVersion.v1);
    AvroRecordReader avroDataPublisher = (AvroRecordReader) RecordReaderFactory.get(config);
    int cnt = 0;
    while (avroDataPublisher.hasNext()) {
        GenericRow recordRow = avroDataPublisher.next();
        for (String column : recordRow.getFieldNames()) {
            Object valueFromAvro = recordRow.getValue(column);
            Assert.assertNotNull(valueFromAvro, "null value in column " + column);
            if (!avroDataPublisher.getSchema().getFieldSpecFor(column).isSingleValueField()) {
                // The cast fails the test if a multi-value column is not delivered as an Object[].
                for (Object valueObject : (Object[]) valueFromAvro) {
                    Assert.assertNotNull(valueObject, "null element in multi-value column " + column);
                }
            }
        }
        cnt++;
    }
    // (actual, expected) order, so a failure message reports the two numbers correctly.
    Assert.assertEquals(cnt, 28949);
}
Also used : GenericRow(com.linkedin.pinot.core.data.GenericRow) AvroRecordReader(com.linkedin.pinot.core.data.readers.AvroRecordReader) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) JSONObject(org.json.JSONObject) File(java.io.File) Test(org.testng.annotations.Test)

Example 13 with GenericRow

use of com.linkedin.pinot.core.data.GenericRow in project pinot by linkedin.

the class StarTreeIndexTestSegmentHelper method buildSegment.

/**
 * Generates synthetic rows, builds a star-tree-enabled segment from them and returns the schema
 * the segment was built with.
 *
 * The schema has {@code NUM_DIMENSIONS} string dimensions named d1, d2, ..., an INT time column
 * in DAYS, and {@code NUM_METRICS} INT metrics named m1, m2, .... The number of rows is
 * {@code factorial(NUM_DIMENSIONS) * 100}.
 *
 * @param segmentDirName output directory passed to the segment generator config
 * @param segmentName name given to the generated segment
 * @param hllConfig HLL configuration passed to the segment generator config
 * @param enableOffHeapFormat forwarded to {@code buildStarTreeIndexSpec}
 * @return the schema used to build the segment
 * @throws Exception if segment generation fails
 */
private static Schema buildSegment(String segmentDirName, String segmentName, HllConfig hllConfig, boolean enableOffHeapFormat) throws Exception {
    final int rows = (int) MathUtils.factorial(NUM_DIMENSIONS) * 100;

    // --- Schema: dimensions, time column, metrics. ---
    Schema schema = new Schema();
    for (int d = 0; d < NUM_DIMENSIONS; d++) {
        String dimName = "d" + (d + 1);
        schema.addField(dimName, new DimensionFieldSpec(dimName, FieldSpec.DataType.STRING, true));
    }
    schema.setTimeFieldSpec(new TimeFieldSpec(TIME_COLUMN_NAME, FieldSpec.DataType.INT, TimeUnit.DAYS));
    for (int m = 0; m < NUM_METRICS; m++) {
        String metricName = "m" + (m + 1);
        schema.addField(metricName, new MetricFieldSpec(metricName, FieldSpec.DataType.INT));
    }

    // --- Generator configuration. ---
    SegmentGeneratorConfig generatorConfig = new SegmentGeneratorConfig(schema);
    generatorConfig.setEnableStarTreeIndex(true);
    generatorConfig.setOutDir(segmentDirName);
    generatorConfig.setFormat(FileFormat.AVRO);
    generatorConfig.setSegmentName(segmentName);
    generatorConfig.setHllConfig(hllConfig);
    generatorConfig.setStarTreeIndexSpec(buildStarTreeIndexSpec(enableOffHeapFormat));

    // --- Row generation (seeded, so the data is reproducible). ---
    Random rng = new Random(RANDOM_SEED);
    final int firstRandomDim = NUM_DIMENSIONS / 2;
    final List<GenericRow> generatedRows = new ArrayList<>();
    for (int rowId = 0; rowId < rows; rowId++) {
        HashMap<String, Object> fields = new HashMap<>();

        for (int d = 0; d < NUM_DIMENSIONS; d++) {
            String dimName = schema.getDimensionFieldSpecs().get(d).getName();
            if (d < firstRandomDim) {
                // Lower half of the dimensions: value derived from the row number.
                fields.put(dimName, dimName + "-v" + rowId % (NUM_DIMENSIONS - d));
            } else {
                // Upper half: random suffix gives these columns a larger cardinality (to better test hll).
                fields.put(dimName, dimName + "-v" + rng.nextInt(d * 100));
            }
        }

        for (int m = 0; m < NUM_METRICS; m++) {
            fields.put(schema.getMetricFieldSpecs().get(m).getName(), rng.nextInt(METRIC_MAX_VALUE));
        }

        fields.put(TIME_COLUMN_NAME, rowId % 7);

        GenericRow generated = new GenericRow();
        generated.init(fields);
        generatedRows.add(generated);
    }

    // --- Build the segment from the in-memory rows. ---
    SegmentIndexCreationDriverImpl segmentDriver = new SegmentIndexCreationDriverImpl();
    segmentDriver.init(generatorConfig, new TestUtils.GenericRowRecordReader(schema, generatedRows));
    segmentDriver.build();
    LOGGER.info("Built segment {} at {}", segmentName, segmentDirName);
    return schema;
}
Also used : RecordReader(com.linkedin.pinot.core.data.readers.RecordReader) SegmentIndexCreationDriverImpl(com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl) GenericRow(com.linkedin.pinot.core.data.GenericRow) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig)

Example 14 with GenericRow

use of com.linkedin.pinot.core.data.GenericRow in project pinot by linkedin.

the class NoDictionaryGroupKeyGeneratorTest method getExpectedGroupKeys.

/**
 * Computes the expected group keys by scanning every row of the given record reader.
 *
 * The reader is rewound before scanning. For each row, the values of the group-by columns are
 * concatenated in the given order, separated by
 * {@code AggregationGroupByTrimmingService.GROUP_KEY_DELIMITER}.
 *
 * @param recordReader Reader supplying the rows; it is rewound and then read to the end.
 * @param groupByColumns Group-by columns for which to generate the group-keys.
 * @return Set of unique group keys.
 * @throws Exception
 */
private Set<String> getExpectedGroupKeys(RecordReader recordReader, String[] groupByColumns) throws Exception {
    Set<String> expectedKeys = new HashSet<>();
    recordReader.rewind();
    while (recordReader.hasNext()) {
        GenericRow currentRow = recordReader.next();
        StringBuilder key = new StringBuilder();
        boolean firstColumn = true;
        for (String groupByColumn : groupByColumns) {
            // The delimiter goes between values only, never before the first or after the last.
            if (!firstColumn) {
                key.append(AggregationGroupByTrimmingService.GROUP_KEY_DELIMITER);
            }
            key.append(currentRow.getValue(groupByColumn));
            firstColumn = false;
        }
        expectedKeys.add(key.toString());
    }
    return expectedKeys;
}
Also used : GenericRow(com.linkedin.pinot.core.data.GenericRow) HashSet(java.util.HashSet)

Example 15 with GenericRow

use of com.linkedin.pinot.core.data.GenericRow in project pinot by linkedin.

the class NoDictionaryGroupKeyGeneratorTest method buildSegment.

/**
 * Helper method to build a segment from randomly generated rows.
 *
 * <ul>
 *   <li> One dimension column is created per entry of {@code COLUMN_NAMES}, typed by the matching
 *        entry of {@code DATA_TYPES}. </li>
 *   <li> The columns in {@code NO_DICT_COLUMN_NAMES} are configured as raw-index creation
 *        columns. </li>
 *   <li> Numeric columns get random values; string columns get {@code "value_" + rowIndex}. </li>
 * </ul>
 *
 * The record reader wrapping the generated rows is stored in {@code _recordReader} and also
 * returned.
 *
 * @return Record reader over the rows the segment was built from.
 *
 * @throws Exception
 */
private TestRecordReader buildSegment() throws Exception {
    Schema schema = new Schema();
    for (int col = 0; col < COLUMN_NAMES.length; col++) {
        schema.addField(new DimensionFieldSpec(COLUMN_NAMES[col], DATA_TYPES[col], true));
    }

    SegmentGeneratorConfig generatorConfig = new SegmentGeneratorConfig(schema);
    generatorConfig.setRawIndexCreationColumns(Arrays.asList(NO_DICT_COLUMN_NAMES));
    generatorConfig.setOutDir(SEGMENT_DIR_NAME);
    generatorConfig.setSegmentName(SEGMENT_NAME);

    Random rng = new Random();
    List<GenericRow> generatedRows = new ArrayList<>(NUM_ROWS);
    for (int rowId = 0; rowId < NUM_ROWS; rowId++) {
        Map<String, Object> fields = new HashMap<>(NUM_COLUMNS);
        for (FieldSpec spec : schema.getAllFieldSpecs()) {
            FieldSpec.DataType type = spec.getDataType();

            // Pick a value matching the column's declared type.
            final Object value;
            switch (type) {
                case INT:
                    value = rng.nextInt();
                    break;
                case LONG:
                    value = rng.nextLong();
                    break;
                case FLOAT:
                    value = rng.nextFloat();
                    break;
                case DOUBLE:
                    value = rng.nextDouble();
                    break;
                case STRING:
                    value = "value_" + rowId;
                    break;
                default:
                    throw new IllegalArgumentException("Illegal data type specified: " + type);
            }
            fields.put(spec.getName(), value);
        }

        GenericRow generated = new GenericRow();
        generated.init(fields);
        generatedRows.add(generated);
    }

    // The reader is kept in a field as well as returned.
    _recordReader = new TestRecordReader(generatedRows, schema);
    SegmentIndexCreationDriverImpl segmentDriver = new SegmentIndexCreationDriverImpl();
    segmentDriver.init(generatorConfig, _recordReader);
    segmentDriver.build();
    return _recordReader;
}
Also used : TestRecordReader(com.linkedin.pinot.core.data.readers.TestRecordReader) HashMap(java.util.HashMap) Schema(com.linkedin.pinot.common.data.Schema) ArrayList(java.util.ArrayList) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) SegmentIndexCreationDriverImpl(com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl) GenericRow(com.linkedin.pinot.core.data.GenericRow) Random(java.util.Random) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig)

Aggregations

GenericRow (com.linkedin.pinot.core.data.GenericRow): 45, HashMap (java.util.HashMap): 24, File (java.io.File): 17, Test (org.testng.annotations.Test): 15, Schema (com.linkedin.pinot.common.data.Schema): 14, SegmentGeneratorConfig (com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig): 14, ArrayList (java.util.ArrayList): 13, SegmentIndexCreationDriverImpl (com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl): 11, DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec): 8, RecordReader (com.linkedin.pinot.core.data.readers.RecordReader): 8, Random (java.util.Random): 6, JSONObject (org.json.JSONObject): 5, FieldSpec (com.linkedin.pinot.common.data.FieldSpec): 4, ServerMetrics (com.linkedin.pinot.common.metrics.ServerMetrics): 4, MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec): 3, AvroRecordReader (com.linkedin.pinot.core.data.readers.AvroRecordReader): 3, PinotSegmentRecordReader (com.linkedin.pinot.core.data.readers.PinotSegmentRecordReader): 3, TestRecordReader (com.linkedin.pinot.core.data.readers.TestRecordReader): 3, MetricsRegistry (com.yammer.metrics.core.MetricsRegistry): 3, BeforeClass (org.testng.annotations.BeforeClass): 3