Search in sources :

Example 6 with RecordReader

use of com.linkedin.pinot.core.data.readers.RecordReader in project pinot by linkedin.

the class FilterTreeOptimizationTest method buildSegment.

/**
   * Builds a segment of NUM_ROWS rows in which every column named in DIMENSIONS holds
   * the value {@code <dimensionName>_<row % MAX_DIMENSION_VALUES>}.
   *
   * @param segmentDirName Directory the segment is written to
   * @param segmentName Name assigned to the segment
   * @param schema Schema used for the generator config and the record reader
   * @return Record reader that was passed to the segment creation driver
   * @throws Exception If building the segment fails
   */
private RecordReader buildSegment(String segmentDirName, String segmentName, Schema schema) throws Exception {
    SegmentGeneratorConfig generatorConfig = new SegmentGeneratorConfig(schema);
    generatorConfig.setOutDir(segmentDirName);
    generatorConfig.setFormat(FileFormat.AVRO);
    generatorConfig.setTableName(TABLE_NAME);
    generatorConfig.setSegmentName(segmentName);

    // One GenericRow per row index; the generated values repeat every MAX_DIMENSION_VALUES rows.
    final List<GenericRow> rows = new ArrayList<>();
    int rowId = 0;
    while (rowId < NUM_ROWS) {
        HashMap<String, Object> fields = new HashMap<>();
        for (String dimensionName : DIMENSIONS) {
            fields.put(dimensionName, dimensionName + "_" + (rowId % MAX_DIMENSION_VALUES));
        }
        GenericRow current = new GenericRow();
        current.init(fields);
        rows.add(current);
        rowId++;
    }

    // Feed the in-memory rows to the driver and build the segment.
    SegmentIndexCreationDriverImpl creationDriver = new SegmentIndexCreationDriverImpl();
    RecordReader recordReader = new TestUtils.GenericRowRecordReader(schema, rows);
    creationDriver.init(generatorConfig, recordReader);
    creationDriver.build();

    LOGGER.info("Built segment {} at {}", segmentName, segmentDirName);
    return recordReader;
}
Also used : GenericRow(com.linkedin.pinot.core.data.GenericRow) HashMap(java.util.HashMap) RecordReader(com.linkedin.pinot.core.data.readers.RecordReader) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) ArrayList(java.util.ArrayList) SegmentIndexCreationDriverImpl(com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl)

Example 7 with RecordReader

use of com.linkedin.pinot.core.data.readers.RecordReader in project pinot by linkedin.

the class TransformGroupByTest method buildSegment.

/**
   * Builds a segment of NUM_ROWS rows with three columns: a dimension column whose value
   * is picked at random from {@link #_dimensionValues}, a metric column holding a random
   * double, and a time column.
   *
   * The time column starts at the current wall-clock time and goes back by 1/1000 of a
   * day for every subsequent row.
   *
   * @param segmentDirName Directory the segment is written to
   * @param segmentName Name assigned to the segment
   * @param schema Schema used for the generator config and the record reader
   * @return Record reader that was passed to the segment creation driver
   * @throws Exception If building the segment fails
   */
private RecordReader buildSegment(String segmentDirName, String segmentName, Schema schema) throws Exception {
    SegmentGeneratorConfig generatorConfig = new SegmentGeneratorConfig(schema);
    generatorConfig.setOutDir(segmentDirName);
    generatorConfig.setFormat(FileFormat.AVRO);
    generatorConfig.setTableName(TABLE_NAME);
    generatorConfig.setSegmentName(segmentName);

    Random rng = new Random(RANDOM_SEED);
    final long startMillis = System.currentTimeMillis();

    // Each row is one step (a thousandth of a day) older than the previous one, which yields
    // continuous days in the input: roughly 10 days per 10k rows.
    final long stepMillis = TimeUnit.DAYS.toMillis(1) / 1000;

    final List<GenericRow> rows = new ArrayList<>();
    final int dimensionCount = _dimensionValues.length;
    for (int rowId = 0; rowId < NUM_ROWS; rowId++) {
        // Keep the draw order (dimension index first, metric second) so the seeded sequence is stable.
        int pick = rng.nextInt(dimensionCount);
        double metric = rng.nextDouble();

        HashMap<String, Object> fields = new HashMap<>();
        fields.put(DIMENSION_NAME, _dimensionValues[pick]);
        fields.put(METRIC_NAME, metric);
        fields.put(TIME_COLUMN_NAME, startMillis - rowId * stepMillis);

        GenericRow current = new GenericRow();
        current.init(fields);
        rows.add(current);
    }

    // Feed the in-memory rows to the driver and build the segment.
    SegmentIndexCreationDriverImpl creationDriver = new SegmentIndexCreationDriverImpl();
    RecordReader recordReader = new TestUtils.GenericRowRecordReader(schema, rows);
    creationDriver.init(generatorConfig, recordReader);
    creationDriver.build();

    LOGGER.info("Built segment {} at {}", segmentName, segmentDirName);
    return recordReader;
}
Also used : HashMap(java.util.HashMap) RecordReader(com.linkedin.pinot.core.data.readers.RecordReader) ArrayList(java.util.ArrayList) SegmentIndexCreationDriverImpl(com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl) GenericRow(com.linkedin.pinot.core.data.GenericRow) Random(java.util.Random) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig)

Example 8 with RecordReader

use of com.linkedin.pinot.core.data.readers.RecordReader in project pinot by linkedin.

the class TransformExpressionOperatorTest method buildSegment.

/**
   * Builds a segment of NUM_ROWS rows, filling the first {@link #NUM_METRICS} metric
   * columns of the schema with random values.
   *
   * Every generated value is also recorded in {@code _values[row][metricIndex]}, which is
   * re-allocated on each call.
   *
   * @param segmentDirName Directory the segment is written to
   * @param segmentName Name assigned to the segment
   * @param schema Schema used for the generator config, the metric names and the record reader
   * @return The schema that was passed in
   * @throws Exception If building the segment fails
   */
private Schema buildSegment(String segmentDirName, String segmentName, Schema schema) throws Exception {
    SegmentGeneratorConfig generatorConfig = new SegmentGeneratorConfig(schema);
    generatorConfig.setOutDir(segmentDirName);
    generatorConfig.setFormat(FileFormat.AVRO);
    generatorConfig.setSegmentName(segmentName);

    Random rng = new Random(RANDOM_SEED);
    final List<GenericRow> rows = new ArrayList<>();
    _values = new double[NUM_ROWS][NUM_METRICS];

    for (int rowId = 0; rowId < NUM_ROWS; rowId++) {
        HashMap<String, Object> fields = new HashMap<>();

        // Metric columns: random integer part plus random fraction, shifted up by 1.0.
        for (int metricId = 0; metricId < NUM_METRICS; metricId++) {
            String metricName = schema.getMetricFieldSpecs().get(metricId).getName();
            int wholePart = rng.nextInt(MAX_METRIC_VALUE);
            double fraction = rng.nextDouble();
            double generated = wholePart + fraction + 1.0;
            fields.put(metricName, generated);
            _values[rowId][metricId] = generated;
        }

        GenericRow current = new GenericRow();
        current.init(fields);
        rows.add(current);
    }

    // Feed the in-memory rows to the driver and build the segment.
    SegmentIndexCreationDriverImpl creationDriver = new SegmentIndexCreationDriverImpl();
    RecordReader recordReader = new TestUtils.GenericRowRecordReader(schema, rows);
    creationDriver.init(generatorConfig, recordReader);
    creationDriver.build();

    LOGGER.info("Built segment {} at {}", segmentName, segmentDirName);
    return schema;
}
Also used : HashMap(java.util.HashMap) RecordReader(com.linkedin.pinot.core.data.readers.RecordReader) ArrayList(java.util.ArrayList) SegmentIndexCreationDriverImpl(com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl) GenericRow(com.linkedin.pinot.core.data.GenericRow) Random(java.util.Random) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig)

Example 9 with RecordReader

use of com.linkedin.pinot.core.data.readers.RecordReader in project pinot by linkedin.

the class RawIndexCreatorTest method buildIndex.

/**
   * Builds a segment of NUM_ROWS rows of random values, one value per field spec in the
   * schema, with the schema's dimension columns configured as raw index creation columns.
   *
   * After the build, the generated segment directory is opened in mmap mode and stored in
   * {@code _segmentDirectory}, and a reader created from it is stored in {@code _segmentReader}.
   *
   * @param schema Schema describing the columns to generate
   * @return Record reader over the generated rows, rewound after the build
   * @throws Exception If building or opening the segment fails
   */
private RecordReader buildIndex(Schema schema) throws Exception {
    SegmentGeneratorConfig generatorConfig = new SegmentGeneratorConfig(schema);
    generatorConfig.setRawIndexCreationColumns(schema.getDimensionNames());
    generatorConfig.setOutDir(SEGMENT_DIR_NAME);
    generatorConfig.setSegmentName(SEGMENT_NAME);

    final List<GenericRow> generatedRows = new ArrayList<>();
    for (int rowId = 0; rowId < NUM_ROWS; rowId++) {
        HashMap<String, Object> fields = new HashMap<>();
        for (FieldSpec spec : schema.getAllFieldSpecs()) {
            // Random value chosen according to the column's data type.
            Object randomValue = getRandomValue(spec.getDataType());
            fields.put(spec.getName(), randomValue);
        }
        GenericRow current = new GenericRow();
        current.init(fields);
        generatedRows.add(current);
    }

    // Feed the in-memory rows to the driver and build the segment.
    SegmentIndexCreationDriverImpl creationDriver = new SegmentIndexCreationDriverImpl();
    RecordReader recordReader = new TestRecordReader(generatedRows, schema);
    creationDriver.init(generatorConfig, recordReader);
    creationDriver.build();

    // Open the freshly built segment so it can be read back through the stored fields.
    _segmentDirectory = SegmentDirectory.createFromLocalFS(creationDriver.getOutputDirectory(), ReadMode.mmap);
    _segmentReader = _segmentDirectory.createReader();

    // The build consumed the reader; rewind it before handing it to the caller.
    recordReader.rewind();
    return recordReader;
}
Also used : TestRecordReader(com.linkedin.pinot.core.data.readers.TestRecordReader) HashMap(java.util.HashMap) RecordReader(com.linkedin.pinot.core.data.readers.RecordReader) TestRecordReader(com.linkedin.pinot.core.data.readers.TestRecordReader) ArrayList(java.util.ArrayList) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) SegmentIndexCreationDriverImpl(com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl) GenericRow(com.linkedin.pinot.core.data.GenericRow) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig)

Aggregations

RecordReader (com.linkedin.pinot.core.data.readers.RecordReader)9 SegmentGeneratorConfig (com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig)9 SegmentIndexCreationDriverImpl (com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl)9 GenericRow (com.linkedin.pinot.core.data.GenericRow)8 ArrayList (java.util.ArrayList)7 HashMap (java.util.HashMap)7 DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec)3 FieldSpec (com.linkedin.pinot.common.data.FieldSpec)3 Schema (com.linkedin.pinot.common.data.Schema)3 File (java.io.File)3 Random (java.util.Random)3 TestRecordReader (com.linkedin.pinot.core.data.readers.TestRecordReader)2 BufferedReader (java.io.BufferedReader)1 FileReader (java.io.FileReader)1 HashSet (java.util.HashSet)1