Search in sources :

Example 1 with TestRecordReader

use of com.linkedin.pinot.core.data.readers.TestRecordReader in project pinot by linkedin.

the class NoDictionaryGroupKeyGeneratorTest method buildSegment.

/**
   * Helper method that builds a segment out of generated rows:
   * <ul>
   *   <li> Every entry of COLUMN_NAMES / DATA_TYPES is added to the schema as a dimension column. </li>
   *   <li> Columns listed in NO_DICT_COLUMN_NAMES are configured for raw index creation. </li>
   *   <li> Numeric columns receive random values; string columns receive "value_" followed by the row index. </li>
   * </ul>
   *
   * The record reader backing the segment is also stored in the _recordReader field.
   *
   * @return Record reader over the rows that were used to build the segment.
   *
   * @throws Exception if the segment creation driver fails to initialize or build.
   */
private TestRecordReader buildSegment() throws Exception {
    Schema schema = new Schema();
    for (int col = 0; col < COLUMN_NAMES.length; col++) {
        // NOTE(review): the trailing 'true' is presumably the single-value flag - confirm against DimensionFieldSpec.
        schema.addField(new DimensionFieldSpec(COLUMN_NAMES[col], DATA_TYPES[col], true));
    }

    SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
    config.setRawIndexCreationColumns(Arrays.asList(NO_DICT_COLUMN_NAMES));
    config.setOutDir(SEGMENT_DIR_NAME);
    config.setSegmentName(SEGMENT_NAME);

    Random random = new Random();
    List<GenericRow> rows = new ArrayList<>(NUM_ROWS);
    for (int rowId = 0; rowId < NUM_ROWS; rowId++) {
        Map<String, Object> fieldValues = new HashMap<>(NUM_COLUMNS);
        for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
            FieldSpec.DataType dataType = fieldSpec.getDataType();

            // Pick the value first, then store it once below.
            Object value;
            switch (dataType) {
                case INT:
                    value = random.nextInt();
                    break;
                case LONG:
                    value = random.nextLong();
                    break;
                case FLOAT:
                    value = random.nextFloat();
                    break;
                case DOUBLE:
                    value = random.nextDouble();
                    break;
                case STRING:
                    value = "value_" + rowId;
                    break;
                default:
                    throw new IllegalArgumentException("Illegal data type specified: " + dataType);
            }
            fieldValues.put(fieldSpec.getName(), value);
        }

        GenericRow row = new GenericRow();
        row.init(fieldValues);
        rows.add(row);
    }

    // Keep the reader in the field so it stays reachable after the segment is built.
    _recordReader = new TestRecordReader(rows, schema);
    SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
    driver.init(config, _recordReader);
    driver.build();
    return _recordReader;
}
Also used : TestRecordReader(com.linkedin.pinot.core.data.readers.TestRecordReader) HashMap(java.util.HashMap) Schema(com.linkedin.pinot.common.data.Schema) ArrayList(java.util.ArrayList) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) SegmentIndexCreationDriverImpl(com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl) GenericRow(com.linkedin.pinot.core.data.GenericRow) Random(java.util.Random) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)

Example 2 with TestRecordReader

use of com.linkedin.pinot.core.data.readers.TestRecordReader in project pinot by linkedin.

the class RawIndexCreatorTest method buildIndex.

/**
   * Helper method that builds a segment in which every dimension column of the given schema is configured
   * for RAW (no-dictionary) index creation. Rows are filled with values from getRandomValue for each
   * column's data type.
   *
   * As a side effect, the generated segment is opened (mmap read mode) into _segmentDirectory and a reader
   * for it is stored in _segmentReader.
   *
   * @param schema Schema describing the columns of the segment to build.
   * @return Record reader over the generated rows, rewound so that it can be read again.
   * @throws Exception if the segment cannot be built or opened.
   */
private RecordReader buildIndex(Schema schema) throws Exception {
    SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
    config.setRawIndexCreationColumns(schema.getDimensionNames());
    config.setOutDir(SEGMENT_DIR_NAME);
    config.setSegmentName(SEGMENT_NAME);

    final List<GenericRow> rows = new ArrayList<>();
    for (int rowId = 0; rowId < NUM_ROWS; rowId++) {
        HashMap<String, Object> fieldValues = new HashMap<>();
        for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
            Object randomValue = getRandomValue(fieldSpec.getDataType());
            fieldValues.put(fieldSpec.getName(), randomValue);
        }

        GenericRow generatedRow = new GenericRow();
        generatedRow.init(fieldValues);
        rows.add(generatedRow);
    }

    RecordReader reader = new TestRecordReader(rows, schema);
    SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
    driver.init(config, reader);
    driver.build();

    // Open the freshly built segment so its contents can be read back.
    _segmentDirectory = SegmentDirectory.createFromLocalFS(driver.getOutputDirectory(), ReadMode.mmap);
    _segmentReader = _segmentDirectory.createReader();

    // The driver has consumed the reader; rewind it before handing it back to the caller.
    reader.rewind();
    return reader;
}
Also used : TestRecordReader(com.linkedin.pinot.core.data.readers.TestRecordReader) HashMap(java.util.HashMap) RecordReader(com.linkedin.pinot.core.data.readers.RecordReader) TestRecordReader(com.linkedin.pinot.core.data.readers.TestRecordReader) ArrayList(java.util.ArrayList) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) SegmentIndexCreationDriverImpl(com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl) GenericRow(com.linkedin.pinot.core.data.GenericRow) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig)

Example 3 with TestRecordReader

use of com.linkedin.pinot.core.data.readers.TestRecordReader in project pinot by linkedin.

the class RawIndexBenchmark method buildSegment.

/**
   * Helper method that builds a segment containing NUM_COLUMNS string columns, all populated with data from
   * the input file: each line of the file becomes one row, and the line is stored in every column.
   * The column named by _rawIndexColumn gets a raw index (no dictionary), whereas the remaining columns
   * are dictionary encoded.
   *
   * The number of rows read is accumulated into the _numRows field.
   *
   * @return File pointing at the segment (SEGMENT_NAME under SEGMENT_DIR_NAME).
   * @throws Exception if the input file cannot be read or the segment cannot be built.
   */
private File buildSegment() throws Exception {
    Schema schema = new Schema();
    for (int i = 0; i < NUM_COLUMNS; i++) {
        String column = "column_" + i;
        DimensionFieldSpec dimensionFieldSpec = new DimensionFieldSpec(column, FieldSpec.DataType.STRING, true);
        schema.addField(dimensionFieldSpec);
    }
    SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
    config.setRawIndexCreationColumns(Collections.singletonList(_rawIndexColumn));
    config.setOutDir(SEGMENT_DIR_NAME);
    config.setSegmentName(SEGMENT_NAME);
    final List<GenericRow> rows = new ArrayList<>();
    // try-with-resources closes the input file even when reading fails part-way through;
    // previously the reader was never closed and the file handle leaked.
    try (BufferedReader reader = new BufferedReader(new FileReader(_dataFile))) {
        System.out.println("Reading data...");
        String value;
        while ((value = reader.readLine()) != null) {
            // Every column of the row carries the same line of input.
            HashMap<String, Object> map = new HashMap<>();
            for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
                map.put(fieldSpec.getName(), value);
            }
            GenericRow genericRow = new GenericRow();
            genericRow.init(map);
            rows.add(genericRow);
            _numRows++;
            // Progress indicator for large input files.
            if (_numRows % 1000000 == 0) {
                System.out.println("Read rows: " + _numRows);
            }
        }
    }
    System.out.println("Generating segment...");
    SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
    RecordReader recordReader = new TestRecordReader(rows, schema);
    driver.init(config, recordReader);
    driver.build();
    return new File(SEGMENT_DIR_NAME, SEGMENT_NAME);
}
Also used : TestRecordReader(com.linkedin.pinot.core.data.readers.TestRecordReader) HashMap(java.util.HashMap) Schema(com.linkedin.pinot.common.data.Schema) RecordReader(com.linkedin.pinot.core.data.readers.RecordReader) TestRecordReader(com.linkedin.pinot.core.data.readers.TestRecordReader) ArrayList(java.util.ArrayList) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) SegmentIndexCreationDriverImpl(com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl) GenericRow(com.linkedin.pinot.core.data.GenericRow) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) File(java.io.File) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)

Aggregations

DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec) 3; FieldSpec (com.linkedin.pinot.common.data.FieldSpec) 3; GenericRow (com.linkedin.pinot.core.data.GenericRow) 3; TestRecordReader (com.linkedin.pinot.core.data.readers.TestRecordReader) 3; SegmentGeneratorConfig (com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) 3; SegmentIndexCreationDriverImpl (com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl) 3; ArrayList (java.util.ArrayList) 3; HashMap (java.util.HashMap) 3; Schema (com.linkedin.pinot.common.data.Schema) 2; RecordReader (com.linkedin.pinot.core.data.readers.RecordReader) 2; BufferedReader (java.io.BufferedReader) 1; File (java.io.File) 1; FileReader (java.io.FileReader) 1; Random (java.util.Random) 1