Search in sources :

Example 51 with SegmentGeneratorConfig

use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.

the class RawIndexBenchmark method buildSegment.

/**
   * Helper method that builds a segment containing two columns both with data from input file.
   * The first column has raw indices (no dictionary), where as the second column is dictionary encoded.
   *
   * @throws Exception
   */
private File buildSegment() throws Exception {
    Schema schema = new Schema();
    for (int i = 0; i < NUM_COLUMNS; i++) {
        String column = "column_" + i;
        DimensionFieldSpec dimensionFieldSpec = new DimensionFieldSpec(column, FieldSpec.DataType.STRING, true);
        schema.addField(dimensionFieldSpec);
    }
    SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
    config.setRawIndexCreationColumns(Collections.singletonList(_rawIndexColumn));
    config.setOutDir(SEGMENT_DIR_NAME);
    config.setSegmentName(SEGMENT_NAME);
    BufferedReader reader = new BufferedReader(new FileReader(_dataFile));
    String value;
    final List<GenericRow> rows = new ArrayList<>();
    System.out.println("Reading data...");
    while ((value = reader.readLine()) != null) {
        HashMap<String, Object> map = new HashMap<>();
        for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
            map.put(fieldSpec.getName(), value);
        }
        GenericRow genericRow = new GenericRow();
        genericRow.init(map);
        rows.add(genericRow);
        _numRows++;
        if (_numRows % 1000000 == 0) {
            System.out.println("Read rows: " + _numRows);
        }
    }
    System.out.println("Generating segment...");
    SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
    RecordReader recordReader = new TestRecordReader(rows, schema);
    driver.init(config, recordReader);
    driver.build();
    return new File(SEGMENT_DIR_NAME, SEGMENT_NAME);
}
Also used : TestRecordReader(com.linkedin.pinot.core.data.readers.TestRecordReader) HashMap(java.util.HashMap) Schema(com.linkedin.pinot.common.data.Schema) RecordReader(com.linkedin.pinot.core.data.readers.RecordReader) TestRecordReader(com.linkedin.pinot.core.data.readers.TestRecordReader) ArrayList(java.util.ArrayList) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) SegmentIndexCreationDriverImpl(com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl) GenericRow(com.linkedin.pinot.core.data.GenericRow) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) File(java.io.File) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)

Example 52 with SegmentGeneratorConfig

use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.

the class HelixStarterTest method setupSegment.

private void setupSegment(File segmentDir, String tableName) throws Exception {
    String filePath = TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(AVRO_DATA));
    SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), segmentDir, TimeUnit.DAYS, tableName, null);
    SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
    driver.init(config);
    driver.build();
    LOGGER.info("Table: {} built at path: {}", tableName, segmentDir.getAbsolutePath());
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) File(java.io.File)

Example 53 with SegmentGeneratorConfig

use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.

the class SegmentTestUtils method getSegmentGenSpecWithSchemAndProjectedColumns.

public static SegmentGeneratorConfig getSegmentGenSpecWithSchemAndProjectedColumns(File inputAvro, File outputDir, TimeUnit timeUnit, String clusterName, Schema inputPinotSchema) throws IOException {
    Schema schema;
    if (inputPinotSchema == null) {
        schema = AvroUtils.extractSchemaFromAvro(inputAvro);
    } else {
        schema = inputPinotSchema;
    }
    SegmentGeneratorConfig segmentGenSpec = new SegmentGeneratorConfig(schema);
    segmentGenSpec.setInputFilePath(inputAvro.getAbsolutePath());
    segmentGenSpec.setSegmentTimeUnit(timeUnit);
    if (inputAvro.getName().endsWith("gz")) {
        segmentGenSpec.setFormat(FileFormat.GZIPPED_AVRO);
    } else {
        segmentGenSpec.setFormat(FileFormat.AVRO);
    }
    segmentGenSpec.setSegmentVersion(SegmentVersion.v1);
    segmentGenSpec.setTableName(clusterName);
    segmentGenSpec.setOutDir(outputDir.getAbsolutePath());
    return segmentGenSpec;
}
Also used : Schema(com.linkedin.pinot.common.data.Schema) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig)

Example 54 with SegmentGeneratorConfig

use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.

the class IntegrationTest method setupSegmentList.

private void setupSegmentList() throws Exception {
    final URL resource = getClass().getClassLoader().getResource(SMALL_AVRO_DATA);
    final String filePath = TestUtils.getFileFromResourceUrl(resource);
    _indexSegmentList.clear();
    if (INDEXES_DIR.exists()) {
        FileUtils.deleteQuietly(INDEXES_DIR);
    }
    INDEXES_DIR.mkdir();
    for (int i = 0; i < 2; ++i) {
        final File segmentDir = new File(INDEXES_DIR, "segment_" + i);
        final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), segmentDir, "dim" + i, TimeUnit.DAYS, "testTable");
        final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
        driver.init(config);
        driver.build();
        _indexSegmentList.add(ColumnarSegmentLoader.load(new File(new File(INDEXES_DIR, "segment_" + String.valueOf(i)), driver.getSegmentName()), ReadMode.mmap));
    //      System.out.println("built at : " + segmentDir.getAbsolutePath());
    }
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) File(java.io.File) URL(java.net.URL)

Example 55 with SegmentGeneratorConfig

use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.

the class ChunkIndexCreationDriverImplTest method setUP.

@BeforeClass
public void setUP() throws Exception {
    if (INDEX_DIR.exists()) {
        FileUtils.deleteQuietly(INDEX_DIR);
    }
    final String filePath = TestUtils.getFileFromResourceUrl(ChunkIndexCreationDriverImplTest.class.getClassLoader().getResource(AVRO_DATA));
    final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR, "daysSinceEpoch", TimeUnit.DAYS, "testTable");
    config.setSegmentNamePostfix("1");
    config.setTimeColumnName("daysSinceEpoch");
    final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
    driver.init(config);
    driver.build();
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) File(java.io.File) BeforeClass(org.testng.annotations.BeforeClass)

Aggregations

SegmentGeneratorConfig (com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig)57 SegmentIndexCreationDriver (com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver)34 File (java.io.File)31 SegmentIndexCreationDriverImpl (com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl)19 GenericRow (com.linkedin.pinot.core.data.GenericRow)14 Test (org.testng.annotations.Test)13 Schema (com.linkedin.pinot.common.data.Schema)12 HashMap (java.util.HashMap)12 ArrayList (java.util.ArrayList)10 RecordReader (com.linkedin.pinot.core.data.readers.RecordReader)9 BeforeClass (org.testng.annotations.BeforeClass)8 ColumnMetadataTest (com.linkedin.pinot.core.segment.index.ColumnMetadataTest)7 DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec)6 IndexSegment (com.linkedin.pinot.core.indexsegment.IndexSegment)6 FieldSpec (com.linkedin.pinot.common.data.FieldSpec)5 URL (java.net.URL)5 Random (java.util.Random)4 AvroRecordReader (com.linkedin.pinot.core.data.readers.AvroRecordReader)3 TestRecordReader (com.linkedin.pinot.core.data.readers.TestRecordReader)3 JSONObject (org.json.JSONObject)3