Search in sources :

Example 16 with SegmentIndexCreationDriver

use of com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver in project pinot by linkedin.

the class BrokerReduceServiceTest method setupSegmentList.

private void setupSegmentList(int numberOfSegments) throws Exception {
    final String filePath = TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(SMALL_AVRO_DATA));
    _indexSegmentList.clear();
    if (INDEXES_DIR.exists()) {
        FileUtils.deleteQuietly(INDEXES_DIR);
    }
    INDEXES_DIR.mkdir();
    for (int i = 0; i < numberOfSegments; ++i) {
        final File segmentDir = new File(INDEXES_DIR, "segment_" + i);
        final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), segmentDir, "dim" + i, TimeUnit.DAYS, "midas");
        config.setSegmentNamePostfix(String.valueOf(i));
        final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
        driver.init(config);
        driver.build();
        File parent = new File(INDEXES_DIR, "segment_" + String.valueOf(i));
        String segmentName = parent.list()[0];
        _indexSegmentList.add(ColumnarSegmentLoader.load(new File(parent, segmentName), ReadMode.mmap));
    //      System.out.println("built at : " + segmentDir.getAbsolutePath());
    }
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) File(java.io.File)

Example 17 with SegmentIndexCreationDriver

use of com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver in project pinot by linkedin.

the class BitmapInvertedIndexTest method setup.

@BeforeClass
public void setup() throws Exception {
    final String filePath = TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(AVRO_DATA));
    if (INDEX_DIR.exists()) {
        FileUtils.deleteQuietly(INDEX_DIR);
    }
    final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR, "time_day", TimeUnit.DAYS, "test");
    final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
    driver.init(config);
    driver.build();
    List<String> iiColumns = config.getInvertedIndexCreationColumns();
    invertedIndexColumns = new String[iiColumns.size()];
    iiColumns.toArray(invertedIndexColumns);
    segmentDirectory = new File(INDEX_DIR, driver.getSegmentName());
//    System.out.println("built at : " + INDEX_DIR.getAbsolutePath());
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) File(java.io.File) BeforeClass(org.testng.annotations.BeforeClass)

Example 18 with SegmentIndexCreationDriver

use of com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver in project pinot by linkedin.

the class BlocksTest method before.

@BeforeClass
public static void before() throws Exception {
    final String filePath = TestUtils.getFileFromResourceUrl(BlocksTest.class.getClassLoader().getResource(AVRO_DATA));
    if (INDEX_DIR.exists()) {
        FileUtils.deleteQuietly(INDEX_DIR);
    }
    //    System.out.println(INDEX_DIR.getAbsolutePath());
    final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
    final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR, "daysSinceEpoch", TimeUnit.DAYS, "test");
    config.setTimeColumnName("daysSinceEpoch");
    driver.init(config);
    driver.build();
    final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath));
    final org.apache.avro.Schema avroSchema = avroReader.getSchema();
    final String[] columns = new String[avroSchema.getFields().size()];
    int i = 0;
    for (final Field f : avroSchema.getFields()) {
        columns[i] = f.name();
        i++;
    }
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) Field(org.apache.avro.Schema.Field) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) BeforeClass(org.testng.annotations.BeforeClass)

Example 19 with SegmentIndexCreationDriver

use of com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver in project pinot by linkedin.

the class DictionariesTest method before.

@BeforeClass
public static void before() throws Exception {
    final String filePath = TestUtils.getFileFromResourceUrl(DictionariesTest.class.getClassLoader().getResource(AVRO_DATA));
    if (INDEX_DIR.exists()) {
        FileUtils.deleteQuietly(INDEX_DIR);
    }
    final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR, "time_day", TimeUnit.DAYS, "test");
    final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
    driver.init(config);
    driver.build();
    segmentDirectory = new File(INDEX_DIR, driver.getSegmentName());
    final Schema schema = AvroUtils.extractSchemaFromAvro(new File(filePath));
    final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath));
    final org.apache.avro.Schema avroSchema = avroReader.getSchema();
    final String[] columns = new String[avroSchema.getFields().size()];
    int i = 0;
    for (final Field f : avroSchema.getFields()) {
        columns[i] = f.name();
        i++;
    }
    uniqueEntries = new HashMap<String, Set<Object>>();
    for (final String column : columns) {
        uniqueEntries.put(column, new HashSet<Object>());
    }
    while (avroReader.hasNext()) {
        final GenericRecord rec = avroReader.next();
        for (final String column : columns) {
            Object val = rec.get(column);
            if (val instanceof Utf8) {
                val = ((Utf8) val).toString();
            }
            uniqueEntries.get(column).add(getAppropriateType(schema.getFieldSpecFor(column).getDataType(), val));
        }
    }
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) HashSet(java.util.HashSet) Set(java.util.Set) Schema(com.linkedin.pinot.common.data.Schema) Field(org.apache.avro.Schema.Field) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) Utf8(org.apache.avro.util.Utf8) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) BeforeClass(org.testng.annotations.BeforeClass)

Example 20 with SegmentIndexCreationDriver

use of com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver in project pinot by linkedin.

the class FileBasedSentineTest method setup.

@BeforeClass
public void setup() throws Exception {
    url = new URL("http://localhost:" + FileBasedServerBrokerStarters.BROKER_CLIENT_PORT + "/query");
    // lets generate data
    final String[] columns = { "dimention1", "dimention2", "dimention3", "dimention4", "metric1", "daysSinceEpoch" };
    final Map<String, DataType> dataTypes = new HashMap<String, FieldSpec.DataType>();
    final Map<String, FieldType> fieldTypes = new HashMap<String, FieldType>();
    final Map<String, TimeUnit> timeUnits = new HashMap<String, TimeUnit>();
    final Map<String, Integer> cardinality = new HashMap<String, Integer>();
    // Crate empty range map as the signature of DataGeneratorSpec has changed, and this test does not
    // use metric/time as fieldType.
    final Map<String, IntRange> range = new HashMap<String, IntRange>();
    for (final String col : columns) {
        if (col.equals("dimention1")) {
            dataTypes.put(col, DataType.STRING);
            cardinality.put(col, 1000);
        } else {
            dataTypes.put(col, DataType.INT);
            cardinality.put(col, 1000);
        }
        fieldTypes.put(col, FieldType.DIMENSION);
    }
    if (avroDataDir.exists()) {
        FileUtils.deleteDirectory(avroDataDir);
    }
    final DataGeneratorSpec spec = new DataGeneratorSpec(Arrays.asList(columns), cardinality, range, dataTypes, fieldTypes, timeUnits, FileFormat.AVRO, avroDataDir.getAbsolutePath(), true);
    generator = new DataGenerator();
    generator.init(spec);
    generator.generate(100000L, 2);
    // lets make segments now
    final File bootstrapDir = new File(FileBasedServerBrokerStarters.SERVER_BOOTSTRAP_DIR);
    if (bootstrapDir.exists()) {
        FileUtils.deleteDirectory(bootstrapDir);
    }
    bootstrapDir.mkdir();
    int counter = 0;
    for (final File avro : avroDataDir.listFiles()) {
        for (final String table : FileBasedServerBrokerStarters.TABLE_NAMES) {
            final SegmentGeneratorConfig genConfig = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(avro, new File(bootstrapDir, "segment-" + counter), "daysSinceEpoch", TimeUnit.DAYS, table);
            final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
            driver.init(genConfig);
            driver.build();
            counter++;
        }
    }
    // lets start the server and the broker now
    starter = new FileBasedServerBrokerStarters();
    starter.startAll();
    // pick some values from here if you need to use it for running filter queries
    final JSONObject selectionRequestResponse = postQuery("select * from 'table1' limit 100", "http://localhost:" + FileBasedServerBrokerStarters.BROKER_CLIENT_PORT);
//    System.out.println(selectionRequestResponse.toString(1));
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) HashMap(java.util.HashMap) IntRange(org.apache.commons.lang.math.IntRange) URL(java.net.URL) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) FieldType(com.linkedin.pinot.common.data.FieldSpec.FieldType) JSONObject(org.json.JSONObject) DataGenerator(com.linkedin.pinot.tools.data.generator.DataGenerator) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) TimeUnit(java.util.concurrent.TimeUnit) File(java.io.File) DataGeneratorSpec(com.linkedin.pinot.tools.data.generator.DataGeneratorSpec) BeforeClass(org.testng.annotations.BeforeClass)

Aggregations

SegmentIndexCreationDriver (com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver)37 SegmentGeneratorConfig (com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig)34 File (java.io.File)21 Test (org.testng.annotations.Test)13 ColumnMetadataTest (com.linkedin.pinot.core.segment.index.ColumnMetadataTest)7 IndexSegment (com.linkedin.pinot.core.indexsegment.IndexSegment)6 BeforeClass (org.testng.annotations.BeforeClass)6 SegmentIndexCreationDriverImpl (com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl)5 URL (java.net.URL)5 Schema (com.linkedin.pinot.common.data.Schema)3 HashMap (java.util.HashMap)3 Field (org.apache.avro.Schema.Field)3 GenericRecord (org.apache.avro.generic.GenericRecord)3 BeforeMethod (org.testng.annotations.BeforeMethod)3 StarTreeIndexSpec (com.linkedin.pinot.common.data.StarTreeIndexSpec)2 IndexLoadingConfigMetadata (com.linkedin.pinot.common.metadata.segment.IndexLoadingConfigMetadata)2 Configuration (org.apache.commons.configuration.Configuration)2 PropertiesConfiguration (org.apache.commons.configuration.PropertiesConfiguration)2 BeforeTest (org.testng.annotations.BeforeTest)2 ListenableFuture (com.google.common.util.concurrent.ListenableFuture)1