Search in sources :

Example 26 with SegmentIndexCreationDriver

use of com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver in project pinot by linkedin.

the class BaseSingleValueQueriesTest method buildSegment.

/**
 * Builds the index segment used by the tests: clears {@code INDEX_DIR}, resolves the
 * {@code AVRO_DATA} classpath resource, and runs the segment creation driver against it.
 *
 * @throws Exception if the driver fails to initialize or build the segment
 */
@BeforeTest
public void buildSegment() throws Exception {
    // Start from an empty output directory.
    FileUtils.deleteQuietly(INDEX_DIR);

    // Locate the Avro input on the classpath; fail fast when it is missing.
    URL avroResource = getClass().getClassLoader().getResource(AVRO_DATA);
    Assert.assertNotNull(avroResource);
    String avroFilePath = avroResource.getFile();

    // Declare the columns of the segment, one builder call per line.
    Schema segmentSchema = new Schema.SchemaBuilder()
        .setSchemaName("testTable")
        .addMetric("column1", FieldSpec.DataType.INT)
        .addMetric("column3", FieldSpec.DataType.INT)
        .addSingleValueDimension("column5", FieldSpec.DataType.STRING)
        .addSingleValueDimension("column6", FieldSpec.DataType.INT)
        .addSingleValueDimension("column7", FieldSpec.DataType.INT)
        .addSingleValueDimension("column9", FieldSpec.DataType.INT)
        .addSingleValueDimension("column11", FieldSpec.DataType.STRING)
        .addSingleValueDimension("column12", FieldSpec.DataType.STRING)
        .addMetric("column17", FieldSpec.DataType.INT)
        .addMetric("column18", FieldSpec.DataType.INT)
        .addTime("daysSinceEpoch", TimeUnit.DAYS, FieldSpec.DataType.INT)
        .build();

    // Point the generator at the input file, the table name and the output directory,
    // and request inverted indexes on a subset of the columns.
    SegmentGeneratorConfig generatorConfig = new SegmentGeneratorConfig(segmentSchema);
    generatorConfig.setInputFilePath(avroFilePath);
    generatorConfig.setTableName("testTable");
    generatorConfig.setOutDir(INDEX_DIR.getAbsolutePath());
    generatorConfig.setInvertedIndexCreationColumns(
        Arrays.asList("column6", "column7", "column11", "column17", "column18"));

    // Run the driver to produce the segment.
    SegmentIndexCreationDriver segmentDriver = new SegmentIndexCreationDriverImpl();
    segmentDriver.init(generatorConfig);
    segmentDriver.build();
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) Schema(com.linkedin.pinot.common.data.Schema) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) URL(java.net.URL) SegmentIndexCreationDriverImpl(com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl) BeforeTest(org.testng.annotations.BeforeTest)

Example 27 with SegmentIndexCreationDriver

use of com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver in project pinot by linkedin.

the class CrcUtilsTest method makeSegmentAndReturnPath.

/**
 * Builds a segment from the {@code AVRO_DATA} classpath resource into {@code INDEX_DIR}.
 *
 * @return the absolute path of the directory named after the generated segment inside {@code INDEX_DIR}
 * @throws Exception if the segment cannot be configured or built
 */
private String makeSegmentAndReturnPath() throws Exception {
    // Resolve the Avro input file from the classpath.
    final File avroFile = new File(TestUtils.getFileFromResourceUrl(
        ChunkIndexCreationDriverImplTest.class.getClassLoader().getResource(AVRO_DATA)));

    // The same column name is used for the generator spec and the explicit time column setting.
    final String timeColumn = "daysSinceEpoch";
    final SegmentGeneratorConfig generatorConfig = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(
        avroFile, INDEX_DIR, timeColumn, TimeUnit.DAYS, "testTable");
    generatorConfig.setSegmentNamePostfix("1");
    generatorConfig.setTimeColumnName(timeColumn);

    // Build the segment with the driver supplied by the factory.
    final SegmentIndexCreationDriver creationDriver = SegmentCreationDriverFactory.get(null);
    creationDriver.init(generatorConfig);
    creationDriver.build();

    final File segmentDir = new File(INDEX_DIR, creationDriver.getSegmentName());
    return segmentDir.getAbsolutePath();
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) File(java.io.File)

Example 28 with SegmentIndexCreationDriver

use of com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver in project pinot by linkedin.

the class BaseClusterIntegrationTest method buildSegmentsFromAvro.

/**
 * Submits one segment-build task per Avro file to the given executor and returns a future holding
 * the mapping from each input Avro file to the tar.gz archive created for its segment.
 *
 * <p>Each task builds its segment into {@code baseDirectory/segment-<number>}, where the number is
 * the 1-based position of the Avro file plus {@code baseSegmentIndex}, and then archives the
 * generated segment directory under {@code segmentTarDir}.
 *
 * @param avroFiles input Avro files, one segment is built per file
 * @param executor executor on which the build tasks are run
 * @param baseSegmentIndex offset added to the 1-based file position to obtain the segment number
 * @param baseDirectory parent directory of the per-segment output directories
 * @param segmentTarDir directory that receives the tar.gz archives
 * @param tableName table name passed to the segment generator configuration
 * @param createStarTreeIndex whether to enable the star tree index (with the off-heap format)
 * @param inputPinotSchema schema to set on the generator configuration; ignored when {@code null}
 * @return a future of the Avro-file-to-archive map, derived from the results of all build tasks
 */
public static Future<Map<File, File>> buildSegmentsFromAvro(final List<File> avroFiles, Executor executor, int baseSegmentIndex, final File baseDirectory, final File segmentTarDir, final String tableName, final boolean createStarTreeIndex, final com.linkedin.pinot.common.data.Schema inputPinotSchema) {
    int segmentCount = avroFiles.size();
    LOGGER.info("Building " + segmentCount + " segments in parallel");
    List<ListenableFutureTask<Pair<File, File>>> futureTasks = new ArrayList<ListenableFutureTask<Pair<File, File>>>();
    for (int i = 1; i <= segmentCount; ++i) {
        // segmentIndex addresses the input list (0-based); segmentNumber names the output (1-based plus offset).
        final int segmentIndex = i - 1;
        final int segmentNumber = i + baseSegmentIndex;
        final ListenableFutureTask<Pair<File, File>> buildSegmentFutureTask = ListenableFutureTask.<Pair<File, File>>create(new Callable<Pair<File, File>>() {

            @Override
            public Pair<File, File> call() throws Exception {
                try {
                    // Build segment
                    LOGGER.info("Starting to build segment " + segmentNumber);
                    File outputDir = new File(baseDirectory, "segment-" + segmentNumber);
                    final File inputAvroFile = avroFiles.get(segmentIndex);
                    final SegmentGeneratorConfig genConfig = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(inputAvroFile, outputDir, TimeUnit.DAYS, tableName, inputPinotSchema);
                    if (inputPinotSchema != null) {
                        genConfig.setSchema(inputPinotSchema);
                    }
                    // jfim: We add a space and a special character to do a regression test for PINOT-3296 Segments with spaces
                    // in their filename don't work properly
                    genConfig.setSegmentNamePostfix(Integer.toString(segmentNumber) + " %");
                    genConfig.setEnableStarTreeIndex(createStarTreeIndex);
                    // Enable off heap star tree format in the integration test.
                    StarTreeIndexSpec starTreeIndexSpec = null;
                    if (createStarTreeIndex) {
                        starTreeIndexSpec = new StarTreeIndexSpec();
                        starTreeIndexSpec.setEnableOffHeapFormat(true);
                    }
                    genConfig.setStarTreeIndexSpec(starTreeIndexSpec);
                    final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
                    driver.init(genConfig);
                    driver.build();
                    // Tar segment. File.list() returns null when the directory cannot be listed and an
                    // empty array when nothing was generated; report both with a clear message instead
                    // of failing with a bare NullPointerException/ArrayIndexOutOfBoundsException.
                    String[] outputEntries = outputDir.list();
                    if (outputEntries == null || outputEntries.length == 0) {
                        throw new IllegalStateException("No segment was generated in " + outputDir.getAbsolutePath());
                    }
                    String segmentName = outputEntries[0];
                    final String tarGzPath = TarGzCompressionUtils.createTarGzOfDirectory(outputDir.getAbsolutePath() + "/" + segmentName, new File(segmentTarDir, segmentName).getAbsolutePath());
                    LOGGER.info("Completed segment " + segmentNumber + " : " + segmentName + " from file " + inputAvroFile.getName());
                    return new ImmutablePair<File, File>(inputAvroFile, new File(tarGzPath));
                } catch (Exception e) {
                    // Pass the exception as the trailing argument so the cause and stack trace are logged too.
                    LOGGER.error("Exception while building segment input: {} output {} ", avroFiles.get(segmentIndex), "segment-" + segmentNumber, e);
                    throw new RuntimeException(e);
                }
            }
        });
        futureTasks.add(buildSegmentFutureTask);
        executor.execute(buildSegmentFutureTask);
    }
    // Combine the per-segment results into a single Avro-file -> archive map.
    ListenableFuture<List<Pair<File, File>>> pairListFuture = Futures.allAsList(futureTasks);
    return Futures.transform(pairListFuture, new AsyncFunction<List<Pair<File, File>>, Map<File, File>>() {

        @Override
        public ListenableFuture<Map<File, File>> apply(List<Pair<File, File>> input) throws Exception {
            Map<File, File> avroToSegmentMap = new HashMap<File, File>();
            for (Pair<File, File> avroToSegmentPair : input) {
                avroToSegmentMap.put(avroToSegmentPair.getLeft(), avroToSegmentPair.getRight());
            }
            return Futures.immediateFuture(avroToSegmentMap);
        }
    });
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) ArrayList(java.util.ArrayList) StarTreeIndexSpec(com.linkedin.pinot.common.data.StarTreeIndexSpec) JSONException(org.json.JSONException) ArchiveException(org.apache.commons.compress.archivers.ArchiveException) SQLException(java.sql.SQLException) IOException(java.io.IOException) ListenableFutureTask(com.google.common.util.concurrent.ListenableFutureTask) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) List(java.util.List) ArrayList(java.util.ArrayList) File(java.io.File) Map(java.util.Map) HashMap(java.util.HashMap) Pair(org.apache.commons.lang3.tuple.Pair) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair)

Example 29 with SegmentIndexCreationDriver

use of com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver in project pinot by linkedin.

the class IntArraysTest method before.

/**
 * Builds the test segment from the {@code AVRO_DATA} classpath resource into {@code INDEX_DIR}
 * (removing any existing directory first) and then reads the column names from the Avro schema.
 *
 * @throws Exception if the segment cannot be built or the Avro file cannot be read
 */
@BeforeClass
public static void before() throws Exception {
    final String filePath = TestUtils.getFileFromResourceUrl(DictionariesTest.class.getClassLoader().getResource(AVRO_DATA));
    if (INDEX_DIR.exists()) {
        FileUtils.deleteQuietly(INDEX_DIR);
    }
    final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
    final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR, "weeksSinceEpochSunday", TimeUnit.DAYS, "test");
    config.setTimeColumnName("weeksSinceEpochSunday");
    driver.init(config);
    driver.build();
    // Open the Avro reader in try-with-resources so it is closed once the schema has been read.
    try (DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath))) {
        final org.apache.avro.Schema avroSchema = avroReader.getSchema();
        // NOTE(review): columns is filled in but not read anywhere in this method — confirm whether it is still needed.
        final String[] columns = new String[avroSchema.getFields().size()];
        int i = 0;
        for (final Field f : avroSchema.getFields()) {
            columns[i] = f.name();
            i++;
        }
    }
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) Field(org.apache.avro.Schema.Field) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) BeforeClass(org.testng.annotations.BeforeClass)

Example 30 with SegmentIndexCreationDriver

use of com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver in project pinot by linkedin.

the class SegmentMetadataImplTest method setUp.

/**
 * Creates a new temporary index directory, builds a segment from the {@code AVRO_DATA}
 * classpath resource into it, and records the resulting segment directory in
 * {@code segmentDirectory}.
 *
 * @throws Exception if the temporary directory cannot be created or the segment build fails
 */
@BeforeMethod
public void setUp() throws Exception {
    final String tempDirPrefix = SegmentMetadataImplTest.class.getName() + "_segmentDir";
    INDEX_DIR = Files.createTempDirectory(tempDirPrefix).toFile();

    final File avroFile = new File(TestUtils.getFileFromResourceUrl(
        SegmentMetadataImplTest.class.getClassLoader().getResource(AVRO_DATA)));

    // The time unit is deliberately HOURS so the test exercises a non-default value.
    final String timeColumn = "daysSinceEpoch";
    final SegmentGeneratorConfig generatorConfig = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(
        avroFile, INDEX_DIR, timeColumn, TimeUnit.HOURS, "testTable");
    generatorConfig.setSegmentNamePostfix("1");
    generatorConfig.setTimeColumnName(timeColumn);

    // Build the segment and remember where it was written.
    final SegmentIndexCreationDriver creationDriver = SegmentCreationDriverFactory.get(null);
    creationDriver.init(generatorConfig);
    creationDriver.build();
    segmentDirectory = new File(INDEX_DIR, creationDriver.getSegmentName());
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) File(java.io.File) BeforeMethod(org.testng.annotations.BeforeMethod)

Aggregations

- SegmentIndexCreationDriver (com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver): 37
- SegmentGeneratorConfig (com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig): 34
- File (java.io.File): 21
- Test (org.testng.annotations.Test): 13
- ColumnMetadataTest (com.linkedin.pinot.core.segment.index.ColumnMetadataTest): 7
- IndexSegment (com.linkedin.pinot.core.indexsegment.IndexSegment): 6
- BeforeClass (org.testng.annotations.BeforeClass): 6
- SegmentIndexCreationDriverImpl (com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl): 5
- URL (java.net.URL): 5
- Schema (com.linkedin.pinot.common.data.Schema): 3
- HashMap (java.util.HashMap): 3
- Field (org.apache.avro.Schema.Field): 3
- GenericRecord (org.apache.avro.generic.GenericRecord): 3
- BeforeMethod (org.testng.annotations.BeforeMethod): 3
- StarTreeIndexSpec (com.linkedin.pinot.common.data.StarTreeIndexSpec): 2
- IndexLoadingConfigMetadata (com.linkedin.pinot.common.metadata.segment.IndexLoadingConfigMetadata): 2
- Configuration (org.apache.commons.configuration.Configuration): 2
- PropertiesConfiguration (org.apache.commons.configuration.PropertiesConfiguration): 2
- BeforeTest (org.testng.annotations.BeforeTest): 2
- ListenableFuture (com.google.common.util.concurrent.ListenableFuture): 1