Search in sources :

Example 41 with SegmentGeneratorConfig

use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.

From the class AvroDataPublisherTest, method TestReadPartialAvro.

/**
 * Reads the Avro fixture through an {@link AvroRecordReader} whose schema declares only
 * {@code column3} and {@code column2}, and compares the extracted values with the
 * corresponding lines of the JSON fixture.
 *
 * @throws Exception if a fixture cannot be resolved or read
 */
@Test
public void TestReadPartialAvro() throws Exception {
    final String filePath = TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(AVRO_DATA));
    final String jsonPath = TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(JSON_DATA));
    // The schema lists only the two columns under test; each row is asserted below to expose exactly two fields.
    Schema schema = new Schema.SchemaBuilder().addSingleValueDimension("column3", DataType.STRING).addSingleValueDimension("column2", DataType.STRING).build();
    final SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
    config.setFormat(FileFormat.AVRO);
    config.setInputFilePath(filePath);
    config.setSegmentVersion(SegmentVersion.v1);
    final AvroRecordReader avroDataPublisher = new AvroRecordReader(FieldExtractorFactory.getPlainFieldExtractor(config), config.getInputFilePath());
    // NOTE(review): one record is consumed before the comparison loop starts; together with the
    // "cnt > 1" guard below this looks deliberate, but the reason is not visible here - confirm.
    avroDataPublisher.next();
    int cnt = 0;
    for (final String line : FileUtils.readLines(new File(jsonPath))) {
        final JSONObject obj = new JSONObject(line);
        if (avroDataPublisher.hasNext()) {
            final GenericRow recordRow = avroDataPublisher.next();
            Assert.assertEquals(recordRow.getFieldNames().length, 2);
            for (final String column : recordRow.getFieldNames()) {
                final String valueFromJson = obj.get(column).toString();
                final String valueFromAvro = recordRow.getValue(column).toString();
                // Values are only compared once more than one JSON line has been processed.
                if (cnt > 1) {
                    Assert.assertEquals(valueFromAvro, valueFromJson);
                }
            }
        }
        // Counts JSON lines, whether or not a matching Avro record was available.
        cnt++;
    }
    // TestNG's assertEquals takes (actual, expected), matching the assertions above.
    Assert.assertEquals(cnt, 10001);
}
Also used : GenericRow(com.linkedin.pinot.core.data.GenericRow) JSONObject(org.json.JSONObject) AvroRecordReader(com.linkedin.pinot.core.data.readers.AvroRecordReader) Schema(com.linkedin.pinot.common.data.Schema) ArrayList(java.util.ArrayList) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) File(java.io.File) Test(org.testng.annotations.Test)

Example 42 with SegmentGeneratorConfig

use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.

From the class CrcUtilsTest, method makeSegmentAndReturnPath.

/**
 * Builds a segment from the Avro test resource and reports where it was written.
 *
 * @return the absolute path of {@code INDEX_DIR/<segment name>}, using the name reported by the driver
 * @throws Exception if the resource cannot be resolved or the segment build fails
 */
private String makeSegmentAndReturnPath() throws Exception {
    final ClassLoader resourceLoader = ChunkIndexCreationDriverImplTest.class.getClassLoader();
    final File avroInput = new File(TestUtils.getFileFromResourceUrl(resourceLoader.getResource(AVRO_DATA)));

    // Configure the segment: "daysSinceEpoch" is the time column, measured in days.
    final SegmentGeneratorConfig segmentConfig = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(avroInput, INDEX_DIR, "daysSinceEpoch", TimeUnit.DAYS, "testTable");
    segmentConfig.setSegmentNamePostfix("1");
    segmentConfig.setTimeColumnName("daysSinceEpoch");

    // Run the creation driver against that configuration.
    final SegmentIndexCreationDriver creationDriver = SegmentCreationDriverFactory.get(null);
    creationDriver.init(segmentConfig);
    creationDriver.build();

    final File segmentLocation = new File(INDEX_DIR, creationDriver.getSegmentName());
    return segmentLocation.getAbsolutePath();
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) File(java.io.File)

Example 43 with SegmentGeneratorConfig

use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.

From the class BaseClusterIntegrationTest, method buildSegmentsFromAvro.

/**
 * Builds one segment per Avro file on the given executor and tars each result.
 *
 * @param avroFiles           input Avro files; one segment is built per file
 * @param executor            executor on which each build task is run
 * @param baseSegmentIndex    offset added to the 1-based file position to form the segment number
 * @param baseDirectory       parent directory of the per-segment output directories ({@code segment-N})
 * @param segmentTarDir       directory used as the parent of the tar destination path for each segment
 * @param tableName           table name passed to the segment generator configuration
 * @param createStarTreeIndex whether to enable the star tree index (using the off-heap format)
 * @param inputPinotSchema    schema to apply to the generator configuration; may be {@code null}
 * @return a future resolving to a map from each input Avro file to its tarred segment file
 */
public static Future<Map<File, File>> buildSegmentsFromAvro(final List<File> avroFiles, Executor executor, int baseSegmentIndex, final File baseDirectory, final File segmentTarDir, final String tableName, final boolean createStarTreeIndex, final com.linkedin.pinot.common.data.Schema inputPinotSchema) {
    int segmentCount = avroFiles.size();
    LOGGER.info("Building " + segmentCount + " segments in parallel");
    List<ListenableFutureTask<Pair<File, File>>> futureTasks = new ArrayList<ListenableFutureTask<Pair<File, File>>>();
    for (int i = 1; i <= segmentCount; ++i) {
        // i is 1-based so that segment numbers start at baseSegmentIndex + 1; the list index is 0-based.
        final int segmentIndex = i - 1;
        final int segmentNumber = i + baseSegmentIndex;
        final ListenableFutureTask<Pair<File, File>> buildSegmentFutureTask = ListenableFutureTask.<Pair<File, File>>create(new Callable<Pair<File, File>>() {

            @Override
            public Pair<File, File> call() throws Exception {
                try {
                    // Build segment
                    LOGGER.info("Starting to build segment " + segmentNumber);
                    File outputDir = new File(baseDirectory, "segment-" + segmentNumber);
                    final File inputAvroFile = avroFiles.get(segmentIndex);
                    final SegmentGeneratorConfig genConfig = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(inputAvroFile, outputDir, TimeUnit.DAYS, tableName, inputPinotSchema);
                    if (inputPinotSchema != null) {
                        genConfig.setSchema(inputPinotSchema);
                    }
                    // jfim: We add a space and a special character to do a regression test for PINOT-3296 Segments with spaces
                    // in their filename don't work properly
                    genConfig.setSegmentNamePostfix(Integer.toString(segmentNumber) + " %");
                    genConfig.setEnableStarTreeIndex(createStarTreeIndex);
                    // Enable off heap star tree format in the integration test.
                    StarTreeIndexSpec starTreeIndexSpec = null;
                    if (createStarTreeIndex) {
                        starTreeIndexSpec = new StarTreeIndexSpec();
                        starTreeIndexSpec.setEnableOffHeapFormat(true);
                    }
                    genConfig.setStarTreeIndexSpec(starTreeIndexSpec);
                    final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
                    driver.init(genConfig);
                    driver.build();
                    // Tar segment: the first entry of the output directory is taken as the segment.
                    final String[] outputEntries = outputDir.list();
                    if (outputEntries == null || outputEntries.length == 0) {
                        // Fail with a clear message instead of a bare NPE / ArrayIndexOutOfBoundsException.
                        throw new IllegalStateException("No segment found in output directory " + outputDir.getAbsolutePath());
                    }
                    String segmentName = outputEntries[0];
                    final String tarGzPath = TarGzCompressionUtils.createTarGzOfDirectory(outputDir.getAbsolutePath() + "/" + segmentName, new File(segmentTarDir, segmentName).getAbsolutePath());
                    LOGGER.info("Completed segment " + segmentNumber + " : " + segmentName + " from file " + inputAvroFile.getName());
                    return new ImmutablePair<File, File>(inputAvroFile, new File(tarGzPath));
                } catch (Exception e) {
                    // Pass the exception as the trailing argument so the cause is logged even if
                    // nobody ever inspects the returned future.
                    LOGGER.error("Exception while building segment input: {} output {} ", avroFiles.get(segmentIndex), "segment-" + segmentNumber, e);
                    throw new RuntimeException(e);
                }
            }
        });
        futureTasks.add(buildSegmentFutureTask);
        executor.execute(buildSegmentFutureTask);
    }
    // Combine the per-segment futures and fold the (avro, tar) pairs into a single map.
    ListenableFuture<List<Pair<File, File>>> pairListFuture = Futures.allAsList(futureTasks);
    return Futures.transform(pairListFuture, new AsyncFunction<List<Pair<File, File>>, Map<File, File>>() {

        @Override
        public ListenableFuture<Map<File, File>> apply(List<Pair<File, File>> input) throws Exception {
            Map<File, File> avroToSegmentMap = new HashMap<File, File>();
            for (Pair<File, File> avroToSegmentPair : input) {
                avroToSegmentMap.put(avroToSegmentPair.getLeft(), avroToSegmentPair.getRight());
            }
            return Futures.immediateFuture(avroToSegmentMap);
        }
    });
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) ArrayList(java.util.ArrayList) StarTreeIndexSpec(com.linkedin.pinot.common.data.StarTreeIndexSpec) JSONException(org.json.JSONException) ArchiveException(org.apache.commons.compress.archivers.ArchiveException) SQLException(java.sql.SQLException) IOException(java.io.IOException) ListenableFutureTask(com.google.common.util.concurrent.ListenableFutureTask) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) List(java.util.List) ArrayList(java.util.ArrayList) File(java.io.File) Map(java.util.Map) HashMap(java.util.HashMap) Pair(org.apache.commons.lang3.tuple.Pair) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair)

Example 44 with SegmentGeneratorConfig

use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.

From the class SegmentTestUtils, method getSegmentGeneratorConfigWithSchema.

/**
 * Creates a {@link SegmentGeneratorConfig} for an Avro input file using the supplied schema.
 *
 * @param inputAvro the Avro file to read; its absolute path becomes the input file path
 * @param outputDir the output directory; its absolute path becomes the out dir
 * @param tableName the table name to set on the configuration
 * @param schema    the schema for the configuration; also supplies the time column name and
 *                  outgoing time unit
 * @return the populated configuration (Avro format, segment version v1)
 */
public static SegmentGeneratorConfig getSegmentGeneratorConfigWithSchema(File inputAvro, File outputDir, String tableName, Schema schema) {
    final SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);

    // Where to read from and where to write to.
    final String avroPath = inputAvro.getAbsolutePath();
    config.setInputFilePath(avroPath);
    final String outputPath = outputDir.getAbsolutePath();
    config.setOutDir(outputPath);

    // Fixed format and version for this helper.
    config.setFormat(FileFormat.AVRO);
    config.setSegmentVersion(SegmentVersion.v1);

    config.setTableName(tableName);

    // Time settings are taken from the schema itself.
    config.setTimeColumnName(schema.getTimeColumnName());
    config.setSegmentTimeUnit(schema.getOutgoingTimeUnit());

    return config;
}
Also used : SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig)

Example 45 with SegmentGeneratorConfig

use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.

From the class IntArraysTest, method before.

/**
 * Builds the test segment from the Avro resource into {@code INDEX_DIR} and then reads
 * the field names from the Avro file's schema.
 *
 * @throws Exception if the resource cannot be resolved, the segment build fails, or the
 *                   Avro file cannot be read or closed
 */
@BeforeClass
public static void before() throws Exception {
    final String filePath = TestUtils.getFileFromResourceUrl(DictionariesTest.class.getClassLoader().getResource(AVRO_DATA));
    // Remove any existing index directory before building.
    if (INDEX_DIR.exists()) {
        FileUtils.deleteQuietly(INDEX_DIR);
    }
    final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
    final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR, "weeksSinceEpochSunday", TimeUnit.DAYS, "test");
    config.setTimeColumnName("weeksSinceEpochSunday");
    driver.init(config);
    driver.build();
    final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath));
    try {
        final org.apache.avro.Schema avroSchema = avroReader.getSchema();
        // NOTE(review): the column names are collected but not used within this method - confirm
        // whether they were meant to be stored in a field.
        final String[] columns = new String[avroSchema.getFields().size()];
        int i = 0;
        for (final Field f : avroSchema.getFields()) {
            columns[i] = f.name();
            i++;
        }
    } finally {
        // Release the underlying stream; previously the reader was left open.
        avroReader.close();
    }
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) Field(org.apache.avro.Schema.Field) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) BeforeClass(org.testng.annotations.BeforeClass)

Aggregations

SegmentGeneratorConfig (com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig)57 SegmentIndexCreationDriver (com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver)34 File (java.io.File)31 SegmentIndexCreationDriverImpl (com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl)19 GenericRow (com.linkedin.pinot.core.data.GenericRow)14 Test (org.testng.annotations.Test)13 Schema (com.linkedin.pinot.common.data.Schema)12 HashMap (java.util.HashMap)12 ArrayList (java.util.ArrayList)10 RecordReader (com.linkedin.pinot.core.data.readers.RecordReader)9 BeforeClass (org.testng.annotations.BeforeClass)8 ColumnMetadataTest (com.linkedin.pinot.core.segment.index.ColumnMetadataTest)7 DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec)6 IndexSegment (com.linkedin.pinot.core.indexsegment.IndexSegment)6 FieldSpec (com.linkedin.pinot.common.data.FieldSpec)5 URL (java.net.URL)5 Random (java.util.Random)4 AvroRecordReader (com.linkedin.pinot.core.data.readers.AvroRecordReader)3 TestRecordReader (com.linkedin.pinot.core.data.readers.TestRecordReader)3 JSONObject (org.json.JSONObject)3