use of com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver in project pinot by linkedin.
the class BaseSingleValueQueriesTest method buildSegment.
/**
 * Generates the index segment that the single-value query tests run against.
 *
 * <p>Clears {@code INDEX_DIR}, resolves the {@code AVRO_DATA} classpath resource, declares the
 * schema of the columns to index and then runs the segment creation driver with inverted indexes
 * requested for a subset of the columns.
 *
 * @throws Exception if the segment cannot be built
 */
@BeforeTest
public void buildSegment() throws Exception {
  // Remove whatever a previous run left behind.
  FileUtils.deleteQuietly(INDEX_DIR);

  // Locate the Avro input on the classpath; the setup cannot continue without it.
  URL avroResource = getClass().getClassLoader().getResource(AVRO_DATA);
  Assert.assertNotNull(avroResource);
  String avroPath = avroResource.getFile();

  // Declare the metrics, dimensions and time column of the test table.
  Schema tableSchema = new Schema.SchemaBuilder()
      .setSchemaName("testTable")
      .addMetric("column1", FieldSpec.DataType.INT)
      .addMetric("column3", FieldSpec.DataType.INT)
      .addSingleValueDimension("column5", FieldSpec.DataType.STRING)
      .addSingleValueDimension("column6", FieldSpec.DataType.INT)
      .addSingleValueDimension("column7", FieldSpec.DataType.INT)
      .addSingleValueDimension("column9", FieldSpec.DataType.INT)
      .addSingleValueDimension("column11", FieldSpec.DataType.STRING)
      .addSingleValueDimension("column12", FieldSpec.DataType.STRING)
      .addMetric("column17", FieldSpec.DataType.INT)
      .addMetric("column18", FieldSpec.DataType.INT)
      .addTime("daysSinceEpoch", TimeUnit.DAYS, FieldSpec.DataType.INT)
      .build();

  // Point the generator at the input file and the output directory.
  SegmentGeneratorConfig generatorConfig = new SegmentGeneratorConfig(tableSchema);
  generatorConfig.setInputFilePath(avroPath);
  generatorConfig.setTableName("testTable");
  generatorConfig.setOutDir(INDEX_DIR.getAbsolutePath());
  generatorConfig.setInvertedIndexCreationColumns(
      Arrays.asList("column6", "column7", "column11", "column17", "column18"));

  // Run the driver to materialize the segment on disk.
  SegmentIndexCreationDriver segmentDriver = new SegmentIndexCreationDriverImpl();
  segmentDriver.init(generatorConfig);
  segmentDriver.build();
}
use of com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver in project pinot by linkedin.
the class CrcUtilsTest method makeSegmentAndReturnPath.
/**
 * Builds a segment from the {@code AVRO_DATA} test resource under {@code INDEX_DIR}.
 *
 * @return the absolute path of the directory named after the generated segment
 * @throws Exception if the resource cannot be resolved or the segment cannot be built
 */
private String makeSegmentAndReturnPath() throws Exception {
  // Resolve the Avro resource to a file on disk.
  final ClassLoader resourceLoader = ChunkIndexCreationDriverImplTest.class.getClassLoader();
  final String avroPath = TestUtils.getFileFromResourceUrl(resourceLoader.getResource(AVRO_DATA));
  final File avroFile = new File(avroPath);

  // Generator settings: day-granularity time column, table "testTable", name postfix "1".
  final SegmentGeneratorConfig generatorConfig =
      SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(
          avroFile, INDEX_DIR, "daysSinceEpoch", TimeUnit.DAYS, "testTable");
  generatorConfig.setSegmentNamePostfix("1");
  generatorConfig.setTimeColumnName("daysSinceEpoch");

  // Build the segment.
  final SegmentIndexCreationDriver segmentDriver = SegmentCreationDriverFactory.get(null);
  segmentDriver.init(generatorConfig);
  segmentDriver.build();

  final File segmentDir = new File(INDEX_DIR, segmentDriver.getSegmentName());
  return segmentDir.getAbsolutePath();
}
use of com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver in project pinot by linkedin.
the class BaseClusterIntegrationTest method buildSegmentsFromAvro.
/**
 * Builds one segment per Avro file on the given executor and packs each segment into a tar.gz.
 *
 * <p>Segment {@code k} (1-based position in {@code avroFiles}) is numbered
 * {@code k + baseSegmentIndex}, built under {@code baseDirectory/segment-<number>} and archived
 * into {@code segmentTarDir}.
 *
 * @param avroFiles input Avro files, one segment is built for each
 * @param executor executor on which the per-segment build tasks are run
 * @param baseSegmentIndex offset added to the 1-based file position to get the segment number
 * @param baseDirectory parent directory for the per-segment output directories
 * @param segmentTarDir directory that receives the tar.gz archives
 * @param tableName table name passed to the segment generator config
 * @param createStarTreeIndex whether to enable the star tree index (in off-heap format)
 * @param inputPinotSchema schema to apply to the generator config; may be {@code null}
 * @return a future that completes with a map from each input Avro file to its segment archive;
 *     a failing build task surfaces as a {@link RuntimeException} wrapping the original cause
 */
public static Future<Map<File, File>> buildSegmentsFromAvro(final List<File> avroFiles, Executor executor, int baseSegmentIndex, final File baseDirectory, final File segmentTarDir, final String tableName, final boolean createStarTreeIndex, final com.linkedin.pinot.common.data.Schema inputPinotSchema) {
  int segmentCount = avroFiles.size();
  LOGGER.info("Building {} segments in parallel", segmentCount);
  List<ListenableFutureTask<Pair<File, File>>> futureTasks = new ArrayList<ListenableFutureTask<Pair<File, File>>>();
  for (int i = 1; i <= segmentCount; ++i) {
    final int segmentIndex = i - 1;
    final int segmentNumber = i + baseSegmentIndex;
    final ListenableFutureTask<Pair<File, File>> buildSegmentFutureTask = ListenableFutureTask.<Pair<File, File>>create(new Callable<Pair<File, File>>() {
      @Override
      public Pair<File, File> call() throws Exception {
        try {
          // Build segment
          LOGGER.info("Starting to build segment {}", segmentNumber);
          File outputDir = new File(baseDirectory, "segment-" + segmentNumber);
          final File inputAvroFile = avroFiles.get(segmentIndex);
          final SegmentGeneratorConfig genConfig = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(inputAvroFile, outputDir, TimeUnit.DAYS, tableName, inputPinotSchema);
          if (inputPinotSchema != null) {
            genConfig.setSchema(inputPinotSchema);
          }
          // jfim: We add a space and a special character to do a regression test for PINOT-3296 Segments with spaces
          // in their filename don't work properly
          genConfig.setSegmentNamePostfix(Integer.toString(segmentNumber) + " %");
          genConfig.setEnableStarTreeIndex(createStarTreeIndex);
          // Enable off heap star tree format in the integration test.
          StarTreeIndexSpec starTreeIndexSpec = null;
          if (createStarTreeIndex) {
            starTreeIndexSpec = new StarTreeIndexSpec();
            starTreeIndexSpec.setEnableOffHeapFormat(true);
          }
          genConfig.setStarTreeIndexSpec(starTreeIndexSpec);
          final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
          driver.init(genConfig);
          driver.build();
          // Tar segment. The first entry of the output directory is taken as the segment; fail
          // with a descriptive message instead of an NPE / index error when nothing was produced.
          String[] outputEntries = outputDir.list();
          if (outputEntries == null || outputEntries.length == 0) {
            throw new IllegalStateException("No segment found in output directory " + outputDir.getAbsolutePath());
          }
          String segmentName = outputEntries[0];
          final String tarGzPath = TarGzCompressionUtils.createTarGzOfDirectory(outputDir.getAbsolutePath() + "/" + segmentName, new File(segmentTarDir, segmentName).getAbsolutePath());
          LOGGER.info("Completed segment {} : {} from file {}", segmentNumber, segmentName, inputAvroFile.getName());
          return new ImmutablePair<File, File>(inputAvroFile, new File(tarGzPath));
        } catch (Exception e) {
          // Pass the exception as the trailing argument so its stack trace ends up in the log.
          LOGGER.error("Exception while building segment input: {} output {} ", avroFiles.get(segmentIndex), "segment-" + segmentNumber, e);
          throw new RuntimeException(e);
        }
      }
    });
    futureTasks.add(buildSegmentFutureTask);
    executor.execute(buildSegmentFutureTask);
  }
  // Combine the per-segment results into a single avro-file -> archive map.
  ListenableFuture<List<Pair<File, File>>> pairListFuture = Futures.allAsList(futureTasks);
  return Futures.transform(pairListFuture, new AsyncFunction<List<Pair<File, File>>, Map<File, File>>() {
    @Override
    public ListenableFuture<Map<File, File>> apply(List<Pair<File, File>> input) throws Exception {
      Map<File, File> avroToSegmentMap = new HashMap<File, File>();
      for (Pair<File, File> avroToSegmentPair : input) {
        avroToSegmentMap.put(avroToSegmentPair.getLeft(), avroToSegmentPair.getRight());
      }
      return Futures.immediateFuture(avroToSegmentMap);
    }
  });
}
use of com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver in project pinot by linkedin.
the class IntArraysTest method before.
/**
 * Builds the test segment from the {@code AVRO_DATA} resource into {@code INDEX_DIR} and then
 * reads the field names from the Avro file's schema.
 *
 * @throws Exception if the segment cannot be built or the Avro file cannot be read
 */
@BeforeClass
public static void before() throws Exception {
  final String filePath = TestUtils.getFileFromResourceUrl(DictionariesTest.class.getClassLoader().getResource(AVRO_DATA));
  // Start from a clean index directory.
  if (INDEX_DIR.exists()) {
    FileUtils.deleteQuietly(INDEX_DIR);
  }
  final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
  final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR, "weeksSinceEpochSunday", TimeUnit.DAYS, "test");
  config.setTimeColumnName("weeksSinceEpochSunday");
  driver.init(config);
  driver.build();
  final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath));
  try {
    // Collect the field names declared in the Avro schema; they are not used further in this method.
    final org.apache.avro.Schema avroSchema = avroReader.getSchema();
    final String[] columns = new String[avroSchema.getFields().size()];
    int i = 0;
    for (final Field f : avroSchema.getFields()) {
      columns[i] = f.name();
      i++;
    }
  } finally {
    // Release the underlying file handle; the reader was previously left open.
    avroReader.close();
  }
}
use of com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver in project pinot by linkedin.
the class SegmentMetadataImplTest method setUp.
/**
 * Creates a fresh temporary index directory, builds a segment from the {@code AVRO_DATA}
 * resource into it and records the resulting segment directory in {@code segmentDirectory}.
 *
 * @throws Exception if the temporary directory or the segment cannot be created
 */
@BeforeMethod
public void setUp() throws Exception {
  // Each test method gets its own temporary index directory.
  final String tempDirPrefix = SegmentMetadataImplTest.class.getName() + "_segmentDir";
  INDEX_DIR = Files.createTempDirectory(tempDirPrefix).toFile();

  final ClassLoader resourceLoader = SegmentMetadataImplTest.class.getClassLoader();
  final String avroPath = TestUtils.getFileFromResourceUrl(resourceLoader.getResource(AVRO_DATA));
  final File avroFile = new File(avroPath);

  // TimeUnit.HOURS is used on purpose so that the time unit is non-default for testing.
  final SegmentGeneratorConfig generatorConfig =
      SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(
          avroFile, INDEX_DIR, "daysSinceEpoch", TimeUnit.HOURS, "testTable");
  generatorConfig.setSegmentNamePostfix("1");
  generatorConfig.setTimeColumnName("daysSinceEpoch");

  // Build the segment and remember where it was written.
  final SegmentIndexCreationDriver segmentDriver = SegmentCreationDriverFactory.get(null);
  segmentDriver.init(generatorConfig);
  segmentDriver.build();
  segmentDirectory = new File(INDEX_DIR, segmentDriver.getSegmentName());
}
Aggregations