Use of com.linkedin.pinot.core.segment.store.SegmentDirectory in the project pinot by linkedin.
Class SegmentPreProcessorTest, method checkUpdateDefaultColumns.
/**
 * Pre-processes the segment with two successive new-column schemas and verifies the column
 * metadata and the indexes produced for the added columns.
 *
 * @param segmentDirectoryFile directory of the segment to pre-process and inspect
 * @throws Exception if pre-processing the segment or reading it back fails
 */
private void checkUpdateDefaultColumns(File segmentDirectoryFile) throws Exception {
  // First pass: pre-process with the first schema and reload the metadata from disk.
  new SegmentPreProcessor(segmentDirectoryFile, _indexLoadingConfigMetadata, _newColumnsSchema1).process();
  SegmentMetadataImpl firstPassMetadata = new SegmentMetadataImpl(segmentDirectoryFile);

  // The int metric column is verified field by field; the other columns only get the checks
  // that are specific to them.
  ColumnMetadata intMetric = firstPassMetadata.getColumnMetadataFor(NEW_INT_METRIC_COLUMN_NAME);
  Assert.assertEquals(intMetric.getCardinality(), 1);
  Assert.assertEquals(intMetric.getTotalDocs(), 100000);
  Assert.assertEquals(intMetric.getTotalRawDocs(), 100000);
  Assert.assertEquals(intMetric.getTotalAggDocs(), 0);
  Assert.assertEquals(intMetric.getDataType(), FieldSpec.DataType.INT);
  Assert.assertEquals(intMetric.getBitsPerElement(), 1);
  Assert.assertEquals(intMetric.getStringColumnMaxLength(), 0);
  Assert.assertEquals(intMetric.getFieldType(), FieldSpec.FieldType.METRIC);
  Assert.assertTrue(intMetric.isSorted());
  Assert.assertFalse(intMetric.hasNulls());
  Assert.assertTrue(intMetric.hasDictionary());
  Assert.assertTrue(intMetric.hasInvertedIndex());
  Assert.assertTrue(intMetric.isSingleValue());
  Assert.assertEquals(intMetric.getMaxNumberOfMultiValues(), 0);
  Assert.assertEquals(intMetric.getTotalNumberOfEntries(), 100000);
  Assert.assertTrue(intMetric.isAutoGenerated());
  Assert.assertEquals(intMetric.getDefaultNullValueString(), "1");

  ColumnMetadata longMetric = firstPassMetadata.getColumnMetadataFor(NEW_LONG_METRIC_COLUMN_NAME);
  Assert.assertEquals(longMetric.getDataType(), FieldSpec.DataType.LONG);
  Assert.assertEquals(longMetric.getDefaultNullValueString(), "0");

  ColumnMetadata floatMetric = firstPassMetadata.getColumnMetadataFor(NEW_FLOAT_METRIC_COLUMN_NAME);
  Assert.assertEquals(floatMetric.getDataType(), FieldSpec.DataType.FLOAT);
  Assert.assertEquals(floatMetric.getDefaultNullValueString(), "0.0");

  ColumnMetadata doubleMetric = firstPassMetadata.getColumnMetadataFor(NEW_DOUBLE_METRIC_COLUMN_NAME);
  Assert.assertEquals(doubleMetric.getDataType(), FieldSpec.DataType.DOUBLE);
  Assert.assertEquals(doubleMetric.getDefaultNullValueString(), "0.0");

  ColumnMetadata booleanDimension = firstPassMetadata.getColumnMetadataFor(NEW_BOOLEAN_SV_DIMENSION_COLUMN_NAME);
  Assert.assertEquals(booleanDimension.getDataType(), FieldSpec.DataType.STRING);
  Assert.assertEquals(booleanDimension.getStringColumnMaxLength(), 5);
  Assert.assertEquals(booleanDimension.getFieldType(), FieldSpec.FieldType.DIMENSION);
  Assert.assertEquals(booleanDimension.getDefaultNullValueString(), "false");

  ColumnMetadata intDimension = firstPassMetadata.getColumnMetadataFor(NEW_INT_SV_DIMENSION_COLUMN_NAME);
  Assert.assertEquals(intDimension.getDataType(), FieldSpec.DataType.INT);
  Assert.assertEquals(intDimension.getDefaultNullValueString(), Integer.toString(Integer.MIN_VALUE));

  ColumnMetadata stringMvDimension = firstPassMetadata.getColumnMetadataFor(NEW_STRING_MV_DIMENSION_COLUMN_NAME);
  Assert.assertEquals(stringMvDimension.getDataType(), FieldSpec.DataType.STRING);
  Assert.assertEquals(stringMvDimension.getStringColumnMaxLength(), 4);
  Assert.assertFalse(stringMvDimension.isSorted());
  Assert.assertFalse(stringMvDimension.isSingleValue());
  Assert.assertEquals(stringMvDimension.getMaxNumberOfMultiValues(), 1);
  Assert.assertEquals(stringMvDimension.getTotalNumberOfEntries(), 100000);
  Assert.assertEquals(stringMvDimension.getDefaultNullValueString(), "null");

  // Every added column must expose both a dictionary and a forward index.
  String[] addedColumns = {
      NEW_INT_METRIC_COLUMN_NAME,
      NEW_LONG_METRIC_COLUMN_NAME,
      NEW_FLOAT_METRIC_COLUMN_NAME,
      NEW_DOUBLE_METRIC_COLUMN_NAME,
      NEW_BOOLEAN_SV_DIMENSION_COLUMN_NAME,
      NEW_INT_SV_DIMENSION_COLUMN_NAME,
      NEW_STRING_MV_DIMENSION_COLUMN_NAME
  };
  try (SegmentDirectory segmentDirectory =
          SegmentDirectory.createFromLocalFS(segmentDirectoryFile, firstPassMetadata, ReadMode.mmap);
      SegmentDirectory.Reader reader = segmentDirectory.createReader()) {
    for (String addedColumn : addedColumns) {
      Assert.assertTrue(reader.hasIndexFor(addedColumn, ColumnIndexType.DICTIONARY));
      Assert.assertTrue(reader.hasIndexFor(addedColumn, ColumnIndexType.FORWARD_INDEX));
    }
  }

  // Second pass: pre-process again with the second schema. That schema changes the default
  // value of column 'newIntMetric' to 2 and adds the default value 'abcd' (same length as
  // 'null') to column 'newStringMVDimension'.
  new SegmentPreProcessor(segmentDirectoryFile, _indexLoadingConfigMetadata, _newColumnsSchema2).process();
  SegmentMetadataImpl secondPassMetadata = new SegmentMetadataImpl(segmentDirectoryFile);

  // Only the two changed defaults are re-checked.
  Assert.assertEquals(
      secondPassMetadata.getColumnMetadataFor(NEW_INT_METRIC_COLUMN_NAME).getDefaultNullValueString(), "2");
  Assert.assertEquals(
      secondPassMetadata.getColumnMetadataFor(NEW_STRING_MV_DIMENSION_COLUMN_NAME).getDefaultNullValueString(),
      "abcd");
}
Use of com.linkedin.pinot.core.segment.store.SegmentDirectory in the project pinot by linkedin.
Class SegmentPreProcessorTest, method testV3CreateInvertedIndices.
/**
 * Converts a freshly constructed segment to the v3 format and checks that inverted index
 * creation grows the single index file exactly once: the first run must modify the file and
 * add the expected number of bytes, the second run must leave it untouched.
 *
 * @throws Exception if segment construction, conversion or index creation fails
 */
@Test
public void testV3CreateInvertedIndices() throws Exception {
  constructSegment();

  // Convert the segment to the v3 format and confirm the recorded version.
  new SegmentV1V2ToV3FormatConverter().convert(_segmentDirectoryFile);
  File v3Directory = new File(_segmentDirectoryFile, V3_SEGMENT_NAME);
  SegmentMetadataImpl v3Metadata = new SegmentMetadataImpl(v3Directory);
  Assert.assertEquals(SegmentVersion.valueOf(v3Metadata.getVersion()), SegmentVersion.v3);

  // Snapshot the single index file before any inverted index is added.
  File indexFile = new File(v3Directory, "columns.psf");
  FileTime initialModifiedTime = Files.getLastModifiedTime(indexFile.toPath());
  long initialSize = indexFile.length();

  // Sleep 2 seconds so that a modification cannot end up with the same last modified time.
  Thread.sleep(2000);

  // First creation round.
  checkInvertedIndexCreation(v3Directory, v3Metadata, false);

  // Sum up the bytes the two inverted indexes are expected to have added to the file.
  long expectedGrowth = 0L;
  try (SegmentDirectory segmentDirectory =
          SegmentDirectory.createFromLocalFS(v3Directory, v3Metadata, ReadMode.mmap);
      SegmentDirectory.Reader reader = segmentDirectory.createReader()) {
    for (String invertedIndexColumn : new String[] {COLUMN1_NAME, COLUMN13_NAME}) {
      try (PinotDataBuffer invertedIndexBuffer =
          reader.getIndexFor(invertedIndexColumn, ColumnIndexType.INVERTED_INDEX)) {
        // 8 bytes overhead is for checking integrity of the segment.
        expectedGrowth += invertedIndexBuffer.size() + 8;
      }
    }
  }

  FileTime modifiedTimeAfterFirstRun = Files.getLastModifiedTime(indexFile.toPath());
  Assert.assertTrue(modifiedTimeAfterFirstRun.compareTo(initialModifiedTime) > 0);
  long sizeAfterFirstRun = indexFile.length();
  Assert.assertEquals(initialSize + expectedGrowth, sizeAfterFirstRun);

  // Sleep 2 seconds so that an unwanted modification would show up in the last modified time.
  Thread.sleep(2000);

  // Second creation round: the index file must keep both its timestamp and its size.
  checkInvertedIndexCreation(v3Directory, v3Metadata, true);
  Assert.assertEquals(Files.getLastModifiedTime(indexFile.toPath()), modifiedTimeAfterFirstRun);
  Assert.assertEquals(indexFile.length(), sizeAfterFirstRun);
}
Use of com.linkedin.pinot.core.segment.store.SegmentDirectory in the project pinot by linkedin.
Class SegmentFormatConverterV1ToV2, method convert.
/**
 * Converts the segment stored in {@code indexSegmentDir} to the v2 format.
 *
 * <p>For every column that is not sorted, the forward index is read with the v1 fixed-bit
 * reader, rewritten with the v2 fixed-bit writer into a temporary file, and then swapped in
 * through the segment writer. Sorted columns are left untouched. Afterwards the metadata file
 * is copied to a ".orig" sibling and rewritten with the segment version set to v2.
 *
 * @param indexSegmentDir directory holding the segment to convert
 * @throws Exception if reading, re-encoding or rewriting an index or the metadata file fails
 */
@Override
public void convert(File indexSegmentDir) throws Exception {
  SegmentMetadataImpl segmentMetadataImpl = new SegmentMetadataImpl(indexSegmentDir);

  // Close the segment directory and its writer even when the conversion fails part-way.
  try (SegmentDirectory segmentDirectory =
          SegmentDirectory.createFromLocalFS(indexSegmentDir, segmentMetadataImpl, ReadMode.mmap);
      SegmentDirectory.Writer segmentWriter = segmentDirectory.createWriter()) {
    Set<String> columns = segmentMetadataImpl.getAllColumns();
    for (String column : columns) {
      ColumnMetadata columnMetadata = segmentMetadataImpl.getColumnMetadataFor(column);
      if (columnMetadata.isSorted()) {
        // no need to change sorted forward index
        continue;
      }

      PinotDataBuffer fwdIndexBuffer = segmentWriter.getIndexFor(column, ColumnIndexType.FORWARD_INDEX);
      // since we use dictionary to encode values, we wont have any negative values in forward
      // index
      boolean signed = false;
      File convertedFwdIndexFile;

      if (columnMetadata.isSingleValue()) {
        convertedFwdIndexFile =
            new File(indexSegmentDir, column + V1Constants.Indexes.UN_SORTED_SV_FWD_IDX_FILE_EXTENTION + ".tmp");
        SingleColumnSingleValueReader v1Reader =
            new com.linkedin.pinot.core.io.reader.impl.v1.FixedBitSingleValueReader(fwdIndexBuffer,
                segmentMetadataImpl.getTotalDocs(), columnMetadata.getBitsPerElement(), signed);
        try {
          SingleColumnSingleValueWriter v2Writer =
              new com.linkedin.pinot.core.io.writer.impl.v2.FixedBitSingleValueWriter(convertedFwdIndexFile,
                  segmentMetadataImpl.getTotalDocs(), columnMetadata.getBitsPerElement());
          try {
            for (int row = 0; row < segmentMetadataImpl.getTotalDocs(); row++) {
              v2Writer.setInt(row, v1Reader.getInt(row));
            }
          } finally {
            // The writer has to be closed before the temporary file is measured and read back.
            v2Writer.close();
          }
        } finally {
          v1Reader.close();
        }
      } else {
        convertedFwdIndexFile =
            new File(indexSegmentDir, column + V1Constants.Indexes.UN_SORTED_MV_FWD_IDX_FILE_EXTENTION + ".tmp");
        SingleColumnMultiValueReader v1Reader =
            new com.linkedin.pinot.core.io.reader.impl.v1.FixedBitMultiValueReader(fwdIndexBuffer,
                segmentMetadataImpl.getTotalDocs(), columnMetadata.getTotalNumberOfEntries(),
                columnMetadata.getBitsPerElement(), signed);
        try {
          SingleColumnMultiValueWriter v2Writer =
              new com.linkedin.pinot.core.io.writer.impl.v2.FixedBitMultiValueWriter(convertedFwdIndexFile,
                  segmentMetadataImpl.getTotalDocs(), columnMetadata.getTotalNumberOfEntries(),
                  columnMetadata.getBitsPerElement());
          try {
            // Scratch array sized for the largest row; each row is copied out at its real length.
            int[] values = new int[columnMetadata.getMaxNumberOfMultiValues()];
            for (int row = 0; row < segmentMetadataImpl.getTotalDocs(); row++) {
              int length = v1Reader.getIntArray(row, values);
              int[] copy = new int[length];
              System.arraycopy(values, 0, copy, 0, length);
              v2Writer.setIntArray(row, copy);
            }
          } finally {
            // The writer has to be closed before the temporary file is measured and read back.
            v2Writer.close();
          }
        } finally {
          v1Reader.close();
        }
      }

      // Replace the old forward index with the content of the converted temporary file.
      segmentWriter.removeIndex(column, ColumnIndexType.FORWARD_INDEX);
      // FIXME: the int cast silently truncates the size of a converted file larger than 2GB.
      PinotDataBuffer newIndexBuffer = segmentWriter.newIndexFor(column, ColumnIndexType.FORWARD_INDEX,
          (int) convertedFwdIndexFile.length());
      newIndexBuffer.readFrom(convertedFwdIndexFile);
      convertedFwdIndexFile.delete();
    }
  }

  // Keep a copy of the original metadata next to the rewritten file.
  File metadataFile = new File(indexSegmentDir, V1Constants.MetadataKeys.METADATA_FILE_NAME);
  File metadataFileCopy = new File(indexSegmentDir, V1Constants.MetadataKeys.METADATA_FILE_NAME + ".orig");
  try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(metadataFile));
      BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(metadataFileCopy))) {
    IOUtils.copy(bis, bos);
  }

  final PropertiesConfiguration properties = new PropertiesConfiguration(metadataFileCopy);
  // update the segment version
  properties.setProperty(V1Constants.MetadataKeys.Segment.SEGMENT_VERSION, SegmentVersion.v2.toString());
  metadataFile.delete();
  properties.save(metadataFile);
}
Aggregations