Use of io.questdb.cairo.sql.RecordMetadata in project questdb by bluestreak01.
From class CairoTextWriter, method openWriterAndOverrideImportTypes:
private TableWriter openWriterAndOverrideImportTypes(CairoSecurityContext cairoSecurityContext, ObjList<TypeAdapter> detectedTypes) {
    TableWriter writer = engine.getWriter(cairoSecurityContext, tableName, WRITER_LOCK_REASON);
    RecordMetadata metadata = writer.getMetadata();
    if (metadata.getColumnCount() < detectedTypes.size()) {
        writer.close();
        throw CairoException.instance(0)
                .put("column count mismatch [textColumnCount=").put(detectedTypes.size())
                .put(", tableColumnCount=").put(metadata.getColumnCount())
                .put(", table=").put(tableName)
                .put(']');
    }
    this.types = detectedTypes;
    // now overwrite detected types with actual table column types
    for (int i = 0, n = this.types.size(); i < n; i++) {
        final int columnType = metadata.getColumnType(i);
        final TypeAdapter detectedAdapter = this.types.getQuick(i);
        final int detectedType = detectedAdapter.getType();
        if (detectedType != columnType) {
            // the table's column type wins; pick an adapter that can populate this field
            switch (ColumnType.tagOf(columnType)) {
                case ColumnType.DATE:
                    logTypeError(i);
                    this.types.setQuick(i, BadDateAdapter.INSTANCE);
                    break;
                case ColumnType.TIMESTAMP:
                    if (detectedAdapter instanceof TimestampCompatibleAdapter) {
                        this.types.setQuick(i, otherToTimestampAdapterPool.next().of((TimestampCompatibleAdapter) detectedAdapter));
                    } else {
                        logTypeError(i);
                        this.types.setQuick(i, BadTimestampAdapter.INSTANCE);
                    }
                    break;
                case ColumnType.BINARY:
                    writer.close();
                    throw CairoException.instance(0).put("cannot import text into BINARY column [index=").put(i).put(']');
                default:
                    this.types.setQuick(i, typeManager.getTypeAdapter(columnType));
                    break;
            }
        }
    }
    return writer;
}
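The method above reconciles types detected from text input against the table's actual schema via RecordMetadata. Below is a minimal standalone sketch of the same reconciliation idea, assuming only the RecordMetadata calls used above (getColumnCount, getColumnType, getColumnName); the helper class and its report format are hypothetical, for illustration only.

import io.questdb.cairo.sql.RecordMetadata;
import java.util.ArrayList;
import java.util.List;

public final class ImportTypeCheck {

    // returns a human-readable list of columns whose detected type
    // disagrees with the table's actual column type
    public static List<String> mismatches(RecordMetadata metadata, int[] detectedTypes) {
        List<String> report = new ArrayList<>();
        int n = Math.min(metadata.getColumnCount(), detectedTypes.length);
        for (int i = 0; i < n; i++) {
            int tableType = metadata.getColumnType(i);
            if (tableType != detectedTypes[i]) {
                report.add(metadata.getColumnName(i) + ": detected=" + detectedTypes[i] + ", table=" + tableType);
            }
        }
        return report;
    }
}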
Use of io.questdb.cairo.sql.RecordMetadata in project questdb by bluestreak01.
From class O3PartitionJob, method processPartition:
public static void processPartition(
        CharSequence pathToTable, int partitionBy,
        ObjList<MemoryMAR> columns, ObjList<MemoryCARW> oooColumns,
        long srcOooLo, long srcOooHi, long srcOooMax,
        long o3TimestampMin, long o3TimestampMax,
        long partitionTimestamp, long maxTimestamp,
        long srcDataMax, long srcDataTxn,
        boolean last, long txn, long sortedTimestampsAddr,
        TableWriter tableWriter, AtomicInteger columnCounter, O3Basket o3Basket, long tmpBuf) {
    // is out of order data hitting the last partition?
    // if so we do not need to re-open files and write to existing file descriptors
    final long o3TimestampLo = getTimestampIndexValue(sortedTimestampsAddr, srcOooLo);
    final RecordMetadata metadata = tableWriter.getMetadata();
    final int timestampIndex = metadata.getTimestampIndex();
    final Path path = Path.getThreadLocal(pathToTable);
    TableUtils.setPathForPartition(path, partitionBy, o3TimestampLo, false);
    final int pplen = path.length();
    TableUtils.txnPartitionConditionally(path, srcDataTxn);
    final int plen = path.length();
    long srcTimestampFd = 0;
    long dataTimestampLo;
    long dataTimestampHi;
    final FilesFacade ff = tableWriter.getFilesFacade();
    if (srcDataMax < 1) {
        if (!last) {
            try {
                LOG.debug().$("would create [path=").$(path.chop$().slash$()).$(']').$();
                createDirsOrFail(ff, path, tableWriter.getConfiguration().getMkDirMode());
            } catch (Throwable e) {
                LOG.error().$("process new partition error [table=").$(tableWriter.getTableName()).$(", e=").$(e).I$();
                tableWriter.o3BumpErrorCount();
                tableWriter.o3ClockDownPartitionUpdateCount();
                tableWriter.o3CountDownDoneLatch();
                throw e;
            }
        }
        publishOpenColumnTasks(
                txn, columns, oooColumns, pathToTable,
                srcOooLo, srcOooHi, srcOooMax,
                o3TimestampMin, o3TimestampMax, o3TimestampLo, partitionTimestamp,
                // below parameters are unused by this type of append
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                srcDataTxn, OPEN_NEW_PARTITION_FOR_APPEND,
                0, // timestamp fd
                0, 0,
                timestampIndex, sortedTimestampsAddr, tableWriter, columnCounter, o3Basket, tmpBuf
        );
    } else {
        long srcTimestampAddr = 0;
        long srcTimestampSize = 0;
        int prefixType;
        long prefixLo;
        long prefixHi;
        int mergeType;
        long mergeDataLo;
        long mergeDataHi;
        long mergeO3Lo;
        long mergeO3Hi;
        int suffixType;
        long suffixLo;
        long suffixHi;
        final int openColumnMode;
        try {
            // the last (active) partition keeps its timestamp column open in the writer,
            // so we re-use its file descriptor instead of re-opening the file
            if (last) {
                dataTimestampHi = maxTimestamp;
                srcTimestampSize = srcDataMax * 8L;
                // negative fd indicates descriptor reuse
                srcTimestampFd = -columns.getQuick(getPrimaryColumnIndex(timestampIndex)).getFd();
                srcTimestampAddr = mapRW(ff, -srcTimestampFd, srcTimestampSize, MemoryTag.MMAP_O3);
            } else {
                srcTimestampSize = srcDataMax * 8L;
                // out of order data is going into archive partition
                // we need to read "low" and "high" boundaries of the partition. "low" being oldest timestamp
                // and "high" being newest
                dFile(path.trimTo(plen), metadata.getColumnName(timestampIndex));
                // also track the fd that we need to eventually close
                srcTimestampFd = openRW(ff, path, LOG);
                srcTimestampAddr = mapRW(ff, srcTimestampFd, srcTimestampSize, MemoryTag.MMAP_O3);
                dataTimestampHi = Unsafe.getUnsafe().getLong(srcTimestampAddr + srcTimestampSize - Long.BYTES);
            }
            dataTimestampLo = Unsafe.getUnsafe().getLong(srcTimestampAddr);
            // create copy jobs
            // we will have maximum of 3 stages:
            // - prefix data
            // - merge job
            // - suffix data
            //
            // prefix and suffix can be sourced either from OO fully or from Data (written to disk) fully
            // so for prefix and suffix we will need a flag indicating source of the data
            // as well as range of rows in that source
            prefixType = O3_BLOCK_NONE;
            prefixLo = -1;
            prefixHi = -1;
            mergeType = O3_BLOCK_NONE;
            mergeDataLo = -1;
            mergeDataHi = -1;
            mergeO3Lo = -1;
            mergeO3Hi = -1;
            suffixType = O3_BLOCK_NONE;
            suffixLo = -1;
            suffixHi = -1;
            assert srcTimestampFd != -1 && srcTimestampFd != 1;
            int branch;
            if (o3TimestampLo > dataTimestampLo) {
                if (o3TimestampLo >= dataTimestampHi) {
                    // +------+
                    // | data |
                    // |      |
                    // +------+
                    //
                    // +-----+
                    // | OOO |
                    // |     |
                    //
                    branch = 1;
                    suffixType = O3_BLOCK_O3;
                    suffixLo = srcOooLo;
                    suffixHi = srcOooHi;
                } else {
                    //
                    // +------+
                    // |      |
                    // |      | +-----+
                    // | data | | OOO |
                    // +------+
                    prefixType = O3_BLOCK_DATA;
                    prefixLo = 0;
                    prefixHi = Vect.boundedBinarySearch64Bit(srcTimestampAddr, o3TimestampLo, 0, srcDataMax - 1, BinarySearch.SCAN_DOWN);
                    mergeDataLo = prefixHi + 1;
                    mergeO3Lo = srcOooLo;
                    if (o3TimestampMax < dataTimestampHi) {
                        //
                        // |      | +-----+
                        // | data | | OOO |
                        // |      | +-----+
                        // +------+
                        branch = 2;
                        mergeO3Hi = srcOooHi;
                        mergeDataHi = Vect.boundedBinarySearch64Bit(srcTimestampAddr, o3TimestampMax - 1, mergeDataLo, srcDataMax - 1, BinarySearch.SCAN_DOWN);
                        if (mergeDataLo > mergeDataHi) {
                            // the OO data implodes right between rows of existing data
                            // so we will have both data prefix and suffix and the middle bit
                            // is the out of order
                            mergeType = O3_BLOCK_O3;
                        } else {
                            mergeType = O3_BLOCK_MERGE;
                        }
                        suffixType = O3_BLOCK_DATA;
                        suffixLo = mergeDataHi + 1;
                        suffixHi = srcDataMax - 1;
                        assert suffixLo <= suffixHi;
                    } else if (o3TimestampMax > dataTimestampHi) {
                        //
                        // |      | +-----+
                        // | data | | OOO |
                        // |      | |     |
                        // +------+ |     |
                        //          |     |
                        //          +-----+
                        branch = 3;
                        mergeO3Hi = Vect.boundedBinarySearchIndexT(sortedTimestampsAddr, dataTimestampHi, srcOooLo, srcOooHi, BinarySearch.SCAN_UP);
                        mergeDataHi = srcDataMax - 1;
                        mergeType = O3_BLOCK_MERGE;
                        suffixType = O3_BLOCK_O3;
                        suffixLo = mergeO3Hi + 1;
                        suffixHi = srcOooHi;
                    } else {
                        //
                        // |      | +-----+
                        // | data | | OOO |
                        // |      | |     |
                        // +------+ +-----+
                        //
                        branch = 4;
                        mergeType = O3_BLOCK_MERGE;
                        mergeO3Hi = srcOooHi;
                        mergeDataHi = srcDataMax - 1;
                    }
                }
            } else {
                // +-----+
                // | OOO |
                //
                // +------+
                // | data |
                prefixType = O3_BLOCK_O3;
                prefixLo = srcOooLo;
                if (dataTimestampLo < o3TimestampMax) {
                    //
                    // +------+ | OOO |
                    // | data | +-----+
                    // |      |
                    mergeDataLo = 0;
                    prefixHi = Vect.boundedBinarySearchIndexT(sortedTimestampsAddr, dataTimestampLo, srcOooLo, srcOooHi, BinarySearch.SCAN_DOWN);
                    mergeO3Lo = prefixHi + 1;
                    if (o3TimestampMax < dataTimestampHi) {
                        // |      | |     |
                        // |      | | OOO |
                        // | data | +-----+
                        // |      |
                        // +------+
                        branch = 5;
                        mergeType = O3_BLOCK_MERGE;
                        mergeO3Hi = srcOooHi;
                        mergeDataHi = Vect.boundedBinarySearch64Bit(srcTimestampAddr, o3TimestampMax, 0, srcDataMax - 1, BinarySearch.SCAN_DOWN);
                        suffixLo = mergeDataHi + 1;
                        suffixType = O3_BLOCK_DATA;
                        suffixHi = srcDataMax - 1;
                    } else if (o3TimestampMax > dataTimestampHi) {
                        // |      | |     |
                        // |      | | OOO |
                        // | data | |     |
                        // +------+ |     |
                        //          +-----+
                        branch = 6;
                        mergeDataHi = srcDataMax - 1;
                        mergeO3Hi = Vect.boundedBinarySearchIndexT(sortedTimestampsAddr, dataTimestampHi - 1, mergeO3Lo, srcOooHi, BinarySearch.SCAN_DOWN);
                        if (mergeO3Lo > mergeO3Hi) {
                            mergeType = O3_BLOCK_DATA;
                        } else {
                            mergeType = O3_BLOCK_MERGE;
                        }
                        if (mergeO3Hi < srcOooHi) {
                            suffixLo = mergeO3Hi + 1;
                            suffixType = O3_BLOCK_O3;
                            suffixHi = Math.max(suffixLo, srcOooHi);
                        }
                    } else {
                        // |      | |     |
                        // |      | | OOO |
                        // | data | |     |
                        // +------+ +-----+
                        branch = 7;
                        mergeType = O3_BLOCK_MERGE;
                        mergeO3Hi = srcOooHi;
                        mergeDataHi = srcDataMax - 1;
                    }
                } else {
                    // +-----+
                    // | OOO |
                    // +-----+
                    //
                    // +------+
                    // | data |
                    //
                    branch = 8;
                    prefixHi = srcOooHi;
                    suffixType = O3_BLOCK_DATA;
                    suffixLo = 0;
                    suffixHi = srcDataMax - 1;
                }
            }
LOG.debug().$("o3 merge [branch=").$(branch).$(", prefixType=").$(prefixType).$(", prefixLo=").$(prefixLo).$(", prefixHi=").$(prefixHi).$(", o3TimestampLo=").$ts(o3TimestampLo).$(", o3TimestampMin=").$ts(o3TimestampMin).$(", o3TimestampMax=").$ts(o3TimestampMax).$(", dataTimestampLo=").$ts(dataTimestampLo).$(", dataTimestampHi=").$ts(dataTimestampHi).$(", partitionTimestamp=").$ts(partitionTimestamp).$(", srcDataMax=").$(srcDataMax).$(", mergeType=").$(mergeType).$(", mergeDataLo=").$(mergeDataLo).$(", mergeDataHi=").$(mergeDataHi).$(", mergeO3Lo=").$(mergeO3Lo).$(", mergeO3Hi=").$(mergeO3Hi).$(", suffixType=").$(suffixType).$(", suffixLo=").$(suffixLo).$(", suffixHi=").$(suffixHi).$(", table=").$(pathToTable).I$();
            if (prefixType == O3_BLOCK_NONE) {
                // we do not need to create a copy of the partition when we simply
                // append to the existing one
                openColumnMode = OPEN_MID_PARTITION_FOR_APPEND;
            } else {
                txnPartition(path.trimTo(pplen), txn);
                createDirsOrFail(ff, path.slash$(), tableWriter.getConfiguration().getMkDirMode());
                if (last) {
                    openColumnMode = OPEN_LAST_PARTITION_FOR_MERGE;
                } else {
                    openColumnMode = OPEN_MID_PARTITION_FOR_MERGE;
                }
            }
        } catch (Throwable e) {
            LOG.error().$("process existing partition error [table=").$(tableWriter.getTableName()).$(", e=").$(e).I$();
            O3Utils.unmap(ff, srcTimestampAddr, srcTimestampSize);
            O3Utils.close(ff, srcTimestampFd);
            tableWriter.o3BumpErrorCount();
            tableWriter.o3ClockDownPartitionUpdateCount();
            tableWriter.o3CountDownDoneLatch();
            throw e;
        }
        // Compute max timestamp as maximum of out of order data and
        // data in existing partition.
        // When partition is new, the data timestamp is MIN_LONG
        final long timestampMax = Math.max(o3TimestampMax, dataTimestampHi);
        publishOpenColumnTasks(
                txn, columns, oooColumns, pathToTable,
                srcOooLo, srcOooHi, srcOooMax,
                o3TimestampMin,
                timestampMax, // <-- this is max of OOO and data chunk
                o3TimestampLo, partitionTimestamp,
                prefixType, prefixLo, prefixHi,
                mergeType, mergeDataLo, mergeDataHi, mergeO3Lo, mergeO3Hi,
                suffixType, suffixLo, suffixHi,
                srcDataMax, srcDataTxn, openColumnMode,
                srcTimestampFd, srcTimestampAddr, srcTimestampSize,
                timestampIndex, sortedTimestampsAddr, tableWriter, columnCounter, o3Basket, tmpBuf
        );
    }
}
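The branch numbering above enumerates how out-of-order (O3) rows can overlap an existing partition: a prefix sourced entirely from one side, an optional merge region, and a suffix. Below is a simplified, self-contained sketch of that prefix/merge/suffix classification over plain sorted long[] arrays instead of memory-mapped timestamps; all names here (classify, lowerBound, upperBound) are hypothetical and only illustrate the decomposition, not the production code.

import java.util.Arrays;

public final class O3BlocksSketch {

    // classify existing data rows into a prefix (strictly before all O3 rows),
    // a merge region (overlapping the O3 range) and a suffix (strictly after)
    public static void classify(long[] data, long[] o3) {
        long dataLo = data[0], dataHi = data[data.length - 1];
        long o3Lo = o3[0], o3Hi = o3[o3.length - 1];
        if (o3Lo > dataHi) {
            // all O3 rows land after existing data: pure append, O3 suffix only
            System.out.println("suffix=O3[0.." + (o3.length - 1) + "]");
        } else if (o3Hi < dataLo) {
            // all O3 rows land before existing data: O3 prefix, data suffix
            System.out.println("prefix=O3, suffix=DATA");
        } else {
            int prefixEnd = lowerBound(data, o3Lo);   // first data row >= o3Lo
            int suffixStart = upperBound(data, o3Hi); // first data row > o3Hi
            // when prefixEnd == suffixStart the O3 rows drop wholly between two
            // adjacent data rows (the "implodes" case in the job above)
            System.out.println("prefix=DATA[0.." + (prefixEnd - 1) + "]"
                    + ", merge=DATA[" + prefixEnd + ".." + (suffixStart - 1) + "]+O3"
                    + ", suffix=DATA[" + suffixStart + ".." + (data.length - 1) + "]");
        }
    }

    // first index i such that a[i] >= v, or a.length if none
    static int lowerBound(long[] a, long v) {
        int i = Arrays.binarySearch(a, v);
        if (i < 0) {
            return -i - 1;
        }
        while (i > 0 && a[i - 1] == v) {
            i--;
        }
        return i;
    }

    // first index i such that a[i] > v, or a.length if none
    static int upperBound(long[] a, long v) {
        int i = Arrays.binarySearch(a, v);
        if (i < 0) {
            return -i - 1;
        }
        while (i < a.length - 1 && a[i + 1] == v) {
            i++;
        }
        return i + 1;
    }

    public static void main(String[] args) {
        classify(new long[]{10, 20, 30, 40}, new long[]{15, 35}); // prefix, merge, suffix
        classify(new long[]{10, 20}, new long[]{30, 40});         // suffix-only append
    }
}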
Use of io.questdb.cairo.sql.RecordMetadata in project questdb by bluestreak01.
From class O3PartitionJob, method publishOpenColumnTasks:
private static void publishOpenColumnTasks(
        long txn, ObjList<MemoryMAR> columns, ObjList<MemoryCARW> oooColumns, CharSequence pathToTable,
        long srcOooLo, long srcOooHi, long srcOooMax,
        long oooTimestampMin, long oooTimestampMax, long oooTimestampLo, long partitionTimestamp,
        int prefixType, long prefixLo, long prefixHi,
        int mergeType, long mergeDataLo, long mergeDataHi, long mergeOOOLo, long mergeOOOHi,
        int suffixType, long suffixLo, long suffixHi,
        long srcDataMax, long srcDataTxn, int openColumnMode,
        long srcTimestampFd, long srcTimestampAddr, long srcTimestampSize,
        int timestampIndex, long sortedTimestampsAddr,
        TableWriter tableWriter, AtomicInteger columnCounter, O3Basket o3Basket, long tmpBuf) {
LOG.debug().$("partition [ts=").$ts(oooTimestampLo).$(']').$();
final long timestampMergeIndexAddr;
if (mergeType == O3_BLOCK_MERGE) {
timestampMergeIndexAddr = createMergeIndex(srcTimestampAddr, sortedTimestampsAddr, mergeDataLo, mergeDataHi, mergeOOOLo, mergeOOOHi);
} else {
timestampMergeIndexAddr = 0;
}
final RecordMetadata metadata = tableWriter.getMetadata();
final int columnCount = metadata.getColumnCount();
columnCounter.set(columnCount);
int columnsInFlight = columnCount;
try {
for (int i = 0; i < columnCount; i++) {
final int colOffset = TableWriter.getPrimaryColumnIndex(i);
final boolean notTheTimestamp = i != timestampIndex;
final int columnType = metadata.getColumnType(i);
final MemoryARW oooMem1 = oooColumns.getQuick(colOffset);
final MemoryARW oooMem2 = oooColumns.getQuick(colOffset + 1);
final MemoryMA mem1 = columns.getQuick(colOffset);
final MemoryMA mem2 = columns.getQuick(colOffset + 1);
final long activeFixFd;
final long activeVarFd;
final long srcDataTop;
final long srcOooFixAddr;
final long srcOooVarAddr;
if (!ColumnType.isVariableLength(columnType)) {
activeFixFd = mem1.getFd();
activeVarFd = 0;
srcOooFixAddr = oooMem1.addressOf(0);
srcOooVarAddr = 0;
} else {
activeFixFd = mem2.getFd();
activeVarFd = mem1.getFd();
srcOooFixAddr = oooMem2.addressOf(0);
srcOooVarAddr = oooMem1.addressOf(0);
}
final CharSequence columnName = metadata.getColumnName(i);
final boolean isIndexed = metadata.isColumnIndexed(i);
if (openColumnMode == OPEN_LAST_PARTITION_FOR_APPEND || openColumnMode == OPEN_LAST_PARTITION_FOR_MERGE) {
srcDataTop = tableWriter.getColumnTop(i);
} else {
// column open job will have to find out if top exists and its value
srcDataTop = -1;
}
final BitmapIndexWriter indexWriter;
if (isIndexed) {
indexWriter = o3Basket.nextIndexer();
} else {
indexWriter = null;
}
try {
final long cursor = tableWriter.getO3OpenColumnPubSeq().next();
if (cursor > -1) {
publishOpenColumnTaskHarmonized(cursor, openColumnMode, pathToTable, columnName, columnCounter, o3Basket.nextPartCounter(), notTheTimestamp ? columnType : ColumnType.setDesignatedTimestampBit(columnType, true), timestampMergeIndexAddr, srcOooFixAddr, srcOooVarAddr, srcOooLo, srcOooHi, srcOooMax, oooTimestampMin, oooTimestampMax, oooTimestampLo, partitionTimestamp, srcDataTop, srcDataMax, srcDataTxn, txn, prefixType, prefixLo, prefixHi, mergeType, mergeDataLo, mergeDataHi, mergeOOOLo, mergeOOOHi, suffixType, suffixLo, suffixHi, isIndexed, srcTimestampFd, srcTimestampAddr, srcTimestampSize, activeFixFd, activeVarFd, tableWriter, indexWriter);
} else {
publishOpenColumnTaskContended(tmpBuf, cursor, openColumnMode, pathToTable, columnName, columnCounter, o3Basket.nextPartCounter(), notTheTimestamp ? columnType : ColumnType.setDesignatedTimestampBit(columnType, true), timestampMergeIndexAddr, srcOooFixAddr, srcOooVarAddr, srcOooLo, srcOooHi, srcOooMax, oooTimestampMin, oooTimestampMax, oooTimestampLo, partitionTimestamp, srcDataTop, srcDataMax, srcDataTxn, txn, prefixType, prefixLo, prefixHi, mergeType, mergeDataLo, mergeDataHi, mergeOOOLo, mergeOOOHi, suffixType, suffixLo, suffixHi, srcTimestampFd, srcTimestampAddr, srcTimestampSize, isIndexed, activeFixFd, activeVarFd, tableWriter, indexWriter);
}
} catch (Throwable e) {
tableWriter.o3BumpErrorCount();
LOG.error().$("open column error [table=").$(tableWriter.getTableName()).$(", e=").$(e).I$();
columnsInFlight = i + 1;
throw e;
}
}
} finally {
final int delta = columnsInFlight - columnCount;
LOG.debug().$("idle [delta=").$(delta).$(']').$();
if (delta < 0 && columnCounter.addAndGet(delta) == 0) {
O3CopyJob.closeColumnIdleQuick(timestampMergeIndexAddr, srcTimestampFd, srcTimestampAddr, srcTimestampSize, tableWriter);
}
}
}
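Note how columnCounter is preset to the column count before any task is published, and how the error path shrinks it by the number of tasks that never launched; whichever decrement brings the counter to zero performs the shared cleanup. A minimal sketch of that fan-out/fan-in pattern follows; the class and variable names are hypothetical.

import java.util.concurrent.atomic.AtomicInteger;

public final class FanInSketch {

    public static void main(String[] args) {
        final int columnCount = 4;
        // preset to the number of per-column tasks, like columnCounter above
        final AtomicInteger remaining = new AtomicInteger(columnCount);
        for (int i = 0; i < columnCount; i++) {
            final int col = i;
            new Thread(() -> {
                System.out.println("column task " + col + " done");
                // the task that decrements the counter to zero is the last
                // finisher and releases shared resources exactly once
                if (remaining.decrementAndGet() == 0) {
                    System.out.println("all columns done, running shared cleanup");
                }
            }).start();
        }
    }
}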
Use of io.questdb.cairo.sql.RecordMetadata in project questdb by bluestreak01.
From class O3CommitLagTest, method insertUncommitted:
private void insertUncommitted(SqlCompiler compiler, SqlExecutionContext sqlExecutionContext, String sql, TableWriter writer) throws SqlException {
    minTimestamp = Long.MAX_VALUE;
    maxTimestamp = Long.MIN_VALUE;
    try (RecordCursorFactory factory = compiler.compile(sql, sqlExecutionContext).getRecordCursorFactory()) {
        RecordMetadata metadata = factory.getMetadata();
        int timestampIndex = writer.getMetadata().getTimestampIndex();
        EntityColumnFilter toColumnFilter = new EntityColumnFilter();
        toColumnFilter.of(metadata.getColumnCount());
        if (null == copier) {
            copier = SqlCompiler.assembleRecordToRowCopier(new BytecodeAssembler(), metadata, writer.getMetadata(), toColumnFilter);
        }
        try (RecordCursor cursor = factory.getCursor(sqlExecutionContext)) {
            final Record record = cursor.getRecord();
            while (cursor.hasNext()) {
                long timestamp = record.getTimestamp(timestampIndex);
                if (timestamp > maxTimestamp) {
                    maxTimestamp = timestamp;
                }
                if (timestamp < minTimestamp) {
                    minTimestamp = timestamp;
                }
                Row row = writer.newRow(timestamp);
                copier.copy(record, row);
                row.append();
            }
        }
    }
}
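The bytecode-assembled RecordToRowCopier above generates a per-column copy loop driven by the two RecordMetadata instances. Below is a hand-written sketch of the same idea for a few column types; this is an illustration of the concept, not the generated code, and it only covers INT, LONG and STRING.

import io.questdb.cairo.ColumnType;
import io.questdb.cairo.TableWriter;
import io.questdb.cairo.sql.Record;
import io.questdb.cairo.sql.RecordMetadata;

public final class RowCopySketch {

    // copy one record into a writer row, column by column, using the
    // metadata to dispatch on column type; unhandled types are skipped
    public static void copyRow(Record record, TableWriter.Row row, RecordMetadata metadata) {
        for (int i = 0, n = metadata.getColumnCount(); i < n; i++) {
            switch (ColumnType.tagOf(metadata.getColumnType(i))) {
                case ColumnType.INT:
                    row.putInt(i, record.getInt(i));
                    break;
                case ColumnType.LONG:
                    row.putLong(i, record.getLong(i));
                    break;
                case ColumnType.STRING:
                    row.putStr(i, record.getStr(i));
                    break;
                default:
                    // remaining types elided in this sketch
                    break;
            }
        }
    }
}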
Use of io.questdb.cairo.sql.RecordMetadata in project questdb by bluestreak01.
From class DataFrameRecordCursorFactoryTest, method testFactory:
@Test
public void testFactory() throws Exception {
    TestUtils.assertMemoryLeak(() -> {
        final int N = 100;
        // separate the two symbol columns with a primitive column; this makes
        // problems apparent if the index does not shift correctly
        try (TableModel model = new TableModel(configuration, "x", PartitionBy.DAY)
                .col("a", ColumnType.STRING)
                .col("b", ColumnType.SYMBOL).indexed(true, N / 4)
                .col("i", ColumnType.INT)
                .col("c", ColumnType.SYMBOL).indexed(true, N / 4)
                .timestamp()) {
            CairoTestUtils.create(model);
        }
        final Rnd rnd = new Rnd();
        final String[] symbols = new String[N];
        final int M = 1000;
        final long increment = 1000000 * 60L * 4;
        for (int i = 0; i < N; i++) {
            symbols[i] = rnd.nextChars(8).toString();
        }
        rnd.reset();
        // prepare the data
        long timestamp = 0;
        try (TableWriter writer = new TableWriter(configuration, "x")) {
            for (int i = 0; i < M; i++) {
                TableWriter.Row row = writer.newRow(timestamp += increment);
                row.putStr(0, rnd.nextChars(20));
                row.putSym(1, symbols[rnd.nextPositiveInt() % N]);
                row.putInt(2, rnd.nextInt());
                row.putSym(3, symbols[rnd.nextPositiveInt() % N]);
                row.append();
            }
            writer.commit();
        }
        try (CairoEngine engine = new CairoEngine(configuration)) {
            String value = symbols[N - 10];
            int columnIndex;
            int symbolKey;
            RecordMetadata metadata;
            try (TableReader reader = engine.getReader(AllowAllCairoSecurityContext.INSTANCE, "x", TableUtils.ANY_TABLE_ID, TableUtils.ANY_TABLE_VERSION)) {
                columnIndex = reader.getMetadata().getColumnIndexQuiet("b");
                symbolKey = reader.getSymbolMapReader(columnIndex).keyOf(value);
                metadata = GenericRecordMetadata.copyOf(reader.getMetadata());
            }
            SymbolIndexRowCursorFactory symbolIndexRowCursorFactory = new SymbolIndexRowCursorFactory(columnIndex, symbolKey, true, BitmapIndexReader.DIR_FORWARD, null);
            FullFwdDataFrameCursorFactory dataFrameFactory = new FullFwdDataFrameCursorFactory(engine, "x", TableUtils.ANY_TABLE_ID, TableUtils.ANY_TABLE_VERSION);
            // entity index
            final IntList columnIndexes = new IntList();
            for (int i = 0, n = metadata.getColumnCount(); i < n; i++) {
                columnIndexes.add(i);
            }
            DataFrameRecordCursorFactory factory = new DataFrameRecordCursorFactory(metadata, dataFrameFactory, symbolIndexRowCursorFactory, false, null, false, columnIndexes, null);
            SqlExecutionContext sqlExecutionContext = new SqlExecutionContextImpl(engine, 1).with(AllowAllCairoSecurityContext.INSTANCE, null, null, -1, null);
            try (RecordCursor cursor = factory.getCursor(sqlExecutionContext)) {
                Record record = cursor.getRecord();
                while (cursor.hasNext()) {
                    TestUtils.assertEquals(value, record.getSym(1));
                }
            }
        }
    });
}
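One detail worth noting in this test: getColumnIndexQuiet returns a negative value for a missing column rather than throwing, so callers that require the column should guard the result. A tiny sketch of such a guard; requireColumn is a hypothetical helper, only getColumnIndexQuiet comes from the RecordMetadata API used above.

static int requireColumn(RecordMetadata metadata, String columnName) {
    int index = metadata.getColumnIndexQuiet(columnName);
    if (index < 0) {
        throw new IllegalArgumentException("column not found: " + columnName);
    }
    return index;
}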