Use of io.questdb.std.str.Path in project questdb by bluestreak01.
The class ContinuousMemoryMTest, method withMem:
private void withMem(long appendSz, long sz, MemTestCode code) throws Exception {
    assertMemoryLeak(() -> {
        final Path path = Path.getThreadLocal(root).concat("t.d").$();
        rnd.reset();
        try (
                MemoryCMARW rwMem = Vm.getCMARWInstance(FilesFacadeImpl.INSTANCE, path, appendSz, -1, MemoryTag.MMAP_DEFAULT);
                MemoryCMR roMem = new MemoryCMRImpl(FilesFacadeImpl.INSTANCE, path, sz, MemoryTag.MMAP_DEFAULT)
        ) {
            code.run(rwMem, roMem);
        } finally {
            Path.clearThreadLocals();
        }
    });
}
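For orientation, a test built on this helper might look like the sketch below. It is a minimal sketch, not code from the repository: the test name and values are hypothetical, and it assumes the MemoryCMARW/MemoryCMR views expose the putLong/getLong accessors used elsewhere in questdb's memory API.

@Test
public void testRoundTrip() throws Exception {
    // hypothetical usage of withMem(); sizes are arbitrary
    withMem(16, 16, (rwMem, roMem) -> {
        rwMem.putLong(42L);                         // append through the RW mmap view
        Assert.assertEquals(42L, roMem.getLong(0)); // read back through the RO view of the same file
    });
}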
Use of io.questdb.std.str.Path in project questdb by bluestreak01.
The class JsonLexerTest, method testParseLargeFile:
@Test
public void testParseLargeFile() throws Exception {
    String path = JsonLexerTest.class.getResource("/json/test.json").getPath();
    try (Path p = new Path()) {
        if (Os.type == Os.WINDOWS && path.startsWith("/")) {
            p.of(path.substring(1));
        } else {
            p.of(path);
        }
        long l = Files.length(p.$());
        long fd = Files.openRO(p);
        JsonParser listener = new NoOpParser();
        try {
            long buf = Unsafe.malloc(l, MemoryTag.NATIVE_DEFAULT);
            long bufA = Unsafe.malloc(l, MemoryTag.NATIVE_DEFAULT);
            long bufB = Unsafe.malloc(l, MemoryTag.NATIVE_DEFAULT);
            try {
                Assert.assertEquals(l, Files.read(fd, buf, (int) l, 0));
                for (int i = 0; i < l; i++) {
                    try {
                        LEXER.clear();
                        Unsafe.getUnsafe().copyMemory(buf, bufA, i);
                        Unsafe.getUnsafe().copyMemory(buf + i, bufB, l - i);
                        LEXER.parse(bufA, bufA + i, listener);
                        LEXER.parse(bufB, bufB + l - i, listener);
                        LEXER.parseLast();
                    } catch (JsonException e) {
                        System.out.println(i);
                        throw e;
                    }
                }
            } finally {
                Unsafe.free(buf, l, MemoryTag.NATIVE_DEFAULT);
                Unsafe.free(bufA, l, MemoryTag.NATIVE_DEFAULT);
                Unsafe.free(bufB, l, MemoryTag.NATIVE_DEFAULT);
            }
        } finally {
            Files.close(fd);
        }
    }
}
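The loop above is a split-point fuzz: it cuts the file at every byte offset i, feeds the two halves to the lexer as separate parse() calls, and relies on the absence of JsonException to show the incremental lexer tolerates any chunk boundary. The pattern generalizes to any resumable parser; below is a minimal, self-contained sketch of the same idea, with a hypothetical IncrementalParser interface standing in for JsonLexer.

// hypothetical interface standing in for any resumable parser
interface IncrementalParser {
    void reset();
    void feed(byte[] chunk) throws Exception; // consume a partial input
    void finish() throws Exception;           // end of input; throws on malformed data
}

static void fuzzSplitPoints(IncrementalParser parser, byte[] input) throws Exception {
    for (int i = 0; i <= input.length; i++) {
        parser.reset();
        parser.feed(java.util.Arrays.copyOfRange(input, 0, i));            // first half
        parser.feed(java.util.Arrays.copyOfRange(input, i, input.length)); // second half
        parser.finish(); // a correct incremental parser accepts every split point
    }
}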
Use of io.questdb.std.str.Path in project questdb by bluestreak01.
The class LogFactoryTest, method testRollingFileWriterBySize:
@Test
public void testRollingFileWriterBySize() throws Exception {
    String base = temp.getRoot().getAbsolutePath() + Files.SEPARATOR;
    String logFile = base + "mylog-${date:yyyy-MM-dd}.log";
    String expectedLogFile = base + "mylog-2015-05-03.log";
    final MicrosecondClock clock = new TestMicrosecondClock(TimestampFormatUtils.parseTimestamp("2015-05-03T10:35:00.000Z"), 1);
    try (Path path = new Path()) {
        // create a rogue file that would be in the way of the logger rolling existing files
        path.of(base);
        Assert.assertTrue(Files.touch(path.concat("mylog-2015-05-03.log.2").$()));
    }
    RingQueue<LogRecordSink> queue = new RingQueue<>(LogRecordSink::new, 1024, 1024, MemoryTag.NATIVE_DEFAULT);
    SPSequence pubSeq = new SPSequence(queue.getCycle());
    SCSequence subSeq = new SCSequence();
    pubSeq.then(subSeq).then(pubSeq);
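    // note (added): chaining pubSeq -> subSeq -> pubSeq closes the dependency cycle over
    // the ring queue, so the producer can only reclaim a slot once the consumer has passed it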
    try (final LogRollingFileWriter writer = new LogRollingFileWriter(FilesFacadeImpl.INSTANCE, clock, queue, subSeq, LogLevel.LOG_LEVEL_INFO)) {
        writer.setLocation(logFile);
        writer.setRollSize("1m");
        writer.setBufferSize("64k");
        writer.bindProperties();
        AtomicBoolean running = new AtomicBoolean(true);
        SOCountDownLatch halted = new SOCountDownLatch();
        halted.setCount(1);
        new Thread(() -> {
            while (running.get()) {
                writer.runSerially();
            }
            // noinspection StatementWithEmptyBody
            while (writer.runSerially()) ;
            halted.countDown();
        }).start();
        // now publish
        int published = 0;
        int toPublish = 100_000;
        while (published < toPublish) {
            long cursor = pubSeq.next();
            if (cursor < 0) {
                LockSupport.parkNanos(1);
                continue;
            }
            final long available = pubSeq.available();
            while (cursor < available && published < toPublish) {
                LogRecordSink sink = queue.get(cursor++);
                sink.setLevel(LogLevel.LOG_LEVEL_INFO);
                sink.put("test");
                published++;
            }
            pubSeq.done(cursor - 1);
        }
        running.set(false);
        halted.await();
    }
    assertFileLength(expectedLogFile);
    assertFileLength(expectedLogFile + ".1");
}
Use of io.questdb.std.str.Path in project questdb by bluestreak01.
The class TableWriter, method repairDataGaps:
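In outline, the method walks partition timestamps from the table's minimum up to the partition containing the max timestamp, restores any partition directory that exists only under its txn-suffixed name (built by TableUtils.oldPartitionName), and, if the active partition is missing entirely, re-derives the transient row count and max timestamp from the last partition it did find before reconciling the totals against the _txn metadata.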
private long repairDataGaps(final long timestamp) {
    if (txWriter.getMaxTimestamp() != Numbers.LONG_NaN && partitionBy != PartitionBy.NONE) {
        long fixedRowCount = 0;
        long lastTimestamp = -1;
        long transientRowCount = this.txWriter.getTransientRowCount();
        long maxTimestamp = this.txWriter.getMaxTimestamp();
        try {
            final long tsLimit = timestampFloorMethod.floor(this.txWriter.getMaxTimestamp());
            for (long ts = getPartitionLo(txWriter.getMinTimestamp()); ts < tsLimit; ts = timestampAddMethod.calculate(ts, 1)) {
                path.trimTo(rootLen);
                setStateForTimestamp(path, ts, false);
                int p = path.length();
                long partitionSize = txWriter.getPartitionSizeByPartitionTimestamp(ts);
                if (partitionSize >= 0 && ff.exists(path.$())) {
                    fixedRowCount += partitionSize;
                    lastTimestamp = ts;
                } else {
                    Path other = Path.getThreadLocal2(path.trimTo(p).$());
                    TableUtils.oldPartitionName(other, getTxn());
                    if (ff.exists(other.$())) {
                        if (!ff.rename(other, path)) {
                            LOG.error().$("could not rename [from=").$(other).$(", to=").$(path).$(']').$();
                            throw new CairoError("could not restore directory, see log for details");
                        } else {
                            LOG.info().$("restored [path=").$(path).$(']').$();
                        }
                    } else {
                        LOG.debug().$("missing partition [name=").$(path.trimTo(p).$()).$(']').$();
                    }
                }
            }
            if (lastTimestamp > -1) {
                path.trimTo(rootLen);
                setStateForTimestamp(path, tsLimit, false);
                if (!ff.exists(path.$())) {
                    Path other = Path.getThreadLocal2(path);
                    TableUtils.oldPartitionName(other, getTxn());
                    if (ff.exists(other.$())) {
                        if (!ff.rename(other, path)) {
                            LOG.error().$("could not rename [from=").$(other).$(", to=").$(path).$(']').$();
                            throw new CairoError("could not restore directory, see log for details");
                        } else {
                            LOG.info().$("restored [path=").$(path).$(']').$();
                        }
                    } else {
                        LOG.error().$("last partition does not exist [name=").$(path).$(']').$();
                        // ok, the active (last) partition is missing; rebuild writer state from the last partition we did find
                        // 1. read its size
                        path.trimTo(rootLen);
                        setStateForTimestamp(path, lastTimestamp, false);
                        int p = path.length();
                        transientRowCount = txWriter.getPartitionSizeByPartitionTimestamp(lastTimestamp);
                        // 2. read max timestamp
                        TableUtils.dFile(path.trimTo(p), metadata.getColumnName(metadata.getTimestampIndex()));
                        maxTimestamp = TableUtils.readLongAtOffset(ff, path, tempMem16b, (transientRowCount - 1) * Long.BYTES);
                        fixedRowCount -= transientRowCount;
                        txWriter.removeAttachedPartitions(txWriter.getMaxTimestamp());
                        LOG.info().$("updated active partition [name=").$(path.trimTo(p).$()).$(", maxTimestamp=").$ts(maxTimestamp).$(", transientRowCount=").$(transientRowCount).$(", fixedRowCount=").$(txWriter.getFixedRowCount()).$(']').$();
                    }
                }
            }
        } finally {
            path.trimTo(rootLen);
        }
        final long expectedSize = txWriter.unsafeReadFixedRowCount();
        if (expectedSize != fixedRowCount || maxTimestamp != this.txWriter.getMaxTimestamp()) {
            LOG.info().$("actual table size has been adjusted [name=`").utf8(tableName).$('`').$(", expectedFixedSize=").$(expectedSize).$(", actualFixedSize=").$(fixedRowCount).$(']').$();
            txWriter.reset(fixedRowCount, transientRowCount, maxTimestamp);
            return maxTimestamp;
        }
    }
    return timestamp;
}
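Both recovery branches lean on the same convention: a superseded partition directory is kept under a txn-suffixed name, so when the expected directory is absent but the suffixed one exists, a single rename restores it; only when neither exists does the writer fall back to recomputing row counts from the last healthy partition.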
Use of io.questdb.std.str.Path in project questdb by bluestreak01.
The class TableWriter, method o3Commit:
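A quick worked example of the lag semantics documented below, with made-up numbers: if the sorted O3 segment spans timestamps 10:00:00 to 10:00:10 and lag is 2_000_000 (2 seconds), the threshold is 10:00:08; rows at or before it are committed to disk, while the trailing 2 seconds of rows are held back in memory (capped at maxUncommittedRows) to absorb further out-of-order arrivals near the head.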
/**
 * Commits O3 data. Lag is optional. When 0 is specified the entire O3 segment is committed.
 *
 * @param lag interval in microseconds that determines the length of the O3 segment that is not going to be
 *            committed to disk. The interval ends at the max timestamp of the O3 segment and starts <i>lag</i>
 *            microseconds before it.
 * @return <i>true</i> when the commit is a no-op, e.g. no data has been committed to disk; <i>false</i> otherwise.
 */
private boolean o3Commit(long lag) {
    o3RowCount = getO3RowCount0();
    o3PartitionRemoveCandidates.clear();
    o3ErrorCount.set(0);
    o3ColumnCounters.clear();
    o3BasketPool.clear();
    long o3LagRowCount = 0;
    long maxUncommittedRows = metadata.getMaxUncommittedRows();
    final int timestampIndex = metadata.getTimestampIndex();
    this.lastPartitionTimestamp = timestampFloorMethod.floor(partitionTimestampHi);
    long activePartitionTimestampCeil = timestampCeilMethod.ceil(partitionTimestampHi);
    try {
        o3RowCount += o3MoveUncommitted(timestampIndex);
        final long transientRowCount = txWriter.transientRowCount;
        // we may need to re-use file descriptors when this partition is the "current" one;
        // we cannot open the file again due to a sharing violation
        //
        // to determine that 'ooTimestampLo' goes into the current partition
        // we need to compare 'partitionTimestampHi', which is appropriately truncated to DAY/MONTH/YEAR,
        // to this.maxTimestamp, which isn't truncated yet. So we need to truncate it first.
        LOG.info().$("sorting o3 [table=").$(tableName).$(']').$();
        final long sortedTimestampsAddr = o3TimestampMem.getAddress();
        // make sure the scratch buffer is large enough for the radix sort path below
        if (o3RowCount > 600 || !o3QuickSortEnabled) {
            o3TimestampMemCpy.jumpTo(o3TimestampMem.getAppendOffset());
            Vect.radixSortLongIndexAscInPlace(sortedTimestampsAddr, o3RowCount, o3TimestampMemCpy.addressOf(0));
        } else {
            Vect.quickSortLongIndexAscInPlace(sortedTimestampsAddr, o3RowCount);
        }
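        // note (added): the radix path uses o3TimestampMemCpy as O(n) scratch space, while the
        // quicksort path runs in place; the 600-row cutoff picks the cheaper strategy per batch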
        // we have three frames:
        // partition logical "lo" and "hi" - absolute bounds (partitionLo, partitionHi)
        // partition actual data "lo" and "hi" (dataLo, dataHi)
        // out of order "lo" and "hi" (indexLo, indexHi)
        long srcOooMax;
        final long o3TimestampMin = getTimestampIndexValue(sortedTimestampsAddr, 0);
        if (o3TimestampMin < Timestamps.O3_MIN_TS) {
            o3InError = true;
            throw CairoException.instance(0).put("timestamps before 1970-01-01 are not allowed for O3");
        }
        long o3TimestampMax = getTimestampIndexValue(sortedTimestampsAddr, o3RowCount - 1);
        if (o3TimestampMax < Timestamps.O3_MIN_TS) {
            o3InError = true;
            throw CairoException.instance(0).put("timestamps before 1970-01-01 are not allowed for O3");
        }
        // sanity check of the sort; no known way to reproduce a violation
        assert o3TimestampMin <= o3TimestampMax;
        if (lag > 0) {
            long lagThresholdTimestamp = o3TimestampMax - lag;
            if (lagThresholdTimestamp >= o3TimestampMin) {
                final long lagThresholdRow = Vect.boundedBinarySearchIndexT(sortedTimestampsAddr, lagThresholdTimestamp, 0, o3RowCount - 1, BinarySearch.SCAN_DOWN);
                o3LagRowCount = o3RowCount - lagThresholdRow - 1;
                if (o3LagRowCount > maxUncommittedRows) {
                    o3LagRowCount = maxUncommittedRows;
                    srcOooMax = o3RowCount - maxUncommittedRows;
                } else {
                    srcOooMax = lagThresholdRow + 1;
                }
            } else {
                o3LagRowCount = o3RowCount;
                // commit half of the lag.
                if (o3LagRowCount > maxUncommittedRows) {
                    o3LagRowCount = maxUncommittedRows / 2;
                    srcOooMax = o3RowCount - o3LagRowCount;
                } else {
                    srcOooMax = 0;
                }
            }
            LOG.debug().$("o3 commit lag [table=").$(tableName).$(", lag=").$(lag).$(", maxUncommittedRows=").$(maxUncommittedRows).$(", o3max=").$ts(o3TimestampMax).$(", lagThresholdTimestamp=").$ts(lagThresholdTimestamp).$(", o3LagRowCount=").$(o3LagRowCount).$(", srcOooMax=").$(srcOooMax).$(", o3RowCount=").$(o3RowCount).I$();
        } else {
            LOG.debug().$("o3 commit no lag [table=").$(tableName).$(", o3RowCount=").$(o3RowCount).I$();
            srcOooMax = o3RowCount;
        }
        if (srcOooMax == 0) {
            return true;
        }
        // we could have moved the "srcOooMax" and hence we re-read the max timestamp
        o3TimestampMax = getTimestampIndexValue(sortedTimestampsAddr, srcOooMax - 1);
        // moving uncommitted rows is liable to change the max timestamp;
        // however we need to identify the last partition before max timestamp skips to NULL, for example
        final long maxTimestamp = txWriter.getMaxTimestamp();
        // we are going to use this soon to avoid double-copying lag data
        // final boolean yep = isAppendLastPartitionOnly(sortedTimestampsAddr, o3TimestampMax);
        // reshuffle all columns according to timestamp index
        o3Sort(sortedTimestampsAddr, timestampIndex, o3RowCount);
        LOG.info().$("sorted [table=").utf8(tableName).I$();
        this.o3DoneLatch.reset();
        this.o3PartitionUpdRemaining.set(0);
        boolean success = true;
        int latchCount = 0;
        long srcOoo = 0;
        boolean flattenTimestamp = true;
        int pCount = 0;
        try {
            // We do not know upfront which partition is going to be last because this is
            // a single pass over the data. Instead, we will update the transient row count in a rolling
            // manner, assuming the partition marked "last" is the last, and then for a new partition
            // we move prevTransientRowCount into the "fixedRowCount" sum and set a new value on the
            // transientRowCount
            long prevTransientRowCount = transientRowCount;
            while (srcOoo < srcOooMax) {
                try {
                    final long srcOooLo = srcOoo;
                    final long o3Timestamp = getTimestampIndexValue(sortedTimestampsAddr, srcOoo);
                    final long srcOooHi;
                    // keep ceil inclusive in the interval
                    final long srcOooTimestampCeil = timestampCeilMethod.ceil(o3Timestamp) - 1;
                    if (srcOooTimestampCeil < o3TimestampMax) {
                        srcOooHi = Vect.boundedBinarySearchIndexT(sortedTimestampsAddr, srcOooTimestampCeil, srcOoo, srcOooMax - 1, BinarySearch.SCAN_DOWN);
                    } else {
                        srcOooHi = srcOooMax - 1;
                    }
                    final long partitionTimestamp = timestampFloorMethod.floor(o3Timestamp);
                    final boolean last = partitionTimestamp == lastPartitionTimestamp;
                    srcOoo = srcOooHi + 1;
                    final long srcDataMax;
                    final long srcNameTxn;
                    final int partitionIndex = txWriter.findAttachedPartitionIndexByLoTimestamp(partitionTimestamp);
                    if (partitionIndex > -1) {
                        if (last) {
                            srcDataMax = transientRowCount;
                        } else {
                            srcDataMax = getPartitionSizeByIndex(partitionIndex);
                        }
                        srcNameTxn = getPartitionNameTxnByIndex(partitionIndex);
                    } else {
                        srcDataMax = 0;
                        srcNameTxn = -1;
                    }
                    final boolean append = last && (srcDataMax == 0 || o3Timestamp >= maxTimestamp);
                    final long partitionSize = srcDataMax + srcOooHi - srcOooLo + 1;
                    LOG.debug().$("o3 partition task [table=").$(tableName).$(", srcOooLo=").$(srcOooLo).$(", srcOooHi=").$(srcOooHi).$(", srcOooMax=").$(srcOooMax).$(", o3TimestampMin=").$ts(o3TimestampMin).$(", o3Timestamp=").$ts(o3Timestamp).$(", o3TimestampMax=").$ts(o3TimestampMax).$(", partitionTimestamp=").$ts(partitionTimestamp).$(", partitionIndex=").$(partitionIndex).$(", srcDataMax=").$(srcDataMax).$(", maxTimestamp=").$ts(maxTimestamp).$(", last=").$(last).$(", partitionSize=").$(partitionSize).$(", append=").$(append).$(", memUsed=").$(Unsafe.getMemUsed()).I$();
                    if (partitionTimestamp < lastPartitionTimestamp) {
                        // an older partition grew; increment fixedRowCount by the number of rows it gained
                        this.txWriter.fixedRowCount += partitionSize - srcDataMax;
                    } else if (partitionTimestamp == lastPartitionTimestamp) {
                        // this is the existing "last" partition; we can set the size directly
                        prevTransientRowCount = partitionSize;
                    } else {
                        // this is potentially a new last partition
                        this.txWriter.fixedRowCount += prevTransientRowCount;
                        prevTransientRowCount = partitionSize;
                    }
                    pCount++;
                    o3PartitionUpdRemaining.incrementAndGet();
                    final O3Basket o3Basket = o3BasketPool.next();
                    o3Basket.ensureCapacity(columnCount, indexCount);
                    AtomicInteger columnCounter = o3ColumnCounters.next();
                    // async partition processing sets this counter to the column count
                    // and then manages issues if publishing of column tasks fails
                    // mid-column-count.
                    latchCount++;
                    if (append) {
                        // counted down correctly
                        try {
                            setAppendPosition(srcDataMax, false);
                        } catch (Throwable e) {
                            o3BumpErrorCount();
                            o3ClockDownPartitionUpdateCount();
                            o3CountDownDoneLatch();
                            throw e;
                        }
                        columnCounter.set(columnCount);
                        Path pathToPartition = Path.getThreadLocal(this.path);
                        TableUtils.setPathForPartition(pathToPartition, partitionBy, o3TimestampMin, false);
                        TableUtils.txnPartitionConditionally(pathToPartition, srcNameTxn);
                        final int plen = pathToPartition.length();
                        int columnsPublished = 0;
                        for (int i = 0; i < columnCount; i++) {
                            final int colOffset = TableWriter.getPrimaryColumnIndex(i);
                            final boolean notTheTimestamp = i != timestampIndex;
                            final int columnType = metadata.getColumnType(i);
                            final CharSequence columnName = metadata.getColumnName(i);
                            final boolean isIndexed = metadata.isColumnIndexed(i);
                            final BitmapIndexWriter indexWriter = isIndexed ? getBitmapIndexWriter(i) : null;
                            final MemoryARW oooMem1 = o3Columns.getQuick(colOffset);
                            final MemoryARW oooMem2 = o3Columns.getQuick(colOffset + 1);
                            final MemoryMAR mem1 = columns.getQuick(colOffset);
                            final MemoryMAR mem2 = columns.getQuick(colOffset + 1);
                            final long srcDataTop = getColumnTop(i);
                            final long srcOooFixAddr;
                            final long srcOooVarAddr;
                            final MemoryMAR dstFixMem;
                            final MemoryMAR dstVarMem;
                            if (!ColumnType.isVariableLength(columnType)) {
                                srcOooFixAddr = oooMem1.addressOf(0);
                                srcOooVarAddr = 0;
                                dstFixMem = mem1;
                                dstVarMem = null;
                            } else {
                                srcOooFixAddr = oooMem2.addressOf(0);
                                srcOooVarAddr = oooMem1.addressOf(0);
                                dstFixMem = mem2;
                                dstVarMem = mem1;
                            }
                            columnsPublished++;
                            try {
                                O3OpenColumnJob.appendLastPartition(pathToPartition, plen, columnName, columnCounter, notTheTimestamp ? columnType : ColumnType.setDesignatedTimestampBit(columnType, true), srcOooFixAddr, srcOooVarAddr, srcOooLo, srcOooHi, srcOooMax, o3TimestampMin, o3TimestampMax, partitionTimestamp, srcDataTop, srcDataMax, isIndexed, dstFixMem, dstVarMem, this, indexWriter);
                            } catch (Throwable e) {
                                if (columnCounter.addAndGet(columnsPublished - columnCount) == 0) {
                                    o3ClockDownPartitionUpdateCount();
                                    o3CountDownDoneLatch();
                                }
                                throw e;
                            }
                        }
                    } else {
                        if (flattenTimestamp) {
                            Vect.flattenIndex(sortedTimestampsAddr, o3RowCount);
                            flattenTimestamp = false;
                        }
                        o3CommitPartitionAsync(columnCounter, maxTimestamp, sortedTimestampsAddr, srcOooMax, o3TimestampMin, o3TimestampMax, srcOooLo, srcOooHi, partitionTimestamp, last, srcDataMax, srcNameTxn, o3Basket);
                    }
                } catch (CairoException | CairoError e) {
                    LOG.error().$((Sinkable) e).$();
                    success = false;
                    throw e;
                }
            }
            // at this point we should know the last partition row count
            this.txWriter.transientRowCount = prevTransientRowCount;
            this.partitionTimestampHi = Math.max(this.partitionTimestampHi, o3TimestampMax);
            this.txWriter.updateMaxTimestamp(Math.max(txWriter.getMaxTimestamp(), o3TimestampMax));
        } finally {
            // we are stealing work here; it is possible we get an exception from this method
            LOG.debug().$("o3 expecting updates [table=").$(tableName).$(", partitionsPublished=").$(pCount).I$();
            o3ConsumePartitionUpdates(srcOooMax, o3TimestampMin, o3TimestampMax);
            o3DoneLatch.await(latchCount);
            o3InError = !success || o3ErrorCount.get() > 0;
            if (success && o3ErrorCount.get() > 0) {
                // noinspection ThrowFromFinallyBlock
                throw CairoException.instance(0).put("bulk update failed and will be rolled back");
            }
        }
        if (o3LagRowCount > 0) {
            o3ShiftLagRowsUp(timestampIndex, o3LagRowCount, srcOooMax);
        }
    } finally {
        if (denseIndexers.size() == 0) {
            populateDenseIndexerList();
        }
        path.trimTo(rootLen);
        // Alright, we finished updating partitions. Now we need to get this writer instance into
        // a consistent state.
        //
        // We start with ensuring append memory is in a ready-to-use state. When the max timestamp changes we need to
        // move append memory to a new set of files. Otherwise, we stay on the same set but advance to the append position.
        avoidIndexOnCommit = o3ErrorCount.get() == 0;
        if (o3LagRowCount == 0) {
            clearO3();
            LOG.debug().$("lag segment is empty").$();
        } else {
            // adjust O3 master ref so that the virtual row count becomes equal to the value of "o3LagRowCount"
            this.o3MasterRef = this.masterRef - o3LagRowCount * 2 + 1;
            LOG.debug().$("adjusted [o3RowCount=").$(getO3RowCount0()).I$();
        }
    }
    if (!columns.getQuick(0).isOpen() || partitionTimestampHi > activePartitionTimestampCeil) {
        openPartition(txWriter.getMaxTimestamp());
    }
    // will attempt to mmap new page and fail... Then we can remove the 'true' parameter
    try {
        setAppendPosition(txWriter.getTransientRowCount(), true);
    } catch (Throwable e) {
        LOG.error().$("data is committed but writer failed to update its state `").$(e).$('`').$();
        distressed = true;
        throw e;
    }
    return false;
}