Search in sources :

Example 51 with Path

use of io.questdb.std.str.Path in project questdb by bluestreak01.

the class ContinuousMemoryMTest method withMem.

private void withMem(long appendSz, long sz, MemTestCode code) throws Exception {
    assertMemoryLeak(() -> {
        final Path path = Path.getThreadLocal(root).concat("t.d").$();
        try (MemoryCMARW rwMem = Vm.getCMARWInstance(FilesFacadeImpl.INSTANCE, path, appendSz, -1, MemoryTag.MMAP_DEFAULT);
            MemoryCMR roMem = new MemoryCMRImpl(FilesFacadeImpl.INSTANCE, path, sz, MemoryTag.MMAP_DEFAULT)) {
  , roMem);
        } finally {
Also used : Path(io.questdb.std.str.Path)

Example 52 with Path

use of io.questdb.std.str.Path in project questdb by bluestreak01.

the class JsonLexerTest method testParseLargeFile.

public void testParseLargeFile() throws Exception {
    String path = JsonLexerTest.class.getResource("/json/test.json").getPath();
    try (Path p = new Path()) {
        if (Os.type == Os.WINDOWS && path.startsWith("/")) {
        } else {
        long l = Files.length(p.$());
        long fd = Files.openRO(p);
        JsonParser listener = new NoOpParser();
        try {
            long buf = Unsafe.malloc(l, MemoryTag.NATIVE_DEFAULT);
            long bufA = Unsafe.malloc(l, MemoryTag.NATIVE_DEFAULT);
            long bufB = Unsafe.malloc(l, MemoryTag.NATIVE_DEFAULT);
            try {
                Assert.assertEquals(l,, buf, (int) l, 0));
                for (int i = 0; i < l; i++) {
                    try {
                        Unsafe.getUnsafe().copyMemory(buf, bufA, i);
                        Unsafe.getUnsafe().copyMemory(buf + i, bufB, l - i);
                        LEXER.parse(bufA, bufA + i, listener);
                        LEXER.parse(bufB, bufB + l - i, listener);
                    } catch (JsonException e) {
                        throw e;
            } finally {
      , l, MemoryTag.NATIVE_DEFAULT);
      , l, MemoryTag.NATIVE_DEFAULT);
      , l, MemoryTag.NATIVE_DEFAULT);
        } finally {
Also used : Path(io.questdb.std.str.Path) Test(org.junit.Test)

Example 53 with Path

use of io.questdb.std.str.Path in project questdb by bluestreak01.

the class LogFactoryTest method testRollingFileWriterBySize.

public void testRollingFileWriterBySize() throws Exception {
    String base = temp.getRoot().getAbsolutePath() + Files.SEPARATOR;
    String logFile = base + "mylog-${date:yyyy-MM-dd}.log";
    String expectedLogFile = base + "mylog-2015-05-03.log";
    final MicrosecondClock clock = new TestMicrosecondClock(TimestampFormatUtils.parseTimestamp("2015-05-03T10:35:00.000Z"), 1);
    try (Path path = new Path()) {
        // create a rogue file that would get in the way of the logger rolling existing files
    RingQueue<LogRecordSink> queue = new RingQueue<>(LogRecordSink::new, 1024, 1024, MemoryTag.NATIVE_DEFAULT);
    SPSequence pubSeq = new SPSequence(queue.getCycle());
    SCSequence subSeq = new SCSequence();
    try (final LogRollingFileWriter writer = new LogRollingFileWriter(FilesFacadeImpl.INSTANCE, clock, queue, subSeq, LogLevel.LOG_LEVEL_INFO)) {
        AtomicBoolean running = new AtomicBoolean(true);
        SOCountDownLatch halted = new SOCountDownLatch();
        new Thread(() -> {
            while (running.get()) {
            // noinspection StatementWithEmptyBody
            while (writer.runSerially()) ;
        // now publish
        int published = 0;
        int toPublish = 100_000;
        while (published < toPublish) {
            long cursor =;
            if (cursor < 0) {
            final long available = pubSeq.available();
            while (cursor < available && published < toPublish) {
                LogRecordSink sink = queue.get(cursor++);
            pubSeq.done(cursor - 1);
    assertFileLength(expectedLogFile + ".1");
Also used : Path(io.questdb.std.str.Path) RingQueue(io.questdb.mp.RingQueue) SPSequence(io.questdb.mp.SPSequence) SOCountDownLatch(io.questdb.mp.SOCountDownLatch) MicrosecondClock(io.questdb.std.datetime.microtime.MicrosecondClock) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) SCSequence(io.questdb.mp.SCSequence) Test(org.junit.Test)

Example 54 with Path

use of io.questdb.std.str.Path in project questdb by bluestreak01.

the class TableWriter method repairDataGaps.

private long repairDataGaps(final long timestamp) {
    if (txWriter.getMaxTimestamp() != Numbers.LONG_NaN && partitionBy != PartitionBy.NONE) {
        long fixedRowCount = 0;
        long lastTimestamp = -1;
        long transientRowCount = this.txWriter.getTransientRowCount();
        long maxTimestamp = this.txWriter.getMaxTimestamp();
        try {
            final long tsLimit = timestampFloorMethod.floor(this.txWriter.getMaxTimestamp());
            for (long ts = getPartitionLo(txWriter.getMinTimestamp()); ts < tsLimit; ts = timestampAddMethod.calculate(ts, 1)) {
                setStateForTimestamp(path, ts, false);
                int p = path.length();
                long partitionSize = txWriter.getPartitionSizeByPartitionTimestamp(ts);
                if (partitionSize >= 0 && ff.exists(path.$())) {
                    fixedRowCount += partitionSize;
                    lastTimestamp = ts;
                } else {
                    Path other = Path.getThreadLocal2(path.trimTo(p).$());
                    TableUtils.oldPartitionName(other, getTxn());
                    if (ff.exists(other.$())) {
                        if (!ff.rename(other, path)) {
                            LOG.error().$("could not rename [from=").$(other).$(", to=").$(path).$(']').$();
                            throw new CairoError("could not restore directory, see log for details");
                        } else {
                  $("restored [path=").$(path).$(']').$();
                    } else {
                        LOG.debug().$("missing partition [name=").$(path.trimTo(p).$()).$(']').$();
            if (lastTimestamp > -1) {
                setStateForTimestamp(path, tsLimit, false);
                if (!ff.exists(path.$())) {
                    Path other = Path.getThreadLocal2(path);
                    TableUtils.oldPartitionName(other, getTxn());
                    if (ff.exists(other.$())) {
                        if (!ff.rename(other, path)) {
                            LOG.error().$("could not rename [from=").$(other).$(", to=").$(path).$(']').$();
                            throw new CairoError("could not restore directory, see log for details");
                        } else {
                  $("restored [path=").$(path).$(']').$();
                    } else {
                        LOG.error().$("last partition does not exist [name=").$(path).$(']').$();
                        // ok, create last partition we discovered the active
                        // 1. read its size
                        setStateForTimestamp(path, lastTimestamp, false);
                        int p = path.length();
                        transientRowCount = txWriter.getPartitionSizeByPartitionTimestamp(lastTimestamp);
                        // 2. read max timestamp
                        TableUtils.dFile(path.trimTo(p), metadata.getColumnName(metadata.getTimestampIndex()));
                        maxTimestamp = TableUtils.readLongAtOffset(ff, path, tempMem16b, (transientRowCount - 1) * Long.BYTES);
                        fixedRowCount -= transientRowCount;
              $("updated active partition [name=").$(path.trimTo(p).$()).$(", maxTimestamp=").$ts(maxTimestamp).$(", transientRowCount=").$(transientRowCount).$(", fixedRowCount=").$(txWriter.getFixedRowCount()).$(']').$();
        } finally {
        final long expectedSize = txWriter.unsafeReadFixedRowCount();
        if (expectedSize != fixedRowCount || maxTimestamp != this.txWriter.getMaxTimestamp()) {
  $("actual table size has been adjusted [name=`").utf8(tableName).$('`').$(", expectedFixedSize=").$(expectedSize).$(", actualFixedSize=").$(fixedRowCount).$(']').$();
            txWriter.reset(fixedRowCount, transientRowCount, maxTimestamp);
            return maxTimestamp;
    return timestamp;
Also used : Path(io.questdb.std.str.Path)

Example 55 with Path

use of io.questdb.std.str.Path in project questdb by bluestreak01.

the class TableWriter method o3Commit.

/**
 * Commits O3 data. Lag is optional. When 0 is specified the entire O3 segment is committed.
 *
 * @param lag interval in microseconds that determines the length of O3 segment that is not going to be
 *            committed to disk. The interval starts at max timestamp of O3 segment and ends <i>lag</i>
 *            microseconds before this timestamp.
 * @return <i>true</i> when commit is a NOOP, i.e. no data has been committed to disk. <i>false</i> otherwise.
 */
private boolean o3Commit(long lag) {
    o3RowCount = getO3RowCount0();
    long o3LagRowCount = 0;
    long maxUncommittedRows = metadata.getMaxUncommittedRows();
    final int timestampIndex = metadata.getTimestampIndex();
    this.lastPartitionTimestamp = timestampFloorMethod.floor(partitionTimestampHi);
    long activePartitionTimestampCeil = timestampCeilMethod.ceil(partitionTimestampHi);
    try {
        o3RowCount += o3MoveUncommitted(timestampIndex);
        final long transientRowCount = txWriter.transientRowCount;
        // we may need to re-use file descriptors when this partition is the "current" one
        // we cannot open file again due to sharing violation
        // to determine that 'ooTimestampLo' goes into current partition
        // we need to compare 'partitionTimestampHi', which is appropriately truncated to DAY/MONTH/YEAR
        // to this.maxTimestamp, which isn't truncated yet. So we need to truncate it first$("sorting o3 [table=").$(tableName).$(']').$();
        final long sortedTimestampsAddr = o3TimestampMem.getAddress();
        // ensure there is enough size
        if (o3RowCount > 600 || !o3QuickSortEnabled) {
            Vect.radixSortLongIndexAscInPlace(sortedTimestampsAddr, o3RowCount, o3TimestampMemCpy.addressOf(0));
        } else {
            Vect.quickSortLongIndexAscInPlace(sortedTimestampsAddr, o3RowCount);
        // we have three frames:
        // partition logical "lo" and "hi" - absolute bounds (partitionLo, partitionHi)
        // partition actual data "lo" and "hi" (dataLo, dataHi)
        // out of order "lo" and "hi" (indexLo, indexHi)
        long srcOooMax;
        final long o3TimestampMin = getTimestampIndexValue(sortedTimestampsAddr, 0);
        if (o3TimestampMin < Timestamps.O3_MIN_TS) {
            o3InError = true;
            throw CairoException.instance(0).put("timestamps before 1970-01-01 are not allowed for O3");
        long o3TimestampMax = getTimestampIndexValue(sortedTimestampsAddr, o3RowCount - 1);
        if (o3TimestampMax < Timestamps.O3_MIN_TS) {
            o3InError = true;
            throw CairoException.instance(0).put("timestamps before 1970-01-01 are not allowed for O3");
        // Safe check of the sort. No known way to reproduce
        assert o3TimestampMin <= o3TimestampMax;
        if (lag > 0) {
            long lagThresholdTimestamp = o3TimestampMax - lag;
            if (lagThresholdTimestamp >= o3TimestampMin) {
                final long lagThresholdRow = Vect.boundedBinarySearchIndexT(sortedTimestampsAddr, lagThresholdTimestamp, 0, o3RowCount - 1, BinarySearch.SCAN_DOWN);
                o3LagRowCount = o3RowCount - lagThresholdRow - 1;
                if (o3LagRowCount > maxUncommittedRows) {
                    o3LagRowCount = maxUncommittedRows;
                    srcOooMax = o3RowCount - maxUncommittedRows;
                } else {
                    srcOooMax = lagThresholdRow + 1;
            } else {
                o3LagRowCount = o3RowCount;
                // commit half of the lag.
                if (o3LagRowCount > maxUncommittedRows) {
                    o3LagRowCount = maxUncommittedRows / 2;
                    srcOooMax = o3RowCount - o3LagRowCount;
                } else {
                    srcOooMax = 0;
            LOG.debug().$("o3 commit lag [table=").$(tableName).$(", lag=").$(lag).$(", maxUncommittedRows=").$(maxUncommittedRows).$(", o3max=").$ts(o3TimestampMax).$(", lagThresholdTimestamp=").$ts(lagThresholdTimestamp).$(", o3LagRowCount=").$(o3LagRowCount).$(", srcOooMax=").$(srcOooMax).$(", o3RowCount=").$(o3RowCount).I$();
        } else {
            LOG.debug().$("o3 commit no lag [table=").$(tableName).$(", o3RowCount=").$(o3RowCount).I$();
            srcOooMax = o3RowCount;
        if (srcOooMax == 0) {
            return true;
        // we could have moved the "srcOooMax" and hence we re-read the max timestamp
        o3TimestampMax = getTimestampIndexValue(sortedTimestampsAddr, srcOooMax - 1);
        // move uncommitted is liable to change max timestamp
        // however we need to identify last partition before max timestamp skips to NULL for example
        final long maxTimestamp = txWriter.getMaxTimestamp();
        // we are going to use this soon to avoid double-copying lag data
        // final boolean yep = isAppendLastPartitionOnly(sortedTimestampsAddr, o3TimestampMax);
        // reshuffle all columns according to timestamp index
        o3Sort(sortedTimestampsAddr, timestampIndex, o3RowCount);$("sorted [table=").utf8(tableName).I$();
        boolean success = true;
        int latchCount = 0;
        long srcOoo = 0;
        boolean flattenTimestamp = true;
        int pCount = 0;
        try {
            // We do not know upfront which partition is going to be last because this is
            // a single pass over the data. Instead, we will update transient row count in a rolling
            // manner, assuming the partition marked "last" is the last and then for a new partition
            // we move prevTransientRowCount into the "fixedRowCount" sum and set new value on the
            // transientRowCount
            long prevTransientRowCount = transientRowCount;
            while (srcOoo < srcOooMax) {
                try {
                    final long srcOooLo = srcOoo;
                    final long o3Timestamp = getTimestampIndexValue(sortedTimestampsAddr, srcOoo);
                    final long srcOooHi;
                    // keep ceil inclusive in the interval
                    final long srcOooTimestampCeil = timestampCeilMethod.ceil(o3Timestamp) - 1;
                    if (srcOooTimestampCeil < o3TimestampMax) {
                        srcOooHi = Vect.boundedBinarySearchIndexT(sortedTimestampsAddr, srcOooTimestampCeil, srcOoo, srcOooMax - 1, BinarySearch.SCAN_DOWN);
                    } else {
                        srcOooHi = srcOooMax - 1;
                    final long partitionTimestamp = timestampFloorMethod.floor(o3Timestamp);
                    final boolean last = partitionTimestamp == lastPartitionTimestamp;
                    srcOoo = srcOooHi + 1;
                    final long srcDataMax;
                    final long srcNameTxn;
                    final int partitionIndex = txWriter.findAttachedPartitionIndexByLoTimestamp(partitionTimestamp);
                    if (partitionIndex > -1) {
                        if (last) {
                            srcDataMax = transientRowCount;
                        } else {
                            srcDataMax = getPartitionSizeByIndex(partitionIndex);
                        srcNameTxn = getPartitionNameTxnByIndex(partitionIndex);
                    } else {
                        srcDataMax = 0;
                        srcNameTxn = -1;
                    final boolean append = last && (srcDataMax == 0 || o3Timestamp >= maxTimestamp);
                    final long partitionSize = srcDataMax + srcOooHi - srcOooLo + 1;
                    LOG.debug().$("o3 partition task [table=").$(tableName).$(", srcOooLo=").$(srcOooLo).$(", srcOooHi=").$(srcOooHi).$(", srcOooMax=").$(srcOooMax).$(", o3TimestampMin=").$ts(o3TimestampMin).$(", o3Timestamp=").$ts(o3Timestamp).$(", o3TimestampMax=").$ts(o3TimestampMax).$(", partitionTimestamp=").$ts(partitionTimestamp).$(", partitionIndex=").$(partitionIndex).$(", srcDataMax=").$(srcDataMax).$(", maxTimestamp=").$ts(maxTimestamp).$(", last=").$(last).$(", partitionSize=").$(partitionSize).$(", append=").$(append).$(", memUsed=").$(Unsafe.getMemUsed()).I$();
                    if (partitionTimestamp < lastPartitionTimestamp) {
                        // increment fixedRowCount by number of rows old partition incremented
                        this.txWriter.fixedRowCount += partitionSize - srcDataMax;
                    } else if (partitionTimestamp == lastPartitionTimestamp) {
                        // this is existing "last" partition, we can set the size directly
                        prevTransientRowCount = partitionSize;
                    } else {
                        // this is potentially a new last partition
                        this.txWriter.fixedRowCount += prevTransientRowCount;
                        prevTransientRowCount = partitionSize;
                    final O3Basket o3Basket =;
                    o3Basket.ensureCapacity(columnCount, indexCount);
                    AtomicInteger columnCounter =;
                    // async partition processing set this counter to the column count
                    // and then manages issues if publishing of column tasks fails
                    // mid-column-count.
                    if (append) {
                        // counted down correctly
                        try {
                            setAppendPosition(srcDataMax, false);
                        } catch (Throwable e) {
                            throw e;
                        Path pathToPartition = Path.getThreadLocal(this.path);
                        TableUtils.setPathForPartition(pathToPartition, partitionBy, o3TimestampMin, false);
                        TableUtils.txnPartitionConditionally(pathToPartition, srcNameTxn);
                        final int plen = pathToPartition.length();
                        int columnsPublished = 0;
                        for (int i = 0; i < columnCount; i++) {
                            final int colOffset = TableWriter.getPrimaryColumnIndex(i);
                            final boolean notTheTimestamp = i != timestampIndex;
                            final int columnType = metadata.getColumnType(i);
                            final CharSequence columnName = metadata.getColumnName(i);
                            final boolean isIndexed = metadata.isColumnIndexed(i);
                            final BitmapIndexWriter indexWriter = isIndexed ? getBitmapIndexWriter(i) : null;
                            final MemoryARW oooMem1 = o3Columns.getQuick(colOffset);
                            final MemoryARW oooMem2 = o3Columns.getQuick(colOffset + 1);
                            final MemoryMAR mem1 = columns.getQuick(colOffset);
                            final MemoryMAR mem2 = columns.getQuick(colOffset + 1);
                            final long srcDataTop = getColumnTop(i);
                            final long srcOooFixAddr;
                            final long srcOooVarAddr;
                            final MemoryMAR dstFixMem;
                            final MemoryMAR dstVarMem;
                            if (!ColumnType.isVariableLength(columnType)) {
                                srcOooFixAddr = oooMem1.addressOf(0);
                                srcOooVarAddr = 0;
                                dstFixMem = mem1;
                                dstVarMem = null;
                            } else {
                                srcOooFixAddr = oooMem2.addressOf(0);
                                srcOooVarAddr = oooMem1.addressOf(0);
                                dstFixMem = mem2;
                                dstVarMem = mem1;
                            try {
                                O3OpenColumnJob.appendLastPartition(pathToPartition, plen, columnName, columnCounter, notTheTimestamp ? columnType : ColumnType.setDesignatedTimestampBit(columnType, true), srcOooFixAddr, srcOooVarAddr, srcOooLo, srcOooHi, srcOooMax, o3TimestampMin, o3TimestampMax, partitionTimestamp, srcDataTop, srcDataMax, isIndexed, dstFixMem, dstVarMem, this, indexWriter);
                            } catch (Throwable e) {
                                if (columnCounter.addAndGet(columnsPublished - columnCount) == 0) {
                                throw e;
                    } else {
                        if (flattenTimestamp) {
                            Vect.flattenIndex(sortedTimestampsAddr, o3RowCount);
                            flattenTimestamp = false;
                        o3CommitPartitionAsync(columnCounter, maxTimestamp, sortedTimestampsAddr, srcOooMax, o3TimestampMin, o3TimestampMax, srcOooLo, srcOooHi, partitionTimestamp, last, srcDataMax, srcNameTxn, o3Basket);
                } catch (CairoException | CairoError e) {
                    LOG.error().$((Sinkable) e).$();
                    success = false;
                    throw e;
            // at this point we should know the last partition row count
            this.txWriter.transientRowCount = prevTransientRowCount;
            this.partitionTimestampHi = Math.max(this.partitionTimestampHi, o3TimestampMax);
            this.txWriter.updateMaxTimestamp(Math.max(txWriter.getMaxTimestamp(), o3TimestampMax));
        } finally {
            // we are stealing work here; it is possible we get an exception from this method
            LOG.debug().$("o3 expecting updates [table=").$(tableName).$(", partitionsPublished=").$(pCount).I$();
            o3ConsumePartitionUpdates(srcOooMax, o3TimestampMin, o3TimestampMax);
            o3InError = !success || o3ErrorCount.get() > 0;
            if (success && o3ErrorCount.get() > 0) {
                // noinspection ThrowFromFinallyBlock
                throw CairoException.instance(0).put("bulk update failed and will be rolled back");
        if (o3LagRowCount > 0) {
            o3ShiftLagRowsUp(timestampIndex, o3LagRowCount, srcOooMax);
    } finally {
        if (denseIndexers.size() == 0) {
        // Alright, we finished updating partitions. Now we need to get this writer instance into
        // a consistent state.
        // We start with ensuring append memory is in ready-to-use state. When max timestamp changes we need to
        // move append memory to new set of files. Otherwise, we stay on the same set but advance to append position.
        avoidIndexOnCommit = o3ErrorCount.get() == 0;
        if (o3LagRowCount == 0) {
            LOG.debug().$("lag segment is empty").$();
        } else {
            // adjust O3 master ref so that virtual row count becomes equal to value of "o3LagRowCount"
            this.o3MasterRef = this.masterRef - o3LagRowCount * 2 + 1;
            LOG.debug().$("adjusted [o3RowCount=").$(getO3RowCount0()).I$();
    if (!columns.getQuick(0).isOpen() || partitionTimestampHi > activePartitionTimestampCeil) {
    // will attempt to mmap new page and fail... Then we can remove the 'true' parameter
    try {
        setAppendPosition(txWriter.getTransientRowCount(), true);
    } catch (Throwable e) {
        LOG.error().$("data is committed but writer failed to update its state `").$(e).$('`').$();
        distressed = true;
        throw e;
    return false;
Also used : Path(io.questdb.std.str.Path) AtomicInteger(java.util.concurrent.atomic.AtomicInteger)


Path (io.questdb.std.str.Path)141 Test (org.junit.Test)89 File ( FilesFacade (io.questdb.std.FilesFacade)13 MemoryCMARW (io.questdb.cairo.vm.api.MemoryCMARW)10 MemoryMR (io.questdb.cairo.vm.api.MemoryMR)10 Rnd (io.questdb.std.Rnd)10 AbstractCairoTest (io.questdb.cairo.AbstractCairoTest)7 MemoryMA (io.questdb.cairo.vm.api.MemoryMA)7 MemoryMARW (io.questdb.cairo.vm.api.MemoryMARW)7 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)7 AbstractGriffinTest (io.questdb.griffin.AbstractGriffinTest)6 NativeLPSZ (io.questdb.std.str.NativeLPSZ)6 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)6 SOCountDownLatch ( LPSZ (io.questdb.std.str.LPSZ)5 RecordCursor (io.questdb.cairo.sql.RecordCursor)4 RowCursor (io.questdb.cairo.sql.RowCursor)4 MemoryARW (io.questdb.cairo.vm.api.MemoryARW)4 RingQueue (