Search in sources :

Example 11 with DirectByteCharSequence

use of com.questdb.std.str.DirectByteCharSequence in project questdb by bluestreak01.

The method onFields of the class PlainTextMetadataParser.

/**
 * Receives the fields of one parsed line and updates the blank counters and the
 * probe histogram.
 *
 * @param line   zero-based line number; line 0 is additionally stashed as a possible header
 * @param values fields of the line
 * @param hi     number of leading entries of {@code values} to inspect
 */
@Override
public void onFields(int line, ObjList<DirectByteCharSequence> values, int hi) {
    // the very first line may turn out to be a header, so remember it
    final boolean firstLine = line == 0;
    if (firstLine) {
        stashPossibleHeader(values, hi);
    }

    final int probeCount = typeProbeCollection.getProbeCount();
    // histogram slot for a (field, probe) pair is field * probeCount + probe;
    // "base" tracks field * probeCount as the field index advances
    for (int field = 0, base = 0; field < hi; field++, base += probeCount) {
        final DirectByteCharSequence value = values.getQuick(field);
        if (value.length() == 0) {
            _blanks.increment(field);
        }

        int probeIndex = 0;
        while (probeIndex < probeCount) {
            // count every probe that accepts the field value
            if (typeProbeCollection.getProbe(probeIndex).probe(value)) {
                _histogram.increment(base + probeIndex);
            }
            probeIndex++;
        }
    }
}
Also used : TypeProbe(com.questdb.parser.typeprobe.TypeProbe) DirectByteCharSequence(com.questdb.std.str.DirectByteCharSequence)

Example 12 with DirectByteCharSequence

use of com.questdb.std.str.DirectByteCharSequence in project questdb by bluestreak01.

The method parseFile of the class TypeProbeCollection.

/**
 * Reads date probe definitions from a text file and appends a {@code DateProbe} to
 * {@code probes} for every definition found.
 * <p>
 * The file is read into native memory in one go and scanned byte by byte:
 * <ul>
 * <li>a line is terminated by {@code '\n'} or {@code '\r'}; the last line does not need a terminator</li>
 * <li>{@code '#'} as the first significant character of a line turns the whole line into a comment</li>
 * <li>each definition is a date pattern, optionally followed by whitespace and a date locale name</li>
 * <li>a pattern that begins the line with a single quote runs until the closing single quote
 * and may contain whitespace</li>
 * <li>when the locale name is omitted the default locale of {@code dateLocaleFactory} is used</li>
 * <li>an unknown locale name is logged and the line is skipped</li>
 * </ul>
 *
 * @param fileName          name of the file to parse
 * @param dateFormatFactory factory handed to every created {@code DateProbe}
 * @param dateLocaleFactory source of named locales and of the default locale
 * @throws IOException when the file cannot be opened or cannot be read in full
 */
private void parseFile(CharSequence fileName, DateFormatFactory dateFormatFactory, DateLocaleFactory dateLocaleFactory) throws IOException {
    final DirectByteCharSequence dbcs = new DirectByteCharSequence();
    try (Path path = new Path().of(fileName).$()) {
        long fd = Files.openRO(path);
        if (fd < 0) {
            throw new IOException("Cannot open " + fileName + " [errno=" + Os.errno() + ']');
        }
        try {
            long sz = Files.length(fd);
            long buf = Unsafe.malloc(sz);
            try {
                // the buffer is uninitialised native memory, do not parse it unless it was filled completely
                if (Files.read(fd, buf, sz, 0) != sz) {
                    throw new IOException("Cannot read " + fileName + " [errno=" + Os.errno() + ']');
                }
                long p = buf;
                long hi = p + sz;
                // start of the token that is currently being accumulated
                long _lo = p;
                // true until the first regular character of the line is seen
                boolean newline = true;
                // true when the remainder of the line has to be ignored
                boolean comment = false;
                // true while inside a quoted pattern
                boolean quote = false;
                // true while no character of the current token has been seen yet
                boolean space = true;
                // pattern of the current line, null until it has been read
                String pattern = null;
                while (p < hi) {
                    char b = (char) Unsafe.getUnsafe().getByte(p++);
                    switch (b) {
                        case '#':
                            // '#' starts a comment only at the beginning of a line and outside of quotes;
                            // the flag is never cleared here, otherwise a second '#' would re-enable
                            // parsing of a line that is already being ignored
                            if (newline && !quote) {
                                comment = true;
                            }
                            break;
                        case '\'':
                            // inside comment, ignore
                            if (comment) {
                                continue;
                            }
                            if (quote) {
                                // we were inside quote, close out and check which part to assign result to
                                if (pattern == null) {
                                    pattern = dbcs.of(_lo, p - 1).toString();
                                    _lo = p;
                                    space = true;
                                    quote = false;
                                } else {
                                    // pattern has been assigned, should never end up here
                                    LOG.error().$("Internal error").$();
                                }
                            } else if (newline) {
                                // only start quote if it is at beginning of line
                                _lo = p;
                                quote = true;
                            }
                            break;
                        case ' ':
                        case '\t':
                            if (comment || quote) {
                                continue;
                            }
                            if (space) {
                                // whitespace before a token, keep moving token start forward
                                _lo = p;
                                continue;
                            }
                            space = true;
                            newline = false;
                            String s = dbcs.of(_lo, p - 1).toString();
                            if (pattern == null) {
                                pattern = s;
                                _lo = p;
                            } else {
                                DateLocale locale = dateLocaleFactory.getDateLocale(s);
                                if (locale == null) {
                                    LOG.error().$("Unknown date locale: ").$(s).$();
                                } else {
                                    probes.add(new DateProbe(dateFormatFactory, locale, pattern));
                                }
                                // the line is complete, skip the rest of it so that trailing
                                // whitespace does not produce a second probe at end of line
                                comment = true;
                            }
                            break;
                        case '\n':
                        case '\r':
                            if (!comment) {
                                // p is already past the line terminator
                                completeLine(dbcs, _lo, p - 1, pattern, dateFormatFactory, dateLocaleFactory);
                            }
                            // same state as at the beginning of the file, leading whitespace is skipped
                            newline = true;
                            comment = false;
                            quote = false;
                            pattern = null;
                            space = true;
                            _lo = p;
                            break;
                        default:
                            if (newline) {
                                newline = false;
                            }
                            if (space) {
                                space = false;
                            }
                            break;
                    }
                }
                // last line may not be terminated by a line break
                if (!comment) {
                    completeLine(dbcs, _lo, hi, pattern, dateFormatFactory, dateLocaleFactory);
                }
            } finally {
                Unsafe.free(buf, sz);
            }
        } finally {
            // release the descriptor on every path, including read failures
            Files.close(fd);
        }
    }
}

/**
 * Finishes a definition line whose trailing token occupies native memory {@code [lo, hi)}
 * and adds the resulting probe, if any, to {@code probes}.
 *
 * @param dbcs              reusable flyweight used to turn the token into a string
 * @param lo                address of the first byte of the trailing token
 * @param hi                address one past the last byte of the trailing token
 * @param pattern           pattern already read on this line, or null when the trailing token is the pattern
 * @param dateFormatFactory factory handed to the created {@code DateProbe}
 * @param dateLocaleFactory source of named locales and of the default locale
 */
private void completeLine(DirectByteCharSequence dbcs, long lo, long hi, String pattern, DateFormatFactory dateFormatFactory, DateLocaleFactory dateLocaleFactory) {
    if (lo < hi) {
        String s = dbcs.of(lo, hi).toString();
        if (pattern == null) {
            // no date locale, use default
            probes.add(new DateProbe(dateFormatFactory, dateLocaleFactory.getDefaultDateLocale(), s));
        } else {
            DateLocale locale = dateLocaleFactory.getDateLocale(s);
            if (locale == null) {
                LOG.error().$("Unknown date locale: ").$(s).$();
            } else {
                probes.add(new DateProbe(dateFormatFactory, locale, pattern));
            }
        }
    } else if (pattern != null) {
        // pattern was followed by nothing but whitespace, use default locale
        probes.add(new DateProbe(dateFormatFactory, dateLocaleFactory.getDefaultDateLocale(), pattern));
    }
}
Also used : Path(com.questdb.std.str.Path) DateLocale(com.questdb.std.time.DateLocale) DirectByteCharSequence(com.questdb.std.str.DirectByteCharSequence) IOException(java.io.IOException)

Aggregations

DirectByteCharSequence (com.questdb.std.str.DirectByteCharSequence)12 Test (org.junit.Test)4 NumericException (com.questdb.common.NumericException)2 StringSink (com.questdb.std.str.StringSink)2 Ignore (org.junit.Ignore)2 JournalRuntimeException (com.questdb.common.JournalRuntimeException)1 ImportColumnCountException (com.questdb.ex.ImportColumnCountException)1 ImportNameException (com.questdb.ex.ImportNameException)1 ImportedColumnMetadata (com.questdb.parser.ImportedColumnMetadata)1 PlainTextLexer (com.questdb.parser.plaintext.PlainTextLexer)1 PlainTextParser (com.questdb.parser.plaintext.PlainTextParser)1 TypeProbe (com.questdb.parser.typeprobe.TypeProbe)1 JournalException (com.questdb.std.ex.JournalException)1 Path (com.questdb.std.str.Path)1 DateLocale (com.questdb.std.time.DateLocale)1 JournalEntryWriter (com.questdb.store.JournalEntryWriter)1 File (java.io.File)1 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1