Search in sources :

Example 6 with VariableByteInput

use of org.exist.storage.io.VariableByteInput in project exist by eXist-db.

the class NGramIndexWorker method dropIndex.

/**
 * Removes the current document's entries from the persisted n-gram index.
 * <p>
 * For every (QName, term) key pending in {@code ngrams}, the stored value is
 * read back as a sequence of per-document records laid out as: document id,
 * name type, occurrence count, fixed-width byte length, followed by that many
 * bytes holding node ids, frequencies and offsets. Records belonging to other
 * documents are copied unchanged into {@code os}; records belonging to
 * {@code currentDoc} are either dropped entirely or rewritten without the
 * nodes listed in the pending occurrence list. If at least one record of the
 * current document was encountered, the key is stored again with the rebuilt
 * value, or removed when nothing is left.
 * <p>
 * Lock and I/O failures are logged and do not abort processing of the
 * remaining keys. {@code ngrams} is cleared before returning.
 *
 * @param mode {@link ReindexMode#REMOVE_ALL_NODES} discards all data of the
 *             current document; any other mode removes only the nodes
 *             contained in the pending occurrence lists
 */
private void dropIndex(final ReindexMode mode) {
    // Nothing was collected for removal.
    if (ngrams.isEmpty()) {
        return;
    }
    // Scratch buffer used to measure the byte length of a rewritten record body.
    final VariableByteOutputStream buf = new VariableByteOutputStream();
    for (final Map.Entry<QNameTerm, OccurrenceList> entry : ngrams.entrySet()) {
        final QNameTerm key = entry.getKey();
        // Occurrences scheduled for removal under this key.
        final OccurrenceList occurencesList = entry.getValue();
        occurencesList.sort();
        os.clear();
        try (final ManagedLock<ReentrantLock> dbLock = lockManager.acquireBtreeWriteLock(index.db.getLockName())) {
            final NGramQNameKey value = new NGramQNameKey(currentDoc.getCollection().getId(), key.qname, index.getBrokerPool().getSymbols(), key.term);
            // Set once a record of the current document has been seen, i.e. the stored value must be rewritten.
            boolean changed = false;
            os.clear();
            final VariableByteInput is = index.db.getAsStream(value);
            // No stored data for this key: nothing to drop.
            if (is == null) {
                continue;
            }
            // Walk the stored value record by record.
            while (is.available() > 0) {
                final int storedDocId = is.readInt();
                final byte nameType = is.readByte();
                final int occurrences = is.readInt();
                // Read (variable) length of node IDs + frequency + offsets
                final int length = is.readFixedInt();
                if (storedDocId != currentDoc.getDocId()) {
                    // The record belongs to another document:
                    // copy it verbatim to the output
                    os.writeInt(storedDocId);
                    os.writeByte(nameType);
                    os.writeInt(occurrences);
                    os.writeFixedInt(length);
                    is.copyRaw(os, length);
                } else {
                    // The record belongs to the current document:
                    if (mode == ReindexMode.REMOVE_ALL_NODES) {
                        // drop the whole record body
                        is.skipBytes(length);
                    } else {
                        // Removing individual nodes: filter out the node ids to be removed
                        // and collect the surviving occurrences in a new list
                        final OccurrenceList newOccurrences = new OccurrenceList();
                        // Node ids are read relative to the previously read id.
                        NodeId previous = null;
                        for (int m = 0; m < occurrences; m++) {
                            final NodeId nodeId = index.getBrokerPool().getNodeFactory().createFromStream(previous, is);
                            previous = nodeId;
                            final int freq = is.readInt();
                            // keep the node unless it is in the list of removed nodes
                            if (!occurencesList.contains(nodeId)) {
                                for (int n = 0; n < freq; n++) {
                                    newOccurrences.add(nodeId, is.readInt());
                                }
                            } else {
                                // Discard the offsets of a removed node.
                                // NOTE(review): assumes skip(n) drops n encoded values (not n bytes) - confirm against VariableByteInput.
                                is.skip(freq);
                            }
                        }
                        // Write the surviving occurrences back as a new record; if none survive, the record is dropped
                        if (newOccurrences.getSize() > 0) {
                            // The list must be sorted before it is serialized
                            newOccurrences.sort();
                            os.writeInt(currentDoc.getDocId());
                            os.writeByte(nameType);
                            // NOTE(review): presumably the number of distinct nodes in the list - confirm against OccurrenceList.
                            os.writeInt(newOccurrences.getTermCount());
                            // Write node ids, freq, and offsets to the temporary `buf` so the byte length is known up front
                            previous = null;
                            // Each pass handles one node together with its `freq` consecutive entries, then jumps past them.
                            for (int m = 0; m < newOccurrences.getSize(); ) {
                                previous = newOccurrences.getNode(m).write(previous, buf);
                                final int freq = newOccurrences.getOccurrences(m);
                                buf.writeInt(freq);
                                for (int n = 0; n < freq; n++) {
                                    buf.writeInt(newOccurrences.getOffset(m + n));
                                }
                                m += freq;
                            }
                            final byte[] bufData = buf.toByteArray();
                            // Reset the buf for the next record
                            buf.clear();
                            // Write length of node IDs + frequency + offsets (bytes)
                            os.writeFixedInt(bufData.length);
                            // Write the node IDs + frequency + offset
                            os.write(bufData);
                        }
                    }
                    changed = true;
                }
            }
            // Store the rebuilt value, but only if a record of the current document was found
            if (changed) {
                // Nothing left to store: remove the existing entry
                if (os.data().size() == 0) {
                    index.db.remove(value);
                } else {
                    if (index.db.put(value, os.data()) == BFile.UNKNOWN_ADDRESS) {
                        LOG.error("Could not put index data for token '{}' in '{}'", key.term, FileUtils.fileName(index.db.getFile()));
                    }
                }
            }
        } catch (final LockException e) {
            // Logged only; the loop carries on with the next key.
            LOG.warn("Failed to acquire lock for file {}", FileUtils.fileName(index.db.getFile()), e);
        } catch (final IOException e) {
            // Logged only; the loop carries on with the next key.
            LOG.warn("IO error for file {}", FileUtils.fileName(index.db.getFile()), e);
        } finally {
            // Also runs on the early `continue`, leaving the output buffer empty for the next key.
            os.clear();
        }
    }
    ngrams.clear();
}
Also used : ReentrantLock(java.util.concurrent.locks.ReentrantLock) OccurrenceList(org.exist.storage.OccurrenceList) IOException(java.io.IOException) VariableByteInput(org.exist.storage.io.VariableByteInput) VariableByteOutputStream(org.exist.storage.io.VariableByteOutputStream) NodeId(org.exist.numbering.NodeId)

Example 7 with VariableByteInput

use of org.exist.storage.io.VariableByteInput in project exist by eXist-db.

the class SymbolTableTest method write_and_read_are_balanced.

/**
 * Checks that whatever the local-name symbols write to an output stream is
 * exactly what {@code SymbolTable.read} consumes from an input stream: the
 * values captured on the write side are replayed on the read side.
 */
@Test
public void write_and_read_are_balanced() throws IOException, BrokerPoolServiceException {
    // Fresh symbol table holding a single symbol.
    final SymbolTable table = createSymbolTable(createTempDir());
    table.getSymbol("some-name");

    final VariableByteOutputStream outputMock = createMock(VariableByteOutputStream.class);
    final VariableByteInput inputMock = createMock(VariableByteInput.class);

    // Holders for the values handed to the output mock.
    final Capture<Byte> writtenByte = newCapture();
    final Capture<Integer> writtenInt = newCapture();
    final Capture<String> writtenString = newCapture();

    // Write side: expect one byte, one int and one UTF string, and capture each of them.
    outputMock.writeByte(captureByte(writtenByte));
    outputMock.writeInt(captureInt(writtenInt));
    outputMock.writeUTF(capture(writtenString));
    replay(outputMock);

    table.localNameSymbols.write(outputMock);
    verify(outputMock);

    // Read side: report one available entry, replay the captured values, then report end of data.
    expect(inputMock.available()).andReturn(1);
    expect(inputMock.readByte()).andReturn(writtenByte.getValue());
    expect(inputMock.readInt()).andReturn(writtenInt.getValue());
    expect(inputMock.readUTF()).andReturn(writtenString.getValue());
    expect(inputMock.available()).andReturn(0);
    replay(inputMock);

    table.read(inputMock);
    verify(inputMock);
}
Also used : VariableByteInput(org.exist.storage.io.VariableByteInput) VariableByteOutputStream(org.exist.storage.io.VariableByteOutputStream) Test(org.junit.Test)

Example 8 with VariableByteInput

use of org.exist.storage.io.VariableByteInput in project exist by eXist-db.

the class SymbolTable method loadSymbols.

/**
 * Read the global symbol table. The global symbol table stores QNames and
 * namespace/prefix mappings.
 * <p>
 * The file starts with a fixed-width format identifier. A legacy identifier
 * triggers a one-off conversion: the legacy content is read and the table is
 * saved again immediately. The current identifier is read directly. Any other
 * identifier is rejected.
 *
 * @throws EXistException if the file is missing, cannot be read, or carries
 *                        an unrecognised format identifier
 */
private synchronized void loadSymbols() throws EXistException {
    try (final InputStream fis = new BufferedInputStream(Files.newInputStream(getFile()))) {
        final VariableByteInput is = new VariableByteInputStream(fis);
        final int magic = is.readFixedInt();
        if (magic == LEGACY_FILE_FORMAT_VERSION_ID) {
            LOG.info("Converting legacy symbols.dbx to new format...");
            readLegacy(is);
            // NOTE(review): the table is saved while the input stream on the same file is still open - confirm saveSymbols() tolerates that.
            saveSymbols();
        } else if (magic == FILE_FORMAT_VERSION_ID) {
            read(is);
        } else {
            // Unknown format identifier: refuse to load rather than risk misreading the file.
            throw new EXistException("Symbol table was created by an older or newer version of eXist"
                    + " (file id: " + magic + "). To avoid damage, the database will stop.");
        }
    } catch (final FileNotFoundException | java.nio.file.NoSuchFileException e) {
        // Files.newInputStream signals a missing file with NoSuchFileException, so both types are handled here.
        throw new EXistException("Could not read " + getFile().toAbsolutePath(), e);
    } catch (final IOException e) {
        throw new EXistException("IO error occurred while reading " + getFile().toAbsolutePath() + ": " + e.getMessage(), e);
    }
}
Also used : VariableByteInput(org.exist.storage.io.VariableByteInput) VariableByteInputStream(org.exist.storage.io.VariableByteInputStream) EXistException(org.exist.EXistException)

Aggregations

VariableByteInput (org.exist.storage.io.VariableByteInput)8 ReentrantLock (java.util.concurrent.locks.ReentrantLock)4 VariableByteOutputStream (org.exist.storage.io.VariableByteOutputStream)3 Test (org.junit.Test)3 XmldbURI (org.exist.xmldb.XmldbURI)2 IOException (java.io.IOException)1 Random (java.util.Random)1 Nullable (javax.annotation.Nullable)1 EXistException (org.exist.EXistException)1 Collection (org.exist.collections.Collection)1 NodeId (org.exist.numbering.NodeId)1 OccurrenceList (org.exist.storage.OccurrenceList)1 VariableByteInputStream (org.exist.storage.io.VariableByteInputStream)1 TerminatedException (org.exist.xquery.TerminatedException)1