Use of org.exist.storage.io.VariableByteInput in the project exist by eXist-db.
Class NGramIndexWorker, method dropIndex.
/**
 * Removes the occurrences collected in {@code ngrams} from the index data stored for
 * {@code currentDoc}.
 *
 * <p>For each (QName, term) entry in {@code ngrams} the stored value is read while holding the
 * B-tree write lock of {@code index.db}. Records that belong to other documents are copied
 * unchanged into {@code os}. Records that belong to {@code currentDoc} are either dropped
 * completely ({@link ReindexMode#REMOVE_ALL_NODES}) or rewritten without the nodes contained in
 * the entry's occurrence list. If a record of {@code currentDoc} was seen, the rebuilt value is
 * put back, or the key is removed when nothing is left. {@code ngrams} is cleared at the end.</p>
 *
 * <p>{@code LockException} and {@code IOException} are logged at WARN level and not propagated;
 * processing continues with the next entry.</p>
 *
 * @param mode the reindex mode; only the comparison with {@code REMOVE_ALL_NODES} is relevant here
 */
private void dropIndex(final ReindexMode mode) {
// Nothing was collected, so there is nothing to remove.
if (ngrams.isEmpty()) {
return;
}
// Scratch buffer for node IDs, frequencies and offsets; its byte length must be known
// before the payload is written to `os` (see writeFixedInt below).
final VariableByteOutputStream buf = new VariableByteOutputStream();
for (final Map.Entry<QNameTerm, OccurrenceList> entry : ngrams.entrySet()) {
final QNameTerm key = entry.getKey();
final OccurrenceList occurencesList = entry.getValue();
occurencesList.sort();
// NOTE(review): `os` is cleared again right after the lock is acquired and once more in the
// finally block, so this call looks redundant - confirm before removing.
os.clear();
try (final ManagedLock<ReentrantLock> dbLock = lockManager.acquireBtreeWriteLock(index.db.getLockName())) {
// Key under which the data for this collection / QName / term is stored.
final NGramQNameKey value = new NGramQNameKey(currentDoc.getCollection().getId(), key.qname, index.getBrokerPool().getSymbols(), key.term);
// Becomes true once a record belonging to currentDoc has been read.
boolean changed = false;
os.clear();
final VariableByteInput is = index.db.getAsStream(value);
if (is == null) {
// No stored data for this key: nothing to drop. The lock is released by
// try-with-resources and the finally block below still runs.
continue;
}
// The stored value is a sequence of records, each laid out as:
// docId (int), nameType (byte), occurrences (int), length (fixed int), then `length` bytes.
while (is.available() > 0) {
final int storedDocId = is.readInt();
final byte nameType = is.readByte();
final int occurrences = is.readInt();
// Read (variable) length of node IDs + frequency + offsets
final int length = is.readFixedInt();
if (storedDocId != currentDoc.getDocId()) {
// The record belongs to another document:
// copy it unchanged (header plus raw payload) to the output.
os.writeInt(storedDocId);
os.writeByte(nameType);
os.writeInt(occurrences);
os.writeFixedInt(length);
is.copyRaw(os, length);
} else {
// The record belongs to our document:
if (mode == ReindexMode.REMOVE_ALL_NODES) {
// drop the whole record by skipping its payload
is.skipBytes(length);
} else {
// removing nodes: need to filter out the node ids to be removed
// and collect the surviving occurrences in a new list
final OccurrenceList newOccurrences = new OccurrenceList();
// Node IDs are read relative to the previously read node ID.
NodeId previous = null;
for (int m = 0; m < occurrences; m++) {
final NodeId nodeId = index.getBrokerPool().getNodeFactory().createFromStream(previous, is);
previous = nodeId;
final int freq = is.readInt();
// Keep the node only if it is NOT in the list of removed nodes.
if (!occurencesList.contains(nodeId)) {
for (int n = 0; n < freq; n++) {
newOccurrences.add(nodeId, is.readInt());
}
} else {
// Removed node: step over its `freq` offsets.
// NOTE(review): presumably skip(n) skips n encoded values rather than n bytes - confirm.
is.skip(freq);
}
}
// append the data from the new list
if (newOccurrences.getSize() > 0) {
// Don't forget this one
newOccurrences.sort();
os.writeInt(currentDoc.getDocId());
os.writeByte(nameType);
os.writeInt(newOccurrences.getTermCount());
// write nodeids, freq, and offsets to a `temp` buf
previous = null;
for (int m = 0; m < newOccurrences.getSize(); ) {
previous = newOccurrences.getNode(m).write(previous, buf);
final int freq = newOccurrences.getOccurrences(m);
buf.writeInt(freq);
for (int n = 0; n < freq; n++) {
buf.writeInt(newOccurrences.getOffset(m + n));
}
// Entries m .. m + freq - 1 were just written as the offsets of one node;
// continue with the entry that follows them.
m += freq;
}
final byte[] bufData = buf.toByteArray();
// clear the buf for the next iteration
buf.clear();
// Write length of node IDs + frequency + offsets (bytes)
os.writeFixedInt(bufData.length);
// Write the node IDs + frequency + offset
os.write(bufData);
}
}
// A record of currentDoc was read, so the stored value has to be rewritten or removed.
changed = true;
}
}
// Store new data, if relevant
if (changed) {
// Well, nothing to store : remove the existing data
if (os.data().size() == 0) {
index.db.remove(value);
} else {
if (index.db.put(value, os.data()) == BFile.UNKNOWN_ADDRESS) {
LOG.error("Could not put index data for token '{}' in '{}'", key.term, FileUtils.fileName(index.db.getFile()));
}
}
}
} catch (final LockException e) {
// Logged only: the entry is skipped and the loop continues.
LOG.warn("Failed to acquire lock for file {}", FileUtils.fileName(index.db.getFile()), e);
} catch (final IOException e) {
// Logged only: the entry is skipped and the loop continues.
LOG.warn("IO error for file {}", FileUtils.fileName(index.db.getFile()), e);
} finally {
// Leave the shared output buffer empty, whatever happened above.
os.clear();
}
}
// All collected entries have been processed (or skipped after a logged error).
ngrams.clear();
}
Use of org.exist.storage.io.VariableByteInput in the project exist by eXist-db.
Class SymbolTableTest, method write_and_read_are_balanced.
/**
 * Checks that whatever {@code localNameSymbols.write} emits for a single symbol is consumed
 * again, call for call, by {@code SymbolTable.read}.
 */
@Test
public void write_and_read_are_balanced() throws IOException, BrokerPoolServiceException {
    // Look up one name so that localNameSymbols has something to write;
    // the expectations below assume exactly one byte/int/UTF triple.
    final SymbolTable table = createSymbolTable(createTempDir());
    table.getSymbol("some-name");

    // Captures carry the written values over to the read phase.
    final Capture<Byte> writtenByte = newCapture();
    final Capture<Integer> writtenInt = newCapture();
    final Capture<String> writtenString = newCapture();

    final VariableByteOutputStream outputMock = createMock(VariableByteOutputStream.class);
    final VariableByteInput inputMock = createMock(VariableByteInput.class);

    // --- write phase: record the expected calls and capture their arguments ---
    outputMock.writeByte(captureByte(writtenByte));
    outputMock.writeInt(captureInt(writtenInt));
    outputMock.writeUTF(capture(writtenString));
    replay(outputMock);

    table.localNameSymbols.write(outputMock);
    verify(outputMock);

    // --- read phase: hand back the captured values, then signal end of input ---
    expect(inputMock.available()).andReturn(1);
    expect(inputMock.readByte()).andReturn(writtenByte.getValue());
    expect(inputMock.readInt()).andReturn(writtenInt.getValue());
    expect(inputMock.readUTF()).andReturn(writtenString.getValue());
    expect(inputMock.available()).andReturn(0);
    replay(inputMock);

    table.read(inputMock);
    verify(inputMock);
}
Use of org.exist.storage.io.VariableByteInput in the project exist by eXist-db.
Class SymbolTable, method loadSymbols.
/**
 * Read the global symbol table. The global symbol table stores QNames and
 * namespace/prefix mappings.
 *
 * <p>The file starts with a fixed-width magic number. A file carrying
 * {@code LEGACY_FILE_FORMAT_VERSION_ID} is read with {@code readLegacy} and immediately
 * re-saved in the current format; a file carrying {@code FILE_FORMAT_VERSION_ID} is read
 * with {@code read}; any other magic number is rejected.</p>
 *
 * @throws EXistException if the magic number is unknown, if the file does not exist, or if
 *                        any other I/O error occurs while reading (the I/O exception is kept
 *                        as the cause)
 */
private synchronized void loadSymbols() throws EXistException {
    try (final InputStream fis = new BufferedInputStream(Files.newInputStream(getFile()))) {
        final VariableByteInput is = new VariableByteInputStream(fis);
        final int magic = is.readFixedInt();
        if (magic == LEGACY_FILE_FORMAT_VERSION_ID) {
            LOG.info("Converting legacy symbols.dbx to new format...");
            readLegacy(is);
            saveSymbols();
        } else if (magic != FILE_FORMAT_VERSION_ID) {
            // The original concatenation lacked a space and produced "an olderor newer version".
            throw new EXistException("Symbol table was created by an older or newer version of eXist"
                    + " (file id: " + magic + "). To avoid damage, the database will stop.");
        } else {
            read(is);
        }
    } catch (final FileNotFoundException | java.nio.file.NoSuchFileException e) {
        // Files.newInputStream reports a missing file as NoSuchFileException, so it has to be
        // caught here as well for this branch to be reachable. Fully qualified to avoid
        // depending on an import that may not be present in the file.
        throw new EXistException("Could not read " + this.getFile().toAbsolutePath().toString(), e);
    } catch (final IOException e) {
        throw new EXistException("IO error occurred while reading " + this.getFile().toAbsolutePath().toString() + ": " + e.getMessage(), e);
    }
}
Aggregations