Use of nl.inl.blacklab.exceptions.BlackLabRuntimeException in project BlackLab by INL: class AnnotationForwardIndexReader, method retrievePartsIntAllocationless.
public List<int[]> retrievePartsIntAllocationless(int fiid, int[] starts, int[] ends, List<int[]> ret) {
    if (!initialized)
        initialize();
    if (deleted[fiid] != 0)
        return null;
    int n = starts.length;
    if (n != ends.length)
        throw new IllegalArgumentException("start and end must be of equal length");
    // Reuse the caller-supplied list instead of allocating a new one
    // (the point of the "allocationless" variant)
    List<int[]> result = ret;
    result.clear();
    for (int i = 0; i < n; i++) {
        // Don't modify the start/end array contents!
        int start = starts[i];
        int end = ends[i];
        if (start == -1)
            start = 0;
        if (end == -1)
            end = length[fiid];
        if (start < 0 || end < 0) {
            throw new IllegalArgumentException("Illegal values, start = " + start + ", end = " + end);
        }
        if (end > length[fiid]) // Can happen while making KWICs because we don't know the doc length until here
            end = length[fiid];
        if (start > length[fiid] || end > length[fiid]) {
            throw new IllegalArgumentException("Value(s) out of range, start = " + start + ", end = " + end
                    + ", content length = " + length[fiid]);
        }
        if (end <= start) {
            throw new IllegalArgumentException(
                    "Tried to read empty or negative length snippet (from " + start + " to " + end + ")");
        }
        // The tokens file has been mapped to memory (search mode).
        // Get an int buffer into the file: figure out which chunk
        // contains the requested snippet.
        ByteBuffer whichChunk = null;
        long chunkOffsetBytes = -1;
        long entryOffsetBytes = offset[fiid] * SIZEOF_INT;
        for (int j = 0; j < tokensFileChunkOffsetBytes.size(); j++) {
            long offsetBytes = tokensFileChunkOffsetBytes.get(j);
            ByteBuffer buffer = tokensFileChunks.get(j);
            if (offsetBytes <= entryOffsetBytes + start * SIZEOF_INT
                    && offsetBytes + buffer.capacity() >= entryOffsetBytes + end * SIZEOF_INT) {
                // This one!
                whichChunk = buffer;
                chunkOffsetBytes = offsetBytes;
                break;
            }
        }
        if (whichChunk == null) {
            throw new BlackLabRuntimeException("Tokens file chunk containing document not found. fiid = " + fiid);
        }
        // Position us at the start of this document within the chunk
        // (cast to Buffer for Java 8 compatibility)...
        ((Buffer) whichChunk).position((int) (offset[fiid] * SIZEOF_INT - chunkOffsetBytes));
        IntBuffer ib = whichChunk.asIntBuffer();
        int snippetLength = end - start;
        int[] snippet = new int[snippetLength];
        // ...then at the correct token within the document, and read the snippet
        ib.position(start);
        ib.get(snippet);
        result.add(snippet);
    }
    return result;
}
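The chunk bookkeeping above exists because a single MappedByteBuffer can address at most Integer.MAX_VALUE bytes, so a large tokens file is mapped as a list of chunks with their starting byte offsets recorded; presumably the chunk boundaries are chosen so that no document straddles two chunks, which is why the method can insist on finding one chunk that contains the whole snippet. Below is a minimal self-contained sketch of the same technique, mapping a file in fixed-size chunks and locating the chunk that fully contains a byte range. The class name and CHUNK_SIZE are illustrative, not BlackLab's.

import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.List;

public class ChunkedTokensFile {
    private static final long CHUNK_SIZE = 512L * 1024 * 1024; // illustrative: 512 MB per chunk

    public static void main(String[] args) throws Exception {
        List<MappedByteBuffer> chunks = new ArrayList<>();
        List<Long> chunkOffsetBytes = new ArrayList<>();
        try (RandomAccessFile raf = new RandomAccessFile(args[0], "r");
                FileChannel channel = raf.getChannel()) {
            // Map the file as consecutive read-only chunks, remembering where each starts
            for (long pos = 0; pos < channel.size(); pos += CHUNK_SIZE) {
                long size = Math.min(CHUNK_SIZE, channel.size() - pos);
                chunks.add(channel.map(FileChannel.MapMode.READ_ONLY, pos, size));
                chunkOffsetBytes.add(pos);
            }
        }
        // Find the chunk that fully contains byte range [from, to), as the reader's loop does
        long from = 4096, to = 8192;
        for (int j = 0; j < chunkOffsetBytes.size(); j++) {
            long off = chunkOffsetBytes.get(j);
            if (off <= from && off + chunks.get(j).capacity() >= to) {
                System.out.println("Byte range found in chunk " + j + " (chunk offset " + off + ")");
                return;
            }
        }
        System.out.println("No single chunk contains the range");
    }
}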
Use of nl.inl.blacklab.exceptions.BlackLabRuntimeException in project BlackLab by INL: class AnnotationForwardIndexReader, method retrievePartsInt.
@Override
public List<int[]> retrievePartsInt(int fiid, int[] starts, int[] ends) {
    if (!initialized)
        initialize();
    if (deleted[fiid] != 0)
        return null;
    int n = starts.length;
    if (n != ends.length)
        throw new IllegalArgumentException("start and end must be of equal length");
    List<int[]> result = new ArrayList<>(n);
    for (int i = 0; i < n; i++) {
        // Don't modify the start/end array contents!
        int start = starts[i];
        int end = ends[i];
        if (start == -1)
            start = 0;
        if (end == -1)
            end = length[fiid];
        if (start < 0 || end < 0) {
            throw new IllegalArgumentException("Illegal values, start = " + start + ", end = " + end);
        }
        if (end > length[fiid]) // Can happen while making KWICs because we don't know the doc length until here
            end = length[fiid];
        if (start > length[fiid] || end > length[fiid]) {
            throw new IllegalArgumentException("Value(s) out of range, start = " + start + ", end = " + end
                    + ", content length = " + length[fiid]);
        }
        if (end <= start) {
            throw new IllegalArgumentException(
                    "Tried to read empty or negative length snippet (from " + start + " to " + end + ")");
        }
        // The tokens file has been mapped to memory (search mode).
        // Get an int buffer into the file: figure out which chunk
        // contains the requested snippet.
        ByteBuffer whichChunk = null;
        long chunkOffsetBytes = -1;
        long entryOffsetBytes = offset[fiid] * SIZEOF_INT;
        for (int j = 0; j < tokensFileChunkOffsetBytes.size(); j++) {
            long offsetBytes = tokensFileChunkOffsetBytes.get(j);
            ByteBuffer buffer = tokensFileChunks.get(j);
            if (offsetBytes <= entryOffsetBytes + start * SIZEOF_INT
                    && offsetBytes + buffer.capacity() >= entryOffsetBytes + end * SIZEOF_INT) {
                // This one!
                whichChunk = buffer;
                chunkOffsetBytes = offsetBytes;
                break;
            }
        }
        if (whichChunk == null) {
            throw new BlackLabRuntimeException("Tokens file chunk containing document not found. fiid = " + fiid);
        }
        // Position us at the start of this document within the chunk
        // (cast to Buffer for Java 8 compatibility)...
        ((Buffer) whichChunk).position((int) (offset[fiid] * SIZEOF_INT - chunkOffsetBytes));
        IntBuffer ib = whichChunk.asIntBuffer();
        int snippetLength = end - start;
        int[] snippet = new int[snippetLength];
        // ...then at the correct token within the document, and read the snippet
        ib.position(start);
        ib.get(snippet);
        result.add(snippet);
    }
    return result;
}
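As the -1 normalization at the top of the method shows, passing -1 for both start and end retrieves the entire document. A hypothetical caller might look like the helper below; obtaining the reader instance and the forward-index id (fiid) is part of the surrounding BlackLab API and is not shown here.

// Sketch only: assumes an AnnotationForwardIndexReader and a valid fiid are available
static int[] wholeDocumentTokens(AnnotationForwardIndexReader reader, int fiid) {
    List<int[]> parts = reader.retrievePartsInt(fiid, new int[] { -1 }, new int[] { -1 });
    return parts == null ? null : parts.get(0); // null means the document was deleted
}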
Use of nl.inl.blacklab.exceptions.BlackLabRuntimeException in project BlackLab by INL: class AnnotationForwardIndexWriter, method clear.
/**
 * Delete all content in the forward index
 */
private void clear() {
    // Delete data files and empty the TOC
    try {
        if (writeTokensFp == null) {
            openTokensFileForWriting();
        }
        if (File.separatorChar != '\\') // setLength(0) causes problems on Windows
            writeTokensFp.setLength(0);
    } catch (IOException e) {
        throw BlackLabRuntimeException.wrap(e);
    }
    if (termsFile.exists() && !termsFile.delete())
        throw new BlackLabRuntimeException("Could not delete file: " + termsFile);
    if (tocFile.exists() && !tocFile.delete())
        throw new BlackLabRuntimeException("Could not delete file: " + tocFile);
    if (toc != null)
        toc.clear();
    if (deletedTocEntries != null)
        deletedTocEntries.clear();
    tokenFileEndPosition = 0;
    tocModified = true;
}
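The separatorChar guard skips truncating the tokens file on Windows, presumably because a file that is still memory-mapped cannot be shrunk there. The truncation itself is plain RandomAccessFile usage, isolated in the sketch below (file name illustrative):

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;

public class TruncateDemo {
    public static void main(String[] args) throws IOException {
        try (RandomAccessFile fp = new RandomAccessFile("tokens.dat", "rw")) {
            // setLength(0) discards the file's contents in place;
            // clear() skips this on Windows (File.separatorChar == '\\')
            if (File.separatorChar != '\\')
                fp.setLength(0);
        }
    }
}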
Use of nl.inl.blacklab.exceptions.BlackLabRuntimeException in project BlackLab by INL: class DocIndexerXmlHandlers, method getMetadataFetcher.
/**
 * Get the external metadata fetcher for this indexer, if any.
 *
 * The metadata fetcher can be configured through the "metadataFetcherClass"
 * parameter.
 *
 * @return the metadata fetcher if any, or null if there is none
 */
MetadataFetcher getMetadataFetcher() {
    if (metadataFetcher == null) {
        @SuppressWarnings("deprecation")
        String metadataFetcherClassName = getParameter("metadataFetcherClass");
        if (metadataFetcherClassName != null) {
            try {
                Class<? extends MetadataFetcher> metadataFetcherClass = Class.forName(metadataFetcherClassName)
                        .asSubclass(MetadataFetcher.class);
                Constructor<? extends MetadataFetcher> ctor = metadataFetcherClass.getConstructor(DocIndexer.class);
                metadataFetcher = ctor.newInstance(this);
            } catch (ReflectiveOperationException e) {
                throw new BlackLabRuntimeException(e);
            }
        }
    }
    return metadataFetcher;
}
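The fetcher is created reflectively: Class.forName resolves the configured class name, asSubclass performs a checked cast to the expected supertype, and getConstructor/newInstance invoke its one-argument constructor. The same pattern in self-contained form; Plugin, HelloPlugin and loadPlugin are made-up names for illustration, not BlackLab API.

import java.lang.reflect.Constructor;

interface Plugin {
    void run();
}

class HelloPlugin implements Plugin {
    private final String owner;

    public HelloPlugin(String owner) {
        this.owner = owner;
    }

    @Override
    public void run() {
        System.out.println("hello from " + owner);
    }
}

public class ReflectiveLoad {
    // Mirrors getMetadataFetcher(): resolve the class by name, check that it
    // implements the expected interface, then call its one-argument constructor
    static Plugin loadPlugin(String className, String owner) {
        try {
            Class<? extends Plugin> cls = Class.forName(className).asSubclass(Plugin.class);
            Constructor<? extends Plugin> ctor = cls.getConstructor(String.class);
            return ctor.newInstance(owner);
        } catch (ReflectiveOperationException e) {
            // BlackLab wraps this in a BlackLabRuntimeException instead
            throw new RuntimeException(e);
        }
    }

    public static void main(String[] args) {
        loadPlugin("HelloPlugin", "main").run();
    }
}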
Use of nl.inl.blacklab.exceptions.BlackLabRuntimeException in project BlackLab by INL: class DocIndexerConfig, method processLinkedDocument.
/**
 * Process a linked document if one is configured. An XPath processor can be
 * provided; it is used to retrieve information from the current document to
 * construct the path to the linked document.
 *
 * @param ld configuration of the linked document
 * @param xpathProcessor resolves an XPath expression against the current document
 */
protected void processLinkedDocument(ConfigLinkedDocument ld, Function<String, String> xpathProcessor) {
    // Resolve linkPaths to get the information needed to fetch the document
    List<String> results = new ArrayList<>();
    for (ConfigLinkValue linkValue : ld.getLinkValues()) {
        String valuePath = linkValue.getValuePath();
        String valueField = linkValue.getValueField();
        if (valuePath != null) {
            // Resolve value using XPath
            String result = xpathProcessor.apply(valuePath);
            if (result == null || result.isEmpty()) {
                switch (ld.getIfLinkPathMissing()) {
                case IGNORE:
                    break;
                case WARN:
                    docWriter.listener()
                            .warning("Link path " + valuePath + " not found in document " + documentName);
                    break;
                case FAIL:
                    throw new BlackLabRuntimeException(
                            "Link path " + valuePath + " not found in document " + documentName);
                }
            }
            results.add(result);
        } else if (valueField != null) {
            // Fetch value from Lucene doc
            List<String> metadataField = getMetadataField(valueField);
            if (metadataField == null) {
                throw new BlackLabRuntimeException("Link value field " + valueField + " has no values (null)!");
            }
            results.addAll(metadataField);
        }
        // Apply this link value's processing steps to all values collected so far
        List<String> resultAfterProcessing = new ArrayList<>();
        for (String inputValue : results) {
            resultAfterProcessing.addAll(processStringMultipleValues(inputValue, linkValue.getProcess(), null));
        }
        results = resultAfterProcessing;
    }
    // Substitute link path results in inputFile, pathInsideArchive and documentPath
    String inputFile = replaceDollarRefs(ld.getInputFile(), results);
    String pathInsideArchive = replaceDollarRefs(ld.getPathInsideArchive(), results);
    String documentPath = replaceDollarRefs(ld.getDocumentPath(), results);
    try {
        // Fetch and index the linked document
        indexLinkedDocument(inputFile, pathInsideArchive, documentPath, ld.getInputFormatIdentifier(),
                ld.shouldStore() ? ld.getName() : null);
    } catch (Exception e) {
        String moreInfo = "(inputFile = " + inputFile;
        if (pathInsideArchive != null)
            moreInfo += ", pathInsideArchive = " + pathInsideArchive;
        if (documentPath != null)
            moreInfo += ", documentPath = " + documentPath;
        moreInfo += ")";
        switch (ld.getIfLinkPathMissing()) {
        case IGNORE:
        case WARN:
            docWriter.listener().warning("Could not find or parse linked document for " + documentName + moreInfo
                    + ": " + e.getMessage());
            break;
        case FAIL:
            throw new BlackLabRuntimeException(
                    "Could not find or parse linked document for " + documentName + moreInfo, e);
        }
    }
}
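The replaceDollarRefs step fills $1, $2, ... references in the configured paths with the link values resolved above. A plausible sketch of that substitution, assuming simple 1-based positional references; this is a guess at the semantics, not BlackLab's actual implementation.

import java.util.List;

public class DollarRefs {
    // Replace $1..$n in a template with the corresponding values (1-based),
    // mirroring what replaceDollarRefs does with the resolved link values
    static String replaceDollarRefs(String template, List<String> values) {
        if (template == null)
            return null;
        for (int i = 0; i < values.size(); i++)
            template = template.replace("$" + (i + 1), values.get(i));
        return template;
    }

    public static void main(String[] args) {
        String path = replaceDollarRefs("metadata/$1/$2.xml", List.of("corpus1", "doc42"));
        System.out.println(path); // metadata/corpus1/doc42.xml
    }
}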