Search in sources :

Example 1 with BlackLabRuntimeException

Use of nl.inl.blacklab.exceptions.BlackLabRuntimeException in project BlackLab by INL.

The class AnnotationForwardIndexReader, method retrievePartsIntAllocationless.

public List<int[]> retrievePartsIntAllocationless(int fiid, int[] starts, int[] ends, List<int[]> ret) {
    if (!initialized)
        initialize();
    if (deleted[fiid] != 0)
        return null;
    int n = starts.length;
    if (n != ends.length)
        throw new IllegalArgumentException("start and end must be of equal length");
    List<int[]> result = new ArrayList<>(n);
    for (int i = 0; i < n; i++) {
        // don't modify the start/end array contents!
        int start = starts[i];
        int end = ends[i];
        if (start == -1)
            start = 0;
        if (end == -1)
            end = length[fiid];
        if (start < 0 || end < 0) {
            throw new IllegalArgumentException("Illegal values, start = " + start + ", end = " + end);
        }
        // Can happen while making KWICs because we don't know the doc length until here
        if (end > length[fiid])
            end = length[fiid];
        if (start > length[fiid] || end > length[fiid]) {
            throw new IllegalArgumentException("Value(s) out of range, start = " + start + ", end = " + end + ", content length = " + length[fiid]);
        }
        if (end <= start) {
            throw new IllegalArgumentException("Tried to read empty or negative length snippet (from " + start + " to " + end + ")");
        }
        // Get an IntBuffer to read the desired content
        IntBuffer ib = null;
        // The tokens file has been mapped to memory.
        // Get an int buffer into the file.
        // Figure out which chunk to access.
        ByteBuffer whichChunk = null;
        long chunkOffsetBytes = -1;
        long entryOffsetBytes = offset[fiid] * SIZEOF_INT;
        for (int j = 0; j < tokensFileChunkOffsetBytes.size(); j++) {
            long offsetBytes = tokensFileChunkOffsetBytes.get(j);
            ByteBuffer buffer = tokensFileChunks.get(j);
            if (offsetBytes <= entryOffsetBytes + start * SIZEOF_INT && offsetBytes + buffer.capacity() >= entryOffsetBytes + end * SIZEOF_INT) {
                // This one!
                whichChunk = buffer;
                chunkOffsetBytes = offsetBytes;
                break;
            }
        }
        if (whichChunk == null) {
            throw new BlackLabRuntimeException("Tokens file chunk containing document not found. fiid = " + fiid);
        }
        ((Buffer) whichChunk).position((int) (offset[fiid] * SIZEOF_INT - chunkOffsetBytes));
        ib = whichChunk.asIntBuffer();
        int snippetLength = end - start;
        int[] snippet = new int[snippetLength];
        // The file is mem-mapped (search mode).
        // Position us at the correct place in the file.
        ib.position(start);
        ib.get(snippet);
        result.add(snippet);
    }
    return result;
}
Also used : ByteBuffer(java.nio.ByteBuffer) IntBuffer(java.nio.IntBuffer) Buffer(java.nio.Buffer) LongBuffer(java.nio.LongBuffer) MappedByteBuffer(java.nio.MappedByteBuffer) BlackLabRuntimeException(nl.inl.blacklab.exceptions.BlackLabRuntimeException) ArrayList(java.util.ArrayList)
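
The chunk-selection loop is the core of the method above: it scans the memory-mapped chunks of the tokens file until it finds one whose byte range covers the whole requested snippet. Below is a minimal standalone sketch of that check; it is not part of BlackLab, the names are illustrative, and SIZEOF_INT is assumed to be Integer.BYTES.

import java.nio.ByteBuffer;
import java.util.List;

// Illustrative helper (not from the BlackLab sources): the same containment
// test the loop above uses to pick a memory-mapped chunk of the tokens file.
class ChunkLookup {
    private static final int SIZEOF_INT = Integer.BYTES; // assumption: 4 bytes per token id

    /**
     * Returns the index of the chunk that fully contains tokens [start, end)
     * of the entry starting at entryOffsetBytes, or -1 if no single chunk does.
     */
    static int findChunk(long entryOffsetBytes, int start, int end,
                         List<Long> chunkOffsetBytes, List<ByteBuffer> chunks) {
        for (int j = 0; j < chunkOffsetBytes.size(); j++) {
            long offsetBytes = chunkOffsetBytes.get(j);
            ByteBuffer buffer = chunks.get(j);
            boolean coversStart = offsetBytes <= entryOffsetBytes + (long) start * SIZEOF_INT;
            boolean coversEnd = offsetBytes + buffer.capacity() >= entryOffsetBytes + (long) end * SIZEOF_INT;
            if (coversStart && coversEnd)
                return j;
        }
        return -1; // the method above throws BlackLabRuntimeException in this case
    }
}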

Example 2 with BlackLabRuntimeException

Use of nl.inl.blacklab.exceptions.BlackLabRuntimeException in project BlackLab by INL.

The class AnnotationForwardIndexReader, method retrievePartsInt.

@Override
public List<int[]> retrievePartsInt(int fiid, int[] starts, int[] ends) {
    if (!initialized)
        initialize();
    if (deleted[fiid] != 0)
        return null;
    int n = starts.length;
    if (n != ends.length)
        throw new IllegalArgumentException("start and end must be of equal length");
    List<int[]> result = new ArrayList<>(n);
    for (int i = 0; i < n; i++) {
        // don't modify the start/end array contents!
        int start = starts[i];
        int end = ends[i];
        if (start == -1)
            start = 0;
        if (end == -1)
            end = length[fiid];
        if (start < 0 || end < 0) {
            throw new IllegalArgumentException("Illegal values, start = " + start + ", end = " + end);
        }
        // Can happen while making KWICs because we don't know the doc length until here
        if (end > length[fiid])
            end = length[fiid];
        if (start > length[fiid] || end > length[fiid]) {
            throw new IllegalArgumentException("Value(s) out of range, start = " + start + ", end = " + end + ", content length = " + length[fiid]);
        }
        if (end <= start) {
            throw new IllegalArgumentException("Tried to read empty or negative length snippet (from " + start + " to " + end + ")");
        }
        // Get an IntBuffer to read the desired content
        IntBuffer ib = null;
        // The tokens file has been mapped to memory.
        // Get an int buffer into the file.
        // Figure out which chunk to access.
        ByteBuffer whichChunk = null;
        long chunkOffsetBytes = -1;
        long entryOffsetBytes = offset[fiid] * SIZEOF_INT;
        for (int j = 0; j < tokensFileChunkOffsetBytes.size(); j++) {
            long offsetBytes = tokensFileChunkOffsetBytes.get(j);
            ByteBuffer buffer = tokensFileChunks.get(j);
            if (offsetBytes <= entryOffsetBytes + start * SIZEOF_INT && offsetBytes + buffer.capacity() >= entryOffsetBytes + end * SIZEOF_INT) {
                // This one!
                whichChunk = buffer;
                chunkOffsetBytes = offsetBytes;
                break;
            }
        }
        if (whichChunk == null) {
            throw new BlackLabRuntimeException("Tokens file chunk containing document not found. fiid = " + fiid);
        }
        ((Buffer) whichChunk).position((int) (offset[fiid] * SIZEOF_INT - chunkOffsetBytes));
        ib = whichChunk.asIntBuffer();
        int snippetLength = end - start;
        int[] snippet = new int[snippetLength];
        // The file is mem-mapped (search mode).
        // Position us at the correct place in the file.
        ib.position(start);
        ib.get(snippet);
        result.add(snippet);
    }
    return result;
}
Also used : ByteBuffer(java.nio.ByteBuffer) IntBuffer(java.nio.IntBuffer) Buffer(java.nio.Buffer) LongBuffer(java.nio.LongBuffer) MappedByteBuffer(java.nio.MappedByteBuffer) BlackLabRuntimeException(nl.inl.blacklab.exceptions.BlackLabRuntimeException) ArrayList(java.util.ArrayList)
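
As a usage illustration, a hypothetical caller could use retrievePartsInt to grab a window of term ids around a hit. This is only a sketch: how the reader, the forward-index id (fiid) and the hit positions are obtained is assumed here, and the import for AnnotationForwardIndexReader is omitted.

import java.util.List;

// Hypothetical caller (not from the BlackLab sources).
class SnippetExample {
    static int[] contextAround(AnnotationForwardIndexReader reader, int fiid,
                               int hitStart, int hitEnd, int contextSize) {
        int[] starts = { Math.max(0, hitStart - contextSize) };
        int[] ends = { hitEnd + contextSize }; // ends past the document are clamped inside retrievePartsInt
        List<int[]> snippets = reader.retrievePartsInt(fiid, starts, ends);
        return snippets == null ? null : snippets.get(0); // null means the document was deleted
    }
}

The returned ints are term ids from the forward index; mapping them back to word strings is a separate step not shown here.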

Example 3 with BlackLabRuntimeException

Use of nl.inl.blacklab.exceptions.BlackLabRuntimeException in project BlackLab by INL.

The class AnnotationForwardIndexWriter, method clear.

/**
 * Delete all content in the forward index
 */
private void clear() {
    // delete data files and empty TOC
    try {
        if (writeTokensFp == null) {
            openTokensFileForWriting();
        }
        if (File.separatorChar != '\\') // setLength(0) causes problems on Windows, so skip it there
            writeTokensFp.setLength(0);
    } catch (IOException e) {
        throw BlackLabRuntimeException.wrap(e);
    }
    if (termsFile.exists() && !termsFile.delete())
        throw new BlackLabRuntimeException("Could not delete file: " + termsFile);
    if (tocFile.exists() && !tocFile.delete())
        throw new BlackLabRuntimeException("Could not delete file: " + tocFile);
    if (toc != null)
        toc.clear();
    if (deletedTocEntries != null)
        deletedTocEntries.clear();
    tokenFileEndPosition = 0;
    tocModified = true;
}
Also used : BlackLabRuntimeException(nl.inl.blacklab.exceptions.BlackLabRuntimeException) IOException(java.io.IOException)
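
The try/catch illustrates the project's usual handling of checked I/O errors: they are rethrown as the unchecked BlackLabRuntimeException via wrap(). Below is a minimal sketch of the same pattern in isolation; the file-handle type is assumed to be a RandomAccessFile.

import java.io.IOException;
import java.io.RandomAccessFile;

import nl.inl.blacklab.exceptions.BlackLabRuntimeException;

// Minimal sketch of the wrap() pattern used in clear() above.
class WrapExample {
    static void truncate(RandomAccessFile fp) {
        try {
            fp.setLength(0); // may throw a checked IOException
        } catch (IOException e) {
            throw BlackLabRuntimeException.wrap(e); // rethrow as the project's unchecked type
        }
    }
}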

Example 4 with BlackLabRuntimeException

Use of nl.inl.blacklab.exceptions.BlackLabRuntimeException in project BlackLab by INL.

The class DocIndexerXmlHandlers, method getMetadataFetcher.

/**
 * Get the external metadata fetcher for this indexer, if any.
 *
 * The metadata fetcher can be configured through the "metadataFetcherClass"
 * parameter.
 *
 * @return the metadata fetcher if any, or null if there is none.
 */
MetadataFetcher getMetadataFetcher() {
    if (metadataFetcher == null) {
        @SuppressWarnings("deprecation") String metadataFetcherClassName = getParameter("metadataFetcherClass");
        if (metadataFetcherClassName != null) {
            try {
                Class<? extends MetadataFetcher> metadataFetcherClass = Class.forName(metadataFetcherClassName).asSubclass(MetadataFetcher.class);
                Constructor<? extends MetadataFetcher> ctor = metadataFetcherClass.getConstructor(DocIndexer.class);
                metadataFetcher = ctor.newInstance(this);
            } catch (ReflectiveOperationException e) {
                throw new BlackLabRuntimeException(e);
            }
        }
    }
    return metadataFetcher;
}
Also used : BlackLabRuntimeException(nl.inl.blacklab.exceptions.BlackLabRuntimeException)
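
The reflective instantiation follows a common plugin-loading pattern: resolve the class by name, require a constructor that takes the owning object, and wrap any reflection failure in the project's runtime exception. Below is a generic sketch of that pattern; the class and method names are illustrative, not BlackLab API.

import java.lang.reflect.Constructor;

import nl.inl.blacklab.exceptions.BlackLabRuntimeException;

// Generic sketch of the plugin-loading pattern used by getMetadataFetcher().
class ReflectiveFactory {
    static <T> T create(String className, Class<T> baseType, Class<?> ownerType, Object owner) {
        try {
            Class<? extends T> cls = Class.forName(className).asSubclass(baseType);
            Constructor<? extends T> ctor = cls.getConstructor(ownerType);
            return ctor.newInstance(owner);
        } catch (ReflectiveOperationException e) {
            // mirror the original: any reflection failure becomes an unchecked exception
            throw new BlackLabRuntimeException(e);
        }
    }
}

In the method above, baseType corresponds to MetadataFetcher and ownerType to DocIndexer.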

Example 5 with BlackLabRuntimeException

Use of nl.inl.blacklab.exceptions.BlackLabRuntimeException in project BlackLab by INL.

The class DocIndexerConfig, method processLinkedDocument.

/**
 * Process a linked document when configured. An XPath processor can be provided;
 * it is used to retrieve information from the document to construct the path to the linked document.
 *
 * @param ld configuration for the linked document to process
 * @param xpathProcessor resolves an XPath expression against the current document
 */
protected void processLinkedDocument(ConfigLinkedDocument ld, Function<String, String> xpathProcessor) {
    // Resolve linkPaths to get the information needed to fetch the document
    List<String> results = new ArrayList<>();
    for (ConfigLinkValue linkValue : ld.getLinkValues()) {
        String valuePath = linkValue.getValuePath();
        String valueField = linkValue.getValueField();
        if (valuePath != null) {
            // Resolve value using XPath
            String result = xpathProcessor.apply(valuePath);
            if (result == null || result.isEmpty()) {
                switch(ld.getIfLinkPathMissing()) {
                    case IGNORE:
                        break;
                    case WARN:
                        docWriter.listener().warning("Link path " + valuePath + " not found in document " + documentName);
                        break;
                    case FAIL:
                        throw new BlackLabRuntimeException("Link path " + valuePath + " not found in document " + documentName);
                }
            }
            results.add(result);
        } else if (valueField != null) {
            // Fetch value from Lucene doc
            List<String> metadataField = getMetadataField(valueField);
            if (metadataField == null) {
                throw new BlackLabRuntimeException("Link value field " + valueField + " has no values (null)!");
            }
            results.addAll(metadataField);
        }
        List<String> resultAfterProcessing = new ArrayList<>();
        for (String inputValue : results) {
            resultAfterProcessing.addAll(processStringMultipleValues(inputValue, linkValue.getProcess(), null));
        }
        results = resultAfterProcessing;
    }
    // Substitute link path results in inputFile, pathInsideArchive and documentPath
    String inputFile = replaceDollarRefs(ld.getInputFile(), results);
    String pathInsideArchive = replaceDollarRefs(ld.getPathInsideArchive(), results);
    String documentPath = replaceDollarRefs(ld.getDocumentPath(), results);
    try {
        // Fetch and index the linked document
        indexLinkedDocument(inputFile, pathInsideArchive, documentPath, ld.getInputFormatIdentifier(), ld.shouldStore() ? ld.getName() : null);
    } catch (Exception e) {
        String moreInfo = "(inputFile = " + inputFile;
        if (pathInsideArchive != null)
            moreInfo += ", pathInsideArchive = " + pathInsideArchive;
        if (documentPath != null)
            moreInfo += ", documentPath = " + documentPath;
        moreInfo += ")";
        switch(ld.getIfLinkPathMissing()) {
            case IGNORE:
            case WARN:
                docWriter.listener().warning("Could not find or parse linked document for " + documentName + moreInfo + ": " + e.getMessage());
                break;
            case FAIL:
                throw new BlackLabRuntimeException("Could not find or parse linked document for " + documentName + moreInfo, e);
        }
    }
}
Also used : BlackLabRuntimeException(nl.inl.blacklab.exceptions.BlackLabRuntimeException) ArrayList(java.util.ArrayList) List(java.util.List) PatternSyntaxException(java.util.regex.PatternSyntaxException) IOException(java.io.IOException) PluginException(nl.inl.blacklab.exceptions.PluginException)
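
replaceDollarRefs itself is not shown in this example; judging from its name and the "Substitute link path results" comment, it presumably replaces $1, $2, ... references in the configured paths with the resolved link values. The sketch below is a guess at that behavior, not the actual BlackLab implementation.

import java.util.List;

// Assumed behavior of a replaceDollarRefs-style helper (illustrative only):
// replace $1, $2, ... in a template path with the resolved link values.
class DollarRefs {
    static String replaceDollarRefs(String template, List<String> values) {
        if (template == null)
            return null;
        String result = template;
        for (int i = 0; i < values.size(); i++) {
            result = result.replace("$" + (i + 1), values.get(i));
        }
        return result;
    }
}

For example, under this assumption replaceDollarRefs("metadata/$1.xml", List.of("doc0001")) would yield "metadata/doc0001.xml".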

Aggregations

BlackLabRuntimeException (nl.inl.blacklab.exceptions.BlackLabRuntimeException) 63
IOException (java.io.IOException) 27
ArrayList (java.util.ArrayList) 12
File (java.io.File) 11
ByteBuffer (java.nio.ByteBuffer) 7
FileNotFoundException (java.io.FileNotFoundException) 4
Buffer (java.nio.Buffer) 4
IntBuffer (java.nio.IntBuffer) 4
LongBuffer (java.nio.LongBuffer) 4
MappedByteBuffer (java.nio.MappedByteBuffer) 4
HashSet (java.util.HashSet) 4
List (java.util.List) 4
LeafReaderContext (org.apache.lucene.index.LeafReaderContext) 4
Terms (org.apache.lucene.index.Terms) 4
AutoPilot (com.ximpleware.AutoPilot) 3
XPathParseException (com.ximpleware.XPathParseException) 3
FileInputStream (java.io.FileInputStream) 3
InputStream (java.io.InputStream) 3
HashMap (java.util.HashMap) 3
Map (java.util.Map) 3