Search in sources :

Example 1 with JochreException

Use of com.joliciel.jochre.utils.JochreException in the project jochre by urieli.

The method execute of the class JochreSearch.

public void execute(Map<String, String> argMap, Either<PrintWriter, OutputStream> output) {
    long startTime = System.currentTimeMillis();
    Command command = null;
    try {
        command = Command.valueOf(argMap.get("command"));
        argMap.remove("command");
        JochreSearchConfig config = JochreSearchConfig.getInstance(configId);
        LOG.debug("##### Arguments:");
        for (Entry<String, String> arg : argMap.entrySet()) {
            LOG.debug(arg.getKey() + ": " + arg.getValue());
        }
        boolean forceUpdate = false;
        String docName = null;
        int docIndex = -1;
        int docId = -1;
        Set<Integer> docIds = null;
        int decimalPlaces = config.getConfig().getInt("decimal-places");
        // query
        String queryString = null;
        List<String> authors = new ArrayList<>();
        boolean authorInclude = true;
        String titleQueryString = null;
        boolean expandInflections = true;
        SortBy sortBy = SortBy.Score;
        boolean sortAscending = true;
        Integer fromYear = null;
        Integer toYear = null;
        String reference = null;
        int pageNumber = 0;
        int resultsPerPage = config.getConfig().getInt("results-per-page");
        // lexicon handling
        String lexiconDirPath = null;
        String lexiconRegexPath = null;
        String word = null;
        // snippets
        int snippetCount = -1;
        double minWeight = 0.0;
        boolean includeText = false;
        boolean includeGraphics = false;
        String snippetJson = null;
        // word images
        int startOffset = -1;
        // suggestions
        String suggestion = null;
        String suggestion2 = null;
        String user = null;
        String languageCode = null;
        String fontCode = null;
        String ip = config.getConfig().getString("default-ip");
        // prefix search
        JochreIndexField field = null;
        String prefix = null;
        int maxResults = 0;
        // corrections
        boolean applyEverywhere = false;
        int correctionId = -1;
        for (Entry<String, String> argMapEntry : argMap.entrySet()) {
            String argName = argMapEntry.getKey();
            String argValue = argMapEntry.getValue();
            if (argName.equals("forceUpdate")) {
                forceUpdate = argValue.equals("true");
            } else if (argName.equals("docName")) {
                docName = argValue;
            } else if (argName.equals("docIndex")) {
                docIndex = Integer.parseInt(argValue);
            } else if (argName.equals("docId")) {
                docId = Integer.parseInt(argValue);
            } else if (argName.equalsIgnoreCase("query")) {
                queryString = argValue;
            } else if (argName.equalsIgnoreCase("authors")) {
                if (argValue.length() > 0) {
                    String[] authorArray = argValue.split("\\|");
                    for (String author : authorArray) if (author.length() > 0)
                        authors.add(author);
                }
            } else if (argName.equalsIgnoreCase("authorInclude")) {
                authorInclude = argValue.equals("true");
            } else if (argName.equalsIgnoreCase("title")) {
                titleQueryString = argValue;
            } else if (argName.equalsIgnoreCase("decimalPlaces")) {
                decimalPlaces = Integer.parseInt(argValue);
            } else if (argName.equals("expand")) {
                expandInflections = argValue.equals("true");
            } else if (argName.equals("lexiconDir")) {
                lexiconDirPath = argValue;
            } else if (argName.equals("lexiconRegex")) {
                lexiconRegexPath = argValue;
            } else if (argName.equals("word")) {
                word = argValue;
            } else if (argName.equals("snippetCount")) {
                snippetCount = Integer.parseInt(argValue);
            } else if (argName.equals("minWeight")) {
                minWeight = Double.parseDouble(argValue);
            } else if (argName.equals("startOffset")) {
                startOffset = Integer.parseInt(argValue);
            } else if (argName.equals("suggestion")) {
                suggestion = argValue;
            } else if (argName.equals("suggestion2")) {
                suggestion2 = argValue;
            } else if (argName.equals("languageCode")) {
                languageCode = argValue;
            } else if (argName.equals("fontCode")) {
                fontCode = argValue;
            } else if (argName.equals("ip")) {
                ip = argValue;
            } else if (argName.equals("includeText")) {
                includeText = argValue.equalsIgnoreCase("true");
            } else if (argName.equals("includeGraphics")) {
                includeGraphics = argValue.equalsIgnoreCase("true");
            } else if (argName.equals("snippet")) {
                snippetJson = argValue;
            } else if (argName.equalsIgnoreCase("docIds")) {
                if (argValue.length() > 0) {
                    String[] idArray = argValue.split(",");
                    docIds = new HashSet<>();
                    for (String id : idArray) docIds.add(Integer.parseInt(id));
                }
            } else if (argName.equals("startOffset")) {
                startOffset = Integer.parseInt(argValue);
            } else if (argName.equals("user")) {
                user = argValue;
            } else if (argName.equals("field")) {
                field = JochreIndexField.valueOf(argValue);
            } else if (argName.equals("prefix")) {
                prefix = argValue;
            } else if (argName.equals("maxResults")) {
                maxResults = Integer.parseInt(argValue);
            } else if (argName.equals("sortBy")) {
                sortBy = SortBy.valueOf(argValue);
            } else if (argName.equals("sortAscending")) {
                sortAscending = argValue.equals("true");
            } else if (argName.equals("fromYear")) {
                fromYear = Integer.parseInt(argValue);
            } else if (argName.equals("toYear")) {
                toYear = Integer.parseInt(argValue);
            } else if (argName.equals("reference")) {
                reference = argValue;
            } else if (argName.equals("page")) {
                pageNumber = Integer.parseInt(argValue);
            } else if (argName.equals("resultsPerPage")) {
                resultsPerPage = Integer.parseInt(argValue);
            } else if (argName.equals("applyEverywhere")) {
                applyEverywhere = argValue.equals("true");
            } else if (argName.equals("correctionId")) {
                correctionId = Integer.parseInt(argValue);
            } else {
                throw new RuntimeException("Unknown option: " + argName);
            }
        }
        JochreSearchManager searchManager = JochreSearchManager.getInstance(configId);
        DecimalFormat df = new DecimalFormat("0." + StringUtils.repeat('0', decimalPlaces), enSymbols);
        PrintWriter out = null;
        if (output.isLeft())
            out = output.getLeft();
        switch(command) {
            case index:
                {
                    JochreIndexBuilder builder = new JochreIndexBuilder(configId, forceUpdate);
                    new Thread(builder).start();
                    out.write("{\"response\":\"index thread started\"}\n");
                    break;
                }
            case indexStatus:
                {
                    SearchStatusHolder searchStatusHolder = SearchStatusHolder.getInstance();
                    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
                    JsonFactory jsonFactory = new JsonFactory();
                    JsonGenerator jsonGen = jsonFactory.createGenerator(out);
                    jsonGen.writeStartObject();
                    jsonGen.writeStringField("status", searchStatusHolder.getStatus().name());
                    jsonGen.writeStringField("message", searchStatusHolder.getMessage());
                    jsonGen.writeNumberField("total", searchStatusHolder.getTotalCount());
                    jsonGen.writeNumberField("processed", searchStatusHolder.getProcessedCount());
                    jsonGen.writeNumberField("success", searchStatusHolder.getSuccessCount());
                    jsonGen.writeNumberField("failure", searchStatusHolder.getFailureCount());
                    Date updateDate = new Date(searchStatusHolder.getLastUpdated());
                    jsonGen.writeStringField("lastUpdated", dateFormat.format(updateDate));
                    jsonGen.writeNumberField("totalTime", searchStatusHolder.getTotalTime());
                    jsonGen.writeEndObject();
                    jsonGen.flush();
                    break;
                }
            case refresh:
                {
                    JochreSearchManager manager = JochreSearchManager.getInstance(configId);
                    manager.getManager().maybeRefresh();
                    out.write("{\"response\":\"index reader refreshed\"}\n");
                    break;
                }
            case search:
            case highlight:
            case snippets:
                {
                    IndexSearcher indexSearcher = searchManager.getManager().acquire();
                    try {
                        JochreQuery query = new JochreQuery(configId, queryString, authors, authorInclude, titleQueryString, fromYear, toYear, expandInflections, sortBy, sortAscending, reference);
                        try {
                            JochreIndexSearcher searcher = new JochreIndexSearcher(indexSearcher, configId);
                            switch(command) {
                                case search:
                                    {
                                        Pair<TopDocs, Integer> results = searcher.search(query, pageNumber, resultsPerPage);
                                        JsonFactory jsonFactory = new JsonFactory();
                                        JsonGenerator jsonGen = jsonFactory.createGenerator(out);
                                        jsonGen.writeStartObject();
                                        jsonGen.writeNumberField("totalHits", results.getRight());
                                        jsonGen.writeNumberField("maxResults", config.getMaxResults());
                                        jsonGen.writeBooleanField("highlights", query.hasHighlights());
                                        jsonGen.writeArrayFieldStart("results");
                                        TopDocs topDocs = results.getLeft();
                                        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                                            jsonGen.writeStartObject();
                                            JochreIndexDocument doc = new JochreIndexDocument(indexSearcher, scoreDoc.doc, configId);
                                            jsonGen.writeFieldName("doc");
                                            doc.toJson(jsonGen);
                                            if (Float.isNaN(scoreDoc.score)) {
                                                jsonGen.writeNumberField("score", 0.0);
                                            } else {
                                                double roundedScore = df.parse(df.format(scoreDoc.score)).doubleValue();
                                                jsonGen.writeNumberField("score", roundedScore);
                                            }
                                            jsonGen.writeEndObject();
                                        }
                                        jsonGen.writeEndArray();
                                        jsonGen.writeEndObject();
                                        jsonGen.flush();
                                        if (config.hasDatabase()) {
                                            FeedbackQuery feedbackQuery = new FeedbackQuery(user, ip, configId);
                                            feedbackQuery.setResultCount(results.getRight().intValue());
                                            if (query.getQueryString() != null && query.getQueryString().length() > 0)
                                                feedbackQuery.addClause(FeedbackCriterion.text, query.getQueryString());
                                            if (query.getAuthors().size() > 0) {
                                                feedbackQuery.addClause(FeedbackCriterion.author, String.join("|", query.getAuthors()));
                                                feedbackQuery.addClause(FeedbackCriterion.includeAuthors, "" + query.isAuthorInclude());
                                            }
                                            if (query.getTitleQueryString() != null && query.getTitleQueryString().length() > 0)
                                                feedbackQuery.addClause(FeedbackCriterion.title, query.getTitleQueryString());
                                            if (!query.isExpandInflections())
                                                feedbackQuery.addClause(FeedbackCriterion.strict, "true");
                                            if (query.getFromYear() != null)
                                                feedbackQuery.addClause(FeedbackCriterion.fromYear, query.getFromYear().toString());
                                            if (query.getToYear() != null)
                                                feedbackQuery.addClause(FeedbackCriterion.toYear, query.getToYear().toString());
                                            if (query.getSortBy() != SortBy.Score) {
                                                feedbackQuery.addClause(FeedbackCriterion.sortBy, query.getSortBy().name());
                                                feedbackQuery.addClause(FeedbackCriterion.sortAscending, "" + query.isSortAscending());
                                            }
                                            if (query.getReference() != null)
                                                feedbackQuery.addClause(FeedbackCriterion.reference, query.getReference());
                                            feedbackQuery.save();
                                        }
                                        break;
                                    }
                                default:
                                    {
                                        if (!query.hasHighlights())
                                            throw new RuntimeException("For command " + command + " a query is required - no highlights available.");
                                        if (docIds == null) {
                                            Pair<TopDocs, Integer> result = searcher.search(query, pageNumber, resultsPerPage);
                                            docIds = new LinkedHashSet<>();
                                            for (ScoreDoc scoreDoc : result.getLeft().scoreDocs) {
                                                docIds.add(scoreDoc.doc);
                                                LOG.debug("### Next document");
                                                Document doc = indexSearcher.doc(scoreDoc.doc);
                                                for (IndexableField oneField : doc.getFields()) {
                                                    if (!oneField.name().equals(JochreIndexField.text.name()) && !oneField.name().startsWith(JochreIndexField.rect.name()) && !oneField.name().startsWith(JochreIndexField.start.name()))
                                                        LOG.debug(oneField.toString());
                                                }
                                            }
                                        }
                                        Set<String> searchFields = new HashSet<>();
                                        searchFields.add(JochreIndexField.text.name());
                                        Highlighter highlighter = new LuceneQueryHighlighter(query, indexSearcher, searchFields);
                                        HighlightManager highlightManager = new HighlightManager(indexSearcher, searchFields, configId);
                                        highlightManager.setDecimalPlaces(decimalPlaces);
                                        highlightManager.setMinWeight(minWeight);
                                        highlightManager.setIncludeText(includeText);
                                        highlightManager.setIncludeGraphics(includeGraphics);
                                        if (snippetCount > 0)
                                            highlightManager.setSnippetCount(snippetCount);
                                        if (command == Command.highlight) {
                                            highlightManager.highlight(highlighter, docIds, out);
                                        } else {
                                            highlightManager.findSnippets(highlighter, docIds, out);
                                        }
                                        break;
                                    }
                            }
                        } catch (JochreQueryParseException e) {
                            JsonFactory jsonFactory = new JsonFactory();
                            JsonGenerator jsonGen = jsonFactory.createGenerator(out);
                            jsonGen.writeStartObject();
                            jsonGen.writeStringField("parseException", "true");
                            jsonGen.writeStringField("message", e.getMessage());
                            jsonGen.writeEndObject();
                            jsonGen.flush();
                        }
                        out.write("\n");
                        out.flush();
                    } finally {
                        searchManager.getManager().release(indexSearcher);
                    }
                    break;
                }
            case textSnippet:
                {
                    if (snippetJson == null)
                        throw new JochreException("Command " + command + " requires a snippet");
                    IndexSearcher indexSearcher = searchManager.getManager().acquire();
                    try {
                        Snippet snippet = new Snippet(snippetJson);
                        if (LOG.isDebugEnabled()) {
                            Document doc = indexSearcher.doc(snippet.getDocId());
                            LOG.debug("Snippet in: " + doc.get(JochreIndexField.path.name()));
                        }
                        Set<String> searchFields = new HashSet<>();
                        searchFields.add(JochreIndexField.text.name());
                        HighlightManager highlightManager = new HighlightManager(indexSearcher, searchFields, configId);
                        String text = highlightManager.displaySnippet(snippet);
                        out.write(text);
                    } finally {
                        searchManager.getManager().release(indexSearcher);
                    }
                    break;
                }
            case imageSnippet:
                {
                    if (snippetJson == null)
                        throw new JochreException("Command " + command + " requires a snippet");
                    IndexSearcher indexSearcher = searchManager.getManager().acquire();
                    try {
                        Snippet snippet = new Snippet(snippetJson);
                        if (LOG.isDebugEnabled()) {
                            Document doc = indexSearcher.doc(snippet.getDocId());
                            LOG.debug("Snippet in: " + doc.get(JochreIndexField.path.name()));
                        }
                        Set<String> searchFields = new HashSet<>();
                        searchFields.add(JochreIndexField.text.name());
                        HighlightManager highlightManager = new HighlightManager(indexSearcher, searchFields, configId);
                        ImageSnippet imageSnippet = highlightManager.getImageSnippet(snippet);
                        ImageOutputStream ios = ImageIO.createImageOutputStream(output.getRight());
                        BufferedImage image = imageSnippet.getImage();
                        ImageReader imageReader = ImageIO.getImageReadersByMIMEType("image/png").next();
                        ImageWriter imageWriter = ImageIO.getImageWriter(imageReader);
                        imageWriter.setOutput(ios);
                        imageWriter.write(image);
                        ios.flush();
                    } finally {
                        searchManager.getManager().release(indexSearcher);
                    }
                    break;
                }
            case word:
                {
                    if (startOffset < 0)
                        throw new JochreException("Command " + command + " requires a startOffset");
                    if (docId < 0 && (docName == null || docIndex < 0))
                        throw new RuntimeException("For command " + command + " either a docName and docIndex, or a docId is required");
                    IndexSearcher indexSearcher = searchManager.getManager().acquire();
                    try {
                        JochreIndexSearcher searcher = new JochreIndexSearcher(indexSearcher, configId);
                        if (docId < 0) {
                            Map<Integer, Document> docs = searcher.findDocument(docName, docIndex);
                            docId = docs.keySet().iterator().next();
                        }
                        JochreIndexDocument jochreDoc = new JochreIndexDocument(indexSearcher, docId, configId);
                        JochreIndexWord jochreWord = jochreDoc.getWord(startOffset);
                        String word1 = jochreWord.getText();
                        String word2 = null;
                        if (word1.contains(JochreSearchConstants.INDEX_NEWLINE)) {
                            word2 = word1.substring(word1.indexOf(JochreSearchConstants.INDEX_NEWLINE) + 1);
                            word1 = word1.substring(0, word1.indexOf(JochreSearchConstants.INDEX_NEWLINE));
                        }
                        JsonFactory jsonFactory = new JsonFactory();
                        JsonGenerator jsonGen = jsonFactory.createGenerator(out);
                        jsonGen.writeStartObject();
                        jsonGen.writeStringField("word", word1);
                        if (word2 != null)
                            jsonGen.writeStringField("word2", word2);
                        jsonGen.writeEndObject();
                        jsonGen.flush();
                    } finally {
                        searchManager.getManager().release(indexSearcher);
                    }
                    break;
                }
            case view:
                {
                    if (docId < 0 && docIndex < 0)
                        throw new RuntimeException("For command " + command + " either docName and docIndex, or docId are required");
                    if (docId < 0) {
                        if (docName == null)
                            throw new RuntimeException("For command " + command + " docName is required");
                        if (docIndex < 0)
                            throw new RuntimeException("For command " + command + " docIndex is required");
                    }
                    IndexSearcher indexSearcher = searchManager.getManager().acquire();
                    try {
                        JochreIndexSearcher searcher = new JochreIndexSearcher(indexSearcher, configId);
                        if (docId < 0) {
                            Map<Integer, Document> docs = searcher.findDocument(docName, docIndex);
                            docId = docs.keySet().iterator().next();
                        }
                        Document doc = indexSearcher.doc(docId);
                        JsonFactory jsonFactory = new JsonFactory();
                        JsonGenerator jsonGen = jsonFactory.createGenerator(out);
                        jsonGen.writeStartObject();
                        for (IndexableField oneField : doc.getFields()) {
                            if (!oneField.name().equals(JochreIndexField.text.name()))
                                jsonGen.writeStringField(oneField.name(), oneField.stringValue());
                        }
                        jsonGen.writeEndObject();
                        jsonGen.flush();
                        out.write("\n");
                        out.flush();
                    } finally {
                        searchManager.getManager().release(indexSearcher);
                    }
                    break;
                }
            case list:
                {
                    if (docId < 0 && docIndex < 0)
                        throw new RuntimeException("For command " + command + " either docName and docIndex, or docId are required");
                    if (docId < 0) {
                        if (docName == null)
                            throw new RuntimeException("For command " + command + " docName is required");
                        if (docIndex < 0)
                            throw new RuntimeException("For command " + command + " docIndex is required");
                    }
                    IndexSearcher indexSearcher = searchManager.getManager().acquire();
                    try {
                        JochreIndexSearcher searcher = new JochreIndexSearcher(indexSearcher, configId);
                        if (docId < 0) {
                            Map<Integer, Document> docs = searcher.findDocument(docName, docIndex);
                            docId = docs.keySet().iterator().next();
                        }
                        JochreIndexTermLister lister = new JochreIndexTermLister(docId, indexSearcher);
                        lister.list(out);
                        out.write("\n");
                        out.flush();
                    } finally {
                        searchManager.getManager().release(indexSearcher);
                    }
                    break;
                }
            case wordImage:
                {
                    if (docId < 0 && docIndex < 0)
                        throw new RuntimeException("For command " + command + " either docName and docIndex, or docId are required");
                    if (docId < 0) {
                        if (docName == null)
                            throw new RuntimeException("For command " + command + " docName is required");
                        if (docIndex < 0)
                            throw new RuntimeException("For command " + command + " docIndex is required");
                    }
                    if (startOffset < 0)
                        throw new RuntimeException("For command " + command + " startOffset is required");
                    IndexSearcher indexSearcher = searchManager.getManager().acquire();
                    try {
                        JochreIndexSearcher searcher = new JochreIndexSearcher(indexSearcher, configId);
                        if (docId < 0) {
                            Map<Integer, Document> docs = searcher.findDocument(docName, docIndex);
                            docId = docs.keySet().iterator().next();
                        }
                        JochreIndexDocument jochreDoc = new JochreIndexDocument(indexSearcher, docId, configId);
                        JochreIndexWord jochreWord = jochreDoc.getWord(startOffset);
                        LOG.debug("jochreDoc: " + jochreDoc.getPath());
                        LOG.debug("word: " + jochreWord.getText());
                        LOG.debug("startOffset: " + jochreWord.getStartOffset());
                        BufferedImage wordImage = jochreWord.getImage();
                        ImageOutputStream ios = ImageIO.createImageOutputStream(output.getRight());
                        ImageReader imageReader = ImageIO.getImageReadersByMIMEType("image/png").next();
                        ImageWriter imageWriter = ImageIO.getImageWriter(imageReader);
                        imageWriter.setOutput(ios);
                        imageWriter.write(wordImage);
                        ios.flush();
                    } finally {
                        searchManager.getManager().release(indexSearcher);
                    }
                    break;
                }
            case suggest:
                {
                    if (!config.hasDatabase())
                        throw new RuntimeException("For command " + command + " a database is required");
                    if (docId < 0 && docIndex < 0)
                        throw new RuntimeException("For command " + command + " either docName and docIndex, or docId are required");
                    if (docId < 0) {
                        if (docName == null)
                            throw new RuntimeException("For command " + command + " docName is required");
                        if (docIndex < 0)
                            throw new RuntimeException("For command " + command + " docIndex is required");
                    }
                    if (startOffset < 0)
                        throw new RuntimeException("For command " + command + " startOffset is required");
                    if (suggestion == null)
                        throw new RuntimeException("For command " + command + " suggestion is required");
                    if (user == null)
                        throw new RuntimeException("For command " + command + " user is required");
                    if (fontCode == null)
                        throw new RuntimeException("For command " + command + " fontCode is required");
                    if (languageCode == null)
                        throw new RuntimeException("For command " + command + " languageCode is required");
                    String fullSuggestion = suggestion;
                    if (suggestion2 != null && suggestion2.length() > 0)
                        fullSuggestion += JochreSearchConstants.INDEX_NEWLINE + suggestion2;
                    IndexSearcher indexSearcher = searchManager.getManager().acquire();
                    try {
                        JochreIndexSearcher searcher = new JochreIndexSearcher(indexSearcher, configId);
                        Document luceneDoc = null;
                        if (docId < 0) {
                            Map<Integer, Document> docs = searcher.findDocument(docName, docIndex);
                            luceneDoc = docs.values().iterator().next();
                            docId = docs.keySet().iterator().next();
                        } else {
                            luceneDoc = searcher.loadDocument(docId);
                        }
                        FeedbackSuggestion sug = new FeedbackSuggestion(indexSearcher, docId, startOffset, fullSuggestion, user, ip, fontCode, languageCode, configId);
                        sug.save();
                        // Mark the document for re-indexing
                        String path = luceneDoc.get(JochreIndexField.path.name());
                        JochreIndexDirectory jochreIndexDirectory = new JochreIndexDirectory(path, configId);
                        jochreIndexDirectory.addUpdateInstructions();
                        // Start the index thread
                        JochreIndexBuilder builder = new JochreIndexBuilder(configId, false);
                        new Thread(builder).start();
                    } finally {
                        searchManager.getManager().release(indexSearcher);
                    }
                    out.write("{\"response\":\"suggestion saved\"}\n");
                    break;
                }
            case correct:
                {
                    if (!config.hasDatabase())
                        throw new RuntimeException("For command " + command + " a database is required");
                    if (docId < 0 && docName == null)
                        throw new RuntimeException("For command " + command + " either docName or docId are required");
                    if (field == null)
                        throw new RuntimeException("For command " + command + " field is required");
                    if (suggestion == null)
                        throw new RuntimeException("For command " + command + " suggestion is required");
                    if (user == null)
                        throw new RuntimeException("For command " + command + " user is required");
                    IndexSearcher indexSearcher = searchManager.getManager().acquire();
                    try {
                        JochreIndexSearcher searcher = new JochreIndexSearcher(indexSearcher, configId);
                        Document luceneDoc = null;
                        if (docId < 0) {
                            Map<Integer, Document> docs = searcher.findDocuments(docName);
                            luceneDoc = docs.values().iterator().next();
                        } else {
                            luceneDoc = searcher.loadDocument(docId);
                        }
                        String previousValue = luceneDoc.get(field.name());
                        FeedbackDocument feedbackDoc = FeedbackDocument.findOrCreateDocument(luceneDoc.get(JochreIndexField.path.name()), configId);
                        Correction correction = new Correction(feedbackDoc, field, user, ip, suggestion, previousValue, applyEverywhere, configId);
                        correction.save();
                        // Now apply the correction to the index itself
                        List<String> docNames = new ArrayList<>();
                        if (applyEverywhere) {
                            // find all documents affected by this update and mark them for re-indexing
                            TopDocs topDocs = searcher.search(correction.getField(), correction.getPreviousValue());
                            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                                Document doc = indexSearcher.doc(scoreDoc.doc);
                                String path = doc.get(JochreIndexField.path.name());
                                JochreIndexDirectory jochreIndexDirectory = new JochreIndexDirectory(path, configId);
                                jochreIndexDirectory.addUpdateInstructions();
                                docNames.add(doc.get(JochreIndexField.name.name()));
                            }
                        } else {
                            // mark the document for re-indexing
                            String path = luceneDoc.get(JochreIndexField.path.name());
                            JochreIndexDirectory jochreIndexDirectory = new JochreIndexDirectory(path, configId);
                            jochreIndexDirectory.addUpdateInstructions();
                            docNames.add(luceneDoc.get(JochreIndexField.name.name()));
                        }
                        // update the list of documents affected by this correction
                        correction.setDocuments(docNames);
                        correction.save();
                        // start the index thread
                        JochreIndexBuilder builder = new JochreIndexBuilder(configId, false);
                        new Thread(builder).start();
                        // wrap e-mail in runnable to return directly to client
                        new Thread(new Runnable() {

                            @Override
                            public void run() {
                                try {
                                    LOG.debug("Sending e-mail for correction");
                                    // send an e-mail if required
                                    correction.sendEmail();
                                } catch (MessagingException | IOException | TemplateException e) {
                                    LOG.error("Unable to send correction e-mail", e);
                                }
                            }
                        }).start();
                    } finally {
                        searchManager.getManager().release(indexSearcher);
                    }
                    out.write("{\"response\":\"correction saved\"}\n");
                    break;
                }
            case sendCorrectionEmail:
                {
                    if (correctionId < 0)
                        throw new RuntimeException("For command " + command + " correctionId is required");
                    Correction correction = Correction.loadCorrection(correctionId, configId);
                    correction.sendEmail();
                    out.write("{\"response\":\"correction e-mail sent\"}\n");
                    break;
                }
            case undo:
                {
                    if (correctionId < 0)
                        throw new RuntimeException("For command " + command + " correctionId is required");
                    // Mark correction for ignoring
                    Correction correction = Correction.loadCorrection(correctionId, configId);
                    correction.setIgnore(true);
                    correction.save();
                    IndexSearcher indexSearcher = searchManager.getManager().acquire();
                    try {
                        JochreIndexSearcher searcher = new JochreIndexSearcher(indexSearcher, configId);
                        List<String> docNames = correction.getDocuments();
                        for (String doc : docNames) {
                            // mark the document for re-indexing
                            Map<Integer, Document> docs = searcher.findDocuments(doc);
                            Document luceneDoc = docs.values().iterator().next();
                            String path = luceneDoc.get(JochreIndexField.path.name());
                            JochreIndexDirectory jochreIndexDirectory = new JochreIndexDirectory(path, configId);
                            jochreIndexDirectory.addUpdateInstructions();
                        }
                        // Mark initial document for update
                        String docPath = correction.getDocument().getPath();
                        JochreIndexDirectory jochreIndexDirectory = new JochreIndexDirectory(docPath, configId);
                        jochreIndexDirectory.addUpdateInstructions();
                        // start the index thread
                        JochreIndexBuilder builder = new JochreIndexBuilder(configId, forceUpdate);
                        new Thread(builder).start();
                        out.write("{\"response\":\"correction undo thread started\"}\n");
                    } finally {
                        searchManager.getManager().release(indexSearcher);
                    }
                    break;
                }
            case prefixSearch:
                {
                    if (field == null)
                        throw new RuntimeException("For command " + command + " field is required");
                    if (prefix == null)
                        throw new RuntimeException("For command " + command + " prefix is required");
                    IndexSearcher indexSearcher = searchManager.getManager().acquire();
                    try {
                        FieldTermPrefixFinder finder = new FieldTermPrefixFinder(indexSearcher, field, prefix, maxResults, configId);
                        JsonFactory jsonFactory = new JsonFactory();
                        JsonGenerator jsonGen = jsonFactory.createGenerator(out);
                        jsonGen.writeStartArray();
                        for (String result : finder.getResults()) {
                            jsonGen.writeString(result);
                        }
                        jsonGen.writeEndArray();
                        jsonGen.flush();
                        out.write("\n");
                        out.flush();
                    } finally {
                        searchManager.getManager().release(indexSearcher);
                    }
                    break;
                }
            case document:
                {
                    if (docId < 0 && docName == null)
                        throw new RuntimeException("For command " + command + " either docName  or docId are required");
                    IndexSearcher indexSearcher = searchManager.getManager().acquire();
                    try {
                        JochreIndexSearcher searcher = new JochreIndexSearcher(indexSearcher, configId);
                        List<JochreIndexDocument> docs = new ArrayList<>();
                        if (docName != null) {
                            Map<Integer, Document> docMap = searcher.findDocuments(docName);
                            for (int id : docMap.keySet()) docs.add(new JochreIndexDocument(indexSearcher, id, configId));
                            Collections.sort(docs, new Comparator<JochreIndexDocument>() {

                                @Override
                                public int compare(JochreIndexDocument d1, JochreIndexDocument d2) {
                                    return d1.getSectionNumber() - d2.getSectionNumber();
                                }
                            });
                        } else {
                            JochreIndexDocument doc = new JochreIndexDocument(indexSearcher, docId, configId);
                            docs.add(doc);
                        }
                        JsonFactory jsonFactory = new JsonFactory();
                        JsonGenerator jsonGen = jsonFactory.createGenerator(out);
                        jsonGen.writeStartArray();
                        for (JochreIndexDocument doc : docs) {
                            doc.toJson(jsonGen);
                        }
                        jsonGen.writeEndArray();
                        jsonGen.flush();
                        out.write("\n");
                        out.flush();
                    } finally {
                        searchManager.getManager().release(indexSearcher);
                    }
                    break;
                }
            case contents:
                {
                    if (docName == null)
                        throw new RuntimeException("For command " + command + " docName is required");
                    IndexSearcher indexSearcher = searchManager.getManager().acquire();
                    try {
                        JochreIndexSearcher searcher = new JochreIndexSearcher(indexSearcher, configId);
                        Map<Integer, Document> docMap = searcher.findDocuments(docName);
                        List<JochreIndexDocument> docs = new ArrayList<>();
                        for (int id : docMap.keySet()) docs.add(new JochreIndexDocument(indexSearcher, id, configId));
                        Collections.sort(docs, new Comparator<JochreIndexDocument>() {

                            @Override
                            public int compare(JochreIndexDocument d1, JochreIndexDocument d2) {
                                return d1.getSectionNumber() - d2.getSectionNumber();
                            }
                        });
                        for (JochreIndexDocument doc : docs) {
                            DocumentContentHTMLWriter htmlWriter = new DocumentContentHTMLWriter(out, doc, config);
                            htmlWriter.writeContents();
                        }
                    } finally {
                        searchManager.getManager().release(indexSearcher);
                    }
                    break;
                }
            case bookCount:
                {
                    IndexSearcher indexSearcher = searchManager.getManager().acquire();
                    try {
                        int bookCount = ((JochreSearcher) indexSearcher).getBookCount();
                        JsonFactory jsonFactory = new JsonFactory();
                        JsonGenerator jsonGen = jsonFactory.createGenerator(out);
                        jsonGen.writeStartObject();
                        jsonGen.writeNumberField("bookCount", bookCount);
                        jsonGen.writeEndObject();
                        jsonGen.flush();
                        out.write("\n");
                        out.flush();
                    } finally {
                        searchManager.getManager().release(indexSearcher);
                    }
                    break;
                }
            case serializeLexicon:
                {
                    if (lexiconDirPath == null)
                        throw new RuntimeException("For command " + command + " lexiconDir is required");
                    if (lexiconRegexPath == null)
                        throw new RuntimeException("For command " + command + " lexiconRegex is required");
                    File lexiconDir = new File(lexiconDirPath);
                    File[] lexiconFiles = lexiconDir.listFiles();
                    TextFileLexicon lexicon = new TextFileLexicon(configId);
                    File regexFile = new File(lexiconRegexPath);
                    Scanner regexScanner = new Scanner(new BufferedReader(new InputStreamReader(new FileInputStream(regexFile), "UTF-8")));
                    LexicalEntryReader lexicalEntryReader = new RegexLexicalEntryReader(regexScanner);
                    for (File file : lexiconFiles) {
                        LOG.info("Adding " + file.getName());
                        lexicon.addLexiconFile(file, lexicalEntryReader);
                    }
                    File outFile = config.getLexiconFile();
                    lexicon.serialize(outFile);
                    break;
                }
            case deserializeLexicon:
                {
                    if (word == null)
                        throw new RuntimeException("For command " + command + " word is required");
                    Lexicon lexicon = config.getLexicon();
                    Set<String> lemmas = lexicon.getLemmas(word);
                    LOG.info("Word: " + word);
                    if (lemmas != null) {
                        for (String lemma : lemmas) {
                            Set<String> words = lexicon.getWords(lemma);
                            LOG.info("# Lemma: " + lemma + ", words: " + words.toString());
                        }
                    }
                    break;
                }
            default:
                {
                    throw new RuntimeException("Unknown command: " + command);
                }
        }
        if (output.isLeft())
            output.getLeft().flush();
        else
            output.getRight().flush();
    } catch (RuntimeException e) {
        LOG.error("Failed to run command " + command, e);
        throw e;
    } catch (Exception e) {
        LOG.error("Failed to run command " + command, e);
        throw new RuntimeException(e);
    } finally {
        long endTime = System.currentTimeMillis();
        LOG.info("Command " + command + " completed in " + (endTime - startTime) + " ms");
    }
}
Also used : FeedbackQuery(com.joliciel.jochre.search.feedback.FeedbackQuery) Lexicon(com.joliciel.jochre.search.lexicon.Lexicon) TextFileLexicon(com.joliciel.jochre.search.lexicon.TextFileLexicon) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) FeedbackDocument(com.joliciel.jochre.search.feedback.FeedbackDocument) ScoreDoc(org.apache.lucene.search.ScoreDoc) Comparator(java.util.Comparator) JochreException(com.joliciel.jochre.utils.JochreException) List(java.util.List) ArrayList(java.util.ArrayList) ImageOutputStream(javax.imageio.stream.ImageOutputStream) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) LuceneQueryHighlighter(com.joliciel.jochre.search.highlight.LuceneQueryHighlighter) TextFileLexicon(com.joliciel.jochre.search.lexicon.TextFileLexicon) ImageSnippet(com.joliciel.jochre.search.highlight.ImageSnippet) Snippet(com.joliciel.jochre.search.highlight.Snippet) FileInputStream(java.io.FileInputStream) RegexLexicalEntryReader(com.joliciel.jochre.search.lexicon.RegexLexicalEntryReader) Map(java.util.Map) HashMap(java.util.HashMap) File(java.io.File) FeedbackSuggestion(com.joliciel.jochre.search.feedback.FeedbackSuggestion) IndexSearcher(org.apache.lucene.search.IndexSearcher) Scanner(java.util.Scanner) Set(java.util.Set) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) SortBy(com.joliciel.jochre.search.JochreQuery.SortBy) DecimalFormat(java.text.DecimalFormat) JsonFactory(com.fasterxml.jackson.core.JsonFactory) ImageWriter(javax.imageio.ImageWriter) Correction(com.joliciel.jochre.search.feedback.Correction) BufferedImage(java.awt.image.BufferedImage) TopDocs(org.apache.lucene.search.TopDocs) JsonGenerator(com.fasterxml.jackson.core.JsonGenerator) ImageReader(javax.imageio.ImageReader) PrintWriter(java.io.PrintWriter) Highlighter(com.joliciel.jochre.search.highlight.Highlighter) LuceneQueryHighlighter(com.joliciel.jochre.search.highlight.LuceneQueryHighlighter) 
FeedbackDocument(com.joliciel.jochre.search.feedback.FeedbackDocument) InputStreamReader(java.io.InputStreamReader) HighlightManager(com.joliciel.jochre.search.highlight.HighlightManager) Date(java.util.Date) MessagingException(javax.mail.MessagingException) JochreException(com.joliciel.jochre.utils.JochreException) TemplateException(freemarker.template.TemplateException) IOException(java.io.IOException) IndexableField(org.apache.lucene.index.IndexableField) ImageSnippet(com.joliciel.jochre.search.highlight.ImageSnippet) LexicalEntryReader(com.joliciel.jochre.search.lexicon.LexicalEntryReader) RegexLexicalEntryReader(com.joliciel.jochre.search.lexicon.RegexLexicalEntryReader) DateFormat(java.text.DateFormat) SimpleDateFormat(java.text.SimpleDateFormat) BufferedReader(java.io.BufferedReader) SimpleDateFormat(java.text.SimpleDateFormat)

Example 2 with JochreException

use of com.joliciel.jochre.utils.JochreException in project jochre by urieli.

the class TextFileLexicon method deserialize.

/**
 * Reads every entry of the given zip stream as a lexicon and returns the
 * combined result.
 * <p>
 * Entries whose name ends in ".zip" are treated as nested archives and are
 * read recursively; every other entry is read as a serialized
 * {@link TextFileLexicon}.
 * <p>
 * NOTE(review): this relies on Java native deserialization, so the stream
 * should presumably only ever come from a trusted source - confirm.
 *
 * @param zis the zip stream to read; it is not closed by this method
 * @return the lexicon itself when exactly one was read, otherwise a
 *         {@link LexiconMerger} holding every lexicon read (none, if the
 *         archive had no entries)
 * @throws JochreException if reading the stream fails or a serialized class
 *         cannot be found
 */
public static Lexicon deserialize(ZipInputStream zis) {
    List<Lexicon> loaded = new ArrayList<>();
    try {
        for (ZipEntry entry = zis.getNextEntry(); entry != null; entry = zis.getNextEntry()) {
            String entryName = entry.getName();
            LOG.debug("Scanning zip entry " + entryName);
            // The wrapper streams below are not closed here: closing a
            // wrapper would also close the shared underlying zip stream.
            Lexicon current;
            if (entryName.endsWith(".zip")) {
                // nested archive: recurse into it
                current = TextFileLexicon.deserialize(new ZipInputStream(zis));
            } else {
                // plain entry: a serialized TextFileLexicon
                current = (TextFileLexicon) new ObjectInputStream(zis).readObject();
            }
            loaded.add(current);
            zis.closeEntry();
        }
    } catch (IOException ioe) {
        throw new JochreException(ioe);
    } catch (ClassNotFoundException cnfe) {
        throw new JochreException(cnfe);
    }
    // a single lexicon needs no merging
    if (loaded.size() == 1) {
        return loaded.get(0);
    }
    LexiconMerger merged = new LexiconMerger();
    for (Lexicon each : loaded) {
        merged.addLexicon(each);
    }
    return merged;
}
Also used : ZipInputStream(java.util.zip.ZipInputStream) JochreException(com.joliciel.jochre.utils.JochreException) ZipEntry(java.util.zip.ZipEntry) ArrayList(java.util.ArrayList) IOException(java.io.IOException) ObjectInputStream(java.io.ObjectInputStream)

Example 3 with JochreException

use of com.joliciel.jochre.utils.JochreException in project jochre by urieli.

the class TextFileLexicon method serialize.

/**
 * Writes this lexicon to the given file using Java object serialization.
 * <p>
 * If the file name ends in ".zip", the serialized object is stored as a
 * single zip entry named "lexicon.obj"; otherwise it is written directly to
 * the file.
 * <p>
 * All streams are opened with try-with-resources so that the file handle is
 * released even when opening a wrapper stream or writing the object fails.
 *
 * @param memoryBaseFile the file to write to
 * @throws JochreException if the file cannot be opened or written
 */
public void serialize(File memoryBaseFile) {
    LOG.debug("serialize");
    boolean isZip = memoryBaseFile.getName().endsWith(".zip");
    try (FileOutputStream fos = new FileOutputStream(memoryBaseFile)) {
        if (isZip) {
            try (ZipOutputStream zos = new ZipOutputStream(fos)) {
                // The entry must be started before the object stream is
                // created, so that everything it writes lands in the entry.
                zos.putNextEntry(new ZipEntry("lexicon.obj"));
                try (ObjectOutputStream out = new ObjectOutputStream(zos)) {
                    out.writeObject(this);
                }
            }
        } else {
            try (ObjectOutputStream out = new ObjectOutputStream(fos)) {
                out.writeObject(this);
            }
        }
    } catch (IOException ioe) {
        throw new JochreException(ioe);
    }
}
Also used : JochreException(com.joliciel.jochre.utils.JochreException) ZipOutputStream(java.util.zip.ZipOutputStream) FileOutputStream(java.io.FileOutputStream) ZipEntry(java.util.zip.ZipEntry) IOException(java.io.IOException) ObjectOutputStream(java.io.ObjectOutputStream)

Example 4 with JochreException

use of com.joliciel.jochre.utils.JochreException in project jochre by urieli.

the class JochrePage method segmentAndShow.

/**
 * Segments each image of this page and writes the drawn segmentation of
 * every image to a PNG file, so that the result can be inspected visually.
 * <p>
 * Each file is written to
 * <code>outputDirectory + "/" + imageName + "_seg.png"</code>.
 *
 * @param outputDirectory path of the directory receiving the PNG files
 * @throws JochreException if a PNG file cannot be written
 */
public void segmentAndShow(String outputDirectory) {
    int imageIndex = 0;
    for (JochreImage pageImage : this.getImages()) {
        SourceImage source = (SourceImage) pageImage;
        Segmenter segmenter = new Segmenter(source, jochreSession);
        // ask the segmenter to draw its segmentation while it runs
        segmenter.setDrawSegmentation(true);
        segmenter.segment();
        BufferedImage rendered = segmenter.getSegmentedImage();
        File target = new File(outputDirectory + "/" + pageImage.getName() + "_seg.png");
        try {
            ImageIO.write(rendered, "PNG", target);
        } catch (IOException e) {
            throw new JochreException(e);
        }
        LOG.debug("Image " + imageIndex + " segmented: " + source.getName());
        imageIndex++;
    }
}
Also used : JochreImage(com.joliciel.jochre.graphics.JochreImage) JochreException(com.joliciel.jochre.utils.JochreException) SourceImage(com.joliciel.jochre.graphics.SourceImage) Segmenter(com.joliciel.jochre.graphics.Segmenter) IOException(java.io.IOException) File(java.io.File) BufferedImage(java.awt.image.BufferedImage)

Example 5 with JochreException

use of com.joliciel.jochre.utils.JochreException in project jochre by urieli.

the class ImageUtils method getImage.

/**
 * Get the image from a previously retrieved SqlRowSet.
 * <p>
 * The column is read once; its value is expected to be a <code>byte[]</code>
 * holding encoded image data, which is decoded with {@link ImageIO#read}.
 *
 * @param rs the row set, positioned on the row to read
 * @param column the name of the column holding the image bytes
 * @return the decoded image, or <code>null</code> if the column value is
 *         null (or if ImageIO finds no reader able to decode the bytes)
 * @throws JochreException if decoding the image bytes fails with an I/O error
 */
public static BufferedImage getImage(SqlRowSet rs, String column) throws SQLException {
    // read the column a single time rather than once for the null check
    // and again for the value
    Object value = rs.getObject(column);
    if (value == null) {
        return null;
    }
    byte[] imageBytes = (byte[]) value;
    // try-with-resources closes the stream even if decoding fails
    try (ByteArrayInputStream is = new ByteArrayInputStream(imageBytes)) {
        return ImageIO.read(is);
    } catch (IOException e) {
        throw new JochreException(e);
    }
}
Also used : JochreException(com.joliciel.jochre.utils.JochreException) ByteArrayInputStream(java.io.ByteArrayInputStream) IOException(java.io.IOException) BufferedImage(java.awt.image.BufferedImage)

Aggregations

JochreException (com.joliciel.jochre.utils.JochreException)23 IOException (java.io.IOException)15 BufferedImage (java.awt.image.BufferedImage)7 File (java.io.File)7 ClassificationModel (com.joliciel.talismane.machineLearning.ClassificationModel)5 Shape (com.joliciel.jochre.graphics.Shape)4 ClassificationEventStream (com.joliciel.talismane.machineLearning.ClassificationEventStream)4 ClassificationModelTrainer (com.joliciel.talismane.machineLearning.ClassificationModelTrainer)4 ModelTrainerFactory (com.joliciel.talismane.machineLearning.ModelTrainerFactory)4 BoundaryDetector (com.joliciel.jochre.boundaries.BoundaryDetector)3 DeterministicBoundaryDetector (com.joliciel.jochre.boundaries.DeterministicBoundaryDetector)3 LetterByLetterBoundaryDetector (com.joliciel.jochre.boundaries.LetterByLetterBoundaryDetector)3 OriginalBoundaryDetector (com.joliciel.jochre.boundaries.OriginalBoundaryDetector)3 JochreImage (com.joliciel.jochre.graphics.JochreImage)3 LetterFeature (com.joliciel.jochre.letterGuesser.features.LetterFeature)3 LetterFeatureParser (com.joliciel.jochre.letterGuesser.features.LetterFeatureParser)3 TreeSet (java.util.TreeSet)3 BeamSearchImageAnalyser (com.joliciel.jochre.analyser.BeamSearchImageAnalyser)2 ImageAnalyser (com.joliciel.jochre.analyser.ImageAnalyser)2 LetterAssigner (com.joliciel.jochre.analyser.LetterAssigner)2