Search in sources :

Example 1 with GetTextContentResponse

use of org.alfresco.solr.client.SOLRAPIClient.GetTextContentResponse in project SearchServices by Alfresco.

From the class SolrInformationServer, the method addContentPropertyToDocUsingAlfrescoRepository:

/**
 * Retrieves the text rendition of a content property from the Alfresco repository and adds it
 * (plus transformation metadata and, optionally, a MinHash fingerprint) to the Solr document.
 *
 * @param doc           the Solr input document being built
 * @param propertyQName the content property to fetch text for
 * @param dbId          the node database id used to address the repository content
 * @param locale        locale tag prepended to the localised field value
 * @throws AuthenticationException if the repository call cannot be authenticated
 * @throws IOException             on transport or stream-copy failure
 */
private void addContentPropertyToDocUsingAlfrescoRepository(SolrInputDocument doc, QName propertyQName, long dbId, String locale) throws AuthenticationException, IOException, UnsupportedEncodingException {
    long start = System.nanoTime();
    // Expensive call to be done with ContentTracker
    GetTextContentResponse response = repositoryClient.getTextContent(dbId, propertyQName, null);
    // Record transformation outcome metadata even when no content body is returned.
    addContentPropertyMetadata(doc, propertyQName, AlfrescoSolrDataModel.ContentFieldType.TRANSFORMATION_STATUS, response);
    addContentPropertyMetadata(doc, propertyQName, AlfrescoSolrDataModel.ContentFieldType.TRANSFORMATION_EXCEPTION, response);
    addContentPropertyMetadata(doc, propertyQName, AlfrescoSolrDataModel.ContentFieldType.TRANSFORMATION_TIME, response);
    InputStream ris = response.getContent();
    String textContent = "";
    try {
        if (ris != null) {
            // Bound the number of bytes copied to contentStreamLimit to cap memory usage.
            byte[] bytes = FileCopyUtils.copyToByteArray(new BoundedInputStream(ris, contentStreamLimit));
            // Use the charset constant instead of the "UTF8" alias string: avoids a runtime
            // charset-name lookup and can never throw UnsupportedEncodingException.
            textContent = new String(bytes, java.nio.charset.StandardCharsets.UTF_8);
        }
    } finally {
        // release the response only when the content has been read
        response.release();
    }
    if (minHash && textContent.length() > 0) {
        Analyzer analyzer = core.getLatestSchema().getFieldType("min_hash").getIndexAnalyzer();
        // try-with-resources guarantees TokenStream.close() even if analysis throws;
        // the previous code leaked the stream on any exception between reset() and close().
        try (TokenStream ts = analyzer.tokenStream("min_hash", textContent)) {
            CharTermAttribute termAttribute = ts.getAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                // Encode each term's chars as concatenated hex to form one fingerprint token.
                StringBuilder tokenBuff = new StringBuilder();
                char[] buff = termAttribute.buffer();
                for (int i = 0; i < termAttribute.length(); i++) {
                    tokenBuff.append(Integer.toHexString(buff[i]));
                }
                doc.addField(FINGERPRINT_FIELD, tokenBuff.toString());
            }
            // Per the Lucene TokenStream contract, end() must follow exhaustion of the stream.
            ts.end();
        }
    }
    long end = System.nanoTime();
    this.getTrackerStats().addDocTransformationTime(end - start);
    // Localised form: "\u0000<locale>\u0000<text>", consumed by locale-aware analysis chains.
    StringBuilder builder = new StringBuilder(textContent.length() + 16);
    builder.append("\u0000").append(locale).append("\u0000");
    builder.append(textContent);
    String localisedText = builder.toString();
    for (FieldInstance field : AlfrescoSolrDataModel.getInstance().getIndexedFieldNamesForProperty(propertyQName).getFields()) {
        // Replace any stale value before adding the fresh content for this property.
        doc.removeField(field.getField());
        if (field.isLocalised()) {
            doc.addField(field.getField(), localisedText);
        } else {
            doc.addField(field.getField(), textContent);
        }
        addFieldIfNotSet(doc, field);
    }
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) GetTextContentResponse(org.alfresco.solr.client.SOLRAPIClient.GetTextContentResponse) BoundedInputStream(org.apache.commons.io.input.BoundedInputStream) InputStream(java.io.InputStream) Analyzer(org.apache.lucene.analysis.Analyzer) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) BoundedInputStream(org.apache.commons.io.input.BoundedInputStream) FieldInstance(org.alfresco.solr.AlfrescoSolrDataModel.FieldInstance)

Aggregations

InputStream (java.io.InputStream)1 FieldInstance (org.alfresco.solr.AlfrescoSolrDataModel.FieldInstance)1 GetTextContentResponse (org.alfresco.solr.client.SOLRAPIClient.GetTextContentResponse)1 BoundedInputStream (org.apache.commons.io.input.BoundedInputStream)1 Analyzer (org.apache.lucene.analysis.Analyzer)1 TokenStream (org.apache.lucene.analysis.TokenStream)1 CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute)1