Use of org.carrot2.core.Cluster in the project lucene-solr by Apache.
Class CarrotClusteringEngine, method cluster.
/**
 * Runs Carrot2 clustering over the given Solr documents and converts the resulting
 * clusters with {@code clustersToNamedList}.
 *
 * @param query the query; its {@code toString()} form is passed to Carrot2 as the query attribute
 * @param solrDocList the Solr documents handed to {@code getDocuments}
 * @param docIds the document-to-integer map handed to {@code getDocuments}
 * @param sreq the current request; supplies the clustering fields and extra attributes
 * @return whatever {@code clustersToNamedList} produces for the computed clusters
 * @throws SolrException with {@code SERVER_ERROR} when any step fails (the failure is logged first)
 */
@Override
public Object cluster(Query query, SolrDocumentList solrDocList, Map<SolrDocument, Integer> docIds, SolrQueryRequest sreq) {
  try {
    final Map<String, Object> carrotAttributes = new HashMap<>();

    // Input documents and the textual form of the query.
    final List<Document> carrotDocuments = getDocuments(solrDocList, docIds, query, sreq);
    carrotAttributes.put(AttributeNames.DOCUMENTS, carrotDocuments);
    carrotAttributes.put(AttributeNames.QUERY, query.toString());

    // Fields on which clustering runs.
    carrotAttributes.put("solrFieldNames", getFieldsForClustering(sreq));

    // Extra overriding attributes taken from the request parameters, if any.
    extractCarrotAttributes(sreq.getParams(), carrotAttributes);

    // Carrot2 looks up certain classes (e.g. custom tokenizer/stemmer) through the
    // current thread's context class loader. Install the core's resource class loader
    // for the duration of the clustering call so classes from contrib JARs are
    // reachable, and always put the previous loader back afterwards.
    final Thread currentThread = Thread.currentThread();
    final ClassLoader originalLoader = currentThread.getContextClassLoader();
    try {
      currentThread.setContextClassLoader(core.getResourceLoader().getClassLoader());
      return clustersToNamedList(
          controller.process(carrotAttributes, clusteringAlgorithmClass).getClusters(),
          sreq.getParams());
    } finally {
      currentThread.setContextClassLoader(originalLoader);
    }
  } catch (Exception e) {
    log.error("Carrot2 clustering failed", e);
    throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e);
  }
}
Use of org.carrot2.core.Cluster in the project lucene-solr by Apache.
Class EchoClusteringAlgorithm, method process.
/**
 * Builds one cluster per input document. Each cluster's phrases are the document's
 * title and summary, the name of its language when one is set, and the string form
 * of every non-null field named in the comma-separated {@code customFields}. The
 * document itself is attached to its cluster.
 */
@Override
public void process() throws ProcessingException {
  clusters = new ArrayList<>();
  for (Document doc : documents) {
    final Cluster echo = new Cluster();
    echo.addPhrases(doc.getTitle(), doc.getSummary());

    if (doc.getLanguage() != null) {
      echo.addPhrases(doc.getLanguage().name());
    }

    // Echo back each requested custom field that the document actually has.
    final String[] fieldNames = customFields.split(",");
    for (String fieldName : fieldNames) {
      final Object fieldValue = doc.getField(fieldName);
      if (fieldValue == null) {
        continue;
      }
      echo.addPhrases(fieldValue.toString());
    }

    echo.addDocuments(doc);
    clusters.add(echo);
  }
}
Use of org.carrot2.core.Cluster in the project lucene-solr by Apache.
Class EchoTokensClusteringAlgorithm, method process.
/**
 * Preprocesses the input documents (empty query, English) and emits one cluster per
 * non-null token image, labelled with that token's text.
 */
@Override
public void process() throws ProcessingException {
  final PreprocessingContext context = preprocessing.preprocess(documents, "", LanguageCode.ENGLISH);
  clusters = new ArrayList<>();
  for (char[] tokenImage : context.allTokens.image) {
    // Entries without an image are skipped.
    if (tokenImage == null) {
      continue;
    }
    final String label = String.valueOf(tokenImage);
    clusters.add(new Cluster(label));
  }
}
Use of org.carrot2.core.Cluster in the project lucene-solr by Apache.
Class EchoStemsClusteringAlgorithm, method process.
/**
 * Preprocesses the input documents (empty query, English) and emits one cluster per
 * token that has a non-negative word index, labelled with the stem image reached via
 * token -> word -> stem.
 */
@Override
public void process() throws ProcessingException {
  final PreprocessingContext context = preprocessing.preprocess(documents, "", LanguageCode.ENGLISH);
  final AllTokens tokens = context.allTokens;
  final AllWords words = context.allWords;
  final AllStems stems = context.allStems;
  clusters = new ArrayList<>();
  for (int tokenIdx = 0; tokenIdx < tokens.image.length; tokenIdx++) {
    final int wordIdx = tokens.wordIndex[tokenIdx];
    // Tokens with a negative word index are skipped.
    if (wordIdx < 0) {
      continue;
    }
    final int stemIdx = words.stemIndex[wordIdx];
    clusters.add(new Cluster(new String(stems.image[stemIdx])));
  }
}
Use of org.carrot2.core.Cluster in the project lucene-solr by Apache.
Class CarrotClusteringEngine, method clustersToNamedList.
/**
 * Appends one entry to {@code parent} for every cluster in {@code outputClusters}.
 * Each entry holds the cluster's labels (at most {@code maxLabels}), its score when
 * present, an "other-topics" marker when applicable, and the ids of its documents.
 *
 * @param outputClusters clusters to convert
 * @param parent list that receives the converted entries
 * @param outputSubClusters when true, only each cluster's own documents are listed and
 *        non-empty subclusters are converted recursively under "clusters"; when false,
 *        {@code getAllDocuments()} is listed and subclusters are not emitted
 * @param maxLabels maximum number of labels kept per cluster
 */
private void clustersToNamedList(List<Cluster> outputClusters, List<NamedList<Object>> parent, boolean outputSubClusters, int maxLabels) {
  for (Cluster source : outputClusters) {
    final NamedList<Object> entry = new SimpleOrderedMap<>();
    parent.add(entry);

    // Labels, truncated to the first maxLabels phrases.
    List<String> phrases = source.getPhrases();
    if (phrases.size() > maxLabels) {
      phrases = phrases.subList(0, maxLabels);
    }
    entry.add("labels", phrases);

    // Score is optional.
    final Double clusterScore = source.getScore();
    if (clusterScore != null) {
      entry.add("score", clusterScore);
    }

    // Marker is only written for "other topics" clusters.
    if (source.isOtherTopics()) {
      entry.add("other-topics", source.isOtherTopics());
    }

    // Document ids.
    final List<Document> members;
    if (outputSubClusters) {
      members = source.getDocuments();
    } else {
      members = source.getAllDocuments();
    }
    final List<Object> memberIds = new ArrayList<>();
    entry.add("docs", memberIds);
    for (Document member : members) {
      memberIds.add(member.getField(SOLR_DOCUMENT_ID));
    }

    // Subclusters, only when requested and present.
    if (!outputSubClusters || source.getSubclusters().isEmpty()) {
      continue;
    }
    final List<NamedList<Object>> children = new ArrayList<>();
    entry.add("clusters", children);
    clustersToNamedList(source.getSubclusters(), children, outputSubClusters, maxLabels);
  }
}
Aggregations