Search in sources :

Example 16 with ModifiableSolrParams

use of org.apache.solr.common.params.ModifiableSolrParams in project lucene-solr by apache.

the class CarrotClusteringEngineTest method testOneCarrot2SupportedLanguage.

/**
 * Runs the "echo" clustering engine with {@code CarrotParams.LANGUAGE_FIELD_NAME}
 * set to "lang" against the document whose url is "one_supported_language",
 * and checks that the third returned label is the name of
 * {@code LanguageCode.CHINESE_SIMPLIFIED}.
 */
@Test
public void testOneCarrot2SupportedLanguage() throws Exception {
    final ModifiableSolrParams clusteringParams = new ModifiableSolrParams();
    clusteringParams.add(CarrotParams.LANGUAGE_FIELD_NAME, "lang");

    // Restrict the search to the single document under test.
    final TermQuery singleDocQuery = new TermQuery(new Term("url", "one_supported_language"));

    // One document and one cluster are expected; only the first cluster's labels are inspected.
    final List<String> echoedLabels = getLabels(
            checkEngine(getClusteringEngine("echo"), 1, 1, singleDocQuery, clusteringParams).get(0));

    assertEquals(3, echoedLabels.size());
    final String expectedLanguage = LanguageCode.CHINESE_SIMPLIFIED.name();
    assertEquals("Correct Carrot2 language", expectedLanguage, echoedLabels.get(2));
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) Test(org.junit.Test)

Example 17 with ModifiableSolrParams

use of org.apache.solr.common.params.ModifiableSolrParams in project lucene-solr by apache.

the class CarrotClusteringEngineTest method testSolrStopWordsUsedInCarrot2Clustering.

/**
 * Checks that the "lexical-resource-check" engine, run with "merge-resources"
 * disabled and asked to check the words "online" and "solrownstopword",
 * yields a single cluster labelled only "online".
 */
@Test
public void testSolrStopWordsUsedInCarrot2Clustering() throws Exception {
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set("merge-resources", false);
    params.set(AttributeUtils.getKey(LexicalResourcesCheckClusteringAlgorithm.class, "wordsToCheck"), "online,solrownstopword");
    // "solrownstopword" is in stopwords.txt, so we're expecting
    // only one cluster with label "online".
    final List<NamedList<Object>> clusters = checkEngine(getClusteringEngine("lexical-resource-check"), 1, params);
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes
    // the failure message report the two values under the right headings.
    assertEquals(Collections.singletonList("online"), getLabels(clusters.get(0)));
}
Also used : NamedList(org.apache.solr.common.util.NamedList) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) Test(org.junit.Test)

Example 18 with ModifiableSolrParams

use of org.apache.solr.common.params.ModifiableSolrParams in project lucene-solr by apache.

the class CarrotClusteringEngineTest method testWithSubclusters.

/**
 * Runs the "mock" clustering engine with {@code CarrotParams.OUTPUT_SUB_CLUSTERS}
 * enabled and validates the resulting clusters via {@code checkClusters(..., 1, 1, 2)}.
 */
@Test
public void testWithSubclusters() throws Exception {
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set(CarrotParams.OUTPUT_SUB_CLUSTERS, true);
    // Pass params explicitly so the OUTPUT_SUB_CLUSTERS setting actually reaches the
    // engine instead of this test silently depending on whatever the default is.
    checkClusters(checkEngine(getClusteringEngine("mock"), AbstractClusteringTestCase.numberOfDocs, params), 1, 1, 2);
}
Also used : ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) Test(org.junit.Test)

Example 19 with ModifiableSolrParams

use of org.apache.solr.common.params.ModifiableSolrParams in project lucene-solr by apache.

the class TikaLanguageIdentifierUpdateProcessorFactoryTest method testMaxFieldValueCharsAndMaxTotalChars.

/**
 * Exercises {@code concatFields} on a Tika language-identifier processor under
 * several combinations of "langid.maxFieldValueChars" and "langid.maxTotalChars",
 * first with one source field ("foo_s") and then with two ("foo_s,bar_s").
 * Each scenario builds a fresh processor and compares the trimmed concatenation
 * against the expected text.
 */
@Test
public void testMaxFieldValueCharsAndMaxTotalChars() throws Exception {
    final String luceneBlurb = "Apache Lucene is a free/open source information retrieval software library, originally created in Java by Doug Cutting. It is supported by the Apache Software Foundation and is released under the Apache Software License.";
    final String solrBlurb = "An open-source search server based on the Lucene Java search library. News, documentation, resources, and download.";

    final SolrInputDocument input = new SolrInputDocument();
    input.addField("foo_s", luceneBlurb);

    // Scenario 1: single field, no character limits configured -> full value expected.
    final ModifiableSolrParams singleNoLimits = new ModifiableSolrParams();
    singleNoLimits.add("langid.fl", "foo_s");
    singleNoLimits.add("langid.langField", "language");
    singleNoLimits.add("langid.enforceSchema", "false");
    final TikaLanguageIdentifierUpdateProcessor singleNoLimitsProcessor =
            (TikaLanguageIdentifierUpdateProcessor) createLangIdProcessor(singleNoLimits);
    assertEquals(luceneBlurb, singleNoLimitsProcessor.concatFields(input).trim());

    // Scenario 2: single field, maxFieldValueChars=8 and maxTotalChars=6 -> "Apache" expected.
    final ModifiableSolrParams singleTightLimits = new ModifiableSolrParams();
    singleTightLimits.add("langid.fl", "foo_s");
    singleTightLimits.add("langid.langField", "language");
    singleTightLimits.add("langid.enforceSchema", "false");
    singleTightLimits.add("langid.maxFieldValueChars", "8");
    singleTightLimits.add("langid.maxTotalChars", "6");
    final TikaLanguageIdentifierUpdateProcessor singleTightLimitsProcessor =
            (TikaLanguageIdentifierUpdateProcessor) createLangIdProcessor(singleTightLimits);
    assertEquals("Apache", singleTightLimitsProcessor.concatFields(input).trim());

    // The remaining scenarios work on two fields, so add the second one now.
    input.addField("bar_s", solrBlurb);
    final String bothBlurbs = luceneBlurb + " " + solrBlurb;

    // Scenario 3: two fields, no limits -> both values joined by a single space.
    final ModifiableSolrParams pairNoLimits = new ModifiableSolrParams();
    pairNoLimits.add("langid.fl", "foo_s,bar_s");
    pairNoLimits.add("langid.langField", "language");
    pairNoLimits.add("langid.enforceSchema", "false");
    final TikaLanguageIdentifierUpdateProcessor pairNoLimitsProcessor =
            (TikaLanguageIdentifierUpdateProcessor) createLangIdProcessor(pairNoLimits);
    assertEquals(bothBlurbs, pairNoLimitsProcessor.concatFields(input).trim());

    // Scenario 4: two fields, maxFieldValueChars=3 and maxTotalChars=8 -> "Apa An" expected.
    final ModifiableSolrParams pairTightLimits = new ModifiableSolrParams();
    pairTightLimits.add("langid.fl", "foo_s,bar_s");
    pairTightLimits.add("langid.langField", "language");
    pairTightLimits.add("langid.enforceSchema", "false");
    pairTightLimits.add("langid.maxFieldValueChars", "3");
    pairTightLimits.add("langid.maxTotalChars", "8");
    final TikaLanguageIdentifierUpdateProcessor pairTightLimitsProcessor =
            (TikaLanguageIdentifierUpdateProcessor) createLangIdProcessor(pairTightLimits);
    assertEquals("Apa An", pairTightLimitsProcessor.concatFields(input).trim());

    // Scenario 5: two fields, limits far larger than the content -> both values in full.
    final ModifiableSolrParams pairLooseLimits = new ModifiableSolrParams();
    pairLooseLimits.add("langid.fl", "foo_s,bar_s");
    pairLooseLimits.add("langid.langField", "language");
    pairLooseLimits.add("langid.enforceSchema", "false");
    pairLooseLimits.add("langid.maxFieldValueChars", "10000");
    pairLooseLimits.add("langid.maxTotalChars", "100000");
    final TikaLanguageIdentifierUpdateProcessor pairLooseLimitsProcessor =
            (TikaLanguageIdentifierUpdateProcessor) createLangIdProcessor(pairLooseLimits);
    assertEquals(bothBlurbs, pairLooseLimitsProcessor.concatFields(input).trim());
}
Also used : SolrInputDocument(org.apache.solr.common.SolrInputDocument) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) Test(org.junit.Test)

Example 20 with ModifiableSolrParams

use of org.apache.solr.common.params.ModifiableSolrParams in project lucene-solr by apache.

the class LangDetectLanguageIdentifierUpdateProcessorFactoryTest method testLangIdGlobal.

/*
 * Overrides the inherited test because this detector does not return 'un' for
 * the super-short sample (it apparently thinks the text is Hungarian?), so that
 * case is replaced here with a Japanese sample.
 *
 * Builds a language-id processor over the "name" and "subject" fields with
 * "language_s" as the language field and "un" as the fallback, then asserts the
 * expected language code for one sample document per language.
 */
@Test
@Override
public void testLangIdGlobal() throws Exception {
    ModifiableSolrParams parameters = new ModifiableSolrParams();
    parameters.add("langid.fl", "name,subject");
    parameters.add("langid.langField", "language_s");
    parameters.add("langid.fallback", "un");
    // liProcessor is not declared in this method; presumably a field shared with assertLang -- TODO confirm.
    liProcessor = createLangIdProcessor(parameters);
    // Each call: expected language code first, then field-name/value pairs (id, name, subject).
    assertLang("no", "id", "1no", "name", "Lucene", "subject", "Lucene er et fri/åpen kildekode programvarebibliotek for informasjonsgjenfinning, opprinnelig utviklet i programmeringsspråket Java av Doug Cutting. Lucene støttes av Apache Software Foundation og utgis under Apache-lisensen.");
    assertLang("en", "id", "2en", "name", "Lucene", "subject", "Apache Lucene is a free/open source information retrieval software library, originally created in Java by Doug Cutting. It is supported by the Apache Software Foundation and is released under the Apache Software License.");
    assertLang("sv", "id", "3sv", "name", "Maven", "subject", "Apache Maven är ett verktyg utvecklat av Apache Software Foundation och används inom systemutveckling av datorprogram i programspråket Java. Maven används för att automatiskt paketera (bygga) programfilerna till en distribuerbar enhet. Maven används inom samma område som Apache Ant men dess byggfiler är deklarativa till skillnad ifrån Ants skriptbaserade.");
    assertLang("es", "id", "4es", "name", "Lucene", "subject", "Lucene es un API de código abierto para recuperación de información, originalmente implementada en Java por Doug Cutting. Está apoyado por el Apache Software Foundation y se distribuye bajo la Apache Software License. Lucene tiene versiones para otros lenguajes incluyendo Delphi, Perl, C#, C++, Python, Ruby y PHP.");
    // Japanese sample standing in for the super-short case mentioned in the header comment.
    assertLang("ja", "id", "5ja", "name", "Japanese", "subject", "日本語(にほんご、にっぽんご)は主として、日本で使用されてきた言語である。日本国は法令上、公用語を明記していないが、事実上の公用語となっており、学校教育の「国語」で教えられる。");
    assertLang("th", "id", "6th", "name", "บทความคัดสรรเดือนนี้", "subject", "อันเนอลีส มารี อันเนอ ฟรังค์ หรือมักรู้จักในภาษาไทยว่า แอนน์ แฟรงค์ เป็นเด็กหญิงชาวยิว เกิดที่เมืองแฟรงก์เฟิร์ต ประเทศเยอรมนี เธอมีชื่อเสียงโด่งดังในฐานะผู้เขียนบันทึกประจำวันซึ่งต่อมาได้รับการตีพิมพ์เป็นหนังสือ บรรยายเหตุการณ์ขณะหลบซ่อนตัวจากการล่าชาวยิวในประเทศเนเธอร์แลนด์ ระหว่างที่ถูกเยอรมนีเข้าครอบครองในช่วงสงครามโลกครั้งที่สอง");
    assertLang("ru", "id", "7ru", "name", "Lucene", "subject", "The Apache Lucene — это свободная библиотека для высокоскоростного полнотекстового поиска, написанная на Java. Может быть использована для поиска в интернете и других областях компьютерной лингвистики (аналитическая философия).");
    assertLang("de", "id", "8de", "name", "Lucene", "subject", "Lucene ist ein Freie-Software-Projekt der Apache Software Foundation, das eine Suchsoftware erstellt. Durch die hohe Leistungsfähigkeit und Skalierbarkeit können die Lucene-Werkzeuge für beliebige Projektgrößen und Anforderungen eingesetzt werden. So setzt beispielsweise Wikipedia Lucene für die Volltextsuche ein. Zudem verwenden die beiden Desktop-Suchprogramme Beagle und Strigi eine C#- bzw. C++- Portierung von Lucene als Indexer.");
    assertLang("fr", "id", "9fr", "name", "Lucene", "subject", "Lucene est un moteur de recherche libre écrit en Java qui permet d'indexer et de rechercher du texte. C'est un projet open source de la fondation Apache mis à disposition sous licence Apache. Il est également disponible pour les langages Ruby, Perl, C++, PHP.");
    assertLang("nl", "id", "10nl", "name", "Lucene", "subject", "Lucene is een gratis open source, tekst gebaseerde information retrieval API van origine geschreven in Java door Doug Cutting. Het wordt ondersteund door de Apache Software Foundation en is vrijgegeven onder de Apache Software Licentie. Lucene is ook beschikbaar in andere programeertalen zoals Perl, C#, C++, Python, Ruby en PHP.");
    assertLang("it", "id", "11it", "name", "Lucene", "subject", "Lucene è una API gratuita ed open source per il reperimento di informazioni inizialmente implementata in Java da Doug Cutting. È supportata dall'Apache Software Foundation ed è resa disponibile con l'Apache License. Lucene è stata successivamente reimplementata in Perl, C#, C++, Python, Ruby e PHP.");
    assertLang("pt", "id", "12pt", "name", "Lucene", "subject", "Apache Lucene, ou simplesmente Lucene, é um software de busca e uma API de indexação de documentos, escrito na linguagem de programação Java. É um software de código aberto da Apache Software Foundation licenciado através da licença Apache.");
}
Also used : ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) Test(org.junit.Test)

Aggregations

ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams): 524, Test (org.junit.Test): 168, ArrayList (java.util.ArrayList): 87, NamedList (org.apache.solr.common.util.NamedList): 76, QueryRequest (org.apache.solr.client.solrj.request.QueryRequest): 74, SolrException (org.apache.solr.common.SolrException): 68, UpdateRequest (org.apache.solr.client.solrj.request.UpdateRequest): 66, HashMap (java.util.HashMap): 61, HttpSolrClient (org.apache.solr.client.solrj.impl.HttpSolrClient): 58, SolrQueryRequest (org.apache.solr.request.SolrQueryRequest): 57, List (java.util.List): 56, IOException (java.io.IOException): 55, Map (java.util.Map): 52, LocalSolrQueryRequest (org.apache.solr.request.LocalSolrQueryRequest): 52, SolrInputDocument (org.apache.solr.common.SolrInputDocument): 51, QueryResponse (org.apache.solr.client.solrj.response.QueryResponse): 48, Tuple (org.apache.solr.client.solrj.io.Tuple): 47, SolrParams (org.apache.solr.common.params.SolrParams): 42, SolrQueryResponse (org.apache.solr.response.SolrQueryResponse): 41, SolrRequest (org.apache.solr.client.solrj.SolrRequest): 36