Examples with StandardAnalyzer - org.apache.lucene.analysis.standard.StandardAnalyzer

Example 91 with StandardAnalyzer

Use of org.apache.lucene.analysis.standard.StandardAnalyzer in project querydsl by querydsl.

In the class LuceneSerializerTest, method setUp.

/**
 * Prepares the fixture before each test: creates the serializer, derives the
 * query paths from the "obj" entity path, then indexes the document returned by
 * {@code createDocument()} into a fresh {@link RAMDirectory} and opens a
 * searcher on it.
 */
@Before
public void setUp() throws Exception {
    serializer = new LuceneSerializer(true, true);

    // Root path; every field path below is derived from it.
    entityPath = new PathBuilder<Object>(Object.class, "obj");

    // String-valued paths.
    title = entityPath.getString("title");
    author = entityPath.getString("author");
    text = entityPath.getString("text");
    publisher = entityPath.getString("publisher");
    rating = entityPath.getString("rating");

    // Collection path over the same "title" property.
    titles = entityPath.getCollection("title", String.class, StringPath.class);

    // Numeric paths, one per numeric type exercised by the tests.
    year = entityPath.getNumber("year", Integer.class);
    gross = entityPath.getNumber("gross", Double.class);
    longField = entityPath.getNumber("longField", Long.class);
    shortField = entityPath.getNumber("shortField", Short.class);
    byteField = entityPath.getNumber("byteField", Byte.class);
    floatField = entityPath.getNumber("floatField", Float.class);

    // Write a single document into a newly created index.
    idx = new RAMDirectory();
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
    config = new IndexWriterConfig(Version.LUCENE_31, analyzer)
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    writer = new IndexWriter(idx, config);
    writer.addDocument(createDocument());
    writer.close();

    // Open the index for searching once the writer has been closed.
    searcher = new IndexSearcher(IndexReader.open(idx));
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) RAMDirectory(org.apache.lucene.store.RAMDirectory) IndexWriter(org.apache.lucene.index.IndexWriter) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Before(org.junit.Before)

Example 92 with StandardAnalyzer

Use of org.apache.lucene.analysis.standard.StandardAnalyzer in project querydsl by querydsl.

In the class LuceneSerializerTest, method setUp.

/**
 * Prepares the fixture before each test: creates the serializer, derives the
 * query paths from the "obj" entity path, then indexes the document returned by
 * {@code createDocument()} into a fresh {@link RAMDirectory} and opens a
 * searcher on it.
 */
@Before
public void setUp() throws Exception {
    serializer = new LuceneSerializer(true, true);

    // Root path; every field path below is derived from it.
    entityPath = new PathBuilder<Object>(Object.class, "obj");

    // String-valued paths.
    title = entityPath.getString("title");
    author = entityPath.getString("author");
    text = entityPath.getString("text");
    publisher = entityPath.getString("publisher");
    rating = entityPath.getString("rating");

    // Collection path over the same "title" property.
    titles = entityPath.getCollection("title", String.class, StringPath.class);

    // Numeric paths, one per numeric type exercised by the tests.
    year = entityPath.getNumber("year", Integer.class);
    gross = entityPath.getNumber("gross", Double.class);
    longField = entityPath.getNumber("longField", Long.class);
    shortField = entityPath.getNumber("shortField", Short.class);
    byteField = entityPath.getNumber("byteField", Byte.class);
    floatField = entityPath.getNumber("floatField", Float.class);

    // Write a single document into a newly created index.
    idx = new RAMDirectory();
    StandardAnalyzer analyzer = new StandardAnalyzer();
    config = new IndexWriterConfig(analyzer)
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    writer = new IndexWriter(idx, config);
    writer.addDocument(createDocument());
    writer.close();

    // Open the index for searching once the writer has been closed.
    searcher = new IndexSearcher(DirectoryReader.open(idx));
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) RAMDirectory(org.apache.lucene.store.RAMDirectory) IndexWriter(org.apache.lucene.index.IndexWriter) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) StringPath(com.querydsl.core.types.dsl.StringPath) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Before(org.junit.Before)

Example 93 with StandardAnalyzer

Use of org.apache.lucene.analysis.standard.StandardAnalyzer in project graylog2-server by Graylog2.

In the class SearchResource, method createRequestExceptionForParseFailure.

/**
 * Translates a failed search into the exception that is reported to the caller.
 *
 * <p>If the failure text mentions a nested {@code QueryParsingException}, the query
 * is re-parsed locally with a Lucene {@link QueryParser} to recover the position of
 * the offending token, and a {@link BadRequestException} carrying a
 * {@link QueryParseError} entity is returned. Any other failure yields an
 * {@link InternalServerErrorException}.
 *
 * @param query the query string that was submitted
 * @param e     the exception raised while executing the search
 * @return the exception to hand back to the client
 */
protected WebApplicationException createRequestExceptionForParseFailure(String query, SearchPhaseExecutionException e) {
    LOG.warn("Unable to execute search: {}", e.getMessage());
    // Generic error used when the local re-parse does not fail.
    QueryParseError errorMessage = QueryParseError.create(query, "Unable to execute search", e.getClass().getCanonicalName());

    // We're so going to hell for this…
    final boolean isQueryParsingFailure = e.toString().contains("nested: QueryParsingException");
    if (!isQueryParsingFailure) {
        return new InternalServerErrorException("Unable to fulfill search request", e);
    }

    final QueryParser queryParser = new QueryParser("", new StandardAnalyzer());
    try {
        queryParser.parse(query);
    } catch (ParseException parseException) {
        Token firstToken = null;
        try {
            // FIXME I have no idea why this is necessary but without that call currentToken will be null.
            firstToken = queryParser.generateParseException().currentToken;
        } catch (NullPointerException npe) {
            // "Normal" exception and no need to spam the logs with it.
            LOG.debug("Exception thrown while generating parse exception.", npe);
        }

        final String parseMessage = parseException.getMessage();
        final String parseExceptionClass = parseException.getClass().getCanonicalName();

        if (firstToken == null) {
            LOG.warn("No position/token available for ParseException.", parseException);
            errorMessage = QueryParseError.create(query, parseMessage, parseExceptionClass);
        } else {
            int beginColumn = 0;
            int beginLine = 0;
            int endColumn = 0;
            int endLine = 0;
            // Walk the token chain until a token reports a non-zero begin line.
            for (Token token = firstToken; token != null && beginLine == 0; token = token.next) {
                beginColumn = token.beginColumn;
                beginLine = token.beginLine;
                endColumn = token.endColumn;
                endLine = token.endLine;
            }
            errorMessage = QueryParseError.create(query, beginColumn, beginLine, endColumn, endLine, parseMessage, parseExceptionClass);
        }
    }

    return new BadRequestException(Response.status(Response.Status.BAD_REQUEST).entity(errorMessage).build());
}

Also used : QueryParser(org.apache.lucene.queryparser.classic.QueryParser) QueryParseError(org.graylog2.rest.resources.search.responses.QueryParseError) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) BadRequestException(javax.ws.rs.BadRequestException) InternalServerErrorException(javax.ws.rs.InternalServerErrorException) Token(org.apache.lucene.queryparser.classic.Token) ParseException(org.apache.lucene.queryparser.classic.ParseException)

Example 94 with StandardAnalyzer

Use of org.apache.lucene.analysis.standard.StandardAnalyzer in project textdb by TextDB.

In the class DataflowUtils, method tokenizeQueryWithStopwords.

/**
 * Splits a query into tokens with a {@link StandardAnalyzer} that has an empty
 * stop-word set, so no token is dropped as a stop word.
 *
 * <p>The analyzer's tokens are lower-cased, so each one is mapped back to the
 * matching slice of {@code query} to keep the caller's original casing. The
 * lookup resumes after the previously matched token, so a repeated word is
 * mapped to its own occurrence rather than to the first one.
 *
 * @param query the query string to tokenize
 * @return the tokens in the order the analyzer produced them; if tokenizing
 *         fails part-way, the tokens collected so far
 */
public static ArrayList<String> tokenizeQueryWithStopwords(String query) {
    ArrayList<String> result = new ArrayList<String>();
    CharArraySet emptyStopwords = new CharArraySet(1, true);

    // Lower-case once, locale-independently, so the lookup does not vary with
    // the JVM's default locale (e.g. Turkish dotless i).
    String lowerCaseQuery = query.toLowerCase(java.util.Locale.ROOT);
    // Position in the query just past the previously matched token.
    int searchFrom = 0;

    // try-with-resources closes the token stream and then the analyzer, even
    // when tokenizing throws.
    try (Analyzer luceneAnalyzer = new StandardAnalyzer(emptyStopwords);
            TokenStream tokenStream = luceneAnalyzer.tokenStream(null, new StringReader(query))) {
        CharTermAttribute term = tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            String token = term.toString();
            int tokenIndex = lowerCaseQuery.indexOf(token, searchFrom);
            int tokenEnd = tokenIndex + token.length();
            if (tokenIndex < 0 || tokenEnd > query.length()) {
                // The token cannot be located in the original text (for example
                // lower-casing changed the string length); keep the analyzer's
                // form instead of failing the whole tokenization.
                result.add(token);
                continue;
            }
            // Since tokens are converted to lower case,
            // get the exact token from the query string.
            result.add(query.substring(tokenIndex, tokenEnd));
            searchFrom = tokenEnd;
        }
        tokenStream.end();
    } catch (Exception e) {
        // Best effort: report the failure and return what was collected.
        e.printStackTrace();
    }
    return result;
}

Also used: CharArraySet(org.apache.lucene.analysis.util.CharArraySet) TokenStream(org.apache.lucene.analysis.TokenStream) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) ArrayList(java.util.ArrayList) StringReader(java.io.StringReader) Analyzer(org.apache.lucene.analysis.Analyzer) DataFlowException(edu.uci.ics.textdb.api.exception.DataFlowException) IOException(java.io.IOException)

Example 95 with StandardAnalyzer

Use of org.apache.lucene.analysis.standard.StandardAnalyzer in project textdb by TextDB.

In the class RelationManagerTest, method test2.

/**
 * Tests that the information about the "schema catalog" itself is stored properly:
 * the relation manager must report the expected directory, a
 * {@link StandardAnalyzer}, and the expected schema for the schema catalog table.
 */
@Test
public void test2() throws Exception {
    String schemaCatalogDirectory = relationManager.getTableDirectory(CatalogConstants.SCHEMA_CATALOG);
    Analyzer schemaCatalogLuceneAnalyzer = relationManager.getTableAnalyzer(CatalogConstants.SCHEMA_CATALOG);
    Schema schemaCatalogSchema = relationManager.getTableSchema(CatalogConstants.SCHEMA_CATALOG);

    // JUnit's assertEquals takes (expected, actual); keeping that order makes
    // failure messages label the two values correctly.
    Assert.assertEquals(new File(CatalogConstants.SCHEMA_CATALOG_DIRECTORY).getCanonicalPath(), schemaCatalogDirectory);
    Assert.assertTrue(schemaCatalogLuceneAnalyzer instanceof StandardAnalyzer);
    Assert.assertEquals(Utils.getSchemaWithID(CatalogConstants.SCHEMA_CATALOG_SCHEMA), schemaCatalogSchema);
}

Also used: Schema(edu.uci.ics.textdb.api.schema.Schema) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) File(java.io.File) Test(org.junit.Test)

Aggregations

StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer): 112; Analyzer (org.apache.lucene.analysis.Analyzer): 37; IndexWriter (org.apache.lucene.index.IndexWriter): 36; Document (org.apache.lucene.document.Document): 29; IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 29; IndexSearcher (org.apache.lucene.search.IndexSearcher): 24; Term (org.apache.lucene.index.Term): 22; RAMDirectory (org.apache.lucene.store.RAMDirectory): 21; Test (org.junit.Test): 21; Query (org.apache.lucene.search.Query): 20; BooleanQuery (org.apache.lucene.search.BooleanQuery): 19; TermQuery (org.apache.lucene.search.TermQuery): 19; IOException (java.io.IOException): 16; Before (org.junit.Before): 15; IndexReader (org.apache.lucene.index.IndexReader): 14; HashMap (java.util.HashMap): 13; Field (org.apache.lucene.document.Field): 13; ArrayList (java.util.ArrayList): 12; QueryParser (org.apache.lucene.queryparser.classic.QueryParser): 12; Directory (org.apache.lucene.store.Directory): 12