Search in sources :

Example 91 with TermQuery

use of org.apache.lucene.search.TermQuery in project textdb by TextDB.

In class RelationManager, the method createTable:

/**
     * Creates a new table.
     *   Table name must be unique (case insensitive).
     *   LuceneAnalyzer must be a valid analyzer string.
     * 
     * The "_id" attribute will be added to the table schema.
     * System automatically generates a unique ID for each tuple inserted to a table,
     *   the generated ID will be in "_id" field.
     * 
     * @param tableName, the name of the table, must be unique, case is not sensitive
     * @param indexDirectory, the directory to store the index and data, must not duplicate with other tables' directories
     * @param schema, the schema of the table
     * @param luceneAnalyzerString, the string representing the lucene analyzer used
     * @throws StorageException if the table already exists, the directory is taken by
     *         another table, the analyzer string is invalid, or the directory cannot be created
     */
public void createTable(String tableName, String indexDirectory, Schema schema, String luceneAnalyzerString) throws StorageException {
    // table names are case-insensitive: normalize before any catalog lookup
    tableName = tableName.toLowerCase();
    // table should not exist
    if (checkTableExistence(tableName)) {
        throw new StorageException(String.format("Table %s already exists.", tableName));
    }
    // resolve the index directory to its canonical absolute path (creating it if absent)
    // so the duplicate-directory check below compares equivalent paths
    try {
        Path indexPath = Paths.get(indexDirectory);
        if (Files.notExists(indexPath)) {
            Files.createDirectories(indexPath);
        }
        indexDirectory = indexPath.toRealPath().toString();
    } catch (IOException e) {
        throw new StorageException(e);
    }
    // check if the indexDirectory overlaps with another table's index directory
    Query indexDirectoryQuery = new TermQuery(new Term(CatalogConstants.TABLE_DIRECTORY, indexDirectory));
    DataReader tableCatalogDataReader = new DataReader(CatalogConstants.TABLE_CATALOG_DATASTORE, indexDirectoryQuery);
    tableCatalogDataReader.setPayloadAdded(false);
    tableCatalogDataReader.open();
    Tuple nextTuple;
    try {
        nextTuple = tableCatalogDataReader.getNextTuple();
    } finally {
        // always release the reader, even if reading the catalog fails
        tableCatalogDataReader.close();
    }
    // if the index directory is already taken by another table, throws an exception
    if (nextTuple != null) {
        String overlapTableName = nextTuple.getField(CatalogConstants.TABLE_NAME).getValue().toString();
        throw new StorageException(String.format("Table %s already takes the index directory %s. Please choose another directory.", overlapTableName, indexDirectory));
    }
    // check if the lucene analyzer string is valid; no initializer needed because
    // the catch clause always throws, so the variable is definitely assigned
    Analyzer luceneAnalyzer;
    try {
        luceneAnalyzer = LuceneAnalyzerConstants.getLuceneAnalyzer(luceneAnalyzerString);
    } catch (DataFlowException e) {
        // keep the original cause so the invalid-analyzer failure can be diagnosed
        throw new StorageException("Lucene Analyzer String is not valid.", e);
    }
    // create the directory and clear all data in the index directory
    Schema tableSchema = Utils.getSchemaWithID(schema);
    DataStore tableDataStore = new DataStore(indexDirectory, tableSchema);
    DataWriter dataWriter = new DataWriter(tableDataStore, luceneAnalyzer);
    dataWriter.open();
    dataWriter.clearData();
    dataWriter.close();
    // write table info to catalog
    writeTableInfoToCatalog(tableName, indexDirectory, schema, luceneAnalyzerString);
}
Also used : Path(java.nio.file.Path) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) Schema(edu.uci.ics.textdb.api.schema.Schema) IOException(java.io.IOException) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) DataFlowException(edu.uci.ics.textdb.api.exception.DataFlowException) StorageException(edu.uci.ics.textdb.api.exception.StorageException) Tuple(edu.uci.ics.textdb.api.tuple.Tuple)

Example 92 with TermQuery

use of org.apache.lucene.search.TermQuery in project textdb by TextDB.

In class RelationManager, the method getSchemaCatalogTuples:

/*
     * Retrieves every schema-catalog tuple recorded for the given table
     * (one tuple per attribute of the table's schema).
     */
private static List<Tuple> getSchemaCatalogTuples(String tableName) throws StorageException {
    // catalog entries are stored lower-cased, so normalize the lookup key
    tableName = tableName.toLowerCase();
    Query lookupQuery = new TermQuery(new Term(CatalogConstants.TABLE_NAME, tableName));
    DataReader catalogReader = new DataReader(CatalogConstants.SCHEMA_CATALOG_DATASTORE, lookupQuery);
    catalogReader.open();
    // drain the reader into a list
    List<Tuple> results = new ArrayList<>();
    for (Tuple current = catalogReader.getNextTuple(); current != null; current = catalogReader.getNextTuple()) {
        results.add(current);
    }
    catalogReader.close();
    return results;
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) Tuple(edu.uci.ics.textdb.api.tuple.Tuple)

Example 93 with TermQuery

use of org.apache.lucene.search.TermQuery in project textdb by TextDB.

In class RelationManager, the method getTableCatalogTuple:

/*
     * Fetches the single table-catalog tuple for the given table,
     * or null if the table is not registered in the catalog.
     */
private static Tuple getTableCatalogTuple(String tableName) throws StorageException {
    // catalog entries are stored lower-cased, so normalize the lookup key
    tableName = tableName.toLowerCase();
    Query lookupQuery = new TermQuery(new Term(CatalogConstants.TABLE_NAME, tableName));
    DataReader catalogReader = new DataReader(CatalogConstants.TABLE_CATALOG_DATASTORE, lookupQuery);
    catalogReader.setPayloadAdded(false);
    catalogReader.open();
    // read all matches so duplicates can be detected below
    List<Tuple> matches = new ArrayList<>();
    for (Tuple current = catalogReader.getNextTuple(); current != null; current = catalogReader.getNextTuple()) {
        matches.add(current);
    }
    catalogReader.close();
    switch (matches.size()) {
        case 0:
            // table is not registered
            return null;
        case 1:
            return matches.get(0);
        default:
            // more than one entry means the catalog itself is inconsistent
            throw new StorageException("Catalog corrupted: duplicate table name found in catalog.");
    }
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) StorageException(edu.uci.ics.textdb.api.exception.StorageException) Tuple(edu.uci.ics.textdb.api.tuple.Tuple)

Example 94 with TermQuery

use of org.apache.lucene.search.TermQuery in project lucene-solr by apache.

In class TestHierarchicalDocBuilder, the method createBlockJoinQuery:

// Builds a block-join query for a random path down the hierarchy:
// descends into a randomly chosen child until a leaf is reached, then
// matches the leaf by its id and wraps each level in a to-parent query.
private Query createBlockJoinQuery(Hierarchy hierarchy) {
    List<Hierarchy> children = hierarchy.elements;
    if (!children.isEmpty()) {
        // recurse into one randomly selected child, then join up to this parent type
        Hierarchy picked = children.get(random().nextInt(children.size()));
        return createToParentQuery(hierarchy.elementType, createBlockJoinQuery(picked));
    }
    // leaf level: match this element by its id field
    BooleanQuery.Builder leafQuery = new BooleanQuery.Builder();
    leafQuery.add(new TermQuery(new Term(FIELD_ID, (String) hierarchy.elementData.get(FIELD_ID))), Occur.MUST);
    return leafQuery.build();
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) Term(org.apache.lucene.index.Term)

Example 95 with TermQuery

use of org.apache.lucene.search.TermQuery in project lucene-solr by apache.

In class CloudMLTQParser, the method parse:

/**
 * Builds a MoreLikeThis query for the document identified by the local
 * "v" parameter, fetched via real-time get. Per-field boosts from "qf"
 * are applied to the generated term clauses, and the source document
 * itself is excluded from the results.
 *
 * @throws SolrException (BAD_REQUEST) if the document cannot be fetched,
 *         no usable similarity field is available, or MLT construction fails
 */
public Query parse() {
    String id = localParams.get(QueryParsing.V);
    // Do a Real Time Get for the document
    SolrDocument doc = getDocument(id);
    if (doc == null) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request. Could not fetch " + "document with id [" + id + "]");
    }
    String[] qf = localParams.getParams("qf");
    Map<String, Float> boostFields = new HashMap<>();
    // configure MLT from local params, falling back to MoreLikeThis defaults
    MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
    mlt.setMinTermFreq(localParams.getInt("mintf", MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
    mlt.setMinDocFreq(localParams.getInt("mindf", 0));
    mlt.setMinWordLen(localParams.getInt("minwl", MoreLikeThis.DEFAULT_MIN_WORD_LENGTH));
    mlt.setMaxWordLen(localParams.getInt("maxwl", MoreLikeThis.DEFAULT_MAX_WORD_LENGTH));
    mlt.setMaxQueryTerms(localParams.getInt("maxqt", MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
    mlt.setMaxNumTokensParsed(localParams.getInt("maxntp", MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
    mlt.setMaxDocFreq(localParams.getInt("maxdf", MoreLikeThis.DEFAULT_MAX_DOC_FREQ));
    Boolean boost = localParams.getBool("boost", MoreLikeThis.DEFAULT_BOOST);
    mlt.setBoost(boost);
    mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
    Map<String, Collection<Object>> filteredDocument = new HashMap<>();
    String[] fieldNames;
    if (qf != null) {
        // explicit field list: split comma/space-separated values and parse boosts
        ArrayList<String> fields = new ArrayList<>();
        for (String fieldName : qf) {
            if (!StringUtils.isEmpty(fieldName)) {
                String[] strings = splitList.split(fieldName);
                for (String string : strings) {
                    if (!StringUtils.isEmpty(string)) {
                        fields.add(string);
                    }
                }
            }
        }
        // Parse field names and boosts from the fields
        boostFields = SolrPluginUtils.parseFieldBoosts(fields.toArray(new String[0]));
        fieldNames = boostFields.keySet().toArray(new String[0]);
    } else {
        ArrayList<String> fields = new ArrayList<>();
        for (String field : doc.getFieldNames()) {
            // Only use fields that are stored and have an explicit analyzer.
            // This makes sense as the query uses tf/idf/.. for query construction.
            // We might want to relook and change this in the future though.
            SchemaField f = req.getSchema().getFieldOrNull(field);
            if (f != null && f.stored() && f.getType().isExplicitAnalyzer()) {
                fields.add(field);
            }
        }
        fieldNames = fields.toArray(new String[0]);
    }
    if (fieldNames.length < 1) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "MoreLikeThis requires at least one similarity field: qf");
    }
    mlt.setFieldNames(fieldNames);
    // collect the document's field values, unwrapping IndexableFields to strings
    for (String field : fieldNames) {
        Collection<Object> fieldValues = doc.getFieldValues(field);
        if (fieldValues != null) {
            Collection<Object> values = new ArrayList<>();
            for (Object val : fieldValues) {
                if (val instanceof IndexableField) {
                    values.add(((IndexableField) val).stringValue());
                } else {
                    values.add(val);
                }
            }
            filteredDocument.put(field, values);
        }
    }
    try {
        Query rawMLTQuery = mlt.like(filteredDocument);
        BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
        if (boost && boostFields.size() > 0) {
            // rebuild the query, multiplying each clause's boost by its field boost
            BooleanQuery.Builder newQ = new BooleanQuery.Builder();
            newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());
            for (BooleanClause clause : boostedMLTQuery) {
                Query q = clause.getQuery();
                float originalBoost = 1f;
                if (q instanceof BoostQuery) {
                    BoostQuery bq = (BoostQuery) q;
                    q = bq.getQuery();
                    originalBoost = bq.getBoost();
                }
                // NOTE(review): assumes every MLT clause (after unwrapping BoostQuery)
                // is a TermQuery; a different clause type would throw ClassCastException
                Float fieldBoost = boostFields.get(((TermQuery) q).getTerm().field());
                q = ((fieldBoost != null) ? new BoostQuery(q, fieldBoost * originalBoost) : clause.getQuery());
                newQ.add(q, clause.getOccur());
            }
            boostedMLTQuery = newQ.build();
        }
        // exclude current document from results
        BooleanQuery.Builder realMLTQuery = new BooleanQuery.Builder();
        realMLTQuery.add(boostedMLTQuery, BooleanClause.Occur.MUST);
        realMLTQuery.add(createIdQuery(req.getSchema().getUniqueKeyField().getName(), id), BooleanClause.Occur.MUST_NOT);
        return realMLTQuery.build();
    } catch (IOException e) {
        // preserve the cause instead of printing the stack trace and discarding it
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Bad Request", e);
    }
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) HashMap(java.util.HashMap) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) ArrayList(java.util.ArrayList) MoreLikeThis(org.apache.lucene.queries.mlt.MoreLikeThis) BoostQuery(org.apache.lucene.search.BoostQuery) SolrDocument(org.apache.solr.common.SolrDocument) SolrException(org.apache.solr.common.SolrException) TermQuery(org.apache.lucene.search.TermQuery) IOException(java.io.IOException) SchemaField(org.apache.solr.schema.SchemaField) IndexableField(org.apache.lucene.index.IndexableField) BooleanClause(org.apache.lucene.search.BooleanClause) Collection(java.util.Collection)

Aggregations

TermQuery (org.apache.lucene.search.TermQuery)673 Term (org.apache.lucene.index.Term)560 BooleanQuery (org.apache.lucene.search.BooleanQuery)343 Query (org.apache.lucene.search.Query)275 IndexSearcher (org.apache.lucene.search.IndexSearcher)252 Document (org.apache.lucene.document.Document)210 TopDocs (org.apache.lucene.search.TopDocs)164 Directory (org.apache.lucene.store.Directory)164 IndexReader (org.apache.lucene.index.IndexReader)125 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)125 PhraseQuery (org.apache.lucene.search.PhraseQuery)122 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)116 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)114 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)97 BoostQuery (org.apache.lucene.search.BoostQuery)85 Field (org.apache.lucene.document.Field)81 Test (org.junit.Test)75 PrefixQuery (org.apache.lucene.search.PrefixQuery)74 ArrayList (java.util.ArrayList)72 FuzzyQuery (org.apache.lucene.search.FuzzyQuery)62