Usage of org.solbase.indexer.ParsedDoc in the Solbase project (by Photobucket):
the addDoc method of the SolbaseIndexWriter class.
/**
 * Adds a single document to the Solbase index, or applies an in-place edit when the
 * document carries an "edit" control field.
 *
 * <p>Solbase-specific control fields ("docId", "edit", "updateStore") are read and then
 * stripped from the Lucene document before parsing. The document id is taken from the
 * schema's unique-key field and must parse as an int.
 *
 * @param cmd the Solr add/update command carrying the Lucene document
 * @return 1 on success, -1 on failure (error counters are bumped in that case)
 * @throws IOException if the schema has no unique key field or no id field
 */
public int addDoc(AddUpdateCommand cmd) throws IOException {
    addCommands.incrementAndGet();
    addCommandsCumulative.incrementAndGet();
    int rc = -1;
    // Solbase does not allow duplicates, so both a schema unique-key field and an
    // id field are mandatory.
    SchemaField uniqueField = core.getSchema().getUniqueKeyField();
    if (uniqueField == null) {
        throw new IOException("Solbase requires a unique field");
    }
    if (idField == null) {
        throw new IOException("Solbase requires a unique field");
    }
    try {
        String indexName = core.getName();
        writer.setIndexName(indexName);
        Document doc = cmd.getLuceneDocument(schema);
        String idFieldName = idTerm.field();
        // "updateStore" present => this node is responsible for writing hbase after
        // the cache is updated.
        boolean updateStore = doc.get("updateStore") != null;
        int docNumber = Integer.parseInt(doc.get(idFieldName));
        // Presence of "edit" marks a modification of an existing doc instead of a blind add.
        String editVal = doc.get("edit");
        // Control fields are only used by the update api; strip them before indexing.
        doc.removeField("docId");
        doc.removeField("edit");
        doc.removeField("updateStore");
        writer.setIndexUtil(indexUtil);
        int shardNum = SolbaseShardUtil.getShardNum(indexName);
        int startDocId = SolbaseShardUtil.getStartDocId(shardNum);
        int endDocId = SolbaseShardUtil.getEndDocId(shardNum);
        if (editVal != null) {
            logger.info("updating doc: " + docNumber);
            if (editDoc(doc, indexName, docNumber, updateStore)) {
                rc = 1;
            }
        } else {
            try {
                logger.info("adding doc: " + docNumber);
                ParsedDoc parsedDoc = writer.parseDoc(doc, schema.getAnalyzer(), indexName, docNumber, indexUtil.getSortFieldNames());
                List<TermDocMetadata> termDocMetas = parsedDoc.getTermDocMetadatas();
                // TODO: possible problem
                // doc is not in cache, cluster isn't responsible for update store
                // doc never gets updated in hbase, nor cache
                // for loop below will update tv with this new doc.
                // when searched, it will throw null pointer exception on this doc
                // therefore, update store first if adding doc (replication can still cause this issue if back'd up)
                ReaderCache.updateDocument(docNumber, parsedDoc, indexName, writer, LayeredCache.ModificationType.ADD, updateStore, startDocId, endDocId);
                for (TermDocMetadata termDocMeta : termDocMetas) {
                    ReaderCache.updateTermDocsMetadata(termDocMeta.getTerm(), termDocMeta, indexName, writer, LayeredCache.ModificationType.ADD, updateStore, startDocId, endDocId);
                }
                rc = 1;
                logger.info("added doc: " + docNumber);
            } catch (InterruptedException e) {
                // Restore the interrupt status instead of silently swallowing it.
                Thread.currentThread().interrupt();
                logAddFailure(docNumber, e);
            } catch (NumberFormatException e) {
                logAddFailure(docNumber, e);
            } catch (MemcachedException e) {
                logAddFailure(docNumber, e);
            } catch (TimeoutException e) {
                logAddFailure(docNumber, e);
            } catch (SolbaseException e) {
                logAddFailure(docNumber, e);
            }
        }
    } finally {
        // rc stays -1 on any failure path, including exceptions thrown out of the try.
        if (rc != 1) {
            numErrors.incrementAndGet();
            numErrorsCumulative.incrementAndGet();
        }
    }
    return rc;
}

/** Logs a failed add for the given doc number along with the triggering exception. */
private void logAddFailure(int docNumber, Exception e) {
    logger.info("adding doc failed: " + docNumber);
    logger.info(e.toString());
}
Usage of org.solbase.indexer.ParsedDoc in the Solbase project (by Photobucket):
the deleteDocument method of the DocumentLoader class.
/**
 * Parses an existing document and pushes DELETE modifications for each of its term-doc
 * metadata entries into the reader cache (and, when {@code updateStore} is true, the
 * backing store).
 *
 * @param oldDoc      the cached document to delete; cloned before mutation so concurrent
 *                    readers of the cached instance are unaffected
 * @param docId       numeric Solbase document id
 * @param indexName   name of the index the document belongs to
 * @param writer      index writer used to parse the document
 * @param indexUtil   provides the sort field names for parsing
 * @param updateStore whether this node should also write the change to the backing store
 * @param startDocId  first doc id of this shard's range
 * @param endDocId    last doc id of this shard's range
 * @return the parsed document, or {@code null} if any step failed
 */
private ParsedDoc deleteDocument(Document oldDoc, int docId, String indexName, IndexWriter writer, SolbaseIndexUtil indexUtil, boolean updateStore, int startDocId, int endDocId) {
    try {
        // Clone so reads of the cached document won't conflict with the mutation below.
        oldDoc = new Document(oldDoc);
        oldDoc.removeField("docId");
        ParsedDoc parsedDoc = writer.parseDoc(oldDoc, schema.getAnalyzer(), indexName, docId, indexUtil.getSortFieldNames());
        List<TermDocMetadata> metadatas = parsedDoc.getTermDocMetadatas();
        // TODO: doing duplicate work here - once from updateObject and again from updateObjectStore
        for (TermDocMetadata metadata : metadatas) {
            ReaderCache.updateTermDocsMetadata(metadata.getTerm(), metadata, indexName, writer, LayeredCache.ModificationType.DELETE, updateStore, startDocId, endDocId);
        }
        return parsedDoc;
    } catch (InterruptedException e) {
        // Restore the interrupt status instead of silently swallowing it.
        Thread.currentThread().interrupt();
        // NOTE(review): should go through a logger rather than stderr — confirm the
        // class has a logger field available before switching.
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (MemcachedException e) {
        e.printStackTrace();
    } catch (TimeoutException e) {
        e.printStackTrace();
    }
    // Any failure above is reported to the caller as null.
    return null;
}
Aggregations