
Example 1 with TermDocMetadata

Use of org.solbase.lucenehbase.TermDocMetadata in project Solbase by Photobucket.

The class SolbaseInitialIndexMapper, method map:

protected void map(ImmutableBytesWritable row, Result values, Context context) throws IOException {
    context.getCounter(Counters.TOTAL_ROWS).increment(1);
    context.setStatus(context.getCounter(Counters.TOTAL_ROWS) + "");
    // the global id is the user_media row key
    String globalId = Bytes.toString(row.get());
    Document doc = indexerUtil.createLuceneDocument(globalId, values, context);
    byte[] checksum = values.getValue(Bytes.toBytes("meta"), Bytes.toBytes("checksum"));
    if (doc == null) {
        // validation must have failed if it returned null
        return;
    }
    // grab a fresh chunk of ids when the current chunk is exhausted (or not yet initialized)
    if (this.idCounter > (SolbaseUtil.UNIQ_ID_CHUNK - 1) || docId == null) {
        docId = SolbaseUtil.generateUniqId();
        this.idCounter = 0;
    } else {
        docId--;
    }
    // for us, docId is a globally unique id, which ties us to Lucene's two-billion-document limit
    // adding this field to the doc is harmless, and it only really matters once sharding comes in and docs are fetched by their docId
    indexerUtil.addFieldToDoc(doc, "docId", docId + "");
    // incrementing chunking sequence (lucene doc id)
    this.idCounter++;
    try {
        ParsedDoc parsedDoc = indexerUtil.getIndexWriter().parseDoc(doc, indexerUtil.getAnalyzer(), "", docId, indexerUtil.getSortFieldNames());
        List<TermDocMetadata> metadatas = parsedDoc.getTermDocMetadatas();
        MapWritable mapWritable = new MapWritable();
        DocumentPutWritable docWritable = new DocumentPutWritable(parsedDoc.getFieldsMap(), parsedDoc.getAllTerms(), docId, globalId);
        mapWritable.put(new BytesWritable(Bytes.toBytes("doc")), docWritable);
        for (TermDocMetadata metadata : metadatas) {
            byte[] key = metadata.getFieldTermKey();
            ByteBuffer buf = metadata.serialize();
            TermDocMetadataWritable writable = new TermDocMetadataWritable(docId, Bytes.toBytes(buf), key);
            mapWritable.put(new BytesWritable(key), writable);
        }
        context.write(new BytesWritable(checksum), mapWritable);
    } catch (InterruptedException e) {
        // context.write() can be interrupted; restore the interrupt flag instead of swallowing it
        Thread.currentThread().interrupt();
        e.printStackTrace();
    }
}
Also used: ParsedDoc (org.solbase.indexer.ParsedDoc), TermDocMetadataWritable (org.solbase.indexer.writable.TermDocMetadataWritable), TermDocMetadata (org.solbase.lucenehbase.TermDocMetadata), BytesWritable (org.apache.hadoop.io.BytesWritable), ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable), MapWritable (org.apache.hadoop.io.MapWritable), Document (org.apache.lucene.document.Document), ByteBuffer (java.nio.ByteBuffer), DocumentPutWritable (org.solbase.indexer.writable.DocumentPutWritable)
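The interesting part of this mapper is the chunked docId allocation: rather than asking SolbaseUtil.generateUniqId() for every row, it reserves a block of SolbaseUtil.UNIQ_ID_CHUNK ids once and hands them out locally by decrementing. The following is a minimal standalone sketch of that pattern; the IdSource interface and CHUNK_SIZE constant are illustrative stand-ins, not Solbase API.

public class ChunkedIdAllocator {

    // stand-in for an external id sequence such as an HBase counter;
    // reserveChunkTop() returns the top id of a freshly reserved block
    public interface IdSource {
        int reserveChunkTop();
    }

    // stand-in for SolbaseUtil.UNIQ_ID_CHUNK
    private static final int CHUNK_SIZE = 10000;

    private final IdSource source;
    // null until the first chunk is reserved, mirroring docId in the mapper
    private Integer docId = null;
    // how many ids of the current chunk have been handed out
    private int idCounter = 0;

    public ChunkedIdAllocator(IdSource source) {
        this.source = source;
    }

    public int nextId() {
        if (idCounter > (CHUNK_SIZE - 1) || docId == null) {
            // chunk exhausted (or never initialized): reserve a new block
            docId = source.reserveChunkTop();
            idCounter = 0;
        } else {
            // count down within the reserved block
            docId--;
        }
        idCounter++;
        return docId;
    }
}

The trade-off is the same one the mapper accepts: ids are unique but neither dense nor ordered across tasks, and a task that dies mid-chunk simply leaks the unused remainder of its block.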

Example 2 with TermDocMetadata

Use of org.solbase.lucenehbase.TermDocMetadata in project Solbase by Photobucket.

The class SolbaseIndexWriter, method addDoc:

public int addDoc(AddUpdateCommand cmd) throws IOException {
    addCommands.incrementAndGet();
    addCommandsCumulative.incrementAndGet();
    int rc = -1;
    // no duplicates allowed: a unique key field is mandatory
    SchemaField uniqueField = core.getSchema().getUniqueKeyField();
    if (uniqueField == null) {
        throw new IOException("Solbase requires a unique field");
    }
    // the id field must be present as well; allowDups is not supported
    if (idField == null) {
        throw new IOException("Solbase requires a unique field");
    }
    try {
        String indexName = core.getName();
        writer.setIndexName(indexName);
        Document doc = cmd.getLuceneDocument(schema);
        String idFieldName = idTerm.field();
        // Solbase-specific control fields; they are removed again below once consumed
        boolean updateStore = false;
        String updateVal = doc.get("updateStore");
        if (updateVal != null) {
            // updating hbase after cache is updated
            updateStore = true;
        }
        int docNumber = Integer.parseInt(doc.get(idFieldName));
        // if the edit field is present, this is a modification rather than a blind add
        String editVal = doc.get("edit");
        // the following fields are no longer needed; they are only used by the update API
        doc.removeField("docId");
        doc.removeField("edit");
        doc.removeField("updateStore");
        // set indexutil to writer
        writer.setIndexUtil(indexUtil);
        String globalId = doc.getField("global_uniq_id").stringValue();
        int shardNum = SolbaseShardUtil.getShardNum(indexName);
        int startDocId = SolbaseShardUtil.getStartDocId(shardNum);
        int endDocId = SolbaseShardUtil.getEndDocId(shardNum);
        if (editVal != null) {
            logger.info("updating doc: " + docNumber);
            if (editDoc(doc, indexName, docNumber, updateStore)) {
                rc = 1;
            }
        } else {
            try {
                logger.info("adding doc: " + docNumber);
                ParsedDoc parsedDoc = writer.parseDoc(doc, schema.getAnalyzer(), indexName, docNumber, indexUtil.getSortFieldNames());
                List<TermDocMetadata> termDocMetas = parsedDoc.getTermDocMetadatas();
                // TODO: possible problem
                // if the doc is not in the cache and this cluster isn't responsible for updating the store,
                // the doc never gets updated in hbase or in the cache,
                // yet the loop below still updates the term vectors with this new doc;
                // a search would then hit a null pointer exception on it.
                // therefore, update the store first when adding a doc (replication can still cause this if backed up)
                ReaderCache.updateDocument(docNumber, parsedDoc, indexName, writer, LayeredCache.ModificationType.ADD, updateStore, startDocId, endDocId);
                for (TermDocMetadata termDocMeta : termDocMetas) {
                    ReaderCache.updateTermDocsMetadata(termDocMeta.getTerm(), termDocMeta, indexName, writer, LayeredCache.ModificationType.ADD, updateStore, startDocId, endDocId);
                }
                rc = 1;
                logger.info("adding doc: " + docNumber);
            } catch (NumberFormatException | InterruptedException | MemcachedException | TimeoutException | SolbaseException e) {
                // every failure path leaves rc at -1, which the finally block counts as an error
                logger.info("adding doc failed: " + docNumber);
                logger.info(e.toString());
            }
        }
    } finally {
        if (rc != 1) {
            numErrors.incrementAndGet();
            numErrorsCumulative.incrementAndGet();
        }
    }
    return rc;
}
Also used: SolbaseException (org.solbase.common.SolbaseException), TermDocMetadata (org.solbase.lucenehbase.TermDocMetadata), IOException (java.io.IOException), Document (org.apache.lucene.document.Document), SchemaField (org.apache.solr.schema.SchemaField), ParsedDoc (org.solbase.indexer.ParsedDoc), MemcachedException (net.rubyeye.xmemcached.exception.MemcachedException), TimeoutException (java.util.concurrent.TimeoutException)
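The startDocId/endDocId pair computed from SolbaseShardUtil bounds the slice of the global docId space this shard owns, and addDoc threads it through to ReaderCache so that updates outside the range can be skipped locally. The real SolbaseShardUtil internals are not shown here; the sketch below is a hypothetical illustration of that kind of range partitioning, with made-up class and method names.

public final class ShardRanges {

    private ShardRanges() {
    }

    // stand-in for the roughly two-billion-doc ceiling mentioned in Example 1
    private static final int MAX_DOC_ID = Integer.MAX_VALUE;

    // first docId owned by the given shard (inclusive);
    // the long cast avoids int overflow in the multiplication
    public static int startDocId(int shardNum, int numShards) {
        return (int) ((long) MAX_DOC_ID * shardNum / numShards);
    }

    // last docId owned by the given shard (inclusive)
    public static int endDocId(int shardNum, int numShards) {
        return (int) ((long) MAX_DOC_ID * (shardNum + 1) / numShards) - 1;
    }

    // true when a document update is relevant to this shard's slice
    public static boolean owns(int docNumber, int startDocId, int endDocId) {
        return docNumber >= startDocId && docNumber <= endDocId;
    }
}

Under a scheme like this, a node handling shard 3 of 16 would ignore cache updates for any docNumber outside [startDocId(3, 16), endDocId(3, 16)], which appears to be the purpose of the two extra arguments passed to ReaderCache.updateDocument and updateTermDocsMetadata.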

Aggregations

Document (org.apache.lucene.document.Document): 2 uses
ParsedDoc (org.solbase.indexer.ParsedDoc): 2 uses
TermDocMetadata (org.solbase.lucenehbase.TermDocMetadata): 2 uses
IOException (java.io.IOException): 1 use
ByteBuffer (java.nio.ByteBuffer): 1 use
TimeoutException (java.util.concurrent.TimeoutException): 1 use
MemcachedException (net.rubyeye.xmemcached.exception.MemcachedException): 1 use
ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable): 1 use
BytesWritable (org.apache.hadoop.io.BytesWritable): 1 use
MapWritable (org.apache.hadoop.io.MapWritable): 1 use
SchemaField (org.apache.solr.schema.SchemaField): 1 use
SolbaseException (org.solbase.common.SolbaseException): 1 use
DocumentPutWritable (org.solbase.indexer.writable.DocumentPutWritable): 1 use
TermDocMetadataWritable (org.solbase.indexer.writable.TermDocMetadataWritable): 1 use