Use of org.solbase.lucenehbase.TermDocMetadata in project Solbase by Photobucket.
The class SolbaseInitialIndexMapper, method map.
protected void map(ImmutableBytesWritable row, Result values, Context context) throws IOException {
    context.getCounter(Counters.TOTAL_ROWS).increment(1);
    context.setStatus(context.getCounter(Counters.TOTAL_ROWS) + "");
    // global id is the user_media row key
    String globalId = Bytes.toString(row.get());
    Document doc = indexerUtil.createLuceneDocument(Bytes.toString(row.get()), values, context);
    byte[] checksum = values.getValue(Bytes.toBytes("meta"), Bytes.toBytes("checksum"));
    if (doc == null) {
        // validation must have failed if it returned null
        return;
    }
    // reserve a new chunk of ids when the current one is exhausted (or on first use);
    // otherwise count down within the reserved chunk
    if (this.idCounter > (SolbaseUtil.UNIQ_ID_CHUNK - 1) || docId == null) {
        docId = SolbaseUtil.generateUniqId();
        this.idCounter = 0;
    } else {
        docId--;
    }
    // for us, docId is a globally unique id, which ties us to the 2 billion docs limitation.
    // it doesn't hurt to add this field to the doc; it only really matters once sharding
    // comes in and docs need to be fetched by their docId.
    indexerUtil.addFieldToDoc(doc, "docId", docId + "");
    // incrementing chunking sequence (lucene doc id)
    this.idCounter++;
    try {
        ParsedDoc parsedDoc = indexerUtil.getIndexWriter().parseDoc(doc, indexerUtil.getAnalyzer(), "", docId, indexerUtil.getSortFieldNames());
        List<TermDocMetadata> metadatas = parsedDoc.getTermDocMetadatas();
        MapWritable mapWritable = new MapWritable();
        DocumentPutWritable docWritable = new DocumentPutWritable(parsedDoc.getFieldsMap(), parsedDoc.getAllTerms(), docId, globalId);
        mapWritable.put(new BytesWritable(Bytes.toBytes("doc")), docWritable);
        for (TermDocMetadata metadata : metadatas) {
            byte[] key = metadata.getFieldTermKey();
            ByteBuffer buf = metadata.serialize();
            TermDocMetadataWritable writable = new TermDocMetadataWritable(docId, Bytes.toBytes(buf), key);
            mapWritable.put(new BytesWritable(key), writable);
        }
        context.write(new BytesWritable(checksum), mapWritable);
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
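The id handling above counts down inside a block of ids reserved through SolbaseUtil.generateUniqId(), and reserves a fresh block once UNIQ_ID_CHUNK ids have been handed out. The following is a minimal standalone sketch of that chunked-allocation pattern, not Solbase code: the class name ChunkedIdAllocator, the chunk size of 10000, and the AtomicInteger standing in for Solbase's shared HBase-backed sequence are all illustrative assumptions.

    import java.util.concurrent.atomic.AtomicInteger;

    // Illustrative sketch only: mimics the mapper's chunked docId allocation.
    // The AtomicInteger stands in for SolbaseUtil.generateUniqId(), which in
    // Solbase reserves a block of ids from a shared counter.
    public class ChunkedIdAllocator {

        private static final int UNIQ_ID_CHUNK = 10000; // assumed chunk size
        private final AtomicInteger sequence = new AtomicInteger(0); // stand-in for the shared counter

        private Integer docId = null; // current id, counts down within a chunk
        private int idCounter = 0;    // how many ids of the current chunk have been used

        public int nextId() {
            // reserve a fresh chunk when the current one is exhausted or on first use
            if (idCounter > (UNIQ_ID_CHUNK - 1) || docId == null) {
                docId = sequence.addAndGet(UNIQ_ID_CHUNK); // top of the newly reserved block
                idCounter = 0;
            } else {
                docId--; // hand out ids downward within the reserved block
            }
            idCounter++;
            return docId;
        }

        public static void main(String[] args) {
            ChunkedIdAllocator allocator = new ChunkedIdAllocator();
            for (int i = 0; i < 5; i++) {
                System.out.println("assigned docId: " + allocator.nextId());
            }
        }
    }

Counting down from the top of each reserved block lets a mapper assign ids without touching the shared counter on every document, which is the point of the chunking in the map method above.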
Use of org.solbase.lucenehbase.TermDocMetadata in project Solbase by Photobucket.
The class SolbaseIndexWriter, method addDoc.
public int addDoc(AddUpdateCommand cmd) throws IOException {
    addCommands.incrementAndGet();
    addCommandsCumulative.incrementAndGet();
    int rc = -1;
    // no duplicates allowed
    SchemaField uniqueField = core.getSchema().getUniqueKeyField();
    if (uniqueField == null)
        throw new IOException("Solbase requires a unique field");
    // unlike stock Solr there is no allowDups fallback: the id field is mandatory
    if (idField == null) {
        throw new IOException("Solbase requires a unique field");
    }
    try {
        String indexName = core.getName();
        writer.setIndexName(indexName);
        Document doc = cmd.getLuceneDocument(schema);
        String idFieldName = idTerm.field();
        // Solbase-specific control fields; they are stripped once consumed
        boolean updateStore = false;
        String updateVal = doc.get("updateStore");
        if (updateVal != null) {
            // update hbase after the cache is updated
            updateStore = true;
        }
        int docNumber = Integer.parseInt(doc.get(idFieldName));
        // if the edit field is present, this is a modification rather than a blind add
        String editVal = doc.get("edit");
        // the following fields are only used by the update api and are not indexed
        doc.removeField("docId");
        doc.removeField("edit");
        doc.removeField("updateStore");
        // hand the index util to the writer
        writer.setIndexUtil(indexUtil);
        String globalId = doc.getField("global_uniq_id").stringValue();
        int shardNum = SolbaseShardUtil.getShardNum(indexName);
        int startDocId = SolbaseShardUtil.getStartDocId(shardNum);
        int endDocId = SolbaseShardUtil.getEndDocId(shardNum);
        if (editVal != null) {
            logger.info("updating doc: " + docNumber);
            if (editDoc(doc, indexName, docNumber, updateStore)) {
                rc = 1;
            }
        } else {
            try {
                logger.info("adding doc: " + docNumber);
                ParsedDoc parsedDoc = writer.parseDoc(doc, schema.getAnalyzer(), indexName, docNumber, indexUtil.getSortFieldNames());
                List<TermDocMetadata> termDocMetas = parsedDoc.getTermDocMetadatas();
                // TODO: possible problem
                // if the doc is not in cache and this cluster is not responsible for updating the store,
                // the doc never gets updated in hbase or in cache, yet the loop below still updates the
                // term vectors with the new doc; a search would then hit a null pointer exception on it.
                // therefore update the store first when adding a doc (replication can still cause this
                // issue if it is backed up).
                ReaderCache.updateDocument(docNumber, parsedDoc, indexName, writer, LayeredCache.ModificationType.ADD, updateStore, startDocId, endDocId);
                for (TermDocMetadata termDocMeta : termDocMetas) {
                    ReaderCache.updateTermDocsMetadata(termDocMeta.getTerm(), termDocMeta, indexName, writer, LayeredCache.ModificationType.ADD, updateStore, startDocId, endDocId);
                }
                rc = 1;
                logger.info("added doc: " + docNumber);
            } catch (NumberFormatException e) {
                logger.info("adding doc failed: " + docNumber);
                logger.info(e.toString());
            } catch (InterruptedException e) {
                logger.info("adding doc failed: " + docNumber);
                logger.info(e.toString());
            } catch (MemcachedException e) {
                logger.info("adding doc failed: " + docNumber);
                logger.info(e.toString());
            } catch (TimeoutException e) {
                logger.info("adding doc failed: " + docNumber);
                logger.info(e.toString());
            } catch (SolbaseException e) {
                logger.info("adding doc failed: " + docNumber);
                logger.info(e.toString());
            }
        }
    } finally {
        if (rc != 1) {
            numErrors.incrementAndGet();
            numErrorsCumulative.incrementAndGet();
        }
    }
    return rc;
}
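addDoc reads several control fields off the incoming Lucene document ("global_uniq_id", "edit", "updateStore", plus the schema's unique key field) before stripping them. Below is a hedged sketch of how such a document might be assembled; it assumes the Lucene 3.x-era Field API that Solbase's generation of Solr builds against, and the unique key field name "docNumber" is a placeholder, since the real name comes from the Solr schema via idTerm.field().

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;

    // Illustrative sketch only; field names other than the ones read by addDoc above
    // ("docId", "edit", "updateStore", "global_uniq_id") are hypothetical.
    public class SolbaseDocExample {

        public static Document buildUpdateDoc(int docNumber, String globalUniqId, boolean isEdit) {
            Document doc = new Document();

            // the schema's unique key field; "docNumber" as the field name is an assumption
            doc.add(new Field("docNumber", Integer.toString(docNumber),
                    Field.Store.YES, Field.Index.NOT_ANALYZED));

            // read by addDoc via doc.getField("global_uniq_id")
            doc.add(new Field("global_uniq_id", globalUniqId,
                    Field.Store.YES, Field.Index.NOT_ANALYZED));

            if (isEdit) {
                // presence of "edit" routes the command to editDoc() instead of a blind add
                doc.add(new Field("edit", "true", Field.Store.YES, Field.Index.NOT_ANALYZED));
            }

            // presence of "updateStore" tells addDoc to write through to HBase after the cache update
            doc.add(new Field("updateStore", "true", Field.Store.YES, Field.Index.NOT_ANALYZED));

            // addDoc strips "docId", "edit" and "updateStore" before the document is parsed and indexed
            return doc;
        }
    }

In practice such a document reaches addDoc wrapped in the AddUpdateCommand it receives as cmd, via cmd.getLuceneDocument(schema).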