Usage of org.solbase.indexer.ParsedDoc in the Solbase project (by Photobucket):
the addDoc method of the SolbaseIndexWriter class.
/**
 * Adds a single document to the Solbase index, or applies an in-place edit when the
 * document carries an "edit" control field.
 *
 * <p>Solbase-specific control fields ("docId", "edit", "updateStore") are read and then
 * stripped from the Lucene document before parsing. The document id is taken from the
 * schema's unique-key field and must parse as an int.
 *
 * @param cmd the Solr add/update command carrying the Lucene document
 * @return 1 on success, -1 on failure (error counters are bumped in that case)
 * @throws IOException if the schema has no unique key field or no id field
 */
public int addDoc(AddUpdateCommand cmd) throws IOException {
    addCommands.incrementAndGet();
    addCommandsCumulative.incrementAndGet();
    int rc = -1;
    // Solbase does not allow duplicates, so both a schema unique-key field and an
    // id field are mandatory.
    SchemaField uniqueField = core.getSchema().getUniqueKeyField();
    if (uniqueField == null) {
        throw new IOException("Solbase requires a unique field");
    }
    if (idField == null) {
        throw new IOException("Solbase requires a unique field");
    }
    try {
        String indexName = core.getName();
        writer.setIndexName(indexName);
        Document doc = cmd.getLuceneDocument(schema);
        String idFieldName = idTerm.field();
        // "updateStore" present => this node is responsible for writing hbase after
        // the cache is updated.
        boolean updateStore = doc.get("updateStore") != null;
        int docNumber = Integer.parseInt(doc.get(idFieldName));
        // Presence of "edit" marks a modification of an existing doc instead of a blind add.
        String editVal = doc.get("edit");
        // Control fields are only used by the update api; strip them before indexing.
        doc.removeField("docId");
        doc.removeField("edit");
        doc.removeField("updateStore");
        writer.setIndexUtil(indexUtil);
        int shardNum = SolbaseShardUtil.getShardNum(indexName);
        int startDocId = SolbaseShardUtil.getStartDocId(shardNum);
        int endDocId = SolbaseShardUtil.getEndDocId(shardNum);
        if (editVal != null) {
            logger.info("updating doc: " + docNumber);
            if (editDoc(doc, indexName, docNumber, updateStore)) {
                rc = 1;
            }
        } else {
            try {
                logger.info("adding doc: " + docNumber);
                ParsedDoc parsedDoc = writer.parseDoc(doc, schema.getAnalyzer(), indexName, docNumber, indexUtil.getSortFieldNames());
                List<TermDocMetadata> termDocMetas = parsedDoc.getTermDocMetadatas();
                // TODO: possible problem
                // doc is not in cache, cluster isn't responsible for update store
                // doc never gets updated in hbase, nor cache
                // for loop below will update tv with this new doc.
                // when searched, it will throw null pointer exception on this doc
                // therefore, update store first if adding doc (replication can still cause this issue if back'd up)
                ReaderCache.updateDocument(docNumber, parsedDoc, indexName, writer, LayeredCache.ModificationType.ADD, updateStore, startDocId, endDocId);
                for (TermDocMetadata termDocMeta : termDocMetas) {
                    ReaderCache.updateTermDocsMetadata(termDocMeta.getTerm(), termDocMeta, indexName, writer, LayeredCache.ModificationType.ADD, updateStore, startDocId, endDocId);
                }
                rc = 1;
                logger.info("added doc: " + docNumber);
            } catch (InterruptedException e) {
                // Restore the interrupt status instead of silently swallowing it.
                Thread.currentThread().interrupt();
                logAddFailure(docNumber, e);
            } catch (NumberFormatException e) {
                logAddFailure(docNumber, e);
            } catch (MemcachedException e) {
                logAddFailure(docNumber, e);
            } catch (TimeoutException e) {
                logAddFailure(docNumber, e);
            } catch (SolbaseException e) {
                logAddFailure(docNumber, e);
            }
        }
    } finally {
        // rc stays -1 on any failure path, including exceptions thrown out of the try.
        if (rc != 1) {
            numErrors.incrementAndGet();
            numErrorsCumulative.incrementAndGet();
        }
    }
    return rc;
}

/** Logs a failed add for the given doc number along with the triggering exception. */
private void logAddFailure(int docNumber, Exception e) {
    logger.info("adding doc failed: " + docNumber);
    logger.info(e.toString());
}
Usage of org.solbase.indexer.ParsedDoc in the Solbase project (by Photobucket):
the deleteDocument method of the DocumentLoader class.
/**
 * Parses an existing document and pushes DELETE modifications for each of its term-doc
 * metadata entries into the reader cache (and, when {@code updateStore} is true, the
 * backing store).
 *
 * @param oldDoc      the cached document to delete; cloned before mutation so concurrent
 *                    readers of the cached instance are unaffected
 * @param docId       numeric Solbase document id
 * @param indexName   name of the index the document belongs to
 * @param writer      index writer used to parse the document
 * @param indexUtil   provides the sort field names for parsing
 * @param updateStore whether this node should also write the change to the backing store
 * @param startDocId  first doc id of this shard's range
 * @param endDocId    last doc id of this shard's range
 * @return the parsed document, or {@code null} if any step failed
 */
private ParsedDoc deleteDocument(Document oldDoc, int docId, String indexName, IndexWriter writer, SolbaseIndexUtil indexUtil, boolean updateStore, int startDocId, int endDocId) {
    try {
        // Clone so reads of the cached document won't conflict with the mutation below.
        oldDoc = new Document(oldDoc);
        oldDoc.removeField("docId");
        ParsedDoc parsedDoc = writer.parseDoc(oldDoc, schema.getAnalyzer(), indexName, docId, indexUtil.getSortFieldNames());
        List<TermDocMetadata> metadatas = parsedDoc.getTermDocMetadatas();
        // TODO: doing duplicate work here - once from updateObject and again from updateObjectStore
        for (TermDocMetadata metadata : metadatas) {
            ReaderCache.updateTermDocsMetadata(metadata.getTerm(), metadata, indexName, writer, LayeredCache.ModificationType.DELETE, updateStore, startDocId, endDocId);
        }
        return parsedDoc;
    } catch (InterruptedException e) {
        // Restore the interrupt status instead of silently swallowing it.
        Thread.currentThread().interrupt();
        // NOTE(review): should go through a logger rather than stderr — confirm the
        // class has a logger field available before switching.
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (MemcachedException e) {
        e.printStackTrace();
    } catch (TimeoutException e) {
        e.printStackTrace();
    }
    // Any failure above is reported to the caller as null.
    return null;
}
Aggregations