Use of org.apache.solr.handler.dataimport.config.Entity in the lucene-solr project (Apache): the buildDocument method of the DocBuilder class.
/*
 * Builds one document (and, recursively, documents for all child entities)
 * driven by the given entity-processor wrapper.
 *
 * vr                - variable resolver holding per-entity row namespaces
 * doc               - document under construction; null until a doc-root entity produces a row
 * pk                - primary key for a delta import, or null for a full dump
 * epw               - processor wrapper for the entity currently being built
 * isRoot            - true only for the top-level invocation
 * parentCtx         - context of the parent entity, or null at the root
 * entitiesToDestroy - accumulates wrappers whose destroy() must run after the build
 */
@SuppressWarnings("unchecked")
private void buildDocument(VariableResolver vr, DocWrapper doc, Map<String, Object> pk, EntityProcessorWrapper epw, boolean isRoot, ContextImpl parentCtx, List<EntityProcessorWrapper> entitiesToDestroy) {
// A non-null pk means this build was triggered by a delta key; otherwise full dump.
ContextImpl ctx = new ContextImpl(epw, vr, null, pk == null ? Context.FULL_DUMP : Context.DELTA_DUMP, session, parentCtx, this);
epw.init(ctx);
// Register each wrapper for later cleanup exactly once; the flag is flipped
// here so repeated recursive visits do not add duplicates.
if (!epw.isInitialized()) {
entitiesToDestroy.add(epw);
epw.setInitialized(true);
}
// Debug-mode "start" paging: suppress verbose logging until the start-th
// document is reached (re-enabled inside the loop below).
if (reqParams.getStart() > 0) {
getDebugLogger().log(DIHLogLevels.DISABLE_LOGGING, null, null);
}
if (verboseDebug) {
getDebugLogger().log(DIHLogLevels.START_ENTITY, epw.getEntity().getName(), null);
}
// Counts every row seen from this entity, including rows later skipped by
// the start/rows window.
int seenDocCount = 0;
try {
while (true) {
// Abort promptly if an external stop was requested.
if (stop.get())
return;
// Stop once the overall successful-doc count exceeds the requested window.
if (importStatistics.docCount.get() > (reqParams.getStart() + reqParams.getRows()))
break;
try {
seenDocCount++;
// Past the "start" offset: turn logging back on (counterpart of the
// DISABLE_LOGGING above).
if (seenDocCount > reqParams.getStart()) {
getDebugLogger().log(DIHLogLevels.ENABLE_LOGGING, null, null);
}
if (verboseDebug && epw.getEntity().isDocRoot()) {
getDebugLogger().log(DIHLogLevels.START_DOC, epw.getEntity().getName(), null);
}
// First row of a doc-root entity: create the document and copy in
// fields from every ancestor entity's already-resolved row.
if (doc == null && epw.getEntity().isDocRoot()) {
doc = new DocWrapper();
ctx.setDoc(doc);
Entity e = epw.getEntity();
while (e.getParentEntity() != null) {
addFields(e.getParentEntity(), doc, (Map<String, Object>) vr.resolve(e.getParentEntity().getName()), vr);
e = e.getParentEntity();
}
}
Map<String, Object> arow = epw.nextRow();
// No more rows from this entity.
if (arow == null) {
break;
}
// Support for start parameter in debug mode
// Rows before the window are skipped (but still counted above); rows
// past the window end the loop.
if (epw.getEntity().isDocRoot()) {
if (seenDocCount <= reqParams.getStart())
continue;
if (seenDocCount > reqParams.getStart() + reqParams.getRows()) {
LOG.info("Indexing stopped at docCount = " + importStatistics.docCount);
break;
}
}
if (verboseDebug) {
getDebugLogger().log(DIHLogLevels.ENTITY_OUT, epw.getEntity().getName(), arow);
}
importStatistics.rowsCount.incrementAndGet();
DocWrapper childDoc = null;
if (doc != null) {
if (epw.getEntity().isChild()) {
// Child entity: its row becomes a nested child document.
childDoc = new DocWrapper();
handleSpecialCommands(arow, childDoc);
addFields(epw.getEntity(), childDoc, arow, vr);
doc.addChildDocument(childDoc);
} else {
// Flat entity: merge the row's fields into the current document.
// The row is published under the entity's namespace only while
// fields are being resolved.
handleSpecialCommands(arow, doc);
vr.addNamespace(epw.getEntity().getName(), arow);
addFields(epw.getEntity(), doc, arow, vr);
vr.removeNamespace(epw.getEntity().getName());
}
}
// Recurse into child entities with this row's values in scope so their
// queries/templates can reference ${entityName.column}.
if (epw.getEntity().getChildren() != null) {
vr.addNamespace(epw.getEntity().getName(), arow);
for (EntityProcessorWrapper child : epw.getChildren()) {
if (childDoc != null) {
buildDocument(vr, childDoc, child.getEntity().isDocRoot() ? pk : null, child, false, ctx, entitiesToDestroy);
} else {
buildDocument(vr, doc, child.getEntity().isDocRoot() ? pk : null, child, false, ctx, entitiesToDestroy);
}
}
vr.removeNamespace(epw.getEntity().getName());
}
// Doc-root entity: the document is now complete; hand it to the writer
// and reset for the next row.
if (epw.getEntity().isDocRoot()) {
if (stop.get())
return;
if (!doc.isEmpty()) {
boolean result = writer.upload(doc);
if (reqParams.isDebug()) {
reqParams.getDebugInfo().debugDocuments.add(doc);
}
doc = null;
if (result) {
importStatistics.docCount.incrementAndGet();
} else {
importStatistics.failedDocCount.incrementAndGet();
}
}
}
} catch (DataImportHandlerException e) {
if (verboseDebug) {
getDebugLogger().log(DIHLogLevels.ENTITY_EXCEPTION, epw.getEntity().getName(), e);
}
// SKIP_ROW drops only the current row and continues with the next one.
if (e.getErrCode() == DataImportHandlerException.SKIP_ROW) {
continue;
}
// At the root: SKIP abandons the in-progress document; anything else is
// logged, and SEVERE is rethrown to abort. Non-root levels always
// propagate so the root can decide.
if (isRoot) {
if (e.getErrCode() == DataImportHandlerException.SKIP) {
importStatistics.skipDocCount.getAndIncrement();
doc = null;
} else {
SolrException.log(LOG, "Exception while processing: " + epw.getEntity().getName() + " document : " + doc, e);
}
if (e.getErrCode() == DataImportHandlerException.SEVERE)
throw e;
} else
throw e;
} catch (Exception t) {
if (verboseDebug) {
getDebugLogger().log(DIHLogLevels.ENTITY_EXCEPTION, epw.getEntity().getName(), t);
}
// Any unexpected failure is escalated as SEVERE (aborts the import),
// preserving the original cause.
throw new DataImportHandlerException(DataImportHandlerException.SEVERE, t);
} finally {
if (verboseDebug) {
getDebugLogger().log(DIHLogLevels.ROW_END, epw.getEntity().getName(), null);
if (epw.getEntity().isDocRoot())
getDebugLogger().log(DIHLogLevels.END_DOC, null, null);
}
}
}
} finally {
if (verboseDebug) {
getDebugLogger().log(DIHLogLevels.END_ENTITY, null, null);
}
}
}
Use of org.apache.solr.handler.dataimport.config.Entity in the lucene-solr project (Apache): the testExclude method of the TestMailEntityProcessor class.
/**
 * Verifies that the {@code exclude} pattern prunes matching sub-folders:
 * with {@code folders=top2} and {@code recurse=true}, excluding
 * {@code .*grandchild.*} should leave the 5 messages of top2 and its
 * direct children.
 */
@Test
@Ignore("Needs a Mock Mail Server to work")
public void testExclude() {
  paramMap.put("folders", "top2");
  paramMap.put("recurse", "true");
  paramMap.put("processAttachement", "false");
  paramMap.put("exclude", ".*grandchild.*");
  DataImporter di = new DataImporter();
  di.loadAndInit(getConfigFromMap(paramMap));
  // Sanity check: the configuration parsed at least one entity
  // (get(0) throws if it did not).
  Entity ent = di.getConfig().getEntities().get(0);
  RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
  SolrWriterImpl swi = new SolrWriterImpl();
  di.runCmd(rp, swi);
  // Fix: JUnit expects (message, expected, actual) — the original had the
  // expected and actual arguments reversed, producing a misleading failure
  // message ("expected <size> but was 5").
  assertEquals("top2 and its direct children did not return 5 messages", 5, swi.docs.size());
}
Use of org.apache.solr.handler.dataimport.config.Entity in the lucene-solr project (Apache): the testIncludeAndExclude method of the TestMailEntityProcessor class.
/**
 * Verifies combined {@code include}/{@code exclude} filtering: starting from
 * {@code top1,top2} with recursion, excluding {@code .*top1.*} while
 * including {@code .*grandchild.*} should yield 3 messages.
 */
@Test
@Ignore("Needs a Mock Mail Server to work")
public void testIncludeAndExclude() {
  paramMap.put("folders", "top1,top2");
  paramMap.put("recurse", "true");
  paramMap.put("processAttachement", "false");
  paramMap.put("exclude", ".*top1.*");
  paramMap.put("include", ".*grandchild.*");
  DataImporter di = new DataImporter();
  di.loadAndInit(getConfigFromMap(paramMap));
  // Sanity check: the configuration parsed at least one entity
  // (get(0) throws if it did not).
  Entity ent = di.getConfig().getEntities().get(0);
  RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
  SolrWriterImpl swi = new SolrWriterImpl();
  di.runCmd(rp, swi);
  // Fix: JUnit expects (message, expected, actual) — the original had the
  // expected and actual arguments reversed, producing a misleading failure
  // message ("expected <size> but was 3").
  assertEquals("top2 and its direct children did not return 3 messages", 3, swi.docs.size());
}
Use of org.apache.solr.handler.dataimport.config.Entity in the lucene-solr project (Apache): the getEntityProcessorWrapper method of the DocBuilder class.
/**
 * Wraps the given entity — and, recursively, each of its children — in an
 * {@link EntityProcessorWrapper}. An entity with no configured processor
 * name falls back to {@link SqlEntityProcessor}.
 *
 * @param entity the configured entity to wrap
 * @return a wrapper whose children mirror the entity's child hierarchy
 */
public EntityProcessorWrapper getEntityProcessorWrapper(Entity entity) {
  String processorName = entity.getProcessorName();
  EntityProcessor processor = null;
  if (processorName == null) {
    // No processor configured: SQL is the default.
    processor = new SqlEntityProcessor();
  } else {
    try {
      processor = (EntityProcessor) loadClass(processorName, dataImporter.getCore()).newInstance();
    } catch (Exception e) {
      // NOTE(review): wrapAndThrow(SEVERE, ...) is expected to rethrow;
      // if it ever returned normally, processor would remain null here.
      wrapAndThrow(SEVERE, e, "Unable to load EntityProcessor implementation for entity:" + entity.getName());
    }
  }
  EntityProcessorWrapper wrapper = new EntityProcessorWrapper(processor, entity, this);
  for (Entity child : entity.getChildren()) {
    wrapper.getChildren().add(getEntityProcessorWrapper(child));
  }
  return wrapper;
}
Use of org.apache.solr.handler.dataimport.config.Entity in the lucene-solr project (Apache): the singleEntityMultipleRows method of the TestDocBuilder class.
/**
 * Full import of a single entity whose query yields three rows: verifies
 * the writer lifecycle (deleteAll/commit/finish), per-row field mapping,
 * and the import statistics counters.
 */
@Test
public void singleEntityMultipleRows() {
  DataImporter di = new DataImporter();
  di.loadAndInit(dc_singleEntity);
  DIHConfiguration cfg = di.getConfig();
  // Sanity check: the configuration parsed at least one entity
  // (get(0) throws if it did not).
  Entity ent = cfg.getEntities().get(0);
  RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
  List<Map<String, Object>> l = new ArrayList<>();
  l.add(createMap("id", 1, "desc", "one"));
  l.add(createMap("id", 2, "desc", "two"));
  l.add(createMap("id", 3, "desc", "three"));
  MockDataSource.setIterator("select * from x", l.iterator());
  SolrWriterImpl swi = new SolrWriterImpl();
  di.runCmd(rp, swi);
  assertEquals(Boolean.TRUE, swi.deleteAllCalled);
  assertEquals(Boolean.TRUE, swi.commitCalled);
  assertEquals(Boolean.TRUE, swi.finishCalled);
  assertEquals(3, swi.docs.size());
  for (int i = 0; i < l.size(); i++) {
    // Fix: dropped the redundant unchecked cast — l is already declared as
    // List<Map<String, Object>>, so get(i) is correctly typed.
    Map<String, Object> map = l.get(i);
    SolrInputDocument doc = swi.docs.get(i);
    for (Map.Entry<String, Object> entry : map.entrySet()) {
      assertEquals(entry.getValue(), doc.getFieldValue(entry.getKey()));
    }
    // The config evidently also maps "desc" onto "desc_s" — TODO confirm
    // against dc_singleEntity.
    assertEquals(map.get("desc"), doc.getFieldValue("desc_s"));
  }
  assertEquals(1, di.getDocBuilder().importStatistics.queryCount.get());
  assertEquals(3, di.getDocBuilder().importStatistics.docCount.get());
  assertEquals(3, di.getDocBuilder().importStatistics.rowsCount.get());
}
Aggregations