Search in sources:

Example 1 with Entity

use of org.apache.solr.handler.dataimport.config.Entity in project lucene-solr by apache.

The class DocBuilder, method buildDocument.

/**
 * Pulls rows from the entity processor {@code epw} one at a time and, for each row, adds the
 * row's fields to the document being built, recurses into the entity's children, and (when the
 * entity is a document root) uploads the finished document through {@code writer}.
 *
 * @param vr resolver used to look up parent-entity rows and to expose the current row to
 *           children under the entity's name
 * @param doc document being built; may be {@code null}, in which case a new {@link DocWrapper}
 *            is created on each iteration where the entity is a document root
 * @param pk when {@code null} the context is created with {@code Context.FULL_DUMP}, otherwise
 *           with {@code Context.DELTA_DUMP}; forwarded only to children that are document roots
 * @param epw wrapper around the entity processor that supplies the rows
 * @param isRoot when {@code true}, {@link DataImportHandlerException}s that are not SEVERE are
 *               counted or logged here instead of being rethrown
 * @param parentCtx context of the enclosing entity, handed to the new {@link ContextImpl}
 * @param entitiesToDestroy receives {@code epw} the first time it is seen as not initialized
 * @throws DataImportHandlerException on SEVERE errors, on any error in a non-root entity
 *         (other than SKIP_ROW), and as a SEVERE wrapper around any other exception
 */
@SuppressWarnings("unchecked")
private void buildDocument(VariableResolver vr, DocWrapper doc, Map<String, Object> pk, EntityProcessorWrapper epw, boolean isRoot, ContextImpl parentCtx, List<EntityProcessorWrapper> entitiesToDestroy) {
    ContextImpl ctx = new ContextImpl(epw, vr, null, pk == null ? Context.FULL_DUMP : Context.DELTA_DUMP, session, parentCtx, this);
    epw.init(ctx);
    // Register the wrapper for later destruction only once; the flag prevents re-adding it
    // when this method is invoked again for the same wrapper.
    if (!epw.isInitialized()) {
        entitiesToDestroy.add(epw);
        epw.setInitialized(true);
    }
    // With a start offset, DISABLE_LOGGING is sent to the debug logger here; ENABLE_LOGGING is
    // sent inside the loop once the offset has been passed.
    if (reqParams.getStart() > 0) {
        getDebugLogger().log(DIHLogLevels.DISABLE_LOGGING, null, null);
    }
    if (verboseDebug) {
        getDebugLogger().log(DIHLogLevels.START_ENTITY, epw.getEntity().getName(), null);
    }
    // Counts loop iterations for this invocation; incremented before the row is fetched.
    int seenDocCount = 0;
    try {
        while (true) {
            if (stop.get())
                return;
            // Stop once more documents than start + rows have been uploaded overall.
            if (importStatistics.docCount.get() > (reqParams.getStart() + reqParams.getRows()))
                break;
            try {
                seenDocCount++;
                if (seenDocCount > reqParams.getStart()) {
                    getDebugLogger().log(DIHLogLevels.ENABLE_LOGGING, null, null);
                }
                if (verboseDebug && epw.getEntity().isDocRoot()) {
                    getDebugLogger().log(DIHLogLevels.START_DOC, epw.getEntity().getName(), null);
                }
                if (doc == null && epw.getEntity().isDocRoot()) {
                    doc = new DocWrapper();
                    ctx.setDoc(doc);
                    // Walk up the ancestor chain and add each ancestor's fields, using the row
                    // the resolver holds under that ancestor's name.
                    Entity e = epw.getEntity();
                    while (e.getParentEntity() != null) {
                        addFields(e.getParentEntity(), doc, (Map<String, Object>) vr.resolve(e.getParentEntity().getName()), vr);
                        e = e.getParentEntity();
                    }
                }
                Map<String, Object> arow = epw.nextRow();
                // A null row ends the loop for this entity.
                if (arow == null) {
                    break;
                }
                // Support for start parameter in debug mode
                if (epw.getEntity().isDocRoot()) {
                    // Rows up to and including the start offset are fetched but not processed.
                    if (seenDocCount <= reqParams.getStart())
                        continue;
                    if (seenDocCount > reqParams.getStart() + reqParams.getRows()) {
                        LOG.info("Indexing stopped at docCount = " + importStatistics.docCount);
                        break;
                    }
                }
                if (verboseDebug) {
                    getDebugLogger().log(DIHLogLevels.ENTITY_OUT, epw.getEntity().getName(), arow);
                }
                importStatistics.rowsCount.incrementAndGet();
                DocWrapper childDoc = null;
                if (doc != null) {
                    if (epw.getEntity().isChild()) {
                        // Child entities get their own document, attached to the current one.
                        childDoc = new DocWrapper();
                        handleSpecialCommands(arow, childDoc);
                        addFields(epw.getEntity(), childDoc, arow, vr);
                        doc.addChildDocument(childDoc);
                    } else {
                        handleSpecialCommands(arow, doc);
                        // The row is exposed under the entity's name only while fields are added.
                        vr.addNamespace(epw.getEntity().getName(), arow);
                        addFields(epw.getEntity(), doc, arow, vr);
                        vr.removeNamespace(epw.getEntity().getName());
                    }
                }
                if (epw.getEntity().getChildren() != null) {
                    // Expose the current row to the children for the duration of the recursion.
                    vr.addNamespace(epw.getEntity().getName(), arow);
                    for (EntityProcessorWrapper child : epw.getChildren()) {
                        // Children build into the child document when one was created above,
                        // otherwise into the current document. They are never treated as root.
                        if (childDoc != null) {
                            buildDocument(vr, childDoc, child.getEntity().isDocRoot() ? pk : null, child, false, ctx, entitiesToDestroy);
                        } else {
                            buildDocument(vr, doc, child.getEntity().isDocRoot() ? pk : null, child, false, ctx, entitiesToDestroy);
                        }
                    }
                    vr.removeNamespace(epw.getEntity().getName());
                }
                if (epw.getEntity().isDocRoot()) {
                    if (stop.get())
                        return;
                    if (!doc.isEmpty()) {
                        boolean result = writer.upload(doc);
                        if (reqParams.isDebug()) {
                            reqParams.getDebugInfo().debugDocuments.add(doc);
                        }
                        // Reset so the next iteration starts a fresh document.
                        doc = null;
                        if (result) {
                            importStatistics.docCount.incrementAndGet();
                        } else {
                            importStatistics.failedDocCount.incrementAndGet();
                        }
                    }
                }
            } catch (DataImportHandlerException e) {
                if (verboseDebug) {
                    getDebugLogger().log(DIHLogLevels.ENTITY_EXCEPTION, epw.getEntity().getName(), e);
                }
                // SKIP_ROW: drop this row and move on to the next one.
                if (e.getErrCode() == DataImportHandlerException.SKIP_ROW) {
                    continue;
                }
                if (isRoot) {
                    if (e.getErrCode() == DataImportHandlerException.SKIP) {
                        // SKIP: count the skipped document and discard what was built so far.
                        importStatistics.skipDocCount.getAndIncrement();
                        doc = null;
                    } else {
                        SolrException.log(LOG, "Exception while processing: " + epw.getEntity().getName() + " document : " + doc, e);
                    }
                    // Only SEVERE errors abort at the root; others let the loop continue.
                    if (e.getErrCode() == DataImportHandlerException.SEVERE)
                        throw e;
                } else
                    // Non-root entities propagate the exception to the calling invocation.
                    throw e;
            } catch (Exception t) {
                if (verboseDebug) {
                    getDebugLogger().log(DIHLogLevels.ENTITY_EXCEPTION, epw.getEntity().getName(), t);
                }
                // Any other exception is wrapped and rethrown as SEVERE.
                throw new DataImportHandlerException(DataImportHandlerException.SEVERE, t);
            } finally {
                // Runs on every iteration exit, including continue, break, return and throw.
                if (verboseDebug) {
                    getDebugLogger().log(DIHLogLevels.ROW_END, epw.getEntity().getName(), null);
                    if (epw.getEntity().isDocRoot())
                        getDebugLogger().log(DIHLogLevels.END_DOC, null, null);
                }
            }
        }
    } finally {
        if (verboseDebug) {
            getDebugLogger().log(DIHLogLevels.END_ENTITY, null, null);
        }
    }
}
Also used : Entity(org.apache.solr.handler.dataimport.config.Entity) SolrException(org.apache.solr.common.SolrException)

Example 2 with Entity

use of org.apache.solr.handler.dataimport.config.Entity in project lucene-solr by apache.

the class TestMailEntityProcessor method testExclude.

/**
 * Runs a full import over folder "top2" with recursion enabled and an exclude pattern of
 * ".*grandchild.*", and expects exactly 5 documents to reach the writer.
 */
@Test
@Ignore("Needs a Mock Mail Server to work")
public void testExclude() {
    paramMap.put("folders", "top2");
    paramMap.put("recurse", "true");
    paramMap.put("processAttachement", "false");
    paramMap.put("exclude", ".*grandchild.*");
    DataImporter di = new DataImporter();
    di.loadAndInit(getConfigFromMap(paramMap));
    // Value is unused; the lookup is retained because it presumably fails fast when the
    // loaded configuration contains no entity.
    Entity ent = di.getConfig().getEntities().get(0);
    RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
    SolrWriterImpl swi = new SolrWriterImpl();
    di.runCmd(rp, swi);
    // assertEquals takes (message, expected, actual); the expected count goes first so a
    // failure reports the two values the right way round.
    assertEquals("top2 and its direct children did not return 5 messages", 5, swi.docs.size());
}
Also used : Entity(org.apache.solr.handler.dataimport.config.Entity) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 3 with Entity

use of org.apache.solr.handler.dataimport.config.Entity in project lucene-solr by apache.

the class TestMailEntityProcessor method testIncludeAndExclude.

/**
 * Runs a full import over folders "top1,top2" with recursion enabled, excluding ".*top1.*"
 * and including ".*grandchild.*", and expects exactly 3 documents to reach the writer.
 */
@Test
@Ignore("Needs a Mock Mail Server to work")
public void testIncludeAndExclude() {
    paramMap.put("folders", "top1,top2");
    paramMap.put("recurse", "true");
    paramMap.put("processAttachement", "false");
    paramMap.put("exclude", ".*top1.*");
    paramMap.put("include", ".*grandchild.*");
    DataImporter di = new DataImporter();
    di.loadAndInit(getConfigFromMap(paramMap));
    // Value is unused; the lookup is retained because it presumably fails fast when the
    // loaded configuration contains no entity.
    Entity ent = di.getConfig().getEntities().get(0);
    RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
    SolrWriterImpl swi = new SolrWriterImpl();
    di.runCmd(rp, swi);
    // assertEquals takes (message, expected, actual); the expected count goes first so a
    // failure reports the two values the right way round.
    assertEquals("top2 and its direct children did not return 3 messages", 3, swi.docs.size());
}
Also used : Entity(org.apache.solr.handler.dataimport.config.Entity) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 4 with Entity

use of org.apache.solr.handler.dataimport.config.Entity in project lucene-solr by apache.

the class DocBuilder method getEntityProcessorWrapper.

/**
 * Builds the {@link EntityProcessorWrapper} tree for {@code entity}: creates the processor for
 * the entity itself, then recursively creates and attaches a wrapper for every child entity.
 *
 * @param entity the entity to wrap; when it names no processor a {@link SqlEntityProcessor}
 *               is used
 * @return the wrapper for {@code entity} with wrappers for all of its children attached
 */
public EntityProcessorWrapper getEntityProcessorWrapper(Entity entity) {
    EntityProcessor entityProcessor = null;
    if (entity.getProcessorName() == null) {
        entityProcessor = new SqlEntityProcessor();
    } else {
        try {
            // Class.newInstance() is deprecated and rethrows checked constructor exceptions
            // undeclared; going through the no-arg constructor wraps them instead. Either way
            // the failure is caught below and reported with the entity name.
            entityProcessor = (EntityProcessor) loadClass(entity.getProcessorName(), dataImporter.getCore()).getDeclaredConstructor().newInstance();
        } catch (Exception e) {
            wrapAndThrow(SEVERE, e, "Unable to load EntityProcessor implementation for entity:" + entity.getName());
        }
    }
    EntityProcessorWrapper epw = new EntityProcessorWrapper(entityProcessor, entity, this);
    // buildDocument treats getChildren() as possibly null, so guard here as well.
    if (entity.getChildren() != null) {
        for (Entity e1 : entity.getChildren()) {
            epw.getChildren().add(getEntityProcessorWrapper(e1));
        }
    }
    return epw;
}
Also used : Entity(org.apache.solr.handler.dataimport.config.Entity) SolrException(org.apache.solr.common.SolrException)

Example 5 with Entity

use of org.apache.solr.handler.dataimport.config.Entity in project lucene-solr by apache.

the class TestDocBuilder method singleEntityMultipleRows.

/**
 * Feeds three rows through a single-entity configuration and checks that a full import
 * produces one document per row, with every source column copied and "desc" also available
 * as "desc_s", and that the import statistics report one query, three docs and three rows.
 */
@Test
public void singleEntityMultipleRows() {
    DataImporter importer = new DataImporter();
    importer.loadAndInit(dc_singleEntity);
    DIHConfiguration config = importer.getConfig();
    Entity firstEntity = config.getEntities().get(0);
    RequestInfo request = new RequestInfo(null, createMap("command", "full-import"), null);

    // Rows the mock data source hands back for the entity's query.
    List<Map<String, Object>> rows = new ArrayList<>();
    rows.add(createMap("id", 1, "desc", "one"));
    rows.add(createMap("id", 2, "desc", "two"));
    rows.add(createMap("id", 3, "desc", "three"));
    MockDataSource.setIterator("select * from x", rows.iterator());

    SolrWriterImpl writerSpy = new SolrWriterImpl();
    importer.runCmd(request, writerSpy);

    assertEquals(Boolean.TRUE, writerSpy.deleteAllCalled);
    assertEquals(Boolean.TRUE, writerSpy.commitCalled);
    assertEquals(Boolean.TRUE, writerSpy.finishCalled);
    assertEquals(3, writerSpy.docs.size());

    // Documents are expected in the same order as the input rows.
    int position = 0;
    for (Map<String, Object> row : rows) {
        SolrInputDocument indexed = writerSpy.docs.get(position);
        for (Map.Entry<String, Object> column : row.entrySet()) {
            assertEquals(column.getValue(), indexed.getFieldValue(column.getKey()));
        }
        assertEquals(row.get("desc"), indexed.getFieldValue("desc_s"));
        position++;
    }

    assertEquals(1, importer.getDocBuilder().importStatistics.queryCount.get());
    assertEquals(3, importer.getDocBuilder().importStatistics.docCount.get());
    assertEquals(3, importer.getDocBuilder().importStatistics.rowsCount.get());
}
Also used : Entity(org.apache.solr.handler.dataimport.config.Entity) DIHConfiguration(org.apache.solr.handler.dataimport.config.DIHConfiguration) SolrInputDocument(org.apache.solr.common.SolrInputDocument) Test(org.junit.Test)

Aggregations

Entity (org.apache.solr.handler.dataimport.config.Entity): 16; Test (org.junit.Test): 11; DIHConfiguration (org.apache.solr.handler.dataimport.config.DIHConfiguration): 6; Ignore (org.junit.Ignore): 6; SolrException (org.apache.solr.common.SolrException): 5; SolrInputDocument (org.apache.solr.common.SolrInputDocument): 3; IOException (java.io.IOException): 1; SimpleDateFormat (java.text.SimpleDateFormat): 1; AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 1; AtomicLong (java.util.concurrent.atomic.AtomicLong): 1; DocumentBuilder (javax.xml.parsers.DocumentBuilder): 1; DocumentBuilderFactory (javax.xml.parsers.DocumentBuilderFactory): 1; SystemIdResolver (org.apache.solr.util.SystemIdResolver): 1; Document (org.w3c.dom.Document): 1