Search in sources :

Example 6 with Entity

use of org.apache.solr.handler.dataimport.config.Entity in project lucene-solr by apache.

the class DocBuilder method getEntityProcessorWrapper.

/**
 * Builds the {@link EntityProcessorWrapper} for the given entity and, recursively, for each of its
 * child entities.
 *
 * <p>When the entity declares no processor name a {@link SqlEntityProcessor} is used; otherwise the
 * named class is loaded and instantiated reflectively. A failure to load or instantiate that class
 * is handed to {@code wrapAndThrow} with severity {@code SEVERE}.
 *
 * @param entity the configured entity to wrap
 * @return a wrapper whose children list mirrors {@code entity.getChildren()}
 */
public EntityProcessorWrapper getEntityProcessorWrapper(Entity entity) {
    final String processorName = entity.getProcessorName();
    EntityProcessor processor = null;
    if (processorName != null) {
        // A custom processor was configured: resolve it by class name.
        try {
            processor = (EntityProcessor) loadClass(processorName, dataImporter.getCore()).newInstance();
        } catch (Exception loadFailure) {
            wrapAndThrow(SEVERE, loadFailure, "Unable to load EntityProcessor implementation for entity:" + entity.getName());
        }
    } else {
        // No processor configured: use the SQL processor.
        processor = new SqlEntityProcessor();
    }
    EntityProcessorWrapper wrapper = new EntityProcessorWrapper(processor, entity, this);
    // Wrap the children depth-first so the wrapper tree has the same shape as the entity tree.
    for (Entity child : entity.getChildren()) {
        EntityProcessorWrapper childWrapper = getEntityProcessorWrapper(child);
        wrapper.getChildren().add(childWrapper);
    }
    return wrapper;
}
Also used : Entity(org.apache.solr.handler.dataimport.config.Entity) SolrException(org.apache.solr.common.SolrException)

Example 7 with Entity

use of org.apache.solr.handler.dataimport.config.Entity in project lucene-solr by apache.

the class DataImporter method loadDataConfig.

/**
 * Parses the data-import configuration XML from {@code configFile} and converts it to a
 * {@link DIHConfiguration} via {@code readFromXml}.
 *
 * <p>As a side effect, {@code isDeltaImportSupported} is set to {@code true} when any entity
 * returned by {@code getEntities()} carries the {@code SqlEntityProcessor.DELTA_QUERY} attribute.
 *
 * @param configFile the XML input to parse; its byte stream is closed quietly after parsing
 * @return the configuration read from the parsed document
 * @throws DataImportHandlerException (severity {@code SEVERE}) wrapping any exception raised while
 *         building the parser, parsing, or reading the configuration
 */
public DIHConfiguration loadDataConfig(InputSource configFile) {
    DIHConfiguration loadedConfig = null;
    try {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // only enable xinclude if a SolrCore and SystemId are present (makes no sense otherwise)
        final boolean xincludeApplicable = core != null && configFile.getSystemId() != null;
        if (xincludeApplicable) {
            try {
                factory.setXIncludeAware(true);
                factory.setNamespaceAware(true);
            } catch (UnsupportedOperationException unsupported) {
                LOG.warn("XML parser doesn't support XInclude option");
            }
        }
        DocumentBuilder docBuilder = factory.newDocumentBuilder();
        if (core != null) {
            docBuilder.setEntityResolver(new SystemIdResolver(core.getResourceLoader()));
        }
        docBuilder.setErrorHandler(XMLLOG);
        Document parsed;
        try {
            parsed = docBuilder.parse(configFile);
        } finally {
            // some XML parsers are broken and don't close the byte stream (but they should according to spec)
            IOUtils.closeQuietly(configFile.getByteStream());
        }
        loadedConfig = readFromXml(parsed);
        LOG.info("Data Configuration loaded successfully");
    } catch (Exception failure) {
        throw new DataImportHandlerException(SEVERE, "Data Config problem: " + failure.getMessage(), failure);
    }
    // The first entity declaring a delta query is enough to flag delta-import support.
    for (Entity entity : loadedConfig.getEntities()) {
        boolean declaresDeltaQuery = entity.getAllAttributes().containsKey(SqlEntityProcessor.DELTA_QUERY);
        if (declaresDeltaQuery) {
            isDeltaImportSupported = true;
            break;
        }
    }
    return loadedConfig;
}
Also used : DIHConfiguration(org.apache.solr.handler.dataimport.config.DIHConfiguration) Entity(org.apache.solr.handler.dataimport.config.Entity) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) DocumentBuilder(javax.xml.parsers.DocumentBuilder) SystemIdResolver(org.apache.solr.util.SystemIdResolver) Document(org.w3c.dom.Document) SolrException(org.apache.solr.common.SolrException) IOException(java.io.IOException)

Example 8 with Entity

use of org.apache.solr.handler.dataimport.config.Entity in project lucene-solr by apache.

the class DocBuilder method execute.

/**
 * Runs the import for every configured root entity (or only those named by
 * {@code reqParams.getEntitiesToRun()} when that list is non-null) and records progress and totals
 * in {@code statusMessages}.
 *
 * <p>For each selected entity the {@code preImportDeleteQuery} is applied, then either
 * {@code doDelta()} or {@code doFullDump()} runs depending on the importer status, then the
 * {@code postImportDeleteQuery} is applied when present. Any exception is rethrown wrapped in a
 * {@link RuntimeException}. The writer and the entity processor wrappers are closed in all cases.
 */
@SuppressWarnings("unchecked")
public void execute() {
    List<EntityProcessorWrapper> wrappers = null;
    try {
        dataImporter.store(DataImporter.STATUS_MSGS, statusMessages);
        config = dataImporter.getConfig();
        final AtomicLong startedAt = new AtomicLong(System.nanoTime());
        // The elapsed time is rendered lazily, each time the status entry is converted to a string.
        statusMessages.put(TIME_ELAPSED, new Object() {

            @Override
            public String toString() {
                return getTimeElapsedSince(startedAt.get());
            }
        });
        statusMessages.put(DataImporter.MSG.TOTAL_QUERIES_EXECUTED, importStatistics.queryCount);
        statusMessages.put(DataImporter.MSG.TOTAL_ROWS_EXECUTED, importStatistics.rowsCount);
        statusMessages.put(DataImporter.MSG.TOTAL_DOC_PROCESSED, importStatistics.docCount);
        statusMessages.put(DataImporter.MSG.TOTAL_DOCS_SKIPPED, importStatistics.skipDocCount);
        List<String> entitiesToRun = reqParams.getEntitiesToRun();
        // Trigger onImportStart
        if (config.getOnImportStart() != null) {
            invokeEventListener(config.getOnImportStart());
        }
        //we must not do a delete of *:* multiple times if there are multiple root entities to be run
        AtomicBoolean fullCleanDone = new AtomicBoolean(false);
        Map<String, Object> lastIndexTimeProps = new HashMap<>();
        lastIndexTimeProps.put(LAST_INDEX_KEY, dataImporter.getIndexStartTime());
        wrappers = new ArrayList<>(config.getEntities().size());
        for (Entity rootEntity : config.getEntities()) {
            wrappers.add(getEntityProcessorWrapper(rootEntity));
        }
        for (EntityProcessorWrapper wrapper : wrappers) {
            final String entityName = wrapper.getEntity().getName();
            if (entitiesToRun != null && !entitiesToRun.contains(entityName)) {
                continue;
            }
            lastIndexTimeProps.put(entityName + "." + LAST_INDEX_KEY, propWriter.getCurrentTimestamp());
            currentEntityProcessorWrapper = wrapper;
            String preDeleteQuery = wrapper.getEntity().getAllAttributes().get("preImportDeleteQuery");
            // Read the status before cleaning so the delta/full decision is made at the same point as before.
            final boolean deltaRun = dataImporter.getStatus() == DataImporter.Status.RUNNING_DELTA_DUMP;
            cleanByQuery(preDeleteQuery, fullCleanDone);
            if (deltaRun) {
                doDelta();
            } else {
                doFullDump();
            }
            String postDeleteQuery = wrapper.getEntity().getAllAttributes().get("postImportDeleteQuery");
            if (postDeleteQuery != null) {
                // Reset the flag before running the post-import delete.
                fullCleanDone.set(false);
                cleanByQuery(postDeleteQuery, fullCleanDone);
            }
        }
        if (stop.get()) {
            // Dont commit if aborted using command=abort
            SimpleDateFormat abortStampFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT);
            statusMessages.put("Aborted", abortStampFormat.format(new Date()));
            handleError("Aborted", null);
        } else {
            // Finish unconditionally for a clean import; otherwise do not commit unnecessarily
            // when no documents were created or deleted.
            if (reqParams.isClean() || importStatistics.docCount.get() > 0 || importStatistics.deletedDocCount.get() > 0) {
                finish(lastIndexTimeProps);
            }
            if (config.getOnImportEnd() != null) {
                invokeEventListener(config.getOnImportEnd());
            }
        }
        // Replace the live counters with final values.
        statusMessages.remove(TIME_ELAPSED);
        statusMessages.put(DataImporter.MSG.TOTAL_DOC_PROCESSED, "" + importStatistics.docCount.get());
        if (importStatistics.failedDocCount.get() > 0) {
            statusMessages.put(DataImporter.MSG.TOTAL_FAILED_DOCS, "" + importStatistics.failedDocCount.get());
        }
        statusMessages.put("Time taken", getTimeElapsedSince(startedAt.get()));
        LOG.info("Time taken = " + getTimeElapsedSince(startedAt.get()));
    } catch (Exception failure) {
        throw new RuntimeException(failure);
    } finally {
        if (writer != null) {
            writer.close();
        }
        if (wrappers != null) {
            closeEntityProcessorWrappers(wrappers);
        }
        if (reqParams.isDebug()) {
            reqParams.getDebugInfo().debugVerboseOutput = getDebugLogger().output;
        }
    }
}
Also used : Entity(org.apache.solr.handler.dataimport.config.Entity) SolrException(org.apache.solr.common.SolrException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) AtomicLong(java.util.concurrent.atomic.AtomicLong) SimpleDateFormat(java.text.SimpleDateFormat)

Example 9 with Entity

use of org.apache.solr.handler.dataimport.config.Entity in project lucene-solr by apache.

the class DocBuilder method getVariableResolver.

/**
 * Creates a {@link VariableResolver} populated with the importer namespace.
 *
 * <p>The namespace holds the last index time (the persisted value, or the formatted {@code EPOCH}
 * when none is persisted), the index start time, a copy of the raw request parameters, the handler
 * name, and one nested map per configured entity containing that entity's last index time. The
 * same map is registered under both {@code ConfigNameConstants.IMPORTER_NS_SHORT} and
 * {@code ConfigNameConstants.IMPORTER_NS}.
 *
 * @return the populated resolver
 */
private VariableResolver getVariableResolver() {
    try {
        final String epoch = propWriter.convertDateToString(EPOCH);
        final VariableResolver resolver;
        if (dataImporter == null || dataImporter.getCore() == null || dataImporter.getCore().getResourceLoader().getCoreProperties() == null) {
            resolver = new VariableResolver();
        } else {
            // Core properties are available: seed the resolver with them.
            resolver = new VariableResolver(dataImporter.getCore().getResourceLoader().getCoreProperties());
        }
        resolver.setEvaluators(dataImporter.getEvaluators());

        Map<String, Object> indexerNamespace = new HashMap<>();
        // Fall back to the epoch when no last index time has been persisted.
        Object persistedLastIndexTime = persistedProperties.get(LAST_INDEX_TIME);
        indexerNamespace.put(LAST_INDEX_TIME, persistedLastIndexTime != null ? persistedLastIndexTime : epoch);
        indexerNamespace.put(INDEX_START_TIME, dataImporter.getIndexStartTime());
        indexerNamespace.put("request", new HashMap<>(reqParams.getRawParams()));
        indexerNamespace.put("handlerName", dataImporter.getHandlerName());

        // Each entity gets its own sub-namespace keyed by entity name.
        for (Entity entity : dataImporter.getConfig().getEntities()) {
            Object persistedForEntity = persistedProperties.get(entity.getName() + "." + SolrWriter.LAST_INDEX_KEY);
            Map<String, Object> entityNamespace = new HashMap<>();
            entityNamespace.put(SolrWriter.LAST_INDEX_KEY, persistedForEntity != null ? persistedForEntity : epoch);
            indexerNamespace.put(entity.getName(), entityNamespace);
        }

        resolver.addNamespace(ConfigNameConstants.IMPORTER_NS_SHORT, indexerNamespace);
        resolver.addNamespace(ConfigNameConstants.IMPORTER_NS, indexerNamespace);
        return resolver;
    } catch (Exception failure) {
        wrapAndThrow(SEVERE, failure);
        // unreachable statement
        return null;
    }
}
Also used : Entity(org.apache.solr.handler.dataimport.config.Entity) SolrException(org.apache.solr.common.SolrException)

Example 10 with Entity

use of org.apache.solr.handler.dataimport.config.Entity in project lucene-solr by apache.

the class TestMailEntityProcessor method testRecursion.

/**
 * Runs a full import over the {@code top2} folder with recursion enabled and attachment processing
 * disabled, and expects exactly 8 documents to have been written.
 *
 * <p>Ignored by default because it needs a mock mail server.
 */
@Test
@Ignore("Needs a Mock Mail Server to work")
public void testRecursion() {
    paramMap.put("folders", "top2");
    paramMap.put("recurse", "true");
    paramMap.put("processAttachement", "false");
    DataImporter di = new DataImporter();
    di.loadAndInit(getConfigFromMap(paramMap));
    RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
    SolrWriterImpl swi = new SolrWriterImpl();
    di.runCmd(rp, swi);
    // assertEquals takes (message, expected, actual); passing them in that order keeps the
    // failure report ("expected:<8> but was:<n>") truthful.
    assertEquals("top2 and its children did not return 8 messages", 8, swi.docs.size());
}
Also used : Entity(org.apache.solr.handler.dataimport.config.Entity) Ignore(org.junit.Ignore) Test(org.junit.Test)

Aggregations

Entity (org.apache.solr.handler.dataimport.config.Entity)16 Test (org.junit.Test)11 DIHConfiguration (org.apache.solr.handler.dataimport.config.DIHConfiguration)6 Ignore (org.junit.Ignore)6 SolrException (org.apache.solr.common.SolrException)5 SolrInputDocument (org.apache.solr.common.SolrInputDocument)3 IOException (java.io.IOException)1 SimpleDateFormat (java.text.SimpleDateFormat)1 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)1 AtomicLong (java.util.concurrent.atomic.AtomicLong)1 DocumentBuilder (javax.xml.parsers.DocumentBuilder)1 DocumentBuilderFactory (javax.xml.parsers.DocumentBuilderFactory)1 SystemIdResolver (org.apache.solr.util.SystemIdResolver)1 Document (org.w3c.dom.Document)1