Search in sources :

Example 1 with HtmlExtractionRule

use of com.twinsoft.convertigo.beans.extractionrules.HtmlExtractionRule in project convertigo by convertigo.

In the class HtmlTransaction, the method addExtractionRuleShemas:

/**
 * Gathers schema information from every enabled extraction rule of the given screen class,
 * then does the same, recursively, for each screen class returned by
 * {@code getInheritedScreenClasses()}.
 *
 * @param names       schema element name mapped to the value of {@code getSchemaElementNSType("p_ns")};
 *                    filled in place
 * @param types       schema type name mapped to its schema, as returned by {@code getSchemaTypes()};
 *                    filled in place (later entries overwrite earlier ones with the same key)
 * @param schemas     list of rule schemas, each distinct value added once; filled in place
 * @param screenClass screen class to inspect; when {@code null} the method does nothing
 * @throws Exception if an element name is already registered in {@code names} with a different type
 */
private void addExtractionRuleShemas(Map<String, String> names, Map<String, String> types, List<String> schemas, HtmlScreenClass screenClass) throws Exception {
    if (screenClass == null) {
        return;
    }

    for (ExtractionRule rule : screenClass.getExtractionRules()) {
        HtmlExtractionRule htmlRule = (HtmlExtractionRule) rule;
        if (!htmlRule.isEnabled()) {
            continue;
        }

        String elementName = htmlRule.getSchemaElementName();
        String elementType = htmlRule.getSchemaElementNSType("p_ns");

        if (names.containsKey(elementName)) {
            // Same element name seen before: it must map to the same type.
            String knownType = names.get(elementName);
            if (!knownType.equals(elementType)) {
                throw new Exception("Transaction may generate at least two extraction rules named '" + elementName + "' with different type : '" + knownType + "' and '" + elementType + "'.\nPlease correct by changing tagname or name if tagname is empty");
            }
        } else {
            names.put(elementName, elementType);
        }

        // Register the rule's schema only once.
        String ruleSchema = htmlRule.getSchema("p_ns");
        if (!schemas.contains(ruleSchema)) {
            schemas.add(ruleSchema);
        }

        types.putAll(htmlRule.getSchemaTypes());
    }

    for (ScreenClass inherited : screenClass.getInheritedScreenClasses()) {
        addExtractionRuleShemas(names, types, schemas, (HtmlScreenClass) inherited);
    }
}
Also used : ScreenClass(com.twinsoft.convertigo.beans.core.ScreenClass) HtmlScreenClass(com.twinsoft.convertigo.beans.screenclasses.HtmlScreenClass) ExtractionRule(com.twinsoft.convertigo.beans.core.ExtractionRule) HtmlExtractionRule(com.twinsoft.convertigo.beans.extractionrules.HtmlExtractionRule) HtmlExtractionRule(com.twinsoft.convertigo.beans.extractionrules.HtmlExtractionRule) EngineException(com.twinsoft.convertigo.engine.EngineException) EvaluatorException(org.mozilla.javascript.EvaluatorException) JavaScriptException(org.mozilla.javascript.JavaScriptException) IOException(java.io.IOException) ObjectWithSameNameException(com.twinsoft.convertigo.engine.ObjectWithSameNameException)

Example 2 with HtmlExtractionRule

use of com.twinsoft.convertigo.beans.extractionrules.HtmlExtractionRule in project convertigo by convertigo.

In the class HtmlConnectorDesignComposite, the method createExtractionRuleFromSelection:

/**
 * Opens a {@code NewObjectWizard} dialog to create an extraction rule from the xpath of the
 * current selection, attached to the parent HTML screen class.
 *
 * <p>Unless the dialog is cancelled, an object-changed event is fired for the parent screen
 * class, the new rule (when one was created) is selected, and the selection is finally put
 * back on the parent screen class.
 *
 * @param dom document handed to the wizard
 * @throws EngineException declared by the signature; the statements here do not throw it explicitly
 */
public void createExtractionRuleFromSelection(Document dom) throws EngineException {
    final String beanClassName = "com.twinsoft.convertigo.beans.core.ExtractionRule";

    // Xpath of the current selection and the screen class that will own the new rule
    String selectionXpath = xpathEvaluator.getSelectionXpath();
    HtmlScreenClass parentScreenClass = getParentHtmlScreenClass();

    // Let the user create the extraction rule through the wizard
    NewObjectWizard wizard = new NewObjectWizard(parentScreenClass, beanClassName, selectionXpath, dom);
    WizardDialog dialog = new WizardDialog(Display.getCurrent().getActiveShell(), wizard);
    dialog.setPageSize(850, 650);
    dialog.open();

    if (dialog.getReturnCode() == Window.CANCEL) {
        return;
    }

    HtmlExtractionRule createdRule = (HtmlExtractionRule) wizard.newBean;

    // Reload the parent screen class in the tree
    fireObjectChanged(new CompositeEvent(parentScreenClass));

    // Selecting the new rule expands the tree down to it
    if (createdRule != null) {
        fireObjectSelected(new CompositeEvent(createdRule));
    }

    // Put the selection back on the parent screen class
    fireObjectSelected(new CompositeEvent(parentScreenClass));
}
Also used : HtmlScreenClass(com.twinsoft.convertigo.beans.screenclasses.HtmlScreenClass) HtmlExtractionRule(com.twinsoft.convertigo.beans.extractionrules.HtmlExtractionRule) NewObjectWizard(com.twinsoft.convertigo.eclipse.wizards.new_object.NewObjectWizard) WizardDialog(org.eclipse.jface.wizard.WizardDialog) CompositeEvent(com.twinsoft.convertigo.eclipse.editors.CompositeEvent)

Example 3 with HtmlExtractionRule

use of com.twinsoft.convertigo.beans.extractionrules.HtmlExtractionRule in project convertigo by convertigo.

In the class HtmlTransaction, the method applyExtractionRules:

/**
 * Applies, in order, every enabled extraction rule of the given screen class to
 * {@code currentXmlDocument}, stopping early as soon as {@code runningThread.bContinue} is false.
 *
 * <p>Each rule is initialized with {@code ExtractionRule.INITIALIZING} the first time its
 * qualified name is seen and with {@code ExtractionRule.ACCUMULATING} afterwards. Rules that
 * match are added to {@code scope}. Engine events are fired only in studio mode. The whole
 * call is timed through {@code context.statistics}.
 *
 * @param screenClass   screen class whose extraction rules are applied
 * @param bNotFirstLoop forwarded unchanged to {@code context.statistics.stop}
 * @throws EngineException declared by the signature; presumably raised by the rule calls made
 *                         here (not visible in this excerpt)
 */
public void applyExtractionRules(HtmlScreenClass screenClass, boolean bNotFirstLoop) throws EngineException {
    final String statsToken = context.statistics.start(EngineStatistics.APPLY_EXTRACTION_RULES);
    try {
        for (ExtractionRule rule : screenClass.getExtractionRules()) {
            HtmlExtractionRule htmlRule = (HtmlExtractionRule) rule;

            if (!runningThread.bContinue) {
                break;
            }

            if (!rule.isEnabled()) {
                Engine.logBeans.trace("(HtmlTransaction) Skipping the extraction rule \"" + rule.getName() + "\" because it has been disabled.");
                continue;
            }

            Engine.logBeans.debug("(HtmlTransaction) Applying the extraction rule \"" + rule.getName() + "\"");
            rule.checkSymbols();

            // First encounter of this rule => INITIALIZING, otherwise ACCUMULATING
            String qName = rule.getQName();
            int initReason;
            if (vExtractionRulesInited.contains(qName)) {
                initReason = ExtractionRule.ACCUMULATING;
            } else {
                initReason = ExtractionRule.INITIALIZING;
                vExtractionRulesInited.add(qName);
            }

            Engine.logBeans.trace("(HtmlTransaction) Initializing extraction rule (reason = " + initReason + ")...");
            rule.init(initReason);

            // Engine events are fired only in studio mode.
            if (Engine.isStudioMode()) {
                Engine.theApp.fireObjectDetected(new EngineEvent(rule));
            }

            if (htmlRule.apply(currentXmlDocument, context)) {
                htmlRule.addToScope(scope);
                Engine.logBeans.trace("(HtmlTransaction) Applying extraction rule '" + rule.getName() + "': matching");
            } else {
                Engine.logBeans.trace("(HtmlTransaction) Applying extraction rule '" + rule.getName() + "': not matching");
            }

            // Engine events are fired only in studio mode.
            if (Engine.isStudioMode()) {
                Engine.logBeans.debug("(HtmlTransaction) Step reached after having applied the extraction rule \"" + rule.getName() + "\".");
                Engine.theApp.fireStepReached(new EngineEvent(rule));
            }
        }
    } finally {
        context.statistics.stop(statsToken, bNotFirstLoop);
    }
}
Also used : EngineEvent(com.twinsoft.convertigo.engine.EngineEvent) ExtractionRule(com.twinsoft.convertigo.beans.core.ExtractionRule) HtmlExtractionRule(com.twinsoft.convertigo.beans.extractionrules.HtmlExtractionRule) HtmlExtractionRule(com.twinsoft.convertigo.beans.extractionrules.HtmlExtractionRule)

Aggregations

HtmlExtractionRule (com.twinsoft.convertigo.beans.extractionrules.HtmlExtractionRule): 3; ExtractionRule (com.twinsoft.convertigo.beans.core.ExtractionRule): 2; HtmlScreenClass (com.twinsoft.convertigo.beans.screenclasses.HtmlScreenClass): 2; ScreenClass (com.twinsoft.convertigo.beans.core.ScreenClass): 1; CompositeEvent (com.twinsoft.convertigo.eclipse.editors.CompositeEvent): 1; NewObjectWizard (com.twinsoft.convertigo.eclipse.wizards.new_object.NewObjectWizard): 1; EngineEvent (com.twinsoft.convertigo.engine.EngineEvent): 1; EngineException (com.twinsoft.convertigo.engine.EngineException): 1; ObjectWithSameNameException (com.twinsoft.convertigo.engine.ObjectWithSameNameException): 1; IOException (java.io.IOException): 1; WizardDialog (org.eclipse.jface.wizard.WizardDialog): 1; EvaluatorException (org.mozilla.javascript.EvaluatorException): 1; JavaScriptException (org.mozilla.javascript.JavaScriptException): 1