Search in sources :

Example 6 with Pattern

use of org.talend.dataquality.domain.pattern.Pattern in project tdq-studio-se by Talend.

the class UpdateSomePatternToMatchMysql method retirePattern.

/**
 * Looks up the pattern held by the given resource.
 * <p>
 * Only the first element of the resource contents is inspected. It is passed through a
 * {@link PatternSwitch} whose {@code casePattern} hands the object back unchanged. An empty
 * resource is reported through the error log and yields {@code null}.
 *
 * @param fileResource the resource whose contents are inspected
 * @return the result of switching on the first content element, or {@code null} when the
 *         resource has no contents
 */
private Pattern retirePattern(Resource fileResource) {
    final EList<EObject> rootObjects = fileResource.getContents();
    if (rootObjects.isEmpty()) {
        log.error(DefaultMessagesImpl.getString("UpdateSomePatternToMatchMysql_logErr", fileResource)); //$NON-NLS-1$
    }
    if (log.isDebugEnabled()) {
        log.debug("No elements in contents " + rootObjects.size()); //$NON-NLS-1$
    }
    final PatternSwitch<Pattern> patternExtractor = new PatternSwitch<Pattern>() {

        @Override
        public Pattern casePattern(Pattern object) {
            return object;
        }
    };
    // Nothing to switch on when the resource is empty.
    return rootObjects.isEmpty() ? null : patternExtractor.doSwitch(rootObjects.get(0));
}
Also used : Pattern(org.talend.dataquality.domain.pattern.Pattern) EObject(org.eclipse.emf.ecore.EObject) PatternSwitch(org.talend.dataquality.domain.pattern.util.PatternSwitch)

Example 7 with Pattern

use of org.talend.dataquality.domain.pattern.Pattern in project tdq-studio-se by Talend.

the class UpdateDependenciesFileTask method updateDependencies.

/**
 * Walks the given folder recursively and, for every file that resolves to an analysis,
 * registers a dependency from that analysis to each of its patterns and to each of its
 * user defined indicators. The analysis is saved after every registered dependency.
 *
 * @param analysesSubFolder the folder whose members are processed
 * @throws CoreException if the members of a folder cannot be listed
 */
private void updateDependencies(IFolder analysesSubFolder) throws CoreException {
    for (IResource member : analysesSubFolder.members()) {
        if (member instanceof IFolder) {
            // Sub folders are handled by recursion.
            updateDependencies((IFolder) member);
        }
        if (!(member instanceof IFile)) {
            continue;
        }
        final Analysis analysis = AnaResourceFileHelper.getInstance().findAnalysis((IFile) member);
        if (analysis == null) {
            // The file does not hold an analysis: nothing to update.
            continue;
        }
        // analysis -> pattern dependencies
        final List<Pattern> usedPatterns = AnalysisHelper.getPatterns(analysis);
        for (Pattern usedPattern : usedPatterns) {
            DependenciesHandler.getInstance().setDependencyOn(analysis, usedPattern);
            AnaResourceFileHelper.getInstance().save(analysis);
        }
        // analysis -> user defined indicator dependencies
        final List<IndicatorDefinition> usedDefinitions = AnalysisHelper.getUserDefinedIndicators(analysis);
        for (IndicatorDefinition usedDefinition : usedDefinitions) {
            DependenciesHandler.getInstance().setDependencyOn(analysis, usedDefinition);
            AnaResourceFileHelper.getInstance().save(analysis);
        }
    }
}
Also used : Pattern(org.talend.dataquality.domain.pattern.Pattern) IFile(org.eclipse.core.resources.IFile) Analysis(org.talend.dataquality.analysis.Analysis) IndicatorDefinition(org.talend.dataquality.indicators.definition.IndicatorDefinition) IResource(org.eclipse.core.resources.IResource) IFolder(org.eclipse.core.resources.IFolder)

Example 8 with Pattern

use of org.talend.dataquality.domain.pattern.Pattern in project tdq-studio-se by Talend.

the class CreatePatternsMigratorTask method doExecute.

/**
 * Creates a fixed set of regular-expression patterns in the pattern regex folders.
 * <p>
 * Each pattern is written only when its target folder exists and no file with the expected
 * name is already present. The money folder is the only one that is created on demand; all
 * other folders are skipped when missing.
 *
 * @return always {@code true}
 * @throws Exception if a folder cannot be created or a pattern cannot be written
 */
@Override
protected boolean doExecute() throws Exception {
    parameter = new PatternParameter();

    // FR Insee Code
    IFolder folder = ResourceManager.getPatternRegexFolder().getFolder(PATH_ADDRESS);
    if (folder.exists()) {
        createPatternIfAbsent(folder, //
                "FR_Insee_Code_0.1.pattern", //$NON-NLS-1$
                "FR Insee Code", //$NON-NLS-1$
                "'^(F-|FRA?(-| ))?((2[A|B])|[0-9]{2})[0-9]{3}$'", //$NON-NLS-1$
                null, //
                " FRA-2A235 |   F-2B128 |  FRA 2B356", //$NON-NLS-1$
                "French Insee code of cities with Corsica and colonies"); //$NON-NLS-1$
    }

    // SEDOL
    folder = ResourceManager.getPatternRegexFolder().getFolder(PATH_CUSTOMER);
    if (folder.exists()) {
        createPatternIfAbsent(folder, //
                "SEDOL_0.1.pattern", //$NON-NLS-1$
                "SEDOL", //$NON-NLS-1$
                "'^([B-Db-dF-Hf-hJ-Nj-nP-Tp-tV-Xv-xYyZz0-9]{6}[0-9])$'", //$NON-NLS-1$
                null, //
                "B01HL06 | 4155586", //$NON-NLS-1$
                "Stock Exchange Daily Official List "); //$NON-NLS-1$
    }

    // IPv6 Address, MAC Address
    folder = ResourceManager.getPatternRegexFolder().getFolder(PATH_INTERNET);
    if (folder.exists()) {
        // NOTE(review): unlike the other patterns, the first tag value here is not a sample
        // value - confirm the argument order is intended.
        createPatternIfAbsent(folder, //
                "IPv6_Address_0.1.pattern", //$NON-NLS-1$
                "IPv6 Address", //$NON-NLS-1$
                "'^((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}((\\b((25[0-5])|(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b)\\.){3}(\\b((25[0-5])|(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b))|(([0-9A-Fa-f]{1,4}:){0,5}:((\\b((25[0-5])|(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b)\\.){3}(\\b((25[0-5])|(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b))|(::([0-9A-Fa-f]{1,4}:){0,5}((\\b((25[0-5])|(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b)\\.){3}(\\b((25[0-5])|(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b))|([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|(::([0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))$'", //$NON-NLS-1$
                null, //
                "Check if it is a IPv6 address", //$NON-NLS-1$
                "IPv6 address"); //$NON-NLS-1$
        createPatternIfAbsent(folder, //
                "MAC_Address_0.1.pattern", //$NON-NLS-1$
                "MAC Address", //$NON-NLS-1$
                "'^([0-9a-fA-F][0-9a-fA-F]:){5}([0-9a-fA-F][0-9a-fA-F])$'", //$NON-NLS-1$
                null, //
                "A4:4E:31:B9:C5:B4", //$NON-NLS-1$
                "Match MAC Address"); //$NON-NLS-1$
    }

    // GPS Coordinate, UK SSN
    folder = ResourceManager.getPatternRegexFolder().getFolder(PATH_NUMBER);
    if (folder.exists()) {
        createPatternIfAbsent(folder, //
                "GPS_Coordinate_0.1.pattern", //$NON-NLS-1$
                "GPS Coordinate", //$NON-NLS-1$
                "'^([0-9]{1,3}[\\.][0-9]*)[, ]+-?([0-9]{1,3}[\\.][0-9]*)$'", //$NON-NLS-1$
                null, //
                "40.7127837,-74.00594130000002", //$NON-NLS-1$
                "Google Maps style GPS Decimal format"); //$NON-NLS-1$
        createPatternIfAbsent(folder, //
                "UK_SSN_0.1.pattern", //$NON-NLS-1$
                "UK SSN", //$NON-NLS-1$
                "'^[A-CEGHJ-PR-TW-Z]{1}[A-CEGHJ-NPR-TW-Z]{1}([0-9]{6}|( [0-9]{2}){3} )[A-DFM]{0,1}$'", //$NON-NLS-1$
                null, //
                "AB123456C | AB 12 34 56 C", //$NON-NLS-1$
                "National identification number, national identity number, or national insurance number generally called an NI Number (NINO)"); //$NON-NLS-1$
    }

    // EN Amount Money, FR Amount Money
    folder = ResourceManager.getPatternRegexFolder().getFolder(PATH_MONEY);
    if (!folder.exists()) {
        // The money folder is the only one created when missing.
        folder.create(true, true, null);
    }
    if (folder.exists()) {
        // This pattern additionally carries a MySQL specific expression; it differs from the
        // generic one only in the escaping of the dollar sign.
        createPatternIfAbsent(folder, //
                "EN_Amount_Money_0.1.pattern", //$NON-NLS-1$
                "EN Amount Money", //$NON-NLS-1$
                "'^((US|CA)?\\$|\\£|\\€|\\¥)(([1-9][0-9]{0,2}(\\,[0-9]{3})*)|([1-9][0-9]*)|(0))(\\.[0-9]{2}|k|M|G|T)?$'", //$NON-NLS-1$
                "'^((US|CA)?\\\\$|\\£|\\€|\\¥)(([1-9][0-9]{0,2}(\\,[0-9]{3})*)|([1-9][0-9]*)|(0))(\\.[0-9]{2}|k|M|G|T)?$'", //$NON-NLS-1$
                "$3,000 || CA$3000", //$NON-NLS-1$
                "Amount of money in English format"); //$NON-NLS-1$
        createPatternIfAbsent(folder, //
                "FR_Amount_Money_0.1.pattern", //$NON-NLS-1$
                "FR Amount Money", //$NON-NLS-1$
                "'^(([1-9][0-9]{0,2}( [0-9]{3})*)|([1-9][0-9]*)|0)((,[0-9]{2} | (k|M|G|T))?| )(\\$( (US|CA))?|\\£|\\€|\\¥)$'", //$NON-NLS-1$
                null, //
                "3000 € | 35 k€ | 35 054 T€", //$NON-NLS-1$
                "Amount of money in French format"); //$NON-NLS-1$
    }

    // EN_Month_Abbrev, EN_Month
    folder = ResourceManager.getPatternRegexFolder().getFolder(PATH_DATE);
    if (folder.exists()) {
        createPatternIfAbsent(folder, //
                "EN_Month_Abbrev_0.1.pattern", //$NON-NLS-1$
                "EN_Month_Abbrev", //$NON-NLS-1$
                "'^(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)$'", //$NON-NLS-1$
                null, //
                "Jan | Feb ", //$NON-NLS-1$
                "Month English abbreviation"); //$NON-NLS-1$
        createPatternIfAbsent(folder, //
                "EN_Month_0.1.pattern", //$NON-NLS-1$
                "EN_Month", //$NON-NLS-1$
                "'^(January|June|July|February|March|May|April|August|September|October|November|December)$'", //$NON-NLS-1$
                null, //
                "January | February ", //$NON-NLS-1$
                "Month in English"); //$NON-NLS-1$
    }
    return true;
}

/**
 * Builds one pattern and writes it into {@code folder}, unless a file named {@code fileName}
 * already exists there or the pattern cannot be built.
 *
 * @param folder the target folder; the caller has already checked that it exists
 * @param fileName name of the pattern file whose presence suppresses the creation
 * @param patternName name given to the new pattern
 * @param expression regular expression registered for {@code SQLLanguage}
 * @param mysqlExpression additional expression registered for "MySQL", or {@code null} for none
 * @param firstTagValue first value forwarded to {@code setTagValue}
 * @param secondTagValue second value forwarded to {@code setTagValue}
 * @throws Exception if tagging or writing the pattern fails
 */
private void createPatternIfAbsent(IFolder folder, String fileName, String patternName, String expression,
        String mysqlExpression, String firstTagValue, String secondTagValue) throws Exception {
    if (folder.getFile(fileName).exists()) {
        return;
    }
    Pattern pattern = newPattern(patternName, SQLLanguage, expression);
    if (pattern == null) {
        return;
    }
    if (mysqlExpression != null) {
        RegularExpression mysqlRegularExpr = BooleanExpressionHelper.createRegularExpression("MySQL", mysqlExpression); //$NON-NLS-1$
        mysqlRegularExpr.setExpressionType("REGEXP"); //$NON-NLS-1$
        pattern.getComponents().add(mysqlRegularExpr);
    }
    setTagValue(pattern, firstTagValue, secondTagValue);
    ElementWriterFactory.getInstance().createPatternWriter().create(pattern, folder);
}
Also used : Pattern(org.talend.dataquality.domain.pattern.Pattern) RegularExpression(org.talend.dataquality.domain.pattern.RegularExpression) PatternParameter(org.talend.dq.analysis.parameters.PatternParameter) IFolder(org.eclipse.core.resources.IFolder)

Example 9 with Pattern

use of org.talend.dataquality.domain.pattern.Pattern in project tdq-studio-se by Talend.

the class CreatePatternsMigratorTask method newPattern.

/**
 * Builds a pattern with the given name that carries a single regular expression of type
 * "REGEXP".
 *
 * @param name name used to initialize the pattern
 * @param lang language passed to the regular expression factory
 * @param express body of the regular expression
 * @return the new pattern, or {@code null} when the builder fails to initialize it
 */
private Pattern newPattern(String name, String lang, String express) {
    final PatternBuilder builder = new PatternBuilder();
    if (!builder.initializePattern(name)) {
        return null;
    }
    final Pattern created = builder.getPattern();
    final RegularExpression expression = BooleanExpressionHelper.createRegularExpression(lang, express);
    expression.setExpressionType("REGEXP"); //$NON-NLS-1$
    created.getComponents().add(expression);
    return created;
}
Also used : Pattern(org.talend.dataquality.domain.pattern.Pattern) RegularExpression(org.talend.dataquality.domain.pattern.RegularExpression) PatternBuilder(org.talend.dq.pattern.PatternBuilder)

Example 10 with Pattern

use of org.talend.dataquality.domain.pattern.Pattern in project tdq-studio-se by Talend.

the class AElementPersistanceRealTest method testCreateItemPattern.

/**
 * Test method for
 * {@link org.talend.dq.writer.AElementPersistance#createItem(orgomg.cwm.objectmodel.core.ModelElement)}.
 * <p>
 * Checks the file name of the item created for a plain pattern name and for a name
 * containing special characters.
 */
@Test
public void testCreateItemPattern() {
    final PatternWriter writer = ElementWriterFactory.getInstance().createPatternWriter();
    // plain name
    final String plainName = "pattern1"; //$NON-NLS-1$
    assertCreatedItemFilename(writer, plainName, plainName + "_0.1.pattern"); //$NON-NLS-1$
    // name with special characters
    assertCreatedItemFilename(writer, "pattern1(),。", "pattern1_____0.1.pattern"); //$NON-NLS-1$ //$NON-NLS-2$
}

/**
 * Creates an item for a fresh pattern named {@code patternName} and asserts that it is a
 * {@link TDQPatternItem} whose file name equals {@code expectedFileName}.
 */
private void assertCreatedItemFilename(PatternWriter writer, String patternName, String expectedFileName) {
    final Pattern freshPattern = PatternFactory.eINSTANCE.createPattern();
    freshPattern.setName(patternName);
    final Item createdItem = writer.createItem(freshPattern);
    assertTrue(createdItem instanceof TDQPatternItem);
    final TDQPatternItem createdPatternItem = (TDQPatternItem) createdItem;
    assertTrue(createdPatternItem.getFilename().equals(expectedFileName));
}
Also used : Pattern(org.talend.dataquality.domain.pattern.Pattern) TDQAnalysisItem(org.talend.dataquality.properties.TDQAnalysisItem) TDQPatternItem(org.talend.dataquality.properties.TDQPatternItem) Item(org.talend.core.model.properties.Item) TDQIndicatorDefinitionItem(org.talend.dataquality.properties.TDQIndicatorDefinitionItem) TDQBusinessRuleItem(org.talend.dataquality.properties.TDQBusinessRuleItem) TDQReportItem(org.talend.dataquality.properties.TDQReportItem) PatternWriter(org.talend.dq.writer.impl.PatternWriter) TDQPatternItem(org.talend.dataquality.properties.TDQPatternItem) Test(org.junit.Test)

Aggregations

Pattern (org.talend.dataquality.domain.pattern.Pattern)75 RegularExpression (org.talend.dataquality.domain.pattern.RegularExpression)27 Test (org.junit.Test)19 TdExpression (org.talend.cwm.relational.TdExpression)18 Domain (org.talend.dataquality.domain.Domain)16 Analysis (org.talend.dataquality.analysis.Analysis)15 PatternComponent (org.talend.dataquality.domain.pattern.PatternComponent)15 IndicatorParameters (org.talend.dataquality.indicators.IndicatorParameters)14 ArrayList (java.util.ArrayList)13 PatternMatchingIndicator (org.talend.dataquality.indicators.PatternMatchingIndicator)13 IFile (org.eclipse.core.resources.IFile)11 TDQPatternItem (org.talend.dataquality.properties.TDQPatternItem)10 Expression (orgomg.cwm.objectmodel.core.Expression)10 IFolder (org.eclipse.core.resources.IFolder)9 DatabaseConnection (org.talend.core.model.metadata.builder.connection.DatabaseConnection)9 Indicator (org.talend.dataquality.indicators.Indicator)9 Item (org.talend.core.model.properties.Item)7 TdColumn (org.talend.cwm.relational.TdColumn)7 RegexpMatchingIndicator (org.talend.dataquality.indicators.RegexpMatchingIndicator)6 IndicatorDefinition (org.talend.dataquality.indicators.definition.IndicatorDefinition)6