Search in sources :

Example 1 with RegularExpression

use of org.talend.dataquality.domain.pattern.RegularExpression in project tdq-studio-se by Talend.

the class CreatePatternWizard method initCWMResourceBuilder.

/**
 * Builds the pattern model element from the current wizard parameter.
 *
 * <p>A fresh {@link PatternBuilder} is stored in the {@code patternBuilder} field on every call. When the
 * builder initializes a pattern for the parameter name, a regular expression created from the parameter's
 * language and expression is typed with {@code type.getLiteral()} and added to the pattern's components.
 *
 * @return the pattern holding the new regular expression, or {@code null} when the builder did not
 * initialize a pattern
 */
public ModelElement initCWMResourceBuilder() {
    patternBuilder = new PatternBuilder();
    if (!patternBuilder.initializePattern(parameter.getName())) {
        // The builder refused to initialize the pattern: there is nothing to hand back.
        return null;
    }
    Pattern builtPattern = patternBuilder.getPattern();
    String languageName = PatternLanguageType.findLanguageByName(parameter.getLanguage());
    RegularExpression expression = BooleanExpressionHelper.createRegularExpression(languageName, parameter.getExpression());
    expression.setExpressionType(type.getLiteral());
    builtPattern.getComponents().add(expression);
    return builtPattern;
}
Also used : Pattern(org.talend.dataquality.domain.pattern.Pattern) RegularExpression(org.talend.dataquality.domain.pattern.RegularExpression) PatternBuilder(org.talend.dq.pattern.PatternBuilder)

Example 2 with RegularExpression

use of org.talend.dataquality.domain.pattern.RegularExpression in project tdq-studio-se by Talend.

the class ImportFactory method createAndStorePattern.

/**
 * Creates a pattern from the imported parameters and writes it below the selected folder.
 *
 * <p>One regular expression is added to the pattern for each key of {@code parameters.regex}. The
 * pattern's validity is computed and stored as a tagged value before the pattern is written.
 *
 * <p>The target folder is resolved by walking {@code parameters.relativePath} segment by segment from
 * {@code selectionFolder}, creating each missing folder. A relative path equal to the regex or SQL
 * pattern root name is treated as empty. If folder creation fails, the error is logged and the pattern
 * is written to the last folder that was reached.
 *
 * @param parameters values read for the pattern being imported
 * @param selectionFolder folder from which the relative path is resolved
 * @param type expression type handed to every regular expression that is created
 * @return the return code produced by the pattern writer
 * @throws TalendInternalPersistenceException declared by this method; presumably raised by the pattern
 * writer - confirm against the writer API
 */
private static TypedReturnCode<Object> createAndStorePattern(PatternParameters parameters, IFolder selectionFolder, ExpressionType type) throws TalendInternalPersistenceException {
    Pattern importedPattern = createPattern(parameters.name, parameters.auther, parameters.description, parameters.purpose, parameters.status);
    for (String regexKey : parameters.regex.keySet()) {
        RegularExpression expression = BooleanExpressionHelper.createRegularExpression(regexKey, parameters.regex.get(regexKey), type);
        importedPattern.getComponents().add(expression);
    }
    TaggedValueHelper.setValidStatus(PatternUtilities.isPatternValid(importedPattern), importedPattern);

    // Walk down from the selected folder; this cursor ends on the folder that receives the pattern.
    IFolder targetFolder = selectionFolder;
    try {
        String path = parameters.relativePath;
        boolean isPatternRootName = EResourceConstant.PATTERN_REGEX.getName().equals(path)
                || EResourceConstant.PATTERN_SQL.getName().equals(path);
        if (isPatternRootName) {
            path = PluginConstant.EMPTY_STRING;
        }
        for (String segment : path.split("/")) { //$NON-NLS-1$
            IFolder child = targetFolder.getFolder(segment);
            if (!child.exists()) {
                child.create(false, true, null);
            }
            targetFolder = child;
        }
    } catch (CoreException e) {
        // Best effort: keep going with whatever folder was reached before the failure.
        log.error(e, e);
    }
    return ElementWriterFactory.getInstance().createPatternWriter().create(importedPattern, targetFolder);
}
Also used : Pattern(org.talend.dataquality.domain.pattern.Pattern) RegularExpression(org.talend.dataquality.domain.pattern.RegularExpression) CoreException(org.eclipse.core.runtime.CoreException) IFolder(org.eclipse.core.resources.IFolder)

Example 3 with RegularExpression

use of org.talend.dataquality.domain.pattern.RegularExpression in project tdq-studio-se by Talend.

the class CreatePatternsMigratorTask method doExecute.

/**
 * Creates the regex pattern files added by this migration task when they are not already present.
 *
 * <p>For the address, customer, internet, number and date folders, patterns are only created when the
 * folder already exists. The money folder is created first when it is missing. Every pattern is skipped
 * when its {@code *_0.1.pattern} file already exists in the folder.
 *
 * @return always {@code true}
 * @throws Exception any failure raised while creating the money folder or writing a pattern
 */
@Override
protected boolean doExecute() throws Exception {
    parameter = new PatternParameter();

    // Address: FR Insee Code
    IFolder folder = ResourceManager.getPatternRegexFolder().getFolder(PATH_ADDRESS);
    if (folder.exists()) {
        createPatternIfAbsent(folder, "FR_Insee_Code_0.1.pattern", "FR Insee Code", //$NON-NLS-1$ //$NON-NLS-2$
                "'^(F-|FRA?(-| ))?((2[A|B])|[0-9]{2})[0-9]{3}$'", //$NON-NLS-1$
                " FRA-2A235 |   F-2B128 |  FRA 2B356", //$NON-NLS-1$
                "French Insee code of cities with Corsica and colonies"); //$NON-NLS-1$
    }

    // Customer: SEDOL
    folder = ResourceManager.getPatternRegexFolder().getFolder(PATH_CUSTOMER);
    if (folder.exists()) {
        createPatternIfAbsent(folder, "SEDOL_0.1.pattern", "SEDOL", //$NON-NLS-1$ //$NON-NLS-2$
                "'^([B-Db-dF-Hf-hJ-Nj-nP-Tp-tV-Xv-xYyZz0-9]{6}[0-9])$'", //$NON-NLS-1$
                "B01HL06 | 4155586", //$NON-NLS-1$
                "Stock Exchange Daily Official List "); //$NON-NLS-1$
    }

    // Internet: IPv6 Address, MAC Address
    folder = ResourceManager.getPatternRegexFolder().getFolder(PATH_INTERNET);
    if (folder.exists()) {
        createPatternIfAbsent(folder, "IPv6_Address_0.1.pattern", "IPv6 Address", //$NON-NLS-1$ //$NON-NLS-2$
                "'^((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}((\\b((25[0-5])|(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b)\\.){3}(\\b((25[0-5])|(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b))|(([0-9A-Fa-f]{1,4}:){0,5}:((\\b((25[0-5])|(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b)\\.){3}(\\b((25[0-5])|(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b))|(::([0-9A-Fa-f]{1,4}:){0,5}((\\b((25[0-5])|(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b)\\.){3}(\\b((25[0-5])|(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b))|([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|(::([0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))$'", //$NON-NLS-1$
                "Check if it is a IPv6 address", //$NON-NLS-1$
                "IPv6 address"); //$NON-NLS-1$
        createPatternIfAbsent(folder, "MAC_Address_0.1.pattern", "MAC Address", //$NON-NLS-1$ //$NON-NLS-2$
                "'^([0-9a-fA-F][0-9a-fA-F]:){5}([0-9a-fA-F][0-9a-fA-F])$'", //$NON-NLS-1$
                "A4:4E:31:B9:C5:B4", //$NON-NLS-1$
                "Match MAC Address"); //$NON-NLS-1$
    }

    // Number: GPS Coordinate, UK SSN
    folder = ResourceManager.getPatternRegexFolder().getFolder(PATH_NUMBER);
    if (folder.exists()) {
        createPatternIfAbsent(folder, "GPS_Coordinate_0.1.pattern", "GPS Coordinate", //$NON-NLS-1$ //$NON-NLS-2$
                "'^([0-9]{1,3}[\\.][0-9]*)[, ]+-?([0-9]{1,3}[\\.][0-9]*)$'", //$NON-NLS-1$
                "40.7127837,-74.00594130000002", //$NON-NLS-1$
                "Google Maps style GPS Decimal format"); //$NON-NLS-1$
        createPatternIfAbsent(folder, "UK_SSN_0.1.pattern", "UK SSN", //$NON-NLS-1$ //$NON-NLS-2$
                "'^[A-CEGHJ-PR-TW-Z]{1}[A-CEGHJ-NPR-TW-Z]{1}([0-9]{6}|( [0-9]{2}){3} )[A-DFM]{0,1}$'", //$NON-NLS-1$
                "AB123456C | AB 12 34 56 C", //$NON-NLS-1$
                "National identification number, national identity number, or national insurance number generally called an NI Number (NINO)"); //$NON-NLS-1$
    }

    // Money: EN Amount Money, FR Amount Money. Unlike the other folders, this one is created when missing.
    folder = ResourceManager.getPatternRegexFolder().getFolder(PATH_MONEY);
    if (!folder.exists()) {
        folder.create(true, true, null);
    }
    if (folder.exists()) {
        // Kept inline: this pattern receives a second, MySQL-specific expression before it is written.
        if (!folder.getFile("EN_Amount_Money_0.1.pattern").exists()) { //$NON-NLS-1$
            Pattern pattern = newPattern("EN Amount Money", SQLLanguage, //$NON-NLS-1$
                    "'^((US|CA)?\\$|\\£|\\€|\\¥)(([1-9][0-9]{0,2}(\\,[0-9]{3})*)|([1-9][0-9]*)|(0))(\\.[0-9]{2}|k|M|G|T)?$'"); //$NON-NLS-1$
            if (pattern != null) {
                // The MySQL variant differs only in the dollar sign, which is preceded by a doubled backslash.
                RegularExpression regularExpr = BooleanExpressionHelper.createRegularExpression("MySQL", //$NON-NLS-1$
                        "'^((US|CA)?\\\\$|\\£|\\€|\\¥)(([1-9][0-9]{0,2}(\\,[0-9]{3})*)|([1-9][0-9]*)|(0))(\\.[0-9]{2}|k|M|G|T)?$'"); //$NON-NLS-1$
                regularExpr.setExpressionType("REGEXP"); //$NON-NLS-1$
                pattern.getComponents().add(regularExpr);
                setTagValue(pattern, "$3,000 || CA$3000", "Amount of money in English format"); //$NON-NLS-1$ //$NON-NLS-2$
                ElementWriterFactory.getInstance().createPatternWriter().create(pattern, folder);
            }
        }
        createPatternIfAbsent(folder, "FR_Amount_Money_0.1.pattern", "FR Amount Money", //$NON-NLS-1$ //$NON-NLS-2$
                "'^(([1-9][0-9]{0,2}( [0-9]{3})*)|([1-9][0-9]*)|0)((,[0-9]{2} | (k|M|G|T))?| )(\\$( (US|CA))?|\\£|\\€|\\¥)$'", //$NON-NLS-1$
                "3000 € | 35 k€ | 35 054 T€", //$NON-NLS-1$
                "Amount of money in French format"); //$NON-NLS-1$
    }

    // Date: EN_Month_Abbrev, EN_Month
    folder = ResourceManager.getPatternRegexFolder().getFolder(PATH_DATE);
    if (folder.exists()) {
        createPatternIfAbsent(folder, "EN_Month_Abbrev_0.1.pattern", "EN_Month_Abbrev", //$NON-NLS-1$ //$NON-NLS-2$
                "'^(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)$'", //$NON-NLS-1$
                "Jan | Feb ", //$NON-NLS-1$
                "Month English abbreviation"); //$NON-NLS-1$
        createPatternIfAbsent(folder, "EN_Month_0.1.pattern", "EN_Month", //$NON-NLS-1$ //$NON-NLS-2$
                "'^(January|June|July|February|March|May|April|August|September|October|November|December)$'", //$NON-NLS-1$
                "January | February ", //$NON-NLS-1$
                "Month in English"); //$NON-NLS-1$
    }
    return true;
}

/**
 * Creates one pattern in {@code folder} unless a file named {@code fileName} already exists there.
 *
 * <p>Nothing is written when {@code newPattern} returns {@code null}.
 *
 * @param folder folder that is checked for the file and that receives the new pattern
 * @param fileName name of the pattern file whose presence means the pattern is skipped
 * @param patternName name given to the new pattern
 * @param regex expression registered for {@code SQLLanguage}
 * @param firstTagText second argument passed to {@code setTagValue}
 * @param secondTagText third argument passed to {@code setTagValue}
 * @throws Exception any failure raised while writing the pattern
 */
private void createPatternIfAbsent(IFolder folder, String fileName, String patternName, String regex, String firstTagText,
        String secondTagText) throws Exception {
    if (folder.getFile(fileName).exists()) {
        return;
    }
    Pattern pattern = newPattern(patternName, SQLLanguage, regex);
    if (pattern == null) {
        return;
    }
    setTagValue(pattern, firstTagText, secondTagText);
    ElementWriterFactory.getInstance().createPatternWriter().create(pattern, folder);
}
Also used : Pattern(org.talend.dataquality.domain.pattern.Pattern) RegularExpression(org.talend.dataquality.domain.pattern.RegularExpression) PatternParameter(org.talend.dq.analysis.parameters.PatternParameter) IFolder(org.eclipse.core.resources.IFolder)

Example 4 with RegularExpression

use of org.talend.dataquality.domain.pattern.RegularExpression in project tdq-studio-se by Talend.

the class CreatePatternsMigratorTask method newPattern.

/**
 * Builds a pattern with a single regular expression whose expression type is {@code "REGEXP"}.
 *
 * @param name name used to initialize the pattern
 * @param lang language passed to the regular expression factory
 * @param express expression body passed to the regular expression factory
 * @return the new pattern, or {@code null} when the builder did not initialize a pattern for {@code name}
 */
private Pattern newPattern(String name, String lang, String express) {
    PatternBuilder builder = new PatternBuilder();
    if (!builder.initializePattern(name)) {
        return null;
    }
    Pattern created = builder.getPattern();
    RegularExpression expression = BooleanExpressionHelper.createRegularExpression(lang, express);
    expression.setExpressionType("REGEXP"); //$NON-NLS-1$
    created.getComponents().add(expression);
    return created;
}
Also used : Pattern(org.talend.dataquality.domain.pattern.Pattern) RegularExpression(org.talend.dataquality.domain.pattern.RegularExpression) PatternBuilder(org.talend.dq.pattern.PatternBuilder)

Example 5 with RegularExpression

use of org.talend.dataquality.domain.pattern.RegularExpression in project tdq-studio-se by Talend.

the class PatternExplorerRealTest method testGetInvalidValuesStatement_3.

/**
 * Test method for {@link org.talend.dq.analysis.explore.PatternExplorer#getInvalidValuesStatement()} when the
 * indicator under test carries a user-defined indicator definition with its own "view invalid values"
 * expression.
 */
@Test
public void testGetInvalidValuesStatement_3() {
    patternExplorer = new RegexPatternExplorer();

    // Regex matching indicator backed by a user-defined indicator definition.
    PatternMatchingIndicator regexIndicator = IndicatorsFactoryImpl.eINSTANCE.createRegexpMatchingIndicator();
    UDIndicatorDefinition udiDefinition = UserdefineFactoryImpl.eINSTANCE.createUDIndicatorDefinition();
    regexIndicator.setIndicatorDefinition(udiDefinition);
    TdExpression viewInvalidExpression = RelationalFactoryImpl.eINSTANCE.createTdExpression();
    String hiveLanguage = SupportDBUrlType.HIVEDEFAULTURL.getLanguage();
    viewInvalidExpression.setLanguage(hiveLanguage);
    viewInvalidExpression.setBody(ViewInvalidValues);
    udiDefinition.getViewInvalidValuesExpression().add(viewInvalidExpression);

    // Indicator parameters: a valid-data domain holding one pattern with one regular expression.
    IndicatorParameters indicatorParameters = IndicatorsFactory.eINSTANCE.createIndicatorParameters();
    regexIndicator.setParameters(indicatorParameters);
    Domain validDomain = DomainFactoryImpl.eINSTANCE.createDomain();
    indicatorParameters.setDataValidDomain(validDomain);
    Pattern domainPattern = PatternFactoryImpl.eINSTANCE.createPattern();
    validDomain.getPatterns().add(domainPattern);
    RegularExpression patternComponent = PatternFactoryImpl.eINSTANCE.createRegularExpression();
    domainPattern.getComponents().add(patternComponent);
    TdExpression patternExpression = RelationalFactoryImpl.eINSTANCE.createTdExpression();
    patternComponent.setExpression(patternExpression);
    patternExpression.setLanguage(hiveLanguage);
    patternExpression.setBody("`su.*`"); //$NON-NLS-1$

    // Analyzed element: column "lname" of "table1" inside "catalog1".
    TdColumn analyzedColumn = RelationalFactoryImpl.eINSTANCE.createTdColumn();
    analyzedColumn.setName("lname"); //$NON-NLS-1$
    TdTable ownerTable = RelationalFactoryImpl.eINSTANCE.createTdTable();
    ownerTable.setName("table1"); //$NON-NLS-1$
    TableHelper.addColumn(ownerTable, analyzedColumn);
    Catalog ownerCatalog = orgomg.cwm.resource.relational.impl.RelationalFactoryImpl.eINSTANCE.createCatalog();
    ownerCatalog.setName("catalog1"); //$NON-NLS-1$
    List<TdTable> tables = new ArrayList<TdTable>();
    tables.add(ownerTable);
    CatalogHelper.addTables(tables, ownerCatalog);
    regexIndicator.setAnalyzedElement(analyzedColumn);

    // Instantiated expression attached to the indicator.
    Expression instantiatedExpression = CoreFactoryImpl.eINSTANCE.createExpression();
    instantiatedExpression.setLanguage(hiveLanguage);
    instantiatedExpression.setBody("SELECT *  FROM `tbi`.`customer`  WHERE (customer.lname = \"sunny\")"); //$NON-NLS-1$
    regexIndicator.setInstantiatedExpression(instantiatedExpression);

    // Analysis whose context points at a connection tagged with product name, version and quote string.
    Analysis analysis = AnalysisFactoryImpl.eINSTANCE.createAnalysis();
    AnalysisContext analysisContext = AnalysisFactoryImpl.eINSTANCE.createAnalysisContext();
    analysis.setContext(analysisContext);
    DatabaseConnection connection = ConnectionFactoryImpl.eINSTANCE.createDatabaseConnection();
    TaggedValueHelper.setTaggedValue(connection, TaggedValueHelper.DB_PRODUCT_NAME, hiveLanguage);
    TaggedValueHelper.setTaggedValue(connection, TaggedValueHelper.DB_PRODUCT_VERSION, "1.0"); //$NON-NLS-1$
    TaggedValueHelper.setTaggedValue(connection, TaggedValueHelper.DB_IDENTIFIER_QUOTE_STRING, "`"); //$NON-NLS-1$
    analysisContext.setConnection(connection);

    ChartDataEntity chartEntity = new ChartDataEntity();
    chartEntity.setIndicator(regexIndicator);
    Assert.assertTrue(patternExplorer.setAnalysis(analysis));
    patternExplorer.setEnitty(chartEntity);

    String actualStatement = patternExplorer.getInvalidValuesStatement();
    assertEquals("SELECT `lname` FROM `catalog1`.`table1` WHERE NOT (id>=1) ", actualStatement); //$NON-NLS-1$
}
Also used : Pattern(org.talend.dataquality.domain.pattern.Pattern) RegularExpression(org.talend.dataquality.domain.pattern.RegularExpression) TdTable(org.talend.cwm.relational.TdTable) UDIndicatorDefinition(org.talend.dataquality.indicators.definition.userdefine.UDIndicatorDefinition) TdExpression(org.talend.cwm.relational.TdExpression) IndicatorParameters(org.talend.dataquality.indicators.IndicatorParameters) ArrayList(java.util.ArrayList) AnalysisContext(org.talend.dataquality.analysis.AnalysisContext) ChartDataEntity(org.talend.dq.indicators.preview.table.ChartDataEntity) Catalog(orgomg.cwm.resource.relational.Catalog) PatternMatchingIndicator(org.talend.dataquality.indicators.PatternMatchingIndicator) TdColumn(org.talend.cwm.relational.TdColumn) TdExpression(org.talend.cwm.relational.TdExpression) RegularExpression(org.talend.dataquality.domain.pattern.RegularExpression) Expression(orgomg.cwm.objectmodel.core.Expression) Analysis(org.talend.dataquality.analysis.Analysis) DatabaseConnection(org.talend.core.model.metadata.builder.connection.DatabaseConnection) Domain(org.talend.dataquality.domain.Domain) Test(org.junit.Test)

Aggregations

RegularExpression (org.talend.dataquality.domain.pattern.RegularExpression) 39, Pattern (org.talend.dataquality.domain.pattern.Pattern) 26, TdExpression (org.talend.cwm.relational.TdExpression) 18, PatternComponent (org.talend.dataquality.domain.pattern.PatternComponent) 17, Test (org.junit.Test) 12, Domain (org.talend.dataquality.domain.Domain) 12, Expression (orgomg.cwm.objectmodel.core.Expression) 10, IndicatorParameters (org.talend.dataquality.indicators.IndicatorParameters) 9, TdColumn (org.talend.cwm.relational.TdColumn) 7, ArrayList (java.util.ArrayList) 6, Analysis (org.talend.dataquality.analysis.Analysis) 6, DatabaseConnection (org.talend.core.model.metadata.builder.connection.DatabaseConnection) 5, TdTable (org.talend.cwm.relational.TdTable) 5, AnalysisContext (org.talend.dataquality.analysis.AnalysisContext) 5, PatternMatchingIndicator (org.talend.dataquality.indicators.PatternMatchingIndicator) 5, ChartDataEntity (org.talend.dq.indicators.preview.table.ChartDataEntity) 5, Catalog (orgomg.cwm.resource.relational.Catalog) 5, UDIndicatorDefinition (org.talend.dataquality.indicators.definition.userdefine.UDIndicatorDefinition) 4, IRepositoryViewObject (org.talend.core.model.repository.IRepositoryViewObject) 3, PatternBuilder (org.talend.dq.pattern.PatternBuilder) 3