use of org.talend.dataquality.domain.pattern.RegularExpression in project tdq-studio-se by Talend.
the class CreatePatternWizard method initCWMResourceBuilder.
/**
 * Builds the pattern model element from the values carried by {@code parameter}.
 * <p>
 * A new {@code PatternBuilder} is stored in the {@code patternBuilder} field and asked to initialize a
 * pattern named after {@code parameter.getName()}. When that succeeds, one regular expression is created
 * from the parameter's language and expression, its expression type is set to {@code type.getLiteral()},
 * and it is added to the pattern's components.
 *
 * @return the initialized pattern holding the new regular expression, or {@code null} when the builder
 *         reports that the pattern could not be initialized
 */
public ModelElement initCWMResourceBuilder() {
    patternBuilder = new PatternBuilder();
    if (!patternBuilder.initializePattern(parameter.getName())) {
        // nothing usable was built
        return null;
    }

    Pattern createdPattern = patternBuilder.getPattern();
    String languageName = PatternLanguageType.findLanguageByName(parameter.getLanguage());
    String expressionBody = parameter.getExpression();

    RegularExpression expression = BooleanExpressionHelper.createRegularExpression(languageName, expressionBody);
    expression.setExpressionType(type.getLiteral());
    createdPattern.getComponents().add(expression);
    return createdPattern;
}
use of org.talend.dataquality.domain.pattern.RegularExpression in project tdq-studio-se by Talend.
the class ImportFactory method createAndStorePattern.
/**
 * Builds a pattern from the imported parameters and writes it below the given folder.
 * <p>
 * One regular expression is added per entry of {@code parameters.regex} (key and value are passed to
 * {@code BooleanExpressionHelper.createRegularExpression} together with {@code type}). The pattern's valid
 * status is then computed and recorded as a tagged value. Finally the sub-folders named by
 * {@code parameters.relativePath} are resolved one by one, created when absent, and the pattern is written
 * into the last folder reached.
 *
 * @param parameters      imported pattern description (name, author, regex map, relative path, ...)
 * @param selectionFolder folder under which the relative path is resolved
 * @param type            expression type handed to the regular expression factory
 * @return the result returned by the pattern writer
 * @throws TalendInternalPersistenceException declared by the original signature; presumably raised by the
 *         persistence layer - confirm against the writer
 */
private static TypedReturnCode<Object> createAndStorePattern(PatternParameters parameters, IFolder selectionFolder,
        ExpressionType type) throws TalendInternalPersistenceException {
    Pattern importedPattern = createPattern(parameters.name, parameters.auther, parameters.description,
            parameters.purpose, parameters.status);

    for (String regexKey : parameters.regex.keySet()) {
        RegularExpression expression = BooleanExpressionHelper.createRegularExpression(regexKey,
                parameters.regex.get(regexKey), type);
        importedPattern.getComponents().add(expression);
    }

    TaggedValueHelper.setValidStatus(PatternUtilities.isPatternValid(importedPattern), importedPattern);

    // Walk down the relative path; on a CoreException the walk stops and the pattern is
    // written into whichever folder had been reached so far.
    IFolder targetFolder = selectionFolder;
    try {
        String subPath = parameters.relativePath;
        boolean namesPatternRoot = EResourceConstant.PATTERN_REGEX.getName().equals(subPath)
                || EResourceConstant.PATTERN_SQL.getName().equals(subPath);
        if (namesPatternRoot) {
            // the path designates the pattern root itself, so there is no sub-folder to descend into
            subPath = PluginConstant.EMPTY_STRING;
        }
        for (String segment : subPath.split("/")) { //$NON-NLS-1$
            IFolder child = targetFolder.getFolder(segment);
            if (!child.exists()) {
                child.create(false, true, null);
            }
            targetFolder = child;
        }
    } catch (CoreException e) {
        log.error(e, e);
    }

    return ElementWriterFactory.getInstance().createPatternWriter().create(importedPattern, targetFolder);
}
use of org.talend.dataquality.domain.pattern.RegularExpression in project tdq-studio-se by Talend.
the class CreatePatternsMigratorTask method doExecute.
/**
 * Adds the predefined patterns that are still missing from the regex pattern folders.
 * <p>
 * For each target folder (address, customer, internet, number, money, date) the method checks whether the
 * folder exists and, for every pattern file that is absent, builds the pattern with {@code SQLLanguage},
 * sets its tag values and writes it into the folder. The money folder is the only one that is created when
 * missing; the other folders are skipped when they do not exist.
 *
 * @return always {@code true}
 * @throws Exception propagated from the folder creation or from the helpers called here
 */
@Override
protected boolean doExecute() throws Exception {
    parameter = new PatternParameter();

    // Address: FR Insee Code
    // NOTE(review): "[A|B]" is a character class, so in most regex dialects it also accepts a literal '|';
    // "[AB]" may have been intended - confirm before touching a shipped pattern.
    IFolder folder = ResourceManager.getPatternRegexFolder().getFolder(PATH_ADDRESS);
    if (folder.exists()) {
        Pattern inseeCode = newPatternIfAbsent(folder, "FR_Insee_Code_0.1.pattern", //$NON-NLS-1$
                "FR Insee Code", //$NON-NLS-1$
                "'^(F-|FRA?(-| ))?((2[A|B])|[0-9]{2})[0-9]{3}$'"); //$NON-NLS-1$
        tagAndStore(inseeCode, folder, " FRA-2A235 | F-2B128 | FRA 2B356", //$NON-NLS-1$
                "French Insee code of cities with Corsica and colonies"); //$NON-NLS-1$
    }

    // Customer: SEDOL
    folder = ResourceManager.getPatternRegexFolder().getFolder(PATH_CUSTOMER);
    if (folder.exists()) {
        Pattern sedol = newPatternIfAbsent(folder, "SEDOL_0.1.pattern", //$NON-NLS-1$
                "SEDOL", //$NON-NLS-1$
                "'^([B-Db-dF-Hf-hJ-Nj-nP-Tp-tV-Xv-xYyZz0-9]{6}[0-9])$'"); //$NON-NLS-1$
        tagAndStore(sedol, folder, "B01HL06 | 4155586", //$NON-NLS-1$
                "Stock Exchange Daily Official List "); //$NON-NLS-1$
    }

    // Internet: IPv6 Address, MAC Address
    folder = ResourceManager.getPatternRegexFolder().getFolder(PATH_INTERNET);
    if (folder.exists()) {
        Pattern ipv6 = newPatternIfAbsent(folder, "IPv6_Address_0.1.pattern", //$NON-NLS-1$
                "IPv6 Address", //$NON-NLS-1$
                "'^((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}((\\b((25[0-5])|(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b)\\.){3}(\\b((25[0-5])|(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b))|(([0-9A-Fa-f]{1,4}:){0,5}:((\\b((25[0-5])|(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b)\\.){3}(\\b((25[0-5])|(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b))|(::([0-9A-Fa-f]{1,4}:){0,5}((\\b((25[0-5])|(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b)\\.){3}(\\b((25[0-5])|(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b))|([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|(::([0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))$'"); //$NON-NLS-1$
        tagAndStore(ipv6, folder, "Check if it is a IPv6 address", //$NON-NLS-1$
                "IPv6 address"); //$NON-NLS-1$

        Pattern macAddress = newPatternIfAbsent(folder, "MAC_Address_0.1.pattern", //$NON-NLS-1$
                "MAC Address", //$NON-NLS-1$
                "'^([0-9a-fA-F][0-9a-fA-F]:){5}([0-9a-fA-F][0-9a-fA-F])$'"); //$NON-NLS-1$
        tagAndStore(macAddress, folder, "A4:4E:31:B9:C5:B4", //$NON-NLS-1$
                "Match MAC Address"); //$NON-NLS-1$
    }

    // Number: GPS Coordinate, UK SSN
    folder = ResourceManager.getPatternRegexFolder().getFolder(PATH_NUMBER);
    if (folder.exists()) {
        Pattern gpsCoordinate = newPatternIfAbsent(folder, "GPS_Coordinate_0.1.pattern", //$NON-NLS-1$
                "GPS Coordinate", //$NON-NLS-1$
                "'^([0-9]{1,3}[\\.][0-9]*)[, ]+-?([0-9]{1,3}[\\.][0-9]*)$'"); //$NON-NLS-1$
        tagAndStore(gpsCoordinate, folder, "40.7127837,-74.00594130000002", //$NON-NLS-1$
                "Google Maps style GPS Decimal format"); //$NON-NLS-1$

        Pattern ukSsn = newPatternIfAbsent(folder, "UK_SSN_0.1.pattern", //$NON-NLS-1$
                "UK SSN", //$NON-NLS-1$
                "'^[A-CEGHJ-PR-TW-Z]{1}[A-CEGHJ-NPR-TW-Z]{1}([0-9]{6}|( [0-9]{2}){3} )[A-DFM]{0,1}$'"); //$NON-NLS-1$
        tagAndStore(ukSsn, folder, "AB123456C | AB 12 34 56 C", //$NON-NLS-1$
                "National identification number, national identity number, or national insurance number generally called an NI Number (NINO)"); //$NON-NLS-1$
    }

    // Money: EN Amount Money, FR Amount Money (this folder is created on demand)
    folder = ResourceManager.getPatternRegexFolder().getFolder(PATH_MONEY);
    if (!folder.exists()) {
        folder.create(true, true, null);
    }
    if (folder.exists()) {
        Pattern enAmount = newPatternIfAbsent(folder, "EN_Amount_Money_0.1.pattern", //$NON-NLS-1$
                "EN Amount Money", //$NON-NLS-1$
                "'^((US|CA)?\\$|\\£|\\€|\\¥)(([1-9][0-9]{0,2}(\\,[0-9]{3})*)|([1-9][0-9]*)|(0))(\\.[0-9]{2}|k|M|G|T)?$'"); //$NON-NLS-1$
        if (enAmount != null) {
            // This pattern additionally carries a MySQL variant whose '$' is escaped with a doubled backslash.
            RegularExpression mysqlExpression = BooleanExpressionHelper.createRegularExpression("MySQL", //$NON-NLS-1$
                    "'^((US|CA)?\\\\$|\\£|\\€|\\¥)(([1-9][0-9]{0,2}(\\,[0-9]{3})*)|([1-9][0-9]*)|(0))(\\.[0-9]{2}|k|M|G|T)?$'"); //$NON-NLS-1$
            mysqlExpression.setExpressionType("REGEXP"); //$NON-NLS-1$
            enAmount.getComponents().add(mysqlExpression);
        }
        tagAndStore(enAmount, folder, "$3,000 || CA$3000", //$NON-NLS-1$
                "Amount of money in English format"); //$NON-NLS-1$

        Pattern frAmount = newPatternIfAbsent(folder, "FR_Amount_Money_0.1.pattern", //$NON-NLS-1$
                "FR Amount Money", //$NON-NLS-1$
                "'^(([1-9][0-9]{0,2}( [0-9]{3})*)|([1-9][0-9]*)|0)((,[0-9]{2} | (k|M|G|T))?| )(\\$( (US|CA))?|\\£|\\€|\\¥)$'"); //$NON-NLS-1$
        tagAndStore(frAmount, folder, "3000 € | 35 k€ | 35 054 T€", //$NON-NLS-1$
                "Amount of money in French format"); //$NON-NLS-1$
    }

    // Date: EN_Month_Abbrev, EN_Month
    folder = ResourceManager.getPatternRegexFolder().getFolder(PATH_DATE);
    if (folder.exists()) {
        Pattern monthAbbrev = newPatternIfAbsent(folder, "EN_Month_Abbrev_0.1.pattern", //$NON-NLS-1$
                "EN_Month_Abbrev", //$NON-NLS-1$
                "'^(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)$'"); //$NON-NLS-1$
        tagAndStore(monthAbbrev, folder, "Jan | Feb ", //$NON-NLS-1$
                "Month English abbreviation"); //$NON-NLS-1$

        Pattern month = newPatternIfAbsent(folder, "EN_Month_0.1.pattern", //$NON-NLS-1$
                "EN_Month", //$NON-NLS-1$
                "'^(January|June|July|February|March|May|April|August|September|October|November|December)$'"); //$NON-NLS-1$
        tagAndStore(month, folder, "January | February ", //$NON-NLS-1$
                "Month in English"); //$NON-NLS-1$
    }

    return true;
}

/**
 * Builds a pattern with {@code SQLLanguage} unless its file is already present in the folder.
 *
 * @return the new pattern, or {@code null} when the file exists or {@code newPattern} returned {@code null}
 */
private Pattern newPatternIfAbsent(IFolder folder, String fileName, String name, String expression) {
    if (folder.getFile(fileName).exists()) {
        return null;
    }
    return newPattern(name, SQLLanguage, expression);
}

/**
 * Passes the two values unchanged to {@code setTagValue} and writes the pattern into the folder; does
 * nothing for a {@code null} pattern.
 */
private void tagAndStore(Pattern pattern, IFolder folder, String firstTagValue, String secondTagValue)
        throws Exception {
    if (pattern == null) {
        return;
    }
    setTagValue(pattern, firstTagValue, secondTagValue);
    ElementWriterFactory.getInstance().createPatternWriter().create(pattern, folder);
}
use of org.talend.dataquality.domain.pattern.RegularExpression in project tdq-studio-se by Talend.
the class CreatePatternsMigratorTask method newPattern.
/**
 * Creates a pattern with the given name that holds a single regular expression.
 *
 * @param name    name used to initialize the pattern
 * @param lang    language passed to the regular expression factory
 * @param express expression body passed to the regular expression factory
 * @return the pattern with one component whose expression type is {@code "REGEXP"}, or {@code null} when
 *         the builder reports that the pattern could not be initialized
 */
private Pattern newPattern(String name, String lang, String express) {
    PatternBuilder builder = new PatternBuilder();
    if (!builder.initializePattern(name)) {
        return null;
    }

    Pattern builtPattern = builder.getPattern();
    RegularExpression expression = BooleanExpressionHelper.createRegularExpression(lang, express);
    expression.setExpressionType("REGEXP"); //$NON-NLS-1$
    builtPattern.getComponents().add(expression);
    return builtPattern;
}
use of org.talend.dataquality.domain.pattern.RegularExpression in project tdq-studio-se by Talend.
the class PatternExplorerRealTest method testGetInvalidValuesStatement_3.
/**
 * Test method for {@link org.talend.dq.analysis.explore.PatternExplorer#getInvalidValuesStatement()} when the
 * indicator under test carries a user-defined indicator definition.
 */
@Test
public void testGetInvalidValuesStatement_3() {
    patternExplorer = new RegexPatternExplorer();

    // indicator backed by a user-defined definition with its own "view invalid values" expression
    PatternMatchingIndicator indicator = IndicatorsFactoryImpl.eINSTANCE.createRegexpMatchingIndicator();
    UDIndicatorDefinition udiDefinition = UserdefineFactoryImpl.eINSTANCE.createUDIndicatorDefinition();
    indicator.setIndicatorDefinition(udiDefinition);
    TdExpression viewInvalidExpression = RelationalFactoryImpl.eINSTANCE.createTdExpression();
    viewInvalidExpression.setLanguage(SupportDBUrlType.HIVEDEFAULTURL.getLanguage());
    viewInvalidExpression.setBody(ViewInvalidValues);
    udiDefinition.getViewInvalidValuesExpression().add(viewInvalidExpression);

    // indicator parameters: valid-data domain -> pattern -> regular expression -> expression body
    IndicatorParameters indicatorParameters = IndicatorsFactory.eINSTANCE.createIndicatorParameters();
    indicator.setParameters(indicatorParameters);
    Domain validDomain = DomainFactoryImpl.eINSTANCE.createDomain();
    indicatorParameters.setDataValidDomain(validDomain);
    Pattern pattern = PatternFactoryImpl.eINSTANCE.createPattern();
    validDomain.getPatterns().add(pattern);
    RegularExpression regularExpression = PatternFactoryImpl.eINSTANCE.createRegularExpression();
    pattern.getComponents().add(regularExpression);
    TdExpression patternExpression = RelationalFactoryImpl.eINSTANCE.createTdExpression();
    regularExpression.setExpression(patternExpression);
    patternExpression.setLanguage(SupportDBUrlType.HIVEDEFAULTURL.getLanguage());
    patternExpression.setBody("`su.*`"); //$NON-NLS-1$

    // analyzed column "lname" in table "table1" of catalog "catalog1"
    TdColumn column = RelationalFactoryImpl.eINSTANCE.createTdColumn();
    column.setName("lname"); //$NON-NLS-1$
    TdTable table = RelationalFactoryImpl.eINSTANCE.createTdTable();
    table.setName("table1"); //$NON-NLS-1$
    TableHelper.addColumn(table, column);
    Catalog catalog = orgomg.cwm.resource.relational.impl.RelationalFactoryImpl.eINSTANCE.createCatalog();
    catalog.setName("catalog1"); //$NON-NLS-1$
    List<TdTable> tables = new ArrayList<TdTable>();
    tables.add(table);
    CatalogHelper.addTables(tables, catalog);
    indicator.setAnalyzedElement(column);

    // instantiated expression attached to the indicator
    Expression instantiatedExpression = CoreFactoryImpl.eINSTANCE.createExpression();
    instantiatedExpression.setLanguage(SupportDBUrlType.HIVEDEFAULTURL.getLanguage());
    instantiatedExpression.setBody("SELECT * FROM `tbi`.`customer` WHERE (customer.lname = \"sunny\")"); //$NON-NLS-1$
    indicator.setInstantiatedExpression(instantiatedExpression);

    // analysis whose context points at a connection tagged with product name, version and quote string
    Analysis analysis = AnalysisFactoryImpl.eINSTANCE.createAnalysis();
    AnalysisContext analysisContext = AnalysisFactoryImpl.eINSTANCE.createAnalysisContext();
    analysis.setContext(analysisContext);
    DatabaseConnection connection = ConnectionFactoryImpl.eINSTANCE.createDatabaseConnection();
    TaggedValueHelper.setTaggedValue(connection, TaggedValueHelper.DB_PRODUCT_NAME,
            SupportDBUrlType.HIVEDEFAULTURL.getLanguage());
    TaggedValueHelper.setTaggedValue(connection, TaggedValueHelper.DB_PRODUCT_VERSION, "1.0"); //$NON-NLS-1$
    TaggedValueHelper.setTaggedValue(connection, TaggedValueHelper.DB_IDENTIFIER_QUOTE_STRING, "`"); //$NON-NLS-1$
    analysisContext.setConnection(connection);

    // hand the indicator to the explorer through a chart data entity
    ChartDataEntity chartDataEntity = new ChartDataEntity();
    chartDataEntity.setIndicator(indicator);
    Assert.assertTrue(patternExplorer.setAnalysis(analysis));
    patternExplorer.setEnitty(chartDataEntity);

    String actualStatement = patternExplorer.getInvalidValuesStatement();
    assertEquals("SELECT `lname` FROM `catalog1`.`table1` WHERE NOT (id>=1) ", actualStatement); //$NON-NLS-1$
}
Aggregations