Search in sources :

Example 81 with Analysis

use of org.talend.dataquality.analysis.Analysis in project tdq-studio-se by Talend.

the class MatchAnalysisExecutorTest method testExecute.

/**
 * Test method for
 * {@link org.talend.dq.analysis.MatchAnalysisExecutor#execute(org.talend.dataquality.analysis.Analysis)}.
 * <p>
 * Builds one analysis on top of the delimited-file connection and then hands it to four scenario helpers, each
 * called with a different combination of group quality threshold and match interval.
 */
@SuppressWarnings("nls")
@Test
public void testExecute() {
    MatchAnalysisExecutor executor = new MatchAnalysisExecutor();

    // Analysis skeleton: context + parameters + preview row limit.
    Analysis matchAnalysis = AnalysisPackage.eINSTANCE.getAnalysisFactory().createAnalysis();
    AnalysisContext analysisContext = AnalysisPackage.eINSTANCE.getAnalysisFactory().createAnalysisContext();
    matchAnalysis.setContext(analysisContext);
    AnalysisParameters analysisParameters = AnalysisPackage.eINSTANCE.getAnalysisFactory()
            .createAnalysisParameters();
    matchAnalysis.setParameters(analysisParameters);
    TaggedValueHelper.setTaggedValue(matchAnalysis, TaggedValueHelper.PREVIEW_ROW_NUMBER, String.valueOf(100));

    // Bind the context to the file connection, then load the test data file and its columns.
    analysisContext.setConnection(delimitedFileconnection);
    URL testDataUrl = this.getClass().getResource("match_test_data");
    metadataTable = UnitTestBuildHelper.getDefault().initFileConnection(testDataUrl, delimitedFileconnection);
    this.name = UnitTestBuildHelper.getDefault().initColumns(analysisContext, this.metadataTable);

    final String matchKeyName = "name";

    // Scenario 1: match key "name", no block key, levenshtein attribute algorithm,
    // groupQualityThreshold = 0.9, matchInterval = 0.95.
    assertScenario1(executor, matchAnalysis, name, matchKeyName, 0.9d, 0.95d);

    // Scenario 2: same as scenario 1, except matchInterval = 0.8.
    assertScenario2(executor, matchAnalysis, name, matchKeyName, 0.9d, 0.8d);

    // Scenario 3: same as scenario 2, except groupQualityThreshold = 0.95.
    assertScenario3(executor, matchAnalysis, name, matchKeyName, 0.95d, 0.8d);

    // Scenario 4: same as scenario 3, except a new blocking key = country.
    assertScenario4(executor, matchAnalysis, name, matchKeyName, 0.95d, 0.8d);
}
Also used : Analysis(org.talend.dataquality.analysis.Analysis) AnalysisParameters(org.talend.dataquality.analysis.AnalysisParameters) AnalysisContext(org.talend.dataquality.analysis.AnalysisContext) URL(java.net.URL) Test(org.junit.Test)

Example 82 with Analysis

use of org.talend.dataquality.analysis.Analysis in project tdq-studio-se by Talend.

the class TdqAnalysisConnectionPoolTest method setUp.

/**
 * Wires up a mocked {@link Analysis} (context, results, result metadata and handler) and creates the
 * connection pool under test from it.
 *
 * @throws java.lang.Exception
 */
@Before
public void setUp() throws Exception {
    // Mocks, created in the same order as before.
    analysis = mock(Analysis.class);
    AnalysisContext mockedContext = mock(AnalysisContext.class);
    AnalysisResult mockedResult = mock(AnalysisResult.class);
    dataManager = mock(org.talend.core.model.metadata.builder.connection.Connection.class);
    ExecutionInformations mockedExecInfo = mock(ExecutionInformations.class);

    // The static factory must be mocked before it can be stubbed.
    PowerMockito.mockStatic(AnalysisHandler.class);
    AnalysisHandler handler = mock(AnalysisHandler.class);
    when(AnalysisHandler.createHandler(analysis)).thenReturn(handler);
    when(handler.getNumberOfConnectionsPerAnalysis()).thenReturn(5);

    // Navigation from the analysis down to its connection and result metadata.
    when(analysis.getContext()).thenReturn(mockedContext);
    when(mockedContext.getConnection()).thenReturn(dataManager);
    when(analysis.getResults()).thenReturn(mockedResult);
    when(mockedResult.getResultMetadata()).thenReturn(mockedExecInfo);

    connPool = new TdqAnalysisConnectionPool(analysis, 5);
}
Also used : ExecutionInformations(org.talend.dataquality.analysis.ExecutionInformations) AnalysisHandler(org.talend.dq.analysis.AnalysisHandler) Analysis(org.talend.dataquality.analysis.Analysis) Connection(java.sql.Connection) AnalysisContext(org.talend.dataquality.analysis.AnalysisContext) AnalysisResult(org.talend.dataquality.analysis.AnalysisResult) Before(org.junit.Before)

Example 83 with Analysis

use of org.talend.dataquality.analysis.Analysis in project tdq-studio-se by Talend.

the class PatternExplorerRealTest method setUp.

/**
 * Creates a {@link RegexPatternExplorer} and feeds it a regex matching indicator on column "lname" together
 * with an analysis whose connection is tagged with the Hive default language and a backquote identifier quote.
 *
 * @throws java.lang.Exception
 */
@Before
public void setUp() throws Exception {
    final String hiveLanguage = SupportDBUrlType.HIVEDEFAULTURL.getLanguage();

    patternExplorer = new RegexPatternExplorer();

    // Indicator: parameters -> valid-data domain -> pattern -> regular expression -> expression body.
    PatternMatchingIndicator regexIndicator = IndicatorsFactoryImpl.eINSTANCE.createRegexpMatchingIndicator();
    IndicatorParameters indicatorParameters = IndicatorsFactory.eINSTANCE.createIndicatorParameters();
    regexIndicator.setParameters(indicatorParameters);
    Domain validDomain = DomainFactoryImpl.eINSTANCE.createDomain();
    indicatorParameters.setDataValidDomain(validDomain);
    Pattern pattern = PatternFactoryImpl.eINSTANCE.createPattern();
    validDomain.getPatterns().add(pattern);
    RegularExpression regex = PatternFactoryImpl.eINSTANCE.createRegularExpression();
    pattern.getComponents().add(regex);
    TdExpression regexExpression = RelationalFactoryImpl.eINSTANCE.createTdExpression();
    regex.setExpression(regexExpression);
    regexExpression.setLanguage(hiveLanguage);
    regexExpression.setBody("`su.*`"); //$NON-NLS-1$

    // Analyzed element: a column named "lname".
    ModelElement column = RelationalFactoryImpl.eINSTANCE.createTdColumn();
    column.setName("lname"); //$NON-NLS-1$
    regexIndicator.setAnalyzedElement(column);

    // Instantiated SQL expression attached to the indicator.
    Expression instantiatedExpression = CoreFactoryImpl.eINSTANCE.createExpression();
    instantiatedExpression.setLanguage(hiveLanguage);
    instantiatedExpression.setBody("SELECT *  FROM `tbi`.`customer`  WHERE (customer.lname = \"sunny\")"); //$NON-NLS-1$
    regexIndicator.setInstantiatedExpression(instantiatedExpression);

    // Analysis with a database connection carrying the product name/version and quote-string tags.
    Analysis exploredAnalysis = AnalysisFactoryImpl.eINSTANCE.createAnalysis();
    AnalysisContext analysisContext = AnalysisFactoryImpl.eINSTANCE.createAnalysisContext();
    exploredAnalysis.setContext(analysisContext);
    DatabaseConnection hiveConnection = ConnectionFactoryImpl.eINSTANCE.createDatabaseConnection();
    TaggedValueHelper.setTaggedValue(hiveConnection, TaggedValueHelper.DB_PRODUCT_NAME, hiveLanguage);
    TaggedValueHelper.setTaggedValue(hiveConnection, TaggedValueHelper.DB_PRODUCT_VERSION, "1.0"); //$NON-NLS-1$
    TaggedValueHelper.setTaggedValue(hiveConnection, TaggedValueHelper.DB_IDENTIFIER_QUOTE_STRING, "`"); //$NON-NLS-1$
    analysisContext.setConnection(hiveConnection);

    // Hand the analysis, then the chart entity wrapping the indicator, to the explorer.
    patternExplorer.setAnalysis(exploredAnalysis);
    ChartDataEntity chartEntity = new ChartDataEntity();
    chartEntity.setIndicator(regexIndicator);
    patternExplorer.setEnitty(chartEntity);
}
Also used : Pattern(org.talend.dataquality.domain.pattern.Pattern) RegularExpression(org.talend.dataquality.domain.pattern.RegularExpression) TdExpression(org.talend.cwm.relational.TdExpression) IndicatorParameters(org.talend.dataquality.indicators.IndicatorParameters) AnalysisContext(org.talend.dataquality.analysis.AnalysisContext) ChartDataEntity(org.talend.dq.indicators.preview.table.ChartDataEntity) ModelElement(orgomg.cwm.objectmodel.core.ModelElement) PatternMatchingIndicator(org.talend.dataquality.indicators.PatternMatchingIndicator) TdExpression(org.talend.cwm.relational.TdExpression) RegularExpression(org.talend.dataquality.domain.pattern.RegularExpression) Expression(orgomg.cwm.objectmodel.core.Expression) Analysis(org.talend.dataquality.analysis.Analysis) DatabaseConnection(org.talend.core.model.metadata.builder.connection.DatabaseConnection) Domain(org.talend.dataquality.domain.Domain) Before(org.junit.Before)

Example 84 with Analysis

use of org.talend.dataquality.analysis.Analysis in project tdq-studio-se by Talend.

the class PatternExplorerRealTest method testGetValidValuesStatement_2.

/**
 * Test method for {@link org.talend.dq.analysis.explore.PatternExplorer#getValidValuesStatement()} when the
 * indicator under test carries a user-defined indicator definition.
 * <p>
 * The indicator definition is given a "view valid values" expression (body {@code ViewValidValues}) and the
 * analyzed column "lname" is placed in table "table1" of catalog "catalog1". The generated statement is expected
 * to be {@code SELECT `lname` FROM `catalog1`.`table1` }.
 */
@Test
public void testGetValidValuesStatement_2() {
    patternExplorer = new RegexPatternExplorer();

    // Indicator with a user-defined definition that provides the "view valid values" expression.
    PatternMatchingIndicator indicator = IndicatorsFactoryImpl.eINSTANCE.createRegexpMatchingIndicator();
    UDIndicatorDefinition indicatorDef = UserdefineFactoryImpl.eINSTANCE.createUDIndicatorDefinition();
    indicator.setIndicatorDefinition(indicatorDef);
    TdExpression udiTdExpression = RelationalFactoryImpl.eINSTANCE.createTdExpression();
    udiTdExpression.setLanguage(SupportDBUrlType.HIVEDEFAULTURL.getLanguage());
    udiTdExpression.setBody(ViewValidValues);
    indicatorDef.getViewValidValuesExpression().add(udiTdExpression);

    // Pattern parameter: parameters -> valid-data domain -> pattern -> regular expression -> expression body.
    IndicatorParameters createIndicatorParameters = IndicatorsFactory.eINSTANCE.createIndicatorParameters();
    indicator.setParameters(createIndicatorParameters);
    Domain createDomain = DomainFactoryImpl.eINSTANCE.createDomain();
    createIndicatorParameters.setDataValidDomain(createDomain);
    Pattern createPattern = PatternFactoryImpl.eINSTANCE.createPattern();
    createDomain.getPatterns().add(createPattern);
    RegularExpression createPatternComponent = PatternFactoryImpl.eINSTANCE.createRegularExpression();
    createPattern.getComponents().add(createPatternComponent);
    TdExpression createTdExpression = RelationalFactoryImpl.eINSTANCE.createTdExpression();
    createPatternComponent.setExpression(createTdExpression);
    createTdExpression.setLanguage(SupportDBUrlType.HIVEDEFAULTURL.getLanguage());
    createTdExpression.setBody("`su.*`"); //$NON-NLS-1$

    // Analyzed column "lname" inside table "table1" inside catalog "catalog1".
    TdColumn element = RelationalFactoryImpl.eINSTANCE.createTdColumn();
    element.setName("lname"); //$NON-NLS-1$
    TdTable createTdTable = RelationalFactoryImpl.eINSTANCE.createTdTable();
    createTdTable.setName("table1"); //$NON-NLS-1$
    TableHelper.addColumn(createTdTable, element);
    Catalog createCatalog = orgomg.cwm.resource.relational.impl.RelationalFactoryImpl.eINSTANCE.createCatalog();
    createCatalog.setName("catalog1"); //$NON-NLS-1$
    List<TdTable> tableList = new ArrayList<TdTable>();
    tableList.add(createTdTable);
    CatalogHelper.addTables(tableList, createCatalog);
    indicator.setAnalyzedElement(element);

    // Instantiated SQL expression attached to the indicator.
    Expression createIndiExpression = CoreFactoryImpl.eINSTANCE.createExpression();
    createIndiExpression.setLanguage(SupportDBUrlType.HIVEDEFAULTURL.getLanguage());
    createIndiExpression.setBody("SELECT *  FROM `tbi`.`customer`  WHERE (customer.lname = \"sunny\")"); //$NON-NLS-1$
    indicator.setInstantiatedExpression(createIndiExpression);

    // Analysis with a database connection carrying the product name/version and quote-string tags.
    Analysis analysis = AnalysisFactoryImpl.eINSTANCE.createAnalysis();
    AnalysisContext createAnalysisContext = AnalysisFactoryImpl.eINSTANCE.createAnalysisContext();
    analysis.setContext(createAnalysisContext);
    DatabaseConnection createDatabaseConnection = ConnectionFactoryImpl.eINSTANCE.createDatabaseConnection();
    TaggedValueHelper.setTaggedValue(createDatabaseConnection, TaggedValueHelper.DB_PRODUCT_NAME, SupportDBUrlType.HIVEDEFAULTURL.getLanguage());
    TaggedValueHelper.setTaggedValue(createDatabaseConnection, TaggedValueHelper.DB_PRODUCT_VERSION, "1.0"); //$NON-NLS-1$
    TaggedValueHelper.setTaggedValue(createDatabaseConnection, TaggedValueHelper.DB_IDENTIFIER_QUOTE_STRING, "`"); //$NON-NLS-1$
    createAnalysisContext.setConnection(createDatabaseConnection);

    ChartDataEntity cdEntity = new ChartDataEntity();
    cdEntity.setIndicator(indicator);
    // Set the analysis exactly once and check its result before the entity is handed over, so a failed
    // setup stops the test here (same order as testGetInvalidRowsStatement_4).
    Assert.assertTrue(patternExplorer.setAnalysis(analysis));
    patternExplorer.setEnitty(cdEntity);

    String clause = patternExplorer.getValidValuesStatement();
    assertEquals("SELECT `lname` FROM `catalog1`.`table1` ", clause); //$NON-NLS-1$
}
Also used : Pattern(org.talend.dataquality.domain.pattern.Pattern) RegularExpression(org.talend.dataquality.domain.pattern.RegularExpression) TdTable(org.talend.cwm.relational.TdTable) UDIndicatorDefinition(org.talend.dataquality.indicators.definition.userdefine.UDIndicatorDefinition) TdExpression(org.talend.cwm.relational.TdExpression) IndicatorParameters(org.talend.dataquality.indicators.IndicatorParameters) ArrayList(java.util.ArrayList) AnalysisContext(org.talend.dataquality.analysis.AnalysisContext) ChartDataEntity(org.talend.dq.indicators.preview.table.ChartDataEntity) Catalog(orgomg.cwm.resource.relational.Catalog) PatternMatchingIndicator(org.talend.dataquality.indicators.PatternMatchingIndicator) TdColumn(org.talend.cwm.relational.TdColumn) TdExpression(org.talend.cwm.relational.TdExpression) RegularExpression(org.talend.dataquality.domain.pattern.RegularExpression) Expression(orgomg.cwm.objectmodel.core.Expression) Analysis(org.talend.dataquality.analysis.Analysis) DatabaseConnection(org.talend.core.model.metadata.builder.connection.DatabaseConnection) Domain(org.talend.dataquality.domain.Domain) Test(org.junit.Test)

Example 85 with Analysis

use of org.talend.dataquality.analysis.Analysis in project tdq-studio-se by Talend.

the class PatternExplorerRealTest method testGetInvalidRowsStatement_4.

/**
 * Test method for {@link org.talend.dq.analysis.explore.PatternExplorer#getInvalidRowsStatement()} when the
 * indicator under test carries a user-defined indicator definition.
 * <p>
 * The definition is given a "view invalid rows" expression (body {@code ViewInvalidRows}); the statement built
 * for column "lname" of {@code catalog1.table1} is compared with the expected SQL text.
 */
@Test
public void testGetInvalidRowsStatement_4() {
    final String hiveLanguage = SupportDBUrlType.HIVEDEFAULTURL.getLanguage();

    patternExplorer = new RegexPatternExplorer();

    // Indicator with a user-defined definition that provides the "view invalid rows" expression.
    PatternMatchingIndicator regexIndicator = IndicatorsFactoryImpl.eINSTANCE.createRegexpMatchingIndicator();
    UDIndicatorDefinition userDefinition = UserdefineFactoryImpl.eINSTANCE.createUDIndicatorDefinition();
    regexIndicator.setIndicatorDefinition(userDefinition);
    TdExpression invalidRowsExpression = RelationalFactoryImpl.eINSTANCE.createTdExpression();
    invalidRowsExpression.setLanguage(hiveLanguage);
    invalidRowsExpression.setBody(ViewInvalidRows);
    userDefinition.getViewInvalidRowsExpression().add(invalidRowsExpression);

    // Pattern parameter: parameters -> valid-data domain -> pattern -> regular expression -> expression body.
    IndicatorParameters indicatorParameters = IndicatorsFactory.eINSTANCE.createIndicatorParameters();
    regexIndicator.setParameters(indicatorParameters);
    Domain validDomain = DomainFactoryImpl.eINSTANCE.createDomain();
    indicatorParameters.setDataValidDomain(validDomain);
    Pattern pattern = PatternFactoryImpl.eINSTANCE.createPattern();
    validDomain.getPatterns().add(pattern);
    RegularExpression regex = PatternFactoryImpl.eINSTANCE.createRegularExpression();
    pattern.getComponents().add(regex);
    TdExpression regexExpression = RelationalFactoryImpl.eINSTANCE.createTdExpression();
    regex.setExpression(regexExpression);
    regexExpression.setLanguage(hiveLanguage);
    regexExpression.setBody("`su.*`"); //$NON-NLS-1$

    // Analyzed column "lname" inside table "table1" inside catalog "catalog1".
    TdColumn column = RelationalFactoryImpl.eINSTANCE.createTdColumn();
    column.setName("lname"); //$NON-NLS-1$
    TdTable table = RelationalFactoryImpl.eINSTANCE.createTdTable();
    table.setName("table1"); //$NON-NLS-1$
    TableHelper.addColumn(table, column);
    Catalog catalog = orgomg.cwm.resource.relational.impl.RelationalFactoryImpl.eINSTANCE.createCatalog();
    catalog.setName("catalog1"); //$NON-NLS-1$
    List<TdTable> tables = new ArrayList<TdTable>();
    tables.add(table);
    CatalogHelper.addTables(tables, catalog);
    regexIndicator.setAnalyzedElement(column);

    // Instantiated SQL expression attached to the indicator.
    Expression instantiatedExpression = CoreFactoryImpl.eINSTANCE.createExpression();
    instantiatedExpression.setLanguage(hiveLanguage);
    instantiatedExpression.setBody("SELECT *  FROM `tbi`.`customer`  WHERE (customer.lname = \"sunny\")"); //$NON-NLS-1$
    regexIndicator.setInstantiatedExpression(instantiatedExpression);

    // Analysis with a database connection carrying the product name/version and quote-string tags.
    Analysis exploredAnalysis = AnalysisFactoryImpl.eINSTANCE.createAnalysis();
    AnalysisContext analysisContext = AnalysisFactoryImpl.eINSTANCE.createAnalysisContext();
    exploredAnalysis.setContext(analysisContext);
    DatabaseConnection hiveConnection = ConnectionFactoryImpl.eINSTANCE.createDatabaseConnection();
    TaggedValueHelper.setTaggedValue(hiveConnection, TaggedValueHelper.DB_PRODUCT_NAME, hiveLanguage);
    TaggedValueHelper.setTaggedValue(hiveConnection, TaggedValueHelper.DB_PRODUCT_VERSION, "1.0"); //$NON-NLS-1$
    TaggedValueHelper.setTaggedValue(hiveConnection, TaggedValueHelper.DB_IDENTIFIER_QUOTE_STRING, "`"); //$NON-NLS-1$
    analysisContext.setConnection(hiveConnection);

    // The analysis is set (and checked) before the chart entity is handed to the explorer.
    ChartDataEntity chartEntity = new ChartDataEntity();
    chartEntity.setIndicator(regexIndicator);
    Assert.assertTrue(patternExplorer.setAnalysis(exploredAnalysis));
    patternExplorer.setEnitty(chartEntity);

    String invalidRowsStatement = patternExplorer.getInvalidRowsStatement();
    assertEquals("SELECT * FROM `catalog1`.`table1` WHERE NOT (id>=1) ", invalidRowsStatement); //$NON-NLS-1$
}
Also used : Pattern(org.talend.dataquality.domain.pattern.Pattern) RegularExpression(org.talend.dataquality.domain.pattern.RegularExpression) TdTable(org.talend.cwm.relational.TdTable) UDIndicatorDefinition(org.talend.dataquality.indicators.definition.userdefine.UDIndicatorDefinition) TdExpression(org.talend.cwm.relational.TdExpression) IndicatorParameters(org.talend.dataquality.indicators.IndicatorParameters) ArrayList(java.util.ArrayList) AnalysisContext(org.talend.dataquality.analysis.AnalysisContext) ChartDataEntity(org.talend.dq.indicators.preview.table.ChartDataEntity) Catalog(orgomg.cwm.resource.relational.Catalog) PatternMatchingIndicator(org.talend.dataquality.indicators.PatternMatchingIndicator) TdColumn(org.talend.cwm.relational.TdColumn) TdExpression(org.talend.cwm.relational.TdExpression) RegularExpression(org.talend.dataquality.domain.pattern.RegularExpression) Expression(orgomg.cwm.objectmodel.core.Expression) Analysis(org.talend.dataquality.analysis.Analysis) DatabaseConnection(org.talend.core.model.metadata.builder.connection.DatabaseConnection) Domain(org.talend.dataquality.domain.Domain) Test(org.junit.Test)

Aggregations

Analysis (org.talend.dataquality.analysis.Analysis): 137, Test (org.junit.Test): 36, ModelElement (orgomg.cwm.objectmodel.core.ModelElement): 36, AnalysisContext (org.talend.dataquality.analysis.AnalysisContext): 30, ArrayList (java.util.ArrayList): 28, Property (org.talend.core.model.properties.Property): 28, Indicator (org.talend.dataquality.indicators.Indicator): 27, TDQAnalysisItem (org.talend.dataquality.properties.TDQAnalysisItem): 27, AnalysisResult (org.talend.dataquality.analysis.AnalysisResult): 26, PersistenceException (org.talend.commons.exception.PersistenceException): 19, Connection (org.talend.core.model.metadata.builder.connection.Connection): 18, ReturnCode (org.talend.utils.sugars.ReturnCode): 18, Dependency (orgomg.cwm.objectmodel.core.Dependency): 18, TdColumn (org.talend.cwm.relational.TdColumn): 16, IndicatorDefinition (org.talend.dataquality.indicators.definition.IndicatorDefinition): 16, DatabaseConnection (org.talend.core.model.metadata.builder.connection.DatabaseConnection): 15, Pattern (org.talend.dataquality.domain.pattern.Pattern): 15, IFile (org.eclipse.core.resources.IFile): 14, File (java.io.File): 12, IPath (org.eclipse.core.runtime.IPath): 12