
Example 86 with MetadataColumn

use of org.talend.core.model.metadata.builder.connection.MetadataColumn in project tbd-studio-se by Talend.

the class HDFSSchemaForm method pressRetreiveSchemaButton.

private void pressRetreiveSchemaButton() {
    ConnectionStatus connectionStatus = checkConnection(false);
    if (connectionStatus == null) {
        return;
    }
    if (!connectionStatus.getResult()) {
        tableSettingsInfoLabel.setText(connectionStatus.getMessageException());
    } else {
        boolean doit = true;
        if (tableEditorView.getMetadataEditor().getBeanCount() > 0) {
            doit = MessageDialog.openConfirm(getShell(),
                    Messages.getString("HDFSSchemaForm.title.confirmChange"), //$NON-NLS-1$
                    Messages.getString("HDFSSchemaForm.msg.changeSchema")); //$NON-NLS-1$
        }
        if (doit) {
            List<MetadataColumn> metadataColumns;
            HDFSConnectionBean connectionBean = getConnectionBean();
            try {
                ClassLoader classLoader = HadoopServerUtil.getClassLoader(connectionBean);
                // reconnect the HDFS
                HadoopOperationManager.getInstance().getDFS(connectionBean, classLoader);
                metadataColumns = ExtractHDFSSchemaManager.getInstance().extractColumns(getConnection(), classLoader, metadataTable);
            } catch (Exception e) {
                ExceptionMessageDialog.openError(getShell(),
                        Messages.getString("HDFSSchemaForm.checkSchema.errorDialog.title"), //$NON-NLS-1$
                        e.getMessage(), e);
                ExceptionHandler.process(e);
                return;
            }
            tableEditorView.getMetadataEditor().removeAll();
            List<MetadataColumn> metadataColumnsValid = new ArrayList<MetadataColumn>();
            Iterator iterate = metadataColumns.iterator();
            while (iterate.hasNext()) {
                MetadataColumn metadataColumn = (MetadataColumn) iterate.next();
                if (metadataColumn.getTalendType().equals(JavaTypesManager.DATE.getId())
                        || metadataColumn.getTalendType().equals(PerlTypesManager.DATE)) {
                    if ("".equals(metadataColumn.getPattern())) { //$NON-NLS-1$
                        metadataColumn.setPattern(TalendQuoteUtils.addQuotes("dd-MM-yyyy")); //$NON-NLS-1$
                    }
                }
                String columnLabel = metadataColumn.getLabel();
                // Check the label and add it to the table
                metadataColumn.setLabel(tableEditorView.getMetadataEditor().getNextGeneratedColumnName(columnLabel));
                metadataColumnsValid.add(metadataColumn);
            }
            tableEditorView.getMetadataEditor().addAll(metadataColumnsValid);
        }
    }
    updateRetreiveSchemaButton();
    changeTableNavigatorStatus(checkFieldsValue());
}
Also used : MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) HDFSConnectionBean(org.talend.designer.hdfsbrowse.model.HDFSConnectionBean) ArrayList(java.util.ArrayList) Iterator(java.util.Iterator) ConnectionStatus(org.talend.core.repository.model.connection.ConnectionStatus)
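
A note on the flow above: the classloader is resolved per connection via HadoopServerUtil.getClassLoader, getDFS is invoked first only to re-establish the HDFS connection, and the editor is cleared (removeAll) only after extraction succeeds, so a failed extraction reports the exception and leaves the existing schema in the editor untouched.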

Example 87 with MetadataColumn

use of org.talend.core.model.metadata.builder.connection.MetadataColumn in project tbd-studio-se by Talend.

the class ExtractParquetFileSchemaService method extractColumns.

private List<MetadataColumn> extractColumns(HDFSConnection connection, String filePath) throws Exception {
    List<MetadataColumn> columns = new ArrayList<MetadataColumn>();
    HDFSConnectionBean connectionBean = HDFSModelUtil.convert2HDFSConnectionBean(connection);
    Object fs = HadoopServerUtil.getDFS(connectionBean, classLoader);
    Object conf = HadoopServerUtil.getConfiguration(connectionBean, classLoader);
    Object pathObj = ReflectionUtils.newInstance("org.apache.hadoop.fs.Path", classLoader, new Object[] { filePath }); //$NON-NLS-1$
    ClassLoader oldClassLoaderLoader = Thread.currentThread().getContextClassLoader();
    try {
        Thread.currentThread().setContextClassLoader(classLoader);
        Object fileReader = ReflectionUtils.invokeStaticMethod("org.apache.parquet.hadoop.ParquetFileReader", classLoader, "open", new Object[] { conf, pathObj });
        Object fileMetadata = ReflectionUtils.invokeMethod(fileReader, "getFileMetaData", new Object[] {});
        Object schema = ReflectionUtils.invokeMethod(fileMetadata, "getSchema", new Object[] {});
        List fields = (List) ReflectionUtils.invokeMethod(schema, "getFields", new Object[] {});
        Class RepetitionEnum = Class.forName("org.apache.parquet.schema.Type$Repetition", true, classLoader);
        for (Object field : fields) {
            String fieldName = (String) ReflectionUtils.invokeMethod(field, "getName", new Object[] {});
            Object repetition = ReflectionUtils.invokeMethod(field, "getRepetition", new Object[] {});
            boolean isNullable = true;
            if (Enum.valueOf(RepetitionEnum, "REQUIRED") == repetition) {
                isNullable = false;
            }
            MetadataColumn metadataColumn = ConnectionFactory.eINSTANCE.createMetadataColumn();
            metadataColumn.setLabel(fieldName);
            metadataColumn.setNullable(isNullable);
            handleFieldMatchedTalendType(field, metadataColumn);
            columns.add(metadataColumn);
        }
    } finally {
        Thread.currentThread().setContextClassLoader(oldClassLoaderLoader);
    }
    return columns;
}
Also used : MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) HDFSConnectionBean(org.talend.designer.hdfsbrowse.model.HDFSConnectionBean) ArrayList(java.util.ArrayList) DynamicClassLoader(org.talend.core.classloader.DynamicClassLoader) ArrayList(java.util.ArrayList) List(java.util.List)
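
For orientation, here is a rough non-reflective sketch of what the reflective chain above resolves to when the Parquet and Hadoop classes are available at compile time. It is an illustration under that assumption, not the studio code; the helper name inspectParquetSchema is made up for the example.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.Type;

// Sketch only: open the Parquet footer, read the message schema and inspect each field.
private void inspectParquetSchema(Configuration conf, String filePath) throws Exception {
    try (ParquetFileReader reader = ParquetFileReader.open(conf, new Path(filePath))) {
        MessageType schema = reader.getFileMetaData().getSchema();
        for (Type field : schema.getFields()) {
            String fieldName = field.getName();
            // REQUIRED fields are not nullable; everything else is treated as nullable above
            boolean isNullable = field.getRepetition() != Type.Repetition.REQUIRED;
            // a MetadataColumn would then be created, labelled and typed, as in the reflective version
        }
    }
}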

Example 88 with MetadataColumn

use of org.talend.core.model.metadata.builder.connection.MetadataColumn in project tbd-studio-se by Talend.

the class ExtractTextFileSchemaService method guessSchemaFromArray.

public List<MetadataColumn> guessSchemaFromArray(final CsvArray csvArray, boolean isFirstLineCaption, int headerValue) {
    List<MetadataColumn> columns = new ArrayList<MetadataColumn>();
    List<String> exisColumnNames = new ArrayList<String>();
    if (csvArray == null) {
        return columns;
    } else {
        List<String[]> csvRows = csvArray.getRows();
        if (csvRows.isEmpty()) {
            return columns;
        }
        String[] fields = csvRows.get(0);
        int numberOfCol = getNumbersOfColumns(csvRows);
        // define the labels of the metadata with the content of the first row
        int firstRowToExtractMetadata = headerValue;
        // the first row is used to define the label of each metadata column
        String[] label = new String[numberOfCol];
        for (int i = 0; i < numberOfCol; i++) {
            label[i] = DEFAULT_COLUMN_LABEL + i;
            if (isFirstLineCaption) {
                if (numberOfCol <= fields.length) {
                    if (fields[i] != null && !("").equals(fields[i])) { //$NON-NLS-1$
                        label[i] = fields[i].trim();
                        label[i] = MetadataToolHelper.validateColumnName(label[i], i);
                    } else {
                        label[i] = DEFAULT_COLUMN_LABEL + i;
                    }
                } else {
                    if (i < fields.length) {
                        if (fields[i] != null && !("").equals(fields[i])) { //$NON-NLS-1$
                            label[i] = fields[i].trim().replaceAll(" ", "_"); //$NON-NLS-1$ //$NON-NLS-2$
                        } else {
                            label[i] = DEFAULT_COLUMN_LABEL + " " + i; //$NON-NLS-1$
                        }
                    } else {
                        label[i] = DEFAULT_COLUMN_LABEL + " " + i; //$NON-NLS-1$
                    }
                }
            }
        }
        // fix bug 5694: column names check in FileDelimited wizard fails to
        // rename duplicate column name
        ShadowProcessPreview.fixDuplicateNames(label);
        for (int i = 0; i < numberOfCol; i++) {
            // define the first currentType and assimilate it to globalType
            String globalType = null;
            int lengthValue = 0;
            int precisionValue = 0;
            int current = firstRowToExtractMetadata;
            while (globalType == null) {
                // see the feature 6296, qli comment
                if (current == csvRows.size()) {
                    globalType = "id_String"; //$NON-NLS-1$
                    continue;
                } else if (i >= csvRows.get(current).length) {
                    globalType = "id_String"; //$NON-NLS-1$
                } else {
                    globalType = JavaDataTypeHelper.getTalendTypeOfValue(csvRows.get(current)[i]);
                    current++;
                }
            }
            // for the other lines
            for (int f = firstRowToExtractMetadata; f < csvRows.size(); f++) {
                fields = csvRows.get(f);
                if (fields.length > i) {
                    String value = fields[i];
                    if (!value.equals("")) { //$NON-NLS-1$
                        if (!JavaDataTypeHelper.getTalendTypeOfValue(value).equals(globalType)) {
                            globalType = JavaDataTypeHelper.getCommonType(globalType, JavaDataTypeHelper.getTalendTypeOfValue(value));
                        }
                        if (lengthValue < value.length()) {
                            lengthValue = value.length();
                        }
                        int positionDecimal = 0;
                        if (value.indexOf(',') > -1) {
                            positionDecimal = value.lastIndexOf(',');
                            precisionValue = lengthValue - positionDecimal;
                        } else if (value.indexOf('.') > -1) {
                            positionDecimal = value.lastIndexOf('.');
                            precisionValue = lengthValue - positionDecimal;
                        }
                    } else {
                        IPreferenceStore preferenceStore = null;
                        if (GlobalServiceRegister.getDefault().isServiceRegistered(IDesignerCoreUIService.class)) {
                            IDesignerCoreUIService designerCoreUiService = (IDesignerCoreUIService) GlobalServiceRegister.getDefault().getService(IDesignerCoreUIService.class);
                            preferenceStore = designerCoreUiService.getPreferenceStore();
                        }
                        if (preferenceStore != null
                                && preferenceStore.getString(MetadataTypeLengthConstants.VALUE_DEFAULT_TYPE) != null
                                && !preferenceStore.getString(MetadataTypeLengthConstants.VALUE_DEFAULT_TYPE).equals("")) { //$NON-NLS-1$
                            globalType = preferenceStore.getString(MetadataTypeLengthConstants.VALUE_DEFAULT_TYPE);
                            if (preferenceStore.getString(MetadataTypeLengthConstants.VALUE_DEFAULT_LENGTH) != null
                                    && !preferenceStore.getString(MetadataTypeLengthConstants.VALUE_DEFAULT_LENGTH).equals("")) { //$NON-NLS-1$
                                lengthValue = Integer.parseInt(preferenceStore.getString(MetadataTypeLengthConstants.VALUE_DEFAULT_LENGTH));
                            }
                        }
                    }
                }
            }
            // see the feature 6296, qli comment
            if (csvRows.size() <= 1 && firstRowToExtractMetadata == 1) {
                lengthValue = 255;
            }
            // define the metadataColumn for field i
            MetadataColumn metadataColumn = ConnectionFactory.eINSTANCE.createMetadataColumn();
            metadataColumn.setPattern("\"dd-MM-yyyy\""); //$NON-NLS-1$
            // Convert javaType to TalendType
            String talendType = globalType;
            if (globalType.equals(JavaTypesManager.FLOAT.getId()) || globalType.equals(JavaTypesManager.DOUBLE.getId())) {
                metadataColumn.setPrecision(precisionValue);
            } else {
                metadataColumn.setPrecision(0);
            }
            metadataColumn.setTalendType(talendType);
            metadataColumn.setLength(lengthValue);
            String columnLabel = IndiceHelper.getIndexedLabel(label[i], exisColumnNames);
            metadataColumn.setLabel(columnLabel);
            if (!exisColumnNames.contains(columnLabel)) {
                exisColumnNames.add(columnLabel);
            }
            columns.add(i, metadataColumn);
        }
    }
    return columns;
}
Also used : MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) ArrayList(java.util.ArrayList) IDesignerCoreUIService(org.talend.core.ui.services.IDesignerCoreUIService) IPreferenceStore(org.eclipse.jface.preference.IPreferenceStore)
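
As a hand-worked illustration of the length/precision bookkeeping in the row loop above (the sample cell value is an assumption, not taken from the source):

// Assumed sample cell value for one column of one row.
String value = "123.45";
int lengthValue = value.length();                     // 6
int positionDecimal = value.lastIndexOf('.');         // 3
int precisionValue = lengthValue - positionDecimal;   // 3, counted from the separator position
// Only a Float or Double column keeps this precision: setLength(6) and setPrecision(3), as in the code above.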

Example 89 with MetadataColumn

use of org.talend.core.model.metadata.builder.connection.MetadataColumn in project tbd-studio-se by Talend.

the class ExtractTextFileSchemaService method extractColumns.

@Override
public List<MetadataColumn> extractColumns(HDFSConnection connection, MetadataTable metadataTable) throws HadoopServerException, CoreException, IOException {
    List<MetadataColumn> columns = new ArrayList<MetadataColumn>();
    if (connection == null || metadataTable == null) {
        return columns;
    }
    EMap<String, String> additionalProperties = metadataTable.getAdditionalProperties();
    String hdfsPath = additionalProperties.get(HDFSConstants.HDFS_PATH);
    if (StringUtils.isEmpty(hdfsPath)) {
        return columns;
    }
    InputStream inputStream = HadoopOperationManager.getInstance().getFileContent(HDFSModelUtil.convert2HDFSConnectionBean(connection), classLoader, hdfsPath);
    return extractColumns(connection, inputStream, metadataTable.getLabel());
}
Also used : MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) InputStream(java.io.InputStream) ArrayList(java.util.ArrayList)

Example 90 with MetadataColumn

use of org.talend.core.model.metadata.builder.connection.MetadataColumn in project tbd-studio-se by Talend.

the class ExtractTextFileSchemaService method extractColumns.

private List<MetadataColumn> extractColumns(HDFSConnection connection, InputStream inputStream, String tmpFileName) throws CoreException, IOException {
    List<MetadataColumn> columns = new ArrayList<MetadataColumn>();
    if (connection == null || inputStream == null || tmpFileName == null) {
        return columns;
    }
    File tmpFile = createTmpFile(inputStream, tmpFileName);
    ProcessDescription processDescription = getProcessDescription(connection, tmpFile);
    CsvArray csvArray = ShadowProcessHelper.getCsvArray(processDescription, DEFAULT_SHADOW_TYPE, true);
    return guessSchemaFromArray(csvArray, connection.isFirstLineCaption(), processDescription.getHeaderRow());
}
Also used : ProcessDescription(org.talend.metadata.managment.ui.preview.ProcessDescription) MetadataColumn(org.talend.core.model.metadata.builder.connection.MetadataColumn) CsvArray(org.talend.core.utils.CsvArray) ArrayList(java.util.ArrayList) File(java.io.File) HDFSFile(org.talend.designer.hdfsbrowse.model.HDFSFile)

Aggregations

MetadataColumn (org.talend.core.model.metadata.builder.connection.MetadataColumn): 203
ArrayList (java.util.ArrayList): 98
MetadataTable (org.talend.core.model.metadata.builder.connection.MetadataTable): 51
ModelElement (orgomg.cwm.objectmodel.core.ModelElement): 37
TdColumn (org.talend.cwm.relational.TdColumn): 28
List (java.util.List): 24
HashMap (java.util.HashMap): 20
Test (org.junit.Test): 15
TaggedValue (orgomg.cwm.objectmodel.core.TaggedValue): 14
EList (org.eclipse.emf.common.util.EList): 12
Indicator (org.talend.dataquality.indicators.Indicator): 12
IRepositoryNode (org.talend.repository.model.IRepositoryNode): 12
File (java.io.File): 11
IStructuredSelection (org.eclipse.jface.viewers.IStructuredSelection): 10
NoSQLExtractSchemaException (org.talend.repository.nosql.exceptions.NoSQLExtractSchemaException): 10
Map (java.util.Map): 9
DelimitedFileConnection (org.talend.core.model.metadata.builder.connection.DelimitedFileConnection): 9
RepositoryNode (org.talend.repository.model.RepositoryNode): 9
Iterator (java.util.Iterator): 8
IMetadataTable (org.talend.core.model.metadata.IMetadataTable): 8