Search in sources:

Example 11 with HarvestedRecordUniqueId

use of cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId in project RecordManager2 by moravianlibrary.

In class MiscellaneousJobsConfig, method generateItemIdReader:

/**
 * Creates the step-scoped paging reader used by the item-id generation job.
 *
 * <p>The reader selects {@code id, import_conf_id, record_id} from {@code harvested_record}
 * for rows whose {@code deleted} column is null, sorted by {@code id}, 20 rows per page, and
 * maps every row with {@link HarvestedRecordIdRowMapper}. If a configuration id is given, the
 * selection is additionally restricted to rows with that {@code import_conf_id}.
 *
 * @param confId value of the {@code Constants.JOB_PARAM_CONF_ID} job parameter; when
 *               {@code null} no import-configuration restriction is added
 * @return the configured reader, on which {@code afterPropertiesSet()} has already been called
 * @throws Exception if the query provider or the reader cannot be set up
 */
@Bean(name = Constants.JOB_ID_GENERATE_ITEM_ID + ":generateItemIdReader")
@StepScope
public synchronized ItemReader<HarvestedRecordUniqueId> generateItemIdReader(@Value("#{jobParameters[" + Constants.JOB_PARAM_CONF_ID + "]}") Long confId) throws Exception {
    final boolean restrictToConfiguration = confId != null;

    // Describe the paged query first ...
    SqlPagingQueryProviderFactoryBean providerFactory = new SqlPagingQueryProviderFactoryBean();
    providerFactory.setDataSource(dataSource);
    providerFactory.setSelectClause("SELECT id, import_conf_id, record_id");
    providerFactory.setFromClause("FROM harvested_record");
    providerFactory.setWhereClause("WHERE deleted is null" + (restrictToConfiguration ? " AND import_conf_id=:conf_id" : ""));
    providerFactory.setSortKey("id");

    // ... then wire it into the reader.
    JdbcPagingItemReader<HarvestedRecordUniqueId> pagingReader = new JdbcPagingItemReader<HarvestedRecordUniqueId>();
    pagingReader.setDataSource(dataSource);
    pagingReader.setQueryProvider(providerFactory.getObject());
    pagingReader.setPageSize(20);
    pagingReader.setRowMapper(new HarvestedRecordIdRowMapper());
    if (restrictToConfiguration) {
        // The named parameter is bound only when the WHERE clause actually references it.
        Map<String, Object> namedParameters = new HashMap<String, Object>();
        namedParameters.put("conf_id", confId);
        pagingReader.setParameterValues(namedParameters);
    }
    pagingReader.afterPropertiesSet();
    return pagingReader;
}
Also used : HarvestedRecordUniqueId(cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId) SqlPagingQueryProviderFactoryBean(org.springframework.batch.item.database.support.SqlPagingQueryProviderFactoryBean) HashMap(java.util.HashMap) HarvestedRecordIdRowMapper(cz.mzk.recordmanager.server.export.HarvestedRecordIdRowMapper) JdbcPagingItemReader(org.springframework.batch.item.database.JdbcPagingItemReader) StepScope(org.springframework.batch.core.configuration.annotation.StepScope) SqlPagingQueryProviderFactoryBean(org.springframework.batch.item.database.support.SqlPagingQueryProviderFactoryBean) Bean(org.springframework.context.annotation.Bean)

Example 12 with HarvestedRecordUniqueId

use of cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId in project RecordManager2 by moravianlibrary.

In class GenerateItemIdWriter, method write:

/**
 * Re-applies {@link DefaultMarcInterceptor} to the raw content of each referenced record.
 *
 * <p>For every id the record is loaded through {@code hrDao}. A record is skipped when it
 * does not exist, has no raw content, is not in the {@code "marc21-xml"} format, or its
 * import configuration is missing or has no item id. Otherwise the raw content is parsed,
 * passed through the interceptor, and the result is stored back with {@code setRawRecord}.
 *
 * <p>A failure on one record is logged and does not stop processing of the remaining ones.
 * NOTE(review): there is no explicit persist call here; presumably {@code hr} is a managed
 * entity that is flushed by the surrounding transaction - confirm.
 *
 * @param items unique ids of the harvested records to process
 * @throws Exception declared by the overridden method; per-record exceptions are caught and logged
 */
@Override
public void write(List<? extends HarvestedRecordUniqueId> items) throws Exception {
    for (HarvestedRecordUniqueId uniqueId : items) {
        try {
            progress.incrementAndLogProgress();
            HarvestedRecord hr = hrDao.get(uniqueId);
            if (hr == null) {
                continue;
            }
            byte[] rawRecord = hr.getRawRecord();
            // Constant-first equals and the explicit configuration check make a missing
            // format / configuration an ordinary "skip" instead of a logged NullPointerException.
            if (rawRecord == null || rawRecord.length == 0
                    || !"marc21-xml".equals(hr.getFormat())
                    || hr.getHarvestedFrom() == null
                    || hr.getHarvestedFrom().getItemId() == null) {
                continue;
            }
            Record record = marcXmlParser.parseUnderlyingRecord(new ByteArrayInputStream(rawRecord));
            hr.setRawRecord(new DefaultMarcInterceptor(record, hr.getHarvestedFrom(), uniqueId.getRecordId()).intercept());
        } catch (Exception ex) {
            // Best effort: report the failing record and carry on with the rest of the chunk.
            logger.error(String.format("Exception thrown in harvested_record with id=%s", uniqueId), ex);
        }
    }
}
Also used : HarvestedRecordUniqueId(cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId) ByteArrayInputStream(java.io.ByteArrayInputStream) Record(org.marc4j.marc.Record) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord) DefaultMarcInterceptor(cz.mzk.recordmanager.server.marc.intercepting.DefaultMarcInterceptor) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Example 13 with HarvestedRecordUniqueId

use of cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId in project RecordManager2 by moravianlibrary.

In class MarcXmlDedupKeyParserTest, method parseCorrectRecord:

/**
 * Parses the sample record {@code /records/marcxml/MZK01-001439241.xml} and checks the
 * deduplication keys (ISBN, title, physical format, publication year, author keys, scale,
 * UUID and ISSN series fields) that the parser stores on the harvested record.
 *
 * @throws Exception if the resource cannot be read or parsing fails
 */
@Test
public void parseCorrectRecord() throws Exception {
    InputStream is = this.getClass().getResourceAsStream("/records/marcxml/MZK01-001439241.xml");
    // Fail with a clear assertion rather than an NPE deep inside the stream copy.
    Assert.assertNotNull(is);
    byte[] rawRecord;
    try {
        rawRecord = ByteStreams.toByteArray(is);
    } finally {
        // The original never closed the resource stream.
        is.close();
    }
    ImportConfiguration ic = importConfDao.get(300L);
    HarvestedRecordUniqueId id = new HarvestedRecordUniqueId(ic, "1");
    HarvestedRecord record = new HarvestedRecord(id);
    record.setHarvestedFrom(ic);
    record.setFormat("marc21-xml");
    record.setRawRecord(rawRecord);
    record.setId(1L);
    parser.parse(record);
    Assert.assertTrue(record.getIsbns().size() > 0);
    Assert.assertEquals(record.getIsbns().get(0).getIsbn(), EXPECTED_ISBN);
    Assert.assertEquals(record.getTitles().size(), 1);
    Assert.assertEquals(record.getTitles().get(0).getTitleStr(), EXPECTED_TITLE);
    Assert.assertEquals(record.getPhysicalFormats().size(), 1);
    Assert.assertEquals(record.getPhysicalFormats().get(0).getName(), HarvestedRecordFormatEnum.BOOKS.name());
    // Long.valueOf replaces the deprecated Long(long) constructor.
    Assert.assertEquals(record.getPublicationYear(), Long.valueOf(2014L));
    Assert.assertEquals(record.getAuthorAuthKey(), EXPECTED_AUTHORAUTHKEY);
    Assert.assertEquals(record.getAuthorString(), EXPECTED_AUTHORSTRING);
    Assert.assertEquals(record.getScale(), EXPECTED_SCALE);
    Assert.assertEquals(record.getUuid(), EXPECTED_UUID);
    Assert.assertEquals(record.getIssnSeries(), EXPECTED_ISSNSERIES);
    Assert.assertEquals(record.getIssnSeriesOrder(), EXPECTED_ISSNSERIESORDER);
}
Also used : HarvestedRecordUniqueId(cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId) InputStream(java.io.InputStream) ImportConfiguration(cz.mzk.recordmanager.server.model.ImportConfiguration) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord) Test(org.testng.annotations.Test) AbstractTest(cz.mzk.recordmanager.server.AbstractTest)

Example 14 with HarvestedRecordUniqueId

use of cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId in project RecordManager2 by moravianlibrary.

In class ZakonyProLidiTest, method metadataHarvestTest:

/**
 * Runs the zakonyprolidi harvest job for the year 1945 against a mocked HTTP client that
 * serves {@code /import/zakony/Records.xml}, then verifies that the records
 * {@code 1945-1} and {@code 1945-2} can be loaded for import configuration 300.
 *
 * @throws Exception if the job cannot be looked up or launched
 */
@Test
public void metadataHarvestTest() throws Exception {
    // Program the mock: exactly one expected GET, answered with the canned document list.
    reset(httpClient);
    InputStream cannedResponse = this.getClass().getResourceAsStream("/import/zakony/Records.xml");
    expect(httpClient.executeGet("http://www.zakonyprolidi.cz/api/v1/data.xml/YearDocList?apikey=test&Collection=cs&Year=1945")).andReturn(cannedResponse);
    replay(httpClient);

    // Harvest a single year: from == until == 1945.
    Map<String, JobParameter> jobArguments = new HashMap<String, JobParameter>();
    jobArguments.put(Constants.JOB_PARAM_CONF_ID, new JobParameter(300L));
    jobArguments.put(Constants.JOB_PARAM_FROM_DATE, new JobParameter(1945L));
    jobArguments.put(Constants.JOB_PARAM_UNTIL_DATE, new JobParameter(1945L));
    Job harvestJob = jobRegistry.getJob(Constants.JOB_ID_HARVEST_ZAKONYPROLIDI);
    jobLauncher.run(harvestJob, new JobParameters(jobArguments));

    HarvestedRecordUniqueId firstId = new HarvestedRecordUniqueId(300L, "1945-1");
    Assert.assertNotNull(hrDao.get(firstId));
    HarvestedRecordUniqueId secondId = new HarvestedRecordUniqueId(300L, "1945-2");
    Assert.assertNotNull(hrDao.get(secondId));
}
Also used : HarvestedRecordUniqueId(cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId) HashMap(java.util.HashMap) InputStream(java.io.InputStream) JobParameters(org.springframework.batch.core.JobParameters) Job(org.springframework.batch.core.Job) JobParameter(org.springframework.batch.core.JobParameter) Test(org.testng.annotations.Test) AbstractTest(cz.mzk.recordmanager.server.AbstractTest)

Example 15 with HarvestedRecordUniqueId

use of cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId in project RecordManager2 by moravianlibrary.

In class ImportRecordsWriter, method writeInner:

/**
 * Stores every MARC record of the given chunk as a harvested record.
 *
 * <p>Per record: the record is serialized to MARC XML, its metadata is parsed to obtain the
 * record id (optionally reduced by {@code regexpExtractor}), the content is passed through
 * the configured interceptor when interception is enabled, and the matching
 * {@link HarvestedRecord} is looked up or newly created. The record is persisted, its
 * deduplication keys are parsed, and it is persisted again; when filtering is enabled and
 * the record should not be processed, it is marked as deleted before the second persist.
 *
 * @param items chunk of record lists to import
 * @throws Exception any failure while processing a record; it is logged and rethrown
 */
protected void writeInner(List<? extends List<Record>> items) throws Exception {
    for (List<Record> batch : items) {
        for (Record marc : batch) {
            try {
                // Serialize to MARC XML; the record id is needed before interception.
                ByteArrayOutputStream buffer = new ByteArrayOutputStream();
                MarcWriter xmlWriter = new MarcXmlWriter(buffer, true);
                xmlWriter.setConverter(ISOCharConvertor.INSTANCE);
                xmlWriter.write(marc);
                xmlWriter.close();
                byte[] content = buffer.toByteArray();

                MetadataRecord metadata = parseMetadata(content);
                String recordId = metadata.getUniqueId();
                if (regexpExtractor != null) {
                    recordId = regexpExtractor.extract(recordId);
                }

                MarcRecordInterceptor interceptor = null;
                if (harvestConfiguration.isInterceptionEnabled()) {
                    interceptor = marcInterceptorFactory.getInterceptor(harvestConfiguration, recordId, content);
                }
                if (interceptor != null) {
                    byte[] intercepted = interceptor.intercept();
                    if (!Arrays.equals(content, intercepted)) {
                        // Content changed: metadata must describe the intercepted bytes.
                        metadata = parseMetadata(intercepted);
                        content = intercepted;
                    }
                }

                HarvestedRecord stored = harvestedRecordDao.findByIdAndHarvestConfiguration(recordId, configurationId);
                if (stored == null) {
                    stored = new HarvestedRecord(new HarvestedRecordUniqueId(harvestConfiguration, recordId));
                    // TODO detect format
                    stored.setFormat("marc21-xml");
                    stored.setHarvestedFrom(harvestConfiguration);
                }
                stored.setUpdated(new Date());
                stored.setDeleted(null);
                stored.setRawRecord(content);
                // First persist happens before the dedup keys are parsed.
                harvestedRecordDao.persist(stored);
                dedupKeysParser.parse(stored, metadata);

                boolean filteredOut = harvestConfiguration.isFilteringEnabled() && !stored.getShouldBeProcessed();
                if (filteredOut) {
                    logger.debug("Filtered record: " + stored.getUniqueId());
                    stored.setDeleted(new Date());
                }
                harvestedRecordDao.persist(stored);
                progress.incrementAndLogProgress();
            } catch (Exception failure) {
                logger.warn("Error occured in processing record");
                throw failure;
            }
        }
    }
}
Also used : MarcRecordInterceptor(cz.mzk.recordmanager.server.marc.intercepting.MarcRecordInterceptor) HarvestedRecordUniqueId(cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId) ByteArrayOutputStream(java.io.ByteArrayOutputStream) MarcXmlWriter(org.marc4j.MarcXmlWriter) Date(java.util.Date) MarcWriter(org.marc4j.MarcWriter) MetadataRecord(cz.mzk.recordmanager.server.metadata.MetadataRecord) MarcRecord(cz.mzk.recordmanager.server.marc.MarcRecord) Record(org.marc4j.marc.Record) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord) MetadataRecord(cz.mzk.recordmanager.server.metadata.MetadataRecord) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Aggregations

HarvestedRecordUniqueId (cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId): 21, HarvestedRecord (cz.mzk.recordmanager.server.model.HarvestedRecord): 12, HashMap (java.util.HashMap): 8, StepScope (org.springframework.batch.core.configuration.annotation.StepScope): 7, JdbcPagingItemReader (org.springframework.batch.item.database.JdbcPagingItemReader): 7, SqlPagingQueryProviderFactoryBean (org.springframework.batch.item.database.support.SqlPagingQueryProviderFactoryBean): 7, Bean (org.springframework.context.annotation.Bean): 7, Date (java.util.Date): 6, HarvestedRecordIdRowMapper (cz.mzk.recordmanager.server.export.HarvestedRecordIdRowMapper): 5, InputStream (java.io.InputStream): 5, Record (org.marc4j.marc.Record): 4, MarcRecord (cz.mzk.recordmanager.server.marc.MarcRecord): 3, ByteArrayInputStream (java.io.ByteArrayInputStream): 3, ByteArrayOutputStream (java.io.ByteArrayOutputStream): 3, AbstractTest (cz.mzk.recordmanager.server.AbstractTest): 2, MarcRecordImpl (cz.mzk.recordmanager.server.marc.MarcRecordImpl): 2, MarcRecordInterceptor (cz.mzk.recordmanager.server.marc.intercepting.MarcRecordInterceptor): 2, FulltextKramerius (cz.mzk.recordmanager.server.model.FulltextKramerius): 2, ImportConfiguration (cz.mzk.recordmanager.server.model.ImportConfiguration): 2, IOException (java.io.IOException): 2