Search in sources :

Example 1 with Etl

use of com.robertsanek.data.etl.Etl in project core by z1lc.

the class MasterEtl method runIndividualEtl.

/**
 * Runs a single ETL class and loads its output through one Hibernate session and transaction.
 *
 * <p>The extract/transform/load work is executed through {@code Failsafe} using
 * {@code individualEtlRetry}. An attempt fails if the ETL produces {@code ROW_LIMIT} or more rows.
 * On failure the insert transaction is rolled back and a failure notification is sent; on success
 * with zero rows a "generated 0 rows" notification is sent.
 *
 * @param etlClazz the ETL implementation to instantiate (via {@code InjectUtils.inject}) and run
 * @param sf session factory used to open the session that receives the generated rows
 * @return an {@link EtlRun} describing this run (timings, row count, try number, success flag);
 *     a value is returned on failure as well, this method does not propagate the ETL's exception
 */
@SuppressWarnings("rawtypes")
private EtlRun runIndividualEtl(Class<? extends Etl> etlClazz, SessionFactory sf) {
    Stopwatch thisEtlStopwatch = Stopwatch.createStarted();
    ZonedDateTime thisEtlStarted = ZonedDateTime.now();
    AtomicInteger max = new AtomicInteger(0);
    AtomicBoolean successful = new AtomicBoolean(false);
    AtomicLong secondsEtl = new AtomicLong(0);
    // Placeholder so the failure notification always has a stack trace to print.
    Exception exceptionDuringEtl = new RuntimeException("dummy exception -- didn't catch any other exceptions");
    AtomicLong tryNumber = new AtomicLong(0);
    try (Session session = sf.openSession()) {
        // The timeout has to be configured BEFORE begin(): Hibernate only applies it to
        // transactions started by a subsequent begin() call on this Transaction instance.
        Transaction insertTransaction = session.getTransaction();
        insertTransaction.setTimeout((int) TRANSACTION_TIMEOUT.getSeconds());
        insertTransaction.begin();
        try {
            Failsafe.with(individualEtlRetry).run(() -> {
                log.info("Running ETL with class %s (try #%s).", etlClazz.getName(), tryNumber.incrementAndGet());
                Etl instance = InjectUtils.inject(etlClazz);
                List objects = instance.getObjects();
                max.set(Math.min(ROW_LIMIT, objects.size()));
                if (max.get() == ROW_LIMIT) {
                    throw new RuntimeException(String.format("ETL %s has %d+ rows. " + "You should probably do some kind of pre-aggregation.", etlClazz.getName(), ROW_LIMIT));
                }
                // Everything up to here is counted as extract/transform time; the stopwatch is
                // restarted so that what follows is counted as load time.
                secondsEtl.set(thisEtlStopwatch.elapsed().getSeconds());
                thisEtlStopwatch.reset().start();
                IntStream.range(0, max.get()).forEach(i -> {
                    try {
                        session.save(objects.get(i));
                    } catch (NonUniqueObjectException e) {
                        // A duplicate identifier within the session is logged and skipped.
                        log.error(e.toString());
                    }
                    // Periodically push pending inserts and detach saved entities from the session.
                    if (i % getBatchingSize() == 0) {
                        session.flush();
                        session.clear();
                    }
                });
                insertTransaction.commit();
                successful.set(true);
            });
        } catch (Exception e) {
            // Do not leave the transaction open when the ETL ultimately failed. A failure of the
            // rollback itself must not hide the original cause, so it is attached as suppressed.
            try {
                insertTransaction.rollback();
            } catch (RuntimeException rollbackFailure) {
                e.addSuppressed(rollbackFailure);
            }
            throw e;
        }
    } catch (Exception e) {
        log.error(e);
        exceptionDuringEtl = e;
    }
    long secondsTransaction = thisEtlStopwatch.elapsed().getSeconds();
    long totalRuntime = secondsEtl.get() + secondsTransaction;
    String template = String.format("ETL with class %s completed %s in %s %s on try #%s, generating %s rows.", etlClazz.getName(), successful.get() ? "successfully" : "unsuccessfully", totalRuntime, totalRuntime == 1 ? "second" : "seconds", tryNumber.get(), max);
    if (successful.get()) {
        log.info(template);
        if (max.get() == 0) {
            // A successful run with no rows is reported so the output can be checked.
            notificationSender.sendNotificationDefault(String.format("%s generated 0 rows at %s!", etlClazz.getSimpleName(), LocalDateTime.now().format(DateTimeFormatter.ofPattern("HH:mm:ss", new Locale("en")))), "Check output.");
        }
    } else {
        notificationSender.sendNotificationDefault(String.format("%s failed at %s!", etlClazz.getSimpleName(), LocalDateTime.now().format(DateTimeFormatter.ofPattern("HH:mm:ss", new Locale("en")))), template + "\n\n" + ExceptionUtils.getStackTrace(exceptionDuringEtl));
        log.error(template);
    }
    return EtlRun.EtlRunBuilder.anEtlRun().withId(ETL_RUN_ID_GENERATOR.getAndIncrement()).withClass_name(etlClazz.getName()).withStart_time(thisEtlStarted).withEnd_time(ZonedDateTime.now()).withRows_generated((long) max.get()).withThread_name(Thread.currentThread().getName()).withUsing_parallel(true).withWas_successful(successful.get()).withIs_slow(etlClazz.getAnnotation(SlowEtl.class) != null).withUses_local_files(etlClazz.getAnnotation(UsesLocalFiles.class) != null).withSeconds_in_extract_and_transform(secondsEtl.get()).withSeconds_in_load(secondsTransaction).withTry_number(tryNumber.get()).build();
}
Also used : Locale(java.util.Locale) Stopwatch(com.google.common.base.Stopwatch) GeneralSecurityException(java.security.GeneralSecurityException) NonUniqueObjectException(org.hibernate.NonUniqueObjectException) HibernateException(org.hibernate.HibernateException) IOException(java.io.IOException) ServiceException(org.hibernate.service.spi.ServiceException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) AtomicLong(java.util.concurrent.atomic.AtomicLong) Transaction(org.hibernate.Transaction) ZonedDateTime(java.time.ZonedDateTime) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) List(java.util.List) Etl(com.robertsanek.data.etl.Etl) SlowEtl(com.robertsanek.data.etl.SlowEtl) Session(org.hibernate.Session)

Example 2 with Etl

use of com.robertsanek.data.etl.Etl in project core by z1lc.

the class MasterEtl method getConcreteEtls.

/**
 * Discovers the ETL classes that should be executed.
 *
 * <p>Scans {@code REFLECTIONS_PREFIX} for subtypes of {@link Etl}, drops the ones rejected by
 * {@link #isRunnableEtl(Class, boolean)}, and returns the remainder ordered by class name.
 *
 * @param fastRun when {@code true}, classes annotated with {@link SlowEtl} are left out
 * @return the ETL classes to run, sorted by fully-qualified class name
 */
@VisibleForTesting
@SuppressWarnings("rawtypes")
List<Class<? extends Etl>> getConcreteEtls(boolean fastRun) {
    Set<Class<? extends Etl>> candidates = new Reflections(REFLECTIONS_PREFIX).getSubTypesOf(Etl.class);
    log.info("Flag fastRun is set to %s. Will%s include slow ETLs in run.", fastRun, fastRun ? " not" : "");
    return candidates.stream()
        .filter(candidate -> isRunnableEtl(candidate, fastRun))
        .sorted(Comparator.comparing(Class::getName))
        .collect(Collectors.toList());
}

/**
 * Decides whether a discovered ETL class takes part in the current run.
 *
 * <p>Abstract classes are rejected silently. Classes annotated with {@link DoNotRun}, and (for
 * fast runs) classes annotated with {@link SlowEtl}, are rejected with an explanatory log line.
 *
 * @param candidate the ETL class under consideration
 * @param fastRun whether slow ETLs are being skipped in this run
 * @return {@code true} if the class should be run
 */
@SuppressWarnings("rawtypes")
private boolean isRunnableEtl(Class<? extends Etl> candidate, boolean fastRun) {
    if (Modifier.isAbstract(candidate.getModifiers())) {
        return false;
    }
    DoNotRun doNotRun = candidate.getAnnotation(DoNotRun.class);
    if (doNotRun != null) {
        log.info("Will not run class %s because %s.", candidate.getName(), doNotRun.explanation());
        return false;
    }
    if (fastRun && candidate.getAnnotation(SlowEtl.class) != null) {
        log.info("Will not run class %s because it is annotated @%s "
            + "and we are running with fastRun flag set to %s.", candidate.getName(), SlowEtl.class.getSimpleName(), fastRun);
        return false;
    }
    return true;
}
Also used : Etl(com.robertsanek.data.etl.Etl) GsonFactory(com.google.api.client.json.gson.GsonFactory) ZonedDateTime(java.time.ZonedDateTime) Inject(com.google.inject.Inject) GOOGLE_CLOUD_SQL_CRONUS_POSTGRES_USERNAME(com.robertsanek.util.SecretType.GOOGLE_CLOUD_SQL_CRONUS_POSTGRES_USERNAME) Reflections(org.reflections.Reflections) Transaction(org.hibernate.Transaction) Log(com.robertsanek.util.Log) CrossPlatformUtils(com.robertsanek.util.platform.CrossPlatformUtils) GeneralSecurityException(java.security.GeneralSecurityException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Locale(java.util.Locale) Duration(java.time.Duration) NonUniqueObjectException(org.hibernate.NonUniqueObjectException) Configuration(org.hibernate.cfg.Configuration) JobExecutionContext(org.quartz.JobExecutionContext) NotificationSender(com.robertsanek.util.NotificationSender) EtlRun(com.robertsanek.data.etl.EtlRun) SessionFactory(org.hibernate.SessionFactory) HttpTransport(com.google.api.client.http.HttpTransport) Set(java.util.Set) GoogleNetHttpTransport(com.google.api.client.googleapis.javanet.GoogleNetHttpTransport) RetryPolicy(net.jodah.failsafe.RetryPolicy) Collectors(java.util.stream.Collectors) Logs(com.robertsanek.util.Logs) GOOGLE_CLOUD_SQL_RSANEK_POSTGRES_PASSWORD(com.robertsanek.util.SecretType.GOOGLE_CLOUD_SQL_RSANEK_POSTGRES_PASSWORD) List(java.util.List) Stream(java.util.stream.Stream) DoNotRun(com.robertsanek.data.etl.DoNotRun) Settings(com.google.api.services.sqladmin.model.Settings) Modifier(java.lang.reflect.Modifier) HibernateException(org.hibernate.HibernateException) ExceptionUtils(org.apache.commons.lang3.exception.ExceptionUtils) Operation(com.google.api.services.sqladmin.model.Operation) IntStream(java.util.stream.IntStream) GoogleCredential(com.google.api.client.googleapis.auth.oauth2.GoogleCredential) UsesLocalFiles(com.robertsanek.data.etl.UsesLocalFiles) Stopwatch(com.google.common.base.Stopwatch) LocalDateTime(java.time.LocalDateTime) 
AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Session(org.hibernate.Session) Unchecked(com.robertsanek.util.Unchecked) SecretProvider(com.robertsanek.util.SecretProvider) GOOGLE_CLOUD_SQL_RSANEK_POSTGRES_USERNAME(com.robertsanek.util.SecretType.GOOGLE_CLOUD_SQL_RSANEK_POSTGRES_USERNAME) SQLAdmin(com.google.api.services.sqladmin.SQLAdmin) IOException(java.io.IOException) DatabaseInstance(com.google.api.services.sqladmin.model.DatabaseInstance) FileInputStream(java.io.FileInputStream) SlowEtl(com.robertsanek.data.etl.SlowEtl) Failsafe(net.jodah.failsafe.Failsafe) AtomicLong(java.util.concurrent.atomic.AtomicLong) JsonFactory(com.google.api.client.json.JsonFactory) DateTimeFormatter(java.time.format.DateTimeFormatter) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) Collections(java.util.Collections) InjectUtils(com.robertsanek.util.inject.InjectUtils) ServiceException(org.hibernate.service.spi.ServiceException) Etl(com.robertsanek.data.etl.Etl) SlowEtl(com.robertsanek.data.etl.SlowEtl) DoNotRun(com.robertsanek.data.etl.DoNotRun) Reflections(org.reflections.Reflections) SlowEtl(com.robertsanek.data.etl.SlowEtl) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 3 with Etl

use of com.robertsanek.data.etl.Etl in project core by z1lc.

the class MasterEtl method runEtls.

/**
 * Runs all ETLs selected by {@link #getConcreteEtls(boolean)} and records one {@link EtlRun} per
 * ETL in the database.
 *
 * <p>Persisting the {@code EtlRun} records is best-effort: a failure there is logged (and the
 * transaction rolled back) but does not affect the return value.
 *
 * @param fastRun passed through to {@code getConcreteEtls}; when {@code true} slow ETLs are skipped
 * @param parallel when {@code true} the ETLs are run through a parallel stream, otherwise sequentially
 * @return {@code true} only if every ETL reported success and generated at least one row
 */
@SuppressWarnings({ "rawtypes", "try" })
public boolean runEtls(boolean fastRun, boolean parallel) {
    Stopwatch total = Stopwatch.createStarted();
    List<Class<? extends Etl>> concreteEtls = getConcreteEtls(fastRun);
    log.info("Will run %s ETLs.", concreteEtls.size());
    log.info("Creating connection to Cloud SQL and re-generating table schemas... (this may take up to 3 minutes)");
    // The first factory (Hbm2ddlType.CREATE) is never used directly; it is opened only for the
    // effect of creating it. The second one (Hbm2ddlType.NONE) is used for all the actual work.
    try (SessionFactory ignored = Unchecked.get(() -> getSessionFactory(Hbm2ddlType.CREATE, ConnectionType.RSANEK));
        SessionFactory noneSf = Unchecked.get(() -> getSessionFactory(Hbm2ddlType.NONE, ConnectionType.RSANEK))) {
        log.info("Schema re-generation complete, taking %s seconds. Beginning ETL with parallel = %s.", total.elapsed().getSeconds(), parallel);
        Stream<Class<? extends Etl>> stream = parallel ? concreteEtls.parallelStream() : concreteEtls.stream();
        List<EtlRun> etlRuns = stream.map(etlClazz -> runIndividualEtl(etlClazz, noneSf)).collect(Collectors.toList());
        try (Session session = noneSf.openSession()) {
            Transaction transaction = session.beginTransaction();
            try {
                etlRuns.forEach(session::save);
                session.flush();
                session.clear();
                transaction.commit();
            } catch (Exception e) {
                // Roll back explicitly instead of closing the session with an open transaction.
                // A rollback failure is attached as suppressed so the original cause is kept.
                try {
                    transaction.rollback();
                } catch (RuntimeException rollbackFailure) {
                    e.addSuppressed(rollbackFailure);
                }
                throw e;
            }
        } catch (Exception e) {
            // Failing to persist the run records must not abort the overall ETL result.
            log.error(e);
        }
        log.info("Completed %s ETLs in %s seconds.", concreteEtls.size(), total.elapsed().getSeconds());
        return etlRuns.stream().allMatch(etlRun -> etlRun.getWas_successful() && etlRun.getRows_generated() > 0);
    }
}
Also used : SessionFactory(org.hibernate.SessionFactory) Etl(com.robertsanek.data.etl.Etl) GsonFactory(com.google.api.client.json.gson.GsonFactory) ZonedDateTime(java.time.ZonedDateTime) Inject(com.google.inject.Inject) GOOGLE_CLOUD_SQL_CRONUS_POSTGRES_USERNAME(com.robertsanek.util.SecretType.GOOGLE_CLOUD_SQL_CRONUS_POSTGRES_USERNAME) Reflections(org.reflections.Reflections) Transaction(org.hibernate.Transaction) Log(com.robertsanek.util.Log) CrossPlatformUtils(com.robertsanek.util.platform.CrossPlatformUtils) GeneralSecurityException(java.security.GeneralSecurityException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Locale(java.util.Locale) Duration(java.time.Duration) NonUniqueObjectException(org.hibernate.NonUniqueObjectException) Configuration(org.hibernate.cfg.Configuration) JobExecutionContext(org.quartz.JobExecutionContext) NotificationSender(com.robertsanek.util.NotificationSender) EtlRun(com.robertsanek.data.etl.EtlRun) SessionFactory(org.hibernate.SessionFactory) HttpTransport(com.google.api.client.http.HttpTransport) Set(java.util.Set) GoogleNetHttpTransport(com.google.api.client.googleapis.javanet.GoogleNetHttpTransport) RetryPolicy(net.jodah.failsafe.RetryPolicy) Collectors(java.util.stream.Collectors) Logs(com.robertsanek.util.Logs) GOOGLE_CLOUD_SQL_RSANEK_POSTGRES_PASSWORD(com.robertsanek.util.SecretType.GOOGLE_CLOUD_SQL_RSANEK_POSTGRES_PASSWORD) List(java.util.List) Stream(java.util.stream.Stream) DoNotRun(com.robertsanek.data.etl.DoNotRun) Settings(com.google.api.services.sqladmin.model.Settings) Modifier(java.lang.reflect.Modifier) HibernateException(org.hibernate.HibernateException) ExceptionUtils(org.apache.commons.lang3.exception.ExceptionUtils) Operation(com.google.api.services.sqladmin.model.Operation) IntStream(java.util.stream.IntStream) GoogleCredential(com.google.api.client.googleapis.auth.oauth2.GoogleCredential) UsesLocalFiles(com.robertsanek.data.etl.UsesLocalFiles) Stopwatch(com.google.common.base.Stopwatch) 
LocalDateTime(java.time.LocalDateTime) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Session(org.hibernate.Session) Unchecked(com.robertsanek.util.Unchecked) SecretProvider(com.robertsanek.util.SecretProvider) GOOGLE_CLOUD_SQL_RSANEK_POSTGRES_USERNAME(com.robertsanek.util.SecretType.GOOGLE_CLOUD_SQL_RSANEK_POSTGRES_USERNAME) SQLAdmin(com.google.api.services.sqladmin.SQLAdmin) IOException(java.io.IOException) DatabaseInstance(com.google.api.services.sqladmin.model.DatabaseInstance) FileInputStream(java.io.FileInputStream) SlowEtl(com.robertsanek.data.etl.SlowEtl) Failsafe(net.jodah.failsafe.Failsafe) AtomicLong(java.util.concurrent.atomic.AtomicLong) JsonFactory(com.google.api.client.json.JsonFactory) DateTimeFormatter(java.time.format.DateTimeFormatter) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) Collections(java.util.Collections) InjectUtils(com.robertsanek.util.inject.InjectUtils) ServiceException(org.hibernate.service.spi.ServiceException) Stopwatch(com.google.common.base.Stopwatch) GeneralSecurityException(java.security.GeneralSecurityException) NonUniqueObjectException(org.hibernate.NonUniqueObjectException) HibernateException(org.hibernate.HibernateException) IOException(java.io.IOException) ServiceException(org.hibernate.service.spi.ServiceException) Transaction(org.hibernate.Transaction) Etl(com.robertsanek.data.etl.Etl) SlowEtl(com.robertsanek.data.etl.SlowEtl) EtlRun(com.robertsanek.data.etl.EtlRun) Session(org.hibernate.Session)

Aggregations

Stopwatch (com.google.common.base.Stopwatch)3 Etl (com.robertsanek.data.etl.Etl)3 SlowEtl (com.robertsanek.data.etl.SlowEtl)3 IOException (java.io.IOException)3 GeneralSecurityException (java.security.GeneralSecurityException)3 ZonedDateTime (java.time.ZonedDateTime)3 List (java.util.List)3 Locale (java.util.Locale)3 GoogleCredential (com.google.api.client.googleapis.auth.oauth2.GoogleCredential)2 GoogleNetHttpTransport (com.google.api.client.googleapis.javanet.GoogleNetHttpTransport)2 HttpTransport (com.google.api.client.http.HttpTransport)2 JsonFactory (com.google.api.client.json.JsonFactory)2 GsonFactory (com.google.api.client.json.gson.GsonFactory)2 SQLAdmin (com.google.api.services.sqladmin.SQLAdmin)2 DatabaseInstance (com.google.api.services.sqladmin.model.DatabaseInstance)2 Operation (com.google.api.services.sqladmin.model.Operation)2 Settings (com.google.api.services.sqladmin.model.Settings)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 Inject (com.google.inject.Inject)2 DoNotRun (com.robertsanek.data.etl.DoNotRun)2