/*
 * Decompiled with CFR 0.152.
 */
package com.logicalclocks.hsfs.engine;

import com.amazon.deequ.profiles.ColumnProfilerRunBuilder;
import com.amazon.deequ.profiles.ColumnProfilerRunner;
import com.amazon.deequ.profiles.ColumnProfiles;
import com.logicalclocks.hsfs.DataFormat;
import com.logicalclocks.hsfs.FeatureGroup;
import com.logicalclocks.hsfs.FeatureStoreException;
import com.logicalclocks.hsfs.HudiOperationType;
import com.logicalclocks.hsfs.OnDemandFeatureGroup;
import com.logicalclocks.hsfs.Split;
import com.logicalclocks.hsfs.StorageConnector;
import com.logicalclocks.hsfs.TimeTravelFormat;
import com.logicalclocks.hsfs.TrainingDataset;
import com.logicalclocks.hsfs.engine.HudiEngine;
import com.logicalclocks.hsfs.engine.Utils;
import com.logicalclocks.hsfs.metadata.OnDemandOptions;
import com.logicalclocks.hsfs.metadata.Option;
import java.io.IOException;
import java.text.ParseException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.Strings;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import scala.collection.Iterator;
import scala.collection.JavaConverters;
import scala.collection.Seq;

/**
 * Process-wide singleton that wraps the Hive-enabled {@link SparkSession} and exposes the
 * read, write and profiling operations used by the rest of the client.
 *
 * <p>Obtain the instance through {@link #getInstance()}; the constructor is private.
 */
public class SparkEngine {
    private static SparkEngine INSTANCE = null;
    private final SparkSession sparkSession;
    private final Utils utils = new Utils();
    private final HudiEngine hudiEngine = new HudiEngine();

    /**
     * Returns the shared engine, creating it on first use.
     *
     * @return the single {@code SparkEngine} instance
     */
    public static synchronized SparkEngine getInstance() {
        if (INSTANCE == null) {
            INSTANCE = new SparkEngine();
        }
        return INSTANCE;
    }

    /**
     * Gets or creates a Hive-enabled Spark session and applies the Hive dynamic-partition and
     * Parquet-conversion settings below to its runtime configuration.
     */
    private SparkEngine() {
        this.sparkSession = SparkSession.builder().enableHiveSupport().getOrCreate();
        this.sparkSession.conf().set("hive.exec.dynamic.partition", "true");
        this.sparkSession.conf().set("hive.exec.dynamic.partition.mode", "nonstrict");
        this.sparkSession.conf().set("spark.sql.hive.convertMetastoreParquet", "false");
    }

    /**
     * @return the value of the {@code spark.hadoop.hops.ssl.trustore.name} session property
     */
    public String getTrustStorePath() {
        return this.sparkSession.conf().get("spark.hadoop.hops.ssl.trustore.name");
    }

    /**
     * @return the value of the {@code spark.hadoop.hops.ssl.keystore.name} session property
     */
    public String getKeyStorePath() {
        return this.sparkSession.conf().get("spark.hadoop.hops.ssl.keystore.name");
    }

    /**
     * @return the value of the {@code spark.hadoop.hops.ssl.keystores.passwd.name} session property
     */
    public String getCertKey() {
        return this.sparkSession.conf().get("spark.hadoop.hops.ssl.keystores.passwd.name");
    }

    /**
     * Runs a SQL statement on the wrapped session.
     *
     * @param query the SQL text
     * @return the resulting dataset
     */
    public Dataset<Row> sql(String query) {
        return this.sparkSession.sql(query);
    }

    /**
     * Loads a dataset through the {@code jdbc} data source.
     *
     * @param query optional query; when non-empty it is passed as the {@code query} read option
     * @param storageConnector connector supplying the base read options
     * @return the loaded dataset
     * @throws FeatureStoreException if the connector cannot provide its Spark options
     */
    public Dataset<Row> jdbc(String query, StorageConnector storageConnector) throws FeatureStoreException {
        return this.loadWithQuery("jdbc", query, storageConnector);
    }

    /**
     * Loads a dataset through the {@code net.snowflake.spark.snowflake} data source.
     *
     * @param query optional query; when non-empty it is passed as the {@code query} read option
     * @param storageConnector connector supplying the base read options
     * @return the loaded dataset
     * @throws FeatureStoreException if the connector cannot provide its Spark options
     */
    public Dataset<Row> snowflake(String query, StorageConnector storageConnector) throws FeatureStoreException {
        return this.loadWithQuery("net.snowflake.spark.snowflake", query, storageConnector);
    }

    /** Shared implementation of {@link #jdbc} and {@link #snowflake}: connector options plus an optional query. */
    private Dataset<Row> loadWithQuery(String format, String query, StorageConnector storageConnector) throws FeatureStoreException {
        // Work on a copy so the map handed out by the connector is never mutated,
        // whether or not the connector returns a fresh map on every call.
        Map<String, String> readOptions = new HashMap<>(storageConnector.getSparkOptionsInt());
        if (!Strings.isNullOrEmpty(query)) {
            readOptions.put("query", query);
        }
        return this.sparkSession.read().format(format).options(readOptions).load();
    }

    /**
     * Reads an on-demand feature group and registers it as a temporary view.
     *
     * <p>REDSHIFT and JDBC connectors are read via {@link #jdbc}, SNOWFLAKE via {@link #snowflake};
     * every other connector type is read as files via {@link #read}.
     *
     * @param onDemandFeatureGroup the feature group to read
     * @param alias name of the temporary view to create or replace
     * @return the dataset backing the view
     * @throws FeatureStoreException propagated from the connector
     */
    public Dataset<Row> registerOnDemandTemporaryTable(OnDemandFeatureGroup onDemandFeatureGroup, String alias) throws FeatureStoreException {
        StorageConnector connector = onDemandFeatureGroup.getStorageConnector();
        Dataset<Row> dataset;
        switch (connector.getStorageConnectorType()) {
            case REDSHIFT:
            case JDBC: {
                dataset = this.jdbc(onDemandFeatureGroup.getQuery(), connector);
                break;
            }
            case SNOWFLAKE: {
                dataset = this.snowflake(onDemandFeatureGroup.getQuery(), connector);
                break;
            }
            default: {
                dataset = this.read(
                    connector,
                    onDemandFeatureGroup.getDataFormat().toString(),
                    this.getOnDemandOptions(onDemandFeatureGroup),
                    connector.getPath(onDemandFeatureGroup.getPath()));
            }
        }
        dataset.createOrReplaceTempView(alias);
        return dataset;
    }

    /** Converts the feature group's option list to a name/value map; an absent list yields an empty map. */
    private Map<String, String> getOnDemandOptions(OnDemandFeatureGroup onDemandFeatureGroup) {
        if (onDemandFeatureGroup.getOptions() == null) {
            return new HashMap<>();
        }
        return onDemandFeatureGroup.getOptions().stream()
            .collect(Collectors.toMap(OnDemandOptions::getName, OnDemandOptions::getValue));
    }

    /**
     * Delegates to the Hudi engine to register a feature group as a temporary table.
     *
     * @param featureGroup the feature group to register
     * @param alias name under which to register it
     * @param leftFeaturegroupStartTimestamp start timestamp forwarded to the Hudi engine
     * @param leftFeaturegroupEndTimestamp end timestamp forwarded to the Hudi engine
     * @param readOptions read options forwarded to the Hudi engine
     */
    public void registerHudiTemporaryTable(FeatureGroup featureGroup, String alias, Long leftFeaturegroupStartTimestamp, Long leftFeaturegroupEndTimestamp, Map<String, String> readOptions) {
        this.hudiEngine.registerTemporaryTable(this.sparkSession, featureGroup, alias, leftFeaturegroupStartTimestamp, leftFeaturegroupEndTimestamp, readOptions);
    }

    /**
     * Writes a training dataset, either as a single output or as one output per configured split.
     *
     * <p>Without splits the data goes to {@code <location>/<name>}. With splits the data is
     * randomly split by the split percentages (using the dataset's seed when one is set) and
     * each part goes to {@code <location>/<split name>}.
     *
     * @param trainingDataset metadata describing format, location, splits, seed and coalescing
     * @param dataset the data to write
     * @param writeOptions options passed to the Spark writer
     * @param saveMode Spark save mode
     */
    public void write(TrainingDataset trainingDataset, Dataset<Row> dataset, Map<String, String> writeOptions, SaveMode saveMode) {
        this.setupConnectorHadoopConf(trainingDataset.getStorageConnector());
        // Boolean.TRUE.equals avoids an NPE when the coalesce flag was never set.
        if (Boolean.TRUE.equals(trainingDataset.getCoalesce())) {
            dataset = dataset.coalesce(1);
        }
        if (trainingDataset.getSplits() == null) {
            String path = new Path(trainingDataset.getLocation(), trainingDataset.getName()).toString();
            this.writeSingle(dataset, trainingDataset.getDataFormat(), writeOptions, saveMode, path);
            return;
        }
        List<Float> splitFactors = trainingDataset.getSplits().stream()
            .map(Split::getPercentage)
            .collect(Collectors.toList());
        double[] weights = splitFactors.stream().mapToDouble(Float::doubleValue).toArray();
        Dataset<Row>[] datasetSplits = trainingDataset.getSeed() != null
            ? dataset.randomSplit(weights, trainingDataset.getSeed().longValue())
            : dataset.randomSplit(weights);
        this.writeSplits(datasetSplits, trainingDataset.getDataFormat(), writeOptions, saveMode, trainingDataset.getLocation(), trainingDataset.getSplits());
    }

    /**
     * Builds writer options: format-specific defaults overlaid with the caller's options.
     *
     * @param providedOptions caller options; may be null or empty; they win over the defaults
     * @param dataFormat output format that selects the defaults
     * @return a new mutable map of writer options
     */
    public Map<String, String> getWriteOptions(Map<String, String> providedOptions, DataFormat dataFormat) {
        Map<String, String> writeOptions = SparkEngine.defaultFormatOptions(dataFormat, false);
        if (providedOptions != null && !providedOptions.isEmpty()) {
            writeOptions.putAll(providedOptions);
        }
        return writeOptions;
    }

    /**
     * Builds reader options: format-specific defaults overlaid with the caller's options.
     *
     * @param providedOptions caller options; may be null or empty; they win over the defaults
     * @param dataFormat input format that selects the defaults
     * @return a new mutable map of reader options
     */
    public Map<String, String> getReadOptions(Map<String, String> providedOptions, DataFormat dataFormat) {
        Map<String, String> readOptions = SparkEngine.defaultFormatOptions(dataFormat, true);
        if (providedOptions != null && !providedOptions.isEmpty()) {
            readOptions.putAll(providedOptions);
        }
        return readOptions;
    }

    /**
     * Default options per data format. CSV and TSV get a header and their delimiter (plus schema
     * inference when reading); TFRECORD(S) get the {@code Example} record type; other formats get none.
     */
    private static Map<String, String> defaultFormatOptions(DataFormat dataFormat, boolean forRead) {
        Map<String, String> options = new HashMap<>();
        switch (dataFormat) {
            case CSV: {
                options.put("header", "true");
                options.put("delimiter", ",");
                if (forRead) {
                    options.put("inferSchema", "true");
                }
                break;
            }
            case TSV: {
                options.put("header", "true");
                options.put("delimiter", "\t");
                if (forRead) {
                    options.put("inferSchema", "true");
                }
                break;
            }
            case TFRECORDS:
            case TFRECORD: {
                options.put("recordType", "Example");
                break;
            }
        }
        return options;
    }

    /** Writes {@code datasets[i]} under {@code basePath/<name of splits[i]>} for every index. */
    private void writeSplits(Dataset<Row>[] datasets, DataFormat dataFormat, Map<String, String> writeOptions, SaveMode saveMode, String basePath, List<Split> splits) {
        for (int i = 0; i < datasets.length; ++i) {
            String splitPath = new Path(basePath, splits.get(i).getName()).toString();
            this.writeSingle(datasets[i], dataFormat, writeOptions, saveMode, splitPath);
        }
    }

    /** Writes one dataset to {@code path} (after {@link #sparkPath} rewriting) in the given format. */
    private void writeSingle(Dataset<Row> dataset, DataFormat dataFormat, Map<String, String> writeOptions, SaveMode saveMode, String path) {
        dataset.write()
            .format(dataFormat.toString())
            .options(writeOptions)
            .mode(saveMode)
            .save(SparkEngine.sparkPath(path));
    }

    /**
     * Reads files from a path, after applying the connector's Hadoop configuration.
     *
     * @param storageConnector connector whose credentials are applied first; may be null
     * @param dataFormat Spark data source format name
     * @param readOptions options passed to the Spark reader
     * @param path location to load; {@code s3://} is rewritten to {@code s3a://}
     * @return the loaded dataset
     */
    public Dataset<Row> read(StorageConnector storageConnector, String dataFormat, Map<String, String> readOptions, String path) {
        this.setupConnectorHadoopConf(storageConnector);
        return this.sparkSession.read().format(dataFormat).options(readOptions).load(SparkEngine.sparkPath(path));
    }

    /**
     * Builds the options for writing a feature group to the online store.
     *
     * @param providedWriteOptions caller options; may be null; they win over connector options
     * @param featureGroup feature group whose name is used as the {@code dbtable} option
     * @param storageConnector connector supplying the base options
     * @return a new mutable map of writer options
     * @throws FeatureStoreException if the connector cannot provide its Spark options
     */
    public Map<String, String> getOnlineOptions(Map<String, String> providedWriteOptions, FeatureGroup featureGroup, StorageConnector storageConnector) throws FeatureStoreException {
        // Copy so the connector's own option map is left untouched.
        Map<String, String> writeOptions = new HashMap<>(storageConnector.getSparkOptionsInt());
        writeOptions.put("dbtable", this.utils.getFgName(featureGroup));
        if (providedWriteOptions != null) {
            writeOptions.putAll(providedWriteOptions);
        }
        return writeOptions;
    }

    /**
     * Writes a dataset through the {@code jdbc} data source.
     *
     * @param dataset the data to write
     * @param saveMode Spark save mode
     * @param writeOptions options passed to the Spark writer
     */
    public void writeOnlineDataframe(Dataset<Row> dataset, SaveMode saveMode, Map<String, String> writeOptions) {
        dataset.write().format("jdbc").options(writeOptions).mode(saveMode).save();
    }

    /**
     * Writes a feature group to offline storage: through the Hudi engine when its time-travel
     * format is HUDI, otherwise as a Hive table.
     *
     * @param featureGroup target feature group
     * @param dataset the data to write
     * @param saveMode Spark save mode
     * @param operation Hudi operation type, used only on the Hudi path
     * @param writeOptions writer options; may be null on the Hive path
     * @param validationId validation id, forwarded only on the Hudi path
     * @throws IOException propagated from the Hudi engine
     * @throws FeatureStoreException propagated from the Hudi engine
     * @throws ParseException propagated from the Hudi engine
     */
    public void writeOfflineDataframe(FeatureGroup featureGroup, Dataset<Row> dataset, SaveMode saveMode, HudiOperationType operation, Map<String, String> writeOptions, Integer validationId) throws IOException, FeatureStoreException, ParseException {
        if (featureGroup.getTimeTravelFormat() == TimeTravelFormat.HUDI) {
            this.hudiEngine.saveHudiFeatureGroup(this.sparkSession, featureGroup, dataset, saveMode, operation, writeOptions, validationId);
        } else {
            this.writeSparkDataset(featureGroup, dataset, saveMode, writeOptions);
        }
    }

    /** Saves the dataset as a Hive table partitioned by the feature group's partition columns. */
    private void writeSparkDataset(FeatureGroup featureGroup, Dataset<Row> dataset, SaveMode saveMode, Map<String, String> writeOptions) {
        Map<String, String> effectiveOptions = writeOptions == null ? new HashMap<String, String>() : writeOptions;
        dataset.write()
            .format("hive")
            .mode(saveMode)
            .options(effectiveOptions)
            .partitionBy(this.utils.getPartitionColumns(featureGroup))
            .saveAsTable(this.utils.getTableName(featureGroup));
    }

    /**
     * Profiles a dataset with the deequ column profiler and returns the profiles as JSON.
     *
     * @param df the data to profile
     * @param restrictToColumns columns to profile; null or empty applies no restriction
     * @param correlation whether to enable correlation; null is treated as true
     * @param histogram whether to enable histograms; null is treated as true
     * @return JSON produced by {@code ColumnProfiles.toJson}
     */
    public String profile(Dataset<Row> df, List<String> restrictToColumns, Boolean correlation, Boolean histogram) {
        boolean withCorrelation = correlation == null || correlation;
        boolean withHistogram = histogram == null || histogram;
        ColumnProfilerRunBuilder runner = new ColumnProfilerRunner()
            .onData(df)
            .withCorrelation(withCorrelation)
            .withHistogram(withHistogram);
        if (restrictToColumns != null && !restrictToColumns.isEmpty()) {
            // The return value is not captured, matching the original call.
            runner.restrictToColumns(JavaConverters.asScalaIteratorConverter(restrictToColumns.iterator()).asScala().toSeq());
        }
        ColumnProfiles result = runner.run();
        return ColumnProfiles.toJson(result.profiles().values().toSeq());
    }

    /**
     * Profiles the given columns with correlation and histograms enabled.
     *
     * @param df the data to profile
     * @param restrictToColumns columns to profile; null or empty applies no restriction
     * @return the profiles as JSON
     */
    public String profile(Dataset<Row> df, List<String> restrictToColumns) {
        return this.profile(df, restrictToColumns, true, true);
    }

    /**
     * Profiles without a column restriction.
     *
     * @param df the data to profile
     * @param correlation whether to enable correlation
     * @param histogram whether to enable histograms
     * @return the profiles as JSON
     */
    public String profile(Dataset<Row> df, boolean correlation, boolean histogram) {
        return this.profile(df, null, correlation, histogram);
    }

    /**
     * Profiles without a column restriction, with correlation and histograms enabled.
     *
     * @param df the data to profile
     * @return the profiles as JSON
     */
    public String profile(Dataset<Row> df) {
        return this.profile(df, null, true, true);
    }

    /**
     * Applies connector settings to the Spark context's Hadoop configuration.
     * Only S3 and ADLS connectors are handled; a null connector or any other type is a no-op.
     *
     * @param storageConnector the connector to configure for; may be null
     */
    public void setupConnectorHadoopConf(StorageConnector storageConnector) {
        if (storageConnector == null) {
            return;
        }
        switch (storageConnector.getStorageConnectorType()) {
            case S3: {
                this.setupS3ConnectorHadoopConf(storageConnector);
                break;
            }
            case ADLS: {
                this.setupAdlsConnectorHadoopConf(storageConnector);
                break;
            }
        }
    }

    /**
     * Rewrites a leading {@code s3://} scheme to {@code s3a://}; any other path is returned unchanged.
     *
     * @param path the path to adapt; must not be null
     * @return the path to hand to Spark
     */
    public static String sparkPath(String path) {
        if (path.startsWith("s3://")) {
            return path.replaceFirst("s3://", "s3a://");
        }
        return path;
    }

    /**
     * Copies the connector's non-empty S3 settings into the Hadoop configuration. When a session
     * token is present, the temporary-credentials provider is selected as well.
     */
    private void setupS3ConnectorHadoopConf(StorageConnector storageConnector) {
        this.setHadoopConfIfPresent("fs.s3a.access.key", storageConnector.getAccessKey());
        this.setHadoopConfIfPresent("fs.s3a.secret.key", storageConnector.getSecretKey());
        this.setHadoopConfIfPresent("fs.s3a.server-side-encryption-algorithm", storageConnector.getServerEncryptionAlgorithm());
        this.setHadoopConfIfPresent("fs.s3a.server-side-encryption.key", storageConnector.getServerEncryptionKey());
        // NOTE(review): values set here are never cleared, so settings from an earlier connector
        // (including the credentials provider) stay in effect when the next connector omits them.
        if (!Strings.isNullOrEmpty(storageConnector.getSessionToken())) {
            this.setHadoopConf("fs.s3a.aws.credentials.provider", "org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider");
            this.setHadoopConf("fs.s3a.session.token", storageConnector.getSessionToken());
        }
    }

    /** Copies every Spark option of the connector into the Hadoop configuration. */
    private void setupAdlsConnectorHadoopConf(StorageConnector storageConnector) {
        for (Option confOption : storageConnector.getSparkOptions()) {
            this.setHadoopConf(confOption.getName(), confOption.getValue());
        }
    }

    /** Sets one property on the Spark context's Hadoop configuration. */
    private void setHadoopConf(String key, String value) {
        this.sparkSession.sparkContext().hadoopConfiguration().set(key, value);
    }

    /** Sets the property only when the value is neither null nor empty. */
    private void setHadoopConfIfPresent(String key, String value) {
        if (!Strings.isNullOrEmpty(value)) {
            this.setHadoopConf(key, value);
        }
    }

    /**
     * @return the wrapped Spark session
     */
    public SparkSession getSparkSession() {
        return this.sparkSession;
    }
}

