package io.hops.util.featurestore.ops.write_ops;

import io.hops.util.Constants;
import io.hops.util.FeaturestoreRestClient;
import io.hops.util.Hops;
import io.hops.util.exceptions.CannotWriteImageDataFrameException;
import io.hops.util.exceptions.DataframeIsEmpty;
import io.hops.util.exceptions.FeaturegroupDoesNotExistError;
import io.hops.util.exceptions.FeaturestoreNotFound;
import io.hops.util.exceptions.HiveNotEnabled;
import io.hops.util.exceptions.JWTNotFoundException;
import io.hops.util.exceptions.OnlineFeaturestoreNotEnabled;
import io.hops.util.exceptions.OnlineFeaturestorePasswordNotFound;
import io.hops.util.exceptions.OnlineFeaturestoreUserNotFound;
import io.hops.util.exceptions.SparkDataTypeNotRecognizedError;
import io.hops.util.exceptions.StorageConnectorDoesNotExistError;
import io.hops.util.exceptions.TrainingDatasetCreationError;
import io.hops.util.exceptions.TrainingDatasetFormatNotSupportedError;
import io.hops.util.featurestore.FeaturestoreHelper;
import io.hops.util.featurestore.dtos.app.FeaturestoreMetadataDTO;
import io.hops.util.featurestore.dtos.feature.FeatureDTO;
import io.hops.util.featurestore.dtos.jobs.FeaturestoreJobDTO;
import io.hops.util.featurestore.dtos.stats.StatisticsDTO;
import io.hops.util.featurestore.dtos.storageconnector.FeaturestoreHopsfsConnectorDTO;
import io.hops.util.featurestore.dtos.storageconnector.FeaturestoreS3ConnectorDTO;
import io.hops.util.featurestore.dtos.storageconnector.FeaturestoreStorageConnectorDTO;
import io.hops.util.featurestore.dtos.storageconnector.FeaturestoreStorageConnectorType;
import io.hops.util.featurestore.dtos.trainingdataset.ExternalTrainingDatasetDTO;
import io.hops.util.featurestore.dtos.trainingdataset.HopsfsTrainingDatasetDTO;
import io.hops.util.featurestore.dtos.trainingdataset.TrainingDatasetType;
import io.hops.util.featurestore.ops.FeaturestoreOp;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import javax.xml.bind.JAXBException;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.json.JSONObject;

/* loaded from: input_file:io/hops/util/featurestore/ops/write_ops/FeaturestoreCreateTrainingDataset.class */
/**
 * Write operation that creates a new training dataset from a Spark dataframe.
 *
 * <p>The operation is configured through fluent setters (each returns {@code this}) and executed
 * with {@link #write()}. Depending on the type of the resolved storage connector the dataset is
 * registered and written either as an external (S3) training dataset or as a HopsFS training
 * dataset.
 */
public class FeaturestoreCreateTrainingDataset extends FeaturestoreOp {
    private static final Logger LOG = Logger.getLogger(FeaturestoreCreateTrainingDataset.class.getName());

    /**
     * Creates the operation.
     *
     * @param name value handed to the {@link FeaturestoreOp} constructor (presumably the training
     *             dataset name; confirm against {@code FeaturestoreOp})
     */
    public FeaturestoreCreateTrainingDataset(String name) {
        super(name);
    }

    /**
     * Not supported: this is a write-only operation.
     *
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    @Override
    public Object read() {
        throw new UnsupportedOperationException("read() is not supported on a write operation");
    }

    /**
     * Creates the training dataset.
     *
     * <p>Steps: parse the dataframe schema into feature DTOs, validate name/features/description,
     * compute the configured statistics, resolve the storage connector (the one named via
     * {@link #setSink(String)}, or the project's default training-dataset sink when none was
     * set), create and write the dataset for that connector type, and finally refresh the
     * featurestore metadata cache.
     *
     * <p>The checked exceptions in the {@code throws} clause are propagated from the helper and
     * REST-client calls made here.
     *
     * @throws IllegalArgumentException if no dataframe has been set
     */
    @Override
    public void write() throws DataframeIsEmpty, SparkDataTypeNotRecognizedError, JAXBException, FeaturestoreNotFound, TrainingDatasetCreationError, TrainingDatasetFormatNotSupportedError, CannotWriteImageDataFrameException, JWTNotFoundException, HiveNotEnabled, StorageConnectorDoesNotExistError, OnlineFeaturestoreUserNotFound, OnlineFeaturestorePasswordNotFound, OnlineFeaturestoreNotEnabled, FeaturegroupDoesNotExistError {
        if (this.dataframe == null) {
            throw new IllegalArgumentException("Dataframe to create training dataset from cannot be null, specify dataframe with .setDataframe(df)");
        }
        FeaturestoreMetadataDTO metadata = FeaturestoreHelper.getFeaturestoreMetadataCache();
        List<FeatureDTO> features = FeaturestoreHelper.parseSparkFeaturesSchema(this.dataframe.schema(), null, null, false, null);
        FeaturestoreHelper.validateMetadata(this.name, features, this.description);
        StatisticsDTO statistics = FeaturestoreHelper.computeDataFrameStats(this.name, getSpark(), this.dataframe, this.featurestore, this.version, this.descriptiveStats, this.featureCorr, this.featureHistograms, this.clusterAnalysis, this.statColumns, this.numBins, this.numClusters, this.corrMethod);

        // Hold the connector as the common base type; which concrete DTO it is only becomes
        // known after inspecting its connector type below.
        FeaturestoreStorageConnectorDTO connector;
        if (this.storageConnector == null || this.storageConnector.isEmpty()) {
            connector = FeaturestoreHelper.findStorageConnector(metadata.getStorageConnectors(), FeaturestoreHelper.getProjectTrainingDatasetsSink());
        } else {
            connector = FeaturestoreHelper.findStorageConnector(metadata.getStorageConnectors(), this.storageConnector);
        }

        if (connector.getStorageConnectorType() == FeaturestoreStorageConnectorType.S3) {
            doCreateExternalTrainingDataset(metadata, statistics, features, (FeaturestoreS3ConnectorDTO) connector);
        } else {
            doCreateHopsfsTrainingDataset(metadata, statistics, features, (FeaturestoreHopsfsConnectorDTO) connector);
        }
        Hops.updateFeaturestoreMetadataCache().setFeaturestore(this.featurestore).write();
    }

    /**
     * Builds the job DTOs to attach to the training dataset: the configured job names plus, when
     * {@code FeaturestoreHelper.jobNameGetOrDefault(null)} yields one, that job name as well.
     *
     * <p>Works on a local copy so the list supplied through {@link #setJobs(List)} is never
     * modified (it may be immutable, and repeated calls must not accumulate duplicates).
     *
     * @return one {@link FeaturestoreJobDTO} per job name, in order
     */
    private List<FeaturestoreJobDTO> buildJobDtos() {
        List<String> jobNames = (this.jobs == null) ? new ArrayList<>() : new ArrayList<>(this.jobs);
        String defaultJobName = FeaturestoreHelper.jobNameGetOrDefault(null);
        if (defaultJobName != null) {
            jobNames.add(defaultJobName);
        }
        return jobNames.stream()
            .map(FeaturestoreCreateTrainingDataset::toJobDto)
            .collect(Collectors.toList());
    }

    /**
     * Wraps a job name in a {@link FeaturestoreJobDTO}.
     *
     * @param jobName the job name to set on the DTO
     * @return a new DTO carrying {@code jobName}
     */
    private static FeaturestoreJobDTO toJobDto(String jobName) {
        FeaturestoreJobDTO jobDto = new FeaturestoreJobDTO();
        jobDto.setJobName(jobName);
        return jobDto;
    }

    /**
     * Collects this operation's parameters into the DTO sent when creating a HopsFS training dataset.
     *
     * @param statisticsDTO     statistics computed for the dataframe
     * @param features          features parsed from the dataframe schema
     * @param hopsfsConnectorId id of the HopsFS storage connector to store the dataset with
     * @return the populated DTO, typed as {@code HOPSFS_TRAINING_DATASET}
     */
    private HopsfsTrainingDatasetDTO groupInputParamsIntoHopsfsDTO(StatisticsDTO statisticsDTO, List<FeatureDTO> features, Integer hopsfsConnectorId) {
        List<FeaturestoreJobDTO> jobDtos = buildJobDtos();
        HopsfsTrainingDatasetDTO dto = new HopsfsTrainingDatasetDTO();
        dto.setFeaturestoreName(this.featurestore);
        dto.setName(this.name);
        dto.setVersion(Integer.valueOf(this.version));
        dto.setDescription(this.description);
        dto.setJobs(jobDtos);
        dto.setDataFormat(this.dataFormat);
        dto.setFeatures(features);
        dto.setDescriptiveStatistics(statisticsDTO.getDescriptiveStatsDTO());
        dto.setFeatureCorrelationMatrix(statisticsDTO.getFeatureCorrelationMatrixDTO());
        dto.setFeaturesHistogram(statisticsDTO.getFeatureDistributionsDTO());
        dto.setClusterAnalysis(statisticsDTO.getClusterAnalysisDTO());
        dto.setHopsfsConnectorId(hopsfsConnectorId);
        dto.setTrainingDatasetType(TrainingDatasetType.HOPSFS_TRAINING_DATASET);
        return dto;
    }

    /**
     * Collects this operation's parameters into the DTO sent when creating an external (S3)
     * training dataset.
     *
     * @param statisticsDTO statistics computed for the dataframe
     * @param features      features parsed from the dataframe schema
     * @param s3ConnectorId id of the S3 storage connector to store the dataset with
     * @return the populated DTO, typed as {@code EXTERNAL_TRAINING_DATASET}
     */
    private ExternalTrainingDatasetDTO groupInputParamsIntoExternalDTO(StatisticsDTO statisticsDTO, List<FeatureDTO> features, Integer s3ConnectorId) {
        List<FeaturestoreJobDTO> jobDtos = buildJobDtos();
        ExternalTrainingDatasetDTO dto = new ExternalTrainingDatasetDTO();
        dto.setFeaturestoreName(this.featurestore);
        dto.setName(this.name);
        dto.setVersion(Integer.valueOf(this.version));
        dto.setDescription(this.description);
        dto.setJobs(jobDtos);
        dto.setDataFormat(this.dataFormat);
        dto.setFeatures(features);
        dto.setDescriptiveStatistics(statisticsDTO.getDescriptiveStatsDTO());
        dto.setFeatureCorrelationMatrix(statisticsDTO.getFeatureCorrelationMatrixDTO());
        dto.setFeaturesHistogram(statisticsDTO.getFeatureDistributionsDTO());
        dto.setClusterAnalysis(statisticsDTO.getClusterAnalysisDTO());
        dto.setS3ConnectorId(s3ConnectorId);
        dto.setTrainingDatasetType(TrainingDatasetType.EXTERNAL_TRAINING_DATASET);
        return dto;
    }

    /**
     * Registers a HopsFS training dataset through the REST client and writes the dataframe under
     * the store path reported in the response.
     *
     * <p>For the TFRecords data format a schema JSON file is additionally written next to the
     * data; a failure there is logged and does not abort the operation.
     *
     * @param featurestoreMetadataDTO        cached featurestore metadata (supplies the DTO type setting)
     * @param statisticsDTO                  statistics computed for the dataframe
     * @param features                       features parsed from the dataframe schema
     * @param featurestoreHopsfsConnectorDTO HopsFS connector to store the dataset with
     */
    private void doCreateHopsfsTrainingDataset(FeaturestoreMetadataDTO featurestoreMetadataDTO, StatisticsDTO statisticsDTO, List<FeatureDTO> features, FeaturestoreHopsfsConnectorDTO featurestoreHopsfsConnectorDTO) throws JAXBException, FeaturestoreNotFound, TrainingDatasetCreationError, JWTNotFoundException, HiveNotEnabled, TrainingDatasetFormatNotSupportedError, CannotWriteImageDataFrameException {
        HopsfsTrainingDatasetDTO requestDto = groupInputParamsIntoHopsfsDTO(statisticsDTO, features, featurestoreHopsfsConnectorDTO.getId());
        String responseJson = (String) FeaturestoreRestClient.createTrainingDatasetRest(requestDto, featurestoreMetadataDTO.getSettings().getHopsfsTrainingDatasetDtoType()).readEntity(String.class);
        HopsfsTrainingDatasetDTO createdDataset = (HopsfsTrainingDatasetDTO) FeaturestoreHelper.parseTrainingDatasetJson(new JSONObject(responseJson));

        String dataPath = createdDataset.getHdfsStorePath() + "/" + this.name;
        FeaturestoreHelper.writeTrainingDataset(getSpark(), this.dataframe, dataPath, this.dataFormat, Constants.SPARK_OVERWRITE_MODE);

        if (this.dataFormat.equals(Constants.TRAINING_DATASET_TFRECORDS_FORMAT)) {
            try {
                String schemaPath = createdDataset.getHdfsStorePath() + "/" + Constants.TRAINING_DATASET_TF_RECORD_SCHEMA_FILE_NAME;
                FeaturestoreHelper.writeTfRecordSchemaJson(schemaPath, FeaturestoreHelper.getDataframeTfRecordSchemaJson(this.dataframe).toString());
            } catch (Exception e) {
                // Best effort: the dataset itself has already been written at this point.
                LOG.log(Level.SEVERE, "Could not save tf record schema json to HDFS for training dataset: " + this.name, e);
            }
        }
    }

    /**
     * Registers an external training dataset through the REST client and writes the dataframe to
     * the S3 path derived from the dataset name, version, connector bucket and configured
     * external path.
     *
     * <p>The S3 credentials of the connector are set up on the Spark session before writing. The
     * REST response is not used here; the target path is computed locally.
     *
     * @param featurestoreMetadataDTO    cached featurestore metadata (supplies the DTO type setting)
     * @param statisticsDTO              statistics computed for the dataframe
     * @param features                   features parsed from the dataframe schema
     * @param featurestoreS3ConnectorDTO S3 connector to store the dataset with
     */
    private void doCreateExternalTrainingDataset(FeaturestoreMetadataDTO featurestoreMetadataDTO, StatisticsDTO statisticsDTO, List<FeatureDTO> features, FeaturestoreS3ConnectorDTO featurestoreS3ConnectorDTO) throws FeaturestoreNotFound, JWTNotFoundException, TrainingDatasetCreationError, JAXBException, HiveNotEnabled, TrainingDatasetFormatNotSupportedError, CannotWriteImageDataFrameException {
        String targetPath = FeaturestoreHelper.getExternalTrainingDatasetPath(this.name, this.version, featurestoreS3ConnectorDTO.getBucket(), this.externalPath);
        FeaturestoreHelper.setupS3CredentialsForSpark(featurestoreS3ConnectorDTO.getAccessKey(), featurestoreS3ConnectorDTO.getSecretKey(), getSpark());
        ExternalTrainingDatasetDTO requestDto = groupInputParamsIntoExternalDTO(statisticsDTO, features, featurestoreS3ConnectorDTO.getId());
        FeaturestoreRestClient.createTrainingDatasetRest(requestDto, featurestoreMetadataDTO.getSettings().getExternalTrainingDatasetDtoType());
        FeaturestoreHelper.writeTrainingDataset(getSpark(), this.dataframe, targetPath, this.dataFormat, Constants.SPARK_OVERWRITE_MODE);
    }

    /**
     * Sets the name of the training dataset to create.
     *
     * @param name training dataset name
     * @return this operation, for chaining
     */
    public FeaturestoreCreateTrainingDataset setName(String name) {
        this.name = name;
        return this;
    }

    /**
     * Sets the featurestore the training dataset is created in.
     *
     * @param featurestore featurestore name
     * @return this operation, for chaining
     */
    public FeaturestoreCreateTrainingDataset setFeaturestore(String featurestore) {
        this.featurestore = featurestore;
        return this;
    }

    /**
     * Sets the Spark session field of this operation.
     *
     * @param spark Spark session
     * @return this operation, for chaining
     */
    public FeaturestoreCreateTrainingDataset setSpark(SparkSession spark) {
        this.spark = spark;
        return this;
    }

    /**
     * Sets the version of the training dataset to create.
     *
     * @param version training dataset version
     * @return this operation, for chaining
     */
    public FeaturestoreCreateTrainingDataset setVersion(int version) {
        this.version = version;
        return this;
    }

    /**
     * Sets the correlation method forwarded to the statistics computation.
     *
     * @param corrMethod correlation method identifier
     * @return this operation, for chaining
     */
    public FeaturestoreCreateTrainingDataset setCorrMethod(String corrMethod) {
        this.corrMethod = corrMethod;
        return this;
    }

    /**
     * Sets the bin count forwarded to the statistics computation.
     *
     * @param numBins number of bins
     * @return this operation, for chaining
     */
    public FeaturestoreCreateTrainingDataset setNumBins(int numBins) {
        this.numBins = numBins;
        return this;
    }

    /**
     * Sets the cluster count forwarded to the statistics computation.
     *
     * @param numClusters number of clusters
     * @return this operation, for chaining
     */
    public FeaturestoreCreateTrainingDataset setNumClusters(int numClusters) {
        this.numClusters = numClusters;
        return this;
    }

    /**
     * Sets the {@code mode} field. Note that {@link #write()} in this class does not read it and
     * always writes with {@code Constants.SPARK_OVERWRITE_MODE}.
     *
     * @param mode write mode
     * @return this operation, for chaining
     */
    public FeaturestoreCreateTrainingDataset setMode(String mode) {
        this.mode = mode;
        return this;
    }

    /**
     * Sets the dataframe to save as the training dataset. Required before {@link #write()}.
     *
     * @param dataframe Spark dataframe holding the training data
     * @return this operation, for chaining
     */
    public FeaturestoreCreateTrainingDataset setDataframe(Dataset<Row> dataframe) {
        this.dataframe = dataframe;
        return this;
    }

    /**
     * Sets the descriptive-statistics flag forwarded to the statistics computation.
     *
     * @param descriptiveStats flag value
     * @return this operation, for chaining
     */
    public FeaturestoreCreateTrainingDataset setDescriptiveStats(Boolean descriptiveStats) {
        this.descriptiveStats = descriptiveStats;
        return this;
    }

    /**
     * Sets the feature-correlation flag forwarded to the statistics computation.
     *
     * @param featureCorr flag value
     * @return this operation, for chaining
     */
    public FeaturestoreCreateTrainingDataset setFeatureCorr(Boolean featureCorr) {
        this.featureCorr = featureCorr;
        return this;
    }

    /**
     * Sets the feature-histograms flag forwarded to the statistics computation.
     *
     * @param featureHistograms flag value
     * @return this operation, for chaining
     */
    public FeaturestoreCreateTrainingDataset setFeatureHistograms(Boolean featureHistograms) {
        this.featureHistograms = featureHistograms;
        return this;
    }

    /**
     * Sets the cluster-analysis flag forwarded to the statistics computation.
     *
     * @param clusterAnalysis flag value
     * @return this operation, for chaining
     */
    public FeaturestoreCreateTrainingDataset setClusterAnalysis(Boolean clusterAnalysis) {
        this.clusterAnalysis = clusterAnalysis;
        return this;
    }

    /**
     * Sets the column list forwarded to the statistics computation.
     *
     * @param statColumns column names
     * @return this operation, for chaining
     */
    public FeaturestoreCreateTrainingDataset setStatColumns(List<String> statColumns) {
        this.statColumns = statColumns;
        return this;
    }

    /**
     * Sets the names of the jobs to associate with the training dataset. The list is stored as
     * given and is not modified by {@link #write()}.
     *
     * @param jobs job names
     * @return this operation, for chaining
     */
    public FeaturestoreCreateTrainingDataset setJobs(List<String> jobs) {
        this.jobs = jobs;
        return this;
    }

    /**
     * Sets the description of the training dataset.
     *
     * @param description free-text description
     * @return this operation, for chaining
     */
    public FeaturestoreCreateTrainingDataset setDescription(String description) {
        this.description = description;
        return this;
    }

    /**
     * Sets the data format the training dataset is written in.
     *
     * @param dataFormat data format identifier
     * @return this operation, for chaining
     */
    public FeaturestoreCreateTrainingDataset setDataFormat(String dataFormat) {
        this.dataFormat = dataFormat;
        return this;
    }

    /**
     * Sets the name of the storage connector to write to. When left null or empty,
     * {@link #write()} falls back to the project's default training-dataset sink.
     *
     * @param sink storage connector name
     * @return this operation, for chaining
     */
    public FeaturestoreCreateTrainingDataset setSink(String sink) {
        this.storageConnector = sink;
        return this;
    }

    /**
     * Sets the external path used when deriving the target location of an external (S3)
     * training dataset.
     *
     * @param path external path
     * @return this operation, for chaining
     */
    public FeaturestoreCreateTrainingDataset setPath(String path) {
        this.externalPath = path;
        return this;
    }
}
