/*
 * Decompiled with CFR 0.152.
 */
package io.hops.hopsworks.common.featurestore.trainingdatasets;

import io.hops.hopsworks.common.featurestore.FeaturestoreConstants;
import io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO;
import io.hops.hopsworks.common.featurestore.query.Feature;
import io.hops.hopsworks.common.featurestore.query.Query;
import io.hops.hopsworks.common.featurestore.query.join.Join;
import io.hops.hopsworks.common.featurestore.statistics.columns.StatisticColumnController;
import io.hops.hopsworks.common.featurestore.storageconnectors.FeaturestoreConnectorFacade;
import io.hops.hopsworks.common.featurestore.storageconnectors.FeaturestoreStorageConnectorDTO;
import io.hops.hopsworks.common.featurestore.trainingdatasets.TrainingDatasetDTO;
import io.hops.hopsworks.common.featurestore.trainingdatasets.split.TrainingDatasetSplitDTO;
import io.hops.hopsworks.common.featurestore.utils.FeaturestoreInputValidation;
import io.hops.hopsworks.exceptions.FeaturestoreException;
import io.hops.hopsworks.persistence.entity.featurestore.storageconnector.FeaturestoreConnector;
import io.hops.hopsworks.persistence.entity.featurestore.storageconnector.FeaturestoreConnectorType;
import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetType;
import io.hops.hopsworks.restutils.RESTCodes;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import javax.ejb.EJB;
import javax.ejb.Stateless;
import javax.ejb.TransactionAttribute;
import javax.ejb.TransactionAttributeType;
import joptsimple.internal.Strings;
import org.apache.commons.lang3.StringUtils;

/**
 * Validates the user-supplied definition of a training dataset before it is created.
 *
 * <p>The checks cover the dataset type, version, data format, split definitions, label and
 * transformation features, join prefixes, the storage connector and the training split name.
 * Every rejected input is reported as a {@link FeaturestoreException}, with one exception
 * documented on {@link #validateVersion(Integer)}.
 */
@Stateless
@TransactionAttribute(value=TransactionAttributeType.NEVER)
public class TrainingDatasetInputValidation {
    /**
     * Maximum name length quoted in the split-name and prefix-name error messages.
     * NOTE(review): presumably mirrors the limit encoded in
     * {@code FeaturestoreConstants.FEATURESTORE_REGEX} - confirm against that constant.
     */
    private static final int NAME_MAX_LENGTH = 63;

    @EJB
    private FeaturestoreInputValidation featurestoreInputValidation;
    @EJB
    private StatisticColumnController statisticColumnController;
    @EJB
    private FeaturestoreConnectorFacade connectorFacade;

    /**
     * Runs the generic feature store input validation on the DTO and, when the dataset is
     * defined by an explicit feature list rather than a query, validates every feature name.
     *
     * @param trainingDatasetDTO the user-supplied training dataset definition
     * @throws FeaturestoreException if the generic validation or a feature name check fails
     */
    public void verifyUserInput(TrainingDatasetDTO trainingDatasetDTO) throws FeaturestoreException {
        this.featurestoreInputValidation.verifyUserInput(trainingDatasetDTO);
        // Feature names are only checked here when no query DTO was supplied.
        if (trainingDatasetDTO.getQueryDTO() == null && trainingDatasetDTO.getFeatures() != null) {
            this.verifyTrainingDatasetFeatureList(trainingDatasetDTO.getFeatures());
        }
    }

    /**
     * Applies the shared name validation to each feature of the list.
     *
     * @param trainingDatasetFeatureDTOS the features to check; must not be {@code null}
     * @throws FeaturestoreException if a feature name is rejected
     */
    private void verifyTrainingDatasetFeatureList(List<TrainingDatasetFeatureDTO> trainingDatasetFeatureDTOS) throws FeaturestoreException {
        for (TrainingDatasetFeatureDTO featureDTO : trainingDatasetFeatureDTOS) {
            this.featurestoreInputValidation.nameValidation(featureDTO.getName());
        }
    }

    /**
     * Runs every validation step for a training dataset definition, stopping at the first failure.
     *
     * @param trainingDatasetDTO the user-supplied training dataset definition
     * @param query the query the dataset is built from, or {@code null} if there is none
     * @throws FeaturestoreException if any validation step rejects the input
     * @throws IllegalArgumentException if the DTO carries no version
     */
    public void validate(TrainingDatasetDTO trainingDatasetDTO, Query query) throws FeaturestoreException {
        this.verifyUserInput(trainingDatasetDTO);
        this.statisticColumnController.verifyStatisticColumnsExist(trainingDatasetDTO, query);
        this.validateType(trainingDatasetDTO.getTrainingDatasetType());
        this.validateVersion(trainingDatasetDTO.getVersion());
        this.validateDataFormat(trainingDatasetDTO.getDataFormat());
        this.validateSplits(trainingDatasetDTO.getSplits());
        this.validateFeatures(query, trainingDatasetDTO.getFeatures());
        this.validateStorageConnector(trainingDatasetDTO.getStorageConnector());
        this.validateTrainSplit(trainingDatasetDTO.getTrainSplit(), trainingDatasetDTO.getSplits());
    }

    /**
     * Accepts only the HopsFS and external training dataset types; {@code null} is rejected too.
     *
     * @param trainingDatasetType the type to check
     * @throws FeaturestoreException if the type is not one of the two recognized values
     */
    private void validateType(TrainingDatasetType trainingDatasetType) throws FeaturestoreException {
        boolean recognized = trainingDatasetType == TrainingDatasetType.HOPSFS_TRAINING_DATASET
            || trainingDatasetType == TrainingDatasetType.EXTERNAL_TRAINING_DATASET;
        if (!recognized) {
            throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_TRAINING_DATASET_TYPE, Level.FINE, ", Recognized Training Dataset types are: " + TrainingDatasetType.HOPSFS_TRAINING_DATASET + ", and: " + TrainingDatasetType.EXTERNAL_TRAINING_DATASET + ". The provided training dataset type was not recognized: " + trainingDatasetType);
        }
    }

    /**
     * Requires a strictly positive version.
     *
     * <p>A missing version is reported with an unchecked {@link IllegalArgumentException} rather
     * than a {@link FeaturestoreException}; this is kept as-is so existing callers see the same
     * exception type.
     *
     * @param version the version to check
     * @throws IllegalArgumentException if {@code version} is {@code null}
     * @throws FeaturestoreException if {@code version} is zero or negative
     */
    private void validateVersion(Integer version) throws FeaturestoreException {
        if (version == null) {
            throw new IllegalArgumentException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_VERSION_NOT_PROVIDED.getMessage());
        }
        if (version <= 0) {
            throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_TRAINING_DATASET_VERSION, Level.FINE, " version cannot be negative or zero");
        }
    }

    /**
     * Requires the data format to be one of {@code FeaturestoreConstants.TRAINING_DATASET_DATA_FORMATS}.
     *
     * @param dataFormat the data format to check
     * @throws FeaturestoreException if the format is not in the recognized list
     */
    private void validateDataFormat(String dataFormat) throws FeaturestoreException {
        if (!FeaturestoreConstants.TRAINING_DATASET_DATA_FORMATS.contains(dataFormat)) {
            // The single-argument varargs join renders the list through its own toString().
            throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_TRAINING_DATASET_DATA_FORMAT, Level.FINE, ", the recognized training dataset formats are: " + StringUtils.join(FeaturestoreConstants.TRAINING_DATASET_DATA_FORMATS) + ". The provided data format:" + dataFormat + " was not recognized.");
        }
    }

    /**
     * Checks that every split has a valid, unique name and a percentage.
     * A {@code null} or empty split list is accepted without further checks.
     *
     * @param trainingDatasetSplitDTOs the split definitions, may be {@code null}
     * @throws FeaturestoreException if a split name is missing, malformed or duplicated, or a
     *     split has no percentage
     */
    private void validateSplits(List<TrainingDatasetSplitDTO> trainingDatasetSplitDTOs) throws FeaturestoreException {
        if (trainingDatasetSplitDTOs == null || trainingDatasetSplitDTOs.isEmpty()) {
            return;
        }
        Pattern namePattern = FeaturestoreConstants.FEATURESTORE_REGEX;
        Set<String> seenNames = new HashSet<>();
        for (TrainingDatasetSplitDTO split : trainingDatasetSplitDTOs) {
            String splitName = split.getName();
            // A missing name is reported as an illegal name instead of failing with a
            // NullPointerException inside Pattern.matcher.
            if (splitName == null || !namePattern.matcher(splitName).matches()) {
                throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_TRAINING_DATASET_SPLIT_NAME, Level.FINE, ", the provided training dataset split name " + splitName + " is invalid. Split names can only contain lower case characters, numbers and underscores and cannot be longer than " + NAME_MAX_LENGTH + " characters or empty.");
            }
            if (split.getPercentage() == null) {
                throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_TRAINING_DATASET_SPLIT_PERCENTAGE, Level.FINE, ", the provided training dataset split percentage is invalid. Percentages can only be numeric. Weights will be normalized if they don\u2019t sum up to 1.0.");
            }
            // Set.add returns false when the name was already present.
            if (!seenNames.add(splitName)) {
                throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_DUPLICATE_SPLIT_NAMES, Level.FINE, " The split names must be unique");
            }
        }
    }

    /**
     * Checks that every label and every feature carrying a transformation function exists among
     * the features selected by the query (including its joined sub-queries), and that the prefixes
     * of the query's direct joins are well-formed. Nothing is checked when either argument is
     * {@code null}.
     *
     * @param query the query the dataset is built from, may be {@code null}
     * @param featuresDTOs the feature definitions supplied by the user, may be {@code null}
     * @throws FeaturestoreException if a label or transformed feature is missing from the query,
     *     or a join prefix is malformed
     */
    private void validateFeatures(Query query, List<TrainingDatasetFeatureDTO> featuresDTOs) throws FeaturestoreException {
        if (query == null || featuresDTOs == null) {
            return;
        }
        // Collect the names once so each lookup below is a constant-time set probe.
        Set<String> availableFeatureNames = this.collectFeatures(query).stream()
            .map(Feature::getName)
            .collect(Collectors.toSet());

        // All labels are checked before any transformation feature.
        for (TrainingDatasetFeatureDTO featureDTO : featuresDTOs) {
            // Boolean.TRUE.equals is null-safe in case the label flag is an unset boxed Boolean.
            boolean isLabel = Boolean.TRUE.equals(featureDTO.getLabel());
            if (isLabel && !availableFeatureNames.contains(featureDTO.getName())) {
                throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.LABEL_NOT_FOUND, Level.FINE, "Label: " + featureDTO.getName() + " is missing");
            }
        }
        for (TrainingDatasetFeatureDTO featureDTO : featuresDTOs) {
            // Transformed features are looked up by their feature group feature name,
            // while the error message reports the training dataset feature name.
            if (featureDTO.getTransformationFunction() != null
                && !availableFeatureNames.contains(featureDTO.getFeatureGroupFeatureName())) {
                throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURE_WITH_TRANSFORMATION_NOT_FOUND, Level.FINE, "feature: " + featureDTO.getName() + " is missing and transformation function can't be attached");
            }
        }

        if (query.getJoins() != null) {
            for (Join join : query.getJoins()) {
                // A join without a prefix is fine; only supplied prefixes are pattern-checked.
                if (join.getPrefix() != null && !FeaturestoreConstants.FEATURESTORE_REGEX.matcher(join.getPrefix()).matches()) {
                    throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_PREFIX_NAME, Level.FINE, ", the provided prefix name " + join.getPrefix() + " is invalid. Prefix names can only contain lower case characters, numbers and underscores and cannot be longer than " + NAME_MAX_LENGTH + " characters or empty.");
                }
            }
        }
    }

    /**
     * Gathers the features of the query and, recursively, of the right-hand query of each join.
     *
     * @param query the query to walk; must not be {@code null}
     * @return a new list with the query's own features followed by those of its joins
     */
    private List<Feature> collectFeatures(Query query) {
        List<Feature> collected = new ArrayList<>(query.getFeatures());
        if (query.getJoins() != null) {
            for (Join join : query.getJoins()) {
                collected.addAll(this.collectFeatures(join.getRightQuery()));
            }
        }
        return collected;
    }

    /**
     * Checks that the referenced storage connector exists and is of a supported type.
     * Nothing is checked when no connector, or a connector without an id, is supplied.
     *
     * @param connectorDTO the connector reference supplied by the user, may be {@code null}
     * @throws FeaturestoreException if the connector cannot be found or is not a HopsFS, S3 or
     *     ADLS connector
     */
    private void validateStorageConnector(FeaturestoreStorageConnectorDTO connectorDTO) throws FeaturestoreException {
        if (connectorDTO == null || connectorDTO.getId() == null) {
            return;
        }
        FeaturestoreConnector connector = this.connectorFacade.findById(connectorDTO.getId())
            .orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.CONNECTOR_NOT_FOUND, Level.FINE, "Connector ID: " + connectorDTO.getId()));
        FeaturestoreConnectorType connectorType = connector.getConnectorType();
        boolean supported = connectorType == FeaturestoreConnectorType.HOPSFS
            || connectorType == FeaturestoreConnectorType.S3
            || connectorType == FeaturestoreConnectorType.ADLS;
        if (!supported) {
            throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_STORAGE_CONNECTOR_TYPE, Level.FINE, "Only HopsFS, S3 and ADLS storage connectors can be used to create training datasets");
        }
    }

    /**
     * Checks the training split name against the split definitions: a name must not be given
     * when there are no splits, and when there are splits the name must match one of them.
     *
     * @param trainSplit the name of the split used for training, may be {@code null}
     * @param splits the split definitions, may be {@code null}
     * @throws FeaturestoreException if a name is given without splits, or the name does not match
     *     any of the provided splits
     */
    void validateTrainSplit(String trainSplit, List<TrainingDatasetSplitDTO> splits) throws FeaturestoreException {
        boolean hasSplits = splits != null && !splits.isEmpty();
        if (!hasSplits) {
            // StringUtils.isEmpty has the same null-or-empty semantics as the internal
            // jopt-simple helper used previously, without relying on a library-internal class.
            if (!StringUtils.isEmpty(trainSplit)) {
                throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_TRAINING_DATASET_SPLIT_NAME, Level.FINE, "Training data split name provided without splitting the dataset.");
            }
            return;
        }
        List<String> splitNames = splits.stream()
            .map(TrainingDatasetSplitDTO::getName)
            .collect(Collectors.toList());
        if (!splitNames.contains(trainSplit)) {
            throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_TRAINING_DATASET_SPLIT_NAME, Level.FINE, "The provided training data split name `" + trainSplit + "` could not be found among the provided splits.");
        }
    }
}

