/*
 * Decompiled with CFR 0.152.
 */
package io.hops.hopsworks.common.featurestore.trainingdatasets;

import com.logicalclocks.shaded.com.google.common.collect.Streams;
import io.hops.hopsworks.common.featurestore.FeaturestoreConstants;
import io.hops.hopsworks.common.featurestore.FeaturestoreController;
import io.hops.hopsworks.common.featurestore.FeaturestoreFacade;
import io.hops.hopsworks.common.featurestore.activity.FeaturestoreActivityFacade;
import io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO;
import io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO;
import io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupController;
import io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupDTO;
import io.hops.hopsworks.common.featurestore.featuregroup.online.OnlineFeaturegroupController;
import io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController;
import io.hops.hopsworks.common.featurestore.query.ConstructorController;
import io.hops.hopsworks.common.featurestore.query.Feature;
import io.hops.hopsworks.common.featurestore.query.Join;
import io.hops.hopsworks.common.featurestore.query.PreparedStatementParameterDTO;
import io.hops.hopsworks.common.featurestore.query.Query;
import io.hops.hopsworks.common.featurestore.query.QueryDTO;
import io.hops.hopsworks.common.featurestore.query.ServingPreparedStatementDTO;
import io.hops.hopsworks.common.featurestore.query.filter.Filter;
import io.hops.hopsworks.common.featurestore.query.filter.FilterController;
import io.hops.hopsworks.common.featurestore.query.filter.FilterLogic;
import io.hops.hopsworks.common.featurestore.query.filter.SqlFilterCondition;
import io.hops.hopsworks.common.featurestore.statistics.StatisticsController;
import io.hops.hopsworks.common.featurestore.statistics.columns.StatisticColumnController;
import io.hops.hopsworks.common.featurestore.storageconnectors.FeaturestoreConnectorFacade;
import io.hops.hopsworks.common.featurestore.trainingdatasets.TrainingDatasetDTO;
import io.hops.hopsworks.common.featurestore.trainingdatasets.TrainingDatasetFacade;
import io.hops.hopsworks.common.featurestore.trainingdatasets.external.ExternalTrainingDatasetController;
import io.hops.hopsworks.common.featurestore.trainingdatasets.external.ExternalTrainingDatasetFacade;
import io.hops.hopsworks.common.featurestore.trainingdatasets.hopsfs.HopsfsTrainingDatasetController;
import io.hops.hopsworks.common.featurestore.trainingdatasets.hopsfs.HopsfsTrainingDatasetFacade;
import io.hops.hopsworks.common.featurestore.trainingdatasets.split.TrainingDatasetSplitDTO;
import io.hops.hopsworks.common.featurestore.utils.FeaturestoreInputValidation;
import io.hops.hopsworks.common.featurestore.utils.FeaturestoreUtils;
import io.hops.hopsworks.common.hdfs.DistributedFileSystemOps;
import io.hops.hopsworks.common.hdfs.DistributedFsService;
import io.hops.hopsworks.common.hdfs.HdfsUsersController;
import io.hops.hopsworks.common.hdfs.Utils;
import io.hops.hopsworks.common.hdfs.inode.InodeController;
import io.hops.hopsworks.common.provenance.core.HopsFSProvenanceController;
import io.hops.hopsworks.common.util.Settings;
import io.hops.hopsworks.exceptions.FeaturestoreException;
import io.hops.hopsworks.exceptions.ProvenanceException;
import io.hops.hopsworks.exceptions.ServiceException;
import io.hops.hopsworks.persistence.entity.dataset.Dataset;
import io.hops.hopsworks.persistence.entity.featurestore.Featurestore;
import io.hops.hopsworks.persistence.entity.featurestore.activity.FeaturestoreActivityMeta;
import io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup;
import io.hops.hopsworks.persistence.entity.featurestore.featuregroup.FeaturegroupType;
import io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.TimeTravelFormat;
import io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticColumn;
import io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticsConfig;
import io.hops.hopsworks.persistence.entity.featurestore.storageconnector.FeaturestoreConnector;
import io.hops.hopsworks.persistence.entity.featurestore.storageconnector.FeaturestoreConnectorType;
import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset;
import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature;
import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin;
import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoinCondition;
import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetType;
import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.external.ExternalTrainingDataset;
import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.hopsfs.HopsfsTrainingDataset;
import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.split.TrainingDatasetSplit;
import io.hops.hopsworks.persistence.entity.hdfs.inode.Inode;
import io.hops.hopsworks.persistence.entity.project.Project;
import io.hops.hopsworks.persistence.entity.user.Users;
import io.hops.hopsworks.restutils.RESTCodes;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.logging.Level;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import javax.ejb.EJB;
import javax.ejb.Stateless;
import javax.ejb.TransactionAttribute;
import javax.ejb.TransactionAttributeType;
import org.apache.calcite.sql.JoinType;
import org.apache.commons.lang3.StringUtils;

@Stateless
@TransactionAttribute(value=TransactionAttributeType.NEVER)
public class TrainingDatasetController {
    // Collaborators injected by the EJB container: facades, controllers and services this controller delegates to.
    @EJB
    private TrainingDatasetFacade trainingDatasetFacade;
    @EJB
    private FeaturestoreFacade featurestoreFacade;
    @EJB
    private HopsfsTrainingDatasetController hopsfsTrainingDatasetController;
    @EJB
    private HopsfsTrainingDatasetFacade hopsfsTrainingDatasetFacade;
    @EJB
    private ExternalTrainingDatasetController externalTrainingDatasetController;
    @EJB
    private ExternalTrainingDatasetFacade externalTrainingDatasetFacade;
    @EJB
    private FeaturestoreInputValidation featurestoreInputValidation;
    @EJB
    private InodeController inodeController;
    @EJB
    private HopsFSProvenanceController fsProvenanceController;
    @EJB
    private DistributedFsService dfs;
    @EJB
    private HdfsUsersController hdfsUsersBean;
    @EJB
    private FeaturestoreUtils featurestoreUtils;
    @EJB
    private StatisticsController statisticsController;
    @EJB
    private ConstructorController constructorController;
    @EJB
    private OnlineFeaturestoreController onlineFeaturestoreController;
    @EJB
    private FeaturegroupController featuregroupController;
    @EJB
    private FeaturestoreConnectorFacade featurestoreConnectorFacade;
    @EJB
    private FeaturestoreActivityFacade fsActivityFacade;
    @EJB
    private StatisticColumnController statisticColumnController;
    @EJB
    private FilterController filterController;
    @EJB
    private FeaturestoreController featurestoreController;
    @EJB
    private OnlineFeaturegroupController onlineFeaturegroupController;
    // NOTE(review): not referenced in the visible part of this class; presumably a label used when
    // building serving prepared statements further down - confirm against its usages.
    private static final String PREPARED_STATEMENT_TYPE = "parameter";

    /**
     * Lists the training datasets of a feature store as DTOs.
     *
     * @param user the user passed through to the DTO conversion
     * @param project the project passed through to the DTO conversion
     * @param featurestore the feature store whose training datasets are looked up
     * @return one DTO per training dataset returned by the facade, in the facade's order
     * @throws ServiceException if the DTO conversion fails
     * @throws FeaturestoreException if the DTO conversion fails
     */
    public List<TrainingDatasetDTO> getTrainingDatasetsForFeaturestore(Users user, Project project, Featurestore featurestore) throws ServiceException, FeaturestoreException {
        List<TrainingDatasetDTO> dtos = new ArrayList<>();
        for (TrainingDataset entity : this.trainingDatasetFacade.findByFeaturestore(featurestore)) {
            TrainingDatasetDTO dto = this.convertTrainingDatasetToDTO(user, project, entity);
            dtos.add(dto);
        }
        return dtos;
    }

    /**
     * Builds the DTO representation of a training dataset entity.
     *
     * <p>Fills in the feature store name and the sorted feature list, then hands over to the
     * HopsFS or external controller depending on the training dataset type.
     *
     * @param user the user, forwarded to the external training dataset conversion
     * @param project the project, used for the online topic name and the external conversion
     * @param trainingDataset the entity to convert
     * @return the type-specific DTO
     * @throws ServiceException if the delegated conversion fails
     * @throws FeaturestoreException if the delegated conversion fails
     * @throws IllegalArgumentException if the training dataset type is neither HopsFS nor external
     */
    private TrainingDatasetDTO convertTrainingDatasetToDTO(Users user, Project project, TrainingDataset trainingDataset) throws ServiceException, FeaturestoreException {
        TrainingDatasetDTO dto = new TrainingDatasetDTO(trainingDataset);
        dto.setFeaturestoreName(this.featurestoreFacade.getHiveDbName(trainingDataset.getFeaturestore().getHiveDbId()));

        List<TrainingDatasetFeature> sortedFeatures = this.getFeaturesSorted(trainingDataset, true);
        Map<Integer, String> featurestoreNames = this.getFsLookupTableFeatures(sortedFeatures);
        List<TrainingDatasetFeatureDTO> featureDTOs = sortedFeatures.stream().map(feature -> {
            Featuregroup featureGroup = feature.getFeatureGroup();
            FeaturegroupDTO featureGroupDTO = null;
            // Features without a backing feature group carry a null feature group DTO.
            if (featureGroup != null) {
                featureGroupDTO = new FeaturegroupDTO(
                        featureGroup.getFeaturestore().getId(),
                        featurestoreNames.get(featureGroup.getFeaturestore().getId()),
                        featureGroup.getId(),
                        featureGroup.getName(),
                        featureGroup.getVersion(),
                        this.onlineFeaturegroupController.onlineFeatureGroupTopicName(project.getId(), Utils.getFeaturegroupName(featureGroup)));
            }
            return new TrainingDatasetFeatureDTO(feature.getName(), feature.getType(), featureGroupDTO, feature.getIndex(), feature.isLabel());
        }).collect(Collectors.toList());
        dto.setFeatures(featureDTOs);

        switch (trainingDataset.getTrainingDatasetType()) {
            case HOPSFS_TRAINING_DATASET:
                return this.hopsfsTrainingDatasetController.convertHopsfsTrainingDatasetToDTO(dto, trainingDataset);
            case EXTERNAL_TRAINING_DATASET:
                return this.externalTrainingDatasetController.convertExternalTrainingDatasetToDTO(user, project, dto, trainingDataset);
            default:
                throw new IllegalArgumentException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_TRAINING_DATASET_TYPE.getMessage() + ", Recognized training dataset types are: " + TrainingDatasetType.HOPSFS_TRAINING_DATASET + ", and: " + TrainingDatasetType.EXTERNAL_TRAINING_DATASET + ". The provided training dataset type was not recognized: " + trainingDataset.getTrainingDatasetType());
        }
    }

    /**
     * Creates a new training dataset in the given feature store.
     *
     * <p>When the DTO has no version, the next version after the highest existing one for the same
     * name is assigned (or 1 if there is none). For HopsFS training datasets a directory is created
     * under the connector's dataset, the metadata is persisted and provenance attributes are attached
     * to the directory. For every other type the metadata is persisted against the storage connector
     * named in the DTO.
     *
     * @param user the user creating the training dataset
     * @param project the project the request is made from
     * @param featurestore the feature store that will own the training dataset
     * @param trainingDatasetDTO the description of the training dataset to create
     * @return the DTO of the persisted training dataset
     * @throws FeaturestoreException if the name/version already exists, no query or features are given,
     *         validation fails, or the storage connector cannot be found
     * @throws ProvenanceException if attaching the provenance attributes fails
     * @throws IOException if the file system operations fail
     * @throws ServiceException if building the resulting DTO fails
     */
    public TrainingDatasetDTO createTrainingDataset(Users user, Project project, Featurestore featurestore, TrainingDatasetDTO trainingDatasetDTO) throws FeaturestoreException, ProvenanceException, IOException, ServiceException {
        // Pick the next free version when the caller did not specify one.
        if (trainingDatasetDTO.getVersion() == null) {
            List<TrainingDataset> previousVersions = this.trainingDatasetFacade.findByNameAndFeaturestoreOrderedDescVersion(trainingDatasetDTO.getName(), featurestore);
            boolean hasPrevious = previousVersions != null && !previousVersions.isEmpty();
            trainingDatasetDTO.setVersion(hasPrevious ? previousVersions.get(0).getVersion() + 1 : 1);
        }

        boolean alreadyExists = this.trainingDatasetFacade.findByNameVersionAndFeaturestore(trainingDatasetDTO.getName(), trainingDatasetDTO.getVersion(), featurestore).isPresent();
        if (alreadyExists) {
            throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_ALREADY_EXISTS, Level.FINE, "Training Dataset: " + trainingDatasetDTO.getName() + ", version: " + trainingDatasetDTO.getVersion());
        }

        // A training dataset needs either a query or an explicit feature list.
        Query query = null;
        if (trainingDatasetDTO.getQueryDTO() != null) {
            query = this.constructQuery(trainingDatasetDTO.getQueryDTO(), project, user);
        } else if (trainingDatasetDTO.getFeatures() == null) {
            throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NO_SCHEMA, Level.FINE, "The training dataset doesn't have any feature");
        }
        this.verifyTrainingDatasetInput(trainingDatasetDTO, query);

        if (trainingDatasetDTO.getTrainingDatasetType() != TrainingDatasetType.HOPSFS_TRAINING_DATASET) {
            // Non-HopsFS training datasets must name their storage connector; no inode is involved.
            if (trainingDatasetDTO.getStorageConnector() == null) {
                throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.CONNECTOR_NOT_FOUND, Level.FINE, "Storage connector is empty");
            }
            FeaturestoreConnector externalConnector = this.featurestoreConnectorFacade.findById(trainingDatasetDTO.getStorageConnector().getId()).orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.CONNECTOR_NOT_FOUND, Level.FINE, "Connector: " + trainingDatasetDTO.getStorageConnector().getId()));
            return this.createTrainingDatasetMetadata(user, project, featurestore, trainingDatasetDTO, query, externalConnector, null);
        }

        // HopsFS: use the requested connector, or fall back to the project's training datasets connector.
        FeaturestoreConnector hopsfsConnector;
        if (trainingDatasetDTO.getStorageConnector() != null && trainingDatasetDTO.getStorageConnector().getId() != null) {
            hopsfsConnector = this.featurestoreConnectorFacade.findByIdType(trainingDatasetDTO.getStorageConnector().getId(), FeaturestoreConnectorType.HOPSFS).orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.HOPSFS_CONNECTOR_NOT_FOUND, Level.FINE, "HOPSFS Connector: " + trainingDatasetDTO.getStorageConnector().getId()));
        } else {
            String connectorName = featurestore.getProject().getName() + "_" + Settings.ServiceDataset.TRAININGDATASETS.getName();
            hopsfsConnector = this.featurestoreConnectorFacade.findByFeaturestoreName(featurestore, connectorName).orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.HOPSFS_CONNECTOR_NOT_FOUND, Level.FINE, "HOPSFS Connector: " + connectorName));
        }

        Dataset parentDataset = hopsfsConnector.getHopsfsConnector().getHopsfsDataset();
        String parentPath = this.inodeController.getPath(parentDataset.getInode());
        String trainingDatasetPath = this.getTrainingDatasetPath(parentPath, trainingDatasetDTO.getName(), trainingDatasetDTO.getVersion());
        String hdfsUsername = this.hdfsUsersBean.getHdfsUserName(project, user);

        DistributedFileSystemOps udfso = null;
        try {
            udfso = this.dfs.getDfsOps(hdfsUsername);
            udfso.mkdir(trainingDatasetPath);
            Inode directoryInode = this.inodeController.getInodeAtPath(trainingDatasetPath);
            TrainingDatasetDTO created = this.createTrainingDatasetMetadata(user, project, featurestore, trainingDatasetDTO, query, hopsfsConnector, directoryInode);
            this.fsProvenanceController.trainingDatasetAttachXAttr(trainingDatasetPath, created, udfso);
            return created;
        } finally {
            // Always hand the file system client back, also when creation fails half-way.
            if (udfso != null) {
                this.dfs.closeDfsClient(udfso);
            }
        }
    }

    /**
     * Persists the metadata of a new training dataset and returns its DTO.
     *
     * <p>Creates the type-specific entity (HopsFS or external), populates a new
     * {@link TrainingDataset} from the DTO (splits, statistics configuration, features or query
     * joins), stores it through the facade and logs a creation activity.
     *
     * <p>NOTE(review): this method is private and is invoked through {@code this} in the visible
     * code; confirm that the REQUIRED transaction attribute actually takes effect for such calls.
     *
     * @param user the creator of the training dataset
     * @param project the project, forwarded to the DTO conversion
     * @param featurestore the owning feature store
     * @param trainingDatasetDTO the description of the training dataset
     * @param query the constructed query, used only when the DTO carries a query
     * @param featurestoreConnector the storage connector backing the training dataset
     * @param inode the directory inode for HopsFS training datasets; callers pass null otherwise
     * @return the DTO of the persisted training dataset
     * @throws FeaturestoreException if the training dataset type is not recognized or conversion fails
     * @throws ServiceException if the DTO conversion fails
     */
    @TransactionAttribute(value=TransactionAttributeType.REQUIRED)
    private TrainingDatasetDTO createTrainingDatasetMetadata(Users user, Project project, Featurestore featurestore, TrainingDatasetDTO trainingDatasetDTO, Query query, FeaturestoreConnector featurestoreConnector, Inode inode) throws FeaturestoreException, ServiceException {
        // Exactly one of the two type-specific entities is created; the other stays null.
        HopsfsTrainingDataset hopsfsEntity = null;
        ExternalTrainingDataset externalEntity = null;
        switch (trainingDatasetDTO.getTrainingDatasetType()) {
            case HOPSFS_TRAINING_DATASET:
                hopsfsEntity = this.hopsfsTrainingDatasetFacade.createHopsfsTrainingDataset(featurestoreConnector, inode);
                break;
            case EXTERNAL_TRAINING_DATASET:
                externalEntity = this.externalTrainingDatasetFacade.createExternalTrainingDataset(featurestoreConnector, trainingDatasetDTO.getLocation());
                break;
            default:
                throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_TRAINING_DATASET_TYPE, Level.FINE, ", Recognized training dataset types are: " + TrainingDatasetType.HOPSFS_TRAINING_DATASET + ", and: " + TrainingDatasetType.EXTERNAL_TRAINING_DATASET + ". The provided training dataset type was not recognized: " + trainingDatasetDTO.getTrainingDatasetType());
        }

        TrainingDataset trainingDataset = new TrainingDataset();
        trainingDataset.setName(trainingDatasetDTO.getName());
        trainingDataset.setHopsfsTrainingDataset(hopsfsEntity);
        trainingDataset.setExternalTrainingDataset(externalEntity);
        trainingDataset.setDataFormat(trainingDatasetDTO.getDataFormat());
        trainingDataset.setDescription(trainingDatasetDTO.getDescription());
        trainingDataset.setFeaturestore(featurestore);
        trainingDataset.setCreated(new Date());
        trainingDataset.setCreator(user);
        trainingDataset.setVersion(trainingDatasetDTO.getVersion());
        trainingDataset.setTrainingDatasetType(trainingDatasetDTO.getTrainingDatasetType());
        trainingDataset.setSeed(trainingDatasetDTO.getSeed());

        List<TrainingDatasetSplit> splits = trainingDatasetDTO.getSplits().stream()
                .map(splitDTO -> new TrainingDatasetSplit(trainingDataset, splitDTO.getName(), splitDTO.getPercentage()))
                .collect(Collectors.toList());
        trainingDataset.setSplits(splits);

        // A missing coalesce flag is treated as false.
        trainingDataset.setCoalesce(Boolean.TRUE.equals(trainingDatasetDTO.getCoalesce()));

        StatisticsConfig statisticsConfig = new StatisticsConfig(
                trainingDatasetDTO.getStatisticsConfig().getEnabled().booleanValue(),
                trainingDatasetDTO.getStatisticsConfig().getCorrelations().booleanValue(),
                trainingDatasetDTO.getStatisticsConfig().getHistograms().booleanValue());
        statisticsConfig.setTrainingDataset(trainingDataset);
        List<StatisticColumn> statisticColumns = trainingDatasetDTO.getStatisticsConfig().getColumns().stream()
                .map(columnName -> new StatisticColumn(statisticsConfig, columnName))
                .collect(Collectors.toList());
        statisticsConfig.setStatisticColumns(statisticColumns);
        trainingDataset.setStatisticsConfig(statisticsConfig);

        // Query-based training datasets derive joins and features from the query; others use the DTO's feature list.
        trainingDataset.setQuery(trainingDatasetDTO.getQueryDTO() != null);
        if (trainingDataset.isQuery()) {
            this.setTrainingDatasetQuery(query, trainingDatasetDTO.getFeatures(), trainingDataset);
        } else {
            trainingDataset.setFeatures(this.getTrainingDatasetFeatures(trainingDatasetDTO.getFeatures(), trainingDataset));
        }

        TrainingDataset persisted = this.trainingDatasetFacade.update(trainingDataset);
        this.fsActivityFacade.logMetadataActivity(user, persisted, FeaturestoreActivityMeta.TD_CREATED);
        return this.convertTrainingDatasetToDTO(user, project, persisted);
    }

    /**
     * Turns a query DTO into a {@link Query} using the constructor controller.
     *
     * <p>Three lookup tables (aliases, feature groups and available features, all keyed by integer)
     * are populated first and then passed to the conversion.
     *
     * @param queryDTO the query description received from the client
     * @param project the project, forwarded to the lookup-table population
     * @param user the user, forwarded to the lookup-table population
     * @return the converted query
     * @throws FeaturestoreException if populating the lookup tables or the conversion fails
     */
    private Query constructQuery(QueryDTO queryDTO, Project project, Users user) throws FeaturestoreException {
        Map<Integer, String> aliasesById = new HashMap<>();
        Map<Integer, Featuregroup> featureGroupsById = new HashMap<>();
        Map<Integer, List<Feature>> availableFeaturesById = new HashMap<>();
        this.constructorController.populateFgLookupTables(queryDTO, 0, aliasesById, featureGroupsById, availableFeaturesById, project, user);
        Query converted = this.constructorController.convertQueryDTO(queryDTO, aliasesById, featureGroupsById, availableFeaturesById);
        return converted;
    }

    /**
     * Stores the joins and features derived from a query on the training dataset entity.
     *
     * @param query the query the training dataset is built from
     * @param features the feature DTOs from the request, used to mark label features; may be null
     * @param trainingDataset the entity that receives the joins and features
     */
    private void setTrainingDatasetQuery(Query query, List<TrainingDatasetFeatureDTO> features, TrainingDataset trainingDataset) {
        List<TrainingDatasetJoin> joins = this.collectJoins(query, trainingDataset);
        trainingDataset.setJoins(joins);
        // Feature and join numbering both start at 0 for the root query.
        List<TrainingDatasetFeature> queryFeatures = this.collectFeatures(query, features, trainingDataset, 0, joins, 0);
        trainingDataset.setFeatures(queryFeatures);
    }

    /**
     * Collects the training dataset features of a query and, recursively, of its joined queries.
     *
     * <p>Each feature is attached to the join at {@code joinIndex} and gets a consecutive index
     * starting at {@code featureIndex}. A feature is marked as label when a feature DTO with the
     * same name has its label flag set.
     *
     * @param query the query whose features are collected
     * @param featureDTOs the feature DTOs from the request; may be null or empty
     * @param trainingDataset the owning training dataset
     * @param featureIndex the index assigned to the first feature of this query
     * @param tdJoins the joins of the training dataset, indexed by {@code joinIndex}
     * @param joinIndex the position in {@code tdJoins} that belongs to this query
     * @return the features of this query followed by those of its joins
     */
    private List<TrainingDatasetFeature> collectFeatures(Query query, List<TrainingDatasetFeatureDTO> featureDTOs, TrainingDataset trainingDataset, int featureIndex, List<TrainingDatasetJoin> tdJoins, int joinIndex) {
        List<TrainingDatasetFeature> collected = new ArrayList<>();
        boolean hasFeatureDTOs = featureDTOs != null && !featureDTOs.isEmpty();
        for (Feature feature : query.getFeatures()) {
            // Without feature DTOs nothing can be a label.
            boolean label = hasFeatureDTOs
                    && featureDTOs.stream().anyMatch(dto -> feature.getName().equals(dto.getName()) && dto.getLabel());
            collected.add(new TrainingDatasetFeature(trainingDataset, tdJoins.get(joinIndex), query.getFeaturegroup(), feature.getName(), feature.getType(), Integer.valueOf(featureIndex++), label));
        }
        if (query.getJoins() == null) {
            return collected;
        }
        for (Join join : query.getJoins()) {
            // Each join of this query moves on to the next entry in tdJoins.
            joinIndex++;
            List<TrainingDatasetFeature> fromJoin = this.collectFeatures(join.getRightQuery(), featureDTOs, trainingDataset, featureIndex, tdJoins, joinIndex);
            collected.addAll(fromJoin);
            featureIndex += fromJoin.size();
        }
        return collected;
    }

    /**
     * Builds the join entities of a training dataset from a query.
     *
     * <p>The first entry represents the query's own feature group (join type 0); one further entry
     * is added per join of the query, carrying the join type ordinal and the join conditions.
     * When the query's feature group is a cached feature group with HUDI time travel, the end
     * commit id is recorded on the entries.
     *
     * <p>NOTE(review): for the joined entries the HUDI check looks at the query's (left) feature
     * group, not at the join's right feature group - confirm this is intended.
     *
     * @param query the query to read the feature group and joins from
     * @param trainingDataset the owning training dataset
     * @return the join entities, indexed consecutively from 0
     */
    private List<TrainingDatasetJoin> collectJoins(Query query, TrainingDataset trainingDataset) {
        List<TrainingDatasetJoin> collected = new ArrayList<>();
        Featuregroup leftFeatureGroup = query.getFeaturegroup();
        boolean leftIsHudi = leftFeatureGroup.getFeaturegroupType() == FeaturegroupType.CACHED_FEATURE_GROUP
                && leftFeatureGroup.getCachedFeaturegroup().getTimeTravelFormat() == TimeTravelFormat.HUDI;

        int position = 0;
        TrainingDatasetJoin rootJoin;
        if (leftIsHudi) {
            rootJoin = new TrainingDatasetJoin(trainingDataset, leftFeatureGroup, query.getLeftFeatureGroupEndCommitId(), (short) 0, position++);
        } else {
            rootJoin = new TrainingDatasetJoin(trainingDataset, leftFeatureGroup, (short) 0, position++);
        }
        collected.add(rootJoin);

        if (query.getJoins() == null || query.getJoins().isEmpty()) {
            return collected;
        }
        for (Join join : query.getJoins()) {
            TrainingDatasetJoin tdJoin;
            if (leftIsHudi) {
                tdJoin = new TrainingDatasetJoin(trainingDataset, join.getRightQuery().getFeaturegroup(), join.getRightQuery().getLeftFeatureGroupEndCommitId(), (short) join.getJoinType().ordinal(), position++);
            } else {
                tdJoin = new TrainingDatasetJoin(trainingDataset, join.getRightQuery().getFeaturegroup(), (short) join.getJoinType().ordinal(), position++);
            }
            tdJoin.setConditions(this.collectJoinConditions(join, tdJoin));
            collected.add(tdJoin);
        }
        return collected;
    }

    /**
     * Builds the join condition entities for one join.
     *
     * <p>When the join has an {@code on} list, each feature yields a condition with the same name
     * on both sides. Otherwise the {@code leftOn} and {@code rightOn} features are paired up
     * position by position.
     *
     * @param join the join to read the condition features from
     * @param tdJoin the join entity the conditions belong to
     * @return the join conditions
     */
    private List<TrainingDatasetJoinCondition> collectJoinConditions(Join join, TrainingDatasetJoin tdJoin) {
        if (join.getOn() == null) {
            return Streams.zip(
                    join.getLeftOn().stream(),
                    join.getRightOn().stream(),
                    (leftFeature, rightFeature) -> new TrainingDatasetJoinCondition(tdJoin, leftFeature.getName(), rightFeature.getName()))
                .collect(Collectors.toList());
        }
        return join.getOn().stream()
                .map(sharedFeature -> new TrainingDatasetJoinCondition(tdJoin, sharedFeature.getName(), sharedFeature.getName()))
                .collect(Collectors.toList());
    }

    /**
     * Converts feature DTOs into feature entities for a training dataset that is not query based.
     *
     * @param featureList the feature DTOs, in the order that determines each feature's index
     * @param trainingDataset the owning training dataset
     * @return one feature entity per DTO, indexed consecutively from 0
     */
    private List<TrainingDatasetFeature> getTrainingDatasetFeatures(List<TrainingDatasetFeatureDTO> featureList, TrainingDataset trainingDataset) {
        List<TrainingDatasetFeature> entities = new ArrayList<>();
        int position = 0;
        for (TrainingDatasetFeatureDTO featureDTO : featureList) {
            TrainingDatasetFeature entity = new TrainingDatasetFeature(trainingDataset, featureDTO.getName(), featureDTO.getType(), Integer.valueOf(position), featureDTO.getLabel().booleanValue());
            entities.add(entity);
            position++;
        }
        return entities;
    }

    /**
     * Looks up a training dataset by id within a feature store and returns it as a DTO.
     *
     * @param user the user, forwarded to the DTO conversion
     * @param project the project, forwarded to the DTO conversion
     * @param featurestore the feature store to search in
     * @param id the id of the training dataset
     * @return the DTO of the training dataset
     * @throws FeaturestoreException if no such training dataset exists or the conversion fails
     * @throws ServiceException if the DTO conversion fails
     */
    public TrainingDatasetDTO getTrainingDatasetWithIdAndFeaturestore(Users user, Project project, Featurestore featurestore, Integer id) throws FeaturestoreException, ServiceException {
        return this.convertTrainingDatasetToDTO(user, project, this.getTrainingDatasetById(featurestore, id));
    }

    /**
     * Fetches a training dataset entity by id within a feature store.
     *
     * @param featurestore the feature store to search in
     * @param id the id of the training dataset
     * @return the matching entity
     * @throws FeaturestoreException with TRAINING_DATASET_NOT_FOUND if there is no match
     */
    public TrainingDataset getTrainingDatasetById(Featurestore featurestore, Integer id) throws FeaturestoreException {
        Optional<TrainingDataset> match = this.trainingDatasetFacade.findByIdAndFeaturestore(id, featurestore);
        return match.orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NOT_FOUND, Level.FINE, "trainingDatasetId: " + id));
    }

    /**
     * Returns all training datasets with the given name in a feature store as DTOs.
     *
     * @param user the user, forwarded to the DTO conversion
     * @param project the project, forwarded to the DTO conversion
     * @param featurestore the feature store to search in
     * @param name the training dataset name
     * @return one DTO per matching training dataset
     * @throws FeaturestoreException with TRAINING_DATASET_NOT_FOUND if nothing matches, or if conversion fails
     * @throws ServiceException if the DTO conversion fails
     */
    public List<TrainingDatasetDTO> getWithNameAndFeaturestore(Users user, Project project, Featurestore featurestore, String name) throws FeaturestoreException, ServiceException {
        List<TrainingDataset> matches = this.trainingDatasetFacade.findByNameAndFeaturestore(name, featurestore);
        boolean nothingFound = matches == null || matches.isEmpty();
        if (nothingFound) {
            throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NOT_FOUND, Level.FINE, "training dataset name : " + name);
        }
        List<TrainingDatasetDTO> dtos = new ArrayList<>();
        for (TrainingDataset match : matches) {
            dtos.add(this.convertTrainingDatasetToDTO(user, project, match));
        }
        return dtos;
    }

    /**
     * Looks up a training dataset by name and version within a feature store and returns it as a DTO.
     *
     * @param user the user, forwarded to the DTO conversion
     * @param project the project, forwarded to the DTO conversion
     * @param featurestore the feature store to search in
     * @param name the training dataset name
     * @param version the training dataset version
     * @return the DTO of the matching training dataset
     * @throws FeaturestoreException with TRAINING_DATASET_NOT_FOUND if there is no match, or if conversion fails
     * @throws ServiceException if the DTO conversion fails
     */
    public TrainingDatasetDTO getWithNameVersionAndFeaturestore(Users user, Project project, Featurestore featurestore, String name, Integer version) throws FeaturestoreException, ServiceException {
        Optional<TrainingDataset> trainingDataset = this.trainingDatasetFacade.findByNameVersionAndFeaturestore(name, version, featurestore);
        // The lookup is by name AND version, so report both when nothing is found.
        TrainingDataset found = trainingDataset.orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NOT_FOUND, Level.FINE, "training dataset name : " + name + ", version: " + version));
        return this.convertTrainingDatasetToDTO(user, project, found);
    }

    /**
     * Deletes a training dataset: its statistics, its metadata and, for HopsFS training datasets,
     * its directory on the file system.
     *
     * <p>Removing the directory is best-effort: the metadata has already been removed at that
     * point, so an {@link IOException} from the file system is logged as a warning instead of
     * being propagated.
     *
     * @param user the user performing the deletion
     * @param project the project the request is made from
     * @param featurestore the feature store owning the training dataset
     * @param trainingDatasetId the id of the training dataset to delete
     * @return the name of the deleted training dataset
     * @throws FeaturestoreException if the training dataset does not exist or the role check or
     *         statistics deletion fails
     */
    public String delete(Users user, Project project, Featurestore featurestore, Integer trainingDatasetId) throws FeaturestoreException {
        TrainingDataset trainingDataset = this.trainingDatasetFacade.findByIdAndFeaturestore(trainingDatasetId, featurestore).orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NOT_FOUND, Level.FINE, "training dataset id:" + trainingDatasetId));
        this.featurestoreUtils.verifyUserRole(trainingDataset, featurestore, user, project);
        this.statisticsController.deleteStatistics(project, user, trainingDataset);
        this.trainingDatasetFacade.removeTrainingDataset(trainingDataset);
        if (trainingDataset.getTrainingDatasetType() == TrainingDatasetType.HOPSFS_TRAINING_DATASET) {
            String dsPath = this.inodeController.getPath(trainingDataset.getHopsfsTrainingDataset().getInode());
            String username = this.hdfsUsersBean.getHdfsUserName(project, user);
            DistributedFileSystemOps udfso = this.dfs.getDfsOps(username);
            try {
                udfso.rm(dsPath, true);
            } catch (IOException e) {
                // Keep the deletion best-effort, but leave a trace so orphaned directories can be found.
                java.util.logging.Logger.getLogger(TrainingDatasetController.class.getName()).log(Level.WARNING, "Could not remove the directory of training dataset " + trainingDataset.getName() + " at path: " + dsPath, e);
            } finally {
                if (udfso != null) {
                    this.dfs.closeDfsClient(udfso);
                }
            }
        }
        return trainingDataset.getName();
    }

    /**
     * Updates the description of an existing training dataset.
     *
     * <p>Only the description is copied from the DTO onto the entity. The entity is re-read from
     * the facade after the update and that fresh copy is converted to the returned DTO.
     *
     * @param user the user, forwarded to the DTO conversion
     * @param project the project, forwarded to the DTO conversion
     * @param featurestore the feature store owning the training dataset
     * @param trainingDatasetDTO the DTO carrying the id and the new description
     * @return the DTO of the updated training dataset
     * @throws FeaturestoreException if the training dataset does not exist or input validation fails
     * @throws ServiceException if the DTO conversion fails
     */
    public TrainingDatasetDTO updateTrainingDatasetMetadata(Users user, Project project, Featurestore featurestore, TrainingDatasetDTO trainingDatasetDTO) throws FeaturestoreException, ServiceException {
        TrainingDataset existing = this.verifyTrainingDatasetId(trainingDatasetDTO.getId(), featurestore);
        this.featurestoreInputValidation.verifyUserInput(trainingDatasetDTO);

        existing.setDescription(trainingDatasetDTO.getDescription());
        this.trainingDatasetFacade.update(existing);

        Optional<TrainingDataset> reloaded = this.trainingDatasetFacade.findByIdAndFeaturestore(trainingDatasetDTO.getId(), featurestore);
        TrainingDataset refreshed = reloaded.orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NOT_FOUND, Level.FINE, "training dataset id: " + trainingDatasetDTO.getId()));
        return this.convertTrainingDatasetToDTO(user, project, refreshed);
    }

    /**
     * Updates the statistics configuration of an existing training dataset.
     *
     * <p>Each of the enabled, histograms and correlations flags is applied only when the DTO
     * provides it. The statistic columns are verified, the entity is stored, the columns are
     * persisted and the entity is re-read before being converted to the returned DTO.
     *
     * @param user the user, forwarded to the DTO conversion
     * @param project the project, forwarded to the DTO conversion
     * @param featurestore the feature store owning the training dataset
     * @param trainingDatasetDTO the DTO carrying the id and the new statistics configuration
     * @return the DTO of the updated training dataset
     * @throws FeaturestoreException if the training dataset does not exist or column verification fails
     * @throws ServiceException if the DTO conversion fails
     */
    public TrainingDatasetDTO updateTrainingDatasetStatsConfig(Users user, Project project, Featurestore featurestore, TrainingDatasetDTO trainingDatasetDTO) throws FeaturestoreException, ServiceException {
        TrainingDataset existing = this.getTrainingDatasetById(featurestore, trainingDatasetDTO.getId());

        // Flags left unset (null) in the request keep their current value.
        if (trainingDatasetDTO.getStatisticsConfig().getEnabled() != null) {
            boolean descriptive = trainingDatasetDTO.getStatisticsConfig().getEnabled().booleanValue();
            existing.getStatisticsConfig().setDescriptive(descriptive);
        }
        if (trainingDatasetDTO.getStatisticsConfig().getHistograms() != null) {
            boolean histograms = trainingDatasetDTO.getStatisticsConfig().getHistograms().booleanValue();
            existing.getStatisticsConfig().setHistograms(histograms);
        }
        if (trainingDatasetDTO.getStatisticsConfig().getCorrelations() != null) {
            boolean correlations = trainingDatasetDTO.getStatisticsConfig().getCorrelations().booleanValue();
            existing.getStatisticsConfig().setCorrelations(correlations);
        }

        this.statisticColumnController.verifyStatisticColumnsExist(trainingDatasetDTO, existing);
        TrainingDataset stored = this.trainingDatasetFacade.update(existing);
        this.statisticColumnController.persistStatisticColumns(stored, trainingDatasetDTO.getStatisticsConfig().getColumns());

        TrainingDataset reloaded = this.getTrainingDatasetById(featurestore, trainingDatasetDTO.getId());
        return this.convertTrainingDatasetToDTO(user, project, reloaded);
    }

    /**
     * Returns the name of a project's training datasets folder.
     *
     * @param project the project
     * @return the project name, an underscore, and the name of the TRAININGDATASETS service dataset
     */
    public String getTrainingDatasetFolderName(Project project) {
        String projectName = project.getName();
        String serviceDatasetName = Settings.ServiceDataset.TRAININGDATASETS.getName();
        return projectName + "_" + serviceDatasetName;
    }

    /**
     * Builds the path of a training dataset directory.
     *
     * @param trainingDatasetsFolderPath the path of the folder that holds the training datasets
     * @param trainingDatasetName the training dataset name
     * @param version the training dataset version
     * @return {@code <folder>/<name>_<version>}
     */
    public String getTrainingDatasetPath(String trainingDatasetsFolderPath, String trainingDatasetName, Integer version) {
        String versionedName = trainingDatasetName + "_" + version;
        return trainingDatasetsFolderPath + "/" + versionedName;
    }

    /**
     * Resolves a training dataset id within a feature store to its entity.
     *
     * @param trainingDatasetId the id to resolve
     * @param featurestore the feature store to search in
     * @return the matching entity
     * @throws FeaturestoreException with TRAINING_DATASET_NOT_FOUND if there is no match
     */
    private TrainingDataset verifyTrainingDatasetId(Integer trainingDatasetId, Featurestore featurestore) throws FeaturestoreException {
        Optional<TrainingDataset> match = this.trainingDatasetFacade.findByIdAndFeaturestore(trainingDatasetId, featurestore);
        return match.orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NOT_FOUND, Level.FINE, "training dataset id: " + trainingDatasetId));
    }

    /**
     * Checks that a training dataset type is one of the two recognized types.
     *
     * @param trainingDatasetType the type to check
     * @throws FeaturestoreException with ILLEGAL_TRAINING_DATASET_TYPE if the type is neither
     *         HOPSFS_TRAINING_DATASET nor EXTERNAL_TRAINING_DATASET (including null)
     */
    private void verifyTrainingDatasetType(TrainingDatasetType trainingDatasetType) throws FeaturestoreException {
        boolean recognized = trainingDatasetType == TrainingDatasetType.HOPSFS_TRAINING_DATASET
                || trainingDatasetType == TrainingDatasetType.EXTERNAL_TRAINING_DATASET;
        if (recognized) {
            return;
        }
        throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_TRAINING_DATASET_TYPE, Level.FINE, ", Recognized Training Dataset types are: " + TrainingDatasetType.HOPSFS_TRAINING_DATASET + ", and: " + TrainingDatasetType.EXTERNAL_TRAINING_DATASET + ". The provided training dataset type was not recognized: " + trainingDatasetType);
    }

    /**
     * Checks that a training dataset version is present and strictly positive.
     *
     * @param version the version to check
     * @throws IllegalArgumentException if the version is null
     * @throws FeaturestoreException with ILLEGAL_TRAINING_DATASET_VERSION if the version is zero or negative
     */
    private void verifyTrainingDatasetVersion(Integer version) throws FeaturestoreException {
        if (version == null) {
            throw new IllegalArgumentException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_VERSION_NOT_PROVIDED.getMessage());
        }
        boolean positive = version.intValue() > 0;
        if (!positive) {
            throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_TRAINING_DATASET_VERSION, Level.FINE, " version cannot be negative or zero");
        }
    }

    /**
     * Checks that a data format is one of the recognized training dataset data formats.
     *
     * @param dataFormat the data format to check
     * @throws FeaturestoreException with ILLEGAL_TRAINING_DATASET_DATA_FORMAT if the format is not
     *         contained in {@code FeaturestoreConstants.TRAINING_DATASET_DATA_FORMATS}
     */
    private void verifyTrainingDatasetDataFormat(String dataFormat) throws FeaturestoreException {
        if (FeaturestoreConstants.TRAINING_DATASET_DATA_FORMATS.contains(dataFormat)) {
            return;
        }
        // Join the individual formats explicitly instead of passing the whole list as a single
        // varargs element, which only rendered through the list's toString().
        String recognizedFormats = StringUtils.join(FeaturestoreConstants.TRAINING_DATASET_DATA_FORMATS, ", ");
        throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_TRAINING_DATASET_DATA_FORMAT, Level.FINE, ", the recognized training dataset formats are: " + recognizedFormats + ". The provided data format:" + dataFormat + " was not recognized.");
    }

    /**
     * Validates the splits of a training dataset. A {@code null} or empty list is accepted as-is.
     *
     * <p>For each split, in order: the name must be non-null and match
     * {@code FeaturestoreConstants.FEATURESTORE_REGEX}, the percentage must be non-null, and the
     * name must not have been used by an earlier split in the list.
     *
     * @param trainingDatasetSplitDTOs the splits to validate, may be {@code null}
     * @throws FeaturestoreException with ILLEGAL_TRAINING_DATASET_SPLIT_NAME,
     *     ILLEGAL_TRAINING_DATASET_SPLIT_PERCENTAGE or TRAINING_DATASET_DUPLICATE_SPLIT_NAMES
     *     for the first violation found
     */
    private void verifyTrainingDatasetSplits(List<TrainingDatasetSplitDTO> trainingDatasetSplitDTOs) throws FeaturestoreException {
        if (trainingDatasetSplitDTOs == null || trainingDatasetSplitDTOs.isEmpty()) {
            return;
        }
        Pattern namePattern = FeaturestoreConstants.FEATURESTORE_REGEX;
        HashSet<String> splitNames = new HashSet<String>();
        for (TrainingDatasetSplitDTO trainingDatasetSplitDTO : trainingDatasetSplitDTOs) {
            String splitName = trainingDatasetSplitDTO.getName();
            // Pattern.matcher(null) throws a NullPointerException, so a missing name is reported
            // as an invalid split name instead of surfacing as an unexpected runtime error.
            if (splitName == null || !namePattern.matcher(splitName).matches()) {
                throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_TRAINING_DATASET_SPLIT_NAME, Level.FINE, ", the provided training dataset split name " + splitName + " is invalid. Split names can only contain lower case characters, numbers and underscores and cannot be longer than " + 63 + " characters or empty.");
            }
            if (trainingDatasetSplitDTO.getPercentage() == null) {
                throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_TRAINING_DATASET_SPLIT_PERCENTAGE, Level.FINE, ", the provided training dataset split percentage is invalid. Percentages can only be numeric. Weights will be normalized if they don\u2019t sum up to 1.0.");
            }
            // Set.add returns false when the name was already present.
            if (!splitNames.add(splitName)) {
                throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_DUPLICATE_SPLIT_NAMES, Level.FINE, " The split names must be unique");
            }
        }
    }

    /**
     * Gathers the features of a query together with the features of the right-hand query of
     * every join, recursively and in depth-first order.
     *
     * @param query the query to walk
     * @return a new list holding the query's own features followed by those of its joined queries
     */
    private List<Feature> collectFeatures(Query query) {
        List<Feature> collected = new ArrayList<Feature>(query.getFeatures());
        if (query.getJoins() == null) {
            return collected;
        }
        for (Join nestedJoin : query.getJoins()) {
            Query rightSide = nestedJoin.getRightQuery();
            collected.addAll(this.collectFeatures(rightSide));
        }
        return collected;
    }

    /**
     * Checks that every feature DTO flagged as a label names a feature that is present in the
     * query (including the right-hand queries of its joins). Does nothing when either argument
     * is {@code null}.
     *
     * @param query the query the training dataset is built from, may be {@code null}
     * @param featuresDTOs the feature DTOs of the training dataset, may be {@code null}
     * @throws FeaturestoreException with LABEL_NOT_FOUND for the first label without a matching feature name
     */
    private void verifyFeatures(Query query, List<TrainingDatasetFeatureDTO> featuresDTOs) throws FeaturestoreException {
        if (query == null || featuresDTOs == null) {
            return;
        }
        // Typed list: a raw List cannot be iterated with a TrainingDatasetFeatureDTO loop variable.
        List<TrainingDatasetFeatureDTO> labels = featuresDTOs.stream()
            .filter(TrainingDatasetFeatureDTO::getLabel)
            .collect(Collectors.toList());
        List<Feature> features = this.collectFeatures(query);
        for (TrainingDatasetFeatureDTO label : labels) {
            boolean presentInQuery = features.stream().anyMatch(f -> f.getName().equals(label.getName()));
            if (!presentInQuery) {
                throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.LABEL_NOT_FOUND, Level.FINE, "Label: " + label.getName() + " is missing");
            }
        }
    }

    /**
     * Runs all input validations for a training dataset DTO. The checks run in the order
     * written below, so the first failing check determines which exception the caller sees.
     *
     * @param trainingDatasetDTO the user-supplied training dataset description
     * @param query the query the training dataset is built from; {@code verifyFeatures} skips
     *     its check when this is {@code null}
     * @throws FeaturestoreException when one of the delegated validations rejects the input
     */
    private void verifyTrainingDatasetInput(TrainingDatasetDTO trainingDatasetDTO, Query query) throws FeaturestoreException {
        // Validations delegated to collaborators outside this class.
        this.featurestoreInputValidation.verifyUserInput(trainingDatasetDTO);
        this.statisticColumnController.verifyStatisticColumnsExist(trainingDatasetDTO, query);
        // Field-level validations implemented in this class.
        this.verifyTrainingDatasetType(trainingDatasetDTO.getTrainingDatasetType());
        this.verifyTrainingDatasetVersion(trainingDatasetDTO.getVersion());
        this.verifyTrainingDatasetDataFormat(trainingDatasetDTO.getDataFormat());
        this.verifyTrainingDatasetSplits(trainingDatasetDTO.getSplits());
        // Labels must refer to features that are part of the query.
        this.verifyFeatures(query, trainingDatasetDTO.getFeatures());
    }

    /**
     * Rebuilds the {@link Query} a training dataset was generated from.
     *
     * <p>Joins are processed in index order and each join is given an alias ({@code fg0},
     * {@code fg1}, ...). The first join becomes the root query; every further join is attached
     * through {@code getQueryJoin}. The available features of a feature group are fetched once
     * per feature group id and carry the alias of the first join that references that group.
     *
     * @param trainingDataset the training dataset whose query should be reconstructed
     * @param withLabel whether label features are included in the selected features
     * @param project project passed on to the feature group controller when listing features
     * @param user user passed on to the feature group controller when listing features
     * @return the reconstructed query with its joins set
     * @throws FeaturestoreException with TRAINING_DATASET_NO_QUERY when the dataset was not built
     *     from a query, TRAINING_DATASET_QUERY_FG_DELETED when a feature has no feature group,
     *     or FEATURE_DOES_NOT_EXIST when a requested feature is not among the group's features
     */
    public Query getQuery(TrainingDataset trainingDataset, boolean withLabel, Project project, Users user) throws FeaturestoreException {
        if (!trainingDataset.isQuery()) {
            throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NO_QUERY, Level.FINE, "Inference vector is only available for datasets generated by queries");
        }
        List<TrainingDatasetJoin> joins = this.getJoinsSorted(trainingDataset);
        Map<Integer, String> fgAliasLookup = this.getAliasLookupTable(joins);
        List<TrainingDatasetFeature> tdFeatures = this.getFeaturesSorted(trainingDataset, withLabel);
        if (tdFeatures.stream().anyMatch(f -> f.getFeatureGroup() == null)) {
            throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_QUERY_FG_DELETED, Level.FINE);
        }

        // Feature group id -> features available in that group; filled once per feature group.
        Map<Integer, List<Feature>> availableFeaturesLookup = new HashMap<Integer, List<Feature>>();
        for (TrainingDatasetJoin tdJoin : joins) {
            Integer featureGroupId = tdJoin.getFeatureGroup().getId();
            if (availableFeaturesLookup.containsKey(featureGroupId)) {
                continue;
            }
            String joinAlias = fgAliasLookup.get(tdJoin.getId());
            List<Feature> availableFeatures = this.featuregroupController.getFeatures(tdJoin.getFeatureGroup(), project, user).stream()
                .map(f -> new Feature(f.getName(), joinAlias, f.getType(), f.getPrimary(), f.getDefaultValue()))
                .collect(Collectors.toList());
            availableFeaturesLookup.put(featureGroupId, availableFeatures);
        }

        // Resolve each requested feature against its group's features, using the alias of the
        // join the feature itself is attached to.
        List<Feature> features = new ArrayList<Feature>();
        for (TrainingDatasetFeature requestedFeature : tdFeatures) {
            List<Feature> candidates = availableFeaturesLookup.get(requestedFeature.getFeatureGroup().getId());
            Feature resolved = candidates.stream()
                .filter(af -> af.getName().equals(requestedFeature.getName()))
                .map(af -> new Feature(af.getName(), fgAliasLookup.get(requestedFeature.getTrainingDatasetJoin().getId()), af.getType(), af.getDefaultValue()))
                .findFirst()
                .orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURE_DOES_NOT_EXIST, Level.FINE, "Feature: " + requestedFeature.getName() + " not found in feature group: " + requestedFeature.getFeatureGroup().getName()));
            features.add(resolved);
        }

        Map<Integer, String> fsLookup = this.getFsLookupTableJoins(joins);
        // The join with the lowest index is the root (left-most) side of the query.
        TrainingDatasetJoin rootJoin = joins.get(0);
        Featuregroup rootFeatureGroup = rootJoin.getFeatureGroup();
        Query query = new Query(fsLookup.get(rootFeatureGroup.getFeaturestore().getId()), this.onlineFeaturestoreController.getOnlineFeaturestoreDbName(rootFeatureGroup.getFeaturestore().getProject()), rootFeatureGroup, fgAliasLookup.get(rootJoin.getId()), features, availableFeaturesLookup.get(rootFeatureGroup.getId()));

        List<Join> queryJoins = new ArrayList<Join>();
        for (int i = 1; i < joins.size(); ++i) {
            queryJoins.add(this.getQueryJoin(query, joins.get(i), fgAliasLookup, fsLookup, availableFeaturesLookup));
        }
        query.setJoins(queryJoins);
        return query;
    }

    /**
     * Builds one serving prepared statement per join of a query-based training dataset.
     *
     * <p>For every join (in index order) the feature group must be online enabled and expose at
     * least one primary key. The statement selects the non-label training dataset features that
     * belong to that join, and is filtered on the feature group's primary keys.
     *
     * @param trainingDataset the training dataset to build statements for
     * @param project project used to resolve the offline feature store database name and to list features
     * @param user user passed on to the feature group controller when listing features
     * @return the prepared statement DTOs, one per join
     * @throws FeaturestoreException with TRAINING_DATASET_NO_QUERY, TRAINING_DATASET_QUERY_FG_DELETED,
     *     FEATURESTORE_ONLINE_NOT_ENABLED or PRIMARY_KEY_REQUIRED when the respective precondition fails
     */
    public List<ServingPreparedStatementDTO> getPreparedStatementDTO(TrainingDataset trainingDataset, Project project, Users user) throws FeaturestoreException {
        if (!trainingDataset.isQuery()) {
            throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NO_QUERY, Level.FINE, "Inference vector is only available for datasets generated by queries");
        }
        List<TrainingDatasetJoin> joins = this.getJoinsSorted(trainingDataset);
        List<TrainingDatasetFeature> tdFeatures = this.getFeaturesSorted(trainingDataset, false);
        boolean featureGroupMissing = tdFeatures.stream().anyMatch(tdf -> tdf.getFeatureGroup() == null);
        if (featureGroupMissing) {
            throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_QUERY_FG_DELETED, Level.FINE);
        }

        // Every statement addresses a single feature group, so the same alias is used for all.
        String alias = "fg0";
        List<ServingPreparedStatementDTO> statements = new ArrayList<ServingPreparedStatementDTO>();
        for (TrainingDatasetJoin join : joins) {
            Featuregroup featuregroup = join.getFeatureGroup();
            if (!featuregroup.getCachedFeaturegroup().isOnlineEnabled()) {
                throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURESTORE_ONLINE_NOT_ENABLED, Level.FINE, "Inference vector is only available for training datasets generated by online enabled feature groups");
            }

            List<FeatureGroupFeatureDTO> availableFeatures = this.featuregroupController.getFeatures(featuregroup, project, user);
            List<Feature> primaryKeys = availableFeatures.stream()
                .filter(FeatureGroupFeatureDTO::getPrimary)
                .map(af -> new Feature(af.getName(), alias, af.getType(), af.getPrimary(), af.getDefaultValue()))
                .collect(Collectors.toList());
            if (primaryKeys.isEmpty()) {
                throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.PRIMARY_KEY_REQUIRED, Level.FINE, "Inference vector is only available for training datasets generated by feature groups with at least 1 primary key");
            }

            // Names of the training dataset features attached to this particular join.
            List<String> tdFeaturesNames = tdFeatures.stream()
                .filter(tdf -> tdf.getTrainingDatasetJoin().getId().equals(join.getId()))
                .map(TrainingDatasetFeature::getName)
                .collect(Collectors.toList());
            List<Feature> selectedFeatures = availableFeatures.stream()
                .filter(af -> tdFeaturesNames.contains(af.getName()))
                .map(af -> new Feature(af.getName(), alias, af.getType(), af.getPrimary(), af.getDefaultValue()))
                .collect(Collectors.toList());

            String featureStore = this.featurestoreController.getOfflineFeaturestoreDbName(project);
            String projectName = this.onlineFeaturestoreController.getOnlineFeaturestoreDbName(featuregroup.getFeaturestore().getProject());

            Query query = new Query();
            query.setFeatureStore(featureStore);
            query.setProject(projectName);
            query.setFeaturegroup(featuregroup);
            query.setAs(alias);
            query.setFeatures(selectedFeatures);
            statements.add(this.buildServingPreparedStatementDTO(join.getIndex(), primaryKeys, query));
        }
        return statements;
    }

    /**
     * Assigns an alias to every join: the n-th join in the list (zero-based) gets {@code "fg" + n}.
     *
     * @param tdJoins the joins, in the order aliases should be assigned
     * @return a map from join id to alias
     */
    private Map<Integer, String> getAliasLookupTable(List<TrainingDatasetJoin> tdJoins) {
        Map<Integer, String> aliasByJoinId = new HashMap<Integer, String>();
        int position = 0;
        for (TrainingDatasetJoin tdJoin : tdJoins) {
            String alias = "fg" + position;
            aliasByJoinId.put(tdJoin.getId(), alias);
            position++;
        }
        return aliasByJoinId;
    }

    /**
     * Builds a lookup from feature store id to Hive database name for the feature stores
     * referenced by the given joins. The database name is resolved once per feature store id.
     *
     * @param tdJoins joins whose feature groups' feature stores should be resolved
     * @return a map from feature store id to the name returned by {@code getHiveDbName}
     */
    private Map<Integer, String> getFsLookupTableJoins(List<TrainingDatasetJoin> tdJoins) {
        Map<Integer, String> dbNameByFeaturestoreId = new HashMap<Integer, String>();
        for (TrainingDatasetJoin tdJoin : tdJoins) {
            Featuregroup featureGroup = tdJoin.getFeatureGroup();
            Integer featurestoreId = featureGroup.getFeaturestore().getId();
            if (!dbNameByFeaturestoreId.containsKey(featurestoreId)) {
                String hiveDbName = this.featurestoreFacade.getHiveDbName(featureGroup.getFeaturestore().getHiveDbId());
                dbNameByFeaturestoreId.put(featurestoreId, hiveDbName);
            }
        }
        return dbNameByFeaturestoreId;
    }

    /**
     * Builds a lookup from feature store id to Hive database name for the feature stores
     * referenced by the given training dataset features. Features without a feature group are
     * skipped, and the database name is resolved once per feature store id.
     *
     * @param tdFeatures features whose feature groups' feature stores should be resolved
     * @return a map from feature store id to the name returned by {@code getHiveDbName}
     */
    private Map<Integer, String> getFsLookupTableFeatures(List<TrainingDatasetFeature> tdFeatures) {
        Map<Integer, String> dbNameByFeaturestoreId = new HashMap<Integer, String>();
        for (TrainingDatasetFeature tdFeature : tdFeatures) {
            Featuregroup featureGroup = tdFeature.getFeatureGroup();
            if (featureGroup == null) {
                continue;
            }
            Integer featurestoreId = featureGroup.getFeaturestore().getId();
            if (!dbNameByFeaturestoreId.containsKey(featurestoreId)) {
                String hiveDbName = this.featurestoreFacade.getHiveDbName(featureGroup.getFeaturestore().getHiveDbId());
                dbNameByFeaturestoreId.put(featurestoreId, hiveDbName);
            }
        }
        return dbNameByFeaturestoreId;
    }

    /**
     * Returns the features of a training dataset in a deterministic order, optionally without
     * the label features.
     *
     * <p>Ordering: when both features have an index they are ordered by index; when neither has
     * one they are ordered by name; a feature with an index sorts before one without. The mixed
     * case is defined explicitly so that the comparator is a consistent total order: comparing
     * an indexed feature with an un-indexed one would otherwise throw a NullPointerException or
     * give asymmetric results depending on argument order.
     *
     * @param trainingDataset the training dataset whose features are returned
     * @param withLabel when {@code false}, features flagged as label are left out
     * @return the sorted (and possibly filtered) features
     */
    private List<TrainingDatasetFeature> getFeaturesSorted(TrainingDataset trainingDataset, boolean withLabel) {
        Comparator<TrainingDatasetFeature> byIndexThenName = (t1, t2) -> {
            Integer firstIndex = t1.getIndex();
            Integer secondIndex = t2.getIndex();
            if (firstIndex != null && secondIndex != null) {
                return firstIndex.compareTo(secondIndex);
            }
            if (firstIndex == null && secondIndex == null) {
                return t1.getName().compareTo(t2.getName());
            }
            // Exactly one side has an index: indexed features come first.
            return firstIndex != null ? -1 : 1;
        };
        return trainingDataset.getFeatures().stream()
            .sorted(byIndexThenName)
            .filter(f -> !f.isLabel() || withLabel)
            .collect(Collectors.toList());
    }

    /**
     * Returns the joins of a training dataset ordered by ascending join index.
     *
     * @param trainingDataset the training dataset whose joins are returned
     * @return a new list with the joins sorted by {@code getIndex()}
     */
    private List<TrainingDatasetJoin> getJoinsSorted(TrainingDataset trainingDataset) {
        List<TrainingDatasetJoin> ordered = new ArrayList<TrainingDatasetJoin>(trainingDataset.getJoins());
        ordered.sort(Comparator.comparing(TrainingDatasetJoin::getIndex));
        return ordered;
    }

    /**
     * Builds the {@link Join} that attaches one training dataset join to the left-hand query.
     *
     * <p>The right-hand query is created for the join's feature group with no selected features
     * ({@code null}); the join conditions supply the left and right "on" features.
     *
     * @param leftQuery the query the join is attached to
     * @param rightTdJoin the stored join describing the right-hand side
     * @param fgAliasLookup join id to alias lookup
     * @param fsLookup feature store id to offline database name lookup
     * @param availableFeaturesLookup feature group id to available features lookup
     * @return the join produced by {@code constructorController.extractLeftRightOn}
     * @throws FeaturestoreException propagated from {@code extractLeftRightOn}
     */
    private Join getQueryJoin(Query leftQuery, TrainingDatasetJoin rightTdJoin, Map<Integer, String> fgAliasLookup, Map<Integer, String> fsLookup, Map<Integer, List<Feature>> availableFeaturesLookup) throws FeaturestoreException {
        Featuregroup rightFeatureGroup = rightTdJoin.getFeatureGroup();
        String rightAs = fgAliasLookup.get(rightTdJoin.getId());
        String offlineDbName = fsLookup.get(rightFeatureGroup.getFeaturestore().getId());
        String onlineDbName = this.onlineFeaturestoreController.getOnlineFeaturestoreDbName(rightFeatureGroup.getFeaturestore().getProject());
        List<Feature> rightAvailableFeatures = availableFeaturesLookup.get(rightFeatureGroup.getId());
        Query rightQuery = new Query(offlineDbName, onlineDbName, rightFeatureGroup, rightAs, null, rightAvailableFeatures);

        List<Feature> leftOn = rightTdJoin.getConditions().stream()
            .map(condition -> new Feature(condition.getLeftFeature()))
            .collect(Collectors.toList());
        List<Feature> rightOn = rightTdJoin.getConditions().stream()
            .map(condition -> new Feature(condition.getRightFeature()))
            .collect(Collectors.toList());

        // The stored join type is used as a position into the JoinType constants.
        JoinType joinType = JoinType.values()[rightTdJoin.getType()];
        return this.constructorController.extractLeftRightOn(leftQuery, rightQuery, leftOn, rightOn, joinType);
    }

    /**
     * Builds the serving prepared statement for one query: the query is filtered with an
     * equality-to-{@code ?} condition on every primary key (combined with {@code and}), and one
     * parameter DTO per key records the key name and its 1-based position.
     *
     * <p>Side effects visible in this method: the type of every primary key feature is set to
     * {@code PREPARED_STATEMENT_TYPE} and the filter of {@code query} is replaced.
     *
     * @param preparedStatementIndex index stored on the resulting DTO
     * @param primaryKeys primary key features; the first element is read unconditionally
     * @param query the query to filter and render as online SQL
     * @return the populated prepared statement DTO
     * @throws FeaturestoreException declared by this method; presumably raised while generating
     *     the SQL - confirm against {@code constructorController.generateSQL}
     */
    private ServingPreparedStatementDTO buildServingPreparedStatementDTO(Integer preparedStatementIndex, List<Feature> primaryKeys, Query query) throws FeaturestoreException {
        List<PreparedStatementParameterDTO> parameters = new ArrayList<PreparedStatementParameterDTO>();

        // The first key seeds the filter; the remaining keys are and-ed onto it.
        Feature firstKey = primaryKeys.get(0);
        firstKey.setType(PREPARED_STATEMENT_TYPE);
        FilterLogic keyFilter = new FilterLogic(new Filter(firstKey, SqlFilterCondition.EQUALS, "?"));
        parameters.add(new PreparedStatementParameterDTO(firstKey.getName(), Integer.valueOf(1)));

        for (int position = 2; position <= primaryKeys.size(); position++) {
            Feature key = primaryKeys.get(position - 1);
            key.setType(PREPARED_STATEMENT_TYPE);
            keyFilter = keyFilter.and(new Filter(key, SqlFilterCondition.EQUALS, "?"));
            parameters.add(new PreparedStatementParameterDTO(key.getName(), Integer.valueOf(position)));
        }
        query.setFilter(keyFilter);

        ServingPreparedStatementDTO statement = new ServingPreparedStatementDTO();
        statement.setPreparedStatementIndex(preparedStatementIndex);
        statement.setQueryOnline(this.constructorController.generateSQL(query, true));
        statement.setPreparedStatementParameters(parameters);
        return statement;
    }
}

