package org.apache.hudi.hadoop.hive;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hive.common.StringInternUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.CombineHiveRecordReader;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.HiveInputFormat;
import org.apache.hadoop.hive.ql.io.IOContextMap;
import org.apache.hadoop.hive.ql.io.IOPrepareCache;
import org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat;
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.parse.SplitSample;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.HadoopShimsSecure;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.CombineFileInputFormat;
import org.apache.hadoop.mapred.lib.CombineFileSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hudi.common.util.ReflectionUtils;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.hadoop.HoodieParquetInputFormat;
import org.apache.hudi.hadoop.hive.HoodieCombineRealtimeFileSplit;
import org.apache.hudi.hadoop.realtime.HoodieCombineRealtimeRecordReader;
import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat;
import org.apache.hudi.hadoop.realtime.HoodieRealtimeRecordReader;
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

/* loaded from: input_file:org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat.class */
public class HoodieCombineHiveInputFormat<K extends WritableComparable, V extends Writable> extends HiveInputFormat<K, V> {
    // Fully-qualified name of this class; used as the logger name and as the caller tag passed to the PerfLogger in getSplits.
    private static final String CLASS_NAME = HoodieCombineHiveInputFormat.class.getName();
    // Shared log4j logger; the nested classes of this file log through it as well.
    public static final Logger LOG = LogManager.getLogger(CLASS_NAME);
    // NOTE(review): presumably the upper bound on threads used to check for non-combinable paths
    // (getSplits caps the thread count at the same value) — confirm.
    private static final int MAX_CHECK_NONCOMBINABLE_THREAD_NUM = 50;
    // NOTE(review): presumably the number of input paths assigned to each checker thread
    // (getSplits derives the thread count from the path count divided by the same value) — confirm.
    private static final int DEFAULT_NUM_PATH_PER_THREAD = 100;

    /* loaded from: input_file:org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat$AvoidSplitCombination.class */
    /**
     * Optional contract for input formats that want a say in whether a given path may be combined
     * with others. Within this file, a path for which {@code shouldSkipCombine} returns {@code true}
     * is recorded as non-combinable by {@code CheckNonCombinablePathCallable}.
     */
    public interface AvoidSplitCombination {
        /**
         * @param path          the input path being examined
         * @param configuration the job configuration handed to the check
         * @return {@code true} when the path must not be combined
         * @throws IOException if the implementation fails while inspecting the path
         */
        boolean shouldSkipCombine(Path path, Configuration configuration) throws IOException;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat$CheckNonCombinablePathCallable.class */
    /**
     * Task that inspects a contiguous slice of the input paths and reports the indices of the
     * paths that must not be combined, i.e. those whose input format implements
     * {@link AvoidSplitCombination} and answers {@code true} from {@code shouldSkipCombine}.
     */
    public class CheckNonCombinablePathCallable implements Callable<Set<Integer>> {
        private final Path[] paths;
        private final int start;
        private final int length;
        private final JobConf conf;

        /**
         * @param pathArr all input paths; only the slice {@code [i, i + i2)} is examined
         * @param i       index of the first path to examine
         * @param i2      number of paths to examine
         * @param jobConf job configuration used to resolve input formats and passed to the check
         */
        public CheckNonCombinablePathCallable(Path[] pathArr, int i, int i2, JobConf jobConf) {
            this.paths = pathArr;
            this.start = i;
            this.length = i2;
            this.conf = jobConf;
        }

        /**
         * @return the absolute indices (into the full path array) of the non-combinable paths in this slice
         * @throws Exception if the partition lookup, the input format lookup or the check itself fails
         */
        @Override // java.util.concurrent.Callable
        public Set<Integer> call() throws Exception {
            Set<Integer> nonCombinablePathIndices = new HashSet<>();
            for (int offset = 0; offset < this.length; offset++) {
                int pathIndex = offset + this.start;
                Path path = this.paths[pathIndex];
                PartitionDesc partition = HoodieCombineHiveInputFormat.getPartitionFromPath(HoodieCombineHiveInputFormat.this.pathToPartitionInfo, path, IOPrepareCache.get().allocatePartitionDescMap());
                // The cache hands back a plain InputFormat; only some formats opt in to AvoidSplitCombination,
                // so the value is held as InputFormat and cast only after the instanceof check succeeds.
                InputFormat<?, ?> inputFormat = HiveInputFormat.getInputFormatFromCache(partition.getInputFileFormatClass(), this.conf);
                if (inputFormat instanceof AvoidSplitCombination && ((AvoidSplitCombination) inputFormat).shouldSkipCombine(path, this.conf)) {
                    if (HoodieCombineHiveInputFormat.LOG.isDebugEnabled()) {
                        HoodieCombineHiveInputFormat.LOG.debug("The path [" + path + "] is being parked for HiveInputFormat.getSplits");
                    }
                    nonCombinablePathIndices.add(pathIndex);
                }
            }
            return nonCombinablePathIndices;
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat$CombineFilter.class */
    public static class CombineFilter implements PathFilter {
        private final Set<String> pStrings = new HashSet();

        public CombineFilter(Path path) {
            addPath(path);
        }

        public void addPath(Path path) {
            this.pStrings.add(path.toUri().getPath());
        }

        public boolean accept(Path path) {
            boolean z = false;
            while (true) {
                if (path == null) {
                    break;
                }
                if (this.pStrings.contains(path.toUri().getPath())) {
                    z = true;
                    break;
                }
                path = path.getParent();
            }
            return z;
        }

        public String toString() {
            StringBuilder sb = new StringBuilder();
            sb.append("PathFilter: ");
            Iterator<String> it = this.pStrings.iterator();
            while (it.hasNext()) {
                sb.append(it.next() + " ");
            }
            return sb.toString();
        }
    }

    /* loaded from: input_file:org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat$CombineHiveInputSplit.class */
    /**
     * Split wrapper that pairs a {@link CombineFileSplit} with the class name of the input format
     * that should read it. All split accessors delegate to the wrapped split; serialization writes
     * the input format class name ahead of the wrapped split's own fields.
     */
    public static class CombineHiveInputSplit extends HadoopShimsSecure.InputSplitShim {
        private String inputFormatClassName;
        protected CombineFileSplit inputSplitShim;
        private Map<Path, PartitionDesc> pathToPartitionInfo;

        /** Wraps an empty split obtained from the Hadoop shims' combine-file input format. */
        public CombineHiveInputSplit() throws IOException {
            this(ShimLoader.getHadoopShims().getCombineFileInputFormat().getInputSplitShim());
        }

        /** Wraps {@code combineFileSplit}, taking the job configuration from the split itself. */
        public CombineHiveInputSplit(CombineFileSplit combineFileSplit) throws IOException {
            this(combineFileSplit.getJob(), combineFileSplit);
        }

        /** Wraps {@code combineFileSplit}; the path-to-partition map is looked up from the job's map work when needed. */
        public CombineHiveInputSplit(JobConf jobConf, CombineFileSplit combineFileSplit) throws IOException {
            this(jobConf, combineFileSplit, null);
        }

        /**
         * Wraps {@code combineFileSplit} and, when a job configuration is available and the split
         * has at least one path, resolves the input format class name from the partition of the first path.
         *
         * @param jobConf          job configuration; when {@code null} no input format name is resolved
         * @param combineFileSplit the split to wrap
         * @param map              path-to-partition mapping; when {@code null} it is read from the job's map work
         */
        public CombineHiveInputSplit(JobConf jobConf, CombineFileSplit combineFileSplit, Map<Path, PartitionDesc> map) throws IOException {
            this.inputSplitShim = combineFileSplit;
            this.pathToPartitionInfo = map;
            if (jobConf == null) {
                return;
            }
            if (this.pathToPartitionInfo == null) {
                this.pathToPartitionInfo = Utilities.getMapWork(jobConf).getPathToPartitionInfo();
            }
            Path[] splitPaths = combineFileSplit.getPaths();
            if (splitPaths.length == 0) {
                return;
            }
            PartitionDesc firstPartition = HoodieCombineHiveInputFormat.getPartitionFromPath(this.pathToPartitionInfo, splitPaths[0], IOPrepareCache.get().getPartitionDescMap());
            this.inputFormatClassName = firstPartition.getInputFileFormatClass().getName();
        }

        /** @return the wrapped split */
        public CombineFileSplit getInputSplitShim() {
            return this.inputSplitShim;
        }

        /** @return the input format class name, or {@code null} if it has not been resolved yet */
        public String inputFormatClassName() {
            return this.inputFormatClassName;
        }

        /** Overrides the input format class name recorded for this split. */
        public void setInputFormatClassName(String str) {
            this.inputFormatClassName = str;
        }

        // ---- Plain delegates to the wrapped split ----

        public JobConf getJob() {
            return this.inputSplitShim.getJob();
        }

        public long getLength() {
            return this.inputSplitShim.getLength();
        }

        public long[] getStartOffsets() {
            return this.inputSplitShim.getStartOffsets();
        }

        public long[] getLengths() {
            return this.inputSplitShim.getLengths();
        }

        public long getOffset(int i) {
            return this.inputSplitShim.getOffset(i);
        }

        public long getLength(int i) {
            return this.inputSplitShim.getLength(i);
        }

        public int getNumPaths() {
            return this.inputSplitShim.getNumPaths();
        }

        public Path getPath(int i) {
            return this.inputSplitShim.getPath(i);
        }

        public Path[] getPaths() {
            return this.inputSplitShim.getPaths();
        }

        public String[] getLocations() throws IOException {
            return this.inputSplitShim.getLocations();
        }

        /** @return the wrapped split's text followed by the input format class name and a newline */
        public String toString() {
            return this.inputSplitShim.toString() + "InputFormatClass: " + this.inputFormatClassName + "\n";
        }

        /**
         * Reads the input format class name, then the wrapped split. For the realtime input format
         * the stream additionally carries the concrete split class name, which is instantiated first.
         */
        public void readFields(DataInput dataInput) throws IOException {
            this.inputFormatClassName = Text.readString(dataInput);
            boolean realtimeFormat = HoodieParquetRealtimeInputFormat.class.getName().equals(this.inputFormatClassName);
            if (realtimeFormat) {
                String splitClassName = Text.readString(dataInput);
                this.inputSplitShim = (CombineFileSplit) ReflectionUtils.loadClass(splitClassName);
            }
            this.inputSplitShim.readFields(dataInput);
        }

        /**
         * Writes the input format class name (resolving it from the first path's partition if still
         * unset), then - for the realtime input format only - the concrete split class name, then the wrapped split.
         */
        public void write(DataOutput dataOutput) throws IOException {
            if (this.inputFormatClassName == null) {
                if (this.pathToPartitionInfo == null) {
                    this.pathToPartitionInfo = Utilities.getMapWork(getJob()).getPathToPartitionInfo();
                }
                PartitionDesc firstPartition = HoodieCombineHiveInputFormat.getPartitionFromPath(this.pathToPartitionInfo, this.inputSplitShim.getPath(0), IOPrepareCache.get().getPartitionDescMap());
                this.inputFormatClassName = firstPartition.getInputFileFormatClass().getName();
            }
            Text.writeString(dataOutput, this.inputFormatClassName);
            String realtimeFormatName = HoodieParquetRealtimeInputFormat.class.getName();
            if (realtimeFormatName.equals(this.inputFormatClassName)) {
                Text.writeString(dataOutput, this.inputSplitShim.getClass().getName());
            }
            this.inputSplitShim.write(dataOutput);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat$CombinePathInputFormat.class */
    /**
     * Immutable key identifying a combine pool: two paths may share a pool only when their
     * operator list, input format class name and deserializer class name are all equal.
     * Any of the three components may be {@code null}; comparison and hashing are null-safe.
     */
    public static class CombinePathInputFormat {
        private final List<Operator<? extends OperatorDesc>> opList;
        private final String inputFormatClassName;
        private final String deserializerClassName;

        /**
         * @param list operators associated with the path
         * @param str  input format class name
         * @param str2 deserializer class name; may be {@code null}
         */
        public CombinePathInputFormat(List<Operator<? extends OperatorDesc>> list, String str, String str2) {
            this.opList = list;
            this.inputFormatClassName = str;
            this.deserializerClassName = str2;
        }

        /**
         * Component-wise equality. Uses {@link Objects#equals} for every field so that a
         * {@code null} operator list or class name cannot raise a NullPointerException, which
         * keeps this method consistent with {@link #hashCode()} tolerating a {@code null} list.
         */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof CombinePathInputFormat)) {
                return false;
            }
            CombinePathInputFormat other = (CombinePathInputFormat) obj;
            return Objects.equals(this.opList, other.opList)
                    && Objects.equals(this.inputFormatClassName, other.inputFormatClassName)
                    && Objects.equals(this.deserializerClassName, other.deserializerClassName);
        }

        /**
         * Hash of the operator list only ({@code 0} when it is {@code null}); equal keys always
         * have equal operator lists, so the equals/hashCode contract holds.
         */
        @Override
        public int hashCode() {
            return Objects.hashCode(this.opList);
        }
    }

    /* loaded from: input_file:org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat$HoodieCombineFileInputFormatShim.class */
    /**
     * Combine-file input format shim that can list files and compute splits through the Hudi
     * parquet input formats. Two switches control it: {@code hoodieFilter} routes file listing
     * (and, for realtime, split computation) through a Hudi input format, and {@code isRealTime}
     * selects the realtime variant and the realtime split / record reader types.
     */
    public static class HoodieCombineFileInputFormatShim<K, V> extends CombineFileInputFormat<K, V> implements HadoopShims.CombineFileInputFormatShim<K, V> {
        private boolean hoodieFilter = false;
        private boolean isRealTime = false;

        /** Factory hook for the non-realtime Hudi input format; overridable by subclasses. */
        protected HoodieParquetInputFormat createParquetInputFormat() {
            return new HoodieParquetInputFormat();
        }

        /** Factory hook for the realtime Hudi input format; overridable by subclasses. */
        protected HoodieParquetRealtimeInputFormat createParquetRealtimeInputFormat() {
            return new HoodieParquetRealtimeInputFormat();
        }

        /**
         * @return the input paths configured on {@code jobConf}
         * @throws RuntimeException wrapping any failure raised while reading the paths
         */
        public Path[] getInputPathsShim(JobConf jobConf) {
            try {
                return FileInputFormat.getInputPaths(jobConf);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }

        /** Exposes the inherited pool creation publicly, as the shim interface requires. */
        public void createPool(JobConf jobConf, PathFilter... pathFilterArr) {
            super.createPool(jobConf, pathFilterArr);
        }

        /** Always fails: record readers are obtained through the four-argument overload instead. */
        public RecordReader<K, V> getRecordReader(InputSplit inputSplit, JobConf jobConf, Reporter reporter) throws IOException {
            throw new IOException("CombineFileInputFormat.getRecordReader not needed.");
        }

        /**
         * Lists the input files, going through the matching Hudi input format when the hoodie
         * filter is enabled and through the inherited listing otherwise. Entries that are not
         * regular files are removed from the result.
         */
        protected List<FileStatus> listStatus(JobContext jobContext) throws IOException {
            HoodieCombineHiveInputFormat.LOG.info("Listing status in HoodieCombineHiveInputFormat.HoodieCombineFileInputFormatShim");
            List<FileStatus> statuses;
            if (this.hoodieFilter) {
                HoodieParquetInputFormat inputFormat;
                if (this.isRealTime) {
                    HoodieCombineHiveInputFormat.LOG.info("Using HoodieRealtimeInputFormat");
                    inputFormat = createParquetRealtimeInputFormat();
                } else {
                    HoodieCombineHiveInputFormat.LOG.info("Using HoodieInputFormat");
                    inputFormat = createParquetInputFormat();
                }
                inputFormat.setConf(jobContext.getConfiguration());
                // Copy into an ArrayList: Arrays.asList is fixed-size and removeIf below must be able to shrink it.
                statuses = new ArrayList<>(Arrays.asList(inputFormat.listStatus(new JobConf(jobContext.getConfiguration()))));
            } else {
                statuses = super.listStatus(jobContext);
            }
            statuses.removeIf(status -> !status.isFile());
            return statuses;
        }

        /**
         * Computes the combined splits for the job.
         *
         * <p>Non-realtime: delegates to the inherited combine logic and re-wraps every non-empty
         * split as an {@code InputSplitShim}. Realtime: obtains the individual splits (from the
         * Hudi realtime input format when the hoodie filter is on) and groups them into
         * {@code HoodieCombineRealtimeFileSplit}s.
         *
         * <p>NOTE(review): the realtime grouping uses the configured
         * {@code mapreduce.input.fileinputformat.split.maxsize} value as the number of splits per
         * group. A non-positive value is treated as "no limit" (one group holding every split);
         * previously every split but the last was silently dropped in that case.
         *
         * <p>The method name carries a decompiler-generated prefix; other code in this file calls
         * it under this name, so it is kept.
         *
         * @param jobConf job configuration; {@code hudi.hive.realtime} is set to {@code true} on it in realtime mode
         * @param i       requested number of splits, forwarded to the underlying input format
         */
        /* renamed from: getSplits, reason: merged with bridge method [inline-methods] */
        public CombineFileSplit[] m12331getSplits(JobConf jobConf, int i) throws IOException {
            long minSize = jobConf.getLong("mapreduce.input.fileinputformat.split.minsize", 0L);
            long maxSize = jobConf.getLong("mapreduce.input.fileinputformat.split.maxsize", minSize);
            // Fall back to the generic minimum size wherever a more specific limit is not configured.
            if (jobConf.getLong("mapreduce.input.fileinputformat.split.minsize.per.node", 0L) == 0) {
                super.setMinSplitSizeNode(minSize);
            }
            if (jobConf.getLong("mapreduce.input.fileinputformat.split.minsize.per.rack", 0L) == 0) {
                super.setMinSplitSizeRack(minSize);
            }
            if (jobConf.getLong("mapreduce.input.fileinputformat.split.maxsize", 0L) == 0) {
                super.setMaxSplitSize(minSize);
            }
            HoodieCombineHiveInputFormat.LOG.info("mapreduce.input.fileinputformat.split.minsize=" + minSize + ", mapreduce.input.fileinputformat.split.maxsize=" + maxSize);
            if (!this.isRealTime) {
                InputSplit[] combined = super.getSplits(jobConf, i);
                List<HadoopShimsSecure.InputSplitShim> shims = new ArrayList<>();
                for (InputSplit split : combined) {
                    CombineFileSplit combineFileSplit = (CombineFileSplit) split;
                    if (combineFileSplit.getPaths().length > 0) {
                        shims.add(new HadoopShimsSecure.InputSplitShim(jobConf, combineFileSplit.getPaths(), combineFileSplit.getStartOffsets(), combineFileSplit.getLengths(), combineFileSplit.getLocations()));
                    }
                }
                return shims.toArray(new HadoopShimsSecure.InputSplitShim[shims.size()]);
            }
            jobConf.set("hudi.hive.realtime", "true");
            InputSplit[] splits;
            if (this.hoodieFilter) {
                HoodieParquetRealtimeInputFormat realtimeInputFormat = createParquetRealtimeInputFormat();
                realtimeInputFormat.setConf(jobConf);
                splits = realtimeInputFormat.getSplits(jobConf, i);
            } else {
                splits = super.getSplits(jobConf, i);
            }
            List<CombineFileSplit> combinedSplits = new ArrayList<>();
            HoodieCombineRealtimeFileSplit.Builder builder = new HoodieCombineRealtimeFileSplit.Builder();
            int counter = 0;
            for (int pos = 0; pos < splits.length; pos++) {
                // Every split joins the current group; nothing is ever skipped.
                builder.addSplit((FileSplit) splits[pos]);
                if (counter == maxSize - 1 || pos == splits.length - 1) {
                    // Group is full, or this is the last split: emit the group and start a new one.
                    combinedSplits.add(builder.build(jobConf));
                    builder = new HoodieCombineRealtimeFileSplit.Builder();
                    counter = 0;
                } else {
                    counter++;
                }
            }
            return combinedSplits.toArray(new CombineFileSplit[combinedSplits.size()]);
        }

        /**
         * @return a fresh, empty {@code InputSplitShim}. The method name carries a
         *         decompiler-generated prefix and is kept as-is.
         */
        /* renamed from: getInputSplitShim, reason: merged with bridge method [inline-methods] */
        public HadoopShimsSecure.InputSplitShim m12332getInputSplitShim() {
            return new HadoopShimsSecure.InputSplitShim();
        }

        /**
         * Creates the record reader for a combined split. The realtime flag is re-read from
         * {@code hudi.hive.realtime} on {@code jobConf}. In realtime mode the split must be a
         * {@code HoodieCombineRealtimeFileSplit}; one reader is created per contained realtime
         * split and they are wrapped in a {@code HoodieCombineRealtimeRecordReader}.
         *
         * @throws IllegalArgumentException (via {@code ValidationUtils.checkArgument}) presumably,
         *         when a realtime read is handed a split of another type — confirm against ValidationUtils
         */
        public RecordReader getRecordReader(JobConf jobConf, CombineFileSplit combineFileSplit, Reporter reporter, Class<RecordReader<K, V>> cls) throws IOException {
            this.isRealTime = Boolean.parseBoolean(jobConf.get("hudi.hive.realtime", HoodieRealtimeRecordReader.DEFAULT_REALTIME_SKIP_MERGE));
            if (!this.isRealTime) {
                return new HadoopShimsSecure.CombineFileRecordReader(jobConf, combineFileSplit, reporter, cls);
            }
            // Left as a raw LinkedList on purpose: the parameter type of the
            // HoodieCombineRealtimeRecordReader constructor is not visible from this file.
            LinkedList recordReaders = new LinkedList();
            ValidationUtils.checkArgument(combineFileSplit instanceof HoodieCombineRealtimeFileSplit, "Only " + HoodieCombineRealtimeFileSplit.class.getName() + " allowed, found " + combineFileSplit.getClass().getName());
            for (InputSplit inputSplit : ((HoodieCombineRealtimeFileSplit) combineFileSplit).getRealtimeFileSplits()) {
                if (combineFileSplit.getPaths().length != 0) {
                    // The input format is chosen from the first path of the combined split.
                    recordReaders.add(HoodieInputFormatUtils.getInputFormat(combineFileSplit.getPath(0).toString(), true, (Configuration) jobConf).getRecordReader(inputSplit, jobConf, reporter));
                }
            }
            return new HoodieCombineRealtimeRecordReader(jobConf, combineFileSplit, recordReaders);
        }

        /** Enables or disables listing/splitting through the Hudi input formats. */
        public void setHoodieFilter(boolean z) {
            this.hoodieFilter = z;
        }

        /** Enables or disables realtime mode. */
        public void setRealTime(boolean z) {
            this.isRealTime = z;
        }
    }

    /**
     * @return the fully-qualified class name of {@link HoodieParquetInputFormat}; paths whose
     *         input format has this name are listed through the Hudi input format
     */
    protected String getParquetInputFormatClassName() {
        final Class<?> formatClass = HoodieParquetInputFormat.class;
        return formatClass.getName();
    }

    /**
     * @return the fully-qualified class name of {@link HoodieParquetRealtimeInputFormat}; paths
     *         whose input format has this name switch the shim into realtime mode
     */
    protected String getParquetRealtimeInputFormatClassName() {
        final Class<?> formatClass = HoodieParquetRealtimeInputFormat.class;
        return formatClass.getName();
    }

    /**
     * Factory hook for the combine-file shim used while computing combined splits.
     *
     * @return a new shim with the hoodie filter and realtime mode both switched off
     */
    protected HoodieCombineFileInputFormatShim createInputFormatShim() {
        HoodieCombineFileInputFormatShim shim = new HoodieCombineFileInputFormatShim();
        return shim;
    }

    /**
     * Builds the combined splits for the input paths currently configured on {@code jobConf}.
     *
     * <p>Falls back to the superclass' {@code getSplits} as soon as one path belongs to a
     * non-native table or is read through {@code SymlinkTextInputFormat}. Otherwise paths are
     * grouped into pools (or processed per directory / per parent directory when mappers may not
     * span partitions), the shim computes the combined splits, optional sampling is applied, and
     * every resulting split is wrapped in a {@code CombineHiveInputSplit} (or its realtime variant).
     *
     * @param jobConf job configuration; {@code partition_columns} may be set on it for realtime inputs
     * @param i       requested number of splits; only used by the superclass fallback
     * @param map     path-to-partition mapping used to resolve each path's partition
     * @return the wrapped combined splits, or the superclass' splits on fallback
     * @throws IOException if no input path is configured or any lookup/split computation fails
     */
    private InputSplit[] getCombineSplits(JobConf jobConf, int i, Map<Path, PartitionDesc> map) throws IOException {
        init(jobConf);
        LinkedHashMap pathToAliases = this.mrwork.getPathToAliases();
        LinkedHashMap aliasToWork = this.mrwork.getAliasToWork();
        HoodieCombineFileInputFormatShim createInputFormatShim = createInputFormatShim();
        if (createInputFormatShim.getInputPathsShim(jobConf).length == 0) {
            throw new IOException("No input paths specified in job");
        }
        // arrayList: final wrapped splits; arrayList2: directory inputs; arrayList3: file inputs;
        // hashMap: pool key -> filter already created for it; hashSet: parent directories of the file inputs.
        ArrayList arrayList = new ArrayList();
        Path[] internUriStringsInPathArray = StringInternUtils.internUriStringsInPathArray(createInputFormatShim.getInputPathsShim(jobConf));
        ArrayList arrayList2 = new ArrayList();
        ArrayList arrayList3 = new ArrayList();
        HashMap hashMap = new HashMap();
        HashSet hashSet = new HashSet();
        for (Path path : internUriStringsInPathArray) {
            PartitionDesc partitionFromPath = getPartitionFromPath(map, path, IOPrepareCache.get().allocatePartitionDescMap());
            TableDesc tableDesc = partitionFromPath.getTableDesc();
            // Non-native tables are not combined here: hand the whole job to the superclass.
            if (tableDesc != null && tableDesc.isNonNative()) {
                return super.getSplits(jobConf, i);
            }
            Class inputFileFormatClass = partitionFromPath.getInputFileFormatClass();
            String name = inputFileFormatClass.getName();
            InputFormat inputFormatFromCache = getInputFormatFromCache(inputFileFormatClass, jobConf);
            LOG.info("Input Format => " + inputFileFormatClass.getName());
            // Switch the shim into Hudi (and possibly realtime) mode based on the path's input format.
            if (inputFileFormatClass.getName().equals(getParquetInputFormatClassName())) {
                createInputFormatShim.setHoodieFilter(true);
            } else if (inputFileFormatClass.getName().equals(getParquetRealtimeInputFormatClassName())) {
                LOG.info("Setting hoodie filter and realtime input format");
                createInputFormatShim.setHoodieFilter(true);
                createInputFormatShim.setRealTime(true);
                // Publish the partition keys of this path when the job does not carry any yet.
                if (jobConf.get("partition_columns", "").isEmpty()) {
                    ArrayList arrayList4 = new ArrayList(partitionFromPath.getPartSpec().keySet());
                    if (arrayList4.isEmpty()) {
                        jobConf.set("partition_columns", "");
                    } else {
                        String join = String.join("/", arrayList4);
                        LOG.info("Setting Partitions in jobConf - Partition Keys for Path : " + path + " is :" + join);
                        jobConf.set("partition_columns", join);
                    }
                }
            }
            // Best effort: a failure here is logged and the deserializer name stays null.
            String str = null;
            try {
                str = partitionFromPath.getDeserializer(jobConf).getClass().getName();
            } catch (Exception e) {
                LOG.error("Getting deserializer class name error ", e);
            }
            // Symlink text inputs are not combined here either: fall back to the superclass.
            if (inputFormatFromCache instanceof SymlinkTextInputFormat) {
                return super.getSplits(jobConf, i);
            }
            if (!this.mrwork.isMapperCannotSpanPartns()) {
                // One pool per distinct (operators, input format, deserializer) key; further paths with the
                // same key are added to the filter that was created for the first one.
                CombinePathInputFormat combinePathInputFormat = new CombinePathInputFormat(HiveFileFormatUtils.doGetWorksFromPath(pathToAliases, aliasToWork, path), name, str);
                CombineFilter combineFilter = (CombineFilter) hashMap.get(combinePathInputFormat);
                if (combineFilter == null) {
                    CombineFilter combineFilter2 = new CombineFilter(path);
                    LOG.info("CombineHiveInputSplit creating pool for " + path + "; using filter path " + path);
                    createInputFormatShim.createPool(jobConf, combineFilter2);
                    hashMap.put(combinePathInputFormat, combineFilter2);
                } else {
                    LOG.info("CombineHiveInputSplit: pool is already created for " + path + "; using filter path " + path);
                    combineFilter.addPath(path);
                }
            } else if (path.getFileSystem(jobConf).getFileStatus(path).isDirectory()) {
                // Mappers may not span partitions: directories are processed one at a time later on.
                arrayList2.add(path);
            } else {
                // Plain files are processed together, pooled by their parent directory.
                Path parent = path.getParent();
                arrayList3.add(path);
                hashSet.add(parent);
            }
        }
        List<CombineFileSplit> arrayList5 = new ArrayList();
        if (this.mrwork.isMapperCannotSpanPartns()) {
            Iterator it = arrayList2.iterator();
            while (it.hasNext()) {
                processPaths(jobConf, createInputFormatShim, arrayList5, (Path) it.next());
            }
            if (arrayList3.size() > 0) {
                Iterator it2 = hashSet.iterator();
                while (it2.hasNext()) {
                    createInputFormatShim.createPool(jobConf, new CombineFilter((Path) it2.next()));
                }
                processPaths(jobConf, createInputFormatShim, arrayList5, (Path[]) arrayList3.toArray(new Path[0]));
            }
        } else {
            arrayList5 = Arrays.asList(createInputFormatShim.m12331getSplits(jobConf, 1));
        }
        // Apply sampling only when the plan defines split samples.
        if (this.mrwork.getNameToSplitSample() != null && !this.mrwork.getNameToSplitSample().isEmpty()) {
            arrayList5 = sampleSplits(arrayList5);
        }
        // Wrap each split; in realtime mode splits that already are realtime hive splits are reused as-is.
        for (CombineFileSplit combineFileSplit : arrayList5) {
            arrayList.add(createInputFormatShim.isRealTime ? combineFileSplit instanceof HoodieCombineRealtimeHiveSplit ? combineFileSplit : new HoodieCombineRealtimeHiveSplit(jobConf, combineFileSplit, map) : new CombineHiveInputSplit(jobConf, combineFileSplit, map));
        }
        LOG.info("number of splits " + arrayList.size());
        return (InputSplit[]) arrayList.toArray(new CombineHiveInputSplit[arrayList.size()]);
    }

    /**
     * Determines, in parallel, which of the given paths must not be combined.
     *
     * <p>The path array is cut into {@code i} contiguous slices of
     * {@code ceil(pathArr.length / i)} paths each (the trailing slices are shortened, possibly
     * to nothing, so no slice reaches past the end of the array). Each slice is checked by one
     * {@code CheckNonCombinablePathCallable}. The thread pool is shut down only after every
     * task has been submitted and its result collected, or when a failure aborts the operation.
     *
     * @param jobConf job configuration handed to every checker task
     * @param pathArr the paths to examine
     * @param i       number of threads / slices; must be positive
     * @return the indices into {@code pathArr} of all non-combinable paths
     * @throws ExecutionException   if one of the checker tasks failed
     * @throws InterruptedException if the calling thread was interrupted while waiting for a result
     */
    public Set<Integer> getNonCombinablePathIndices(JobConf jobConf, Path[] pathArr, int i) throws ExecutionException, InterruptedException {
        LOG.info("Total number of paths: " + pathArr.length + ", launching " + i + " threads to check non-combinable ones.");
        // Divide as doubles: an integer division would already have truncated before ceil() is applied.
        int pathsPerThread = (int) Math.ceil((double) pathArr.length / i);
        ExecutorService executor = Executors.newFixedThreadPool(i);
        try {
            List<Future<Set<Integer>>> pending = new ArrayList<>(i);
            for (int thread = 0; thread < i; thread++) {
                int sliceStart = thread * pathsPerThread;
                // Clamp to what is left so that trailing slices never run past the end of the array.
                int sliceLength = Math.max(0, Math.min(pathsPerThread, pathArr.length - sliceStart));
                pending.add(executor.submit(new CheckNonCombinablePathCallable(pathArr, sliceStart, sliceLength, jobConf)));
            }
            Set<Integer> nonCombinablePathIndices = new HashSet<>();
            for (Future<Set<Integer>> result : pending) {
                nonCombinablePathIndices.addAll(result.get());
            }
            return nonCombinablePathIndices;
        } finally {
            // Reached once all results are in (or on failure); stops any task that is still running.
            executor.shutdownNow();
        }
    }

    /**
     * Computes all splits for the job.
     *
     * <p>The input paths are first partitioned into non-combinable ones (as reported by
     * {@code getNonCombinablePathIndices}) and combinable ones. Non-combinable paths are split by
     * the superclass, combinable paths by {@code getCombineSplits}. While doing so the job's
     * input paths are temporarily overwritten; the original
     * {@code mapreduce.input.fileinputformat.inputdir} value is restored afterwards, also when
     * split computation fails. The perf-log section opened at the start is always closed.
     *
     * @param jobConf job configuration
     * @param i       requested number of splits, forwarded to the underlying computations
     * @return the splits for the non-combinable paths followed by those for the combinable paths
     * @throws IOException if the paths cannot be determined, classified or split
     */
    public InputSplit[] getSplits(JobConf jobConf, int i) throws IOException {
        PerfLogger perfLogger = SessionState.getPerfLogger();
        perfLogger.PerfLogBegin(CLASS_NAME, "getSplits");
        try {
            init(jobConf);
            List<InputSplit> allSplits = new ArrayList<>();
            Path[] inputPaths = getInputPaths(jobConf);
            List<Path> nonCombinablePaths = new ArrayList<>(inputPaths.length / 2);
            List<Path> combinablePaths = new ArrayList<>(inputPaths.length / 2);
            int numThreads = Math.min(MAX_CHECK_NONCOMBINABLE_THREAD_NUM, (int) Math.ceil((double) inputPaths.length / DEFAULT_NUM_PATH_PER_THREAD));
            if (numThreads > 0) {
                try {
                    Set<Integer> nonCombinablePathIndices = getNonCombinablePathIndices(jobConf, inputPaths, numThreads);
                    for (int index = 0; index < inputPaths.length; index++) {
                        if (nonCombinablePathIndices.contains(index)) {
                            nonCombinablePaths.add(inputPaths[index]);
                        } else {
                            combinablePaths.add(inputPaths[index]);
                        }
                    }
                } catch (Exception e) {
                    if (e instanceof InterruptedException) {
                        // Keep the interrupt visible to callers even though it is reported as an IOException.
                        Thread.currentThread().interrupt();
                    }
                    LOG.error("Error checking non-combinable path", e);
                    throw new IOException(e);
                }
            }
            String originalInputDirs = jobConf.get("mapreduce.input.fileinputformat.inputdir");
            if (LOG.isDebugEnabled()) {
                LOG.debug("The received input paths are: [" + originalInputDirs + "] against the property mapreduce.input.fileinputformat.inputdir");
            }
            try {
                if (nonCombinablePaths.size() > 0) {
                    FileInputFormat.setInputPaths(jobConf, nonCombinablePaths.toArray(new Path[0]));
                    Collections.addAll(allSplits, super.getSplits(jobConf, i));
                }
                if (combinablePaths.size() > 0) {
                    FileInputFormat.setInputPaths(jobConf, combinablePaths.toArray(new Path[0]));
                    Collections.addAll(allSplits, getCombineSplits(jobConf, i, this.pathToPartitionInfo != null ? this.pathToPartitionInfo : Utilities.getMapWork(jobConf).getPathToPartitionInfo()));
                }
            } finally {
                // Undo the temporary input-path overrides made above, whether or not splitting succeeded.
                if (originalInputDirs != null) {
                    jobConf.set("mapreduce.input.fileinputformat.inputdir", originalInputDirs);
                }
            }
            Utilities.clearWorkMapForConf(jobConf);
            LOG.info("Number of all splits " + allSplits.size());
            return allSplits.toArray(new InputSplit[allSplits.size()]);
        } finally {
            perfLogger.PerfLogEnd(CLASS_NAME, "getSplits");
        }
    }

    /**
     * Computes the combined splits for {@code pathArr} and appends them to {@code list}.
     * The input paths are set on a private copy of the job configuration, so {@code jobConf}
     * itself is not modified by this method.
     */
    private void processPaths(JobConf jobConf, HadoopShims.CombineFileInputFormatShim combineFileInputFormatShim, List<CombineFileSplit> list, Path... pathArr) throws IOException {
        JobConf scopedConf = new JobConf(jobConf);
        FileInputFormat.setInputPaths(scopedConf, pathArr);
        CombineFileSplit[] produced = combineFileInputFormatShim.getSplits(scopedConf, 1);
        Collections.addAll(list, produced);
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Resolves the partition descriptor for {@code path} by reflectively calling
     * {@code HiveFileFormatUtils}. The method {@code getPartitionDescFromPathRecursively} is
     * tried first; if that method does not exist, {@code getFromPathRecursively} is used instead.
     *
     * @param map  path-to-partition mapping to search
     * @param path the path to resolve
     * @param map2 cache map forwarded unchanged to the Hive utility
     * @return whatever the Hive utility returns for the path
     * @throws IOException wrapping any reflective failure, including an exception thrown by the invoked method
     */
    public static PartitionDesc getPartitionFromPath(Map<Path, PartitionDesc> map, Path path, Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> map2) throws IOException {
        final Class<?>[] signature = {Map.class, Path.class, Map.class};
        try {
            Class<?> hiveUtils = Class.forName("org.apache.hadoop.hive.ql.io.HiveFileFormatUtils");
            Method lookup;
            try {
                lookup = hiveUtils.getMethod("getPartitionDescFromPathRecursively", signature);
            } catch (NoSuchMethodException ignored) {
                // Not available under the first name: try the alternative method name.
                lookup = hiveUtils.getMethod("getFromPathRecursively", signature);
            }
            Object partition = lookup.invoke(null, map, path, map2);
            return (PartitionDesc) partition;
        } catch (ReflectiveOperationException failure) {
            throw new IOException(failure);
        }
    }

    /**
     * Returns the job's input paths. When none are configured and the execution engine is
     * {@code tez}, the paths are obtained from {@code Utilities.getInputPathsTez} instead.
     *
     * @throws IOException if no paths are configured and the engine is not {@code tez}, or if
     *                     obtaining the Tez input paths fails
     */
    Path[] getInputPaths(JobConf jobConf) throws IOException {
        Path[] configured = FileInputFormat.getInputPaths(jobConf);
        if (configured.length != 0) {
            return configured;
        }
        String engine = HiveConf.getVar(jobConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
        if (!engine.equals("tez")) {
            throw new IOException("No input paths specified in job");
        }
        try {
            List tezPaths = Utilities.getInputPathsTez(jobConf, this.mrwork);
            return (Path[]) tezPaths.toArray(new Path[tezPaths.size()]);
        } catch (Exception e) {
            throw new IOException("Could not create input files", e);
        }
    }

    /**
     * Applies per-alias split sampling to the given combine splits.
     *
     * <p>Each split is assigned to an alias only when every one of its paths resolves to
     * exactly one alias, that alias is the same for all paths, and a {@link SplitSample} is
     * registered for it in {@code mrwork.getNameToSplitSample()}. Splits that do not qualify
     * are passed through untouched.
     *
     * <p>For each sampled alias, splits are taken in rotation starting at
     * {@code seedNum % splitCount} until their accumulated length reaches the target size
     * returned by {@link SplitSample#getTargetSize(long)} for the alias's total length. The
     * split that crosses the target is shrunk so the accumulated length equals the target.
     *
     * @param list combine splits to sample
     * @return pass-through splits followed by the splits selected for each sampled alias
     */
    private List<CombineFileSplit> sampleSplits(List<CombineFileSplit> list) {
        Map<String, SplitSample> nameToSplitSample = this.mrwork.getNameToSplitSample();
        List<CombineFileSplit> result = new ArrayList<>();
        Map<String, List<CombineFileSplit>> aliasToSplits = new HashMap<>();
        Map<Path, ArrayList<String>> pathToAliases = this.mrwork.getPathToAliases();
        Map<Path, ArrayList<String>> pathToAliasesNoScheme = removeScheme(pathToAliases);

        // Phase 1: bucket splits by the single sampled alias they belong to, if any.
        for (CombineFileSplit split : list) {
            String alias = null;
            for (Path path : split.getPaths()) {
                // Paths without a scheme must be matched against the scheme-less alias map.
                boolean schemeless = path.toUri().getScheme() == null;
                List<String> aliases = HiveFileFormatUtils.doGetAliasesFromPath(
                        schemeless ? pathToAliasesNoScheme : pathToAliases, path);
                if (aliases.size() != 1
                        || !nameToSplitSample.containsKey(aliases.get(0))
                        || (alias != null && !Objects.equals(aliases.get(0), alias))) {
                    alias = null;
                    break;
                }
                alias = aliases.get(0);
            }
            if (alias == null) {
                result.add(split);
                continue;
            }
            List<CombineFileSplit> bucket = aliasToSplits.get(alias);
            if (bucket == null) {
                bucket = new ArrayList<>();
                aliasToSplits.put(alias, bucket);
            }
            bucket.add(split);
        }

        // Phase 2: for every sampled alias keep only enough splits to reach the target size.
        for (Map.Entry<String, List<CombineFileSplit>> entry : aliasToSplits.entrySet()) {
            List<CombineFileSplit> candidates = entry.getValue();
            long totalSize = 0;
            for (CombineFileSplit candidate : candidates) {
                totalSize += candidate.getLength();
            }

            SplitSample splitSample = nameToSplitSample.get(entry.getKey());
            long targetSize = splitSample.getTargetSize(totalSize);
            // Buckets are never empty: each is created together with its first split.
            int startIndex = splitSample.getSeedNum() % candidates.size();

            long sampledSize = 0;
            // Bounded loop: visit each candidate at most once and stop as soon as the
            // target is reached, so a split is never added twice and the loop always ends.
            for (int i = 0; i < candidates.size(); i++) {
                CombineFileSplit split = candidates.get((startIndex + i) % candidates.size());
                result.add(split);
                long length = split.getLength();
                if (sampledSize + length >= targetSize) {
                    LOG.info("Sample alias " + entry.getKey() + " using " + (i + 1) + " splits");
                    if (sampledSize + length > targetSize) {
                        // Trim the last split so the sampled total equals the target.
                        ((HadoopShimsSecure.InputSplitShim) split).shrinkSplit(targetSize - sampledSize);
                    }
                    break;
                }
                sampledSize += length;
            }
        }
        return result;
    }

    /**
     * Builds a copy of the given path-to-aliases map whose keys have had their scheme and
     * authority removed via {@link Path#getPathWithoutSchemeAndAuthority(Path)}.
     *
     * <p>Each stripped path is passed to {@code StringInternUtils.internUriStringsInPath}
     * before being used as a key. The alias lists are shared with the input map, not copied.
     *
     * @param map mapping from paths to their alias lists
     * @return a new {@link HashMap} keyed by the stripped paths
     */
    Map<Path, ArrayList<String>> removeScheme(Map<Path, ArrayList<String>> map) {
        Map<Path, ArrayList<String>> strippedToAliases = new HashMap<>();
        for (Map.Entry<Path, ArrayList<String>> pathAndAliases : map.entrySet()) {
            Path original = pathAndAliases.getKey();
            Path stripped = Path.getPathWithoutSchemeAndAuthority(original);
            StringInternUtils.internUriStringsInPath(stripped);
            strippedToAliases.put(stripped, pathAndAliases.getValue());
        }
        return strippedToAliases;
    }

    /**
     * Creates the record reader for a split.
     *
     * <p>Splits that are not {@code CombineHiveInputSplit} instances are delegated to the
     * superclass. Otherwise the split's input format class is loaded through the job
     * configuration, projections and filters are pushed for the split's first path, and the
     * reader is created either through the Hadoop shim's combine file input format or, when
     * the input format class name equals {@code getParquetRealtimeInputFormatClassName()},
     * through the shim returned by {@code createInputFormatShim()}.
     *
     * @param inputSplit split to read
     * @param jobConf    job configuration
     * @param reporter   progress reporter handed to the underlying reader
     * @return the record reader for the split
     * @throws IOException if the input format class cannot be loaded (cause attached), if
     *                     the underlying reader creation fails with an {@code IOException}
     *                     (propagated unchanged), or wrapping any other failure
     */
    public RecordReader getRecordReader(InputSplit inputSplit, JobConf jobConf, Reporter reporter) throws IOException {
        if (!(inputSplit instanceof CombineHiveInputSplit)) {
            return super.getRecordReader(inputSplit, jobConf, reporter);
        }
        CombineHiveInputSplit combineSplit = (CombineHiveInputSplit) inputSplit;
        // Declared outside the try so the error messages can name the class.
        String inputFormatClassName = null;
        try {
            inputFormatClassName = combineSplit.inputFormatClassName();
            Class<?> inputFormatClass = jobConf.getClassByName(inputFormatClassName);
            pushProjectionsAndFilters(jobConf, inputFormatClass, combineSplit.getPath(0));
            if (!inputFormatClass.getName().equals(getParquetRealtimeInputFormatClassName())) {
                return ShimLoader.getHadoopShims().getCombineFileInputFormat().getRecordReader(jobConf, (CombineFileSplit) inputSplit, reporter, CombineHiveRecordReader.class);
            }
            HoodieCombineFileInputFormatShim inputFormatShim = createInputFormatShim();
            IOContextMap.get(jobConf).setInputPath(combineSplit.getPath(0));
            return inputFormatShim.getRecordReader(jobConf, combineSplit.getInputSplitShim(), reporter, CombineHiveRecordReader.class);
        } catch (ClassNotFoundException e) {
            throw new IOException("cannot find class " + inputFormatClassName, e);
        } catch (IOException e) {
            // Reader-creation failures already carry an accurate message; do not mask them.
            throw e;
        } catch (Exception e) {
            // Keep surfacing unexpected failures as IOException, but with an accurate
            // message and the original cause attached.
            throw new IOException("cannot create record reader for input format " + inputFormatClassName, e);
        }
    }
}
