/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ignite.ml.dataset.impl.bootstrapping;

import java.util.Arrays;
import java.util.Iterator;
import org.apache.commons.math3.distribution.PoissonDistribution;
import org.apache.ignite.ml.dataset.PartitionDataBuilder;
import org.apache.ignite.ml.dataset.UpstreamEntry;
import org.apache.ignite.ml.dataset.impl.bootstrapping.BootstrappedDatasetPartition;
import org.apache.ignite.ml.dataset.impl.bootstrapping.BootstrappedVector;
import org.apache.ignite.ml.dataset.primitive.context.EmptyContext;
import org.apache.ignite.ml.math.functions.IgniteBiFunction;
import org.apache.ignite.ml.math.primitives.vector.Vector;

/**
 * Partition data builder that converts upstream key/value rows into
 * {@link BootstrappedVector}s and packs them into a {@link BootstrappedDatasetPartition}.
 *
 * <p>Every row is paired with {@code samplesCnt} integer repetition counters, each one drawn
 * from a Poisson distribution whose mean is {@code subsampleSize}.
 *
 * @param <K> Type of a key in the upstream data.
 * @param <V> Type of a value in the upstream data.
 */
public class BootstrappedDatasetBuilder<K, V>
implements PartitionDataBuilder<K, V, EmptyContext, BootstrappedDatasetPartition> {
    /** Serial version uid. */
    private static final long serialVersionUID = 8146220902914010559L;

    /** Maps an upstream key/value pair to its feature vector. */
    private final IgniteBiFunction<K, V, Vector> featureExtractor;

    /** Maps an upstream key/value pair to its label. */
    private final IgniteBiFunction<K, V, Double> lbExtractor;

    /** Number of repetition counters generated for every row. */
    private final int samplesCnt;

    /** Mean of the Poisson distribution the repetition counters are drawn from. */
    private final double subsampleSize;

    /**
     * Creates a builder; the arguments are stored as-is and are not validated here.
     *
     * @param featureExtractor Function producing the feature vector of a row.
     * @param lbExtractor Function producing the label of a row.
     * @param samplesCnt Number of repetition counters to generate per row.
     * @param subsampleSize Mean passed to the Poisson distribution used for the counters.
     */
    public BootstrappedDatasetBuilder(IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor, int samplesCnt, double subsampleSize) {
        this.featureExtractor = featureExtractor;
        this.lbExtractor = lbExtractor;
        this.samplesCnt = samplesCnt;
        this.subsampleSize = subsampleSize;
    }

    /**
     * Builds the partition by walking the upstream iterator once, in order.
     *
     * <p>The backing array is sized from {@code upstreamDataSize}: if the iterator yields fewer
     * rows, the trailing slots stay {@code null}; if it yields more, storing the extra row fails
     * with {@link ArrayIndexOutOfBoundsException}.
     *
     * @param upstreamData Iterator over the upstream rows of this partition.
     * @param upstreamDataSize Declared number of upstream rows; must fit into an {@code int}.
     * @param ctx Partition context (not used by this builder).
     * @return Partition holding one {@link BootstrappedVector} per consumed row.
     * @throws ArithmeticException If {@code upstreamDataSize} overflows an {@code int}.
     */
    @Override
    public BootstrappedDatasetPartition build(Iterator<UpstreamEntry<K, V>> upstreamData, long upstreamDataSize, EmptyContext ctx) {
        int capacity = Math.toIntExact(upstreamDataSize);
        BootstrappedVector[] rows = new BootstrappedVector[capacity];

        // One distribution instance is shared by all rows of the partition.
        PoissonDistribution repetitionDist = new PoissonDistribution(this.subsampleSize);

        for (int rowIdx = 0; upstreamData.hasNext(); rowIdx++) {
            UpstreamEntry<K, V> entry = upstreamData.next();

            Vector features = this.featureExtractor.apply(entry.getKey(), entry.getValue());
            Double label = this.lbExtractor.apply(entry.getKey(), entry.getValue());
            int[] counters = drawRepetitionCounters(repetitionDist);

            rows[rowIdx] = new BootstrappedVector(features, label, counters);
        }

        return new BootstrappedDatasetPartition(rows);
    }

    /**
     * Draws {@code samplesCnt} values from the given distribution, filling the result in index order.
     *
     * @param dist Distribution to sample the counters from.
     * @return Freshly allocated array of sampled repetition counters.
     */
    private int[] drawRepetitionCounters(PoissonDistribution dist) {
        int[] counters = new int[this.samplesCnt];
        for (int sampleIdx = 0; sampleIdx < counters.length; sampleIdx++) {
            counters[sampleIdx] = dist.sample();
        }
        return counters;
    }
}

