/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ignite.ml.structures.preprocessing;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Stream;
import org.apache.ignite.ml.math.Vector;
import org.apache.ignite.ml.math.exceptions.CardinalityException;
import org.apache.ignite.ml.math.exceptions.NoDataException;
import org.apache.ignite.ml.math.exceptions.knn.EmptyFileException;
import org.apache.ignite.ml.math.exceptions.knn.FileParsingException;
import org.apache.ignite.ml.structures.LabeledDataset;
import org.apache.ignite.ml.structures.LabeledVector;
import org.jetbrains.annotations.NotNull;

/**
 * Loads a {@link LabeledDataset} from a delimited text file.
 *
 * <p>Every line of the file is one observation: the first column is the numeric label and the
 * remaining columns are the numeric features. The number of feature columns is taken from the
 * first line of the file.
 */
public class LabeledDatasetLoader {
    /**
     * Reads the given text file and builds a labeled dataset from it.
     *
     * <p>Rows whose label cannot be parsed as a double are skipped when {@code isFallOnBadData} is
     * {@code false}. Missing or unparseable feature values are then replaced by the value returned
     * from {@link #fillMissedData()}. When {@code isFallOnBadData} is {@code true} any such problem
     * aborts the load with an exception.
     *
     * @param pathToFile Path to the file to read.
     * @param separator Column separator; it is passed to {@link String#split(String)} and is
     *      therefore interpreted as a regular expression.
     * @param isDistributed Forwarded to {@code LabeledDataset.emptyVector} when the feature vectors
     *      are created.
     * @param isFallOnBadData {@code true} to fail on malformed rows, {@code false} to skip or patch
     *      them.
     * @return Dataset built from all rows that were accepted.
     * @throws IOException If the file cannot be opened.
     * @throws EmptyFileException If the file contains no lines.
     * @throws NoDataException If the first row contains fewer than two columns.
     * @throws FileParsingException If {@code isFallOnBadData} is set and a value is not a number.
     * @throws CardinalityException If {@code isFallOnBadData} is set and a row has a different
     *      number of columns than the first row.
     */
    public static LabeledDataset loadFromTxtFile(Path pathToFile, String separator, boolean isDistributed, boolean isFallOnBadData) throws IOException {
        List<String> rows = new ArrayList<>();
        // Files.lines keeps the file open until the stream is closed, so it must be
        // scoped with try-with-resources to avoid leaking the file handle.
        try (Stream<String> lines = Files.lines(pathToFile)) {
            lines.forEach(rows::add);
        }

        if (rows.isEmpty()) {
            throw new EmptyFileException(pathToFile.toString());
        }

        // The first column holds the label, everything after it is a feature.
        int colSize = getColumnSize(separator, rows) - 1;
        if (colSize <= 0) {
            throw new NoDataException("File should contain first row with data");
        }

        List<Double> labels = new ArrayList<>();
        List<Vector> vectors = new ArrayList<>();
        for (int i = 0; i < rows.size(); ++i) {
            String[] rowData = rows.get(i).split(separator);
            // A row made only of separators splits into an empty array. Treat its label as empty
            // text so it goes through the same bad-label handling instead of failing on rowData[0].
            String labelText = rowData.length > 0 ? rowData[0] : "";
            try {
                double label = Double.parseDouble(labelText);
                Vector features = parseFeatures(pathToFile, isDistributed, isFallOnBadData, colSize, i, rowData);
                // Both lists are appended only after the whole row parsed, keeping them aligned.
                labels.add(label);
                vectors.add(features);
            }
            catch (NumberFormatException e) {
                if (isFallOnBadData) {
                    throw new FileParsingException(labelText, i, pathToFile);
                }
                // Lenient mode: drop the row and carry on with the next one.
            }
        }

        LabeledVector[] data = new LabeledVector[vectors.size()];
        for (int i = 0; i < data.length; ++i) {
            data[i] = new LabeledVector(vectors.get(i), labels.get(i));
        }
        return new LabeledDataset(data, colSize);
    }

    /**
     * Converts the feature columns of one row (everything after the label column) into a vector.
     *
     * @param pathToFile Path of the file being read; used only for error reporting.
     * @param isDistributed Forwarded to {@code LabeledDataset.emptyVector}.
     * @param isFallOnBadData {@code true} to fail on a wrong column count or an unparseable value,
     *      {@code false} to substitute {@link #fillMissedData()} for such values.
     * @param colSize Expected number of feature columns.
     * @param rowIdx Zero-based index of the row; used only for error reporting.
     * @param rowData Split row, with the label at index 0.
     * @return Vector of {@code colSize} feature values.
     * @throws CardinalityException If {@code isFallOnBadData} is set and the row does not have
     *      exactly {@code colSize + 1} columns.
     * @throws FileParsingException If {@code isFallOnBadData} is set and a feature is not a number.
     */
    @NotNull
    private static Vector parseFeatures(Path pathToFile, boolean isDistributed, boolean isFallOnBadData, int colSize, int rowIdx, String[] rowData) {
        // Validate before allocating, so a rejected row does not cost a vector allocation.
        if (isFallOnBadData && rowData.length != colSize + 1) {
            throw new CardinalityException(colSize + 1, rowData.length);
        }

        Vector vec = LabeledDataset.emptyVector(colSize, isDistributed);
        double missedData = fillMissedData();
        for (int j = 0; j < colSize; ++j) {
            int col = j + 1;
            if (col >= rowData.length) {
                // Row is shorter than the first row; reachable only in lenient mode because the
                // strict mode has already enforced the exact column count above.
                vec.set(j, missedData);
                continue;
            }
            try {
                vec.set(j, Double.parseDouble(rowData[col]));
            }
            catch (NumberFormatException e) {
                if (isFallOnBadData) {
                    throw new FileParsingException(rowData[col], rowIdx, pathToFile);
                }
                vec.set(j, missedData);
            }
        }
        return vec;
    }

    /**
     * Returns the value substituted for a missing or unparseable feature in lenient mode.
     *
     * @return Always {@code 0.0}.
     */
    private static double fillMissedData() {
        return 0.0;
    }

    /**
     * Counts the columns of the first row, label column included.
     *
     * <p>The row is split with a negative limit, so trailing empty fields are counted as columns.
     *
     * @param separator Column separator (regular expression).
     * @param list Lines of the file; must contain at least one element.
     * @return Number of columns in the first row; always at least 1.
     */
    private static int getColumnSize(String separator, List<String> list) {
        return list.get(0).split(separator, -1).length;
    }
}

