sgsPy/clhs_8h_source.html

/******************************************************************************

 *

 * Project: sgs

 * Purpose: C++ implementation of CLHS

 * Author: Joseph Meyer

 * Date: November, 2025

 *

 ******************************************************************************/


#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <memory>
#include <random>
#include <stdexcept>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "utils/access.h"
#include "utils/existing.h"
#include "utils/helper.h"
#include "utils/raster.h"
#include "utils/vector.h"

#include <boost/unordered/unordered_flat_set.hpp>
#include <mkl.h>
#include "oneapi/dal.hpp"
#include <xoshiro.h>


//Initial capacity (in points) of the CLHSDataManager buffers and the amount they grow by
//each time they fill up. Also used in clhs() to build an argument of
//helper::getProbabilityMultiplier().
#define MILLION 1000000


//Shorthand for the oneDAL table type used to wrap the feature buffers.
typedef oneapi::dal::homogen_table              DALHomogenTable;


namespace sgs {


namespace clhs {


/**
 * A pixel together with a pointer to its feature values.
 *
 * The struct does not own the memory behind p_features.
 * A default constructed Point has a null feature pointer and (-1, -1) as
 * its pixel position.
 */
template <typename T>
struct Point {
    T *p_features{nullptr}; //first feature value of the pixel
    int x{-1};              //pixel column
    int y{-1};              //pixel row
};


/**
 * Finds the quantile bin a value falls into.
 *
 * The boundaries are searched with a binary search, so they have to be
 * sorted in ascending order for the result to be meaningful.
 *
 * @param val        value to classify
 * @param quantiles  bin boundaries
 * @return index of the first boundary which is not less than val, or
 *         quantiles.size() when every boundary is less than val
 */
template <typename T>
inline size_t
getQuantile(T val, std::vector<T>& quantiles) {
    const auto first = quantiles.begin();
    const auto last = quantiles.end();
    const auto boundary = std::lower_bound(first, last, val);

    if (boundary != last) {
        return std::distance(first, boundary);
    }

    return quantiles.size();
}


/**
 * Stores the pixels gathered while the raster is read and hands out random
 * candidates from them while samples are selected.
 *
 * Two separate sets of points are kept, both as flat row-major feature
 * buffers (nFeat values per point) with parallel x/y pixel index vectors:
 *  - candidate points, added with addPoint(),
 *  - existing sample points, added with addExistingPoint().
 *
 * finalize() has to be called once all points have been added and before
 * randomIndex(), getRandomPoint(), correlationObjectiveFunc() or
 * getExistingFeatures() are used.
 */
template <typename T>
class CLHSDataManager {
    private:
    std::vector<T> features; //candidate feature values, nFeat per point
    std::vector<int> x;      //candidate pixel columns
    std::vector<int> y;      //candidate pixel rows
    size_t fi = 0;           //next free position in 'features'
    size_t count = 0;        //number of candidate points stored
    int64_t size = MILLION;  //number of points the candidate buffers currently have room for
    uint64_t ucount = 0;     //'count' as uint64_t, set by finalize()

    //for existing sample points
    std::vector<T> efeatures;
    std::vector<int> ex;
    std::vector<int> ey;
    size_t efi = 0;          //next free position in 'efeatures'
    size_t ecount = 0;       //number of existing points stored

    //correlation matrix passed to finalize(), reference for correlationObjectiveFunc()
    std::vector<std::vector<T>> corr;

    int nFeat;
    int nSamp;

    xso::xoshiro_4x64_plus *p_rng = nullptr; //not owned
    uint64_t mask = 0;                       //all-ones bit mask covering 'count', set by finalize()

    public:

    /**
     * Allocates room for MILLION candidate points and for existingCount
     * existing sample points.
     *
     * @param nFeat          number of feature values per point
     * @param nSamp          number of samples which will be requested
     * @param p_rng          random number generator used by randomIndex(); only the pointer is stored
     * @param existingCount  maximum number of points addExistingPoint() will be called with
     */
    CLHSDataManager(int nFeat, int nSamp, xso::xoshiro_4x64_plus *p_rng, size_t existingCount) {
        this->nFeat = nFeat;
        this->nSamp = nSamp;
        this->count = 0;
        this->fi = 0;
        this->size = MILLION;

        //the multiplication is done in size_t so it can not overflow an int
        this->features.resize(static_cast<size_t>(MILLION) * static_cast<size_t>(nFeat));
        this->x.resize(MILLION);
        this->y.resize(MILLION);

        this->p_rng = p_rng;

        if (existingCount != 0) {
            this->efeatures.resize(existingCount * static_cast<size_t>(nFeat));
            this->ex.resize(existingCount);
            this->ey.resize(existingCount);
        }
    }

    /**
     * Appends a candidate point. The buffers grow by MILLION points every
     * time they become full.
     *
     * @param p_features  nFeat feature values, copied into the manager
     * @param x           pixel column
     * @param y           pixel row
     */
    inline void
    addPoint(T *p_features, int x, int y) {
        for (int f = 0; f < nFeat; f++) {
            this->features[this->fi] = p_features[f];
            this->fi++;
        }

        this->x[this->count] = x;
        this->y[this->count] = y;
        this->count++;

        //grow as soon as the last free slot has been used, so the next call always has room
        if (this->count == static_cast<size_t>(this->size)) {
            this->features.resize(this->features.size() + static_cast<size_t>(MILLION) * static_cast<size_t>(this->nFeat));
            this->x.resize(this->x.size() + MILLION);
            this->y.resize(this->y.size() + MILLION);
            this->size += MILLION;
        }
    }

    /**
     * Appends an existing sample point. No bounds check is done: this must
     * not be called more often than the existingCount given to the
     * constructor.
     *
     * @param p_features  nFeat feature values, copied into the manager
     * @param x           pixel column
     * @param y           pixel row
     */
    inline void
    addExistingPoint(T *p_features, int x, int y) {
        for (int f = 0; f < nFeat; f++) {
            this->efeatures[this->efi] = p_features[f];
            this->efi++;
        }

        this->ex[this->ecount] = x;
        this->ey[this->ecount] = y;
        this->ecount++;
    }

    /**
     * Stores the reference correlation matrix, trims all buffers to the
     * number of points actually added and prepares the bit mask used by
     * randomIndex().
     *
     * @param corr  correlation matrix which correlationObjectiveFunc() compares against
     * @throws std::runtime_error when fewer than nSamp candidate points were added
     */
    inline void
    finalize(std::vector<std::vector<T>>& corr) {
        if (this->count < static_cast<size_t>(this->nSamp)) {
            throw std::runtime_error("not enough points saved during raster iteration to conduct clhs sampling.");
        }

        this->corr = corr;

        this->x.resize(this->count);
        this->y.resize(this->count);
        this->features.resize(this->count * nFeat);
        this->ucount = static_cast<uint64_t>(this->count);

        //use bit twiddling to fill the mask: every bit below the highest set bit of count is set
        this->mask = static_cast<uint64_t>(this->count);
        this->mask |= this->mask >> 1;
        this->mask |= this->mask >> 2;
        this->mask |= this->mask >> 4;
        this->mask |= this->mask >> 8;
        this->mask |= this->mask >> 16;
        this->mask |= this->mask >> 32;

        //resize existing sample vectors. It is the existing feature buffer which has to be
        //trimmed here: resizing 'features' instead would cut the candidate buffer down to the
        //number of existing points.
        this->ex.resize(this->ecount);
        this->ey.resize(this->ecount);
        this->efeatures.resize(this->ecount * nFeat);
    }

    /**
     * Draws an index into the candidate points. A value from the generator
     * is shifted right by 11 bits and masked; values which are not below
     * the number of candidate points are rejected and redrawn.
     *
     * @return index in [0, number of candidate points)
     */
    inline uint64_t
    randomIndex() {
        uint64_t index = ((*p_rng)() >> 11) & mask;

        while (index >= this->ucount) {
            index = ((*p_rng)() >> 11) & mask;
        }

        return index;
    }

    /**
     * Fills 'point' with a randomly chosen candidate point.
     *
     * @param point  output; p_features points into this manager's buffer and is not a copy
     */
    inline void
    getRandomPoint(Point<T>& point) {
        uint64_t index = randomIndex();

        point.p_features = this->features.data() + (index * nFeat);
        point.x = x[index];
        point.y = y[index];
    }

    /**
     * Quantile part of the objective function.
     *
     * @param sampleCountPerQuantile  number of samples in every quantile of every feature
     * @return sum over all entries of |entry - 1|, which is 0 when every
     *         quantile holds exactly one sample
     */
    inline T
    quantileObjectiveFunc(std::vector<int>& sampleCountPerQuantile) {
        int retval = 0;

        for (const int& samplesInQuantile : sampleCountPerQuantile) {
            retval += std::abs(samplesInQuantile - 1);
        }

        return static_cast<T>(retval);
    }

    /**
     * Correlation part of the objective function.
     *
     * @param corr  correlation matrix of the current samples; must have at
     *              least the dimensions of the matrix given to finalize()
     * @return sum of the absolute element-wise differences between 'corr'
     *         and the matrix given to finalize()
     */
    inline T
    correlationObjectiveFunc(std::vector<std::vector<T>>& corr) {
        T retval = 0;

        for (size_t i = 0; i < this->corr.size(); i++) {
            for (size_t j = 0; j < this->corr[i].size(); j++) {
                retval += std::abs(corr[i][j] - this->corr[i][j]);
            }
        }

        return retval;
    }

    /**
     * Copies the existing sample points into the given vectors, replacing
     * their previous contents.
     *
     * @param features  output, nFeat feature values per existing point
     * @param x         output, pixel columns
     * @param y         output, pixel rows
     */
    inline void
    getExistingFeatures(std::vector<T>& features, std::vector<int>& x, std::vector<int>& y) {
        //vector assignment resizes and copies, and is well defined for empty vectors
        features = this->efeatures;
        x = this->ex;
        y = this->ey;
    }

};


/**
 * Reads all bands of the raster block by block. From the pixels which hold
 * data in every band, the following is accumulated:
 *  - streaming quantile estimates for every band (MKL summary statistics),
 *  - a streaming correlation matrix across the bands (oneDAL),
 *  - the existing sample pixels and the retained candidate pixels, which
 *    are stored in the data manager.
 * After the last block the correlation matrix is passed to clhs.finalize().
 *
 * @param bands      per-band metadata; the block dimensions are taken from bands[0]
 * @param clhs       data manager receiving the points and the correlation matrix
 * @param access     access mask; when used, pixels whose mask value is 1 are never stored as candidates
 * @param existing   lookup of existing sample pixels
 * @param rand       decides pixel by pixel whether an accessible pixel is kept as a candidate
 * @param type       GDT_Float64 selects the double precision MKL routines, anything
 *                   else the single precision ones; has to agree with T
 * @param quantiles  output, nSamp - 1 quantile estimates per band
 * @param size       size in bytes of one value of T
 * @param width      raster width in pixels
 * @param height     raster height in pixels
 * @param count      number of bands
 * @param nSamp      number of samples requested
 * @throws std::runtime_error when the access buffer or an MKL task can not be
 *         created, when a band can not be read, or from clhs.finalize()
 */
template <typename T>
inline void
readRaster(
    std::vector<helper::RasterBandMetaData>& bands,
    CLHSDataManager<T>& clhs,
    access::Access& access,
    existing::Existing& existing,
    helper::RandValController& rand,
    GDALDataType type,
    std::vector<std::vector<T>>& quantiles,
    size_t size,
    int width,
    int height,
    int count,
    int nSamp)
{
    //probabilities j / nSamp for j = 1 .. nSamp - 1, the same for every band
    std::vector<std::vector<T>> probabilities(count);
    quantiles.resize(count);

    for (int i = 0; i < count; i++) {
        probabilities[i].resize(nSamp - 1);
        quantiles[i].resize(nSamp - 1);

        for (int j = 0; j < nSamp - 1; j++) {
            probabilities[i][j] = static_cast<T>(j + 1) / static_cast<T>(nSamp);
        }
    }

    int xBlockSize = bands[0].xBlockSize;
    int yBlockSize = bands[0].yBlockSize;

    int xBlocks = (width + xBlockSize - 1) / xBlockSize;
    int yBlocks = (height + yBlockSize - 1) / yBlockSize;

    double deps = .001;
    float seps = .001f;

    //buffer sizes are calculated in size_t so large blocks can not overflow an int
    const size_t pixelsPerBlock = static_cast<size_t>(xBlockSize) * static_cast<size_t>(yBlockSize);

    //corrBuffer holds one block with the bands interleaved per pixel
    std::vector<T> corrBuffer(static_cast<size_t>(count) * pixelsPerBlock);
    std::vector<std::vector<T>> quantileBuffers(count);
    for (int i = 0; i < count; i++) {
        quantileBuffers[i].resize(pixelsPerBlock);
    }

    //create descriptor for correlation matrix streaming calculation with oneDAL
    const auto cor_desc = oneapi::dal::covariance::descriptor{}.set_result_options(oneapi::dal::covariance::result_options::cor_matrix);
    oneapi::dal::covariance::partial_compute_result<> partial_result;

    //create tasks for quantile streaming calculation with MKL
    std::vector<VSLSSTaskPtr> quantileTasks(count);
    int status;
    MKL_INT quant_order_n = quantiles[0].size();
    MKL_INT p = 1;
    MKL_INT n = xBlockSize * yBlockSize;
    MKL_INT nparams = VSL_SS_SQUANTS_ZW_PARAMS_N;
    MKL_INT xstorage = VSL_SS_MATRIX_STORAGE_ROWS;

    if (access.used) {
        access.band.p_buffer = VSIMalloc3(xBlockSize, yBlockSize, access.band.size);
    }

    //releases the access buffer and the MKL tasks on every way out of this function,
    //including exceptions. Tasks which were never created are still null and are skipped.
    struct ResourceGuard {
        void *p_accessBuffer;
        std::vector<VSLSSTaskPtr>& tasks;

        ~ResourceGuard() {
            VSIFree(p_accessBuffer);

            for (VSLSSTaskPtr& task : tasks) {
                if (task) {
                    vslSSDeleteTask(&task);
                }
            }
        }
    };
    ResourceGuard guard{access.used ? access.band.p_buffer : nullptr, quantileTasks};

    if (access.used && !access.band.p_buffer) {
        throw std::runtime_error("unable to allocate memory for access band buffer.");
    }

    //MKL functions have different versions for single/double precision floating point data,
    //the pointers are reinterpret cast for compiler reasons. The tasks are given the
    //addresses of 'n' and of the quantile buffers, both of which are rewritten for every
    //block below.
    for (int i = 0; i < count; i++) {
        status = (type == GDT_Float64) ?
            vsldSSNewTask(
                &quantileTasks[i],
                &p,
                &n,
                &xstorage,
                reinterpret_cast<double *>(quantileBuffers[i].data()),
                0,
                0
            ) :
            vslsSSNewTask(
                &quantileTasks[i],
                &p,
                &n,
                &xstorage,
                reinterpret_cast<float *>(quantileBuffers[i].data()),
                0,
                0
            );

        if (status != VSL_STATUS_OK) {
            throw std::runtime_error("unable to create MKL task for quantile calculation.");
        }
    }

    int8_t *p_access = reinterpret_cast<int8_t *>(access.band.p_buffer);

    bool calledEditStreamQuantiles = false;
    char *p_bytes = reinterpret_cast<char *>(corrBuffer.data());

    for (int yBlock = 0; yBlock < yBlocks; yBlock++) {
        for (int xBlock = 0; xBlock < xBlocks; xBlock++) {
            //get block size
            int xValid, yValid;
            bands[0].p_band->GetActualBlockSize(xBlock, yBlock, &xValid, &yValid);

            //read bands into memory. Band i starts i values into the buffer, consecutive
            //pixels are 'count' values apart and consecutive rows a full block row apart.
            for (int i = 0; i < count ; i++) {
                CPLErr err = bands[i].p_band->RasterIO(
                    GF_Read,
                    xBlock * xBlockSize,
                    yBlock * yBlockSize,
                    xValid,
                    yValid,
                    p_bytes + static_cast<size_t>(i) * size,
                    xValid,
                    yValid,
                    type,
                    size * static_cast<size_t>(count),
                    size * static_cast<size_t>(count) * static_cast<size_t>(xBlockSize)
                );

                if (err) {
                    throw std::runtime_error("Error reading data from raster band.");
                }
            }

            //calculate rand vals
            rand.calculateRandValues();

            //read access band into memory if used
            if (access.used) {
                rasterBandIO(access.band, access.band.p_buffer, xBlockSize, yBlockSize, xBlock, yBlock, xValid, yValid, true, false);
            }

            //iterate through pixels. 'n' counts the pixels of this block which have data in
            //every band. Those pixels are packed to the front of corrBuffer and of the
            //quantile buffers; n never exceeds index, so packing never overwrites a pixel
            //which has not been read yet.
            n = 0;
            for (int y = 0; y < yValid; y++) {
                int index = y * xBlockSize;
                for (int x = 0; x < xValid; x++) {
                    bool isNan = false;
                    T *p_buff = corrBuffer.data() + (static_cast<size_t>(index) * static_cast<size_t>(count));
                    for (int b = 0; b < count; b++) {
                        T val = p_buff[b];
                        isNan = std::isnan(val) || val == bands[b].nan;

                        if (isNan) {
                            break;
                        }

                        quantileBuffers[b][n] = val;
                        corrBuffer[n * count + b] = val;
                    }

                    if (!isNan) {
                        n++;

                        bool accessible = !access.used || p_access[index] != 1;

                        if (existing.used && existing.containsIndex(x + xBlock * xBlockSize, y + yBlock * yBlockSize)) {
                            clhs.addExistingPoint(
                                p_buff,
                                xBlock * xBlockSize + x,
                                yBlock * yBlockSize + y
                            );
                        }
                        else if (accessible && rand.next()) {
                            clhs.addPoint(
                                p_buff,
                                xBlock * xBlockSize + x,
                                yBlock * yBlockSize + y
                            );
                        }
                    }
                    index++;
                }
            }

            if (n == 0) {
                continue;
            }

            //the quantile parameters are registered once, before the first block with data
            //NOTE(review): the status of the calls below is stored but never inspected.
            if (!calledEditStreamQuantiles) {
                for (int i = 0; i < count; i++) {
                    status = (type == GDT_Float64) ?
                        vsldSSEditStreamQuantiles(
                            quantileTasks[i],
                            &quant_order_n,
                            reinterpret_cast<double *>(probabilities[i].data()),
                            reinterpret_cast<double *>(quantiles[i].data()),
                            &nparams,
                            &deps
                        ) :
                        vslsSSEditStreamQuantiles(
                            quantileTasks[i],
                            &quant_order_n,
                            reinterpret_cast<float *>(probabilities[i].data()),
                            reinterpret_cast<float *>(quantiles[i].data()),
                            &nparams,
                            &seps
                        );
                }
                calledEditStreamQuantiles = true;
            }

            //update quantile calculations with the n pixels of this block
            for (int i = 0; i < count; i++) {
                status = (type == GDT_Float64) ?
                    vsldSSCompute(quantileTasks[i], VSL_SS_STREAM_QUANTS, VSL_SS_METHOD_SQUANTS_ZW_FAST) :
                    vslsSSCompute(quantileTasks[i], VSL_SS_STREAM_QUANTS, VSL_SS_METHOD_SQUANTS_ZW_FAST);
            }

            //update correlation matrix calculations. The table only wraps corrBuffer, the
            //empty deleter leaves ownership with the vector.
            DALHomogenTable table = DALHomogenTable(corrBuffer.data(), n, count, [](const T *){}, oneapi::dal::data_layout::row_major);
            partial_result = oneapi::dal::partial_compute(cor_desc, partial_result, table);
        }
    }

    //calculate and update clhs data manager with correlation matrix
    auto result = oneapi::dal::finalize_compute(cor_desc, partial_result);
    auto correlation = result.get_cor_matrix();

    oneapi::dal::row_accessor<const T> acc {correlation};

    //the final quantile computation is run with the observation count set to 0
    //NOTE(review): presumably so no block is counted twice -- confirm against the MKL docs.
    n = 0;
    std::vector<std::vector<T>> corr(count);
    for (int i = 0; i < count; i++) {
        corr[i].resize(count);
        auto row = acc.pull({i, i + 1});

        for (int j = 0; j < count; j++) {
            corr[i][j] = row[j];
        }

        status = (type == GDT_Float64) ?
            vsldSSCompute(quantileTasks[i], VSL_SS_STREAM_QUANTS, VSL_SS_METHOD_SQUANTS_ZW) :
            vslsSSCompute(quantileTasks[i], VSL_SS_STREAM_QUANTS, VSL_SS_METHOD_SQUANTS_ZW);
    }

    clhs.finalize(corr);
}


template <typename T>

inline void


selectSamples(std::vector<std::vector<T>>& quantiles,

          CLHSDataManager<T>& clhs,

          xso::xoshiro_4x64_plus& rng,

          existing::Existing& existing,

          size_t replace,

          size_t iterations,

          size_t nSamp,

          size_t nFeat,

          OGRLayer *p_layer,

          double *GT,

          bool plot,

          std::vector<double>& xCoords,

          std::vector<double>& yCoords)

{

    std::vector<T> features;

    std::vector<int> x;

    std::vector<int> y;


    std::vector<int> sampleCountPerQuantile(nFeat * nSamp, 0); //nFeat x nSamp 2d array

    std::vector<int> quantilesOfEachSample(nSamp * nFeat, 0); //nSamp x nFeat 2d array


    //if there are existing samples, add all of them.

    if (existing.used) {

        clhs.getExistingFeatures(features, x, y);

    }


    //Then, remove up to 'replace' number of them which are redundant

    //'redundant' samples are samples which cause over-representation in feature quantiles


    size_t neSamples = x.size(); //number of existing samples

    if (neSamples > 0 && replace != 0) {

        for (size_t si = 0; si < neSamples; si++) {

            for (size_t fi = 0; fi < nFeat; fi++) {

                T val = features[(si * nFeat) + fi];

                size_t q = getQuantile<T>(val, quantiles[fi]);

                sampleCountPerQuantile[(fi * nSamp) + q]++;

                quantilesOfEachSample[(si * nFeat) + fi] = q;

            }

        }


        while (replace > 0 && neSamples > 0) {

            size_t worstRedundancy = 0;

            size_t worstRedundancyIndex = 0;


            //get the sample with the worst redundancy. In other words, the one which is in the

            //most over-represented quantile across all features

            for (size_t si = 0; si < neSamples; si++) {

                size_t curSampleRedundancy = 0;

                for (size_t fi = 0; fi < nFeat; fi++) {

                    size_t q = quantilesOfEachSample[(si * nFeat) + fi];

                    curSampleRedundancy += sampleCountPerQuantile[(fi * nSamp) + q];

                }

                if (curSampleRedundancy > worstRedundancy) {

                    worstRedundancy = curSampleRedundancy;

                    worstRedundancyIndex = si;

                }

            }


            //if the remaining samples are already forming a partial latin hypercube

            //(no more than 1 sample per quantile of each feature)

            //then don't remove any more indices!

            if (worstRedundancy == nFeat) {

                break;

            }


            //replace the most redundant sample with the last sample in the vector, and decrease

            //the size of the vector by 1. But first, adjust the values of sampleCountPerQuantile

            size_t si = worstRedundancyIndex;

            for (size_t fi = 0; fi < nFeat; fi++) {

                size_t q = quantilesOfEachSample[(si * nFeat) + fi];

                sampleCountPerQuantile[(fi * nSamp) + q]--;

            }


            x[si] = x[neSamples - 1];

            y[si] = y[neSamples - 1];

            std::memcpy(features.data() + (si * nFeat),

                    features.data() + ((neSamples - 1) * nFeat),

                    sizeof(T) * nFeat);

            std::memcpy(quantilesOfEachSample.data() + (si * nFeat),

                    quantilesOfEachSample.data() + ((neSamples - 1) * nFeat),

                    sizeof(int) * nFeat);


            neSamples--;

            replace--;

        }

    }


    //NOW, the neSamples variable contains the number of existing samples which MUST be kept.

    //This number also happens to be the first index of the remaining space in the vector which

    //may be filled in with non-existing samples. If there were no existing samples this value

    //is 0.

    size_t starti = neSamples;

    boost::unordered::unordered_flat_set<uint64_t> points;


    //Add all of the existing samples to the output layer, and add to indices map

    helper::Field fieldExistingTrue("existing", 1);

    for (size_t si = 0; si < neSamples; si++) {

        OGRPoint point = existing.getPoint(x[si], y[si]);

        helper::addPoint(&point, p_layer, &fieldExistingTrue);


        if (plot) {

            xCoords.push_back(point.getX());

            yCoords.push_back(point.getY());

        }


        points.insert((((uint64_t) x[si]) << 32) | ((uint64_t) y[si]));

    }


    //if there are already enough (existing) samples, return and don't add any more

    if (starti >= nSamp) {

        return;

    }


    std::uniform_real_distribution<T> dist(0.0, 1.0);

    std::uniform_int_distribution<size_t> indexDist(starti, nSamp - 1);


    std::vector<std::vector<T>> corr(nFeat);

    for (int i = 0; i < nFeat; i++) {

        corr[i].resize(nFeat);

    }


    features.resize(nSamp * nFeat);

    x.resize(nSamp);

    y.resize(nSamp);


    //get first random samples

    int i = starti;

    Point<T> p;

    while (i < nSamp) {

        clhs.getRandomPoint(p);


        if (points.contains((((uint64_t) p.x) << 32) | ((uint64_t) p.y))) {

            continue;

        }


        x[i] = p.x;

        y[i] = p.y;


        for (int f = 0; f < nFeat; f++) {

            T val = p.p_features[f];

            features[(i * nFeat) + f] = val;


            int q = getQuantile<T>(val, quantiles[f]);

            sampleCountPerQuantile[(f * nSamp) + q]++;

            quantilesOfEachSample[(i * nFeat) + f] = q;

        }


        points.insert((((uint64_t) p.x) << 32) | ((uint64_t) p.y));

        i++;

    }


    //define covariance calculation

    DALHomogenTable table = DALHomogenTable(features.data(), nSamp, nFeat, [](const T *){}, oneapi::dal::data_layout::row_major);

    const auto cor_desc = oneapi::dal::covariance::descriptor{}.set_result_options(oneapi::dal::covariance::result_options::cor_matrix);

    const auto result = oneapi::dal::compute(cor_desc, table);

    oneapi::dal::row_accessor<const T> acc {result.get_cor_matrix()};

    for (int i = 0; i < nFeat; i++) {

        auto row = acc.pull({i, i + 1});


        for (int j = 0; j < nFeat; j++) {

            corr[i][j] = row[j];

        }

    }


    double temp = 1;

    double d = temp / static_cast<double>(iterations);


    T obj = 0;

    T objQ = clhs.quantileObjectiveFunc(sampleCountPerQuantile);

    T objC = clhs.correlationObjectiveFunc(corr);


    obj = objQ + objC;


    //features of old (before random new index) index

    std::vector<T> oldf(nFeat);


    //begin annealing schedule. If we have a perfect latin hypercube -- or if we pass enough iterations -- stop iterating.

    while (temp > 0 && objQ != 0) {

        size_t i; //the index within the indices, x, y, and features vector so we know what to swap without searching

        if (dist(rng) < 0.5) {

            //50% of the time, choose a random sample to replace

            i = indexDist(rng);

        }

        else {

            //50% of the time, choose the worst sample to replace


            //get the sample with the worst redundancy. In other words, the one which is in the

            //most over-represented quantile across all features

            size_t worstRedundancyIndex = 0;

            size_t worstRedundancy = 0;

            for (size_t si = starti; si < nSamp; si++) {

                size_t curSampleRedundancy = 0;

                for (size_t fi = 0; fi < nFeat; fi++) {

                    size_t q = quantilesOfEachSample[(si * nFeat) + fi];

                    curSampleRedundancy += sampleCountPerQuantile[(fi * nSamp) + q];

                }

                if (curSampleRedundancy > worstRedundancy) {

                    worstRedundancy = curSampleRedundancy;

                    worstRedundancyIndex = si;

                }

            }

            i = worstRedundancyIndex;

        }


        //move selected replacement to 'oldf' vector to retain the old values in case we revert back

        //to that state

        std::memcpy(oldf.data(), features.data() + (i * nFeat), nFeat * sizeof(T));


        //select a new index

        Point<T> p;

        clhs.getRandomPoint(p);

        while (points.contains((((uint64_t) p.x) << 32) | ((uint64_t) p.y))) {

            clhs.getRandomPoint(p);

        }


        //move new features into feature vector

        std::memcpy(features.data() + (i * nFeat), p.p_features, nFeat * sizeof(T));


        //recalculate sample count per quantile

        std::vector<int> oldq(nFeat);

        std::vector<int> newq(nFeat);

        for (int f = 0; f < nFeat; f++) {

            //decrement based removal of on old features

            int q = getQuantile(oldf[f], quantiles[f]);

            oldq[f] = q;

            sampleCountPerQuantile[(f * nSamp) + q]--;


            //increment based on inputof new features

            q = getQuantile(p.p_features[f], quantiles[f]);

            newq[f] = q;

            sampleCountPerQuantile[(f * nSamp) + q]++;

        }


        //recalculate objective function from quantiles

        T newObjQ = clhs.quantileObjectiveFunc(sampleCountPerQuantile);


        //recalculate correlation matrix

        const auto result = oneapi::dal::compute(cor_desc, table); // we update the table in place

        oneapi::dal::row_accessor<const T> acc {result.get_cor_matrix()};

        for (int j = 0; j < nFeat; j++) {

            auto row = acc.pull({j, j + 1});


            for (int k = 0; k < nFeat; k++) {

                corr[j][k] = row[k];

            }

        }


        //recalculate objective function from correlation matrix

        T newObjC = clhs.correlationObjectiveFunc(corr);


        T newObj = newObjQ + newObjC;

        T delta = newObj - obj;


        bool keep = dist(rng) < std::exp(-1 * delta / temp);


        if (keep) {

            //update the new changes

            points.erase((((uint64_t) x[i]) << 32) | ((uint64_t) y[i]));

            points.insert((((uint64_t) p.x) << 32) | ((uint64_t) p.y));


            x[i] = p.x;

            y[i] = p.y;


            std::memcpy(quantilesOfEachSample.data() + (i * nFeat), newq.data(), sizeof(int) * nFeat);


            objC = newObjC;

            objQ = newObjQ;

            obj = newObj;

        }

        else {

            //revert back to old changes

            for (int f = 0; f < nFeat; f++) {

                sampleCountPerQuantile[(f * nSamp) + newq[f]]--;

                sampleCountPerQuantile[(f * nSamp) + oldq[f]]++;

            }


            std::memcpy(

                reinterpret_cast<void *>(features.data() + (i * nFeat)),

                reinterpret_cast<void *>(oldf.data()),

                nFeat * sizeof(T)

            );

        }


        //update annealing temperature

        temp -= d;

    }


    //add samples to output layer

    helper::Field fieldExistingFalse("existing", 0);

    for (int i = starti ; i < nSamp; i++) {

        const auto [xCoord, yCoord] = helper::sample_to_point(GT, x[i], y[i]);

        OGRPoint point = OGRPoint(xCoord, yCoord);

        existing.used ?

            helper::addPoint(&point, p_layer, &fieldExistingFalse) :

            helper::addPoint(&point, p_layer);


        if (plot) {

            xCoords.push_back(xCoord);

            yCoords.push_back(yCoord);

        }

    }

}


/**
 * Conditioned latin hypercube sampling of a raster.
 *
 * Creates an in-memory output dataset with a "samples" point layer, reads
 * the raster (readRaster) and selects the samples (selectSamples). The
 * calculation is done in double precision when any band is GDT_Float64 and
 * in single precision otherwise.
 *
 * @param p_raster    raster to sample
 * @param nSamp       number of samples, at least 1
 * @param iterations  number of annealing iterations, not negative
 * @param p_access    access vector, passed on to access::Access
 * @param layerName   layer of the access vector, passed on to access::Access
 * @param buffInner   inner access buffer, passed on to access::Access
 * @param buffOuter   outer access buffer, passed on to access::Access
 * @param p_existing  existing samples, passed on to existing::Existing
 * @param replace     maximum number of existing samples which may be dropped
 * @param plot        when true the sample coordinates are returned
 * @param tempFolder  passed on to access::Access
 * @param filename    when not empty the samples are written to this file; a
 *                    failure to write is printed and does not abort the call
 * @return {x coordinates, y coordinates} and the wrapper of the output
 *         dataset. The wrapper is allocated with new and handed to the caller.
 * @throws std::runtime_error on invalid arguments, when the output dataset
 *         can not be created, or from readRaster
 */
std::tuple<std::vector<std::vector<double>>, vector::GDALVectorWrapper *>
clhs(
    raster::GDALRasterWrapper *p_raster,
    int nSamp,
    int iterations,
    vector::GDALVectorWrapper *p_access,
    std::string layerName,
    double buffInner,
    double buffOuter,
    vector::GDALVectorWrapper *p_existing,
    size_t replace,
    bool plot,
    std::string tempFolder,
    std::string filename)
{
    GDALAllRegister();

    //reject arguments which would otherwise fail far less clearly further down:
    //nSamp - 1 is used as a vector size, and a negative iteration count would wrap around
    //to a huge unsigned value.
    if (nSamp < 1) {
        throw std::runtime_error("nSamp must be at least 1.");
    }
    if (iterations < 0) {
        throw std::runtime_error("iterations must not be negative.");
    }

    int width = p_raster->getWidth();
    int height = p_raster->getHeight();
    int nFeat = p_raster->getBandCount();
    double *GT = p_raster->getGeotransform();

    //the block size of the first band is used throughout, so there has to be one
    if (nFeat < 1) {
        throw std::runtime_error("raster must contain at least one band.");
    }

    std::vector<double> xCoords, yCoords;

    std::vector<helper::RasterBandMetaData> bands(nFeat);
    for (int i = 0; i < nFeat; i++) {
        bands[i].p_band = p_raster->getRasterBand(i);
        bands[i].type = p_raster->getRasterBandType(i);
        bands[i].size = p_raster->getRasterBandTypeSize(i);
        bands[i].nan = bands[i].p_band->GetNoDataValue();
        bands[i].p_band->GetBlockSize(&bands[i].xBlockSize, &bands[i].yBlockSize);
    }

    //create output dataset before doing anything which will take a long time in case of failure.
    GDALDriver *p_driver = GetGDALDriverManager()->GetDriverByName("MEM");
    if (!p_driver) {
        throw std::runtime_error("unable to create output sample dataset driver.");
    }
    GDALDataset *p_samples = p_driver->Create("", 0, 0, 0, GDT_Unknown, nullptr);
    if (!p_samples) {
        throw std::runtime_error("unable to create output dataset with driver.");
    }

    //the wrapper is held by a unique_ptr until it is returned, so it is not leaked when
    //anything below throws. It is declared before 'access' and 'existing' and therefore
    //destroyed after them.
    auto p_wrapper = std::make_unique<vector::GDALVectorWrapper>(p_samples, std::string(p_raster->getDataset()->GetProjectionRef()));
    OGRLayer *p_layer = p_samples->CreateLayer("samples", p_wrapper->getSRS(), wkbPoint, nullptr);
    if (!p_layer) {
        throw std::runtime_error("unable to create output dataset layer.");
    }

    access::Access access(
        p_access,
        p_raster,
        layerName,
        buffInner,
        buffOuter,
        true,
        tempFolder,
        bands[0].xBlockSize,
        bands[0].yBlockSize
    );

    existing::Existing existing(
        p_existing,
        p_raster,
        GT,
        width,
        p_layer,
        false,
        xCoords,
        yCoords,
        false
    );

    //fast random number generator from the xoshiro family (xso::xoshiro_4x64_plus)
    //https://vigna.di.unimi.it/ftp/papers/ScrambledLinear.pdf
    xso::xoshiro_4x64_plus rng;
    uint64_t multiplier = helper::getProbabilityMultiplier(
        width,
        height,
        p_raster->getPixelWidth(),
        p_raster->getPixelHeight(),
        8,
        MILLION * 100,
        false,
        access.area
    );
    helper::RandValController rand(bands[0].xBlockSize, bands[0].yBlockSize, multiplier, &rng);

    //get data type for all bands: double precision as soon as one band needs it
    GDALDataType type = GDT_Float32;
    for (const helper::RasterBandMetaData& band : bands) {
        if (band.type == GDT_Float64) {
            type = GDT_Float64;
            break;
        }
    }

    //the sampling itself, identical for both precisions. The argument only selects the
    //floating point type.
    const auto sample = [&](auto precision) {
        using T = decltype(precision);

        std::vector<std::vector<T>> quantiles;

        //create instance of data management class
        CLHSDataManager<T> manager(nFeat, nSamp, &rng, existing.count());

        //read raster, calculating quantiles, correlation matrix, and adding points to sample from.
        readRaster<T>(bands, manager, access, existing, rand, type, quantiles, sizeof(T), width, height, nFeat, nSamp);

        //select samples and add them to output layer
        selectSamples<T>(quantiles, manager, rng, existing, replace, iterations, nSamp, nFeat, p_layer, GT, plot, xCoords, yCoords);
    };

    if (type == GDT_Float64) {
        sample(double{});
    }
    else { //type == GDT_Float32
        sample(float{});
    }

    if (filename != "") {
        try {
            p_wrapper->write(filename);
        }
        catch (const std::exception& e) {
            std::cout << "Exception thrown trying to write file: " << e.what() << std::endl;
        }
    }

    //the coordinates are assembled before ownership of the wrapper is given up
    std::vector<std::vector<double>> coords{xCoords, yCoords};
    return std::make_tuple(std::move(coords), p_wrapper.release());
}


} //namespace clhs


} //namespace sgs

access.h

sgs::clhs::CLHSDataManager
Definition clhs.h:75

sgs::clhs::CLHSDataManager::finalize
void finalize(std::vector< std::vector< T > > &corr)
Definition clhs.h:207

sgs::clhs::CLHSDataManager::CLHSDataManager
CLHSDataManager(int nFeat, int nSamp, xso::xoshiro_4x64_plus *p_rng, size_t existingCount)
Definition clhs.h:113

sgs::clhs::CLHSDataManager::quantileObjectiveFunc
T quantileObjectiveFunc(std::vector< int > &sampleCountPerQuantile)
Definition clhs.h:289

sgs::clhs::CLHSDataManager::addExistingPoint
void addExistingPoint(T *p_features, int x, int y)
Definition clhs.h:175

sgs::clhs::CLHSDataManager::correlationObjectiveFunc
T correlationObjectiveFunc(std::vector< std::vector< T > > &corr)
Definition clhs.h:313

sgs::clhs::CLHSDataManager::getExistingFeatures
void getExistingFeatures(std::vector< T > &features, std::vector< int > &x, std::vector< int > &y)
Definition clhs.h:333

sgs::clhs::CLHSDataManager::getRandomPoint
void getRandomPoint(Point< T > &point)
Definition clhs.h:266

sgs::clhs::CLHSDataManager::randomIndex
uint64_t randomIndex()
Definition clhs.h:249

sgs::clhs::CLHSDataManager::addPoint
void addPoint(T *p_features, int x, int y)
Definition clhs.h:145

sgs::helper::RandValController
Definition helper.h:976

sgs::helper::RandValController::calculateRandValues
void calculateRandValues(void)
Definition helper.h:1019

sgs::helper::RandValController::next
bool next(void)
Definition helper.h:1034

sgs::raster::GDALRasterWrapper
Definition raster.h:57

sgs::raster::GDALRasterWrapper::getDataset
GDALDataset * getDataset()
Definition raster.h:429

sgs::raster::GDALRasterWrapper::getWidth
int getWidth()
Definition raster.h:467

sgs::raster::GDALRasterWrapper::getRasterBandTypeSize
size_t getRasterBandTypeSize(int band)
Definition raster.h:735

sgs::raster::GDALRasterWrapper::getGeotransform
double * getGeotransform()
Definition raster.h:590

sgs::raster::GDALRasterWrapper::getRasterBandType
GDALDataType getRasterBandType(int band)
Definition raster.h:724

sgs::raster::GDALRasterWrapper::getBandCount
int getBandCount()
Definition raster.h:485

sgs::raster::GDALRasterWrapper::getHeight
int getHeight()
Definition raster.h:476

sgs::raster::GDALRasterWrapper::getRasterBand
GDALRasterBand * getRasterBand(int band)
Definition raster.h:696

sgs::raster::GDALRasterWrapper::getPixelWidth
double getPixelWidth()
Definition raster.h:555

sgs::raster::GDALRasterWrapper::getPixelHeight
double getPixelHeight()
Definition raster.h:565

sgs::vector::GDALVectorWrapper
Definition vector.h:46

sgs::vector::GDALVectorWrapper::getSRS
OGRSpatialReference * getSRS(void)
Definition vector.h:457

sgs::vector::GDALVectorWrapper::write
void write(std::string filename)
Definition vector.h:397

MILLION
#define MILLION 1000000
Definition clhs.h:29

existing.h

sgs::clhs::getQuantile
size_t getQuantile(T val, std::vector< T > &quantiles)
Definition clhs.h:59

sgs::clhs::clhs
std::tuple< std::vector< std::vector< double > >, vector::GDALVectorWrapper * > clhs(raster::GDALRasterWrapper *p_raster, int nSamp, int iterations, vector::GDALVectorWrapper *p_access, std::string layerName, double buffInner, double buffOuter, vector::GDALVectorWrapper *p_existing, size_t replace, bool plot, std::string tempFolder, std::string filename)
Definition clhs.h:1038

sgs::clhs::selectSamples
void selectSamples(std::vector< std::vector< T > > &quantiles, CLHSDataManager< T > &clhs, xso::xoshiro_4x64_plus &rng, existing::Existing &existing, size_t replace, size_t iterations, size_t nSamp, size_t nFeat, OGRLayer *p_layer, double *GT, bool plot, std::vector< double > &xCoords, std::vector< double > &yCoords)
Definition clhs.h:705

sgs::clhs::readRaster
void readRaster(std::vector< helper::RasterBandMetaData > &bands, CLHSDataManager< T > &clhs, access::Access &access, existing::Existing &existing, helper::RandValController &rand, GDALDataType type, std::vector< std::vector< T > > &quantiles, size_t size, int width, int height, int count, int nSamp)
Definition clhs.h:409

sgs::helper::getProbabilityMultiplier
uint64_t getProbabilityMultiplier(double width, double height, double pixelWidth, double pixelHeight, int startMult, int numSamples, bool useMindist, double accessibleArea)
Definition helper.h:940

sgs::helper::sample_to_point
std::pair< double, double > sample_to_point(double *GT, Index &index)
Definition helper.h:1108

sgs::helper::addPoint
void addPoint(OGRPoint *p_point, OGRLayer *p_layer)
Definition helper.h:700

helper.h

sgs::access
Definition access.h:23

sgs::dist
Definition dist.h:20

sgs::existing
Definition existing.h:27

sgs::quantiles
Definition quantiles.h:24

sgs
Definition pca.h:23

raster.h

sgs::access::Access
Definition access.h:30

sgs::clhs::Point
Definition clhs.h:42

sgs::clhs::Point::p_features
T * p_features
Definition clhs.h:43

sgs::clhs::Point::x
int x
Definition clhs.h:44

sgs::clhs::Point::y
int y
Definition clhs.h:45

sgs::existing::Existing
Definition existing.h:38

sgs::helper::Field
Definition helper.h:46

sgs::helper::RasterBandMetaData
Definition helper.h:87

vector.h