sgsPy
structurally guided sampling
Loading...
Searching...
No Matches
clhs.h
Go to the documentation of this file.
1/******************************************************************************
2 *
3 * Project: sgs
4 * Purpose: C++ implementation of CLHS
5 * Author: Joseph Meyer
6 * Date: November, 2025
7 *
8 ******************************************************************************/
9
14
15#include <iostream>
16#include <random>
17
18#include "utils/access.h"
19#include "utils/existing.h"
20#include "utils/helper.h"
21#include "utils/raster.h"
22#include "utils/vector.h"
23
24#include <boost/unordered/unordered_flat_set.hpp>
25#include <mkl.h>
26#include "oneapi/dal.hpp"
27#include <xoshiro.h>
28
29#define MILLION 1000000
30
31typedef oneapi::dal::homogen_table DALHomogenTable;
32
33namespace sgs {
34namespace clhs {
35
41template <typename T>
42struct Point {
43 T *p_features = nullptr;
44 int x = -1;
45 int y = -1;
46};
47
57template <typename T>
58inline size_t
59getQuantile(T val, std::vector<T>& quantiles) {
60 auto it = std::lower_bound(quantiles.begin(), quantiles.end(), val);
61 return (it == quantiles.end()) ?
62 quantiles.size() :
63 std::distance(quantiles.begin(), it);
64}
65
74template <typename T>
76 private:
77 std::vector<T> features;
78 std::vector<int> x;
79 std::vector<int> y;
80 size_t fi; //features index
81 size_t count;
82 int64_t size;
83 uint64_t ucount;
84
85 //for existing sample points
86 std::vector<T> efeatures;
87 std::vector<int> ex;
88 std::vector<int> ey;
89 size_t efi = 0;
90 size_t ecount = 0;
91
92 std::vector<std::vector<T>> corr;
93
94 int nFeat;
95 int nSamp;
96
97 xso::xoshiro_4x64_plus *p_rng = nullptr;
98 uint64_t mask = 0;
99
100 public:
113 CLHSDataManager(int nFeat, int nSamp, xso::xoshiro_4x64_plus *p_rng, size_t existingCount) {
114 this->nFeat = nFeat;
115 this->nSamp = nSamp;
116 this->count = 0;
117 this->fi = 0;
118 this->size = MILLION;
119 this->features.resize(MILLION * nFeat);
120 this->x.resize(MILLION);
121 this->y.resize(MILLION);
122
123 this->p_rng = p_rng;
124
125 if (existingCount != 0) {
126 this->efeatures.resize(existingCount * static_cast<size_t>(nFeat));
127 this->ex.resize(existingCount);
128 this->ey.resize(existingCount);
129 }
130 }
131
144 inline void
145 addPoint(T *p_features, int x, int y) {
146 for (int f = 0; f < nFeat; f++) {
147 features[this->fi] = p_features[f];
148 this->fi++;
149 }
150
151 this->x[this->count] = x;
152 this->y[this->count] = y;
153 this->count++;
154
155 if (this->count == this->size) {
156 this->features.resize(this->features.size() + MILLION * this->nFeat);
157 this->x.resize(this->x.size() + MILLION);
158 this->y.resize(this->y.size() + MILLION);
159 this->size += MILLION;
160 }
161 }
162
174 inline void
175 addExistingPoint(T *p_features, int x, int y) {
176 for (int f = 0; f < nFeat; f++) {
177 this->efeatures[this->efi] = p_features[f];
178 this->efi++;
179 }
180
181 this->ex[this->ecount] = x;
182 this->ey[this->ecount] = y;
183 this->ecount++;
184 }
185
206 inline void
207 finalize(std::vector<std::vector<T>>& corr) {
208 if (this->count < static_cast<size_t>(this->nSamp)) {
209 throw std::runtime_error("not enough points saved during raster iteration to conduct clhs sampling.");
210 }
211
212 this->corr = corr;
213
214 this->x.resize(this->count);
215 this->y.resize(this->count);
216 this->features.resize(this->count * nFeat);
217 this->ucount = static_cast<uint64_t>(this->count);
218
219 //use bit twiddling to fill the mask
220 this->mask = static_cast<uint64_t>(this->count);
221 this->mask |= this->mask >> 1;
222 this->mask |= this->mask >> 2;
223 this->mask |= this->mask >> 4;
224 this->mask |= this->mask >> 8;
225 this->mask |= this->mask >> 16;
226 this->mask |= this->mask >> 32;
227
228 //resize existing sample vectors
229 this->ex.resize(this->ecount);
230 this->ey.resize(this->ecount);
231 this->features.resize(this->ecount * nFeat);
232 }
233
248 inline uint64_t
250 uint64_t index = ((*p_rng)() >> 11) & mask;
251
252 while (index >= this->ucount) {
253 index = ((*p_rng)() >> 11) & mask;
254 }
255
256 return index;
257 }
258
265 inline void
267 uint64_t index = randomIndex();
268
269 point.p_features = this->features.data() + (index * nFeat);
270 point.x = x[index];
271 point.y = y[index];
272 }
273
288 inline T
289 quantileObjectiveFunc(std::vector<int>& sampleCountPerQuantile) {
290 int retval = 0;
291
292 for (const int& count : sampleCountPerQuantile) {
293 retval += std::abs(count - 1);
294 }
295
296 return static_cast<T>(retval);
297 }
298
312 inline T
313 correlationObjectiveFunc(std::vector<std::vector<T>>& corr) {
314 T retval = 0;
315
316 for (size_t i = 0; i < this->corr.size(); i++) {
317 for (size_t j = 0; j < this->corr[i].size(); j++) {
318 retval += std::abs(corr[i][j] - this->corr[i][j]);
319 }
320 }
321
322 return retval;
323 }
324
332 inline void
333 getExistingFeatures(std::vector<T>& features, std::vector<int>& x, std::vector<int>& y) {
334 features.resize(this->efeatures.size());
335 x.resize(this->ex.size());
336 y.resize(this->ey.size());
337
338 std::memcpy(features.data(), this->efeatures.data(), this->efeatures.size() * sizeof(T));
339 std::memcpy(x.data(), this->ex.data(), this->ex.size() * sizeof(int));
340 std::memcpy(y.data(), this->ey.data(), this->ey.size() * sizeof(int));
341 }
342};
343
407template <typename T>
408inline void
410 std::vector<helper::RasterBandMetaData>& bands,
415 GDALDataType type,
416 std::vector<std::vector<T>>& quantiles,
417 size_t size,
418 int width,
419 int height,
420 int count,
421 int nSamp)
422{
423 std::vector<std::vector<T>> probabilities(count);
424 quantiles.resize(count);
425
426 for (int i = 0; i < count; i++) {
427 probabilities[i].resize(nSamp - 1);
428 quantiles[i].resize(nSamp - 1);
429
430 for (int j = 0; j < nSamp - 1; j++) {
431 probabilities[i][j] = static_cast<T>(j + 1) / static_cast<T>(nSamp);
432 }
433 }
434
435 int xBlockSize = bands[0].xBlockSize;
436 int yBlockSize = bands[0].yBlockSize;
437
438 int xBlocks = (width + xBlockSize - 1) / xBlockSize;
439 int yBlocks = (height + yBlockSize - 1) / yBlockSize;
440
441 double deps = .001;
442 float seps = .001f;
443
444 std::vector<T> corrBuffer(count * xBlockSize * yBlockSize);
445 std::vector<std::vector<T>> quantileBuffers(count);
446 for (int i = 0; i < count; i++) {
447 quantileBuffers[i].resize(xBlockSize * yBlockSize);
448 }
449
450 if (access.used) {
451 access.band.p_buffer = VSIMalloc3(xBlockSize, yBlockSize, access.band.size);
452 }
453
454 //create descriptor for correlation matrix streaming calculation with oneDAL
455 const auto cor_desc = oneapi::dal::covariance::descriptor{}.set_result_options(oneapi::dal::covariance::result_options::cor_matrix);
456 oneapi::dal::covariance::partial_compute_result<> partial_result;
457
458 //create tasks for quantile streaming calculation with MKL
459 std::vector<VSLSSTaskPtr> quantileTasks(count);
460 int status;
461 MKL_INT quant_order_n = quantiles[0].size();
462 MKL_INT p = 1;
463 MKL_INT n = xBlockSize * yBlockSize;
464 MKL_INT nparams = VSL_SS_SQUANTS_ZW_PARAMS_N;
465 MKL_INT xstorage = VSL_SS_MATRIX_STORAGE_ROWS;
466
467 //MKL functions have different versions for single/double precision floating point data
468 if (type == GDT_Float64) {
469 for (int i = 0; i < count; i++) {
470 //reinterpret cast the pointer for compiler reasons
471 status = vsldSSNewTask(
472 &quantileTasks[i],
473 &p,
474 &n,
475 &xstorage,
476 reinterpret_cast<double *>(quantileBuffers[i].data()),
477 0,
478 0
479 );
480 }
481 }
482 else {
483 for (int i = 0; i < count; i++) {
484 //reinterpret cast the pointer for compiler reasons
485 status = vslsSSNewTask(
486 &quantileTasks[i],
487 &p,
488 &n,
489 &xstorage,
490 reinterpret_cast<float *>(quantileBuffers[i].data()),
491 0,
492 0
493 );
494 }
495 }
496
497 int8_t *p_access = reinterpret_cast<int8_t *>(access.band.p_buffer);
498
499 bool calledEditStreamQuantiles = false;
500 void *p_data = reinterpret_cast<void *>(corrBuffer.data());
501
502 for (int yBlock = 0; yBlock < yBlocks; yBlock++) {
503 for (int xBlock = 0; xBlock < xBlocks; xBlock++) {
504 //get block size
505 int xValid, yValid;
506 bands[0].p_band->GetActualBlockSize(xBlock, yBlock, &xValid, &yValid);
507
508 //read bands into memory
509 for (int i = 0; i < count ; i++) {
510 CPLErr err = bands[i].p_band->RasterIO(
511 GF_Read,
512 xBlock * xBlockSize,
513 yBlock * yBlockSize,
514 xValid,
515 yValid,
516 (void *)((size_t)p_data + i * size),
517 xValid,
518 yValid,
519 type,
520 size * static_cast<size_t>(count),
521 size * static_cast<size_t>(count) * static_cast<size_t>(xBlockSize)
522 );
523
524 if (err) {
525 throw std::runtime_error("Error reading data from raster band.");
526 }
527 }
528
529 //calculate rand vals
530 rand.calculateRandValues();
531
532 //read access band into memory if used
533 if (access.used) {
534 rasterBandIO(access.band, access.band.p_buffer, xBlockSize, yBlockSize, xBlock, yBlock, xValid, yValid, true, false);
535 }
536
537 //iterate through pixels
538 n = 0;
539 for (int y = 0; y < yValid; y++) {
540 int index = y * xBlockSize;
541 for (int x = 0; x < xValid; x++) {
542 bool isNan = false;
543 T *p_buff = corrBuffer.data() + (index * count);
544 for (int b = 0; b < count; b++) {
545 T val = p_buff[b];
546 isNan = std::isnan(val) || val == bands[b].nan;
547
548 if (isNan) {
549 break;
550 }
551
552 quantileBuffers[b][n] = val;
553 corrBuffer[n * count + b] = val;
554 }
555
556 if (!isNan) {
557 n++;
558
559 bool accessible = !access.used || p_access[index] != 1;
560
561 if (existing.used && existing.containsIndex(x + xBlock * xBlockSize, y + yBlock * yBlockSize)) {
562 clhs.addExistingPoint(
563 p_buff,
564 xBlock * xBlockSize + x,
565 yBlock * yBlockSize + y
566 );
567 }
568 else if (accessible && rand.next()) {
569 clhs.addPoint(
570 p_buff,
571 xBlock * xBlockSize + x,
572 yBlock * yBlockSize + y
573 );
574 }
575 }
576 index++;
577 }
578 }
579
580 if (n == 0) {
581 continue;
582 }
583
584 //MKL functions have different versions for single/double precision floating point data
585 if (type == GDT_Float64) {
586 if (!calledEditStreamQuantiles) {
587 for (int i = 0; i < count; i++) {
588 //reinterpret cast the pointers for compiler reasons
589 status = vsldSSEditStreamQuantiles(
590 quantileTasks[i],
591 &quant_order_n,
592 reinterpret_cast<double *>(probabilities[i].data()),
593 reinterpret_cast<double *>(quantiles[i].data()),
594 &nparams,
595 &deps
596 );
597 }
598 calledEditStreamQuantiles = true;
599 }
600 for (int i = 0; i < count; i++) {
601 status = vsldSSCompute(
602 quantileTasks[i],
603 VSL_SS_STREAM_QUANTS,
604 VSL_SS_METHOD_SQUANTS_ZW_FAST
605 );
606 }
607 }
608 else { //type == GDT_Float32
609 if (!calledEditStreamQuantiles) {
610 for (int i = 0; i < count; i++) {
611 //reinterpret cast the pointers for compiler reasons
612 status = vslsSSEditStreamQuantiles(
613 quantileTasks[i],
614 &quant_order_n,
615 reinterpret_cast<float *>(probabilities[i].data()),
616 reinterpret_cast<float *>(quantiles[i].data()),
617 &nparams,
618 &seps
619 );
620 }
621 calledEditStreamQuantiles = true;
622 }
623 for (int i = 0; i < count; i++) {
624 status = vslsSSCompute(
625 quantileTasks[i],
626 VSL_SS_STREAM_QUANTS,
627 VSL_SS_METHOD_SQUANTS_ZW_FAST
628 );
629 }
630
631 }
632
633 //update correlation matrix calculations
634 DALHomogenTable table = DALHomogenTable(corrBuffer.data(), n, count, [](const T *){}, oneapi::dal::data_layout::row_major);
635 partial_result = oneapi::dal::partial_compute(cor_desc, partial_result, table);
636 }
637 }
638
639 if (access.used) {
640 VSIFree(access.band.p_buffer);
641 }
642
643 //calculate and update clhs data manager with correlation matrix
644 auto result = oneapi::dal::finalize_compute(cor_desc, partial_result);
645 auto correlation = result.get_cor_matrix();
646
647 oneapi::dal::row_accessor<const T> acc {correlation};
648
649 n = 0;
650 std::vector<std::vector<T>> corr(count);
651 for (int i = 0; i < count; i++) {
652 corr[i].resize(count);
653 auto row = acc.pull({i, i + 1});
654
655 for (int j = 0; j < count; j++) {
656 corr[i][j] = row[j];
657 }
658
659 status = (type == GDT_Float64) ?
660 vsldSSCompute(quantileTasks[i], VSL_SS_STREAM_QUANTS, VSL_SS_METHOD_SQUANTS_ZW) :
661 vslsSSCompute(quantileTasks[i], VSL_SS_STREAM_QUANTS, VSL_SS_METHOD_SQUANTS_ZW);
662 status = vslSSDeleteTask(&quantileTasks[i]);
663 }
664
665 clhs.finalize(corr);
666}
667
703template <typename T>
704inline void
705selectSamples(std::vector<std::vector<T>>& quantiles,
707 xso::xoshiro_4x64_plus& rng,
709 size_t replace,
710 size_t iterations,
711 size_t nSamp,
712 size_t nFeat,
713 OGRLayer *p_layer,
714 double *GT,
715 bool plot,
716 std::vector<double>& xCoords,
717 std::vector<double>& yCoords)
718{
719 std::vector<T> features;
720 std::vector<int> x;
721 std::vector<int> y;
722
723 std::vector<int> sampleCountPerQuantile(nFeat * nSamp, 0); //nFeat x nSamp 2d array
724 std::vector<int> quantilesOfEachSample(nSamp * nFeat, 0); //nSamp x nFeat 2d array
725
726 //if there are existing samples, add all of them.
727 if (existing.used) {
728 clhs.getExistingFeatures(features, x, y);
729 }
730
731 //Then, remove up to 'replace' number of them which are redundant
732 //'redundant' samples are samples which cause over-representation in feature quantiles
733
734 size_t neSamples = x.size(); //number of existing samples
735 if (neSamples > 0 && replace != 0) {
736 for (size_t si = 0; si < neSamples; si++) {
737 for (size_t fi = 0; fi < nFeat; fi++) {
738 T val = features[(si * nFeat) + fi];
739 size_t q = getQuantile<T>(val, quantiles[fi]);
740 sampleCountPerQuantile[(fi * nSamp) + q]++;
741 quantilesOfEachSample[(si * nFeat) + fi] = q;
742 }
743 }
744
745 while (replace > 0 && neSamples > 0) {
746 size_t worstRedundancy = 0;
747 size_t worstRedundancyIndex = 0;
748
749 //get the sample with the worst redundancy. In other words, the one which is in the
750 //most over-represented quantile across all features
751 for (size_t si = 0; si < neSamples; si++) {
752 size_t curSampleRedundancy = 0;
753 for (size_t fi = 0; fi < nFeat; fi++) {
754 size_t q = quantilesOfEachSample[(si * nFeat) + fi];
755 curSampleRedundancy += sampleCountPerQuantile[(fi * nSamp) + q];
756 }
757 if (curSampleRedundancy > worstRedundancy) {
758 worstRedundancy = curSampleRedundancy;
759 worstRedundancyIndex = si;
760 }
761 }
762
763 //if the remaining samples are already forming a partial latin hypercube
764 //(no more than 1 sample per quantile of each feature)
765 //then don't remove any more indices!
766 if (worstRedundancy == nFeat) {
767 break;
768 }
769
770 //replace the most redundant sample with the last sample in the vector, and decrease
771 //the size of the vector by 1. But first, adjust the values of sampleCountPerQuantile
772 size_t si = worstRedundancyIndex;
773 for (size_t fi = 0; fi < nFeat; fi++) {
774 size_t q = quantilesOfEachSample[(si * nFeat) + fi];
775 sampleCountPerQuantile[(fi * nSamp) + q]--;
776 }
777
778 x[si] = x[neSamples - 1];
779 y[si] = y[neSamples - 1];
780 std::memcpy(features.data() + (si * nFeat),
781 features.data() + ((neSamples - 1) * nFeat),
782 sizeof(T) * nFeat);
783 std::memcpy(quantilesOfEachSample.data() + (si * nFeat),
784 quantilesOfEachSample.data() + ((neSamples - 1) * nFeat),
785 sizeof(int) * nFeat);
786
787 neSamples--;
788 replace--;
789 }
790 }
791
792 //NOW, the neSamples variable contains the number of existing samples which MUST be kept.
793 //This number also happens to be the first index of the remaining space in the vector which
794 //may be filled in with non-existing samples. If there were no existing samples this value
795 //is 0.
796 size_t starti = neSamples;
797 boost::unordered::unordered_flat_set<uint64_t> points;
798
799 //Add all of the existing samples to the output layer, and add to indices map
800 helper::Field fieldExistingTrue("existing", 1);
801 for (size_t si = 0; si < neSamples; si++) {
802 OGRPoint point = existing.getPoint(x[si], y[si]);
803 helper::addPoint(&point, p_layer, &fieldExistingTrue);
804
805 if (plot) {
806 xCoords.push_back(point.getX());
807 yCoords.push_back(point.getY());
808 }
809
810 points.insert((((uint64_t) x[si]) << 32) | ((uint64_t) y[si]));
811 }
812
813 //if there are already enough (existing) samples, return and don't add any more
814 if (starti >= nSamp) {
815 return;
816 }
817
818 std::uniform_real_distribution<T> dist(0.0, 1.0);
819 std::uniform_int_distribution<size_t> indexDist(starti, nSamp - 1);
820
821 std::vector<std::vector<T>> corr(nFeat);
822 for (int i = 0; i < nFeat; i++) {
823 corr[i].resize(nFeat);
824 }
825
826 features.resize(nSamp * nFeat);
827 x.resize(nSamp);
828 y.resize(nSamp);
829
830 //get first random samples
831 int i = starti;
832 Point<T> p;
833 while (i < nSamp) {
834 clhs.getRandomPoint(p);
835
836 if (points.contains((((uint64_t) p.x) << 32) | ((uint64_t) p.y))) {
837 continue;
838 }
839
840 x[i] = p.x;
841 y[i] = p.y;
842
843 for (int f = 0; f < nFeat; f++) {
844 T val = p.p_features[f];
845 features[(i * nFeat) + f] = val;
846
847 int q = getQuantile<T>(val, quantiles[f]);
848 sampleCountPerQuantile[(f * nSamp) + q]++;
849 quantilesOfEachSample[(i * nFeat) + f] = q;
850 }
851
852 points.insert((((uint64_t) p.x) << 32) | ((uint64_t) p.y));
853 i++;
854 }
855
856 //define covariance calculation
857 DALHomogenTable table = DALHomogenTable(features.data(), nSamp, nFeat, [](const T *){}, oneapi::dal::data_layout::row_major);
858 const auto cor_desc = oneapi::dal::covariance::descriptor{}.set_result_options(oneapi::dal::covariance::result_options::cor_matrix);
859 const auto result = oneapi::dal::compute(cor_desc, table);
860 oneapi::dal::row_accessor<const T> acc {result.get_cor_matrix()};
861 for (int i = 0; i < nFeat; i++) {
862 auto row = acc.pull({i, i + 1});
863
864 for (int j = 0; j < nFeat; j++) {
865 corr[i][j] = row[j];
866 }
867 }
868
869 double temp = 1;
870 double d = temp / static_cast<double>(iterations);
871
872 T obj = 0;
873 T objQ = clhs.quantileObjectiveFunc(sampleCountPerQuantile);
874 T objC = clhs.correlationObjectiveFunc(corr);
875
876 obj = objQ + objC;
877
878 //features of old (before random new index) index
879 std::vector<T> oldf(nFeat);
880
881 //begin annealing schedule. If we have a perfect latin hypercube -- or if we pass enough iterations -- stop iterating.
882 while (temp > 0 && objQ != 0) {
883 size_t i; //the index within the indices, x, y, and features vector so we know what to swap without searching
884 if (dist(rng) < 0.5) {
885 //50% of the time, choose a random sample to replace
886 i = indexDist(rng);
887 }
888 else {
889 //50% of the time, choose the worst sample to replace
890
891 //get the sample with the worst redundancy. In other words, the one which is in the
892 //most over-represented quantile across all features
893 size_t worstRedundancyIndex = 0;
894 size_t worstRedundancy = 0;
895 for (size_t si = starti; si < nSamp; si++) {
896 size_t curSampleRedundancy = 0;
897 for (size_t fi = 0; fi < nFeat; fi++) {
898 size_t q = quantilesOfEachSample[(si * nFeat) + fi];
899 curSampleRedundancy += sampleCountPerQuantile[(fi * nSamp) + q];
900 }
901 if (curSampleRedundancy > worstRedundancy) {
902 worstRedundancy = curSampleRedundancy;
903 worstRedundancyIndex = si;
904 }
905 }
906 i = worstRedundancyIndex;
907 }
908
909 //move selected replacement to 'oldf' vector to retain the old values in case we revert back
910 //to that state
911 std::memcpy(oldf.data(), features.data() + (i * nFeat), nFeat * sizeof(T));
912
913 //select a new index
914 Point<T> p;
915 clhs.getRandomPoint(p);
916 while (points.contains((((uint64_t) p.x) << 32) | ((uint64_t) p.y))) {
917 clhs.getRandomPoint(p);
918 }
919
920 //move new features into feature vector
921 std::memcpy(features.data() + (i * nFeat), p.p_features, nFeat * sizeof(T));
922
923 //recalculate sample count per quantile
924 std::vector<int> oldq(nFeat);
925 std::vector<int> newq(nFeat);
926 for (int f = 0; f < nFeat; f++) {
927 //decrement based removal of on old features
928 int q = getQuantile(oldf[f], quantiles[f]);
929 oldq[f] = q;
930 sampleCountPerQuantile[(f * nSamp) + q]--;
931
932 //increment based on inputof new features
933 q = getQuantile(p.p_features[f], quantiles[f]);
934 newq[f] = q;
935 sampleCountPerQuantile[(f * nSamp) + q]++;
936 }
937
938 //recalculate objective function from quantiles
939 T newObjQ = clhs.quantileObjectiveFunc(sampleCountPerQuantile);
940
941 //recalculate correlation matrix
942 const auto result = oneapi::dal::compute(cor_desc, table); // we update the table in place
943 oneapi::dal::row_accessor<const T> acc {result.get_cor_matrix()};
944 for (int j = 0; j < nFeat; j++) {
945 auto row = acc.pull({j, j + 1});
946
947 for (int k = 0; k < nFeat; k++) {
948 corr[j][k] = row[k];
949 }
950 }
951
952 //recalculate objective function from correlation matrix
953 T newObjC = clhs.correlationObjectiveFunc(corr);
954
955 T newObj = newObjQ + newObjC;
956 T delta = newObj - obj;
957
958 bool keep = dist(rng) < std::exp(-1 * delta / temp);
959
960 if (keep) {
961 //update the new changes
962 points.erase((((uint64_t) x[i]) << 32) | ((uint64_t) y[i]));
963 points.insert((((uint64_t) p.x) << 32) | ((uint64_t) p.y));
964
965 x[i] = p.x;
966 y[i] = p.y;
967
968 std::memcpy(quantilesOfEachSample.data() + (i * nFeat), newq.data(), sizeof(int) * nFeat);
969
970 objC = newObjC;
971 objQ = newObjQ;
972 obj = newObj;
973 }
974 else {
975 //revert back to old changes
976 for (int f = 0; f < nFeat; f++) {
977 sampleCountPerQuantile[(f * nSamp) + newq[f]]--;
978 sampleCountPerQuantile[(f * nSamp) + oldq[f]]++;
979 }
980
981 std::memcpy(
982 reinterpret_cast<void *>(features.data() + (i * nFeat)),
983 reinterpret_cast<void *>(oldf.data()),
984 nFeat * sizeof(T)
985 );
986 }
987
988 //update annealing temperature
989 temp -= d;
990 }
991
992 //add samples to output layer
993 helper::Field fieldExistingFalse("existing", 0);
994 for (int i = starti ; i < nSamp; i++) {
995 const auto [xCoord, yCoord] = helper::sample_to_point(GT, x[i], y[i]);
996 OGRPoint point = OGRPoint(xCoord, yCoord);
997 existing.used ?
998 helper::addPoint(&point, p_layer, &fieldExistingFalse) :
999 helper::addPoint(&point, p_layer);
1000
1001 if (plot) {
1002 xCoords.push_back(xCoord);
1003 yCoords.push_back(yCoord);
1004 }
1005 }
1006}
1007
1037std::tuple<std::vector<std::vector<double>>, vector::GDALVectorWrapper *>
1039 raster::GDALRasterWrapper *p_raster,
1040 int nSamp,
1041 int iterations,
1042 vector::GDALVectorWrapper *p_access,
1043 std::string layerName,
1044 double buffInner,
1045 double buffOuter,
1046 vector::GDALVectorWrapper *p_existing,
1047 size_t replace,
1048 bool plot,
1049 std::string tempFolder,
1050 std::string filename)
1051{
1052 GDALAllRegister();
1053
1054 int width = p_raster->getWidth();
1055 int height = p_raster->getHeight();
1056 int nFeat = p_raster->getBandCount();
1057 double *GT = p_raster->getGeotransform();
1058
1059 std::vector<double> xCoords, yCoords;
1060
1061 std::vector<helper::RasterBandMetaData> bands(p_raster->getBandCount());
1062 for (int i = 0; i < nFeat; i++) {
1063 bands[i].p_band = p_raster->getRasterBand(i);
1064 bands[i].type = p_raster->getRasterBandType(i);
1065 bands[i].size = p_raster->getRasterBandTypeSize(i);
1066 bands[i].nan = bands[i].p_band->GetNoDataValue();
1067 bands[i].p_band->GetBlockSize(&bands[i].xBlockSize, &bands[i].yBlockSize);
1068 }
1069
1070 //create output dataset before doing anything which will take a long time in case of failure.
1071 GDALDriver *p_driver = GetGDALDriverManager()->GetDriverByName("MEM");
1072 if (!p_driver) {
1073 throw std::runtime_error("unable to create output sample dataset driver.");
1074 }
1075 GDALDataset *p_samples = p_driver->Create("", 0, 0, 0, GDT_Unknown, nullptr);
1076 if (!p_samples) {
1077 throw std::runtime_error("unable to create output dataset with driver.");
1078 }
1079
1080 vector::GDALVectorWrapper *p_wrapper = new vector::GDALVectorWrapper(p_samples, std::string(p_raster->getDataset()->GetProjectionRef()));
1081 OGRLayer *p_layer = p_samples->CreateLayer("samples", p_wrapper->getSRS(), wkbPoint, nullptr);
1082 if (!p_layer) {
1083 throw std::runtime_error("unable to create output dataset layer.");
1084 }
1085
1087 p_access,
1088 p_raster,
1089 layerName,
1090 buffInner,
1091 buffOuter,
1092 true,
1093 tempFolder,
1094 bands[0].xBlockSize,
1095 bands[0].yBlockSize
1096 );
1097
1099 p_existing,
1100 p_raster,
1101 GT,
1102 width,
1103 p_layer,
1104 false,
1105 xCoords,
1106 yCoords,
1107 false
1108 );
1109
1110 //fast random number generator using xoshiro256++
1111 //https://vigna.di.unimi.it/ftp/papers/ScrambledLinear.pdf
1112 xso::xoshiro_4x64_plus rng;
1113 uint64_t multiplier = helper::getProbabilityMultiplier(
1114 width,
1115 height,
1116 p_raster->getPixelWidth(),
1117 p_raster->getPixelHeight(),
1118 8,
1119 MILLION * 100,
1120 false,
1121 access.area
1122 );
1123 helper::RandValController rand(bands[0].xBlockSize, bands[0].yBlockSize, multiplier, &rng);
1124
1125 //get data type for all bands
1126 GDALDataType type = GDT_Float32;
1127 for (const helper::RasterBandMetaData& band : bands) {
1128 if (band.type == GDT_Float64) {
1129 type = GDT_Float64;
1130 break;
1131 }
1132 }
1133
1134 if (type == GDT_Float64) {
1135 std::vector<std::vector<double>> quantiles;
1136
1137 //create instance of data management class
1138 CLHSDataManager<double> clhs(nFeat, nSamp, &rng, existing.count());
1139
1140 //read raster, calculating quantiles, correlation matrix, and adding points to sample from.
1141 readRaster<double>(bands, clhs, access, existing, rand, type, quantiles, sizeof(double), width, height, nFeat, nSamp);
1142
1143 //select samples and add them to output layer
1144 selectSamples<double>(quantiles, clhs, rng, existing, replace, iterations, nSamp, nFeat, p_layer, GT, plot, xCoords, yCoords);
1145 }
1146 else { //type == GDT_Float32
1147 std::vector<std::vector<float>> quantiles;
1148
1149 //create instance of data management class
1150 CLHSDataManager<float> clhs(nFeat, nSamp, &rng, existing.count());
1151
1152 //read raster, calculating quantiles, correlation matrix, and adding points to sample from.
1153 readRaster<float>(bands, clhs, access, existing, rand, type, quantiles, sizeof(float), width, height, nFeat, nSamp);
1154
1155 //select samples and add them to output layer
1156 selectSamples<float>(quantiles, clhs, rng, existing, replace, iterations, nSamp, nFeat, p_layer, GT, plot, xCoords, yCoords);
1157 }
1158
1159 if (filename != "") {
1160 try {
1161 p_wrapper->write(filename);
1162 }
1163 catch (const std::exception& e) {
1164 std::cout << "Exception thrown trying to write file: " << e.what() << std::endl;
1165 }
1166 }
1167
1168 return {{xCoords, yCoords}, p_wrapper};
1169}
1170
1171} //namespace clhs
1172} //namespace sgs
Definition clhs.h:75
void finalize(std::vector< std::vector< T > > &corr)
Definition clhs.h:207
CLHSDataManager(int nFeat, int nSamp, xso::xoshiro_4x64_plus *p_rng, size_t existingCount)
Definition clhs.h:113
T quantileObjectiveFunc(std::vector< int > &sampleCountPerQuantile)
Definition clhs.h:289
void addExistingPoint(T *p_features, int x, int y)
Definition clhs.h:175
T correlationObjectiveFunc(std::vector< std::vector< T > > &corr)
Definition clhs.h:313
void getExistingFeatures(std::vector< T > &features, std::vector< int > &x, std::vector< int > &y)
Definition clhs.h:333
void getRandomPoint(Point< T > &point)
Definition clhs.h:266
uint64_t randomIndex()
Definition clhs.h:249
void addPoint(T *p_features, int x, int y)
Definition clhs.h:145
Definition helper.h:976
void calculateRandValues(void)
Definition helper.h:1019
bool next(void)
Definition helper.h:1034
Definition raster.h:57
GDALDataset * getDataset()
Definition raster.h:429
int getWidth()
Definition raster.h:467
size_t getRasterBandTypeSize(int band)
Definition raster.h:735
double * getGeotransform()
Definition raster.h:590
GDALDataType getRasterBandType(int band)
Definition raster.h:724
int getBandCount()
Definition raster.h:485
int getHeight()
Definition raster.h:476
GDALRasterBand * getRasterBand(int band)
Definition raster.h:696
double getPixelWidth()
Definition raster.h:555
double getPixelHeight()
Definition raster.h:565
Definition vector.h:46
OGRSpatialReference * getSRS(void)
Definition vector.h:457
void write(std::string filename)
Definition vector.h:397
#define MILLION
Definition clhs.h:29
size_t getQuantile(T val, std::vector< T > &quantiles)
Definition clhs.h:59
std::tuple< std::vector< std::vector< double > >, vector::GDALVectorWrapper * > clhs(raster::GDALRasterWrapper *p_raster, int nSamp, int iterations, vector::GDALVectorWrapper *p_access, std::string layerName, double buffInner, double buffOuter, vector::GDALVectorWrapper *p_existing, size_t replace, bool plot, std::string tempFolder, std::string filename)
Definition clhs.h:1038
void selectSamples(std::vector< std::vector< T > > &quantiles, CLHSDataManager< T > &clhs, xso::xoshiro_4x64_plus &rng, existing::Existing &existing, size_t replace, size_t iterations, size_t nSamp, size_t nFeat, OGRLayer *p_layer, double *GT, bool plot, std::vector< double > &xCoords, std::vector< double > &yCoords)
Definition clhs.h:705
void readRaster(std::vector< helper::RasterBandMetaData > &bands, CLHSDataManager< T > &clhs, access::Access &access, existing::Existing &existing, helper::RandValController &rand, GDALDataType type, std::vector< std::vector< T > > &quantiles, size_t size, int width, int height, int count, int nSamp)
Definition clhs.h:409
uint64_t getProbabilityMultiplier(double width, double height, double pixelWidth, double pixelHeight, int startMult, int numSamples, bool useMindist, double accessibleArea)
Definition helper.h:940
std::pair< double, double > sample_to_point(double *GT, Index &index)
Definition helper.h:1108
void addPoint(OGRPoint *p_point, OGRLayer *p_layer)
Definition helper.h:700
Definition access.h:23
Definition dist.h:20
Definition existing.h:27
Definition quantiles.h:24
Definition pca.h:23
Definition access.h:30
Definition clhs.h:42
T * p_features
Definition clhs.h:43
int x
Definition clhs.h:44
int y
Definition clhs.h:45
Definition existing.h:38
Definition helper.h:46
Definition helper.h:87