sgsPy
structurally guided sampling
Loading...
Searching...
No Matches
dist.h
Go to the documentation of this file.
1/******************************************************************************
2 *
3 * Project: sgs
4 * Purpose: C++ implementation of PCA
5 * Author: Joseph Meyer
6 * Date: January, 2026
7 *
8 ******************************************************************************/
9
14
15#include "utils/helper.h"
16#include "utils/raster.h"
17#include "utils/vector.h"
18
19namespace sgs {
20namespace dist {
21
37template <typename T>
38void
41 int width,
42 int height,
43 T& min,
44 T& max)
45{
46 int xBlocks = (width + band.xBlockSize - 1) / band.xBlockSize;
47 int yBlocks = (height + band.yBlockSize - 1) / band.yBlockSize;
48
49 min = std::numeric_limits<T>::max();
50 max = std::numeric_limits<T>::min();
51 T nan = static_cast<T>(band.nan);
52 void *p_data = VSIMalloc3(band.xBlockSize, band.yBlockSize, band.size);
53
54 //calculate raster band minimum and maximum values
55 for (int yBlock = 0; yBlock < yBlocks; yBlock++) {
56 for (int xBlock = 0; xBlock < xBlocks; xBlock++) {
57 int xValid, yValid;
58 band.p_mutex->lock();
59 band.p_band->GetActualBlockSize(xBlock, yBlock, &xValid, &yValid);
60 band.p_mutex->unlock();
61
62 //read the band into memory
63 rasterBandIO(band, p_data, band.xBlockSize, band.yBlockSize, xBlock, yBlock, xValid, yValid, true, true);
64
65 for (int y = 0; y < yValid; y++) {
66 int index = y * width;
67 for (int x = 0; x < xValid; x++) {
68 T val = reinterpret_cast<T *>(p_data)[index];
69 if (val != nan && !std::isnan(val)) {
70 min = std::min(min, val);
71 max = std::max(max, val);
72 }
73 index++;
74 }
75 }
76 }
77 }
78
79 VSIFree(p_data);
80}
81
110template <typename T>
111void
113 T min,
114 T max,
115 int nBins,
116 GDALDataType type,
117 std::vector<double>& dbins,
118 std::vector<T>& tbins)
119{
120 dbins.resize(nBins + 1);
121 tbins.resize(nBins);
122
123 //determine bin values as doubles to display to the user.
124 double step = ((double)max - (double)min) / ((double)nBins);
125 double cur = (double)min;
126 for (int i = 0; i <= nBins; i++) {
127 dbins[i] = cur;
128 cur += step;
129 }
130
131
132 //set bin values as the data type used so less data type conversion is necessary in later code
133 cur = (double)min;
134 if (type == GDT_Float32 || type == GDT_Float64) {
135 for (int i = 0; i < nBins; i++) {
136 tbins[i] = static_cast<T>(dbins[i]);
137 }
138 }
139 else {
140 tbins[0] = min;
141 for (int i = 1; i < nBins; i++) {
142 cur += step;
143 tbins[i] = std::ceil(cur);
144 }
145 }
146}
147
170template <typename T>
171void
174 int width,
175 int height,
176 int nBins,
177 std::vector<T>& binVals,
178 std::vector<int64_t>& counts)
179{
180 int xBlocks = (width + band.xBlockSize - 1) / band.xBlockSize;
181 int yBlocks = (height + band.yBlockSize - 1) / band.yBlockSize;
182
183 T nan = static_cast<T>(band.nan);
184 void *p_data = VSIMalloc3(band.xBlockSize, band.yBlockSize, band.size);
185
186 for (int yBlock = 0; yBlock < yBlocks; yBlock++) {
187 for (int xBlock = 0; xBlock < xBlocks; xBlock++) {
188
189 int xValid, yValid;
190 band.p_mutex->lock();
191 band.p_band->GetActualBlockSize(xBlock, yBlock, &xValid, &yValid);
192 band.p_mutex->unlock();
193
194 //read the band into memory
195 rasterBandIO(band, p_data, band.xBlockSize, band.yBlockSize, xBlock, yBlock, xValid, yValid, true, true);
196
197 for (int y = 0; y < yValid; y++) {
198 int index = y * width;
199 for (int x = 0; x < xValid; x++) {
200 T val = reinterpret_cast<T *>(p_data)[index];
201
202 if (val != nan && !std::isnan(val)) {
203 for (int i = nBins - 1; i >= 0; i--) {
204 if (binVals[i] <= val) {
205 counts[i]++;
206 break;
207 }
208 }
209 }
210 index++;
211 }
212 }
213 }
214 }
215
216 VSIFree(p_data);
217}
218
232template <typename T>
233std::vector<int64_t>
236 std::vector<helper::Index>& samples,
237 std::vector<T> binVals,
238 int nBins)
239{
240 std::vector<int64_t> retval(nBins, 0);
241
242 T val;
243 for (const helper::Index& index : samples) {
244 band.p_band->RasterIO(GF_Read, index.x, index.y, 1, 1, &val, 1, 1, band.type, 0, 0);
245 for (int i = nBins - 1; i >= 0; i--) {
246 if (binVals[i] <= val) {
247 retval[i]++;
248 break;
249 }
250 }
251 }
252
253 return retval;
254}
255
277template <typename T>
278void
281 std::vector<helper::Index>& sampled,
282 int height,
283 int width,
284 int nBins,
285 std::unordered_map<std::string, std::pair<std::vector<double>, std::vector<int64_t>>>& retval)
286{
287 //determine min and max values in the raster
288 T min;
289 T max;
290 findMinMax<T>(band, width, height, min, max);
291
292 //call the setBins function which sets the vector<double> bins to return to the user,
293 //and the vector<T> bins to use to create the distribution. There is a seperate vector<T>
294 //bins object so that while iterating through every pixel, they don't have to be cast to
295 //type double to check.
296 std::vector<double> dbins;
297 std::vector<T> tbins;
298 setBins<T>(min, max, nBins, band.type, dbins, tbins);
299
300 //determine the population distribution of the raster band
301 std::vector<int64_t> counts(nBins, 0);
302 populationDistribution<T>(band, width, height, nBins, tbins, counts);
303
304 //add population distribution to return value
305 retval.insert({std::string("population"), {dbins, counts}});
306
307 //if samples are passed, add calculate sample distribution and add to return value
308 if (sampled.size() != 0) {
309 std::vector<int64_t> sampleCounts = sampleDistribution<T>(band, sampled, tbins, nBins);
310 retval.insert({std::string("sample"), {dbins, sampleCounts}});
311 }
312}
313
336std::unordered_map<std::string, std::pair<std::vector<double>, std::vector<int64_t>>>
339 int index,
341 std::string layer,
342 int nBins)
343{
344 std::mutex datasetMutex;
345 double *GT = p_raster->getGeotransform();
346 double IGT[6];
347 GDALInvGeoTransform(GT, IGT);
348
349 //get samples as vector if indices, if samples are given
350 std::vector<helper::Index> sampled;
351 if (p_vector) {
352 OGRLayer *p_layer = p_vector->getDataset()->GetLayerByName(layer.c_str());
353
354 //check to ensure spatial reference system of raster and vector match
355 OGRSpatialReference rastSRS;
356 rastSRS.importFromWkt(p_raster->getDataset()->GetProjectionRef());
357 const OGRSpatialReference *p_sampSRS = p_layer->GetSpatialRef();
358 if (!rastSRS.IsSame(p_sampSRS)) {
359 throw std::runtime_error("existing sample vector and raster do not have the same spatial reference system.");
360 }
361
362 for (const auto& p_feature : *p_layer) {
363 OGRGeometry *p_geometry = p_feature->GetGeometryRef();
364 switch (wkbFlatten(p_geometry->getGeometryType())) {
365 case OGRwkbGeometryType::wkbPoint: {
366 OGRPoint *p_point = p_geometry->toPoint();
367 double xCoord = p_point->getX();
368 double yCoord = p_point->getY();
369 sampled.push_back(helper::Index(
370 static_cast<int>(IGT[0] + xCoord * IGT[1] + yCoord * IGT[2]),
371 static_cast<int>(IGT[3] + xCoord * IGT[4] + yCoord * IGT[5])
372 ));
373 break;
374 }
375 case OGRwkbGeometryType::wkbMultiPoint: {
376 for (const auto& p_point : *p_geometry->toMultiPoint()) {
377 double xCoord = p_point->getX();
378 double yCoord = p_point->getY();
379 sampled.push_back(helper::Index(
380 static_cast<int>(IGT[0] + xCoord * IGT[1] + yCoord * IGT[2]),
381 static_cast<int>(IGT[3] + xCoord * IGT[4] + yCoord * IGT[5])
382 ));
383 }
384 break;
385 }
386 default:
387 throw std::runtime_error("encountered a geometry which was not a Point MultiPoint.");
388 }
389 }
390 }
391
392 std::mutex bandMutex;
393
395 band.p_band = p_raster->getRasterBand(index);
396 band.type = p_raster->getRasterBandType(index);
397 band.size = p_raster->getRasterBandTypeSize(index);
398 band.p_mutex = &datasetMutex;
399 band.nan = band.p_band->GetNoDataValue();
400 band.p_band->GetBlockSize(&band.xBlockSize, &band.yBlockSize);
401 band.p_mutex = &bandMutex;
402
403 int height = p_raster->getHeight();
404 int width = p_raster->getWidth();
405
406 std::unordered_map<std::string, std::pair<std::vector<double>, std::vector<int64_t>>> retval;
407 switch (band.type) {
408 case GDT_Int8:
409 calculateDist<int8_t>(band, sampled, height, width, nBins, retval);
410 break;
411 case GDT_UInt16:
412 calculateDist<uint16_t>(band, sampled, height, width, nBins, retval);
413 break;
414 case GDT_Int16:
415 calculateDist<int16_t>(band, sampled, height, width, nBins, retval);
416 break;
417 case GDT_UInt32:
418 calculateDist<uint32_t>(band, sampled, height, width, nBins, retval);
419 break;
420 case GDT_Int32:
421 calculateDist<int32_t>(band, sampled, height, width, nBins, retval);
422 break;
423 case GDT_Float32:
424 calculateDist<float>(band, sampled, height, width, nBins, retval);
425 break;
426 case GDT_Float64:
427 calculateDist<double>(band, sampled, height, width, nBins, retval);
428 break;
429 default:
430 throw std::runtime_error("raster pixel data type not supported.");
431 }
432
433 return retval;
434}
435
436} //dist
437} //sgs
Definition raster.h:57
GDALDataset * getDataset()
Definition raster.h:429
int getWidth()
Definition raster.h:467
size_t getRasterBandTypeSize(int band)
Definition raster.h:735
double * getGeotransform()
Definition raster.h:590
GDALDataType getRasterBandType(int band)
Definition raster.h:724
int getHeight()
Definition raster.h:476
GDALRasterBand * getRasterBand(int band)
Definition raster.h:696
Definition vector.h:46
GDALDataset * getDataset()
Definition vector.h:185
void populationDistribution(helper::RasterBandMetaData &band, int width, int height, int nBins, std::vector< T > &binVals, std::vector< int64_t > &counts)
Definition dist.h:172
void setBins(T min, T max, int nBins, GDALDataType type, std::vector< double > &dbins, std::vector< T > &tbins)
Definition dist.h:112
std::vector< int64_t > sampleDistribution(helper::RasterBandMetaData &band, std::vector< helper::Index > &samples, std::vector< T > binVals, int nBins)
Definition dist.h:234
void calculateDist(helper::RasterBandMetaData &band, std::vector< helper::Index > &sampled, int height, int width, int nBins, std::unordered_map< std::string, std::pair< std::vector< double >, std::vector< int64_t > > > &retval)
Definition dist.h:279
std::unordered_map< std::string, std::pair< std::vector< double >, std::vector< int64_t > > > dist(raster::GDALRasterWrapper *p_raster, int index, vector::GDALVectorWrapper *p_vector, std::string layer, int nBins)
Definition dist.h:337
void findMinMax(helper::RasterBandMetaData &band, int width, int height, T &min, T &max)
Definition dist.h:39
Definition pca.h:23
Definition helper.h:37
Definition helper.h:87
int xBlockSize
Definition helper.h:94
std::mutex * p_mutex
Definition helper.h:96
int yBlockSize
Definition helper.h:95
size_t size
Definition helper.h:91
GDALDataType type
Definition helper.h:90
double nan
Definition helper.h:93
GDALRasterBand * p_band
Definition helper.h:88