sgsPy
structurally guided sampling
Loading...
Searching...
No Matches
breaks.h
Go to the documentation of this file.
1/******************************************************************************
2 *
3 *
4 * Project: sgs
5 * Purpose: C++ implementation of raster stratification using breaks
6 * Author: Joseph Meyer
7 * Date: September, 2025
8 *
9 ******************************************************************************/
10
15
16#include <iostream>
17
18#include <boost/asio/thread_pool.hpp>
19#include <boost/asio/post.hpp>
20
21#include "utils/raster.h"
22#include "utils/helper.h"
23
24namespace sgs {
25namespace breaks {
26
57inline void processMapPixel(
58 size_t index,
60 void *p_dataBuffer,
62 void *p_stratBuffer,
63 std::vector<double>& bandBreaks,
64 size_t multiplier,
65 bool& mapNan,
66 size_t& mapStrat)
67{
68 double val = helper::getPixelValueDependingOnType<double>(dataBand.type, p_dataBuffer, index);
69 bool isNan = std::isnan(val) || (double)val == dataBand.nan;
70 mapNan |= isNan;
71
72 size_t strat = 0;
73 if (!isNan) {
74 auto it = std::lower_bound(bandBreaks.begin(), bandBreaks.end(), val);
75 strat = (it == bandBreaks.end()) ? bandBreaks.size() : std::distance(bandBreaks.begin(), it);
76 }
77
78 helper::setStrataPixelDependingOnType(stratBand.type, p_stratBuffer, index, isNan, strat);
79
80 if (!mapNan) {
81 mapStrat += strat * multiplier;
82 }
83}
84
107inline void
109 size_t index,
110 void *p_data,
111 helper::RasterBandMetaData *p_dataBand,
112 void *p_strat,
113 helper::RasterBandMetaData *p_stratBand,
114 std::vector<double>& bandBreaks)
115{
116 double val = helper::getPixelValueDependingOnType<double>(p_dataBand->type, p_data, index);
117 bool isNan = std::isnan(val) || val == p_dataBand->nan;
118
119 size_t strat = 0;
120 if (!isNan) {
121 auto it = std::lower_bound(bandBreaks.begin(), bandBreaks.end(), val);
122 strat = (it == bandBreaks.end()) ? bandBreaks.size() : std::distance(bandBreaks.begin(), it);
123 }
124
125 helper::setStrataPixelDependingOnType(p_stratBand->type, p_strat, index, isNan, strat);
126}
127
220 std::map<int, std::vector<double>> breaks,
221 bool map,
222 std::string filename,
223 bool largeRaster,
224 int threads,
225 std::string tempFolder,
226 std::map<std::string, std::string> driverOptions)
227{
228 GDALAllRegister();
229
230 int height = p_raster->getHeight();
231 int width = p_raster->getWidth();
232 double *geotransform = p_raster->getGeotransform();
233 std::string projection = std::string(p_raster->getDataset()->GetProjectionRef());
234
235 GDALDataset *p_dataset = nullptr;
236
237 size_t bandCount = breaks.size();
238 std::vector<std::vector<double>> bandBreaks;
239 std::vector<std::string> bandNames = p_raster->getBands();
240
241 std::vector<helper::RasterBandMetaData> dataBands(bandCount);
242 std::vector<helper::RasterBandMetaData> stratBands(bandCount + map);
243 std::vector<helper::VRTBandDatasetInfo> VRTBandInfo;
244
245 bool isMEMDataset = !largeRaster && filename == "";
246 bool isVRTDataset = largeRaster && filename == "";
247
248 std::mutex dataBandMutex;
249 std::mutex stratBandMutex;
250 //VRT bands are each their own dataset so they can each have their own mutex
251 std::vector<std::mutex> stratBandMutexes(isVRTDataset * (bandCount + map));
252
253 std::string driver;
254 if (isMEMDataset || isVRTDataset) {
255 driver = isMEMDataset ? "MEM" : "VRT";
256 p_dataset = helper::createVirtualDataset(driver, width, height, geotransform, projection);
257 }
258 else {
259 std::filesystem::path filepath = filename;
260 std::string extension = filepath.extension().string();
261
262 if (extension == ".tif") {
263 driver = "Gtiff";
264 }
265 else {
266 throw std::runtime_error("sgs only supports .tif files right now");
267 }
268 }
269
270 //allocate, read, and initialize raster data and breaks information
271 GDALDataType stratPixelType = GDT_Int8;
272 size_t stratPixelSize = 1;
273 size_t band = 0;
274 for (auto const& [key, val] : breaks) {
275 helper::RasterBandMetaData *p_dataBand = &dataBands[band];
276 helper::RasterBandMetaData *p_stratBand = &stratBands[band];
277
278 //get and store metadata from input raster band
279 GDALRasterBand *p_band = p_raster->getRasterBand(key);
280 p_dataBand->p_band = p_band;
281 p_dataBand->type = p_raster->getRasterBandType(key);
282 p_dataBand->size = p_raster->getRasterBandTypeSize(key);
283 p_dataBand->p_buffer = largeRaster ? nullptr : p_raster->getRasterBandBuffer(key);
284 p_dataBand->nan = p_band->GetNoDataValue();
285 p_dataBand->p_mutex = &dataBandMutex;
286 p_band->GetBlockSize(&p_dataBand->xBlockSize, &p_dataBand->yBlockSize);
287
288 //sort and add band breaks vector
289 std::vector<double> valCopy = val; //have to create copy to alter the band breaks in iteration loop
290 std::sort(valCopy.begin(), valCopy.end());
291 bandBreaks.push_back(valCopy);
292
293 //update metadata of new strat raster
294 size_t maxStrata = val.size() + 1;
295 helper::setStratBandTypeAndSize(maxStrata, &p_stratBand->type, &p_stratBand->size);
296 p_stratBand->name = "strat_" + bandNames[key];
297 p_stratBand->xBlockSize = map ? dataBands[0].xBlockSize : p_dataBand->xBlockSize;
298 p_stratBand->yBlockSize = map ? dataBands[0].yBlockSize : p_dataBand->yBlockSize;
299 p_stratBand->p_mutex = &stratBandMutex; //overwritten if VRT dataset
300
301 //update dataset with new band information
302 if (isMEMDataset) {
303 helper::addBandToMEMDataset(p_dataset, *p_stratBand);
304 }
305 else if (isVRTDataset) {
306 helper::createVRTBandDataset(p_dataset, *p_stratBand, tempFolder, std::to_string(key), VRTBandInfo, driverOptions);
307 p_stratBand->p_mutex = &stratBandMutexes[band];
308 }
309 else { //non-virtual dataset driver
310 if (stratPixelSize < p_stratBand->size) {
311 stratPixelSize = p_stratBand->size;
312 stratPixelType = p_stratBand->type;
313 }
314 }
315
316 band++;
317 }
318
319 //set multipliers if mapped stratification
320 std::vector<size_t> multipliers(breaks.size(), 1);
321 if (map) {
322 //determine the stratification band index multipliers of the mapped band
323 for (size_t i = 0; i < bandCount - 1; i++) {
324 multipliers[i + 1] = multipliers[i] * (bandBreaks[i].size() + 1);
325 }
326
327 //update info of new strat raster map band
328 helper::RasterBandMetaData *p_stratBand = &stratBands.back();
329 size_t maxStrata = multipliers.back() * (bandBreaks.back().size() + 1);
330 helper::setStratBandTypeAndSize(maxStrata, &p_stratBand->type, &p_stratBand->size);
331 p_stratBand->name = "strat_map";
332 p_stratBand->xBlockSize = dataBands[0].xBlockSize;
333 p_stratBand->yBlockSize = dataBands[0].yBlockSize;
334 p_stratBand->p_mutex = &stratBandMutex; //overwritten if VRT dataset
335
336 //update dataset with new band information
337 if (isMEMDataset) {
338 helper::addBandToMEMDataset(p_dataset, *p_stratBand);
339 }
340 else if (isVRTDataset) {
342 p_dataset,
343 *p_stratBand,
344 tempFolder,
345 "map",
346 VRTBandInfo,
347 driverOptions
348 );
349 p_stratBand->p_mutex = &stratBandMutexes.back();
350 }
351 else { //non-virtual dataset driver
352 if (stratPixelSize < p_stratBand->size) {
353 stratPixelSize = p_stratBand->size;
354 stratPixelType = p_stratBand->type;
355 }
356 }
357 }
358
359 //create full non-virtual dataset now that we have all required band information
360 if (!isMEMDataset && !isVRTDataset) {
361 bool useTiles = stratBands[0].xBlockSize != width &&
362 stratBands[0].yBlockSize != height;
363
364 for (size_t band = 0; band < stratBands.size(); band++) {
365 stratBands[band].size = stratPixelSize;
366 stratBands[band].type = stratPixelType;
367 stratBands[band].p_buffer = !largeRaster ? VSIMalloc3(height, width, stratPixelSize) : nullptr;
368 }
369
370 p_dataset = helper::createDataset(
371 filename,
372 driver,
373 width,
374 height,
375 geotransform,
376 projection,
377 stratBands.data(),
378 stratBands.size(),
379 useTiles,
380 driverOptions
381 );
382 }
383
384 //iterate through all pixels and update the stratified raster bands
385 if (largeRaster) {
386 pybind11::gil_scoped_acquire acquire;
387 boost::asio::thread_pool pool(threads);
388
389 if (map) {
390 //use the first raster band to determine block size
391 int xBlockSize = dataBands[0].xBlockSize;
392 int yBlockSize = dataBands[0].yBlockSize;
393
394 int xBlocks = (p_raster->getWidth() + xBlockSize - 1) / xBlockSize;
395 int yBlocks = (p_raster->getHeight() + yBlockSize - 1) / yBlockSize;
396 int chunkSize = yBlocks / threads;
397
398 for (int yBlockStart = 0; yBlockStart < yBlocks; yBlockStart += chunkSize) {
399 int yBlockEnd = std::min(yBlockStart + chunkSize, yBlocks);
400
401 boost::asio::post(pool, [
402 bandCount,
403 xBlockSize,
404 yBlockSize,
405 yBlockStart,
406 yBlockEnd,
407 xBlocks,
408 &dataBands,
409 &stratBands,
410 &bandBreaks,
411 &multipliers
412 ] {
413 std::vector<void *> dataBuffers(dataBands.size());
414 std::vector<void *> stratBuffers(stratBands.size());
415 for (size_t band = 0; band < dataBuffers.size(); band++) {
416 dataBuffers[band] = VSIMalloc3(xBlockSize, yBlockSize, dataBands[band].size);
417 stratBuffers[band] = VSIMalloc3(xBlockSize, yBlockSize, stratBands[band].size);
418 }
419 stratBuffers.back() = VSIMalloc3(xBlockSize, yBlockSize, stratBands.back().size);
420
421 for (int yBlock = yBlockStart; yBlock < yBlockEnd; yBlock++) {
422 for (int xBlock = 0; xBlock < xBlocks; xBlock++) {
423 int xValid, yValid;
424 dataBands[0].p_mutex->lock();
425 dataBands[0].p_band->GetActualBlockSize(xBlock, yBlock, &xValid, &yValid);
426 dataBands[0].p_mutex->unlock();
427
428 //read raster band data into band buffers
429 for (size_t band = 0; band < bandCount; band++) {
431 dataBands[band],
432 dataBuffers[band],
433 xBlockSize,
434 yBlockSize,
435 xBlock,
436 yBlock,
437 xValid,
438 yValid,
439 true //read = true
440 );
441 }
442
443 //process blocked band data
444 for (int y = 0; y < yValid; y++) {
445 size_t index = static_cast<size_t>(y * xBlockSize);
446 for (int x = 0; x < xValid; x++) {
447 bool mapNan = false;
448 size_t mapStrat = 0;
449
450 for (size_t band = 0; band < bandCount; band++) {
452 index,
453 dataBands[band],
454 dataBuffers[band],
455 stratBands[band],
456 stratBuffers[band],
457 bandBreaks[band],
458 multipliers[band],
459 mapNan,
460 mapStrat
461 );
462 }
463
465 stratBands.back().type,
466 stratBuffers.back(),
467 index,
468 mapNan,
469 mapStrat
470 );
471
472 index++;
473 }
474 }
475
476 //write strat band data
477 for (size_t band = 0; band <= bandCount; band++) {
479 stratBands[band],
480 stratBuffers[band],
481 xBlockSize,
482 yBlockSize,
483 xBlock,
484 yBlock,
485 xValid,
486 yValid,
487 false //read = false
488 );
489 }
490 }
491 }
492
493 for (size_t band = 0; band < dataBuffers.size(); band++) {
494 VSIFree(dataBuffers[band]);
495 VSIFree(stratBuffers[band]);
496 }
497 VSIFree(stratBuffers.back());
498 });
499 }
500 }
501 else {
502 for (size_t band = 0; band < bandCount; band++) {
503 helper::RasterBandMetaData* p_dataBand = &dataBands[band];
504 helper::RasterBandMetaData* p_stratBand = &stratBands[band];
505
506 int xBlockSize = p_dataBand->xBlockSize;
507 int yBlockSize = p_dataBand->yBlockSize;
508
509 int xBlocks = (p_raster->getWidth() + xBlockSize - 1) / xBlockSize;
510 int yBlocks = (p_raster->getHeight() + yBlockSize - 1) / yBlockSize;
511 int chunkSize = yBlocks / threads;
512
513 for (int yBlockStart = 0; yBlockStart < yBlocks; yBlockStart += chunkSize) {
514 int yBlockEnd = std::min(yBlocks, yBlockStart + chunkSize);
515 std::vector<double> *p_breaks = &bandBreaks[band];
516
517 boost::asio::post(pool, [
518 xBlockSize,
519 yBlockSize,
520 yBlockStart,
521 yBlockEnd,
522 xBlocks,
523 p_dataBand,
524 p_stratBand,
525 p_breaks
526 ] {
527 void *p_data = VSIMalloc3(xBlockSize, yBlockSize, p_dataBand->size);
528 void *p_strat = VSIMalloc3(xBlockSize, yBlockSize, p_stratBand->size);
529
530 int xValid, yValid;
531 for (int yBlock = yBlockStart; yBlock < yBlockEnd; yBlock++) {
532 for (int xBlock = 0; xBlock < xBlocks; xBlock++) {
533 p_dataBand->p_mutex->lock();
534 p_dataBand->p_band->GetActualBlockSize(xBlock, yBlock, &xValid, &yValid);
535 p_dataBand->p_mutex->unlock();
536
537 //read block into memory
539 *p_dataBand,
540 p_data,
541 xBlockSize,
542 yBlockSize,
543 xBlock,
544 yBlock,
545 xValid,
546 yValid,
547 true //read = true
548 );
549
550 //process block
551 for (int y = 0; y < yValid; y++) {
552 size_t index = static_cast<size_t>(y * xBlockSize);
553 for (int x = 0; x < xValid; x++) {
554 processPixel(index, p_data, p_dataBand, p_strat, p_stratBand, *p_breaks);
555 index++;
556 }
557 }
558
559 //write resulting stratifications to disk
561 *p_stratBand,
562 p_strat,
563 xBlockSize,
564 yBlockSize,
565 xBlock,
566 yBlock,
567 xValid,
568 yValid,
569 false //read = false
570 );
571 }
572 }
573 VSIFree(p_data);
574 VSIFree(p_strat);
575 });
576 }
577 }
578 }
579 pool.join();
580 pybind11::gil_scoped_release release;
581 }
582 else {
583 size_t pixelCount = static_cast<size_t>(p_raster->getWidth()) * static_cast<size_t>(p_raster->getHeight());
584 if (map) {
585 for (size_t index = 0; index < pixelCount; index++) {
586 bool mapNan = false;
587 size_t mapStrat = 0;
588
589 for (size_t band = 0; band < bandCount; band++) {
591 index,
592 dataBands[band],
593 dataBands[band].p_buffer,
594 stratBands[band],
595 stratBands[band].p_buffer,
596 bandBreaks[band],
597 multipliers[band],
598 mapNan,
599 mapStrat
600 );
601 }
602
604 stratBands.back().type,
605 stratBands.back().p_buffer,
606 index,
607 mapNan,
608 mapStrat
609 );
610 }
611 }
612 else {
613 for (size_t band = 0; band < bandCount; band++) {
614 for (size_t i = 0; i < pixelCount; i++) {
616 i,
617 dataBands[band].p_buffer,
618 &dataBands[band],
619 stratBands[band].p_buffer,
620 &stratBands[band],
621 bandBreaks[band]
622 );
623 }
624 }
625 }
626
627 //if non-virtual dataset then write in-memory output bands to disk
628 if (!isVRTDataset && !isMEMDataset) {
629 CPLErr err;
630 for (size_t band = 0; band < stratBands.size(); band++) {
631 err = stratBands[band].p_band->RasterIO(
632 GF_Write,
633 0,
634 0,
635 width,
636 height,
637 stratBands[band].p_buffer,
638 width,
639 height,
640 stratBands[band].type,
641 0,
642 0
643 );
644 if (err) {
645 throw std::runtime_error("error writing band to file.");
646 }
647 }
648 }
649 }
650
651 //close and add all of the VRT sub datasets as bands
652 if (isVRTDataset) {
653 for (size_t band = 0; band < VRTBandInfo.size(); band++) {
654 GDALClose(VRTBandInfo[band].p_dataset);
655 helper::addBandToVRTDataset(p_dataset, stratBands[band], VRTBandInfo[band]);
656 }
657 }
658
659 //if the bands are in memory, populate a vector of just bands to use in GDALRasterWrapper creation
660 std::vector<void *> buffers(stratBands.size());
661 if (!largeRaster) {
662 for (size_t band = 0; band < stratBands.size(); band++) {
663 buffers[band] = stratBands[band].p_buffer;
664 }
665 }
666
667 return largeRaster ?
668 new raster::GDALRasterWrapper(p_dataset) :
669 new raster::GDALRasterWrapper(p_dataset, buffers);
670
671}
672
673} //namespace breaks
674} //namespace sgs
Definition raster.h:57
void * getRasterBandBuffer(int band)
Definition raster.h:706
GDALDataset * getDataset()
Definition raster.h:429
int getWidth()
Definition raster.h:467
size_t getRasterBandTypeSize(int band)
Definition raster.h:735
double * getGeotransform()
Definition raster.h:590
GDALDataType getRasterBandType(int band)
Definition raster.h:724
int getHeight()
Definition raster.h:476
GDALRasterBand * getRasterBand(int band)
Definition raster.h:696
std::vector< std::string > getBands()
Definition raster.h:575
raster::GDALRasterWrapper * breaks(raster::GDALRasterWrapper *p_raster, std::map< int, std::vector< double > > breaks, bool map, std::string filename, bool largeRaster, int threads, std::string tempFolder, std::map< std::string, std::string > driverOptions)
Definition breaks.h:218
void processPixel(size_t index, void *p_data, helper::RasterBandMetaData *p_dataBand, void *p_strat, helper::RasterBandMetaData *p_stratBand, std::vector< double > &bandBreaks)
Definition breaks.h:108
void processMapPixel(size_t index, helper::RasterBandMetaData &dataBand, void *p_dataBuffer, helper::RasterBandMetaData &stratBand, void *p_stratBuffer, std::vector< double > &bandBreaks, size_t multiplier, bool &mapNan, size_t &mapStrat)
Definition breaks.h:57
void rasterBandIO(RasterBandMetaData &band, void *p_buffer, int xBlockSize, int yBlockSize, int xBlock, int yBlock, int xValid, int yValid, bool read, bool threaded=true)
Definition helper.h:643
void addBandToVRTDataset(GDALDataset *p_dataset, RasterBandMetaData &band, VRTBandDatasetInfo &info)
Definition helper.h:599
GDALDataset * createDataset(std::string filename, std::string driverName, int width, int height, double *geotransform, std::string projection, RasterBandMetaData *bands, size_t bandCount, bool useTiles, std::map< std::string, std::string > &driverOptions)
Definition helper.h:375
void setStrataPixelDependingOnType(GDALDataType type, void *p_data, size_t index, bool isNan, size_t strata)
Definition helper.h:213
void addBandToMEMDataset(GDALDataset *p_dataset, RasterBandMetaData &band)
Definition helper.h:467
T getPixelValueDependingOnType(GDALDataType type, void *p_data, size_t index)
Definition helper.h:173
void createVRTBandDataset(GDALDataset *p_dataset, RasterBandMetaData &band, std::string tempFolder, std::string key, std::vector< VRTBandDatasetInfo > &VRTBandInfo, std::map< std::string, std::string > &driverOptions)
Definition helper.h:538
GDALDataset * createVirtualDataset(std::string driverName, int width, int height, double *geotransform, std::string projection)
Definition helper.h:294
void setStratBandTypeAndSize(size_t maxStrata, GDALDataType *p_type, size_t *p_size)
Definition helper.h:140
Definition map.h:22
Definition strat.h:27
Definition pca.h:23
Definition helper.h:87
int xBlockSize
Definition helper.h:94
std::mutex * p_mutex
Definition helper.h:96
void * p_buffer
Definition helper.h:89
int yBlockSize
Definition helper.h:95
size_t size
Definition helper.h:91
GDALDataType type
Definition helper.h:90
double nan
Definition helper.h:93
std::string name
Definition helper.h:92
GDALRasterBand * p_band
Definition helper.h:88