From 61bc8f0a155597ad69d2f66a09bceb7c24f4f776 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Thu, 11 Sep 2025 13:13:12 -0400 Subject: [PATCH 01/46] implement CPU scan --- stream_compaction/cpu.cu | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/stream_compaction/cpu.cu b/stream_compaction/cpu.cu index 719fa115..7e8342ff 100644 --- a/stream_compaction/cpu.cu +++ b/stream_compaction/cpu.cu @@ -6,6 +6,7 @@ namespace StreamCompaction { namespace CPU { using StreamCompaction::Common::PerformanceTimer; + PerformanceTimer& timer() { static PerformanceTimer timer; @@ -19,7 +20,15 @@ namespace StreamCompaction { */ void scan(int n, int *odata, const int *idata) { timer().startCpuTimer(); - // TODO + + if (n <= 0) return; + + int currentSum = 0; + for (int i = 0; i < n; ++i) { + odata[i] = currentSum; + currentSum += idata[i]; + } + timer().endCpuTimer(); } From ccbafc29fe6e96fc53c7cfc1ae39c91e7d29f966 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Thu, 11 Sep 2025 13:19:44 -0400 Subject: [PATCH 02/46] implement `CPU::compactWithoutScan` --- stream_compaction/cpu.cu | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/stream_compaction/cpu.cu b/stream_compaction/cpu.cu index 7e8342ff..9f9ccef3 100644 --- a/stream_compaction/cpu.cu +++ b/stream_compaction/cpu.cu @@ -39,9 +39,22 @@ namespace StreamCompaction { */ int compactWithoutScan(int n, int *odata, const int *idata) { timer().startCpuTimer(); - // TODO + + if (n <= 0) return 0; + + int outputIndex = 0; + for (int inputIndex = 0; inputIndex < n; ++inputIndex) { + int element = idata[inputIndex]; + + if (element > 0) { + odata[outputIndex] = element; + outputIndex++; + } + } + timer().endCpuTimer(); - return -1; + + return outputIndex; } /** From fb9227bd18b2dc6aa14ba5ff8eca13fec5947b1b Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Thu, 11 Sep 2025 14:58:47 -0400 Subject: [PATCH 03/46] implement `CPU::compactWithScan` --- stream_compaction/cpu.cu | 41 
+++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/stream_compaction/cpu.cu b/stream_compaction/cpu.cu index 9f9ccef3..7f331c0f 100644 --- a/stream_compaction/cpu.cu +++ b/stream_compaction/cpu.cu @@ -1,11 +1,21 @@ -#include #include "cpu.h" - #include "common.h" +#include + namespace StreamCompaction { namespace CPU { using StreamCompaction::Common::PerformanceTimer; + + namespace { + inline void scanImplementation(int n, int* odata, const int* idata) { + int currentSum = 0; + for (int i = 0; i < n; ++i) { + odata[i] = currentSum; + currentSum += idata[i]; + } + } + } PerformanceTimer& timer() { @@ -23,11 +33,7 @@ namespace StreamCompaction { if (n <= 0) return; - int currentSum = 0; - for (int i = 0; i < n; ++i) { - odata[i] = currentSum; - currentSum += idata[i]; - } + scanImplementation(n, odata, idata); timer().endCpuTimer(); } @@ -64,9 +70,26 @@ namespace StreamCompaction { */ int compactWithScan(int n, int *odata, const int *idata) { timer().startCpuTimer(); - // TODO + + if (n <= 0) return 0; + + std::unique_ptr valid = std::make_unique(n); + for (int i = 0; i < n; ++i) { + valid[i] = idata[i] > 0 ? 
1 : 0; + } + + std::unique_ptr scanResult = std::make_unique(n); + scanImplementation(n, scanResult.get(), valid.get()); + + for (int i = 0; i < n; ++i) { + if (valid[i] > 0) { + odata[scanResult[i]] = idata[i]; + } + } + timer().endCpuTimer(); - return -1; + + return scanResult[n - 1]; } } } From dc2a0a93d7e2d3bc1c6e64bc60a4979c96dca742 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Thu, 11 Sep 2025 15:37:46 -0400 Subject: [PATCH 04/46] set up VS Code --- .clang-format | 4 ++++ .vscode/c_cpp_properties.json | 22 ++++++++++++++++++++++ .vscode/settings.json | 8 ++++++++ 3 files changed, 34 insertions(+) create mode 100644 .clang-format create mode 100644 .vscode/c_cpp_properties.json create mode 100644 .vscode/settings.json diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..3097e964 --- /dev/null +++ b/.clang-format @@ -0,0 +1,4 @@ +BasedOnStyle: Chromium +ColumnLimit: 120 +InsertNewlineAtEOF: true +AllowShortIfStatementsOnASingleLine: WithoutElse \ No newline at end of file diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json new file mode 100644 index 00000000..7af6a19c --- /dev/null +++ b/.vscode/c_cpp_properties.json @@ -0,0 +1,22 @@ +{ + "configurations": [ + { + "name": "Win32", + "includePath": [ + "${workspaceFolder}/**" + ], + "defines": [ + "_DEBUG", + "UNICODE", + "_UNICODE" + ], + "windowsSdkVersion": "10.0.26100.0", + "compilerPath": "cl.exe", + "cStandard": "c17", + "cppStandard": "c++17", + "intelliSenseMode": "windows-msvc-x64", + "configurationProvider": "ms-vscode.cmake-tools" + } + ], + "version": 4 +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..ac9bee19 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,8 @@ +{ + "files.associations": { + ".clang-format": "yaml", + ".clang-tidy": "yaml", + ".clangd": "yaml", + "memory": "cpp" + } +} \ No newline at end of file From 3723bd5b6cae93b4e8dcee42b5e6863187a01bae Mon 
Sep 17 00:00:00 2001 From: Charles Wang Date: Thu, 11 Sep 2025 16:09:44 -0400 Subject: [PATCH 05/46] add cuda includes --- .vscode/c_cpp_properties.json | 6 ++++-- .vscode/settings.json | 8 -------- 2 files changed, 4 insertions(+), 10 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json index 7af6a19c..e262ac06 100644 --- a/.vscode/c_cpp_properties.json +++ b/.vscode/c_cpp_properties.json @@ -3,7 +3,8 @@ { "name": "Win32", "includePath": [ - "${workspaceFolder}/**" + "${workspaceFolder}/**", + "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v13.0\\include" ], "defines": [ "_DEBUG", @@ -15,7 +16,8 @@ "cStandard": "c17", "cppStandard": "c++17", "intelliSenseMode": "windows-msvc-x64", - "configurationProvider": "ms-vscode.cmake-tools" + "configurationProvider": "ms-vscode.cmake-tools", + "mergeConfigurations": true } ], "version": 4 diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index ac9bee19..00000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "files.associations": { - ".clang-format": "yaml", - ".clang-tidy": "yaml", - ".clangd": "yaml", - "memory": "cpp" - } -} \ No newline at end of file From 8997104341556dedf20252a11da9544bcb027040 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Thu, 11 Sep 2025 16:14:13 -0400 Subject: [PATCH 06/46] run clang-format --- src/main.cpp | 276 ++++++++++++++++----------------- src/testing_helpers.hpp | 96 ++++++------ stream_compaction/common.cu | 58 ++++--- stream_compaction/common.h | 215 +++++++++++++------------ stream_compaction/cpu.cu | 176 ++++++++++----------- stream_compaction/cpu.h | 14 +- stream_compaction/efficient.cu | 61 ++++---- stream_compaction/efficient.h | 12 +- stream_compaction/naive.cu | 33 ++-- stream_compaction/naive.h | 10 +- stream_compaction/thrust.cu | 35 ++--- stream_compaction/thrust.h | 10 +- 12 files changed, 492 insertions(+), 504 deletions(-) diff --git 
a/src/main.cpp b/src/main.cpp index 3d5c8820..26c5b47b 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -6,149 +6,149 @@ * @copyright University of Pennsylvania */ -#include #include -#include #include +#include #include +#include #include "testing_helpers.hpp" -const int SIZE = 1 << 8; // feel free to change the size of array -const int NPOT = SIZE - 3; // Non-Power-Of-Two -int *a = new int[SIZE]; -int *b = new int[SIZE]; -int *c = new int[SIZE]; +const int SIZE = 1 << 8; // feel free to change the size of array +const int NPOT = SIZE - 3; // Non-Power-Of-Two +int* a = new int[SIZE]; +int* b = new int[SIZE]; +int* c = new int[SIZE]; int main(int argc, char* argv[]) { - // Scan tests - - printf("\n"); - printf("****************\n"); - printf("** SCAN TESTS **\n"); - printf("****************\n"); - - genArray(SIZE - 1, a, 50); // Leave a 0 at the end to test that edge case - a[SIZE - 1] = 0; - printArray(SIZE, a, true); - - // initialize b using StreamCompaction::CPU::scan you implement - // We use b for further comparison. Make sure your StreamCompaction::CPU::scan is correct. - // At first all cases passed because b && c are all zeroes. 
- zeroArray(SIZE, b); - printDesc("cpu scan, power-of-two"); - StreamCompaction::CPU::scan(SIZE, b, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - printArray(SIZE, b, true); - - zeroArray(SIZE, c); - printDesc("cpu scan, non-power-of-two"); - StreamCompaction::CPU::scan(NPOT, c, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - printArray(NPOT, c, true); - printCmpResult(NPOT, b, c); - - zeroArray(SIZE, c); - printDesc("naive scan, power-of-two"); - StreamCompaction::Naive::scan(SIZE, c, a); - printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(SIZE, c, true); - printCmpResult(SIZE, b, c); - - /* For bug-finding only: Array of 1s to help find bugs in stream compaction or scan - onesArray(SIZE, c); - printDesc("1s array for finding bugs"); - StreamCompaction::Naive::scan(SIZE, c, a); - printArray(SIZE, c, true); */ - - zeroArray(SIZE, c); - printDesc("naive scan, non-power-of-two"); - StreamCompaction::Naive::scan(NPOT, c, a); - printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(SIZE, c, true); - printCmpResult(NPOT, b, c); - - zeroArray(SIZE, c); - printDesc("work-efficient scan, power-of-two"); - StreamCompaction::Efficient::scan(SIZE, c, a); - printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(SIZE, c, true); - printCmpResult(SIZE, b, c); - - zeroArray(SIZE, c); - printDesc("work-efficient scan, non-power-of-two"); - StreamCompaction::Efficient::scan(NPOT, c, a); - printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(NPOT, c, true); - printCmpResult(NPOT, b, c); - - zeroArray(SIZE, c); - printDesc("thrust scan, 
power-of-two"); - StreamCompaction::Thrust::scan(SIZE, c, a); - printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(SIZE, c, true); - printCmpResult(SIZE, b, c); - - zeroArray(SIZE, c); - printDesc("thrust scan, non-power-of-two"); - StreamCompaction::Thrust::scan(NPOT, c, a); - printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(NPOT, c, true); - printCmpResult(NPOT, b, c); - - printf("\n"); - printf("*****************************\n"); - printf("** STREAM COMPACTION TESTS **\n"); - printf("*****************************\n"); - - // Compaction tests - - genArray(SIZE - 1, a, 4); // Leave a 0 at the end to test that edge case - a[SIZE - 1] = 0; - printArray(SIZE, a, true); - - int count, expectedCount, expectedNPOT; - - // initialize b using StreamCompaction::CPU::compactWithoutScan you implement - // We use b for further comparison. Make sure your StreamCompaction::CPU::compactWithoutScan is correct. 
- zeroArray(SIZE, b); - printDesc("cpu compact without scan, power-of-two"); - count = StreamCompaction::CPU::compactWithoutScan(SIZE, b, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - expectedCount = count; - printArray(count, b, true); - printCmpLenResult(count, expectedCount, b, b); - - zeroArray(SIZE, c); - printDesc("cpu compact without scan, non-power-of-two"); - count = StreamCompaction::CPU::compactWithoutScan(NPOT, c, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - expectedNPOT = count; - printArray(count, c, true); - printCmpLenResult(count, expectedNPOT, b, c); - - zeroArray(SIZE, c); - printDesc("cpu compact with scan"); - count = StreamCompaction::CPU::compactWithScan(SIZE, c, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - printArray(count, c, true); - printCmpLenResult(count, expectedCount, b, c); - - zeroArray(SIZE, c); - printDesc("work-efficient compact, power-of-two"); - count = StreamCompaction::Efficient::compact(SIZE, c, a); - printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(count, c, true); - printCmpLenResult(count, expectedCount, b, c); - - zeroArray(SIZE, c); - printDesc("work-efficient compact, non-power-of-two"); - count = StreamCompaction::Efficient::compact(NPOT, c, a); - printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(count, c, true); - printCmpLenResult(count, expectedNPOT, b, c); - - system("pause"); // stop Win32 console from closing on exit - delete[] a; - delete[] b; - delete[] c; + // Scan tests + + printf("\n"); + printf("****************\n"); + printf("** SCAN TESTS **\n"); + printf("****************\n"); + + genArray(SIZE - 1, a, 50); // 
Leave a 0 at the end to test that edge case + a[SIZE - 1] = 0; + printArray(SIZE, a, true); + + // initialize b using StreamCompaction::CPU::scan you implement + // We use b for further comparison. Make sure your StreamCompaction::CPU::scan is correct. + // At first all cases passed because b && c are all zeroes. + zeroArray(SIZE, b); + printDesc("cpu scan, power-of-two"); + StreamCompaction::CPU::scan(SIZE, b, a); + printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); + printArray(SIZE, b, true); + + zeroArray(SIZE, c); + printDesc("cpu scan, non-power-of-two"); + StreamCompaction::CPU::scan(NPOT, c, a); + printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); + printArray(NPOT, c, true); + printCmpResult(NPOT, b, c); + + zeroArray(SIZE, c); + printDesc("naive scan, power-of-two"); + StreamCompaction::Naive::scan(SIZE, c, a); + printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + // printArray(SIZE, c, true); + printCmpResult(SIZE, b, c); + + /* For bug-finding only: Array of 1s to help find bugs in stream compaction or scan + onesArray(SIZE, c); + printDesc("1s array for finding bugs"); + StreamCompaction::Naive::scan(SIZE, c, a); + printArray(SIZE, c, true); */ + + zeroArray(SIZE, c); + printDesc("naive scan, non-power-of-two"); + StreamCompaction::Naive::scan(NPOT, c, a); + printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + // printArray(SIZE, c, true); + printCmpResult(NPOT, b, c); + + zeroArray(SIZE, c); + printDesc("work-efficient scan, power-of-two"); + StreamCompaction::Efficient::scan(SIZE, c, a); + printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + // printArray(SIZE, c, true); + printCmpResult(SIZE, b, c); + + zeroArray(SIZE, c); + 
printDesc("work-efficient scan, non-power-of-two"); + StreamCompaction::Efficient::scan(NPOT, c, a); + printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + // printArray(NPOT, c, true); + printCmpResult(NPOT, b, c); + + zeroArray(SIZE, c); + printDesc("thrust scan, power-of-two"); + StreamCompaction::Thrust::scan(SIZE, c, a); + printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + // printArray(SIZE, c, true); + printCmpResult(SIZE, b, c); + + zeroArray(SIZE, c); + printDesc("thrust scan, non-power-of-two"); + StreamCompaction::Thrust::scan(NPOT, c, a); + printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + // printArray(NPOT, c, true); + printCmpResult(NPOT, b, c); + + printf("\n"); + printf("*****************************\n"); + printf("** STREAM COMPACTION TESTS **\n"); + printf("*****************************\n"); + + // Compaction tests + + genArray(SIZE - 1, a, 4); // Leave a 0 at the end to test that edge case + a[SIZE - 1] = 0; + printArray(SIZE, a, true); + + int count, expectedCount, expectedNPOT; + + // initialize b using StreamCompaction::CPU::compactWithoutScan you implement + // We use b for further comparison. Make sure your StreamCompaction::CPU::compactWithoutScan is correct. 
+ zeroArray(SIZE, b); + printDesc("cpu compact without scan, power-of-two"); + count = StreamCompaction::CPU::compactWithoutScan(SIZE, b, a); + printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); + expectedCount = count; + printArray(count, b, true); + printCmpLenResult(count, expectedCount, b, b); + + zeroArray(SIZE, c); + printDesc("cpu compact without scan, non-power-of-two"); + count = StreamCompaction::CPU::compactWithoutScan(NPOT, c, a); + printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); + expectedNPOT = count; + printArray(count, c, true); + printCmpLenResult(count, expectedNPOT, b, c); + + zeroArray(SIZE, c); + printDesc("cpu compact with scan"); + count = StreamCompaction::CPU::compactWithScan(SIZE, c, a); + printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); + printArray(count, c, true); + printCmpLenResult(count, expectedCount, b, c); + + zeroArray(SIZE, c); + printDesc("work-efficient compact, power-of-two"); + count = StreamCompaction::Efficient::compact(SIZE, c, a); + printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + // printArray(count, c, true); + printCmpLenResult(count, expectedCount, b, c); + + zeroArray(SIZE, c); + printDesc("work-efficient compact, non-power-of-two"); + count = StreamCompaction::Efficient::compact(NPOT, c, a); + printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + // printArray(count, c, true); + printCmpLenResult(count, expectedNPOT, b, c); + + system("pause"); // stop Win32 console from closing on exit + delete[] a; + delete[] b; + delete[] c; } diff --git a/src/testing_helpers.hpp b/src/testing_helpers.hpp index 025e94aa..bd63a710 100644 --- a/src/testing_helpers.hpp +++ b/src/testing_helpers.hpp @@ -1,76 
+1,72 @@ #pragma once -#include #include +#include +#include #include #include -#include -template -int cmpArrays(int n, T *a, T *b) { - for (int i = 0; i < n; i++) { - if (a[i] != b[i]) { - printf(" a[%d] = %d, b[%d] = %d\n", i, a[i], i, b[i]); - return 1; - } +template +int cmpArrays(int n, T* a, T* b) { + for (int i = 0; i < n; i++) { + if (a[i] != b[i]) { + printf(" a[%d] = %d, b[%d] = %d\n", i, a[i], i, b[i]); + return 1; } - return 0; + } + return 0; } -void printDesc(const char *desc) { - printf("==== %s ====\n", desc); +void printDesc(const char* desc) { + printf("==== %s ====\n", desc); } -template -void printCmpResult(int n, T *a, T *b) { - printf(" %s \n", - cmpArrays(n, a, b) ? "FAIL VALUE" : "passed"); +template +void printCmpResult(int n, T* a, T* b) { + printf(" %s \n", cmpArrays(n, a, b) ? "FAIL VALUE" : "passed"); } -template -void printCmpLenResult(int n, int expN, T *a, T *b) { - if (n != expN) { - printf(" expected %d elements, got %d\n", expN, n); - } - printf(" %s \n", - (n == -1 || n != expN) ? "FAIL COUNT" : - cmpArrays(n, a, b) ? "FAIL VALUE" : "passed"); +template +void printCmpLenResult(int n, int expN, T* a, T* b) { + if (n != expN) { + printf(" expected %d elements, got %d\n", expN, n); + } + printf(" %s \n", (n == -1 || n != expN) ? "FAIL COUNT" : cmpArrays(n, a, b) ? 
"FAIL VALUE" : "passed"); } -void zeroArray(int n, int *a) { - for (int i = 0; i < n; i++) { - a[i] = 0; - } +void zeroArray(int n, int* a) { + for (int i = 0; i < n; i++) { + a[i] = 0; + } } -void onesArray(int n, int *a) { - for (int i = 0; i < n; i++) { - a[i] = 1; - } +void onesArray(int n, int* a) { + for (int i = 0; i < n; i++) { + a[i] = 1; + } } -void genArray(int n, int *a, int maxval) { - srand(time(nullptr)); +void genArray(int n, int* a, int maxval) { + srand(time(nullptr)); - for (int i = 0; i < n; i++) { - a[i] = rand() % maxval; - } + for (int i = 0; i < n; i++) { + a[i] = rand() % maxval; + } } -void printArray(int n, int *a, bool abridged = false) { - printf(" [ "); - for (int i = 0; i < n; i++) { - if (abridged && i + 2 == 15 && n > 16) { - i = n - 2; - printf("... "); - } - printf("%3d ", a[i]); +void printArray(int n, int* a, bool abridged = false) { + printf(" [ "); + for (int i = 0; i < n; i++) { + if (abridged && i + 2 == 15 && n > 16) { + i = n - 2; + printf("... 
"); } - printf("]\n"); + printf("%3d ", a[i]); + } + printf("]\n"); } -template -void printElapsedTime(T time, std::string note = "") -{ - std::cout << " elapsed time: " << time << "ms " << note << std::endl; +template +void printElapsedTime(T time, std::string note = "") { + std::cout << " elapsed time: " << time << "ms " << note << std::endl; } diff --git a/stream_compaction/common.cu b/stream_compaction/common.cu index 2ed6d630..0a2a90f3 100644 --- a/stream_compaction/common.cu +++ b/stream_compaction/common.cu @@ -1,39 +1,35 @@ #include "common.h" -void checkCUDAErrorFn(const char *msg, const char *file, int line) { - cudaError_t err = cudaGetLastError(); - if (cudaSuccess == err) { - return; - } +void checkCUDAErrorFn(const char* msg, const char* file, int line) { + cudaError_t err = cudaGetLastError(); + if (cudaSuccess == err) { + return; + } - fprintf(stderr, "CUDA error"); - if (file) { - fprintf(stderr, " (%s:%d)", file, line); - } - fprintf(stderr, ": %s: %s\n", msg, cudaGetErrorString(err)); - exit(EXIT_FAILURE); + fprintf(stderr, "CUDA error"); + if (file) { + fprintf(stderr, " (%s:%d)", file, line); + } + fprintf(stderr, ": %s: %s\n", msg, cudaGetErrorString(err)); + exit(EXIT_FAILURE); } +namespace StreamCompaction::Common { -namespace StreamCompaction { - namespace Common { - - /** - * Maps an array to an array of 0s and 1s for stream compaction. Elements - * which map to 0 will be removed, and elements which map to 1 will be kept. - */ - __global__ void kernMapToBoolean(int n, int *bools, const int *idata) { - // TODO - } - - /** - * Performs scatter on an array. That is, for each element in idata, - * if bools[idx] == 1, it copies idata[idx] to odata[indices[idx]]. - */ - __global__ void kernScatter(int n, int *odata, - const int *idata, const int *bools, const int *indices) { - // TODO - } +/** + * Maps an array to an array of 0s and 1s for stream compaction. Elements + * which map to 0 will be removed, and elements which map to 1 will be kept. 
+ */ +__global__ void kernMapToBoolean(int n, int* bools, const int* idata) { + // TODO +} - } +/** + * Performs scatter on an array. That is, for each element in idata, + * if bools[idx] == 1, it copies idata[idx] to odata[indices[idx]]. + */ +__global__ void kernScatter(int n, int* odata, const int* idata, const int* bools, const int* indices) { + // TODO } + +} // namespace StreamCompaction::Common diff --git a/stream_compaction/common.h b/stream_compaction/common.h index d2c1fed9..abb569ab 100644 --- a/stream_compaction/common.h +++ b/stream_compaction/common.h @@ -3,11 +3,11 @@ #include #include -#include -#include -#include #include #include +#include +#include +#include #include #define FILENAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__) @@ -16,117 +16,116 @@ /** * Check for CUDA errors; print and exit if there was a problem. */ -void checkCUDAErrorFn(const char *msg, const char *file = NULL, int line = -1); +void checkCUDAErrorFn(const char* msg, const char* file = NULL, int line = -1); inline int ilog2(int x) { - int lg = 0; - while (x >>= 1) { - ++lg; - } - return lg; + int lg = 0; + while (x >>= 1) { + ++lg; + } + return lg; } inline int ilog2ceil(int x) { - return x == 1 ? 0 : ilog2(x - 1) + 1; + return x == 1 ? 
0 : ilog2(x - 1) + 1; } namespace StreamCompaction { - namespace Common { - __global__ void kernMapToBoolean(int n, int *bools, const int *idata); - - __global__ void kernScatter(int n, int *odata, - const int *idata, const int *bools, const int *indices); - - /** - * This class is used for timing the performance - * Uncopyable and unmovable - * - * Adapted from WindyDarian(https://github.com/WindyDarian) - */ - class PerformanceTimer - { - public: - PerformanceTimer() - { - cudaEventCreate(&event_start); - cudaEventCreate(&event_end); - } - - ~PerformanceTimer() - { - cudaEventDestroy(event_start); - cudaEventDestroy(event_end); - } - - void startCpuTimer() - { - if (cpu_timer_started) { throw std::runtime_error("CPU timer already started"); } - cpu_timer_started = true; - - time_start_cpu = std::chrono::high_resolution_clock::now(); - } - - void endCpuTimer() - { - time_end_cpu = std::chrono::high_resolution_clock::now(); - - if (!cpu_timer_started) { throw std::runtime_error("CPU timer not started"); } - - std::chrono::duration duro = time_end_cpu - time_start_cpu; - prev_elapsed_time_cpu_milliseconds = - static_cast(duro.count()); - - cpu_timer_started = false; - } - - void startGpuTimer() - { - if (gpu_timer_started) { throw std::runtime_error("GPU timer already started"); } - gpu_timer_started = true; - - cudaEventRecord(event_start); - } - - void endGpuTimer() - { - cudaEventRecord(event_end); - cudaEventSynchronize(event_end); - - if (!gpu_timer_started) { throw std::runtime_error("GPU timer not started"); } - - cudaEventElapsedTime(&prev_elapsed_time_gpu_milliseconds, event_start, event_end); - gpu_timer_started = false; - } - - float getCpuElapsedTimeForPreviousOperation() //noexcept //(damn I need VS 2015 - { - return prev_elapsed_time_cpu_milliseconds; - } - - float getGpuElapsedTimeForPreviousOperation() //noexcept - { - return prev_elapsed_time_gpu_milliseconds; - } - - // remove copy and move functions - PerformanceTimer(const PerformanceTimer&) = 
delete; - PerformanceTimer(PerformanceTimer&&) = delete; - PerformanceTimer& operator=(const PerformanceTimer&) = delete; - PerformanceTimer& operator=(PerformanceTimer&&) = delete; - - private: - cudaEvent_t event_start = nullptr; - cudaEvent_t event_end = nullptr; - - using time_point_t = std::chrono::high_resolution_clock::time_point; - time_point_t time_start_cpu; - time_point_t time_end_cpu; - - bool cpu_timer_started = false; - bool gpu_timer_started = false; - - float prev_elapsed_time_cpu_milliseconds = 0.f; - float prev_elapsed_time_gpu_milliseconds = 0.f; - }; +namespace Common { +__global__ void kernMapToBoolean(int n, int* bools, const int* idata); + +__global__ void kernScatter(int n, int* odata, const int* idata, const int* bools, const int* indices); + +/** + * This class is used for timing the performance + * Uncopyable and unmovable + * + * Adapted from WindyDarian(https://github.com/WindyDarian) + */ +class PerformanceTimer { + public: + PerformanceTimer() { + cudaEventCreate(&event_start); + cudaEventCreate(&event_end); + } + + ~PerformanceTimer() { + cudaEventDestroy(event_start); + cudaEventDestroy(event_end); + } + + void startCpuTimer() { + if (cpu_timer_started) { + throw std::runtime_error("CPU timer already started"); } -} + cpu_timer_started = true; + + time_start_cpu = std::chrono::high_resolution_clock::now(); + } + + void endCpuTimer() { + time_end_cpu = std::chrono::high_resolution_clock::now(); + + if (!cpu_timer_started) { + throw std::runtime_error("CPU timer not started"); + } + + std::chrono::duration duro = time_end_cpu - time_start_cpu; + prev_elapsed_time_cpu_milliseconds = static_cast(duro.count()); + + cpu_timer_started = false; + } + + void startGpuTimer() { + if (gpu_timer_started) { + throw std::runtime_error("GPU timer already started"); + } + gpu_timer_started = true; + + cudaEventRecord(event_start); + } + + void endGpuTimer() { + cudaEventRecord(event_end); + cudaEventSynchronize(event_end); + + if 
(!gpu_timer_started) { + throw std::runtime_error("GPU timer not started"); + } + + cudaEventElapsedTime(&prev_elapsed_time_gpu_milliseconds, event_start, event_end); + gpu_timer_started = false; + } + + float getCpuElapsedTimeForPreviousOperation() // noexcept //(damn I need VS 2015 + { + return prev_elapsed_time_cpu_milliseconds; + } + + float getGpuElapsedTimeForPreviousOperation() // noexcept + { + return prev_elapsed_time_gpu_milliseconds; + } + + // remove copy and move functions + PerformanceTimer(const PerformanceTimer&) = delete; + PerformanceTimer(PerformanceTimer&&) = delete; + PerformanceTimer& operator=(const PerformanceTimer&) = delete; + PerformanceTimer& operator=(PerformanceTimer&&) = delete; + + private: + cudaEvent_t event_start = nullptr; + cudaEvent_t event_end = nullptr; + + using time_point_t = std::chrono::high_resolution_clock::time_point; + time_point_t time_start_cpu; + time_point_t time_end_cpu; + + bool cpu_timer_started = false; + bool gpu_timer_started = false; + + float prev_elapsed_time_cpu_milliseconds = 0.f; + float prev_elapsed_time_gpu_milliseconds = 0.f; +}; +} // namespace Common +} // namespace StreamCompaction diff --git a/stream_compaction/cpu.cu b/stream_compaction/cpu.cu index 7f331c0f..d97c0046 100644 --- a/stream_compaction/cpu.cu +++ b/stream_compaction/cpu.cu @@ -1,95 +1,95 @@ -#include "cpu.h" #include "common.h" +#include "cpu.h" #include namespace StreamCompaction { - namespace CPU { - using StreamCompaction::Common::PerformanceTimer; - - namespace { - inline void scanImplementation(int n, int* odata, const int* idata) { - int currentSum = 0; - for (int i = 0; i < n; ++i) { - odata[i] = currentSum; - currentSum += idata[i]; - } - } - } - - PerformanceTimer& timer() - { - static PerformanceTimer timer; - return timer; - } - - /** - * CPU scan (prefix sum). - * For performance analysis, this is supposed to be a simple for loop. 
- * (Optional) For better understanding before starting moving to GPU, you can simulate your GPU scan in this function first. - */ - void scan(int n, int *odata, const int *idata) { - timer().startCpuTimer(); - - if (n <= 0) return; - - scanImplementation(n, odata, idata); - - timer().endCpuTimer(); - } - - /** - * CPU stream compaction without using the scan function. - * - * @returns the number of elements remaining after compaction. - */ - int compactWithoutScan(int n, int *odata, const int *idata) { - timer().startCpuTimer(); - - if (n <= 0) return 0; - - int outputIndex = 0; - for (int inputIndex = 0; inputIndex < n; ++inputIndex) { - int element = idata[inputIndex]; - - if (element > 0) { - odata[outputIndex] = element; - outputIndex++; - } - } - - timer().endCpuTimer(); - - return outputIndex; - } - - /** - * CPU stream compaction using scan and scatter, like the parallel version. - * - * @returns the number of elements remaining after compaction. - */ - int compactWithScan(int n, int *odata, const int *idata) { - timer().startCpuTimer(); - - if (n <= 0) return 0; - - std::unique_ptr valid = std::make_unique(n); - for (int i = 0; i < n; ++i) { - valid[i] = idata[i] > 0 ? 1 : 0; - } - - std::unique_ptr scanResult = std::make_unique(n); - scanImplementation(n, scanResult.get(), valid.get()); - - for (int i = 0; i < n; ++i) { - if (valid[i] > 0) { - odata[scanResult[i]] = idata[i]; - } - } - - timer().endCpuTimer(); - - return scanResult[n - 1]; - } +namespace CPU { +using StreamCompaction::Common::PerformanceTimer; + +namespace { +inline void scanImplementation(int n, int* odata, const int* idata) { + int currentSum = 0; + for (int i = 0; i < n; ++i) { + odata[i] = currentSum; + currentSum += idata[i]; + } +} +} // namespace + +PerformanceTimer& timer() { + static PerformanceTimer timer; + return timer; +} + +/** + * CPU scan (prefix sum). + * For performance analysis, this is supposed to be a simple for loop. 
+ * (Optional) For better understanding before starting moving to GPU, you can simulate your GPU scan in this function + * first. + */ +void scan(int n, int* odata, const int* idata) { + timer().startCpuTimer(); + + if (n <= 0) return; + + scanImplementation(n, odata, idata); + + timer().endCpuTimer(); +} + +/** + * CPU stream compaction without using the scan function. + * + * @returns the number of elements remaining after compaction. + */ +int compactWithoutScan(int n, int* odata, const int* idata) { + timer().startCpuTimer(); + + if (n <= 0) return 0; + + int outputIndex = 0; + for (int inputIndex = 0; inputIndex < n; ++inputIndex) { + int element = idata[inputIndex]; + + if (element > 0) { + odata[outputIndex] = element; + outputIndex++; + } + } + + timer().endCpuTimer(); + + return outputIndex; +} + +/** + * CPU stream compaction using scan and scatter, like the parallel version. + * + * @returns the number of elements remaining after compaction. + */ +int compactWithScan(int n, int* odata, const int* idata) { + timer().startCpuTimer(); + + if (n <= 0) return 0; + + std::unique_ptr valid = std::make_unique(n); + for (int i = 0; i < n; ++i) { + valid[i] = idata[i] > 0 ? 
1 : 0; + } + + std::unique_ptr scanResult = std::make_unique(n); + scanImplementation(n, scanResult.get(), valid.get()); + + for (int i = 0; i < n; ++i) { + if (valid[i] > 0) { + odata[scanResult[i]] = idata[i]; } + } + + timer().endCpuTimer(); + + return scanResult[n - 1]; } +} // namespace CPU +} // namespace StreamCompaction diff --git a/stream_compaction/cpu.h b/stream_compaction/cpu.h index 873c0476..f85e9e21 100644 --- a/stream_compaction/cpu.h +++ b/stream_compaction/cpu.h @@ -3,13 +3,13 @@ #include "common.h" namespace StreamCompaction { - namespace CPU { - StreamCompaction::Common::PerformanceTimer& timer(); +namespace CPU { +StreamCompaction::Common::PerformanceTimer& timer(); - void scan(int n, int *odata, const int *idata); +void scan(int n, int* odata, const int* idata); - int compactWithoutScan(int n, int *odata, const int *idata); +int compactWithoutScan(int n, int* odata, const int* idata); - int compactWithScan(int n, int *odata, const int *idata); - } -} +int compactWithScan(int n, int* odata, const int* idata); +} // namespace CPU +} // namespace StreamCompaction diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index 2db346ee..6ced77fa 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -4,37 +4,36 @@ #include "efficient.h" namespace StreamCompaction { - namespace Efficient { - using StreamCompaction::Common::PerformanceTimer; - PerformanceTimer& timer() - { - static PerformanceTimer timer; - return timer; - } +namespace Efficient { +using StreamCompaction::Common::PerformanceTimer; +PerformanceTimer& timer() { + static PerformanceTimer timer; + return timer; +} - /** - * Performs prefix-sum (aka scan) on idata, storing the result into odata. - */ - void scan(int n, int *odata, const int *idata) { - timer().startGpuTimer(); - // TODO - timer().endGpuTimer(); - } +/** + * Performs prefix-sum (aka scan) on idata, storing the result into odata. 
+ */ +void scan(int n, int* odata, const int* idata) { + timer().startGpuTimer(); + // TODO + timer().endGpuTimer(); +} - /** - * Performs stream compaction on idata, storing the result into odata. - * All zeroes are discarded. - * - * @param n The number of elements in idata. - * @param odata The array into which to store elements. - * @param idata The array of elements to compact. - * @returns The number of elements remaining after compaction. - */ - int compact(int n, int *odata, const int *idata) { - timer().startGpuTimer(); - // TODO - timer().endGpuTimer(); - return -1; - } - } +/** + * Performs stream compaction on idata, storing the result into odata. + * All zeroes are discarded. + * + * @param n The number of elements in idata. + * @param odata The array into which to store elements. + * @param idata The array of elements to compact. + * @returns The number of elements remaining after compaction. + */ +int compact(int n, int* odata, const int* idata) { + timer().startGpuTimer(); + // TODO + timer().endGpuTimer(); + return -1; } +} // namespace Efficient +} // namespace StreamCompaction diff --git a/stream_compaction/efficient.h b/stream_compaction/efficient.h index 803cb4fe..94aaa8e8 100644 --- a/stream_compaction/efficient.h +++ b/stream_compaction/efficient.h @@ -3,11 +3,11 @@ #include "common.h" namespace StreamCompaction { - namespace Efficient { - StreamCompaction::Common::PerformanceTimer& timer(); +namespace Efficient { +StreamCompaction::Common::PerformanceTimer& timer(); - void scan(int n, int *odata, const int *idata); +void scan(int n, int* odata, const int* idata); - int compact(int n, int *odata, const int *idata); - } -} +int compact(int n, int* odata, const int* idata); +} // namespace Efficient +} // namespace StreamCompaction diff --git a/stream_compaction/naive.cu b/stream_compaction/naive.cu index 43088769..1673c457 100644 --- a/stream_compaction/naive.cu +++ b/stream_compaction/naive.cu @@ -4,22 +4,21 @@ #include "naive.h" namespace 
StreamCompaction { - namespace Naive { - using StreamCompaction::Common::PerformanceTimer; - PerformanceTimer& timer() - { - static PerformanceTimer timer; - return timer; - } - // TODO: __global__ +namespace Naive { +using StreamCompaction::Common::PerformanceTimer; +PerformanceTimer& timer() { + static PerformanceTimer timer; + return timer; +} +// TODO: __global__ - /** - * Performs prefix-sum (aka scan) on idata, storing the result into odata. - */ - void scan(int n, int *odata, const int *idata) { - timer().startGpuTimer(); - // TODO - timer().endGpuTimer(); - } - } +/** + * Performs prefix-sum (aka scan) on idata, storing the result into odata. + */ +void scan(int n, int* odata, const int* idata) { + timer().startGpuTimer(); + // TODO + timer().endGpuTimer(); } +} // namespace Naive +} // namespace StreamCompaction diff --git a/stream_compaction/naive.h b/stream_compaction/naive.h index 37dcb064..55ef289c 100644 --- a/stream_compaction/naive.h +++ b/stream_compaction/naive.h @@ -3,9 +3,9 @@ #include "common.h" namespace StreamCompaction { - namespace Naive { - StreamCompaction::Common::PerformanceTimer& timer(); +namespace Naive { +StreamCompaction::Common::PerformanceTimer& timer(); - void scan(int n, int *odata, const int *idata); - } -} +void scan(int n, int* odata, const int* idata); +} // namespace Naive +} // namespace StreamCompaction diff --git a/stream_compaction/thrust.cu b/stream_compaction/thrust.cu index 1def45e7..f72a64d1 100644 --- a/stream_compaction/thrust.cu +++ b/stream_compaction/thrust.cu @@ -7,22 +7,21 @@ #include "thrust.h" namespace StreamCompaction { - namespace Thrust { - using StreamCompaction::Common::PerformanceTimer; - PerformanceTimer& timer() - { - static PerformanceTimer timer; - return timer; - } - /** - * Performs prefix-sum (aka scan) on idata, storing the result into odata. 
- */ - void scan(int n, int *odata, const int *idata) { - timer().startGpuTimer(); - // TODO use `thrust::exclusive_scan` - // example: for device_vectors dv_in and dv_out: - // thrust::exclusive_scan(dv_in.begin(), dv_in.end(), dv_out.begin()); - timer().endGpuTimer(); - } - } +namespace Thrust { +using StreamCompaction::Common::PerformanceTimer; +PerformanceTimer& timer() { + static PerformanceTimer timer; + return timer; } +/** + * Performs prefix-sum (aka scan) on idata, storing the result into odata. + */ +void scan(int n, int* odata, const int* idata) { + timer().startGpuTimer(); + // TODO use `thrust::exclusive_scan` + // example: for device_vectors dv_in and dv_out: + // thrust::exclusive_scan(dv_in.begin(), dv_in.end(), dv_out.begin()); + timer().endGpuTimer(); +} +} // namespace Thrust +} // namespace StreamCompaction diff --git a/stream_compaction/thrust.h b/stream_compaction/thrust.h index fe98206b..e9a602b5 100644 --- a/stream_compaction/thrust.h +++ b/stream_compaction/thrust.h @@ -3,9 +3,9 @@ #include "common.h" namespace StreamCompaction { - namespace Thrust { - StreamCompaction::Common::PerformanceTimer& timer(); +namespace Thrust { +StreamCompaction::Common::PerformanceTimer& timer(); - void scan(int n, int *odata, const int *idata); - } -} +void scan(int n, int* odata, const int* idata); +} // namespace Thrust +} // namespace StreamCompaction From 6953fbe6b78b51efa178f54754a8051e759748e6 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Thu, 11 Sep 2025 18:23:20 -0400 Subject: [PATCH 07/46] add more clang-format rules (sorting includes) and format --- .clang-format | 13 ++++++++++++- src/main.cpp | 5 +++-- stream_compaction/common.h | 2 ++ stream_compaction/cpu.cu | 4 ++++ stream_compaction/cpu.h | 2 ++ stream_compaction/efficient.cu | 7 +++++-- stream_compaction/efficient.h | 2 ++ stream_compaction/naive.cu | 7 +++++-- stream_compaction/naive.h | 2 ++ stream_compaction/thrust.cu | 8 ++++++-- stream_compaction/thrust.h | 2 ++ 11 files changed, 45 
insertions(+), 9 deletions(-) diff --git a/.clang-format b/.clang-format index 3097e964..5414d00f 100644 --- a/.clang-format +++ b/.clang-format @@ -1,4 +1,15 @@ BasedOnStyle: Chromium + ColumnLimit: 120 InsertNewlineAtEOF: true -AllowShortIfStatementsOnASingleLine: WithoutElse \ No newline at end of file +AllowShortIfStatementsOnASingleLine: WithoutElse +WrapNamespaceBodyWithEmptyLines: Always + +IncludeBlocks: Regroup +IncludeCategories: + - Regex: '^".*"' + Priority: 1 + - Regex: '^' + Priority: 2 + - Regex: '^<.*>' + Priority: 3 \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index 26c5b47b..b629dc88 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -6,12 +6,13 @@ * @copyright University of Pennsylvania */ +#include "testing_helpers.hpp" + +#include #include #include #include #include -#include -#include "testing_helpers.hpp" const int SIZE = 1 << 8; // feel free to change the size of array const int NPOT = SIZE - 3; // Non-Power-Of-Two diff --git a/stream_compaction/common.h b/stream_compaction/common.h index abb569ab..87000f1e 100644 --- a/stream_compaction/common.h +++ b/stream_compaction/common.h @@ -32,6 +32,7 @@ inline int ilog2ceil(int x) { namespace StreamCompaction { namespace Common { + __global__ void kernMapToBoolean(int n, int* bools, const int* idata); __global__ void kernScatter(int n, int* odata, const int* idata, const int* bools, const int* indices); @@ -127,5 +128,6 @@ class PerformanceTimer { float prev_elapsed_time_cpu_milliseconds = 0.f; float prev_elapsed_time_gpu_milliseconds = 0.f; }; + } // namespace Common } // namespace StreamCompaction diff --git a/stream_compaction/cpu.cu b/stream_compaction/cpu.cu index d97c0046..e6b48dc1 100644 --- a/stream_compaction/cpu.cu +++ b/stream_compaction/cpu.cu @@ -5,9 +5,11 @@ namespace StreamCompaction { namespace CPU { + using StreamCompaction::Common::PerformanceTimer; namespace { + inline void scanImplementation(int n, int* odata, const int* idata) { int currentSum = 0; for (int 
i = 0; i < n; ++i) { @@ -15,6 +17,7 @@ inline void scanImplementation(int n, int* odata, const int* idata) { currentSum += idata[i]; } } + } // namespace PerformanceTimer& timer() { @@ -91,5 +94,6 @@ int compactWithScan(int n, int* odata, const int* idata) { return scanResult[n - 1]; } + } // namespace CPU } // namespace StreamCompaction diff --git a/stream_compaction/cpu.h b/stream_compaction/cpu.h index f85e9e21..42635cbb 100644 --- a/stream_compaction/cpu.h +++ b/stream_compaction/cpu.h @@ -4,6 +4,7 @@ namespace StreamCompaction { namespace CPU { + StreamCompaction::Common::PerformanceTimer& timer(); void scan(int n, int* odata, const int* idata); @@ -11,5 +12,6 @@ void scan(int n, int* odata, const int* idata); int compactWithoutScan(int n, int* odata, const int* idata); int compactWithScan(int n, int* odata, const int* idata); + } // namespace CPU } // namespace StreamCompaction diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index 6ced77fa..830da30e 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -1,10 +1,12 @@ -#include -#include #include "common.h" #include "efficient.h" +#include +#include + namespace StreamCompaction { namespace Efficient { + using StreamCompaction::Common::PerformanceTimer; PerformanceTimer& timer() { static PerformanceTimer timer; @@ -35,5 +37,6 @@ int compact(int n, int* odata, const int* idata) { timer().endGpuTimer(); return -1; } + } // namespace Efficient } // namespace StreamCompaction diff --git a/stream_compaction/efficient.h b/stream_compaction/efficient.h index 94aaa8e8..d64c3426 100644 --- a/stream_compaction/efficient.h +++ b/stream_compaction/efficient.h @@ -4,10 +4,12 @@ namespace StreamCompaction { namespace Efficient { + StreamCompaction::Common::PerformanceTimer& timer(); void scan(int n, int* odata, const int* idata); int compact(int n, int* odata, const int* idata); + } // namespace Efficient } // namespace StreamCompaction diff --git 
a/stream_compaction/naive.cu b/stream_compaction/naive.cu index 1673c457..f238b09e 100644 --- a/stream_compaction/naive.cu +++ b/stream_compaction/naive.cu @@ -1,10 +1,12 @@ -#include -#include #include "common.h" #include "naive.h" +#include +#include + namespace StreamCompaction { namespace Naive { + using StreamCompaction::Common::PerformanceTimer; PerformanceTimer& timer() { static PerformanceTimer timer; @@ -20,5 +22,6 @@ void scan(int n, int* odata, const int* idata) { // TODO timer().endGpuTimer(); } + } // namespace Naive } // namespace StreamCompaction diff --git a/stream_compaction/naive.h b/stream_compaction/naive.h index 55ef289c..65e709e6 100644 --- a/stream_compaction/naive.h +++ b/stream_compaction/naive.h @@ -4,8 +4,10 @@ namespace StreamCompaction { namespace Naive { + StreamCompaction::Common::PerformanceTimer& timer(); void scan(int n, int* odata, const int* idata); + } // namespace Naive } // namespace StreamCompaction diff --git a/stream_compaction/thrust.cu b/stream_compaction/thrust.cu index f72a64d1..cdce6c35 100644 --- a/stream_compaction/thrust.cu +++ b/stream_compaction/thrust.cu @@ -1,13 +1,16 @@ +#include "common.h" +#include "thrust.h" + #include #include + #include #include #include -#include "common.h" -#include "thrust.h" namespace StreamCompaction { namespace Thrust { + using StreamCompaction::Common::PerformanceTimer; PerformanceTimer& timer() { static PerformanceTimer timer; @@ -23,5 +26,6 @@ void scan(int n, int* odata, const int* idata) { // thrust::exclusive_scan(dv_in.begin(), dv_in.end(), dv_out.begin()); timer().endGpuTimer(); } + } // namespace Thrust } // namespace StreamCompaction diff --git a/stream_compaction/thrust.h b/stream_compaction/thrust.h index e9a602b5..056f7c90 100644 --- a/stream_compaction/thrust.h +++ b/stream_compaction/thrust.h @@ -4,8 +4,10 @@ namespace StreamCompaction { namespace Thrust { + StreamCompaction::Common::PerformanceTimer& timer(); void scan(int n, int* odata, const int* idata); + } // 
namespace Thrust } // namespace StreamCompaction From 8476b9fe0b158aee8138fbcbdc07423069509ce2 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Thu, 11 Sep 2025 18:26:41 -0400 Subject: [PATCH 08/46] add newline between blocks --- .clang-format | 1 + stream_compaction/efficient.cu | 1 + stream_compaction/naive.cu | 2 ++ stream_compaction/thrust.cu | 2 ++ 4 files changed, 6 insertions(+) diff --git a/.clang-format b/.clang-format index 5414d00f..814e93f9 100644 --- a/.clang-format +++ b/.clang-format @@ -4,6 +4,7 @@ ColumnLimit: 120 InsertNewlineAtEOF: true AllowShortIfStatementsOnASingleLine: WithoutElse WrapNamespaceBodyWithEmptyLines: Always +SeparateDefinitionBlocks: Always IncludeBlocks: Regroup IncludeCategories: diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index 830da30e..e854510a 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -8,6 +8,7 @@ namespace StreamCompaction { namespace Efficient { using StreamCompaction::Common::PerformanceTimer; + PerformanceTimer& timer() { static PerformanceTimer timer; return timer; diff --git a/stream_compaction/naive.cu b/stream_compaction/naive.cu index f238b09e..6b033130 100644 --- a/stream_compaction/naive.cu +++ b/stream_compaction/naive.cu @@ -8,10 +8,12 @@ namespace StreamCompaction { namespace Naive { using StreamCompaction::Common::PerformanceTimer; + PerformanceTimer& timer() { static PerformanceTimer timer; return timer; } + // TODO: __global__ /** diff --git a/stream_compaction/thrust.cu b/stream_compaction/thrust.cu index cdce6c35..8b9c3adc 100644 --- a/stream_compaction/thrust.cu +++ b/stream_compaction/thrust.cu @@ -12,10 +12,12 @@ namespace StreamCompaction { namespace Thrust { using StreamCompaction::Common::PerformanceTimer; + PerformanceTimer& timer() { static PerformanceTimer timer; return timer; } + /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. 
*/ From eada25f124372c9f1b72fa456c8dfbd3b164229f Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Mon, 15 Sep 2025 16:13:20 -0400 Subject: [PATCH 09/46] add cuda include path to vscode profile settings --- .vscode/c_cpp_properties.json | 24 ------------------------ 1 file changed, 24 deletions(-) delete mode 100644 .vscode/c_cpp_properties.json diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json deleted file mode 100644 index e262ac06..00000000 --- a/.vscode/c_cpp_properties.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "configurations": [ - { - "name": "Win32", - "includePath": [ - "${workspaceFolder}/**", - "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v13.0\\include" - ], - "defines": [ - "_DEBUG", - "UNICODE", - "_UNICODE" - ], - "windowsSdkVersion": "10.0.26100.0", - "compilerPath": "cl.exe", - "cStandard": "c17", - "cppStandard": "c++17", - "intelliSenseMode": "windows-msvc-x64", - "configurationProvider": "ms-vscode.cmake-tools", - "mergeConfigurations": true - } - ], - "version": 4 -} \ No newline at end of file From d530ded690c0d92f57a06aff17bd33ea13cb06a9 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Mon, 15 Sep 2025 16:21:24 -0400 Subject: [PATCH 10/46] move early returns, mem allocations out of timing measurements --- stream_compaction/cpu.cu | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/stream_compaction/cpu.cu b/stream_compaction/cpu.cu index e6b48dc1..ce57206d 100644 --- a/stream_compaction/cpu.cu +++ b/stream_compaction/cpu.cu @@ -32,12 +32,10 @@ PerformanceTimer& timer() { * first. */ void scan(int n, int* odata, const int* idata) { - timer().startCpuTimer(); - if (n <= 0) return; + timer().startCpuTimer(); scanImplementation(n, odata, idata); - timer().endCpuTimer(); } @@ -47,10 +45,10 @@ void scan(int n, int* odata, const int* idata) { * @returns the number of elements remaining after compaction. 
*/ int compactWithoutScan(int n, int* odata, const int* idata) { - timer().startCpuTimer(); - if (n <= 0) return 0; + timer().startCpuTimer(); + int outputIndex = 0; for (int inputIndex = 0; inputIndex < n; ++inputIndex) { int element = idata[inputIndex]; @@ -72,16 +70,17 @@ int compactWithoutScan(int n, int* odata, const int* idata) { * @returns the number of elements remaining after compaction. */ int compactWithScan(int n, int* odata, const int* idata) { - timer().startCpuTimer(); - if (n <= 0) return 0; std::unique_ptr valid = std::make_unique(n); + std::unique_ptr scanResult = std::make_unique(n); + + timer().startCpuTimer(); + for (int i = 0; i < n; ++i) { valid[i] = idata[i] > 0 ? 1 : 0; } - std::unique_ptr scanResult = std::make_unique(n); scanImplementation(n, scanResult.get(), valid.get()); for (int i = 0; i < n; ++i) { From c05d4a43266b0e64ad2f91221392c856c5c13675 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Mon, 15 Sep 2025 16:37:57 -0400 Subject: [PATCH 11/46] delete irrelevant build files --- .cproject | 212 --------------------------- .project | 27 ---- GNUmakefile | 31 ---- cis565_stream_compaction_test.launch | 27 ---- 4 files changed, 297 deletions(-) delete mode 100644 .cproject delete mode 100644 .project delete mode 100644 GNUmakefile delete mode 100644 cis565_stream_compaction_test.launch diff --git a/.cproject b/.cproject deleted file mode 100644 index 6615a581..00000000 --- a/.cproject +++ /dev/null @@ -1,212 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/.project b/.project deleted file mode 100644 index d95a4e38..00000000 --- a/.project +++ /dev/null @@ -1,27 +0,0 @@ - - - 
Project2-Stream-Compaction - - - - - - org.eclipse.cdt.managedbuilder.core.genmakebuilder - clean,full,incremental, - - - - - org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder - full,incremental, - - - - - - org.eclipse.cdt.core.cnature - org.eclipse.cdt.core.ccnature - org.eclipse.cdt.managedbuilder.core.managedBuildNature - org.eclipse.cdt.managedbuilder.core.ScannerConfigNature - - diff --git a/GNUmakefile b/GNUmakefile deleted file mode 100644 index 2b433114..00000000 --- a/GNUmakefile +++ /dev/null @@ -1,31 +0,0 @@ -CMAKE_ALT1 := /usr/local/bin/cmake -CMAKE_ALT2 := /Applications/CMake.app/Contents/bin/cmake -CMAKE := $(shell \ - which cmake 2>/dev/null || \ - ([ -e ${CMAKE_ALT1} ] && echo "${CMAKE_ALT1}") || \ - ([ -e ${CMAKE_ALT2} ] && echo "${CMAKE_ALT2}") \ - ) - -all: Release - - -Debug: build - (cd build && ${CMAKE} -DCMAKE_BUILD_TYPE=$@ .. && make) - -MinSizeRel: build - (cd build && ${CMAKE} -DCMAKE_BUILD_TYPE=$@ .. && make) - -Release: build - (cd build && ${CMAKE} -DCMAKE_BUILD_TYPE=$@ .. && make) - -RelWithDebugInfo: build - (cd build && ${CMAKE} -DCMAKE_BUILD_TYPE=$@ .. 
&& make) - - -build: - mkdir -p build - -clean: - ((cd build && make clean) 2>&- || true) - -.PHONY: all Debug MinSizeRel Release RelWithDebugInfo clean diff --git a/cis565_stream_compaction_test.launch b/cis565_stream_compaction_test.launch deleted file mode 100644 index 4267429a..00000000 --- a/cis565_stream_compaction_test.launch +++ /dev/null @@ -1,27 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - From 375e015fb3750b14e6f8439854f8fec8714d9641 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Mon, 15 Sep 2025 18:28:16 -0400 Subject: [PATCH 12/46] implement `Naive::scan` --- src/main.cpp | 54 ++++++++++++++++++------- stream_compaction/naive.cu | 82 +++++++++++++++++++++++++++++++++++++- 2 files changed, 120 insertions(+), 16 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index b629dc88..4f27de17 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -14,8 +14,10 @@ #include #include -const int SIZE = 1 << 8; // feel free to change the size of array -const int NPOT = SIZE - 3; // Non-Power-Of-Two +constexpr bool runDebugTests = true; // runs additional simpler tests +const int SIZE = 1 << 8; // feel free to change the size of array +const int NPOT = SIZE - 3; // Non-Power-Of-Two + int* a = new int[SIZE]; int* b = new int[SIZE]; int* c = new int[SIZE]; @@ -52,50 +54,74 @@ int main(int argc, char* argv[]) { printDesc("naive scan, power-of-two"); StreamCompaction::Naive::scan(SIZE, c, a); printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - // printArray(SIZE, c, true); + printArray(SIZE, c, true); printCmpResult(SIZE, b, c); - /* For bug-finding only: Array of 1s to help find bugs in stream compaction or scan - onesArray(SIZE, c); - printDesc("1s array for finding bugs"); - StreamCompaction::Naive::scan(SIZE, c, a); - printArray(SIZE, c, true); */ - zeroArray(SIZE, c); printDesc("naive scan, non-power-of-two"); StreamCompaction::Naive::scan(NPOT, c, a); 
printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - // printArray(SIZE, c, true); + printArray(SIZE, c, true); printCmpResult(NPOT, b, c); zeroArray(SIZE, c); printDesc("work-efficient scan, power-of-two"); StreamCompaction::Efficient::scan(SIZE, c, a); printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - // printArray(SIZE, c, true); + printArray(SIZE, c, true); printCmpResult(SIZE, b, c); zeroArray(SIZE, c); printDesc("work-efficient scan, non-power-of-two"); StreamCompaction::Efficient::scan(NPOT, c, a); printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - // printArray(NPOT, c, true); + printArray(NPOT, c, true); printCmpResult(NPOT, b, c); zeroArray(SIZE, c); printDesc("thrust scan, power-of-two"); StreamCompaction::Thrust::scan(SIZE, c, a); printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - // printArray(SIZE, c, true); + printArray(SIZE, c, true); printCmpResult(SIZE, b, c); zeroArray(SIZE, c); printDesc("thrust scan, non-power-of-two"); StreamCompaction::Thrust::scan(NPOT, c, a); printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - // printArray(NPOT, c, true); + printArray(NPOT, c, true); printCmpResult(NPOT, b, c); + // For bug-finding only: Array of 1s to help find bugs in stream compaction or scan + if constexpr (runDebugTests) { + printf("\n"); + printf("*************************\n"); + printf("** SCAN TESTS (ALL 1s) **\n"); + printf("*************************\n"); + + onesArray(SIZE, a); + + zeroArray(SIZE, b); + printDesc("cpu scan, power-of-two"); + StreamCompaction::CPU::scan(SIZE, b, a); + printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); + printArray(SIZE, b, true); + + 
zeroArray(SIZE, c); + printDesc("cpu scan, non-power-of-two"); + StreamCompaction::CPU::scan(NPOT, c, a); + printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); + printArray(NPOT, c, true); + printCmpResult(NPOT, b, c); + + zeroArray(SIZE, c); + printDesc("naive scan, power-of-two"); + StreamCompaction::Naive::scan(SIZE, c, a); + printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + printArray(SIZE, c, true); + printCmpResult(SIZE, b, c); + } + printf("\n"); printf("*****************************\n"); printf("** STREAM COMPACTION TESTS **\n"); diff --git a/stream_compaction/naive.cu b/stream_compaction/naive.cu index 6b033130..a647c2f7 100644 --- a/stream_compaction/naive.cu +++ b/stream_compaction/naive.cu @@ -9,20 +9,98 @@ namespace Naive { using StreamCompaction::Common::PerformanceTimer; +/// Toggle for including CUDA error checking at compile time. Not sure +/// how much of a difference it makes to performance, guess we'll see! +constexpr bool checkErrors = true; + +/// Number of threads per block. +constexpr int blockSize = 256; + +/// Whether to return an inclusive or exclusive scan. +constexpr bool useExclusiveScan = true; + +/// Perform inner loop within the kernel. Results in only 1 invocation per "layer" versus one +/// kernel dispatch per iteration of the inner loop. 
+constexpr bool runInnerLoopOnGPU = true; + PerformanceTimer& timer() { static PerformanceTimer timer; return timer; } -// TODO: __global__ +__global__ void kernSumStrided(int n, const int* in, int* out, int stride) { + int tId = (blockDim.x * blockIdx.x) + threadIdx.x; + + if (tId >= n) return; + + for (int k = stride; k <= n; ++k) { + out[k] = in[k - stride] + in[k]; + } +} + +__global__ void kernAddStridedPair(int n, const int* in, int* out, int k, int stride) { + int tId = (blockDim.x * blockIdx.x) + threadIdx.x; + + if (tId >= n) return; + + out[k] = in[k - stride] + in[k]; +} /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ void scan(int n, int* odata, const int* idata) { + int* dev_dataA = nullptr; + int* dev_dataB = nullptr; + + size_t numBytes = n * sizeof(int); + cudaMalloc((void**)&dev_dataA, numBytes); + checkCUDAError("cudaMalloc: dev_dataA failed!"); + cudaMalloc((void**)&dev_dataB, numBytes); + checkCUDAError("cudaMalloc: dev_dataB failed!"); + + cudaMemcpy(dev_dataA, idata, numBytes, cudaMemcpyHostToDevice); + checkCUDAError("cudaMemcpy: idata -> dev_dataA failed!"); + cudaMemcpy(dev_dataB, dev_dataA, numBytes, cudaMemcpyDeviceToDevice); + checkCUDAError("cudaMemcpy: dev_dataA -> dev_dataB failed!"); + timer().startGpuTimer(); - // TODO + + for (int iteration = 1; iteration <= ilog2ceil(n); ++iteration) { + int stride = 1 << (iteration - 1); + int numDispatches = n - stride; + int numBlocks = (numDispatches + blockSize + 1) / blockSize; + + if constexpr (runInnerLoopOnGPU) { + kernSumStrided<<>>(n, dev_dataA, dev_dataB, stride); + } else { + for (int k = stride; k < n; ++k) { + kernAddStridedPair<<>>(numDispatches, dev_dataA, dev_dataB, k, stride); + } + } + + // Swap read and write buffers (output in B will be read next in A) + cudaMemcpy(dev_dataA, dev_dataB, numBytes, cudaMemcpyDeviceToDevice); + } + + // Include writing to output in performance measurements because CPU does it too + cudaMemcpy(odata, dev_dataA, 
numBytes, cudaMemcpyDeviceToHost); + if constexpr (checkErrors) checkCUDAError("cudaMemcpy: dev_dataA -> odata failed!"); + timer().endGpuTimer(); + + // Convert from inclusive scan to exclusive + if constexpr (useExclusiveScan) { + for (int i = n - 1; i > 0; --i) { + odata[i] = odata[i - 1]; + } + odata[0] = 0; + } + + cudaFree(dev_dataA); + checkCUDAError("cudaFree: dev_dataA failed!"); + cudaFree(dev_dataB); + checkCUDAError("cudaFree: dev_dataB failed!"); } } // namespace Naive From 39a082bbe8718c0295064aa3df6beec19e941ae5 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Mon, 15 Sep 2025 18:50:05 -0400 Subject: [PATCH 13/46] nevermind README says to not include final `cudaMemcpy`s --- stream_compaction/naive.cu | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/stream_compaction/naive.cu b/stream_compaction/naive.cu index a647c2f7..17bc0da8 100644 --- a/stream_compaction/naive.cu +++ b/stream_compaction/naive.cu @@ -9,10 +9,6 @@ namespace Naive { using StreamCompaction::Common::PerformanceTimer; -/// Toggle for including CUDA error checking at compile time. Not sure -/// how much of a difference it makes to performance, guess we'll see! -constexpr bool checkErrors = true; - /// Number of threads per block. 
constexpr int blockSize = 256; @@ -83,12 +79,11 @@ void scan(int n, int* odata, const int* idata) { cudaMemcpy(dev_dataA, dev_dataB, numBytes, cudaMemcpyDeviceToDevice); } - // Include writing to output in performance measurements because CPU does it too - cudaMemcpy(odata, dev_dataA, numBytes, cudaMemcpyDeviceToHost); - if constexpr (checkErrors) checkCUDAError("cudaMemcpy: dev_dataA -> odata failed!"); - timer().endGpuTimer(); + cudaMemcpy(odata, dev_dataA, numBytes, cudaMemcpyDeviceToHost); + checkCUDAError("cudaMemcpy: dev_dataA -> odata failed!"); + // Convert from inclusive scan to exclusive if constexpr (useExclusiveScan) { for (int i = n - 1; i > 0; --i) { From d288ccf66352d6db71aea88c9736aae41238dd17 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Mon, 15 Sep 2025 22:14:45 -0400 Subject: [PATCH 14/46] disable debug tests --- src/main.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 4f27de17..8b193275 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -14,9 +14,9 @@ #include #include -constexpr bool runDebugTests = true; // runs additional simpler tests -const int SIZE = 1 << 8; // feel free to change the size of array -const int NPOT = SIZE - 3; // Non-Power-Of-Two +constexpr bool runDebugTests = false; // runs additional simpler tests +const int SIZE = 1 << 8; // feel free to change the size of array +const int NPOT = SIZE - 3; // Non-Power-Of-Two int* a = new int[SIZE]; int* b = new int[SIZE]; From 822ed395add82f5a53fb5e06fd851eca91af661a Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Mon, 15 Sep 2025 22:17:49 -0400 Subject: [PATCH 15/46] efficient: left pad array with zeroes to next power of two if necessary --- stream_compaction/efficient.cu | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index e854510a..67844441 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -4,6 +4,8 
@@ #include #include +#include + namespace StreamCompaction { namespace Efficient { @@ -18,6 +20,21 @@ PerformanceTimer& timer() { * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ void scan(int n, int* odata, const int* idata) { + int actualN = n; + const int* actualInputData = idata; + + // Input array size is not a power of two; we have to pad the left with zeroes + if (int numLeaves = 1 << ilog2ceil(n); n < numLeaves) { + int offset = numLeaves - n; + + // Pad to the next power of two + std::unique_ptr paddedInputData = std::make_unique(numLeaves); + std::memcpy(paddedInputData.get() + offset, idata, n * sizeof(int)); + + actualN = numLeaves; + actualInputData = paddedInputData.release(); + } + timer().startGpuTimer(); // TODO timer().endGpuTimer(); From 837472a1c1d2e42eccfb773c43c8f73b5e0a930f Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Mon, 15 Sep 2025 22:26:31 -0400 Subject: [PATCH 16/46] alloc device mem, use `reinterpret_cast` --- stream_compaction/efficient.cu | 13 ++++++++++++- stream_compaction/naive.cu | 4 ++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index 67844441..d456e2a9 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -22,6 +22,7 @@ PerformanceTimer& timer() { void scan(int n, int* odata, const int* idata) { int actualN = n; const int* actualInputData = idata; + size_t numBytes = n * sizeof(int); // Input array size is not a power of two; we have to pad the left with zeroes if (int numLeaves = 1 << ilog2ceil(n); n < numLeaves) { @@ -29,15 +30,25 @@ void scan(int n, int* odata, const int* idata) { // Pad to the next power of two std::unique_ptr paddedInputData = std::make_unique(numLeaves); - std::memcpy(paddedInputData.get() + offset, idata, n * sizeof(int)); + std::memcpy(paddedInputData.get() + offset, idata, numBytes); actualN = numLeaves; actualInputData = paddedInputData.release(); + 
numBytes = numLeaves * sizeof(int); } + int* dev_data = nullptr; + cudaMalloc(reinterpret_cast(&dev_data), numBytes); + checkCUDAError("cudaMalloc: dev_data failed!"); + cudaMemcpy(dev_data, actualInputData, numBytes, cudaMemcpyHostToDevice); + checkCUDAError("cudaMemcpy: actualInputData -> dev_data failed!"); + timer().startGpuTimer(); // TODO timer().endGpuTimer(); + + cudaFree(dev_data); + checkCUDAError("cudaFree: dev_data failed!"); } /** diff --git a/stream_compaction/naive.cu b/stream_compaction/naive.cu index 17bc0da8..4498a9ac 100644 --- a/stream_compaction/naive.cu +++ b/stream_compaction/naive.cu @@ -50,9 +50,9 @@ void scan(int n, int* odata, const int* idata) { int* dev_dataB = nullptr; size_t numBytes = n * sizeof(int); - cudaMalloc((void**)&dev_dataA, numBytes); + cudaMalloc(reinterpret_cast(&dev_dataA), numBytes); checkCUDAError("cudaMalloc: dev_dataA failed!"); - cudaMalloc((void**)&dev_dataB, numBytes); + cudaMalloc(reinterpret_cast(&dev_dataB), numBytes); checkCUDAError("cudaMalloc: dev_dataB failed!"); cudaMemcpy(dev_dataA, idata, numBytes, cudaMemcpyHostToDevice); From e3cbafa6f5f38a553d12c6c2854a4e2290b096b7 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Mon, 15 Sep 2025 23:47:30 -0400 Subject: [PATCH 17/46] efficient: up-sweep (reduction) --- src/main.cpp | 20 ++++++++++-- stream_compaction/efficient.cu | 58 +++++++++++++++++++++++++++++++--- 2 files changed, 71 insertions(+), 7 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 8b193275..c0c072c8 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -14,9 +14,9 @@ #include #include -constexpr bool runDebugTests = false; // runs additional simpler tests -const int SIZE = 1 << 8; // feel free to change the size of array -const int NPOT = SIZE - 3; // Non-Power-Of-Two +constexpr bool runDebugTests = true; // runs additional simpler tests +const int SIZE = 1 << 8; // feel free to change the size of array +const int NPOT = SIZE - 3; // Non-Power-Of-Two int* a = new int[SIZE]; int* b = new 
int[SIZE]; @@ -120,6 +120,20 @@ int main(int argc, char* argv[]) { printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); printArray(SIZE, c, true); printCmpResult(SIZE, b, c); + + zeroArray(SIZE, c); + printDesc("work-efficient scan, power-of-two"); + StreamCompaction::Efficient::scan(SIZE, c, a); + printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + printArray(SIZE, c, true); + printCmpResult(SIZE, b, c); + + zeroArray(SIZE, c); + printDesc("work-efficient scan, non-power-of-two"); + StreamCompaction::Efficient::scan(NPOT, c, a); + printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + printArray(NPOT, c, true); + printCmpResult(NPOT, b, c); } printf("\n"); diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index d456e2a9..c37ba5cf 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -4,27 +4,43 @@ #include #include +#include #include +#include namespace StreamCompaction { namespace Efficient { using StreamCompaction::Common::PerformanceTimer; +/// Number of threads per block. +constexpr int blockSize = 256; + PerformanceTimer& timer() { static PerformanceTimer timer; return timer; } +__global__ void kernReductionAddPair(int n, int* data, int layer, int k) { + int tId = (blockIdx.x * blockDim.x) + threadIdx.x; + + if (tId >= n) return; + + data[k + (1 << (layer + 1)) - 1] += data[k + (1 << layer) - 1]; +} + /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. 
*/ void scan(int n, int* odata, const int* idata) { int actualN = n; - const int* actualInputData = idata; size_t numBytes = n * sizeof(int); + std::unique_ptr actualInputData = std::make_unique(n); + std::memcpy(actualInputData.get(), idata, numBytes); + // Input array size is not a power of two; we have to pad the left with zeroes + std::optional paddingOpt; if (int numLeaves = 1 << ilog2ceil(n); n < numLeaves) { int offset = numLeaves - n; @@ -32,21 +48,55 @@ void scan(int n, int* odata, const int* idata) { std::unique_ptr paddedInputData = std::make_unique(numLeaves); std::memcpy(paddedInputData.get() + offset, idata, numBytes); + paddingOpt = offset; actualN = numLeaves; - actualInputData = paddedInputData.release(); numBytes = numLeaves * sizeof(int); + actualInputData.swap(paddedInputData); } + // std::cout << "\n--> padded (size " << actualN << "): [ "; + // for (int i = 0; i < actualN; ++i) { + // std::cout << actualInputData[i] << " "; + // } + // std::cout << "]\n" << std::endl; + int* dev_data = nullptr; cudaMalloc(reinterpret_cast(&dev_data), numBytes); checkCUDAError("cudaMalloc: dev_data failed!"); - cudaMemcpy(dev_data, actualInputData, numBytes, cudaMemcpyHostToDevice); + cudaMemcpy(dev_data, actualInputData.get(), numBytes, cudaMemcpyHostToDevice); checkCUDAError("cudaMemcpy: actualInputData -> dev_data failed!"); timer().startGpuTimer(); - // TODO + + // Perform up-sweep via parallel reduction + for (int layer = 0; layer < ilog2(actualN); ++layer) { + int stride = 1 << (layer + 1); + int numDispatches = actualN / stride; + int numBlocks = (numDispatches + blockSize + 1) / blockSize; + + for (int k = 0; k < actualN; k += stride) { + kernReductionAddPair<<>>(numDispatches, dev_data, layer, k); + } + } + timer().endGpuTimer(); + if (paddingOpt) { + // If previously padded, remove extra zeroes + cudaMemcpy(actualInputData.get(), dev_data, numBytes, cudaMemcpyDeviceToHost); + checkCUDAError("cudaMemcpy: dev_data -> actualInputData failed!"); + 
std::memcpy(odata, actualInputData.get() + paddingOpt.value(), n * sizeof(int)); + } else { + cudaMemcpy(odata, dev_data, numBytes, cudaMemcpyDeviceToHost); + checkCUDAError("cudaMemcpy: dev_data -> odata failed!"); + } + + // std::cout << "\n--> reduction (orig. size " << n << "): [ "; + // for (int i = 0; i < n; ++i) { + // std::cout << odata[i] << " "; + // } + // std::cout << "]\n" << std::endl; + cudaFree(dev_data); checkCUDAError("cudaFree: dev_data failed!"); } From 36259a154db1be40096b80389f3a9c52fedca4b6 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Mon, 15 Sep 2025 23:55:15 -0400 Subject: [PATCH 18/46] fix block count calculation --- stream_compaction/efficient.cu | 2 +- stream_compaction/naive.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index c37ba5cf..f33d8195 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -72,7 +72,7 @@ void scan(int n, int* odata, const int* idata) { for (int layer = 0; layer < ilog2(actualN); ++layer) { int stride = 1 << (layer + 1); int numDispatches = actualN / stride; - int numBlocks = (numDispatches + blockSize + 1) / blockSize; + int numBlocks = (numDispatches + blockSize - 1) / blockSize; for (int k = 0; k < actualN; k += stride) { kernReductionAddPair<<>>(numDispatches, dev_data, layer, k); diff --git a/stream_compaction/naive.cu b/stream_compaction/naive.cu index 4498a9ac..b4b50bd4 100644 --- a/stream_compaction/naive.cu +++ b/stream_compaction/naive.cu @@ -65,7 +65,7 @@ void scan(int n, int* odata, const int* idata) { for (int iteration = 1; iteration <= ilog2ceil(n); ++iteration) { int stride = 1 << (iteration - 1); int numDispatches = n - stride; - int numBlocks = (numDispatches + blockSize + 1) / blockSize; + int numBlocks = (numDispatches + blockSize - 1) / blockSize; if constexpr (runInnerLoopOnGPU) { kernSumStrided<<>>(n, dev_dataA, dev_dataB, stride); From 
9a8374f6e345db861cf4f5e54539999ed1a508fd Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 00:42:30 -0400 Subject: [PATCH 19/46] what am i doing help --- src/main.cpp | 9 ++++++++- stream_compaction/efficient.cu | 17 ++++++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index c0c072c8..caa7406f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -15,7 +15,7 @@ #include constexpr bool runDebugTests = true; // runs additional simpler tests -const int SIZE = 1 << 8; // feel free to change the size of array +const int SIZE = 1 << 10; // feel free to change the size of array const int NPOT = SIZE - 3; // Non-Power-Of-Two int* a = new int[SIZE]; @@ -121,6 +121,13 @@ int main(int argc, char* argv[]) { printArray(SIZE, c, true); printCmpResult(SIZE, b, c); + zeroArray(SIZE, c); + printDesc("naive scan, non-power-of-two"); + StreamCompaction::Naive::scan(NPOT, c, a); + printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + printArray(SIZE, c, true); + printCmpResult(NPOT, b, c); + zeroArray(SIZE, c); printDesc("work-efficient scan, power-of-two"); StreamCompaction::Efficient::scan(SIZE, c, a); diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index f33d8195..cfab34be 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -29,6 +29,16 @@ __global__ void kernReductionAddPair(int n, int* data, int layer, int k) { data[k + (1 << (layer + 1)) - 1] += data[k + (1 << layer) - 1]; } +__global__ void kernTest(int n, int* data, int layer, int stride) { + int tId = (blockIdx.x * blockDim.x) + threadIdx.x; + + if (tId >= n) return; + + for (int k = 0; k < n; k += stride) { + data[k + (1 << (layer + 1)) - 1] += data[k + (1 << layer) - 1]; + } +} + /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. 
*/ @@ -74,9 +84,10 @@ void scan(int n, int* odata, const int* idata) { int numDispatches = actualN / stride; int numBlocks = (numDispatches + blockSize - 1) / blockSize; - for (int k = 0; k < actualN; k += stride) { - kernReductionAddPair<<>>(numDispatches, dev_data, layer, k); - } + // for (int k = 0; k < actualN; k += stride) { + // kernReductionAddPair<<>>(numDispatches, dev_data, layer, k); + // } + kernTest<<>>(actualN, dev_data, layer, stride); } timer().endGpuTimer(); From 6caee7ed1373d1cdd6438c04084eba43c24f7175 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 01:05:18 -0400 Subject: [PATCH 20/46] naive: actually use the gpu --- src/main.cpp | 2 +- stream_compaction/naive.cu | 32 ++++++++------------------------ 2 files changed, 9 insertions(+), 25 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index caa7406f..64581b2f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -15,7 +15,7 @@ #include constexpr bool runDebugTests = true; // runs additional simpler tests -const int SIZE = 1 << 10; // feel free to change the size of array +const int SIZE = 1 << 8; // feel free to change the size of array const int NPOT = SIZE - 3; // Non-Power-Of-Two int* a = new int[SIZE]; diff --git a/stream_compaction/naive.cu b/stream_compaction/naive.cu index b4b50bd4..a3534ec3 100644 --- a/stream_compaction/naive.cu +++ b/stream_compaction/naive.cu @@ -15,33 +15,23 @@ constexpr int blockSize = 256; /// Whether to return an inclusive or exclusive scan. constexpr bool useExclusiveScan = true; -/// Perform inner loop within the kernel. Results in only 1 invocation per "layer" versus one -/// kernel dispatch per iteration of the inner loop. 
-constexpr bool runInnerLoopOnGPU = true; - PerformanceTimer& timer() { static PerformanceTimer timer; return timer; } -__global__ void kernSumStrided(int n, const int* in, int* out, int stride) { - int tId = (blockDim.x * blockIdx.x) + threadIdx.x; +__global__ void kernSumPairsForIteration(int n, const int* in, int* out, int stride) { + int k = (blockIdx.x * blockDim.x) + threadIdx.x; - if (tId >= n) return; + // As the number of dispatches decrease with every iteration, we have to add the stride to + // get the last index of the array + if (k >= n + stride) return; - for (int k = stride; k <= n; ++k) { + if (k >= stride) { out[k] = in[k - stride] + in[k]; } } -__global__ void kernAddStridedPair(int n, const int* in, int* out, int k, int stride) { - int tId = (blockDim.x * blockIdx.x) + threadIdx.x; - - if (tId >= n) return; - - out[k] = in[k - stride] + in[k]; -} - /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ @@ -67,15 +57,9 @@ void scan(int n, int* odata, const int* idata) { int numDispatches = n - stride; int numBlocks = (numDispatches + blockSize - 1) / blockSize; - if constexpr (runInnerLoopOnGPU) { - kernSumStrided<<>>(n, dev_dataA, dev_dataB, stride); - } else { - for (int k = stride; k < n; ++k) { - kernAddStridedPair<<>>(numDispatches, dev_dataA, dev_dataB, k, stride); - } - } + kernSumPairsForIteration<<>>(numDispatches, dev_dataA, dev_dataB, stride); - // Swap read and write buffers (output in B will be read next in A) + // Write new results back into A to be read from cudaMemcpy(dev_dataA, dev_dataB, numBytes, cudaMemcpyDeviceToDevice); } From 03ccb7d68c265c7434bccefe3ed8a68c313825a3 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 01:06:20 -0400 Subject: [PATCH 21/46] naive: simplify early return further --- stream_compaction/naive.cu | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/stream_compaction/naive.cu b/stream_compaction/naive.cu index a3534ec3..0e9e255f 100644 --- 
a/stream_compaction/naive.cu +++ b/stream_compaction/naive.cu @@ -25,11 +25,9 @@ __global__ void kernSumPairsForIteration(int n, const int* in, int* out, int str // As the number of dispatches decrease with every iteration, we have to add the stride to // get the last index of the array - if (k >= n + stride) return; + if (k >= n + stride || k < stride) return; - if (k >= stride) { - out[k] = in[k - stride] + in[k]; - } + out[k] = in[k - stride] + in[k]; } /** From 049008a84c203d724c17acedd678db2b9a84fd54 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 03:14:39 -0400 Subject: [PATCH 22/46] efficient: fix up-sweep and finally understand what i'm doing --- stream_compaction/common.h | 9 ++++++++ stream_compaction/efficient.cu | 41 +++++++++++----------------------- 2 files changed, 22 insertions(+), 28 deletions(-) diff --git a/stream_compaction/common.h b/stream_compaction/common.h index 87000f1e..0dd4d356 100644 --- a/stream_compaction/common.h +++ b/stream_compaction/common.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #define FILENAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__) @@ -30,6 +31,14 @@ inline int ilog2ceil(int x) { return x == 1 ? 
0 : ilog2(x - 1) + 1; } +inline void printArray(int n, const int* data) { + std::cout << "[ "; + for (int i = 0; i < n; ++i) { + std::cout << data[i] << " "; + } + std::cout << "]" << std::endl; +} + namespace StreamCompaction { namespace Common { diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index cfab34be..0329b9b2 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -21,22 +21,18 @@ PerformanceTimer& timer() { return timer; } -__global__ void kernReductionAddPair(int n, int* data, int layer, int k) { - int tId = (blockIdx.x * blockDim.x) + threadIdx.x; +__global__ void kernReduceForLayer(int n, int* data, int layer, int stride) { + int k = (blockIdx.x * blockDim.x) + threadIdx.x; - if (tId >= n) return; + if (k >= n) return; - data[k + (1 << (layer + 1)) - 1] += data[k + (1 << layer) - 1]; -} - -__global__ void kernTest(int n, int* data, int layer, int stride) { - int tId = (blockIdx.x * blockDim.x) + threadIdx.x; + int offset = k * stride; + int previousStride = 1 << layer; - if (tId >= n) return; + int rightChild = offset + stride - 1; + int leftChild = offset + previousStride - 1; - for (int k = 0; k < n; k += stride) { - data[k + (1 << (layer + 1)) - 1] += data[k + (1 << layer) - 1]; - } + data[rightChild] += data[leftChild]; } /** @@ -64,12 +60,6 @@ void scan(int n, int* odata, const int* idata) { actualInputData.swap(paddedInputData); } - // std::cout << "\n--> padded (size " << actualN << "): [ "; - // for (int i = 0; i < actualN; ++i) { - // std::cout << actualInputData[i] << " "; - // } - // std::cout << "]\n" << std::endl; - int* dev_data = nullptr; cudaMalloc(reinterpret_cast(&dev_data), numBytes); checkCUDAError("cudaMalloc: dev_data failed!"); @@ -84,12 +74,13 @@ void scan(int n, int* odata, const int* idata) { int numDispatches = actualN / stride; int numBlocks = (numDispatches + blockSize - 1) / blockSize; - // for (int k = 0; k < actualN; k += stride) { - // 
kernReductionAddPair<<>>(numDispatches, dev_data, layer, k); - // } - kernTest<<>>(actualN, dev_data, layer, stride); + kernReduceForLayer<<>>(numDispatches, dev_data, layer, stride); } + // cudaMemcpy(actualInputData.get(), dev_data, numBytes, cudaMemcpyDeviceToHost); + // std::cout << "reduction: "; + // printArray(actualN, actualInputData.get()); + timer().endGpuTimer(); if (paddingOpt) { @@ -102,12 +93,6 @@ void scan(int n, int* odata, const int* idata) { checkCUDAError("cudaMemcpy: dev_data -> odata failed!"); } - // std::cout << "\n--> reduction (orig. size " << n << "): [ "; - // for (int i = 0; i < n; ++i) { - // std::cout << odata[i] << " "; - // } - // std::cout << "]\n" << std::endl; - cudaFree(dev_data); checkCUDAError("cudaFree: dev_data failed!"); } From 9d658cd3541252fb54551982a3745decfa0eb998 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 04:07:41 -0400 Subject: [PATCH 23/46] efficient: finish down-sweep, whole algorithm --- src/main.cpp | 6 +++--- stream_compaction/efficient.cu | 34 +++++++++++++++++++++++++++++++--- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 64581b2f..53950b1d 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -14,9 +14,9 @@ #include #include -constexpr bool runDebugTests = true; // runs additional simpler tests -const int SIZE = 1 << 8; // feel free to change the size of array -const int NPOT = SIZE - 3; // Non-Power-Of-Two +constexpr bool runDebugTests = false; // runs additional simpler tests +const int SIZE = 1 << 8; // feel free to change the size of array +const int NPOT = SIZE - 3; // Non-Power-Of-Two int* a = new int[SIZE]; int* b = new int[SIZE]; diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index 0329b9b2..60d9ccc1 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -16,6 +16,9 @@ using StreamCompaction::Common::PerformanceTimer; /// Number of threads per block. 
constexpr int blockSize = 256; +/// Enable `checkCUDAError()` calls within the performance measuring fence. +constexpr bool checkErrorsDuringTimer = true; + PerformanceTimer& timer() { static PerformanceTimer timer; return timer; @@ -35,6 +38,22 @@ __global__ void kernReduceForLayer(int n, int* data, int layer, int stride) { data[rightChild] += data[leftChild]; } +__global__ void kernTraverseDownLayer(int n, int* data, int layer, int stride) { + int k = (blockIdx.x * blockDim.x) + threadIdx.x; + + if (k >= n) return; + + int offset = k * stride; + int previousStride = 1 << layer; + + int rightChild = offset + stride - 1; + int leftChild = offset + previousStride - 1; + + int leftValue = data[leftChild]; + data[leftChild] = data[rightChild]; + data[rightChild] += leftValue; +} + /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ @@ -77,9 +96,18 @@ void scan(int n, int* odata, const int* idata) { kernReduceForLayer<<>>(numDispatches, dev_data, layer, stride); } - // cudaMemcpy(actualInputData.get(), dev_data, numBytes, cudaMemcpyDeviceToHost); - // std::cout << "reduction: "; - // printArray(actualN, actualInputData.get()); + // Zero out the root + int zero = 0; + cudaMemcpy(dev_data + (actualN - 1), &zero, sizeof(int), cudaMemcpyHostToDevice); + if constexpr (checkErrorsDuringTimer) checkCUDAError("cudaMemcpy: 0 -> dev_data failed!"); + + for (int layer = ilog2(actualN) - 1; layer >= 0; --layer) { + int stride = 1 << (layer + 1); + int numDispatches = actualN / stride; + int numBlocks = (numDispatches + blockSize - 1) / blockSize; + + kernTraverseDownLayer<<>>(numDispatches, dev_data, layer, stride); + } timer().endGpuTimer(); From 7273b97b3b3b420b6ae397298fb54f315b2961b8 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 04:15:07 -0400 Subject: [PATCH 24/46] stream compaction: implement helpers --- stream_compaction/common.cu | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git 
a/stream_compaction/common.cu b/stream_compaction/common.cu index 0a2a90f3..1f2b7092 100644 --- a/stream_compaction/common.cu +++ b/stream_compaction/common.cu @@ -21,7 +21,11 @@ namespace StreamCompaction::Common { * which map to 0 will be removed, and elements which map to 1 will be kept. */ __global__ void kernMapToBoolean(int n, int* bools, const int* idata) { - // TODO + int tId = (blockIdx.x * blockDim.x) + threadIdx.x; + + if (tId >= n) return; + + bools[tId] = idata[tId] > 0 ? 1 : 0; } /** @@ -29,7 +33,11 @@ __global__ void kernMapToBoolean(int n, int* bools, const int* idata) { * if bools[idx] == 1, it copies idata[idx] to odata[indices[idx]]. */ __global__ void kernScatter(int n, int* odata, const int* idata, const int* bools, const int* indices) { - // TODO + int tId = (blockIdx.x * blockDim.x) + threadIdx.x; + + if (tId >= n || !bools[tId]) return; + + odata[indices[tId]] = idata[tId]; } } // namespace StreamCompaction::Common From 0e1cdce07e659372b89e0d7fdd9387e696b2988d Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 05:43:38 -0400 Subject: [PATCH 25/46] efficient: finish `compact` --- src/main.cpp | 4 +-- stream_compaction/efficient.cu | 63 +++++++++++++++++++++++++++++++--- stream_compaction/efficient.h | 2 +- 3 files changed, 61 insertions(+), 8 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 53950b1d..1ac86e65 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -185,14 +185,14 @@ int main(int argc, char* argv[]) { printDesc("work-efficient compact, power-of-two"); count = StreamCompaction::Efficient::compact(SIZE, c, a); printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - // printArray(count, c, true); + printArray(count, c, true); printCmpLenResult(count, expectedCount, b, c); zeroArray(SIZE, c); printDesc("work-efficient compact, non-power-of-two"); count = StreamCompaction::Efficient::compact(NPOT, c, a); 
printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - // printArray(count, c, true); + printArray(count, c, true); printCmpLenResult(count, expectedNPOT, b, c); system("pause"); // stop Win32 console from closing on exit diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index 60d9ccc1..3bf23356 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -57,7 +57,7 @@ __global__ void kernTraverseDownLayer(int n, int* data, int layer, int stride) { /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ -void scan(int n, int* odata, const int* idata) { +void scan(int n, int* odata, const int* idata, bool measure) { int actualN = n; size_t numBytes = n * sizeof(int); @@ -85,7 +85,7 @@ void scan(int n, int* odata, const int* idata) { cudaMemcpy(dev_data, actualInputData.get(), numBytes, cudaMemcpyHostToDevice); checkCUDAError("cudaMemcpy: actualInputData -> dev_data failed!"); - timer().startGpuTimer(); + if (measure) timer().startGpuTimer(); // Perform up-sweep via parallel reduction for (int layer = 0; layer < ilog2(actualN); ++layer) { @@ -109,7 +109,7 @@ void scan(int n, int* odata, const int* idata) { kernTraverseDownLayer<<>>(numDispatches, dev_data, layer, stride); } - timer().endGpuTimer(); + if (measure) timer().endGpuTimer(); if (paddingOpt) { // If previously padded, remove extra zeroes @@ -135,10 +135,63 @@ void scan(int n, int* odata, const int* idata) { * @returns The number of elements remaining after compaction. 
*/ int compact(int n, int* odata, const int* idata) { + int* dev_bools = nullptr; + int* dev_indices = nullptr; + int* dev_odata = nullptr; + int* dev_idata = nullptr; + + int numBlocks = (n + blockSize - 1) / blockSize; + size_t numBytes = n * sizeof(int); + + cudaMalloc(reinterpret_cast(&dev_bools), numBytes); + checkCUDAError("cudaMalloc: dev_bools failed"); + cudaMalloc(reinterpret_cast(&dev_indices), numBytes); + checkCUDAError("cudaMalloc: dev_indices failed"); + cudaMalloc(reinterpret_cast(&dev_odata), numBytes); + checkCUDAError("cudaMalloc: dev_odata failed"); + cudaMalloc(reinterpret_cast(&dev_idata), numBytes); + checkCUDAError("cudaMalloc: dev_idata failed"); + + std::unique_ptr indices = std::make_unique(n); + std::unique_ptr bools = std::make_unique(n); + + cudaMemcpy(dev_idata, idata, numBytes, cudaMemcpyHostToDevice); + checkCUDAError("cudaMemcpy: idata -> dev_idata failed"); + timer().startGpuTimer(); - // TODO + + Common::kernMapToBoolean<<>>(n, dev_bools, dev_idata); + cudaMemcpy(bools.get(), dev_bools, numBytes, cudaMemcpyDeviceToHost); + if constexpr (checkErrorsDuringTimer) checkCUDAError("cudaMemcpy: dev_bools -> bools failed"); + scan(n, indices.get(), bools.get(), false); + cudaMemcpy(dev_indices, indices.get(), numBytes, cudaMemcpyHostToDevice); + if constexpr (checkErrorsDuringTimer) checkCUDAError("cudaMemcpy: indices -> dev_indices failed"); + Common::kernScatter<<>>(n, dev_odata, dev_idata, dev_bools, dev_indices); + timer().endGpuTimer(); - return -1; + + cudaMemcpy(odata, dev_indices, numBytes, cudaMemcpyDeviceToHost); + checkCUDAError("cudaMemcpy: dev_indices -> odata failed"); + int numRemaining = odata[n - 1]; + + cudaMemcpy(odata, dev_odata, numBytes, cudaMemcpyDeviceToHost); + checkCUDAError("cudaMemcpy: dev_odata -> odata failed"); + + cudaFree(dev_bools); + checkCUDAError("cudaFree: dev_bools failed"); + cudaFree(dev_indices); + checkCUDAError("cudaFree: dev_indices failed"); + cudaFree(dev_odata); + checkCUDAError("cudaFree: 
dev_odata failed"); + cudaFree(dev_idata); + checkCUDAError("cudaFree: dev_idata failed"); + + // Since we're doing an exclusive scan, we need to manually check if the last element is valid + if (bools[n - 1]) { + return numRemaining + 1; + } else { + return numRemaining; + } } } // namespace Efficient diff --git a/stream_compaction/efficient.h b/stream_compaction/efficient.h index d64c3426..a13935d9 100644 --- a/stream_compaction/efficient.h +++ b/stream_compaction/efficient.h @@ -7,7 +7,7 @@ namespace Efficient { StreamCompaction::Common::PerformanceTimer& timer(); -void scan(int n, int* odata, const int* idata); +void scan(int n, int* odata, const int* idata, bool measure = true); int compact(int n, int* odata, const int* idata); From c3256648494e3ce640b20ddee8ac9353def5f03e Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 12:52:40 -0400 Subject: [PATCH 26/46] remove credit info --- src/main.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 1ac86e65..61354d71 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,11 +1,3 @@ -/** - * @file main.cpp - * @brief Stream compaction test program - * @authors Kai Ninomiya - * @date 2015 - * @copyright University of Pennsylvania - */ - #include "testing_helpers.hpp" #include From 3b5b013cc48d2952d169ee966168af69e9b0bed5 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 13:36:29 -0400 Subject: [PATCH 27/46] implement `Thrust::scan` --- .clang-format | 4 +++- stream_compaction/thrust.cu | 11 ++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/.clang-format b/.clang-format index 814e93f9..3eb1dadd 100644 --- a/.clang-format +++ b/.clang-format @@ -12,5 +12,7 @@ IncludeCategories: Priority: 1 - Regex: '^' Priority: 2 + - Regex: '^' + Priority: 3 - Regex: '^<.*>' - Priority: 3 \ No newline at end of file + Priority: 4 \ No newline at end of file diff --git a/stream_compaction/thrust.cu b/stream_compaction/thrust.cu index 
8b9c3adc..dce9607c 100644 --- a/stream_compaction/thrust.cu +++ b/stream_compaction/thrust.cu @@ -22,11 +22,16 @@ PerformanceTimer& timer() { * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ void scan(int n, int* odata, const int* idata) { + thrust::device_vector dv_in(idata, idata + n); + thrust::device_vector dv_out(n); + timer().startGpuTimer(); - // TODO use `thrust::exclusive_scan` - // example: for device_vectors dv_in and dv_out: - // thrust::exclusive_scan(dv_in.begin(), dv_in.end(), dv_out.begin()); + + thrust::exclusive_scan(dv_in.begin(), dv_in.end(), dv_out.begin()); + timer().endGpuTimer(); + + thrust::copy(dv_out.begin(), dv_out.end(), odata); } } // namespace Thrust From abb3bd1c41f95710e351029661c13bc7fe1d918a Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 13:40:19 -0400 Subject: [PATCH 28/46] remove console pause --- src/main.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main.cpp b/src/main.cpp index 61354d71..1782c686 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -187,7 +187,6 @@ int main(int argc, char* argv[]) { printArray(count, c, true); printCmpLenResult(count, expectedNPOT, b, c); - system("pause"); // stop Win32 console from closing on exit delete[] a; delete[] b; delete[] c; From 7ce7882f606d07e7896eb53b914bb53bed3ea710 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 13:59:52 -0400 Subject: [PATCH 29/46] implement `Thrust::compact` --- src/main.cpp | 16 +++++++++++++++- stream_compaction/thrust.cu | 31 ++++++++++++++++++++++++++++++- stream_compaction/thrust.h | 2 ++ 3 files changed, 47 insertions(+), 2 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 1782c686..b60db9ee 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -7,7 +7,7 @@ #include constexpr bool runDebugTests = false; // runs additional simpler tests -const int SIZE = 1 << 8; // feel free to change the size of array +const int SIZE = 1 << 16; // feel free to change the size of array const int 
NPOT = SIZE - 3; // Non-Power-Of-Two int* a = new int[SIZE]; @@ -187,6 +187,20 @@ int main(int argc, char* argv[]) { printArray(count, c, true); printCmpLenResult(count, expectedNPOT, b, c); + zeroArray(SIZE, c); + printDesc("thrust compact, power-of-two"); + count = StreamCompaction::Thrust::compact(SIZE, c, a); + printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + printArray(count, c, true); + printCmpLenResult(count, expectedCount, b, c); + + zeroArray(SIZE, c); + printDesc("thrust compact, non-power-of-two"); + count = StreamCompaction::Thrust::compact(NPOT, c, a); + printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + printArray(count, c, true); + printCmpLenResult(count, expectedNPOT, b, c); + delete[] a; delete[] b; delete[] c; diff --git a/stream_compaction/thrust.cu b/stream_compaction/thrust.cu index dce9607c..6074c0fd 100644 --- a/stream_compaction/thrust.cu +++ b/stream_compaction/thrust.cu @@ -5,7 +5,9 @@ #include #include +#include #include +#include #include namespace StreamCompaction { @@ -27,12 +29,39 @@ void scan(int n, int* odata, const int* idata) { timer().startGpuTimer(); - thrust::exclusive_scan(dv_in.begin(), dv_in.end(), dv_out.begin()); + thrust::exclusive_scan(thrust::device, dv_in.begin(), dv_in.end(), dv_out.begin()); timer().endGpuTimer(); thrust::copy(dv_out.begin(), dv_out.end(), odata); } +struct isNotValid { + __host__ __device__ bool operator()(const int& element) { return element == 0; } +}; + +/** + * Performs stream compaction on idata, storing the result into odata. + * All zeroes are discarded. + * + * @param n The number of elements in idata. + * @param odata The array into which to store elements. + * @param idata The array of elements to compact. + * @returns The number of elements remaining after compaction. 
+ */ +int compact(int n, int* odata, const int* idata) { + thrust::device_vector dv_in(idata, idata + n); + + timer().startGpuTimer(); + + auto newEndIt = thrust::remove_if(thrust::device, dv_in.begin(), dv_in.end(), isNotValid()); + + timer().endGpuTimer(); + + thrust::copy(dv_in.begin(), newEndIt, odata); + + return thrust::distance(dv_in.begin(), newEndIt); +} + } // namespace Thrust } // namespace StreamCompaction diff --git a/stream_compaction/thrust.h b/stream_compaction/thrust.h index 056f7c90..a9b0d3d0 100644 --- a/stream_compaction/thrust.h +++ b/stream_compaction/thrust.h @@ -9,5 +9,7 @@ StreamCompaction::Common::PerformanceTimer& timer(); void scan(int n, int* odata, const int* idata); +int compact(int n, int* odata, const int* idata); + } // namespace Thrust } // namespace StreamCompaction From af5d82a2da641e231407ae4ac6f7807773202a2d Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 14:56:58 -0400 Subject: [PATCH 30/46] begin README --- README.md | 38 +++++++++++++++++++++++++++++--------- src/main.cpp | 16 ++++++++++++++++ 2 files changed, 45 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 0e38ddb1..00c0d58c 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,34 @@ -CUDA Stream Compaction -====================== +**University of Pennsylvania, CIS 5650: GPU Programming and Architecture, Project 2** -**University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 2** +* Charles Wang + * [LinkedIn](https://linkedin.com/in/zwcharl) + * [Personal website](https://charleszw.com) +* Tested on: + * Windows 11 Pro (26100.4946) + * Ryzen 5 7600X @ 4.7Ghz + * 32 GB RAM + * RTX 5060 Ti 16 GB (Studio Driver 580.97) -* (TODO) YOUR NAME HERE - * (TODO) [LinkedIn](), [personal website](), [twitter](), etc. 
-* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) +# CUDA Stream Compaction -### (TODO: Your README) +This project implements multiple commonly used GPU algorithms, which are reduction, computing prefix sums (scan), and stream compaction. Stream compaction uses the scan algorithm under the hood, and one of my implementations for finding prefix sums uses a parallel reduction, so these algorithms are all building on each other. -Include analysis, etc. (Remember, this is public, so don't put -anything here that you don't want to share with the world.) +The purpose of this project was to understand these algorithms in more detail, and explore how their implementations change when we parallelize them on the GPU. It also taught me more about how CUDA works and how my kernels interact with the physical NVIDIA hardware. +## Implementations + +In order to explore potential performance differences, this project includes three different versions of the scan and compaction algorithms. + +- [`cpu.cu`](stream_compaction/cpu.cu): these implementations run entirely on the CPU and are written in pure C++. They are single threaded by nature. In particular, the compaction algorithm was implemented both with and without using scan. +- [`naive.cu`](stream_compaction/naive.cu): the first implementation that utilizes CUDA. It is based on the naive algorithm described in [GPU Gems 3, Chapter 39.2.1](https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-39-parallel-prefix-sum-scan-cuda). +- [`efficient.cu`](stream_compaction/efficient.cu): implementations of the scan and compaction algorithms which theoretically require less operations and therefore should run more efficiently. 
It is based on the work-efficient parallel scan algorithm described in [GPU Gems 3, Chapter 39.2.2](https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-39-parallel-prefix-sum-scan-cuda), and involves an "up-sweep" where we build up a balanced binary tree, and then a "down-sweep" where we calculate final terms using the node elements in the tree. + +Below I demonstrate how my algorithms performed in benchmarking tests. + +## Performance benchmarks + +### Testing methodology + +### Graphs + +### Analysis diff --git a/src/main.cpp b/src/main.cpp index b60db9ee..2c040477 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -166,6 +166,22 @@ int main(int argc, char* argv[]) { printArray(count, c, true); printCmpLenResult(count, expectedNPOT, b, c); + zeroArray(SIZE, b); + printDesc("cpu compact with scan, power-of-two"); + count = StreamCompaction::CPU::compactWithScan(SIZE, b, a); + printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); + expectedCount = count; + printArray(count, b, true); + printCmpLenResult(count, expectedCount, b, b); + + zeroArray(SIZE, c); + printDesc("cpu compact with scan, non-power-of-two"); + count = StreamCompaction::CPU::compactWithScan(NPOT, c, a); + printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); + expectedNPOT = count; + printArray(count, c, true); + printCmpLenResult(count, expectedNPOT, b, c); + zeroArray(SIZE, c); printDesc("cpu compact with scan"); count = StreamCompaction::CPU::compactWithScan(SIZE, c, a); From a5c6fe939b764fa3b6af268f62c37d7ef2d6099c Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 16:07:29 -0400 Subject: [PATCH 31/46] enable disable running diff benchmarks --- src/main.cpp | 310 ++++++++++++++++++++++++++++----------------------- 1 file changed, 171 insertions(+), 139 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 2c040477..d5615d95 
100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -6,9 +6,24 @@ #include #include -constexpr bool runDebugTests = false; // runs additional simpler tests -const int SIZE = 1 << 16; // feel free to change the size of array -const int NPOT = SIZE - 3; // Non-Power-Of-Two +const int SIZE = 1 << 16; // feel free to change the size of array +const int NPOT = SIZE - 3; // Non-Power-Of-Two + +/// If true, run additional simpler tests. +constexpr bool runDebugTests = false; + +/// Print out resulting arrays from computation. +constexpr bool enablePrintingArrays = false; + +constexpr bool enableCPUScan = true; +constexpr bool enableNaiveScan = false; +constexpr bool enableEfficientScan = false; +constexpr bool enableThrustScan = false; + +constexpr bool enableCPUCompactWith = true; +constexpr bool enableCPUCompactWithout = true; +constexpr bool enableEfficientCompact = false; +constexpr bool enableThrustCompact = false; int* a = new int[SIZE]; int* b = new int[SIZE]; @@ -29,112 +44,128 @@ int main(int argc, char* argv[]) { // initialize b using StreamCompaction::CPU::scan you implement // We use b for further comparison. Make sure your StreamCompaction::CPU::scan is correct. // At first all cases passed because b && c are all zeroes. 
- zeroArray(SIZE, b); - printDesc("cpu scan, power-of-two"); - StreamCompaction::CPU::scan(SIZE, b, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - printArray(SIZE, b, true); - - zeroArray(SIZE, c); - printDesc("cpu scan, non-power-of-two"); - StreamCompaction::CPU::scan(NPOT, c, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - printArray(NPOT, c, true); - printCmpResult(NPOT, b, c); - - zeroArray(SIZE, c); - printDesc("naive scan, power-of-two"); - StreamCompaction::Naive::scan(SIZE, c, a); - printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - printArray(SIZE, c, true); - printCmpResult(SIZE, b, c); - - zeroArray(SIZE, c); - printDesc("naive scan, non-power-of-two"); - StreamCompaction::Naive::scan(NPOT, c, a); - printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - printArray(SIZE, c, true); - printCmpResult(NPOT, b, c); - - zeroArray(SIZE, c); - printDesc("work-efficient scan, power-of-two"); - StreamCompaction::Efficient::scan(SIZE, c, a); - printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - printArray(SIZE, c, true); - printCmpResult(SIZE, b, c); - - zeroArray(SIZE, c); - printDesc("work-efficient scan, non-power-of-two"); - StreamCompaction::Efficient::scan(NPOT, c, a); - printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - printArray(NPOT, c, true); - printCmpResult(NPOT, b, c); - - zeroArray(SIZE, c); - printDesc("thrust scan, power-of-two"); - StreamCompaction::Thrust::scan(SIZE, c, a); - printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - printArray(SIZE, c, true); - printCmpResult(SIZE, b, c); - - 
zeroArray(SIZE, c); - printDesc("thrust scan, non-power-of-two"); - StreamCompaction::Thrust::scan(NPOT, c, a); - printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - printArray(NPOT, c, true); - printCmpResult(NPOT, b, c); - - // For bug-finding only: Array of 1s to help find bugs in stream compaction or scan - if constexpr (runDebugTests) { - printf("\n"); - printf("*************************\n"); - printf("** SCAN TESTS (ALL 1s) **\n"); - printf("*************************\n"); - - onesArray(SIZE, a); - + if constexpr (enableCPUScan) { zeroArray(SIZE, b); printDesc("cpu scan, power-of-two"); StreamCompaction::CPU::scan(SIZE, b, a); printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - printArray(SIZE, b, true); + if constexpr (enablePrintingArrays) printArray(SIZE, b, true); zeroArray(SIZE, c); printDesc("cpu scan, non-power-of-two"); StreamCompaction::CPU::scan(NPOT, c, a); printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - printArray(NPOT, c, true); + if constexpr (enablePrintingArrays) printArray(NPOT, c, true); printCmpResult(NPOT, b, c); + } + if constexpr (enableNaiveScan) { zeroArray(SIZE, c); printDesc("naive scan, power-of-two"); StreamCompaction::Naive::scan(SIZE, c, a); printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - printArray(SIZE, c, true); + if constexpr (enablePrintingArrays) printArray(SIZE, c, true); printCmpResult(SIZE, b, c); zeroArray(SIZE, c); printDesc("naive scan, non-power-of-two"); StreamCompaction::Naive::scan(NPOT, c, a); printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - printArray(SIZE, c, true); + if constexpr (enablePrintingArrays) printArray(SIZE, c, true); printCmpResult(NPOT, b, c); + } + if constexpr 
(enableEfficientScan) { zeroArray(SIZE, c); printDesc("work-efficient scan, power-of-two"); StreamCompaction::Efficient::scan(SIZE, c, a); printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - printArray(SIZE, c, true); + if constexpr (enablePrintingArrays) printArray(SIZE, c, true); printCmpResult(SIZE, b, c); zeroArray(SIZE, c); printDesc("work-efficient scan, non-power-of-two"); StreamCompaction::Efficient::scan(NPOT, c, a); printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - printArray(NPOT, c, true); + if constexpr (enablePrintingArrays) printArray(NPOT, c, true); + printCmpResult(NPOT, b, c); + } + + if constexpr (enableThrustScan) { + zeroArray(SIZE, c); + printDesc("thrust scan, power-of-two"); + StreamCompaction::Thrust::scan(SIZE, c, a); + printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + if constexpr (enablePrintingArrays) printArray(SIZE, c, true); + printCmpResult(SIZE, b, c); + + zeroArray(SIZE, c); + printDesc("thrust scan, non-power-of-two"); + StreamCompaction::Thrust::scan(NPOT, c, a); + printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + if constexpr (enablePrintingArrays) printArray(NPOT, c, true); printCmpResult(NPOT, b, c); } + // For bug-finding only: Array of 1s to help find bugs in stream compaction or scan + if constexpr (runDebugTests) { + printf("\n"); + printf("*************************\n"); + printf("** SCAN TESTS (ALL 1s) **\n"); + printf("*************************\n"); + + onesArray(SIZE, a); + + if constexpr (enableCPUScan) { + zeroArray(SIZE, b); + printDesc("cpu scan, power-of-two"); + StreamCompaction::CPU::scan(SIZE, b, a); + printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), + "(std::chrono Measured)"); + if constexpr (enablePrintingArrays) 
printArray(SIZE, b, true); + + zeroArray(SIZE, c); + printDesc("cpu scan, non-power-of-two"); + StreamCompaction::CPU::scan(NPOT, c, a); + printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), + "(std::chrono Measured)"); + if constexpr (enablePrintingArrays) printArray(NPOT, c, true); + printCmpResult(NPOT, b, c); + } + + if constexpr (enableNaiveScan) { + zeroArray(SIZE, c); + printDesc("naive scan, power-of-two"); + StreamCompaction::Naive::scan(SIZE, c, a); + printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + if constexpr (enablePrintingArrays) printArray(SIZE, c, true); + printCmpResult(SIZE, b, c); + + zeroArray(SIZE, c); + printDesc("naive scan, non-power-of-two"); + StreamCompaction::Naive::scan(NPOT, c, a); + printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + if constexpr (enablePrintingArrays) printArray(SIZE, c, true); + printCmpResult(NPOT, b, c); + } + + if constexpr (enableEfficientScan) { + zeroArray(SIZE, c); + printDesc("work-efficient scan, power-of-two"); + StreamCompaction::Efficient::scan(SIZE, c, a); + printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + if constexpr (enablePrintingArrays) printArray(SIZE, c, true); + printCmpResult(SIZE, b, c); + + zeroArray(SIZE, c); + printDesc("work-efficient scan, non-power-of-two"); + StreamCompaction::Efficient::scan(NPOT, c, a); + printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + if constexpr (enablePrintingArrays) printArray(NPOT, c, true); + printCmpResult(NPOT, b, c); + } + } + printf("\n"); printf("*****************************\n"); printf("** STREAM COMPACTION TESTS **\n"); @@ -150,72 +181,73 @@ int main(int argc, char* argv[]) { // initialize b using StreamCompaction::CPU::compactWithoutScan you implement // We 
use b for further comparison. Make sure your StreamCompaction::CPU::compactWithoutScan is correct. - zeroArray(SIZE, b); - printDesc("cpu compact without scan, power-of-two"); - count = StreamCompaction::CPU::compactWithoutScan(SIZE, b, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - expectedCount = count; - printArray(count, b, true); - printCmpLenResult(count, expectedCount, b, b); - - zeroArray(SIZE, c); - printDesc("cpu compact without scan, non-power-of-two"); - count = StreamCompaction::CPU::compactWithoutScan(NPOT, c, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - expectedNPOT = count; - printArray(count, c, true); - printCmpLenResult(count, expectedNPOT, b, c); - - zeroArray(SIZE, b); - printDesc("cpu compact with scan, power-of-two"); - count = StreamCompaction::CPU::compactWithScan(SIZE, b, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - expectedCount = count; - printArray(count, b, true); - printCmpLenResult(count, expectedCount, b, b); - - zeroArray(SIZE, c); - printDesc("cpu compact with scan, non-power-of-two"); - count = StreamCompaction::CPU::compactWithScan(NPOT, c, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - expectedNPOT = count; - printArray(count, c, true); - printCmpLenResult(count, expectedNPOT, b, c); - - zeroArray(SIZE, c); - printDesc("cpu compact with scan"); - count = StreamCompaction::CPU::compactWithScan(SIZE, c, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - printArray(count, c, true); - printCmpLenResult(count, expectedCount, b, c); - - zeroArray(SIZE, c); - printDesc("work-efficient compact, power-of-two"); - count = 
StreamCompaction::Efficient::compact(SIZE, c, a); - printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - printArray(count, c, true); - printCmpLenResult(count, expectedCount, b, c); - - zeroArray(SIZE, c); - printDesc("work-efficient compact, non-power-of-two"); - count = StreamCompaction::Efficient::compact(NPOT, c, a); - printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - printArray(count, c, true); - printCmpLenResult(count, expectedNPOT, b, c); - - zeroArray(SIZE, c); - printDesc("thrust compact, power-of-two"); - count = StreamCompaction::Thrust::compact(SIZE, c, a); - printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - printArray(count, c, true); - printCmpLenResult(count, expectedCount, b, c); - - zeroArray(SIZE, c); - printDesc("thrust compact, non-power-of-two"); - count = StreamCompaction::Thrust::compact(NPOT, c, a); - printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - printArray(count, c, true); - printCmpLenResult(count, expectedNPOT, b, c); + if constexpr (enableCPUCompactWithout) { + zeroArray(SIZE, b); + printDesc("cpu compact without scan, power-of-two"); + count = StreamCompaction::CPU::compactWithoutScan(SIZE, b, a); + printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); + expectedCount = count; + if constexpr (enablePrintingArrays) printArray(count, b, true); + printCmpLenResult(count, expectedCount, b, b); + + zeroArray(SIZE, c); + printDesc("cpu compact without scan, non-power-of-two"); + count = StreamCompaction::CPU::compactWithoutScan(NPOT, c, a); + printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); + expectedNPOT = count; + if constexpr (enablePrintingArrays) 
printArray(count, c, true); + printCmpLenResult(count, expectedNPOT, b, c); + } + + if constexpr (enableCPUCompactWith) { + zeroArray(SIZE, b); + printDesc("cpu compact with scan, power-of-two"); + count = StreamCompaction::CPU::compactWithScan(SIZE, b, a); + printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); + expectedCount = count; + if constexpr (enablePrintingArrays) printArray(count, b, true); + printCmpLenResult(count, expectedCount, b, b); + + zeroArray(SIZE, c); + printDesc("cpu compact with scan, non-power-of-two"); + count = StreamCompaction::CPU::compactWithScan(NPOT, c, a); + printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); + expectedNPOT = count; + if constexpr (enablePrintingArrays) printArray(count, c, true); + printCmpLenResult(count, expectedNPOT, b, c); + } + + if constexpr (enableEfficientCompact) { + zeroArray(SIZE, c); + printDesc("work-efficient compact, power-of-two"); + count = StreamCompaction::Efficient::compact(SIZE, c, a); + printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + if constexpr (enablePrintingArrays) printArray(count, c, true); + printCmpLenResult(count, expectedCount, b, c); + + zeroArray(SIZE, c); + printDesc("work-efficient compact, non-power-of-two"); + count = StreamCompaction::Efficient::compact(NPOT, c, a); + printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + if constexpr (enablePrintingArrays) printArray(count, c, true); + printCmpLenResult(count, expectedNPOT, b, c); + } + + if constexpr (enableThrustCompact) { + zeroArray(SIZE, c); + printDesc("thrust compact, power-of-two"); + count = StreamCompaction::Thrust::compact(SIZE, c, a); + printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + if constexpr 
(enablePrintingArrays) printArray(count, c, true); + printCmpLenResult(count, expectedCount, b, c); + + zeroArray(SIZE, c); + printDesc("thrust compact, non-power-of-two"); + count = StreamCompaction::Thrust::compact(NPOT, c, a); + printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + if constexpr (enablePrintingArrays) printArray(count, c, true); + printCmpLenResult(count, expectedNPOT, b, c); + } delete[] a; delete[] b; From b76cf8d4207f3a82ad042801e28dc950fdd25318 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 16:41:22 -0400 Subject: [PATCH 32/46] naive: fix indexing for bigger-sized arrays --- src/main.cpp | 33 ++++++++++++++++----------------- stream_compaction/naive.cu | 8 ++++---- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index d5615d95..2ec73d83 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -15,13 +15,13 @@ constexpr bool runDebugTests = false; /// Print out resulting arrays from computation. constexpr bool enablePrintingArrays = false; -constexpr bool enableCPUScan = true; -constexpr bool enableNaiveScan = false; +constexpr bool enableCPUScan = false; +constexpr bool enableNaiveScan = true; constexpr bool enableEfficientScan = false; constexpr bool enableThrustScan = false; -constexpr bool enableCPUCompactWith = true; -constexpr bool enableCPUCompactWithout = true; +constexpr bool enableCPUCompactWith = false; +constexpr bool enableCPUCompactWithout = false; constexpr bool enableEfficientCompact = false; constexpr bool enableThrustCompact = false; @@ -44,13 +44,13 @@ int main(int argc, char* argv[]) { // initialize b using StreamCompaction::CPU::scan you implement // We use b for further comparison. Make sure your StreamCompaction::CPU::scan is correct. // At first all cases passed because b && c are all zeroes. 
- if constexpr (enableCPUScan) { - zeroArray(SIZE, b); - printDesc("cpu scan, power-of-two"); - StreamCompaction::CPU::scan(SIZE, b, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - if constexpr (enablePrintingArrays) printArray(SIZE, b, true); + zeroArray(SIZE, b); + printDesc("cpu scan, power-of-two"); + StreamCompaction::CPU::scan(SIZE, b, a); + printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); + if constexpr (enablePrintingArrays) printArray(SIZE, b, true); + if constexpr (enableCPUScan) { zeroArray(SIZE, c); printDesc("cpu scan, non-power-of-two"); StreamCompaction::CPU::scan(NPOT, c, a); @@ -116,14 +116,13 @@ int main(int argc, char* argv[]) { onesArray(SIZE, a); - if constexpr (enableCPUScan) { - zeroArray(SIZE, b); - printDesc("cpu scan, power-of-two"); - StreamCompaction::CPU::scan(SIZE, b, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), - "(std::chrono Measured)"); - if constexpr (enablePrintingArrays) printArray(SIZE, b, true); + zeroArray(SIZE, b); + printDesc("cpu scan, power-of-two"); + StreamCompaction::CPU::scan(SIZE, b, a); + printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); + if constexpr (enablePrintingArrays) printArray(SIZE, b, true); + if constexpr (enableCPUScan) { zeroArray(SIZE, c); printDesc("cpu scan, non-power-of-two"); StreamCompaction::CPU::scan(NPOT, c, a); diff --git a/stream_compaction/naive.cu b/stream_compaction/naive.cu index 0e9e255f..480e0048 100644 --- a/stream_compaction/naive.cu +++ b/stream_compaction/naive.cu @@ -23,11 +23,11 @@ PerformanceTimer& timer() { __global__ void kernSumPairsForIteration(int n, const int* in, int* out, int stride) { int k = (blockIdx.x * blockDim.x) + threadIdx.x; - // As the number of dispatches decrease with every iteration, we have to add the 
stride to - // get the last index of the array - if (k >= n + stride || k < stride) return; + if (k >= n) return; - out[k] = in[k - stride] + in[k]; + int outIndex = stride + k; + + out[outIndex] = in[k] + in[outIndex]; } /** From 1f715b5b48f1b52bbb4129e253941109bf677878 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 17:42:36 -0400 Subject: [PATCH 33/46] efficient: remove extra `measure` parameter from `scan` --- src/main.cpp | 45 ++++++++++++++++++++-------------- stream_compaction/efficient.cu | 18 +++++++++++--- stream_compaction/efficient.h | 2 +- stream_compaction/naive.cu | 2 +- 4 files changed, 42 insertions(+), 25 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 2ec73d83..6754ae23 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,6 +1,7 @@ #include "testing_helpers.hpp" #include +#include #include #include #include @@ -17,14 +18,22 @@ constexpr bool enablePrintingArrays = false; constexpr bool enableCPUScan = false; constexpr bool enableNaiveScan = true; -constexpr bool enableEfficientScan = false; +constexpr bool enableEfficientScan = true; constexpr bool enableThrustScan = false; constexpr bool enableCPUCompactWith = false; -constexpr bool enableCPUCompactWithout = false; -constexpr bool enableEfficientCompact = false; +constexpr bool enableEfficientCompact = true; constexpr bool enableThrustCompact = false; +namespace Perf { + +using ScanFn = std::function; +using CompactionFn = std::function; + +constexpr int numIterations = 1'000; + +} // namespace Perf + int* a = new int[SIZE]; int* b = new int[SIZE]; int* c = new int[SIZE]; @@ -180,23 +189,21 @@ int main(int argc, char* argv[]) { // initialize b using StreamCompaction::CPU::compactWithoutScan you implement // We use b for further comparison. Make sure your StreamCompaction::CPU::compactWithoutScan is correct. 
- if constexpr (enableCPUCompactWithout) { - zeroArray(SIZE, b); - printDesc("cpu compact without scan, power-of-two"); - count = StreamCompaction::CPU::compactWithoutScan(SIZE, b, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - expectedCount = count; - if constexpr (enablePrintingArrays) printArray(count, b, true); - printCmpLenResult(count, expectedCount, b, b); + zeroArray(SIZE, b); + printDesc("cpu compact without scan, power-of-two"); + count = StreamCompaction::CPU::compactWithoutScan(SIZE, b, a); + printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); + expectedCount = count; + if constexpr (enablePrintingArrays) printArray(count, b, true); + printCmpLenResult(count, expectedCount, b, b); - zeroArray(SIZE, c); - printDesc("cpu compact without scan, non-power-of-two"); - count = StreamCompaction::CPU::compactWithoutScan(NPOT, c, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - expectedNPOT = count; - if constexpr (enablePrintingArrays) printArray(count, c, true); - printCmpLenResult(count, expectedNPOT, b, c); - } + zeroArray(SIZE, c); + printDesc("cpu compact without scan, non-power-of-two"); + count = StreamCompaction::CPU::compactWithoutScan(NPOT, c, a); + printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); + expectedNPOT = count; + if constexpr (enablePrintingArrays) printArray(count, c, true); + printCmpLenResult(count, expectedNPOT, b, c); if constexpr (enableCPUCompactWith) { zeroArray(SIZE, b); diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index 3bf23356..6987ee9c 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -11,6 +11,12 @@ namespace StreamCompaction { namespace Efficient { +namespace { + +bool 
enableScanMeasure = true; + +} + using StreamCompaction::Common::PerformanceTimer; /// Number of threads per block. @@ -57,7 +63,7 @@ __global__ void kernTraverseDownLayer(int n, int* data, int layer, int stride) { /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ -void scan(int n, int* odata, const int* idata, bool measure) { +void scan(int n, int* odata, const int* idata) { int actualN = n; size_t numBytes = n * sizeof(int); @@ -85,7 +91,7 @@ void scan(int n, int* odata, const int* idata, bool measure) { cudaMemcpy(dev_data, actualInputData.get(), numBytes, cudaMemcpyHostToDevice); checkCUDAError("cudaMemcpy: actualInputData -> dev_data failed!"); - if (measure) timer().startGpuTimer(); + if (enableScanMeasure) timer().startGpuTimer(); // Perform up-sweep via parallel reduction for (int layer = 0; layer < ilog2(actualN); ++layer) { @@ -109,7 +115,7 @@ void scan(int n, int* odata, const int* idata, bool measure) { kernTraverseDownLayer<<>>(numDispatches, dev_data, layer, stride); } - if (measure) timer().endGpuTimer(); + if (enableScanMeasure) timer().endGpuTimer(); if (paddingOpt) { // If previously padded, remove extra zeroes @@ -163,7 +169,11 @@ int compact(int n, int* odata, const int* idata) { Common::kernMapToBoolean<<>>(n, dev_bools, dev_idata); cudaMemcpy(bools.get(), dev_bools, numBytes, cudaMemcpyDeviceToHost); if constexpr (checkErrorsDuringTimer) checkCUDAError("cudaMemcpy: dev_bools -> bools failed"); - scan(n, indices.get(), bools.get(), false); + + enableScanMeasure = false; + scan(n, indices.get(), bools.get()); + enableScanMeasure = true; + cudaMemcpy(dev_indices, indices.get(), numBytes, cudaMemcpyHostToDevice); if constexpr (checkErrorsDuringTimer) checkCUDAError("cudaMemcpy: indices -> dev_indices failed"); Common::kernScatter<<>>(n, dev_odata, dev_idata, dev_bools, dev_indices); diff --git a/stream_compaction/efficient.h b/stream_compaction/efficient.h index a13935d9..d64c3426 100644 --- 
a/stream_compaction/efficient.h +++ b/stream_compaction/efficient.h @@ -7,7 +7,7 @@ namespace Efficient { StreamCompaction::Common::PerformanceTimer& timer(); -void scan(int n, int* odata, const int* idata, bool measure = true); +void scan(int n, int* odata, const int* idata); int compact(int n, int* odata, const int* idata); diff --git a/stream_compaction/naive.cu b/stream_compaction/naive.cu index 480e0048..4495ca89 100644 --- a/stream_compaction/naive.cu +++ b/stream_compaction/naive.cu @@ -10,7 +10,7 @@ namespace Naive { using StreamCompaction::Common::PerformanceTimer; /// Number of threads per block. -constexpr int blockSize = 256; +constexpr int blockSize = 128; /// Whether to return an inclusive or exclusive scan. constexpr bool useExclusiveScan = true; From 36476538285a8614af359353dbe7ad0ab5e7e602 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 18:33:24 -0400 Subject: [PATCH 34/46] benchmark system --- src/main.cpp | 415 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 242 insertions(+), 173 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 6754ae23..c9e7104a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,18 +1,24 @@ #include "testing_helpers.hpp" +#include #include #include #include #include #include #include +#include +#include -const int SIZE = 1 << 16; // feel free to change the size of array -const int NPOT = SIZE - 3; // Non-Power-Of-Two +constexpr int sizePOT = 1 << 16; // feel free to change the size of array +constexpr int sizeNPOT = sizePOT - 3; // Non-Power-Of-Two /// If true, run additional simpler tests. constexpr bool runDebugTests = false; +/// Run benchmarks instead of tests. +constexpr bool runBenchmarks = true; + /// Print out resulting arrays from computation. 
constexpr bool enablePrintingArrays = false; @@ -27,232 +33,295 @@ constexpr bool enableThrustCompact = false; namespace Perf { +using TimerFn = std::function; using ScanFn = std::function; using CompactionFn = std::function; +enum class Implementation { CPU, Naive, Efficient, Thrust }; + constexpr int numIterations = 1'000; +constexpr int maxValue = 50; -} // namespace Perf +std::pair getScanImplementation(Implementation implementation) { + using namespace StreamCompaction; -int* a = new int[SIZE]; -int* b = new int[SIZE]; -int* c = new int[SIZE]; + switch (implementation) { + case Implementation::CPU: + return std::make_pair(CPU::scan, CPU::timer); -int main(int argc, char* argv[]) { - // Scan tests - - printf("\n"); - printf("****************\n"); - printf("** SCAN TESTS **\n"); - printf("****************\n"); - - genArray(SIZE - 1, a, 50); // Leave a 0 at the end to test that edge case - a[SIZE - 1] = 0; - printArray(SIZE, a, true); - - // initialize b using StreamCompaction::CPU::scan you implement - // We use b for further comparison. Make sure your StreamCompaction::CPU::scan is correct. - // At first all cases passed because b && c are all zeroes. 
- zeroArray(SIZE, b); - printDesc("cpu scan, power-of-two"); - StreamCompaction::CPU::scan(SIZE, b, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - if constexpr (enablePrintingArrays) printArray(SIZE, b, true); - - if constexpr (enableCPUScan) { - zeroArray(SIZE, c); - printDesc("cpu scan, non-power-of-two"); - StreamCompaction::CPU::scan(NPOT, c, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - if constexpr (enablePrintingArrays) printArray(NPOT, c, true); - printCmpResult(NPOT, b, c); - } + case Implementation::Naive: + return std::make_pair(Naive::scan, Naive::timer); - if constexpr (enableNaiveScan) { - zeroArray(SIZE, c); - printDesc("naive scan, power-of-two"); - StreamCompaction::Naive::scan(SIZE, c, a); - printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - if constexpr (enablePrintingArrays) printArray(SIZE, c, true); - printCmpResult(SIZE, b, c); - - zeroArray(SIZE, c); - printDesc("naive scan, non-power-of-two"); - StreamCompaction::Naive::scan(NPOT, c, a); - printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - if constexpr (enablePrintingArrays) printArray(SIZE, c, true); - printCmpResult(NPOT, b, c); + case Implementation::Efficient: + return std::make_pair(Efficient::scan, Efficient::timer); + + case Implementation::Thrust: + return std::make_pair(Thrust::scan, Thrust::timer); + + default: + throw std::invalid_argument("invalid enum"); } +} + +void runScanBenchmark(Implementation implementation, int n, std::string_view benchmarkName) { + std::string prefix = "[" + std::string(benchmarkName) + "]"; + const auto [scan, timer] = getScanImplementation(implementation); + + std::vector elapsedTimes; + std::array out; + std::array in; + genArray(sizePOT, in.data(), maxValue); + + for (int i = 
1; i <= numIterations; ++i) { + std::cout << prefix << " Executing scan(): " << i << " of " << numIterations << "...\r"; - if constexpr (enableEfficientScan) { - zeroArray(SIZE, c); - printDesc("work-efficient scan, power-of-two"); - StreamCompaction::Efficient::scan(SIZE, c, a); - printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - if constexpr (enablePrintingArrays) printArray(SIZE, c, true); - printCmpResult(SIZE, b, c); - - zeroArray(SIZE, c); - printDesc("work-efficient scan, non-power-of-two"); - StreamCompaction::Efficient::scan(NPOT, c, a); - printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - if constexpr (enablePrintingArrays) printArray(NPOT, c, true); - printCmpResult(NPOT, b, c); + scan(n, out.data(), in.data()); + elapsedTimes.push_back(timer().getCpuElapsedTimeForPreviousOperation()); } - if constexpr (enableThrustScan) { - zeroArray(SIZE, c); - printDesc("thrust scan, power-of-two"); - StreamCompaction::Thrust::scan(SIZE, c, a); - printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - if constexpr (enablePrintingArrays) printArray(SIZE, c, true); - printCmpResult(SIZE, b, c); - - zeroArray(SIZE, c); - printDesc("thrust scan, non-power-of-two"); - StreamCompaction::Thrust::scan(NPOT, c, a); - printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - if constexpr (enablePrintingArrays) printArray(NPOT, c, true); - printCmpResult(NPOT, b, c); + float average = 0.f; + for (const float& time : elapsedTimes) { + average += time; } + average /= elapsedTimes.size(); - // For bug-finding only: Array of 1s to help find bugs in stream compaction or scan - if constexpr (runDebugTests) { - printf("\n"); - printf("*************************\n"); - printf("** SCAN TESTS (ALL 1s) **\n"); - 
printf("*************************\n"); + std::cout << prefix << " Average scan() time: " << average << " " << std::endl; +} + +} // namespace Perf - onesArray(SIZE, a); +int* a = new int[sizePOT]; +int* b = new int[sizePOT]; +int* c = new int[sizePOT]; - zeroArray(SIZE, b); +int main(int argc, char* argv[]) { + if constexpr (runBenchmarks) { + printf("********************\n"); + printf("** SCAN BENCHMARK **\n"); + printf("********************\n"); + + Perf::runScanBenchmark(Perf::Implementation::CPU, sizePOT, "CPU/POT"); + Perf::runScanBenchmark(Perf::Implementation::CPU, sizeNPOT, "CPU/NPOT"); + } else { + printf("****************\n"); + printf("** SCAN TESTS **\n"); + printf("****************\n"); + + genArray(sizePOT - 1, a, 50); // Leave a 0 at the end to test that edge case + a[sizePOT - 1] = 0; + printArray(sizePOT, a, true); + + // initialize b using StreamCompaction::CPU::scan you implement + // We use b for further comparison. Make sure your StreamCompaction::CPU::scan is correct. + // At first all cases passed because b && c are all zeroes. 
+ zeroArray(sizePOT, b); printDesc("cpu scan, power-of-two"); - StreamCompaction::CPU::scan(SIZE, b, a); + StreamCompaction::CPU::scan(sizePOT, b, a); printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - if constexpr (enablePrintingArrays) printArray(SIZE, b, true); + if constexpr (enablePrintingArrays) printArray(sizePOT, b, true); if constexpr (enableCPUScan) { - zeroArray(SIZE, c); + zeroArray(sizePOT, c); printDesc("cpu scan, non-power-of-two"); - StreamCompaction::CPU::scan(NPOT, c, a); + StreamCompaction::CPU::scan(sizeNPOT, c, a); printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - if constexpr (enablePrintingArrays) printArray(NPOT, c, true); - printCmpResult(NPOT, b, c); + if constexpr (enablePrintingArrays) printArray(sizeNPOT, c, true); + printCmpResult(sizeNPOT, b, c); } if constexpr (enableNaiveScan) { - zeroArray(SIZE, c); + zeroArray(sizePOT, c); printDesc("naive scan, power-of-two"); - StreamCompaction::Naive::scan(SIZE, c, a); + StreamCompaction::Naive::scan(sizePOT, c, a); printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - if constexpr (enablePrintingArrays) printArray(SIZE, c, true); - printCmpResult(SIZE, b, c); + if constexpr (enablePrintingArrays) printArray(sizePOT, c, true); + printCmpResult(sizePOT, b, c); - zeroArray(SIZE, c); + zeroArray(sizePOT, c); printDesc("naive scan, non-power-of-two"); - StreamCompaction::Naive::scan(NPOT, c, a); + StreamCompaction::Naive::scan(sizeNPOT, c, a); printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - if constexpr (enablePrintingArrays) printArray(SIZE, c, true); - printCmpResult(NPOT, b, c); + if constexpr (enablePrintingArrays) printArray(sizePOT, c, true); + printCmpResult(sizeNPOT, b, c); } if constexpr (enableEfficientScan) { - zeroArray(SIZE, c); + 
zeroArray(sizePOT, c); printDesc("work-efficient scan, power-of-two"); - StreamCompaction::Efficient::scan(SIZE, c, a); + StreamCompaction::Efficient::scan(sizePOT, c, a); printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - if constexpr (enablePrintingArrays) printArray(SIZE, c, true); - printCmpResult(SIZE, b, c); + if constexpr (enablePrintingArrays) printArray(sizePOT, c, true); + printCmpResult(sizePOT, b, c); - zeroArray(SIZE, c); + zeroArray(sizePOT, c); printDesc("work-efficient scan, non-power-of-two"); - StreamCompaction::Efficient::scan(NPOT, c, a); + StreamCompaction::Efficient::scan(sizeNPOT, c, a); printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - if constexpr (enablePrintingArrays) printArray(NPOT, c, true); - printCmpResult(NPOT, b, c); + if constexpr (enablePrintingArrays) printArray(sizeNPOT, c, true); + printCmpResult(sizeNPOT, b, c); + } + + if constexpr (enableThrustScan) { + zeroArray(sizePOT, c); + printDesc("thrust scan, power-of-two"); + StreamCompaction::Thrust::scan(sizePOT, c, a); + printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + if constexpr (enablePrintingArrays) printArray(sizePOT, c, true); + printCmpResult(sizePOT, b, c); + + zeroArray(sizePOT, c); + printDesc("thrust scan, non-power-of-two"); + StreamCompaction::Thrust::scan(sizeNPOT, c, a); + printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + if constexpr (enablePrintingArrays) printArray(sizeNPOT, c, true); + printCmpResult(sizeNPOT, b, c); + } + + // For bug-finding only: Array of 1s to help find bugs in stream compaction or scan + if constexpr (runDebugTests) { + printf("\n"); + printf("*************************\n"); + printf("** SCAN TESTS (ALL 1s) **\n"); + printf("*************************\n"); + + onesArray(sizePOT, 
a); + + zeroArray(sizePOT, b); + printDesc("cpu scan, power-of-two"); + StreamCompaction::CPU::scan(sizePOT, b, a); + printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), + "(std::chrono Measured)"); + if constexpr (enablePrintingArrays) printArray(sizePOT, b, true); + + if constexpr (enableCPUScan) { + zeroArray(sizePOT, c); + printDesc("cpu scan, non-power-of-two"); + StreamCompaction::CPU::scan(sizeNPOT, c, a); + printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), + "(std::chrono Measured)"); + if constexpr (enablePrintingArrays) printArray(sizeNPOT, c, true); + printCmpResult(sizeNPOT, b, c); + } + + if constexpr (enableNaiveScan) { + zeroArray(sizePOT, c); + printDesc("naive scan, power-of-two"); + StreamCompaction::Naive::scan(sizePOT, c, a); + printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + if constexpr (enablePrintingArrays) printArray(sizePOT, c, true); + printCmpResult(sizePOT, b, c); + + zeroArray(sizePOT, c); + printDesc("naive scan, non-power-of-two"); + StreamCompaction::Naive::scan(sizeNPOT, c, a); + printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + if constexpr (enablePrintingArrays) printArray(sizePOT, c, true); + printCmpResult(sizeNPOT, b, c); + } + + if constexpr (enableEfficientScan) { + zeroArray(sizePOT, c); + printDesc("work-efficient scan, power-of-two"); + StreamCompaction::Efficient::scan(sizePOT, c, a); + printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), + "(CUDA Measured)"); + if constexpr (enablePrintingArrays) printArray(sizePOT, c, true); + printCmpResult(sizePOT, b, c); + + zeroArray(sizePOT, c); + printDesc("work-efficient scan, non-power-of-two"); + StreamCompaction::Efficient::scan(sizeNPOT, c, a); + 
printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), + "(CUDA Measured)"); + if constexpr (enablePrintingArrays) printArray(sizeNPOT, c, true); + printCmpResult(sizeNPOT, b, c); + } } } - printf("\n"); - printf("*****************************\n"); - printf("** STREAM COMPACTION TESTS **\n"); - printf("*****************************\n"); - - // Compaction tests - - genArray(SIZE - 1, a, 4); // Leave a 0 at the end to test that edge case - a[SIZE - 1] = 0; - printArray(SIZE, a, true); - - int count, expectedCount, expectedNPOT; - - // initialize b using StreamCompaction::CPU::compactWithoutScan you implement - // We use b for further comparison. Make sure your StreamCompaction::CPU::compactWithoutScan is correct. - zeroArray(SIZE, b); - printDesc("cpu compact without scan, power-of-two"); - count = StreamCompaction::CPU::compactWithoutScan(SIZE, b, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - expectedCount = count; - if constexpr (enablePrintingArrays) printArray(count, b, true); - printCmpLenResult(count, expectedCount, b, b); - - zeroArray(SIZE, c); - printDesc("cpu compact without scan, non-power-of-two"); - count = StreamCompaction::CPU::compactWithoutScan(NPOT, c, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - expectedNPOT = count; - if constexpr (enablePrintingArrays) printArray(count, c, true); - printCmpLenResult(count, expectedNPOT, b, c); - - if constexpr (enableCPUCompactWith) { - zeroArray(SIZE, b); - printDesc("cpu compact with scan, power-of-two"); - count = StreamCompaction::CPU::compactWithScan(SIZE, b, a); + if constexpr (runBenchmarks) { + } else { + printf("\n"); + printf("*****************************\n"); + printf("** STREAM COMPACTION TESTS **\n"); + printf("*****************************\n"); + + genArray(sizePOT - 1, a, 4); // Leave a 0 at the end to 
test that edge case + a[sizePOT - 1] = 0; + printArray(sizePOT, a, true); + + int count, expectedCount, expectedNPOT; + + // initialize b using StreamCompaction::CPU::compactWithoutScan you implement + // We use b for further comparison. Make sure your StreamCompaction::CPU::compactWithoutScan is correct. + zeroArray(sizePOT, b); + printDesc("cpu compact without scan, power-of-two"); + count = StreamCompaction::CPU::compactWithoutScan(sizePOT, b, a); printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); expectedCount = count; if constexpr (enablePrintingArrays) printArray(count, b, true); printCmpLenResult(count, expectedCount, b, b); - zeroArray(SIZE, c); - printDesc("cpu compact with scan, non-power-of-two"); - count = StreamCompaction::CPU::compactWithScan(NPOT, c, a); + zeroArray(sizePOT, c); + printDesc("cpu compact without scan, non-power-of-two"); + count = StreamCompaction::CPU::compactWithoutScan(sizeNPOT, c, a); printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); expectedNPOT = count; if constexpr (enablePrintingArrays) printArray(count, c, true); printCmpLenResult(count, expectedNPOT, b, c); - } - if constexpr (enableEfficientCompact) { - zeroArray(SIZE, c); - printDesc("work-efficient compact, power-of-two"); - count = StreamCompaction::Efficient::compact(SIZE, c, a); - printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - if constexpr (enablePrintingArrays) printArray(count, c, true); - printCmpLenResult(count, expectedCount, b, c); + if constexpr (enableCPUCompactWith) { + zeroArray(sizePOT, b); + printDesc("cpu compact with scan, power-of-two"); + count = StreamCompaction::CPU::compactWithScan(sizePOT, b, a); + printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), + "(std::chrono Measured)"); + expectedCount = count; + if 
constexpr (enablePrintingArrays) printArray(count, b, true); + printCmpLenResult(count, expectedCount, b, b); - zeroArray(SIZE, c); - printDesc("work-efficient compact, non-power-of-two"); - count = StreamCompaction::Efficient::compact(NPOT, c, a); - printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - if constexpr (enablePrintingArrays) printArray(count, c, true); - printCmpLenResult(count, expectedNPOT, b, c); - } + zeroArray(sizePOT, c); + printDesc("cpu compact with scan, non-power-of-two"); + count = StreamCompaction::CPU::compactWithScan(sizeNPOT, c, a); + printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), + "(std::chrono Measured)"); + expectedNPOT = count; + if constexpr (enablePrintingArrays) printArray(count, c, true); + printCmpLenResult(count, expectedNPOT, b, c); + } - if constexpr (enableThrustCompact) { - zeroArray(SIZE, c); - printDesc("thrust compact, power-of-two"); - count = StreamCompaction::Thrust::compact(SIZE, c, a); - printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - if constexpr (enablePrintingArrays) printArray(count, c, true); - printCmpLenResult(count, expectedCount, b, c); + if constexpr (enableEfficientCompact) { + zeroArray(sizePOT, c); + printDesc("work-efficient compact, power-of-two"); + count = StreamCompaction::Efficient::compact(sizePOT, c, a); + printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + if constexpr (enablePrintingArrays) printArray(count, c, true); + printCmpLenResult(count, expectedCount, b, c); - zeroArray(SIZE, c); - printDesc("thrust compact, non-power-of-two"); - count = StreamCompaction::Thrust::compact(NPOT, c, a); - printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - if constexpr (enablePrintingArrays) printArray(count, 
c, true); - printCmpLenResult(count, expectedNPOT, b, c); + zeroArray(sizePOT, c); + printDesc("work-efficient compact, non-power-of-two"); + count = StreamCompaction::Efficient::compact(sizeNPOT, c, a); + printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + if constexpr (enablePrintingArrays) printArray(count, c, true); + printCmpLenResult(count, expectedNPOT, b, c); + } + + if constexpr (enableThrustCompact) { + zeroArray(sizePOT, c); + printDesc("thrust compact, power-of-two"); + count = StreamCompaction::Thrust::compact(sizePOT, c, a); + printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + if constexpr (enablePrintingArrays) printArray(count, c, true); + printCmpLenResult(count, expectedCount, b, c); + + zeroArray(sizePOT, c); + printDesc("thrust compact, non-power-of-two"); + count = StreamCompaction::Thrust::compact(sizeNPOT, c, a); + printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + if constexpr (enablePrintingArrays) printArray(count, c, true); + printCmpLenResult(count, expectedNPOT, b, c); + } } delete[] a; From f1d399e1aa4639d30ec48aa42f1d4508902e887e Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 20:07:21 -0400 Subject: [PATCH 35/46] add `getCompactionImplementation` --- src/main.cpp | 21 +++++++++++++++++---- stream_compaction/cpu.cu | 4 ++++ stream_compaction/cpu.h | 3 +++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index c9e7104a..2d93f1d4 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -39,7 +39,7 @@ using CompactionFn = std::function; enum class Implementation { CPU, Naive, Efficient, Thrust }; -constexpr int numIterations = 1'000; +constexpr int numIterations = 100'000; constexpr int maxValue = 50; std::pair getScanImplementation(Implementation implementation) { @@ -48,16 +48,29 @@ std::pair 
getScanImplementation(Implementation implementation) switch (implementation) { case Implementation::CPU: return std::make_pair(CPU::scan, CPU::timer); - case Implementation::Naive: return std::make_pair(Naive::scan, Naive::timer); - case Implementation::Efficient: return std::make_pair(Efficient::scan, Efficient::timer); - case Implementation::Thrust: return std::make_pair(Thrust::scan, Thrust::timer); + default: + throw std::invalid_argument("invalid enum"); + } +} +std::pair getCompactionImplementation(Implementation implementation) { + using namespace StreamCompaction; + + switch (implementation) { + case Implementation::CPU: + return std::make_pair(CPU::compact, CPU::timer); + case Implementation::Efficient: + return std::make_pair(Efficient::compact, Efficient::timer); + case Implementation::Thrust: + return std::make_pair(Thrust::compact, Thrust::timer); + case Implementation::Naive: + throw std::invalid_argument("naive does not have compact"); default: throw std::invalid_argument("invalid enum"); } diff --git a/stream_compaction/cpu.cu b/stream_compaction/cpu.cu index ce57206d..cfcf27ad 100644 --- a/stream_compaction/cpu.cu +++ b/stream_compaction/cpu.cu @@ -39,6 +39,10 @@ void scan(int n, int* odata, const int* idata) { timer().endCpuTimer(); } +int compact(int n, int* odata, const int* idata) { + return compactWithScan(n, odata, idata); +} + /** * CPU stream compaction without using the scan function. * diff --git a/stream_compaction/cpu.h b/stream_compaction/cpu.h index 42635cbb..322f3c92 100644 --- a/stream_compaction/cpu.h +++ b/stream_compaction/cpu.h @@ -9,6 +9,9 @@ StreamCompaction::Common::PerformanceTimer& timer(); void scan(int n, int* odata, const int* idata); +/// By default, `compact` will use `scan` because that is also true for `Efficient::compact`. 
+int compact(int n, int* odata, const int* idata); + int compactWithoutScan(int n, int* odata, const int* idata); int compactWithScan(int n, int* odata, const int* idata); From 7a96acc7295ad5ebb0032c22e616618e96ddf013 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 20:27:31 -0400 Subject: [PATCH 36/46] bind different timers --- src/main.cpp | 73 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 31 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 2d93f1d4..32a7d646 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -24,53 +24,43 @@ constexpr bool enablePrintingArrays = false; constexpr bool enableCPUScan = false; constexpr bool enableNaiveScan = true; -constexpr bool enableEfficientScan = true; +constexpr bool enableEfficientScan = false; constexpr bool enableThrustScan = false; -constexpr bool enableCPUCompactWith = false; -constexpr bool enableEfficientCompact = true; +constexpr bool enableCPUCompact = false; +constexpr bool enableEfficientCompact = false; constexpr bool enableThrustCompact = false; namespace Perf { -using TimerFn = std::function; +using TimerFn = std::function; using ScanFn = std::function; using CompactionFn = std::function; enum class Implementation { CPU, Naive, Efficient, Thrust }; -constexpr int numIterations = 100'000; +constexpr int numIterations = 5'000; constexpr int maxValue = 50; std::pair getScanImplementation(Implementation implementation) { using namespace StreamCompaction; + auto cpu = &Common::PerformanceTimer::getCpuElapsedTimeForPreviousOperation; + auto gpu = &Common::PerformanceTimer::getGpuElapsedTimeForPreviousOperation; + switch (implementation) { case Implementation::CPU: - return std::make_pair(CPU::scan, CPU::timer); - case Implementation::Naive: - return std::make_pair(Naive::scan, Naive::timer); - case Implementation::Efficient: - return std::make_pair(Efficient::scan, Efficient::timer); - case Implementation::Thrust: - return std::make_pair(Thrust::scan, 
Thrust::timer); - default: - throw std::invalid_argument("invalid enum"); - } -} + return std::make_pair(CPU::scan, std::bind(cpu, &CPU::timer())); -std::pair getCompactionImplementation(Implementation implementation) { - using namespace StreamCompaction; + case Implementation::Naive: + return std::make_pair(Naive::scan, std::bind(gpu, &Naive::timer())); - switch (implementation) { - case Implementation::CPU: - return std::make_pair(CPU::compact, CPU::timer); case Implementation::Efficient: - return std::make_pair(Efficient::compact, Efficient::timer); + return std::make_pair(Efficient::scan, std::bind(gpu, &Efficient::timer())); + case Implementation::Thrust: - return std::make_pair(Thrust::compact, Thrust::timer); - case Implementation::Naive: - throw std::invalid_argument("naive does not have compact"); + return std::make_pair(Thrust::scan, std::bind(gpu, &Thrust::timer())); + default: throw std::invalid_argument("invalid enum"); } @@ -78,7 +68,7 @@ std::pair getCompactionImplementation(Implementation impl void runScanBenchmark(Implementation implementation, int n, std::string_view benchmarkName) { std::string prefix = "[" + std::string(benchmarkName) + "]"; - const auto [scan, timer] = getScanImplementation(implementation); + const auto [scan, getTime] = getScanImplementation(implementation); std::vector elapsedTimes; std::array out; @@ -89,7 +79,7 @@ void runScanBenchmark(Implementation implementation, int n, std::string_view ben std::cout << prefix << " Executing scan(): " << i << " of " << numIterations << "...\r"; scan(n, out.data(), in.data()); - elapsedTimes.push_back(timer().getCpuElapsedTimeForPreviousOperation()); + elapsedTimes.push_back(getTime()); } float average = 0.f; @@ -111,10 +101,31 @@ int main(int argc, char* argv[]) { if constexpr (runBenchmarks) { printf("********************\n"); printf("** SCAN BENCHMARK **\n"); - printf("********************\n"); + printf("********************\n\n"); + + std::cout << "- Number of iterations: " << 
Perf::numIterations << std::endl; + std::cout << "- Size of POT array: " << sizePOT << std::endl; + std::cout << "- Size of NPOT array: " << sizeNPOT << "\n" << std::endl; + + if constexpr (enableCPUScan) { + Perf::runScanBenchmark(Perf::Implementation::CPU, sizePOT, "CPU/POT"); + Perf::runScanBenchmark(Perf::Implementation::CPU, sizeNPOT, "CPU/NPOT"); + } + + if constexpr (enableNaiveScan) { + Perf::runScanBenchmark(Perf::Implementation::Naive, sizePOT, "Naive/POT"); + Perf::runScanBenchmark(Perf::Implementation::Naive, sizeNPOT, "Naive/NPOT"); + } - Perf::runScanBenchmark(Perf::Implementation::CPU, sizePOT, "CPU/POT"); - Perf::runScanBenchmark(Perf::Implementation::CPU, sizeNPOT, "CPU/NPOT"); + if constexpr (enableEfficientScan) { + Perf::runScanBenchmark(Perf::Implementation::Efficient, sizePOT, "Efficient/POT"); + Perf::runScanBenchmark(Perf::Implementation::Efficient, sizeNPOT, "Efficient/NPOT"); + } + + if constexpr (enableThrustScan) { + Perf::runScanBenchmark(Perf::Implementation::Thrust, sizePOT, "Thrust/POT"); + Perf::runScanBenchmark(Perf::Implementation::Thrust, sizeNPOT, "Thrust/NPOT"); + } } else { printf("****************\n"); printf("** SCAN TESTS **\n"); @@ -284,7 +295,7 @@ int main(int argc, char* argv[]) { if constexpr (enablePrintingArrays) printArray(count, c, true); printCmpLenResult(count, expectedNPOT, b, c); - if constexpr (enableCPUCompactWith) { + if constexpr (enableCPUCompact) { zeroArray(sizePOT, b); printDesc("cpu compact with scan, power-of-two"); count = StreamCompaction::CPU::compactWithScan(sizePOT, b, a); From 50d3815895e0787268bb0053839868a7a8208903 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 20:40:53 -0400 Subject: [PATCH 37/46] optimal naive block size --- src/main.cpp | 4 ++-- stream_compaction/naive.cu | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 32a7d646..47148844 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -10,7 +10,7 @@ #include #include 
-constexpr int sizePOT = 1 << 16; // feel free to change the size of array +constexpr int sizePOT = 1 << 12; // feel free to change the size of array constexpr int sizeNPOT = sizePOT - 3; // Non-Power-Of-Two /// If true, run additional simpler tests. @@ -39,7 +39,7 @@ using CompactionFn = std::function; enum class Implementation { CPU, Naive, Efficient, Thrust }; -constexpr int numIterations = 5'000; +constexpr int numIterations = 10'000; constexpr int maxValue = 50; std::pair getScanImplementation(Implementation implementation) { diff --git a/stream_compaction/naive.cu b/stream_compaction/naive.cu index 4495ca89..477b447b 100644 --- a/stream_compaction/naive.cu +++ b/stream_compaction/naive.cu @@ -10,7 +10,7 @@ namespace Naive { using StreamCompaction::Common::PerformanceTimer; /// Number of threads per block. -constexpr int blockSize = 128; +constexpr int blockSize = 64; /// Whether to return an inclusive or exclusive scan. constexpr bool useExclusiveScan = true; From 9793725b17d939d85d407540597eda521727757d Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 20:43:55 -0400 Subject: [PATCH 38/46] optimal efficient block size --- src/main.cpp | 4 ++-- stream_compaction/efficient.cu | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 47148844..d7c9fffb 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -23,8 +23,8 @@ constexpr bool runBenchmarks = true; constexpr bool enablePrintingArrays = false; constexpr bool enableCPUScan = false; -constexpr bool enableNaiveScan = true; -constexpr bool enableEfficientScan = false; +constexpr bool enableNaiveScan = false; +constexpr bool enableEfficientScan = true; constexpr bool enableThrustScan = false; constexpr bool enableCPUCompact = false; diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index 6987ee9c..a39a777f 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -20,7 +20,7 @@ bool enableScanMeasure = 
true; using StreamCompaction::Common::PerformanceTimer; /// Number of threads per block. -constexpr int blockSize = 256; +constexpr int blockSize = 512; /// Enable `checkCUDAError()` calls within the performance measuring fence. constexpr bool checkErrorsDuringTimer = true; From 4cc48f36eb65eda74f292a121a2307da9ef9fa61 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 21:08:36 -0400 Subject: [PATCH 39/46] benchmark: 2^4 --- src/main.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index d7c9fffb..6633ab74 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -10,7 +10,7 @@ #include #include -constexpr int sizePOT = 1 << 12; // feel free to change the size of array +constexpr int sizePOT = 1 << 4; // feel free to change the size of array constexpr int sizeNPOT = sizePOT - 3; // Non-Power-Of-Two /// If true, run additional simpler tests. @@ -22,10 +22,10 @@ constexpr bool runBenchmarks = true; /// Print out resulting arrays from computation. 
constexpr bool enablePrintingArrays = false; -constexpr bool enableCPUScan = false; -constexpr bool enableNaiveScan = false; +constexpr bool enableCPUScan = true; +constexpr bool enableNaiveScan = true; constexpr bool enableEfficientScan = true; -constexpr bool enableThrustScan = false; +constexpr bool enableThrustScan = true; constexpr bool enableCPUCompact = false; constexpr bool enableEfficientCompact = false; @@ -39,7 +39,7 @@ using CompactionFn = std::function; enum class Implementation { CPU, Naive, Efficient, Thrust }; -constexpr int numIterations = 10'000; +constexpr int numIterations = 50'000; constexpr int maxValue = 50; std::pair getScanImplementation(Implementation implementation) { From b7aff7337df25a76c3f0d4da2c9fab09f10bfeaa Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 21:15:10 -0400 Subject: [PATCH 40/46] benchmark: 2^8 --- src/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.cpp b/src/main.cpp index 6633ab74..4b66253b 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -10,7 +10,7 @@ #include #include -constexpr int sizePOT = 1 << 4; // feel free to change the size of array +constexpr int sizePOT = 1 << 8; // feel free to change the size of array constexpr int sizeNPOT = sizePOT - 3; // Non-Power-Of-Two /// If true, run additional simpler tests. From 2dcf705639102566c612bd5b62e9a34f38ed35e4 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 21:15:18 -0400 Subject: [PATCH 41/46] benchmark: 2^12 --- src/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.cpp b/src/main.cpp index 4b66253b..953c1bd0 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -10,7 +10,7 @@ #include #include -constexpr int sizePOT = 1 << 8; // feel free to change the size of array +constexpr int sizePOT = 1 << 12; // feel free to change the size of array constexpr int sizeNPOT = sizePOT - 3; // Non-Power-Of-Two /// If true, run additional simpler tests. 
From b44a7fdd3f0b95ba7791ef00b5baa252b6505fc0 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 21:39:05 -0400 Subject: [PATCH 42/46] benchmark: fix stack overflow --- README.md | 4 ++++ src/main.cpp | 12 ++++++------ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 00c0d58c..b03c861b 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,10 @@ Below I demonstrate how my algorithms performed in benchmarking tests. ### Testing methodology +Fun fact: I originally was testing with *much* smaller array sizes, like $2^4$ and $2^{12}$. When I tried increasing the array size past $2^{18}$, the program would completely crash. I was really confused why at first, until I looked at the exception being thrown: *stack overflow*. + +It turns out that because I was using `std::array` for my input and output arrays, I was allocating too much stack memory and literally ran out. Switching to heap allocation solved the issue. + ### Graphs ### Analysis diff --git a/src/main.cpp b/src/main.cpp index 953c1bd0..594eb4c0 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -10,7 +10,7 @@ #include #include -constexpr int sizePOT = 1 << 12; // feel free to change the size of array +constexpr int sizePOT = 1 << 18; // feel free to change the size of array constexpr int sizeNPOT = sizePOT - 3; // Non-Power-Of-Two /// If true, run additional simpler tests. 
@@ -39,7 +39,7 @@ using CompactionFn = std::function; enum class Implementation { CPU, Naive, Efficient, Thrust }; -constexpr int numIterations = 50'000; +constexpr int numIterations = 10; constexpr int maxValue = 50; std::pair getScanImplementation(Implementation implementation) { @@ -71,14 +71,14 @@ void runScanBenchmark(Implementation implementation, int n, std::string_view ben const auto [scan, getTime] = getScanImplementation(implementation); std::vector elapsedTimes; - std::array out; - std::array in; - genArray(sizePOT, in.data(), maxValue); + std::unique_ptr out = std::make_unique(sizePOT); + std::unique_ptr in = std::make_unique(sizePOT); + genArray(sizePOT, in.get(), maxValue); for (int i = 1; i <= numIterations; ++i) { std::cout << prefix << " Executing scan(): " << i << " of " << numIterations << "...\r"; - scan(n, out.data(), in.data()); + scan(n, out.get(), in.get()); elapsedTimes.push_back(getTime()); } From d37d06397b6f6a06b52cc091c05ed8d15b95b0f4 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 22:19:04 -0400 Subject: [PATCH 43/46] finish benchmarking --- README.md | 35 ++++++++++++++++++++++++++++++++--- src/main.cpp | 2 +- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index b03c861b..85a10065 100644 --- a/README.md +++ b/README.md @@ -27,12 +27,41 @@ Below I demonstrate how my algorithms performed in benchmarking tests. ## Performance benchmarks -### Testing methodology +### Methodology -Fun fact: I originally was testing with *much* smaller array sizes, like $2^4$ and $2^{12}$. When I tried increasing the array size past $2^{18}$, the program would completely crash. I was really confused why at first, until I looked at the exception being thrown: *stack overflow*. 
+For instance, this is what the output looks like for a benchmark where I'm running 10 iterations for each algorithm, on an array size of $2^{30}$: -It turns out that because I was using `std::array` for my input and output arrays, I was allocating too much stack memory and literally ran out. Switching to heap allocation solved the issue. +``` +******************** +** SCAN BENCHMARK ** +******************** + +- Number of iterations: 10 +- Size of POT array: 1073741824 +- Size of NPOT array: 1073741821 + +[CPU/POT] Average scan() time: 268.988 +[CPU/NPOT] Average scan() time: 287.848 +[Naive/POT] Average scan() time: 1381.91 +[Naive/NPOT] Average scan() time: 1382.35 +[Efficient/POT] Average scan() time: 233.941 +[Efficient/NPOT] Average scan() time: 239.228 +[Thrust/POT] Average scan() time: 23.0701 +[Thrust/NPOT] Executing scan(): 6 of 10... +``` ### Graphs ### Analysis + +### Miscellaneous: powers of scale + +Just wanted to share some other fun stuff I encountered while testing. + +I originally was testing with *much* smaller array sizes, like $2^4$ and $2^{12}$. When I tried increasing the array size past $2^{18}$, the program would instantly crash. I was really confused why at first, until I looked at the exception being thrown: *stack overflow*. Because I was using `std::array` for my input and output arrays, I was allocating too much stack memory and literally ran out. Switching to heap allocation solved the issue. + +I then tried testing with array sizes from $2^{18}$ to $2^{30}$, incrementing by 4. This turned out to not be helpful at all; my numbers ranged from 0.068ms using CPU and $2^{18}$ to 1380.23ms using naive and $2^{30}$. Furthermore, my naive at $2^{26}$ ran in 75ms, so there was a ~18× difference between two adjacent data points. This would have translated to a *horrible* graph, so I adjusted the numbers to what I have now. + +Both of these experiences really left me with a newfound appreciation for exponents and the powers of two. 
It's *scary* how fast numbers can scale. + +## Test output \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index 594eb4c0..137e4baa 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -10,7 +10,7 @@ #include #include -constexpr int sizePOT = 1 << 18; // feel free to change the size of array +constexpr int sizePOT = 1 << 30; // feel free to change the size of array constexpr int sizeNPOT = sizePOT - 3; // Non-Power-Of-Two /// If true, run additional simpler tests. From 682a0c483b6a18723979bc6c7c17ba261e90e659 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 22:21:07 -0400 Subject: [PATCH 44/46] add test output to readme --- README.md | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++- src/main.cpp | 10 ++++----- 2 files changed, 67 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 85a10065..bb277dac 100644 --- a/README.md +++ b/README.md @@ -64,4 +64,65 @@ I then tried testing with array sizes from $2^{18}$ to $2^{30}$, incrementing by Both of these experiences really left me with a newfound appreciation for exponents and the powers of two. It's *scary* how fast numbers can scale. -## Test output \ No newline at end of file +## Test output + +This is the complete output for my tests. I used an array size of $2^{16}$ here. + +``` +**************** +** SCAN TESTS ** +**************** + [ 29 10 21 39 47 19 41 42 5 25 49 34 4 ... 
32 0 ] +==== cpu scan, power-of-two ==== + elapsed time: 0.0168ms (std::chrono Measured) +==== cpu scan, non-power-of-two ==== + elapsed time: 0.0162ms (std::chrono Measured) + passed +==== naive scan, power-of-two ==== + elapsed time: 0.246016ms (CUDA Measured) + passed +==== naive scan, non-power-of-two ==== + elapsed time: 0.313984ms (CUDA Measured) + passed +==== work-efficient scan, power-of-two ==== + elapsed time: 0.806944ms (CUDA Measured) + passed +==== work-efficient scan, non-power-of-two ==== + elapsed time: 0.3496ms (CUDA Measured) + passed +==== thrust scan, power-of-two ==== + elapsed time: 0.088352ms (CUDA Measured) + passed +==== thrust scan, non-power-of-two ==== + elapsed time: 0.09392ms (CUDA Measured) + passed + +***************************** +** STREAM COMPACTION TESTS ** +***************************** + [ 3 2 1 3 3 1 1 0 1 3 1 2 0 ... 0 0 ] +==== cpu compact without scan, power-of-two ==== + elapsed time: 0.1223ms (std::chrono Measured) + passed +==== cpu compact without scan, non-power-of-two ==== + elapsed time: 0.0794ms (std::chrono Measured) + passed +==== cpu compact with scan, power-of-two ==== + elapsed time: 0.116ms (std::chrono Measured) + passed +==== cpu compact with scan, non-power-of-two ==== + elapsed time: 0.1174ms (std::chrono Measured) + passed +==== work-efficient compact, power-of-two ==== + elapsed time: 0.771552ms (CUDA Measured) + passed +==== work-efficient compact, non-power-of-two ==== + elapsed time: 0.634944ms (CUDA Measured) + passed +==== thrust compact, power-of-two ==== + elapsed time: 0.136608ms (CUDA Measured) + passed +==== thrust compact, non-power-of-two ==== + elapsed time: 0.241568ms (CUDA Measured) + passed +``` \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index 137e4baa..5c95b04e 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -10,14 +10,14 @@ #include #include -constexpr int sizePOT = 1 << 30; // feel free to change the size of array +constexpr int sizePOT = 1 << 16; // feel 
free to change the size of array constexpr int sizeNPOT = sizePOT - 3; // Non-Power-Of-Two /// If true, run additional simpler tests. constexpr bool runDebugTests = false; /// Run benchmarks instead of tests. -constexpr bool runBenchmarks = true; +constexpr bool runBenchmarks = false; /// Print out resulting arrays from computation. constexpr bool enablePrintingArrays = false; @@ -27,9 +27,9 @@ constexpr bool enableNaiveScan = true; constexpr bool enableEfficientScan = true; constexpr bool enableThrustScan = true; -constexpr bool enableCPUCompact = false; -constexpr bool enableEfficientCompact = false; -constexpr bool enableThrustCompact = false; +constexpr bool enableCPUCompact = true; +constexpr bool enableEfficientCompact = true; +constexpr bool enableThrustCompact = true; namespace Perf { From 8f006089ae1a989b1ccb8e1bbc79fe87e629bf24 Mon Sep 17 00:00:00 2001 From: Charles Wang Date: Tue, 16 Sep 2025 22:51:50 -0400 Subject: [PATCH 45/46] add graphs and raw data --- README.md | 6 ++++++ analysis/graphs/scan_npot.png | Bin 0 -> 58673 bytes analysis/graphs/scan_pot.png | Bin 0 -> 56566 bytes analysis/scan_npot.csv | 4 ++++ analysis/scan_pot.csv | 4 ++++ 5 files changed, 14 insertions(+) create mode 100644 analysis/graphs/scan_npot.png create mode 100644 analysis/graphs/scan_pot.png create mode 100644 analysis/scan_npot.csv create mode 100644 analysis/scan_pot.csv diff --git a/README.md b/README.md index bb277dac..7f545a3e 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,12 @@ For instance, this is what the output looks like for a benchmark where I'm runni ### Graphs +These are my graphs. The left column has array sizes that are powers of two (POT), while the right column subtract 3 from the sizes, therefore making them not powers of two (NPOT). 
+ +|Powers of two|Not powers of two| +|:-:|:-:| +|![](analysis/graphs/scan_pot.png)|![](analysis/graphs/scan_npot.png)| + ### Analysis ### Miscellaneous: powers of scale diff --git a/analysis/graphs/scan_npot.png b/analysis/graphs/scan_npot.png new file mode 100644 index 0000000000000000000000000000000000000000..95db0859fd593ff0e6ebf4b1c0cf2ecd8313d5a8 GIT binary patch literal 58673 zcmeFZ`#;lv_&>gSbv~;cijZEFvy?;55h=&ylyi}gOy<;vt)oJy7;+4m^Ooam%<5IB zO%{vA911fGdl@q`+dfad-tQm2f5PYX{XB0sH&4&)>G^nEkH>Xg_v?OL*E8v=gZ18B zM|XihpuJZvTV4Z!wxxkUTMq2l23#2qtqcWz?Ywu{BN7DKeQ4un6DT)d2DrE>@|v{; z2;Cz$1N^Zi$lTr>1gcBly?%2m2sGJw#nRm6{-!x$!ixfTxWaw;cAb&KDjD^$c4uSk zc4=@GV>(OvE*ri&rHFgp8u0V4*POge!g*g4=nb+7R<8;nx^XciVoVr8TwJv^*Sn7Bj^138>i)cqFp=ED(#uJF+{6iGp&7>e# zs85{cCeWEZ|82+Ht}dJrxL`g*oJRWK?h8GsZR~VY_V+N=(QG7obVq6odvv7k5$}8P z#SluN3Gl5YNoHzqQ*dvIj--edGSy8JREuhgs&*u$9W&D7hf_|1K#ArlCqNG*4j>cC zf_EM=@0#teKIAMNhzlfV<$20HRUY&1#5Dn0iNvr8yh6XZPcJRih{Jc(7FyRL_X9%M z`)K3#f5%g_Mwi*c1w&_Abv(b6$dSJ*i-f-(?}yhIeW>_3N<<*I`jg4?sDG6QBtBlN z(T@%98kS({^vIq&y?Jn=E8U%4>greiE4Y2sFJd^TCx7O=qg<$5+;mLA0K)qhZVTwc zndz#1pc`3MmnCX%TDWY!6nr?!U0*JcpE!*MvquTXvXcfb1(wCQEXT+$J@0joag`p4 zvybjRv3BU@>Zko?-URl^3mz3EcOmyC4Yz`B=&3t^K;2ct(az8PvYCD_t$)4pYCqFb zA75(G>=k_2zmSu?j{mV41iDfD-SrsggSu)fcvmken71(E8XNNaskCnn#QkDj>W;Tc zJCe|Y_@?}sAW`r!$)zc`Yue%J=WGJbAp@lQTT!8h1PVTbCc4oL4t0PRO*UNr(t+jC(J^2RImbs6Qw}5Co95Rq~NJX0_jV{Gql zbV#zezd8iRG42;P;7x0b+`Vv5l~c}#NBAszEsXr&;j&p^)oKm||T~E5UrAH_3D{+`@ zoyoF%%C%?B{=IVC(?77+BJ^cK8q|L{F9$=5JVzggeW^X}Ulh6f<0)-lw}Hy)&o_(4 zf{)YY3)8q8iW-Sm>r_qwGo%6!(5^!@@kw>x4NaKiOF6#H!TF)U%=}?+G!}WKMr#1s zRd;ltc{{WaPr^#+Qx_cLdZ2S4l1&E^{(%&oLXIlj}Z1gfSA5j9i|_CFnF)J0}+(lx#Cl|JpFNEV~t&QvZT9oKwjBvudiAx zEmDFOQz*}~a5>+FI}&XGBAHum(UMpnAS?d!?CkaD2kO7Rr-bL{o&CEKqZRw=4OV)o zv#~NJW$S_O=Emk=fE z7Sogo|NeBq(kg^iFYNJSL)50i4*Kjy`s~+@_Hl{nrOmIBqW2CFu1Ecqa}@8AYomWz z7**DP3ZJY>lkiE=O%iJG2qDCY21q*`&rqZFvY;xhm1G}Z^o>&8@ptU-mwJ(Mo++dr-$$ z;=$)So~)9+?)j?iYGEHO651UE1}`|OnAYZ{gZO!85A%_En2H)H&!Xa{3eOT{;HsJ 
zC2oe}(BP%)37XXr{WmjzMy=f`0hr3?=y-^}N!Um{GN3av;5b)lfEMZ=Lg?{Jm2;gMcvk3aXRUOdcU1IYpJ)B8M#*8;`gYm_wwV{PbqR^b zdlCkb*X6wEKlK@+&j!;!e4G>VOV(De621h4J#AjPhSu+i#%jL97htC?)YHs-sl7g) z8PogJ@W)t;e!VpNgzge!U)%hKdfP4spEr92>sgfK>0M~*n!hJhVIPFhodqXpWR6gy zWsT7ErT3$$Oz$Ik)taOlXIuxtM?MIPwjjsplitm=|Ib7>fM55cdQMGEg%;BM3O2dt z&C9L}M{ea8pJKGwN8GhXVi<~Zfm?ZROJ198_p|RhKqnvXi`sUX(V||b5N7V(O$|Si ziNrqMqpE>SH-Xjrg4r8kQB(77RWO?u||I zv*tsXpO>B-WnB38`2KAQMb@@Y&AkkDljgJNhEUF}(Dzp{%7l16ga(S}EwRdAkHxVG zKQQM9cI;e=nPr~LL@tEUZmA!lf%C`iTggZlyKE^VG+xUEOAK&f_R&V-h}7W(-bjDM zC01q4s8S{PClf2*gVYcYO`f^0S()vH5h#4L*7Q66xjQfQ&(``Q@%Qyjm#_myPIU#3 zORsCHwmv)Jzeh>6ycA;msHQTe_i(K%g!N*_3DScS_41iJPhYEjo48+c?VN%;Zhyp! z0{l^y^^-G!?*6?;%bXc-lO5Aoxosw95;2pzMo>$yY4n-3RF)1Y48V06T@qbIUXYH6 zF13Gp2X|=tixVY%84oJv;Ab*NN_CH{@HJ@q^pC?)N0-l&q0BoxNaWyM6Tmr57XeYi zjl}*gwd8-_N<-%|o26{d9dA}ZP6TfCb@@P3`yCnj4y<(M%SWdLDUIy`fz??zqnv5> zPh=wEBTSX%z6cZql1038w_P&US}xVKJJdSecFwc{?{zcN3VchOZ~F{WGEP35>J}5& z5dp513e0<)U759bH`(T2<4&h9K1Kj!>DJuOU++3;Ikto9nBB}tBNxjlK<8bb{Lhg>e}}A?h2_YzLely3cpyo13c7eG27U! 
zI6Lxp*2BPs+uBnzWp?A&Bh^ad30;OH&3#Kvl8Csy*R1X5VvkfzN!=xkV$MO~H=A*z zn?RTHHYBl)v1{3!k<1$m{(9Bzfi){xCND^a6k!cHCzn4taMk(P z{xVav(-vn!RGq6^<|SqA3=Iegd~gY>@|a?d-(Qnneu!z{d6N-+2}Gl$I8|VT;>NE&++tMWyi6r-0 z9N+xJ*Ei4#W~PyuSUG2?*?H<{*4dY&krFQTnCI!b!r-q5F>9;mM)rgnX9h=KQ=nDQ zB0Od%6uwR3(nB@fm#3DlZ7cdTZFjPtWfFTQLFc)pa!rYSc#Xs`i2 zWj&iu#?*RY9W;$(C0SID zO~-y)di*Zv`n1z|9)+&Ckrpsr&iH=D^Yo94PvKp<$wEIOTsTS0^K7y!KdboSdj)Oj zAPv@?;a6Zem__5cSl|Z_&_V}m{qx5Hy3$4t-M*mjp2>%{4+d{Bzm~awV0Vh$d+?L+j-c%K9vVa zLbXq?eKS9rd%QW{uSqVkvc<6p&4yLjeX{SOEouU)CXPPk40q}Nuw6xV?ar;LH^QGU zhZc#Xb=oO& z^NK-j_AX+DUH2wO4|9`#%|-u$!0U%3hXH)VZgJGEh3|PBMRsep+(}3ab&>YrBgxT2 zV&&HJj!wtpcq6fn2Je$V6>~8ftQg-}jn{^*>z&N7-&Rr@F-o8BgyPQ0mKc*1&!T_V zUu&Ct{oXlyV+M8Y9N#KGG6V0AAP-z)h_Z`smY+GlQtTWXU$!P==e+YTf4?{l?0ISX zO$z72^@;!sy$s&4&_n%*dDF|bxjVEV9UkCKXEOe3vknX$g}W?oU`SWn;vLfdeo8v3 zpX#n*6H-GYWHKj|tqk@C$i1$pXp84_CJmmuDJ>??Ga};0CA$uHxtk?kZyy`j>D22H zqawp%M<1(mWN;6XFVTV|Ku^3^>e^cu@S*kIrR#v*+-|(cKSR>$iSW2dXF(0_|9iuA zkWMkxZ`zU+iTo=!J|T0$f3Iqfqm~a2snkO1^4<2B`{1k^#cQD+-OdK44NY)5UX+~@ z&5Y9xFC9IqlBL36x|s6n*xK^0;0{_0G3pv(p##pis<%?Rry3T)nf&_IEg=xc_*XkU z+ihV&k$3LXIr;{qgX!!8eQ3<9cycUVyqrt}(0|wVh{f@a#UxV0SFW9l%b1RdsG!{5 z<$H{eA93kf5{dIQaLBhVoq?a=lGHF!KC#`-n&*NIn%?X<3pU(OXgj6)rYmk_?3!>@ z0JV2{s|OA*dELeorY}LKy5B6`e9l69$7Tj#{ENt;%~Z9tFr}Q;pN9T%jCd}qUEy(I z##*$u885=GF)6g7O@F9|3_v5ou;B=%ZvIqVs_#lji3JhHKsavst5%>Zu}>FW3imY{ z@hfY&4b#9j$Ehs~jJ=8Q1jjC;=an_BUO4+L(bSmy2tp=(Au6?J;E$sHiE48@v_{G3 z``f}aB#14Vt#)c&%pzd$yvuA$E48SjoRNdtF`bSZISmcqI>3dFe3<` zsy+ss;>edd+90K)FZYu-d{M}n(lLs0Vu-dMiv^v)XK%GM71>||(`KzM=9 zGlp8R9*6yXoQ{|jKzOi4{n#>TdV!i`W*yt9UusFHO0fiRv3uXf9XSyOe5h zG-`cy@oR{7bFWLM!v?Ll4Xd*W^kGlR;)@mbn7ED-;CL1=2 z(08?u5}@z>mCogTyS3V`y58tid-S~GbN;-|vkFksjlV^mrm1+7xD^)$HjcMYI7JB@ zy~UmFcMXJ;Pb|d7QB_s~#3Et8;3w~AhECCG#(qW# zj`~TVA4F(~MQLgyxsXB&s4HgO2Kdt$L@k3A+QX2UWJ9uQ>l0!q$;uLI zX+9Z@GhV#2-%v2Xzb?h$JL=dYt+AZJ1ME17r6wfJ@+pGrJ@0P(*q?vDQ0wsS8U7HN z7sWq1#q%SgSaC9-oTA3_n?XlAFi)1WoG-51#FV@?@M({S)*~&}N9qG@VF0aC?e;gd 
zZvP^gO$WEu4qa05%>1l3A>DZB_m!j3d}-$K6++&5berA!*%lG&*D!vn-`&qXb}S(_ zw_*!EgdhZm_Emqwgtg@^bT2MOX!ooI zTXMHvS*tefQkToZ8>jnlN@Gj*L~fdtVuUywrb4XmE@?Zpxv0v!q-NaU5TcazVYuHN zGYWY8*>JBTv1^60flD(2=+%+-^1SXT-eQF?7Eq>fU=hIpa1qbess$S;XE|jB z6`;-x!CzwbanBu_t#ulg(>COo9ICy&rmae0zi~0ua_%irC9-3Rx|WUEmWrIZnRJtt zpOd(0Ce*We{jM4lJQi7c$27rge^oYn#_8-F`zGiH+$$dh+O?>*yk5;(wYszZx@E?_ z@sMQ5%*X8tHKS$#t(U-E9IGBassvXV`+Fr6y?#fJr2853K8_!a;FEtf`)PL>JooFM zF%^Ivqro0~zfvYwep5;Dj(2!t;n#_l&$^!a+32F1*>B+JUMCow z@~*z7mpu6pHD_G7y8E=nm3g!8S^EjG(2^1 z?Q&Zt`ATP$)yg^XbqhICBT@|Wp1kI~R(%q3TH8@;B)S7qx6r0g*K-|RYk?ZG`PF7N zlJ)iB6~Z_+d)~@}@o*x*#7Peq-}<%7Z=a$9lQ_Q`c{0EYYsGJ4BrF_0)1ep1&qRK* zSGrE_k|9qDL-BADH-_4rj`KGN2N2#ZzUcWpGeMPIev|y$Vwl}a@N3VaX9iMdj)t^q zHNeT`ZABMHDLbhMUT7lCcZ#Cu;5jc}$JvPQtLLX6-??jQc%3_ajl_aU@B@6bN(M6}VIexMm*C)K z&~;%UtgEmI=|x}6W|mbN6>V3De~)3I+Z;-b@ZhCR<3-ihoI9#T-dTjU+-*|n0ikNN zW^k3ic&J&Hm7BJZ?(aupwW(8@BmH6ov;kLlXklKDEs^1j*214q$KYVR>yJmXkH?O= zAUgE?3DBYmeE8WfPZKGq+8#$O;E zRxVcqoffZRUOb;!Oir%mEWsGJ3bj=INhHjDhE09_KMggL!E~^?J6K=!IfMZ%JgwS7 z3Tt+~KEO=k{SJsfhcKFByJ?;;1Fn~9;GAXay&LCEVwK2)KAX5X zP-xCuLS)xuipmridSp%yV=yKmtJsn*ngqR<_fIq@mBY=L_342mroGH$`<}oKIA8q> z8OaLUwR^h(+P685<`Ww|49>V|Otcmn^G+yuVSj1-lk7lQuNe!vAG})%#xLAm8Z&0c z5a8g(Lby1yOOo8FwqnriqdBIxuRqE-JdR(uaO*pBz=im12_no?0KNYghz1{AdAts5 zRynV#raQg_J89{1J+|ub&Vp@;T51809HQXogGB@h0}cg&5B zX^T7=OkL(=a|0R9*!e8U2yI8_r+4|)Q?LsMx?}`YH`BnU4jwAhW?~pA8LM2*WdUnW zLWhhexSa2&4K@?x;El)Pz93!g19P&&tFNnhZAKCwQJ5#)gR_b2Y5byumBB^xOv#7P zqEd_cZ1l>SfkNPi{zgn7PA??1;}wlwdksfT_>t~_6yem3n($v}m#EyaN=edNMg}IY z<&t3m)tpTR3nUF)3-`mk33v7VWQF;Hn{p(3MVR=lIQvF1l{Z{5 zsz0(4hi^xa*gVvPA+>Bb^`==kfJ$h7_JPLz%z?LKpB`sz?1qihl#2a6v>fc194_n^ zn>QMVS)+%%pH8xQkzQN+BjmlfBuz}OyEk9U%@mbZTpA+F35D~-# zdV#$rYEIfzPmb=4V7Mp9+IRIf5{w_j)s11?X0W%v%;?VBAUD3t9Npy=>tC??B=h#*ZQbM=n7YmL?3yiTDzj8VDn)b_pCM4wjW3-4@L zsII;(pqF5@l7d@7S}#7^ttY{+UC-VA3cH6;xwK;Js1zq zwfz2CjWm(qY~^$Jbml`#Ov!7V6-I}&U(F+Q#T-l5d(OSwM*Nbj>_@i`q)KTU-SiHiJ>X+>%=t?m5+nJ)VQg~sUQzEk8I2X5J#U)+OU-lO zbDM+`l4uh?2DVK>ku2rf%|R^WkhfxrvAsA 
z(EJ2CrgaHlQloU3g&I!3&SsFat3y>)lOqEoT<)2!{0vA%);}g8o5Ccq++9N`mL}T~ zy!Sv?pT^6hjqxNF%FAF6#BYil8+WPcX}hxt8eDtB(6t*oA4(i5d{zo86uk0skBXaG zSxe{-{pqbrw9OqDnE_xFI}a=<=v$5#FGIXXdKmAEungX^B5gsDVfKRk+nl@~bp1XM zXMXs=!T6E=J~u}rTxgX_Xy>h$JSsgMSU+kV22>7Lg}1hk)TcBZKx#Stb`fnhu}J=z z4tv|bfdFip9T?fIm1JIY6rDXI63s+;6XJ>cm!NbqVfc0xrM5DD zzhl|PKCuF9xJxeHkoC9f5lG-NfXPprv7_%MIUqIJF*1))wf>cJdMGa|A1#3^CMW4v zv9ok&SnZ*;+AW%Dqj=YSjQWr}M}9ODz>*d?IhJG=chLdnin;Yd`&fyEdh?;*b}kk? z&nvDm)<0`gid#-a(mVs-M5z^3*o`#=Q6}|cEn3*>MK$SdS>pL$JbuH+V<4SCl(%Ff%YO- z$a9+k@W?*hd3WKG9(zUG?Ftn$OK+B**!>iYB;n5DMhooG(+iF9;443VbX=XYT{x@h zZpuW%V3NfS>o%?HWidp&(9^WDeTNf@;G(GaGp=n7=J?b#YMA#Je{Oy#{6f@ z{6x->FX8UFF{8Sk94&WvbE?R5+tq2kD5}=Kfa&mEC^c8T_l&#Va|xJ! zbx}~T7KvTz+xoZKykt^f$MqM-U(#r&Vr8Ysy23}TfVOyT{qnZ>DizOSu>-$P56?T( z>j`d}+@m^HTiwyO=s zFwK*y{iMDvFOy2P`QqRsF@=5z3I1VgThUhNE%EamJiad6$zkzM^`wHE6$~79IPhiH z;`A~}K$q%f2$NZgm{wA)AcmBZNtZ%VoA%1*v_2P8_)# z)%uLAIXXmAS3sE6rowhg872g!&L$k%#)lUI>^^p4c3Eh8j6$z$ypy-UbSr27u3YCy zfZl^r{{pRlungqiUuT+c(2=rCfMWeMX?#fXsJ?yc{l)Rg#-v@Xi|2xR;_vd4b`^hx zo>EOdihWhy)fQd%El1BOiPkdmlL+pTVz(aisyP=2g#AWYwm5IOSgdgV7Eptc%2Fu+ zV&Yu+C3_dnuuD%ET`gTa7*r(uX_yXF`4}XWoQ1Ygwe^sGP~Rm{l%bZkBQ~`5id=X( zOHCov!?X;=N#ag&2gnV1^~1Sn+I9E4=X{}rku(#M2HO#u4upNF_qp}y41zAeG*;$hND-1K-;)$; zS#1p6vBv%OlqTOyGIH!>nFB-jKa7~hE~c5Oii+oyQvZtf_>JO{u}0x_z0j)0TpMK< z60+GIX^!((alhNKHXGRI;_+s!W=oG`c-{U8%~_1M9<;FH$!v67T{r`%_prKf)r@J+ zaCSp?#L#;}k8F!E(sBxM!%WVeO4EO4!N5HiJ|{BTmpx4xxYV4}X%xX#wYL+rO4mtY zOs1W>AG!g)4+ZhIr)ywFe6fJU3df>LqX75BE2ZjstFk+;qv(;f(u6I$0sBLmkSW`M zJb4-ip_H=^!g-{U-)BFP>ALy+UMA9Zyav@&es_i|olUt+Cq;*dxud z3}4fW0$n6Cc0{>=>ASZtxsnd26@?PM1kz@311lyp7W~^l0+r2+#B@j@iCEu{0fOw(S*|^%qgj z&U7j@!r%WGG(U0Gcz~|=0VWl2?6aP~Tw0r*uW5S1)rE_9)exh4D67J9XR%N_(dxya zK0VYGImZ|KUz7hHkYq@ZKSvC7IR?|m^t3xKlt)-{VE$(GZ7%Fa!s^|UzRII}jC-!g zQx!W+zI2U-(YcHzT$*`1OK^B8)=+D4(6Vgc7LLJPt$P zDUXNYipWp_L90P|BIk6WK0`*a&auwU+Y2iD={yj$=1~yk^D$@tSG-6-iU9{Z;Epbn z?hIrXoFq8Lyz_GD07C)eKyFT;AO*U~JbU68r6BEelt6T(lP6H5(Dj7yph;o1IPN<~ zVTE^ZvT1q*gC1Kv{#9UZH0}4~oAd 
zrR<7Yi2icPH&eGZyzKa?h}-J^6YwcqeNVfr-?7GBPkwDY`UUi1N^Nl9;lDcK5&_zNHxl4CWOe}*ex&`D%X~?SMlBwA-bTh@<5>SN zzU9BO4c*%~KJ4Gf?rt0v{}-L8+XFHjmSy`mvb|fJfNGs~r-WusMoM4#fVxAhaPoCy zgz)v}VQ%kCI4U%ph(wFE!cis-4%8QdLIkRLYF>Pv2;1=Vv2wN89e7B0y(GqGIL{wWh7ngUQP?{jz;Q z@VWA8X%MJ1ym$4<7SNgf$HWR|Hhy9rt?%cv0&!%}x2lerk3@0OGQVgsFBf-f?(?hV z5ez~jHQi_C{MfiIt1tPO5%sj0cz4p)Ut+9QnY-9eC4qO1$%o&%Xk&)eFQb^|Jg79_ zoJPEai@%xKxwj*lCgKrc9SCHMXc_a)uO-12NX7rSv({TKi<#o% zWeOuNABggg3CPQnp?-GZzVXL@}ivPS$>jy6AHGQW;ZW{iDJ=;%W*pXMLS5BoMRV40m`LOgG7P zCwu#*`wpeY%;%yr7WboFSl1JNiJem)>e%tuKO3xa{lwe>zGw*{qPQ|{t*+6Cqxaj_ zBw6djOjCEC-CD$TNVJJMkTIMV3lVdpz{u3fz>vbU zG`HF-3?#gN%PyCO^|^>B9TtD5$1iyZ1M2uoG`0gx+(P^pN6WQ3~j}8nKUK zkQ8V5=r=w*enPe%M`mJF-aZ!?q9{#~Lo8i+TBr)?_%n+|xfG@chFV&a7@_duAK~Fm zm%a578*8`BryxvIU#pENVze-ptkz%2iQPlu7tK;UaL8Qgma6CCnyk%)?@f-lNRc1egn zSKD`s=fe>Kl>54>C>z1guAXtK917v%re~KZu0G%P+SagUmxx8c#?&(dXoHYb9o%Ij z;*wE+Q{4@V(tER4ZN{Kld+npVVrOS`)cM|)g&m z4f3FJe^yE^c%LKD_JYn}H%MLsEqsO1BIcPa=Lu6RY+C;K?9=Yjl9M)1F5$eYU@4i? zC7Rg7$xy+OfD@|v`vJ;!(8^QY%+FFf+TC%F*$%;i<-NnT>8Oifp>9SA(%HE>3#x{% z5V#Wy5;Y%{-fX;)`x3D@_Cx5ps^V`EQ@mc} z&NfPQKKR=SNT>ImSJe_oe016(g^R1($0!w=cf~u$n2PK!0x(Oh2SfIBlb`JqNl6{c3TgQb$8cXKi9a zC2X_+rAji@d1_%`mmKxVUPxGnK0?2{rySz|8f#+Jfc+t#_ z#n#bue;#xlEGbFeCm4K9D{9@FqS*xj$um}+&A86T%DZ$&qZEE{ulYVkm%|a6jAK+% zAVlUmqO&U|0G6NKmKL8MCODh%$2a{E!LuOWD`H_eu$7a2d)%jE{Q5DE@xF!q(1PPD zC#OyQn%wBvxOERAq zgw*MfOylSKb$m>7ca$X_le0SMv!_R%X^gL7VP)T)=Mk4}i9gOeSVx)v_~EIc+Dne` zvuOPCoHCx9j6T4EWM7K-_Crl7K+MuWmGrfT2^z-D+Hz3#Pp)0ukybK(oAk^=HORIa8ahO_KzS?0e_BNuN&9(mjU*JE#$LN)5rN+cr| zLobTamhX+?r%*#=AhvZ^&9?K$dhaNXvAA_FIYRGd^)DP~a@ZUli!pSB)-;@V)Hd<7 z)kzBL!cPo$H4Rms^nluT*3FF(&Fr5Ahe^;pk*=$x+9zSOF_NuRJFC(}{%kNO#XncO@uek$VdR2l3TQN{l;y7x2;bGnE4D?l?BLeSA$4l@%-vQsB+Grw3@^`pFl0^S_@Mr6$VQFwncn zSM@iSRP0F$acXwTl6ame*~j<%!cy+w@QsDN3c_A{ri$vz(k2hPbQ@+f$ne>*g3qQR z0A&o_3O-C;mc4A9k_1!CLi$892684Q5Zvz6fC{!Bb|!2b_N~}Sh);|D3^bnHhsWG0 z&_)VQpqRo`{)q=Ek#h7@&Q~gDY&uu{vuS$Yb=z?JfR;$`oEEiWnj7EGh4^t*o3o1! 
z8JyX>=$+0|vLFF?f9#SFzX>!Y4^*FgkUNCDg5&5|c3g9DyL8~CT45Z2t0`3teq0_2 zb5a=z&5rJ^{~PW!`nT#p>E?4lb-Ml&C+IQB5gbH2C3*gA%NKw|E%mRLkLp;N$X=F6 z>}b+see9K!U^b506w41;oeT$?zwC1UdNb0Q5iTEEF=PC6hZ*Po1?~cyV;EQIl9oJk ze{@nY@}=7gs9&B=^f%pOB=Dv@k{X(&T#Z^dZu@vtA&gN#>kVt|t}&;$V|cEZ67uKo z%WV{MP0)v^jaui(QIdjZrs?WEnCG$D^;_bJJ@(jOtM8{&wMzT$SNTl7cx|N?Za<;m zeT8~_z{F?V+me-D5uZ*HRM;)JU6ChGlK2&0dNiCN9XE*+Zj9vawN(S05&>Q?`LMtA zQDY_Y@+f=|c6&hGub?kht1LuYtG7|j%+^1a`SZ>C2T2^~zGBh7e`uC}Eug{C|l#Xe?GhUSM|Jh%yjvH=38ZfK}m+44sn?!Bc- z-A_jjp2fTF;!A&a?j0a0s^*kNe~LdsSN5vE9fsGl|Jd8-m|$pMZhRiLWpnRrMXj!r zit5hNYwcljspc!k2VA%9z;)Or%;m=?3~YajVd;O!l`^Y0Eem&azPEfxmim5j*Jyo8 zwcMA?;;Z}8{M+vrT}(z@I8ju-?mctsWu$A&SH1oqH}8N3AHY>Yx%Gtx5eU1CCN;Ra z&l}XjtSJF@ah&*N;(o`f_hR>R@QI5HiJI`g`)s8vfX9{0uXGMk>-mLQdnI-%-Rknqo&kZ*6N99h3`p!bum~19g>iXVIFf zbEQwOxfolso}Z^J?p~UcMl4DT76w%D-6_cnKxP2kahMfe9d&p2>4r+T5Boa`^2)>W zOBIM}d!4sfawF1adH1>xNp?ugcIaj>3u@~~q=luDalC1}lecXFn!&&8FswkGJZH8- zrRw$c7F35j4y!MSS3jEX6EB^&g{s$%iRQ-~i0fn`Kxrr1|5+gMe0eqsfGx{gtm_tT zenE<%?^zDDk5=Xeoxe#V>w7%|WfBL+Lr}u+guK4kBC$~C7<_v4TL>QMGY~B+tLzE= z%$1O&Zr?9&_vvc${Umq!NE8)FyPX&t`Hg7MEpv7(4_Q1%?>l1!of@#@%h$x_NikKn z+le`TBJym5+^PN!q1+~a7kTW{F9tW{-dS0IXAU3`D?x~6n0&{oX{4_5Y+2iy_UVQ6 zka~WX*kMhm?`K23xs)!Rw`_o{ziUaGfWJU1BUo5?H7u(eo7D*v>B%~bsewQZ%ARr~ z+zi{k_ak+;7rzg9?IVehnE0~j_KE`}jlHlVk)`gqev2mu3-A}n#s4Tb1t&{c7qzg$ z%WitXy2{G~3HFfe=whmjD}Q)@tDso!+GnjF6YGFccJ7F6pW;NH=_~2zJ(kz^UAWp= zjM0zXDZ}$C*PI5N&#vF!V-%(+^d4uvTxG2F&y*dWob##@u*4nh9QZd~T-#8md~%%9 zqY($*?Hp%bmrjzD_2SWHM@)Ny=YWyTkY`GBbeIe+Ky>_yisxUKY)jkj4zod^Pp1He zW9sR4vFLQ)d*KvxizHGVN*B>;D+L8?VqIbM6SPd-VZDRdK+vu;N=Hxzm-P2)iRQ~@SQlsO7d{e+>It)5i2~WY9RVeM%Bm`* z5N(k#R=hA)i?ULCV!Y46WqtA`AP!aoC1D}b^`z^1YJ>#jssx=vEJ+t0ZTh9`c}#=I z78OT(NSpt#dlBxU+FBEOXp^JMWoASmqzn-i;l6E4!9V5A08a2XDV|@S+C-Z;n!1?d zZ~Sg}*C}+K+~v=LS2CFP`!gcgxNXgQO5!;E_<_4@aSkYmbv}gs;Cs zs|oKmNA!gu%a9iw^>3?pI-v1*LA*@aUrWX#*7Ol46JXp#9ls9U!q(lXEG@X=L8d-} z-E2ac@KkG+PGu>YDL`^u#IL^o2f~y#_DZUcnc(8gW466Zy_n*>JKa=#&X$@M68!P7 
zJYs!umQ8D4X{tI~G0Jdq(X?tga}3FG(q7|`wcL?zPIX5#ifo*IAC`25Dp-Fb`J|Tv zLHav3!MKS;a)d81sTxMN%av+tK7jKI)lerUznVKff2*ikHPO)QR}p&<`K<0P)=ine z-evj&)7x?h&WlCx0w$Kas|5Q{QCxo;@q%UUYl`W46OA0WqWgHF!$_Bv)je;8unF0~ zh+|e;A=HMydluR2J?uSPgM-Y3my2mbo>qZ@`e{H#slMm!&+Rleaea8c!)`=DLWwE$ z2x#)DHkB;{{Yq+8a8QVk(iQ%0#k{LreM-mL+3rR;9Won!{$1Hqg0XaDd?-cGR4Mp$ zf1BlH*_&b1<(8gV-tYf_T3v9w#b!{Bbs@iW|8_8RIR(Ky6hbq*o`v#qS-rI58)S8T@SHf^YdVu+OjPT`4wUGzi@^DtDAc=&x;C^KlHuo8gf`>az4?CfvEY zhGX#EHlnU9JA|7G@O z?omAzq1F8q2!3P|fNrB3zH6O`Rk-1zS%O&sahQ&nt?_9(KfgBU_%6<@)_leLUqZI{ zjE~78GD^6PE1dGHj~TJN(Rf;;H7L?wUN8~lyp}C+Uh9b?d=sCLJ+-;C{My=^u$?m9 zVqSOUGSW==I10djak*kl?P^^kbKx0vNX=y)f9qQw6VYt@l9LeUu$B*&3E^#-?TEEJg}mlqMs3JUxoLqZpI_u?7R5$2 z26xmZZB^R*ZW_N(>#R`J93O3CR;OJ?T>1qJm5UMWwe#3=S&JaBS)1~Mx2b*eGOJTc zSZm%pBdvG$N*;WiZMQDS0$SCl4+0Qicc+mw_}zZf^=k2QlGLS8x1GsT830lErqgWo zElfe-X-dN9MC>Nqz;;$hs_;-O@*YOU8|05D73#s8s} zF9b$xRGtGB?;G>pIDT#aUn1J&|DAw9{}%`$>y*?x+icA;Y2W+DH=6TJz9D9RR2ELr z+n|wM(QS*Jvi;4UI#Xq$lW_nTCzh?LUxnrYyWO1Ua+C9&?+`+#6|B(0He>M02 z|3^Z^MO2cAkhHrLNh)y)ZA}e@E1gP9OZ$k>X}Qpl<~iDn_AawaL!G8dMYI=9?eF9H zI)}^q&FB3Oe9upAI^#JWY$`}Gi)k^I97g@Nd8h?26 za9OOgAwSB$&F%T^(u7%&^H`K&X;M=i43bE;d0wUv#GsXY^e?b@EUF7XyeS zOko*)Q^)Hafn+7^dUw|1{EV8a-zGDdfN@x8`?LCyysq<$0j3vSt+!1+XBN?_x{Y;u z1X;Ny(^pZlg-b|oakV0qY=+7ph1|kl>Yaj6#L@*O!&xQ?VES|IhzWf+14y5_RB?T)I5hpQGC_#oU+d z<<$*h50Ryc`I|P!wF*t-uWM-|nm9-ZbXK`VD|g8*roA4>8~!$xeX&ATMU+JMjiM*F za?dIbv>ml^j8MGxGVyzd*7`T^eD@fgpx92mq#}gENKv4B+!Nz~85$>wyF8YD{@+bP zcdQNjOW3G0%P;aa`*Q;AguU44|5*R5J6K>o+>1&w*z)7A>)()rOKu;JURbEz&w-k> z(Tl@dpjJBgwQPQUL$OkGx>EGQdy&0A{u)Mpx<#@6wn$lHLoo{{N-#d-qf1FStNUbI zk^`(bs(10FQFM|#<&|agJ`Vhk;Z-pfsy939y9yHQojH31|*f^;S#SzmJWBS421Z8VCv1pf7-Gc`_?*(g&P6` zM}wsAS{q8XNP2(>4CIPJ!!#rG>hG>RsOGpnK!;mWQ2gR~`oJ!^HJA1W zKHg~TnS@s4Ru75d&DCq@yC;05S|-#tZBaBx$awekAVQ|zv+{H)*Z@51qt4~-t@;Tf z5EnXBowQ|~fQ4Z{X2By~IQf!d?mm3x(`20U`-eKCuF}ukzn@O{%#6%KY_V3bSo5YO zk$qu7^v$Ys9JSkP1Wa!8K*~lgRlk;0B~fnr`F&WEF0*V{=-N#fn(U8KCA@S;5k0(! 
zYg;-!4+b2wf8scY^4#O|qiUkj^iS!1K}!{ei*lUHbUPep!0WvyYz!d>Y^_h^0=#VZ zdG!6grapkVk2668($yW)JsA)eTdRn6ncHEmrQF-8=V5}>Ed^Klh>+T zr@2cP&JIlm7fnA#;-<$(--_TV?h+?{+I6EtJpvXwdx~e0^?Hl2j;2``He9`zNN= zZTE79>XR3Z&&U|BQo`eD&Rbmc9b zE-8+4gDY=0AeE_8^`orfl@C2}`;WMqM~4$o`(c79kDzOR>}}*}qtFzlY?vT6$Ts#V z%PiU>_tSWSYUCN1J1{uVbh)e4qiN}o)iTV`!ep}ixzp5aA5HTVXB_1B^rP?RdH#q= zFOXYSruAaNRu)eDsV!b7vd3GWTsSc$qrd#JZ=>vj?bPAtk-UpnGTvMYIu?Rz&>!V; zdwmMKScJwPf$vW3U5WO<9`);G#}pxpGNF^bn#8V4f-mlXhS-@3dG6ht&^M@)>QIqM{MV-f0S%#@c4qnoLth!!tLi$gAO3io zJKZ~)@HNYMtn;-?5e+QnA_0ncMfpy~Z42WDrgjcRN#l~~u5e7*(j#)-8 zl_i*NeJi0_QKym)PZcP?JMPtPWKt{Dp>c;(BkS)XIjwURnlo1m^M3 z(v8}ECWHQVCymM4>f1PT95q^ku3mYZX?avA2)&>9s~VX|GAd3A7Cvf6Rdh1?+cxYG zU&pd34cdrZz2LfEyGR8%@4Z61-2r65#yj^LB#3}gAP>NBOVReE8c#%PsfXK?%xiwq zXpbw)r(O6Ndz<2~3Kg?h`|udVCA_i%20V`XGPlpLaMEm^i6_VMhcigY244Hv!)2u! zg?fHF>?N?`g{l9ZLQ);y>s=%w@AHWJ`_-n4hhapbQH~br$pDPVGml7E?^F4N@Z#16 z?M2~Irs7} zw7}(}JDKUCd_n`akOvRoDH)b-)XuRkNXmD%GDP-iV7pCO$=vp;*HUo(aAjaJwNoA! z8&3&;;jtxoFdfOS911cA6Zqw?L=;70PP^I)bHJVRkJ1`|!#6kF6Q}bb>@}5v^|(@A$ce>jUHu%3rV5#=SoNE-Vmmdm#Vao^vK$)eD9Uq; z6th~Ko6>&AmgZl%=;x@Y-ZbWk;TFn1d5V&nbQ>q9P{}9NSe9y>P+*u{L5~Kb?4o}t zE006vv-?jlch?)_7qmce7>Uf|Oh5&7fP3HND~PWA?$DBv&&QzJa{pKfpSbrndI(i6 zSTMI>4^|8TCdPxPzYt#Ksrl2>kIaW3-oQ@gVF(})hRcC;*SgJlaiP`0W$Zpcla8VT z9e+al^5`jYwSKvEkZyobVioXGAP`j@7DaF zQ}?OD>9+=O6x@;#h=n9lchP{QiM}C3IVhrN9-tnJtYcC@t-bIf#uoj0fdBrn5c#{} znK#V?RihmbtRFH35~*1jF^)dF8)@p?+dUf}Z%1Yjs9&SPkt$;qKb`mCL@EU;9nkB9 z{ZEMV853$HftM)juZUf=kS+$wt-k^$4lDZE$AA9K3HE>g+9yl_!pFW6Xc9iK!#N(4 z@b}-(!Q@xKRQ`T9{g8H{C83f}DGy{MzJ@!l4LeF-(&Wg00#4BxI&|)@%E{|ezEsNv zQ4IojP$0GC-#@k@mFp={%T+aYg2GW0T~w?52;=%M{@)=FLoY8j0R=*yH^F96gB&PE z)FUw3N6>FB{a3$iVog7WI_j?n7&cGeN;bMx*JVFrLS!i_=Rzo0RF=LS0_*P(dzdW! 
zv*N#zz}~HN+2;O@1kR!^GI*7%{AbL=IB}o855S=R9rKhg&`I!^tG{aSM9X);PF!X6 zzs^sDetxHPqg;|N5whpXFT}D4;%H(-TxI<$V(*ou?WCMx`j?QAfqnJ&KRUvqCYPYT zT?(_IC=fIGNRLDXBlgA*+mzl-lU-OM}yO&B-?02K95FJuem@1uulh?oPmlY0V}5gd z-*wQw8ZBzxctL(>vS+3v8kuF>+&-ES3p?>!7qXttcZl9D{#E|gqcRfGBL3F_YSMp^!|qoF#{Kx?gaItB(~2KO(H*J(-~n6 zbI2X$NAh4`@)YX6IsP`0{zd)20`sr~$ouq`5Mn0~zl~B=C3Ax*_oHVZ|MOQH1XDwk zOfjla(hniOB6$zv(98Ka|8GZ+yA1U*6NcHsD3Oq?BL&fP!2$6>e0J}fSQW;DkS#jH zcIed*B!Cbh(IgEKyv?wTbuN~KA!hlFU)!gbm_f3=plIXCqf zWI(w4Pnv6`DK{_?kXZSsl1<>u!BRseta!p?M_*aHN>yH-S+ z_oP>BDZjFri<-Rtf>e$U|B1-l4`zq3bQV$LYESf{6UIbkBPL3FzU%?+@}m!TbTl_y zxa@|da5&fyWEqQI(i!?!V_SJ8$*8jlUQpBaA2eBcqFy-hk^97>E@kv*(e+htm+<=Y z)z6xtkz{&neRM|ILH@YSqRt+@2cu6<_^>Nx4~nPP8R}sfr^^lXe#-`PvJsN+r4|R( ztdz%=Z$zu%C@1~)=nFM|Ajs@{fxfjm@CULMW7#Qj4wPi>&2q){Y)_!kOdb&IUOp&+ zmf!nhYu37b-$yQTB^YmsM~?bq_9+6R|E^~&X-EpO+!rBo3`i}A&#WpxT&7D;>!w&L zl%whYOIi{&_DYhQAic`l6)okM>{!Y*_~3Xg{jv`JhK3MLW>m~3H6Mi(TzV(OdhVH~ zY6l8^-KLs!Q)Xar-i{=inDH9@#$-HL8`w{POcYub6wBE+WHVXO8szSp!LYbJd6;-r z-~1gW#Zj!G2kNEwIC@%u&^39Hgt7=(hH#kRC;;QY3f5U)6^KEq0K`wwXDk&d+5yOR zJ7!!YF~S)@>T3yxUPee(HQLy+Li=3w^ho>6)7m}%Af~IOS%xm)%As()Qh}C}l(HFe z9aGprMnz`z?D{uCj)$(it82N6g7T*N{a4=SKXt94C8bT?A-KWm)@y<|YT?(17{UbT zc6TMh8w1@o(tv+}(wj}~Bbn=049cdT1| zPN{*x8FJ{8ueaN1WdNg-=jyXz`ynl_!!Vi-Y@cBXB(Ecp&=GwE`xa z4I~Q`g4x!E$o$Awq1)(b4)!zlEW7a6SkoS$Nre^#C2li1Ls($cn?aM7p?n3UvHXJ_ zPTyuWm%BgN8YNTS6L|F9iWh20an+q!S&@8+YBj>H* z=@HtYj!(~j+KGM(7)Jz{usj6!V#!t?h4SJ|6MZgx%hd)kUX4^YpdK-@E6n&`Vc=-s zL&BiOUd^G{A-+mZEM6m9k5qozj|ZKDo+Pb}ZTm)Wt>C;0O}kf#b_uF58!{?@<-A!bWG%Y<=(m4;S2l z4Qc9vk-jCaud}mNDE2?#wr6gfKp2O74hi5nL3k8yg3JKR) zFLCxXD*#Ob!jp3MKs@cv)u1QFW62T_#%SAp6mR2tvsUfkf)aqI>wLO{drSzxC2nuAh~@LqumC_95!ZzweW5ZhX~ zINhG&B@5Lm_a{y!IzVb)cZKFGR@fjqV>D}47WtnEE~3w0V79V}=t{JqWr?)pJiR1y zD*0&5Mt83%hRv)@k6X=SD%#7x)H}a$y0LexxxlRn_3yz$s=Oq z(R+zWW8>RpRgZ~`3KU7VHN3jAhp0dg#W*Viwp29ujCmvskLT93Hw-^t6~iJi>Awek zF6QZE)Y1FGqpr!r;}0#H*98!7#nUVxfoU@nT0}0ML_;2%)X>ijZ>3co-Xg|2ul+p02ptzd|^|B*$_$g56s#WQ 
zF$dtRE&4w|xyy-GN5)e@vP;)6v5ODwv^T34iC$dHtTMBD|KRdgh?iH3JERmQih^2X zPupWZxdBgTocc^L?c>Z5HvW6mb3kz|QA8qPb;2>)L92DtjtFh%%n>wv&^t1FgRwk~ zQ%bT8ay(LwgIZ3Q0>=kMy%0$lisTM4l3AV&Jv-rzUrRKQaInpWv5(?jOAC|#>DEv#f-o!11iH?NIBulvXw-G?A!GR$*wNtdaCQ1dF3G##N3n>8n3UA_r3#LC zBHHVKL{5zgZhc0qL6gwA%)MY1#EkN7cVnB^)Px{oW0}6wY;j3#{KGE8P#niOdkYhA zQP7SHrn%8^WO4e!2@1%Uj{;7c(Eouj(w38yk|N;f;IJ3H#l&LZm^1I}%xv0|=n=tvfprvTtJ%M0Qn+is1GWt&G8bjl31EZmy4{(X9mKG)e zaa16MSIO}BOnbA0E&XgL^93cQCn!ili;sET#U-9Uk(caO&L>P;8`b_>k3-4Ci_EG% z+2ykq=b!McP%pWaE(zg7869}ut+H8=G?H1cXzj7z=_XHL6kWaH*e48>hDljRNJa^} zj~z7)&9mx^y1Z2w=iixaqrH_4k7OGPgj92ysnowmp&03kD$PN7y4FUX-s*fz+iA91 z!V-=|LUiN?Wp<*^?Bm<0V{Y|tBad1(hYPp8hZKcOQORF#?qNpPKWIs*BWdW3qGs1@ z*1pg@kVo3>y;GV0zz5A?A|vmf?qPQ0ISI3WKP0YvJUu;~>U}^yk;y+0P7iID__YDH z3s*5)TlOHlt(O<~Iz9F>7v2`t8~v5mL*8r-H)9S-2P7=#m{?~vunj5bv04qF$$T7= z_)R&ME$R2#(Rbl;SYoL(F}4}YGVeXyvJYoLiS?#zI<8)Ql(Z@F0h4!c&7)A&Xc?oc zE?V31Caf*!W98opraj#X^M?j9tJ)9Q9ByHXdCe&-S=keO5@3XcMnSV9%U(SGEF&I@ z2E}TzW9F3`D>yjt+}TTHYQ`bdP{#68yg2C2W?W9wlV?KF5p+LHXYPvc^c6hyS@Qpn@@`1s+?Gd&0_ce+o|3ho+JZz||^ zUP~JuKN2*eT*J)156272VT&XY;@Ja5#h`ZvJ6jZkcB&u=#jfO?pKDX)dzpAQg(4_RhgKU4#g#IL@qq?sB=8j28J5#yg>bXk79NbGJ8b+N#jA)Xs?;0bYu3z-6n4I zraU6Eb%3AMG&3{Pxxfl#sQiy1w!my_j8y1(cNrs>PdWFhc*MiuTzWZIG;X7DOX~K3 zB2@p&dMAq1U^g3cE4`#k<_e)P*QKZqh!h$xL=69(t(M8*J?rM?=Hz=sagXm2S~xPo zBR=RlPHpUJ00RJ~+WhS$wY|7O-50fz`gztyQV`d@8rr@VON+k%(eytc3v{Y+o+fh$2zD0Y{pUzMq>WnVle+ zg+l{on{k>-n2+g)+;B~DAXs1W9N`WkTrA)*Gocs1N<*MsL>}3=Q(A9)DLp;?3cEHO z-t;vz_*x&TZl_BWChU>4i5~jR5n4we-NuMnNej$h3lECBMpePIsREng8l zla}wbozUC=0ERwU8@=f%I9sLYI;^$Wy3QDff69-v|WVsuZ}@U68I&eW)_e1C+TcJygW<@vJ5+3AZH; z;6&2cLMz>WOfMW~Fd4M$|G?K4A?C_5_?hkai<%Ra0)gxz=OdCm7T(*&YM%FH9qQ|1 zMmuU7II?iHuygi8YtIkY6E+72r4Oi&UHqgebCch?XeLXZ{B7xY!>|67!(HwNQk z8TpWV{}Mt>)U#s}H+)poZHl=dVq4*VnXd-L6Sb0UPcJFDg4wBu%zQnV5Cw@F34f2Y zX6-KutRv+2jY25VIt4Jp#(=%nOKS`M)!Bt*K3+y7abbpsRk_r#P@&jrVp<$rEo1p4 z6~a)h($6C`{8k1nU0u1&p$rRkZ)a{~4ThyBdw^3#A@JWEX z)6YG^aCdy@tv>>J!ZxY;tDd)F{n&mgSf54kmEdDNXMm=8g6(Bq%O?BIYI)+Hh`*P{ 
zqL0_c%WHeDciCG^R)?0k>8kKY?e;hGv4yD#|gV1=%~h z_@fU?yf1$23pEbh$XpN=oMx8ig6iM@8A}Z)^4hnUkmj{Na$%-o3vT6T+47ROkvJfA z*QvpiQ#_LESiUs!eD?jqmtkUIW5eyPd@upN0>|4~>^1EaKEG7PSG4flYrZS4Q~nwp zOk>?MmL@-w{cUf3{!R|yXVX4y=S!kAB&a1=G!xtmDl%MC#jpnLQw(`6dHQ>MV=k_wv*=zpB7YOi z#OK{w8^|LLe2$0HJk;Rdc&2#%m}b(_(o$n058)xu)mfF&Xea$?&aYr zt}b7;#I^$+$=}A?958J!Qv5}})e+&2XYH>pXe@YDmU)86ow442{p`*qny*ku_*GkC z#Ke_h_`-7^=B=3)6lqD)nc&1-$-VW7aG5;SN#gER?X7q`6{J&1W)$FXW;@HDY-M{K(0#3)-4iT7pa5C#wn)Hy2*} zdNKU4&G|zD2ill)>gR-W1C*+j*C9mYEqE{mwvJ0iN{0MvrhUL=(6ZhU7vU9kX!a|- z>g14Tsx6z3%#78jdX66$#y9N0hqP#X(R@q zFKGj-;VAt-hD zSX35@$y9WXohD{j4?aa-E8LN5IXhN2qI{)+f;M`+ka`9x^fH@iajZ%yb1p-_YvOFJCV zn)koL_VqNdPoyo|u0<0l2oy>S!)MgbSXykF=&Mx&&|H&_e%M565a2sR8RAosvIbXO z&=IF6M4!f~s{#^4%f`hQD~ZTNrx$K0O2Q7M-RMWcgw?)Bi~RE!awYK#yS)NBgGZODD{f zge}kI^8rDyW-Tt6RxCn1kO*=9fNS4v*s|AB97ZM)EwXvhLSsg!0Cwtj$%b@{E>6lm z7O*J}sq~$O0$7{^<>L(sfER?$AWrzZW9MlAG1!&)ZoaOewROW*RyND_;$t;!`3F{% z6Trwi0RN31Gd#?RzI#yRrUC|N`#3g8d=HVz35YfUb1orkvnEUX8;nCsp@rd7OT1hRrjy(oK4(sjgGq#Oc5 z7T%_AlkV)P*EDxnW^vf_t;9aqJN#EPY=;nc% z(#iI0h$uKGP!xMPwNSQWYVsha-ng8)F9!qByG_U^zuC_QaO=Jg5oSPSO#{TUnRgHH zKOjpT@FqX>sUy5jp5KgXq(5Vvx4g-@OEvWTd)=>G6ehp6+$(A-R`y$P$`5Wu*=ePg zlpHIW?R|vGL~o1?g$NskH>VptLLGJ3kBO7d^pAhk<+oPaI?@MPa4<*M;32F*!u^%T z&65B{t6)Uf#dIo_qF8enoqEhgYDhW~R4`B4*`A*X+#nY4Ii?|<0_7(oxq`?(G{<-%Lm+Rk4- zJQL*LOd|8?H|>ye-~-e(g;pB#fS}Z}IE+~!{19dnJcfN~O5$2q$EN6#qoK7q?R<&r zJ4e*wmAY;KeA~GxjEne;R2D+lv}=KgtTKN~QL|85BiCWBxArQ0~3_IzAcj4>udBJy)4>`jeJ;en(`2%w1av% zl|VyUrcgimIROZP*75v9T}Y8)#P9}%$b=zwi_J%YW_zPmCZd;5#KOlFzK6WE{DIHg zaxAkdDG;9hbzMVI5yPID?RH&od0CcdbDUK)3dL0d5O`3&b+uS{30SbHG?ljc6{0=j z?tlpD5cdTJl94|8RT@V5 z5Fd642ri^_aHwsEY}62offL<9Gl07+f4Y4|y~bSAq9Y18WdVS0h~sW?87d&C=?d9n zDik03{j3qiL~0H?qM%{=SZD{XLE}YsvR@qmYED~6pxXEH3U$U)6@l8#35OE0xM7>+ zlbfstXv+UpA3vmMP>ARo=cVZrYjN=@w~{15q?bN8dTxjJ5?{00`wMfs=5!xX$T|#+ zOKHw&11Vg53A}Bck&#hr4rcUY#9nbxs%{%6B@1;Jsmb0C-zs@)UyC_R(#ofh@QU2^aL6g0F(D9+v9ep`B@Mcu^Q5tu#+mi;;32bkBRG zCFeS19kx-!)!pGN?Bkxt-iK9f?|h}0n&!W!U)JZGh??isLsx@!!?I|L}53x 
zKkdvo$R97Gd)$~CiK@e$=7Fu+W4%@TH260-=()}4pgrSYx5(dM>@Y6RGXF!O-p%70 zR*mIqXp7|!Kz<617d$bq^TRY%i!l&c#0^slFQ({bsH&)_AX0^xM8Ry>P=zx(sN+e4!cnssTxkt1*s`C5S%5)f5a=Yp)!H>; zr1Def!{>pRIO;TfYR8Tp+ExvaNPr*0ad?SU^h4SwJz24WX!a{Iw-pS{GvP<{O3W{c$m` z85P-3=yIs5A1tOhU_$JK+U^{Id_svnc$L=ehvUw5wIL1izX3K{Hvo7wvleQmH9eO> zeCnI)&`~>z2pFC0C7{zA-UE$@SqBq$ra*qytUyu*S~V6wodxj{h`Q8>5di)7!S<9h zZsH2qY5@<`g5WwJE(RR00-Vi{q}3u2gJCv^ za1veD(rKgC0VkO+WONg6Wsv*m5+iPl@+*y1Jc6V;0t8{$-YV%TnJ?7guRUX?t$>#B znJ_4FYC|%Ga_QwmJoxT6!tjkaE#bEMDK5_=h`A#*+aM?Ja>WcwiQ&Se0X?8ysk@;| zj@5-tiMs(QK4kDHJ8mUWsxIQx$jC@*F@>ObxDgyf1B6qGsmjQY5*)yUK6fsQbOq0? zK>*Pmoqe{kVYueNjP(83CK&Epel05HKpj9P2A&_r`MY10#8${ab z9c7CSBj8P)Lvn3pko|Zi(X|D9;JtlH^&lb*5L_x49`=p{TeLw3lUi}|FQysj+Z0g- z;sNqUGcIj|W2{XWLSIDHw+&6hPhjHu-=@C25y%M*t@M+-?ycF3P!T&1p2%9o z^W;u}g56R5wpZ>Hg;Z-_{%9s^evQGF1N=qZ1?x3-XFH9w4Ic^UJ5=VC|HUj7+d>nf ze5Y6$C?cNYs`w$4dg_(Qsp8s$3tJ83)jAgGlX#7rn|e;_v9g^Gco#2>MiHXPxy-e~ zhWVn&Ruk=zmHj4Eb?MxDZgRLla-l^5YC}^m_~CoC0npfhIK8@GtP+Wy*p$Iwg1oANG9S}7wS8N|x}Hl1hecea~@ zMN>YQ=BvDOlU-5q4#S*}=dy9tdFb(kZydYDDE9>9i@@kO#T9lNi(eoC4p@X zix1y&u78uZ1od8eQqqBtUQ9G)34t6%6_Rwppx8uVW1a>3EV(s31jJ1bH$PP;- zyLG$V*(Ah&ecU4YYVGOsLaeu#JxQUj4$Zb)>Of^{d_pw%insTd*W##|Yv#2Yz{RXU zR-KLCg^=wy)SS9Q|0a)E?f$JF{{iB!er{?g%W=DyWaY{T(1qajtvrdo>Q>p;YxMdQ zCB_#Hz?UW}REt5B9Gu)%|8G*y2N?;3_R9S7fd~HJ!yQy zz=?oGY8~m;xw8HaDUB&mUdyhhwntAOi#H|HlE(3gM*%;F{0j}fxXpn;7X>La~FBQ@lk$PI!t&$}_l%7`0EnqX9Kuxg*3Z{25* z*w##i4g1DtRaugQx9m1Z|K>u95z64?w4N_6>8R_wM*n&?s{!GXZ|;TPK2z9o;xI-7 zh>KJqBMghgL;x0HG%ZHa=iiDu!H<=P=$iHL=oP|1kTnKx@c-ysnHlXchYGURk;FEV zoHb7iNXSSnI+o!Nf#H z61yGZLfJWe2xc=d^SUe3%I*nk`$>FDh>$_%{GJCSC*tg%e1Yu{*w~5h2~>;WyyfYyL7pU}=%1dVPzqa8&fQol3sM1IZ!dMDxUN%bSE& zOyoM`bRzo@p9w;kxlJKB`!RD`mG0vsFa#c=eW)_iN&%}H*0m*8 zy%1+Xma*2impuqlPp};HL1~MejP1hLn4fw|hidF*J+5_UgyMQ2`HsS$wB;xJ9zhpc z2?+LmNc#5_0ZvpO5gHsUR`15u`MfE`bBXztd@nty>uT1cG8&U&#B4d1nSNnB!F^GRMc?33*oX*(fSR+-+#~O+i0F;f(aJBx0ayMK@>hxa z8Xj87{0ODSJop7F(V|$W20ZCu5egByoDr=H 
z$6x+?%qoOY51v4-0QGRFiU}7(Z&67`;ahaaqq(=yX@te2+%Q!RkTzDzh8v{KHi(h` zZ(v|jNzW|q`dQ$Davd}-&>$l&elx~Hmy4os=|qN(cm>}{A1WR9h24Xi7gRuoYIUq; zoE;raTWm^aFQYKus!HU1G0ZxLK%eC|@u#x3%Hpb|6)^72VW3`IXT!Jy9E98pFi*fT z%6fdg@64}XX8MsV4m?LM#|m77{0VKY?{x6$&kZ%%IhqsH;M# z=ILaEYWm1iWA0yKp~@zb0VKK2JbxR8jtl<7%b_5>Je$UxbE*Q2peG}NYs`-GDmXJ^n1%wf=Db5_YvO{~9f*$n3=O?{!*k3>FR2mG8>DG(utJrNh0*N>OgvNQ?W~sL8GMMp2T<84|>?%-1y}tOkDYn>ao+ zr536;5BxWodCo+9;?WS$EH7xMrHP@k{w7l24@U&2h2y7n6Aqq-n{xR&KkzysS_>U^ z5!4-EWrqNRX-87ngz`h4);7R(>W5~Wvft^ExrwVI6aZfo#t$VP{k{F*IEEdGx-L|I zL=_sp52fh^#_$5!h>KZ`U5cDhKFViJ-tNkPm=~0ce-B!q1+AS~IT=_}R9c5cQkXav zZQ|&dZQr;L-a{*qQVtKhPZucn6_M( zs7SP$KthV+{hM}VNk8vO76q?Y%nB#+xWUFL)7li(57Hy@@Jp<~sM6(7eLrp0VZWi| zaE%*jGH8Kp3=o~a1X)BWnu;Z}Uk7fuXnf8RP>&x*{-GE$<}K~UlYbU+L>-~{=YS~e zi-C4y`0s4AMW6=MlLGgEQ{GM6vYv}UAq0?M{|HK3uT!x~Hk>A{vzl+{J5BVn;^N}@ zuw~mmLbv!uKs)DWAxxrO@p`xcRAGcF(*A>EEEmU;PsG(*M*7m1K94-JAUB%|oUO{t z*eW06Gl0;=cYX{5VdxU#|3BV0S=4K7vjP?vA>E2;N1#`qIar6-OxRSJu^?>1&M54% zD@@!mE_N0)nm2bVwClbPl}(Y!fJOgRZvq$C=C$xkVVUE}WXmy_KF%ua;q!rzIyy@= zNHT?jG@RTe4Xj*EQ(Y*TRO$OrL4FTddQ7j02TY{l{|YYEw4wK59!xH>kma~W#qD$2 zRH$rgI7t8|2swLTa+I-kC2rBsc9>ecl$5uL!^mSMG8{P4X8W`%op`9(W@kv;n3)`R z7Ha4%YweTHJ(2>8+XUGvSR?VYUD~G4hPA@n&DqI(-sEj5V@l5M_C}C~<$-8mhoRHd zejBrAT3X_GJ+|Z2R1hejng>E4Ekcl{9f_QaHZ>qfRvAtuHC`Jp_9!uK6Gx5f7iUH} zocbDJqYo#QqbQcM^>d>F6Z{KWb(GIe36c?<@(xqoO;4i zr|c}x;%7DM^+j(RHg^2IbK%yPJ9D4%5NB<%E18te_MDt3Son2df0B+C+lunW@zIJF z#q|Nt{gxN>^(Ur6#QOr{8K#*b6dKiqv9}UzzNC*`2b9Bdt_T8_0^?``^x(9;eP%*l6uw z-7qy!(8)MO8$xwBKv!EMJKFCIIg!6G%NC9H3gdk1IAyk*n&4C3U-GHs(mK*DebKTb z$4k(7eq3Fh^-1-{uOIWZ**d<~I3_vHu7eX3#lIYmomqFBT9Gh+w!*ArVBytQ%l$JS zjHlU;O;y-{fyQmv2ge5H7m}o5>810kHQqevgAzT*HMCeuyxiSykS{X#NhCD+8EkIc z{)~lfXmIch;FsPHEhza2yj=>R9m@Z;v(HQS+IE#eGO#FpdLE*z0IOYmoJ@Y*X9ZYT zYxF##lX-Aq@;rP}fBT#~e74WblKm**OcnbfGyA)@Cf*r8`kTGl@{GC-DwU9r zMi~a+YdUfTpFcv(1k^lJhNoGkAtsa2CYLQwi_6m3T%d|#KhJdKSn5tsz)^fAC-)D| zSJeL`tsr~4`R}7IANzLH8&<(E9>eDy@wV1A>q%nBcnqP26q4#qX!X8cfX`U|IyL#a 
ztTgKFh9nbxZN0v+g_eVBT5h;DG#U>^y5=WQD0`SaNGa}U{;1yKeZMcf&m+&0&!F)0 zbTHr45J!7&uqvt`K)C#Q>bKVMXiMNEUfj}!xO4I_C<4pon&NHjcwmSc%L3(3O%Bu~ zlehHEYu0YfaoaAvk)-357PpB(B0<}DdufN^!ur0#xrT^yD@O(t^tpMVlk$R)QK2}q zIghB6?T2z9#LF-_@|3Ry7G9`T${Oll%=_L(p6j=VAp1gDR~@Q73_}>bBhZAOA^B{V z>`qib)G7f&!|^&Rxjzu7bbp)op`+JK($F0-kOW8|%}wHb>ooRdyC2uo&&tfP?TBa_ zt-BaFpp+t2Nvh+GR$;wfQ>VWF`C}ODW4P}2A3ONBaRr$2YIH^k+nm-Nov)a=LrgYo zf_@OuV<)w#Ir4g}!Fb5Q?`(%4lgb;x_R2vXw>Et_|eSx%nr{ z+x2Fh#V#*|Yj05g-f5O74da4TgR|g~--43-=OJ4c7EWG#KcyCArj4hW_e7n{WHPoF zFuT0C7*D)9>k80Rx8-XiH^^a{*}X$6527oS5DME5LiXWKCV0brQb&(w4KdlCDS&Dp zrxJM3>adjeV`*j1bVnn*RSmXHXIyr%v7MQfhOz{c!GBUn>5JLNIwZfIuKA7GXne{< zx~Qtr>6t!}RjQ@L!ul$LtZi(my3yT8o$&<=ml&_ungh`nP(#YSq0cMSv+or_;6VwsG;ldhkBj`y{jC6w>Io-a<&f_`q!^ zUfSVvx4+?$HXaOKLX?O_yQeTe9FkSc;`{McHNC9Q_~1Trpyu?iClZEQ>w`B}QF!>C z1E0Ee=v6T}U*tI%BJmBm>3;1js0UEfSc}bQXknMp*n%kL2Up>fNpNtIEltrrCp9!0 z?FwxSy~#by;q~FcX;a%-7gKH=@L13}vz)Xi?%agtTu#%0#omwyI;l$P8DMPA07^uO z$1y13x{{;u-s|IYAHltxQnh@gOHfdNQL7as$H5B<)HU4h;82N!9kT;p9V%)A6OMYZ zw?seIFR+`*cwFUWr(Jk9-*A4=_|&5F+nst}CJwILii-he{LeiebF;&fSAlb)pum2CzMSFk4_Fb8=tQ7jN9MQDpl7x`Z_vca%@?;Dfp;x}RY6$ZO@mvE^ z57A>cvhML7MLC&UE$&lS!>1A=+Kb$Ld37%_YJnDqM@8~>dSbs55o!OKIT$60U? 
z6tH^7dbxR8OEq{XI(d8?^hdHLRMdBMH(z1IQoZ-+vzWwAXnY>;7>j!tT3KtiP+2x_ z=OiRe-nlj~@eCt=){v5Kl#Vx&Af1SVoV|E!yyl-Uoh|23ou_roDkDP+Q>Zd&sMek_ zZ8^BE1t+&y;E*Hh2rg7gZ6C&%Afs@Zu}_*Gzx z)Q^g#{HJ%wB8}=n_1Cx89s_}En&abh3)bQiMOauPCA&gv4$NiScFdl$67b~VBOSq~ zlr~4`Y)uapO0=>pT5DhMrL(6m;jEOMU|h8GV}&q zfIHS>-AxTnXKZ)~zNrUj$HOe2wYG5jz~p2}=3C=RNw$43Talyqn8PfhNqZ3aq0^HM z%}xd(iu1+A+D7goV2l1@VKrKwyjD29^mG#cvEJG9^Uj6fvtY@mMhtbm*f-Y4E6rQGXpUvqY@MriGzFY*WB`=c$jk_h_`XY-pAaN1|G%a1}bkh7*1+N_WnO;KO~ z?jB)g7FJX%g=#)W1r4#P4!OmAhn%uan>tQKM~x|sZS^LDl5n&3r1ixstF!LbIDdTd z^nCe9ty;oq=h}M@oR$TTmr#=fGs*mkZ3V}nsByvgNI72meMD55xofKd@*b(u0$BXiGQoIjsMBBG<2S%!P$)yEQs5Ta!C0mbe@3X5tr1;q%P(D%>CE<=EeB)pAV6_F>P9FOB_0+hW zrFi>uMvSv*dv(DGk^o`sq4aSqR?mQ!( zR2hD$!32Pmqa~s$yybO9Q#I?H6kZo=b1hY5`O>$;QBUv zN?9}5=#=|0eDdUC@BaJK*V|`PqrbwA8BegVuJw(vKrs<4 z{6^135Zka|yzvx3%2t1}rL2|OVO96D4awxA+zXN7pSJsy6Q(?n`269(Do<9k(P0Bu z5$#%pJ@Uh8Y(jg9P4NXk#{ z_j?|fv5Bimo)J$E;x7l|?Y2W_#w#}bj)Zv&PF5envAmxlc-Z4<`!Y$1O@H7|J;;kH zstnlStldsH?=^l5oFOzTP8M32s zZiJuZZk9#~OdB^HCNCNyu0w-SA%~2VMv)QEvI!P46VxJuoULVP=e-b*IFQv60Mm!B zABSpDmsQmLBP;V{Wg}l3-8{*%9Pe{bDB9&~n^}CfS_9nb4y#Ay=>ebq?C`2g8o#3bifoe`w zIaq3E+3pZ^}JJ%|;beu|e+2AhH9pdc3ih`RuWbdNK+Vub0d+)HQ zmako~4M$K!g5)R&k|aow93-eD0ZEcE5EU9|a%v6;NF$(ny0+K;gLQ5mG zB$3?EG(ELB=l9M1=FZGtcjm9VeS97odhgm*wQ8+ZtKRi4brrP})i!<`_||^=nG=NS z1<0vpL1h;$RI@HGfSu~x<{YrU6id?iKuZbN!0yMo@7KFFY9hdwmV{6%(=*SCI2fLJek&+mE$b{TZ(GT3LT0EK*F zl}frt{VQajDl$q$I~U0C9QyUD7>}w0$t2fEZ*-Ie0`QrSmiCm?Nz89$$k&)&3gjRg zU;ZBqsn~hBdpLTH*6@*{_5uWoo8P?doou<;=$j6gU#b0^Fyvp^; z^(F`R(@c8MtJ;p~f}M3{xHgce4_zo!E;%aqze`VTsv+IHzR_i33*rh7sw8*QgpJlN zQGNBf`iHkCvK^1TeO7~=6B<0HB}-G1?P}wG?EV)8ev~(&l!9v%Y@ce+qk`R?udo4k z2CnFnFBDTs9`#7XpeA+TV{Xy4(?ut>A#8P^7*ZR)PrewO(ygr){YkCe^f4Uhv?vYu zCmeGh&L!4#_*-6z0(By${}HS4h0FiyAz3l>ufJ6vNEZCxNL16mfy@%RTLu7m0cHMb zi=bN%;Q7p@zeTT;u2>Y#_~+5w8h!bXgQn@W?2P2P!cxTV-%d@J+5TXjc znk)-q?Ux$cIexHR*9M*|UvFE6o+J&Ifr)0#?|~$QyD-n`0%}M{i3x&p%#Tv(8j^2- zY5|ej3ED{=s^i#fa@aNfvl@KwU;Vnh0xT79e*~ydKvl^q{@FD+2Le2c^y=sus+r>> 
zPZl3wtHkJTy?#ZWa;>K149MpB0uD>usbX@?ot8=n?^l__pTcY({k&y>Tb@(ZpGXh? zpWU4jx=3~94W=T&v=^@ci;ANJqc9GL?SZ-~w=K(wwc_1n2+d*TJ62n1_7q6hp1>dk z2u=GljcJ2?f=Q*Xsu@dQ%X{4_>= z&uToJ#3uc>m{HL?qUC>grrhr)7}J(traeV@azZx#Ro#>_MjeWW#v-hUW{|bqVBaVQ zn~wD*6HM7P)El%nVVFur9_HUq; zDKj>mURBZ5(773rU4r{NF&{G*EvvCX|AmP6v{}KINkhVt?bvZt(7Df%qDQCv@x`st zC(qTnC?dC#5aB?F^DfbzdYKLO8z2(lB@~)X{-OeNG3?OQghtn7QonXdiKz3>LD7d_ z88TDJS$;(4zCR8fElWE@I3{(8F7z$=i*w(HC;v5aN@97AyHn;A7@w4E{v}$CyE}bV zqPWY@x(CBVc)w_ANf8Arfe08wG#PUAZ44+Hu_ZYvibiqs`oLD919%KvAXudq00(BQ zu4p3($R_jcFCJH;viSwmOoGZGv3wW`%rTf%z3*hMAgw>9nLLV{36%|;Cw)V6P-pwY zS$;J&S2J;^Kw7_3&lV96Uf zAm1MZa0{h`%*VFE&s3va4gVkQDXb?3a`TH`&*d_B80I+hOM-FZ{^?}>XYx85YTDh& zIb9yR2dd^RR=1-;JX*}x=Ip%_{SL4vD2_8QmdVD7W6xsh3z^MdlZSjLEqjl0FC)Js z5Av6IH8(!;)2}&vdS)HxN&%_(>Cd{JcN0vh%O6NnGt5@YoDy?~EU$h}5fe(RsKL2t z-;(Q!oR7Db5v9~%YP>lz$j%${GTm6cmbT+-%w_ZwOy1ZCfaF(`2aDKo>2KJQCq$~@ zL}KCmAexY6(UvU$Y^r&K>pd4AI&co-#$OT@JSR&{3%n~|p-U$ok1BVF+{xnMZq*R} z>94G4O|IUSp=h*WtTw7zVP`hq zS-xeSDdj2HnIXyul~%LW1_d^hR?dT)%sC?&#}zw27CG~cr%3#R%grgX`@hTJg+r-6JL;HT_i&5vP{ofevn;GqQB zNi*KjCQ*x_ZZ9LIQ)0Z(;bw}*R!Pp^2dX&0mLVbiG54^7#aYmqYpUf(shP}MXL6Wx zAqVb4%;eT8XCdd&qrn%e8oQ%mrojn4nXkxgzeB^IpUh2;3i)+7C|0y#PjAp_O4*%} z^CkVEzV_%E*E}`srEa5s(cIDnd zL5Luez8`PaB`7IpUeh?MI%+Qxnj5GDbooCGi3xDitq3`DnOdGKof~W{70&$jR89h9 zusqW^M0k&&{UzayDa!`Q{J$hb3c`IZKEE|!q09=n($E|!x9HY?Dh*fi-S~I+k)r82 zj6JKe0G;QEx%L}VcCY35y9!7@5IE^}t`Wb{1|s}6EyQtdMm~YOA^b}*(@GRhjB~m- zTwIe)&1CmbpMl&eBgITDQSed}R!$K0Tpcz8`5v}9{?X>2bi+$>Mgk=+3RbXyC?tBiQf-hBW{HcWVBV(WW#^RQAI87 zXe^F6IB5+CMgc|ol+5*v3cWdq$Wd0-7Xgti>@`G=f(}=*pcp)a2QEx$R7XN`Wj91= za&A|*uTfR)V>b1q3PrSFM1cmAQSq#@_Hq_4NSK3mJR`9Gi{xjNaKTct3brhB{;K~= z`OHIo{QF109ytq@NeF8MG=AGAQ_aR>JadD7x*Q*h)Z3@m6sly?!w&C(1j&l$s+mt!0Di zLk(2yD@BFo!Y+O7DWJCfSQCeA)=GpmO1wkVc69OCwghgix+5GCe)bD z!o1!}f%Rp$SQ7DQPhYV4KB+$w8x8u*Vx>4pFD}imJ2P_y_TwgSXG3UnAMtl)1JgE#2oL!+^G(1& zica|F|EZT@BcCFM=uyr&WILYiodm7v(e?(v$h$?WAxac~&!(zM+XIp#=bN~uB7 z=sVNa86*u%`s7t`zguZv3bQmu69S`_5w7~{`uI^ch 
z31#i|qm@B3wdiwU+)&eLQBc$REaYLGmF3U__@UjZPE(Oc)1O%k>&;(LCFy`!h7vRd z=Fboq_ocsV20NkGo`{bfZw(WkO%iszSISub)?6q{$cXL2q;rKn$2pBZ)klZuC;O?a zPh6=1dk7do+fMag<5LKMX7+~$d%OAC5T8XJrDv z#QF$l^LA+X(6tc>`6lHUM_tUJsDsgt%z~`xra#rPx4X#5k*oy5UJx1C1;(z4ER?we>kJ_CI7gbA4Lj&mkw{a#y5 zxlEU%k2DZV5JlhPW@9;#w-1s@#$*6-Fx(O>rRcV_Q7D{z?AjNyv|HCZHZMz3YnqU( z%LoE-6CMA?5f2EKi4IXi;+gQzAUZf-fR^&s2f9`mm5%hz7s|3AWUmovj z!B@XHMv`_Bd4Buja>jwha#X-^eHlyNOg8TlkTpsO@)Uoi@~|%qeY@h2xfDpNBiGd7 zs!%n-ex{83bV#56>IVnOE$3|NhkAPBEjwX0WyOxuj2nlf{J~kIGzF@F5)lT#U1Mpm z5QAmmk`?9#j>eTce$UO5Usui zsi~nipOF~o@3xIfcwdOKd^DMj-derJrZWMLzC(Ct5^Pmgpfi^s@I zn+Wy%e?UyTIWm9x!m@Jy)+-xFOv^1W;-CLzXHh@Pa+u&sGQs5N%|gr1_B*hzV)iA$ z*Q~+Icc>KcGqE&Ol&ut6&)|1__vC4n_l*uxPZfmvQwA>WFotHPpBh>(pLWf}UEN8& z;>QaqkrO!xDM4$#B2&(pD+z`bDeto2pXcNZm;Zfl8H}J-cC?|x(vVRCnIadJfAHMv zF|&x=q7m_Dhu)?Ktf1ZhQlKiwVL}C#=1`~PevY2e z@e>=&GXewst%Ig&SCi9rHdKb^={iP9q{;%%e4)hMDIX|LoFxTy)z&;(Y|*@z?AyAp zArfpX4O{Q;N^F*tgQ4RxR)*C_|5@ld_LFkesZm2*qaWde?WrhT&*_)nOdJREK zaU%|vl$4$k6hXh4JrrryF-ppbV{ym!TId>#KQa&#Lx8e$Det+R)zOA{W0H^m5FjRj z`Wend4u~ELLes{O87p7bo22^ib46x0{Ss-kcH+PcBfL-AvvJk62!3jBE`xk@=4~-G zG&QBZT9Hai2RhlWOMD|tPS1L@%IFHvTp<imw{MGTE%YNx$sxKJ$f(kX#mbu)~-0u4U#?MdhVMu$K^x=2Z&Y%^3oX3%=Wm=p; zYumh?{*p>=Q|HEN-VG+v69`?t;NKwB({VmF#EDnXk6W^Y=TmCcijuLiRPG8{c9r@A zL`A7`&_1&5O;GE~cg}n%n-)bG3Y_|?^q8J~kbLG-vyAXNPfc!4Y5a&E-adadTq)n- zm^`Rw@^1O`)`&yH9vmDvq(F~9fx-fa;kA;y6*60Afr_ENMd7LUiyD)1T<}qomZ0Hx z(}fmwRQac~$RF)l<7ZXfcB};#|74JS`5C6gtwgfk>cLb8FGWx|McW~Pl;-Z7{?I9vQ>fYh7zu+;6Q&0+Jm z-Bp)3WA?*9v-3PV%%+JouB^pGkY#M=WwW8Vu1uO*em0D@v#D$ea5CS2O0o-uNdF5G z9TlaGiw{o~S=ML@-IVkdsK`1$c}^M^VKg=I)XlJ1Aw!_MmA3oSw;RUCf#&_< zLc^i4+SF>*o?cX?WnOD#+$3rSza7xjqvbku-6?7xsi~!41=Am?g{21e z4E^GSAL0IFJUuXXF2M!_eLqYdf6t11!+*cR%fdZdV(c>jajbmV=49u8etQF;#wsvJ z^2<5AtCt}xlUO<6=HRswvAit=RQhzQvH5P^EbGMWvt%bQpk?%KJ^<#7b^3t==>9Kj z@7;n8vD#{3={dJ_%fr&|Z@XO6YD6XHTy)*w7uSHjrjWTI6k$ZDj$NMF7Z zyqQT!Ut4ynbITAODgDLnL$W9GUMAg<5}eL z_x$PaP2nk*J&V;uy;1Zu<+?-3W8PVM#wc 
z0+tgHcQK7_R-E!N)yCTetHQBOio&tnI-9|zyfOksyFmjnv1>v+?aJHl?|6+YZXt?4 z6TdyS65*u2;g6=hVVZq*k|5U_g~zDijZ@H7PRs7HG2j47s~h;}J( zU}~%}soQC?R#{7heoMdLt776@eZ$TI5o-j_Z}nIV6qbIYd9&NP_+~N3-$u(kxIU+T zOFD*YPZcI^C-Y>t1^#g$-3;3JRY5Zq;6%*wB$*yR>a)8{Q>TM7c_fzgs6B?r2px(y z)jj+evGc_OZub!65C?f$h|$fDLK%(IP~C7S<9*~{1NmBns5d>9CORk5s4(aI8xJS> z`MVLfjN(>`{h!pF(sUOyC|NYEs%xuwMtt3w=1(Et)0EYpAnG*~c;)FG(_%e}TjTFn zIr)lMyC)^PBO=7Qjc?YM)%yl=!WA^!j2b-ahwk=HFBMAoT)enT^dIi7d0+VShr{s( zNn=^%LW(31KX{?(@w)Q$gMu|R?Wuge-^BC?Pea3xgUZZ9Zu8x4{o!<1nFohuEgy&< zc_)&lm6qkq3|`O@%HRD}kY&}rN%|Dq4nayM${jU)_Giz)BNUGD3*TPm-OuF~5RA_? zwyG)1tI7mXJe3E9d`)*ga zX3AMeLYl{v?ODyP&n&~EgbQ~cWv0O2u#!L3Qf_;9=6+q}F}`$wg0Oz%xi|F9X((pZ zX;yUTtrr^Iw>Rrn>}TE0#XYWGe1#Y8U{YozV^A_^mLF#-m@;~E{igCUBMH6J|4n&l z5A(>T^?SVaVQIj4E~(|l_oWX;n_KAPkJlwxhWeY63pj~;h~hb&F4mpyYvO@S1KxQD zvZ66tvIc*h*6~?f3fB;m_p!I=Bj6Gmlw(cTNy|2cuLEoU5JB>(ebOyOCtdwQ?u-2z ziw3d~`_&g=Yjn?~C-wmYi-v8DLpO1GjlCXu--CcW@>zx>_cq4PkO=TXJ82_fkSjz= z)NtC^YWJ8tp<9Z`${aTvGc^AxV4|xk2fxN@l@_A~yq%MPm?Rxhx^( zeGZ(b#FVaaKV$w{aCjlD(Jr_mirk_gejT|9e=n5N`^dcB^2%e9?_=r<!;g z`90XdLYJeFU~CF)M7j^euGKMJOP>A@q7M9NQr}&D7SLzy{#eJlITk4xp+@c|o=~~K zUSI*^_5&&2;A02Y97F0&W^noDS355NgBKI79^rWFmf|M`bS9G1r?^GHj^_m3^Z6?#*^V=s;>;h6-R3vh{fhh8m6NO4FQ zmP74%x`PuIa5<}iT{m>86#T;l2m7ze^ltywV}pgfgpdm0`v2nxou{Xn@&MKNRxv%fe2IrelEP5A4G9HqhN;iVVsC6yr&=H-mAHGd&)) z^Vbpq_>Hi7D}UHD)yPeTc88DbLPCm3UGDA;E{`$SP5P>Cw$oYb?CaHOp}`aJ z4@tLg2~T`%^ic4woz|6vdgCu+%JG{pHE_}i!&1Z|pm;X&s>opd{lH4ER}|N8lqYZ; z4=bTVYdrwvQGW1os0F5RSJJ)EyS<2x?Wez_Umrv(`z5NkZz@LbahZ9oAmI=c90*&b zpVJvqnl-qt#7;9Efp6O+Mhdd+@a>MXPb%qEmAQ4!kF&pw#w)(W^kVhsyEiP4+Pb~2 zi&`^0A_=sldQCs>CLSoU5bWo}-M!_14M_)+E}^M^S$Q=Ep3`-0txLG|+p+{ejb+6w@6p zSa+Y_GiG}Z(L}|{OE1t8dxXCGD}I7X%1nm}-S1_VI(>^G5sP@a64=(Xds)Sk=BS@w zp6^rP`n!AIA3b2lNCUV|bKe|1Q^*kR>{S7fNe6!LM(CEI#K<-ho$=oXN@*M0RhT7Q zpSw-qa38q8WcQ4b)2~9t1@B_PzwtkxZ2E5&N93R%ywH>XIY2>HWsyXW6qHjMl$-!8 z73Y$Z2@}0fSk||VsL)^S{aFr+KYYbS{Ep98VprTT9OStm{LG#nQWO6&a)oD#Z#0xb!zd2*PfqPd!tJ 
zku*o8Xex5d9AU!EfPSoJ??Z{}e;M(EyvTg>l*3zT6v`$gN$ywIl=}xkuMt$(rkkj9 zw9WOY$LC!(x9Ru}CKdE{$10+iA3HV|&!0~JibmaFBJQP)1 z{=J6nMfWSwVfc?!${D+_V+Y)VuzXtXdaGJlg;?CArc|v-V6H_4ZPC{eJs#u3^Bq{m z`24+nERh?HpcBnnxKBlJ-Q;T)^2;g-vd&@YhW}3T=aMsOxn($0-IB&w*e%*5G>ciy z|L*uJ{lV4N90gCJf0?>nY8H4!KC!*_pQCc$_ga9N?1qy4!AlP^vOXiVVa{SH)|;I z@o%l`(dd&TB0wLu{JVLBBV(MJaQtNGwAX)T_OYQ7y6pSLjkydVL%p7DOIiD{o~m6y z!o{9pLpRMK>+$wy;JW{kW$LK2F3kAFnWvk7*S!I9OC^)YX1mSR*eD~Ge5mC9o=l{1%$LM#s-=N<-FY#Xwn5)g`*zChr9tgUevUWk8}W&s$Siwr zCbryA%EETTFA7BX>+9kAHy_-{Xr5qIsw$Itk$MpbXu`jveXa`W>}M%%!PS1^U_&ro)tOq|mi$5<&_!?R9Y-P8FZyYxCK{ zFj1l=NGIO~j&g|6*?X2ZIuab_F9EEcTHN5MxlpV{ce%iwLwN(elL23~Y;55D;+9<8 z31qTh46*o?aUnxmWHTUxGobKMX_K?8>9s;*cCp`Ef}vuT-;jYQL8WkdbPr0;o$k^| z-XR_0SvnH!TjtNd%-J`hSFK||O-S`|^wNUYc7rm_DmC>~6 z7YHt&rq71=RWyZs}59R6_R_ebG!Q#O(lr3Kgai=HCI8tFI%gAZXj-C<L>VI`TV00H-m26fDVWk!P0;S&4H$gjvP=>)Y#M!y4!`9NfJkT|^1 zUv_3&EKeiLuT7S^l`w4H5asY%?&|m&OH05Qo?W@Q4 z=t)5Xn6n{$;$VKd;V&t{!M7pfEuBsO-F`41_6oy^YDd9TnFnTU=S;Vl;s?Pm9p3_t zqITXLx!H6O)RD>z~ilxEmaW^l{fjq^af)=)l7n+v{s81@nDLv8!4GQ_=hT*qah+B67th*udmDNtm`^rnWbNj-GY*&U}SO zav&%~UlJaQ$KV=0HZjj-IK3+_)=Fdn3G6=RAgDG@we5ar6gRxZ3;KMO3N`(;JD92b z$+pWv6^fqIbV0_d#cI>NEo%>WaYo=6{AD^e>|W#L6&a>NnnUSi`A-b{+OgV!?bzVj z{7b*$T^f++F!9}N{9y6?Jy)USRwYTc3u<~<3*Kw>-J&9^>$*8uX|f1w$WgUhD!LH_ zMyrJ?F<@KPn%d+&CQ06&vHeHI{G@C?Vi zdu&^V6A}4)Q(xm&-M_p5>e?zF^a%49h3LttG#+5DyecL*XBz3sNKjRDuh8KIu(%$L zgx#f}Pum=8`7iUOj1$^yKy8F~-FMtE+xV@!Je;!&MJF%emWuYY-``5Qi+ru?vHJLS?<<_#b}Hx?QW zp}17s0_d4NH!wx@aeG5tdsB`<^ul?!tC6y%1+R?}6>PnqsjC|!hSV-padODGW&fsC#kU%jwA53hgpjkbA)Q}$^|5e>eCea@}?@b4(v_Arpzvhm~m z?p?n6tF0iO@cW4b2EGum6f}GXfn}Xu5-rn?YA0KqdsV8w8NJ>Rixb?N#^SC7@8K=T zE(-*6Mw7Y5N$x*+D;?{RKc^xXe9AIfIwc}YD8XavXP-g)1NOsEMOU0?DB`N3jhOYd zLrYoY_CTZf%Q$q{h!_Xup{N!quM$s&(t?^lS56kJE-8PlOuY~ zqx`mQ3tfFglnIleQ}Blo6J@98br!Yalcc#DyRxPn?6`iNYq+6{wo~J;^X$K*^m(SS;c)I)SB9{20%pb6i}8e{0`J>sXZE;;BsH zfd{4){jvYtkkAsB43=VP?hEAj10|wD239Y$8^M4+Ow$@>H=G*5wBjXy__CvW)Yvv| 
zA4+-$rUfYM73LUU-keD+od_}as8F~$ln7a~-kwHrpenb10ZRbz70%pR=Fx#lTBYw|c< z;6a5oAs(|ozj!r@(WP7T`9h2l_+mZCYweiM%*;IB6OUN@?pEQ0eYD;^?33>3iz}K5wRZ+GeuV!?#tya|ZJ5n)RMxSJu~ZHJo@qq< zS;b2(g3PY_dC`xbS*jkf55UzF-6|~8z{<}=ERF%|Ih-Ms3^^tMtD2XT{2Rv?J1Tlp zD}>qYgvWW4EoMhG4?k|Y*>}Aa%`EDEng|uCs$bZ@zGor&oU98|8;B`?OXgjM1mT2^ zO2xRB$@nhBT&{p4VmBX4tQ*r)UqWA@>P8nmho(aT>;i>w{fl5_TFaV>334IlKoTXPL!49jh0_pKsrCaeZH5GU9n`Jr>8kH~sqikDq9D$|}t2 zw;^Aq?@Rp@^A_|QKHxt8bdR}&mrpsTMDi}SK49?I?c&0fxpIAtz( zffnlD%6?lemMqRUkI`S}{=#CEkGy z{DP(56KH!glqdw&#;7BT#1kUe^{xZkR<&822gN6xCAB!W9rebncbsDJ(u{L=nT~3l z0>58X3O7D-iv3k+<;#_sorE=1J5el!@321azQ5VV&pvyuhrdK*WhA%+_B%tx8K(00 zELu01MFD4CYrdl4alnJ(uN$oMDdIbD%NLCln?54SOScDNa?r7{7pM~{Cejd#{m^8{ zm{w$fk%|HzWlZfQ&)!j)#QwOiDIYbTq2TwP>mJKO2&?E7!JxJlYWtQhR{d^8^qs2_ ze$xH08Ak(J;Y)(^;bIHLfSfT#qviGGc67`YYih>fc;bk6zS*=B|cmM7ONIu6m+ujMs^9L9xuyWFHN?0lv)vo z3@)Bq#G89lftbHmOoLlu^o)CgSqWvQaQc`Gp^pr^hF1q8l&SuiHw^%ygRX3H0Ca;cy7_;DklYX_2e|Z~{}Z`| zq5c!XqyZRL=;H5$*oVQj_#504!nz5HAC3aT#noCJ4~$=JOiWDi@Qq%dk=ciRaY=>8a^F?DGIXStKHJ?ytHPLd@B>$)Yw{@^hn(M`oU4=5>olFxU^hhfuXp+YUgc zV7QT5r%B!S=9%BD38zwE(Y_8#%$>YrLPy6fqEFUGwuNh)-1Lwb9A&-?Tyx3G#tPPN}?A$|P#v5}hz2D{FBGF21_ zFK88=5<*^odL44(X=76?$9btJSptE8U?0ZFv)Ey;{VmwLopt{0CxTm41DJ3dcc(E) zP+!BK74G5^W3TyYSwG%)eE0{tAJM-X*Y1D*1CtHy2B72IW%IE4NEqIJp@5zzo`J!T zmm#5I;F;$(l)I7Px{TifN2jUnuLaU{4tTnFLO&xEwA&XkvN&J;zIJ(P%H(GQ82{77 zm5sbJKdXu9)j#V!tPTS2lub@f9=dXm*@fwfI!Om=MCMt?RxGj+FM6FiP+H*%tP#sX6#R+jVDb`xGCv_{YdRJ@sfeU13x|cHYiDMW|xj02%xeL zB<$C_K=tL-poV099(x{N9yUAYf@HmB*VZ-5$m~l}<9ihSQk2Q1s~-}7NGvNYy~cbJ z+E2rLhA+yAyCn7%Wa%`$RH)42PpiS)$JosADNc*pv`r9N1igF@+i&|p(R;1Vb`Z5XF#-g5~C zOUgW1DX>=LJ?=iJ077sbffja9smoW|nVR!om%ULho4?A;;kbw7Qv-ES3NRpdU_4V@4h`Kgeqy3!gH(HpUq`b#m5_K`^5 zW8I{d)#bvRjP~S=bUv-Y>dI04!M5e)_G{~$i4LuGbtc1PUlYOb^z(@)V}JO6MDKsg_{0>}s~A^g!2OuKDWm zXOq?63_O3Mv$3|eW;D~d!ecaq8JM$UDJl$g)k2dnGSV>EQTvJMkl_n}*^S|Lb|#(x z(YXFv9(o4TCneRW+h$|BAT2dy@e!npL*D2o*aFgF6Vgo=AeaMpAX}APR#r9Y zP(9K!z=v%mJ*I`;u>(7=k0W%n74ExKU!|B2j-otWVgv8>_VAuJ#Vc 
zj3J4uG7s;aHGgllCHwxZU?*G9v{ZBT!+)gfjlwM-orr&nKLc8JH$J-KifvoriJ=p> z-xkf7fV90VPr)m9TbP+^pMPUAVnujAbVBCu0})Q3gq>%TKFIYqrho&lM2(|8_wCM-n0P z3n^$IiUij4HyZ8>=*W%srC7&tCp!ow z{}u?ew{2+k={C^k>Ll~lW;WKJL$^7)N>DE)Eggc&+)?Ep1iA9?SIQ& zd2nKWqyZQe+&bQ(f$i+<>;td5m%X_N{PNmIMCGY7Ix(xiC-Hh&AkdK=DgXmtzXo1G z7M-=&CZptlvaoQ=U`+kU3A*obxF{y%D02|Pj|I233RkFz+5^UiYSv*@HJ=t{NqQjA z-AlVRZ~shG!^e>EArbf6=K6e3Cc~7Ci&uG14(P8wVuOl3qZh_-1N4_ljX7V79nH4g zsvR4}$%L5YlCK1H=kEF^O6< zL32POZQAnzbSDfL3 zuB7A3&G%9IBoJt-As7U@_v2Svd@=Rk*@iII;Os{S2;7_VF)``m;@?uL%k?OLF4n5N zpT-C5h|tkb!JLol{C7)Po^q_E!K|yXYoQZ$0^S1NZ!EA&icI7ecdyy7eW;6<66YW}}j2A850K z))OIwXY&ec*2k9&7a9Ub2z634LX9r^Y6QDlpT&He%!7I#;=!t`9LRj#t)RR2L+y5g z)RSS!nd&yX-GwRcP^Nnz(>;u*l-bN1iza_hKNqhP*DW;xZO=B$n@VB-@{F|e4EnVV>R=Fy~)9YwWOwJchirkDY5$P#y^mP>R+;>_^7hUuHk=%m|8uSM_>0-vYNkjQ1e`f?fRye+~E^$l2dixSPS7P9q=R)cKa9a&(hw9n3o6L)G*Rf zjd>KCtS9uSrViW@@@$t=ezR^IdxwU5!O46-4cvUx+Y07U5Xe@2KOhgM`z64*=tc;3 zpHj24xxuWFe^F6=*f4^Lj%botx}1o5MnJ{C#i}YcPqV^f7}y>25M}HZi+yg2mL0!?t^9}6_}qTb;5YI7njaNWm|X<;^tj`kNW+f6<}2E0UILqlk?jd}c8?ViC#1RmmKVVO_KqCAq-tkCYK!_HSGcDB1N@+W zel6hx#?=ACCoZyzUgUHy|C#lXiCv((k|qxNO*#3Efvp4$FGB6hC@~hRp{gFjRO?!Q zQ+{IZO3?5;n7k^{unxlagI>=$UEN znwz2V60}q<=l=1oQvG{2vgortPQxW$WvK4wu?V7%Y3O!}U#vz=%YmIukbAcb7?K3` z3^qlEG7ooL=nO%<_+cmIJyMi?uXlSd^xRH|jh0?%EG1B22UqGIN8h&jUho86>Xk6hIOwOd^*zirb11FxgzD4` zW-oz2HQa$rBaFt-8uGI0{=!I?m4Y*ay6KXwPd`bX>Q8pSPr!dP9%(z51<$a1Y6j0O z|9RN{UYkDNrjy}=d-_1TKKJ|2z{(xxF}jV;gOW*os-0u3_b%6TkCQF}mifmsfQlJ?5b_onDRbdex7*BC>i#9*<<@2 zn?yNkqC>!GqZHVQ)t5V;q>+kCzP?k*d)ppPgvuBoS~MNY9Wo%;&y~`N!(LlqYbhD*K~Uf?vOs_`m8)4v!xktMn-sLXc*%3wSxmA7cYq0{^*W1Bf znmKsircpcc&HbaK((-L2FX?Ba4W~h%&#tEdM1=H7`q#-S6+xoYlIN^lvFt`Mni! 
zZ_${g=TXP0KuNEB2;oesCguWx5?x(9H?HpPvWTIa2GDX_w>-k;ZtRE@)`wM7gnX;3qG$!>ICN(OV~qvGI*}iA?Mhvy5u(H z)N_Oi+X%$4dQVVOYZT`0(#>ISMYp9$^}+GCva1c ze)>WAU5*efADhc$Q2hclO1lC!jkoEve*r=HJj3G#yHXj0nqC>H^CnK#eYC07&y)Bm zW|bi^2G`A19|)W!?3udq9O1lwUvZd`9(X#!wRlL>;7_sIXs)m$hAM9Tx+^70Pc68n zOP4=zGXn~bwLbekXJ0Tblcw0o*8|La%R-l@@sy+ntQyn# zC!@dMDC1*;Wo~hd@VW2Ix9+#|o5~d@E)5tUFY69r=<4t1`pzi^`>bV+aQ}U83lbMN zJU@$5c=Dv~LU5&9rfG5}j2;aQ0|&`mzQKCx*ahy=+{0)}^Doc(t&}Ep$<8|>G}vlM zr#UZ60ga&}qB)xGE|Mm9Z6N3P>zz%i#+bUfP39+ZO}~w*ET8A$_u3LZt6{GAi<2jG zN$=}AT%>(4Fr}`ZBrme%$Xxc1|8IT|*U&V<{8ZJ(-vY#wHx-pZ28#D{6gfqQ)W5!l zIjcz^Y~EyV&98eLRp~a~p+o#2ulNZAr49xccsdJ@?3l@{2}nE?B||A!#d4IgSoI>C z@ZkI+qm{Afg*@0@@h!K9=bZ=VTl!tjgjKGlkf>jim7V8I5(WlgK9^148HWG3wt1IC z4xjZXQ)O+EHk29UGZ7B?O_GahsLLgE`|e_NqBoBfIp_j`r3S=fGgqw}0v1 z8~e{GpuGm1y80^7NbJkZOJb18q>rP2-Id=NnMN3F5tdIkvrx^InMPu&?X4!h;OAZ? zfr4Y?Q|by`Hc7B!ombA-*uZD{O1_62tT7pvpb{rm+m%w!r=k&iYh&;=;zdOt=kBNv z(pfs51q9aF%oG&q6lhn|oL4N1f-X>|D2U3XNn5OA^j~n2LtO2LR-zy)3v_q8m6eYq zOBKU$SH?qmcR9u<$0tck{k_1<+8mNJ{Gs}=-+wF_|ve`7K{X?(~J zYB}b5lD9cu8%c_ti+*8-bXjbmspa(-MAy1&XQad`wN*l-4G=MPcJ5Ar+A-Y|B}l=Q zZ^G{1-7A}tQ#NHQNPS*j(W7g^05{$c!t7Zzbp7X0V1o?=L|mdg zg_=`-W&tjmh{@NFjfy!kj#Z+_Fovpzn4Gus%Sm22yX5shqz*x}-G$hNu6NA$2pI|~ zc2o*FeoS#8rE%%P+>(nL#){`3(0ntXI4kyT`A;;2PZ$4kjfJp$YMjCScUf)fkOe-HJbQ0Z-VqKHnQ_QNK`li@4s?U}r7mlu-JAS?D^_^-pY%<{g z#6c>l2@fyuZDpjcZm!*bZ4&z0B4^#SbwXC4Cm*u4)?Y zWO!oOq}NOtV=%NL;wl9?SD9@eE<+itp#uBqx*(36uc0YVv2-z~?jNFl6ZAKx#<1C+ zCQ|#8BN2Q1xL!F5=|f#nQobdh5MKFFRf_jW0eYJm$;@)qD9Gz?a!!8dm1}Q;Pi07m5KFyZ3M2ksYjkCMj425;ap2I{EF@ zcR!8BUT~+?7GS^*3}7{V-kZGmVuei;JR*6yp8n5`vYTF+`k_;oM(}ZQn?T^bG)ds8 z-JgzhO^FoB<@P9u)3I))vdXKi}^#J^^Ow%?_#4s}_+{%c5I>k4`5|dQh6fU%gzq0WO-=^?1 zc|GM*>V)im0Ku?+a%_Ofvq+psn_z23CP$x&XE(JJRqxox9&b8~{_v7r`-~GazpO_) zgDM$TOyfjH79yBiK(R-5f_xVN8102MSoK9$+=WXx&yg8&S(lIToXc2RBH`1bO~kLS z&%O)u%D1uo-X#7mZkSv0pXf|UVcvZ?Z&}8P=@57%0&=6sndn2zR7qi6q@W6B>JY-U z@38Ee=r{LlaX#1id_CIuP{hKR9zP&~hfCm(5d4e2;-gS20L$0L=#r$8o-N1BGorCE 
z`<4V~cQRLRFR0tv8BoC~;|^c>SrsP{XRq3P&sBB9UK%z(xe++lqKyOmb33KiGehyK z3|?Num=m=4u!^~7&f?!AiNDXJVAqqE4hqIBr5zyhh@-Beum^!|A)LwYp$j-(X`=bN zoI&csN+~sT&U!sc^zGH@0l_E9BGbvh*l!V zCv*B{eM2o6`A44t)J~4pM$PzcImC;r$l@pwl?nc*epe9A5)|_}-!%@oDL+qNOQ12ly(-3@wo5N|}nV^iJVcd8}kAWW~? zyvme~`Tb>9c#)(_c90X{2sJ0ivpANX_$dPVUa_n3`kv-2cU~5LZ=gUTB=^axT}jN=s2wAWiom~7rKxRAW zX9{W-^f1Sy#6|Yt`s(6@4D*NEvyS!N6l?9s$n~p}NyV!XY>0}!|M3$#q;z+WfPes4 ztoE*NOQv^k+gmaCo-_>=tP0^tt`<})A}B+t7qI%VtRz6k!wo$1YF%BPS>TbM;c-}% z^sVVp9YJc;C)7x{Y$NmmwEV6+#SNZ!JtCAgl>#5P3)dI`>o+M!!!`FQ%XSUgqo6c! z$3_EGYfw{$CfCuLL4iBvLjiDP1zl5x;^=DSWg&=VuI`~`j*t*8yLJao&6wRBWeA!K;O;M8faXC1ls=%`&e*+x&?;XnKf820N!B z#=z$ZLe!6LE@j>ez>qJ743Vf1d>M62=Ak$>92Fx^NJuy^m|VhlU3YMM2GV_zfc(e%nUB-=-bR`uYBUgh5CE8Ut-%kF)PSlF z)g*CNMLd{GXo1B=*MxR6Mk@L~R{vI3O45O<$wla|Tw$*&H>MjbfOiiz#yGWd&8uM2 z2^W-K2$hK+vk2@KSJeW?R=P&+2&y0^_A-tj3^*22s+{#n64sPKG1UVlL|5nLqsw;n z4RST~Zy6AvnT$N*$N4p7LXOkREF}u{ zu;hN2D=r{HxW<*iQ2#kXTCg!73BPp>szfG#C?a1pVDXO1jy!$_X=Q#br)k;p5hyup zJl%j1+)=lf6q^doB4SdEHDv$-ww7Hzy4<8Wu<)zJ)qt@Rq^$e+H0W;M34A_uPb$GY zWwiyeTTEgwPtnw*0_U{S!$*CxqXbjPuj&)L{ z%xe!U>mFJDREnitZi_;s&BEDc{AyNXD+DDBtdXaNHpgF$C8f;9lA-uAmY@vb%6(9u zI~LmAh8PPMG?Iwdyx@=T7$sTOi=-jABT00fm+7s&MgL3M|RS157J&}&C^M4yD zCt<}6YlN!B%R#$jVcvGON2eH6#X&;mH3s_MCHQxV5r!JxYlB%3hln}oZF+Z%S^jda zqzZ@VF+2J|bKc(xWjW#;Jjo5O0<%NqxW%?5ajHbC8dXk1Z>TWh{<90d{7IHP>Xj8U z^rxVXig;#)WImB~P^12!TsL4M9D!q@M-uw_GltHPC>7p!@E#%4XC0@P0nNg*#0c|I z3k4hjZswC!#LU-FfSau7WUhnlQEP6K<}~m3yFaHP`4H%udHNHjc*st(z~ifeKT0(G zm~vFnp^2?5g|K5s^wq>!X^(0I!+}jg*l_i_a+cYqF?JucvPylQi+8n_{n^}#hk8Do zgXIOg^6lu?$-$SPY6srQ$SZ2N@QQT-7i5;Gl^A9PRxVouG5Ia+nul0JE%2qucat{x zwcRJ=jdCy9e9?r@cj!!f1R{<~dh5U2YAE(pF8Rz31rpQi6oALHUUy=0J6bQDUr6DM z-kY$aad&ovNs{Fim;2P)H<-f22}MS9sh_AWMr~^8O~;~~HtccY{vQToZ@< zI~=JOnv6;+?wlt|NO1{;>rNXj5WWoxU%j5dUt%8D3bdfDM6d>HQkItQ3`L(Vp;>sTo3I>dy+-b6?Hpnt; ziYa3iR~%B#@&i9-{!|zY0W@(wy5VxL@?z3w1;vJt;ST&nEcxphH3tlROrQSCa=7J3CZ{cKgZx%moW`htFp@|f{!i=12fTq;Dww(fm>i) zHr=2ieqz>s3}DMlI(bo#e6x)8;3abq+#0`A<$rh+Sqdu4DScT&algl$*R&hSVEMis 
z16Pa?!=vAva~N$8XHipQQ}?{_$%mJE!MRJUIe9n@ircE}{L{c>1Sk1moN*Z@{l9tF#(_3-$Dw%r+^M<-~Kyi;7XRt&;y35w~+R(q>?@%e= zU}x<2KN-z}3u*Z_(82|9T9u1Zww1&ZR4{^WWd178XCfvCC209gID|X**ySw3!t2to zh_(FV1ArZZd5~l`Ro#bm7KI~K3#G{o zgjFW6z>jR~D>KPDVO8m2d!EfwQ9+nFPZbl!E$|g2{#x40Q!B-?zMAtv=bB#E*zy?w zqGx|OuQ7`k4G67@^v0zug9VJ;XO{-{xF>!|t-82RS#j?9`4#O52PuhXO#*C79;C-? z&S8JbsefkrOB{y}6=s<;)^q{42i@Z}=e2%Fe??`ZQz=e=K=MXy>N<<-7>5zQU{zF@ zLRtqysxY5+!WjN8Cp3Ne85GO13AyD~WnPtK8T046t^p#B#=00<`8J;p&nXVi7Yz9S zB>VgSM6`)uay$4Z+senZXe-fJe4LJqgBpFIEoN*5riOVl8{)4>Z?oN$;U1eZ99fF$ z(MdFa?s^mlpf=x0(K0SYwk1Rtna+29RgQ|yDKqvq6e40k>E5Zx>HZF97uHjR(j#IL zCJ<@d6!6x0M*|e?^6a*1SJX4_*p_t2fYMPfi}><`$@3;x#F0O7_IHppv2d2B0_yu{ zgT0eHjc`tp95Fwl*_6__XiA9P(>rf72EiBgsf+gL63#|6OCjIb*xTH*eP#Ti1y5ry zSTS`7XI3;Y=72ss8>F`EqL0%z1B|1ORuaQ`ir;qXp;|`Y0nAg)jA)|S=#Yd*!ikBT zH=~0ce(8Z8wwvxtYtRlpqGyM2V}|i5ijO$Z7CE-Eq(}SbC#PVR_gkHC0OWW@@+|0K zzy8ng99T4*97+VVLL@hsD zVqe(xEmy5I&UdUD6}ptbP!^~NpL8YcRh0slLz-b4hXt>jUl5U`rmB31X}LJQPc^QB zh7QDMrq!w$W%#K{Bv}4((`w3nn|cz%PS`W?lQbl6>FEZ)y67*sQrZaUr*58A^~EV7 zvH&`fx%ZBKAq6!Fte+TGKePPzIv{wn1iml#jC$zdKW;96mU(m}nIkBtsD3BhEu7!E zH4ktz_^n4L1@KC`0btw|q%!BhsM?AQk$P)9b#EU4Z)DQ}4CY!%^osZ^x8GaZTL=m-T|}3|H}qH>b2i;!B?Tbp1rmfee{w}1b{CEppmz5J1lL!AeDwM z2aXPfMf2(KH}XyXnA<^42i@?`Tzf3pK)C3B>!|0sN%tFB4Ev4oW@^J==-xu*1$oAD z-Jv>Lho?>c@1&1$`wl6h&pB+nTp{N^=aOODGNQTsR+t%)deAe@gb?TQgc>S`fF zs4IktH~_mb_|Vf9f56(Rpxi=sA@qeM!y%v2VvyH!tD8LU?0=|>Inzj=6`4GZ%c-Y z)(!qL%z(lxp$qCHTFAOjv{4#n%@SKaGr*5!xg5W>2=8efA zHBt;1BjRR-E&RVW;)KAblD_anKj`WVfPnk}6XJ&GZBA-`=(lMY|0@r;MgLkNpEVWf zC|=SE9F4Rs@xq;|sg6n5ljf+es5rh3eh(0*T2O<`A8!?FN)PQp!b+gF-nUVX z1PPbg)~Gj^?CeYRl^%t3xXc;nCa)=FS>~TpER3Q-=3L-=ZS2Jk_LJbY{0yXjEv<3M z`S~Lt&<1`xpwxs-ocK|A8?NeUFdo#Th6CKtz~UEg33~Xqe<5RfI2-M{i%HP>?J?Yt zS7|3R&-2PgA!?Kd_Rl>m4e#c(;4M9F#^y&NdG~SvzpTK#+i-!XfQM1}+Tz44R&~LX z3TX#&IME+NnGe5!WJb^w@&NTDoDSWWMTpkOoLO;YnW}v;0^|?p%JrGnnMOcW3opJ{ z;28pTPN*Osg`XAZA(?|8@bS@qr12p@8fzt*KdCdq&q$DS6r~7+`0@{Eho_mylX;3N zME4(=$a4&}Fu<#*cy8o`iu9{gW`*lEovK*(Mo>4x`zl78ksg?Nq=yk%32}C;qOY9H 
zu|e(JFa61HgX1s>^#G$l^Ce-|;JJCJr_pHHW12{7zW*EqVkwbKBQ(soc7zl6kV`q$ zq6-G@ZMb#CR@Vpvm9tTPCn=#d5q&}YW!psUZ?SKX6d!UOjZHJ`iN?%dZ^|P5L|8KT zZd&HdCWj~Wp0kZ~F4I{JHXIg(YKSFqvF;zk=*t4(C~L7kR8qYoFD2rl-NZauEodKSq`E<fE^FjIK@h2UK7qWH)P#`1;6Pe18bE%`&F#w`O^p=$a$<)M{3wx)%=?z)r^e1KCM zK@E@P$Dq7kF0AQytOM*9bJ?as(->gvSC_b%Ll*#qUyr-mZkBQdH zV0Tlke=L3gk@>zcTy&MXY6SX?sg#X|o(VjHWe+48-_`H4lL4t8#`Zm)5$8F)FxIH9 zy(?LF+vXhyR1)%Q`g4N;K?V!)o z$B!4h1o9jUPm4snJ_k{oU}pA!j)>V)c~!Dxr)W`S#x{-=bbsn~mDz#P2>hDoF?>|h zL`<@M%}rwC!VS5#{xb2uKZ_!)&CiK~&f}tMmtqWBRZHB5^7^`DEk5^4ucyc*A+}(YJqTId8YaDIUuPu3)tiAh#t3=SFOJXFu6$8 z2r0L|*eRBX*=k&TkTxV-)5@>L)*qMPuhDVQ5r2UI7^CYiQ1|iPzUf5~h1VQzN%@Xk zNONE%WSVL0=nGBQ^Syj|T}T%Rm!;wsrP2#T#c|7Nedvu@v^`qh=H|%cW^`;b2ViZu zKsa3Q)s?zzYrwUY%RZ-QSA1bENHmYn+Ze#FIaK_>v~p|e{VAwfp`5T+uEge$N{Xuf z;tdLFM&~FV*5uqph!v6U?|tt$BI;@hi(V^7*hd>M-%KpCuQlDJFA}0OS-9;b}kUsVherl(_?d}vpSnP(o z@Py1dQ_h?i7x0NT$RJEgA2Ibkh^$|PgB?4R#ujMJysgB{leT1L?OdU8eBVTxL4JPZ zcw){}Q|QbcP5O`{Z^Dn-6F_)G0vi>>+_GAK|FgEJ7JD{D#r@Uuvso_S`=azXj77n95vc8N(n^%q+W$anV5=+N0uE9et3(rmF%@3)m#cJg!E^<=(0pi%G} z+}Tma66Nsi_E;G*b3}Wddn}FBaG3GZxG*xcW2u@lV`Ja;>A>}0h$PS%R1RJcAqoU| zPak>xJ3|hV2lKtugi8E9{XI?)XX(!GZ>=BCEb*)4M7SBuQvP+lOrY!osbe++q}fuf zYYTDNJ?_WGex~$7-Ks;fi(rcJ*KHD|RiefvRQ7Ew#chxV)bfkRuY4_tY)Ite1+;~+ zo`odthl~xkb^Pn`s}%ITv?FnBwE1 zcWL&|7AuTei-Fodwv3t56(WC;S~GhfLVDi^6HJ)e`mo%4kJScuK|?1?*&i!=wc%@? 
z|8+ig3ux*OO3pNu3NsVUKQPXJFlW$GfLgyB=>e`cN&Kl0amy~~)giZjMy8QQkbzf# z31jNf)}8XrHzNbj$m9t&36%zD>+btVVhFP?YS)@-={22+%bjavdA!|qCb z-uu|kXrm*}mggXL#*FxIl%Oz1VSGM6<#d~Qxg-Z$%JNzJz<#nl!hoxQEf;VKHW0@e z>CqGZ#(rObkne*(Td-s$pv&#@(=qW~76!$r(85Jq{j!Vdv>!i)V5|MpNpb8br8JL$ z%NusGT1u0@YhAKT85cBEZE-pELx|9l%&pzJJ1E~)dlE0HoVv7DeEK|ebl(R*sVHZ!M6# zRXn$ouKT8x7%g7d|KUk-`TOKA!xOn?Sz*HPCx@9ES#kWV<6ScestIqMt4z;EX|r!7 zNFH5ua<%Q1@N@%mx>0YUIy~p%a2;Uq2nHHwzuuKau1~yuf7cfqu(TOpYkO0ZQbnX} zyEe%y7fFhjR&CI`IMQRAOD_L_iZ|^2;gZ|t=&qbqaBFeF>WdBB@_Gj^bX#9-O3c1^ zm~!#ZRFZ^}j#XOHa;e~5t^syW4Q7MDkZ>b2CT8pNf3k!&nGwQOBXV}=q7!_fOPj#- zJN(zfbh?n|hh1MZQP5QUV)IlLmy7;6->O)K*>1Mj_Buu;YaE-zs@aH@r6zKP?1c^X zKT??D7#au^lC~ME)K=>}{Z=(7|G1UHz%AkGS8E01YC5%TFxe^wj!?^8ZfI`UC}%{HBPKF z#e4nTnY831tkee=Fai6D5r2pROqJL{`_1s$}OsBM*h~*O~Z5%7YS_X+z{QuCc`gE#VY1h(6Th@z;Q8^90 zg`jS^Ev~r}LxCCA`I(~Ylz&dHz%x3Xqr8r_<$ZJzT}VlSkR069cy?&dXukHVt>l9w z?kCk#?_UU%R-paprDx+V8OWB3*NQia2zAc-*dvEFd42R%sICU&`bBJ1w@$q^h`a(4?gN%nNgV zWALRN!|{#|M^8*js&Q*WI9w;W!>uSf$lrN=btI$ps^ynnYv_%(*GiE@RTS)m?Q-W{bY+J1dxWgw&@d zgvj94Nr;&&M8!g2O%buBe7c7=z_7Qd-DouLYJ6}Z=l=Hf+cBZ!<&==+UtGWJQzt6Z zLcsWVul#a_)Z1#PH@XV}DR%bZ1yT|e?qbTg^K7UJ$tPqS*dU&;on%G5VkYMXcNo7H zEb2P2AGTW1kD12wGJ>R*UZ(Ss6*F!38ztM<=JCE=G?L8 zbse`_IN2BTR!&dJ@!9)DIV$J#BIPZU@fgj0wi!NwmKh6QqA-5J`LNH+Xb~5W(kFV{ zaT_Ixow&{jc1g(H+G*%~W-@dCB99rj!K`B1yjZyC@T3SrsFM4$vK;;O1APm)jfP7y zmt$8K&b>uWoa+da=6Qvh@}H^26h>usV=_BIpXZKS(UpM8J_Tp#8qrLL-;jRY{obF0 zF8c`^47IqXu-5W}1eJYJM?;Dong)!X9?HXBBNl#DXz)#vG{aPjnl1(0(ua}it1&m( zb*iDqiWLh49uWo^#!0KsqH;y${{&R7_E20~_g{Cezq0nN^HQkXnEEnzN_ES`2CIL& zo4&QpN$C-G+d7;IS1dTqW$%g>GrT2hcGie=@QcG3x=B9EfjVRyY;E#^Iy1KS4_Eb$ zD?r-9NrdK7Z(Q;Fng(AN%JX2oDY@4fMVfc(FA<2H!u=rifX##gNaSxh`s?TOA)+q1 zTWyDF*3YIdSD$Z5-1AQNxgiGiat+yKvdiY^0N6$XY1`x(*^jWq2MAIx1t{f@=H0s^ zn^r_#J;%@0a+6#9d&zZiD2P}K*tPCeMi0x^)57YXz2+|sA#88FZ0=)Wnp^yq{w6=z zZxC{2p9`>mTk{&PDa7mY*D*h`jK7U0RjXoLl@Yo}m!6pg z2bpSzvm_tLFQi?GD8AoC+Ak7#@VYj-F1a%#yMkl;(J2=uYm)uM?o2IqnlmD<4Qq}o zxNCtvj}`&-pm!k&_Gb8HUe?>;B$@-p)eAz$D$ig&Ui9Iu 
zWX(a&l`A%j>BglVNhp&3rf-M@FKTs~els);Fc2l_ulV;rF7^!zr3w%1ts!D}72WfU zDZT^LdgbhjEHctbQ%YA77AH4fI5B-#zX5u6P=d7^%G5gWNtWXKqU-R)Nrt_8Fm3YM zsj;;7cXPe7zT}=?VNOzp{V%(;2t@%Q+k;*BOw01Vi&pTIoD9Rk{Li zh;ncOney28=<;Vh{FQpZV2#HMs%G+$tiBitN+etL5(>Zi=teGm9YTd3%+_0wHmH(DG2UHW@F>_yL{Jt!oZaSnDI;*d$_;_N?_c&c4nWdokGAI9R zaB3Vg0^w6mdUa*t$%}~oK}AKS=HOhbT72(?({yI`R z6P!1&(Domr-zy--2&l3#%NMrE?H%PzXKj2@ndud=2QSfzzG@jglm?k}*ns6DDyM%S z(KN-3XfbNad>5h}%UNN51yLRHh4YVGWKF9|cD*Y#Ml;W0ZtQ|?u>z}hu8jv(D@AB# zHR}-E3b-G8TGF`Q)JfG0PL$W1SFP7m=a028Z0enpK=+R^Li%ggsXf7E{3UpjcaclA z6~(SBjHja)%K8g!>j=KzoJn0NBrii2Y62Mp3sFX7VQ-DD*YJ?NE-%N~8%cTuaW?t! zJQgi0`-#yTCvuO|(QEw$6f8x-^$$d~t~L1-pS>*{ArdieJK-YHr1i!>b$=7AL^yRR z6ku*GZ|Vm43`?{aKQk>_ZwqDL2@+L&TuU{b=`FyYjUAWWSI2z35I1q4uc6r8ibBep z_uJ^GqF8LZ-rQFsN6*Oa+L-2NY;eaiT6y6?OQKb5;^%3M?OL>pxO%pTG$TGkj9U}T z%njh@{5Al5PTY@zJ5XjRw;|XKdf2=9I)Lh?s$E`IK5j%>_n`H<$eqBQs)m?9Cv9wwP`~v7g`HF*8nwT;YGARBbl)4Wf2Rj zWfk*a1gd1aRoXcUZUH)jCJ@jh8(1*e( zpw?TJi#=fLB>QKBrvm_4VZZd1+uN@}f8D|`%1#-T?TRz1zu}BW#Bw(n zS?(x(dUHe&ccJmOPbG1)asj9S>mB#aPgXy>Q)!rhR65u84q_Ge)M_(Yk{JzDyj|^a zS-8yRGYhPNS1bIUY&BuM>jQ!_m6ErAAsrz6YYq{%ZvUM-t*E^v(`DA=o%k0bkX%pB zRB^du0Q+{BHN8?vEVSFE!~J#9x-OF6DH_SKw_N&P*apg(=s` z`Lm>JahYeP$AK!10lMzxzC`hJmH5SlU7;h+8c0K{vP>(ubM?i{@1p$K{~5l`EEKsl zcX|&SNN{>1y`+#FI0=wZ{lQ&wkW^$Fh^2y^{lm9F7_lW@Tee;^jjvL1>2V}>% zhkH0=l?@md{QvFDZ*Z)lL@o61` z;q9l!mcJ}R#d*Km4#eHv2w?W)r4zLpj*S3nTH^m<@68{n+Q0wt!_`e1Wk^zrGEd!z z%*jxhi)4x>4nk!nLrP_C!ih}h7>`hyXU(?|$B@iL<|%UspY`15RQJAa-}hhe-9K{n z-p}WHuHmsBYdxP^Ctx-Am9Kv^r2bIn5;&J9aMhmTxT;-x;;AcWuiiR)p1)g)UF@1T z1r*!B0KVihUUD7p^lo^{EYji9Y~Iv$k}st{@}PLx^s&4vw`@{%Y!LrIjnl=w_C` zetzLCy5;Ketvl74ohfQ&Wx>V8C;3cv1M_qM7nHys8GJ&^LxePFLbC{+AMX16|9`O1 zZ%+?GhwH#)Ie-X`u9d#G&OP5-hPpJzmhmWDAC-7Vi;5EGjKX(ljr+{JlJuFG)Q%T7 z$>Ym(cQz_E^m}=W6DuFWbNmtuCr8Ss$DVAFd!N(oWC5kYFR3gcoT%UiSLN+1E5p8Z zQj$zYaUB*e=U0=|X$y2{67KPg7FQwJz3~x$mWJ3Rbi*c9j%ncErsaK=rk-p4{A+vR z<>c)7?238oHR_?X`tS`)%4F|SO~nvDypUE7UiS=2rYxuHqT~8k>Qa67{^P-B3Z4=j 
zGQRn#9$T_c-E%PaweZ(Ro4Mig6D)Qe32+`ANQ~>ZSRTkKH{pM#WqcR?3+H#$T*-~k zT&~^}?_9O8NUp;}L3THX8VxFN=v`Oe67Rk{)S|3i#`~>?Vzk53w-np&BvZcUL{zr* z*WbTiaThi@oOj9B77N|+ih1>(T#h&R-tvCuBX1s_38A76j>-t@xx5?dl;?->FU@EN zV)-eW!?+>sfAwLoz;|p)g|}}vZLZUw%N*klH;3)8jV*`l($!-2*-1~%3vaGAMBAg_Qr_6< zv5+2?NO|X7yC#${_2b#Enn zM(G}=2F#}P0kMA=zO%-n|=M{~0e|9DD6}6%-d8=-qnd~!rG3lhYH6DE(~6MxfxbxUAmo|ZK4)f_ zt@kPX%hElxCFk}SkKqUGeA5IQOUJ^5_L@Fn+Do+xq5NL_ePkJg-m#)=BFoJ6>*U3(c)5FGqdxFl(cy7TO@4pglla%K`KiW!Qh9l(xgZIs{{1?prJmX* zMdJs0IG^maxrBk-O8?8fE#8@l6fhkXUUo^E4A#Cz>s<2Y)P-1|#ePL`$ukGh)^)fZ z?<}3GVHs?t9Lj8nvhTWsG@fBC+C2)FpU3z@(Z0dhRNowz1^YRr+mTRNdt6784WXGw zviRFeUYqYjy}v|vkR}3&Y;zA+-{MQl7kY9TLr;u(iG_-q-oeoZH1+YnidSht8DqI$xj>^@earwpk7?aU|rT6LDPw5f(eTMUIG^+#-HT3-(=(*ew@ydS)zaJJYg$+y0~n)2(*dHAAyVgnks4 znSp1>ZJMTbEf>|U4NZXOu@e~00R`ROZ{K=Y*}*nBAGkX?Rd#FydtE)xYc<@+Vg!Uf#%v2b2l zcGeuGSw#>nf_3HVWwR*VO?LIM77Ir?P4ju^I&3aU)~aPZL~Qi+*Q&HY}#BU9MrU=pFi#*@EjTWM)#{%F70w^kDcOs}V)n5EdX zR}$GrBJ=dYi;}hWkTTdx<)YT>4>FWjX5u`SOi#>mXKgkgtb-k$4Q-AU9j)Z_k-jW$ z=09WQJ)U}UFLvcGxxXs!^S~B5;PVjW+ThL9<-%Vk@7K8|tgk3NY>_2VYez$xwpRG3 zH0=#x#hIyD>k4^rb~&)gRY+z?Y7-{qzViRNRApZVjohHDF3&rroN9gzUohm2f51xI zqu=6f9>XOQppzLfDLd&I>V%p2RNG>ghl07ctMBmi`8KztwlUwGbW5i9Et$@&BXZH_ zxmZX2k-$hGu4+L0U0vd%EXqPO8wJ-1rS^JbqUlRTORZTF56%FhV10eyCq|BtPq(hB z9nOIY-jMfob^DIDy%PP&QKuFvWmK7kUdf`i;*Hl8Rd%AqOOW*Gy^JiM>dLVYj9j^d zjCs@E$2Yls3YmO}uWH9SO1AY2mc#^$^`0rmyRateg&bi`)UUqRHQu!%U889&EVJ+f ztou=%`d1b$QrJWjDWMxhwY$6re6>#m0e_w4AE1iB@F0=xt*2G8~t%U8px5I1X$y=M8(~5gu zpgBu$V}7!JW!#SHD3?DdErl{@#qJFo(9C5LIz-AB8(Pp03pH@HPwmCteQ~M%!Swrv z3W%-9!y`v-DZ`*KEm9oQmS<-KCc1S!uOt)mCOGsk-_sZNZ@l)94Rzan0pS?Gc|puT z?!L}Gk{#=cc$ho~I^qs||U7RP&y<~7T{pPOxSH1ZvX zLkQm1zKc8@8(Eup?7BKNViA!Z-0b%`;syK(b%WCyn(>UG#5~>WGRSc~C+VCif7+}2 zs3)srjJ*8%>60)}0Uez-$cUPR1jmia)|N6pJj1QBA!~BsZ0n1SO7dH6!3W7=R^UW6 zqi)gFcYa>c>>@Koqdhiei-8yEz^82T9aHnU!(!V7kQ2f-+wk8)WM99*X zrRwEVbS4EXRM9AkkqGXD**OpM?@y7N_1R0;@PP}qh-mV#SoV8yBl zgeq5%~c5<@lGX@stZ zF|-KvWBl;kg}_joK_LCAvwj{IYR5Ow7X$!4Au&Tl#W<7RS|#T6lx<6LS*0dl*SuP`0y 
z2n|8jp#+F(c@{u*dNFr3jQ{{98wtM&;i!f=#l;7|kg}T(=-GvqAchb{8i3;X*lX=uau%O^xywm~ zmvD%dW~}>y0_Wc{-M>B(M{ ztohBK#%dsFI??}!6stGVlC`>l$()LgAK0aOAMMeKpMFV^O?ONOlj3(UN+hUHeE%aw zNLT6mxM@?lhfpzOGSXlksObzd4_(@Gg{YPc)WGGQ={?-@coQ(bHG~S9kPT4xE)~aP zW;45dg1%3h_Ta&HIg9m+j^z{?`wR0yXxXVIjW#);JqV;3Q-N?znjQ=4;5wjov-1%EwTpL516Wzaa z-?5L2rfZa`<}6VQsgeb1kmgK9ov0{T5(SEej^+#cp8U*wQNF0cYk8OU&^S-j=!|*H z@VWtnGH|ri&2qDN)P?61_$BE%X@Q~d4jK+M z^9|y6OI6GGu-lK1CI*l0)+by3k%~WE?%%AujV60}1c{tS4}0NJ?Kml!?_$zemuZ@B~dpFapI_MEo#ockDz1&>{l^b0h*AzbT}n;E69 zI5Q{e$Xt`a{bNXdXuT&Jbn|p7W>~g0AfTLJakhsAj#-Aw>G}T=R0e(o6{o-Dbd`Q4 zA`%ibQE*-QsfItXQ)L3pYMd&61;Tu94U@}r3TrHdX#eEJLS5^fS?Yo#Kkb)}HebLM z@|Y2807otUmlp1ZUy&5`ot|KjSVx-?^X&UE{sxRE5=A`GlevP;m5*s^rrrpgRWelI zNS9jCW(AJg;-^@F-!#{8XP4thB7V@!Qz$GdcX(pOM1^pT}o_e1rsBOh+=vylD| zbJn`~BRKmROaFhHGcM&CEjaUb|I?faCw|Q6;&0^WOcO`-(_iTpuxijki*)wNX*Ax%eTlXrOZ#|~0q z=kVv7+*$q)^(`>kT3!EtIYd*j0AlXt{Nvw@c=YyXP69*wB`+` z)TM2qMDyR=&s<#!C%hScWX(pYOoXt}iKz>1`I%1^3tOml81*J9FL}1gh%wa9c$Z9c zKVm5mFCb(Pc>6q56%3;s%Yr^A?kX4(jPeG`f?TU|U7A9)X^f-_${7j#A8ZHpDLf)U zkN21iJ%kjW=`K3HdTLT5j4OTlwQTfj)u7s`g=mv~HM3aBNW%7)$ze{`=gM(T>gNuiilp(EDxr-lm`PgZGlt;}!?rdywx%kI z_K8P57^VLBJ|O6aXg|a+(1*|mF6YQa)r`Nc4>p>luXAbjk8e3iUAnhFEwu;(h6HVY zUK4Gp>pz}nfKXT2T;}ZFV#ltRpS67-y zUb3OgWlQP!j`RBpl}&XmouTL019_Hh;-4B*f?LryB`jynFlB*LPx%nmu`BZ?53A z7@7iEz4++^s7xZgdgP~DCx8ixx#A0TtPk^-(~keCKzfY5raE`7+=Itl&NGe5)uCnv zoO%S@K=HJOa$VCR)M>TyM+DWRFY)WfJ?eCX&ALfcMSI|kEOH*S@)*A&<%_vEHR0IyW7ZwT zF5aSZz#tlmdZYR)x!H_b!M-tyf@jl)9!})8QH+H9XlaOqAIc`iy;I8gA0n8X%p&er zbdFjXOAx0*IPkBJDs=G-)kkRFQSsjZ7>W*S*I0_tyyNue--9~-jlXY!Jt(qU}XZ_q`sI!3F3&l_KOytvWgo+LoD@l#+;9HQwHI2r^@MASYdsbLwg2p5(ff3~=iLN&Gez%7+m za(0VeWafaALo%6ey<0bm_-F9SMT1>8H*)KsnOA;yMOJ1rcMp<$W8ZjKzXJB;RU+ZO ztiB}Sk`ZWKh+h|aSvcw2h1xg;%!LuUe!A3LB&#;1xc~+U3(_%GGJel1*S;V-v7*{kaW&(WD4~QID2q4)*&zh5-A&e zFE_4*$^bLtH+Oi#VFrv5h+Y-46jH(-(pL`R(UpE5Z~u@TE7)O$r^=Q0ohz%~8u!m% z?JqdGXIJ`e17|N2@{Vmsc%O9!*pqRKTy42VcYE@mI6h|XexDlXF#INRkjMw0XCznOmiRmYly!%W;<0cm_g zgM;ZGgT%;pdF0<^+(WUk7dd5Ih~34; 
znyts%){3Y0y&7GUkY=(uJtITdG;D{0Y+wH3?5HLhf^yG+mHM@Yhe#=_)&{8Kj=a)0 zDf4t08XL><@LFYpeo^NR6S^zL<(@gTpjHy6=~SL4OUG)SC>%CB(y|YV7k;@HTAosE zSR|@zoKs5Dw&SrHp5dZ);x!U{$HqE}d8fj}OBSlm887JW(6xYOL&N4;(*ngB@l_hQ zb%M?r#c2|$iK@viWUe^L(v8Ff?@3?!%+ES=lCe zNY({{HN|&|G2U;yrvr)W*k#F5xM>UjIC}Pj_-CQ(O-EopQ&Jl81b($Iuh9 zQ)r+3VP}Qb6;*xOQby7Givj>N0$$$ zZZz;It)M`>#7m?hPw7z_LTfBr*vw9+gNZUVUQp|$Ydy~A&YLNpW#;*pf;{F5i38u%&j%s%d8GL#FPOCS-Ma=xvOlA08RsGgor|YWbmf1A1SY zFEb}?x>lk#x)zI{+t&C^*c@4v4}sKqdVnILOwO}6zv{Ff`}MZ|xGpJMeB@YI~P_^e`|_kUzuSI6D6e7#!3-rnB0Fb7I$xe#2A*lBOEAHybY zk$=G$lVZy2PxDTL)yKn=uO8 zS$){t+}wE8@#}ib9NXpHC9oO8fkKtSR4e>xv3@Z|X z7#bO=R!JzdPx`q3IT5c}072w)rJ#QrI47fa%r*#I;G8kp1iLCk^G-a2Cp{~70wNuZT2!K$(XyUxzYM3K zKZ)`7JGH~R1(2xx2H6rR4^nqz$Ts6Y+?}1y<1UZI5w1`XTte1EkD=xUSasp7;JUQY zfT-hB-LnjpU+l2(jdLMVMYL0ro?t?|MeLW|hCfhO0d|1aC*e?p)*o}MUB#z0qYE6o z=`C%ktd5^4dF9K5v+H65xF8Qp!)ObpFdV^C`x+d00M{pW4tZa(rS8Qp%|B9g49t}5 zj(8RKxcLyGO}6miQsROz66aIopxoDYX^HyaU)P4>Qto_8O9JnTKIjEsK@yRYk~**1 z9*OERkd72!EQ*8&Y9iDkBvo(?NB>BaoUb{oJ0>Kkkez^fHNgnkq);7`2MJ$sVq-9aH~P^(jWsb(zs(JiZc zAu_@(c~!X516-jLlf@<0gOtC8NJnL5WuGEP0GC!fxM8a1#~{sboi|GrNr81~B_(ntoj=^+MA6s2 z@cMv1vJK6-M+dN53f(4c6$Sm2S*`xSq5=N08qTOu#_W(>;`!y`ke7{H(@f^J z{3MUz5BxZXvXp0c4J^TfZ_SeNgyzbImK^Jxcxh-==uY>5gA{YV0?-XtTo6F3hM8jC z;P1S)BCDbd6Y3*?UfT3x#plFpt2f#n$BeB_T-VW7Y;8kox73bmSuB6+2pZ;*wDtOY9t>_Mgi7=goV{0{IBxY2q5A`mp;X0>vsY&slsrxQ;PFho;s?8hj_+7rKkO9u@m^7SGuD2+GhW zDCX@xW@KcP|MDT}$xE)?n1ZYSc74+<0^%9QV0SfIbQ04z+vZJ0OK)5dKXb>){#dcEa+Gc2y{qgy052L8-~m zH}jv7))7Mq@UsrGm{16Kks}Kp_m`W@30axR^xUZHE$EleJ5x^21d2MB=v3lbU z18>RaU_tfnY8+cMi`?VfMn38{ECV*ma9>2mXlww!x@D;YPN1}bw7KQ1_Ln)d1fTk@ zOtim;0#8?FA*dmQ;r-2w33P0-;9*rC8Rzhol-o9uqw~dsQc_YRPk{nd?Ak5l8}{bd z;7S-6Zp$+B4#Uk?y8uEm_@X&CBq96207!Tp-t~?58o$>KcbPD`Ct#&;m;2{YzJ_ z!VrVr5g51g-2Pa2=`2JNtt-hNW-ohW6fy>=uML}gmoTxsVSbt0dz#{6s5Wu3DoVUI zKaiYq^7S??^gO+i-}sc!3@~>`M@QEpa4FmRCiP+_ZApw>LT^p0wUZ*uPOW19Y>>P} z5)!iFes%WcIJFu^;QXx`;xE*sS3)~Y>uFQPx&+X`sL$L z-Zpb7xnh2&PZteIt3W(oDHdC52UzM{e81x!9H#|13%t|omuhMEj2J@qZxqayF)4Po 
z;T&J;S6>@Dc{J&xqAyn4=c}EMI(67cX(_3@7(no^@cSCz7ru#XXk51`LNvs};G616 zmS;71(e8KFT!<+!K7Y)>BIEI%e}km@b+WxXdQ)oMhL;YlD5dk0%;<1<+@`4 z-Qhz3+Kv@VuC36hFYEsG({-z_tS#_~#+)j~P<5{{uz^T(hqnCD8!i*yHKg> z^pe+nsUYU5D>IRQkS$pnBoFrk8pV)zwMf8*Fdo7t9WAX>_N<@VPDYqOkF!Y9M_1@& zXwHQAReN#IF#{j9$C9n`Sl~#jPO5IHM+ z)_J09r+?a&&PM#y-IN#5zYT}@;BY}BQL_~f`wZ8FW_3=?Ok2nJz|l6`8~JfJ+I?4+ zY*GZAFCuKrQNb9d!XST-c~aH=zLA8Bi_7}1)Cb4W6B-;TE8lgZ=T^ouidsom#FxE4sDGCgyLR%Vu?FRNR;!c64zBcLDCQ6d z$QtHR{-)&Eu^?H&%0(xU!v>U09lj=7s%55Daq)u8x4$#2ZKYs+f`t6@G4m~O>^jw; z$Xx%*XTr=V#)b8n?v7;<^Wy1Z?<($+X!%Wzo}17J&+tgfPBosjc|30RYDQFe{3t@Y zbP`iiss4Xw7)|EmLo9ZDR~^WQfD%0$Xir*d4;C*!=@J8!zuh-akCUt`v6Jy#K8vG4 zi|PsYZ~9`W$uDsJ%wmJhr^djbVFc5IU7^aEz`?}ArIvPaVS4aUtH8!E;;}tKw_fM4 z8hYgEUq6s{!_h;oBu$_-AC6dE>;QtvRY4_X@{_co^O9^r(k~IP9dSt$u?dCDw!9ei%jbWE;y6VjC-X|_#_}?8YylO z7qEd@Pp`y%?2Mv;Vg?=<2%AzOIjuE>#CWe;Q8++-zQ?hnIM24E_+){X+sw02G~V`s z|H-uL>6M=_hH)G>nET>%t9yQ%Zk~S8H^)t){ludK@VN^cK+Y|XlJLAIHHS++N=FhN zup&IPv#MGMp5{04Tm0ZoEg$LaM4`#z%8D0&eMrw6wpUEOXlyK&pwpAD!QiJrhf-?57Y2`j@dvaDIIxVIJ&LG3#93p<@ zMF<4}Ij{#8lzDMqn8S&KaboBvR=1JT4vw!}qar*2N~svaOSs5}vYqstoPAjAXI7;q z;WyMl_aM?rJh})Lt_}-y6g@|e!Qz6i?D~hY`j* zpQgq;UqDQ9$|1^4g4S*#S@>_+MzXo_<2Y#T z8=NQ6$eu3`B~nUVh8w;ggMxP+wTnXeT2S2p$`7#wso+tmu&6uahqyedLMvZe_^(0w zP0;TBLl*d%!J*W61GDB_%hP}odoc)B>7I+BxrDLxaY|8QWnHm`9P8s&c+9U%%^1{B zMM}8+e6@!4`-ex+1~3FWW}Y~^xL{a~AjZt$J<*=!JfGSjWsl`Zi?KI15%ZoQp!j&~rqL4G8nm5EtnV;xVwuA)yam7Nsr$>Yy_=5H5b|O&Jg`wlx@$K^6PO zeE@f{F%zh)!RxGY#az%~Jc3!uW%HA%6-rs7 z+0#C^ai6RJyqM6q3qMaFVYtKX7Ozghr-0iZ0S4ULJW%~$AL6($?NumjUpy$gv@{-; zp>2*om7u*HC!KBccsJWeg!rHXHWt3-69e_T7&OaICvH7Z@3&};6{`fRGdK0w;{zF7H$C+A4a$&#X?jf(f#6_+m1`^7W? za-%0rK+Xb}!~;NJ5%0@#Gjd4QX}eqNW4#=V5dE1l+OPOc&*ba3{Z1jcv~2-dGVd}%iFea-a-16NQQznz zra?Gn*LVg?W52*#5T9rlrVo@}YdxKwTlDd(y`lrJnL<@^A6LyGo$}jcKZwXspNX6AfIXO>F8@JIu31x8qzgA!|o? 
z+CL#HD{F5-XryamT7X$e7e&4}9oUncd}oXrl=S~c3XD-m zK=7IXdBx5#Gd-OI+xO&3b`)%vbrvW=HkWV`6a?g*o&H>)kE6z-;FPH>j`P^(Yb3@1 zU@0Cz#6Ip4!*~yjM_3YjJpr%jHdbVg5Jhx;{NkrWEh4;Z>NrUEI+9SW^ta^JCnu?`(^;F%Bp$H@7+WF}}eOl-En_Q#CFA5u_Ss zb0Tw719Tmw-!S@Z&6L0b<%_6!_zXAzS;0OI^N%YgHBKRY*{1P*3dKlfR;AAkxB`Gn zJr_mAU1wlfy~USynlDY?13^Lf%KbXQkC9wBHN7+}`uUx~IAbp#BY2p}iOvXi`PuvV z&Nx<`(9Fm*-hadf)STchJmCnx2k;{)}otW7X#x4=V)FMrn5{Z#M z-7(qCxhMqNJnLzzf9RO01N-ZquhM``9A3qjd4032-h^$ZN=TN~DMW0YOfzY0(7Q~!oz?~H^%P77a8ylORo-T;YypH2c zn0{_@$+y?TJ+_+CzAqvfa3RQe(Q_oj{-y3lOd%%H4tw7);$A1xe$iNgelc&}6MYsO zeK4$^1Rcc6e20!`@Nlp5bb*gShGRLT_D=-^Z-NBWJ}!SB=NBjlQ6~|ij{P63Xh;Ob zy>AAdFE`sppJgRJ$9Iy3^I#ET3?M@V=?xB0NF)cRzUl|l-NE8kZO~*RjqZ|gLC*e< z?63^cj?r5A7z{Oj>XTX@xm%qr0^`0>wMW)@Ko1+xWf+hXXeiZTBi)hKTA(mfC)c}Sc^arMmu6R&_#86@yGhw zxM@3us9aae+*W|sT#akcdZoV}T6*g6K>J{((faDAyJ(B|*pJnXDyQu@MUJd$YG~Nk zS4{-M>-<_Yt2|g;jHc_lQkF7m4cQccbB8cyT>BYVar6tql@=}%gxkhQG0dAP)_6IW zflBDDj%GnnCB}_)bU+=U8c43%2oXonO=?^02Yr@1ZXUM?%?Uw>%X<{~=RQY?Dn{Ip z+GlI7(T`7f;ag|>N_Om`9EOUQHUPZ0&yx?`+;Dyhd+oQQ7*3ulv5h<1_;fNW_tTS9 zWVW&ffH~k8)ANb?*AFu99PKPyykW1nFhD|?cFmz6943KX^IW8cu9el)827A4H%0E$ zvUwsit}z`TLeQ*pM@NSZO0J-<#jqfFzl(=dy>+~I$`CS0fl|a+JBg3#0Daz=y98+X z_^`U@AP$RTDDcR7&0di8UYI7dsXs>hsy{QKVBp6S1VIx67R8XYKuSV$)yhl4-$O_f zCpM~Kp<0>p#ePt%eBKk($fQ1Ql{!|C#DyXUF3DNDfzL5gk=w&IsiZ@d5_k zw$(K^(s#n_&C-|8rt(oxDcn9)6#r-m%)~E{$}}%!BY=L^VcWmlI9@rx!LR^MrbWA{ zMEqo++|`8R=@PTzK_Qb(n_)kMkaU~2qbqJfmSe!hA)w7Rhqj-?Hhed7pVb_SCVYIY z^)qGD*ASIzi?GlySH*c8Aa|Nspz`jGyRk%AH3%p8LCuocDYV-&H#c}*UG0%Wpin%c zgRTDHijIB@TkZKpbJ6~%fUobyP3Q&?RQG%C9D54716o?)rHpnO(J~y;!KBIWA z!Qn4}MyfDu)YoU-r7FeDuBLX3xveok!D}`N>RRVNx4=mZ{JD-kOIR(z`%!S)Ap<8h z!{&2iXzPC?XfHU2V8_x^PNv1E9u3RN0x=S3@po!+`6oTSylx7IM2eY5uVQ}kvQ~|) zNnX7CTlaO7ncgOqrrFSb+5EsN#(>F9_@}c-9=0s@gYOjzgi(UlFw4xI=g@uz?50{m zG*D~Drc0#K?d|R_a`1G7@e-(!2DQEPR^MZE-4zt?UNJV+e5#X0H3bM?5Cq>HhM-G$ z5Y1vdu*p!0ayJXSxoLO4nZc8n595hJwsD)-PO`L!hwR@*CXskcs;#xPKP*LM2k-z<}JN7bIG`It1L9`)QFLa 
z`QvUfQiz)cvS-t0AAV_4Ii33{^@{n(y_G9Di+{kQBakOqw8-$loMS_&MzbAwoKjR1~nRtBrlH%)37xLclg!*TRz)sAo^ST2u2rTxjyU zAUu>K9HFAh3p>hPkQ%`kjA? zW&iaqs#MCX-3O81pY;uP_P!Io!PiaI&Km%A{_FTn>gkl}*vg?5=(fBhr*<_(@XSTl z$vTlfW&5sj@51N&alvWtgFcBqPsTJzpS1Yt+D26Bs%EH(5=#p3{pw}&S$D(v<~M&y zxcR^#M$4JguU00Eju*>vINk;A$zL3=P^w{tx`TmTOit-g3chB4yrD+ks`Lb8xhNUbK7rab0V zA&rfVjd*$p@4un?>I8$vE^^^y|D=_0j)UR(W-aWf3xPNq^rRvF!6?}=%_GfIEWⅇ}he z%0<&I0J9{`Ak(|64WEOB02h0~pKgWmzHS#}ixbcAZAZAalAsXQ?)9Ih8i zF|22NbPZSo7z24!nE+S0vnHxuXMSSNd?=8ynMlM#KNHCTY8y{RE&~96Q4mou6U^?~ z^krB*pRTk8`9RB*5|k}ZHD+694%)5>1$2Z`;79c+5jQyY?UTSrXPKNg;A9&JlNf~8 z+pKw=4{9jYAK0Ys|J^UAmUcq>`h}v&enJ2IQd@lP($uUgZ+uTo2RDexiVWt87@^*?q5~rR;8)ep6#RQSBdeGy%^%hhP%y zuXT0Rm7jR&vKYBq^NFs3vweHv-Vz0sbo^>Umt*qQV^ptN%?b_zD5rW1DJ%6~HAL+h zWO&Q#WJGx@qnys9F?4^0fJ#u>g}~cMw{(rZFY*$$W}2JHD^19s*i3iaK(v>x;qXM) zE))7b`$0Z0RQ18119xq`FT~d7$QD4H0N#_U7U6(JcQ!>hg;D_)a)9~s--q>Wav<;f zTu1ync8U0$KN4cm`C5%?R#eXTQ0|cXN%ZCHgqeDP9)td_+3=y{G#<{X8q8~lBJn`I z4P3>RtB96j!BwiF3XlX(>*L-;Jq8Vr8GVlk{abBBw&<;>eW}u*1XL}3yfQtf~ttKU!@=XliZv86W*z zawkVTqQJ%2rDxC3B)leh! 
zcf%_9X#g959RPn=!PsUI@&2^jvzjr0EQ2sHwx}tn2;99byZJ}9fOrI8HMFq9MwF%g z<%i~;-9#%wr2l2U3S>157#z7f~W8?JJp;@660m11&mm zDDpY#zw}TU3__S~jr|}Gs9WJHE{LX^FuG8A7otv*EAjI7gY5Fi9q=m z0SU9psl>l8IKhweBT}Wi??m<9;>Rm_(fx=>`jPr5pT_7NzpecLgv6-js{Z+#crB_N zsT7dkRuU~A1k<-Q_AoN}`ti68Dv#r+$7@LZ0Rx24G7;Kr=b+hKOl$SQeG;ZlF45el zLO2%ep2`R!bHe85$3&gqp>VL8jVyhiJO3Sg1SvD453I?mf-J|ZbYYg~b5(0PcFt!J;{n$V3p#YL_ zn=!W?!0l>LH-lcRI(L=wKSTqLz=OTj@^`5*PK$cxjf6d-P@{Zqmw%rqk+Mf^QzqZ0 zT_JqnQeZb3m;r&Enk&1PrQ|#BeS=>voo4$)9=uRsCC7NJC3JbRGL| zq0LV#h7^aY-+D@za#UXb7eDih0L0N+Zv55+Qfb~PE)tDNZ-y}?gi-bTQeeJwe_UGx z>P)a%gl3haYq@>@!srBCBXw7)-n}0LsQjC>^S$Oa&(7*1v#nF6jv(zP5e4DcuH*3@ zs_E99q-w7rwA&u-KhRxjfYA&NPYb_Glk1Cu`8q8jqQ;RP(Uz_xC6WcE>s)S zf_498E_QT9QloIf1ongkOXL}fr+eJ$n|R~T?F%+J(H_wUoI0mKfO^k^vy$MYCC4e) z8dmS5wa*bIh|vQhv#E9tN=Dkip)EbIrZSwB)kJKoB~y31rcz;g9iTH*4nQiqkAN)& z)2FJrlKLnmA8BpX+<(kgA_p6eEtZs*rw7b|)qYi5D+LNWsYo1Ml8&CkZQX zr$8caQwqYP21Pv)+UW2nnOV)}r!N+N_i{Wtk1tqB_TQjFgro><&pAAJor{vjdnE4J@$p{8E;4^4yD{HfYyf zeBXaOXFt%&0Zo!8?}E|znRZ_uU!AUE_8Z!g2*QPYg$RqslU9Up%F z*(J*7J1kUZNlQ;(+k3CCiD+4x?N{TJC$J5VmYmw>4JqK>j#e$%jQx>eupS)AE|Vsh zYb9$inOK$lSU~7Xme0bGXc6YkHAU>~(7uhsNo!)Na_`YfW8kCcfl1vGqzGEJh7@~H z=A7{&al1pQnb5O!kE&DBf~~bkx+e^$&)aMjSfz1K%@9P;K&{=fNJJQ_IdOk)YMc~y z%Mbz&+DaAMpZbQZIYC*|J_OLWv7|rM5JB&><79*(`zsJcp&_=!dav~KBw5*C{bdX+ zrgf8Qfw!FtM1&|Z57;s_(q{#diJIeXsTth9guKLn?=H|ZNXPn5Ye zM1*Ue)~~&J&&1HV4T9B(*H~3+-I^dsJ$2%?9@bqROLiW*06k&NOEWEl(#ur`)(qYG zbSpacfD<3;7kZ&24n2mMr^<>`yu)`Wm^8RviYF=f02}Ojpz{r+GeN@sMB(@yI$+g0 z7Lk$Q2kXy0=(yw?-Ehgm^+1X98l1!l{C%&l+4)J<%VYLyhC)4MUb80FLOl~zF4hn8 zlzMar^4CLud&U^|=~~Eoj)@BO5|~*LUx?a>*|>cH(zJ}j%We;Yj_)P?Sk*LGkd&Hb zrNY0g@T{GSq>RC5UM9!h)EloE3jB0SA1X!X5_LTybgixXFV5&*KMJjo+cvlfy`76q z0~oo{NH#AoFFDVy*(!Wng{j%=1@{ojb79CUtzst0#A7E}Bh>a5Fybte2-eg0UKll` zrv6#4yK%0$)Z#j<%}^-3LSgc6USk24s}%U0TejW$Es5HX!g1EHOS7f;XlBpkei!Bi zE@+nyeN*nec;gU>F@;#1*4m@dq%Z#`tkF6nUV))`i3}ms@E*fMvYp7egv zfDJx@D07VImS^~k^+EmG{v|0x4#Ulhhsv0_^bN1?_ZVQgTJyJ_!1E+Vf7C>>R0^bI 
z&@)O+_3n3V4ZWg+WCb7j*KrD7DGNIz+AXIP9^7(%H!wrYO+G^GI=o_%rh>9ZA z^Mx1oA$dCN6ax{Y%I&pys1GWp_KGqnrG@ZF*>g4aqD|5zx z`JJlFzG$^z_RwO*jbX2gp3r;oASPeOm;Qw_Zw~biVuTuM?!1=WvC=)2v6;<7( ziIt-c@a4;Q)*R3;o!7nJ1IL>AWeiHPpGTc2e~R~Q31$Bl!p;?1unKmoUbLHum%U@i z^1~M&$po=L4nC_eciw&=H~tTtXI9OgpkDr)i>bNM_*dP1jYYW!O3W^MIeE@n_&j)T z;Vlim`^jJUtm+KLEA8ofkCU|lc1^Czwk$dr{B=ndCidmIpoybtHT?K2jj742hDubO9@C$81jGR3FC4N!ek+p(XERXc()VEJ9QQ@Z!FC}YcMVT+iD;+59}_>~UZ z$GasT=)Y+)6K8taSJtb4Xu8*bmSN`T#g~PudiQIkf`*%b)Y0%rL&a-{ya)H!{ei1I z7i9c1j(gAF8#`jJ)cmx2OcIeo4mLKYMy|2q1nv^FS?&9Yy=LS6OX>X|EAQ9vF;T3t zoNT7*dSgXxCWv@$>9?bf;t!7u=4l+0t@rMzvG=WyU4|34RtqkTS|49f8a<3uwebQ_ zg!guE_99*#_%CP$uRcuXYi;W#`h%+=$p3H^7aoW(v(}82ig(OrHc|5|N9!AFgktes4Xl5+1qC)zyNH{F$eWUy9!{_gjRPAO+WdE$mO zVQELIccKneS8rf0rEaRD;*v^ynh^Ozme`j>;3$J{I1Z1Dl#WeQ`RHW#mGv~1Yv{y|{SMi!aA4ahBd83|2ROkoR zC*x0zCyGWpHoTf)^yhI8TH<|7-tFipZ6X1l$$)*dKkJI83?zYa-o_>X#1G#)4^Y@f z=Z$wuGQt2izz=@b+rhg1fRxO6ShHxn+6lGB`?5z*>-X0iHyqbCcv~AD`J5PcUvW@p zFuarOT`{)hxJiNBK?P>6y<~aGps#%X524fNGaQZZ-aqk5I_p>|1lkwP8d+V?vXQ@> zSHW9r4jHq`7bcDFh+H*>_z-?IUbVS>NBFs zWzr?#AEn25KfIa2?~S^tavFB4w=CSLI5fgN$1=D5nc=AjR{O7KR<3kV`6j<@YKD|v zJEx$OByt)nyV`mEYEgQeRaP3G>|gRxX-YGoCFx{7roAx&rjl-p@{EyTP(%vIRM2Lah$P>!3>fQ%P}$T5_Pn;)fxqRf>T;B(8xb96CtRvy?Q1E?x$;?QZ*1 zBa9b|`UW=!rO56kf5~bRTD!A(>pRZn#|IxJ zrU^xxnxJD-tl}nZi6?z77y2H}2qXrj{jy>D>#7mox%cy>NeiKhD`%tLVo@TyGx%q} ziCYN*R-0#b4A=S-c~k#P6%h!sC9-Ko=gg~a*K|B6rC)YkmDk(e_^6yuWI|^e+98AA zbllibvZSZ{!YNAWYF|S=2HZY91D4)hur(|7y^guUv+n#9k=-T}p)j-vLpJRgQ?|`D zcAA63RmV(v}t}B<}$el=0`ald@z|xeovCPy4tH0nr1zf!ndRT0{!e zpnYOgmEbg0j1f>F#?>x1q~{zr(uTW-Ps(aTM)KG0cQ1DBVh+Lb9f?(l`pG(1b+hF! 
zW5B>|ux&Q)QK=^2FUiNh_c}FuhRkH`Y%>CvbFBrqf^G{uClV%{gjI+U8OIk|>lqrc zW5OlYz|&BQQ~v$PT+#`)n)3ar2-QjEmyWi-d6SY27)Uxg=eB?qtM=Ai%+&&zOxHj* zRK?8h?dcAyL|kegdB6CS3`dYa2N6q?s>Y4}%`>TS$4*LaZ&GF6C${J)|foKjVOS8zdX~{^)i$@R}kLwsFx3 zhYvFZP_>cgfAhmVCbcz1AN+OD@FNEw@iFPE0eJHmn0z0Z>wf1pBS97${9hj6q~cwZ zGc*+S4Hd4D0csOf&~}bW;#QZTKcI#Zc8v^ z-511{Lxz2B1Vc$cyWr!@R^vDi{lmJXDF0(!(ek@x>z54GDDg8XHoJEF-@vvNGV%Kp z)A&G;j3IoQus0EM;(Talg68OBFsk)C9p$z_ULPA1W=o6+j=^AI0=}LccwMz}H>OjzNC%#XvITYoVpY$r2FEJXVP9@ePcmW!|n(NeFQj(J& zG*l~4fDT@vRnV3A-S8l^bUXpM2jx3$*R)O!lZ$D1_h`v%s-_^er0M^)_uf%Wbzi$^ zKvWbFDIx-bpwdy0t~8}7y(1k(iXgq$AX22G^bSFgBA~R;L6KsR4kBF%p%)1ygp!=K zdEf7R-}s&T@4aK(G0y&n5ZHUKHs_ja&iOo(ZP2t-`OCmvHfp+)q;1-+bKFF|&NIGBGhK79z4-+0qopCCJdvf5q*7|SU4L;fEdP08l)V=5?l80LUF&hL-ESF7|49D+&fqd+&J_c{AY}DlOf;Z{JQcpNl&XdzT5m+% zvQ#p{Z*RVQ9Cby+_EEL#?!azRX;?|rzwXx5Pmpi1LyYJHd4}4P9#YTzfV&5zBR9sv zkQt~F#Q4|(T`@F5m;PIxh6g=wFVK4MWm$*-@%w%6AHSJJCgR!BT2;R;<*Uk-9PLzE zp(seA9{wv0K#3UhmPBCV0hAKV`v$tR!$HmPQS}^C@Aylwmoi>qqi#!uX1uLVpnL?tN;@M6J9CmKG9FoR+fOwipu4h=u=Y?8ANrc73TwC)f6tbV+Xf~*y;hN z#Rwm8^Gad6#%c;P&THujFB(xm`;7DI|K4XdSy+z}VH#iT$@Cd5QowR9{3AB+Hv$201=t z@)`BpDa!aB_=?6M@waliIyId~;@7;``@r8v&k!@@xv{9!dOj>f9wrn9!N2RyVf5q% zAC?ns!h!Kr|FOWNS)US0{Rn+PeG&YOHwwgTtksSeYUJGNL$BN;tV`-Q)-7%9DG_Co?(#nRB^+5q7q@d>10= zU&`?>thCw{;f*Njn+m4nOT%D6pOcXBJ#x7^?W1x?QM!`nF2*|aG>}F;*n{gAK)X#F zIkf7|vjyjpzbQANczp(CO*LvSPDi4td!YvWZk?n@eJ&hkfgRPMsJGKMITq(<5YOln zyQ7>_JLNXrkg8Vg>Y@Y9YF5Fjm@e(P2w2f8ec%B8!EikGHTSKaoS!u`JYtAJGdkhy_!Y z28o;Sw0pOS?^kL!`vbVLT5aTfoo5b_qBk4t^_0zxIlu~ZhI+rE3$N*;FbT<2}ZywE9cc`mwF1E`Jy3tPwK#YvTvZEYibjA21eMtP3H zPKD?DoeHmlhU+&;xyYa`ZMZ~xbU=2wwsZ^(Xg=t-E!!Ayq1zyPTl5vkV5%nh$ap4h z(!8I4E^%404ZO1-mHTXR(* z7e;ym#_$@*T@dN&9ecJE14_zFNi!TjhDVRGV~vd;1IrZ+wPzdi;;Xrs_%KIi_3A{O z7Eeg$s?$r=@dej&}fS@QIUD%ugguqzY?q3U*x(^j@s+V?{XQTC=Je=mNKGj zsEQd$gC^(-Lrr>e-k&~YGd#E5T<~{st%fHrUoM5327|o(pgMDMtBMT7MgZMU(3vrF zpa6-(+OoQ$enm5-VTjPFli41ZgGRtzs3ml2t?I>hhk^p)_De&_#i$C>JX#^)OOds% 
z+mm5mb6zeCAo6ewIY|k>5EO$4PhZfv)bFgB~Lj?GyiV+I2l4%p;O~|o-T$xbype56;|tfynsbqkR*qwa}b6bo$oR8d+&O` zxh{_1$xiKT+sPg|@(MUkt~a>yUtr`aWiJ_X<|554V&HM&{s1(HqSy`N?p?Un`B!~~ zr?12obQr`HoL*k5p?9?znj`c_T_I1J6&)w6wg1 z0;(hw5inKL({bEoZFU{_faKUz>B&qf;B&PCNv|%!MyQ3r2F&qzM%$4?m$Jjb2u0Fn zV69D2FL?=NLvEGy;FMg7`Lw5K&BbgrA;lV5+j$3A5VQm&4|=}l5;Ukje5vF?{3D+H z^FX5N%7G&Bx8kUdrzJlZ0<*Xah0Rcj29ydwnsct*4wYZ5Om^Kh5~1VAV3q$v02RiT z_~*lnJ57)C`Sv5|T_}|Q#K}IoP6pF?0E|I8buu}S=9Q96OKAXZpSLdKGEMGf(H0?- zeJkQhQ9nqUSK)Xl*~>-B1}pLdN+j120yID=_`{fNs>+pYa(O5PyvFg@yVEyoB7x0O zDPa41WXt}4D(Orhx{>A7#@!eaO|x@!GIzN7AAlB&Gy?1o8Q5M=(ydMiVYbDN+O>mr z;=Y3Rf4Huyr=-9yZpb!5rhae;P8m0j6&VgUDM2%}gBx zdv+VY*K)Jh5QLekhcYKB_gw!^{R}2IH+zS9RU=1nRGqud@N=Dn$BYCof9hl+hsuo_ zFxoNXAtDLn%Vom(p01WP8Gr$>DRZ^A4cB7Ia`( zEmVE>0RVo#2DaqCx7}9b>GB#_ZJM^9_!vsYN^*^aQ$in9=}%>a%2k#BGd(x_h<`_S z)>@BTM;t``{XBjDW{-PJKUBYd^Ad=a#e(&8{yK%Hc%SV>1Ai2!}`>T`j{H7wKNsl?ReWNtgGBQ%$$bfrg;Lm3={h#vc1PcZ;!OYhv$vZw!1n3A)RMlbda@s7)(ESG zN?tPZ!Ne*5=}#L;KmCA|4akXR9EfMP371&SXw;g9mRv8t@*g@n)iEhJ+jg5C8Lnf~ zTC%^~$ad*qg&*&(M=IqFDtL;u>~k7=3k@-q#eGPjCk*SBg4LQg*{AQH8FmW{!g(my z1U(|7Tqi`kcs!Co{>E_@?|&lG76{bqY{}o;RKDy}-{BjLPaECku8%xoSwFN{-f|;8 zA90w;z)kN=AyTUB-h-w~{`K*^I{LsTG+q|SsWkxN27SG1KhqC_jSm)%Rco5=a(&Hy zSv&A9uZ@!`5CE|OFcLc4;8SrT0H8AhobY6_TNnD5xhcG{C0UY7=RPRDG+^0@9&K^p zh|qU0J0t^-oV3hd5>w1|pqr-QuMdu1!I zIxCcM3#Dn`<%4FI7~C^%@4VsAhLNj_pN-c%lwF}es86PqmEH!kp%EOW?Bbt^(}K# z1xJd0uz5b9!?Le?-k0a>!{F=&>Mt5(vOtJ~z7CdL1Sw}R6OdM1)fyN4fF|fx94MXh z0+08P5de>-ZrTbr7y?0TS2=-71Re9bue+ns(9gs|oiaCHb-fEAkwPDAzF2T~f0xU0 zIU_aZ#JN;#O#SzV)8UG5EuZqThM)F9$LG&D;>JuOp|cZYc7XhepsUPG z2?+0OQ@Z&^((PP8KINrwE_Yhhms&FPcQ9YkEXd_-{ZW3WtYz9eIJ@|7R6ciets|E_ z&y`ktGR4xIM=%&&%5T8Bj2Pm4m~RxE31B!c@S^8_9D>&0bX->WYj&Y8*R z(OT(&PVE_X!BY*LGH*%HH^Yk`*4y;Ws>)>Jrrllj(LZF;9u4UD+<%}k9f_B$-#R0e zRGV>*ux2#bP$V4*N2ByWAb?D%E!q`qB$f0YBeyFK&Y{zA`530QUDWH$wZ+Q6g+H$^ zb{=!k&#R`88+&FEYP6kq5+YZr`1YNK2&ah->F0q0yJeNvYx|&ock-O?ihkJxjj~A) z!4!G%7$($Rz}lJ++1~DyX6vn_h|&XRT7GSuhXO8TgkB?=CmKvLSh|;3Uq;%j$&dn87mf!N4LKQ<7 
zvTL_Mc21i|17VG7m`ZvgLw)BM?}3!5_V+kYg5x(S;){%d&)SH=79dgq@e>yjImoEGx2ui2WN9cS8bDv@{AUA0tjuC_=Ithnm%JSZVl)XSvN zAu;#E1n7}!ngQXs^-%>O9IoYY{8gfC86$Fkc-oDz)6F#($nHKjvx)mRD2MZQ&N6Eu&k1+b{rp0lamn%bG z$G2%m0N*hs27vd+X+WSX+2Z{xovz_KEkr6iJHwNl&S{s{zh_sz`86916Nvc!;aNfl zF&;WX9u7#C(4Fig%j-l1BiwIOkYZ4X>~W+H&Qx~z@wI#4z4j*>J4$Q2R+%pskljy6 z`o@xf)@J3Ng;_k|XHzLd#(BaQ%XbR4Qw~c$HpchqJ@INmt;?2>GANUYj0Gm;rS54Q^3=(p@ocUvxRuKR@x84 zD++P5M=8srLUGdr2PJ5%#!&ILS0Q|_Q$~$qK{{RE-}2|*2CVGDQOavWi-gjY>oy

Jq8&Gcjd?pBbVEU1o_1gAhd+o_!lbZw!cUof#Qw(3P>`B00ScJ{O#lY ztdCZv#P76XA2a?zlysP|F-~+Vr$$ajlWTW2O3VMEnT)ZXZ^}?s)L)FQ{;d%xvwm;O zYv@V#(L}3qvm0T-1F5#>pLR>k10H;}s6=}p!Q6T*MSoU(;FRo_=I!yko!4=D`bW;v zMy3vpIAXFs5o@lY!2bMc;hWz$xc^cC*{-~^yy<@tF?u!w>Jh++=v>!O1d-{@G)Y{O z@)d?sivV<)uQ9}VeqQ!qNl8X^79U4cGx~KDH6pxf*Wl|C=)1|(n%#0VN!0UoAH8#n zvFrO|a*j?T*3J^)tA;h&E*XqpQximVtLIx)YDAwU@fCmWFesbUJFizVyZAZ1wv?`o zIW0irV6VW-&Q8nI?q}BW{KtY~G?w*dt|I1vYCU#)*P(byWFg8NKgz`xqjKLSFs;I`WJnw$; zxXC>vV+yb$c~tQ8Nm9up)a!`6PaJwRb)A>c`eD~5oCQbc+80>^+^p`I91KX7hqoT) z#9m}=`~%FebfQep1ugDWyipmFzigf0$g#v-$9g<-xDxb+z!2V2lmr;IRT~*kkUVs3<(xxhCrM+k<!2aYbu_zKLE9Y!|V*y8dDe)#}+E-p_jbfMWdYz+`M-rati~WWcs^T9v8l zWh3M3w2X7UMaJN7aDvvdX~!DV&ucl8>4X^#;&PFLo2?1$v4vj~hmnY;Q=AI{yHjmc za_hbKqU~7fq`dJ*;tFTK8W)#@%wUkhB@%%fqcoLMbsD>u3IYN#hsNTKBK)btMpn~D z)t|mL*dfDDyU|a%?>bbcMvsjQ9_6=UaiNMMx4uw&@bbjE=J(!4*|2o%eNJ`LefgT` z3E%s(JxpvQHm!(O5DJI^^juQA%WvIVUTEpNoSHHUH@*_%JJor1;4=XOCmtH^YRRRo z)mrLM@0bXgvXaG>I6Y%!R_{97s*|S$-`u|>+48`!=j?z+E78OVK2}kfA(-$ml6II{ zJN4|Bn@fZ$8C95S1$#K3(BL+eWJ2KI!ImM3?U-xt=2{l&KpWd(Y z{aL?geGVAJJwEt?jTJ=tEi$Q8jZ-XI_dZ?(wj75}0T1U&e|@rqm~JhIEuVU? 
zj*DxEcK6IHi0ca%Lq%wb1A9@c@Fwj8JSr!;;rpk4nvsGcNrqB>dVt_MXISL5MSQgm zM~e+5Ys>ER*@TCc+HaawOG@qV+8+^v8aTuej!9>gGy%o!r=o@591BJA-q)92by6oO zc{u!4RyJA(CZJ6SpY!TBLHbdJfk!}w_`guoZhfh2%^u1jhNss`O_kiE1TpV>(ex(K z3mLSj$AW`CY@=V}2U+b`h^DN`Gw4o}FQ31i9X2LZQhgw)1J%bY6eR|8tJpzsbn9wa zL(yv~9clPT8qp*%JXORw)2Z|tr z_aU@h zyw4s~N@#ze2@+*T*CCu&5B7w^0^BGAExm{Nk{2dg;qVSm53PuilRP(&d2{ko7>7F8 zUM5=t{5-FoFxywy8(z_SbfRgMJWlq(lh+8iK!L86UjSbSQ3Hp;%>cH}(+40X|4d$F zkxczBmLPx?2UQs^?X6FyoPGpSIN3^~5j>A;fRh$nQ9L#(ATt^Q)h`+aj^#Zkl0&TYM7>Tr42I@7sob=%AhT?bXVxA*Ps ziEG3j7VY_7i@OS^vU^Khhn0Kr_9tz(GJQ5D7)fQ1`EZpO2b5vLd-008q79?(h;IsK(SLnmsv-+n6@q4TSg&z6?>= zjUKCTcI&aiKX#SZ-l5Z5j@kSTz|w*XpJ`Q=DYuGnRe-az{nsB_y+Gf91!t7 z_^k-&Mp$-hDvv|}2~lapSjXUAjAfMxe?<*m+gC`6+`oXOFA3e!)yPU`D3!?@x3?c_ zT=cTCFGtixO&)&V`qaP8W)@qWN;#5z;H^VD^3J=S%LKmM24H*_aDPSMb>GX(%b1xH|5@AYReVL%Snt~|ZU0kl+Y7zpyW$&jB6n~5`T12j z=`9>4&-}W$NsDU)Q16mfW9Cht%L+%?E>ov=UCNndlxpMyrOZn0$EhMqz-aWCZ*PyS zcu$EZZyz%wotwYkGl;dzJ8D8s)Vm%rFQRV9z#~iAju?95@Zyf!+w*n&EVLZc*JZC= z#jkhNnH4KvZ;>!;G4(%uHjgZsGiJjd3`HG#tfE>kUz=@UTn6c6?W?5$tEcu+-A;Ep z1;wzbA9vQ5-p&T!u8@6W=|A6al%aoP41*%xPE_wZ`)XxwU*$%LFHeA_VC>@C$r#*wgt{JA5FI{~z9A zk8%^U2w|*A{gD@gVZ@Wb*s{hmI(ZT#BX9RRQHO@J5dpK&H)X7{x7VEIv&dmUHInF$iH9s9}XZYO0-mpeW)4||L~M8!?M9XBeW~l2Pr=vjnGLpBoVX*pj>(4=FP0~V7rU#Z zV^)8u0&f=e^igvr@1kPgMLzg?zj;;O(#ju``X(BedA=n7TJ1Njf;BDr?W$sFizo=Y zT@vx^st+qhCQg!o+O(Os_rM=czl}Sa@a$4p8Q%U(ayFKS*lAGEvWY*~ zU(oYMW1)1%gGXI=jdHxX5Ll;M2Bl;?V%QAh8hx5Xczfm0mf6gzRE}FUpn-Ymvi5a% zj)7a|Vpj=Sq?1AlPCrgL)F{b7YQ(pxR5Td1f-#Y3o!xJ|lytyGP*0S8!sH>th42+O zxNUAwj8^pNcs*vC+TsR0?{js`VCV@f$e|Xr1ZU8ewhP%v%-eu0rN9itJMCN{|7Pve z4{vQT;Kh)08P}3p3et!yTlYJVsxu&A^keu-KEZZ2-ya7Y^cTMS@rGk9E7P$~g5gH7 z1x>rhEiF)N+LQRqh^;O>JK$FHK_h6L-z2ct`@^wf%lLwlxAS@&(Q%FxZV{@8pNt! z-J0-)Q0@J9A_MYyiii9i>hz3gWV7B}`T6$Nwk0=Qs|<5ml$9eh>dJz094+VSVDgQ? 
z4_w5Ugb@EXUSciDl9H&p9$Ibg&fkpnA5 z!ihTrIS@(paa+`2i_Nn@;9R6*2~clOuKE1vu=I{op1w+EYk{#M4l-H_YZ;lC-7~rO zfQ7jNi$yETg}wS5!*|HKF3u=%(|i8!hHLT1)7{;Nd_-fxYZat>a5ngGDo@eF{w0p zE>tudjcPf8U^SzWnZhMS-orR2tq$a;xydJ6{ILvn;%}V_3uVJ^m7Eo85=zE8#7WM&_yztwJ|?nyRvDBtF~11QQbRR?yI^? zU*=OI-7vOFDMYC^W0)t12DXyZDy-gAt!Eb3H-fgH7JECySl;J}nG3XxP-k*6CN!AY zTb*`p-U6cMU||_C)~D?)?9aO8Yf^r+&x)6>ZCdea$|A|72TmPIUNg<@D=6PzZEUBco)}ip!cv2+mo(0W|7GiQRr&_4Ga9C*shltEsI$#gl;oa@=~(MS9KTDn~zPRUBQH z-w|ce-prLEHU32u<^~mN@>hooL)r7~0CWo~skOXw)g2#*so0J+Ee!rmZuaWZWXnQqg4YP;kZF!X3k^9^>h1tg^ZL_Kzlm50-fhUB{m@mWvaL z)?ogC>C&vB%E|Gbi%5^1w5u4$IMwxv24{#RfnCYm3GE~=IRmPaXYsbHrLMSPObXB< zvFM01nwrmRKvR#Rxo1Aa`d1jO@7^QL$69*K2SSEXNU=pH^?)c)B^akjs7*`XHux5^Iqv(?t?_Csf$~rMrG!H zO>f40wM}O@YfYLMx*q#C33NU7kp4FC@XzMS%h*{ zf7gXhbbQv4cJ}*N&xvueP@?Vn(@j-~6>A&)d8#I3!wqw!!q}JebguBwM8cX8v)H|r zzUm#i#k5)PTK{%a5A9KveEclumsiu_7x{CsL1EOhCR2d*>Tmy6XWax+BIrQC)!C^@ zBF0IWbeEs!^xo!dj%gfK;#}QsC*4Yho0Y9(Us9f+gh3%WqyrxM^P-rZ+Vb6)C10HV zO1fg))S28$UA{Y+&{l>vxyl#^Zcyz@;y(X{S`&w0_5wytyNutB3%hqvi>yBLZ<%Ju z?L$d+9iC^DHv8f1uATMhtG_GEvWOZ;&E(a&2}ISyVIKJj!7Of;`s#q*DFi*)|5Wlg z`<1Z`xCJmoP*TmEUP<`pEqPnw28Fg{O0v_8ct!CuMDFSEgDa;3%>B1I)*8gV4lvBM zm+oKU{qT%rCFg(K@dn&s8aZ?fGWT%}Y&l z5(Z(N^Z)cJQrVeoq+q}EIUptVoJM}i6^vfPK|mg#8y7qrpLYa9@5|(sL%Wn;i0P`o ztMD@uwxXK20s0)?J$D|Kv6Gj-U3C^p8qiQo%Hr2pTJdxMU<^TY-<+;#nBk*Rve zsG2KADKo@x`@%4%`uluSb+ZOm2QiRn-|2Q`{P8|;V$li&0LYy5k)k-n1s$Wrj05Jf zdfU#wuNLN5MA4b5kQUt-yzg0^^Xg!DbW{ZV^S9ljjDZ@6#*(zq0MGPCU`2Xb~@PqJA&5VYMQ5(*A9M%!hO%Oh?X^9hX_J2fYnDi#a^ARDNBJJUSA>&=2H zjfxaMbsjYCgcQLdviW##avTG(cJZ@O8Ss3!IR%bb4s|@7Uz1ZQbbAg=K28EIx!T*w zsjRv$YBk@Wa_e$zImIS66OrjmYW#X~_)TUN4UPW4fi8Z0m^m+$Q$_7JH?3t{H__Dc z_>cq^^bgWoHILi`!Ea1<1v({rIIMyi3#KkPsYF^5r8Qb??hU6^(Vqv{Lvo+X8f4r) zMYOBn{OaAld3Ca5Q{wJU#9wOwsu{O3wxYMTotmjV|)it@$KHY76;pYLd<;>nqrS8^P)1Zcu5$ z;e$FUsu;u^2WiO@))cK*baRzW3MLTLxK6j2u>>j$;}6wtdBPt5{dGFI6L!%8G3OIG z3!i_7gk88{JO^Qi$G0yL%dv2{Br98X4i*kb;sl^%Y?y!od)*tJEKRfNcj0&L6Onrx zONB1N=i8lw7g)4N5Tweox-k9$l0<>a*LiZulpp8@alJb4xp;pff0vQdMf7`lm}FeY 
z-Ai4b{)c^dVvdYh_6;53H0mTu_h8*{4uf_UGG$h;a2z3Ws`0xRHe1B@45yX3c-=_gd zN3e_UIU{}JvVI zu*$X-W66iO$}t^w?L9{2wQ;M8ylgI%R1EsbtP|<@7mwIEdTcgFkFU<`j zeb*uBdttWfgkrXV2A$eEa%A>Sg--w8`g)m}*Es_i?62uL$g_1ep6hKLwN4Mh)XYp@ z^|7Rcyts0_a^~{)ym}W7KS7U?$A7hCX zS8?y$+z=y~;F)9nZfZyA6EZyg2Xk|XYybrRtHA-Su{|z9j)dE9n?psv3r4W77FyOO zZMzGjq#vfFr0n8Am5c<@YjK71$7bcX@7;88J609BHF5Kjl`A)HfL0n`9>5wG7su2; z_LjR#BQ!VpdSX!`x-!vJI>jc#W^jvFkguPut89`8RATJO+ScpR+4{#`Wh=J0)B z9bNYF%nCv6`i(cLp4!zNfEQE{;U*vzCQuBTf0+4ms$A8Brq Date: Tue, 16 Sep 2025 23:53:17 -0400 Subject: [PATCH 46/46] finish README --- README.md | 76 +++++++++++++++++++++++++++++--- img/figure-39-3.jpg | Bin 0 -> 61647 bytes img/thrust_nsight_systems.png | Bin 0 -> 10519 bytes img/thrust_nsight_systems_2.png | Bin 0 -> 21101 bytes src/main.cpp | 2 +- 5 files changed, 71 insertions(+), 7 deletions(-) create mode 100644 img/figure-39-3.jpg create mode 100644 img/thrust_nsight_systems.png create mode 100644 img/thrust_nsight_systems_2.png diff --git a/README.md b/README.md index 7f545a3e..c2bde24a 100644 --- a/README.md +++ b/README.md @@ -17,19 +17,59 @@ The purpose of this project was to understand these algorithms in more detail, a ## Implementations -In order to explore potential performance differences, this project includes three different versions of the scan and compaction algorithms. +In order to explore potential performance differences when scaling the input size, this project includes three different versions of the scan and compaction algorithms. -- [`cpu.cu`](stream_compaction/cpu.cu): these implementations run entirely on the CPU and are written in pure C++. They are single threaded by nature. In particular, the compaction algorithm was implemented both with and without using scan. -- [`naive.cu`](stream_compaction/naive.cu): the first implementation that utilizes CUDA. 
It is based on the naive algorithm described in [GPU Gems 3, Chapter 39.2.1](https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-39-parallel-prefix-sum-scan-cuda). -- [`efficient.cu`](stream_compaction/efficient.cu): implementations of the scan and compaction algorithms which theoretically require less operations and therefore should run more efficiently. It is based on the work-efficient parallel scan algorithm described in [GPU Gems 3, Chapter 39.2.2](https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-39-parallel-prefix-sum-scan-cuda), and involves an "up-sweep" where we build up a balanced binary tree, and then a "down-sweep" where we calculate final terms using the node elements in the tree. +### CPU (single threaded) -Below I demonstrate how my algorithms performed in benchmarking tests. +*Found in [`cpu.cu`](stream_compaction/cpu.cu).* + +These implementations run entirely on the CPU and are written in pure C++. They are single threaded by nature and are extremely simple. Given the input array, we iterate through each element to process it. + +- For the scan, we keep a variable that stores the running sum of all previous elements. For each element, we first write that running sum to the output and then add the current element to it, which produces an exclusive scan. +- The stream compaction algorithm was implemented both with and without using scan. + - Without scan: maintain a separate index that we use to write to the output array, because the number of elements kept is likely to be smaller than the input array size. + - With scan: map each input array element to $1$ if it's considered valid, and $0$ otherwise. We then run an exclusive scan on this mapping array. This tells us which output array index to write to for each valid element.
+ +### Naive GPU algorithms + +*Found in [`naive.cu`](stream_compaction/naive.cu).* + +This scan algorithm uses the GPU and is based on the naive algorithm described in [GPU Gems 3, Chapter 39.2.1](https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-39-parallel-prefix-sum-scan-cuda). Essentially, we process the array in-place across multiple iterations. + +

+ +

Source: GPU Gems 3, Chapter 39.2.1

+
+ +As the figure above demonstrates, each iteration has us add pairs of numbers together and store the sum at the larger index of the two. Each addition operation is parallelized and performed in a separate thread. + +Some more notes: + +- The stride is calculated via $2^{i-1}$, where $i$ is the iteration and $1 \leq i \leq \text{ceil}(\lg(N))$. $N$ is the array size. Instead of using `pow(2, i - 1)`, I calculated it via bitshifts: `int stride = 1 << iteration - 1`. +- The number of blocks in my kernel dispatch depends on the number of operations needed for the current iteration. +- We need to maintain a separate read and write buffer to avoid potential race conditions. This increases memory usage and potentially affects performance. + +### Work-efficient parallel scan + +*Found in [`efficient.cu`](stream_compaction/efficient.cu).* + +These implementations of the scan and compaction algorithms theoretically require fewer operations and therefore should run more efficiently. + +
+ + +

Source: GPU Gems 3, Chapter 39.2.2

+
+ +It is based on the work-efficient parallel scan algorithm described in [GPU Gems 3, Chapter 39.2.2](https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-39-parallel-prefix-sum-scan-cuda), and involves an "up-sweep" where we build up a balanced binary tree, and then a "down-sweep" where we calculate final terms using the node elements in the tree. ## Performance benchmarks ### Methodology -For instance, this is what the output looks like for a benchmark where I'm running 10 iterations for each algorithm, on an array size of $2^{30}$: +First, I found the optimal block sizes for the naive and efficient CUDA kernels. This was mostly trial and error; I ended up using 64 for naive and 512 for efficient. + +I then added additional code that would run each `scan()` algorithm a certain number of iterations, and average each of the execution times. For instance, this is what the output looks like for a benchmark where I'm running 10 iterations for each algorithm, on an array size of $2^{30}$: ``` ******************** @@ -50,6 +90,8 @@ For instance, this is what the output looks like for a benchmark where I'm runni [Thrust/NPOT] Executing scan(): 6 of 10... ``` +The `runBenchmarks` global variable in [`main.cpp`](src/main.cpp) controls this. I've also made available the raw data in the [`analysis`](analysis/) folder. Rows are CPU, naive, work-efficient, and thrust top to bottom. Columns are increasing array sizes left to right. + ### Graphs These are my graphs. The left column has array sizes that are powers of two (POT), while the right column subtract 3 from the sizes, therefore making them not powers of two (NPOT). @@ -60,6 +102,28 @@ These are my graphs. The left column has array sizes that are powers of two (POT ### Analysis +Given that we're working with increasing powers of two here, the exponential curve makes sense. 
While the naive algorithm is able to stay competitive at smaller array sizes, it essentially doubles in execution time every time we double the array size. The additional $\log n$ factor is really causing this algorithm to suffer, and makes it highly inefficient for large inputs. + +The CPU and work-efficient algorithms are much closer in execution time, and this can again be explained by their theoretical runtime. As previously explained, the CPU algorithm iterates over each element sequentially, netting us an $O(n)$ runtime. The work-efficient GPU algorithm manages to do its work using $O(n)$ operations as well. I believe this explains why they performed essentially the same (although I definitely missed some of the hardware and indexing optimizations to truly make the GPU algorithm faster). + +### Thrust + +To briefly analyze Thrust, I executed the test program with just the Thrust implementations of exclusive scan and compaction. I then profiled the program with Nsight Systems. Here is a screenshot of the overall CUDA utilization: + +
+ +
+ +If I'm not mistaken, this is telling me that over 95% of the time the GPU hardware was simply dealing with memory-related operations. If we zoom in more closely on the timeline, we can confirm this: + +
+ +
+ +Here we can see that most of the time was spent on host-to-device and device-to-host memory operations. Meanwhile, the small blue rectangles between the larger green and red blocks indicate the *actual* time spent in the CUDA kernels. + +This tells me that the Thrust implementations of these algorithms are so highly optimized and efficient that it's not the algorithm that's causing the bottleneck, it's memory bandwidth speeds between the GPU and the rest of the system! + ### Miscellaneous: powers of scale Just wanted to share some other fun stuff I encountered while testing. diff --git a/img/figure-39-3.jpg b/img/figure-39-3.jpg new file mode 100644 index 0000000000000000000000000000000000000000..22c4da0fbc2e363fc48ed254d14bcdabe5107a49 GIT binary patch literal 61647 zcmeFa2UJwcmp6L4o1Al$oO6<#B}mRWrzYnpARvll$vH>{$pVs5R8W$FWC0Nr0m(>| z{0`{7*E{o{Z|2T?^WIu-S-noxu3fw8?Ao=@>C?a7U48NXVjjRzl2?=mAW#Tk0lt8X zPq-{{w;XH%KvflB1pojAK!)G}Fpz?PF91RYAYRb`U6-2roA^ z53dL>p9nWM*ad*)L;lh!ANt3*kiC4^A2hIdDF`Mc2<)dU(y#hMi2l+aLiwxype=st z31Ni(IX3pmkKq6XummmfqrCXL4HYiA7aIV^)m+SFQ~}7vCV+KS18nYG_Pm$@!T<~u z6ckhx3{+GMTy!*aTtaLN3~WLYe0)NDd=gxYtMs$@QTewy1QQ({6AKdu3k!z;3k!?j zGQ%SH(FOOvGQh=i02c)i1@6Egv;Y(r0>gz|%!1QKx#)&Sf?6U%t`ey5WrBeYf`EvG zjDm`W4!XhL8zBG`_D3TQ06~Djpa@VzWF!kj(i3;oQG0G@U+sV*Aq(zf;aW;{36oJntLXf4{-q~6x0^sN+)C_ zME)xkXn61tKn*--@ez4L6A3Ogc%e1C%|~zb$@6ZKj5eXQ*Y$4c>Q-%Ln1E#-JxR?eoU{~kF1tfue-M9Lq+vYOWLI#H>Tbd)sQ(VDoD05C3 zTcukiR-=^_U*0=j08De{mG*Wk0J)(+^F;8Fw%e{~ljtr-+1!@=S#lU7;sbtXpz9tBw!|{AQj6*|h$*UU5tVi8S&-SyN7;5a@eJ9rs8@xi~+`b!K01B0S zgt39rDs>*{8%S0dynoksK1`2sk%u{gBVw z5>`l5Aw}ch!tZa+JDjJA#)+qGE4L5L-Io^NIb$Qs%39iJ;Vo0x_n6UtXV-YJ*}Knj zctUEvBB@gKW!^%EZ1vby{Ouf3__*0pJ;kxo)XI70$${X=1yB=m&uH`OcP`gSbQ5|K zLDOEEjSB$%Qz`CR)2pj7f&lKV zdTvm6<+Vro=go*{uJ@)2PApE8^Ah*BI2zVFi@gK_>jfLe8cxx^nhtN97@2S|M0ka$ z`i7(19nH;phD;fRYU=4( zq<;Y*AFFRi7QxUrxXNewNVFSkD!%F3d3AR2P6UJ$Os(&Aj-Dr<;Z1}nuT%w^TR)rx zj7HWrkg!JP8Rx$6NPYHx(Nz<&&(Rxl9vGO25WI4NV|`|M(Bdz2eKOhY{jn?mCT5Y3 
zYiSE69ee9%RZ%~Ke&ciebFNc5$~V3k?g!ptUhY^#ll6sUXPrfRlJb*qt+g1opw9d= zs&c>Y40Q*bd8@+XqbY|Sz{0bGUe}F#J`>}6eorOe8*CLkxFxxr$pyOrV9&OWQ)*v! ztmrO;EC`Qz4`w*`VRiL{`os44GbnSB$k)%c3^GUrCML~CdA@BTyQBu_wiZl>@Vk}H z8P)pjR!Vv2l{9s#bZmM(8%@~qMjD)|^lN-@y3aE_bZjU1wze%~<;&B~J+062MmI4! z%z``g>Z;jSgr-R5mGf5gsP;Z^PVLP)t<@e?JS96bUlLc}espB;W-2fQK5S~18fIEw zI6tgB*ZQ*QJbLblb9L+)dFZ<- z;Np4#6gJi!7uCbi-FKX@Z#mDH^*Zb$R~&R~I{9huN#>k!uyk5$F~JY(ij&A@u9HXI z7F1tkk_~N}g8E+n(u|Gt;il<++U*fWZ}!SQ2$!$zF} zvKf)Ux%kGss=9+@q_(+^3DaZK$fRs!NWzC*^eBzeZx-G|97dMou$V%Dyl7e)fcIZ}p66g(&rm4M(4N@#6%Z zL~6V2UCNJLyBQG&MyO|mQ)$UBEUBsPH}R*2%tt{*2}trjQaQ;mm3?X$nNfRIMHDf$ zc}B50wb8Kq@O4FXVa%gI-T=|JFYfPaK7G}c7VqUZYfT=%<;8YpcgA=DEUmUW2luu0 zTu(QdXq?G%?nsBiGTTSXvx8%1&ncv5nOHK%uJ5Dg$`yR>_a=LyOr?8oFZSI0(MibT zsDvGTjJWL9mRZTtc7|;3aPdG_$ioBI_a2>!&0Z4s*4y%q<4dbzO=OZ*siF@aUl*x- zkVYhS0o=E18X6YKv)k<)ayvub^{wU`cHknq7W-fWvEXAFWJWoHYTH@sEMe#*Ir#!` z4OwFL=y{}$&_?xYd8e+jjCQW!^R)y-K8>Jf?>?n1zn_f@8S&BOQF^`5e9cG%KJ(nY zss8Xh?)mKaI7i)_{XWOu*L{y{xOwq@NOsMu77+$$j{z^QUQC*IKPDPnG zu;bGdF1Tapb`H(8J`A2E8a`XwssB7_%r+y0-CM{o*?5qIbcpWFdB#*=-Z@$sG~v8g zc_#2m^xL;=)5ghAIJ=7;*(R4eq`KrF;DHmnU(w3!%J53(+&Nxteq)48`v`_D{C!?z>B|3}UYiZpndJ1)>Oz~`kV-57R4R94aYjtYkf3<$bIT83&pwn;b=QTLIuRBzi z|8DK!+Q+RC@fXeJEnRsVHBBZHn+IS~IqvhpDTvOLl-}IFym(~k4r*O>>+84KPG{(y zpVlq_RHgWJL1>UfL8>u&j?r^wn)ayRiLFoE5vwkeEr z7*D#gmLgoOXVy+QDqbJ^Hte$NX6MEZkju?ElFP{KhzGv4$XaXnsd1Sr`nV>}_5A`E zz5v3EgeiC$n_XtLnFH=T;l}V>R)|1*|3+Q&0(h^>C*o&N3mtIZ-7H#PpEc-n!Tvxp zxAdXkqw~gONbwxJb6fF{VzpyJr>T91!!IbR$BiU&}jK%4^fQA`{l;1=5Zgv z<1M&vaNtVs1t4Ez(@u4bf1;hvdhT(Dd4f?NmS1_`(~x29jj7d;;vmP_T+{>Zx-(_= zCka+OM-D`S=Oe+pI-OQC7eJb!xv%8;4oweb#eBNV_a~lCgvYWH-;Nb-oyZ-BnohW! 
z2Od`HIu5;TYc^SJKuqNss(E^Z9duVxf&0Mp*zHaA1t7DRrZu!3eKsrntjOSFQD)G% zkeI%o)EDEn)XuAzhfu&*~sI452nnfO)WT_s+^`mrpP8kIBQkO zG zvK^eBe*u{9)fAnNg%s?wp6$EuF=dlmt{TGEoiavAPK6o=FMua`+sNux+>GNq*ZDz~qCEnSl#`i<6OK!(5o&rei!SQFs*yAzXhq@Tk(BQ9K8D>jjnEI)upkWCO@~fW?;)D*8Rb_sd??OsB!yO zHkYqlmi}&<{YX7-z8S}DWJ4!WCykB#Px*%3%H4GPtflXm@U*TEAD6ZEqTXztuR#qy z{QQMaBHDK{WI7k-wdHcyTI~>+{cLVDkhSFDNfr2PqP{|~*-qBB{yHD7XQ+KfjLu8) zoK$(sC1tbusG_Mp@4M(3zvr{WhpSK7#ZD{O@at!0r0*&QFD(S6*7}|K?=ihx5jM=_ zO<(l67O%}1e!THWgG71iSY6$h=@ED_m*eB))2m$oF&88Ix_2gp-mS;Db*gJ_%~*eT zdKi@2aKv#?7Zc>A6Vz-!wdV5itUY)DcUAIhKxfGX&@8_4uG;lkrJXnHdF-kS=PO?| z{=>BQJqDC6k6RM1$zK4PA4^wf&k-+xHx~eZ9h<823!+8y1V=9t_84{#iPDK6vZoh- z+EB=5h$g~W=lY>~qtxNB(*iZ?~HogxNU{-YJS~f;!(d9}N|qju=Z#Y}1_zN4W;;jl+-}BL-q+aZ@}R?*hm#WJO}z)N+zyAzR)dqL zj5`%BfRM7&*+^0<*L~(Z^P_M02a+G!yLMC7&k8Wk{f=IranTz{iSJrh0)GXE%^IZTE;ob!~<+Joi> z1E_lU*`)kto=vImj;5yOF7m z>Yo$ucBKc|(VoS^IqmmN3;a7nLI(HVXWf%)GGpna8>@Mneh>aYbQ&h;5;9^t5y)Zp zwd#H4LZq9=g1@a*|KPtxpekoiBivz@vt|?^Jt|`(HT4i@vj#e7FDx zm1Woiw~$T-2VzK%^-(Bux#B|nw=}y*rZ`+piOs&bEsM;^L{Js+Px~!g0Gzpv)S3qy zM?7mGVs^(=$0A~ytS2Fre!HE6c|`}}ANYAvF-4EJ{5g$~i>ZqZ!rs5Y3O*35 z5W1nLiokI7i5cfA|0TO@gKhk5vjEFWIOS(~)sOM24eKhu$|UaqKSsp7s$N18lvk3Z zf3L%Kuj;5*Iha9y^g_F;gFwY)AEqDrpj`nMmkp3BefF6$%t46cvh^pY zAT#Sv4(}D;2+aSG;lJW={KdH%^E+4v0Cx}u`Iiay^|JY2HeLKr+O8~vayd3|IahGr zP^llsPb*S_T;K}W`C%Wj%PPze%zn)0vI<;+Mvy(Q!~=8yIN$=#LmqGdoI#ogdT zwV(a8ti5@_+NDDPIv`Hw?B;5BNna)vxUKiE^&8%9EJ%bg`obi~sHU!+boQb>s|mz*GG+ z?xpen&QbNUga3_#Wa;d!V`=v{9+ov6><7Q)t>~qoqpD)%=H`4kAj%)?zX_w-yLkpk zJ3H9@VJYk%lUMkI4faH~f!kX8ID3O6sxREr`(KFb{lWgbIJ%XcjGMEY=P$;^{W1QH zn}0TdT>v#V*GpTYdb_!+`*?f7e{~!*XV8)U9T&sO%^P&6f5%4$m)QQ_ieFukmmu&D zSA|~HF)ph(#9(%5o;%kY#pUY7 zX?Yoj#%b;5!gb5ior{N)n+p(=yyb3b?F9Fxwt|EBmN?T^dnXgMgN-4F2r)H+wi3T$uwr5gPT?sbsXoJ zkAsZ}ub>r=jW91ihp-?YKL-zwjWvg`kd+{Z6BXsZ&1-SoD3W!~<@81Ul zr^R(?!e8bE&h-ZmG%ILeu0I#;Ql0<(^!p_L$i~0T_1j$k$O8XJ__ufcHrGG0z&{fH z?Ongk^^Yv@kA#1F*Kc$EBMbZ^;osi%+g$(10{=+(w|D(E*FUnrKN9}$dKZZA{DBI= 
zT|p?w55#;fK4Tat%E*{&XsOF9s>oddSQrW_4z6xcFu=>j)!S3+<_&5CLnCVBK`_7z z6-1!GP%cYrFLzaK`O7FIKv7PH+6xrAq<n!w8Gl7?Oel@WM?3W7ArC2jYEX1|j8K}%iIHZHDU z2p9BHXLlPHn@hSIq$7QOEj3uy=`oO|clL2{0O`xlJ`5ZTqdS~CdIo44<+3gn{F3BH$ z4HEdp=Kz5@CxA0V0Khih00{W#0LcKo*b(6a(cz4Nwm>10BE%;59G+yaUF88DJiadR+sy zfdk+K{LwEm1QUV}A%##u7$9sAUWgDx0wM=dfoMStAr=rjh%3YgavO3F5(7zsWI+lb z<&Zi^E93>F4>Amyf_#FkKz1OgN&uyRGD5kaLQrX_GE@g@3bliJKm(v*&^TxY zv=CYaZGygl4nW7D^UyWuKKLtbbQlqg2F3vsg2}+tVMZ`pm?tb477a^-6~by@ZLl8L zC~O|K4m(0XLcm9$M&LjYMNmM{MX*NjKnO;NMaV*UgwTZ0jWCMv31J)I91#=o8X_B_ z2%;jQ0iqqEA7VIS8e%D8BjPK>al}Q$10*CQA|xgxK_mqvLnKF}K%`itT%=m0=Sagy z3rPFO$jGF~Y{=rs>d2PJ-pJv|naEYhoya4|i^$(m&{3#Rcv0k0j8I%r?xLijRG>UV z89`Y_IYGriWk3}{RY$c!^+%0IEkvKM#hJv}$Hm9x!Bxj~!HvbO!0p5RiieEHh$n|1)tytk<-z1zsz<_U76?1trBz3U`W3iY|(EN+L=rN=M2R z%4d`-RQOa9R1Q=rRGn0-)P&U1)Xvly)ZNtEG}mYpXnbf2Xa;FcXqjkrY46h3($3PM z(FxJn(xuS7q}!&aqF1HAOrQ_nBA4VkbRN^ zgF~9bm!p#76DJX;3g=zUX3kA6dM+cbM6MpLb8dcaXYOL|86G?yC7!!HZ9F@?ti0B| z*}P+Xn0)elA$%=-->$P=hhKkq{XIWEzbbzO|4aTe0YL#zff|7oL3%+8!5qO!Ap#){ zp;)0_VMO5@!Xd&P!bc(kBHkkPB3q&yqE4a}qF=?B#NcAZVhiH5;+EnC;`0*J5*8Bq z67!NYl9rN%lAopMq->;0rIw{xq#dQJq&IJH-|)QAc;iq;L?%e)xhzcfrfjtApd6l@ zu3VPfM|oO#JNZia?VJ2J18+W8KvYmxNK}|qq)@b0ELYrC5>yIO>Q=^3)>h6^{;a~P z;;GW63RP87O;-J&#-Qe^)~pVwE2<}}&uTDfcxbe1B57)9W@|2K@o3%F>e0s6HrKAu zKG3KS_VyfXg7rV8D>f(7~|92-!%_sMKiRSk5@r zc+rI4B-~`el*#m#X`dOHnX}nbb8K@<^9Bn<3w?_Ui&IN=%RvZci z8!4L9+v%Yh! z3yO<{ORFoMtCMTD8>QPVw_$fS_i*=l4-t=4k8Mw7&r&alm#J5aHv#xC^~Q(U=f2M; zUkTrA-|v2Uehs&9Z@J!jw=KacWtZ>ip$q3PiyhwydhsePw zzNpNo^Jtss{uu6)8GWJP$G+T*TSN4aN(^KTJSMa7~y}C{5}mS_YM$Dcc0DaG9WC7_eLh1aqa~9*Ga>Ud%RXy7`$l$c&b6GVoWoq3+>wV; z4{P!$@?!Fi^BwZv7u+mpDr787D?%#rEm|tpEAA-~Dk(1|D~%~VD|0QIf28%OyIiom zyy99#d?mEfr*fsrr0QL@Y;{WwM@>O3QEkj);IYr+)jEs1iF)PwmkmMy-1@Ofy!Nqhj%BWPUSR&|C)rPLK5KuTS+HJM`{MQGbTMoRb17|^YPtL? 
z@7InM`IUF8#;c2K?rSIO;TyOcIh)LzPqw7C-fSCeFMjjohzSzxNre~FZ0(nB2wV9z>jT2pnoP9 zNWl=m9NhK-;W8m2AzmdUBxGb1WMm{%R5UbjyOF;q2n+^8L_kDELPEtvLq)^HzT8Fx z8{CWpOg{^ZAGN>t`?Gd&wT;LkWDf=b_ucqw8xhHiZs77G3#bp|mq#6B6hv@076eo% z$OIdHX}R1(1cHc!13u+|lR!X4L`B0ufT2PlmlFbzka1DKJw)(R^`vPD#t6rW(nvHd zy>uHr>Ucfr_=p9CLT}VJT=#k98?LBjMX%%ign^$?pvkY`u8eF_;l0Ec!`jJ3a`)x+ zK^?)JZctEQNC=3CS3Af+uC^!P(X7MMA|RBu^axGF=R+(Q=6&&r*7GjX_KhUjZ&ro% zbT4JJ>8}%wSTp>6caM`JDP}|diep6LPN ztbUDIY#Q-uHnlsa9-qVo-|dDL_G2eU2Jn}Uj@B=!RvP_4YnDBD`t;HACww>4F5{+B zv4b3n>fzTPKaNTKIWdP9)3euHHa2Abn?k{()07|OZUFC<6FxmX#hllL-t;H}+LT9>sJC*QH2hokoz+x#vqbO|c<#7|n5-F=)vbNnv<0`n z`RTTZEc!5U{pGgp;E0<4cMbjTmrNEl`CvK1!t-FJx+TVRfLZyY>%5Z>H+y$^^fw~+ zRbi=cp4_OI-f}6>y?3`OI4!HoUokvvi`lTMX;E!XWl+Zp{Oj7Q{4Q!-@;`Bef3M%a za48wl$QeE2`fXdSc8(a;H>zC_gvu6wUKORBUUpr4=4)t04FtDP_U?=e#qzhjcv7Ek z`OOdR$udA^)15Z#dtj5u{oV;BlNU8&RGWfU0~h?={5W_Y8F>i0LQ%ti(}Vv@iu=+$ z;pm{SQE)=FWQo(@2K_w$=9xgtcG}hLNfXs&%O%5V=M{G|=B2A=`ml)I-HPX7l_3@I z!X~FS)0s_qMyp8hloCtKv695t^S;q&RXc7>JL!{>WX3{=UzVW-Tn4fK!S(p>XLHjk zP;q8DV8w#ubner@Kwi<8@ntmkE5&btKVUD&@%dJFOtUB2Io#9mEakv@q~6p#^_Z=C zEntpi&1ZR`Od^lpYKNxVd6WUOLvE1Vwbe1?;@!`81Lkg~4Td(Z()wpzWcxwwE9nl4 zfoZo6cui^*b`c>Js^-4B&CFp8T|ROzl!fEHSf^qR5nO#p>LGmT2!sgvR`ihaJxWNo zBVLZx*D2OwLeKxjsQ$Ugg4IdQBc2OeKrYuutWTSrx*} zEp1{;YHjWsgbNJ8odP|3&FSyIRB#3)Y>nABuc)^hW(`gS9S}Aax;|l#5X=dj^8Lme zO?!m0XDvM!c`seLMjIzG(-g%z?mlxQY8bRVU@NUsDY4G(Z7Q1#-ZO_*+Qc4q&RDCG zx?5jjQtcxL;ELh3$u~HXwIzbMi(cDk@~9G^(4yY)BJswQF?EXD)u||Q`)<>bvZkoZ zP*f((c;iclzh}q2_+~c)|DNXS0V4;y@@kRwaqk7SD-8SL)R8+lpZYB+t_xyi)eF+G z4+-m3EAdx2hHq(8+iPX2lS{Ye!uf7@$w@Tn+Qyy`t0^UI=p+cT@#fOmdl2^Ay`3-C zHLT%BTdALs)hMXMl=2$Ro;69t^TZBbBsbnD>e(~XF!H=s&%VAtD$#Mh4;6DFx%LZ9 zmsXV2hY#K(MV?Eq$lq$|Rr{wZi|B6fV@{+fH>*6?cE@{}f*|4U`>`N;Z;G9(5-*r* z9{(Hq;oT<$*WOXGun=*7+bSucz>RgbLK8^{sOg9-OqA_s4gzF>5(w^5<%40}uxxMn zPSoHjLB5G^O^Z{+B4=Yt_;07VJ>>@Nd*m0Tm%?y|`H@3q18x%%3U$0msqPmxDwA#+ z>y3uhaSfJze<5?l{Hvp1wW|V71Z!~crk7hbqn!kJ5^+%a@y+#$U z7`N%mnXWtd%r4qLrM8rn7F+i&$&E$O?Qv>wCb5rmkBF$1N(PqB^Qg-4sP245JRrR2 
zb&2b#O~uabCkd?)DW*hbac2l~1W^U6$01=4TID(E2nCNZX;I_YqZ8m5`y4D0i?56y7THftSEY zrEs~`hiH{Vbg^#+_oP&&)9I_LJ#R!mpT`X(>eORYwV)C1t+1snq12$jEW2Cex-=c6 z7PdL_-l^nzCi#Now4gVYoJ~;LQ+>CwJm12Bhbf}_X=+bMUqK27zpVQ>i)F{RY}wyp zn)ndFJNjl$n3nar#iEoPF)P6_ww@0H3Y2Wsm4YADB-X2=d%@AO1m8!{CRNot$&?N< zVsc-T5SHI91c|1p?T1yXgic8@qrJ%NG#8aUubY*$;4s`gPS^-|AP^*)8)NwJZDx}b zuF`v+H$0VqQQsnQiBHqzv5>T|_Asjo4l>o#sSG1<%Vcv~_Kr~DF!A&(8y8^mCV zx>Kwpl&txgg3#HHRD@BR%`>`EN1#6wUq(Q)2f^Lf?&+ugec{j3%T}C9=wZV#g9hi7 z+sB`{9t@P-q>PTpq!7Qp#k8m{^VV#E(ErOAd*um%VG=H~VVYhb1782_=uqk67{+IM zchTtSDJ|S@DuYgb3+UYBUR3h-QK3fs4y zHb4mBju^)OPGf|z42RrraTP*WBHei9t=xDg^Z|=19rxaRvNxJnDsQGtJoHY+bFz3W zeo{p_-6c_yRuT59)u=X?@)gvU48;kp9f9e{BKPaI<$8Sl*Gkc8<-*!RRlK{gAT@1R zPqb8r#a_HB&Nh5=rZAr`njOOIwikBt$;3a7Fwhm7p6+0M;szs)@?G7SnbvM+G^IpD z%9@-nQOVA7I^R&>Du4!03lL9M*j3VYux49zLv7O}vyqPCfgppsdD?09E?*>tq0lB` z>_F5>^@m+#Rvs@D?U-;i5=KP#tbFh8yGYCq&jS!xxx0lqsl^n@1X(@t=LP*_ z8k2QJr6_$4G1O{)lnRHc%+_Rs`mWBH(i9=bu0ahY~HfdkQ8nplGqHiVDRAF<%Gub`YsL8!r$Ze+9i$OUkx?)FfHb@vMhI z(`zguxJa&3DQJ3#bEVAA(CW}P2Qyvx78L_?psUL*g$Ct^j4b_D*m)HB?i!0KvX2s8 zBJ$oPi-b63q#f8N`}7rCS~aas>kK^1RJc9zAR%}9{uHv!N22k(2Mi@1-|IU_nO_`X z)1hSFYF73M*N`SmQ+O2pQmlk0yrRb7dX`cpm+g&Jo0e?VHnH4Rm0*rnQ^c;#dNKpe z4DBv&c>6Jl65A~b2xze^Nm>J@Ar&ErZjG5Q|&8BpC6oU zM<3Qu{qoinA3H+{|rRQ{_UT&A0h z8kTto7^=BQeTf;S1=jRN5U5FCS1@pX`Ub&g>76C&Bbl}A=Ep=1&nM^1`W|mQo}*{fOrJirh`Cnn!-fwm3^JedQ!ITF_{A zS_@$nRTL2?-Adk8lpflti8l0(??$aG@Ia&M5)Vg~xiUA06fxtvuI0x`>1KV~UJ*)B zcpUv9mNsiZ*_{a7uDMA+X4{wS|?q-ZzZP3a&$lA4Tom7J@Qi!K-T2UA(RSaoHeHqv?@F^ zr;&Xn$;FdGjnqi)(?F3{&za=s>w!P5_F&Iv--kSI+OVeb_N9q^ zxM_^Lbm+BWZ2%?Fx=A?N^&Zk)LNUo*ab6o9t`#HNxHonu@|*X*R>jv(mULZTB1dd zY;v2ViiIA9(Ca)kibhp3vwSDQ`rTA3*$88|eu0pKzCo6V?UI6CQlid$1qD{ejnvvxS{aYqUc{ zi$kZ>a`xUA+(h1|?!_@rWM3k4N9Le@!hIxgFqSl!HBbU)EbM3|+Eo;(#&}^F$f|GM zBq1M^NO&*nNvWpg^_NE1x?N~Lf8vE^J!TQqf#p(jV<$gidRMZ9Pf$>k*oYji?j$&q zpSn_|lk~vpz~Lw@RkV|BP)@XkeU0%$LU>wC^CCL~)tg)fiX_>nN_zcVEqPWm6>#4z zdU|~5T}rgkW`T;NuLbE=sKd=BjeYadXqMMEsrRx#F(mMy1qoio%(oU#G^P;><+qm{; 
zHPO1WtX=(m2e$>SN)NM~b~NsXRe_3URBveZWY1Cb?g>vx*$XR4d`a)oPvI!QB$V_} zldC1CAQsAJxaM)ota#a5taORiMmWA7vVa1)Ei&$>?crd@x@t-Yx<>L$=~r2b=z1BfHxUYwnZw;orqf8WSFq$SJ|R z6**2s!7okJ1jOUIXn0q)&9gM`fX-L;_ylp z6Ess=;l8R|+hdVcl9yt_QhXJIygCj8l{BR~VfhEiu@0jt>q*^HOhUEIr?XDdGj%Ig zG4j)1cE-u9o*eE9^o2|ba9Rr$xh&Oy1}LqC(0*HX3V)5iXKMrhX}RBhU!tb@nN!ut z$_7QdY1_d1ZPHt(ffj-uV>5&wg!?jbtZkdW|bXANLqtWx@1hAaauRxQ&C?5X|}EbmHin}kH@JYO0;7u z$!StLTr?=!_F9qI*$?FRm^?YJ8=h0~>T%u}OR1Pq7Bq|NnzAs_e@0x-!x8HLp8NH! zcSQGzoAxSg$c_k<969}=ydqK_*mMf|CMA@}lp14d1YsqvtQ<6+Jnbxsf@Q3uhYzds zpDRdPC>#3A64sPGC-&2lx#N~wT8%fFyDFV4#7An zv+9d+J3bzd2Q)>QvX2p!`o*c`%JmKPSwqo>2kVzbBB*j+;~uS*Yica`_=xY1Mp&8W zs(-s1fws|TKE|JXo+x-QFkOVXQgddTqhE2-NJOrABwx#yqe13raQ;?)$DFzhddG}Y zgJ@M1bqpV(l8MXHn~D6I6yud#f?$+%05-prO$RX$t{f1m2I7aML{9TbXTbmuTrm z=jS2wV0evAFJUd|`EYywF&o`_8D zBz<-WePWj5IK2sHx)D6A;@W)(ccu(#EM0e3X`u+BoHUb8eUewQjvGz=SY-5n{6=U5 zTsqPd((poYx|%0Qc=OHFoGoP}=FPCCXaPJ1~Y%An?GV|^GEK<<6B&A4!nM&~p z8UmV8jXQ;o1PX<=m`YX|*Ku$t6L4yB_>I+i8mnx#W{kdBUsL2)YFH#N^;0SzPfw&p z8`RO2Ql-6BUW)XE+MPy<758qOm3B^9PSoLFpT2dio(lUCTZ$VTq(OdtyI{st+pgO% zKIUhRX`^ny?d6y(y;D&SMdHCo92g886-tyHv|^T~)J~8EZ{H(DLM*Jj8aer^4{{N3 z9PeTp6=vJM)u!dhrq`f#D9w~Ara?0WN?5?JKt-T!vK#;A4sYUHd7RMP`LHJB`0kDJ zt|0U)j7(xfp-v>epWi=cp{q8c%m}7__p$Sdt!MC;(0fV(woBX(;UjiBN*=u7hHBMx z*ZK<4SS>ZBR7zkuvP)wh9MN#q%I%ogl9CiVB0sR&V2D0K@oUgH#BV|{bA%APe9FsE zeKRXKpJ|d(P|LLlp}i9s-iC~g>G29gHC0)?`M7E6x^F%3{O^BZ!Tk$(wulfUfS#cxz$|)U(8^zrQ|U)*u{sv-rj4#Y-#gxI)-a zwL@K$uXbUz%KO;sZdx@dB4;@t)xEhVI|Pv{D$cDrP=*F9J5G@sviml}$qvKk_jN%~ zj^=-Zhl%y6o4m8d?Y2)~HDV-LTaSDipU{rFI;o&pnms1=#5JqdnDIMaNV#;Q8fhYG zilaO>?mU_v)BX?5{ zY!QQ$jq6HSb0FSsA|HCbJoWa=p$ukVmwP}(&TAu_#m66mVgHwT$PCTRN!kQ2>^>DN zQg|QyTvjY&%;kCeTSGXT7XaxL_kl9=HEH(gYg<@3_o7z5jA;*0RINX^N@A80ma}Ab zm`A(c;axzcQNIc0OHLaV?EL>~4N#6}y8@pzyPd{|6d^~NiuNu>u=S1(4iPdwjBxht zULC|WdA$Z4XJ_XJu;RY%Q|!+iUO> z5!Y1%F9Fa01s+tW-MwbZb!LXhY>>tSok^(vhLUtxfObQV>Zjt2+^F922LwUaw z`%KA8=U`fx>7;xX+Kr3J`F*#iCD~0}tPuPJz|z_)^9rIO!6h}uX$==@85M?CA-MD@ 
zJA4sLGB)m`K~cBieiw>{2mu{#8!7Kf_bY?L_Wxh7IP4&|vm<4UHWYMvJQ57Z2Ih-- zir|W(LsW^-%i9G?)2kli(k?w^bj=YG6$5TR`t_uwboNmh5}Q2E@2_x3o^yure%4s% zh8d+%;rdi(NDtIkEiqxk#mk56nqmwR`wWxlF?jVQW{jdFQE8sRRmwIQ6&9|AxU;=$ zjyOcmQ=@d->r7lS={|Akti(q=F50DAi}1|UdmkNmy@N=!wP-4(&`$63q;{ARlduAk zXjvi$St!4tx@oQa9g)e#jfZ{F-KOuKrB+mBTFrVpPug#vjm|yJa5963Zw^=0*uLacz2WWT?n|AM(N`#wet@d-zTwJ zNm5tsGnON7W-FyrKgHJb-8gqj94BVH3GU`?kXPcmF*M+6^ri3_qp(U(su^1!q$#`n zCF3dL;vx+=s`o2mHk6PBJuZira=~II#EY?u2ejB9d6M?F{iv2w=nfjF70>!qAh+Ww zrt~atZpZUWtg1>K9hYJxF`o@KA38Q$Pj4)@dxoT;Ns=nAGS*RUc=2cJT>#fVJj0Ei z2o}%%Q2FTsVC_4Q#lcp%Y0No`8a&^Yj2Yt1)^W43hd(=MBg-h?EBw%5xzkZR4g6~Z z@1d!0L{e#xgLL=wfPA+z@}OcJ8rTv>|NKJqn=pUy-) zvF+ODG|O9R<*F;PiJy6h6{n%c$a62jT{3J{7h8#ymJ(NUyT+0#&uGZ&5J|%Ep?4B! z_o)lO1(OK;-;h9WirX2@=kO;Nz_@mB?BjKNnM4IAUNi>T3jkOz%BH3{M7%ywV%Zwk zZF0P+WLodGAvpcqawCXnfwH%xobu?z5xQcP0`I|_cI0P5X+v4ILB!#$emj^Lm+-v| zngKjVf?!G^t4gu9lfd>c1|ZsB?4F z{$+2HfLPLMen%ZSy&?Z}LSMcgAcd%sh{edGz&NG^rKP4M`_wfHUNDf0@tOKTy|xC^ zWOweYw>7kz7_}KAuUK5Ffp`_VjZS64q{44??{>D`3l;WXK7NGo?Fc}?yx~CX#k;I> zFP6)1!ZhbVK;UCoyp_Rg)`h^Ap9%Uo`o#)puv*C;LFQRtTIZ-O@1kXm+l?ED9`wbL zs};Ep*{MXu(bKqe*QP&`o_dGNTbNjEN5w#oWJuULFkYB|QWdPt^jzw7!I)VGwwtVx zkU6o@J3&YNFXFAvuV43kRGp+tu}O9e#Y0AA&5YB=`S6T0oB}=H>16~d`@8x4e{u&9 zbF;Bt(B#V`e!DP}zMbw_jz=9ddQFPey_uy=PT)9F?TbYvW71l$y_Rq?Zg*(7GP6-^ zSlgB%FbRNOR-C(`TATS<1PCpGHzXM&%~7 zV?n7AnhS~-ftQqkZi6)#dNO2eed0xa`c30>BsLgxa^5O^^EUF1;?Rz~*7p+is703+ ztNzF|WOO*G=9l6$llcLC$e1xDN@iVk`=8lsCmXk;ZRk!S-t28pE*=Z$$L6Rj zH6PqcRFOxA3C1j)PERttc?$z>A!u4851$RXlSMy2xlOgu-F!qn_gYOPN-U&R{p-s| z=YfglDCJIK4^n4u-?;!%1YR}u%baZbN6E=t0L=b0Ax9NjWF)0CZUO1e0i?T`p%D~mqQ9_Dxoc0d)k{AYEdcdEQ z3JtApYY!(iC0#H4FOlzwop_F-({2kFK5|)Qcc|*_c`GU0*J`YLu{Dgc-^z?b>S`kmR`#JH` zE%H+{P9<9pC$V6QqRI{ev;+MD@sY~^|7XK5vVcdjQUKwISOiaQuHz9y_G12=5dv~; zH<29ir`T9+1Wl&xOsM6o!**c|<5I$5SnpDy9C#;xa#Tgs1C9UODQ+?MO&Xn@P zLb@T*tC9bF^wl-9yq>;we&V*&H%-$0r85P^x2D;vPN+`TDS@Rp92%JPtQ`V3QxVos zG>g?XU%iAMDB&iN0`oA*$oMigZ`geygRf#cgRp)8lwQ9rB1@CxXz7er@>6xno=VJq 
zo4Nu8?cVd;5!jgY$_CzjXFk=!T1Cx%Re;{1YSM56+=AUNN5kdYr!v#E5|GhNb;&oM zGS|Em9LU?7+K|tv;1!7#zG^e=nD5)jaS>n+ELHB zaZNYQt-2aYOWHP$=?}m=&>7ivCCBwwRm~fb?7?EhnXN6X7P*(pd6{!d-s5aQg~a3Y z3GdmLG%QY;l2%b?F{SP+S)t9S86g8Mpx!&$m+R(bU5}$OeZZRioX5tc&*m))Zt3KPtV1+rYoHuJi zgYA>ljjP!jn70`wSAI2p3mSR_;Tvs)hGdlM&JDQDs4J8Ds;;G_Yx?gM;=9rTHLfuw zl%o!!V5<(AhuP@0TNl_~{b8Bpxn96m=T958jiTYWq>SB7bm(t#3^L{3%c_*f2g?T6 z8efyEO_U#gcFww0ySi^ebJ=ZpyoCRu$Ad*#ItW>lMwt{pmlS|BN)Xo|SR`}#AJ$>8 zxB$d1G*|#;5qS7!1M1HqqR}!k!7v9!__6TE_K1YT%3|@*@XM5yr(aW(w7bB!)Sn3O z8R-}4SB0=wEpvexEwXP!K0cC_F}&c`qj7suq%$#KmeWL$rkvpthEB9v7T{ZQZ(#%@ z35zLzV`TG!ZyUJ2^8Wx-viwE+$~?YyH2dW61Mo}-#;OWKq08ajuhEWYJKJX--u(q) z|9aeCcr7Y1SDW_3xyJYIKVf1SqVZ2|KzXw+!ckA)h`m^cN_G<&pm_%!^J%DqKBz3_ zo2scCQdS@KLQj>BI7fqUxm}qj&vTO&(JkOSEIs^sVl@owg`EB*!2i(~OW<>xc^BPp zzr73-7BedNN$~nHn}0Y`xgC={K>Z6H*X(+=oOYcVFpvKSHJneeLb-Y>mlk>^x8N*0 z+GmWdCFe#!YZT}9R8Gr1QY5fkf=8i`I~<#|_eSMi*SYnK{vdP-E0{Qquw?PycRC zS&pXFcFU|89K!{UF#tKk)qtowv4jl%MDHVsBQFb3 z39$6mU?8TAL4Lmt=e6KaW4GJvw~ygekm(#k(jsX77ax>m=(H{WzfMQ8H#u*gKuRYm~}mRx_xc*m!Y>R7LhHF6{o!EKlH4K>2_@c5RB?+ z;U*D0Ccby)bs4O(Z`6Esq#hx1X|6L9v}@Cr=d>B?v@3DUgvOD@Ip@w=KVz-0Lf8Lj z4NqFhdF1a=1wmYRs0%z-h)s|(joenkiCPssoGa)ZB2`KQn1{q1xA^C+S= z&I#o4Lo|WDYQQ1m9ml=NY9=*Nm0r){7L$XB#w9SPPssLd0&`ukWz4Wp^S4Qtv$YKl zN&2GUWd0Zmu4i)1 zJNWo|tdS7RM_Q*5;*VZPNtbnoH4zJ@Y9C5)9#+cH1n7sePuTXyeKg~z~rZ-124vWkOv@%A#>pJAQ=CB6jD%^B8!v^lB zxncPBwTp21MW5yZ!O!sF-PiVQ{i^0R&d8{i-P2!6lp@r}6_F&zr9dHkT3%@Z8YXa3 zulpl_cpVwz|7HtV)=~EP_t}(!4MYVe%!}k^KiDiL2WK0ux1J+hSPFpPtE%4ls^x`> zgv15Yto@mMDL(%<67#aoOMf z=As9~aq3kef~Q2npw_Lje~5Hfs+czVJ<8;sX;L+2uYZlnKsNBfGs_ETJ^5&y92=j< zHzEPC1p5wqU4TfqM1F`_sl3o8pJBZkxe?FR9a%+YIA5P938aony@q$n8ksFjTzrD; zxuBz_OYTr2Z|zj_7|xVe|KQv?K*je}oO1q78Rorwv_IswA*1~$-NZu7l6GIc-}fa? 
zLb?kMHHT@FImD;!RMU0uxmhx;ypC+|Ha3f_lqxj=biuE|5EvUR;Z$UANidd%?&Rk^ zyw^8g#Wp~X}?fp1|3wcTogVU9Kk-JIH|p90}^>w@@pnXdfjlObP} z`J{3cg^!SYUWO1?(NG(>7cy$%EzdaETm-C78D@R@TD~_J=L1J+tp{DVcv=A~qKFL#Irpc`uk#LlFKXJfsy8TlKZ!7ILtft_j6it^+h-zF_SX_}qPIhJ zThMb$_riIMBQZ4*u$8qC@O`6b0RqUD3_@zztM(}?;^q|%hLstVI&K45LA5R~?VI?s zD{WF&zw;(zquW^HsP(jgMW&#*_Pv^T`&0TAczXj0{Ifo@N1tO-JpSak&E5n%#wA*D zc0nqQ-oNc1qE_Jfnn-CqFQ<#t|l9jgH&q1)-Y1{{x*v3q035VBX$faVp4 zn`*b5RN)S7StJ4TSvZ2jmr`g;V;cp^YSX&~HC@GpU&pumZT^L@ljUD96b7#yoz<<| z>n!m$|HkS>l(2aDK?oP);s?HXCj0>zNwZzZW zLE_>*$BGh|kcee0Gi;g-PXw-*Xy+U7HL9EvRpq!UW-=n~MP((^zx1^se&?AShT6C2 zxH=TXM`I43~$vGunu?(7d6~>mN7WUpd|CIU}qpGQE z{Ql+kLGK44#>~Be{Jd=i4SiopPD4t^Yht4TM!~wThq^X{y`>eZ)_jJn?PbzxZYZ_`PH!-1t2(7PJg6Ggw`6$nE!@GwJZ=7 z9t@!CFMYQM?r2@Az)o&cNER%#s(rI%=kiU2ZC)QkX^D87Af~6RZR}#jD$ur_MPmzn zg-7w@mVne=dN3ljdpyn;8m*=XcipA@BL%PpgmO5=R2A}yKHAQXjq=4%RO_+tU^Jne z4>B}rtKi54-A?@k!1b6<1enPc3eUWTMYR-E`*;(4M;yQVR z!dldP1P~Yhw3AE#+~6zm1CYqJl;O6S z(?wyqqGt}RKkO403mYM_7mjg~fGE2U4>Z7^m3lrc``D`wK`A;4V@cu3`Rb6{~$|I6b!iw=pkf<~-b4 zY?g2c5F6)<;WkZ2CWB!NtC1HNGO|4Q>M+FTAjFZ#*e36}QL48<3gn%7*q6!Cw<$`@ zvdeuU#_C>@jIDbql+6kdOp67n@~nc80wwE_MdK9R-eu6#=FzZ4tvcu6ORv_N_7+bg z!p&sz)V}A}p6s|~#oO?p*|PT6lnipd$`wNJUveDoG#~rD;>YA^0_$ZZ8$3`bfLCL8 zOBv5>Ym|$h&p#u~z~fjw+S)q341=#^6OPnV)5?MHa-sRk4zYIm%mo2>Eho+xxrLec z#f1@D^}^g?_?bd)9yHWlsr5eA?ufcFE96|_>1ZRSD@VQk`;^|nr-Y0%CSRW!T(qXSmqZ!05?)$E4pkYm@oqeW9RK8zVyw*h=Co+09 z1!sFfUqm=(n+$4`70XIDmGiZa&FE1w)b(}+6bA@Yso}}}XiPdKd7K$i%U+dmPo8ZS zFBgvlSO^@qX@nd142z~s7EStVo;BV%uILvJ!+=Mni_SoeUan85lbRkbDFrx5GK-btl(~W(GyEH2VGZKQa9uc9(Cb(tUpj+LU*m}RXPFmnl znNy+R#Mx2eTI6knO(cr*l~(JmW%Oo&HH_b;)r}r~1k8*JDZ##xd3rKGIO4!)neA<4 zh!I>Ud#G|=_Q0*VX3|(uvP(L=kV}-cpicL2^YY8zml&wel6jjyjc2xuwY%Y$8=iuSn<@A^`v^VYy;R&ZVm2BJwCXE+BPn~OC zJ6-;_;+q%4aX2DA#e-7)&J3q<=G5`8TfB_3jqU4dPQ@Jc))-jZ-ZmS=U)4o)i zYRIaoC|zkBU(&~@xA`K?{Osi|t1<5U^Z;{0+%sxqZo0z2kMQ)*j^Y@Eo`ysKPhi&` zUkoMmb{5%3iMhQ!B5TFUuag)tO}$sTb`x*Q z&`57NFFh`8{l#jf?zLaiNdM3-Ribs2EM6g9%~m^zzw-Ivu)7+=pWt@m4*;Qmb;G;? 
znm+4CQ=*&+>vyVssqdGXOidCY@wC%LLz( zlsNRW8Y&4^vi)DyHhth0U!_GOBN8I2l?C0R=YxgcADTgfi4ap!2_9`uSyBLbj!qLR zq=>dPuu$D2Ja$B@cdm$3@XZU{Zmr+Y?amFFf>HI+EU~2)M8bhoS$8l8jNO)>Y1ig= z3=+cKrJ`NENg;UgJ|~}%(gQj2W5`;mQbz-QeR18Mfl@!)h?@(4GRWQ*0r}ZE8GXGw zU3(OE-TZr~&rSM`k4_2;5#E=@K2tct;j_qTYDo9u-AC=@Y_(wuociC z+lE{IyAl4EZMl}9*LeAQCV=07JAw5B0J04T2* z5t$))Gf|>=>4;_CHru`#e0Tg;_hAoy;%{L$tG~GLrgZvrBWO7!X4gblwJ@oJZ2(v> zUNOoON_!U`7o}w>>YY%S-ozHtij2}F3 z@bQ%J67L{WUREso~bjkBir*#|<2c_@ViRsAVxEl1n*}be5|)Rp1CazG}*)G@Zvu zj%QlsZl4So+uyO2(tDc+MaU5Q%1KD!;iJOv?SStGFnrs@36BF-)~)+FDW_<|`UnM4 zwU9I$-|@axJ&Rcf|D*LhHS<#hY#1SnfP@VLxRK|3_bnX5i(4_bKM#}L(@1>Nr?d() zgL=Nr^FA53nwqe=$A36_zMDSVx)ax$v4nTY@fW`xt7S30`qHajk3sK9On|mSN^bmf z^M)JY`Jsj1l7Wh=TfKHNn^>4uY;&I{DS6_dHV}v#RiYS1TjB1(UKT^wlcu^SBCi}P z;$_XRRH}~UAl#oLqJdw`GPpGQ4NGX5@V*&(N)9Q6-d< z)hj=Sa}z%WspSF|#LJAAG7XZxidOf}&yod1wq6ouj60{c*?d)z#FCOXZfbzwunm5< zRpPgtQ-d}v579>*je4OM;K7h~9A^As#Gsytz4PoWEd#BzAuXk`DcB|<#dcoi>4W)L zYRm&qN+ZZ##oO`BIa#GksM!57tPUy{tosNPZiJQYV2B(59~&_;Ivijc z5)qEWBZp#sx%&)VYPLFRzY;fUEe(9MJ@xM?RgTKxN`zT z>QO7LJF(0bX(}Oyk?!8ex(5mUv8bn(54PELLV#6gUWMefNq_v2*u+2Ow)tmI?{iaj ztDh>9qRqX6?SIVvb z>-TA5Wj=Fob5KQ0Rl4SjE6U^IV@9Uo~ikBaP3S~ltxXQK=I;hEKy1B%3X zWOrK;%l6~oRLirnMkuSRSB?YYi0yCvDLM80K_RqFkX8b4`}sSb7*2fqAR}4}1mqxJ zZDyiAz(P-detxox(gNeOA+nWMKz*v|gEiN8=!+6qh1ngf20h@<3Nw;%dzh$zX{uU7 zLe({4!%CkLGm{f_TdGov>!iB(&STtdt-45ZDNKNP`!aH%BLPX5npRTi3Wc7B5FX() zE>DaMoUwrdEszYY1WnW^p@YKm7&E^d>)QZ#{Dg^eR5)2hSweO8-}WvZ@+#k6M0Pfs z&Jnw)JwM=I@6cZ@eYYGxqMtD?=}}S~I6Xb!Z7EcDLm({uczq2H>}$nZ8>Pu>Ew#_M zqRyPCQ<#V+T0~9W%c9JP`#J@SQ_RSO{%*+maDk>3I7n);7SDcrJ!R$9i=N(-Bg4Z( zF6=BG4foWOfN-B_KKC7VZa%9Aqbp#Wd6o&xJFZT|fZo4%<_dtR7{cWO`~u;obg1?I zRt{Bp^>bR~|Fs417_Q=79SFH1@*YYA;|5mhs@jv3Z7Qs%Ogvj>IH~g@G`I1XEqG$Y zRYe`?a)ChF9PMGHaW$>>*HhZG|CefQyG!ho`4Dc(C*uBU_UIISAfp9Om1eRx1lbp2 z(-cDJl=*VWlsQ=yq$vbYk<_FkfixkSJgTI~$bP{DGp~#6 zpLsV#ga#A#y2o_uT2|ioq^X-$m*|Kv?=mIo%$pX#V$3`%{`L=B}bNY6eN*`cla z_42LztVZ1=tXV_vNl~D}Ir1wU$zX!qac|f;oMs2Rm=29x0mk#bn8FumsC)DZXbr+B 
z$-J!CK<9h~{5jQaHEEO<^{2S$k67`7GYgo|l0wP_Q;#O?R}o`1)#~-ia0siCm~r^0 zRdcnm!!Q7-u)}hI=VK*SY;cK}C&gGNMCiNptCMQ+0n`mxc|cgl-KED5z$`9-I@zwt z_c7rbt!qcQ$XS2G_HkEoQDGk+*IWVN3f%FK=B?vhBS#6wjkosEEE)3lDR?b%k9Ca$ zbFl&WVAvse|4jOdNT??UZ65x9s@`vQjDWJrs`9nHAh}o(=s$|o&umKY`9!i81JYEN zDoYu}Z#|2D28@EIB@INvOe}3K-uF@}T#(@tpbMX%`bzyU5Fn%f_I|-__Xoh?Q@~#Y zH*?WZv=A3T##!uT6y51)M}4BwBRBQ^M2O?%W6Ng)zf{ZDx^v_9l<^7E?wx9g^o4VDLpGYgu;r5MKMd+>Lmvd4Nl3kh9 z9^ZI7p`le{Ca6AA`b_$SGicUF%gy5b+5Inm!pK%jV`tCxQLg5?w)waD zCc`;ipYuE1$#giqz8q+3i%=C@Jr?6HD@ZTl5oHm%sMmBXF{T|z7eOUWOY8Za+@7*# zk3&S|6!7)$@xAi!%bDwb)!^zIs{#8;wtxQxl;bYRt<4&Z)mS_wJa0lpoQ2Z=_KzYl z7e9&gf%>e?<7&L4ijYx=l6Hnwsw zWP21wZ2zAl^1s;NBq(%?CCa>kJ3Ugt?E3Gc?(7oLR29oU7|kk(hcQ$aE>@p^IorFZ zE8Ol`Uk?`;BlUqL75DWUK&jDyGENbW%_uq(NAXho-%MHp8>gQHE-%T5fxhQ6TOJXz zYBChjXUBL_lWq&>9b5m8nI@#4b zFeV>~n?3J0@2Er7xBXFfuntVXJ@-^Ea3^NrXZ2vyyHe5Q>XyiFu@B3{ZIAz6AqoB*TN!;9{Dq*oI4q* z&wmZHz!uDB6{`NR*`GF@Y+r&7&S}lY)?7EjZ-{FjC&p`Adg6d~y?30`v>7Z$#kYhb zTiTxxAa4{FZpoz`vZo?qvI_XJoeF*Z6&JHNF&961>wn`~|BC@dH&TPBUEh~E=&))p%p7ACobI$~mZ-dIy@nZHGb z!5{YhUu@dHdUi&^W~E$*bC$VD^LKSTjJ|=@sqOgZwDXppBl$Av#oMaoJh&0xK9Giu zdX~Tnor_Z6aW^s0f0FTLX!szF9g3!p5`mqoNu_2?37EO-|I;P;U`+V`R(|?7ZN63i z{F&8`HptO}2L^)!_+T;Z1&ir|>)$hBCZ$j>*?fO$2$|^5*s3=_mRokAP*qRSA(h+z z8m9%%1HUW7$$*uPU@Cy-v-eaxqHY@zFLlQG)8CS>4lF%J-8c(n5NwHuw6;Hm-}XL; z=<2q;3sz*j#xJwhXBmBrC*-{LNY2#63!Af`xD~Kac9iO7y5?ayE4G|E@lt7~{j6jD z0S^i~TFp9C7c&`ZFkPp%YGYr&S|KYv5&j^EA<|s&syAfom|s)vq9?cs{M2j+J>l#e zH5?$sqr(zAEUNqAAf)fsMtlpsrHr)`{o;W2Dh7a;1cIq_`DXG?dVC_Kv zbiC6)oeZoQPDa|PX43Y40EXOz8Yts0H0nR3WL^X$-%ura<=7>7wLegZvc|ER+GVH~ z#}P{Cr|B!t!np1#pHF%pBpMmH#HJk@+MJ-fzN(`$wYd4KoaWTT>j1jmegFo77EZ1p z;xUf*Dw5j7JoOfDd2lAO2lf|4=3P-_M})Op?&rFz&d*MUSA_fy#Jq7HCx9!nwv>~k z>Wh?vQO?N?+DC3PNz-q>O7|25P#zPd59T=n`%k(?+gh0t^MAcQT^KyQpd zf6Jhdo$7!GlkB%=rZ3!$;_uyOVgGyi6-J~*4Ns*WcJ9X2=lKEF*G0j+lx82QFTLh^ zJw$Ln%9BfWAtuU7`FgHxFk9U0t9;hYPJc<5H$T79S?)FKc8kz{5YcPWGF!AY@ZCJM zp8Xb-uC~=gD!x0{9@b!j*_Oj0VwIZvEgNM 
zZsf!ppgg2evNtr9&=}Bo|3NRO$dN_-=#>VCe@z9Kx_0u{=g#||t}#CmgthljK(&ah zP2O4>ow#kn_kT#`9ZyXW+CtVL;p3Akz?h*<}}%T ztImbDX9zQWa^L2+H(;($+&*S{hn5%mT)`tJ{6u7dB%U|5sqF1%Qp8fYNowJFR%&Ov z$Hqn;4i7Zw1g#Q%Tb=h=^G9C#V6z#FnrL~HmoR{|^Rm*oZK1xFYX8kd7vFnwu;DHZ zP+DT74+BOg=3goZu}Av5J$f8%89$0BxmxXfYZ7z3wKJ#U_U%YTYqiF^GsCE^^}yQ< zBjV0ra^sQe%11eOMu!eCgKyw)e|M`oG=}Q*X}cS0Y4}F9#m!}z!lyMmMk*}u6L(YV zcAMmG=FN_%t-IzJwV_M73b&IuGZH@TV(3ZpM^)(2q2g818^;1Al5r_Fc1w0sopgE% zuW`US)rXx~qzJe2v}5w;8Dw!AFFDI><1fk;z8Jz-_b=itMtVuhKo{{JfO6fm-omzf zXrN#z)q&t*P|Hs6UNeoA5l?ChOp<_W9-Pi- z$Ec`YLX)z!&Fz7OLn-jot>I_AM!=7`r5#R(l{Gjkqw81Kev)G?LVN=lm zUymMjIA9qZ9GI?Dik$u$7qPten>8B$J{&) zec$}GLZ4JfNadxvh=E-`!WSaKjodx^{adhLAWSl8Z1$Rh>3(2b1yAzYo^tBv z=5B`7^N=6s;eVw4cXiLE5&xzyZ|(d%PWnUtf5)iny=9mZY!i6F*vcRPc=a|6vlBXhTyvk*#O0-I#Pg8u zTtk|~krnMWE}>FhSDF6OS;6!jqjkEvw>00p6pcL!*Ve5S2TaC%lI*Wd-a$ZIRXI4 ziVPK(z~o2b|JQOYbkCS}RZ-e)Q>{WBS2!HkQ_o3ifakYi;ygX02C=w=XqPfz_13N- zEV%TI@-ReKQt6Ob2R=@SekjrrE>>*Q7TFw*BA0xjOG@t<;0XlW94y2i9fAZ)2*9Ft z3|QBqcUXi0Ev^DfO+xO5G$b1LWi^qpj4EULr28}u<~CoJ$vr`mFGdC8RRIBh-l|Uz z;#bzL9S4pU$yl4*ST*FDTLQ|Bc&Ix@R=7kBx|^#GlC*vRRJ{vbRgffB&`O=^h4>pc z^zSwWQV@et@sSD(1L*Cdkg$*$5!TkG6ymN9R36yHl4smOKLEvKZ0o{U?(IJSTfLIF3&NyR)T` zTv7exmw4XiRYM{Yji2flYQq{Vv{&c|W|L;^Sd&iw;!rs8x6A* z@&oW79qA5SatzRkxq`Aih~Z|L;h)n!2r!L{Yvb{Gu+67#Wa2iM}-@*bse)_Q1+owGpR`j4K&-J}@>y*9| zv}S$aI}Wpz_!h&xfYvwMmB()#3?oqmJ8JO2^bA}08zj_|`EXybWkcuN+rG6etb*0G zk3a`coFy>(DYR$lu!fUnoxQTB|1t-B!B?3}1N(qgG^B?-JDJiZ{NGF^`3tPKGyWD% zwFMeEW^npWF}ip36((GcU#GE*(+Z|(?NinkaE|TGVT=F)qQw%J| znZo-Sh0On0?9EEX`SNPF;VSoUka}or=W~h)u-iQ=CyUL>mEh?jpSyGIO`fjytU_EkRu@z=0{&rBxGyk*xM~=s;STsy0h%n9 z38nyT_saq1+W+WD-mfA1safs@8!-yUBP8&F*Q-~-<*Q0vz!yVhA!p|3?@8P3N)x)v!Ul{;Rd06%26kT@_3yAGP`%6!j|L7H~**n*#LnLF`NeY(#Q{8{ly(J8KC(e8=chVEjI z5rlO<$7V6cWO1zU%z9Q$@|W?NV8;8cl0f2Lj8{+w$^|VA!3hb%$LX*P=jj;QT1W>C zib6gjzMv^nh`TtZDSC?;=;vr{9deFx99y z>y-2Aa5364aalW^d8?6G>x;cE=#cdK6YUNLOaxEzpCR`EOj>GqcykQd4-^t6%$NWv z8BJ{L8!0bTgiJUo{g{;RcTlS7Pi9t0r7in1om%;yizJS$qZ4^^5~I?46hGstTam8d 
z(np5#>0!}hw#}h3sR7iI*oe_xna}nqxlb?!#M0={?-{(0ls5`rp!0Zm1s8(q8I$S} z(fZfb=NmT&Bm9PL%kUsTZi0f%ta23kbkrzTnnCTrxP}Q)ehrnp&$PNu($(%p6aXoa zK(9Lrq>^L51_#Yt;aPs1Z49(;GNrD!oeXfLCeP>EtZN*L+^XyGxkw=9wAMj1V!%!E zgl*^K6e&+dB3u+a?$^Pu(0VjWl^ywOl$f6mzu(oRy`MXQzA=OQLWBHM&Pxv_=e78M z$a%pwM-cO=0Ti_2@PRwwfLQ@bYSM2%7Y2Vj%JqjGN;M8D(u&b-h2VWbs8eagnAl+0&UYbvzwiSxzHY zPv5bRj~)rYu7ladUMJv1(8K`9B4|wtPzX>Z1#SvwLHO|kLQ0(mwsHl~Xm6V8@M-Jk zmq&6c43KJhIVkdNk259Xo{g;vx<)qOQt+h}Xu+NYhfNyFSX>@Cj)@ub`t%oLRo=9( z(WV8OP^S1k!eofP9=M^ov4Ir*%~&RU^@bVK9yUH~h_?5BM|j@{JnsV=?ArEa8us~@ zE{?r9taAdkonOY-aO|q|Yn>~-d>4n`WVP6*s8qg7dUp9yx)$6SBgKsj@!1}ZNRc6s8B>5W^BggUbf47eOq0}e?iK`H z@XE4QX)!3=deC2UOxMP7UVFAVTzAxG{zdLZy$2+pk{hrq`Z@)Pzc6`^i43geZjkEE zu^N6jm)TjN5x&s|a_evcpXkfYyhq)rT{}qyc2Wr%X zV16=CSE7*J$a`xtQHU=`K+X&@pNO~hCB!lc%Y3!{U3H#|)NPZS zDt+9enH9j z%Ni3=4w<9|_Bz6(04)>g>s->;1(T!~_rnOIm&?uuPyF1OTb;3WyGxt6o|Q#KKH6rr znH7nlCIvY4I>k;h=%Cq|ywg7_b1`k~DR|OU{hCFwS5)LNUj{*lCPFcoo<~8GMY5xq9>sRQNwxTO*Rcn7O5Ay?e*OhCMP^Mto6bJX$ks z#`=C9VRTs-%>{0Br82EEi3(WZ+PP}J5fXxVV^tP?-P%qzqW75?iz59jM-8eOq8HbM zOk~!o?&X9Z#2eNNtC-O9HI-8GXehVJ3edB(MTnbNOSNcBii3i5=$Y^$kt){_s_dKG zMQH0tj*Z?Jh<60ISn$vF(f%hf>QT%Sa26#Vh7zsPhzW@J+7P6>uKrOc0y zowT<)NbOrFC(1x+(%FRo%$6b45*ATa#+gFLj`u4Rj&;@F37Lia*NaCyJ@?C~%#nTX zi~`0?m!$hz&1=7>9wse5?b&EaxRSU5i8jH{&VT$|SI~u9VB$!E$@TL5a2}Ov0 zdUt>SK@5!t!#FC&5Zv-&mG6p(c&U+LSi=N5FSp^{Umxftvz z`&W-jS`X$IJ9)^*YHgKq!5a~S^VZ1x&KVzvR5@rvAPK7I+UMkv_}%85?XAd|nHWZq z**2u?mxjad5+FOWm`~J9Ge&q==gOfamWHLtE0549hYLvG0V2T;V7y3U_7t==tEY>l zLqS0)Gq3LpA>(>|gVTocxHumK)1MiuqS-%A!B1ayNTCIMmwOxo?|CCKyO6Gy=R`;$ z=H;Ows6Iwfe5j&tpF98DG^TAe><6G}XPvWDxFQK|k3NqPinC(14i|+=?;o_q4Hqco z1qTyA@xcJcKvJ}VwcI>?NBcb27Sncg?#%PL+dw*(7}lqNsoqb;lmM!uw06fBq2jj7 zOgGbt<5~SttCZTXsl@Zc*pQ@rj&mFLH>iXC58A+0)fEtF<&wcy2HbP%=gRKI>G(2L zG%setxx;M323wR$-tr#63K|^ULh)0)-JA04+3MRkFFRnR?ETOD$C}Aay&IzGs8Vb< z$XQ6En{BIgrIi;5zNP4ow6OF>2Kmmci-rrPsP`6u#aQOpJ960GPnHm`zJUT=m|SEI zC8FMs5udF@T7kNtO*l3*M`v7EahWnLY^Q`VTHf}aaF)f1pK_P9$#|~s`f9m 
z^ECt={lqjxmLNGs2ImVKiwZa2*UV@wp$~(05nN>~m-!ZNmi7q}8n}w-Rpm%YkRr*~ zlWGS}S5X2{rC6d%&w*ScU<~Kh%?Gho6?O$nMl^|Y*v>2aZ~GV6P#r?HW~ZiUG;8V2mLenWcrOT}^p z74JU@&gzsNxM@rYQ_?(GH}_Mk^oGpRPa)CjAi-gF@P%W+vTpc;%joc_U?#*1Q5#wQtdz(nK7`gHJd^gsy8?$BsXaep-(YW`gyO)e^%mj6zgwu zex0!Ry(hDI(NL(W$BjR;o=f34EQ2GzmsOWS4SpG!c#}FxDW96Uc4F9k3l94=r(1rs zr`JS5en8$+i_hbGXBwaLv=*lcT08qezIkYiysV;V)y+4RKtL^UN_HP`~J znm|=i6dNH;O-2eew1`B#WhVM|s2ex7^%aMb#ApV({G1iIowHk^n9EIN=|52Zl$Knh1b$=F zt0`}@0+wt)Cd7%Qp;AzkmnV=d(#qb%AEO#Cd;HW{CujvOFhAX|p<--8_LECwtihOG zYCw?2eBE*IvDe&TwQx(?N5&4FognIJda`!n6yDWOv0F5+#lz=#gt!y1%2!g-EN&VK zmTCqxEb2kNVgg2<-@pq>A8b^Foc3Zj!$hY(QMWDvGDad>T^Tfe01%nM zk5f}67YYrs{sPFzoiCVU0HJOB6GJ)>xvU}PP zRUh+eQ57oE*|qvC&(ji3&)#BH9izmjI%e3YjFhpg6~pfwQ#ju3x!L3yd*85dg&tK* zefUB$n#QWM0vZx~39oLG+!A3+&nHJ07ezCgMUThpm=JcVs>Qv}`q=K>>ietn1&L&J z8n!B5&!uAL*`p&a9!yx&}>ZwyfuI9zmO2#Ax%&v(#XGU%ohNjF|GOnQ5bAZ{P3{k(vN)#d1?ao{t5_ zD0H!hMuCP6vS@ssK7b_Xt(()!e`bSuq{*1QS4ESz!W>AJh$D>OiX~tZv%#z;w^K!Q zS~$n7EJVHeA(Qa}Cg~g0ek9U1uYmiBc!tv0gE&>9t8PGOpf|&IZ*?j%r!KZ&V-nMh zT{jVx$*6+IH~NMKXGS?Df2(dihq+?m;ZdFd0Rv-W`QF?E!?li(A$&JTMILvpW5P+) zf&7Z{Q$Y6@Dq3-mcog3TA4eugD zi}}V#s}Qr%^8s;Qs+#rX&1TO;RoMJRuR{kJ{ns zAa^u{AC@peDV`{OhfaSG_#mIoG&C_(T{4sI*vSC`1(2eNFVxrH>4-4thFSinRoKww z3?WyPw4~+I4IU1!wRLr|BhKEFzvNE4>jsvNp1Yp>N-Rs%``$itF+K$@K~`G ztz;g56OXanh8r%5TGXDVh=HO==3VMY*;X9~q9Dfy2m%t=TI7vHa#FH4{XtG=l6%2G z942sBnyAu1hZt2l2&MspAXP|LSIQ(Lq~i<&I1}-nI>lCbf`_0(P+ApL)uFm{N4v7( zGOIT{<~W50sYP>q85B}#rSUkEQmmaEdWClbbp=y*yiZom4!6=eGteFyKUKgMHce=- zh0Vwx3mZfa*n7Xj!exHZwUeS@Z;K6w?B-~KXlgjGoHujA7aJUwIhv0}Km^FT3C1uj zfTl{?Z5ddF0)5k4p`4NRf;{yOUlvy-1d>l$JjKxUh%_i7!97iu42NF_fJt7+j5MYl zm7hoQq(q0uMELJFW!%dcnC;;O{MRaOks>tI;6m3>c6Ur%AyIW)`$&A2kn27kMRiuOat7~YaUYHAv64`BV7~8~XQdZL4aud_hU8+O!RmhI zly2UJ*oLN+-ZfYlkZ(G>L+N=eh#xjmU&ACoVnPt%gA!k)r-uW8akxO_d|OrYsJ*k& z#0DO8cmb1 zk%Ji##yzsHbunisfwDnXq)=Q2iQFp1@(51dehx43ju7naf!URrNXN+-#5^?QnW}tP zL_jpDG-vVbKfj;bBzcjMep9qNt)j}{##SBIRI^~l!hw$p6MWmE|2`u_Fy_To$NQ~K 
z^5Clrx6ejs?At5PH-{jHgs$m@b+MsEn3X{4NL7YC%*^fKsr2KS;fm~qGj*Ycvxe&R zskS8VLc{V92)^}^w-fDVs0glciPR<@s^7XfO9OLUb|2~gSKC*IWw~wpejp{C(kUR_ z-QC?Ojndr`qBMeZBi$k0-5_1UM=0IWNP1sb%e6dbxzDrDy?6i1^YP9(#~fqKH-51` zWN8^5*AxLR;^4JKDlNCwJs*Nj(0|u}H$e}mHPO&HCuONgnIZGZ8Bv48e026qDx=3F zROW5AS$5uINvnmMKxmN0FbDJHftcWs-+UqBqmhIQZSQpTenYbTG$a zT_v$DR@y~%Z zGRPLnLu~e1;Cu1BhXN@g)H}fZ)-h(WG;ZsfJS<7|HCGb8?JjXNt(;lPHl14UnIn zI-3<<0AGsw<~&=5U;SiX3f7iq|7Z#?ODP_$VZ23CNTtM#sQlqQ#3M}LIp7ke@Ta>S zpxm9V92jWrpj^%7WhRzKQWU19%1Ql36gx2oVocO8D6lMxs(8W)0R_uef+@i7oedHS zW+DYH<}gbR0G%f@B$G}MiERu%M8=e+5F=kmd+x~g@o>5-;vgk!+uw0Zy&-%{f?tG_ z%%`1%lQHD1Ax8&ElnPW)(dudG5Q1a2h>~x<)A52 z^!_EnPpNRKnr|T&jQB~F0hb{Yy%QWW;^b(b-hDpThAp5)ZO2Unh3gc2pbk<-!I3=B zNWn?cmZ#83`t~*3GtNfo(2Kg&uJEIqrJj%3JN9T*gCNTTRNJP7 z?w6lEV9&vxTIZ^-mQ@W8rs{77A7JIRrxkm7m#ujni-iuCaSwoQn{AUh`4eyXH%%(5 zCo~TTb@0S6kuTymoi8oSw|nd9Pi(IVlhwvcqGRfzv5Dd>k<@qJe)4$pKC{q6GCwbq z&N($Et6zZ&hpK*h{TgZYm#cjMK;)wF;EBGdtGi^#N~p-hQrxb znLt`q#bBMbhBQn?cuS_9ep35!2K`%92qi4Eu|}BeW6cS}mJj%06`Nf{pC{I)snZ)+ zRC6xF-C7Kp+`fCI+b%j$D6peAii+79ippid{T}36)RQSDT00|kK2RSuMAh3 zof@%{2KAi+5u-q|-xe9nWLB%I`=x}uG7sNQwRf82gedqcRo;1=`pa$gsjC6jUse$b ztU~ekt4P+6>`Iv((4ZeRl40!}uegvEGT%Ru=JnH*y4NKqq3V_`%K@!mJ;2;8F#dwM zTYO6dJsSJW>j&~}VGsma+?m~jSc4t^AGYi_F^)9j96sYf-j|L1RQQ0@x04kVuc`Y4 zD3T+*c`E~a_67$3D}BZ*dhwQ8*{h7L3u@hU#KVOb)377f4nSHfM-yekpP-DPkK{i3 zy(ENCs*8wyDnsVR#FSc_V*c%ygu15|kPg{wnphwf68NQOcfrZ5Z$B zt9=nQKr&RNG|!pJMMH8%L%n1xO?hR^_I@ix@ndm)g(QHqwxy< z8lmxB`*5uVc{mKc5zP5YNDa_(9C#KBhU96d*{mU{Lc(p>m2Ps0#M4` zY13uzJRoObHb$WA(dP9-wfhs^ZVW@@m@2l=H94*n!6T!frY$}62o?*kIqrMP=HEpC zsL=Dn1<}{#{$_`W2JKRItO@56h?7(w$RYptwO%RE_|fN#VQ2{Vc{#pOV0nepHue$S z_#%zHPsZ0RvQIXvj|$6}c3wqgKh>j+8{-;7f3gAf=tFAPAVgl{;Y4dB_%<6qqH0t3 z_?+jDLMCQC5IFH`Up{8IOz~h9=b2?^Stoz>!ppXgj_BCNlBv_=Y2-%Wa?^^w^k`fc zHDpHgrqsH8VOKX6G&7;|E?lOsmw})YF_m=RVdH%rTNPaKy2B~ZC|a_sXXk3F?ou%R zT?0h~y`E?Szh8v~_cNq#29%#Q%>`m#1Z10(ZNNmj-{z#X)8+u!dJIZxM;4am&S8L53Sszx1Q8KI%=It+}wZkV) 
zFlxTa6vHrv9?RF0A!dSUwtR9l;h&4fn~0;JD_`aQ%c*fs6F^b-_?=DK;FrH*4a3aVbG;n%&KCk0*mpI$IqQs1Ac!&e{0@9G`5naHqII6Xr{z) zZ}4;>)g*45ayvhxl&;Pp{dpfP_ZHprUI6rj0du_SS$&Lz?!~fJnoLfKDb`Cwsno6# z9LIvQf(J`1yJ(M7ZXFtT8#=@yUk6TM;ah%rfk@mqqFk||!VQ*|tP*h56a0IZi@WZT z^S=!ASYHTYMM5+qzJX4EGaZ`WJ>NqCCFGY zF;6O_%&NC(wYK|2M&c8u>I{yFf$LllZ&VUNJ%q3sqDmQLuZq);DAD2UCR{d_KC66T zX$`eC)oaui8(ZIB`-Z zU?L}nq&5OV>cMbu&H>0ozlf-#A)Ta@c#%otTWh$vJpU$on`W%rk0&CpOeoZ1Z1Fu9 zQ!y;+T! zramal`f5e5gkEZ)JA}60y_L$vl?d8NK6&*n*aT!rXx=Pp@WHQx@q6~>!cuf@%^c$K ztu{~iE5Uk`TW$OE3&UN9^HDZmh38f0yf6C>BSW0E#%+x3>fK-L?dt}Lvr0p<*gaAi zY~CHA<<(~ey`-UTZ;Z{-cUJ}Qnk=_T+Oa=Ch{>farUrx2{n=CTDmQ}r3s_;}EpR?m z`7-N+KR`)!aYLoe0qUOxKndM$fnLd?Pk9@&)tc5ydD8{N3su$E!iC@4hc{r7|8Z;v8JQU- zDwRD-d`XSDTTUiigY+qdhPvk|UC+o`tZ`NCRxn&c`c}VRwl$VHH7}6~JnTRM4lOp+ z>$lFk_*t3o+ut$S3DHTV#|U5GU2uz+Q0mzXC;R|KV8$+6T^TKFPPy}KoU>BGEKLC1 z51w9x3UAc|X_xf=X1FCY*AtKUO}F?m@@7-*=4a8m`$!qDUnOHbMQ}j3Kny`r!j+pIE^6J{f{NBxb+Y-au$2V_!5a5#95*w^%a?~ zD%3<%Wk2!2ozAdoSN<2Fe@M#KD)lfU0V}9|aEkxo`>CgN!7!Uqae9joMR2>*_vxA? 
za&xu=+!>;Uh9%2z%yafu_-bw9vD-T4>sgp(2c%8vX3EeXiRzLE>LYJp@JA1wf2EkI5nANJB?kk zSV`@@y=WeZ;X1Q3M`m9)|Hfzm8`)n_LIA#t_$sGb4W5_O?5S>xJ;q z^ymgB!YVk+IeN(Ma|QcDt#94r16j-8e4n*J4R|uE`*&JC`3vrRI~$BZ&H@P60h-?V zy*rD$xFyf0_FRfwR-DNPb=t-bMSfLU0U>J~tD@K;!c@-k(=-&s%k@}@d^~Ig3m0wF zT!vptdQNXOiLIB^=kU<$_MSVb_6{#ae@d6Nszi9YA15Q+a$C~r6LQ4gWTQtj&p}b; z-Xocgl}u=re>^Le;=WG4P|xEBY2IJtAlKYNxjvNxfik+c03I>Rcc@*5D!7|43zP$M*tArl9iw0W2z}t>gd5X;hBr*T*KX&cpAC3 zi&)W0pgyIW-pxs+OqRp-IqL;5>B7tT818`*2;`vbvFd#*A`yZN`bmsS+9b2keWA_S zlu~4v4G@6h6~}Y+^6`S@$0l9+7!&iv1^k+l&K=vvSg${kf!zSBzpp3-Djq7{Cn{8B zH&c!reK$jChyKa*zHD+YI1r=Sjy%rBseK;pojERJxz3zQhVros$QhM|0##15onj&g zSKz1EJ|upVQ(S_LETd)5qur}(N&k5Il3*z*imt;n$HYQ*61c!Tt+h#|a$SVI*>9r5 zarEHjl(|SZxi*;>kzp52PoAwy)G-_g+sL+M2^MCnQsk$w*lBs)tus*8s56~u=%?1x zGknGHITkS!furQHN3W*#lsrzuDqQ)<#VA7Ld;u6+`mP5l?jEw(C*HV~3sU08FRxnN z%<>3IhR|z$Ug_(tC=#1{0+xb_z}!tIL}i|JEI2Lr+Uf72&N<=)H=FS80#SlRS2J&I z#s;B+F0B6SqrhTNhp2Hkrs`-!}qo$a8CXlwKvoE*6ePd-H=|>4&58UO9l<5{cxYc8*J_OVc;8#OG-@!ja08h zhCFyIb7aavv8=>`N9BNZBM*mSq;Ld0@jBPE8~^zl$^KX4l8()}tK7@HA0Ui_tR1?V zy(gROPFT-;tT<-JiHD%SYkC`K3t|F}Y`Gw6;AL-SJLpjch_;J=H5szlcJfZ6Tks;A z$h&bVNZInG{V$z4=o{j)sZMl}F5<}XMXMCvHLO+UHnj*$@<~||B9yJr6gx{Fi39xE z$Mj*J8@@hS*NcsFlf{-f%+aT~-`rpE-Q?;faFW#KE;n5zvz_jLT8|MIXOw+gZ#$&~ zd^Ra%@859ge0ICLZKleM$^?sqm!R)*lP_-R9u9*r9{GWPyGYnai?0049cKoAfqAL= zCfK%PX7y&$1AFU;LKg#9tA5=7=v=;j1~n38roZVKOW9)K56}j;b3vNfyMo{yCnS%= zWruDP(_BwmwNH)M#@g5pJ72u(Q2Zl?6RBo-x4lc(z4w-O(dw13G_99UkO98sH?_9U zho?M4>JRGXKz0CVBc{md(fVb9 zZdmLG5p^@pqbGa9_csHLx>+P~mACu8e(W8tLIV}tZQ7a^*zf9k=si4prDORD4;qpE zMNrd;93bG&6Kb?N&3YHI0d;wJna!O$Ew|dM$j8^MPqAYj*%kwKw7dAe`Iq?%(YyTP zpJ8aUuW8>f>M=Urc6X@MM@nerh+u_g@O9FyppJvaJQ3@30C_mefYDcJ^-buy;%3Axy1YFL|7I9vQ|mcNlONp!JbC2VB25L>uldfjfVE8lO4Gcv5z?Pwg>h<17i z@kF4lmDVv&d20i8yDg&Q%QVx>%|Q8e!V|?wJ5KXy?GJvvtNIR_eD#9Yu1f~fPJ*_> zXe`?2^dAf*o)8V!X@NgI5?1F3fl#P;V?gL4sgA|9!;>V6({(%>6V|)bGirOf@WDXR z_)`})Yj%&d@Gq0g&0~H{7;3)0V-97{d zNFMgx^u5}W{sfFVHLw&;UM>dB(Y&7!p`fSG&C|qJ0HY?-k5&Tzu@?qAg0f@GmV~ti 
z%g7UCx4dkuiRGi`E2so9F~>S0fuvQ6*6lpVYR3Uvq3={7w|gX*@*sH}?x^E1L&}M9 zXy?{H>tU2JY)sAGHhK6KZB7#PR9~ZDCmA0NCdzs-`Cc%X-lh1eHKt_kv%20v*I~T3 zb6k_=Y_L6%;`TRRU004VgUvBn3A>Y;dUVm8gAw%Iq+lBR_Fy-xE=Xp$1$6}Q54tOK zpDbuY+0%%P`y`fjo&4godI_01J89&uF-d7CFUguW5pzes9IbKILzh6*jWZK zg}AoZ_l&%wZ$Ep~QCrTO9{b8%6kc)^yB_SZOr8kl2^J>TRd7dAl+c=n z71Xbc+7mcTr+(=o$rO@|EzfU|%CIVrkmw;nk>om>mBjV$pfUQov0yOD{@qu;c`-tL zkM&S%&#H{fN4@SQetG3qll8n{+$9D&#ZWu(ig(=ArG5~)q8giqQ}Or!sUTA8Yez$z z)(ROJr4nGDYBkuiTE4Pg<7ul{&a$T+y~l4&1lfzs%VC(+Sk*e4)>>Rv4XoOcL(W8ijv= z78;^_61`QvWfdm)w||uMnOGl~WMtW2S?(hbKg1q?7fXcqG3eRlQy--*B2nTTk2Acy zie3S0IG!)1>?G z-0auyHL4B2VlpZ(vPJP?VAPtqm(LG+aAElSkCT6@xknD1Hw9u+2j{R|z2$nWAgb?% z8r#kX1qCB143X8TqmMVjXSR8>4CTIhp7eSJjn_R)LxrL-e;3QPcP-sol)&8yr%;dz z?r<9~?GqmZ&q$@WO^bCvv)F!-NR+-bz0}1o6PW_sAaYY}_V-1kFmJ+`k~7ad5`qxL^&r{eg)a1}#^=Hjnj)*#ncv4pY+m*WFB?|wied&@71@M{AH7Yh-0;r@@8o$Y=-Sm*NN;; z%a-okA7T-BUCk1kv_=R+vH7m_E&Ik0i&CQ@J~_R_OLUJM`gs9-C`RPv*&4Va^lq)f|t0uY~DN}+qjW0551YRUoB$5f>@+8AO9FO@#h*4J65LA@Oa%V>X;L~WD#%Ir zNiq4Fm^*`HhJ-FsJA%x?JV!@jG!7Lwl-X+Oo*c#C*fO_Gj?{wrVYM_~7F<#=G6C9@ zaTywMtSt~Hb^}-|=c@bkS^Z`7ray<0+~+Sh%BB*+3G)M(H}@0%E3jElzz1-B*m-0% zWx7fDp(?h*7Ak-pP@Xq47iWy~BFVb^r3^t~p2{H#w5kb$t&)7!FWwi1T?T3kMCW?X zD~bWblw7`lku@G^Hi|HpDy}S%-SsOXIdj_$AyzuZ#%! 
z@YI<)!A{Y6;v$zT2gUX-gF6buf2YmEcK+dt^3QsVfVKK;+D^g7-wwDUGfV8&zlDMY z!3}%v>~+mU7psN zrCO0CGS_KRsdMt0r^~;rTM0<~_K{_>D(d!&&WEC=BI z0)IWZuZ&4euXCrm8A4VcRcnhDY(s{G-vF*LGB8I_?1zU%3`V7eJrwDUVf)5q_40Lb z6pHAc-+$S<@4(jm!M6Kn!T%<2C6ijG4y{{SJy-BrAK$z-k(j)|% zs8pcD`3OG(_n+Q4k&0a~cvpE}E-F{f-T~d^Uqv<8%KkesfwmSW)naWUC#HIrT}QzI zkyM@Bm49fDGRy1sP(fx`zep)Kb*^yw`i|<#1fwH2Ge_#?cQNR?MqTF87>M%ilq{wg zC^ZS#Qb1weN&MxBL)`YyFe?2LgrB|^m{?W&um5C<{YPn{eQWQ+rn;V0n}`FQ|%w*Iq`{qCUn zP2PyJ-Qt6NfrB&)g|_|&NK#s8 z1vhlr%+f3VdrTjQe#}lhKR7>Rf6|jN0ysE0zd(ONQs9~3UwLB06P2@uK6($q34%b| z{wf3o#Wuw@#g617^9W#|Yu--Zjc9vazHi&L*%hFjG!Or8Qi`yOC~&7r1R-(+Vp#;v ztH)ymfc#wr<4FGZ4$%PD`SKmSfgd2Llxwqn<-pTfpDG2~Y47imNOtHo z`C3#W^iz7Qg@_zeRAExv*~74LrAuRX&+$L1yj2BTCb4c$p3tT9_2YaU@-pwPne?{f zvqKMx(m}UQ=WuR^`y2rsR#hfUtsjNqpD#5C@2|Ye73)zf-q1&`A<94GH1sT?{QDNg zRdtRM)+&Wh8%N)1UBc(JA_mX*-=<#HZ7Viu8||EMWzcSHu9|K*U{=f5A#pN$^$Qxt z3#fq9r9Zv;Tmm^EtoaxY5(ymqIG+1PL0*Q0Z(4$s6Q1stFfW`|#>GZ8uMvsS)@j(` zHURpi8T$utlH+Pq&PqPNfQy=gqhdkB6z}J0IT8PTbtxIsPl=9{2AIx=o!zxhrA=+V zSGou957e85>JuWA)SeaeQ^EfLHMY$SG5WG#-`cdi003~~0cXaF>o<@-q&|llZ@7mc zUN;8P(T9aiIvkjNy~W!iUrEZ!I?&6Y#z78!Oa_@b#pRw0c{C*|^8oEHRfor_X|t@* ztU<@!VAwZ$Mv4J1XUEL&LZw?|Nncp{UA>SvGqH4D#hX$o3@kM?h$*3(Fcd9K+ZO#~ zUit5(DX|*clh4B&y%26;xzQZEdi78lCt?EL8Rjuet{W$uP&a|%4BTVqxY>y1SYaRaF1cO#b zBd2;?c!8Gph$_~zm&PwN5G0p>1Au5e2H4#!BVX)Ke)d#}c6GI^-D+mjls<)-Xx<;% z;I_~;yIf|qUL8n18=iexWEm=5ro_XbDlPOVrx24W#F-fpz_XVgB}W(WU3PSgr`%h& z(3-Q}X1fWWYDmsm&3N5=ir_JLDt(*L<{5i|2|Kbl0mgo{q!k)dWer}@m<|Xqv_4OT zJiyCz==7Avs0({XDkGqDM2J|_A{}a5d~7U8i702f8O7s#mLB*-St%#E)ECKVLf9|G)e<$>AiR+Huan17t9Ek6-35?sqJ`yO=>aD`Y_m! 
zDKwK(9}P--kHvIE=y((%tx38@mhIf=ofx>pEfOIQ_5P@RI|Gey{r->Bt2I8Zkj~ zd?3r>Wzm7>onT^}!3105#{EGJJ9M4d81IhoV{Y}u&JYIt(mBE*Ql7*Ln($Ff*M2zM zC#93j8Z>4{$f~J7KyR_c1NZambY}ZLj)%ysKG)~4NMYxdZBBlx&NeN#uwA5*#v1fC z40I55iW4+jLDH!^8|3N+_!Ml5^}bM=5>7 zZ!wHE*M?3{C$a(3-o{&*+T@=0Ani!hT zlSi}Kj_E@uH}Jn{vG}2T^Rn%PGQ*)!qY}I(qVR465R;gYr6YxKt-w;Ap zoa|*YTjwwJ8`M97G4^a>0;O`93aTKuqMkl(g!A#mS73kKWtk?HO`m*LgwHMq2O6&)$?J5cE=@7w$!^htPt~+!PH-UFY6D;%@$G6ov^yY4|yX?|Wb7)2- z@}eVxqfkC&S%kKwU_I)=crWw?k}nT}&=Q-px=;tBjz2^rj#ZvqFM8CBRMVG zTvVsLvZ-hs$vOnBDF{Q3?RHyKW=|apZV1x7mN8;(kOASLW@b81S=P@h z$3$<_#m!O`wE8O+!}maFv2yNizWgx5o4*HL%)hKcSgxEmn(tI$MR)s>6&~UTXjw>H zQ-Q|B#UQ@saKlIJgY=y5|)TxsF6 zj9P40<5j>Dv)8)2SR)P|vgM%b-11M2`>K@|ZdW|OvcpRj6uE(VMdk|ySb6Pb5CSVw zOmHAqFnAjg6chU#SE^@$YF@m_^&mnmor70WjK)^R5pzRY7@Y=UB&t3C8ER44ye67W zN*TC_d=aGb#zCva+aqAJc0W=Aht6yb2n!yqz7--HZd9 zo6NQejBf!B8(f-%MRz&$-!_Xs+-1BkvR@W?F!qXUQlS%n4xAGa(nXQ0ritW78u0yW z`W3|4`8u!TA9<%wtuBWr@nn0}9YAN1;C~tU_oZ{~ww?>x=e}b(G+6gi zRx7O!xnw zA-ZD=vyRLzx42k8O|zb7P~SRVwLTJ|HZWFw9^)H-Fx2F3QhQ*auKuu?oREv{R&U7qEKqe-RJB z33As_vcrQdLG1~Rl`S55Cc_GmjNUs;bM?lA@~CO z>+}a^<-wM)4S8sd?qqJFW>riwrM^D9rOR&Ui@!zlLhvpzAab7 zr2UKVH$6pqFBB%uGn*Q;SD2(A`R&M$u~Z{@ikvW%HA_jkq|ZCi{n^M8I$;a8BzbCNRwe$O38@!HPw56MiwC+uM+p{(LjN1VzD!8Z$EHQ!^Y5{-$rdWYP$12Hk?@|3(AYD~G_{W`YP2jB z*)eGJm!gXMz{2KO>dctLmUH3CufkWmguA{oFa%_AS!!lp+g?#)T{TepSr023UH*@? 
zgx@bIt$=?q|9K>RUBG3-5HP7>0Q)+78>eS`%lfk)B|+~G&#XNTAw{t4Sm{$TqP0hG z$)kAj-km0Va@-zPzS?o%wXm%{$?F<)#}Ge6G6?+yHxn|op~>0L2EiJxi+aT|nJ@P# z0kXnhjlS6GzDfVtOnyJH|5fk)eg6Hvy(UR}-b^2j&U!L*-cot7=V!la)ty@=JJtN^ nVGwosLU*7qp+V(+E0Tn@7N{|!;2|-#wq`O-fVCWvYLmH$T6p)ebE|JcmLj)v;MjD3t z)}VWT`<(Zj_x%G{YnWN_%(}1VcU|`rq97;!0P8Up3JS^tNr^W~C@83A!1g%C9pHC7 zI_e?t2h~AITm+?fkZc{;K?e)V3ZtNuMcunFxDD)M+Dd3TprBy4A%9T2Y`z$wpfD6l zz7bY&(c7Lai_F$&*uCDpuz9y@n*6S77gAju{rd??lj&fwo-4^ zP(jg;97XJnG`m|RYf)ojI_mt^nO@Or__W?h)YZM@QoZ!ZiOIFp@UcMbEu!w7Z}%m# zUK3FT;A9`pcwbI>OGXol1mhwuPv+0AS@iKmFxV)RasqJ-+jAuDQMCvE{&y_(^8lQu zbYfZ!w-DJD!J$NNC?l0L~)$WgPWo7p3<*O|@MZrT2whor{orpz6+4e4>r%24?Rp zhncEam}lqLEY;SU?u^ksZr}v%+*3Rj-=&$(`7LZhoAxDQe4g|aXx3y9xDVVHT2H%u z7-`pv{m9cq;rYQ5cwZzqa~+DmUe0`W024Z!Y`GdgLvAtlEhxE};GNiFt7t+ zpS*HjXY=bkZ7BF+&eb}Rb=a^SvO4>9rE>Z}r3yFZ-J0P?VUp(>mQKW8Cn9sB?LK+) zl~nfLSe-`i(!87_qKoZ3N~_B%`2tSGE-I;I3l!#cKcc!ID=TA zIF|6(QE45*UTveCR8kp5N(Y#wywT`Cf1kB{cnhB@HQjf9gQO)*syE%=Y-764ET!nU z({qPBTVj%#5bo4feNqdIQiuHW5tTs1+^q*6inYc19jD@L4`m6}IV=Jb+clKcm)|pIbC5}jV622MmE;dHw@eC=mB(CA!SMU|-C`AfMINRrsDKDnd_}A%* z92qTHpGekkZo*a7eM{$leFt+1Z${_w1{{}x<(w-i`w)cH;!4}z8T9Bj6l*WJT06Y- z1c=UqhFe=sE+7HK1rHT-uFqSp&-t*bkQK_y>S~#Y!^6w}wWnT^2{>4}apyYX3~`Lb zqh21 zcO`HD&ZOX*=&Pvf)2JFE^PAd*zaUSavMsLP5Fa1wppgD{Vc;cjnWC*K%ah93%=nE8t%ED zFyqu`7}~*cZ?t`{Z!j^+1g~9M>e}#Fs4@x#DG*wkQD`%@FW=)({s>}3KH?=USM?sQ zAINKo8{l(swkUkvJqN_|Q%zLw^PlLfIki#;#Jjt-5rY9WBFoPAAqW`81?{7EWE5g| zsX8!gar_*t4>Fl`v|8kcwt-^Gojlle$UEZET~xh2Oqt0^R@X!#2aT8`FyXJd%g^>1 z^yG+keXAgB#cpr8BS}B@PdQV!BWmm%?o$cYK2*(V@jUOGEYWAx-+>~PLi5$8f;N3c z{`BI^ly9F|1)Nc$SR38hErs~S5sZlN(rnz1aVXX-aAb6Cj>Oy5ZT&(Izov8Sd8BRA zP#0x-sn+FQa~X>iu5RR>O+6Ukyb2);mk-bD;ZFof~cV>o)VvdnfbW z^XbgiB?-ix<(O8wk6#=)cs?$)w1iyI>uhoKu5Vd6qfVjjn2C0iC1Q+nhHND7T5OSw z=5t@zKVx*Rj~}{Ffu3~;`uC)rTUgs^al{J~H-mf1on~s+OUuODTeX;Z#?C6|5 z>KGHIg%UYV?G(F5JDF1M@=0wAbmABm0?)2joNQI@#C;KxUObXMroGEJGxo5-$Dm- zF{Py3qX1#BDedzk1&Gl#1F1%7|5T%Ys@vu9N*_^OgXsU4vKZ}O0XT-s|BWJktJ@S% 
zgHbDwGxF#vRm9)C{8W@7r?7r?D3sx7dSAgsRZ4PESIFipMb@^gY1eIaPc%wp+rS+j z7tc@S0{LkDYa4V39>;8PbvYLbm9^dDdB&Re&BnWO_e`dvWDyv*E;LgJ?hFf+{YG%1 z1GNP1#?+V9u4%>U(TZJSH21)G9^TV2`y(Oym8KXK-SUD?_H+!bXw`>IB801AUpAZK z#787GL*AW{pYMOkY`WA2kYw7exfVxg8n|p5X{>oX9@rSni&bvr-A;Ow7PisdIidG* zqt)Gr%wz8&3C;5^x%}#KSjBPu6cg(@jee&0?%=oj%{hu#DCXO9={yP7V`l|$%t?`h zfobyQWho}hQlGWuJa)eEPXuvOK;xCp&UdfNdJ6h4>B_fx##m1x z^7vzDo=EJQPZZTGyaembS{+7GwUYx6>gw~rdIpJ9bk#tCRBW2mss7xqIgG+oP!7yU zO(vsDL$m$LEHK`RB6T|VZsgl^^c^Ith87j6Z~$lD6Aq@8}#@NwA*ZjQLfCzMA0Vu>Q*x#cqOMV#8UDVu8$`5 z6Itht?((=&ERiq`=%Q~M7jBXqLLbF0aj0wBG=6`)^!?YdE6>YMUyu9W z)$keiTfSyvizFMFtoFXFce13e3O!8LHG5$;E@`o8c)094OI88MpkaOQ>b*)A9>vFa z8G59m!Rj7gs4ur?xSPE$-Ol!9Ay0~8HQk)1KY?=_=0xnB-0zp7C61cFlp(yc*4#ff z%?!Ve3F&JbLRBq3zn0G>kdn`KAH4IC9|Cd=TbxLG9sQ^nVlx*ar=jY7vC13&$@uEp z_j2uJsb{Ewrr8TcE{^PFd2{ zb%HLTq%+s^z_ocB(!zIRH(J6z3=~7`PCKg+U0Wvka<#WO zq#T71SKInK=eQ=?m-RNbhYilQX#@eOCE^!60`S-3xsN>U&( z{U25CzkyC8I;2UXzcFcl55d)w$p`TSB0u23 z=YwvUlHO2bNWohDGlF`|JH|auUq+@;%Y>-DCnar*>wae{>>u9LE@p}AJwKrnCZf7` zK>bP;V=vDmnEKLI>D5~4Xonx97Vda9F`7eqAJJ$*YhT02Klu(`=z2-Ajb&pIeI&F^ ziPF&fdaS~kG*I>f(r8RPY`>fSU1?zCfaZ%yuV(I*0EZTL6yDK2<2E4a?ZWFEQzSV`O15J^9ayWn0ll!0b$PFi;`0 z;dSM=UaqzZExxbZQXUm4O{9WMe8yJ1TT-tObYZ1mLYp2&54Jn7`>#xh4%b+QA>|W% z7q@S2=!Z1YxsgqnDMk$QwwZQF7!apXL*oayI~J1z%@$8-m1_`6Nbp-Upv;v}CfI1X zLTv7@bTy?hKvlzT1vZtxU`fm)QGR9V*c$!?i_Yy%^U$ZPFdVg=b+T*DcApTXfqGd- zHF4qEyigNz^6`_~_QC*^EPdkPlK{jpLQ~*tY~s&HFK_Ed0p|~nB!*~!lFN3}4(;JxeRUh`IxLyQ|cGAO>F~)T*6)hK8Ujx0v-TU*U)M{PH?> zi2}wN4BTc5BiqG$YMqJ~vN8^>2$Z+38xLJQtq3jd)BDqxo{wtZ*!_u2ddD(S>V20PE?kx8?{PUh2k}y*PjrJV%Qy1 zg&w*St#KJ|@(DP37Wo>=tI63F80G_5$ezvHH+i{4@#XMx`2VI0IZU?gs&QfWCmUX9y=65 zstuo8^0Qq~)~Xl7Z77|o1P3DJ;(5s*xd`t|(GMWK7@;0?@GQe^*)UM|oLP;Gh=b=6 z#Fl1Rjvw?HYlnV_kgw81A{rufPFJkk1pM>vaR)KNPNUHydJP!Q|A1S+z2>)10KLWim<4j5r?k$_7*y@u|1;V7 zr{gRG?s8sWvXme$g&zK7(gJJdoIM3UBe3wS|F)&IHOgM2gaJ?;jj5^ckkdc~{cX87^HMW_Ga$o_Dl(-|qR^A^D z^&Xp{-)6Lo66Fz;N;kgJph4=zU%t|{_WJB(hq=ad`^YD-Q9N(rm+)+OfIIm(;i62^0 
zp~61uL(4RXL3g{3b-R9FE%f3&jx#XXq;M0O8>@Gi@GapBY)+2>`X;V=*dM1KD;TR; zCa2LueJsyZXz4I-Su}xNrAnba|HNVNEGPu(h#&v5B(&JjdYH!XUrgsdXKO9itSlzc zk>w~|a3YQ>_%SZ?(y`gu-;vX0l4g4DPGUMMg(+>(Iqm4PTxsvTGq{yeCr@D0iOV~r zl96jDKvSJ{x75KxL;CgppfwQ7Wf;lH5E$d#jbzQgD!tSOVb}!$jFE?Rr0;zRIkRV7 zp>5v@XzOg(Da4$A`7ICdrfN6dG(W3!?kylhAQgh(XVez^IUIzWB*iJ9;X!rFe~{WL z(yPgdAUGQAdkgbMj1R;VbM8ixrMeU@uDqGX+_7p(6s%x+K!?>NL>dG+x6IYDS#!~w z{9aOUSv&B(o>*Xhj9aLU9W;ODo$r0l-G#G=O_?v@eSREKqTiy|{G$Ma=+$t@bTx1K zm9To}*6&m+eWGUO!O0)Dx_`k)jE8^QjlD?0?eXjk({uS*S3AWYj}NfZmk<4vzX_xK&X_aRZ^<_f)FX{)HtTO!rz9HJQ z8ZA1N++$+rOSn!x?%#3PyJ)|0%4Rv)oFu62pZ4YD{8(4MUWM{TupE=ftoG9j6h)#+ z%@S8QCzvhK7B!IZioIcTJ-F+b_hwQ*^=nNx*HPfu>_9?nJ^a9i6k^X*1#0tYS_fk%eckW*S2TZGpLoZl>P6gUIY>AQU^Q0PwimGgqZYL!t(DiXu?Ldsjk^dU|k8AaM{}7P)_L6M?0d}7asVhwkVz)&B3Gvjk zz0I|)zkDmo-@dg*;X#5Oq_+Xm>O2@Sd*fT(gq+OW>L3lD$V?%QlM#0GB!j$7JSO!{ zzwAN^lz#z7;y&7v^A@ZfhcDSfy4o%yU(9USdH84RN5$?pG8K4AYba)bM?|}nPaMv7 z`fFz_K=^-=6lvzdERy)7W8sov$YdH!)?Y*DLKVO-kIc5^OprO75_>F=iT_I&w^hTc z-z;(S9w`ioP6`ZVrL zjn7nR_TEnlR$y*yuORl)=8t3+f++I7w;d1WF}RBSTJ_Zf;r*z{OkWz#wBnYi=6k;% z^9et(nyxXkg`&8`n_w)qNP45pG%mcg7e+?lXqS# z(WjlN7{a@D&!T8B4+ZHdHdc@C0=Wy2%|)+91ZquZT#47G+l6Kv$!xDvkCv__Gq%tn zBf5S5&fdSoQl>P4;?-k-P`W0#yUCPcg9R8+5$Mpw3SPRHdl@be$(v0?Xgs&n|rbje7~db#f-Eh z**zqu_c5>yO;gML`SsW)f~sBq*dUZs_0!d6Neheq&gaH}sdU?^q@=Ky3k`1x2?FlT zvSYEfPkkP7;gEZ_Y$=>gKJsYQm zYB7i*4PLe0;lM`0^RkH?;)%0xs_r~ZK7=WjN|DmU!>?YoY-6g{B~jVA4N|) zcrNufu=y?5i`hbvICkw}Fr29tGW)mS1wDtLJ7+UkM_}SJF1URHcMM||j8pEC-t&Q; zof(1}J>0(`QiOwnhojiF^M|dKeIxb(^TOryrdd z#Anq{eQ%zsb#D|q)RmHBT~nCUJrUU*vYR`RGy~)gN2nM1urdyji}kvdO+YI;LPj*o z>tA)W9K`5{eM=P{8(nodlF^(-zlsBmJ!|jVkVf~!EvOSKnTjQmPUwUbh_=k?sT;Us z?)6fTRlNH)qHk9`8d_(pkfC>{t3GFSVy0!PSsB-Qs>-cXwCgMwpvqv%mH(h>ld2(J zYgYjO>CPG}1iYzTc8!#Ndo4?Gr>~YbeXjl|-&pErjOo*Ns=Hk*WGQ;)t$6|VAIFDv z|C>dO5bHfCF4U9PP`vt@>^P3M)6kIZvq^z%TX3}r8ygF+YH26giw}9Faa&NZZf=4$8L>sD+1w{Y%Kd5 z`G;}X>jj#6202%Htl= zb$u8!xHX4u^p>?<{7^a6Q8VG<44!$hseiq3f!vB)Assmg$K2kU%;Qt|X%58gY3vq3 
z!wK~1R7n86vAI%!jB(GQ1ZvfJ&G_%hvBh((3yzdN`%=n{@AiUb1kp?b^S=G1GBTG* z(+1h#Gq35#lfSVI1;947g3cGO#IRSXm%X<+dQF^4l89Gk&6Y%h_mE?f>3y}Z+j6}+ zQ)4q=;Y{z>g0zXGfKA+}#@E9ZyfU$4vGvlI*9uBLem|^)!{S(Ag!`>mVEx0yB*0bg zI~wyFj%A>}9K-HV@6)0@IErN2F-Kh~r`jAg;<(%P}JWPU-j=W0F}0Z&asL(AHY@W2x1V+d>6c zCROt}{(|q4SRXpyR{<-GJzP!fTk7kA}-@5LB zp``_GS#t$b;5g;PqQFYtfYXm&=Fz#&#-i~57{{V8H#}|=)svKM0_p7K4PHa2@vYtr zwr%E*zi}!BTP)L1zVnf{BslX>n}k`t$m$OIxYyis0b9-lqOQsGGG~0M;o+|z)Pis* zC*)a2Y?Nlb$pF&`9C%LKh|sT*u{TUSus?dWxp6{G z^Nkkfpu=uGx2$-S!ss4qh+Dq5ZdtLzH_vLkT^YG+-cr((CM9JC!Pqm9WbcET;4QO2 z2Z@q#7PCJXCBXWzyB#P&Cx~r-;Ms{uMke@iH(Akt@QHt-A0+2=&ypi4$o^i&jcN3w z|F>A_Uo_@7`UpBlSNl8UzRQS(!xgkvZqpoXO6qG^$^PM;D5l0YC7CX57MXxQTYDY0 zAERboOHskSa1kmy#8bVzmqSF=zUH|>T}0O@R{WFavvEVf7tE}ezucL!Wj|OZ+burZ z{djo*|$(GDSwc|79rHju80-&+;oGuKv2}M1TxyN6zwK9EiT1w@0b5L`4Ai^IRB*N-l zn6{=5_sG5;CijN|-8!bQDWRg$@?A+`$-hW~n%Pk?ED>kD0+rs+*dzO8EWj+HA*;&F z8JbCSk^1aMAd{d7ib+;jhpwxor++`&k*425P3X!~&`>yT8I85z1!Lvp-qGHg(Sat! zGBiX{>RTO7W|KqYrtM-&&59Rph<+*{*IV z`FDNiUWg(3i__Ri3rA|w!t|mkUfEmv*@h+JeP&z0RB*VrM69aM#j7!CN+kB63fbr#m1iCyVu%_Ps7KA(!0$aDw6 z_wijO(3s;gw{Fb_bBTB;W!6eCS!y2<5gCID4)|u3x9?8>Bg!T==V=s%zJ5l>O*iMZ zdg;kAl&I`#S2o_R6ELs)LYwj=Fs43Zv&Fw}LPU#PL`Bdp`h5s#MZUGP@^R>AMf+#J z1L~aHh|w$#^nICGBNw_q?S9X>>FceXW0{x1-qo42C_LkHIh|=%I1dX3H=iwN_WbC9lN{upUVZ1Zul7+8sGz+B9M=1E zy)Ls(U%0lolUamXXP>`|cFBs{RYlP}=_)tq8eCJP+oV;$wb>9UWLhISyz4>#sjAXDi@G@@C|QT7BPW1kbj?G|K9?wP?1 zT{kGga2APDp=?k&fs$y4| zVk#KtV#FY-hm>I$tYl2_t9;TofF&jHWfbH!%kgEhTjV=3@fILL64hnX6C8$_M$p6~ zWtr801ww_*9%DZ?G6dd;htrkrMYCZK5U+o4HyeDRwONVxBTPWk(*vk@o*3-(lc|j=myjHe)cAU-o_y0=~E_&-$ouS9t>WXT0H07|L6M@7kp>say`ymo=~Lu>*ZzIyu|p zkJ>aI!FgQuTWYMe8?*kN{(XiS(-60E{ti~C-LSkU-8(BA*+5(P;vLHqZ_d&ik*kTU z%Q%G~61&a{{+Z&Ex-dvzo3RB|5jNGc%xVef<}vd@*1UGQR)cD*S_y%4{>+hwzb^dS zN2H11(6-@47mbtsDbMp3?e+`gv|gA$a~O9Mgd84Q&&l5?f!kp(Hj6cZhpzK|WOJ&( zLEdeGCpSj<67}!tKSEwR$A$@mNbobkre5*A23b|+c=3_uyLI(lyiW8+>WmY}%-Hkq z%=jvmU(0j3=w5SMy=L*oHCM%DTvY{HO;yz3*CM~&Jit+?Jf~=ytq=0386;-gp=mS2 
zKE$XfqL|@+g_s4o@x;fCY~W;3!D0!o-*O2HF1>Kn=S6BkJ%Sy7agqjjeaYbNa$QuJ z`!BAyRDwLol0b?q(Iaz{g8ZY%%jTOs&Zq@$)&SJmG5{oSbiUhiaE)U1@?>D%YDxiE Q%!nc>CikXT-(j_o-N;e`>QUWqGgMx%~cSv_PL&t!04$Z#? zeD;2x{qFDmj_+T`p|EC{757@}Ig@Z|giG+lN^IBS583_qR8~FX;E;{hP zT_QFq@Z*kyveYZ2{64aEU41cU+l=^kr`8M z!iHFNZvG|~h;-5pIk!dG@^N2;;9ev_AtORpo0BD-x>V}c`o*H+;vnXphxyt~a^)dG zLC|OT<-k_ZP41056uRROgl{M-KYaM$!6X-AyFBF4QQDuh!hb@bbinQpz8BI^*{iV8 zSK$=rVXm0uj@sJ^;q@QbC1+h3gY9%N+K+b0uoyA=gT0RhbuvXH?3Pcds5}})uAn}N zXZ7F?8>NYd52}J01#mD&vRh=eOoGb{17-N-`I9pPg zbga@$@_hQ@;>)xdgN2|}Jd-bXph^Wk3M z9qGcfTOo-@m28ad&9clG)_is4-{#bJ?z5aF+rUaV&koow9a_KkpYV2`->b-uO`t0o zxxN(FhLe+Y6W3SlPRhiA{bEnxV~kbJ4LQu+uMZyyxOADel*&}vERtnF2r1YXuWI_= zf$h@fTuN$RTUa|%@(4A^{4jne)u8Q@Ch>*-_)3V4$ugkdeNLetF}O;)P7}pR=-gcU zgFdZ271*0)Z2Dc2m|3v2Ys`cK-x;Dk{}Dqz6cE_u(7$sB^@Yx#&CCc|Wvf9dX+0yjbnJM)A0!i@BmIXGAqDHe*W}(h5eO z?=@7?_`nFd<3EfwdS>Kn3d_DP@yZVnS$fA~a=K4Z12GF>5#I&h{atsi0JGK#2-9Xa z2iL$s!{KwL`Jd+)jVd@$Ai<-^BMhUvBAn0S=>1YDym6cIe=*Jj1uS&qc5pY%SC=O(E@$bV@d^m(i-Yh6 zUXc`&<}+Kx(|nHScINwejO1M=nY?DazO|qDb&EURuz0FSF2mZWV(Hh~3r-$$QhM-r z%2?$A8;naA%v@u3(ZJ|EhbOrfk8yr)433L(_&+Gsz|+?t!bYnpF1XfTr=yUHwr7^A zsum6WqKYj=mqOCk3E&bTC1J$C9_H+S#eGmCjsVS^(5^PADI-6&P?CnHe zGy83NPc^UJ1nkaG)(|=_GG&OTQ zz-mQreBV)WGwFml_$s+8HVy7=B4n%Pr|RUBhBg_q2k}n~)~DgQZ}UyF?a>mVRv_!laY` zSw())3tpw&+U`=j??vA|Fl#jgP1;{^`x(6u0;R3P>|AxfmW_*Nf3g;O@~Tk0>+YHt zL`TM;#pS$6X2@nUdn0?m3jtsxlyknuXl7a5;`lE++ZDilT zzm2iTWO$=N(l@|Jcg9IRBqmqX1GLWlt3Wwh%%Hb5{K~KES-hc3!jI_!T7|RFI0n!? 
z={!kTD+(_N~$4%bxO&wy6@ z4Ee2}(Vji!!s@yU0$wxyU?T|?B@b0fh(#WG4jGMb#0$?mlu2}>t} zj{PrfkI!tu!^`7|D*2?t+Qfa$mJ=ZD)A&sGvr!W3<|kPNcyC3c6l|s@Vswbew(iAr zJ)=fRjPag_jvX#-eZCUw4tn|!IHBg@>+|;2>foN?CZh-;=M^JLED;S345sd149*=9 z$rOeQuH9C&bM3pI9ryWudS5`Kw(#_OY|}03=o!FEN}J14xprJX3wAHKUR*9QVc5Dw zduC2pR**%k$dhg^9Hc4Oji(NI^?ied3&CtQH|9E*E#bJB*Ba(OxmU@*z4nru;<2Qx z^J_Z4dtgH$HgLSWjoKsXZ8n85w!NUHh9i~!O>_6&p^7!s z?8(V2NMtk1pM+wES#Ao`CiB?k*6~h0Wd&>Y4rznFRZX^O=uxogT7^ajo6DP6O$`J0Jpcx; zX^+^<^`Ua@r*9oPB_2s0(4Hjqb} z!-9i?f;Jye*11MT=+(wqQ555)j#~6ml-`}&$db@+sn##=QI15OVX2hoo<{c{-S&Qz z^Oo!Yjfjf7u%r# zs-*9znf%iB+k#b_8?^g#g1POzJ(*hBer$`DP7cw|ZEby_n)LQxmrc#z%{dFJ51P8> zjp#Q`>8$%x1EtA-bV7N&Z$dF*D6=%iiJodt$E$1EXv8`c@4oS`!Zb2e65~Pe7 zO`zH!&^t^~Q*88eLRwJv(0eJpU%K?VV4>*k#u36*#4 zmk%X?Mo1l}u7oScgfBwA2(BkhP5Q1K9ApDN81XnK#aGE|DQ%wcV6v1bJm2{mdKLB9 z`?MF=hap8Nx|iDz%K#|W$amYkABC{w-WkBYCd!&w6LGM$8DietxDsM@=z1UB18x80 zwf7i@j@+f};zi6^){pz&=OzbCpPFBo(%aNP5IdfcQQAD7={P<=&UtI)F00G)xsb24 zR!^DN89%Lv;bDGPtY9eUdayxcWmQ$*rH(?bz(!)B;MxY7e(?PZ)Qdns%Yc>N_QJF! 
zGoj>OaS_|vD*#^Ibyk^Dwf_r z_s88R`|!%P-mLO7Y}I7*eo2c~kr>U)Wi)b;`S$=(Qz0q-~@Hq>DzNo7bJFmc-Omp4ZBlX>ij!Bn!xFGj2n5>tOH7`?Bzzsnbi57#pVc zxxi2Y{q?J@j}wl)R}dRk&8lqVf*_rK6W!gHAOZYxlc97SilX_CL7SSr_HJgH=cM}4 zz0Lu~-RW;R-5Cm;MU62ak|=y#RL_ zW?OpK&_^0tntNrD$YEyFb{+cRbvEx+KRYMSGzb&zBip&e@>!<;Tk#APL~k7_o|7AX zae(0+^O^imtwcv#;l2G@>b(Tx{X8pM62o#X_Z7t_7ttu8SHtK)N6&+mC;6O;9Yf39 z_KP`RE|aZf4tW4j0r7yBOV-rNGV(;!t8;X5=>|;CbBmSCd{L{)OAF({{@=Y2l zDtm`F#EhhRewMQ@j4Lf4#iGOiJPk#0~l28N9=HzqWjUL~REr``g z9LiJEYwpRBgS>(Z0fJdiitql|xKQU$BmFD9t}rDVK375{Sby>=G>2vAR5ljN){4gc z{v!((fielSqUtjBMrV9IwUp3NRygYW{9SR82 z0bCKM9PMu;Wvw&bFeO~5z1>>ew(~sL59$pQLV*Qser*CH&Qqr%>YKUF1i#&M!qGEEFY1wUU zl_uEf^B%KC_38p#3VCGTd}y8X6zp3^;7@HCW5u8dFsm4*fTvxI3*oZXufbwz_us*O zo^*O4wq>s?%ltV~=gE>7-WBcnj?V8;RxBMnVUdcyhkxTvkU{MYZ1LH=y}hrNA)Vct8Tu=Hdpc zb(X(q!KG|&GRMEW37s$c(-j7evNyMF_3?3H^Op&(>pf-^13THTbGivUQ3?HN9euV| zOlC#hr~r@9Bv0-X2NqE5rpN%EHkEBzw!ubEJcr=b$&!`z(e?M^u`z-Okt<%)z9eFx zJa^L@j*MfxkbM+SGlqG`xymCzk&94)rAO7EY~mZcka?sogJEdL$5k1EG~E#7IK{HD zHfbh*0Y3uLnSHH^mzLBQlOjNCpv)-0LEZP^tNC%tH@Gz7vh0K3Ohj3lTTDM~Z`ErYIMepz|5`8KQgRV2x}V*-h$nxvD&OW3n{_3ROx%?SZjI)OW)LW=z4 zYTZNYj#g9iRwi4`w&2c{v*EkrlEM`?4@F+LI&;Jf5&`@u_Ctf`=}?P2Xx0Q*Tin-j zEER2lvAs!}Us32oc`_}}+jaIEQK4%s)&I3Qv4t4ySJtG*d+)MWLdOa*4TC^+w>5Vi zwzoUm=;;T6g#*Belx#ng5ZB9jb^iyJQAN6JEVCR`eAQapz%sw*p$&Y;Kr;s)n-z26 zYt8l;i}dN+?!l2)4KiU=DKMq<)p;NlKbjg#GW4Y*0yyS*SRO z=b|eu45ZROer*?~?|s5^;(mZic_JMtX6qrf|K*OU{w3QiVRxrVWRCL|qA~)Dz}H*_ z8V1!^=K6b${ycryTZ%S>vvHSB#F3haVu`3g48(C!1{;h1tX6ebT_HNWJ~nG{dIS8| z`~?mBhTmNxlM%AYg9nitYW7N@%gDqtg`=lA*FA{^kkui7o+{71rbg%W3({IQn~fkwDVdR(f@lJFc26O^c0iJd7QxqXnIP$4K%&14KHuC1Z1lR;glhMAs(tz$-se}RnKLG}cqPyf_A?b} zt8bRr)xK`fMqJ69=-hW<)4<)lZLkpyxmdH9Nu0ohZ6$_ zE+CupuqL(%ciUKiF;MbpW1nhOf;ON9J($Fb`eyOxGz0M{>(3gE^P+Hmk6;W9p_g6o z_pVLX&)ow`OI9f-J(Jt7ESO zZyxhFjc($PV{OMTn0lSGCec&IpG^NODk_yJSDn=RkzMtAx}*4^YU`MlpPDdGx3Ebo zK#R7ePilt3L~}vkMrB_=C}~NM-+_{#k%V)V_wQDtjgjRLreB!E*bh_A5z_wPBDFZx z<0>p9fMCWw?dGp?OsFNt{?zfsfB5L;as?Wk_Ju!@uH3Vo602LrCQVZF~ 
zk6`9y`75x7gvC6xcebk6KHp18XgObH|*wgXcy|XUGr>8cKBb#yPVGMD$>H zmj{QhQhkb}Com?zJw{=zgV3l;0$#t}F=)(lGXv)WgT>tv(WC-XY2hkGHO;w6Nz&{XIgw)U2;o-Cztf z%zPE|`16Q#=I}^$OqMyIx((K-+yGM2_wniP>ke0vSjr5!>5_6!$(lLa;lZlERg0e_ zz0%0W0D+|42o<2nl5@{r$K<$<9Lww#!*YE-x6I<>=sT=`E`QZc z*wFQZ@{;aq!xntfO{qIS^aHDNSlroa+rY9$)bgiEyeIkRFuN&DYEtdbp4vVQU71?p zYGVgEBhMGzC?vE(^}Y_I-q#)0X84$#2^H!qP%e5pU6)B^bv&!Szm6KJ-(oZmMQ?o@ z;#tL4-)|CMi_==RrdP{V%p{Ox&zH}VkUQ>|HDes}jx;8Z7h>@P*T2jzwEf?QZGE?S zjLmz`=bXv=Y!?kzXjl89zrj6JHc;f(UY5LptaI}pDk>ok@(8C8>dIEhjOu-u#`ZAM z^6BNhKm>ReyQP;DToLVr0OYkpHPHj$90u;x@-$Maxxd7Vn&I~u#V`0C3L5xGn@Kv_%dZQ5IE ze+)P)h-FnShUq~h-U@VXOcdPcdyNf3+f zy-+*J`(K&>p=W~&KFaw-I@17Pk%~aEUG2$%trV}C!o{++zE6b)Z}59gS>_aNL->vv zozV+KXTXzI6_}^!y&pY|r}GbLLGCpT&*2+dFBD2vi0&J|2Y4@6n_lnoqO4b5XC9Om zx9)aV1NbA2|N72RR)J$Zy0a1M2dhARn%F)i`PlEZ6hLA_idHq^Y<1w+Ii#>$#2kYX(kZrNe_)~t%Wxjh$ z;l%ZLSnZZ?Sf3bqEYvAh)~|@SH|}zPb!xXOdmJxqJSHNR92pX}*WQ_e?X3OW>*X#g zc#9PQ&c@H1PpJB<$?~gv%)l?p(9N3~20|QfWkTc1Ip^heh1(-bJ~3AIH=2y5QzMcw zL9|DMI+-!n^KO7;MnWo^1JaJ!QcXxmEDd91P*Sz(Ud@oB2RIOr$8KM>$ z0ecOjj>o?6I*j94(E$N zDJ)8eW1e5G@*g!mdE6}kfSKgLb(N(d1kp-Na#*dQ8C-(`8>Ndl^ALsmL8SFWSz%dO zS!=YSaJ*GYmgaW7Z46G15K5Dbk{9U7S|Suf;}3yK(DEdi%LOZ=gEp^_Nt|gDiq{?y zRLJ!d=5l@K$M_4@6dP@onF4q6S1iek0ib=<9~>&MDrB^6l+HHZK9~BKC|xeZm^xKu z<{7JU+$yFfe^Qd&r9R29@R^IjK)(RGD3esaa=Y}~+BRKFM@ZT&zC3u~%wT10ZM}E0 z6ydPi6Th54D2LU(>63iKqgD6$HNsF^QzMg%%f|b!XeWDvb|RH_Qk+7)b7rCz{xKs# z^q-94R!@YBWG-bMF5uV82t6sW3KOF99(~{L$>J=hZgp~I{U~1%pIIfRo9GvL}vLbvpuRu;p}7dV&vHOD#$Kr`8(>NR{dX550k+Y9T?~dhs8E>j&0#6T3yN} ze`)r3Y|+CSzDeI#PsIj;GuC-~(>;4rNm_bc2f>b@>exhy|Zk6##E(K`=Lba~wDp z;M)A|{wM6IMa~LH&JW)sDruD3|39FfC(H`UXH-BaU;_1WoKyLoFUh-pCXN-@@!2^S-bj(7nYBm@cs8HXZ(Ieo!@doi`_9KxrLiMhK@R zqaBSgvTl8CFfEDS>BS@?+pT2QQ&)VWTTxMAY8S&e>2rSGtnwiJ3l&mtg)ORLEQFbl zSe$<$0I2MS^#fgAlKHv|fItJqtHb7@K-XoG27H1Y1M?wb6~yDY47r>JSZhW7{*LSA zN`pLiNUiZ7kkY9jAcRNEi05at4Lcbl~Kd4K6;48(=_7 zIO$kDpwjU~S|X@SM`XRDgFCg&88zAnP?kte4qv^c(0z8*rB&60;dVG)q?um>>Qv2Ui;CI 
z+Ru%~3ze3P!%wBR*%7i71B5oYZ}@xTRAq}HALk~184xh2mMa;rmno0I>+dIMZZrbh znaau^Ec_uzxQH53rPV3H29VKB8i4O1h|uT8(e$KlrF@%#*V|e$2Bnrt(tFwY<43C? zD7E9Nh4fgz-8f?;14v|38pe5$JGX3Q{-fDw?a$lO-`@VF*uZ_eDtU}x5cbIv6@|8l z5#H*`-2II_&JDEpQPx1EK~~ODlKzcGnfjkP*iw!h*v}euVm&LtWFl8fzH43Z2x-bW zL1#4LRU!_cgE=O6cBdAqHGxKo*Z`p{QLijt(zgv;wB7zQ(UIUv2IbL%oD*Va5hN1y zWUUm(d>Pjj1Wf!GUf%j3$A;}Eee zqip)D!3=~vXx05kfxHJh5)JVec7VvNL`tv%qyWqMm07n45HO zCy~H1%n}xL^90Ah(Tq+JWv_JsMc;Q{4^Ypl+oO|bg%oZfSlsy^H0B&==-pqIFD$rO zGCsPGO0GZ~fpYhEu8W_dLv(qhF^y7IQ4&>h7rN{q=dVbW6F?m2291*xnSU~3%oE45 zcXbVGI(ZUz-w_?s2>E&$irl*SZdkeNL+DK6@3l!{R^_D~hfi*YH|&loZA)fiYHDh6 zubmPGsD+)n>a7J}V4%{6`MMI{p(dNT>ok@AjQWR|*6fSjc&n9dNV0SpEKF!>#S~hA z`^EzaPvRo^exB#2D$vBH1UY2|Nik z4@-gW4L0~Z!)(Q>jlK3kG3N~<(fnyOMOw51W{peXaB0Okv%rvWgp2w=B|2` zS?JD(;p&HaDz^T=XgBKU!j zk#=UJ6kIQm6f|5^0GeAxC?FSgkUf`1H0hBuUrq@d60Lrp9oW}&e{jzkixhW&Y_`$9qUcXae)VYUTE$zDnA4`|IZ^(RIu0v^-RJTXj&q zgkHA+mYMNx#?)<)S*86ciQCuVH8am*apVG$LrRTovbmG!d7k{aU}A2Y&K97NbEd&mq-D@F zvwp^G2-YZBPu5O**<93i+zD)M&CWhGns!vD;>>7)6Z1{T3m>lMoQuOV>>y3LScT{c zK>PPw>6LNKCC5gU?s(}5nb~SjFExP|46;9WWU!=$VElyx&<o{Qy{z*Qb86 ziD1l$O18g=S4VWW;#IMH&BO8_#>~zFo_082SeYDc z7M#JS*Xg$?Kv$+P-F^E3_dKK51U?ZBZ?c0VhZCpqU z(pcYElN|}xwEt1G>bkVeq>PB-r6>_MGLA_W0{;#PS{E~!uMz1njcoqOdv|{?e65}(kXJn5(fXogK*ME-xj>Y` z_pLutl`bd>L?QFbHLd1p-iD zirlvaRJPn#AujpyZB9ZE*CmLm~lv>tPQ@do?XT<5k9Ifo%S|;Ao=?QG@YE1(lX}b zpozmd3Mxp0VvR19W0ANg#)@S(+#{vpqki6-h&mN6XY2}tKW)1s(7A0rH^VVk24b0; z=qa$l4)lqY=dZ0ccGMA`O_!5*42gtbf0}g2U;z;^KFQ$Q;xeiUx9Zqdgib}eWsARF zw=t63Mi?6Zir3Y~&+w;2Jv3>;v?QrLLz+5aY7E$3j7=F08Rh4}6MSN=Z??|X8hTjg zML1K=6%g8uqbJoov8o@vjV+-Ej&m*~Wl{4z8Al5$c_ZHn3B1v1Ll=5(VK+z`v@lU* zq#hM!Us7aOYs@{YPC6pK{J*v9$b)lb%Br_gRm#75^MBX%f6G5u?GcpY7N2fzAu5NA zA9E6boYbA5cq=D`Wjy$d0Dhc}fp21D{Ep8j84)fa!u0w7p&j*U{AF6aqk+TYZo6@G zT)=_*f3Phyw;kr)nu@j};C7TZKDFG2GmMeUb_jMW?B;R73y(S#z3syhB0UbW0ANMa z&aoF_M8Cb}bqO=(M-{69EZhC`VK=QwLn73khua_Usk$E^Z#bCfAS>JX$#10iIKNNX z_aZ4ylK{KNsma4xHttb@NXD%+A!M>`5R!wflc9>)U{%00oShdK6a52U^5Th)_=-is 
zDu138O26e7>(L1fEt=_{J!A88#kLN9%$Y=@*R!t13+=4tlWZ# zHwsiB;*DWj9JMySTVDKo8sDfSuYKpw&R4p%J3E)cwUcS%+VK78m{1a;kYd z7k%cz2#VRXxjhPs1pq0i&fk%_xB2d1`zbIfR;#0tI$XZKt*V<(|F1ZJ^9KjYqsE@x z;=t3UOf5^Jba3NbsszuMye~9rktDn>B`!84Nwf{KWOH=#U2cg45+pM(R1dEvD(f3F zwK@ZRbG30el^Kt=sn+~oUW4e05CPc$QPlub!Q){#cr+n(MlLJ{0|k{ zR|DGADWw6Dq4cBzqX`_KW=G-_b}Nlcjm;J8XM@vJ_by&=eRfTeiX=!Qq~Uzd`Q6+C zNY@=WvV`dWFcjmSntR@4vZG54Q9$|?NB_wW#{E1)Yq`63&x^Zz+4_0D1bi_s(t2`1 zz;x;Cd#Afl(3Gy)J?En0$8{WB7F^U1f)GXGQP6^c)$xe!IhwQh zf{XX|vC{n&B4^!pD3_N%w_#Td=WsfgVC>Vi))3ZanEuX93g90$!2e1X{tGU=gExa$ z=G`e$U<>>-JuSj4Q-n6SYMwsoxp@6ylt#+-Y4x(y{c?qN0J9*zky`|zLyhD&m~&U; zAVy?t&hQbCkZuNRHkILMA!>9A0os_ML7N;g>JcC(@K1GdV(>q~mUa$k#Es|%W)awE z6eG&J!Q3Pj4M+8A7VG0!O$tmWKq#gjq`etn`?8FHJUpM1W>Cpw?8^%xb;BO__q~vajrS`LE{rUYlV0aVPff@&Y#CyL5C0kPz*UGRC~4g^scDjG4Yd z!7t^EYFI!DzmC8mm1}7yOG(2LH(w0XS?m& zOViN?py}2i&EWm$vZQaiIj>&S3OLXRXhi0eu_@04P2j!q2Ew|vznzQ425_Rs*yB_C z++Fm6IQspZEHozW?_QTL?*Wl{^KZE~501ST5aW_Y&5!pZ_yBNuybf+bLEA869pah>yTD}MA{9cqHMM!9-Sto=)HAD6WcF?X;J(r#EjqY)>MFd9uf6Mm z#A2}~NXi6Ly9If?N0ebvy?c2GV5np(@@FbYY(DtytXq?YGbpYcW_xoc)pVaFW>=vE#0 zn%&>&Z#y@h5An?eO7l|+1D`qa?2Mb-t@b{@E&62v*JT8{;s>mydTotw#aZ)=*-zXd(bZSZj*qcYTGKDmNyg{aQTS4N`41b++ zd@Gaea#KB9spUZMb7@@VbAv->f?q*)OX}GZ*g7TWdSjajUk{34q}4kHfU)5FuN@JD zlM6W0OgP0P#*#eQ_CIG`u+Bw z7Km`@zf_O^Qpo;gj*nuPJ4%<@#O z%<@Fntk61N`D^pRlc(1%5?ve?0x_a$B%b{($fPTRTe~4hd2!jW0G2FT-r@F?OW6 zyTf(n6%`sWL|i@yMAzgS73>++LPWq;1d&0shQLg=jxQ3p;?s|>4t<%tcUodJv`f(l zPn$8yPnyr_7rZ&muFeZ4kiOLvU)H)r7`5oiPfedLH2wWUSg^R$|7 zXPmSoGYUEfh!7KwOxf;-!Dl-Ai7z*5cMb-t`k}qlkcQ;E_P%|dVAjg>X<#R0gSST> zUuc|x{m~srOp#j~Cm`KA+y)*%s47B^mms{Rjh*}>PzyoYZbR#Wc~)%LN+$PQKq56X zsT4wDh)@-q(VIsP9=L5BHN0Udju5N8{>F1M`!cSwqoTgB@tm?6?R)w7RKb2L#+7e7 z!RUKWr6VS3m@H`ZIAeZazfXImTtk>^QyU>ca*lfTDcJg;k>Z2GvKwFu{(Oa zdw8Bj^|5I&!s@Dw9oHwy=i1DlwUh^>@K@{2UOZKSAHQz`J}TdKAw6jIe4{QScCP0 zOCs3~5&+umgb|Hv@;r#Q>i?AY7_?pt}T&a?K+1&8e~ z^W6mWhx-)hZ?VhrDxvi{B!BDDh?q;as_4X*rA}!^;BcLVo(5V88IRiI4N7aR%b)rp 
zg4^j4+d}?cywrVcvdnt9DBZzfzKGIxTr@FFpz-8rPfQRlVCq&jJ?rfU>k*Ky;|aw* z|5#E=QPXjr!m8o5N-+zE%3h40t*l*)uiJf+N?|*JK^pvF6ao^vdk|3BxkJU=JY!xv zfxDMrBf<4b%c&kE)}8}}KrdW#Ie(M9b}5_ZL^xz~iM>G31#72!mf!Ar##Fl8-`AY- zywY!z1HKy}chU&Mp`eF$3HKeuNR?}x911OPiPSYhL>}rnfPCTwPCodA?_hLV>X>vl z_}7GP_n%K-Ft$Y8n@wMuYf>ay*qb}sl)c;#c8wIdwsL_sd$^y&n!PBmPmueI?jLhG zU(g*9hHZERwmZ4zO4?GfPo~_{Mv7nDCG_xwWX$0h(2m&$q?Y~4(%@G6PZ2T8CMUX* zcEvHp9m!fiKL2;%Lb}oiEsf;v@zjAh)|H~0v>Q8n8mOaKfjODh76Y`2-klgzs4uV< z$O+`!tOvR>n|~%7w7=?iK1&^!3PTQ?oi}~r9ZoJBfj~7zseY3vc4RG5b)h(*a~0|! zTfDr|{AhM2y?dxRd%Z5OAOO2FVUfJ@BQSi|0lo2#_x5n0>$(78TC}YJJ*k=~t|{)G zH}0NF{F>W{7cZ53mfC$@`P>h;G-#>(+@Z4>U+82%+?Kx_P(R_K z86F7ZDh%U~vCe!3v0zSWip^=VDB=*KME94x5MqdsG+4fN4@78k$wyhJFrM*j3}SG5 zM&9eQh1Of4vSJP)RJJ~Rmk(l_Dtpd(_SVumbYw{NuxN544ahv~s9qStIDyZh(?0?- z5l)a^ou{%`#sD~wPNJ26^n+d#^G;SHB`wt`k}qbmUsCsZ!UP46n!P``ZZZnG_~yCt zaJZg(U|g4vFBDymd!!8CF?jB=OK^3y?qF5c*W`!6mn4Y)LHnfR5&HFh{MGRWB)+)z z@*KD2jK6UKj(n+ zF2w?aV5!tQb^aATt0}&$2P{52eQdu>otJVLE9}%AQ36W>UC&iaCoUe=VNU5_e`>|D z65=Uzd2`O_V;i3H)4H9n{$fHC>`hJ5Ot-zbHKW_cbm^l&By#oWj5J#$ADANH@yse- z1I)+&ERFy?Imf~4Z=Lv$&>#^{%+3pJRPyMN^y{Kxa3`uj&RxW$IthuPbca!L49J8X z9jc>)=84DiVz7kDT?uiUm$6kN)A@EZ7p6;U%1NuT`jt&Ap1;9nlsXISw%ruVU4(tR zgrcr}>&FphWpiYL0Aq@o?T_^k!8_igoy)uaa1D3S59#BRrTe|(c#f?PhdX3GI5Hqx zoDhSZvSGNnR4^6M>^y}wjct4B1YCI;+bnrqnb-5#^-i;~Q=eh!r5#OZ&cfHlYMI<{ zj3`}BZSAYa!IgbMOu&m{mKT)*dTqknB$b)tCpD2qJl+==XB**`UOkoQ2It0$aFNR3 z4iGRbqCY3zacSYs+{7rzDK)g(Y;uQm>1T904N=L_E!j#~|ER#TJvGT@Hw~z%SqYy7 z4(_dBAb=$aq?9mo>il`-x+jmL_Wis7{V=x8;yO8RMXBKtQiDvM^*b}nZ-7?0{*dtd zmNLE!vt2O5#b%~0%7NN8N7H3&6|&BJ)O*tSe*IN}2v5lMZX^7?4~%`MQ|EQk+|~|z ztdYp@rCVwQ;c1(T&5F+%^lB|>y7J;MqPd<@pnnK9*H%|2%xu^00Y5m#fICR#S|4(> zRIJUW`CLsZ&b4(%ctPkkV3jT2SDlYs>`q7e6JdPhhto~F(SrFkTQEO}W@y_y{v0T>0^Y_@n;A=qwX=ThamFr7-Oen0 z6@3=cb|jG2cEqI>aS_*m&Ae-W*=aAG34Y6SMK`uhHMy zoAy3CqMzZYFKDMdWD0tq)}+~!MkwnClJ+j1{oiq`O1-?=o_63x|at9P6eohK1o}$0cTgazexAlx} zh9+Cp2lO3`V$3cJ_1A{E(7A|~UOR;gzW)SC=FC~vGH>X*K03_1V1Xp=hfiSZ-DCDG 
z>PT*6*iv`fcR4=T-63D@TlTe$WYTLvOq?}&5WJaONaG<#m5TEkoj)=$5i#v6ZA0pe zbZ!}UHWJtw^DZ5p&b&}Rw9l(kcWeB#0l2`5o~sI1<_7(86R#_K^hMkWyqBy6aQzBK z^kESVb}6xUdbB&P+P0t-&B88=T;92bH0%In2xWU}1yGD+AT@O;E-q?_4(zD?Qva~( zU^ZFZ;Mq16@NF3EA_rC4`I?U{cxv~X+^!ZMO?&u@E}Q(L2d6yQ!WG#Y9M=Td5j#g# z%IYkyu*~(%34vv{A4Vdk8~pK)tG&rwVQ9i9lwLMf8<ZI{w2 zX3)=%;42_>LY5(6mvEn>Da%eFlz*RRWYDIuzM%alkDXeMNk;ZDtb*Dmy1t~>+;|8+ z_ecDqDvtG9+BI-(7l`V$AN6htZU|7k+*CAt<}&l#`@+cE+md0D|K%fPV3Mx5I1(<| z2b4^{y~AO_c}Tc-xHSv{8_a~P4#N8JcBWOd53%cdms-|OgdmyQLcPJmod=TEz@L{& zU0%HN*sbL;AnQ|s*LQo&`QS-9=3Ui=RzoT*1bTxry{5#rhL6K0O$y;nfUg!@A z3gCdqbKl5iWe)R5WD|H;Si63dI9_|WdS@6H{%9Idk&9LjMeB2eg}ac`7{%QlI^ka& z4TeoT*KvmoIL z56T1iakU!c;NldLVw~kQRnIn!eXMaa4=_MB?Doqq(qCtGb4H&Bzq;x35V_pvg=07z zW56U4VJjf@G!LYn=rVqMa2H0za*~0s`G{lFxt(m34gWDc|8xm0#hdA;#>=*t zG+(2?;yYW0JK$a3nKtLe@c{MVuZ8^c6DSLb;1Sg29&T+*@M`R769bmu5v<5z?t+=F zo@BNiML!JV8{1RRdwp9n^a`GA5f^a(#i7?_xp*PRc1;Avj=TSkW5M34tWiGsth$e9 z%8K~Gq~2W$A)q!-<6(se3K{CuoCiQmo4Ebz9+%(FUc7J1Ne_G_$0@tK$ZSgC*Cw;^ ztTQCx(IbA4y5VRZZ}gBBcyI{iS$)U=TfzHU!=GbsVqY zr$hCSRk-)oxa(pp?!m}7WOe=g>OsGw&-Ss$_A|)k*{gyGMHbIQsru7qJ<68;(=#4( z7G^^z+@@!zLE7W}1A_iRwTAXmW zZj3G8UaSiQUMD*;t)Fc;1}-*Mzhk>lZ2|mJxpIeRTYElxyDpUc`t_y6lDn&`PP;HH z-5TBFxs@g__edA1U+-;#Z7}v~3zZmXqhJ0f#rgh@J8hNLn!5Y*WO`Dc3SO#pW~b`q z?kp)<77#TbUbcK=CKtEOs~W(#0VsR>&M(_+3Trj*otOJPKlHz2%cR%Kv8k~(sX%#n zmZg+Bh^0&%1ThNiUk0XBvv&_3#HWXT7;AGzS3xNC5jE#}6#NH*d1l5t$5?Yq-yl~= zwGZ=!2`}c|F5nlO{sKtbsk_5FzkF6k*OsXTUHi$Ah+O8n#cBg1=EyHE$_3c^YhMj) zEY0ds#_PL&qU)(i6E=iFt@q=p3kKY9D*wMq&itRvJb~k*TB2GsTC`LgW7sO$wzM^J zP)83&*QjIa2&1J--B&c1qtO~mmUd;U$f%bZuh!L8MbXeAH7%jl))h%4LrX3~5<;?1 z%*^i2Yk%5*;Q8fwUf<{Y{#;yhbJ#Mx8oviE(8d-jUKQL3?>0uU8>}I`0L_m@O&UG> zpNs2k(d|P8;4uj>uxQrqiGS_TtvEI1sL(q4_cwCkB^A3Q$37xXysdK@8YiR<)d6D%S{ruM{mw$e1WlPB5XT z_Ab*x>TnHZkHXcd7&?Y@LKwk1gPxJaFK)S9Zg-$@N#lyet8{zc!s!3W3+g@oEg-l; z?Ik*8T72rmmm;ci3Gcoa5xhe2$xXb#@6PT2d#l27csQqYQYzVA)$6}Z-#E}?@g@~v z+L-g^5wW9;_+j9-eyiHJU!_y$5oo3M6O}Lty#SY$6^c3~pgC=b8OVZhm=Ht%fZRBz 
zO#A(ahwS$@l={^-9I>Jnt4C(Ox{$1Os^n~3z3ToDdnrrK*aW`iFh;8hD^nm>a7qSpQ%{O)q5c zAQ?ih?yjcrp-YlTNa&Nz`fOfO#LW^N^g6NeClQUKThH=dh#aH$+8{QC@8rQk%c%s@ zaq!3KYq^F5uy{n(a(y(Rs5;+=%&y9kjC_t=W9BY)-zCw~JW)(RDt|+sDK-$fc6%8l z6`|s%m;h7w;B{Wl-jj$ z3k^9`@hIqTd1M!d6$1yuB-1f%qn<4s@?3p*LXv3qD*;94`2rzQz*D_wKBK0ngyW}& z3o9X8dnGfjSMUGwW%*B(#1{RnjEsQxxD6)u)uF}~r3X!Hc@%)7$Z5Ro+-O#0-=tP^b_LW^Chei% zO!1s(tWdz*Th%ho!8Yd5oQlW-i)LY$qMV_2`xpcuvp^W`6(I z_0b9B=AM3D3R57!{tVk&-1`*`^VuLKsq2N}nlPS&!$}on^cCKCKb`O`VU$|E-??dt z^-fYLuSM44*SF=WVqvpz{d%1FX8tz|OY;@Izm>h39d4CPSp@n72#Nc2>1~51ZX@?2 z09C;Rs4IDw2O*3V9XWfDF}q%;v>wxP@ARvy+~sdn5-aoRcX#Uysxd?9jix3Gc8y7* z?3{c~#uGW1J~c5tlbHaDV*G`BJC4tm(L+zp9KFl55eKGSYdVB2VH1>}Tv~IrX4gHtQR`R2!2H+1#Q)xm}5Tw(e zGg?JAD&-=^(hNxnCVC9wFg^N|Uvdd(V+Qt9)gXMzs1mMwLyyP`Ajl7sU|HJdH@=Od zzjL0S?KfUqYizuP@O_2BQ(%;D*dYDEHLH6TM>K&J+V6LCvyi;=nLHamdFS;L>h>yNH@kGKYbFwWmALrBI41J-WF#90A_C{(GoB3iYE!)9oY` zntpJ)jez7&8OFaFjL1-vMffa*w!9M>e#flt54@0p>}ctsJ>SeMH!|+Xin8noy>8qT z#_8}OhhR2W8{Bo(hRWKOq4NJOj8HcDvA4c~(^Lu?Ba5bBsy+HSl{T^b+_*Qm&kmS~ P{Nk*apJ&sl$h?06H;xJu literal 0 HcmV?d00001 diff --git a/src/main.cpp b/src/main.cpp index 5c95b04e..a05b359e 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -10,7 +10,7 @@ #include #include -constexpr int sizePOT = 1 << 16; // feel free to change the size of array +constexpr int sizePOT = 1 << 26; // feel free to change the size of array constexpr int sizeNPOT = sizePOT - 3; // Non-Power-Of-Two /// If true, run additional simpler tests.