From 2ca234d0543da26221198f584d6fb429f0413f84 Mon Sep 17 00:00:00 2001 From: Rachel Lin Date: Tue, 16 Sep 2025 15:56:20 -0400 Subject: [PATCH 01/17] cpu scan and stream compaction --- stream_compaction/cpu.cu | 63 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 5 deletions(-) diff --git a/stream_compaction/cpu.cu b/stream_compaction/cpu.cu index 719fa115..e0a391ce 100644 --- a/stream_compaction/cpu.cu +++ b/stream_compaction/cpu.cu @@ -19,7 +19,19 @@ namespace StreamCompaction { */ void scan(int n, int *odata, const int *idata) { timer().startCpuTimer(); - // TODO + + // check size of idata to avoid accessing a null reference + if (n == 0) { + return; + } + + // compute exclusive prefix sum (ignore last element) + odata[0] = 0; + + for (int i = 1; i < n; i++) { + odata[i] = odata[i - 1] + idata[i - 1]; + } + timer().endCpuTimer(); } @@ -30,9 +42,20 @@ namespace StreamCompaction { */ int compactWithoutScan(int n, int *odata, const int *idata) { timer().startCpuTimer(); - // TODO + + // number of elements remaining + int numElems = 0; + + // compaction + for (int i = 0; i < n; i++) { + if (idata[i] != 0) { + odata[numElems] = idata[i]; + numElems++; + } + } + timer().endCpuTimer(); - return -1; + return numElems; } /** @@ -42,9 +65,39 @@ namespace StreamCompaction { */ int compactWithScan(int n, int *odata, const int *idata) { timer().startCpuTimer(); - // TODO + + // temporary array to indicate the element should be kept/discarded + int* isValid = new int[n]; + for (int i = 0; i < n; i++) { + isValid[i] = 0; + if (idata[i] != 0) { + isValid[i] = 1; + } + } + + // exclusive prefix sum scan on temp array + // represents the index in odata that i in idata should be mapped to + int* indices = new int[n]; + scan(n, indices, isValid); + + // number of elements remaining + int numElems = 0; + + // scatter + for (int i = 0; i < n; i++) { + if (isValid[i] == 1) { + int idx = indices[i]; + odata[idx] = idata[i]; + numElems++; + } + } + 
timer().endCpuTimer(); - return -1; + + delete[] isValid; + delete[] indices; + + return numElems; } } } From a29f262e5f5bb129e6ae000b8327847c9e886bdb Mon Sep 17 00:00:00 2001 From: Rachel Lin Date: Tue, 16 Sep 2025 17:43:49 -0400 Subject: [PATCH 02/17] naive gpu scan algorithm --- stream_compaction/cpu.cu | 11 +++++-- stream_compaction/naive.cu | 62 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 69 insertions(+), 4 deletions(-) diff --git a/stream_compaction/cpu.cu b/stream_compaction/cpu.cu index e0a391ce..3d12e7eb 100644 --- a/stream_compaction/cpu.cu +++ b/stream_compaction/cpu.cu @@ -20,7 +20,7 @@ namespace StreamCompaction { void scan(int n, int *odata, const int *idata) { timer().startCpuTimer(); - // check size of idata to avoid accessing a null reference + // check size of idata if (n == 0) { return; } @@ -68,6 +68,7 @@ namespace StreamCompaction { // temporary array to indicate the element should be kept/discarded int* isValid = new int[n]; + for (int i = 0; i < n; i++) { isValid[i] = 0; if (idata[i] != 0) { @@ -78,7 +79,13 @@ namespace StreamCompaction { // exclusive prefix sum scan on temp array // represents the index in odata that i in idata should be mapped to int* indices = new int[n]; - scan(n, indices, isValid); + + // compute exclusive prefix sum (ignore last element) + indices[0] = 0; + + for (int i = 1; i < n; i++) { + indices[i] = indices[i - 1] + isValid[i - 1]; + } // number of elements remaining int numElems = 0; diff --git a/stream_compaction/naive.cu b/stream_compaction/naive.cu index 43088769..7d46747d 100644 --- a/stream_compaction/naive.cu +++ b/stream_compaction/naive.cu @@ -3,6 +3,13 @@ #include "common.h" #include "naive.h" +/***************** +* Configuration * +*****************/ + +/*! Block size used for CUDA kernel launch. 
*/ +#define blockSize 256 + namespace StreamCompaction { namespace Naive { using StreamCompaction::Common::PerformanceTimer; @@ -11,15 +18,66 @@ namespace StreamCompaction { static PerformanceTimer timer; return timer; } - // TODO: __global__ + + __global__ void naiveParallelScanKernel(int n, int* odata, const int* idata, int k) { + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + + if (index < n) { + int kernelStart = 1 << (k - 1); + + if (index >= kernelStart) { + odata[index] = idata[index] + idata[index - kernelStart]; + } else { + odata[index] = idata[index]; + } + } + } /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ void scan(int n, int *odata, const int *idata) { + // use global memory + // ilog2ceil(n) kernel invocations + // since GPU threads are not guaranteed to run simultaneously, we can't operate on an array in-place on the GPU (it will cause race conditions) + // instead, create two device arrays and swap them every iteration + + // device array buffers + int* dev_arrA; + int* dev_arrB; + + // allocate buffers + cudaMalloc((void**)&dev_arrA, n * sizeof(int)); + checkCUDAError("cudaMalloc dev_arrA failed!"); + cudaMalloc((void**)&dev_arrB, n * sizeof(int)); + checkCUDAError("cudaMalloc dev_arrB failed!"); + + cudaMemcpy(dev_arrA, idata, n * sizeof(int), cudaMemcpyHostToDevice); + + dim3 fullBlocksPerGrid((n + blockSize - 1) / blockSize); + timer().startGpuTimer(); - // TODO + + // for each kernel + for (int k = 1; k <= ilog2ceil(n); k++) { + // call naive scan kernel + naiveParallelScanKernel << > > (n, dev_arrB, dev_arrA, k); + checkCUDAError("naiveParallelScanKernel failed!"); + + // swap array buffers + std::swap(dev_arrA, dev_arrB); + } + cudaDeviceSynchronize(); + timer().endGpuTimer(); + + // copy data back to the CPU side from the GPU + odata[0] = 0; + cudaMemcpy(odata + 1, dev_arrA, (n - 1) * sizeof(int), cudaMemcpyDeviceToHost); + + // cleanup + cudaFree(dev_arrA); + cudaFree(dev_arrB); } } } From 
f49d000a690b6b7ee5ddbebf2374aaf4c871b6aa Mon Sep 17 00:00:00 2001 From: Rachel Lin Date: Wed, 17 Sep 2025 00:14:31 -0400 Subject: [PATCH 03/17] work-efficient GPU scan and stream compaction, thrust's implementation --- stream_compaction/common.cu | 19 +++- stream_compaction/efficient.cu | 195 ++++++++++++++++++++++++++++++++- stream_compaction/naive.cu | 5 +- stream_compaction/thrust.cu | 26 ++++- 4 files changed, 235 insertions(+), 10 deletions(-) diff --git a/stream_compaction/common.cu b/stream_compaction/common.cu index 2ed6d630..0a4b8914 100644 --- a/stream_compaction/common.cu +++ b/stream_compaction/common.cu @@ -23,7 +23,15 @@ namespace StreamCompaction { * which map to 0 will be removed, and elements which map to 1 will be kept. */ __global__ void kernMapToBoolean(int n, int *bools, const int *idata) { - // TODO + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + + if (index < n) { + if (idata[index] == 0) { + bools[index] = 0; + } else { + bools[index] = 1; + } + } } /** @@ -32,7 +40,14 @@ namespace StreamCompaction { */ __global__ void kernScatter(int n, int *odata, const int *idata, const int *bools, const int *indices) { - // TODO + + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + + if (index < n) { + if (bools[index] == 1) { + odata[indices[index]] = idata[index]; + } + } } } diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index 2db346ee..597bbf61 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -3,6 +3,14 @@ #include "common.h" #include "efficient.h" +/***************** +* Configuration * +*****************/ + +/*! Block size used for CUDA kernel launch. 
*/ +#define blockSize 256 + + namespace StreamCompaction { namespace Efficient { using StreamCompaction::Common::PerformanceTimer; @@ -12,13 +20,146 @@ namespace StreamCompaction { return timer; } + + + // up-sweep + __global__ void kernUpsweep(int n, int d, int* idata) { + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + int stride = 1 << (d + 1); // 2^(d+1) + + if (index < n) { + int idx1 = index * stride + stride - 1;; + int idx2 = idx1 - (stride >> 1); + idata[idx1] += idata[idx2]; + } + } + + // down-sweep + __global__ void kernDownsweep(int n, int d, int* idata) { + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + int stride = 1 << (d + 1); // 2^(d+1) + + if (index < n) { + int idx1 = index * stride + stride - 1; + int idx2 = idx1 - (stride >> 1); + + int t = idata[idx2]; + idata[idx2] = idata[idx1]; + idata[idx1] += t; + } + } + /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ void scan(int n, int *odata, const int *idata) { + + // set new n + int newN = 1 << ilog2ceil(n); + + // device array buffer + int* dev_arr; + + // allocate buffers + cudaMalloc((void**)&dev_arr, newN * sizeof(int)); + checkCUDAError("cudaMalloc dev_arr failed!"); + + cudaMemset(dev_arr, 0, newN * sizeof(int)); + cudaMemcpy(dev_arr, idata, n * sizeof(int), cudaMemcpyHostToDevice); + + + timer().startGpuTimer(); - // TODO + + // upsweep + for (int d = 0; d < ilog2ceil(n); d++) { + + int numNodes = newN >> (d + 1); + + dim3 fullBlocksPerGrid((numNodes + blockSize - 1) / blockSize); + + kernUpsweep << > > (newN, d, dev_arr); + + checkCUDAError("kernUpsweep failed!"); + cudaDeviceSynchronize(); + } + + // downsweep + cudaMemset(dev_arr + (newN - 1), 0, sizeof(int)); + for (int d = ilog2ceil(n) - 1; d >= 0; d--) { + + int numNodes = newN >> (d + 1); + + dim3 fullBlocksPerGrid((numNodes + blockSize - 1) / blockSize); + + kernDownsweep << > > (newN, d, dev_arr); + + checkCUDAError("kernDownsweep failed!"); + cudaDeviceSynchronize(); + } + 
cudaDeviceSynchronize(); + timer().endGpuTimer(); + + // copy data back to the CPU side from the GPU + cudaMemcpy(odata, dev_arr, n * sizeof(int), cudaMemcpyDeviceToHost); + + // cleanup + cudaFree(dev_arr); + } + + + /** + * Performs prefix-sum (aka scan) on idata, storing the result into odata (device only). + */ + void compactScan(int n, int* dev_odata, const int* dev_idata) { + + // set new n + int newN = 1 << ilog2ceil(n); + + // device array buffer + int* dev_arr; + + // allocate buffers + cudaMalloc((void**)&dev_arr, newN * sizeof(int)); + checkCUDAError("cudaMalloc dev_arr failed!"); + + cudaMemset(dev_arr, 0, newN * sizeof(int)); + cudaMemcpy(dev_arr, dev_idata, n * sizeof(int), cudaMemcpyDeviceToDevice); + + + // upsweep + for (int d = 0; d < ilog2ceil(n); d++) { + int numNodes = newN / (1 << (d + 1)); + + dim3 fullBlocksPerGrid((numNodes + blockSize - 1) / blockSize); + + kernUpsweep << > > (numNodes, d, dev_arr); + + checkCUDAError("kernUpsweep failed!"); + cudaDeviceSynchronize(); + } + + cudaMemset(dev_arr + (newN - 1), 0, sizeof(int)); + + // downsweep + for (int d = ilog2ceil(n) - 1; d >= 0; d--) { + int numNodes = newN / (1 << (d + 1)); + + dim3 fullBlocksPerGrid((numNodes + blockSize - 1) / blockSize); + + kernDownsweep << > > (numNodes, d, dev_arr); + + checkCUDAError("kernDownsweep failed!"); + cudaDeviceSynchronize(); + } + cudaDeviceSynchronize(); + + // copy data back to the CPU side from the GPU + cudaMemcpy(dev_odata, dev_arr, n * sizeof(int), cudaMemcpyDeviceToDevice); + + // cleanup + cudaFree(dev_arr); } /** @@ -31,10 +172,58 @@ namespace StreamCompaction { * @returns The number of elements remaining after compaction. 
*/ int compact(int n, int *odata, const int *idata) { + int newN = 1 << ilog2ceil(n); + + // allocate buffers + int* dev_idata; + int* dev_odata; + int* dev_isValid; + int* dev_indices; + + cudaMalloc((void**)&dev_idata, n * sizeof(int)); + checkCUDAError("cudaMalloc dev_idata failed!"); + cudaMalloc((void**)&dev_odata, n * sizeof(int)); + checkCUDAError("cudaMalloc dev_odata failed!"); + cudaMalloc((void**)&dev_isValid, n * sizeof(int)); + checkCUDAError("cudaMalloc dev_isValid failed!"); + cudaMalloc((void**)&dev_indices, n * sizeof(int)); + checkCUDAError("cudaMalloc dev_indices failed!"); + + cudaMemcpy(dev_idata, idata, n * sizeof(int), cudaMemcpyHostToDevice); + + dim3 fullBlocksPerGrid((n + blockSize - 1) / blockSize); + + timer().startGpuTimer(); - // TODO + + // temporary array to indicate the element should be kept/discarded + StreamCompaction::Common::kernMapToBoolean << > > (n, dev_isValid, dev_idata); + + // exclusive scan on temporary array + compactScan(n, dev_indices, dev_isValid); + + // scatter + StreamCompaction::Common::kernScatter << > > (n, dev_odata, dev_idata, dev_isValid, dev_indices); + cudaDeviceSynchronize(); + timer().endGpuTimer(); - return -1; + + + // copy data from dev_odata to odata + int lastValid = 0; + cudaMemcpy(&lastValid, dev_isValid + n - 1, sizeof(int), cudaMemcpyDeviceToHost); + int lastIdx = 0; + cudaMemcpy(&lastIdx, dev_indices + n - 1, sizeof(int), cudaMemcpyDeviceToHost); + int numElems = lastValid + lastIdx; + cudaMemcpy(odata, dev_odata, numElems * sizeof(int), cudaMemcpyDeviceToHost); + + // cleanup + cudaFree(dev_idata); + cudaFree(dev_odata); + cudaFree(dev_indices); + cudaFree(dev_isValid); + + return numElems; } } } diff --git a/stream_compaction/naive.cu b/stream_compaction/naive.cu index 7d46747d..17488bca 100644 --- a/stream_compaction/naive.cu +++ b/stream_compaction/naive.cu @@ -19,10 +19,11 @@ namespace StreamCompaction { return timer; } - __global__ void naiveParallelScanKernel(int n, int* odata, const 
int* idata, int k) { + __global__ void kernNaiveParallelScan(int n, int* odata, const int* idata, int k) { int index = (blockIdx.x * blockDim.x) + threadIdx.x; if (index < n) { + // 2^(d-1) int kernelStart = 1 << (k - 1); if (index >= kernelStart) { @@ -61,7 +62,7 @@ namespace StreamCompaction { // for each kernel for (int k = 1; k <= ilog2ceil(n); k++) { // call naive scan kernel - naiveParallelScanKernel << > > (n, dev_arrB, dev_arrA, k); + kernNaiveParallelScan << > > (n, dev_arrB, dev_arrA, k); checkCUDAError("naiveParallelScanKernel failed!"); // swap array buffers diff --git a/stream_compaction/thrust.cu b/stream_compaction/thrust.cu index 1def45e7..47a59633 100644 --- a/stream_compaction/thrust.cu +++ b/stream_compaction/thrust.cu @@ -18,11 +18,31 @@ namespace StreamCompaction { * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ void scan(int n, int *odata, const int *idata) { - timer().startGpuTimer(); - // TODO use `thrust::exclusive_scan` + + // use `thrust::exclusive_scan` // example: for device_vectors dv_in and dv_out: // thrust::exclusive_scan(dv_in.begin(), dv_in.end(), dv_out.begin()); - timer().endGpuTimer(); + + if (n > 0) { + // copy input from host pointer to host vector + thrust::host_vector host_iData(idata, idata + n); + + // cast as device vector + thrust::device_vector dev_iData = host_iData; + + // device output vector + thrust::device_vector dev_oData(n); + + timer().startGpuTimer(); + + // perform exclusive scan on GPU + thrust::exclusive_scan(dev_iData.begin(), dev_iData.end(), dev_oData.begin()); + + timer().endGpuTimer(); + + // copy result to host output vector + thrust::copy(dev_oData.begin(), dev_oData.end(), odata); + } } } } From bab70c6da54da198c58fc60791779867bd378287 Mon Sep 17 00:00:00 2001 From: Rachel Lin <43388455+RachelDLin@users.noreply.github.com> Date: Wed, 17 Sep 2025 00:26:46 -0400 Subject: [PATCH 04/17] Update README.md --- README.md | 19 ++++++++++++------- 1 file changed, 12 
insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 0e38ddb1..55dde77f 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,19 @@ CUDA Stream Compaction ====================== -**University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 2** +**University of Pennsylvania, CIS 565: GPU Programming and Architecture, +Project 2 - Stream Compaction** -* (TODO) YOUR NAME HERE - * (TODO) [LinkedIn](), [personal website](), [twitter](), etc. -* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) +* Rachel Lin -### (TODO: Your README) + * [LinkedIn](https://www.linkedin.com/in/rachel-lin-452834213/) + * [personal website](https://www.artstation.com/rachellin4) + * [Instagram](https://www.instagram.com/lotus_crescent/) -Include analysis, etc. (Remember, this is public, so don't put -anything here that you don't want to share with the world.) +* Tested on: (TODO) Windows 11, 12th Gen Intel(R) Core(TM) i7-12700H @ 2.30GHz, NVIDIA GeForce RTX 3080 Laptop GPU (16 GB) +* Features: + * stream compaction on the CPU and GPU + * exclusive prefix sum on the CPU and GPU + * work-efficient algorithm that avoids race conditions + * scanning using thrust library From 6bf9ea8ce5dca23e1f80ac1e89be0115135ccba8 Mon Sep 17 00:00:00 2001 From: Rachel Lin <43388455+RachelDLin@users.noreply.github.com> Date: Wed, 17 Sep 2025 00:27:35 -0400 Subject: [PATCH 05/17] Update README.md --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 55dde77f..0673c15f 100644 --- a/README.md +++ b/README.md @@ -17,3 +17,8 @@ Project 2 - Stream Compaction** * exclusive prefix sum on the CPU and GPU * work-efficient algorithm that avoids race conditions * scanning using thrust library + +# Description + + +# Performance Analysis From 334b4b91b476dbd3b58707134591ebd32377eed4 Mon Sep 17 00:00:00 2001 From: Rachel Lin <43388455+RachelDLin@users.noreply.github.com> Date: Wed, 17 Sep 2025 00:28:07 -0400 
Subject: [PATCH 06/17] Update README.md --- README.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 0673c15f..d70db83e 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,7 @@ CUDA Stream Compaction ====================== -**University of Pennsylvania, CIS 565: GPU Programming and Architecture, -Project 2 - Stream Compaction** +**University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 2** * Rachel Lin @@ -18,7 +17,7 @@ Project 2 - Stream Compaction** * work-efficient algorithm that avoids race conditions * scanning using thrust library -# Description +## Description -# Performance Analysis +## Performance Analysis From 4f00201bd30b4193a368c392fa87f9acfc6af658 Mon Sep 17 00:00:00 2001 From: Rachel Lin <43388455+RachelDLin@users.noreply.github.com> Date: Wed, 17 Sep 2025 23:02:20 -0400 Subject: [PATCH 07/17] Update README.md --- README.md | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 71 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index d70db83e..f86dec77 100644 --- a/README.md +++ b/README.md @@ -11,13 +11,79 @@ CUDA Stream Compaction * Tested on: (TODO) Windows 11, 12th Gen Intel(R) Core(TM) i7-12700H @ 2.30GHz, NVIDIA GeForce RTX 3080 Laptop GPU (16 GB) -* Features: - * stream compaction on the CPU and GPU - * exclusive prefix sum on the CPU and GPU - * work-efficient algorithm that avoids race conditions - * scanning using thrust library ## Description +This project offers parallel scan and stream compaction algorithms in CUDA. 
Features include: + * stream compaction to remove unwanted elements (zeros) from an input data array and scatter the valid elements into a compacted output buffer + * exclusive (prefix sum) scanning + * on the CPU using a simple for-loop + * on the GPU using a naive algorithm + * on the GPU using a work-efficient algorithm that avoids race conditions + * on the GPU using thrust library ## Performance Analysis + +### Comparison of GPU Scan Implementations + + + +### Performance Bottlenecks + + + +### Example Output for Array Size 256 +``` +**************** +** SCAN TESTS ** +**************** + [ 24 2 28 23 36 34 30 21 22 40 10 0 17 ... 20 0 ] +==== cpu scan, power-of-two ==== + elapsed time: 0.0005ms (std::chrono Measured) + [ 0 24 26 54 77 113 147 177 198 220 260 270 270 ... 6029 6049 ] +==== cpu scan, non-power-of-two ==== + elapsed time: 0.0005ms (std::chrono Measured) + [ 0 24 26 54 77 113 147 177 198 220 260 270 270 ... 5891 5934 ] + passed +==== naive scan, power-of-two ==== + elapsed time: 0.823296ms (CUDA Measured) + passed +==== naive scan, non-power-of-two ==== + elapsed time: 0.13824ms (CUDA Measured) + passed +==== work-efficient scan, power-of-two ==== + elapsed time: 0.635904ms (CUDA Measured) + passed +==== work-efficient scan, non-power-of-two ==== + elapsed time: 0.311296ms (CUDA Measured) + passed +==== thrust scan, power-of-two ==== + elapsed time: 0.203776ms (CUDA Measured) + passed +==== thrust scan, non-power-of-two ==== + elapsed time: 0.094208ms (CUDA Measured) + passed + +***************************** +** STREAM COMPACTION TESTS ** +***************************** + [ 0 2 0 3 0 2 2 1 0 0 2 2 1 ... 2 0 ] +==== cpu compact without scan, power-of-two ==== + elapsed time: 0.0008ms (std::chrono Measured) + [ 2 3 2 2 1 2 2 1 3 1 2 3 3 ... 2 2 ] + passed +==== cpu compact without scan, non-power-of-two ==== + elapsed time: 0.0004ms (std::chrono Measured) + [ 2 3 2 2 1 2 2 1 3 1 2 3 3 ... 
1 3 ] + passed +==== cpu compact with scan ==== + elapsed time: 0.0048ms (std::chrono Measured) + [ 2 3 2 2 1 2 2 1 3 1 2 3 3 ... 2 2 ] + passed +==== work-efficient compact, power-of-two ==== + elapsed time: 0.538624ms (CUDA Measured) + passed +==== work-efficient compact, non-power-of-two ==== + elapsed time: 0.223232ms (CUDA Measured) + passed +``` From eaf38a8e4c7f1004f234b2f3ecbe2052f4fccd38 Mon Sep 17 00:00:00 2001 From: Rachel Lin Date: Wed, 17 Sep 2025 23:02:40 -0400 Subject: [PATCH 08/17] plots --- ...Time vs. Array Size (Non-Power of Two).png | Bin 0 -> 22489 bytes ...can Time vs. Array Size (Power of Two).png | Bin 0 -> 22341 bytes src/main.cpp | 10 +++++++ stream_compaction/efficient.cu | 25 ++++++++++-------- stream_compaction/thrust.cu | 4 +-- 5 files changed, 26 insertions(+), 13 deletions(-) create mode 100644 img/Scan Time vs. Array Size (Non-Power of Two).png create mode 100644 img/Scan Time vs. Array Size (Power of Two).png diff --git a/img/Scan Time vs. Array Size (Non-Power of Two).png b/img/Scan Time vs. 
Array Size (Non-Power of Two).png new file mode 100644 index 0000000000000000000000000000000000000000..fe58597e53041e6e8af4653c4bf533c87d7b1757 GIT binary patch literal 22489 zcmd43WmuGJ7e8vV0RaUGK|tw7KpF;>?k-6|Kw7#6MhTG+q@|^qp;4quy1S&LJ7$O( za?UgQZa2RB{hv?gT<6S}xrXPCb+3EHZ^c89ijp)Q4mr-HOPBCuWhB%tUAm%k>C$DV zzit43nO}1Ia_N%xCs~On8t#T$(^wzxNFIyrdhLg{Nmi<(J>60q16S6RCHr1^nm?9^ zy#H4HG7(Gmkb3iLrbr1@RaI^#K?_yK4@q>`gaKyPT}k=6w|lq!k@fa?y!*qMDe>DA z>svc39_thFg((v$KGxZ@&_d|>-OIq%ud{rzIe_m=ue9*Dp7-|4cQUxKd|Pp^Bumv0NBTq`J_NuO&998wlL%LFyY z;Nar6oUcw*^36sTTD6)57UAMl!J1euB7IGn$mM!zvcAFG<_2W|Q;dg6i)b89accr31 zm2+BYDR1T2D5VI2bs>{`tvHNL7hx8;oG+nG5*G@EyM!c!Vc*&F^l{ zb3)WzxEafK9^b$J)$$Ytzuom7Yv2Xe!ulO+3KZ|s%8Giu&zZ#Z>OcnV`gE>(j*^Md z>24FjBJ!kTWe=LyOl(c9P3kkdzcwV@OxCbwBU@aNm@r#>`XE=xefLxD0*4Rdz?yiw zEJ}ksb#QKoBr7Xxo^y6(Nogtbu-0aRyF|3cK&n)<{$#tFhN`l%^7cWi3rs!T_Q#N) zKJswVK2Nid7ODP_Od~H+?o0UR_mdCoam)2C<$UIClv{dqKU^`cq7Z&QU&@8g~7*$uth- zFYgoi?McEZgq&YCFOa3yEmtWf@cF7vd*S?PIr!wpyb$T^=4yS;XMFQ+0Y}@90%ITMqAamAb>{-?e4?p$Wvskh2iZZTsme%_;O^lyWq0 z_zsVc_bGBI&IrGaS2cOUwNcjJK`%6xO_yffyf+D|B?+*t%T{%#jg@T(#S2rKs#{A4g;~ftA6duOZYCNcO8C)?q~C$5 zwLIWUG4^!l%il~NSH!)k@5x0y?%ja%Q*4*btIHzvKEVBC4)47}av>FV(Q{M}3|mDd zq!5rjR8&&ZJ^1BDQSzG5dWUQ|kdcU0ZOv?+HYYrx#;^!=*xOYR_YUIS+I8g8P8T{B zGn%hU8|#qXj72zOe=79iTdCtx=WbO=d$X))MW^i#L)O+DZ4`*n`&7)`YtfQEOpgHv z2d8=OmCcKy>*%GEnxu{Qi=c&%OG``fg*=lS0&jkpO=YB$3%vT28!}=IlLOdzFhAGU z?qF4FYfcVnHO+H3n20idF~zW5yI^R#Ez4_HUo~B7unSs}h(|WXvg>_=N85Iy#(y&P zVHkX-)~m^=GjyS=+m05=vgd88415j~@;pWD4GH?NLc!#2T8 zv3)jwfYM8gIE4tiZQtZvn0aWJouc-Tb|CPU`o@y?xk>~oI#R#aPnBMYX=H2O-mOOA zEFfAWhIk`7>^5~j>L@e^`^Zlv#W$w@-olIU)V}V^!kSJiy~KS5=P$tD{IN|0$0%C9 zTbQq#1dVW1C_0AYaPm;CqJFfW(8c(3#~iY6n9N-b+w58Nf8%vYCmU`7778AF&#R>{ z%>W8$LcYhYtO~oQQFtDmLJ(P#WEA*QLzO|wHQ!E7+9PP^4tne|?$y(KIZn9EdXpLk zqR&wa;d4SD+m8W%yKE>;u`vNrO?p{Pr( zIc60(Qa*cJDPbRG&AY=a=L~Q7G$HmJlvVNx{;CZ13ZnxtwLTjQeG!gfic$)l(yK+s z1%27prN}8pIL;U&t@2s>kbkx779sLT`xPPNB(ATc;~QY$TF&3|gJ_7k`C9$h7Jdnh zYiB1S_$e&GWBf@>Iuq=XcAUEW0i{%gHM 
zBma=nS{QYysWzl0iAe8YIGq?C*LT}GNj~59S-nrKPHT`yzRThsw4;2J{n4QUH2i&; zzPDlt6_K?r4b&#%^zw~x??un;o*2<(k-`wKJMkg~buS0fcTLrU!u3h=0IS@&VT^!e z(oUEbj4PWPVMPU1#Pi{Q;^`vFcc0av1ATZU3K@z+8Mk#Uu0^=lq2_T*sOl?^db$_x zFLEwdG?VMVg3CqXF_%7a0_BK1X|^dcWOJ+`qHaGD4~b&f&U$z7q_5+K+gbQs9)_vC z2wBqF(_%%f&|0Z=&_g+mIJxSFg?%}QiZEro1X0nCN+(+7$zBGihUTsc{`;e(xUBP> zQOi1_jO%=|2s*!a-=pqOa`G!Frh)iI9X<5e;iLStF}8~{X4@qTuCx#? ze!D3FRYpjAL#@YD)Hf_IQ#Q3AWmwUhEQFAb5L#UBa9rU~9aiM$;I&kxQ>%AL$;88# z9>Sb~Ul{wM?=AfGXt}*nNw)x*HvR{`u*S%RfY5rj#XU6Q>NgZS^Gu*r%~A)QSJJbh z8D*Esd$Yw>BP>nNpi#A2j>RnKr;K-%oIMQARTKm7ZM5{I_14pM3}LtWD^Fud1+A-D zk}4c>J8(bnvK8Rsp-n!topLQ)k%-&b(|N>u6qWz3{@%6Tj%=2h^TnYK@Q3JH>sFUV zKInqw;^SqVg)j|xY+Ka&G1P`Os^jaa<~<{q*BzFBxeHxC&r=FZlmn}IAiD1N;nB?M&2OR!ULnS4lO8(M!Y}bc&cP+2Z9Th} z1o8&)s~#&iohT&h+{Ybr?y!8 zJQoAG{xBEoPAxJ;rz)bRVDIB}PLM`Sc1g0J$gNTFHD3L!cP?2y?igm0N;wq}yyuD*z zM*1iQm3a)PSP^Y9`a!&hP1P^M{IOkUZ{)H-5IZ)>f+6;K898+gN`naEv~pF1dfcQI zDiARb7dvh2-PnVY9o+vm>w`)ZV>CQ9U*_jvyo^i2cFSHSrAsClnReUWgS&$&XEYBv zq}#jTQNif%C0UUZAojyOwN-eFBs_`Z!DtdssB)Bpje9C${6Sr^&32|c>S}Ci;6kp? 
zI1F_FuR{0<4(u_)xI=H;sME=Q1DQ3k}9sw#bMid1LAtMW@OnI~c& zC)%>BQ`I~uLnm30k8==NSdXQ@((7CjiZU%iMF+-Dj3)|V&8!h`eJZR%>f-{gSTb>7Q_Y_Ettg#2+CsY^q&p%ZlKeQoiZXF+sUu&>7ZEmE|-Q$K9lCZTuRn#`KnX4*GKZCfoKkAHkHft zt-CFcMK1ttj#&#UXuz{p~?Hc&$m!j`|?kg$C79Q+#`;=A*L9trYeeq?S*n zjn?Zv&zCV~Y}CC;xQ@tFzaauQqQ15&EDwfhe7!b|Rl`ZgKI$ zQ~ercOt!$P|I-sx-7w&8)(5lXQFW^6uHRfrg7c#)dS<|TJ5H#*HR#@pE3 zRQp3_(!QFis<%PLhd@H6?i3;Kkcf8O7*(8FHWV}rH}F0fnKNYF?%@vWnfH#2iz9V_ z{AhUN+{Iq*PfZT8*My{B5DGFOw4OAbR?>E%*RdkVfVgHqSs*fo1org%Kr zZASL`_3QJ_x6x*&$hQ=Bx3*qGhVR2SB8wut9t|7bHsvIq-RV#Bllg{!PvDjzLx@AI z#C(BuP0wWGWCnL(6cfWbU-OCB8m#ad>x_3@L2GBbdn(Zwu$BBKr;joOIqk+ZcC{Fn zkga&6Su@5N&6tIexK?##QDWX3A#DuP&O?fGC%{2G8cqOXL=(F;ULp==vVhslzhprv zs}BW?@r!#aijY_zA(n1;0SO7Eoq9B=Cy`${>6sdV3L(sS|jGXs^Ym_ zxk9rJuo^nQ<+iYeKB-3&{c%3>TJIaY@#mTU5jR*DVJ*Xgw^HGVXKRku(P!Vb71@%} ztHtDi+sOoyBtd?QNrFzfH8uJQ6Ad~3t)X{`3>hZ^R{=}V(io3L?xfsHAa?o~Fe=9G z5A(x@$C~RI7LAC>P_e<`65@Zea*kl>qXt z8uu~nz8fyQuq3iAH9yAdx~apZ71U;Ymx$3Nwew`^b8`;12c$V2q(p3APhs!mD`GSL zFoG^G=R8w_OSQbwJ=LoXpBiM7I}%=XRubDX>85Zl6s3F_;ln+;7gu08lpVCFf$z=_ zQbKxP?G3z!d(Zm(^vL;St9cI-rIjtumTJOD@P38_vh)umqCzGa}2-}QED%!Y40mX@Y zIqC8G{5q+=@r7gH`=U=~(HuVc&+}KP1*TjlZPN`IOd^R6@dT*fv=>hvo#=Pkyxx$<5O%=ST)4Iy8z5MW2;f#mq%uvf|6Y;Hu5iu9xi}9%yK2NKkw4 zN63_B^4We@=}QrX78idIa@~|E&@1N?Aq0juh-lO6NYT^Nr!&gW&*atAh&XrwF3x#C zk~Clgv!2>q=e4c{c>hLGy;4sCFD_@EW**5A800XyaWJ@+lS);16&NKGutAtVKW`$m z5X!Tf8KsmCRnK`Fm+C$rG&k93{6R@z&VSb%824JEf}=4~Dx6AjCB@4Q<+Js@3|d;M zHnmK}wmlHc$j=`KB$)hse}2^baHwYs=D%MHX){iC{cbsTLTUu&T3TFuH+gcnm4VAy z^H3k_!4>{kw>nThwO{q&%I}!1^)9@W+Sr^06wClGoo;i>27rpB=t30B}vN^;n?^ zSZk#>844hvx6)Uf)S6sbn`P|Zg`eM|trcVj70MNB73#~z8G16G&6C!Dx`9XDe|~n- z|=-Vv-eO^s$lF8pJ4FM=h zxL)Q8$x}-dw|l zg&P3V{C-apo+rv@LHg(iV;)fgj{+_{`ux1eh)x>`c?Q$^MU6B-%|AyEL+z)`5o3?` zOA=dYN_{YXW9jkb|OuRof*07U<@!8)o&=&L@RPL#GbO!KZdz-Qg*;zrNk!tt3KBrW>H zs=tX=?m06$w*MIb6(6xu@Qq*nXOC8kd6?XQ*THdxV_7HR{~EB;@xY9x@!5W@azW`e zLbiuJop13DABXP0MXds~CThYb5=pBbDSbxDLkEjSpIyb!so8YnslnoBm_@cM@n!KnQtlB!3&yDg)A-e&%oP12wH)&AZj))L&w;yYa7#ytHj 
z#(0UU>@yk%*ysKbun!>5zmB4p4L|ZUT^$tY^M2kpr7Z<8oW_i>wv&NtHPrw z#rl1-YpsEtK~30NXb)nm7+Z|=t?}8kQ~lQdETU8#YSkg6cwFmTh=>IMJ07wp1DpI7 z@N(z*&&KjExAPbSqoJB#p91H|8Y+HJ{m(g+z=n5;5>Bi-$-Ow=XZ()=R$zp@uSR`f zAo=0Gii2*fR5>+90OZS_iy!{JJoXV(s4i8)39PGmv-=l2{x#ss7B#TXQP}@sKn%)8 z^cJwfNMP1t-%L1G6Xv+kM>e@szjZ;L9z^Y<`G7FLZ^hhxo0S&BFcv%t7}jrf>~0NQ zsk3sm+0L5|%u727{$TtBr<>0v7xa4#9*WfUgp*D{LO+^+eJ+0RGZ1w$ zx(ifjhyH64QEF4mld6L1K)ao#w0Xp4@2@lR3>Bw&{$@gt0$9q2BH?6xt79VdKU-u2 zTl}L0@feu^ZbP%WRP2{b^e8mFUH|90Uce{5^l?V!c^${9eLC*N$HB<&3*d+t^Y_a_ zN4Df3Zx+cE-(b&8sVoY`aH)fwPXl9Yez^qtU}X>bf#h2GwNcnL)|^kC%XQqCd!@G; z|Acj6RNv;#Qzs))q-5nYO?J#OLF9a^mw(G%(EQ$MTv}yO9Zc?v^J~nF4;Mcn_|3-< z($pl{%MMl_lyApsYV55ZnqroK155m4nBenGe%~vR!B~|WR{(yK6g~F?{gyluR*(p# z_(okCuCV=gquT%Q-GtYsANTiJOqnF#;TCyJ@+&Gy-RlM%1g@+OZajpjWP10lh()Z#gV!w&tX|<47dQ7KX0Aizu92nhY!~ygW zk6HxdW&dMfxj=|-y;qmD(E-V(7+*T)_tV?{^8ZnA=7PV}7EDQ9XNnsC_kj3H!y|e;LVYl(86Z$0=9Vih|wnr6b8}B{! z#|23M1O+*noP7xta5aG3!gRG;4uFQsG|w%C*1f2 z9a`ZNP6CfAYCF8L7Due@2L1vdNT{5={2FOJN)iP^8Ulr~ph+*}c2)>S6%zn=P(Ob9 z?_UBe07~t}HW)whE84Ftc48}3EC$fAw6>v#8Oo>NSy}rEt_!bct)bA_{!o+4EnfmE z3`Y8eL~L7NWyAA+{Q=44=**QLt{q>m0pKqQKs*w76nvSp=yRlr%@9!WRRk*8*=gc| zYLb!%=lP|2<#vxuTSIrxh?GURz`|rV2eBEGZq*U|N!X3PagNBmHmo(5=PNouMV>)1 zK?0Lx@6sug?VQmY5hFVSZhdtJH|DDhg#s3^Y6dl|Y8+b|B%RzPH>n z((-jd-7j1O@lGZ^lmp27bar%L7dg!ZkV*|-t(y?M{I_1z`LNs^ESMG@&W#HcF|6g- zycfcb9b4$0@K)+QdC=u;7(S(t3eZQe>34{%&(DGa>GN$d7+{XY&11@zVjV?iR2I|hRO0)RP6rk#MYD~@Mr zS$2(H3n7B~oE;AsxJ^s)PrdTf;q%;oS#^r&is5a-GV?`)+>V3S0z^G!KR-Viv{Yo`mqNGa~3G6FwNo(Ai&i14vrq&~3%hgR9E}q8u z`^=ng?hlB1i~CL1pA78eqJTW+(4Wt{-288pm!F9|a;lD+H?{U{;i=kVe(GX%ZDIef zANQ23#_fLF{kXB#fE(rEdbg_vgE3>X)svkJ-yHn09(7=d-`{Tdx#i1cSH_k$fCrr0 z`f6(F95(_TFUAGl;F3(WeM6UIF!(0Q3K`$!k*{i>4;K+1bg!U^o+fXL>B z(4UcwW>J8iH%+CdvoXNV7=ZEez9TSA*3O}b(`^c1QhWO(mru9 z^4gUD4F+JK6U0?#YfZ-y0MN#0HB*&uWunF-jY|*+N3QnTCw@2ffxzBSpKqm`r!N!G z>w$(hS0zOSLjGV}s*lrCzsZyCU_gz5P!K34sO((~=u{7OkKZL6U;~=z@bHsXATS0K z9{fQFm682TJUfV|&Sv9QWYjzc++B<@Gu9h}h#7T~UeeEJ{!7Z4PEgVJ8MVQ|AC!q1 
z{^#?V)HPykt@=tJe7f^f2=a1s-x+$Y(F*}N@-?7Og`}&n_VC&FDwn)c27xyhPiLm> z;PHXHb|X!%+KXvL=JY3dfFh05csP;sTHU#R<<0Jr!|}le#(8A@YxWq5I3^vr9>cYq zZRe;4woxqhImj~#SgDT0CYdeL%!_s4ef8HxPIOYG$nC%J0d&60O~T0SA;~Wd^D5@E zn_@C|Sh(vE~%h=Sd90GtF8vIPIah#}J+TrJpK-b$E;EuAy z&_{jFoygmE7cA3X^FNt8{IQ9^E4uqP zzV2N@&T~giYcJ;i`$B|4i4;}a#yfZ6MZ#nuKMfDB15`n_O@V|z(VFhHH ze=Ha5aB(ns^^*tYeJPm6yvCv%)_<_ObS(QU4SM&Pw~k!hFaULxg2M~2M}buN8j$u4 zCx>7(dhw!0A9{K@u2%VplhW638?#}C$14G;7)<+}NxJGQ&>u(Zn@hhxvNM-#b1xrM&#PCu5H0am${Hly z)+1!|z)=xpf2Z!>Ykn~+qFc1E>KI}CUQZROmDL@FLe@D8t-&Uz~G~DBIRT)dA)l_w~TqngYCACv!1_M+pn4ZfllWxo`$1S9t2<5)(QRiT$7v7N2i5ZcP1~6QPD$ z%$`Mtl!+6=H>I?i8PiufvloMbq0p<}O^IJov6AzRB{BI~y^?TdNwi}=oqA-=93 z5vx+Mz(A>575w&|e#c@k)|bg7eJ(xj2Xs2T>K7w;CS+UX925pJuW!e0vVIY9Q{Z=h?oy`-8JB-zH+2R2dFgu$Uxr^z4F4Cu6XYGTV4 zgE0ZBn(g=p@4r25{DceBqmtF+otVhB#G~ePm*rEhQLI22SwTJv zh(`a;`gAIkLxDX`ddkdARAVMk8g@G39`EgMb!a;D@dx=%C>8=8%MW{OfcW(9NGZB5 z73UWQ+28M;E9f z=#;dOr?uMlq2=490wv8M;cgvZp{d>PDvl#X;m-`%0pD=Eza|c}DnA0WU8dj)R`6Ky z6Y9U&3uOi3v(KQku6dh|k|JbqOK`Z``IvJ>BQH%Q;^wJNK_MT2S=K99LmM5x>Hz!t z{=KhI%1&@p9y+YJ92pFSGo#HJC<`GjOqJxLV(Eod4~8xe4Zp=Z{yPUD_kK)zc)iJ# z)MdsNy<`yzXHEr0;LnoC70OB9?)Uy!4Ae$1)!jm zp_!)z&8wigu=ooqBa7XcaA5t}b7RO3@d7G3jGTp#b-R`&k?)CtB*j;AA&V4`Y?)FP zR@N3y9H3K41}O48^xkd6T8HVDc%Qgpz_=-cyY2TcX(^&!YR{iPaM>J>cvfy#P&n^- zF(&lj89R}1JXmeu?i}_1OSr23u3X`|k>b+loV<}|LA{UIoR8OXqQ)o^Ugb-MkUX*; zWfSt;XF6L`hNz8p<13+V5K!IYY5cEB0JHU_i^su78cw5SbCFhaTB*-$Ln3{s{Uh^k zjJ-3$U*`}SD(=_RHjpG@@UPmba@~@{G?D|o$RRJXh3<*CtDL@%F$R!f2)Is#b}sWn zl=wUU3&B)8tydA%-46jXdB(4UXLo?>B0mqQktc;&zhuU&Tx!Kpdk?=nCi_q^h}pJjykuceL$&#XpPa%GC&c8^nO z+MDTCTM=)0z?mcTDXY$ezKg7P<+|jvfx+c+&e?NUN(kAgLiwCan?@lK}|iK&0T zGn*h`ZCnS+sbq|;V@jJq^M9G`q?SR;AAt>1Nl@O?Jws7YdmX?)SuRTN!w3~e7v$FB zsPEtZatlx~YNFdGK)07O2smfbz5Q%~!~iv*^@R z zYSrnqrBlEkt^`PrXkPn?Sa7XK{y12{Y!Xg{zRNvZ@0}R?6J+#k=^L5CuydpXPORs! 
z=C}V*z>(|{CC>zKkh7i}!4~L_N*<3|i5)S398~GF1ByV(ji%a=pthf(3=1=J8qlzk z)GM%X(*ZJFR>e^d=PjCgt89*K_Bx-hBN{23Q0PE*8>vxY+)i-k{T{zB^ulBJ2`rJ zk$J*12P!^6E<`-hid6D}_SMhmwCPi8+Tv7HwS#^IUB_GY5LO?-c1)$1LtI@)hiRMi z&YgZBD`u`wdGFiBRl^pN0#~X5>+n2;b$I=Y$J>2e1MOm1bQoE_7USa2s0$ttdGzQ3 z#Y5vuXM3sWn`7{`C+C_E1i~+dBb5BMwyTi>NQj!8AuEF|!LDTDEH=INK~Xkhjc9~7 z5))&{7P}{Drh>dBA`gJjMh&R=wIF~3BM?igqdW?jGKsgazy;P?3+H59W5~ znB9u2^EfX7{Mx=bOh5G;CSjaWtK>0M_-O}m-LN9=aEJ-z4MO?Q}ZdgwkdGqzv>S=$ScxKdCm zq>kXsYWL7EzGr;fKp+r7Za|uleD?i_kMlfg)713UqI4;l#!U9-)QIAjp{dcJ^NhMEEh{h^giTk)y zjYC4J{=CiNX@{ntbuRcAc(H48f}Fa(to9Wq)u0EIL$CaV;`Iqs8I8E&^<*`l-I%xJ zo^z|#y)|yU+9U2ap~v^Bb7vHm_aYz*4kuQ7Xn*6nt1}%M28pnrfv%I^#iY!2zZW-> z{6b_Si)46u_v8vuq<*Zf3FwwjZjJoWW2m?N#)moIRetO~aEZz9U;@P)-4S^dbY&7P ze;%X#!G2p-b7`dwsFCGz+~u`t$N3daSXqU>@WS%ej5&i68!8*$b z0}W{A9?torQkFC;qZkzfGTO9byrVJx?1mJ>J8b&B299miiPtfOq=dspb)sKjT3g+u zS)I#S_|W>v=MG}DTygH?#+XMNOC2f15u|A&cBL&IWktWC_Nc@(0?d|4H5zAVx?Vqcndl3w-nb(@h{ z;$h6odyz^e&YR7W<_S=G>~-qD-iFL0gz=h$aU^5(+3FjXvf{5?Z|acg7ku>5e$Qg#=V=NBIbgu2wEKGq% z@XW6b?^l@V3ne2F5>jE!8Isdt+U?B^yp%RQM5 z1e}qOmL)Din5k**;URu10%*>_6t@0B0;xo%8bH*_&* zX#TBVU&sokw{n1xwN^`UhBd1)F(ZmFqf>DM`pGuWbcj=5KFjDP9fFZMBPRA!J&Woy z)tH9|w}1@03b2075FngVfN(n6e7{z7d{gz`%SV$f5o4_-Gq-saq@1KD;ghtA(YM#% zxMNyiSIshPeb8RJcrxeZQWkn$zFgfHB8Gst#gX|JrX21SoibkcagX;sGRopD$x)yG zS)#qb8Q@xuf~45*dh5DlAW_VY zI(0Ia!2%JcOn$*h4Y5nr!J4g>J*m%D#)onBzaMaB9C#6hCgNPnwKq%6dg6i&k<5=|SdyI==e#(Az7D)<{$fwEl*uxGZs;^o{5s`DwcK*Esh; z5|Y!ie=%x_)D!ph+9d$}y^!O`Qwlw4q#FD2)gyk<$4@|8D?OUj@tPxdQ#CL)=btot zy-nM@HZ8aYuD7`dbR@9NE027C_g!Uy){fFEmSVZ$bYCDO&8}(X_gBK2aS-)*dyw;K zOrkFmdFi_2aTLTroXhN~1Sc{2yS=sr4%Hc7byPS*#1`rWrVf0;*PGEw5rfjr#8A*N zXHH%(w7Xi%(i6c0I+oPOb+O@Dfbf^}@Y=W|aa`?2pV8>0MG(s8URx#0FwT4-~f$f8ptHxk^7< zNa?&E!1yg_B%%#19y42@t}PtKDH|oMe^V;_qi8Z#E@TV_+YW&-il&3)LA1SDQn_SLtsdg^ zrKgC6oKo!2&z?SOi-H~HL_x%zxZH@3t(tj$Gd z!~U?Pz^q~0U~T;mN3VP<^GvYsEUI?|inh?DnIw&Djj77k6Etsb!aiA0Tz9Q`T1OAz zPC6HkKee{cYIxHat8l2BJosMOIm*){XqggfcGcE)J>an 
z_9{r%>y2Wi_tmC7ZSIY{J|Z?@xn6WV?hqFMDr<3m&EqWbI>fHv1n^|IQM91W*C>pI zo*k^0NV%f#=JirvGnVnZn~xVHglYS^z@~c{K>BOmx0CX%R9M!$AuDK$|3sLYI3|lP zj3wgn6RH^S;SjdTW-cxH^%2>~tt5YDWA|zJx#y z_@A56mz_M~q=8l~7!cHocF6DXo#f_!tI7;MaynFnpG-G@I(QX6!rG^{X z*iJe}^MmX4X$_;&lF1Z;YodReOhZ>noc?!NG5h<)+zy?}44NX!n&D8xXI0aj4A;vN z>?cWBJWq(ZUK-#VuE;+17A_CDy!J*^v>k0b-97{3WMBe&?%OfE< zYC1JN($wIkEH6A#)#X8;4qc_NKiD5X*}{3Ppf&CbGun!U(gTt9%AOdhPmEksIzJOr z+5JI=D6(g3RSzJ_qhASvYc-Y{tPfSj{x?TnXnUBg&Ue7G)hbRh)I9C8we`PB;j5Jk zNrA~#LW`_?tOZKupKPXFrdBiFaYxvD=p-HDGHUKwM?i+o?sTJ{dgD87CtNi3MLvrqCN&Zm59*e74ae84TGq6FmguPAABBIfVtE3Pw%J{+k_u$Jj=U@ z0^WjPN?(k=m&qH*$H*2!ke{~ZMe;%eWccurl`ezX^Iq6H+b@K`b+AVfP_^S@;Fq!| z_;NZW*40O*wc*D;1`i?`w%@A}v7EQeqNQR(a2fa5Ho(CTHKDET^=N6t%?;)21dqa3 z3s}HMe6WuLr7X}Y0UGGM{P;B=6!rBG@%lU649aGADRbrC3Pk*9_9Y4(n6wyt-D3*v zJ5C!mTVmX=&)QKtIZBqa&w~jcr9&-_5rSAC{G{^RXudGps?)>ufSTnWr@=Wb!zGoQOFkkk= zf{|h{_KLuH78RqU6aPswvq(zAXP4P_qrw&u8&Vf2~%-4qH?ME)e zNz0T*${T9+C)L&A1X}hE%)~BL?`nw9sxdNWG>HA#au=rJvw8sh)w4k5YFT?ocSF?e zlLWlW+_=1t&NMtB(<+`zlgPD!%A%mmyKwiM9{Iem?1x|R{4%^ zPAYKb$0>5p2iN;u&hk@!@Z7$e@pw*^1xP{@yLcrd5@r;D%t|EAxf2($i%7Vfm~bMC z`58xPoE*N!>`=0A3VY{gDHhmnfA2SH%Ohoqm&e;jG>sk>nqr=Is{B=u!w?OKV|DH# zlcD9qrRP;%qfY6_rGq8vL9dM)Lk&F(5~aeX5_p{FmD)PavnJ z5~6~p z(`i&2c}0YYcv3G-tA>YtEt03VPnBQt^O2AFto&Xs7mRBig|3dhBk<5acb*ZhQJK;N z*dwd~qDO~;y89h&w5vVD-jY{_O{B1@>`uK^W!3T>N5-|%%hyUgiqvz=yqn3OoaKsH z6Ji``E`jCmGCQdESvfP8od&%yU4wt4m!x~Ck%&(MS#3lvVS1{v)IEitFpf-LV){_41bv8Z)Q3$~q;#%y6J8w(sYZw67}-4yP&Sf~aS*<$r_9GzE7A_Z4MGI zSG1#up=)8~R{}rY7ZR=f6e6IAJjxz;5V!XOg9t-T@D@G@NhI1tVlZaYL4wFuku#Xv-pJ1q`#1|vkF&OeA2*G}$x z%v8S*u0D&B^E@wL@em4e^*)vU&b5gi2rgL}{ARf_7#+Vd^llB3(U)`PSEX3#E2LZ* zOqJ?RTbBF^cUrKH%Rr1zNKj6TT-jQgz7Os#L@*fEkZOHGo`@;<-CVRqF=dP&?z{7BqtC?(BeAa+1g zb`a%cIa9Isvhxe>%f%A9wP|Qv2;JI|>{yTlbOiNQGiZ>~xo4b2HR!_xqR0UmtnRY4 z?3@l*H4e|DV|#MeZ2ljOD3;NJM!of|Y+`4A<4vt@jGVmPQolruumv+~ zjEcy|bfx?iLm#f%l7#hiDUn2jYwP$GJghph#Rgu#vn(ZnK09W z(SVaM(~EJK6nl_;Awhy3v=%MQDE7D~M^s^JrADHU?PhX{m;LlDu6FQg*hG4cW%E4t 
z+?%;o+d<`Fi=@^M#*{!&(ZeGxH63_kGmfKbmB#G-HL&CWR)&WBKrLaHwnn?}bH!`$j-+N1!G8XujX?GFV&a#=ZBZNt}jJw#jXRl;E0t{V0B`c)AG zr=Zc7kl+?lm7{drT<6B?8r*0wOOh9eGHDO;Wg#!oa-%!Za^Xwj%Pl%5v!>z1mm8sp zmTjAfmODv_%x;bB(d8%-meWHL7CjHvqcPrKPb12y{xHYU@~r8N?}O`_1VP*u6Mm1j zCb3?24r1k^eNQCUfUffWMd4SI z{D^K1*XmL1NKG1ym#6LWb?Z6dDxZD+A?yYTtkE9l1qHcjxJ zFH=m2aD@GmUb;phwZwTcQ4{^TJRa$}W2Vn@?r7grQRmfNs_*f7PZ#PYXl{ZN1g`RZI?+$n^W&(odnvXw@DXe#AO%e z6grKnAX}W)qDJ;!wzmon+b`%uyxuil-=zy-1>q|fqQ(K&N+dPIy_S~S+!{=?VmyC& z?o=%GBih_u&)q~}21K>Wot_Mng z?Mlv|kd9Mz8%(C<;e21eIN1E1duo|GpP84+%IWTuLUdxC3`8U_s;i84CtdgkpR(bN zvQLv(<)KV?fm4cjZ@-Xa&GnBC+w|8u8=(VfIz|1cvPb=>@#3oUIt`jqaNj zzwbH$r9}Bnv`Td1TW^^$ds^d%skAAdtffSspvlj!8Hi=CAAUqj zv@~xIzFarGEqgmIF7A*-c0f-vAc|&Tt<2*jE~HsTiMF9-qew*zRJQyNTY&yQK25E+ zRS}g~AzokR*JkU*3>;dB|I_%&o4X62W&zAmC2Br|nUPU?YNp(t+5mXruag|}s@C%T zMxku+K>Xn{Yi*!(KV0c#VKw=+pbGOZ-Ci>T^K@%Hd4cx2Kqa9yI(W#_6jEH^$$d(h zV$5tdpsxY=YB2fNPyNDApI-M0uH18R_!eT6qldNpf`=h!{fB^oNn~!XZPKkM2^Zdo zA=AiQ4qz>P8=zL>e6$od)aOGoeUs4ORA^dAhGUXTRs-vSPz#-Ea-sk|Fx@I40W)#! z-3-wWa-jUpfcf`j%M`w!QSPMmO6B;GU9Jj*BTwEb0TWjkl!9c_s}laNX3jILsWc74 zpw1$z4n>M|Lhw~T07dl#k zp{+XD{_RlCwiiG}O;wThpDnhkA5}~@BNm6~+#%P&wG?|RXj-w>oGvmL`J~?mgB7>V z%Xm=BfYn8PTep7cu_|0sbhrWKVLL{k;$g+rtI)^t9kDJ+3yj#~hE1yxO%dmP8ev?& zHJC$&C*(mKR1CP{a-jd=Se`sGRWCOFkAXz^!UfIvk0L(DGj49xnKE8*Bz0tq`PFHA z;rl+K?S0$q(>1|AhjHLr=cDb#MB&_*<}q=OZrmqxI53qTT3T^OxX^cy9VHY}qB@df zUuq6YY<|h%I9>DQAQ1~x>zC7vtD`yYUGCJ(cBz;*U$)EiT%!tA^pBal^= zx+`iVg6!#E2w42xSOM2GuNrC?);+izmBhI(JddtXsFflbP9|Oxm_;`n` z@Kq>r2iGzF(ZYu(Cf>xg8FL=hz5Yiej4A9w5swB?629A_hw<7=1+D;Mc8=`Pdc0k) zPdc>cU|G-0h2@Nry$-sU27Jg{ah6B-qW0`jmY;yV3O_e_0AjbLr%P3iF}NcfgH6XBR+NHd1s!756i((8~7jzZ3XUj>-SK6B>*;e7R`YWbV8{FxvzN(O? zDxuH(YF!$=FZ#wk=EOiOri*w=FMzG)^IAmb2{NTU)<5+Fi_O@5@y`xVl*DmM0aoy0 zOTdUQI~vE^GQ_9nxQul*hcZ$~Gj#rNXnQ%tXfub4TKM#n`4kE&Pa9(&MpZGsyf9d| zX{g3QR&bkux``Gqb8Msq@fZs274tRV4^#6Okj303&6{3QQ&a2DKkKtwqDiiKIw0y! 
z=#Wx)Vbr|((P$t+MkXUYJT%1o+{Dd8J3mOUqP3Q#*P8^Me5tj79o88!AL-bFb~7Jz zy!h1Q{bdUg`ZKY1n0=k5p0(8v-?xX!WNxkh=AY^RXeY^bnA4(4!MLC#Hv(}l@C9GTY5ssgZW3 z=^2xpxbM@4MEF1E!N2(($9X0uD@&05^qH9bL+2I*4v&R^@Prj^Hk+z*&&c6x&Nni@ zIv4Cu4QX8-O!K!G2`6nk*QuuBlz=q zBb?!{?5w!+mh$)tKB*sZRuO}@)GlT`S?*Hr9djk7ZPvD<0Pf$T)#-kECqgF`#HmKtzUfr3(LQC9 zAPi@9c2w%%Cra*vxgz^_d*&lxj#bPnY%%J&V=??L^-g9v06q}1)WY(%Q6{J9EA^mL zNrQB}QX94HFOn0Q(&{%fFNtO@$v|S0uAGDwtbM2O`s1}UlE>77eS>&DOp{NXO8$r@ z0`mVP@U4-TI*-7=?bt1JZ8Twt!znietxjnu*z;>1j;x(czO{aIXNYh7%We0JzDEYX z?u@JJ8{p>kC7Og#SwX|%XRO?@?)B_7^%V9wIykkgbFWN_V38g~Pg>%vXynYqL(v}s zE7N@$8kM-4eO>sd)yDdLx}vwW9tQ{+Kl`-CHh>dm5uZ)t92cvWRwqg&O3ay#%Uu7d z4J`Lc42G4^_R66meIrGRiO2z^D9N0^r!;M#eh%KtqX zXrr4PTRfADsssbhEuv(Ja)acPj(?eM1al%P-Yj!+rZBXvlxf!N}gtpme9?C^d9UZK8%gM&jTCo8FngL7UB z2j`sQmm4uN9w=GRVD0Nk7u(6LQ zuQa5ntLvw=nQ^!NX*X0NpZO;SU{{=y;rlD+vHy@IBH_gTH-w=D`+EuH|G$16k2k8- z<4)o=x#YY&_!@lp{r2I`kaDhhe;U8{DU6++T@^CgQ;%w*f$%n6`Vbcz8z}hxD8PHm z5fOs$T>UKgr9fLk!v8A1c6OBsH>&vz?0I^CR`m9At&V z9+9#tsY9+0dupr5WwrR>g$Ux{K=)tWQ=GA3ci7QbQ=k!r{eD3N$Nwn(Cp& zK6W%hHdn_DlLf6+A@;G*NmsM#u!YWup-y^l1u7~khTf5NO#;@imFI&h&0Z&u_Iek) zV@5PCI-hB%t1j;>56#n`vmwBVq^@Gpx-Aoa!|+z4`);~K08tQK$cGP)CaP`eomamo z4u{c-XnH1oVPB9h&72RRPuVWciBQ%MXi= z=0iZkc@?)YF|#O0KKqU5^oWTdQr5OAyJQ@kqwyr8c1P;S&O(J$;kc&bEqOG(2^{SC z{rf=1idBX6q~L&AKLP~apS!T~`JEp$Rk%pTs}#*fwDIuk2HItPVgN8vyZ1YE-uDpI z!i4^V=61`W>;ZGQCks&>0R{Js-D9^}*O%gW-p^*Zcg0O+l<0 zROau4Al;rFh&iYzY3JGzs=2r%Tw|EqbiNfXHs})_!(4UQ6l#o`Kf6+Nqnb2{1Z+&uv zofjOsy1(1EW}X0N(5Vm1PVa9Su4$Vv9FX(~(>UtxL-MQ)WCU1{es4ct0_Vafca_>s zcyM-lqL{jk1YOvu(`@eAH%d%MpwNqGwi|IaP2{(X{z}{1JjNInmndXw&{g-WYg9Vp z3r)5B$;mNp4}NAHY-6gnWz>=6Jz{X!x#lV*&n@Fo_$Km)qD-ur`K8qF2lh&zF$6F?;5gu@RBB|PG7n#b+bilK2v5xqH?oo z&QGwo^{i{N#b7;a(PzG)$o;V5?b`=CR$G8+&@h(;hf~WJ6a#M#ZYmVUFw+efn6!J{ zJ-Q$I>|+x13TACQL-X9r_7`#`HLm`|Z?_^+&+6xSZ%y^SKeNx>fADD)?QJ;qyd4p} z=xx;jk{@?`uRk@#2WaH+3QC2!M%6w0aX~qpR)nRgtwzQ#0qxz6jz&?0>44zgyCaZ! 
zJ~9f55Ce9Djz&q?E)@vb&k)070GTLx>ZjEaL_)s?ZAK#_@p~sKpZl#!Gm^6I&aqef zcob5?&(9-McF?G~^(aK#J)7>XPoKz9t))?o{>L%!ZHFSE=%Y}R_{2oZx=%0a$I8q? zv^vC+@_a7f1=q z6LKN;-D=tm{l;HkMbGv_lOmf7me{oDVn5y+U%>D=z7HSu#U*I-$WN$Le#7&S6X~8I zd(@|(&lEL*TP?RSuxQrd$4md-4c->>q+_VbOrHt%B(^azW!MgcD=&B&s+@?fi|PpG zt<@j6;vEl^wTq((7iLF#PYxb@>OFAex$h;v7hP7UTYYKtD_s0^-=rj5Sr>g*DP0RN zh)w>uOG}>@k=$vxWShHn*%8}Jf6F2`)MbJar@%zl*Q^bX2h7zfCa~4ax1kuh=&kpx zv%(H^!8>>J$X@wLCWyB-)FYe;d`DVmG&I-%@ue4;A}W_M*R|A98y(uqT&tW;R#bdB z0yXEnWclF&9#J3?MwZehc4JHIzVowKgd1o#%0zh(=aEmXul-8Fb-M%3gdJL+pQd*-Wzy?(qPs?`*&==c&vMyAmAp zgpLEja<2>#iF|!`9BoIpusoxtaJ`8Y7^ManlHQuLIwlTo9(L1~^6Rg~>w#KHb4f?Vx9b>0{nT_t zi^{ptqIU@#q*>F%xaIUxRPm0o6m_Seox^(sJM?8e`ot{-56=Gh##ut4jbonSrq1itKmVgw=EG{q_{q>h(7B&kDM8Lc6l zTPMSdx_ji*9!5*;grY@}tF;LAJ?ZTl?u|l|t=QecEP21;Fl2eJS=59SKN?2F;w{6- z?p{cC)V+o@+JYfWDo4EOstv9pF8KkzIEz8477RKY(o=zJ&mg%Ad>qtLBxy4?LBg&aK=z4o_SM0W#v#RK9;fV zs6NY!ZVz&DC_Tc4J&kVUbEmmH)q^W9+0(+Q`90iuqV@`HL@-y;Eino95mK4noCSf+ z3P~Be8#$92*AK?;-{+qn3T&9VCxDBY+R9Z64{9*BnX0i*S2AFFZ?aj#`kHvJqo1nZ zJOaa40tMsAFS^Q-H3}x4+(03XrpRl!RZ-+PTD@Br?tK)R4K(X$ zsd0E&VShFLCdjm*EVA=X7pPP=K`TFZ&wTF&SS0v3-%C};n-kIC&ON_~B@nVvM& zsjK$~mXM<4ii)R99P=Q&JjXIBxdKn>peYvTzKEMI3d0GoQOF){zGLn{eLhXe$2*U- zzhirR;p8gkOn+1tDzdLZaQwt{&ynXKLgwz7=((5o<5{&FLNw}ISLO$jw*t^F-Jn?;Rx{aYITx_QQ{rt(zpkt!$zUQRV%4jblmf}nekmxy_ zRqZq*UOHu?W8)_T^w6-zE62F(of(t_^!LILgkcxTZj+rlL@nOcs8}M){qAhlcqlrv z1bYmcxG4m)x|K$Q?(uxypd+V>BDLV`@R?AU?yxXTVXs5JZMbQugnMoE#AKv>h?~jn z8u9~THGP*!ip2IPzBeqwgzTBr@qy#^VajD`<73Zfo4I`!Ceu*eky3-LJjEgiYhzsX zjrsocJ)VENY~V)}Iy5}ZtewMouLD5WP!HyPD*-SRoy&6CpGv$5GYZ)W=nuLParX7B z3F-+Z%b=6o7w13ihm(R%KYRsID4iz<8E`i7L%DCWO;G6mqgvFGXj*{BqZ-I751oDa zHyF!`A)A4&WfI1ZA+7uTRGt19wg}q!&Zuun!4+Zb;LDpwb~i^Z_y&v97jqEPmClH3 z>Wi*Fx~r7ff>2%{Q^vXqb~9hLy8~YBSFJ2&A-{s+^E;!O@76l-bW4WLa5vrM(KWdO14xu`aF&YLyK?42Z%Kq_lCn-V&cOyr4BJN(NXgL=eWyZ z9yo4xBJRaFsu%FdGs)a~{0Ug;qOxUktf1-IO@pe9Am&WsigPcAC46x$Uw7tg-s5W` z;hNCX)VQwfcFmHLIukFv(Vc)5r-b7FB~yFJ-nD?v37O+uqmjjB(wfuKt$OheNCrbF 
z?=uf~%(Uw=0O%QZ&roM4%|-|7w%+m);-~l4XBtH^!O<4I&hIA$joU6emo8fi)~9XQ zG@rN#dmcJ%9ss$t%{=+O<1;&|(X)eoZz8Zz?;7}S704tC0C-ZPdX9vZwzJ^H(nO3< zxsufBDW*9;q~h%c$az*9Jd#hhR75G#-7ZCm(cD z6QEHfSDkAsEQeUE#!47Qa#SRk-T~B+SQl07eZoNthG~g@gL@xM&bBQaAH&?y?G(9y zQFe~E`_&#ke7ISOzr7gQ{JmuP!zv^bfMvrOGQ{XNG32Ky;>VkQ`)o0kMf5yIFQF*_ zeeD;+r0nZ)=~TR(nwpyPK!caZ%UMSXb%plz4!>HRC-6t5*vpjW1wGDA zFhg9`QzX$m)T2RUqy`bC zBfPI3D3o#Q6bvI%K5wmhW9nxIG9ti_pMNR9FEVLwI+t^qgtKI%#4Ky)0bT+id9wX# zVf7`_R~Dg$Z>M^9r(I`*48mo1r3d0WTg6`Bn2GLF480x|lCYWdKsB-w-CA+zK#1?h zYTMiM0_--)rs?2Y$?imzwS22TA!$Udd4#fl_foyfI!W=8Uy5GZ)!7zMSNR-kHbJpr z6d{dZps57d=1V~$^ms8wdGK&|wS9rY0O})XGj)f~duKp~9|#PGT0KUFgUwdr3$~Iz zo*#}FK_Jk6ioJ*1!BRpuDk5bEGOQH3J3!|+-_~r7o^N*=)pwQBW7SlEHUdGfRbjjF zap~0ut%QPeA{~RYCP_&@O>`S{1nk)+E*S+@%haM;W%i;iUfp$XpS^IVcHL48r~X#U zdEN0iBXokfrRAFQfxLSB^WsL$os~?htMSwL|n6<2kGtl-G{rO<;!)1ijq(Kv`R`yHi@aVa*bNJ+AJw+Gufw*>B8P1I~n;LqL zr<|J(QUSltHc)KqcXKOSQ!1f6bL|(?UfBX5d*IL=nB7-3nU!f{31JLf05}6C3W;)f|;&X`bpWrZF%bG`8 zPgGo&8$&O2`t_Rf7E#DoIEO{Ji9kKnJDiQ%C8zx@npsui;*6v`K1GaZQBlDi=MURR zl><-Oy@zzGZNh5kUg~3yGfNRimbrVqB89;doyqZCs zMpUG%&hQ@ORCyZ$-_2Vxy-A78x|xRF(f9ZSGaJIBEw;ot#$=z_%!A!aNN*J{du^sI57wydcHaz_W2PMvXH}8ZkI8%xvm-ImPsE+2-U66fVV2qX9d?;F_Sybmn?8 zEoWbey{}O`r(8Wa9}qz2E<4;Sso?OT=^PTM`al=fh?fcI5aZyG|G)mVwBm7z%i7pO z=au1i1fYf|$5V|SuCn?clLVH5s)LxcZBE6~1}HNi+)FphIHPHoNo#)7u4_HLy%`3L z?mlEZi;J7iDnCDy-`A&r-UoUaPD6@9 zxT{!g=cD5B<9nCVmB9zEu*F)>WjQM$x^v-rL`X=;GSFot`f?6(v{6Hkuz*4y_BfX1?F zZ#)-a@x^bM{;5URZZ|T34<$l(zGd!bYWP;Vtn1Y6e$icymgjy~=e#-y2{?NawBbjN@HJ2CHIa?HHnMFIs!)mA0Q@0tKf!BWX;Z21cBAkBP+c$=)P zQZO+IC(p-!Z~o91H~W6>Qv?U_NMOV%pwWFcux!bDepZ1sr`MFrgy;c!KmyiwJbdyu z|55t?qmZfS#D*qy{n#5=`BMpL3VePoTtB~nCK1@!(4=4maNnC}kk(79w|*I?XrFU> z=^dVM&2YOMR?_j2k{01#dX{s5>BG1l`QpZP7Wor?dHBmHwCHa3&7sy*J@sln63!eZ z7ycI|=^>I>mJi&NsHGnkIM^!Nk?&Q){Y0p7%O`m09h+c0h<8_;jbv~?m)iw z@u3j`Ow~_EQO~Uv}k6-0z|h!RQ*Dgr(lb zMji7|34_2d@5fuf7Q<5A_V(!c)INJk$(GzA3KD!FOZ)2pF;l~9C(jGHczM1#@tb4@ zVqPbmkVf?%g9ddVKYj&-?4PkZn{&YjOc4*HuHj!U(Pc#6k6t20T%uo{&t~*Mk`wx1 
zh0Fwm{Ks5T`#Xk|HKmT#iF?yy_erq3Fao>$6UFF`0ECIf1fQJ=QomD;uSOp@DB10k zB@Da2R3W#8K_96o6+wLm)Pd6U-@k$>Nhzm3Rd>k=VApQ|*8j)U=r*KNn&H&&K~%9` ztf2+!FJrY2mArHK$ChdPS9Qq&F$36gX{We>q2$oK<=B(^zeIQ@2=)>m7@ECwsoEC2 zrjf7qBhIUyN-X^{NAmc7!8D4jd!*cxfNl4$;CeQTy0ijD6-{#qS8 z-+H1Ux)DrT;Ecy^Zdd;ba7ca*Amy(Utt*8dhu-0-IiYWyo`3wI=YKwNVC-OIceJ?_ zC-l=D8p9>;w*K*{Cd^^VD{pjMirCdb<(U_=t08dI0woIk3Edz3ojG(1_Cxgnubg(* zFjnTNVl%L@z>CipejT**_JE^&ogIR!HinJG4tUk_*i&LbNVD@R=~i2y#~tngqUPF8 zFXkVY`<(|A=F(}sB!KUQpkHwR`&U(%LqCy?cTPsg-7Ws*AIU2~7TCh~ z@4Q|ui>7jQu{-Kax@wbvO$Jfag5Pm}9aAfq)6JjJFziow_>ZTl>1lro%wPG`&+Au^ zWOvAXnkpLlc~-IoUwkJ1rEMs>+x{T8NSd!YAG^)|{Eu?=vtWk)bm_?u+MUoQ0VwVO zx*)?M9H#*(3Kf9)T?OplFPR;|%=DT0-$TE3-k-v)s}qw>dOA)y8m$|fkb*#A{bspysR z?$sWeYe#3zcDK9)Fk0x8jgY=WH**Ve$|Ei@k&TWX40~f!x14Q6dvPx0NA>>+Qex0w zhkR3v!VI!z=UmvVNeh4(^JiMusp~MW|A4E9IDhOKPE2fp^jR165FvINjkCtrR8hH3 zNX;Jz;3$~{L$6xONv9DF$Pac8jH|N%1hmV{HUh9rU|KJ|dEkajrGTwrj+Un!`w77T zIDeSuCVg78k4N!@GPh=arLnxPfaM@l-B*8_9F2TU>|o!1rLdwRRw-3jY6ziB(OX=< zA)LhX)c0^OcC5;prtxHNlA$*bQp{ZyWlv0wv5*3&IR16h8$W(xhvRl>1XW{-?lZPf z3!wr^4owN@XA6N{Mu1k!6`?JlQ#A~5H0RRw{oy$P<=%?ElaXxOln9Ig$@P#OG`3fB z@s7=r7k^a|8o}ogvPC+39P0TMz;%Euj6b-cY4$+xX#~-z{gz>Ug|kBttQB)+2Qwbx35B@ItZV^cX2V zw2xl!Q)AqGivfUbFw>tu21l5*zgY_|Wvpg#cS$(qO#X}3nkhK)|6O^i+<7f@QLe~(nS4PS_q?}+Qe zpp1Ew=f3THb=z!@oUJY{>|CC^`3I;4;YxW~ZB~W=5T_*E?C2&Ly)IvBPs;X{0^}cX z`mVkHT$3LvjGd%E^#(m4QB$qs!-qa~lsEqb<9-N|CEa;D-%8^M2q(Y?0AB#peu?#o z%8a$O7nN6TcKrdLO%@o6WsA~IzbH19lU9qcfvZ@wfcqq5jt034#XOgKf$8RN_#Hf< zc=f6ceQEk}lb=#>L}8-R?#f7ep#yZ!#SZ)j+79T0iHl8jOF4_we-F+7>v8-qW(oSz ziHV6T#&&o9e4Ii4Jcjnz;qiA*ykF`8ZJY1FRf|Q;syFg^;2*GG100c%W7+@S6R(<{ z5J=AeuK-NqJVn-FbU*obc993$?w@fj*`0e-*`tDfn0?_7rjXGy`pm+}%70w9wbK@> zJd+l{^q5isf~|jZeyr4FI8%<)nb)H4cbjPxO|t^RzL=U;Wd3>(2RIl=7WVU!c`6zH zl-`v_E$4fBdPpE7X~NFIKnH^%-q2&clHX?P-fq@g-ru!WuN77%Q25AWq!OgVeDBBZ z1vxpgR~?FVtGV^P4wmMt?n4aRm`92Y;g^VR-dSnU{@oiYOWpQIe}heg!)-%KWaj=@ zywSsvKZqE6e^;E(5s{ZL*c~tQe^9Ns*45isWITKo7LsD|KL}dhO?g+`>}M7u;xMaA 
zxBmx=V(-+w9W3c9;B0c>iFeyx6^5;1zX$o9U7|E1g^!<0@{_N~GXlr`hr0?I8bp7z z>FZXDGPe804LR#qi|W4z(0e@r&ad6?bTD_+`5EA*nQcbpk4B4tmPtkeHdAkaoFxT@ zgP|2w7+psk8rFVq3Fj7e=|E@(0OA2h!fUS_j*G01#wQnV z-Wik+%lDDE_Zx%E7>-usgWcsm9KI@zjdP&WX=FWFrPw$B318FFlFfRiArauinMGHK z{@ku};kMNm4jzHlZ>@!~@t0kl!#~KO|4n~0hR5uO0UpEZUX$Af z^IFQmnoBW>Uv6qKvC}XYH>LCUXwK>YD2*E5Rec@wU&rI4M`nu$ z%U_ka^Criov~f#W$u$6)4WtW*zB3uwjT<>3E$@C0*ob1;dA+!kcd=0XnmiQ|Z zwQBfjy!u_IGKf8!O2&pyE|m*B2WqSm_d{_3=N0ulpcIYmbA+}yu>5{>Op7gPE~xP} z@J>y&rL(iPcYaYoxbLp;dmTf02kzX1|FJl6M3vP52yn`ve$W!`4}bpO5|Jr9&I4HY zxI7{mh7PkP{wek)LTbrEoyv5e@W;%;=VmTi{~j#2j~ebb$TDfgd`ZaD$o^@Ql^U$b zf`5n{q2KArSUZ1iLn;Uf2@fW|OZ#UlV_ZUsQ`o{DMjDd$A|#?yq88Ph&8>I^3*@W( z3FH4pm}N8FDev3TcS+u8shdGyTlH0lP`tBrZlz!@{CH8^oHX(_wnygW`A_|v{|280 zqaFrE#skHOPVv&5p78w};WP}<`F)qX)qvK30^Xmkfr2>0MTAU!&2mfG#nmU$$ACCM zC!jmeEEa8N-2=@LBm?Q@XtK7)UnCGy*N7PbS|NYKLes)14tXim2%ZT^G9a88O!Nrz z!N7|#S!0-qZ`w;pxnUIZB-&D!5I$&|23bMy5%pUHX7v9vL3dqz27uowMOiAOTp@-)+1em8wT3r4HU0<}E_p0uleP3s>-7=Zq_@S)%N{uk)NnzcPkQTg7E?p-unil7P;glhNnah5KLH+}hEs&(_f zkR|-fAQ_9+;w0Z2tp-~a+rNB+;CCWSr`daQbY+;dq_&^sjZ{5gxtPs7QNr-cz0sI~ zu?Hw%fJJT&iPgdyUG-V(d#9^KsH6Omsvqosu!N!V?`Vj20i)73)j0l7&at*V02_s~*K~miM8Ju}3X5KFgFkg15cBn(F-5flSPz zm?Sxpr$LZ>fxvGrwcDxH|FPd+EF-q2i0?upWcnG4H?V162W#_W0%L8Dl}_O0_aE% zFz$5%E-VkNSI;V~ekoLg$az?`UOM@kb|)sR?hR$=Po-n5P;y6r-+95OgCx8@cUU1Q zruJo7qyW6-C~ivDh28ns>JpocJBMTHe82*$hqGn^m z-FS9-G~D3k=p0uO0HVV-x_>mD6NQXa-eOi3>~ohJja&BjJ!3Hkcs{j7X z?5FQSKuvFgLoWEuB=K85Du4DdJtgJlA#fESA7C~qn}l?t_l$kp8zBHBokxFd4Oq@j zwo(1^J1^|I+2rsfyz#LHiYx}rwqGR=n6`sB68YK7j7#j5IW>75(B-%M@Ctw49%!%U z0yAsu-QB1@b$>O)c`BRCi>rh3Am)h9+jNeqUEK_?KKw>Y`o4M)I-o|(N!OYkJ_jx= z(4BbRs?GBjinEz4C`u38RA_w!KwuKYqL#GurD;#h^v*W?tYOgHH6qq16&S9zbb2+z zKZ#-VB&>6QYvO^oGTvf;GIh0Uj0r|oV?o~q-n4 z3|9RZnjLkCky%TW65biiyU{XF$91_qjCjZYuhXYqYT$Uj5wgyTsKVr#u0IymuU0Z* z9LQZ5v04Wdx4fn5>%9F@XsFbrvuy!O;Q@$~Q*iFL?uE(=R)-$ceJB%$vq|w|R~u3c z`3=$_qUC(e=WL=wC7%K6`yCiV5o>q!1DEG%%Q!~>aj;XbL1;z+nW- 
zn%?jcrR%`5-+@8sZRhm~jjMJT_r}<`I4RG=op-u6wNmSmA4I46(%(IhCQIkQa#T`JHrQSS;8L${Hpv#zp=AK>fTx0CH)NB^fM?w_>J zmHrO8GP(rmwt^Q|&^9(bpo(TpkKy-hK5Lyz&n#_EKwI_t#Z+`e4`Nk~-NxG@3q$NGY;Crx=ITO=7@$zm_96%V`0kB=J zWM8@)mJ~@y$@B7bE1~(~>i*6xLygOy{vcO9jUwzkToM@VJtcOXdrxC!i|(#`c{d*? zKfH1c0Gh!4_=xhAnZ|nAG6T0IGOOvjO06_FTp8Pa*t~!54_tpmYfHk&$YWu{rl;Iq z$3HzWru}0wA6=f}kE;I^OPNSG(~x<<3kb$fJQg~_+An5=D${Pwxi--f-i!F%@Qgj6 zt)UpP3l}C4wq>?P9s|ePjHUAz^X(=iD}@;Wv8k5KLY^4e=@per-%W6KLhs?i8(L7&a!o?F@v&axMssg`Lab*XEd3b zoj|nwYp%9@^L7)i(zfk+#HID5(nixE;hqnSjp<>H8168sw5@Hai$@#v>12 z%sVk%Wxc^QU796YyD|vQT@3!u+|jVEJ*PMHeN8_3$(3&XXuYir_UYQNyEJKvMNOv- zoVAZn^CWc_)RJz#Z5FkS*R7AS?x_MsRX2X3uVs=9fNSh@LAaxg&^WFfLAjKRFI@;?IDo0& zF`^^db%}6Mdy`b90Nda%uY2nE*EmH!fYR*SQLc9J`?q@rj*kJr9$TPKm4ak%*2R+t zKCJ;ezR%HJf4X60yrl2QaZrxT&1e2G^3-U(bo3JeKt@c|*BIR0?2MiqiQEq9dj9lO zt-%4`4D8@~APSKqvWQ?B-`}71=&mo+<|*CcJR>{^C+5!I%*$sX#7>c304x10@Q^r@ zMUGxqFs&iekD**};$)#}gXuZ5;d+ipz1`JZOs-PvX4mQ;`;#{nvv=kk6}%_&3#+SN?&MspD++$5kRBTF9M8s^Z&9RjLe;(# z!RfO_`@%5($;n+{t%gWgy-otN9_xqz7jTtBA*BAPM9*K6X_)Z8(6-zRZlRSa6rue_ z$1BOCIp6UyBOrGqo{yHkIRMsq7-K#@vb0c8#m~_EJfSYz8tbEMVB$ml6X)j{hjtX< zGeL-*od)t6$(bQu;$%C^*;@V!Ok}M z0HFQXR8oUW`ZnZJbUt2hbpE(5S5kP(Y8HUugBRJ$Pp8!zV5N2b*2e9f@Wz)7t5ylD zn)zq4r$aHrBdS>YkOL%mYrirNIORFVA`hVWG?K1!^kF>eL8yL zgp3V*+OWGgtv+gJHYczsA}E`}QlRiBxHQxV8hG38vl!gFxSM6={0OE^<@7DfT+$(K z*Hmn4O>r8^(oIQEr@S=%r9re*#~t^8$dzsKGh-_UJ9e;NmP_Hnm7eI6cCvab$Ib=Z=ep&4Ahd|`Q=1HBb z4%8O4o$POwvL7u^o(QXSvpFkr{m5g-?J?Xx4drMR>Y(T4ZUQwTQArDKiO$D80Xq;b z%Tp0gQ`PJ$j_#RhrwGXD>OrGFH=^R1)f3WEdsohHt;5oer2<9-DzT$@#}x`?;h%BY zAH+N1+9RXvO;b}nr)DO8;8q3@(AcBXRUb=App<;T&luKNgCr?7oxmf_(V$N(nn<;8 zWru93z3f!jSYY)+XVUx(sTA@IW^rgv%cxJwn(bANbiSfRTW_^GXEnP94b7oCwr17H zPSpdVirf9sy`PATidim|9}&3RWB=D(3AmbZn~>!<4qUe@mw>tIS{!S4x$kK-i38z2 z3v@C$Md(biDRgMXYVUf`2+}M_V|S~zpbD%KuVqbGG#|Uq15m-=E_n3g^9sj)%}Btb zxP(`(te0>c=bNP~q`K#*e9`=wE8)7#qWr0P23s0%2=}Edj4PMyw~)5#qfU}Arau=_ zMp7P}_)N}-ZfU$7#ln;cFQ!@>j^_NA*z`_hD!z^Kjx@(8ISY8irm)POM)Z*6pK&M` 
z2nyZup3!mscq?@w0~-~;!7Hb*fuNMF{EM|ta1Djm%-vzt>;|Q;%94aG_w}51O#>&I zt@d%r{+)xp;{eeRFP$_kYxh?`3LFt={Z0*+{j(^FPaCVR5Kbv?4siSAUrhF1$acny zUaj0RGgp~G(qB}<+tSJxGGIbQNvv4KZ&Pfr&Ju&-`c0UXa0P?MO|k~Lz@`5xnZoTB zXkt3|Y}Z?*^Mg+x^Da>^&pclOakF4@-o=i8_nB+FNPq|>Nb#t+Zg_&6J}ur#96iW; zU#J`{?pIdq>aT5p++ ziCm}?x7F8X98qUd_i-!WW<-UPFS;^=JLCiQ-@L|#??0_sjc1`mX?rS1-R<*qzA0J) z>&w}R$zpQOJZNj1GF>p0LcymBOS!D%_2W>lEpr8Clz%gx@`bIdYfaO(T6eeI%{o}I z1ogL@I)-L#78rbbPvNFJ_YfM5pSY<@a$Zff2bpW8NZQ|JCe2?XSImWWpLZ5m-4E_^ zt)O>#(lh<#8Qj5te9-FnXs{Ct7c9V+b$b=bM4KMac6vLRg7rxmy);C%QC+cMm zY)cI%4m_njR+@$>bc;i;s_q;{!H&Rp1&#{E^W~+>+gx^uT03vo57<0rT;kX zZPy@p_KQ1NNW6 z+ck2Q>UawsC@XX3SFk1CK3XurUa|Q7)D%&o_w&-q<$Z^^>@2_ZD&E$NRek6SxpY`| zw@aWz$V||yG_!y+@?$?ni&LMna^kjQkdIynI&OrM;8r zPkX@Qc;{(A+tx1(s8nZk+m}+Kq$->eWTupPY`mH&4dX_?lJ?|aMk8wrf)hYxr+b24 z^gyYPaXk`$^sRV{v)LaOAgm+Y)N_?%!9?g!xk2NUWFCR!8=Id4OoH1y&zq_4LWP7p zMXFzU^(kkCgRpv{DqgQt{$><@5aSlOkh_Zj3J9OK3BY$ND z#OAQjk{nsWXcH(imqwngnCG-qqp9zz{fOa4fUSSZ`NfB;2h+k&miP9}#Ut|gV~$z< zu||@xskteNTZ3t4qtP|f6h-I0_|yD`QoKFD)vkSkEMpgE6V=^*unkap@oB|L&)6RH zpNcKgfq4oL>=`rqu}5fK^#)V$f&Am)g!cXzRwb1E{Gz@GE3Ua#f|y3WEd!9|y5Z$1 z=T96CbMJtK|1}C=Y|p8QwU}`sZWPK3q!SkD6;6+zfv|~NIS(4Y>p~i0*-i&?Q~0KFn*NAgyE<6! zLeo3GVo@0?;t?Xbys|W_2M5z4s@)TDLA&bcI4|7EZf}Q+2YU^QGhxJ$=G|uRx?^Q? 
zxEWCXN8PS(Q{Qw3!(Ok5SoX_#OZ>V0>?>T6TucKAuS^{J#;sYH+aU|_Qu3Sf9)NW* z=O@HGmo?<|JP%(XSk7`^wCQon7pM&&3_QE?ptyeRL?{}i<-N7;6^6x1%u3>f%GouX zwJ8nB^o~xCvK$6yuVI_<|8U5iR(pES>bI}b4h8vh-8bqVQ8zWjDt+rv$`i%Is0{RV z$IdPh^w^AWI^85o0HPgPskul!mGD@wh}qk|^pO1BW2G2zSAhfNaHiRoJ!3{?l)p!J z`P%4LxmeB+Z@HOWOduWSSuXFQD@tv)^0D221hC_rWOdJIdZVYHymMJGFUh6<-6?^5 z8{wPVU-OTHA)MFwa*$0JYBR8xx7MS$@l90h_AI1LnEAQpt510wm|(QhX-b?AT1DpQ zQirUy_2U@<;1XVz;VV|mq@~c(; z^{yG2RVC|;n%QZb1`%eCagAPL_l`mehHZv6#v$sNQh{Di;8XE&R_RVkJhe^G4Hls)c4Z=&SlEZInq@>?{05m zx>32c?j8pVsE=hfBkdunQ#6~5&hRQ=2j$<9VS1e0OTs*!&;RsHFY1o-B!BEg;ksfm z=tPxCewU@`fit4vT0CVUmpAQMg=|$?Du%AH;X5CCMqh}Ky!m;L5AI6#4|?YB=ZC!& z%K+|P;J}1GJF@ub1qw6NWzUv_nn14$s3`5)yA^E9xo1xj??^QZ@LTWPnc#mXWTtQu z8{8?byzwrqYr0sl*5eD4D~6+M8X-(Zcz_!h+Wh8mwjm`0Z{AzZL(5zC6Lz-R^E*rj zF4YX!H(b1pDG$oOMK>F9s06*{D}K&Y&1A?Vd*tAqNr-U$ytP0h8O`rCdo!wLXX>s3 z)7>+Dw6ko);4F(MX*^E5+6H_-1q#>P7SDuGc+*95Do%+sU`z0E(Df#{KoWh>B7K@pNa| z=38R{I#Hn~0})n>B$`X7{*0ARP3%zwv;AeJpsNvz+V<-xY3)gIL zgPbkDIC?JEBzDH8oOBnVX1V7eoYokmw0L%hXs(Wo&b$p@x}t2WjV0VcR0;&+%-SCrfg8t9v=*ECitnwrNhftNZbEAw9H zx+Novv}F@tcYh>~X9p5!1-rA|CRqU8Wxe`G&Bx{6Ua<&tUV25>;HLNL>#csgvyxX2 zB!4!uN)A&0GA)xf;!VKSYZYU@oVsMj3(r0jaQk-F z9Nas_(anbp4$N}eAo~g*V90_#6c851AktqXdfqQgJ>k$1Tp#6kJ0|2ed!$G$`psoU zq>5;l)*DnwJNt4%$wrc1*~8x;X^*oq?S!N$i_gZiHH7H+`_yf%bu~?0bn`lF)zeoP?OcK3Eu&0yV2y*FYZ831u;DYuQ z;@23#6fU{cjyqtwUvYN-{@sG=+n>4X{DC<0m?4^Vol>e2qfKkG)Am+|iq@>hGOIi- zmAIbe^bJ@2?$`P6X2;C)6}p+K_x2O!hxIfg^1nYQp8Y&y>m|?OG5dg{ZdNv?el#5| zW;O8E_ViKX;k3NrZao~la(3DCUM551y#^I+%ST}{f{0FOfs7+*Ap(&(MBXsfsOVU7 zhKc3Mg*XUmC z*)Gy&@PMz!?PzIhONWEEL_5B(gZPypZ+2_}OSP)zP#XbpT2ano1B_;=(a}}5`z)Pd z$$R`x=3=Es{1x{`TFtxl7kh$=XVPzrfwM1o3J(?;&{~@JhgW=EB}(T!+mhD-zR9NI zB$}mhD52v7K}={n6yP5h*96+j{!|>E=~AqCm5shhEPBSG-0!q>i9a6OO z=%{E%4*|;69t|HZS;GvYhT`dtHc0b?78Tu{#=O6`TnBwV$w)&qhs)UFP)@b8$;v*7JJ~gRVD8 z3d(CR?0kG`qF~rQ&ouBVOAqzC52w1}(N3?JZi#BA92%DEXineQqj!>xCK(-JPfB1; zcW2w(OIiiT=%PjZgC1Pk4LAHDK`cJzh?wV~k)IRMsWjlH5 zp`}Wazu7oA{EGQaO(^Bf=o&k=@6(l{ZufP!gkdB}Z7E#i{bJXn`vPBOfAxHdAF9N> 
z8dS$H*~{mICq5(s3V>|@u=CNL;@4ME#?9+1v%q)x)JBCKO?a%y@+blQ1!JpoX;aYz z%Dg2@C6K8kvkk$BnnyNcWvbLQIj@?M3>1KSB}3SIN0tM{k!j28L^PY8-pB=~Gf04B zZS^EeUGl1+WrOKJ9A)OOLB+_-z)l=0W!{fGOp*cH5g|->ircbIwVB)H@HU-(G^~Yx z4fiN0D2Cc83=0abTlMsP(9 zL3sAzg7B;OQuy#YUig%T!AyO!bhD#!NvQj_0rhT==k;Dp@vO4gjK}5ncV=@m#IM?i zqqipf_`mse@M8+4EB3>-$`*$-D;CQs9`%$J8z9UYX9(ChE_pxXI2*mMrKz~8od1kS zSohLUl17Zh`bxKx>B)KFGPC=%VijUq;czgepapcbTP>aQ#2)lfq()5Z8{L?aNwqaW zJOfRYYV(NCpxamSL+FOFyX_v#ea|~)Ij(zq`89fbIZ(vshaTTJ-QlT*$3wd3{V z@uhAhSt@Q4_|_ov8ywu|x0vss+XmdAj8m%kJ}vEiRX3&3q>t`=Zzw(M>L7FcL*L$8 zve5Aq?%g zHFG9WSG)5oL;a#^k8Lr-b2Z7ki0Y6`3)@dmqi&?SnCBY4)(DRwnp^I|cJG}vfh&r+ z75qF#5gsKqG8{>#Cr3l@vt#qWZsxM9@9*1?0V!=7c$d*>ZHyPa-xYP+X(3FA{WeuW+T$Lw1z!p~0*chxhb3G*}ylWNiPd#6gOBk85$#QMAV>IZC&;8tR`2yj~pV?SSR|bRBj7jo~@^Y_WKM}Yo zcFyu^4@Yq(@ReaQ$0l=E!4p3=aa5%RHo5jzO~H61ixknuq^Yl=Mh1M6`}$Xp^MfC9 z)evp{vffuqZD6Suv~OZoYTEMra8j`kq~2o&_Q^}iyY_#YIrn!q(=?8&wwq<9j$=nz zlu}ybP}P>5q(!MJs&yVE4ymnnw~B~yrr6S;L6}ZEMpUUvDN(0JNwiX@Ss_a55HvxE zX(9xPL^IFZy{`Qm=BHfuZ}mdG*klIxN(4%{i!n!z>ikb6gxKOpH>! 
z9D?qN`u;bZW2(0n)ak`sXgOTTeNd;13sB)KuE=cBSiu=iKqF;~tB+Zdc!x?=puz9Q zpZ)eC|LZ;c4|c?Ny90 z_pHn@A z^N7!*J~V|#Bthl__0C?l5MZE2TNi^9KmNxM;ZLK{fYw<83u zVZa)ln_#_8I>Cw?vJfpl!7)I3VHSqe7RsWZpPZZ&0Wo}(p1ySggeQcrAqu4zv~D~s zCUsYRvXM@jClmOr&M`syc4rZ@j&KL>@`Bs;MoH2fIg};nIllNo(bt6}kJv+1e^;pM z3K~Aq@wnslqCyu3V*ZXXMI2g`2S@Wdv&LL$o!0 z8#X(VsY-5qy{-&&?;b{=TpF{27qv;4&%Ln;T(pmCKb>MU( z|N0YfI_9MfQE>QgI4Q(w3zw^w2UpFfEMF43sREU$QoDdQ-b1XTCD_$_PRK_vC^wu@{9*U}2aC z160Yf^&y&|WI?(FbQZt}8>Z_#c#a$AzDuK02n2Lv329a=p6|(K$A*U+?k=6CC{56& zJ_kxAQ~zo_uoWZ}>~1z@=+*nQUu7>AV@XJx&JeA%gqMQD1lQ!>Zsn!)*g2lss&5AF zu=-Un(0Ztig56)XErXL+Y7Aj4!e>!!56MC{BiM#91AycIrt4y=+QJxOeZC*&d+CUc zjmAq$%=Q`}!#Rxe~F>~vw$3rIoU4->FOZlB%1r6aOm7L!zQuyk% zd1BGG1QnYJikO`*0>I7~2X{6)(ZybWJM!<@G*m)?Ql792nfP%(c#!^lM8>>0wy zSS$-*fcHrjeVk4WsI~|p+wvSM@Z>0r7J}O+n=MEQvr{UIxJB8iI6s3j+154fkV&ee z8uQzNZ6^9&hL9>uch20@;}~P+sdq%rLwApL6rcuC6a`|{8bwhdrW6>|p-@CXQ6z|+ ztOW6k{&0WQ1Rh$S`dgPW+<#fZC8*YcTu1V9`25)Y;u!iUlz*A4qNjlfgaC~GpLHZi zcsduLR7#c)g4&}!L4b^U7b+EthfTYBQ_|DESVOON4vMk1-NccOU}RyRo4ULrP|2a5 zEBhIC9qrURP%lLHPNi=m3PJ9k16rTbi)spLiBhl6-y%m1nV{0((^{)ZRf4xt&sfS&-wcm8p=vbM>d(ER_QvixA$zftgQ#aoZju7kzgJ9 z4fEMuq>8>%!VQoRri;7+?l$~IwmHm-ylfQfQXb3O2gXRqHPHQvLpocib5f($?luUs znF5iMYpv&JHXb8gp7G`=8U1ngYmZVU?Hp|<>KbvItI(BWFx z*D5@E;?jk}TF)n4mslC^cFk6665GrtHk{{j`}VERMEx7wr{Y0C3d1XX9UsOpkM*!0 z-S-S@BsbZsPzn+r^&vZr$1dgM6vOTse4pUapqTrQWYMz=ug5b~6o|?oMnFw#+XGli zo*}_U6OSqL9kYmcYQ^)jKAA)yHG&s!Caow8(&_`h{&SSS%lFz2(> (d + 1); - if (index < n) { - int idx1 = index * stride + stride - 1;; + if (index < numNodes) { + int idx1 = index * stride + stride - 1; int idx2 = idx1 - (stride >> 1); idata[idx1] += idata[idx2]; } @@ -38,8 +44,9 @@ namespace StreamCompaction { __global__ void kernDownsweep(int n, int d, int* idata) { int index = (blockIdx.x * blockDim.x) + threadIdx.x; int stride = 1 << (d + 1); // 2^(d+1) + int numNodes = n >> (d + 1); - if (index < n) { + if (index < numNodes) { int idx1 = index * stride + stride - 1; int idx2 = idx1 - (stride >> 1); @@ -81,7 +88,6 
@@ namespace StreamCompaction { kernUpsweep << > > (newN, d, dev_arr); checkCUDAError("kernUpsweep failed!"); - cudaDeviceSynchronize(); } // downsweep @@ -95,7 +101,6 @@ namespace StreamCompaction { kernDownsweep << > > (newN, d, dev_arr); checkCUDAError("kernDownsweep failed!"); - cudaDeviceSynchronize(); } cudaDeviceSynchronize(); @@ -134,13 +139,12 @@ namespace StreamCompaction { dim3 fullBlocksPerGrid((numNodes + blockSize - 1) / blockSize); - kernUpsweep << > > (numNodes, d, dev_arr); + kernUpsweep << > > (newN, d, dev_arr); checkCUDAError("kernUpsweep failed!"); - cudaDeviceSynchronize(); } - cudaMemset(dev_arr + (newN - 1), 0, sizeof(int)); + kernZeroLast << <1, 1 >> > (newN, dev_arr); // downsweep for (int d = ilog2ceil(n) - 1; d >= 0; d--) { @@ -148,10 +152,9 @@ namespace StreamCompaction { dim3 fullBlocksPerGrid((numNodes + blockSize - 1) / blockSize); - kernDownsweep << > > (numNodes, d, dev_arr); + kernDownsweep << > > (newN, d, dev_arr); checkCUDAError("kernDownsweep failed!"); - cudaDeviceSynchronize(); } cudaDeviceSynchronize(); diff --git a/stream_compaction/thrust.cu b/stream_compaction/thrust.cu index 47a59633..96c16163 100644 --- a/stream_compaction/thrust.cu +++ b/stream_compaction/thrust.cu @@ -25,10 +25,10 @@ namespace StreamCompaction { if (n > 0) { // copy input from host pointer to host vector - thrust::host_vector host_iData(idata, idata + n); + //thrust::host_vector host_iData(idata, idata + n); // cast as device vector - thrust::device_vector dev_iData = host_iData; + thrust::device_vector dev_iData(idata, idata + n); // device output vector thrust::device_vector dev_oData(n); From 3ad98f40ae9a5cfa05ed50ae60b83824836f02e0 Mon Sep 17 00:00:00 2001 From: Rachel Lin <43388455+RachelDLin@users.noreply.github.com> Date: Wed, 17 Sep 2025 23:04:04 -0400 Subject: [PATCH 09/17] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index f86dec77..4807bb74 100644 --- a/README.md +++ b/README.md 
@@ -26,6 +26,8 @@ This project offers parallel scan and stream compaction algorithms in CUDA. Feat ### Comparison of GPU Scan Implementations + + ### Performance Bottlenecks From a3de879197cfbf6c2a5642d65971849696ffe49d Mon Sep 17 00:00:00 2001 From: Rachel Lin <43388455+RachelDLin@users.noreply.github.com> Date: Wed, 17 Sep 2025 23:04:16 -0400 Subject: [PATCH 10/17] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4807bb74..a388c7ff 100644 --- a/README.md +++ b/README.md @@ -26,8 +26,8 @@ This project offers parallel scan and stream compaction algorithms in CUDA. Feat ### Comparison of GPU Scan Implementations - - + + ### Performance Bottlenecks From 4279eca07d8149600cf75e87f2396f5697efa68f Mon Sep 17 00:00:00 2001 From: Rachel Lin <43388455+RachelDLin@users.noreply.github.com> Date: Wed, 17 Sep 2025 23:09:05 -0400 Subject: [PATCH 11/17] Update README.md --- README.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/README.md b/README.md index a388c7ff..b8bb83b6 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,27 @@ This project offers parallel scan and stream compaction algorithms in CUDA. 
Feat +#### Power of Two +| Array Size | CPU Naive | Work-Efficient | Thrust | +| --------- | --------- | --------- | --------- | +| 256 | 0.0006 | 0.1969493333 | 0.2730666667 | 0.1140053333 | +| 1024 | 0.0018 | 0.2085546667 | 0.41984 | 0.1235733333 | +| 16384 | 0.0284 | 0.254976 | 0.9103373333 | 0.1314133333 | +| 131072 | 0.2284333333 | 0.6356693333 | 0.7360333333 | 0.130048 | +| 1048576 | 1.737733333 | 1.173386667 | 1.053409333 | 0.759808 | +| 4194304 | 7.669166667 | 4.442293333 | 2.438283333 | 0.8376226667 | +| 16777216 | 27.9746 | 13.6956 | 6.76686 | 1.431213333 | + +#### Non-Power of Two +| Array Size | CPU | Naive | Work-Efficient | Thrust | +| --------- | --------- | --------- | --------- | +| 253 | 0.0005 | 0.06144 | 0.2095786667 | 0.05768533333 | +| 1021 | 0.0019 | 0.1505493333 | 0.2740906667 | 0.05290666667 | +| 16383 | 0.03276666667 | 0.1723733333 | 0.372736 | 0.05563733333 | +| 131069 | 0.2283666667 | 0.6361066667 | 0.5046293333 | 0.045056 | +| 1048573 | 2.531466667 | 1.062026667 | 0.9203946667 | 0.75264 | +| 4194301 | 7.606633333 | 5.455966667 | 3.874946667 | 1.58338 | +| 16777213 | 28.6349 | 13.5973 | 6.175863333 | 1.657173333 | ### Performance Bottlenecks From 4f400881f61850682591051114766f198af63c05 Mon Sep 17 00:00:00 2001 From: Rachel Lin <43388455+RachelDLin@users.noreply.github.com> Date: Wed, 17 Sep 2025 23:09:35 -0400 Subject: [PATCH 12/17] Update README.md --- README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index b8bb83b6..06d4a65f 100644 --- a/README.md +++ b/README.md @@ -41,14 +41,14 @@ This project offers parallel scan and stream compaction algorithms in CUDA. 
Feat | 16777216 | 27.9746 | 13.6956 | 6.76686 | 1.431213333 | #### Non-Power of Two -| Array Size | CPU | Naive | Work-Efficient | Thrust | -| --------- | --------- | --------- | --------- | -| 253 | 0.0005 | 0.06144 | 0.2095786667 | 0.05768533333 | -| 1021 | 0.0019 | 0.1505493333 | 0.2740906667 | 0.05290666667 | -| 16383 | 0.03276666667 | 0.1723733333 | 0.372736 | 0.05563733333 | -| 131069 | 0.2283666667 | 0.6361066667 | 0.5046293333 | 0.045056 | -| 1048573 | 2.531466667 | 1.062026667 | 0.9203946667 | 0.75264 | -| 4194301 | 7.606633333 | 5.455966667 | 3.874946667 | 1.58338 | +| Array Size | CPU | Naive | Work-Efficient | Thrust | +| --------- | --------- | --------- | --------- | +| 253 | 0.0005 | 0.06144 | 0.2095786667 | 0.05768533333 | +| 1021 | 0.0019 | 0.1505493333 | 0.2740906667 | 0.05290666667 | +| 16383 | 0.03276666667 | 0.1723733333 | 0.372736 | 0.05563733333 | +| 131069 | 0.2283666667 | 0.6361066667 | 0.5046293333 | 0.045056 | +| 1048573 | 2.531466667 | 1.062026667 | 0.9203946667 | 0.75264 | +| 4194301 | 7.606633333 | 5.455966667 | 3.874946667 | 1.58338 | | 16777213 | 28.6349 | 13.5973 | 6.175863333 | 1.657173333 | ### Performance Bottlenecks From 45fcfaee58df5506eb673a608afd2e3c1d18ba2d Mon Sep 17 00:00:00 2001 From: Rachel Lin <43388455+RachelDLin@users.noreply.github.com> Date: Wed, 17 Sep 2025 23:10:26 -0400 Subject: [PATCH 13/17] Update README.md --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 06d4a65f..e2f627c8 100644 --- a/README.md +++ b/README.md @@ -30,8 +30,9 @@ This project offers parallel scan and stream compaction algorithms in CUDA. 
Feat #### Power of Two + | Array Size | CPU Naive | Work-Efficient | Thrust | -| --------- | --------- | --------- | --------- | +| --------- | --------- | --------- | --------- | --------- | | 256 | 0.0006 | 0.1969493333 | 0.2730666667 | 0.1140053333 | | 1024 | 0.0018 | 0.2085546667 | 0.41984 | 0.1235733333 | | 16384 | 0.0284 | 0.254976 | 0.9103373333 | 0.1314133333 | @@ -41,8 +42,9 @@ This project offers parallel scan and stream compaction algorithms in CUDA. Feat | 16777216 | 27.9746 | 13.6956 | 6.76686 | 1.431213333 | #### Non-Power of Two + | Array Size | CPU | Naive | Work-Efficient | Thrust | -| --------- | --------- | --------- | --------- | +| --------- | --------- | --------- | --------- | --------- | | 253 | 0.0005 | 0.06144 | 0.2095786667 | 0.05768533333 | | 1021 | 0.0019 | 0.1505493333 | 0.2740906667 | 0.05290666667 | | 16383 | 0.03276666667 | 0.1723733333 | 0.372736 | 0.05563733333 | From 96851107e22668d05d67c47877004aacb45a60c6 Mon Sep 17 00:00:00 2001 From: Rachel Lin <43388455+RachelDLin@users.noreply.github.com> Date: Wed, 17 Sep 2025 23:10:47 -0400 Subject: [PATCH 14/17] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e2f627c8..6ddb1aad 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ This project offers parallel scan and stream compaction algorithms in CUDA. 
Feat #### Power of Two -| Array Size | CPU Naive | Work-Efficient | Thrust | +| Array Size | CPU | Naive | Work-Efficient | Thrust | | --------- | --------- | --------- | --------- | --------- | | 256 | 0.0006 | 0.1969493333 | 0.2730666667 | 0.1140053333 | | 1024 | 0.0018 | 0.2085546667 | 0.41984 | 0.1235733333 | From e65e2182194f78a957ad647c25d4aa0d03abc9f2 Mon Sep 17 00:00:00 2001 From: Rachel Lin <43388455+RachelDLin@users.noreply.github.com> Date: Wed, 17 Sep 2025 23:11:30 -0400 Subject: [PATCH 15/17] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6ddb1aad..b30d1a71 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ This project offers parallel scan and stream compaction algorithms in CUDA. Feat -#### Power of Two +#### Average Scan Time vs. Array Size (Power of Two) | Array Size | CPU | Naive | Work-Efficient | Thrust | | --------- | --------- | --------- | --------- | --------- | @@ -41,7 +41,7 @@ This project offers parallel scan and stream compaction algorithms in CUDA. Feat | 4194304 | 7.669166667 | 4.442293333 | 2.438283333 | 0.8376226667 | | 16777216 | 27.9746 | 13.6956 | 6.76686 | 1.431213333 | -#### Non-Power of Two +#### Average Scan Time vs. Array Size (Non-Power of Two) | Array Size | CPU | Naive | Work-Efficient | Thrust | | --------- | --------- | --------- | --------- | --------- | From c8e9b24c02397f6736ceaa0334332eccf03247b8 Mon Sep 17 00:00:00 2001 From: Rachel Lin <43388455+RachelDLin@users.noreply.github.com> Date: Wed, 17 Sep 2025 23:12:37 -0400 Subject: [PATCH 16/17] Update README.md --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index b30d1a71..bd1be6e6 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,19 @@ This project offers parallel scan and stream compaction algorithms in CUDA. 
Feat | 4194301 | 7.606633333 | 5.455966667 | 3.874946667 | 1.58338 | | 16777213 | 28.6349 | 13.5973 | 6.175863333 | 1.657173333 | +#### CPU + + +#### Naive + + +#### Work-Efficient + + +#### Thrust + + + ### Performance Bottlenecks From a047484513e32879bc29493907a647597efd9ea6 Mon Sep 17 00:00:00 2001 From: Rachel Lin <43388455+RachelDLin@users.noreply.github.com> Date: Wed, 17 Sep 2025 23:58:33 -0400 Subject: [PATCH 17/17] Update README.md --- README.md | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index bd1be6e6..823ec874 100644 --- a/README.md +++ b/README.md @@ -54,19 +54,16 @@ This project offers parallel scan and stream compaction algorithms in CUDA. Feat | 16777213 | 28.6349 | 13.5973 | 6.175863333 | 1.657173333 | #### CPU - +This implementation does not involve the GPU at all and is purely single-threaded. This makes it faster for small arrays because no kernel launch is required. However, this algorithm scales poorly with array size compared to the other implementations because it does not take advantage of the multi-threaded approach that the other algorithms do. This approach faces bottlenecks in both memory and computation (it becomes slower as the array size gets large). #### Naive - +This algorithm requires two arrays that are swapped every iteration to avoid race conditions. Since it performs computations in parallel, it scales relatively well compared to the CPU approach. This algorithm is not as optimized as it could be; every iteration, all threads with index less than the stride value are idle. However, the most significant bottleneck comes from the kernel-launch overhead (there are log_2(n) kernels) and the redundant computations where some threads re-add elements from the input array. #### Work-Efficient - +The work-efficient algorithm uses up-sweep to build a sum tree and down-sweep to distribute prefix sums. Since it operates in-place, this saves memory. 
This approach also takes advantage of parallelism on the GPU and uses log_2(n) kernel launches, but each kernel does less extra work because there are fewer redundant computations. This approach still faces a bottleneck through the kernel-launch overhead (still log_2(n) kernels for upsweep and downsweep). #### Thrust - - - -### Performance Bottlenecks +The thrust approach is very fast on large arrays. It may be using shared memory or minimizing the number of idle threads to further optimize the algorithm.