Skip to content

Commit 1c091b1

Browse files
committed
Add a new AggregateFn for UncompressedSize stat
Signed-off-by: Adam Gutglick <adam@spiraldb.com>
1 parent d2d79f0 commit 1c091b1

15 files changed

Lines changed: 930 additions & 14 deletions

File tree

vortex-array/public-api.lock

Lines changed: 88 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -728,6 +728,56 @@ pub struct vortex_array::aggregate_fn::fns::sum::SumPartial
728728

729729
pub fn vortex_array::aggregate_fn::fns::sum::sum(array: &vortex_array::ArrayRef, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::scalar::Scalar>
730730

731+
pub mod vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes
732+
733+
pub struct vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes
734+
735+
impl core::clone::Clone for vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes
736+
737+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::clone(&self) -> vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes
738+
739+
impl core::fmt::Debug for vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes
740+
741+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result
742+
743+
impl vortex_array::aggregate_fn::AggregateFnVTable for vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes
744+
745+
pub type vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::Options = vortex_array::aggregate_fn::EmptyOptions
746+
747+
pub type vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::Partial = u64
748+
749+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::accumulate(&self, partial: &mut Self::Partial, batch: &vortex_array::Columnar, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<()>
750+
751+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::coerce_args(&self, options: &Self::Options, input_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<vortex_array::dtype::DType>
752+
753+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::combine_partials(&self, partial: &mut Self::Partial, other: vortex_array::scalar::Scalar) -> vortex_error::VortexResult<()>
754+
755+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::deserialize(&self, _metadata: &[u8], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Options>
756+
757+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::empty_partial(&self, _options: &Self::Options, _input_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<Self::Partial>
758+
759+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::finalize(&self, partials: vortex_array::ArrayRef) -> vortex_error::VortexResult<vortex_array::ArrayRef>
760+
761+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::finalize_scalar(&self, partial: &Self::Partial) -> vortex_error::VortexResult<vortex_array::scalar::Scalar>
762+
763+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::id(&self) -> vortex_array::aggregate_fn::AggregateFnId
764+
765+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::is_saturated(&self, _partial: &Self::Partial) -> bool
766+
767+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::partial_dtype(&self, options: &Self::Options, input_dtype: &vortex_array::dtype::DType) -> core::option::Option<vortex_array::dtype::DType>
768+
769+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::reset(&self, partial: &mut Self::Partial)
770+
771+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::return_dtype(&self, _options: &Self::Options, _input_dtype: &vortex_array::dtype::DType) -> core::option::Option<vortex_array::dtype::DType>
772+
773+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::serialize(&self, _options: &Self::Options) -> vortex_error::VortexResult<core::option::Option<alloc::vec::Vec<u8>>>
774+
775+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::to_scalar(&self, partial: &Self::Partial) -> vortex_error::VortexResult<vortex_array::scalar::Scalar>
776+
777+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::try_accumulate(&self, partial: &mut Self::Partial, batch: &vortex_array::ArrayRef, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<bool>
778+
779+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::uncompressed_size_in_bytes(array: &vortex_array::ArrayRef, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<usize>
780+
731781
pub mod vortex_array::aggregate_fn::kernels
732782

733783
pub trait vortex_array::aggregate_fn::kernels::DynAggregateKernel: 'static + core::marker::Send + core::marker::Sync + core::fmt::Debug
@@ -1264,6 +1314,42 @@ pub fn vortex_array::aggregate_fn::fns::sum::Sum::to_scalar(&self, partial: &Sel
12641314

12651315
pub fn vortex_array::aggregate_fn::fns::sum::Sum::try_accumulate(&self, _state: &mut Self::Partial, _batch: &vortex_array::ArrayRef, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<bool>
12661316

1317+
impl vortex_array::aggregate_fn::AggregateFnVTable for vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes
1318+
1319+
pub type vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::Options = vortex_array::aggregate_fn::EmptyOptions
1320+
1321+
pub type vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::Partial = u64
1322+
1323+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::accumulate(&self, partial: &mut Self::Partial, batch: &vortex_array::Columnar, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<()>
1324+
1325+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::coerce_args(&self, options: &Self::Options, input_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<vortex_array::dtype::DType>
1326+
1327+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::combine_partials(&self, partial: &mut Self::Partial, other: vortex_array::scalar::Scalar) -> vortex_error::VortexResult<()>
1328+
1329+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::deserialize(&self, _metadata: &[u8], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Options>
1330+
1331+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::empty_partial(&self, _options: &Self::Options, _input_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<Self::Partial>
1332+
1333+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::finalize(&self, partials: vortex_array::ArrayRef) -> vortex_error::VortexResult<vortex_array::ArrayRef>
1334+
1335+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::finalize_scalar(&self, partial: &Self::Partial) -> vortex_error::VortexResult<vortex_array::scalar::Scalar>
1336+
1337+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::id(&self) -> vortex_array::aggregate_fn::AggregateFnId
1338+
1339+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::is_saturated(&self, _partial: &Self::Partial) -> bool
1340+
1341+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::partial_dtype(&self, options: &Self::Options, input_dtype: &vortex_array::dtype::DType) -> core::option::Option<vortex_array::dtype::DType>
1342+
1343+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::reset(&self, partial: &mut Self::Partial)
1344+
1345+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::return_dtype(&self, _options: &Self::Options, _input_dtype: &vortex_array::dtype::DType) -> core::option::Option<vortex_array::dtype::DType>
1346+
1347+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::serialize(&self, _options: &Self::Options) -> vortex_error::VortexResult<core::option::Option<alloc::vec::Vec<u8>>>
1348+
1349+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::to_scalar(&self, partial: &Self::Partial) -> vortex_error::VortexResult<vortex_array::scalar::Scalar>
1350+
1351+
pub fn vortex_array::aggregate_fn::fns::uncompressed_size_in_bytes::UncompressedSizeInBytes::try_accumulate(&self, partial: &mut Self::Partial, batch: &vortex_array::ArrayRef, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<bool>
1352+
12671353
impl<T: vortex_array::aggregate_fn::combined::BinaryCombined> vortex_array::aggregate_fn::AggregateFnVTable for vortex_array::aggregate_fn::combined::Combined<T>
12681354

12691355
pub type vortex_array::aggregate_fn::combined::Combined<T>::Options = vortex_array::aggregate_fn::combined::PairOptions<<<T as vortex_array::aggregate_fn::combined::BinaryCombined>::Left as vortex_array::aggregate_fn::AggregateFnVTable>::Options, <<T as vortex_array::aggregate_fn::combined::BinaryCombined>::Right as vortex_array::aggregate_fn::AggregateFnVTable>::Options>
@@ -17604,7 +17690,7 @@ pub fn vortex_array::scalar_fn::internal::row_count::RowCount::child_name(&self,
1760417690

1760517691
pub fn vortex_array::scalar_fn::internal::row_count::RowCount::coerce_args(&self, options: &Self::Options, args: &[vortex_array::dtype::DType]) -> vortex_error::VortexResult<alloc::vec::Vec<vortex_array::dtype::DType>>
1760617692

17607-
pub fn vortex_array::scalar_fn::internal::row_count::RowCount::deserialize(&self, _metadata: &[u8], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Options>
17693+
pub fn vortex_array::scalar_fn::internal::row_count::RowCount::deserialize(&self, metadata: &[u8], session: &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Options>
1760817694

1760917695
pub fn vortex_array::scalar_fn::internal::row_count::RowCount::execute(&self, _options: &Self::Options, _args: &dyn vortex_array::scalar_fn::ExecutionArgs, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::ArrayRef>
1761017696

@@ -18800,7 +18886,7 @@ pub fn vortex_array::scalar_fn::internal::row_count::RowCount::child_name(&self,
1880018886

1880118887
pub fn vortex_array::scalar_fn::internal::row_count::RowCount::coerce_args(&self, options: &Self::Options, args: &[vortex_array::dtype::DType]) -> vortex_error::VortexResult<alloc::vec::Vec<vortex_array::dtype::DType>>
1880218888

18803-
pub fn vortex_array::scalar_fn::internal::row_count::RowCount::deserialize(&self, _metadata: &[u8], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Options>
18889+
pub fn vortex_array::scalar_fn::internal::row_count::RowCount::deserialize(&self, metadata: &[u8], session: &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Options>
1880418890

1880518891
pub fn vortex_array::scalar_fn::internal::row_count::RowCount::execute(&self, _options: &Self::Options, _args: &dyn vortex_array::scalar_fn::ExecutionArgs, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::ArrayRef>
1880618892

vortex-array/src/aggregate_fn/fns/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ pub mod mean;
1010
pub mod min_max;
1111
pub mod nan_count;
1212
pub mod sum;
13+
pub mod uncompressed_size_in_bytes;
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use vortex_error::VortexResult;
5+
use vortex_error::vortex_err;
6+
7+
use crate::ExecutionCtx;
8+
use crate::arrays::BoolArray;
9+
10+
pub(super) fn bool_uncompressed_size_in_bytes(
11+
array: &BoolArray,
12+
ctx: &mut ExecutionCtx,
13+
) -> VortexResult<u64> {
14+
let value_size = super::packed_bit_buffer_size_in_bytes(array.len())?;
15+
let validity_size = super::validity_uncompressed_size_in_bytes(
16+
array
17+
.as_ref()
18+
.validity()?
19+
.execute_mask(array.as_ref().len(), ctx)?,
20+
)?;
21+
22+
value_size
23+
.checked_add(validity_size)
24+
.ok_or_else(|| vortex_err!("uncompressed size in bytes overflowed u64"))
25+
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use vortex_error::VortexResult;
5+
use vortex_error::vortex_err;
6+
7+
use crate::ExecutionCtx;
8+
use crate::arrays::DecimalArray;
9+
use crate::arrays::decimal::DecimalArrayExt;
10+
use crate::dtype::DecimalType;
11+
12+
pub(super) fn decimal_uncompressed_size_in_bytes(
13+
array: &DecimalArray,
14+
ctx: &mut ExecutionCtx,
15+
) -> VortexResult<u64> {
16+
let value_size = u64::try_from(array.len())
17+
.map_err(|e| vortex_err!("Failed to convert decimal array length to u64: {e}"))?
18+
.checked_mul(
19+
u64::try_from(
20+
DecimalType::smallest_decimal_value_type(&array.decimal_dtype()).byte_width(),
21+
)
22+
.map_err(|e| vortex_err!("Failed to convert decimal byte width to u64: {e}"))?,
23+
)
24+
.ok_or_else(|| vortex_err!("uncompressed size in bytes overflowed u64"))?;
25+
let validity_size = super::validity_uncompressed_size_in_bytes(
26+
array
27+
.as_ref()
28+
.validity()?
29+
.execute_mask(array.as_ref().len(), ctx)?,
30+
)?;
31+
32+
value_size
33+
.checked_add(validity_size)
34+
.ok_or_else(|| vortex_err!("uncompressed size in bytes overflowed u64"))
35+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use vortex_error::VortexResult;
5+
6+
use crate::ExecutionCtx;
7+
use crate::arrays::ExtensionArray;
8+
use crate::arrays::extension::ExtensionArrayExt;
9+
10+
/// Computes the uncompressed size, in bytes, of an [`ExtensionArray`].
///
/// The computation is delegated entirely to
/// `super::uncompressed_size_in_bytes_u64` applied to the array's storage
/// array; nothing is added for the extension wrapper itself.
///
/// # Errors
///
/// Propagates any error returned while sizing the storage array.
pub(super) fn extension_uncompressed_size_in_bytes(
    array: &ExtensionArray,
    ctx: &mut ExecutionCtx,
) -> VortexResult<u64> {
    let storage = array.storage_array();
    super::uncompressed_size_in_bytes_u64(storage, ctx)
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use vortex_error::VortexResult;
5+
use vortex_error::vortex_err;
6+
7+
use crate::ExecutionCtx;
8+
use crate::arrays::FixedSizeListArray;
9+
use crate::arrays::fixed_size_list::FixedSizeListArrayExt;
10+
11+
pub(super) fn fixed_size_list_uncompressed_size_in_bytes(
12+
array: &FixedSizeListArray,
13+
ctx: &mut ExecutionCtx,
14+
) -> VortexResult<u64> {
15+
let elements_size = super::uncompressed_size_in_bytes_u64(array.elements(), ctx)?;
16+
let validity_size = super::validity_uncompressed_size_in_bytes(
17+
array
18+
.as_ref()
19+
.validity()?
20+
.execute_mask(array.as_ref().len(), ctx)?,
21+
)?;
22+
23+
elements_size
24+
.checked_add(validity_size)
25+
.ok_or_else(|| vortex_err!("uncompressed size in bytes overflowed u64"))
26+
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use vortex_error::VortexResult;
5+
use vortex_error::vortex_err;
6+
7+
use crate::ExecutionCtx;
8+
use crate::arrays::ListViewArray;
9+
use crate::arrays::listview::ListViewArrayExt;
10+
use crate::arrays::listview::ListViewRebuildMode;
11+
12+
pub(super) fn list_uncompressed_size_in_bytes(
13+
array: &ListViewArray,
14+
ctx: &mut ExecutionCtx,
15+
) -> VortexResult<u64> {
16+
let mut size = if array.is_empty() {
17+
0
18+
} else {
19+
let rebuilt = array.rebuild(ListViewRebuildMode::MakeExact)?;
20+
super::uncompressed_size_in_bytes_u64(rebuilt.elements(), ctx)?
21+
};
22+
23+
let view_buffer_size = u64::try_from(array.len())
24+
.map_err(|e| vortex_err!("Failed to convert list array length to u64: {e}"))?
25+
.checked_mul(8)
26+
.ok_or_else(|| vortex_err!("uncompressed size in bytes overflowed u64"))?;
27+
28+
size = size
29+
.checked_add(view_buffer_size)
30+
.and_then(|size| size.checked_add(view_buffer_size))
31+
.ok_or_else(|| vortex_err!("uncompressed size in bytes overflowed u64"))?;
32+
size = size
33+
.checked_add(super::validity_uncompressed_size_in_bytes(
34+
array
35+
.as_ref()
36+
.validity()?
37+
.execute_mask(array.as_ref().len(), ctx)?,
38+
)?)
39+
.ok_or_else(|| vortex_err!("uncompressed size in bytes overflowed u64"))?;
40+
41+
Ok(size)
42+
}

0 commit comments

Comments
 (0)