aisrael · aisrael · Jul 1, 2026 · Jul 1, 2026 · Jul 1, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,13 @@
 # datu Version Notes
 
+## Unreleased
+
+### Improvements
+
+- **REPL**
+  - `select()` supports column/aggregate aliasing via `name: value` keyword syntax (or `"name with space": value` when the name is not a bare identifier), e.g. `select(:foo, foo_bar: :bar, total: sum(:qty))`. Aliasing works for plain projections, global aggregates, grouped aggregates (including `group_by` keys), and ORC's plain-column select path.
+  - `group_by()` keys can also carry their own alias (e.g. `group_by(key: :foo)`), which becomes the default output name for that key; a matching `select()` alias still takes precedence when present. `select()` may refer to the key by its underlying column or by the `group_by()` alias itself (e.g. `group_by(key: :foo) |> select(:key, total: sum(:qty))`).
+
 ## v0.3.6
 
 ### Highlights

diff --git a/docs/REPL.md b/docs/REPL.md
@@ -225,6 +225,44 @@ If `group_by()` is present but `select()` lists only key columns (no aggregates)
 
 `warning: group_by() with no aggregates in select(); showing distinct group keys only (behavior may change)`
 
+#### Aliasing
+
+Any `select()` argument—plain column or aggregate—can be given an output name using `name: value` keyword-argument syntax. This relabels the corresponding output column without changing which input column (or aggregate) is used:
+
+```flt
+read("input.avro") |> group_by(:foo, :bar) |> select(:foo, foo_bar: :bar, total: sum(:qty))
+```
+
+Here, `:foo` keeps its own name, `:bar` is renamed to `foo_bar`, and `sum(:qty)` is renamed to `total`.
+
+If the desired output name isn't a valid bare identifier (for example, it contains a space), quote it:
+
+```flt
+read("input.avro") |> select(:foo, "foo bar": :bar)
+```
+
+Aliasing works the same way for plain projections, global aggregates, and grouped aggregates (including group keys named in `group_by()`).
+
+`group_by()` keys can also carry their own alias, using the same `name: value` / `"quoted name": value` syntax:
+
+```flt
+read("input.avro") |> group_by(key: :foo) |> select(:foo, total: sum(:qty))
+```
+
+The `group_by()` alias sets the *default* output name for the key, so the example above produces the columns `key` and `total`. `select()` may refer to that key either by its underlying column (`:foo`) or by the alias itself (`:key`); the two forms are equivalent:
+
+```flt
+read("input.avro") |> group_by(key: :foo) |> select(:key, total: sum(:qty))
+# equivalent to: select(:foo, total: sum(:qty))
+```
+
+If `select()` also gives the group key its own alias, the `select()` alias wins, whether `select()` refers to the key by its underlying column or by the `group_by()` alias:
+
+```flt
+read("input.avro") |> group_by(from_group_by: :foo) |> select(from_select: :foo, total: sum(:qty))
+# output column is "from_select", not "from_group_by"
+```
+
 ### Data preview (`head`, `tail`, and `sample`)
 
 `head`, `tail`, and `sample` can either be used after a `read() |> ` expression, or, by themselves by providing the path as the first argument.

diff --git a/features/repl/select.feature b/features/repl/select.feature
@@ -122,3 +122,87 @@ Feature: Select
       ```
     Then the file "$TEMPDIR/select.parquet" should exist
     And that file should be a valid Parquet file
+
+  Scenario: Select with an aliased plain column
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> select(:two, three_alias: :three) |> write("$TEMPDIR/select_alias.csv")
+      ```
+    Then the file "$TEMPDIR/select_alias.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should be: "two,three_alias"
+    And that file should have 4 lines
+
+  Scenario: Select with an aliased aggregate under group_by
+    Given a Parquet file with the following data:
+      ```
+      item_id,quantity
+      1,10
+      1,20
+      2,5
+      ```
+    When the REPL is ran and the user types:
+      ```
+      read("$TEMPDIR/input.parquet") |> group_by(:item_id) |> select(:item_id, total: sum(:quantity)) |> write("$TEMPDIR/select_alias_agg.csv")
+      ```
+    Then the file "$TEMPDIR/select_alias_agg.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should be: "item_id,total"
+
+  Scenario: Select with a quoted alias key containing a space
+    When the REPL is ran and the user types:
+      ```
+      read("fixtures/table.parquet") |> select(:two, "three alias": :three) |> write("$TEMPDIR/select_alias_quoted.csv")
+      ```
+    Then the file "$TEMPDIR/select_alias_quoted.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should be: "two,three alias"
+    And that file should have 4 lines
+
+  Scenario: group_by aliases the group key when select does not override it
+    Given a Parquet file with the following data:
+      ```
+      item_id,quantity
+      1,10
+      1,20
+      2,5
+      ```
+    When the REPL is ran and the user types:
+      ```
+      read("$TEMPDIR/input.parquet") |> group_by(key: :item_id) |> select(:item_id, total: sum(:quantity)) |> write("$TEMPDIR/group_by_alias.csv")
+      ```
+    Then the file "$TEMPDIR/group_by_alias.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should be: "key,total"
+
+  Scenario: select's own alias for a group key overrides group_by's alias
+    Given a Parquet file with the following data:
+      ```
+      item_id,quantity
+      1,10
+      1,20
+      2,5
+      ```
+    When the REPL is ran and the user types:
+      ```
+      read("$TEMPDIR/input.parquet") |> group_by(from_group_by: :item_id) |> select(from_select: :item_id, total: sum(:quantity)) |> write("$TEMPDIR/group_by_alias_override.csv")
+      ```
+    Then the file "$TEMPDIR/group_by_alias_override.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should be: "from_select,total"
+
+  Scenario: select references a group_by key by its alias instead of the underlying column
+    Given a Parquet file with the following data:
+      ```
+      item_id,quantity
+      1,10
+      1,20
+      2,5
+      ```
+    When the REPL is ran and the user types:
+      ```
+      read("$TEMPDIR/input.parquet") |> group_by(key: :item_id) |> select(:key, total: sum(:quantity)) |> write("$TEMPDIR/group_by_alias_reference.csv")
+      ```
+    Then the file "$TEMPDIR/group_by_alias_reference.csv" should exist
+    And that file should be a CSV file
+    And the first line of that file should be: "key,total"
diff --git a/src/cli/repl/builder_bridge.rs b/src/cli/repl/builder_bridge.rs
@@ -1,7 +1,7 @@
 //! Maps validated REPL stages to [`crate::pipeline::PipelineBuilder`].
 
 use super::stage::ReplPipelineStage;
-use crate::pipeline::ColumnSpec;
+use crate::pipeline::GroupByKey;
 use crate::pipeline::PipelineBuilder;
 use crate::pipeline::SelectItem;
 use crate::pipeline::SelectSpec;
@@ -29,7 +29,7 @@ pub(crate) fn repl_stages_to_pipeline_builder(
 
     let mut i = 1usize;
     let mut select_idx: Option<usize> = None;
-    let mut group_keys: Option<Vec<ColumnSpec>> = None;
+    let mut group_keys: Option<Vec<GroupByKey>> = None;
     let mut select_columns: Option<Vec<SelectItem>> = None;
     let mut filters: Vec<(usize, String)> = Vec::new();
 

diff --git a/src/cli/repl/mod.rs b/src/cli/repl/mod.rs
@@ -10,6 +10,7 @@ pub use stage::ReplPipelineStage;
 
 /// Column selection in REPL expressions (re-export of [`crate::pipeline::ColumnSpec`]).
 pub use crate::pipeline::ColumnSpec;
+pub use crate::pipeline::GroupByKey;
 pub use crate::pipeline::SelectItem;
 
 #[cfg(test)]