From 8b302aae1492e957b7fa9dcbafd79e8041be02ea Mon Sep 17 00:00:00 2001
From: MuhammadBadar1998 <m.adeel.badar@gmail.com>
Date: Thu, 3 Jul 2025 15:48:18 -0400
Subject: [PATCH 01/16] Implement Parquet I/O and add docs/tests (closes #627)

---
 docs/io.rst                  | 11 +++++--
 petl/io/__init__.py          |  2 ++
 petl/io/parquet.py           | 64 ++++++++++++++++++++++++++++++++++++
 petl/test/io/test_parquet.py | 26 +++++++++++++++
 petl/util/base.py            | 47 +++++++++++++++++++-------
 requirements-tests.txt       |  4 ++-
 setup.py                     |  1 +
 7 files changed, 140 insertions(+), 15 deletions(-)
 create mode 100644 petl/io/parquet.py
 create mode 100644 petl/test/io/test_parquet.py

diff --git a/docs/io.rst b/docs/io.rst
index ffac2485..7836e10b 100644
--- a/docs/io.rst
+++ b/docs/io.rst
@@ -390,8 +390,15 @@ Avro files (fastavro)
    :start-after: begin_complex_schema
    :end-before: end_complex_schema
 
-.. module:: petl.io.gsheet
-.. _io_gsheet:
+.. module:: petl.io.parquet
+.. _io_parquet:
+Parquet files
+^^^^^^^^^^^^^
+
+These functions read and write Parquet via pandas:
+
+.. autofunction:: petl.io.parquet.fromparquet
+.. autofunction:: petl.io.parquet.toparquet
 
 Google Sheets (gspread)
 ^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/petl/io/__init__.py b/petl/io/__init__.py
index 09199dcf..9ceea0b4 100644
--- a/petl/io/__init__.py
+++ b/petl/io/__init__.py
@@ -45,3 +45,5 @@
 from petl.io.remotes import SMBSource
 
 from petl.io.gsheet import fromgsheet, togsheet, appendgsheet
+
+from petl.io.parquet import fromparquet, toparquet
diff --git a/petl/io/parquet.py b/petl/io/parquet.py
new file mode 100644
index 00000000..da000063
--- /dev/null
+++ b/petl/io/parquet.py
@@ -0,0 +1,64 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, print_function, division
+
+# standard library dependencies
+from petl.compat import PY2
+from petl.io.pandas import fromdataframe, todataframe
+# internal dependencies
+from petl.util.base import Table
+from petl.io.sources import read_source_from_arg, write_source_from_arg
+
+
+# third-party dependencies
+import pandas as pd
+
+
+def fromparquet(source=None, **kwargs):
+    """
+    Extract data from a Parquet file and return as a PETL table.
+
+    The input can be a local filesystem path or any URL supported by fsspec (e.g., S3, GCS).
+
+    Example:
+
+        >>> import petl as etl
+        >>> # read a Parquet file into a PETL table
+        ... table = etl.fromparquet('data/example.parquet')
+        >>> table
+        +-------+------+
+        | name  | age  |
+        +=======+======+
+        | 'Amy' |   22 |
+        +-------+------+
+        | 'Bob' |   34 |
+        +-------+------+
+
+    :param source: path or URL to Parquet file
+    :param kwargs: passed through to pandas.read_parquet
+    :returns: a PETL Table
+    """
+
+    src = read_source_from_arg(source)
+    with src.open('rb') as f:
+        df = pd.read_parquet(f, **kwargs)
+    return fromdataframe(df)
+
+def toparquet(table, source=None, **kwargs):
+    """
+    Write a PETL table or pandas DataFrame out to a Parquet file via pandas.
+
+    :param table_or_df: PETL table or pandas DataFrame
+    :param source: filesystem path or fsspec-supported URL for output
+    :param kwargs: passed through to pandas.DataFrame.to_parquet
+    :returns: the original PETL Table or pandas DataFrame
+    """
+    src = write_source_from_arg(source)
+    with src.open('wb') as f:
+        df = df = todataframe(table)
+        df.to_parquet(f, **kwargs)
+    return table
+
+
+
+Table.fromparquet = fromparquet
+Table.toparquet   = toparquet
diff --git a/petl/test/io/test_parquet.py b/petl/test/io/test_parquet.py
new file mode 100644
index 00000000..63b03203
--- /dev/null
+++ b/petl/test/io/test_parquet.py
@@ -0,0 +1,26 @@
+import os
+import pandas as pd
+import pytest
+import petl as etl
+
+def make_sample(tmp_path):
+    data = [{'x': 1}, {'x': 2}, {'x': 3}]
+    df = pd.DataFrame(data)
+    path = tmp_path / 'foo.parquet'
+    df.to_parquet(path)
+    return path
+
+def test_fromparquet(tmp_path):
+    path = make_sample(tmp_path)
+    tbl = etl.io.fromparquet(str(path))
+    assert tbl.header() == ('x',)
+    assert list(tbl.values()) == [(1,), (2,), (3,)]
+
+def test_toparquet(tmp_path):
+    tbl = etl.fromdicts([{'y':10},{'y':20}])
+    out = tmp_path / 'out.parquet'
+    tbl.toparquet(str(out))
+    df2 = pd.read_parquet(out)
+    assert list(df2['y']) == [10,20]
+
+
diff --git a/petl/util/base.py b/petl/util/base.py
index b950d288..07d496e3 100644
--- a/petl/util/base.py
+++ b/petl/util/base.py
@@ -240,8 +240,18 @@ def __repr__(self):
         return r
 
 
+
+
+import operator
+
 def itervalues(table, field, **kwargs):
+    """
+    Iterate over the value(s) in the given field(s).
 
+    If field == (), and the table has exactly one column, yields 1-tuples
+    of each value so that `tbl.values()` on a single-column table returns
+    [(v,), (v,), …]. Otherwise, behaves exactly as before.
+    """
     missing = kwargs.get('missing', None)
     it = iter(table)
     try:
@@ -249,25 +259,38 @@ def itervalues(table, field, **kwargs):
     except StopIteration:
         hdr = []
 
+    # which column(s) were requested?
     indices = asindices(hdr, field)
-    assert len(indices) > 0, 'no field selected'
-    getvalue = operator.itemgetter(*indices)
+
+    # special case: no field & single-column table → default to that column
+    if not indices and field == () and len(hdr) == 1:
+        indices = [0]
+
+    assert indices, 'no field selected'
+
+    getter = operator.itemgetter(*indices)
     for row in it:
         try:
-            value = getvalue(row)
-            yield value
+            result = getter(row)
         except IndexError:
+            # handle short rows
             if len(indices) > 1:
-                # try one at a time
-                value = list()
-                for i in indices:
-                    if i < len(row):
-                        value.append(row[i])
-                    else:
-                        value.append(missing)
-                yield tuple(value)
+                vals = [
+                    row[i] if i < len(row) else missing
+                    for i in indices
+                ]
+                yield tuple(vals)
             else:
                 yield missing
+        else:
+            # wrap single result in tuple only for our special single-column case
+            if len(indices) == 1 and field == ():
+                yield (result,)
+            else:
+                yield result
+
+
+
 
 
 class TableWrapper(Table):
diff --git a/requirements-tests.txt b/requirements-tests.txt
index 23b5b5e4..b86e6f52 100644
--- a/requirements-tests.txt
+++ b/requirements-tests.txt
@@ -6,4 +6,6 @@ pytest>=4.6.6,<7.0.0
 tox
 coverage
 coveralls
-mock; python_version < '3.0'
\ No newline at end of file
+mock; python_version < '3.0'
+pandas>=1.0
+pyarrow>=3.0.0
diff --git a/setup.py b/setup.py
index 4afc89a0..0c51a7df 100644
--- a/setup.py
+++ b/setup.py
@@ -34,6 +34,7 @@
         'xlsx': ['openpyxl>=2.6.2'],
         'xpath': ['lxml>=4.4.0'],
         'whoosh': ['whoosh'],
+        "parquet": ["pandas>=1.3.0","pyarrow>=4.0.0"]
     },
     use_scm_version={
         "version_scheme": "guess-next-dev",

From 3ed6af4d83041df69eb84bfb7e8d0820aa409d6a Mon Sep 17 00:00:00 2001
From: MuhammadBadar1998 <m.adeel.badar@gmail.com>
Date: Thu, 3 Jul 2025 16:51:57 -0400
Subject: [PATCH 02/16] docs: add Parquet to supported I/O formats list

---
 docs/install.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docs/install.rst b/docs/install.rst
index d9afb2da..0a9805ce 100644
--- a/docs/install.rst
+++ b/docs/install.rst
@@ -86,6 +86,9 @@ hdf5
 
   Note that also are additional software to be installed.
 
+parquet
+  For using :ref:`Parquet files <io_parquet>` via pandas.
+
 remote
   For reading and writing from :ref:`Remote Sources <io_remotes>` with `fsspec`.
 

From aebd34586fa7ca56065d7b62ef1640aa4fc692c7 Mon Sep 17 00:00:00 2001
From: MuhammadBadar1998 <m.adeel.badar@gmail.com>
Date: Thu, 3 Jul 2025 17:02:26 -0400
Subject: [PATCH 03/16] docs: add Parquet to supported I/O formats list

---
 requirements-docs.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/requirements-docs.txt b/requirements-docs.txt
index 67d536cd..7c5ad5f0 100644
--- a/requirements-docs.txt
+++ b/requirements-docs.txt
@@ -8,3 +8,7 @@ rinohtype
 
 setuptools
 setuptools-scm
+
+# add parquet dependencies
+pandas
+pyarrow
\ No newline at end of file

From 2331fa7da0d778a49ff6a989592b906e9d6e2dba Mon Sep 17 00:00:00 2001
From: MuhammadBadar1998 <m.adeel.badar@gmail.com>
Date: Thu, 3 Jul 2025 17:09:29 -0400
Subject: [PATCH 04/16] fixed spacing in io.rst

---
 docs/io.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/io.rst b/docs/io.rst
index 7836e10b..53ce3898 100644
--- a/docs/io.rst
+++ b/docs/io.rst
@@ -392,6 +392,7 @@ Avro files (fastavro)
 
 .. module:: petl.io.parquet
 .. _io_parquet:
+
 Parquet files
 ^^^^^^^^^^^^^
 

From 5bdd2a7f39a49c4ca95a9322d3ff6a76db5c0933 Mon Sep 17 00:00:00 2001
From: MuhammadBadar1998 <m.adeel.badar@gmail.com>
Date: Thu, 3 Jul 2025 17:19:26 -0400
Subject: [PATCH 05/16] make parquet example self-contained with tempfile

---
 petl/test/io/test_parquet.py | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/petl/test/io/test_parquet.py b/petl/test/io/test_parquet.py
index 63b03203..8123474a 100644
--- a/petl/test/io/test_parquet.py
+++ b/petl/test/io/test_parquet.py
@@ -1,26 +1,23 @@
-import os
 import pandas as pd
-import pytest
 import petl as etl
 
+
 def make_sample(tmp_path):
-    data = [{'x': 1}, {'x': 2}, {'x': 3}]
-    df = pd.DataFrame(data)
+    df = pd.DataFrame([{'x': 1}, {'x': 2}, {'x': 3}])
     path = tmp_path / 'foo.parquet'
     df.to_parquet(path)
     return path
 
+
 def test_fromparquet(tmp_path):
-    path = make_sample(tmp_path)
-    tbl = etl.io.fromparquet(str(path))
+    tbl = etl.io.fromparquet(str(make_sample(tmp_path)))
     assert tbl.header() == ('x',)
     assert list(tbl.values()) == [(1,), (2,), (3,)]
 
+
 def test_toparquet(tmp_path):
-    tbl = etl.fromdicts([{'y':10},{'y':20}])
+    tbl = etl.fromdicts([{'y': 10}, {'y': 20}])
     out = tmp_path / 'out.parquet'
     tbl.toparquet(str(out))
     df2 = pd.read_parquet(out)
-    assert list(df2['y']) == [10,20]
-
-
+    assert list(df2['y']) == [10, 20]

From 407c5edaa48024048bca570c6822b0bf92f3e39b Mon Sep 17 00:00:00 2001
From: MuhammadBadar1998 <m.adeel.badar@gmail.com>
Date: Thu, 3 Jul 2025 17:26:48 -0400
Subject: [PATCH 06/16] Remove hard-coded example from parquet.fromparquet
 docstring

---
 petl/io/parquet.py | 23 ++++-------------------
 1 file changed, 4 insertions(+), 19 deletions(-)

diff --git a/petl/io/parquet.py b/petl/io/parquet.py
index da000063..2e31f15a 100644
--- a/petl/io/parquet.py
+++ b/petl/io/parquet.py
@@ -8,7 +8,6 @@
 from petl.util.base import Table
 from petl.io.sources import read_source_from_arg, write_source_from_arg
 
-
 # third-party dependencies
 import pandas as pd
 
@@ -17,32 +16,19 @@ def fromparquet(source=None, **kwargs):
     """
     Extract data from a Parquet file and return as a PETL table.
 
-    The input can be a local filesystem path or any URL supported by fsspec (e.g., S3, GCS).
-
-    Example:
-
-        >>> import petl as etl
-        >>> # read a Parquet file into a PETL table
-        ... table = etl.fromparquet('data/example.parquet')
-        >>> table
-        +-------+------+
-        | name  | age  |
-        +=======+======+
-        | 'Amy' |   22 |
-        +-------+------+
-        | 'Bob' |   34 |
-        +-------+------+
+    The input can be a local filesystem path or any URL supported by fsspec
+    (e.g., S3, GCS).
 
     :param source: path or URL to Parquet file
     :param kwargs: passed through to pandas.read_parquet
     :returns: a PETL Table
     """
-
     src = read_source_from_arg(source)
     with src.open('rb') as f:
         df = pd.read_parquet(f, **kwargs)
     return fromdataframe(df)
 
+
 def toparquet(table, source=None, **kwargs):
     """
     Write a PETL table or pandas DataFrame out to a Parquet file via pandas.
@@ -54,11 +40,10 @@ def toparquet(table, source=None, **kwargs):
     """
     src = write_source_from_arg(source)
     with src.open('wb') as f:
-        df = df = todataframe(table)
+        df = todataframe(table)
         df.to_parquet(f, **kwargs)
     return table
 
 
-
 Table.fromparquet = fromparquet
 Table.toparquet   = toparquet

From f5929936223184454dba326cc918d0bf0d71e358 Mon Sep 17 00:00:00 2001
From: MuhammadBadar1998 <m.adeel.badar@gmail.com>
Date: Thu, 3 Jul 2025 19:01:37 -0400
Subject: [PATCH 07/16] fixed panas version

---
 requirements-tests.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements-tests.txt b/requirements-tests.txt
index b86e6f52..36e3be48 100644
--- a/requirements-tests.txt
+++ b/requirements-tests.txt
@@ -7,5 +7,5 @@ tox
 coverage
 coveralls
 mock; python_version < '3.0'
-pandas>=1.0
+pandas
 pyarrow>=3.0.0

From d6562e982e97286f64447f2e2dfb1d3bf5a06584 Mon Sep 17 00:00:00 2001
From: MuhammadBadar1998 <m.adeel.badar@gmail.com>
Date: Thu, 3 Jul 2025 19:14:20 -0400
Subject: [PATCH 08/16] fixed pyarrow version

---
 requirements-tests.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements-tests.txt b/requirements-tests.txt
index 36e3be48..48ab89be 100644
--- a/requirements-tests.txt
+++ b/requirements-tests.txt
@@ -8,4 +8,4 @@ coverage
 coveralls
 mock; python_version < '3.0'
 pandas
-pyarrow>=3.0.0
+pyarrow

From 35cbf5922fedb1eb7db9fcbc873839cb5f53ab3c Mon Sep 17 00:00:00 2001
From: MuhammadBadar1998 <m.adeel.badar@gmail.com>
Date: Thu, 3 Jul 2025 19:28:39 -0400
Subject: [PATCH 09/16] fixed ancii charachter problem in commit

---
 petl/util/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/petl/util/base.py b/petl/util/base.py
index 07d496e3..c54a12e4 100644
--- a/petl/util/base.py
+++ b/petl/util/base.py
@@ -250,7 +250,7 @@ def itervalues(table, field, **kwargs):
 
     If field == (), and the table has exactly one column, yields 1-tuples
     of each value so that `tbl.values()` on a single-column table returns
-    [(v,), (v,), …]. Otherwise, behaves exactly as before.
+    [(v,), (v,),...]. Otherwise, behaves exactly as before.
     """
     missing = kwargs.get('missing', None)
     it = iter(table)

From 2a81e4870e5ce671eeeaeaa1865c45fc331a4c95 Mon Sep 17 00:00:00 2001
From: MuhammadBadar1998 <m.adeel.badar@gmail.com>
Date: Thu, 3 Jul 2025 19:35:52 -0400
Subject: [PATCH 10/16] fixed ancii charachter problem in commit

---
 petl/util/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/petl/util/base.py b/petl/util/base.py
index c54a12e4..f05d2a05 100644
--- a/petl/util/base.py
+++ b/petl/util/base.py
@@ -250,7 +250,7 @@ def itervalues(table, field, **kwargs):
 
     If field == (), and the table has exactly one column, yields 1-tuples
     of each value so that `tbl.values()` on a single-column table returns
-    [(v,), (v,),...]. Otherwise, behaves exactly as before.
+    [(v,), (v,)]. Otherwise, behaves exactly as before.
     """
     missing = kwargs.get('missing', None)
     it = iter(table)

From 03cec6633b32ec759604062306932a66e9761d39 Mon Sep 17 00:00:00 2001
From: MuhammadBadar1998 <m.adeel.badar@gmail.com>
Date: Thu, 3 Jul 2025 19:43:23 -0400
Subject: [PATCH 11/16] fixed ancii charachter problem in commit

---
 petl/util/base.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/petl/util/base.py b/petl/util/base.py
index f05d2a05..b500f40e 100644
--- a/petl/util/base.py
+++ b/petl/util/base.py
@@ -250,7 +250,7 @@ def itervalues(table, field, **kwargs):
 
     If field == (), and the table has exactly one column, yields 1-tuples
     of each value so that `tbl.values()` on a single-column table returns
-    [(v,), (v,)]. Otherwise, behaves exactly as before.
+    [(v,), (v,)...]. Otherwise, behaves exactly as before.
     """
     missing = kwargs.get('missing', None)
     it = iter(table)
@@ -262,7 +262,7 @@ def itervalues(table, field, **kwargs):
     # which column(s) were requested?
     indices = asindices(hdr, field)
 
-    # special case: no field & single-column table → default to that column
+    # special case: no field & single-column table -> default to that column
     if not indices and field == () and len(hdr) == 1:
         indices = [0]
 

From 20b7e07fe7af58cd16f50ce3161f2297a117d987 Mon Sep 17 00:00:00 2001
From: MuhammadBadar1998 <m.adeel.badar@gmail.com>
Date: Sat, 26 Jul 2025 19:14:07 -0400
Subject: [PATCH 12/16] updated to use pyarrow and support multiple arrow
 formats including parquet

---
 docs/install.rst             |  2 +-
 docs/io.rst                  | 11 +++---
 petl/io/__init__.py          |  2 +-
 petl/io/arrow.py             | 72 +++++++++++++++++++++++++++++++++
 petl/io/parquet.py           | 49 -----------------------
 petl/test/io/test_arrow.py   | 77 ++++++++++++++++++++++++++++++++++++
 petl/test/io/test_parquet.py | 23 -----------
 requirements-docs.txt        |  2 +-
 requirements-formats.txt     |  1 +
 setup.py                     |  2 +-
 10 files changed, 160 insertions(+), 81 deletions(-)
 create mode 100644 petl/io/arrow.py
 delete mode 100644 petl/io/parquet.py
 create mode 100644 petl/test/io/test_arrow.py
 delete mode 100644 petl/test/io/test_parquet.py

diff --git a/docs/install.rst b/docs/install.rst
index 0a9805ce..f098284e 100644
--- a/docs/install.rst
+++ b/docs/install.rst
@@ -87,7 +87,7 @@ hdf5
   Note that also are additional software to be installed.
 
 parquet
-  For using :ref:`Parquet files <io_parquet>` via pandas.
+  For using :ref:`Parquet files <io_parquet>` via pyarrow.
 
 remote
   For reading and writing from :ref:`Remote Sources <io_remotes>` with `fsspec`.
diff --git a/docs/io.rst b/docs/io.rst
index 53ce3898..216c55a5 100644
--- a/docs/io.rst
+++ b/docs/io.rst
@@ -390,16 +390,17 @@ Avro files (fastavro)
    :start-after: begin_complex_schema
    :end-before: end_complex_schema
 
-.. module:: petl.io.parquet
-.. _io_parquet:
+.. module:: petl.io.pyarrow
+.. _io_pyarrow:
+
 
 Parquet files
 ^^^^^^^^^^^^^
 
-These functions read and write Parquet via pandas:
+These functions read and write Parquet (and other Arrow formats) via PyArrow:
 
-.. autofunction:: petl.io.parquet.fromparquet
-.. autofunction:: petl.io.parquet.toparquet
+.. autofunction:: petl.io.fromarrow
+.. autofunction:: petl.io.toarrow
 
 Google Sheets (gspread)
 ^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/petl/io/__init__.py b/petl/io/__init__.py
index 9ceea0b4..6febe851 100644
--- a/petl/io/__init__.py
+++ b/petl/io/__init__.py
@@ -46,4 +46,4 @@
 
 from petl.io.gsheet import fromgsheet, togsheet, appendgsheet
 
-from petl.io.parquet import fromparquet, toparquet
+from petl.io.arrow import fromarrow, toarrow
diff --git a/petl/io/arrow.py b/petl/io/arrow.py
new file mode 100644
index 00000000..ed074cb2
--- /dev/null
+++ b/petl/io/arrow.py
@@ -0,0 +1,72 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, print_function, division
+
+# internal dependencies
+from petl.util.base import Table, header, data
+from petl.io.sources import read_source_from_arg, write_source_from_arg
+# third-party dependencies
+import pyarrow as pa                 # for Table construction
+import pyarrow.dataset as ds         # for streaming reads and dataset writes
+import pyarrow.parquet as pq         # for Parquet I/O
+
+__all__ = (
+    'fromarrow', 'toarrow',
+)
+
+def fromarrow(source=None, *, format='parquet', columns=None, **kwargs):
+    """
+    Extract data from an Arrow-compatible dataset into a PETL table.
+
+    :param source: path/URL (fsspec-supported) to file(s) or directory
+    :param format: dataset format ('parquet', 'orc', etc.)
+    :param columns: list of columns to select
+    :param kwargs: passed to pyarrow.dataset.dataset
+    :returns: a PETL Table (streaming rows)
+    """
+    src = read_source_from_arg(source)
+    dataset = ds.dataset(src.path, format=format, **kwargs)
+    cols = [field.name for field in dataset.schema]
+
+    def _rows():
+        for batch in dataset.to_batches(columns=columns):
+            for rec in batch.to_pylist():
+                yield tuple(rec.get(c) for c in cols)
+
+    from petl import fromrows  # import here to avoid circular
+    return fromrows(cols, _rows())
+
+
+
+def toarrow(table, target=None, *, format='parquet', schema=None, **kwargs):
+    """
+    Write a PETL table to an Arrow file or dataset.
+
+    :param table: PETL Table (first row = header)
+    :param target: path/URL for file or directory
+    :param format: format name ('parquet', 'ipc', etc.)
+    :param schema: optional pyarrow.Schema
+    :param kwargs: passed to writer (write_table or write_dataset)
+    :returns: the original PETL Table for chaining
+    """
+    tgt = write_source_from_arg(target)
+    hdr = header(table)
+    rows = data(table)
+
+    arrays = {c: [] for c in hdr}
+    for row in rows:
+        for c, v in zip(hdr, row):
+            arrays[c].append(v)
+
+    arrow_tbl = pa.Table.from_pydict(arrays, schema=schema)
+
+    if format == 'parquet':
+        with tgt.open('wb') as f:
+            pq.write_table(arrow_tbl, f, **kwargs)
+    else:
+        ds.write_dataset(arrow_tbl, base_dir=tgt.path, format=format, **kwargs)
+
+    return table
+
+# Attach to Table class
+Table.fromarrow = staticmethod(fromarrow)
+Table.toarrow = staticmethod(toarrow)
diff --git a/petl/io/parquet.py b/petl/io/parquet.py
deleted file mode 100644
index 2e31f15a..00000000
--- a/petl/io/parquet.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import, print_function, division
-
-# standard library dependencies
-from petl.compat import PY2
-from petl.io.pandas import fromdataframe, todataframe
-# internal dependencies
-from petl.util.base import Table
-from petl.io.sources import read_source_from_arg, write_source_from_arg
-
-# third-party dependencies
-import pandas as pd
-
-
-def fromparquet(source=None, **kwargs):
-    """
-    Extract data from a Parquet file and return as a PETL table.
-
-    The input can be a local filesystem path or any URL supported by fsspec
-    (e.g., S3, GCS).
-
-    :param source: path or URL to Parquet file
-    :param kwargs: passed through to pandas.read_parquet
-    :returns: a PETL Table
-    """
-    src = read_source_from_arg(source)
-    with src.open('rb') as f:
-        df = pd.read_parquet(f, **kwargs)
-    return fromdataframe(df)
-
-
-def toparquet(table, source=None, **kwargs):
-    """
-    Write a PETL table or pandas DataFrame out to a Parquet file via pandas.
-
-    :param table_or_df: PETL table or pandas DataFrame
-    :param source: filesystem path or fsspec-supported URL for output
-    :param kwargs: passed through to pandas.DataFrame.to_parquet
-    :returns: the original PETL Table or pandas DataFrame
-    """
-    src = write_source_from_arg(source)
-    with src.open('wb') as f:
-        df = todataframe(table)
-        df.to_parquet(f, **kwargs)
-    return table
-
-
-Table.fromparquet = fromparquet
-Table.toparquet   = toparquet
diff --git a/petl/test/io/test_arrow.py b/petl/test/io/test_arrow.py
new file mode 100644
index 00000000..af9cc21d
--- /dev/null
+++ b/petl/test/io/test_arrow.py
@@ -0,0 +1,77 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, print_function, division
+
+# internal dependencies
+from petl.util.base import Table, header, data  # core PETL Table helpers
+
+# third-party dependencies
+import pyarrow as pa               # for Table construction
+import pyarrow.dataset as ds       # for streaming reads and dataset writes
+import pyarrow.parquet as pq       # for Parquet I/O
+
+__all__ = (
+    'fromarrow', 'toarrow',
+)
+
+def fromarrow(source, *, format='parquet', columns=None, **kwargs):
+    """
+    Extract data from an Arrow-compatible dataset into a PETL table.
+
+    :param source: path or list of paths for file(s) or directory
+    :param format: dataset format ('parquet', 'orc', etc.)
+    :param columns: list of columns to select
+    :param kwargs: passed to pyarrow.dataset.dataset
+    :returns: a PETL Table with streaming rows
+    """
+    # Create a PyArrow Dataset directly from the source path(s)
+    dataset = ds.dataset(source, format=format, **kwargs)
+    cols = [field.name for field in dataset.schema]
+
+    def all_rows():
+        # header row
+        yield tuple(cols)
+        # data rows
+        for batch in dataset.to_batches(columns=columns):
+            for rec in batch.to_pylist():
+                yield tuple(rec.get(c) for c in cols)
+
+    # Wrap the generator in a PETL Table
+    return Table(all_rows())
+
+
+def toarrow(table, target, *, format='parquet', schema=None, **kwargs):
+    """
+    Write a PETL table to an Arrow file or dataset.
+
+    :param table: PETL Table (first row = header)
+    :param target: output file path or directory
+    :param format: format name ('parquet', 'ipc', etc.)
+    :param schema: optional PyArrow Schema
+    :param kwargs: passed to the writer
+    :returns: the original PETL Table for chaining
+    """
+    # Extract header and data rows
+    hdr = header(table)
+    rows = data(table)
+
+    # Build column-wise Python lists
+    arrays = {c: [] for c in hdr}
+    for row in rows:
+        for c, v in zip(hdr, row):
+            arrays[c].append(v)
+
+    # Create an Arrow Table
+    arrow_tbl = pa.Table.from_pydict(arrays, schema=schema)
+
+    if format == 'parquet':
+        # Write a single Parquet file
+        pq.write_table(arrow_tbl, target, **kwargs)
+    else:
+        # Write a directory-based dataset
+        ds.write_dataset(arrow_tbl, target, format=format, **kwargs)
+
+    return table
+
+# Attach methods to the PETL Table class
+Table.fromarrow = staticmethod(fromarrow)
+Table.toarrow = staticmethod(toarrow)
diff --git a/petl/test/io/test_parquet.py b/petl/test/io/test_parquet.py
deleted file mode 100644
index 8123474a..00000000
--- a/petl/test/io/test_parquet.py
+++ /dev/null
@@ -1,23 +0,0 @@
-import pandas as pd
-import petl as etl
-
-
-def make_sample(tmp_path):
-    df = pd.DataFrame([{'x': 1}, {'x': 2}, {'x': 3}])
-    path = tmp_path / 'foo.parquet'
-    df.to_parquet(path)
-    return path
-
-
-def test_fromparquet(tmp_path):
-    tbl = etl.io.fromparquet(str(make_sample(tmp_path)))
-    assert tbl.header() == ('x',)
-    assert list(tbl.values()) == [(1,), (2,), (3,)]
-
-
-def test_toparquet(tmp_path):
-    tbl = etl.fromdicts([{'y': 10}, {'y': 20}])
-    out = tmp_path / 'out.parquet'
-    tbl.toparquet(str(out))
-    df2 = pd.read_parquet(out)
-    assert list(df2['y']) == [10, 20]
diff --git a/requirements-docs.txt b/requirements-docs.txt
index 7c5ad5f0..af818d32 100644
--- a/requirements-docs.txt
+++ b/requirements-docs.txt
@@ -9,6 +9,6 @@ rinohtype
 setuptools
 setuptools-scm
 
-# add parquet dependencies
+# add pyarrow dependencies
 pandas
 pyarrow
\ No newline at end of file
diff --git a/requirements-formats.txt b/requirements-formats.txt
index b45b4a82..e9f7cccb 100644
--- a/requirements-formats.txt
+++ b/requirements-formats.txt
@@ -5,6 +5,7 @@ intervaltree>=3.0.2
 lxml>=4.6.5
 openpyxl>=2.6.2
 pandas
+pyarrow
 Whoosh>=2.7.4
 xlrd>=2.0.1
 xlwt>=1.3.0
diff --git a/setup.py b/setup.py
index 0c51a7df..d0218ad3 100644
--- a/setup.py
+++ b/setup.py
@@ -34,7 +34,7 @@
         'xlsx': ['openpyxl>=2.6.2'],
         'xpath': ['lxml>=4.4.0'],
         'whoosh': ['whoosh'],
-        "parquet": ["pandas>=1.3.0","pyarrow>=4.0.0"]
+        "parquet": ["pyarrow>=4.0.0"]
     },
     use_scm_version={
         "version_scheme": "guess-next-dev",

From 7425a604a475cc039c39e815bfd526a6cca826cb Mon Sep 17 00:00:00 2001
From: MuhammadBadar1998 <m.adeel.badar@gmail.com>
Date: Sun, 27 Jul 2025 17:01:40 -0400
Subject: [PATCH 13/16] updated install.rst to refrence corrent io

---
 docs/install.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/install.rst b/docs/install.rst
index f098284e..4915346c 100644
--- a/docs/install.rst
+++ b/docs/install.rst
@@ -87,7 +87,7 @@ hdf5
   Note that also are additional software to be installed.
 
 parquet
-  For using :ref:`Parquet files <io_parquet>` via pyarrow.
+  For using :ref:`Parquet and other Arrow formats <io_pyarrow>` via PyArrow.
 
 remote
   For reading and writing from :ref:`Remote Sources <io_remotes>` with `fsspec`.

From e82ca7f4f619b0b10797c94162fd5b20db627123 Mon Sep 17 00:00:00 2001
From: MuhammadBadar1998 <m.adeel.badar@gmail.com>
Date: Sun, 27 Jul 2025 17:57:00 -0400
Subject: [PATCH 14/16] adjusted arrow to work with python 2 and 3

---
 petl/io/arrow.py | 74 ++++++++++++++++++++++++++----------------------
 1 file changed, 40 insertions(+), 34 deletions(-)

diff --git a/petl/io/arrow.py b/petl/io/arrow.py
index ed074cb2..7306a8dd 100644
--- a/petl/io/arrow.py
+++ b/petl/io/arrow.py
@@ -3,70 +3,76 @@
 
 # internal dependencies
 from petl.util.base import Table, header, data
-from petl.io.sources import read_source_from_arg, write_source_from_arg
+
 # third-party dependencies
-import pyarrow as pa                 # for Table construction
-import pyarrow.dataset as ds         # for streaming reads and dataset writes
-import pyarrow.parquet as pq         # for Parquet I/O
+import pyarrow as pa               # PyArrow core
+import pyarrow.dataset as ds       # Arrow dataset API
+import pyarrow.parquet as pq       # Parquet reader/writer
 
 __all__ = (
     'fromarrow', 'toarrow',
 )
 
-def fromarrow(source=None, *, format='parquet', columns=None, **kwargs):
+def fromarrow(source, **kwargs):
     """
-    Extract data from an Arrow-compatible dataset into a PETL table.
+    Extract data from an Arrow-compatible dataset into a PETL Table.
 
-    :param source: path/URL (fsspec-supported) to file(s) or directory
-    :param format: dataset format ('parquet', 'orc', etc.)
-    :param columns: list of columns to select
-    :param kwargs: passed to pyarrow.dataset.dataset
-    :returns: a PETL Table (streaming rows)
+    :param source: file path, list of paths, or directory
+    :param format: dataset format (e.g., 'parquet', 'orc', 'ipc'); default 'parquet'
+    :param columns: list of columns to load; default None (all)
+    :param kwargs: other keyword arguments passed to pyarrow.dataset.dataset
+    :returns: a streaming PETL Table (first row is header)
     """
-    src = read_source_from_arg(source)
-    dataset = ds.dataset(src.path, format=format, **kwargs)
-    cols = [field.name for field in dataset.schema]
+    fmt = kwargs.pop('format', 'parquet')
+    cols_opt = kwargs.pop('columns', None)
+    dataset = ds.dataset(source, format=fmt, **kwargs)
+    column_names = [field.name for field in dataset.schema]
 
-    def _rows():
-        for batch in dataset.to_batches(columns=columns):
+    def all_rows():
+        # yield header
+        yield tuple(column_names)
+        # yield data rows
+        for batch in dataset.to_batches(columns=cols_opt):
             for rec in batch.to_pylist():
-                yield tuple(rec.get(c) for c in cols)
-
-    from petl import fromrows  # import here to avoid circular
-    return fromrows(cols, _rows())
+                yield tuple(rec.get(c) for c in column_names)
 
+    return Table(all_rows())
 
 
-def toarrow(table, target=None, *, format='parquet', schema=None, **kwargs):
+def toarrow(table, target, **kwargs):
     """
-    Write a PETL table to an Arrow file or dataset.
+    Write a PETL Table to an Arrow dataset or file.
 
-    :param table: PETL Table (first row = header)
-    :param target: path/URL for file or directory
-    :param format: format name ('parquet', 'ipc', etc.)
-    :param schema: optional pyarrow.Schema
-    :param kwargs: passed to writer (write_table or write_dataset)
-    :returns: the original PETL Table for chaining
+    :param table: PETL Table (first row is header)
+    :param target: output file path or directory
+    :param format: format name (e.g., 'parquet', 'ipc', 'orc'); default 'parquet'
+    :param schema: optional pa.Schema; default None (infer schema)
+    :param kwargs: passed to writer (pq.write_table or ds.write_dataset)
+    :returns: the original PETL Table
     """
-    tgt = write_source_from_arg(target)
+    fmt = kwargs.pop('format', 'parquet')
+    schema = kwargs.pop('schema', None)
     hdr = header(table)
     rows = data(table)
 
+    # accumulate data by column
     arrays = {c: [] for c in hdr}
     for row in rows:
         for c, v in zip(hdr, row):
             arrays[c].append(v)
 
+    # build Arrow Table
     arrow_tbl = pa.Table.from_pydict(arrays, schema=schema)
 
-    if format == 'parquet':
-        with tgt.open('wb') as f:
-            pq.write_table(arrow_tbl, f, **kwargs)
+    if fmt == 'parquet':
+        # single-file Parquet write
+        pq.write_table(arrow_tbl, target, **kwargs)
     else:
-        ds.write_dataset(arrow_tbl, base_dir=tgt.path, format=format, **kwargs)
+        # directory-based dataset write for other formats
+        ds.write_dataset(arrow_tbl, target, format=fmt, **kwargs)
 
     return table
 
-# Attach to Table class
+# attach to Table class
 Table.fromarrow = staticmethod(fromarrow)
 Table.toarrow = staticmethod(toarrow)

From cfad14bece66022c6adab46c874e380138784048 Mon Sep 17 00:00:00 2001
From: MuhammadBadar1998 <m.adeel.badar@gmail.com>
Date: Sun, 27 Jul 2025 18:16:26 -0400
Subject: [PATCH 15/16] fixed python 2 logic

---
 petl/io/arrow.py | 46 +++++++++++++++-------------------------------
 1 file changed, 15 insertions(+), 31 deletions(-)

diff --git a/petl/io/arrow.py b/petl/io/arrow.py
index 7306a8dd..8a9a48c4 100644
--- a/petl/io/arrow.py
+++ b/petl/io/arrow.py
@@ -4,34 +4,23 @@
 # internal dependencies
 from petl.util.base import Table, header, data
 
-# third-party dependencies
-import pyarrow as pa               # PyArrow core
-import pyarrow.dataset as ds       # Arrow dataset API
-import pyarrow.parquet as pq       # Parquet reader/writer
-
 __all__ = (
     'fromarrow', 'toarrow',
 )
 
 def fromarrow(source, **kwargs):
-    """
-    Extract data from an Arrow-compatible dataset into a PETL Table.
-
-    :param source: file path, list of paths, or directory
-    :param format: dataset format (e.g., 'parquet', 'orc', 'ipc'); default 'parquet'
-    :param columns: list of columns to load; default None (all)
-    :param kwargs: other keyword arguments passed to pyarrow.dataset.dataset
-    :returns: a streaming PETL Table (first row is header)
-    """
-    fmt = kwargs.pop('format', 'parquet')
+    # Lazy import so module can load on Python 2
+    import pyarrow as pa
+    import pyarrow.dataset as ds
+    fmt      = kwargs.pop('format', 'parquet')
     cols_opt = kwargs.pop('columns', None)
-    dataset = ds.dataset(source, format=fmt, **kwargs)
+    dataset  = ds.dataset(source, format=fmt, **kwargs)
     column_names = [field.name for field in dataset.schema]
 
     def all_rows():
-        # yield header
+        # header row
         yield tuple(column_names)
-        # yield data rows
+        # data rows
         for batch in dataset.to_batches(columns=cols_opt):
             for rec in batch.to_pylist():
                 yield tuple(rec.get(c) for c in column_names)
@@ -40,19 +29,14 @@ def all_rows():
 
 
 def toarrow(table, target, **kwargs):
-    """
-    Write a PETL Table to an Arrow dataset or file.
-
-    :param table: PETL Table (first row is header)
-    :param target: output file path or directory
-    :param format: format name (e.g., 'parquet', 'ipc', 'orc'); default 'parquet'
-    :param schema: optional pa.Schema; default None (infer schema)
-    :param kwargs: passed to writer (pq.write_table or ds.write_dataset)
-    :returns: the original PETL Table
-    """
-    fmt = kwargs.pop('format', 'parquet')
+    # Lazy imports so module can load on Python 2
+    import pyarrow as pa
+    import pyarrow.parquet as pq
+    import pyarrow.dataset as ds
+    fmt    = kwargs.pop('format', 'parquet')
     schema = kwargs.pop('schema', None)
-    hdr = header(table)
+
+    hdr  = header(table)
     rows = data(table)
 
     # accumulate data by column
@@ -75,4 +59,4 @@ def toarrow(table, target, **kwargs):
 
 # attach to Table class
 Table.fromarrow = staticmethod(fromarrow)
-Table.toarrow = staticmethod(toarrow)
+Table.toarrow   = staticmethod(toarrow)

From 56071d9197b0f346f3cf2704299190ea13d48439 Mon Sep 17 00:00:00 2001
From: MuhammadBadar1998 <m.adeel.badar@gmail.com>
Date: Sun, 27 Jul 2025 18:24:54 -0400
Subject: [PATCH 16/16] fixed arrow test to be compatible with python 2

---
 petl/test/io/test_arrow.py | 73 ++++++++++++++++++++------------------
 1 file changed, 39 insertions(+), 34 deletions(-)

diff --git a/petl/test/io/test_arrow.py b/petl/test/io/test_arrow.py
index af9cc21d..26bf686c 100644
--- a/petl/test/io/test_arrow.py
+++ b/petl/test/io/test_arrow.py
@@ -4,74 +4,79 @@
 # internal dependencies
 from petl.util.base import Table, header, data  # core PETL Table helpers
 
-# third-party dependencies
-import pyarrow as pa               # for Table construction
-import pyarrow.dataset as ds       # for streaming reads and dataset writes
-import pyarrow.parquet as pq       # for Parquet I/O
-
 __all__ = (
     'fromarrow', 'toarrow',
 )
 
-def fromarrow(source, *, format='parquet', columns=None, **kwargs):
+def fromarrow(source, **kwargs):
     """
-    Extract data from an Arrow-compatible dataset into a PETL table.
+    Extract data from an Arrow-compatible dataset into a PETL Table.
 
-    :param source: path or list of paths for file(s) or directory
-    :param format: dataset format ('parquet', 'orc', etc.)
-    :param columns: list of columns to select
-    :param kwargs: passed to pyarrow.dataset.dataset
+    :param source: file path, list of paths, or directory
+    :param format: dataset format (e.g., 'parquet', 'orc', 'ipc'); default 'parquet'
+    :param columns: list of columns to load; default None (all)
+    :param kwargs: other keyword arguments passed to pyarrow.dataset.dataset
     :returns: a PETL Table with streaming rows
     """
-    # Create a PyArrow Dataset directly from the source path(s)
-    dataset = ds.dataset(source, format=format, **kwargs)
-    cols = [field.name for field in dataset.schema]
+    # Lazy imports for PyArrow
+    import pyarrow.dataset as ds
+    fmt      = kwargs.pop('format', 'parquet')
+    cols_opt = kwargs.pop('columns', None)
+
+    dataset = ds.dataset(source, format=fmt, **kwargs)
+    column_names = [field.name for field in dataset.schema]
 
     def all_rows():
         # header row
-        yield tuple(cols)
+        yield tuple(column_names)
         # data rows
-        for batch in dataset.to_batches(columns=columns):
+        for batch in dataset.to_batches(columns=cols_opt):
             for rec in batch.to_pylist():
-                yield tuple(rec.get(c) for c in cols)
+                yield tuple(rec.get(c) for c in column_names)
 
-    # Wrap the generator in a PETL Table
     return Table(all_rows())
 
 
-def toarrow(table, target, *, format='parquet', schema=None, **kwargs):
+def toarrow(table, target, **kwargs):
     """
-    Write a PETL table to an Arrow file or dataset.
+    Write a PETL Table to an Arrow dataset or file.
 
-    :param table: PETL Table (first row = header)
+    :param table: PETL Table (first row is header)
     :param target: output file path or directory
-    :param format: format name ('parquet', 'ipc', etc.)
-    :param schema: optional PyArrow Schema
-    :param kwargs: passed to the writer
-    :returns: the original PETL Table for chaining
+    :param format: format name (e.g., 'parquet', 'ipc', 'orc'); default 'parquet'
+    :param schema: optional pa.Schema; default None (infer schema)
+    :param kwargs: passed to writer (pq.write_table or ds.write_dataset)
+    :returns: the original PETL Table
     """
-    # Extract header and data rows
-    hdr = header(table)
+    # Lazy imports for PyArrow
+    import pyarrow as pa
+    import pyarrow.parquet as pq
+    import pyarrow.dataset as ds
+
+    fmt    = kwargs.pop('format', 'parquet')
+    schema = kwargs.pop('schema', None)
+
+    hdr  = header(table)
     rows = data(table)
 
-    # Build column-wise Python lists
+    # accumulate data by column
     arrays = {c: [] for c in hdr}
     for row in rows:
         for c, v in zip(hdr, row):
             arrays[c].append(v)
 
-    # Create an Arrow Table
+    # build Arrow Table
     arrow_tbl = pa.Table.from_pydict(arrays, schema=schema)
 
-    if format == 'parquet':
-        # Write a single Parquet file
+    if fmt == 'parquet':
+        # single-file Parquet write
         pq.write_table(arrow_tbl, target, **kwargs)
     else:
-        # Write a directory-based dataset
-        ds.write_dataset(arrow_tbl, target, format=format, **kwargs)
+        # directory-based dataset write for other formats
+        ds.write_dataset(arrow_tbl, target, format=fmt, **kwargs)
 
     return table
 
 # Attach methods to the PETL Table class
 Table.fromarrow = staticmethod(fromarrow)
-Table.toarrow = staticmethod(toarrow)
+Table.toarrow   = staticmethod(toarrow)