-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstats.py
More file actions
106 lines (87 loc) · 2.99 KB
/
Copy pathstats.py
File metadata and controls
106 lines (87 loc) · 2.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import argparse
import csv
import math
import statistics
from pathlib import Path
from typing import Dict, List, Optional, Tuple
def _to_float(value: str) -> Optional[float]:
s = value.strip()
if not s:
return None
try:
return float(s)
except ValueError:
return None
def _read_rows(path: Path, encoding: str) -> Tuple[List[str], List[Dict[str, str]]]:
with path.open("r", encoding=encoding, newline="") as f:
sample = f.read(65536)
f.seek(0)
try:
dialect = csv.Sniffer().sniff(sample)
except csv.Error:
dialect = csv.excel
try:
has_header = csv.Sniffer().has_header(sample)
except csv.Error:
has_header = True
if has_header:
reader = csv.DictReader(f, dialect=dialect)
if not reader.fieldnames:
raise ValueError("CSV appears to have no header row (field names not found).")
rows = [row for row in reader if row]
return list(reader.fieldnames), rows
reader2 = csv.reader(f, dialect=dialect)
first = next(reader2, None)
if first is None:
raise ValueError("CSV file is empty.")
fieldnames = [f"col_{i+1}" for i in range(len(first))]
rows2: List[Dict[str, str]] = [dict(zip(fieldnames, first))]
for r in reader2:
if not r:
continue
rows2.append(dict(zip(fieldnames, r)))
return fieldnames, rows2
def main(argv: Optional[List[str]] = None) -> int:
    """Print count/mean/min/max/sum for every numeric column of a CSV file.

    Args:
        argv: Command-line arguments without the program name. When
            ``None`` (the default), argparse reads ``sys.argv[1:]``.

    Returns:
        ``0`` after the report has been printed.

    Raises:
        SystemExit: With an error message if the path does not exist, is
            not a regular file, or cannot be read and parsed.
    """
    parser = argparse.ArgumentParser(
        description="Compute mean/min/max/sum for numeric columns in a CSV file."
    )
    parser.add_argument("csv_path", help="Path to CSV file")
    parser.add_argument(
        "--encoding",
        default="utf-8",
        help="File encoding (default: utf-8). Example: cp1251",
    )
    args = parser.parse_args(argv)

    path = Path(args.csv_path)
    if not path.exists():
        raise SystemExit(f"File not found: {path}")
    if not path.is_file():
        raise SystemExit(f"Not a file: {path}")

    # Report read problems the same way as the path checks above instead of
    # letting a traceback escape. UnicodeDecodeError is a ValueError, and an
    # unknown --encoding name raises LookupError.
    try:
        fieldnames, rows = _read_rows(path, args.encoding)
    except (OSError, LookupError, ValueError, csv.Error) as exc:
        raise SystemExit(f"Failed to read {path}: {exc}") from exc

    # Repeated header names share one dict key per row; iterating them once
    # each avoids appending the same cell several times and printing the
    # column more than once.
    columns = list(dict.fromkeys(fieldnames))

    values_by_col: Dict[str, List[float]] = {name: [] for name in columns}
    for row in rows:
        for name in columns:
            raw = row.get(name, "")
            if raw is None:
                # Short rows may carry None for the columns they lack.
                continue
            v = _to_float(raw)
            if v is not None:
                values_by_col[name].append(v)

    printed_any = False
    for name, vals in values_by_col.items():
        if not vals:
            continue
        printed_any = True
        print(f"Column: {name}")
        print(f" count: {len(vals)}")
        print(f" mean : {statistics.mean(vals)}")
        print(f" min : {min(vals)}")
        print(f" max : {max(vals)}")
        print(f" sum : {sum(vals)}")
        print()

    if not printed_any:
        print("No numeric columns found (or all numeric values are empty).")
    return 0
if __name__ == "__main__":
    # Script entry point: main()'s return value becomes the exit status.
    exit_status = main()
    raise SystemExit(exit_status)