-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
60 lines (48 loc) · 1.91 KB
/
utils.py
File metadata and controls
60 lines (48 loc) · 1.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from tqdm import tqdm
import json
import torch
from typing import Any, List, Dict, Tuple
from transformers import AutoModelForCausalLM, AutoTokenizer
import pandas as pd
import numpy as np
import pickle
import os
def load_model(model_name_or_path: str, torch_dtype: Any = torch.bfloat16) -> Tuple[Any, Any]:
    """Load a pretrained causal language model together with its tokenizer.

    Args:
        model_name_or_path: Value forwarded unchanged to both
            ``from_pretrained`` calls.
        torch_dtype: Forwarded as the ``torch_dtype`` keyword when loading the
            model; defaults to ``torch.bfloat16``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # Tuple elements are evaluated left to right, so the model is loaded
    # before the tokenizer.
    return (
        AutoModelForCausalLM.from_pretrained(model_name_or_path, torch_dtype=torch_dtype),
        AutoTokenizer.from_pretrained(model_name_or_path),
    )
def sorted_highest_score(cluster_ucb: Dict[int, float]) -> List[int]:
    """Rank cluster ids from the highest UCB score to the lowest.

    Args:
        cluster_ucb: Mapping from cluster id to its score.

    Returns:
        The mapping's keys ordered by descending score. Keys with equal
        scores keep the order in which they appear in ``cluster_ucb``.
    """
    # Sorting the keys by their looked-up score is equivalent to sorting the
    # (key, score) pairs by score and then discarding the scores.
    return sorted(cluster_ucb, key=cluster_ucb.__getitem__, reverse=True)
def load_pickle(file_path: str) -> Any:
    """Read a pickle file and return the object stored in it.

    This is a best-effort loader: any exception raised while opening or
    unpickling the file is reported on stdout and ``None`` is returned
    instead of propagating the error.

    NOTE(review): unpickling can execute arbitrary code, so this should only
    be pointed at files from a trusted source - confirm against callers.

    Args:
        file_path: Path of the pickle file to read.

    Returns:
        The unpickled object, or ``None`` if loading failed.
    """
    try:
        with open(file_path, 'rb') as handle:
            return pickle.load(handle)
    except Exception as e:
        print(f"Error loading pickle file: {e}")
    # Reached only when the load above failed.
    return None
def load_jsonl(filepath: str) -> List[Dict]:
    """Parse a UTF-8 JSON Lines file into a list of decoded records.

    Each line is stripped of surrounding whitespace; lines that are empty
    after stripping are skipped, and every remaining line is decoded with
    ``json.loads``.

    Args:
        filepath: Path of the JSONL file to read.

    Returns:
        The decoded records, in file order.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        stripped_lines = (raw.strip() for raw in f)
        return [json.loads(text) for text in stripped_lines if text]
def dump_jsonl(data: List[Dict], filepath: str) -> None:
    """Serialize records to a UTF-8 JSON Lines file, one record per line.

    ``make_needed_dir`` is called on ``filepath`` first, then the file is
    opened in write mode, replacing any existing content. Non-ASCII
    characters are written as-is rather than escaped
    (``ensure_ascii=False``).

    Args:
        data: Records to write, in order.
        filepath: Destination path of the JSONL file.
    """
    make_needed_dir(filepath)
    with open(filepath, 'w', encoding='utf-8') as out:
        out.writelines(
            json.dumps(record, ensure_ascii=False) + '\n' for record in data
        )
def make_needed_dir(file_path: str) -> None:
    """Create the parent directory of ``file_path``, including intermediates.

    Nothing is done when ``file_path`` has no directory component (a bare
    file name) or when the parent directory already exists.

    Args:
        file_path: Path of a file whose parent directory should exist.

    Raises:
        OSError: If the directory cannot be created, e.g. because a path
            component exists but is not a directory, or due to permissions.
    """
    dir_path = os.path.dirname(file_path)
    if dir_path:
        # exist_ok=True replaces a separate os.path.exists() check, which was
        # race-prone: another process or thread creating the directory between
        # the check and makedirs() would raise FileExistsError.
        os.makedirs(dir_path, exist_ok=True)