If the s3dlio library is installed, it is used to generate the synthetic data. However, the object used to hold the generated data in memory (a `BytesView`, per the traceback below) is incompatible with the logic that implements the other libraries (MinIO SDK and s3torchconnector). Below is the error I received when generating data with the MinIO SDK:
Error executing job with overrides: ['workload=unet3d_h100_s3', '++workload.workflow.generate_data=True', '++workload.workflow.train=False']
Traceback (most recent call last):
File "/home/keithpij/DLIO_local_changes/dlio_benchmark/main.py", line 517, in run_benchmark
benchmark.initialize()
File "/home/keithpij/DLIO_local_changes/.venv/lib/python3.12/site-packages/dftracer/python/common.py", line 504, in wrapper
x = f(*args, **kwargs)
^^^^^^^^^^^^^^^^^^
File "/home/keithpij/DLIO_local_changes/dlio_benchmark/main.py", line 199, in initialize
self.data_generator.generate()
File "/home/keithpij/DLIO_local_changes/.venv/lib/python3.12/site-packages/dftracer/python/common.py", line 504, in wrapper
x = f(*args, **kwargs)
^^^^^^^^^^^^^^^^^^
File "/home/keithpij/DLIO_local_changes/dlio_benchmark/data_generator/npz_generator.py", line 102, in generate
self._generate_files(_write, "NPZ Data")
File "/home/keithpij/DLIO_local_changes/dlio_benchmark/data_generator/data_generator.py", line 247, in _generate_files
f.result()
File "/usr/lib/python3.12/concurrent/futures/_base.py", line 449, in result
return self.__get_result()
^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.12/concurrent/futures/_base.py", line 401, in __get_result
raise self._exception
File "/usr/lib/python3.12/concurrent/futures/thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/keithpij/DLIO_local_changes/dlio_benchmark/data_generator/data_generator.py", line 215, in _upload
self.storage.put_data(path, buf)
File "/home/keithpij/DLIO_local_changes/.venv/lib/python3.12/site-packages/dftracer/python/common.py", line 504, in wrapper
x = f(*args, **kwargs)
^^^^^^^^^^^^^^^^^^
File "/home/keithpij/DLIO_local_changes/dlio_benchmark/storage/obj_store_lib.py", line 501, in put_data
writer.write(data.getvalue() if hasattr(data, 'getvalue') else data)
File "/home/keithpij/DLIO_local_changes/dlio_benchmark/storage/obj_store_lib.py", line 111, in write
self.buffer.write(data.encode())
^^^^^^^^^^^
AttributeError: 'builtins.BytesView' object has no attribute 'encode'
If the s3dlio library is installed, it is used to generate the synthetic data. However, the object used to hold the generated data in memory (a `BytesView`, per the traceback below) is incompatible with the logic that implements the other libraries (MinIO SDK and s3torchconnector). Below is the error I received when generating data with the MinIO SDK:
Error executing job with overrides: ['workload=unet3d_h100_s3', '++workload.workflow.generate_data=True', '++workload.workflow.train=False']
Traceback (most recent call last):
File "/home/keithpij/DLIO_local_changes/dlio_benchmark/main.py", line 517, in run_benchmark
benchmark.initialize()
File "/home/keithpij/DLIO_local_changes/.venv/lib/python3.12/site-packages/dftracer/python/common.py", line 504, in wrapper
x = f(*args, **kwargs)
^^^^^^^^^^^^^^^^^^
File "/home/keithpij/DLIO_local_changes/dlio_benchmark/main.py", line 199, in initialize
self.data_generator.generate()
File "/home/keithpij/DLIO_local_changes/.venv/lib/python3.12/site-packages/dftracer/python/common.py", line 504, in wrapper
x = f(*args, **kwargs)
^^^^^^^^^^^^^^^^^^
File "/home/keithpij/DLIO_local_changes/dlio_benchmark/data_generator/npz_generator.py", line 102, in generate
self._generate_files(_write, "NPZ Data")
File "/home/keithpij/DLIO_local_changes/dlio_benchmark/data_generator/data_generator.py", line 247, in _generate_files
f.result()
File "/usr/lib/python3.12/concurrent/futures/_base.py", line 449, in result
return self.__get_result()
^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.12/concurrent/futures/_base.py", line 401, in __get_result
raise self._exception
File "/usr/lib/python3.12/concurrent/futures/thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/keithpij/DLIO_local_changes/dlio_benchmark/data_generator/data_generator.py", line 215, in _upload
self.storage.put_data(path, buf)
File "/home/keithpij/DLIO_local_changes/.venv/lib/python3.12/site-packages/dftracer/python/common.py", line 504, in wrapper
x = f(*args, **kwargs)
^^^^^^^^^^^^^^^^^^
File "/home/keithpij/DLIO_local_changes/dlio_benchmark/storage/obj_store_lib.py", line 501, in put_data
writer.write(data.getvalue() if hasattr(data, 'getvalue') else data)
File "/home/keithpij/DLIO_local_changes/dlio_benchmark/storage/obj_store_lib.py", line 111, in write
self.buffer.write(data.encode())
^^^^^^^^^^^
AttributeError: 'builtins.BytesView' object has no attribute 'encode'