KPMP-5807: rename files in-place in globus dir before copying to dlu#167
KPMP-5807: rename files in-place in globus dir before copying to dlu#167HaneenT wants to merge 1 commit into
Conversation
WalkthroughThe pull request modifies file handling in ChangesFile Rename-Then-Copy Workflow
🚥 Pre-merge checks | ✅ 2✅ Passed checks (2 passed)
✏️ Tip: You can configure your own custom pre-merge checks in the settings. ✨ Finishing Touches🧪 Generate unit tests (beta)
Tip 💬 Introducing Slack Agent: The best way for teams to turn conversations into code.Slack Agent is built on CodeRabbit's deep understanding of your code, so your team can collaborate across the entire SDLC without losing context.
Built for teams:
One agent for your entire SDLC. Right inside Slack. Thanks for using CodeRabbit! It's free for OSS, and your support helps us grow. If you like it, consider giving us a shout-out. Comment |
There was a problem hiding this comment.
Actionable comments posted: 3
🧹 Nitpick comments (1)
data_management/services/dlu_filesystem.py (1)
137-137: 💤 Low valueAvoid reusing loop variable name.
Line 137 reassigns the loop variable
fileto a newDLUFileinstance. This shadows the original loop variable and makes the code harder to understand. Use a distinct name likedlu_filefor clarity.♻️ Proposed variable rename
- file = DLUFile(name=dest_file_name, path=dest_package_directory, + dlu_file = DLUFile(name=dest_file_name, path=dest_package_directory, checksum=calculate_checksum(dest_path), size=os.path.getsize(dest_path)) - dluFiles.append(file) + dluFiles.append(dlu_file)
ℹ️ Review info
⚙️ Run configuration
Configuration used: Organization UI
Review profile: CHILL
Plan: Pro
Run ID: 96f24bf9-83e9-4997-aa9e-269ed7181ca8
📒 Files selected for processing (1)
data_management/services/dlu_filesystem.py
| for file in file_list: | ||
| dest_file = os.path.join(dest_package_directory, slide_name_map[file.name]) | ||
| logger.info("Copying file " + os.path.join(source_package_directory, file.name) + " to " | ||
| + os.path.join(dest_package_directory, slide_name_map[file.name])) | ||
| shutil.copy(os.path.join(source_package_directory, file.name), | ||
| dest_file) | ||
| file = DLUFile(name=slide_name_map[file.name], path=dest_package_directory, | ||
| checksum=calculate_checksum(dest_file), size=os.path.getsize(dest_file)) | ||
| dest_file_name = slide_name_map[file.name] | ||
| renamed_src_path = os.path.join(source_package_directory, dest_file_name) | ||
| dest_path = os.path.join(dest_package_directory, dest_file_name) | ||
|
|
||
| logger.info("Renaming file " + os.path.join(source_package_directory, file.name) + " to " + renamed_src_path) | ||
| os.rename(os.path.join(source_package_directory, file.name), renamed_src_path) | ||
| logger.info("Copying file " + renamed_src_path + " to " + dest_path) | ||
| shutil.copy(renamed_src_path, dest_path) | ||
| file = DLUFile(name=dest_file_name, path=dest_package_directory, | ||
| checksum=calculate_checksum(dest_path), size=os.path.getsize(dest_path)) | ||
| dluFiles.append(file) | ||
| return dluFiles |
There was a problem hiding this comment.
Add error handling and validate unique destination names.
The method lacks error handling around file operations and doesn't check for duplicate destination filenames. This can cause:
- Silent overwrites: If multiple source files map to the same
dest_file_name, later files will overwrite earlier ones without warning - Unhandled exceptions: Any I/O error (permissions, disk full, etc.) will crash without cleanup
- Partial state: Failed operations leave destination directory in an inconsistent state
🛡️ Proposed fix with validation and error handling
def rename_and_move_files(self, file_list: list[DLUFile], slide_name_map, package_id ):
dluFiles = []
dest_package_directory = os.path.join(self.dlu_data_directory, self.dlu_package_dir_prefix + package_id)
if os.path.exists(dest_package_directory):
shutil.rmtree(dest_package_directory)
if not os.path.exists(dest_package_directory):
logger.info("Creating directory " + dest_package_directory)
os.makedirs(dest_package_directory, exist_ok=True)
source_package_directory = self.globus_data_directory + '/' + self.globus_dir_prefix + package_id
+
+ # Validate no duplicate destination names
+ dest_names = set()
for file in file_list:
+ if file.name not in slide_name_map:
+ raise ValueError(f"File '{file.name}' not found in slide_name_map")
dest_file_name = slide_name_map[file.name]
+ if dest_file_name in dest_names:
+ raise ValueError(f"Duplicate destination filename: '{dest_file_name}'")
+ dest_names.add(dest_file_name)
+
+ # Process files with error handling
+ for file in file_list:
+ dest_file_name = slide_name_map[file.name]
- renamed_src_path = os.path.join(source_package_directory, dest_file_name)
+ src_path = os.path.join(source_package_directory, file.name)
dest_path = os.path.join(dest_package_directory, dest_file_name)
-
- logger.info("Renaming file " + os.path.join(source_package_directory, file.name) + " to " + renamed_src_path)
- os.rename(os.path.join(source_package_directory, file.name), renamed_src_path)
- logger.info("Copying file " + renamed_src_path + " to " + dest_path)
- shutil.copy(renamed_src_path, dest_path)
- file = DLUFile(name=dest_file_name, path=dest_package_directory,
+
+ try:
+ logger.info(f"Copying file {src_path} to {dest_path}")
+ shutil.copy(src_path, dest_path)
+ except (IOError, OSError) as e:
+ logger.error(f"Failed to copy {src_path} to {dest_path}: {e}")
+ raise
+
+ dlu_file = DLUFile(name=dest_file_name, path=dest_package_directory,
checksum=calculate_checksum(dest_path), size=os.path.getsize(dest_path))
- dluFiles.append(file)
+ dluFiles.append(dlu_file)
return dluFiles| dest_file) | ||
| file = DLUFile(name=slide_name_map[file.name], path=dest_package_directory, | ||
| checksum=calculate_checksum(dest_file), size=os.path.getsize(dest_file)) | ||
| dest_file_name = slide_name_map[file.name] |
There was a problem hiding this comment.
Add KeyError protection for missing slide name mappings.
Line 129 accesses slide_name_map[file.name] without checking if the key exists. If a file name is not present in the mapping, this will raise a KeyError and crash the process, potentially leaving the source directory in a partially renamed state.
🛡️ Proposed fix with KeyError handling
for file in file_list:
- dest_file_name = slide_name_map[file.name]
+ if file.name not in slide_name_map:
+ raise ValueError(f"File '{file.name}' not found in slide_name_map")
+ dest_file_name = slide_name_map[file.name]
renamed_src_path = os.path.join(source_package_directory, dest_file_name)📝 Committable suggestion
‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.
| dest_file_name = slide_name_map[file.name] | |
| if file.name not in slide_name_map: | |
| raise ValueError(f"File '{file.name}' not found in slide_name_map") | |
| dest_file_name = slide_name_map[file.name] |
| logger.info("Renaming file " + os.path.join(source_package_directory, file.name) + " to " + renamed_src_path) | ||
| os.rename(os.path.join(source_package_directory, file.name), renamed_src_path) | ||
| logger.info("Copying file " + renamed_src_path + " to " + dest_path) | ||
| shutil.copy(renamed_src_path, dest_path) |
There was a problem hiding this comment.
Critical: In-place source mutation creates data loss risk.
The code renames files directly in the source Globus directory (line 134), then copies them (line 136). This approach is risky because:
- Data loss on failure: If
shutil.copy()fails afteros.rename()succeeds, the original file is permanently renamed and not copied, leaving the source in an inconsistent state - No rollback: If the process crashes partway through, previously renamed files cannot be easily restored
- Concurrent access issues: Other processes reading from the Globus directory may fail if files are renamed mid-operation
- Source preservation: The source directory should typically remain unchanged until the entire operation succeeds
The standard pattern is to copy directly from source to destination with the new name, leaving the source intact until success is confirmed.
♻️ Proposed fix: Copy directly without mutating source
for file in file_list:
+ if file.name not in slide_name_map:
+ raise ValueError(f"File '{file.name}' not found in slide_name_map")
dest_file_name = slide_name_map[file.name]
- renamed_src_path = os.path.join(source_package_directory, dest_file_name)
+ src_path = os.path.join(source_package_directory, file.name)
dest_path = os.path.join(dest_package_directory, dest_file_name)
- logger.info("Renaming file " + os.path.join(source_package_directory, file.name) + " to " + renamed_src_path)
- os.rename(os.path.join(source_package_directory, file.name), renamed_src_path)
- logger.info("Copying file " + renamed_src_path + " to " + dest_path)
- shutil.copy(renamed_src_path, dest_path)
+ logger.info(f"Copying file {src_path} to {dest_path}")
+ shutil.copy(src_path, dest_path)
file = DLUFile(name=dest_file_name, path=dest_package_directory,
checksum=calculate_checksum(dest_path), size=os.path.getsize(dest_path))📝 Committable suggestion
‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.
| logger.info("Renaming file " + os.path.join(source_package_directory, file.name) + " to " + renamed_src_path) | |
| os.rename(os.path.join(source_package_directory, file.name), renamed_src_path) | |
| logger.info("Copying file " + renamed_src_path + " to " + dest_path) | |
| shutil.copy(renamed_src_path, dest_path) | |
| for file in file_list: | |
| if file.name not in slide_name_map: | |
| raise ValueError(f"File '{file.name}' not found in slide_name_map") | |
| dest_file_name = slide_name_map[file.name] | |
| src_path = os.path.join(source_package_directory, file.name) | |
| dest_path = os.path.join(dest_package_directory, dest_file_name) | |
| logger.info(f"Copying file {src_path} to {dest_path}") | |
| shutil.copy(src_path, dest_path) | |
| file = DLUFile(name=dest_file_name, path=dest_package_directory, | |
| checksum=calculate_checksum(dest_path), size=os.path.getsize(dest_path)) |
Summary by CodeRabbit