"""Prepare the HDTF video dataset: download source videos and cut annotated clips.

Restored from a git patch whose line breaks had been collapsed (subject:
"Added Youtube Video Downloader script and Video Splitter as per Mention in
Paper").  The patch added two scripts, ``donwload_video.py`` and
``split_time_video.py``; their code is reproduced here as one module with the
two steps selectable from the command line::

    python <this file>            # download, then split
    python <this file> download   # only fetch the videos
    python <this file> split      # only cut the annotated clips

``pytube`` and ``moviepy`` are imported inside the functions that use them so
the parsing and renaming helpers stay importable without either package.
"""

import os
import sys

# Locations used by the original scripts.
VIDEO_URL_FILE = "HDTF_dataset/WRA_video_url.txt"
ANNOTATION_FILE = "HDTF_dataset/WRA_annotion_time.txt"
VIDEO_DIR = "HDTF/video_dataset/"
SPLIT_DIR = "./split_time"


def rename_video(video_path, new_name):
    """Rename a video file in place, keeping its directory and extension.

    Spaces in ``new_name`` are replaced with underscores; the extension of
    the existing file is appended to the result.

    Args:
        video_path: Path of the existing file.
        new_name: New base name, without extension.

    Returns:
        The path the file now lives at.

    Raises:
        OSError: If the file cannot be moved (e.g. ``video_path`` is missing).
    """
    directory, filename = os.path.split(video_path)
    extension = os.path.splitext(filename)[1]
    new_path = os.path.join(directory, new_name.replace(" ", "_") + extension)
    # os.replace overwrites an existing target on every platform, whereas
    # os.rename raises on Windows when the target exists; this keeps a
    # repeated download run from failing on already-renamed files.
    os.replace(video_path, new_path)
    return new_path


def parse_url_line(line):
    """Split one line of the URL list into ``(video_name, video_url)``.

    Fields may be separated by any run of whitespace.

    Returns:
        A ``(name, url)`` tuple, or ``None`` for a blank line.

    Raises:
        ValueError: If the line does not contain exactly two fields.
    """
    fields = line.split()
    if not fields:
        return None
    if len(fields) != 2:
        raise ValueError(f"expected '<name> <url>', got {line.strip()!r}")
    return fields[0], fields[1]


def download_videos(url_file=VIDEO_URL_FILE, output_dir=VIDEO_DIR):
    """Download every video listed in ``url_file`` into ``output_dir``.

    For each ``<name> <url>`` line the highest-resolution progressive mp4
    stream is downloaded and the resulting file is renamed to ``<name>`` plus
    the downloaded file's extension.  A failure on one entry is reported and
    the remaining entries are still processed.

    Args:
        url_file: Text file with one ``<name> <url>`` pair per line.
        output_dir: Directory that receives the downloaded files.

    Raises:
        OSError: If ``url_file`` cannot be read.
    """
    from pytube import YouTube  # lazy: only needed for the download step

    with open(url_file, "r", encoding="utf-8") as handle:
        lines = handle.readlines()

    os.makedirs(output_dir, exist_ok=True)
    for line in lines:
        entry = line.strip()
        if not entry:
            continue
        try:
            vid_name, vid_url = parse_url_line(entry)
            yt = YouTube(vid_url)
            stream = (
                yt.streams.filter(progressive=True, file_extension="mp4")
                .order_by("resolution")
                .desc()
                .first()
            )
            if stream is None:
                raise ValueError("no progressive mp4 stream available")
            downloaded_path = stream.download(output_dir)
            rename_video(downloaded_path, vid_name)
            print(f"Downloaded: {yt.title}")
        except Exception as e:  # batch boundary: report and keep going
            print(f"Error downloading {entry}: {e}")


def parse_timestamp(text):
    """Convert ``"SS"``, ``"MM:SS"`` or ``"HH:MM:SS"`` into whole seconds.

    Surrounding whitespace is ignored.

    Raises:
        ValueError: If a field is not an integer or there are more than
            three fields.
    """
    parts = text.strip().split(":")
    if len(parts) > 3:
        raise ValueError(f"unsupported timestamp {text!r}")
    seconds = 0
    for part in parts:
        seconds = seconds * 60 + int(part)
    return seconds


def parse_interval(text):
    """Parse ``"start-end"`` into ``(start_seconds, end_seconds)``.

    Both sides use the formats accepted by :func:`parse_timestamp`.

    Raises:
        ValueError: If the ``-`` separator is missing or a side is malformed.
    """
    start_text, separator, end_text = text.strip().partition("-")
    if not separator:
        raise ValueError(f"expected 'start-end', got {text.strip()!r}")
    return parse_timestamp(start_text), parse_timestamp(end_text)


def parse_annotation_line(line):
    """Split one annotation line into ``(video_name, [interval, ...])``.

    Fields may be separated by any run of whitespace, so a trailing newline
    or trailing spaces never end up inside the name or an interval.

    Returns:
        A ``(name, intervals)`` tuple, or ``None`` for a blank line.
    """
    fields = line.split()
    if not fields:
        return None
    return fields[0], fields[1:]


def split_video_with_audio(video_path, time_intervals, output_directory):
    """Cut ``video_path`` into one mp4 per interval, audio included.

    Clip ``i`` (1-based) is written to
    ``<output_directory>/<video stem>_<i>.mp4`` using the ``libx264`` codec.
    Blank interval strings are ignored.

    Args:
        video_path: Path of the source video.
        time_intervals: Iterable of ``"start-end"`` strings (see
            :func:`parse_interval`).
        output_directory: Directory for the clips; created if missing.

    Raises:
        ValueError: If an interval is malformed.  Clips for earlier intervals
            have already been written at that point.
    """
    from moviepy.editor import VideoFileClip  # lazy: only needed for splitting

    intervals = [text for text in time_intervals if text.strip()]
    os.makedirs(output_directory, exist_ok=True)
    stem = os.path.splitext(os.path.basename(video_path))[0]

    video_clip = VideoFileClip(video_path)
    try:
        for index, interval in enumerate(intervals, start=1):
            start_seconds, end_seconds = parse_interval(interval)
            # The sub-clip carries the matching slice of the audio track, so
            # no separate audio handling is required.
            clip = video_clip.subclip(start_seconds, end_seconds)
            try:
                output_filename = os.path.join(
                    output_directory, f"{stem}_{index}.mp4"
                )
                clip.write_videofile(output_filename, codec="libx264")
            finally:
                clip.close()
    finally:
        # Always release the source clip, even when an interval fails.
        video_clip.close()


def split_videos(
    annotation_file=ANNOTATION_FILE,
    video_dir=VIDEO_DIR,
    output_directory=SPLIT_DIR,
):
    """Split every annotated video listed in ``annotation_file``.

    Each non-blank line is ``<video file name> <start-end> [<start-end> ...]``;
    the file name is looked up inside ``video_dir``.  A failure on one video
    is reported and the remaining videos are still processed.

    Raises:
        OSError: If ``annotation_file`` cannot be read.
    """
    with open(annotation_file, "r", encoding="utf-8") as handle:
        lines = handle.readlines()

    for line in lines:
        parsed = parse_annotation_line(line)
        if parsed is None:
            continue
        vid_name, intervals = parsed
        print(vid_name, intervals)
        video_path = os.path.join(video_dir, vid_name)
        try:
            split_video_with_audio(video_path, intervals, output_directory)
        except Exception as e:  # batch boundary: report and keep going
            print("Error is getting ", e)


_STEPS = {"download": download_videos, "split": split_videos}


def main(argv=None):
    """Run the requested steps (``download`` and/or ``split``), in order.

    With no arguments both steps run, download first.

    Raises:
        SystemExit: If an unknown step name is given.
    """
    requested = list(sys.argv[1:] if argv is None else argv) or list(_STEPS)
    unknown = [step for step in requested if step not in _STEPS]
    if unknown:
        raise SystemExit(
            f"unknown step(s): {', '.join(unknown)}; "
            f"choose from: {', '.join(_STEPS)}"
        )
    for step in requested:
        _STEPS[step]()


if __name__ == "__main__":
    main()