4 mēneši atpakaļ · c1def12a03
--- a/audio_to_visualization/audio_to_visualization.py
+++ b/audio_to_visualization/audio_to_visualization.py
@@ -18,6 +18,20 @@ def restricted_float(x):
 
				   return x
			
 
				 
			
 
				 
			
 
				+# Arg validation for aspect ratio (e.g., "3:2", "16:9")
			
 
				+def aspect_ratio(x):
			
 
				+  try:
			
 
				+    parts = x.split(':')
			
 
				+    if len(parts) != 2:
			
 
				+      raise ValueError()
			
 
				+    w, h = int(parts[0]), int(parts[1])
			
 
				+    if w <= 0 or h <= 0:
			
 
				+      raise ValueError()
			
 
				+    return (w, h)
			
 
				+  except ValueError:
			
 
				+    raise argparse.ArgumentTypeError("%r is not a valid aspect ratio (use format W:H, e.g., 3:2)" % (x,))
			
 
				+
			
 
				+
			
 
				 def call_video_creator_with_args():
			
 
				   parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, add_help=True)
			
 
				   parser.add_argument("--audio",
			
@@ -38,34 +52,88 @@ def call_video_creator_with_args():
 
				                       help="background color for visualization waveforms")
			
 
				   parser.add_argument("--background-color-opacity", type=restricted_float, default=0.5,
			
 
				                       help="opacity for visualization background color (0.0-1.0)", required=False)
			
 
				+  parser.add_argument("--aspect-ratio", type=aspect_ratio, default=None,
			
 
				+                      help="crop/resize image to aspect ratio with minimal loss (format: W:H, e.g., 3:2, 16:9)", required=False)
			
 
				+  parser.add_argument("--test-frames", type=int, default=None,
			
 
				+                      help="output only N frames for testing (e.g., 50 for ~2 seconds at 25fps)", required=False)
			
 
				 
			
 
				   args, _ = parser.parse_known_args()
			
 
				   create_visualization(args.audio, args.background, args.output, args.vis_background_to_vid_ratio,
			
 
				                        args.vis_waves_to_vid_ratio, args.vis_color, args.vis_color_opacity,
			
 
				-                       args.background_color, args.background_color_opacity)
			
 
				+                       args.background_color, args.background_color_opacity, args.aspect_ratio,
			
 
				+                       args.test_frames)
			
 
				 
			
 
				 def create_visualization(audio, background, output, vis_background_to_vid_ratio,
			
 
				                          vis_waves_to_vid_ratio, vis_color, vis_color_opacity,
			
 
				-                         background_color, background_color_opacity):
			
 
				+                         background_color, background_color_opacity, aspect_ratio=None,
			
 
				+                         test_frames=None):
			
 
				   # Get metadata for visualization
			
 
				   duration = get_audio_duration(audio)
			
 
				-  (bg_height, bg_width) = get_image_resolution(background)
			
 
				+  (bg_width, bg_height) = get_image_resolution(background)
			
 
				+
			
 
				+  # Apply aspect ratio crop if specified
			
 
				+  bg_stream = ffmpeg.input(background)
			
 
				+  if aspect_ratio:
			
 
				+    (bg_width, bg_height, crop_x, crop_y) = calculate_crop_for_aspect_ratio(bg_width, bg_height, aspect_ratio)
			
 
				+    bg_stream = bg_stream.filter('crop', bg_width, bg_height, crop_x, crop_y)
			
 
				+
			
 
				   waves_height = floor(bg_height * vis_waves_to_vid_ratio)
			
 
				   waves_background_height = floor(bg_height * vis_background_to_vid_ratio)
			
 
				 
			
 
				+  # Ensure visualization spans full width of background
			
 
				+  viz_width = bg_width
			
 
				+  viz_height = waves_background_height
			
 
				+
			
 
				+  # Debug output
			
 
				+  print(f"Background: {bg_width}x{bg_height}")
			
 
				+  print(f"Waves height: {waves_height}, Background height: {waves_background_height}")
			
 
				+  print(f"Viz width: {viz_width}")
			
 
				+
			
 
				   # Compile the waves and a background color
			
 
				   stream = ffmpeg.input(audio)
			
 
				   vis_colors = "|".join(vis_color)
			
 
				-  vid_stream = get_audio_waveforms(stream, bg_width, waves_height, vis_colors, vis_color_opacity)
			
 
				-  background_stream = generate_background_color(bg_width, waves_background_height, background_color,
			
 
				+  vid_stream = get_audio_waveforms(stream, viz_width, waves_height, vis_colors, vis_color_opacity)
			
 
				+  background_stream = generate_background_color(viz_width, waves_background_height, background_color,
			
 
				                                                 background_color_opacity, duration)
			
 
				   waves_center_offset = floor((waves_background_height - waves_height)/2)
			
 
				   viz = ffmpeg.filter([background_stream, vid_stream], 'overlay', 0, waves_center_offset)
			
 
				   waves_background_center_offset = floor((bg_height - waves_background_height)/2)
			
 
				+  waves_background_center_x = floor((bg_width - viz_width) / 2)
			
 
				+
			
 
				+  # Overlay the waves stream on top of our static image, centered both horizontally and vertically
			
 
				+  vid_stream = ffmpeg.filter([bg_stream, viz], 'overlay', waves_background_center_x, waves_background_center_offset)
			
 
				+
			
 
				+  if test_frames:
			
 
				+    # Output only N frames for testing (no audio)
			
 
				+    ffmpeg.output(vid_stream, output, vframes=test_frames).run()
			
 
				+  else:
			
 
				+    ffmpeg.output(stream.audio, vid_stream, output).run()
			
 
				+
			
 
				+
			
 
				+# Calculate crop dimensions for target aspect ratio with minimal image loss
			
 
				+def calculate_crop_for_aspect_ratio(width, height, aspect_ratio):
			
 
				+  target_w, target_h = aspect_ratio
			
 
				+  target_ratio = target_w / target_h
			
 
				+  current_ratio = width / height
			
 
				+
			
 
				+  if current_ratio > target_ratio:
			
 
				+    # Image is wider than target, crop width (left and right)
			
 
				+    new_width = floor(height * target_ratio)
			
 
				+    new_height = height
			
 
				+  else:
			
 
				+    # Image is taller than target, crop height (top and bottom)
			
 
				+    new_width = width
			
 
				+    new_height = floor(width / target_ratio)
			
 
				+
			
 
				+  # Ensure dimensions are even (required by many video codecs)
			
 
				+  new_width = new_width - (new_width % 2)
			
 
				+  new_height = new_height - (new_height % 2)
			
 
				+
			
 
				+  # Center the crop
			
 
				+  crop_x = floor((width - new_width) / 2)
			
 
				+  crop_y = floor((height - new_height) / 2)
			
 
				 
			
 
				-  # Overlay the waves stream on top of our static image
			
 
				-  vid_stream = ffmpeg.filter([ffmpeg.input(background), viz], 'overlay', 0, waves_background_center_offset)
			
 
				-  ffmpeg.output(stream.audio, vid_stream, output).run()
			
 
				+  return (new_width, new_height, crop_x, crop_y)
			
 
				 
			
 
				 
			
 
				 # Generate a static color background video stream
			
@@ -80,7 +148,7 @@ def generate_background_color(width, height, color, opacity, duration_in_seconds
 
				 def get_audio_waveforms(av_stream, width, height, colors, opacity):
			
 
				   return (
			
 
				     av_stream
			
 
				-      .filter("showwaves", s="%dx%d" % (width, height), mode="cline", colors=colors)
			
 
				+      .filter("showwaves", s="%dx%d" % (width, height), mode="cline", colors=colors, split_channels=0, rate=25, scale="sqrt")
			
 
				       .filter("format", "rgba")
			
 
				       .filter("colorchannelmixer", aa=opacity)
			
 
				   )
			
@@ -90,7 +158,7 @@ def get_image_resolution(image_filename):
 
				   metadata = get_metadata(image_filename)
			
 
				   height = metadata["streams"][0]["height"]
			
 
				   width = metadata["streams"][0]["width"]
			
 
				-  return (height, width)
			
 
				+  return (width, height)
			
 
				 
			
 
				 
			
 
				 # Get audio duration using ffprobe