Pārlūkot izejas kodu

Fix waveform positioning and add aspect ratio crop feature

- Fix width/height swap in get_image_resolution that caused the waveform
  to be incorrectly sized and positioned
- Add --aspect-ratio parameter to crop image to target ratio (e.g., 3:2, 16:9)
  with minimal image loss using center crop
- Add --test-frames parameter for quick testing with limited frame output
- Improve showwaves filter with sqrt scaling for better visibility
- Add debug output for dimensions

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Fszontagh 2 mēneši atpakaļ
vecāks
revīzija
c1def12a03
1 mainīts fails ar 78 papildinājumiem un 10 dzēšanām
  1. 78 10
      audio_to_visualization/audio_to_visualization.py

+ 78 - 10
audio_to_visualization/audio_to_visualization.py

@@ -18,6 +18,20 @@ def restricted_float(x):
   return x
 
 
+# Arg validation for aspect ratio (e.g., "3:2", "16:9")
+def aspect_ratio(x):
+  try:
+    parts = x.split(':')
+    if len(parts) != 2:
+      raise ValueError()
+    w, h = int(parts[0]), int(parts[1])
+    if w <= 0 or h <= 0:
+      raise ValueError()
+    return (w, h)
+  except ValueError:
+    raise argparse.ArgumentTypeError("%r is not a valid aspect ratio (use format W:H, e.g., 3:2)" % (x,))
+
+
 def call_video_creator_with_args():
   parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, add_help=True)
   parser.add_argument("--audio",
@@ -38,34 +52,88 @@ def call_video_creator_with_args():
                       help="background color for visualization waveforms")
   parser.add_argument("--background-color-opacity", type=restricted_float, default=0.5,
                       help="opacity for visualization background color (0.0-1.0)", required=False)
+  parser.add_argument("--aspect-ratio", type=aspect_ratio, default=None,
+                      help="crop/resize image to aspect ratio with minimal loss (format: W:H, e.g., 3:2, 16:9)", required=False)
+  parser.add_argument("--test-frames", type=int, default=None,
+                      help="output only N frames for testing (e.g., 50 for ~2 seconds at 25fps)", required=False)
 
   args, _ = parser.parse_known_args()
   create_visualization(args.audio, args.background, args.output, args.vis_background_to_vid_ratio,
                        args.vis_waves_to_vid_ratio, args.vis_color, args.vis_color_opacity,
-                       args.background_color, args.background_color_opacity)
+                       args.background_color, args.background_color_opacity, args.aspect_ratio,
+                       args.test_frames)
 
 def create_visualization(audio, background, output, vis_background_to_vid_ratio,
                          vis_waves_to_vid_ratio, vis_color, vis_color_opacity,
-                         background_color, background_color_opacity):
+                         background_color, background_color_opacity, aspect_ratio=None,
+                         test_frames=None):
   # Get metadata for visualization
   duration = get_audio_duration(audio)
-  (bg_height, bg_width) = get_image_resolution(background)
+  (bg_width, bg_height) = get_image_resolution(background)
+
+  # Apply aspect ratio crop if specified
+  bg_stream = ffmpeg.input(background)
+  if aspect_ratio:
+    (bg_width, bg_height, crop_x, crop_y) = calculate_crop_for_aspect_ratio(bg_width, bg_height, aspect_ratio)
+    bg_stream = bg_stream.filter('crop', bg_width, bg_height, crop_x, crop_y)
+
   waves_height = floor(bg_height * vis_waves_to_vid_ratio)
   waves_background_height = floor(bg_height * vis_background_to_vid_ratio)
 
+  # Ensure visualization spans full width of background
+  viz_width = bg_width
+  viz_height = waves_background_height
+
+  # Debug output
+  print(f"Background: {bg_width}x{bg_height}")
+  print(f"Waves height: {waves_height}, Background height: {waves_background_height}")
+  print(f"Viz width: {viz_width}")
+
   # Compile the waves and a background color
   stream = ffmpeg.input(audio)
   vis_colors = "|".join(vis_color)
-  vid_stream = get_audio_waveforms(stream, bg_width, waves_height, vis_colors, vis_color_opacity)
-  background_stream = generate_background_color(bg_width, waves_background_height, background_color,
+  vid_stream = get_audio_waveforms(stream, viz_width, waves_height, vis_colors, vis_color_opacity)
+  background_stream = generate_background_color(viz_width, waves_background_height, background_color,
                                                 background_color_opacity, duration)
   waves_center_offset = floor((waves_background_height - waves_height)/2)
   viz = ffmpeg.filter([background_stream, vid_stream], 'overlay', 0, waves_center_offset)
   waves_background_center_offset = floor((bg_height - waves_background_height)/2)
+  waves_background_center_x = floor((bg_width - viz_width) / 2)
+
+  # Overlay the waves stream on top of our static image, centered both horizontally and vertically
+  vid_stream = ffmpeg.filter([bg_stream, viz], 'overlay', waves_background_center_x, waves_background_center_offset)
+
+  if test_frames:
+    # Output only N frames for testing (no audio)
+    ffmpeg.output(vid_stream, output, vframes=test_frames).run()
+  else:
+    ffmpeg.output(stream.audio, vid_stream, output).run()
+
+
+# Calculate crop dimensions for target aspect ratio with minimal image loss
+def calculate_crop_for_aspect_ratio(width, height, aspect_ratio):
+  target_w, target_h = aspect_ratio
+  target_ratio = target_w / target_h
+  current_ratio = width / height
+
+  if current_ratio > target_ratio:
+    # Image is wider than target, crop width (left and right)
+    new_width = floor(height * target_ratio)
+    new_height = height
+  else:
+    # Image is taller than target, crop height (top and bottom)
+    new_width = width
+    new_height = floor(width / target_ratio)
+
+  # Ensure dimensions are even (required by many video codecs)
+  new_width = new_width - (new_width % 2)
+  new_height = new_height - (new_height % 2)
+
+  # Center the crop
+  crop_x = floor((width - new_width) / 2)
+  crop_y = floor((height - new_height) / 2)
 
-  # Overlay the waves stream on top of our static image
-  vid_stream = ffmpeg.filter([ffmpeg.input(background), viz], 'overlay', 0, waves_background_center_offset)
-  ffmpeg.output(stream.audio, vid_stream, output).run()
+  return (new_width, new_height, crop_x, crop_y)
 
 
 # Generate a static color background video stream
@@ -80,7 +148,7 @@ def generate_background_color(width, height, color, opacity, duration_in_seconds
 def get_audio_waveforms(av_stream, width, height, colors, opacity):
   return (
     av_stream
-      .filter("showwaves", s="%dx%d" % (width, height), mode="cline", colors=colors)
+      .filter("showwaves", s="%dx%d" % (width, height), mode="cline", colors=colors, split_channels=0, rate=25, scale="sqrt")
       .filter("format", "rgba")
       .filter("colorchannelmixer", aa=opacity)
   )
@@ -90,7 +158,7 @@ def get_image_resolution(image_filename):
   metadata = get_metadata(image_filename)
   height = metadata["streams"][0]["height"]
   width = metadata["streams"][0]["width"]
-  return (height, width)
+  return (width, height)
 
 
 # Get audio duration using ffprobe