Browse Source

Add background effects and update README

- Add --bg-effect flag to enable animated background effects (labelled audio-reactive, but the panning is time-based and does not read the audio)
- Add --bg-zoom parameter for base zoom level (zooming in leaves a margin so panning does not expose the image edges)
- Add --bg-movement parameter for movement intensity
- Add --bg-blur parameter for optional blur effect
- Background uses smooth sin/cos based panning movement
- Update README with all new parameters and usage examples

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
fszontagh 2 months ago
parent
commit
24096a73a6
2 changed files with 151 additions and 10 deletions
  1. 72 2
      README.md
  2. 79 8
      audio_to_visualization/audio_to_visualization.py

+ 72 - 2
README.md

@@ -1,6 +1,6 @@
 # Audio to Visualization
 
-The purpose of this small Python script is to transform an audio file in to a video using a background image and an audio visualizer. This tool was written leveraging ffmpeg and requires it be installed and accessible via the `ffmpeg` command on the command line.
+The purpose of this small Python script is to transform an audio file into a video using a background image and an audio visualizer. This tool was written leveraging ffmpeg and requires it be installed and accessible via the `ffmpeg` command on the command line.
 
 ## FFMPEG
 Download `ffmpeg` and get access to the documentation at https://www.ffmpeg.org/
@@ -20,10 +20,80 @@ Install via pip:
 |----------|-------------|----------|---------|
 | --audio | path to audio file to visualize | true | N/A |
 | --background | path to image to use for background | true | N/A |
-| --output | path and name of output file. Must end in .mp4 | true | N/a |
+| --output | path and name of output file. Must end in .mp4 | true | N/A |
 | --vis-background-to-vid-ratio | ratio of visualization background height to input image height (0.0-1.0) | false | 0.2 |
 | --vis-waves-to-vid-ratio | ratio of visualization waves height to input image height (0.0-1.0) | false | 0.15 |
 | --vis-color | color for visualization waveforms. can be used multiple times | false | "0xffffff" |
 | --vis-color-opacity | opacity of vis colors (0.0-1.0) | false | 0.9 |
 | --background-color | background color for visualization waveforms | false | "0x000000" |
 | --background-color-opacity | opacity for visualization background color (0.0-1.0) | false | 0.5 |
+| --aspect-ratio | crop/resize image to aspect ratio with minimal loss (format: W:H, e.g., 3:2, 16:9) | false | none |
+| --test-frames | output only N frames for testing (e.g., 50 for ~2 seconds at 25fps) | false | none |
+| --bg-effect | enable background effects (zoom, time-based sin/cos panning, optional blur); labelled audio-reactive, but the motion does not follow the audio | false | false |
+| --bg-zoom | base zoom level for background effect (0.0-1.0, default 0.1 = 10% zoom) | false | 0.1 |
+| --bg-movement | movement intensity for background effect (0.0-1.0) | false | 0.02 |
+| --bg-blur | blur intensity for background effect (0.0-1.0, 0.0 = no blur) | false | 0.0 |
+
+## Examples
+
+### Basic usage
+```bash
+python audio_to_visualization/audio_to_visualization.py \
+  --audio input.mp3 \
+  --background image.jpg \
+  --output video.mp4
+```
+
+### With aspect ratio cropping
+```bash
+python audio_to_visualization/audio_to_visualization.py \
+  --audio input.mp3 \
+  --background image.jpg \
+  --output video.mp4 \
+  --aspect-ratio 16:9
+```
+
+### With background effects
+```bash
+python audio_to_visualization/audio_to_visualization.py \
+  --audio input.mp3 \
+  --background image.jpg \
+  --output video.mp4 \
+  --bg-effect \
+  --bg-zoom 0.15 \
+  --bg-movement 0.03
+```
+
+### Quick test (limited frames)
+```bash
+python audio_to_visualization/audio_to_visualization.py \
+  --audio input.mp3 \
+  --background image.jpg \
+  --output test.mp4 \
+  --test-frames 50
+```
+
+### Full featured example
+```bash
+python audio_to_visualization/audio_to_visualization.py \
+  --audio input.mp3 \
+  --background image.jpg \
+  --output video.mp4 \
+  --aspect-ratio 3:2 \
+  --vis-color 0xff5500 0x00ff55 \
+  --vis-color-opacity 0.8 \
+  --background-color 0x222222 \
+  --background-color-opacity 0.6 \
+  --bg-effect \
+  --bg-zoom 0.1 \
+  --bg-movement 0.02 \
+  --bg-blur 0.05
+```
+
+## Features
+
+- **Audio waveform visualization**: Displays animated waveforms synced to the audio
+- **Customizable colors**: Set waveform and background colors with opacity control
+- **Aspect ratio cropping**: Automatically crop images to target aspect ratios (e.g., 16:9, 3:2) with minimal image loss using center crop
+- **Background effects**: Optional smooth panning/movement effect with zoom and blur
+- **Test mode**: Generate short clips for quick testing

+ 79 - 8
audio_to_visualization/audio_to_visualization.py

@@ -56,17 +56,25 @@ def call_video_creator_with_args():
                       help="crop/resize image to aspect ratio with minimal loss (format: W:H, e.g., 3:2, 16:9)", required=False)
   parser.add_argument("--test-frames", type=int, default=None,
                       help="output only N frames for testing (e.g., 50 for ~2 seconds at 25fps)", required=False)
+  parser.add_argument("--bg-effect", action="store_true", default=False,
+                      help="enable audio-reactive background effects (zoom, movement, blur)", required=False)
+  parser.add_argument("--bg-zoom", type=restricted_float, default=0.1,
+                      help="base zoom level for background effect (0.0-1.0, default 0.1 = 10%% zoom)", required=False)
+  parser.add_argument("--bg-movement", type=restricted_float, default=0.02,
+                      help="movement intensity for background effect (0.0-1.0, default 0.02)", required=False)
+  parser.add_argument("--bg-blur", type=restricted_float, default=0.0,
+                      help="blur intensity for background effect (0.0-1.0, default 0.0 = no blur)", required=False)
 
   args, _ = parser.parse_known_args()
   create_visualization(args.audio, args.background, args.output, args.vis_background_to_vid_ratio,
                        args.vis_waves_to_vid_ratio, args.vis_color, args.vis_color_opacity,
                        args.background_color, args.background_color_opacity, args.aspect_ratio,
-                       args.test_frames)
+                       args.test_frames, args.bg_effect, args.bg_zoom, args.bg_movement, args.bg_blur)
 
 def create_visualization(audio, background, output, vis_background_to_vid_ratio,
                          vis_waves_to_vid_ratio, vis_color, vis_color_opacity,
                          background_color, background_color_opacity, aspect_ratio=None,
-                         test_frames=None):
+                         test_frames=None, bg_effect=False, bg_zoom=0.1, bg_movement=0.02, bg_blur=0.0):
   # Get metadata for visualization
   duration = get_audio_duration(audio)
   (bg_width, bg_height) = get_image_resolution(background)
@@ -77,17 +85,28 @@ def create_visualization(audio, background, output, vis_background_to_vid_ratio,
     (bg_width, bg_height, crop_x, crop_y) = calculate_crop_for_aspect_ratio(bg_width, bg_height, aspect_ratio)
     bg_stream = bg_stream.filter('crop', bg_width, bg_height, crop_x, crop_y)
 
-  waves_height = floor(bg_height * vis_waves_to_vid_ratio)
-  waves_background_height = floor(bg_height * vis_background_to_vid_ratio)
+  # Store original dimensions for final output
+  output_width = bg_width
+  output_height = bg_height
+
+  # Apply background effects if enabled
+  if bg_effect:
+    bg_stream = apply_audio_reactive_background(bg_stream, audio, bg_width, bg_height,
+                                                 duration, bg_zoom, bg_movement, bg_blur)
+
+  waves_height = floor(output_height * vis_waves_to_vid_ratio)
+  waves_background_height = floor(output_height * vis_background_to_vid_ratio)
 
   # Ensure visualization spans full width of background
-  viz_width = bg_width
+  viz_width = output_width
   viz_height = waves_background_height
 
   # Debug output
-  print(f"Background: {bg_width}x{bg_height}")
+  print(f"Background: {output_width}x{output_height}")
   print(f"Waves height: {waves_height}, Background height: {waves_background_height}")
   print(f"Viz width: {viz_width}")
+  if bg_effect:
+    print(f"Background effects enabled: zoom={bg_zoom}, movement={bg_movement}, blur={bg_blur}")
 
   # Compile the waves and a background color
   stream = ffmpeg.input(audio)
@@ -97,8 +116,8 @@ def create_visualization(audio, background, output, vis_background_to_vid_ratio,
                                                 background_color_opacity, duration)
   waves_center_offset = floor((waves_background_height - waves_height)/2)
   viz = ffmpeg.filter([background_stream, vid_stream], 'overlay', 0, waves_center_offset)
-  waves_background_center_offset = floor((bg_height - waves_background_height)/2)
-  waves_background_center_x = floor((bg_width - viz_width) / 2)
+  waves_background_center_offset = floor((output_height - waves_background_height)/2)
+  waves_background_center_x = floor((output_width - viz_width) / 2)
 
   # Overlay the waves stream on top of our static image, centered both horizontally and vertically
   vid_stream = ffmpeg.filter([bg_stream, viz], 'overlay', waves_background_center_x, waves_background_center_offset)
@@ -110,6 +129,58 @@ def create_visualization(audio, background, output, vis_background_to_vid_ratio,
     ffmpeg.output(stream.audio, vid_stream, output).run()
 
 
+# Apply background effects (zoom, time-based sin/cos panning, optional blur); the audio argument is accepted but not used here
+def apply_audio_reactive_background(bg_stream, audio, width, height, duration, zoom_level, movement, blur_intensity):
+  # Calculate zoomed dimensions (zoom in to allow movement without showing edges)
+  zoom_factor = 1.0 + zoom_level
+  zoomed_width = floor(width * zoom_factor)
+  zoomed_height = floor(height * zoom_factor)
+
+  # Ensure even dimensions
+  zoomed_width = zoomed_width - (zoomed_width % 2)
+  zoomed_height = zoomed_height - (zoomed_height % 2)
+
+  # Maximum movement in pixels: the zoom margin scaled by movement * 10, so movement = 0.1 covers about the full margin and larger values exceed it
+  max_move_x = floor((zoomed_width - width) / 2 * movement * 10)
+  max_move_y = floor((zoomed_height - height) / 2 * movement * 10)
+
+  # Center position for crop
+  center_x = floor((zoomed_width - width) / 2)
+  center_y = floor((zoomed_height - height) / 2)
+
+  # Scale up the image first to allow for movement
+  bg_stream = bg_stream.filter('scale', zoomed_width, zoomed_height)
+
+  # Apply blur if specified
+  if blur_intensity > 0:
+    blur_radius = floor(blur_intensity * 20) + 1
+    bg_stream = bg_stream.filter('avgblur', sizeX=blur_radius, sizeY=blur_radius)
+
+  # Loop the image to create a video stream for the full duration
+  bg_stream = bg_stream.filter('loop', loop=-1, size=1)
+  bg_stream = bg_stream.filter('setpts', 'N/25/TB')
+
+  # Create movement using crop with time-based expressions
+  # Using sin/cos functions to create smooth movement
+  crop_x_expr = f"{center_x}+{max_move_x}*sin(2*PI*t/4)"
+  crop_y_expr = f"{center_y}+{max_move_y}*cos(2*PI*t/3)"
+
+  # Apply crop for movement effect
+  bg_stream = bg_stream.filter(
+    'crop',
+    w=width,
+    h=height,
+    x=crop_x_expr,
+    y=crop_y_expr
+  )
+
+  # Trim to duration
+  bg_stream = bg_stream.filter('trim', duration=duration)
+  bg_stream = bg_stream.filter('setpts', 'PTS-STARTPTS')
+
+  return bg_stream
+
+
 # Calculate crop dimensions for target aspect ratio with minimal image loss
 def calculate_crop_for_aspect_ratio(width, height, aspect_ratio):
   target_w, target_h = aspect_ratio