audio_to_visualization.py 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. #!/usr/bin/env python
  2. import argparse
  3. import errno
  4. from math import floor
  5. import ffmpeg
  6. import sys
  7. # Arg validation for floats
  def restricted_float(x):
      """Parse ``x`` as a float and require it to lie in [0.0, 1.0].

      Written to be used as an ``argparse`` ``type=`` callable.

      Args:
          x: Raw option value (normally a string from the command line).

      Returns:
          The parsed float.

      Raises:
          argparse.ArgumentTypeError: If ``x`` cannot be converted to a float,
              or the converted value is outside [0.0, 1.0] (NaN included).
      """
      try:
          value = float(x)
      except (TypeError, ValueError):
          # TypeError covers non-string, non-numeric input such as None.
          raise argparse.ArgumentTypeError("%r not a floating-point literal" % (x,))
      # A chained comparison is False for NaN, so NaN is rejected as out of
      # range instead of slipping past two separate </> checks.
      if not 0.0 <= value <= 1.0:
          raise argparse.ArgumentTypeError("%r not in range [0.0, 1.0]" % (value,))
      return value
  def call_video_creator_with_args(argv=None):
      """Parse the command-line options and run ``create_vizualization``.

      Args:
          argv: Optional list of argument strings. When ``None`` (the default),
              argparse reads ``sys.argv[1:]``.

      ``--audio``, ``--background`` and ``--output`` are required; the ratio and
      opacity options are validated by ``restricted_float``.
      """
      parser = argparse.ArgumentParser(
          formatter_class=argparse.ArgumentDefaultsHelpFormatter, add_help=True)
      parser.add_argument("--audio",
                          help="input audio filename", required=True)
      parser.add_argument("--background",
                          help="visualization background filename", required=True)
      parser.add_argument("--output",
                          help="output video filename", required=True)
      parser.add_argument("--vis-background-to-vid-ratio", type=restricted_float, default=0.2,
                          help="ratio of visualization background height to input image height (0.0-1.0)",
                          required=False)
      parser.add_argument("--vis-waves-to-vid-ratio", type=restricted_float, default=0.15,
                          help="ratio of visualization waves height to input image height (0.0-1.0)",
                          required=False)
      parser.add_argument("--vis-color", nargs='*', required=False, default=["0xffffff"],
                          help="colors for visualization waveforms")
      parser.add_argument("--vis-color-opacity", type=restricted_float, default=0.9,
                          help="opacity of vis colors (0.0-1.0)", required=False)
      parser.add_argument("--background-color", required=False, default="0x000000",
                          help="background color for visualization waveforms")
      parser.add_argument("--background-color-opacity", type=restricted_float, default=0.5,
                          help="opacity for visualization background color (0.0-1.0)", required=False)

      # Parse first, then hand the values over. parse_known_args tolerates
      # unrecognized options; the leftovers are deliberately discarded.
      args, _ = parser.parse_known_args(argv)
      create_vizualization(args.audio, args.background, args.output,
                           args.vis_background_to_vid_ratio,
                           args.vis_waves_to_vid_ratio, args.vis_color,
                           args.vis_color_opacity, args.background_color,
                           args.background_color_opacity)
  def create_vizualization(audio, background, output, vis_background_to_vid_ratio,
                           vis_waves_to_vid_ratio, vis_color, vis_color_opacity,
                           background_color, background_color_opacity):
      """Render a waveform video for ``audio`` over ``background`` into ``output``.

      A solid colour strip and a waveform layer are sized as fractions of the
      background image height, the waveform is overlaid on the strip, the strip
      is overlaid on the image, and the result is muxed with the input audio.

      Args:
          audio: Input audio filename.
          background: Background image filename.
          output: Output video filename.
          vis_background_to_vid_ratio: Strip height as a fraction of image height.
          vis_waves_to_vid_ratio: Waveform height as a fraction of image height.
          vis_color: Iterable of colour strings, joined with ``|`` for showwaves.
          vis_color_opacity: Alpha applied to the waveform layer.
          background_color: Colour of the strip behind the waveform.
          background_color_opacity: Alpha applied to the strip.
      """
      # Probe both inputs for the numbers that drive the layout.
      clip_length = get_audio_duration(audio)
      image_height, image_width = get_image_resolution(background)
      wave_px = floor(image_height * vis_waves_to_vid_ratio)
      strip_px = floor(image_height * vis_background_to_vid_ratio)

      # Build the two layers: the waveform and the colour strip behind it.
      audio_in = ffmpeg.input(audio)
      joined_colors = "|".join(vis_color)
      wave_layer = get_audio_waveforms(
          audio_in, image_width, wave_px, joined_colors, vis_color_opacity)
      strip_layer = generate_background_color(
          image_width, strip_px, background_color, background_color_opacity, clip_length)

      # Offset is half the height difference between the strip and the waveform.
      wave_y = floor((strip_px - wave_px) / 2)
      strip_with_waves = ffmpeg.filter([strip_layer, wave_layer], 'overlay', 0, wave_y)

      # Same offset rule for placing the finished strip on the still image.
      strip_y = floor((image_height - strip_px) / 2)
      composed = ffmpeg.filter(
          [ffmpeg.input(background), strip_with_waves], 'overlay', 0, strip_y)

      ffmpeg.output(audio_in.audio, composed, output).run()
  59. # Generate a static color background video stream
  def generate_background_color(width, height, color, opacity, duration_in_seconds):
      """Return a solid-colour lavfi video stream with the given alpha.

      Args:
          width: Frame width, formatted with ``%d``.
          height: Frame height, formatted with ``%d``.
          color: Colour value passed to the ``color`` source.
          opacity: Value given to ``colorchannelmixer`` as ``aa``.
          duration_in_seconds: Length of the stream; an ``s`` suffix is appended.
      """
      source_spec = "color=c=%s:s=%dx%d:d=%ss" % (color, width, height, duration_in_seconds)
      solid = ffmpeg.input(source_spec, f="lavfi")
      # Convert to RGBA first so there is an alpha channel to scale.
      with_alpha = solid.filter("format", "rgba")
      return with_alpha.filter("colorchannelmixer", aa=opacity)
  66. # Given an input AV source, generate visualization waves
  def get_audio_waveforms(av_stream, width, height, colors, opacity):
      """Turn ``av_stream`` into a ``showwaves`` video stream with the given alpha.

      Args:
          av_stream: Input stream whose ``.filter`` method is used to build the chain.
          width: Output width, formatted with ``%d``.
          height: Output height, formatted with ``%d``.
          colors: Colour specification string passed to ``showwaves``.
          opacity: Value given to ``colorchannelmixer`` as ``aa``.
      """
      frame_size = "%dx%d" % (width, height)
      waves = av_stream.filter("showwaves", s=frame_size, mode="cline", colors=colors)
      # Convert to RGBA first so there is an alpha channel to scale.
      waves_rgba = waves.filter("format", "rgba")
      return waves_rgba.filter("colorchannelmixer", aa=opacity)
  74. # Get image resolution using ffprobe
  def get_image_resolution(image_filename):
      """Return ``(height, width)`` taken from the first stream of the probed file."""
      first_stream = get_metadata(image_filename)["streams"][0]
      # Height comes first in the returned tuple; callers unpack in that order.
      return (first_stream["height"], first_stream["width"])
  80. # Get audio duration using ffprobe
  def get_audio_duration(audio_filename):
      """Return the ``duration`` entry of the probed file's ``format`` section, unconverted."""
      format_info = get_metadata(audio_filename)["format"]
      return format_info["duration"]
  84. # Get metadata about file from ffprobe
  def get_metadata(filename):
      """Return whatever ``ffmpeg.probe`` reports for ``filename``."""
      return ffmpeg.probe(filename)
  if __name__ == "__main__":
      # Script entry point: run the CLI and map the outcome to an exit status.
      exit_status = 0
      try:
          call_video_creator_with_args()
      except KeyboardInterrupt:
          # Interrupted by the user: stop quietly with a failure status.
          exit_status = 1
      except IOError as err:
          # EPIPE means a neighbouring process in a shell pipeline went away.
          # That is expected, and since nothing is read from standard input
          # there is nothing left to finish. Any other I/O error is a real
          # failure and is re-raised unchanged.
          if err.errno != errno.EPIPE:
              raise
          # Stay non-zero so the pipeline still reports the terminated process.
          exit_status = 1
      sys.exit(exit_status)