Merge pull request #102360 from berarma/theora_seek

Implement seek operation for Theora video files, improve multi-channel audio resampling.
2026-03-03 20:55:48 +00:00 · 2025-05-22 12:15:07 -05:00
parent 8085fd3102 b9bebf7081
commit 5b2525673a
6 changed files with 541 additions and 218 deletions
--- a/doc/classes/VideoStreamPlayer.xml
+++ b/doc/classes/VideoStreamPlayer.xml
@@ -16,7 +16,6 @@
 			<return type="float" />
 			<description>
 				The length of the current stream, in seconds.
-				[b]Note:[/b] For [VideoStreamTheora] streams (the built-in format supported by Godot), this value will always be zero, as getting the stream length is not implemented yet. The feature may be supported by video formats implemented by a GDExtension add-on.
 			</description>
 		</method>
 		<method name="get_stream_name" qualifiers="const">
@@ -79,7 +78,6 @@
 		</member>
 		<member name="stream_position" type="float" setter="set_stream_position" getter="get_stream_position">
 			The current position of the stream, in seconds.
-			[b]Note:[/b] Changing this value won't have any effect as seeking is not implemented yet, except in video formats implemented by a GDExtension add-on.
 		</member>
 		<member name="volume" type="float" setter="set_volume" getter="get_volume">
 			Audio volume as a linear value.
--- a/modules/theora/video_stream_theora.cpp
+++ b/modules/theora/video_stream_theora.cpp
--- a/modules/theora/video_stream_theora.h
+++ b/modules/theora/video_stream_theora.h
@@ -51,8 +51,19 @@ class VideoStreamPlaybackTheora : public VideoStreamPlayback {
 	Point2i size;
 	Rect2i region;

+	float *audio_buffer = nullptr;
+	int audio_buffer_size = 0;
+	int audio_ptr_start = 0;
+	int audio_ptr_end = 0;
+
 	int buffer_data();
 	int queue_page(ogg_page *page);
+	int read_page(ogg_page *page);
+	int feed_pages();
+	double get_page_time(ogg_page *page);
+	int64_t seek_streams(double p_time, int64_t &video_granulepos, int64_t &audio_granulepos);
+	void find_streams(th_setup_info *&ts);
+	void read_headers(th_setup_info *&ts);
 	void video_write(th_ycbcr_buffer yuv);
 	double get_time() const;

@@ -60,7 +71,6 @@ class VideoStreamPlaybackTheora : public VideoStreamPlayback {
 	bool vorbis_eos = false;

 	ogg_sync_state oy;
-	ogg_page og;
 	ogg_stream_state vo;
 	ogg_stream_state to;
 	th_info ti;
@@ -71,19 +81,21 @@ class VideoStreamPlaybackTheora : public VideoStreamPlayback {
 	vorbis_block vb;
 	vorbis_comment vc;
 	th_pixel_fmt px_fmt;
-	double frame_duration;
+	double frame_duration = 0;
+	double stream_length = 0;
+	int64_t stream_data_offset = 0;
+	int64_t stream_data_size = 0;

-	int theora_p = 0;
-	int vorbis_p = 0;
 	int pp_level_max = 0;
 	int pp_level = 0;
 	int pp_inc = 0;

 	bool playing = false;
-	bool buffering = false;
 	bool paused = false;

 	bool dup_frame = false;
+	bool has_video = false;
+	bool has_audio = false;
 	bool video_ready = false;
 	bool video_done = false;
 	bool audio_done = false;
@@ -100,6 +112,20 @@ class VideoStreamPlaybackTheora : public VideoStreamPlayback {
 protected:
 	void clear();

+	_FORCE_INLINE_ bool send_audio() { // Hands any pending audio_buffer data to mix_callback; returns true when nothing is left queued.
+		if (audio_ptr_end > 0) { // Only call the mixer when something is queued; both cursors sit at 0 once drained.
+			int mixed = mix_callback(mix_udata, &audio_buffer[audio_ptr_start * vi.channels], audio_ptr_end - audio_ptr_start); // Start offset is scaled by vi.channels; presumably the cursors count frames of interleaved samples (confirm).
+			audio_ptr_start += mixed; // Advance by the amount the callback reported.
+			if (audio_ptr_start == audio_ptr_end) { // Everything queued was accepted.
+				audio_ptr_start = 0; // Rewind both cursors so the buffer is reused from the beginning.
+				audio_ptr_end = 0;
+			} else {
+				return false; // Data is still pending; the cursors are kept for a later call.
+			}
+		}
+		return true;
+	}
+
 public:
 	virtual void play() override;
 	virtual void stop() override;
--- a/scene/gui/video_stream_player.cpp
+++ b/scene/gui/video_stream_player.cpp
@@ -339,7 +339,6 @@ void VideoStreamPlayer::play() {
 	if (playback.is_null()) {
 		return;
 	}
-	playback->stop();
 	playback->play();
 	set_process_internal(true);
 	last_audio_time = 0;
@@ -468,7 +467,9 @@ double VideoStreamPlayer::get_stream_position() const {

 void VideoStreamPlayer::set_stream_position(double p_position) {
 	if (playback.is_valid()) {
+		resampler.flush();
 		playback->seek(p_position);
+		last_audio_time = 0;
 	}
 }

--- a/servers/audio/audio_rb_resampler.cpp
+++ b/servers/audio/audio_rb_resampler.cpp
@@ -75,23 +75,37 @@ uint32_t AudioRBResampler::_resample(AudioFrame *p_dest, int p_todo, int32_t p_i
 			p_dest[i] = AudioFrame(v0, v1);
 		}

-		// This will probably never be used, but added anyway
+		// Downmix to stereo. Apply -3dB to center and sides, -6dB to rear.
+
+		// four channels - channel order: front left, front right, rear left, rear right
 		if constexpr (C == 4) {
-			float v0 = rb[(pos << 2) + 0];
-			float v1 = rb[(pos << 2) + 1];
-			float v0n = rb[(pos_next << 2) + 0];
-			float v1n = rb[(pos_next << 2) + 1];
+			float v0 = rb[(pos << 2) + 0] + rb[(pos << 2) + 2] / 2;
+			float v1 = rb[(pos << 2) + 1] + rb[(pos << 2) + 3] / 2;
+			float v0n = rb[(pos_next << 2) + 0] + rb[(pos_next << 2) + 2] / 2;
+			float v1n = rb[(pos_next << 2) + 1] + rb[(pos_next << 2) + 3] / 2;
 			v0 += (v0n - v0) * frac;
 			v1 += (v1n - v1) * frac;
 			p_dest[i] = AudioFrame(v0, v1);
 		}

+		// six channels - channel order: front left, center, front right, rear left, rear right, LFE
 		if constexpr (C == 6) {
-			float v0 = rb[(pos * 6) + 0];
-			float v1 = rb[(pos * 6) + 1];
-			float v0n = rb[(pos_next * 6) + 0];
-			float v1n = rb[(pos_next * 6) + 1];
+			float v0 = rb[(pos * 6) + 0] + rb[(pos * 6) + 1] / Math::SQRT2 + rb[(pos * 6) + 3] / 2;
+			float v1 = rb[(pos * 6) + 2] + rb[(pos * 6) + 1] / Math::SQRT2 + rb[(pos * 6) + 4] / 2;
+			float v0n = rb[(pos_next * 6) + 0] + rb[(pos_next * 6) + 1] / Math::SQRT2 + rb[(pos_next * 6) + 3] / 2;
+			float v1n = rb[(pos_next * 6) + 2] + rb[(pos_next * 6) + 1] / Math::SQRT2 + rb[(pos_next * 6) + 4] / 2;
+			v0 += (v0n - v0) * frac;
+			v1 += (v1n - v1) * frac;
+			p_dest[i] = AudioFrame(v0, v1);
+		}

+		// eight channels - channel order: front left, center, front right, side left, side right, rear left, rear
+		// right, LFE
+		if constexpr (C == 8) {
+			float v0 = rb[(pos << 3) + 0] + rb[(pos << 3) + 1] / Math::SQRT2 + rb[(pos << 3) + 3] / Math::SQRT2 + rb[(pos << 3) + 5] / 2;
+			float v1 = rb[(pos << 3) + 2] + rb[(pos << 3) + 1] / Math::SQRT2 + rb[(pos << 3) + 4] / Math::SQRT2 + rb[(pos << 3) + 6] / 2;
+			float v0n = rb[(pos_next << 3) + 0] + rb[(pos_next << 3) + 1] / Math::SQRT2 + rb[(pos_next << 3) + 3] / Math::SQRT2 + rb[(pos_next << 3) + 5] / 2;
+			float v1n = rb[(pos_next << 3) + 2] + rb[(pos_next << 3) + 1] / Math::SQRT2 + rb[(pos_next << 3) + 4] / Math::SQRT2 + rb[(pos_next << 3) + 6] / 2;
 			v0 += (v0n - v0) * frac;
 			v1 += (v1n - v1) * frac;
 			p_dest[i] = AudioFrame(v0, v1);
@@ -125,6 +139,9 @@ bool AudioRBResampler::mix(AudioFrame *p_dest, int p_frames) {
 			case 6:
 				src_read = _resample<6>(p_dest, target_todo, increment);
 				break;
+			case 8:
+				src_read = _resample<8>(p_dest, target_todo, increment);
+				break;
 		}

 		if (src_read > read_space) {
@@ -159,7 +176,7 @@ int AudioRBResampler::get_num_of_ready_frames() {
 }

 Error AudioRBResampler::setup(int p_channels, int p_src_mix_rate, int p_target_mix_rate, int p_buffer_msec, int p_minbuff_needed) {
-	ERR_FAIL_COND_V(p_channels != 1 && p_channels != 2 && p_channels != 4 && p_channels != 6, ERR_INVALID_PARAMETER);
+	ERR_FAIL_COND_V(p_channels != 1 && p_channels != 2 && p_channels != 4 && p_channels != 6 && p_channels != 8, ERR_INVALID_PARAMETER);

 	int desired_rb_bits = nearest_shift(MAX((p_buffer_msec / 1000.0) * p_src_mix_rate, p_minbuff_needed));

--- a/servers/audio/audio_rb_resampler.h
+++ b/servers/audio/audio_rb_resampler.h
@@ -152,6 +152,19 @@ public:
 					wp = (wp + 1) & rb_mask;
 				}
 			} break;
+			case 8: {
+				for (uint32_t i = 0; i < p_frames; i++) {
+					rb[(wp << 3) + 0] = read_buf[(i << 3) + 0];
+					rb[(wp << 3) + 1] = read_buf[(i << 3) + 1];
+					rb[(wp << 3) + 2] = read_buf[(i << 3) + 2];
+					rb[(wp << 3) + 3] = read_buf[(i << 3) + 3];
+					rb[(wp << 3) + 4] = read_buf[(i << 3) + 4];
+					rb[(wp << 3) + 5] = read_buf[(i << 3) + 5];
+					rb[(wp << 3) + 6] = read_buf[(i << 3) + 6];
+					rb[(wp << 3) + 7] = read_buf[(i << 3) + 7];
+					wp = (wp + 1) & rb_mask;
+				}
+			} break;
 		}

 		rb_write_pos.set(wp);