-
-
Notifications
You must be signed in to change notification settings - Fork 2.8k
Implement fast WAV ImaAdpcm decoder #22139
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: bleed
Are you sure you want to change the base?
Changes from all commits
31e86be
3c85720
a2826de
6165ddd
f48e006
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -172,5 +172,20 @@ public static Stream CreateWithoutOwningStream(Stream stream, long offset, int c | |
| stream.Seek(offset, SeekOrigin.Begin); | ||
| return new MemoryStream(stream.ReadBytes(count)); | ||
| } | ||
|
|
||
| public static ReadOnlySpan<byte> GetReadableData(Stream stream, long offset, int size) | ||
| { | ||
| if (stream is MemoryStream ms) | ||
| { | ||
| // avoid copying where possible | ||
| var buf = ms.GetBuffer(); | ||
| return new ReadOnlySpan<byte>(buf, (int)offset, Math.Min(size, buf.Length - (int)offset)); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
here you truncate, but below you don't.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. below it's already the right size |
||
| } | ||
|
|
||
| var buffer = new byte[size]; | ||
| stream.Seek(offset, SeekOrigin.Begin); | ||
| stream.ReadExactly(buffer); | ||
| return buffer; | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,6 +30,7 @@ public interface ISoundFormat : IDisposable | |
| int SampleRate { get; } | ||
| float LengthInSeconds { get; } | ||
| Stream GetPCMInputStream(); | ||
| byte[] GetPCMData(); | ||
| } | ||
|
|
||
| public enum SoundType { World, UI } | ||
|
|
@@ -93,7 +94,7 @@ public void Initialize(ISoundLoader[] loaders, IReadOnlyFileSystem fileSystem) | |
| this.loaders = loaders; | ||
| this.fileSystem = fileSystem; | ||
| ISoundSource LoadIntoMemory(ISoundFormat soundFormat) => soundEngine.AddSoundSourceFromMemory( | ||
| soundFormat.GetPCMInputStream().ReadAllBytes(), soundFormat.Channels, soundFormat.SampleBits, soundFormat.SampleRate); | ||
| soundFormat.GetPCMData(), soundFormat.Channels, soundFormat.SampleBits, soundFormat.SampleRate); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To avoid polluting the existing classes with extra API surface here, perhaps instead the Stream could detect a
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you mean codegen?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, I mean in the Read method of the Stream you could do something conceptually akin to: public override int Read(Span<byte> buffer)
{
if (position == 0 && buffer.Length == totalLengthOfFile)
{
// This is a request to read the whole stream in one go - use the fast-path here.
}
// This is a request to read a portion of the file, use the slow-path.
}
This means we can have a fast-path and slow-path - but they're both exposed via the existing Stream abstraction, so we don't need a new API surface for getting a byte array directly.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Or if that's still too late in the process - then copy things into a MemoryStream ahead of time and return that instead. |
||
| sounds = new Cache<string, ISoundSource>(filename => LoadSound(filename, LoadIntoMemory)); | ||
| currentSounds.Clear(); | ||
| currentNotifications.Clear(); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -42,6 +42,12 @@ public sealed class OggFormat : ISoundFormat | |
| public int SampleRate => reader.SampleRate; | ||
| public float LengthInSeconds { get; } | ||
| public Stream GetPCMInputStream() { return new OggStream(new OggFormat(this)); } | ||
| public byte[] GetPCMData() | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Above you have the same code for a different audio format. It feels like this function should be in a base class, or a helper.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's here just as a placeholder. Ideally every format would implement its own fast path |
||
| { | ||
| using var pcmStream = GetPCMInputStream(); | ||
| return pcmStream.ReadAllBytes(); | ||
| } | ||
|
|
||
| public void Dispose() { reader.Dispose(); } | ||
|
|
||
| readonly VorbisReader reader; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,6 +10,10 @@ | |
| #endregion | ||
|
|
||
| using System; | ||
| using System.IO; | ||
| using System.Runtime.CompilerServices; | ||
| using System.Runtime.InteropServices; | ||
| using OpenRA.Primitives; | ||
|
|
||
| namespace OpenRA.Mods.Common.FileFormats | ||
| { | ||
|
|
@@ -30,39 +34,49 @@ public static class ImaAdpcmReader | |
| 16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767 | ||
| ]; | ||
|
|
||
| public static short DecodeImaAdpcmSample(byte b, ref int index, ref int current) | ||
| /// <summary> | ||
| /// Decodes a single IMA ADPCM nibble to a PCM sample. | ||
| /// </summary> | ||
| /// <remarks> | ||
| /// Branchless and only the output variables leave registers. | ||
| /// </remarks> | ||
| [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
| public static short DecodeImaAdpcmSample(byte nibble, ref byte idx, ref short pred) | ||
| { | ||
| var sb = (b & 8) != 0; | ||
| b &= 7; | ||
| var step = StepTable[idx]; | ||
| var diff = step >> 3; | ||
|
|
||
| var delta = StepTable[index] * b / 4 + StepTable[index] / 8; | ||
| if (sb) | ||
| delta = -delta; | ||
| var mask = nibble & 7; | ||
| diff += ((mask >> 2) & 1) * step; | ||
| diff += ((mask >> 1) & 1) * (step >> 1); | ||
| diff += (mask & 1) * (step >> 2); | ||
|
|
||
| current += delta; | ||
| if (current > short.MaxValue) | ||
| current = short.MaxValue; | ||
| // negate the delta when the sign bit (bit 3) of the nibble is set | ||
| var sign = (nibble & 8) != 0 ? -1 : 1; | ||
| diff *= sign; | ||
|
|
||
| if (current < short.MinValue) | ||
| current = short.MinValue; | ||
| var sample = pred + diff; | ||
|
|
||
| index += IndexAdjust[b]; | ||
| if (index < 0) | ||
| index = 0; | ||
| // saturate to the 16-bit sample range via a single unsigned range check | ||
| if ((uint)(sample - short.MinValue) > ushort.MaxValue) | ||
| sample = sample > 0 ? short.MaxValue : short.MinValue; | ||
|
|
||
| if (index > 88) | ||
| index = 88; | ||
| pred = (short)sample; | ||
|
|
||
| return (short)current; | ||
| var newIdx = idx + IndexAdjust[mask]; | ||
| newIdx = newIdx < 0 ? 0 : newIdx > 88 ? 88 : newIdx; | ||
| idx = (byte)newIdx; | ||
|
|
||
| return pred; | ||
| } | ||
|
|
||
| public static byte[] LoadImaAdpcmSound(ReadOnlySpan<byte> raw, ref int index) | ||
| public static byte[] LoadImaAdpcmSound(ReadOnlySpan<byte> raw, ref byte index) | ||
| { | ||
| var currentSample = 0; | ||
| short currentSample = 0; | ||
| return LoadImaAdpcmSound(raw, ref index, ref currentSample); | ||
| } | ||
|
|
||
| public static byte[] LoadImaAdpcmSound(ReadOnlySpan<byte> raw, ref int index, ref int currentSample) | ||
| public static byte[] LoadImaAdpcmSound(ReadOnlySpan<byte> raw, ref byte index, ref short currentSample) | ||
| { | ||
| var dataSize = raw.Length; | ||
| var outputSize = raw.Length * 4; | ||
|
|
@@ -85,5 +99,114 @@ public static byte[] LoadImaAdpcmSound(ReadOnlySpan<byte> raw, ref int index, re | |
|
|
||
| return output; | ||
| } | ||
|
|
||
| public static byte[] ReadData(Stream stream, long dataOffset, int dataSize, short blockAlign, short channels) | ||
| { | ||
| const int SamplesPerGroup = 8; | ||
|
|
||
| ArgumentNullException.ThrowIfNull(stream); | ||
|
|
||
| var sourceData = SegmentStream.GetReadableData(stream, dataOffset, dataSize); | ||
|
|
||
| var numBlocks = dataSize / blockAlign; | ||
|
|
||
| var predictorSize = 4 * channels; | ||
| var blockDataSize = blockAlign - predictorSize; | ||
|
|
||
| // We get two samples per nibble | ||
| var samplesPerChannel = blockDataSize * 2 / channels; | ||
|
|
||
| // 8 samples from a 4-byte group | ||
| var groupCount = samplesPerChannel / SamplesPerGroup; | ||
|
|
||
| var predOut = numBlocks * channels * 2; | ||
| var groupOut = numBlocks * groupCount * channels * SamplesPerGroup * 2; | ||
| var estimatedOutDataSize = predOut + groupOut; | ||
|
|
||
| var outData = new byte[estimatedOutDataSize]; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If I recall from the spec these files can have a form of compression applied, so they might be larger once uncompressed? I see the commit is saying the chunk for the uncompressed size is often not reliable - but even if we're ignoring what the file claims will be the final size, I'm not seeing how we're dealing with it here?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we are calculating the total uncompressed size (unrelated: in many ways this is the bound check)
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ImaAdpcm format is the compression, it reduces file size to 1/4th the size and the headers do remain uncompressed |
||
|
|
||
| // PERF: The output is 16-bit PCM, so we can write bytes as if they were shorts for less CPU churn. | ||
| var outShorts = MemoryMarshal.Cast<byte, short>(outData.AsSpan()); | ||
|
|
||
| // NOTE: decoding a block is sequentially dependent on predictor/index. | ||
| Span<short> predictor = stackalloc short[channels]; | ||
| Span<byte> index = stackalloc byte[channels]; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We'll need to validate the channel count is reasonable - wouldn't want to stack allocate if the file claimed to have short.MaxValue channels for example.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it should always be 1 or 2 channels. We could of course reject if there are more
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The reason it's important is that the process can't recover from a stack overflow - so a malformed or malicious file will crash the process if we stackalloc too large a value. For other parsing errors we could try-catch and just not play the audio - but a stack overflow kills our process outright. |
||
|
|
||
| // PERF: Avoid bounds checks by using refs. | ||
| ref var srcRef = ref MemoryMarshal.GetReference(sourceData); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unsafe code for an operation as dangerous as file parsing to avoid a few bounds checks is insane to me.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we are working on already bound checked data, removing bound checks in hot loops did improve the perf by 3-4%
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. But the bounds check you've calculated is based on input controlled by the file. You can't trust the file. If the file is malformed or malicious, then we won't have the correct bounds. We appear to have exactly those sorts of bugs here: we'll read input of a length derived from untrusted header fields. A well-formed file will have everything match up so we consume the right amount of input and produce the right amount of output. A malformed or malicious file can make false claims where things don't line up, meaning we'll walk off the end of the input or output arrays. If that happened in our existing parsers, we'd get an array out of bounds exception and could recover and keep running. By using unsafe, we've elevated the bug into either an unrecoverable crash if we read/write out of our process memory - or worse a security issue if the file is controlled by an attacker and the bad accesses can be manipulated in a manner advantageous to them. If we used safe code, then at least bugs like this have reduced impact potential - by dropping to unsafe we've signed ourselves up to much worse outcomes if the code contains any bugs. If I had a magical lever to downgrade the impact of bugs I wrote from "security issue/denial-of-service" to "mostly recoverable" and it only cost me 3-4% perf, I'd pull that lever most times (in this case, the lever is "don't use unsafe code to skip bounds checks")
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, if we really are sensitive to 3-4% perf then the better solution is to load those files up-front behind the load screen and keep them in ram! |
||
| ref var outRef = ref MemoryMarshal.GetReference(outShorts); | ||
| ref var predRef = ref MemoryMarshal.GetReference(predictor); | ||
| ref var idxRef = ref MemoryMarshal.GetReference(index); | ||
|
|
||
| // Global decoded sample counter | ||
| var src = 0; | ||
| var outSample = 0; | ||
|
|
||
| for (var block = 0; block < numBlocks; block++) | ||
| { | ||
| // Initial states | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would put this init block in a separate function. Assuming you don't reference too many local vars.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There's so little code it's using all local variables |
||
| for (var c = 0; c < channels; c++) | ||
| { | ||
| var offset = src + c * 4; | ||
|
|
||
| // Load initial values. | ||
| var pred = (short)(Unsafe.Add(ref srcRef, offset) | (Unsafe.Add(ref srcRef, offset + 1) << 8)); | ||
| var idx = Unsafe.Add(ref srcRef, offset + 2); | ||
|
|
||
| Unsafe.Add(ref predRef, c) = pred; | ||
| Unsafe.Add(ref idxRef, c) = idx; | ||
| } | ||
|
|
||
| src += predictorSize; | ||
|
|
||
| // Write initial predictor samples interleaved | ||
| for (var c = 0; c < channels; c++) | ||
| Unsafe.Add(ref outRef, outSample + c) = Unsafe.Add(ref predRef, c); | ||
|
|
||
| outSample += channels; | ||
|
|
||
| for (var iter = 0; iter < groupCount; iter++) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We're decoding in a batch of 8 at a time, I assume as a form of loop unrolling? Is there a guarantee that chunks have a multiple of 8 in them? If not, we'll need another loop to catch the leftovers or we're going to miss out on some samples.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it is guaranteed, if there's an incomplete group at the end of the file we should just skip it
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. though, the fast path will just fail and decode garbage |
||
| { | ||
| // Decode 8 samples sequentially per channel | ||
| for (var c = 0; c < channels; c++) | ||
| { | ||
| ref var pred = ref Unsafe.Add(ref predRef, c); | ||
| ref var idx = ref Unsafe.Add(ref idxRef, c); | ||
|
|
||
| var b0 = Unsafe.Add(ref srcRef, src + 0); | ||
| var b1 = Unsafe.Add(ref srcRef, src + 1); | ||
| var b2 = Unsafe.Add(ref srcRef, src + 2); | ||
| var b3 = Unsafe.Add(ref srcRef, src + 3); | ||
|
|
||
| src += 4; | ||
|
|
||
| // PERF: Decode into temporary variables so they could be easily inlined directly to output. | ||
| var s0 = DecodeImaAdpcmSample((byte)(b0 & 0x0F), ref idx, ref pred); | ||
| var s1 = DecodeImaAdpcmSample((byte)(b0 >> 4), ref idx, ref pred); | ||
| var s2 = DecodeImaAdpcmSample((byte)(b1 & 0x0F), ref idx, ref pred); | ||
| var s3 = DecodeImaAdpcmSample((byte)(b1 >> 4), ref idx, ref pred); | ||
| var s4 = DecodeImaAdpcmSample((byte)(b2 & 0x0F), ref idx, ref pred); | ||
| var s5 = DecodeImaAdpcmSample((byte)(b2 >> 4), ref idx, ref pred); | ||
| var s6 = DecodeImaAdpcmSample((byte)(b3 & 0x0F), ref idx, ref pred); | ||
| var s7 = DecodeImaAdpcmSample((byte)(b3 >> 4), ref idx, ref pred); | ||
|
|
||
| // Write interleaved samples (one sample per channel) | ||
| var basePos = outSample + c; | ||
| Unsafe.Add(ref outRef, basePos + channels * 0) = s0; | ||
| Unsafe.Add(ref outRef, basePos + channels * 1) = s1; | ||
| Unsafe.Add(ref outRef, basePos + channels * 2) = s2; | ||
| Unsafe.Add(ref outRef, basePos + channels * 3) = s3; | ||
| Unsafe.Add(ref outRef, basePos + channels * 4) = s4; | ||
| Unsafe.Add(ref outRef, basePos + channels * 5) = s5; | ||
| Unsafe.Add(ref outRef, basePos + channels * 6) = s6; | ||
| Unsafe.Add(ref outRef, basePos + channels * 7) = s7; | ||
| } | ||
|
|
||
| outSample += channels * 8; | ||
| } | ||
| } | ||
|
|
||
| return outData; | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This method is a bit weird and I think should be moved to the one caller as a private helper.
Perhaps a TryGet... method instead, in case it could reuse an existing buffer in the non-MemoryStream case.