这是indexloc提供的服务,不要输入任何密码
Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions OpenRA.Game/Primitives/SegmentStream.cs
Original file line number Diff line number Diff line change
Expand Up @@ -172,5 +172,20 @@ public static Stream CreateWithoutOwningStream(Stream stream, long offset, int c
stream.Seek(offset, SeekOrigin.Begin);
return new MemoryStream(stream.ReadBytes(count));
}

public static ReadOnlySpan<byte> GetReadableData(Stream stream, long offset, int size)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method is a bit weird and I think should be moved to the one caller as a private helper.

  • If you pass in a MemoryStream, we don't advance the stream position.
  • You're calling this to try and avoid an alloc/copy, but if you don't have a MemoryStream we're going to alloc a new array. A caller might prefer a TryGet... method instead, in case it could reuse an existing buffer in the non-MemoryStream case.

{
if (stream is MemoryStream ms)
{
// avoid copying where possible
var buf = ms.GetBuffer();
return new ReadOnlySpan<byte>(buf, (int)offset, Math.Min(size, buf.Length - (int)offset));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

buf.AsSpan().Slice(offset, len)

here you truncate, but below you don't.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

below it's already the right size

}

var buffer = new byte[size];
stream.Seek(offset, SeekOrigin.Begin);
stream.ReadExactly(buffer);
return buffer;
}
}
}
3 changes: 2 additions & 1 deletion OpenRA.Game/Sound/Sound.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ public interface ISoundFormat : IDisposable
int SampleRate { get; }
float LengthInSeconds { get; }
Stream GetPCMInputStream();
byte[] GetPCMData();
}

public enum SoundType { World, UI }
Expand Down Expand Up @@ -93,7 +94,7 @@ public void Initialize(ISoundLoader[] loaders, IReadOnlyFileSystem fileSystem)
this.loaders = loaders;
this.fileSystem = fileSystem;
ISoundSource LoadIntoMemory(ISoundFormat soundFormat) => soundEngine.AddSoundSourceFromMemory(
soundFormat.GetPCMInputStream().ReadAllBytes(), soundFormat.Channels, soundFormat.SampleBits, soundFormat.SampleRate);
soundFormat.GetPCMData(), soundFormat.Channels, soundFormat.SampleBits, soundFormat.SampleRate);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To avoid polluting the existing classes with extra API surface here, perhaps instead the Stream could detect a ReadAllBytes call by checking if a call to Read will consume the entire Stream - when it does it can run the fast-path scenario internally.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean codegen?

Copy link
Member

@RoosterDragon RoosterDragon Nov 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, I mean in the Read method of the Stream you could do something conceptually akin to:

public override int Read(Span<byte> buffer)
{
  if (position == 0 && buffer.Length == totalLengthOfFile)
  {
    // This is a request to read the whole stream in one go - use the fast-path here.
  }

  // This is a request to read a portion of the file, use the slow-path.
}

This means we can have a fast-path and slow-path - but they're both exposed via the existing Stream abstraction, so we don't need a new API surface for getting a byte array directly.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or if that's still too late in the process - then copy things into a MemoryStream ahead of time and return that instead.

sounds = new Cache<string, ISoundSource>(filename => LoadSound(filename, LoadIntoMemory));
currentSounds.Clear();
currentNotifications.Clear();
Expand Down
6 changes: 6 additions & 0 deletions OpenRA.Mods.Cnc/AudioLoaders/AudLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ public sealed class AudFormat : ISoundFormat
public int SampleRate => sampleRate;
public float LengthInSeconds => lengthInSeconds;
public Stream GetPCMInputStream() { return audStreamFactory(); }
public byte[] GetPCMData()
{
using var s = audStreamFactory();
return s.ReadAllBytes();
}

public void Dispose() { sourceStream.Dispose(); }

readonly Stream sourceStream;
Expand Down
6 changes: 6 additions & 0 deletions OpenRA.Mods.Cnc/AudioLoaders/VocLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ public sealed class VocFormat : ISoundFormat
public int SampleRate { get; }
public float LengthInSeconds => (float)totalSamples / SampleRate;
public Stream GetPCMInputStream() { return new VocStream(new VocFormat(this)); }
public byte[] GetPCMData()
{
using var pcmStream = GetPCMInputStream();
return pcmStream.ReadAllBytes();
}

public void Dispose() { stream.Dispose(); }

readonly byte[] buffer = new byte[4096];
Expand Down
7 changes: 4 additions & 3 deletions OpenRA.Mods.Cnc/FileFormats/AudReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ public static AudChunk Read(Stream s)

public static class AudReader
{
public static bool LoadSound(Stream s, out Func<Stream> result, out int sampleRate, out int sampleBits, out int channels, out float lengthInSeconds)
public static bool LoadSound(Stream s, out Func<Stream> result,
out int sampleRate, out int sampleBits, out int channels, out float lengthInSeconds)
{
result = null;
var startPosition = s.Position;
Expand Down Expand Up @@ -102,9 +103,9 @@ sealed class ImaAdpcmAudStream : ReadOnlyAdapterStream
readonly int outputSize;
int dataSize;

int currentSample;
short currentSample;
int baseOffset;
int index;
byte index;

public ImaAdpcmAudStream(Stream stream, int outputSize, int dataSize)
: base(stream)
Expand Down
2 changes: 1 addition & 1 deletion OpenRA.Mods.Cnc/FileFormats/VqaVideo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ void CollectAudioData()
{
var audio1 = new MemoryStream(); // left channel / mono
var audio2 = new MemoryStream(); // right channel
var adpcmIndex = 0;
byte adpcmIndex = 0;
var compressed = false;
for (var i = 0; i < FrameCount; i++)
{
Expand Down
6 changes: 6 additions & 0 deletions OpenRA.Mods.Common/AudioLoaders/Mp3Loader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ public sealed class Mp3Format : ISoundFormat
public int SampleRate => mp3.Frequency;
public float LengthInSeconds { get; }
public Stream GetPCMInputStream() { return new MP3Stream(Clone(this)); }
public byte[] GetPCMData()
{
using var pcmStream = GetPCMInputStream();
return pcmStream.ReadAllBytes();
}

public void Dispose() { mp3.Dispose(); }

readonly MP3Stream mp3;
Expand Down
6 changes: 6 additions & 0 deletions OpenRA.Mods.Common/AudioLoaders/OggLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ public sealed class OggFormat : ISoundFormat
public int SampleRate => reader.SampleRate;
public float LengthInSeconds { get; }
public Stream GetPCMInputStream() { return new OggStream(new OggFormat(this)); }
public byte[] GetPCMData()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

above you have the same code for a diff audio format.

feels like this function should be in a base class. or a helper.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's here just a s place holder. Ideally every format would implement their own fast path

{
using var pcmStream = GetPCMInputStream();
return pcmStream.ReadAllBytes();
}

public void Dispose() { reader.Dispose(); }

readonly VorbisReader reader;
Expand Down
4 changes: 3 additions & 1 deletion OpenRA.Mods.Common/AudioLoaders/WavLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,12 @@ public sealed class WavFormat : ISoundFormat
public int SampleRate => sampleRate;
public float LengthInSeconds => lengthInSeconds;
public Stream GetPCMInputStream() { return wavStreamFactory(); }
public byte[] GetPCMData() { return pcmDataFactory(); }
public void Dispose() { sourceStream.Dispose(); }

readonly Stream sourceStream;
readonly Func<Stream> wavStreamFactory;
readonly Func<byte[]> pcmDataFactory;
readonly short channels;
readonly int sampleBits;
readonly int sampleRate;
Expand All @@ -68,7 +70,7 @@ public WavFormat(Stream stream)
{
sourceStream = stream;

if (!WavReader.LoadSound(stream, out wavStreamFactory, out channels, out sampleBits, out sampleRate, out lengthInSeconds))
if (!WavReader.LoadSound(stream, out wavStreamFactory, out pcmDataFactory, out channels, out sampleBits, out sampleRate, out lengthInSeconds))
throw new InvalidDataException();
}
}
Expand Down
163 changes: 143 additions & 20 deletions OpenRA.Mods.Common/FileFormats/ImaAdpcmReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
#endregion

using System;
using System.IO;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using OpenRA.Primitives;

namespace OpenRA.Mods.Common.FileFormats
{
Expand All @@ -30,39 +34,49 @@ public static class ImaAdpcmReader
16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767
];

public static short DecodeImaAdpcmSample(byte b, ref int index, ref int current)
/// <summary>
/// Decodes a single IMA ADPCM nibble to a PCM sample.
/// </summary>
/// <remarks>
/// Branchless and only the output variables leave registers.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static short DecodeImaAdpcmSample(byte nibble, ref byte idx, ref short pred)
{
var sb = (b & 8) != 0;
b &= 7;
var step = StepTable[idx];
var diff = step >> 3;

var delta = StepTable[index] * b / 4 + StepTable[index] / 8;
if (sb)
delta = -delta;
var mask = nibble & 7;
diff += ((mask >> 2) & 1) * step;
diff += ((mask >> 1) & 1) * (step >> 1);
diff += (mask & 1) * (step >> 2);

current += delta;
if (current > short.MaxValue)
current = short.MaxValue;
// branchless negation via bitmask
var sign = (nibble & 8) != 0 ? -1 : 1;
diff *= sign;

if (current < short.MinValue)
current = short.MinValue;
var sample = pred + diff;

index += IndexAdjust[b];
if (index < 0)
index = 0;
// branchless clamping (fast saturating logic)
if ((uint)(sample - short.MinValue) > ushort.MaxValue)
sample = sample > 0 ? short.MaxValue : short.MinValue;

if (index > 88)
index = 88;
pred = (short)sample;

return (short)current;
var newIdx = idx + IndexAdjust[mask];
newIdx = newIdx < 0 ? 0 : newIdx > 88 ? 88 : newIdx;
idx = (byte)newIdx;

return pred;
}

public static byte[] LoadImaAdpcmSound(ReadOnlySpan<byte> raw, ref int index)
public static byte[] LoadImaAdpcmSound(ReadOnlySpan<byte> raw, ref byte index)
{
var currentSample = 0;
short currentSample = 0;
return LoadImaAdpcmSound(raw, ref index, ref currentSample);
}

public static byte[] LoadImaAdpcmSound(ReadOnlySpan<byte> raw, ref int index, ref int currentSample)
public static byte[] LoadImaAdpcmSound(ReadOnlySpan<byte> raw, ref byte index, ref short currentSample)
{
var dataSize = raw.Length;
var outputSize = raw.Length * 4;
Expand All @@ -85,5 +99,114 @@ public static byte[] LoadImaAdpcmSound(ReadOnlySpan<byte> raw, ref int index, re

return output;
}

public static byte[] ReadData(Stream stream, long dataOffset, int dataSize, short blockAlign, short channels)
{
const int SamplesPerGroup = 8;

ArgumentNullException.ThrowIfNull(stream);

var sourceData = SegmentStream.GetReadableData(stream, dataOffset, dataSize);

var numBlocks = dataSize / blockAlign;

var predictorSize = 4 * channels;
var blockDataSize = blockAlign - predictorSize;

// We get two samples per nibble
var samplesPerChannel = blockDataSize * 2 / channels;

// 8 samples from a 4-byte group
var groupCount = samplesPerChannel / SamplesPerGroup;

var predOut = numBlocks * channels * 2;
var groupOut = numBlocks * groupCount * channels * SamplesPerGroup * 2;
var estimatedOutDataSize = predOut + groupOut;

var outData = new byte[estimatedOutDataSize];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I recall from the spec these files can have a form of compression applied, so they might be larger once uncompressed?

I see the commit is saying the chunk for the uncompressed size is often not reliable - but even if we're ignoring what the file claims will be the final size, I'm not seing how we're dealing with it here?

Copy link
Member Author

@PunkPun PunkPun Nov 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we are calculating the total uncompressed size (unrelated: in many ways this is the bound check)

Copy link
Member Author

@PunkPun PunkPun Nov 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ImaAdpcm format is the compression, it reduces file size to 1/4th the size and the headers do remain uncompressed


// PERF: The output is 16-bit PCM, so we can write bytes as if they were shorts for less CPU churn.
var outShorts = MemoryMarshal.Cast<byte, short>(outData.AsSpan());

// NOTE: decoding a block is sequentually dependent on predictor/index.
Span<short> predictor = stackalloc short[channels];
Span<byte> index = stackalloc byte[channels];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'll need to validate the channel count is reasonable - wouldn't want to stack allocate if the file claimed to have short.MaxValue channels for example.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it should always be 1 or 2 channels. We could of course reject if there are more

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason it's important is that the process can't recover from a stack overflow - so a malformed or malicious file will crash the process if we stackalloc too large a value. For other parsing errors we could try-catch and just not play the audio - but a stack overflow kills our process outright.


// PERF: Avoid bounds checks by using refs.
ref var srcRef = ref MemoryMarshal.GetReference(sourceData);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unsafe code for an operation as dangerous as file parsing to avoid a few bounds checks is insane to me.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we are working on already bound checked data, removing bound checks in hot loops did improve the perf by 3-4%

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But the bounds check you've calculated is based on input controlled by the file. You can't trust the file. If the file is malformed or malicious, then we won't have the correct bounds.

We appear to have exactly those sorts of bugs here, we'll read input of length dataSize - which is the chunk size the file claims to have. And we write output based on the calculation of the output size. This is based on other inputs from the file such as blockAlign and channels.

A well-formed file will have everything match up so we consume the right amount of input and produce the right amount of output. A malformed or malicious file can make false claims where things don't line up, meaning we'll walk off the end of the input or output arrays.

If that happened in our existing parsers, we'd get an array out of bounds exception and could recover and keep running. By using unsafe, we've elevated the bug into either an unrecoverable crash if we read/write out of our process memory - or worse a security issue if the file is controlled by an attacker and the bad accesses can be manipulated in a manner advantageous to them.

If we used safe code, then at least bugs like this have reduced impact potiential - by dropping to unsafe we've signed ourselves up to much worse outcomes if the code contains any bugs.

If I had a magical lever to downgrade the impact of bugs I wrote from "security issue/denial-of-service" to "mostly recoverable" and it only cost me 3-4% perf, I'd pull that lever most times (in this case, the lever is "don't use unsafe code to skip bounds checks")

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, if we really are sensitive to 3-4% perf then the better solution is to load those files up-front behind the load screen and keep them in ram!

ref var outRef = ref MemoryMarshal.GetReference(outShorts);
ref var predRef = ref MemoryMarshal.GetReference(predictor);
ref var idxRef = ref MemoryMarshal.GetReference(index);

// Global decoded sample counter
var src = 0;
var outSample = 0;

for (var block = 0; block < numBlocks; block++)
{
// Initial states
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would put this init block in a separate function. Assuming you don't reference too many local vars.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's so little code it's using all local variables

for (var c = 0; c < channels; c++)
{
var offset = src + c * 4;

// Load initial values.
var pred = (short)(Unsafe.Add(ref srcRef, offset) | (Unsafe.Add(ref srcRef, offset + 1) << 8));
var idx = Unsafe.Add(ref srcRef, offset + 2);

Unsafe.Add(ref predRef, c) = pred;
Unsafe.Add(ref idxRef, c) = idx;
}

src += predictorSize;

// Write initial predictor samples interleaved
for (var c = 0; c < channels; c++)
Unsafe.Add(ref outRef, outSample + c) = Unsafe.Add(ref predRef, c);

outSample += channels;

for (var iter = 0; iter < groupCount; iter++)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're decoding in a batch of 8 at a time, I assume as a form of loop unrolling?

Is there a guarentee that chunks have a multiple of 8 in them? If not, we're need another loop to catch the leftovers or we're going to miss out some samples.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it is guaranteed, if there's an incomplete group at the end of the file we should just skip it

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

though, the fast path will just fail and decode garbage

{
// Decode 8 samples sequentially per channel
for (var c = 0; c < channels; c++)
{
ref var pred = ref Unsafe.Add(ref predRef, c);
ref var idx = ref Unsafe.Add(ref idxRef, c);

var b0 = Unsafe.Add(ref srcRef, src + 0);
var b1 = Unsafe.Add(ref srcRef, src + 1);
var b2 = Unsafe.Add(ref srcRef, src + 2);
var b3 = Unsafe.Add(ref srcRef, src + 3);

src += 4;

// PERF: Decode into temporary variables so they could be easily inlined directly to output.
var s0 = DecodeImaAdpcmSample((byte)(b0 & 0x0F), ref idx, ref pred);
var s1 = DecodeImaAdpcmSample((byte)(b0 >> 4), ref idx, ref pred);
var s2 = DecodeImaAdpcmSample((byte)(b1 & 0x0F), ref idx, ref pred);
var s3 = DecodeImaAdpcmSample((byte)(b1 >> 4), ref idx, ref pred);
var s4 = DecodeImaAdpcmSample((byte)(b2 & 0x0F), ref idx, ref pred);
var s5 = DecodeImaAdpcmSample((byte)(b2 >> 4), ref idx, ref pred);
var s6 = DecodeImaAdpcmSample((byte)(b3 & 0x0F), ref idx, ref pred);
var s7 = DecodeImaAdpcmSample((byte)(b3 >> 4), ref idx, ref pred);

// Write interleaved samples (one sample per channel)
var basePos = outSample + c;
Unsafe.Add(ref outRef, basePos + channels * 0) = s0;
Unsafe.Add(ref outRef, basePos + channels * 1) = s1;
Unsafe.Add(ref outRef, basePos + channels * 2) = s2;
Unsafe.Add(ref outRef, basePos + channels * 3) = s3;
Unsafe.Add(ref outRef, basePos + channels * 4) = s4;
Unsafe.Add(ref outRef, basePos + channels * 5) = s5;
Unsafe.Add(ref outRef, basePos + channels * 6) = s6;
Unsafe.Add(ref outRef, basePos + channels * 7) = s7;
}

outSample += channels * 8;
}
}

return outData;
}
}
}
Loading