From 269d1622b946525683169ddd74b5b6011e362650 Mon Sep 17 00:00:00 2001
From: Michael Weimann <michaelw@matrix.org>
Date: Tue, 31 Jan 2023 09:48:30 +0100
Subject: [PATCH] Implement more robust broadcast chunk header detection
 (#10006)

---
 .../audio/VoiceBroadcastRecorder.ts           | 38 ++++++++++++++-----
 .../audio/VoiceBroadcastRecorder-test.ts      | 18 +++++++--
 2 files changed, 43 insertions(+), 13 deletions(-)

diff --git a/src/voice-broadcast/audio/VoiceBroadcastRecorder.ts b/src/voice-broadcast/audio/VoiceBroadcastRecorder.ts
index 3d64a5ddc3..5323d32573 100644
--- a/src/voice-broadcast/audio/VoiceBroadcastRecorder.ts
+++ b/src/voice-broadcast/audio/VoiceBroadcastRecorder.ts
@@ -14,7 +14,9 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
+import { isEqual } from "lodash";
 import { Optional } from "matrix-events-sdk";
+import { logger } from "matrix-js-sdk/src/logger";
 import { TypedEventEmitter } from "matrix-js-sdk/src/models/typed-event-emitter";
 
 import { getChunkLength } from "..";
@@ -38,6 +40,12 @@ export interface ChunkRecordedPayload {
     length: number;
 }
 
+// char sequence of "OpusHead"
+const OpusHead = [79, 112, 117, 115, 72, 101, 97, 100];
+
+// char sequence of "OpusTags"
+const OpusTags = [79, 112, 117, 115, 84, 97, 103, 115];
+
 /**
  * This class provides the function to seamlessly record fixed length chunks.
  * Subscribe with on(VoiceBroadcastRecordingEvents.ChunkRecorded, (payload: ChunkRecordedPayload) => {})
@@ -47,11 +55,11 @@ export class VoiceBroadcastRecorder
     extends TypedEventEmitter<VoiceBroadcastRecorderEvent, EventMap>
     implements IDestroyable
 {
-    private headers = new Uint8Array(0);
+    private opusHead?: Uint8Array;
+    private opusTags?: Uint8Array;
     private chunkBuffer = new Uint8Array(0);
     // position of the previous chunk in seconds
     private previousChunkEndTimePosition = 0;
-    private pagesFromRecorderCount = 0;
     // current chunk length in seconds
     private currentChunkLength = 0;
 
@@ -73,7 +81,7 @@ export class VoiceBroadcastRecorder
     public async stop(): Promise<Optional<ChunkRecordedPayload>> {
         try {
             await this.voiceRecording.stop();
-        } catch {
+        } catch (e) {
             // Ignore if the recording raises any error.
         }
 
@@ -82,7 +90,6 @@ export class VoiceBroadcastRecorder
         const chunk = this.extractChunk();
         this.currentChunkLength = 0;
         this.previousChunkEndTimePosition = 0;
-        this.headers = new Uint8Array(0);
         return chunk;
     }
 
@@ -103,11 +110,19 @@ export class VoiceBroadcastRecorder
 
     private onDataAvailable = (data: ArrayBuffer): void => {
         const dataArray = new Uint8Array(data);
-        this.pagesFromRecorderCount++;
 
-        if (this.pagesFromRecorderCount <= 2) {
-            // first two pages contain the headers
-            this.headers = concat(this.headers, dataArray);
+        // extract the part, that contains the header type info
+        const headerType = Array.from(dataArray.slice(28, 36));
+
+        if (isEqual(OpusHead, headerType)) {
+            // data seems to be an "OpusHead" header
+            this.opusHead = dataArray;
+            return;
+        }
+
+        if (isEqual(OpusTags, headerType)) {
+            // data seems to be an "OpusTags" header
+            this.opusTags = dataArray;
             return;
         }
 
@@ -134,9 +149,14 @@ export class VoiceBroadcastRecorder
             return null;
         }
 
+        if (!this.opusHead || !this.opusTags) {
+            logger.warn("Broadcast chunk cannot be extracted. OpusHead or OpusTags is missing.");
+            return null;
+        }
+
         const currentRecorderTime = this.voiceRecording.recorderSeconds;
         const payload: ChunkRecordedPayload = {
-            buffer: concat(this.headers, this.chunkBuffer),
+            buffer: concat(this.opusHead!, this.opusTags!, this.chunkBuffer),
             length: this.getCurrentChunkLength(),
         };
         this.chunkBuffer = new Uint8Array(0);
diff --git a/test/voice-broadcast/audio/VoiceBroadcastRecorder-test.ts b/test/voice-broadcast/audio/VoiceBroadcastRecorder-test.ts
index a2848b4857..e5deae23b2 100644
--- a/test/voice-broadcast/audio/VoiceBroadcastRecorder-test.ts
+++ b/test/voice-broadcast/audio/VoiceBroadcastRecorder-test.ts
@@ -67,8 +67,10 @@ describe("VoiceBroadcastRecorder", () => {
 
     describe("instance", () => {
         const chunkLength = 30;
-        const headers1 = new Uint8Array([1, 2]);
-        const headers2 = new Uint8Array([3, 4]);
+        // 0... OpusHead
+        const headers1 = new Uint8Array([...Array(28).fill(0), 79, 112, 117, 115, 72, 101, 97, 100]);
+        // 0... OpusTags
+        const headers2 = new Uint8Array([...Array(28).fill(0), 79, 112, 117, 115, 84, 97, 103, 115]);
         const chunk1 = new Uint8Array([5, 6]);
         const chunk2a = new Uint8Array([7, 8]);
         const chunk2b = new Uint8Array([9, 10]);
@@ -79,12 +81,16 @@ describe("VoiceBroadcastRecorder", () => {
         let onChunkRecorded: (chunk: ChunkRecordedPayload) => void;
 
         const simulateFirstChunk = (): void => {
+            // send headers in wrong order and multiple times to test robustness for that
+            voiceRecording.onDataAvailable(headers2);
+            voiceRecording.onDataAvailable(headers1);
             voiceRecording.onDataAvailable(headers1);
             voiceRecording.onDataAvailable(headers2);
             // set recorder seconds to something greater than the test chunk length of 30
             // @ts-ignore
             voiceRecording.recorderSeconds = 42;
             voiceRecording.onDataAvailable(chunk1);
+            voiceRecording.onDataAvailable(headers1);
         };
 
         const expectOnFirstChunkRecorded = (): void => {
@@ -155,7 +161,7 @@ describe("VoiceBroadcastRecorder", () => {
             expect(voiceBroadcastRecorder.contentType).toBe(contentType);
         });
 
-        describe("when the first page from recorder has been received", () => {
+        describe("when the first header from recorder has been received", () => {
             beforeEach(() => {
                 voiceRecording.onDataAvailable(headers1);
             });
@@ -163,7 +169,7 @@ describe("VoiceBroadcastRecorder", () => {
             itShouldNotEmitAChunkRecordedEvent();
         });
 
-        describe("when a second page from recorder has been received", () => {
+        describe("when the second header from recorder has been received", () => {
             beforeEach(() => {
                 voiceRecording.onDataAvailable(headers1);
                 voiceRecording.onDataAvailable(headers2);
@@ -229,6 +235,10 @@ describe("VoiceBroadcastRecorder", () => {
 
                 // simulate a second chunk
                 voiceRecording.onDataAvailable(chunk2a);
+
+                // send headers again to test robustness for that
+                voiceRecording.onDataAvailable(headers2);
+
                 // add another 30 seconds for the next chunk
                 // @ts-ignore
                 voiceRecording.recorderSeconds = 72;