import AAC from './aac-helper';
import MP4 from './mp4-generator';
import { ErrorDetails, ErrorTypes } from '../errors';
import { Events } from '../events';
import { PlaylistLevelType } from '../types/loader';
import { type ILogger, Logger } from '../utils/logger';
import {
timestampToString,
toMsFromMpegTsClock,
} from '../utils/timescale-conversion';
import type { HlsConfig } from '../config';
import type { HlsEventEmitter } from '../events';
import type { SourceBufferName } from '../types/buffer';
import type {
AudioSample,
DemuxedAudioTrack,
DemuxedMetadataTrack,
DemuxedUserdataTrack,
DemuxedVideoTrack,
VideoSample,
} from '../types/demuxer';
import type {
InitSegmentData,
Mp4Sample,
RemuxedMetadata,
RemuxedTrack,
RemuxedUserdata,
Remuxer,
RemuxerResult,
} from '../types/remuxer';
import type { TrackSet } from '../types/track';
import type { TypeSupported } from '../utils/codecs';
import type {
RationalTimestamp,
TimestampOffset,
} from '../utils/timescale-conversion';
// Upper bound, in milliseconds (10 seconds), on an audio gap that remuxAudio
// will fill by injecting frames; larger gaps are left untouched.
const MAX_SILENT_FRAME_DURATION = 10 * 1000;
// Audio samples carried by one frame, per codec (see getSamplesPerFrame).
const AAC_SAMPLES_PER_FRAME = 1024;
const MPEG_AUDIO_SAMPLE_PER_FRAME = 1152;
const AC3_SAMPLES_PER_FRAME = 1536;
// Major versions parsed from navigator.userAgent by the first MP4Remuxer
// constructed: `null` means "not detected yet", `0` means "no match".
let chromeVersion: number | null = null;
let safariWebkitVersion: number | null = null;
/**
 * Builds the per-sample record handed to the MP4 generator.
 *
 * @param isKeyframe - whether the sample can be decoded on its own
 * @param duration - sample duration
 * @param size - sample payload size in bytes
 * @param cts - composition time offset (PTS - DTS)
 * @returns a sample whose flags mark it as sync / independent for keyframes
 *   and as non-sync / dependent otherwise
 */
function createMp4Sample(
  isKeyframe: boolean,
  duration: number,
  size: number,
  cts: number,
): Mp4Sample {
  // Only these two flags vary with the keyframe status.
  const dependsOn = isKeyframe ? 2 : 1;
  const isNonSync = isKeyframe ? 0 : 1;
  const mp4Sample: Mp4Sample = {
    duration,
    size,
    cts,
    flags: {
      isLeading: 0,
      isDependedOn: 0,
      hasRedundancy: 0,
      degradPrio: 0,
      dependsOn,
      isNonSync,
    },
  };
  return mp4Sample;
}
/**
 * Remuxes demuxed audio / video samples into fragmented MP4: an init segment
 * per track (see generateIS) followed by `moof` + `mdat` pairs (see
 * remuxVideo / remuxAudio). ID3 and user-data cue samples are rebased onto
 * the same timeline.
 *
 * The instance keeps the timeline state needed to stitch consecutive
 * fragments together: the initial PTS/DTS offset, the timestamp at which the
 * next audio / video sample is expected, and whether the last fragment of
 * each type was contiguous.
 */
export default class MP4Remuxer extends Logger implements Remuxer {
  private readonly observer: HlsEventEmitter;
  private readonly config: HlsConfig;
  private readonly typeSupported: TypeSupported;
  /** True once generateIS() has produced an init segment for some track. */
  private ISGenerated: boolean = false;
  /** Timeline origin (PTS) subtracted from every sample timestamp. */
  private _initPTS: TimestampOffset | null = null;
  /** Timeline origin (DTS); set together with `_initPTS`. */
  private _initDTS: TimestampOffset | null = null;
  /** Expected DTS of the next video sample, relative to initPTS, in input timescale ticks. */
  private nextVideoTs: number | null = null;
  /** Expected PTS of the next audio sample, relative to initPTS, in input timescale ticks. */
  private nextAudioTs: number | null = null;
  /** Duration used for the last video sample of the previous fragment. */
  private videoSampleDuration: number | null = null;
  private isAudioContiguous: boolean = false;
  private isVideoContiguous: boolean = false;
  /** Video geometry the current init segment was generated for. */
  private videoTrackConfig?: {
    width?: number;
    height?: number;
    pixelRatio?: [number, number];
  };

  constructor(
    observer: HlsEventEmitter,
    config: HlsConfig,
    typeSupported: TypeSupported,
    logger: ILogger,
  ) {
    super('mp4-remuxer', logger);

    this.observer = observer;
    this.config = config;
    this.typeSupported = typeSupported;

    // Browser versions are detected once and cached in module scope. Both
    // lookups share the same guarded user-agent string.
    if (chromeVersion === null || safariWebkitVersion === null) {
      const userAgent = navigator.userAgent || '';
      if (chromeVersion === null) {
        const result = userAgent.match(/Chrome\/(\d+)/i);
        chromeVersion = result ? parseInt(result[1], 10) : 0;
      }
      if (safariWebkitVersion === null) {
        const result = userAgent.match(/Safari\/(\d+)/i);
        safariWebkitVersion = result ? parseInt(result[1], 10) : 0;
      }
    }
  }

  /** Drops references held by the instance; it must not be used afterwards. */
  destroy() {
    // @ts-expect-error deliberately nulling readonly / non-nullable fields on teardown
    this.config = this.videoTrackConfig = this._initPTS = this._initDTS = null;
  }

  /**
   * Replaces the timeline origin (both initPTS and initDTS) with
   * `defaultTimeStamp`, logging when the value actually changes.
   */
  resetTimeStamp(defaultTimeStamp: TimestampOffset | null) {
    const initPTS = this._initPTS;
    if (
      !initPTS ||
      !defaultTimeStamp ||
      defaultTimeStamp.trackId !== initPTS.trackId ||
      defaultTimeStamp.baseTime !== initPTS.baseTime ||
      defaultTimeStamp.timescale !== initPTS.timescale
    ) {
      this.log(
        `Reset initPTS: ${initPTS ? timestampToString(initPTS) : initPTS} > ${defaultTimeStamp ? timestampToString(defaultTimeStamp) : defaultTimeStamp}`,
      );
    }

    this._initPTS = this._initDTS = defaultTimeStamp;
  }

  /** Marks the next audio and video fragments as non-contiguous. */
  resetNextTimestamp() {
    this.log('reset next timestamp');
    this.isVideoContiguous = false;
    this.isAudioContiguous = false;
  }

  /** Forces the next remux() call to generate a fresh init segment. */
  resetInitSegment() {
    this.log('ISGenerated flag reset');
    this.ISGenerated = false;
    this.videoTrackConfig = undefined;
  }

  /**
   * Returns the smallest PTS among `videoSamples`, treating a backwards jump
   * of more than 2^32 ticks as a timestamp rollover and normalizing it
   * against the first sample's PTS.
   *
   * @param videoSamples - must contain at least one sample
   */
  getVideoStartPts(videoSamples: VideoSample[]) {
    let rolloverDetected = false;
    const firstPts = videoSamples[0].pts;
    const startPTS = videoSamples.reduce((minPTS, sample) => {
      let pts = sample.pts;
      let delta = pts - minPTS;
      if (delta < -4294967296) {
        // 2^32: a jump this large is a rollover, not a genuinely earlier sample
        rolloverDetected = true;
        pts = normalizePts(pts, firstPts);
        delta = pts - minPTS;
      }
      if (delta > 0) {
        return minPTS;
      }
      return pts;
    }, firstPts);
    if (rolloverDetected) {
      this.debug('PTS rollover detected');
    }
    return startPTS;
  }

  /**
   * Remuxes the pending samples of all tracks.
   *
   * Audio / video are only remuxed when enough samples are available (or an
   * init segment already exists, or `flush` is set). ID3 and user-data cues
   * are flushed whenever the timeline origin is known.
   *
   * @param timeOffset - playlist time of the fragment start, in seconds
   * @param accurateTimeOffset - whether `timeOffset` can be trusted
   * @param flush - remux even a single pending video sample
   * @returns the remuxed tracks; fields are undefined for tracks not remuxed
   */
  remux(
    audioTrack: DemuxedAudioTrack,
    videoTrack: DemuxedVideoTrack,
    id3Track: DemuxedMetadataTrack,
    textTrack: DemuxedUserdataTrack,
    timeOffset: number,
    accurateTimeOffset: boolean,
    flush: boolean,
    playlistType: PlaylistLevelType,
  ): RemuxerResult {
    let video: RemuxedTrack | undefined;
    let audio: RemuxedTrack | undefined;
    let initSegment: InitSegmentData | undefined;
    let text: RemuxedUserdata | undefined;
    let id3: RemuxedMetadata | undefined;
    let independent: boolean | undefined;
    let audioTimeOffset = timeOffset;
    let videoTimeOffset = timeOffset;

    const hasAudio = audioTrack.pid > -1;
    const hasVideo = videoTrack.pid > -1;
    const length = videoTrack.samples.length;
    const enoughAudioSamples = audioTrack.samples.length > 0;
    // Without flush, at least two video samples are required.
    const enoughVideoSamples = (flush && length > 0) || length > 1;
    const canRemuxAvc =
      ((!hasAudio || enoughAudioSamples) &&
        (!hasVideo || enoughVideoSamples)) ||
      this.ISGenerated ||
      flush;

    if (canRemuxAvc) {
      if (this.ISGenerated) {
        // Regenerate the init segment when the video geometry changed, when
        // video shows up after an audio-only init segment, or when audio
        // shows up before any audio has been remuxed.
        const config = this.videoTrackConfig;
        if (
          (config &&
            (videoTrack.width !== config.width ||
              videoTrack.height !== config.height ||
              videoTrack.pixelRatio?.[0] !== config.pixelRatio?.[0] ||
              videoTrack.pixelRatio?.[1] !== config.pixelRatio?.[1])) ||
          (!config && enoughVideoSamples) ||
          (this.nextAudioTs === null && enoughAudioSamples)
        ) {
          this.resetInitSegment();
        }
      }
      if (!this.ISGenerated) {
        initSegment = this.generateIS(
          audioTrack,
          videoTrack,
          timeOffset,
          accurateTimeOffset,
        );
      }

      const isVideoContiguous = this.isVideoContiguous;
      let firstKeyFrameIndex = -1;
      let firstKeyFramePTS: number | undefined;

      if (enoughVideoSamples) {
        firstKeyFrameIndex = findKeyframeIndex(videoTrack.samples);
        if (!isVideoContiguous && this.config.forceKeyFrameOnDiscontinuity) {
          independent = true;
          if (firstKeyFrameIndex > 0) {
            this.warn(
              `Dropped ${firstKeyFrameIndex} out of ${length} video samples due to a missing keyframe`,
            );
            // Drop everything before the first keyframe and shift the video
            // start by the amount of media that was removed.
            const startPTS = this.getVideoStartPts(videoTrack.samples);
            videoTrack.samples = videoTrack.samples.slice(firstKeyFrameIndex);
            videoTrack.dropped += firstKeyFrameIndex;
            videoTimeOffset +=
              (videoTrack.samples[0].pts - startPTS) /
              videoTrack.inputTimeScale;
            firstKeyFramePTS = videoTimeOffset;
          } else if (firstKeyFrameIndex === -1) {
            this.warn(`No keyframe found out of ${length} video samples`);
            independent = false;
          }
        }
      }

      if (this.ISGenerated) {
        if (enoughAudioSamples && enoughVideoSamples) {
          // When the first audio and video timestamps differ, delay whichever
          // track starts later so both stay aligned on the same timeline.
          const startPTS = this.getVideoStartPts(videoTrack.samples);
          const tsDelta =
            normalizePts(audioTrack.samples[0].pts, startPTS) - startPTS;
          const audiovideoTimestampDelta = tsDelta / videoTrack.inputTimeScale;
          audioTimeOffset += Math.max(0, audiovideoTimestampDelta);
          videoTimeOffset += Math.max(0, -audiovideoTimestampDelta);
        }

        // Audio is remuxed before video on purpose: remuxVideo reads
        // this.nextAudioTs, which remuxAudio updates.
        if (enoughAudioSamples) {
          // the init segment was generated without audio: regenerate it
          if (!audioTrack.samplerate) {
            this.warn('regenerate InitSegment as audio detected');
            initSegment = this.generateIS(
              audioTrack,
              videoTrack,
              timeOffset,
              accurateTimeOffset,
            );
          }
          audio = this.remuxAudio(
            audioTrack,
            audioTimeOffset,
            this.isAudioContiguous,
            accurateTimeOffset,
            hasVideo ||
              enoughVideoSamples ||
              playlistType === PlaylistLevelType.AUDIO
              ? videoTimeOffset
              : undefined,
          );
          if (enoughVideoSamples) {
            const audioTrackLength = audio ? audio.endPTS - audio.startPTS : 0;
            // the init segment was generated without video: regenerate it
            if (!videoTrack.inputTimeScale) {
              this.warn('regenerate InitSegment as video detected');
              initSegment = this.generateIS(
                audioTrack,
                videoTrack,
                timeOffset,
                accurateTimeOffset,
              );
            }
            video = this.remuxVideo(
              videoTrack,
              videoTimeOffset,
              isVideoContiguous,
              audioTrackLength,
            );
          }
        } else if (enoughVideoSamples) {
          video = this.remuxVideo(
            videoTrack,
            videoTimeOffset,
            isVideoContiguous,
            0,
          );
        }
        if (video) {
          video.firstKeyFrame = firstKeyFrameIndex;
          video.independent = firstKeyFrameIndex !== -1;
          video.firstKeyFramePTS = firstKeyFramePTS;
        }
      }
    }

    // ID3 and text cues are flushed even when audio/video had to wait for
    // more samples, as long as the timeline origin is known.
    if (this.ISGenerated && this._initPTS && this._initDTS) {
      if (id3Track.samples.length) {
        id3 = flushTextTrackMetadataCueSamples(
          id3Track,
          timeOffset,
          this._initPTS,
          this._initDTS,
        );
      }

      if (textTrack.samples.length) {
        text = flushTextTrackUserdataCueSamples(
          textTrack,
          timeOffset,
          this._initPTS,
        );
      }
    }

    return {
      audio,
      video,
      initSegment,
      independent,
      text,
      id3,
    };
  }

  /**
   * Computes the timeline origin for a track: the value which, subtracted
   * from `basetime`, maps it onto `presentationTime`.
   *
   * @param basetime - first timestamp of the track, in `timescale` ticks
   * @param timescale - ticks per second
   * @param presentationTime - playlist time of that timestamp, in seconds
   * @param type - track type, used for logging only
   * @returns the origin in `timescale` ticks; 2^33 is added as many times as
   *   needed for the result to be at least one second (`timescale` ticks)
   */
  computeInitPts(
    basetime: number,
    timescale: number,
    presentationTime: number,
    type: 'audio' | 'video',
  ): number {
    const offset = Math.round(presentationTime * timescale);
    let timestamp = normalizePts(basetime, offset);
    if (timestamp < offset + timescale) {
      this.log(
        `Adjusting PTS for rollover in timeline near ${(offset - timestamp) / timescale} ${type}`,
      );
      while (timestamp < offset + timescale) {
        timestamp += 8589934592; // 2^33
      }
    }
    return timestamp - offset;
  }

  /**
   * Generates the init segment(s) for the tracks that have both codec
   * configuration and samples, and (re)computes the timeline origin when
   * none is known or `accurateTimeOffset` is set.
   *
   * Side effects: sets `timescale` (and possibly `codec`) on the tracks,
   * `ISGenerated`, `videoTrackConfig` and, when computed, `_initPTS` /
   * `_initDTS`.
   *
   * @returns the init segment data, or undefined when no track was ready
   */
  generateIS(
    audioTrack: DemuxedAudioTrack,
    videoTrack: DemuxedVideoTrack,
    timeOffset: number,
    accurateTimeOffset: boolean,
  ): InitSegmentData | undefined {
    const audioSamples = audioTrack.samples;
    const videoSamples = videoTrack.samples;
    const typeSupported = this.typeSupported;
    const tracks: TrackSet = {};
    const _initPTS = this._initPTS;
    let computePTSDTS = !_initPTS || accurateTimeOffset;
    let container = 'audio/mp4';
    let initPTS: number | undefined;
    let initDTS: number | undefined;
    let timescale: number | undefined;
    let trackId: number = -1;

    if (computePTSDTS) {
      // start from +Infinity so Math.min() below keeps the smallest candidate
      initPTS = initDTS = Infinity;
    }

    if (audioTrack.config && audioSamples.length) {
      // the MP4 audio timescale is the sample rate
      audioTrack.timescale = audioTrack.samplerate;
      switch (audioTrack.segmentCodec) {
        case 'mp3':
          if (typeSupported.mpeg) {
            // raw MPEG audio is appended as-is, without an MP4 container
            container = 'audio/mpeg';
            audioTrack.codec = '';
          } else if (typeSupported.mp3) {
            // mp3 carried inside MP4
            audioTrack.codec = 'mp3';
          }
          break;

        case 'ac3':
          audioTrack.codec = 'ac-3';
          break;
      }
      tracks.audio = {
        id: 'audio',
        container: container,
        codec: audioTrack.codec,
        initSegment:
          audioTrack.segmentCodec === 'mp3' && typeSupported.mpeg
            ? new Uint8Array(0)
            : MP4.initSegment([audioTrack]),
        metadata: {
          channelCount: audioTrack.channelCount,
        },
      };
      if (computePTSDTS) {
        trackId = audioTrack.id;
        timescale = audioTrack.inputTimeScale;
        if (!_initPTS || timescale !== _initPTS.timescale) {
          initPTS = initDTS = this.computeInitPts(
            audioSamples[0].pts,
            timescale,
            timeOffset,
            'audio',
          );
        } else {
          // an origin with the same timescale already exists: keep it
          computePTSDTS = false;
        }
      }
    }

    if (videoTrack.sps && videoTrack.pps && videoSamples.length) {
      videoTrack.timescale = videoTrack.inputTimeScale;
      tracks.video = {
        id: 'main',
        container: 'video/mp4',
        codec: videoTrack.codec,
        initSegment: MP4.initSegment([videoTrack]),
        metadata: {
          width: videoTrack.width,
          height: videoTrack.height,
        },
      };
      if (computePTSDTS) {
        trackId = videoTrack.id;
        timescale = videoTrack.inputTimeScale;
        if (!_initPTS || timescale !== _initPTS.timescale) {
          const basePTS = this.getVideoStartPts(videoSamples);
          const baseDTS = normalizePts(videoSamples[0].dts, basePTS);
          const videoInitDTS = this.computeInitPts(
            baseDTS,
            timescale,
            timeOffset,
            'video',
          );
          const videoInitPTS = this.computeInitPts(
            basePTS,
            timescale,
            timeOffset,
            'video',
          );
          // keep the earliest origin across audio and video
          initDTS = Math.min(initDTS as number, videoInitDTS);
          initPTS = Math.min(initPTS as number, videoInitPTS);
        } else {
          computePTSDTS = false;
        }
      }
      this.videoTrackConfig = {
        width: videoTrack.width,
        height: videoTrack.height,
        pixelRatio: videoTrack.pixelRatio,
      };
    }

    if (Object.keys(tracks).length) {
      this.ISGenerated = true;
      if (computePTSDTS) {
        if (_initPTS) {
          this.warn(
            `Timestamps at playlist time: ${accurateTimeOffset ? '' : '~'}${timeOffset} ${initPTS! / timescale!} != initPTS: ${_initPTS.baseTime / _initPTS.timescale} (${_initPTS.baseTime}/${_initPTS.timescale}) trackId: ${_initPTS.trackId}`,
          );
        }
        this.log(
          `Found initPTS at playlist time: ${timeOffset} offset: ${initPTS! / timescale!} (${initPTS}/${timescale}) trackId: ${trackId}`,
        );
        this._initPTS = {
          baseTime: initPTS as number,
          timescale: timescale as number,
          trackId: trackId as number,
        };
        this._initDTS = {
          baseTime: initDTS as number,
          timescale: timescale as number,
          trackId: trackId as number,
        };
      } else {
        // nothing new was computed: do not report an origin to the caller
        initPTS = timescale = undefined;
      }

      return {
        tracks,
        initPTS,
        timescale,
        trackId,
      };
    }
  }

  /**
   * Remuxes the pending video samples of `track` into one moof/mdat pair.
   *
   * Sample timestamps are normalized, sorted by DTS when needed, stitched to
   * the previous fragment when contiguous, and forced to be monotonic. The
   * track's samples are consumed (`track.samples` is reset to an empty
   * array) and `nextVideoTs`, `videoSampleDuration` and `isVideoContiguous`
   * are updated.
   *
   * @param track - must hold at least one sample; `_initPTS` must be set
   * @param timeOffset - expected start of the fragment, in seconds
   * @param contiguous - whether the fragment directly follows the last one
   * @param audioTrackLength - duration in seconds of the audio remuxed for
   *   this fragment (0 if none), used to stretch the last video frame
   * @returns the remuxed track, or undefined if the mdat allocation failed
   *   (an error event is emitted in that case)
   */
  remuxVideo(
    track: DemuxedVideoTrack,
    timeOffset: number,
    contiguous: boolean,
    audioTrackLength: number,
  ): RemuxedTrack | undefined {
    const timeScale: number = track.inputTimeScale;
    const inputSamples: Array<VideoSample> = track.samples;
    const outputSamples: Array<Mp4Sample> = [];
    const nbSamples = inputSamples.length;
    const initPTS = this._initPTS as RationalTimestamp;
    // timeline origin converted to this track's timescale
    const initTime = (initPTS.baseTime * timeScale) / initPTS.timescale;
    let nextVideoTs = this.nextVideoTs;
    let offset = 8; // write position in mdat, past the 8-byte box header
    let mp4SampleDuration = this.videoSampleDuration;
    let minPTS: number = Number.POSITIVE_INFINITY;
    let maxPTS: number = Number.NEGATIVE_INFINITY;
    let sortSamples = false;

    // If the fragment is contiguous with the last one, the last known DTS is
    // the reference; otherwise derive the reference from timeOffset.
    if (!contiguous || nextVideoTs === null) {
      const pts = initTime + timeOffset * timeScale;
      const cts =
        inputSamples[0].pts -
        normalizePts(inputSamples[0].dts, inputSamples[0].pts);
      if (
        chromeVersion &&
        nextVideoTs !== null &&
        Math.abs(pts - cts - (nextVideoTs + initTime)) < 15000
      ) {
        // On Chrome, a fragment starting within 15000 ticks of the expected
        // position is treated as contiguous so the gap gets corrected below.
        contiguous = true;
      } else {
        nextVideoTs = pts - cts - initTime;
      }
    }

    // Normalize every timestamp against the expected position so values are
    // comparable across a timestamp rollover.
    const nextVideoPts = nextVideoTs + initTime;
    for (let i = 0; i < nbSamples; i++) {
      const sample = inputSamples[i];
      sample.pts = normalizePts(sample.pts, nextVideoPts);
      sample.dts = normalizePts(sample.dts, nextVideoPts);
      if (sample.dts < inputSamples[i > 0 ? i - 1 : i].dts) {
        sortSamples = true;
      }
    }

    // sort by DTS, then by PTS
    if (sortSamples) {
      inputSamples.sort(function (a, b) {
        const deltadts = a.dts - b.dts;
        const deltapts = a.pts - b.pts;
        return deltadts || deltapts;
      });
    }

    let firstDTS: number = inputSamples[0].dts;
    let lastDTS: number = inputSamples[inputSamples.length - 1].dts;

    // Average DTS delta between consecutive samples; falls back to the last
    // known duration, then to 1/30 s, when it cannot be measured.
    const inputDuration = lastDTS - firstDTS;
    const averageSampleDuration = inputDuration
      ? Math.round(inputDuration / (nbSamples - 1))
      : mp4SampleDuration || track.inputTimeScale / 30;

    // Detect and remove holes / overlaps between contiguous fragments.
    if (contiguous) {
      const delta = firstDTS - nextVideoPts;
      const foundHole = delta > averageSampleDuration;
      const foundOverlap = delta < -1;
      if (foundHole || foundOverlap) {
        if (foundHole) {
          this.warn(
            `${(track.segmentCodec || '').toUpperCase()}: ${toMsFromMpegTsClock(
              delta,
              true,
            )} ms (${delta}dts) hole between fragments detected at ${timeOffset.toFixed(
              3,
            )}`,
          );
        } else {
          this.warn(
            `${(track.segmentCodec || '').toUpperCase()}: ${toMsFromMpegTsClock(
              -delta,
              true,
            )} ms (${delta}dts) overlapping between fragments detected at ${timeOffset.toFixed(
              3,
            )}`,
          );
        }
        if (
          !foundOverlap ||
          nextVideoPts >= inputSamples[0].pts ||
          chromeVersion
        ) {
          firstDTS = nextVideoPts;
          const firstPTS = inputSamples[0].pts - delta;
          if (foundHole) {
            // stretch the first sample back over the hole
            inputSamples[0].dts = firstDTS;
            inputSamples[0].pts = firstPTS;
          } else {
            // Shift leading samples forward by the overlap, stopping once a
            // sample no longer overlaps and the PTS order was preserved.
            let isPTSOrderRetained = true;
            for (let i = 0; i < inputSamples.length; i++) {
              if (inputSamples[i].dts > firstPTS && isPTSOrderRetained) {
                break;
              }

              const prevPTS = inputSamples[i].pts;
              inputSamples[i].dts -= delta;
              inputSamples[i].pts -= delta;

              // check whether this sample's PTS order relative to the next
              // one changed because of the shift
              if (i < inputSamples.length - 1) {
                const nextSamplePTS = inputSamples[i + 1].pts;
                const currentSamplePTS = inputSamples[i].pts;

                const currentOrder = nextSamplePTS <= currentSamplePTS;
                const prevOrder = nextSamplePTS <= prevPTS;

                isPTSOrderRetained = currentOrder === prevOrder;
              }
            }
          }
          this.log(
            `Video: Initial PTS/DTS adjusted: ${toMsFromMpegTsClock(
              firstPTS,
              true,
            )}/${toMsFromMpegTsClock(
              firstDTS,
              true,
            )}, delta: ${toMsFromMpegTsClock(delta, true)} ms`,
          );
        }
      }
    }

    firstDTS = Math.max(0, firstDTS);

    let nbNalu = 0;
    let naluLen = 0;
    let dtsStep = firstDTS;
    for (let i = 0; i < nbSamples; i++) {
      // total payload length and number of NAL units
      const sample = inputSamples[i];
      const units = sample.units;
      const nbUnits = units.length;
      let sampleLen = 0;
      for (let j = 0; j < nbUnits; j++) {
        sampleLen += units[j].data.length;
      }

      naluLen += sampleLen;
      nbNalu += nbUnits;
      sample.length = sampleLen;

      // enforce monotonic DTS: a sample going backwards is pinned to the
      // running floor, which then advances by a quarter frame (at least 1)
      if (sample.dts < dtsStep) {
        sample.dts = dtsStep;
        dtsStep += (averageSampleDuration / 4) | 0 || 1;
      } else {
        dtsStep = sample.dts;
      }

      minPTS = Math.min(sample.pts, minPTS);
      maxPTS = Math.max(sample.pts, maxPTS);
    }
    lastDTS = inputSamples[nbSamples - 1].dts;

    /* concatenate the video data and construct the mdat in place:
      each NAL unit is prefixed by a 4-byte length
      (need 8 more bytes to fill length and mdat type) */
    const mdatSize = naluLen + 4 * nbNalu + 8;
    let mdat: Uint8Array;
    try {
      mdat = new Uint8Array(mdatSize);
    } catch (err) {
      this.observer.emit(Events.ERROR, Events.ERROR, {
        type: ErrorTypes.MUX_ERROR,
        details: ErrorDetails.REMUX_ALLOC_ERROR,
        fatal: false,
        error: err,
        bytes: mdatSize,
        reason: `fail allocating video mdat ${mdatSize}`,
      });
      return;
    }
    const view = new DataView(mdat.buffer);
    view.setUint32(0, mdatSize);
    mdat.set(MP4.types.mdat, 4);

    let stretchedLastFrame = false;
    let minDtsDelta = Number.POSITIVE_INFINITY;
    let minPtsDelta = Number.POSITIVE_INFINITY;
    let maxDtsDelta = Number.NEGATIVE_INFINITY;
    let maxPtsDelta = Number.NEGATIVE_INFINITY;
    for (let i = 0; i < nbSamples; i++) {
      const videoSample = inputSamples[i];
      const videoSampleUnits = videoSample.units;
      let mp4SampleLength = 0;
      // write each NAL unit prefixed with its size
      for (let j = 0, nbUnits = videoSampleUnits.length; j < nbUnits; j++) {
        const unit = videoSampleUnits[j];
        const unitData = unit.data;
        const unitDataLen = unit.data.byteLength;
        view.setUint32(offset, unitDataLen);
        offset += 4;
        mdat.set(unitData, offset);
        offset += unitDataLen;
        mp4SampleLength += 4 + unitDataLen;
      }

      // a sample's duration is the DTS delta to the next sample
      let ptsDelta;
      if (i < nbSamples - 1) {
        mp4SampleDuration = inputSamples[i + 1].dts - videoSample.dts;
        ptsDelta = inputSamples[i + 1].pts - videoSample.pts;
      } else {
        // last sample: reuse the previous delta, or the average
        const config = this.config;
        const lastFrameDuration =
          i > 0
            ? videoSample.dts - inputSamples[i - 1].dts
            : averageSampleDuration;
        ptsDelta =
          i > 0
            ? videoSample.pts - inputSamples[i - 1].pts
            : averageSampleDuration;
        if (config.stretchShortVideoTrack && this.nextAudioTs !== null) {
          // If audio extends past the end of video by more than
          // maxBufferHole, stretch the last frame to cover the difference.
          const gapTolerance = Math.floor(config.maxBufferHole * timeScale);
          const deltaToFrameEnd =
            (audioTrackLength
              ? minPTS + audioTrackLength * timeScale
              : this.nextAudioTs + initTime) - videoSample.pts;
          if (deltaToFrameEnd > gapTolerance) {
            // lastFrameDuration is subtracted to avoid overlapping the next
            // fragment's first frame
            mp4SampleDuration = deltaToFrameEnd - lastFrameDuration;
            if (mp4SampleDuration < 0) {
              mp4SampleDuration = lastFrameDuration;
            } else {
              stretchedLastFrame = true;
            }
            // NOTE(review): "/ 90" converts ticks to ms assuming a 90 kHz
            // input clock — confirm timeScale is always 90000 here.
            this.log(
              `It is approximately ${
                deltaToFrameEnd / 90
              } ms to the next segment; using duration ${
                mp4SampleDuration / 90
              } ms for the last video frame.`,
            );
          } else {
            mp4SampleDuration = lastFrameDuration;
          }
        } else {
          mp4SampleDuration = lastFrameDuration;
        }
      }
      const compositionTimeOffset = Math.round(
        videoSample.pts - videoSample.dts,
      );
      minDtsDelta = Math.min(minDtsDelta, mp4SampleDuration);
      maxDtsDelta = Math.max(maxDtsDelta, mp4SampleDuration);
      minPtsDelta = Math.min(minPtsDelta, ptsDelta);
      maxPtsDelta = Math.max(maxPtsDelta, ptsDelta);

      outputSamples.push(
        createMp4Sample(
          videoSample.key,
          mp4SampleDuration,
          mp4SampleLength,
          compositionTimeOffset,
        ),
      );
    }

    if (outputSamples.length) {
      if (chromeVersion) {
        if (chromeVersion < 70) {
          // Chrome < 70: always flag the first sample as a sync sample
          const flags = outputSamples[0].flags;
          flags.dependsOn = 2;
          flags.isNonSync = 0;
        }
      } else if (safariWebkitVersion) {
        // Safari: when DTS deltas are far more irregular than PTS deltas,
        // derive sample durations from PTS and zero the composition offsets.
        if (
          maxPtsDelta - minPtsDelta < maxDtsDelta - minDtsDelta &&
          averageSampleDuration / maxDtsDelta < 0.025 &&
          outputSamples[0].cts === 0
        ) {
          this.warn(
            'Found irregular gaps in sample duration. Using PTS instead of DTS to determine MP4 sample duration.',
          );
          let dts = firstDTS;
          for (let i = 0, len = outputSamples.length; i < len; i++) {
            const nextDts = dts + outputSamples[i].duration;
            const pts = dts + outputSamples[i].cts;
            if (i < len - 1) {
              const nextPts = nextDts + outputSamples[i + 1].cts;
              outputSamples[i].duration = nextPts - pts;
            } else {
              outputSamples[i].duration = i
                ? outputSamples[i - 1].duration
                : averageSampleDuration;
            }
            outputSamples[i].cts = 0;
            dts = nextDts;
          }
        }
      }
    }
    // The next fragment is expected to start at last DTS + last duration; a
    // stretched (or zero) last duration is replaced by the average.
    mp4SampleDuration =
      stretchedLastFrame || !mp4SampleDuration
        ? averageSampleDuration
        : mp4SampleDuration;
    const endDTS = lastDTS + mp4SampleDuration;
    this.nextVideoTs = nextVideoTs = endDTS - initTime;
    this.videoSampleDuration = mp4SampleDuration;
    this.isVideoContiguous = true;
    const moof = MP4.moof(
      track.sequenceNumber++,
      firstDTS,
      Object.assign(track, {
        samples: outputSamples,
      }),
    );
    const type: SourceBufferName = 'video';
    const data = {
      data1: moof,
      data2: mdat,
      startPTS: (minPTS - initTime) / timeScale,
      endPTS: (maxPTS + mp4SampleDuration - initTime) / timeScale,
      startDTS: (firstDTS - initTime) / timeScale,
      endDTS: nextVideoTs / timeScale,
      type,
      hasAudio: false,
      hasVideo: true,
      nb: outputSamples.length,
      dropped: track.dropped,
    };

    // the samples have been consumed
    track.samples = [];
    track.dropped = 0;

    return data;
  }

  /** Returns the number of audio samples per frame for the track's codec. */
  getSamplesPerFrame(track: DemuxedAudioTrack) {
    switch (track.segmentCodec) {
      case 'mp3':
        return MPEG_AUDIO_SAMPLE_PER_FRAME;
      case 'ac3':
        return AC3_SAMPLES_PER_FRAME;
      default:
        return AAC_SAMPLES_PER_FRAME;
    }
  }

  /**
   * Remuxes the pending audio samples of `track` into one moof/mdat pair
   * (or a bare payload for raw MPEG audio).
   *
   * For AAC, frames are re-timed onto a regular grid and, when aligned with
   * video, gaps are filled with silent (or duplicated) frames. The track's
   * samples are consumed and `nextAudioTs` / `isAudioContiguous` are updated.
   *
   * @param track - `_initPTS` must be set before calling
   * @param timeOffset - expected start of the fragment, in seconds
   * @param contiguous - whether the fragment directly follows the last one
   * @param accurateTimeOffset - whether `timeOffset` can be trusted
   * @param videoTimeOffset - video start when audio must align with video;
   *   undefined otherwise
   * @returns the remuxed track, or undefined when there is nothing to output
   *   or the mdat allocation failed (an error event is emitted in that case)
   */
  remuxAudio(
    track: DemuxedAudioTrack,
    timeOffset: number,
    contiguous: boolean,
    accurateTimeOffset: boolean,
    videoTimeOffset?: number,
  ): RemuxedTrack | undefined {
    const inputTimeScale: number = track.inputTimeScale;
    const mp4timeScale: number = track.samplerate
      ? track.samplerate
      : inputTimeScale;
    // input ticks per MP4 tick
    const scaleFactor: number = inputTimeScale / mp4timeScale;
    const mp4SampleDuration: number = this.getSamplesPerFrame(track);
    const inputSampleDuration: number = mp4SampleDuration * scaleFactor;
    const initPTS = this._initPTS as RationalTimestamp;
    const rawMPEG: boolean =
      track.segmentCodec === 'mp3' && this.typeSupported.mpeg;
    const outputSamples: Array<Mp4Sample> = [];
    const alignedWithVideo = videoTimeOffset !== undefined;

    let inputSamples: Array<AudioSample> = track.samples;
    // raw MPEG audio has no mdat box header to skip
    let offset: number = rawMPEG ? 0 : 8;
    let nextAudioTs: number = this.nextAudioTs || -1;

    // Consecutive fragments are also treated as contiguous when the new
    // start is within 9000 ticks of the expected position (with an accurate
    // time offset) or within 20 audio frames of it.
    const initTime = (initPTS.baseTime * inputTimeScale) / initPTS.timescale;
    const timeOffsetMpegTS = initTime + timeOffset * inputTimeScale;
    this.isAudioContiguous = contiguous =
      contiguous ||
      ((inputSamples.length &&
        nextAudioTs > 0 &&
        ((accurateTimeOffset &&
          Math.abs(timeOffsetMpegTS - (nextAudioTs + initTime)) < 9000) ||
          Math.abs(
            normalizePts(inputSamples[0].pts, timeOffsetMpegTS) -
              (nextAudioTs + initTime),
          ) <
            20 * inputSampleDuration)) as boolean);

    // compute normalized PTS
    inputSamples.forEach(function (sample) {
      sample.pts = normalizePts(sample.pts, timeOffsetMpegTS);
    });

    if (!contiguous || nextAudioTs < 0) {
      // drop samples with a negative PTS
      const sampleCount = inputSamples.length;
      inputSamples = inputSamples.filter((sample) => sample.pts >= 0);
      if (sampleCount !== inputSamples.length) {
        this.warn(
          `Removed ${sampleCount - inputSamples.length} of ${sampleCount} samples (initPTS ${initTime} / ${inputTimeScale})`,
        );
      }

      // every sample was filtered out
      if (!inputSamples.length) {
        return;
      }

      if (videoTimeOffset === 0) {
        // start at 0 like video so a leading gap can be filled below
        nextAudioTs = 0;
      } else if (accurateTimeOffset && !alignedWithVideo) {
        // trust the fragment start as the expected audio position
        nextAudioTs = Math.max(0, timeOffsetMpegTS - initTime);
      } else {
        // otherwise use the first sample's own PTS
        nextAudioTs = inputSamples[0].pts - initTime;
      }
    }

    // AAC only: snap frames onto a regular grid; when aligned with video,
    // fill gaps with injected frames so audio does not drift.
    if (track.segmentCodec === 'aac') {
      const maxAudioFramesDrift = this.config.maxAudioFramesDrift;
      for (
        let i = 0, nextPts = nextAudioTs + initTime;
        i < inputSamples.length;
        i++
      ) {
        // how far is this frame from where it is expected?
        const sample = inputSamples[i];
        const pts = sample.pts;
        const delta = pts - nextPts;
        const duration = Math.abs((1000 * delta) / inputTimeScale);

        if (
          delta <= -maxAudioFramesDrift * inputSampleDuration &&
          alignedWithVideo
        ) {
          // Overlap beyond the drift tolerance: only the first frame resets
          // the expected position to its own PTS.
          if (i === 0) {
            this.warn(
              `Audio frame @ ${(pts / inputTimeScale).toFixed(
                3,
              )}s overlaps marker by ${Math.round(
                (1000 * delta) / inputTimeScale,
              )} ms.`,
            );
            this.nextAudioTs = nextAudioTs = pts - initTime;
            nextPts = pts;
          }
        } else if (
          // Gap beyond the drift tolerance but shorter than
          // MAX_SILENT_FRAME_DURATION: inject frames to fill it.
          delta >= maxAudioFramesDrift * inputSampleDuration &&
          duration < MAX_SILENT_FRAME_DURATION &&
          alignedWithVideo
        ) {
          let missing = Math.round(delta / inputSampleDuration);
          // align the injected frames with the media PTS, never going
          // below zero
          nextPts = pts - missing * inputSampleDuration;
          while (nextPts < 0 && missing && inputSampleDuration) {
            missing--;
            nextPts += inputSampleDuration;
          }
          if (i === 0) {
            this.nextAudioTs = nextAudioTs = nextPts - initTime;
          }
          this.warn(
            `Injecting ${missing} audio frames @ ${(
              (nextPts - initTime) /
              inputTimeScale
            ).toFixed(3)}s due to ${Math.round(
              (1000 * delta) / inputTimeScale,
            )} ms gap.`,
          );
          for (let j = 0; j < missing; j++) {
            let fillFrame = AAC.getSilentFrame(
              track.parsedCodec || track.manifestCodec || track.codec,
              track.channelCount,
            );
            if (!fillFrame) {
              this.log(
                'Unable to get silent frame for given audio codec; duplicating last frame instead.',
              );
              fillFrame = sample.unit.subarray();
            }
            inputSamples.splice(i, 0, {
              unit: fillFrame,
              pts: nextPts,
            });
            nextPts += inputSampleDuration;
            i++;
          }
        }
        sample.pts = nextPts;
        nextPts += inputSampleDuration;
      }
    }

    let mdatSize = 0;
    for (const sample of inputSamples) {
      mdatSize += sample.unit.byteLength;
    }
    if (mdatSize <= 0) {
      // no audio payload (this also covers an empty sample list)
      return;
    }

    /* concatenate the audio data and construct the mdat in place
      (need 8 more bytes to fill length and mdat type, except for raw MPEG) */
    mdatSize += offset;
    let mdat: Uint8Array;
    try {
      mdat = new Uint8Array(mdatSize);
    } catch (err) {
      this.observer.emit(Events.ERROR, Events.ERROR, {
        type: ErrorTypes.MUX_ERROR,
        details: ErrorDetails.REMUX_ALLOC_ERROR,
        fatal: false,
        error: err,
        bytes: mdatSize,
        reason: `fail allocating audio mdat ${mdatSize}`,
      });
      return;
    }
    if (!rawMPEG) {
      const view = new DataView(mdat.buffer);
      view.setUint32(0, mdatSize);
      mdat.set(MP4.types.mdat, 4);
    }

    let firstPTS: number | null = null;
    let lastPTS: number | null = null;
    for (let j = 0, nbSamples = inputSamples.length; j < nbSamples; j++) {
      const audioSample = inputSamples[j];
      const unit = audioSample.unit;
      let pts = audioSample.pts;
      if (lastPTS !== null) {
        // the previous sample lasts until this one starts
        const prevSample = outputSamples[j - 1];
        prevSample.duration = Math.round((pts - lastPTS) / scaleFactor);
      } else {
        if (contiguous && track.segmentCodec === 'aac') {
          // pin the first sample to the expected position
          pts = nextAudioTs + initTime;
        }
        firstPTS = pts;
      }
      mdat.set(unit, offset);
      const unitLen = unit.byteLength;
      offset += unitLen;
      // the duration defaults to one codec frame; it is overwritten above
      // for every sample but the last
      outputSamples.push(createMp4Sample(true, mp4SampleDuration, unitLen, 0));
      lastPTS = pts;
    }

    const nbSamples = outputSamples.length;
    if (!nbSamples) {
      return;
    }

    // the next audio sample is expected at last PTS + last duration
    const lastSample = outputSamples[outputSamples.length - 1];
    nextAudioTs = (lastPTS as number) - initTime;
    this.nextAudioTs = nextAudioTs + scaleFactor * lastSample.duration;

    // moof is built from a copy of the track carrying the output samples
    const moof = rawMPEG
      ? new Uint8Array(0)
      : MP4.moof(
          track.sequenceNumber++,
          firstPTS! / scaleFactor,
          Object.assign({}, track, { samples: outputSamples }),
        );

    // the samples have been consumed
    track.samples = [];
    const start = (firstPTS! - initTime) / inputTimeScale;
    const end = this.nextAudioTs / inputTimeScale;
    const type: SourceBufferName = 'audio';
    const audioData = {
      data1: moof,
      data2: mdat,
      startPTS: start,
      endPTS: end,
      startDTS: start,
      endDTS: end,
      type,
      hasAudio: true,
      hasVideo: false,
      nb: nbSamples,
    };

    this.isAudioContiguous = true;
    return audioData;
  }
}
/**
 * Brings `value` within 2^32 ticks of `reference` by adding or subtracting
 * whole multiples of 2^33, undoing a timestamp rollover.
 *
 * @param value - timestamp to normalize
 * @param reference - timestamp to stay close to; `null` disables
 *   normalization
 * @returns the normalized timestamp. `value` is returned unchanged when
 *   `reference` is null or when either operand is not finite (no number of
 *   2^33 steps could bring such a value in range).
 */
export function normalizePts(value: number, reference: number | null): number {
  if (reference === null) {
    return value;
  }
  // Guard against Infinity: the loop below would otherwise never terminate.
  if (!Number.isFinite(value) || !Number.isFinite(reference)) {
    return value;
  }

  // step by -2^33 when value is ahead of the reference, +2^33 otherwise
  const offset = reference < value ? -8589934592 : 8589934592;

  /* if the diff between value and reference is bigger than half of the
    amplitude (2^32) then it means that PTS looping occurred: fill the gap */
  while (Math.abs(value - reference) > 4294967296) {
    value += offset;
  }

  return value;
}
/**
 * Locates the first keyframe in a list of video samples.
 *
 * @returns the index of the first sample whose `key` is truthy, or -1 when
 *   there is none
 */
function findKeyframeIndex(samples: Array<VideoSample>): number {
  return samples.findIndex((candidate) => candidate.key);
}
/**
 * Converts the pending metadata samples of `track` to timeline-relative
 * seconds and hands them over to the caller.
 *
 * Each sample's `pts` / `dts` is rebased by `initPTS` / `initDTS` (converted
 * to the track's timescale), normalized against `timeOffset`, then divided
 * by the timescale. The samples are updated in place and `track.samples` is
 * replaced by an empty array.
 *
 * @param timeOffset - reference time in seconds used for normalization
 * @returns the converted samples, or undefined when the track has none
 */
export function flushTextTrackMetadataCueSamples(
  track: DemuxedMetadataTrack,
  timeOffset: number,
  initPTS: TimestampOffset,
  initDTS: TimestampOffset,
): RemuxedMetadata | undefined {
  const samples = track.samples;
  if (samples.length === 0) {
    return undefined;
  }

  const scale = track.inputTimeScale;
  // loop-invariant terms, expressed in the track's timescale
  const reference = timeOffset * scale;
  const ptsOrigin = (initPTS.baseTime * scale) / initPTS.timescale;
  const dtsOrigin = (initDTS.baseTime * scale) / initDTS.timescale;

  for (const cue of samples) {
    cue.pts = normalizePts(cue.pts - ptsOrigin, reference) / scale;
    cue.dts = normalizePts(cue.dts - dtsOrigin, reference) / scale;
  }

  // ownership of the array moves to the result
  track.samples = [];
  return { samples };
}
/**
 * Converts the pending user-data samples of `track` to timeline-relative
 * seconds, sorts them by PTS and hands them over to the caller.
 *
 * Each sample's `pts` is rebased by `initPTS` (converted to the track's
 * timescale), normalized against `timeOffset`, then divided by the
 * timescale. The samples are updated and sorted in place and
 * `track.samples` is replaced by an empty array.
 *
 * @param timeOffset - reference time in seconds used for normalization
 * @returns the converted, PTS-ordered samples, or undefined when the track
 *   has none
 */
export function flushTextTrackUserdataCueSamples(
  track: DemuxedUserdataTrack,
  timeOffset: number,
  initPTS: RationalTimestamp,
): RemuxedUserdata | undefined {
  const samples = track.samples;
  if (samples.length === 0) {
    return undefined;
  }

  const scale = track.inputTimeScale;
  // loop-invariant terms, expressed in the track's timescale
  const reference = timeOffset * scale;
  const ptsOrigin = (initPTS.baseTime * scale) / initPTS.timescale;

  for (const cue of samples) {
    cue.pts = normalizePts(cue.pts - ptsOrigin, reference) / scale;
  }
  samples.sort((left, right) => left.pts - right.pts);

  // ownership of the array moves to the result
  track.samples = [];
  return { samples };
}