% Load the three audio clips together with their sample rates.
% Original music track
[x, fs] = mp3read('california.mp3');
% Re-recording of x
[rec1, fs1] = mp3read('recording1.mp3');
% Speech sample
[rec2, fs2] = mp3read('recording2.mp3');

% Preprocess
% Constants used below: the common downsampled rate and the hard-coded
% sample index in rec1 where the excerpt matching the music begins.
new_fs = 8000;
sync_index = 245000;

% Average across columns (dimension 2) so each signal becomes one column.
x = mean(x, 2);
rec1 = mean(rec1, 2);
rec2 = mean(rec2, 2);

% Cut the excerpt of the original music running from sample 59*fs to 65*fs,
% and the equally long (6*fs1 + 1 samples) stretch of rec1 starting at
% sync_index, so the two line up in time.
sample = x(59*fs + (0:6*fs));
rec1 = rec1(sync_index + (0:6*fs1));

% Little of interest lives in the high frequencies, so bring every signal
% down to new_fs before plotting.
new_sample = resample(sample, new_fs, fs);
new_rec1 = resample(rec1, new_fs, fs1);
new_rec2 = resample(rec2, new_fs, fs2);

% Show the frequency domain using the fft: magnitude spectrum of the
% original-music excerpt, centred on zero frequency.
%
% fftshift moves the zero-frequency bin to the middle of the vector, so the
% shifted bins correspond to indices -floor(N/2) .. ceil(N/2)-1, each spaced
% fs/N apart. Building the axis from those indices puts the DC component at
% exactly 0 for both even and odd N (the previous (1:N)/N*fs - fs/2 axis was
% shifted by up to one bin).
n_bins = length(sample);
freq_axis = (-floor(n_bins/2) : ceil(n_bins/2) - 1) * fs / n_bins;
figure(4)
plot(freq_axis, abs(fftshift(fft(sample))))
xlabel('frequency')

% Time-domain comparison: plotting the downsampled original excerpt and the
% downsampled re-recording in separate figures shows how different the
% re-recorded sound looks as a waveform.
% (Each figure has a disabled spectrogram alternative kept below it.)
figure(1); plot(new_sample);
%spectrogram(new_sample, round(0.05*new_fs), round(0.025*new_fs), [], new_fs, 'yaxis')
figure(2); plot(new_rec1);
%spectrogram(new_rec1, round(0.05*new_fs), round(0.025*new_fs), [], new_fs, 'yaxis')


% The spectrogram of speech (downsampled rec2), frequency on the y-axis.
% Window is 0.05*new_fs samples with 0.025*new_fs samples of overlap, both
% rounded to whole samples.
win_len = round(0.05*new_fs);
win_overlap = round(0.025*new_fs);
figure(3);
spectrogram(new_rec2, win_len, win_overlap, [], new_fs, 'yaxis');
