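Here we show evaluation results for all techniques discussed in the paper. All metrics are computed with BSS Eval version 4.
Due to space limitations, the paper reports only the SDR metric. Here we include all four metrics: SDR (signal-to-distortion ratio), SIR (signal-to-interference ratio), SAR (signal-to-artifacts ratio), and ISR (image-to-spatial-distortion ratio).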
import pandas
import seaborn as sns
import math
# Global seaborn styling applied to every figure below: 'Set2' colour palette,
# 'paper' plotting context, and fonts scaled by 1.5.
sns.set(palette='Set2', font_scale=1.5, context='paper')
# IPython magics (not plain Python): these lines only work inside
# IPython/Jupyter. Load the autoreload extension so that edits to helper
# modules are picked up without restarting the kernel.
%load_ext autoreload
# Mode 1: before running each cell, reload only the modules that were
# registered with %aimport.
%autoreload 1
# Register the project-local `utils` module for autoreloading.
%aimport utils
# Short alias used by all cells below.
import utils as u
# Load the Wave-U-Net evaluation tables. `u.load_results()` yields four
# objects; only `results` is referenced in the cells visible here.
results, unpivoted, models, datasets = u.load_results()

# NMF evaluation summary, read from a machine-specific absolute path.
# NOTE(review): this path only resolves on the original author's machine.
nmf_summary_path = '/Users/matan/gdrive/Grad/Thesis/Training/Wave-U-Net/nmf_evaluation_summary'
nmf_results = u.read_evaluation_results(nmf_summary_path)
# Score-free Wave-U-Net results: model '020' (labelled 'all voices' below)
# and the runs returned for experiment 4 (labelled 'single voice' below).
noscore_extract_satb = results[results.model == '020']
noscore_extract_single = u.experiment_results(4, results)

# NMF baseline rows: model "B" evaluated on the chorales_synth_v6 dataset.
# This frame is built here instead of relying on `nmf_model_b_on_v6`, which
# is only defined further down the file; referencing it at this point raised
# NameError when the file was executed top to bottom.
# NaN SDR values are replaced with 0, the same correction applied where the
# baseline is used in the final comparison, so both figures treat the NMF
# data identically.
nmf_baseline = nmf_results.query(
    'model == "B" & evaluation_dataset == "chorales_synth_v6"'
).fillna({'sdr': 0})

# Stack the three groups, tagging each row with the label used as the hue
# in the plot of `res`.
res = pandas.concat([
    noscore_extract_satb.assign(train_type='Wave-U-Net: all voices'),
    noscore_extract_single.assign(train_type='Wave-U-Net: single voice'),
    nmf_baseline.assign(train_type='NMF'),
])
# Box plots of each metric (one panel per metric, two panels per row),
# grouped by source on the x axis and coloured by training type.
res_display = u.rename_for_display(u.to_long_form(res))
fig = sns.catplot(
    data=res_display,
    kind='box',
    x='source',
    y='value (dB)',
    hue='train_type',
    hue_order=['NMF', 'Wave-U-Net: all voices', 'Wave-U-Net: single voice'],
    col='metric',
    col_wrap=2,
    aspect=1.5,
    width=0.6,
    showfliers=False,  # hide outlier markers
)
# Experiment 8 with vs. without score.
# 'Without score' rows come from model '020'; 'With score' rows are the
# experiment-8 results whose score_type is "midi norm" and whose
# score_concat is "in".
ex8_noscore2 = (
    results
    .query('model == "020"')
    .assign(Condition='Without score')
)
ex8_bestish = (
    u.experiment_results(8, results)
    .query('score_type == "midi norm" & score_concat == "in"')
    .assign(Condition='With score')
)
ex8_with_noscore2 = pandas.concat([ex8_bestish, ex8_noscore2])

# One box-plot panel per metric (two per row), sources on the x axis,
# coloured by condition.
g = sns.catplot(
    data=u.rename_for_display(u.to_long_form(ex8_with_noscore2)),
    kind='box',
    x='source',
    y='value (dB)',
    hue='Condition',
    hue_order=['Without score', 'With score'],
    col='metric',
    col_wrap=2,
    height=4,
    aspect=1.4,
    width=0.4,
    showfliers=False,  # hide outlier markers
)
# Full experiment-8 grid: one row of panels per source and one column per
# metric; within each panel, conditioning location on the x axis and score
# type as the hue.
ex8_display = u.rename_for_display(
    u.to_long_form(u.experiment_results(8, results))
)
g = sns.catplot(
    data=ex8_display,
    kind='box',
    x='Conditioning Location',
    y='value (dB)',
    hue='Score Type',
    row='source',
    col='metric',
    sharex=False,
    height=4,
    aspect=1,
    width=0.6,
    showfliers=False,  # hide outlier markers
)
# Experiment 9 compared against a score-free reference (model '058').
# The reference rows have missing score_type / score_concat values filled
# with the literal 'no score' so they appear as their own category.
ex9_noscore = (
    results[results.model == '058']
    .fillna({'score_type': 'no score', 'score_concat': 'no score'})
)
ex9_vs_noscore = pandas.concat([
    u.experiment_results(9, results),
    ex9_noscore,
])

# One box-plot panel per metric (two per row); explicit category orders
# keep 'no score' last on both the x axis and in the legend.
fig = sns.catplot(
    data=u.rename_for_display(u.to_long_form(ex9_vs_noscore)),
    kind='box',
    x='Conditioning Location',
    y='value (dB)',
    hue='Score Type',
    order=['input', 'input-output', 'output', 'no score'],
    hue_order=['pure tone', 'normalized pitch', 'pitch and amplitude', 'piano roll', 'no score'],
    col='metric',
    col_wrap=2,
    aspect=1.6,
    width=0.6,
    showfliers=False,  # hide outlier markers
)
# Experiment-10 grid: one row of panels per source and one column per
# metric; conditioning location ('input', 'input-output') on the x axis and
# score type as the hue.
ex10_display = u.rename_for_display(
    u.to_long_form(u.experiment_results(10, results))
)
g = sns.catplot(
    data=ex10_display,
    kind='box',
    x='Conditioning Location',
    y='value (dB)',
    hue='Score Type',
    order=['input', 'input-output'],
    hue_order=['pure tone', 'normalized pitch', 'pitch and amplitude', 'piano roll'],
    row='source',
    col='metric',
    height=4,
    aspect=1,
    width=0.6,
    showfliers=False,  # hide outlier markers
)
# Assemble the overall comparison: the pitch-and-amplitude / input variant
# of each score-informed experiment, the score-free counterparts, and the
# NMF baseline. Every group is tagged with a `condition` label that is used
# as the hue when `compare` is plotted.
# NOTE(review): ex9 and ex10 are selected by hard-coded model IDs;
# presumably these are the pitch-and-amplitude / input runs - confirm.

# Single-voice extraction, with score.
ex9 = (
    results[results.model.isin(['069', '094', '095', '096'])]
    .assign(condition='Extract single: with score')
)

# Single-voice extraction, without score; missing score columns are filled
# with the literal 'no score'.
ex9_noscore = (
    results[results.model.isin(['036', '057', '058', '059'])]
    .fillna({'score_type': 'no score', 'score_concat': 'no score'})
    .assign(condition='Extract single: without score')
)

# All-voice extraction, without score (model '020').
ex8_noscore = (
    results
    .query('model == "020"')
    .fillna({'score_type': 'no score', 'score_concat': 'no score'})
    .assign(condition='Extract all: without score')
)

# All-voice extraction, with score.
# NOTE(review): this filters on score_type == "pitch and amplitude", whereas
# the earlier experiment-8 cell filters on "midi norm" - verify that this
# literal matches a raw (pre-rename) score_type value.
ex8 = (
    u.experiment_results(8, results)
    .query('score_type == "pitch and amplitude" and score_concat == "in"')
    .assign(condition='Extract all: with score')
)

# All-voice extraction, with score, multi-source variant (model '062').
ex10 = (
    u.experiment_results(10, results)
    .query('model == "062"')
    .assign(condition='Extract all: with score (multi-source)')
)

# NMF baseline: model "B" evaluated on chorales_synth_v6.
# museval reports NaN SDR when the estimated frame has zero power; with a
# non-zero reference frame the SDR should be 0, so NaN is replaced by 0.
nmf_model_b_on_v6 = (
    nmf_results
    .query('model == "B" & evaluation_dataset == "chorales_synth_v6"')
    .assign(condition='NMF (baseline)')
    .fillna({'sdr': 0})
)

compare = pandas.concat([
    ex9,
    ex9_noscore,
    ex8_noscore,
    ex8,
    ex10,
    nmf_model_b_on_v6,
])
# Final comparison figure: one box-plot panel per metric (two per row),
# sources on the x axis, coloured by condition in a fixed legend order.
compare_display = u.rename_for_display(u.to_long_form(compare))
fig = sns.catplot(
    data=compare_display,
    kind='box',
    x='source',
    y='value (dB)',
    hue='condition',
    hue_order=[
        'Extract single: with score',
        'Extract single: without score',
        'Extract all: with score (multi-source)',
        'Extract all: with score',
        'Extract all: without score',
        'NMF (baseline)',
    ],
    col='metric',
    col_wrap=2,
    aspect=1.6,
    width=0.6,
    showfliers=False,  # hide outlier markers
)