Commit 32fcbc32 authored by Yuqi Zhang
Browse files

Result analysis for the experiments with changed environment settings

parent 46a97525
import matplotlib.pyplot as plt
import matplotlib
import statistics
# Running count of CSV samples dropped because a value exceeded err_sp.
data_skipped = 0
# Values above this threshold are treated as invalid and skipped.
err_sp = 1e10
# Number of round sub-directories (0 .. total_rounds - 1) that dt_read averages over.
total_rounds = 16
# Matplotlib output settings: Type 42 fonts in PDFs, tight bounding box with a
# small padding, automatic legend placement and small tick labels.
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['savefig.bbox'] = 'tight'
matplotlib.rcParams['savefig.pad_inches'] = 0.01
matplotlib.rcParams['legend.loc'] = 'best'
matplotlib.rcParams['xtick.labelsize'] = 8
matplotlib.rcParams['ytick.labelsize'] = 8
def dt_read(root_dir, dt_type, server, speed, delay, low_delay, diff_file, lambda_diff):
    """Average the read difference and peak overhead of one setting over all rounds.

    For every round ``r`` in ``range(total_rounds)`` the directory
    ``{root_dir}/{r}/{dt_type}_{server},{speed},({delay},{low_delay})`` is
    read: ``ovhd.csv`` supplies the overhead samples and ``diff_file`` the
    rows that ``lambda_diff`` reduces to one difference value each.

    Args:
        root_dir: Directory holding one sub-directory per round.
        dt_type: Data type tag used as the directory-name prefix.
        server: Value substituted into the directory name.
        speed: Value substituted into the directory name.
        delay: Value substituted into the directory name.
        low_delay: Value substituted into the directory name.
        diff_file: Name of the CSV file with the difference samples.
        lambda_diff: Callable taking one parsed row (list of floats) and
            returning the difference value of that row.

    Returns:
        Tuple ``(mean difference, mean peak overhead)`` over all rounds.
    """

    def read_one_round(data_dir):
        """Return ``(mean difference, max overhead ratio)`` for one round."""
        global data_skipped
        ovhd_max = -1
        diff = 0
        diff_c = 0
        with open(data_dir + "/ovhd.csv", "r") as file:
            for line in file:
                if not line.strip():
                    # A blank (e.g. trailing) line carries no sample.
                    continue
                tmp = [float(x) for x in line.split(',')]
                if tmp[0] > err_sp or tmp[1] > err_sp:
                    data_skipped += 1
                    continue
                ovhd_max = max(ovhd_max, tmp[1] / tmp[0])
        with open(data_dir + "/" + diff_file, "r") as file:
            for line in file:
                if not line.strip():
                    continue
                tmp = [float(x) for x in line.split(',')]
                # Drop the whole row as soon as any field is invalid, so a
                # corrupt value can never leak into the average, and count
                # the row once (same accounting as for ovhd.csv).
                if any(data > err_sp for data in tmp):
                    data_skipped += 1
                    continue
                diff += lambda_diff(tmp)
                diff_c += 1
        # A round without any valid row contributes 0 instead of raising
        # ZeroDivisionError.
        diff_mean = diff / diff_c if diff_c else 0.0
        return diff_mean, ovhd_max

    diff_return = 0
    ovhd_return = 0
    for rounds in range(total_rounds):
        d = "{dir}/{r}/{t}_{s},{sp},({d},{ld})".format(
            dir=root_dir, r=rounds, t=dt_type,
            s=server, sp=speed, d=delay, ld=low_delay)
        diff_one, ovhd_one = read_one_round(d)
        diff_return += diff_one
        ovhd_return += ovhd_one
    return diff_return / total_rounds, ovhd_return / total_rounds
def rpq_read(root_dir, dt_type, server, speed, delay, low_delay):
    """Read one setting via dt_read, using ``max.csv`` as the difference file.

    The difference of a row is the absolute gap between its columns 1 and 3.
    """

    def column_gap(row):
        return abs(row[1] - row[3])

    return dt_read(root_dir, dt_type, server, speed, delay, low_delay,
                   "max.csv", column_gap)
def list_read(root_dir, dt_type, server, speed, delay, low_delay):
    """Read one setting via dt_read, using ``distance.csv`` as the difference file.

    The difference of a row is simply its column 1.
    """

    def second_column(row):
        return row[1]

    return dt_read(root_dir, dt_type, server, speed, delay, low_delay,
                   "distance.csv", second_column)
def plot_bar(r_data, rwf_data, name, x_lable, y_lable, s_name=None):
    """Draw a grouped bar chart of two series on the current axes.

    Args:
        r_data: Heights of the first bar of each group (legend 'Rmv-Win').
        rwf_data: Heights of the second bar of each group (legend 'RWF').
        name: One entry per group; also the tick labels when s_name is None.
        x_lable: Text of the x axis label.
        y_lable: Text of the y axis label.
        s_name: Optional tick labels used instead of name.
    """
    tick_text = name if s_name is None else s_name
    width = 0.35
    slots = range(len(name))
    left_pos = list(slots)
    right_pos = [slot + width for slot in slots]
    # Ticks sit between the two bars of a group.
    tick_pos = [slot + width / 2 for slot in slots]
    left_bars = plt.bar(left_pos, r_data, width, tick_label=tick_text)
    right_bars = plt.bar(right_pos, rwf_data, width, tick_label=tick_text)
    plt.xlabel(x_lable)
    plt.ylabel(y_lable)
    plt.legend(handles=[left_bars, right_bars], labels=['Rmv-Win', 'RWF'])
    plt.xticks(tick_pos)
def plot_line(r_data, rwf_data, name, x_lable, y_lable):
    """Plot both series as solid lines over ``name`` on the current axes."""
    for series, legend_text in ((r_data, "Rmv-Win"), (rwf_data, "RWF")):
        plt.plot(name, series, linestyle="-", label=legend_text)
    plt.xlabel(x_lable)
    plt.ylabel(y_lable)
    plt.legend()
def gather_plot_data(root_dir, exp_settings, read_func):
    """Collect the results of every setting for the "r" and "rwf" types.

    Args:
        root_dir: Passed through as first argument of read_func.
        exp_settings: Iterable of argument lists; each is unpacked after the
            type tag when calling read_func.
        read_func: Callable ``read_func(root_dir, tag, *setting)`` returning
            a ``(difference, overhead)`` pair.

    Returns:
        ``(r_read_diff, r_ovhd, rwf_read_diff, rwf_ovhd)``, four lists with
        one entry per setting.
    """
    # Insertion order fixes the call order: "r" before "rwf" for each setting.
    columns = {"r": ([], []), "rwf": ([], [])}
    for setting in exp_settings:
        for tag, (diffs, ovhds) in columns.items():
            diff, ovhd = read_func(root_dir, tag, *setting)
            diffs.append(diff)
            ovhds.append(ovhd)
    r_read_diff, r_ovhd = columns["r"]
    rwf_read_diff, rwf_ovhd = columns["rwf"]
    return r_read_diff, r_ovhd, rwf_read_diff, rwf_ovhd
def plot_generic(r_read_diff, r_ovhd, rwf_read_diff, rwf_ovhd, name, ylable_read, xlable, pname):
    """Save a two-panel bar figure (read difference, overhead) as ``{pname}.pdf``."""
    fig = plt.figure(figsize=(11, 4))
    panels = (
        (r_read_diff, rwf_read_diff, ylable_read),
        (r_ovhd, rwf_ovhd, 'average max overhead per element: bytes'),
    )
    for position, (r_series, rwf_series, y_text) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plot_bar(r_series, rwf_series, name, xlable, y_text)
    plt.tight_layout()
    plt.savefig("{}.pdf".format(pname))
    plt.close(fig)
def cmp_delay(root_dir, read_func, ylable_read, pnane_prefix, dft_speed):
    """Plot the comparison over ten latency settings read from ``{root_dir}/delay``.

    The figure is saved as ``{pnane_prefix}_delay.pdf``.
    """
    # (latency between DC, latency within DC) for each of the ten settings.
    latency_pairs = [(20 + k * 40, 4 + k * 8) for k in range(10)]
    delays = ["{hd}ms,\n{ld}ms".format(hd=between, ld=within)
              for between, within in latency_pairs]
    exp_settings = [[9, dft_speed, between, within]
                    for between, within in latency_pairs]
    r_diff, r_ovhd, rwf_diff, rwf_ovhd = gather_plot_data(
        root_dir + "/delay", exp_settings, read_func)
    plot_generic(r_diff, r_ovhd, rwf_diff, rwf_ovhd, delays, ylable_read,
                 "latency: between DC, within DC", pnane_prefix + "_delay")
def cmp_replica(root_dir, read_func, ylable_read, pnane_prefix, base_speed):
    """Plot the comparison over replica counts read from ``{root_dir}/replica``.

    The speed of each setting is the replica count times base_speed.  The
    figure is saved as ``{pnane_prefix}_replica.pdf``.
    """
    replicas = [3, 6, 9, 12, 15]
    exp_settings = []
    for count in replicas:
        exp_settings.append([count, count * base_speed, 50, 10])
    r_diff, r_ovhd, rwf_diff, rwf_ovhd = gather_plot_data(
        root_dir + "/replica", exp_settings, read_func)
    plot_generic(r_diff, r_ovhd, rwf_diff, rwf_ovhd, replicas, ylable_read,
                 "num of replicas", pnane_prefix + "_replica")
def cmp_speed(root_dir, read_func, ylable_read, pnane_prefix, low, high, step):
    """Plot the comparison over operation speeds read from ``{root_dir}/speed``.

    Speeds run from low in increments of step, stopping before high + step.
    """
    speeds = list(range(low, high + step, step))
    exp_settings = [[9, speed, 50, 10] for speed in speeds]
    results = gather_plot_data(root_dir + "/speed", exp_settings, read_func)
    r_diff, r_ovhd, rwf_diff, rwf_ovhd = results
    speed_plot(r_diff, r_ovhd, rwf_diff, rwf_ovhd, speeds, ylable_read, pnane_prefix)
def speed_plot(r_read_diff, r_ovhd, rwf_read_diff, rwf_ovhd, name, ylable_read, pnane_prefix):
    """Save the speed comparison figure as ``{pnane_prefix}_op_speed.pdf``.

    The left panel is a line plot of the read difference, the right panel a
    bar chart of the overhead whose tick labels are blanked except at every
    tenth position.
    """
    xlable = 'op/second'
    pname = pnane_prefix + '_op_speed'
    # (position - 500) % 10 equals position % 10, so every tenth label is kept.
    s_name = [label if position % 10 == 0 else ''
              for position, label in enumerate(name)]
    fig = plt.figure(figsize=(11, 4))
    plt.subplot(1, 2, 1)
    plot_line(r_read_diff, rwf_read_diff, name, xlable, ylable_read)
    plt.subplot(1, 2, 2)
    plot_bar(r_ovhd, rwf_ovhd, name, xlable,
             'average max overhead per element: bytes', s_name=s_name)
    plt.tight_layout()
    plt.savefig("{}.pdf".format(pname))
    plt.close(fig)
def cmp_all(root_dir, read_func, ylable_read, pnane_prefix, low, high, step, dft_speed, base_speed):
    """Produce the delay, replica and speed comparison figures, in that order."""
    shared = (root_dir, read_func, ylable_read, pnane_prefix)
    cmp_delay(*shared, dft_speed)
    cmp_replica(*shared, base_speed)
    cmp_speed(*shared, low, high, step)
# Figures for the results under "rpq": speeds 500..10000 in steps of 100,
# default speed 10000 for the delay runs, base speed 1000 per replica.
cmp_all("rpq", rpq_read, "average read_max diff",
"rpq", 500, 10000, 100, 10000, 1000)
# Figures for the results under "list,cmp": speeds 50..1000 in steps of 50,
# default speed 1000 for the delay runs, base speed 100 per replica.
cmp_all("list,cmp", list_read, "average list edit distance",
"list_cmp", 50, 1000, 50, 1000, 100)
# Report how many samples were dropped as invalid while reading.
print("Data skipped: ", data_skipped)
import matplotlib.pyplot as plt
import matplotlib
import statistics
# Running count of CSV rows dropped because a value exceeded err_sp.
data_skipped = 0
# Values above this threshold are treated as invalid and skipped.
err_sp = 1e10
# Matplotlib output settings: Type 42 fonts in PDFs, tight bounding box with a
# small padding, automatic legend placement and small tick labels.
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['savefig.bbox'] = 'tight'
matplotlib.rcParams['savefig.pad_inches'] = 0.01
matplotlib.rcParams['legend.loc'] = 'best'
matplotlib.rcParams['xtick.labelsize'] = 8
matplotlib.rcParams['ytick.labelsize'] = 8
def freq(li):
    """Return the fraction of entries in ``li`` that are non-zero.

    Args:
        li: Sized collection of numbers.

    Returns:
        A float in ``[0, 1]``; ``0.0`` for an empty collection (instead of
        raising ZeroDivisionError).
    """
    if not li:
        return 0.0
    return sum(1 for x in li if x != 0) / len(li)
def read(ztype, server, op, delay, low_delay, root_dir='.'):
    """Read the overhead and read-max samples of one experiment directory.

    The directory is ``{root_dir}/{ztype}_{server},{op},({delay},{low_delay})``.
    Rows whose used columns exceed err_sp are dropped and counted in the
    module-level data_skipped; blank lines are ignored.

    Args:
        ztype: Type tag used as the directory-name prefix.
        server: Value substituted into the directory name.
        op: Value substituted into the directory name.
        delay: Value substituted into the directory name.
        low_delay: Value substituted into the directory name.
        root_dir: Directory containing the experiment directory.

    Returns:
        ``(rmax, ovhd)``: the list of ``|col1 - col3|`` per valid row of
        ``max.csv`` and the list of ``col1 / col0`` per valid row of
        ``ovhd.csv``.
    """
    global data_skipped
    d = "{dir}/{t}_{s},{o},({d},{ld})".format(
        dir=root_dir, t=ztype, s=server, o=op, d=delay, ld=low_delay)
    ovhd = []
    rmax = []
    with open(d + "/ovhd.csv", "r") as file:
        # Iterate lazily instead of loading the whole file with readlines().
        for line in file:
            if not line.strip():
                # float('') would raise on a blank or trailing empty line.
                continue
            tmp = [float(x) for x in line.split(',')]
            if tmp[0] > err_sp or tmp[1] > err_sp:
                data_skipped += 1
                continue
            ovhd.append(tmp[1] / tmp[0])
    with open(d + "/max.csv", "r") as file:
        for line in file:
            if not line.strip():
                continue
            tmp = [float(x) for x in line.split(',')]
            if tmp[1] > err_sp or tmp[3] > err_sp:
                data_skipped += 1
                continue
            rmax.append(abs(tmp[1] - tmp[3]))
    return rmax, ovhd
def cmp_or(name, directory, server=9, op=10000, delay=50, low_delay=10):
    """Plot the "o" and "r" samples of one run side by side and print statistics.

    Both series are truncated to their common length. The figure is saved as
    ``{name}.pdf``; the mean and non-zero frequency of the read-max
    differences are printed for both types.
    """
    xlable = 'time: second'
    ormax, oovhd = read("o", server, op, delay, low_delay, root_dir=directory)
    rrmax, rovhd = read("r", server, op, delay, low_delay, root_dir=directory)

    n_max = min(len(ormax), len(rrmax))
    n_ovhd = min(len(oovhd), len(rovhd))
    diff_o, diff_r = ormax[:n_max], rrmax[:n_max]
    ovhd_o, ovhd_r = oovhd[:n_ovhd], rovhd[:n_ovhd]

    fig = plt.figure(figsize=(11, 4))
    plt.subplot(1, 2, 1)
    plot_line(diff_o, diff_r, list(range(n_max)), xlable, 'read max diff')
    plt.subplot(1, 2, 2)
    plot_line(ovhd_o, ovhd_r, list(range(n_ovhd)), xlable, 'overhead: byte')
    plt.tight_layout()
    plt.savefig("{}.pdf".format(name))
    plt.close(fig)

    print(name)
    for tag, series in (("add-win", diff_o), ("rmv-win", diff_r)):
        print(tag, statistics.mean(series), freq(series))
def preliminary_dispose(oms, oos, rms, ros):
    """Reduce per-setting sample lists to one number per setting.

    Args:
        oms: Lists of read-max differences for the "o" type.
        oos: Lists of overhead samples for the "o" type.
        rms: Lists of read-max differences for the "r" type.
        ros: Lists of overhead samples for the "r" type.

    Returns:
        ``(om_avg, rm_avg, om_count, rm_count, oo_max, ro_max)``: means and
        non-zero frequencies of the difference lists, and the last entry of
        every overhead list.
    """
    om_avg = [statistics.mean(samples) for samples in oms]
    om_count = [freq(samples) for samples in oms]
    rm_avg = [statistics.mean(samples) for samples in rms]
    rm_count = [freq(samples) for samples in rms]
    # The final overhead sample of each run is what gets reported.
    oo_max = [samples[-1] for samples in oos]
    ro_max = [samples[-1] for samples in ros]
    return om_avg, rm_avg, om_count, rm_count, oo_max, ro_max
def plot_bar(o_data, r_data, name, x_lable, y_lable, s_name=None):
    """Draw a grouped bar chart of two series on the current axes.

    Args:
        o_data: Heights of the first bar of each group (legend 'Add-Win').
        r_data: Heights of the second bar of each group (legend 'Rmv-Win').
        name: One entry per group; also the tick labels when s_name is None.
        x_lable: Text of the x axis label.
        y_lable: Text of the y axis label.
        s_name: Optional tick labels used instead of name.
    """
    tick_text = name if s_name is None else s_name
    width = 0.35
    slots = range(len(name))
    left_pos = list(slots)
    right_pos = [slot + width for slot in slots]
    # Ticks sit between the two bars of a group.
    tick_pos = [slot + width / 2 for slot in slots]
    left_bars = plt.bar(left_pos, o_data, width, tick_label=tick_text)
    right_bars = plt.bar(right_pos, r_data, width, tick_label=tick_text)
    plt.xlabel(x_lable)
    plt.ylabel(y_lable)
    plt.legend(handles=[left_bars, right_bars], labels=['Add-Win', 'Rmv-Win'])
    plt.xticks(tick_pos)
def plot_line(o_data, r_data, name, x_lable, y_lable):
    """Plot both series as solid lines over ``name`` on the current axes."""
    for series, legend_text in ((o_data, "Add-Win"), (r_data, "Rmv-Win")):
        plt.plot(name, series, linestyle="-", label=legend_text)
    plt.xlabel(x_lable)
    plt.ylabel(y_lable)
    plt.legend()
def _cmp_generic(exp_settings, directory, x_paras=None, plot_func=None):
    """Read every setting for the "o" and "r" types from one directory.

    The last element of each setting is overwritten in place with
    ``directory`` before it is unpacked into read(), so callers must provide
    a trailing placeholder slot for it.

    Args:
        exp_settings: List of mutable argument lists for read().
        directory: Root directory of this round.
        x_paras: Optional x values forwarded to plot_func.
        plot_func: Optional callable invoked with the six result lists and
            x_paras when both it and x_paras are given.

    Returns:
        The six lists produced by preliminary_dispose.
    """

    def read_all(ztype):
        # All settings of one type are read before the next type starts.
        diffs, ovhds = [], []
        for setting in exp_settings:
            setting[-1] = directory
            diff, ovhd = read(ztype, *setting)
            diffs.append(diff)
            ovhds.append(ovhd)
        return diffs, ovhds

    oms, oos = read_all("o")
    rms, ros = read_all("r")
    om_avg, rm_avg, om_count, rm_count, oo_max, ro_max = preliminary_dispose(
        oms, oos, rms, ros)
    wants_plot = plot_func is not None and x_paras is not None
    if wants_plot:
        plot_func(om_avg, rm_avg, om_count, rm_count, oo_max, ro_max, x_paras)
    return om_avg, rm_avg, om_count, rm_count, oo_max, ro_max
def cmp_generic(x_paras, dirs, exp_settings, plot_func, low, high):
    """Average the six metrics over several round directories and plot them.

    Each setting in exp_settings gets an extra trailing slot appended (in
    place) that _cmp_generic fills with the round directory.

    Args:
        x_paras: X values, one per setting.
        dirs: Round directories to read.
        exp_settings: List of argument lists, one per x value.
        plot_func: Callable receiving the six averaged lists and x_paras.
        low: Number of leading rounds excluded from each average.
        high: Number of trailing rounds excluded from each average.

    Returns:
        Whatever plot_func returns.

    NOTE(review): the low/high trimming is positional over ``dirs``; the
    collected values are not sorted first. Confirm that this is intended.
    """
    n_points = len(x_paras)
    # collected[metric][point] holds one value per round directory.
    collected = [[[] for _ in range(n_points)] for _ in range(6)]
    for setting in exp_settings:
        setting.append("")
    for root_dir in dirs:
        om, rm, oc, rc, oo, ro = _cmp_generic(exp_settings, root_dir)
        for buckets, values in zip(collected, (om, rm, oc, rc, oo, ro)):
            for point in range(n_points):
                buckets[point].append(values[point])
    stop = len(dirs) - high
    averaged = [
        [statistics.mean(per_round[low:stop]) for per_round in buckets]
        for buckets in collected
    ]
    om_avg, rm_avg, om_count, rm_count, oo_max, ro_max = averaged
    return plot_func(om_avg, rm_avg, om_count, rm_count, oo_max, ro_max, x_paras)
def cmp_delay(rounds, low=0, high=0):
    """Compare ten latency settings over the ``delay/0 .. delay/{rounds-1}`` runs.

    Returns the value of delay_plot as passed back by cmp_generic.
    """
    # (latency between DC, latency within DC) for each of the ten settings.
    latency_pairs = [(20 + k * 40, 4 + k * 8) for k in range(10)]
    delays = ["{hd}ms,\n{ld}ms".format(hd=between, ld=within)
              for between, within in latency_pairs]
    dirs = ["delay/{}".format(r) for r in range(rounds)]
    exp_settings = [[9, 10000, between, within]
                    for between, within in latency_pairs]
    return cmp_generic(delays, dirs, exp_settings, delay_plot, low, high)
def delay_plot(om_avg, rm_avg, om_count, rm_count, oo_max, ro_max, name):
    """Save the two-panel latency figure as ``delay.pdf``.

    The panels show the average read-max difference and the frequency of a
    wrong read-max. The overhead data is not drawn here.

    Returns:
        Positional arguments for plot_bar that draw the overhead chart.
    """
    xlable = 'latency: between DC, within DC'
    pname = 'delay'
    fig = plt.figure(figsize=(11, 4))
    panels = (
        (om_avg, rm_avg, 'average read_max diff'),
        (om_count, rm_count, 'frequency of read_max being wrong'),
    )
    for position, (o_series, r_series, y_text) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plot_bar(o_series, r_series, name, xlable, y_text)
    plt.tight_layout()
    plt.savefig("{}.pdf".format(pname))
    plt.close(fig)
    return (oo_max, ro_max, name, xlable,
            'average max overhead per element: bytes')
def cmp_replica(rounds, low=0, high=0):
    """Compare replica counts over the ``replica/0 .. replica/{rounds-1}`` runs.

    Returns the value of replica_plot as passed back by cmp_generic.
    """
    replicas = [3, 6, 9, 12, 15]
    dirs = ["replica/{}".format(r) for r in range(rounds)]
    exp_settings = []
    for count in replicas:
        exp_settings.append([count, 10000, 50, 10])
    return cmp_generic(replicas, dirs, exp_settings, replica_plot, low, high)
def replica_plot(om_avg, rm_avg, om_count, rm_count, oo_max, ro_max, name):
    """Save the two-panel replica figure as ``replica.pdf``.

    The panels show the average read-max difference and the frequency of a
    wrong read-max. The overhead data is not drawn here.

    Returns:
        Positional arguments for plot_bar that draw the overhead chart.
    """
    xlable = 'num of replicas'
    pname = 'replica'
    fig = plt.figure(figsize=(11, 4))
    panels = (
        (om_avg, rm_avg, 'average read_max diff'),
        (om_count, rm_count, 'frequency of read_max being wrong'),
    )
    for position, (o_series, r_series, y_text) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plot_bar(o_series, r_series, name, xlable, y_text)
    plt.tight_layout()
    plt.savefig("{}.pdf".format(pname))
    plt.close(fig)
    return (oo_max, ro_max, name, xlable,
            'average max overhead per element: bytes')
def cmp_speed(rounds, low=0, high=0):
    """Compare 96 operation speeds over the ``speed/0 .. speed/{rounds-1}`` runs.

    Returns the value of speed_plot as passed back by cmp_generic.
    """
    # 500, 600, ..., 10000
    speeds = list(range(500, 500 + 96 * 100, 100))
    dirs = ["speed/{}".format(r) for r in range(rounds)]
    exp_settings = [[9, speed, 50, 10] for speed in speeds]
    return cmp_generic(speeds, dirs, exp_settings, speed_plot, low, high)
def speed_plot(om_avg, rm_avg, om_count, rm_count, oo_max, ro_max, name):
    """Save the two-panel speed figure as ``op_speed.pdf``.

    Both panels are line plots: the average read-max difference and the
    frequency of a wrong read-max. The overhead data is not drawn here.

    Returns:
        ``(bar_args, s_name)``: positional arguments for plot_bar that draw
        the overhead chart, and tick labels blanked except at every tenth
        position.
    """
    xlable = 'op/second'
    pname = 'op_speed'
    # (position - 500) % 10 equals position % 10, so every tenth label is kept.
    s_name = [label if position % 10 == 0 else ''
              for position, label in enumerate(name)]
    fig = plt.figure(figsize=(11, 4))
    panels = (
        (om_avg, rm_avg, 'average read_max diff'),
        (om_count, rm_count, 'frequency of read_max being wrong'),
    )
    for position, (o_series, r_series, y_text) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plot_line(o_series, r_series, name, xlable, y_text)
    plt.tight_layout()
    plt.savefig("{}.pdf".format(pname))
    plt.close(fig)
    bar_args = (oo_max, ro_max, name, xlable,
                'average max overhead per element: bytes')
    return bar_args, s_name
# Time-series comparison of the two types for two individual runs.
cmp_or("inc_d", "replica/0")
cmp_or("ar_d", "ardominant")
# Per-parameter comparisons over 30 rounds each; every call saves its own
# figure and returns the plot_bar arguments for the overhead chart.
sp, sn = cmp_speed(30)
rp = cmp_replica(30)
dl = cmp_delay(30)
# Overhead versus speed (left) and versus latency (right) in one figure.
figure = plt.figure(figsize=(11, 4))
plt.subplot(1, 2, 1)
plot_bar(*sp, s_name=sn)
plt.subplot(1, 2, 2)
plot_bar(*dl)
plt.tight_layout()
plt.savefig("ovhd_sd.pdf")
plt.close(figure)
# Overhead versus number of replicas in a separate, narrower figure.
figure = plt.figure(figsize=(6, 4))
plot_bar(*rp)
plt.savefig("ovhd_r.pdf")
plt.close(figure)
# Report how many samples were dropped as invalid while reading.
print("Data skipped: ", data_skipped)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment