import numpy as np
import matplotlib.pyplot as plt
import re
import os
import pstats
import sys
import io
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import shutil
import argparse
import matplotlib
from matplotlib import cycler
import pickle
import matplotlib.ticker as mticker
import matplotlib
import glob
# font = {'family' : 'normal',
# 'size' : 20}
# matplotlib.rc('font', **font)
# # plt.rcParams["font.weight"] = "bold"
# plt.rcParams["axes.labelweight"] = "bold"
# Baseline experiment settings. They are kept as strings because read_exps
# compares them against digit strings pulled out of experiment names.
default_sfs = "20"
default_stims = "1"
default_pop = "500"
# read_exps skips "vanilla" experiments whose population is smaller than
# nodes * POP_SCALING_FACTOR.
POP_SCALING_FACTOR = 500
def title_and_save(fig, title, pdf):
    """Title the current axes with a running figure number and save ``fig``.

    Args:
        fig: Figure handed to ``pdf.savefig`` and then closed.
        title: Text appended after the ``"Fig N: "`` prefix; a falsy value
            leaves only the prefix.
        pdf: Object exposing ``savefig(fig, bbox_inches=...)`` -- presumably
            a matplotlib ``PdfPages``; confirm against the caller.

    Side effects:
        Increments the module-level ``fig_count``, which must already exist.
    """
    global fig_count
    heading = "Fig {}: ".format(fig_count)
    if title:
        heading = heading + title
    plt.title(heading, fontsize=20)
    pdf.savefig(fig, bbox_inches='tight')
    fig_count += 1
    plt.close(fig)
def format_logname_general(node, pop, nCpu, stim, sf, path, how, title=None):
    """List non-GPU ``.log`` files under ``summit_outputs`` for ``node``.

    Only ``node`` is used; the remaining parameters are accepted but ignored.
    A folder is searched when its name contains ``"<node>N"``.

    NOTE: a second definition with the same name appears later in this
    module and replaces this one at import time.

    Returns:
        list[str]: Paths of the form ``summit_outputs/<folder>/<file>``.
    """
    root = 'summit_outputs'
    tag = f'{node}N'
    return [
        os.path.join(root, folder, file)
        for folder in os.listdir(root)
        if tag in folder
        for file in os.listdir(os.path.join(root, folder))
        if '.log' in file and 'gpu' not in file
    ]
def set_custom_params_plt():
    """Apply the script-wide matplotlib defaults (currently only font size).

    A colour cycle is built but not installed into ``rcParams``; the styling
    calls that would consume it are left commented out below.
    """
    palette = ['#EE6666', '#3388BB', '#9988DD',
               '#EECC55', '#88BB44', '#FFBBBB']
    colors = cycler('color', palette)
    # plt.rc('axes', facecolor='#E6E6E6', edgecolor='none',
    # axisbelow=True, grid=True)
    # plt.rc('grid', color='w', linestyle='solid')
    # plt.rc('patch', edgecolor='#E6E6E6')
    # plt.rc('lines', linewidth=2)
    # plt.rcParams['font.family'] = 'serif'
    # plt.rcParams['font.serif'] = ['Times New Roman'] + plt.rcParams['font.serif']
    plt.rcParams['font.size'] = 18


# Applied once at import time so every figure below picks up the settings.
set_custom_params_plt()
def restore_default_mpl_params():
    """Overwrite the active matplotlib rcParams with ``rcParamsDefault``."""
    factory_settings = matplotlib.rcParamsDefault
    matplotlib.rcParams.update(factory_settings)
def format_logname_general(node, pop, nCpu, stim, sf, path, how, title=None):
    """List non-GPU ``.log`` files under ``summit_outputs`` for ``node``.

    Only ``node`` is used; the remaining parameters are accepted for
    signature compatibility and ignored.

    A folder is searched when its name contains ``"<node>N"`` not directly
    preceded by another digit, so node ``2`` no longer also picks up
    ``12N...`` folders. Entries of ``summit_outputs`` that are not
    directories are skipped instead of raising ``NotADirectoryError``.

    Returns:
        list[str]: Paths of the form ``summit_outputs/<folder>/<file>`` for
        files whose name contains ``.log`` but not ``gpu``.
    """
    import re  # local import keeps the block self-contained

    root = 'summit_outputs'
    # (?<!\d) stops "2N" from matching inside "12N".
    node_tag = re.compile(r'(?<!\d)' + re.escape(f'{node}N'))
    possible_names = []
    for folder in os.listdir(root):
        folder_path = os.path.join(root, folder)
        if not node_tag.search(folder) or not os.path.isdir(folder_path):
            continue
        for file in os.listdir(folder_path):
            if '.log' in file and 'gpu' not in file:
                possible_names.append(os.path.join(folder_path, file))
    return possible_names
def format_logname(node, pop, nCpu, stim, sf, path, how, title=None):
    """Build the expected master-log path for one experiment.

    ``how`` selects which inputs are replaced by the module defaults and
    which title is returned:

    * ``'vanilla'``: default stims and score functions, title "Population"
    * ``'stims'``: default population and score functions, title "Stims"
    * ``'sfs'``: default population and stims, title "Sfs"
    * ``'full'``: nothing replaced; ``title`` must be truthy

    Returns:
        tuple: ``(log_path, title)``. The path is returned whether or not
        the file exists; a "NO FILE" line is printed when it does not.

    Raises:
        NotImplementedError: ``how`` is not one of the values above.
    """
    if how == "full":
        assert title
    elif how == 'vanilla':
        stim, sf, title = default_stims, default_sfs, "Population"
    elif how == 'stims':
        pop, sf, title = default_pop, default_sfs, "Stims"
    elif how == "sfs":
        pop, stim, title = default_pop, default_stims, "Sfs"
    else:
        raise NotImplementedError

    # The run directory and the log file share the same stem.
    stem = "{}N_{}C_{}O_{}S_{}SF".format(node, nCpu, pop, stim, sf)
    logname = "{}/{}/{}.log".format(path, stem, stem)
    if not os.path.isfile(logname):
        print("NO FILE ", logname)
    return logname, title
def format_gpu_util_name(node, pop, nCpu, stim, sf, path, how, title=None):
    """Collect the GPU utilization logs stored in one experiment directory.

    ``how`` substitutes module defaults exactly as in ``format_logname``
    ('vanilla', 'stims', 'sfs', or 'full', the last requiring a truthy
    ``title``).

    Returns:
        tuple: ``(gpu_logs, count)`` where ``gpu_logs`` lists every file in
        ``<path>/<node>N_<nCpu>C_<pop>O_<stim>S_<sf>SF/`` whose name contains
        ``"gpu_utillization"`` and ``count`` is its length.

    Raises:
        NotImplementedError: ``how`` is not recognised.
    """
    if how == "full":
        assert title
    elif how == 'vanilla':
        stim, sf, title = default_stims, default_sfs, "Population"
    elif how == 'stims':
        pop, sf, title = default_pop, default_sfs, "Stims"
    elif how == "sfs":
        pop, stim, title = default_pop, default_stims, "Sfs"
    else:
        raise NotImplementedError

    run_dir = "{}/{}N_{}C_{}O_{}S_{}SF/".format(path, node, nCpu, pop, stim, sf)
    gpu_logs = []
    for filename in os.listdir(run_dir):
        # The spelling "utillization" matches the files on disk.
        if "gpu_utillization" in filename:
            gpu_logs.append(os.path.join(run_dir, filename))
    return gpu_logs, len(gpu_logs)
def find_gpu_logs(basepath):
    """Return the sorted paths of files in ``basepath`` named like GPU logs.

    A file qualifies when its name contains ``"gpu_utillization"``.
    """
    matches = []
    for entry in os.listdir(basepath):
        if "gpu_utillization" in entry:
            matches.append(os.path.join(basepath, entry))
    matches.sort()
    return matches
def read_gpu_logs(fn):
    """Load the last GPU utilization log found next to ``fn``.

    Only the directory of ``fn`` is used: ``find_gpu_logs`` is run on it
    and the last entry of its sorted result is parsed. Each line is split
    on commas and the first line supplies the column names.

    Returns:
        tuple: ``(percent_utilization, gpu_df)``. ``gpu_df`` has a parsed
        ``timestamp`` column and an integer ``utilization`` column.
        ``percent_utilization`` is the utilization sum divided by
        ``6 * elapsed_seconds``; the 6 is hard-coded (presumably GPUs per
        node at one sample per second -- confirm).

    Raises:
        IndexError: no GPU log exists in the directory.
        ValueError: no row with a parseable timestamp remains.
    """
    basepath = os.path.dirname(fn)
    gpu_logs = find_gpu_logs(basepath)
    fn = gpu_logs[-1]
    with open(fn, 'r') as gpu_f:
        lines = gpu_f.readlines()
    gpu_df = pd.DataFrame([sub.split(",") for sub in lines])
    gpu_df.columns = gpu_df.iloc[0]
    gpu_df = gpu_df[1:]
    gpu_df = gpu_df.rename({' name': 'name', ' utilization.gpu [%]': 'utilization'}, axis=1)
    # Remove header rows repeated inside the log and blank lines.
    gpu_df = gpu_df[gpu_df['name'] != ' name']
    gpu_df = gpu_df[gpu_df['timestamp'] != '\n']
    gpu_df = gpu_df[gpu_df['utilization'] != ' utilization.memory [%]']
    # Drops rows whose timestamp field contains "2021" twice; the year is
    # hard-coded, so this filter only has an effect on 2021 logs.
    gpu_df = gpu_df[gpu_df['timestamp'].str.count("2021") < 2]
    # Work on a copy so the column assignments below do not write into a
    # slice of the unfiltered frame.
    gpu_df = gpu_df.copy()
    gpu_df['timestamp'] = pd.to_datetime(gpu_df['timestamp'], errors='coerce')
    gpu_df = gpu_df.dropna(axis=0, subset=['timestamp']).copy()
    if gpu_df.empty:
        raise ValueError("no rows with a valid timestamp in {}".format(fn))
    # WE GET ONE MEASUREMENT PER SECOND
    # total_seconds() keeps whole days; Timedelta.seconds would drop them.
    total_elapsed = (max(gpu_df['timestamp']) - min(gpu_df['timestamp'])).total_seconds()
    # Strip the percent sign literally. The previous " \%" pattern was an
    # invalid escape and only worked while str.replace defaulted to regex.
    gpu_df['utilization'] = (
        gpu_df['utilization'].str.replace("%", "", regex=False).str.strip().astype(int)
    )
    percent_utilization = np.sum(gpu_df['utilization']) / (6 * total_elapsed)
    return percent_utilization, gpu_df
def processLog(f):
    """Parse one run log into a dictionary of timing records.

    The file is scanned line by line; substrings such as "took:",
    "launched PIDS", "evaluation:" or "neuroGPU" decide how each line is
    interpreted. Values recorded before the first "took:" line after an
    "absolute start" line are skipped for the runtime, evaluation and
    neuroGPU-duration series (``firstGen``).

    Args:
        f: Path of the log file.

    Returns:
        dict with keys ``procStartDict``, ``procEndDict``, ``startEndPairs``,
        ``runtimes``, ``compStartDict``, ``sfs``, ``evalTimes``,
        ``neuroGPUTimes``, ``procToSf``, ``absStart``, ``neuroGPUStartTimes``,
        ``neuroGPUEndTimes``, ``ioTimes``, ``nGpus`` and ``gen_sizes``.
        When ``abs_start`` or ``nGpus`` was never set, a message is printed
        and an empty result with ``absStart`` 0 and ``nGpus`` 6 is returned.
    """
    with open(f, "r") as file:
        startEndPairs = []
        runtimes = []
        sfs = []
        neuroGPUStartTimes = []
        neuroGPUEndTimes = []
        procToSf = {}
        procStartDict = {}
        procEndDict = {}
        compStartDict = {}
        io_times = []
        evalTimes = []
        neuroGPUTimes = []
        gens = []
        file_lines =file.readlines()
        # Flags for bracketed lists that continue over several lines.
        # NOTE(review): readingEnds guards the "starts" branch and
        # readingStarts the "ends" branch -- the names look swapped.
        readingEnds = False
        readingStarts = False
        firstGen = True
        for line in file_lines:
            if "Date:" in line:
                print(line)
            if "absolute start" in line:
                numbers = re.findall(r'\d+', line)
                firstGen = True
                # Consecutive digit runs are re-joined as "<int>.<frac>";
                # only the first such value is kept.
                abs_start = [ '.'.join(x) for x in zip(numbers[0::2], numbers[1::2]) ][0]
            if "nCpus" in line:
                # Parsed but not part of the returned dictionary.
                nCpus = int(re.match('.*?([0-9]+)$', line).group(1))
                #assert nCpus == cpu, "expected {} but got {} cpus in log".format(cpu,nCpus)
            if "nGPUS" in line:
                try:
                    curr_idx = line.find('nGPUS')
                    curr_line = line[curr_idx:curr_idx+8]
                    nGpus = int(re.match('.*?([0-9]+)$', curr_line).group(1))
                except:
                    # default to using experimental name due to GPU line being scrambled
                    nGpus = int( re.findall(r'[1-9]S',f)[0][0])
            if "took:" in line:
                # Second number on the line is taken as the runtime.
                runtime = float(re.findall(r"[-+]?\d*\.\d+|\d+",line)[1])
                #print(runtime)
                # if runtime > 100:
                # runtime = 22
                if firstGen:
                    # The first "took:" after a start is not recorded.
                    firstGen = False
                    continue
                runtimes.append(runtime)
            if "launched PIDS" in line:
                start = re.findall(r'\d+', line)[0] # second half is in milliseconds, don't need that precision
            if "finished PIDS" in line:
                end = re.findall(r'\d+', line)[0]
                # Pairs the most recent "launched" value with this one.
                startEndPairs.append((start,end))
            if "process" in line and "started" in line:
                stSplit = line.split(" ")
                # The token between "is" and "and" is the score-function name.
                sf = [stSplit[i] for i in range(2,len(stSplit)-2) if stSplit[i-1] == "is" and stSplit[i+1] == "and"][0]
                sfs.append(sf)
                # Remove the name so digits inside it are not picked up below.
                line = re.sub(r'(?<=is)(.*)(?=and)', "", line)
                numbers = re.findall(r'\d+', line)
                procToSf[numbers[0]] = sf
                if numbers[0] in procStartDict.keys():
                    procStartDict[numbers[0]].append(numbers[1])
                else:
                    procStartDict[numbers[0]] = [numbers[1]]
            if "returning" in line:
                numbers = re.findall(r'\d+', line)
                if numbers[0] in procEndDict.keys():
                    procEndDict[numbers[0]].append(numbers[1])
                else:
                    procEndDict[numbers[0]] = [numbers[1]]
            if "computing" in line:
                numbers = re.findall(r'\d+', line)
                if numbers[0] in compStartDict.keys():
                    compStartDict[numbers[0]].append(numbers[1])
                else:
                    compStartDict[numbers[0]] = [numbers[1]]
            if "evaluation:" in line and not firstGen:
                numbers = re.findall(r'\d+', line)
                numbers = [ '.'.join(x) for x in zip(numbers[0::2], numbers[1::2]) ]
                # evalTimes is initialised above, so this test is always true.
                if "evalTimes" in locals():
                    evalTimes = np.append(evalTimes, np.array(list(numbers), dtype=np.float32))
                else:
                    evalTimes = np.array(list(numbers), dtype=np.float32)
                avgEval = np.mean(evalTimes)  # not used afterwards
            if "neuroGPU" in line and "starts" not in line and "ends" not in line and not firstGen:
                numbers = re.findall(r'\d+', line)
                numbers = [ '.'.join(x) for x in zip(numbers[0::2], numbers[1::2]) ]
                # neuroGPUTimes is initialised above, so this test is always true.
                if "neuroGPUTimes" in locals():
                    neuroGPUTimes = np.append(neuroGPUTimes, np.array(list(numbers), dtype=np.float32))
                else:
                    neuroGPUTimes = np.array(list(numbers),dtype=np.float32)
                avgNGPU = np.mean(neuroGPUTimes)  # not used afterwards
            # NOTE(review): "and" binds tighter than "or", so "not firstGen"
            # only applies to the continuation flag in the two tests below.
            if ("neuroGPU" in line and "starts" in line and "ends" not in line) or readingEnds and not firstGen:
                readingEnds = True
                numbers = re.findall(r'\d+', line)
                # Digit triples are rebuilt as "<a>.<b>e+<c>" strings.
                numbers = [ '.'.join([x1,x2]) + "e+" + str(x3) for x1,x2,x3 in zip(numbers[0::3], numbers[1::3], numbers[2::3]) ]
                neuroGPUStartTimes += numbers
                if "]" in line:
                    # Closing bracket ends the multi-line list.
                    readingEnds = False
            if ("neuroGPU" in line and "starts" not in line and "ends" in line) or readingStarts and not firstGen:
                readingStarts = True
                numbers = re.findall(r'\d+', line)
                numbers = [ '.'.join([x1,x2]) + "e+" + str(x3) for x1,x2,x3 in zip(numbers[0::3], numbers[1::3], numbers[2::3]) ]
                neuroGPUEndTimes += numbers
                if "]" in line:
                    readingStarts = False
            if "IO:" in line:
                numbers = re.findall(r'\d+', line)
                numbers = [ '.'.join([x1,x2]) + "e+" + str(x3) for x1,x2,x3 in zip(numbers[0::3], numbers[1::3], numbers[2::3]) ]
                io_times.append(numbers)
            if 'gen size' in line:
                numbers = re.findall(r'\d+', line)
                gens.append(float(numbers[0]))
            # if "gen1 took" in line:
            # break
        try:
            # we already skip first gen because we don't record it
            # if len(runtimes) > 1:
            # runtimes = runtimes[1:]
            # evalTimes = evalTimes[1:]
            # neuroGPUTimes = neuroGPUTimes[1:]
            res = {"procStartDict": procStartDict,"procEndDict": procEndDict,\
                   "startEndPairs": startEndPairs,"runtimes": runtimes,\
                   "compStartDict": compStartDict,"sfs": sfs,\
                   "evalTimes": evalTimes,"neuroGPUTimes": neuroGPUTimes,\
                   "procToSf": procToSf, "absStart": abs_start, \
                   "neuroGPUStartTimes": neuroGPUStartTimes, \
                   "neuroGPUEndTimes": neuroGPUEndTimes, "ioTimes": io_times,
                   "nGpus": nGpus, 'gen_sizes':gens}
        except UnboundLocalError as e:
            # Raised when abs_start or nGpus was never assigned in the loop.
            print(e)
            print("MISREAD LOG : ", f, " but I am in PERMISSIVE mode so it's ok")
            # raise e
            return {"procStartDict": {},"procEndDict": {},\
                    "startEndPairs": [],"runtimes": [],\
                    "compStartDict": {},"sfs": [],\
                    "evalTimes": [],"neuroGPUTimes": [],\
                    "procToSf": {}, "absStart": 0, \
                    "neuroGPUStartTimes": [], \
                    "neuroGPUEndTimes": [], "ioTimes": [],
                    "nGpus": 6, 'gen_sizes':[]}
        return res
def makeCustomProfile(node, nCpu, pop, stim, sf, vers, path, show=True):
    """Draw a timeline of CPU process and GPU simulation intervals for one run.

    The log is located with ``format_logname(..., how='full')`` and parsed
    with ``processLog``. Two images are written next to the log file:
    ``custom_profile`` (the timeline) and ``legend`` (a numbering of the
    score-function names, drawn by ``make_legend``).

    ``vers`` and ``show`` are accepted but not used.
    """
    f, _ = format_logname(node, pop, nCpu, stim, sf, path, how='full', title="None")
    #f = "runTimeLogs/runTime.log"
    logRes = processLog(f)
    print("making profile for {}".format(f))
    absStart = float(logRes['absStart'])
    start_data = np.array([float(start) for start in logRes["neuroGPUStartTimes"]])
    end_data = np.array(logRes["neuroGPUEndTimes"]).astype(float)
    print(len(start_data))
    times = logRes["neuroGPUTimes"]
    total_time = float(logRes['startEndPairs'][-1][1]) - float(logRes['absStart'])
    # bugged timer: logged end times are replaced by start + mean duration
    end_data = np.mean(times) + start_data
    procEndDict = logRes['procEndDict']
    sfsMap = logRes['procToSf']
    # Map each distinct score-function name to a small integer id.
    sfsMapMap = {}
    counter = 0
    for val in set(list(sfsMap.values())):
        sfsMapMap[val] = counter
        counter +=1
    nGpus = 6# THIS SHOULD BE IN LOG RES logRes['nGpus']
    compStartDict = logRes['compStartDict']
    procStartDict = logRes['procStartDict']
    startEndPairs = logRes['startEndPairs']
    #print(absStart)
    #print(startEndPairs)
    # Shift every (start, end) pair so that time zero is the absolute start.
    startEndPairs = [(float(pair0) - float(absStart), float(pair1) - float(absStart)) for pair0, pair1 in startEndPairs]
    #print(procStartDict)
    #print(startEndPairs)
    # Create figure and axes
    fig, ax = plt.subplots(figsize=(13,9))
    # YLIM XLIM
    plt.ylim(0,300)
    plt.xlim(0,200)
    x_anchors= []
    x_ends = []
    # Helper kept for reference; its only call site below is commented out.
    def calc_y_anchor(x_anchor,width,x_anchors,x_ends):
        curr_ht = 0
        for x_anc, x_end in zip(x_anchors,x_ends):
            if x_anchor > x_anc and x_anchor < x_end:
                curr_ht += 10
            elif x_anc > x_anchor and x_anc < x_anchor+width:
                curr_ht += 10
        return curr_ht
    cur_start = 0
    y_base = 0
    firstGo = True
    count = 0
    # Vertical lines for the first five start/end pairs; only the first
    # pair carries legend labels.
    for start, end in startEndPairs:
        if firstGo:
            firstGo = False
            plt.axvline(x=start,color="blue", label="CPU Eval Start")
            plt.axvline(x=end,color="red", label="CPU Eval Done")
        else:
            plt.axvline(x=start,color="blue")
            plt.axvline(x=end,color="red")
        count += 1
        if count > 4:
            break
    idx = 1
    labels = list(compStartDict.keys())
    box_ht = 10
    runs = 0
    # One light-blue box per recorded (start, end) of every process. Boxes
    # stack upwards until a start falls past the current pair's end, which
    # resets the stack.
    for procStart,procEnd,proc in \
            zip(list(procStartDict.values()),list(procEndDict.values()), list(procEndDict.keys()) ):
        for pStart,pEnd in zip(procStart,procEnd):
            x_anchor = float(pStart) - float(absStart)#float(procStart) - float(absStart)
            y_anchor = y_base + 10
            if x_anchor > float(startEndPairs[cur_start][1]) and cur_start < len(startEndPairs) - 1 :
                cur_start += 1
                y_base = 0
                runs += 1
            else:
                y_base += 10
            if y_base > 1200: # MAX HEIGHT EXCEEDED
                #print('max height exc.')
                break
            width =(float(pEnd) - float(absStart)) - (float(pStart) - float(absStart)) #(float(procEnd) - float(absStart)) - (float(procStart) - float(absStart))
            #y_anchor = calc_y_anchor(x_anchor,width,x_anchors,x_ends)
            x_anchors.append(float(x_anchor)), x_ends.append(width)
            rect = patches.Rectangle((x_anchor, y_anchor), width, box_ht, \
                                     linewidth=2, edgecolor='black', facecolor='lightblue', fill=True, zorder=0)
            curr_sf = sfsMapMap[sfsMap[proc]]
            # ax.annotate(curr_sf, (x_anchor + 2.5, y_anchor + 5), color='black', weight='bold', \
            # fontsize=7, ha='center', va='center', zorder=4)
            # Add the patch to the Axes
            ax.add_patch(rect)
            idx += 1
        # if y_base > 1200:
        # print("max height exceeded")
        # break
        # NOTE(review): nesting of this check (outer vs. inner loop) could not
        # be recovered from the source; it is placed on the outer loop here.
        if runs > 4:
            break
    # Create a Rectangle patches
    box_ht = 15 # constant box height
    cur_start = 0
    y_base = 0
    runs = 0
    # One pale-green box per GPU simulation interval.
    for start,end,idx in zip(start_data,end_data, np.arange(len(end_data))):
        x_anchor = start-absStart
        # NOTE(review): cur_start advances by 2 with no bounds check, so this
        # index can run past the end of startEndPairs.
        if x_anchor > float(startEndPairs[cur_start][1]):
            cur_start += 2
            y_base = 0
            runs += 1
        else:
            y_base += 15
        y_anchor = y_base
        width = end - start
        x_anchors.append(float(x_anchor)), x_ends.append(width)
        rect = patches.Rectangle((x_anchor, y_anchor), width, box_ht, \
                                 linewidth=2.5, edgecolor='black', facecolor='palegreen', fill=True, zorder=10)
        # ax.annotate("GPU {}".format(idx % nGpus), (x_anchor + (total_time / 10), y_anchor + 8), color='black', weight='bold',
        # fontsize=10, ha='center', va='center', zorder=20)
        # Add the patch to the Axes
        # break
        if runs > 4:
            break
        ax.add_patch(rect)
    plt.title("Profile for {} Node Parallel over Population (pop size {})".format(node, pop))
    #plt.title("Custom Profile for {} CPUs, {} Pop Size and {} Nodes".format(nCpus,nodes,popSize))
    plt.legend()
    plt.xlabel("time (s)")
    #plt.show()
    print("TODO: add legend later")
    out_dir = os.path.dirname(f)
    plt.savefig(os.path.join(out_dir,"custom_profile"), bbox_inches='tight')
    plt.close()
    # Rebuild the name -> id map and render it as a separate legend image.
    sfsMap = logRes['procToSf']
    sfsMapMap = {}
    counter = 0
    for val in set(list(sfsMap.values())):
        sfsMapMap[val] = counter
        counter +=1
    make_legend(sfsMapMap)
    plt.savefig(os.path.join(out_dir,"legend"), bbox_inches='tight')
    plt.close()
def make_legend(top):
    """Render ``top`` as a column of ``"<value>--> <name>"`` text lines.

    Args:
        top: Mapping of name (str) to the value shown in front of it.

    The axes are hidden and ``plt.show()`` is called; the figure is left
    open for the caller.
    """
    fig, ax = plt.subplots(figsize=(8, 5))
    x_pos, y_top = 9, 9
    for row, (name, val) in enumerate(top.items()):
        # Each entry is drawn one unit below the previous one.
        ax.text(x_pos, y_top - row, str(val) + "--> " + name, fontsize=20)
    ax.axis([0, 10, 0, 10])
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
    ax.axis('off')
    plt.show()
def plot_CPUGPU_bottleneck(nCpus, nodes,pops, sfs, stims, versions, path, how='vanilla', title=None, show=True):
    """Bar chart of mean GPU time vs. CPU evaluation time per experiment.

    Experiments are ordered by population per node and each distinct
    (pop, node) pair is plotted once. The DEAP bar is drawn for the first
    group only. The figure is saved as ``<title>_cpuVgpu.png`` in ``path``.

    Args:
        nCpus, nodes, pops, sfs, stims, versions: Parallel numpy arrays,
            one entry per experiment (they are indexed with an index array).
        path: Directory holding the experiment folders; also the output
            directory.
        how: Mode forwarded to ``format_logname``.
        title: Optional base name for the figure file; when omitted, the
            title returned by ``format_logname`` for the first experiment
            is used.
        show: Unused.

    TODO: consider changing this to be a single plot output
    """
    fig, axs = plt.subplots()
    plt.subplots_adjust(bottom=None, right=None, top=None, wspace=None, hspace=.5)
    plot_idxs = []
    pops, nodes = np.array(pops).astype(int), np.array(nodes).astype(int)
    inds = np.argsort(pops/nodes)
    # Reorder every array the same way; previously the sorted versions were
    # stored under another name and the unsorted array was iterated.
    nodes, pops, nCpus, stims, sfs, versions = (
        nodes[inds], pops[inds], nCpus[inds], stims[inds], sfs[inds], versions[inds])
    if title:
        figname = title + "_cpuVgpu.png"
        title = title + ": Time on CPU vs. GPU"
    # Defined up front so it exists even when no experiment is plotted.
    xlabel = "Population/Node"
    seen = []
    idx = 0
    # Width of a bar
    width = 0.25
    for node, pop, nCpu, stim, sf, vers in zip(nodes, pops, nCpus, stims, sfs, versions):
        if [pop,node] in seen:
            continue
        else:
            seen.append([pop,node])
        if not title:
            f, title = format_logname(node,pop,nCpu,stim, sf, path, how=how, title=title)
            figname = title + "_cpuVgpu.png"
            title = title + ": Time on CPU vs. GPU"
        else:
            f, _ = format_logname(node,pop,nCpu,stim, sf, path, how=how, title=title)
        seen.append(f)
        logRes = processLog(f)
        mean_deap, std_deap = calculate_deap_time(logRes['startEndPairs'])
        mean_eval, std_eval = np.mean(logRes['evalTimes']), np.std(logRes['evalTimes'])
        mean_neuroGPU, std_neuroGPU = np.mean(logRes['neuroGPUTimes']), np.std(logRes['neuroGPUTimes'])
        mean_deap = mean_deap - mean_neuroGPU # DEAP runtime consideration covers first simulation round too so we subtract it
        # Plotting
        plot_idx = idx * 1.5
        if idx == 0:
            # The first group carries the legend labels and the DEAP bar.
            axs.bar(plot_idx/1.5, mean_deap , width, yerr=std_deap, label='Time Running DEAP', color="orange",edgecolor='black', linewidth=1.5)
            axs.bar(plot_idx/1.5 + width, mean_neuroGPU , width, yerr=std_neuroGPU, label='Running time on GPU',color="palegreen",edgecolor='black', linewidth=1.5)
            axs.bar(plot_idx/1.5 + 2* width, mean_eval, width, yerr=std_eval, label='Running time on CPU', color="lightblue",edgecolor='black', linewidth=1.5)
        else:
            # axs.bar(plot_idx/1.5, mean_deap , width, yerr=std_deap, color="orange",edgecolor='black', linewidth=1.5)
            axs.bar(plot_idx/1.5 + width, mean_neuroGPU , width, yerr=std_neuroGPU, color="palegreen", edgecolor='black', linewidth=1.5)
            axs.bar(plot_idx/1.5 + 2*width, mean_eval, width, yerr=std_eval, color="lightblue",edgecolor='black', linewidth=1.5)
        plot_idxs.append(plot_idx)
        idx += 1
    # axs.legend(bbox_to_anchor=(1.25, 1), loc='upper right', ncol=1)
    # axs.legend(loc='upper right', ncol=1, fontsize=14)
    axs.set_xlabel(xlabel)
    axs.set_ylim(0,140)
    axs.set_xticks(ticks=[p_idx/1.5 + .25 for p_idx in plot_idxs])
    # THIS HAPPENS WHEN YOU PLOT COMPUTE SCALES CPU V GPU
    try:
        axs.set_xticklabels(labels=\
            np.unique(np.array(pops).astype(int)\
                      /np.array(nodes).astype(int)\
                      ).astype(int), rotation = 65)
    except Exception:
        # Fallback labels when the number of unique ratios does not match
        # the number of ticks.
        # NOTE(review): the nesting of this fallback under the except
        # clause was inferred from context; confirm.
        print('tried to make CPUGPU plot on weak scaling')
        if how == 'vanilla':
            axs.set_xticklabels(labels=["{}/{}".format(node,pop) for node, pop in zip(nodes,pops)])
        elif how == 'stims':
            axs.set_xticklabels(labels=["{}/{}".format(node,stim) for node, stim in zip(nodes,stims)])
        elif how == 'sfs':
            axs.set_xticklabels(labels=["{}/{}".format(node,sf) for node, sf in zip(nodes,sfs)])
    axs.set_ylabel('Time (s)')
    # axs.set_title(title,fontweight='bold')
    fig.savefig(os.path.join(path, figname), bbox_inches='tight')
def list_other_logs(f):
    """Return the first non-GPU ``.log`` file found in the directory of ``f``.

    "First" follows ``os.listdir`` order. Raises ``IndexError`` when the
    directory has no matching file.
    """
    folder = os.path.dirname(f)
    candidates = []
    for entry in os.listdir(folder):
        if "gpu" in entry or ".log" not in entry:
            continue
        candidates.append(entry)
    return os.path.join(folder, candidates[0])
def plotScaling(nCpus,nodes,pops, sfs, stims, versions, path, how='vanilla', title=None, show=True):
    """Plot mean runtime per experiment against a reference curve (log y-axis).

    For every experiment the master log is parsed with ``processLog``; if
    that raises, the first other log in the same folder is parsed and
    copied over the master-log path. Experiments without recorded runtimes
    are skipped. The figure is saved as ``<title>_scaling.png`` in ``path``.

    ``nodes`` and ``pops`` must support elementwise comparison with
    ``.all()`` (numpy arrays). ``show`` is unused.

    NOTE(review): ``backup_stddev`` is not defined in the visible part of
    this module; the single-runtime branch depends on it.
    NOTE(review): ``runtimes[0]`` below raises IndexError when every
    experiment was skipped.
    """
    #f = "runTimeLogs/runTime.log"
    runtimes = []
    labels = []
    stds = []
    if title:
        figname = title + "_scaling.png"
        title = title #+ " Scaling"
    for idx,(node,pop,nCpu,stim, sf, vers) in enumerate(zip(nodes,pops,nCpus, stims, sfs, versions)):
        if not title:
            f, title = format_logname(node,pop,nCpu,stim, sf, path, how=how, title=title)
            figname = title + "_scaling.png"
            title = title + " Scaling"
        else:
            f, _ = format_logname(node,pop,nCpu,stim, sf, path, how=how, title=title)
        try:
            logRes = processLog(f)
        except:
            # NOTE(review): bare except also swallows KeyboardInterrupt.
            print("found no master log for ", f, " using first")
            prev_f = f
            f = list_other_logs(prev_f)#re.sub(".log","_0.log", f)
            logRes = processLog(f)
            # Persist the fallback as the master log for later runs.
            shutil.copyfile(f, prev_f)
        if len(logRes['runtimes']) < 1:
            continue
        runtime = np.mean(logRes['runtimes'])
        if len(logRes['runtimes']) > 1:
            stds.append(np.std(logRes['runtimes']))
            print("not using back up", node)
        else:
            stds.append(np.mean(backup_stddev[node]))
            print(" using back up standard deviation for {}.... get more trials".format(node))
        # Label by population when every run used the same node count,
        # otherwise by node count.
        if (nodes[0] == nodes).all():
            label = "{}".format(pop)
        else:
            label = "{}".format(node)
        runtimes.append(runtime)
        labels.append(label)
    # Reference curve: first runtime divided by the node label when the
    # population is constant, runtime times (position + 1) when the node
    # count is constant, and a flat line at the first runtime otherwise.
    if (pops[0] == pops).all():
        lin_decr = runtimes[0]/ np.array([label.replace("N","") for label in labels]).astype(int)
        bench_name = 'Ideal'
    elif (nodes[0] == nodes).all():
        lin_decr = [runtimes[i] * (i+1) for i in range(len(runtimes))]
        bench_name = 'Exponential'
    else:
        bench_name = 'Ideal'
        lin_decr = np.repeat(runtimes[0],len(runtimes))
    fig = plt.figure()
    plt.scatter(np.arange(len(runtimes)), lin_decr, color='orange', label=bench_name, s=15)
    plt.plot(np.arange(len(runtimes)), lin_decr, color='orange')
    ax = fig.axes[0]
    plt.scatter(np.arange(len(runtimes)), runtimes, color='blue', label="Observed", s=15)
    plt.plot(np.arange(len(runtimes)), runtimes, color='blue')
    runtimes, stds = np.array(runtimes), np.array(stds)
    # Shaded band of +/- one standard deviation around the observed curve.
    plt.fill_between(np.arange(len(runtimes)), runtimes-stds, runtimes+stds, alpha=.5)
    plt.yscale("log")
    if (pops[0] == pops).all():
        plt.ylim(bottom=1)
        plt.xlabel("Nodes")
        #ax.yaxis.set_minor_formatter(mticker.ScalarFormatter())
    elif (nodes[0] == nodes).all():
        plt.xlabel("Population")
    else:
        plt.ylim(bottom=10)
        plt.xlabel("Nodes")
        ax.yaxis.set_minor_formatter(mticker.ScalarFormatter())
    plt.xticks(ticks=np.arange(len(runtimes)), labels=labels, rotation=45)
    plt.ylabel("Log(Total Runtime (s))")
    plt.title(title)
    plt.legend()
    plt.savefig(os.path.join(path, figname), bbox_inches='tight')
def compare_scaling(strong_df, weak_df, path):
    """Save runtime and FOM comparison plots for strong vs. weak scaling.

    Both frames need ``offspring``, ``Runtime``, ``Runtime Stddev`` and
    ``FOM`` columns. Two images are written to ``path``:
    ``scaling_time_compare`` (runtime with a +/- one stddev band) and
    ``scaling_fom_compare``.
    """
    #assert (strong_df['offspring'].values == weak_df['offspring'].values).all()
    fig = plt.figure()
    plt.title("Population Scaling Comparison")
    labels = strong_df['offspring'].values
    y = strong_df['Runtime'].values
    err = strong_df['Runtime Stddev'].values
    plt.plot(labels, y, color='blue', label="strong scaling")
    plt.fill_between(labels, y - err, y+ err, color='blue', alpha=.4)
    # revisit this line
    labels = weak_df['offspring'].values
    y = weak_df['Runtime'].values
    err = weak_df['Runtime Stddev'].values
    plt.plot(labels, y, color='red', label="weak scaling")
    plt.fill_between(labels, y - err, y+ err, color='red', alpha=.4)
    plt.ylabel("time (s)")
    plt.xlabel("pop size")
    plt.legend()
    plt.ylim(bottom=0)
    plt.savefig(os.path.join(path, "scaling_time_compare"), bbox_inches='tight')
    plt.close(fig)

    fig2 = plt.figure()
    plt.title("FOM comparison where FOM = pop size / nGPUs / runtime ")
    labels = strong_df['offspring'].values
    y = strong_df['FOM'].values
    plt.plot(labels, y, color='blue', label="strong scaling")
    plt.ylim(bottom=0)
    # revisit this line
    labels = weak_df['offspring'].values
    y = weak_df['FOM'].values
    plt.plot(labels, y, color='red', label="weak scaling")
    plt.ylabel("FOM")
    plt.xlabel("pop size")
    plt.legend()
    plt.savefig(os.path.join(path, "scaling_fom_compare"), bbox_inches='tight')
    # Close the FOM figure; the first figure was already closed above and
    # closing it again left this one open.
    plt.close(fig2)
def compare_stim_scaling(strong_df, weak_df, path):
    """Save a runtime comparison of strong vs. weak scaling over stims.

    Strong-scaling rows are restricted to 20 score functions and sorted by
    ``stims``. Weak-scaling rows with more nodes than stims are dropped.
    Both curves share the strong-scaling ``stims`` values on the x-axis.
    The plot is written to ``<path>/stim_scaling_time_compare``.
    """
    def draw_band(x, mean, spread, color, label):
        # Line plus a +/- one stddev band in the same colour.
        plt.plot(x, mean, color=color, label=label)
        plt.fill_between(x, mean - spread, mean + spread, color=color, alpha=.4)

    strong_df = strong_df[strong_df['score functions'] == 20.0]
    strong_df = strong_df.sort_values(by='stims')
    fig = plt.figure()
    plt.title("Stim Scaling Comparison")
    labels = strong_df['stims'].values
    draw_band(labels, strong_df['Runtime'].values,
              strong_df['Runtime Stddev'].values, 'blue', "strong scaling")
    # MONKEY PATCH
    keep = ~(weak_df['nodes'] > weak_df['stims'])
    weak_df = weak_df[keep]
    draw_band(labels, weak_df['Runtime'].values,
              weak_df['Runtime Stddev'].values, 'red', "weak scaling")
    plt.ylabel("time (s)")
    plt.xlabel("number of stims")
    plt.legend()
    plt.ylim(bottom=0)
    plt.savefig(os.path.join(path, "stim_scaling_time_compare"), bbox_inches='tight')
    plt.close(fig)
def compare_sf_scaling(strong_df, weak_df, path):
    """Save a runtime comparison of strong vs. weak scaling over score functions.

    Strong-scaling rows are limited to 1 stim, 500 offspring and fewer than
    71 score functions, then sorted by ``score functions``. The weak-scaling
    row with 20 score functions on 1 node is removed. Both curves share the
    strong-scaling x values. The plot is written to
    ``<path>/sf_scaling_time_compare``.
    """
    def draw_band(x, mean, spread, color, label):
        # Line plus a +/- one stddev band in the same colour.
        plt.plot(x, mean, color=color, label=label)
        plt.fill_between(x, mean - spread, mean + spread, color=color, alpha=.4)

    strong_df = strong_df[strong_df['stims'] == 1.0]
    strong_df = strong_df[strong_df['offspring'] == 500.0]
    strong_df = strong_df[strong_df['score functions'] < 71]
    baseline_row = (weak_df['score functions'] == 20.0) & (weak_df['nodes'] == 1.0)
    weak_df = weak_df[~baseline_row]
    strong_df = strong_df.sort_values(by='score functions')
    fig = plt.figure()
    plt.title("Score Function Scaling Comparison")
    labels = strong_df['score functions'].values
    draw_band(labels, strong_df['Runtime'].values,
              strong_df['Runtime Stddev'].values, 'blue', "strong scaling")
    draw_band(labels, weak_df['Runtime'].values,
              weak_df['Runtime Stddev'].values, 'red', "weak scaling")
    plt.ylabel("time (s)")
    plt.xlabel("# of score functions")
    plt.legend()
    plt.ylim(bottom=0)
    plt.savefig(os.path.join(path, "sf_scaling_time_compare"), bbox_inches='tight')
    plt.close(fig)
def read_exps(exp_names, condition="vanilla", args=None):
    """Select and order the experiments encoded in ``exp_names``.

    Every name must contain exactly six digit groups, read as node count,
    cores, population, stims, score functions and version. Names that do
    not conform are reported and skipped. Names that differ only in their
    last two characters count as the same experiment and only the first
    one seen is considered.

    Filtering uses the constraint file when ``args.constraint_file`` is
    set (lines of ``key=v1,v2,...`` with keys ``N``, ``offspring``,
    ``n_stims`` and ``n_sfs``); otherwise it uses ``condition`` together
    with the module defaults.

    Returns:
        tuple of numpy string arrays
        ``(nodes, pops, nCpus, sfs, stims, versions)``, sorted by node
        count, or by population when all node counts are equal or
        ``condition`` contains ``'strong'``. ``versions`` is currently
        always ``'0'`` for every entry.

    Raises:
        ZeroDivisionError: no experiment passed the filters (type kept for
            compatibility with the previous ``1/0`` abort).
    """
    nodes, pops, nCpus, sfs, stims = [], [], [], [], []
    seen_experiments = set()

    # Read the constraint file once instead of once per experiment.
    use_constraint = bool(args and args.constraint_file)
    constraints = {}
    if use_constraint:
        with open(args.constraint_file, "r") as f:
            for line in f:
                key, sep, values = line.replace("\n", "").partition("=")
                if not sep:
                    # Blank or malformed line: nothing to record.
                    continue
                constraints[key] = values.split(",")

    for exp_name in exp_names:
        try:
            curr_node, curr_core, curr_pop, curr_stims, curr_sfs, version = re.findall(r'\d+', exp_name) # TODO: use version appropriately
        except ValueError:
            print(exp_name, " doesn't confrom")
            continue
        # Strip the trailing "_<v>" so repeated versions collapse to one.
        shared_exp_name = exp_name[:-2]
        if shared_exp_name in seen_experiments:
            continue
        seen_experiments.add(shared_exp_name)
        # here we can filter to only use relevant experiments
        # TODO: if one wanted to see scaling in multiple dimensions this will not work
        # need to expand conditions to allow something like "stims_sfs"
        if use_constraint:
            if (curr_stims not in constraints['n_stims']
                    or curr_node not in constraints['N']
                    or curr_pop not in constraints['offspring']
                    or curr_sfs not in constraints['n_sfs']):
                continue
            # When the population is listed once, it must sit at the same
            # position in 'offspring' as the node count does in 'N'.
            pop_pos = np.where(np.array(constraints['offspring']) == curr_pop)[0]
            if len(pop_pos) <= 1:
                node_pos = np.where(np.array(constraints['N']) == curr_node)[0]
                if (pop_pos != node_pos).all():
                    continue
        else:
            if "vanilla" in condition and (curr_stims != default_stims or curr_sfs != default_sfs):
                continue
            # MONKEY PATCH IF STATEMENT, ex; if you have a study with 2N 500 pop
            elif "vanilla" in condition and int(curr_pop) < (int(curr_node) * POP_SCALING_FACTOR):
                continue
            elif condition == "stims" and (curr_pop != default_pop or curr_sfs != default_sfs):
                continue
            elif condition == "sfs" and (curr_pop != default_pop or curr_stims != default_stims):
                continue
        print("consuming ", curr_node, curr_pop, curr_stims, curr_sfs)
        nodes.append(curr_node)
        pops.append(curr_pop)
        nCpus.append(curr_core)
        sfs.append(curr_sfs)
        stims.append(curr_stims)

    if len(nodes) < 1:
        print(' NO EXPERIMENTS FOUND')
        raise ZeroDivisionError("no experiments found")

    sort_inds = np.argsort(np.array(nodes).astype(int))
    if (nodes[0] == np.array(nodes)).all():
        sort_inds = np.argsort(np.array(pops).astype(int))
    if 'strong' in condition:
        sort_inds = np.argsort(np.array(pops).astype(int))
    nodes = np.array(nodes)[sort_inds]
    pops = np.array(pops)[sort_inds]
    nCpus = np.array(nCpus)[sort_inds]
    sfs = np.array(sfs)[sort_inds]
    stims = np.array(stims)[sort_inds]
    # TODO: fix this monkey patch on which version to use... this always uses version 0
    max_version_list = np.zeros(shape=len(nodes)).astype(int).astype(str)
    max_version_list = np.array(max_version_list)[sort_inds]
    return nodes, pops, nCpus, sfs, stims, max_version_list
def wrapProfileMaker(nCpus,nodes,pops,stims,sfs, versions, path):
    """Run ``makeCustomProfile`` for every experiment with at most 10 nodes.

    The argument sequences are walked in lockstep; experiments whose node
    count exceeds 10 are skipped.
    """
    experiments = zip(nodes, pops, nCpus, stims, sfs, versions)
    for node, pop, nCpu, stim, sf, vers in experiments:
        if int(node) <= 10:
            makeCustomProfile(node, nCpu, pop, stim, sf, vers, path)
def plot_gpu_pies(df, figname):
    """Save a grid of pie charts showing GPU busy vs. idle share per experiment.

    Rows are de-duplicated on nodes, total GPUs, offspring, stims and score
    functions. The grid has ``floor(sqrt(n))`` rows and ``n // rows``
    columns, so trailing rows that do not fill the grid are not drawn. The
    image is written to ``outputs/<figname>_Pie.png``.

    Args:
        df: Frame with ``nodes``, ``total gpu``, ``offspring``, ``stims``,
            ``score functions`` and ``gpu_util`` columns; ``gpu_util`` is
            treated as a percentage.
        figname: Base name of the output file.
    """
    df = df.drop_duplicates(subset=["nodes", "total gpu", "offspring", "stims", "score functions"])
    if len(df) == 0:
        # Nothing to draw; previously this ended in a division by zero.
        print("no experiments to plot GPU pies for")
        return
    rows = int(np.sqrt(len(df)))
    cols = len(df) // rows
    # squeeze=False always yields a 2-D array of axes, so flatten() also
    # works for a 1x1 grid.
    fig, axs = plt.subplots(nrows=rows, ncols=cols, figsize=(rows*6, cols*2), squeeze=False)
    labels = ['% of Time on GPU', '% of Time GPU Idle']
    for ind, ax in enumerate(axs.flatten()):
        row = df.iloc[ind]
        # The earlier format_logname call here omitted the required `path`
        # argument (TypeError) and its result was never used; it is removed.
        busy = int(row['gpu_util'])
        ax.pie([busy, 100 - busy], labels=labels)
        ax.set_title("{} Nodes, {} GPUs, {} Stims, {} Pop".format(int(row['nodes']),\
                                                                int(row['total gpu']),\
                                                                int(row['stims']),\
                                                                int(row['offspring'])))
    plt.savefig("outputs/{}_Pie.png".format(figname))
    plt.close(fig)
def drop_constant(df, preserve_list=None):
    """Return a copy of ``df`` without columns whose values never change.

    A column is kept when any of its values differs from the first row.
    Columns named in ``preserve_list`` are kept regardless and appended
    when they would otherwise be dropped.

    Column order: when ``'Nodes'`` is preserved it moves to position 0 and
    ``'Offspring'`` (if present) to position 1; otherwise a preserved
    ``'Offspring'`` moves to position 2 (or last when there are fewer
    columns).

    Args:
        df: Non-empty DataFrame; it is not modified.
        preserve_list: Optional list of column names to keep.

    Returns:
        pandas.DataFrame: a new frame.
    """
    preserve_list = list(preserve_list) if preserve_list else []
    # .copy() so the assignments below never write into a view of `df`.
    res = df.loc[:, (df != df.iloc[0]).any()].copy()
    if preserve_list:
        for col in preserve_list:
            res[col] = df[col]
        cols = list(res)
        if 'Nodes' in preserve_list:
            cols.insert(0, cols.pop(cols.index('Nodes')))
            if 'Offspring' in cols:
                cols.insert(1, cols.pop(cols.index('Offspring')))
        elif 'Offspring' in preserve_list:
            cols.insert(2, cols.pop(cols.index('Offspring')))
        res = res.loc[:, cols]
    return res
def plus_minus_cols(df, main, std, drop=True):
    """Replace column ``main`` by ``"<main> ± <std>"`` strings.

    Both values are converted to text and cut to their first five
    characters. The ``main`` column of the passed frame is overwritten in
    place.

    Args:
        df: Frame containing both columns.
        main: Name of the value column.
        std: Name of the spread column.
        drop: When true, the returned frame omits ``std``.

    Returns:
        pandas.DataFrame: ``df`` itself, or a copy without ``std``.
    """
    value_text = df[main].astype(str).str[:5]
    spread_text = df[std].astype(str).str[:5]
    df[main] = value_text + " ± " + spread_text
    return df.drop(std, axis=1) if drop else df
def time_to_50(abstart, start_end_pairs):
    """Return the time from ``abstart`` to the end of the 50th pair.

    Args:
        abstart: Start time, convertible with ``float``.
        start_end_pairs: Sequence of ``(start, end)`` entries.

    Returns:
        ``float(end of pair 50) - float(abstart)``, or ``0`` after printing
        a warning when fewer than 50 pairs are available.
    """
    try:
        finish = start_end_pairs[49][1]
    except IndexError:
        print(f"WARNING only {len(start_end_pairs)} start end pairs")
        return 0
    return float(finish) - float(abstart)
def calculate_gen_time(start_end_pairs):
    """Mean and standard deviation of ``end - start`` over the pairs.

    The first pair is ignored. A pair is also ignored when either value is
    not exactly 10 characters long or when the duration is below one
    second.

    Returns:
        tuple: ``(mean, std)`` as computed by numpy (NaN for no durations).
    """
    durations = []
    for pair in start_end_pairs[1:]:
        begin, finish = pair[0], pair[1]
        # Values of any other length are treated as badly formatted times.
        if len(finish) != 10 or len(begin) != 10:
            continue
        elapsed = float(finish) - float(begin)
        # Sub-second (including zero-length) generations are discarded.
        if elapsed < 1:
            continue
        durations.append(elapsed)
    durations = np.array(durations)
    return np.mean(durations), np.std(durations)
def calculate_deap_time(start_end_pairs):
    """Mean and standard deviation of the gaps between consecutive pairs.

    A gap is the start of one pair minus the end of the pair before it.
    Gaps are ignored when either value is not exactly 10 characters long or
    when the gap is below one second. With more than 25 gaps, the first 25
    are discarded and a notice is printed.

    Returns:
        tuple: ``(mean, std)`` as computed by numpy (NaN for no gaps).
    """
    gaps = []
    for previous, current in zip(start_end_pairs, start_end_pairs[1:]):
        gen_start = current[0]
        prev_end = previous[1]
        # Values of any other length are treated as badly formatted times.
        if len(gen_start) != 10 or len(prev_end) != 10:
            continue
        gap = float(gen_start) - float(prev_end)
        if gap < 1:
            continue
        gaps.append(gap)
    gaps = np.array(gaps)
    if len(gaps) > 25:
        print("MONKEY PATCH INCORRECTLY FORMATTED TIMES")
        gaps = gaps[25:]
    return np.mean(gaps), np.std(gaps)
def generate_result_table(nCpus,nodes,pops, sfs, stims, versions, path, title=None, how='vanilla', skip_latex=True):
    """Build the per-experiment summary table and save it as CSV.

    One row is produced per experiment from its processed log and, when one
    exists, its first GPU utilization log. Experiments whose mean runtime is
    NaN are left out.

    Args:
        nCpus, nodes, pops, sfs, stims, versions: parallel sequences zipped
            together, one entry per experiment. All but ``versions`` are
            converted with ``int()`` when the row is written.
        path: directory the CSV (and optional LaTeX table) is written to;
            also forwarded to the log-name helpers.
        title: base name for the output files. When falsy it is taken from
            ``format_logname`` for the first experiment.
        how: forwarded unchanged to ``format_logname`` and
            ``format_gpu_util_name``.
        skip_latex: when True (the default, matching the previously
            hard-coded behavior) no LaTeX table is written.

    Returns:
        The summary ``DataFrame`` sorted by ``'Nodes'``.
    """
    df = pd.DataFrame(columns=['Nodes','Total Cpus', 'Total Gpus',
                               'Offspring', 'Stimuli', 'Score Functions',
                               'Runtime', 'Runtime Stddev', 'FOM', 'FOM Std Dev','GPU Utilization',
                               'Mean Eval Time', 'Std Eval Time', 'Mean Sim Time', 'Std Sim Time',
                               'DEAP time', 'DEAP Time Std Dev', 'Time to 50 Gen',
                               'Mean Gen Size', 'Std Gen Size', 'Num Trials'])
    # Bound up front so an empty experiment list with no title cannot hit an
    # unbound name when saving.
    figname = None
    df_name = None
    if title:
        figname = title + "_scale.tex"
        df_name = title + "_scale.csv"
    fn_to_gpu_df = {}
    for idx, (node, pop, nCpu, stim, sf, _vers) in enumerate(zip(nodes, pops, nCpus, stims, sfs, versions)):
        if not title:
            # the first experiment supplies the title used for output names
            f, title = format_logname(node,pop,nCpu,stim, sf, path, how=how, title=title)
            gpu_logs, num_logs = format_gpu_util_name(node,pop,nCpu,stim, sf, path, how=how, title=title)
            figname = title + "_scale.tex"
            df_name = title + "_scale.csv"
        else:
            f, _ = format_logname(node,pop,nCpu,stim, sf, path, how=how, title=title)
            gpu_logs, num_logs = format_gpu_util_name(node,pop,nCpu,stim, sf, path, how=how, title=title)

        # Reset per experiment: without this a missing GPU log raised a
        # NameError on the first row and silently reused the previous
        # experiment's utilization on later rows.
        percent_utilization = 0
        if len(gpu_logs) > 0:
            fn = gpu_logs[0]
            try:
                percent_utilization, gpu_df = read_gpu_logs(fn)
                fn_to_gpu_df[fn] = gpu_df
            except Exception as e:
                print(e, " error generating GPU result table")
                percent_utilization = 0

        logRes = processLog(f)
        if 'core_neuron' in os.path.abspath('./'):
            # if we are in core neuron we get 8 start end pairs on rank 1, but we
            # only care about the last
            pairs = logRes['startEndPairs'][::8]
        else:
            pairs = logRes['startEndPairs']
        total_time = time_to_50(logRes['absStart'], pairs)
        mean_deap, std_deap = calculate_deap_time(pairs)

        mean_runtime, std_runtime = np.mean(logRes['runtimes']), np.std(logRes['runtimes'])
        mean_eval, std_eval = np.mean(logRes['evalTimes']), np.std(logRes['evalTimes'])
        mean_neuroGPU, std_neuroGPU = np.mean(logRes['neuroGPUTimes']), np.std(logRes['neuroGPUTimes'])
        mean_gen_size, std_gen_size = np.mean(logRes['gen_sizes']), np.std(logRes['gen_sizes'])
        # DEAP runtime consideration covers first simulation round too so we subtract it
        mean_deap = mean_deap - mean_neuroGPU
        nGpus = logRes['nGpus']
        # figure of merit: offspring per GPU divided by each recorded runtime
        FOM = int(pop)/(nGpus*int(node))/np.array(logRes['runtimes'], dtype=np.float64)
        fom_mean = np.mean(FOM)
        fom_dev = np.std(FOM)
        num_trials = len(logRes['neuroGPUTimes'])
        pct_util = float(percent_utilization)
        if np.isnan(mean_runtime):
            continue
        df.loc[idx] = [int(node), int(nCpu)*int(node), nGpus*int(node),
                       int(pop), int(stim), int(sf), float(mean_runtime), float(std_runtime),
                       fom_mean, fom_dev, pct_util, mean_eval, std_eval,
                       mean_neuroGPU, std_neuroGPU, mean_deap, std_deap, total_time,
                       mean_gen_size, std_gen_size, num_trials]
    df = df.sort_values('Nodes', ascending=True)

    if df_name is None:
        # no title given and no experiment to derive one from
        print("WARNING: no title and no experiments, result table not saved")
        return df

    # SAVE CSV
    df.to_csv(os.path.join(path, df_name))
    if not skip_latex:
        # SAVE LATEX
        latex_df = drop_constant(df, preserve_list=['Nodes','Offspring'] )
        latex_df = plus_minus_cols(latex_df, main='Runtime',std='Runtime Stddev')
        latex_df = plus_minus_cols(latex_df, main='FOM',std='FOM Std Dev')
        latex_df['GPU Utilization'] = latex_df['GPU Utilization'].astype(str).apply(lambda x: x[:4]) + "%"
        # size the column spec from the frame that is actually written
        col_fmt = "|" + "|".join(['c'] * len(latex_df.columns)) + "|"
        latex_df.to_latex(os.path.join(path, figname), float_format="%.0f", index=False, column_format=col_fmt)
    else:
        print("WARNING: skipped latex")
    print("WARNING: made a bunch of gpu dfs but not doing much with em .. could plot")
    return df
def write_all_files(dest, srcs, overwrite=True):
    """Concatenate the files in ``srcs`` into ``dest``.

    Args:
        dest: path of the file to write.
        srcs: list of source paths, copied in order.
        overwrite: when True ``dest`` is opened with ``'w'``. If ``dest`` is
            itself listed in ``srcs`` it is therefore already empty when read
            back, so its old content is dropped. When False the first entry
            of ``srcs`` is skipped (it is expected to be the master log,
            i.e. to end in ``SF.log``) and the rest are appended to ``dest``.
    """
    if not overwrite:
        first = srcs[0] if srcs else ""
        # Sanity check only. This used to be an assert that dropped into pdb
        # on failure, which stalls non-interactive runs and disappears
        # under -O.
        if first[-6:-4] != 'SF':
            print(f"WARNING: this should not be a minion log, but it ends in a {first[-6:-4]} instead of SF.log: {first!r}")
        srcs = srcs[1:]
        open_mode = 'a+'
    else:
        open_mode = 'w'
    with open(dest, open_mode) as outfile:  # dest is in src, but gets cleaned out here, good
        for fname in srcs:
            with open(fname) as infile:
                for line in infile:
                    outfile.write(line)
def check_for_first_merge(log_path, old_log):
    """Tell whether ``old_log`` comes from the first versioned run folder.

    Every path that starts with the directory of ``log_path`` is globbed and
    sorted, and the first hit (taken to be the master directory itself) is
    dropped. ``old_log`` counts as the first merge when its file name,
    without ``.log``, equals the name of the first remaining entry. In that
    case the caller wipes the master log and replaces it with ``old_log``.

    Args:
        log_path: path of the master log inside the aggregate directory.
        old_log: path of the versioned log being merged.

    Returns:
        True when ``old_log`` belongs to the first matching folder.

    Raises:
        AssertionError: when nothing but the master directory matches.
    """
    # escape the literal prefix so glob metacharacters in it are not expanded
    prefix = glob.escape(os.path.dirname(log_path))
    listing = sorted(glob.glob(prefix + '*'))[1:]  # check all matching folders
    if not listing:
        raise AssertionError(f"only master dir exists for {log_path}")
    return os.path.basename(listing[0]) == os.path.basename(old_log).replace('.log', '')
def merge_experiments(src, dest, version, path):
    """Fold one versioned experiment folder into its aggregate folder.

    The profile and the GPU utilization log are copied into ``dest`` under
    version-tagged names, then the run log is merged into the aggregate log
    with ``write_all_files``. Nothing is deleted from the source folder.

    Args:
        src: name of the versioned experiment folder; the six integers in it
            are read as node, core, population, stims, sfs and version.
        dest: aggregate folder, created if missing.
        version: overwritten by the version parsed from ``src``.
        path: directory that contains ``src``.
    """
    src_path = os.path.join(path, src)
    # TODO: use version appropriately (the argument is replaced right here)
    curr_node, curr_core, curr_pop, curr_stims, curr_sfs, version = re.findall(r'\d+', src)
    stem = "{}N_{}C_{}O_{}S_{}SF".format(curr_node, curr_core, curr_pop, curr_stims, curr_sfs)

    if not os.path.isdir(dest):
        os.makedirs(dest)

    # (name in source folder, name in aggregate folder, message when missing)
    side_files = (
        (stem + ".prof", "{}.{}.prof".format(stem, version), "no profile for "),
        ("gpu_utillization.log", "gpu_utillization.{}.log".format(version), "no gpu util for "),
    )
    for src_name, dest_name, missing_msg in side_files:
        try:
            shutil.copy(os.path.join(src_path, src_name), os.path.join(dest, dest_name))
        except FileNotFoundError:
            print(missing_msg, src_path)

    # combine regular log
    log_path = os.path.join(dest, stem + ".log")
    old_log = os.path.join(src_path, "{}_{}.log".format(stem, version))
    first_merge = check_for_first_merge(log_path, old_log)
    have_master = os.path.isfile(log_path)
    have_old = os.path.isfile(old_log)

    if not have_old:
        print("WARNING: No log merge for ", log_path)
    elif not have_master:
        # you haven't processed folder
        write_all_files(log_path, [old_log])
    elif first_merge:
        # master log is replaced by this first versioned log
        write_all_files(log_path, [log_path, old_log])
    else:
        write_all_files(log_path, [log_path, old_log], overwrite=False)

    print("not deleting ... could be though")
    print(src_path, log_path)
def collapse_exps(exp_names, path):
    """Collapse versioned experiment folders into one folder per configuration.

    Names are processed in sorted order. A name must contain exactly six
    integers (node, core, population, stims, sfs, version); anything else is
    reported and skipped. Version 0, or any version whose aggregate folder
    does not exist yet, (re)creates the aggregate folder as a copy of the
    experiment folder and strips the version suffix from its log. Every
    other version is handed to ``merge_experiments``.

    Args:
        exp_names: experiment folder names found under ``path``.
        path: directory holding the experiment folders and the aggregates.
    """
    for exp_name in sorted(exp_names):
        fields = re.findall(r'\d+', exp_name)
        if len(fields) != 6:
            print(exp_name, " doesn't confrom")
            continue
        # TODO: use version appropriately
        curr_node, curr_core, curr_pop, curr_stims, curr_sfs, version = fields
        agg_exp_path = os.path.join(path, "{}N_{}C_{}O_{}S_{}SF".format(curr_node, curr_core, curr_pop, curr_stims, curr_sfs))

        if int(version) != 0 and os.path.isdir(agg_exp_path):
            merge_experiments(exp_name, agg_exp_path, version, path)
            continue

        # start the aggregate folder from scratch
        if os.path.isdir(agg_exp_path):
            shutil.rmtree(agg_exp_path)
        shutil.copytree(os.path.join(path, exp_name), agg_exp_path)

        log_name = [file for file in os.listdir(agg_exp_path) if ".log" in file and "gpu" not in file]
        if not log_name:
            print("no log for : ", agg_exp_path)
            print(os.path.dirname(agg_exp_path))
            continue
        # Strip the trailing "_<version>" whatever its width. The previous
        # fixed-width slice only worked for single-digit versions.
        unversioned = re.sub(r'_\d+\.log$', '.log', log_name[0])
        try:
            os.rename(os.path.join(agg_exp_path, log_name[0]), os.path.join(agg_exp_path, unversioned))
        except OSError:
            print("no log for : ", agg_exp_path)
            print(os.path.dirname(agg_exp_path))
            continue
        print(log_name)
    return
def sf_plot_strategy(exp_names, args, collapse=False):
    """Run the score-function pipeline: scaling plot, bottleneck plot, table.

    Args:
        exp_names: experiment folder names.
        args: parsed command-line namespace; ``args.path`` is the output root.
        collapse: when True, collapse versioned folders first.
    """
    if collapse:
        collapse_exps(exp_names, args.path)
    nodes, pops, nCpus, sfs, stims, versions = read_exps(exp_names, condition='sfs', args=args)
    plt.title("Population Size Scaling w. Nodes")

    # every stage takes the same positional arguments
    stage_args = (nCpus, nodes, pops, sfs, stims, versions, args.path)
    # step 1
    plotScaling(*stage_args, how='sfs')
    # step 2 (wrapProfileMaker) is disabled
    # step 3
    plot_CPUGPU_bottleneck(*stage_args, how='sfs')
    # step 4
    generate_result_table(*stage_args, how='sfs')
def stim_plot_strategy(exp_names, args, collapse=False):
    """Run the stimuli pipeline: scaling plot, bottleneck plot, table.

    ``how`` is ``'full'`` when ``args.constraint_file`` is set and
    ``'vanilla'`` otherwise.

    Args:
        exp_names: experiment folder names.
        args: parsed command-line namespace.
        collapse: when True, collapse versioned folders first.
    """
    if collapse:
        collapse_exps(exp_names, args.path)
    nodes, pops, nCpus, sfs, stims, versions = read_exps(exp_names, condition='stims', args=args)
    plt.title("Population Size Scaling w. Nodes")

    how = 'full' if args.constraint_file else 'vanilla'
    stage_args = (nCpus, nodes, pops, sfs, stims, versions, args.path)
    # step 1
    plotScaling(*stage_args, how=how)
    # step 2 (wrapProfileMaker) is disabled
    # step 3
    plot_CPUGPU_bottleneck(*stage_args, how=how)
    # step 4
    generate_result_table(*stage_args, how=how)
def vanilla_plot_strategy(exp_names, args, collapse=False):
    """Run the default (population) pipeline with the custom plot parameters.

    The custom matplotlib parameters are applied before the plots and the
    defaults are restored after the result table has been generated.

    Args:
        exp_names: experiment folder names.
        args: parsed command-line namespace; ``args.constraint_file``, when
            set, switches ``how`` to ``'full'`` and supplies the title.
        collapse: when True, collapse versioned folders first.
    """
    if collapse:
        collapse_exps(exp_names,args.path)
    print("NOT COLLAPSING CHANGE L8R")
    nodes, pops, nCpus, sfs, stims, versions = read_exps(exp_names, args=args)
    plt.title("Population Size Scaling w. Nodes")
    set_custom_params_plt()
    print("CUSTOMING PARAMS")

    if args.constraint_file:
        how = 'full'
        title = os.path.basename(args.constraint_file)
        # figname is only consumed by the disabled plot_gpu_pies step
        figname = os.path.basename(args.constraint_file)
    else:
        how = 'vanilla'
        title = None
        figname = 'population'

    stage_args = (nCpus, nodes, pops, sfs, stims, versions, args.path)
    # step 1
    plotScaling(*stage_args, how=how, title=title)
    # step 2 (wrapProfileMaker) is disabled
    # step 3
    plot_CPUGPU_bottleneck(*stage_args, how=how, title=title)
    # step 4
    df = generate_result_table(*stage_args, how=how, title=title)
    restore_default_mpl_params()
    # step 5 (plot_gpu_pies(df, figname)) is disabled
def strong_plot_strategy(exp_names, args, collapse=False):
    """Build the strong-scaling table and compare it with the weak-scaling CSV.

    ``args.stims`` takes precedence over ``args.sfs``; with neither set the
    population ("vanilla") variant is used. The weak-scaling table is read
    from the ``weak_outputs`` directory.

    Args:
        exp_names: experiment folder names.
        args: parsed command-line namespace.
        collapse: when True, collapse versioned folders first.
    """
    if collapse:
        collapse_exps(exp_names, args.path)

    if args.stims:
        how, weak_name = "strong_stims", "stim_scale.csv"
    elif args.sfs:
        how, weak_name = "strong_sfs", 'sf_scale.csv'
    else:
        how, weak_name = "strong_vanilla", 'pop_scale.csv'

    nodes, pops, nCpus, sfs, stims, versions = read_exps(exp_names, condition=how, args=args)
    plt.title("Population Size Scaling w. Nodes")
    # step 4
    strong_df = generate_result_table(nCpus,nodes,pops, sfs, stims, versions, args.path, how=how)
    weak_df = pd.read_csv(os.path.join("weak_outputs",weak_name))

    if args.stims:
        compare_stim_scaling(strong_df, weak_df)
    elif args.sfs:
        compare_sf_scaling(strong_df, weak_df)
    else:
        compare_scaling(strong_df, weak_df)
def check_collapse(exp_names, path):
    """Return True when some experiment still lacks its aggregate folder.

    Entries that are regular files under ``path`` are ignored. For every
    other entry the aggregate folder name is everything up to and including
    the first ``"SF"`` in the entry name.
    """
    def _needs_collapse(exp_name):
        # plain files are not experiments
        if os.path.isfile(os.path.join(path, exp_name)):
            return False
        aggregate = exp_name.split("SF")[0] + "SF"
        return not os.path.isdir(os.path.join(path, aggregate))

    return any(_needs_collapse(exp_name) for exp_name in exp_names)
def find_largest_std(exp_names, args):
    """Collect runtime standard deviations per node count, pickle them, exit.

    For each experiment the first candidate log that ``processLog`` can
    parse is used. The resulting dict (node -> list of runtime stds) is
    written to ``std_dev_backup.pkl`` in the working directory, after which
    the interpreter exits.

    Args:
        exp_names: experiment folder names.
        args: parsed command-line namespace; only ``args.path`` is read.
    """
    stds = {}
    nodes, pops, nCpus, sfs, stims, versions = read_exps(exp_names, condition="permissive")
    for node, pop, nCpu, sf, stim, version in zip(nodes, pops, nCpus, sfs, stims, versions):
        logRes = None
        for candidate in format_logname_general(node, pop, nCpu, stim, sf, args.path, 'vanilla'):
            try:
                logRes = processLog(candidate)
            except:  # deliberately broad: any failure just moves on to the next candidate
                continue
            else:
                break
        if not logRes:
            continue
        _mean_runtime, std_runtime = np.mean(logRes['runtimes']), np.std(logRes['runtimes'])
        # ignore an obvious case where algorithm glitched for some reason and took 300 seconds or something
        if node == '1' and std_runtime > 20:
            continue
        stds.setdefault(node, []).append(std_runtime)

    # Replace NaN or zero averages with the previous key's list. For the
    # first key, index -1 selects the last key.
    stds_keys = list(stds.keys())
    for idx, key in enumerate(stds_keys):
        average = np.mean(stds[key])
        if np.isnan(average) or not average:
            stds[key] = stds[stds_keys[idx-1]]

    with open("std_dev_backup.pkl",'wb') as f:
        pickle.dump(stds,f)
    exit()
if __name__ == "__main__":
    # Command-line entry point: pick a plotting strategy from the flags.
    parser = argparse.ArgumentParser(
        description='Benchmarking viz',
        formatter_class=argparse.RawDescriptionHelpFormatter)
    for flag in ('--stims', '--sfs', '--strong'):
        parser.add_argument(flag, action="store_true")
    parser.add_argument('--constraint_file', type=str, required=False, default=None)
    parser.add_argument('--path', type=str, required=False, default="outputs")
    args = parser.parse_args()

    # make this more strict later --> should match coresnodes_POPSIZE_iteration
    exp_names = [entry for entry in os.listdir(args.path)
                 if "_" in entry and "ipynb" not in entry]
    # find_largest_std(exp_names, args)

    # The collapse check still runs, but its answer is overridden: collapsing
    # is currently switched off.
    check_collapse(exp_names, args.path)
    collapse = False
    print(collapse, "SHOULD I COLLAPSE ?? IM SPITTING THESE RAPS TIL THE DAY THAT I DROP")

    if args.stims:
        strategy = stim_plot_strategy
    elif args.sfs:
        strategy = sf_plot_strategy
    else:
        strategy = vanilla_plot_strategy
    strategy(exp_names, args, collapse=collapse)

    # the strong-scaling comparison runs in addition to the strategy above
    if args.strong:
        strong_plot_strategy(exp_names, args, collapse=collapse)