# Source code for benchmark_ea.python.plotting.plot_benchmarking

import numpy as np
import matplotlib.pyplot as plt
import re
import os
import pstats
import sys
import io
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import shutil
import argparse
import matplotlib
from matplotlib import cycler
import pickle
import matplotlib.ticker as mticker
import matplotlib
import glob



# font = {'family' : 'normal',
#         'size'   : 20}

# matplotlib.rc('font', **font)
# # plt.rcParams["font.weight"] = "bold"
# plt.rcParams["axes.labelweight"] = "bold"

# Default experiment dimensions, stored as strings. format_logname and
# format_gpu_util_name substitute them for whichever dimensions the selected
# `how` mode holds fixed, and read_exps compares them against the string
# fields parsed out of experiment names.
default_sfs = str(20)
default_stims = str(1)
default_pop = str(500)
# read_exps skips "vanilla" experiments whose population is smaller than
# nodes * POP_SCALING_FACTOR.
POP_SCALING_FACTOR=500


def title_and_save(fig,title, pdf):
    """Caption the current axes with a running figure number, save, and close.

    The caption is ``"Fig <n>: "`` followed by ``title`` when ``title`` is
    truthy. ``fig`` is written through ``pdf.savefig`` and then closed, and
    the module-level ``fig_count`` counter is incremented.

    NOTE(review): ``fig_count`` must already exist at module level when this
    is called; no definition is visible in this part of the file.
    """
    global fig_count
    caption = "Fig {}: ".format(fig_count)
    if title:
        caption = caption + title
    plt.title(caption, fontsize=20)
    pdf.savefig(fig, bbox_inches='tight')
    fig_count += 1
    plt.close(fig)


def format_logname_general(node, pop, nCpu, stim, sf, path, how, title=None):
    """List candidate run logs for ``node`` under ``summit_outputs``.

    Every sub-folder of ``summit_outputs`` whose name contains ``"<node>N"``
    is scanned, and each entry whose name contains ``".log"`` but not
    ``"gpu"`` is returned as a joined path. Only ``node`` is used; the other
    parameters are accepted but ignored.

    NOTE: this function is defined a second time further down in the file,
    and that later definition is the one in effect after import.
    """
    root = 'summit_outputs'
    return [
        os.path.join(root, folder, entry)
        for folder in os.listdir(root)
        if f'{node}N' in folder
        for entry in os.listdir(os.path.join(root, folder))
        if '.log' in entry and 'gpu' not in entry
    ]
        

def set_custom_params_plt():
    """Apply the module's matplotlib defaults (currently only font size 18)."""
    # This colour cycle is constructed but never installed into rcParams;
    # the styling that would have used it is disabled below.
    palette = cycler(
        'color',
        ['#EE6666', '#3388BB', '#9988DD', '#EECC55', '#88BB44', '#FFBBBB'],
    )
    # plt.rc('axes', facecolor='#E6E6E6', edgecolor='none',
    #     axisbelow=True, grid=True)
    # plt.rc('grid', color='w', linestyle='solid')
    # plt.rc('patch', edgecolor='#E6E6E6')
    # plt.rc('lines', linewidth=2)
    # plt.rcParams['font.family'] = 'serif'
    # plt.rcParams['font.serif'] = ['Times New Roman'] + plt.rcParams['font.serif']
    plt.rcParams['font.size'] = 18


# Applied once when the module is imported.
set_custom_params_plt()

def restore_default_mpl_params():
    """Overwrite the active matplotlib rcParams with ``matplotlib.rcParamsDefault``."""
    factory_settings = matplotlib.rcParamsDefault
    matplotlib.rcParams.update(factory_settings)

    
def format_logname_general(node, pop, nCpu, stim, sf, path, how, title=None):
    """Collect non-GPU ``.log`` paths for ``node`` from ``summit_outputs``.

    A sub-folder is considered when its name contains ``"<node>N"``; inside
    it, entries whose name contains ``".log"`` and does not contain ``"gpu"``
    are returned as paths. All parameters other than ``node`` are unused.

    NOTE: this repeats an identical definition that appears earlier in the
    file; being later, this is the one that stays bound.
    """
    root = 'summit_outputs'
    found = []
    for folder in os.listdir(root):
        if f'{node}N' not in folder:
            continue
        for entry in os.listdir(os.path.join(root, folder)):
            if '.log' not in entry or 'gpu' in entry:
                continue
            found.append(os.path.join(root, folder, entry))
    return found
        
    
def format_logname(node, pop, nCpu, stim, sf, path, how, title=None):
    """Build the master log path for one experiment and pick a plot title.

    ``how`` selects which dimensions are forced to the module defaults:

    * ``'vanilla'`` - stims and score functions fixed, title "Population"
    * ``'stims'``   - population and score functions fixed, title "Stims"
    * ``'sfs'``     - population and stims fixed, title "Sfs"
    * ``'full'``    - nothing overridden; ``title`` must be truthy

    Returns ``(log_path, title)`` where the path has the form
    ``<path>/<N>N_<C>C_<O>O_<S>S_<SF>SF/<same>.log``. The path is returned
    even when the file is missing; in that case a "NO FILE" line is printed.

    Raises NotImplementedError for any other ``how``.
    """
    if how not in ('vanilla', 'stims', 'sfs', 'full'):
        raise NotImplementedError

    if how == 'full':
        assert title
    else:
        title = {'vanilla': "Population", 'stims': "Stims", 'sfs': "Sfs"}[how]
        # Each mode varies exactly one dimension; pin the other two.
        if how != 'vanilla':
            pop = default_pop
        if how != 'stims':
            stim = default_stims
        if how != 'sfs':
            sf = default_sfs

    stem = "{}N_{}C_{}O_{}S_{}SF".format(node, nCpu, pop, stim, sf)
    logname = "{}/{}/{}.log".format(path, stem, stem)
    if not os.path.isfile(logname):
        print("NO FILE ", logname)
    return logname, title

def format_gpu_util_name(node, pop, nCpu, stim, sf, path, how, title=None):
    """Find the GPU utilisation logs inside one experiment's output folder.

    ``how`` pins dimensions to the module defaults exactly as in
    ``format_logname`` ('vanilla', 'stims', 'sfs', or 'full'; with 'full'
    ``title`` must be truthy, otherwise ``title`` is not used for anything
    returned). The folder ``<path>/<N>N_<C>C_<O>O_<S>S_<SF>SF/`` is listed
    and every entry whose name contains "gpu_utillization" is kept.

    Returns ``(list_of_paths, count)``.
    Raises NotImplementedError for an unrecognised ``how``.
    """
    if how not in ('vanilla', 'stims', 'sfs', 'full'):
        raise NotImplementedError

    if how == 'full':
        assert title
    else:
        # Each mode varies exactly one dimension; pin the other two.
        if how != 'vanilla':
            pop = default_pop
        if how != 'stims':
            stim = default_stims
        if how != 'sfs':
            sf = default_sfs

    run_dir = "{}/{}N_{}C_{}O_{}S_{}SF/".format(path, node, nCpu, pop, stim, sf)
    gpu_logs = []
    for entry in os.listdir(run_dir):
        if "gpu_utillization" in entry:
            gpu_logs.append(os.path.join(run_dir, entry))
    return gpu_logs, len(gpu_logs)


def find_gpu_logs(basepath):
    """Return the sorted paths of entries in ``basepath`` whose name contains "gpu_utillization"."""
    matches = []
    for entry in os.listdir(basepath):
        if "gpu_utillization" in entry:
            matches.append(os.path.join(basepath, entry))
    matches.sort()
    return matches
    
    
def read_gpu_logs(fn, n_gpus=6, year="2021"):
    """Parse a GPU utilisation log and compute the mean utilisation.

    Only the directory of ``fn`` is used: the GPU logs in that directory are
    listed with ``find_gpu_logs`` and the last one (in sorted order) is read.

    The file is split on commas and its first line is taken as the header.
    Repeated header rows, newline-only rows and rows whose columns look
    shifted are dropped, as are rows whose timestamp field contains ``year``
    more than once or cannot be parsed as a datetime.

    Parameters
    ----------
    fn : str
        Path of any file inside the experiment folder holding the GPU logs.
    n_gpus : int, optional
        Number of GPUs that contributed samples (default 6, the value that
        was previously hard-coded).
    year : str, optional
        Text that should occur exactly once in a well-formed timestamp
        (default "2021", the value that was previously hard-coded).

    Returns
    -------
    (percent_utilization, gpu_df)
        ``percent_utilization`` is ``sum(utilization) / (n_gpus * elapsed)``
        with ``elapsed`` the whole seconds between the first and last sample;
        it is ``0.0`` when that span is zero. Presumably this is the mean
        per-GPU utilisation in percent, assuming one sample per GPU per
        second. ``gpu_df`` is the cleaned DataFrame with a datetime
        ``timestamp`` column and an integer ``utilization`` column.

    Raises
    ------
    IndexError
        If the directory contains no GPU utilisation log.
    """
    basepath = os.path.dirname(fn)
    gpu_logs = find_gpu_logs(basepath)
    fn = gpu_logs[-1]

    with open(fn, 'r') as gpu_f:
        lines = gpu_f.readlines()
    gpu_df = pd.DataFrame([sub.split(",") for sub in lines])
    gpu_df.columns = gpu_df.iloc[0]
    gpu_df = gpu_df[1:]
    gpu_df = gpu_df.rename({' name': 'name', ' utilization.gpu [%]': 'utilization'}, axis=1)

    # Remove label rows and rows whose fields are shifted or scrambled.
    gpu_df = gpu_df[gpu_df['name'] != ' name']
    gpu_df = gpu_df[gpu_df['timestamp'] != '\n']
    gpu_df = gpu_df[gpu_df['utilization'] != ' utilization.memory [%]']
    # Two occurrences of the year mean two records were interleaved on one line.
    gpu_df = gpu_df[gpu_df['timestamp'].str.count(re.escape(str(year))) < 2]

    gpu_df = gpu_df.assign(timestamp=pd.to_datetime(gpu_df['timestamp'], errors='coerce'))
    gpu_df = gpu_df.dropna(axis=0, subset=['timestamp']).copy()

    # Strip the literal percent sign. The previous pattern " \%" only worked
    # when pandas treated it as a regex, which is no longer the default.
    gpu_df['utilization'] = (
        gpu_df['utilization'].str.replace("%", "", regex=False).str.strip().astype(int)
    )

    # total_seconds() instead of .seconds: the latter drops whole days, so it
    # wraps around for logs that span more than 24 hours.
    span = gpu_df['timestamp'].max() - gpu_df['timestamp'].min()
    total_elapsed = int(span.total_seconds())
    if total_elapsed <= 0:
        # A single sample (or identical timestamps) gives no usable time base.
        return 0.0, gpu_df

    percent_utilization = float(np.sum(gpu_df['utilization']) / (n_gpus * total_elapsed))
    return percent_utilization, gpu_df

def processLog(f):
    """Parse one run log into a dictionary of timing data.

    The file is scanned line by line. Each ``if`` below is independent, so a
    single line may feed several collectors. The returned dict has the keys
    ``procStartDict``, ``procEndDict``, ``startEndPairs``, ``runtimes``,
    ``compStartDict``, ``sfs``, ``evalTimes``, ``neuroGPUTimes``,
    ``procToSf``, ``absStart``, ``neuroGPUStartTimes``, ``neuroGPUEndTimes``,
    ``ioTimes``, ``nGpus`` and ``gen_sizes``.

    Most values are kept as the strings found in the log; ``runtimes`` and
    ``gen_sizes`` hold floats, and ``evalTimes`` / ``neuroGPUTimes`` become
    float32 arrays once at least one matching line has been seen.

    If the log never yields an "absolute start" or an "nGPUS" line, building
    the result raises UnboundLocalError; that is caught and a dict of empty
    defaults (with ``nGpus`` = 6 and ``absStart`` = 0) is returned instead.
    """
    with open(f, "r") as file:
        startEndPairs = []
        runtimes = []
        sfs = []
        neuroGPUStartTimes = []
        neuroGPUEndTimes = []
        procToSf = {}
        procStartDict = {}
        procEndDict = {}
        compStartDict = {}
        io_times = []
        evalTimes = []
        neuroGPUTimes = []
        gens = []
        file_lines =file.readlines()
        # State flags for bracketed lists that continue over several lines.
        # NOTE(review): the names look swapped - readingEnds is set while
        # collecting *start* times and readingStarts while collecting *end*
        # times; each is used consistently within its own branch.
        readingEnds = False
        readingStarts = False
        # True until the first "took:" line after an "absolute start" line.
        firstGen = True
        for line in file_lines:
            if "Date:" in line:
                print(line)
            if "absolute start" in line:
                numbers = re.findall(r'\d+', line)
                firstGen = True
                # Re-join consecutive digit runs as "<int>.<frac>"; keep the first.
                abs_start = [ '.'.join(x) for x in zip(numbers[0::2], numbers[1::2]) ][0]
            if "nCpus" in line:
                # Trailing integer of the line; not used anywhere below.
                nCpus = int(re.match('.*?([0-9]+)$', line).group(1))
                #assert nCpus  == cpu, "expected {} but got {} cpus in log".format(cpu,nCpus)
            if "nGPUS" in line:
                try:
                    # Only the 8 characters starting at "nGPUS" are examined.
                    curr_idx = line.find('nGPUS')
                    curr_line = line[curr_idx:curr_idx+8]
                    nGpus = int(re.match('.*?([0-9]+)$', curr_line).group(1))
                except:
                    # default to using experimental name due to GPU line being scrambled
                    # NOTE(review): this takes the first digit 1-9 that is
                    # directly followed by "S" in the log path - confirm that
                    # this field really is the GPU count.
                    nGpus = int( re.findall(r'[1-9]S',f)[0][0])
                
                
            if "took:" in line:
                # The second numeric token on the line is the runtime.
                runtime = float(re.findall(r"[-+]?\d*\.\d+|\d+",line)[1])
                #print(runtime)
#                 if runtime > 100:
#                     runtime = 22
                if firstGen:
                    # The first generation is not recorded; `continue` also
                    # skips every remaining check for this line.
                    firstGen = False
                    continue
                runtimes.append(runtime)
                
            if "launched PIDS" in line:
                start = re.findall(r'\d+', line)[0] # second half is in milliseconds, don't need that precision
                
            if "finished PIDS" in line:
                end = re.findall(r'\d+', line)[0] 
                # Pairs the most recently seen launch time with this finish time.
                # NOTE(review): a "finished PIDS" line seen before any
                # "launched PIDS" line leaves `start` unbound, and the
                # resulting error is not caught here.
                startEndPairs.append((start,end))
            if "process"  in line and "started" in line:
                stSplit = line.split(" ")
                # The score-function name is the token sitting between "is" and "and".
                sf = [stSplit[i] for i in range(2,len(stSplit)-2) if stSplit[i-1] == "is" and  stSplit[i+1] == "and"][0]
                sfs.append(sf)
                # Blank out the name so digits inside it are not picked up below.
                line = re.sub(r'(?<=is)(.*)(?=and)', "", line)
                numbers = re.findall(r'\d+', line)

                # numbers[0] is used as the process key, numbers[1] as its start time.
                procToSf[numbers[0]] = sf
                if numbers[0] in procStartDict.keys():
                    procStartDict[numbers[0]].append(numbers[1])
                else:
                    procStartDict[numbers[0]] = [numbers[1]]
            if "returning" in line:
                numbers = re.findall(r'\d+', line)
                if numbers[0] in procEndDict.keys():
                    procEndDict[numbers[0]].append(numbers[1])
                else:
                    procEndDict[numbers[0]] = [numbers[1]]
            if "computing" in line:
                numbers = re.findall(r'\d+', line)
                if numbers[0] in compStartDict.keys():
                    compStartDict[numbers[0]].append(numbers[1])
                else:
                    compStartDict[numbers[0]] = [numbers[1]]
            if "evaluation:" in line and not firstGen:
                numbers = re.findall(r'\d+', line)
                numbers = [ '.'.join(x) for x in zip(numbers[0::2], numbers[1::2]) ]
                # evalTimes is initialised above, so this test is always true
                # and the else branch is never taken.
                if "evalTimes" in locals():
                    evalTimes = np.append(evalTimes,  np.array(list(numbers), dtype=np.float32))
                else:
                    evalTimes = np.array(list(numbers), dtype=np.float32)
                # Computed but not returned.
                avgEval = np.mean(evalTimes)
            if "neuroGPU" in line and "starts" not in line and "ends" not in line and not firstGen:
                numbers = re.findall(r'\d+', line)
                numbers = [ '.'.join(x) for x in zip(numbers[0::2], numbers[1::2]) ]
                
                # Always true for the same reason as the evalTimes check above.
                if "neuroGPUTimes" in locals():
                    neuroGPUTimes = np.append(neuroGPUTimes,  np.array(list(numbers), dtype=np.float32))
                else:
                    neuroGPUTimes = np.array(list(numbers),dtype=np.float32)
                # Computed but not returned.
                avgNGPU = np.mean(neuroGPUTimes)
            # NOTE(review): `and` binds tighter than `or`, so this reads as
            # A or (readingEnds and not firstGen): a "neuroGPU ... starts"
            # line is collected even during the first generation - confirm.
            if ("neuroGPU" in line and "starts" in line and "ends" not in line) or readingEnds and not firstGen:
                readingEnds = True
                numbers = re.findall(r'\d+', line)
                # Digit runs are regrouped in threes as "<a>.<b>e+<c>".
                numbers = [ '.'.join([x1,x2]) + "e+" + str(x3) for x1,x2,x3 in zip(numbers[0::3], numbers[1::3], numbers[2::3]) ]
                neuroGPUStartTimes += numbers
                # The list continues on following lines until one contains "]".
                if "]" in line:
                    readingEnds = False
            # Same structure (and same precedence caveat) for the end times.
            if ("neuroGPU" in line and "starts" not in line and "ends" in line) or readingStarts and not firstGen:
                readingStarts = True
                numbers = re.findall(r'\d+', line)
                numbers = [ '.'.join([x1,x2]) + "e+" + str(x3) for x1,x2,x3 in zip(numbers[0::3], numbers[1::3], numbers[2::3]) ]
                neuroGPUEndTimes += numbers
                if "]" in line:
                    readingStarts = False
            if "IO:" in line:
                numbers = re.findall(r'\d+', line)
                numbers = [ '.'.join([x1,x2]) + "e+" + str(x3) for x1,x2,x3 in zip(numbers[0::3], numbers[1::3], numbers[2::3]) ]
                # One list of values is appended per "IO:" line.
                io_times.append(numbers)
            if 'gen size' in line:
                numbers = re.findall(r'\d+', line)
                gens.append(float(numbers[0]))
#             if "gen1 took" in line:
#                 break
    try:
        # we already skip first gen because we don't record it
        # if len(runtimes) > 1:
        #     runtimes = runtimes[1:]
        #     evalTimes = evalTimes[1:]
        #     neuroGPUTimes = neuroGPUTimes[1:]
        # abs_start and nGpus are only bound if their lines appeared in the
        # log; referencing them here is what can raise UnboundLocalError.
        res = {"procStartDict": procStartDict,"procEndDict": procEndDict,\
               "startEndPairs": startEndPairs,"runtimes": runtimes,\
               "compStartDict": compStartDict,"sfs": sfs,\
               "evalTimes": evalTimes,"neuroGPUTimes": neuroGPUTimes,\
              "procToSf": procToSf, "absStart": abs_start, \
               "neuroGPUStartTimes": neuroGPUStartTimes, \
               "neuroGPUEndTimes": neuroGPUEndTimes, "ioTimes": io_times,
              "nGpus": nGpus, 'gen_sizes':gens}
    except UnboundLocalError as e:
        print(e)
        print("MISREAD LOG : ", f, "  but I am in PERMISSIVE mode so it's ok")
#         raise e
        # Permissive fallback: everything parsed so far is discarded.
        return {"procStartDict": {},"procEndDict": {},\
               "startEndPairs": [],"runtimes": [],\
               "compStartDict": {},"sfs": [],\
               "evalTimes": [],"neuroGPUTimes": [],\
              "procToSf": {}, "absStart": 0, \
               "neuroGPUStartTimes": [], \
               "neuroGPUEndTimes": [], "ioTimes": [],
               "nGpus": 6, 'gen_sizes':[]}
    return res


def makeCustomProfile(node, nCpu, pop, stim, sf, vers, path, show=True):
    """Draw a timeline ("custom profile") of CPU and GPU activity for one run.

    The run's log is located with ``format_logname(..., how='full')`` and
    parsed with ``processLog``. Two images are written next to the log:
    ``custom_profile`` (the timeline) and ``legend`` (the figure produced by
    ``make_legend`` for the score-function index mapping).

    The timeline contains vertical lines for the first five (start, end)
    pairs, light-blue boxes for per-process start/return times, and
    pale-green boxes for GPU simulations. All x positions are seconds
    relative to the log's absolute start.

    ``vers`` and ``show`` are accepted but not used.
    """
    # title is the string "None", which is truthy and so passes the
    # assertion format_logname makes in 'full' mode.
    f, _ = format_logname(node, pop, nCpu, stim, sf, path, how='full', title="None")
    #f  = "runTimeLogs/runTime.log"
    logRes = processLog(f)
    print("making profile for {}".format(f))
    absStart = float(logRes['absStart'])
    start_data = np.array([float(start) for start in logRes["neuroGPUStartTimes"]]) 
    end_data = np.array(logRes["neuroGPUEndTimes"]).astype(float)
    print(len(start_data))
    times = logRes["neuroGPUTimes"]
    # Only referenced by the commented-out annotation further down.
    total_time = float(logRes['startEndPairs'][-1][1]) - float(logRes['absStart'])
    # bugged timer
    # The logged end times are discarded: every GPU box gets the mean
    # simulation time as its width.
    end_data = np.mean(times) + start_data
    procEndDict = logRes['procEndDict']
    sfsMap = logRes['procToSf']
    # Give each distinct score-function name an integer index (set order,
    # so the numbering is not guaranteed to be stable between runs).
    sfsMapMap = {}
    counter = 0
    for val in set(list(sfsMap.values())):
        sfsMapMap[val] = counter
        counter +=1

    nGpus = 6# THIS SHOULD BE IN LOG RES logRes['nGpus']
    compStartDict = logRes['compStartDict']
    procStartDict = logRes['procStartDict']
    
    startEndPairs = logRes['startEndPairs']
    #print(absStart)

    #print(startEndPairs)

    # Convert the (start, end) pairs to offsets from the absolute start.
    startEndPairs = [(float(pair0) - float(absStart), float(pair1) - float(absStart)) for pair0, pair1 in startEndPairs]
    #print(procStartDict)
    #print(startEndPairs)
    # Create figure and axes
    fig, ax = plt.subplots(figsize=(13,9))
    
    # YLIM XLIM
    plt.ylim(0,300)
    plt.xlim(0,200)
    
    x_anchors= []
    x_ends = []

    # Overlap-based stacking helper; its only call site below is commented out.
    def calc_y_anchor(x_anchor,width,x_anchors,x_ends):
        curr_ht = 0
        for x_anc, x_end in zip(x_anchors,x_ends):
            if x_anchor > x_anc and x_anchor < x_end:
                curr_ht += 10
            elif x_anc > x_anchor and x_anc < x_anchor+width:
                curr_ht += 10
        return curr_ht

    cur_start = 0
    y_base = 0
    firstGo = True
    count = 0 
    # Vertical markers for the first five pairs; only the first pair carries
    # legend labels so each label appears once.
    for start, end in startEndPairs:
        
        if firstGo:
            firstGo = False
            plt.axvline(x=start,color="blue", label="CPU Eval Start")
            plt.axvline(x=end,color="red", label="CPU Eval Done")
        else:
            plt.axvline(x=start,color="blue")
            plt.axvline(x=end,color="red")
       
        count += 1
        if count > 4:
            break
       

    idx = 1
    labels = list(compStartDict.keys())
    box_ht = 10
    runs = 0
    # One light-blue box per (start, return) pair of every process.
    for procStart,procEnd,proc in \
    zip(list(procStartDict.values()),list(procEndDict.values()), list(procEndDict.keys()) ):
        for pStart,pEnd in zip(procStart,procEnd):
            x_anchor = float(pStart)  - float(absStart)#float(procStart) - float(absStart)
            # y_anchor uses y_base as it was before the update just below.
            y_anchor = y_base + 10
            # Past the end of the current pair: move to the next pair and
            # restart the stack from the bottom.
            if x_anchor > float(startEndPairs[cur_start][1]) and cur_start < len(startEndPairs) - 1 :
                cur_start += 1
                y_base = 0
                runs += 1
            else:
                y_base += 10
            
            if y_base > 1200: # MAX HEIGHT EXCEEDED
                #print('max height exc.')
                # Leaves only the inner loop; the next process continues.
                break
            width =(float(pEnd) - float(absStart)) - (float(pStart) - float(absStart)) #(float(procEnd) - float(absStart)) - (float(procStart) - float(absStart))
            #y_anchor = calc_y_anchor(x_anchor,width,x_anchors,x_ends)
            x_anchors.append(float(x_anchor)), x_ends.append(width)
            rect = patches.Rectangle((x_anchor, y_anchor), width, box_ht, \
                                     linewidth=2, edgecolor='black', facecolor='lightblue', fill=True, zorder=0)
            # Only needed by the commented-out annotation below.
            curr_sf = sfsMapMap[sfsMap[proc]]
#             ax.annotate(curr_sf, (x_anchor + 2.5, y_anchor + 5), color='black', weight='bold', \
#                         fontsize=7, ha='center', va='center', zorder=4)
            # Add the patch to the Axes
            ax.add_patch(rect)
            idx += 1
#         if y_base > 1200:
#             print("max height exceeded")
#             break
            if runs > 4:
                break

    # Create a Rectangle patches
    box_ht = 15 # constant box height
    cur_start = 0
    y_base = 0
    runs = 0
    # One pale-green box per GPU simulation.
    for start,end,idx in zip(start_data,end_data, np.arange(len(end_data))):

        x_anchor = start-absStart
        # NOTE(review): this loop advances cur_start by 2 (the CPU loop above
        # advances by 1) and has no upper-bound check, so
        # startEndPairs[cur_start] can run past the end of the list - confirm
        # that the step of 2 is intended.
        if x_anchor > float(startEndPairs[cur_start][1]):
            cur_start += 2
            y_base = 0
            runs += 1
        else:
            y_base += 15
        y_anchor = y_base
        width =  end - start
        x_anchors.append(float(x_anchor)), x_ends.append(width)
        rect = patches.Rectangle((x_anchor, y_anchor), width, box_ht, \
                                 linewidth=2.5, edgecolor='black', facecolor='palegreen', fill=True, zorder=10)
#         ax.annotate("GPU {}".format(idx %  nGpus), (x_anchor + (total_time / 10), y_anchor + 8), color='black', weight='bold', 
#                     fontsize=10, ha='center', va='center', zorder=20)

        # Add the patch to the Axes
        #             break
        # Stop before drawing once more than four resets have happened.
        if runs > 4:
            break
        ax.add_patch(rect)
        
    plt.title("Profile for {} Node Parallel over Population (pop size {})".format(node, pop))
    #plt.title("Custom Profile for {} CPUs, {} Pop Size and {} Nodes".format(nCpus,nodes,popSize))
    plt.legend()
    plt.xlabel("time (s)")
    #plt.show()
    print("TODO: add legend later")
    # Outputs are written into the folder that holds the log.
    out_dir = os.path.dirname(f)
    plt.savefig(os.path.join(out_dir,"custom_profile"), bbox_inches='tight')
    plt.close()
    # Rebuild the score-function index mapping for the legend figure.
    sfsMap = logRes['procToSf']
    sfsMapMap = {}
    counter = 0
    for val in set(list(sfsMap.values())):
        sfsMapMap[val] = counter
        counter +=1

    make_legend(sfsMapMap)
    
    # NOTE(review): make_legend calls plt.show() before this savefig; with an
    # interactive backend the figure may already be gone by then - verify
    # that the saved legend is not blank.
    plt.savefig(os.path.join(out_dir,"legend"), bbox_inches='tight')
    plt.close()


def make_legend(top):
    """Render a text-only figure listing ``"<value>--> <name>"`` for each item of ``top``.

    Entries are written one data unit apart, starting at y = 9 and moving
    down, at x = 9 on hidden 0-10 axes. The figure is shown with
    ``plt.show()`` and left open as the current figure.
    """
    fig, ax = plt.subplots(figsize=(8, 5))
    for row, (name, val) in enumerate(top.items()):
        ax.text(9, 9 - row, str(val) + "--> " + name, fontsize=20)

    ax.axis([0, 10, 0, 10])
    for hidden_axis in (ax.get_xaxis(), ax.get_yaxis()):
        hidden_axis.set_visible(False)
    ax.axis('off')
    plt.show()
    
    
def plot_CPUGPU_bottleneck(nCpus, nodes,pops, sfs, stims, versions, path, how='vanilla', title=None, show=True):
    """Plot mean DEAP, GPU and CPU times per experiment as grouped bars.

    TODO: consider changing this to be a single plot output

    Experiments are ordered by population per node. For each distinct
    (population, node) pair the master log is parsed with ``processLog`` and
    bars (with standard-deviation error bars) are drawn for the mean GPU
    simulation time and the mean CPU evaluation time. The DEAP bar, i.e. the
    gap between generations minus the mean GPU time, is drawn for the first
    group only. The figure is saved as ``<path>/<title>_cpuVgpu.png``.

    ``nCpus``, ``sfs``, ``stims`` and ``versions`` must support indexing with
    an integer index array (e.g. numpy arrays). ``show`` is accepted but not
    used. When ``title`` is None it is taken from ``format_logname`` for the
    selected ``how``.

    NOTE(review): the statement layout of this function was reconstructed
    from a copy of the source that had lost its line breaks. Check in
    particular the commented-out DEAP bar in the non-first groups and the
    placement of the tick-label fallback inside the ``except`` branch.
    """
    fig, axs = plt.subplots()
    plt.subplots_adjust(bottom=None, right=None, top=None, wspace=None, hspace=.5)
    plot_idxs = []

    pops, nodes = np.array(pops).astype(int), np.array(nodes).astype(int)
    inds = np.argsort(pops/nodes)
    # The sorted versions used to be bound to a misspelled name ("verss"),
    # leaving `versions` in its original order.
    nodes, pops, nCpus, stims, sfs, versions = (
        nodes[inds], pops[inds], nCpus[inds], stims[inds], sfs[inds], versions[inds]
    )

    if title:
        figname = title + "_cpuVgpu.png"
        title = title + ": Time on CPU vs. GPU"

    xlabel = "Population/Node"
    width = 0.25  # width of a bar
    seen = set()
    idx = 0
    for node, pop, nCpu, stim, sf, vers in zip(nodes, pops, nCpus, stims, sfs, versions):
        # Plot each (population, node) combination once.
        key = (int(pop), int(node))
        if key in seen:
            continue
        seen.add(key)

        if not title:
            f, title = format_logname(node,pop,nCpu,stim, sf, path, how=how, title=title)
            figname = title + "_cpuVgpu.png"
            title = title + ": Time on CPU vs. GPU"
        else:
            f, _ = format_logname(node,pop,nCpu,stim, sf, path, how=how, title=title)

        logRes = processLog(f)
        # Only feeds the commented-out "Full Runtime" bar; the call is kept
        # because time_to_50 prints a warning when fewer than 50 generations
        # were logged.
        total_time = time_to_50(logRes['absStart'], logRes['startEndPairs'])
        total_time = total_time / 50
        mean_deap, std_deap = calculate_deap_time(logRes['startEndPairs'])
        mean_eval, std_eval = np.mean(logRes['evalTimes']), np.std(logRes['evalTimes'])
        mean_neuroGPU, std_neuroGPU = np.mean(logRes['neuroGPUTimes']), np.std(logRes['neuroGPUTimes'])
        mean_deap = mean_deap - mean_neuroGPU # DEAP runtime consideration covers first simulation round too so we subtract it

        # Plotting
        plot_idx = idx * 1.5
        if idx == 0:
            # The first group carries the legend labels.
            # axs.bar(idx/1.5, total_time , width, label='Full Runtime',color="black")
            axs.bar(plot_idx/1.5, mean_deap , width, yerr=std_deap, label='Time Running DEAP', color="orange",edgecolor='black', linewidth=1.5)
            axs.bar(plot_idx/1.5 + width, mean_neuroGPU , width, yerr=std_neuroGPU, label='Running time on GPU',color="palegreen",edgecolor='black', linewidth=1.5)
            axs.bar(plot_idx/1.5 + 2* width, mean_eval, width, yerr=std_eval, label='Running time on CPU', color="lightblue",edgecolor='black', linewidth=1.5)
        else:
            # axs.bar(idx/1.5, total_time , width ,color="black")
            # axs.bar(plot_idx/1.5, mean_deap , width, yerr=std_deap, color="orange",edgecolor='black', linewidth=1.5)
            axs.bar(plot_idx/1.5 + width, mean_neuroGPU , width, yerr=std_neuroGPU, color="palegreen", edgecolor='black', linewidth=1.5)
            axs.bar(plot_idx/1.5 + 2*width, mean_eval, width, yerr=std_eval, color="lightblue",edgecolor='black', linewidth=1.5)
        plot_idxs.append(plot_idx)
        idx += 1

    # axs.legend(bbox_to_anchor=(1.25, 1), loc='upper right', ncol=1)
    # axs.legend(loc='upper right', ncol=1, fontsize=14)
    # axs.set_xlabel(xlabel, font)
    axs.set_xlabel(xlabel)
    axs.set_ylim(0,140)
    axs.set_xticks(ticks=[p_idx/1.5 + .25 for p_idx in plot_idxs])
    # axs.set_xticks(plot_idxs)
    try:
        axs.set_xticklabels(
            labels=np.unique(np.array(pops).astype(int) / np.array(nodes).astype(int)).astype(int),
            rotation = 65)
    except Exception:
        # THIS HAPPENS WHEN YOU PLOT COMPUTE SCALES CPU V GPU
        # (the number of unique population/node ratios does not match the
        # number of bar groups), so fall back to per-experiment labels.
        print('tried to make CPUGPU plot on weak scaling')
        if how == 'vanilla':
            axs.set_xticklabels(labels=["{}/{}".format(node,pop) for node, pop in zip(nodes,pops)])
        elif how == 'stims':
            axs.set_xticklabels(labels=["{}/{}".format(node,stim) for node, stim in zip(nodes,stims)])
        elif how == 'sfs':
            axs.set_xticklabels(labels=["{}/{}".format(node,sf) for node, sf in zip(nodes,sfs)])
    axs.set_ylabel('Time (s)')
    # axs.set_title(title,fontweight='bold')
    fig.savefig(os.path.join(path, figname), bbox_inches='tight')
def list_other_logs(f): path = os.path.dirname(f) files = [file for file in os.listdir(path) if ".log" in file and "gpu" not in file] return os.path.join(path,files[0]) def plotScaling(nCpus,nodes,pops, sfs, stims, versions, path, how='vanilla', title=None, show=True): #f = "runTimeLogs/runTime.log" runtimes = [] labels = [] stds = [] if title: figname = title + "_scaling.png" title = title #+ " Scaling" for idx,(node,pop,nCpu,stim, sf, vers) in enumerate(zip(nodes,pops,nCpus, stims, sfs, versions)): if not title: f, title = format_logname(node,pop,nCpu,stim, sf, path, how=how, title=title) figname = title + "_scaling.png" title = title + " Scaling" else: f, _ = format_logname(node,pop,nCpu,stim, sf, path, how=how, title=title) try: logRes = processLog(f) except: print("found no master log for ", f, " using first") prev_f = f f = list_other_logs(prev_f)#re.sub(".log","_0.log", f) logRes = processLog(f) shutil.copyfile(f, prev_f) if len(logRes['runtimes']) < 1: continue runtime = np.mean(logRes['runtimes']) if len(logRes['runtimes']) > 1: stds.append(np.std(logRes['runtimes'])) print("not using back up", node) else: stds.append(np.mean(backup_stddev[node])) print(" using back up standard deviation for {}.... 
get more trials".format(node)) if (nodes[0] == nodes).all(): label = "{}".format(pop) else: label = "{}".format(node) runtimes.append(runtime) labels.append(label) if (pops[0] == pops).all(): lin_decr = runtimes[0]/ np.array([label.replace("N","") for label in labels]).astype(int) bench_name = 'Ideal' elif (nodes[0] == nodes).all(): lin_decr = [runtimes[i] * (i+1) for i in range(len(runtimes))] bench_name = 'Exponential' else: bench_name = 'Ideal' lin_decr = np.repeat(runtimes[0],len(runtimes)) fig = plt.figure() plt.scatter(np.arange(len(runtimes)), lin_decr, color='orange', label=bench_name, s=15) plt.plot(np.arange(len(runtimes)), lin_decr, color='orange') ax = fig.axes[0] plt.scatter(np.arange(len(runtimes)), runtimes, color='blue', label="Observed", s=15) plt.plot(np.arange(len(runtimes)), runtimes, color='blue') runtimes, stds = np.array(runtimes), np.array(stds) plt.fill_between(np.arange(len(runtimes)), runtimes-stds, runtimes+stds, alpha=.5) plt.yscale("log") if (pops[0] == pops).all(): plt.ylim(bottom=1) plt.xlabel("Nodes") #ax.yaxis.set_minor_formatter(mticker.ScalarFormatter()) elif (nodes[0] == nodes).all(): plt.xlabel("Population") else: plt.ylim(bottom=10) plt.xlabel("Nodes") ax.yaxis.set_minor_formatter(mticker.ScalarFormatter()) plt.xticks(ticks=np.arange(len(runtimes)), labels=labels, rotation=45) plt.ylabel("Log(Total Runtime (s))") plt.title(title) plt.legend() plt.savefig(os.path.join(path, figname), bbox_inches='tight') def compare_scaling(strong_df, weak_df, path): #f = "runTimeLogs/runTime.log" #assert (strong_df['offspring'].values == weak_df['offspring'].values).all() fig = plt.figure() plt.title("Population Scaling Comparison") labels = strong_df['offspring'].values y = strong_df['Runtime'].values err = strong_df['Runtime Stddev'].values plt.plot(labels, y, color='blue', label="strong scaling") plt.fill_between(labels, y - err, y+ err, color='blue', alpha=.4) # revisit this line labels = weak_df['offspring'].values y = 
weak_df['Runtime'].values err = weak_df['Runtime Stddev'].values plt.plot(labels, y, color='red', label="weak scaling") plt.fill_between(labels, y - err, y+ err, color='red', alpha=.4) plt.ylabel("time (s)") plt.xlabel("pop size") plt.legend() plt.ylim(bottom=0) plt.savefig(os.path.join(path, "scaling_time_compare"), bbox_inches='tight') plt.close(fig) fig2 = plt.figure() plt.title("FOM comparison where FOM = pop size / nGPUs / runtime ") labels = strong_df['offspring'].values y = strong_df['FOM'].values plt.plot(labels, y, color='blue', label="strong scaling") plt.ylim(bottom=0) # revisit this line labels = weak_df['offspring'].values y = weak_df['FOM'].values err = weak_df['Runtime Stddev'].values plt.plot(labels, y, color='red', label="weak scaling") plt.ylabel("FOM") plt.xlabel("pop size") plt.legend() plt.savefig(os.path.join(path, "scaling_fom_compare"), bbox_inches='tight') plt.close(fig) def compare_stim_scaling(strong_df, weak_df, path): #f = "runTimeLogs/runTime.log" strong_df = strong_df[strong_df['score functions'] == 20.0] strong_df = strong_df.sort_values(by='stims') fig = plt.figure() plt.title("Stim Scaling Comparison") labels = strong_df['stims'].values y = strong_df['Runtime'].values err = strong_df['Runtime Stddev'].values plt.plot(labels, y, color='blue', label="strong scaling") plt.fill_between(labels, y - err, y+ err, color='blue', alpha=.4) # MONKEY PATCH weak_df =weak_df[~(weak_df['nodes'] > weak_df['stims'])] y = weak_df['Runtime'].values err = weak_df['Runtime Stddev'].values plt.plot(labels, y, color='red', label="weak scaling") plt.fill_between(labels, y - err, y+ err, color='red', alpha=.4) plt.ylabel("time (s)") plt.xlabel("number of stims") plt.legend() plt.ylim(bottom=0) plt.savefig(os.path.join(path, "stim_scaling_time_compare"), bbox_inches='tight') plt.close(fig) def compare_sf_scaling(strong_df, weak_df, path): #f = "runTimeLogs/runTime.log" strong_df = strong_df[strong_df['stims'] == 1.0] strong_df = 
strong_df[strong_df['offspring'] == 500.0] strong_df = strong_df[strong_df['score functions'] < 71] weak_df = weak_df[~((weak_df['score functions'] == 20.0) & (weak_df['nodes'] == 1.0))] strong_df = strong_df.sort_values(by='score functions') fig = plt.figure() plt.title("Score Function Scaling Comparison") labels = strong_df['score functions'].values y = strong_df['Runtime'].values err = strong_df['Runtime Stddev'].values plt.plot(labels, y, color='blue', label="strong scaling") plt.fill_between(labels, y - err, y+ err, color='blue', alpha=.4) y = weak_df['Runtime'].values err = weak_df['Runtime Stddev'].values plt.plot(labels, y, color='red', label="weak scaling") plt.fill_between(labels, y - err, y+ err, color='red', alpha=.4) plt.ylabel("time (s)") plt.xlabel("# of score functions") plt.legend() plt.ylim(bottom=0) plt.savefig(os.path.join(path, "sf_scaling_time_compare"), bbox_inches='tight') plt.close(fig) def read_exps(exp_names, condition="vanilla", args=None): nodes = [] pops = [] nCpus = [] sfs = [] stims = [] version_dict = {} use_constraint = False for exp_name in exp_names: try: curr_node, curr_core, curr_pop, curr_stims, curr_sfs, version = re.findall(r'\d+', exp_name) # TODO: use version appropriately except ValueError as e: print(exp_name, " doesn't confrom") continue shared_exp_name = exp_name[:-2] if shared_exp_name not in version_dict: version_dict[shared_exp_name] = [version] else: continue # here we can filter to only use relevant experiments # TODO: if one wanted to see scaling in multiple dimensions this will not work # need to expand conditions to allow something like "stims_sfs" if args and args.constraint_file: use_constraint = True constraints = {} with open(args.constraint_file, "r") as f: lines = f.readlines() for line in lines: line = line.replace("\n", "") line = line.split("=") constraints[line[0]] = line[1].split(",") if use_constraint and ((curr_stims not in constraints['n_stims']) or (curr_node not in constraints['N']) \ or 
(curr_pop not in constraints['offspring']) or (curr_sfs not in constraints['n_sfs']) or (not len( np.where(np.array(constraints['offspring'])==curr_pop)[0]) > 1 and (np.where(np.array(constraints['offspring'])==curr_pop)[0] != np.where(np.array(constraints['N'])==curr_node)[0]).all())): continue elif not use_constraint: if "vanilla" in condition and (curr_stims != default_stims or curr_sfs != default_sfs): continue # MONKEY PATCH IF STATEMENT, ex; if you have a study with 2N 500 pop elif "vanilla" in condition and int(curr_pop) < (int(curr_node) * POP_SCALING_FACTOR): continue elif condition == "stims" and (curr_pop != default_pop or curr_sfs != default_sfs): continue elif condition == "sfs" and (curr_pop != default_pop or curr_stims != default_stims): continue print("consuming ", curr_node, curr_pop, curr_stims, curr_sfs) nodes.append(curr_node), pops.append(curr_pop), nCpus.append(curr_core) sfs.append(curr_sfs), stims.append(curr_stims) max_version_list = [max(version_dict[key]) for key in version_dict] # TODO: fix this monkey patch on which version to use... 
this always use 0 versino #max_version_list = np.zeros(shape=len(nodes)).astype(int).astype(str) sort_inds = np.argsort(np.array(nodes).astype(int)) if len(nodes)< 1: print(' NO EXPERIMENTS FOUND') print(1/0) if (nodes[0] == np.array(nodes)).all(): sort_inds = np.argsort(np.array(pops).astype(int)) if 'strong' in condition: sort_inds = np.argsort(np.array(pops).astype(int)) # if condition == "vanilla": # sort_inds = np.argsort(np.array(pops).astype(int)) # elif condition == "stims": # sort_inds = np.argsort(np.array(stims).astype(int)) # elif condition == 'sfs': # sort_inds = np.argsort(np.array(sfs).astype(int)) nodes = np.array(nodes)[sort_inds] pops = np.array(pops)[sort_inds] nCpus = np.array(nCpus)[sort_inds] sfs = np.array(sfs)[sort_inds] stims = np.array(stims)[sort_inds] max_version_list = np.zeros(shape=len(nodes)).astype(int).astype(str) max_version_listmax_version_list = np.array(max_version_list)[sort_inds] return nodes, pops, nCpus, sfs, stims, max_version_list def wrapProfileMaker(nCpus,nodes,pops,stims,sfs, versions, path): for idx,(node,pop,nCpu,stim, sf, vers) in enumerate(zip(nodes,pops,nCpus,stims, sfs, versions)): if int(node) > 10: continue makeCustomProfile(node,nCpu,pop,stim ,sf, vers, path) def plot_gpu_pies(df, figname): df = df.drop_duplicates(subset=["nodes", "total gpu", "offspring", "stims", "score functions"]) rows =int(np.sqrt(len(df))) cols = len(df) // rows fig, axs = plt.subplots(nrows=rows, ncols=cols, figsize=(rows*6, cols*2)) for ind, ax in enumerate(axs.flatten()): row = df.iloc[ind] f, _ = format_logname(str(int(row['nodes'])),str(int(row['offspring'])),\ str(int(row['total cpu'])),str(int(row['stims'])), \ str(int(row['score functions'])), how="full", title=figname) #logRes = processLog(f) x = [int(row['gpu_util']), 100-int(row['gpu_util'])] labels=['% of Time on GPU', '% of Time GPU Idle'] ax.pie(x, labels=labels) ax.set_title("{} Nodes, {} GPUs, {} Stims, {} Pop".format(int(row['nodes']),\ int(row['total gpu']),\ 
int(row['stims']),\ int(row['offspring']))) plt.savefig("outputs/{}_Pie.png".format(figname)) plt.close(fig) def drop_constant(df, preserve_list=[]): res = df.loc[:, (df != df.iloc[0]).any()] if len(preserve_list) > 0: preserved_cols = df.loc[:,preserve_list] dropped = list(preserved_cols.columns) res.loc[:,preserve_list] = preserved_cols cols = list(res) if 'Nodes' in dropped: cols.insert(0, cols.pop(cols.index('Nodes'))) cols.insert(1, cols.pop(cols.index('Offspring'))) elif 'Offspring' in dropped: cols.insert(2, cols.pop(cols.index('Offspring'))) res = res.loc[:, cols] return res def plus_minus_cols(df, main, std, drop=True): df[main] = df[main].astype(str).apply(lambda x: x[:5]) \ + " ± " + df[std].astype(str).apply(lambda x: x[:5]) if drop: df = df.drop(std,axis=1) return df def time_to_50(abstart, start_end_pairs): try: end = start_end_pairs[49][1] res = float(end) - float(abstart) except IndexError: print(f"WARNING only {len(start_end_pairs)} start end pairs") res = 0 return res def calculate_gen_time(start_end_pairs): total = [] num_pairs = len(start_end_pairs) # need to start one deep for idx in range(1,num_pairs): item1 = start_end_pairs[idx][1] item2 = start_end_pairs[idx][0] if len(item1) != 10 or len(item2) != 10: # case where times are not formatted correctly continue # difference between gen start and end can't be less than 1 second if float(start_end_pairs[idx][1]) - float(start_end_pairs[idx][0]) < 1: continue if float(start_end_pairs[idx][1]) == float(start_end_pairs[idx][0]): continue total.append(float(start_end_pairs[idx][1]) - float(start_end_pairs[idx][0])) total = np.array(total) return np.mean(total), np.std(total) def calculate_deap_time(start_end_pairs): total = [] num_pairs = len(start_end_pairs) # need to start one deep for idx in range(1,num_pairs): item1 = start_end_pairs[idx][0] item2 = start_end_pairs[idx-1][1] if len(item1) != 10 or len(item2) != 10: # case where times are not formatted correctly continue if float(item1) - 
float(item2) < 1 : continue total.append(float(start_end_pairs[idx][0]) - float(start_end_pairs[idx-1][1])) total = np.array(total) if len(total) > 25: print("MONKEY PATCH INCORRECTLY FORMATTED TIMES") total = total[25:] return np.mean(total), np.std(total) def generate_result_table(nCpus,nodes,pops, sfs, stims, versions, path, title=None, how='vanilla'): df = pd.DataFrame(columns=['Nodes','Total Cpus', 'Total Gpus',\ 'Offspring', 'Stimuli', 'Score Functions', \ 'Runtime', 'Runtime Stddev', 'FOM', 'FOM Std Dev','GPU Utilization', 'Mean Eval Time', 'Std Eval Time', 'Mean Sim Time', 'Std Sim Time','DEAP time', 'DEAP Time Std Dev', 'Time to 50 Gen', 'Mean Gen Size', 'Std Gen Size', 'Num Trials']) if title: figname = title + "_scale.tex" df_name = title + "_scale.csv" fn_to_gpu_df = {} for idx,(node,pop,nCpu,stim, sf, vers) in enumerate(zip(nodes,pops,nCpus, stims, sfs, versions)): if not title: f, title = format_logname(node,pop,nCpu,stim, sf, path, how=how, title=title) gpu_logs, num_logs = format_gpu_util_name(node,pop,nCpu,stim, sf, path, how=how, title=title) figname = title + "_scale.tex" df_name = title + "_scale.csv" else: f, _ = format_logname(node,pop,nCpu,stim, sf, path, how=how, title=title) gpu_logs, num_logs = format_gpu_util_name(node,pop,nCpu,stim, sf, path, how=how, title=title) if len(gpu_logs) > 0: fn = gpu_logs[0] try: percent_utilization, gpu_df = read_gpu_logs(fn) fn_to_gpu_df[fn] = gpu_df except Exception as e: # import pdb; pdb.set_trace() # read_gpu_logs(fn) print(e, " error generating GPU result table") percent_utilization = 0 logRes = processLog(f) if 'core_neuron' in os.path.abspath('./'): # if we are in core neuron we get 8 start end pairs on rank 1, but we # only care about the last total_time = time_to_50(logRes['absStart'], logRes['startEndPairs'][::8]) mean_deap, std_deap = calculate_deap_time(logRes['startEndPairs'][::8]) else: total_time = time_to_50(logRes['absStart'], logRes['startEndPairs']) mean_deap, std_deap = 
calculate_deap_time(logRes['startEndPairs']) mean_runtime, std_runtime = np.mean(logRes['runtimes']), np.std(logRes['runtimes']) mean_eval, std_eval = np.mean(logRes['evalTimes']), np.std(logRes['evalTimes']) mean_neuroGPU, std_neuroGPU = np.mean(logRes['neuroGPUTimes']), np.std(logRes['neuroGPUTimes']) mean_gen_size, std_gen_size = np.mean(logRes['gen_sizes']), np.std(logRes['gen_sizes']) mean_deap = mean_deap - mean_neuroGPU # DEAP runtime consideration covers first simulation round too so we subtract it nGpus = logRes['nGpus'] FOM = int(pop)/(nGpus*int(node))/np.array(logRes['runtimes'], dtype=np.float64) fom_mean = np.mean(FOM) fom_dev = np.std(FOM) num_trials = len(logRes['neuroGPUTimes']) pct_util = float(percent_utilization) if np.isnan(mean_runtime): continue df.loc[idx] = [int(node),int(nCpu)*int(node), nGpus*int(node), \ int(pop),int(stim), int(sf), float(mean_runtime), float(std_runtime), fom_mean, fom_dev, pct_util, mean_eval, std_eval, mean_neuroGPU, std_neuroGPU, mean_deap, std_deap, total_time, mean_gen_size, std_gen_size, num_trials] df = df.sort_values('Nodes', ascending=True) # SAVE CSV df.to_csv(os.path.join(path, df_name)) skip_latex=True if not skip_latex: # SAVE LATEX latex_df = drop_constant(df, preserve_list=['Nodes','Offspring'] ) latex_df = plus_minus_cols(latex_df, main='Runtime',std='Runtime Stddev') latex_df = plus_minus_cols(latex_df, main='FOM',std='FOM Std Dev') latex_df['GPU Utilization'] = latex_df['GPU Utilization'].astype(str).apply(lambda x: x[:4]) + "%" # formaters = {"Runtime": "{:0.2f}".format, "Runtime Stddev": "{:0.4f}".format, "cori fom" : "{:0.2f}".format, "fom std dev" : "{:0.3f}".format, 'gpu_util': "{:0.2f}".format } # df.to_latex(os.path.join(path, figname), formatters=formaters, float_format="%.0f", index=False) col_fmt = "|".join(np.repeat('c', len(df.columns))) col_fmt = "|" + col_fmt + "|" latex_df.to_latex(os.path.join(path, figname), float_format="%.0f", index=False, column_format=col_fmt) else: print("WARNING: 
skipped latex") print("WARNING: made a bunch of gpu dfs but not doing much with em .. could plot") return df def write_all_files(dest, srcs, overwrite=True): if not overwrite: try: assert srcs[0][-6:-4] == 'SF', f"this should not be a minion log, but it ends in a {srcs[0][-6:-4] } instead of SF.log" except: import pdb; pdb.set_trace() srcs = srcs[1:] open_mode = 'a+' else: open_mode = 'w' with open(dest, open_mode) as outfile: # dest is in src, but gets cleaned out here, good for fname in srcs: with open(fname) as infile: for line in infile: outfile.write(line)
def check_for_first_merge(log_path, old_log):
    """Tell whether ``old_log`` comes from the first versioned run folder.

    Every path that starts with the directory of ``log_path`` is globbed and
    sorted; the first match is skipped (the error message below treats it as
    the master directory itself). ``old_log`` is the "first merge" when the
    next match has the same base name as ``old_log`` without ``.log``.
    A first merge is the case where the caller should wipe out and replace
    the aggregated log instead of appending to it.

    Args:
        log_path: path of the aggregated log inside the master directory.
        old_log: path of the per-run log that is about to be merged.

    Returns:
        bool: True when ``old_log`` matches the first folder after the master.

    Raises:
        AssertionError: when nothing besides the first match exists.
    """
    # Escape the directory so glob metacharacters in the path ('[', '?', '*')
    # are matched literally; only the trailing '*' acts as a wildcard.
    prefix = glob.escape(os.path.dirname(log_path))
    listing = sorted(glob.glob(prefix + '*'))[1:]  # check all matching folders
    # Raised explicitly (same exception type as the former assert) so the
    # check is not stripped when Python runs with -O.
    if not listing:
        raise AssertionError(f"only master dir exists for {log_path}")
    first_run_dir = os.path.basename(listing[0])
    old_log_stem = os.path.basename(old_log).replace('.log', '')
    return first_run_dir == old_log_stem
def merge_experiments(src, dest, version, path):
    """Fold one versioned experiment folder into its aggregated folder.

    The six integers found in ``src`` (nodes, cores, offspring, stims, score
    functions, version) name the files to look for. The profile and the GPU
    utilization log are copied into ``dest`` with the version inserted in the
    file name; the run log is merged into the aggregated ``...SF.log``.

    Args:
        src: name of the versioned experiment folder (relative to ``path``).
        dest: aggregated folder to merge into; created when missing.
        version: overwritten by the version parsed from ``src``; kept only
            for call compatibility.
        path: directory that contains ``src``.

    Raises:
        ValueError: when ``src`` does not contain exactly six integers.
    """
    src_path = os.path.join(path, src)
    # TODO: use version appropriately
    curr_node, curr_core, curr_pop, curr_stims, curr_sfs, version = re.findall(r'\d+', src)
    stem = "{}N_{}C_{}O_{}S_{}SF".format(curr_node, curr_core, curr_pop, curr_stims, curr_sfs)

    os.makedirs(dest, exist_ok=True)

    # move profile
    try:
        shutil.copy(os.path.join(src_path, stem + ".prof"),
                    os.path.join(dest, "{}.{}.prof".format(stem, version)))
    except FileNotFoundError:
        print("no profile for ", src_path)

    # move gpu util log
    try:
        shutil.copy(os.path.join(src_path, "gpu_utillization.log"),
                    os.path.join(dest, "gpu_utillization.{}.log".format(version)))
    except FileNotFoundError:
        print("no gpu util for ", src_path)

    # combine regular log
    log_path = os.path.join(dest, stem + ".log")
    old_log = os.path.join(src_path, "{}_{}.log".format(stem, version))
    first_merge = check_for_first_merge(log_path, old_log)
    have_master = os.path.isfile(log_path)
    have_old = os.path.isfile(old_log)
    if have_master and have_old and first_merge:
        # you've already processed folder
        write_all_files(log_path, [log_path, old_log])
    elif have_master and have_old:
        write_all_files(log_path, [log_path, old_log], overwrite=False)
    elif have_old:
        # you haven't processed folder
        write_all_files(log_path, [old_log])
    else:
        print("WARNING: No log merge for ", log_path)
        print("not deleting ... could be though")
        print(src_path, log_path)


def collapse_exps(exp_names, path):
    """Collapse versioned experiment folders into one folder per configuration.

    Names are processed in sorted order. A folder with version 0, or one whose
    aggregated folder does not exist yet, is copied to
    ``<path>/<N>N_<C>C_<O>O_<S>S_<SF>SF`` (replacing any existing copy) and
    its run log is renamed so the trailing ``_<version>`` is dropped. Every
    other folder is merged with ``merge_experiments``. Names that do not
    contain exactly six integers are reported and skipped.

    Args:
        exp_names: folder names found under ``path``.
        path: directory holding the experiment folders.
    """
    for exp_name in sorted(exp_names):
        try:
            # TODO: use version appropriately
            curr_node, curr_core, curr_pop, curr_stims, curr_sfs, version = re.findall(r'\d+', exp_name)
        except ValueError:
            print(exp_name, " doesn't confrom")
            continue

        agg_exp_path = os.path.join(
            path,
            "{}N_{}C_{}O_{}S_{}SF".format(curr_node, curr_core, curr_pop, curr_stims, curr_sfs))

        if int(version) != 0 and os.path.isdir(agg_exp_path):
            merge_experiments(exp_name, agg_exp_path, version, path)
            continue

        if os.path.isdir(agg_exp_path):
            shutil.rmtree(agg_exp_path)
        shutil.copytree(os.path.join(path, exp_name), agg_exp_path)

        log_name = [entry for entry in os.listdir(agg_exp_path)
                    if ".log" in entry and "gpu" not in entry]
        if not log_name:
            print("no log for : ", agg_exp_path)
            print(os.path.dirname(agg_exp_path))
            continue
        # Drop the "_<version>" suffix whatever its width; fixed-width slicing
        # produced "...SF_.log" for versions with two or more digits.
        merged_name = re.sub(r'_\d+\.log$', '.log', log_name[0])
        try:
            os.rename(os.path.join(agg_exp_path, log_name[0]),
                      os.path.join(agg_exp_path, merged_name))
        except OSError:
            print("no log for : ", agg_exp_path)
            print(os.path.dirname(agg_exp_path))
            continue
        print(log_name)
    return


def sf_plot_strategy(exp_names, args, collapse=False):
    """Run the scaling plot, bottleneck plot and result table for the 'sfs' study.

    Args:
        exp_names: experiment folder names under ``args.path``.
        args: parsed command-line arguments (``path`` is read here).
        collapse: when true, collapse versioned folders first.
    """
    if collapse:
        collapse_exps(exp_names, args.path)
    nodes, pops, nCpus, sfs, stims, versions = read_exps(exp_names, condition='sfs', args=args)
    plt.title("Population Size Scaling w. Nodes")
    # step 1
    plotScaling(nCpus, nodes, pops, sfs, stims, versions, args.path, how='sfs')
    # step 2 (profile generation) is disabled
    # step 3
    plot_CPUGPU_bottleneck(nCpus, nodes, pops, sfs, stims, versions, args.path, how='sfs')
    # step 4
    generate_result_table(nCpus, nodes, pops, sfs, stims, versions, args.path, how='sfs')


def stim_plot_strategy(exp_names, args, collapse=False):
    """Run the scaling plot, bottleneck plot and result table for the 'stims' study.

    ``how`` is 'full' when a constraint file was given and 'vanilla' otherwise.

    Args:
        exp_names: experiment folder names under ``args.path``.
        args: parsed command-line arguments (``path``, ``constraint_file``).
        collapse: when true, collapse versioned folders first.
    """
    if collapse:
        collapse_exps(exp_names, args.path)
    nodes, pops, nCpus, sfs, stims, versions = read_exps(exp_names, condition='stims', args=args)
    plt.title("Population Size Scaling w. Nodes")
    # step 1
    how = 'full' if args.constraint_file else 'vanilla'
    plotScaling(nCpus, nodes, pops, sfs, stims, versions, args.path, how=how)
    # step 2 (profile generation) is disabled
    # step 3
    plot_CPUGPU_bottleneck(nCpus, nodes, pops, sfs, stims, versions, args.path, how=how)
    # step 4
    generate_result_table(nCpus, nodes, pops, sfs, stims, versions, args.path, how=how)


def vanilla_plot_strategy(exp_names, args, collapse=False):
    """Run the default (population) study: scaling plot, bottleneck plot, table.

    With a constraint file, ``how`` becomes 'full' and the file's base name is
    used as the title; otherwise ``how`` is 'vanilla' and no title is passed.
    Custom matplotlib parameters are applied for the run and restored at the
    end.

    Args:
        exp_names: experiment folder names under ``args.path``.
        args: parsed command-line arguments (``path``, ``constraint_file``).
        collapse: when true, collapse versioned folders first.
    """
    if collapse:
        collapse_exps(exp_names, args.path)
    print("NOT COLLAPSING CHANGE L8R")
    nodes, pops, nCpus, sfs, stims, versions = read_exps(exp_names, args=args)
    plt.title("Population Size Scaling w. Nodes")
    set_custom_params_plt()
    print("CUSTOMING PARAMS")
    # step 1
    how = 'vanilla'
    title = None
    if args.constraint_file:
        how = 'full'
        title = os.path.basename(args.constraint_file)
    plotScaling(nCpus, nodes, pops, sfs, stims, versions, args.path, how=how, title=title)
    # step 2 (profile generation) is disabled
    # step 3
    plot_CPUGPU_bottleneck(nCpus, nodes, pops, sfs, stims, versions, args.path, how=how, title=title)
    # step 4
    generate_result_table(nCpus, nodes, pops, sfs, stims, versions, args.path, how=how, title=title)
    restore_default_mpl_params()
    # step 5 (GPU pie charts) is disabled


def strong_plot_strategy(exp_names, args, collapse=False):
    """Build the strong-scaling table and compare it with the saved weak-scaling CSV.

    The condition and the weak-scaling file are chosen from ``args.stims`` /
    ``args.sfs`` (stims takes precedence); the CSV is read from
    ``weak_outputs``.

    Args:
        exp_names: experiment folder names under ``args.path``.
        args: parsed command-line arguments (``path``, ``stims``, ``sfs``).
        collapse: when true, collapse versioned folders first.
    """
    if collapse:
        collapse_exps(exp_names, args.path)

    if args.stims:
        how, weak_name, compare = "strong_stims", "stim_scale.csv", compare_stim_scaling
    elif args.sfs:
        how, weak_name, compare = "strong_sfs", 'sf_scale.csv', compare_sf_scaling
    else:
        how, weak_name, compare = "strong_vanilla", 'pop_scale.csv', compare_scaling

    nodes, pops, nCpus, sfs, stims, versions = read_exps(exp_names, condition=how, args=args)
    plt.title("Population Size Scaling w. Nodes")
    # step 4
    strong_df = generate_result_table(nCpus, nodes, pops, sfs, stims, versions, args.path, how=how)
    weak_df = pd.read_csv(os.path.join("weak_outputs", weak_name))
    compare(strong_df, weak_df)


def check_collapse(exp_names, path):
    """Return True when some experiment folder has no aggregated counterpart.

    Plain files are ignored. For every other name, the aggregated folder is
    the text before the first "SF" plus "SF"; if that folder does not exist
    under ``path`` a collapse is needed.
    """
    for exp_name in exp_names:
        if os.path.isfile(os.path.join(path, exp_name)):
            continue
        aggregated = os.path.join(path, exp_name.split("SF")[0] + "SF")
        if not os.path.isdir(aggregated):
            return True
    return False


def find_largest_std(exp_names, args):
    """Collect runtime standard deviations per node count and pickle them.

    For each experiment the first candidate log that ``processLog`` can parse
    is used. The resulting ``{node: [std, ...]}`` mapping is written to
    ``std_dev_backup.pkl`` and the process then exits.

    Args:
        exp_names: experiment folder names.
        args: parsed command-line arguments (``path`` is read here).
    """
    stds = {}
    nodes, pops, nCpus, sfs, stims, versions = read_exps(exp_names, condition="permissive")
    for node, pop, nCpu, sf, stim, version in zip(nodes, pops, nCpus, sfs, stims, versions):
        logRes = None
        for f in format_logname_general(node, pop, nCpu, stim, sf, args.path, 'vanilla'):
            try:
                logRes = processLog(f)
            except Exception:
                # Unparseable candidate: try the next one. Narrowed from a
                # bare except so Ctrl-C / SystemExit still propagate.
                continue
            break
        if not logRes:
            continue
        std_runtime = np.std(logRes['runtimes'])
        # ignore an obvious case where algorithm glitched for some reason and
        # took 300 seconds or something
        if node == '1' and std_runtime > 20:
            continue
        stds.setdefault(node, []).append(std_runtime)

    stds_keys = list(stds.keys())
    for idx, key in enumerate(stds_keys):
        mean_std = np.mean(stds[key])
        if np.isnan(mean_std) or not mean_std:
            # Borrow the previous node's values when this one is NaN or zero.
            # NOTE(review): for idx == 0 this wraps around to the last key --
            # confirm that is intended.
            stds[key] = stds[stds_keys[idx - 1]]

    with open("std_dev_backup.pkl", 'wb') as f:
        pickle.dump(stds, f)
    sys.exit()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='Benchmarking viz')
    parser.add_argument('--stims', action="store_true")
    parser.add_argument('--sfs', action="store_true")
    parser.add_argument('--strong', action="store_true")
    parser.add_argument('--constraint_file', type=str, required=False, default=None)
    parser.add_argument('--path', type=str, required=False, default="outputs")
    args = parser.parse_args()

    # make this more strict later --> should match coresnodes_POPSIZE_iteration
    exp_names = [dirname for dirname in os.listdir(args.path)
                 if "_" in dirname and "ipynb" not in dirname]
    # find_largest_std(exp_names, args)
    collapse = check_collapse(exp_names, args.path)
    # The detected value is deliberately overridden: collapsing is disabled.
    collapse = False
    print(collapse, "SHOULD I COLLAPSE ?? IM SPITTING THESE RAPS TIL THE DAY THAT I DROP")

    if args.stims:
        stim_plot_strategy(exp_names, args, collapse=collapse)
    elif args.sfs:
        sf_plot_strategy(exp_names, args, collapse=collapse)
    else:
        vanilla_plot_strategy(exp_names, args, collapse=collapse)
    if args.strong:
        strong_plot_strategy(exp_names, args, collapse=collapse)