From cb2257f1e31a044b712d5d1a98a16dbea56c3f6e Mon Sep 17 00:00:00 2001 From: Lydia Brothers Date: Mon, 18 Jul 2016 13:26:10 -0400 Subject: [PATCH 1/4] enabling task core specification --- awe/workqueue.py | 5 ++++- scripts/awe-wq | 8 +++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/awe/workqueue.py b/awe/workqueue.py index ae7021b..744351c 100644 --- a/awe/workqueue.py +++ b/awe/workqueue.py @@ -181,9 +181,11 @@ def __init__(self): self.waittime = 10 # in seconds self.wq_logfile = 'debug/wq.log' self.wqstats_logfile = 'debug/wq-stats.log' - self.monitor = False + self.wqtransactions_logfile = 'debug/wq-transactions.log' + self.monitor = True self.summaryfile = '' self.capacity = False + self.cores = 1 self.task_config = { "cores": 1, } @@ -278,6 +280,7 @@ def _mk_wq(self): # Turn cctools WorkQueue object status monitoring on or off if self.monitor: wq.enable_monitoring(self.summaryfile) + wq.specify_transactions_log(self.wqtransactions_logfile) if self.capacity: # Determine the number of workers the WorkQueue object can handle diff --git a/scripts/awe-wq b/scripts/awe-wq index c27fecc..9b0b236 100755 --- a/scripts/awe-wq +++ b/scripts/awe-wq @@ -46,8 +46,9 @@ def getopts(args=None): g.add_option('-p', '--port', help='Port to run the master on (random|) [%default]') g.add_option('--fast-abort', type=float, help='Use this fastabort multiplier [%default]') g.add_option('--debug', action='store_true', help='Write WorkQueue debug information [%default]') - g.add_option('--cores', type=int, help='Use this to specify number of cores for each worker [%default]') - + g.add_option('--worker-cores', type=int, help='Use this to specify number of cores for each worker [%default]') + g.add_option('--task-cores', type=int, help='Use this to specify number of cores for each task [%default]') + p.add_option_group(g) p.set_defaults( @@ -118,7 +119,8 @@ def config(opts): if opts.debug: cfg.debug = opts.debug - cfg.cores = opts.cores + cfg.cores = opts.worker_cores + cfg.task_input["cores"] = opts.task_cores # the "main" function of the worker cfg.execute(os.path.join(INSTANCE_ROOT, 'execute-task.sh')) From 4aba9d75e4b7ad036698e75859af97e14ea35df5 Mon Sep 17 00:00:00 2001 From: Lydia Brothers Date: Wed, 20 Jul 2016 10:48:28 -0400 Subject: [PATCH 2/4] Added functionality to monitor resources and specify task core usage --- awe/workqueue.py | 4 ++-- scripts/awe-verify | 2 +- scripts/awe-wq | 7 ++++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/awe/workqueue.py b/awe/workqueue.py index 744351c..3a4af52 100644 --- a/awe/workqueue.py +++ b/awe/workqueue.py @@ -183,9 +183,9 @@ def __init__(self): self.wqstats_logfile = 'debug/wq-stats.log' self.wqtransactions_logfile = 'debug/wq-transactions.log' self.monitor = True - self.summaryfile = '' + self.summaryfile = 'debug/monitor-summaries' self.capacity = False - self.cores = 1 + self.cores = 1 self.task_config = { "cores": 1, } diff --git a/scripts/awe-verify b/scripts/awe-verify index 28b824a..1f63725 100755 --- a/scripts/awe-verify +++ b/scripts/awe-verify @@ -3,7 +3,7 @@ EXITCODE=0 -ACCEPTABLE_PYTHON_VERSION=(2.6 2.7 3.4) +ACCEPTABLE_PYTHON_VERSION=(2.6 2.7 3.4 3.5) ACCEPTABLE_GROMACS_VERSION=4.5 EXECUTABLES=( diff --git a/scripts/awe-wq b/scripts/awe-wq index 9b0b236..396d5e1 100755 --- a/scripts/awe-wq +++ b/scripts/awe-wq @@ -71,7 +71,7 @@ def getopts(args=None): port = 'random', fast_abort = False, debug = False, - cores = 1, + cores = 1, ) @@ -120,7 +120,7 @@ def config(opts): cfg.debug = opts.debug cfg.cores = opts.worker_cores - cfg.task_input["cores"] = opts.task_cores + cfg.task_config["cores"] = opts.task_cores # the "main" function of the worker cfg.execute(os.path.join(INSTANCE_ROOT, 'execute-task.sh')) @@ -190,7 +190,8 @@ def main(opts): system = system, iterations = opts.iterations, resample = resampler, - checkpointfreq = 1 + checkpointfreq = 1, + log_it = True, ) resampler.traxlogger._picklemode = 2 resampler.traxlogger._pickleprotocol = 2 From afe38d2a4004988636499e26b70654d64f30d824 Mon Sep 17 00:00:00 2001 From: Lydia Brothers Date: Fri, 29 Jul 2016 13:52:11 -0400 Subject: [PATCH 3/4] Updated visualization tools for wq-stats.log for v.5.4.13, added functionality to specify task disk and memory size --- awe-instance-data/openmm/env.sh | 9 +- awe-instance-data/openmm/simulation.py | 173 +++++++++++++++++++++++++ awe/workqueue.py | 8 +- scripts/awe-wq | 17 ++- visualization/awe_plot_history | 25 +++- 5 files changed, 216 insertions(+), 16 deletions(-) diff --git a/awe-instance-data/openmm/env.sh b/awe-instance-data/openmm/env.sh index 00ecc44..3301ea2 100644 --- a/awe-instance-data/openmm/env.sh +++ b/awe-instance-data/openmm/env.sh @@ -46,15 +46,12 @@ check-initial() { run-md() { puts "Running simulation" - echo $GMXLIB - python simulate.py + python simulate.py -s 100000 echo } assign() { - puts "Assigning trajectory" - ./awe-assign cells.dat CellIndices.dat traj.xtc StructureIndices.dat $ASSIGNMENT - echo + echo "Running assignment" } check-result() { @@ -74,7 +71,7 @@ package() { cleanup() { puts "Cleaning up" - rm -rv $CLEANUP + #rm -rv $CLEANUP echo } diff --git a/awe-instance-data/openmm/simulation.py b/awe-instance-data/openmm/simulation.py index e69de29..a5c6c44 100644 --- a/awe-instance-data/openmm/simulation.py +++ b/awe-instance-data/openmm/simulation.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python3 + +########################################################################## +# this script was generated by openmm-builder. to customize it further, +# you can save the file to disk and edit it with your favorite editor. +########################################################################## + +from __future__ import print_function +from simtk.openmm import app +import simtk.openmm as mm +from simtk import unit +from sys import stdout +import numpy as np +import argparse +import mdtraj +import time + +np.set_printoptions(threshold=np.nan) + +def parse_args(args): + parser = argparse.ArgumentParser() + + parser.add_argument("-c", "--checkpoint", + help="path to an OpenMM checkpoint", + type=str + ) + + parser.add_argument("-s", "--steps", + help="the number of integration steps", + type=int, + default=10000 + ) + + parser.add_argument("-p", "--platform", + help="the OpenMM Platform to use in this run", + type=str, + default="CPU", + choices=["Reference", "CPU", "OpenCL", "CUDA"] + ) + + parser.add_argument("-i", "--input-pdb", + help="path to the input trajectory file", + type=str, + default="structure.pdb" + ) + + parser.add_argument("-o", "--output", + help="path to trajectory output file", + type=str, + default="structure2.pdb", + ) + + parser.add_argument("-a", "--atom-select", + help="MDTraj-compatible atom selection string for computing RMSD", + type=str, + default="all" + ) + + parser.add_argument("--cell-defs", + help="path to the cell definition file", + type=str, + default="cells.dat", + ) + + parser.add_argument("--assignment-out", + help="path to the file to write the cell assignment", + type=str, + default="cell2.dat" + ) + + return parser.parse_args() + + +def read_cells(infile): + cells = [] + ncells = 0 + natoms = 0 + ndims = 0 + with open(infile, 'r') as f: + for line in f: + try: + if "ncells:" in line or "ncoords:" in line or "ndims:" in line: + entries = line.strip().split() + if entries[0] == "ncells:": + ncells = int(entries[1]) + elif entries[0] == "ncoords:": + natoms = int(entries[1]) + else: + ndims = int(entries[1]) + else: + cells.append(float(line)) + except ValueError: + continue + print(ncells, natoms, ndims) + return np.reshape(np.array(cells), (ncells, natoms, ndims)) + + +def determine_assignment(pdb_file, cell_file, atom_select, assignment_out): + cells = read_cells(cell_file) + min_rmsd = float('inf') + assignment = -1 + traj = mdtraj.load(pdb_file) + cell_traj = mdtraj.load(pdb_file) + rmsd_atoms = traj.topology.select(atom_select) + print(rmsd_atoms) + for i in range(0, len(cells)): + cell_traj.xyz = cells[i] + rmsd = mdtraj.rmsd(traj, + cell_traj, + atom_indices=rmsd_atoms, + ) + print(rmsd) + if rmsd < min_rmsd: + min_rmsd = rmsd + assignment = i + with open(assignment_out, 'w') as f: + f.write(str(assignment)) + + + +if __name__ == "__main__": + args = parse_args(None) + print(args) + print("Loading pdb...") + pdb = app.PDBFile(args.input_pdb) + print(pdb.positions) + print(len(pdb.positions)) + print("Loading force field...") + forcefield = app.ForceField('amber99sb.xml', 'amber99_obc.xml') + + print("Creating system...") + system = forcefield.createSystem(pdb.topology, nonbondedMethod=app.NoCutoff, + constraints=None) + print("Creating integrator...") + integrator = mm.LangevinIntegrator(300*unit.kelvin, 1/unit.picoseconds, + 1.0*unit.femtoseconds) + + print("Creating platform...") + try: + platform = mm.Platform.getPlatformByName(args.platform) + except Exception: + print("Error: could not load platform %s. Loading Reference platform" % (args.platform)) + platform = mm.Platform.getPlatformByName("Reference") + print("Creating simulation object...") + simulation = app.Simulation(pdb.topology, system, integrator, platform) + + simulation.context.setPositions(pdb.positions) + + print("Minimizing...") + #simulation.minimizeEnergy() + + print("Equilibrating...") + #simulation.step(100) + + print("Creating PDB reporter...") + simulation.reporters.append(app.PDBReporter(args.output, args.steps)) + + print("Creating checkpointer...") + simulation.reporters.append(app.CheckpointReporter("ckpt.chk", 100)) + + print('Running Production...') + #steps = 0 + #interval = 100 + #while steps < args.steps: + simulation.step(args.steps + 1) + #steps += interval + + print("Determining cell assignment...") + + time.sleep(5) + determine_assignment(args.output, args.cell_defs, args.atom_select, args.assignment_out) + + print("Done!") diff --git a/awe/workqueue.py b/awe/workqueue.py index c6fe5be..67a6e72 100644 --- a/awe/workqueue.py +++ b/awe/workqueue.py @@ -185,9 +185,11 @@ def __init__(self): self.monitor = True self.summaryfile = 'debug/monitor-summaries' self.capacity = False - self.cores = 1 + #self.cores = 1 self.task_config = { "cores": 1, + "memory": 250, + "disk": 250, } self._executable = None self._cache = set() @@ -305,7 +307,7 @@ def _mk_wq(self): awe.util.makedirs_parent(self.wqstats_logfile) _AWE_WORK_QUEUE.specify_log(self.wqstats_logfile) - # Return a reference to teh singleton + # Return a reference to the singleton return _AWE_WORK_QUEUE @@ -658,6 +660,8 @@ def new_task(self): cmd = self.cfg.executable.remotepath task = WQ.Task('./' + cmd) task.specify_cores(self.cfg.task_config["cores"]) + task.specify_memory(self.cfg.task_config["memory"]) + task.specify_disk(self.cfg.task_config["disk"]) ### executable self.cfg.executable.add_to_task(task) diff --git a/scripts/awe-wq b/scripts/awe-wq index 85c9237..e74501f 100755 --- a/scripts/awe-wq +++ b/scripts/awe-wq @@ -46,8 +46,10 @@ def getopts(args=None): g.add_option('-p', '--port', help='Port to run the master on (random|) [%default]') g.add_option('--fast-abort', type=float, help='Use this fastabort multiplier [%default]') g.add_option('--debug', action='store_true', help='Write WorkQueue debug information [%default]') - g.add_option('--worker-cores', type=int, help='Use this to specify number of cores for each worker [%default]') + #g.add_option('--worker-cores', type=int, help='Use this to specify number of cores for each worker [%default]') g.add_option('--task-cores', type=int, help='Use this to specify number of cores for each task [%default]') + g.add_option('--task-disk', type=int, help='Use this to specify disk usage for each task [%default]') + g.add_option('--task-memory', type=int, help='Use this to specify memory usage for each task [%default]') p.add_option_group(g) @@ -76,12 +78,17 @@ def getopts(args=None): name = None, port = 'random', fast_abort = False, - debug = False, + debug = True, ### OpenMM params enable_openmm = False, + openmm_script = os.path.join('awe-instance-data', 'openmm', 'simulation.py'), - cores = 1, + ### Cores params + #worker_cores = 1, + task_cores = 1, + task_disk = 250, + task_memory = 250, ) @@ -129,8 +136,10 @@ def config(opts): if opts.debug: cfg.debug = opts.debug - cfg.cores = opts.worker_cores + #cfg.cores = opts.worker_cores cfg.task_config["cores"] = opts.task_cores + cfg.task_config["disk"] = opts.task_disk + cfg.task_config["memory"] = opts.task_memory # the "main" function of the worker cfg.execute(os.path.join(INSTANCE_ROOT, 'execute-task.sh')) diff --git a/visualization/awe_plot_history b/visualization/awe_plot_history index 404007a..9fafa32 100755 --- a/visualization/awe_plot_history +++ b/visualization/awe_plot_history @@ -35,8 +35,8 @@ cat > index.html << EOF - - + + @@ -78,6 +78,8 @@ set title "Total Workers Utilized in AWE-WQ Run" set xlabel "Timestamp" set ylabel "Worker Info" + +#Use below command for deprecated wq-stats and cctools 5.4.13 wq-stats plot wq_stats using (\$1/1000000):4 title 'workers_idle' with lines ls 1, wq_stats using (\$1/1000000):5 title 'workers_busy' with lines ls 2, wq_stats using (\$1/1000000):2 title "total workers connected" with lines ls 3 #----------------------------------------- @@ -87,7 +89,10 @@ set title "Total Task Utilized in AWE-WQ Run" set xlabel "Timestamp" set ylabel "Task Info" -plot wq_stats using (\$1/1000000):8 title 'tasks_waiting' with lines ls 1, wq_stats using (\$1/1000000):9 title 'tasks_running' with lines ls 2, wq_stats using (\$1/1000000):12 title 'total_tasks_complete' with lines ls 3 +plot wq_stats using (\$1/1000000):14 title 'tasks_waiting' with lines ls 1, wq_stats using (\$1/1000000):16 title 'tasks_running' with lines ls 2, wq_stats using (\$1/1000000):20 title 'total_tasks_complete' with lines ls 3 + +#Use below command for deprecated wq-stats (before v.5.4) +#plot wq_stats using (\$1/1000000):8 title 'tasks_waiting' with lines ls 1, wq_stats using (\$1/1000000):9 title 'tasks_running' with lines ls 2, wq_stats using (\$1/1000000):12 title 'total_tasks_complete' with lines ls 3 #----------------------------------------- @@ -96,8 +101,10 @@ set title "Network Transfer Information for AWE-WQ Run" set xlabel "Timestamp" set ylabel "Transfer Info (In Bytes)" +plot wq_stats using (\$1/1000000):36 title 'bytes_sent' with lines ls 1, wq_stats using (\$1/1000000):37 title 'bytes_received' with lines ls 2 -plot wq_stats using (\$1/1000000):17 title 'bytes_sent' with lines ls 1, wq_stats using (\$1/1000000):18 title 'bytes_received' with lines ls 2 +#Use below command for deprecated wq-stats (before v.5.4) +#plot wq_stats using (\$1/1000000):17 title 'bytes_sent' with lines ls 1, wq_stats using (\$1/1000000):18 title 'bytes_received' with lines ls 2 #----------------------------------------- @@ -113,7 +120,17 @@ plot wq_stats using (\$1/1000000):17 title 'bytes_sent' with lines ls 1, wq_stat set boxwidth 0.8 plot task_execution using 0:1 with boxes title "cmd_execution_time" + +#-------------------------------------------- + set output timestamp_path."cores_capacity.png" + set title "capacity vs. connected workers" + set xlabel "Timestamp" + set ylabel "Cores Capacity Information" + + #below command for newer wq-stats, cctools v.5.4 + plot wq_stats using (\$1/1000000):40 title "capacity_cores" with lines ls 1, wq_stats using (\$1/1000000):2 title "workers_connected" with lines ls 3, wq_stats using (\$1/1000000):46 title "committed_cores" with lines ls 2, wq_stats using (\$1/ 1000000):43 title "total_cores" with lines ls 4 + #----------------------------------------- set title "Task Execution Time Frequency Plot" From 7e96afdc230843b5d63ccc73996538e61971dc4e Mon Sep 17 00:00:00 2001 From: Lydia Brothers Date: Fri, 29 Jul 2016 14:03:43 -0400 Subject: [PATCH 4/4] cleaned up some visualization graphs with specification for graphing deprecated wq-stats and newer (5.4) wq-stats. also added task memory and disk specification --- awe/workqueue.py | 1 - scripts/awe-wq | 3 --- visualization/awe_plot_history | 1 + 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/awe/workqueue.py b/awe/workqueue.py index 67a6e72..c8bc1c9 100644 --- a/awe/workqueue.py +++ b/awe/workqueue.py @@ -185,7 +185,6 @@ def __init__(self): self.monitor = True self.summaryfile = 'debug/monitor-summaries' self.capacity = False - #self.cores = 1 self.task_config = { "cores": 1, "memory": 250, diff --git a/scripts/awe-wq b/scripts/awe-wq index e74501f..f240af8 100755 --- a/scripts/awe-wq +++ b/scripts/awe-wq @@ -46,7 +46,6 @@ def getopts(args=None): g.add_option('-p', '--port', help='Port to run the master on (random|) [%default]') g.add_option('--fast-abort', type=float, help='Use this fastabort multiplier [%default]') g.add_option('--debug', action='store_true', help='Write WorkQueue debug information [%default]') - #g.add_option('--worker-cores', type=int, help='Use this to specify number of cores for each worker [%default]') g.add_option('--task-cores', type=int, help='Use this to specify number of cores for each task [%default]') g.add_option('--task-disk', type=int, help='Use this to specify disk usage for each task [%default]') g.add_option('--task-memory', type=int, help='Use this to specify memory usage for each task [%default]') @@ -85,7 +84,6 @@ def getopts(args=None): openmm_script = os.path.join('awe-instance-data', 'openmm', 'simulation.py'), ### Cores params - #worker_cores = 1, task_cores = 1, task_disk = 250, task_memory = 250, @@ -136,7 +134,6 @@ def config(opts): if opts.debug: cfg.debug = opts.debug - #cfg.cores = opts.worker_cores cfg.task_config["cores"] = opts.task_cores cfg.task_config["disk"] = opts.task_disk cfg.task_config["memory"] = opts.task_memory diff --git a/visualization/awe_plot_history b/visualization/awe_plot_history index 9fafa32..47abce0 100755 --- a/visualization/awe_plot_history +++ b/visualization/awe_plot_history @@ -64,6 +64,7 @@ set format x "%H:%M" set style line 1 lc rgb '#0060ad' lt 1 lw 1 # --- blue set style line 2 lc rgb '#dd181f' lt 1 lw 1 # --- red set style line 3 lc rgb '#33cc33' lt 1 lw 2 # --- green +set style line 4 lc rgb '#ba58f9' lt 1 lw 1 # --- violet #First path indicates directory where log file exists,by default we assume it is in same directory as this file #Second path is used to specify a different directory for the resulting .png files to be placed (instead of working directory), e.g."./debug/"