-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathinteract
More file actions
executable file
·127 lines (112 loc) · 5.01 KB
/
Copy pathinteract
File metadata and controls
executable file
·127 lines (112 loc) · 5.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/bin/bash -l
# Script to start interactive job on ARC systems
#
# Usage: interact [OPTIONS]
#   OPTIONS are handed to srun unchanged. --help and --verbose are also
#   interpreted by this script (see usage below).
#
# Environment:
#   SYSNAME  selects the default partition, gres and interactive QoS
#   SHELL    program started under --pty (falls back to /bin/bash if unset)
#
# Exit status: 0 after printing help, 1 when the request is rejected or
# SYSNAME is not recognized, otherwise the exit status of srun.

# Define color
RED='\033[0;31m'
YELLOW='\033[0;33m'
NC='\033[0m' # No Color

# Print the help text to stdout.
usage() {
  echo "Request an interactive job"
  echo "Usage: interact [OPTIONS]"
  echo "where OPTIONS are any valid Slurm (sbatch/salloc/srun) options, or:"
  echo " --verbose Print the Slurm command being run before starting job (for debugging purposes)"
  echo " --help Print this message"
  echo
  echo "To run, an account should be provided with -A [account] or --account=[account]. See 'quota' for a list of valid accounts and utilization metrics."
}

# Inspect the user's options, add site defaults that were not supplied,
# then start the interactive srun session.
# Arguments: the script's own command line ("$@")
main() {
  # Arguments are kept in arrays so that values containing spaces or glob
  # characters reach srun exactly as the user typed them.
  local -a user_args=("$@")
  # We'll add these. No inner quotes: nothing re-parses this word, so quotes
  # written here would become part of the job name itself.
  local -a extra_args=(--job-name=INTERACT)
  local -a cmd
  local want_help=0 verbose=0
  local have_account=0 have_partition=0 have_gpu=0 have_qos=0
  local user_partition="" effective_partition=""
  local def_partition="" def_gres="" def_qos=""
  local i arg next

  # Single pass over the user's arguments. Matching is done per argument
  # (by prefix) so text inside another option's value cannot trigger a match.
  for ((i = 0; i < ${#user_args[@]}; i++)); do
    arg=${user_args[i]}
    next=${user_args[i + 1]-}
    case "$arg" in
      --help*) want_help=1 ;;
      --verbose*) verbose=1 ;;
      -A* | --account*) have_account=1 ;;
      --qos*) have_qos=1 ;;
      # If a partition is given more than once keep the last one, in line
      # with the assumption below that later arguments take priority.
      --partition=*)
        have_partition=1
        user_partition=${arg#--partition=}
        ;;
      --partition* | -p)
        have_partition=1
        user_partition=$next
        ;;
      -p*)
        # Attached short form (-pNAME); a stray '=' is tolerated.
        have_partition=1
        user_partition=${arg#-p}
        user_partition=${user_partition#=}
        ;;
      --gres=gpu* | --gpus* | -G*) have_gpu=1 ;;
      --gres) [[ $next == gpu* ]] && have_gpu=1 ;;
    esac
  done

  # Print help if requested or if no argument is provided. Done before the
  # SYSNAME check so the help text is available even where SYSNAME is unset.
  if ((want_help || $# == 0)); then
    usage
    exit 0
  fi

  # System-specific defaults
  case "${SYSNAME}" in
    tinkercliffs | owl)
      def_partition="normal_q"
      def_gres=""
      ;;
    falcon)
      def_partition="v100_normal_q,t4_normal_q"
      def_gres="gpu:1"
      ;;
    *)
      echo "SYSNAME '${SYSNAME}' not recognized. Exiting." >&2
      exit 1
      ;;
  esac

  # If account not specified by user
  if ((!have_account)); then
    echo "An account must be specified with -A or --account. A list of valid allocation accounts (and remaining balances) can be found with the command 'quota'." >&2
    exit 1
  fi

  # If partition not specified by user, inject the default. Either way,
  # remember the effective partition for the QoS mapping further down.
  if ((have_partition)); then
    effective_partition=$user_partition
  else
    echo -e "${YELLOW} --- No partition specified:${NC} setting --partition=$def_partition"
    extra_args=("--partition=$def_partition" "${extra_args[@]}")
    effective_partition=$def_partition
  fi

  # If gres not specified by user (and def_gres not empty)
  if ((!have_gpu)) && [[ -n "$def_gres" ]]; then
    echo -e "${YELLOW} --- No gres specified:${NC} setting --gres=$def_gres"
    extra_args=("--gres=$def_gres" "${extra_args[@]}")
  fi

  # Check if a GPU is requested on a non-GPU partition. Only a partition the
  # user named explicitly is checked; the injected default is not.
  if ((have_gpu && have_partition)) &&
    [[ $user_partition == normal_q* || $user_partition == preemptable_q* ]]; then
    echo -e "${RED} --- Invalid Request:${NC} You cannot request GPUs on a non-GPU partition (normal_q or preemptable_q)." >&2
    exit 1
  fi

  # If QoS not specified by user, map the effective partition to its
  # interactive QoS and inject it.
  # TODO: add mapping for new nodes & partitions
  if ((!have_qos)); then
    case "${SYSNAME}:${effective_partition}" in
      tinkercliffs:normal_q) def_qos="tc_normal_int" ;;
      tinkercliffs:a100_normal_q) def_qos="tc_a100_normal_int" ;;
      tinkercliffs:h200_normal_q) def_qos="tc_h200_normal_int" ;;
      owl:normal_q) def_qos="owl_normal_int" ;;
      falcon:v100_normal_q,t4_normal_q) def_qos="fal_v100_normal_int" ;;
      falcon:v100_normal_q) def_qos="fal_v100_normal_int" ;;
      falcon:t4_normal_q) def_qos="fal_t4_normal_int" ;;
      falcon:a30_normal_q) def_qos="fal_a30_normal_int" ;;
      falcon:l40s_normal_q) def_qos="fal_l40s_normal_int" ;;
      *)
        echo -e "${YELLOW} --- Warning:${NC} No interactive QoS mapping for ${SYSNAME}:${effective_partition}. Proceeding without --qos."
        def_qos=""
        ;;
    esac
    if [[ -n "$def_qos" ]]; then
      # uncomment to debug QoS mapping
      # echo -e "${YELLOW} --- No QoS specified:${NC} setting --qos=$def_qos"
      extra_args=("--qos=$def_qos" "${extra_args[@]}")
    fi
  fi

  # Command to be run
  # Later arguments appear to be given priority so add user-provided
  # arguments second on the off chance of conflicts
  cmd=(srun "${extra_args[@]}" "${user_args[@]}" --pty "${SHELL:-/bin/bash}")

  # Print the command being run if user requested verbose mode
  # (space-joined for readability; not re-quoted for copy/paste)
  if ((verbose)); then
    echo "Running: ${cmd[*]}"
  fi

  # Print the warning message in red
  echo -e "${RED} --- Warning:${NC}"
  echo " Your session consumes resources (CPUs, memory, and GPUs) while it remains open."
  echo " Close your session whenever you finish your work."
  echo " Other users cannot use the resources allocated to your job until you close your session."
  echo " Consider the use of batch jobs to optimize resources allocation."

  # Run; the script's exit status is srun's.
  "${cmd[@]}"
}

main "$@"