diff --git a/Cargo.lock b/Cargo.lock
index fd28509..eec5226 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -156,6 +156,8 @@ dependencies = [
  "libc",
  "log",
  "nix",
+ "pest",
+ "pest_derive",
  "rand",
  "rand_distr",
  "serde",
@@ -800,20 +802,19 @@ checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd"
 
 [[package]]
 name = "pest"
-version = "2.7.8"
+version = "2.8.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "56f8023d0fb78c8e03784ea1c7f3fa36e68a723138990b8d5a47d916b651e7a8"
+checksum = "e0848c601009d37dfa3430c4666e147e49cdcf1b92ecd3e63657d8a5f19da662"
 dependencies = [
  "memchr",
- "thiserror 1.0.58",
  "ucd-trie",
 ]
 
 [[package]]
 name = "pest_derive"
-version = "2.7.8"
+version = "2.8.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b0d24f72393fd16ab6ac5738bc33cdb6a9aa73f8b902e8fe29cf4e67d7dd1026"
+checksum = "11f486f1ea21e6c10ed15d5a7c77165d0ee443402f0780849d1768e7d9d6fe77"
 dependencies = [
  "pest",
  "pest_generator",
@@ -821,9 +822,9 @@ dependencies = [
 
 [[package]]
 name = "pest_generator"
-version = "2.7.8"
+version = "2.8.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fdc17e2a6c7d0a492f0158d7a4bd66cc17280308bbaff78d5bef566dca35ab80"
+checksum = "8040c4647b13b210a963c1ed407c1ff4fdfa01c31d6d2a098218702e6664f94f"
 dependencies = [
  "pest",
  "pest_meta",
@@ -834,11 +835,10 @@ dependencies = [
 
 [[package]]
 name = "pest_meta"
-version = "2.7.8"
+version = "2.8.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "934cd7631c050f4674352a6e835d5f6711ffbfb9345c2fc0107155ac495ae293"
+checksum = "89815c69d36021a140146f26659a81d6c2afa33d216d736dd4be5381a7362220"
 dependencies = [
- "once_cell",
  "pest",
  "sha2",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index c6b81a7..982430a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -26,3 +26,5 @@ aya-obj = "0.2.1"
 caps = "0.5.5"
 io-uring = "0.7.10"
 enum_dispatch = "0.3.13"
+pest = "2.8.1"
+pest_derive = "2.8.1"
diff --git a/src/script/ast.rs b/src/script/ast.rs
new file mode 100644
index 0000000..8500718
--- /dev/null
+++ b/src/script/ast.rs
@@ -0,0 +1,51 @@
+use std::collections::HashMap;
+
+/// Argument passed to an instruction.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Arg {
+    /// Simple constant
+    Const { text: String },
+
+    /// Variable available at runtime
+    Var { name: String },
+
+    /// Helper like random_path
+    Dynamic { name: String, args: Vec<Arg> },
+}
+
+/// Instruction inside a work unit body.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Instruction {
+    Task { name: Arg, args: Vec<Arg> },
+    Open { path: Arg },
+    Debug { text: Arg },
+    Ping { server: Arg },
+}
+
+/// Instruction inside the `machine` block.
+#[derive(Debug, Clone, PartialEq)]
+pub enum MachineInstruction {
+    Server { port: u16 },
+    Profile { target: String },
+    Path { value: String },
+}
+
+/// Distribution attached to a work unit.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Dist {
+    Exp { rate: f64 },
+}
+
+/// Top-level node of a parsed script.
+#[derive(Debug, Clone)]
+pub enum Node {
+    Machine {
+        m_instructions: Vec<MachineInstruction>,
+    },
+    Work {
+        name: String,
+        args: HashMap<String, String>,
+        instructions: Vec<Instruction>,
+        dist: Option<Dist>,
+    },
+}
diff --git a/src/script/grammar.peg b/src/script/grammar.peg
new file mode 100644
index 0000000..fe7b287
--- /dev/null
+++ b/src/script/grammar.peg
@@ -0,0 +1,104 @@
+WHITESPACE = _{" " | "\t" | NEWLINE}
+COMMENT = _{"//" ~ (!NEWLINE ~ ANY)*}
+
+ident_char = {ASCII_ALPHA | "_" | "$"}
+ident = @{ident_char ~ (ASCII_DIGIT | ident_char)*}
+
+// A numeric constant needs at least one digit, otherwise constant (and
+// thus arg) matches the empty string, e.g. after a trailing comma
+constant = {
+    "\"" ~ value ~ "\""
+    | ASCII_DIGIT+
+}
+
+randomPath = { "random_path" }
+randomString = { "random_string" }
+
+dynamicName = {
+    randomPath
+    | randomString
+}
+
+dynamic = {dynamicName ~ args}
+
+// constant should be the last
+arg = {
+    dynamic
+    | ident
+    | constant
+}
+
+args = {"(" ~ (arg ~ ("," ~ arg)* ~ ","?)? ~ ")"}
+value = {(ASCII_ALPHANUMERIC| "." | " " | "/" | ":")*}
+
+param = {ident ~ "=" ~ value}
+params = {"(" ~ (param ~ ("," ~ param)* ~ ","?)? ~ ")"}
+
+task = { "task" }
+network = { "network" }
+port = { "port" }
+open = { "open" }
+ping = { "ping" }
+debug = { "debug" }
+
+funcName = {
+    task
+    | network
+    | port
+    | open
+    | ping
+    | debug
+}
+
+exp = { "exp" }
+zipf = { "zipf" }
+uniform = { "uniform" }
+
+distName = {
+    exp
+    | zipf
+    | uniform
+}
+
+expr = {
+    function
+    | instruction
+}
+
+opt = {ident ~ "=" ~ value ~ ";" }
+
+dist = {":" ~ distName ~ "{" ~ opt* ~ "}"}
+
+work = {ident ~ params?}
+
+instruction = {funcName ~ args? ~ ";"}
+
+instructions = { expr* }
+
+function = {
+    work
+    ~ "{" ~ instructions ~ "}"
+    ~ dist?
+}
+
+server = { "server" }
+load = { "load" }
+memory = { "memory" }
+profile = { "profile" }
+path = { "path" }
+
+mInstrName = {
+    server
+    | load
+    | memory
+    | profile
+    | path
+}
+
+machineInstruction = {mInstrName ~ args? ~ ";"}
+
+machine = {"machine" ~ "{" ~ machineInstruction* ~ "}"}
+
+file = {
+    SOI ~ machine? ~ (expr*) ~ EOI
+}
diff --git a/src/script/mod.rs b/src/script/mod.rs
new file mode 100644
index 0000000..7b38a21
--- /dev/null
+++ b/src/script/mod.rs
@@ -0,0 +1,3 @@
+pub mod ast;
+pub mod parser;
+pub mod rules;
diff --git a/src/script/parser.rs b/src/script/parser.rs
new file mode 100644
index 0000000..d4c65e3
--- /dev/null
+++ b/src/script/parser.rs
@@ -0,0 +1,678 @@
+use log::{debug, trace};
+use pest::{self, Parser, error::Error};
+use std::collections::HashMap;
+
+use crate::script::ast::{Arg, Dist, Instruction, MachineInstruction, Node};
+
+/// Reasons why an argument cannot be converted into a plain string.
+#[derive(Debug)]
+pub enum ParseError {
+    NotSupported,
+    TypeMismatch,
+}
+
+// ANCHOR: parser
+#[derive(pest_derive::Parser)]
+#[grammar = "script/grammar.peg"]
+struct InstructionParser;
+// ANCHOR_END: parser
+
+// ANCHOR: parse_source
+/// Parses a workload script into a list of AST nodes.
+///
+/// Returns the pest error if `source` does not match the grammar.
+///
+/// # Panics
+///
+/// Panics if the input matches the grammar, but uses a construct the AST
+/// builders do not handle, e.g. an instruction outside of a work unit.
+// Rule can be large depending on the grammar, and we don't really control
+// this. Thus ignore clippy warning about the large error.
+#[allow(clippy::result_large_err)]
+pub fn parse_instructions(source: &str) -> Result<Vec<Node>, Error<Rule>> {
+    pest::set_error_detail(true);
+    let mut ast = vec![];
+    let pairs = InstructionParser::parse(Rule::file, source)?;
+    let expr_rules = [Rule::expr, Rule::machine];
+
+    for pair in pairs {
+        if pair.as_rule() != Rule::file {
+            continue;
+        }
+
+        for i in pair.into_inner() {
+            if expr_rules.contains(&i.as_rule()) {
+                ast.push(build_ast_from_expr(i));
+            }
+        }
+    }
+    trace!("Resulting AST: {:?}", ast);
+    Ok(ast)
+}
+
+/// Converts a top-level `expr` or `machine` pair into an AST node.
+fn build_ast_from_expr(pair: pest::iterators::Pair<Rule>) -> Node {
+    match pair.as_rule() {
+        Rule::expr => build_ast_from_expr(pair.into_inner().next().unwrap()),
+        Rule::machine => Node::Machine {
+            m_instructions: build_ast_from_minstr(pair.into_inner()),
+        },
+        Rule::function => build_ast_from_function(pair.into_inner()),
+        unknown => panic!("Unknown expr: {unknown:?}"),
+    }
+}
+
+/// Converts the instructions of a `machine` block.
+fn build_ast_from_minstr(
+    pair: pest::iterators::Pairs<Rule>,
+) -> Vec<MachineInstruction> {
+    let mut instr = vec![] as Vec<MachineInstruction>;
+
+    for i in pair {
+        let mut inner = i.into_inner();
+        let name = inner.next().expect("No instruction name");
+
+        match first_nested_pair(name).as_rule() {
+            Rule::server => {
+                let port_pair =
+                    first_nested_pair(inner.next().expect("No port"));
+                let port: u16 = pair_to_string(port_pair)
+                    .parse()
+                    .expect("Cannot parse port");
+                instr.push(MachineInstruction::Server { port });
+            }
+            Rule::profile => {
+                let first_arg =
+                    first_nested_pair(inner.next().expect("No target"));
+
+                // Same conversion as for `path`, so that a quoted target
+                // is stored without the quotes.
+                let target = match string_from_argument(first_arg) {
+                    Ok(target) => target,
+                    Err(e) => panic!("Cannot parse argument: {e:?}"),
+                };
+
+                instr.push(MachineInstruction::Profile { target });
+            }
+            Rule::path => {
+                let first_arg =
+                    first_nested_pair(inner.next().expect("No path"));
+
+                let value = match string_from_argument(first_arg) {
+                    Ok(value) => value,
+                    Err(e) => panic!("Cannot parse argument: {e:?}"),
+                };
+
+                instr.push(MachineInstruction::Path { value });
+            }
+            unknown => panic!("Unknown machine instruction: {unknown:?}"),
+        }
+    }
+
+    instr
+}
+
+/// Extracts the name and the parameters of a work unit.
+///
+/// The parameter list is optional in the grammar (`work = {ident ~ params?}`),
+/// a unit without one gets an empty map.
+fn build_ast_from_work(
+    pairs: pest::iterators::Pairs<Rule>,
+) -> (String, HashMap<String, String>) {
+    let mut work_parts = pairs;
+
+    let name = work_parts.next().expect("No work name");
+    let name_str = pair_to_string(name);
+    let mut params_map: HashMap<String, String> = HashMap::new();
+
+    let params = work_parts.next().into_iter().flat_map(|p| p.into_inner());
+
+    for param in params {
+        let mut kv = param.into_inner();
+        let key = kv.next().expect("No parameter name");
+        let value = kv.next().expect("No parameter value");
+
+        assert_eq!(key.as_rule(), Rule::ident);
+        assert_eq!(value.as_rule(), Rule::value);
+
+        params_map.insert(pair_to_string(key), pair_to_string(value));
+    }
+
+    (name_str, params_map)
+}
+
+/// Builds a `Node::Work` from the parts of a `function` rule.
+fn build_ast_from_function(pairs: pest::iterators::Pairs<Rule>) -> Node {
+    let mut func_parts = pairs;
+
+    let work = func_parts.next().expect("No work unit");
+    let instrs = func_parts.next().expect("No instructions");
+    let distribution = func_parts.next();
+
+    let (name, args) = build_ast_from_work(work.into_inner());
+    let instructions = build_ast_from_instr(instrs.into_inner());
+    let dist = distribution.map(build_ast_from_dist);
+
+    Node::Work {
+        name,
+        args,
+        instructions,
+        dist,
+    }
+}
+
+/// Converts a single `arg` pair into an `Arg`.
+///
+/// Arguments of a dynamic helper are converted recursively, so they may be
+/// constants, variables or other helpers.
+fn build_arg(pair: pest::iterators::Pair<Rule>) -> Arg {
+    let a = first_nested_pair(pair);
+    match a.as_rule() {
+        Rule::constant => Arg::Const {
+            text: string_from_constant(a),
+        },
+        Rule::ident => Arg::Var {
+            name: pair_to_string(a),
+        },
+        Rule::dynamic => {
+            let mut inner = a.into_inner();
+            let name = inner.next().expect("No argument name");
+            let args_pair = inner.next().expect("No argument value");
+
+            Arg::Dynamic {
+                name: pair_to_string(name),
+                args: args_pair.into_inner().map(build_arg).collect(),
+            }
+        }
+        unknown => panic!("Unknown arg type {unknown:?}"),
+    }
+}
+
+/// Converts the body of a work unit into a list of instructions.
+fn build_ast_from_instr(
+    pairs: pest::iterators::Pairs<Rule>,
+) -> Vec<Instruction> {
+    let mut instr = vec![] as Vec<Instruction>;
+
+    for pair in pairs {
+        let mut instrs = first_nested_pair(pair).into_inner();
+
+        let name = instrs.next().expect("No instruction name");
+        let kind = first_nested_pair(name).as_rule();
+
+        // The argument list is optional in the grammar (`args?`)
+        let args: Vec<Arg> = instrs
+            .next()
+            .map(|args_pair| args_pair.into_inner().map(build_arg).collect())
+            .unwrap_or_default();
+
+        // Every supported instruction needs at least one argument, report
+        // a missing one explicitly instead of an out of bounds index.
+        let Some((first, rest)) = args.split_first() else {
+            panic!("No arguments for instruction {kind:?}");
+        };
+
+        match kind {
+            Rule::task => {
+                instr.push(Instruction::Task {
+                    name: first.clone(),
+                    args: rest.to_vec(),
+                });
+            }
+            Rule::open => {
+                instr.push(Instruction::Open {
+                    path: first.clone(),
+                });
+            }
+            Rule::debug => {
+                instr.push(Instruction::Debug {
+                    text: first.clone(),
+                });
+            }
+            Rule::ping => {
+                instr.push(Instruction::Ping {
+                    server: first.clone(),
+                });
+            }
+            unknown => panic!("Unknown instruction type {unknown:?}"),
+        }
+    }
+
+    instr
+}
+
+/// Converts a `dist` pair into a distribution, `rate` defaults to 0.
+fn build_ast_from_dist(pair: pest::iterators::Pair<Rule>) -> Dist {
+    match pair.as_rule() {
+        Rule::dist => {
+            let mut opts: HashMap<String, String> = HashMap::new();
+
+            // NOTE(review): distName is ignored here, `zipf` and `uniform`
+            // are accepted by the grammar, but end up as Dist::Exp.
+            for p in pair.into_inner() {
+                if let Rule::opt = p.as_rule() {
+                    let mut inner = p.into_inner();
+                    let key = inner.next().expect("No dist argument key");
+                    let value = inner.next().expect("No dist argument value");
+
+                    opts.insert(pair_to_string(key), pair_to_string(value));
+                }
+            }
+
+            Dist::Exp {
+                rate: opts
+                    .get("rate")
+                    .map_or("0", String::as_str)
+                    .parse()
+                    .expect("Cannot parse rate"),
+            }
+        }
+        unknown => panic!("Unknown dist: {unknown:?}"),
+    }
+}
+
+fn string_from_constant(pair: pest::iterators::Pair<Rule>) -> String {
+    assert_eq!(pair.as_rule(), Rule::constant);
+
+    // A quoted constant wraps a "value" rule, a numeric constant has no
+    // inner pairs and is taken as is.
+    let text = pair.as_str().to_string();
+    pair.into_inner().next().map_or(text, pair_to_string)
+}
+
+fn string_from_ident(pair: pest::iterators::Pair<Rule>) -> String {
+    assert_eq!(pair.as_rule(), Rule::ident);
+
+    // "ident" is an atomic rule without inner pairs, its text is the name
+    pair_to_string(pair)
+}
+
+fn string_from_argument(
+    pair: pest::iterators::Pair<Rule>,
+) -> Result<String, ParseError> {
+    assert_eq!(pair.as_rule(), Rule::arg);
+
+    let inner = first_nested_pair(pair);
+    match inner.as_rule() {
+        Rule::constant => Ok(string_from_constant(inner)),
+        Rule::ident => Ok(string_from_ident(inner)),
+        Rule::dynamic => Err(ParseError::NotSupported),
+        _ => Err(ParseError::TypeMismatch),
+    }
+}
+
+fn pair_to_string(pair: pest::iterators::Pair<Rule>) -> String {
+    pair.as_span().as_str().to_string()
+}
+
+fn first_nested_pair(
+    pair: pest::iterators::Pair<Rule>,
+) -> pest::iterators::Pair<Rule> {
+    pair.into_inner().next().expect("Cannot get first pair")
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Helper to verify a repeated unit
+    fn test_repeated(node: Node) {
+        let Node::Work {
+            ref name,
+            ref args,
+            ref instructions,
+            ref dist,
+        } = node
+        else {
+            unreachable!()
+        };
+
+        assert_eq!(name, "repeated");
+
+        assert_eq!(args.len(), 2);
+        assert_eq!(args.get("workers").unwrap(), "10");
+        assert_eq!(args.get("duration").unwrap(), "100");
+
+        assert_eq!(instructions.len(), 1);
+
+        assert_eq!(
+            instructions[0],
+            Instruction::Open {
+                path: Arg::Const {
+                    text: "/tmp/test".to_string()
+                }
+            }
+        );
+
+        let dist_value = dist.clone().unwrap();
+
+        assert_eq!(dist_value, Dist::Exp { rate: 100.0 });
+    }
+
+    // Helper to verify a random unit
+    fn test_random(node: Node) {
+        let Node::Work {
+            ref name,
+            ref args,
+            ref instructions,
+            ref dist,
+        } = node
+        else {
+            unreachable!()
+        };
+
+        assert_eq!(name, "random");
+
+        assert_eq!(args.len(), 2);
+        assert_eq!(args.get("workers").unwrap(), "10");
+        assert_eq!(args.get("duration").unwrap(), "100");
+
+        assert_eq!(instructions.len(), 1);
+
+        assert_eq!(
+            instructions[0],
+            Instruction::Open {
+                path: Arg::Dynamic {
+                    name: "random_path".to_string(),
+                    args: vec![Arg::Const {
+                        text: "/tmp".to_string()
+                    }],
+                }
+            }
+        );
+
+        let dist_value = dist.clone().unwrap();
+
+        assert_eq!(dist_value, Dist::Exp { rate: 100.0 });
+    }
+
+    // Helper to verify a task unit
+    fn test_task(node: Node, global_opts: bool, exp: bool) {
+        let Node::Work {
+            ref name,
+            ref args,
+            ref instructions,
+            ref dist,
+        } = node
+        else {
+            unreachable!()
+        };
+
+        assert_eq!(name, "main");
+
+        if global_opts {
+            assert_eq!(args.len(), 2);
+            assert_eq!(args.get("workers").unwrap(), "2");
+            assert_eq!(args.get("duration").unwrap(), "10");
+        }
+
+        assert_eq!(instructions.len(), 2);
+
+        assert_eq!(
+            instructions[0],
+            Instruction::Debug {
+                text: Arg::Const {
+                    text: "run task stub".to_string(),
+                }
+            }
+        );
+
+        assert_eq!(
+            instructions[1],
+            Instruction::Task {
+                name: Arg::Var {
+                    name: "stub".to_string(),
+                },
+                args: vec![],
+            }
+        );
+
+        if exp {
+            let dist_value = dist.clone().unwrap();
+
+            assert_eq!(dist_value, Dist::Exp { rate: 10.0 });
+        }
+    }
+
+    // Helper to verify a ping unit
+    fn test_ping(node: Node, global_opts: bool, exp: bool) {
+        let Node::Work {
+            ref name,
+            ref args,
+            ref instructions,
+            ref dist,
+        } = node
+        else {
+            unreachable!()
+        };
+
+        assert_eq!(name, "main");
+
+        if global_opts {
+            assert_eq!(args.len(), 2);
+            assert_eq!(args.get("workers").unwrap(), "2");
+            assert_eq!(args.get("duration").unwrap(), "10");
+        }
+
+        assert_eq!(instructions.len(), 2);
+
+        assert_eq!(
+            instructions[0],
+            Instruction::Debug {
+                text: Arg::Const {
+                    text: "ping server".to_string(),
+                }
+            }
+        );
+
+        assert_eq!(
+            instructions[1],
+            Instruction::Ping {
+                server: Arg::Const {
+                    text: "127.0.0.1:8080".to_string(),
+                },
+            }
+        );
+
+        if exp {
+            let dist_value = dist.clone().unwrap();
+
+            assert_eq!(dist_value, Dist::Exp { rate: 10.0 });
+        }
+    }
+
+    // Helper to verify a machine unit
+    fn test_machine(node: Node, server: bool, profile: bool) {
+        let Node::Machine { ref m_instructions } = node else {
+            unreachable!()
+        };
+
+        assert_eq!(m_instructions.len(), 1);
+
+        if server {
+            assert_eq!(
+                m_instructions[0],
+                MachineInstruction::Server { port: 8080 }
+            );
+        }
+
+        if profile {
+            assert_eq!(
+                m_instructions[0],
+                MachineInstruction::Profile {
+                    target: "bpf".to_string(),
+                }
+            );
+        }
+    }
+
+    #[test]
+    fn test_single_work_unit() {
+        let input = r#"
+            // open the same file over and over
+            repeated (workers = 10, duration = 100) {
+                open("/tmp/test");
+            } : exp {
+                rate = 100.0;
+            }
+        "#;
+
+        let ast: Vec<Node> = parse_instructions(input).unwrap();
+        assert_eq!(ast.len(), 1);
+
+        test_repeated(ast[0].clone());
+    }
+
+    #[test]
+    fn test_multiple_work_units() {
+        let input = r#"
+            // open lots of random files under
+            // a specified directory
+            random (workers = 10, duration = 100) {
+                open(random_path("/tmp"));
+            } : exp {
+                rate = 100.0;
+            }
+
+            // open the same file over and over
+            repeated (workers = 10, duration = 100) {
+                open("/tmp/test");
+            } : exp {
+                rate = 100.0;
+            }
+        "#;
+
+        let ast: Vec<Node> = parse_instructions(input).unwrap();
+        assert_eq!(ast.len(), 2);
+
+        test_random(ast[0].clone());
+        test_repeated(ast[1].clone());
+    }
+
+    #[test]
+    fn test_task_unit() {
+        let input = r#"
+            // Named work block
+            main (workers = 2, duration = 10) {
+                // Anon work block with only one unit.
+                // task(name) -- spawn a process with specified name
+                // debug(text) -- log with DEBUG level
+                // open(path) -- open file by path, create if needed and write something to it
+                debug("run task stub");
+                task(stub);
+            } : exp {
+                // If no distribution provided, do the unit only once.
+                rate = 10.0;
+            }
+        "#;
+
+        let ast: Vec<Node> = parse_instructions(input).unwrap();
+        assert_eq!(ast.len(), 1);
+
+        test_task(ast[0].clone(), true, true);
+    }
+
+    #[test]
+    fn test_task_unit_no_dist() {
+        let input = r#"
+            main () {
+                debug("run task stub");
+                task(stub);
+            }
+        "#;
+
+        let ast: Vec<Node> = parse_instructions(input).unwrap();
+        assert_eq!(ast.len(), 1);
+
+        test_task(ast[0].clone(), false, false);
+    }
+
+    #[test]
+    fn test_ping_with_machine() {
+        let input = r#"
+            machine {
+                server(8080);
+            }
+
+            main () {
+                debug("ping server");
+                ping("127.0.0.1:8080");
+            }
+        "#;
+
+        let ast: Vec<Node> = parse_instructions(input).unwrap();
+        assert_eq!(ast.len(), 2);
+
+        test_machine(ast[0].clone(), true, false);
+        test_ping(ast[1].clone(), false, false);
+    }
+
+    #[test]
+    fn test_work_unit_without_params() {
+        let input = r#"
+            main {
+                debug("run task stub");
+                task(stub);
+            }
+        "#;
+
+        let ast: Vec<Node> = parse_instructions(input).unwrap();
+        assert_eq!(ast.len(), 1);
+
+        test_task(ast[0].clone(), false, false);
+    }
+
+    #[test]
+    fn test_numeric_and_trailing_comma_args() {
+        let input = r#"
+            main {
+                task(stub, 5,);
+            }
+        "#;
+
+        let ast: Vec<Node> = parse_instructions(input).unwrap();
+        let Node::Work { ref instructions, .. } = ast[0] else {
+            unreachable!()
+        };
+
+        assert_eq!(
+            instructions[0],
+            Instruction::Task {
+                name: Arg::Var {
+                    name: "stub".to_string(),
+                },
+                args: vec![Arg::Const {
+                    text: "5".to_string()
+                }],
+            }
+        );
+    }
+
+    #[test]
+    fn test_machine_path_and_profile() {
+        let input = r#"
+            machine {
+                profile("bpf");
+                path(tmp);
+            }
+        "#;
+
+        let ast: Vec<Node> = parse_instructions(input).unwrap();
+        let Node::Machine { ref m_instructions } = ast[0] else {
+            unreachable!()
+        };
+
+        assert_eq!(
+            m_instructions[0],
+            MachineInstruction::Profile {
+                target: "bpf".to_string(),
+            }
+        );
+        assert_eq!(
+            m_instructions[1],
+            MachineInstruction::Path {
+                value: "tmp".to_string(),
+            }
+        );
+    }
+}
diff --git a/src/script/rules.rs b/src/script/rules.rs
new file mode 100644
index 0000000..16a227a
--- /dev/null
+++ b/src/script/rules.rs
@@ -0,0 +1,12 @@
+use log::debug;
+
+use crate::script::ast::{Arg, Dist, Instruction, Node};
+
+/// Applies a list of transformations to the nodes after parsing
+/// TODO: Add following rules:
+/// - add path if directory is expected
+/// - add default worker arguments
+pub fn apply_rules(works: Vec<&Node>) -> Vec<&Node> {
+    debug!("Applying rules");
+    works
+}