Skip to content

Commit 20f54ff

Browse files
committed
support structs and member access, slices, and closures/lambdas
1 parent 4fe3fa4 commit 20f54ff

6 files changed

Lines changed: 2435 additions & 204 deletions

File tree

README.md

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,15 @@
22

33
# Skunk Programming Language
44

5-
**Skunk** is a statically typed, interpreted programming language designed for simplicity, learning, and extensibility. It provides a clean syntax for working with structured data, control flow, and functions while supporting extensible features like user-defined types and type inference.
5+
**Skunk** is a statically typed programming language with both an interpreter and an LLVM-based native compiler. It provides a clean syntax for working with structured data, control flow, and functions while supporting extensible features like user-defined types and type inference.
66

77
## Features
88

99
- **Basic Types**: `byte`, `short`, `int`, `long`, `float`, `double`, `boolean`, `char`, `string`
1010
- **User-Defined Structs**: Define custom types with fields and methods
1111
- **Control Flow**: `if`, `for` loops, and blocks for scoped variable overrides
12-
- **Arrays**: Support for array initialization, slicing (upcoming), and dynamic resizing
13-
- **Functions**: First-class functions with support for closures and higher-order programming (upcoming)
12+
- **Arrays**: Fixed-size arrays with zero initialization, explicit fill initialization, and slice types
13+
- **Functions**: First-class functions with support for closures, lambdas, and higher-order programming
1414
- **Type Checking**: Ensures type correctness at parse-time with detailed error messages
1515
- **Type Inference**: Planned for a cleaner developer experience
1616
- **String Interpolation and Concatenation**: Upcoming for intuitive string operations
@@ -80,10 +80,13 @@ function main(): void {
8080
### Arrays
8181
```skunk
8282
function main(): void {
83-
arr: int[3] = [1, 2, 3];
84-
for (i: int = 0; i < arr.len; i = i + 1) {
85-
print(arr[i]);
86-
}
83+
a: [3]int;
84+
b: [3]int = [3]int::fill(7);
85+
c: [3]int = [1, 2, 3];
86+
87+
print(a[0]); // 0
88+
print(b[1]); // 7
89+
print(c[2]); // 3
8790
}
8891
```
8992

@@ -280,23 +283,21 @@ Skunk now includes an LLVM-based compiler path alongside the interpreter.
280283
Currently supported in `cargo run -- compile ...`:
281284

282285
- Top-level function declarations
283-
- `int`, `boolean`, and string literals
286+
- `byte`, `short`, `int`, `long`, `float`, `double`, `boolean`, `char`, and string literals
284287
- Local variables and assignments
285-
- Arithmetic and comparisons on integers
288+
- Arithmetic and comparisons on numeric primitives
286289
- Boolean logic
287290
- `if`, `for`, `return`
288-
- Function calls
291+
- Function calls, including chained call forms like `f()(1)`
289292
- `print`
293+
- Fixed arrays: zero initialization, `::fill(...)`, inline array literals, indexing, assignment, `.len`, and array pass/return by value
294+
- Structs, field access, nested structs, methods, and method chaining that returns callable values
295+
- Slices: `[]T`, slice literals, `a[lo:hi]`, omitted bounds, `.len`, indexing, and slice parameters
296+
- Closures and lambdas, including captured locals, recursive lambdas, returned functions, and methods returning closures
290297

291-
Not compiled yet:
292-
293-
- Structs
294-
- Arrays
295-
- Closures/lambdas
296-
- Member access
297-
- Chained function-call forms
298+
Current compiler/runtime trade-off:
298299

299-
Those features still work through the interpreter, and the compiler returns a clear error when it hits one of the unsupported constructs.
300+
- For arrays, slices, structs, and closures, the native compiler currently heap-allocates any local storage that needs to outlive its stack frame. This keeps closure and slice semantics working while the runtime is still small, but there is no allocator API or memory reclamation yet, so this storage is not freed.
300301

301302
## Fibonacci Benchmark
302303

src/ast.rs

Lines changed: 203 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,10 @@ pub enum Node {
8080
ArrayAccess {
8181
coordinates: Vec<Node>,
8282
},
83+
SliceAccess {
84+
start: Option<Box<Node>>,
85+
end: Option<Box<Node>>,
86+
},
8387
MemberAccess {
8488
member: Box<Node>, // field, function
8589
metadata: Metadata,
@@ -250,7 +254,7 @@ impl PestImpl {
250254
Rule::assignment => self.create_assignment(pair),
251255
Rule::struct_decl => self.create_struct_decl(pair),
252256
Rule::var_decl => self.create_var_decl(pair),
253-
Rule::var_decl_stmt => self.create_var_decl(pair),
257+
Rule::var_decl_stmt => self.create_var_decl(pair.into_inner().next().unwrap()),
254258
Rule::func_decl => self.create_func_decl(pair),
255259
Rule::lambda_expr => self.create_func_decl(pair),
256260
Rule::literal => self.create_literal(pair),
@@ -397,6 +401,20 @@ impl PestImpl {
397401
Node::ArrayAccess { coordinates }
398402
}
399403

404+
/// Builds a `Node::SliceAccess` from a `slice_access` parse pair.
///
/// The first two inner pairs are read as the start and end bounds, in that
/// order. A bound becomes `None` when its pair is absent or has no inner
/// pair; otherwise its first inner pair is converted with `create_ast` and
/// boxed.
///
/// # Panics
///
/// Panics if `pair` is not a `Rule::slice_access` pair.
fn create_slice_access(&self, pair: Pair<Rule>) -> Node {
405+
assert_eq!(Rule::slice_access, pair.as_rule());
406+
let mut pairs = pair.into_inner();
407+
// NOTE(review): presumably the grammar emits one (possibly empty) wrapper
// pair per bound so that positions stay stable when the start is omitted —
// confirm against the grammar file.
let start = pairs
408+
.next()
409+
.and_then(|p| p.into_inner().next())
410+
.map(|p| Box::new(self.create_ast(p)));
411+
let end = pairs
412+
.next()
413+
.and_then(|p| p.into_inner().next())
414+
.map(|p| Box::new(self.create_ast(p)));
415+
Node::SliceAccess { start, end }
416+
}
417+
400418
fn create_inline_array_init(&self, pair: Pair<Rule>) -> Node {
401419
let mut inner_pairs = pair.into_inner();
402420
let elements = inner_pairs.map(|p| self.create_ast(p)).collect();
@@ -425,11 +443,16 @@ impl PestImpl {
425443
let mut nodes: Vec<Node> = Vec::new();
426444
let mut inner_pairs = pair.into_inner();
427445
while let Some(inner_pair) = inner_pairs.next() {
428-
match inner_pair.as_rule() {
429-
Rule::IDENTIFIER => nodes.push(self.create_identifier(inner_pair)),
430-
Rule::member_access => nodes.push(self.create_member_access(inner_pair)),
431-
Rule::array_access => nodes.push(self.create_array_access(inner_pair)),
432-
_ => panic!("unsupported chained access node: {:?}", inner_pair),
446+
let mut step_pair = inner_pair;
447+
if step_pair.as_rule() == Rule::access_step {
448+
step_pair = step_pair.into_inner().next().unwrap();
449+
}
450+
match step_pair.as_rule() {
451+
Rule::IDENTIFIER => nodes.push(self.create_identifier(step_pair)),
452+
Rule::member_access => nodes.push(self.create_member_access(step_pair)),
453+
Rule::array_access => nodes.push(self.create_array_access(step_pair)),
454+
Rule::slice_access => nodes.push(self.create_slice_access(step_pair)),
455+
_ => panic!("unsupported chained access node: {:?}", step_pair),
433456
}
434457
}
435458
nodes
@@ -592,6 +615,43 @@ impl PestImpl {
592615
}
593616
}
594617

618+
/// Converts a `type_prefix` pair into its optional dimension expression.
///
/// Returns `Some(node)`, built by `create_ast` from the first inner pair, or
/// `None` when the prefix has no inner pair. `apply_type_prefixes` treats
/// `Some` as an array dimension and `None` as a slice prefix.
///
/// # Panics
///
/// Panics if `pair` is not a `Rule::type_prefix` pair.
fn create_type_prefix(&self, pair: Pair<Rule>) -> Option<Node> {
619+
assert_eq!(pair.as_rule(), Rule::type_prefix);
620+
pair.into_inner().next().map(|p| self.create_ast(p))
621+
}
622+
623+
/// Wraps `base_type` in the array/slice layers described by `prefixes`.
///
/// Prefixes are applied from last to first, so the last prefix in the list
/// binds closest to the base type:
///
/// - `Some(dimension)` adds an array dimension. If the current type is
///   already a `Type::Array`, the dimension is inserted at the front of its
///   dimension list; otherwise a new single-dimension `Type::Array` wraps the
///   current type.
/// - `None` wraps the current type in `Type::Slice`.
///
/// For example, prefixes `[Some(2), Some(3)]` over `int` produce a single
/// `Type::Array` of `int` with dimensions `[2, 3]`.
fn apply_type_prefixes(&self, base_type: Type, prefixes: Vec<Option<Node>>) -> Type {
624+
let mut current = base_type;
625+
// Walk the prefixes in reverse so the one nearest the base type is applied first.
for prefix in prefixes.into_iter().rev() {
626+
match prefix {
627+
Some(dimension) => match current {
628+
Type::Array {
629+
elem_type,
630+
mut dimensions,
631+
} => {
632+
// Prepend: this prefix comes earlier in the list than the
// dimensions collected so far, so list order is preserved.
dimensions.insert(0, dimension);
633+
current = Type::Array {
634+
elem_type,
635+
dimensions,
636+
};
637+
}
638+
other => {
639+
current = Type::Array {
640+
elem_type: Box::new(other),
641+
dimensions: vec![dimension],
642+
};
643+
}
644+
},
645+
None => {
646+
current = Type::Slice {
647+
elem_type: Box::new(current),
648+
};
649+
}
650+
}
651+
}
652+
current
653+
}
654+
595655
fn create_type(&self, pair: Pair<Rule>) -> Type {
596656
match pair.as_rule() {
597657
Rule::base_type => create_base_type_from_str(pair.as_str()),
@@ -620,29 +680,33 @@ impl PestImpl {
620680
unreachable!()
621681
}
622682
}
623-
Rule::slice_type => {
683+
Rule::prefixed_type => {
684+
let mut inner_pairs: Vec<Pair<Rule>> = pair.into_inner().collect();
685+
let base_type = self.create_type(inner_pairs.pop().unwrap());
686+
let prefixes = inner_pairs
687+
.into_iter()
688+
.map(|p| self.create_type_prefix(p))
689+
.collect();
690+
self.apply_type_prefixes(base_type, prefixes)
691+
}
692+
Rule::legacy_slice_type => {
624693
let mut inner_pairs = pair.into_inner();
625694
Type::Slice {
626695
elem_type: Box::new(self.create_type(inner_pairs.next().unwrap())),
627696
}
628697
}
629-
Rule::array_type => {
698+
Rule::legacy_array_type => {
630699
let mut inner_pairs = pair.into_inner();
631700
let elem_type = self.create_type(
632701
inner_pairs
633702
.next()
634-
.filter(|x| matches!(x.as_rule(), Rule::base_type)) // only arrays of primitives ?
703+
.filter(|x| matches!(x.as_rule(), Rule::base_type))
635704
.unwrap_or_else(|| panic!("array type is missing")),
636705
);
637706
let mut dimensions: Vec<Node> = Vec::new();
638707
while let Some(dim_pair) = inner_pairs.next() {
639708
let dim = self.create_ast(dim_pair.into_inner().next().unwrap());
640709
dimensions.push(dim);
641-
// match dim {
642-
// Node::Literal(Literal::Integer(v)) => dimensions.push(v),
643-
// //todo support Access nodes, e.g. identifier
644-
// _ => panic!("incorrect array size literal: {:?}", dim),
645-
// }
646710
}
647711
Type::Array {
648712
elem_type: Box::new(elem_type),
@@ -879,8 +943,45 @@ pub fn type_to_string(t: &Type) -> String {
879943
Type::String => "string".to_string(),
880944
Type::Boolean => "boolean".to_string(),
881945
Type::Char => "char".to_string(),
946+
Type::Array {
947+
elem_type,
948+
dimensions,
949+
} => {
950+
let prefix = dimensions
951+
.iter()
952+
.map(|dim| format!("[{}]", type_expr_to_string(dim)))
953+
.collect::<String>();
954+
format!("{}{}", prefix, type_to_string(elem_type))
955+
}
956+
Type::Slice { elem_type } => format!("[]{}", type_to_string(elem_type)),
957+
Type::Function {
958+
parameters,
959+
return_type,
960+
} => format!(
961+
"({}) -> {}",
962+
parameters
963+
.iter()
964+
.map(type_to_string)
965+
.collect::<Vec<_>>()
966+
.join(", "),
967+
type_to_string(return_type)
968+
),
969+
Type::SkSelf => "self".to_string(),
882970
Type::Custom(v) => v.to_string(),
883-
_ => panic!("unsupported type {:?}", t),
971+
}
972+
}
973+
974+
/// Renders a node that appears inside a type (used by `type_to_string` for
/// array dimensions) as text.
///
/// Literals get a compact form: integers, doubles, and booleans via
/// `to_string`, longs with an `L` suffix, floats with an `f` suffix, and
/// strings and chars in their `Debug` form. Identifiers render as their name.
/// Any other node falls back to its `Debug` representation.
fn type_expr_to_string(node: &Node) -> String {
975+
match node {
976+
Node::Literal(Literal::Integer(value)) => value.to_string(),
977+
Node::Literal(Literal::Long(value)) => format!("{}L", value),
978+
Node::Literal(Literal::Float(value)) => format!("{}f", value),
979+
Node::Literal(Literal::Double(value)) => value.to_string(),
980+
Node::Literal(Literal::StringLiteral(value)) => format!("{:?}", value),
981+
Node::Literal(Literal::Boolean(value)) => value.to_string(),
982+
Node::Literal(Literal::Char(value)) => format!("{:?}", value),
983+
Node::Identifier(name) => name.clone(),
984+
// Fallback for any node kind without a dedicated rendering above.
other => format!("{:?}", other),
884985
}
885986
}
886987

@@ -2237,6 +2338,64 @@ mod tests {
22372338
)
22382339
}
22392340

2341+
// Parses `arr: [1]int = [1]int::fill(1);` and checks that both the declared
// type and the `fill` static call carry a one-dimensional `int` array type of
// size 1, with the fill value passed as the single argument.
#[test]
2342+
fn test_prefix_int_array_fill() {
2343+
let source_code = r#"
2344+
arr: [1]int = [1]int::fill(1);
2345+
"#;
2346+
2347+
assert_eq!(
2348+
Node::Program {
2349+
statements: Vec::from([
2350+
Node::VariableDeclaration {
2351+
var_type: Type::Array {
2352+
elem_type: Box::new(Type::Int),
2353+
dimensions: Vec::from([Node::Literal(Literal::Integer(1))])
2354+
},
2355+
name: "arr".to_string(),
2356+
value: Some(Box::new(Node::StaticFunctionCall {
2357+
_type: Type::Array {
2358+
elem_type: Box::new(Type::Int),
2359+
dimensions: Vec::from([Node::Literal(Literal::Integer(1))])
2360+
},
2361+
name: "fill".to_string(),
2362+
arguments: Vec::from([Node::Literal(Literal::Integer(1))]),
2363+
metadata: Metadata::EMPTY
2364+
})),
2365+
metadata: Metadata::EMPTY
2366+
},
2367+
Node::EOI
2368+
])
2369+
},
2370+
parse(source_code)
2371+
)
2372+
}
2373+
2374+
// Parses `arr: [3]int;` and checks that a prefix-style array declaration with
// no initializer yields a `VariableDeclaration` whose `value` is `None` and
// whose type is a one-dimensional `int` array of size 3.
#[test]
2375+
fn test_prefix_array_without_initializer() {
2376+
let source_code = r#"
2377+
arr: [3]int;
2378+
"#;
2379+
2380+
assert_eq!(
2381+
Node::Program {
2382+
statements: Vec::from([
2383+
Node::VariableDeclaration {
2384+
var_type: Type::Array {
2385+
elem_type: Box::new(Type::Int),
2386+
dimensions: Vec::from([Node::Literal(Literal::Integer(3))])
2387+
},
2388+
name: "arr".to_string(),
2389+
value: None,
2390+
metadata: Metadata::EMPTY
2391+
},
2392+
Node::EOI
2393+
])
2394+
},
2395+
parse(source_code)
2396+
)
2397+
}
2398+
22402399
#[test]
22412400
fn test_array_init_inline() {
22422401
let source_code = r#"
@@ -2324,6 +2483,35 @@ mod tests {
23242483
println!("{:?}", parse(source_code));
23252484
}
23262485

2486+
// Parses `slice: []int = [1, 2, 3];` and checks that the empty-bracket prefix
// produces `Type::Slice` of `int`, initialised from an inline `ArrayInit` with
// the three integer literals.
#[test]
2487+
fn test_prefix_slice_type() {
2488+
let source_code = r#"
2489+
slice: []int = [1, 2, 3];
2490+
"#;
2491+
2492+
let expected_ast = Node::Program {
2493+
statements: vec![
2494+
Node::VariableDeclaration {
2495+
name: "slice".to_string(),
2496+
var_type: Type::Slice {
2497+
elem_type: Box::new(Type::Int),
2498+
},
2499+
value: Some(Box::new(Node::ArrayInit {
2500+
elements: vec![
2501+
Node::Literal(Literal::Integer(1)),
2502+
Node::Literal(Literal::Integer(2)),
2503+
Node::Literal(Literal::Integer(3)),
2504+
],
2505+
})),
2506+
metadata: Metadata::EMPTY,
2507+
},
2508+
Node::EOI,
2509+
],
2510+
};
2511+
2512+
assert_eq!(expected_ast, parse(source_code));
2513+
}
2514+
23272515
#[test]
23282516
fn array_2d_access() {
23292517
let source_code = r#"

0 commit comments

Comments
 (0)