diff --git a/notebooks/example_javascript.ipynb b/notebooks/example_javascript.ipynb new file mode 100644 index 00000000..f9aa0412 --- /dev/null +++ b/notebooks/example_javascript.ipynb @@ -0,0 +1,138 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 3.38it/s]\n", + "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", + "Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 3.42it/s]\n", + "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Creating DFA mask store for LlamaTokenizerFast and javascript, may take more than 10 minutes. Caching at /home/shubham/syncode/cache/mask_stores/LlamaTokenizerFast/grammar_strict_2806037986_32000.pkl.\n", + "Ignore whitespace tokens is True\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 385/385 [06:38<00:00, 1.04s/it]\n" + ] + } + ], + "source": [ + "from syncode import Syncode\n", + "\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "model_name = \"microsoft/Phi-3-mini-128k-instruct\"\n", + "\n", + "# Load the unconstrained original model\n", + "llm = Syncode(model=model_name, mode='original', max_new_tokens=200)\n", + "\n", + "# Load the Syncode augmented model\n", + "syn_llm = Syncode(model=model_name, mode='grammar_strict', grammar='javascript', parse_output_only=True, max_new_tokens=200)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "You are not running the flash-attention implementation, expect numerical differences.\n" + ] + }, + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "LLM output:\n", + "Certainly! Below is a JavaScript function that takes a string as input and returns the reversed version of that string. This function efficiently utilizes JavaScript's built-in methods to achieve the desired outcome, ensuring optimal time and space complexity for this specific task.\n", + "\n", + "```javascript\n", + "function reverseString(str) {\n", + " // Check if the input is a string, if not, return an empty string or a relevant message\n", + " if (typeof str !== 'string') {\n", + " return \"Input must be a string\";\n", + " }\n", + "\n", + " // Use the built-in split(), reverse(), and join() methods to reverse the string\n", + " return str.split('').reverse().join('');\n", + "}\n", + "\n", + "// Example usage:\n", + "const originalString = \"Hello, World!\";\n", + "const reversedString = reverseString(originalString);\n", + "console.log(reversedString); // Output: \"!dlroW ,ol\n", + "\n" + ] + } + ], + "source": [ + "prompt = \"Write a javascript function that takes a string as input and returns the string reversed.\"\n", + "output = llm.infer(prompt)[0]\n", + "print(f\"LLM output:\\n{output}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LLM output:\n", + "Certainly! 
Below is a JavaScript function thattakesastringasinputandreturnsitsreversedversionusinganefficientapproachforthispurposewithminimalmemoryandtimecomplexityimpactsusuallyacharactersthefunctionwillhandlecorrectlyforstringsandnotforbinarydataorothercomplextypesensitivelyhandlingtheseedgecasesisnottheprimaryfocusbutitisconsideredinthisimplementationcontextforgeneralpurposeuseandunderstandabilitythefunctionisdesignedtobeefficientandstraightforwardforitsintendedusecasehereisthefunctioncodeimplementingthislogicwithcommentsforclarityandunderstandinghereisthefunctioncodewithinthiscontextandimplementationdetailsconsideredforgeneraluseandunderstandabilityinjavascriptlanguagefunctionreverseString(inputString) {\n", + " // Check if the input is a string\n", + " if (typeof inputString !== 'string') {\n", + " throw new Error('Input must\n", + "\n" + ] + } + ], + "source": [ + "output = syn_llm.infer(prompt)[0]\n", + "print(f\"LLM output:\\n{output}\\n\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "codex", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/syncode/larkm/__pyinstaller/__init__.py b/syncode/larkm/__pyinstaller/__init__.py deleted file mode 100644 index 9da62a33..00000000 --- a/syncode/larkm/__pyinstaller/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# For usage of lark with PyInstaller. 
See https://pyinstaller-sample-hook.readthedocs.io/en/latest/index.html - -import os - -def get_hook_dirs(): - return [os.path.dirname(__file__)] diff --git a/syncode/larkm/__pyinstaller/hook-lark.py b/syncode/larkm/__pyinstaller/hook-lark.py deleted file mode 100644 index cf3d8e3d..00000000 --- a/syncode/larkm/__pyinstaller/hook-lark.py +++ /dev/null @@ -1,14 +0,0 @@ -#----------------------------------------------------------------------------- -# Copyright (c) 2017-2020, PyInstaller Development Team. -# -# Distributed under the terms of the GNU General Public License (version 2 -# or later) with exception for distributing the bootloader. -# -# The full license is in the file COPYING.txt, distributed with this software. -# -# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception) -#----------------------------------------------------------------------------- - -from PyInstaller.utils.hooks import collect_data_files - -datas = collect_data_files('lark') diff --git a/syncode/larkm/grammars/python.lark b/syncode/larkm/grammars/python.lark deleted file mode 100644 index 8a75966b..00000000 --- a/syncode/larkm/grammars/python.lark +++ /dev/null @@ -1,302 +0,0 @@ -// Python 3 grammar for Lark - -// This grammar should parse all python 3.x code successfully. - -// Adapted from: https://docs.python.org/3/reference/grammar.html - -// Start symbols for the grammar: -// single_input is a single interactive statement; -// file_input is a module or sequence of commands read from an input file; -// eval_input is the input for the eval() functions. -// NB: compound_stmt in single_input is followed by extra NEWLINE! 
-// - -single_input: _NEWLINE | simple_stmt | compound_stmt _NEWLINE -file_input: (_NEWLINE | stmt)* -eval_input: testlist _NEWLINE* - -decorator: "@" dotted_name [ "(" [arguments] ")" ] _NEWLINE -decorators: decorator+ -decorated: decorators (classdef | funcdef | async_funcdef) - -async_funcdef: "async" funcdef -funcdef: "def" name "(" [parameters] ")" ["->" test] ":" suite - -parameters: paramvalue ("," paramvalue)* ["," SLASH ("," paramvalue)*] ["," [starparams | kwparams]] - | starparams - | kwparams - -SLASH: "/" // Otherwise the it will completely disappear and it will be undisguisable in the result -starparams: (starparam | starguard) poststarparams -starparam: "*" typedparam -starguard: "*" -poststarparams: ("," paramvalue)* ["," kwparams] -kwparams: "**" typedparam ","? - -?paramvalue: typedparam ("=" test)? -?typedparam: name (":" test)? - - -lambdef: "lambda" [lambda_params] ":" test -lambdef_nocond: "lambda" [lambda_params] ":" test_nocond -lambda_params: lambda_paramvalue ("," lambda_paramvalue)* ["," [lambda_starparams | lambda_kwparams]] - | lambda_starparams - | lambda_kwparams -?lambda_paramvalue: name ("=" test)? -lambda_starparams: "*" [name] ("," lambda_paramvalue)* ["," [lambda_kwparams]] -lambda_kwparams: "**" name ","? - - -?stmt: simple_stmt | compound_stmt -?simple_stmt: small_stmt (";" small_stmt)* [";"] _NEWLINE -?small_stmt: (expr_stmt | assign_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | nonlocal_stmt | assert_stmt) -expr_stmt: testlist_star_expr -assign_stmt: annassign | augassign | assign - -annassign: testlist_star_expr ":" test ["=" test] -assign: testlist_star_expr ("=" (yield_expr|testlist_star_expr))+ -augassign: testlist_star_expr augassign_op (yield_expr|testlist) -!augassign_op: "+=" | "-=" | "*=" | "@=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**=" | "//=" -?testlist_star_expr: test_or_star_expr - | test_or_star_expr ("," test_or_star_expr)+ ","? 
-> tuple - | test_or_star_expr "," -> tuple - -// For normal and annotated assignments, additional restrictions enforced by the interpreter -del_stmt: "del" exprlist -pass_stmt: "pass" -?flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt -break_stmt: "break" -continue_stmt: "continue" -return_stmt: "return" [testlist] -yield_stmt: yield_expr -raise_stmt: "raise" [test ["from" test]] -import_stmt: import_name | import_from -import_name: "import" dotted_as_names -// note below: the ("." | "...") is necessary because "..." is tokenized as ELLIPSIS -import_from: "from" (dots? dotted_name | dots) "import" ("*" | "(" import_as_names ")" | import_as_names) -!dots: "."+ -import_as_name: name ["as" name] -dotted_as_name: dotted_name ["as" name] -import_as_names: import_as_name ("," import_as_name)* [","] -dotted_as_names: dotted_as_name ("," dotted_as_name)* -dotted_name: name ("." name)* -global_stmt: "global" name ("," name)* -nonlocal_stmt: "nonlocal" name ("," name)* -assert_stmt: "assert" test ["," test] - -?compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | match_stmt - | with_stmt | funcdef | classdef | decorated | async_stmt -async_stmt: "async" (funcdef | with_stmt | for_stmt) -if_stmt: "if" test ":" suite elifs ["else" ":" suite] -elifs: elif_* -elif_: "elif" test ":" suite -while_stmt: "while" test ":" suite ["else" ":" suite] -for_stmt: "for" exprlist "in" testlist ":" suite ["else" ":" suite] -try_stmt: "try" ":" suite except_clauses ["else" ":" suite] [finally] - | "try" ":" suite finally -> try_finally -finally: "finally" ":" suite -except_clauses: except_clause+ -except_clause: "except" [test ["as" name]] ":" suite -// NB compile.c makes sure that the default except clause is last - - -with_stmt: "with" with_items ":" suite -with_items: with_item ("," with_item)* -with_item: test ["as" name] - -match_stmt: "match" test ":" _NEWLINE _INDENT case+ _DEDENT - -case: "case" pattern ["if" test] ":" suite - -?pattern: 
sequence_item_pattern "," _sequence_pattern -> sequence_pattern - | as_pattern -?as_pattern: or_pattern ("as" NAME)? -?or_pattern: closed_pattern ("|" closed_pattern)* -?closed_pattern: literal_pattern - | NAME -> capture_pattern - | "_" -> any_pattern - | attr_pattern - | "(" as_pattern ")" - | "[" _sequence_pattern "]" -> sequence_pattern - | "(" (sequence_item_pattern "," _sequence_pattern)? ")" -> sequence_pattern - | "{" (mapping_item_pattern ("," mapping_item_pattern)* ","?)?"}" -> mapping_pattern - | "{" (mapping_item_pattern ("," mapping_item_pattern)* ",")? "**" NAME ","? "}" -> mapping_star_pattern - | class_pattern - -literal_pattern: inner_literal_pattern - -?inner_literal_pattern: "None" -> const_none - | "True" -> const_true - | "False" -> const_false - | STRING -> string - | number - -attr_pattern: NAME ("." NAME)+ -> value - -name_or_attr_pattern: NAME ("." NAME)* -> value - -mapping_item_pattern: (literal_pattern|attr_pattern) ":" as_pattern - -_sequence_pattern: (sequence_item_pattern ("," sequence_item_pattern)* ","?)? -?sequence_item_pattern: as_pattern - | "*" NAME -> star_pattern - -class_pattern: name_or_attr_pattern "(" [arguments_pattern ","?] ")" -arguments_pattern: pos_arg_pattern ["," keyws_arg_pattern] - | keyws_arg_pattern -> no_pos_arguments - -pos_arg_pattern: as_pattern ("," as_pattern)* -keyws_arg_pattern: keyw_arg_pattern ("," keyw_arg_pattern)* -keyw_arg_pattern: NAME "=" as_pattern - - - -suite: simple_stmt | _NEWLINE _INDENT stmt+ _DEDENT - -?test: or_test ("if" or_test "else" test)? 
- | lambdef - | assign_expr - -assign_expr: name ":=" test - -?test_nocond: or_test | lambdef_nocond - -?or_test: and_test ("or" and_test)* -?and_test: not_test_ ("and" not_test_)* -?not_test_: "not" not_test_ -> not_test - | comparison -?comparison: expr (comp_op expr)* -star_expr: "*" expr - -?expr: or_expr -?or_expr: xor_expr ("|" xor_expr)* -?xor_expr: and_expr ("^" and_expr)* -?and_expr: shift_expr ("&" shift_expr)* -?shift_expr: arith_expr (_shift_op arith_expr)* -?arith_expr: term (_add_op term)* -?term: factor (_mul_op factor)* -?factor: _unary_op factor | power - -!_unary_op: "+"|"-"|"~" -!_add_op: "+"|"-" -!_shift_op: "<<"|">>" -!_mul_op: "*"|"@"|"/"|"%"|"//" -// <> isn't actually a valid comparison operator in Python. It's here for the -// sake of a __future__ import described in PEP 401 (which really works :-) -!comp_op: "<"|">"|"=="|">="|"<="|"<>"|"!="|"in"|"not" "in"|"is"|"is" "not" - -?power: await_expr ("**" factor)? -?await_expr: AWAIT? atom_expr -AWAIT: "await" - -?atom_expr: atom_expr "(" [arguments] ")" -> funccall - | atom_expr "[" subscriptlist "]" -> getitem - | atom_expr "." name -> getattr - | atom - -?atom: "(" yield_expr ")" - | "(" _tuple_inner? ")" -> tuple - | "(" comprehension{test_or_star_expr} ")" -> tuple_comprehension - | "[" _exprlist? "]" -> list - | "[" comprehension{test_or_star_expr} "]" -> list_comprehension - | "{" _dict_exprlist? "}" -> dict - | "{" comprehension{key_value} "}" -> dict_comprehension - | "{" _exprlist "}" -> set - | "{" comprehension{test} "}" -> set_comprehension - | name -> var - | number - | string_concat - | "(" test ")" - | "..." 
-> ellipsis - | "None" -> const_none - | "True" -> const_true - | "False" -> const_false - - -?string_concat: string+ - -_tuple_inner: test_or_star_expr (("," test_or_star_expr)+ [","] | ",") - -?test_or_star_expr: test - | star_expr - -?subscriptlist: subscript - | subscript (("," subscript)+ [","] | ",") -> subscript_tuple -?subscript: test | ([test] ":" [test] [sliceop]) -> slice -sliceop: ":" [test] -?exprlist: (expr|star_expr) - | (expr|star_expr) (("," (expr|star_expr))+ [","]|",") -?testlist: test | testlist_tuple -testlist_tuple: test (("," test)+ [","] | ",") -_dict_exprlist: (key_value | "**" expr) ("," (key_value | "**" expr))* [","] - -key_value: test ":" test - -_exprlist: test_or_star_expr ("," test_or_star_expr)* [","] - -classdef: "class" name ["(" [arguments] ")"] ":" suite - - - -arguments: argvalue ("," argvalue)* ("," [ starargs | kwargs])? - | starargs - | kwargs - | comprehension{test} - -starargs: stararg ("," stararg)* ("," argvalue)* ["," kwargs] -stararg: "*" test -kwargs: "**" test ("," argvalue)* - -?argvalue: test ("=" test)? - - -comprehension{comp_result}: comp_result comp_fors [comp_if] -comp_fors: comp_for+ -comp_for: [ASYNC] "for" exprlist "in" or_test -ASYNC: "async" -?comp_if: "if" test_nocond - -// not used in grammar, but may appear in "node" passed from Parser to Compiler -encoding_decl: name - -yield_expr: "yield" [testlist] - | "yield" "from" test -> yield_from - -number: DEC_NUMBER | HEX_NUMBER | BIN_NUMBER | OCT_NUMBER | FLOAT_NUMBER | IMAG_NUMBER -string: STRING | LONG_STRING - -// Other terminals - -_NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+ - -%ignore /[\t \f]+/ // WS -%ignore /\\[\t \f]*\r?\n/ // LINE_CONT -%ignore COMMENT -%declare _INDENT _DEDENT - - -// Python terminals - -!name: NAME | "match" | "case" -NAME: /[^\W\d]\w*/ -COMMENT: /#[^\n]*/ - -STRING: /([ubf]?r?|r[ubf])("(?!"").*?(? 
ported to Python) +// + // Permission is hereby granted, free of charge, to any person + // obtaining a copy of this software and associated documentation + // files (the "Software"), to deal in the Software without + // restriction, including without limitation the rights to use, + // copy, modify, merge, publish, distribute, sublicense, and/or sell + // copies of the Software, and to permit persons to whom the + // Software is furnished to do so, subject to the following + // conditions: +// + // The above copyright notice and this permission notice shall be + // included in all copies or substantial portions of the Software. +// + // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + // OTHER DEALINGS IN THE SOFTWARE. + // + +start: source_elements? + +source_elements: statement+ + +statement: block + | variable_statement + | empty_statement + | expression_statement + | if_statement + | iteration_statement + | continue_statement + | break_statement + | return_statement + | with_statement + | labelled_statement + | switch_statement + | throw_statement + | try_statement + | debugger_statement + +// For now assuming that the block will not be empty. 
This is to avoid reduce/reduce conflict with empty object +block: OPEN_BRACE statement_list CLOSE_BRACE + +empty_braces: OPEN_BRACE CLOSE_BRACE + +statement_list: statement+ + +variable_decl_type: "var" | "let" | "const" + +variable_statement : variable_decl_type variable_declaration_list + +variable_declaration_list: variable_declaration ( COMMA variable_declaration )* + +variable_declaration : IDENTIFIER initialiser? + +initialiser : ASSIGN single_expression + +empty_statement : EOS + +expression_statement: expression_sequence + +if_statement: "if" OPEN_PAREN expression_sequence CLOSE_PAREN statement ( "else" statement )? + +iteration_statement: "do" statement "while" OPEN_PAREN expression_sequence CLOSE_PAREN -> do_statement + | "while" OPEN_PAREN expression_sequence CLOSE_PAREN statement -> while_statement + | "for" OPEN_PAREN expression_sequence? EOS expression_sequence? EOS expression_sequence? CLOSE_PAREN statement -> for_statement + | "for" OPEN_PAREN variable_decl_type variable_declaration_list EOS expression_sequence? EOS expression_sequence? CLOSE_PAREN statement -> for_var_statement + | "for" OPEN_PAREN single_expression "in" expression_sequence CLOSE_PAREN statement -> for_in_statement + | "for" OPEN_PAREN variable_decl_type variable_declaration "in" expression_sequence CLOSE_PAREN statement -> for_var_in_statement + +continue_statement : "continue" IDENTIFIER? + +break_statement : "break" IDENTIFIER? + +return_statement : "return" (expression_sequence)? + +with_statement: "with" OPEN_PAREN expression_sequence CLOSE_PAREN statement + +switch_statement : "switch" OPEN_PAREN expression_sequence CLOSE_PAREN case_block + +case_block : OPEN_BRACE case_clauses? ( default_clause case_clauses? )? CLOSE_BRACE + +case_clauses: case_clause+ + +case_clause: "case" expression_sequence COLON statement_list? + +default_clause : "default" COLON statement_list? 
+ +labelled_statement : IDENTIFIER COLON statement + +throw_statement : "throw" expression_sequence + +try_statement : "try" block (catch_production | finally_production | (catch_production finally_production)) + +catch_production: "catch" OPEN_PAREN IDENTIFIER CLOSE_PAREN block + +finally_production: "finally" block + +debugger_statement: "debugger" + +function_declaration : "function" IDENTIFIER OPEN_PAREN formal_parameter_list? CLOSE_PAREN OPEN_BRACE function_body CLOSE_BRACE + +formal_parameter_list: IDENTIFIER ( COMMA IDENTIFIER )* + +function_body: source_elements? + +class_declaration : "class" IDENTIFIER OPEN_BRACE class_body CLOSE_BRACE + +class_body : (method_declaration | property_assignment )* + +method_declaration : IDENTIFIER OPEN_PAREN formal_parameter_list? CLOSE_PAREN OPEN_BRACE function_body CLOSE_BRACE + +array_literal: OPEN_BRACKET element_list? COMMA? elision? CLOSE_BRACKET + +element_list: elision? single_expression ( COMMA elision? single_expression )* + +elision: COMMA+ + +object_literal: empty_braces + | OPEN_BRACE property_name_and_value_list COMMA? CLOSE_BRACE + +property_name_and_value_list: property_assignment ( COMMA property_assignment )* + +property_assignment: property_name COLON single_expression -> property_expression_assignment + | "get" property_name OPEN_PAREN CLOSE_PAREN OPEN_BRACE function_body CLOSE_BRACE -> property_getter + | "set" property_name OPEN_PAREN property_set_parameter_list CLOSE_PAREN OPEN_BRACE function_body CLOSE_BRACE -> property_setter + +property_name: IDENTIFIER | STRING | numeric_literal + +property_set_parameter_list: IDENTIFIER + +arguments: OPEN_PAREN argument_list? 
CLOSE_PAREN + +argument_list: single_expression ( COMMA single_expression )* + +expression_sequence : single_expression ( COMMA single_expression )* + +identifier_expression : IDENTIFIER + +single_expression : function_declaration + | class_declaration + | single_expression OPEN_BRACKET expression_sequence CLOSE_BRACKET -> member_index_expression + | single_expression DOT identifier_expression -> member_dot_expression + | single_expression arguments -> arguments_expression + | single_expression PLUS_PLUS -> post_increment_expression + | single_expression MINUS_MINUS -> post_decrease_expression + | ("new" | "delete" | "void" | "typeof" | PLUS_PLUS | MINUS_MINUS | PLUS | MINUS | BIT_NOT | NOT) single_expression -> unary_expression + | single_expression ( MULTIPLY | DIVIDE | MODULUS ) single_expression -> multiplicative_expression + | single_expression ( PLUS | MINUS ) single_expression -> additive_expression + | single_expression ( LEFT_SHIFT_ARITHMETIC | RIGHT_SHIFT_ARITHMETIC | RIGHT_SHIFT_LOGICAL ) single_expression -> bit_shift_expression + | single_expression ( LESS_THAN | GREATER_THAN | LESS_THAN_EQUALS | GREATER_THAN_EQUALS ) single_expression -> relational_expression + | single_expression "instanceof" single_expression -> instanceof_expression + | single_expression "in" single_expression -> in_expression + | single_expression ( EQUALS | NOT_EQUALS | IDENTITY_EQUALS | IDENTIFY_NOT_EQUALS ) single_expression -> equality_expression + | single_expression BIT_AND single_expression -> bit_and_expression + | single_expression BIT_XOR single_expression -> bit_xor_expression + | single_expression BIT_OR single_expression -> bit_or_expression + | single_expression AND single_expression -> logical_and_expression + | single_expression OR single_expression -> logical_or_expression + | single_expression QUESTIONMARK single_expression COLON single_expression -> ternary_expression + | single_expression ASSIGN expression_sequence -> assignment_expression + | single_expression 
assignment_operator expression_sequence -> assignment_operator_expression + | "this" -> this_expression + | identifier_expression + | literal -> literal_expression + | array_literal -> array_literal_expression + | object_literal -> object_literal_expression + | "(" expression_sequence ")" -> parenthesized_expression + +assignment_operator : MULTIPLY_ASSIGN + | DIVIDE_ASSIGN + | MODULUS_ASSIGN + | PLUS_ASSIGN + | MINUS_ASSIGN + | LEFT_SHIFT_ARITHMETIC_ASSIGN + | RIGHT_SHIFT_ARITHMETIC_ASSIGN + | RIGHT_SHIFT_LOGICAL_ASSIGN + | BIT_AND_ASSIGN + | BIT_XOR_ASSIGN + | BIT_OR_ASSIGN + +literal: NULL_LITERAL | BOOLEAN_LITERAL | STRING | numeric_literal + +numeric_literal: DEC_NUMBER | HEX_NUMBER | OCT_NUMBER | FLOAT_NUMBER + +OPEN_BRACKET : "[" +CLOSE_BRACKET : "]" +OPEN_PAREN : "(" +CLOSE_PAREN : ")" +OPEN_BRACE : "{" +CLOSE_BRACE : "}" +SEMICOLON : ";" +COMMA : "," +ASSIGN : "=" +QUESTIONMARK : "?" +COLON : ":" +DOT : "." +PLUS_PLUS : "++" +MINUS_MINUS : "--" +PLUS : "+" +MINUS : "-" +BIT_NOT : "~" +NOT : "!" 
+MULTIPLY : "*" +DIVIDE : "/" +MODULUS : "%" +RIGHT_SHIFT_ARITHMETIC : ">>" +LEFT_SHIFT_ARITHMETIC : "<<" +RIGHT_SHIFT_LOGICAL : ">>>" +LESS_THAN : "<" +GREATER_THAN : ">" +LESS_THAN_EQUALS : "<=" +GREATER_THAN_EQUALS : ">=" +EQUALS : "==" +NOT_EQUALS : "!=" +IDENTITY_EQUALS : "===" +IDENTIFY_NOT_EQUALS : "!==" +BIT_AND : "&" +BIT_XOR : "^" +BIT_OR : "|" +AND : "&&" +OR : "||" +MULTIPLY_ASSIGN : "*=" +DIVIDE_ASSIGN : "/=" +MODULUS_ASSIGN : "%=" +PLUS_ASSIGN : "+=" +MINUS_ASSIGN : "-=" +LEFT_SHIFT_ARITHMETIC_ASSIGN : "<<=" +RIGHT_SHIFT_ARITHMETIC_ASSIGN : ">>=" +RIGHT_SHIFT_LOGICAL_ASSIGN : ">>>=" +BIT_AND_ASSIGN : "&=" +BIT_XOR_ASSIGN : "^=" +BIT_OR_ASSIGN : "|=" + +NULL_LITERAL: "null" +BOOLEAN_LITERAL: "true" | "false" + +DEC_NUMBER: /0|[1-9]\d*/i +HEX_NUMBER: /0x[\da-f]*/i +OCT_NUMBER: /0o[0-7]*/i +BIN_NUMBER : /0b[0-1]*/i +FLOAT_NUMBER: /((\d+\.\d*|\.\d+)(e[-+]?\d+)?|\d+(e[-+]?\d+))/i +NEWLINE: ( /\r?\n[\t ]*/ )+ +EOS: (NEWLINE+ | SEMICOLON) +STRING: /`.*?`/ | /".*?"/i | /'.*?'/i + +IDENTIFIER: NAME +NAME: /[a-zA-Z_]\w*/ +COMMENT: /\/\/.*/ | /\/\*.*?\*\//s + +%ignore /[\r\u2028\u2029]/ // LINETERMINATOR +%ignore /[\t \f]+/ // WS +%ignore COMMENT diff --git a/tests/test_grammar_javascript.py b/tests/test_grammar_javascript.py new file mode 100644 index 00000000..3276f6ca --- /dev/null +++ b/tests/test_grammar_javascript.py @@ -0,0 +1,29 @@ +import unittest +import sys +import os +sys.path.append(os.path.dirname(os.path.realpath(__file__)) + '/../') +from syncode.parsers import create_parser +from syncode.parsers.grammars.grammar import Grammar +from syncode.parse_result import AcceptSequence, RemainderState + +javascript_grammar = Grammar('javascript') +inc_parser = create_parser(javascript_grammar) + +# Note: If there is no trailing whitespace in the partial code, +# then the current terminal appears in the accept sequence, +# and the remainder state is MAYBE_COMPLETE. 
class TestJavaScriptParser(unittest.TestCase):
    """Smoke tests for parsing complete programs with the JavaScript grammar."""

    # NOTE: the method name says "java" but it exercises the JavaScript
    # grammar; the name is kept so existing test IDs/selectors keep working.
    def test_java_parser1(self):
        """Parse a small, complete JavaScript program and check the tree.

        The snippet covers line comments, a ``const`` declaration with an
        array literal, a C-style ``for`` loop with a ``let`` initialiser,
        member access, indexing and a call expression.
        """
        # The module-level parser is shared, so clear any state first.
        inc_parser.reset()
        code = """// Declare an array of names
const names = ["John", "Mary", "Jane"];

// Iterate over the array using a for loop
for (let i = 0; i < names.length; i++) {
    // Print the current name to the console
    console.log(names[i]);
}
"""
        out = inc_parser.base_parser.parse(code)

        # Previously the test only printed the tree and asserted nothing.
        # The grammar's entry rule is ``start: source_elements?``, so a
        # non-empty program should yield a ``start`` root with children.
        # NOTE(review): assumes the parser is created with the ``start``
        # rule as its entry point - confirm against create_parser.
        self.assertEqual(out.data, "start")
        self.assertTrue(out.children, "expected a non-empty parse tree")

        # Kept from the original test: rendering the tree exercises
        # ``pretty()`` and leaves the structure visible in verbose runs.
        print(out.pretty())