diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index d98646febf69c1..bc5d291065c335 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -39,3 +39,7 @@ d4e24021d39e1f80f0055b55d91f8d5f22e15084 e90282be7ba1bc8e3119f6e1a2c80356ceb3f80a 26a9e0b4e31f7b5a9cbd755e0a15823a8fa51bae 2f53985da9ee593fe524d408256835667938c7d7 + +# Win32: EOL code of batch files +23f9a0d655c4d405bb2397a147a1523436205486 +b839989fd22fef85e2af19de1bc83aa72a5b22bd diff --git a/.gitattributes b/.gitattributes index 6ac6e6fcc3f579..f98c091e3f0464 100644 --- a/.gitattributes +++ b/.gitattributes @@ -7,3 +7,8 @@ tool/update-deps diff=ruby tool/make-snapshot diff=ruby tool/format-release diff=ruby tool/leaked-globals diff=ruby + +# To strip CR from the batch files, set the `diff.dos.textconv` filter +# like as `git config diff.dos.textconv $'sed \'s/\r$//\''`. +*.bat diff=dos +*.cmd diff=dos diff --git a/.github/actions/setup/directories/action.yml b/.github/actions/setup/directories/action.yml index 99d1fc0151cc7d..4f71ee592a6607 100644 --- a/.github/actions/setup/directories/action.yml +++ b/.github/actions/setup/directories/action.yml @@ -100,7 +100,7 @@ runs: path: ${{ inputs.srcdir }} fetch-depth: ${{ inputs.fetch-depth }} - - uses: actions/cache@8b402f58fbc84540c8b491a91e594a4576fec3d7 # v5.0.2 + - uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: ${{ inputs.srcdir }}/.downloaded-cache key: ${{ runner.os }}-${{ runner.arch }}-downloaded-cache diff --git a/.github/workflows/spec_guards.yml b/.github/workflows/spec_guards.yml index 856d6f61eb9303..0a5104f1419298 100644 --- a/.github/workflows/spec_guards.yml +++ b/.github/workflows/spec_guards.yml @@ -39,7 +39,6 @@ jobs: # Specs from ruby/spec should still run on all supported Ruby versions. # This also ensures the needed ruby_version_is guards are there, see spec/README.md. 
ruby: - - ruby-3.2 - ruby-3.3 - ruby-3.4 - ruby-4.0 diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index f997ed56d5f269..f9d1335d464016 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -88,7 +88,7 @@ jobs: - name: Restore vcpkg artifact id: restore-vcpkg - uses: actions/cache/restore@8b402f58fbc84540c8b491a91e594a4576fec3d7 # v5.0.2 + uses: actions/cache/restore@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: src\vcpkg_installed key: windows-${{ matrix.os }}-vcpkg-${{ hashFiles('src/vcpkg.json') }} @@ -100,7 +100,7 @@ jobs: if: ${{ ! steps.restore-vcpkg.outputs.cache-hit }} - name: Save vcpkg artifact - uses: actions/cache/save@8b402f58fbc84540c8b491a91e594a4576fec3d7 # v5.0.2 + uses: actions/cache/save@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: src\vcpkg_installed key: windows-${{ matrix.os }}-vcpkg-${{ hashFiles('src/vcpkg.json') }} diff --git a/NEWS.md b/NEWS.md index 6f8ac408e52e4b..49eec73f4b492c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -46,7 +46,8 @@ releases. * RubyGems 4.1.0.dev * bundler 4.1.0.dev -* prism 1.8.0 +* prism 1.9.0 +* resolv 0.7.1 * stringio 3.2.1.dev * strscan 3.1.7.dev * syntax_suggest 2.0.3 @@ -62,6 +63,7 @@ releases. 
* debug 1.11.1 * mutex_m 0.3.0 * resolv-replace 0.2.0 +* syslog 0.4.0 * rdoc 7.1.0 ### RubyGems and Bundler diff --git a/array.c b/array.c index b4718238763bab..4a4c44562d28bf 100644 --- a/array.c +++ b/array.c @@ -387,13 +387,14 @@ rb_ary_make_embedded(VALUE ary) if (!ARY_EMBED_P(ary)) { const VALUE *buf = ARY_HEAP_PTR(ary); long len = ARY_HEAP_LEN(ary); + long capa = ARY_HEAP_CAPA(ary); FL_SET_EMBED(ary); ARY_SET_EMBED_LEN(ary, len); MEMCPY((void *)ARY_EMBED_PTR(ary), (void *)buf, VALUE, len); - ary_heap_free_ptr(ary, buf, len * sizeof(VALUE)); + ary_heap_free_ptr(ary, buf, capa * sizeof(VALUE)); } } @@ -428,7 +429,7 @@ ary_resize_capa(VALUE ary, long capacity) if (len > capacity) len = capacity; MEMCPY((VALUE *)RARRAY(ary)->as.ary, ptr, VALUE, len); - ary_heap_free_ptr(ary, ptr, old_capa); + ary_heap_free_ptr(ary, ptr, old_capa * sizeof(VALUE)); FL_SET_EMBED(ary); ARY_SET_LEN(ary, len); @@ -8423,12 +8424,12 @@ rb_ary_deconstruct(VALUE ary) * * [1, 'one', :one, [2, 'two', :two]] * - * - A {%w or %W string-array Literal}[rdoc-ref:syntax/literals.rdoc@25w+and+-25W-3A+String-Array+Literals]: + * - A {%w or %W string-array Literal}[rdoc-ref:syntax/literals.rdoc@w-and-w-String-Array-Literals]: * * %w[foo bar baz] # => ["foo", "bar", "baz"] * %w[1 % *] # => ["1", "%", "*"] * - * - A {%i or %I symbol-array Literal}[rdoc-ref:syntax/literals.rdoc@25i+and+-25I-3A+Symbol-Array+Literals]: + * - A {%i or %I symbol-array Literal}[rdoc-ref:syntax/literals.rdoc@i+and-I-Symbol-Array+Literals]: * * %i[foo bar baz] # => [:foo, :bar, :baz] * %i[1 % *] # => [:"1", :%, :*] @@ -8690,8 +8691,8 @@ rb_ary_deconstruct(VALUE ary) * * First, what's elsewhere. Class \Array: * - * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. - * - Includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here], + * - Inherits from {class Object}[rdoc-ref:Object@Whats-Here]. + * - Includes {module Enumerable}[rdoc-ref:Enumerable@Whats-Here], * which provides dozens of additional methods. 
* * Here, class \Array provides methods that are useful for: diff --git a/bootstraptest/test_ractor.rb b/bootstraptest/test_ractor.rb index e2a3e8dd5beff1..ea8eb6535f40c9 100644 --- a/bootstraptest/test_ractor.rb +++ b/bootstraptest/test_ractor.rb @@ -986,6 +986,43 @@ def self.fstr = @fstr a + b + c + d + e + f } +assert_equal <<-output.chomp, %q{ + from Hash default proc + from instance variable @ivar of an instance of Foo + from block's self (an instance of Foo) + from Hash default proc + from instance variable @ivar of an instance of Foo + from member :foo of an instance of Bar +output + class Foo + def initialize + @ivar = Hash.new { |h, k| h[k] = [] } # the default proc holds self, an instance of Foo + end + def inspect = "#" + end + + Bar = Data.define(:foo) + + begin + Ractor.make_shareable(Bar.new(Foo.new)) + rescue Ractor::Error + $!.to_s.lines[1..].join + end +} + +assert_equal '[true, true]', %q{ + class Foo + undef_method :freeze + end + + begin + Ractor.make_shareable Foo.new + rescue Ractor::Error + cause = $!.cause + [cause.class == NoMethodError, cause.name == :freeze] + end +} + assert_equal '["instance-variable", "instance-variable", nil]', %q{ class C @iv1 = "" diff --git a/complex.c b/complex.c index 85d724f273b3ea..1ba786a5bb8703 100644 --- a/complex.c +++ b/complex.c @@ -2645,9 +2645,9 @@ float_arg(VALUE self) * First, what's elsewhere: * * - Class \Complex inherits (directly or indirectly) - * from classes {Numeric}[rdoc-ref:Numeric@What-27s+Here] - * and {Object}[rdoc-ref:Object@What-27s+Here]. - * - Includes (indirectly) module {Comparable}[rdoc-ref:Comparable@What-27s+Here]. + * from classes {Numeric}[rdoc-ref:Numeric@Whats-Here] + * and {Object}[rdoc-ref:Object@Whats-Here]. + * - Includes (indirectly) module {Comparable}[rdoc-ref:Comparable@Whats-Here]. 
* * Here, class \Complex has methods for: * diff --git a/configure.ac b/configure.ac index 4e7367804d3d6b..2d9ccf1442920e 100644 --- a/configure.ac +++ b/configure.ac @@ -743,6 +743,12 @@ AS_CASE(["$GCC:${warnflags+set}:${extra_warnflags:+set}:"], AS_CASE([ $CFLAGS ], [*" -save-temps="*|*" -save-temps "*], [], [ extra_warnflags="$extra_warnflags -Werror=misleading-indentation" ]) + AS_CASE([$target_os], [mingw*], [ + # 64bit Windows is IL32P64; shorten-64-to-32 causes tons of warnigs + extra_warnflags="$extra_warnflags -Wno-shorten-64-to-32" + ], [ + extra_warnflags="$extra_warnflags -Werror=shorten-64-to-32" + ]) # ICC doesn't support -Werror= AS_IF([test $icc_version -gt 0], [ @@ -756,7 +762,6 @@ AS_CASE(["$GCC:${warnflags+set}:${extra_warnflags:+set}:"], -Werror=implicit-function-declaration \ -Werror=implicit-int \ -Werror=pointer-arith \ - -Werror=shorten-64-to-32 \ -Werror=write-strings \ -Werror=old-style-definition \ -Wimplicit-fallthrough=0 \ diff --git a/dir.rb b/dir.rb index eb1a408ee3ac5b..9b83f688227d6c 100644 --- a/dir.rb +++ b/dir.rb @@ -31,7 +31,7 @@ # A \Dir object is in some ways array-like: # # - It has instance methods #children, #each, and #each_child. -# - It includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here]. +# - It includes {module Enumerable}[rdoc-ref:Enumerable@Whats+Here]. # # == \Dir As Stream-Like # @@ -85,8 +85,8 @@ # # First, what's elsewhere. Class \Dir: # -# - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. -# - Includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here], +# - Inherits from {class Object}[rdoc-ref:Object@Whats+Here]. +# - Includes {module Enumerable}[rdoc-ref:Enumerable@Whats+Here], # which provides dozens of additional methods. # # Here, class \Dir provides methods that are useful for: diff --git a/doc/float.rb b/doc/float.rb index 01668bfc6dacf1..f9068dfb1b82cc 100644 --- a/doc/float.rb +++ b/doc/float.rb @@ -72,9 +72,9 @@ # First, what's elsewhere. 
Class \Float: # # - Inherits from -# {class Numeric}[rdoc-ref:Numeric@What-27s+Here] -# and {class Object}[rdoc-ref:Object@What-27s+Here]. -# - Includes {module Comparable}[rdoc-ref:Comparable@What-27s+Here]. +# {class Numeric}[rdoc-ref:Numeric@Whats+Here] +# and {class Object}[rdoc-ref:Object@Whats+Here]. +# - Includes {module Comparable}[rdoc-ref:Comparable@Whats+Here]. # # Here, class \Float provides methods for: # diff --git a/doc/string.rb b/doc/string.rb index 304ab60c298967..e2dfb37c9fa5eb 100644 --- a/doc/string.rb +++ b/doc/string.rb @@ -163,8 +163,8 @@ # # First, what's elsewhere. Class +String+: # -# - Inherits from the {Object class}[rdoc-ref:Object@What-27s+Here]. -# - Includes the {Comparable module}[rdoc-ref:Comparable@What-27s+Here]. +# - Inherits from the {Object class}[rdoc-ref:Object@Whats+Here]. +# - Includes the {Comparable module}[rdoc-ref:Comparable@Whats+Here]. # # Here, class +String+ provides methods that are useful for: # diff --git a/eval.c b/eval.c index deadd5dd6414fb..fd370a43ccd2f2 100644 --- a/eval.c +++ b/eval.c @@ -329,17 +329,24 @@ ruby_exec_node(void *n) /* * call-seq: - * Module.nesting -> array - * - * Returns the list of +Modules+ nested at the point of call. 
+ * Module.nesting -> array + * + * Returns nested module as an array of Module objects: + * + * module M0 + * def self.speak = Module.nesting + * module M1 + * def self.speak = Module.nesting + * module M2 + * def self.speak = Module.nesting + * end + * end + * end + * M0.speak # => [M0] + * M0.speak.first.class # => Module + * M0::M1.speak # => [M0::M1, M0] + * M0::M1::M2.speak # => [M0::M1::M2, M0::M1, M0] * - * module M1 - * module M2 - * $a = Module.nesting - * end - * end - * $a #=> [M1::M2, M1] - * $a[0].name #=> "M1::M2" */ static VALUE diff --git a/ext/-test-/string/cstr.c b/ext/-test-/string/cstr.c index b0b1ef5374a46b..931220b46bdc40 100644 --- a/ext/-test-/string/cstr.c +++ b/ext/-test-/string/cstr.c @@ -111,9 +111,10 @@ bug_str_s_cstr_noembed(VALUE self, VALUE str) FL_SET((str2), STR_NOEMBED); memcpy(buf, RSTRING_PTR(str), capacity); RBASIC(str2)->flags &= ~(STR_SHARED | FL_USER5 | FL_USER6); - RSTRING(str2)->as.heap.aux.capa = capacity; + RSTRING(str2)->as.heap.aux.capa = RSTRING_LEN(str); RSTRING(str2)->as.heap.ptr = buf; RSTRING(str2)->len = RSTRING_LEN(str); + TERM_FILL(RSTRING_END(str2), TERM_LEN(str)); return str2; } diff --git a/ext/coverage/coverage.c b/ext/coverage/coverage.c index 93acdb24806f78..41f33f4fb8c9f3 100644 --- a/ext/coverage/coverage.c +++ b/ext/coverage/coverage.c @@ -70,7 +70,7 @@ rb_coverage_supported(VALUE self, VALUE _mode) * If +lines+ is enabled, +oneshot_lines+ cannot be enabled. * See {Lines Coverage}[rdoc-ref:Coverage@Lines+Coverage]. * - +branches+: Enables branch coverage that records the number of times each - * branch in each conditional was executed. See {Branches Coverage}[rdoc-ref:Coverage@Branch+Coverage]. + * branch in each conditional was executed. See {Branches Coverage}[rdoc-ref:Coverage@Branches+Coverage]. * - +methods+: Enables method coverage that records the number of times each method was exectued. * See {Methods Coverage}[rdoc-ref:Coverage@Methods+Coverage]. 
* - +eval+: Enables coverage for evaluations (e.g. Kernel#eval, Module#class_eval). diff --git a/file.c b/file.c index 8f4e9d86c8241c..706b60c9997919 100644 --- a/file.c +++ b/file.c @@ -7507,7 +7507,7 @@ const char ruby_null_device[] = * * First, what's elsewhere. Class \File: * - * - Inherits from {class IO}[rdoc-ref:IO@What-27s+Here], + * - Inherits from {class IO}[rdoc-ref:IO@Whats+Here], * in particular, methods for creating, reading, and writing files * - Includes module FileTest, * which provides dozens of additional methods. diff --git a/gc.c b/gc.c index ab0539cd3358a3..407541b309abb8 100644 --- a/gc.c +++ b/gc.c @@ -596,6 +596,7 @@ rb_gc_guarded_ptr_val(volatile VALUE *ptr, VALUE val) #endif static const char *obj_type_name(VALUE obj); +static st_table *id2ref_tbl; #include "gc/default/default.c" #if USE_MODULAR_GC && !defined(HAVE_DLOPEN) @@ -1013,9 +1014,7 @@ newobj_of(rb_ractor_t *cr, VALUE klass, VALUE flags, shape_id_t shape_id, bool w int lev = RB_GC_VM_LOCK_NO_BARRIER(); { size_t slot_size = rb_gc_obj_slot_size(obj); - if (slot_size > RVALUE_SIZE) { - memset((char *)obj + RVALUE_SIZE, 0, slot_size - RVALUE_SIZE); - } + memset((char *)obj + sizeof(struct RBasic), 0, slot_size - sizeof(struct RBasic)); /* We must disable GC here because the callback could call xmalloc * which could potentially trigger a GC, and a lot of code is unsafe @@ -1162,17 +1161,19 @@ rb_objspace_data_type_memsize(VALUE obj) { size_t size = 0; if (RTYPEDDATA_P(obj)) { - const rb_data_type_t *type = RTYPEDDATA_TYPE(obj); const void *ptr = RTYPEDDATA_GET_DATA(obj); - if (RTYPEDDATA_EMBEDDABLE_P(obj) && !RTYPEDDATA_EMBEDDED_P(obj)) { + if (ptr) { + const rb_data_type_t *type = RTYPEDDATA_TYPE(obj); + if (RTYPEDDATA_EMBEDDABLE_P(obj) && !RTYPEDDATA_EMBEDDED_P(obj)) { #ifdef HAVE_MALLOC_USABLE_SIZE - size += malloc_usable_size((void *)ptr); + size += malloc_usable_size((void *)ptr); #endif - } + } - if (ptr && type->function.dsize) { - size += type->function.dsize(ptr); + if 
(type->function.dsize) { + size += type->function.dsize(ptr); + } } } @@ -1242,6 +1243,113 @@ rb_gc_handle_weak_references(VALUE obj) } } +/* + * Returns true if the object requires a full rb_gc_obj_free() call during sweep, + * false if it can be freed quickly without calling destructors or cleanup. + * + * Objects that return false are: + * - Simple embedded objects without external allocations + * - Objects without finalizers + * - Objects without object IDs registered in id2ref + * - Objects without generic instance variables + * + * This is used by the GC sweep fast path to avoid function call overhead + * for the majority of simple objects. + */ +bool +rb_gc_obj_needs_cleanup_p(VALUE obj) +{ + VALUE flags = RBASIC(obj)->flags; + + if (flags & FL_FINALIZE) return true; + + switch (flags & RUBY_T_MASK) { + case T_IMEMO: + switch (imemo_type(obj)) { + case imemo_constcache: + case imemo_cref: + case imemo_ifunc: + case imemo_memo: + case imemo_svar: + case imemo_throw_data: + return false; + default: + return true; + } + + case T_DATA: + case T_OBJECT: + case T_STRING: + case T_ARRAY: + case T_HASH: + case T_BIGNUM: + case T_STRUCT: + case T_FLOAT: + case T_RATIONAL: + case T_COMPLEX: + break; + + case T_FILE: + case T_SYMBOL: + case T_CLASS: + case T_ICLASS: + case T_MODULE: + case T_REGEXP: + case T_MATCH: + return true; + } + + shape_id_t shape_id = RBASIC_SHAPE_ID(obj); + if (id2ref_tbl && rb_shape_has_object_id(shape_id)) return true; + + switch (flags & RUBY_T_MASK) { + case T_OBJECT: + if (flags & ROBJECT_HEAP) return true; + return false; + + case T_DATA: + if (flags & RUBY_TYPED_FL_IS_TYPED_DATA) { + uintptr_t type = (uintptr_t)RTYPEDDATA(obj)->type; + if (type & TYPED_DATA_EMBEDDED) { + RUBY_DATA_FUNC dfree = ((const rb_data_type_t *)(type & TYPED_DATA_PTR_MASK))->function.dfree; + if (dfree == RUBY_NEVER_FREE || dfree == RUBY_TYPED_DEFAULT_FREE) { + return false; + } + } + } + return true; + + case T_STRING: + if (flags & (RSTRING_NOEMBED | 
RSTRING_FSTR)) return true; + return rb_shape_has_fields(shape_id); + + case T_ARRAY: + if (!(flags & RARRAY_EMBED_FLAG)) return true; + return rb_shape_has_fields(shape_id); + + case T_HASH: + if (flags & RHASH_ST_TABLE_FLAG) return true; + return rb_shape_has_fields(shape_id); + + case T_BIGNUM: + if (!(flags & BIGNUM_EMBED_FLAG)) return true; + return rb_shape_has_fields(shape_id); + + case T_STRUCT: + if (!(flags & RSTRUCT_EMBED_LEN_MASK)) return true; + if (flags & RSTRUCT_GEN_FIELDS) return rb_shape_has_fields(shape_id); + return false; + + case T_FLOAT: + case T_RATIONAL: + case T_COMPLEX: + return rb_shape_has_fields(shape_id); + + default: + UNREACHABLE_RETURN(true); + } +} + static void io_fptr_finalize(void *fptr) { @@ -1831,7 +1939,6 @@ rb_gc_pointer_to_heap_p(VALUE obj) #define OBJ_ID_INCREMENT (RUBY_IMMEDIATE_MASK + 1) #define LAST_OBJECT_ID() (object_id_counter * OBJ_ID_INCREMENT) static VALUE id2ref_value = 0; -static st_table *id2ref_tbl = NULL; #if SIZEOF_SIZE_T == SIZEOF_LONG_LONG static size_t object_id_counter = 1; diff --git a/gc/default/default.c b/gc/default/default.c index 013c0749946e2d..aaf6f56092b360 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -301,9 +301,24 @@ int ruby_rgengc_debug; #ifndef GC_ENABLE_LAZY_SWEEP # define GC_ENABLE_LAZY_SWEEP 1 #endif + +#ifndef VERIFY_FREE_SIZE +#if RUBY_DEBUG +#define VERIFY_FREE_SIZE 1 +#else +#define VERIFY_FREE_SIZE 0 +#endif +#endif + +#if VERIFY_FREE_SIZE +#undef CALC_EXACT_MALLOC_SIZE +#define CALC_EXACT_MALLOC_SIZE 1 +#endif + #ifndef CALC_EXACT_MALLOC_SIZE # define CALC_EXACT_MALLOC_SIZE 0 #endif + #if defined(HAVE_MALLOC_USABLE_SIZE) || CALC_EXACT_MALLOC_SIZE > 0 # ifndef MALLOC_ALLOCATED_SIZE # define MALLOC_ALLOCATED_SIZE 0 @@ -843,6 +858,7 @@ heap_page_in_global_empty_pages_pool(rb_objspace_t *objspace, struct heap_page * #define GET_HEAP_WB_UNPROTECTED_BITS(x) (&GET_HEAP_PAGE(x)->wb_unprotected_bits[0]) #define GET_HEAP_MARKING_BITS(x) 
(&GET_HEAP_PAGE(x)->marking_bits[0]) + #define RVALUE_AGE_BITMAP_INDEX(n) (NUM_IN_PAGE(n) / (BITS_BITLENGTH / RVALUE_AGE_BIT_COUNT)) #define RVALUE_AGE_BITMAP_OFFSET(n) ((NUM_IN_PAGE(n) % (BITS_BITLENGTH / RVALUE_AGE_BIT_COUNT)) * RVALUE_AGE_BIT_COUNT) @@ -3481,15 +3497,34 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit rb_asan_unpoison_object(vp, false); if (bitset & 1) { switch (BUILTIN_TYPE(vp)) { - default: /* majority case */ - gc_report(2, objspace, "page_sweep: free %p\n", (void *)p); + case T_MOVED: + if (objspace->flags.during_compacting) { + /* The sweep cursor shouldn't have made it to any + * T_MOVED slots while the compact flag is enabled. + * The sweep cursor and compact cursor move in + * opposite directions, and when they meet references will + * get updated and "during_compacting" should get disabled */ + rb_bug("T_MOVED shouldn't be seen until compaction is finished"); + } + gc_report(3, objspace, "page_sweep: %s is added to freelist\n", rb_obj_info(vp)); + ctx->empty_slots++; + RVALUE_AGE_SET_BITMAP(vp, 0); + heap_page_add_freeobj(objspace, sweep_page, vp); + break; + case T_ZOMBIE: + /* already counted */ + break; + case T_NONE: + ctx->empty_slots++; /* already freed */ + break; + + default: #if RGENGC_CHECK_MODE if (!is_full_marking(objspace)) { if (RVALUE_OLD_P(objspace, vp)) rb_bug("page_sweep: %p - old while minor GC.", (void *)p); if (RVALUE_REMEMBERED(objspace, vp)) rb_bug("page_sweep: %p - remembered.", (void *)p); } #endif - if (RVALUE_WB_UNPROTECTED(objspace, vp)) CLEAR_IN_BITMAP(GET_HEAP_WB_UNPROTECTED_BITS(vp), vp); #if RGENGC_CHECK_MODE @@ -3501,42 +3536,34 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit #undef CHECK #endif - rb_gc_event_hook(vp, RUBY_INTERNAL_EVENT_FREEOBJ); + if (!rb_gc_obj_needs_cleanup_p(vp)) { + if (RB_UNLIKELY(objspace->hook_events & RUBY_INTERNAL_EVENT_FREEOBJ)) { + rb_gc_event_hook(vp, RUBY_INTERNAL_EVENT_FREEOBJ); + } - 
rb_gc_obj_free_vm_weak_references(vp); - if (rb_gc_obj_free(objspace, vp)) { - // always add free slots back to the swept pages freelist, - // so that if we're compacting, we can re-use the slots (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)p, BASE_SLOT_SIZE); RVALUE_AGE_SET_BITMAP(vp, 0); heap_page_add_freeobj(objspace, sweep_page, vp); - gc_report(3, objspace, "page_sweep: %s is added to freelist\n", rb_obj_info(vp)); + gc_report(3, objspace, "page_sweep: %s (fast path) added to freelist\n", rb_obj_info(vp)); ctx->freed_slots++; } else { - ctx->final_slots++; - } - break; + gc_report(2, objspace, "page_sweep: free %p\n", (void *)p); - case T_MOVED: - if (objspace->flags.during_compacting) { - /* The sweep cursor shouldn't have made it to any - * T_MOVED slots while the compact flag is enabled. - * The sweep cursor and compact cursor move in - * opposite directions, and when they meet references will - * get updated and "during_compacting" should get disabled */ - rb_bug("T_MOVED shouldn't be seen until compaction is finished"); + rb_gc_event_hook(vp, RUBY_INTERNAL_EVENT_FREEOBJ); + + rb_gc_obj_free_vm_weak_references(vp); + if (rb_gc_obj_free(objspace, vp)) { + (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)p, BASE_SLOT_SIZE); + RVALUE_AGE_SET_BITMAP(vp, 0); + heap_page_add_freeobj(objspace, sweep_page, vp); + gc_report(3, objspace, "page_sweep: %s is added to freelist\n", rb_obj_info(vp)); + ctx->freed_slots++; + } + else { + ctx->final_slots++; + } } - gc_report(3, objspace, "page_sweep: %s is added to freelist\n", rb_obj_info(vp)); - ctx->empty_slots++; - RVALUE_AGE_SET_BITMAP(vp, 0); - heap_page_add_freeobj(objspace, sweep_page, vp); - break; - case T_ZOMBIE: - /* already counted */ - break; - case T_NONE: - ctx->empty_slots++; /* already freed */ break; } } @@ -7444,7 +7471,6 @@ enum gc_stat_sym { gc_stat_sym_oldmalloc_increase_bytes, gc_stat_sym_oldmalloc_increase_bytes_limit, #endif - gc_stat_sym_weak_references_count, #if RGENGC_PROFILE 
gc_stat_sym_total_generated_normal_object_count, gc_stat_sym_total_generated_shady_object_count, @@ -7495,7 +7521,6 @@ setup_gc_stat_symbols(void) S(oldmalloc_increase_bytes); S(oldmalloc_increase_bytes_limit); #endif - S(weak_references_count); #if RGENGC_PROFILE S(total_generated_normal_object_count); S(total_generated_shady_object_count); @@ -8243,6 +8268,11 @@ rb_gc_impl_free(void *objspace_ptr, void *ptr, size_t old_size) } #if CALC_EXACT_MALLOC_SIZE struct malloc_obj_info *info = (struct malloc_obj_info *)ptr - 1; +#if VERIFY_FREE_SIZE + if (old_size && (old_size + sizeof(struct malloc_obj_info)) != info->size) { + rb_bug("buffer %p freed with size %lu, but was allocated with size %lu", ptr, old_size, info->size - sizeof(struct malloc_obj_info)); + } +#endif ptr = info; old_size = info->size; #endif diff --git a/gc/gc.h b/gc/gc.h index 097ddb93949a0b..5979b4a00193e2 100644 --- a/gc/gc.h +++ b/gc/gc.h @@ -100,6 +100,7 @@ MODULAR_GC_FN void rb_gc_after_updating_jit_code(void); MODULAR_GC_FN bool rb_gc_obj_shareable_p(VALUE); MODULAR_GC_FN void rb_gc_rp(VALUE); MODULAR_GC_FN void rb_gc_handle_weak_references(VALUE obj); +MODULAR_GC_FN bool rb_gc_obj_needs_cleanup_p(VALUE obj); #if USE_MODULAR_GC MODULAR_GC_FN bool rb_gc_event_hook_required_p(rb_event_flag_t event); diff --git a/gc/mmtk/mmtk.c b/gc/mmtk/mmtk.c index b8af39cd993d15..d5cfda1be949d9 100644 --- a/gc/mmtk/mmtk.c +++ b/gc/mmtk/mmtk.c @@ -1410,6 +1410,7 @@ enum gc_stat_sym { gc_stat_sym_free_bytes, gc_stat_sym_starting_heap_address, gc_stat_sym_last_heap_address, + gc_stat_sym_weak_references_count, gc_stat_sym_last }; @@ -1428,6 +1429,7 @@ setup_gc_stat_symbols(void) S(free_bytes); S(starting_heap_address); S(last_heap_address); + S(weak_references_count); } } @@ -1463,6 +1465,7 @@ rb_gc_impl_stat(void *objspace_ptr, VALUE hash_or_sym) SET(free_bytes, mmtk_free_bytes()); SET(starting_heap_address, (size_t)mmtk_starting_heap_address()); SET(last_heap_address, (size_t)mmtk_last_heap_address()); + 
SET(weak_references_count, mmtk_weak_references_count()); #undef SET if (!NIL_P(key)) { diff --git a/gc/mmtk/mmtk.h b/gc/mmtk/mmtk.h index 4cef1668a4fc4c..ffbad1a025cce0 100644 --- a/gc/mmtk/mmtk.h +++ b/gc/mmtk/mmtk.h @@ -129,6 +129,8 @@ void mmtk_declare_weak_references(MMTk_ObjectReference object); bool mmtk_weak_references_alive_p(MMTk_ObjectReference object); +size_t mmtk_weak_references_count(void); + void mmtk_register_pinning_obj(MMTk_ObjectReference obj); void mmtk_object_reference_write_post(MMTk_Mutator *mutator, MMTk_ObjectReference object); diff --git a/gc/mmtk/src/api.rs b/gc/mmtk/src/api.rs index 3515a2408b3714..5eac068672b549 100644 --- a/gc/mmtk/src/api.rs +++ b/gc/mmtk/src/api.rs @@ -317,6 +317,11 @@ pub extern "C" fn mmtk_weak_references_alive_p(object: ObjectReference) -> bool object.is_reachable() } +#[no_mangle] +pub extern "C" fn mmtk_weak_references_count() -> usize { + binding().weak_proc.weak_references_count() +} + // =============== Compaction =============== #[no_mangle] diff --git a/gc/mmtk/src/weak_proc.rs b/gc/mmtk/src/weak_proc.rs index d0a54f01bf6e81..f103822b737272 100644 --- a/gc/mmtk/src/weak_proc.rs +++ b/gc/mmtk/src/weak_proc.rs @@ -92,6 +92,10 @@ impl WeakProcessor { weak_references.push(object); } + pub fn weak_references_count(&self) -> usize { + self.weak_references.lock().unwrap().len() + } + pub fn process_weak_stuff( &self, worker: &mut GCWorker, diff --git a/gems/bundled_gems b/gems/bundled_gems index c8414dee7532cc..e85061bc0f842e 100644 --- a/gems/bundled_gems +++ b/gems/bundled_gems @@ -32,9 +32,9 @@ resolv-replace 0.2.0 https://github.com/ruby/resolv-replace rinda 0.2.0 https://github.com/ruby/rinda drb 2.2.3 https://github.com/ruby/drb nkf 0.2.0 https://github.com/ruby/nkf -syslog 0.3.0 https://github.com/ruby/syslog +syslog 0.4.0 https://github.com/ruby/syslog csv 3.3.5 https://github.com/ruby/csv -repl_type_completor 0.1.12 https://github.com/ruby/repl_type_completor +repl_type_completor 0.1.12 
https://github.com/ruby/repl_type_completor 26b8e964557690c0b539cff8940bcfb1591f1fe6 ostruct 0.6.3 https://github.com/ruby/ostruct pstore 0.2.0 https://github.com/ruby/pstore benchmark 0.5.0 https://github.com/ruby/benchmark diff --git a/hash.c b/hash.c index 07eeb779e9f197..83a55913fa7ce4 100644 --- a/hash.c +++ b/hash.c @@ -4492,21 +4492,21 @@ flatten_i(VALUE key, VALUE val, VALUE ary) * Examples; note that entry foo: {bar: 1, baz: 2} is never flattened. * * h = {foo: {bar: 1, baz: 2}, bat: [:bam, [:bap, [:bah]]]} - * h.flatten(1) # => [:foo, {:bar=>1, :baz=>2}, :bat, [:bam, [:bap, [:bah]]]] - * h.flatten(2) # => [:foo, {:bar=>1, :baz=>2}, :bat, :bam, [:bap, [:bah]]] - * h.flatten(3) # => [:foo, {:bar=>1, :baz=>2}, :bat, :bam, :bap, [:bah]] - * h.flatten(4) # => [:foo, {:bar=>1, :baz=>2}, :bat, :bam, :bap, :bah] - * h.flatten(5) # => [:foo, {:bar=>1, :baz=>2}, :bat, :bam, :bap, :bah] + * h.flatten(1) # => [:foo, {bar: 1, baz: 2}, :bat, [:bam, [:bap, [:bah]]]] + * h.flatten(2) # => [:foo, {bar: 1, baz: 2}, :bat, :bam, [:bap, [:bah]]] + * h.flatten(3) # => [:foo, {bar: 1, baz: 2}, :bat, :bam, :bap, [:bah]] + * h.flatten(4) # => [:foo, {bar: 1, baz: 2}, :bat, :bam, :bap, :bah] + * h.flatten(5) # => [:foo, {bar: 1, baz: 2}, :bat, :bam, :bap, :bah] * * With negative integer +depth+, * flattens all levels: * - * h.flatten(-1) # => [:foo, {:bar=>1, :baz=>2}, :bat, :bam, :bap, :bah] + * h.flatten(-1) # => [:foo, {bar: 1, baz: 2}, :bat, :bam, :bap, :bah] * * With +depth+ zero, * returns the equivalent of #to_a: * - * h.flatten(0) # => [[:foo, {:bar=>1, :baz=>2}], [:bat, [:bam, [:bap, [:bah]]]]] + * h.flatten(0) # => [[:foo, {bar: 1, baz: 2}], [:bat, [:bam, [:bap, [:bah]]]]] * * Related: see {Methods for Converting}[rdoc-ref:Hash@Methods+for+Converting]. */ @@ -7216,8 +7216,8 @@ static const rb_data_type_t env_data_type = { * * First, what's elsewhere. Class +Hash+: * - * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. 
- * - Includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here], + * - Inherits from {class Object}[rdoc-ref:Object@Whats+Here]. + * - Includes {module Enumerable}[rdoc-ref:Enumerable@Whats+Here], * which provides dozens of additional methods. * * Here, class +Hash+ provides methods that are useful for: @@ -7528,8 +7528,8 @@ Init_Hash(void) * * First, what's elsewhere. Class +ENV+: * - * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. - * - Extends {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here], + * - Inherits from {class Object}[rdoc-ref:Object@Whats+Here]. + * - Extends {module Enumerable}[rdoc-ref:Enumerable@Whats+Here], * * Here, class +ENV+ provides methods that are useful for: * diff --git a/imemo.c b/imemo.c index d949466a776f3d..0f7c260eb92cdd 100644 --- a/imemo.c +++ b/imemo.c @@ -57,7 +57,7 @@ rb_imemo_tmpbuf_new(void) rb_gc_register_pinning_obj((VALUE)obj); obj->ptr = NULL; - obj->cnt = 0; + obj->size = 0; return (VALUE)obj; } @@ -71,7 +71,7 @@ rb_alloc_tmp_buffer_with_count(volatile VALUE *store, size_t size, size_t cnt) *store = (VALUE)tmpbuf; void *ptr = ruby_xmalloc(size); tmpbuf->ptr = ptr; - tmpbuf->cnt = cnt; + tmpbuf->size = size; return ptr; } @@ -94,9 +94,9 @@ rb_free_tmp_buffer(volatile VALUE *store) rb_imemo_tmpbuf_t *s = (rb_imemo_tmpbuf_t*)ATOMIC_VALUE_EXCHANGE(*store, 0); if (s) { void *ptr = ATOMIC_PTR_EXCHANGE(s->ptr, 0); - long cnt = s->cnt; - s->cnt = 0; - ruby_sized_xfree(ptr, sizeof(VALUE) * cnt); + long size = s->size; + s->size = 0; + ruby_sized_xfree(ptr, size); } } @@ -261,7 +261,7 @@ rb_imemo_memsize(VALUE obj) case imemo_throw_data: break; case imemo_tmpbuf: - size += ((rb_imemo_tmpbuf_t *)obj)->cnt * sizeof(VALUE); + size += ((rb_imemo_tmpbuf_t *)obj)->size; break; case imemo_fields: @@ -506,7 +506,7 @@ rb_imemo_mark_and_move(VALUE obj, bool reference_updating) const rb_imemo_tmpbuf_t *m = (const rb_imemo_tmpbuf_t *)obj; if (!reference_updating) { - rb_gc_mark_locations(m->ptr, m->ptr + m->cnt); + 
rb_gc_mark_locations(m->ptr, m->ptr + (m->size / sizeof(VALUE))); } break; diff --git a/include/ruby/internal/fl_type.h b/include/ruby/internal/fl_type.h index 2afb3f1fa348f2..e3878d9ed7d567 100644 --- a/include/ruby/internal/fl_type.h +++ b/include/ruby/internal/fl_type.h @@ -108,8 +108,6 @@ #define RB_OBJ_FREEZE_RAW RB_OBJ_FREEZE_RAW #define RB_OBJ_FROZEN RB_OBJ_FROZEN #define RB_OBJ_FROZEN_RAW RB_OBJ_FROZEN_RAW -#define RB_OBJ_UNTRUST RB_OBJ_TAINT -#define RB_OBJ_UNTRUSTED RB_OBJ_TAINTED /** @endcond */ /** @@ -134,15 +132,6 @@ #define OBJ_FREEZE_RAW RB_OBJ_FREEZE_RAW /**< @old{RB_OBJ_FREEZE_RAW} */ #define OBJ_FROZEN RB_OBJ_FROZEN /**< @old{RB_OBJ_FROZEN} */ #define OBJ_FROZEN_RAW RB_OBJ_FROZEN_RAW /**< @old{RB_OBJ_FROZEN_RAW} */ -#define OBJ_INFECT RB_OBJ_INFECT /**< @old{RB_OBJ_INFECT} */ -#define OBJ_INFECT_RAW RB_OBJ_INFECT_RAW /**< @old{RB_OBJ_INFECT_RAW} */ -#define OBJ_TAINT RB_OBJ_TAINT /**< @old{RB_OBJ_TAINT} */ -#define OBJ_TAINTABLE RB_OBJ_TAINTABLE /**< @old{RB_OBJ_TAINT_RAW} */ -#define OBJ_TAINTED RB_OBJ_TAINTED /**< @old{RB_OBJ_TAINTED} */ -#define OBJ_TAINTED_RAW RB_OBJ_TAINTED_RAW /**< @old{RB_OBJ_TAINTED_RAW} */ -#define OBJ_TAINT_RAW RB_OBJ_TAINT_RAW /**< @old{RB_OBJ_TAINT_RAW} */ -#define OBJ_UNTRUST RB_OBJ_UNTRUST /**< @old{RB_OBJ_TAINT} */ -#define OBJ_UNTRUSTED RB_OBJ_UNTRUSTED /**< @old{RB_OBJ_TAINTED} */ /** @} */ /** diff --git a/include/ruby/ractor.h b/include/ruby/ractor.h index 8cfca2162107c8..5d71e1001693fe 100644 --- a/include/ruby/ractor.h +++ b/include/ruby/ractor.h @@ -248,7 +248,7 @@ RBIMPL_SYMBOL_EXPORT_END() static inline bool rb_ractor_shareable_p(VALUE obj) { - bool rb_ractor_shareable_p_continue(VALUE obj); + bool rb_ractor_shareable_p_continue(VALUE obj, VALUE *chain); if (RB_SPECIAL_CONST_P(obj)) { return true; @@ -257,7 +257,7 @@ rb_ractor_shareable_p(VALUE obj) return true; } else { - return rb_ractor_shareable_p_continue(obj); + return rb_ractor_shareable_p_continue(obj, NULL); } } diff --git a/insns.def b/insns.def 
index ceeaf4128e9abf..f9a334d824b31a 100644 --- a/insns.def +++ b/insns.def @@ -145,6 +145,7 @@ getblockparamproxy (lindex_t idx, rb_num_t level) () (VALUE val) +// attr bool zjit_profile = true; { const VALUE *ep = vm_get_ep(GET_EP(), level); VM_ASSERT(VM_ENV_LOCAL_P(ep)); diff --git a/internal/gc.h b/internal/gc.h index ee1f390e104cff..427b2f4553afc0 100644 --- a/internal/gc.h +++ b/internal/gc.h @@ -292,36 +292,6 @@ void rb_gc_writebarrier_remember(VALUE obj); const char *rb_obj_info(VALUE obj); void ruby_annotate_mmap(const void *addr, unsigned long size, const char *name); -#if defined(HAVE_MALLOC_USABLE_SIZE) || defined(HAVE_MALLOC_SIZE) || defined(_WIN32) - -static inline void * -ruby_sized_xrealloc_inlined(void *ptr, size_t new_size, size_t old_size) -{ - return ruby_xrealloc(ptr, new_size); -} - -static inline void * -ruby_sized_xrealloc2_inlined(void *ptr, size_t new_count, size_t elemsiz, size_t old_count) -{ - return ruby_xrealloc2(ptr, new_count, elemsiz); -} - -static inline void -ruby_sized_xfree_inlined(void *ptr, size_t size) -{ - ruby_xfree(ptr); -} - -# define SIZED_REALLOC_N(x, y, z, w) REALLOC_N(x, y, z) - -static inline void * -ruby_sized_realloc_n(void *ptr, size_t new_count, size_t element_size, size_t old_count) -{ - return ruby_xrealloc2(ptr, new_count, element_size); -} - -#else - static inline void * ruby_sized_xrealloc_inlined(void *ptr, size_t new_size, size_t old_size) { @@ -349,8 +319,6 @@ ruby_sized_realloc_n(void *ptr, size_t new_count, size_t element_size, size_t ol return ruby_sized_xrealloc2(ptr, new_count, element_size, old_count); } -#endif /* HAVE_MALLOC_USABLE_SIZE */ - #define ruby_sized_xrealloc ruby_sized_xrealloc_inlined #define ruby_sized_xrealloc2 ruby_sized_xrealloc2_inlined #define ruby_sized_xfree ruby_sized_xfree_inlined diff --git a/internal/imemo.h b/internal/imemo.h index 31cc0be35ae9c3..6534cec5d7ca9f 100644 --- a/internal/imemo.h +++ b/internal/imemo.h @@ -94,7 +94,7 @@ struct vm_ifunc { struct 
rb_imemo_tmpbuf_struct { VALUE flags; VALUE *ptr; /* malloc'ed buffer */ - size_t cnt; /* buffer size in VALUE */ + size_t size; /* buffer size in bytes */ }; /*! MEMO diff --git a/io.c b/io.c index 25c66550f5c382..8563fa6536c02f 100644 --- a/io.c +++ b/io.c @@ -15469,8 +15469,8 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * * First, what's elsewhere. Class \IO: * - * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. - * - Includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here], + * - Inherits from {class Object}[rdoc-ref:Object@Whats+Here]. + * - Includes {module Enumerable}[rdoc-ref:Enumerable@Whats+Here], * which provides dozens of additional methods. * * Here, class \IO provides methods that are useful for: diff --git a/iseq.c b/iseq.c index 97047794b1e904..88aa29dce32ecc 100644 --- a/iseq.c +++ b/iseq.c @@ -1142,7 +1142,7 @@ pm_iseq_new_with_opt(pm_scope_node_t *node, VALUE name, VALUE path, VALUE realpa int32_t start_line = node->parser->start_line; pm_line_column_t start = pm_newline_list_line_column(&node->parser->newline_list, location->start, start_line); - pm_line_column_t end = pm_newline_list_line_column(&node->parser->newline_list, location->end, start_line); + pm_line_column_t end = pm_newline_list_line_column(&node->parser->newline_list, location->start + location->length, start_line); rb_code_location_t code_location = (rb_code_location_t) { .beg_pos = { .lineno = (int) start.line, .column = (int) start.column }, diff --git a/lib/bundler/definition.rb b/lib/bundler/definition.rb index 5ab577f504c39d..639740e46b6b04 100644 --- a/lib/bundler/definition.rb +++ b/lib/bundler/definition.rb @@ -1077,7 +1077,7 @@ def converge_specs(specs) end end - if parent_dep + if parent_dep && parent_dep.source.is_a?(Source::Path) replacement_source = parent_dep.source else replacement_source = sources.get(lockfile_source) diff --git a/lib/net/http.rb b/lib/net/http.rb index 98d6793aee033d..98639978a2fbf6 100644 --- a/lib/net/http.rb +++ 
b/lib/net/http.rb @@ -460,7 +460,7 @@ class HTTPHeaderSyntaxError < StandardError; end # # First, what's elsewhere. Class Net::HTTP: # - # - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. + # - Inherits from {class Object}[rdoc-ref:Object#class-object-whats-here]. # # This is a categorized summary of methods and attributes. # @@ -1304,7 +1304,7 @@ def response_body_encoding=(value) # Sets whether to determine the proxy from environment variable # 'ENV['http_proxy']'; - # see {Proxy Using ENV['http_proxy']}[rdoc-ref:Net::HTTP@Proxy+Using+-27ENV-5B-27http_proxy-27-5D-27]. + # see {Proxy Using ENV['http_proxy']}[rdoc-ref:Net::HTTP@Proxy+Using+ENVHTTPProxy]. attr_writer :proxy_from_env # Sets the proxy address; diff --git a/lib/net/http/requests.rb b/lib/net/http/requests.rb index 939d413f91961c..8dc79a9f665d52 100644 --- a/lib/net/http/requests.rb +++ b/lib/net/http/requests.rb @@ -19,9 +19,9 @@ # # - Request body: optional. # - Response body: yes. -# - {Safe}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Safe_methods]: yes. -# - {Idempotent}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Idempotent_methods]: yes. -# - {Cacheable}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Cacheable_methods]: yes. +# - {Safe}[https://en.wikipedia.org/wiki/HTTP#Safe_method]: yes. +# - {Idempotent}[https://en.wikipedia.org/wiki/HTTP#Idempotent_method]: yes. +# - {Cacheable}[https://en.wikipedia.org/wiki/HTTP#Cacheable_method]: yes. # # Related: # @@ -52,9 +52,9 @@ class Net::HTTP::Get < Net::HTTPRequest # # - Request body: optional. # - Response body: no. -# - {Safe}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Safe_methods]: yes. -# - {Idempotent}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Idempotent_methods]: yes. -# - {Cacheable}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Cacheable_methods]: yes. +# - {Safe}[https://en.wikipedia.org/wiki/HTTP#Safe_method]: yes. 
+# - {Idempotent}[https://en.wikipedia.org/wiki/HTTP#Idempotent_method]: yes. +# - {Cacheable}[https://en.wikipedia.org/wiki/HTTP#Cacheable_method]: yes. # # Related: # @@ -87,9 +87,9 @@ class Net::HTTP::Head < Net::HTTPRequest # # - Request body: yes. # - Response body: yes. -# - {Safe}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Safe_methods]: no. -# - {Idempotent}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Idempotent_methods]: no. -# - {Cacheable}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Cacheable_methods]: yes. +# - {Safe}[https://en.wikipedia.org/wiki/HTTP#Safe_method]: no. +# - {Idempotent}[https://en.wikipedia.org/wiki/HTTP#Idempotent_method]: no. +# - {Cacheable}[https://en.wikipedia.org/wiki/HTTP#Cacheable_method]: yes. # # Related: # @@ -123,9 +123,9 @@ class Net::HTTP::Post < Net::HTTPRequest # # - Request body: yes. # - Response body: yes. -# - {Safe}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Safe_methods]: no. -# - {Idempotent}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Idempotent_methods]: yes. -# - {Cacheable}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Cacheable_methods]: no. +# - {Safe}[https://en.wikipedia.org/wiki/HTTP#Safe_method]: no. +# - {Idempotent}[https://en.wikipedia.org/wiki/HTTP#Idempotent_method]: yes. +# - {Cacheable}[https://en.wikipedia.org/wiki/HTTP#Cacheable_method]: no. # # Related: # @@ -157,9 +157,9 @@ class Net::HTTP::Put < Net::HTTPRequest # # - Request body: optional. # - Response body: yes. -# - {Safe}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Safe_methods]: no. -# - {Idempotent}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Idempotent_methods]: yes. -# - {Cacheable}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Cacheable_methods]: no. +# - {Safe}[https://en.wikipedia.org/wiki/HTTP#Safe_method]: no. +# - {Idempotent}[https://en.wikipedia.org/wiki/HTTP#Idempotent_method]: yes. 
+# - {Cacheable}[https://en.wikipedia.org/wiki/HTTP#Cacheable_method]: no. # # Related: # @@ -189,9 +189,9 @@ class Net::HTTP::Delete < Net::HTTPRequest # # - Request body: optional. # - Response body: yes. -# - {Safe}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Safe_methods]: yes. -# - {Idempotent}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Idempotent_methods]: yes. -# - {Cacheable}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Cacheable_methods]: no. +# - {Safe}[https://en.wikipedia.org/wiki/HTTP#Safe_method]: yes. +# - {Idempotent}[https://en.wikipedia.org/wiki/HTTP#Idempotent_method]: yes. +# - {Cacheable}[https://en.wikipedia.org/wiki/HTTP#Cacheable_method]: no. # # Related: # @@ -221,9 +221,9 @@ class Net::HTTP::Options < Net::HTTPRequest # # - Request body: no. # - Response body: yes. -# - {Safe}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Safe_methods]: yes. -# - {Idempotent}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Idempotent_methods]: yes. -# - {Cacheable}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Cacheable_methods]: no. +# - {Safe}[https://en.wikipedia.org/wiki/HTTP#Safe_method]: yes. +# - {Idempotent}[https://en.wikipedia.org/wiki/HTTP#Idempotent_method]: yes. +# - {Cacheable}[https://en.wikipedia.org/wiki/HTTP#Cacheable_method]: no. # # Related: # @@ -256,9 +256,9 @@ class Net::HTTP::Trace < Net::HTTPRequest # # - Request body: yes. # - Response body: yes. -# - {Safe}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Safe_methods]: no. -# - {Idempotent}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Idempotent_methods]: no. -# - {Cacheable}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Cacheable_methods]: no. +# - {Safe}[https://en.wikipedia.org/wiki/HTTP#Safe_method]: no. +# - {Idempotent}[https://en.wikipedia.org/wiki/HTTP#Idempotent_method]: no. +# - {Cacheable}[https://en.wikipedia.org/wiki/HTTP#Cacheable_method]: no. 
# # Related: # diff --git a/lib/open-uri.rb b/lib/open-uri.rb index 5983c7368b1d35..844865b13ac0a1 100644 --- a/lib/open-uri.rb +++ b/lib/open-uri.rb @@ -4,22 +4,25 @@ require 'time' module URI - # Allows the opening of various resources including URIs. + # Allows the opening of various resources including URIs. Example: # - # If the first argument responds to the 'open' method, 'open' is called on + # require "open-uri" + # URI.open("http://example.com") { |f| f.read } + # + # If the first argument responds to the +open+ method, +open+ is called on # it with the rest of the arguments. # # If the first argument is a string that begins with (protocol)://, it is parsed by - # URI.parse. If the parsed object responds to the 'open' method, - # 'open' is called on it with the rest of the arguments. + # URI.parse. If the parsed object responds to the +open+ method, + # +open+ is called on it with the rest of the arguments. # # Otherwise, Kernel#open is called. # # OpenURI::OpenRead#open provides URI::HTTP#open, URI::HTTPS#open and # URI::FTP#open, Kernel#open. # - # We can accept URIs and strings that begin with http://, https:// and - # ftp://. In these cases, the opened file object is extended by OpenURI::Meta. + # We can accept URIs and strings that begin with http://, https:// and + # ftp://. In these cases, the opened file object is extended by OpenURI::Meta. def self.open(name, *rest, &block) if name.respond_to?(:open) name.open(*rest, &block) diff --git a/lib/prism.rb b/lib/prism.rb index dab3420377214f..781bd4bb0115db 100644 --- a/lib/prism.rb +++ b/lib/prism.rb @@ -20,7 +20,6 @@ module Prism autoload :DSL, "prism/dsl" autoload :InspectVisitor, "prism/inspect_visitor" autoload :LexCompat, "prism/lex_compat" - autoload :LexRipper, "prism/lex_ripper" autoload :MutationCompiler, "prism/mutation_compiler" autoload :Pack, "prism/pack" autoload :Pattern, "prism/pattern" @@ -35,7 +34,6 @@ module Prism # private here. 
private_constant :LexCompat - private_constant :LexRipper # Raised when requested to parse as the currently running Ruby version but Prism has no support for it. class CurrentVersionError < ArgumentError @@ -68,15 +66,6 @@ def self.lex_compat(source, **options) LexCompat.new(source, **options).result # steep:ignore end - # :call-seq: - # Prism::lex_ripper(source) -> Array - # - # This wraps the result of Ripper.lex. It produces almost exactly the - # same tokens. Raises SyntaxError if the syntax in source is invalid. - def self.lex_ripper(source) - LexRipper.new(source).result # steep:ignore - end - # :call-seq: # Prism::load(source, serialized, freeze) -> ParseResult # diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index d4c9d60c9aa2a4..57d878a33fa299 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -423,10 +423,26 @@ def dump_options_command_line(options) # Return the value that should be dumped for the version option. def dump_options_version(version) - current = version == "current" + checking = + case version + when "current" + RUBY_VERSION + when "latest" + nil + when "nearest" + if RUBY_VERSION <= "3.3" + "3.3" + elsif RUBY_VERSION >= "4.1" + "4.1" + else + RUBY_VERSION + end + else + version + end - case current ? RUBY_VERSION : version - when nil, "latest" + case checking + when nil 0 # Handled in pm_parser_init when /\A3\.3(\.\d+)?\z/ 1 @@ -437,7 +453,7 @@ def dump_options_version(version) when /\A4\.1(\.\d+)?\z/ 4 else - if current + if version == "current" raise CurrentVersionError, RUBY_VERSION else raise ArgumentError, "invalid version: #{version}" diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb index 523ad39586b4be..4c516a9de0acb9 100644 --- a/lib/prism/lex_compat.rb +++ b/lib/prism/lex_compat.rb @@ -196,57 +196,6 @@ def deconstruct_keys(keys) "__END__": :on___end__ }.freeze - # When we produce tokens, we produce the same arrays that Ripper does. 
- # However, we add a couple of convenience methods onto them to make them a - # little easier to work with. We delegate all other methods to the array. - class Token < BasicObject - # Create a new token object with the given ripper-compatible array. - def initialize(array) - @array = array - end - - # The location of the token in the source. - def location - @array[0] - end - - # The type of the token. - def event - @array[1] - end - - # The slice of the source that this token represents. - def value - @array[2] - end - - # The state of the lexer when this token was produced. - def state - @array[3] - end - - # We want to pretend that this is just an Array. - def ==(other) # :nodoc: - @array == other - end - - def respond_to_missing?(name, include_private = false) # :nodoc: - @array.respond_to?(name, include_private) - end - - def method_missing(name, ...) # :nodoc: - @array.send(name, ...) - end - end - - # Tokens where state should be ignored - # used for :on_sp, :on_comment, :on_heredoc_end, :on_embexpr_end - class IgnoreStateToken < Token - def ==(other) # :nodoc: - self[0...-1] == other[0...-1] - end - end - # A heredoc in this case is a list of tokens that belong to the body of the # heredoc that should be appended onto the list of tokens when the heredoc # closes. @@ -290,7 +239,7 @@ def to_a embexpr_balance = 0 tokens.each_with_object([]) do |token, results| #$ Array[Token] - case token.event + case token[1] when :on_embexpr_beg embexpr_balance += 1 results << token @@ -305,9 +254,9 @@ def to_a if split # Split on "\\\n" to mimic Ripper's behavior. Use a lookbehind # to keep the delimiter in the result. 
- token.value.split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/).each_with_index do |value, index| + token[2].split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/).each_with_index do |value, index| column = 0 if index > 0 - results << Token.new([[lineno, column], :on_tstring_content, value, token.state]) + results << [[lineno, column], :on_tstring_content, value, token[3]] lineno += value.count("\n") end else @@ -350,7 +299,7 @@ def initialize # whitespace on plain string content tokens. This allows us to later # remove that amount of whitespace from the beginning of each line. def <<(token) - case token.event + case token[1] when :on_embexpr_beg, :on_heredoc_beg @embexpr_balance += 1 @dedent = 0 if @dedent_next && @ended_on_newline @@ -358,7 +307,7 @@ def <<(token) @embexpr_balance -= 1 when :on_tstring_content if embexpr_balance == 0 - line = token.value + line = token[2] if dedent_next && !(line.strip.empty? && line.end_with?("\n")) leading = line[/\A(\s*)\n?/, 1] @@ -381,7 +330,7 @@ def <<(token) end end - @dedent_next = token.event == :on_tstring_content && embexpr_balance == 0 + @dedent_next = token[1] == :on_tstring_content && embexpr_balance == 0 @ended_on_newline = false tokens << token end @@ -394,7 +343,7 @@ def to_a embexpr_balance = 0 tokens.each do |token| - case token.event + case token[1] when :on_embexpr_beg, :on_heredoc_beg embexpr_balance += 1 results << token @@ -406,9 +355,9 @@ def to_a lineno = token[0][0] column = token[0][1] - token.value.split(/(?<=\n)/).each_with_index do |value, index| + token[2].split(/(?<=\n)/).each_with_index do |value, index| column = 0 if index > 0 - results << Token.new([[lineno, column], :on_tstring_content, value, token.state]) + results << [[lineno, column], :on_tstring_content, value, token[3]] lineno += 1 end else @@ -436,15 +385,15 @@ def to_a results << token index += 1 - case token.event + case token[1] when :on_embexpr_beg, :on_heredoc_beg embexpr_balance += 1 when :on_embexpr_end, :on_heredoc_end embexpr_balance -= 1 when 
:on_tstring_content if embexpr_balance == 0 - while index < max_index && tokens[index].event == :on_tstring_content && !token.value.match?(/\\\r?\n\z/) - token.value << tokens[index].value + while index < max_index && tokens[index][1] == :on_tstring_content && !token[2].match?(/\\\r?\n\z/) + token[2] << tokens[index][2] index += 1 end end @@ -467,7 +416,7 @@ def to_a # whitespace calculation we performed above. This is because # checking if the subsequent token needs to be dedented is common to # both the dedent calculation and the ignored_sp insertion. - case token.event + case token[1] when :on_embexpr_beg embexpr_balance += 1 results << token @@ -479,7 +428,7 @@ def to_a # Here we're going to split the string on newlines, but maintain # the newlines in the resulting array. We'll do that with a look # behind assertion. - splits = token.value.split(/(?<=\n)/) + splits = token[2].split(/(?<=\n)/) index = 0 while index < splits.length @@ -536,12 +485,12 @@ def to_a ignored = deleted_chars.join line.delete_prefix!(ignored) - results << Token.new([[lineno, 0], :on_ignored_sp, ignored, token[3]]) + results << [[lineno, 0], :on_ignored_sp, ignored, token[3]] column = ignored.length end end - results << Token.new([[lineno, column], token[1], line, token[3]]) unless line.empty? + results << [[lineno, column], token[1], line, token[3]] unless line.empty? index += 1 end else @@ -552,7 +501,7 @@ def to_a end dedent_next = - ((token.event == :on_tstring_content) || (token.event == :on_heredoc_end)) && + ((token[1] == :on_tstring_content) || (token[1] == :on_heredoc_end)) && embexpr_balance == 0 end @@ -563,11 +512,11 @@ def to_a # Here we will split between the two types of heredocs and return the # object that will store their tokens. 
def self.build(opening) - case opening.value[2] + case opening[2][2] when "~" DedentingHeredoc.new when "-" - DashHeredoc.new(opening.value[3] != "'") + DashHeredoc.new(opening[2][3] != "'") else PlainHeredoc.new end @@ -639,7 +588,7 @@ def result event = RIPPER.fetch(token.type) value = token.value - lex_state = Translation::Ripper::Lexer::State.cached(lex_state) + lex_state = Translation::Ripper::Lexer::State[lex_state] token = case event @@ -647,16 +596,16 @@ def result # Ripper doesn't include the rest of the token in the event, so we need to # trim it down to just the content on the first line. value = value[0..value.index("\n")] - Token.new([[lineno, column], event, value, lex_state]) + [[lineno, column], event, value, lex_state] when :on_comment - IgnoreStateToken.new([[lineno, column], event, value, lex_state]) + [[lineno, column], event, value, lex_state] when :on_heredoc_end # Heredoc end tokens can be emitted in an odd order, so we don't # want to bother comparing the state on them. last_heredoc_end = token.location.end_offset - IgnoreStateToken.new([[lineno, column], event, value, lex_state]) + [[lineno, column], event, value, lex_state] when :on_embexpr_end - IgnoreStateToken.new([[lineno, column], event, value, lex_state]) + [[lineno, column], event, value, lex_state] when :on_words_sep # Ripper emits one token each per line. 
value.each_line.with_index do |line, index| @@ -664,7 +613,7 @@ def result lineno += 1 column = 0 end - tokens << Token.new([[lineno, column], event, line, lex_state]) + tokens << [[lineno, column], event, line, lex_state] end tokens.pop when :on_regexp_end @@ -691,12 +640,12 @@ def result counter += { on_embexpr_beg: -1, on_embexpr_end: 1 }[current_event] || 0 end - Translation::Ripper::Lexer::State.cached(result_value[current_index][1]) + Translation::Ripper::Lexer::State[result_value[current_index][1]] else previous_state end - Token.new([[lineno, column], event, value, lex_state]) + [[lineno, column], event, value, lex_state] when :on_eof eof_token = token previous_token = result_value[index - 1][0] @@ -721,13 +670,13 @@ def result end_offset += 3 end - tokens << Token.new([[lineno, 0], :on_nl, source.slice(start_offset, end_offset - start_offset), lex_state]) + tokens << [[lineno, 0], :on_nl, source.slice(start_offset, end_offset - start_offset), lex_state] end end - Token.new([[lineno, column], event, value, lex_state]) + [[lineno, column], event, value, lex_state] else - Token.new([[lineno, column], event, value, lex_state]) + [[lineno, column], event, value, lex_state] end previous_state = lex_state @@ -813,28 +762,31 @@ def result tokens = tokens[0...-1] # We sort by location because Ripper.lex sorts. - # Manually implemented instead of `sort_by!(&:location)` for performance. tokens.sort_by! 
do |token| - line, column = token.location - source.line_to_byte_offset(line) + column + line, column = token[0] + source.byte_offset(line, column) end # Add :on_sp tokens - tokens = add_on_sp_tokens(tokens, source, result.data_loc, bom, eof_token) + tokens = insert_on_sp(tokens, source, result.data_loc, bom, eof_token) Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, source) end - def add_on_sp_tokens(tokens, source, data_loc, bom, eof_token) + private + + def insert_on_sp(tokens, source, data_loc, bom, eof_token) new_tokens = [] - prev_token_state = Translation::Ripper::Lexer::State.cached(Translation::Ripper::EXPR_BEG) + prev_token_state = Translation::Ripper::Lexer::State[Translation::Ripper::EXPR_BEG] prev_token_end = bom ? 3 : 0 tokens.each do |token| - line, column = token.location - start_offset = source.line_to_byte_offset(line) + column - # Ripper reports columns on line 1 without counting the BOM, so we adjust to get the real offset + line, column = token[0] + start_offset = source.byte_offset(line, column) + + # Ripper reports columns on line 1 without counting the BOM, so we + # adjust to get the real offset start_offset += 3 if line == 1 && bom if start_offset > prev_token_end @@ -854,50 +806,28 @@ def add_on_sp_tokens(tokens, source, data_loc, bom, eof_token) continuation = sp_value[continuation_index...next_whitespace_index] second_whitespace = sp_value[next_whitespace_index..] - new_tokens << IgnoreStateToken.new([ - [sp_line, sp_column], - :on_sp, - first_whitespace, - prev_token_state - ]) unless first_whitespace.empty? - - new_tokens << IgnoreStateToken.new([ - [sp_line, sp_column + continuation_index], - :on_sp, - continuation, - prev_token_state - ]) - - new_tokens << IgnoreStateToken.new([ - [sp_line + 1, 0], - :on_sp, - second_whitespace, - prev_token_state - ]) unless second_whitespace.empty? 
+ new_tokens << [[sp_line, sp_column], :on_sp, first_whitespace, prev_token_state] unless first_whitespace.empty? + new_tokens << [[sp_line, sp_column + continuation_index], :on_sp, continuation, prev_token_state] + new_tokens << [[sp_line + 1, 0], :on_sp, second_whitespace, prev_token_state] unless second_whitespace.empty? else - new_tokens << IgnoreStateToken.new([ - [sp_line, sp_column], - :on_sp, - sp_value, - prev_token_state - ]) + new_tokens << [[sp_line, sp_column], :on_sp, sp_value, prev_token_state] end end new_tokens << token - prev_token_state = token.state - prev_token_end = start_offset + token.value.bytesize + prev_token_state = token[3] + prev_token_end = start_offset + token[2].bytesize end unless data_loc # no trailing :on_sp with __END__ as it is always preceded by :on_nl end_offset = eof_token.location.end_offset if prev_token_end < end_offset - new_tokens << IgnoreStateToken.new([ + new_tokens << [ [source.line(prev_token_end), source.column(prev_token_end)], :on_sp, source.slice(prev_token_end, end_offset - prev_token_end), prev_token_state - ]) + ] end end diff --git a/lib/prism/lex_ripper.rb b/lib/prism/lex_ripper.rb deleted file mode 100644 index f069e50ba9aa77..00000000000000 --- a/lib/prism/lex_ripper.rb +++ /dev/null @@ -1,55 +0,0 @@ -# frozen_string_literal: true -# :markup: markdown - -require "ripper" - -module Prism - # This is a class that wraps the Ripper lexer to produce almost exactly the - # same tokens. 
- class LexRipper # :nodoc: - attr_reader :source - - def initialize(source) - @source = source - end - - def result - previous = [] #: [[Integer, Integer], Symbol, String, untyped] | [] - results = [] #: Array[[[Integer, Integer], Symbol, String, untyped]] - - lex(source).each do |token| - case token[1] - when :on_tstring_content - if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@")) - previous[2] << token[2] - else - results << token - previous = token - end - else - results << token - previous = token - end - end - - results - end - - private - - if Ripper.method(:lex).parameters.assoc(:keyrest) - def lex(source) - Ripper.lex(source, raise_errors: true) - end - else - def lex(source) - ripper = Ripper::Lexer.new(source) - ripper.lex.tap do |result| - raise SyntaxError, ripper.errors.map(&:message).join(' ;') if ripper.errors.any? - end - end - end - end - - private_constant :LexRipper -end diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb index 12d19da5629a76..2498ae7e145fed 100644 --- a/lib/prism/parse_result.rb +++ b/lib/prism/parse_result.rb @@ -76,13 +76,13 @@ def slice(byte_offset, length) source.byteslice(byte_offset, length) or raise end - # Converts the line number to a byte offset corresponding to the start of that line - def line_to_byte_offset(line) - l = line - @start_line - if l < 0 || l >= offsets.size - raise ArgumentError, "line #{line} is out of range" - end - offsets[l] + # Converts the line number and column in bytes to a byte offset. + def byte_offset(line, column) + normal = line - @start_line + raise IndexError if normal < 0 + offsets.fetch(normal) + column + rescue IndexError + raise ArgumentError, "line #{line} is out of range" end # Binary search through the offsets to find the line number for the given @@ -103,7 +103,7 @@ def line_end(byte_offset) offsets[find_line(byte_offset) + 1] || source.bytesize end - # Return the column number for the given byte offset. 
+ # Return the column in bytes for the given byte offset. def column(byte_offset) byte_offset - line_start(byte_offset) end @@ -113,7 +113,7 @@ def character_offset(byte_offset) (source.byteslice(0, byte_offset) or raise).length end - # Return the column number in characters for the given byte offset. + # Return the column in characters for the given byte offset. def character_column(byte_offset) character_offset(byte_offset) - character_offset(line_start(byte_offset)) end @@ -146,7 +146,7 @@ def code_units_cache(encoding) CodeUnitsCache.new(source, encoding) end - # Returns the column number in code units for the given encoding for the + # Returns the column in code units for the given encoding for the # given byte offset. def code_units_column(byte_offset, encoding) code_units_offset(byte_offset, encoding) - code_units_offset(line_start(byte_offset), encoding) @@ -253,7 +253,7 @@ def character_offset(byte_offset) byte_offset end - # Return the column number in characters for the given byte offset. + # Return the column in characters for the given byte offset. def character_column(byte_offset) byte_offset - line_start(byte_offset) end @@ -428,19 +428,19 @@ def end_line source.line(end_offset) end - # The column number in bytes where this location starts from the start of + # The column in bytes where this location starts from the start of # the line. def start_column source.column(start_offset) end - # The column number in characters where this location ends from the start of + # The column in characters where this location ends from the start of # the line. def start_character_column source.character_column(start_offset) end - # The column number in code units of the given encoding where this location + # The column in code units of the given encoding where this location # starts from the start of the line. 
def start_code_units_column(encoding = Encoding::UTF_16LE) source.code_units_column(start_offset, encoding) @@ -452,19 +452,19 @@ def cached_start_code_units_column(cache) cache[start_offset] - cache[source.line_start(start_offset)] end - # The column number in bytes where this location ends from the start of the + # The column in bytes where this location ends from the start of the # line. def end_column source.column(end_offset) end - # The column number in characters where this location ends from the start of + # The column in characters where this location ends from the start of # the line. def end_character_column source.character_column(end_offset) end - # The column number in code units of the given encoding where this location + # The column in code units of the given encoding where this location # ends from the start of the line. def end_code_units_column(encoding = Encoding::UTF_16LE) source.code_units_column(end_offset, encoding) diff --git a/lib/prism/prism.gemspec b/lib/prism/prism.gemspec index 283c7b04aa95e6..20c66a562e9d32 100644 --- a/lib/prism/prism.gemspec +++ b/lib/prism/prism.gemspec @@ -2,7 +2,7 @@ Gem::Specification.new do |spec| spec.name = "prism" - spec.version = "1.8.0" + spec.version = "1.9.0" spec.authors = ["Shopify"] spec.email = ["ruby@shopify.com"] @@ -77,7 +77,6 @@ Gem::Specification.new do |spec| "lib/prism/ffi.rb", "lib/prism/inspect_visitor.rb", "lib/prism/lex_compat.rb", - "lib/prism/lex_ripper.rb", "lib/prism/mutation_compiler.rb", "lib/prism/node_ext.rb", "lib/prism/node.rb", diff --git a/lib/prism/translation/parser/compiler.rb b/lib/prism/translation/parser/compiler.rb index 88056146036411..bd3618b16289e7 100644 --- a/lib/prism/translation/parser/compiler.rb +++ b/lib/prism/translation/parser/compiler.rb @@ -1767,7 +1767,7 @@ def visit_symbol_node(node) end else parts = - if node.value == "" + if node.value_loc.nil? 
[] elsif node.value.include?("\n") string_nodes_from_line_continuations(node.unescaped, node.value, node.value_loc.start_offset, node.opening) diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb index 75c48ef667c642..0491e79cd212e6 100644 --- a/lib/prism/translation/parser/lexer.rb +++ b/lib/prism/translation/parser/lexer.rb @@ -18,8 +18,6 @@ class Lexer # The direct translating of types between the two lexers. TYPES = { # These tokens should never appear in the output of the lexer. - MISSING: nil, - NOT_PROVIDED: nil, EMBDOC_END: nil, EMBDOC_LINE: nil, diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb index 735217d2e03608..054ad88ce3e8a3 100644 --- a/lib/prism/translation/ripper.rb +++ b/lib/prism/translation/ripper.rb @@ -88,7 +88,7 @@ def self.lex(src, filename = "-", lineno = 1, raise_errors: false) # # => ["def", " ", "m", "(", "a", ")", " ", "nil", " ", "end"] # def self.tokenize(...) - lex(...).map(&:value) + lex(...).map { |token| token[2] } end # This contains a table of all of the parser events and their @@ -475,7 +475,7 @@ def self.lex_state_name(state) # The current line number of the parser. attr_reader :lineno - # The current column number of the parser. + # The current column in bytes of the parser. attr_reader :column # Create a new Translation::Ripper object with the given source. @@ -3152,14 +3152,13 @@ def visit_super_node(node) # :foo # ^^^^ def visit_symbol_node(node) - if (opening = node.opening)&.match?(/^%s|['"]:?$/) + if node.value_loc.nil? + bounds(node.location) + on_dyna_symbol(on_string_content) + elsif (opening = node.opening)&.match?(/^%s|['"]:?$/) bounds(node.value_loc) - content = on_string_content - - if !(value = node.value).empty? 
- content = on_string_add(content, on_tstring_content(value)) - end - + content = on_string_add(on_string_content, on_tstring_content(node.value)) + bounds(node.location) on_dyna_symbol(content) elsif (closing = node.closing) == ":" bounds(node.location) diff --git a/lib/prism/translation/ripper/lexer.rb b/lib/prism/translation/ripper/lexer.rb index bed863af081b79..cbcdcd47cc3af0 100644 --- a/lib/prism/translation/ripper/lexer.rb +++ b/lib/prism/translation/ripper/lexer.rb @@ -9,7 +9,6 @@ class Ripper class Lexer < Ripper # :nodoc: # :stopdoc: class State - attr_reader :to_int, :to_s def initialize(i) @@ -39,10 +38,12 @@ def allbits?(i) to_int.allbits?(i) end def anybits?(i) to_int.anybits?(i) end def nobits?(i) to_int.nobits?(i) end - # Instances are frozen and there are only a handful of them so we cache them here. - STATES = Hash.new { |h,k| h[k] = State.new(k) } + # Instances are frozen and there are only a handful of them so we + # cache them here. + STATES = Hash.new { |hash, key| hash[key] = State.new(key) } + private_constant :STATES - def self.cached(i) + def self.[](i) STATES[i] end end @@ -54,7 +55,7 @@ def initialize(pos, event, tok, state, message = nil) @pos = pos @event = event @tok = tok - @state = State.cached(state) + @state = State[state] @message = message end diff --git a/lib/resolv.rb b/lib/resolv.rb index fa7d4e2e4753b3..9720b52c00fef3 100644 --- a/lib/resolv.rb +++ b/lib/resolv.rb @@ -35,7 +35,7 @@ class Resolv # The version string - VERSION = "0.7.0" + VERSION = "0.7.1" ## # Looks up the first IP address for +name+. 
@@ -487,13 +487,18 @@ def each_name(address) # * Resolv::DNS::Resource::IN::A # * Resolv::DNS::Resource::IN::AAAA # * Resolv::DNS::Resource::IN::ANY + # * Resolv::DNS::Resource::IN::CAA # * Resolv::DNS::Resource::IN::CNAME # * Resolv::DNS::Resource::IN::HINFO + # * Resolv::DNS::Resource::IN::HTTPS + # * Resolv::DNS::Resource::IN::LOC # * Resolv::DNS::Resource::IN::MINFO # * Resolv::DNS::Resource::IN::MX # * Resolv::DNS::Resource::IN::NS # * Resolv::DNS::Resource::IN::PTR # * Resolv::DNS::Resource::IN::SOA + # * Resolv::DNS::Resource::IN::SRV + # * Resolv::DNS::Resource::IN::SVCB # * Resolv::DNS::Resource::IN::TXT # * Resolv::DNS::Resource::IN::WKS # diff --git a/lib/rubygems/remote_fetcher.rb b/lib/rubygems/remote_fetcher.rb index 805f7aaf82ed1a..151c6fd4d8bbdf 100644 --- a/lib/rubygems/remote_fetcher.rb +++ b/lib/rubygems/remote_fetcher.rb @@ -174,7 +174,7 @@ def download(spec, source_uri, install_dir = Gem.dir) end verbose "Using local gem #{local_gem_path}" - when nil then # TODO: test for local overriding cache + when nil then source_path = if Gem.win_platform? && source_uri.scheme && !source_uri.path.include?(":") "#{source_uri.scheme}:#{source_uri.path}" diff --git a/numeric.c b/numeric.c index e8df2a6aa0568c..36101882943761 100644 --- a/numeric.c +++ b/numeric.c @@ -3680,9 +3680,9 @@ rb_int128_to_numeric(rb_int128_t n) * First, what's elsewhere. Class \Integer: * * - Inherits from - * {class Numeric}[rdoc-ref:Numeric@What-27s+Here] - * and {class Object}[rdoc-ref:Object@What-27s+Here]. - * - Includes {module Comparable}[rdoc-ref:Comparable@What-27s+Here]. + * {class Numeric}[rdoc-ref:Numeric@Whats+Here] + * and {class Object}[rdoc-ref:Object@Whats+Here]. + * - Includes {module Comparable}[rdoc-ref:Comparable@Whats+Here]. * * Here, class \Integer provides methods for: * @@ -6365,8 +6365,8 @@ int_s_try_convert(VALUE self, VALUE num) * * First, what's elsewhere. Class \Numeric: * - * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. 
- * - Includes {module Comparable}[rdoc-ref:Comparable@What-27s+Here]. + * - Inherits from {class Object}[rdoc-ref:Object@Whats+Here]. + * - Includes {module Comparable}[rdoc-ref:Comparable@Whats+Here]. * * Here, class \Numeric provides methods for: * diff --git a/object.c b/object.c index 75186a30c66868..07eb1d8e975251 100644 --- a/object.c +++ b/object.c @@ -4357,8 +4357,8 @@ rb_f_loop_size(VALUE self, VALUE args, VALUE eobj) * * First, what's elsewhere. Class \Object: * - * - Inherits from {class BasicObject}[rdoc-ref:BasicObject@What-27s+Here]. - * - Includes {module Kernel}[rdoc-ref:Kernel@What-27s+Here]. + * - Inherits from {class BasicObject}[rdoc-ref:BasicObject@Whats+Here]. + * - Includes {module Kernel}[rdoc-ref:Kernel@Whats+Here]. * * Here, class \Object provides methods for: * diff --git a/prism/config.yml b/prism/config.yml index 4e5b077a351ff1..4e1560481e9d9e 100644 --- a/prism/config.yml +++ b/prism/config.yml @@ -653,10 +653,6 @@ tokens: comment: "a separator between words in a list" - name: __END__ comment: "marker for the point in the file at which the parser should stop" - - name: MISSING - comment: "a token that was expected but not found" - - name: NOT_PROVIDED - comment: "a token that was not present but it is okay" flags: - name: ArgumentsNodeFlags values: diff --git a/prism/defines.h b/prism/defines.h index e31429c7896dc5..f6bd1dbe40edec 100644 --- a/prism/defines.h +++ b/prism/defines.h @@ -257,4 +257,35 @@ #define PRISM_FALLTHROUGH #endif +/** + * We need to align nodes in the AST to a pointer boundary so that it can be + * safely cast to different node types. Use PRISM_ALIGNAS/PRISM_ALIGNOF to + * specify alignment in a compiler-agnostic way. + */ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L /* C11 or later */ + /** Specify alignment for a type or variable. */ + #define PRISM_ALIGNAS _Alignas + + /** Get the alignment requirement of a type. 
*/ + #define PRISM_ALIGNOF _Alignof +#elif defined(__GNUC__) || defined(__clang__) + /** Specify alignment for a type or variable. */ + #define PRISM_ALIGNAS(size) __attribute__((aligned(size))) + + /** Get the alignment requirement of a type. */ + #define PRISM_ALIGNOF(type) __alignof__(type) +#elif defined(_MSC_VER) + /** Specify alignment for a type or variable. */ + #define PRISM_ALIGNAS(size) __declspec(align(size)) + + /** Get the alignment requirement of a type. */ + #define PRISM_ALIGNOF(type) __alignof(type) +#else + /** Void because this platform does not support specifying alignment. */ + #define PRISM_ALIGNAS(size) + + /** Fallback to sizeof as alignment requirement of a type. */ + #define PRISM_ALIGNOF(type) sizeof(type) +#endif + #endif diff --git a/prism/extension.c b/prism/extension.c index 71c2d91b98d0f0..cde10bf360df2a 100644 --- a/prism/extension.c +++ b/prism/extension.c @@ -201,9 +201,24 @@ build_options_i(VALUE key, VALUE value, VALUE argument) { const char *version = check_string(value); if (RSTRING_LEN(value) == 7 && strncmp(version, "current", 7) == 0) { - const char *current_version = RSTRING_PTR(rb_const_get(rb_cObject, rb_intern("RUBY_VERSION"))); - if (!pm_options_version_set(options, current_version, 3)) { - rb_exc_raise(rb_exc_new_cstr(rb_cPrismCurrentVersionError, current_version)); + const char *ruby_version = RSTRING_PTR(rb_const_get(rb_cObject, rb_intern("RUBY_VERSION"))); + if (!pm_options_version_set(options, ruby_version, 3)) { + rb_exc_raise(rb_exc_new_cstr(rb_cPrismCurrentVersionError, ruby_version)); + } + } else if (RSTRING_LEN(value) == 7 && strncmp(version, "nearest", 7) == 0) { + const char *ruby_version = RSTRING_PTR(rb_const_get(rb_cObject, rb_intern("RUBY_VERSION"))); + const char *nearest_version; + + if (ruby_version[0] < '3' || (ruby_version[0] == '3' && ruby_version[2] < '3')) { + nearest_version = "3.3"; + } else if (ruby_version[0] > '4' || (ruby_version[0] == '4' && ruby_version[2] > '1')) { + nearest_version = 
"4.1"; + } else { + nearest_version = ruby_version; + } + + if (!pm_options_version_set(options, nearest_version, 3)) { + rb_raise(rb_eArgError, "invalid nearest version: %s", nearest_version); } } else if (!pm_options_version_set(options, version, RSTRING_LEN(value))) { rb_raise(rb_eArgError, "invalid version: %" PRIsVALUE, value); @@ -455,23 +470,23 @@ rb_class_new_instance_freeze(int argc, const VALUE *argv, VALUE klass, bool free * Create a new Location instance from the given parser and bounds. */ static inline VALUE -parser_location(const pm_parser_t *parser, VALUE source, bool freeze, const uint8_t *start, size_t length) { - VALUE argv[] = { source, LONG2FIX(start - parser->start), LONG2FIX(length) }; +parser_location(VALUE source, bool freeze, uint32_t start, uint32_t length) { + VALUE argv[] = { source, LONG2FIX(start), LONG2FIX(length) }; return rb_class_new_instance_freeze(3, argv, rb_cPrismLocation, freeze); } /** * Create a new Location instance from the given parser and location. */ -#define PARSER_LOCATION_LOC(parser, source, freeze, loc) \ - parser_location(parser, source, freeze, loc.start, (size_t) (loc.end - loc.start)) +#define PARSER_LOCATION(source, freeze, location) \ + parser_location(source, freeze, location.start, location.length) /** * Build a new Comment instance from the given parser and comment. */ static inline VALUE -parser_comment(const pm_parser_t *parser, VALUE source, bool freeze, const pm_comment_t *comment) { - VALUE argv[] = { PARSER_LOCATION_LOC(parser, source, freeze, comment->location) }; +parser_comment(VALUE source, bool freeze, const pm_comment_t *comment) { + VALUE argv[] = { PARSER_LOCATION(source, freeze, comment->location) }; VALUE type = (comment->type == PM_COMMENT_EMBDOC) ? 
rb_cPrismEmbDocComment : rb_cPrismInlineComment; return rb_class_new_instance_freeze(1, argv, type, freeze); } @@ -488,7 +503,7 @@ parser_comments(const pm_parser_t *parser, VALUE source, bool freeze) { comment != NULL; comment = (const pm_comment_t *) comment->node.next ) { - VALUE value = parser_comment(parser, source, freeze, comment); + VALUE value = parser_comment(source, freeze, comment); rb_ary_push(comments, value); } @@ -500,9 +515,9 @@ parser_comments(const pm_parser_t *parser, VALUE source, bool freeze) { * Build a new MagicComment instance from the given parser and magic comment. */ static inline VALUE -parser_magic_comment(const pm_parser_t *parser, VALUE source, bool freeze, const pm_magic_comment_t *magic_comment) { - VALUE key_loc = parser_location(parser, source, freeze, magic_comment->key_start, magic_comment->key_length); - VALUE value_loc = parser_location(parser, source, freeze, magic_comment->value_start, magic_comment->value_length); +parser_magic_comment(VALUE source, bool freeze, const pm_magic_comment_t *magic_comment) { + VALUE key_loc = parser_location(source, freeze, magic_comment->key.start, magic_comment->key.length); + VALUE value_loc = parser_location(source, freeze, magic_comment->value.start, magic_comment->value.length); VALUE argv[] = { key_loc, value_loc }; return rb_class_new_instance_freeze(2, argv, rb_cPrismMagicComment, freeze); } @@ -519,7 +534,7 @@ parser_magic_comments(const pm_parser_t *parser, VALUE source, bool freeze) { magic_comment != NULL; magic_comment = (const pm_magic_comment_t *) magic_comment->node.next ) { - VALUE value = parser_magic_comment(parser, source, freeze, magic_comment); + VALUE value = parser_magic_comment(source, freeze, magic_comment); rb_ary_push(magic_comments, value); } @@ -533,10 +548,10 @@ parser_magic_comments(const pm_parser_t *parser, VALUE source, bool freeze) { */ static VALUE parser_data_loc(const pm_parser_t *parser, VALUE source, bool freeze) { - if (parser->data_loc.end == NULL) { 
+ if (parser->data_loc.length == 0) { return Qnil; } else { - return PARSER_LOCATION_LOC(parser, source, freeze, parser->data_loc); + return parser_location(source, freeze, parser->data_loc.start, parser->data_loc.length); } } @@ -554,7 +569,7 @@ parser_errors(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bo ) { VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(error->diag_id))); VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(error->message, encoding)); - VALUE location = PARSER_LOCATION_LOC(parser, source, freeze, error->location); + VALUE location = PARSER_LOCATION(source, freeze, error->location); VALUE level = Qnil; switch (error->level) { @@ -594,7 +609,7 @@ parser_warnings(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, ) { VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(warning->diag_id))); VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(warning->message, encoding)); - VALUE location = PARSER_LOCATION_LOC(parser, source, freeze, warning->location); + VALUE location = PARSER_LOCATION(source, freeze, warning->location); VALUE level = Qnil; switch (warning->level) { @@ -894,8 +909,10 @@ parse_input(pm_string_t *input, const pm_options_t *options) { * version of Ruby syntax (which you can trigger with `nil` or * `"latest"`). You may also restrict the syntax to a specific version of * Ruby, e.g., with `"3.3.0"`. To parse with the same syntax version that - * the current Ruby is running use `version: "current"`. Raises - * ArgumentError if the version is not currently supported by Prism. + * the current Ruby is running use `version: "current"`. To parse with the + * nearest version to the current Ruby that is running, use + * `version: "nearest"`. Raises ArgumentError if the version is not + * currently supported by Prism. 
*/ static VALUE parse(int argc, VALUE *argv, VALUE self) { diff --git a/prism/extension.h b/prism/extension.h index 510faa48e8dfed..4ddc3a7b8617d0 100644 --- a/prism/extension.h +++ b/prism/extension.h @@ -1,7 +1,7 @@ #ifndef PRISM_EXT_NODE_H #define PRISM_EXT_NODE_H -#define EXPECTED_PRISM_VERSION "1.8.0" +#define EXPECTED_PRISM_VERSION "1.9.0" #include #include diff --git a/prism/options.h b/prism/options.h index c00c7bf7553a4f..9a19a2aeadf31a 100644 --- a/prism/options.h +++ b/prism/options.h @@ -82,7 +82,10 @@ typedef void (*pm_options_shebang_callback_t)(struct pm_options *options, const * parse in the same way as a specific version of CRuby would have. */ typedef enum { - /** If an explicit version is not provided, the current version of prism will be used. */ + /** + * If an explicit version is not provided, the current version of prism will + * be used. + */ PM_OPTIONS_VERSION_UNSET = 0, /** The vendored version of prism in CRuby 3.3.x. */ @@ -452,6 +455,9 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options); * | ----- | ------------------------- | * | `0` | use the latest version of prism | * | `1` | use the version of prism that is vendored in CRuby 3.3.0 | + * | `2` | use the version of prism that is vendored in CRuby 3.4.0 | + * | `3` | use the version of prism that is vendored in CRuby 4.0.0 | + * | `4` | use the version of prism that is vendored in CRuby 4.1.0 | * * Each scope is laid out as follows: * diff --git a/prism/parser.h b/prism/parser.h index 95d7aac7108c98..a8d840d3bfd5b2 100644 --- a/prism/parser.h +++ b/prism/parser.h @@ -479,17 +479,11 @@ typedef struct { /** The embedded base node. */ pm_list_node_t node; - /** A pointer to the start of the key in the source. */ - const uint8_t *key_start; + /** The key of the magic comment. */ + pm_location_t key; - /** A pointer to the start of the value in the source. */ - const uint8_t *value_start; - - /** The length of the key in the source. 
*/ - uint32_t key_length; - - /** The length of the value in the source. */ - uint32_t value_length; + /** The value of the magic comment. */ + pm_location_t value; } pm_magic_comment_t; /** diff --git a/prism/prism.c b/prism/prism.c index b158e505b2dc82..2c039612850e92 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -19,22 +19,49 @@ pm_version(void) { #define MAX(a,b) (((a)>(b))?(a):(b)) /******************************************************************************/ -/* Helpful AST-related macros */ +/* Helpful AST-related macros */ /******************************************************************************/ +#define U32(value_) ((uint32_t) (value_)) + #define FL PM_NODE_FLAGS #define UP PM_NODE_UPCAST -#define PM_TOKEN_START(token_) ((token_)->start) -#define PM_TOKEN_END(token_) ((token_)->end) +#define PM_LOCATION_START(location_) ((location_)->start) +#define PM_LOCATION_END(location_) ((location_)->start + (location_)->length) + +#define PM_TOKEN_START(parser_, token_) U32((token_)->start - (parser_)->start) +#define PM_TOKEN_END(parser_, token_) U32((token_)->end - (parser_)->start) +#define PM_TOKEN_LENGTH(token_) U32((token_)->end - (token_)->start) +#define PM_TOKENS_LENGTH(left_, right_) U32((right_)->end - (left_)->start) #define PM_NODE_START(node_) (UP(node_)->location.start) -#define PM_NODE_END(node_) (UP(node_)->location.end) +#define PM_NODE_LENGTH(node_) (UP(node_)->location.length) +#define PM_NODE_END(node_) (UP(node_)->location.start + UP(node_)->location.length) +#define PM_NODES_LENGTH(left_, right_) (PM_NODE_END(right_) - PM_NODE_START(left_)) + +#define PM_TOKEN_NODE_LENGTH(parser_, token_, node_) (PM_NODE_END(node_) - PM_TOKEN_START(parser_, token_)) +#define PM_NODE_TOKEN_LENGTH(parser_, node_, token_) (PM_TOKEN_END(parser_, token_) - PM_NODE_START(node_)) + +#define PM_NODE_START_SET_NODE(left_, right_) (PM_NODE_START(left_) = PM_NODE_START(right_)) +#define PM_NODE_START_SET_TOKEN(parser_, node_, token_) (PM_NODE_START(node_) = 
PM_TOKEN_START(parser_, token_)) +#define PM_NODE_LENGTH_SET_NODE(left_, right_) (PM_NODE_LENGTH(left_) = PM_NODE_END(right_) - PM_NODE_START(left_)) +#define PM_NODE_LENGTH_SET_TOKEN(parser_, node_, token_) (PM_NODE_LENGTH(node_) = PM_TOKEN_END(parser_, token_) - PM_NODE_START(node_)) +#define PM_NODE_LENGTH_SET_LOCATION(node_, location_) (PM_NODE_LENGTH(node_) = PM_LOCATION_END(location_) - PM_NODE_START(node_)) -#define PM_LOCATION_NULL_VALUE(parser_) ((pm_location_t) { .start = (parser_)->start, .end = (parser_)->start }) -#define PM_LOCATION_TOKEN_VALUE(token_) ((pm_location_t) { .start = PM_TOKEN_START(token_), .end = PM_TOKEN_END(token_) }) -#define PM_LOCATION_NODE_VALUE(node_) ((pm_location_t) { .start = PM_NODE_START(node_), .end = PM_NODE_END(node_) }) -#define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token) ((token)->type == PM_TOKEN_NOT_PROVIDED ? ((pm_location_t) { 0 }) : PM_LOCATION_TOKEN_VALUE(token)) +#define PM_LOCATION_INIT(start_, length_) ((pm_location_t) { .start = (start_), .length = (length_) }) +#define PM_LOCATION_INIT_UNSET PM_LOCATION_INIT(0, 0) +#define PM_LOCATION_INIT_TOKEN(parser_, token_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_)) +#define PM_LOCATION_INIT_NODE(node_) UP(node_)->location + +#define PM_LOCATION_INIT_TOKENS(parser_, left_, right_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, left_), PM_TOKENS_LENGTH(left_, right_)) +#define PM_LOCATION_INIT_NODES(left_, right_) PM_LOCATION_INIT(PM_NODE_START(left_), PM_NODES_LENGTH(left_, right_)) +#define PM_LOCATION_INIT_TOKEN_NODE(parser_, token_, node_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_NODE_LENGTH(parser_, token_, node_)) +#define PM_LOCATION_INIT_NODE_TOKEN(parser_, node_, token_) PM_LOCATION_INIT(PM_NODE_START(node_), PM_NODE_TOKEN_LENGTH(parser_, node_, token_)) + +#define TOK2LOC(parser_, token_) PM_LOCATION_INIT_TOKEN(parser_, token_) +#define NTOK2LOC(parser_, token_) ((token_) == NULL ? 
PM_LOCATION_INIT_UNSET : TOK2LOC(parser_, token_)) +#define NTOK2PTR(token_) ((token_).start == NULL ? NULL : &(token_)) /******************************************************************************/ /* Lex mode manipulations */ @@ -422,15 +449,18 @@ debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * call * Append an error to the list of errors on the parser. */ static inline void -pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) { - pm_diagnostic_list_append(&parser->error_list, start, end, diag_id); +pm_parser_err(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) { + pm_diagnostic_list_append(&parser->error_list, start, length, diag_id); } /** - * Append an error to the list of errors on the parser using a format string. + * Append an error to the list of errors on the parser using the location of the + * given token. */ -#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \ - pm_diagnostic_list_append_format(&parser->error_list, start, end, diag_id, __VA_ARGS__) +static inline void +pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) { + pm_parser_err(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id); +} /** * Append an error to the list of errors on the parser using the location of the @@ -438,15 +468,17 @@ pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_ */ static inline void pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { - pm_parser_err(parser, parser->current.start, parser->current.end, diag_id); + pm_parser_err_token(parser, &parser->current, diag_id); } /** - * Append an error to the list of errors on the parser using the given location - * using a format string. + * Append an error to the list of errors on the parser using the location of the + * previous token. 
*/ -#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \ - PM_PARSER_ERR_FORMAT(parser, (location)->start, (location)->end, diag_id, __VA_ARGS__) +static inline void +pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { + pm_parser_err_token(parser, &parser->previous, diag_id); +} /** * Append an error to the list of errors on the parser using the location of the @@ -454,61 +486,49 @@ pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { */ static inline void pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) { - pm_parser_err(parser, node->location.start, node->location.end, diag_id); + pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id); } /** - * Append an error to the list of errors on the parser using the location of the - * given node and a format string. - */ -#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \ - PM_PARSER_ERR_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__) - -/** - * Append an error to the list of errors on the parser using the location of the - * given node and a format string, and add on the content of the node. + * Append an error to the list of errors on the parser using a format string. */ -#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \ - PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, (int) ((node)->location.end - (node)->location.start), (const char *) (node)->location.start) +#define PM_PARSER_ERR_FORMAT(parser_, start_, length_, diag_id_, ...) \ + pm_diagnostic_list_append_format(&(parser_)->error_list, start_, length_, diag_id_, __VA_ARGS__) /** * Append an error to the list of errors on the parser using the location of the - * previous token. + * given node and a format string. 
*/ -static inline void -pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { - pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id); -} +#define PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, ...) \ + PM_PARSER_ERR_FORMAT(parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__) /** * Append an error to the list of errors on the parser using the location of the - * given token. + * given node and a format string, and add on the content of the node. */ -static inline void -pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) { - pm_parser_err(parser, token->start, token->end, diag_id); -} +#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser_, node_, diag_id_) \ + PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, (int) PM_NODE_LENGTH(node_), (const char *) (parser_->start + PM_NODE_START(node_))) /** * Append an error to the list of errors on the parser using the location of the * given token and a format string. */ -#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \ - PM_PARSER_ERR_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__) +#define PM_PARSER_ERR_TOKEN_FORMAT(parser_, token_, diag_id, ...) \ + PM_PARSER_ERR_FORMAT(parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id, __VA_ARGS__) /** * Append an error to the list of errors on the parser using the location of the * given token and a format string, and add on the content of the token. */ -#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \ - PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start) +#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \ + PM_PARSER_ERR_TOKEN_FORMAT(parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start) /** * Append a warning to the list of warnings on the parser. 
*/ static inline void -pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) { - pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id); +pm_parser_warn(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) { + pm_diagnostic_list_append(&parser->warning_list, start, length, diag_id); } /** @@ -517,7 +537,7 @@ pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm */ static inline void pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) { - pm_parser_warn(parser, token->start, token->end, diag_id); + pm_parser_warn(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id); } /** @@ -526,35 +546,36 @@ pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic */ static inline void pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) { - pm_parser_warn(parser, node->location.start, node->location.end, diag_id); + pm_parser_warn(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id); } /** - * Append a warning to the list of warnings on the parser using a format string. + * Append a warning to the list of warnings on the parser using a format string + * and the given location. */ -#define PM_PARSER_WARN_FORMAT(parser, start, end, diag_id, ...) \ - pm_diagnostic_list_append_format(&parser->warning_list, start, end, diag_id, __VA_ARGS__) +#define PM_PARSER_WARN_FORMAT(parser_, start_, length_, diag_id_, ...) \ + pm_diagnostic_list_append_format(&(parser_)->warning_list, start_, length_, diag_id_, __VA_ARGS__) /** * Append a warning to the list of warnings on the parser using the location of * the given token and a format string. */ -#define PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, ...) 
\ - PM_PARSER_WARN_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__) +#define PM_PARSER_WARN_TOKEN_FORMAT(parser_, token_, diag_id_, ...) \ + PM_PARSER_WARN_FORMAT(parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id_, __VA_ARGS__) /** * Append a warning to the list of warnings on the parser using the location of * the given token and a format string, and add on the content of the token. */ -#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \ - PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start) +#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \ + PM_PARSER_WARN_TOKEN_FORMAT(parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start) /** * Append a warning to the list of warnings on the parser using the location of * the given node and a format string. */ -#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \ - PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__) +#define PM_PARSER_WARN_NODE_FORMAT(parser_, node_, diag_id_, ...) \ + PM_PARSER_WARN_FORMAT(parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__) /** * Add an error for an expected heredoc terminator. This is a special function @@ -565,8 +586,8 @@ static void pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) { PM_PARSER_ERR_FORMAT( parser, - ident_start, - ident_start + ident_length, + U32(ident_start - parser->start), + U32(ident_length), PM_ERR_HEREDOC_TERM, (int) ident_length, (const char *) ident_start @@ -828,7 +849,7 @@ pm_locals_resize(pm_locals_t *locals) { * @return True if the local was added, and false if the local already exists. 
*/ static bool -pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start, const uint8_t *end, uint32_t reads) { +pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, uint32_t start, uint32_t length, uint32_t reads) { if (locals->size >= (locals->capacity / 4 * 3)) { pm_locals_resize(locals); } @@ -840,7 +861,7 @@ pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start if (local->name == PM_CONSTANT_ID_UNSET) { *local = (pm_local_t) { .name = name, - .location = { .start = start, .end = end }, + .location = { .start = start, .length = length }, .index = locals->size++, .reads = reads, .hash = 0 @@ -861,7 +882,7 @@ pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start if (local->name == PM_CONSTANT_ID_UNSET) { *local = (pm_local_t) { .name = name, - .location = { .start = start, .end = end }, + .location = { .start = start, .length = length }, .index = locals->size++, .reads = reads, .hash = initial_hash @@ -986,7 +1007,7 @@ pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, PM_PARSER_WARN_FORMAT( parser, local->location.start, - local->location.end, + local->location.length, PM_WARN_UNUSED_LOCAL_VARIABLE, (int) constant->length, (const char *) constant->start @@ -1005,7 +1026,7 @@ pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, * Retrieve the constant pool id for the given location. 
*/ static inline pm_constant_id_t -pm_parser_constant_id_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) { +pm_parser_constant_id_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) { return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start)); } @@ -1030,16 +1051,7 @@ pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t le */ static inline pm_constant_id_t pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) { - return pm_parser_constant_id_location(parser, token->start, token->end); -} - -/** - * Retrieve the constant pool id for the given token. If the token is not - * provided, then return 0. - */ -static inline pm_constant_id_t -pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *token) { - return token->type == PM_TOKEN_NOT_PROVIDED ? 0 : pm_parser_constant_id_token(parser, token); + return pm_parser_constant_id_raw(parser, token->start, token->end); } /** @@ -1211,7 +1223,7 @@ pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) { break; case PM_CALL_NODE: { const pm_call_node_t *cast = (const pm_call_node_t *) node; - if (cast->call_operator_loc.start != NULL || cast->message_loc.start == NULL) break; + if (cast->call_operator_loc.length > 0 || cast->message_loc.length == 0) break; const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name); switch (message->length) { @@ -1564,19 +1576,6 @@ pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_pr } } -/** - * In a lot of places in the tree you can have tokens that are not provided but - * that do not cause an error. For example, this happens in a method call - * without parentheses. In these cases we set the token to the "not provided" type. 
- * For example: - * - * pm_token_t token = not_provided(parser); - */ -static inline pm_token_t -not_provided(pm_parser_t *parser) { - return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start }; -} - /** * This is a special out parameter to the parse_arguments_list function that * includes opening and closing parentheses in addition to the arguments since @@ -1603,22 +1602,29 @@ typedef struct { /** * Retrieve the end location of a `pm_arguments_t` object. */ -static inline const uint8_t * +static inline const pm_location_t * pm_arguments_end(pm_arguments_t *arguments) { if (arguments->block != NULL) { - const uint8_t *end = arguments->block->location.end; - if (arguments->closing_loc.start != NULL && arguments->closing_loc.end > end) { - end = arguments->closing_loc.end; + uint32_t end = PM_NODE_END(arguments->block); + + if (arguments->closing_loc.length > 0) { + uint32_t arguments_end = PM_LOCATION_END(&arguments->closing_loc); + if (arguments_end > end) { + return &arguments->closing_loc; + } } - return end; + return &arguments->block->location; } - if (arguments->closing_loc.start != NULL) { - return arguments->closing_loc.end; + if (arguments->closing_loc.length > 0) { + return &arguments->closing_loc; } if (arguments->arguments != NULL) { - return arguments->arguments->base.location.end; + return &arguments->arguments->base.location; + } + if (arguments->opening_loc.length > 0) { + return &arguments->opening_loc; } - return arguments->closing_loc.end; + return NULL; } /** @@ -1629,7 +1635,7 @@ static void pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) { // First, check that we have arguments and that we don't have a closing // location for them. 
- if (arguments->arguments == NULL || arguments->closing_loc.start != NULL) { + if (arguments->arguments == NULL || arguments->closing_loc.length > 0) { return; } @@ -1906,7 +1912,7 @@ pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closin size_t unknown_flags_length = pm_buffer_length(&unknown_flags); if (unknown_flags_length != 0) { const char *word = unknown_flags_length >= 2 ? "options" : "option"; - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags)); } pm_buffer_free(&unknown_flags); } @@ -1940,32 +1946,22 @@ pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) { } #define PM_NODE_ALLOC(parser_, type_) (type_ *) pm_node_alloc(parser_, sizeof(type_)) -#define PM_NODE_INIT(parser_, type_, flags_, start_, end_) (pm_node_t) { \ +#define PM_NODE_INIT(parser_, type_, flags_, location_) (pm_node_t) { \ .type = (type_), \ .flags = (flags_), \ .node_id = ++(parser_)->node_id, \ - .location = { .start = (start_), .end = (end_) } \ + .location = location_ \ } -#define PM_NODE_INIT_UNSET(parser_, type_, flags_) PM_NODE_INIT(parser_, type_, flags_, NULL, NULL) -#define PM_NODE_INIT_BASE(parser_, type_, flags_) PM_NODE_INIT(parser_, type_, flags_, (parser_)->start, (parser_)->start) -#define PM_NODE_INIT_TOKEN(parser_, type_, flags_, token_) PM_NODE_INIT(parser_, type_, flags_, PM_TOKEN_START(token_), PM_TOKEN_END(token_)) -#define PM_NODE_INIT_NODE(parser_, type_, flags_, node_) PM_NODE_INIT(parser_, type_, flags_, PM_NODE_START(node_), PM_NODE_END(node_)) - -#define PM_NODE_INIT_TOKENS(parser_, type_, flags_, left_, right_) PM_NODE_INIT(parser_, type_, flags_, PM_TOKEN_START(left_), PM_TOKEN_END(right_)) -#define PM_NODE_INIT_NODES(parser_, type_, flags_, left_, right_) 
PM_NODE_INIT(parser_, type_, flags_, PM_NODE_START(left_), PM_NODE_END(right_)) -#define PM_NODE_INIT_TOKEN_NODE(parser_, type_, flags_, token_, node_) PM_NODE_INIT(parser_, type_, flags_, PM_TOKEN_START(token_), PM_NODE_END(node_)) -#define PM_NODE_INIT_NODE_TOKEN(parser_, type_, flags_, node_, token_) PM_NODE_INIT(parser_, type_, flags_, PM_NODE_START(node_), PM_TOKEN_END(token_)) - /** * Allocate a new MissingNode node. */ static pm_missing_node_t * -pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) { +pm_missing_node_create(pm_parser_t *parser, uint32_t start, uint32_t length) { pm_missing_node_t *node = PM_NODE_ALLOC(parser, pm_missing_node_t); *node = (pm_missing_node_t) { - .base = PM_NODE_INIT(parser, PM_MISSING_NODE, 0, start, end) + .base = PM_NODE_INIT(parser, PM_MISSING_NODE, 0, ((pm_location_t) { .start = start, .length = length })) }; return node; @@ -1980,10 +1976,10 @@ pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyw pm_alias_global_variable_node_t *node = PM_NODE_ALLOC(parser, pm_alias_global_variable_node_t); *node = (pm_alias_global_variable_node_t) { - .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_ALIAS_GLOBAL_VARIABLE_NODE, 0, keyword, old_name), + .base = PM_NODE_INIT(parser, PM_ALIAS_GLOBAL_VARIABLE_NODE, 0, PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name)), .new_name = new_name, .old_name = old_name, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword) + .keyword_loc = TOK2LOC(parser, keyword) }; return node; @@ -1998,10 +1994,10 @@ pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_n pm_alias_method_node_t *node = PM_NODE_ALLOC(parser, pm_alias_method_node_t); *node = (pm_alias_method_node_t) { - .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_ALIAS_METHOD_NODE, 0, keyword, old_name), + .base = PM_NODE_INIT(parser, PM_ALIAS_METHOD_NODE, 0, PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name)), .new_name = new_name, .old_name = old_name, - 
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword) + .keyword_loc = TOK2LOC(parser, keyword) }; return node; @@ -2015,10 +2011,10 @@ pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node pm_alternation_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_alternation_pattern_node_t); *node = (pm_alternation_pattern_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_ALTERNATION_PATTERN_NODE, 0, left, right), + .base = PM_NODE_INIT(parser, PM_ALTERNATION_PATTERN_NODE, 0, PM_LOCATION_INIT_NODES(left, right)), .left = left, .right = right, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -2034,9 +2030,9 @@ pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *opera pm_and_node_t *node = PM_NODE_ALLOC(parser, pm_and_node_t); *node = (pm_and_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_AND_NODE, 0, left, right), + .base = PM_NODE_INIT(parser, PM_AND_NODE, 0, PM_LOCATION_INIT_NODES(left, right)), .left = left, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .right = right }; @@ -2051,7 +2047,7 @@ pm_arguments_node_create(pm_parser_t *parser) { pm_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_arguments_node_t); *node = (pm_arguments_node_t) { - .base = PM_NODE_INIT_BASE(parser, PM_ARGUMENTS_NODE, 0), + .base = PM_NODE_INIT(parser, PM_ARGUMENTS_NODE, 0, PM_LOCATION_INIT_UNSET), .arguments = { 0 } }; @@ -2072,11 +2068,11 @@ pm_arguments_node_size(pm_arguments_node_t *node) { static void pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argument) { if (pm_arguments_node_size(node) == 0) { - node->base.location.start = argument->location.start; + PM_NODE_START_SET_NODE(node, argument); } - if (node->base.location.end < argument->location.end) { - node->base.location.end = argument->location.end; + if (PM_NODE_END(node) < PM_NODE_END(argument)) { + PM_NODE_LENGTH_SET_NODE(node, argument); } 
pm_node_list_append(&node->arguments, argument); @@ -2097,12 +2093,21 @@ static pm_array_node_t * pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) { pm_array_node_t *node = PM_NODE_ALLOC(parser, pm_array_node_t); - *node = (pm_array_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_ARRAY_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening), - .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), - .elements = { 0 } - }; + if (opening == NULL) { + *node = (pm_array_node_t) { + .base = PM_NODE_INIT(parser, PM_ARRAY_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_UNSET), + .opening_loc = { 0 }, + .closing_loc = { 0 }, + .elements = { 0 } + }; + } else { + *node = (pm_array_node_t) { + .base = PM_NODE_INIT(parser, PM_ARRAY_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, opening)), + .opening_loc = TOK2LOC(parser, opening), + .closing_loc = TOK2LOC(parser, opening), + .elements = { 0 } + }; + } return node; } @@ -2112,12 +2117,12 @@ pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) { */ static inline void pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) { - if (!node->elements.size && !node->opening_loc.start) { - node->base.location.start = element->location.start; + if (!node->elements.size && !node->opening_loc.length) { + PM_NODE_START_SET_NODE(node, element); } pm_node_list_append(&node->elements, element); - node->base.location.end = element->location.end; + PM_NODE_LENGTH_SET_NODE(node, element); // If the element is not a static literal, then the array is not a static // literal. Turn that flag off. @@ -2134,10 +2139,10 @@ pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) { * Set the closing token and end location of an array node. 
*/ static void -pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) { - assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == PM_TOKEN_MISSING || closing->type == PM_TOKEN_NOT_PROVIDED); - node->base.location.end = closing->end; - node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing); +pm_array_node_close_set(const pm_parser_t *parser, pm_array_node_t *node, const pm_token_t *closing) { + assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == 0); + PM_NODE_LENGTH_SET_TOKEN(parser, node, closing); + node->closing_loc = TOK2LOC(parser, closing); } /** @@ -2149,7 +2154,7 @@ pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *node pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t); *node = (pm_array_pattern_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_ARRAY_PATTERN_NODE, 0, nodes->nodes[0], nodes->nodes[nodes->size - 1]), + .base = PM_NODE_INIT(parser, PM_ARRAY_PATTERN_NODE, 0, PM_LOCATION_INIT_NODES(nodes->nodes[0], nodes->nodes[nodes->size - 1])), .constant = NULL, .rest = NULL, .requireds = { 0 }, @@ -2185,7 +2190,7 @@ pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) { pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t); *node = (pm_array_pattern_node_t) { - .base = PM_NODE_INIT_NODE(parser, PM_ARRAY_PATTERN_NODE, 0, rest), + .base = PM_NODE_INIT(parser, PM_ARRAY_PATTERN_NODE, 0, PM_LOCATION_INIT_NODE(rest)), .constant = NULL, .rest = rest, .requireds = { 0 }, @@ -2206,11 +2211,11 @@ pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t); *node = (pm_array_pattern_node_t) { - .base = PM_NODE_INIT_NODE_TOKEN(parser, PM_ARRAY_PATTERN_NODE, 0, constant, closing), + .base = PM_NODE_INIT(parser, PM_ARRAY_PATTERN_NODE, 0, 
PM_LOCATION_INIT_NODE_TOKEN(parser, constant, closing)), .constant = constant, .rest = NULL, - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing), + .opening_loc = TOK2LOC(parser, opening), + .closing_loc = TOK2LOC(parser, closing), .requireds = { 0 }, .posts = { 0 } }; @@ -2227,11 +2232,11 @@ pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *openin pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t); *node = (pm_array_pattern_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_ARRAY_PATTERN_NODE, 0, opening, closing), + .base = PM_NODE_INIT(parser, PM_ARRAY_PATTERN_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, opening, closing)), .constant = NULL, .rest = NULL, - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing), + .opening_loc = TOK2LOC(parser, opening), + .closing_loc = TOK2LOC(parser, closing), .requireds = { 0 }, .posts = { 0 } }; @@ -2250,14 +2255,14 @@ pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t static pm_assoc_node_t * pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) { pm_assoc_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_node_t); - const uint8_t *end; + uint32_t end; - if (value != NULL && value->location.end > key->location.end) { - end = value->location.end; - } else if (operator->type != PM_TOKEN_NOT_PROVIDED) { - end = operator->end; + if (value != NULL && PM_NODE_END(value) > PM_NODE_END(key)) { + end = PM_NODE_END(value); + } else if (operator != NULL) { + end = PM_TOKEN_END(parser, operator); } else { - end = key->location.end; + end = PM_NODE_END(key); } // Hash string keys will be frozen, so we can mark them as frozen here so @@ -2278,9 +2283,9 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper } *node = (pm_assoc_node_t) { - .base = PM_NODE_INIT(parser, PM_ASSOC_NODE, flags, 
key->location.start, end), + .base = PM_NODE_INIT(parser, PM_ASSOC_NODE, flags, ((pm_location_t) { .start = PM_NODE_START(key), .length = U32(end - PM_NODE_START(key)) })), .key = key, - .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator), + .operator_loc = NTOK2LOC(parser, operator), .value = value }; @@ -2296,13 +2301,9 @@ pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token pm_assoc_splat_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_splat_node_t); *node = (pm_assoc_splat_node_t) { - .base = ( - (value == NULL) - ? PM_NODE_INIT_TOKEN(parser, PM_ASSOC_SPLAT_NODE, 0, operator) - : PM_NODE_INIT_TOKEN_NODE(parser, PM_ASSOC_SPLAT_NODE, 0, operator, value) - ), + .base = PM_NODE_INIT(parser, PM_ASSOC_SPLAT_NODE, 0, (value == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, value)), .value = value, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -2317,7 +2318,7 @@ pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) pm_back_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_back_reference_read_node_t); *node = (pm_back_reference_read_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_BACK_REFERENCE_READ_NODE, 0, name), + .base = PM_NODE_INIT(parser, PM_BACK_REFERENCE_READ_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, name)), .name = pm_parser_constant_id_token(parser, name) }; @@ -2331,13 +2332,12 @@ static pm_begin_node_t * pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) { pm_begin_node_t *node = PM_NODE_ALLOC(parser, pm_begin_node_t); + uint32_t start = begin_keyword == NULL ? 0 : PM_TOKEN_START(parser, begin_keyword); + uint32_t end = statements == NULL ? (begin_keyword == NULL ? 0 : PM_TOKEN_END(parser, begin_keyword)) : PM_NODE_END(statements); + *node = (pm_begin_node_t) { - .base = ( - (statements == NULL) - ? 
PM_NODE_INIT_TOKEN(parser, PM_BEGIN_NODE, 0, begin_keyword) - : PM_NODE_INIT_TOKEN_NODE(parser, PM_BEGIN_NODE, 0, begin_keyword, statements) - ), - .begin_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword), + .base = PM_NODE_INIT(parser, PM_BEGIN_NODE, 0, ((pm_location_t) { .start = start, .length = U32(end - start) })), + .begin_keyword_loc = NTOK2LOC(parser, begin_keyword), .statements = statements, .end_keyword_loc = { 0 } }; @@ -2350,11 +2350,10 @@ pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_st */ static void pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) { - // If the begin keyword doesn't exist, we set the start on the begin_node - if (!node->begin_keyword_loc.start) { - node->base.location.start = rescue_clause->base.location.start; + if (node->begin_keyword_loc.length == 0) { + PM_NODE_START_SET_NODE(node, rescue_clause); } - node->base.location.end = rescue_clause->base.location.end; + PM_NODE_LENGTH_SET_NODE(node, rescue_clause); node->rescue_clause = rescue_clause; } @@ -2363,7 +2362,10 @@ pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_ */ static void pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) { - node->base.location.end = else_clause->base.location.end; + if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) { + PM_NODE_START_SET_NODE(node, else_clause); + } + PM_NODE_LENGTH_SET_NODE(node, else_clause); node->else_clause = else_clause; } @@ -2372,7 +2374,10 @@ pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause */ static void pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) { - node->base.location.end = ensure_clause->base.location.end; + if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) { + PM_NODE_START_SET_NODE(node, ensure_clause); + } + PM_NODE_LENGTH_SET_NODE(node, ensure_clause); 
node->ensure_clause = ensure_clause; } @@ -2380,11 +2385,10 @@ pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_ * Set the end keyword and end location of a begin node. */ static void -pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keyword) { - assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == PM_TOKEN_MISSING); - - node->base.location.end = end_keyword->end; - node->end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword); +pm_begin_node_end_keyword_set(const pm_parser_t *parser, pm_begin_node_t *node, const pm_token_t *end_keyword) { + assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == 0); + PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword); + node->end_keyword_loc = TOK2LOC(parser, end_keyword); } /** @@ -2392,16 +2396,13 @@ pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keywo */ static pm_block_argument_node_t * pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) { + assert(operator->type == PM_TOKEN_UAMPERSAND); pm_block_argument_node_t *node = PM_NODE_ALLOC(parser, pm_block_argument_node_t); *node = (pm_block_argument_node_t) { - .base = ( - (expression == NULL) - ? PM_NODE_INIT_TOKEN(parser, PM_BLOCK_ARGUMENT_NODE, 0, operator) - : PM_NODE_INIT_TOKEN_NODE(parser, PM_BLOCK_ARGUMENT_NODE, 0, operator, expression) - ), + .base = PM_NODE_INIT(parser, PM_BLOCK_ARGUMENT_NODE, 0, (expression == NULL) ? 
PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression)), .expression = expression, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -2415,12 +2416,12 @@ pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const p pm_block_node_t *node = PM_NODE_ALLOC(parser, pm_block_node_t); *node = (pm_block_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_BLOCK_NODE, 0, opening, closing), + .base = PM_NODE_INIT(parser, PM_BLOCK_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, opening, closing)), .locals = *locals, .parameters = parameters, .body = body, - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing) + .opening_loc = TOK2LOC(parser, opening), + .closing_loc = TOK2LOC(parser, closing) }; return node; @@ -2431,18 +2432,14 @@ pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const p */ static pm_block_parameter_node_t * pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) { - assert(operator->type == PM_TOKEN_NOT_PROVIDED || operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND); + assert(operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND); pm_block_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameter_node_t); *node = (pm_block_parameter_node_t) { - .base = ( - (name->type == PM_TOKEN_NOT_PROVIDED) - ? PM_NODE_INIT_TOKEN(parser, PM_BLOCK_PARAMETER_NODE, 0, operator) - : PM_NODE_INIT_TOKENS(parser, PM_BLOCK_PARAMETER_NODE, 0, operator, name) - ), - .name = pm_parser_optional_constant_id_token(parser, name), - .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name), - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) + .base = PM_NODE_INIT(parser, PM_BLOCK_PARAMETER_NODE, 0, (name == NULL) ? 
PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name)), + .name = name == NULL ? 0 : pm_parser_constant_id_token(parser, name), + .name_loc = NTOK2LOC(parser, name), + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -2455,28 +2452,28 @@ static pm_block_parameters_node_t * pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) { pm_block_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameters_node_t); - const uint8_t *start; - if (opening->type != PM_TOKEN_NOT_PROVIDED) { - start = opening->start; + uint32_t start; + if (opening != NULL) { + start = PM_TOKEN_START(parser, opening); } else if (parameters != NULL) { - start = parameters->base.location.start; + start = PM_NODE_START(parameters); } else { - start = NULL; + start = 0; } - const uint8_t *end; + uint32_t end; if (parameters != NULL) { - end = parameters->base.location.end; - } else if (opening->type != PM_TOKEN_NOT_PROVIDED) { - end = opening->end; + end = PM_NODE_END(parameters); + } else if (opening != NULL) { + end = PM_TOKEN_END(parser, opening); } else { - end = NULL; + end = 0; } *node = (pm_block_parameters_node_t) { - .base = PM_NODE_INIT(parser, PM_BLOCK_PARAMETERS_NODE, 0, start, end), + .base = PM_NODE_INIT(parser, PM_BLOCK_PARAMETERS_NODE, 0, ((pm_location_t) { .start = start, .length = U32(end - start) })), .parameters = parameters, - .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), + .opening_loc = NTOK2LOC(parser, opening), .closing_loc = { 0 }, .locals = { 0 } }; @@ -2488,11 +2485,10 @@ pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *param * Set the closing location of a BlockParametersNode node. 
*/ static void -pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_token_t *closing) { - assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == PM_TOKEN_MISSING); - - node->base.location.end = closing->end; - node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing); +pm_block_parameters_node_closing_set(const pm_parser_t *parser, pm_block_parameters_node_t *node, const pm_token_t *closing) { + assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == 0); + PM_NODE_LENGTH_SET_TOKEN(parser, node, closing); + node->closing_loc = TOK2LOC(parser, closing); } /** @@ -2503,7 +2499,7 @@ pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) pm_block_local_variable_node_t *node = PM_NODE_ALLOC(parser, pm_block_local_variable_node_t); *node = (pm_block_local_variable_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_BLOCK_LOCAL_VARIABLE_NODE, 0, name), + .base = PM_NODE_INIT(parser, PM_BLOCK_LOCAL_VARIABLE_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, name)), .name = pm_parser_constant_id_token(parser, name) }; @@ -2517,8 +2513,11 @@ static void pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) { pm_node_list_append(&node->locals, UP(local)); - if (node->base.location.start == NULL) node->base.location.start = local->base.location.start; - node->base.location.end = local->base.location.end; + if (PM_NODE_LENGTH(node) == 0) { + PM_NODE_START_SET_NODE(node, local); + } + + PM_NODE_LENGTH_SET_NODE(node, local); } /** @@ -2530,13 +2529,9 @@ pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument pm_break_node_t *node = PM_NODE_ALLOC(parser, pm_break_node_t); *node = (pm_break_node_t) { - .base = ( - (arguments == NULL) - ? 
PM_NODE_INIT_TOKEN(parser, PM_BREAK_NODE, 0, keyword) - : PM_NODE_INIT_TOKEN_NODE(parser, PM_BREAK_NODE, 0, keyword, arguments) - ), + .base = PM_NODE_INIT(parser, PM_BREAK_NODE, 0, (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments)), .arguments = arguments, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword) + .keyword_loc = TOK2LOC(parser, keyword) }; return node; @@ -2552,16 +2547,16 @@ static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = ((PM_CALL_NODE_FLAG static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = ((PM_CALL_NODE_FLAGS_LAST - 1) << 3); /** - * Allocate and initialize a new CallNode node. This sets everything to NULL or - * PM_TOKEN_NOT_PROVIDED as appropriate such that its values can be overridden - * in the various specializations of this function. + * Allocate and initialize a new CallNode node. This sets everything to NULL + * such that its values can be overridden in the various specializations of this + * function. 
*/ static pm_call_node_t * pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) { pm_call_node_t *node = PM_NODE_ALLOC(parser, pm_call_node_t); *node = (pm_call_node_t) { - .base = PM_NODE_INIT_BASE(parser, PM_CALL_NODE, flags), + .base = PM_NODE_INIT(parser, PM_CALL_NODE, flags, PM_LOCATION_INIT_UNSET), .receiver = NULL, .call_operator_loc = { 0 }, .message_loc = { 0 }, @@ -2600,12 +2595,15 @@ pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_ pm_call_node_t *node = pm_call_node_create(parser, flags); - node->base.location.start = receiver->location.start; - node->base.location.end = pm_arguments_end(arguments); + PM_NODE_START_SET_NODE(node, receiver); + + const pm_location_t *end = pm_arguments_end(arguments); + assert(end != NULL && "unreachable"); + PM_NODE_LENGTH_SET_LOCATION(node, end); node->receiver = receiver; node->message_loc.start = arguments->opening_loc.start; - node->message_loc.end = arguments->closing_loc.end; + node->message_loc.length = (arguments->closing_loc.start + arguments->closing_loc.length) - arguments->opening_loc.start; node->opening_loc = arguments->opening_loc; node->arguments = arguments->arguments; @@ -2626,11 +2624,11 @@ pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags); - node->base.location.start = MIN(receiver->location.start, argument->location.start); - node->base.location.end = MAX(receiver->location.end, argument->location.end); + PM_NODE_START_SET_NODE(node, PM_NODE_START(receiver) < PM_NODE_START(argument) ? receiver : argument); + PM_NODE_LENGTH_SET_NODE(node, PM_NODE_END(receiver) > PM_NODE_END(argument) ? 
receiver : argument); node->receiver = receiver; - node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator); + node->message_loc = TOK2LOC(parser, operator); pm_arguments_node_t *arguments = pm_arguments_node_create(parser); pm_arguments_node_arguments_append(arguments, argument); @@ -2651,16 +2649,17 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver)); - node->base.location.start = receiver->location.start; - const uint8_t *end = pm_arguments_end(arguments); + PM_NODE_START_SET_NODE(node, receiver); + const pm_location_t *end = pm_arguments_end(arguments); if (end == NULL) { - end = message->end; + PM_NODE_LENGTH_SET_TOKEN(parser, node, message); + } else { + PM_NODE_LENGTH_SET_LOCATION(node, end); } - node->base.location.end = end; node->receiver = receiver; - node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator); - node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message); + node->call_operator_loc = TOK2LOC(parser, operator); + node->message_loc = TOK2LOC(parser, message); node->opening_loc = arguments->opening_loc; node->arguments = arguments->arguments; node->closing_loc = arguments->closing_loc; @@ -2674,7 +2673,7 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o * If the final character is `@` as is the case for `foo.~@`, * we should ignore the @ in the same way we do for symbols. 
*/ - node->name = pm_parser_constant_id_location(parser, message->start, parse_operator_symbol_name(message)); + node->name = pm_parser_constant_id_raw(parser, message->start, parse_operator_symbol_name(message)); return node; } @@ -2684,12 +2683,9 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o static pm_call_node_t * pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) { pm_call_node_t *node = pm_call_node_create(parser, 0); - node->base.location.start = parser->start; - node->base.location.end = parser->end; + node->base.location = (pm_location_t) { .start = 0, .length = U32(parser->end - parser->start) }; node->receiver = receiver; - node->call_operator_loc = (pm_location_t) { .start = NULL, .end = NULL }; - node->message_loc = (pm_location_t) { .start = NULL, .end = NULL }; node->arguments = arguments; node->name = pm_parser_constant_id_constant(parser, message, strlen(message)); @@ -2704,10 +2700,12 @@ static pm_call_node_t * pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) { pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY); - node->base.location.start = message->start; - node->base.location.end = pm_arguments_end(arguments); + PM_NODE_START_SET_TOKEN(parser, node, message); + const pm_location_t *end = pm_arguments_end(arguments); + assert(end != NULL && "unreachable"); + PM_NODE_LENGTH_SET_LOCATION(node, end); - node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message); + node->message_loc = TOK2LOC(parser, message); node->opening_loc = arguments->opening_loc; node->arguments = arguments->arguments; node->closing_loc = arguments->closing_loc; @@ -2725,7 +2723,7 @@ static pm_call_node_t * pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) { pm_call_node_t *node = pm_call_node_create(parser, 
PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY); - node->base.location = PM_LOCATION_NULL_VALUE(parser); + node->base.location = (pm_location_t) { 0 }; node->arguments = arguments; node->name = name; @@ -2742,16 +2740,16 @@ pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *me pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver)); - node->base.location.start = message->start; - if (arguments->closing_loc.start != NULL) { - node->base.location.end = arguments->closing_loc.end; + PM_NODE_START_SET_TOKEN(parser, node, message); + if (arguments->closing_loc.length > 0) { + PM_NODE_LENGTH_SET_LOCATION(node, &arguments->closing_loc); } else { assert(receiver != NULL); - node->base.location.end = receiver->location.end; + PM_NODE_LENGTH_SET_NODE(node, receiver); } node->receiver = receiver; - node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message); + node->message_loc = TOK2LOC(parser, message); node->opening_loc = arguments->opening_loc; node->arguments = arguments->arguments; node->closing_loc = arguments->closing_loc; @@ -2769,11 +2767,13 @@ pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver)); - node->base.location.start = receiver->location.start; - node->base.location.end = pm_arguments_end(arguments); + PM_NODE_START_SET_NODE(node, receiver); + const pm_location_t *end = pm_arguments_end(arguments); + assert(end != NULL && "unreachable"); + PM_NODE_LENGTH_SET_LOCATION(node, end); node->receiver = receiver; - node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator); + node->call_operator_loc = TOK2LOC(parser, operator); node->opening_loc = arguments->opening_loc; node->arguments = arguments->arguments; node->closing_loc = arguments->closing_loc; @@ -2796,11 +2796,11 @@ pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, 
pm_node_t * pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver)); - node->base.location.start = operator->start; - node->base.location.end = receiver->location.end; + PM_NODE_START_SET_TOKEN(parser, node, operator); + PM_NODE_LENGTH_SET_NODE(node, receiver); node->receiver = receiver; - node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator); + node->message_loc = TOK2LOC(parser, operator); node->name = pm_parser_constant_id_constant(parser, name, strlen(name)); return node; @@ -2814,8 +2814,8 @@ static pm_call_node_t * pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) { pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY); - node->base.location = PM_LOCATION_TOKEN_VALUE(message); - node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message); + node->base.location = TOK2LOC(parser, message); + node->message_loc = TOK2LOC(parser, message); node->name = pm_parser_constant_id_token(parser, message); return node; @@ -2828,11 +2828,11 @@ pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) { static inline bool pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) { return ( - (node->message_loc.start != NULL) && - (node->message_loc.end[-1] != '!') && - (node->message_loc.end[-1] != '?') && - char_is_identifier_start(parser, node->message_loc.start, parser->end - node->message_loc.start) && - (node->opening_loc.start == NULL) && + (node->message_loc.length > 0) && + (parser->start[node->message_loc.start + node->message_loc.length - 1] != '!') && + (parser->start[node->message_loc.start + node->message_loc.length - 1] != '?') && + char_is_identifier_start(parser, parser->start + node->message_loc.start, (ptrdiff_t) node->message_loc.length) && + (node->opening_loc.length == 0) && (node->arguments == NULL) && (node->block == NULL) ); @@ -2868,13 +2868,13 @@ pm_call_and_write_node_create(pm_parser_t *parser, 
pm_call_node_t *target, const pm_call_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_and_write_node_t); *node = (pm_call_and_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CALL_AND_WRITE_NODE, FL(target), target, value), + .base = PM_NODE_INIT(parser, PM_CALL_AND_WRITE_NODE, FL(target), PM_LOCATION_INIT_NODES(target, value)), .receiver = target->receiver, .call_operator_loc = target->call_operator_loc, .message_loc = target->message_loc, .read_name = 0, .write_name = target->name, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -2923,14 +2923,14 @@ pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, cons assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE)); *node = (pm_index_and_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_INDEX_AND_WRITE_NODE, FL(target), target, value), + .base = PM_NODE_INIT(parser, PM_INDEX_AND_WRITE_NODE, FL(target), PM_LOCATION_INIT_NODES(target, value)), .receiver = target->receiver, .call_operator_loc = target->call_operator_loc, .opening_loc = target->opening_loc, .arguments = target->arguments, .closing_loc = target->closing_loc, .block = (pm_block_argument_node_t *) target->block, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -2951,14 +2951,14 @@ pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, pm_call_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_operator_write_node_t); *node = (pm_call_operator_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CALL_OPERATOR_WRITE_NODE, FL(target), target, value), + .base = PM_NODE_INIT(parser, PM_CALL_OPERATOR_WRITE_NODE, FL(target), PM_LOCATION_INIT_NODES(target, value)), .receiver = target->receiver, .call_operator_loc = target->call_operator_loc, .message_loc = target->message_loc, .read_name = 0, .write_name = target->name, - 
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1), - .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .binary_operator = pm_parser_constant_id_raw(parser, operator->start, operator->end - 1), + .binary_operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -2983,15 +2983,15 @@ pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE)); *node = (pm_index_operator_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_INDEX_OPERATOR_WRITE_NODE, FL(target), target, value), + .base = PM_NODE_INIT(parser, PM_INDEX_OPERATOR_WRITE_NODE, FL(target), PM_LOCATION_INIT_NODES(target, value)), .receiver = target->receiver, .call_operator_loc = target->call_operator_loc, .opening_loc = target->opening_loc, .arguments = target->arguments, .closing_loc = target->closing_loc, .block = (pm_block_argument_node_t *) target->block, - .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1), - .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .binary_operator = pm_parser_constant_id_raw(parser, operator->start, operator->end - 1), + .binary_operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -3013,13 +3013,13 @@ pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_call_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_or_write_node_t); *node = (pm_call_or_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CALL_OR_WRITE_NODE, FL(target), target, value), + .base = PM_NODE_INIT(parser, PM_CALL_OR_WRITE_NODE, FL(target), PM_LOCATION_INIT_NODES(target, value)), .receiver = target->receiver, .call_operator_loc = target->call_operator_loc, .message_loc = target->message_loc, .read_name = 0, .write_name = target->name, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ 
-3045,14 +3045,14 @@ pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE)); *node = (pm_index_or_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_INDEX_OR_WRITE_NODE, FL(target), target, value), + .base = PM_NODE_INIT(parser, PM_INDEX_OR_WRITE_NODE, FL(target), PM_LOCATION_INIT_NODES(target, value)), .receiver = target->receiver, .call_operator_loc = target->call_operator_loc, .opening_loc = target->opening_loc, .arguments = target->arguments, .closing_loc = target->closing_loc, .block = (pm_block_argument_node_t *) target->block, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -3073,7 +3073,7 @@ pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) { pm_call_target_node_t *node = PM_NODE_ALLOC(parser, pm_call_target_node_t); *node = (pm_call_target_node_t) { - .base = PM_NODE_INIT_NODE(parser, PM_CALL_TARGET_NODE, FL(target), target), + .base = PM_NODE_INIT(parser, PM_CALL_TARGET_NODE, FL(target), PM_LOCATION_INIT_NODE(target)), .receiver = target->receiver, .call_operator_loc = target->call_operator_loc, .name = target->name, @@ -3084,11 +3084,8 @@ pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) { * where the call operator was not present. In that case we will have a * problem because it is a required location. In this case we need to fill * it in with a fake location so that the syntax tree remains valid. */ - if (node->call_operator_loc.start == NULL) { - node->call_operator_loc = (pm_location_t) { - .start = target->base.location.start, - .end = target->base.location.start - }; + if (node->call_operator_loc.length == 0) { + node->call_operator_loc = target->base.location; } // Here we're going to free the target, since it is no longer necessary. 
@@ -3111,7 +3108,7 @@ pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) { assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE)); *node = (pm_index_target_node_t) { - .base = PM_NODE_INIT_NODE(parser, PM_INDEX_TARGET_NODE, FL(target) | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE, target), + .base = PM_NODE_INIT(parser, PM_INDEX_TARGET_NODE, FL(target) | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE, PM_LOCATION_INIT_NODE(target)), .receiver = target->receiver, .opening_loc = target->opening_loc, .arguments = target->arguments, @@ -3135,10 +3132,10 @@ pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_v pm_capture_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t); *node = (pm_capture_pattern_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CAPTURE_PATTERN_NODE, 0, value, target), + .base = PM_NODE_INIT(parser, PM_CAPTURE_PATTERN_NODE, 0, PM_LOCATION_INIT_NODES(value, target)), .value = value, .target = target, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -3152,11 +3149,11 @@ pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node pm_case_node_t *node = PM_NODE_ALLOC(parser, pm_case_node_t); *node = (pm_case_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_CASE_NODE, 0, case_keyword, end_keyword), + .base = PM_NODE_INIT(parser, PM_CASE_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, case_keyword, end_keyword == NULL ? 
case_keyword : end_keyword)), .predicate = predicate, .else_clause = NULL, - .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword), - .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword), + .case_keyword_loc = TOK2LOC(parser, case_keyword), + .end_keyword_loc = NTOK2LOC(parser, end_keyword), .conditions = { 0 } }; @@ -3171,7 +3168,7 @@ pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) { assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE)); pm_node_list_append(&node->conditions, condition); - node->base.location.end = condition->location.end; + PM_NODE_LENGTH_SET_NODE(node, condition); } /** @@ -3180,31 +3177,31 @@ pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) { static void pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) { node->else_clause = else_clause; - node->base.location.end = else_clause->base.location.end; + PM_NODE_LENGTH_SET_NODE(node, else_clause); } /** * Set the end location for a CaseNode node. */ static void -pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_keyword) { - node->base.location.end = end_keyword->end; - node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword); +pm_case_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_node_t *node, const pm_token_t *end_keyword) { + PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword); + node->end_keyword_loc = TOK2LOC(parser, end_keyword); } /** * Allocate and initialize a new CaseMatchNode node. 
*/ static pm_case_match_node_t * -pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) { +pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate) { pm_case_match_node_t *node = PM_NODE_ALLOC(parser, pm_case_match_node_t); *node = (pm_case_match_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_CASE_MATCH_NODE, 0, case_keyword, end_keyword), + .base = PM_NODE_INIT(parser, PM_CASE_MATCH_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, case_keyword)), .predicate = predicate, .else_clause = NULL, - .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword), - .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword), + .case_keyword_loc = TOK2LOC(parser, case_keyword), + .end_keyword_loc = { 0 }, .conditions = { 0 } }; @@ -3219,7 +3216,7 @@ pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condi assert(PM_NODE_TYPE_P(condition, PM_IN_NODE)); pm_node_list_append(&node->conditions, condition); - node->base.location.end = condition->location.end; + PM_NODE_LENGTH_SET_NODE(node, condition); } /** @@ -3228,16 +3225,16 @@ pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condi static void pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) { node->else_clause = else_clause; - node->base.location.end = else_clause->base.location.end; + PM_NODE_LENGTH_SET_NODE(node, else_clause); } /** * Set the end location for a CaseMatchNode node. 
*/ static void -pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) { - node->base.location.end = end_keyword->end; - node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword); +pm_case_match_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_match_node_t *node, const pm_token_t *end_keyword) { + PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword); + node->end_keyword_loc = TOK2LOC(parser, end_keyword); } /** @@ -3248,14 +3245,14 @@ pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const p pm_class_node_t *node = PM_NODE_ALLOC(parser, pm_class_node_t); *node = (pm_class_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_CLASS_NODE, 0, class_keyword, end_keyword), + .base = PM_NODE_INIT(parser, PM_CLASS_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword)), .locals = *locals, - .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword), + .class_keyword_loc = TOK2LOC(parser, class_keyword), .constant_path = constant_path, - .inheritance_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator), + .inheritance_operator_loc = NTOK2LOC(parser, inheritance_operator), .superclass = superclass, .body = body, - .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword), + .end_keyword_loc = TOK2LOC(parser, end_keyword), .name = pm_parser_constant_id_token(parser, name) }; @@ -3271,10 +3268,10 @@ pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_r pm_class_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_and_write_node_t); *node = (pm_class_variable_and_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_AND_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_CLASS_VARIABLE_AND_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name = target->name, .name_loc = target->base.location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), 
.value = value }; @@ -3289,12 +3286,12 @@ pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_varia pm_class_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_operator_write_node_t); *node = (pm_class_variable_operator_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name = target->name, .name_loc = target->base.location, - .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .binary_operator_loc = TOK2LOC(parser, operator), .value = value, - .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1) + .binary_operator = pm_parser_constant_id_raw(parser, operator->start, operator->end - 1) }; return node; @@ -3309,10 +3306,10 @@ pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_re pm_class_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_or_write_node_t); *node = (pm_class_variable_or_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_OR_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_CLASS_VARIABLE_OR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name = target->name, .name_loc = target->base.location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -3328,7 +3325,7 @@ pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) pm_class_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_read_node_t); *node = (pm_class_variable_read_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_CLASS_VARIABLE_READ_NODE, 0, token), + .base = PM_NODE_INIT(parser, PM_CLASS_VARIABLE_READ_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token)), .name = pm_parser_constant_id_token(parser, token) }; @@ -3343,7 +3340,7 @@ 
pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) */ static inline pm_node_flags_t pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) { - if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.start == NULL) { + if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.length == 0) { return flags; } return 0; @@ -3358,10 +3355,10 @@ pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_ pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY); *node = (pm_class_variable_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_WRITE_NODE, flags, read_node, value), + .base = PM_NODE_INIT(parser, PM_CLASS_VARIABLE_WRITE_NODE, flags, PM_LOCATION_INIT_NODES(read_node, value)), .name = read_node->name, - .name_loc = PM_LOCATION_NODE_VALUE(UP(read_node)), - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .name_loc = read_node->base.location, + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -3377,9 +3374,9 @@ pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_nod pm_constant_path_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_and_write_node_t); *node = (pm_constant_path_and_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_AND_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_CONSTANT_PATH_AND_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .target = target, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -3394,11 +3391,11 @@ pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_pat pm_constant_path_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_operator_write_node_t); *node = (pm_constant_path_operator_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, 
PM_CONSTANT_PATH_OPERATOR_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_CONSTANT_PATH_OPERATOR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .target = target, - .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .binary_operator_loc = TOK2LOC(parser, operator), .value = value, - .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1) + .binary_operator = pm_parser_constant_id_raw(parser, operator->start, operator->end - 1) }; return node; @@ -3413,9 +3410,9 @@ pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node pm_constant_path_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_or_write_node_t); *node = (pm_constant_path_or_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_OR_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_CONSTANT_PATH_OR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .target = target, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -3435,23 +3432,13 @@ pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_to name = pm_parser_constant_id_token(parser, name_token); } - if (parent == NULL) { - *node = (pm_constant_path_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_CONSTANT_PATH_NODE, 0, delimiter, name_token), - .parent = parent, - .name = name, - .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter), - .name_loc = PM_LOCATION_TOKEN_VALUE(name_token) - }; - } else { - *node = (pm_constant_path_node_t) { - .base = PM_NODE_INIT_NODE_TOKEN(parser, PM_CONSTANT_PATH_NODE, 0, parent, name_token), - .parent = parent, - .name = name, - .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter), - .name_loc = PM_LOCATION_TOKEN_VALUE(name_token) - }; - } + *node = (pm_constant_path_node_t) { + .base = PM_NODE_INIT(parser, PM_CONSTANT_PATH_NODE, 0, (parent == NULL) ? 
PM_LOCATION_INIT_TOKENS(parser, delimiter, name_token) : PM_LOCATION_INIT_NODE_TOKEN(parser, parent, name_token)), + .parent = parent, + .name = name, + .delimiter_loc = TOK2LOC(parser, delimiter), + .name_loc = TOK2LOC(parser, name_token) + }; return node; } @@ -3465,9 +3452,9 @@ pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY); *node = (pm_constant_path_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_WRITE_NODE, flags, target, value), + .base = PM_NODE_INIT(parser, PM_CONSTANT_PATH_WRITE_NODE, flags, PM_LOCATION_INIT_NODES(target, value)), .target = target, - .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -3483,10 +3470,10 @@ pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t * pm_constant_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_and_write_node_t); *node = (pm_constant_and_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_AND_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_CONSTANT_AND_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name = target->name, .name_loc = target->base.location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -3501,12 +3488,12 @@ pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_nod pm_constant_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_operator_write_node_t); *node = (pm_constant_operator_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_OPERATOR_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_CONSTANT_OPERATOR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name = target->name, .name_loc = target->base.location, - .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator), 
+ .binary_operator_loc = TOK2LOC(parser, operator), .value = value, - .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1) + .binary_operator = pm_parser_constant_id_raw(parser, operator->start, operator->end - 1) }; return node; @@ -3521,10 +3508,10 @@ pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *t pm_constant_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_or_write_node_t); *node = (pm_constant_or_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_OR_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_CONSTANT_OR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name = target->name, .name_loc = target->base.location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -3536,11 +3523,11 @@ pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *t */ static pm_constant_read_node_t * pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) { - assert(name->type == PM_TOKEN_CONSTANT || name->type == PM_TOKEN_MISSING); + assert(name->type == PM_TOKEN_CONSTANT || name->type == 0); pm_constant_read_node_t *node = PM_NODE_ALLOC(parser, pm_constant_read_node_t); *node = (pm_constant_read_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_CONSTANT_READ_NODE, 0, name), + .base = PM_NODE_INIT(parser, PM_CONSTANT_READ_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, name)), .name = pm_parser_constant_id_token(parser, name) }; @@ -3556,10 +3543,10 @@ pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *targ pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY); *node = (pm_constant_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_WRITE_NODE, flags, target, value), + .base = PM_NODE_INIT(parser, PM_CONSTANT_WRITE_NODE, flags, PM_LOCATION_INIT_NODES(target, value)), .name = target->name, 
.name_loc = target->base.location, - .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -3636,23 +3623,19 @@ pm_def_node_create( } *node = (pm_def_node_t) { - .base = ( - (end_keyword->type == PM_TOKEN_NOT_PROVIDED) - ? PM_NODE_INIT_TOKEN_NODE(parser, PM_DEF_NODE, 0, def_keyword, body) - : PM_NODE_INIT_TOKENS(parser, PM_DEF_NODE, 0, def_keyword, end_keyword) - ), + .base = PM_NODE_INIT(parser, PM_DEF_NODE, 0, (end_keyword == NULL) ? PM_LOCATION_INIT_TOKEN_NODE(parser, def_keyword, body) : PM_LOCATION_INIT_TOKENS(parser, def_keyword, end_keyword)), .name = name, - .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc), + .name_loc = TOK2LOC(parser, name_loc), .receiver = receiver, .parameters = parameters, .body = body, .locals = *locals, - .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword), - .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator), - .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen), - .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen), - .equal_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(equal), - .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword) + .def_keyword_loc = TOK2LOC(parser, def_keyword), + .operator_loc = NTOK2LOC(parser, operator), + .lparen_loc = NTOK2LOC(parser, lparen), + .rparen_loc = NTOK2LOC(parser, rparen), + .equal_loc = NTOK2LOC(parser, equal), + .end_keyword_loc = NTOK2LOC(parser, end_keyword) }; return node; @@ -3666,15 +3649,11 @@ pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t pm_defined_node_t *node = PM_NODE_ALLOC(parser, pm_defined_node_t); *node = (pm_defined_node_t) { - .base = ( - (rparen->type == PM_TOKEN_NOT_PROVIDED) - ? PM_NODE_INIT_TOKEN_NODE(parser, PM_DEFINED_NODE, 0, keyword, value) - : PM_NODE_INIT_TOKENS(parser, PM_DEFINED_NODE, 0, keyword, rparen) - ), - .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen), + .base = PM_NODE_INIT(parser, PM_DEFINED_NODE, 0, (rparen == NULL) ? 
PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, value) : PM_LOCATION_INIT_TOKENS(parser, keyword, rparen)), + .lparen_loc = NTOK2LOC(parser, lparen), .value = value, - .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword) + .rparen_loc = NTOK2LOC(parser, rparen), + .keyword_loc = TOK2LOC(parser, keyword) }; return node; @@ -3688,14 +3667,10 @@ pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_stat pm_else_node_t *node = PM_NODE_ALLOC(parser, pm_else_node_t); *node = (pm_else_node_t) { - .base = ( - ((end_keyword->type == PM_TOKEN_NOT_PROVIDED) && (statements != NULL)) - ? PM_NODE_INIT_TOKEN_NODE(parser, PM_ELSE_NODE, 0, else_keyword, statements) - : PM_NODE_INIT_TOKENS(parser, PM_ELSE_NODE, 0, else_keyword, end_keyword) - ), - .else_keyword_loc = PM_LOCATION_TOKEN_VALUE(else_keyword), + .base = PM_NODE_INIT(parser, PM_ELSE_NODE, 0, ((end_keyword == NULL) && (statements != NULL)) ? PM_LOCATION_INIT_TOKEN_NODE(parser, else_keyword, statements) : PM_LOCATION_INIT_TOKENS(parser, else_keyword, end_keyword)), + .else_keyword_loc = TOK2LOC(parser, else_keyword), .statements = statements, - .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword) + .end_keyword_loc = NTOK2LOC(parser, end_keyword) }; return node; @@ -3709,10 +3684,10 @@ pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *openin pm_embedded_statements_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_statements_node_t); *node = (pm_embedded_statements_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_EMBEDDED_STATEMENTS_NODE, 0, opening, closing), - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), + .base = PM_NODE_INIT(parser, PM_EMBEDDED_STATEMENTS_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, opening, closing)), + .opening_loc = TOK2LOC(parser, opening), .statements = statements, - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing) + .closing_loc = TOK2LOC(parser, closing) }; return node; @@ -3726,8 +3701,8 @@ 
pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator pm_embedded_variable_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_variable_node_t); *node = (pm_embedded_variable_node_t) { - .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_EMBEDDED_VARIABLE_NODE, 0, operator, variable), - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .base = PM_NODE_INIT(parser, PM_EMBEDDED_VARIABLE_NODE, 0, PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable)), + .operator_loc = TOK2LOC(parser, operator), .variable = variable }; @@ -3742,10 +3717,10 @@ pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_ pm_ensure_node_t *node = PM_NODE_ALLOC(parser, pm_ensure_node_t); *node = (pm_ensure_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_ENSURE_NODE, 0, ensure_keyword, end_keyword), - .ensure_keyword_loc = PM_LOCATION_TOKEN_VALUE(ensure_keyword), + .base = PM_NODE_INIT(parser, PM_ENSURE_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, ensure_keyword, end_keyword)), + .ensure_keyword_loc = TOK2LOC(parser, ensure_keyword), .statements = statements, - .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword) + .end_keyword_loc = TOK2LOC(parser, end_keyword) }; return node; @@ -3760,7 +3735,7 @@ pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) { pm_false_node_t *node = PM_NODE_ALLOC(parser, pm_false_node_t); *node = (pm_false_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_FALSE_NODE, PM_NODE_FLAG_STATIC_LITERAL, token) + .base = PM_NODE_INIT(parser, PM_FALSE_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)) }; return node; @@ -3781,7 +3756,7 @@ pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) { pm_node_t *right; if (nodes->size == 1) { - right = UP(pm_missing_node_create(parser, left->location.end, left->location.end)); + right = UP(pm_missing_node_create(parser, PM_NODE_END(left), 0)); } else { right = nodes->nodes[nodes->size - 1]; assert(PM_NODE_TYPE_P(right, 
PM_SPLAT_NODE)); @@ -3795,7 +3770,7 @@ pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) { pm_node_t *right_splat_node = right; #endif *node = (pm_find_pattern_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_FIND_PATTERN_NODE, 0, left, right), + .base = PM_NODE_INIT(parser, PM_FIND_PATTERN_NODE, 0, PM_LOCATION_INIT_NODES(left, right)), .constant = NULL, .left = left_splat_node, .right = right_splat_node, @@ -3859,7 +3834,7 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) { // This should never happen, because we've already checked that the token // is in a valid format. However it's good to be safe. if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) { - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, (*token), PM_ERR_FLOAT_PARSE); + PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, PM_ERR_FLOAT_PARSE); xfree((void *) buffer); return 0.0; } @@ -3878,7 +3853,7 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) { ellipsis = ""; } - pm_diagnostic_list_append_format(&parser->warning_list, token->start, token->end, PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis); + pm_diagnostic_list_append_format(&parser->warning_list, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis); value = (value < 0.0) ? 
-HUGE_VAL : HUGE_VAL; } @@ -3896,7 +3871,7 @@ pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) { pm_float_node_t *node = PM_NODE_ALLOC(parser, pm_float_node_t); *node = (pm_float_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_FLOAT_NODE, PM_NODE_FLAG_STATIC_LITERAL, token), + .base = PM_NODE_INIT(parser, PM_FLOAT_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)), .value = pm_double_parse(parser, token) }; @@ -3912,7 +3887,7 @@ pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) { pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t); *node = (pm_imaginary_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token), + .base = PM_NODE_INIT(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)), .numeric = UP(pm_float_node_create(parser, &((pm_token_t) { .type = PM_TOKEN_FLOAT, .start = token->start, @@ -3932,7 +3907,7 @@ pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) { pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t); *node = (pm_rational_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_RATIONAL_NODE, PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL, token), + .base = PM_NODE_INIT(parser, PM_RATIONAL_NODE, PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)), .numerator = { 0 }, .denominator = { 0 } }; @@ -3985,7 +3960,7 @@ pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *t pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t); *node = (pm_imaginary_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token), + .base = PM_NODE_INIT(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)), .numeric = UP(pm_float_node_rational_create(parser, &((pm_token_t) { .type = 
PM_TOKEN_FLOAT_RATIONAL, .start = token->start, @@ -4013,14 +3988,14 @@ pm_for_node_create( pm_for_node_t *node = PM_NODE_ALLOC(parser, pm_for_node_t); *node = (pm_for_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_FOR_NODE, 0, for_keyword, end_keyword), + .base = PM_NODE_INIT(parser, PM_FOR_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, for_keyword, end_keyword)), .index = index, .collection = collection, .statements = statements, - .for_keyword_loc = PM_LOCATION_TOKEN_VALUE(for_keyword), - .in_keyword_loc = PM_LOCATION_TOKEN_VALUE(in_keyword), - .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword), - .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword) + .for_keyword_loc = TOK2LOC(parser, for_keyword), + .in_keyword_loc = TOK2LOC(parser, in_keyword), + .do_keyword_loc = NTOK2LOC(parser, do_keyword), + .end_keyword_loc = TOK2LOC(parser, end_keyword) }; return node; @@ -4035,7 +4010,7 @@ pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token pm_forwarding_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_arguments_node_t); *node = (pm_forwarding_arguments_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_FORWARDING_ARGUMENTS_NODE, 0, token) + .base = PM_NODE_INIT(parser, PM_FORWARDING_ARGUMENTS_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token)) }; return node; @@ -4050,7 +4025,7 @@ pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token pm_forwarding_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_parameter_node_t); *node = (pm_forwarding_parameter_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_FORWARDING_PARAMETER_NODE, 0, token) + .base = PM_NODE_INIT(parser, PM_FORWARDING_PARAMETER_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token)) }; return node; @@ -4071,11 +4046,7 @@ pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm } *node = (pm_forwarding_super_node_t) { - .base = ( - (block == NULL) - ? 
PM_NODE_INIT_TOKEN(parser, PM_FORWARDING_SUPER_NODE, 0, token) - : PM_NODE_INIT_TOKEN_NODE(parser, PM_FORWARDING_SUPER_NODE, 0, token, block) - ), + .base = PM_NODE_INIT(parser, PM_FORWARDING_SUPER_NODE, 0, (block == NULL) ? PM_LOCATION_INIT_TOKEN(parser, token) : PM_LOCATION_INIT_TOKEN_NODE(parser, token, block)), .block = block }; @@ -4091,10 +4062,10 @@ pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t); *node = (pm_hash_pattern_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_HASH_PATTERN_NODE, 0, opening, closing), + .base = PM_NODE_INIT(parser, PM_HASH_PATTERN_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, opening, closing)), .constant = NULL, - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing), + .opening_loc = TOK2LOC(parser, opening), + .closing_loc = TOK2LOC(parser, closing), .elements = { 0 }, .rest = NULL }; @@ -4109,25 +4080,25 @@ static pm_hash_pattern_node_t * pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) { pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t); - const uint8_t *start; - const uint8_t *end; + uint32_t start; + uint32_t end; if (elements->size > 0) { if (rest) { - start = MIN(rest->location.start, elements->nodes[0]->location.start); - end = MAX(rest->location.end, elements->nodes[elements->size - 1]->location.end); + start = MIN(PM_NODE_START(rest), PM_NODE_START(elements->nodes[0])); + end = MAX(PM_NODE_END(rest), PM_NODE_END(elements->nodes[elements->size - 1])); } else { - start = elements->nodes[0]->location.start; - end = elements->nodes[elements->size - 1]->location.end; + start = PM_NODE_START(elements->nodes[0]); + end = PM_NODE_END(elements->nodes[elements->size - 1]); } } else { assert(rest != NULL); - start = rest->location.start; - end = rest->location.end; + start = PM_NODE_START(rest); + end = 
PM_NODE_END(rest); } *node = (pm_hash_pattern_node_t) { - .base = PM_NODE_INIT(parser, PM_HASH_PATTERN_NODE, 0, start, end), + .base = PM_NODE_INIT(parser, PM_HASH_PATTERN_NODE, 0, ((pm_location_t) { .start = start, .length = U32(end - start) })), .constant = NULL, .elements = { 0 }, .rest = rest, @@ -4152,7 +4123,7 @@ pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) { case PM_NUMBERED_REFERENCE_READ_NODE: // This will only ever happen in the event of a syntax error, but we // still need to provide something for the node. - return pm_parser_constant_id_location(parser, target->location.start, target->location.end); + return pm_parser_constant_id_raw(parser, parser->start + PM_NODE_START(target), parser->start + PM_NODE_END(target)); default: assert(false && "unreachable"); return (pm_constant_id_t) -1; @@ -4168,10 +4139,10 @@ pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, pm_global_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_and_write_node_t); *node = (pm_global_variable_and_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_AND_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_GLOBAL_VARIABLE_AND_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name = pm_global_variable_write_name(parser, target), .name_loc = target->location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -4186,12 +4157,12 @@ pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *ta pm_global_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_operator_write_node_t); *node = (pm_global_variable_operator_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name = 
pm_global_variable_write_name(parser, target), .name_loc = target->location, - .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .binary_operator_loc = TOK2LOC(parser, operator), .value = value, - .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1) + .binary_operator = pm_parser_constant_id_raw(parser, operator->start, operator->end - 1) }; return node; @@ -4206,10 +4177,10 @@ pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, pm_global_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_or_write_node_t); *node = (pm_global_variable_or_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_OR_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_GLOBAL_VARIABLE_OR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name = pm_global_variable_write_name(parser, target), .name_loc = target->location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -4224,7 +4195,7 @@ pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t); *node = (pm_global_variable_read_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_GLOBAL_VARIABLE_READ_NODE, 0, name), + .base = PM_NODE_INIT(parser, PM_GLOBAL_VARIABLE_READ_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, name)), .name = pm_parser_constant_id_token(parser, name) }; @@ -4239,7 +4210,7 @@ pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t); *node = (pm_global_variable_read_node_t) { - .base = PM_NODE_INIT_BASE(parser, PM_GLOBAL_VARIABLE_READ_NODE, 0), + .base = PM_NODE_INIT(parser, PM_GLOBAL_VARIABLE_READ_NODE, 0, PM_LOCATION_INIT_UNSET), .name = name }; @@ -4255,10 +4226,10 @@ 
pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, con pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY); *node = (pm_global_variable_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_WRITE_NODE, flags, target, value), + .base = PM_NODE_INIT(parser, PM_GLOBAL_VARIABLE_WRITE_NODE, flags, PM_LOCATION_INIT_NODES(target, value)), .name = pm_global_variable_write_name(parser, target), - .name_loc = PM_LOCATION_NODE_VALUE(target), - .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator), + .name_loc = target->location, + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -4273,10 +4244,10 @@ pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constan pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t); *node = (pm_global_variable_write_node_t) { - .base = PM_NODE_INIT_BASE(parser, PM_GLOBAL_VARIABLE_WRITE_NODE, 0), + .base = PM_NODE_INIT(parser, PM_GLOBAL_VARIABLE_WRITE_NODE, 0, PM_LOCATION_INIT_UNSET), .name = name, - .name_loc = PM_LOCATION_NULL_VALUE(parser), - .operator_loc = PM_LOCATION_NULL_VALUE(parser), + .name_loc = { 0 }, + .operator_loc = { 0 }, .value = value }; @@ -4292,9 +4263,9 @@ pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) { pm_hash_node_t *node = PM_NODE_ALLOC(parser, pm_hash_node_t); *node = (pm_hash_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_HASH_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening), - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_NULL_VALUE(parser), + .base = PM_NODE_INIT(parser, PM_HASH_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, opening)), + .opening_loc = TOK2LOC(parser, opening), + .closing_loc = { 0 }, .elements = { 0 } }; @@ -4322,9 +4293,9 @@ pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) { } static inline void -pm_hash_node_closing_loc_set(pm_hash_node_t 
*hash, pm_token_t *token) { - hash->base.location.end = token->end; - hash->closing_loc = PM_LOCATION_TOKEN_VALUE(token); +pm_hash_node_closing_loc_set(const pm_parser_t *parser, pm_hash_node_t *hash, pm_token_t *token) { + PM_NODE_LENGTH_SET_TOKEN(parser, hash, token); + hash->closing_loc = TOK2LOC(parser, token); } /** @@ -4342,25 +4313,27 @@ pm_if_node_create(pm_parser_t *parser, pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL); pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t); - const uint8_t *end; - if (end_keyword->type != PM_TOKEN_NOT_PROVIDED) { - end = end_keyword->end; + uint32_t start = PM_TOKEN_START(parser, if_keyword); + uint32_t end; + + if (end_keyword != NULL) { + end = PM_TOKEN_END(parser, end_keyword); } else if (subsequent != NULL) { - end = subsequent->location.end; + end = PM_NODE_END(subsequent); } else if (pm_statements_node_body_length(statements) != 0) { - end = statements->base.location.end; + end = PM_NODE_END(statements); } else { - end = predicate->location.end; + end = PM_NODE_END(predicate); } *node = (pm_if_node_t) { - .base = PM_NODE_INIT(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, if_keyword->start, end), - .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword), + .base = PM_NODE_INIT(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, ((pm_location_t) { .start = start, .length = U32(end - start) })), + .if_keyword_loc = TOK2LOC(parser, if_keyword), .predicate = predicate, - .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword), + .then_keyword_loc = NTOK2LOC(parser, then_keyword), .statements = statements, .subsequent = subsequent, - .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword) + .end_keyword_loc = NTOK2LOC(parser, end_keyword) }; return node; @@ -4378,8 +4351,8 @@ pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_t pm_statements_node_body_append(parser, statements, statement, true); *node = (pm_if_node_t) { - .base = 
PM_NODE_INIT_NODES(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, statement, predicate), - .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword), + .base = PM_NODE_INIT(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, PM_LOCATION_INIT_NODES(statement, predicate)), + .if_keyword_loc = TOK2LOC(parser, if_keyword), .predicate = predicate, .then_keyword_loc = { 0 }, .statements = statements, @@ -4404,16 +4377,14 @@ pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_to pm_statements_node_t *else_statements = pm_statements_node_create(parser); pm_statements_node_body_append(parser, else_statements, false_expression, true); - pm_token_t end_keyword = not_provided(parser); - pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, &end_keyword); - + pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, NULL); pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t); *node = (pm_if_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, predicate, false_expression), + .base = PM_NODE_INIT(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, PM_LOCATION_INIT_NODES(predicate, false_expression)), .if_keyword_loc = { 0 }, .predicate = predicate, - .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark), + .then_keyword_loc = TOK2LOC(parser, qmark), .statements = if_statements, .subsequent = UP(else_node), .end_keyword_loc = { 0 } @@ -4424,15 +4395,15 @@ pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_to } static inline void -pm_if_node_end_keyword_loc_set(pm_if_node_t *node, const pm_token_t *keyword) { - node->base.location.end = keyword->end; - node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword); +pm_if_node_end_keyword_loc_set(const pm_parser_t *parser, pm_if_node_t *node, const pm_token_t *keyword) { + PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword); + node->end_keyword_loc = TOK2LOC(parser, keyword); } static inline void 
-pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword) { - node->base.location.end = keyword->end; - node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword); +pm_else_node_end_keyword_loc_set(const pm_parser_t *parser, pm_else_node_t *node, const pm_token_t *keyword) { + PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword); + node->end_keyword_loc = TOK2LOC(parser, keyword); } /** @@ -4443,7 +4414,7 @@ pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) { pm_implicit_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_node_t); *node = (pm_implicit_node_t) { - .base = PM_NODE_INIT_NODE(parser, PM_IMPLICIT_NODE, 0, value), + .base = PM_NODE_INIT(parser, PM_IMPLICIT_NODE, 0, PM_LOCATION_INIT_NODE(value)), .value = value }; @@ -4460,7 +4431,7 @@ pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) { pm_implicit_rest_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_rest_node_t); *node = (pm_implicit_rest_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_IMPLICIT_REST_NODE, 0, token) + .base = PM_NODE_INIT(parser, PM_IMPLICIT_REST_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token)) }; return node; @@ -4475,7 +4446,7 @@ pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token pm_integer_node_t *node = PM_NODE_ALLOC(parser, pm_integer_node_t); *node = (pm_integer_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_INTEGER_NODE, base | PM_NODE_FLAG_STATIC_LITERAL, token), + .base = PM_NODE_INIT(parser, PM_INTEGER_NODE, base | PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)), .value = { 0 } }; @@ -4502,7 +4473,7 @@ pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, cons pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t); *node = (pm_imaginary_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token), + .base = PM_NODE_INIT(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, 
PM_LOCATION_INIT_TOKEN(parser, token)), .numeric = UP(pm_integer_node_create(parser, base, &((pm_token_t) { .type = PM_TOKEN_INTEGER, .start = token->start, @@ -4523,7 +4494,7 @@ pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t); *node = (pm_rational_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_RATIONAL_NODE, base | PM_NODE_FLAG_STATIC_LITERAL, token), + .base = PM_NODE_INIT(parser, PM_RATIONAL_NODE, base | PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)), .numerator = { 0 }, .denominator = { .value = 1, 0 } }; @@ -4552,7 +4523,7 @@ pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t b pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t); *node = (pm_imaginary_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token), + .base = PM_NODE_INIT(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)), .numeric = UP(pm_integer_node_rational_create(parser, base, &((pm_token_t) { .type = PM_TOKEN_INTEGER_RATIONAL, .start = token->start, @@ -4570,21 +4541,23 @@ static pm_in_node_t * pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) { pm_in_node_t *node = PM_NODE_ALLOC(parser, pm_in_node_t); - const uint8_t *end; + uint32_t start = PM_TOKEN_START(parser, in_keyword); + uint32_t end; + if (statements != NULL) { - end = statements->base.location.end; - } else if (then_keyword->type != PM_TOKEN_NOT_PROVIDED) { - end = then_keyword->end; + end = PM_NODE_END(statements); + } else if (then_keyword != NULL) { + end = PM_TOKEN_END(parser, then_keyword); } else { - end = pattern->location.end; + end = PM_NODE_END(pattern); } *node = (pm_in_node_t) { - .base = PM_NODE_INIT(parser, PM_IN_NODE, 0, in_keyword->start, end), + .base = 
PM_NODE_INIT(parser, PM_IN_NODE, 0, ((pm_location_t) { .start = start, .length = U32(end - start) })), .pattern = pattern, .statements = statements, - .in_loc = PM_LOCATION_TOKEN_VALUE(in_keyword), - .then_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword) + .in_loc = TOK2LOC(parser, in_keyword), + .then_loc = NTOK2LOC(parser, then_keyword) }; return node; @@ -4599,10 +4572,10 @@ pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_vari pm_instance_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_and_write_node_t); *node = (pm_instance_variable_and_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_AND_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_INSTANCE_VARIABLE_AND_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name = target->name, .name_loc = target->base.location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -4617,12 +4590,12 @@ pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance pm_instance_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_operator_write_node_t); *node = (pm_instance_variable_operator_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name = target->name, .name_loc = target->base.location, - .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .binary_operator_loc = TOK2LOC(parser, operator), .value = value, - .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1) + .binary_operator = pm_parser_constant_id_raw(parser, operator->start, operator->end - 1) }; return node; @@ -4637,10 +4610,10 @@ pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_varia 
pm_instance_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_or_write_node_t); *node = (pm_instance_variable_or_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_OR_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_INSTANCE_VARIABLE_OR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name = target->name, .name_loc = target->base.location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -4656,7 +4629,7 @@ pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *tok pm_instance_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_read_node_t); *node = (pm_instance_variable_read_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_INSTANCE_VARIABLE_READ_NODE, 0, token), + .base = PM_NODE_INIT(parser, PM_INSTANCE_VARIABLE_READ_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token)), .name = pm_parser_constant_id_token(parser, token) }; @@ -4673,10 +4646,10 @@ pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY); *node = (pm_instance_variable_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_WRITE_NODE, flags, read_node, value), + .base = PM_NODE_INIT(parser, PM_INSTANCE_VARIABLE_WRITE_NODE, flags, PM_LOCATION_INIT_NODES(read_node, value)), .name = read_node->name, - .name_loc = PM_LOCATION_NODE_VALUE(read_node), - .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator), + .name_loc = read_node->base.location, + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -4735,9 +4708,9 @@ pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_tok pm_interpolated_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_regular_expression_node_t); *node = (pm_interpolated_regular_expression_node_t) { - .base = 
PM_NODE_INIT_TOKEN(parser, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening), - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(opening), + .base = PM_NODE_INIT(parser, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, opening)), + .opening_loc = TOK2LOC(parser, opening), + .closing_loc = TOK2LOC(parser, opening), .parts = { 0 } }; @@ -4746,11 +4719,11 @@ pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_tok static inline void pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expression_node_t *node, pm_node_t *part) { - if (node->base.location.start > part->location.start) { - node->base.location.start = part->location.start; + if (PM_NODE_START(node) > PM_NODE_START(part)) { + PM_NODE_START_SET_NODE(node, part); } - if (node->base.location.end < part->location.end) { - node->base.location.end = part->location.end; + if (PM_NODE_END(node) < PM_NODE_END(part)) { + PM_NODE_LENGTH_SET_NODE(node, part); } pm_interpolated_node_append(UP(node), &node->parts, part); @@ -4758,8 +4731,8 @@ pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expressio static inline void pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) { - node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing); - node->base.location.end = closing->end; + node->closing_loc = TOK2LOC(parser, closing); + PM_NODE_LENGTH_SET_TOKEN(parser, node, closing); pm_node_flag_set(UP(node), pm_regular_expression_flags_create(parser, closing)); } @@ -4794,11 +4767,13 @@ pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_ #define MUTABLE_FLAGS(node) \ node->base.flags = (pm_node_flags_t) ((FL(node) | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN); - if (node->parts.size == 0 
&& node->opening_loc.start == NULL) { - node->base.location.start = part->location.start; + if (node->parts.size == 0 && node->opening_loc.length == 0) { + PM_NODE_START_SET_NODE(node, part); } - node->base.location.end = MAX(node->base.location.end, part->location.end); + if (PM_NODE_END(part) > PM_NODE_END(node)) { + PM_NODE_LENGTH_SET_NODE(node, part); + } switch (PM_NODE_TYPE(part)) { case PM_STRING_NODE: @@ -4893,10 +4868,13 @@ pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *openin break; } + uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening); + uint32_t end = closing == NULL ? 0 : PM_TOKEN_END(parser, closing); + *node = (pm_interpolated_string_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_INTERPOLATED_STRING_NODE, flags, opening, closing), - .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing), + .base = PM_NODE_INIT(parser, PM_INTERPOLATED_STRING_NODE, flags, ((pm_location_t) { .start = start, .length = U32(end - start) })), + .opening_loc = NTOK2LOC(parser, opening), + .closing_loc = NTOK2LOC(parser, closing), .parts = { 0 } }; @@ -4914,25 +4892,28 @@ pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *openin * Set the closing token of the given InterpolatedStringNode node. 
*/ static void -pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) { - node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing); - node->base.location.end = closing->end; +pm_interpolated_string_node_closing_set(const pm_parser_t *parser, pm_interpolated_string_node_t *node, const pm_token_t *closing) { + node->closing_loc = TOK2LOC(parser, closing); + PM_NODE_LENGTH_SET_TOKEN(parser, node, closing); } static void pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) { - if (node->parts.size == 0 && node->opening_loc.start == NULL) { - node->base.location.start = part->location.start; + if (node->parts.size == 0 && node->opening_loc.length == 0) { + PM_NODE_START_SET_NODE(node, part); } pm_interpolated_node_append(UP(node), &node->parts, part); - node->base.location.end = MAX(node->base.location.end, part->location.end); + + if (PM_NODE_END(part) > PM_NODE_END(node)) { + PM_NODE_LENGTH_SET_NODE(node, part); + } } static void -pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node, const pm_token_t *closing) { - node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing); - node->base.location.end = closing->end; +pm_interpolated_symbol_node_closing_loc_set(const pm_parser_t *parser, pm_interpolated_symbol_node_t *node, const pm_token_t *closing) { + node->closing_loc = TOK2LOC(parser, closing); + PM_NODE_LENGTH_SET_TOKEN(parser, node, closing); } /** @@ -4942,10 +4923,13 @@ static pm_interpolated_symbol_node_t * pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) { pm_interpolated_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_symbol_node_t); + uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening); + uint32_t end = closing == NULL ? 
0 : PM_TOKEN_END(parser, closing); + *node = (pm_interpolated_symbol_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_INTERPOLATED_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening, closing), - .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing), + .base = PM_NODE_INIT(parser, PM_INTERPOLATED_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL, ((pm_location_t) { .start = start, .length = U32(end - start) })), + .opening_loc = NTOK2LOC(parser, opening), + .closing_loc = NTOK2LOC(parser, closing), .parts = { 0 } }; @@ -4967,9 +4951,9 @@ pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *openi pm_interpolated_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_x_string_node_t); *node = (pm_interpolated_x_string_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_INTERPOLATED_X_STRING_NODE, 0, opening, closing), - .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing), + .base = PM_NODE_INIT(parser, PM_INTERPOLATED_X_STRING_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, opening, closing)), + .opening_loc = TOK2LOC(parser, opening), + .closing_loc = TOK2LOC(parser, closing), .parts = { 0 } }; @@ -4979,13 +4963,13 @@ pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *openi static inline void pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) { pm_interpolated_node_append(UP(node), &node->parts, part); - node->base.location.end = part->location.end; + PM_NODE_LENGTH_SET_NODE(node, part); } static inline void -pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node, const pm_token_t *closing) { - node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing); - node->base.location.end = closing->end; +pm_interpolated_xstring_node_closing_set(const pm_parser_t *parser, pm_interpolated_x_string_node_t *node, const pm_token_t *closing) { + 
node->closing_loc = TOK2LOC(parser, closing); + PM_NODE_LENGTH_SET_TOKEN(parser, node, closing); } /** @@ -4996,7 +4980,7 @@ pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *nam pm_it_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_it_local_variable_read_node_t); *node = (pm_it_local_variable_read_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_IT_LOCAL_VARIABLE_READ_NODE, 0, name), + .base = PM_NODE_INIT(parser, PM_IT_LOCAL_VARIABLE_READ_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, name)), }; return node; @@ -5010,7 +4994,7 @@ pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, con pm_it_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_it_parameters_node_t); *node = (pm_it_parameters_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_IT_PARAMETERS_NODE, 0, opening, closing), + .base = PM_NODE_INIT(parser, PM_IT_PARAMETERS_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, opening, closing)), }; return node; @@ -5024,7 +5008,7 @@ pm_keyword_hash_node_create(pm_parser_t *parser) { pm_keyword_hash_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_hash_node_t); *node = (pm_keyword_hash_node_t) { - .base = PM_NODE_INIT_UNSET(parser, PM_KEYWORD_HASH_NODE, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS), + .base = PM_NODE_INIT(parser, PM_KEYWORD_HASH_NODE, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS, PM_LOCATION_INIT_UNSET), .elements = { 0 } }; @@ -5043,10 +5027,10 @@ pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *el } pm_node_list_append(&hash->elements, element); - if (hash->base.location.start == NULL) { - hash->base.location.start = element->location.start; + if (PM_NODE_LENGTH(hash) == 0) { + PM_NODE_START_SET_NODE(hash, element); } - hash->base.location.end = element->location.end; + PM_NODE_LENGTH_SET_NODE(hash, element); } /** @@ -5057,9 +5041,9 @@ pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t pm_required_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, 
pm_required_keyword_parameter_node_t); *node = (pm_required_keyword_parameter_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_REQUIRED_KEYWORD_PARAMETER_NODE, 0, name), - .name = pm_parser_constant_id_location(parser, name->start, name->end - 1), - .name_loc = PM_LOCATION_TOKEN_VALUE(name), + .base = PM_NODE_INIT(parser, PM_REQUIRED_KEYWORD_PARAMETER_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, name)), + .name = pm_parser_constant_id_raw(parser, name->start, name->end - 1), + .name_loc = TOK2LOC(parser, name), }; return node; @@ -5073,9 +5057,9 @@ pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t pm_optional_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_keyword_parameter_node_t); *node = (pm_optional_keyword_parameter_node_t) { - .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_OPTIONAL_KEYWORD_PARAMETER_NODE, 0, name, value), - .name = pm_parser_constant_id_location(parser, name->start, name->end - 1), - .name_loc = PM_LOCATION_TOKEN_VALUE(name), + .base = PM_NODE_INIT(parser, PM_OPTIONAL_KEYWORD_PARAMETER_NODE, 0, PM_LOCATION_INIT_TOKEN_NODE(parser, name, value)), + .name = pm_parser_constant_id_raw(parser, name->start, name->end - 1), + .name_loc = TOK2LOC(parser, name), .value = value }; @@ -5090,14 +5074,10 @@ pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *ope pm_keyword_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_rest_parameter_node_t); *node = (pm_keyword_rest_parameter_node_t) { - .base = ( - (name->type == PM_TOKEN_NOT_PROVIDED) - ? PM_NODE_INIT_TOKEN(parser, PM_KEYWORD_REST_PARAMETER_NODE, 0, operator) - : PM_NODE_INIT_TOKENS(parser, PM_KEYWORD_REST_PARAMETER_NODE, 0, operator, name) - ), - .name = pm_parser_optional_constant_id_token(parser, name), - .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name), - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) + .base = PM_NODE_INIT(parser, PM_KEYWORD_REST_PARAMETER_NODE, 0, (name == NULL) ? 
PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name)), + .name = name == NULL ? 0 : pm_parser_constant_id_token(parser, name), + .name_loc = NTOK2LOC(parser, name), + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -5119,11 +5099,11 @@ pm_lambda_node_create( pm_lambda_node_t *node = PM_NODE_ALLOC(parser, pm_lambda_node_t); *node = (pm_lambda_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_LAMBDA_NODE, 0, operator, closing), + .base = PM_NODE_INIT(parser, PM_LAMBDA_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, operator, closing)), .locals = *locals, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing), + .operator_loc = TOK2LOC(parser, operator), + .opening_loc = TOK2LOC(parser, opening), + .closing_loc = TOK2LOC(parser, closing), .parameters = parameters, .body = body }; @@ -5141,9 +5121,9 @@ pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t); *node = (pm_local_variable_and_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_LOCAL_VARIABLE_AND_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_LOCAL_VARIABLE_AND_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name_loc = target->location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value, .name = name, .depth = depth @@ -5160,12 +5140,12 @@ pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *tar pm_local_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_operator_write_node_t); *node = (pm_local_variable_operator_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE, 0, 
PM_LOCATION_INIT_NODES(target, value)), .name_loc = target->location, - .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .binary_operator_loc = TOK2LOC(parser, operator), .value = value, .name = name, - .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1), + .binary_operator = pm_parser_constant_id_raw(parser, operator->start, operator->end - 1), .depth = depth }; @@ -5182,9 +5162,9 @@ pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, c pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t); *node = (pm_local_variable_or_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_LOCAL_VARIABLE_OR_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_LOCAL_VARIABLE_OR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name_loc = target->location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value, .name = name, .depth = depth @@ -5203,7 +5183,7 @@ pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_tok pm_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_read_node_t); *node = (pm_local_variable_read_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_LOCAL_VARIABLE_READ_NODE, 0, name), + .base = PM_NODE_INIT(parser, PM_LOCAL_VARIABLE_READ_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, name)), .name = name_id, .depth = depth }; @@ -5239,12 +5219,12 @@ pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY); *node = (pm_local_variable_write_node_t) { - .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_LOCAL_VARIABLE_WRITE_NODE, flags, name_loc, value), + .base = PM_NODE_INIT(parser, PM_LOCAL_VARIABLE_WRITE_NODE, flags, ((pm_location_t) { .start = name_loc->start, .length = PM_NODE_END(value) - name_loc->start })), .name 
= name, .depth = depth, .value = value, .name_loc = *name_loc, - .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator) + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -5263,8 +5243,13 @@ pm_token_is_it(const uint8_t *start, const uint8_t *end) { * are of the form /^_\d$/). */ static inline bool -pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) { - return (end - start == 2) && (start[0] == '_') && (start[1] != '0') && (pm_char_is_decimal_digit(start[1])); +pm_token_is_numbered_parameter(const pm_parser_t *parser, uint32_t start, uint32_t length) { + return ( + (length == 2) && + (parser->start[start] == '_') && + (parser->start[start + 1] != '0') && + pm_char_is_decimal_digit(parser->start[start + 1]) + ); } /** @@ -5272,9 +5257,9 @@ pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) { * an appropriate error message to the parser. */ static inline void -pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) { - if (pm_token_is_numbered_parameter(start, end)) { - PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start); +pm_refute_numbered_parameter(pm_parser_t *parser, uint32_t start, uint32_t length) { + if (pm_token_is_numbered_parameter(parser, start, length)) { + PM_PARSER_ERR_FORMAT(parser, start, length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + start); } } @@ -5284,11 +5269,11 @@ pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const ui */ static pm_local_variable_target_node_t * pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) { - pm_refute_numbered_parameter(parser, location->start, location->end); + pm_refute_numbered_parameter(parser, location->start, location->length); pm_local_variable_target_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_target_node_t); *node = (pm_local_variable_target_node_t) { - .base = 
PM_NODE_INIT_TOKEN(parser, PM_LOCAL_VARIABLE_TARGET_NODE, 0, location), + .base = PM_NODE_INIT(parser, PM_LOCAL_VARIABLE_TARGET_NODE, 0, ((pm_location_t) { .start = location->start, .length = location->length })), .name = name, .depth = depth }; @@ -5306,10 +5291,10 @@ pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t pm_match_predicate_node_t *node = PM_NODE_ALLOC(parser, pm_match_predicate_node_t); *node = (pm_match_predicate_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_MATCH_PREDICATE_NODE, 0, value, pattern), + .base = PM_NODE_INIT(parser, PM_MATCH_PREDICATE_NODE, 0, PM_LOCATION_INIT_NODES(value, pattern)), .value = value, .pattern = pattern, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -5325,10 +5310,10 @@ pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t * pm_match_required_node_t *node = PM_NODE_ALLOC(parser, pm_match_required_node_t); *node = (pm_match_required_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_MATCH_REQUIRED_NODE, 0, value, pattern), + .base = PM_NODE_INIT(parser, PM_MATCH_REQUIRED_NODE, 0, PM_LOCATION_INIT_NODES(value, pattern)), .value = value, .pattern = pattern, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -5342,7 +5327,7 @@ pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) { pm_match_write_node_t *node = PM_NODE_ALLOC(parser, pm_match_write_node_t); *node = (pm_match_write_node_t) { - .base = PM_NODE_INIT_NODE(parser, PM_MATCH_WRITE_NODE, 0, call), + .base = PM_NODE_INIT(parser, PM_MATCH_WRITE_NODE, 0, PM_LOCATION_INIT_NODE(call)), .call = call, .targets = { 0 } }; @@ -5358,12 +5343,12 @@ pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_module_node_t *node = PM_NODE_ALLOC(parser, pm_module_node_t); *node = (pm_module_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_MODULE_NODE, 0, 
module_keyword, end_keyword), + .base = PM_NODE_INIT(parser, PM_MODULE_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, module_keyword, end_keyword)), .locals = (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals), - .module_keyword_loc = PM_LOCATION_TOKEN_VALUE(module_keyword), + .module_keyword_loc = TOK2LOC(parser, module_keyword), .constant_path = constant_path, .body = body, - .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword), + .end_keyword_loc = TOK2LOC(parser, end_keyword), .name = pm_parser_constant_id_token(parser, name) }; @@ -5378,7 +5363,7 @@ pm_multi_target_node_create(pm_parser_t *parser) { pm_multi_target_node_t *node = PM_NODE_ALLOC(parser, pm_multi_target_node_t); *node = (pm_multi_target_node_t) { - .base = PM_NODE_INIT_UNSET(parser, PM_MULTI_TARGET_NODE, 0), + .base = PM_NODE_INIT(parser, PM_MULTI_TARGET_NODE, 0, PM_LOCATION_INIT_UNSET), .lefts = { 0 }, .rest = NULL, .rights = { 0 }, @@ -5405,7 +5390,7 @@ pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t if (node->rest == NULL) { node->rest = target; } else { - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST); + PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST); pm_node_list_append(&node->rights, target); } } else if (node->rest == NULL) { @@ -5414,12 +5399,12 @@ pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t pm_node_list_append(&node->rights, target); } - if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) { - node->base.location.start = target->location.start; + if (PM_NODE_LENGTH(node) == 0 || (PM_NODE_START(node) > PM_NODE_START(target))) { + PM_NODE_START_SET_NODE(node, target); } - if (node->base.location.end == NULL || (node->base.location.end < target->location.end)) { - node->base.location.end = target->location.end; + if (PM_NODE_LENGTH(node) == 
0 || (PM_NODE_END(node) < PM_NODE_END(target))) { + PM_NODE_LENGTH_SET_NODE(node, target); } } @@ -5427,18 +5412,19 @@ pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t * Set the opening of a MultiTargetNode node. */ static void -pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) { - node->base.location.start = lparen->start; - node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen); +pm_multi_target_node_opening_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *lparen) { + PM_NODE_START_SET_TOKEN(parser, node, lparen); + PM_NODE_LENGTH_SET_TOKEN(parser, node, lparen); + node->lparen_loc = TOK2LOC(parser, lparen); } /** * Set the closing of a MultiTargetNode node. */ static void -pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) { - node->base.location.end = rparen->end; - node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen); +pm_multi_target_node_closing_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *rparen) { + PM_NODE_LENGTH_SET_TOKEN(parser, node, rparen); + node->rparen_loc = TOK2LOC(parser, rparen); } /** @@ -5450,13 +5436,13 @@ pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY); *node = (pm_multi_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_MULTI_WRITE_NODE, flags, target, value), + .base = PM_NODE_INIT(parser, PM_MULTI_WRITE_NODE, flags, PM_LOCATION_INIT_NODES(target, value)), .lefts = target->lefts, .rest = target->rest, .rights = target->rights, .lparen_loc = target->lparen_loc, .rparen_loc = target->rparen_loc, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -5476,12 +5462,8 @@ pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments pm_next_node_t *node = 
PM_NODE_ALLOC(parser, pm_next_node_t); *node = (pm_next_node_t) { - .base = ( - (arguments == NULL) - ? PM_NODE_INIT_TOKEN(parser, PM_NEXT_NODE, 0, keyword) - : PM_NODE_INIT_TOKEN_NODE(parser, PM_NEXT_NODE, 0, keyword, arguments) - ), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), + .base = PM_NODE_INIT(parser, PM_NEXT_NODE, 0, (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments)), + .keyword_loc = TOK2LOC(parser, keyword), .arguments = arguments }; @@ -5497,7 +5479,7 @@ pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) { pm_nil_node_t *node = PM_NODE_ALLOC(parser, pm_nil_node_t); *node = (pm_nil_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_NIL_NODE, PM_NODE_FLAG_STATIC_LITERAL, token) + .base = PM_NODE_INIT(parser, PM_NIL_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)) }; return node; @@ -5513,9 +5495,9 @@ pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *oper pm_no_keywords_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_no_keywords_parameter_node_t); *node = (pm_no_keywords_parameter_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_NO_KEYWORDS_PARAMETER_NODE, 0, operator, keyword), - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword) + .base = PM_NODE_INIT(parser, PM_NO_KEYWORDS_PARAMETER_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, operator, keyword)), + .operator_loc = TOK2LOC(parser, operator), + .keyword_loc = TOK2LOC(parser, keyword) }; return node; @@ -5525,11 +5507,11 @@ pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *oper * Allocate and initialize a new NumberedParametersNode node. 
*/ static pm_numbered_parameters_node_t * -pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) { +pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing, uint8_t maximum) { pm_numbered_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_parameters_node_t); *node = (pm_numbered_parameters_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_NUMBERED_PARAMETERS_NODE, 0, location), + .base = PM_NODE_INIT(parser, PM_NUMBERED_PARAMETERS_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, opening, closing)), .maximum = maximum }; @@ -5569,14 +5551,14 @@ pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *to unsigned long value = strtoul(digits, &endptr, 10); if ((digits == endptr) || (*endptr != '\0')) { - pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL); + pm_parser_err(parser, U32(start - parser->start), U32(length), PM_ERR_INVALID_NUMBER_DECIMAL); value = 0; } xfree(digits); if ((errno == ERANGE) || (value > NTH_REF_MAX)) { - PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start); + PM_PARSER_WARN_FORMAT(parser, U32(start - parser->start), U32(length), PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start); value = 0; } @@ -5594,7 +5576,7 @@ pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *na pm_numbered_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_reference_read_node_t); *node = (pm_numbered_reference_read_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_NUMBERED_REFERENCE_READ_NODE, 0, name), + .base = PM_NODE_INIT(parser, PM_NUMBERED_REFERENCE_READ_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, name)), .number = pm_numbered_reference_read_node_number(parser, name) }; @@ -5609,10 +5591,10 @@ pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, c 
pm_optional_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_parameter_node_t); *node = (pm_optional_parameter_node_t) { - .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_OPTIONAL_PARAMETER_NODE, 0, name, value), + .base = PM_NODE_INIT(parser, PM_OPTIONAL_PARAMETER_NODE, 0, PM_LOCATION_INIT_TOKEN_NODE(parser, name, value)), .name = pm_parser_constant_id_token(parser, name), - .name_loc = PM_LOCATION_TOKEN_VALUE(name), - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .name_loc = TOK2LOC(parser, name), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -5629,10 +5611,10 @@ pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operat pm_or_node_t *node = PM_NODE_ALLOC(parser, pm_or_node_t); *node = (pm_or_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_OR_NODE, 0, left, right), + .base = PM_NODE_INIT(parser, PM_OR_NODE, 0, PM_LOCATION_INIT_NODES(left, right)), .left = left, .right = right, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -5646,7 +5628,7 @@ pm_parameters_node_create(pm_parser_t *parser) { pm_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_parameters_node_t); *node = (pm_parameters_node_t) { - .base = PM_NODE_INIT_UNSET(parser, PM_PARAMETERS_NODE, 0), + .base = PM_NODE_INIT(parser, PM_PARAMETERS_NODE, 0, PM_LOCATION_INIT_UNSET), .rest = NULL, .keyword_rest = NULL, .block = NULL, @@ -5664,16 +5646,12 @@ pm_parameters_node_create(pm_parser_t *parser) { */ static void pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) { - if (params->base.location.start == NULL) { - params->base.location.start = param->location.start; - } else { - params->base.location.start = params->base.location.start < param->location.start ? 
params->base.location.start : param->location.start; + if ((params->base.location.length == 0) || PM_NODE_START(params) > PM_NODE_START(param)) { + PM_NODE_START_SET_NODE(params, param); } - if (params->base.location.end == NULL) { - params->base.location.end = param->location.end; - } else { - params->base.location.end = params->base.location.end > param->location.end ? params->base.location.end : param->location.end; + if ((params->base.location.length == 0) || (PM_NODE_END(params) < PM_NODE_END(param))) { + PM_NODE_LENGTH_SET_NODE(params, param); } } @@ -5750,7 +5728,7 @@ pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_st pm_program_node_t *node = PM_NODE_ALLOC(parser, pm_program_node_t); *node = (pm_program_node_t) { - .base = PM_NODE_INIT_NODE(parser, PM_PROGRAM_NODE, 0, statements), + .base = PM_NODE_INIT(parser, PM_PROGRAM_NODE, 0, PM_LOCATION_INIT_NODE(statements)), .locals = *locals, .statements = statements }; @@ -5766,10 +5744,10 @@ pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_no pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t); *node = (pm_parentheses_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_PARENTHESES_NODE, flags, opening, closing), + .base = PM_NODE_INIT(parser, PM_PARENTHESES_NODE, flags, PM_LOCATION_INIT_TOKENS(parser, opening, closing)), .body = body, - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing) + .opening_loc = TOK2LOC(parser, opening), + .closing_loc = TOK2LOC(parser, closing) }; return node; @@ -5783,11 +5761,11 @@ pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, con pm_pinned_expression_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_expression_node_t); *node = (pm_pinned_expression_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_PINNED_EXPRESSION_NODE, 0, operator, rparen), + .base = PM_NODE_INIT(parser, PM_PINNED_EXPRESSION_NODE, 0, 
PM_LOCATION_INIT_TOKENS(parser, operator, rparen)), .expression = expression, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen), - .rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen) + .operator_loc = TOK2LOC(parser, operator), + .lparen_loc = TOK2LOC(parser, lparen), + .rparen_loc = TOK2LOC(parser, rparen) }; return node; @@ -5801,9 +5779,9 @@ pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_pinned_variable_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_variable_node_t); *node = (pm_pinned_variable_node_t) { - .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_PINNED_VARIABLE_NODE, 0, operator, variable), + .base = PM_NODE_INIT(parser, PM_PINNED_VARIABLE_NODE, 0, PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable)), .variable = variable, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -5817,11 +5795,11 @@ pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, co pm_post_execution_node_t *node = PM_NODE_ALLOC(parser, pm_post_execution_node_t); *node = (pm_post_execution_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_POST_EXECUTION_NODE, 0, keyword, closing), + .base = PM_NODE_INIT(parser, PM_POST_EXECUTION_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, keyword, closing)), .statements = statements, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing) + .keyword_loc = TOK2LOC(parser, keyword), + .opening_loc = TOK2LOC(parser, opening), + .closing_loc = TOK2LOC(parser, closing) }; return node; @@ -5835,11 +5813,11 @@ pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, con pm_pre_execution_node_t *node = PM_NODE_ALLOC(parser, pm_pre_execution_node_t); *node = (pm_pre_execution_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_PRE_EXECUTION_NODE, 0, keyword, closing), + .base = 
PM_NODE_INIT(parser, PM_PRE_EXECUTION_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, keyword, closing)), .statements = statements, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing) + .keyword_loc = TOK2LOC(parser, keyword), + .opening_loc = TOK2LOC(parser, opening), + .closing_loc = TOK2LOC(parser, closing) }; return node; @@ -5871,11 +5849,14 @@ pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *ope flags |= PM_NODE_FLAG_STATIC_LITERAL; } + uint32_t start = left == NULL ? PM_TOKEN_START(parser, operator) : PM_NODE_START(left); + uint32_t end = right == NULL ? PM_TOKEN_END(parser, operator) : PM_NODE_END(right); + *node = (pm_range_node_t) { - .base = PM_NODE_INIT(parser, PM_RANGE_NODE, flags, (left == NULL ? operator->start : left->location.start), (right == NULL ? operator->end : right->location.end)), + .base = PM_NODE_INIT(parser, PM_RANGE_NODE, flags, ((pm_location_t) { .start = start, .length = U32(end - start) })), .left = left, .right = right, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -5890,7 +5871,7 @@ pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) { pm_redo_node_t *node = PM_NODE_ALLOC(parser, pm_redo_node_t); *node = (pm_redo_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_REDO_NODE, 0, token) + .base = PM_NODE_INIT(parser, PM_REDO_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token)) }; return node; @@ -5906,10 +5887,10 @@ pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_ pm_node_flags_t flags = pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL; *node = (pm_regular_expression_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_REGULAR_EXPRESSION_NODE, flags, opening, closing), - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .content_loc = PM_LOCATION_TOKEN_VALUE(content), - 
.closing_loc = PM_LOCATION_TOKEN_VALUE(closing), + .base = PM_NODE_INIT(parser, PM_REGULAR_EXPRESSION_NODE, flags, PM_LOCATION_INIT_TOKENS(parser, opening, closing)), + .opening_loc = TOK2LOC(parser, opening), + .content_loc = TOK2LOC(parser, content), + .closing_loc = TOK2LOC(parser, closing), .unescaped = *unescaped }; @@ -5932,7 +5913,7 @@ pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) pm_required_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_parameter_node_t); *node = (pm_required_parameter_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_REQUIRED_PARAMETER_NODE, 0, token), + .base = PM_NODE_INIT(parser, PM_REQUIRED_PARAMETER_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token)), .name = pm_parser_constant_id_token(parser, token) }; @@ -5947,9 +5928,9 @@ pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_rescue_modifier_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_modifier_node_t); *node = (pm_rescue_modifier_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_RESCUE_MODIFIER_NODE, 0, expression, rescue_expression), + .base = PM_NODE_INIT(parser, PM_RESCUE_MODIFIER_NODE, 0, PM_LOCATION_INIT_NODES(expression, rescue_expression)), .expression = expression, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), + .keyword_loc = TOK2LOC(parser, keyword), .rescue_expression = rescue_expression }; @@ -5964,8 +5945,8 @@ pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) { pm_rescue_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_node_t); *node = (pm_rescue_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_RESCUE_NODE, 0, keyword), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), + .base = PM_NODE_INIT(parser, PM_RESCUE_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, keyword)), + .keyword_loc = TOK2LOC(parser, keyword), .operator_loc = { 0 }, .then_keyword_loc = { 0 }, .reference = NULL, @@ -5978,8 +5959,8 @@ pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) { } static 
inline void -pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) { - node->operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator); +pm_rescue_node_operator_set(const pm_parser_t *parser, pm_rescue_node_t *node, const pm_token_t *operator) { + node->operator_loc = TOK2LOC(parser, operator); } /** @@ -5988,7 +5969,7 @@ pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) static void pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) { node->reference = reference; - node->base.location.end = reference->location.end; + PM_NODE_LENGTH_SET_NODE(node, reference); } /** @@ -5998,7 +5979,7 @@ static void pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) { node->statements = statements; if (pm_statements_node_body_length(statements) > 0) { - node->base.location.end = statements->base.location.end; + PM_NODE_LENGTH_SET_NODE(node, statements); } } @@ -6008,7 +5989,7 @@ pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *stat static void pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) { node->subsequent = subsequent; - node->base.location.end = subsequent->base.location.end; + PM_NODE_LENGTH_SET_NODE(node, subsequent); } /** @@ -6017,7 +5998,7 @@ pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subseque static void pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) { pm_node_list_append(&node->exceptions, exception); - node->base.location.end = exception->location.end; + PM_NODE_LENGTH_SET_NODE(node, exception); } /** @@ -6028,14 +6009,10 @@ pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, c pm_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_rest_parameter_node_t); *node = (pm_rest_parameter_node_t) { - .base = ( - (name->type == PM_TOKEN_NOT_PROVIDED) - ? 
PM_NODE_INIT_TOKEN(parser, PM_REST_PARAMETER_NODE, 0, operator) - : PM_NODE_INIT_TOKENS(parser, PM_REST_PARAMETER_NODE, 0, operator, name) - ), - .name = pm_parser_optional_constant_id_token(parser, name), - .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name), - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) + .base = PM_NODE_INIT(parser, PM_REST_PARAMETER_NODE, 0, (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name)), + .name = name == NULL ? 0 : pm_parser_constant_id_token(parser, name), + .name_loc = NTOK2LOC(parser, name), + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -6050,7 +6027,7 @@ pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) { pm_retry_node_t *node = PM_NODE_ALLOC(parser, pm_retry_node_t); *node = (pm_retry_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_RETRY_NODE, 0, token) + .base = PM_NODE_INIT(parser, PM_RETRY_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token)) }; return node; @@ -6064,12 +6041,8 @@ pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argumen pm_return_node_t *node = PM_NODE_ALLOC(parser, pm_return_node_t); *node = (pm_return_node_t) { - .base = ( - (arguments == NULL) - ? PM_NODE_INIT_TOKEN(parser, PM_RETURN_NODE, 0, keyword) - : PM_NODE_INIT_TOKEN_NODE(parser, PM_RETURN_NODE, 0, keyword, arguments) - ), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), + .base = PM_NODE_INIT(parser, PM_RETURN_NODE, 0, (arguments == NULL) ? 
PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments)), + .keyword_loc = TOK2LOC(parser, keyword), .arguments = arguments }; @@ -6085,7 +6058,7 @@ pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) { pm_self_node_t *node = PM_NODE_ALLOC(parser, pm_self_node_t); *node = (pm_self_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_SELF_NODE, 0, token) + .base = PM_NODE_INIT(parser, PM_SELF_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token)) }; return node; @@ -6099,7 +6072,7 @@ pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shar pm_shareable_constant_node_t *node = PM_NODE_ALLOC(parser, pm_shareable_constant_node_t); *node = (pm_shareable_constant_node_t) { - .base = PM_NODE_INIT_NODE(parser, PM_SHAREABLE_CONSTANT_NODE, (pm_node_flags_t) value, write), + .base = PM_NODE_INIT(parser, PM_SHAREABLE_CONSTANT_NODE, (pm_node_flags_t) value, PM_LOCATION_INIT_NODE(write)), .write = write }; @@ -6114,13 +6087,13 @@ pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *local pm_singleton_class_node_t *node = PM_NODE_ALLOC(parser, pm_singleton_class_node_t); *node = (pm_singleton_class_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_SINGLETON_CLASS_NODE, 0, class_keyword, end_keyword), + .base = PM_NODE_INIT(parser, PM_SINGLETON_CLASS_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword)), .locals = *locals, - .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword), - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .class_keyword_loc = TOK2LOC(parser, class_keyword), + .operator_loc = TOK2LOC(parser, operator), .expression = expression, .body = body, - .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword) + .end_keyword_loc = TOK2LOC(parser, end_keyword) }; return node; @@ -6135,7 +6108,7 @@ pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) { pm_source_encoding_node_t *node = PM_NODE_ALLOC(parser, 
pm_source_encoding_node_t); *node = (pm_source_encoding_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_SOURCE_ENCODING_NODE, PM_NODE_FLAG_STATIC_LITERAL, token) + .base = PM_NODE_INIT(parser, PM_SOURCE_ENCODING_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)) }; return node; @@ -6161,7 +6134,7 @@ pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) } *node = (pm_source_file_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_SOURCE_FILE_NODE, flags, file_keyword), + .base = PM_NODE_INIT(parser, PM_SOURCE_FILE_NODE, flags, PM_LOCATION_INIT_TOKEN(parser, file_keyword)), .filepath = parser->filepath }; @@ -6177,7 +6150,7 @@ pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) { pm_source_line_node_t *node = PM_NODE_ALLOC(parser, pm_source_line_node_t); *node = (pm_source_line_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_SOURCE_LINE_NODE, PM_NODE_FLAG_STATIC_LITERAL, token) + .base = PM_NODE_INIT(parser, PM_SOURCE_LINE_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)) }; return node; @@ -6191,12 +6164,8 @@ pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t pm_splat_node_t *node = PM_NODE_ALLOC(parser, pm_splat_node_t); *node = (pm_splat_node_t) { - .base = ( - (expression == NULL) - ? PM_NODE_INIT_TOKEN(parser, PM_SPLAT_NODE, 0, operator) - : PM_NODE_INIT_TOKEN_NODE(parser, PM_SPLAT_NODE, 0, operator, expression) - ), - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .base = PM_NODE_INIT(parser, PM_SPLAT_NODE, 0, (expression == NULL) ? 
PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression)), + .operator_loc = TOK2LOC(parser, operator), .expression = expression }; @@ -6211,7 +6180,7 @@ pm_statements_node_create(pm_parser_t *parser) { pm_statements_node_t *node = PM_NODE_ALLOC(parser, pm_statements_node_t); *node = (pm_statements_node_t) { - .base = PM_NODE_INIT_BASE(parser, PM_STATEMENTS_NODE, 0), + .base = PM_NODE_INIT(parser, PM_STATEMENTS_NODE, 0, PM_LOCATION_INIT_UNSET), .body = { 0 } }; @@ -6226,26 +6195,18 @@ pm_statements_node_body_length(pm_statements_node_t *node) { return node && node->body.size; } -/** - * Set the location of the given StatementsNode. - */ -static void -pm_statements_node_location_set(pm_statements_node_t *node, const uint8_t *start, const uint8_t *end) { - node->base.location = (pm_location_t) { .start = start, .end = end }; -} - /** * Update the location of the statements node based on the statement that is * being added to the list. */ static inline void pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) { - if (pm_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) { - node->base.location.start = statement->location.start; + if (pm_statements_node_body_length(node) == 0 || PM_NODE_START(statement) < PM_NODE_START(node)) { + PM_NODE_START_SET_NODE(node, statement); } - if (statement->location.end > node->base.location.end) { - node->base.location.end = statement->location.end; + if (PM_NODE_END(statement) > PM_NODE_END(node)) { + PM_NODE_LENGTH_SET_NODE(node, statement); } } @@ -6303,14 +6264,14 @@ pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, break; } - const uint8_t *start = (opening->type == PM_TOKEN_NOT_PROVIDED ? content->start : opening->start); - const uint8_t *end = (closing->type == PM_TOKEN_NOT_PROVIDED ? content->end : closing->end); + uint32_t start = PM_TOKEN_START(parser, opening == NULL ? 
content : opening); + uint32_t end = PM_TOKEN_END(parser, closing == NULL ? content : closing); *node = (pm_string_node_t) { - .base = PM_NODE_INIT(parser, PM_STRING_NODE, flags, start, end), - .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), - .content_loc = PM_LOCATION_TOKEN_VALUE(content), - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing), + .base = PM_NODE_INIT(parser, PM_STRING_NODE, flags, ((pm_location_t) { .start = start, .length = U32(end - start) })), + .opening_loc = NTOK2LOC(parser, opening), + .content_loc = TOK2LOC(parser, content), + .closing_loc = NTOK2LOC(parser, closing), .unescaped = *string }; @@ -6344,14 +6305,12 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument assert(keyword->type == PM_TOKEN_KEYWORD_SUPER); pm_super_node_t *node = PM_NODE_ALLOC(parser, pm_super_node_t); - const uint8_t *end = pm_arguments_end(arguments); - if (end == NULL) { - assert(false && "unreachable"); - } + const pm_location_t *end = pm_arguments_end(arguments); + assert(end != NULL && "unreachable"); *node = (pm_super_node_t) { - .base = PM_NODE_INIT(parser, PM_SUPER_NODE, 0, keyword->start, end), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), + .base = PM_NODE_INIT(parser, PM_SUPER_NODE, 0, ((pm_location_t) { .start = PM_TOKEN_START(parser, keyword), .length = PM_LOCATION_END(end) - PM_TOKEN_START(parser, keyword) })), + .keyword_loc = TOK2LOC(parser, keyword), .lparen_loc = arguments->opening_loc, .arguments = arguments->arguments, .rparen_loc = arguments->closing_loc, @@ -6386,7 +6345,7 @@ parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *locat size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor); if (width == 0) { - pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL); + pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL); break; } @@ -6406,7 +6365,7 @@ 
parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *loca size_t width = encoding->char_width(cursor, end - cursor); if (width == 0) { - pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL); + pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL); break; } @@ -6466,13 +6425,13 @@ parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, con if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) { if (!ascii_only) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name); } } else if (parser->encoding != modifier_encoding) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name); if (modifier == 'n' && !ascii_only) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source)); } } @@ -6483,18 +6442,18 @@ parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, con bool mixed_encoding = false; if (mixed_encoding) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source)); } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) { 
// TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily. bool valid_string_in_modifier_encoding = true; if (!valid_string_in_modifier_encoding) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source)); } } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) { // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now. if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source)); } } @@ -6513,7 +6472,7 @@ parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_str // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report. 
bool valid_unicode_range = true; if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source)); return flags; } @@ -6522,7 +6481,7 @@ parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_str if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) { // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the // following error message appearing twice. We do the same for compatibility. - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name); } /** @@ -6579,14 +6538,14 @@ static pm_symbol_node_t * pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) { pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t); - const uint8_t *start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start); - const uint8_t *end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end); + uint32_t start = opening == NULL ? PM_TOKEN_START(parser, value) : PM_TOKEN_START(parser, opening); + uint32_t end = closing == NULL ? 
PM_TOKEN_END(parser, value) : PM_TOKEN_END(parser, closing); *node = (pm_symbol_node_t) { - .base = PM_NODE_INIT(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL | flags, start, end), - .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), - .value_loc = PM_LOCATION_TOKEN_VALUE(value), - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing), + .base = PM_NODE_INIT(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL | flags, ((pm_location_t) { .start = start, .length = U32(end - start) })), + .opening_loc = NTOK2LOC(parser, opening), + .value_loc = NTOK2LOC(parser, value), + .closing_loc = NTOK2LOC(parser, closing), .unescaped = *unescaped }; @@ -6616,35 +6575,15 @@ pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *open */ static pm_symbol_node_t * pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) { - pm_symbol_node_t *node; - - switch (token->type) { - case PM_TOKEN_LABEL: { - pm_token_t opening = not_provided(parser); - pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end }; + assert(token->type == PM_TOKEN_LABEL); - pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 }; - node = pm_symbol_node_create(parser, &opening, &label, &closing); + pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end }; + pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 }; + pm_symbol_node_t *node = pm_symbol_node_create(parser, NULL, &label, &closing); - assert((label.end - label.start) >= 0); - pm_string_shared_init(&node->unescaped, label.start, label.end); - pm_node_flag_set(UP(node), parse_symbol_encoding(parser, &label, &node->unescaped, false)); - - break; - } - case PM_TOKEN_MISSING: { - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - - pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end }; 
- node = pm_symbol_node_create(parser, &opening, &label, &closing); - break; - } - default: - assert(false && "unreachable"); - node = NULL; - break; - } + assert((label.end - label.start) >= 0); + pm_string_shared_init(&node->unescaped, label.start, label.end); + pm_node_flag_set(UP(node), parse_symbol_encoding(parser, &label, &node->unescaped, false)); return node; } @@ -6657,8 +6596,8 @@ pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) { pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t); *node = (pm_symbol_node_t) { - .base = PM_NODE_INIT_BASE(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING), - .value_loc = PM_LOCATION_NULL_VALUE(parser), + .base = PM_NODE_INIT(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING, PM_LOCATION_INIT_UNSET), + .value_loc = { 0 }, .unescaped = { 0 } }; @@ -6670,21 +6609,29 @@ pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) { * Check if the given node is a label in a hash. 
*/ static bool -pm_symbol_node_label_p(pm_node_t *node) { - const uint8_t *end = NULL; +pm_symbol_node_label_p(const pm_parser_t *parser, const pm_node_t *node) { + const pm_location_t *location = NULL; switch (PM_NODE_TYPE(node)) { - case PM_SYMBOL_NODE: - end = ((pm_symbol_node_t *) node)->closing_loc.end; + case PM_SYMBOL_NODE: { + const pm_symbol_node_t *cast = (pm_symbol_node_t *) node; + if (cast->closing_loc.length > 0) { + location = &cast->closing_loc; + } break; - case PM_INTERPOLATED_SYMBOL_NODE: - end = ((pm_interpolated_symbol_node_t *) node)->closing_loc.end; + } + case PM_INTERPOLATED_SYMBOL_NODE: { + const pm_interpolated_symbol_node_t *cast = (pm_interpolated_symbol_node_t *) node; + if (cast->closing_loc.length > 0) { + location = &cast->closing_loc; + } break; + } default: return false; } - return (end != NULL) && (end[-1] == ':'); + return (location != NULL) && (parser->start[PM_LOCATION_END(location) - 1] == ':'); } /** @@ -6695,14 +6642,19 @@ pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_symbol_node_t *new_node = PM_NODE_ALLOC(parser, pm_symbol_node_t); *new_node = (pm_symbol_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening, closing), - .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), + .base = PM_NODE_INIT(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKENS(parser, opening, closing)), + .opening_loc = TOK2LOC(parser, opening), .value_loc = node->content_loc, - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing), + .closing_loc = TOK2LOC(parser, closing), .unescaped = node->unescaped }; - pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end }; + pm_token_t content = { + .type = PM_TOKEN_IDENTIFIER, + .start = parser->start + node->content_loc.start, + .end = parser->start + node->content_loc.start + node->content_loc.length + }; + 
pm_node_flag_set(UP(new_node), parse_symbol_encoding(parser, &content, &node->unescaped, true)); // We are explicitly _not_ using pm_node_destroy here because we don't want @@ -6731,7 +6683,7 @@ pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) { } *new_node = (pm_string_node_t) { - .base = PM_NODE_INIT_NODE(parser, PM_STRING_NODE, flags, node), + .base = PM_NODE_INIT(parser, PM_STRING_NODE, flags, PM_LOCATION_INIT_NODE(node)), .opening_loc = node->opening_loc, .content_loc = node->value_loc, .closing_loc = node->closing_loc, @@ -6755,7 +6707,7 @@ pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) { pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t); *node = (pm_true_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_TRUE_NODE, PM_NODE_FLAG_STATIC_LITERAL, token) + .base = PM_NODE_INIT(parser, PM_TRUE_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)) }; return node; @@ -6769,7 +6721,7 @@ pm_true_node_synthesized_create(pm_parser_t *parser) { pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t); *node = (pm_true_node_t) { - .base = PM_NODE_INIT_BASE(parser, PM_TRUE_NODE, PM_NODE_FLAG_STATIC_LITERAL) + .base = PM_NODE_INIT(parser, PM_TRUE_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_UNSET) }; return node; @@ -6784,8 +6736,8 @@ pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) { pm_undef_node_t *node = PM_NODE_ALLOC(parser, pm_undef_node_t); *node = (pm_undef_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_UNDEF_NODE, 0, token), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(token), + .base = PM_NODE_INIT(parser, PM_UNDEF_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token)), + .keyword_loc = TOK2LOC(parser, token), .names = { 0 } }; @@ -6797,7 +6749,7 @@ pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) { */ static void pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) { - node->base.location.end = name->location.end; + 
PM_NODE_LENGTH_SET_NODE(node, name); pm_node_list_append(&node->names, name); } @@ -6812,10 +6764,10 @@ pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t pm_node_t *end = statements == NULL ? predicate : UP(statements); *node = (pm_unless_node_t) { - .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_UNLESS_NODE, PM_NODE_FLAG_NEWLINE, keyword, end), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), + .base = PM_NODE_INIT(parser, PM_UNLESS_NODE, PM_NODE_FLAG_NEWLINE, PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, end)), + .keyword_loc = TOK2LOC(parser, keyword), .predicate = predicate, - .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword), + .then_keyword_loc = NTOK2LOC(parser, then_keyword), .statements = statements, .else_clause = NULL, .end_keyword_loc = { 0 } @@ -6836,8 +6788,8 @@ pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_statements_node_body_append(parser, statements, statement, true); *node = (pm_unless_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_UNLESS_NODE, PM_NODE_FLAG_NEWLINE, statement, predicate), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword), + .base = PM_NODE_INIT(parser, PM_UNLESS_NODE, PM_NODE_FLAG_NEWLINE, PM_LOCATION_INIT_NODES(statement, predicate)), + .keyword_loc = TOK2LOC(parser, unless_keyword), .predicate = predicate, .then_keyword_loc = { 0 }, .statements = statements, @@ -6849,9 +6801,9 @@ pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const } static inline void -pm_unless_node_end_keyword_loc_set(pm_unless_node_t *node, const pm_token_t *end_keyword) { - node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword); - node->base.location.end = end_keyword->end; +pm_unless_node_end_keyword_loc_set(const pm_parser_t *parser, pm_unless_node_t *node, const pm_token_t *end_keyword) { + node->end_keyword_loc = TOK2LOC(parser, end_keyword); + PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword); } /** @@ -6866,7 +6818,7 @@ 
pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statemen // All of the block exits that we want to remove should be within the // statements, and since we are modifying the statements, we shouldn't have // to check the end location. - const uint8_t *start = statements->base.location.start; + uint32_t start = statements->base.location.start; for (size_t index = parser->current_block_exits->size; index > 0; index--) { pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1]; @@ -6886,10 +6838,10 @@ pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL); *node = (pm_until_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_UNTIL_NODE, flags, keyword, closing), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), - .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword), - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing), + .base = PM_NODE_INIT(parser, PM_UNTIL_NODE, flags, PM_LOCATION_INIT_TOKENS(parser, keyword, closing)), + .keyword_loc = TOK2LOC(parser, keyword), + .do_keyword_loc = NTOK2LOC(parser, do_keyword), + .closing_loc = TOK2LOC(parser, closing), .predicate = predicate, .statements = statements }; @@ -6907,8 +6859,8 @@ pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm pm_loop_modifier_block_exits(parser, statements); *node = (pm_until_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_UNTIL_NODE, flags, statements, predicate), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), + .base = PM_NODE_INIT(parser, PM_UNTIL_NODE, flags, PM_LOCATION_INIT_NODES(statements, predicate)), + .keyword_loc = TOK2LOC(parser, keyword), .do_keyword_loc = { 0 }, .closing_loc = { 0 }, .predicate = predicate, @@ -6926,8 +6878,8 @@ pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) { pm_when_node_t *node = PM_NODE_ALLOC(parser, pm_when_node_t); *node = (pm_when_node_t) { - 
.base = PM_NODE_INIT_TOKEN(parser, PM_WHEN_NODE, 0, keyword), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), + .base = PM_NODE_INIT(parser, PM_WHEN_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, keyword)), + .keyword_loc = TOK2LOC(parser, keyword), .statements = NULL, .then_keyword_loc = { 0 }, .conditions = { 0 } @@ -6941,7 +6893,7 @@ pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) { */ static void pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) { - node->base.location.end = condition->location.end; + PM_NODE_LENGTH_SET_NODE(node, condition); pm_node_list_append(&node->conditions, condition); } @@ -6949,9 +6901,9 @@ pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) { * Set the location of the then keyword of a when node. */ static inline void -pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_keyword) { - node->base.location.end = then_keyword->end; - node->then_keyword_loc = PM_LOCATION_TOKEN_VALUE(then_keyword); +pm_when_node_then_keyword_loc_set(const pm_parser_t *parser, pm_when_node_t *node, const pm_token_t *then_keyword) { + PM_NODE_LENGTH_SET_TOKEN(parser, node, then_keyword); + node->then_keyword_loc = TOK2LOC(parser, then_keyword); } /** @@ -6959,8 +6911,8 @@ pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_k */ static void pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) { - if (statements->base.location.end > node->base.location.end) { - node->base.location.end = statements->base.location.end; + if (PM_NODE_END(statements) > PM_NODE_END(node)) { + PM_NODE_LENGTH_SET_NODE(node, statements); } node->statements = statements; @@ -6975,10 +6927,10 @@ pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL); *node = (pm_while_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_WHILE_NODE, 
flags, keyword, closing), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), - .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword), - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing), + .base = PM_NODE_INIT(parser, PM_WHILE_NODE, flags, PM_LOCATION_INIT_TOKENS(parser, keyword, closing)), + .keyword_loc = TOK2LOC(parser, keyword), + .do_keyword_loc = NTOK2LOC(parser, do_keyword), + .closing_loc = TOK2LOC(parser, closing), .predicate = predicate, .statements = statements }; @@ -6996,8 +6948,8 @@ pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm pm_loop_modifier_block_exits(parser, statements); *node = (pm_while_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_WHILE_NODE, flags, statements, predicate), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), + .base = PM_NODE_INIT(parser, PM_WHILE_NODE, flags, PM_LOCATION_INIT_NODES(statements, predicate)), + .keyword_loc = TOK2LOC(parser, keyword), .do_keyword_loc = { 0 }, .closing_loc = { 0 }, .predicate = predicate, @@ -7015,10 +6967,10 @@ pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_s pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t); *node = (pm_while_node_t) { - .base = PM_NODE_INIT_BASE(parser, PM_WHILE_NODE, 0), - .keyword_loc = PM_LOCATION_NULL_VALUE(parser), - .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser), - .closing_loc = PM_LOCATION_NULL_VALUE(parser), + .base = PM_NODE_INIT(parser, PM_WHILE_NODE, 0, PM_LOCATION_INIT_UNSET), + .keyword_loc = { 0 }, + .do_keyword_loc = { 0 }, + .closing_loc = { 0 }, .predicate = predicate, .statements = statements }; @@ -7035,10 +6987,10 @@ pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, pm_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_x_string_node_t); *node = (pm_x_string_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_X_STRING_NODE, PM_STRING_FLAGS_FROZEN, opening, closing), - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .content_loc 
= PM_LOCATION_TOKEN_VALUE(content), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing), + .base = PM_NODE_INIT(parser, PM_X_STRING_NODE, PM_STRING_FLAGS_FROZEN, PM_LOCATION_INIT_TOKENS(parser, opening, closing)), + .opening_loc = TOK2LOC(parser, opening), + .content_loc = TOK2LOC(parser, content), + .closing_loc = TOK2LOC(parser, closing), .unescaped = *unescaped }; @@ -7060,20 +7012,22 @@ static pm_yield_node_t * pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) { pm_yield_node_t *node = PM_NODE_ALLOC(parser, pm_yield_node_t); - const uint8_t *end; - if (rparen_loc->start != NULL) { - end = rparen_loc->end; + uint32_t start = PM_TOKEN_START(parser, keyword); + uint32_t end; + + if (rparen_loc->length > 0) { + end = PM_LOCATION_END(rparen_loc); } else if (arguments != NULL) { - end = arguments->base.location.end; - } else if (lparen_loc->start != NULL) { - end = lparen_loc->end; + end = PM_NODE_END(arguments); + } else if (lparen_loc->length > 0) { + end = PM_LOCATION_END(lparen_loc); } else { - end = keyword->end; + end = PM_TOKEN_END(parser, keyword); } *node = (pm_yield_node_t) { - .base = PM_NODE_INIT(parser, PM_YIELD_NODE, 0, keyword->start, end), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), + .base = PM_NODE_INIT(parser, PM_YIELD_NODE, 0, ((pm_location_t) { .start = start, .length = U32(end - start) })), + .keyword_loc = TOK2LOC(parser, keyword), .lparen_loc = *lparen_loc, .arguments = arguments, .rparen_loc = *rparen_loc @@ -7117,25 +7071,33 @@ pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) { */ static inline void pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) { - pm_locals_write(&parser->current_scope->locals, constant_id, start, end, reads); + pm_locals_write(&parser->current_scope->locals, constant_id, U32(start - parser->start), U32(end - 
start), reads); } /** * Add a local variable from a location to the current scope. */ static pm_constant_id_t -pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) { - pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, start, end); +pm_parser_local_add_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) { + pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end); if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads); return constant_id; } +/** + * Add a local variable from a location to the current scope. + */ +static inline pm_constant_id_t +pm_parser_local_add_location(pm_parser_t *parser, pm_location_t *location, uint32_t reads) { + return pm_parser_local_add_raw(parser, parser->start + location->start, parser->start + location->start + location->length, reads); +} + /** * Add a local variable from a token to the current scope. */ static inline pm_constant_id_t pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) { - return pm_parser_local_add_location(parser, token->start, token->end, reads); + return pm_parser_local_add_raw(parser, token->start, token->end, reads); } /** @@ -7169,7 +7131,7 @@ static bool pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) { // We want to check whether the parameter name is a numbered parameter or // not. - pm_refute_numbered_parameter(parser, name->start, name->end); + pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, name), PM_TOKEN_LENGTH(name)); // Otherwise we'll fetch the constant id for the parameter name and check // whether it's already in the current scope. @@ -7434,7 +7396,7 @@ parser_lex_magic_comment_encoding(pm_parser_t *parser) { // issue because we didn't understand the encoding that the user was // trying to use. 
In this case we'll keep using the default encoding but // add an error to the parser to indicate an unsuccessful parse. - pm_parser_err(parser, value_start, cursor, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT); + pm_parser_err(parser, U32(value_start - parser->start), U32(cursor - value_start), PM_ERR_INVALID_ENCODING_MAGIC_COMMENT); } } @@ -7602,7 +7564,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) { case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID: PM_PARSER_WARN_TOKEN_FORMAT( parser, - parser->current, + &parser->current, PM_WARN_INVALID_MAGIC_COMMENT_VALUE, (int) key_length, (const char *) key_source, @@ -7629,7 +7591,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) { case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID: PM_PARSER_WARN_TOKEN_FORMAT( parser, - parser->current, + &parser->current, PM_WARN_INVALID_MAGIC_COMMENT_VALUE, (int) key_length, (const char *) key_source, @@ -7664,7 +7626,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) { } else { PM_PARSER_WARN_TOKEN_FORMAT( parser, - parser->current, + &parser->current, PM_WARN_INVALID_MAGIC_COMMENT_VALUE, (int) key_length, (const char *) key_source, @@ -7682,10 +7644,8 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) { // Allocate a new magic comment node to append to the parser's list. 
pm_magic_comment_t *magic_comment; if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) { - magic_comment->key_start = key_start; - magic_comment->value_start = value_start; - magic_comment->key_length = (uint32_t) key_length; - magic_comment->value_length = value_length; + magic_comment->key = (pm_location_t) { .start = U32(key_start - parser->start), .length = U32(key_length) }; + magic_comment->value = (pm_location_t) { .start = U32(value_start - parser->start), .length = value_length }; pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment); } } @@ -7923,7 +7883,7 @@ static inline void pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) { if (invalid != NULL) { pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER; - pm_parser_err(parser, invalid, invalid + 1, diag_id); + pm_parser_err(parser, U32(invalid - parser->start), 1, diag_id); } } @@ -8108,7 +8068,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) { const uint8_t *fraction_start = parser->current.end; const uint8_t *fraction_end = parser->current.end + 2; fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end); - pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION); + pm_parser_err(parser, U32(fraction_start - parser->start), U32(fraction_end - fraction_start), PM_ERR_INVALID_NUMBER_FRACTION); } return type; @@ -8208,7 +8168,7 @@ lex_global_variable(pm_parser_t *parser) { // $0 isn't allowed to be followed by anything. pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? 
PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL; - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id); + PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->current, diag_id); } return PM_TOKEN_GLOBAL_VARIABLE; @@ -8245,8 +8205,8 @@ lex_global_variable(pm_parser_t *parser) { // If we get here, then we have a $ followed by something that // isn't recognized as a global variable. pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL; - const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); - PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start); + size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); + PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), diag_id, (int) (PM_TOKEN_LENGTH(&parser->current) + U32(width)), (const char *) parser->current.start); } return PM_TOKEN_GLOBAL_VARIABLE; @@ -8445,8 +8405,8 @@ current_token_starts_line(pm_parser_t *parser) { * handle interpolation. This function performs that check. It returns a token * type representing what it found. Those cases are: * - * * PM_TOKEN_NOT_PROVIDED - No interpolation was found at this point. The - * caller should keep lexing. + * * 0 - No interpolation was found at this point. The caller should keep + * lexing. * * PM_TOKEN_STRING_CONTENT - No interpolation was found at this point. The * caller should return this token type. * * PM_TOKEN_EMBEXPR_BEGIN - An embedded expression was found. The caller @@ -8463,9 +8423,9 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) { return PM_TOKEN_STRING_CONTENT; } - // Now we'll check against the character that follows the #. 
If it constitutes - // valid interplation, we'll handle that, otherwise we'll return - // PM_TOKEN_NOT_PROVIDED. + // Now we'll check against the character that follows the #. If it + // constitutes valid interplation, we'll handle that, otherwise we'll return + // 0. switch (pound[1]) { case '@': { // In this case we may have hit an embedded instance or class variable. @@ -8499,7 +8459,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) { // string content. This is like if we get "#@-". In this case the caller // should keep lexing. parser->current.end = pound + 1; - return PM_TOKEN_NOT_PROVIDED; + return 0; } case '$': // In this case we may have hit an embedded global variable. If there's @@ -8549,7 +8509,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) { // In this case we've hit a #$ that does not indicate a global variable. // In this case we'll continue lexing past it. parser->current.end = pound + 1; - return PM_TOKEN_NOT_PROVIDED; + return 0; case '{': // In this case it's the start of an embedded expression. If we have // already consumed content, then we need to return that content as string @@ -8573,7 +8533,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) { // mark that by returning the not provided token type. This tells the // consumer to keep lexing forward. parser->current.end = pound + 1; - return PM_TOKEN_NOT_PROVIDED; + return 0; } } @@ -8628,9 +8588,9 @@ escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length, const // codepoint and not a surrogate pair. 
if (value >= 0xD800 && value <= 0xDFFF) { if (error_location != NULL) { - pm_parser_err(parser, error_location->start, error_location->end, PM_ERR_ESCAPE_INVALID_UNICODE); + pm_parser_err(parser, error_location->start, error_location->length, PM_ERR_ESCAPE_INVALID_UNICODE); } else { - pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE); + pm_parser_err(parser, U32(string - parser->start), U32(length), PM_ERR_ESCAPE_INVALID_UNICODE); } return 0xFFFD; } @@ -8658,14 +8618,14 @@ escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t fla // literal. if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) { if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) { - PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name); + PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(end - start), PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name); } parser->explicit_encoding = PM_ENCODING_UTF_8_ENTRY; } if (!pm_buffer_append_unicode_codepoint(buffer, value)) { - pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE); + pm_parser_err(parser, U32(start - parser->start), U32(end - start), PM_ERR_ESCAPE_INVALID_UNICODE); pm_buffer_append_byte(buffer, 0xEF); pm_buffer_append_byte(buffer, 0xBF); pm_buffer_append_byte(buffer, 0xBD); @@ -8680,7 +8640,7 @@ static inline void escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) { if (byte >= 0x80) { if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name); } parser->explicit_encoding = parser->encoding; @@ -8751,7 +8711,7 @@ escape_read_warn(pm_parser_t *parser, uint8_t flags, 
uint8_t flag, const char *t PM_PARSER_WARN_TOKEN_FORMAT( parser, - parser->current, + &parser->current, PM_WARN_INVALID_CHARACTER, FLAG(flags), FLAG(flag), @@ -8879,7 +8839,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre if (parser->current.end == parser->end) { const uint8_t *start = parser->current.end - 2; - PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start); + PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start); } else if (peek(parser) == '{') { const uint8_t *unicode_codepoints_start = parser->current.end - 2; parser->current.end++; @@ -8908,7 +8868,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre if (hexadecimal_length > 6) { // \u{nnnn} character literal allows only 1-6 hexadecimal digits - pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG); + pm_parser_err(parser, U32(unicode_start - parser->start), U32(hexadecimal_length), PM_ERR_ESCAPE_INVALID_UNICODE_LONG); } else if (hexadecimal_length == 0) { // there are not hexadecimal characters @@ -8918,8 +8878,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre // error instead of us. 
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start)); } else { - pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE); - pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM); + pm_parser_err(parser, PM_TOKEN_END(parser, &parser->current), 0, PM_ERR_ESCAPE_INVALID_UNICODE); + pm_parser_err(parser, PM_TOKEN_END(parser, &parser->current), 0, PM_ERR_ESCAPE_INVALID_UNICODE_TERM); } return; @@ -8940,11 +8900,11 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre // ?\u{nnnn} character literal should contain only one codepoint // and cannot be like ?\u{nnnn mmmm}. if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) { - pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL); + pm_parser_err(parser, U32(extra_codepoints_start - parser->start), U32(parser->current.end - 1 - extra_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL); } if (parser->current.end == parser->end) { - PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start); + PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start); } else if (peek(parser) == '}') { parser->current.end++; } else { @@ -8954,7 +8914,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre // instead of us. 
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start)); } else { - pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM); + pm_parser_err(parser, U32(unicode_codepoints_start - parser->start), U32(parser->current.end - unicode_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_TERM); } } @@ -8969,7 +8929,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start)); } else { const uint8_t *start = parser->current.end - 2; - PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start); + PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start); } } else if (length == 4) { uint32_t value = escape_unicode(parser, parser->current.end, 4, NULL); @@ -9018,7 +8978,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre parser->current.end++; if (match(parser, 'u') || match(parser, 'U')) { - pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER); + pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER); return; } @@ -9054,7 +9014,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre if (peek(parser) != '-') { size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); - pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL); + pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL); return; } @@ -9075,7 +9035,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t 
*regular_expre parser->current.end++; if (match(parser, 'u') || match(parser, 'U')) { - pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER); + pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER); return; } @@ -9094,7 +9054,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre default: { if (!char_is_ascii_printable(peeked)) { size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); - pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL); + pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL); return; } @@ -9112,7 +9072,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre if (peek(parser) != '-') { size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); - pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META); + pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META); return; } @@ -9128,7 +9088,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre parser->current.end++; if (match(parser, 'u') || match(parser, 'U')) { - pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER); + pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER); return; } @@ -9147,7 +9107,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre default: if (!char_is_ascii_printable(peeked)) { size_t width = parser->encoding->char_width(parser->current.end, parser->end - 
parser->current.end); - pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META); + pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META); return; } @@ -9167,7 +9127,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre default: { if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) { size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); - pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META); + pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META); return; } if (parser->current.end < parser->end) { @@ -9280,7 +9240,7 @@ lex_at_variable(pm_parser_t *parser) { } size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end); - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start); } else { pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? 
PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE; pm_parser_err_token(parser, &parser->current, diag_id); @@ -9315,7 +9275,7 @@ parser_comment(pm_parser_t *parser, pm_comment_type_t type) { *comment = (pm_comment_t) { .type = type, - .location = { parser->current.start, parser->current.end } + .location = TOK2LOC(parser, &parser->current) }; return comment; @@ -9334,7 +9294,7 @@ lex_embdoc(pm_parser_t *parser) { if (newline == NULL) { parser->current.end = parser->end; } else { - pm_newline_list_append(&parser->newline_list, newline); + pm_newline_list_append(&parser->newline_list, U32(newline - parser->start + 1)); parser->current.end = newline + 1; } @@ -9342,6 +9302,7 @@ lex_embdoc(pm_parser_t *parser) { parser_lex_callback(parser); // Now, create a comment that is going to be attached to the parser. + const uint8_t *comment_start = parser->current.start; pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC); if (comment == NULL) return PM_TOKEN_EOF; @@ -9367,14 +9328,14 @@ lex_embdoc(pm_parser_t *parser) { if (newline == NULL) { parser->current.end = parser->end; } else { - pm_newline_list_append(&parser->newline_list, newline); + pm_newline_list_append(&parser->newline_list, U32(newline - parser->start + 1)); parser->current.end = newline + 1; } parser->current.type = PM_TOKEN_EMBDOC_END; parser_lex_callback(parser); - comment->location.end = parser->current.end; + comment->location.length = (uint32_t) (parser->current.end - comment_start); pm_list_append(&parser->comment_list, (pm_list_node_t *) comment); return PM_TOKEN_EMBDOC_END; @@ -9387,7 +9348,7 @@ lex_embdoc(pm_parser_t *parser) { if (newline == NULL) { parser->current.end = parser->end; } else { - pm_newline_list_append(&parser->newline_list, newline); + pm_newline_list_append(&parser->newline_list, U32(newline - parser->start + 1)); parser->current.end = newline + 1; } @@ -9397,7 +9358,7 @@ lex_embdoc(pm_parser_t *parser) { pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM); - 
comment->location.end = parser->current.end; + comment->location.length = (uint32_t) (parser->current.end - comment_start); pm_list_append(&parser->comment_list, (pm_list_node_t *) comment); return PM_TOKEN_EOF; @@ -9701,7 +9662,7 @@ pm_lex_percent_delimiter(pm_parser_t *parser) { parser_flush_heredoc_end(parser); } else { // Otherwise, we'll add the newline to the list of newlines. - pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1); + pm_newline_list_append(&parser->newline_list, PM_TOKEN_END(parser, &parser->current) + U32(eol_length)); } uint8_t delimiter = *parser->current.end; @@ -9786,7 +9747,7 @@ parser_lex(pm_parser_t *parser) { if (match_eol_offset(parser, 1)) { chomping = false; } else { - pm_parser_warn(parser, parser->current.end, parser->current.end + 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN); + pm_parser_warn(parser, PM_TOKEN_END(parser, &parser->current), 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN); parser->current.end++; space_seen = true; } @@ -9799,7 +9760,7 @@ parser_lex(pm_parser_t *parser) { parser->heredoc_end = NULL; } else { parser->current.end += eol_length + 1; - pm_newline_list_append(&parser->newline_list, parser->current.end - 1); + pm_newline_list_append(&parser->newline_list, PM_TOKEN_END(parser, &parser->current)); space_seen = true; } } else if (pm_char_is_inline_whitespace(*parser->current.end)) { @@ -9893,7 +9854,7 @@ parser_lex(pm_parser_t *parser) { } if (parser->heredoc_end == NULL) { - pm_newline_list_append(&parser->newline_list, parser->current.end - 1); + pm_newline_list_append(&parser->newline_list, PM_TOKEN_END(parser, &parser->current)); } } @@ -10092,7 +10053,7 @@ parser_lex(pm_parser_t *parser) { // , case ',': if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, 
pm_token_type_human(parser->current.type)); } lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL); @@ -10218,7 +10179,7 @@ parser_lex(pm_parser_t *parser) { } else if (lex_state_beg_p(parser)) { type = PM_TOKEN_USTAR_STAR; } else if (ambiguous_operator_p(parser, space_seen)) { - PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix"); + PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix"); } if (lex_state_operator_p(parser)) { @@ -10243,7 +10204,7 @@ parser_lex(pm_parser_t *parser) { } else if (lex_state_beg_p(parser)) { type = PM_TOKEN_USTAR; } else if (ambiguous_operator_p(parser, space_seen)) { - PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix"); + PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix"); } if (lex_state_operator_p(parser)) { @@ -10369,7 +10330,7 @@ parser_lex(pm_parser_t *parser) { bool ident_error = false; if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) { - pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER); + pm_parser_err(parser, U32(ident_start - parser->start), U32(ident_length), PM_ERR_HEREDOC_IDENTIFIER); ident_error = true; } @@ -10402,7 +10363,7 @@ parser_lex(pm_parser_t *parser) { } else { // Otherwise, we want to indicate that the body of the // heredoc starts on the character after the next newline. 
- pm_newline_list_append(&parser->newline_list, body_start); + pm_newline_list_append(&parser->newline_list, U32(body_start - parser->start + 1)); body_start++; } @@ -10421,7 +10382,7 @@ parser_lex(pm_parser_t *parser) { } if (ambiguous_operator_p(parser, space_seen)) { - PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document"); + PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document"); } if (lex_state_operator_p(parser)) { @@ -10547,7 +10508,7 @@ parser_lex(pm_parser_t *parser) { } else if (lex_state_beg_p(parser)) { type = PM_TOKEN_UAMPERSAND; } else if (ambiguous_operator_p(parser, space_seen)) { - PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix"); + PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix"); } if (lex_state_operator_p(parser)) { @@ -10623,7 +10584,7 @@ parser_lex(pm_parser_t *parser) { } if (ambiguous_operator_p(parser, space_seen)) { - PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator"); + PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator"); } lex_state_set(parser, PM_LEX_STATE_BEG); @@ -10664,7 +10625,7 @@ parser_lex(pm_parser_t *parser) { } if (ambiguous_operator_p(parser, space_seen)) { - PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator"); + PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator"); } lex_state_set(parser, PM_LEX_STATE_BEG); @@ -10763,7 +10724,7 @@ parser_lex(pm_parser_t *parser) { } if (ambiguous_operator_p(parser, space_seen)) { - PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal"); + 
PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal"); } if (lex_state_operator_p(parser)) { @@ -10948,7 +10909,7 @@ parser_lex(pm_parser_t *parser) { } if (ambiguous_operator_p(parser, space_seen)) { - PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal"); + PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal"); } lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG); @@ -10984,40 +10945,40 @@ parser_lex(pm_parser_t *parser) { // token after adding an appropriate error message. if (!width) { if (*parser->current.start >= 0x80) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start); } else if (*parser->current.start == '\\') { switch (peek_at(parser, parser->current.start + 1)) { case ' ': parser->current.end++; - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space"); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space"); break; case '\f': parser->current.end++; - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed"); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed"); break; case '\t': parser->current.end++; - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab"); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab"); break; case '\v': parser->current.end++; - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab"); + 
PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab"); break; case '\r': if (peek_at(parser, parser->current.start + 2) != '\n') { parser->current.end++; - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return"); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return"); break; } PRISM_FALLTHROUGH default: - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash"); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash"); break; } } else if (char_is_ascii_printable(*parser->current.start)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start); } else { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start); } goto lex_next_token; @@ -11043,15 +11004,15 @@ parser_lex(pm_parser_t *parser) { // correct column information for it. 
const uint8_t *cursor = parser->current.end; while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) { - pm_newline_list_append(&parser->newline_list, cursor++); + pm_newline_list_append(&parser->newline_list, U32(++cursor - parser->start)); } parser->current.end = parser->end; parser->current.type = PM_TOKEN___END__; parser_lex_callback(parser); - parser->data_loc.start = parser->current.start; - parser->data_loc.end = parser->current.end; + parser->data_loc.start = PM_TOKEN_START(parser, &parser->current); + parser->data_loc.length = PM_TOKEN_LENGTH(&parser->current); LEX(PM_TOKEN_EOF); } @@ -11076,7 +11037,7 @@ parser_lex(pm_parser_t *parser) { !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) && (type == PM_TOKEN_IDENTIFIER) && ((pm_parser_local_depth(parser, &parser->current) != -1) || - pm_token_is_numbered_parameter(parser->current.start, parser->current.end)) + pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current))) ) { lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL); } @@ -11104,7 +11065,7 @@ parser_lex(pm_parser_t *parser) { whitespace += 1; } } else { - whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list); + whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list, PM_TOKEN_END(parser, &parser->current)); } if (whitespace > 0) { @@ -11219,7 +11180,7 @@ parser_lex(pm_parser_t *parser) { LEX(PM_TOKEN_STRING_CONTENT); } else { // ... else track the newline. 
- pm_newline_list_append(&parser->newline_list, parser->current.end); + pm_newline_list_append(&parser->newline_list, PM_TOKEN_END(parser, &parser->current) + 1); } parser->current.end++; @@ -11247,7 +11208,7 @@ parser_lex(pm_parser_t *parser) { if (*breakpoint == '#') { pm_token_type_t type = lex_interpolation(parser, breakpoint); - if (type == PM_TOKEN_NOT_PROVIDED) { + if (!type) { // If we haven't returned at this point then we had something // that looked like an interpolated class or instance variable // like "#@" but wasn't actually. In this case we'll just skip @@ -11357,7 +11318,7 @@ parser_lex(pm_parser_t *parser) { // would have already have added the newline to the // list. if (parser->heredoc_end == NULL) { - pm_newline_list_append(&parser->newline_list, parser->current.end - 1); + pm_newline_list_append(&parser->newline_list, PM_TOKEN_END(parser, &parser->current)); } } else { parser->current.end = breakpoint + 1; @@ -11404,7 +11365,7 @@ parser_lex(pm_parser_t *parser) { // If we've hit a newline, then we need to track that in // the list of newlines. if (parser->heredoc_end == NULL) { - pm_newline_list_append(&parser->newline_list, breakpoint); + pm_newline_list_append(&parser->newline_list, U32(breakpoint - parser->start + 1)); parser->current.end = breakpoint + 1; breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false); break; @@ -11452,7 +11413,7 @@ parser_lex(pm_parser_t *parser) { LEX(PM_TOKEN_STRING_CONTENT); } else { // ... else track the newline. - pm_newline_list_append(&parser->newline_list, parser->current.end); + pm_newline_list_append(&parser->newline_list, PM_TOKEN_END(parser, &parser->current) + 1); } parser->current.end++; @@ -11499,7 +11460,7 @@ parser_lex(pm_parser_t *parser) { // interpolation. 
pm_token_type_t type = lex_interpolation(parser, breakpoint); - if (type == PM_TOKEN_NOT_PROVIDED) { + if (!type) { // If we haven't returned at this point then we had // something that looked like an interpolated class or // instance variable like "#@" but wasn't actually. In @@ -11617,7 +11578,7 @@ parser_lex(pm_parser_t *parser) { // would have already have added the newline to the // list. if (parser->heredoc_end == NULL) { - pm_newline_list_append(&parser->newline_list, parser->current.end - 1); + pm_newline_list_append(&parser->newline_list, PM_TOKEN_END(parser, &parser->current)); } } else { parser->current.end = breakpoint + 1; @@ -11669,7 +11630,7 @@ parser_lex(pm_parser_t *parser) { // for the terminator in case the terminator is a // newline character. if (parser->heredoc_end == NULL) { - pm_newline_list_append(&parser->newline_list, breakpoint); + pm_newline_list_append(&parser->newline_list, U32(breakpoint - parser->start + 1)); parser->current.end = breakpoint + 1; breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true); break; @@ -11723,7 +11684,7 @@ parser_lex(pm_parser_t *parser) { LEX(PM_TOKEN_STRING_CONTENT); } else { // ... else track the newline. - pm_newline_list_append(&parser->newline_list, parser->current.end); + pm_newline_list_append(&parser->newline_list, PM_TOKEN_END(parser, &parser->current) + 1); } parser->current.end++; @@ -11752,7 +11713,7 @@ parser_lex(pm_parser_t *parser) { case '#': { pm_token_type_t type = lex_interpolation(parser, breakpoint); - if (type == PM_TOKEN_NOT_PROVIDED) { + if (!type) { // If we haven't returned at this point then we had something that // looked like an interpolated class or instance variable like "#@" // but wasn't actually. 
In this case we'll just skip to the next @@ -11852,7 +11813,7 @@ parser_lex(pm_parser_t *parser) { (memcmp(terminator_start, ident_start, ident_length) == 0) ) { if (newline != NULL) { - pm_newline_list_append(&parser->newline_list, newline); + pm_newline_list_append(&parser->newline_list, U32(newline - parser->start + 1)); } parser->current.end = terminator_end; @@ -11924,7 +11885,7 @@ parser_lex(pm_parser_t *parser) { LEX(PM_TOKEN_STRING_CONTENT); } - pm_newline_list_append(&parser->newline_list, breakpoint); + pm_newline_list_append(&parser->newline_list, U32(breakpoint - parser->start + 1)); // If we have a - or ~ heredoc, then we can match after // some leading whitespace. @@ -12044,7 +12005,7 @@ parser_lex(pm_parser_t *parser) { const uint8_t *end = parser->current.end; if (parser->heredoc_end == NULL) { - pm_newline_list_append(&parser->newline_list, end); + pm_newline_list_append(&parser->newline_list, U32(end - parser->start + 1)); } // Here we want the buffer to only @@ -12076,7 +12037,7 @@ parser_lex(pm_parser_t *parser) { case '#': { pm_token_type_t type = lex_interpolation(parser, breakpoint); - if (type == PM_TOKEN_NOT_PROVIDED) { + if (!type) { // If we haven't returned at this point then we had // something that looked like an interpolated class // or instance variable like "#@" but wasn't @@ -12390,10 +12351,10 @@ expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) { if (accept1(parser, type)) return; const uint8_t *location = parser->previous.end; - pm_parser_err(parser, location, location, diag_id); + pm_parser_err(parser, U32(location - parser->start), 0, diag_id); parser->previous.start = location; - parser->previous.type = PM_TOKEN_MISSING; + parser->previous.type = 0; } /** @@ -12405,10 +12366,10 @@ expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_di if (accept2(parser, type1, type2)) return; const uint8_t *location = parser->previous.end; - pm_parser_err(parser, location, location, 
diag_id); + pm_parser_err(parser, U32(location - parser->start), 0, diag_id); parser->previous.start = location; - parser->previous.type = PM_TOKEN_MISSING; + parser->previous.type = 0; } /** @@ -12422,7 +12383,7 @@ expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ide } else { pm_parser_err_heredoc_term(parser, ident_start, ident_length); parser->previous.start = parser->previous.end; - parser->previous.type = PM_TOKEN_MISSING; + parser->previous.type = 0; } } @@ -12436,10 +12397,11 @@ static void expect1_opening(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id, const pm_token_t *opening) { if (accept1(parser, type)) return; - pm_parser_err(parser, opening->start, opening->end, diag_id); + const uint8_t *start = opening->start; + pm_parser_err(parser, U32(start - parser->start), U32(opening->end - start), diag_id); parser->previous.start = parser->previous.end; - parser->previous.type = PM_TOKEN_MISSING; + parser->previous.type = 0; } static pm_node_t * @@ -12663,7 +12625,7 @@ parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) { default: break; } - pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end); + pm_constant_id_t name = pm_parser_constant_id_raw(parser, parser->start + PM_NODE_START(target), parser->start + PM_NODE_END(target)); pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0); pm_node_destroy(parser, target); @@ -12725,8 +12687,8 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p target->type = PM_GLOBAL_VARIABLE_TARGET_NODE; return target; case PM_LOCAL_VARIABLE_READ_NODE: { - if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) { - PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start); + if (pm_token_is_numbered_parameter(parser, 
PM_NODE_START(target), PM_NODE_LENGTH(target))) { + PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(target)); pm_node_unreference(parser, target); } @@ -12777,10 +12739,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p // target then this is either a method call or a local variable // write. if ( - (call->message_loc.start != NULL) && - (call->message_loc.end[-1] != '!') && - (call->message_loc.end[-1] != '?') && - (call->opening_loc.start == NULL) && + (call->message_loc.length > 0) && + (parser->start[call->message_loc.start + call->message_loc.length - 1] != '!') && + (parser->start[call->message_loc.start + call->message_loc.length - 1] != '?') && + (call->opening_loc.length == 0) && (call->arguments == NULL) && (call->block == NULL) ) { @@ -12794,15 +12756,14 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p // When it was parsed in the prefix position, foo was seen as a // method call with no receiver and no arguments. Now we have an // =, so we know it's a local variable write. 
- const pm_location_t message_loc = call->message_loc; - - pm_constant_id_t name = pm_parser_local_add_location(parser, message_loc.start, message_loc.end, 0); + pm_location_t message_loc = call->message_loc; + pm_constant_id_t name = pm_parser_local_add_location(parser, &message_loc, 0); pm_node_destroy(parser, target); return UP(pm_local_variable_target_node_create(parser, &message_loc, name, 0)); } - if (peek_at(parser, call->message_loc.start) == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) { + if (peek_at(parser, parser->start + call->message_loc.start) == '_' || parser->encoding->alnum_char(parser->start + call->message_loc.start, (ptrdiff_t) call->message_loc.length)) { if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) { pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION); } @@ -12910,22 +12871,21 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod case PM_LOCAL_VARIABLE_READ_NODE: { pm_local_variable_read_node_t *local_read = (pm_local_variable_read_node_t *) target; + pm_location_t location = target->location; pm_constant_id_t name = local_read->name; - pm_location_t name_loc = target->location; - uint32_t depth = local_read->depth; pm_scope_t *scope = pm_parser_scope_find(parser, depth); - if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) { + if (pm_token_is_numbered_parameter(parser, PM_NODE_START(target), PM_NODE_LENGTH(target))) { pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? 
PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED; - PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start); + PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), diag_id, parser->start + PM_NODE_START(target)); pm_node_unreference(parser, target); } pm_locals_unread(&scope->locals, name); pm_node_destroy(parser, target); - return UP(pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator)); + return UP(pm_local_variable_write_node_create(parser, name, depth, value, &location, operator)); } case PM_IT_LOCAL_VARIABLE_READ_NODE: { pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2); @@ -12962,10 +12922,10 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod // target then this is either a method call or a local variable // write. if ( - (call->message_loc.start != NULL) && - (call->message_loc.end[-1] != '!') && - (call->message_loc.end[-1] != '?') && - (call->opening_loc.start == NULL) && + (call->message_loc.length > 0) && + (parser->start[call->message_loc.start + call->message_loc.length - 1] != '!') && + (parser->start[call->message_loc.start + call->message_loc.length - 1] != '?') && + (call->opening_loc.length == 0) && (call->arguments == NULL) && (call->block == NULL) ) { @@ -12979,19 +12939,19 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod // When it was parsed in the prefix position, foo was seen as a // method call with no receiver and no arguments. Now we have an // =, so we know it's a local variable write. 
- const pm_location_t message = call->message_loc; + pm_location_t message_loc = call->message_loc; - pm_parser_local_add_location(parser, message.start, message.end, 0); + pm_refute_numbered_parameter(parser, message_loc.start, message_loc.length); + pm_parser_local_add_location(parser, &message_loc, 0); pm_node_destroy(parser, target); - pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end); - target = UP(pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator)); + pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, parser->start + PM_LOCATION_START(&message_loc), parser->start + PM_LOCATION_END(&message_loc)); + target = UP(pm_local_variable_write_node_create(parser, constant_id, 0, value, &message_loc, operator)); - pm_refute_numbered_parameter(parser, message.start, message.end); return target; } - if (char_is_identifier_start(parser, call->message_loc.start, parser->end - call->message_loc.start)) { + if (char_is_identifier_start(parser, parser->start + call->message_loc.start, (ptrdiff_t) call->message_loc.length)) { // When we get here, we have a method call, because it was // previously marked as a method call but now we have an =. 
This // looks like: @@ -13006,8 +12966,8 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod call->arguments = arguments; pm_arguments_node_arguments_append(arguments, value); - call->base.location.end = arguments->base.location.end; - call->equal_loc = PM_LOCATION_TOKEN_VALUE(operator); + PM_NODE_LENGTH_SET_NODE(call, arguments); + call->equal_loc = TOK2LOC(parser, operator); parse_write_name(parser, &call->name); pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY)); @@ -13025,11 +12985,11 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod } pm_arguments_node_arguments_append(call->arguments, value); - target->location.end = value->location.end; + PM_NODE_LENGTH_SET_NODE(target, value); // Replace the name with "[]=". call->name = pm_parser_constant_id_constant(parser, "[]=", 3); - call->equal_loc = PM_LOCATION_TOKEN_VALUE(operator); + call->equal_loc = TOK2LOC(parser, operator); // Ensure that the arguments for []= don't contain keywords pm_index_arguments_check(parser, call->arguments, call->block); @@ -13080,7 +13040,7 @@ parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t default: break; } - pm_constant_id_t name = pm_parser_local_add_location(parser, target->location.start, target->location.end, 1); + pm_constant_id_t name = pm_parser_local_add_location(parser, &target->location, 1); pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals); pm_node_destroy(parser, target); @@ -13242,9 +13202,9 @@ parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) { // This is an inlined version of accept1 because the error that we // want to add has varargs. If this happens again, we should // probably extract a helper function. 
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type)); parser->previous.start = parser->previous.end; - parser->previous.type = PM_TOKEN_MISSING; + parser->previous.type = 0; } } @@ -13269,20 +13229,20 @@ parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) { */ static void pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) { - const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true); + const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start, parser->start_line, literals, node, true); if (duplicated != NULL) { pm_buffer_t buffer = { 0 }; - pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated); + pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start, parser->start_line, parser->encoding->name, duplicated); pm_diagnostic_list_append_format( &parser->warning_list, duplicated->location.start, - duplicated->location.end, + duplicated->location.length, PM_WARN_DUPLICATED_HASH_KEY, (int) pm_buffer_length(&buffer), pm_buffer_value(&buffer), - pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line + pm_newline_list_line_column(&parser->newline_list, PM_NODE_START(node), parser->start_line).line ); pm_buffer_free(&buffer); @@ -13297,14 +13257,14 @@ static void pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) { pm_node_t *previous; - if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) { + if ((previous = pm_static_literals_add(&parser->newline_list, parser->start, 
parser->start_line, literals, node, false)) != NULL) { pm_diagnostic_list_append_format( &parser->warning_list, - node->location.start, - node->location.end, + PM_NODE_START(node), + PM_NODE_LENGTH(node), PM_WARN_DUPLICATED_WHEN_CLAUSE, - pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line, - pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line + pm_newline_list_line_column(&parser->newline_list, PM_NODE_START(node), parser->start_line).line, + pm_newline_list_line_column(&parser->newline_list, PM_NODE_START(previous), parser->start_line).line ); } } @@ -13350,7 +13310,6 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod pm_node_t *key = UP(pm_symbol_node_label_create(parser, &label)); pm_hash_key_static_literals_add(parser, literals, key); - pm_token_t operator = not_provided(parser); pm_node_t *value = NULL; if (token_begins_expression_p(parser->current.type)) { @@ -13364,7 +13323,7 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 }; if (identifier.end[-1] == '!' 
|| identifier.end[-1] == '?') { - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ); + PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ); } else { depth = pm_parser_local_depth(parser, &identifier); } @@ -13376,11 +13335,11 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod } } - value->location.end++; + value->location.length++; value = UP(pm_implicit_node_create(parser, value)); } - element = UP(pm_assoc_node_create(parser, key, &operator, value)); + element = UP(pm_assoc_node_create(parser, key, NULL, value)); break; } default: { @@ -13394,16 +13353,14 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod pm_hash_key_static_literals_add(parser, literals, key); - pm_token_t operator; - if (pm_symbol_node_label_p(key)) { - operator = not_provided(parser); - } else { + pm_token_t operator = { 0 }; + if (!pm_symbol_node_label_p(parser, key)) { expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET); operator = parser->previous; } pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1)); - element = UP(pm_assoc_node_create(parser, key, &operator, value)); + element = UP(pm_assoc_node_create(parser, key, NTOK2PTR(operator), value)); break; } } @@ -13434,14 +13391,14 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod static inline bool argument_allowed_for_bare_hash(pm_parser_t *parser, pm_node_t *argument) { - if (pm_symbol_node_label_p(argument)) { + if (pm_symbol_node_label_p(parser, argument)) { return true; } switch (PM_NODE_TYPE(argument)) { case PM_CALL_NODE: { pm_call_node_t *cast = (pm_call_node_t *) argument; - if (cast->opening_loc.start == NULL && cast->arguments != NULL) { + if (cast->opening_loc.length == 0 && cast->arguments != NULL) { if (PM_NODE_FLAG_P(cast->arguments, 
PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS | PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) { return false; } @@ -13560,7 +13517,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1)); if (parsed_bare_hash) { - pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT); + pm_parser_err(parser, PM_TOKEN_START(parser, &operator), PM_NODE_END(expression) - PM_TOKEN_START(parser, &operator), PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT); } argument = UP(pm_splat_node_create(parser, &operator, expression)); @@ -13585,7 +13542,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for // ... operator. if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) { pm_range_node_t *range = (pm_range_node_t *) right; - pm_parser_err(parser, range->operator_loc.start, range->operator_loc.end, PM_ERR_UNEXPECTED_RANGE_OPERATOR); + pm_parser_err(parser, range->operator_loc.start, range->operator_loc.length, PM_ERR_UNEXPECTED_RANGE_OPERATOR); } argument = UP(pm_range_node_create(parser, NULL, &operator, right)); @@ -13613,16 +13570,14 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for bool contains_keywords = false; bool contains_keyword_splat = false; - if (argument_allowed_for_bare_hash(parser, argument)){ + if (argument_allowed_for_bare_hash(parser, argument)) { if (parsed_bare_hash) { pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH); } - pm_token_t operator; + pm_token_t operator = { 0 }; if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) { operator = parser->previous; - } else { - operator = not_provided(parser); } pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser); @@ -13634,7 +13589,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for // Finish 
parsing the one we are part way through. pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1)); - argument = UP(pm_assoc_node_create(parser, argument, &operator, value)); + argument = UP(pm_assoc_node_create(parser, argument, NTOK2PTR(operator), value)); pm_keyword_hash_node_elements_append(bare_hash, argument); argument = UP(bare_hash); @@ -13691,7 +13646,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for // `foo(bar 1 do end, 2)` should be rejected. if (PM_NODE_TYPE_P(argument, PM_CALL_NODE)) { pm_call_node_t *call = (pm_call_node_t *) argument; - if (call->opening_loc.start == NULL && call->arguments != NULL && call->block != NULL) { + if (call->opening_loc.length == 0 && call->arguments != NULL && call->block != NULL) { pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA); break; } @@ -13723,7 +13678,7 @@ parse_required_destructured_parameter(pm_parser_t *parser) { expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER); pm_multi_target_node_t *node = pm_multi_target_node_create(parser); - pm_multi_target_node_opening_set(node, &parser->previous); + pm_multi_target_node_opening_set(parser, node, &parser->previous); do { pm_node_t *param; @@ -13771,7 +13726,7 @@ parse_required_destructured_parameter(pm_parser_t *parser) { accept1(parser, PM_TOKEN_NEWLINE); expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER); - pm_multi_target_node_closing_set(node, &parser->previous); + pm_multi_target_node_closing_set(parser, node, &parser->previous); return node; } @@ -13887,7 +13842,7 @@ parse_parameters( parser_lex(parser); pm_token_t operator = parser->previous; - pm_token_t name; + pm_token_t name = { 0 }; bool repeated = false; if (accept1(parser, PM_TOKEN_IDENTIFIER)) { @@ -13895,11 +13850,10 @@ parse_parameters( repeated = pm_parser_parameter_name_check(parser, &name); pm_parser_local_add_token(parser, 
&name, 1); } else { - name = not_provided(parser); parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK; } - pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator); + pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, NTOK2PTR(name), &operator); if (repeated) { pm_node_flag_set_repeated_parameter(UP(param)); } @@ -13994,7 +13948,7 @@ parse_parameters( // reads of that parameter, then we need to warn that we // have a circular definition. if ((parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) { - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR); + PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &name, PM_ERR_PARAMETER_CIRCULAR); } context_pop(parser); @@ -14034,9 +13988,9 @@ parse_parameters( local.end -= 1; if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) { - pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT); + pm_parser_err(parser, PM_TOKEN_START(parser, &local), PM_TOKEN_LENGTH(&local), PM_ERR_ARGUMENT_FORMAL_CONSTANT); } else if (local.end[-1] == '!' 
|| local.end[-1] == '?') { - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE); + PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE); } bool repeated = pm_parser_parameter_name_check(parser, &local); @@ -14085,7 +14039,7 @@ parse_parameters( if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser); if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) { - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR); + PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_PARAMETER_CIRCULAR); } param = UP(pm_optional_keyword_parameter_node_create(parser, &name, value)); @@ -14120,7 +14074,7 @@ parse_parameters( parser_lex(parser); pm_token_t operator = parser->previous; - pm_token_t name; + pm_token_t name = { 0 }; bool repeated = false; if (accept1(parser, PM_TOKEN_IDENTIFIER)) { @@ -14128,11 +14082,10 @@ parse_parameters( repeated = pm_parser_parameter_name_check(parser, &name); pm_parser_local_add_token(parser, &name, 1); } else { - name = not_provided(parser); parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS; } - pm_node_t *param = UP(pm_rest_parameter_node_create(parser, &operator, &name)); + pm_node_t *param = UP(pm_rest_parameter_node_create(parser, &operator, NTOK2PTR(name))); if (repeated) { pm_node_flag_set_repeated_parameter(param); } @@ -14162,7 +14115,7 @@ parse_parameters( param = UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous)); } else { - pm_token_t name; + pm_token_t name = { 0 }; bool repeated = false; if (accept1(parser, PM_TOKEN_IDENTIFIER)) { @@ -14170,11 +14123,10 @@ parse_parameters( repeated = pm_parser_parameter_name_check(parser, &name); pm_parser_local_add_token(parser, &name, 1); } else { - name = not_provided(parser); parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS; } - param = 
UP(pm_keyword_rest_parameter_node_create(parser, &operator, &name)); + param = UP(pm_keyword_rest_parameter_node_create(parser, &operator, NTOK2PTR(name))); if (repeated) { pm_node_flag_set_repeated_parameter(param); } @@ -14236,7 +14188,7 @@ parse_parameters( pm_do_loop_stack_pop(parser); // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`. - if (params->base.location.start == params->base.location.end) { + if (PM_NODE_START(params) == PM_NODE_END(params)) { pm_node_destroy(parser, UP(params)); return NULL; } @@ -14260,7 +14212,7 @@ token_newline_index(const pm_parser_t *parser) { // start of a heredoc, so we cannot rely on looking at the previous // offset of the newline list, and instead must go through the whole // process of a binary search for the line number. - return (size_t) pm_newline_list_line(&parser->newline_list, parser->current.start, 0); + return (size_t) pm_newline_list_line(&parser->newline_list, PM_TOKEN_START(parser, &parser->current), 0); } } @@ -14334,8 +14286,8 @@ parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_ind // Otherwise, add a warning. PM_PARSER_WARN_FORMAT( parser, - closing_token->start, - closing_token->end, + PM_TOKEN_START(parser, closing_token), + PM_TOKEN_LENGTH(closing_token), PM_WARN_INDENTATION_MISMATCH, (int) (closing_token->end - closing_token->start), (const char *) closing_token->start, @@ -14375,7 +14327,7 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_ // we're going to have an empty list of exceptions to rescue (which // implies StandardError). 
parser_lex(parser); - pm_rescue_node_operator_set(rescue, &parser->previous); + pm_rescue_node_operator_set(parser, rescue, &parser->previous); pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1)); reference = parse_target(parser, reference, false, false); @@ -14405,7 +14357,7 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_ // If we hit a `=>` then we're going to parse the exception variable. Once // we've done that, we'll break out of the loop and parse the statements. if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) { - pm_rescue_node_operator_set(rescue, &parser->previous); + pm_rescue_node_operator_set(parser, rescue, &parser->previous); pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1)); reference = parse_target(parser, reference, false, false); @@ -14420,11 +14372,11 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_ if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) { - rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous); + rescue->then_keyword_loc = TOK2LOC(parser, &parser->previous); } } else { expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM); - rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous); + rescue->then_keyword_loc = TOK2LOC(parser, &parser->previous); } if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) { @@ -14462,11 +14414,10 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_ // since we won't know the end until we've found all subsequent // clauses. This sets the end location on all rescues once we know it. 
if (current != NULL) { - const uint8_t *end_to_set = current->base.location.end; pm_rescue_node_t *clause = parent_node->rescue_clause; while (clause != NULL) { - clause->base.location.end = end_to_set; + PM_NODE_LENGTH_SET_NODE(clause, current); clause = clause->subsequent; } } @@ -14547,10 +14498,10 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_ if (match1(parser, PM_TOKEN_KEYWORD_END)) { if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false); - pm_begin_node_end_keyword_set(parent_node, &parser->current); + pm_begin_node_end_keyword_set(parser, parent_node, &parser->current); } else { - pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; - pm_begin_node_end_keyword_set(parent_node, &end_keyword); + pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_KEYWORD_END, .start = parser->previous.end, .end = parser->previous.end }; + pm_begin_node_end_keyword_set(parser, parent_node, &end_keyword); } } @@ -14560,11 +14511,11 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_ */ static pm_begin_node_t * parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) { - pm_token_t begin_keyword = not_provided(parser); - pm_begin_node_t *node = pm_begin_node_create(parser, &begin_keyword, statements); - + pm_begin_node_t *node = pm_begin_node_create(parser, NULL, statements); parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1)); - node->base.location.start = start; + + node->base.location.start = U32(start - parser->start); + PM_NODE_LENGTH_SET_TOKEN(parser, node, &parser->current); return node; } @@ -14602,7 +14553,7 @@ parse_block_parameters( } pm_block_parameters_node_t *block_parameters = 
pm_block_parameters_node_create(parser, parameters, opening); - if ((opening->type != PM_TOKEN_NOT_PROVIDED)) { + if (opening != NULL) { accept1(parser, PM_TOKEN_NEWLINE); if (accept1(parser, PM_TOKEN_SEMICOLON)) { @@ -14715,8 +14666,8 @@ parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK); } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) { pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK); - } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) { - numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0')); + } else if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) { + numbered_parameter = MAX(numbered_parameter, (uint8_t) (parser->start[node->location.start + 1] - '0')); } else { assert(false && "unreachable"); } @@ -14735,9 +14686,7 @@ parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_ for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) { scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER; } - - const pm_location_t location = { .start = opening->start, .end = closing->end }; - return UP(pm_numbered_parameters_node_create(parser, &location, numbered_parameter)); + return UP(pm_numbered_parameters_node_create(parser, opening, closing, numbered_parameter)); } if (it_parameter) { @@ -14773,7 +14722,7 @@ parse_block(pm_parser_t *parser, uint16_t depth) { expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM); } - pm_block_parameters_node_closing_set(block_parameters, &parser->previous); + pm_block_parameters_node_closing_set(parser, block_parameters, &parser->previous); } accept1(parser, PM_TOKEN_NEWLINE); @@ -14823,22 +14772,22 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept if 
(accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) { found |= true; - arguments->opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous); + arguments->opening_loc = TOK2LOC(parser, &parser->previous); if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { - arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous); + arguments->closing_loc = TOK2LOC(parser, &parser->previous); } else { pm_accepts_block_stack_push(parser, true); parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1)); if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type)); parser->previous.start = parser->previous.end; - parser->previous.type = PM_TOKEN_MISSING; + parser->previous.type = 0; } pm_accepts_block_stack_pop(parser); - arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous); + arguments->closing_loc = TOK2LOC(parser, &parser->previous); } } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) { found |= true; @@ -14853,7 +14802,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept // then we have a trailing comma where we need to check whether it is // allowed or not. 
if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type)); } pm_accepts_block_stack_pop(parser); @@ -15157,7 +15106,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits); pm_token_t keyword = parser->previous; - pm_token_t then_keyword = not_provided(parser); + pm_token_t then_keyword = { 0 }; pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1)); pm_statements_node_t *statements = NULL; @@ -15169,15 +15118,14 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); } - pm_token_t end_keyword = not_provided(parser); pm_node_t *parent = NULL; switch (context) { case PM_CONTEXT_IF: - parent = UP(pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword)); + parent = UP(pm_if_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL)); break; case PM_CONTEXT_UNLESS: - parent = UP(pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements)); + parent = UP(pm_unless_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements)); break; default: assert(false && "unreachable"); @@ -15191,7 +15139,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl if (context == PM_CONTEXT_IF) { while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) { if (parser_end_of_line_p(parser)) { - PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL); + PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, 
PM_WARN_KEYWORD_EOL); } parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false); @@ -15205,7 +15153,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl pm_accepts_block_stack_pop(parser); accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); - pm_node_t *elsif = UP(pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword)); + pm_node_t *elsif = UP(pm_if_node_create(parser, &elsif_keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL)); ((pm_if_node_t *) current)->subsequent = elsif; current = elsif; } @@ -15253,12 +15201,12 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl while (recursing) { switch (PM_NODE_TYPE(current)) { case PM_IF_NODE: - pm_if_node_end_keyword_loc_set((pm_if_node_t *) current, &parser->previous); + pm_if_node_end_keyword_loc_set(parser, (pm_if_node_t *) current, &parser->previous); current = ((pm_if_node_t *) current)->subsequent; recursing = current != NULL; break; case PM_ELSE_NODE: - pm_else_node_end_keyword_loc_set((pm_else_node_t *) current, &parser->previous); + pm_else_node_end_keyword_loc_set(parser, (pm_else_node_t *) current, &parser->previous); recursing = false; break; default: { @@ -15270,7 +15218,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl break; } case PM_CONTEXT_UNLESS: - pm_unless_node_end_keyword_loc_set((pm_unless_node_t *) parent, &parser->previous); + pm_unless_node_end_keyword_loc_set(parser, (pm_unless_node_t *) parent, &parser->previous); break; default: assert(false && "unreachable"); @@ -15385,10 +15333,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) { // "aaa #{bbb} #@ccc ddd" // ^^^^ ^ ^^^^ case PM_TOKEN_STRING_CONTENT: { - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - - pm_node_t *node = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, 
&closing)); + pm_node_t *node = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL)); pm_node_flag_set(node, parse_unescaped_encoding(parser)); parser_lex(parser); @@ -15423,9 +15368,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) { parser->brace_nesting = brace_nesting; lex_state_set(parser, state); - expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END); - pm_token_t closing = parser->previous; // If this set of embedded statements only contains a single // statement, then Ruby does not consider it as a possible statement @@ -15434,7 +15377,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) { pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE); } - return UP(pm_embedded_statements_node_create(parser, &opening, statements, &closing)); + return UP(pm_embedded_statements_node_create(parser, &opening, statements, &parser->previous)); } // Here the lexer has returned the beginning of an embedded variable. @@ -15490,7 +15433,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) { // missing node. 
default: expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID); - variable = UP(pm_missing_node_create(parser, parser->current.start, parser->current.end)); + variable = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current))); break; } @@ -15522,9 +15465,7 @@ parse_operator_symbol_name(const pm_token_t *name) { static pm_node_t * parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) { - pm_token_t closing = not_provided(parser); - pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing); - + pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, NULL); const uint8_t *end = parse_operator_symbol_name(&parser->current); if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state); @@ -15567,9 +15508,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s break; } - pm_token_t closing = not_provided(parser); - pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing); - + pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, NULL); pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end); pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false)); @@ -15581,10 +15520,13 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s if (match1(parser, PM_TOKEN_STRING_END)) { if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state); parser_lex(parser); + pm_token_t content = { + .type = PM_TOKEN_STRING_CONTENT, + .start = parser->previous.start, + .end = parser->previous.start + }; - pm_token_t content = not_provided(parser); - pm_token_t closing = parser->previous; - return UP(pm_symbol_node_create(parser, &opening, &content, &closing)); + return UP(pm_symbol_node_create(parser, 
&opening, &content, &parser->previous)); } // Now we can parse the first part of the symbol. @@ -15615,7 +15557,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED); } - pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous); + pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->previous); return UP(symbol); } @@ -15638,12 +15580,10 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s // interpolated string node, so that's what we'll do here. if (match1(parser, PM_TOKEN_STRING_CONTENT)) { pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening); - pm_token_t bounds = not_provided(parser); - - pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped)); + pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped)); pm_interpolated_symbol_node_append(symbol, part); - part = UP(pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string)); + part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->current, NULL, &parser->current_string)); pm_interpolated_symbol_node_append(symbol, part); if (next_state != PM_LEX_STATE_NONE) { @@ -15653,7 +15593,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s parser_lex(parser); expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC); - pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous); + pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->previous); return UP(symbol); } } else { @@ -15681,20 +15621,15 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s static inline pm_node_t * parse_undef_argument(pm_parser_t *parser, uint16_t depth) { switch (parser->current.type) { - case 
PM_CASE_OPERATOR: { - const pm_token_t opening = not_provided(parser); - return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE); - } + case PM_CASE_OPERATOR: + return parse_operator_symbol(parser, NULL, PM_LEX_STATE_NONE); case PM_CASE_KEYWORD: case PM_TOKEN_CONSTANT: case PM_TOKEN_IDENTIFIER: case PM_TOKEN_METHOD_NAME: { parser_lex(parser); - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing); - + pm_symbol_node_t *symbol = pm_symbol_node_create(parser, NULL, &parser->previous, NULL); pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end); pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false)); @@ -15708,7 +15643,7 @@ parse_undef_argument(pm_parser_t *parser, uint16_t depth) { } default: pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT); - return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end)); + return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current))); } } @@ -15721,10 +15656,8 @@ parse_undef_argument(pm_parser_t *parser, uint16_t depth) { static inline pm_node_t * parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) { switch (parser->current.type) { - case PM_CASE_OPERATOR: { - const pm_token_t opening = not_provided(parser); - return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE); - } + case PM_CASE_OPERATOR: + return parse_operator_symbol(parser, NULL, first ? 
PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE); case PM_CASE_KEYWORD: case PM_TOKEN_CONSTANT: case PM_TOKEN_IDENTIFIER: @@ -15732,10 +15665,7 @@ parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) { if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM); parser_lex(parser); - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing); - + pm_symbol_node_t *symbol = pm_symbol_node_create(parser, NULL, &parser->previous, NULL); pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end); pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false)); @@ -15758,7 +15688,7 @@ parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) { return UP(pm_global_variable_read_node_create(parser, &parser->previous)); default: pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT); - return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end)); + return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current))); } } @@ -15770,7 +15700,7 @@ static pm_node_t * parse_variable(pm_parser_t *parser) { pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous); int depth; - bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end); + bool is_numbered_param = pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)); if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) { return UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false)); @@ -15840,7 +15770,7 @@ parse_method_definition_name(pm_parser_t *parser) { parser_lex(parser); return 
parser->previous; case PM_TOKEN_IDENTIFIER: - pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end); + pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)); parser_lex(parser); return parser->previous; case PM_CASE_OPERATOR: @@ -15848,8 +15778,8 @@ parse_method_definition_name(pm_parser_t *parser) { parser_lex(parser); return parser->previous; default: - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type)); - return (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end }; + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type)); + return (pm_token_t) { .type = 0, .start = parser->current.start, .end = parser->current.end }; } } @@ -15977,10 +15907,8 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1 // If we get here, then we have an end of a label immediately // after a start. In that case we'll create an empty symbol // node. 
- pm_token_t content = parse_strings_empty_content(parser->previous.start); - pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous); - - pm_string_shared_init(&symbol->unescaped, content.start, content.end); + pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, NULL, &parser->previous); + pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.start); node = UP(symbol); if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL); @@ -15992,7 +15920,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1 if (match1(parser, PM_TOKEN_EOF)) { unescaped = PM_STRING_EMPTY; - content = not_provided(parser); + content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->start, .end = parser->start }; } else { unescaped = parser->current_string; expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT); @@ -16012,13 +15940,11 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1 // be able to contain all of the parts. 
if (match1(parser, PM_TOKEN_STRING_CONTENT)) { pm_node_list_t parts = { 0 }; - - pm_token_t delimiters = not_provided(parser); - pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped)); + pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped)); pm_node_list_append(&parts, part); do { - part = UP(pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters)); + part = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL)); pm_node_list_append(&parts, part); parser_lex(parser); } while (match1(parser, PM_TOKEN_STRING_CONTENT)); @@ -16036,9 +15962,9 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1 } else if (accept1(parser, PM_TOKEN_STRING_END)) { node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped)); } else { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type)); parser->previous.start = parser->previous.end; - parser->previous.type = PM_TOKEN_MISSING; + parser->previous.type = 0; node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped)); } } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) { @@ -16061,10 +15987,10 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1 if (!accept1(parser, PM_TOKEN_STRING_END)) { const uint8_t *location = parser->previous.end; if (location > parser->start && location[-1] == '\n') location--; - pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF); + pm_parser_err(parser, U32(location - parser->start), 0, PM_ERR_STRING_LITERAL_EOF); parser->previous.start = parser->previous.end; - parser->previous.type = PM_TOKEN_MISSING; + 
parser->previous.type = 0; } } else if (accept1(parser, PM_TOKEN_LABEL_END)) { node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true))); @@ -16073,10 +15999,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1 // If we get here, then we have interpolation so we'll need // to create a string or symbol node with interpolation. pm_node_list_t parts = { 0 }; - pm_token_t string_opening = not_provided(parser); - pm_token_t string_closing = not_provided(parser); - - pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped)); + pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped)); pm_node_flag_set(part, parse_unescaped_encoding(parser)); pm_node_list_append(&parts, part); @@ -16153,9 +16076,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1 } concating = true; - pm_token_t bounds = not_provided(parser); - - pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds); + pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, NULL, NULL, NULL); pm_interpolated_string_node_append(container, current); current = UP(container); } @@ -16182,10 +16103,10 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag static void parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) { // Skip this capture if it starts with an underscore. 
- if (peek_at(parser, location->start) == '_') return; + if (peek_at(parser, parser->start + location->start) == '_') return; if (pm_constant_id_list_includes(captures, capture)) { - pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE); + pm_parser_err(parser, location->start, location->length, PM_ERR_PATTERN_CAPTURE_DUPLICATE); } else { pm_constant_id_list_append(captures, capture); } @@ -16254,13 +16175,13 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures case PM_ARRAY_PATTERN_NODE: { pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner; - if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) { - pattern_node->base.location.start = node->location.start; - pattern_node->base.location.end = closing.end; + if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) { + PM_NODE_START_SET_NODE(pattern_node, node); + PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing); pattern_node->constant = node; - pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening); - pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing); + pattern_node->opening_loc = TOK2LOC(parser, &opening); + pattern_node->closing_loc = TOK2LOC(parser, &closing); return UP(pattern_node); } @@ -16270,13 +16191,13 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures case PM_FIND_PATTERN_NODE: { pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner; - if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) { - pattern_node->base.location.start = node->location.start; - pattern_node->base.location.end = closing.end; + if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) { + PM_NODE_START_SET_NODE(pattern_node, node); + PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing); pattern_node->constant = node; - pattern_node->opening_loc = 
PM_LOCATION_TOKEN_VALUE(&opening); - pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing); + pattern_node->opening_loc = TOK2LOC(parser, &opening); + pattern_node->closing_loc = TOK2LOC(parser, &closing); return UP(pattern_node); } @@ -16286,13 +16207,13 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures case PM_HASH_PATTERN_NODE: { pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner; - if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) { - pattern_node->base.location.start = node->location.start; - pattern_node->base.location.end = closing.end; + if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) { + PM_NODE_START_SET_NODE(pattern_node, node); + PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing); pattern_node->constant = node; - pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening); - pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing); + pattern_node->opening_loc = TOK2LOC(parser, &opening); + pattern_node->closing_loc = TOK2LOC(parser, &closing); return UP(pattern_node); } @@ -16324,18 +16245,17 @@ parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) { // will check for that here. If they do, then we'll add it to the local // table since this pattern will cause it to become a local variable. 
if (accept1(parser, PM_TOKEN_IDENTIFIER)) { - pm_token_t identifier = parser->previous; - pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &identifier); + pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous); int depth; if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) { - pm_parser_local_add(parser, constant_id, identifier.start, identifier.end, 0); + pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0); } - parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier)); + parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous)); name = UP(pm_local_variable_target_node_create( parser, - &PM_LOCATION_TOKEN_VALUE(&identifier), + &TOK2LOC(parser, &parser->previous), constant_id, (uint32_t) (depth == -1 ? 0 : depth) )); @@ -16368,10 +16288,10 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0); } - parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous)); + parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous)); value = UP(pm_local_variable_target_node_create( parser, - &PM_LOCATION_TOKEN_VALUE(&parser->previous), + &TOK2LOC(parser, &parser->previous), constant_id, (uint32_t) (depth == -1 ? 
0 : depth) )); @@ -16414,22 +16334,24 @@ pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const u static pm_node_t * parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) { const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc; + const uint8_t *start = parser->start + PM_LOCATION_START(value_loc); + const uint8_t *end = parser->start + PM_LOCATION_END(value_loc); - pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end); + pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end); int depth = -1; - if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) { + if (pm_slice_is_valid_local(parser, start, end)) { depth = pm_parser_local_depth_constant_id(parser, constant_id); } else { - pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS); + pm_parser_err(parser, PM_NODE_START(key), PM_NODE_LENGTH(key), PM_ERR_PATTERN_HASH_KEY_LOCALS); - if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) { - PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start); + if ((end > start) && ((end[-1] == '!') || (end[-1] == '?'))) { + PM_PARSER_ERR_FORMAT(parser, value_loc->start, value_loc->length, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (end - start), (const char *) start); } } if (depth == -1) { - pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0); + pm_parser_local_add(parser, constant_id, start, end, 0); } parse_pattern_capture(parser, captures, constant_id, value_loc); @@ -16449,7 +16371,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca */ static void parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t 
*node) { - if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) { + if (pm_static_literals_add(&parser->newline_list, parser->start, parser->start_line, keys, node, true) != NULL) { pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE); } } @@ -16469,7 +16391,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node rest = first_node; break; case PM_SYMBOL_NODE: { - if (pm_symbol_node_label_p(first_node)) { + if (pm_symbol_node_label_p(parser, first_node)) { parse_pattern_hash_key(parser, &keys, first_node); pm_node_t *value; @@ -16483,9 +16405,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1)); } - pm_token_t operator = not_provided(parser); - pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, &operator, value)); - + pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value)); pm_node_list_append(&assocs, assoc); break; } @@ -16498,9 +16418,8 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? 
PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL; pm_parser_err_node(parser, first_node, diag_id); - pm_token_t operator = not_provided(parser); - pm_node_t *value = UP(pm_missing_node_create(parser, first_node->location.start, first_node->location.end)); - pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, &operator, value)); + pm_node_t *value = UP(pm_missing_node_create(parser, PM_NODE_START(first_node), PM_NODE_LENGTH(first_node))); + pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value)); pm_node_list_append(&assocs, assoc); break; @@ -16536,12 +16455,16 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) { pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED); - } else if (!pm_symbol_node_label_p(key)) { + } else if (!pm_symbol_node_label_p(parser, key)) { pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA); } + } else if (accept1(parser, PM_TOKEN_LABEL)) { + key = UP(pm_symbol_node_label_create(parser, &parser->previous)); } else { expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA); - key = UP(pm_symbol_node_label_create(parser, &parser->previous)); + + pm_token_t label = { .type = PM_TOKEN_LABEL, .start = parser->previous.end, .end = parser->previous.end }; + key = UP(pm_symbol_node_create(parser, NULL, &label, NULL)); } parse_pattern_hash_key(parser, &keys, key); @@ -16551,14 +16474,13 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) { value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key); } else { - value = UP(pm_missing_node_create(parser, key->location.end, key->location.end)); + value = UP(pm_missing_node_create(parser, PM_NODE_END(key), 0)); } } else { value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) 
(depth + 1)); } - pm_token_t operator = not_provided(parser); - pm_node_t *assoc = UP(pm_assoc_node_create(parser, key, &operator, value)); + pm_node_t *assoc = UP(pm_assoc_node_create(parser, key, NULL, value)); if (rest != NULL) { pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST); @@ -16591,10 +16513,10 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0); } - parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous)); + parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous)); return UP(pm_local_variable_target_node_create( parser, - &PM_LOCATION_TOKEN_VALUE(&parser->previous), + &TOK2LOC(parser, &parser->previous), constant_id, (uint32_t) (depth == -1 ? 0 : depth) )); @@ -16620,12 +16542,12 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm switch (PM_NODE_TYPE(inner)) { case PM_ARRAY_PATTERN_NODE: { pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner; - if (pattern_node->opening_loc.start == NULL) { - pattern_node->base.location.start = opening.start; - pattern_node->base.location.end = closing.end; + if (pattern_node->opening_loc.length == 0) { + PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening); + PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing); - pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening); - pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing); + pattern_node->opening_loc = TOK2LOC(parser, &opening); + pattern_node->closing_loc = TOK2LOC(parser, &closing); return UP(pattern_node); } @@ -16634,12 +16556,12 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm } case PM_FIND_PATTERN_NODE: { pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner; - if (pattern_node->opening_loc.start == NULL) { - 
pattern_node->base.location.start = opening.start; - pattern_node->base.location.end = closing.end; + if (pattern_node->opening_loc.length == 0) { + PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening); + PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing); - pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening); - pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing); + pattern_node->opening_loc = TOK2LOC(parser, &opening); + pattern_node->closing_loc = TOK2LOC(parser, &closing); return UP(pattern_node); } @@ -16681,10 +16603,10 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1)); break; default: { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type)); parser_lex(parser); - first_node = UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end)); + first_node = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous))); break; } } @@ -16695,11 +16617,11 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE, &opening); pm_token_t closing = parser->previous; - node->base.location.start = opening.start; - node->base.location.end = closing.end; + PM_NODE_START_SET_TOKEN(parser, node, &opening); + PM_NODE_LENGTH_SET_TOKEN(parser, node, &closing); - node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening); - node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing); + node->opening_loc = TOK2LOC(parser, &opening); + node->closing_loc = TOK2LOC(parser, &closing); } parser->pattern_matching_newlines = 
previous_pattern_matching_newlines; @@ -16719,7 +16641,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm } default: { pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE); - pm_node_t *right = UP(pm_missing_node_create(parser, operator.start, operator.end)); + pm_node_t *right = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &operator), PM_TOKEN_LENGTH(&operator))); return UP(pm_range_node_create(parser, NULL, &operator, right)); } } @@ -16728,12 +16650,12 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1)); // If we found a label, we need to immediately return to the caller. - if (pm_symbol_node_label_p(node)) return node; + if (pm_symbol_node_label_p(parser, node)) return node; // Call nodes (arithmetic operations) are not allowed in patterns if (PM_NODE_TYPE(node) == PM_CALL_NODE) { pm_parser_err_node(parser, node, diag_id); - pm_missing_node_t *missing_node = pm_missing_node_create(parser, node->location.start, node->location.end); + pm_missing_node_t *missing_node = pm_missing_node_create(parser, PM_NODE_START(node), PM_NODE_LENGTH(node)); pm_node_unreference(parser, node); pm_node_destroy(parser, node); @@ -16771,7 +16693,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm pm_node_t *variable = UP(parse_variable(parser)); if (variable == NULL) { - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE); + PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->previous, PM_ERR_NO_LOCAL_VARIABLE); variable = UP(pm_local_variable_read_node_missing_create(parser, &parser->previous, 0)); } @@ -16825,7 +16747,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm // If we get here, then we have a pin operator followed by something // not understood. 
We'll create a missing node and return that. pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN); - pm_node_t *variable = UP(pm_missing_node_create(parser, operator.start, operator.end)); + pm_node_t *variable = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &operator), PM_TOKEN_LENGTH(&operator))); return UP(pm_pinned_variable_node_create(parser, &operator, variable)); } } @@ -16848,16 +16770,18 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm } default: pm_parser_err_current(parser, diag_id); - return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end)); + return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current))); } } static bool parse_pattern_alternation_error_each(const pm_node_t *node, void *data) { switch (PM_NODE_TYPE(node)) { - case PM_LOCAL_VARIABLE_TARGET_NODE: - pm_parser_err((pm_parser_t *) data, node->location.start, node->location.end, PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE); + case PM_LOCAL_VARIABLE_TARGET_NODE: { + pm_parser_t *parser = (pm_parser_t *) data; + pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE); return false; + } default: return true; } @@ -16930,7 +16854,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p } default: { pm_parser_err_current(parser, diag_id); - pm_node_t *right = UP(pm_missing_node_create(parser, parser->current.start, parser->current.end)); + pm_node_t *right = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current))); if (!alternation) { node = right; @@ -16957,10 +16881,10 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0); } - parse_pattern_capture(parser, captures, constant_id, 
&PM_LOCATION_TOKEN_VALUE(&parser->previous)); + parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous)); pm_local_variable_target_node_t *target = pm_local_variable_target_node_create( parser, - &PM_LOCATION_TOKEN_VALUE(&parser->previous), + &TOK2LOC(parser, &parser->previous), constant_id, (uint32_t) (depth == -1 ? 0 : depth) ); @@ -17008,7 +16932,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag // be dynamic symbols leading to hash patterns. node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1)); - if (pm_symbol_node_label_p(node)) { + if (pm_symbol_node_label_p(parser, node)) { node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1))); if (!(flags & PM_PARSE_PATTERN_TOP)) { @@ -17037,7 +16961,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag // If we got a dynamic label symbol, then we need to treat it like the // beginning of a hash pattern. 
- if (pm_symbol_node_label_p(node)) { + if (pm_symbol_node_label_p(parser, node)) { return UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1))); } @@ -17115,23 +17039,27 @@ parse_negative_numeric(pm_node_t *node) { case PM_INTEGER_NODE: { pm_integer_node_t *cast = (pm_integer_node_t *) node; cast->base.location.start--; + cast->base.location.length++; cast->value.negative = true; break; } case PM_FLOAT_NODE: { pm_float_node_t *cast = (pm_float_node_t *) node; cast->base.location.start--; + cast->base.location.length++; cast->value = -cast->value; break; } case PM_RATIONAL_NODE: { pm_rational_node_t *cast = (pm_rational_node_t *) node; cast->base.location.start--; + cast->base.location.length++; cast->numerator.negative = true; break; } case PM_IMAGINARY_NODE: node->location.start--; + node->location.length++; parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric); break; default: @@ -17149,22 +17077,22 @@ static void pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { switch (diag_id) { case PM_ERR_HASH_KEY: { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, pm_token_type_human(parser->previous.type)); break; } case PM_ERR_HASH_VALUE: case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_type_human(parser->current.type)); break; } case PM_ERR_UNARY_RECEIVER: { const char *human = (parser->current.type == PM_TOKEN_EOF ? 
"end-of-input" : pm_token_type_human(parser->current.type)); - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, human, parser->previous.start[0]); break; } case PM_ERR_UNARY_DISALLOWED: case PM_ERR_EXPECT_ARGUMENT: { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_type_human(parser->current.type)); break; } default: @@ -17391,15 +17319,15 @@ typedef struct { static void parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) { parse_regular_expression_error_data_t *callback_data = (parse_regular_expression_error_data_t *) data; - pm_location_t location; + pm_token_t location; if (callback_data->shared) { - location = (pm_location_t) { .start = start, .end = end }; + location = (pm_token_t) { .type = 0, .start = start, .end = end }; } else { - location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end }; + location = (pm_token_t) { .type = 0, .start = callback_data->start, .end = callback_data->end }; } - PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message); + PM_PARSER_ERR_FORMAT(callback_data->parser, PM_TOKEN_START(callback_data->parser, &location), PM_TOKEN_LENGTH(&location), PM_ERR_REGEXP_PARSE_ERROR, message); } /** @@ -17410,8 +17338,8 @@ parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_ const pm_string_t *unescaped = &node->unescaped; parse_regular_expression_error_data_t error_data = { .parser = parser, - .start = node->base.location.start, - .end = node->base.location.end, + .start = parser->start + PM_NODE_START(node), + .end = parser->start + PM_NODE_END(node), .shared = unescaped->type == PM_STRING_SHARED }; @@ -17451,11 +17379,9 @@ 
parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } else { // If there was no comma, then we need to add a syntax // error. - const uint8_t *location = parser->previous.end; - PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type)); - - parser->previous.start = location; - parser->previous.type = PM_TOKEN_MISSING; + PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_END(parser, &parser->previous), 0, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type)); + parser->previous.start = parser->previous.end; + parser->previous.type = 0; } } @@ -17494,7 +17420,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } else { element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1)); - if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) { + if (pm_symbol_node_label_p(parser, element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) { if (parsed_bare_hash) { pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH); } @@ -17503,15 +17429,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_static_literals_t hash_keys = { 0 }; pm_hash_key_static_literals_add(parser, &hash_keys, element); - pm_token_t operator; + pm_token_t operator = { 0 }; if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) { operator = parser->previous; - } else { - operator = not_provided(parser); } pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1)); - pm_node_t *assoc = UP(pm_assoc_node_create(parser, element, &operator, value)); + pm_node_t *assoc = UP(pm_assoc_node_create(parser, element, NTOK2PTR(operator), value)); pm_keyword_hash_node_elements_append(hash, assoc); element = UP(hash); @@ -17531,12 +17455,12 @@ parse_expression_prefix(pm_parser_t *parser, 
pm_binding_power_t binding_power, b accept1(parser, PM_TOKEN_NEWLINE); if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type)); parser->previous.start = parser->previous.end; - parser->previous.type = PM_TOKEN_MISSING; + parser->previous.type = 0; } - pm_array_node_close_set(array, &parser->previous); + pm_array_node_close_set(parser, array, &parser->previous); pm_accepts_block_stack_pop(parser); return UP(array); @@ -17618,20 +17542,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // multiple target node. pm_multi_target_node_t *multi_target; - if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) { + if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.length == 0) { multi_target = (pm_multi_target_node_t *) statement; } else { multi_target = pm_multi_target_node_create(parser); pm_multi_target_node_targets_append(parser, multi_target, statement); } - pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening); - pm_location_t rparen_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous); - - multi_target->lparen_loc = lparen_loc; - multi_target->rparen_loc = rparen_loc; - multi_target->base.location.start = lparen_loc.start; - multi_target->base.location.end = rparen_loc.end; + multi_target->lparen_loc = TOK2LOC(parser, &opening); + multi_target->rparen_loc = TOK2LOC(parser, &parser->previous); + PM_NODE_START_SET_TOKEN(parser, multi_target, &opening); + PM_NODE_LENGTH_SET_TOKEN(parser, multi_target, &parser->previous); pm_node_t *result; if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) { @@ -17682,7 +17603,7 @@ parse_expression_prefix(pm_parser_t 
*parser, pm_binding_power_t binding_power, b // If we didn't find a terminator and we didn't find a right // parenthesis, then this is a syntax error. if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type)); } // Parse each statement within the parentheses. @@ -17713,7 +17634,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } else if (!match1(parser, PM_TOKEN_EOF)) { // If we're at the end of the file, then we're going to add // an error after this for the ) anyway. - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type)); } } @@ -17737,9 +17658,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) { - const uint8_t *offset = statement->location.end; + const uint8_t *offset = parser->start + PM_NODE_END(statement); pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset }; - pm_node_t *value = UP(pm_missing_node_create(parser, offset, offset)); + pm_node_t *value = UP(pm_missing_node_create(parser, PM_NODE_END(statement), 0)); statement = UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value)); statements->body.nodes[statements->body.size - 1] = statement; @@ -17785,12 +17706,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_accepts_block_stack_pop(parser); expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM, &opening); - pm_hash_node_closing_loc_set(node, 
&parser->previous); + pm_hash_node_closing_loc_set(parser, node, &parser->previous); return UP(node); } case PM_TOKEN_CHARACTER_LITERAL: { - pm_token_t closing = not_provided(parser); pm_node_t *node = UP(pm_string_node_create_current_string( parser, &(pm_token_t) { @@ -17803,7 +17723,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b .start = parser->current.start + 1, .end = parser->current.end }, - &closing + NULL )); pm_node_flag_set(node, parse_unescaped_encoding(parser)); @@ -17953,11 +17873,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b call->closing_loc = arguments.closing_loc; call->block = arguments.block; - const uint8_t *end = pm_arguments_end(&arguments); - if (!end) { - end = call->message_loc.end; + const pm_location_t *end = pm_arguments_end(&arguments); + if (end == NULL) { + PM_NODE_LENGTH_SET_LOCATION(call, &call->message_loc); + } else { + PM_NODE_LENGTH_SET_LOCATION(call, end); } - call->base.location.end = end; } } else { // Otherwise, we know the identifier is in the local table. This @@ -17984,7 +17905,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // purposes of warnings. 
assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)); - if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) { + if (pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &identifier), PM_TOKEN_LENGTH(&identifier))) { pm_node_unreference(parser, node); } else { pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node; @@ -18030,7 +17951,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY)); } - node->location.end = opening.end; + PM_NODE_LENGTH_SET_TOKEN(parser, node, &opening); } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) { // If we get here, then we tried to find something in the // heredoc but couldn't actually parse anything, so we'll just @@ -18038,7 +17959,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // // parse_string_part handles its own errors, so there is no need // for us to add one here. 
- node = UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end)); + node = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous))); } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) { // If we get here, then the part that we parsed was plain string // content and we're at the end of the heredoc, so we can return @@ -18047,8 +17968,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_node_flag_set(part, parse_unescaped_encoding(parser)); pm_string_node_t *cast = (pm_string_node_t *) part; - cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening); - cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current); + cast->opening_loc = TOK2LOC(parser, &opening); + cast->closing_loc = TOK2LOC(parser, &parser->current); cast->base.location = cast->opening_loc; if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) { @@ -18082,7 +18003,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b cast->parts = parts; expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length); - pm_interpolated_xstring_node_closing_set(cast, &parser->previous); + pm_interpolated_xstring_node_closing_set(parser, cast, &parser->previous); cast->base.location = cast->opening_loc; node = UP(cast); @@ -18091,7 +18012,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_node_list_free(&parts); expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length); - pm_interpolated_string_node_closing_set(cast, &parser->previous); + pm_interpolated_string_node_closing_set(parser, cast, &parser->previous); cast->base.location = cast->opening_loc; node = UP(cast); @@ -18227,11 +18148,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // At this point we can create a case node, though we don't yet know // if it is a case-in or 
case-when node. - pm_token_t end_keyword = not_provided(parser); pm_node_t *node; if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) { - pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword); + pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, NULL); pm_static_literals_t literals = { 0 }; // At this point we've seen a when keyword, so we know this is a @@ -18275,11 +18195,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) { - pm_when_node_then_keyword_loc_set(when_node, &parser->previous); + pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous); } } else { expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER); - pm_when_node_then_keyword_loc_set(when_node, &parser->previous); + pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous); } if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { @@ -18301,7 +18221,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_static_literals_free(&literals); node = UP(case_node); } else { - pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword); + pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate); // If this is a case-match node (i.e., it is a pattern matching // case statement) then we must have a predicate. @@ -18346,12 +18266,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // Now we need to check for the terminator of the in node's // pattern. It can be a newline or semicolon optionally // followed by a `then` keyword. 
- pm_token_t then_keyword; + pm_token_t then_keyword = { 0 }; if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) { then_keyword = parser->previous; - } else { - then_keyword = not_provided(parser); } } else { expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER); @@ -18369,7 +18287,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // Now that we have the full pattern and statements, we can // create the node and attach it to the case node. - pm_node_t *condition = UP(pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword)); + pm_node_t *condition = UP(pm_in_node_create(parser, pattern, statements, &in_keyword, NTOK2PTR(then_keyword))); pm_case_match_node_condition_append(case_node, condition); } @@ -18404,9 +18322,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM, &case_keyword); if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) { - pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous); + pm_case_node_end_keyword_loc_set(parser, (pm_case_node_t *) node, &parser->previous); } else { - pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous); + pm_case_match_node_end_keyword_loc_set(parser, (pm_case_match_node_t *) node, &parser->previous); } pop_block_exits(parser, previous_block_exits); @@ -18436,8 +18354,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1)); expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM, &begin_keyword); - begin_node->base.location.end = parser->previous.end; - pm_begin_node_end_keyword_set(begin_node, &parser->previous); + PM_NODE_LENGTH_SET_TOKEN(parser, begin_node, &parser->previous); + 
pm_begin_node_end_keyword_set(parser, begin_node, &parser->previous); pop_block_exits(parser, previous_block_exits); pm_node_list_free(¤t_block_exits); @@ -18490,7 +18408,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // Reject `foo && return bar`. if (!accepts_command_call && arguments.arguments != NULL) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(next.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(next.type)); } } } @@ -18513,7 +18431,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } default: assert(false && "unreachable"); - return UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end)); + return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous))); } } case PM_TOKEN_KEYWORD_SUPER: { @@ -18524,7 +18442,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1)); if ( - arguments.opening_loc.start == NULL && + arguments.opening_loc.length == 0 && arguments.arguments == NULL && ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE)) ) { @@ -18572,7 +18490,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_parser_scope_push(parser, true); if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type)); } pm_node_t *statements = NULL; @@ -18609,7 +18527,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b 
pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME); } - pm_token_t inheritance_operator; + pm_token_t inheritance_operator = { 0 }; pm_node_t *superclass; if (match1(parser, PM_TOKEN_LESS)) { @@ -18621,13 +18539,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1)); } else { - inheritance_operator = not_provided(parser); superclass = NULL; } pm_parser_scope_push(parser, true); - if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) { + if (inheritance_operator.start != NULL) { expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END); } else { accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); @@ -18666,7 +18583,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pop_block_exits(parser, previous_block_exits); pm_node_list_free(¤t_block_exits); - return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous)); + return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, NTOK2PTR(inheritance_operator), superclass, statements, &parser->previous)); } case PM_TOKEN_KEYWORD_DEF: { pm_node_list_t current_block_exits = { 0 }; @@ -18676,7 +18593,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b size_t opening_newline_index = token_newline_index(parser); pm_node_t *receiver = NULL; - pm_token_t operator = not_provided(parser); + pm_token_t operator = { 0 }; pm_token_t name; // This context is necessary for lexing `...` in a bare params @@ -18710,7 +18627,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b operator = parser->previous; name = parse_method_definition_name(parser); } else { - pm_refute_numbered_parameter(parser, parser->previous.start, 
parser->previous.end); + pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)); pm_parser_scope_push(parser, true); name = parser->previous; @@ -18782,7 +18699,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b name = parse_method_definition_name(parser); } else { if (!valid_name) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type)); } name = identifier; @@ -18823,8 +18740,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b break; } - pm_token_t lparen; - pm_token_t rparen; + pm_token_t lparen = { 0 }; + pm_token_t rparen = { 0 }; pm_parameters_node_t *params; bool accept_endless_def = true; @@ -18844,9 +18761,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b context_pop(parser); if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type)); parser->previous.start = parser->previous.end; - parser->previous.type = PM_TOKEN_MISSING; + parser->previous.type = 0; } rparen = parser->previous; @@ -18859,8 +18776,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL); } - lparen = not_provided(parser); - rparen = not_provided(parser); params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1)); // Reject `def * = 1` and similar. 
We have to specifically check @@ -18871,18 +18786,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b break; } default: { - lparen = not_provided(parser); - rparen = not_provided(parser); params = NULL; - context_pop(parser); break; } } pm_node_t *statements = NULL; - pm_token_t equal; - pm_token_t end_keyword; + pm_token_t equal = { 0 }; + pm_token_t end_keyword = { 0 }; if (accept1(parser, PM_TOKEN_EQUAL)) { if (token_is_setter_name(&name)) { @@ -18895,7 +18807,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parser->current_context->context == PM_CONTEXT_DEFAULT_PARAMS && parser->current_context->prev->context == PM_CONTEXT_BLOCK_PARAMETERS ) { - PM_PARSER_ERR_FORMAT(parser, def_keyword.start, parser->previous.end, PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE, "endless method definition"); + PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &def_keyword), PM_TOKENS_LENGTH(&def_keyword, &parser->previous), PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE, "endless method definition"); } equal = parser->previous; @@ -18926,11 +18838,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false); pm_do_loop_stack_pop(parser); context_pop(parser); - end_keyword = not_provided(parser); } else { - equal = not_provided(parser); - - if (lparen.type == PM_TOKEN_NOT_PROVIDED) { + if (lparen.start == NULL) { lex_state_set(parser, PM_LEX_STATE_BEG); parser->command_start = true; expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM); @@ -18970,7 +18879,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b * methods to override the unary operators, we should ignore * the @ in the same way we do for symbols. 
*/ - pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name)); + pm_constant_id_t name_id = pm_parser_constant_id_raw(parser, name.start, parse_operator_symbol_name(&name)); flush_block_exits(parser, previous_block_exits); pm_node_list_free(¤t_block_exits); @@ -18984,19 +18893,19 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b statements, &locals, &def_keyword, - &operator, - &lparen, - &rparen, - &equal, - &end_keyword + NTOK2PTR(operator), + NTOK2PTR(lparen), + NTOK2PTR(rparen), + NTOK2PTR(equal), + NTOK2PTR(end_keyword) )); } case PM_TOKEN_KEYWORD_DEFINED: { parser_lex(parser); - pm_token_t keyword = parser->previous; - pm_token_t lparen; - pm_token_t rparen; + pm_token_t keyword = parser->previous; + pm_token_t lparen = { 0 }; + pm_token_t rparen = { 0 }; pm_node_t *expression; context_push(parser, PM_CONTEXT_DEFINED); @@ -19007,31 +18916,26 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { expression = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0)); - lparen = not_provided(parser); - rparen = not_provided(parser); + lparen = (pm_token_t) { 0 }; } else { expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1)); - if (parser->recovering) { - rparen = not_provided(parser); - } else { + if (!parser->recovering) { accept1(parser, PM_TOKEN_NEWLINE); expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN); rparen = parser->previous; } } } else { - lparen = not_provided(parser); - rparen = not_provided(parser); expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1)); } context_pop(parser); return UP(pm_defined_node_create( parser, - &lparen, + NTOK2PTR(lparen), expression, - &rparen, + 
NTOK2PTR(rparen), &keyword )); } @@ -19080,7 +18984,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1)); } else { pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX); - index = UP(pm_missing_node_create(parser, for_keyword.start, for_keyword.end)); + index = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &for_keyword), PM_TOKEN_LENGTH(&for_keyword))); } // Now, if there are multiple index expressions, parse them out. @@ -19099,13 +19003,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1)); pm_do_loop_stack_pop(parser); - pm_token_t do_keyword; + pm_token_t do_keyword = { 0 }; if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) { do_keyword = parser->previous; } else { - do_keyword = not_provided(parser); if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type)); } } @@ -19117,11 +19020,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false); expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM, &for_keyword); - return UP(pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous)); + return UP(pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, NTOK2PTR(do_keyword), &parser->previous)); } case PM_TOKEN_KEYWORD_IF: if 
(parser_end_of_line_p(parser)) { - PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL); + PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_KEYWORD_EOL); } size_t opening_newline_index = token_newline_index(parser); @@ -19171,13 +19074,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // syntax. if (!accepts_command_call && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) { if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) { - pm_parser_err(parser, parser->previous.end, parser->previous.end + 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN); + pm_parser_err(parser, PM_TOKEN_END(parser, &parser->previous), 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN); } else { accept1(parser, PM_TOKEN_NEWLINE); pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER); } - return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end)); + return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current))); } accept1(parser, PM_TOKEN_NEWLINE); @@ -19188,13 +19091,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { receiver = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0)); } else { - arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&lparen); + arguments.opening_loc = TOK2LOC(parser, &lparen); receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1)); if (!parser->recovering) { accept1(parser, PM_TOKEN_NEWLINE); expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN); - arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous); + arguments.closing_loc = TOK2LOC(parser, &parser->previous); } } } else { @@ -19226,7 +19129,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pop_block_exits(parser, 
previous_block_exits); pm_node_list_free(¤t_block_exits); - pm_token_t missing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; + pm_token_t missing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; return UP(pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing)); } @@ -19315,11 +19218,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_do_loop_stack_pop(parser); context_pop(parser); - pm_token_t do_keyword; + pm_token_t do_keyword = { 0 }; if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) { do_keyword = parser->previous; } else { - do_keyword = not_provided(parser); expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE); } @@ -19334,7 +19236,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false); expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM, &keyword); - return UP(pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0)); + return UP(pm_until_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->previous, predicate, statements, 0)); } case PM_TOKEN_KEYWORD_WHILE: { size_t opening_newline_index = token_newline_index(parser); @@ -19349,11 +19251,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_do_loop_stack_pop(parser); context_pop(parser); - pm_token_t do_keyword; + pm_token_t do_keyword = { 0 }; if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) { do_keyword = parser->previous; } else { - do_keyword = not_provided(parser); expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE); } @@ -19368,7 +19269,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b 
parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false); expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM, &keyword); - return UP(pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0)); + return UP(pm_while_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->previous, predicate, statements, 0)); } case PM_TOKEN_PERCENT_LOWER_I: { parser_lex(parser); @@ -19383,27 +19284,22 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // Interpolation is not possible but nested heredocs can still lead to // consecutive (disjoint) string tokens when the final newline is escaped. while (match1(parser, PM_TOKEN_STRING_CONTENT)) { - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - // Record the string node, moving to interpolation if needed. if (current == NULL) { - current = UP(pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing)); + current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL)); parser_lex(parser); } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) { - pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing)); + pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL)); parser_lex(parser); pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string); } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) { pm_symbol_node_t *cast = (pm_symbol_node_t *) current; - pm_token_t bounds = not_provided(parser); - - pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end }; - pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped)); - pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, 
&opening, &parser->previous, &closing)); + pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = parser->start + cast->value_loc.start, .end = parser->start + cast->value_loc.start + cast->value_loc.length }; + pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped)); + pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->previous, NULL)); parser_lex(parser); - pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing); + pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL); pm_interpolated_symbol_node_append(interpolated, first_string); pm_interpolated_symbol_node_append(interpolated, second_string); @@ -19425,11 +19321,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_token_t closing = parser->current; if (match1(parser, PM_TOKEN_EOF)) { pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM); - closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; + closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; } else { expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM); } - pm_array_node_close_set(array, &closing); + pm_array_node_close_set(parser, array, &closing); return UP(array); } @@ -19459,20 +19355,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b break; } case PM_TOKEN_STRING_CONTENT: { - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - if (current == NULL) { // If we hit content and the current node is NULL, then this is // the first string content we've seen. In that case we're going // to create a new string node and set that to the current. 
- current = UP(pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing)); + current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL)); parser_lex(parser); } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) { // If we hit string content and the current node is an // interpolated string, then we need to append the string content // to the list of child nodes. - pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing)); + pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL)); parser_lex(parser); pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string); @@ -19481,14 +19374,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // then we need to convert the current node into an interpolated // string and add the string content to the list of child nodes. pm_symbol_node_t *cast = (pm_symbol_node_t *) current; - pm_token_t bounds = not_provided(parser); - - pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end }; - pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped)); - pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing)); + pm_token_t content = { + .type = PM_TOKEN_STRING_CONTENT, + .start = parser->start + cast->value_loc.start, + .end = parser->start + cast->value_loc.start + cast->value_loc.length + }; + + pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped)); + pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->previous, NULL)); parser_lex(parser); - pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing); + 
pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL); pm_interpolated_symbol_node_append(interpolated, first_string); pm_interpolated_symbol_node_append(interpolated, second_string); @@ -19506,20 +19402,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // If we hit an embedded variable and the current node is NULL, // then this is the start of a new string. We'll set the current // node to a new interpolated string. - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - current = UP(pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing)); + current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL)); } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) { // If we hit an embedded variable and the current node is a string // node, then we'll convert the current into an interpolated // string and add the string node to the list of parts. 
- pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing); + pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL); current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current)); pm_interpolated_symbol_node_append(interpolated, current); - interpolated->base.location.start = current->location.start; + PM_NODE_START_SET_NODE(interpolated, current); start_location_set = true; current = UP(interpolated); } else { @@ -19530,7 +19422,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1)); pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part); if (!start_location_set) { - current->location.start = part->location.start; + PM_NODE_START_SET_NODE(current, part); } break; } @@ -19540,21 +19432,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // If we hit an embedded expression and the current node is NULL, // then this is the start of a new string. We'll set the current // node to a new interpolated string. - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - current = UP(pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing)); + current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL)); } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) { // If we hit an embedded expression and the current node is a // string node, then we'll convert the current into an // interpolated string and add the string node to the list of // parts. 
- pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing); + pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL); current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current)); pm_interpolated_symbol_node_append(interpolated, current); - interpolated->base.location.start = current->location.start; + PM_NODE_START_SET_NODE(interpolated, current); start_location_set = true; current = UP(interpolated); } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) { @@ -19567,7 +19455,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1)); pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part); if (!start_location_set) { - current->location.start = part->location.start; + PM_NODE_START_SET_NODE(current, part); } break; } @@ -19586,11 +19474,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_token_t closing = parser->current; if (match1(parser, PM_TOKEN_EOF)) { pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM); - closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; + closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; } else { expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM); } - pm_array_node_close_set(array, &closing); + pm_array_node_close_set(parser, array, &closing); return UP(array); } @@ -19607,10 +19495,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // Interpolation is not possible but nested heredocs can still lead to // consecutive (disjoint) string tokens when the final newline is escaped. 
while (match1(parser, PM_TOKEN_STRING_CONTENT)) { - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - - pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing)); + pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL)); // Record the string node, moving to interpolation if needed. if (current == NULL) { @@ -19618,7 +19503,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) { pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string); } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) { - pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing); + pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL); pm_interpolated_string_node_append(interpolated, current); pm_interpolated_string_node_append(interpolated, string); current = UP(interpolated); @@ -19639,12 +19524,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_token_t closing = parser->current; if (match1(parser, PM_TOKEN_EOF)) { pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM); - closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; + closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; } else { expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM); } - pm_array_node_close_set(array, &closing); + pm_array_node_close_set(parser, array, &closing); return UP(array); } case PM_TOKEN_PERCENT_UPPER_W: { @@ -19678,10 +19563,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b break; } case PM_TOKEN_STRING_CONTENT: { - pm_token_t opening = 
not_provided(parser); - pm_token_t closing = not_provided(parser); - - pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing)); + pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL)); pm_node_flag_set(string, parse_unescaped_encoding(parser)); parser_lex(parser); @@ -19701,7 +19583,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // a string node, then we need to convert the // current node into an interpolated string and add // the string content to the list of child nodes. - pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing); + pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL); pm_interpolated_string_node_append(interpolated, current); pm_interpolated_string_node_append(interpolated, string); current = UP(interpolated); @@ -19717,17 +19599,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // node is NULL, then this is the start of a new // string. We'll set the current node to a new // interpolated string. - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - current = UP(pm_interpolated_string_node_create(parser, &opening, NULL, &closing)); + current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL)); } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) { // If we hit an embedded variable and the current // node is a string node, then we'll convert the // current into an interpolated string and add the // string node to the list of parts. 
- pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing); + pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL); pm_interpolated_string_node_append(interpolated, current); current = UP(interpolated); } else { @@ -19746,17 +19624,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // node is NULL, then this is the start of a new // string. We'll set the current node to a new // interpolated string. - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - current = UP(pm_interpolated_string_node_create(parser, &opening, NULL, &closing)); + current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL)); } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) { // If we hit an embedded expression and the current // node is a string node, then we'll convert the // current into an interpolated string and add the // string node to the list of parts. 
- pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing); + pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL); pm_interpolated_string_node_append(interpolated, current); current = UP(interpolated); } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) { @@ -19786,12 +19660,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_token_t closing = parser->current; if (match1(parser, PM_TOKEN_EOF)) { pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM); - closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; + closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; } else { expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM); } - pm_array_node_close_set(array, &closing); + pm_array_node_close_set(parser, array, &closing); return UP(array); } case PM_TOKEN_REGEXP_BEGIN: { @@ -19850,10 +19724,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // a regular expression node with interpolation. 
interpolated = pm_interpolated_regular_expression_node_create(parser, &opening); - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped)); - + pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped)); if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) { // This is extremely strange, but the first string part of a // regular expression will always be tagged as binary if we @@ -19881,7 +19752,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_token_t closing = parser->current; if (match1(parser, PM_TOKEN_EOF)) { pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM); - closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; + closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; } else { expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM); } @@ -19934,10 +19805,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // create a string node with interpolation. 
node = pm_interpolated_xstring_node_create(parser, &opening, &opening); - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - - pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped)); + pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped)); pm_node_flag_set(part, parse_unescaped_encoding(parser)); pm_interpolated_xstring_node_append(node, part); @@ -19958,11 +19826,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_token_t closing = parser->current; if (match1(parser, PM_TOKEN_EOF)) { pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM); - closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; + closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; } else { expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM); } - pm_interpolated_xstring_node_closing_set(node, &closing); + pm_interpolated_xstring_node_closing_set(parser, node, &closing); return UP(node); } @@ -19974,7 +19842,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // still lex past it though and create a missing node place. 
if (binding_power != PM_BINDING_POWER_STATEMENT) { pm_parser_err_prefix(parser, diag_id); - return UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end)); + return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous))); } pm_token_t operator = parser->previous; @@ -20084,13 +19952,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b accept1(parser, PM_TOKEN_NEWLINE); expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN); - pm_block_parameters_node_closing_set(block_parameters, &parser->previous); + pm_block_parameters_node_closing_set(parser, block_parameters, &parser->previous); break; } case PM_CASE_PARAMETER: { pm_accepts_block_stack_push(parser, false); - pm_token_t opening = not_provided(parser); - block_parameters = parse_block_parameters(parser, false, &opening, true, false, (uint16_t) (depth + 1)); + block_parameters = parse_block_parameters(parser, false, NULL, true, false, (uint16_t) (depth + 1)); pm_accepts_block_stack_pop(parser); break; } @@ -20178,17 +20045,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // If we get here, then we are assuming this token is closing a // parent context, so we'll indicate that to the user so that // they know how we behaved. - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable)); } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) { // We're going to make a special case here, because "cannot // parse expression" is pretty generic, and we know here that we // have an unexpected token. 
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type)); } else { pm_parser_err_prefix(parser, diag_id); } - return UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end)); + return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous))); } } } @@ -20285,9 +20152,7 @@ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) { single_value = false; - pm_token_t opening = not_provided(parser); - pm_array_node_t *array = pm_array_node_create(parser, &opening); - + pm_array_node_t *array = pm_array_node_create(parser, NULL); pm_array_node_elements_append(array, value); value = UP(array); @@ -20315,7 +20180,7 @@ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding // but without parenthesis. 
if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) { pm_call_node_t *call_node = (pm_call_node_t *) value; - if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) { + if ((call_node->arguments != NULL) && (call_node->opening_loc.length == 0)) { accepts_command_call_inner = true; } } @@ -20529,7 +20394,8 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) { length = pm_buffer_length(&unescaped); } - pm_location_t location; + const uint8_t *start; + const uint8_t *end; pm_constant_id_t name; // If the name of the capture group isn't a valid identifier, we do @@ -20542,12 +20408,14 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) { if (callback_data->shared) { // If the unescaped string is a slice of the source, then we can // copy the names directly. The pointers will line up. - location = (pm_location_t) { .start = source, .end = source + length }; - name = pm_parser_constant_id_location(parser, location.start, location.end); + start = source; + end = source + length; + name = pm_parser_constant_id_raw(parser, start, end); } else { // Otherwise, the name is a slice of the malloc-ed owned string, // in which case we need to copy it out into a new string. - location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end }; + start = parser->start + PM_NODE_START(call->receiver); + end = parser->start + PM_NODE_END(call->receiver); void *memory = xmalloc(length); if (memory == NULL) abort(); @@ -20572,7 +20440,7 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) { // If the identifier is not already a local, then we will add it to // the local table. 
- pm_parser_local_add(parser, name, location.start, location.end, 0); + pm_parser_local_add(parser, name, start, end, 0); } // Here we lazily create the MatchWriteNode since we know we're @@ -20583,7 +20451,7 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) { // Next, create the local variable target and add it to the list of // targets for the match. - pm_node_t *target = UP(pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth)); + pm_node_t *target = UP(pm_local_variable_target_node_create(parser, &TOK2LOC(parser, &((pm_token_t) { .type = 0, .start = start, .end = end })), name, depth == -1 ? 0 : (uint32_t) depth)); pm_node_list_append(&callback_data->match->targets, target); } @@ -20605,8 +20473,8 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t * parse_regular_expression_error_data_t error_data = { .parser = parser, - .start = call->receiver->location.start, - .end = call->receiver->location.end, + .start = parser->start + PM_NODE_START(call->receiver), + .end = parser->start + PM_NODE_END(call->receiver), .shared = content->type == PM_STRING_SHARED }; @@ -20634,7 +20502,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // is parsed because it could be referenced in the value. pm_call_node_t *call_node = (pm_call_node_t *) node; if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) { - pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0); + pm_parser_local_add_location(parser, &call_node->message_loc, 0); } } PRISM_FALLTHROUGH @@ -20643,7 +20511,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // variable before parsing the value, in case the value // references the variable. 
if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) { - pm_parser_local_add_location(parser, node->location.start, node->location.end, 0); + pm_parser_local_add_location(parser, &node->location, 0); } parser_lex(parser); @@ -20747,8 +20615,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t return result; } case PM_LOCAL_VARIABLE_READ_NODE: { - if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) { - PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start); + if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) { + PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + node->location.start); pm_node_unreference(parser, node); } @@ -20768,10 +20636,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // receiver that could have been a local variable) then we // will transform it into a local variable write. 
if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) { - pm_location_t *message_loc = &cast->message_loc; - pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end); - - pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1); + pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length); + pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1); parser_lex(parser); pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1)); @@ -20881,8 +20747,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t return result; } case PM_LOCAL_VARIABLE_READ_NODE: { - if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) { - PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start); + if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) { + PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(node)); pm_node_unreference(parser, node); } @@ -20902,10 +20768,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // receiver that could have been a local variable) then we // will transform it into a local variable write. 
if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) { - pm_location_t *message_loc = &cast->message_loc; - pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end); - - pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1); + pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length); + pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1); parser_lex(parser); pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1)); @@ -21025,8 +20889,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t return result; } case PM_LOCAL_VARIABLE_READ_NODE: { - if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) { - PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start); + if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) { + PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(node)); pm_node_unreference(parser, node); } @@ -21047,10 +20911,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // receiver that could have been a local variable) then we // will transform it into a local variable write. 
if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) { - pm_location_t *message_loc = &cast->message_loc; - pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end); - - pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1); + pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length); + pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1); pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, UP(cast), &token, value, constant_id, 0)); @@ -21088,7 +20950,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // In this case we have an operator but we don't know what it's for. // We need to treat it as an error. For now, we'll mark it as an error // and just skip right past it. 
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type)); return node; } } @@ -21199,21 +21061,21 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t case PM_RESCUE_MODIFIER_NODE: { pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node; if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); } break; } case PM_AND_NODE: { pm_and_node_t *cast = (pm_and_node_t *) node; if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); } break; } case PM_OR_NODE: { pm_or_node_t *cast = (pm_or_node_t *) node; if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); } break; } @@ -21229,7 +21091,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t case PM_TOKEN_LESS: case PM_TOKEN_LESS_EQUAL: { if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, 
PM_CALL_NODE_FLAGS_COMPARISON)) { - PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON); + PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON); } parser_lex(parser); @@ -21252,21 +21114,21 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t case PM_RESCUE_MODIFIER_NODE: { pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node; if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); } break; } case PM_AND_NODE: { pm_and_node_t *cast = (pm_and_node_t *) node; if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); } break; } case PM_OR_NODE: { pm_or_node_t *cast = (pm_or_node_t *) node; if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); } break; } @@ -21287,8 +21149,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t break; } default: { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type)); - message = (pm_token_t) { .type = 
PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type)); + message = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; } } @@ -21298,7 +21160,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t if ( (previous_binding_power == PM_BINDING_POWER_STATEMENT) && arguments.arguments == NULL && - arguments.opening_loc.start == NULL && + arguments.opening_loc.length == 0 && match1(parser, PM_TOKEN_COMMA) ) { return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); @@ -21364,8 +21226,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // before the `expect` function call to make sure it doesn't // accidentally move past a ':' token that occurs after the syntax // error. - pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; - pm_node_t *false_expression = UP(pm_missing_node_create(parser, colon.start, colon.end)); + pm_token_t colon = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; + pm_node_t *false_expression = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &colon), PM_TOKEN_LENGTH(&colon))); context_pop(parser); pop_block_exits(parser, previous_block_exits); @@ -21470,7 +21332,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t parser_lex(parser); pm_arguments_t arguments = { 0 }; - arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous); + arguments.opening_loc = TOK2LOC(parser, &parser->previous); if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) { pm_accepts_block_stack_push(parser, true); @@ -21479,7 +21341,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t expect1(parser, 
PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET); } - arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous); + arguments.closing_loc = TOK2LOC(parser, &parser->previous); // If we have a comma after the closing bracket then this is a multiple // assignment and we should parse the targets. @@ -21564,7 +21426,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t static inline bool pm_call_node_command_p(const pm_call_node_t *node) { return ( - (node->opening_loc.start == NULL) && + (node->opening_loc.length == 0) && (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) && (node->arguments != NULL || node->block != NULL) ); @@ -21582,7 +21444,7 @@ static pm_node_t * parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) { if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) { pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP); - return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end)); + return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current))); } pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth); @@ -21618,7 +21480,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc // If we have a symbol node that is being parsed as a label, then we // need to immediately return, because there should never be an // infix operator following this node. - if (pm_symbol_node_label_p(node)) { + if (pm_symbol_node_label_p(parser, node)) { return node; } break; @@ -21683,7 +21545,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc // If this is a non-assoc operator and we are about to parse the // exact same operator, then we need to add an error. 
if (match1(parser, current_token_type)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type)); break; } @@ -21696,7 +21558,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc // if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) { if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type)); break; } @@ -21723,22 +21585,22 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc if ( // (1) foo[1] !( - cast->call_operator_loc.start == NULL && - cast->message_loc.start != NULL && - cast->message_loc.start[0] == '[' && - cast->message_loc.end[-1] == ']' + cast->call_operator_loc.length == 0 && + cast->message_loc.length > 0 && + parser->start[cast->message_loc.start] == '[' && + parser->start[cast->message_loc.start + cast->message_loc.length - 1] == ']' ) && // (2) foo.bar !( - cast->call_operator_loc.start != NULL && + cast->call_operator_loc.length > 0 && cast->arguments == NULL && cast->block == NULL && - cast->opening_loc.start == NULL + cast->opening_loc.length == 0 ) && // (3) foo.bar(1) !( - cast->call_operator_loc.start != NULL && - cast->opening_loc.start != NULL + cast->call_operator_loc.length > 0 && + cast->opening_loc.length > 0 ) && // (4) foo.bar do end !( @@ -21821,7 +21683,7 @@ 
wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) { pm_keyword_hash_node_elements_append(keywords, UP(pm_assoc_node_create( parser, UP(pm_symbol_node_synthesized_create(parser, "chomp")), - &(pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start }, + NULL, UP(pm_true_node_synthesized_create(parser)) ))); @@ -21887,7 +21749,7 @@ parse_program(pm_parser_t *parser) { // correct the location information. if (statements == NULL) { statements = pm_statements_node_create(parser); - pm_statements_node_location_set(statements, parser->start, parser->start); + statements->base.location = (pm_location_t) { 0 }; } return UP(pm_program_node_create(parser, &locals, statements)); @@ -21928,7 +21790,7 @@ pm_strnstr(const char *big, const char *little, size_t big_length) { static void pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) { if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') { - pm_parser_warn(parser, start, start + length, PM_WARN_SHEBANG_CARRIAGE_RETURN); + pm_parser_warn(parser, U32(start - parser->start), U32(length), PM_WARN_SHEBANG_CARRIAGE_RETURN); } } #endif @@ -21986,7 +21848,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm .current = { .type = PM_TOKEN_EOF, .start = source, .end = source }, .next_start = NULL, .heredoc_end = NULL, - .data_loc = { .start = NULL, .end = NULL }, + .data_loc = { 0 }, .comment_list = { 0 }, .magic_comment_list = { 0 }, .warning_list = { 0 }, @@ -22041,7 +21903,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm // guess at the number of newlines that we'll need based on the size of the // input. size_t newline_size = size / 22; - pm_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size); + pm_newline_list_init(&parser->newline_list, newline_size < 4 ? 
4 : newline_size); // If options were provided to this parse, establish them here. if (options != NULL) { @@ -22180,7 +22042,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm const uint8_t *newline = next_newline(cursor, parser->end - cursor); while (newline != NULL) { - pm_newline_list_append(&parser->newline_list, newline); + pm_newline_list_append(&parser->newline_list, U32(newline - parser->start + 1)); cursor = newline + 1; newline = next_newline(cursor, parser->end - cursor); @@ -22209,7 +22071,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor }; parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor }; } else { - pm_parser_err(parser, parser->start, parser->start, PM_ERR_SCRIPT_NOT_FOUND); + pm_parser_err(parser, 0, 0, PM_ERR_SCRIPT_NOT_FOUND); pm_newline_list_clear(&parser->newline_list); } } @@ -22506,7 +22368,7 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s pm_serialize_header(buffer); pm_serialize_encoding(parser.encoding, buffer); pm_buffer_append_varsint(buffer, parser.start_line); - pm_serialize_comment_list(&parser, &parser.comment_list, buffer); + pm_serialize_comment_list(&parser.comment_list, buffer); pm_node_destroy(&parser, node); pm_parser_free(&parser); diff --git a/prism/prism.h b/prism/prism.h index c468db18bef3c2..c1ce5829976074 100644 --- a/prism/prism.h +++ b/prism/prism.h @@ -143,11 +143,10 @@ PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void /** * Serialize the given list of comments to the given buffer. * - * @param parser The parser to serialize. * @param list The list of comments to serialize. * @param buffer The buffer to serialize to. 
*/ -void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer); +void pm_serialize_comment_list(pm_list_t *list, pm_buffer_t *buffer); /** * Serialize the name of the encoding to the buffer. diff --git a/prism/static_literals.c b/prism/static_literals.c index 9fa37b999a9e46..13a52378dda802 100644 --- a/prism/static_literals.c +++ b/prism/static_literals.c @@ -9,6 +9,9 @@ typedef struct { /** The list of newline offsets to use to calculate line numbers. */ const pm_newline_list_t *newline_list; + /** The start of the source being parsed. */ + const uint8_t *start; + /** The line number that the parser starts on. */ int32_t start_line; @@ -353,7 +356,7 @@ pm_compare_regular_expression_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_liter * Add a node to the set of static literals. */ pm_node_t * -pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace) { +pm_static_literals_add(const pm_newline_list_t *newline_list, const uint8_t *start, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace) { switch (PM_NODE_TYPE(node)) { case PM_INTEGER_NODE: case PM_SOURCE_LINE_NODE: @@ -361,6 +364,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line &literals->integer_nodes, &(pm_static_literals_metadata_t) { .newline_list = newline_list, + .start = start, .start_line = start_line, .encoding_name = NULL }, @@ -373,6 +377,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line &literals->float_nodes, &(pm_static_literals_metadata_t) { .newline_list = newline_list, + .start = start, .start_line = start_line, .encoding_name = NULL }, @@ -386,6 +391,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line &literals->number_nodes, &(pm_static_literals_metadata_t) { .newline_list = newline_list, + .start = start, .start_line = start_line, .encoding_name 
= NULL }, @@ -399,6 +405,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line &literals->string_nodes, &(pm_static_literals_metadata_t) { .newline_list = newline_list, + .start = start, .start_line = start_line, .encoding_name = NULL }, @@ -411,6 +418,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line &literals->regexp_nodes, &(pm_static_literals_metadata_t) { .newline_list = newline_list, + .start = start, .start_line = start_line, .encoding_name = NULL }, @@ -423,6 +431,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line &literals->symbol_nodes, &(pm_static_literals_metadata_t) { .newline_list = newline_list, + .start = start, .start_line = start_line, .encoding_name = NULL }, @@ -502,12 +511,12 @@ pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_met const double value = ((const pm_float_node_t *) node)->value; if (PRISM_ISINF(value)) { - if (*node->location.start == '-') { + if (metadata->start[node->location.start] == '-') { pm_buffer_append_byte(buffer, '-'); } pm_buffer_append_string(buffer, "Infinity", 8); } else if (value == 0.0) { - if (*node->location.start == '-') { + if (metadata->start[node->location.start] == '-') { pm_buffer_append_byte(buffer, '-'); } pm_buffer_append_string(buffer, "0.0", 3); @@ -604,11 +613,12 @@ pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_met * Create a string-based representation of the given static literal. 
*/ void -pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node) { +pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, const uint8_t *start, int32_t start_line, const char *encoding_name, const pm_node_t *node) { pm_static_literal_inspect_node( buffer, &(pm_static_literals_metadata_t) { .newline_list = newline_list, + .start = start, .start_line = start_line, .encoding_name = encoding_name }, diff --git a/prism/static_literals.h b/prism/static_literals.h index bd29761899c29c..0f8eb43bfa5623 100644 --- a/prism/static_literals.h +++ b/prism/static_literals.h @@ -92,13 +92,14 @@ typedef struct { * Add a node to the set of static literals. * * @param newline_list The list of newline offsets to use to calculate lines. + * @param start The start of the source being parsed. * @param start_line The line number that the parser starts on. * @param literals The set of static literals to add the node to. * @param node The node to add to the set. * @param replace Whether to replace the previous node if one already exists. * @return A pointer to the node that is being overwritten, if there is one. */ -pm_node_t * pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace); +pm_node_t * pm_static_literals_add(const pm_newline_list_t *newline_list, const uint8_t *start, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace); /** * Free the internal memory associated with the given static literals set. @@ -112,10 +113,11 @@ void pm_static_literals_free(pm_static_literals_t *literals); * * @param buffer The buffer to write the string to. * @param newline_list The list of newline offsets to use to calculate lines. + * @param start The start of the source being parsed. * @param start_line The line number that the parser starts on. 
* @param encoding_name The name of the encoding of the source being parsed. * @param node The node to create a string representation of. */ -void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node); +void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, const uint8_t *start, int32_t start_line, const char *encoding_name, const pm_node_t *node); #endif diff --git a/prism/templates/ext/prism/api_node.c.erb b/prism/templates/ext/prism/api_node.c.erb index 23af8886a7364f..e9c3742085369f 100644 --- a/prism/templates/ext/prism/api_node.c.erb +++ b/prism/templates/ext/prism/api_node.c.erb @@ -12,17 +12,12 @@ static VALUE rb_cPrism<%= node.name %>; <%- end -%> static VALUE -pm_location_new(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end, VALUE source, bool freeze) { +pm_location_new(const uint32_t start, const uint32_t length, VALUE source, bool freeze) { if (freeze) { - VALUE location_argv[] = { - source, - LONG2FIX(start - parser->start), - LONG2FIX(end - start) - }; - + VALUE location_argv[] = { source, LONG2FIX(start), LONG2FIX(length) }; return rb_obj_freeze(rb_class_new_instance(3, location_argv, rb_cPrismLocation)); } else { - uint64_t value = ((((uint64_t) (start - parser->start)) << 32) | ((uint32_t) (end - start))); + uint64_t value = ((((uint64_t) start) << 32) | ((uint64_t) length)); return ULL2NUM(value); } } @@ -30,7 +25,7 @@ pm_location_new(const pm_parser_t *parser, const uint8_t *start, const uint8_t * VALUE pm_token_new(const pm_parser_t *parser, const pm_token_t *token, rb_encoding *encoding, VALUE source, bool freeze) { ID type = rb_intern(pm_token_type_name(token->type)); - VALUE location = pm_location_new(parser, token->start, token->end, source, freeze); + VALUE location = pm_location_new((uint32_t) (token->start - parser->start), (uint32_t) (token->end - token->start), source, freeze); VALUE 
slice = rb_enc_str_new((const char *) token->start, token->end - token->start, encoding); if (freeze) rb_obj_freeze(slice); @@ -200,7 +195,7 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi argv[1] = ULONG2NUM(node->node_id); // location - argv[2] = pm_location_new(parser, node->location.start, node->location.end, source, freeze); + argv[2] = pm_location_new(node->location.start, node->location.length, source, freeze); // flags argv[3] = ULONG2NUM(node->flags); @@ -237,10 +232,10 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi if (freeze) rb_obj_freeze(argv[<%= index %>]); <%- when Prism::Template::LocationField -%> #line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>" - argv[<%= index %>] = pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source, freeze); + argv[<%= index %>] = pm_location_new(cast-><%= field.name %>.start, cast-><%= field.name %>.length, source, freeze); <%- when Prism::Template::OptionalLocationField -%> #line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>" - argv[<%= index %>] = cast-><%= field.name %>.start == NULL ? Qnil : pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source, freeze); + argv[<%= index %>] = cast-><%= field.name %>.length == 0 ? 
Qnil : pm_location_new(cast-><%= field.name %>.start, cast-><%= field.name %>.length, source, freeze); <%- when Prism::Template::UInt8Field -%> #line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>" argv[<%= index %>] = UINT2NUM(cast-><%= field.name %>); diff --git a/prism/templates/include/prism/ast.h.erb b/prism/templates/include/prism/ast.h.erb index 790cf9ebb8ade1..9115f20eaae766 100644 --- a/prism/templates/include/prism/ast.h.erb +++ b/prism/templates/include/prism/ast.h.erb @@ -46,15 +46,19 @@ typedef struct { } pm_token_t; /** - * This represents a range of bytes in the source string to which a node or - * token corresponds. + * This struct represents a slice in the source code, defined by an offset and + * a length. Note that we have confirmation that we can represent all locations + * within Ruby source files using 32-bit integers per: + * + * https://bugs.ruby-lang.org/issues/20488#note-1 + * */ typedef struct { - /** A pointer to the start location of the range in the source. */ - const uint8_t *start; + /** The offset of the location from the start of the source. */ + uint32_t start; - /** A pointer to the end location of the range in the source. */ - const uint8_t *end; + /** The length of the location. */ + uint32_t length; } pm_location_t; struct pm_node; @@ -112,7 +116,7 @@ static const pm_node_flags_t PM_NODE_FLAG_STATIC_LITERAL = 0x2; typedef struct pm_node { /** * This represents the type of the node. It somewhat maps to the nodes that - * existed in the original grammar and ripper, but it's not a 1:1 mapping. + * existed in the original grammar and ripper, but it is not a 1:1 mapping. */ pm_node_type_t type; @@ -129,7 +133,7 @@ typedef struct pm_node { uint32_t node_id; /** - * This is the location of the node in the source. It's a range of bytes + * This is the location of the node in the source. It is a range of bytes * containing a start and an end. 
*/ pm_location_t location; @@ -160,6 +164,15 @@ typedef struct pm_node { * Return true if the given flag is set on the given node. */ #define PM_NODE_FLAG_P(node_, flag_) ((PM_NODE_FLAGS(node_) & (flag_)) != 0) + +/** + * The alignment required for a child node within a parent node. + */ +#ifdef _MSC_VER +#define PM_NODE_ALIGNAS __declspec(align(8)) +#else +#define PM_NODE_ALIGNAS PRISM_ALIGNAS(PRISM_ALIGNOF(void *)) +#endif <%- nodes.each do |node| -%> /** @@ -182,7 +195,6 @@ typedef struct pm_node { typedef struct pm_<%= node.human %> { /** The embedded base node. */ pm_node_t base; - <%- node.fields.each do |field| -%> /** @@ -195,7 +207,7 @@ typedef struct pm_<%= node.human %> { <%- end -%> */ <%= case field - when Prism::Template::NodeField, Prism::Template::OptionalNodeField then "struct #{field.c_type} *#{field.name}" + when Prism::Template::NodeField, Prism::Template::OptionalNodeField then "PM_NODE_ALIGNAS struct #{field.c_type} *#{field.name}" when Prism::Template::NodeListField then "struct pm_node_list #{field.name}" when Prism::Template::ConstantField, Prism::Template::OptionalConstantField then "pm_constant_id_t #{field.name}" when Prism::Template::ConstantListField then "pm_constant_id_list_t #{field.name}" diff --git a/prism/templates/include/prism/diagnostic.h.erb b/prism/templates/include/prism/diagnostic.h.erb index 07bbc8fae79264..c1864e602139e3 100644 --- a/prism/templates/include/prism/diagnostic.h.erb +++ b/prism/templates/include/prism/diagnostic.h.erb @@ -100,25 +100,25 @@ const char * pm_diagnostic_id_human(pm_diagnostic_id_t diag_id); * memory for its message. * * @param list The list to append to. - * @param start The start of the diagnostic. - * @param end The end of the diagnostic. + * @param start The source offset of the start of the diagnostic. + * @param length The length of the diagnostic. * @param diag_id The diagnostic ID. * @return Whether the diagnostic was successfully appended. 
*/ -bool pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id); +bool pm_diagnostic_list_append(pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id); /** * Append a diagnostic to the given list of diagnostics that is using a format * string for its message. * * @param list The list to append to. - * @param start The start of the diagnostic. - * @param end The end of the diagnostic. + * @param start The source offset of the start of the diagnostic. + * @param length The length of the diagnostic. * @param diag_id The diagnostic ID. * @param ... The arguments to the format string for the message. * @return Whether the diagnostic was successfully appended. */ -bool pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id, ...); +bool pm_diagnostic_list_append_format(pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id, ...); /** * Deallocate the internal state of the given diagnostic list. diff --git a/prism/templates/lib/prism/dot_visitor.rb.erb b/prism/templates/lib/prism/dot_visitor.rb.erb index cd2998fe61b9d6..87de1965b083b0 100644 --- a/prism/templates/lib/prism/dot_visitor.rb.erb +++ b/prism/templates/lib/prism/dot_visitor.rb.erb @@ -169,7 +169,7 @@ module Prism "Node_#{node.object_id}" end - # Inspect a location to display the start and end line and column numbers. + # Inspect a location to display the start and end line and columns in bytes. 
def location_inspect(location) "(#{location.start_line},#{location.start_column})-(#{location.end_line},#{location.end_column})" end diff --git a/prism/templates/lib/prism/node.rb.erb b/prism/templates/lib/prism/node.rb.erb index 8225bfb328e8da..d14a06961a577d 100644 --- a/prism/templates/lib/prism/node.rb.erb +++ b/prism/templates/lib/prism/node.rb.erb @@ -183,14 +183,13 @@ module Prism def tunnel(line, column) queue = [self] #: Array[Prism::node] result = [] #: Array[Prism::node] - - search_offset = source.line_to_byte_offset(line) + column + offset = source.byte_offset(line, column) while (node = queue.shift) result << node node.each_child_node do |child_node| - if child_node.start_offset <= search_offset && search_offset < child_node.end_offset + if child_node.start_offset <= offset && offset < child_node.end_offset queue << child_node break end @@ -201,7 +200,7 @@ module Prism end # Returns the first node that matches the given block when visited in a - # depth-first search. This is useful for finding a node that matches a + # breadth-first search. This is useful for finding a node that matches a # particular condition. # # node.breadth_first_search { |node| node.node_id == node_id } @@ -216,6 +215,26 @@ module Prism nil end + alias find breadth_first_search + + # Returns all of the nodes that match the given block when visited in a + # breadth-first search. This is useful for finding all nodes that match a + # particular condition. + # + # node.breadth_first_search_all { |node| node.is_a?(Prism::CallNode) } + # + def breadth_first_search_all(&block) + queue = [self] #: Array[Prism::node] + results = [] #: Array[Prism::node] + + while (node = queue.shift) + results << node if yield node + queue.concat(node.compact_child_nodes) + end + + results + end + alias find_all breadth_first_search_all # Returns a list of the fields that exist for this node class. Fields # describe the structure of the node. 
This kind of reflection is useful for diff --git a/prism/templates/lib/prism/serialize.rb.erb b/prism/templates/lib/prism/serialize.rb.erb index 6902df5c0159d2..2275d685ca7942 100644 --- a/prism/templates/lib/prism/serialize.rb.erb +++ b/prism/templates/lib/prism/serialize.rb.erb @@ -10,7 +10,7 @@ module Prism # The minor version of prism that we are expecting to find in the serialized # strings. - MINOR_VERSION = 8 + MINOR_VERSION = 9 # The patch version of prism that we are expecting to find in the serialized # strings. diff --git a/prism/templates/src/diagnostic.c.erb b/prism/templates/src/diagnostic.c.erb index 121dd4b2b652f7..88f8525f8008fb 100644 --- a/prism/templates/src/diagnostic.c.erb +++ b/prism/templates/src/diagnostic.c.erb @@ -447,12 +447,12 @@ pm_diagnostic_level(pm_diagnostic_id_t diag_id) { * Append an error to the given list of diagnostic. */ bool -pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) { +pm_diagnostic_list_append(pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) { pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) xcalloc(1, sizeof(pm_diagnostic_t)); if (diagnostic == NULL) return false; *diagnostic = (pm_diagnostic_t) { - .location = { start, end }, + .location = { .start = start, .length = length }, .diag_id = diag_id, .message = pm_diagnostic_message(diag_id), .owned = false, @@ -468,7 +468,7 @@ pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t * * string for its message. */ bool -pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id, ...) { +pm_diagnostic_list_append_format(pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id, ...) 
{ va_list arguments; va_start(arguments, diag_id); @@ -485,19 +485,19 @@ pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const ui return false; } - size_t length = (size_t) (result + 1); - char *message = (char *) xmalloc(length); + size_t message_length = (size_t) (result + 1); + char *message = (char *) xmalloc(message_length); if (message == NULL) { xfree(diagnostic); return false; } va_start(arguments, diag_id); - vsnprintf(message, length, format, arguments); + vsnprintf(message, message_length, format, arguments); va_end(arguments); *diagnostic = (pm_diagnostic_t) { - .location = { start, end }, + .location = { .start = start, .length = length }, .diag_id = diag_id, .message = message, .owned = true, diff --git a/prism/templates/src/node.c.erb b/prism/templates/src/node.c.erb index 2357e552000bc3..f1709a0249c3d7 100644 --- a/prism/templates/src/node.c.erb +++ b/prism/templates/src/node.c.erb @@ -226,10 +226,8 @@ pm_dump_json_constant(pm_buffer_t *buffer, const pm_parser_t *parser, pm_constan } static void -pm_dump_json_location(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_location_t *location) { - uint32_t start = (uint32_t) (location->start - parser->start); - uint32_t end = (uint32_t) (location->end - parser->start); - pm_buffer_append_format(buffer, "{\"start\":%" PRIu32 ",\"end\":%" PRIu32 "}", start, end); +pm_dump_json_location(pm_buffer_t *buffer, const pm_location_t *location) { + pm_buffer_append_format(buffer, "{\"start\":%" PRIu32 ",\"length\":%" PRIu32 "}", location->start, location->length); } /** @@ -243,7 +241,7 @@ pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *no pm_buffer_append_string(buffer, "{\"type\":\"<%= node.name %>\",\"location\":", <%= node.name.bytesize + 22 %>); const pm_<%= node.human %>_t *cast = (const pm_<%= node.human %>_t *) node; - pm_dump_json_location(buffer, parser, &cast->base.location); + pm_dump_json_location(buffer, &cast->base.location); <%- 
[*node.flags, *node.fields].each_with_index do |field, index| -%> // Dump the <%= field.name %> field @@ -290,10 +288,10 @@ pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *no } pm_buffer_append_byte(buffer, ']'); <%- when Prism::Template::LocationField -%> - pm_dump_json_location(buffer, parser, &cast-><%= field.name %>); + pm_dump_json_location(buffer, &cast-><%= field.name %>); <%- when Prism::Template::OptionalLocationField -%> - if (cast-><%= field.name %>.start != NULL) { - pm_dump_json_location(buffer, parser, &cast-><%= field.name %>); + if (cast-><%= field.name %>.length != 0) { + pm_dump_json_location(buffer, &cast-><%= field.name %>); } else { pm_buffer_append_string(buffer, "null", 4); } diff --git a/prism/templates/src/prettyprint.c.erb b/prism/templates/src/prettyprint.c.erb index 639c2fecf33ba3..74c0f6dbdf8478 100644 --- a/prism/templates/src/prettyprint.c.erb +++ b/prism/templates/src/prettyprint.c.erb @@ -13,7 +13,7 @@ void pm_prettyprint(void) {} static inline void prettyprint_location(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_location_t *location) { pm_line_column_t start = pm_newline_list_line_column(&parser->newline_list, location->start, parser->start_line); - pm_line_column_t end = pm_newline_list_line_column(&parser->newline_list, location->end, parser->start_line); + pm_line_column_t end = pm_newline_list_line_column(&parser->newline_list, location->start + location->length, parser->start_line); pm_buffer_append_format(output_buffer, "(%" PRIi32 ",%" PRIu32 ")-(%" PRIi32 ",%" PRIu32 ")", start.line, start.column, end.line, end.column); } @@ -106,17 +106,17 @@ prettyprint_node(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm pm_buffer_append_byte(output_buffer, ' '); prettyprint_location(output_buffer, parser, location); pm_buffer_append_string(output_buffer, " = \"", 4); - pm_buffer_append_source(output_buffer, location->start, (size_t) (location->end - location->start), 
PM_BUFFER_ESCAPING_RUBY); + pm_buffer_append_source(output_buffer, parser->start + location->start, (size_t) location->length, PM_BUFFER_ESCAPING_RUBY); pm_buffer_append_string(output_buffer, "\"\n", 2); <%- when Prism::Template::OptionalLocationField -%> pm_location_t *location = &cast-><%= field.name %>; - if (location->start == NULL) { + if (location->length == 0) { pm_buffer_append_string(output_buffer, " nil\n", 5); } else { pm_buffer_append_byte(output_buffer, ' '); prettyprint_location(output_buffer, parser, location); pm_buffer_append_string(output_buffer, " = \"", 4); - pm_buffer_append_source(output_buffer, location->start, (size_t) (location->end - location->start), PM_BUFFER_ESCAPING_RUBY); + pm_buffer_append_source(output_buffer, parser->start + location->start, (size_t) location->length, PM_BUFFER_ESCAPING_RUBY); pm_buffer_append_string(output_buffer, "\"\n", 2); } <%- when Prism::Template::UInt8Field -%> diff --git a/prism/templates/src/serialize.c.erb b/prism/templates/src/serialize.c.erb index 0f0aace445a680..958b0fd7cf075c 100644 --- a/prism/templates/src/serialize.c.erb +++ b/prism/templates/src/serialize.c.erb @@ -20,13 +20,9 @@ pm_sizet_to_u32(size_t value) { } static void -pm_serialize_location(const pm_parser_t *parser, const pm_location_t *location, pm_buffer_t *buffer) { - assert(location->start); - assert(location->end); - assert(location->start <= location->end); - - pm_buffer_append_varuint(buffer, pm_ptrdifft_to_u32(location->start - parser->start)); - pm_buffer_append_varuint(buffer, pm_ptrdifft_to_u32(location->end - location->start)); +pm_serialize_location(const pm_location_t *location, pm_buffer_t *buffer) { + pm_buffer_append_varuint(buffer, location->start); + pm_buffer_append_varuint(buffer, location->length); } static void @@ -77,7 +73,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { <%- if Prism::Template::INCLUDE_NODE_ID -%> pm_buffer_append_varuint(buffer, node->node_id); <%- end -%> - 
pm_serialize_location(parser, &node->location, buffer); + pm_serialize_location(&node->location, buffer); switch (PM_NODE_TYPE(node)) { // We do not need to serialize a ScopeNode ever as @@ -123,15 +119,15 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { } <%- when Prism::Template::LocationField -%> <%- if field.should_be_serialized? -%> - pm_serialize_location(parser, &((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer); + pm_serialize_location(&((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer); <%- end -%> <%- when Prism::Template::OptionalLocationField -%> <%- if field.should_be_serialized? -%> - if (((pm_<%= node.human %>_t *)node)-><%= field.name %>.start == NULL) { + if (((pm_<%= node.human %>_t *)node)-><%= field.name %>.length == 0) { pm_buffer_append_byte(buffer, 0); } else { pm_buffer_append_byte(buffer, 1); - pm_serialize_location(parser, &((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer); + pm_serialize_location(&((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer); } <%- end -%> <%- when Prism::Template::UInt8Field -%> @@ -169,60 +165,60 @@ pm_serialize_newline_list(pm_newline_list_t *list, pm_buffer_t *buffer) { } static void -pm_serialize_comment(pm_parser_t *parser, pm_comment_t *comment, pm_buffer_t *buffer) { +pm_serialize_comment(pm_comment_t *comment, pm_buffer_t *buffer) { // serialize type pm_buffer_append_byte(buffer, (uint8_t) comment->type); // serialize location - pm_serialize_location(parser, &comment->location, buffer); + pm_serialize_location(&comment->location, buffer); } /** * Serialize the given list of comments to the given buffer. 
*/ void -pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) { +pm_serialize_comment_list(pm_list_t *list, pm_buffer_t *buffer) { pm_buffer_append_varuint(buffer, pm_sizet_to_u32(pm_list_size(list))); pm_comment_t *comment; for (comment = (pm_comment_t *) list->head; comment != NULL; comment = (pm_comment_t *) comment->node.next) { - pm_serialize_comment(parser, comment, buffer); + pm_serialize_comment(comment, buffer); } } static void -pm_serialize_magic_comment(pm_parser_t *parser, pm_magic_comment_t *magic_comment, pm_buffer_t *buffer) { +pm_serialize_magic_comment(pm_magic_comment_t *magic_comment, pm_buffer_t *buffer) { // serialize key location - pm_buffer_append_varuint(buffer, pm_ptrdifft_to_u32(magic_comment->key_start - parser->start)); - pm_buffer_append_varuint(buffer, pm_sizet_to_u32(magic_comment->key_length)); + pm_buffer_append_varuint(buffer, magic_comment->key.start); + pm_buffer_append_varuint(buffer, magic_comment->key.length); // serialize value location - pm_buffer_append_varuint(buffer, pm_ptrdifft_to_u32(magic_comment->value_start - parser->start)); - pm_buffer_append_varuint(buffer, pm_sizet_to_u32(magic_comment->value_length)); + pm_buffer_append_varuint(buffer, magic_comment->value.start); + pm_buffer_append_varuint(buffer, magic_comment->value.length); } static void -pm_serialize_magic_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) { +pm_serialize_magic_comment_list(pm_list_t *list, pm_buffer_t *buffer) { pm_buffer_append_varuint(buffer, pm_sizet_to_u32(pm_list_size(list))); pm_magic_comment_t *magic_comment; for (magic_comment = (pm_magic_comment_t *) list->head; magic_comment != NULL; magic_comment = (pm_magic_comment_t *) magic_comment->node.next) { - pm_serialize_magic_comment(parser, magic_comment, buffer); + pm_serialize_magic_comment(magic_comment, buffer); } } static void pm_serialize_data_loc(const pm_parser_t *parser, pm_buffer_t *buffer) { - if (parser->data_loc.end == 
NULL) { + if (parser->data_loc.length == 0) { pm_buffer_append_byte(buffer, 0); } else { pm_buffer_append_byte(buffer, 1); - pm_serialize_location(parser, &parser->data_loc, buffer); + pm_serialize_location(&parser->data_loc, buffer); } } static void -pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buffer_t *buffer) { +pm_serialize_diagnostic(pm_diagnostic_t *diagnostic, pm_buffer_t *buffer) { // serialize the type pm_buffer_append_varuint(buffer, (uint32_t) diagnostic->diag_id); @@ -232,18 +228,18 @@ pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buf pm_buffer_append_string(buffer, diagnostic->message, message_length); // serialize location - pm_serialize_location(parser, &diagnostic->location, buffer); + pm_serialize_location(&diagnostic->location, buffer); pm_buffer_append_byte(buffer, diagnostic->level); } static void -pm_serialize_diagnostic_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) { +pm_serialize_diagnostic_list(pm_list_t *list, pm_buffer_t *buffer) { pm_buffer_append_varuint(buffer, pm_sizet_to_u32(pm_list_size(list))); pm_diagnostic_t *diagnostic; for (diagnostic = (pm_diagnostic_t *) list->head; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) { - pm_serialize_diagnostic(parser, diagnostic, buffer); + pm_serialize_diagnostic(diagnostic, buffer); } } @@ -263,12 +259,12 @@ pm_serialize_metadata(pm_parser_t *parser, pm_buffer_t *buffer) { pm_buffer_append_varsint(buffer, parser->start_line); pm_serialize_newline_list(&parser->newline_list, buffer); <%- unless Prism::Template::SERIALIZE_ONLY_SEMANTICS_FIELDS -%> - pm_serialize_comment_list(parser, &parser->comment_list, buffer); + pm_serialize_comment_list(&parser->comment_list, buffer); <%- end -%> - pm_serialize_magic_comment_list(parser, &parser->magic_comment_list, buffer); + pm_serialize_magic_comment_list(&parser->magic_comment_list, buffer); pm_serialize_data_loc(parser, buffer); - 
pm_serialize_diagnostic_list(parser, &parser->error_list, buffer); - pm_serialize_diagnostic_list(parser, &parser->warning_list, buffer); + pm_serialize_diagnostic_list(&parser->error_list, buffer); + pm_serialize_diagnostic_list(&parser->warning_list, buffer); } #line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>" diff --git a/prism/templates/src/token_type.c.erb b/prism/templates/src/token_type.c.erb index f196393ee12661..5c6f2713100f91 100644 --- a/prism/templates/src/token_type.c.erb +++ b/prism/templates/src/token_type.c.erb @@ -31,10 +31,6 @@ pm_token_type_human(pm_token_type_t token_type) { switch (token_type) { case PM_TOKEN_EOF: return "end-of-input"; - case PM_TOKEN_MISSING: - return "missing token"; - case PM_TOKEN_NOT_PROVIDED: - return "not provided token"; case PM_TOKEN_AMPERSAND: return "'&'"; case PM_TOKEN_AMPERSAND_AMPERSAND: diff --git a/prism/util/pm_char.c b/prism/util/pm_char.c index a51dc11645ff63..748582b7fe10ae 100644 --- a/prism/util/pm_char.c +++ b/prism/util/pm_char.c @@ -83,15 +83,15 @@ pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length) { * searching past the given maximum number of characters. 
*/ size_t -pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list) { +pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list, uint32_t start_offset) { if (length <= 0) return 0; - size_t size = 0; - size_t maximum = (size_t) length; + uint32_t size = 0; + uint32_t maximum = (uint32_t) length; while (size < maximum && (pm_byte_table[string[size]] & PRISM_CHAR_BIT_WHITESPACE)) { if (string[size] == '\n') { - pm_newline_list_append(newline_list, string + size); + pm_newline_list_append(newline_list, start_offset + size + 1); } size++; diff --git a/prism/util/pm_char.h b/prism/util/pm_char.h index deeafd632177da..b213e8edeefbe8 100644 --- a/prism/util/pm_char.h +++ b/prism/util/pm_char.h @@ -31,10 +31,12 @@ size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length); * @param string The string to search. * @param length The maximum number of characters to search. * @param newline_list The list of newlines to populate. + * @param start_offset The offset at which the string occurs in the source, for + * the purpose of tracking newlines. * @return The number of characters at the start of the string that are * whitespace. */ -size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list); +size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list, uint32_t start_offset); /** * Returns the number of characters at the start of the string that are inline diff --git a/prism/util/pm_newline_list.c b/prism/util/pm_newline_list.c index 8331618f54ff20..89c294a6d777e0 100644 --- a/prism/util/pm_newline_list.c +++ b/prism/util/pm_newline_list.c @@ -5,12 +5,10 @@ * allocation of the offsets succeeds, otherwise returns false. 
*/ bool -pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity) { - list->offsets = (size_t *) xcalloc(capacity, sizeof(size_t)); +pm_newline_list_init(pm_newline_list_t *list, size_t capacity) { + list->offsets = (uint32_t *) xcalloc(capacity, sizeof(uint32_t)); if (list->offsets == NULL) return false; - list->start = start; - // This is 1 instead of 0 because we want to include the first line of the // file as having offset 0, which is set because of calloc. list->size = 1; @@ -32,24 +30,20 @@ pm_newline_list_clear(pm_newline_list_t *list) { * the offsets succeeds (if one was necessary), otherwise returns false. */ bool -pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) { +pm_newline_list_append(pm_newline_list_t *list, uint32_t cursor) { if (list->size == list->capacity) { - size_t *original_offsets = list->offsets; + uint32_t *original_offsets = list->offsets; list->capacity = (list->capacity * 3) / 2; - list->offsets = (size_t *) xcalloc(list->capacity, sizeof(size_t)); + list->offsets = (uint32_t *) xcalloc(list->capacity, sizeof(uint32_t)); if (list->offsets == NULL) return false; - memcpy(list->offsets, original_offsets, list->size * sizeof(size_t)); + memcpy(list->offsets, original_offsets, list->size * sizeof(uint32_t)); xfree(original_offsets); } - assert(*cursor == '\n'); - assert(cursor >= list->start); - size_t newline_offset = (size_t) (cursor - list->start + 1); - - assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]); - list->offsets[list->size++] = newline_offset; + assert(list->size == 0 || cursor > list->offsets[list->size - 1]); + list->offsets[list->size++] = cursor; return true; } @@ -59,21 +53,18 @@ pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) { * line of the closest offset less than the given offset is returned. 
*/ int32_t -pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line) { - assert(cursor >= list->start); - size_t offset = (size_t) (cursor - list->start); - +pm_newline_list_line(const pm_newline_list_t *list, uint32_t cursor, int32_t start_line) { size_t left = 0; size_t right = list->size - 1; while (left <= right) { size_t mid = left + (right - left) / 2; - if (list->offsets[mid] == offset) { + if (list->offsets[mid] == cursor) { return ((int32_t) mid) + start_line; } - if (list->offsets[mid] < offset) { + if (list->offsets[mid] < cursor) { left = mid + 1; } else { right = mid - 1; @@ -89,21 +80,18 @@ pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32 * are returned. */ pm_line_column_t -pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line) { - assert(cursor >= list->start); - size_t offset = (size_t) (cursor - list->start); - +pm_newline_list_line_column(const pm_newline_list_t *list, uint32_t cursor, int32_t start_line) { size_t left = 0; size_t right = list->size - 1; while (left <= right) { size_t mid = left + (right - left) / 2; - if (list->offsets[mid] == offset) { + if (list->offsets[mid] == cursor) { return ((pm_line_column_t) { ((int32_t) mid) + start_line, 0 }); } - if (list->offsets[mid] < offset) { + if (list->offsets[mid] < cursor) { left = mid + 1; } else { right = mid - 1; @@ -112,7 +100,7 @@ pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor return ((pm_line_column_t) { .line = ((int32_t) left) + start_line - 1, - .column = (uint32_t) (offset - list->offsets[left - 1]) + .column = cursor - list->offsets[left - 1] }); } diff --git a/prism/util/pm_newline_list.h b/prism/util/pm_newline_list.h index 406abe8ba59062..dd3e625089ba3e 100644 --- a/prism/util/pm_newline_list.h +++ b/prism/util/pm_newline_list.h @@ -26,9 +26,6 @@ * sorted/inserted in ascending order. 
*/ typedef struct { - /** A pointer to the start of the source string. */ - const uint8_t *start; - /** The number of offsets in the list. */ size_t size; @@ -36,7 +33,7 @@ typedef struct { size_t capacity; /** The list of offsets. */ - size_t *offsets; + uint32_t *offsets; } pm_newline_list_t; /** @@ -46,7 +43,7 @@ typedef struct { /** The line number. */ int32_t line; - /** The column number. */ + /** The column in bytes. */ uint32_t column; } pm_line_column_t; @@ -55,41 +52,39 @@ typedef struct { * allocation of the offsets succeeds, otherwise returns false. * * @param list The list to initialize. - * @param start A pointer to the start of the source string. * @param capacity The initial capacity of the list. * @return True if the allocation of the offsets succeeds, otherwise false. */ -bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity); +bool pm_newline_list_init(pm_newline_list_t *list, size_t capacity); /** * Clear out the newlines that have been appended to the list. * * @param list The list to clear. */ -void -pm_newline_list_clear(pm_newline_list_t *list); +void pm_newline_list_clear(pm_newline_list_t *list); /** * Append a new offset to the newline list. Returns true if the reallocation of * the offsets succeeds (if one was necessary), otherwise returns false. * * @param list The list to append to. - * @param cursor A pointer to the offset to append. + * @param cursor The offset to append. * @return True if the reallocation of the offsets succeeds (if one was * necessary), otherwise false. */ -bool pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor); +bool pm_newline_list_append(pm_newline_list_t *list, uint32_t cursor); /** * Returns the line of the given offset. If the offset is not in the list, the * line of the closest offset less than the given offset is returned. * * @param list The list to search. - * @param cursor A pointer to the offset to search for. 
+ * @param cursor The offset to search for. * @param start_line The line to start counting from. * @return The line of the given offset. */ -int32_t pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line); +int32_t pm_newline_list_line(const pm_newline_list_t *list, uint32_t cursor, int32_t start_line); /** * Returns the line and column of the given offset. If the offset is not in the @@ -97,11 +92,11 @@ int32_t pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *curso * are returned. * * @param list The list to search. - * @param cursor A pointer to the offset to search for. + * @param cursor The offset to search for. * @param start_line The line to start counting from. * @return The line and column of the given offset. */ -pm_line_column_t pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line); +pm_line_column_t pm_newline_list_line_column(const pm_newline_list_t *list, uint32_t cursor, int32_t start_line); /** * Free the internal memory allocated for the newline list. diff --git a/prism/util/pm_strpbrk.c b/prism/util/pm_strpbrk.c index 916a4cc3fd3c16..60c67b29831344 100644 --- a/prism/util/pm_strpbrk.c +++ b/prism/util/pm_strpbrk.c @@ -4,22 +4,22 @@ * Add an invalid multibyte character error to the parser. */ static inline void -pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) { - pm_diagnostic_list_append_format(&parser->error_list, start, end, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *start); +pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, uint32_t start, uint32_t length) { + pm_diagnostic_list_append_format(&parser->error_list, start, length, PM_ERR_INVALID_MULTIBYTE_CHARACTER, parser->start[start]); } /** * Set the explicit encoding for the parser to the current encoding. 
*/ static inline void -pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, const uint8_t *source, size_t width) { +pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, uint32_t start, uint32_t length) { if (parser->explicit_encoding != NULL) { if (parser->explicit_encoding == parser->encoding) { // Okay, we already locked to this encoding. } else if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) { // Not okay, we already found a Unicode escape sequence and this // conflicts. - pm_diagnostic_list_append_format(&parser->error_list, source, source + width, PM_ERR_MIXED_ENCODING, parser->encoding->name); + pm_diagnostic_list_append_format(&parser->error_list, start, length, PM_ERR_MIXED_ENCODING, parser->encoding->name); } else { // Should not be anything else. assert(false && "unreachable"); @@ -61,7 +61,7 @@ pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *chars index++; } while (index < maximum && pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index)) == 0); - pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index); + pm_strpbrk_invalid_multibyte_character(parser, (uint32_t) ((source + start) - parser->start), (uint32_t) (index - start)); } } } @@ -81,7 +81,7 @@ pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t return source + index; } - if (validate && source[index] >= 0x80) pm_strpbrk_explicit_encoding_set(parser, source, 1); + if (validate && source[index] >= 0x80) pm_strpbrk_explicit_encoding_set(parser, (uint32_t) (source - parser->start), 1); index++; } @@ -105,7 +105,7 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t index++; } else { size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index)); - if (validate) pm_strpbrk_explicit_encoding_set(parser, source, width); + if (validate) pm_strpbrk_explicit_encoding_set(parser, (uint32_t) (source - parser->start), (uint32_t) width); if (width > 0) { 
index += width; @@ -122,7 +122,7 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t index++; } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0); - pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index); + pm_strpbrk_invalid_multibyte_character(parser, (uint32_t) ((source + start) - parser->start), (uint32_t) (index - start)); } } } @@ -148,7 +148,7 @@ pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t index++; } else { size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index)); - pm_strpbrk_explicit_encoding_set(parser, source, width); + pm_strpbrk_explicit_encoding_set(parser, (uint32_t) (source - parser->start), (uint32_t) width); if (width > 0) { index += width; @@ -163,7 +163,7 @@ pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t index++; } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0); - pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index); + pm_strpbrk_invalid_multibyte_character(parser, (uint32_t) ((source + start) - parser->start), (uint32_t) (index - start)); } } } diff --git a/prism/version.h b/prism/version.h index 0ef7435c1741e7..b95611f96c78e8 100644 --- a/prism/version.h +++ b/prism/version.h @@ -14,7 +14,7 @@ /** * The minor version of the Prism library as an int. */ -#define PRISM_VERSION_MINOR 8 +#define PRISM_VERSION_MINOR 9 /** * The patch version of the Prism library as an int. @@ -24,6 +24,6 @@ /** * The version of the Prism library as a constant string. 
*/ -#define PRISM_VERSION "1.8.0" +#define PRISM_VERSION "1.9.0" #endif diff --git a/prism_compile.c b/prism_compile.c index 788968113090cd..771db13f8909e7 100644 --- a/prism_compile.c +++ b/prism_compile.c @@ -144,7 +144,7 @@ pm_iseq_add_setlocal(rb_iseq_t *iseq, LINK_ANCHOR *const seq, int line, int node ((pm_node_location_t) { .line = pm_newline_list_line(&(parser)->newline_list, ((const pm_node_t *) (node))->location.start, (parser)->start_line), .node_id = ((const pm_node_t *) (node))->node_id }) #define PM_NODE_END_LOCATION(parser, node) \ - ((pm_node_location_t) { .line = pm_newline_list_line(&(parser)->newline_list, ((const pm_node_t *) (node))->location.end, (parser)->start_line), .node_id = ((const pm_node_t *) (node))->node_id }) + ((pm_node_location_t) { .line = pm_newline_list_line(&(parser)->newline_list, ((const pm_node_t *) (node))->location.start + ((const pm_node_t *) (node))->location.length, (parser)->start_line), .node_id = ((const pm_node_t *) (node))->node_id }) #define PM_LOCATION_START_LOCATION(parser, location, id) \ ((pm_node_location_t) { .line = pm_newline_list_line(&(parser)->newline_list, (location)->start, (parser)->start_line), .node_id = id }) @@ -153,7 +153,7 @@ pm_iseq_add_setlocal(rb_iseq_t *iseq, LINK_ANCHOR *const seq, int line, int node pm_newline_list_line_column(&(parser)->newline_list, ((const pm_node_t *) (node))->location.start, (parser)->start_line) #define PM_NODE_END_LINE_COLUMN(parser, node) \ - pm_newline_list_line_column(&(parser)->newline_list, ((const pm_node_t *) (node))->location.end, (parser)->start_line) + pm_newline_list_line_column(&(parser)->newline_list, ((const pm_node_t *) (node))->location.start + ((const pm_node_t *) (node))->location.length, (parser)->start_line) #define PM_LOCATION_START_LINE_COLUMN(parser, location) \ pm_newline_list_line_column(&(parser)->newline_list, (location)->start, (parser)->start_line) @@ -3231,7 +3231,7 @@ pm_scope_node_init(const pm_node_t *node, pm_scope_node_t *scope, 
pm_scope_node_ scope->base.type = PM_SCOPE_NODE; scope->base.location.start = node->location.start; - scope->base.location.end = node->location.end; + scope->base.location.length = node->location.length; scope->previous = previous; scope->ast_node = (pm_node_t *) node; @@ -3272,7 +3272,7 @@ pm_scope_node_init(const pm_node_t *node, pm_scope_node_t *scope, pm_scope_node_ if (cast->statements != NULL) { scope->base.location.start = cast->statements->base.location.start; - scope->base.location.end = cast->statements->base.location.end; + scope->base.location.length = cast->statements->base.location.length; } break; @@ -3652,7 +3652,7 @@ static void pm_compile_call(rb_iseq_t *iseq, const pm_call_node_t *call_node, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node, ID method_id, LABEL *start) { const pm_location_t *message_loc = &call_node->message_loc; - if (message_loc->start == NULL) message_loc = &call_node->base.location; + if (message_loc->length == 0) message_loc = &call_node->base.location; const pm_node_location_t location = PM_LOCATION_START_LOCATION(scope_node->parser, message_loc, call_node->base.node_id); @@ -3666,16 +3666,34 @@ pm_compile_call(rb_iseq_t *iseq, const pm_call_node_t *call_node, LINK_ANCHOR *c if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) { if (PM_BRANCH_COVERAGE_P(iseq)) { - const uint8_t *cursors[3] = { - call_node->closing_loc.end, - call_node->arguments == NULL ? 
NULL : call_node->arguments->base.location.end, - call_node->message_loc.end - }; + uint32_t end_cursor; + bool end_found = false; + + if (call_node->closing_loc.length > 0) { + uint32_t cursor = call_node->closing_loc.start + call_node->closing_loc.length; + end_cursor = cursor; + end_found = true; + } + + if (call_node->arguments != NULL) { + uint32_t cursor = call_node->arguments->base.location.start + call_node->arguments->base.location.length; + if (!end_found || cursor > end_cursor) { + end_cursor = cursor; + end_found = true; + } + } - const uint8_t *end_cursor = cursors[0]; - end_cursor = (end_cursor == NULL || cursors[1] == NULL) ? cursors[1] : (end_cursor > cursors[1] ? end_cursor : cursors[1]); - end_cursor = (end_cursor == NULL || cursors[2] == NULL) ? cursors[2] : (end_cursor > cursors[2] ? end_cursor : cursors[2]); - if (!end_cursor) end_cursor = call_node->closing_loc.end; + if (call_node->message_loc.length > 0) { + uint32_t cursor = call_node->message_loc.start + call_node->message_loc.length; + if (!end_found || cursor > end_cursor) { + end_cursor = cursor; + end_found = true; + } + } + + if (!end_found) { + end_cursor = call_node->closing_loc.start + call_node->closing_loc.length; + } const pm_line_column_t start_location = PM_NODE_START_LINE_COLUMN(scope_node->parser, call_node); const pm_line_column_t end_location = pm_newline_list_line_column(&scope_node->parser->newline_list, end_cursor, scope_node->parser->start_line); @@ -3822,9 +3840,9 @@ pm_compile_call(rb_iseq_t *iseq, const pm_call_node_t *call_node, LINK_ANCHOR *c * node. 
*/ static inline VALUE -pm_compile_back_reference_ref(const pm_back_reference_read_node_t *node) +pm_compile_back_reference_ref(const pm_scope_node_t *scope_node, const pm_back_reference_read_node_t *node) { - const char *type = (const char *) (node->base.location.start + 1); + const char *type = (const char *) (scope_node->parser->start + node->base.location.start + 1); // Since a back reference is `$`, Ruby represents the ID as an // rb_intern on the value after the `$`. @@ -4215,7 +4233,7 @@ pm_compile_defined_expr0(rb_iseq_t *iseq, const pm_node_t *node, const pm_node_l // defined?($+) // ^^ const pm_back_reference_read_node_t *cast = (const pm_back_reference_read_node_t *) node; - VALUE ref = pm_compile_back_reference_ref(cast); + VALUE ref = pm_compile_back_reference_ref(scope_node, cast); PUSH_INSN(ret, location, putnil); PUSH_INSN3(ret, location, defined, INT2FIX(DEFINED_REF), ref, PUSH_VAL(DEFINED_GVAR)); @@ -7066,13 +7084,13 @@ pm_compile_alias_global_variable_node(rb_iseq_t *iseq, const pm_alias_global_var { const pm_location_t *name_loc = &node->new_name->location; - VALUE operand = ID2SYM(rb_intern3((const char *) name_loc->start, name_loc->end - name_loc->start, scope_node->encoding)); + VALUE operand = ID2SYM(rb_intern3((const char *) (scope_node->parser->start + name_loc->start), name_loc->length, scope_node->encoding)); PUSH_INSN1(ret, *location, putobject, operand); } { const pm_location_t *name_loc = &node->old_name->location; - VALUE operand = ID2SYM(rb_intern3((const char *) name_loc->start, name_loc->end - name_loc->start, scope_node->encoding)); + VALUE operand = ID2SYM(rb_intern3((const char *) (scope_node->parser->start + name_loc->start), name_loc->length, scope_node->encoding)); PUSH_INSN1(ret, *location, putobject, operand); } @@ -7351,7 +7369,7 @@ pm_compile_call_node(rb_iseq_t *iseq, const pm_call_node_t *node, LINK_ANCHOR *c ID method_id = pm_constant_id_lookup(scope_node, node->name); const pm_location_t *message_loc = 
&node->message_loc; - if (message_loc->start == NULL) message_loc = &node->base.location; + if (message_loc->length == 0) message_loc = &node->base.location; const pm_node_location_t location = PM_LOCATION_START_LOCATION(scope_node->parser, message_loc, node->base.node_id); const char *builtin_func; @@ -8719,7 +8737,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // ^^ if (!popped) { const pm_back_reference_read_node_t *cast = (const pm_back_reference_read_node_t *) node; - VALUE backref = pm_compile_back_reference_ref(cast); + VALUE backref = pm_compile_back_reference_ref(scope_node, cast); PUSH_INSN2(ret, location, getspecial, INT2FIX(1), backref); } @@ -10578,7 +10596,7 @@ pm_parse_errors_format_sort(const pm_parser_t *parser, const pm_list_t *error_li for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != finish; error = (pm_diagnostic_t *) error->node.next) { pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start, start_line); - pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end, start_line); + pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.start + error->location.length, start_line); // We're going to insert this error into the array in sorted order. 
We // do this by finding the first error that has a line number greater @@ -10942,7 +10960,7 @@ static bool pm_parse_process_error_utf8_p(const pm_parser_t *parser, const pm_location_t *location) { const size_t start_line = pm_newline_list_line_column(&parser->newline_list, location->start, 1).line; - const size_t end_line = pm_newline_list_line_column(&parser->newline_list, location->end, 1).line; + const size_t end_line = pm_newline_list_line_column(&parser->newline_list, location->start + location->length, 1).line; const uint8_t *start = parser->start + parser->newline_list.offsets[start_line - 1]; const uint8_t *end = ((end_line == parser->newline_list.size) ? parser->end : (parser->start + parser->newline_list.offsets[end_line])); @@ -11312,7 +11330,7 @@ pm_read_file(pm_string_t *string, const char *filepath) } size_t length = (size_t) len; - uint8_t *source = malloc(length); + uint8_t *source = malloc(length); // FIXME: using raw malloc because that's what Prism uses. memcpy(source, RSTRING_PTR(contents), length); *string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = length }; diff --git a/process.c b/process.c index 19f3172cb82ec6..006611d525e68f 100644 --- a/process.c +++ b/process.c @@ -4233,7 +4233,7 @@ rb_proc__fork(VALUE _obj) * puts "Before the fork: #{Process.pid}" * fork do * puts "In the child process: #{Process.pid}" - * end # => 382141 + * end # => 420520 * puts "After the fork: #{Process.pid}" * * Output: diff --git a/ractor.c b/ractor.c index 2dcbbd10a054bc..c90e5e16df1897 100644 --- a/ractor.c +++ b/ractor.c @@ -1211,7 +1211,8 @@ enum obj_traverse_iterator_result { traverse_stop, }; -typedef enum obj_traverse_iterator_result (*rb_obj_traverse_enter_func)(VALUE obj); +struct obj_traverse_data; +typedef enum obj_traverse_iterator_result (*rb_obj_traverse_enter_func)(VALUE obj, struct obj_traverse_data *data); typedef enum obj_traverse_iterator_result (*rb_obj_traverse_leave_func)(VALUE obj); typedef enum 
obj_traverse_iterator_result (*rb_obj_traverse_final_func)(VALUE obj); @@ -1222,13 +1223,15 @@ struct obj_traverse_data { rb_obj_traverse_leave_func leave_func; st_table *rec; - VALUE rec_hash; + VALUE rec_hash; // objects seen during traversal + VALUE *chain; // reference chain string built during unwinding (NULL if not needed) + VALUE *exception; // exception raised trying to freeze an object }; - struct obj_traverse_callback_data { bool stop; struct obj_traverse_data *data; + VALUE obj; }; static int obj_traverse_i(VALUE obj, struct obj_traverse_data *data); @@ -1239,11 +1242,13 @@ obj_hash_traverse_i(VALUE key, VALUE val, VALUE ptr) struct obj_traverse_callback_data *d = (struct obj_traverse_callback_data *)ptr; if (obj_traverse_i(key, d->data)) { + rb_ractor_error_chain_append(d->data->chain, "\n from Hash key %+"PRIsVALUE, key); d->stop = true; return ST_STOP; } if (obj_traverse_i(val, d->data)) { + rb_ractor_error_chain_append(d->data->chain, "\n from Hash value at key %+"PRIsVALUE, key); d->stop = true; return ST_STOP; } @@ -1277,6 +1282,9 @@ obj_traverse_ivar_foreach_i(ID key, VALUE val, st_data_t ptr) struct obj_traverse_callback_data *d = (struct obj_traverse_callback_data *)ptr; if (obj_traverse_i(val, d->data)) { + rb_ractor_error_chain_append(d->data->chain, + "\n from instance variable %"PRIsVALUE" of an instance of %"PRIsVALUE, + rb_id2str(key), rb_class_real(CLASS_OF(d->obj))); d->stop = true; return ST_STOP; } @@ -1289,7 +1297,7 @@ obj_traverse_i(VALUE obj, struct obj_traverse_data *data) { if (RB_SPECIAL_CONST_P(obj)) return 0; - switch (data->enter_func(obj)) { + switch (data->enter_func(obj, data)) { case traverse_cont: break; case traverse_skip: return 0; // skip children case traverse_stop: return 1; // stop search @@ -1304,9 +1312,12 @@ obj_traverse_i(VALUE obj, struct obj_traverse_data *data) struct obj_traverse_callback_data d = { .stop = false, .data = data, + .obj = obj, }; rb_ivar_foreach(obj, obj_traverse_ivar_foreach_i, 
(st_data_t)&d); - if (d.stop) return 1; + if (d.stop) { + return 1; + } switch (BUILTIN_TYPE(obj)) { // no child node @@ -1328,14 +1339,26 @@ obj_traverse_i(VALUE obj, struct obj_traverse_data *data) for (int i = 0; i < RARRAY_LENINT(obj); i++) { VALUE e = rb_ary_entry(obj, i); - if (obj_traverse_i(e, data)) return 1; + if (obj_traverse_i(e, data)) { + rb_ractor_error_chain_append(data->chain, "\n from Array element at index %d", i); + return 1; + } } } break; case T_HASH: { - if (obj_traverse_i(RHASH_IFNONE(obj), data)) return 1; + const VALUE ifnone = RHASH_IFNONE(obj); + if (obj_traverse_i(ifnone, data)) { + if (RB_FL_TEST_RAW(obj, RHASH_PROC_DEFAULT)) { + rb_ractor_error_chain_append(data->chain, "\n from Hash default proc"); + } + else { + rb_ractor_error_chain_append(data->chain, "\n from Hash default value"); + } + return 1; + } struct obj_traverse_callback_data d = { .stop = false, @@ -1352,7 +1375,14 @@ obj_traverse_i(VALUE obj, struct obj_traverse_data *data) const VALUE *ptr = RSTRUCT_CONST_PTR(obj); for (long i=0; ichain, + "\n from member %+"PRIsVALUE" of an instance of %"PRIsVALUE, + member_name, rb_class_real(CLASS_OF(obj))); + return 1; + } } } break; @@ -1423,15 +1453,21 @@ static int rb_obj_traverse(VALUE obj, rb_obj_traverse_enter_func enter_func, rb_obj_traverse_leave_func leave_func, - rb_obj_traverse_final_func final_func) + rb_obj_traverse_final_func final_func, + VALUE *chain, + VALUE *exception) { struct obj_traverse_data data = { .enter_func = enter_func, .leave_func = leave_func, .rec = NULL, + .chain = chain, + .exception = exception, }; - if (obj_traverse_i(obj, &data)) return 1; + if (obj_traverse_i(obj, &data)) { + return 1; + } if (final_func && data.rec) { struct rb_obj_traverse_final_data f = {final_func, 0}; st_foreach(data.rec, obj_traverse_final_i, (st_data_t)&f); @@ -1456,14 +1492,45 @@ allow_frozen_shareable_p(VALUE obj) return false; } +static VALUE +try_freeze(VALUE obj) +{ + rb_funcall(obj, idFreeze, 0); + return Qtrue; +} 
+ +struct rescue_freeze_data { + VALUE exception; +}; + +static VALUE +rescue_freeze(VALUE data, VALUE freeze_exception) +{ + struct rescue_freeze_data *rescue_freeze_data = (struct rescue_freeze_data *)data; + VALUE exception = rb_exc_new3(rb_eRactorError, rb_str_new_cstr("raised calling #freeze")); + rb_ivar_set(exception, rb_intern("cause"), freeze_exception); + rescue_freeze_data->exception = exception; + return Qfalse; +} + static enum obj_traverse_iterator_result -make_shareable_check_shareable_freeze(VALUE obj, enum obj_traverse_iterator_result result) +make_shareable_check_shareable_freeze(VALUE obj, enum obj_traverse_iterator_result result, struct obj_traverse_data *data) { if (!RB_OBJ_FROZEN_RAW(obj)) { - rb_funcall(obj, idFreeze, 0); + struct rescue_freeze_data rescue_freeze_data = { 0 }; + if (!rb_rescue(try_freeze, obj, rescue_freeze, (VALUE)&rescue_freeze_data)) { + if (data->exception) { + *data->exception = rescue_freeze_data.exception; + } + return traverse_stop; + } if (UNLIKELY(!RB_OBJ_FROZEN_RAW(obj))) { - rb_raise(rb_eRactorError, "#freeze does not freeze object correctly"); + VALUE exception = rb_exc_new3(rb_eRactorError, rb_str_new_cstr("#freeze does not freeze object correctly")); + if (data->exception) { + *data->exception = exception; + } + return traverse_stop; } if (RB_OBJ_SHAREABLE_P(obj)) { @@ -1477,7 +1544,7 @@ make_shareable_check_shareable_freeze(VALUE obj, enum obj_traverse_iterator_resu static int obj_refer_only_shareables_p(VALUE obj); static enum obj_traverse_iterator_result -make_shareable_check_shareable(VALUE obj) +make_shareable_check_shareable(VALUE obj, struct obj_traverse_data *data) { VM_ASSERT(!SPECIAL_CONST_P(obj)); @@ -1490,7 +1557,8 @@ make_shareable_check_shareable(VALUE obj) if (type->flags & RUBY_TYPED_FROZEN_SHAREABLE_NO_REC) { if (obj_refer_only_shareables_p(obj)) { - make_shareable_check_shareable_freeze(obj, traverse_skip); + enum obj_traverse_iterator_result result = make_shareable_check_shareable_freeze(obj, 
traverse_skip, data); + if (result == traverse_stop) return traverse_stop; RB_OBJ_SET_SHAREABLE(obj); return traverse_skip; } @@ -1500,11 +1568,19 @@ make_shareable_check_shareable(VALUE obj) } } else if (rb_obj_is_proc(obj)) { - rb_proc_ractor_make_shareable(obj, Qundef); + if (!rb_proc_ractor_make_shareable_continue(obj, Qundef, data->chain)) { + rb_proc_t *proc = (rb_proc_t *)RTYPEDDATA_DATA(obj); + if (proc->block.type != block_type_iseq) rb_raise(rb_eRuntimeError, "not supported yet"); + + if (data->exception) { + *data->exception = rb_exc_new3(rb_eRactorIsolationError, rb_sprintf("Proc's self is not shareable: %" PRIsVALUE, obj)); + } + return traverse_stop; + } return traverse_cont; } else { - rb_raise(rb_eRactorError, "can not make shareable object for %+"PRIsVALUE, obj); + return traverse_stop; } } @@ -1529,7 +1605,7 @@ make_shareable_check_shareable(VALUE obj) break; } - return make_shareable_check_shareable_freeze(obj, traverse_cont); + return make_shareable_check_shareable_freeze(obj, traverse_cont, data); } static enum obj_traverse_iterator_result @@ -1546,9 +1622,20 @@ mark_shareable(VALUE obj) VALUE rb_ractor_make_shareable(VALUE obj) { - rb_obj_traverse(obj, - make_shareable_check_shareable, - null_leave, mark_shareable); + VALUE chain = Qnil; + VALUE exception = Qfalse; + if (rb_obj_traverse(obj, make_shareable_check_shareable, null_leave, mark_shareable, &chain, &exception)) { + if (exception) { + VALUE id_mesg = rb_intern("mesg"); + VALUE message = rb_attr_get(exception, id_mesg); + message = rb_sprintf("%"PRIsVALUE"%"PRIsVALUE, message, chain); + rb_ivar_set(exception, id_mesg, message); + rb_exc_raise(exception); + } + rb_raise(rb_eRactorError, "can not make shareable object for %+"PRIsVALUE"%"PRIsVALUE, obj, chain); + } + RB_GC_GUARD(chain); + RB_GC_GUARD(exception); return obj; } @@ -1579,7 +1666,7 @@ rb_ractor_ensure_main_ractor(const char *msg) } static enum obj_traverse_iterator_result -shareable_p_enter(VALUE obj) +shareable_p_enter(VALUE 
obj, struct obj_traverse_data *data) { if (RB_OBJ_SHAREABLE_P(obj)) { return traverse_skip; @@ -1600,11 +1687,9 @@ shareable_p_enter(VALUE obj) } bool -rb_ractor_shareable_p_continue(VALUE obj) +rb_ractor_shareable_p_continue(VALUE obj, VALUE *chain) { - if (rb_obj_traverse(obj, - shareable_p_enter, null_leave, - mark_shareable)) { + if (rb_obj_traverse(obj, shareable_p_enter, null_leave, mark_shareable, chain, NULL)) { return false; } else { @@ -1620,7 +1705,7 @@ rb_ractor_setup_belonging(VALUE obj) } static enum obj_traverse_iterator_result -reset_belonging_enter(VALUE obj) +reset_belonging_enter(VALUE obj, struct obj_traverse_data *data) { if (rb_ractor_shareable_p(obj)) { return traverse_skip; @@ -1642,7 +1727,7 @@ static VALUE ractor_reset_belonging(VALUE obj) { #if RACTOR_CHECK_MODE > 0 - rb_obj_traverse(obj, reset_belonging_enter, null_leave, NULL); + rb_obj_traverse(obj, reset_belonging_enter, null_leave, NULL, NULL, NULL); #endif return obj; } diff --git a/ractor_core.h b/ractor_core.h index d112ff87244944..63ab853501adb8 100644 --- a/ractor_core.h +++ b/ractor_core.h @@ -149,7 +149,7 @@ st_table *rb_ractor_targeted_hooks(rb_ractor_t *cr); RUBY_SYMBOL_EXPORT_BEGIN void rb_ractor_finish_marking(void); -bool rb_ractor_shareable_p_continue(VALUE obj); +bool rb_ractor_shareable_p_continue(VALUE obj, VALUE *chain); // THIS FUNCTION SHOULD NOT CALL WHILE INCREMENTAL MARKING!! // This function is for T_DATA::free_func @@ -270,6 +270,24 @@ rb_ractor_targeted_hooks_cnt(rb_ractor_t *cr) return cr->pub.targeted_hooks_cnt; } +static inline void +rb_ractor_error_chain_append(VALUE *chain_ptr, const char *fmt, ...) 
+{ + if (!chain_ptr) return; + + va_list args; + va_start(args, fmt); + + if (NIL_P(*chain_ptr)) { + *chain_ptr = rb_vsprintf(fmt, args); + } + else { + rb_str_vcatf(*chain_ptr, fmt, args); + } + + va_end(args); +} + #if RACTOR_CHECK_MODE > 0 # define RACTOR_BELONGING_ID(obj) (*(uint32_t *)(((uintptr_t)(obj)) + rb_gc_obj_slot_size(obj))) diff --git a/range.c b/range.c index fd08a81de7b8b1..36afdfa7619005 100644 --- a/range.c +++ b/range.c @@ -2768,8 +2768,8 @@ range_overlap(VALUE range, VALUE other) * * First, what's elsewhere. Class \Range: * - * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. - * - Includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here], + * - Inherits from {class Object}[rdoc-ref:Object@Whats+Here]. + * - Includes {module Enumerable}[rdoc-ref:Enumerable@Whats+Here], * which provides dozens of additional methods. * * Here, class \Range provides methods that are useful for: diff --git a/ruby.c b/ruby.c index 28f43176d61d70..cd5c8d1d15d66e 100644 --- a/ruby.c +++ b/ruby.c @@ -2200,7 +2200,7 @@ prism_script(ruby_cmdline_options_t *opt, pm_parse_result_t *result) // If we found an __END__ marker, then we're going to define a global // DATA constant that is a file object that can be read to read the // contents after the marker. - if (NIL_P(error) && result->parser.data_loc.start != NULL) { + if (NIL_P(error) && result->parser.data_loc.length != 0) { rb_define_global_const("DATA", rb_stdin); } } @@ -2237,17 +2237,17 @@ prism_script(ruby_cmdline_options_t *opt, pm_parse_result_t *result) // If we found an __END__ marker, then we're going to define a global // DATA constant that is a file object that can be read to read the // contents after the marker. 
- if (NIL_P(error) && result->parser.data_loc.start != NULL) { + if (NIL_P(error) && result->parser.data_loc.length != 0) { int xflag = opt->xflag; VALUE file = open_load_file(script_name, &xflag); const pm_parser_t *parser = &result->parser; - size_t offset = parser->data_loc.start - parser->start + 7; + uint32_t offset = parser->data_loc.start + 7; if ((parser->start + offset < parser->end) && parser->start[offset] == '\r') offset++; if ((parser->start + offset < parser->end) && parser->start[offset] == '\n') offset++; - rb_funcall(file, rb_intern_const("seek"), 2, SIZET2NUM(offset), INT2FIX(SEEK_SET)); + rb_funcall(file, rb_intern_const("seek"), 2, UINT2NUM(offset), INT2FIX(SEEK_SET)); rb_define_global_const("DATA", file); } } diff --git a/set.c b/set.c index 4d8178ffc080de..0fcfb1ef14748d 100644 --- a/set.c +++ b/set.c @@ -141,7 +141,7 @@ set_mark(void *ptr) static void set_free_embedded(struct set_object *sobj) { - free((&sobj->table)->entries); + xfree((&sobj->table)->entries); } static void @@ -1187,7 +1187,7 @@ set_reset_table_with_type(VALUE set, const struct st_hash_type *type) set_iter(set, set_merge_i, (st_data_t)&args); set_free_embedded(sobj); memcpy(&sobj->table, new, sizeof(*new)); - free(new); + xfree(new); } else { sobj->table.type = type; @@ -2051,8 +2051,8 @@ rb_set_size(VALUE set) * * First, what's elsewhere. \Class \Set: * - * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. - * - Includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here], + * - Inherits from {class Object}[rdoc-ref:Object@Whats+Here]. + * - Includes {module Enumerable}[rdoc-ref:Enumerable@Whats+Here], * which provides dozens of additional methods. 
* * In particular, class \Set does not have many methods of its own diff --git a/spec/bundler/bundler/gem_helper_spec.rb b/spec/bundler/bundler/gem_helper_spec.rb index 94f66537d3ebf6..83c2dd237adcec 100644 --- a/spec/bundler/bundler/gem_helper_spec.rb +++ b/spec/bundler/bundler/gem_helper_spec.rb @@ -386,6 +386,7 @@ def sha512_hexdigest(path) credentials = double("credentials", "file?" => true) allow(Bundler.user_home).to receive(:join). with(".gem/credentials").and_return(credentials) + allow(Bundler.user_home).to receive(:join).and_call_original end describe "success messaging" do diff --git a/spec/bundler/install/gemfile/sources_spec.rb b/spec/bundler/install/gemfile/sources_spec.rb index 90f87ed0c5daea..69b0816a18999e 100644 --- a/spec/bundler/install/gemfile/sources_spec.rb +++ b/spec/bundler/install/gemfile/sources_spec.rb @@ -1195,4 +1195,45 @@ expect(gem_section).not_to include("activerecord (7.0.0)") end end + + context "when a scoped rubygems source is missing a transitive dependency" do + before do + build_repo2 do + build_gem "fallback_dep", "1.0.0" + build_gem "foo", "1.0.0" + end + + build_repo3 do + build_gem "private_parent", "1.0.0" do |s| + s.add_dependency "fallback_dep" + end + end + + gemfile <<-G + source "https://gem.repo2" + + gem "foo" + + source "https://gem.repo3" do + gem "private_parent", "1.0.0" + end + G + + bundle :install, artifice: "compact_index" + end + + it "falls back to the default rubygems source for that dependency" do + build_repo2 do + build_gem "foo", "2.0.0" + end + + system_gems [] + + bundle "update foo", artifice: "compact_index" + + expect(the_bundle).to include_gems("private_parent 1.0.0", "fallback_dep 1.0.0", "foo 2.0.0") + expect(the_bundle).to include_gems("private_parent 1.0.0", source: "remote3") + expect(the_bundle).to include_gems("fallback_dep 1.0.0", source: "remote2") + end + end end diff --git a/spec/bundler/support/path.rb b/spec/bundler/support/path.rb index 0a534dd40e44eb..679f54152b29d7 100644 --- 
a/spec/bundler/support/path.rb +++ b/spec/bundler/support/path.rb @@ -114,7 +114,17 @@ def tmp(*path) end def tmp_root - source_root.join("tmp") + if ruby_core? && (tmpdir = ENV["TMPDIR"]) + # Use realpath to resolve any symlinks in TMPDIR (e.g., on macOS /var -> /private/var) + real = begin + File.realpath(tmpdir) + rescue Errno::ENOENT, Errno::EACCES + tmpdir + end + Pathname(real) + else + source_root.join("tmp") + end end # Bump this version whenever you make a breaking change to the spec setup diff --git a/spec/bundler/support/rubygems_ext.rb b/spec/bundler/support/rubygems_ext.rb index 2d681529aac2ef..cf639a660a04fd 100644 --- a/spec/bundler/support/rubygems_ext.rb +++ b/spec/bundler/support/rubygems_ext.rb @@ -43,7 +43,18 @@ def test_setup # sign extension bundles on macOS, to avoid trying to find the specified key # from the fake $HOME/Library/Keychains directory. ENV.delete "RUBY_CODESIGN" - ENV["TMPDIR"] = Path.tmpdir.to_s + if Path.ruby_core? + if (tmpdir = ENV["TMPDIR"]) + tmpdir_real = begin + File.realpath(tmpdir) + rescue Errno::ENOENT, Errno::EACCES + tmpdir + end + ENV["TMPDIR"] = tmpdir_real if tmpdir_real != tmpdir + end + else + ENV["TMPDIR"] = Path.tmpdir.to_s + end require "rubygems/user_interaction" Gem::DefaultUserInteraction.ui = Gem::SilentUI.new diff --git a/spec/mspec/tool/sync/sync-rubyspec.rb b/spec/mspec/tool/sync/sync-rubyspec.rb index 617123733e5442..122de0decba9f6 100644 --- a/spec/mspec/tool/sync/sync-rubyspec.rb +++ b/spec/mspec/tool/sync/sync-rubyspec.rb @@ -190,20 +190,20 @@ def test_new_specs Dir.chdir(SOURCE_REPO) do workflow = YAML.load_file(".github/workflows/ci.yml") job_name = MSPEC ? "test" : "specs" - versions = workflow.dig("jobs", job_name, "strategy", "matrix", "ruby") + versions = workflow.dig("jobs", job_name, "strategy", "matrix", "ruby").map(&:to_s) versions = versions.grep(/^\d+\./) # Test on MRI min_version, max_version = versions.minmax test_command = MSPEC ? 
"bundle install && bundle exec rspec" : "../mspec/bin/mspec -j" run_test = -> version { - command = "chruby #{version} && #{test_command}" + command = "chruby ruby-#{version} && #{test_command}" sh ENV["SHELL"], "-c", command } run_test[min_version] run_test[max_version] - run_test["ruby-master"] if TEST_MASTER + run_test["master"] if TEST_MASTER end end diff --git a/spec/mspec/tool/tag_from_output.rb b/spec/mspec/tool/tag_from_output.rb index b6b46038556ae1..41aa70f932057f 100755 --- a/spec/mspec/tool/tag_from_output.rb +++ b/spec/mspec/tool/tag_from_output.rb @@ -20,7 +20,7 @@ NUMBER = /^\d+\)$/ ERROR_OR_FAILED = / (ERROR|FAILED)$/ -SPEC_FILE = /^(\/.+_spec\.rb)\:\d+/ +SPEC_FILE = /^((?:\/|[CD]:\/).+_spec\.rb)\:\d+/ output.slice_before(NUMBER).select { |number, *rest| number =~ NUMBER and rest.any? { |line| line =~ ERROR_OR_FAILED } diff --git a/spec/ruby/README.md b/spec/ruby/README.md index 674ada4c9e4cc9..14a0068346fe3d 100644 --- a/spec/ruby/README.md +++ b/spec/ruby/README.md @@ -64,6 +64,7 @@ For older specs try these commits: * Ruby 2.7.8 - [Suite](https://github.com/ruby/spec/commit/93787e6035c925b593a9c0c6fb0e7e07a6f1df1f) using [MSpec](https://github.com/ruby/mspec/commit/1d8cf64722d8a7529f7cd205be5f16a89b7a67fd) * Ruby 3.0.7 - [Suite](https://github.com/ruby/spec/commit/affef93d9940f615e4836f64b011da211f570913) using [MSpec](https://github.com/ruby/mspec/commit/0aabb3e548eb5ea6cad0125f8f46cee34542b6b7) * Ruby 3.1.6 - [Suite](https://github.com/ruby/spec/commit/ec960f2389d1c2265d32397fa8afa6d462014efc) using [MSpec](https://github.com/ruby/mspec/commit/484310dbed35b84c74484fd674602f88c42d063a) +* Ruby 3.2.9 - [Suite](https://github.com/ruby/spec/commit/97f076242b7fc6e60703e6a6053365065cd6fc30) using [MSpec](https://github.com/ruby/mspec/commit/54704795e21128a930af2021c72c49cb87065134) ### Running the specs diff --git a/spec/ruby/command_line/dash_r_spec.rb b/spec/ruby/command_line/dash_r_spec.rb index 9f673c53dcc097..62b8dc001452a7 100644 --- 
a/spec/ruby/command_line/dash_r_spec.rb +++ b/spec/ruby/command_line/dash_r_spec.rb @@ -16,10 +16,7 @@ out = ruby_exe(fixture(__FILE__, "bad_syntax.rb"), options: "-r #{@test_file}", args: "2>&1", exit_status: 1) $?.should_not.success? out.should include("REQUIRED") - - # it's tempting not to rely on error message and rely only on exception class name, - # but CRuby before 3.2 doesn't print class name for syntax error - out.should include_any_of("syntax error", "SyntaxError") + out.should include("SyntaxError") end it "does not require the file if the main script file does not exist" do diff --git a/spec/ruby/command_line/syntax_error_spec.rb b/spec/ruby/command_line/syntax_error_spec.rb index 9ba87b9e22795b..88864c048ebfee 100644 --- a/spec/ruby/command_line/syntax_error_spec.rb +++ b/spec/ruby/command_line/syntax_error_spec.rb @@ -3,17 +3,11 @@ describe "The interpreter" do it "prints an error when given a file with invalid syntax" do out = ruby_exe(fixture(__FILE__, "bad_syntax.rb"), args: "2>&1", exit_status: 1) - - # it's tempting not to rely on error message and rely only on exception class name, - # but CRuby before 3.2 doesn't print class name for syntax error - out.should include_any_of("syntax error", "SyntaxError") + out.should.include?("SyntaxError") end it "prints an error when given code via -e with invalid syntax" do out = ruby_exe(nil, args: "-e 'a{' 2>&1", exit_status: 1) - - # it's tempting not to rely on error message and rely only on exception class name, - # but CRuby before 3.2 doesn't print class name for syntax error - out.should include_any_of("syntax error", "SyntaxError") + out.should.include?("SyntaxError") end end diff --git a/spec/ruby/core/array/fetch_spec.rb b/spec/ruby/core/array/fetch_spec.rb index b81c0b48d7296d..598b481ba46a11 100644 --- a/spec/ruby/core/array/fetch_spec.rb +++ b/spec/ruby/core/array/fetch_spec.rb @@ -12,9 +12,9 @@ end it "raises an IndexError if there is no element at index" do - -> { [1, 2, 3].fetch(3) }.should 
raise_error(IndexError) - -> { [1, 2, 3].fetch(-4) }.should raise_error(IndexError) - -> { [].fetch(0) }.should raise_error(IndexError) + -> { [1, 2, 3].fetch(3) }.should raise_error(IndexError, "index 3 outside of array bounds: -3...3") + -> { [1, 2, 3].fetch(-4) }.should raise_error(IndexError, "index -4 outside of array bounds: -3...3") + -> { [].fetch(0) }.should raise_error(IndexError, "index 0 outside of array bounds: 0...0") end it "returns default if there is no element at index if passed a default value" do @@ -50,6 +50,6 @@ def o.to_int(); 5; end end it "raises a TypeError when the passed argument can't be coerced to Integer" do - -> { [].fetch("cat") }.should raise_error(TypeError) + -> { [].fetch("cat") }.should raise_error(TypeError, "no implicit conversion of String into Integer") end end diff --git a/spec/ruby/core/array/pack/c_spec.rb b/spec/ruby/core/array/pack/c_spec.rb index 47b71b663d6a98..7a2b95def87f7f 100644 --- a/spec/ruby/core/array/pack/c_spec.rb +++ b/spec/ruby/core/array/pack/c_spec.rb @@ -45,20 +45,10 @@ [1, 2, 3, 4, 5].pack(pack_format('*')).should == "\x01\x02\x03\x04\x05" end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - [1, 2, 3].pack(pack_format("\000", 2)).should == "\x01\x02" - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [1, 2, 3].pack(pack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + [1, 2, 3].pack(pack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown pack directive/) end it "ignores spaces between directives" do diff --git a/spec/ruby/core/array/pack/shared/basic.rb b/spec/ruby/core/array/pack/shared/basic.rb index a63f64d296a312..2ebd75f6c5ed79 100644 --- a/spec/ruby/core/array/pack/shared/basic.rb +++ b/spec/ruby/core/array/pack/shared/basic.rb @@ -32,22 +32,11 @@ 
[@obj, @obj, @obj, @obj].pack("aa #{pack_format} # some comment \n#{pack_format}").should be_an_instance_of(String) end - ruby_version_is ""..."3.3" do - it "warns that a directive is unknown" do - # additional directive ('a') is required for the X directive - -> { [@obj, @obj].pack("a K" + pack_format) }.should complain(/unknown pack directive 'K' in 'a K#{pack_format}'/) - -> { [@obj, @obj].pack("a 0" + pack_format) }.should complain(/unknown pack directive '0' in 'a 0#{pack_format}'/) - -> { [@obj, @obj].pack("a :" + pack_format) }.should complain(/unknown pack directive ':' in 'a :#{pack_format}'/) - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError when a directive is unknown" do - # additional directive ('a') is required for the X directive - -> { [@obj, @obj].pack("a R" + pack_format) }.should raise_error(ArgumentError, /unknown pack directive 'R'/) - -> { [@obj, @obj].pack("a 0" + pack_format) }.should raise_error(ArgumentError, /unknown pack directive '0'/) - -> { [@obj, @obj].pack("a :" + pack_format) }.should raise_error(ArgumentError, /unknown pack directive ':'/) - end + it "raise ArgumentError when a directive is unknown" do + # additional directive ('a') is required for the X directive + -> { [@obj, @obj].pack("a R" + pack_format) }.should raise_error(ArgumentError, /unknown pack directive 'R'/) + -> { [@obj, @obj].pack("a 0" + pack_format) }.should raise_error(ArgumentError, /unknown pack directive '0'/) + -> { [@obj, @obj].pack("a :" + pack_format) }.should raise_error(ArgumentError, /unknown pack directive ':'/) end it "calls #to_str to coerce the directives string" do diff --git a/spec/ruby/core/array/pack/shared/float.rb b/spec/ruby/core/array/pack/shared/float.rb index 76c800b74dc5f1..3f60fee2150b48 100644 --- a/spec/ruby/core/array/pack/shared/float.rb +++ b/spec/ruby/core/array/pack/shared/float.rb @@ -25,20 +25,10 @@ [2.9, 1.4, 8.2].pack(pack_format("*")).should == "\x9a\x999@33\xb3?33\x03A" end - ruby_version_is ""..."3.3" do 
- it "ignores NULL bytes between directives" do - suppress_warning do - [5.3, 9.2].pack(pack_format("\000", 2)).should == "\x9a\x99\xa9@33\x13A" - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [5.3, 9.2].pack(pack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + [5.3, 9.2].pack(pack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown pack directive/) end it "ignores spaces between directives" do @@ -105,20 +95,10 @@ [2.9, 1.4, 8.2].pack(pack_format("*")).should == "@9\x99\x9a?\xb333A\x0333" end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - [5.3, 9.2].pack(pack_format("\000", 2)).should == "@\xa9\x99\x9aA\x1333" - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [5.3, 9.2].pack(pack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + [5.3, 9.2].pack(pack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown pack directive/) end it "ignores spaces between directives" do @@ -177,20 +157,10 @@ [2.9, 1.4, 8.2].pack(pack_format("*")).should == "333333\x07@ffffff\xf6?ffffff\x20@" end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - [5.3, 9.2].pack(pack_format("\000", 2)).should == "333333\x15@ffffff\x22@" - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [5.3, 9.2].pack(pack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + [5.3, 9.2].pack(pack_format("\000", 2)) + }.should raise_error(ArgumentError, 
/unknown pack directive/) end it "ignores spaces between directives" do @@ -248,20 +218,10 @@ [2.9, 1.4, 8.2].pack(pack_format("*")).should == "@\x07333333?\xf6ffffff@\x20ffffff" end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - [5.3, 9.2].pack(pack_format("\000", 2)).should == "@\x15333333@\x22ffffff" - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [5.3, 9.2].pack(pack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + [5.3, 9.2].pack(pack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown pack directive/) end it "ignores spaces between directives" do diff --git a/spec/ruby/core/array/pack/shared/integer.rb b/spec/ruby/core/array/pack/shared/integer.rb index 61f7cca184a9b0..ff2ee492016cc4 100644 --- a/spec/ruby/core/array/pack/shared/integer.rb +++ b/spec/ruby/core/array/pack/shared/integer.rb @@ -41,21 +41,10 @@ str.should == "\x78\x65\xcd\xab\x21\x43" end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - str = [0x1243_6578, 0xdef0_abcd].pack(pack_format("\000", 2)) - str.should == "\x78\x65\xcd\xab" - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [0x1243_6578, 0xdef0_abcd].pack(pack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + [0x1243_6578, 0xdef0_abcd].pack(pack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown pack directive/) end it "ignores spaces between directives" do @@ -105,21 +94,10 @@ str.should == "\x65\x78\xab\xcd\x43\x21" end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - str = 
[0x1243_6578, 0xdef0_abcd].pack(pack_format("\000", 2)) - str.should == "\x65\x78\xab\xcd" - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [0x1243_6578, 0xdef0_abcd].pack(pack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + [0x1243_6578, 0xdef0_abcd].pack(pack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown pack directive/) end it "ignores spaces between directives" do @@ -169,21 +147,10 @@ str.should == "\x78\x65\x43\x12\xcd\xab\xf0\xde\x21\x43\x65\x78" end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - str = [0x1243_6578, 0xdef0_abcd].pack(pack_format("\000", 2)) - str.should == "\x78\x65\x43\x12\xcd\xab\xf0\xde" - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [0x1243_6578, 0xdef0_abcd].pack(pack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + [0x1243_6578, 0xdef0_abcd].pack(pack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown pack directive/) end it "ignores spaces between directives" do @@ -233,21 +200,10 @@ str.should == "\x12\x43\x65\x78\xde\xf0\xab\xcd\x78\x65\x43\x21" end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - str = [0x1243_6578, 0xdef0_abcd].pack(pack_format("\000", 2)) - str.should == "\x12\x43\x65\x78\xde\xf0\xab\xcd" - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [0x1243_6578, 0xdef0_abcd].pack(pack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + 
[0x1243_6578, 0xdef0_abcd].pack(pack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown pack directive/) end it "ignores spaces between directives" do @@ -357,21 +313,10 @@ str.should == "\x56\x78\x12\x34\xcd\xab\xf0\xde\xf0\xde\xba\xdc\x21\x43\x65\x78" end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - str = [0xdef0_abcd_3412_7856, 0x7865_4321_dcba_def0].pack(pack_format("\000", 2)) - str.should == "\x56\x78\x12\x34\xcd\xab\xf0\xde\xf0\xde\xba\xdc\x21\x43\x65\x78" - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [0xdef0_abcd_3412_7856, 0x7865_4321_dcba_def0].pack(pack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + [0xdef0_abcd_3412_7856, 0x7865_4321_dcba_def0].pack(pack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown pack directive/) end it "ignores spaces between directives" do @@ -429,21 +374,10 @@ str.should == "\xde\xf0\xab\xcd\x34\x12\x78\x56\x78\x65\x43\x21\xdc\xba\xde\xf0" end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - str = [0xdef0_abcd_3412_7856, 0x7865_4321_dcba_def0].pack(pack_format("\000", 2)) - str.should == "\xde\xf0\xab\xcd\x34\x12\x78\x56\x78\x65\x43\x21\xdc\xba\xde\xf0" - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [0xdef0_abcd_3412_7856, 0x7865_4321_dcba_def0].pack(pack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + [0xdef0_abcd_3412_7856, 0x7865_4321_dcba_def0].pack(pack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown pack directive/) end it "ignores spaces between directives" do diff --git 
a/spec/ruby/core/array/pack/shared/unicode.rb b/spec/ruby/core/array/pack/shared/unicode.rb index 4d8eaef3231067..0eccc7098c7cbf 100644 --- a/spec/ruby/core/array/pack/shared/unicode.rb +++ b/spec/ruby/core/array/pack/shared/unicode.rb @@ -67,20 +67,10 @@ -> { [obj].pack("U") }.should raise_error(TypeError) end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - [1, 2, 3].pack("U\x00U").should == "\x01\x02" - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [1, 2, 3].pack("U\x00U") - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + [1, 2, 3].pack("U\x00U") + }.should raise_error(ArgumentError, /unknown pack directive/) end it "ignores spaces between directives" do diff --git a/spec/ruby/core/array/pack/w_spec.rb b/spec/ruby/core/array/pack/w_spec.rb index e770288d67b4d2..ebadb94cab0504 100644 --- a/spec/ruby/core/array/pack/w_spec.rb +++ b/spec/ruby/core/array/pack/w_spec.rb @@ -24,20 +24,10 @@ [obj].pack("w").should == "\x05" end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - [1, 2, 3].pack("w\x00w").should == "\x01\x02" - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [1, 2, 3].pack("w\x00w") - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + [1, 2, 3].pack("w\x00w") + }.should raise_error(ArgumentError, /unknown pack directive/) end it "ignores spaces between directives" do diff --git a/spec/ruby/core/array/rassoc_spec.rb b/spec/ruby/core/array/rassoc_spec.rb index 632a05e8b3778b..a7ffb75fb53521 100644 --- a/spec/ruby/core/array/rassoc_spec.rb +++ b/spec/ruby/core/array/rassoc_spec.rb @@ -36,17 +36,15 @@ def o.==(other); other == 
'foobar'; end [[1, :foobar, o], [2, o, 1], [3, mock('foo')]].rassoc(key).should == [2, o, 1] end - ruby_version_is "3.3" do - it "calls to_ary on non-array elements" do - s1 = [1, 2] - s2 = ArraySpecs::ArrayConvertible.new(2, 3) - a = [s1, s2] - - s1.should_not_receive(:to_ary) - a.rassoc(2).should equal(s1) - - a.rassoc(3).should == [2, 3] - s2.called.should equal(:to_ary) - end + it "calls to_ary on non-array elements" do + s1 = [1, 2] + s2 = ArraySpecs::ArrayConvertible.new(2, 3) + a = [s1, s2] + + s1.should_not_receive(:to_ary) + a.rassoc(2).should equal(s1) + + a.rassoc(3).should == [2, 3] + s2.called.should equal(:to_ary) end end diff --git a/spec/ruby/core/array/sum_spec.rb b/spec/ruby/core/array/sum_spec.rb index 06abe061359faa..1886d692faaddc 100644 --- a/spec/ruby/core/array/sum_spec.rb +++ b/spec/ruby/core/array/sum_spec.rb @@ -74,13 +74,11 @@ [b].sum(a).should == 42 end - ruby_bug '#19530', ''...'3.3' do - it "calls + on the init value" do - a = mock("a") - b = mock("b") - a.should_receive(:+).with(42).and_return(b) - [42].sum(a).should == b - end + it "calls + on the init value" do + a = mock("a") + b = mock("b") + a.should_receive(:+).with(42).and_return(b) + [42].sum(a).should == b end end diff --git a/spec/ruby/core/basicobject/instance_eval_spec.rb b/spec/ruby/core/basicobject/instance_eval_spec.rb index 633b5c2cb1d9bb..f8d9d7505920d1 100644 --- a/spec/ruby/core/basicobject/instance_eval_spec.rb +++ b/spec/ruby/core/basicobject/instance_eval_spec.rb @@ -84,11 +84,9 @@ def foo end - ruby_version_is "3.3" do - it "uses the caller location as default location" do - f = Object.new - f.instance_eval("[__FILE__, __LINE__]").should == ["(eval at #{__FILE__}:#{__LINE__})", 1] - end + it "uses the caller location as default location" do + f = Object.new + f.instance_eval("[__FILE__, __LINE__]").should == ["(eval at #{__FILE__}:#{__LINE__})", 1] end it "has access to receiver's instance variables" do diff --git a/spec/ruby/core/binding/eval_spec.rb 
b/spec/ruby/core/binding/eval_spec.rb index bb2036f73911c4..7852e1c93936b4 100644 --- a/spec/ruby/core/binding/eval_spec.rb +++ b/spec/ruby/core/binding/eval_spec.rb @@ -60,14 +60,6 @@ bind.eval("#foo\n__LINE__", "(test)", 88).should == 89 end - ruby_version_is ""..."3.3" do - it "uses (eval) as __FILE__ if single argument given" do - obj = BindingSpecs::Demo.new(1) - bind = obj.get_binding - bind.eval("__FILE__").should == '(eval)' - end - end - it "uses 1 as __LINE__" do obj = BindingSpecs::Demo.new(1) bind = obj.get_binding @@ -107,9 +99,7 @@ bind.eval("'bar'.foo").should == "foo" end - ruby_version_is "3.3" do - it "uses the caller location as default filename" do - binding.eval("[__FILE__, __LINE__]").should == ["(eval at #{__FILE__}:#{__LINE__})", 1] - end + it "uses the caller location as default filename" do + binding.eval("[__FILE__, __LINE__]").should == ["(eval at #{__FILE__}:#{__LINE__})", 1] end end diff --git a/spec/ruby/core/builtin_constants/builtin_constants_spec.rb b/spec/ruby/core/builtin_constants/builtin_constants_spec.rb index 13e066cc7f1664..2c71b416679749 100644 --- a/spec/ruby/core/builtin_constants/builtin_constants_spec.rb +++ b/spec/ruby/core/builtin_constants/builtin_constants_spec.rb @@ -87,65 +87,63 @@ end ruby_version_is "4.0" do - context "The constant" do - describe "Ruby" do - it "is a Module" do - Ruby.should.instance_of?(Module) - end + describe "Ruby" do + it "is a Module" do + Ruby.should.instance_of?(Module) end + end - describe "Ruby::VERSION" do - it "is equal to RUBY_VERSION" do - Ruby::VERSION.should equal(RUBY_VERSION) - end + describe "Ruby::VERSION" do + it "is equal to RUBY_VERSION" do + Ruby::VERSION.should equal(RUBY_VERSION) end + end - describe "RUBY::PATCHLEVEL" do - it "is equal to RUBY_PATCHLEVEL" do - Ruby::PATCHLEVEL.should equal(RUBY_PATCHLEVEL) - end + describe "RUBY::PATCHLEVEL" do + it "is equal to RUBY_PATCHLEVEL" do + Ruby::PATCHLEVEL.should equal(RUBY_PATCHLEVEL) end + end - describe "Ruby::COPYRIGHT" 
do - it "is equal to RUBY_COPYRIGHT" do - Ruby::COPYRIGHT.should equal(RUBY_COPYRIGHT) - end + describe "Ruby::COPYRIGHT" do + it "is equal to RUBY_COPYRIGHT" do + Ruby::COPYRIGHT.should equal(RUBY_COPYRIGHT) end + end - describe "Ruby::DESCRIPTION" do - it "is equal to RUBY_DESCRIPTION" do - Ruby::DESCRIPTION.should equal(RUBY_DESCRIPTION) - end + describe "Ruby::DESCRIPTION" do + it "is equal to RUBY_DESCRIPTION" do + Ruby::DESCRIPTION.should equal(RUBY_DESCRIPTION) end + end - describe "Ruby::ENGINE" do - it "is equal to RUBY_ENGINE" do - Ruby::ENGINE.should equal(RUBY_ENGINE) - end + describe "Ruby::ENGINE" do + it "is equal to RUBY_ENGINE" do + Ruby::ENGINE.should equal(RUBY_ENGINE) end + end - describe "Ruby::ENGINE_VERSION" do - it "is equal to RUBY_ENGINE_VERSION" do - Ruby::ENGINE_VERSION.should equal(RUBY_ENGINE_VERSION) - end + describe "Ruby::ENGINE_VERSION" do + it "is equal to RUBY_ENGINE_VERSION" do + Ruby::ENGINE_VERSION.should equal(RUBY_ENGINE_VERSION) end + end - describe "Ruby::PLATFORM" do - it "is equal to RUBY_PLATFORM" do - Ruby::PLATFORM.should equal(RUBY_PLATFORM) - end + describe "Ruby::PLATFORM" do + it "is equal to RUBY_PLATFORM" do + Ruby::PLATFORM.should equal(RUBY_PLATFORM) end + end - describe "Ruby::RELEASE_DATE" do - it "is equal to RUBY_RELEASE_DATE" do - Ruby::RELEASE_DATE.should equal(RUBY_RELEASE_DATE) - end + describe "Ruby::RELEASE_DATE" do + it "is equal to RUBY_RELEASE_DATE" do + Ruby::RELEASE_DATE.should equal(RUBY_RELEASE_DATE) end + end - describe "Ruby::REVISION" do - it "is equal to RUBY_REVISION" do - Ruby::REVISION.should equal(RUBY_REVISION) - end + describe "Ruby::REVISION" do + it "is equal to RUBY_REVISION" do + Ruby::REVISION.should equal(RUBY_REVISION) end end end diff --git a/spec/ruby/core/data/with_spec.rb b/spec/ruby/core/data/with_spec.rb index fd0a99d1fadaab..83cb97fa60777b 100644 --- a/spec/ruby/core/data/with_spec.rb +++ b/spec/ruby/core/data/with_spec.rb @@ -44,14 +44,12 @@ def subclass.new(*) 
data_copy.unit.should == "m" end - ruby_version_is "3.3" do - it "calls #initialize" do - data = DataSpecs::DataWithOverriddenInitialize.new(42, "m") - ScratchPad.clear + it "calls #initialize" do + data = DataSpecs::DataWithOverriddenInitialize.new(42, "m") + ScratchPad.clear - data.with(amount: 0) + data.with(amount: 0) - ScratchPad.recorded.should == [:initialize, [], {amount: 0, unit: "m"}] - end + ScratchPad.recorded.should == [:initialize, [], {amount: 0, unit: "m"}] end end diff --git a/spec/ruby/core/dir/chdir_spec.rb b/spec/ruby/core/dir/chdir_spec.rb index 015386a9026cf3..fd277e4e1d64fc 100644 --- a/spec/ruby/core/dir/chdir_spec.rb +++ b/spec/ruby/core/dir/chdir_spec.rb @@ -125,96 +125,94 @@ def to_str; DirSpecs.mock_dir; end end end -ruby_version_is '3.3' do - describe "Dir#chdir" do - before :all do - DirSpecs.create_mock_dirs - end +describe "Dir#chdir" do + before :all do + DirSpecs.create_mock_dirs + end - after :all do - DirSpecs.delete_mock_dirs - end + after :all do + DirSpecs.delete_mock_dirs + end - before :each do - @original = Dir.pwd - end + before :each do + @original = Dir.pwd + end - after :each do - Dir.chdir(@original) - end + after :each do + Dir.chdir(@original) + end - it "changes the current working directory to self" do - dir = Dir.new(DirSpecs.mock_dir) - dir.chdir - Dir.pwd.should == DirSpecs.mock_dir - ensure - dir.close - end + it "changes the current working directory to self" do + dir = Dir.new(DirSpecs.mock_dir) + dir.chdir + Dir.pwd.should == DirSpecs.mock_dir + ensure + dir.close + end - it "changes the current working directory to self for duration of the block when a block is given" do - dir = Dir.new(DirSpecs.mock_dir) - pwd_in_block = nil + it "changes the current working directory to self for duration of the block when a block is given" do + dir = Dir.new(DirSpecs.mock_dir) + pwd_in_block = nil - dir.chdir { pwd_in_block = Dir.pwd } + dir.chdir { pwd_in_block = Dir.pwd } - pwd_in_block.should == DirSpecs.mock_dir - 
Dir.pwd.should == @original - ensure - dir.close - end + pwd_in_block.should == DirSpecs.mock_dir + Dir.pwd.should == @original + ensure + dir.close + end - it "returns 0 when successfully changing directory" do - dir = Dir.new(DirSpecs.mock_dir) - dir.chdir.should == 0 - ensure - dir.close - end + it "returns 0 when successfully changing directory" do + dir = Dir.new(DirSpecs.mock_dir) + dir.chdir.should == 0 + ensure + dir.close + end - it "returns the value of the block when a block is given" do - dir = Dir.new(DirSpecs.mock_dir) - dir.chdir { :block_value }.should == :block_value - ensure - dir.close - end + it "returns the value of the block when a block is given" do + dir = Dir.new(DirSpecs.mock_dir) + dir.chdir { :block_value }.should == :block_value + ensure + dir.close + end + + platform_is_not :windows do + it "does not raise an Errno::ENOENT if the original directory no longer exists" do + dir_name1 = tmp('testdir1') + dir_name2 = tmp('testdir2') + Dir.should_not.exist?(dir_name1) + Dir.should_not.exist?(dir_name2) + Dir.mkdir dir_name1 + Dir.mkdir dir_name2 - platform_is_not :windows do - it "does not raise an Errno::ENOENT if the original directory no longer exists" do - dir_name1 = tmp('testdir1') - dir_name2 = tmp('testdir2') - Dir.should_not.exist?(dir_name1) - Dir.should_not.exist?(dir_name2) - Dir.mkdir dir_name1 - Dir.mkdir dir_name2 - - dir2 = Dir.new(dir_name2) - - begin - Dir.chdir(dir_name1) do - dir2.chdir { Dir.unlink dir_name1 } - end - Dir.pwd.should == @original - ensure - Dir.unlink dir_name1 if Dir.exist?(dir_name1) - Dir.unlink dir_name2 if Dir.exist?(dir_name2) + dir2 = Dir.new(dir_name2) + + begin + Dir.chdir(dir_name1) do + dir2.chdir { Dir.unlink dir_name1 } end + Dir.pwd.should == @original ensure - dir2.close + Dir.unlink dir_name1 if Dir.exist?(dir_name1) + Dir.unlink dir_name2 if Dir.exist?(dir_name2) end + ensure + dir2.close end + end - it "always returns to the original directory when given a block" do - dir = 
Dir.new(DirSpecs.mock_dir) + it "always returns to the original directory when given a block" do + dir = Dir.new(DirSpecs.mock_dir) - begin - dir.chdir do - raise StandardError, "something bad happened" - end - rescue StandardError + begin + dir.chdir do + raise StandardError, "something bad happened" end - - Dir.pwd.should == @original - ensure - dir.close + rescue StandardError end + + Dir.pwd.should == @original + ensure + dir.close end end diff --git a/spec/ruby/core/dir/close_spec.rb b/spec/ruby/core/dir/close_spec.rb index f7cce318b8b17d..10ad1369c84d2f 100644 --- a/spec/ruby/core/dir/close_spec.rb +++ b/spec/ruby/core/dir/close_spec.rb @@ -24,7 +24,7 @@ dir.close.should == nil end - ruby_version_is '3.3'...'3.4' do + ruby_version_is ''...'3.4' do platform_is_not :windows do it "does not raise an error even if the file descriptor is closed with another Dir instance" do dir = Dir.open DirSpecs.mock_dir diff --git a/spec/ruby/core/dir/fchdir_spec.rb b/spec/ruby/core/dir/fchdir_spec.rb index 52600a95f2b80c..d5e77f7f03f372 100644 --- a/spec/ruby/core/dir/fchdir_spec.rb +++ b/spec/ruby/core/dir/fchdir_spec.rb @@ -1,73 +1,71 @@ require_relative '../../spec_helper' require_relative 'fixtures/common' -ruby_version_is '3.3' do - platform_is_not :windows do - describe "Dir.fchdir" do - before :all do - DirSpecs.create_mock_dirs - end +platform_is_not :windows do + describe "Dir.fchdir" do + before :all do + DirSpecs.create_mock_dirs + end - after :all do - DirSpecs.delete_mock_dirs - end + after :all do + DirSpecs.delete_mock_dirs + end - before :each do - @original = Dir.pwd - end + before :each do + @original = Dir.pwd + end - after :each do - Dir.chdir(@original) - end + after :each do + Dir.chdir(@original) + end - it "changes the current working directory to the directory specified by the integer file descriptor" do - dir = Dir.new(DirSpecs.mock_dir) - Dir.fchdir dir.fileno - Dir.pwd.should == DirSpecs.mock_dir - ensure - dir.close - end + it "changes the current 
working directory to the directory specified by the integer file descriptor" do + dir = Dir.new(DirSpecs.mock_dir) + Dir.fchdir dir.fileno + Dir.pwd.should == DirSpecs.mock_dir + ensure + dir.close + end - it "returns 0 when successfully changing directory" do - dir = Dir.new(DirSpecs.mock_dir) - Dir.fchdir(dir.fileno).should == 0 - ensure - dir.close - end + it "returns 0 when successfully changing directory" do + dir = Dir.new(DirSpecs.mock_dir) + Dir.fchdir(dir.fileno).should == 0 + ensure + dir.close + end - it "returns the value of the block when a block is given" do - dir = Dir.new(DirSpecs.mock_dir) - Dir.fchdir(dir.fileno) { :block_value }.should == :block_value - ensure - dir.close - end + it "returns the value of the block when a block is given" do + dir = Dir.new(DirSpecs.mock_dir) + Dir.fchdir(dir.fileno) { :block_value }.should == :block_value + ensure + dir.close + end - it "changes to the specified directory for the duration of the block" do - dir = Dir.new(DirSpecs.mock_dir) - Dir.fchdir(dir.fileno) { Dir.pwd }.should == DirSpecs.mock_dir - Dir.pwd.should == @original - ensure - dir.close - end + it "changes to the specified directory for the duration of the block" do + dir = Dir.new(DirSpecs.mock_dir) + Dir.fchdir(dir.fileno) { Dir.pwd }.should == DirSpecs.mock_dir + Dir.pwd.should == @original + ensure + dir.close + end - it "raises a SystemCallError if the file descriptor given is not valid" do - -> { Dir.fchdir(-1) }.should raise_error(SystemCallError, "Bad file descriptor - fchdir") - -> { Dir.fchdir(-1) { } }.should raise_error(SystemCallError, "Bad file descriptor - fchdir") - end + it "raises a SystemCallError if the file descriptor given is not valid" do + -> { Dir.fchdir(-1) }.should raise_error(SystemCallError, "Bad file descriptor - fchdir") + -> { Dir.fchdir(-1) { } }.should raise_error(SystemCallError, "Bad file descriptor - fchdir") + end - it "raises a SystemCallError if the file descriptor given is not for a directory" do - -> { 
Dir.fchdir $stdout.fileno }.should raise_error(SystemCallError, /(Not a directory|Invalid argument) - fchdir/) - -> { Dir.fchdir($stdout.fileno) { } }.should raise_error(SystemCallError, /(Not a directory|Invalid argument) - fchdir/) - end + it "raises a SystemCallError if the file descriptor given is not for a directory" do + -> { Dir.fchdir $stdout.fileno }.should raise_error(SystemCallError, /(Not a directory|Invalid argument) - fchdir/) + -> { Dir.fchdir($stdout.fileno) { } }.should raise_error(SystemCallError, /(Not a directory|Invalid argument) - fchdir/) end end +end - platform_is :windows do - describe "Dir.fchdir" do - it "raises NotImplementedError" do - -> { Dir.fchdir 1 }.should raise_error(NotImplementedError) - -> { Dir.fchdir(1) { } }.should raise_error(NotImplementedError) - end +platform_is :windows do + describe "Dir.fchdir" do + it "raises NotImplementedError" do + -> { Dir.fchdir 1 }.should raise_error(NotImplementedError) + -> { Dir.fchdir(1) { } }.should raise_error(NotImplementedError) end end end diff --git a/spec/ruby/core/dir/for_fd_spec.rb b/spec/ruby/core/dir/for_fd_spec.rb index ba467f2f86d928..1559e1baa43a6c 100644 --- a/spec/ruby/core/dir/for_fd_spec.rb +++ b/spec/ruby/core/dir/for_fd_spec.rb @@ -2,77 +2,75 @@ require_relative 'fixtures/common' quarantine! 
do # leads to "Errno::EBADF: Bad file descriptor - closedir" in DirSpecs.delete_mock_dirs -ruby_version_is '3.3' do - platform_is_not :windows do - describe "Dir.for_fd" do - before :all do - DirSpecs.create_mock_dirs - end +platform_is_not :windows do + describe "Dir.for_fd" do + before :all do + DirSpecs.create_mock_dirs + end - after :all do - DirSpecs.delete_mock_dirs - end + after :all do + DirSpecs.delete_mock_dirs + end - before :each do - @original = Dir.pwd - end + before :each do + @original = Dir.pwd + end - after :each do - Dir.chdir(@original) - end + after :each do + Dir.chdir(@original) + end - it "returns a new Dir object representing the directory specified by the given integer directory file descriptor" do - dir = Dir.new(DirSpecs.mock_dir) - dir_new = Dir.for_fd(dir.fileno) + it "returns a new Dir object representing the directory specified by the given integer directory file descriptor" do + dir = Dir.new(DirSpecs.mock_dir) + dir_new = Dir.for_fd(dir.fileno) - dir_new.should.instance_of?(Dir) - dir_new.children.should == dir.children - dir_new.fileno.should == dir.fileno - ensure - dir.close - end + dir_new.should.instance_of?(Dir) + dir_new.children.should == dir.children + dir_new.fileno.should == dir.fileno + ensure + dir.close + end - it "returns a new Dir object without associated path" do - dir = Dir.new(DirSpecs.mock_dir) - dir_new = Dir.for_fd(dir.fileno) + it "returns a new Dir object without associated path" do + dir = Dir.new(DirSpecs.mock_dir) + dir_new = Dir.for_fd(dir.fileno) - dir_new.path.should == nil - ensure - dir.close - end + dir_new.path.should == nil + ensure + dir.close + end - it "calls #to_int to convert a value to an Integer" do - dir = Dir.new(DirSpecs.mock_dir) - obj = mock("fd") - obj.should_receive(:to_int).and_return(dir.fileno) + it "calls #to_int to convert a value to an Integer" do + dir = Dir.new(DirSpecs.mock_dir) + obj = mock("fd") + obj.should_receive(:to_int).and_return(dir.fileno) - dir_new = 
Dir.for_fd(obj) - dir_new.fileno.should == dir.fileno - ensure - dir.close - end + dir_new = Dir.for_fd(obj) + dir_new.fileno.should == dir.fileno + ensure + dir.close + end - it "raises TypeError when value cannot be converted to Integer" do - -> { - Dir.for_fd(nil) - }.should raise_error(TypeError, "no implicit conversion from nil to integer") - end + it "raises TypeError when value cannot be converted to Integer" do + -> { + Dir.for_fd(nil) + }.should raise_error(TypeError, "no implicit conversion from nil to integer") + end - it "raises a SystemCallError if the file descriptor given is not valid" do - -> { Dir.for_fd(-1) }.should raise_error(SystemCallError, "Bad file descriptor - fdopendir") - end + it "raises a SystemCallError if the file descriptor given is not valid" do + -> { Dir.for_fd(-1) }.should raise_error(SystemCallError, "Bad file descriptor - fdopendir") + end - it "raises a SystemCallError if the file descriptor given is not for a directory" do - -> { Dir.for_fd $stdout.fileno }.should raise_error(SystemCallError, "Not a directory - fdopendir") - end + it "raises a SystemCallError if the file descriptor given is not for a directory" do + -> { Dir.for_fd $stdout.fileno }.should raise_error(SystemCallError, "Not a directory - fdopendir") end end +end - platform_is :windows do - describe "Dir.for_fd" do - it "raises NotImplementedError" do - -> { Dir.for_fd 1 }.should raise_error(NotImplementedError) - end +platform_is :windows do + describe "Dir.for_fd" do + it "raises NotImplementedError" do + -> { Dir.for_fd 1 }.should raise_error(NotImplementedError) end end end diff --git a/spec/ruby/core/encoding/ascii_compatible_spec.rb b/spec/ruby/core/encoding/ascii_compatible_spec.rb index 4804300e855dff..bbcc6add9e4e1b 100644 --- a/spec/ruby/core/encoding/ascii_compatible_spec.rb +++ b/spec/ruby/core/encoding/ascii_compatible_spec.rb @@ -8,4 +8,15 @@ it "returns false if self does not represent an ASCII-compatible encoding" do 
Encoding::UTF_16LE.ascii_compatible?.should be_false end + + it "returns false for UTF_16 and UTF_32" do + Encoding::UTF_16.should_not.ascii_compatible? + Encoding::UTF_32.should_not.ascii_compatible? + end + + it "is always false for dummy encodings" do + Encoding.list.select(&:dummy?).each do |encoding| + encoding.should_not.ascii_compatible? + end + end end diff --git a/spec/ruby/core/encoding/dummy_spec.rb b/spec/ruby/core/encoding/dummy_spec.rb index 75ffcd5a4ec093..77caebca9a2871 100644 --- a/spec/ruby/core/encoding/dummy_spec.rb +++ b/spec/ruby/core/encoding/dummy_spec.rb @@ -11,4 +11,15 @@ Encoding::CP50221.dummy?.should be_true Encoding::UTF_7.dummy?.should be_true end + + it "returns true for UTF_16 and UTF_32" do + Encoding::UTF_16.should.dummy? + Encoding::UTF_32.should.dummy? + end + + it "implies not #ascii_compatible?" do + Encoding.list.select(&:dummy?).each do |encoding| + encoding.should_not.ascii_compatible? + end + end end diff --git a/spec/ruby/core/encoding/replicate_spec.rb b/spec/ruby/core/encoding/replicate_spec.rb index 2da998837f866b..9fe0ba87478bd0 100644 --- a/spec/ruby/core/encoding/replicate_spec.rb +++ b/spec/ruby/core/encoding/replicate_spec.rb @@ -2,87 +2,7 @@ require_relative '../../spec_helper' describe "Encoding#replicate" do - ruby_version_is ""..."3.3" do - before :all do - @i = 0 - end - - before :each do - @i += 1 - @prefix = "RS#{@i}" - end - - it "returns a replica of ASCII" do - name = @prefix + '-ASCII' - e = suppress_warning { Encoding::ASCII.replicate(name) } - e.name.should == name - Encoding.find(name).should == e - - "a".dup.force_encoding(e).valid_encoding?.should be_true - "\x80".dup.force_encoding(e).valid_encoding?.should be_false - end - - it "returns a replica of UTF-8" do - name = @prefix + 'UTF-8' - e = suppress_warning { Encoding::UTF_8.replicate(name) } - e.name.should == name - Encoding.find(name).should == e - - "a".dup.force_encoding(e).valid_encoding?.should be_true - 
"\u3042".dup.force_encoding(e).valid_encoding?.should be_true - "\x80".dup.force_encoding(e).valid_encoding?.should be_false - end - - it "returns a replica of UTF-16BE" do - name = @prefix + 'UTF-16-BE' - e = suppress_warning { Encoding::UTF_16BE.replicate(name) } - e.name.should == name - Encoding.find(name).should == e - - "a".dup.force_encoding(e).valid_encoding?.should be_false - "\x30\x42".dup.force_encoding(e).valid_encoding?.should be_true - "\x80".dup.force_encoding(e).valid_encoding?.should be_false - end - - it "returns a replica of ISO-2022-JP" do - name = @prefix + 'ISO-2022-JP' - e = suppress_warning { Encoding::ISO_2022_JP.replicate(name) } - Encoding.find(name).should == e - - e.name.should == name - e.dummy?.should be_true - end - - # NOTE: it's unclear of the value of this (for the complexity cost of it), - # but it is the current CRuby behavior. - it "can be associated with a String" do - name = @prefix + '-US-ASCII' - e = suppress_warning { Encoding::US_ASCII.replicate(name) } - e.name.should == name - Encoding.find(name).should == e - - s = "abc".dup.force_encoding(e) - s.encoding.should == e - s.encoding.name.should == name - end - end - - ruby_version_is ""..."3.3" do - it "warns about deprecation" do - -> { - Encoding::US_ASCII.replicate('MY-US-ASCII') - }.should complain(/warning: Encoding#replicate is deprecated and will be removed in Ruby 3.3; use the original encoding instead/) - end - - it "raises EncodingError if too many encodings" do - code = '1_000.times {|i| Encoding::US_ASCII.replicate("R_#{i}") }' - ruby_exe(code, args: "2>&1", exit_status: 1).should.include?('too many encoding (> 256) (EncodingError)') - end - end - - ruby_version_is "3.3" do - it "has been removed" do - Encoding::US_ASCII.should_not.respond_to?(:replicate, true) - end + it "has been removed" do + Encoding::US_ASCII.should_not.respond_to?(:replicate, true) end end diff --git a/spec/ruby/core/enumerator/each_spec.rb b/spec/ruby/core/enumerator/each_spec.rb index 
3af16e5587e466..8c9785cc85fe37 100644 --- a/spec/ruby/core/enumerator/each_spec.rb +++ b/spec/ruby/core/enumerator/each_spec.rb @@ -86,4 +86,19 @@ def object_each_with_arguments.each_with_arguments(arg, *args) ret.should be_an_instance_of(Enumerator) ret.should_not equal(@enum_with_arguments) end + + it "does not destructure yielded array values when chaining each.map" do + result = [[[1]]].each.map { |a, b| [a, b] } + result.should == [[[1], nil]] + end + + it "preserves array values yielded from the enumerator" do + result = [[1, 2]].each.map { |a| a } + result.should == [[1, 2]] + end + + it "allows destructuring to occur in the block, not the enumerator" do + result = [[1, 2]].each.map { |a, b| a } + result.should == [1] + end end diff --git a/spec/ruby/core/exception/no_method_error_spec.rb b/spec/ruby/core/exception/no_method_error_spec.rb index 772c569f67963e..d20878c6e3328d 100644 --- a/spec/ruby/core/exception/no_method_error_spec.rb +++ b/spec/ruby/core/exception/no_method_error_spec.rb @@ -66,204 +66,145 @@ end end - ruby_version_is ""..."3.3" do - it "calls #inspect when calling Exception#message" do - ScratchPad.record [] - test_class = Class.new do - def inspect - ScratchPad << :inspect_called - "" - end - end - instance = test_class.new - - begin - instance.bar - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']bar' for :#$/ - ScratchPad.recorded.should == [:inspect_called] - end - end - - it "fallbacks to a simpler representation of the receiver when receiver.inspect raises an exception" do - test_class = Class.new do - def inspect - raise NoMethodErrorSpecs::InstanceException - end - end - instance = test_class.new - - begin - instance.bar - rescue NoMethodError => error - message = error.message - message.should =~ /undefined method.+\bbar\b/ - message.should include test_class.inspect - end - end - - it "uses #name to display the receiver if it is a class" do - klass = Class.new { def self.name; "MyClass"; end } - - 
begin - klass.foo - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']foo' for MyClass:Class$/ - end + it "uses a literal name when receiver is nil" do + begin + nil.foo + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']foo' for nil\Z/ end + end - it "uses #name to display the receiver if it is a module" do - mod = Module.new { def self.name; "MyModule"; end } - - begin - mod.foo - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']foo' for MyModule:Module$/ - end + it "uses a literal name when receiver is true" do + begin + true.foo + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']foo' for true\Z/ end end - ruby_version_is "3.3" do - it "uses a literal name when receiver is nil" do - begin - nil.foo - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']foo' for nil\Z/ - end + it "uses a literal name when receiver is false" do + begin + false.foo + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']foo' for false\Z/ end + end - it "uses a literal name when receiver is true" do - begin - true.foo - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']foo' for true\Z/ - end - end + it "uses #name when receiver is a class" do + klass = Class.new { def self.name; "MyClass"; end } - it "uses a literal name when receiver is false" do - begin - false.foo - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']foo' for false\Z/ - end + begin + klass.foo + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']foo' for class MyClass\Z/ end + end - it "uses #name when receiver is a class" do - klass = Class.new { def self.name; "MyClass"; end } + it "uses class' string representation when receiver is an anonymous class" do + klass = Class.new - begin - klass.foo - rescue NoMethodError => error - error.message.should =~ 
/\Aundefined method [`']foo' for class MyClass\Z/ - end + begin + klass.foo + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']foo' for class #\Z/ end + end - it "uses class' string representation when receiver is an anonymous class" do - klass = Class.new + it "uses class' string representation when receiver is a singleton class" do + obj = Object.new + singleton_class = obj.singleton_class - begin - klass.foo - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']foo' for class #\Z/ - end + begin + singleton_class.foo + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']foo' for class #>\Z/ end + end - it "uses class' string representation when receiver is a singleton class" do - obj = Object.new - singleton_class = obj.singleton_class + it "uses #name when receiver is a module" do + mod = Module.new { def self.name; "MyModule"; end } - begin - singleton_class.foo - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']foo' for class #>\Z/ - end + begin + mod.foo + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']foo' for module MyModule\Z/ end + end - it "uses #name when receiver is a module" do - mod = Module.new { def self.name; "MyModule"; end } + it "uses module's string representation when receiver is an anonymous module" do + m = Module.new - begin - mod.foo - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']foo' for module MyModule\Z/ - end + begin + m.foo + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']foo' for module #\Z/ end + end - it "uses module's string representation when receiver is an anonymous module" do - m = Module.new + it "uses class #name when receiver is an ordinary object" do + klass = Class.new { def self.name; "MyClass"; end } + instance = klass.new - begin - m.foo - rescue NoMethodError => error - error.message.should =~ 
/\Aundefined method [`']foo' for module #\Z/ - end + begin + instance.bar + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']bar' for an instance of MyClass\Z/ end + end - it "uses class #name when receiver is an ordinary object" do - klass = Class.new { def self.name; "MyClass"; end } - instance = klass.new + it "uses class string representation when receiver is an instance of anonymous class" do + klass = Class.new + instance = klass.new - begin - instance.bar - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']bar' for an instance of MyClass\Z/ - end + begin + instance.bar + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']bar' for an instance of #\Z/ end + end - it "uses class string representation when receiver is an instance of anonymous class" do - klass = Class.new - instance = klass.new + it "uses class name when receiver has a singleton class" do + instance = NoMethodErrorSpecs::NoMethodErrorA.new + def instance.foo; end - begin - instance.bar - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']bar' for an instance of #\Z/ - end + begin + instance.bar + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']bar' for #\Z/ end + end - it "uses class name when receiver has a singleton class" do - instance = NoMethodErrorSpecs::NoMethodErrorA.new - def instance.foo; end - - begin - instance.bar - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']bar' for #\Z/ + it "does not call #inspect when calling Exception#message" do + ScratchPad.record [] + test_class = Class.new do + def inspect + ScratchPad << :inspect_called + "" end end + instance = test_class.new - it "does not call #inspect when calling Exception#message" do - ScratchPad.record [] - test_class = Class.new do - def inspect - ScratchPad << :inspect_called - "" - end - end - instance = test_class.new - - begin - instance.bar - 
rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']bar' for an instance of #\Z/ - ScratchPad.recorded.should == [] - end + begin + instance.bar + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']bar' for an instance of #\Z/ + ScratchPad.recorded.should == [] end + end - it "does not truncate long class names" do - class_name = 'ExceptionSpecs::A' + 'a'*100 + it "does not truncate long class names" do + class_name = 'ExceptionSpecs::A' + 'a'*100 - begin - eval <<~RUBY - class #{class_name} - end + begin + eval <<~RUBY + class #{class_name} + end - obj = #{class_name}.new - obj.foo - RUBY - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']foo' for an instance of #{class_name}\Z/ - end + obj = #{class_name}.new + obj.foo + RUBY + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']foo' for an instance of #{class_name}\Z/ end end end diff --git a/spec/ruby/core/false/singleton_method_spec.rb b/spec/ruby/core/false/singleton_method_spec.rb index 738794b46c26f8..16dc85d67c64b7 100644 --- a/spec/ruby/core/false/singleton_method_spec.rb +++ b/spec/ruby/core/false/singleton_method_spec.rb @@ -1,15 +1,13 @@ require_relative '../../spec_helper' describe "FalseClass#singleton_method" do - ruby_version_is '3.3' do - it "raises regardless of whether FalseClass defines the method" do + it "raises regardless of whether FalseClass defines the method" do + -> { false.singleton_method(:foo) }.should raise_error(NameError) + begin + def (false).foo; end -> { false.singleton_method(:foo) }.should raise_error(NameError) - begin - def (false).foo; end - -> { false.singleton_method(:foo) }.should raise_error(NameError) - ensure - FalseClass.send(:remove_method, :foo) - end + ensure + FalseClass.send(:remove_method, :foo) end end end diff --git a/spec/ruby/core/fiber/kill_spec.rb b/spec/ruby/core/fiber/kill_spec.rb index 2f4c499280f400..abf23ff17621fa 100644 --- 
a/spec/ruby/core/fiber/kill_spec.rb +++ b/spec/ruby/core/fiber/kill_spec.rb @@ -2,89 +2,87 @@ require_relative 'fixtures/classes' require_relative '../../shared/kernel/raise' -ruby_version_is "3.3" do - describe "Fiber#kill" do - it "kills a non-resumed fiber" do - fiber = Fiber.new{} +describe "Fiber#kill" do + it "kills a non-resumed fiber" do + fiber = Fiber.new{} - fiber.alive?.should == true + fiber.alive?.should == true - fiber.kill - fiber.alive?.should == false - end - - it "kills a resumed fiber" do - fiber = Fiber.new{while true; Fiber.yield; end} - fiber.resume - - fiber.alive?.should == true + fiber.kill + fiber.alive?.should == false + end - fiber.kill - fiber.alive?.should == false - end + it "kills a resumed fiber" do + fiber = Fiber.new{while true; Fiber.yield; end} + fiber.resume - it "can kill itself" do - fiber = Fiber.new do - Fiber.current.kill - end + fiber.alive?.should == true - fiber.alive?.should == true + fiber.kill + fiber.alive?.should == false + end - fiber.resume - fiber.alive?.should == false + it "can kill itself" do + fiber = Fiber.new do + Fiber.current.kill end - it "kills a resumed fiber from a child" do - parent = Fiber.new do - child = Fiber.new do - parent.kill - parent.alive?.should == true - end + fiber.alive?.should == true + + fiber.resume + fiber.alive?.should == false + end - child.resume + it "kills a resumed fiber from a child" do + parent = Fiber.new do + child = Fiber.new do + parent.kill + parent.alive?.should == true end - parent.resume - parent.alive?.should == false + child.resume end - it "executes the ensure block" do - ensure_executed = false + parent.resume + parent.alive?.should == false + end - fiber = Fiber.new do - while true; Fiber.yield; end - ensure - ensure_executed = true - end + it "executes the ensure block" do + ensure_executed = false - fiber.resume - fiber.kill - ensure_executed.should == true + fiber = Fiber.new do + while true; Fiber.yield; end + ensure + ensure_executed = true end - it "does 
not execute rescue block" do - rescue_executed = false + fiber.resume + fiber.kill + ensure_executed.should == true + end - fiber = Fiber.new do - while true; Fiber.yield; end - rescue Exception - rescue_executed = true - end + it "does not execute rescue block" do + rescue_executed = false - fiber.resume - fiber.kill - rescue_executed.should == false + fiber = Fiber.new do + while true; Fiber.yield; end + rescue Exception + rescue_executed = true end - it "repeatedly kills a fiber" do - fiber = Fiber.new do - while true; Fiber.yield; end - ensure - while true; Fiber.yield; end - end + fiber.resume + fiber.kill + rescue_executed.should == false + end - fiber.kill - fiber.alive?.should == false + it "repeatedly kills a fiber" do + fiber = Fiber.new do + while true; Fiber.yield; end + ensure + while true; Fiber.yield; end end + + fiber.kill + fiber.alive?.should == false end end diff --git a/spec/ruby/core/fiber/storage_spec.rb b/spec/ruby/core/fiber/storage_spec.rb index 015caaf3bbff48..6ffc13ee283bec 100644 --- a/spec/ruby/core/fiber/storage_spec.rb +++ b/spec/ruby/core/fiber/storage_spec.rb @@ -161,13 +161,11 @@ def key.to_str; "Foo"; end -> { Fiber[Object.new] = 44 }.should raise_error(TypeError) end - ruby_version_is "3.3" do - it "deletes the fiber storage key when assigning nil" do - Fiber.new(storage: {life: 42}) { - Fiber[:life] = nil - Fiber.current.storage - }.resume.should == {} - end + it "deletes the fiber storage key when assigning nil" do + Fiber.new(storage: {life: 42}) { + Fiber[:life] = nil + Fiber.current.storage + }.resume.should == {} end end diff --git a/spec/ruby/core/file/basename_spec.rb b/spec/ruby/core/file/basename_spec.rb index 87695ab97be3ca..66a5b56ed9a11a 100644 --- a/spec/ruby/core/file/basename_spec.rb +++ b/spec/ruby/core/file/basename_spec.rb @@ -162,11 +162,7 @@ it "rejects strings encoded with non ASCII-compatible encodings" do Encoding.list.reject(&:ascii_compatible?).reject(&:dummy?).each do |enc| - begin - path = 
"/foo/bar".encode(enc) - rescue Encoding::ConverterNotFoundError - next - end + path = "/foo/bar".encode(enc) -> { File.basename(path) diff --git a/spec/ruby/core/file/dirname_spec.rb b/spec/ruby/core/file/dirname_spec.rb index 8e6016ce6fef5b..1b006af7839f17 100644 --- a/spec/ruby/core/file/dirname_spec.rb +++ b/spec/ruby/core/file/dirname_spec.rb @@ -78,7 +78,33 @@ def object.to_int; 2; end File.dirname("foo/../").should == "foo" end + it "rejects strings encoded with non ASCII-compatible encodings" do + Encoding.list.reject(&:ascii_compatible?).reject(&:dummy?).each do |enc| + path = "/foo/bar".encode(enc) + -> { + File.dirname(path) + }.should raise_error(Encoding::CompatibilityError) + end + end + + it "works with all ASCII-compatible encodings" do + Encoding.list.select(&:ascii_compatible?).each do |enc| + File.dirname("/foo/bar".encode(enc)).should == "/foo".encode(enc) + end + end + + it "handles Shift JIS 0x5C (\\) as second byte of a multi-byte sequence" do + # dir/fileソname.txt + path = "dir/file\x83\x5cname.txt".b.force_encoding(Encoding::SHIFT_JIS) + path.valid_encoding?.should be_true + File.dirname(path).should == "dir" + end + platform_is_not :windows do + it "ignores repeated leading / (edge cases on non-windows)" do + File.dirname("/////foo/bar/").should == "/foo" + end + it "returns all the components of filename except the last one (edge cases on non-windows)" do File.dirname('/////').should == '/' File.dirname("//foo//").should == "/" @@ -94,6 +120,13 @@ def object.to_int; 2; end File.dirname("//foo//").should == "//foo" File.dirname('/////').should == '//' end + + it "handles Shift JIS 0x5C (\\) as second byte of a multi-byte sequence (windows)" do + # dir\fileソname.txt + path = "dir\\file\x83\x5cname.txt".b.force_encoding(Encoding::SHIFT_JIS) + path.valid_encoding?.should be_true + File.dirname(path).should == "dir" + end end it "accepts an object that has a #to_path method" do diff --git a/spec/ruby/core/float/ceil_spec.rb 
b/spec/ruby/core/float/ceil_spec.rb index 75f56102922e82..5236a133f5de18 100644 --- a/spec/ruby/core/float/ceil_spec.rb +++ b/spec/ruby/core/float/ceil_spec.rb @@ -2,7 +2,7 @@ require_relative '../integer/shared/integer_ceil_precision' describe "Float#ceil" do - context "with precision" do + context "with values equal to integers" do it_behaves_like :integer_ceil_precision, :Float end @@ -20,7 +20,9 @@ 2.1679.ceil(0).should eql(3) 214.94.ceil(-1).should eql(220) 7.0.ceil(1).should eql(7.0) + 200.0.ceil(-2).should eql(200) -1.234.ceil(2).should eql(-1.23) 5.123812.ceil(4).should eql(5.1239) + 10.00001.ceil(5).should eql(10.00001) end end diff --git a/spec/ruby/core/float/floor_spec.rb b/spec/ruby/core/float/floor_spec.rb index 8b492ef4732fb7..1fafdadee9b6d7 100644 --- a/spec/ruby/core/float/floor_spec.rb +++ b/spec/ruby/core/float/floor_spec.rb @@ -2,7 +2,7 @@ require_relative '../integer/shared/integer_floor_precision' describe "Float#floor" do - context "with precision" do + context "with values equal to integers" do it_behaves_like :integer_floor_precision, :Float end @@ -20,7 +20,9 @@ 2.1679.floor(0).should eql(2) 214.94.floor(-1).should eql(210) 7.0.floor(1).should eql(7.0) + 200.0.floor(-2).should eql(200) -1.234.floor(2).should eql(-1.24) 5.123812.floor(4).should eql(5.1238) + 10.00001.floor(5).should eql(10.00001) end end diff --git a/spec/ruby/core/float/round_spec.rb b/spec/ruby/core/float/round_spec.rb index 7e8c792051b9d5..3e6575100bd52a 100644 --- a/spec/ruby/core/float/round_spec.rb +++ b/spec/ruby/core/float/round_spec.rb @@ -66,6 +66,7 @@ it "works for corner cases" do 42.0.round(308).should eql(42.0) 1.0e307.round(2).should eql(1.0e307) + 120.0.round(-1).should eql(120) end # redmine:5271 @@ -145,37 +146,35 @@ -4.809999999999999.round(5, half: :even).should eql(-4.81) end - ruby_bug "#19318", ""..."3.3" do - # These numbers are neighbouring floating point numbers round a - # precise value. 
They test that the rounding modes work correctly - # round that value and precision is not lost which might cause - # incorrect results. - it "does not lose precision during the rounding process" do - 767573.1875850001.round(5, half: nil).should eql(767573.18759) - 767573.1875850001.round(5, half: :up).should eql(767573.18759) - 767573.1875850001.round(5, half: :down).should eql(767573.18759) - 767573.1875850001.round(5, half: :even).should eql(767573.18759) - -767573.1875850001.round(5, half: nil).should eql(-767573.18759) - -767573.1875850001.round(5, half: :up).should eql(-767573.18759) - -767573.1875850001.round(5, half: :down).should eql(-767573.18759) - -767573.1875850001.round(5, half: :even).should eql(-767573.18759) - 767573.187585.round(5, half: nil).should eql(767573.18759) - 767573.187585.round(5, half: :up).should eql(767573.18759) - 767573.187585.round(5, half: :down).should eql(767573.18758) - 767573.187585.round(5, half: :even).should eql(767573.18758) - -767573.187585.round(5, half: nil).should eql(-767573.18759) - -767573.187585.round(5, half: :up).should eql(-767573.18759) - -767573.187585.round(5, half: :down).should eql(-767573.18758) - -767573.187585.round(5, half: :even).should eql(-767573.18758) - 767573.1875849998.round(5, half: nil).should eql(767573.18758) - 767573.1875849998.round(5, half: :up).should eql(767573.18758) - 767573.1875849998.round(5, half: :down).should eql(767573.18758) - 767573.1875849998.round(5, half: :even).should eql(767573.18758) - -767573.1875849998.round(5, half: nil).should eql(-767573.18758) - -767573.1875849998.round(5, half: :up).should eql(-767573.18758) - -767573.1875849998.round(5, half: :down).should eql(-767573.18758) - -767573.1875849998.round(5, half: :even).should eql(-767573.18758) - end + # These numbers are neighbouring floating point numbers round a + # precise value. 
They test that the rounding modes work correctly + # round that value and precision is not lost which might cause + # incorrect results. + it "does not lose precision during the rounding process" do + 767573.1875850001.round(5, half: nil).should eql(767573.18759) + 767573.1875850001.round(5, half: :up).should eql(767573.18759) + 767573.1875850001.round(5, half: :down).should eql(767573.18759) + 767573.1875850001.round(5, half: :even).should eql(767573.18759) + -767573.1875850001.round(5, half: nil).should eql(-767573.18759) + -767573.1875850001.round(5, half: :up).should eql(-767573.18759) + -767573.1875850001.round(5, half: :down).should eql(-767573.18759) + -767573.1875850001.round(5, half: :even).should eql(-767573.18759) + 767573.187585.round(5, half: nil).should eql(767573.18759) + 767573.187585.round(5, half: :up).should eql(767573.18759) + 767573.187585.round(5, half: :down).should eql(767573.18758) + 767573.187585.round(5, half: :even).should eql(767573.18758) + -767573.187585.round(5, half: nil).should eql(-767573.18759) + -767573.187585.round(5, half: :up).should eql(-767573.18759) + -767573.187585.round(5, half: :down).should eql(-767573.18758) + -767573.187585.round(5, half: :even).should eql(-767573.18758) + 767573.1875849998.round(5, half: nil).should eql(767573.18758) + 767573.1875849998.round(5, half: :up).should eql(767573.18758) + 767573.1875849998.round(5, half: :down).should eql(767573.18758) + 767573.1875849998.round(5, half: :even).should eql(767573.18758) + -767573.1875849998.round(5, half: nil).should eql(-767573.18758) + -767573.1875849998.round(5, half: :up).should eql(-767573.18758) + -767573.1875849998.round(5, half: :down).should eql(-767573.18758) + -767573.1875849998.round(5, half: :even).should eql(-767573.18758) end it "raises FloatDomainError for exceptional values with a half option" do @@ -197,7 +196,13 @@ it "returns 0 for 0 or undefined ndigits" do (0.0).round.should == 0 (-0.0).round(0).should == 0 - (0.0).round(half: :up) == 
0 + (0.0).round(half: :up).should == 0 + end + + it "returns 0 for negative ndigits" do + (0.0).round(-1).should == 0 + (-0.0).round(-1).should == 0 + (0.0).round(-1, half: :up).should == 0 end end end diff --git a/spec/ruby/core/gc/config_spec.rb b/spec/ruby/core/gc/config_spec.rb index e20e8e4a16a97f..db452b0907f58b 100644 --- a/spec/ruby/core/gc/config_spec.rb +++ b/spec/ruby/core/gc/config_spec.rb @@ -40,6 +40,20 @@ GC.config.should == previous end + ruby_version_is ""..."4.0" do + it "returns the same as GC.config but without the :implementation key" do + previous = GC.config + GC.config({}).should == previous.except(:implementation) + end + end + + ruby_version_is "4.0" do + it "returns the same as GC.config, including the :implementation key" do + previous = GC.config + GC.config({}).should == previous + end + end + it "raises an ArgumentError if options include global keys" do -> { GC.config(implementation: "default") }.should raise_error(ArgumentError, 'Attempting to set read-only key "Implementation"') end diff --git a/spec/ruby/core/hash/compact_spec.rb b/spec/ruby/core/hash/compact_spec.rb index 13371bce434fc9..48f8bb7cae166c 100644 --- a/spec/ruby/core/hash/compact_spec.rb +++ b/spec/ruby/core/hash/compact_spec.rb @@ -19,28 +19,26 @@ @hash.should == @initial_pairs end - ruby_version_is '3.3' do - it "retains the default value" do - hash = Hash.new(1) - hash.compact.default.should == 1 - hash[:a] = 1 - hash.compact.default.should == 1 - end + it "retains the default value" do + hash = Hash.new(1) + hash.compact.default.should == 1 + hash[:a] = 1 + hash.compact.default.should == 1 + end - it "retains the default_proc" do - pr = proc { |h, k| h[k] = [] } - hash = Hash.new(&pr) - hash.compact.default_proc.should == pr - hash[:a] = 1 - hash.compact.default_proc.should == pr - end + it "retains the default_proc" do + pr = proc { |h, k| h[k] = [] } + hash = Hash.new(&pr) + hash.compact.default_proc.should == pr + hash[:a] = 1 + 
hash.compact.default_proc.should == pr + end - it "retains compare_by_identity flag" do - hash = {}.compare_by_identity - hash.compact.compare_by_identity?.should == true - hash[:a] = 1 - hash.compact.compare_by_identity?.should == true - end + it "retains compare_by_identity flag" do + hash = {}.compare_by_identity + hash.compact.compare_by_identity?.should == true + hash[:a] = 1 + hash.compact.compare_by_identity?.should == true end end diff --git a/spec/ruby/core/hash/constructor_spec.rb b/spec/ruby/core/hash/constructor_spec.rb index 0f97f7b40e9c2c..301f8675ce27be 100644 --- a/spec/ruby/core/hash/constructor_spec.rb +++ b/spec/ruby/core/hash/constructor_spec.rb @@ -44,23 +44,23 @@ it "raises for elements that are not arrays" do -> { - Hash[[:a]].should == {} - }.should raise_error(ArgumentError) + Hash[[:a]] + }.should raise_error(ArgumentError, "wrong element type Symbol at 0 (expected array)") -> { - Hash[[:nil]].should == {} - }.should raise_error(ArgumentError) + Hash[[nil]] + }.should raise_error(ArgumentError, "wrong element type nil at 0 (expected array)") end it "raises an ArgumentError for arrays of more than 2 elements" do - ->{ Hash[[[:a, :b, :c]]].should == {} }.should raise_error(ArgumentError) + ->{ + Hash[[[:a, :b, :c]]] + }.should raise_error(ArgumentError, "invalid number of elements (3 for 1..2)") end it "raises an ArgumentError when passed a list of value-invalid-pairs in an array" do -> { - -> { - Hash[[[:a, 1], [:b], 42, [:d, 2], [:e, 2, 3], []]] - }.should complain(/ignoring wrong elements/) - }.should raise_error(ArgumentError) + Hash[[[:a, 1], [:b], 42, [:d, 2], [:e, 2, 3], []]] + }.should raise_error(ArgumentError, "wrong element type Integer at 2 (expected array)") end describe "passed a single argument which responds to #to_hash" do @@ -117,13 +117,11 @@ def obj.to_hash() { 1 => 2, 3 => 4 } end Hash[hash].default_proc.should be_nil end - ruby_version_is '3.3' do - it "does not retain compare_by_identity flag" do - hash = { a: 1 
}.compare_by_identity - Hash[hash].compare_by_identity?.should == false + it "does not retain compare_by_identity flag" do + hash = { a: 1 }.compare_by_identity + Hash[hash].compare_by_identity?.should == false - hash = {}.compare_by_identity - Hash[hash].compare_by_identity?.should == false - end + hash = {}.compare_by_identity + Hash[hash].compare_by_identity?.should == false end end diff --git a/spec/ruby/core/hash/new_spec.rb b/spec/ruby/core/hash/new_spec.rb index 5ae3e1f98d6205..8de44ec9411deb 100644 --- a/spec/ruby/core/hash/new_spec.rb +++ b/spec/ruby/core/hash/new_spec.rb @@ -34,7 +34,7 @@ -> { Hash.new(nil) { 0 } }.should raise_error(ArgumentError) end - ruby_version_is "3.3"..."3.4" do + ruby_version_is ""..."3.4" do it "emits a deprecation warning if keyword arguments are passed" do -> { Hash.new(unknown: true) }.should complain( Regexp.new(Regexp.escape("Calling Hash.new with keyword arguments is deprecated and will be removed in Ruby 3.4; use Hash.new({ key: value }) instead")) diff --git a/spec/ruby/core/hash/ruby2_keywords_hash_spec.rb b/spec/ruby/core/hash/ruby2_keywords_hash_spec.rb index 7dbb9c0a98351d..ddf9038800005f 100644 --- a/spec/ruby/core/hash/ruby2_keywords_hash_spec.rb +++ b/spec/ruby/core/hash/ruby2_keywords_hash_spec.rb @@ -72,12 +72,10 @@ Hash.ruby2_keywords_hash(hash).default_proc.should == pr end - ruby_version_is '3.3' do - it "retains compare_by_identity_flag" do - hash = {}.compare_by_identity - Hash.ruby2_keywords_hash(hash).compare_by_identity?.should == true - hash[:a] = 1 - Hash.ruby2_keywords_hash(hash).compare_by_identity?.should == true - end + it "retains compare_by_identity_flag" do + hash = {}.compare_by_identity + Hash.ruby2_keywords_hash(hash).compare_by_identity?.should == true + hash[:a] = 1 + Hash.ruby2_keywords_hash(hash).compare_by_identity?.should == true end end diff --git a/spec/ruby/core/hash/shared/to_s.rb b/spec/ruby/core/hash/shared/to_s.rb index e116b8878b9699..38dd2c44360a95 100644 --- 
a/spec/ruby/core/hash/shared/to_s.rb +++ b/spec/ruby/core/hash/shared/to_s.rb @@ -89,5 +89,36 @@ it "adds quotes to symbol keys that are not valid symbol literals" do { "needs-quotes": 1 }.send(@method).should == '{"needs-quotes": 1}' end + + it "can be evaled" do + no_quote = '{a: 1, a!: 1, a?: 1}' + eval(no_quote).inspect.should == no_quote + [ + '{"": 1}', + '{"0": 1, "!": 1, "%": 1, "&": 1, "*": 1, "+": 1, "-": 1, "/": 1, "<": 1, ">": 1, "^": 1, "`": 1, "|": 1, "~": 1}', + '{"@a": 1, "$a": 1, "+@": 1, "a=": 1, "[]": 1}', + '{"a\"b": 1, "@@a": 1, "<=>": 1, "===": 1, "[]=": 1}', + ].each do |quote| + eval(quote).inspect.should == quote + end + end + + it "can be evaled when Encoding.default_external is changed" do + external = Encoding.default_external + + Encoding.default_external = Encoding::ASCII + utf8_ascii_hash = '{"\\u3042": 1}' + eval(utf8_ascii_hash).inspect.should == utf8_ascii_hash + + Encoding.default_external = Encoding::UTF_8 + utf8_hash = "{\u3042: 1}" + eval(utf8_hash).inspect.should == utf8_hash + + Encoding.default_external = Encoding::Windows_31J + sjis_hash = "{\x87]: 1}".dup.force_encoding('sjis') + eval(sjis_hash).inspect.should == sjis_hash + ensure + Encoding.default_external = external + end end end diff --git a/spec/ruby/core/integer/ceil_spec.rb b/spec/ruby/core/integer/ceil_spec.rb index eb633fba78432f..395be58fbd3f48 100644 --- a/spec/ruby/core/integer/ceil_spec.rb +++ b/spec/ruby/core/integer/ceil_spec.rb @@ -10,15 +10,4 @@ context "with precision" do it_behaves_like :integer_ceil_precision, :Integer end - - context "precision argument specified as part of the ceil method is negative" do - it "returns the smallest integer greater than self with at least precision.abs trailing zeros" do - 18.ceil(-1).should eql(20) - 18.ceil(-2).should eql(100) - 18.ceil(-3).should eql(1000) - -1832.ceil(-1).should eql(-1830) - -1832.ceil(-2).should eql(-1800) - -1832.ceil(-3).should eql(-1000) - end - end end diff --git 
a/spec/ruby/core/integer/shared/integer_ceil_precision.rb b/spec/ruby/core/integer/shared/integer_ceil_precision.rb index 9f31c2cf615ed0..b23c17937faf0c 100644 --- a/spec/ruby/core/integer/shared/integer_ceil_precision.rb +++ b/spec/ruby/core/integer/shared/integer_ceil_precision.rb @@ -1,6 +1,6 @@ describe :integer_ceil_precision, shared: true do context "precision is zero" do - it "returns integer self" do + it "returns Integer equal to self" do send(@method, 0).ceil(0).should.eql?(0) send(@method, 123).ceil(0).should.eql?(123) send(@method, -123).ceil(0).should.eql?(-123) @@ -23,7 +23,16 @@ send(@method, 0).ceil(-10).should.eql?(0) end - it "returns largest integer less than self with at least precision.abs trailing zeros" do + it "returns Integer equal to self if there are already at least precision.abs trailing zeros" do + send(@method, 10).ceil(-1).should.eql?(10) + send(@method, 100).ceil(-1).should.eql?(100) + send(@method, 100).ceil(-2).should.eql?(100) + send(@method, -10).ceil(-1).should.eql?(-10) + send(@method, -100).ceil(-1).should.eql?(-100) + send(@method, -100).ceil(-2).should.eql?(-100) + end + + it "returns smallest Integer greater than self with at least precision.abs trailing zeros" do send(@method, 123).ceil(-1).should.eql?(130) send(@method, 123).ceil(-2).should.eql?(200) send(@method, 123).ceil(-3).should.eql?(1000) @@ -31,13 +40,15 @@ send(@method, -123).ceil(-1).should.eql?(-120) send(@method, -123).ceil(-2).should.eql?(-100) send(@method, -123).ceil(-3).should.eql?(0) + + send(@method, 100).ceil(-3).should.eql?(1000) + send(@method, -100).ceil(-3).should.eql?(0) end - ruby_bug "#20654", ""..."3.4" do - it "returns 10**precision.abs when precision.abs is larger than the number digits of self" do - send(@method, 123).ceil(-20).should.eql?(100000000000000000000) - send(@method, 123).ceil(-50).should.eql?(100000000000000000000000000000000000000000000000000) - end + # Bug #20654 + it "returns 10**precision.abs when precision.abs has more 
digits than self" do + send(@method, 123).ceil(-20).should.eql?(100000000000000000000) + send(@method, 123).ceil(-50).should.eql?(100000000000000000000000000000000000000000000000000) end end end diff --git a/spec/ruby/core/integer/shared/integer_floor_precision.rb b/spec/ruby/core/integer/shared/integer_floor_precision.rb index 4c5888c6c4818d..6247907d4cd0b8 100644 --- a/spec/ruby/core/integer/shared/integer_floor_precision.rb +++ b/spec/ruby/core/integer/shared/integer_floor_precision.rb @@ -33,11 +33,10 @@ send(@method, -123).floor(-3).should.eql?(-1000) end - ruby_bug "#20654", ""..."3.4" do - it "returns -(10**precision.abs) when self is negative and precision.abs is larger than the number digits of self" do - send(@method, -123).floor(-20).should.eql?(-100000000000000000000) - send(@method, -123).floor(-50).should.eql?(-100000000000000000000000000000000000000000000000000) - end + # Bug #20654 + it "returns -(10**precision.abs) when self is negative and precision.abs is larger than the number digits of self" do + send(@method, -123).floor(-20).should.eql?(-100000000000000000000) + send(@method, -123).floor(-50).should.eql?(-100000000000000000000000000000000000000000000000000) end end end diff --git a/spec/ruby/core/io/binread_spec.rb b/spec/ruby/core/io/binread_spec.rb index 9e36b84da97350..e4576c1aa1e4f3 100644 --- a/spec/ruby/core/io/binread_spec.rb +++ b/spec/ruby/core/io/binread_spec.rb @@ -45,7 +45,7 @@ -> { IO.binread @fname, 0, -1 }.should raise_error(Errno::EINVAL) end - ruby_version_is "3.3"..."4.0" do + ruby_version_is ""..."4.0" do # https://bugs.ruby-lang.org/issues/19630 it "warns about deprecation given a path with a pipe" do cmd = "|echo ok" diff --git a/spec/ruby/core/io/buffer/empty_spec.rb b/spec/ruby/core/io/buffer/empty_spec.rb index e1fd4ab6a23268..788b23f88f0a48 100644 --- a/spec/ruby/core/io/buffer/empty_spec.rb +++ b/spec/ruby/core/io/buffer/empty_spec.rb @@ -14,11 +14,9 @@ @buffer.empty?.should be_true end - ruby_version_is "3.3" do - 
it "is true for a 0-length String-backed buffer created with .string" do - IO::Buffer.string(0) do |buffer| - buffer.empty?.should be_true - end + it "is true for a 0-length String-backed buffer created with .string" do + IO::Buffer.string(0) do |buffer| + buffer.empty?.should be_true end end diff --git a/spec/ruby/core/io/buffer/external_spec.rb b/spec/ruby/core/io/buffer/external_spec.rb index 4377a383578167..10bb51053d422d 100644 --- a/spec/ruby/core/io/buffer/external_spec.rb +++ b/spec/ruby/core/io/buffer/external_spec.rb @@ -6,103 +6,18 @@ @buffer = nil end - context "with a buffer created with .new" do - it "is false for an internal buffer" do - @buffer = IO::Buffer.new(4) - @buffer.external?.should be_false - end - - it "is false for a mapped buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::MAPPED) - @buffer.external?.should be_false - end - end - - context "with a file-backed buffer created with .map" do - it "is true for a regular mapping" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY) - @buffer.external?.should be_true - end - end - - ruby_version_is "3.3" do - it "is false for a private mapping" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY | IO::Buffer::PRIVATE) - @buffer.external?.should be_false - end - end - end - end - - context "with a String-backed buffer created with .for" do - it "is true for a buffer created without a block" do - @buffer = IO::Buffer.for("test") - @buffer.external?.should be_true - end - - it "is true for a buffer created with a block" do - IO::Buffer.for(+"test") do |buffer| - buffer.external?.should be_true - end - end + it "is true for a buffer with externally-managed memory" do + @buffer = IO::Buffer.for("string") + @buffer.external?.should be_true end - ruby_version_is "3.3" do - context "with a String-backed buffer created with .string" do - it "is true" do - IO::Buffer.string(4) do |buffer| - 
buffer.external?.should be_true - end - end - end + it "is false for a buffer with self-managed memory" do + @buffer = IO::Buffer.new(12, IO::Buffer::MAPPED) + @buffer.external?.should be_false end - # Always false for slices - context "with a slice of a buffer" do - context "created with .new" do - it "is false when slicing an internal buffer" do - @buffer = IO::Buffer.new(4) - @buffer.slice.external?.should be_false - end - - it "is false when slicing a mapped buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::MAPPED) - @buffer.slice.external?.should be_false - end - end - - context "created with .map" do - it "is false" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY) - @buffer.slice.external?.should be_false - end - end - end - - context "created with .for" do - it "is false when slicing a buffer created without a block" do - @buffer = IO::Buffer.for("test") - @buffer.slice.external?.should be_false - end - - it "is false when slicing a buffer created with a block" do - IO::Buffer.for(+"test") do |buffer| - buffer.slice.external?.should be_false - end - end - end - - ruby_version_is "3.3" do - context "created with .string" do - it "is false" do - IO::Buffer.string(4) do |buffer| - buffer.slice.external?.should be_false - end - end - end - end + it "is false for a null buffer" do + @buffer = IO::Buffer.new(0) + @buffer.external?.should be_false end end diff --git a/spec/ruby/core/io/buffer/for_spec.rb b/spec/ruby/core/io/buffer/for_spec.rb new file mode 100644 index 00000000000000..d59a2a033afb3b --- /dev/null +++ b/spec/ruby/core/io/buffer/for_spec.rb @@ -0,0 +1,94 @@ +require_relative '../../../spec_helper' + +describe "IO::Buffer.for" do + before :each do + @string = +"för striñg" + end + + after :each do + @buffer&.free + @buffer = nil + end + + context "without a block" do + it "copies string's contents, creating a separate read-only buffer" do + @buffer = IO::Buffer.for(@string) + + 
@buffer.size.should == @string.bytesize + @buffer.get_string.should == @string.b + + @string[0] = "d" + @buffer.get_string(0, 1).should == "f".b + + -> { @buffer.set_string("d") }.should raise_error(IO::Buffer::AccessError, "Buffer is not writable!") + end + + it "creates an external, read-only buffer" do + @buffer = IO::Buffer.for(@string) + + @buffer.should_not.internal? + @buffer.should_not.mapped? + @buffer.should.external? + + @buffer.should_not.empty? + @buffer.should_not.null? + + @buffer.should_not.shared? + @buffer.should_not.private? + @buffer.should.readonly? + + @buffer.should_not.locked? + @buffer.should.valid? + end + end + + context "with a block" do + it "returns the last value in the block" do + value = + IO::Buffer.for(@string) do |buffer| + buffer.size * 3 + end + value.should == @string.bytesize * 3 + end + + it "frees the buffer at the end of the block" do + IO::Buffer.for(@string) do |buffer| + @buffer = buffer + @buffer.should_not.null? + end + @buffer.should.null? + end + + context "if string is not frozen" do + it "creates a modifiable string-backed buffer" do + IO::Buffer.for(@string) do |buffer| + buffer.size.should == @string.bytesize + buffer.get_string.should == @string.b + + buffer.should_not.readonly? + + buffer.set_string("ghost shell") + @string.should == "ghost shellg" + end + end + + it "locks the original string to prevent modification" do + IO::Buffer.for(@string) do |_buffer| + -> { @string[0] = "t" }.should raise_error(RuntimeError, "can't modify string; temporarily locked") + end + @string[1] = "u" + @string.should == "fur striñg" + end + end + + context "if string is frozen" do + it "creates a read-only string-backed buffer" do + IO::Buffer.for(@string.freeze) do |buffer| + buffer.should.readonly? 
+ + -> { buffer.set_string("ghost shell") }.should raise_error(IO::Buffer::AccessError, "Buffer is not writable!") + end + end + end + end +end diff --git a/spec/ruby/core/io/buffer/free_spec.rb b/spec/ruby/core/io/buffer/free_spec.rb index f3a491897849ae..9a141e11f6b728 100644 --- a/spec/ruby/core/io/buffer/free_spec.rb +++ b/spec/ruby/core/io/buffer/free_spec.rb @@ -49,17 +49,15 @@ end end - ruby_version_is "3.3" do - context "with a String-backed buffer created with .string" do - it "disassociates the buffer from the string and nullifies the buffer" do - string = - IO::Buffer.string(4) do |buffer| - buffer.set_string("meat") - buffer.free - buffer.null?.should be_true - end - string.should == "meat" - end + context "with a String-backed buffer created with .string" do + it "disassociates the buffer from the string and nullifies the buffer" do + string = + IO::Buffer.string(4) do |buffer| + buffer.set_string("meat") + buffer.free + buffer.null?.should be_true + end + string.should == "meat" end end diff --git a/spec/ruby/core/io/buffer/initialize_spec.rb b/spec/ruby/core/io/buffer/initialize_spec.rb index c86d1e7f1d634a..90b501f53d9a92 100644 --- a/spec/ruby/core/io/buffer/initialize_spec.rb +++ b/spec/ruby/core/io/buffer/initialize_spec.rb @@ -14,14 +14,18 @@ it "creates a buffer with default state" do @buffer = IO::Buffer.new + + @buffer.should_not.external? + @buffer.should_not.shared? + @buffer.should_not.private? @buffer.should_not.readonly? @buffer.should_not.empty? @buffer.should_not.null? - # This is run-time state, set by #locked. @buffer.should_not.locked? + @buffer.should.valid? end context "with size argument" do @@ -29,25 +33,24 @@ size = IO::Buffer::PAGE_SIZE - 1 @buffer = IO::Buffer.new(size) @buffer.size.should == size + @buffer.should_not.empty? + @buffer.should.internal? @buffer.should_not.mapped? - @buffer.should_not.empty? 
end it "creates a new mapped buffer if size is greater than or equal to IO::Buffer::PAGE_SIZE" do size = IO::Buffer::PAGE_SIZE @buffer = IO::Buffer.new(size) @buffer.size.should == size + @buffer.should_not.empty? + @buffer.should_not.internal? @buffer.should.mapped? - @buffer.should_not.empty? end it "creates a null buffer if size is 0" do @buffer = IO::Buffer.new(0) - @buffer.size.should.zero? - @buffer.should_not.internal? - @buffer.should_not.mapped? @buffer.should.null? @buffer.should.empty? end @@ -77,27 +80,40 @@ @buffer.should_not.empty? end + it "allows extra flags" do + @buffer = IO::Buffer.new(10, IO::Buffer::INTERNAL | IO::Buffer::SHARED | IO::Buffer::READONLY) + @buffer.should.internal? + @buffer.should.shared? + @buffer.should.readonly? + end + + it "ignores flags if size is 0" do + @buffer = IO::Buffer.new(0, 0xffff) + @buffer.should.null? + @buffer.should.empty? + + @buffer.should_not.internal? + @buffer.should_not.mapped? + @buffer.should_not.external? + + @buffer.should_not.shared? + @buffer.should_not.readonly? + + @buffer.should_not.locked? + @buffer.should.valid? 
+ end + it "raises IO::Buffer::AllocationError if neither IO::Buffer::MAPPED nor IO::Buffer::INTERNAL is given" do -> { IO::Buffer.new(10, IO::Buffer::READONLY) }.should raise_error(IO::Buffer::AllocationError, "Could not allocate buffer!") -> { IO::Buffer.new(10, 0) }.should raise_error(IO::Buffer::AllocationError, "Could not allocate buffer!") end - ruby_version_is "3.3" do - it "raises ArgumentError if flags is negative" do - -> { IO::Buffer.new(10, -1) }.should raise_error(ArgumentError, "Flags can't be negative!") - end - end - - ruby_version_is ""..."3.3" do - it "raises IO::Buffer::AllocationError with non-Integer flags" do - -> { IO::Buffer.new(10, 0.0) }.should raise_error(IO::Buffer::AllocationError, "Could not allocate buffer!") - end + it "raises ArgumentError if flags is negative" do + -> { IO::Buffer.new(10, -1) }.should raise_error(ArgumentError, "Flags can't be negative!") end - ruby_version_is "3.3" do - it "raises TypeError with non-Integer flags" do - -> { IO::Buffer.new(10, 0.0) }.should raise_error(TypeError, "not an Integer") - end + it "raises TypeError with non-Integer flags" do + -> { IO::Buffer.new(10, 0.0) }.should raise_error(TypeError, "not an Integer") end end end diff --git a/spec/ruby/core/io/buffer/internal_spec.rb b/spec/ruby/core/io/buffer/internal_spec.rb index 409699cc3c9230..40dc633d5d7fd0 100644 --- a/spec/ruby/core/io/buffer/internal_spec.rb +++ b/spec/ruby/core/io/buffer/internal_spec.rb @@ -6,103 +6,18 @@ @buffer = nil end - context "with a buffer created with .new" do - it "is true for an internal buffer" do - @buffer = IO::Buffer.new(4) - @buffer.internal?.should be_true - end - - it "is false for a mapped buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::MAPPED) - @buffer.internal?.should be_false - end - end - - context "with a file-backed buffer created with .map" do - it "is false for a regular mapping" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY) - 
@buffer.internal?.should be_false - end - end - - ruby_version_is "3.3" do - it "is false for a private mapping" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY | IO::Buffer::PRIVATE) - @buffer.internal?.should be_false - end - end - end - end - - context "with a String-backed buffer created with .for" do - it "is false for a buffer created without a block" do - @buffer = IO::Buffer.for("test") - @buffer.internal?.should be_false - end - - it "is false for a buffer created with a block" do - IO::Buffer.for(+"test") do |buffer| - buffer.internal?.should be_false - end - end + it "is true for an internally-allocated buffer" do + @buffer = IO::Buffer.new(12) + @buffer.internal?.should be_true end - ruby_version_is "3.3" do - context "with a String-backed buffer created with .string" do - it "is false" do - IO::Buffer.string(4) do |buffer| - buffer.internal?.should be_false - end - end - end + it "is false for an externally-allocated buffer" do + @buffer = IO::Buffer.new(12, IO::Buffer::MAPPED) + @buffer.internal?.should be_false end - # Always false for slices - context "with a slice of a buffer" do - context "created with .new" do - it "is false when slicing an internal buffer" do - @buffer = IO::Buffer.new(4) - @buffer.slice.internal?.should be_false - end - - it "is false when slicing a mapped buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::MAPPED) - @buffer.slice.internal?.should be_false - end - end - - context "created with .map" do - it "is false" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY) - @buffer.slice.internal?.should be_false - end - end - end - - context "created with .for" do - it "is false when slicing a buffer created without a block" do - @buffer = IO::Buffer.for("test") - @buffer.slice.internal?.should be_false - end - - it "is false when slicing a buffer created with a block" do - IO::Buffer.for(+"test") do |buffer| - 
buffer.slice.internal?.should be_false - end - end - end - - ruby_version_is "3.3" do - context "created with .string" do - it "is false" do - IO::Buffer.string(4) do |buffer| - buffer.slice.internal?.should be_false - end - end - end - end + it "is false for a null buffer" do + @buffer = IO::Buffer.new(0) + @buffer.internal?.should be_false end end diff --git a/spec/ruby/core/io/buffer/map_spec.rb b/spec/ruby/core/io/buffer/map_spec.rb new file mode 100644 index 00000000000000..4543c2d022a7ce --- /dev/null +++ b/spec/ruby/core/io/buffer/map_spec.rb @@ -0,0 +1,343 @@ +require_relative '../../../spec_helper' + +describe "IO::Buffer.map" do + before :all do + @tmp_files = [] + + @big_file_name = nil + @small_file_name = nil + end + + after :all do + @tmp_files.each {|file| File.delete(file)} + end + + def open_fixture + unless @small_file_name + @small_file_name = tmp("read_text.txt") + File.copy_stream(fixture(__dir__, "read_text.txt"), @small_file_name) + @tmp_files << @small_file_name + end + File.open(@small_file_name, "rb+") + end + + def open_big_file_fixture + unless @big_file_name + @big_file_name = tmp("big_file") + # Usually 4 kibibytes + 16 bytes + File.write(@big_file_name, "12345678" * (IO::Buffer::PAGE_SIZE / 8 + 2)) + @tmp_files << @big_file_name + end + File.open(@big_file_name, "rb+") + end + + after :each do + @buffer&.free + @buffer = nil + @file&.close + @file = nil + end + + it "creates a new buffer mapped from a file" do + @file = open_fixture + @buffer = IO::Buffer.map(@file) + + @buffer.size.should == 9 + @buffer.get_string.should == "abcâdef\n".b + end + + it "allows to close the file after creating buffer, retaining mapping" do + file = open_fixture + @buffer = IO::Buffer.map(file) + file.close + + @buffer.get_string.should == "abcâdef\n".b + end + + it "creates a mapped, external, shared buffer" do + @file = open_fixture + @buffer = IO::Buffer.map(@file) + + @buffer.should_not.internal? + @buffer.should.mapped? + @buffer.should.external? 
+ + @buffer.should_not.empty? + @buffer.should_not.null? + + @buffer.should.shared? + @buffer.should_not.private? + @buffer.should_not.readonly? + + @buffer.should_not.locked? + @buffer.should.valid? + end + + platform_is_not :windows, :openbsd do + it "is shareable across processes" do + file_name = tmp("shared_buffer") + @file = File.open(file_name, "w+") + @file << "I'm private" + @file.rewind + @buffer = IO::Buffer.map(@file) + + IO.popen("-") do |child_pipe| + if child_pipe + # Synchronize on child's output. + child_pipe.readlines.first.chomp.should == @buffer.to_s + @buffer.get_string.should == "I'm shared!" + + @file.read.should == "I'm shared!" + else + @buffer.set_string("I'm shared!") + puts @buffer + end + ensure + child_pipe&.close + end + ensure + File.unlink(file_name) + end + end + + context "with an empty file" do + ruby_version_is "4.0" do + it "raises ArgumentError" do + file_name = tmp("empty.txt") + @file = File.open(file_name, "wb+") + @tmp_files << file_name + -> { IO::Buffer.map(@file) }.should raise_error(ArgumentError, "Invalid negative or zero file size!") + end + end + end + + context "with a file opened only for reading" do + it "raises a SystemCallError unless read-only" do + @file = File.open(fixture(__dir__, "read_text.txt"), "rb") + -> { IO::Buffer.map(@file) }.should raise_error(SystemCallError) + end + end + + context "with size argument" do + it "limits the buffer to the specified size in bytes, starting from the start of the file" do + @file = open_fixture + @buffer = IO::Buffer.map(@file, 4) + + @buffer.size.should == 4 + @buffer.get_string.should == "abc\xC3".b + end + + it "maps the whole file if size is nil" do + @file = open_fixture + @buffer = IO::Buffer.map(@file, nil) + + @buffer.size.should == 9 + end + + context "if size is 0" do + ruby_version_is "4.0" do + it "raises ArgumentError" do + @file = open_fixture + -> { IO::Buffer.map(@file, 0) }.should raise_error(ArgumentError, "Size can't be zero!") + end + end + end + + 
it "raises TypeError if size is not an Integer or nil" do + @file = open_fixture + -> { IO::Buffer.map(@file, "10") }.should raise_error(TypeError, "not an Integer") + -> { IO::Buffer.map(@file, 10.0) }.should raise_error(TypeError, "not an Integer") + end + + it "raises ArgumentError if size is negative" do + @file = open_fixture + -> { IO::Buffer.map(@file, -1) }.should raise_error(ArgumentError, "Size can't be negative!") + end + + ruby_version_is ""..."4.0" do + # May or may not cause a crash on access. + it "is undefined behavior if size is larger than file size" + end + + ruby_version_is "4.0" do + it "raises ArgumentError if size is larger than file size" do + @file = open_fixture + -> { IO::Buffer.map(@file, 8192) }.should raise_error(ArgumentError, "Size can't be larger than file size!") + end + end + end + + context "with size and offset arguments" do + # Neither Windows nor macOS have clear, stable behavior with non-zero offset. + # https://bugs.ruby-lang.org/issues/21700 + platform_is :linux do + context "if offset is an allowed value for system call" do + it "maps the span specified by size starting from the offset" do + @file = open_big_file_fixture + @buffer = IO::Buffer.map(@file, 14, IO::Buffer::PAGE_SIZE) + + @buffer.size.should == 14 + @buffer.get_string(0, 14).should == "12345678123456" + end + + context "if size is nil" do + ruby_version_is ""..."4.0" do + it "maps the rest of the file" do + @file = open_big_file_fixture + @buffer = IO::Buffer.map(@file, nil, IO::Buffer::PAGE_SIZE) + + @buffer.get_string(0, 1).should == "1" + end + + it "incorrectly sets buffer's size to file's full size" do + @file = open_big_file_fixture + @buffer = IO::Buffer.map(@file, nil, IO::Buffer::PAGE_SIZE) + + @buffer.size.should == @file.size + end + end + + ruby_version_is "4.0" do + it "maps the rest of the file" do + @file = open_big_file_fixture + @buffer = IO::Buffer.map(@file, nil, IO::Buffer::PAGE_SIZE) + + @buffer.get_string(0, 1).should == "1" + end + + it 
"sets buffer's size to file's remaining size" do + @file = open_big_file_fixture + @buffer = IO::Buffer.map(@file, nil, IO::Buffer::PAGE_SIZE) + + @buffer.size.should == (@file.size - IO::Buffer::PAGE_SIZE) + end + end + end + end + end + + it "maps the file from the start if offset is 0" do + @file = open_fixture + @buffer = IO::Buffer.map(@file, 4, 0) + + @buffer.size.should == 4 + @buffer.get_string.should == "abc\xC3".b + end + + ruby_version_is ""..."4.0" do + # May or may not cause a crash on access. + it "is undefined behavior if offset+size is larger than file size" + end + + ruby_version_is "4.0" do + it "raises ArgumentError if offset+size is larger than file size" do + @file = open_big_file_fixture + -> { IO::Buffer.map(@file, 17, IO::Buffer::PAGE_SIZE) }.should raise_error(ArgumentError, "Offset too large!") + ensure + # Windows requires the file to be closed before deletion. + @file.close unless @file.closed? + end + end + + it "raises TypeError if offset is not convertible to Integer" do + @file = open_fixture + -> { IO::Buffer.map(@file, 4, "4096") }.should raise_error(TypeError, /no implicit conversion/) + -> { IO::Buffer.map(@file, 4, nil) }.should raise_error(TypeError, /no implicit conversion/) + end + + ruby_version_is "4.0" do + it "raises ArgumentError if offset is negative" do + @file = open_fixture + -> { IO::Buffer.map(@file, 4, -1) }.should raise_error(ArgumentError, "Offset can't be negative!") + end + end + end + + context "with flags argument" do + context "when READONLY flag is specified" do + it "sets readonly flag on the buffer, allowing only reads" do + @file = open_fixture + @buffer = IO::Buffer.map(@file, nil, 0, IO::Buffer::READONLY) + + @buffer.should.readonly? + + @buffer.get_string.should == "abc\xC3\xA2def\n".b + end + + it "allows mapping read-only files" do + @file = File.open(fixture(__dir__, "read_text.txt"), "rb") + @buffer = IO::Buffer.map(@file, nil, 0, IO::Buffer::READONLY) + + @buffer.should.readonly? 
+ + @buffer.get_string.should == "abc\xC3\xA2def\n".b + end + + it "causes IO::Buffer::AccessError on write" do + @file = open_fixture + @buffer = IO::Buffer.map(@file, nil, 0, IO::Buffer::READONLY) + + -> { @buffer.set_string("test") }.should raise_error(IO::Buffer::AccessError, "Buffer is not writable!") + end + end + + context "when PRIVATE is specified" do + it "sets private flag on the buffer, making it freely modifiable" do + @file = open_fixture + @buffer = IO::Buffer.map(@file, nil, 0, IO::Buffer::PRIVATE) + + @buffer.should.private? + @buffer.should_not.shared? + @buffer.should_not.external? + + @buffer.get_string.should == "abc\xC3\xA2def\n".b + @buffer.set_string("test12345") + @buffer.get_string.should == "test12345".b + + @file.read.should == "abcâdef\n".b + end + + it "allows mapping read-only files and modifying the buffer" do + @file = File.open(fixture(__dir__, "read_text.txt"), "rb") + @buffer = IO::Buffer.map(@file, nil, 0, IO::Buffer::PRIVATE) + + @buffer.should.private? + @buffer.should_not.shared? + @buffer.should_not.external? + + @buffer.get_string.should == "abc\xC3\xA2def\n".b + @buffer.set_string("test12345") + @buffer.get_string.should == "test12345".b + + @file.read.should == "abcâdef\n".b + end + + platform_is_not :windows do + it "is not shared across processes" do + file_name = tmp("shared_buffer") + @file = File.open(file_name, "w+") + @file << "I'm private" + @file.rewind + @buffer = IO::Buffer.map(@file, nil, 0, IO::Buffer::PRIVATE) + + IO.popen("-") do |child_pipe| + if child_pipe + # Synchronize on child's output. 
+ child_pipe.readlines.first.chomp.should == @buffer.to_s + @buffer.get_string.should == "I'm private" + + @file.read.should == "I'm private" + else + @buffer.set_string("I'm shared!") + puts @buffer + end + ensure + child_pipe&.close + end + ensure + File.unlink(file_name) + end + end + end + end +end diff --git a/spec/ruby/core/io/buffer/mapped_spec.rb b/spec/ruby/core/io/buffer/mapped_spec.rb index b3610207ffb100..13dc548ed26e72 100644 --- a/spec/ruby/core/io/buffer/mapped_spec.rb +++ b/spec/ruby/core/io/buffer/mapped_spec.rb @@ -6,103 +6,18 @@ @buffer = nil end - context "with a buffer created with .new" do - it "is false for an internal buffer" do - @buffer = IO::Buffer.new(4) - @buffer.mapped?.should be_false - end - - it "is true for a mapped buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::MAPPED) - @buffer.mapped?.should be_true - end - end - - context "with a file-backed buffer created with .map" do - it "is true for a regular mapping" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY) - @buffer.mapped?.should be_true - end - end - - ruby_version_is "3.3" do - it "is true for a private mapping" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY | IO::Buffer::PRIVATE) - @buffer.mapped?.should be_true - end - end - end - end - - context "with a String-backed buffer created with .for" do - it "is false for a buffer created without a block" do - @buffer = IO::Buffer.for("test") - @buffer.mapped?.should be_false - end - - it "is false for a buffer created with a block" do - IO::Buffer.for(+"test") do |buffer| - buffer.mapped?.should be_false - end - end + it "is true for a buffer with mapped memory" do + @buffer = IO::Buffer.new(12, IO::Buffer::MAPPED) + @buffer.mapped?.should be_true end - ruby_version_is "3.3" do - context "with a String-backed buffer created with .string" do - it "is false" do - IO::Buffer.string(4) do |buffer| - buffer.mapped?.should 
be_false - end - end - end + it "is false for a buffer with non-mapped memory" do + @buffer = IO::Buffer.for("string") + @buffer.mapped?.should be_false end - # Always false for slices - context "with a slice of a buffer" do - context "created with .new" do - it "is false when slicing an internal buffer" do - @buffer = IO::Buffer.new(4) - @buffer.slice.mapped?.should be_false - end - - it "is false when slicing a mapped buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::MAPPED) - @buffer.slice.mapped?.should be_false - end - end - - context "created with .map" do - it "is false" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY) - @buffer.slice.mapped?.should be_false - end - end - end - - context "created with .for" do - it "is false when slicing a buffer created without a block" do - @buffer = IO::Buffer.for("test") - @buffer.slice.mapped?.should be_false - end - - it "is false when slicing a buffer created with a block" do - IO::Buffer.for(+"test") do |buffer| - buffer.slice.mapped?.should be_false - end - end - end - - ruby_version_is "3.3" do - context "created with .string" do - it "is false" do - IO::Buffer.string(4) do |buffer| - buffer.slice.mapped?.should be_false - end - end - end - end + it "is false for a null buffer" do + @buffer = IO::Buffer.new(0) + @buffer.mapped?.should be_false end end diff --git a/spec/ruby/core/io/buffer/null_spec.rb b/spec/ruby/core/io/buffer/null_spec.rb index 3fb1144d0ed66f..3a0e7f841bf94d 100644 --- a/spec/ruby/core/io/buffer/null_spec.rb +++ b/spec/ruby/core/io/buffer/null_spec.rb @@ -14,11 +14,9 @@ @buffer.null?.should be_false end - ruby_version_is "3.3" do - it "is false for a 0-length String-backed buffer created with .string" do - IO::Buffer.string(0) do |buffer| - buffer.null?.should be_false - end + it "is false for a 0-length String-backed buffer created with .string" do + IO::Buffer.string(0) do |buffer| + buffer.null?.should be_false end end diff --git 
a/spec/ruby/core/io/buffer/private_spec.rb b/spec/ruby/core/io/buffer/private_spec.rb index 7aa308997b1939..86b7a7a0d0b391 100644 --- a/spec/ruby/core/io/buffer/private_spec.rb +++ b/spec/ruby/core/io/buffer/private_spec.rb @@ -1,111 +1,23 @@ require_relative '../../../spec_helper' -ruby_version_is "3.3" do - describe "IO::Buffer#private?" do - after :each do - @buffer&.free - @buffer = nil - end - - context "with a buffer created with .new" do - it "is false for an internal buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::INTERNAL) - @buffer.private?.should be_false - end - - it "is false for a mapped buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::MAPPED) - @buffer.private?.should be_false - end - end - - context "with a file-backed buffer created with .map" do - it "is false for a regular mapping" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY) - @buffer.private?.should be_false - end - end - - it "is true for a private mapping" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY | IO::Buffer::PRIVATE) - @buffer.private?.should be_true - end - end - end - - context "with a String-backed buffer created with .for" do - it "is false for a buffer created without a block" do - @buffer = IO::Buffer.for("test") - @buffer.private?.should be_false - end - - it "is false for a buffer created with a block" do - IO::Buffer.for(+"test") do |buffer| - buffer.private?.should be_false - end - end - end - - context "with a String-backed buffer created with .string" do - it "is false" do - IO::Buffer.string(4) do |buffer| - buffer.private?.should be_false - end - end - end - - # Always false for slices - context "with a slice of a buffer" do - context "created with .new" do - it "is false when slicing an internal buffer" do - @buffer = IO::Buffer.new(4) - @buffer.slice.private?.should be_false - end - - it "is false when slicing a mapped buffer" do - @buffer = 
IO::Buffer.new(4, IO::Buffer::MAPPED) - @buffer.slice.private?.should be_false - end - end - - context "created with .map" do - it "is false when slicing a regular file-backed buffer" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY) - @buffer.slice.private?.should be_false - end - end - - it "is false when slicing a private file-backed buffer" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY | IO::Buffer::PRIVATE) - @buffer.slice.private?.should be_false - end - end - end +describe "IO::Buffer#private?" do + after :each do + @buffer&.free + @buffer = nil + end - context "created with .for" do - it "is false when slicing a buffer created without a block" do - @buffer = IO::Buffer.for("test") - @buffer.slice.private?.should be_false - end + it "is true for a buffer created with PRIVATE flag" do + @buffer = IO::Buffer.new(12, IO::Buffer::INTERNAL | IO::Buffer::PRIVATE) + @buffer.private?.should be_true + end - it "is false when slicing a buffer created with a block" do - IO::Buffer.for(+"test") do |buffer| - buffer.slice.private?.should be_false - end - end - end + it "is false for a buffer created without PRIVATE flag" do + @buffer = IO::Buffer.new(12, IO::Buffer::INTERNAL) + @buffer.private?.should be_false + end - context "created with .string" do - it "is false" do - IO::Buffer.string(4) do |buffer| - buffer.slice.private?.should be_false - end - end - end - end + it "is false for a null buffer" do + @buffer = IO::Buffer.new(0) + @buffer.private?.should be_false end end diff --git a/spec/ruby/core/io/buffer/readonly_spec.rb b/spec/ruby/core/io/buffer/readonly_spec.rb index 0014a876ed743e..2fc7d340b77b80 100644 --- a/spec/ruby/core/io/buffer/readonly_spec.rb +++ b/spec/ruby/core/io/buffer/readonly_spec.rb @@ -6,138 +6,23 @@ @buffer = nil end - context "with a buffer created with .new" do - it "is false for an internal buffer" do - @buffer = IO::Buffer.new(4, 
IO::Buffer::INTERNAL) - @buffer.readonly?.should be_false - end - - it "is false for a mapped buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::MAPPED) - @buffer.readonly?.should be_false - end - end - - context "with a file-backed buffer created with .map" do - it "is false for a writable mapping" do - File.open(__FILE__, "r+") do |file| - @buffer = IO::Buffer.map(file) - @buffer.readonly?.should be_false - end - end - - it "is true for a readonly mapping" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY) - @buffer.readonly?.should be_true - end - end - - ruby_version_is "3.3" do - it "is false for a private mapping" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::PRIVATE) - @buffer.readonly?.should be_false - end - end - end + it "is true for a buffer created with READONLY flag" do + @buffer = IO::Buffer.new(12, IO::Buffer::INTERNAL | IO::Buffer::READONLY) + @buffer.readonly?.should be_true end - context "with a String-backed buffer created with .for" do - it "is true for a buffer created without a block" do - @buffer = IO::Buffer.for(+"test") - @buffer.readonly?.should be_true - end - - it "is false for a buffer created with a block" do - IO::Buffer.for(+"test") do |buffer| - buffer.readonly?.should be_false - end - end - - it "is true for a buffer created with a block from a frozen string" do - IO::Buffer.for(-"test") do |buffer| - buffer.readonly?.should be_true - end - end + it "is true for a buffer that is non-writable" do + @buffer = IO::Buffer.for("string") + @buffer.readonly?.should be_true end - ruby_version_is "3.3" do - context "with a String-backed buffer created with .string" do - it "is false" do - IO::Buffer.string(4) do |buffer| - buffer.readonly?.should be_false - end - end - end + it "is false for a modifiable buffer" do + @buffer = IO::Buffer.new(12) + @buffer.readonly?.should be_false end - # This seems to be the only flag propagated from the 
source buffer to the slice. - context "with a slice of a buffer" do - context "created with .new" do - it "is false when slicing an internal buffer" do - @buffer = IO::Buffer.new(4) - @buffer.slice.readonly?.should be_false - end - - it "is false when slicing a mapped buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::MAPPED) - @buffer.slice.readonly?.should be_false - end - end - - context "created with .map" do - it "is false when slicing a read-write file-backed buffer" do - File.open(__FILE__, "r+") do |file| - @buffer = IO::Buffer.map(file) - @buffer.slice.readonly?.should be_false - end - end - - it "is true when slicing a readonly file-backed buffer" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY) - @buffer.slice.readonly?.should be_true - end - end - - ruby_version_is "3.3" do - it "is false when slicing a private file-backed buffer" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::PRIVATE) - @buffer.slice.readonly?.should be_false - end - end - end - end - - context "created with .for" do - it "is true when slicing a buffer created without a block" do - @buffer = IO::Buffer.for(+"test") - @buffer.slice.readonly?.should be_true - end - - it "is false when slicing a buffer created with a block" do - IO::Buffer.for(+"test") do |buffer| - buffer.slice.readonly?.should be_false - end - end - - it "is true when slicing a buffer created with a block from a frozen string" do - IO::Buffer.for(-"test") do |buffer| - buffer.slice.readonly?.should be_true - end - end - end - - ruby_version_is "3.3" do - context "created with .string" do - it "is false" do - IO::Buffer.string(4) do |buffer| - buffer.slice.readonly?.should be_false - end - end - end - end + it "is false for a null buffer" do + @buffer = IO::Buffer.new(0) + @buffer.readonly?.should be_false end end diff --git a/spec/ruby/core/io/buffer/resize_spec.rb b/spec/ruby/core/io/buffer/resize_spec.rb index 
0da3a23356c0dc..a5e80439dac653 100644 --- a/spec/ruby/core/io/buffer/resize_spec.rb +++ b/spec/ruby/core/io/buffer/resize_spec.rb @@ -44,17 +44,15 @@ end end - ruby_version_is "3.3" do - it "resizes private buffer, discarding excess contents" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::PRIVATE) - @buffer.resize(10) - @buffer.size.should == 10 - @buffer.get_string.should == "require_re" - @buffer.resize(12) - @buffer.size.should == 12 - @buffer.get_string.should == "require_re\0\0" - end + it "resizes private buffer, discarding excess contents" do + File.open(__FILE__, "r") do |file| + @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::PRIVATE) + @buffer.resize(10) + @buffer.size.should == 10 + @buffer.get_string.should == "require_re" + @buffer.resize(12) + @buffer.size.should == 12 + @buffer.get_string.should == "require_re\0\0" end end end @@ -76,12 +74,10 @@ end end - ruby_version_is "3.3" do - context "with a String-backed buffer created with .string" do - it "disallows resizing, raising IO::Buffer::AccessError" do - IO::Buffer.string(4) do |buffer| - -> { buffer.resize(10) }.should raise_error(IO::Buffer::AccessError, "Cannot resize external buffer!") - end + context "with a String-backed buffer created with .string" do + it "disallows resizing, raising IO::Buffer::AccessError" do + IO::Buffer.string(4) do |buffer| + -> { buffer.resize(10) }.should raise_error(IO::Buffer::AccessError, "Cannot resize external buffer!") end end end diff --git a/spec/ruby/core/io/buffer/shared/null_and_empty.rb b/spec/ruby/core/io/buffer/shared/null_and_empty.rb index c8fe9e5e46ca9b..2ff5cf8f410db7 100644 --- a/spec/ruby/core/io/buffer/shared/null_and_empty.rb +++ b/spec/ruby/core/io/buffer/shared/null_and_empty.rb @@ -21,11 +21,9 @@ @buffer.send(@method).should be_false end - ruby_version_is "3.3" do - it "is false for a non-empty String-backed buffer created with .string" do - IO::Buffer.string(4) do |buffer| - 
buffer.send(@method).should be_false - end + it "is false for a non-empty String-backed buffer created with .string" do + IO::Buffer.string(4) do |buffer| + buffer.send(@method).should be_false end end diff --git a/spec/ruby/core/io/buffer/shared_spec.rb b/spec/ruby/core/io/buffer/shared_spec.rb index f2a638cf39f9b1..be8c29471af880 100644 --- a/spec/ruby/core/io/buffer/shared_spec.rb +++ b/spec/ruby/core/io/buffer/shared_spec.rb @@ -6,112 +6,30 @@ @buffer = nil end - context "with a buffer created with .new" do - it "is false for an internal buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::INTERNAL) - @buffer.shared?.should be_false - end - - it "is false for a mapped buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::MAPPED) - @buffer.shared?.should be_false - end - end - - context "with a file-backed buffer created with .map" do - it "is true for a regular mapping" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY) - @buffer.shared?.should be_true - end - end - - ruby_version_is "3.3" do - it "is false for a private mapping" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY | IO::Buffer::PRIVATE) - @buffer.shared?.should be_false - end - end - end + it "is true for a buffer created with SHARED flag" do + @buffer = IO::Buffer.new(12, IO::Buffer::INTERNAL | IO::Buffer::SHARED) + @buffer.shared?.should be_true end - context "with a String-backed buffer created with .for" do - it "is false for a buffer created without a block" do - @buffer = IO::Buffer.for("test") - @buffer.shared?.should be_false - end - - it "is false for a buffer created with a block" do - IO::Buffer.for(+"test") do |buffer| - buffer.shared?.should be_false - end - end + it "is true for a non-private buffer created with .map" do + path = tmp("read_text.txt") + File.copy_stream(fixture(__dir__, "read_text.txt"), path) + file = File.open(path, "r+") + @buffer = IO::Buffer.map(file) + 
@buffer.shared?.should be_true + ensure + @buffer.free + file.close + File.unlink(path) end - ruby_version_is "3.3" do - context "with a String-backed buffer created with .string" do - it "is false" do - IO::Buffer.string(4) do |buffer| - buffer.shared?.should be_false - end - end - end + it "is false for an unshared buffer" do + @buffer = IO::Buffer.new(12) + @buffer.shared?.should be_false end - # Always false for slices - context "with a slice of a buffer" do - context "created with .new" do - it "is false when slicing an internal buffer" do - @buffer = IO::Buffer.new(4) - @buffer.slice.shared?.should be_false - end - - it "is false when slicing a mapped buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::MAPPED) - @buffer.slice.shared?.should be_false - end - end - - context "created with .map" do - it "is false when slicing a regular file-backed buffer" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY) - @buffer.slice.shared?.should be_false - end - end - - ruby_version_is "3.3" do - it "is false when slicing a private file-backed buffer" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY | IO::Buffer::PRIVATE) - @buffer.slice.shared?.should be_false - end - end - end - end - - context "created with .for" do - it "is false when slicing a buffer created without a block" do - @buffer = IO::Buffer.for("test") - @buffer.slice.shared?.should be_false - end - - it "is false when slicing a buffer created with a block" do - IO::Buffer.for(+"test") do |buffer| - buffer.slice.shared?.should be_false - end - end - end - - ruby_version_is "3.3" do - context "created with .string" do - it "is false" do - IO::Buffer.string(4) do |buffer| - buffer.slice.shared?.should be_false - end - end - end - end + it "is false for a null buffer" do + @buffer = IO::Buffer.new(0) + @buffer.shared?.should be_false end end diff --git a/spec/ruby/core/io/buffer/string_spec.rb 
b/spec/ruby/core/io/buffer/string_spec.rb new file mode 100644 index 00000000000000..bc7a73075e3948 --- /dev/null +++ b/spec/ruby/core/io/buffer/string_spec.rb @@ -0,0 +1,62 @@ +require_relative '../../../spec_helper' + +describe "IO::Buffer.string" do + it "creates a modifiable buffer for the duration of the block" do + IO::Buffer.string(7) do |buffer| + @buffer = buffer + + buffer.size.should == 7 + buffer.get_string.should == "\0\0\0\0\0\0\0".b + + buffer.set_string("test") + buffer.get_string.should == "test\0\0\0" + end + @buffer.should.null? + end + + it "returns contents of the buffer as a binary string" do + string = + IO::Buffer.string(7) do |buffer| + buffer.set_string("ä test") + end + string.should == "\xC3\xA4 test".b + end + + it "creates an external buffer" do + IO::Buffer.string(8) do |buffer| + buffer.should_not.internal? + buffer.should_not.mapped? + buffer.should.external? + + buffer.should_not.empty? + buffer.should_not.null? + + buffer.should_not.shared? + buffer.should_not.private? + buffer.should_not.readonly? + + buffer.should_not.locked? + buffer.should.valid? 
+ end + end + + it "returns an empty string if size is 0" do + string = + IO::Buffer.string(0) do |buffer| + buffer.size.should == 0 + end + string.should == "" + end + + it "raises ArgumentError if size is negative" do + -> { IO::Buffer.string(-1) {} }.should raise_error(ArgumentError, "negative string size (or size too big)") + end + + it "raises RangeError if size is too large" do + -> { IO::Buffer.string(2 ** 232) {} }.should raise_error(RangeError, /\Abignum too big to convert into [`']long'\z/) + end + + it "raises LocalJumpError if no block is given" do + -> { IO::Buffer.string(7) }.should raise_error(LocalJumpError, "no block given") + end +end diff --git a/spec/ruby/core/io/buffer/transfer_spec.rb b/spec/ruby/core/io/buffer/transfer_spec.rb index cb8c843ff24750..5b7b63e3339991 100644 --- a/spec/ruby/core/io/buffer/transfer_spec.rb +++ b/spec/ruby/core/io/buffer/transfer_spec.rb @@ -60,17 +60,15 @@ end end - ruby_version_is "3.3" do - context "with a String-backed buffer created with .string" do - it "transfers memory to a new buffer, breaking the transaction by nullifying the original" do - IO::Buffer.string(4) do |buffer| - info = buffer.to_s - @buffer = buffer.transfer - @buffer.to_s.should == info - buffer.null?.should be_true - end - @buffer.null?.should be_false + context "with a String-backed buffer created with .string" do + it "transfers memory to a new buffer, breaking the transaction by nullifying the original" do + IO::Buffer.string(4) do |buffer| + info = buffer.to_s + @buffer = buffer.transfer + @buffer.to_s.should == info + buffer.null?.should be_true end + @buffer.null?.should be_false end end diff --git a/spec/ruby/core/io/foreach_spec.rb b/spec/ruby/core/io/foreach_spec.rb index 6abe8901bac7a0..28d6fef7ae5079 100644 --- a/spec/ruby/core/io/foreach_spec.rb +++ b/spec/ruby/core/io/foreach_spec.rb @@ -47,14 +47,12 @@ end end - ruby_version_is "3.3" do - # https://bugs.ruby-lang.org/issues/19630 - it "warns about deprecation given a path with 
a pipe" do - cmd = "|echo ok" - -> { - IO.foreach(cmd).to_a - }.should complain(/IO process creation with a leading '\|'/) - end + # https://bugs.ruby-lang.org/issues/19630 + it "warns about deprecation given a path with a pipe" do + cmd = "|echo ok" + -> { + IO.foreach(cmd).to_a + }.should complain(/IO process creation with a leading '\|'/) end end end diff --git a/spec/ruby/core/io/gets_spec.rb b/spec/ruby/core/io/gets_spec.rb index ca64bf860e4148..0587fa07c43289 100644 --- a/spec/ruby/core/io/gets_spec.rb +++ b/spec/ruby/core/io/gets_spec.rb @@ -338,23 +338,11 @@ @io.gets.encoding.should == Encoding::BINARY end - ruby_version_is ''...'3.3' do - it "transcodes to internal encoding if the IO object's external encoding is BINARY" do - Encoding.default_external = Encoding::BINARY - Encoding.default_internal = Encoding::UTF_8 - @io = new_io @name, 'r' - @io.set_encoding Encoding::BINARY, Encoding::UTF_8 - @io.gets.encoding.should == Encoding::UTF_8 - end - end - - ruby_version_is '3.3' do - it "ignores the internal encoding if the IO object's external encoding is BINARY" do - Encoding.default_external = Encoding::BINARY - Encoding.default_internal = Encoding::UTF_8 - @io = new_io @name, 'r' - @io.set_encoding Encoding::BINARY, Encoding::UTF_8 - @io.gets.encoding.should == Encoding::BINARY - end + it "ignores the internal encoding if the IO object's external encoding is BINARY" do + Encoding.default_external = Encoding::BINARY + Encoding.default_internal = Encoding::UTF_8 + @io = new_io @name, 'r' + @io.set_encoding Encoding::BINARY, Encoding::UTF_8 + @io.gets.encoding.should == Encoding::BINARY end end diff --git a/spec/ruby/core/io/pread_spec.rb b/spec/ruby/core/io/pread_spec.rb index dc7bcedf3e5c2f..8f7d9b2521d9c9 100644 --- a/spec/ruby/core/io/pread_spec.rb +++ b/spec/ruby/core/io/pread_spec.rb @@ -1,140 +1,138 @@ # -*- encoding: utf-8 -*- require_relative '../../spec_helper' -guard -> { platform_is_not :windows or ruby_version_is "3.3" } do - describe "IO#pread" 
do - before :each do - @fname = tmp("io_pread.txt") - @contents = "1234567890" - touch(@fname) { |f| f.write @contents } - @file = File.open(@fname, "r+") - end - - after :each do - @file.close - rm_r @fname - end +describe "IO#pread" do + before :each do + @fname = tmp("io_pread.txt") + @contents = "1234567890" + touch(@fname) { |f| f.write @contents } + @file = File.open(@fname, "r+") + end - it "accepts a length, and an offset" do - @file.pread(4, 0).should == "1234" - @file.pread(3, 4).should == "567" - end + after :each do + @file.close + rm_r @fname + end - it "accepts a length, an offset, and an output buffer" do - buffer = +"foo" - @file.pread(3, 4, buffer).should.equal?(buffer) - buffer.should == "567" - end + it "accepts a length, and an offset" do + @file.pread(4, 0).should == "1234" + @file.pread(3, 4).should == "567" + end - it "shrinks the buffer in case of less bytes read" do - buffer = +"foo" - @file.pread(1, 0, buffer) - buffer.should == "1" - end + it "accepts a length, an offset, and an output buffer" do + buffer = +"foo" + @file.pread(3, 4, buffer).should.equal?(buffer) + buffer.should == "567" + end - it "grows the buffer in case of more bytes read" do - buffer = +"foo" - @file.pread(5, 0, buffer) - buffer.should == "12345" - end + it "shrinks the buffer in case of less bytes read" do + buffer = +"foo" + @file.pread(1, 0, buffer) + buffer.should == "1" + end - it "preserves the encoding of the given buffer" do - buffer = ''.encode(Encoding::ISO_8859_1) - @file.pread(10, 0, buffer) + it "grows the buffer in case of more bytes read" do + buffer = +"foo" + @file.pread(5, 0, buffer) + buffer.should == "12345" + end - buffer.encoding.should == Encoding::ISO_8859_1 - end + it "preserves the encoding of the given buffer" do + buffer = ''.encode(Encoding::ISO_8859_1) + @file.pread(10, 0, buffer) - it "does not advance the file pointer" do - @file.pread(4, 0).should == "1234" - @file.read.should == "1234567890" - end + buffer.encoding.should == 
Encoding::ISO_8859_1 + end - it "ignores the current offset" do - @file.pos = 3 - @file.pread(4, 0).should == "1234" - end + it "does not advance the file pointer" do + @file.pread(4, 0).should == "1234" + @file.read.should == "1234567890" + end - it "returns an empty string for maxlen = 0" do - @file.pread(0, 4).should == "" - end + it "ignores the current offset" do + @file.pos = 3 + @file.pread(4, 0).should == "1234" + end - it "returns a buffer for maxlen = 0 when buffer specified" do - buffer = +"foo" - @file.pread(0, 4, buffer).should.equal?(buffer) - buffer.should == "foo" - end + it "returns an empty string for maxlen = 0" do + @file.pread(0, 4).should == "" + end - it "ignores the offset for maxlen = 0, even if it is out of file bounds" do - @file.pread(0, 400).should == "" - end + it "returns a buffer for maxlen = 0 when buffer specified" do + buffer = +"foo" + @file.pread(0, 4, buffer).should.equal?(buffer) + buffer.should == "foo" + end - it "does not reset the buffer when reading with maxlen = 0" do - buffer = +"foo" - @file.pread(0, 4, buffer) - buffer.should == "foo" + it "ignores the offset for maxlen = 0, even if it is out of file bounds" do + @file.pread(0, 400).should == "" + end - @file.pread(0, 400, buffer) - buffer.should == "foo" - end + it "does not reset the buffer when reading with maxlen = 0" do + buffer = +"foo" + @file.pread(0, 4, buffer) + buffer.should == "foo" - it "converts maxlen to Integer using #to_int" do - maxlen = mock('maxlen') - maxlen.should_receive(:to_int).and_return(4) - @file.pread(maxlen, 0).should == "1234" - end + @file.pread(0, 400, buffer) + buffer.should == "foo" + end - it "converts offset to Integer using #to_int" do - offset = mock('offset') - offset.should_receive(:to_int).and_return(0) - @file.pread(4, offset).should == "1234" - end + it "converts maxlen to Integer using #to_int" do + maxlen = mock('maxlen') + maxlen.should_receive(:to_int).and_return(4) + @file.pread(maxlen, 0).should == "1234" + end - it 
"converts a buffer to String using to_str" do - buffer = mock('buffer') - buffer.should_receive(:to_str).at_least(1).and_return(+"foo") - @file.pread(4, 0, buffer) - buffer.should_not.is_a?(String) - buffer.to_str.should == "1234" - end + it "converts offset to Integer using #to_int" do + offset = mock('offset') + offset.should_receive(:to_int).and_return(0) + @file.pread(4, offset).should == "1234" + end - it "raises TypeError if maxlen is not an Integer and cannot be coerced into Integer" do - maxlen = Object.new - -> { @file.pread(maxlen, 0) }.should raise_error(TypeError, 'no implicit conversion of Object into Integer') - end + it "converts a buffer to String using to_str" do + buffer = mock('buffer') + buffer.should_receive(:to_str).at_least(1).and_return(+"foo") + @file.pread(4, 0, buffer) + buffer.should_not.is_a?(String) + buffer.to_str.should == "1234" + end - it "raises TypeError if offset is not an Integer and cannot be coerced into Integer" do - offset = Object.new - -> { @file.pread(4, offset) }.should raise_error(TypeError, 'no implicit conversion of Object into Integer') - end + it "raises TypeError if maxlen is not an Integer and cannot be coerced into Integer" do + maxlen = Object.new + -> { @file.pread(maxlen, 0) }.should raise_error(TypeError, 'no implicit conversion of Object into Integer') + end - it "raises ArgumentError for negative values of maxlen" do - -> { @file.pread(-4, 0) }.should raise_error(ArgumentError, 'negative string size (or size too big)') - end + it "raises TypeError if offset is not an Integer and cannot be coerced into Integer" do + offset = Object.new + -> { @file.pread(4, offset) }.should raise_error(TypeError, 'no implicit conversion of Object into Integer') + end - it "raised Errno::EINVAL for negative values of offset" do - -> { @file.pread(4, -1) }.should raise_error(Errno::EINVAL, /Invalid argument/) - end + it "raises ArgumentError for negative values of maxlen" do + -> { @file.pread(-4, 0) }.should 
raise_error(ArgumentError, 'negative string size (or size too big)') + end - it "raises TypeError if the buffer is not a String and cannot be coerced into String" do - buffer = Object.new - -> { @file.pread(4, 0, buffer) }.should raise_error(TypeError, 'no implicit conversion of Object into String') - end + it "raised Errno::EINVAL for negative values of offset" do + -> { @file.pread(4, -1) }.should raise_error(Errno::EINVAL, /Invalid argument/) + end - it "raises EOFError if end-of-file is reached" do - -> { @file.pread(1, 10) }.should raise_error(EOFError) - end + it "raises TypeError if the buffer is not a String and cannot be coerced into String" do + buffer = Object.new + -> { @file.pread(4, 0, buffer) }.should raise_error(TypeError, 'no implicit conversion of Object into String') + end - it "raises IOError when file is not open in read mode" do - File.open(@fname, "w") do |file| - -> { file.pread(1, 1) }.should raise_error(IOError) - end - end + it "raises EOFError if end-of-file is reached" do + -> { @file.pread(1, 10) }.should raise_error(EOFError) + end - it "raises IOError when file is closed" do - file = File.open(@fname, "r+") - file.close + it "raises IOError when file is not open in read mode" do + File.open(@fname, "w") do |file| -> { file.pread(1, 1) }.should raise_error(IOError) end end + + it "raises IOError when file is closed" do + file = File.open(@fname, "r+") + file.close + -> { file.pread(1, 1) }.should raise_error(IOError) + end end diff --git a/spec/ruby/core/io/pwrite_spec.rb b/spec/ruby/core/io/pwrite_spec.rb index 2bc508b37d1660..fd0b6cf380c463 100644 --- a/spec/ruby/core/io/pwrite_spec.rb +++ b/spec/ruby/core/io/pwrite_spec.rb @@ -1,69 +1,67 @@ # -*- encoding: utf-8 -*- require_relative '../../spec_helper' -guard -> { platform_is_not :windows or ruby_version_is "3.3" } do - describe "IO#pwrite" do - before :each do - @fname = tmp("io_pwrite.txt") - @file = File.open(@fname, "w+") - end +describe "IO#pwrite" do + before :each do + 
@fname = tmp("io_pwrite.txt") + @file = File.open(@fname, "w+") + end - after :each do - @file.close - rm_r @fname - end + after :each do + @file.close + rm_r @fname + end - it "returns the number of bytes written" do - @file.pwrite("foo", 0).should == 3 - end + it "returns the number of bytes written" do + @file.pwrite("foo", 0).should == 3 + end - it "accepts a string and an offset" do - @file.pwrite("foo", 2) - @file.pread(3, 2).should == "foo" - end + it "accepts a string and an offset" do + @file.pwrite("foo", 2) + @file.pread(3, 2).should == "foo" + end - it "does not advance the pointer in the file" do - @file.pwrite("bar", 3) - @file.write("foo") - @file.pread(6, 0).should == "foobar" - end + it "does not advance the pointer in the file" do + @file.pwrite("bar", 3) + @file.write("foo") + @file.pread(6, 0).should == "foobar" + end - it "calls #to_s on the object to be written" do - object = mock("to_s") - object.should_receive(:to_s).and_return("foo") - @file.pwrite(object, 0) - @file.pread(3, 0).should == "foo" - end + it "calls #to_s on the object to be written" do + object = mock("to_s") + object.should_receive(:to_s).and_return("foo") + @file.pwrite(object, 0) + @file.pread(3, 0).should == "foo" + end - it "calls #to_int on the offset" do - offset = mock("to_int") - offset.should_receive(:to_int).and_return(2) - @file.pwrite("foo", offset) - @file.pread(3, 2).should == "foo" - end + it "calls #to_int on the offset" do + offset = mock("to_int") + offset.should_receive(:to_int).and_return(2) + @file.pwrite("foo", offset) + @file.pread(3, 2).should == "foo" + end - it "raises IOError when file is not open in write mode" do - File.open(@fname, "r") do |file| - -> { file.pwrite("foo", 1) }.should raise_error(IOError, "not opened for writing") - end + it "raises IOError when file is not open in write mode" do + File.open(@fname, "r") do |file| + -> { file.pwrite("foo", 1) }.should raise_error(IOError, "not opened for writing") end + end - it "raises IOError 
when file is closed" do - file = File.open(@fname, "w+") - file.close - -> { file.pwrite("foo", 1) }.should raise_error(IOError, "closed stream") - end + it "raises IOError when file is closed" do + file = File.open(@fname, "w+") + file.close + -> { file.pwrite("foo", 1) }.should raise_error(IOError, "closed stream") + end - it "raises a NoMethodError if object does not respond to #to_s" do - -> { - @file.pwrite(BasicObject.new, 0) - }.should raise_error(NoMethodError, /undefined method [`']to_s'/) - end + it "raises a NoMethodError if object does not respond to #to_s" do + -> { + @file.pwrite(BasicObject.new, 0) + }.should raise_error(NoMethodError, /undefined method [`']to_s'/) + end - it "raises a TypeError if the offset cannot be converted to an Integer" do - -> { - @file.pwrite("foo", Object.new) - }.should raise_error(TypeError, "no implicit conversion of Object into Integer") - end + it "raises a TypeError if the offset cannot be converted to an Integer" do + -> { + @file.pwrite("foo", Object.new) + }.should raise_error(TypeError, "no implicit conversion of Object into Integer") end end diff --git a/spec/ruby/core/io/read_spec.rb b/spec/ruby/core/io/read_spec.rb index 988ec2ce30df25..dfb42e09db7681 100644 --- a/spec/ruby/core/io/read_spec.rb +++ b/spec/ruby/core/io/read_spec.rb @@ -65,15 +65,6 @@ end platform_is_not :windows do - ruby_version_is ""..."3.3" do - it "uses an :open_args option" do - string = IO.read(@fname, nil, 0, open_args: ["r", nil, {encoding: Encoding::US_ASCII}]) - string.encoding.should == Encoding::US_ASCII - - string = IO.read(@fname, nil, 0, open_args: ["r", nil, {}]) - string.encoding.should == Encoding::UTF_8 - end - end end it "disregards other options if :open_args is given" do @@ -135,18 +126,9 @@ -> { IO.read @fname, -1 }.should raise_error(ArgumentError) end - ruby_version_is ''...'3.3' do - it "raises an Errno::EINVAL when not passed a valid offset" do - -> { IO.read @fname, 0, -1 }.should raise_error(Errno::EINVAL) - -> { 
IO.read @fname, -1, -1 }.should raise_error(Errno::EINVAL) - end - end - - ruby_version_is '3.3' do - it "raises an ArgumentError when not passed a valid offset" do - -> { IO.read @fname, 0, -1 }.should raise_error(ArgumentError) - -> { IO.read @fname, -1, -1 }.should raise_error(ArgumentError) - end + it "raises an ArgumentError when not passed a valid offset" do + -> { IO.read @fname, 0, -1 }.should raise_error(ArgumentError) + -> { IO.read @fname, -1, -1 }.should raise_error(ArgumentError) end it "uses the external encoding specified via the :external_encoding option" do @@ -232,14 +214,12 @@ end end - ruby_version_is "3.3" do - # https://bugs.ruby-lang.org/issues/19630 - it "warns about deprecation" do - cmd = "|echo ok" - -> { - IO.read(cmd) - }.should complain(/IO process creation with a leading '\|'/) - end + # https://bugs.ruby-lang.org/issues/19630 + it "warns about deprecation" do + cmd = "|echo ok" + -> { + IO.read(cmd) + }.should complain(/IO process creation with a leading '\|'/) end end end @@ -322,11 +302,9 @@ -> { @io.read(nil, 'frozen-string'.freeze) }.should raise_error(FrozenError) end - ruby_bug "", ""..."3.3" do - it "raise FrozenError if the output buffer is frozen (2)" do - @io.read - -> { @io.read(1, ''.freeze) }.should raise_error(FrozenError) - end + it "raise FrozenError if the output buffer is frozen (2)" do + @io.read + -> { @io.read(1, ''.freeze) }.should raise_error(FrozenError) end it "consumes zero bytes when reading zero bytes" do diff --git a/spec/ruby/core/io/readlines_spec.rb b/spec/ruby/core/io/readlines_spec.rb index b4770775d1e813..07d29ea5317f2d 100644 --- a/spec/ruby/core/io/readlines_spec.rb +++ b/spec/ruby/core/io/readlines_spec.rb @@ -207,14 +207,12 @@ end end - ruby_version_is "3.3" do - # https://bugs.ruby-lang.org/issues/19630 - it "warns about deprecation given a path with a pipe" do - cmd = "|echo ok" - -> { - IO.readlines(cmd) - }.should complain(/IO process creation with a leading '\|'/) - end + # 
https://bugs.ruby-lang.org/issues/19630 + it "warns about deprecation given a path with a pipe" do + cmd = "|echo ok" + -> { + IO.readlines(cmd) + }.should complain(/IO process creation with a leading '\|'/) end end diff --git a/spec/ruby/core/io/select_spec.rb b/spec/ruby/core/io/select_spec.rb index 3893e7620f9e10..9fdb7e12c932db 100644 --- a/spec/ruby/core/io/select_spec.rb +++ b/spec/ruby/core/io/select_spec.rb @@ -149,16 +149,28 @@ end end -describe "IO.select when passed nil for timeout" do - it "sleeps forever and sets the thread status to 'sleep'" do - t = Thread.new do - IO.select(nil, nil, nil, nil) +describe "IO.select with infinite timeout" do + describe :io_select_infinite_timeout, shared: true do + it "sleeps forever and sets the thread status to 'sleep'" do + t = Thread.new do + IO.select(nil, nil, nil, @method) + end + + Thread.pass while t.status && t.status != "sleep" + t.join unless t.status + t.status.should == "sleep" + t.kill + t.join end + end - Thread.pass while t.status && t.status != "sleep" - t.join unless t.status - t.status.should == "sleep" - t.kill - t.join + describe "IO.select when passed nil for timeout" do + it_behaves_like :io_select_infinite_timeout, nil + end + + ruby_version_is "4.0" do + describe "IO.select when passed Float::INFINITY for timeout" do + it_behaves_like :io_select_infinite_timeout, Float::INFINITY + end end end diff --git a/spec/ruby/core/io/shared/readlines.rb b/spec/ruby/core/io/shared/readlines.rb index 6c1fa11a596800..77eb9cbd65cb8f 100644 --- a/spec/ruby/core/io/shared/readlines.rb +++ b/spec/ruby/core/io/shared/readlines.rb @@ -83,11 +83,9 @@ -> { IO.send(@method, @name, 2**128, &@object) }.should raise_error(RangeError) end - ruby_bug "#18767", ""..."3.3" do - describe "when passed limit" do - it "raises ArgumentError when passed 0 as a limit" do - -> { IO.send(@method, @name, 0, &@object) }.should raise_error(ArgumentError) - end + describe "when passed limit" do + it "raises ArgumentError when passed 0 
as a limit" do + -> { IO.send(@method, @name, 0, &@object) }.should raise_error(ArgumentError) end end end diff --git a/spec/ruby/core/io/write_spec.rb b/spec/ruby/core/io/write_spec.rb index e58100f8467d9c..95e6371985bf7c 100644 --- a/spec/ruby/core/io/write_spec.rb +++ b/spec/ruby/core/io/write_spec.rb @@ -102,6 +102,13 @@ File.binread(@filename).should == "h\u0000\u0000\u0000i\u0000\u0000\u0000" end + it "ignores the 'bom|' prefix" do + File.open(@filename, "w", encoding: 'bom|utf-8') do |file| + file.write("hi") + end + File.binread(@filename).should == "hi" + end + it "raises a invalid byte sequence error if invalid bytes are being written" do # pack "\xFEhi" to avoid utf-8 conflict xFEhi = ([254].pack('C*') + 'hi').force_encoding('utf-8') @@ -220,7 +227,7 @@ end end - ruby_version_is "3.3"..."4.0" do + ruby_version_is ""..."4.0" do # https://bugs.ruby-lang.org/issues/19630 it "warns about deprecation given a path with a pipe" do -> { diff --git a/spec/ruby/core/kernel/Integer_spec.rb b/spec/ruby/core/kernel/Integer_spec.rb index 74dd3e0dd2ef27..c62b8b08013898 100644 --- a/spec/ruby/core/kernel/Integer_spec.rb +++ b/spec/ruby/core/kernel/Integer_spec.rb @@ -586,19 +586,10 @@ Integer("777", obj).should == 0777 end - # https://bugs.ruby-lang.org/issues/19349 - ruby_version_is ''...'3.3' do - it "ignores the base if it is not an integer and does not respond to #to_i" do - Integer("777", "8").should == 777 - end - end - - ruby_version_is '3.3' do - it "raises a TypeError if it is not an integer and does not respond to #to_i" do - -> { - Integer("777", "8") - }.should raise_error(TypeError, "no implicit conversion of String into Integer") - end + it "raises a TypeError if it is not an integer and does not respond to #to_i" do + -> { + Integer("777", "8") + }.should raise_error(TypeError, "no implicit conversion of String into Integer") end describe "when passed exception: false" do diff --git a/spec/ruby/core/kernel/caller_spec.rb 
b/spec/ruby/core/kernel/caller_spec.rb index 7cd703de5a3a8d..df051ef07f2d16 100644 --- a/spec/ruby/core/kernel/caller_spec.rb +++ b/spec/ruby/core/kernel/caller_spec.rb @@ -84,14 +84,25 @@ end guard -> { Kernel.instance_method(:tap).source_location } do - ruby_version_is ""..."4.0" do + ruby_version_is ""..."3.4" do it "includes core library methods defined in Ruby" do file, line = Kernel.instance_method(:tap).source_location file.should.start_with?(' { lambda(&proc{}) }.should complain("#{__FILE__}:#{__LINE__}: warning: lambda without a literal block is deprecated; use the proc without lambda instead\n") - end - end - - ruby_version_is "3.3" do - it "raises when proc isn't a lambda" do - -> { lambda(&proc{}) }.should raise_error(ArgumentError, /the lambda method requires a literal block/) - end + it "raises when proc isn't a lambda" do + -> { lambda(&proc{}) }.should raise_error(ArgumentError, /the lambda method requires a literal block/) end it "doesn't warn when proc is lambda" do diff --git a/spec/ruby/core/kernel/open_spec.rb b/spec/ruby/core/kernel/open_spec.rb index b967d5044ba92b..9d3f3760b96b3e 100644 --- a/spec/ruby/core/kernel/open_spec.rb +++ b/spec/ruby/core/kernel/open_spec.rb @@ -79,14 +79,12 @@ end end - ruby_version_is "3.3" do - # https://bugs.ruby-lang.org/issues/19630 - it "warns about deprecation given a path with a pipe" do - cmd = "|echo ok" - -> { - open(cmd) { |f| f.read } - }.should complain(/Kernel#open with a leading '\|'/) - end + # https://bugs.ruby-lang.org/issues/19630 + it "warns about deprecation given a path with a pipe" do + cmd = "|echo ok" + -> { + open(cmd) { |f| f.read } + }.should complain(/Kernel#open with a leading '\|'/) end end diff --git a/spec/ruby/core/kernel/shared/require.rb b/spec/ruby/core/kernel/shared/require.rb index 52f86f73e50f48..ef5b9486c6157d 100644 --- a/spec/ruby/core/kernel/shared/require.rb +++ b/spec/ruby/core/kernel/shared/require.rb @@ -266,15 +266,13 @@ ScratchPad.recorded.should == [:loaded] end - 
ruby_bug "#17340", ''...'3.3' do - it "loads a file concurrently" do - path = File.expand_path "concurrent_require_fixture.rb", CODE_LOADING_DIR - ScratchPad.record(@object) - -> { - @object.require(path) - }.should_not complain(/circular require considered harmful/, verbose: true) - ScratchPad.recorded.join - end + it "loads a file concurrently" do + path = File.expand_path "concurrent_require_fixture.rb", CODE_LOADING_DIR + ScratchPad.record(@object) + -> { + @object.require(path) + }.should_not complain(/circular require considered harmful/, verbose: true) + ScratchPad.recorded.join end end diff --git a/spec/ruby/core/kernel/sleep_spec.rb b/spec/ruby/core/kernel/sleep_spec.rb index e9c600aac41107..0b003ad189a48b 100644 --- a/spec/ruby/core/kernel/sleep_spec.rb +++ b/spec/ruby/core/kernel/sleep_spec.rb @@ -63,27 +63,19 @@ def o.divmod(*); [0, 0.001]; end actual_duration.should > 0.01 # 100 * 0.0001 => 0.01 end - ruby_version_is ""..."3.3" do - it "raises a TypeError when passed nil" do - -> { sleep(nil) }.should raise_error(TypeError) + it "accepts a nil duration" do + running = false + t = Thread.new do + running = true + sleep(nil) + 5 end - end - - ruby_version_is "3.3" do - it "accepts a nil duration" do - running = false - t = Thread.new do - running = true - sleep(nil) - 5 - end - Thread.pass until running - Thread.pass while t.status and t.status != "sleep" + Thread.pass until running + Thread.pass while t.status and t.status != "sleep" - t.wakeup - t.value.should == 5 - end + t.wakeup + t.value.should == 5 end context "Kernel.sleep with Fiber scheduler" do diff --git a/spec/ruby/core/marshal/shared/load.rb b/spec/ruby/core/marshal/shared/load.rb index 204a4d34e3edff..692c14cfa10adb 100644 --- a/spec/ruby/core/marshal/shared/load.rb +++ b/spec/ruby/core/marshal/shared/load.rb @@ -127,36 +127,32 @@ Object.should_not.frozen? 
end - ruby_bug "#19427", ""..."3.3" do - it "does freeze extended objects" do - object = Marshal.load("\x04\be:\x0FEnumerableo:\vObject\x00", freeze: true) - object.should.frozen? - end + it "does freeze extended objects" do + object = Marshal.load("\x04\be:\x0FEnumerableo:\vObject\x00", freeze: true) + object.should.frozen? + end - it "does freeze extended objects with instance variables" do - object = Marshal.load("\x04\be:\x0FEnumerableo:\vObject\x06:\n@ivarT", freeze: true) - object.should.frozen? - end + it "does freeze extended objects with instance variables" do + object = Marshal.load("\x04\be:\x0FEnumerableo:\vObject\x06:\n@ivarT", freeze: true) + object.should.frozen? end - ruby_bug "#19427", ""..."3.3" do - it "returns frozen object having #_dump method" do - object = Marshal.send(@method, Marshal.dump(UserDefined.new), freeze: true) - object.should.frozen? - end + it "returns frozen object having #_dump method" do + object = Marshal.send(@method, Marshal.dump(UserDefined.new), freeze: true) + object.should.frozen? + end - it "returns frozen object responding to #marshal_dump and #marshal_load" do - object = Marshal.send(@method, Marshal.dump(UserMarshal.new), freeze: true) - object.should.frozen? - end + it "returns frozen object responding to #marshal_dump and #marshal_load" do + object = Marshal.send(@method, Marshal.dump(UserMarshal.new), freeze: true) + object.should.frozen? + end - it "returns frozen object extended by a module" do - object = Object.new - object.extend(MarshalSpec::ModuleToExtendBy) + it "returns frozen object extended by a module" do + object = Object.new + object.extend(MarshalSpec::ModuleToExtendBy) - object = Marshal.send(@method, Marshal.dump(object), freeze: true) - object.should.frozen? - end + object = Marshal.send(@method, Marshal.dump(object), freeze: true) + object.should.frozen? end it "does not call freeze method" do @@ -239,12 +235,10 @@ string.should.frozen? 
end - ruby_bug "#19427", ""..."3.3" do - it "call the proc with extended objects" do - objs = [] - obj = Marshal.load("\x04\be:\x0FEnumerableo:\vObject\x00", Proc.new { |o| objs << o; o }) - objs.should == [obj] - end + it "call the proc with extended objects" do + objs = [] + obj = Marshal.load("\x04\be:\x0FEnumerableo:\vObject\x00", Proc.new { |o| objs << o; o }) + objs.should == [obj] end it "returns the value of the proc" do @@ -930,15 +924,13 @@ def io.binmode; raise "binmode"; end [Meths, UserRegexp, Regexp] end - ruby_bug "#19439", ""..."3.3" do - it "restore the regexp instance variables" do - obj = Regexp.new("hello") - obj.instance_variable_set(:@regexp_ivar, [42]) + it "restore the regexp instance variables" do + obj = Regexp.new("hello") + obj.instance_variable_set(:@regexp_ivar, [42]) - new_obj = Marshal.send(@method, "\x04\bI/\nhello\x00\a:\x06EF:\x11@regexp_ivar[\x06i/") - new_obj.instance_variables.should == [:@regexp_ivar] - new_obj.instance_variable_get(:@regexp_ivar).should == [42] - end + new_obj = Marshal.send(@method, "\x04\bI/\nhello\x00\a:\x06EF:\x11@regexp_ivar[\x06i/") + new_obj.instance_variables.should == [:@regexp_ivar] + new_obj.instance_variable_get(:@regexp_ivar).should == [42] end it "preserves Regexp encoding" do diff --git a/spec/ruby/core/matchdata/named_captures_spec.rb b/spec/ruby/core/matchdata/named_captures_spec.rb index 5e4693d62d662b..10b1f884d6e4c9 100644 --- a/spec/ruby/core/matchdata/named_captures_spec.rb +++ b/spec/ruby/core/matchdata/named_captures_spec.rb @@ -13,15 +13,13 @@ /\A(?.)(?.)(?.)(?.)?\z/.match('012').named_captures.should == { 'a' => '0', 'b' => '2' } end - ruby_version_is "3.3" do - it 'returns a Hash with Symbol keys when symbolize_names is provided a true value' do - /(?.)(?.)?/.match('0').named_captures(symbolize_names: true).should == { a: '0', b: nil } - /(?.)(?.)?/.match('0').named_captures(symbolize_names: "truly").should == { a: '0', b: nil } - end + it 'returns a Hash with Symbol keys when 
symbolize_names is provided a true value' do + /(?.)(?.)?/.match('0').named_captures(symbolize_names: true).should == { a: '0', b: nil } + /(?.)(?.)?/.match('0').named_captures(symbolize_names: "truly").should == { a: '0', b: nil } + end - it 'returns a Hash with String keys when symbolize_names is provided a false value' do - /(?.)(?.)?/.match('02').named_captures(symbolize_names: false).should == { 'a' => '0', 'b' => '2' } - /(?.)(?.)?/.match('02').named_captures(symbolize_names: nil).should == { 'a' => '0', 'b' => '2' } - end + it 'returns a Hash with String keys when symbolize_names is provided a false value' do + /(?.)(?.)?/.match('02').named_captures(symbolize_names: false).should == { 'a' => '0', 'b' => '2' } + /(?.)(?.)?/.match('02').named_captures(symbolize_names: nil).should == { 'a' => '0', 'b' => '2' } end end diff --git a/spec/ruby/core/math/log10_spec.rb b/spec/ruby/core/math/log10_spec.rb index c4daedcd5c9d75..f3bd7fd4b86474 100644 --- a/spec/ruby/core/math/log10_spec.rb +++ b/spec/ruby/core/math/log10_spec.rb @@ -23,6 +23,10 @@ -> { Math.log10("test") }.should raise_error(TypeError) end + it "raises a TypeError if passed a numerical argument as a string" do + -> { Math.log10("1.0") }.should raise_error(TypeError) + end + it "returns NaN given NaN" do Math.log10(nan_value).nan?.should be_true end diff --git a/spec/ruby/core/module/set_temporary_name_spec.rb b/spec/ruby/core/module/set_temporary_name_spec.rb index 46605ed6758877..0b96b869c90c51 100644 --- a/spec/ruby/core/module/set_temporary_name_spec.rb +++ b/spec/ruby/core/module/set_temporary_name_spec.rb @@ -1,147 +1,145 @@ require_relative '../../spec_helper' require_relative 'fixtures/set_temporary_name' -ruby_version_is "3.3" do - describe "Module#set_temporary_name" do - it "can assign a temporary name" do - m = Module.new - m.name.should be_nil +describe "Module#set_temporary_name" do + it "can assign a temporary name" do + m = Module.new + m.name.should be_nil - 
m.set_temporary_name("fake_name") - m.name.should == "fake_name" + m.set_temporary_name("fake_name") + m.name.should == "fake_name" - m.set_temporary_name(nil) - m.name.should be_nil - end + m.set_temporary_name(nil) + m.name.should be_nil + end - it "returns self" do - m = Module.new - m.set_temporary_name("fake_name").should.equal? m - end + it "returns self" do + m = Module.new + m.set_temporary_name("fake_name").should.equal? m + end - it "can assign a temporary name which is not a valid constant path" do - m = Module.new + it "can assign a temporary name which is not a valid constant path" do + m = Module.new - m.set_temporary_name("name") - m.name.should == "name" + m.set_temporary_name("name") + m.name.should == "name" - m.set_temporary_name("Template['foo.rb']") - m.name.should == "Template['foo.rb']" + m.set_temporary_name("Template['foo.rb']") + m.name.should == "Template['foo.rb']" - m.set_temporary_name("a::B") - m.name.should == "a::B" + m.set_temporary_name("a::B") + m.name.should == "a::B" - m.set_temporary_name("A::b") - m.name.should == "A::b" + m.set_temporary_name("A::b") + m.name.should == "A::b" - m.set_temporary_name("A::B::") - m.name.should == "A::B::" + m.set_temporary_name("A::B::") + m.name.should == "A::B::" - m.set_temporary_name("A::::B") - m.name.should == "A::::B" + m.set_temporary_name("A::::B") + m.name.should == "A::::B" - m.set_temporary_name("A=") - m.name.should == "A=" - end + m.set_temporary_name("A=") + m.name.should == "A=" + end - it "can't assign empty string as name" do - m = Module.new - -> { m.set_temporary_name("") }.should raise_error(ArgumentError, "empty class/module name") - end + it "can't assign empty string as name" do + m = Module.new + -> { m.set_temporary_name("") }.should raise_error(ArgumentError, "empty class/module name") + end - it "can't assign a constant name as a temporary name" do - m = Module.new - -> { m.set_temporary_name("Object") }.should raise_error(ArgumentError, "the temporary name must not 
be a constant path to avoid confusion") - end + it "can't assign a constant name as a temporary name" do + m = Module.new + -> { m.set_temporary_name("Object") }.should raise_error(ArgumentError, "the temporary name must not be a constant path to avoid confusion") + end - it "can't assign a constant path as a temporary name" do - m = Module.new - -> { m.set_temporary_name("A::B") }.should raise_error(ArgumentError, "the temporary name must not be a constant path to avoid confusion") - -> { m.set_temporary_name("::A") }.should raise_error(ArgumentError, "the temporary name must not be a constant path to avoid confusion") - -> { m.set_temporary_name("::A::B") }.should raise_error(ArgumentError, "the temporary name must not be a constant path to avoid confusion") - end + it "can't assign a constant path as a temporary name" do + m = Module.new + -> { m.set_temporary_name("A::B") }.should raise_error(ArgumentError, "the temporary name must not be a constant path to avoid confusion") + -> { m.set_temporary_name("::A") }.should raise_error(ArgumentError, "the temporary name must not be a constant path to avoid confusion") + -> { m.set_temporary_name("::A::B") }.should raise_error(ArgumentError, "the temporary name must not be a constant path to avoid confusion") + end - it "can't assign name to permanent module" do - -> { Object.set_temporary_name("fake_name") }.should raise_error(RuntimeError, "can't change permanent name") - end + it "can't assign name to permanent module" do + -> { Object.set_temporary_name("fake_name") }.should raise_error(RuntimeError, "can't change permanent name") + end - it "can assign a temporary name to a module nested into an anonymous module" do - m = Module.new - module m::N; end - m::N.name.should =~ /\A#::N\z/ + it "can assign a temporary name to a module nested into an anonymous module" do + m = Module.new + module m::N; end + m::N.name.should =~ /\A#::N\z/ - m::N.set_temporary_name("fake_name") - m::N.name.should == "fake_name" + 
m::N.set_temporary_name("fake_name") + m::N.name.should == "fake_name" - m::N.set_temporary_name(nil) - m::N.name.should be_nil - end + m::N.set_temporary_name(nil) + m::N.name.should be_nil + end - it "discards a temporary name when an outer anonymous module gets a permanent name" do - m = Module.new - module m::N; end + it "discards a temporary name when an outer anonymous module gets a permanent name" do + m = Module.new + module m::N; end - m::N.set_temporary_name("fake_name") - m::N.name.should == "fake_name" + m::N.set_temporary_name("fake_name") + m::N.name.should == "fake_name" - ModuleSpecs::SetTemporaryNameSpec::M = m - m::N.name.should == "ModuleSpecs::SetTemporaryNameSpec::M::N" - ModuleSpecs::SetTemporaryNameSpec.send :remove_const, :M - end + ModuleSpecs::SetTemporaryNameSpec::M = m + m::N.name.should == "ModuleSpecs::SetTemporaryNameSpec::M::N" + ModuleSpecs::SetTemporaryNameSpec.send :remove_const, :M + end - it "can update the name when assigned to a constant" do - m = Module.new - m::N = Module.new - m::N.name.should =~ /\A#::N\z/ - m::N.set_temporary_name(nil) + it "can update the name when assigned to a constant" do + m = Module.new + m::N = Module.new + m::N.name.should =~ /\A#::N\z/ + m::N.set_temporary_name(nil) - m::M = m::N - m::M.name.should =~ /\A#::M\z/m - end + m::M = m::N + m::M.name.should =~ /\A#::M\z/m + end - it "can reassign a temporary name repeatedly" do - m = Module.new + it "can reassign a temporary name repeatedly" do + m = Module.new - m.set_temporary_name("fake_name") - m.name.should == "fake_name" + m.set_temporary_name("fake_name") + m.name.should == "fake_name" - m.set_temporary_name("fake_name_2") - m.name.should == "fake_name_2" - end + m.set_temporary_name("fake_name_2") + m.name.should == "fake_name_2" + end - ruby_bug "#21094", ""..."4.0" do - it "also updates a name of a nested module" do - m = Module.new - m::N = Module.new - m::N.name.should =~ /\A#::N\z/ + ruby_bug "#21094", ""..."4.0" do + it "also updates a 
name of a nested module" do + m = Module.new + m::N = Module.new + m::N.name.should =~ /\A#::N\z/ - m.set_temporary_name "m" - m::N.name.should == "m::N" + m.set_temporary_name "m" + m::N.name.should == "m::N" - m.set_temporary_name nil - m::N.name.should == nil - end + m.set_temporary_name nil + m::N.name.should == nil end + end - it "keeps temporary name when assigned in an anonymous module" do - outer = Module.new - m = Module.new - m.set_temporary_name "m" - m.name.should == "m" - outer::M = m - m.name.should == "m" - m.inspect.should == "m" - end + it "keeps temporary name when assigned in an anonymous module" do + outer = Module.new + m = Module.new + m.set_temporary_name "m" + m.name.should == "m" + outer::M = m + m.name.should == "m" + m.inspect.should == "m" + end - it "keeps temporary name when assigned in an anonymous module and nested before" do - outer = Module.new - m = Module.new - outer::A = m - m.set_temporary_name "m" - m.name.should == "m" - outer::M = m - m.name.should == "m" - m.inspect.should == "m" - end + it "keeps temporary name when assigned in an anonymous module and nested before" do + outer = Module.new + m = Module.new + outer::A = m + m.set_temporary_name "m" + m.name.should == "m" + outer::M = m + m.name.should == "m" + m.inspect.should == "m" end end diff --git a/spec/ruby/core/module/shared/class_eval.rb b/spec/ruby/core/module/shared/class_eval.rb index b1d5cb3814edea..526d0a20363dc8 100644 --- a/spec/ruby/core/module/shared/class_eval.rb +++ b/spec/ruby/core/module/shared/class_eval.rb @@ -52,10 +52,8 @@ def foo ModuleSpecs.send(@method, "[__FILE__, __LINE__]", "test", 102).should == ["test", 102] end - ruby_version_is "3.3" do - it "uses the caller location as default filename" do - ModuleSpecs.send(@method, "[__FILE__, __LINE__]").should == ["(eval at #{__FILE__}:#{__LINE__})", 1] - end + it "uses the caller location as default filename" do + ModuleSpecs.send(@method, "[__FILE__, __LINE__]").should == ["(eval at 
#{__FILE__}:#{__LINE__})", 1] end it "converts a non-string filename to a string using to_str" do diff --git a/spec/ruby/core/nil/singleton_method_spec.rb b/spec/ruby/core/nil/singleton_method_spec.rb index 8d898b1cc94d70..fb47af0c3e8c5d 100644 --- a/spec/ruby/core/nil/singleton_method_spec.rb +++ b/spec/ruby/core/nil/singleton_method_spec.rb @@ -1,15 +1,13 @@ require_relative '../../spec_helper' describe "NilClass#singleton_method" do - ruby_version_is '3.3' do - it "raises regardless of whether NilClass defines the method" do + it "raises regardless of whether NilClass defines the method" do + -> { nil.singleton_method(:foo) }.should raise_error(NameError) + begin + def (nil).foo; end -> { nil.singleton_method(:foo) }.should raise_error(NameError) - begin - def (nil).foo; end - -> { nil.singleton_method(:foo) }.should raise_error(NameError) - ensure - NilClass.send(:remove_method, :foo) - end + ensure + NilClass.send(:remove_method, :foo) end end end diff --git a/spec/ruby/core/numeric/remainder_spec.rb b/spec/ruby/core/numeric/remainder_spec.rb index 674fa22d8ef997..29654310d231e2 100644 --- a/spec/ruby/core/numeric/remainder_spec.rb +++ b/spec/ruby/core/numeric/remainder_spec.rb @@ -6,9 +6,7 @@ @obj = NumericSpecs::Subclass.new @result = mock("Numeric#% result") @other = mock("Passed Object") - ruby_version_is "3.3" do - @other.should_receive(:coerce).with(@obj).and_return([@obj, @other]) - end + @other.should_receive(:coerce).with(@obj).and_return([@obj, @other]) end it "returns the result of calling self#% with other if self is 0" do diff --git a/spec/ruby/core/objectspace/weakkeymap/clear_spec.rb b/spec/ruby/core/objectspace/weakkeymap/clear_spec.rb index 8050e2c30729d6..b1804ec9b003b7 100644 --- a/spec/ruby/core/objectspace/weakkeymap/clear_spec.rb +++ b/spec/ruby/core/objectspace/weakkeymap/clear_spec.rb @@ -1,27 +1,25 @@ require_relative '../../../spec_helper' -ruby_version_is '3.3' do - describe "ObjectSpace::WeakKeyMap#clear" do - it "removes all the 
entries" do - m = ObjectSpace::WeakKeyMap.new +describe "ObjectSpace::WeakKeyMap#clear" do + it "removes all the entries" do + m = ObjectSpace::WeakKeyMap.new - key = Object.new - value = Object.new - m[key] = value + key = Object.new + value = Object.new + m[key] = value - key2 = Object.new - value2 = Object.new - m[key2] = value2 + key2 = Object.new + value2 = Object.new + m[key2] = value2 - m.clear + m.clear - m.key?(key).should == false - m.key?(key2).should == false - end + m.key?(key).should == false + m.key?(key2).should == false + end - it "returns self" do - m = ObjectSpace::WeakKeyMap.new - m.clear.should.equal?(m) - end + it "returns self" do + m = ObjectSpace::WeakKeyMap.new + m.clear.should.equal?(m) end end diff --git a/spec/ruby/core/objectspace/weakkeymap/delete_spec.rb b/spec/ruby/core/objectspace/weakkeymap/delete_spec.rb index 3cd61355d64f9f..ad32c2c75efda4 100644 --- a/spec/ruby/core/objectspace/weakkeymap/delete_spec.rb +++ b/spec/ruby/core/objectspace/weakkeymap/delete_spec.rb @@ -1,51 +1,49 @@ require_relative '../../../spec_helper' -ruby_version_is '3.3' do - describe "ObjectSpace::WeakKeyMap#delete" do - it "removes the entry and returns the deleted value" do - m = ObjectSpace::WeakKeyMap.new - key = Object.new - value = Object.new - m[key] = value - - m.delete(key).should == value - m.key?(key).should == false - end +describe "ObjectSpace::WeakKeyMap#delete" do + it "removes the entry and returns the deleted value" do + m = ObjectSpace::WeakKeyMap.new + key = Object.new + value = Object.new + m[key] = value + + m.delete(key).should == value + m.key?(key).should == false + end - it "uses equality semantic" do - m = ObjectSpace::WeakKeyMap.new - key = "foo".upcase - value = Object.new - m[key] = value + it "uses equality semantic" do + m = ObjectSpace::WeakKeyMap.new + key = "foo".upcase + value = Object.new + m[key] = value - m.delete("foo".upcase).should == value - m.key?(key).should == false - end + m.delete("foo".upcase).should == value 
+ m.key?(key).should == false + end - it "calls supplied block if the key is not found" do - key = Object.new - m = ObjectSpace::WeakKeyMap.new - return_value = m.delete(key) do |yielded_key| - yielded_key.should == key - 5 - end - return_value.should == 5 + it "calls supplied block if the key is not found" do + key = Object.new + m = ObjectSpace::WeakKeyMap.new + return_value = m.delete(key) do |yielded_key| + yielded_key.should == key + 5 end + return_value.should == 5 + end - it "returns nil if the key is not found when no block is given" do - m = ObjectSpace::WeakKeyMap.new - m.delete(Object.new).should == nil - end + it "returns nil if the key is not found when no block is given" do + m = ObjectSpace::WeakKeyMap.new + m.delete(Object.new).should == nil + end - it "returns nil when a key cannot be garbage collected" do - map = ObjectSpace::WeakKeyMap.new + it "returns nil when a key cannot be garbage collected" do + map = ObjectSpace::WeakKeyMap.new - map.delete(1).should == nil - map.delete(1.0).should == nil - map.delete(:a).should == nil - map.delete(true).should == nil - map.delete(false).should == nil - map.delete(nil).should == nil - end + map.delete(1).should == nil + map.delete(1.0).should == nil + map.delete(:a).should == nil + map.delete(true).should == nil + map.delete(false).should == nil + map.delete(nil).should == nil end end diff --git a/spec/ruby/core/objectspace/weakkeymap/element_reference_spec.rb b/spec/ruby/core/objectspace/weakkeymap/element_reference_spec.rb index 51368e8d3ba3af..53eff79c40fb1c 100644 --- a/spec/ruby/core/objectspace/weakkeymap/element_reference_spec.rb +++ b/spec/ruby/core/objectspace/weakkeymap/element_reference_spec.rb @@ -1,107 +1,105 @@ require_relative '../../../spec_helper' require_relative 'fixtures/classes' -ruby_version_is "3.3" do - describe "ObjectSpace::WeakKeyMap#[]" do - it "is faithful to the map's content" do - map = ObjectSpace::WeakKeyMap.new - key1, key2 = %w[a b].map(&:upcase) - ref1, ref2 = %w[x y] - 
map[key1] = ref1 - map[key1].should == ref1 - map[key1] = ref1 - map[key1].should == ref1 - map[key2] = ref2 - map[key1].should == ref1 - map[key2].should == ref2 - end - - it "compares keys with #eql? semantics" do - map = ObjectSpace::WeakKeyMap.new - key = [1.0] - map[key] = "x" - map[[1]].should == nil - map[[1.0]].should == "x" - key.should == [1.0] # keep the key alive until here to keep the map entry - - map = ObjectSpace::WeakKeyMap.new - key = [1] - map[key] = "x" - map[[1.0]].should == nil - map[[1]].should == "x" - key.should == [1] # keep the key alive until here to keep the map entry - - map = ObjectSpace::WeakKeyMap.new - key1, key2 = %w[a a].map(&:upcase) - ref = "x" - map[key1] = ref - map[key2].should == ref - end - - it "compares key via #hash first" do - x = mock('0') - x.should_receive(:hash).and_return(0) - - map = ObjectSpace::WeakKeyMap.new - key = 'foo' - map[key] = :bar - map[x].should == nil - end - - it "does not compare keys with different #hash values via #eql?" do - x = mock('x') - x.should_not_receive(:eql?) - x.stub!(:hash).and_return(0) - - y = mock('y') - y.should_not_receive(:eql?) - y.stub!(:hash).and_return(1) - - map = ObjectSpace::WeakKeyMap.new - map[y] = 1 - map[x].should == nil - end - - it "compares keys with the same #hash value via #eql?" do - x = mock('x') - x.should_receive(:eql?).and_return(true) - x.stub!(:hash).and_return(42) - - y = mock('y') - y.should_not_receive(:eql?) - y.stub!(:hash).and_return(42) - - map = ObjectSpace::WeakKeyMap.new - map[y] = 1 - map[x].should == 1 - end - - it "finds a value via an identical key even when its #eql? isn't reflexive" do - x = mock('x') - x.should_receive(:hash).at_least(1).and_return(42) - x.stub!(:eql?).and_return(false) # Stubbed for clarity and latitude in implementation; not actually sent by MRI. 
- - map = ObjectSpace::WeakKeyMap.new - map[x] = :x - map[x].should == :x - end - - it "supports keys with private #hash method" do - key = WeakKeyMapSpecs::KeyWithPrivateHash.new - map = ObjectSpace::WeakKeyMap.new - map[key] = 42 - map[key].should == 42 - end - - it "returns nil and does not raise error when a key cannot be garbage collected" do - map = ObjectSpace::WeakKeyMap.new - - map[1].should == nil - map[1.0].should == nil - map[:a].should == nil - map[true].should == nil - map[false].should == nil - map[nil].should == nil - end +describe "ObjectSpace::WeakKeyMap#[]" do + it "is faithful to the map's content" do + map = ObjectSpace::WeakKeyMap.new + key1, key2 = %w[a b].map(&:upcase) + ref1, ref2 = %w[x y] + map[key1] = ref1 + map[key1].should == ref1 + map[key1] = ref1 + map[key1].should == ref1 + map[key2] = ref2 + map[key1].should == ref1 + map[key2].should == ref2 + end + + it "compares keys with #eql? semantics" do + map = ObjectSpace::WeakKeyMap.new + key = [1.0] + map[key] = "x" + map[[1]].should == nil + map[[1.0]].should == "x" + key.should == [1.0] # keep the key alive until here to keep the map entry + + map = ObjectSpace::WeakKeyMap.new + key = [1] + map[key] = "x" + map[[1.0]].should == nil + map[[1]].should == "x" + key.should == [1] # keep the key alive until here to keep the map entry + + map = ObjectSpace::WeakKeyMap.new + key1, key2 = %w[a a].map(&:upcase) + ref = "x" + map[key1] = ref + map[key2].should == ref + end + + it "compares key via #hash first" do + x = mock('0') + x.should_receive(:hash).and_return(0) + + map = ObjectSpace::WeakKeyMap.new + key = 'foo' + map[key] = :bar + map[x].should == nil + end + + it "does not compare keys with different #hash values via #eql?" do + x = mock('x') + x.should_not_receive(:eql?) + x.stub!(:hash).and_return(0) + + y = mock('y') + y.should_not_receive(:eql?) 
+ y.stub!(:hash).and_return(1) + + map = ObjectSpace::WeakKeyMap.new + map[y] = 1 + map[x].should == nil + end + + it "compares keys with the same #hash value via #eql?" do + x = mock('x') + x.should_receive(:eql?).and_return(true) + x.stub!(:hash).and_return(42) + + y = mock('y') + y.should_not_receive(:eql?) + y.stub!(:hash).and_return(42) + + map = ObjectSpace::WeakKeyMap.new + map[y] = 1 + map[x].should == 1 + end + + it "finds a value via an identical key even when its #eql? isn't reflexive" do + x = mock('x') + x.should_receive(:hash).at_least(1).and_return(42) + x.stub!(:eql?).and_return(false) # Stubbed for clarity and latitude in implementation; not actually sent by MRI. + + map = ObjectSpace::WeakKeyMap.new + map[x] = :x + map[x].should == :x + end + + it "supports keys with private #hash method" do + key = WeakKeyMapSpecs::KeyWithPrivateHash.new + map = ObjectSpace::WeakKeyMap.new + map[key] = 42 + map[key].should == 42 + end + + it "returns nil and does not raise error when a key cannot be garbage collected" do + map = ObjectSpace::WeakKeyMap.new + + map[1].should == nil + map[1.0].should == nil + map[:a].should == nil + map[true].should == nil + map[false].should == nil + map[nil].should == nil end end diff --git a/spec/ruby/core/objectspace/weakkeymap/element_set_spec.rb b/spec/ruby/core/objectspace/weakkeymap/element_set_spec.rb index 8db8d780c71a86..c480aa661ae2fd 100644 --- a/spec/ruby/core/objectspace/weakkeymap/element_set_spec.rb +++ b/spec/ruby/core/objectspace/weakkeymap/element_set_spec.rb @@ -1,82 +1,80 @@ require_relative '../../../spec_helper' -ruby_version_is "3.3" do - describe "ObjectSpace::WeakKeyMap#[]=" do - def should_accept(map, key, value) - (map[key] = value).should == value - map.should.key?(key) - map[key].should == value - end +describe "ObjectSpace::WeakKeyMap#[]=" do + def should_accept(map, key, value) + (map[key] = value).should == value + map.should.key?(key) + map[key].should == value + end + + it "is correct" do + map = 
ObjectSpace::WeakKeyMap.new + key1, key2 = %w[a b].map(&:upcase) + ref1, ref2 = %w[x y] + should_accept(map, key1, ref1) + should_accept(map, key1, ref1) + should_accept(map, key2, ref2) + map[key1].should == ref1 + end + + it "requires the keys to implement #hash" do + map = ObjectSpace::WeakKeyMap.new + -> { map[BasicObject.new] = 1 }.should raise_error(NoMethodError, /undefined method [`']hash' for an instance of BasicObject/) + end - it "is correct" do + it "accepts frozen keys or values" do + map = ObjectSpace::WeakKeyMap.new + x = Object.new + should_accept(map, x, true) + should_accept(map, x, false) + should_accept(map, x, 42) + should_accept(map, x, :foo) + + y = Object.new.freeze + should_accept(map, x, y) + should_accept(map, y, x) + end + + it "does not duplicate and freeze String keys (like Hash#[]= does)" do + map = ObjectSpace::WeakKeyMap.new + key = +"a" + map[key] = 1 + + map.getkey("a").should.equal? key + map.getkey("a").should_not.frozen? + + key.should == "a" # keep the key alive until here to keep the map entry + end + + context "a key cannot be garbage collected" do + it "raises ArgumentError when Integer is used as a key" do map = ObjectSpace::WeakKeyMap.new - key1, key2 = %w[a b].map(&:upcase) - ref1, ref2 = %w[x y] - should_accept(map, key1, ref1) - should_accept(map, key1, ref1) - should_accept(map, key2, ref2) - map[key1].should == ref1 + -> { map[1] = "x" }.should raise_error(ArgumentError, /WeakKeyMap (keys )?must be garbage collectable/) end - it "requires the keys to implement #hash" do + it "raises ArgumentError when Float is used as a key" do map = ObjectSpace::WeakKeyMap.new - -> { map[BasicObject.new] = 1 }.should raise_error(NoMethodError, /undefined method [`']hash' for an instance of BasicObject/) + -> { map[1.0] = "x" }.should raise_error(ArgumentError, /WeakKeyMap (keys )?must be garbage collectable/) end - it "accepts frozen keys or values" do + it "raises ArgumentError when Symbol is used as a key" do map = 
ObjectSpace::WeakKeyMap.new - x = Object.new - should_accept(map, x, true) - should_accept(map, x, false) - should_accept(map, x, 42) - should_accept(map, x, :foo) - - y = Object.new.freeze - should_accept(map, x, y) - should_accept(map, y, x) + -> { map[:a] = "x" }.should raise_error(ArgumentError, /WeakKeyMap (keys )?must be garbage collectable/) end - it "does not duplicate and freeze String keys (like Hash#[]= does)" do + it "raises ArgumentError when true is used as a key" do map = ObjectSpace::WeakKeyMap.new - key = +"a" - map[key] = 1 - - map.getkey("a").should.equal? key - map.getkey("a").should_not.frozen? - - key.should == "a" # keep the key alive until here to keep the map entry + -> { map[true] = "x" }.should raise_error(ArgumentError, /WeakKeyMap (keys )?must be garbage collectable/) end - context "a key cannot be garbage collected" do - it "raises ArgumentError when Integer is used as a key" do - map = ObjectSpace::WeakKeyMap.new - -> { map[1] = "x" }.should raise_error(ArgumentError, /WeakKeyMap (keys )?must be garbage collectable/) - end - - it "raises ArgumentError when Float is used as a key" do - map = ObjectSpace::WeakKeyMap.new - -> { map[1.0] = "x" }.should raise_error(ArgumentError, /WeakKeyMap (keys )?must be garbage collectable/) - end - - it "raises ArgumentError when Symbol is used as a key" do - map = ObjectSpace::WeakKeyMap.new - -> { map[:a] = "x" }.should raise_error(ArgumentError, /WeakKeyMap (keys )?must be garbage collectable/) - end - - it "raises ArgumentError when true is used as a key" do - map = ObjectSpace::WeakKeyMap.new - -> { map[true] = "x" }.should raise_error(ArgumentError, /WeakKeyMap (keys )?must be garbage collectable/) - end - - it "raises ArgumentError when false is used as a key" do - map = ObjectSpace::WeakKeyMap.new - -> { map[false] = "x" }.should raise_error(ArgumentError, /WeakKeyMap (keys )?must be garbage collectable/) - end + it "raises ArgumentError when false is used as a key" do + map = 
ObjectSpace::WeakKeyMap.new + -> { map[false] = "x" }.should raise_error(ArgumentError, /WeakKeyMap (keys )?must be garbage collectable/) + end - it "raises ArgumentError when nil is used as a key" do - map = ObjectSpace::WeakKeyMap.new - -> { map[nil] = "x" }.should raise_error(ArgumentError, /WeakKeyMap (keys )?must be garbage collectable/) - end + it "raises ArgumentError when nil is used as a key" do + map = ObjectSpace::WeakKeyMap.new + -> { map[nil] = "x" }.should raise_error(ArgumentError, /WeakKeyMap (keys )?must be garbage collectable/) end end end diff --git a/spec/ruby/core/objectspace/weakkeymap/getkey_spec.rb b/spec/ruby/core/objectspace/weakkeymap/getkey_spec.rb index 8a2dbf809d8c23..0c8dec8aea5248 100644 --- a/spec/ruby/core/objectspace/weakkeymap/getkey_spec.rb +++ b/spec/ruby/core/objectspace/weakkeymap/getkey_spec.rb @@ -1,28 +1,26 @@ require_relative '../../../spec_helper' -ruby_version_is "3.3" do - describe "ObjectSpace::WeakKeyMap#getkey" do - it "returns the existing equal key" do - map = ObjectSpace::WeakKeyMap.new - key1, key2 = %w[a a].map(&:upcase) +describe "ObjectSpace::WeakKeyMap#getkey" do + it "returns the existing equal key" do + map = ObjectSpace::WeakKeyMap.new + key1, key2 = %w[a a].map(&:upcase) - map[key1] = true - map.getkey(key2).should equal(key1) - map.getkey("X").should == nil + map[key1] = true + map.getkey(key2).should equal(key1) + map.getkey("X").should == nil - key1.should == "A" # keep the key alive until here to keep the map entry - key2.should == "A" # keep the key alive until here to keep the map entry - end + key1.should == "A" # keep the key alive until here to keep the map entry + key2.should == "A" # keep the key alive until here to keep the map entry + end - it "returns nil when a key cannot be garbage collected" do - map = ObjectSpace::WeakKeyMap.new + it "returns nil when a key cannot be garbage collected" do + map = ObjectSpace::WeakKeyMap.new - map.getkey(1).should == nil - map.getkey(1.0).should == nil - 
map.getkey(:a).should == nil - map.getkey(true).should == nil - map.getkey(false).should == nil - map.getkey(nil).should == nil - end + map.getkey(1).should == nil + map.getkey(1.0).should == nil + map.getkey(:a).should == nil + map.getkey(true).should == nil + map.getkey(false).should == nil + map.getkey(nil).should == nil end end diff --git a/spec/ruby/core/objectspace/weakkeymap/inspect_spec.rb b/spec/ruby/core/objectspace/weakkeymap/inspect_spec.rb index 319f050970e31a..b6bb4691584293 100644 --- a/spec/ruby/core/objectspace/weakkeymap/inspect_spec.rb +++ b/spec/ruby/core/objectspace/weakkeymap/inspect_spec.rb @@ -1,21 +1,19 @@ require_relative '../../../spec_helper' -ruby_version_is "3.3" do - describe "ObjectSpace::WeakKeyMap#inspect" do - it "only displays size in output" do - map = ObjectSpace::WeakKeyMap.new - key1, key2, key3 = "foo", "bar", "bar" - map.inspect.should =~ /\A\#\z/ - map[key1] = 1 - map.inspect.should =~ /\A\#\z/ - map[key2] = 2 - map.inspect.should =~ /\A\#\z/ - map[key3] = 3 - map.inspect.should =~ /\A\#\z/ +describe "ObjectSpace::WeakKeyMap#inspect" do + it "only displays size in output" do + map = ObjectSpace::WeakKeyMap.new + key1, key2, key3 = "foo", "bar", "bar" + map.inspect.should =~ /\A\#\z/ + map[key1] = 1 + map.inspect.should =~ /\A\#\z/ + map[key2] = 2 + map.inspect.should =~ /\A\#\z/ + map[key3] = 3 + map.inspect.should =~ /\A\#\z/ - key1.should == "foo" # keep the key alive until here to keep the map entry - key2.should == "bar" # keep the key alive until here to keep the map entry - key3.should == "bar" # keep the key alive until here to keep the map entry - end + key1.should == "foo" # keep the key alive until here to keep the map entry + key2.should == "bar" # keep the key alive until here to keep the map entry + key3.should == "bar" # keep the key alive until here to keep the map entry end end diff --git a/spec/ruby/core/objectspace/weakkeymap/key_spec.rb b/spec/ruby/core/objectspace/weakkeymap/key_spec.rb index 
a9a2e12432c845..e0b686667197cc 100644 --- a/spec/ruby/core/objectspace/weakkeymap/key_spec.rb +++ b/spec/ruby/core/objectspace/weakkeymap/key_spec.rb @@ -1,44 +1,42 @@ require_relative '../../../spec_helper' -ruby_version_is "3.3" do - describe "ObjectSpace::WeakKeyMap#key?" do - it "recognizes keys in use" do - map = ObjectSpace::WeakKeyMap.new - key1, key2 = %w[a b].map(&:upcase) - ref1, ref2 = %w[x y] +describe "ObjectSpace::WeakKeyMap#key?" do + it "recognizes keys in use" do + map = ObjectSpace::WeakKeyMap.new + key1, key2 = %w[a b].map(&:upcase) + ref1, ref2 = %w[x y] - map[key1] = ref1 - map.key?(key1).should == true - map[key1] = ref1 - map.key?(key1).should == true - map[key2] = ref2 - map.key?(key2).should == true - end + map[key1] = ref1 + map.key?(key1).should == true + map[key1] = ref1 + map.key?(key1).should == true + map[key2] = ref2 + map.key?(key2).should == true + end - it "matches using equality semantics" do - map = ObjectSpace::WeakKeyMap.new - key1, key2 = %w[a a].map(&:upcase) - ref = "x" - map[key1] = ref - map.key?(key2).should == true - end + it "matches using equality semantics" do + map = ObjectSpace::WeakKeyMap.new + key1, key2 = %w[a a].map(&:upcase) + ref = "x" + map[key1] = ref + map.key?(key2).should == true + end - it "reports true if the pair exists and the value is nil" do - map = ObjectSpace::WeakKeyMap.new - key = Object.new - map[key] = nil - map.key?(key).should == true - end + it "reports true if the pair exists and the value is nil" do + map = ObjectSpace::WeakKeyMap.new + key = Object.new + map[key] = nil + map.key?(key).should == true + end - it "returns false when a key cannot be garbage collected" do - map = ObjectSpace::WeakKeyMap.new + it "returns false when a key cannot be garbage collected" do + map = ObjectSpace::WeakKeyMap.new - map.key?(1).should == false - map.key?(1.0).should == false - map.key?(:a).should == false - map.key?(true).should == false - map.key?(false).should == false - map.key?(nil).should == 
false - end + map.key?(1).should == false + map.key?(1.0).should == false + map.key?(:a).should == false + map.key?(true).should == false + map.key?(false).should == false + map.key?(nil).should == false end end diff --git a/spec/ruby/core/objectspace/weakmap/delete_spec.rb b/spec/ruby/core/objectspace/weakmap/delete_spec.rb index 302de264fb2998..03beebbb83419d 100644 --- a/spec/ruby/core/objectspace/weakmap/delete_spec.rb +++ b/spec/ruby/core/objectspace/weakmap/delete_spec.rb @@ -1,30 +1,28 @@ require_relative '../../../spec_helper' -ruby_version_is '3.3' do - describe "ObjectSpace::WeakMap#delete" do - it "removes the entry and returns the deleted value" do - m = ObjectSpace::WeakMap.new - key = Object.new - value = Object.new - m[key] = value +describe "ObjectSpace::WeakMap#delete" do + it "removes the entry and returns the deleted value" do + m = ObjectSpace::WeakMap.new + key = Object.new + value = Object.new + m[key] = value - m.delete(key).should == value - m.key?(key).should == false - end + m.delete(key).should == value + m.key?(key).should == false + end - it "calls supplied block if the key is not found" do - key = Object.new - m = ObjectSpace::WeakMap.new - return_value = m.delete(key) do |yielded_key| - yielded_key.should == key - 5 - end - return_value.should == 5 + it "calls supplied block if the key is not found" do + key = Object.new + m = ObjectSpace::WeakMap.new + return_value = m.delete(key) do |yielded_key| + yielded_key.should == key + 5 end + return_value.should == 5 + end - it "returns nil if the key is not found when no block is given" do - m = ObjectSpace::WeakMap.new - m.delete(Object.new).should == nil - end + it "returns nil if the key is not found when no block is given" do + m = ObjectSpace::WeakMap.new + m.delete(Object.new).should == nil end end diff --git a/spec/ruby/core/proc/clone_spec.rb b/spec/ruby/core/proc/clone_spec.rb index 730dc421a87086..7d47f2cde5b4a5 100644 --- a/spec/ruby/core/proc/clone_spec.rb +++ 
b/spec/ruby/core/proc/clone_spec.rb @@ -5,7 +5,7 @@ describe "Proc#clone" do it_behaves_like :proc_dup, :clone - ruby_bug "cloning a frozen proc is broken on Ruby 3.3", "3.3"..."3.4" do + ruby_bug "cloning a frozen proc is broken on Ruby 3.3", ""..."3.4" do it "preserves frozen status" do proc = Proc.new { } proc.freeze @@ -14,17 +14,15 @@ end end - ruby_version_is "3.3" do - it "calls #initialize_clone on subclass" do - obj = ProcSpecs::MyProc2.new(:a, 2) { } - dup = obj.clone + it "calls #initialize_clone on subclass" do + obj = ProcSpecs::MyProc2.new(:a, 2) { } + dup = obj.clone - dup.should_not equal(obj) - dup.class.should == ProcSpecs::MyProc2 + dup.should_not equal(obj) + dup.class.should == ProcSpecs::MyProc2 - dup.first.should == :a - dup.second.should == 2 - dup.initializer.should == :clone - end + dup.first.should == :a + dup.second.should == 2 + dup.initializer.should == :clone end end diff --git a/spec/ruby/core/proc/dup_spec.rb b/spec/ruby/core/proc/dup_spec.rb index 716357d1f0e327..bdb7d8ab5a4e82 100644 --- a/spec/ruby/core/proc/dup_spec.rb +++ b/spec/ruby/core/proc/dup_spec.rb @@ -12,17 +12,15 @@ proc.dup.frozen?.should == false end - ruby_version_is "3.3" do - it "calls #initialize_dup on subclass" do - obj = ProcSpecs::MyProc2.new(:a, 2) { } - dup = obj.dup + it "calls #initialize_dup on subclass" do + obj = ProcSpecs::MyProc2.new(:a, 2) { } + dup = obj.dup - dup.should_not equal(obj) - dup.class.should == ProcSpecs::MyProc2 + dup.should_not equal(obj) + dup.class.should == ProcSpecs::MyProc2 - dup.first.should == :a - dup.second.should == 2 - dup.initializer.should == :dup - end + dup.first.should == :a + dup.second.should == 2 + dup.initializer.should == :dup end end diff --git a/spec/ruby/core/proc/lambda_spec.rb b/spec/ruby/core/proc/lambda_spec.rb index 5c3c38fc2a64c1..67ee4645cd1f0a 100644 --- a/spec/ruby/core/proc/lambda_spec.rb +++ b/spec/ruby/core/proc/lambda_spec.rb @@ -14,13 +14,6 @@ Proc.new {}.lambda?.should be_false end - 
ruby_version_is ""..."3.3" do - it "is preserved when passing a Proc with & to the lambda keyword" do - suppress_warning {lambda(&->{})}.lambda?.should be_true - suppress_warning {lambda(&proc{})}.lambda?.should be_false - end - end - it "is preserved when passing a Proc with & to the proc keyword" do proc(&->{}).lambda?.should be_true proc(&proc{}).lambda?.should be_false diff --git a/spec/ruby/core/process/argv0_spec.rb b/spec/ruby/core/process/argv0_spec.rb index f5aba719e96a73..9cba382c009da4 100644 --- a/spec/ruby/core/process/argv0_spec.rb +++ b/spec/ruby/core/process/argv0_spec.rb @@ -13,10 +13,8 @@ end end - ruby_bug "#19597", ""..."3.3" do - it "returns a frozen object" do - Process.argv0.should.frozen? - end + it "returns a frozen object" do + Process.argv0.should.frozen? end it "returns every time the same object" do diff --git a/spec/ruby/core/process/status/bit_and_spec.rb b/spec/ruby/core/process/status/bit_and_spec.rb index a80536462947f2..9fd1425a97600e 100644 --- a/spec/ruby/core/process/status/bit_and_spec.rb +++ b/spec/ruby/core/process/status/bit_and_spec.rb @@ -17,7 +17,7 @@ end end - ruby_version_is "3.3"..."4.0" do + ruby_version_is ""..."4.0" do it "raises an ArgumentError if mask is negative" do suppress_warning do ruby_exe("exit(0)") diff --git a/spec/ruby/core/process/status/right_shift_spec.rb b/spec/ruby/core/process/status/right_shift_spec.rb index 355aaf4c9532cb..3eaedf50550e1d 100644 --- a/spec/ruby/core/process/status/right_shift_spec.rb +++ b/spec/ruby/core/process/status/right_shift_spec.rb @@ -16,7 +16,7 @@ end end - ruby_version_is "3.3"..."4.0" do + ruby_version_is ""..."4.0" do it "raises an ArgumentError if shift value is negative" do suppress_warning do ruby_exe("exit(0)") diff --git a/spec/ruby/core/process/warmup_spec.rb b/spec/ruby/core/process/warmup_spec.rb index b562d52d226715..4530ae222c2606 100644 --- a/spec/ruby/core/process/warmup_spec.rb +++ b/spec/ruby/core/process/warmup_spec.rb @@ -1,11 +1,9 @@ require_relative 
'../../spec_helper' describe "Process.warmup" do - ruby_version_is "3.3" do - # The behavior is entirely implementation specific. - # Other implementations are free to just make it a noop - it "is implemented" do - Process.warmup.should == true - end + # The behavior is entirely implementation specific. + # Other implementations are free to just make it a noop + it "is implemented" do + Process.warmup.should == true end end diff --git a/spec/ruby/core/range/case_compare_spec.rb b/spec/ruby/core/range/case_compare_spec.rb index c9b253f0a585d9..7a76487d68c575 100644 --- a/spec/ruby/core/range/case_compare_spec.rb +++ b/spec/ruby/core/range/case_compare_spec.rb @@ -11,9 +11,7 @@ it_behaves_like :range_cover_and_include, :=== it_behaves_like :range_cover, :=== - ruby_bug "#19533", ""..."3.3" do - it "returns true on any value if begin and end are both nil" do - (nil..nil).should === 1 - end + it "returns true on any value if begin and end are both nil" do + (nil..nil).should === 1 end end diff --git a/spec/ruby/core/range/overlap_spec.rb b/spec/ruby/core/range/overlap_spec.rb index 9b6fc134934208..3e7d2bdda8acf9 100644 --- a/spec/ruby/core/range/overlap_spec.rb +++ b/spec/ruby/core/range/overlap_spec.rb @@ -1,89 +1,87 @@ require_relative '../../spec_helper' -ruby_version_is '3.3' do - describe "Range#overlap?" 
do - it "returns true if other Range overlaps self" do - (0..2).overlap?(1..3).should == true - (1..3).overlap?(0..2).should == true - (0..2).overlap?(0..2).should == true - (0..3).overlap?(1..2).should == true - (1..2).overlap?(0..3).should == true - - ('a'..'c').overlap?('b'..'d').should == true - end - - it "returns false if other Range does not overlap self" do - (0..2).overlap?(3..4).should == false - (0..2).overlap?(-4..-1).should == false - - ('a'..'c').overlap?('d'..'f').should == false - end - - it "raises TypeError when called with non-Range argument" do - -> { - (0..2).overlap?(1) - }.should raise_error(TypeError, "wrong argument type Integer (expected Range)") - end - - it "returns true when beginningless and endless Ranges overlap" do - (0..2).overlap?(..3).should == true - (0..2).overlap?(..1).should == true - (0..2).overlap?(..0).should == true - - (..3).overlap?(0..2).should == true - (..1).overlap?(0..2).should == true - (..0).overlap?(0..2).should == true - - (0..2).overlap?(-1..).should == true - (0..2).overlap?(1..).should == true - (0..2).overlap?(2..).should == true - - (-1..).overlap?(0..2).should == true - (1..).overlap?(0..2).should == true - (2..).overlap?(0..2).should == true - - (0..).overlap?(2..).should == true - (..0).overlap?(..2).should == true - end - - it "returns false when beginningless and endless Ranges do not overlap" do - (0..2).overlap?(..-1).should == false - (0..2).overlap?(3..).should == false - - (..-1).overlap?(0..2).should == false - (3..).overlap?(0..2).should == false - end - - it "returns false when Ranges are not compatible" do - (0..2).overlap?('a'..'d').should == false - end - - it "return false when self is empty" do - (2..0).overlap?(1..3).should == false - (2...2).overlap?(1..3).should == false - (1...1).overlap?(1...1).should == false - (2..0).overlap?(2..0).should == false - - ('c'..'a').overlap?('b'..'d').should == false - ('a'...'a').overlap?('b'..'d').should == false - 
('b'...'b').overlap?('b'...'b').should == false - ('c'...'a').overlap?('c'...'a').should == false - end - - it "return false when other Range is empty" do - (1..3).overlap?(2..0).should == false - (1..3).overlap?(2...2).should == false - - ('b'..'d').overlap?('c'..'a').should == false - ('b'..'d').overlap?('c'...'c').should == false - end - - it "takes into account exclusive end" do - (0...2).overlap?(2..4).should == false - (2..4).overlap?(0...2).should == false - - ('a'...'c').overlap?('c'..'e').should == false - ('c'..'e').overlap?('a'...'c').should == false - end +describe "Range#overlap?" do + it "returns true if other Range overlaps self" do + (0..2).overlap?(1..3).should == true + (1..3).overlap?(0..2).should == true + (0..2).overlap?(0..2).should == true + (0..3).overlap?(1..2).should == true + (1..2).overlap?(0..3).should == true + + ('a'..'c').overlap?('b'..'d').should == true + end + + it "returns false if other Range does not overlap self" do + (0..2).overlap?(3..4).should == false + (0..2).overlap?(-4..-1).should == false + + ('a'..'c').overlap?('d'..'f').should == false + end + + it "raises TypeError when called with non-Range argument" do + -> { + (0..2).overlap?(1) + }.should raise_error(TypeError, "wrong argument type Integer (expected Range)") + end + + it "returns true when beginningless and endless Ranges overlap" do + (0..2).overlap?(..3).should == true + (0..2).overlap?(..1).should == true + (0..2).overlap?(..0).should == true + + (..3).overlap?(0..2).should == true + (..1).overlap?(0..2).should == true + (..0).overlap?(0..2).should == true + + (0..2).overlap?(-1..).should == true + (0..2).overlap?(1..).should == true + (0..2).overlap?(2..).should == true + + (-1..).overlap?(0..2).should == true + (1..).overlap?(0..2).should == true + (2..).overlap?(0..2).should == true + + (0..).overlap?(2..).should == true + (..0).overlap?(..2).should == true + end + + it "returns false when beginningless and endless Ranges do not overlap" do + 
(0..2).overlap?(..-1).should == false + (0..2).overlap?(3..).should == false + + (..-1).overlap?(0..2).should == false + (3..).overlap?(0..2).should == false + end + + it "returns false when Ranges are not compatible" do + (0..2).overlap?('a'..'d').should == false + end + + it "return false when self is empty" do + (2..0).overlap?(1..3).should == false + (2...2).overlap?(1..3).should == false + (1...1).overlap?(1...1).should == false + (2..0).overlap?(2..0).should == false + + ('c'..'a').overlap?('b'..'d').should == false + ('a'...'a').overlap?('b'..'d').should == false + ('b'...'b').overlap?('b'...'b').should == false + ('c'...'a').overlap?('c'...'a').should == false + end + + it "return false when other Range is empty" do + (1..3).overlap?(2..0).should == false + (1..3).overlap?(2...2).should == false + + ('b'..'d').overlap?('c'..'a').should == false + ('b'..'d').overlap?('c'...'c').should == false + end + + it "takes into account exclusive end" do + (0...2).overlap?(2..4).should == false + (2..4).overlap?(0...2).should == false + + ('a'...'c').overlap?('c'..'e').should == false + ('c'..'e').overlap?('a'...'c').should == false end end diff --git a/spec/ruby/core/range/reverse_each_spec.rb b/spec/ruby/core/range/reverse_each_spec.rb index 56390cc0da4822..16aaace6afaa60 100644 --- a/spec/ruby/core/range/reverse_each_spec.rb +++ b/spec/ruby/core/range/reverse_each_spec.rb @@ -1,102 +1,124 @@ require_relative '../../spec_helper' -ruby_version_is "3.3" do - describe "Range#reverse_each" do - it "traverses the Range in reverse order and passes each element to block" do - a = [] - (1..3).reverse_each { |i| a << i } - a.should == [3, 2, 1] +describe "Range#reverse_each" do + it "traverses the Range in reverse order and passes each element to block" do + a = [] + (1..3).reverse_each { |i| a << i } + a.should == [3, 2, 1] + + a = [] + (1...3).reverse_each { |i| a << i } + a.should == [2, 1] + end - a = [] - (1...3).reverse_each { |i| a << i } - a.should == [2, 1] - end + 
it "returns self" do + r = (1..3) + r.reverse_each { |x| }.should equal(r) + end - it "returns self" do - r = (1..3) - r.reverse_each { |x| }.should equal(r) - end + it "returns an Enumerator if no block given" do + enum = (1..3).reverse_each + enum.should be_an_instance_of(Enumerator) + enum.to_a.should == [3, 2, 1] + end - it "returns an Enumerator if no block given" do - enum = (1..3).reverse_each - enum.should be_an_instance_of(Enumerator) - enum.to_a.should == [3, 2, 1] - end + it "raises a TypeError for endless Ranges of Integers" do + -> { + (1..).reverse_each.take(3) + }.should raise_error(TypeError, "can't iterate from NilClass") + end - it "raises a TypeError for endless Ranges of Integers" do - -> { - (1..).reverse_each.take(3) - }.should raise_error(TypeError, "can't iterate from NilClass") - end + it "raises a TypeError for endless Ranges of non-Integers" do + -> { + ("a"..).reverse_each.take(3) + }.should raise_error(TypeError, "can't iterate from NilClass") + end - it "raises a TypeError for endless Ranges of non-Integers" do - -> { - ("a"..).reverse_each.take(3) - }.should raise_error(TypeError, "can't iterate from NilClass") + context "Integer boundaries" do + it "supports beginningless Ranges" do + (..5).reverse_each.take(3).should == [5, 4, 3] end + end - context "Integer boundaries" do - it "supports beginningless Ranges" do - (..5).reverse_each.take(3).should == [5, 4, 3] - end + context "non-Integer boundaries" do + it "uses #succ to iterate a Range of non-Integer elements" do + y = mock('y') + x = mock('x') + + x.should_receive(:succ).any_number_of_times.and_return(y) + x.should_receive(:<=>).with(y).any_number_of_times.and_return(-1) + x.should_receive(:<=>).with(x).any_number_of_times.and_return(0) + y.should_receive(:<=>).with(x).any_number_of_times.and_return(1) + y.should_receive(:<=>).with(y).any_number_of_times.and_return(0) + + a = [] + (x..y).each { |i| a << i } + a.should == [x, y] end - context "non-Integer boundaries" do - it 
"uses #succ to iterate a Range of non-Integer elements" do - y = mock('y') - x = mock('x') + it "uses #succ to iterate a Range of Strings" do + a = [] + ('A'..'D').reverse_each { |i| a << i } + a.should == ['D','C','B','A'] + end - x.should_receive(:succ).any_number_of_times.and_return(y) - x.should_receive(:<=>).with(y).any_number_of_times.and_return(-1) - x.should_receive(:<=>).with(x).any_number_of_times.and_return(0) - y.should_receive(:<=>).with(x).any_number_of_times.and_return(1) - y.should_receive(:<=>).with(y).any_number_of_times.and_return(0) + it "uses #succ to iterate a Range of Symbols" do + a = [] + (:A..:D).reverse_each { |i| a << i } + a.should == [:D, :C, :B, :A] + end - a = [] - (x..y).each { |i| a << i } - a.should == [x, y] - end + it "raises a TypeError when `begin` value does not respond to #succ" do + -> { (Time.now..Time.now).reverse_each { |x| x } }.should raise_error(TypeError, /can't iterate from Time/) + -> { (//..//).reverse_each { |x| x } }.should raise_error(TypeError, /can't iterate from Regexp/) + -> { ([]..[]).reverse_each { |x| x } }.should raise_error(TypeError, /can't iterate from Array/) + end - it "uses #succ to iterate a Range of Strings" do - a = [] - ('A'..'D').reverse_each { |i| a << i } - a.should == ['D','C','B','A'] - end + it "does not support beginningless Ranges" do + -> { + (..'a').reverse_each { |x| x } + }.should raise_error(TypeError, /can't iterate from NilClass/) + end + end - it "uses #succ to iterate a Range of Symbols" do - a = [] - (:A..:D).reverse_each { |i| a << i } - a.should == [:D, :C, :B, :A] - end + context "when no block is given" do + describe "returned Enumerator size" do + it "returns the Range size when Range size is finite" do + (1..3).reverse_each.size.should == 3 + (1...3).reverse_each.size.should == 2 - it "raises a TypeError when `begin` value does not respond to #succ" do - -> { (Time.now..Time.now).reverse_each { |x| x } }.should raise_error(TypeError, /can't iterate from Time/) - -> { 
(//..//).reverse_each { |x| x } }.should raise_error(TypeError, /can't iterate from Regexp/) - -> { ([]..[]).reverse_each { |x| x } }.should raise_error(TypeError, /can't iterate from Array/) + (1..3.3).reverse_each.size.should == 3 + (1...3.3).reverse_each.size.should == 3 end - it "does not support beginningless Ranges" do - -> { - (..'a').reverse_each { |x| x } - }.should raise_error(TypeError, /can't iterate from NilClass/) + ruby_version_is ""..."3.4" do + it "returns a size when it is not iterable" do + (1.1..3).reverse_each.size.should == 2 + (1.1..3.3).reverse_each.size.should == 3 + (1.1..nil).reverse_each.size.should == Float::INFINITY + (nil..3.3).reverse_each.size.should == Float::INFINITY + (nil..nil).reverse_each.size.should == nil + end end - end - context "when no block is given" do - describe "returned Enumerator size" do - it "returns the Range size when Range size is finite" do - (1..3).reverse_each.size.should == 3 + ruby_version_is "3.4" do + it "raises TypeError when the range is not iterable" do + -> { (1.1..3).reverse_each.size }.should raise_error(TypeError, /can't iterate from Integer/) + -> { (1.1..3.3).reverse_each.size }.should raise_error(TypeError, /can't iterate from Float/) + -> { (1.1..nil).reverse_each.size }.should raise_error(TypeError, /can't iterate from NilClass/) + -> { (nil..3.3).reverse_each.size }.should raise_error(TypeError, /can't iterate from Float/) + -> { (nil..nil).reverse_each.size }.should raise_error(TypeError, /can't iterate from NilClass/) end + end - ruby_bug "#20936", "3.4"..."4.0" do - it "returns Infinity when Range size is infinite" do - (..3).reverse_each.size.should == Float::INFINITY - end + ruby_bug "#20936", "3.4"..."4.0" do + it "returns Infinity when Range size is infinite" do + (..3).reverse_each.size.should == Float::INFINITY end + end - it "returns nil when Range size is unknown" do - ('a'..'z').reverse_each.size.should == nil - end + it "returns nil when Range size is unknown" do + 
('a'..'z').reverse_each.size.should == nil end end end diff --git a/spec/ruby/core/range/to_set_spec.rb b/spec/ruby/core/range/to_set_spec.rb index 589c0e9aedec26..14e0ce1e31eac1 100644 --- a/spec/ruby/core/range/to_set_spec.rb +++ b/spec/ruby/core/range/to_set_spec.rb @@ -1,7 +1,7 @@ require_relative '../../spec_helper' require_relative '../enumerable/fixtures/classes' -describe "Enumerable#to_set" do +describe "Range#to_set" do it "returns a new Set created from self" do (1..4).to_set.should == Set[1, 2, 3, 4] (1...4).to_set.should == Set[1, 2, 3] @@ -11,45 +11,44 @@ (1..3).to_set { |x| x * x }.should == Set[1, 4, 9] end + it "raises a TypeError for a beginningless range" do + -> { + (..0).to_set + }.should raise_error(TypeError, "can't iterate from NilClass") + end + ruby_version_is "4.0" do - it "raises a RangeError if the range is infinite" do + it "raises a RangeError if the range is endless" do -> { (1..).to_set }.should raise_error(RangeError, "cannot convert endless range to a set") -> { (1...).to_set }.should raise_error(RangeError, "cannot convert endless range to a set") end end - ruby_version_is ""..."4.0" do - it "instantiates an object of provided as the first argument set class" do - set = (1..3).to_set(EnumerableSpecs::SetSubclass) - set.should be_kind_of(EnumerableSpecs::SetSubclass) - set.to_a.sort.should == [1, 2, 3] - end - end - - ruby_version_is "4.0"..."4.1" do - it "instantiates an object of provided as the first argument set class and warns" do - set = nil - proc { + context "given positional arguments" do + ruby_version_is ""..."4.0" do + it "instantiates an object of provided as the first argument set class" do set = (1..3).to_set(EnumerableSpecs::SetSubclass) - }.should complain(/Enumerable#to_set/) - set.should be_kind_of(EnumerableSpecs::SetSubclass) - set.to_a.sort.should == [1, 2, 3] + set.should be_kind_of(EnumerableSpecs::SetSubclass) + set.to_a.sort.should == [1, 2, 3] + end end - end - ruby_version_is "4.1" do - it "does not 
accept any positional argument" do - -> { - (1..3).to_set(EnumerableSpecs::SetSubclass) - }.should raise_error(ArgumentError, 'wrong number of arguments (given 1, expected 0)') + ruby_version_is "4.0"..."4.1" do + it "instantiates an object of provided as the first argument set class and warns" do + -> { + set = (1..3).to_set(EnumerableSpecs::SetSubclass) + set.should be_kind_of(EnumerableSpecs::SetSubclass) + set.to_a.sort.should == [1, 2, 3] + }.should complain(/warning: passing arguments to Enumerable#to_set is deprecated/) + end end - end - it "does not need explicit `require 'set'`" do - output = ruby_exe(<<~RUBY, options: '--disable-gems', args: '2>&1') - puts (1..3).to_set.to_a.inspect - RUBY - - output.chomp.should == "[1, 2, 3]" + ruby_version_is "4.1" do + it "does not accept any positional argument" do + -> { + (1..3).to_set(EnumerableSpecs::SetSubclass) + }.should raise_error(ArgumentError, "wrong number of arguments (given 1, expected 0)") + end + end end end diff --git a/spec/ruby/core/rational/ceil_spec.rb b/spec/ruby/core/rational/ceil_spec.rb index d5bdadf3b6b000..0c0327448f35c6 100644 --- a/spec/ruby/core/rational/ceil_spec.rb +++ b/spec/ruby/core/rational/ceil_spec.rb @@ -1,45 +1,48 @@ require_relative "../../spec_helper" +require_relative "../integer/shared/integer_ceil_precision" describe "Rational#ceil" do + context "with values equal to integers" do + it_behaves_like :integer_ceil_precision, :Rational + end + before do @rational = Rational(2200, 7) end describe "with no arguments (precision = 0)" do - it "returns an Integer" do - @rational.ceil.should be_kind_of(Integer) - end + it "returns the Integer value rounded toward positive infinity" do + @rational.ceil.should eql 315 - it "returns the truncated value toward positive infinity" do - @rational.ceil.should == 315 - Rational(1, 2).ceil.should == 1 - Rational(-1, 2).ceil.should == 0 + Rational(1, 2).ceil.should eql 1 + Rational(-1, 2).ceil.should eql 0 + Rational(1, 1).ceil.should eql 1 
end end describe "with a precision < 0" do - it "returns an Integer" do - @rational.ceil(-2).should be_kind_of(Integer) - @rational.ceil(-1).should be_kind_of(Integer) - end + it "moves the rounding point n decimal places left, returning an Integer" do + @rational.ceil(-3).should eql 1000 + @rational.ceil(-2).should eql 400 + @rational.ceil(-1).should eql 320 - it "moves the truncation point n decimal places left" do - @rational.ceil(-3).should == 1000 - @rational.ceil(-2).should == 400 - @rational.ceil(-1).should == 320 + Rational(100, 2).ceil(-1).should eql 50 + Rational(100, 2).ceil(-2).should eql 100 + Rational(-100, 2).ceil(-1).should eql(-50) + Rational(-100, 2).ceil(-2).should eql(0) end end describe "with precision > 0" do - it "returns a Rational" do - @rational.ceil(1).should be_kind_of(Rational) - @rational.ceil(2).should be_kind_of(Rational) - end + it "moves the rounding point n decimal places right, returning a Rational" do + @rational.ceil(1).should eql Rational(3143, 10) + @rational.ceil(2).should eql Rational(31429, 100) + @rational.ceil(3).should eql Rational(157143, 500) - it "moves the truncation point n decimal places right" do - @rational.ceil(1).should == Rational(3143, 10) - @rational.ceil(2).should == Rational(31429, 100) - @rational.ceil(3).should == Rational(157143, 500) + Rational(100, 2).ceil(1).should eql Rational(50, 1) + Rational(100, 2).ceil(2).should eql Rational(50, 1) + Rational(-100, 2).ceil(1).should eql Rational(-50, 1) + Rational(-100, 2).ceil(2).should eql Rational(-50, 1) end end end diff --git a/spec/ruby/core/rational/exponent_spec.rb b/spec/ruby/core/rational/exponent_spec.rb index 65fbf2ed1ca895..1f8a03740cc087 100644 --- a/spec/ruby/core/rational/exponent_spec.rb +++ b/spec/ruby/core/rational/exponent_spec.rb @@ -108,37 +108,37 @@ it "raises an ArgumentError when self is > 1" do -> { (Rational(2) ** bignum_value) - }.should raise_error(ArgumentError) + }.should raise_error(ArgumentError, "exponent is too large") -> { 
(Rational(fixnum_max) ** bignum_value) - }.should raise_error(ArgumentError) + }.should raise_error(ArgumentError, "exponent is too large") end it "raises an ArgumentError when self is > 1 and the exponent is negative" do -> { (Rational(2) ** -bignum_value) - }.should raise_error(ArgumentError) + }.should raise_error(ArgumentError, "exponent is too large") -> { (Rational(fixnum_max) ** -bignum_value) - }.should raise_error(ArgumentError) + }.should raise_error(ArgumentError, "exponent is too large") end it "raises an ArgumentError when self is < -1" do -> { (Rational(-2) ** bignum_value) - }.should raise_error(ArgumentError) + }.should raise_error(ArgumentError, "exponent is too large") -> { (Rational(fixnum_min) ** bignum_value) - }.should raise_error(ArgumentError) + }.should raise_error(ArgumentError, "exponent is too large") end it "raises an ArgumentError when self is < -1 and the exponent is negative" do -> { (Rational(-2) ** -bignum_value) - }.should raise_error(ArgumentError) + }.should raise_error(ArgumentError, "exponent is too large") -> { (Rational(fixnum_min) ** -bignum_value) - }.should raise_error(ArgumentError) + }.should raise_error(ArgumentError, "exponent is too large") end end diff --git a/spec/ruby/core/rational/floor_spec.rb b/spec/ruby/core/rational/floor_spec.rb index 8068aaf119e70f..5108e363f7a67c 100644 --- a/spec/ruby/core/rational/floor_spec.rb +++ b/spec/ruby/core/rational/floor_spec.rb @@ -1,45 +1,49 @@ require_relative "../../spec_helper" +require_relative "../integer/shared/integer_floor_precision" describe "Rational#floor" do + context "with values equal to integers" do + it_behaves_like :integer_floor_precision, :Rational + end + before do @rational = Rational(2200, 7) end describe "with no arguments (precision = 0)" do - it "returns an integer" do - @rational.floor.should be_kind_of(Integer) - end - it "returns the truncated value toward negative infinity" do - @rational.floor.should == 314 - Rational(1, 2).floor.should == 0 - 
Rational(-1, 2).floor.should == -1 + it "returns the Integer value rounded toward negative infinity" do + @rational.floor.should eql 314 + + Rational(1, 2).floor.should eql 0 + Rational(-1, 2).floor.should eql(-1) + Rational(1, 1).floor.should eql 1 end end describe "with a precision < 0" do - it "returns an integer" do - @rational.floor(-2).should be_kind_of(Integer) - @rational.floor(-1).should be_kind_of(Integer) - end + it "moves the rounding point n decimal places left, returning an Integer" do + @rational.floor(-3).should eql 0 + @rational.floor(-2).should eql 300 + @rational.floor(-1).should eql 310 - it "moves the truncation point n decimal places left" do - @rational.floor(-3).should == 0 - @rational.floor(-2).should == 300 - @rational.floor(-1).should == 310 + Rational(100, 2).floor(-1).should eql 50 + Rational(100, 2).floor(-2).should eql 0 + Rational(-100, 2).floor(-1).should eql(-50) + Rational(-100, 2).floor(-2).should eql(-100) end end describe "with a precision > 0" do - it "returns a Rational" do - @rational.floor(1).should be_kind_of(Rational) - @rational.floor(2).should be_kind_of(Rational) - end + it "moves the rounding point n decimal places right, returning a Rational" do + @rational.floor(1).should eql Rational(1571, 5) + @rational.floor(2).should eql Rational(7857, 25) + @rational.floor(3).should eql Rational(62857, 200) - it "moves the truncation point n decimal places right" do - @rational.floor(1).should == Rational(1571, 5) - @rational.floor(2).should == Rational(7857, 25) - @rational.floor(3).should == Rational(62857, 200) + Rational(100, 2).floor(1).should eql Rational(50, 1) + Rational(100, 2).floor(2).should eql Rational(50, 1) + Rational(-100, 2).floor(1).should eql Rational(-50, 1) + Rational(-100, 2).floor(2).should eql Rational(-50, 1) end end end diff --git a/spec/ruby/core/refinement/refined_class_spec.rb b/spec/ruby/core/refinement/refined_class_spec.rb index 60a58380ccf00b..b532d9a7738cca 100644 --- 
a/spec/ruby/core/refinement/refined_class_spec.rb +++ b/spec/ruby/core/refinement/refined_class_spec.rb @@ -2,11 +2,7 @@ require_relative 'shared/target' describe "Refinement#refined_class" do - ruby_version_is ""..."3.3" do - it_behaves_like :refinement_target, :refined_class - end - - ruby_version_is "3.3"..."3.4" do + ruby_version_is ""..."3.4" do it "has been deprecated in favour of Refinement#target" do refinement_int = nil diff --git a/spec/ruby/core/refinement/target_spec.rb b/spec/ruby/core/refinement/target_spec.rb index fee9588a96ed65..8bd816aea622dd 100644 --- a/spec/ruby/core/refinement/target_spec.rb +++ b/spec/ruby/core/refinement/target_spec.rb @@ -2,7 +2,5 @@ require_relative 'shared/target' describe "Refinement#target" do - ruby_version_is "3.3" do - it_behaves_like :refinement_target, :target - end + it_behaves_like :refinement_target, :target end diff --git a/spec/ruby/core/regexp/linear_time_spec.rb b/spec/ruby/core/regexp/linear_time_spec.rb index cf9e73c37c2b64..2f3f81ed207236 100644 --- a/spec/ruby/core/regexp/linear_time_spec.rb +++ b/spec/ruby/core/regexp/linear_time_spec.rb @@ -25,9 +25,7 @@ }.should complain(/warning: flags ignored/) end - ruby_version_is "3.3" do - it "returns true for positive lookarounds" do - Regexp.linear_time?(/(?:(?=a*)a)*/).should == true - end + it "returns true for positive lookarounds" do + Regexp.linear_time?(/(?:(?=a*)a)*/).should == true end end diff --git a/spec/ruby/core/set/flatten_spec.rb b/spec/ruby/core/set/flatten_spec.rb index f2cb3dfa524a35..b26bc8481af58f 100644 --- a/spec/ruby/core/set/flatten_spec.rb +++ b/spec/ruby/core/set/flatten_spec.rb @@ -46,14 +46,4 @@ (set = Set[]) << set -> { set.flatten! 
}.should raise_error(ArgumentError) end - - version_is(set_version, ""..."1.1.0") do #ruby_version_is ""..."3.3" do - ruby_version_is ""..."4.0" do - context "when Set contains a Set-like object" do - it "flattens self, including Set-like objects" do - Set[SetSpecs::SetLike.new([1])].flatten!.should == Set[1] - end - end - end - end end diff --git a/spec/ruby/core/set/merge_spec.rb b/spec/ruby/core/set/merge_spec.rb index 0c6ed276700e7d..bf945cdcc02238 100644 --- a/spec/ruby/core/set/merge_spec.rb +++ b/spec/ruby/core/set/merge_spec.rb @@ -23,15 +23,7 @@ end end - ruby_version_is ""..."3.3" do - it "accepts only a single argument" do - -> { Set[].merge([], []) }.should raise_error(ArgumentError, "wrong number of arguments (given 2, expected 1)") - end - end - - ruby_version_is "3.3" do - it "accepts multiple arguments" do - Set[:a, :b].merge(Set[:b, :c], [:d]).should == Set[:a, :b, :c, :d] - end + it "accepts multiple arguments" do + Set[:a, :b].merge(Set[:b, :c], [:d]).should == Set[:a, :b, :c, :d] end end diff --git a/spec/ruby/core/set/proper_subset_spec.rb b/spec/ruby/core/set/proper_subset_spec.rb index fb7848c0015200..6f99447019b852 100644 --- a/spec/ruby/core/set/proper_subset_spec.rb +++ b/spec/ruby/core/set/proper_subset_spec.rb @@ -32,14 +32,4 @@ -> { Set[].proper_subset?("test") }.should raise_error(ArgumentError) -> { Set[].proper_subset?(Object.new) }.should raise_error(ArgumentError) end - - version_is(set_version, ""..."1.1.0") do #ruby_version_is ""..."3.3" do - ruby_version_is ""..."4.0" do - context "when comparing to a Set-like object" do - it "returns true if passed a Set-like object that self is a proper subset of" do - Set[1, 2, 3].proper_subset?(SetSpecs::SetLike.new([1, 2, 3, 4])).should be_true - end - end - end - end end diff --git a/spec/ruby/core/set/subset_spec.rb b/spec/ruby/core/set/subset_spec.rb index 112bd9b38adc12..da80d174da4fa1 100644 --- a/spec/ruby/core/set/subset_spec.rb +++ b/spec/ruby/core/set/subset_spec.rb @@ -32,14 +32,4 
@@ -> { Set[].subset?("test") }.should raise_error(ArgumentError) -> { Set[].subset?(Object.new) }.should raise_error(ArgumentError) end - - version_is(set_version, ""..."1.1.0") do #ruby_version_is ""..."3.3" do - ruby_version_is ""..."4.0" do - context "when comparing to a Set-like object" do - it "returns true if passed a Set-like object that self is a subset of" do - Set[1, 2, 3].subset?(SetSpecs::SetLike.new([1, 2, 3, 4])).should be_true - end - end - end - end end diff --git a/spec/ruby/core/string/append_as_bytes_spec.rb b/spec/ruby/core/string/append_as_bytes_spec.rb index b1703e5f89baf9..def663d5ce2239 100644 --- a/spec/ruby/core/string/append_as_bytes_spec.rb +++ b/spec/ruby/core/string/append_as_bytes_spec.rb @@ -7,14 +7,16 @@ -> { str.append_as_bytes("\xE2\x82") }.should raise_error(FrozenError) end - it "allows creating broken strings" do + it "allows creating broken strings in UTF8" do str = +"hello" str.append_as_bytes("\xE2\x82") str.valid_encoding?.should == false str.append_as_bytes("\xAC") str.valid_encoding?.should == true + end + it "allows creating broken strings in UTF_32" do str = "abc".encode(Encoding::UTF_32LE) str.append_as_bytes("def") str.encoding.should == Encoding::UTF_32LE diff --git a/spec/ruby/core/string/bytesplice_spec.rb b/spec/ruby/core/string/bytesplice_spec.rb index 2c770e340aad27..cfd9e3ea9a7f39 100644 --- a/spec/ruby/core/string/bytesplice_spec.rb +++ b/spec/ruby/core/string/bytesplice_spec.rb @@ -57,77 +57,75 @@ -> { s.bytesplice(2, 1, "xxx") }.should raise_error(FrozenError, "can't modify frozen String: \"hello\"") end - ruby_version_is "3.3" do - it "raises IndexError when str_index is less than -bytesize" do - -> { "hello".bytesplice(2, 1, "HELLO", -6, 0) }.should raise_error(IndexError, "index -6 out of string") - end - - it "raises IndexError when str_index is greater than bytesize" do - -> { "hello".bytesplice(2, 1, "HELLO", 6, 0) }.should raise_error(IndexError, "index 6 out of string") - end - - it "raises 
IndexError for negative str length" do - -> { "abc".bytesplice(0, 1, "", 0, -2) }.should raise_error(IndexError, "negative length -2") - end - - it "replaces with integer str indices" do - "hello".bytesplice(1, 2, "HELLO", -5, 0).should == "hlo" - "hello".bytesplice(1, 2, "HELLO", 0, 0).should == "hlo" - "hello".bytesplice(1, 2, "HELLO", 0, 1).should == "hHlo" - "hello".bytesplice(1, 2, "HELLO", 0, 5).should == "hHELLOlo" - "hello".bytesplice(1, 2, "HELLO", 0, 6).should == "hHELLOlo" - end - - it "raises RangeError when str range left boundary is less than -bytesize" do - -> { "hello".bytesplice(0..1, "HELLO", -6...-6) }.should raise_error(RangeError, "-6...-6 out of range") - end - - it "replaces with str ranges" do - "hello".bytesplice(1..2, "HELLO", -5...-5).should == "hlo" - "hello".bytesplice(1..2, "HELLO", 0...0).should == "hlo" - "hello".bytesplice(1..2, "HELLO", 0..0).should == "hHlo" - "hello".bytesplice(1..2, "HELLO", 0...1).should == "hHlo" - "hello".bytesplice(1..2, "HELLO", 0..1).should == "hHElo" - "hello".bytesplice(1..2, "HELLO", 0..-1).should == "hHELLOlo" - "hello".bytesplice(1..2, "HELLO", 0...5).should == "hHELLOlo" - "hello".bytesplice(1..2, "HELLO", 0...6).should == "hHELLOlo" - end - - it "raises ArgumentError when integer str index is provided without str length argument" do - -> { "hello".bytesplice(0, 1, "xxx", 0) }.should raise_error(ArgumentError, "wrong number of arguments (given 4, expected 2, 3, or 5)") - end - - it "replaces on an empty string with str index/length" do - "".bytesplice(0, 0, "", 0, 0).should == "" - "".bytesplice(0, 0, "xxx", 0, 1).should == "x" - end - - it "mutates self with substring and str index/length" do - s = "hello" - s.bytesplice(2, 1, "xxx", 1, 2).should.equal?(s) - s.should.eql?("hexxlo") - end - - it "raises when string is frozen and str index/length" do - s = "hello".freeze - -> { s.bytesplice(2, 1, "xxx", 0, 1) }.should raise_error(FrozenError, "can't modify frozen String: \"hello\"") - end - - it 
"replaces on an empty string with str range" do - "".bytesplice(0..0, "", 0..0).should == "" - "".bytesplice(0..0, "xyz", 0..1).should == "xy" - end - - it "mutates self with substring and str range" do - s = "hello" - s.bytesplice(2..2, "xyz", 1..2).should.equal?(s) - s.should.eql?("heyzlo") - end - - it "raises when string is frozen and str range" do - s = "hello".freeze - -> { s.bytesplice(2..2, "yzx", 0..1) }.should raise_error(FrozenError, "can't modify frozen String: \"hello\"") - end + it "raises IndexError when str_index is less than -bytesize" do + -> { "hello".bytesplice(2, 1, "HELLO", -6, 0) }.should raise_error(IndexError, "index -6 out of string") + end + + it "raises IndexError when str_index is greater than bytesize" do + -> { "hello".bytesplice(2, 1, "HELLO", 6, 0) }.should raise_error(IndexError, "index 6 out of string") + end + + it "raises IndexError for negative str length" do + -> { "abc".bytesplice(0, 1, "", 0, -2) }.should raise_error(IndexError, "negative length -2") + end + + it "replaces with integer str indices" do + "hello".bytesplice(1, 2, "HELLO", -5, 0).should == "hlo" + "hello".bytesplice(1, 2, "HELLO", 0, 0).should == "hlo" + "hello".bytesplice(1, 2, "HELLO", 0, 1).should == "hHlo" + "hello".bytesplice(1, 2, "HELLO", 0, 5).should == "hHELLOlo" + "hello".bytesplice(1, 2, "HELLO", 0, 6).should == "hHELLOlo" + end + + it "raises RangeError when str range left boundary is less than -bytesize" do + -> { "hello".bytesplice(0..1, "HELLO", -6...-6) }.should raise_error(RangeError, "-6...-6 out of range") + end + + it "replaces with str ranges" do + "hello".bytesplice(1..2, "HELLO", -5...-5).should == "hlo" + "hello".bytesplice(1..2, "HELLO", 0...0).should == "hlo" + "hello".bytesplice(1..2, "HELLO", 0..0).should == "hHlo" + "hello".bytesplice(1..2, "HELLO", 0...1).should == "hHlo" + "hello".bytesplice(1..2, "HELLO", 0..1).should == "hHElo" + "hello".bytesplice(1..2, "HELLO", 0..-1).should == "hHELLOlo" + "hello".bytesplice(1..2, "HELLO", 
0...5).should == "hHELLOlo" + "hello".bytesplice(1..2, "HELLO", 0...6).should == "hHELLOlo" + end + + it "raises ArgumentError when integer str index is provided without str length argument" do + -> { "hello".bytesplice(0, 1, "xxx", 0) }.should raise_error(ArgumentError, "wrong number of arguments (given 4, expected 2, 3, or 5)") + end + + it "replaces on an empty string with str index/length" do + "".bytesplice(0, 0, "", 0, 0).should == "" + "".bytesplice(0, 0, "xxx", 0, 1).should == "x" + end + + it "mutates self with substring and str index/length" do + s = "hello" + s.bytesplice(2, 1, "xxx", 1, 2).should.equal?(s) + s.should.eql?("hexxlo") + end + + it "raises when string is frozen and str index/length" do + s = "hello".freeze + -> { s.bytesplice(2, 1, "xxx", 0, 1) }.should raise_error(FrozenError, "can't modify frozen String: \"hello\"") + end + + it "replaces on an empty string with str range" do + "".bytesplice(0..0, "", 0..0).should == "" + "".bytesplice(0..0, "xyz", 0..1).should == "xy" + end + + it "mutates self with substring and str range" do + s = "hello" + s.bytesplice(2..2, "xyz", 1..2).should.equal?(s) + s.should.eql?("heyzlo") + end + + it "raises when string is frozen and str range" do + s = "hello".freeze + -> { s.bytesplice(2..2, "yzx", 0..1) }.should raise_error(FrozenError, "can't modify frozen String: \"hello\"") end end @@ -201,94 +199,92 @@ result.encoding.should == Encoding::UTF_8 end - ruby_version_is "3.3" do - it "raises IndexError when str_index is out of byte size boundary" do - -> { "こんにちは".bytesplice(3, 3, "こんにちは", -16, 0) }.should raise_error(IndexError, "index -16 out of string") - end - - it "raises IndexError when str_index is not on a codepoint boundary" do - -> { "こんにちは".bytesplice(3, 3, "こんにちは", 1, 0) }.should raise_error(IndexError, "offset 1 does not land on character boundary") - end - - it "raises IndexError when str_length is not matching the codepoint boundary" do - -> { "こんにちは".bytesplice(3, 3, "こんにちは", 0, 1) }.should 
raise_error(IndexError, "offset 1 does not land on character boundary") - -> { "こんにちは".bytesplice(3, 3, "こんにちは", 0, 2) }.should raise_error(IndexError, "offset 2 does not land on character boundary") - end - - it "replaces with integer str indices" do - "こんにちは".bytesplice(3, 3, "こんにちは", -15, 0).should == "こにちは" - "こんにちは".bytesplice(3, 3, "こんにちは", 0, 0).should == "こにちは" - "こんにちは".bytesplice(3, 3, "こんにちは", 0, 3).should == "ここにちは" - "こんにちは".bytesplice(3, 3, "はは", 3, 3).should == "こはにちは" - "こんにちは".bytesplice(3, 3, "こんにちは", 15, 0).should == "こにちは" - end - - it "replaces with str range" do - "こんにちは".bytesplice(0..2, "こんにちは", -15...-16).should == "んにちは" - "こんにちは".bytesplice(0..2, "こんにちは", 0...0).should == "んにちは" - "こんにちは".bytesplice(0..2, "こんにちは", 3..5).should == "んんにちは" - "こんにちは".bytesplice(0..2, "こんにちは", 3...6).should == "んんにちは" - "こんにちは".bytesplice(0..2, "こんにちは", 3..8).should == "んにんにちは" - "こんにちは".bytesplice(0..2, "こんにちは", 0..-1).should == "こんにちはんにちは" - "こんにちは".bytesplice(0..2, "こんにちは", 0...15).should == "こんにちはんにちは" - "こんにちは".bytesplice(0..2, "こんにちは", 0...18).should == "こんにちはんにちは" - end - - it "treats negative length for str range as 0" do - "こんにちは".bytesplice(0..2, "こんにちは", 0...-100).should == "んにちは" - "こんにちは".bytesplice(0..2, "こんにちは", 3...-100).should == "んにちは" - "こんにちは".bytesplice(0..2, "こんにちは", -15...-100).should == "んにちは" - end - - it "raises when ranges not match codepoint boundaries in str" do - -> { "こんにちは".bytesplice(3...3, "こ", 0..0) }.should raise_error(IndexError, "offset 1 does not land on character boundary") - -> { "こんにちは".bytesplice(3...3, "こ", 0..1) }.should raise_error(IndexError, "offset 2 does not land on character boundary") - # Begin is incorrect - -> { "こんにちは".bytesplice(3...3, "こんにちは", -4..-1) }.should raise_error(IndexError, "offset 11 does not land on character boundary") - -> { "こんにちは".bytesplice(3...3, "こんにちは", -5..-1) }.should raise_error(IndexError, "offset 10 does not land on character boundary") - # End is incorrect - -> { 
"こんにちは".bytesplice(3...3, "こんにちは", -3..-2) }.should raise_error(IndexError, "offset 14 does not land on character boundary") - -> { "こんにちは".bytesplice(3...3, "こんにちは", -3..-3) }.should raise_error(IndexError, "offset 13 does not land on character boundary") - end - - it "deals with a different encoded argument with str index/length" do - s = "こんにちは" - s.encoding.should == Encoding::UTF_8 - sub = "goodbye" - sub.force_encoding(Encoding::US_ASCII) - - result = s.bytesplice(3, 3, sub, 0, 3) - result.should == "こgooにちは" - result.encoding.should == Encoding::UTF_8 - - s = "hello" - s.force_encoding(Encoding::US_ASCII) - sub = "こんにちは" - sub.encoding.should == Encoding::UTF_8 - - result = s.bytesplice(1, 2, sub, 3, 3) - result.should == "hんlo" - result.encoding.should == Encoding::UTF_8 - end - - it "deals with a different encoded argument with str range" do - s = "こんにちは" - s.encoding.should == Encoding::UTF_8 - sub = "goodbye" - sub.force_encoding(Encoding::US_ASCII) - - result = s.bytesplice(3..5, sub, 0..2) - result.should == "こgooにちは" - result.encoding.should == Encoding::UTF_8 - - s = "hello" - s.force_encoding(Encoding::US_ASCII) - sub = "こんにちは" - sub.encoding.should == Encoding::UTF_8 - - result = s.bytesplice(1..2, sub, 3..5) - result.should == "hんlo" - result.encoding.should == Encoding::UTF_8 - end + it "raises IndexError when str_index is out of byte size boundary" do + -> { "こんにちは".bytesplice(3, 3, "こんにちは", -16, 0) }.should raise_error(IndexError, "index -16 out of string") + end + + it "raises IndexError when str_index is not on a codepoint boundary" do + -> { "こんにちは".bytesplice(3, 3, "こんにちは", 1, 0) }.should raise_error(IndexError, "offset 1 does not land on character boundary") + end + + it "raises IndexError when str_length is not matching the codepoint boundary" do + -> { "こんにちは".bytesplice(3, 3, "こんにちは", 0, 1) }.should raise_error(IndexError, "offset 1 does not land on character boundary") + -> { "こんにちは".bytesplice(3, 3, "こんにちは", 0, 2) }.should 
raise_error(IndexError, "offset 2 does not land on character boundary") + end + + it "replaces with integer str indices" do + "こんにちは".bytesplice(3, 3, "こんにちは", -15, 0).should == "こにちは" + "こんにちは".bytesplice(3, 3, "こんにちは", 0, 0).should == "こにちは" + "こんにちは".bytesplice(3, 3, "こんにちは", 0, 3).should == "ここにちは" + "こんにちは".bytesplice(3, 3, "はは", 3, 3).should == "こはにちは" + "こんにちは".bytesplice(3, 3, "こんにちは", 15, 0).should == "こにちは" + end + + it "replaces with str range" do + "こんにちは".bytesplice(0..2, "こんにちは", -15...-16).should == "んにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 0...0).should == "んにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 3..5).should == "んんにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 3...6).should == "んんにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 3..8).should == "んにんにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 0..-1).should == "こんにちはんにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 0...15).should == "こんにちはんにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 0...18).should == "こんにちはんにちは" + end + + it "treats negative length for str range as 0" do + "こんにちは".bytesplice(0..2, "こんにちは", 0...-100).should == "んにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 3...-100).should == "んにちは" + "こんにちは".bytesplice(0..2, "こんにちは", -15...-100).should == "んにちは" + end + + it "raises when ranges not match codepoint boundaries in str" do + -> { "こんにちは".bytesplice(3...3, "こ", 0..0) }.should raise_error(IndexError, "offset 1 does not land on character boundary") + -> { "こんにちは".bytesplice(3...3, "こ", 0..1) }.should raise_error(IndexError, "offset 2 does not land on character boundary") + # Begin is incorrect + -> { "こんにちは".bytesplice(3...3, "こんにちは", -4..-1) }.should raise_error(IndexError, "offset 11 does not land on character boundary") + -> { "こんにちは".bytesplice(3...3, "こんにちは", -5..-1) }.should raise_error(IndexError, "offset 10 does not land on character boundary") + # End is incorrect + -> { "こんにちは".bytesplice(3...3, "こんにちは", -3..-2) }.should raise_error(IndexError, "offset 14 does not land on character boundary") + -> { 
"こんにちは".bytesplice(3...3, "こんにちは", -3..-3) }.should raise_error(IndexError, "offset 13 does not land on character boundary") + end + + it "deals with a different encoded argument with str index/length" do + s = "こんにちは" + s.encoding.should == Encoding::UTF_8 + sub = "goodbye" + sub.force_encoding(Encoding::US_ASCII) + + result = s.bytesplice(3, 3, sub, 0, 3) + result.should == "こgooにちは" + result.encoding.should == Encoding::UTF_8 + + s = "hello" + s.force_encoding(Encoding::US_ASCII) + sub = "こんにちは" + sub.encoding.should == Encoding::UTF_8 + + result = s.bytesplice(1, 2, sub, 3, 3) + result.should == "hんlo" + result.encoding.should == Encoding::UTF_8 + end + + it "deals with a different encoded argument with str range" do + s = "こんにちは" + s.encoding.should == Encoding::UTF_8 + sub = "goodbye" + sub.force_encoding(Encoding::US_ASCII) + + result = s.bytesplice(3..5, sub, 0..2) + result.should == "こgooにちは" + result.encoding.should == Encoding::UTF_8 + + s = "hello" + s.force_encoding(Encoding::US_ASCII) + sub = "こんにちは" + sub.encoding.should == Encoding::UTF_8 + + result = s.bytesplice(1..2, sub, 3..5) + result.should == "hんlo" + result.encoding.should == Encoding::UTF_8 end end diff --git a/spec/ruby/core/string/index_spec.rb b/spec/ruby/core/string/index_spec.rb index 835263a2cd58c2..01e6a6a4009720 100644 --- a/spec/ruby/core/string/index_spec.rb +++ b/spec/ruby/core/string/index_spec.rb @@ -231,15 +231,13 @@ $~.should == nil end - ruby_bug "#20421", ""..."3.3" do - it "always clear $~" do - "a".index(/a/) - $~.should_not == nil - - string = "blablabla" - string.index(/bla/, string.length + 1) - $~.should == nil - end + it "always clear $~" do + "a".index(/a/) + $~.should_not == nil + + string = "blablabla" + string.index(/bla/, string.length + 1) + $~.should == nil end it "starts the search at the given offset" do @@ -330,21 +328,10 @@ "われわわれ".index(/わ/, 3).should == 3 end - ruby_bug "#19763", ""..."3.3.0" do - it "raises an Encoding::CompatibilityError if the 
encodings are incompatible" do - re = Regexp.new "れ".encode(Encoding::EUC_JP) - -> do - "あれ".index re - end.should raise_error(Encoding::CompatibilityError, "incompatible encoding regexp match (EUC-JP regexp with UTF-8 string)") - end - end - - # The exception message was incorrectly "incompatible character encodings: UTF-8 and EUC-JP" before 3.3.0 - # Still test that the right exception class is used before that. it "raises an Encoding::CompatibilityError if the encodings are incompatible" do re = Regexp.new "れ".encode(Encoding::EUC_JP) -> do "あれ".index re - end.should raise_error(Encoding::CompatibilityError) + end.should raise_error(Encoding::CompatibilityError, "incompatible encoding regexp match (EUC-JP regexp with UTF-8 string)") end end diff --git a/spec/ruby/core/string/shared/chars.rb b/spec/ruby/core/string/shared/chars.rb index c730643cf49874..74a32fb513f024 100644 --- a/spec/ruby/core/string/shared/chars.rb +++ b/spec/ruby/core/string/shared/chars.rb @@ -14,7 +14,6 @@ s.send(@method){}.should equal(s) end - it "is unicode aware" do "\303\207\342\210\202\303\251\306\222g".send(@method).to_a.should == ["\303\207", "\342\210\202", "\303\251", "\306\222", "g"] @@ -63,4 +62,25 @@ [0xA2].pack('C').force_encoding('SJIS') ] end + + it "returns individual chars for dummy encodings" do + "ab".dup.force_encoding(Encoding::UTF_7).send(@method).to_a.should == [ + "\x61".dup.force_encoding(Encoding::UTF_7), + "\x62".dup.force_encoding(Encoding::UTF_7) + ] + + "abcd".dup.force_encoding(Encoding::UTF_16).send(@method).to_a.should == [ + "\x61".dup.force_encoding(Encoding::UTF_16), + "\x62".dup.force_encoding(Encoding::UTF_16), + "\x63".dup.force_encoding(Encoding::UTF_16), + "\x64".dup.force_encoding(Encoding::UTF_16) + ] + + "abcd".dup.force_encoding(Encoding::UTF_32).send(@method).to_a.should == [ + "\x61".dup.force_encoding(Encoding::UTF_32), + "\x62".dup.force_encoding(Encoding::UTF_32), + "\x63".dup.force_encoding(Encoding::UTF_32), + 
"\x64".dup.force_encoding(Encoding::UTF_32) + ] + end end diff --git a/spec/ruby/core/string/shared/codepoints.rb b/spec/ruby/core/string/shared/codepoints.rb index 1c28ba3d5e22ff..ecdf7d719db553 100644 --- a/spec/ruby/core/string/shared/codepoints.rb +++ b/spec/ruby/core/string/shared/codepoints.rb @@ -59,4 +59,9 @@ s.ascii_only?.should be_true s.send(@method).to_a.should == s.bytes.to_a end + + it "returns individual bytes for dummy encodings UTF-16 and UTF-32" do + "abcd".dup.force_encoding(Encoding::UTF_16).send(@method).to_a.should == [97, 98, 99, 100] + "abcd".dup.force_encoding(Encoding::UTF_32).send(@method).to_a.should == [97, 98, 99, 100] + end end diff --git a/spec/ruby/core/string/shared/each_line.rb b/spec/ruby/core/string/shared/each_line.rb index 231a6d9d4ff3a2..c2f3abfa80e0a6 100644 --- a/spec/ruby/core/string/shared/each_line.rb +++ b/spec/ruby/core/string/shared/each_line.rb @@ -159,4 +159,18 @@ a.should == ["hello\r\n", "world\r\n"] end end + + it "does not split lines for dummy UTF-16" do + "a\nb".encode(Encoding::UTF_16).lines.should == [ + "\xFE\xFF\x00\x61\x00\n\x00\x62".dup.force_encoding(Encoding::UTF_16) + ] + + str = "\x00\n\n\x00".dup.force_encoding(Encoding::UTF_16) + str.lines.should == [str] + end + + it "raises Encoding::ConverterNotFoundError for dummy UTF-7" do + str = "a\nb".dup.force_encoding(Encoding::UTF_7) + -> { str.lines }.should raise_error(Encoding::ConverterNotFoundError) + end end diff --git a/spec/ruby/core/string/shared/grapheme_clusters.rb b/spec/ruby/core/string/shared/grapheme_clusters.rb index 8b666868b1df68..985b558f08a03e 100644 --- a/spec/ruby/core/string/shared/grapheme_clusters.rb +++ b/spec/ruby/core/string/shared/grapheme_clusters.rb @@ -9,6 +9,15 @@ a.should == ['a', 'b', "\u{1f3f3}\u{fe0f}\u{200d}\u{1f308}", "\u{1F43E}"] end + it "returns grapheme clusters for various UTF encodings" do + [Encoding::UTF_16LE, Encoding::UTF_16BE, Encoding::UTF_32LE, Encoding::UTF_32BE].each do |enc| + a = [] + # test string: 
abc[rainbow flag emoji][paw prints] + "ab\u{1f3f3}\u{fe0f}\u{200d}\u{1f308}\u{1F43E}".encode(enc).send(@method) { |c| a << c } + a.should == ['a', 'b', "\u{1f3f3}\u{fe0f}\u{200d}\u{1f308}", "\u{1F43E}"].map { |s| s.encode(enc) } + end + end + it "returns self" do s = StringSpecs::MyString.new "ab\u{1f3f3}\u{fe0f}\u{200d}\u{1f308}\u{1F43E}" s.send(@method) {}.should equal(s) diff --git a/spec/ruby/core/string/start_with_spec.rb b/spec/ruby/core/string/start_with_spec.rb index 35e33b46a668ee..8b0ba6b5a7ec3b 100644 --- a/spec/ruby/core/string/start_with_spec.rb +++ b/spec/ruby/core/string/start_with_spec.rb @@ -11,17 +11,8 @@ "\xA9".should.start_with?("\xA9") # A9 is not a character head for UTF-8 end - ruby_version_is ""..."3.3" do - it "does not check we are matching only part of a character" do - "\xe3\x81\x82".size.should == 1 - "\xe3\x81\x82".should.start_with?("\xe3") - end - end - - ruby_version_is "3.3" do # #19784 - it "checks we are matching only part of a character" do - "\xe3\x81\x82".size.should == 1 - "\xe3\x81\x82".should_not.start_with?("\xe3") - end + it "checks we are matching only part of a character" do + "\xe3\x81\x82".size.should == 1 + "\xe3\x81\x82".should_not.start_with?("\xe3") end end diff --git a/spec/ruby/core/string/tr_s_spec.rb b/spec/ruby/core/string/tr_s_spec.rb index dd72da440c93d5..693ff8ace21bb5 100644 --- a/spec/ruby/core/string/tr_s_spec.rb +++ b/spec/ruby/core/string/tr_s_spec.rb @@ -18,13 +18,11 @@ "hello ^--^".tr_s("---", "_").should == "hello ^_^" end - ruby_bug "#19769", ""..."3.3" do - it "accepts c1-c1 notation to denote range of one character" do - "hello".tr_s('e-e', 'x').should == "hxllo" - "123456789".tr_s("2-23","xy").should == "1xy456789" - "hello ^-^".tr_s("e-", "a-a_").should == "hallo ^_^" - "hello ^-^".tr_s("---o", "_a").should == "hella ^_^" - end + it "accepts c1-c1 notation to denote range of one character" do + "hello".tr_s('e-e', 'x').should == "hxllo" + "123456789".tr_s("2-23","xy").should == "1xy456789" + 
"hello ^-^".tr_s("e-", "a-a_").should == "hallo ^_^" + "hello ^-^".tr_s("---o", "_a").should == "hella ^_^" end it "pads to_str with its last char if it is shorter than from_string" do diff --git a/spec/ruby/core/string/tr_spec.rb b/spec/ruby/core/string/tr_spec.rb index 75841a974fcc53..8478ccc9d2879c 100644 --- a/spec/ruby/core/string/tr_spec.rb +++ b/spec/ruby/core/string/tr_spec.rb @@ -17,13 +17,11 @@ "hello ^-^".tr("---", "_").should == "hello ^_^" end - ruby_bug "#19769", ""..."3.3" do - it "accepts c1-c1 notation to denote range of one character" do - "hello".tr('e-e', 'x').should == "hxllo" - "123456789".tr("2-23","xy").should == "1xy456789" - "hello ^-^".tr("e-", "a-a_").should == "hallo ^_^" - "hello ^-^".tr("---o", "_a").should == "hella ^_^" - end + it "accepts c1-c1 notation to denote range of one character" do + "hello".tr('e-e', 'x').should == "hxllo" + "123456789".tr("2-23","xy").should == "1xy456789" + "hello ^-^".tr("e-", "a-a_").should == "hallo ^_^" + "hello ^-^".tr("---o", "_a").should == "hella ^_^" end it "pads to_str with its last char if it is shorter than from_string" do diff --git a/spec/ruby/core/string/unpack/b_spec.rb b/spec/ruby/core/string/unpack/b_spec.rb index b088f901fc026c..70ea1cb6ad98e3 100644 --- a/spec/ruby/core/string/unpack/b_spec.rb +++ b/spec/ruby/core/string/unpack/b_spec.rb @@ -86,20 +86,10 @@ ].should be_computed_by(:unpack, "BBB") end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "\x80\x00".unpack("B\x00B").should == ["1", "0"] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "\x80\x00".unpack("B\x00B") - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x80\x00".unpack("B\x00B") + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do @@ 
-194,20 +184,10 @@ ].should be_computed_by(:unpack, "bbb") end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "\x01\x00".unpack("b\x00b").should == ["1", "0"] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "\x01\x00".unpack("b\x00b") - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x01\x00".unpack("b\x00b") + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do diff --git a/spec/ruby/core/string/unpack/c_spec.rb b/spec/ruby/core/string/unpack/c_spec.rb index 1e9548fb82411a..e42b027c7b8d6c 100644 --- a/spec/ruby/core/string/unpack/c_spec.rb +++ b/spec/ruby/core/string/unpack/c_spec.rb @@ -35,20 +35,10 @@ ].should be_computed_by(:unpack, unpack_format(3)) end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "abc".unpack(unpack_format("\000", 2)).should == [97, 98] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "abc".unpack(unpack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "abc".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do diff --git a/spec/ruby/core/string/unpack/h_spec.rb b/spec/ruby/core/string/unpack/h_spec.rb index 535836087d0f56..130b36401a7d05 100644 --- a/spec/ruby/core/string/unpack/h_spec.rb +++ b/spec/ruby/core/string/unpack/h_spec.rb @@ -56,20 +56,10 @@ ].should be_computed_by(:unpack, "HHH") end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "\x01\x10".unpack("H\x00H").should 
== ["0", "1"] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "\x01\x10".unpack("H\x00H") - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x01\x10".unpack("H\x00H") + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do @@ -133,20 +123,10 @@ ].should be_computed_by(:unpack, "hhh") end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "\x01\x10".unpack("h\x00h").should == ["1", "0"] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "\x01\x10".unpack("h\x00h") - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x01\x10".unpack("h\x00h") + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do diff --git a/spec/ruby/core/string/unpack/shared/basic.rb b/spec/ruby/core/string/unpack/shared/basic.rb index 734630bda0a620..132c4ef08acf21 100644 --- a/spec/ruby/core/string/unpack/shared/basic.rb +++ b/spec/ruby/core/string/unpack/shared/basic.rb @@ -9,20 +9,10 @@ "abc".unpack(d).should be_an_instance_of(Array) end - ruby_version_is ""..."3.3" do - it "warns about using an unknown directive" do - -> { "abcdefgh".unpack("a R" + unpack_format) }.should complain(/unknown unpack directive 'R' in 'a R#{unpack_format}'/) - -> { "abcdefgh".unpack("a 0" + unpack_format) }.should complain(/unknown unpack directive '0' in 'a 0#{unpack_format}'/) - -> { "abcdefgh".unpack("a :" + unpack_format) }.should complain(/unknown unpack directive ':' in 'a :#{unpack_format}'/) - end - end - - ruby_version_is "3.3" do - it "raises ArgumentError when a directive is unknown" do - -> { "abcdefgh".unpack("a 
K" + unpack_format) }.should raise_error(ArgumentError, "unknown unpack directive 'K' in 'a K#{unpack_format}'") - -> { "abcdefgh".unpack("a 0" + unpack_format) }.should raise_error(ArgumentError, "unknown unpack directive '0' in 'a 0#{unpack_format}'") - -> { "abcdefgh".unpack("a :" + unpack_format) }.should raise_error(ArgumentError, "unknown unpack directive ':' in 'a :#{unpack_format}'") - end + it "raises ArgumentError when a directive is unknown" do + -> { "abcdefgh".unpack("a K" + unpack_format) }.should raise_error(ArgumentError, "unknown unpack directive 'K' in 'a K#{unpack_format}'") + -> { "abcdefgh".unpack("a 0" + unpack_format) }.should raise_error(ArgumentError, "unknown unpack directive '0' in 'a 0#{unpack_format}'") + -> { "abcdefgh".unpack("a :" + unpack_format) }.should raise_error(ArgumentError, "unknown unpack directive ':' in 'a :#{unpack_format}'") end end diff --git a/spec/ruby/core/string/unpack/shared/float.rb b/spec/ruby/core/string/unpack/shared/float.rb index b31c2c8bdc406a..0133be2ecb498e 100644 --- a/spec/ruby/core/string/unpack/shared/float.rb +++ b/spec/ruby/core/string/unpack/shared/float.rb @@ -56,21 +56,10 @@ [nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - array = "\x9a\x999@33\xb3?".unpack(unpack_format("\000", 2)) - array.should == [2.9000000953674316, 1.399999976158142] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "\x9a\x999@33\xb3?".unpack(unpack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x9a\x999@33\xb3?".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do @@ -135,21 +124,10 @@ 
[nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - array = "@9\x99\x9a?\xb333".unpack(unpack_format("\000", 2)) - array.should == [2.9000000953674316, 1.399999976158142] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "@9\x99\x9a?\xb333".unpack(unpack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "@9\x99\x9a?\xb333".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do @@ -217,20 +195,10 @@ [nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "333333\x07@ffffff\xf6?".unpack(unpack_format("\000", 2)).should == [2.9, 1.4] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "333333\x07@ffffff\xf6?".unpack(unpack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "333333\x07@ffffff\xf6?".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do @@ -297,20 +265,10 @@ [nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "@\x07333333?\xf6ffffff".unpack(unpack_format("\000", 2)).should == [2.9, 1.4] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - 
"@\x07333333?\xf6ffffff".unpack(unpack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "@\x07333333?\xf6ffffff".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do diff --git a/spec/ruby/core/string/unpack/shared/integer.rb b/spec/ruby/core/string/unpack/shared/integer.rb index d3934753ba3ef8..eb994562251732 100644 --- a/spec/ruby/core/string/unpack/shared/integer.rb +++ b/spec/ruby/core/string/unpack/shared/integer.rb @@ -32,20 +32,10 @@ ].should be_computed_by(:unpack, unpack_format(3)) end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "abcd".unpack(unpack_format("\000", 2)).should == [25185, 25699] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "abcd".unpack(unpack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "abcd".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do @@ -97,20 +87,10 @@ ].should be_computed_by(:unpack, unpack_format(3)) end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "badc".unpack(unpack_format("\000", 2)).should == [25185, 25699] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "badc".unpack(unpack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "badc".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) end it 
"ignores spaces between directives" do @@ -163,20 +143,10 @@ ].should be_computed_by(:unpack, unpack_format(3)) end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "abcdefgh".unpack(unpack_format("\000", 2)).should == [1684234849, 1751606885] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "abcdefgh".unpack(unpack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "abcdefgh".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do @@ -229,20 +199,10 @@ ].should be_computed_by(:unpack, unpack_format(3)) end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "dcbahgfe".unpack(unpack_format("\000", 2)).should == [1684234849, 1751606885] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "dcbahgfe".unpack(unpack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "dcbahgfe".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do @@ -291,21 +251,10 @@ "abc".unpack(unpack_format('*')).should == [] end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - array = "abcdefghabghefcd".unpack(unpack_format("\000", 2)) - array.should == [7523094288207667809, 7233738012216484449] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "badc".unpack(unpack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown unpack 
directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "badc".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do @@ -365,21 +314,10 @@ "abc".unpack(unpack_format('*')).should == [] end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - array = "hgfedcbadcfehgba".unpack(unpack_format("\000", 2)) - array.should == [7523094288207667809, 7233738012216484449] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "hgfedcbadcfehgba".unpack(unpack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "hgfedcbadcfehgba".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do diff --git a/spec/ruby/core/string/unpack/shared/unicode.rb b/spec/ruby/core/string/unpack/shared/unicode.rb index 9fe07f53aec1b3..b056aaed0be627 100644 --- a/spec/ruby/core/string/unpack/shared/unicode.rb +++ b/spec/ruby/core/string/unpack/shared/unicode.rb @@ -50,20 +50,10 @@ "\xc2\x80".unpack("UUUU").should == [0x80] end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "\x01\x02".unpack("U\x00U").should == [1, 2] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "\x01\x02".unpack("U\x00U") - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x01\x02".unpack("U\x00U") + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do diff --git a/spec/ruby/core/string/unpack/w_spec.rb 
b/spec/ruby/core/string/unpack/w_spec.rb index 7d3533ccae109e..d2ad657b09c8b8 100644 --- a/spec/ruby/core/string/unpack/w_spec.rb +++ b/spec/ruby/core/string/unpack/w_spec.rb @@ -15,20 +15,10 @@ ].should be_computed_by(:unpack, "w") end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "\x01\x02\x03".unpack("w\x00w").should == [1, 2] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "\x01\x02\x03".unpack("w\x00w") - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x01\x02\x03".unpack("w\x00w") + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do diff --git a/spec/ruby/core/struct/new_spec.rb b/spec/ruby/core/struct/new_spec.rb index 1d35de7b871230..741d6889af08a1 100644 --- a/spec/ruby/core/struct/new_spec.rb +++ b/spec/ruby/core/struct/new_spec.rb @@ -77,18 +77,10 @@ def obj.to_str() "Foo" end -> { Struct.new(:animal, { name: 'chris' }) }.should raise_error(TypeError) end - ruby_version_is ""..."3.3" do - it "raises ArgumentError if not provided any arguments" do - -> { Struct.new }.should raise_error(ArgumentError) - end - end - - ruby_version_is "3.3" do - it "works when not provided any arguments" do - c = Struct.new - c.should be_kind_of(Class) - c.superclass.should == Struct - end + it "works when not provided any arguments" do + c = Struct.new + c.should be_kind_of(Class) + c.superclass.should == Struct end it "raises ArgumentError when there is a duplicate member" do diff --git a/spec/ruby/core/symbol/inspect_spec.rb b/spec/ruby/core/symbol/inspect_spec.rb index df4566c48e6449..f2269996af0f92 100644 --- a/spec/ruby/core/symbol/inspect_spec.rb +++ b/spec/ruby/core/symbol/inspect_spec.rb @@ -109,4 +109,23 @@ input.inspect.should == expected end end + + it "quotes BINARY symbols" do + 
sym = "foo\xA4".b.to_sym + sym.inspect.should == ':"foo\xA4"' + end + + it "quotes symbols in non-ASCII-compatible encodings" do + Encoding.list.reject(&:ascii_compatible?).reject(&:dummy?).each do |encoding| + sym = "foo".encode(encoding).to_sym + sym.inspect.should == ':"foo"' + end + end + + it "quotes and escapes symbols in dummy encodings" do + Encoding.list.select(&:dummy?).each do |encoding| + sym = "abcd".dup.force_encoding(encoding).to_sym + sym.inspect.should == ':"\x61\x62\x63\x64"' + end + end end diff --git a/spec/ruby/core/thread/backtrace/location/fixtures/classes.rb b/spec/ruby/core/thread/backtrace/location/fixtures/classes.rb index e903c3e450fe97..103c36b3a0ab04 100644 --- a/spec/ruby/core/thread/backtrace/location/fixtures/classes.rb +++ b/spec/ruby/core/thread/backtrace/location/fixtures/classes.rb @@ -1,10 +1,26 @@ +# These are top-level def on purpose to test those cases + +def label_top_method = ThreadBacktraceLocationSpecs::LABEL.call + +def self.label_sdef_method_of_main = ThreadBacktraceLocationSpecs::LABEL.call + +class << self + def label_sclass_method_of_main = ThreadBacktraceLocationSpecs::LABEL.call +end + module ThreadBacktraceLocationSpecs MODULE_LOCATION = caller_locations(0) rescue nil + INSTANCE = Object.new.extend(self) + LABEL = -> { caller_locations(1, 1)[0].label } def self.locations caller_locations end + def instance_method_location + caller_locations(0) + end + def self.method_location caller_locations(0) end @@ -15,6 +31,12 @@ def self.block_location end end + def instance_block_location + 1.times do + return caller_locations(0) + end + end + def self.locations_inside_nested_blocks first_level_location = nil second_level_location = nil @@ -32,4 +54,86 @@ def self.locations_inside_nested_blocks [first_level_location, second_level_location, third_level_location] end + + def instance_locations_inside_nested_block + loc = nil + 1.times do + 1.times do + loc = caller_locations(0) + end + end + loc + end + + def original_method 
= LABEL.call + alias_method :aliased_method, :original_method + + module M + class C + def regular_instance_method = LABEL.call + + def self.sdef_class_method = LABEL.call + + class << self + def sclass_method = LABEL.call + + def block_in_sclass_method + -> { + -> { LABEL.call }.call + }.call + end + end + block_in_sclass_method + end + end + + class M::D + def scoped_method = LABEL.call + + def self.sdef_scoped_method = LABEL.call + + class << self + def sclass_scoped_method = LABEL.call + end + + module ::ThreadBacktraceLocationSpecs + def top = LABEL.call + end + + class ::ThreadBacktraceLocationSpecs::Nested + def top_nested = LABEL.call + + class C + def top_nested_c = LABEL.call + end + end + end + + SOME_OBJECT = Object.new + SOME_OBJECT.instance_exec do + def unknown_def_singleton_method = LABEL.call + + def self.unknown_sdef_singleton_method = LABEL.call + end + + M.module_eval do + def module_eval_method = LABEL.call + + def self.sdef_module_eval_method = LABEL.call + end + + def ThreadBacktraceLocationSpecs.string_class_method = LABEL.call + + module M + def ThreadBacktraceLocationSpecs.nested_class_method = LABEL.call + end + + module M + module_function def mod_function = LABEL.call + end + + expr = self + def expr.sdef_expression = LABEL.call + + def expr.block_in_sdef_expression = -> { LABEL.call }.call end diff --git a/spec/ruby/core/thread/backtrace/location/label_spec.rb b/spec/ruby/core/thread/backtrace/location/label_spec.rb index 85ddccc8e3f831..7d358b45ea8fe3 100644 --- a/spec/ruby/core/thread/backtrace/location/label_spec.rb +++ b/spec/ruby/core/thread/backtrace/location/label_spec.rb @@ -15,7 +15,7 @@ end it 'returns the module name for a module location' do - ThreadBacktraceLocationSpecs::MODULE_LOCATION[0].label.should include "ThreadBacktraceLocationSpecs" + ThreadBacktraceLocationSpecs::MODULE_LOCATION[0].label.should == "" end it 'includes the nesting level of a block as part of the location label' do @@ -34,4 +34,194 @@ 
main_label.should == "block in
\n" required_label.should == "block in \n" end + + it "return the same name as the caller for eval" do + this = caller_locations(0)[0].label + eval("caller_locations(0)[0]").label.should == this + + b = binding + b.eval("caller_locations(0)[0]").label.should == this + + b.local_variable_set(:binding_var1, 1) + b.eval("caller_locations(0)[0]").label.should == this + + b.local_variable_set(:binding_var2, 2) + b.eval("caller_locations(0)[0]").label.should == this + + b.local_variable_set(:binding_var2, 2) + eval("caller_locations(0)[0]", b).label.should == this + end + + ruby_version_is "3.4" do + describe "is Module#method for" do + it "a core method defined natively" do + BasicObject.instance_method(:instance_exec).should_not.source_location + loc = nil + loc = instance_exec { caller_locations(1, 1)[0] } + loc.label.should == "BasicObject#instance_exec" + end + + it "a core method defined in Ruby" do + Kernel.instance_method(:tap).should.source_location + loc = nil + tap { loc = caller_locations(1, 1)[0] } + loc.label.should == "Kernel#tap" + end + + it "an instance method defined in Ruby" do + ThreadBacktraceLocationSpecs::INSTANCE.instance_method_location[0].label.should == "ThreadBacktraceLocationSpecs#instance_method_location" + end + + it "a block in an instance method defined in Ruby" do + ThreadBacktraceLocationSpecs::INSTANCE.instance_block_location[0].label.should == "block in ThreadBacktraceLocationSpecs#instance_block_location" + end + + it "a nested block in an instance method defined in Ruby" do + ThreadBacktraceLocationSpecs::INSTANCE.instance_locations_inside_nested_block[0].label.should == "block (2 levels) in ThreadBacktraceLocationSpecs#instance_locations_inside_nested_block" + end + + it "a method defined via module_exec" do + ThreadBacktraceLocationSpecs.module_exec do + def in_module_exec + caller_locations(0) + end + end + ThreadBacktraceLocationSpecs::INSTANCE.in_module_exec[0].label.should == "ThreadBacktraceLocationSpecs#in_module_exec" + 
end + + it "a method defined via module_eval" do + ThreadBacktraceLocationSpecs.module_eval <<~RUBY + def in_module_eval + caller_locations(0) + end + RUBY + ThreadBacktraceLocationSpecs::INSTANCE.in_module_eval[0].label.should == "ThreadBacktraceLocationSpecs#in_module_eval" + end + end + + describe "is Module.method for" do + it "a singleton method defined in Ruby" do + ThreadBacktraceLocationSpecs.method_location[0].label.should == "ThreadBacktraceLocationSpecs.method_location" + end + + it "a block in a singleton method defined in Ruby" do + ThreadBacktraceLocationSpecs.block_location[0].label.should == "block in ThreadBacktraceLocationSpecs.block_location" + end + + it "a nested block in a singleton method defined in Ruby" do + ThreadBacktraceLocationSpecs.locations_inside_nested_blocks[2].label.should == "block (3 levels) in ThreadBacktraceLocationSpecs.locations_inside_nested_blocks" + end + + it "a singleton method defined via def Const.method" do + def ThreadBacktraceLocationSpecs.def_singleton + caller_locations(0) + end + ThreadBacktraceLocationSpecs.def_singleton[0].label.should == "ThreadBacktraceLocationSpecs.def_singleton" + end + end + + it "shows the original method name for an aliased method" do + ThreadBacktraceLocationSpecs::INSTANCE.aliased_method.should == "ThreadBacktraceLocationSpecs#original_method" + end + + # A wide variety of cases. 
+ # These show interesting cases when trying to determine the name statically/at parse time + describe "is correct for" do + base = ThreadBacktraceLocationSpecs + + it "M::C#regular_instance_method" do + base::M::C.new.regular_instance_method.should == "#{base}::M::C#regular_instance_method" + end + + it "M::C.sdef_class_method" do + base::M::C.sdef_class_method.should == "#{base}::M::C.sdef_class_method" + end + + it "M::C.sclass_method" do + base::M::C.sclass_method.should == "#{base}::M::C.sclass_method" + end + + it "M::C.block_in_sclass_method" do + base::M::C.block_in_sclass_method.should == "block (2 levels) in #{base}::M::C.block_in_sclass_method" + end + + it "M::D#scoped_method" do + base::M::D.new.scoped_method.should == "#{base}::M::D#scoped_method" + end + + it "M::D.sdef_scoped_method" do + base::M::D.sdef_scoped_method.should == "#{base}::M::D.sdef_scoped_method" + end + + it "M::D.sclass_scoped_method" do + base::M::D.sclass_scoped_method.should == "#{base}::M::D.sclass_scoped_method" + end + + it "ThreadBacktraceLocationSpecs#top" do + ThreadBacktraceLocationSpecs::INSTANCE.top.should == "ThreadBacktraceLocationSpecs#top" + end + + it "ThreadBacktraceLocationSpecs::Nested#top_nested" do + ThreadBacktraceLocationSpecs::Nested.new.top_nested.should == "ThreadBacktraceLocationSpecs::Nested#top_nested" + end + + it "ThreadBacktraceLocationSpecs::Nested::C#top_nested_c" do + ThreadBacktraceLocationSpecs::Nested::C.new.top_nested_c.should == "ThreadBacktraceLocationSpecs::Nested::C#top_nested_c" + end + + it "Object#label_top_method" do + label_top_method.should == "Object#label_top_method" + end + + it "main.label_sdef_method_of_main" do + main = TOPLEVEL_BINDING.receiver + main.label_sdef_method_of_main.should == "label_sdef_method_of_main" + end + + it "main.label_sclass_method_of_main" do + main = TOPLEVEL_BINDING.receiver + main.label_sclass_method_of_main.should == "label_sclass_method_of_main" + end + + it "unknown_def_singleton_method" do + 
base::SOME_OBJECT.unknown_def_singleton_method.should == "unknown_def_singleton_method" + end + + it "unknown_sdef_singleton_method" do + base::SOME_OBJECT.unknown_sdef_singleton_method.should == "unknown_sdef_singleton_method" + end + + it "M#module_eval_method" do + Object.new.extend(base::M).module_eval_method.should == "#{base}::M#module_eval_method" + end + + it "M.sdef_module_eval_method" do + base::M.sdef_module_eval_method.should == "#{base}::M.sdef_module_eval_method" + end + + it "ThreadBacktraceLocationSpecs.string_class_method" do + ThreadBacktraceLocationSpecs.string_class_method.should == "ThreadBacktraceLocationSpecs.string_class_method" + end + + it "ThreadBacktraceLocationSpecs.nested_class_method" do + ThreadBacktraceLocationSpecs.nested_class_method.should == "ThreadBacktraceLocationSpecs.nested_class_method" + end + + it "M#mod_function" do + Object.new.extend(base::M).send(:mod_function).should == "#{base}::M#mod_function" + end + + it "M.mod_function" do + base::M.mod_function.should == "#{base}::M.mod_function" + end + + it "sdef_expression" do + base.sdef_expression.should == "#{base}.sdef_expression" + end + + it "block_in_sdef_expression" do + base.block_in_sdef_expression.should == "block in #{base}.block_in_sdef_expression" + end + end + end end diff --git a/spec/ruby/core/thread/native_thread_id_spec.rb b/spec/ruby/core/thread/native_thread_id_spec.rb index 374cc592797a20..65d1b5b318dbac 100644 --- a/spec/ruby/core/thread/native_thread_id_spec.rb +++ b/spec/ruby/core/thread/native_thread_id_spec.rb @@ -18,12 +18,8 @@ main_thread_id = Thread.current.native_thread_id t_thread_id = t.native_thread_id - if ruby_version_is "3.3" - # native_thread_id can be nil on a M:N scheduler - t_thread_id.should be_kind_of(Integer) if t_thread_id != nil - else - t_thread_id.should be_kind_of(Integer) - end + # native_thread_id can be nil on a M:N scheduler + t_thread_id.should be_kind_of(Integer) if t_thread_id != nil main_thread_id.should_not == 
t_thread_id diff --git a/spec/ruby/core/time/new_spec.rb b/spec/ruby/core/time/new_spec.rb index dc3ccbdc0052df..f3b5d0142044b4 100644 --- a/spec/ruby/core/time/new_spec.rb +++ b/spec/ruby/core/time/new_spec.rb @@ -554,20 +554,10 @@ def obj.to_int; 3; end Time.new("2020-12-25T00:56:17.123456789876 +09:00").subsec.should == 0.123456789 end - ruby_version_is ""..."3.3" do - it "raise TypeError is can't convert precision keyword argument into Integer" do - -> { - Time.new("2021-12-25 00:00:00.123456789876 +09:00", precision: "") - }.should raise_error(TypeError, "no implicit conversion from string") - end - end - - ruby_version_is "3.3" do - it "raise TypeError is can't convert precision keyword argument into Integer" do - -> { - Time.new("2021-12-25 00:00:00.123456789876 +09:00", precision: "") - }.should raise_error(TypeError, "no implicit conversion of String into Integer") - end + it "raise TypeError is can't convert precision keyword argument into Integer" do + -> { + Time.new("2021-12-25 00:00:00.123456789876 +09:00", precision: "") + }.should raise_error(TypeError, "no implicit conversion of String into Integer") end it "raises ArgumentError if part of time string is missing" do diff --git a/spec/ruby/core/tracepoint/path_spec.rb b/spec/ruby/core/tracepoint/path_spec.rb index dc2ca840b80ca5..aa6868ead2ffd8 100644 --- a/spec/ruby/core/tracepoint/path_spec.rb +++ b/spec/ruby/core/tracepoint/path_spec.rb @@ -13,29 +13,14 @@ path.should == "#{__FILE__}" end - ruby_version_is ""..."3.3" do - it 'equals (eval) inside an eval for :end event' do - path = nil - TracePoint.new(:end) { |tp| - next unless TracePointSpec.target_thread? - path = tp.path - }.enable do - eval("module TracePointSpec; end") - end - path.should == '(eval)' - end - end - - ruby_version_is "3.3" do - it 'equals "(eval at __FILE__:__LINE__)" inside an eval for :end event' do - path = nil - TracePoint.new(:end) { |tp| - next unless TracePointSpec.target_thread? 
- path = tp.path - }.enable do - eval("module TracePointSpec; end") - end - path.should == "(eval at #{__FILE__}:#{__LINE__ - 2})" + it 'equals "(eval at __FILE__:__LINE__)" inside an eval for :end event' do + path = nil + TracePoint.new(:end) { |tp| + next unless TracePointSpec.target_thread? + path = tp.path + }.enable do + eval("module TracePointSpec; end") end + path.should == "(eval at #{__FILE__}:#{__LINE__ - 2})" end end diff --git a/spec/ruby/core/tracepoint/raised_exception_spec.rb b/spec/ruby/core/tracepoint/raised_exception_spec.rb index 5ac85318404964..e74afa9abc96c1 100644 --- a/spec/ruby/core/tracepoint/raised_exception_spec.rb +++ b/spec/ruby/core/tracepoint/raised_exception_spec.rb @@ -18,21 +18,19 @@ end end - ruby_version_is "3.3" do - it 'returns value from exception rescued on the :rescue event' do - raised_exception, error_result = nil - trace = TracePoint.new(:rescue) { |tp| - next unless TracePointSpec.target_thread? - raised_exception = tp.raised_exception - } - trace.enable do - begin - raise StandardError - rescue => e - error_result = e - end - raised_exception.should equal(error_result) + it 'returns value from exception rescued on the :rescue event' do + raised_exception, error_result = nil + trace = TracePoint.new(:rescue) { |tp| + next unless TracePointSpec.target_thread? 
+ raised_exception = tp.raised_exception + } + trace.enable do + begin + raise StandardError + rescue => e + error_result = e end + raised_exception.should equal(error_result) end end end diff --git a/spec/ruby/core/true/singleton_method_spec.rb b/spec/ruby/core/true/singleton_method_spec.rb index c06793850fa87a..575c504b728da3 100644 --- a/spec/ruby/core/true/singleton_method_spec.rb +++ b/spec/ruby/core/true/singleton_method_spec.rb @@ -1,15 +1,13 @@ require_relative '../../spec_helper' describe "TrueClass#singleton_method" do - ruby_version_is '3.3' do - it "raises regardless of whether TrueClass defines the method" do + it "raises regardless of whether TrueClass defines the method" do + -> { true.singleton_method(:foo) }.should raise_error(NameError) + begin + def (true).foo; end -> { true.singleton_method(:foo) }.should raise_error(NameError) - begin - def (true).foo; end - -> { true.singleton_method(:foo) }.should raise_error(NameError) - ensure - TrueClass.send(:remove_method, :foo) - end + ensure + TrueClass.send(:remove_method, :foo) end end end diff --git a/spec/ruby/core/unboundmethod/equal_value_spec.rb b/spec/ruby/core/unboundmethod/equal_value_spec.rb index b2d78c50afb359..c9f7ad45dacc82 100644 --- a/spec/ruby/core/unboundmethod/equal_value_spec.rb +++ b/spec/ruby/core/unboundmethod/equal_value_spec.rb @@ -110,9 +110,6 @@ class << self c.method(:n).should == Class.instance_method(:new).bind(c) end - # On CRuby < 3.2, the 2 specs below pass due to method/instance_method skipping zsuper methods. - # We are interested in the general pattern working, i.e. the combination of method/instance_method - # and #== exposes the wanted behavior. 
it "considers methods through visibility change equal" do c = Class.new do class << self diff --git a/spec/ruby/core/warning/element_reference_spec.rb b/spec/ruby/core/warning/element_reference_spec.rb index c0ed37ef139d05..6179c578646255 100644 --- a/spec/ruby/core/warning/element_reference_spec.rb +++ b/spec/ruby/core/warning/element_reference_spec.rb @@ -10,11 +10,9 @@ ruby_exe('p [Warning[:deprecated], Warning[:experimental]]', options: "-w").chomp.should == "[true, true]" end - ruby_version_is '3.3' do - it "returns default values for :performance category" do - ruby_exe('p Warning[:performance]').chomp.should == "false" - ruby_exe('p Warning[:performance]', options: "-w").chomp.should == "false" - end + it "returns default values for :performance category" do + ruby_exe('p Warning[:performance]').chomp.should == "false" + ruby_exe('p Warning[:performance]', options: "-w").chomp.should == "false" end it "raises for unknown category" do diff --git a/spec/ruby/core/warning/element_set_spec.rb b/spec/ruby/core/warning/element_set_spec.rb index d59a7d4c9e13c8..1dbc66ce26cae9 100644 --- a/spec/ruby/core/warning/element_set_spec.rb +++ b/spec/ruby/core/warning/element_set_spec.rb @@ -17,15 +17,13 @@ end end - ruby_version_is '3.3' do - it "enables or disables performance warnings" do - original = Warning[:performance] - begin - Warning[:performance] = !original - Warning[:performance].should == !original - ensure - Warning[:performance] = original - end + it "enables or disables performance warnings" do + original = Warning[:performance] + begin + Warning[:performance] = !original + Warning[:performance].should == !original + ensure + Warning[:performance] = original end end diff --git a/spec/ruby/language/assignments_spec.rb b/spec/ruby/language/assignments_spec.rb index c4adf73c1cbf67..58a244b7c27d87 100644 --- a/spec/ruby/language/assignments_spec.rb +++ b/spec/ruby/language/assignments_spec.rb @@ -219,15 +219,7 @@ def []=(*args, **kw) end end - ruby_version_is 
""..."3.3" do - it "supports keyword arguments in index assignments" do - a = @klass.new - eval "a[1, 2, 3, b: 4] += 5" - a.x.should == [[1, 2, 3, {b: 4}, 105], {}] - end - end - - ruby_version_is "3.3"..."3.4" do + ruby_version_is ""..."3.4" do it "supports keyword arguments in index assignments" do a = @klass.new eval "a[1, 2, 3, b: 4] += 5" diff --git a/spec/ruby/language/block_spec.rb b/spec/ruby/language/block_spec.rb index cc003b8946270e..67aad76c57e922 100644 --- a/spec/ruby/language/block_spec.rb +++ b/spec/ruby/language/block_spec.rb @@ -192,6 +192,22 @@ def m(a) yield a end m(obj) { |a, b, c| [a, b, c] }.should == [1, 2, nil] end + it "calls #respond_to? on a BasicObject to check if object has method #to_ary" do + ScratchPad.record [] + obj = BasicObject.new + def obj.respond_to?(name, *) + ScratchPad << [:respond_to?, name] + name == :to_ary ? true : super + end + def obj.to_ary + ScratchPad << :to_ary + [1, 2] + end + + m(obj) { |a, b, c| [a, b, c] }.should == [1, 2, nil] + ScratchPad.recorded.should == [[:respond_to?, :to_ary], :to_ary] + end + it "receives the object if it does not respond to #respond_to?" 
do obj = BasicObject.new @@ -1041,8 +1057,8 @@ def all_kwrest(arg1, arg2, *rest, post1, post2, kw1: 1, kw2: 2, okw1:, okw2:, ** end end -describe "`it` calls without arguments in a block with no ordinary parameters" do - ruby_version_is "3.3"..."3.4" do +describe "`it` calls without arguments in a block" do + ruby_version_is ""..."3.4" do it "emits a deprecation warning" do -> { eval "proc { it }" @@ -1094,38 +1110,11 @@ def o.it end end end - - ruby_version_is "3.4" do - it "does not emit a deprecation warning" do - -> { - eval "proc { it }" - }.should_not complain - end - - it "acts as the first argument if no local variables exist" do - eval("proc { it * 2 }").call(5).should == 10 - end - - it "can be reassigned to act as a local variable" do - eval("proc { tmp = it; it = tmp * 2; it }").call(21).should == 42 - end - - it "can be used in nested calls" do - eval("proc { it.map { it * 2 } }").call([1, 2, 3]).should == [2, 4, 6] - end - - it "cannot be mixed with numbered parameters" do - -> { - eval "proc { it + _1 }" - }.should raise_error(SyntaxError, /numbered parameters are not allowed when 'it' is already used|'it' is already used in/) - - -> { - eval "proc { _1 + it }" - }.should raise_error(SyntaxError, /numbered parameter is already used in|'it' is not allowed when a numbered parameter is already used/) - end - end end +# Duplicates specs in language/it_parameter_spec.rb +# Need them here to run on Ruby versions prior 3.4 +# TODO: remove when the minimal supported Ruby version is 3.4 describe "if `it` is defined as a variable" do it "treats `it` as a captured variable if defined outside of a block" do it = 5 diff --git a/spec/ruby/language/delegation_spec.rb b/spec/ruby/language/delegation_spec.rb index c711a536c22d71..cd44956f5d1c65 100644 --- a/spec/ruby/language/delegation_spec.rb +++ b/spec/ruby/language/delegation_spec.rb @@ -37,6 +37,16 @@ def delegate(...) 
a.new.delegate(1, b: 2, &block).should == [[1], {b: 2}, block] end + it "delegates with additional arguments" do + a = Class.new(DelegationSpecs::Target) + a.class_eval(<<-RUBY) + def delegate(...) + target(:first, :second, ...) + end + RUBY + a.new.delegate(1, b: 2).should == [[:first, :second, 1], {b: 2}, nil] + end + it "parses as open endless Range when brackets are omitted" do a = Class.new(DelegationSpecs::Target) suppress_warning do @@ -99,13 +109,11 @@ def delegate(*) a.new.delegate(0, 1).should == [[0, 1], {}, nil] end - ruby_version_is "3.3" do - context "within a block that accepts anonymous rest within a method that accepts anonymous rest" do - it "does not allow delegating rest" do - -> { - eval "def m(*); proc { |*| n(*) } end" - }.should raise_error(SyntaxError, /anonymous rest parameter is also used within block/) - end + context "within a block that accepts anonymous rest within a method that accepts anonymous rest" do + it "does not allow delegating rest" do + -> { + eval "def m(*); proc { |*| n(*) } end" + }.should raise_error(SyntaxError, /anonymous rest parameter is also used within block/) end end end @@ -122,13 +130,11 @@ def delegate(**) a.new.delegate(a: 1) { |x| x }.should == [[], {a: 1}, nil] end - ruby_version_is "3.3" do - context "within a block that accepts anonymous kwargs within a method that accepts anonymous kwargs" do - it "does not allow delegating kwargs" do - -> { - eval "def m(**); proc { |**| n(**) } end" - }.should raise_error(SyntaxError, /anonymous keyword rest parameter is also used within block/) - end + context "within a block that accepts anonymous kwargs within a method that accepts anonymous kwargs" do + it "does not allow delegating kwargs" do + -> { + eval "def m(**); proc { |**| n(**) } end" + }.should raise_error(SyntaxError, /anonymous keyword rest parameter is also used within block/) end end end @@ -146,13 +152,11 @@ def delegate(&) a.new.delegate(&block).should == [[], {}, block] end - ruby_version_is "3.3" 
do - context "within a block that accepts anonymous block within a method that accepts anonymous block" do - it "does not allow delegating a block" do - -> { - eval "def m(&); proc { |&| n(&) } end" - }.should raise_error(SyntaxError, /anonymous block parameter is also used within block/) - end + context "within a block that accepts anonymous block within a method that accepts anonymous block" do + it "does not allow delegating a block" do + -> { + eval "def m(&); proc { |&| n(&) } end" + }.should raise_error(SyntaxError, /anonymous block parameter is also used within block/) end end end diff --git a/spec/ruby/language/file_spec.rb b/spec/ruby/language/file_spec.rb index 59563d9642e00e..36fd329bf6a7ca 100644 --- a/spec/ruby/language/file_spec.rb +++ b/spec/ruby/language/file_spec.rb @@ -7,16 +7,8 @@ -> { eval("__FILE__ = 1") }.should raise_error(SyntaxError) end - ruby_version_is ""..."3.3" do - it "equals (eval) inside an eval" do - eval("__FILE__").should == "(eval)" - end - end - - ruby_version_is "3.3" do - it "equals (eval at __FILE__:__LINE__) inside an eval" do - eval("__FILE__").should == "(eval at #{__FILE__}:#{__LINE__})" - end + it "equals (eval at __FILE__:__LINE__) inside an eval" do + eval("__FILE__").should == "(eval at #{__FILE__}:#{__LINE__})" end end diff --git a/spec/ruby/language/for_spec.rb b/spec/ruby/language/for_spec.rb index b8ddfe5f0ddfb0..7fc6751d070eb1 100644 --- a/spec/ruby/language/for_spec.rb +++ b/spec/ruby/language/for_spec.rb @@ -129,37 +129,34 @@ class OFor n.should == 3 end - # Segfault in MRI 3.3 and lower: https://bugs.ruby-lang.org/issues/20468 - ruby_bug "#20468", ""..."3.4" do - it "allows an attribute with safe navigation as an iterator name" do - class OFor - attr_accessor :target - end - - ofor = OFor.new - m = [1,2,3] - n = 0 - eval <<~RUBY - for ofor&.target in m - n += 1 - end - RUBY - ofor.target.should == 3 - n.should == 3 + it "allows an attribute with safe navigation as an iterator name" do + class OFor + 
attr_accessor :target end - it "allows an attribute with safe navigation on a nil base as an iterator name" do - ofor = nil - m = [1,2,3] - n = 0 - eval <<~RUBY - for ofor&.target in m - n += 1 - end - RUBY - ofor.should be_nil - n.should == 3 - end + ofor = OFor.new + m = [1,2,3] + n = 0 + eval <<~RUBY + for ofor&.target in m + n += 1 + end + RUBY + ofor.target.should == 3 + n.should == 3 + end + + it "allows an attribute with safe navigation on a nil base as an iterator name" do + ofor = nil + m = [1,2,3] + n = 0 + eval <<~RUBY + for ofor&.target in m + n += 1 + end + RUBY + ofor.should be_nil + n.should == 3 end it "allows an array index writer as an iterator name" do diff --git a/spec/ruby/language/hash_spec.rb b/spec/ruby/language/hash_spec.rb index 668716e2e325da..c7e1bf2d88bffd 100644 --- a/spec/ruby/language/hash_spec.rb +++ b/spec/ruby/language/hash_spec.rb @@ -167,6 +167,17 @@ def h.to_hash; {:b => 2, :c => 3}; end {**nil}.should == {} {a: 1, **nil}.should == {a: 1} end + + it "expands nil using ** into {} and provides a copy to the callable" do + ScratchPad.record [] + insert = -> key, **kw do + kw[key] = 1 + ScratchPad << kw + end + insert.call(:foo, **nil) + insert.call(:bar, **nil) + ScratchPad.recorded.should == [{ foo: 1 }, { bar: 1 }] + end end it "expands an '**{}' or '**obj' element with the last key/value pair taking precedence" do @@ -264,17 +275,15 @@ def m(**h) h.should == { one: 1, two: 2 } end - ruby_bug "#20012", ""..."3.3" do - it "makes a copy when calling a method taking a positional Hash" do - def m(h) - h.delete(:one); h - end - - h = { one: 1, two: 2 } - m(**h).should == { two: 2 } - m(**h).should_not.equal?(h) - h.should == { one: 1, two: 2 } + it "makes a copy when calling a method taking a positional Hash" do + def m(h) + h.delete(:one); h end + + h = { one: 1, two: 2 } + m(**h).should == { two: 2 } + m(**h).should_not.equal?(h) + h.should == { one: 1, two: 2 } end describe "hash with omitted value" do diff --git 
a/spec/ruby/language/it_parameter_spec.rb b/spec/ruby/language/it_parameter_spec.rb index 72023180d91d54..58ec3a6faf0f1a 100644 --- a/spec/ruby/language/it_parameter_spec.rb +++ b/spec/ruby/language/it_parameter_spec.rb @@ -1,6 +1,7 @@ require_relative '../spec_helper' ruby_version_is "3.4" do + eval <<-RUBY # use eval to avoid warnings on Ruby 3.3 describe "The `it` parameter" do it "provides it in a block" do -> { it }.call("a").should == "a" @@ -17,9 +18,28 @@ -> { it + -> { it * it }.call(2) }.call(3).should == 7 end + it "can be reassigned to act as a local variable" do + proc { tmp = it; it = tmp * 2; it }.call(21).should == 42 + end + it "is a regular local variable if there is already a 'it' local variable" do - it = 0 - proc { it }.call("a").should == 0 + it = 0 + proc { it }.call("a").should == 0 + end + + it "is a regular local variable if there is a method `it` defined" do + o = Object.new + def o.it + 21 + end + + o.instance_eval("proc { it * 2 }").call(1).should == 2 + end + + it "is not shadowed by an reassignment in a block" do + a = nil + proc { a = it; it = 42 }.call(0) + a.should == 0 # if `it` were shadowed its value would be nil end it "raises SyntaxError when block parameters are specified explicitly" do @@ -36,6 +56,16 @@ -> { eval("['a'].map { |x| it }") }.should raise_error(SyntaxError, /ordinary parameter is defined/) end + it "cannot be mixed with numbered parameters" do + -> { + eval("proc { it + _1 }") + }.should raise_error(SyntaxError, /numbered parameters are not allowed when 'it' is already used|'it' is already used in/) + + -> { + eval("proc { _1 + it }") + }.should raise_error(SyntaxError, /numbered parameter is already used in|'it' is not allowed when a numbered parameter is already used/) + end + it "affects block arity" do -> {}.arity.should == 0 -> { it }.arity.should == 1 @@ -62,5 +92,17 @@ def obj.foo; it; end -> { obj.foo("a") }.should raise_error(ArgumentError, /wrong number of arguments/) end + + context "given multiple 
arguments" do + it "provides it in a block and assigns the first argument for a block" do + proc { it }.call("a", "b").should == "a" + end + + it "raises ArgumentError for a proc" do + -> { -> { it }.call("a", "b") }.should raise_error(ArgumentError, "wrong number of arguments (given 2, expected 1)") + -> { lambda { it }.call("a", "b") }.should raise_error(ArgumentError, "wrong number of arguments (given 2, expected 1)") + end + end end + RUBY end diff --git a/spec/ruby/language/keyword_arguments_spec.rb b/spec/ruby/language/keyword_arguments_spec.rb index 4f6370d419e03b..c51c3bc656d4e9 100644 --- a/spec/ruby/language/keyword_arguments_spec.rb +++ b/spec/ruby/language/keyword_arguments_spec.rb @@ -87,16 +87,14 @@ def m(*a) end context "**" do - ruby_version_is "3.3" do - it "copies a non-empty Hash for a method taking (*args)" do - def m(*args) - args[0] - end - - h = {a: 1} - m(**h).should_not.equal?(h) - h.should == {a: 1} + it "copies a non-empty Hash for a method taking (*args)" do + def m(*args) + args[0] end + + h = {a: 1} + m(**h).should_not.equal?(h) + h.should == {a: 1} end it "copies the given Hash for a method taking (**kwargs)" do diff --git a/spec/ruby/language/method_spec.rb b/spec/ruby/language/method_spec.rb index 8f72bd45ed8cbe..8f9f094fd89a45 100644 --- a/spec/ruby/language/method_spec.rb +++ b/spec/ruby/language/method_spec.rb @@ -1234,10 +1234,8 @@ def n(value, &block) args.should == [true] end - ruby_version_is "3.3" do - it "supports multiple statements" do - eval("m (1; 2)").should == [2] - end + it "supports multiple statements" do + eval("m (1; 2)").should == [2] end end diff --git a/spec/ruby/library/English/English_spec.rb b/spec/ruby/library/English/English_spec.rb index 4d615d1e2506ef..166785f066640b 100644 --- a/spec/ruby/library/English/English_spec.rb +++ b/spec/ruby/library/English/English_spec.rb @@ -130,18 +130,6 @@ $LAST_MATCH_INFO.should == $~ end - ruby_version_is ""..."3.3" do - it "aliases $IGNORECASE to $=" do - $VERBOSE, 
verbose = nil, $VERBOSE - begin - $IGNORECASE.should_not be_nil - $IGNORECASE.should == $= - ensure - $VERBOSE = verbose - end - end - end - it "aliases $ARGV to $*" do $ARGV.should_not be_nil $ARGV.should == $* diff --git a/spec/ruby/library/bigdecimal/remainder_spec.rb b/spec/ruby/library/bigdecimal/remainder_spec.rb index 0eb06f7ef1d402..b31967e76bd53c 100644 --- a/spec/ruby/library/bigdecimal/remainder_spec.rb +++ b/spec/ruby/library/bigdecimal/remainder_spec.rb @@ -56,25 +56,6 @@ @nan.remainder(@infinity).should.nan? end - version_is BigDecimal::VERSION, ""..."3.1.4" do #ruby_version_is ""..."3.3" do - it "returns NaN if Infinity is involved" do - @infinity.remainder(@infinity).should.nan? - @infinity.remainder(@one).should.nan? - @infinity.remainder(@mixed).should.nan? - @infinity.remainder(@one_minus).should.nan? - @infinity.remainder(@frac_1).should.nan? - @one.remainder(@infinity).should.nan? - - @infinity_minus.remainder(@infinity_minus).should.nan? - @infinity_minus.remainder(@one).should.nan? - @one.remainder(@infinity_minus).should.nan? - @frac_2.remainder(@infinity_minus).should.nan? - - @infinity.remainder(@infinity_minus).should.nan? - @infinity_minus.remainder(@infinity).should.nan? 
- end - end - it "coerces arguments to BigDecimal if possible" do @three.remainder(2).should == @one end diff --git a/spec/ruby/library/bigdecimal/to_s_spec.rb b/spec/ruby/library/bigdecimal/to_s_spec.rb index ba9f960eb32450..025057b4d7e873 100644 --- a/spec/ruby/library/bigdecimal/to_s_spec.rb +++ b/spec/ruby/library/bigdecimal/to_s_spec.rb @@ -52,10 +52,8 @@ BigDecimal("1.2345").to_s('0F').should == "1.2345" end - version_is BigDecimal::VERSION, "3.1.5" do #ruby_version_is '3.3' do - it "inserts a space every n chars to integer part, if integer n is supplied" do - BigDecimal('1000010').to_s('5F').should == "10 00010.0" - end + it "inserts a space every n chars to integer part, if integer n is supplied" do + BigDecimal('1000010').to_s('5F').should == "10 00010.0" end it "can return a leading space for values > 0" do diff --git a/spec/ruby/library/random/formatter/alphanumeric_spec.rb b/spec/ruby/library/random/formatter/alphanumeric_spec.rb index 9bd325e1d0a6ab..ce45b96dc2b7a7 100644 --- a/spec/ruby/library/random/formatter/alphanumeric_spec.rb +++ b/spec/ruby/library/random/formatter/alphanumeric_spec.rb @@ -41,16 +41,14 @@ }.should raise_error(ArgumentError) end - ruby_version_is "3.3" do - it "accepts a 'chars' argument with the output alphabet" do - @object.alphanumeric(chars: ['a', 'b']).should =~ /\A[ab]+\z/ - end + it "accepts a 'chars' argument with the output alphabet" do + @object.alphanumeric(chars: ['a', 'b']).should =~ /\A[ab]+\z/ + end - it "converts the elements of chars using #to_s" do - to_s = mock("to_s") - to_s.should_receive(:to_s).and_return("[mock to_s]") - # Using 1 value in chars results in an infinite loop - @object.alphanumeric(1, chars: [to_s, to_s]).should == "[mock to_s]" - end + it "converts the elements of chars using #to_s" do + to_s = mock("to_s") + to_s.should_receive(:to_s).and_return("[mock to_s]") + # Using 1 value in chars results in an infinite loop + @object.alphanumeric(1, chars: [to_s, to_s]).should == "[mock to_s]" end 
end diff --git a/spec/ruby/library/ripper/lex_spec.rb b/spec/ruby/library/ripper/lex_spec.rb index 97cfb06904fad6..0255480579ee4f 100644 --- a/spec/ruby/library/ripper/lex_spec.rb +++ b/spec/ruby/library/ripper/lex_spec.rb @@ -10,14 +10,14 @@ [[1, 5], :on_lparen, "(", 'BEG|LABEL'], [[1, 6], :on_ident, "a", 'ARG'], [[1, 7], :on_rparen, ")", 'ENDFN'], - [[1, 8], :on_sp, " ", 'BEG'], + [[1, 8], :on_semicolon, ";", 'BEG'], [[1, 9], :on_kw, "nil", 'END'], [[1, 12], :on_sp, " ", 'END'], [[1, 13], :on_kw, "end", 'END'] ] - lexed = Ripper.lex("def m(a) nil end") + lexed = Ripper.lex("def m(a);nil end") lexed.map { |e| - e[0...-1] + [e[-1].to_s.split('|').map { |s| s.sub(/^EXPR_/, '') }.join('|')] + e[0...-1] + [e[-1].to_s] }.should == expected end end diff --git a/spec/ruby/library/socket/addrinfo/initialize_spec.rb b/spec/ruby/library/socket/addrinfo/initialize_spec.rb index 1f16531aaa4dea..c556bd758b925a 100644 --- a/spec/ruby/library/socket/addrinfo/initialize_spec.rb +++ b/spec/ruby/library/socket/addrinfo/initialize_spec.rb @@ -53,11 +53,11 @@ @addrinfo.ip_port.should == 25 end - it "returns the INET6 pfamily" do + it "returns the specified family" do @addrinfo.pfamily.should == Socket::PF_INET6 end - it "returns the INET6 afamily" do + it "returns the specified family" do @addrinfo.afamily.should == Socket::AF_INET6 end @@ -83,11 +83,11 @@ @addrinfo.ip_port.should == 25 end - it "returns the INET6 pfamily" do + it "returns the specified family" do @addrinfo.pfamily.should == Socket::PF_INET6 end - it "returns the INET6 afamily" do + it "returns the specified family" do @addrinfo.afamily.should == Socket::AF_INET6 end @@ -113,11 +113,11 @@ @addrinfo.ip_port.should == 25 end - it "returns the INET6 pfamily" do + it "returns the specified family" do @addrinfo.pfamily.should == Socket::PF_INET6 end - it "returns the INET6 afamily" do + it "returns the specified family" do @addrinfo.afamily.should == Socket::AF_INET6 end @@ -147,11 +147,11 @@ @addrinfo.ip_port.should == 
46102 end - it "returns the INET pfamily" do + it "returns the specified family" do @addrinfo.pfamily.should == Socket::PF_INET end - it "returns the INET afamily" do + it "returns the specified family" do @addrinfo.afamily.should == Socket::AF_INET end @@ -217,11 +217,11 @@ @addrinfo.ip_port.should == 46102 end - it "returns the INET pfamily" do + it "returns the specified family" do @addrinfo.pfamily.should == Socket::PF_INET end - it "returns the INET afamily" do + it "returns the specified family" do @addrinfo.afamily.should == Socket::AF_INET end @@ -247,11 +247,11 @@ @addrinfo.ip_port.should == 46102 end - it "returns the INET pfamily" do + it "returns the specified family" do @addrinfo.pfamily.should == Socket::PF_INET end - it "returns the INET afamily" do + it "returns the specified family" do @addrinfo.afamily.should == Socket::AF_INET end @@ -311,11 +311,11 @@ @addrinfo.ip_port.should == 46102 end - it "returns the INET pfamily" do + it "returns the specified family" do @addrinfo.pfamily.should == Socket::PF_INET end - it "returns the INET afamily" do + it "returns the specified family" do @addrinfo.afamily.should == Socket::AF_INET end @@ -514,13 +514,13 @@ @sockaddr = Socket.sockaddr_in(80, '127.0.0.1') end - it 'returns an Addrinfo with :PF_INET family' do + it 'returns an Addrinfo with the specified family' do addr = Addrinfo.new(@sockaddr, :PF_INET) addr.pfamily.should == Socket::PF_INET end - it 'returns an Addrinfo with :INET family' do + it 'returns an Addrinfo with the specified family' do addr = Addrinfo.new(@sockaddr, :INET) addr.pfamily.should == Socket::PF_INET @@ -544,13 +544,13 @@ @sockaddr = Socket.sockaddr_in(80, '127.0.0.1') end - it 'returns an Addrinfo with "PF_INET" family' do + it 'returns an Addrinfo with the specified family' do addr = Addrinfo.new(@sockaddr, 'PF_INET') addr.pfamily.should == Socket::PF_INET end - it 'returns an Addrinfo with "INET" family' do + it 'returns an Addrinfo with the specified family' do addr = 
Addrinfo.new(@sockaddr, 'INET') addr.pfamily.should == Socket::PF_INET diff --git a/spec/ruby/library/socket/basicsocket/recv_nonblock_spec.rb b/spec/ruby/library/socket/basicsocket/recv_nonblock_spec.rb index f2a6682f12b8ea..f2383513f286b2 100644 --- a/spec/ruby/library/socket/basicsocket/recv_nonblock_spec.rb +++ b/spec/ruby/library/socket/basicsocket/recv_nonblock_spec.rb @@ -112,60 +112,30 @@ @server.close unless @server.closed? end - ruby_version_is ""..."3.3" do - it "returns an empty String on a closed stream socket" do - ready = false - - t = Thread.new do - client = @server.accept - - Thread.pass while !ready - begin - client.recv_nonblock(10) - rescue IO::EAGAINWaitReadable - retry - end - ensure - client.close if client - end - - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil + it "returns nil on a closed stream socket" do + ready = false - socket = TCPSocket.new('127.0.0.1', @port) - socket.close - ready = true + t = Thread.new do + client = @server.accept - t.value.should == "" - end - end - - ruby_version_is "3.3" do - it "returns nil on a closed stream socket" do - ready = false - - t = Thread.new do - client = @server.accept - - Thread.pass while !ready - begin - client.recv_nonblock(10) - rescue IO::EAGAINWaitReadable - retry - end - ensure - client.close if client + Thread.pass while !ready + begin + client.recv_nonblock(10) + rescue IO::EAGAINWaitReadable + retry end + ensure + client.close if client + end - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil + Thread.pass while t.status and t.status != "sleep" + t.status.should_not be_nil - socket = TCPSocket.new('127.0.0.1', @port) - socket.close - ready = true + socket = TCPSocket.new('127.0.0.1', @port) + socket.close + ready = true - t.value.should be_nil - end + t.value.should be_nil end end end diff --git a/spec/ruby/library/socket/basicsocket/recv_spec.rb b/spec/ruby/library/socket/basicsocket/recv_spec.rb index 
a51920f52a092a..7581f1bc1533fa 100644 --- a/spec/ruby/library/socket/basicsocket/recv_spec.rb +++ b/spec/ruby/library/socket/basicsocket/recv_spec.rb @@ -184,42 +184,21 @@ @server.close unless @server.closed? end - ruby_version_is ""..."3.3" do - it "returns an empty String on a closed stream socket" do - t = Thread.new do - client = @server.accept - client.recv(10) - ensure - client.close if client - end - - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil - - socket = TCPSocket.new('127.0.0.1', @port) - socket.close - - t.value.should == "" + it "returns nil on a closed stream socket" do + t = Thread.new do + client = @server.accept + client.recv(10) + ensure + client.close if client end - end - - ruby_version_is "3.3" do - it "returns nil on a closed stream socket" do - t = Thread.new do - client = @server.accept - client.recv(10) - ensure - client.close if client - end - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil + Thread.pass while t.status and t.status != "sleep" + t.status.should_not be_nil - socket = TCPSocket.new('127.0.0.1', @port) - socket.close + socket = TCPSocket.new('127.0.0.1', @port) + socket.close - t.value.should be_nil - end + t.value.should be_nil end end diff --git a/spec/ruby/library/socket/basicsocket/recvmsg_nonblock_spec.rb b/spec/ruby/library/socket/basicsocket/recvmsg_nonblock_spec.rb index b5fdd7c93bee8d..d1cde4411bd8bc 100644 --- a/spec/ruby/library/socket/basicsocket/recvmsg_nonblock_spec.rb +++ b/spec/ruby/library/socket/basicsocket/recvmsg_nonblock_spec.rb @@ -235,64 +235,31 @@ @server.close unless @server.closed? 
end - ruby_version_is ""..."3.3" do - platform_is_not :windows do # #recvmsg_nonblock() raises 'Errno::EINVAL: Invalid argument - recvmsg(2)' - it "returns an empty String as received data on a closed stream socket" do - ready = false + platform_is_not :windows do + it "returns nil on a closed stream socket" do + ready = false - t = Thread.new do - client = @server.accept + t = Thread.new do + client = @server.accept - Thread.pass while !ready - begin - client.recvmsg_nonblock(10) - rescue IO::EAGAINWaitReadable - retry - end - ensure - client.close if client + Thread.pass while !ready + begin + client.recvmsg_nonblock(10) + rescue IO::EAGAINWaitReadable + retry end - - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil - - socket = TCPSocket.new('127.0.0.1', @port) - socket.close - ready = true - - t.value.should.is_a? Array - t.value[0].should == "" + ensure + client.close if client end - end - end - ruby_version_is "3.3" do - platform_is_not :windows do - it "returns nil on a closed stream socket" do - ready = false + Thread.pass while t.status and t.status != "sleep" + t.status.should_not be_nil - t = Thread.new do - client = @server.accept + socket = TCPSocket.new('127.0.0.1', @port) + socket.close + ready = true - Thread.pass while !ready - begin - client.recvmsg_nonblock(10) - rescue IO::EAGAINWaitReadable - retry - end - ensure - client.close if client - end - - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil - - socket = TCPSocket.new('127.0.0.1', @port) - socket.close - ready = true - - t.value.should be_nil - end + t.value.should be_nil end end end diff --git a/spec/ruby/library/socket/basicsocket/recvmsg_spec.rb b/spec/ruby/library/socket/basicsocket/recvmsg_spec.rb index 04ba1d74c768c1..cfa0f4c61d476f 100644 --- a/spec/ruby/library/socket/basicsocket/recvmsg_spec.rb +++ b/spec/ruby/library/socket/basicsocket/recvmsg_spec.rb @@ -208,46 +208,22 @@ @server.close unless @server.closed? 
end - ruby_version_is ""..."3.3" do - platform_is_not :windows do - it "returns an empty String as received data on a closed stream socket" do - t = Thread.new do - client = @server.accept - client.recvmsg(10) - ensure - client.close if client - end - - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil - - socket = TCPSocket.new('127.0.0.1', @port) - socket.close - - t.value.should.is_a? Array - t.value[0].should == "" + platform_is_not :windows do + it "returns nil on a closed stream socket" do + t = Thread.new do + client = @server.accept + client.recvmsg(10) + ensure + client.close if client end - end - end - - ruby_version_is "3.3" do - platform_is_not :windows do - it "returns nil on a closed stream socket" do - t = Thread.new do - client = @server.accept - client.recvmsg(10) - ensure - client.close if client - end - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil + Thread.pass while t.status and t.status != "sleep" + t.status.should_not be_nil - socket = TCPSocket.new('127.0.0.1', @port) - socket.close + socket = TCPSocket.new('127.0.0.1', @port) + socket.close - t.value.should be_nil - end + t.value.should be_nil end end end diff --git a/spec/ruby/library/socket/ipsocket/recvfrom_spec.rb b/spec/ruby/library/socket/ipsocket/recvfrom_spec.rb index b58903df237b9d..5e6a145c9bdaeb 100644 --- a/spec/ruby/library/socket/ipsocket/recvfrom_spec.rb +++ b/spec/ruby/library/socket/ipsocket/recvfrom_spec.rb @@ -83,43 +83,21 @@ @client.close unless @client.closed? end - ruby_version_is ""..."3.3" do - it "returns an empty String as received data on a closed stream socket" do - t = Thread.new do - client = @server.accept - message = client.recvfrom(10) - message - ensure - client.close if client - end - - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil - - @client.close - - t.value.should.is_a? 
Array - t.value[0].should == "" + it "returns nil on a closed stream socket" do + t = Thread.new do + client = @server.accept + message = client.recvfrom(10) + message + ensure + client.close if client end - end - - ruby_version_is "3.3" do - it "returns nil on a closed stream socket" do - t = Thread.new do - client = @server.accept - message = client.recvfrom(10) - message - ensure - client.close if client - end - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil + Thread.pass while t.status and t.status != "sleep" + t.status.should_not be_nil - @client.close + @client.close - t.value.should be_nil - end + t.value.should be_nil end end diff --git a/spec/ruby/library/socket/socket/getaddrinfo_spec.rb b/spec/ruby/library/socket/socket/getaddrinfo_spec.rb index 6576af52eeadc7..17ffeaccaf498b 100644 --- a/spec/ruby/library/socket/socket/getaddrinfo_spec.rb +++ b/spec/ruby/library/socket/socket/getaddrinfo_spec.rb @@ -107,22 +107,12 @@ res.each { |a| expected.should include(a) } end - ruby_version_is ""..."3.3" do - it "raises SocketError when fails to resolve address" do - -> { - Socket.getaddrinfo("www.kame.net", 80, "AF_UNIX") - }.should raise_error(SocketError) - end - end - - ruby_version_is "3.3" do - it "raises ResolutionError when fails to resolve address" do - -> { - Socket.getaddrinfo("www.kame.net", 80, "AF_UNIX") - }.should raise_error(Socket::ResolutionError) { |e| - [Socket::EAI_FAMILY, Socket::EAI_FAIL].should.include?(e.error_code) - } - end + it "raises ResolutionError when fails to resolve address" do + -> { + Socket.getaddrinfo("www.kame.net", 80, "AF_UNIX") + }.should raise_error(Socket::ResolutionError) { |e| + [Socket::EAI_FAMILY, Socket::EAI_FAIL].should.include?(e.error_code) + } end end end diff --git a/spec/ruby/library/socket/socket/getnameinfo_spec.rb b/spec/ruby/library/socket/socket/getnameinfo_spec.rb index af4a10c9c2baa5..48cc94bcd182ab 100644 --- a/spec/ruby/library/socket/socket/getnameinfo_spec.rb +++ 
b/spec/ruby/library/socket/socket/getnameinfo_spec.rb @@ -61,22 +61,12 @@ def should_be_valid_dns_name(name) name_info[1].should == 'discard' end - ruby_version_is ""..."3.3" do - it "raises SocketError when fails to resolve address" do - -> { - Socket.getnameinfo(["AF_UNIX", 80, "0.0.0.0"]) - }.should raise_error(SocketError) - end - end - - ruby_version_is "3.3" do - it "raises ResolutionError when fails to resolve address" do - -> { - Socket.getnameinfo(["AF_UNIX", 80, "0.0.0.0"]) - }.should raise_error(Socket::ResolutionError) { |e| - [Socket::EAI_FAMILY, Socket::EAI_FAIL].should.include?(e.error_code) - } - end + it "raises ResolutionError when fails to resolve address" do + -> { + Socket.getnameinfo(["AF_UNIX", 80, "0.0.0.0"]) + }.should raise_error(Socket::ResolutionError) { |e| + [Socket::EAI_FAMILY, Socket::EAI_FAIL].should.include?(e.error_code) + } end end diff --git a/spec/ruby/library/socket/socket/recvfrom_nonblock_spec.rb b/spec/ruby/library/socket/socket/recvfrom_nonblock_spec.rb index 01b42bcc52b4fa..38a9f5ff5bc3fe 100644 --- a/spec/ruby/library/socket/socket/recvfrom_nonblock_spec.rb +++ b/spec/ruby/library/socket/socket/recvfrom_nonblock_spec.rb @@ -158,61 +158,30 @@ @client.close unless @client.closed? end - ruby_version_is ""..."3.3" do - it "returns an empty String as received data on a closed stream socket" do - ready = false - - t = Thread.new do - client, _ = @server.accept - - Thread.pass while !ready - begin - client.recvfrom_nonblock(10) - rescue IO::EAGAINWaitReadable - retry - end - ensure - client.close if client - end - - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil + it "returns nil on a closed stream socket" do + ready = false - @client.connect(@server_addr) - @client.close - ready = true - - t.value.should.is_a? 
Array - t.value[0].should == "" - end - end + t = Thread.new do + client, _ = @server.accept - ruby_version_is "3.3" do - it "returns nil on a closed stream socket" do - ready = false - - t = Thread.new do - client, _ = @server.accept - - Thread.pass while !ready - begin - client.recvfrom_nonblock(10) - rescue IO::EAGAINWaitReadable - retry - end - ensure - client.close if client + Thread.pass while !ready + begin + client.recvfrom_nonblock(10) + rescue IO::EAGAINWaitReadable + retry end + ensure + client.close if client + end - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil + Thread.pass while t.status and t.status != "sleep" + t.status.should_not be_nil - @client.connect(@server_addr) - @client.close - ready = true + @client.connect(@server_addr) + @client.close + ready = true - t.value.should be_nil - end + t.value.should be_nil end end end diff --git a/spec/ruby/library/socket/socket/recvfrom_spec.rb b/spec/ruby/library/socket/socket/recvfrom_spec.rb index 6ba39ffcaf534c..cbbc162f6b0d28 100644 --- a/spec/ruby/library/socket/socket/recvfrom_spec.rb +++ b/spec/ruby/library/socket/socket/recvfrom_spec.rb @@ -111,43 +111,21 @@ @client.close unless @client.closed? end - ruby_version_is ""..."3.3" do - it "returns an empty String as received data on a closed stream socket" do - t = Thread.new do - client, _ = @server.accept - client.recvfrom(10) - ensure - client.close if client - end - - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil - - @client.connect(@server_addr) - @client.close - - t.value.should.is_a? 
Array - t.value[0].should == "" + it "returns nil on a closed stream socket" do + t = Thread.new do + client, _ = @server.accept + client.recvfrom(10) + ensure + client.close if client end - end - - ruby_version_is "3.3" do - it "returns nil on a closed stream socket" do - t = Thread.new do - client, _ = @server.accept - client.recvfrom(10) - ensure - client.close if client - end - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil + Thread.pass while t.status and t.status != "sleep" + t.status.should_not be_nil - @client.connect(@server_addr) - @client.close + @client.connect(@server_addr) + @client.close - t.value.should be_nil - end + t.value.should be_nil end end diff --git a/spec/ruby/library/stringscanner/named_captures_spec.rb b/spec/ruby/library/stringscanner/named_captures_spec.rb index a68d66c216a82e..927784a6c4a8a9 100644 --- a/spec/ruby/library/stringscanner/named_captures_spec.rb +++ b/spec/ruby/library/stringscanner/named_captures_spec.rb @@ -16,11 +16,9 @@ @s.named_captures.should == {} end - # https://github.com/ruby/strscan/issues/132 - ruby_bug "", ""..."3.3" do # fixed in strscan v3.0.7 - it "returns {} if there is no any matching done" do - @s.named_captures.should == {} - end + # https://github.com/ruby/strscan/issues/132 fixed in strscan v3.0.7 + it "returns {} if there is no any matching done" do + @s.named_captures.should == {} end it "returns nil for an optional named capturing group if it doesn't match" do diff --git a/spec/ruby/optional/capi/encoding_spec.rb b/spec/ruby/optional/capi/encoding_spec.rb index c14983c7ead703..734b5f125381db 100644 --- a/spec/ruby/optional/capi/encoding_spec.rb +++ b/spec/ruby/optional/capi/encoding_spec.rb @@ -745,4 +745,34 @@ ruby_exe(code, args: "2>&1", exit_status: 1).should.include?('too many encoding (> 256) (EncodingError)') end end + + describe "ONIGENC_IS_UNICODE" do + it "is true only for select UTF-related encodings" do + unicode = [ + Encoding::UTF_8, + 
Encoding::UTF8_DOCOMO, + Encoding::UTF8_KDDI, + Encoding::UTF8_MAC, + Encoding::UTF8_SOFTBANK, + Encoding::CESU_8, + Encoding::UTF_16LE, + Encoding::UTF_16BE, + Encoding::UTF_32LE, + Encoding::UTF_32BE + ] + unicode.each do |enc| + @s.should.ONIGENC_IS_UNICODE(enc) + end + + (Encoding.list - unicode).each { |enc| + @s.should_not.ONIGENC_IS_UNICODE(enc) + } + end + + # Redundant with the above but more explicit + it "is false for the dummy UTF-16 and UTF-32 encodings" do + @s.should_not.ONIGENC_IS_UNICODE(Encoding::UTF_16) + @s.should_not.ONIGENC_IS_UNICODE(Encoding::UTF_32) + end + end end diff --git a/spec/ruby/optional/capi/ext/encoding_spec.c b/spec/ruby/optional/capi/ext/encoding_spec.c index aa8662cfbd6426..98d4e2e3b772c8 100644 --- a/spec/ruby/optional/capi/ext/encoding_spec.c +++ b/spec/ruby/optional/capi/ext/encoding_spec.c @@ -324,6 +324,10 @@ static VALUE encoding_spec_rb_define_dummy_encoding(VALUE self, VALUE name) { return INT2NUM(rb_define_dummy_encoding(RSTRING_PTR(name))); } +static VALUE encoding_spec_ONIGENC_IS_UNICODE(VALUE self, VALUE encoding) { + return ONIGENC_IS_UNICODE(rb_to_encoding(encoding)) ? 
Qtrue : Qfalse; +} + void Init_encoding_spec(void) { VALUE cls; native_rb_encoding_pointer = (rb_encoding**) malloc(sizeof(rb_encoding*)); @@ -384,6 +388,7 @@ void Init_encoding_spec(void) { rb_define_method(cls, "ONIGENC_MBC_CASE_FOLD", encoding_spec_ONIGENC_MBC_CASE_FOLD, 1); rb_define_method(cls, "rb_enc_left_char_head", encoding_spec_rb_enc_left_char_head, 2); rb_define_method(cls, "rb_define_dummy_encoding", encoding_spec_rb_define_dummy_encoding, 1); + rb_define_method(cls, "ONIGENC_IS_UNICODE", encoding_spec_ONIGENC_IS_UNICODE, 1); } #ifdef __cplusplus diff --git a/spec/ruby/optional/capi/ext/kernel_spec.c b/spec/ruby/optional/capi/ext/kernel_spec.c index a8fed21b5900b6..eee324052d0936 100644 --- a/spec/ruby/optional/capi/ext/kernel_spec.c +++ b/spec/ruby/optional/capi/ext/kernel_spec.c @@ -1,4 +1,5 @@ #include "ruby.h" +#include "ruby/vm.h" #include "rubyspec.h" #include @@ -337,6 +338,15 @@ static VALUE kernel_spec_rb_set_end_proc(VALUE self, VALUE io) { return Qnil; } +static void at_exit_hook(ruby_vm_t *vm) { + puts("ruby_vm_at_exit hook ran"); +} + +static VALUE kernel_spec_ruby_vm_at_exit(VALUE self) { + ruby_vm_at_exit(at_exit_hook); + return self; +} + static VALUE kernel_spec_rb_f_sprintf(VALUE self, VALUE ary) { return rb_f_sprintf((int)RARRAY_LEN(ary), RARRAY_PTR(ary)); } @@ -434,6 +444,7 @@ void Init_kernel_spec(void) { rb_define_method(cls, "rb_yield_splat", kernel_spec_rb_yield_splat, 1); rb_define_method(cls, "rb_exec_recursive", kernel_spec_rb_exec_recursive, 1); rb_define_method(cls, "rb_set_end_proc", kernel_spec_rb_set_end_proc, 1); + rb_define_method(cls, "ruby_vm_at_exit", kernel_spec_ruby_vm_at_exit, 0); rb_define_method(cls, "rb_f_sprintf", kernel_spec_rb_f_sprintf, 1); rb_define_method(cls, "rb_str_format", kernel_spec_rb_str_format, 3); rb_define_method(cls, "rb_make_backtrace", kernel_spec_rb_make_backtrace, 0); diff --git a/spec/ruby/optional/capi/ext/string_spec.c b/spec/ruby/optional/capi/ext/string_spec.c index 
094013e049cbf6..74aa9e56e816fe 100644 --- a/spec/ruby/optional/capi/ext/string_spec.c +++ b/spec/ruby/optional/capi/ext/string_spec.c @@ -581,6 +581,14 @@ static VALUE string_spec_rb_str_to_interned_str(VALUE self, VALUE str) { return rb_str_to_interned_str(str); } +static VALUE string_spec_rb_interned_str(VALUE self, VALUE str, VALUE len) { + return rb_interned_str(RSTRING_PTR(str), FIX2LONG(len)); +} + +static VALUE string_spec_rb_interned_str_cstr(VALUE self, VALUE str) { + return rb_interned_str_cstr(RSTRING_PTR(str)); +} + void Init_string_spec(void) { VALUE cls = rb_define_class("CApiStringSpecs", rb_cObject); rb_define_method(cls, "rb_cstr2inum", string_spec_rb_cstr2inum, 2); @@ -681,6 +689,8 @@ void Init_string_spec(void) { rb_define_method(cls, "rb_enc_interned_str_cstr", string_spec_rb_enc_interned_str_cstr, 2); rb_define_method(cls, "rb_enc_interned_str", string_spec_rb_enc_interned_str, 3); rb_define_method(cls, "rb_str_to_interned_str", string_spec_rb_str_to_interned_str, 1); + rb_define_method(cls, "rb_interned_str", string_spec_rb_interned_str, 2); + rb_define_method(cls, "rb_interned_str_cstr", string_spec_rb_interned_str_cstr, 1); } #ifdef __cplusplus diff --git a/spec/ruby/optional/capi/io_spec.rb b/spec/ruby/optional/capi/io_spec.rb index ab7a7fc8f6f661..dc4ac3e3744ce8 100644 --- a/spec/ruby/optional/capi/io_spec.rb +++ b/spec/ruby/optional/capi/io_spec.rb @@ -494,166 +494,164 @@ end end - ruby_version_is "3.3" do - describe "rb_io_mode" do - it "returns the mode" do - (@o.rb_io_mode(@r_io) & 0b11).should == 0b01 - (@o.rb_io_mode(@w_io) & 0b11).should == 0b10 - (@o.rb_io_mode(@rw_io) & 0b11).should == 0b11 - end + describe "rb_io_mode" do + it "returns the mode" do + (@o.rb_io_mode(@r_io) & 0b11).should == 0b01 + (@o.rb_io_mode(@w_io) & 0b11).should == 0b10 + (@o.rb_io_mode(@rw_io) & 0b11).should == 0b11 end + end - describe "rb_io_path" do - it "returns the IO#path" do - @o.rb_io_path(@r_io).should == @r_io.path - @o.rb_io_path(@rw_io).should == 
@rw_io.path - @o.rb_io_path(@rw_io).should == @name - end + describe "rb_io_path" do + it "returns the IO#path" do + @o.rb_io_path(@r_io).should == @r_io.path + @o.rb_io_path(@rw_io).should == @rw_io.path + @o.rb_io_path(@rw_io).should == @name end + end - describe "rb_io_closed_p" do - it "returns false when io is not closed" do - @o.rb_io_closed_p(@r_io).should == false - @r_io.closed?.should == false - end + describe "rb_io_closed_p" do + it "returns false when io is not closed" do + @o.rb_io_closed_p(@r_io).should == false + @r_io.closed?.should == false + end - it "returns true when io is closed" do - @r_io.close + it "returns true when io is closed" do + @r_io.close - @o.rb_io_closed_p(@r_io).should == true - @r_io.closed?.should == true - end + @o.rb_io_closed_p(@r_io).should == true + @r_io.closed?.should == true end + end - quarantine! do # "Errno::EBADF: Bad file descriptor" at closing @r_io, @rw_io etc in the after :each hook - describe "rb_io_open_descriptor" do - it "creates a new IO instance" do - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) - io.should.is_a?(IO) - end - - it "return an instance of the specified class" do - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) - io.class.should == File + quarantine! 
do # "Errno::EBADF: Bad file descriptor" at closing @r_io, @rw_io etc in the after :each hook + describe "rb_io_open_descriptor" do + it "creates a new IO instance" do + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) + io.should.is_a?(IO) + end - io = @o.rb_io_open_descriptor(IO, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) - io.class.should == IO - end + it "return an instance of the specified class" do + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) + io.class.should == File - it "sets the specified file descriptor" do - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) - io.fileno.should == @r_io.fileno - end + io = @o.rb_io_open_descriptor(IO, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) + io.class.should == IO + end - it "sets the specified path" do - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) - io.path.should == "a.txt" - end + it "sets the specified file descriptor" do + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) + io.fileno.should == @r_io.fileno + end - it "sets the specified mode" do - io = @o.rb_io_open_descriptor(File, @r_io.fileno, CApiIOSpecs::FMODE_BINMODE, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) - io.should.binmode? + it "sets the specified path" do + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) + io.path.should == "a.txt" + end - io = @o.rb_io_open_descriptor(File, @r_io.fileno, CApiIOSpecs::FMODE_TEXTMODE, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) - io.should_not.binmode? - end + it "sets the specified mode" do + io = @o.rb_io_open_descriptor(File, @r_io.fileno, CApiIOSpecs::FMODE_BINMODE, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) + io.should.binmode? 
- it "sets the specified timeout" do - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) - io.timeout.should == 60 - end + io = @o.rb_io_open_descriptor(File, @r_io.fileno, CApiIOSpecs::FMODE_TEXTMODE, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) + io.should_not.binmode? + end - it "sets the specified internal encoding" do - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) - io.internal_encoding.should == Encoding::US_ASCII - end + it "sets the specified timeout" do + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) + io.timeout.should == 60 + end - it "sets the specified external encoding" do - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) - io.external_encoding.should == Encoding::UTF_8 - end + it "sets the specified internal encoding" do + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) + io.internal_encoding.should == Encoding::US_ASCII + end - it "does not apply the specified encoding flags" do - name = tmp("rb_io_open_descriptor_specs") - File.write(name, "123\r\n456\n89") - file = File.open(name, "r") + it "sets the specified external encoding" do + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) + io.external_encoding.should == Encoding::UTF_8 + end - io = @o.rb_io_open_descriptor(File, file.fileno, CApiIOSpecs::FMODE_READABLE, "a.txt", 60, "US-ASCII", "UTF-8", CApiIOSpecs::ECONV_UNIVERSAL_NEWLINE_DECORATOR, {}) - io.read_nonblock(20).should == "123\r\n456\n89" - ensure - file.close - rm_r name - end + it "does not apply the specified encoding flags" do + name = tmp("rb_io_open_descriptor_specs") + File.write(name, "123\r\n456\n89") + file = File.open(name, "r") - it "ignores the IO open options" do - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, 
{external_encoding: "windows-1251"}) - io.external_encoding.should == Encoding::UTF_8 + io = @o.rb_io_open_descriptor(File, file.fileno, CApiIOSpecs::FMODE_READABLE, "a.txt", 60, "US-ASCII", "UTF-8", CApiIOSpecs::ECONV_UNIVERSAL_NEWLINE_DECORATOR, {}) + io.read_nonblock(20).should == "123\r\n456\n89" + ensure + file.close + rm_r name + end - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {internal_encoding: "windows-1251"}) - io.internal_encoding.should == Encoding::US_ASCII + it "ignores the IO open options" do + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {external_encoding: "windows-1251"}) + io.external_encoding.should == Encoding::UTF_8 - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {encoding: "windows-1251:binary"}) - io.external_encoding.should == Encoding::UTF_8 - io.internal_encoding.should == Encoding::US_ASCII + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {internal_encoding: "windows-1251"}) + io.internal_encoding.should == Encoding::US_ASCII - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {textmode: false}) - io.should_not.binmode? + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {encoding: "windows-1251:binary"}) + io.external_encoding.should == Encoding::UTF_8 + io.internal_encoding.should == Encoding::US_ASCII - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {binmode: true}) - io.should_not.binmode? + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {textmode: false}) + io.should_not.binmode? - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {autoclose: false}) - io.should.autoclose? 
+ io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {binmode: true}) + io.should_not.binmode? - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {path: "a.txt"}) - io.path.should == "a.txt" - end + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {autoclose: false}) + io.should.autoclose? - it "ignores the IO encoding options" do - io = @o.rb_io_open_descriptor(File, @w_io.fileno, CApiIOSpecs::FMODE_WRITABLE, "a.txt", 60, "US-ASCII", "UTF-8", 0, {crlf_newline: true}) + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {path: "a.txt"}) + io.path.should == "a.txt" + end - io.write("123\r\n456\n89") - io.flush + it "ignores the IO encoding options" do + io = @o.rb_io_open_descriptor(File, @w_io.fileno, CApiIOSpecs::FMODE_WRITABLE, "a.txt", 60, "US-ASCII", "UTF-8", 0, {crlf_newline: true}) - @r_io.read_nonblock(20).should == "123\r\n456\n89" - end + io.write("123\r\n456\n89") + io.flush - it "allows wrong mode" do - io = @o.rb_io_open_descriptor(File, @w_io.fileno, CApiIOSpecs::FMODE_READABLE, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) - io.should.is_a?(File) + @r_io.read_nonblock(20).should == "123\r\n456\n89" + end - platform_is_not :windows do - -> { io.read_nonblock(1) }.should raise_error(Errno::EBADF) - end + it "allows wrong mode" do + io = @o.rb_io_open_descriptor(File, @w_io.fileno, CApiIOSpecs::FMODE_READABLE, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) + io.should.is_a?(File) - platform_is :windows do - -> { io.read_nonblock(1) }.should raise_error(IO::EWOULDBLOCKWaitReadable) - end + platform_is_not :windows do + -> { io.read_nonblock(1) }.should raise_error(Errno::EBADF) end - it "tolerates NULL as rb_io_encoding *encoding parameter" do - io = @o.rb_io_open_descriptor_without_encoding(File, @r_io.fileno, 0, "a.txt", 60) - io.should.is_a?(File) + platform_is :windows do + -> { io.read_nonblock(1) 
}.should raise_error(IO::EWOULDBLOCKWaitReadable) end + end - it "deduplicates path String" do - path = "a.txt".dup - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, path, 60, "US-ASCII", "UTF-8", 0, {}) - io.path.should_not equal(path) + it "tolerates NULL as rb_io_encoding *encoding parameter" do + io = @o.rb_io_open_descriptor_without_encoding(File, @r_io.fileno, 0, "a.txt", 60) + io.should.is_a?(File) + end - path = "a.txt".freeze - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, path, 60, "US-ASCII", "UTF-8", 0, {}) - io.path.should_not equal(path) - end + it "deduplicates path String" do + path = "a.txt".dup + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, path, 60, "US-ASCII", "UTF-8", 0, {}) + io.path.should_not equal(path) + + path = "a.txt".freeze + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, path, 60, "US-ASCII", "UTF-8", 0, {}) + io.path.should_not equal(path) + end - it "calls #to_str to convert a path to a String" do - path = Object.new - def path.to_str; "a.txt"; end + it "calls #to_str to convert a path to a String" do + path = Object.new + def path.to_str; "a.txt"; end - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, path, 60, "US-ASCII", "UTF-8", 0, {}) + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, path, 60, "US-ASCII", "UTF-8", 0, {}) - io.path.should == "a.txt" - end + io.path.should == "a.txt" end end end diff --git a/spec/ruby/optional/capi/kernel_spec.rb b/spec/ruby/optional/capi/kernel_spec.rb index 6633ee50c1f8ac..0a2362fb304ab8 100644 --- a/spec/ruby/optional/capi/kernel_spec.rb +++ b/spec/ruby/optional/capi/kernel_spec.rb @@ -703,6 +703,12 @@ def proc_caller end end + describe "ruby_vm_at_exit" do + it "runs a C function after the VM is terminated" do + ruby_exe("require #{kernel_path.inspect}; CApiKernelSpecs.new.ruby_vm_at_exit").should == "ruby_vm_at_exit hook ran\n" + end + end + describe "rb_f_sprintf" do it "returns a string according to format and arguments" do @s.rb_f_sprintf(["%d %f %s", 
10, 2.5, "test"]).should == "10 2.500000 test" diff --git a/spec/ruby/optional/capi/object_spec.rb b/spec/ruby/optional/capi/object_spec.rb index 8b4d8a9bba0e58..6716fd9e33766c 100644 --- a/spec/ruby/optional/capi/object_spec.rb +++ b/spec/ruby/optional/capi/object_spec.rb @@ -1004,7 +1004,6 @@ def reach it "calls the callback function for each cvar and ivar on a class" do exp = [:@@cvar, :foo, :@@cvar2, :bar, :@ivar, :baz] - exp.unshift(:__classpath__, 'CApiObjectSpecs::CVars') if RUBY_VERSION < "3.3" ary = @o.rb_ivar_foreach(CApiObjectSpecs::CVars) ary.should == exp @@ -1012,7 +1011,6 @@ def reach it "calls the callback function for each cvar and ivar on a module" do exp = [:@@mvar, :foo, :@@mvar2, :bar, :@ivar, :baz] - exp.unshift(:__classpath__, 'CApiObjectSpecs::MVars') if RUBY_VERSION < "3.3" ary = @o.rb_ivar_foreach(CApiObjectSpecs::MVars) ary.should == exp diff --git a/spec/ruby/optional/capi/spec_helper.rb b/spec/ruby/optional/capi/spec_helper.rb index e7abf46e6ccf65..d937c967d062fc 100644 --- a/spec/ruby/optional/capi/spec_helper.rb +++ b/spec/ruby/optional/capi/spec_helper.rb @@ -59,7 +59,11 @@ def compile_extension(name) tmpdir = tmp("cext_#{name}") Dir.mkdir(tmpdir) begin - ["#{core_ext_dir}/rubyspec.h", "#{spec_ext_dir}/#{ext}.c"].each do |file| + files = ["#{core_ext_dir}/rubyspec.h", "#{spec_ext_dir}/#{ext}.c"] + if spec_ext_dir != core_ext_dir + files += Dir.glob("#{spec_ext_dir}/*.h") + end + files.each do |file| if cxx and file.end_with?('.c') cp file, "#{tmpdir}/#{File.basename(file, '.c')}.cpp" else diff --git a/spec/ruby/optional/capi/string_spec.rb b/spec/ruby/optional/capi/string_spec.rb index 72f20ee6a52455..889f0a6cfe5d51 100644 --- a/spec/ruby/optional/capi/string_spec.rb +++ b/spec/ruby/optional/capi/string_spec.rb @@ -1369,8 +1369,133 @@ def inspect result1.should_not.equal?(result2) end + it "preserves the encoding of the original string" do + result1 = @s.rb_str_to_interned_str("hello".dup.force_encoding(Encoding::US_ASCII)) + result2 
= @s.rb_str_to_interned_str("hello".dup.force_encoding(Encoding::UTF_8)) + result1.encoding.should == Encoding::US_ASCII + result2.encoding.should == Encoding::UTF_8 + end + it "returns the same string as String#-@" do @s.rb_str_to_interned_str("hello").should.equal?(-"hello") end end + + describe "rb_interned_str" do + it "returns a frozen string" do + str = "hello" + result = @s.rb_interned_str(str, str.bytesize) + result.should.is_a?(String) + result.should.frozen? + result.encoding.should == Encoding::US_ASCII + end + + it "returns the same frozen string" do + str = "hello" + result1 = @s.rb_interned_str(str, str.bytesize) + result2 = @s.rb_interned_str(str, str.bytesize) + result1.should.equal?(result2) + end + + it "supports strings with embedded null bytes" do + str = "foo\x00bar\x00baz".b + result = @s.rb_interned_str(str, str.bytesize) + result.should == str + end + + it "return US_ASCII encoding for an empty string" do + result = @s.rb_interned_str("", 0) + result.should == "" + result.encoding.should == Encoding::US_ASCII + end + + it "returns US_ASCII encoding for strings of only 7 bit ASCII" do + 0x00.upto(0x7f).each do |char| + result = @s.rb_interned_str(char.chr, 1) + result.encoding.should == Encoding::US_ASCII + end + end + + ruby_bug "21842", ""..."4.1" do + it "returns BINARY encoding for strings that use the 8th bit" do + 0x80.upto(0xff) do |char| + result = @s.rb_interned_str(char.chr, 1) + result.encoding.should == Encoding::BINARY + end + end + end + + it 'returns the same string when using non-ascii characters' do + str = 'こんにちは' + result1 = @s.rb_interned_str(str, str.bytesize) + result2 = @s.rb_interned_str(str, str.bytesize) + result1.should.equal?(result2) + end + + ruby_bug "21842", ""..."4.1" do + it "returns the same string as String#-@" do + str = "hello".dup.force_encoding(Encoding::US_ASCII) + @s.rb_interned_str(str, str.bytesize).should.equal?(-str) + end + end + end + + describe "rb_interned_str_cstr" do + it "returns a frozen 
string" do + str = "hello" + result = @s.rb_interned_str_cstr(str) + result.should.is_a?(String) + result.should.frozen? + result.encoding.should == Encoding::US_ASCII + end + + it "returns the same frozen string" do + str = "hello" + result1 = @s.rb_interned_str_cstr(str) + result2 = @s.rb_interned_str_cstr(str) + result1.should.equal?(result2) + end + + it "does not support strings with embedded null bytes" do + str = "foo\x00bar\x00baz".b + result = @s.rb_interned_str_cstr(str) + result.should == "foo" + end + + it "return US_ASCII encoding for an empty string" do + result = @s.rb_interned_str_cstr("") + result.should == "" + result.encoding.should == Encoding::US_ASCII + end + + it "returns US_ASCII encoding for strings of only 7 bit ASCII" do + 0x01.upto(0x7f).each do |char| + result = @s.rb_interned_str_cstr(char.chr) + result.encoding.should == Encoding::US_ASCII + end + end + + ruby_bug "21842", ""..."4.1" do + it "returns BINARY encoding for strings that use the 8th bit" do + 0x80.upto(0xff) do |char| + result = @s.rb_interned_str_cstr(char.chr) + result.encoding.should == Encoding::BINARY + end + end + end + + it 'returns the same string when using non-ascii characters' do + str = 'こんにちは' + result1 = @s.rb_interned_str_cstr(str) + result2 = @s.rb_interned_str_cstr(str) + result1.should.equal?(result2) + end + + ruby_bug "21842", ""..."4.1" do + it "returns the same string as String#-@" do + str = "hello".dup.force_encoding(Encoding::US_ASCII) + @s.rb_interned_str_cstr(str).should.equal?(-str) + end + end + end end diff --git a/spec/ruby/optional/capi/struct_spec.rb b/spec/ruby/optional/capi/struct_spec.rb index cc8d7f932e53b1..3f9eff52bc0b10 100644 --- a/spec/ruby/optional/capi/struct_spec.rb +++ b/spec/ruby/optional/capi/struct_spec.rb @@ -239,78 +239,76 @@ end end -ruby_version_is "3.3" do - describe "C-API Data function" do - before :all do - @s = CApiStructSpecs.new - @klass = @s.rb_data_define(nil, "a", "b", "c") - end - - describe "rb_data_define" 
do - it "returns a subclass of Data class when passed nil as the first argument" do - @klass.should.is_a? Class - @klass.superclass.should == Data - end - - it "returns a subclass of a class when passed as the first argument" do - superclass = Class.new(Data) - klass = @s.rb_data_define(superclass, "a", "b", "c") - - klass.should.is_a? Class - klass.superclass.should == superclass - end - - it "creates readers for the members" do - obj = @klass.new(1, 2, 3) - - obj.a.should == 1 - obj.b.should == 2 - obj.c.should == 3 - end - - it "returns the member names as Symbols" do - obj = @klass.new(0, 0, 0) - - obj.members.should == [:a, :b, :c] - end - - it "raises an ArgumentError if arguments contain duplicate member name" do - -> { @s.rb_data_define(nil, "a", "b", "a") }.should raise_error(ArgumentError) - end - - it "raises when first argument is not a class" do - -> { @s.rb_data_define([], "a", "b", "c") }.should raise_error(TypeError, "wrong argument type Array (expected Class)") - end - end - - describe "rb_struct_initialize" do - it "sets all members for a Data instance" do - data = @klass.allocate - @s.rb_struct_initialize(data, [1, 2, 3]).should == nil - data.a.should == 1 - data.b.should == 2 - data.c.should == 3 - end - - it "freezes the Data instance" do - data = @klass.allocate - @s.rb_struct_initialize(data, [1, 2, 3]).should == nil - data.should.frozen? 
- -> { @s.rb_struct_initialize(data, [1, 2, 3]) }.should raise_error(FrozenError) - end - - it "raises ArgumentError if too many values" do - data = @klass.allocate - -> { @s.rb_struct_initialize(data, [1, 2, 3, 4]) }.should raise_error(ArgumentError, "struct size differs") - end - - it "treats missing values as nil" do - data = @klass.allocate - @s.rb_struct_initialize(data, [1, 2]).should == nil - data.a.should == 1 - data.b.should == 2 - data.c.should == nil - end +describe "C-API Data function" do + before :all do + @s = CApiStructSpecs.new + @klass = @s.rb_data_define(nil, "a", "b", "c") + end + + describe "rb_data_define" do + it "returns a subclass of Data class when passed nil as the first argument" do + @klass.should.is_a? Class + @klass.superclass.should == Data + end + + it "returns a subclass of a class when passed as the first argument" do + superclass = Class.new(Data) + klass = @s.rb_data_define(superclass, "a", "b", "c") + + klass.should.is_a? Class + klass.superclass.should == superclass + end + + it "creates readers for the members" do + obj = @klass.new(1, 2, 3) + + obj.a.should == 1 + obj.b.should == 2 + obj.c.should == 3 + end + + it "returns the member names as Symbols" do + obj = @klass.new(0, 0, 0) + + obj.members.should == [:a, :b, :c] + end + + it "raises an ArgumentError if arguments contain duplicate member name" do + -> { @s.rb_data_define(nil, "a", "b", "a") }.should raise_error(ArgumentError) + end + + it "raises when first argument is not a class" do + -> { @s.rb_data_define([], "a", "b", "c") }.should raise_error(TypeError, "wrong argument type Array (expected Class)") + end + end + + describe "rb_struct_initialize" do + it "sets all members for a Data instance" do + data = @klass.allocate + @s.rb_struct_initialize(data, [1, 2, 3]).should == nil + data.a.should == 1 + data.b.should == 2 + data.c.should == 3 + end + + it "freezes the Data instance" do + data = @klass.allocate + @s.rb_struct_initialize(data, [1, 2, 3]).should == nil + 
data.should.frozen? + -> { @s.rb_struct_initialize(data, [1, 2, 3]) }.should raise_error(FrozenError) + end + + it "raises ArgumentError if too many values" do + data = @klass.allocate + -> { @s.rb_struct_initialize(data, [1, 2, 3, 4]) }.should raise_error(ArgumentError, "struct size differs") + end + + it "treats missing values as nil" do + data = @klass.allocate + @s.rb_struct_initialize(data, [1, 2]).should == nil + data.a.should == 1 + data.b.should == 2 + data.c.should == nil end end end diff --git a/spec/ruby/security/cve_2020_10663_spec.rb b/spec/ruby/security/cve_2020_10663_spec.rb index c44a13a0dd4b5d..7f42c407420b46 100644 --- a/spec/ruby/security/cve_2020_10663_spec.rb +++ b/spec/ruby/security/cve_2020_10663_spec.rb @@ -21,7 +21,7 @@ def to_json(*args) guard -> { JSON.const_defined?(:Pure) or - version_is(JSON::VERSION, '2.3.0') + version_is(JSON::VERSION, '2.3.0'...'2.11.0') } do describe "CVE-2020-10663 is resisted by" do it "only creating custom objects if passed create_additions: true or using JSON.load" do diff --git a/spec/ruby/shared/kernel/at_exit.rb b/spec/ruby/shared/kernel/at_exit.rb index 29db79bb391428..d57ab73920f3fa 100644 --- a/spec/ruby/shared/kernel/at_exit.rb +++ b/spec/ruby/shared/kernel/at_exit.rb @@ -60,10 +60,7 @@ result = ruby_exe('{', options: "-r#{script}", args: "2>&1", exit_status: 1) $?.should_not.success? 
result.should.include?("handler ran\n") - - # it's tempting not to rely on error message and rely only on exception class name, - # but CRuby before 3.2 doesn't print class name for syntax error - result.should include_any_of("syntax error", "SyntaxError") + result.should include("SyntaxError") end it "calls the nested handler right after the outer one if a handler is nested into another handler" do diff --git a/spec/ruby/shared/queue/freeze.rb b/spec/ruby/shared/queue/freeze.rb index 4c506a42355f62..5dedd005df4975 100644 --- a/spec/ruby/shared/queue/freeze.rb +++ b/spec/ruby/shared/queue/freeze.rb @@ -1,18 +1,8 @@ describe :queue_freeze, shared: true do - ruby_version_is ""..."3.3" do - it "can be frozen" do - queue = @object.call + it "raises an exception when freezing" do + queue = @object.call + -> { queue.freeze - queue.should.frozen? - end - end - - ruby_version_is "3.3" do - it "raises an exception when freezing" do - queue = @object.call - -> { - queue.freeze - }.should raise_error(TypeError, "cannot freeze #{queue}") - end + }.should raise_error(TypeError, "cannot freeze #{queue}") end end diff --git a/spec/ruby/shared/string/start_with.rb b/spec/ruby/shared/string/start_with.rb index 4b947a3bbf0ea8..9592eda4d43d31 100644 --- a/spec/ruby/shared/string/start_with.rb +++ b/spec/ruby/shared/string/start_with.rb @@ -70,15 +70,7 @@ $1.should be_nil end - ruby_version_is ""..."3.3" do - it "does not check that we are not matching part of a character" do - "\xC3\xA9".send(@method).should.start_with?("\xC3") - end - end - - ruby_version_is "3.3" do # #19784 - it "checks that we are not matching part of a character" do - "\xC3\xA9".send(@method).should_not.start_with?("\xC3") - end + it "checks that we are not matching part of a character" do + "\xC3\xA9".send(@method).should_not.start_with?("\xC3") end end diff --git a/string.c b/string.c index 464eab21463ff3..3cfc77600b6f1b 100644 --- a/string.c +++ b/string.c @@ -1559,7 +1559,7 @@ 
rb_str_tmp_frozen_no_embed_acquire(VALUE orig) } RSTRING(str)->len = RSTRING(orig)->len; - RSTRING(str)->as.heap.aux.capa = capa; + RSTRING(str)->as.heap.aux.capa = capa + (TERM_LEN(orig) - TERM_LEN(str)); return str; } @@ -3135,7 +3135,7 @@ str_subseq(VALUE str, long beg, long len) const int termlen = TERM_LEN(str); if (!SHARABLE_SUBSTRING_P(beg, len, RSTRING_LEN(str))) { - str2 = rb_str_new(RSTRING_PTR(str) + beg, len); + str2 = rb_enc_str_new(RSTRING_PTR(str) + beg, len, rb_str_enc_get(str)); RB_GC_GUARD(str); return str2; } @@ -7814,7 +7814,7 @@ mapping_buffer_free(void *p) while (current_buffer) { previous_buffer = current_buffer; current_buffer = current_buffer->next; - ruby_sized_xfree(previous_buffer, previous_buffer->capa); + ruby_sized_xfree(previous_buffer, offsetof(mapping_buffer, space) + previous_buffer->capa); } } @@ -12174,8 +12174,8 @@ rb_str_unicode_normalized_p(int argc, VALUE *argv, VALUE str) * * First, what's elsewhere. Class +Symbol+: * - * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. - * - Includes {module Comparable}[rdoc-ref:Comparable@What-27s+Here]. + * - Inherits from {class Object}[rdoc-ref:Object@Whats+Here]. + * - Includes {module Comparable}[rdoc-ref:Comparable@Whats+Here]. * * Here, class +Symbol+ provides methods that are useful for: * diff --git a/struct.c b/struct.c index 65410ebdf302b8..61aff40a32c081 100644 --- a/struct.c +++ b/struct.c @@ -2134,8 +2134,8 @@ rb_data_inspect(VALUE s) * * First, what's elsewhere. Class \Struct: * - * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. - * - Includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here], + * - Inherits from {class Object}[rdoc-ref:Object@Whats+Here]. + * - Includes {module Enumerable}[rdoc-ref:Enumerable@Whats+Here], * which provides dozens of additional methods. 
* * See also Data, which is a somewhat similar, but stricter concept for defining immutable diff --git a/test/monitor/test_monitor.rb b/test/monitor/test_monitor.rb index 4c55afca6c869e..7a26831bafe2ba 100644 --- a/test/monitor/test_monitor.rb +++ b/test/monitor/test_monitor.rb @@ -274,7 +274,7 @@ def test_timedwait @monitor.synchronize do queue2.enq(nil) assert_equal("foo", b) - result2 = cond.wait(0.1) + result2 = cond.wait(10) assert_equal(true, result2) assert_equal("bar", b) end diff --git a/test/prism/bom_test.rb b/test/prism/bom_test.rb index 890bc4b36c3ac4..0fa00ae4e844b6 100644 --- a/test/prism/bom_test.rb +++ b/test/prism/bom_test.rb @@ -5,6 +5,7 @@ return if RUBY_ENGINE != "ruby" require_relative "test_helper" +require "ripper" module Prism class BOMTest < TestCase @@ -53,7 +54,7 @@ def test_string def assert_bom(source) bommed = "\xEF\xBB\xBF#{source}" - assert_equal Prism.lex_ripper(bommed), Prism.lex_compat(bommed).value + assert_equal Ripper.lex(bommed), Prism.lex_compat(bommed).value end end end diff --git a/test/prism/errors_test.rb b/test/prism/errors_test.rb index aa264ae5b7a08f..cbe8b06ad659f4 100644 --- a/test/prism/errors_test.rb +++ b/test/prism/errors_test.rb @@ -45,19 +45,19 @@ def test_embdoc_ending def test_unterminated_string_closing statement = Prism.parse_statement("'hello") assert_equal statement.unescaped, "hello" - assert_empty statement.closing + assert_nil statement.closing end def test_unterminated_interpolated_string_closing statement = Prism.parse_statement('"hello') assert_equal statement.unescaped, "hello" - assert_empty statement.closing + assert_nil statement.closing end def test_unterminated_empty_string_closing statement = Prism.parse_statement('"') assert_empty statement.unescaped - assert_empty statement.closing + assert_nil statement.closing end def test_invalid_message_name @@ -84,7 +84,7 @@ def test_regexp_encoding_option_mismatch_error def test_incomplete_def_closing_loc statement = Prism.parse_statement("def f; 
123") - assert_empty(statement.end_keyword) + assert_nil(statement.end_keyword) end private diff --git a/test/prism/lex_test.rb b/test/prism/lex_test.rb index ea4606d2fb6251..9a9f203c280d98 100644 --- a/test/prism/lex_test.rb +++ b/test/prism/lex_test.rb @@ -3,6 +3,7 @@ return if !(RUBY_ENGINE == "ruby" && RUBY_VERSION >= "3.2.0") require_relative "test_helper" +require "ripper" module Prism class LexTest < TestCase @@ -49,7 +50,7 @@ def test_parse_lex_file if RUBY_VERSION >= "3.3" def test_lex_compare prism = Prism.lex_compat(File.read(__FILE__), version: "current").value - ripper = Prism.lex_ripper(File.read(__FILE__)) + ripper = Ripper.lex(File.read(__FILE__)) assert_equal(ripper, prism) end end diff --git a/test/prism/result/breadth_first_search_test.rb b/test/prism/result/breadth_first_search_test.rb index e2e043a902102e..7e7962f1724e47 100644 --- a/test/prism/result/breadth_first_search_test.rb +++ b/test/prism/result/breadth_first_search_test.rb @@ -14,5 +14,16 @@ def test_breadth_first_search refute_nil found assert_equal 8, found.start_offset end + + def test_breadth_first_search_all + result = Prism.parse("[1 + 2, 2]") + found_nodes = + result.value.breadth_first_search_all do |node| + node.is_a?(IntegerNode) + end + + assert_equal 3, found_nodes.size + assert_equal 8, found_nodes[0].start_offset + end end end diff --git a/test/prism/result/overlap_test.rb b/test/prism/result/overlap_test.rb index 155bc870d36f74..d605eeca44f64d 100644 --- a/test/prism/result/overlap_test.rb +++ b/test/prism/result/overlap_test.rb @@ -33,8 +33,13 @@ def assert_overlap(fixture) queue << child if compare - assert_operator current.location.start_offset, :<=, child.location.start_offset - assert_operator current.location.end_offset, :>=, child.location.end_offset + assert_operator current.location.start_offset, :<=, child.location.start_offset, -> { + "[#{fixture.full_path}] Parent node #{current.class} at #{current.location} does not start before child node #{child.class} at 
#{child.location}" + } + + assert_operator current.location.end_offset, :>=, child.location.end_offset, -> { + "[#{fixture.full_path}] Parent node #{current.class} at #{current.location} does not end after child node #{child.class} at #{child.location}" + } end end end diff --git a/test/prism/result/source_location_test.rb b/test/prism/result/source_location_test.rb index 38b971d02b8321..993150f58127d8 100644 --- a/test/prism/result/source_location_test.rb +++ b/test/prism/result/source_location_test.rb @@ -935,16 +935,16 @@ def assert_location(kind, source, expected = 0...source.length, **options) node = yield node if block_given? if expected.begin == 0 - assert_equal 0, node.location.start_column + assert_equal 0, node.location.start_column, "#{kind} start_column" end if expected.end == source.length - assert_equal source.split("\n").last.length, node.location.end_column + assert_equal source.split("\n").last.length, node.location.end_column, "#{kind} end_column" end assert_kind_of kind, node - assert_equal expected.begin, node.location.start_offset - assert_equal expected.end, node.location.end_offset + assert_equal expected.begin, node.location.start_offset, "#{kind} start_offset" + assert_equal expected.end, node.location.end_offset, "#{kind} end_offset" end end end diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb index c8d259135f47cc..a89a9503b98fd4 100644 --- a/test/prism/ruby/ripper_test.rb +++ b/test/prism/ruby/ripper_test.rb @@ -136,7 +136,7 @@ def test_lexer assert_equal(expected, lexer.parse[0].to_a) assert_equal(lexer.parse[0].to_a, lexer.scan[0].to_a) - assert_equal(%i[on_int on_sp on_op], Translation::Ripper::Lexer.new("1 +").lex.map(&:event)) + assert_equal(%i[on_int on_sp on_op], Translation::Ripper::Lexer.new("1 +").lex.map { |token| token[1] }) assert_raise(SyntaxError) { Translation::Ripper::Lexer.new("1 +").lex(raise_errors: true) } end @@ -169,13 +169,13 @@ def assert_ripper_lex(source) # Prism emits tokens by their 
order in the code, not in parse order ripper.sort_by! { |elem| elem[0] } - [prism.size, ripper.size].max.times do |i| - expected = ripper[i] - actual = prism[i] + [prism.size, ripper.size].max.times do |index| + expected = ripper[index] + actual = prism[index] - # Since tokens related to heredocs are not emitted in the same order, - # the state also doesn't line up. - if expected && actual && expected[1] == :on_heredoc_end && actual[1] == :on_heredoc_end + # There are some tokens that have slightly different state that do not + # effect the parse tree, so they may not match. + if expected && actual && expected[1] == actual[1] && %i[on_comment on_heredoc_end on_embexpr_end on_sp].include?(expected[1]) expected[3] = actual[3] = nil end diff --git a/test/prism/ruby/source_test.rb b/test/prism/ruby/source_test.rb index afd2825765f7e0..f7cf4fe83a2ec3 100644 --- a/test/prism/ruby/source_test.rb +++ b/test/prism/ruby/source_test.rb @@ -4,44 +4,48 @@ module Prism class SourceTest < TestCase - def test_line_to_byte_offset - parse_result = Prism.parse(<<~SRC) + def test_byte_offset + source = Prism.parse(<<~SRC).source abcd efgh ijkl SRC - source = parse_result.source - - assert_equal 0, source.line_to_byte_offset(1) - assert_equal 5, source.line_to_byte_offset(2) - assert_equal 10, source.line_to_byte_offset(3) - assert_equal 15, source.line_to_byte_offset(4) - e = assert_raise(ArgumentError) { source.line_to_byte_offset(5) } - assert_equal "line 5 is out of range", e.message - e = assert_raise(ArgumentError) { source.line_to_byte_offset(0) } - assert_equal "line 0 is out of range", e.message - e = assert_raise(ArgumentError) { source.line_to_byte_offset(-1) } - assert_equal "line -1 is out of range", e.message + + assert_equal 0, source.byte_offset(1, 0) + assert_equal 5, source.byte_offset(2, 0) + assert_equal 10, source.byte_offset(3, 0) + assert_equal 15, source.byte_offset(4, 0) + + error = assert_raise(ArgumentError) { source.byte_offset(5, 0) } + assert_equal "line 5 
is out of range", error.message + + error = assert_raise(ArgumentError) { source.byte_offset(0, 0) } + assert_equal "line 0 is out of range", error.message + + error = assert_raise(ArgumentError) { source.byte_offset(-1, 0) } + assert_equal "line -1 is out of range", error.message end - def test_line_to_byte_offset_with_start_line - parse_result = Prism.parse(<<~SRC, line: 11) + def test_byte_offset_with_start_line + source = Prism.parse(<<~SRC, line: 11).source abcd efgh ijkl SRC - source = parse_result.source - - assert_equal 0, source.line_to_byte_offset(11) - assert_equal 5, source.line_to_byte_offset(12) - assert_equal 10, source.line_to_byte_offset(13) - assert_equal 15, source.line_to_byte_offset(14) - e = assert_raise(ArgumentError) { source.line_to_byte_offset(15) } - assert_equal "line 15 is out of range", e.message - e = assert_raise(ArgumentError) { source.line_to_byte_offset(10) } - assert_equal "line 10 is out of range", e.message - e = assert_raise(ArgumentError) { source.line_to_byte_offset(9) } - assert_equal "line 9 is out of range", e.message + + assert_equal 0, source.byte_offset(11, 0) + assert_equal 5, source.byte_offset(12, 0) + assert_equal 10, source.byte_offset(13, 0) + assert_equal 15, source.byte_offset(14, 0) + + error = assert_raise(ArgumentError) { source.byte_offset(15, 0) } + assert_equal "line 15 is out of range", error.message + + error = assert_raise(ArgumentError) { source.byte_offset(10, 0) } + assert_equal "line 10 is out of range", error.message + + error = assert_raise(ArgumentError) { source.byte_offset(9, 0) } + assert_equal "line 9 is out of range", error.message end end end diff --git a/test/ruby/test_ractor.rb b/test/ruby/test_ractor.rb index 6ae511217aca09..a60fb58c5237a1 100644 --- a/test/ruby/test_ractor.rb +++ b/test/ruby/test_ractor.rb @@ -213,6 +213,91 @@ def test_ifunc_proc_not_shareable assert_unshareable(pr, /not supported yet/, exception: RuntimeError) end + def test_error_includes_ivar + obj = Class.new do + 
def initialize + @unshareable = -> {} + end + end.new + assert_unshareable(obj, /from instance variable @unshareable of an instance of # {}], /from Array element at index 1/) + end + + def test_error_includes_hash_key_and_value + assert_unshareable({ unshareable: -> {} }, /from Hash value at key :unshareable/) + end + + def test_error_includes_hash_unshareable_key + assert_unshareable({ -> {} => true }, /from Hash key # {}) + assert_unshareable(s, /from member :member of an instance of TestRactor::S/) + end + + def test_error_includes_block_self + pr = -> {} + assert_unshareable(pr, /from block's self \(an instance of #{self.class.name}\)/) + end + + def test_error_wraps_freeze_error + obj = Class.new do + undef_method :freeze + end.new + e = assert_unshareable(obj, /raised calling #freeze/, exception: Ractor::Error) + assert_equal NoMethodError, e.cause.class + assert_equal :freeze, e.cause.name + end + + def test_error_for_module_instance_variable + assert_ractor(<<~'RUBY') + h = Hash.new {}.freeze + mod = Module.new do + attr_reader :unshareable + @unshareable = h + end + mod.extend(mod) + e = Ractor.new(mod) do |mod| + mod.unshareable + rescue + $! + end.value + assert_kind_of Ractor::IsolationError, e + assert_match(/from Hash default proc/, e.message) + RUBY + end + + def test_error_for_module_constant + assert_ractor(<<~'RUBY') + module ModuleWithUnshareableConstant + UNSHAREABLE = Hash.new {}.freeze + end + + e = Ractor.new do + ModuleWithUnshareableConstant::UNSHAREABLE + rescue + $! 
+ end.value + assert_kind_of(Ractor::IsolationError, e) + assert_match(/from Hash default proc/, e.message) + RUBY + end + def assert_make_shareable(obj) refute Ractor.shareable?(obj), "object was already shareable" Ractor.make_shareable(obj) @@ -221,9 +306,10 @@ def assert_make_shareable(obj) def assert_unshareable(obj, msg=nil, exception: Ractor::IsolationError) refute Ractor.shareable?(obj), "object is already shareable" - assert_raise_with_message(exception, msg) do + e = assert_raise_with_message(exception, msg) do Ractor.make_shareable(obj) end refute Ractor.shareable?(obj), "despite raising, object became shareable" + e end end diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index 2066610cb27be2..7b068e9898fc80 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -1096,16 +1096,68 @@ def test }, call_threshold: 2 end - def test_invokesuper_to_cfunc - assert_compiles '["MyArray", 3]', %q{ - class MyArray < Array + def test_invokesuper_to_cfunc_no_args + assert_compiles '["MyString", 3]', %q{ + class MyString < String def length - ["MyArray", super] + ["MyString", super] end end def test - MyArray.new([1, 2, 3]).length + MyString.new("abc").length + end + + test # profile invokesuper + test # compile + run compiled code + }, call_threshold: 2 + end + + def test_invokesuper_to_cfunc_simple_args + assert_compiles '["MyString", true]', %q{ + class MyString < String + def include?(other) + ["MyString", super(other)] + end + end + + def test + MyString.new("abc").include?("bc") + end + + test # profile invokesuper + test # compile + run compiled code + }, call_threshold: 2 + end + + + def test_invokesuper_to_cfunc_with_optional_arg + assert_compiles '["MyString", 6]', %q{ + class MyString < String + def byteindex(needle, offset = 0) + ["MyString", super(needle, offset)] + end + end + + def test + MyString.new("hello world").byteindex("world") + end + + test # profile invokesuper + test # compile + run compiled code + }, call_threshold: 2 
+ end + + def test_invokesuper_to_cfunc_varargs + assert_compiles '["MyString", true]', %q{ + class MyString < String + def end_with?(str) + ["MyString", super(str)] + end + end + + def test + MyString.new("abc").end_with?("bc") end test # profile invokesuper @@ -1496,6 +1548,46 @@ def test = Child.new.foo(1) }, call_threshold: 2 end + def test_invokesuperforward + assert_compiles '[1, 2, 3]', %q{ + class A + def foo(a,b,c) = [a,b,c] + end + + class B < A + def foo(...) = super + end + + def test + B.new.foo(1, 2, 3) + end + + test + test + }, call_threshold: 2 + end + + def test_invokesuperforward_with_args_kwargs_and_block + assert_compiles '[[1, 2], {x: 3}, 4]', %q{ + class A + def foo(*args, **kwargs, &block) + [args, kwargs, block&.call] + end + end + + class B < A + def foo(...) = super + end + + def test + B.new.foo(1, 2, x: 3) { 4 } + end + + test + test + }, call_threshold: 2 + end + def test_send_with_non_constant_keyword_default assert_compiles '[[2, 4, 16], [10, 4, 16], [2, 20, 16], [2, 4, 30], [10, 20, 30]]', %q{ def dbl(x = 1) = x * 2 diff --git a/timev.rb b/timev.rb index cf8a88e64eff0c..005c3d481a0ebf 100644 --- a/timev.rb +++ b/timev.rb @@ -170,8 +170,8 @@ # # First, what's elsewhere. Class +Time+: # -# - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. -# - Includes {module Comparable}[rdoc-ref:Comparable@What-27s+Here]. +# - Inherits from {class Object}[rdoc-ref:Object@Whats+Here]. +# - Includes {module Comparable}[rdoc-ref:Comparable@Whats+Here]. 
# # Here, class +Time+ provides methods that are useful for: # diff --git a/variable.c b/variable.c index ff8d24d78aef6c..ecca8b89fd25bd 100644 --- a/variable.c +++ b/variable.c @@ -1444,11 +1444,13 @@ rb_ivar_lookup(VALUE obj, ID id, VALUE undef) VALUE val = rb_ivar_lookup(RCLASS_WRITABLE_FIELDS_OBJ(obj), id, undef); if (val != undef && rb_is_instance_id(id) && - UNLIKELY(!rb_ractor_main_p()) && - !rb_ractor_shareable_p(val)) { - rb_raise(rb_eRactorIsolationError, - "can not get unshareable values from instance variables of classes/modules from non-main Ractors (%"PRIsVALUE" from %"PRIsVALUE")", - rb_id2str(id), obj); + UNLIKELY(!rb_ractor_main_p())) { + VALUE chain = Qnil; + if (!rb_ractor_shareable_p_continue(val, &chain)) { + rb_raise(rb_eRactorIsolationError, + "can not get unshareable values from instance variables of classes/modules from non-main Ractors (%"PRIsVALUE" from %"PRIsVALUE")%"PRIsVALUE, + rb_id2str(id), obj, chain); + } } return val; } @@ -3356,8 +3358,11 @@ rb_const_get_0(VALUE klass, ID id, int exclude, int recurse, int visibility) VALUE c = rb_const_search(klass, id, exclude, recurse, visibility, &found_in); if (!UNDEF_P(c)) { if (UNLIKELY(!rb_ractor_main_p())) { - if (!rb_ractor_shareable_p(c)) { - rb_raise(rb_eRactorIsolationError, "can not access non-shareable objects in constant %"PRIsVALUE"::%"PRIsVALUE" by non-main Ractor.", rb_class_path(found_in), rb_id2str(id)); + VALUE chain = Qnil; + if (!rb_ractor_shareable_p_continue(c, &chain)) { + rb_raise(rb_eRactorIsolationError, + "can not access non-shareable objects in constant %"PRIsVALUE"::%"PRIsVALUE" by non-main Ractor.%"PRIsVALUE, + rb_class_path(found_in), rb_id2str(id), chain); } } return c; diff --git a/vm.c b/vm.c index 2cae6779d9cbc1..eb9b719fcc27b5 100644 --- a/vm.c +++ b/vm.c @@ -1568,6 +1568,18 @@ rb_proc_isolate(VALUE self) VALUE rb_proc_ractor_make_shareable(VALUE self, VALUE replace_self) +{ + VALUE chain = Qnil; + if (!rb_proc_ractor_make_shareable_continue(self, 
replace_self, &chain)) { + rb_raise(rb_eRactorIsolationError, + "Proc's self is not shareable: %" PRIsVALUE "%"PRIsVALUE, + self, chain); + } + return self; +} + +bool +rb_proc_ractor_make_shareable_continue(VALUE self, VALUE replace_self, VALUE *chain) { const rb_iseq_t *iseq = vm_proc_iseq(self); @@ -1580,10 +1592,14 @@ rb_proc_ractor_make_shareable(VALUE self, VALUE replace_self) if (proc->block.type != block_type_iseq) rb_raise(rb_eRuntimeError, "not supported yet"); - if (!rb_ractor_shareable_p(vm_block_self(&proc->block))) { - rb_raise(rb_eRactorIsolationError, - "Proc's self is not shareable: %" PRIsVALUE, - self); + VALUE block_self = vm_block_self(&proc->block); + if (!RB_SPECIAL_CONST_P(block_self) && + !RB_OBJ_SHAREABLE_P(block_self)) { + if (!rb_ractor_shareable_p_continue(block_self, chain)) { + rb_ractor_error_chain_append(chain, "\n from block's self (an instance of %"PRIsVALUE")", + rb_class_real(CLASS_OF(block_self))); + return false; + } } VALUE read_only_variables = Qfalse; @@ -1601,15 +1617,15 @@ rb_proc_ractor_make_shareable(VALUE self, VALUE replace_self) if (block->type != block_type_symbol) rb_raise(rb_eRuntimeError, "not supported yet"); VALUE proc_self = vm_block_self(block); - if (!rb_ractor_shareable_p(proc_self)) { - rb_raise(rb_eRactorIsolationError, - "Proc's self is not shareable: %" PRIsVALUE, - self); + if (!rb_ractor_shareable_p_continue(proc_self, chain)) { + rb_ractor_error_chain_append(chain, "\n from proc's self (an instance of %"PRIsVALUE")", + rb_class_real(CLASS_OF(proc_self))); + return false; } } RB_OBJ_SET_FROZEN_SHAREABLE(self); - return self; + return true; } VALUE diff --git a/vm_core.h b/vm_core.h index 68adc5eac16f32..31f0bc0e6078ca 100644 --- a/vm_core.h +++ b/vm_core.h @@ -1297,6 +1297,8 @@ VALUE rb_proc_isolate_bang(VALUE self, VALUE replace_self); VALUE rb_proc_ractor_make_shareable(VALUE proc, VALUE replace_self); RUBY_SYMBOL_EXPORT_END +bool rb_proc_ractor_make_shareable_continue(VALUE self, VALUE 
replace_self, VALUE *chain); + typedef struct { VALUE flags; /* imemo header */ rb_iseq_t *iseq; diff --git a/vm_eval.c b/vm_eval.c index 652fc4d85feac2..cf01b4a62b37da 100644 --- a/vm_eval.c +++ b/vm_eval.c @@ -1776,7 +1776,7 @@ pm_eval_make_iseq(VALUE src, VALUE fname, int line, /* We need to duplicate the string because the Ruby string may * be embedded so compaction could move the string and the pointer * will change. */ - char *name_dup = xmalloc(length + 1); + char *name_dup = malloc(length + 1); // FIXME: using raw `malloc` because that is what Prism uses. strlcpy(name_dup, name, length + 1); RB_GC_GUARD(name_obj); diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 9cb163e97f2d36..d772ad05ec53af 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -1143,9 +1143,11 @@ vm_get_ev_const(rb_execution_context_t *ec, VALUE orig_klass, ID id, bool allow_ } else { if (UNLIKELY(!rb_ractor_main_p())) { - if (!rb_ractor_shareable_p(val)) { + VALUE chain = Qnil; + if (!rb_ractor_shareable_p_continue(val, &chain)) { rb_raise(rb_eRactorIsolationError, - "can not access non-shareable objects in constant %"PRIsVALUE"::%"PRIsVALUE" by non-main ractor.", rb_class_path(klass), rb_id2str(id)); + "can not access non-shareable objects in constant %"PRIsVALUE"::%"PRIsVALUE" by non-main Ractor.%"PRIsVALUE, + rb_class_path(klass), rb_id2str(id), chain); } } return val; @@ -6050,7 +6052,7 @@ vm_define_method(const rb_execution_context_t *ec, VALUE obj, ID id, VALUE iseqv // * If it's VM_BLOCK_HANDLER_NONE, return nil // * If it's an ISEQ or an IFUNC, fetch it from its rb_captured_block // * If it's a PROC or SYMBOL, return it as is -static VALUE +VALUE rb_vm_untag_block_handler(VALUE block_handler) { if (VM_BLOCK_HANDLER_NONE == block_handler) return Qnil; diff --git a/win32/configure.bat b/win32/configure.bat index 9355caa4d852da..d4e010ac4b0374 100755 --- a/win32/configure.bat +++ b/win32/configure.bat @@ -1,7 +1,6 @@ @echo off @setlocal EnableExtensions DisableDelayedExpansion 
|| exit /b -1 set PROMPT=$E[94m+$E[m$S -set witharg= if "%~dp0" == "%CD%\" ( echo don't run in win32 directory. @@ -19,180 +18,141 @@ call set "WIN32DIR=%%WIN32DIR:%~x0:/:=:/:%%" call set "WIN32DIR=%%WIN32DIR:/%~n0:/:=:/:%%" set "WIN32DIR=%WIN32DIR:~0,-3%" -set XINCFLAGS= -set XLDFLAGS= +set configure=%~0 +set optdirs= set pathlist= set config_make=confargs~%RANDOM%.mak -set confargs=%config_make:.mak=.c% +set confargs=%config_make:.mak=.sub% +set debug_configure= echo>%config_make% # CONFIGURE -( - echo #define $ $$ // - echo !ifndef CONFIGURE_ARGS - echo #define CONFIGURE_ARGS \ -) >%confargs% +type nul > %confargs% :loop -set opt=%1 -if "%1" == "" goto :end -if "%1" == "--debug-configure" (echo on & shift & goto :loop) -if "%1" == "--no-debug-configure" (echo off & shift & goto :loop) -if "%1" == "--prefix" goto :prefix -if "%1" == "--srcdir" goto :srcdir -if "%1" == "srcdir" goto :srcdir -if "%1" == "--target" goto :target -if "%1" == "target" goto :target -if "%1" == "--with-static-linked-ext" goto :extstatic -if "%1" == "--program-prefix" goto :pprefix -if "%1" == "--program-suffix" goto :suffix -if "%1" == "--program-transform-name" goto :transform_name -if "%1" == "--program-name" goto :installname -if "%1" == "--install-name" goto :installname -if "%1" == "--so-name" goto :soname -if "%1" == "--enable-install-doc" goto :enable-rdoc -if "%1" == "--disable-install-doc" goto :disable-rdoc -if "%1" == "--enable-install-static-library" goto :enable-lib -if "%1" == "--disable-install-static-library" goto :disable-lib -if "%1" == "--enable-debug-env" goto :enable-debug-env -if "%1" == "--disable-debug-env" goto :disable-debug-env -if "%1" == "--enable-devel" goto :enable-devel -if "%1" == "--disable-devel" goto :disable-devel -if "%1" == "--enable-rubygems" goto :enable-rubygems -if "%1" == "--disable-rubygems" goto :disable-rubygems -if "%1" == "--extout" goto :extout -if "%1" == "--path" goto :path -if "%1" == "--with-baseruby" goto :baseruby -if "%1" == 
"--without-baseruby" goto :nobaseruby -if "%1" == "--with-ntver" goto :ntver -if "%1" == "--with-libdir" goto :libdir -if "%1" == "--with-git" goto :git -if "%1" == "--without-git" goto :nogit -if "%1" == "--without-ext" goto :witharg -if "%1" == "--without-extensions" goto :witharg -if "%1" == "--with-opt-dir" goto :opt-dir -if "%1" == "--with-gmp" goto :gmp -if "%1" == "--with-gmp-dir" goto :gmp-dir -if "%opt:~0,10%" == "--without-" goto :withoutarg -if "%opt:~0,7%" == "--with-" goto :witharg -if "%1" == "-h" goto :help -if "%1" == "--help" goto :help +if [%1] == [] goto :end ; +if "%~1" == "" (shift & goto :loop) +for /f "delims== tokens=1,*" %%I in ("%~1") do ((set "opt=%%I") && (set "arg=%%J")) + set "eq==" + if "%arg%" == "" if not "%~1" == "%opt%=%arg%" (set "eq=") + shift + if "%opt%" == "--debug-configure" ( + echo on + set "debug_configure=yes" + goto :loop ; + ) + if "%opt%" == "--no-debug-configure" ( + echo off + set "debug_configure=" + goto :loop ; + ) + if "%opt%" == "--prefix" goto :dir + if "%opt%" == "srcdir" set "opt=--srcdir" + if "%opt%" == "--srcdir" goto :dir + if "%opt%" == "--target" goto :target + if "%opt%" == "target" goto :target + if "%opt:~0,10%" == "--program-" goto :program_name + if "%opt%" == "--install-name" (set "var=RUBY_INSTALL_NAME" & goto :name) + if "%opt%" == "--so-name" (set "var=RUBY_SO_NAME" & goto :name) + if "%opt%" == "--extout" goto :extout + if "%opt%" == "--path" goto :path + if "%opt:~0,9%" == "--enable-" (set "enable=yes" & goto :enable) + if "%opt:~0,10%" == "--disable-" (set "enable=no" & goto :enable) + if "%opt:~0,10%" == "--without-" goto :withoutarg + if "%opt:~0,7%" == "--with-" goto :witharg + if "%opt%" == "-h" goto :help + if "%opt%" == "--help" goto :help if "%opt:~0,1%" == "-" ( - echo>>%confargs% %1 \ - set witharg= - ) else if "%witharg%" == "" ( - echo>>%confargs% %1 \ + goto :unknown_opt + ) + if "%eq%" == "=" ( + set "var=%opt%" + goto :name + ) + set "arg=%opt%" + set "eq==" + set 
"opt=--target" + set "target=%arg%" +:loopend + if not "%arg%" == "" ( + echo>>%confargs% "%opt%=%arg:$=$$%" \ ) else ( - echo>>%confargs% ,%1\ + echo>>%confargs% "%opt%%eq%" \ ) - shift -goto :loop ; -:srcdir - echo>> %config_make% srcdir = %~2 - echo>>%confargs% --srcdir=%2 \ - shift - shift -goto :loop ; -:prefix - echo>> %config_make% prefix = %~2 - echo>>%confargs% %1=%2 \ - shift - shift -goto :loop ; -:pprefix - echo>> %config_make% PROGRAM_PREFIX = %~2 - echo>>%confargs% %1=%2 \ - shift - shift -goto :loop ; -:suffix - echo>> %config_make% PROGRAM_SUFFIX = %~2 - echo>>%confargs% %1=%2 \ - shift - shift -goto :loop ; -:installname - echo>> %config_make% RUBY_INSTALL_NAME = %~2 - echo>>%confargs% %1=%2 \ - shift - shift -goto :loop ; -:soname - echo>> %config_make% RUBY_SO_NAME = %~2 - echo>>%confargs% %1=%2 \ - shift - shift -goto :loop ; -:transform_name - - shift - shift goto :loop ; :target - echo>> %config_make% target = %~2 - echo>>%confargs% --target=%2 \ - if "%~2" == "x64-mswin64" ( - echo>> %config_make% TARGET_OS = mswin64 + if "%eq%" == "" (set "arg=%~1" & shift) + if "%arg%" == "" ( + echo 1>&2 %configure%: missing argument for %opt% + exit /b 1 ) - shift - shift -goto :loop ; -:extstatic - echo>> %config_make% EXTSTATIC = static - echo>>%confargs% %1 \ - shift -goto :loop ; -:enable-rdoc - echo>> %config_make% RDOCTARGET = rdoc - echo>>%confargs% %1 \ - shift -goto :loop ; -:disable-rdoc - echo>> %config_make% RDOCTARGET = nodoc - echo>>%confargs% %1 \ - shift -goto :loop ; -:enable-lib - echo>> %config_make% INSTALL_STATIC_LIBRARY = yes - echo>>%confargs% %1 \ - shift -goto :loop ; -:disable-lib - echo>> %config_make% INSTALL_STATIC_LIBRARY = no - echo>>%confargs% %1 \ - shift -goto :loop ; -:enable-debug-env - echo>> %config_make% ENABLE_DEBUG_ENV = yes - echo>>%confargs% %1 \ - shift -goto :loop ; -:disable-debug-env - echo>> %config_make% ENABLE_DEBUG_ENV = no - echo>>%confargs% %1 \ - shift + set "target=%arg%" + set "opt=--target" + 
echo>>%confargs% "--target=%arg:$=$$%" \ goto :loop ; -:enable-devel - echo>> %config_make% RUBY_DEVEL = yes - echo>>%confargs% %1 \ - shift -goto :loop ; -:disable-devel - echo>> %config_make% RUBY_DEVEL = no - echo>>%confargs% %1 \ - shift +:program_name + if "%eq%" == "" (set "arg=%~1" & shift) + for /f "delims=- tokens=1,*" %I in ("%opt%") do set "var=%%J" + if "%var%" == "prefix" (set "var=PROGRAM_PREFIX" & goto :name) + if "%var%" == "suffix" (set "var=PROGRAM_SUFFIX" & goto :name) + if "%var%" == "name" (set "var=RUBY_INSTALL_NAME" & goto :name) + if "%var%" == "transform-name" ( + echo.1>&2 %configure%: --program-transform-name option is not supported + exit /b 1 + ) +goto :unknown_opt +:name + if "%eq%" == "" (set "arg=%~1" & shift) + echo>> %config_make% %var% = %arg% +goto :loopend ; +:dir + if "%eq%" == "" (set "arg=%~1" & shift) + echo>> %config_make% %opt:~2% = %arg:\=/% +goto :loopend ; +:enable + echo>>%confargs% "%opt%" \ + if %enable% == yes (set "opt=%opt:~9%") else (set "opt=%opt:~10%") + if "%opt%" == "rdoc" ( + echo>> %config_make% RDOCTARGET = %enable:yes=r%doc + ) + if "%opt%" == "install-static-library" ( + echo>> %config_make% INSTALL_STATIC_LIBRARY = %enable% + ) + if "%opt%" == "debug-env" ( + echo>> %config_make% ENABLE_DEBUG_ENV = %enable% + ) + if "%opt%" == "devel" ( + echo>> %config_make% RUBY_DEVEL = %enable% + ) + if "%opt%" == "rubygems" ( + echo>> %config_make% USE_RUBYGEMS = %enable% + ) goto :loop ; -:enable-rubygems - echo>> %config_make% USE_RUBYGEMS = yes - echo>>%confargs% %1 \ - shift +:withoutarg + echo>>%confargs% "%opt%" \ + if "%opt%" == "--without-baseruby" goto :nobaseruby + if "%opt%" == "--without-git" goto :nogit + if "%opt%" == "--without-ext" goto :witharg + if "%opt%" == "--without-extensions" goto :witharg goto :loop ; -:disable-rubygems - echo>> %config_make% USE_RUBYGEMS = no - echo>>%confargs% %1 \ - shift +:witharg + if "%opt%" == "--with-static-linked-ext" goto :extstatic + if "%eq%" == "" (set "arg=%~1" 
& shift) + if not "%arg%" == "" ( + echo>>%confargs% "%opt%=%arg:$=$$%" \ + ) else ( + echo>>%confargs% "%opt%%eq%" \ + ) + if "%opt%" == "--with-baseruby" goto :baseruby + if "%opt%" == "--with-ntver" goto :ntver + if "%opt%" == "--with-libdir" goto :libdir + if "%opt%" == "--with-git" goto :git + if "%opt%" == "--with-opt-dir" goto :opt-dir + if "%opt%" == "--with-gmp-dir" goto :opt-dir + if "%opt%" == "--with-gmp" goto :gmp + if "%opt%" == "--with-destdir" goto :destdir goto :loop ; :ntver ::- For version constants, see ::- https://learn.microsoft.com/en-us/cpp/porting/modifying-winver-and-win32-winnt#remarks - set NTVER=%~2 + if "%eq%" == "" (set "NTVER=%~1" & shift) else (set "NTVER=%arg%") if /i not "%NTVER:~0,2%" == "0x" if /i not "%NTVER:~0,13%" == "_WIN32_WINNT_" ( for %%i in (A B C D E F G H I J K L M N O P Q R S T U V W X Y Z) do ( call set NTVER=%%NTVER:%%i=%%i%% @@ -200,83 +160,60 @@ goto :loop ; call set NTVER=_WIN32_WINNT_%%NTVER%% ) echo>> %config_make% NTVER = %NTVER% - echo>>%confargs% %1=%2 \ - shift - shift -goto :loop ; +goto :loopend ; :extout - if not "%~2" == ".ext" (echo>> %config_make% EXTOUT = %~2) - echo>>%confargs% %1=%2 \ - shift - shift -goto :loop ; + if "%eq%" == "" (set "arg=%~1" & shift) + if not "%arg%" == ".ext" (echo>> %config_make% EXTOUT = %arg%) +goto :loopend ; :path - set pathlist=%pathlist%%~2; - echo>>%confargs% %1=%2 \ - shift - shift -goto :loop ; + if "%eq%" == "" (set "arg=%~1" & shift) + set "pathlist=%pathlist%%arg:\=/%;" +goto :loopend ; +:extstatic + if "%eq%" == "" (set "arg=static" & shift) + echo>> %config_make% EXTSTATIC = %arg% +goto :loopend ; :baseruby echo>> %config_make% HAVE_BASERUBY = yes - echo>> %config_make% BASERUBY = %~2 - echo>>%confargs% %1=%2 \ - shift - shift + echo>> %config_make% BASERUBY = %arg% goto :loop ; :nobaseruby echo>> %config_make% HAVE_BASERUBY = no echo>> %config_make% BASERUBY = - echo>>%confargs% %1 \ - shift goto :loop ; :libdir - echo>> %config_make% libdir_basename = %~2 - 
echo>>%confargs% %1=%2 \ - shift - shift + echo>> %config_make% libdir_basename = %arg% goto :loop ; :git - echo>> %config_make% GIT = %~2 - echo>>%confargs% %1=%2 \ - shift - shift + echo>> %config_make% GIT = %arg% goto :loop ; :nogit echo>> %config_make% GIT = never-use echo>> %config_make% HAVE_GIT = no - echo>>%confargs% %1 \ - shift goto :loop ; :gmp echo>> %config_make% WITH_GMP = yes - echo>>%confargs% %1 \ - shift goto :loop ; -:gmp-dir +:destdir + echo>> %config_make% DESTDIR = %arg% +goto :loop ; :opt-dir - set opt=%~2 - for %%I in (%opt:;= %) do ( - set d=%%I - call pushd %%d:/=\%% && ( - call set XINCFLAGS=%%XINCFLAGS%% -I%%CD:\=/%%/include - call set XLDFLAGS=%%XLDFLAGS%% -libpath:%%CD:\=/%%/lib + if "%arg%" == "" ( + echo 1>&2 %configure%: missing argument for %opt% + exit /b 1 + ) + :optdir-loop + for /f "delims=; tokens=1,*" %%I in ("%arg%") do (set "d=%%I" & set "arg=%%J") + pushd %d:/=\% && ( + set "optdirs=%optdirs%;%CD:\=/%" popd ) - ) -:witharg - echo>>%confargs% %1=%2\ - set witharg=1 - shift - shift -goto :loop ; -:withoutarg - echo>>%confargs% %1 \ - shift + if not "%arg%" == "" goto :optdir-loop goto :loop ; :help echo Configuration: echo --help display this help - echo --srcdir=DIR find the sources in DIR [configure dir or `..'] + echo --srcdir=DIR find the sources in DIR [configure dir or '..'] echo Installation directories: echo --prefix=PREFIX install files in PREFIX [/usr] echo System types: @@ -286,36 +223,50 @@ goto :loop ; echo --with-static-linked-ext link external modules statically echo --with-ext="a,b,..." use extensions a, b, ... echo --without-ext="a,b,..." ignore extensions a, b, ... 
- echo --with-opt-dir="DIR-LIST" add optional headers and libraries directories separated by `;' + echo --with-opt-dir="DIR-LIST" add optional headers and libraries directories separated by ';' echo --disable-install-doc do not install rdoc indexes during install echo --with-ntver=0xXXXX target NT version (shouldn't use with old SDK) echo --with-ntver=_WIN32_WINNT_XXXX echo --with-ntver=XXXX same as --with-ntver=_WIN32_WINNT_XXXX - echo Note that `,' and `;' need to be enclosed within double quotes in batch file command line. + echo Note that '=,;' need to be enclosed within double quotes in batch file command line. del %confargs% %config_make% -goto :exit +goto :EOF +:unknown_opt + ( + echo %configure%: unknown option %opt% + echo Try --help option. + ) 1>&2 + exit /b 1 :end +if "%debug_configure%" == "yes" (type %confargs%) +if not "%optdirs%" == "" (echo>>%config_make% optdirs = %optdirs:~1%) ( - echo // - echo configure_args = CONFIGURE_ARGS - echo !endif - echo #undef $ -) >> %confargs% -( - cl -EP %confargs% 2>nul | findstr "! =" echo. - if NOT "%XINCFLAGS%" == "" echo XINCFLAGS = %XINCFLAGS% - if NOT "%XLDFLAGS%" == "" echo XLDFLAGS = %XLDFLAGS% - if NOT "%pathlist%" == "" ( + echo configure_args = \ + type %confargs% + echo # configure_args + + echo. + echo !if "$(optdirs)" != "" + for %%I in ("$(optdirs:\=/)" "$(optdirs:/;=;)") do @echo optdirs = %%~I + echo XINCFLAGS = -I"$(optdirs:;=/include" -I")/include" + echo XLDFLAGS = -libpath:"$(optdirs:;=/lib" -libpath:")/lib" + echo !endif + + if not "%pathlist%" == "" ( + echo. 
call echo PATH = %%pathlist:;=/bin;%%$^(PATH^) call echo INCLUDE = %%pathlist:;=/include;%%$^(INCLUDE^) call echo LIB = %%pathlist:;=/lib;%%$^(LIB^) ) ) >> %config_make% -del %confargs% > nul + +del %confargs% +if "%debug_configure%" == "yes" (type %config_make%) nmake -al -f %WIN32DIR%/setup.mak "WIN32DIR=%WIN32DIR%" ^ config_make=%config_make% ^ - MAKEFILE=Makefile.new MAKEFILE_BACK=Makefile.old MAKEFILE_NEW=Makefile -:exit -@endlocal + MAKEFILE=Makefile.new MAKEFILE_BACK=Makefile.old MAKEFILE_NEW=Makefile ^ + %target% +set error=%ERRORLEVEL% +if exist %config_make% del /q %config_make% diff --git a/win32/enc-setup.mak b/win32/enc-setup.mak index 44345a2f45d6ed..b012161e3256cc 100644 --- a/win32/enc-setup.mak +++ b/win32/enc-setup.mak @@ -4,7 +4,7 @@ BUILTIN_ENCOBJS BUILTIN_TRANSOBJS: $(srcdir)/enc/Makefile.in !include $(srcdir)/enc/Makefile.in BUILTIN_ENCOBJS: - @echo BUILTIN_ENCOBJS = $(BUILTIN_ENCS:.c=.obj) >> $(MAKEFILE) + @echo>> $(MAKEFILE) BUILTIN_ENCOBJS = $(BUILTIN_ENCS:.c=.obj) BUILTIN_TRANSOBJS: - @echo BUILTIN_TRANSOBJS = $(BUILTIN_TRANSES:.trans=.obj) >> $(MAKEFILE) + @echo>> $(MAKEFILE) BUILTIN_TRANSOBJS = $(BUILTIN_TRANSES:.trans=.obj) diff --git a/win32/setup.mak b/win32/setup.mak index 6fc28ebafbc996..19ace3445c2568 100644 --- a/win32/setup.mak +++ b/win32/setup.mak @@ -22,7 +22,7 @@ MAKE = $(MAKE) -f $(MAKEFILE) MAKEFILE = Makefile !endif CPU = PROCESSOR_LEVEL -CC = $(CC) -nologo -source-charset:utf-8 +CC = $(CC) -nologo CPP = $(CC) -EP !if "$(HAVE_BASERUBY)" != "no" && "$(BASERUBY)" == "" BASERUBY = ruby @@ -35,13 +35,14 @@ i586-mswin32: -prologue- -i586- -epilogue- i686-mswin32: -prologue- -i686- -epilogue- alpha-mswin32: -prologue- -alpha- -epilogue- x64-mswin64: -prologue- -x64- -epilogue- +arm64-mswin64: -prologue- -arm64- -epilogue- -prologue-: -basic-vars- -baseruby- -gmp- -generic-: -osname- -basic-vars-: nul @rem <<$(MAKEFILE) -### Makefile for ruby $(TARGET_OS) ### +### Makefile for ruby ### MAKE = nmake srcdir = $(srcdir:\=/) prefix = 
$(prefix:\=/) @@ -70,20 +71,31 @@ int main(void) {mpz_init(x); return 0;} @echo # TARGET>>$(MAKEFILE) -osname32-: -osname-section- - @echo TARGET_OS = mswin32>>$(MAKEFILE) + @$(CPP) -Tc <<"checking if target OS is 32bit" >>$(MAKEFILE) +#ifdef _WIN64 +#error +#else +TARGET_OS = mswin32 +#endif +<< -osname64-: -osname-section- - @echo TARGET_OS = mswin64>>$(MAKEFILE) + @$(CPP) -Tc <<"checking if target OS is 64bit" >>$(MAKEFILE) +#ifndef _WIN64 +#error +#else +TARGET_OS = mswin64 +#endif +<< -osname-: -osname-section- - @echo !ifndef TARGET_OS>>$(MAKEFILE) - @($(CC) -c < nul && (echo TARGET_OS = mswin32) || (echo TARGET_OS = mswin64)) >>$(MAKEFILE) + @$(CPP) -Tc <<"checking for target OS" 2>nul | findstr = >>$(MAKEFILE) #ifdef _WIN64 -#error +TARGET_OS = mswin64 +#else +TARGET_OS = mswin32 #endif << - @echo !endif>>$(MAKEFILE) - @$(WIN32DIR:/=\)\rm.bat conftest.* -compiler-: -compiler-section- -version- -runtime- -headers- @@ -211,27 +223,54 @@ del %0 & exit << -generic-: nul - @$(CPP) <nul | findstr = >>$(MAKEFILE) + @$(CPP) -Tc <nul | findstr = >>$(MAKEFILE) #if defined _M_ARM64 MACHINE = arm64 #elif defined _M_X64 MACHINE = x64 #else MACHINE = x86 -#endif -<< !if defined($(CPU)) - @echo>>$(MAKEFILE) $(CPU) = $(PROCESSOR_LEVEL) +$(CPU) = $(PROCESSOR_LEVEL) !endif +#endif -alpha-: -osname32- - @echo MACHINE = alpha>>$(MAKEFILE) + @$(CPP) -Tc <<"checking if compiler is for $(@:-=)" >>$(MAKEFILE) +#ifndef _M_ALPHA +#error Not compiler for $(@:-=) +#else +MACHINE = $(@:-=) +#endif +<< + -x64-: -osname64- - @echo MACHINE = x64>>$(MAKEFILE) + @$(CPP) -Tc <<"checking if compiler is for $(@:-=)" >>$(MAKEFILE) +#ifndef _M_AMD64 +#error Not compiler for $(@:-=) +#else +MACHINE = $(@:-=) +#endif +<< + -ix86-: -osname32- - @echo MACHINE = x86>>$(MAKEFILE) + @$(CPP) -Tc <<"checking if compiler is for $(@:-=)" >>$(MAKEFILE) +#ifndef _M_IX86 +#error Not compiler for $(@:-=) +#else +#define ix86 x86 +MACHINE = $(@:-=) +#endif +<< + -arm64-: -osname64- - @echo MACHINE = 
arm64>>$(MAKEFILE) + @$(CPP) -Tc <<"checking if compiler is for $(@:-=)" >>$(MAKEFILE) +#ifndef _M_ARM64 +#error Not compiler for $(@:-=) +#else +MACHINE = $(@:-=) +#endif +<< -i386-: -ix86- @echo $(CPU) = 3>>$(MAKEFILE) @@ -261,7 +300,7 @@ MACHINE = x86 # XLDFLAGS = # RFLAGS = -r # EXTLIBS = -CC = $(CC) +CC = $(CC) -source-charset:utf-8 !if "$(AS)" != "ml64" AS = $(AS) -nologo !endif @@ -277,5 +316,5 @@ $(BANG)include $$(srcdir)/win32/Makefile.sub << @$(COMSPEC) /C $(srcdir:/=\)\win32\rm.bat config.h config.status -@move /y $(MAKEFILE_NEW) $(MAKEFILE_BACK) > nul 2> nul - @ren $(MAKEFILE) $(MAKEFILE_NEW) + @move /y $(MAKEFILE) $(MAKEFILE_NEW) > nul @echo type 'nmake' to make ruby. diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index 56994388a3a4a1..9216802a3c1919 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -917,38 +917,39 @@ pub const YARVINSN_trace_setlocal_WC_0: ruby_vminsn_type = 214; pub const YARVINSN_trace_setlocal_WC_1: ruby_vminsn_type = 215; pub const YARVINSN_trace_putobject_INT2FIX_0_: ruby_vminsn_type = 216; pub const YARVINSN_trace_putobject_INT2FIX_1_: ruby_vminsn_type = 217; -pub const YARVINSN_zjit_getinstancevariable: ruby_vminsn_type = 218; -pub const YARVINSN_zjit_setinstancevariable: ruby_vminsn_type = 219; -pub const YARVINSN_zjit_definedivar: ruby_vminsn_type = 220; -pub const YARVINSN_zjit_send: ruby_vminsn_type = 221; -pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 222; -pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 223; -pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 224; -pub const YARVINSN_zjit_invokesuper: ruby_vminsn_type = 225; -pub const YARVINSN_zjit_invokeblock: ruby_vminsn_type = 226; -pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 227; -pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 228; -pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 229; -pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 230; -pub const 
YARVINSN_zjit_opt_mod: ruby_vminsn_type = 231; -pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 232; -pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 233; -pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 234; -pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 235; -pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 236; -pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 237; -pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 238; -pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 239; -pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 240; -pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 241; -pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 242; -pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 243; -pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 244; -pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 245; -pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 246; -pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 247; -pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 248; -pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 249; +pub const YARVINSN_zjit_getblockparamproxy: ruby_vminsn_type = 218; +pub const YARVINSN_zjit_getinstancevariable: ruby_vminsn_type = 219; +pub const YARVINSN_zjit_setinstancevariable: ruby_vminsn_type = 220; +pub const YARVINSN_zjit_definedivar: ruby_vminsn_type = 221; +pub const YARVINSN_zjit_send: ruby_vminsn_type = 222; +pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 223; +pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 224; +pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 225; +pub const YARVINSN_zjit_invokesuper: ruby_vminsn_type = 226; +pub const YARVINSN_zjit_invokeblock: ruby_vminsn_type = 227; +pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 228; +pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 229; +pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 230; +pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 231; +pub const 
YARVINSN_zjit_opt_mod: ruby_vminsn_type = 232; +pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 233; +pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 234; +pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 235; +pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 236; +pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 237; +pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 238; +pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 239; +pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 240; +pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 241; +pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 242; +pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 243; +pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 244; +pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 245; +pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 246; +pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 247; +pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 248; +pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 249; +pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 250; pub type ruby_vminsn_type = u32; pub type rb_iseq_callback = ::std::option::Option< unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void), diff --git a/zjit.c b/zjit.c index 9560d88130b03f..0c463334cde42a 100644 --- a/zjit.c +++ b/zjit.c @@ -300,6 +300,7 @@ rb_zjit_class_has_default_allocator(VALUE klass) } +VALUE rb_vm_untag_block_handler(VALUE block_handler); VALUE rb_vm_get_untagged_block_handler(rb_control_frame_t *reg_cfp); void diff --git a/zjit.rb b/zjit.rb index 0bd6c1b96d36d0..0cc9ca8261a274 100644 --- a/zjit.rb +++ b/zjit.rb @@ -191,6 +191,7 @@ def stats_string print_counters_with_prefix(prefix: 'getivar_fallback_', prompt: 'getivar fallback reasons', buf:, stats:, limit: 5) print_counters_with_prefix(prefix: 'definedivar_fallback_', prompt: 'definedivar fallback reasons', buf:, stats:, limit: 5) print_counters_with_prefix(prefix: 
'invokeblock_handler_', prompt: 'invokeblock handler', buf:, stats:, limit: 10) + print_counters_with_prefix(prefix: 'getblockparamproxy_handler_', prompt: 'getblockparamproxy handler', buf:, stats:, limit: 10) # Show most popular unsupported call features. Because each call can # use multiple complex features, a decrease in this number does not diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs index 794293d1d321c7..d71e75c444af69 100644 --- a/zjit/bindgen/src/main.rs +++ b/zjit/bindgen/src/main.rs @@ -409,6 +409,7 @@ fn main() { .allowlist_function("rb_str_neq_internal") .allowlist_function("rb_yarv_ary_entry_internal") .allowlist_function("rb_vm_get_untagged_block_handler") + .allowlist_function("rb_vm_untag_block_handler") .allowlist_function("rb_FL_TEST") .allowlist_function("rb_FL_TEST_RAW") .allowlist_function("rb_RB_TYPE_P") diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs index d06e84536f6345..a1836ea9dfb3a4 100644 --- a/zjit/src/backend/arm64/mod.rs +++ b/zjit/src/backend/arm64/mod.rs @@ -390,7 +390,7 @@ impl Assembler { } let mut asm_local = Assembler::new_with_asm(&self); - let live_ranges: Vec = take(&mut self.live_ranges); + let live_ranges = take(&mut self.live_ranges); let mut iterator = self.instruction_iterator(); let asm = &mut asm_local; @@ -755,7 +755,7 @@ impl Assembler { asm_local.accept_scratch_reg = true; asm_local.stack_base_idx = self.stack_base_idx; asm_local.label_names = self.label_names.clone(); - asm_local.live_ranges.resize(self.live_ranges.len(), LiveRange { start: None, end: None }); + asm_local.live_ranges = LiveRanges::new(self.live_ranges.len()); // Create one giant block to linearize everything into asm_local.new_block_without_id(); @@ -1691,7 +1691,7 @@ impl Assembler { /// /// If a, b, and c are all registers. 
fn merge_three_reg_mov( - live_ranges: &[LiveRange], + live_ranges: &LiveRanges, iterator: &mut InsnIter, asm: &mut Assembler, left: &Opnd, @@ -1717,7 +1717,6 @@ mod tests { use super::*; use insta::assert_snapshot; - use crate::hir; static TEMP_REGS: [Reg; 5] = [X1_REG, X9_REG, X10_REG, X14_REG, X15_REG]; @@ -1753,7 +1752,7 @@ mod tests { asm.cret(val64); asm.frame_teardown(JIT_PRESERVED_REGS); - assert_disasm_snapshot!(lir_string(&mut asm), @r" + assert_disasm_snapshot!(lir_string(&mut asm), @" bb0: # bb0(): foo@/tmp/a.rb:1 FrameSetup 1, x19, x21, x20 @@ -1766,6 +1765,7 @@ mod tests { Je bb0 CRet v0 FrameTeardown x19, x21, x20 + PadPatchPoint "); } diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs index f2f7bc61659d04..b2ec95a9d4a84c 100644 --- a/zjit/src/backend/lir.rs +++ b/zjit/src/backend/lir.rs @@ -21,18 +21,33 @@ use crate::state::rb_zjit_record_exit_stack; #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug, PartialOrd, Ord)] pub struct BlockId(pub usize); +#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug, PartialOrd, Ord)] +pub struct VRegId(pub usize); + impl From for usize { fn from(val: BlockId) -> Self { val.0 } } +impl From for usize { + fn from(val: VRegId) -> Self { + val.0 + } +} + impl std::fmt::Display for BlockId { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "l{}", self.0) } } +impl std::fmt::Display for VRegId { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "v{}", self.0) + } +} + /// Dummy HIR block ID used when creating test or invalid LIR blocks const DUMMY_HIR_BLOCK_ID: usize = usize::MAX; /// Dummy RPO index used when creating test or invalid LIR blocks @@ -131,7 +146,7 @@ pub enum MemBase /// Register: Every Opnd::Mem should have MemBase::Reg as of emit. Reg(u8), /// Virtual register: Lowered to MemBase::Reg or MemBase::Stack in alloc_regs. - VReg(usize), + VReg(VRegId), /// Stack slot: Lowered to MemBase::Reg in scratch_split. 
Stack { stack_idx: usize, num_bits: u8 }, } @@ -158,7 +173,7 @@ impl fmt::Display for Mem { write!(f, "[")?; match self.base { MemBase::Reg(reg_no) => write!(f, "{}", mem_base_reg(reg_no))?, - MemBase::VReg(idx) => write!(f, "v{idx}")?, + MemBase::VReg(idx) => write!(f, "{idx}")?, MemBase::Stack { stack_idx, num_bits } if num_bits == 64 => write!(f, "Stack[{stack_idx}]")?, MemBase::Stack { stack_idx, num_bits } => write!(f, "Stack{num_bits}[{stack_idx}]")?, } @@ -196,7 +211,7 @@ pub enum Opnd Value(VALUE), /// Virtual register. Lowered to Reg or Mem in Assembler::alloc_regs(). - VReg{ idx: usize, num_bits: u8 }, + VReg{ idx: VRegId, num_bits: u8 }, // Low-level operands, for lowering Imm(i64), // Raw signed immediate @@ -212,8 +227,8 @@ impl fmt::Display for Opnd { None => write!(f, "None"), Value(VALUE(value)) if *value < 10 => write!(f, "Value({value:x})"), Value(VALUE(value)) => write!(f, "Value(0x{value:x})"), - VReg { idx, num_bits } if *num_bits == 64 => write!(f, "v{idx}"), - VReg { idx, num_bits } => write!(f, "VReg{num_bits}(v{idx})"), + VReg { idx, num_bits } if *num_bits == 64 => write!(f, "{idx}"), + VReg { idx, num_bits } => write!(f, "VReg{num_bits}({idx})"), Imm(value) if value.abs() < 10 => write!(f, "Imm({value:x})"), Imm(value) => write!(f, "Imm(0x{value:x})"), UImm(value) if *value < 10 => write!(f, "{value:x}"), @@ -282,7 +297,7 @@ impl Opnd } /// Unwrap the index of a VReg - pub fn vreg_idx(&self) -> usize { + pub fn vreg_idx(&self) -> VRegId { match self { Opnd::VReg { idx, .. 
} => *idx, _ => unreachable!("trying to unwrap {self:?} into VReg"), @@ -321,10 +336,10 @@ impl Opnd pub fn map_index(self, indices: &[usize]) -> Opnd { match self { Opnd::VReg { idx, num_bits } => { - Opnd::VReg { idx: indices[idx], num_bits } + Opnd::VReg { idx: VRegId(indices[idx.0]), num_bits } } Opnd::Mem(Mem { base: MemBase::VReg(idx), disp, num_bits }) => { - Opnd::Mem(Mem { base: MemBase::VReg(indices[idx]), disp, num_bits }) + Opnd::Mem(Mem { base: MemBase::VReg(VRegId(indices[idx.0])), disp, num_bits }) }, _ => self } @@ -1355,12 +1370,44 @@ impl LiveRange { } } +/// Type-safe wrapper around `Vec` that can be indexed by VRegId +#[derive(Clone, Debug, Default)] +pub struct LiveRanges(Vec); + +impl LiveRanges { + pub fn new(size: usize) -> Self { + Self(vec![LiveRange { start: None, end: None }; size]) + } + + pub fn len(&self) -> usize { + self.0.len() + } + + pub fn get(&self, vreg_id: VRegId) -> Option<&LiveRange> { + self.0.get(vreg_id.0) + } +} + +impl std::ops::Index for LiveRanges { + type Output = LiveRange; + + fn index(&self, idx: VRegId) -> &Self::Output { + &self.0[idx.0] + } +} + +impl std::ops::IndexMut for LiveRanges { + fn index_mut(&mut self, idx: VRegId) -> &mut Self::Output { + &mut self.0[idx.0] + } +} + /// StackState manages which stack slots are used by which VReg pub struct StackState { /// The maximum number of spilled VRegs at a time stack_size: usize, /// Map from index at the C stack for spilled VRegs to Some(vreg_idx) if allocated - stack_slots: Vec>, + stack_slots: Vec>, /// Copy of Assembler::stack_base_idx. Used for calculating stack slot offsets. 
stack_base_idx: usize, } @@ -1376,7 +1423,7 @@ impl StackState { } /// Allocate a stack slot for a given vreg_idx - fn alloc_stack(&mut self, vreg_idx: usize) -> Opnd { + fn alloc_stack(&mut self, vreg_idx: VRegId) -> Opnd { for stack_idx in 0..self.stack_size { if self.stack_slots[stack_idx].is_none() { self.stack_slots[stack_idx] = Some(vreg_idx); @@ -1437,7 +1484,7 @@ struct RegisterPool { /// Some(vreg_idx) if the register at the index in `pool` is used by the VReg. /// None if the register is not in use. - pool: Vec>, + pool: Vec>, /// The number of live registers. /// Provides a quick way to query `pool.filter(|r| r.is_some()).count()` @@ -1461,7 +1508,7 @@ impl RegisterPool { /// Mutate the pool to indicate that the register at the index /// has been allocated and is live. - fn alloc_opnd(&mut self, vreg_idx: usize) -> Opnd { + fn alloc_opnd(&mut self, vreg_idx: VRegId) -> Opnd { for (reg_idx, reg) in self.regs.iter().enumerate() { if self.pool[reg_idx].is_none() { self.pool[reg_idx] = Some(vreg_idx); @@ -1473,7 +1520,7 @@ impl RegisterPool { } /// Allocate a specific register - fn take_reg(&mut self, reg: &Reg, vreg_idx: usize) -> Opnd { + fn take_reg(&mut self, reg: &Reg, vreg_idx: VRegId) -> Opnd { let reg_idx = self.regs.iter().position(|elem| elem.reg_no == reg.reg_no) .unwrap_or_else(|| panic!("Unable to find register: {}", reg.reg_no)); assert_eq!(self.pool[reg_idx], None, "register already allocated for VReg({:?})", self.pool[reg_idx]); @@ -1499,7 +1546,7 @@ impl RegisterPool { } /// Return a list of (Reg, vreg_idx) tuples for all live registers - fn live_regs(&self) -> Vec<(Reg, usize)> { + fn live_regs(&self) -> Vec<(Reg, VRegId)> { let mut live_regs = Vec::with_capacity(self.live_regs); for (reg_idx, ®) in self.regs.iter().enumerate() { if let Some(vreg_idx) = self.pool[reg_idx] { @@ -1510,7 +1557,7 @@ impl RegisterPool { } /// Return vreg_idx if a given register is already in use - fn vreg_for(&self, reg: &Reg) -> Option { + fn vreg_for(&self, 
reg: &Reg) -> Option { let reg_idx = self.regs.iter().position(|elem| elem.reg_no == reg.reg_no).unwrap(); self.pool[reg_idx] } @@ -1536,7 +1583,7 @@ pub struct Assembler { current_block_id: BlockId, /// Live range for each VReg indexed by its `idx`` - pub(super) live_ranges: Vec, + pub(super) live_ranges: LiveRanges, /// Names of labels pub(super) label_names: Vec, @@ -1568,7 +1615,7 @@ impl Assembler leaf_ccall_stack_size: None, basic_blocks: Vec::default(), current_block_id: BlockId(0), - live_ranges: Vec::default(), + live_ranges: LiveRanges::default(), idx: 0, } } @@ -1602,7 +1649,7 @@ impl Assembler // Initialize live_ranges to match the old assembler's size // This allows reusing VRegs from the old assembler - asm.live_ranges.resize(old_asm.live_ranges.len(), LiveRange { start: None, end: None }); + asm.live_ranges = LiveRanges::new(old_asm.live_ranges.len()); asm } @@ -1698,11 +1745,25 @@ impl Assembler // Emit instructions with labels, expanding branch parameters let mut insns = Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY); - for block in self.sorted_blocks() { + let blocks = self.sorted_blocks(); + let num_blocks = blocks.len(); + + for (block_id, block) in blocks.iter().enumerate() { + // Entry blocks shouldn't ever be preceded by something that can + // stomp on this block. 
+ if !block.is_entry { + insns.push(Insn::PadPatchPoint); + } + // Process each instruction, expanding branch params if needed for insn in &block.insns { self.expand_branch_insn(insn, &mut insns); } + + // Make sure we don't stomp on the next function + if block_id == num_blocks - 1 { + insns.push(Insn::PadPatchPoint); + } } insns } @@ -1780,8 +1841,8 @@ impl Assembler /// Build an Opnd::VReg and initialize its LiveRange pub(super) fn new_vreg(&mut self, num_bits: u8) -> Opnd { - let vreg = Opnd::VReg { idx: self.live_ranges.len(), num_bits }; - self.live_ranges.push(LiveRange { start: None, end: None }); + let vreg = Opnd::VReg { idx: VRegId(self.live_ranges.len()), num_bits }; + self.live_ranges.0.push(LiveRange { start: None, end: None }); vreg } @@ -1794,7 +1855,7 @@ impl Assembler // Initialize the live range of the output VReg to insn_idx..=insn_idx if let Some(Opnd::VReg { idx, .. }) = insn.out_opnd() { - assert!(*idx < self.live_ranges.len()); + assert!(idx.0 < self.live_ranges.len()); assert_eq!(self.live_ranges[*idx], LiveRange { start: None, end: None }); self.live_ranges[*idx] = LiveRange { start: Some(insn_idx), end: Some(insn_idx) }; } @@ -1805,7 +1866,7 @@ impl Assembler match *opnd { Opnd::VReg { idx, .. } | Opnd::Mem(Mem { base: MemBase::VReg(idx), .. }) => { - assert!(idx < self.live_ranges.len()); + assert!(idx.0 < self.live_ranges.len()); assert_ne!(self.live_ranges[idx].end, None); self.live_ranges[idx].end = Some(self.live_ranges[idx].end().max(insn_idx)); } @@ -1894,7 +1955,7 @@ impl Assembler let mut vreg_opnd: Vec> = vec![None; self.live_ranges.len()]; // List of registers saved before a C call, paired with the VReg index. 
- let mut saved_regs: Vec<(Reg, usize)> = vec![]; + let mut saved_regs: Vec<(Reg, VRegId)> = vec![]; // Remember the indexes of Insn::FrameSetup to update the stack size later let mut frame_setup_idxs: Vec<(BlockId, usize)> = vec![]; @@ -1906,7 +1967,7 @@ impl Assembler let asm = &mut asm_local; - let live_ranges: Vec = take(&mut self.live_ranges); + let live_ranges = take(&mut self.live_ranges); while let Some((index, mut insn)) = iterator.next(asm) { // Remember the index of FrameSetup to bump slot_count when we know the max number of spilled VRegs. @@ -1924,7 +1985,7 @@ impl Assembler let new_opnd = pool.alloc_opnd(vreg_idx); asm.mov(new_opnd, C_RET_OPND); pool.dealloc_opnd(&Opnd::Reg(C_RET_REG)); - vreg_opnd[vreg_idx] = Some(new_opnd); + vreg_opnd[vreg_idx.0] = Some(new_opnd); } true @@ -1943,7 +2004,7 @@ impl Assembler // uses this operand. If it is, we can return the allocated // register to the pool. if live_ranges[idx].end() == index { - if let Some(opnd) = vreg_opnd[idx] { + if let Some(opnd) = vreg_opnd[idx.0] { pool.dealloc_opnd(&opnd); } else { unreachable!("no register allocated for insn {:?}", insn); @@ -1987,7 +2048,7 @@ impl Assembler }; if let Some(vreg_idx) = vreg_idx { if live_ranges[vreg_idx].end() == index { - debug!("Allocating a register for VReg({}) at instruction index {} even though it does not live past this index", vreg_idx, index); + debug!("Allocating a register for {vreg_idx} at instruction index {index} even though it does not live past this index"); } // This is going to be the output operand that we will set on the // instruction. CCall and LiveReg need to use a specific register. @@ -2012,7 +2073,7 @@ impl Assembler if let Some(Opnd::VReg{ idx, .. 
}) = opnd_iter.next() { if live_ranges[*idx].end() == index { - if let Some(Opnd::Reg(reg)) = vreg_opnd[*idx] { + if let Some(Opnd::Reg(reg)) = vreg_opnd[idx.0] { out_reg = Some(pool.take_reg(®, vreg_idx)); } } @@ -2031,7 +2092,7 @@ impl Assembler // extends beyond the index of the instruction. let out = insn.out_opnd_mut().unwrap(); let out_opnd = out_opnd.with_num_bits(out_num_bits); - vreg_opnd[out.vreg_idx()] = Some(out_opnd); + vreg_opnd[out.vreg_idx().0] = Some(out_opnd); *out = out_opnd; } @@ -2040,10 +2101,10 @@ impl Assembler while let Some(opnd) = opnd_iter.next() { match *opnd { Opnd::VReg { idx, num_bits } => { - *opnd = vreg_opnd[idx].unwrap().with_num_bits(num_bits); + *opnd = vreg_opnd[idx.0].unwrap().with_num_bits(num_bits); }, Opnd::Mem(Mem { base: MemBase::VReg(idx), disp, num_bits }) => { - *opnd = match vreg_opnd[idx].unwrap() { + *opnd = match vreg_opnd[idx.0].unwrap() { Opnd::Reg(reg) => Opnd::Mem(Mem { base: MemBase::Reg(reg.reg_no), disp, num_bits }), // If the base is spilled, lower it to MemBase::Stack, which scratch_split will lower to MemBase::Reg. 
Opnd::Mem(mem) => Opnd::Mem(Mem { base: pool.stack_state.mem_to_stack_membase(mem), disp, num_bits }), @@ -2058,7 +2119,7 @@ impl Assembler // register if let Some(idx) = vreg_idx { if live_ranges[idx].end() == index { - if let Some(opnd) = vreg_opnd[idx] { + if let Some(opnd) = vreg_opnd[idx.0] { pool.dealloc_opnd(&opnd); } else { unreachable!("no register allocated for insn {:?}", insn); @@ -2164,6 +2225,11 @@ impl Assembler fn compile_exit(asm: &mut Assembler, exit: &SideExit) { let SideExit { pc, stack, locals } = exit; + // Side exit blocks are not part of the CFG at the moment, + // so we need to manually ensure that patchpoints get padded + // so that nobody stomps on us + asm.pad_patch_point(); + asm_comment!(asm, "save cfp->pc"); asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC), *pc); @@ -2849,7 +2915,7 @@ impl Assembler { asm_local.accept_scratch_reg = self.accept_scratch_reg; asm_local.stack_base_idx = self.stack_base_idx; asm_local.label_names = self.label_names.clone(); - asm_local.live_ranges.resize(self.live_ranges.len(), LiveRange { start: None, end: None }); + asm_local.live_ranges = LiveRanges::new(self.live_ranges.len()); // Create one giant block to linearize everything into asm_local.new_block_without_id(); diff --git a/zjit/src/backend/tests.rs b/zjit/src/backend/tests.rs index 701029b8ec0c2c..32b6fe9b5ef31e 100644 --- a/zjit/src/backend/tests.rs +++ b/zjit/src/backend/tests.rs @@ -3,7 +3,6 @@ use crate::backend::lir::*; use crate::cruby::*; use crate::codegen::c_callable; use crate::options::rb_zjit_prepare_options; -use crate::hir; #[test] fn test_add() { diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs index a4cf8dfcc5e892..d55dce1b9b82e0 100644 --- a/zjit/src/backend/x86_64/mod.rs +++ b/zjit/src/backend/x86_64/mod.rs @@ -140,7 +140,7 @@ impl Assembler { { let mut asm_local = Assembler::new_with_asm(&self); let asm = &mut asm_local; - let live_ranges: Vec = take(&mut self.live_ranges); + let live_ranges = 
take(&mut self.live_ranges); let mut iterator = self.instruction_iterator(); while let Some((index, mut insn)) = iterator.next(asm) { @@ -166,7 +166,7 @@ impl Assembler { // When we split an operand, we can create a new VReg not in `live_ranges`. // So when we see a VReg with out-of-range index, it's created from splitting // from the loop above and we know it doesn't outlive the current instruction. - let vreg_outlives_insn = |vreg_idx| { + let vreg_outlives_insn = |vreg_idx: VRegId| { live_ranges .get(vreg_idx) .is_some_and(|live_range: &LiveRange| live_range.end() > index) @@ -472,7 +472,7 @@ impl Assembler { asm_local.accept_scratch_reg = true; asm_local.stack_base_idx = self.stack_base_idx; asm_local.label_names = self.label_names.clone(); - asm_local.live_ranges.resize(self.live_ranges.len(), LiveRange { start: None, end: None }); + asm_local.live_ranges = LiveRanges::new(self.live_ranges.len()); // Create one giant block to linearize everything into asm_local.new_block_without_id(); @@ -1173,7 +1173,7 @@ mod tests { asm.cret(val64); asm.frame_teardown(JIT_PRESERVED_REGS); - assert_disasm_snapshot!(lir_string(&mut asm), @r" + assert_disasm_snapshot!(lir_string(&mut asm), @" bb0: # bb0(): foo@/tmp/a.rb:1 FrameSetup 1, r13, rbx, r12 @@ -1186,6 +1186,7 @@ mod tests { Je bb0 CRet v0 FrameTeardown r13, rbx, r12 + PadPatchPoint "); } diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 8714518866c3b4..2038be808dc633 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -333,6 +333,8 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, version: IseqVersionRef, func }; gen_if_false(&mut asm, val_opnd, branch_edge, fall_through_edge); + assert!(asm.current_block().insns.last().unwrap().is_terminator()); + asm.set_current_block(fall_through_target); let label = jit.get_label(&mut asm, fall_through_target, block_id); @@ -356,6 +358,8 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, version: IseqVersionRef, func }; gen_if_true(&mut asm, val_opnd, 
branch_edge, fall_through_edge); + assert!(asm.current_block().insns.last().unwrap().is_terminator()); + asm.set_current_block(fall_through_target); let label = jit.get_label(&mut asm, fall_through_target, block_id); @@ -368,6 +372,8 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, version: IseqVersionRef, func args: target.args.iter().map(|insn_id| jit.get_opnd(*insn_id)).collect() }; gen_jump(&mut asm, branch_edge); + assert!(asm.current_block().insns.last().unwrap().is_terminator()); + }, _ => { if let Err(last_snapshot) = gen_insn(cb, &mut jit, &mut asm, function, insn_id, &insn) { @@ -382,8 +388,8 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, version: IseqVersionRef, func } } } - // Make sure the last patch point has enough space to insert a jump - asm.pad_patch_point(); + // Blocks should always end with control flow + assert!(asm.current_block().insns.last().unwrap().is_terminator()); } // Generate code if everything can be compiled @@ -478,9 +484,10 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::Snapshot { .. } => return Ok(()), // we don't need to do anything for this instruction at the moment &Insn::Send { cd, blockiseq, state, reason, .. } => gen_send(jit, asm, cd, blockiseq, &function.frame_state(state), reason), &Insn::SendForward { cd, blockiseq, state, reason, .. } => gen_send_forward(jit, asm, cd, blockiseq, &function.frame_state(state), reason), + Insn::SendDirect { cme, iseq, recv, args, kw_bits, blockiseq, state, .. } => gen_send_iseq_direct(cb, jit, asm, *cme, *iseq, opnd!(recv), opnds!(args), *kw_bits, &function.frame_state(*state), *blockiseq), &Insn::SendWithoutBlock { cd, state, reason, .. } => gen_send_without_block(jit, asm, cd, &function.frame_state(state), reason), - Insn::SendWithoutBlockDirect { cme, iseq, recv, args, kw_bits, state, .. 
} => gen_send_iseq_direct(cb, jit, asm, *cme, *iseq, opnd!(recv), opnds!(args), *kw_bits, &function.frame_state(*state), None), &Insn::InvokeSuper { cd, blockiseq, state, reason, .. } => gen_invokesuper(jit, asm, cd, blockiseq, &function.frame_state(state), reason), + &Insn::InvokeSuperForward { cd, blockiseq, state, reason, .. } => gen_invokesuperforward(jit, asm, cd, blockiseq, &function.frame_state(state), reason), &Insn::InvokeBlock { cd, state, reason, .. } => gen_invokeblock(jit, asm, cd, &function.frame_state(state), reason), Insn::InvokeProc { recv, args, state, kw_splat } => gen_invokeproc(jit, asm, opnd!(recv), opnds!(args), *kw_splat, &function.frame_state(*state)), // Ensure we have enough room fit ec, self, and arguments @@ -525,10 +532,12 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio &Insn::UnboxFixnum { val } => gen_unbox_fixnum(asm, opnd!(val)), Insn::Test { val } => gen_test(asm, opnd!(val)), Insn::RefineType { val, .. } => opnd!(val), + Insn::HasType { val, expected } => gen_has_type(asm, opnd!(val), *expected), Insn::GuardType { val, guard_type, state } => gen_guard_type(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state)), Insn::GuardTypeNot { val, guard_type, state } => gen_guard_type_not(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state)), &Insn::GuardBitEquals { val, expected, reason, state } => gen_guard_bit_equals(jit, asm, opnd!(val), expected, reason, &function.frame_state(state)), - &Insn::GuardBlockParamProxy { level, state } => no_output!(gen_guard_block_param_proxy(jit, asm, level, &function.frame_state(state))), + &Insn::GuardAnyBitSet { val, mask, reason, state } => gen_guard_any_bit_set(jit, asm, opnd!(val), mask, reason, &function.frame_state(state)), + &Insn::GuardNoBitsSet { val, mask, reason, state } => gen_guard_no_bits_set(jit, asm, opnd!(val), mask, reason, &function.frame_state(state)), Insn::GuardNotFrozen { recv, state } => gen_guard_not_frozen(jit, asm, 
opnd!(recv), &function.frame_state(*state)), Insn::GuardNotShared { recv, state } => gen_guard_not_shared(jit, asm, opnd!(recv), &function.frame_state(*state)), &Insn::GuardLess { left, right, state } => gen_guard_less(jit, asm, opnd!(left), opnd!(right), &function.frame_state(state)), @@ -579,6 +588,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio &Insn::GuardShape { val, shape, state } => gen_guard_shape(jit, asm, opnd!(val), shape, &function.frame_state(state)), Insn::LoadPC => gen_load_pc(asm), Insn::LoadEC => gen_load_ec(), + &Insn::GetEP { level } => gen_get_ep(asm, level), Insn::GetLEP => gen_get_lep(jit, asm), Insn::LoadSelf => gen_load_self(), &Insn::LoadField { recv, id, offset, return_type } => gen_load_field(asm, opnd!(recv), id, offset, return_type), @@ -785,26 +795,6 @@ fn gen_getblockparam(jit: &mut JITState, asm: &mut Assembler, ep_offset: u32, le asm.load(Opnd::mem(VALUE_BITS, ep, offset)) } -fn gen_guard_block_param_proxy(jit: &JITState, asm: &mut Assembler, level: u32, state: &FrameState) { - // Bail out if the `&block` local variable has been modified - let ep = gen_get_ep(asm, level); - let flags = Opnd::mem(64, ep, SIZEOF_VALUE_I32 * (VM_ENV_DATA_INDEX_FLAGS as i32)); - asm.test(flags, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()); - asm.jnz(side_exit(jit, state, SideExitReason::BlockParamProxyModified)); - - // This handles two cases which are nearly identical - // Block handler is a tagged pointer. Look at the tag. 
- // VM_BH_ISEQ_BLOCK_P(): block_handler & 0x03 == 0x01 - // VM_BH_IFUNC_P(): block_handler & 0x03 == 0x03 - // So to check for either of those cases we can use: val & 0x1 == 0x1 - const _: () = assert!(RUBY_SYMBOL_FLAG & 1 == 0, "guard below rejects symbol block handlers"); - - // Bail ouf if the block handler is neither ISEQ nor ifunc - let block_handler = asm.load(Opnd::mem(64, ep, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL)); - asm.test(block_handler, 0x1.into()); - asm.jz(side_exit(jit, state, SideExitReason::BlockParamProxyNotIseqOrIfunc)); -} - fn gen_guard_not_frozen(jit: &JITState, asm: &mut Assembler, recv: Opnd, state: &FrameState) -> Opnd { let recv = asm.load(recv); // It's a heap object, so check the frozen flag @@ -1023,6 +1013,15 @@ fn gen_ccall(asm: &mut Assembler, cfunc: *const u8, name: ID, recv: Opnd, args: asm.ccall(cfunc, cfunc_args) } +// Change cfp->block_code in the current frame. See vm_caller_setup_arg_block(). +// VM_CFP_TO_CAPTURED_BLOCK then turns &cfp->self into a block handler. +// rb_captured_block->code.iseq aliases with cfp->block_code. +fn gen_block_handler_specval(asm: &mut Assembler, blockiseq: IseqPtr) -> lir::Opnd { + asm.store(Opnd::mem(VALUE_BITS, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), VALUE::from(blockiseq).into()); + let cfp_self_addr = asm.lea(Opnd::mem(VALUE_BITS, CFP, RUBY_OFFSET_CFP_SELF)); + asm.or(cfp_self_addr, Opnd::Imm(1)) +} + /// Generate code for a variadic C function call /// func(int argc, VALUE *argv, VALUE recv) fn gen_ccall_variadic( @@ -1052,13 +1051,8 @@ fn gen_ccall_variadic( gen_spill_stack(jit, asm, state); gen_spill_locals(jit, asm, state); - let block_handler_specval = if let Some(block_iseq) = blockiseq { - // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block(). - // VM_CFP_TO_CAPTURED_BLOCK then turns &cfp->self into a block handler. - // rb_captured_block->code.iseq aliases with cfp->block_code. 
- asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), VALUE::from(block_iseq).into()); - let cfp_self_addr = asm.lea(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)); - asm.or(cfp_self_addr, Opnd::Imm(1)) + let block_handler_specval = if let Some(blockiseq) = blockiseq { + gen_block_handler_specval(asm, blockiseq) } else { VM_BLOCK_HANDLER_NONE.into() }; @@ -1445,7 +1439,7 @@ fn gen_send_iseq_direct( args: Vec, kw_bits: u32, state: &FrameState, - block_handler: Option, + blockiseq: Option, ) -> lir::Opnd { gen_incr_counter(asm, Counter::iseq_optimized_send_count); @@ -1461,6 +1455,12 @@ fn gen_send_iseq_direct( gen_spill_locals(jit, asm, state); gen_spill_stack(jit, asm, state); + // This mirrors vm_caller_setup_arg_block() in for the `blockiseq != NULL` case. + // The HIR specialization guards ensure we will only reach here for literal blocks, + // not &block forwarding, &:foo, etc. Thise are rejected in `type_specialize` by + // `unspecializable_call_type`. + let block_handler = blockiseq.map(|b| gen_block_handler_specval(asm, b)); + let (frame_type, specval) = if VM_METHOD_TYPE_BMETHOD == unsafe { get_cme_def_type(cme) } { // Extract EP from the Proc instance let procv = unsafe { rb_get_def_bmethod_proc((*cme).def) }; @@ -1512,11 +1512,25 @@ fn gen_send_iseq_direct( asm.mov(CFP, new_cfp); asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), CFP); + let params = unsafe { iseq.params() }; + + // For &block, the JIT entrypoint expects the block_handler as an argument + // This HIR param is not actually used, things read from specval from the VM frame today. + // TODO: Remove unused param from HIR, or pass specval through c_args. 
+ // See https://github.com/ruby/ruby/pull/15911#discussion_r2710544982 + let needs_block = params.flags.has_block() != 0; + // Set up arguments - let mut c_args = vec![recv]; + let mut c_args = Vec::with_capacity({ + // This is a heuristic to avoid re-allocation, not necessary for correctness + 1 /* recv */ + args.len() + if needs_block { 1 } else { 0 } + }); + c_args.push(recv); c_args.extend(&args); + if needs_block { + c_args.push(specval); + } - let params = unsafe { iseq.params() }; let num_optionals_passed = if params.flags.has_opt() != 0 { // See vm_call_iseq_setup_normal_opt_start in vm_inshelper.c let lead_num = params.lead_num as u32; @@ -1638,6 +1652,29 @@ fn gen_invokesuper( ) } +/// Compile a dynamic dispatch for `super` with `...` +fn gen_invokesuperforward( + jit: &mut JITState, + asm: &mut Assembler, + cd: *const rb_call_data, + blockiseq: IseqPtr, + state: &FrameState, + reason: SendFallbackReason, +) -> lir::Opnd { + gen_incr_send_fallback_counter(asm, reason); + + gen_prepare_non_leaf_call(jit, asm, state); + asm_comment!(asm, "call super with dynamic dispatch (forwarding)"); + unsafe extern "C" { + fn rb_vm_invokesuperforward(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE; + } + asm_ccall!( + asm, + rb_vm_invokesuperforward, + EC, CFP, Opnd::const_ptr(cd), VALUE::from(blockiseq).into() + ) +} + /// Compile a string resurrection fn gen_string_copy(asm: &mut Assembler, recv: Opnd, chilled: bool, state: &FrameState) -> Opnd { // TODO: split rb_ec_str_resurrect into separate functions @@ -2157,6 +2194,69 @@ fn gen_test(asm: &mut Assembler, val: lir::Opnd) -> lir::Opnd { asm.csel_e(0.into(), 1.into()) } +fn gen_has_type(asm: &mut Assembler, val: lir::Opnd, ty: Type) -> lir::Opnd { + if ty.is_subtype(types::Fixnum) { + asm.test(val, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); + asm.csel_nz(Opnd::Imm(1), Opnd::Imm(0)) + } else if ty.is_subtype(types::Flonum) { + // Flonum: (val & RUBY_FLONUM_MASK) == RUBY_FLONUM_FLAG + let masked = 
asm.and(val, Opnd::UImm(RUBY_FLONUM_MASK as u64)); + asm.cmp(masked, Opnd::UImm(RUBY_FLONUM_FLAG as u64)); + asm.csel_e(Opnd::Imm(1), Opnd::Imm(0)) + } else if ty.is_subtype(types::StaticSymbol) { + // Static symbols have (val & 0xff) == RUBY_SYMBOL_FLAG + // Use 8-bit comparison like YJIT does. GuardType should not be used + // for a known VALUE, which with_num_bits() does not support. + asm.cmp(val.with_num_bits(8), Opnd::UImm(RUBY_SYMBOL_FLAG as u64)); + asm.csel_e(Opnd::Imm(1), Opnd::Imm(0)) + } else if ty.is_subtype(types::NilClass) { + asm.cmp(val, Qnil.into()); + asm.csel_e(Opnd::Imm(1), Opnd::Imm(0)) + } else if ty.is_subtype(types::TrueClass) { + asm.cmp(val, Qtrue.into()); + asm.csel_e(Opnd::Imm(1), Opnd::Imm(0)) + } else if ty.is_subtype(types::FalseClass) { + asm.cmp(val, Qfalse.into()); + asm.csel_e(Opnd::Imm(1), Opnd::Imm(0)) + } else if ty.is_immediate() { + // All immediate types' guard should have been handled above + panic!("unexpected immediate guard type: {ty}"); + } else if let Some(expected_class) = ty.runtime_exact_ruby_class() { + // If val isn't in a register, load it to use it as the base of Opnd::mem later. + // TODO: Max thinks codegen should not care about the shapes of the operands except to create them. (Shopify/ruby#685) + let val = match val { + Opnd::Reg(_) | Opnd::VReg { .. 
} => val, + _ => asm.load(val), + }; + + let ret_label = asm.new_label("true"); + let false_label = asm.new_label("false"); + + // Check if it's a special constant + asm.test(val, (RUBY_IMMEDIATE_MASK as u64).into()); + asm.jnz(false_label.clone()); + + // Check if it's false + asm.cmp(val, Qfalse.into()); + asm.je(false_label.clone()); + + // Load the class from the object's klass field + let klass = asm.load(Opnd::mem(64, val, RUBY_OFFSET_RBASIC_KLASS)); + asm.cmp(klass, Opnd::Value(expected_class)); + asm.jmp(ret_label.clone()); + + // If we get here then the value was false, unset the Z flag + // so that csel_e will select false instead of true + asm.write_label(false_label); + asm.test(Opnd::UImm(1), Opnd::UImm(1)); + + asm.write_label(ret_label); + asm.csel_e(Opnd::UImm(1), Opnd::Imm(0)) + } else { + unimplemented!("unsupported type: {ty}"); + } +} + /// Compile a type check with a side exit fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard_type: Type, state: &FrameState) -> lir::Opnd { gen_incr_counter(asm, Counter::guard_type_count); @@ -2290,6 +2390,32 @@ fn gen_guard_bit_equals(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, val } +fn mask_to_opnd(mask: crate::hir::Const) -> Option { + match mask { + crate::hir::Const::CUInt8(v) => Some(Opnd::UImm(v as u64)), + crate::hir::Const::CUInt16(v) => Some(Opnd::UImm(v as u64)), + crate::hir::Const::CUInt32(v) => Some(Opnd::UImm(v as u64)), + crate::hir::Const::CUInt64(v) => Some(Opnd::UImm(v)), + _ => None + } +} + +/// Compile a bitmask check with a side exit if none of the masked bits are not set +fn gen_guard_any_bit_set(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, mask: crate::hir::Const, reason: SideExitReason, state: &FrameState) -> lir::Opnd { + let mask_opnd = mask_to_opnd(mask).unwrap_or_else(|| panic!("gen_guard_any_bit_set: unexpected hir::Const {mask:?}")); + asm.test(val, mask_opnd); + asm.jz(side_exit(jit, state, reason)); + val +} + +/// Compile 
a bitmask check with a side exit if any of the masked bits are set +fn gen_guard_no_bits_set(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, mask: crate::hir::Const, reason: SideExitReason, state: &FrameState) -> lir::Opnd { + let mask_opnd = mask_to_opnd(mask).unwrap_or_else(|| panic!("gen_guard_no_bits_set: unexpected hir::Const {mask:?}")); + asm.test(val, mask_opnd); + asm.jnz(side_exit(jit, state, reason)); + val +} + /// Generate code that records unoptimized C functions if --zjit-stats is enabled fn gen_incr_counter_ptr(asm: &mut Assembler, counter_ptr: *mut u64) { if get_option!(stats) { @@ -2690,7 +2816,7 @@ fn function_stub_hit_body(cb: &mut CodeBlock, iseq_call: &IseqCallRef) -> Result Ok(jit_entry_ptr) } -/// Compile a stub for an ISEQ called by SendWithoutBlockDirect +/// Compile a stub for an ISEQ called by SendDirect fn gen_function_stub(cb: &mut CodeBlock, iseq_call: IseqCallRef) -> Result { let (mut asm, scratch_reg) = Assembler::new_with_scratch_reg(); asm.new_block_without_id(); diff --git a/zjit/src/cruby.rs b/zjit/src/cruby.rs index 51faaab9c24658..94b2a443c8b043 100644 --- a/zjit/src/cruby.rs +++ b/zjit/src/cruby.rs @@ -1396,6 +1396,8 @@ pub(crate) mod ids { name: self_ content: b"self" name: rb_ivar_get_at_no_ractor_check name: _shape_id + name: _env_data_index_flags + name: _env_data_index_specval } /// Get an CRuby `ID` to an interned string, e.g. a particular method name. 
diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index 15533180dad72f..969c5a4c693a73 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -1706,38 +1706,39 @@ pub const YARVINSN_trace_setlocal_WC_0: ruby_vminsn_type = 214; pub const YARVINSN_trace_setlocal_WC_1: ruby_vminsn_type = 215; pub const YARVINSN_trace_putobject_INT2FIX_0_: ruby_vminsn_type = 216; pub const YARVINSN_trace_putobject_INT2FIX_1_: ruby_vminsn_type = 217; -pub const YARVINSN_zjit_getinstancevariable: ruby_vminsn_type = 218; -pub const YARVINSN_zjit_setinstancevariable: ruby_vminsn_type = 219; -pub const YARVINSN_zjit_definedivar: ruby_vminsn_type = 220; -pub const YARVINSN_zjit_send: ruby_vminsn_type = 221; -pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 222; -pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 223; -pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 224; -pub const YARVINSN_zjit_invokesuper: ruby_vminsn_type = 225; -pub const YARVINSN_zjit_invokeblock: ruby_vminsn_type = 226; -pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 227; -pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 228; -pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 229; -pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 230; -pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 231; -pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 232; -pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 233; -pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 234; -pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 235; -pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 236; -pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 237; -pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 238; -pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 239; -pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 240; -pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 241; -pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 242; -pub 
const YARVINSN_zjit_opt_length: ruby_vminsn_type = 243; -pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 244; -pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 245; -pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 246; -pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 247; -pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 248; -pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 249; +pub const YARVINSN_zjit_getblockparamproxy: ruby_vminsn_type = 218; +pub const YARVINSN_zjit_getinstancevariable: ruby_vminsn_type = 219; +pub const YARVINSN_zjit_setinstancevariable: ruby_vminsn_type = 220; +pub const YARVINSN_zjit_definedivar: ruby_vminsn_type = 221; +pub const YARVINSN_zjit_send: ruby_vminsn_type = 222; +pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 223; +pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 224; +pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 225; +pub const YARVINSN_zjit_invokesuper: ruby_vminsn_type = 226; +pub const YARVINSN_zjit_invokeblock: ruby_vminsn_type = 227; +pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 228; +pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 229; +pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 230; +pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 231; +pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 232; +pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 233; +pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 234; +pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 235; +pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 236; +pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 237; +pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 238; +pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 239; +pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 240; +pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 241; +pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 242; +pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 243; +pub 
const YARVINSN_zjit_opt_length: ruby_vminsn_type = 244; +pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 245; +pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 246; +pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 247; +pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 248; +pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 249; +pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 250; pub type ruby_vminsn_type = u32; pub type rb_iseq_callback = ::std::option::Option< unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void), @@ -2081,6 +2082,7 @@ unsafe extern "C" { pub fn rb_zjit_class_initialized_p(klass: VALUE) -> bool; pub fn rb_zjit_class_get_alloc_func(klass: VALUE) -> rb_alloc_func_t; pub fn rb_zjit_class_has_default_allocator(klass: VALUE) -> bool; + pub fn rb_vm_untag_block_handler(block_handler: VALUE) -> VALUE; pub fn rb_vm_get_untagged_block_handler(reg_cfp: *mut rb_control_frame_t) -> VALUE; pub fn rb_zjit_writebarrier_check_immediate(recv: VALUE, val: VALUE); pub fn rb_iseq_encoded_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; diff --git a/zjit/src/distribution.rs b/zjit/src/distribution.rs index 2c6ffb3ae6fff0..9b3920396a13b9 100644 --- a/zjit/src/distribution.rs +++ b/zjit/src/distribution.rs @@ -69,7 +69,7 @@ enum DistributionKind { SkewedMegamorphic, } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct DistributionSummary { kind: DistributionKind, buckets: [T; N], @@ -134,6 +134,10 @@ impl Distributi assert!(idx < N, "index {idx} out of bounds for buckets[{N}]"); self.buckets[idx] } + + pub fn buckets(&self) -> &[T] { + &self.buckets + } } #[cfg(test)] diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 2aa74dce8be26c..901beffea02772 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -506,6 +506,7 @@ pub enum SideExitReason { Interrupt, BlockParamProxyModified, BlockParamProxyNotIseqOrIfunc, + BlockParamProxyNotNil, BlockParamWbRequired, StackOverflow, FixnumModByZero, @@ -625,10 
+626,11 @@ pub enum SendFallbackReason { SendWithoutBlockNotOptimizedNeedPermission, SendWithoutBlockBopRedefined, SendWithoutBlockOperandsNotFixnum, - SendWithoutBlockDirectKeywordMismatch, - SendWithoutBlockDirectKeywordCountMismatch, - SendWithoutBlockDirectMissingKeyword, - SendWithoutBlockDirectTooManyKeywords, + SendWithoutBlockPolymorphicFallback, + SendDirectKeywordMismatch, + SendDirectKeywordCountMismatch, + SendDirectMissingKeyword, + SendDirectTooManyKeywords, SendPolymorphic, SendMegamorphic, SendNoProfiles, @@ -686,10 +688,11 @@ impl Display for SendFallbackReason { SendNotOptimizedNeedPermission => write!(f, "Send: method private or protected and no FCALL"), SendWithoutBlockBopRedefined => write!(f, "SendWithoutBlock: basic operation was redefined"), SendWithoutBlockOperandsNotFixnum => write!(f, "SendWithoutBlock: operands are not fixnums"), - SendWithoutBlockDirectKeywordMismatch => write!(f, "SendWithoutBlockDirect: keyword mismatch"), - SendWithoutBlockDirectKeywordCountMismatch => write!(f, "SendWithoutBlockDirect: keyword count mismatch"), - SendWithoutBlockDirectMissingKeyword => write!(f, "SendWithoutBlockDirect: missing keyword"), - SendWithoutBlockDirectTooManyKeywords => write!(f, "SendWithoutBlockDirect: too many keywords for fixnum bitmask"), + SendWithoutBlockPolymorphicFallback => write!(f, "SendWithoutBlock: polymorphic fallback"), + SendDirectKeywordMismatch => write!(f, "SendDirect: keyword mismatch"), + SendDirectKeywordCountMismatch => write!(f, "SendDirect: keyword count mismatch"), + SendDirectMissingKeyword => write!(f, "SendDirect: missing keyword"), + SendDirectTooManyKeywords => write!(f, "SendDirect: too many keywords for fixnum bitmask"), SendPolymorphic => write!(f, "Send: polymorphic call site"), SendMegamorphic => write!(f, "Send: megamorphic call site"), SendNoProfiles => write!(f, "Send: no profile data available"), @@ -855,6 +858,9 @@ pub enum Insn { /// Set a class variable `id` to `val` SetClassVar { id: ID, val: 
InsnId, ic: *const iseq_inline_cvar_cache_entry, state: InsnId }, + /// Get the EP at the given level from the current CFP. + GetEP { level: u32 }, + /// Get the EP of the ISeq of the containing method, or "local level", skipping over block-level EPs. /// Equivalent of GET_LEP() macro. GetLEP, @@ -936,6 +942,14 @@ pub enum Insn { state: InsnId, reason: SendFallbackReason, }, + InvokeSuperForward { + recv: InsnId, + cd: *const rb_call_data, + blockiseq: IseqPtr, + args: Vec, + state: InsnId, + reason: SendFallbackReason, + }, InvokeBlock { cd: *const rb_call_data, args: Vec, @@ -951,13 +965,14 @@ pub enum Insn { }, /// Optimized ISEQ call - SendWithoutBlockDirect { + SendDirect { recv: InsnId, cd: *const rb_call_data, cme: *const rb_callable_method_entry_t, iseq: IseqPtr, args: Vec, kw_bits: u32, + blockiseq: Option, state: InsnId, }, @@ -1003,17 +1018,20 @@ pub enum Insn { /// Refine the known type information of with additional type information. /// Computes the intersection of the existing type and the new type. RefineType { val: InsnId, new_type: Type }, + /// Return CBool[true] if val has type Type and CBool[false] otherwise. + HasType { val: InsnId, expected: Type }, /// Side-exit if val doesn't have the expected type. GuardType { val: InsnId, guard_type: Type, state: InsnId }, GuardTypeNot { val: InsnId, guard_type: Type, state: InsnId }, /// Side-exit if val is not the expected Const. GuardBitEquals { val: InsnId, expected: Const, reason: SideExitReason, state: InsnId }, + /// Side-exit if (val & mask) == 0 + GuardAnyBitSet { val: InsnId, mask: Const, reason: SideExitReason, state: InsnId }, + /// Side-exit if (val & mask) != 0 + GuardNoBitsSet { val: InsnId, mask: Const, reason: SideExitReason, state: InsnId }, /// Side-exit if val doesn't have the expected shape. 
GuardShape { val: InsnId, shape: ShapeId, state: InsnId }, - /// Side-exit if the block param has been modified or the block handler for the frame - /// is neither ISEQ nor ifunc, which makes it incompatible with rb_block_param_proxy. - GuardBlockParamProxy { level: u32, state: InsnId }, /// Side-exit if val is frozen. Does *not* check if the val is an immediate; assumes that it is /// a heap object. GuardNotFrozen { recv: InsnId, state: InsnId }, @@ -1057,7 +1075,7 @@ impl Insn { | Insn::PatchPoint { .. } | Insn::SetIvar { .. } | Insn::SetClassVar { .. } | Insn::ArrayExtend { .. } | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetGlobal { .. } | Insn::SetLocal { .. } | Insn::Throw { .. } | Insn::IncrCounter(_) | Insn::IncrCounterPtr { .. } - | Insn::CheckInterrupts { .. } | Insn::GuardBlockParamProxy { .. } | Insn::GuardSuperMethodEntry { .. } + | Insn::CheckInterrupts { .. } | Insn::GuardSuperMethodEntry { .. } | Insn::StoreField { .. } | Insn::WriteBarrier { .. } | Insn::HashAset { .. } | Insn::ArrayAset { .. } => false, _ => true, @@ -1145,6 +1163,7 @@ impl Insn { Insn::DefinedIvar { .. } => effects::Any, Insn::LoadPC { .. } => Effect::read_write(abstract_heaps::PC, abstract_heaps::Empty), Insn::LoadEC { .. } => effects::Empty, + Insn::GetEP { .. } => effects::Empty, Insn::GetLEP { .. } => effects::Empty, Insn::LoadSelf { .. } => Effect::read_write(abstract_heaps::Frame, abstract_heaps::Empty), Insn::LoadField { .. } => Effect::read_write(abstract_heaps::Other, abstract_heaps::Empty), @@ -1183,8 +1202,9 @@ impl Insn { Insn::Send { .. } => effects::Any, Insn::SendForward { .. } => effects::Any, Insn::InvokeSuper { .. } => effects::Any, + Insn::InvokeSuperForward { .. } => effects::Any, Insn::InvokeBlock { .. } => effects::Any, - Insn::SendWithoutBlockDirect { .. } => effects::Any, + Insn::SendDirect { .. } => effects::Any, Insn::InvokeBuiltin { .. } => effects::Any, Insn::EntryPoint { .. } => effects::Any, Insn::Return { .. 
} => effects::Any, @@ -1210,8 +1230,9 @@ impl Insn { Insn::GuardType { .. } => effects::Any, Insn::GuardTypeNot { .. } => effects::Any, Insn::GuardBitEquals { .. } => effects::Any, + Insn::GuardAnyBitSet { .. } => effects::Any, + Insn::GuardNoBitsSet { .. } => effects::Any, Insn::GuardShape { .. } => effects::Any, - Insn::GuardBlockParamProxy { .. } => effects::Any, Insn::GuardNotFrozen { .. } => effects::Any, Insn::GuardNotShared { .. } => effects::Any, Insn::GuardGreaterEq { .. } => effects::Any, @@ -1225,6 +1246,7 @@ impl Insn { Insn::CheckInterrupts { .. } => effects::Any, Insn::InvokeProc { .. } => effects::Any, Insn::RefineType { .. } => effects::Empty, + Insn::HasType { .. } => effects::Empty, } } @@ -1275,6 +1297,20 @@ fn get_local_var_name_for_printer(iseq: Option, level: u32, ep_offset: Some(format!(":{}", id.contents_lossy())) } +/// Construct a qualified method name for display/debug output. +/// Returns strings like "Array#length" for instance methods or "Foo.bar" for singleton methods. +fn qualified_method_name(class: VALUE, method_id: ID) -> String { + let method_name = method_id.contents_lossy(); + // rb_zjit_singleton_class_p also checks if it's a class + if unsafe { rb_zjit_singleton_class_p(class) } { + let class_name = get_class_name(unsafe { rb_class_attached_object(class) }); + format!("{class_name}.{method_name}") + } else { + let class_name = get_class_name(class); + format!("{class_name}#{method_name}") + } +} + static REGEXP_FLAGS: &[(u32, &str)] = &[ (ONIG_OPTION_MULTILINE, "MULTILINE"), (ONIG_OPTION_IGNORECASE, "IGNORECASE"), @@ -1437,8 +1473,8 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { write!(f, " # SendFallbackReason: {reason}")?; Ok(()) } - Insn::SendWithoutBlockDirect { recv, cd, iseq, args, .. } => { - write!(f, "SendWithoutBlockDirect {recv}, :{} ({:?})", ruby_call_method_name(*cd), self.ptr_map.map_ptr(iseq))?; + Insn::SendDirect { recv, cd, iseq, args, blockiseq, .. 
} => { + write!(f, "SendDirect {recv}, {:p}, :{} ({:?})", self.ptr_map.map_ptr(blockiseq), ruby_call_method_name(*cd), self.ptr_map.map_ptr(iseq))?; for arg in args { write!(f, ", {arg}")?; } @@ -1471,6 +1507,14 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { write!(f, " # SendFallbackReason: {reason}")?; Ok(()) } + Insn::InvokeSuperForward { recv, blockiseq, args, reason, .. } => { + write!(f, "InvokeSuperForward {recv}, {:p}", self.ptr_map.map_ptr(blockiseq))?; + for arg in args { + write!(f, ", {arg}")?; + } + write!(f, " # SendFallbackReason: {reason}")?; + Ok(()) + } Insn::InvokeBlock { args, reason, .. } => { write!(f, "InvokeBlock")?; for arg in args { @@ -1521,10 +1565,12 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::FixnumRShift { left, right, .. } => { write!(f, "FixnumRShift {left}, {right}") }, Insn::GuardType { val, guard_type, .. } => { write!(f, "GuardType {val}, {}", guard_type.print(self.ptr_map)) }, Insn::RefineType { val, new_type, .. } => { write!(f, "RefineType {val}, {}", new_type.print(self.ptr_map)) }, + Insn::HasType { val, expected, .. } => { write!(f, "HasType {val}, {}", expected.print(self.ptr_map)) }, Insn::GuardTypeNot { val, guard_type, .. } => { write!(f, "GuardTypeNot {val}, {}", guard_type.print(self.ptr_map)) }, Insn::GuardBitEquals { val, expected, .. } => { write!(f, "GuardBitEquals {val}, {}", expected.print(self.ptr_map)) }, + Insn::GuardAnyBitSet { val, mask, .. } => { write!(f, "GuardAnyBitSet {val}, {}", mask.print(self.ptr_map)) }, + Insn::GuardNoBitsSet { val, mask, .. } => { write!(f, "GuardNoBitsSet {val}, {}", mask.print(self.ptr_map)) }, &Insn::GuardShape { val, shape, .. } => { write!(f, "GuardShape {val}, {:p}", self.ptr_map.map_shape(shape)) }, - Insn::GuardBlockParamProxy { level, .. } => write!(f, "GuardBlockParamProxy l{level}"), Insn::GuardNotFrozen { recv, .. } => write!(f, "GuardNotFrozen {recv}"), Insn::GuardNotShared { recv, .. 
} => write!(f, "GuardNotShared {recv}"), Insn::GuardLess { left, right, .. } => write!(f, "GuardLess {left}, {right}"), @@ -1586,6 +1632,7 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::GetIvar { self_val, id, .. } => write!(f, "GetIvar {self_val}, :{}", id.contents_lossy()), Insn::LoadPC => write!(f, "LoadPC"), Insn::LoadEC => write!(f, "LoadEC"), + &Insn::GetEP { level } => write!(f, "GetEP {level}"), Insn::GetLEP => write!(f, "GetLEP"), Insn::LoadSelf => write!(f, "LoadSelf"), &Insn::LoadField { recv, id, offset, return_type: _ } => write!(f, "LoadField {recv}, :{}@{:p}", id.contents_lossy(), self.ptr_map.map_offset(offset)), @@ -1821,7 +1868,8 @@ pub enum ValidationError { MiscValidationError(InsnId, String), } -fn can_direct_send(function: &mut Function, block: BlockId, iseq: *const rb_iseq_t, ci: *const rb_callinfo, send_insn: InsnId, args: &[InsnId]) -> bool { +/// Check if we can do a direct send to the given iseq with the given args. +fn can_direct_send(function: &mut Function, block: BlockId, iseq: *const rb_iseq_t, ci: *const rb_callinfo, send_insn: InsnId, args: &[InsnId], blockiseq: Option) -> bool { let mut can_send = true; let mut count_failure = |counter| { can_send = false; @@ -1829,10 +1877,14 @@ fn can_direct_send(function: &mut Function, block: BlockId, iseq: *const rb_iseq }; let params = unsafe { iseq.params() }; + let caller_has_literal_block: bool = blockiseq.is_some(); + let callee_has_block_param = 0 != params.flags.has_block(); + use Counter::*; if 0 != params.flags.has_rest() { count_failure(complex_arg_pass_param_rest) } if 0 != params.flags.has_post() { count_failure(complex_arg_pass_param_post) } - if 0 != params.flags.has_block() { count_failure(complex_arg_pass_param_block) } + if callee_has_block_param && !caller_has_literal_block + { count_failure(complex_arg_pass_param_block) } if 0 != params.flags.forwardable() { count_failure(complex_arg_pass_param_forwardable) } if 0 != params.flags.has_kwrest() { 
count_failure(complex_arg_pass_param_kwrest) } @@ -1867,7 +1919,11 @@ fn can_direct_send(function: &mut Function, block: BlockId, iseq: *const rb_iseq let kwarg = unsafe { rb_vm_ci_kwarg(ci) }; let caller_kw_count = if kwarg.is_null() { 0 } else { (unsafe { get_cikw_keyword_len(kwarg) }) as usize }; let caller_positional = args.len() - caller_kw_count; - let final_argc = caller_positional + kw_total_num as usize; + // Right now, the JIT entrypoint accepts the block as an param + // We may remove it, remove the block_arg addition to match + // See: https://github.com/ruby/ruby/pull/15911#discussion_r2710544982 + let block_arg = if 0 != params.flags.has_block() { 1 } else { 0 }; + let final_argc = caller_positional + kw_total_num as usize + block_arg; if final_argc + 1 > C_ARG_OPNDS.len() { // +1 for self function.set_dynamic_send_reason(send_insn, TooManyArgsForLir); return false; @@ -2160,6 +2216,7 @@ impl Function { | EntryPoint {..} | LoadPC | LoadEC + | GetEP {..} | GetLEP | LoadSelf | IncrCounterPtr {..} @@ -2198,11 +2255,13 @@ impl Function { &IfTrue { val, ref target } => IfTrue { val: find!(val), target: find_branch_edge!(target) }, &IfFalse { val, ref target } => IfFalse { val: find!(val), target: find_branch_edge!(target) }, &RefineType { val, new_type } => RefineType { val: find!(val), new_type }, + &HasType { val, expected } => HasType { val: find!(val), expected }, &GuardType { val, guard_type, state } => GuardType { val: find!(val), guard_type, state }, &GuardTypeNot { val, guard_type, state } => GuardTypeNot { val: find!(val), guard_type, state }, &GuardBitEquals { val, expected, reason, state } => GuardBitEquals { val: find!(val), expected, reason, state }, + &GuardAnyBitSet { val, mask, reason, state } => GuardAnyBitSet { val: find!(val), mask, reason, state }, + &GuardNoBitsSet { val, mask, reason, state } => GuardNoBitsSet { val: find!(val), mask, reason, state }, &GuardShape { val, shape, state } => GuardShape { val: find!(val), shape, state }, - 
&GuardBlockParamProxy { level, state } => GuardBlockParamProxy { level, state: find!(state) }, &GuardNotFrozen { recv, state } => GuardNotFrozen { recv: find!(recv), state }, &GuardNotShared { recv, state } => GuardNotShared { recv: find!(recv), state }, &GuardGreaterEq { left, right, state } => GuardGreaterEq { left: find!(left), right: find!(right), state }, @@ -2244,13 +2303,14 @@ impl Function { state, reason, }, - &SendWithoutBlockDirect { recv, cd, cme, iseq, ref args, kw_bits, state } => SendWithoutBlockDirect { + &SendDirect { recv, cd, cme, iseq, ref args, kw_bits, blockiseq, state } => SendDirect { recv: find!(recv), cd, cme, iseq, args: find_vec!(args), kw_bits, + blockiseq, state, }, &Send { recv, cd, blockiseq, ref args, state, reason } => Send { @@ -2277,6 +2337,14 @@ impl Function { state, reason, }, + &InvokeSuperForward { recv, cd, blockiseq, ref args, state, reason } => InvokeSuperForward { + recv: find!(recv), + cd, + blockiseq, + args: find_vec!(args), + state, + reason, + }, &InvokeBlock { cd, ref args, state, reason } => InvokeBlock { cd, args: find_vec!(args), @@ -2356,6 +2424,7 @@ impl Function { | SendForward { reason, .. } | SendWithoutBlock { reason, .. } | InvokeSuper { reason, .. } + | InvokeSuperForward { reason, .. } | InvokeBlock { reason, .. } => *reason = dynamic_send_reason, _ => unreachable!("unexpected instruction {} at {insn_id}", self.find(insn_id)) @@ -2392,7 +2461,7 @@ impl Function { | Insn::PatchPoint { .. } | Insn::SetIvar { .. } | Insn::SetClassVar { .. } | Insn::ArrayExtend { .. } | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetLocal { .. } | Insn::IncrCounter(_) | Insn::IncrCounterPtr { .. } - | Insn::CheckInterrupts { .. } | Insn::GuardBlockParamProxy { .. } | Insn::GuardSuperMethodEntry { .. } + | Insn::CheckInterrupts { .. } | Insn::GuardSuperMethodEntry { .. } | Insn::StoreField { .. } | Insn::WriteBarrier { .. } | Insn::HashAset { .. } | Insn::ArrayAset { .. 
} => panic!("Cannot infer type of instruction with no output: {}. See Insn::has_output().", self.insns[insn.0]), Insn::Const { val: Const::Value(val) } => Type::from_value(*val), @@ -2449,8 +2518,11 @@ impl Function { &Insn::CCallVariadic { return_type, .. } => return_type, Insn::GuardType { val, guard_type, .. } => self.type_of(*val).intersection(*guard_type), Insn::RefineType { val, new_type, .. } => self.type_of(*val).intersection(*new_type), + Insn::HasType { .. } => types::CBool, Insn::GuardTypeNot { .. } => types::BasicObject, Insn::GuardBitEquals { val, expected, .. } => self.type_of(*val).intersection(Type::from_const(*expected)), + Insn::GuardAnyBitSet { val, .. } => self.type_of(*val), + Insn::GuardNoBitsSet { val, .. } => self.type_of(*val), Insn::GuardShape { val, .. } => self.type_of(*val), Insn::GuardNotFrozen { recv, .. } | Insn::GuardNotShared { recv, .. } => self.type_of(*recv), Insn::GuardLess { left, .. } => self.type_of(*left), @@ -2473,10 +2545,11 @@ impl Function { Insn::FixnumRShift { .. } => types::Fixnum, Insn::PutSpecialObject { .. } => types::BasicObject, Insn::SendWithoutBlock { .. } => types::BasicObject, - Insn::SendWithoutBlockDirect { .. } => types::BasicObject, + Insn::SendDirect { .. } => types::BasicObject, Insn::Send { .. } => types::BasicObject, Insn::SendForward { .. } => types::BasicObject, Insn::InvokeSuper { .. } => types::BasicObject, + Insn::InvokeSuperForward { .. } => types::BasicObject, Insn::InvokeBlock { .. } => types::BasicObject, Insn::InvokeProc { .. } => types::BasicObject, Insn::InvokeBuiltin { return_type, .. } => return_type.unwrap_or(types::BasicObject), @@ -2494,6 +2567,7 @@ impl Function { Insn::GetIvar { .. } => types::BasicObject, Insn::LoadPC => types::CPtr, Insn::LoadEC => types::CPtr, + Insn::GetEP { .. } => types::CPtr, Insn::GetLEP => types::CPtr, Insn::LoadSelf => types::BasicObject, &Insn::LoadField { return_type, .. } => return_type, @@ -2621,7 +2695,9 @@ impl Function { Insn::GuardType { val, .. 
} | Insn::GuardTypeNot { val, .. } | Insn::GuardShape { val, .. } - | Insn::GuardBitEquals { val, .. } => self.chase_insn(val), + | Insn::GuardBitEquals { val, .. } + | Insn::GuardAnyBitSet { val, .. } + | Insn::GuardNoBitsSet { val, .. } => self.chase_insn(val), | Insn::RefineType { val, .. } => self.chase_insn(val), _ => id, } @@ -2639,7 +2715,7 @@ impl Function { } /// Prepare arguments for a direct send, handling keyword argument reordering and default synthesis. - /// Returns the (state, processed_args, kw_bits) to use for the SendWithoutBlockDirect instruction, + /// Returns the (state, processed_args, kw_bits) to use for the SendDirect instruction, /// or Err with the fallback reason if direct send isn't possible. fn prepare_direct_send_args( &mut self, @@ -2684,7 +2760,7 @@ impl Function { if callee_keyword.is_null() { if !kwarg.is_null() { // Caller is passing kwargs but callee doesn't expect them. - return Err(SendWithoutBlockDirectKeywordMismatch); + return Err(SendDirectKeywordMismatch); } // Neither caller nor callee have keywords - nothing to do return Ok((args.to_vec(), args.len(), 0)); @@ -2697,7 +2773,7 @@ impl Function { // When there are 31+ keywords, CRuby uses a hash instead of a fixnum bitmask // for kw_bits. Fall back to VM dispatch for this rare case. if callee_kw_count >= VM_KW_SPECIFIED_BITS_MAX as usize { - return Err(SendWithoutBlockDirectTooManyKeywords); + return Err(SendDirectTooManyKeywords); } let callee_kw_required = unsafe { (*callee_keyword).required_num } as usize; @@ -2706,7 +2782,7 @@ impl Function { // Caller can't provide more keywords than callee expects (no **kwrest support yet). if caller_kw_count > callee_kw_count { - return Err(SendWithoutBlockDirectKeywordCountMismatch); + return Err(SendDirectKeywordCountMismatch); } // The keyword arguments are the last arguments in the args vector. @@ -2736,7 +2812,7 @@ impl Function { if !found { // Caller is passing an unknown keyword - this will raise ArgumentError. 
// Fall back to VM dispatch to handle the error. - return Err(SendWithoutBlockDirectKeywordMismatch); + return Err(SendDirectKeywordMismatch); } } @@ -2760,7 +2836,7 @@ impl Function { if !found { // Required keyword not provided by caller which will raise an ArgumentError. if i < callee_kw_required { - return Err(SendWithoutBlockDirectMissingKeyword); + return Err(SendDirectMissingKeyword); } // Optional keyword not provided - use default value @@ -2806,6 +2882,22 @@ impl Function { self.resolve_receiver_type_from_profile(recv, insn_idx) } + fn polymorphic_summary(&self, profiles: &ProfileOracle, recv: InsnId, insn_idx: usize) -> Option { + let Some(entries) = profiles.types.get(&insn_idx) else { + return None; + }; + let recv = self.chase_insn(recv); + for (entry_insn, entry_type_summary) in entries { + if self.union_find.borrow().find_const(*entry_insn) == recv { + if entry_type_summary.is_polymorphic() { + return Some(entry_type_summary.clone()); + } + return None; + } + } + None + } + /// Resolve the receiver type for method dispatch optimization from profile data. /// /// Returns: @@ -2938,9 +3030,11 @@ impl Function { } } - /// Rewrite SendWithoutBlock opcodes into SendWithoutBlockDirect opcodes if we know the target - /// ISEQ statically. This removes run-time method lookups and opens the door for inlining. + /// Rewrite eligible Send/SendWithoutBlock opcodes into SendDirect + /// opcodes if we know the target ISEQ statically. This removes run-time method lookups and + /// opens the door for inlining. /// Also try and inline constant caches, specialize object allocations, and more. + /// Calls to C functions are handled separately in optimize_c_calls. 
fn type_specialize(&mut self) { for block in self.rpo() { let old_insns = std::mem::take(&mut self.blocks[block.0].insns); @@ -3010,7 +3104,7 @@ impl Function { def_type = unsafe { get_cme_def_type(cme) }; } - // If the call site info indicates that the `Function` has overly complex arguments, then do not optimize into a `SendWithoutBlockDirect`. + // If the call site info indicates that the `Function` has overly complex arguments, then do not optimize into a `SendDirect`. // Optimized methods(`VM_METHOD_TYPE_OPTIMIZED`) handle their own argument constraints (e.g., kw_splat for Proc call). if def_type != VM_METHOD_TYPE_OPTIMIZED && unspecializable_call_type(flags) { self.count_complex_call_features(block, flags); @@ -3023,15 +3117,20 @@ impl Function { // Only specialize positional-positional calls // TODO(max): Handle other kinds of parameter passing let iseq = unsafe { get_def_iseq_ptr((*cme).def) }; - if !can_direct_send(self, block, iseq, ci, insn_id, args.as_slice()) { + if !can_direct_send(self, block, iseq, ci, insn_id, args.as_slice(), None) { self.push_insn_id(block, insn_id); continue; } + // Check singleton class assumption first, before emitting other patchpoints if !self.assume_no_singleton_classes(block, klass, state) { self.set_dynamic_send_reason(insn_id, SingletonClassSeen); self.push_insn_id(block, insn_id); continue; } + + // Add PatchPoint for method redefinition self.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass, method: mid, cme }, state }); + + // Add GuardType for profiled receiver if let Some(profiled_type) = profiled_type { recv = self.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state }); } @@ -3041,7 +3140,7 @@ impl Function { self.push_insn_id(block, insn_id); continue; }; - let send_direct = self.push_insn(block, Insn::SendWithoutBlockDirect { recv, cd, cme, iseq, args: processed_args, kw_bits, state: send_state }); + let send_direct = 
self.push_insn(block, Insn::SendDirect { recv, cd, cme, iseq, args: processed_args, kw_bits, state: send_state, blockiseq: None }); self.make_equal_to(insn_id, send_direct); } else if def_type == VM_METHOD_TYPE_BMETHOD { let procv = unsafe { rb_get_def_bmethod_proc((*cme).def) }; @@ -3056,11 +3155,9 @@ impl Function { let capture = unsafe { proc_block.as_.captured.as_ref() }; let iseq = unsafe { *capture.code.iseq.as_ref() }; - if !can_direct_send(self, block, iseq, ci, insn_id, args.as_slice()) { + if !can_direct_send(self, block, iseq, ci, insn_id, args.as_slice(), None) { self.push_insn_id(block, insn_id); continue; } - // Can't pass a block to a block for now - assert!((unsafe { rb_vm_ci_flag(ci) } & VM_CALL_ARGS_BLOCKARG) == 0, "SendWithoutBlock but has a block arg"); // Patch points: // Check for "defined with an un-shareable Proc in a different Ractor" @@ -3084,7 +3181,7 @@ impl Function { self.push_insn_id(block, insn_id); continue; }; - let send_direct = self.push_insn(block, Insn::SendWithoutBlockDirect { recv, cd, cme, iseq, args: processed_args, kw_bits, state: send_state }); + let send_direct = self.push_insn(block, Insn::SendDirect { recv, cd, cme, iseq, args: processed_args, kw_bits, state: send_state, blockiseq: None }); self.make_equal_to(insn_id, send_direct); } else if def_type == VM_METHOD_TYPE_IVAR && args.is_empty() { // Check if we're accessing ivars of a Class or Module object as they require single-ractor mode. @@ -3213,14 +3310,12 @@ impl Function { self.push_insn_id(block, insn_id); continue; } } - // This doesn't actually optimize Send yet, just replaces the fallback reason to be more precise. - // The actual optimization is done in reduce_send_to_ccall. - Insn::Send { recv, cd, state, .. } => { + Insn::Send { mut recv, cd, state, blockiseq, args, .. 
} => { let frame_state = self.frame_state(state); - let klass = match self.resolve_receiver_type(recv, self.type_of(recv), frame_state.insn_idx) { - ReceiverTypeResolution::StaticallyKnown { class } => class, + let (klass, profiled_type) = match self.resolve_receiver_type(recv, self.type_of(recv), frame_state.insn_idx) { + ReceiverTypeResolution::StaticallyKnown { class } => (class, None), ReceiverTypeResolution::Monomorphic { profiled_type } - | ReceiverTypeResolution::SkewedPolymorphic { profiled_type } => profiled_type.class(), + | ReceiverTypeResolution::SkewedPolymorphic { profiled_type } => (profiled_type.class(), Some(profiled_type)), ReceiverTypeResolution::SkewedMegamorphic { .. } | ReceiverTypeResolution::Megamorphic => { if get_option!(stats) { @@ -3245,6 +3340,9 @@ impl Function { } }; let ci = unsafe { get_call_data_ci(cd) }; // info about the call site + + let flags = unsafe { rb_vm_ci_flag(ci) }; + let mid = unsafe { vm_ci_mid(ci) }; // Do method lookup let mut cme = unsafe { rb_callable_method_entry(klass, mid) }; @@ -3255,13 +3353,70 @@ impl Function { // Load an overloaded cme if applicable. See vm_search_cc(). // It allows you to use a faster ISEQ if possible. 
cme = unsafe { rb_check_overloaded_cme(cme, ci) }; + let visibility = unsafe { METHOD_ENTRY_VISI(cme) }; + match (visibility, flags & VM_CALL_FCALL != 0) { + (METHOD_VISI_PUBLIC, _) => {} + (METHOD_VISI_PRIVATE, true) => {} + (METHOD_VISI_PROTECTED, true) => {} + _ => { + self.set_dynamic_send_reason(insn_id, SendNotOptimizedNeedPermission); + self.push_insn_id(block, insn_id); continue; + } + } let mut def_type = unsafe { get_cme_def_type(cme) }; while def_type == VM_METHOD_TYPE_ALIAS { cme = unsafe { rb_aliased_callable_method_entry(cme) }; def_type = unsafe { get_cme_def_type(cme) }; } - self.set_dynamic_send_reason(insn_id, SendNotOptimizedMethodType(MethodType::from(def_type))); - self.push_insn_id(block, insn_id); continue; + + // If the call site info indicates that the `Function` has overly complex arguments, then do not optimize into a `SendDirect`. + // Optimized methods(`VM_METHOD_TYPE_OPTIMIZED`) handle their own argument constraints (e.g., kw_splat for Proc call). + if def_type != VM_METHOD_TYPE_OPTIMIZED && unspecializable_call_type(flags) { + self.count_complex_call_features(block, flags); + self.set_dynamic_send_reason(insn_id, ComplexArgPass); + self.push_insn_id(block, insn_id); continue; + } + + if def_type == VM_METHOD_TYPE_ISEQ { + let iseq = unsafe { get_def_iseq_ptr((*cme).def) }; + if !can_direct_send(self, block, iseq, ci, insn_id, args.as_slice(), Some(blockiseq)) { + self.push_insn_id(block, insn_id); continue; + } + + // Check singleton class assumption first, before emitting other patchpoints + if !self.assume_no_singleton_classes(block, klass, state) { + self.set_dynamic_send_reason(insn_id, SingletonClassSeen); + self.push_insn_id(block, insn_id); continue; + } + + // Add PatchPoint for method redefinition + self.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass, method: mid, cme }, state }); + + // Add GuardType for profiled receiver + if let Some(profiled_type) = profiled_type { + recv = 
self.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state }); + } + + let Ok((send_state, processed_args, kw_bits)) = self.prepare_direct_send_args(block, &args, ci, iseq, state) + .inspect_err(|&reason| self.set_dynamic_send_reason(insn_id, reason)) else { + self.push_insn_id(block, insn_id); continue; + }; + + let send_direct = self.push_insn(block, Insn::SendDirect { + recv, + cd, + cme, + iseq, + args: processed_args, + kw_bits, + blockiseq: Some(blockiseq), + state: send_state, + }); + self.make_equal_to(insn_id, send_direct); + } else { + self.set_dynamic_send_reason(insn_id, SendNotOptimizedMethodType(MethodType::from(def_type))); + self.push_insn_id(block, insn_id); continue; + } } Insn::GetConstantPath { ic, state, .. } => { let idlist: *const ID = unsafe { (*ic).segments }; @@ -3363,6 +3518,40 @@ impl Function { }; } Insn::InvokeSuper { recv, cd, blockiseq, args, state, .. } => { + // Helper to emit common guards for super call optimization. + fn emit_super_call_guards( + fun: &mut Function, + block: BlockId, + super_cme: *const rb_callable_method_entry_t, + current_cme: *const rb_callable_method_entry_t, + mid: ID, + state: InsnId, + ) { + fun.push_insn(block, Insn::PatchPoint { + invariant: Invariant::MethodRedefined { + klass: unsafe { (*super_cme).defined_class }, + method: mid, + cme: super_cme + }, + state + }); + + let lep = fun.push_insn(block, Insn::GetLEP); + fun.push_insn(block, Insn::GuardSuperMethodEntry { + lep, + cme: current_cme, + state + }); + + let block_handler = fun.push_insn(block, Insn::GetBlockHandler { lep }); + fun.push_insn(block, Insn::GuardBitEquals { + val: block_handler, + expected: Const::Value(VALUE(VM_BLOCK_HANDLER_NONE as usize)), + reason: SideExitReason::UnhandledBlockArg, + state + }); + } + // Don't handle calls with literal blocks (e.g., super { ... 
}) if !blockiseq.is_null() { self.push_insn_id(block, insn_id); @@ -3426,66 +3615,107 @@ impl Function { continue; } - // Check if it's an ISEQ method; bail if it isn't. let def_type = unsafe { get_cme_def_type(super_cme) }; - if def_type != VM_METHOD_TYPE_ISEQ { - self.push_insn_id(block, insn_id); - self.set_dynamic_send_reason(insn_id, SuperNotOptimizedMethodType(MethodType::from(def_type))); - continue; - } - // Check if the super method's parameters support direct send. - // If not, we can't do direct dispatch. - let super_iseq = unsafe { get_def_iseq_ptr((*super_cme).def) }; - if !can_direct_send(self, block, super_iseq, ci, insn_id, args.as_slice()) { - self.push_insn_id(block, insn_id); - self.set_dynamic_send_reason(insn_id, SuperTargetComplexArgsPass); - continue; - } + if def_type == VM_METHOD_TYPE_ISEQ { + // Check if the super method's parameters support direct send. + // If not, we can't do direct dispatch. + let super_iseq = unsafe { get_def_iseq_ptr((*super_cme).def) }; + // TODO: pass Option to can_direct_send when we start specializing `super { ... }`. + if !can_direct_send(self, block, super_iseq, ci, insn_id, args.as_slice(), None) { + self.push_insn_id(block, insn_id); + self.set_dynamic_send_reason(insn_id, SuperTargetComplexArgsPass); + continue; + } - // Add PatchPoint for method redefinition. - self.push_insn(block, Insn::PatchPoint { - invariant: Invariant::MethodRedefined { - klass: unsafe { (*super_cme).defined_class }, - method: mid, - cme: super_cme - }, - state - }); + let Ok((send_state, processed_args, kw_bits)) = self.prepare_direct_send_args(block, &args, ci, super_iseq, state) + .inspect_err(|&reason| self.set_dynamic_send_reason(insn_id, reason)) else { + self.push_insn_id(block, insn_id); continue; + }; - // Guard that we're calling `super` from the expected method context. 
- let lep = self.push_insn(block, Insn::GetLEP); - self.push_insn(block, Insn::GuardSuperMethodEntry { - lep, - cme: current_cme, - state - }); + emit_super_call_guards(self, block, super_cme, current_cme, mid, state); + + // Use SendDirect with the super method's CME and ISEQ. + let send_direct = self.push_insn(block, Insn::SendDirect { + recv, + cd, + cme: super_cme, + iseq: super_iseq, + args: processed_args, + kw_bits, + state: send_state, + blockiseq: None, + }); + self.make_equal_to(insn_id, send_direct); - // Guard that no block is being passed (implicit or explicit). - let block_handler = self.push_insn(block, Insn::GetBlockHandler { lep }); - self.push_insn(block, Insn::GuardBitEquals { - val: block_handler, - expected: Const::Value(VALUE(VM_BLOCK_HANDLER_NONE as usize)), - reason: SideExitReason::UnhandledBlockArg, - state - }); + } else if def_type == VM_METHOD_TYPE_CFUNC { + let cfunc = unsafe { get_cme_def_body_cfunc(super_cme) }; + let cfunc_argc = unsafe { get_mct_argc(cfunc) }; + let cfunc_ptr = unsafe { get_mct_func(cfunc) }.cast(); + + match cfunc_argc { + // C function with fixed argument count. + 0.. => { + // Check argc matches + if args.len() != cfunc_argc as usize { + self.push_insn_id(block, insn_id); + self.set_dynamic_send_reason(insn_id, ArgcParamMismatch); + continue; + } - let Ok((send_state, processed_args, kw_bits)) = self.prepare_direct_send_args(block, &args, ci, super_iseq, state) - .inspect_err(|&reason| self.set_dynamic_send_reason(insn_id, reason)) else { - self.push_insn_id(block, insn_id); continue; - }; + emit_super_call_guards(self, block, super_cme, current_cme, mid, state); + + // Use CCallWithFrame for the C function. 
+ let name = rust_str_to_id(&qualified_method_name(unsafe { (*super_cme).owner }, unsafe { (*super_cme).called_id })); + let ccall = self.push_insn(block, Insn::CCallWithFrame { + cd, + cfunc: cfunc_ptr, + recv, + args: args.clone(), + cme: super_cme, + name, + state, + return_type: types::BasicObject, + elidable: false, + blockiseq: None, + }); + self.make_equal_to(insn_id, ccall); + } - // Use SendWithoutBlockDirect with the super method's CME and ISEQ. - let send_direct = self.push_insn(block, Insn::SendWithoutBlockDirect { - recv, - cd, - cme: super_cme, - iseq: super_iseq, - args: processed_args, - kw_bits, - state: send_state - }); - self.make_equal_to(insn_id, send_direct); + // Variadic C function: func(int argc, VALUE *argv, VALUE recv) + -1 => { + emit_super_call_guards(self, block, super_cme, current_cme, mid, state); + + // Use CCallVariadic for the variadic C function. + let name = rust_str_to_id(&qualified_method_name(unsafe { (*super_cme).owner }, unsafe { (*super_cme).called_id })); + let ccall = self.push_insn(block, Insn::CCallVariadic { + cfunc: cfunc_ptr, + recv, + args: args.clone(), + cme: super_cme, + name, + state, + return_type: types::BasicObject, + elidable: false, + blockiseq: None, + }); + self.make_equal_to(insn_id, ccall); + } + + // Array-variadic: (self, args_ruby_array). 
+ -2 => { + self.push_insn_id(block, insn_id); + self.set_dynamic_send_reason(insn_id, SuperNotOptimizedMethodType(MethodType::Cfunc)); + continue; + } + _ => unreachable!("unknown cfunc argc: {}", cfunc_argc) + } + } else { + // Other method types (not ISEQ or CFUNC) + self.push_insn_id(block, insn_id); + self.set_dynamic_send_reason(insn_id, SuperNotOptimizedMethodType(MethodType::from(def_type))); + continue; + } } _ => { self.push_insn_id(block, insn_id); } } @@ -3500,8 +3730,10 @@ impl Function { assert!(self.blocks[block.0].insns.is_empty()); for insn_id in old_insns { match self.find(insn_id) { - // Reject block ISEQs to avoid autosplat and other block parameter complications. - Insn::SendWithoutBlockDirect { recv, iseq, cd, args, state, .. } => { + // We can inline SendDirect with blockiseq because we are prohibiting `yield` + // and `.call`, which would trigger autosplat. We only inline constants and + // variables and builtin calls. + Insn::SendDirect { recv, iseq, cd, args, state, .. } => { let call_info = unsafe { (*cd).ci }; let ci_flags = unsafe { vm_ci_flag(call_info) }; // .send call is not currently supported for builtins @@ -3749,7 +3981,7 @@ impl Function { self.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass: recv_class, method: method_id, cme }, state }); } - /// Optimize SendWithoutBlock that land in a C method to a direct CCall without + /// Optimize Send/SendWithoutBlock that land in a C method to a direct CCall without /// runtime lookup. fn optimize_c_calls(&mut self) { if unsafe { rb_zjit_method_tracing_currently_enabled() } { @@ -3814,7 +4046,10 @@ impl Function { // When seeing &block argument, fall back to dynamic dispatch for now // TODO: Support block forwarding if unspecializable_c_call_type(ci_flags) { - fun.count_complex_call_features(block, ci_flags); + // Only count features NOT already counted in type_specialize. 
+ if !unspecializable_call_type(ci_flags) { + fun.count_complex_call_features(block, ci_flags); + } fun.set_dynamic_send_reason(send_insn_id, ComplexArgPass); return Err(()); } @@ -3984,7 +4219,10 @@ impl Function { // Filter for simple call sites (i.e. no splats etc.) if ci_flags & VM_CALL_ARGS_SIMPLE == 0 { - fun.count_complex_call_features(block, ci_flags); + // Only count features NOT already counted in type_specialize. + if !unspecializable_call_type(ci_flags) { + fun.count_complex_call_features(block, ci_flags); + } fun.set_dynamic_send_reason(send_insn_id, ComplexArgPass); return Err(()); } @@ -4064,8 +4302,10 @@ impl Function { // func(int argc, VALUE *argv, VALUE recv) let ci_flags = unsafe { vm_ci_flag(call_info) }; if ci_flags & VM_CALL_ARGS_SIMPLE == 0 { - // TODO(alan): Add fun.count_complex_call_features() here without double - // counting splat + // Only count features NOT already counted in type_specialize. + if !unspecializable_call_type(ci_flags) { + fun.count_complex_call_features(block, ci_flags); + } fun.set_dynamic_send_reason(send_insn_id, ComplexArgPass); return Err(()); } else { @@ -4143,18 +4383,6 @@ impl Function { Err(()) } - fn qualified_method_name(class: VALUE, method_id: ID) -> String { - let method_name = method_id.contents_lossy(); - // rb_zjit_singleton_class_p also checks if it's a class - if unsafe { rb_zjit_singleton_class_p(class) } { - let class_name = get_class_name(unsafe { rb_class_attached_object(class) }); - format!("{class_name}.{method_name}") - } else { - let class_name = get_class_name(class); - format!("{class_name}#{method_name}") - } - } - fn count_not_inlined_cfunc(fun: &mut Function, block: BlockId, cme: *const rb_callable_method_entry_t) { let owner = unsafe { (*cme).owner }; let called_id = unsafe { (*cme).called_id }; @@ -4406,6 +4634,7 @@ impl Function { | &Insn::EntryPoint { .. } | &Insn::LoadPC | &Insn::LoadEC + | &Insn::GetEP { .. } | &Insn::GetLEP | &Insn::LoadSelf | &Insn::GetLocal { .. 
} @@ -4480,6 +4709,7 @@ impl Function { worklist.push_back(state); } | &Insn::RefineType { val, .. } + | &Insn::HasType { val, .. } | &Insn::Return { val } | &Insn::Test { val } | &Insn::SetLocal { val, .. } @@ -4494,6 +4724,8 @@ impl Function { | &Insn::GuardType { val, state, .. } | &Insn::GuardTypeNot { val, state, .. } | &Insn::GuardBitEquals { val, state, .. } + | &Insn::GuardAnyBitSet { val, state, .. } + | &Insn::GuardNoBitsSet { val, state, .. } | &Insn::GuardShape { val, state, .. } | &Insn::GuardNotFrozen { recv: val, state } | &Insn::GuardNotShared { recv: val, state } @@ -4589,9 +4821,10 @@ impl Function { | &Insn::SendWithoutBlock { recv, ref args, state, .. } | &Insn::CCallVariadic { recv, ref args, state, .. } | &Insn::CCallWithFrame { recv, ref args, state, .. } - | &Insn::SendWithoutBlockDirect { recv, ref args, state, .. } + | &Insn::SendDirect { recv, ref args, state, .. } | &Insn::InvokeBuiltin { recv, ref args, state, .. } | &Insn::InvokeSuper { recv, ref args, state, .. } + | &Insn::InvokeSuperForward { recv, ref args, state, .. } | &Insn::InvokeProc { recv, ref args, state, .. } => { worklist.push_back(recv); worklist.extend(args); @@ -4643,7 +4876,6 @@ impl Function { worklist.push_back(recv); worklist.push_back(val); } - &Insn::GuardBlockParamProxy { state, .. } | &Insn::GetGlobal { state, .. } | &Insn::GetSpecialSymbol { state, .. } | &Insn::GetSpecialNumber { state, .. } | @@ -5125,6 +5357,34 @@ impl Function { Ok(()) } + // Validate that every instruction use is from a block-local definition, which is a temporary + // constraint until we get a global register allocator. 
+ // TODO(tenderworks): Remove this + fn temporary_validate_block_local_definite_assignment(&self) -> Result<(), ValidationError> { + for block in self.rpo() { + let mut assigned = InsnSet::with_capacity(self.insns.len()); + for &param in &self.blocks[block.0].params { + assigned.insert(param); + } + // Check that each instruction's operands are assigned + for &insn_id in &self.blocks[block.0].insns { + let insn_id = self.union_find.borrow().find_const(insn_id); + let mut operands = VecDeque::new(); + let insn = self.find(insn_id); + self.worklist_traverse_single_insn(&insn, &mut operands); + for operand in operands { + if !assigned.get(operand) { + return Err(ValidationError::OperandNotDefined(block, insn_id, operand)); + } + } + if insn.has_output() { + assigned.insert(insn_id); + } + } + } + Ok(()) + } + /// Checks that each instruction('s representative) appears only once in the CFG. fn validate_insn_uniqueness(&self) -> Result<(), ValidationError> { let mut seen = InsnSet::with_capacity(self.insns.len()); @@ -5161,12 +5421,12 @@ impl Function { | Insn::GetGlobal { .. } | Insn::LoadPC | Insn::LoadEC + | Insn::GetEP { .. } | Insn::GetLEP | Insn::LoadSelf | Insn::Snapshot { .. } | Insn::Jump { .. } | Insn::EntryPoint { .. } - | Insn::GuardBlockParamProxy { .. } | Insn::GuardSuperMethodEntry { .. } | Insn::GetBlockHandler { .. } | Insn::PatchPoint { .. } @@ -5220,10 +5480,11 @@ impl Function { } // Instructions with recv and a Vec of Ruby objects Insn::SendWithoutBlock { recv, ref args, .. } - | Insn::SendWithoutBlockDirect { recv, ref args, .. } + | Insn::SendDirect { recv, ref args, .. } | Insn::Send { recv, ref args, .. } | Insn::SendForward { recv, ref args, .. } | Insn::InvokeSuper { recv, ref args, .. } + | Insn::InvokeSuperForward { recv, ref args, .. } | Insn::CCallWithFrame { recv, ref args, .. } | Insn::CCallVariadic { recv, ref args, .. } | Insn::InvokeBuiltin { recv, ref args, .. 
} @@ -5389,6 +5650,18 @@ impl Function { Const::CPtr(_) => self.assert_subtype(insn_id, val, types::CPtr), } } + Insn::GuardAnyBitSet { val, mask, .. } + | Insn::GuardNoBitsSet { val, mask, .. } => { + match mask { + Const::CUInt8(_) | Const::CUInt16(_) | Const::CUInt32(_) | Const::CUInt64(_) + if self.is_a(val, types::CInt) || self.is_a(val, types::RubyValue) => { + Ok(()) + } + _ => { + Err(ValidationError::MiscValidationError(insn_id, "GuardAnyBitSet/GuardNoBitsSet can only compare RubyValue/CUInt or CInt/CUInt".to_string())) + } + } + } Insn::GuardLess { left, right, .. } | Insn::GuardGreaterEq { left, right, .. } => { self.assert_subtype(insn_id, left, types::CInt64)?; @@ -5408,6 +5681,7 @@ impl Function { self.assert_subtype(insn_id, class, types::Class) } Insn::RefineType { .. } => Ok(()), + Insn::HasType { val, .. } => self.assert_subtype(insn_id, val, types::BasicObject), } } @@ -5425,6 +5699,7 @@ impl Function { pub fn validate(&self) -> Result<(), ValidationError> { self.validate_block_terminators_and_jumps()?; self.validate_definite_assignment()?; + self.temporary_validate_block_local_definite_assignment()?; self.validate_insn_uniqueness()?; self.validate_types()?; Ok(()) @@ -6056,7 +6331,38 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { } } } - } else { + } else if opcode == YARVINSN_getblockparamproxy || opcode == YARVINSN_trace_getblockparamproxy { + if get_option!(stats) { + let iseq_insn_idx = exit_state.insn_idx; + if let Some([block_handler_distribution]) = profiles.payload.profile.get_operand_types(iseq_insn_idx) { + let summary = TypeDistributionSummary::new(block_handler_distribution); + + if summary.is_monomorphic() { + let obj = summary.bucket(0).class(); + if unsafe { rb_IMEMO_TYPE_P(obj, imemo_iseq) == 1} { + fun.push_insn(block, Insn::IncrCounter(Counter::getblockparamproxy_handler_iseq)); + } else if unsafe { rb_IMEMO_TYPE_P(obj, imemo_ifunc) == 1} { + fun.push_insn(block, 
Insn::IncrCounter(Counter::getblockparamproxy_handler_ifunc)); + } + else if obj.nil_p() { + fun.push_insn(block, Insn::IncrCounter(Counter::getblockparamproxy_handler_nil)); + } + else if obj.symbol_p() { + fun.push_insn(block, Insn::IncrCounter(Counter::getblockparamproxy_handler_symbol)); + } else if unsafe { rb_obj_is_proc(obj).test() } { + fun.push_insn(block, Insn::IncrCounter(Counter::getblockparamproxy_handler_proc)); + } + } else if summary.is_polymorphic() || summary.is_skewed_polymorphic() { + fun.push_insn(block, Insn::IncrCounter(Counter::getblockparamproxy_handler_polymorphic)); + } else if summary.is_megamorphic() || summary.is_skewed_megamorphic() { + fun.push_insn(block, Insn::IncrCounter(Counter::getblockparamproxy_handler_megamorphic)); + } + } else { + fun.push_insn(block, Insn::IncrCounter(Counter::getblockparamproxy_handler_no_profiles)); + } + } + } + else { profiles.profile_stack(&exit_state); } @@ -6451,9 +6757,39 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { } YARVINSN_getblockparamproxy => { let level = get_arg(pc, 1).as_u32(); - fun.push_insn(block, Insn::GuardBlockParamProxy { level, state: exit_id }); - // TODO(Shopify/ruby#753): GC root, so we should be able to avoid unnecessary GC tracing - state.stack_push(fun.push_insn(block, Insn::Const { val: Const::Value(unsafe { rb_block_param_proxy }) })); + + let profiled_block_type = if let Some([block_handler_distribution]) = profiles.payload.profile.get_operand_types(exit_state.insn_idx) { + let summary = TypeDistributionSummary::new(block_handler_distribution); + summary.is_monomorphic().then_some(summary.bucket(0).class()) + } else { + None + }; + + let ep = fun.push_insn(block, Insn::GetEP { level }); + let flags = fun.push_insn(block, Insn::LoadField { recv: ep, id: ID!(_env_data_index_flags), offset: SIZEOF_VALUE_I32 * (VM_ENV_DATA_INDEX_FLAGS as i32), return_type: types::CInt64 }); + fun.push_insn(block, Insn::GuardNoBitsSet { val: flags, mask: 
Const::CUInt64(VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()), reason: SideExitReason::BlockParamProxyModified, state: exit_id }); + + let block_handler = fun.push_insn(block, Insn::LoadField { recv: ep, id: ID!(_env_data_index_specval), offset: SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL, return_type: types::CInt64 }); + + match profiled_block_type { + Some(ty) if ty.nil_p() => { + fun.push_insn(block, Insn::GuardBitEquals { val: block_handler, expected: Const::CInt64(VM_BLOCK_HANDLER_NONE.into()), reason: SideExitReason::BlockParamProxyNotNil, state: exit_id }); + state.stack_push(fun.push_insn(block, Insn::Const { val: Const::Value(Qnil) })); + } + _ => { + // This handles two cases which are nearly identical + // Block handler is a tagged pointer. Look at the tag. + // VM_BH_ISEQ_BLOCK_P(): block_handler & 0x03 == 0x01 + // VM_BH_IFUNC_P(): block_handler & 0x03 == 0x03 + // So to check for either of those cases we can use: val & 0x1 == 0x1 + const _: () = assert!(RUBY_SYMBOL_FLAG & 1 == 0, "guard below rejects symbol block handlers"); + + // Bail out if the block handler is neither ISEQ nor ifunc + fun.push_insn(block, Insn::GuardAnyBitSet { val: block_handler, mask: Const::CUInt64(0x1), reason: SideExitReason::BlockParamProxyNotIseqOrIfunc, state: exit_id }); + // TODO(Shopify/ruby#753): GC root, so we should be able to avoid unnecessary GC tracing + state.stack_push(fun.push_insn(block, Insn::Const { val: Const::Value(unsafe { rb_block_param_proxy }) })); + } + } } YARVINSN_getblockparam => { fn new_branch_block( @@ -6706,6 +7042,71 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { } let argc = unsafe { vm_ci_argc((*cd).ci) }; + { + fn new_branch_block( + fun: &mut Function, + cd: *const rb_call_data, + argc: usize, + opcode: u32, + new_type: Type, + insn_idx: u32, + exit_state: &FrameState, + locals_count: usize, + stack_count: usize, + join_block: BlockId, + ) -> BlockId { + let block = fun.new_block(insn_idx); + let self_param = fun.push_insn(block, 
Insn::Param); + let mut state = exit_state.clone(); + state.locals.clear(); + state.stack.clear(); + state.locals.extend((0..locals_count).map(|_| fun.push_insn(block, Insn::Param))); + state.stack.extend((0..stack_count).map(|_| fun.push_insn(block, Insn::Param))); + let snapshot = fun.push_insn(block, Insn::Snapshot { state: state.clone() }); + let args = state.stack_pop_n(argc).unwrap(); + let recv = state.stack_pop().unwrap(); + let refined_recv = fun.push_insn(block, Insn::RefineType { val: recv, new_type }); + state.replace(recv, refined_recv); + let send = fun.push_insn(block, Insn::SendWithoutBlock { recv: refined_recv, cd, args, state: snapshot, reason: Uncategorized(opcode) }); + state.stack_push(send); + fun.push_insn(block, Insn::Jump(BranchEdge { target: join_block, args: state.as_args(self_param) })); + block + } + let branch_insn_idx = exit_state.insn_idx as u32; + let locals_count = state.locals.len(); + let stack_count = state.stack.len(); + let recv = state.stack_topn(argc as usize)?; // args are on top + let entry_args = state.as_args(self_param); + if let Some(summary) = fun.polymorphic_summary(&profiles, recv, exit_state.insn_idx) { + let join_block = insn_idx_to_block.get(&insn_idx).copied().unwrap_or_else(|| fun.new_block(insn_idx)); + // TODO(max): Only iterate over unique classes, not unique (class, shape) pairs. + for &profiled_type in summary.buckets() { + if profiled_type.is_empty() { break; } + let expected = Type::from_profiled_type(profiled_type); + let has_type = fun.push_insn(block, Insn::HasType { val: recv, expected }); + let iftrue_block = + new_branch_block(&mut fun, cd, argc as usize, opcode, expected, branch_insn_idx, &exit_state, locals_count, stack_count, join_block); + let target = BranchEdge { target: iftrue_block, args: entry_args.clone() }; + fun.push_insn(block, Insn::IfTrue { val: has_type, target }); + } + // Continue compilation from the join block at the next instruction. 
+ // Make a copy of the current state without the args (pop the receiver + // and push the result) because we just use the locals/stack sizes to + // make the right number of Params + let mut join_state = state.clone(); + join_state.stack_pop_n(argc as usize)?; + queue.push_back((join_state, join_block, insn_idx, local_inval)); + // In the fallthrough case, do a generic interpreter send and then join. + let args = state.stack_pop_n(argc as usize)?; + let recv = state.stack_pop()?; + let reason = SendWithoutBlockPolymorphicFallback; + let send = fun.push_insn(block, Insn::SendWithoutBlock { recv, cd, args, state: exit_id, reason }); + state.stack_push(send); + fun.push_insn(block, Insn::Jump(BranchEdge { target: join_block, args: state.as_args(self_param) })); + break; // End the block + } + } + let args = state.stack_pop_n(argc as usize)?; let recv = state.stack_pop()?; let send = fun.push_insn(block, Insn::SendWithoutBlock { recv, cd, args, state: exit_id, reason: Uncategorized(opcode) }); @@ -6798,6 +7199,35 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { } } } + YARVINSN_invokesuperforward => { + let cd: *const rb_call_data = get_arg(pc, 0).as_ptr(); + let blockiseq: IseqPtr = get_arg(pc, 1).as_iseq(); + let call_info = unsafe { rb_get_call_data_ci(cd) }; + let flags = unsafe { rb_vm_ci_flag(call_info) }; + let forwarding = (flags & VM_CALL_FORWARDING) != 0; + if let Err(call_type) = unhandled_call_type(flags) { + // Can't handle tailcall; side-exit into the interpreter + fun.push_insn(block, Insn::SideExit { state: exit_id, reason: SideExitReason::UnhandledCallType(call_type) }); + break; // End the block + } + let argc = unsafe { vm_ci_argc((*cd).ci) }; + let args = state.stack_pop_n(argc as usize + usize::from(forwarding))?; + let recv = state.stack_pop()?; + let result = fun.push_insn(block, Insn::InvokeSuperForward { recv, cd, blockiseq, args, state: exit_id, reason: Uncategorized(opcode) }); + state.stack_push(result); + + if !blockiseq.is_null() 
{ + // Reload locals that may have been modified by the blockiseq. + // TODO: Avoid reloading locals that are not referenced by the blockiseq + // or not used after this. Max thinks we could eventually DCE them. + for local_idx in 0..state.locals.len() { + let ep_offset = local_idx_to_ep_offset(iseq, local_idx) as u32; + // TODO: We could use `use_sp: true` with PatchPoint + let val = fun.push_insn(block, Insn::GetLocal { ep_offset, level: 0, use_sp: false, rest_param: false }); + state.setlocal(ep_offset, val); + } + } + } YARVINSN_invokeblock => { let cd: *const rb_call_data = get_arg(pc, 0).as_ptr(); let call_info = unsafe { rb_get_call_data_ci(cd) }; @@ -7577,6 +8007,16 @@ mod validation_tests { assert_matches_err(function.validate_definite_assignment(), ValidationError::OperandNotDefined(entry, val, dangling)); } + #[test] + fn not_defined_within_bb_block_local() { + let mut function = Function::new(std::ptr::null()); + let entry = function.entry_block; + // Create an instruction without making it belong to anything. + let dangling = function.new_insn(Insn::Const{val: Const::CBool(true)}); + let val = function.push_insn(function.entry_block, Insn::ArrayDup { val: dangling, state: InsnId(0usize) }); + assert_matches_err(function.temporary_validate_block_local_definite_assignment(), ValidationError::OperandNotDefined(entry, val, dangling)); + } + #[test] fn using_non_output_insn() { let mut function = Function::new(std::ptr::null()); @@ -7588,6 +8028,17 @@ mod validation_tests { assert_matches_err(function.validate_definite_assignment(), ValidationError::OperandNotDefined(entry, val, ret)); } + #[test] + fn using_non_output_insn_block_local() { + let mut function = Function::new(std::ptr::null()); + let entry = function.entry_block; + let const_ = function.push_insn(function.entry_block, Insn::Const{val: Const::CBool(true)}); + // Ret is a non-output instruction. 
+ let ret = function.push_insn(function.entry_block, Insn::Return { val: const_ }); + let val = function.push_insn(function.entry_block, Insn::ArrayDup { val: ret, state: InsnId(0usize) }); + assert_matches_err(function.temporary_validate_block_local_definite_assignment(), ValidationError::OperandNotDefined(entry, val, ret)); + } + #[test] fn not_dominated_by_diamond() { // This tests that one branch is missing a definition which fails. diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index 0110af3f2c4c5d..de4e2ec39db7a7 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -701,7 +701,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v18:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v19:BasicObject = SendWithoutBlockDirect v18, :foo (0x1038) + v19:BasicObject = SendDirect v18, 0x1038, :foo (0x1048) CheckInterrupts Return v19 "); @@ -795,7 +795,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v20:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v21:BasicObject = SendWithoutBlockDirect v20, :foo (0x1038), v11 + v21:BasicObject = SendDirect v20, 0x1038, :foo (0x1048), v11 CheckInterrupts Return v21 "); @@ -913,7 +913,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v18:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v19:BasicObject = SendWithoutBlockDirect v18, :foo (0x1038) + v19:BasicObject = SendDirect v18, 0x1038, :foo (0x1048) CheckInterrupts Return v19 "); @@ -941,7 +941,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, 
Integer@0x1008, cme:0x1010) v20:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v21:BasicObject = SendWithoutBlockDirect v20, :Integer (0x1038), v11 + v21:BasicObject = SendDirect v20, 0x1038, :Integer (0x1048), v11 CheckInterrupts Return v21 "); @@ -971,7 +971,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v22:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v23:BasicObject = SendWithoutBlockDirect v22, :foo (0x1038), v11, v13 + v23:BasicObject = SendDirect v22, 0x1038, :foo (0x1048), v11, v13 CheckInterrupts Return v23 "); @@ -1001,11 +1001,11 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v23:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v24:BasicObject = SendWithoutBlockDirect v23, :foo (0x1038) + v24:BasicObject = SendDirect v23, 0x1038, :foo (0x1048) PatchPoint NoSingletonClass(Object@0x1000) - PatchPoint MethodRedefined(Object@0x1000, bar@0x1040, cme:0x1048) + PatchPoint MethodRedefined(Object@0x1000, bar@0x1050, cme:0x1058) v27:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v28:BasicObject = SendWithoutBlockDirect v27, :bar (0x1038) + v28:BasicObject = SendDirect v27, 0x1038, :bar (0x1048) CheckInterrupts Return v28 "); @@ -1031,7 +1031,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v18:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v19:BasicObject = SendWithoutBlockDirect v18, :foo (0x1038) + v19:BasicObject = SendDirect v18, 0x1038, :foo (0x1048) CheckInterrupts Return v19 "); @@ -1058,7 
+1058,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v20:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v21:BasicObject = SendWithoutBlockDirect v20, :foo (0x1038), v11 + v21:BasicObject = SendDirect v20, 0x1038, :foo (0x1048), v11 CheckInterrupts Return v21 "); @@ -1086,7 +1086,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v22:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v23:BasicObject = SendWithoutBlockDirect v22, :foo (0x1038), v11, v13 + v23:BasicObject = SendDirect v22, 0x1038, :foo (0x1048), v11, v13 CheckInterrupts Return v23 "); @@ -1113,14 +1113,14 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, target@0x1008, cme:0x1010) v44:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v45:BasicObject = SendWithoutBlockDirect v44, :target (0x1038) + v45:BasicObject = SendDirect v44, 0x1038, :target (0x1048) v14:Fixnum[10] = Const Value(10) v16:Fixnum[20] = Const Value(20) v18:Fixnum[30] = Const Value(30) PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, target@0x1008, cme:0x1010) v48:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v49:BasicObject = SendWithoutBlockDirect v48, :target (0x1038), v14, v16, v18 + v49:BasicObject = SendDirect v48, 0x1038, :target (0x1048), v14, v16, v18 v24:Fixnum[10] = Const Value(10) v26:Fixnum[20] = Const Value(20) v28:Fixnum[30] = Const Value(30) @@ -2865,20 +2865,21 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(C@0x1000) PatchPoint MethodRedefined(C@0x1000, foo@0x1008, cme:0x1010) v21:HeapObject[class_exact:C] = 
GuardType v9, HeapObject[class_exact:C] - v22:BasicObject = SendWithoutBlockDirect v21, :foo (0x1038) + v22:BasicObject = SendDirect v21, 0x1038, :foo (0x1048) CheckInterrupts Return v22 "); } #[test] - fn dont_specialize_call_to_iseq_with_block() { - eval(" - def foo(&block) = 1 - def test = foo {|| } + fn test_send_direct_iseq_with_block() { + let result = eval(" + def foo(a, b, &block) = block.call(a, b) + def test = foo(1, 2) { |a, b| a + b } test test "); + assert_eq!(VALUE::fixnum_from_usize(3), result); assert_snapshot!(hir_string("test"), @r" fn test@:3: bb0(): @@ -2889,9 +2890,14 @@ mod hir_opt_tests { EntryPoint JIT(0) Jump bb2(v4) bb2(v6:BasicObject): - v11:BasicObject = Send v6, 0x1000, :foo # SendFallbackReason: Send: unsupported method type Iseq + v11:Fixnum[1] = Const Value(1) + v13:Fixnum[2] = Const Value(2) + PatchPoint NoSingletonClass(Object@0x1000) + PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) + v22:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] + v23:BasicObject = SendDirect v22, 0x1038, :foo (0x1048), v11, v13 CheckInterrupts - Return v11 + Return v23 "); } @@ -2921,7 +2927,10 @@ mod hir_opt_tests { bb2(v8:BasicObject, v9:NilClass): v13:Fixnum[1] = Const Value(1) SetLocal :a, l0, EP@3, v13 - v19:BasicObject = Send v8, 0x1000, :foo # SendFallbackReason: Send: unsupported method type Iseq + PatchPoint NoSingletonClass(Object@0x1000) + PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) + v31:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v8, HeapObject[class_exact*:Object@VALUE(0x1000)] + IncrCounter inline_iseq_optimized_send_count v20:BasicObject = GetLocal :a, l0, EP@3 v24:BasicObject = GetLocal :a, l0, EP@3 CheckInterrupts @@ -3003,7 +3012,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v22:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, 
HeapObject[class_exact*:Object@VALUE(0x1000)] - v23:BasicObject = SendWithoutBlockDirect v22, :foo (0x1038), v11, v13 + v23:BasicObject = SendDirect v22, 0x1038, :foo (0x1048), v11, v13 CheckInterrupts Return v23 "); @@ -3033,7 +3042,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v24:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v26:BasicObject = SendWithoutBlockDirect v24, :foo (0x1038), v13, v15, v11 + v26:BasicObject = SendDirect v24, 0x1038, :foo (0x1048), v13, v15, v11 CheckInterrupts Return v26 "); @@ -3063,7 +3072,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v24:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v26:BasicObject = SendWithoutBlockDirect v24, :foo (0x1038), v11, v15, v13 + v26:BasicObject = SendDirect v24, 0x1038, :foo (0x1048), v11, v15, v13 CheckInterrupts Return v26 "); @@ -3092,7 +3101,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v22:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v23:BasicObject = SendWithoutBlockDirect v22, :foo (0x1038), v11, v13 + v23:BasicObject = SendDirect v22, 0x1038, :foo (0x1048), v11, v13 CheckInterrupts Return v23 "); @@ -3122,7 +3131,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v37:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v38:BasicObject = SendWithoutBlockDirect v37, :foo (0x1038), v11, v13, v15 + v38:BasicObject = SendDirect v37, 0x1038, :foo (0x1048), v11, v13, v15 v20:Fixnum[1] = Const Value(1) v22:Fixnum[2] = 
Const Value(2) v24:Fixnum[4] = Const Value(4) @@ -3130,7 +3139,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v41:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v43:BasicObject = SendWithoutBlockDirect v41, :foo (0x1038), v20, v22, v26, v24 + v43:BasicObject = SendDirect v41, 0x1038, :foo (0x1048), v20, v22, v26, v24 v30:ArrayExact = NewArray v38, v43 CheckInterrupts Return v30 @@ -3161,7 +3170,7 @@ mod hir_opt_tests { PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v35:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] v36:Fixnum[4] = Const Value(4) - v38:BasicObject = SendWithoutBlockDirect v35, :foo (0x1038), v11, v13, v36 + v38:BasicObject = SendDirect v35, 0x1038, :foo (0x1048), v11, v13, v36 v18:Fixnum[1] = Const Value(1) v20:Fixnum[2] = Const Value(2) v22:Fixnum[40] = Const Value(40) @@ -3169,7 +3178,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v41:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v43:BasicObject = SendWithoutBlockDirect v41, :foo (0x1038), v18, v20, v24, v22 + v43:BasicObject = SendDirect v41, 0x1038, :foo (0x1048), v18, v20, v24, v22 v28:ArrayExact = NewArray v38, v43 CheckInterrupts Return v28 @@ -3198,7 +3207,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, target@0x1008, cme:0x1010) v48:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v49:BasicObject = SendWithoutBlockDirect v48, :target (0x1038), v11 + v49:BasicObject = SendDirect v48, 0x1038, :target (0x1048), v11 v16:Fixnum[10] = Const Value(10) v18:Fixnum[20] = Const Value(20) v20:Fixnum[30] = 
Const Value(30) @@ -3206,7 +3215,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, target@0x1008, cme:0x1010) v52:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v53:BasicObject = SendWithoutBlockDirect v52, :target (0x1038), v16, v18, v20, v22 + v53:BasicObject = SendDirect v52, 0x1038, :target (0x1048), v16, v18, v20, v22 v27:Fixnum[10] = Const Value(10) v29:Fixnum[20] = Const Value(20) v31:Fixnum[30] = Const Value(30) @@ -3242,7 +3251,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v20:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v21:BasicObject = SendWithoutBlockDirect v20, :foo (0x1038), v11 + v21:BasicObject = SendDirect v20, 0x1038, :foo (0x1048), v11 CheckInterrupts Return v21 "); @@ -3296,7 +3305,7 @@ mod hir_opt_tests { PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v18:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] v19:Fixnum[1] = Const Value(1) - v21:BasicObject = SendWithoutBlockDirect v18, :foo (0x1038), v19 + v21:BasicObject = SendDirect v18, 0x1038, :foo (0x1048), v19 CheckInterrupts Return v21 "); @@ -3374,6 +3383,7 @@ mod hir_opt_tests { v11:StringExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) v12:StringExact = StringCopy v11 v14:Fixnum[1] = Const Value(1) + IncrCounter complex_arg_pass_caller_kwarg v16:BasicObject = SendWithoutBlock v6, :sprintf, v12, v14 # SendFallbackReason: Complex argument passing CheckInterrupts Return v16 @@ -3578,7 +3588,7 @@ mod hir_opt_tests { v49:HeapObject[class_exact:C] = ObjectAllocClass C:VALUE(0x1008) PatchPoint NoSingletonClass(C@0x1008) PatchPoint MethodRedefined(C@0x1008, initialize@0x1038, cme:0x1040) - v52:BasicObject = SendWithoutBlockDirect v49, :initialize 
(0x1068), v16 + v52:BasicObject = SendDirect v49, 0x1068, :initialize (0x1078), v16 CheckInterrupts CheckInterrupts Return v49 @@ -3876,11 +3886,15 @@ mod hir_opt_tests { EntryPoint JIT(0) Jump bb2(v5, v6) bb2(v8:BasicObject, v9:BasicObject): - GuardBlockParamProxy l0 - v15:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1000)) - v17:BasicObject = Send v8, 0x1008, :tap, v15 # SendFallbackReason: Uncategorized(send) + v14:CPtr = GetEP 0 + v15:CInt64 = LoadField v14, :_env_data_index_flags@0x1000 + v16:CInt64 = GuardNoBitsSet v15, CUInt64(512) + v17:CInt64 = LoadField v14, :_env_data_index_specval@0x1001 + v18:CInt64 = GuardAnyBitSet v17, CUInt64(1) + v19:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1008)) + v21:BasicObject = Send v8, 0x1010, :tap, v19 # SendFallbackReason: Uncategorized(send) CheckInterrupts - Return v17 + Return v21 "); } @@ -5312,7 +5326,7 @@ mod hir_opt_tests { v13:Fixnum[10] = Const Value(10) PatchPoint NoSingletonClass(Array@0x1008) PatchPoint MethodRedefined(Array@0x1008, []@0x1010, cme:0x1018) - v23:BasicObject = SendWithoutBlockDirect v11, :[] (0x1040), v13 + v23:BasicObject = SendDirect v11, 0x1040, :[] (0x1050), v13 CheckInterrupts Return v23 "); @@ -5370,7 +5384,7 @@ mod hir_opt_tests { v11:ArrayExact = ArrayDup v10 PatchPoint NoSingletonClass(Array@0x1008) PatchPoint MethodRedefined(Array@0x1008, max@0x1010, cme:0x1018) - v20:BasicObject = SendWithoutBlockDirect v11, :max (0x1040) + v20:BasicObject = SendDirect v11, 0x1040, :max (0x1050) CheckInterrupts Return v20 "); @@ -6393,9 +6407,29 @@ mod hir_opt_tests { EntryPoint JIT(0) Jump bb2(v5, v6) bb2(v8:BasicObject, v9:BasicObject): - v14:BasicObject = SendWithoutBlock v9, :foo # SendFallbackReason: Uncategorized(opt_send_without_block) + v14:CBool = HasType v9, HeapObject[class_exact:C] + IfTrue v14, bb4(v8, v9, v9) + v23:CBool = HasType v9, HeapObject[class_exact:C] + IfTrue v23, bb5(v8, v9, v9) + v32:BasicObject = SendWithoutBlock v9, :foo # SendFallbackReason: SendWithoutBlock: 
polymorphic fallback + Jump bb3(v8, v9, v32) + bb4(v15:BasicObject, v16:BasicObject, v17:BasicObject): + v19:HeapObject[class_exact:C] = RefineType v17, HeapObject[class_exact:C] + PatchPoint NoSingletonClass(C@0x1000) + PatchPoint MethodRedefined(C@0x1000, foo@0x1008, cme:0x1010) + IncrCounter getivar_fallback_not_monomorphic + v44:BasicObject = GetIvar v19, :@foo + Jump bb3(v15, v16, v44) + bb5(v24:BasicObject, v25:BasicObject, v26:BasicObject): + v28:HeapObject[class_exact:C] = RefineType v26, HeapObject[class_exact:C] + PatchPoint NoSingletonClass(C@0x1000) + PatchPoint MethodRedefined(C@0x1000, foo@0x1008, cme:0x1010) + IncrCounter getivar_fallback_not_monomorphic + v47:BasicObject = GetIvar v28, :@foo + Jump bb3(v24, v25, v47) + bb3(v34:BasicObject, v35:BasicObject, v36:BasicObject): CheckInterrupts - Return v14 + Return v36 "); } @@ -6508,6 +6542,37 @@ mod hir_opt_tests { #[test] fn test_do_not_optimize_send_with_block_forwarding() { + eval(r#" + def test(&block) = [].map(&block) + test { |x| x }; test { |x| x } + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :block, l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v13:ArrayExact = NewArray + v15:CPtr = GetEP 0 + v16:CInt64 = LoadField v15, :_env_data_index_flags@0x1000 + v17:CInt64 = GuardNoBitsSet v16, CUInt64(512) + v18:CInt64 = LoadField v15, :_env_data_index_specval@0x1001 + v19:CInt64 = GuardAnyBitSet v18, CUInt64(1) + v20:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1008)) + IncrCounter complex_arg_pass_caller_blockarg + v22:BasicObject = Send v13, 0x1010, :map, v20 # SendFallbackReason: Complex argument passing + CheckInterrupts + Return v22 + "); + } + + #[test] + fn test_replace_block_param_proxy_with_nil() { eval(r#" def test(&block) = [].map(&block) test; test @@ -6524,25 +6589,64 @@ mod 
hir_opt_tests { Jump bb2(v5, v6) bb2(v8:BasicObject, v9:BasicObject): v13:ArrayExact = NewArray - GuardBlockParamProxy l0 - v16:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1000)) + v15:CPtr = GetEP 0 + v16:CInt64 = LoadField v15, :_env_data_index_flags@0x1000 + v17:CInt64 = GuardNoBitsSet v16, CUInt64(512) + v18:CInt64 = LoadField v15, :_env_data_index_specval@0x1001 + v19:CInt64[0] = GuardBitEquals v18, CInt64(0) + v20:NilClass = Const Value(nil) IncrCounter complex_arg_pass_caller_blockarg - v18:BasicObject = Send v13, 0x1008, :map, v16 # SendFallbackReason: Complex argument passing + v22:BasicObject = Send v13, 0x1008, :map, v20 # SendFallbackReason: Complex argument passing CheckInterrupts - Return v18 + Return v22 "); } #[test] - fn test_do_not_optimize_send_to_iseq_method_with_block() { + fn test_replace_block_param_proxy_with_nil_nested() { eval(r#" + def test(&block) + proc do + [].map(&block) + end + end + test; test + "#); + assert_snapshot!(hir_string_proc("test"), @r" + fn block in test@:4: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v10:ArrayExact = NewArray + v12:CPtr = GetEP 1 + v13:CInt64 = LoadField v12, :_env_data_index_flags@0x1000 + v14:CInt64 = GuardNoBitsSet v13, CUInt64(512) + v15:CInt64 = LoadField v12, :_env_data_index_specval@0x1001 + v16:CInt64 = GuardAnyBitSet v15, CUInt64(1) + v17:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1008)) + IncrCounter complex_arg_pass_caller_blockarg + v19:BasicObject = Send v10, 0x1010, :map, v17 # SendFallbackReason: Complex argument passing + CheckInterrupts + Return v19 + "); + } + + #[test] + fn test_send_direct_iseq_with_block_no_callee_block_param() { + let result = eval(r#" def foo yield 1 end - def test = foo {} + def test = foo { |x| x * 2 } test; test "#); + assert_eq!(VALUE::fixnum_from_usize(2), result); assert_snapshot!(hir_string("test"), @r" fn test@:6: bb0(): @@ 
-6553,9 +6657,12 @@ mod hir_opt_tests { EntryPoint JIT(0) Jump bb2(v4) bb2(v6:BasicObject): - v11:BasicObject = Send v6, 0x1000, :foo # SendFallbackReason: Send: unsupported method type Iseq + PatchPoint NoSingletonClass(Object@0x1000) + PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) + v18:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] + v19:BasicObject = SendDirect v18, 0x1038, :foo (0x1048) CheckInterrupts - Return v11 + Return v19 "); } @@ -9438,6 +9545,123 @@ mod hir_opt_tests { "); } + #[test] + fn test_inline_send_with_block_with_no_params() { + eval(r#" + def callee = 123 + def test + callee do + end + end + test + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:4: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + PatchPoint NoSingletonClass(Object@0x1000) + PatchPoint MethodRedefined(Object@0x1000, callee@0x1008, cme:0x1010) + v18:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] + IncrCounter inline_iseq_optimized_send_count + v21:Fixnum[123] = Const Value(123) + CheckInterrupts + Return v21 + "); + } + + #[test] + fn test_inline_send_with_block_with_one_param() { + eval(r#" + def callee = 123 + def test + callee do |_| + end + end + test + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:4: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + PatchPoint NoSingletonClass(Object@0x1000) + PatchPoint MethodRedefined(Object@0x1000, callee@0x1008, cme:0x1010) + v18:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] + IncrCounter inline_iseq_optimized_send_count + v21:Fixnum[123] = Const Value(123) + CheckInterrupts + Return v21 + "); + 
} + + #[test] + fn test_inline_send_with_block_with_multiple_params() { + eval(r#" + def callee = 123 + def test + callee do |_a, _b| + end + end + test + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:4: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + PatchPoint NoSingletonClass(Object@0x1000) + PatchPoint MethodRedefined(Object@0x1000, callee@0x1008, cme:0x1010) + v18:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] + IncrCounter inline_iseq_optimized_send_count + v21:Fixnum[123] = Const Value(123) + CheckInterrupts + Return v21 + "); + } + + #[test] + fn test_no_inline_send_with_symbol_block() { + eval(r#" + def callee = 123 + public def the_block = 456 + def test + callee(&:the_block) + end + puts test + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:5: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v11:StaticSymbol[:the_block] = Const Value(VALUE(0x1000)) + v13:BasicObject = Send v6, 0x1008, :callee, v11 # SendFallbackReason: Uncategorized(send) + CheckInterrupts + Return v13 + "); + } + #[test] fn test_optimize_stringexact_eq_stringexact() { eval(r#" @@ -11029,8 +11253,8 @@ mod hir_opt_tests { // A Ruby method as the target of `super` should optimize provided no block is given. 
let hir = hir_string_proc("B.new.method(:foo)"); - assert!(!hir.contains("InvokeSuper "), "InvokeSuper should optimize to SendWithoutBlockDirect but got:\n{hir}"); - assert!(hir.contains("SendWithoutBlockDirect"), "Should optimize to SendWithoutBlockDirect for call without args or block:\n{hir}"); + assert!(!hir.contains("InvokeSuper "), "InvokeSuper should optimize to SendDirect but got:\n{hir}"); + assert!(hir.contains("SendDirect"), "Should optimize to SendDirect for call without args or block:\n{hir}"); assert_snapshot!(hir, @r" fn foo@:10: @@ -11047,7 +11271,7 @@ mod hir_opt_tests { GuardSuperMethodEntry v17, 0x1038 v19:RubyValue = GetBlockHandler v17 v20:FalseClass = GuardBitEquals v19, Value(false) - v21:BasicObject = SendWithoutBlockDirect v6, :foo (0x1040) + v21:BasicObject = SendDirect v6, 0x1040, :foo (0x1050) CheckInterrupts Return v21 "); @@ -11072,8 +11296,8 @@ mod hir_opt_tests { "); let hir = hir_string_proc("B.new.method(:foo)"); - assert!(!hir.contains("InvokeSuper "), "InvokeSuper should optimize to SendWithoutBlockDirect but got:\n{hir}"); - assert!(hir.contains("SendWithoutBlockDirect"), "Should optimize to SendWithoutBlockDirect for call without args or block:\n{hir}"); + assert!(!hir.contains("InvokeSuper "), "InvokeSuper should optimize to SendDirect but got:\n{hir}"); + assert!(hir.contains("SendDirect"), "Should optimize to SendDirect for call without args or block:\n{hir}"); assert_snapshot!(hir, @r" fn foo@:10: @@ -11091,9 +11315,9 @@ mod hir_opt_tests { GuardSuperMethodEntry v26, 0x1038 v28:RubyValue = GetBlockHandler v26 v29:FalseClass = GuardBitEquals v28, Value(false) - v30:BasicObject = SendWithoutBlockDirect v8, :foo (0x1040), v9 + v30:BasicObject = SendDirect v8, 0x1040, :foo (0x1050), v9 v17:Fixnum[1] = Const Value(1) - PatchPoint MethodRedefined(Integer@0x1048, +@0x1050, cme:0x1058) + PatchPoint MethodRedefined(Integer@0x1058, +@0x1060, cme:0x1068) v33:Fixnum = GuardType v30, Fixnum v34:Fixnum = FixnumAdd v33, v17 IncrCounter 
inline_cfunc_optimized_send_count @@ -11122,7 +11346,7 @@ mod hir_opt_tests { let hir = hir_string_proc("B.new.method(:foo)"); assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}"); - assert!(!hir.contains("SendWithoutBlockDirect"), "Should not optimize to SendWithoutBlockDirect for explicit blockarg:\n{hir}"); + assert!(!hir.contains("SendDirect"), "Should not optimize to SendDirect for explicit blockarg:\n{hir}"); assert_snapshot!(hir, @r" fn foo@:10: @@ -11162,9 +11386,9 @@ mod hir_opt_tests { let hir = hir_string_proc("B.new.method(:foo)"); assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}"); - assert!(!hir.contains("SendWithoutBlockDirect"), "Should not optimize to SendWithoutBlockDirect for block literal:\n{hir}"); + assert!(!hir.contains("SendDirect"), "Should not optimize to SendDirect for block literal:\n{hir}"); - // With a block, we don't optimize to SendWithoutBlockDirect + // With a block, we don't optimize to SendDirect assert_snapshot!(hir, @r" fn foo@:10: bb0(): @@ -11182,7 +11406,7 @@ mod hir_opt_tests { } #[test] - fn test_invokesuper_to_cfunc_remains_invokesuper() { + fn test_invokesuper_to_cfunc_optimizes_to_ccall() { eval(" class MyArray < Array def length @@ -11194,10 +11418,10 @@ mod hir_opt_tests { "); let hir = hir_string_proc("MyArray.new.method(:length)"); - assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}"); - assert!(!hir.contains("SendWithoutBlockDirect"), "Should not optimize to SendWithoutBlockDirect for CFUNC:\n{hir}"); + assert!(!hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}"); + assert!(hir.contains("CCallWithFrame"), "Should optimize to CCallWithFrame for non-variadic cfunc:\n{hir}"); - assert_snapshot!(hir, @r" + assert_snapshot!(hir, @" fn length@:4: bb0(): EntryPoint interpreter @@ -11207,9 +11431,64 @@ mod hir_opt_tests { EntryPoint JIT(0) Jump bb2(v4) bb2(v6:BasicObject): - 
v11:BasicObject = InvokeSuper v6, 0x1000 # SendFallbackReason: super: unsupported target method type Cfunc + PatchPoint MethodRedefined(Array@0x1000, length@0x1008, cme:0x1010) + v17:CPtr = GetLEP + GuardSuperMethodEntry v17, 0x1038 + v19:RubyValue = GetBlockHandler v17 + v20:FalseClass = GuardBitEquals v19, Value(false) + v21:BasicObject = CCallWithFrame v6, :Array#length@0x1040 CheckInterrupts - Return v11 + Return v21 + "); + } + + #[test] + fn test_invokesuper_to_variadic_cfunc_optimizes_to_ccall() { + eval(" + class MyString < String + def byteindex(needle, offset = 0) + super(needle, offset) + end + end + + MyString.new('hello world').byteindex('world', 0); MyString.new('hello world').byteindex('world', 0) + "); + + let hir = hir_string_proc("MyString.new('hello world').method(:byteindex)"); + assert!(!hir.contains("InvokeSuper "), "InvokeSuper should optimize to CCallVariadic but got:\n{hir}"); + assert!(hir.contains("CCallVariadic"), "Should optimize to CCallVariadic for variadic cfunc:\n{hir}"); + + assert_snapshot!(hir, @" + fn byteindex@:3: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :needle, l0, SP@5 + v3:BasicObject = GetLocal :offset, l0, SP@4 + v4:CPtr = LoadPC + v5:CPtr[CPtr(0x1000)] = Const CPtr(0x1008) + v6:CBool = IsBitEqual v4, v5 + IfTrue v6, bb2(v1, v2, v3) + Jump bb4(v1, v2, v3) + bb1(v10:BasicObject, v11:BasicObject): + EntryPoint JIT(0) + v12:NilClass = Const Value(nil) + Jump bb2(v10, v11, v12) + bb2(v19:BasicObject, v20:BasicObject, v21:BasicObject): + v24:Fixnum[0] = Const Value(0) + Jump bb4(v19, v20, v24) + bb3(v15:BasicObject, v16:BasicObject, v17:BasicObject): + EntryPoint JIT(1) + Jump bb4(v15, v16, v17) + bb4(v27:BasicObject, v28:BasicObject, v29:BasicObject): + PatchPoint MethodRedefined(String@0x1010, byteindex@0x1018, cme:0x1020) + v42:CPtr = GetLEP + GuardSuperMethodEntry v42, 0x1008 + v44:RubyValue = GetBlockHandler v42 + v45:FalseClass = GuardBitEquals v44, Value(false) + 
v46:BasicObject = CCallVariadic v27, :String#byteindex@0x1048, v28, v29 + CheckInterrupts + Return v46 "); } @@ -11234,7 +11513,7 @@ mod hir_opt_tests { let hir = hir_string_proc("B.new.method(:foo)"); assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}"); - assert!(!hir.contains("SendWithoutBlockDirect"), "Should not optimize to SendWithoutBlockDirect for explicit blockarg:\n{hir}"); + assert!(!hir.contains("SendDirect"), "Should not optimize to SendDirect for explicit blockarg:\n{hir}"); assert_snapshot!(hir, @r" fn foo@:10: @@ -11282,7 +11561,7 @@ mod hir_opt_tests { let hir = hir_string_proc("B.new.method(:foo)"); assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}"); - assert!(!hir.contains("SendWithoutBlockDirect"), "Should not optimize to SendWithoutBlockDirect for symbol-to-proc:\n{hir}"); + assert!(!hir.contains("SendDirect"), "Should not optimize to SendDirect for symbol-to-proc:\n{hir}"); assert_snapshot!(hir, @r" fn foo@:10: @@ -11400,4 +11679,108 @@ mod hir_opt_tests { Return v47 "); } + + #[test] + fn specialize_polymorphic_send_iseq() { + set_call_threshold(4); + eval(" + class C + def foo = 3 + end + + class D + def foo = 4 + end + + def test o + o.foo + 2 + end + + test C.new; test D.new; test C.new; test D.new + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:11: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :o, l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v14:CBool = HasType v9, HeapObject[class_exact:C] + IfTrue v14, bb4(v8, v9, v9) + v23:CBool = HasType v9, HeapObject[class_exact:D] + IfTrue v23, bb5(v8, v9, v9) + v32:BasicObject = SendWithoutBlock v9, :foo # SendFallbackReason: SendWithoutBlock: polymorphic fallback + Jump bb3(v8, v9, v32) + bb4(v15:BasicObject, v16:BasicObject, v17:BasicObject): + PatchPoint 
NoSingletonClass(C@0x1000) + PatchPoint MethodRedefined(C@0x1000, foo@0x1008, cme:0x1010) + IncrCounter inline_iseq_optimized_send_count + v54:Fixnum[3] = Const Value(3) + Jump bb3(v15, v16, v54) + bb5(v24:BasicObject, v25:BasicObject, v26:BasicObject): + PatchPoint NoSingletonClass(D@0x1038) + PatchPoint MethodRedefined(D@0x1038, foo@0x1008, cme:0x1040) + IncrCounter inline_iseq_optimized_send_count + v56:Fixnum[4] = Const Value(4) + Jump bb3(v24, v25, v56) + bb3(v34:BasicObject, v35:BasicObject, v36:BasicObject): + v39:Fixnum[2] = Const Value(2) + PatchPoint MethodRedefined(Integer@0x1068, +@0x1070, cme:0x1078) + v59:Fixnum = GuardType v36, Fixnum + v60:Fixnum = FixnumAdd v59, v39 + IncrCounter inline_cfunc_optimized_send_count + CheckInterrupts + Return v60 + "); + } + + #[test] + fn specialize_polymorphic_send_with_immediate() { + set_call_threshold(4); + eval(" + class C; end + + def test o + o.itself + end + + test C.new; test 3; test C.new; test 4 + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:5: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :o, l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v14:CBool = HasType v9, HeapObject[class_exact:C] + IfTrue v14, bb4(v8, v9, v9) + v23:CBool = HasType v9, Fixnum + IfTrue v23, bb5(v8, v9, v9) + v32:BasicObject = SendWithoutBlock v9, :itself # SendFallbackReason: SendWithoutBlock: polymorphic fallback + Jump bb3(v8, v9, v32) + bb4(v15:BasicObject, v16:BasicObject, v17:BasicObject): + v19:HeapObject[class_exact:C] = RefineType v17, HeapObject[class_exact:C] + PatchPoint NoSingletonClass(C@0x1000) + PatchPoint MethodRedefined(C@0x1000, itself@0x1008, cme:0x1010) + IncrCounter inline_cfunc_optimized_send_count + Jump bb3(v15, v16, v19) + bb5(v24:BasicObject, v25:BasicObject, v26:BasicObject): + v28:Fixnum = RefineType v26, Fixnum + PatchPoint 
MethodRedefined(Integer@0x1038, itself@0x1008, cme:0x1010) + IncrCounter inline_cfunc_optimized_send_count + Jump bb3(v24, v25, v28) + bb3(v34:BasicObject, v35:BasicObject, v36:BasicObject): + CheckInterrupts + Return v36 + "); + } } diff --git a/zjit/src/hir/tests.rs b/zjit/src/hir/tests.rs index 56f1928f1fa753..5b97a61d80dd0d 100644 --- a/zjit/src/hir/tests.rs +++ b/zjit/src/hir/tests.rs @@ -80,8 +80,8 @@ mod snapshot_tests { PatchPoint MethodRedefined(Object@0x1010, foo@0x1018, cme:0x1020) v24:HeapObject[class_exact*:Object@VALUE(0x1010)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1010)] v25:Any = Snapshot FrameState { pc: 0x1008, stack: [v6, v13, v15, v11], locals: [] } - v26:BasicObject = SendWithoutBlockDirect v24, :foo (0x1048), v13, v15, v11 - v18:Any = Snapshot FrameState { pc: 0x1050, stack: [v26], locals: [] } + v26:BasicObject = SendDirect v24, 0x1048, :foo (0x1058), v13, v15, v11 + v18:Any = Snapshot FrameState { pc: 0x1060, stack: [v26], locals: [] } PatchPoint NoTracePoint CheckInterrupts Return v26 @@ -114,8 +114,8 @@ mod snapshot_tests { PatchPoint NoSingletonClass(Object@0x1010) PatchPoint MethodRedefined(Object@0x1010, foo@0x1018, cme:0x1020) v22:HeapObject[class_exact*:Object@VALUE(0x1010)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1010)] - v23:BasicObject = SendWithoutBlockDirect v22, :foo (0x1048), v11, v13 - v16:Any = Snapshot FrameState { pc: 0x1050, stack: [v23], locals: [] } + v23:BasicObject = SendDirect v22, 0x1048, :foo (0x1058), v11, v13 + v16:Any = Snapshot FrameState { pc: 0x1060, stack: [v23], locals: [] } PatchPoint NoTracePoint CheckInterrupts Return v23 @@ -1843,7 +1843,7 @@ pub mod hir_build_tests { } #[test] - fn test_cant_compile_super_forward() { + fn test_compile_super_forward() { eval(" def test(...) = super(...) 
"); @@ -1858,7 +1858,79 @@ pub mod hir_build_tests { EntryPoint JIT(0) Jump bb2(v5, v6) bb2(v8:BasicObject, v9:BasicObject): - SideExit UnhandledYARVInsn(invokesuperforward) + v15:BasicObject = InvokeSuperForward v8, 0x1000, v9 # SendFallbackReason: Uncategorized(invokesuperforward) + CheckInterrupts + Return v15 + "); + } + + #[test] + fn test_compile_super_forward_with_block() { + eval(" + def test(...) = super { |x| x } + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :..., l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v15:BasicObject = InvokeSuperForward v8, 0x1000, v9 # SendFallbackReason: Uncategorized(invokesuperforward) + v16:BasicObject = GetLocal :..., l0, EP@3 + CheckInterrupts + Return v15 + "); + } + + #[test] + fn test_compile_super_forward_with_use() { + eval(" + def test(...) = super(...) + 1 + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :..., l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v15:BasicObject = InvokeSuperForward v8, 0x1000, v9 # SendFallbackReason: Uncategorized(invokesuperforward) + v17:Fixnum[1] = Const Value(1) + v20:BasicObject = SendWithoutBlock v15, :+, v17 # SendFallbackReason: Uncategorized(opt_plus) + CheckInterrupts + Return v20 + "); + } + + #[test] + fn test_compile_super_forward_with_arg() { + eval(" + def test(...) = super(1, ...) 
+ "); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :..., l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v14:Fixnum[1] = Const Value(1) + v17:BasicObject = InvokeSuperForward v8, 0x1000, v14, v9 # SendFallbackReason: Uncategorized(invokesuperforward) + CheckInterrupts + Return v17 "); } @@ -1984,8 +2056,12 @@ pub mod hir_build_tests { bb2(v16:BasicObject, v17:BasicObject, v18:ArrayExact, v19:BasicObject, v20:BasicObject, v21:NilClass): v28:ArrayExact = ToArray v18 PatchPoint NoEPEscape(test) - GuardBlockParamProxy l0 - v34:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1000)) + v33:CPtr = GetEP 0 + v34:CInt64 = LoadField v33, :_env_data_index_flags@0x1000 + v35:CInt64 = GuardNoBitsSet v34, CUInt64(512) + v36:CInt64 = LoadField v33, :_env_data_index_specval@0x1001 + v37:CInt64 = GuardAnyBitSet v36, CUInt64(1) + v38:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1008)) SideExit UnhandledYARVInsn(splatkw) "); } @@ -3356,20 +3432,24 @@ pub mod hir_build_tests { bb2(v16:BasicObject, v17:BasicObject, v18:BasicObject, v19:BasicObject, v20:BasicObject, v21:NilClass): v25:BasicObject = InvokeBuiltin dir_s_open, v16, v17, v18 PatchPoint NoEPEscape(open) - GuardBlockParamProxy l0 - v32:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1000)) + v31:CPtr = GetEP 0 + v32:CInt64 = LoadField v31, :_env_data_index_flags@0x1000 + v33:CInt64 = GuardNoBitsSet v32, CUInt64(512) + v34:CInt64 = LoadField v31, :_env_data_index_specval@0x1001 + v35:CInt64 = GuardAnyBitSet v34, CUInt64(1) + v36:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1008)) + CheckInterrupts + v39:CBool[true] = Test v36 + v40 = RefineType v36, Falsy + IfFalse v39, bb3(v16, v17, v18, v19, v20, v25) + v42:HeapObject[BlockParamProxy] = RefineType v36, Truthy + v46:BasicObject = InvokeBlock, v25 # 
SendFallbackReason: Uncategorized(invokeblock) + v49:BasicObject = InvokeBuiltin dir_s_close, v16, v25 CheckInterrupts - v35:CBool[true] = Test v32 - v36 = RefineType v32, Falsy - IfFalse v35, bb3(v16, v17, v18, v19, v20, v25) - v38:HeapObject[BlockParamProxy] = RefineType v32, Truthy - v42:BasicObject = InvokeBlock, v25 # SendFallbackReason: Uncategorized(invokeblock) - v45:BasicObject = InvokeBuiltin dir_s_close, v16, v25 - CheckInterrupts - Return v42 - bb3(v51, v52, v53, v54, v55, v56): + Return v46 + bb3(v55, v56, v57, v58, v59, v60): CheckInterrupts - Return v56 + Return v60 "); } diff --git a/zjit/src/profile.rs b/zjit/src/profile.rs index c1feb759529e15..ad6da06c71e91b 100644 --- a/zjit/src/profile.rs +++ b/zjit/src/profile.rs @@ -91,6 +91,7 @@ fn profile_insn(bare_opcode: ruby_vminsn_type, ec: EcPtr) { YARVINSN_opt_size => profile_operands(profiler, profile, 1), YARVINSN_opt_succ => profile_operands(profiler, profile, 1), YARVINSN_invokeblock => profile_block_handler(profiler, profile), + YARVINSN_getblockparamproxy => profile_getblockparamproxy(profiler, profile), YARVINSN_invokesuper => profile_invokesuper(profiler, profile), YARVINSN_opt_send_without_block | YARVINSN_send => { let cd: *const rb_call_data = profiler.insn_opnd(0).as_ptr(); @@ -155,6 +156,22 @@ fn profile_block_handler(profiler: &mut Profiler, profile: &mut IseqProfile) { types[0].observe(ty); } +fn profile_getblockparamproxy(profiler: &mut Profiler, profile: &mut IseqProfile) { + let types = &mut profile.opnd_types[profiler.insn_idx]; + if types.is_empty() { + types.resize(1, TypeDistribution::new()); + } + + let level = profiler.insn_opnd(1).as_u32(); + let ep = unsafe { get_cfp_ep_level(profiler.cfp, level) }; + let block_handler = unsafe { *ep.offset(VM_ENV_DATA_INDEX_SPECVAL as isize) }; + let untagged = unsafe { rb_vm_untag_block_handler(block_handler) }; + + let ty = ProfiledType::object(untagged); + VALUE::from(profiler.iseq).write_barrier(ty.class()); + types[0].observe(ty); +} + 
fn profile_invokesuper(profiler: &mut Profiler, profile: &mut IseqProfile) { let cme = unsafe { rb_vm_frame_method_entry(profiler.cfp) }; let cme_value = VALUE(cme as usize); // CME is a T_IMEMO, which is a VALUE diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs index 96d75b7aec84b7..6fc754007f6a82 100644 --- a/zjit/src/stats.rs +++ b/zjit/src/stats.rs @@ -210,6 +210,7 @@ make_counters! { exit_stackoverflow, exit_block_param_proxy_modified, exit_block_param_proxy_not_iseq_or_ifunc, + exit_block_param_proxy_not_nil, exit_block_param_wb_required, exit_too_many_keyword_parameters, } @@ -228,6 +229,7 @@ make_counters! { send_fallback_too_many_args_for_lir, send_fallback_send_without_block_bop_redefined, send_fallback_send_without_block_operands_not_fixnum, + send_fallback_send_without_block_polymorphic_fallback, send_fallback_send_without_block_direct_keyword_mismatch, send_fallback_send_without_block_direct_keyword_count_mismatch, send_fallback_send_without_block_direct_missing_keyword, @@ -422,6 +424,15 @@ make_counters! { invokeblock_handler_polymorphic, invokeblock_handler_megamorphic, invokeblock_handler_no_profiles, + + getblockparamproxy_handler_iseq, + getblockparamproxy_handler_ifunc, + getblockparamproxy_handler_symbol, + getblockparamproxy_handler_proc, + getblockparamproxy_handler_nil, + getblockparamproxy_handler_polymorphic, + getblockparamproxy_handler_megamorphic, + getblockparamproxy_handler_no_profiles, } /// Increase a counter by a specified amount @@ -558,6 +569,7 @@ pub fn side_exit_counter(reason: crate::hir::SideExitReason) -> Counter { StackOverflow => exit_stackoverflow, BlockParamProxyModified => exit_block_param_proxy_modified, BlockParamProxyNotIseqOrIfunc => exit_block_param_proxy_not_iseq_or_ifunc, + BlockParamProxyNotNil => exit_block_param_proxy_not_nil, BlockParamWbRequired => exit_block_param_wb_required, TooManyKeywordParameters => exit_too_many_keyword_parameters, PatchPoint(Invariant::BOPRedefined { .. 
}) @@ -599,10 +611,11 @@ pub fn send_fallback_counter(reason: crate::hir::SendFallbackReason) -> Counter TooManyArgsForLir => send_fallback_too_many_args_for_lir, SendWithoutBlockBopRedefined => send_fallback_send_without_block_bop_redefined, SendWithoutBlockOperandsNotFixnum => send_fallback_send_without_block_operands_not_fixnum, - SendWithoutBlockDirectKeywordMismatch => send_fallback_send_without_block_direct_keyword_mismatch, - SendWithoutBlockDirectKeywordCountMismatch=> send_fallback_send_without_block_direct_keyword_count_mismatch, - SendWithoutBlockDirectMissingKeyword => send_fallback_send_without_block_direct_missing_keyword, - SendWithoutBlockDirectTooManyKeywords => send_fallback_send_without_block_direct_too_many_keywords, + SendWithoutBlockPolymorphicFallback => send_fallback_send_without_block_polymorphic_fallback, + SendDirectKeywordMismatch => send_fallback_send_without_block_direct_keyword_mismatch, + SendDirectKeywordCountMismatch => send_fallback_send_without_block_direct_keyword_count_mismatch, + SendDirectMissingKeyword => send_fallback_send_without_block_direct_missing_keyword, + SendDirectTooManyKeywords => send_fallback_send_without_block_direct_too_many_keywords, SendPolymorphic => send_fallback_send_polymorphic, SendMegamorphic => send_fallback_send_megamorphic, SendNoProfiles => send_fallback_send_no_profiles,