diff --git a/.github/actions/setup/directories/action.yml b/.github/actions/setup/directories/action.yml index 00a3a4cf2a560a..7c1e4e9b0ee0e2 100644 --- a/.github/actions/setup/directories/action.yml +++ b/.github/actions/setup/directories/action.yml @@ -96,7 +96,7 @@ runs: - uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 with: path: ${{ inputs.srcdir }}/.downloaded-cache - key: downloaded-cache + key: ${{ runner.os }}-${{ runner.arch }}-downloaded-cache - if: steps.which.outputs.autoreconf shell: bash diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 28adf526c7d0d8..053c37ec5d9f23 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -102,6 +102,7 @@ jobs: with: path: src\vcpkg_installed key: windows-${{ matrix.os }}-vcpkg-${{ hashFiles('src/vcpkg.json') }} + if: ${{ github.ref_name == 'master' || startsWith(github.ref_name, 'ruby_') }} - name: setup env # Available Ruby versions: https://github.com/actions/runner-images/blob/main/images/windows/Windows2019-Readme.md#ruby diff --git a/array.rb b/array.rb index 5f31693cabf2de..03663dbb0b7ff1 100644 --- a/array.rb +++ b/array.rb @@ -212,7 +212,7 @@ def fetch_values(*indexes, &block) indexes end - with_yjit do + with_jit do if Primitive.rb_builtin_basic_definition_p(:each) undef :each diff --git a/bootstraptest/test_ractor.rb b/bootstraptest/test_ractor.rb index 93c6686f0a8f4f..b1e9e3a79d02cb 100644 --- a/bootstraptest/test_ractor.rb +++ b/bootstraptest/test_ractor.rb @@ -2315,3 +2315,33 @@ def Warning.warn(msg) raise unless $msg.all?{/Ractor#take/ =~ it} $msg.size } + +# Cause lots of inline CC misses. 
+assert_equal 'ok', <<~'RUBY' + class A; def test; 1 + 1; end; end + class B; def test; 1 + 1; end; end + class C; def test; 1 + 1; end; end + class D; def test; 1 + 1; end; end + class E; def test; 1 + 1; end; end + class F; def test; 1 + 1; end; end + class G; def test; 1 + 1; end; end + + objs = [A.new, B.new, C.new, D.new, E.new, F.new, G.new].freeze + + def call_test(obj) + obj.test + end + + ractors = 7.times.map do + Ractor.new(objs) do |objs| + objs = objs.shuffle + 100_000.times do + objs.each do |o| + call_test(o) + end + end + end + end + ractors.each(&:join) + :ok +RUBY diff --git a/bootstraptest/test_yjit.rb b/bootstraptest/test_yjit.rb index d480369c759119..3e3936942d67bb 100644 --- a/bootstraptest/test_yjit.rb +++ b/bootstraptest/test_yjit.rb @@ -468,91 +468,6 @@ def getter end } -assert_equal '0', %q{ - # This is a regression test for incomplete invalidation from - # opt_setinlinecache. This test might be brittle, so - # feel free to remove it in the future if it's too annoying. - # This test assumes --yjit-call-threshold=2. 
- module M - Foo = 1 - def foo - Foo - end - - def pin_self_type_then_foo - _ = @foo - foo - end - - def only_ints - 1 + self - foo - end - end - - class Integer - include M - end - - class Sub - include M - end - - foo_method = M.instance_method(:foo) - - dbg = ->(message) do - return # comment this out to get printouts - - $stderr.puts RubyVM::YJIT.disasm(foo_method) - $stderr.puts message - end - - 2.times { 42.only_ints } - - dbg["There should be two versions of getinlineache"] - - module M - remove_const(:Foo) - end - - dbg["There should be no getinlinecaches"] - - 2.times do - 42.only_ints - rescue NameError => err - _ = "caught name error #{err}" - end - - dbg["There should be one version of getinlineache"] - - 2.times do - Sub.new.pin_self_type_then_foo - rescue NameError - _ = 'second specialization' - end - - dbg["There should be two versions of getinlineache"] - - module M - Foo = 1 - end - - dbg["There should still be two versions of getinlineache"] - - 42.only_ints - - dbg["There should be no getinlinecaches"] - - # Find name of the first VM instruction in M#foo. 
- insns = RubyVM::InstructionSequence.of(foo_method).to_a - if defined?(RubyVM::YJIT.blocks_for) && (insns.last.find { Array === _1 }&.first == :opt_getinlinecache) - RubyVM::YJIT.blocks_for(RubyVM::InstructionSequence.of(foo_method)) - .filter { _1.iseq_start_index == 0 }.count - else - 0 # skip the test - end -} - # Check that frozen objects are respected assert_equal 'great', %q{ class Foo diff --git a/common.mk b/common.mk index 4133f90aa80130..b5a4526ccce8f5 100644 --- a/common.mk +++ b/common.mk @@ -1236,8 +1236,9 @@ BUILTIN_RB_SRCS = \ $(srcdir)/nilclass.rb \ $(srcdir)/prelude.rb \ $(srcdir)/gem_prelude.rb \ + $(srcdir)/jit_hook.rb \ + $(srcdir)/jit_undef.rb \ $(srcdir)/yjit.rb \ - $(srcdir)/yjit_hook.rb \ $(srcdir)/zjit.rb \ $(empty) BUILTIN_RB_INCS = $(BUILTIN_RB_SRCS:.rb=.rbinc) diff --git a/debug_counter.h b/debug_counter.h index 8ffce66f0f7b16..c8d8ed8f110988 100644 --- a/debug_counter.h +++ b/debug_counter.h @@ -49,7 +49,7 @@ RB_DEBUG_COUNTER(cc_temp) // dummy CC (stack-allocated) RB_DEBUG_COUNTER(cc_found_in_ccs) // count for CC lookup success in CCS RB_DEBUG_COUNTER(cc_not_found_in_ccs) // count for CC lookup success in CCS -RB_DEBUG_COUNTER(cc_ent_invalidate) // count for invalidating cc (cc->klass = 0) +RB_DEBUG_COUNTER(cc_ent_invalidate) // count for invalidating cc (cc->klass = Qundef) RB_DEBUG_COUNTER(cc_cme_invalidate) // count for invalidating CME RB_DEBUG_COUNTER(cc_invalidate_leaf) // count for invalidating klass if klass has no-subclasses diff --git a/depend b/depend index df0ae1e610b1c3..ec8c2771c92104 100644 --- a/depend +++ b/depend @@ -9196,6 +9196,8 @@ miniinit.$(OBJEXT): {$(VPATH)}internal/warning_push.h miniinit.$(OBJEXT): {$(VPATH)}internal/xmalloc.h miniinit.$(OBJEXT): {$(VPATH)}io.rb miniinit.$(OBJEXT): {$(VPATH)}iseq.h +miniinit.$(OBJEXT): {$(VPATH)}jit_hook.rb +miniinit.$(OBJEXT): {$(VPATH)}jit_undef.rb miniinit.$(OBJEXT): {$(VPATH)}kernel.rb miniinit.$(OBJEXT): {$(VPATH)}marshal.rb miniinit.$(OBJEXT): {$(VPATH)}method.h @@ 
-9232,7 +9234,6 @@ miniinit.$(OBJEXT): {$(VPATH)}vm_core.h miniinit.$(OBJEXT): {$(VPATH)}vm_opts.h miniinit.$(OBJEXT): {$(VPATH)}warning.rb miniinit.$(OBJEXT): {$(VPATH)}yjit.rb -miniinit.$(OBJEXT): {$(VPATH)}yjit_hook.rb miniinit.$(OBJEXT): {$(VPATH)}zjit.rb namespace.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h namespace.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h @@ -18755,6 +18756,8 @@ vm.$(OBJEXT): {$(VPATH)}internal/variable.h vm.$(OBJEXT): {$(VPATH)}internal/warning_push.h vm.$(OBJEXT): {$(VPATH)}internal/xmalloc.h vm.$(OBJEXT): {$(VPATH)}iseq.h +vm.$(OBJEXT): {$(VPATH)}jit_hook.rbinc +vm.$(OBJEXT): {$(VPATH)}jit_undef.rbinc vm.$(OBJEXT): {$(VPATH)}method.h vm.$(OBJEXT): {$(VPATH)}missing.h vm.$(OBJEXT): {$(VPATH)}node.h @@ -18797,7 +18800,6 @@ vm.$(OBJEXT): {$(VPATH)}vm_opts.h vm.$(OBJEXT): {$(VPATH)}vm_sync.h vm.$(OBJEXT): {$(VPATH)}vmtc.inc vm.$(OBJEXT): {$(VPATH)}yjit.h -vm.$(OBJEXT): {$(VPATH)}yjit_hook.rbinc vm.$(OBJEXT): {$(VPATH)}zjit.h vm_backtrace.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h vm_backtrace.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h diff --git a/doc/string/encode.rdoc b/doc/string/encode.rdoc index 65872fdfd4658d..14b959ffffb276 100644 --- a/doc/string/encode.rdoc +++ b/doc/string/encode.rdoc @@ -1,4 +1,6 @@ -Returns a copy of +self+ transcoded as determined by +dst_encoding+. +Returns a copy of +self+ transcoded as determined by +dst_encoding+; +see {Encodings}[rdoc-ref:encodings.rdoc]. + By default, raises an exception if +self+ contains an invalid byte or a character not defined in +dst_encoding+; that behavior may be modified by encoding options; see below. @@ -45,3 +47,4 @@ given, conversion from an encoding +enc+ to the same encoding +enc+ no-op, i.e. the string is simply copied without any changes, and no exceptions are raised, even if there are invalid bytes. +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. 
diff --git a/doc/string/end_with_p.rdoc b/doc/string/end_with_p.rdoc index f959cf7aaab371..fcd92421225ca8 100644 --- a/doc/string/end_with_p.rdoc +++ b/doc/string/end_with_p.rdoc @@ -1,11 +1,10 @@ -Returns whether +self+ ends with any of the given +strings+. +Returns whether +self+ ends with any of the given +strings+: -Returns +true+ if any given string matches the end, +false+ otherwise: + 'foo'.end_with?('oo') # => true + 'foo'.end_with?('bar', 'oo') # => true + 'foo'.end_with?('bar', 'baz') # => false + 'foo'.end_with?('') # => true + 'тест'.end_with?('т') # => true + 'こんにちは'.end_with?('は') # => true - 'hello'.end_with?('ello') #=> true - 'hello'.end_with?('heaven', 'ello') #=> true - 'hello'.end_with?('heaven', 'paradise') #=> false - 'тест'.end_with?('т') # => true - 'こんにちは'.end_with?('は') # => true - -Related: String#start_with?. +Related: see {Querying}[rdoc-ref:String@Querying]. diff --git a/doc/string/eql_p.rdoc b/doc/string/eql_p.rdoc new file mode 100644 index 00000000000000..85409c5ed687aa --- /dev/null +++ b/doc/string/eql_p.rdoc @@ -0,0 +1,18 @@ +Returns whether +self+ and +object+ have the same length and content: + + s = 'foo' + s.eql?('foo') # => true + s.eql?('food') # => false + s.eql?('FOO') # => false + +Returns +false+ if the two strings' encodings are not compatible: + + s0 = "äöü" # => "äöü" + s1 = s0.encode(Encoding::ISO_8859_1) # => "\xE4\xF6\xFC" + s0.encoding # => # + s1.encoding # => # + s0.eql?(s1) # => false + +See {Encodings}[rdoc-ref:encodings.rdoc]. + +Related: see {Querying}[rdoc-ref:String@Querying]. 
diff --git a/doc/string/force_encoding.rdoc b/doc/string/force_encoding.rdoc index fd9615caaa30f4..a509e67f80508e 100644 --- a/doc/string/force_encoding.rdoc +++ b/doc/string/force_encoding.rdoc @@ -1,5 +1,6 @@ -Changes the encoding of +self+ to +encoding+, +Changes the encoding of +self+ to the given +encoding+, which may be a string encoding name or an Encoding object; +does not change the underlying bytes; returns self: s = 'łał' @@ -7,14 +8,14 @@ returns self: s.encoding # => # s.force_encoding('ascii') # => "\xC5\x82a\xC5\x82" s.encoding # => # - -Does not change the underlying bytes: - + s.valid_encoding? # => true s.bytes # => [197, 130, 97, 197, 130] Makes the change even if the given +encoding+ is invalid for +self+ (as is the change above): - s.valid_encoding? # => false - s.force_encoding(Encoding::UTF_8) # => "łał" - s.valid_encoding? # => true + s.valid_encoding? # => false + +See {Encodings}[rdoc-ref:encodings.rdoc]. + +Related: see {Modifying}[rdoc-ref:String@Modifying]. diff --git a/doc/string/getbyte.rdoc b/doc/string/getbyte.rdoc new file mode 100644 index 00000000000000..ba1c06fd27cb67 --- /dev/null +++ b/doc/string/getbyte.rdoc @@ -0,0 +1,26 @@ +Returns the byte at zero-based +index+ as an integer: + + s = 'foo' + s.getbyte(0) # => 102 + s.getbyte(1) # => 111 + s.getbyte(2) # => 111 + +Counts backward from the end if +index+ is negative: + + s.getbyte(-3) # => 102 + +Returns +nil+ if +index+ is out of range: + + s.getbyte(3) # => nil + s.getbyte(-4) # => nil + +More examples: + + s = 'тест' + s.bytes # => [209, 130, 208, 181, 209, 129, 209, 130] + s.getbyte(2) # => 208 + s = 'こんにちは' + s.bytes # => [227, 129, 147, 227, 130, 147, 227, 129, 171, 227, 129, 161, 227, 129, 175] + s.getbyte(2) # => 147 + +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non--5CString]. 
diff --git a/ext/objspace/objspace_dump.c b/ext/objspace/objspace_dump.c index 80732d0282d384..f90ad89b5a6ed7 100644 --- a/ext/objspace/objspace_dump.c +++ b/ext/objspace/objspace_dump.c @@ -801,7 +801,7 @@ shape_id_i(shape_id_t shape_id, void *data) if (RSHAPE_TYPE(shape_id) != SHAPE_ROOT) { dump_append(dc, ", \"parent_id\":"); - dump_append_lu(dc, RSHAPE_PARENT(shape_id)); + dump_append_lu(dc, RSHAPE_PARENT_RAW_ID(shape_id)); } dump_append(dc, ", \"depth\":"); diff --git a/ext/openssl/ossl_pkcs7.c b/ext/openssl/ossl_pkcs7.c index 944cbb5e97f5d8..910ef9665c7919 100644 --- a/ext/openssl/ossl_pkcs7.c +++ b/ext/openssl/ossl_pkcs7.c @@ -143,11 +143,19 @@ ossl_PKCS7_SIGNER_INFO_dup(PKCS7_SIGNER_INFO *si) } static PKCS7_RECIP_INFO * -ossl_PKCS7_RECIP_INFO_dup(PKCS7_RECIP_INFO *si) +ossl_PKCS7_RECIP_INFO_dup(PKCS7_RECIP_INFO *ri) { - return ASN1_dup((i2d_of_void *)i2d_PKCS7_RECIP_INFO, - (d2i_of_void *)d2i_PKCS7_RECIP_INFO, - si); + PKCS7_RECIP_INFO *ri_new = ASN1_dup((i2d_of_void *)i2d_PKCS7_RECIP_INFO, + (d2i_of_void *)d2i_PKCS7_RECIP_INFO, + ri); + if (ri_new && ri->cert) { + if (!X509_up_ref(ri->cert)) { + PKCS7_RECIP_INFO_free(ri_new); + return NULL; + } + ri_new->cert = ri->cert; + } + return ri_new; } static VALUE @@ -510,6 +518,8 @@ ossl_pkcs7_get_detached(VALUE self) { PKCS7 *p7; GetPKCS7(self, p7); + if (!PKCS7_type_is_signed(p7)) + return Qfalse; return PKCS7_get_detached(p7) ? Qtrue : Qfalse; } @@ -770,7 +780,6 @@ ossl_pkcs7_verify(int argc, VALUE *argv, VALUE self) BIO *in, *out; PKCS7 *p7; VALUE data; - const char *msg; GetPKCS7(self, p7); rb_scan_args(argc, argv, "22", &certs, &store, &indata, &flags); @@ -794,14 +803,16 @@ ossl_pkcs7_verify(int argc, VALUE *argv, VALUE self) ok = PKCS7_verify(p7, x509s, x509st, in, out, flg); BIO_free(in); sk_X509_pop_free(x509s, X509_free); - if (ok < 0) ossl_raise(ePKCS7Error, "PKCS7_verify"); - msg = ERR_reason_error_string(ERR_peek_error()); - ossl_pkcs7_set_err_string(self, msg ? 
rb_str_new2(msg) : Qnil); - ossl_clear_error(); data = ossl_membio2str(out); ossl_pkcs7_set_data(self, data); - - return (ok == 1) ? Qtrue : Qfalse; + if (ok != 1) { + const char *msg = ERR_reason_error_string(ERR_peek_error()); + ossl_pkcs7_set_err_string(self, msg ? rb_str_new_cstr(msg) : Qnil); + ossl_clear_error(); + return Qfalse; + } + ossl_pkcs7_set_err_string(self, Qnil); + return Qtrue; } static VALUE @@ -837,30 +848,38 @@ ossl_pkcs7_add_data(VALUE self, VALUE data) PKCS7 *pkcs7; BIO *out, *in; char buf[4096]; - int len; + int len, ret; GetPKCS7(self, pkcs7); - if(PKCS7_type_is_signed(pkcs7)){ - if(!PKCS7_content_new(pkcs7, NID_pkcs7_data)) - ossl_raise(ePKCS7Error, NULL); + if (PKCS7_type_is_signed(pkcs7)) { + if (!PKCS7_content_new(pkcs7, NID_pkcs7_data)) + ossl_raise(ePKCS7Error, "PKCS7_content_new"); } in = ossl_obj2bio(&data); - if(!(out = PKCS7_dataInit(pkcs7, NULL))) goto err; - for(;;){ - if((len = BIO_read(in, buf, sizeof(buf))) <= 0) - break; - if(BIO_write(out, buf, len) != len) - goto err; + if (!(out = PKCS7_dataInit(pkcs7, NULL))) { + BIO_free(in); + ossl_raise(ePKCS7Error, "PKCS7_dataInit"); } - if(!PKCS7_dataFinal(pkcs7, out)) goto err; - ossl_pkcs7_set_data(self, Qnil); - - err: + for (;;) { + if ((len = BIO_read(in, buf, sizeof(buf))) <= 0) + break; + if (BIO_write(out, buf, len) != len) { + BIO_free_all(out); + BIO_free(in); + ossl_raise(ePKCS7Error, "BIO_write"); + } + } + if (BIO_flush(out) <= 0) { + BIO_free_all(out); + BIO_free(in); + ossl_raise(ePKCS7Error, "BIO_flush"); + } + ret = PKCS7_dataFinal(pkcs7, out); BIO_free_all(out); BIO_free(in); - if(ERR_peek_error()){ - ossl_raise(ePKCS7Error, NULL); - } + if (!ret) + ossl_raise(ePKCS7Error, "PKCS7_dataFinal"); + ossl_pkcs7_set_data(self, Qnil); return data; } diff --git a/ext/openssl/ossl_x509attr.c b/ext/openssl/ossl_x509attr.c index 3f6b89bdde5af3..d983af59686946 100644 --- a/ext/openssl/ossl_x509attr.c +++ b/ext/openssl/ossl_x509attr.c @@ -54,14 +54,9 @@ 
ossl_x509attr_new(X509_ATTRIBUTE *attr) VALUE obj; obj = NewX509Attr(cX509Attr); - if (!attr) { - new = X509_ATTRIBUTE_new(); - } else { - new = X509_ATTRIBUTE_dup(attr); - } - if (!new) { - ossl_raise(eX509AttrError, NULL); - } + new = X509_ATTRIBUTE_dup(attr); + if (!new) + ossl_raise(eX509AttrError, "X509_ATTRIBUTE_dup"); SetX509Attr(obj, new); return obj; diff --git a/ext/openssl/ossl_x509cert.c b/ext/openssl/ossl_x509cert.c index ecf42d7d43e6d2..30e3c617531bde 100644 --- a/ext/openssl/ossl_x509cert.c +++ b/ext/openssl/ossl_x509cert.c @@ -54,14 +54,9 @@ ossl_x509_new(X509 *x509) VALUE obj; obj = NewX509(cX509Cert); - if (!x509) { - new = X509_new(); - } else { - new = X509_dup(x509); - } - if (!new) { - ossl_raise(eX509CertError, NULL); - } + new = X509_dup(x509); + if (!new) + ossl_raise(eX509CertError, "X509_dup"); SetX509(obj, new); return obj; diff --git a/ext/openssl/ossl_x509crl.c b/ext/openssl/ossl_x509crl.c index 135dfe3d758d18..52174d1711487b 100644 --- a/ext/openssl/ossl_x509crl.c +++ b/ext/openssl/ossl_x509crl.c @@ -64,8 +64,9 @@ ossl_x509crl_new(X509_CRL *crl) VALUE obj; obj = NewX509CRL(cX509CRL); - tmp = crl ? 
X509_CRL_dup(crl) : X509_CRL_new(); - if(!tmp) ossl_raise(eX509CRLError, NULL); + tmp = X509_CRL_dup(crl); + if (!tmp) + ossl_raise(eX509CRLError, "X509_CRL_dup"); SetX509CRL(obj, tmp); return obj; diff --git a/ext/openssl/ossl_x509ext.c b/ext/openssl/ossl_x509ext.c index 9b0d9aa651977d..01aa3a8f51cd17 100644 --- a/ext/openssl/ossl_x509ext.c +++ b/ext/openssl/ossl_x509ext.c @@ -68,14 +68,9 @@ ossl_x509ext_new(X509_EXTENSION *ext) VALUE obj; obj = NewX509Ext(cX509Ext); - if (!ext) { - new = X509_EXTENSION_new(); - } else { - new = X509_EXTENSION_dup(ext); - } - if (!new) { - ossl_raise(eX509ExtError, NULL); - } + new = X509_EXTENSION_dup(ext); + if (!new) + ossl_raise(eX509ExtError, "X509_EXTENSION_dup"); SetX509Ext(obj, new); return obj; diff --git a/ext/openssl/ossl_x509name.c b/ext/openssl/ossl_x509name.c index b3791aefa38826..7d0fd35247f5cb 100644 --- a/ext/openssl/ossl_x509name.c +++ b/ext/openssl/ossl_x509name.c @@ -59,14 +59,9 @@ ossl_x509name_new(X509_NAME *name) VALUE obj; obj = NewX509Name(cX509Name); - if (!name) { - new = X509_NAME_new(); - } else { - new = X509_NAME_dup(name); - } - if (!new) { - ossl_raise(eX509NameError, NULL); - } + new = X509_NAME_dup(name); + if (!new) + ossl_raise(eX509NameError, "X509_NAME_dup"); SetX509Name(obj, new); return obj; diff --git a/ext/openssl/ossl_x509revoked.c b/ext/openssl/ossl_x509revoked.c index 1eff5dd3556d57..9496c4bf1b49fc 100644 --- a/ext/openssl/ossl_x509revoked.c +++ b/ext/openssl/ossl_x509revoked.c @@ -54,14 +54,9 @@ ossl_x509revoked_new(X509_REVOKED *rev) VALUE obj; obj = NewX509Rev(cX509Rev); - if (!rev) { - new = X509_REVOKED_new(); - } else { - new = X509_REVOKED_dup(rev); - } - if (!new) { - ossl_raise(eX509RevError, NULL); - } + new = X509_REVOKED_dup(rev); + if (!new) + ossl_raise(eX509RevError, "X509_REVOKED_dup"); SetX509Rev(obj, new); return obj; diff --git a/ext/openssl/ossl_x509store.c b/ext/openssl/ossl_x509store.c index 18acdc8ad0f3fb..8291578f274b6d 100644 --- a/ext/openssl/ossl_x509store.c 
+++ b/ext/openssl/ossl_x509store.c @@ -735,10 +735,14 @@ static VALUE ossl_x509stctx_get_curr_cert(VALUE self) { X509_STORE_CTX *ctx; + X509 *x509; GetX509StCtx(self, ctx); + x509 = X509_STORE_CTX_get_current_cert(ctx); + if (!x509) + return Qnil; - return ossl_x509_new(X509_STORE_CTX_get_current_cert(ctx)); + return ossl_x509_new(x509); } /* diff --git a/gc.c b/gc.c index cdc8891d7c6016..4af43edcc4cb4b 100644 --- a/gc.c +++ b/gc.c @@ -353,11 +353,6 @@ rb_gc_shutdown_call_finalizer_p(VALUE obj) return true; case T_SYMBOL: - if (RSYMBOL(obj)->fstr && - (BUILTIN_TYPE(RSYMBOL(obj)->fstr) == T_NONE || - BUILTIN_TYPE(RSYMBOL(obj)->fstr) == T_ZOMBIE)) { - RSYMBOL(obj)->fstr = 0; - } return true; case T_NONE: @@ -1213,7 +1208,7 @@ classext_free(rb_classext_t *ext, bool is_prime, VALUE namespace, void *arg) struct classext_foreach_args *args = (struct classext_foreach_args *)arg; rb_id_table_free(RCLASSEXT_M_TBL(ext)); - rb_cc_tbl_free(RCLASSEXT_CC_TBL(ext), args->klass); + if (!RCLASSEXT_SHARED_CONST_TBL(ext) && (tbl = RCLASSEXT_CONST_TBL(ext)) != NULL) { rb_free_const_table(tbl); } @@ -1243,7 +1238,6 @@ classext_iclass_free(rb_classext_t *ext, bool is_prime, VALUE namespace, void *a if (RCLASSEXT_CALLABLE_M_TBL(ext) != NULL) { rb_id_table_free(RCLASSEXT_CALLABLE_M_TBL(ext)); } - rb_cc_tbl_free(RCLASSEXT_CC_TBL(ext), args->klass); rb_class_classext_free_subclasses(ext, args->klass); @@ -1748,7 +1742,7 @@ rb_objspace_free_objects(void *objspace) int rb_objspace_garbage_object_p(VALUE obj) { - return rb_gc_impl_garbage_object_p(rb_gc_get_objspace(), obj); + return !SPECIAL_CONST_P(obj) && rb_gc_impl_garbage_object_p(rb_gc_get_objspace(), obj); } bool @@ -1903,8 +1897,6 @@ object_id0(VALUE obj) return object_id_get(obj, shape_id); } - // rb_shape_object_id_shape may lock if the current shape has - // multiple children. 
shape_id_t object_id_shape_id = rb_shape_transition_object_id(obj); id = generate_next_object_id(); @@ -2267,24 +2259,6 @@ rb_gc_after_updating_jit_code(void) #endif } -static enum rb_id_table_iterator_result -cc_table_memsize_i(VALUE ccs_ptr, void *data_ptr) -{ - size_t *total_size = data_ptr; - struct rb_class_cc_entries *ccs = (struct rb_class_cc_entries *)ccs_ptr; - *total_size += sizeof(*ccs); - *total_size += sizeof(ccs->entries[0]) * ccs->capa; - return ID_TABLE_CONTINUE; -} - -static size_t -cc_table_memsize(struct rb_id_table *cc_table) -{ - size_t total = rb_id_table_memsize(cc_table); - rb_id_table_foreach_values(cc_table, cc_table_memsize_i, &total); - return total; -} - static void classext_memsize(rb_classext_t *ext, bool prime, VALUE namespace, void *arg) { @@ -2300,9 +2274,6 @@ classext_memsize(rb_classext_t *ext, bool prime, VALUE namespace, void *arg) if (RCLASSEXT_CONST_TBL(ext)) { s += rb_id_table_memsize(RCLASSEXT_CONST_TBL(ext)); } - if (RCLASSEXT_CC_TBL(ext)) { - s += cc_table_memsize(RCLASSEXT_CC_TBL(ext)); - } if (RCLASSEXT_SUPERCLASSES_WITH_SELF(ext)) { s += (RCLASSEXT_SUPERCLASS_DEPTH(ext) + 1) * sizeof(VALUE); } @@ -2353,9 +2324,6 @@ rb_obj_memsize_of(VALUE obj) size += rb_id_table_memsize(RCLASS_M_TBL(obj)); } } - if (RCLASS_WRITABLE_CC_TBL(obj)) { - size += cc_table_memsize(RCLASS_WRITABLE_CC_TBL(obj)); - } break; case T_STRING: size += rb_str_memsize(obj); @@ -2840,47 +2808,6 @@ mark_const_tbl(rb_objspace_t *objspace, struct rb_id_table *tbl) rb_id_table_foreach_values(tbl, mark_const_entry_i, objspace); } -struct mark_cc_entry_args { - rb_objspace_t *objspace; - VALUE klass; -}; - -static enum rb_id_table_iterator_result -mark_cc_entry_i(VALUE ccs_ptr, void *data) -{ - struct rb_class_cc_entries *ccs = (struct rb_class_cc_entries *)ccs_ptr; - - VM_ASSERT(vm_ccs_p(ccs)); - - if (METHOD_ENTRY_INVALIDATED(ccs->cme)) { - rb_vm_ccs_free(ccs); - return ID_TABLE_DELETE; - } - else { - gc_mark_internal((VALUE)ccs->cme); - - for (int i=0; 
ilen; i++) { - VM_ASSERT(((struct mark_cc_entry_args *)data)->klass == ccs->entries[i].cc->klass); - VM_ASSERT(vm_cc_check_cme(ccs->entries[i].cc, ccs->cme)); - - gc_mark_internal((VALUE)ccs->entries[i].cc); - } - return ID_TABLE_CONTINUE; - } -} - -static void -mark_cc_tbl(rb_objspace_t *objspace, struct rb_id_table *tbl, VALUE klass) -{ - struct mark_cc_entry_args args; - - if (!tbl) return; - - args.objspace = objspace; - args.klass = klass; - rb_id_table_foreach_values(tbl, mark_cc_entry_i, (void *)&args); -} - static enum rb_id_table_iterator_result mark_cvc_tbl_i(VALUE cvc_entry, void *objspace) { @@ -3118,7 +3045,6 @@ gc_mark_classext_module(rb_classext_t *ext, bool prime, VALUE namespace, void *a { struct gc_mark_classext_foreach_arg *foreach_arg = (struct gc_mark_classext_foreach_arg *)arg; rb_objspace_t *objspace = foreach_arg->objspace; - VALUE obj = foreach_arg->obj; if (RCLASSEXT_SUPER(ext)) { gc_mark_internal(RCLASSEXT_SUPER(ext)); @@ -3129,7 +3055,7 @@ gc_mark_classext_module(rb_classext_t *ext, bool prime, VALUE namespace, void *a mark_const_tbl(objspace, RCLASSEXT_CONST_TBL(ext)); } mark_m_tbl(objspace, RCLASSEXT_CALLABLE_M_TBL(ext)); - mark_cc_tbl(objspace, RCLASSEXT_CC_TBL(ext), obj); + gc_mark_internal(RCLASSEXT_CC_TBL(ext)); mark_cvc_tbl(objspace, RCLASSEXT_CVC_TBL(ext)); gc_mark_internal(RCLASSEXT_CLASSPATH(ext)); } @@ -3139,7 +3065,6 @@ gc_mark_classext_iclass(rb_classext_t *ext, bool prime, VALUE namespace, void *a { struct gc_mark_classext_foreach_arg *foreach_arg = (struct gc_mark_classext_foreach_arg *)arg; rb_objspace_t *objspace = foreach_arg->objspace; - VALUE iclass = foreach_arg->obj; if (RCLASSEXT_SUPER(ext)) { gc_mark_internal(RCLASSEXT_SUPER(ext)); @@ -3151,7 +3076,7 @@ gc_mark_classext_iclass(rb_classext_t *ext, bool prime, VALUE namespace, void *a gc_mark_internal(RCLASSEXT_INCLUDER(ext)); } mark_m_tbl(objspace, RCLASSEXT_CALLABLE_M_TBL(ext)); - mark_cc_tbl(objspace, RCLASSEXT_CC_TBL(ext), iclass); + 
gc_mark_internal(RCLASSEXT_CC_TBL(ext)); } #define TYPED_DATA_REFS_OFFSET_LIST(d) (size_t *)(uintptr_t)RTYPEDDATA_TYPE(d)->function.dmark @@ -3715,33 +3640,6 @@ update_m_tbl(void *objspace, struct rb_id_table *tbl) } } -static enum rb_id_table_iterator_result -update_cc_tbl_i(VALUE ccs_ptr, void *objspace) -{ - struct rb_class_cc_entries *ccs = (struct rb_class_cc_entries *)ccs_ptr; - VM_ASSERT(vm_ccs_p(ccs)); - - if (rb_gc_impl_object_moved_p(objspace, (VALUE)ccs->cme)) { - ccs->cme = (const rb_callable_method_entry_t *)gc_location_internal(objspace, (VALUE)ccs->cme); - } - - for (int i=0; ilen; i++) { - if (rb_gc_impl_object_moved_p(objspace, (VALUE)ccs->entries[i].cc)) { - ccs->entries[i].cc = (struct rb_callcache *)gc_location_internal(objspace, (VALUE)ccs->entries[i].cc); - } - } - - // do not replace - return ID_TABLE_CONTINUE; -} - -static void -update_cc_tbl(void *objspace, struct rb_id_table *tbl) -{ - if (!tbl) return; - rb_id_table_foreach_values(tbl, update_cc_tbl_i, objspace); -} - static enum rb_id_table_iterator_result update_cvc_tbl_i(VALUE cvc_entry, void *objspace) { @@ -3840,7 +3738,7 @@ update_classext(rb_classext_t *ext, bool is_prime, VALUE namespace, void *arg) if (!RCLASSEXT_SHARED_CONST_TBL(ext)) { update_const_tbl(objspace, RCLASSEXT_CONST_TBL(ext)); } - update_cc_tbl(objspace, RCLASSEXT_CC_TBL(ext)); + UPDATE_IF_MOVED(objspace, RCLASSEXT_CC_TBL(ext)); update_cvc_tbl(objspace, RCLASSEXT_CVC_TBL(ext)); update_superclasses(objspace, ext); update_subclasses(objspace, ext); @@ -3859,7 +3757,7 @@ update_iclass_classext(rb_classext_t *ext, bool is_prime, VALUE namespace, void } update_m_tbl(objspace, RCLASSEXT_M_TBL(ext)); update_m_tbl(objspace, RCLASSEXT_CALLABLE_M_TBL(ext)); - update_cc_tbl(objspace, RCLASSEXT_CC_TBL(ext)); + UPDATE_IF_MOVED(objspace, RCLASSEXT_CC_TBL(ext)); update_subclasses(objspace, ext); update_classext_values(objspace, ext, true); @@ -4929,11 +4827,11 @@ rb_raw_obj_info_buitin_type(char *const buff, const size_t 
buff_size, const VALU case imemo_callcache: { const struct rb_callcache *cc = (const struct rb_callcache *)obj; - VALUE class_path = cc->klass ? rb_class_path_cached(cc->klass) : Qnil; + VALUE class_path = vm_cc_valid(cc) ? rb_class_path_cached(cc->klass) : Qnil; const rb_callable_method_entry_t *cme = vm_cc_cme(cc); APPEND_F("(klass:%s cme:%s%s (%p) call:%p", - NIL_P(class_path) ? (cc->klass ? "??" : "") : RSTRING_PTR(class_path), + NIL_P(class_path) ? (vm_cc_valid(cc) ? "??" : "") : RSTRING_PTR(class_path), cme ? rb_id2name(cme->called_id) : "", cme ? (METHOD_ENTRY_INVALIDATED(cme) ? " [inv]" : "") : "", (void *)cme, diff --git a/id_table.c b/id_table.c index c831524ff87996..b70587319182ce 100644 --- a/id_table.c +++ b/id_table.c @@ -47,7 +47,7 @@ struct rb_id_table { #if SIZEOF_VALUE == 8 #define ITEM_GET_KEY(tbl, i) ((tbl)->items[i].key) -#define ITEM_KEY_ISSET(tbl, i) ((tbl)->items[i].key) +#define ITEM_KEY_ISSET(tbl, i) ((tbl)->items && (tbl)->items[i].key) #define ITEM_COLLIDED(tbl, i) ((tbl)->items[i].collision) #define ITEM_SET_COLLIDED(tbl, i) ((tbl)->items[i].collision = 1) static inline void @@ -298,6 +298,10 @@ rb_id_table_foreach_values(struct rb_id_table *tbl, rb_id_table_foreach_values_f { int i, capa = tbl->capa; + if (!tbl->items) { + return; + } + for (i=0; iitems[i].val, data); @@ -345,7 +349,7 @@ managed_id_table_memsize(const void *data) return rb_id_table_memsize(tbl) - sizeof(struct rb_id_table); } -static const rb_data_type_t managed_id_table_type = { +const rb_data_type_t rb_managed_id_table_type = { .wrap_struct_name = "VM/managed_id_table", .function = { .dmark = NULL, // Nothing to mark @@ -359,20 +363,26 @@ static inline struct rb_id_table * managed_id_table_ptr(VALUE obj) { RUBY_ASSERT(RB_TYPE_P(obj, T_DATA)); - RUBY_ASSERT(rb_typeddata_inherited_p(RTYPEDDATA_TYPE(obj), &managed_id_table_type)); + RUBY_ASSERT(rb_typeddata_inherited_p(RTYPEDDATA_TYPE(obj), &rb_managed_id_table_type)); return RTYPEDDATA_GET_DATA(obj); } VALUE 
-rb_managed_id_table_new(size_t capa) +rb_managed_id_table_create(const rb_data_type_t *type, size_t capa) { struct rb_id_table *tbl; - VALUE obj = TypedData_Make_Struct(0, struct rb_id_table, &managed_id_table_type, tbl); + VALUE obj = TypedData_Make_Struct(0, struct rb_id_table, type, tbl); rb_id_table_init(tbl, capa); return obj; } +VALUE +rb_managed_id_table_new(size_t capa) +{ + return rb_managed_id_table_create(&rb_managed_id_table_type, capa); +} + static enum rb_id_table_iterator_result managed_id_table_dup_i(ID id, VALUE val, void *data) { @@ -385,7 +395,7 @@ VALUE rb_managed_id_table_dup(VALUE old_table) { struct rb_id_table *new_tbl; - VALUE obj = TypedData_Make_Struct(0, struct rb_id_table, &managed_id_table_type, new_tbl); + VALUE obj = TypedData_Make_Struct(0, struct rb_id_table, RTYPEDDATA_TYPE(old_table), new_tbl); struct rb_id_table *old_tbl = managed_id_table_ptr(old_table); rb_id_table_init(new_tbl, old_tbl->num + 1); rb_id_table_foreach(old_tbl, managed_id_table_dup_i, new_tbl); @@ -415,3 +425,15 @@ rb_managed_id_table_foreach(VALUE table, rb_id_table_foreach_func_t *func, void { rb_id_table_foreach(managed_id_table_ptr(table), func, data); } + +void +rb_managed_id_table_foreach_values(VALUE table, rb_id_table_foreach_values_func_t *func, void *data) +{ + rb_id_table_foreach_values(managed_id_table_ptr(table), func, data); +} + +int +rb_managed_id_table_delete(VALUE table, ID id) +{ + return rb_id_table_delete(managed_id_table_ptr(table), id); +} diff --git a/id_table.h b/id_table.h index 3e8d82e64af98a..0c8cd343ee2587 100644 --- a/id_table.h +++ b/id_table.h @@ -35,12 +35,17 @@ void rb_id_table_foreach(struct rb_id_table *tbl, rb_id_table_foreach_func_t *fu void rb_id_table_foreach_values(struct rb_id_table *tbl, rb_id_table_foreach_values_func_t *func, void *data); void rb_id_table_foreach_values_with_replace(struct rb_id_table *tbl, rb_id_table_foreach_values_func_t *func, rb_id_table_update_value_callback_func_t *replace, void *data); +VALUE 
rb_managed_id_table_create(const rb_data_type_t *type, size_t capa); VALUE rb_managed_id_table_new(size_t capa); VALUE rb_managed_id_table_dup(VALUE table); int rb_managed_id_table_insert(VALUE table, ID id, VALUE val); int rb_managed_id_table_lookup(VALUE table, ID id, VALUE *valp); size_t rb_managed_id_table_size(VALUE table); void rb_managed_id_table_foreach(VALUE table, rb_id_table_foreach_func_t *func, void *data); +void rb_managed_id_table_foreach_values(VALUE table, rb_id_table_foreach_values_func_t *func, void *data); +int rb_managed_id_table_delete(VALUE table, ID id); + +extern const rb_data_type_t rb_managed_id_table_type; RUBY_SYMBOL_EXPORT_BEGIN size_t rb_id_table_size(const struct rb_id_table *tbl); diff --git a/imemo.c b/imemo.c index 7153689030a7b6..7298d78d65cdb7 100644 --- a/imemo.c +++ b/imemo.c @@ -273,7 +273,7 @@ rb_imemo_memsize(VALUE obj) static bool moved_or_living_object_strictly_p(VALUE obj) { - return obj && (!rb_objspace_garbage_object_p(obj) || BUILTIN_TYPE(obj) == T_MOVED); + return !SPECIAL_CONST_P(obj) && (!rb_objspace_garbage_object_p(obj) || BUILTIN_TYPE(obj) == T_MOVED); } static void @@ -353,25 +353,19 @@ rb_imemo_mark_and_move(VALUE obj, bool reference_updating) */ struct rb_callcache *cc = (struct rb_callcache *)obj; if (reference_updating) { - if (!cc->klass) { - // already invalidated + if (moved_or_living_object_strictly_p((VALUE)cc->cme_)) { + *((VALUE *)&cc->klass) = rb_gc_location(cc->klass); + *((struct rb_callable_method_entry_struct **)&cc->cme_) = + (struct rb_callable_method_entry_struct *)rb_gc_location((VALUE)cc->cme_); } - else { - if (moved_or_living_object_strictly_p(cc->klass) && - moved_or_living_object_strictly_p((VALUE)cc->cme_)) { - *((VALUE *)&cc->klass) = rb_gc_location(cc->klass); - *((struct rb_callable_method_entry_struct **)&cc->cme_) = - (struct rb_callable_method_entry_struct *)rb_gc_location((VALUE)cc->cme_); - } - else { - vm_cc_invalidate(cc); - } + else if (vm_cc_valid(cc)) { + 
vm_cc_invalidate(cc); } } else { - if (cc->klass && (vm_cc_super_p(cc) || vm_cc_refinement_p(cc))) { + rb_gc_mark_weak((VALUE *)&cc->klass); + if ((vm_cc_super_p(cc) || vm_cc_refinement_p(cc))) { rb_gc_mark_movable((VALUE)cc->cme_); - rb_gc_mark_movable((VALUE)cc->klass); } } @@ -521,61 +515,6 @@ rb_free_const_table(struct rb_id_table *tbl) rb_id_table_free(tbl); } -// alive: if false, target pointers can be freed already. -static void -vm_ccs_free(struct rb_class_cc_entries *ccs, int alive, VALUE klass) -{ - if (ccs->entries) { - for (int i=0; ilen; i++) { - const struct rb_callcache *cc = ccs->entries[i].cc; - if (!alive) { - // ccs can be free'ed. - if (rb_gc_pointer_to_heap_p((VALUE)cc) && - !rb_objspace_garbage_object_p((VALUE)cc) && - IMEMO_TYPE_P(cc, imemo_callcache) && - cc->klass == klass) { - // OK. maybe target cc. - } - else { - continue; - } - } - - VM_ASSERT(!vm_cc_super_p(cc) && !vm_cc_refinement_p(cc)); - vm_cc_invalidate(cc); - } - ruby_xfree(ccs->entries); - } - ruby_xfree(ccs); -} - -void -rb_vm_ccs_free(struct rb_class_cc_entries *ccs) -{ - RB_DEBUG_COUNTER_INC(ccs_free); - vm_ccs_free(ccs, true, Qundef); -} - -static enum rb_id_table_iterator_result -cc_tbl_free_i(VALUE ccs_ptr, void *data) -{ - struct rb_class_cc_entries *ccs = (struct rb_class_cc_entries *)ccs_ptr; - VALUE klass = (VALUE)data; - VM_ASSERT(vm_ccs_p(ccs)); - - vm_ccs_free(ccs, false, klass); - - return ID_TABLE_CONTINUE; -} - -void -rb_cc_tbl_free(struct rb_id_table *cc_tbl, VALUE klass) -{ - if (!cc_tbl) return; - rb_id_table_foreach_values(cc_tbl, cc_tbl_free_i, (void *)klass); - rb_id_table_free(cc_tbl); -} - static inline void imemo_fields_free(struct rb_fields *fields) { diff --git a/inits.c b/inits.c index b4e58ea25a1cec..e0dab9e890fbd8 100644 --- a/inits.c +++ b/inits.c @@ -88,8 +88,10 @@ void rb_call_builtin_inits(void) { #define BUILTIN(n) CALL(builtin_##n) - BUILTIN(kernel); + BUILTIN(jit_hook); BUILTIN(yjit); + BUILTIN(zjit); + BUILTIN(kernel); BUILTIN(gc); 
BUILTIN(ractor); BUILTIN(numeric); @@ -107,8 +109,7 @@ rb_call_builtin_inits(void) BUILTIN(thread_sync); BUILTIN(nilclass); BUILTIN(marshal); - BUILTIN(zjit); - BUILTIN(yjit_hook); + BUILTIN(jit_undef); Init_builtin_prelude(); } #undef CALL diff --git a/internal/class.h b/internal/class.h index 520994170faa59..328d650e8bb31a 100644 --- a/internal/class.h +++ b/internal/class.h @@ -83,7 +83,7 @@ struct rb_classext_struct { struct rb_id_table *m_tbl; struct rb_id_table *const_tbl; struct rb_id_table *callable_m_tbl; - struct rb_id_table *cc_tbl; /* ID -> [[ci1, cc1], [ci2, cc2] ...] */ + VALUE cc_tbl; /* { ID => { cme, [cc1, cc2, ...] }, ... } */ struct rb_id_table *cvc_tbl; VALUE *superclasses; /** @@ -262,7 +262,7 @@ static inline void RCLASS_WRITE_SUPER(VALUE klass, VALUE super); static inline void RCLASS_SET_CONST_TBL(VALUE klass, struct rb_id_table *table, bool shared); static inline void RCLASS_WRITE_CONST_TBL(VALUE klass, struct rb_id_table *table, bool shared); static inline void RCLASS_WRITE_CALLABLE_M_TBL(VALUE klass, struct rb_id_table *table); -static inline void RCLASS_WRITE_CC_TBL(VALUE klass, struct rb_id_table *table); +static inline void RCLASS_WRITE_CC_TBL(VALUE klass, VALUE table); static inline void RCLASS_SET_CVC_TBL(VALUE klass, struct rb_id_table *table); static inline void RCLASS_WRITE_CVC_TBL(VALUE klass, struct rb_id_table *table); @@ -628,9 +628,9 @@ RCLASS_WRITE_CALLABLE_M_TBL(VALUE klass, struct rb_id_table *table) } static inline void -RCLASS_WRITE_CC_TBL(VALUE klass, struct rb_id_table *table) +RCLASS_WRITE_CC_TBL(VALUE klass, VALUE table) { - RCLASSEXT_CC_TBL(RCLASS_EXT_WRITABLE(klass)) = table; + RB_OBJ_WRITE(klass, &RCLASSEXT_CC_TBL(RCLASS_EXT_WRITABLE(klass)), table); } static inline void diff --git a/internal/cmdlineopt.h b/internal/cmdlineopt.h index 667fd6df2e976f..aed209e2a21f19 100644 --- a/internal/cmdlineopt.h +++ b/internal/cmdlineopt.h @@ -23,9 +23,6 @@ typedef struct ruby_cmdline_options { ruby_features_t warn; unsigned 
int dump; long backtrace_length_limit; -#if USE_ZJIT - void *zjit; -#endif const char *crash_report; @@ -42,6 +39,9 @@ typedef struct ruby_cmdline_options { #if USE_YJIT unsigned int yjit: 1; #endif +#if USE_ZJIT + unsigned int zjit: 1; +#endif } ruby_cmdline_options_t; struct ruby_opt_message { diff --git a/internal/imemo.h b/internal/imemo.h index 0ad00fe6b79d99..5dd2d04fa4d0ab 100644 --- a/internal/imemo.h +++ b/internal/imemo.h @@ -148,7 +148,6 @@ static inline void MEMO_V2_SET(struct MEMO *m, VALUE v); size_t rb_imemo_memsize(VALUE obj); void rb_imemo_mark_and_move(VALUE obj, bool reference_updating); -void rb_cc_tbl_free(struct rb_id_table *cc_tbl, VALUE klass); void rb_imemo_free(VALUE obj); RUBY_SYMBOL_EXPORT_BEGIN diff --git a/io.c b/io.c index 9dcfff76a33522..4ee45c13442f7c 100644 --- a/io.c +++ b/io.c @@ -9999,14 +9999,14 @@ io_wait(int argc, VALUE *argv, VALUE io) } static void -argf_mark(void *ptr) +argf_mark_and_move(void *ptr) { struct argf *p = ptr; - rb_gc_mark(p->filename); - rb_gc_mark(p->current_file); - rb_gc_mark(p->argv); - rb_gc_mark(p->inplace); - rb_gc_mark(p->encs.ecopts); + rb_gc_mark_and_move(&p->filename); + rb_gc_mark_and_move(&p->current_file); + rb_gc_mark_and_move(&p->argv); + rb_gc_mark_and_move(&p->inplace); + rb_gc_mark_and_move(&p->encs.ecopts); } static size_t @@ -10017,20 +10017,9 @@ argf_memsize(const void *ptr) return size; } -static void -argf_compact(void *ptr) -{ - struct argf *p = ptr; - p->filename = rb_gc_location(p->filename); - p->current_file = rb_gc_location(p->current_file); - p->argv = rb_gc_location(p->argv); - p->inplace = rb_gc_location(p->inplace); - p->encs.ecopts = rb_gc_location(p->encs.ecopts); -} - static const rb_data_type_t argf_type = { "ARGF", - {argf_mark, RUBY_TYPED_DEFAULT_FREE, argf_memsize, argf_compact}, + {argf_mark_and_move, RUBY_TYPED_DEFAULT_FREE, argf_memsize, argf_mark_and_move}, 0, 0, RUBY_TYPED_FREE_IMMEDIATELY }; diff --git a/iseq.c b/iseq.c index c0523f61d70de7..4334bdd7953970 100644 
--- a/iseq.c +++ b/iseq.c @@ -325,15 +325,13 @@ cc_is_active(const struct rb_callcache *cc, bool reference_updating) cc = (const struct rb_callcache *)rb_gc_location((VALUE)cc); } - if (vm_cc_markable(cc)) { - if (cc->klass) { // cc is not invalidated - const struct rb_callable_method_entry_struct *cme = vm_cc_cme(cc); - if (reference_updating) { - cme = (const struct rb_callable_method_entry_struct *)rb_gc_location((VALUE)cme); - } - if (!METHOD_ENTRY_INVALIDATED(cme)) { - return true; - } + if (vm_cc_markable(cc) && vm_cc_valid(cc)) { + const struct rb_callable_method_entry_struct *cme = vm_cc_cme(cc); + if (reference_updating) { + cme = (const struct rb_callable_method_entry_struct *)rb_gc_location((VALUE)cme); + } + if (!METHOD_ENTRY_INVALIDATED(cme)) { + return true; } } } diff --git a/jit.c b/jit.c index 74a042d45d5b39..e68758368a45fb 100644 --- a/jit.c +++ b/jit.c @@ -442,3 +442,15 @@ rb_yarv_ary_entry_internal(VALUE ary, long offset) { return rb_ary_entry_internal(ary, offset); } + +void +rb_set_cfp_pc(struct rb_control_frame_struct *cfp, const VALUE *pc) +{ + cfp->pc = pc; +} + +void +rb_set_cfp_sp(struct rb_control_frame_struct *cfp, VALUE *sp) +{ + cfp->sp = sp; +} diff --git a/jit_hook.rb b/jit_hook.rb new file mode 100644 index 00000000000000..487361c049ed32 --- /dev/null +++ b/jit_hook.rb @@ -0,0 +1,13 @@ +class Module + # Internal helper for built-in initializations to define methods only when JIT is enabled. + # This method is removed in jit_undef.rb. + private def with_jit(&block) # :nodoc: + # ZJIT currently doesn't compile Array#each properly, so it's disabled for now. + if defined?(RubyVM::ZJIT) && Primitive.rb_zjit_option_enabled_p && false # TODO: remove `&& false` (Shopify/ruby#667) + # We don't support lazily enabling ZJIT yet, so we can call the block right away. 
+ block.call + elsif defined?(RubyVM::YJIT) + RubyVM::YJIT.send(:add_jit_hook, block) + end + end +end diff --git a/jit_undef.rb b/jit_undef.rb new file mode 100644 index 00000000000000..0e855fe7a23077 --- /dev/null +++ b/jit_undef.rb @@ -0,0 +1,4 @@ +# Remove the helper defined in jit_hook.rb +class Module + undef :with_jit +end diff --git a/kernel.rb b/kernel.rb index 554de49977c5e3..888ef0c531d7e6 100644 --- a/kernel.rb +++ b/kernel.rb @@ -291,13 +291,3 @@ def Integer(arg, base = 0, exception: true) end end end - -class Module - # Internal helper for built-in initializations to define methods only when YJIT is enabled. - # This method is removed in yjit_hook.rb. - private def with_yjit(&block) # :nodoc: - if defined?(RubyVM::YJIT) - RubyVM::YJIT.send(:add_yjit_hook, block) - end - end -end diff --git a/lib/mkmf.rb b/lib/mkmf.rb index 7ac4f916b8ae24..d0974b05436928 100644 --- a/lib/mkmf.rb +++ b/lib/mkmf.rb @@ -860,7 +860,7 @@ def try_func(func, libs, headers = nil, opt = "", &b) v } unless strvars.empty? 
- prepare << "char " << strvars.map {|v| "#{v}[1024]"}.join(", ") << "; " + prepare << "char " << strvars.map {|v| %[#{v}[1024] = ""]}.join(", ") << "; " end when nil call = "" diff --git a/lib/prism/translation/ruby_parser.rb b/lib/prism/translation/ruby_parser.rb index 3808cd3130906e..ac538a2e97ae43 100644 --- a/lib/prism/translation/ruby_parser.rb +++ b/lib/prism/translation/ruby_parser.rb @@ -1151,8 +1151,8 @@ def visit_keyword_rest_parameter_node(node) def visit_lambda_node(node) parameters = case node.parameters - when nil, NumberedParametersNode - s(node, :args) + when nil, ItParametersNode, NumberedParametersNode + 0 else visit(node.parameters) end diff --git a/method.h b/method.h index 6abf2495b0df38..8328b86ee96102 100644 --- a/method.h +++ b/method.h @@ -259,6 +259,6 @@ void rb_vm_delete_cc_refinement(const struct rb_callcache *cc); void rb_clear_method_cache(VALUE klass_or_module, ID mid); void rb_clear_all_refinement_method_cache(void); -void rb_invalidate_method_caches(struct rb_id_table *cm_tbl, struct rb_id_table *cc_tbl); +void rb_invalidate_method_caches(struct rb_id_table *cm_tbl, VALUE cc_tbl); #endif /* RUBY_METHOD_H */ diff --git a/numeric.rb b/numeric.rb index 27e9951fd3bd8d..552a3dd687aedc 100644 --- a/numeric.rb +++ b/numeric.rb @@ -322,7 +322,7 @@ def denominator 1 end - with_yjit do + with_jit do if Primitive.rb_builtin_basic_definition_p(:downto) undef :downto diff --git a/object.c b/object.c index 4d54b7374009bd..28e61254215b0d 100644 --- a/object.c +++ b/object.c @@ -337,12 +337,10 @@ rb_obj_copy_ivar(VALUE dest, VALUE obj) return; } - shape_id_t dest_shape_id = src_shape_id; shape_id_t initial_shape_id = RBASIC_SHAPE_ID(dest); - RUBY_ASSERT(RSHAPE_TYPE_P(initial_shape_id, SHAPE_ROOT)); - dest_shape_id = rb_shape_rebuild(initial_shape_id, src_shape_id); + shape_id_t dest_shape_id = rb_shape_rebuild(initial_shape_id, src_shape_id); if (UNLIKELY(rb_shape_too_complex_p(dest_shape_id))) { st_table *table = 
rb_st_init_numtable_with_size(src_num_ivs); rb_obj_copy_ivs_to_hash_table(obj, table); diff --git a/ractor.c b/ractor.c index 12ffced0a3546f..721234a98bde2f 100644 --- a/ractor.c +++ b/ractor.c @@ -2267,77 +2267,84 @@ struct cross_ractor_require { bool silent; }; -static void -cross_ractor_require_mark(void *ptr) -{ - struct cross_ractor_require *crr = (struct cross_ractor_require *)ptr; - rb_gc_mark(crr->port); - rb_gc_mark(crr->result); - rb_gc_mark(crr->exception); - rb_gc_mark(crr->feature); - rb_gc_mark(crr->module); -} +RUBY_REFERENCES(cross_ractor_require_refs) = { + RUBY_REF_EDGE(struct cross_ractor_require, port), + RUBY_REF_EDGE(struct cross_ractor_require, result), + RUBY_REF_EDGE(struct cross_ractor_require, exception), + RUBY_REF_EDGE(struct cross_ractor_require, feature), + RUBY_REF_EDGE(struct cross_ractor_require, module), + RUBY_REF_END +}; static const rb_data_type_t cross_ractor_require_data_type = { "ractor/cross_ractor_require", { - cross_ractor_require_mark, + RUBY_REFS_LIST_PTR(cross_ractor_require_refs), RUBY_DEFAULT_FREE, NULL, // memsize NULL, // compact }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_DECL_MARKING }; static VALUE -require_body(VALUE data) +require_body(VALUE crr_obj) { - struct cross_ractor_require *crr = (struct cross_ractor_require *)data; + struct cross_ractor_require *crr; + TypedData_Get_Struct(crr_obj, struct cross_ractor_require, &cross_ractor_require_data_type, crr); ID require; CONST_ID(require, "require"); if (crr->silent) { int rb_require_internal_silent(VALUE fname); - crr->result = INT2NUM(rb_require_internal_silent(crr->feature)); + + RB_OBJ_WRITE(crr_obj, &crr->result, INT2NUM(rb_require_internal_silent(crr->feature))); } else { - crr->result = rb_funcallv(Qnil, require, 1, &crr->feature); + RB_OBJ_WRITE(crr_obj, &crr->result, rb_funcallv(Qnil, require, 1, &crr->feature)); } return Qnil; } static VALUE -require_rescue(VALUE data, VALUE 
errinfo) +require_rescue(VALUE crr_obj, VALUE errinfo) { - struct cross_ractor_require *crr = (struct cross_ractor_require *)data; - crr->exception = errinfo; + struct cross_ractor_require *crr; + TypedData_Get_Struct(crr_obj, struct cross_ractor_require, &cross_ractor_require_data_type, crr); + + RB_OBJ_WRITE(crr_obj, &crr->exception, errinfo); + return Qundef; } static VALUE -require_result_copy_body(VALUE data) +require_result_copy_body(VALUE crr_obj) { - struct cross_ractor_require *crr = (struct cross_ractor_require *)data; + struct cross_ractor_require *crr; + TypedData_Get_Struct(crr_obj, struct cross_ractor_require, &cross_ractor_require_data_type, crr); if (crr->exception != Qundef) { VM_ASSERT(crr->result == Qundef); - crr->exception = ractor_copy(crr->exception); + RB_OBJ_WRITE(crr_obj, &crr->exception, ractor_copy(crr->exception)); } else{ VM_ASSERT(crr->result != Qundef); - crr->result = ractor_copy(crr->result); + RB_OBJ_WRITE(crr_obj, &crr->result, ractor_copy(crr->result)); } return Qnil; } static VALUE -require_result_copy_resuce(VALUE data, VALUE errinfo) +require_result_copy_resuce(VALUE crr_obj, VALUE errinfo) { - struct cross_ractor_require *crr = (struct cross_ractor_require *)data; - crr->exception = errinfo; // ractor_move(crr->exception); + struct cross_ractor_require *crr; + TypedData_Get_Struct(crr_obj, struct cross_ractor_require, &cross_ractor_require_data_type, crr); + + RB_OBJ_WRITE(crr_obj, &crr->exception, errinfo); + return Qnil; } @@ -2355,16 +2362,16 @@ ractor_require_protect(VALUE crr_obj, VALUE (*func)(VALUE)) } // catch any error - rb_rescue2(func, (VALUE)crr, - require_rescue, (VALUE)crr, rb_eException, 0); + rb_rescue2(func, crr_obj, + require_rescue, crr_obj, rb_eException, 0); if (silent) { ruby_debug = debug; rb_set_errinfo(errinfo); } - rb_rescue2(require_result_copy_body, (VALUE)crr, - require_result_copy_resuce, (VALUE)crr, rb_eException, 0); + rb_rescue2(require_result_copy_body, crr_obj, + require_result_copy_resuce, 
crr_obj, rb_eException, 0); ractor_port_send(GET_EC(), crr->port, Qtrue, Qfalse); RB_GC_GUARD(crr_obj); @@ -2388,8 +2395,8 @@ rb_ractor_require(VALUE feature, bool silent) FL_SET_RAW(crr_obj, RUBY_FL_SHAREABLE); // Convert feature to proper file path and make it shareable as fstring - crr->feature = rb_fstring(FilePathValue(feature)); - crr->port = ractor_port_new(GET_RACTOR()); + RB_OBJ_WRITE(crr_obj, &crr->feature, rb_fstring(FilePathValue(feature))); + RB_OBJ_WRITE(crr_obj, &crr->port, ractor_port_new(GET_RACTOR())); crr->result = Qundef; crr->exception = Qundef; crr->silent = silent; @@ -2424,10 +2431,13 @@ ractor_require(rb_execution_context_t *ec, VALUE self, VALUE feature) } static VALUE -autoload_load_body(VALUE data) +autoload_load_body(VALUE crr_obj) { - struct cross_ractor_require *crr = (struct cross_ractor_require *)data; - crr->result = rb_autoload_load(crr->module, crr->name); + struct cross_ractor_require *crr; + TypedData_Get_Struct(crr_obj, struct cross_ractor_require, &cross_ractor_require_data_type, crr); + + RB_OBJ_WRITE(crr_obj, &crr->result, rb_autoload_load(crr->module, crr->name)); + return Qnil; } @@ -2443,9 +2453,9 @@ rb_ractor_autoload_load(VALUE module, ID name) struct cross_ractor_require *crr; VALUE crr_obj = TypedData_Make_Struct(0, struct cross_ractor_require, &cross_ractor_require_data_type, crr); FL_SET_RAW(crr_obj, RUBY_FL_SHAREABLE); - crr->module = module; - crr->name = name; - crr->port = ractor_port_new(GET_RACTOR()); + RB_OBJ_WRITE(crr_obj, &crr->module, module); + RB_OBJ_WRITE(crr_obj, &crr->name, name); + RB_OBJ_WRITE(crr_obj, &crr->port, ractor_port_new(GET_RACTOR())); crr->result = Qundef; crr->exception = Qundef; diff --git a/ractor_sync.c b/ractor_sync.c index eb967a73cbc324..8437516bfb5c05 100644 --- a/ractor_sync.c +++ b/ractor_sync.c @@ -1221,7 +1221,6 @@ ractor_try_send(rb_execution_context_t *ec, const struct ractor_port *rp, VALUE // Ractor::Selector struct ractor_selector { - rb_ractor_t *r; struct st_table 
*ports; // rpv -> rp }; diff --git a/regcomp.c b/regcomp.c index 38bfed5631d643..e389e6f1209af5 100644 --- a/regcomp.c +++ b/regcomp.c @@ -640,17 +640,17 @@ compile_cclass_node(CClassNode* cc, regex_t* reg) else { if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { if (IS_NCCLASS_NOT(cc)) - add_opcode(reg, OP_CCLASS_MB_NOT); + add_opcode(reg, OP_CCLASS_MB_NOT); else - add_opcode(reg, OP_CCLASS_MB); + add_opcode(reg, OP_CCLASS_MB); r = add_multi_byte_cclass(cc->mbuf, reg); } else { if (IS_NCCLASS_NOT(cc)) - add_opcode(reg, OP_CCLASS_MIX_NOT); + add_opcode(reg, OP_CCLASS_MIX_NOT); else - add_opcode(reg, OP_CCLASS_MIX); + add_opcode(reg, OP_CCLASS_MIX); r = add_bitset(reg, cc->bs); if (r) return r; @@ -762,9 +762,9 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) if (NTYPE(qn->target) == NT_CANY) { if (qn->greedy && infinite) { if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) - return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen; + return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen; else - return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen; + return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen; } } @@ -776,17 +776,17 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) if (infinite && qn->lower <= 1) { if (qn->greedy) { if (qn->lower == 1) - len = SIZE_OP_JUMP; + len = SIZE_OP_JUMP; else - len = 0; + len = 0; len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP; } else { if (qn->lower == 0) - len = SIZE_OP_JUMP; + len = SIZE_OP_JUMP; else - len = 0; + len = 0; len += mod_tlen + SIZE_OP_PUSH + cklen; } @@ -800,10 +800,10 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) else if (qn->upper == 1 && qn->greedy) { if (qn->lower == 0) { if (CKN_ON) { - len = SIZE_OP_STATE_CHECK_PUSH + tlen; + len = SIZE_OP_STATE_CHECK_PUSH + tlen; } else { - len = SIZE_OP_PUSH + tlen; + len = SIZE_OP_PUSH + tlen; } } else { @@ -841,31 +841,31 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg) if (r) 
return r; if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) { if (IS_MULTILINE(reg->options)) - r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); + r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); else - r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); + r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); if (r) return r; if (CKN_ON) { - r = add_state_check_num(reg, ckn); - if (r) return r; + r = add_state_check_num(reg, ckn); + if (r) return r; } return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); } else { if (IS_MULTILINE(reg->options)) { - r = add_opcode(reg, (CKN_ON ? - OP_STATE_CHECK_ANYCHAR_ML_STAR - : OP_ANYCHAR_ML_STAR)); + r = add_opcode(reg, (CKN_ON ? + OP_STATE_CHECK_ANYCHAR_ML_STAR + : OP_ANYCHAR_ML_STAR)); } else { - r = add_opcode(reg, (CKN_ON ? - OP_STATE_CHECK_ANYCHAR_STAR - : OP_ANYCHAR_STAR)); + r = add_opcode(reg, (CKN_ON ? + OP_STATE_CHECK_ANYCHAR_STAR + : OP_ANYCHAR_STAR)); } if (r) return r; if (CKN_ON) - r = add_state_check_num(reg, ckn); + r = add_state_check_num(reg, ckn); return r; } @@ -879,45 +879,45 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg) if (infinite && qn->lower <= 1) { if (qn->greedy) { if (qn->lower == 1) { - r = add_opcode_rel_addr(reg, OP_JUMP, - (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)); - if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + (CKN_ON ? 
SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)); + if (r) return r; } if (CKN_ON) { - r = add_opcode(reg, OP_STATE_CHECK_PUSH); - if (r) return r; - r = add_state_check_num(reg, ckn); - if (r) return r; - r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP); + r = add_opcode(reg, OP_STATE_CHECK_PUSH); + if (r) return r; + r = add_state_check_num(reg, ckn); + if (r) return r; + r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP); } else { - r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); + r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); } if (r) return r; r = compile_tree_empty_check(qn->target, reg, empty_info); if (r) return r; r = add_opcode_rel_addr(reg, OP_JUMP, - -(mod_tlen + (int )SIZE_OP_JUMP - + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH))); + -(mod_tlen + (int )SIZE_OP_JUMP + + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH))); } else { if (qn->lower == 0) { - r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); - if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); + if (r) return r; } r = compile_tree_empty_check(qn->target, reg, empty_info); if (r) return r; if (CKN_ON) { - r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP); - if (r) return r; - r = add_state_check_num(reg, ckn); - if (r) return r; - r = add_rel_addr(reg, - -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP)); + r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP); + if (r) return r; + r = add_state_check_num(reg, ckn); + if (r) return r; + r = add_rel_addr(reg, + -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP)); } else - r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); + r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); } } else if (qn->upper == 0) { @@ -932,14 +932,14 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg) else if (qn->upper == 1 && qn->greedy) { if (qn->lower == 0) { if (CKN_ON) { - r = add_opcode(reg, OP_STATE_CHECK_PUSH); - if (r) return r; - r = add_state_check_num(reg, 
ckn); - if (r) return r; - r = add_rel_addr(reg, tlen); + r = add_opcode(reg, OP_STATE_CHECK_PUSH); + if (r) return r; + r = add_state_check_num(reg, ckn); + if (r) return r; + r = add_rel_addr(reg, tlen); } else { - r = add_opcode_rel_addr(reg, OP_PUSH, tlen); + r = add_opcode_rel_addr(reg, OP_PUSH, tlen); } if (r) return r; } @@ -991,9 +991,9 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) if (NTYPE(qn->target) == NT_CANY) { if (qn->greedy && infinite) { if (IS_NOT_NULL(qn->next_head_exact)) - return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; + return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; else - return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower; + return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower; } } @@ -1014,13 +1014,13 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) if (qn->greedy) { #ifdef USE_OP_PUSH_OR_JUMP_EXACT if (IS_NOT_NULL(qn->head_exact)) - len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP; + len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP; else #endif if (IS_NOT_NULL(qn->next_head_exact)) - len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP; + len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP; else - len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP; + len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP; } else len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH; @@ -1060,17 +1060,17 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg) if (r) return r; if (IS_NOT_NULL(qn->next_head_exact)) { if (IS_MULTILINE(reg->options)) - r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); + r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); else - r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); + r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); if (r) return r; return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); } else { if (IS_MULTILINE(reg->options)) - return add_opcode(reg, OP_ANYCHAR_ML_STAR); + return add_opcode(reg, OP_ANYCHAR_ML_STAR); else - return add_opcode(reg, OP_ANYCHAR_STAR); + 
return add_opcode(reg, OP_ANYCHAR_STAR); } } @@ -1084,17 +1084,17 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg) if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { if (qn->greedy) { #ifdef USE_OP_PUSH_OR_JUMP_EXACT - if (IS_NOT_NULL(qn->head_exact)) - r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1); - else + if (IS_NOT_NULL(qn->head_exact)) + r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1); + else #endif - if (IS_NOT_NULL(qn->next_head_exact)) - r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT); - else - r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH); + if (IS_NOT_NULL(qn->next_head_exact)) + r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT); + else + r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH); } else { - r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP); + r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP); } if (r) return r; } @@ -1106,34 +1106,34 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg) if (qn->greedy) { #ifdef USE_OP_PUSH_OR_JUMP_EXACT if (IS_NOT_NULL(qn->head_exact)) { - r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1, - mod_tlen + SIZE_OP_JUMP); - if (r) return r; - add_bytes(reg, NSTR(qn->head_exact)->s, 1); - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, - -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1)); + r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1, + mod_tlen + SIZE_OP_JUMP); + if (r) return r; + add_bytes(reg, NSTR(qn->head_exact)->s, 1); + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1)); } else #endif if (IS_NOT_NULL(qn->next_head_exact)) { - r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT, - mod_tlen + SIZE_OP_JUMP); - if (r) return r; - add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); - r = 
compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, + r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT, + mod_tlen + SIZE_OP_JUMP); + if (r) return r; + add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT)); } else { - r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); - if (r) return r; - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, - -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH)); + r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); + if (r) return r; + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH)); } } else { @@ -1159,7 +1159,7 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg) for (i = 0; i < n; i++) { r = add_opcode_rel_addr(reg, OP_PUSH, - (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH); + (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH); if (r) return r; r = compile_tree(qn->target, reg); if (r) return r; @@ -1246,29 +1246,29 @@ compile_length_enclose_node(EncloseNode* node, regex_t* reg) #ifdef USE_SUBEXP_CALL if (IS_ENCLOSE_CALLED(node)) { len = SIZE_OP_MEMORY_START_PUSH + tlen - + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; + + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - len += (IS_ENCLOSE_RECURSION(node) - ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); + len += (IS_ENCLOSE_RECURSION(node) + ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); else - len += (IS_ENCLOSE_RECURSION(node) - ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); + len += (IS_ENCLOSE_RECURSION(node) + ? 
SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); } else if (IS_ENCLOSE_RECURSION(node)) { len = SIZE_OP_MEMORY_START_PUSH; len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum) - ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC); + ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC); } else #endif { if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) - len = SIZE_OP_MEMORY_START_PUSH; + len = SIZE_OP_MEMORY_START_PUSH; else - len = SIZE_OP_MEMORY_START; + len = SIZE_OP_MEMORY_START; len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum) - ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END); + ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END); } break; @@ -1283,7 +1283,7 @@ compile_length_enclose_node(EncloseNode* node, regex_t* reg) if (tlen < 0) return tlen; len = tlen * qn->lower - + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; + + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; } else { #endif @@ -1348,11 +1348,11 @@ compile_enclose_node(EncloseNode* node, regex_t* reg) len = compile_length_tree(node->target, reg); len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN); if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - len += (IS_ENCLOSE_RECURSION(node) - ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); + len += (IS_ENCLOSE_RECURSION(node) + ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); else - len += (IS_ENCLOSE_RECURSION(node) - ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); + len += (IS_ENCLOSE_RECURSION(node) + ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); r = add_opcode_rel_addr(reg, OP_JUMP, len); if (r) return r; @@ -1370,11 +1370,11 @@ compile_enclose_node(EncloseNode* node, regex_t* reg) #ifdef USE_SUBEXP_CALL if (IS_ENCLOSE_CALLED(node)) { if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) - ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); + r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) + ? 
OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); else - r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) - ? OP_MEMORY_END_REC : OP_MEMORY_END)); + r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) + ? OP_MEMORY_END_REC : OP_MEMORY_END)); if (r) return r; r = add_mem_num(reg, node->regnum); @@ -1383,9 +1383,9 @@ compile_enclose_node(EncloseNode* node, regex_t* reg) } else if (IS_ENCLOSE_RECURSION(node)) { if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - r = add_opcode(reg, OP_MEMORY_END_PUSH_REC); + r = add_opcode(reg, OP_MEMORY_END_PUSH_REC); else - r = add_opcode(reg, OP_MEMORY_END_REC); + r = add_opcode(reg, OP_MEMORY_END_REC); if (r) return r; r = add_mem_num(reg, node->regnum); } @@ -1393,9 +1393,9 @@ compile_enclose_node(EncloseNode* node, regex_t* reg) #endif { if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - r = add_opcode(reg, OP_MEMORY_END_PUSH); + r = add_opcode(reg, OP_MEMORY_END_PUSH); else - r = add_opcode(reg, OP_MEMORY_END); + r = add_opcode(reg, OP_MEMORY_END); if (r) return r; r = add_mem_num(reg, node->regnum); } @@ -1421,7 +1421,7 @@ compile_enclose_node(EncloseNode* node, regex_t* reg) r = add_opcode(reg, OP_POP); if (r) return r; r = add_opcode_rel_addr(reg, OP_JUMP, - -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); + -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); } else { #endif @@ -1579,11 +1579,11 @@ compile_anchor_node(AnchorNode* node, regex_t* reg) r = add_opcode(reg, OP_LOOK_BEHIND); if (r) return r; if (node->char_len < 0) { - r = get_char_length_tree(node->target, reg, &n); - if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + r = get_char_length_tree(node->target, reg, &n); + if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; } else - n = node->char_len; + n = node->char_len; r = add_length(reg, n); if (r) return r; r = compile_tree(node->target, reg); @@ -1595,14 +1595,14 @@ compile_anchor_node(AnchorNode* node, regex_t* reg) int n; len = compile_length_tree(node->target, reg); r = 
add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT, - len + SIZE_OP_FAIL_LOOK_BEHIND_NOT); + len + SIZE_OP_FAIL_LOOK_BEHIND_NOT); if (r) return r; if (node->char_len < 0) { - r = get_char_length_tree(node->target, reg, &n); - if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + r = get_char_length_tree(node->target, reg, &n); + if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; } else - n = node->char_len; + n = node->char_len; r = add_length(reg, n); if (r) return r; r = compile_tree(node->target, reg); @@ -1641,10 +1641,10 @@ compile_length_tree(Node* node, regex_t* reg) int n = 0; len = 0; do { - r = compile_length_tree(NCAR(node), reg); - if (r < 0) return r; - len += r; - n++; + r = compile_length_tree(NCAR(node), reg); + if (r < 0) return r; + len += r; + n++; } while (IS_NOT_NULL(node = NCDR(node))); r = len; r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1); @@ -1673,17 +1673,17 @@ compile_length_tree(Node* node, regex_t* reg) #ifdef USE_BACKREF_WITH_LEVEL if (IS_BACKREF_NEST_LEVEL(br)) { - r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + + r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); } else #endif if (br->back_num == 1) { - r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2) - ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); + r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2) + ? 
SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); } else { - r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); + r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); } } break; @@ -1732,26 +1732,26 @@ compile_tree(Node* node, regex_t* reg) Node* x = node; len = 0; do { - len += compile_length_tree(NCAR(x), reg); - if (NCDR(x) != NULL) { - len += SIZE_OP_PUSH + SIZE_OP_JUMP; - } + len += compile_length_tree(NCAR(x), reg); + if (NCDR(x) != NULL) { + len += SIZE_OP_PUSH + SIZE_OP_JUMP; + } } while (IS_NOT_NULL(x = NCDR(x))); pos = reg->used + len; /* goal position */ do { - len = compile_length_tree(NCAR(node), reg); - if (IS_NOT_NULL(NCDR(node))) { - r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP); - if (r) break; - } - r = compile_tree(NCAR(node), reg); - if (r) break; - if (IS_NOT_NULL(NCDR(node))) { - len = pos - (reg->used + SIZE_OP_JUMP); - r = add_opcode_rel_addr(reg, OP_JUMP, len); - if (r) break; - } + len = compile_length_tree(NCAR(node), reg); + if (IS_NOT_NULL(NCDR(node))) { + r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP); + if (r) break; + } + r = compile_tree(NCAR(node), reg); + if (r) break; + if (IS_NOT_NULL(NCDR(node))) { + len = pos - (reg->used + SIZE_OP_JUMP); + r = add_opcode_rel_addr(reg, OP_JUMP, len); + if (r) break; + } } while (IS_NOT_NULL(node = NCDR(node))); } break; @@ -1773,18 +1773,18 @@ compile_tree(Node* node, regex_t* reg) switch (NCTYPE(node)->ctype) { case ONIGENC_CTYPE_WORD: - if (NCTYPE(node)->ascii_range != 0) { - if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD; - else op = OP_ASCII_WORD; - } - else { - if (NCTYPE(node)->not != 0) op = OP_NOT_WORD; - else op = OP_WORD; - } - break; + if (NCTYPE(node)->ascii_range != 0) { + if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD; + else op = OP_ASCII_WORD; + } + else { + if (NCTYPE(node)->not != 0) op = OP_NOT_WORD; + else op = OP_WORD; + } + break; default: - return ONIGERR_TYPE_BUG; - break; + return ONIGERR_TYPE_BUG; + break; } r = 
add_opcode(reg, op); } @@ -1803,58 +1803,58 @@ compile_tree(Node* node, regex_t* reg) #ifdef USE_BACKREF_WITH_LEVEL if (IS_BACKREF_NEST_LEVEL(br)) { - r = add_opcode(reg, OP_BACKREF_WITH_LEVEL); - if (r) return r; - r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); - if (r) return r; - r = add_length(reg, br->nest_level); - if (r) return r; - - goto add_bacref_mems; + r = add_opcode(reg, OP_BACKREF_WITH_LEVEL); + if (r) return r; + r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); + if (r) return r; + r = add_length(reg, br->nest_level); + if (r) return r; + + goto add_bacref_mems; } else #endif if (br->back_num == 1) { - n = br->back_static[0]; - if (IS_IGNORECASE(reg->options)) { - r = add_opcode(reg, OP_BACKREFN_IC); - if (r) return r; - r = add_mem_num(reg, n); - } - else { - switch (n) { - case 1: r = add_opcode(reg, OP_BACKREF1); break; - case 2: r = add_opcode(reg, OP_BACKREF2); break; - default: - r = add_opcode(reg, OP_BACKREFN); - if (r) return r; - r = add_mem_num(reg, n); - break; - } - } + n = br->back_static[0]; + if (IS_IGNORECASE(reg->options)) { + r = add_opcode(reg, OP_BACKREFN_IC); + if (r) return r; + r = add_mem_num(reg, n); + } + else { + switch (n) { + case 1: r = add_opcode(reg, OP_BACKREF1); break; + case 2: r = add_opcode(reg, OP_BACKREF2); break; + default: + r = add_opcode(reg, OP_BACKREFN); + if (r) return r; + r = add_mem_num(reg, n); + break; + } + } } else { - int i; - int* p; + int i; + int* p; - if (IS_IGNORECASE(reg->options)) { - r = add_opcode(reg, OP_BACKREF_MULTI_IC); - } - else { - r = add_opcode(reg, OP_BACKREF_MULTI); - } - if (r) return r; + if (IS_IGNORECASE(reg->options)) { + r = add_opcode(reg, OP_BACKREF_MULTI_IC); + } + else { + r = add_opcode(reg, OP_BACKREF_MULTI); + } + if (r) return r; #ifdef USE_BACKREF_WITH_LEVEL add_bacref_mems: #endif - r = add_length(reg, br->back_num); - if (r) return r; - p = BACKREFS_P(br); - for (i = br->back_num - 1; i >= 0; i--) { - r = add_mem_num(reg, p[i]); - 
if (r) return r; - } + r = add_length(reg, br->back_num); + if (r) return r; + p = BACKREFS_P(br); + for (i = br->back_num - 1; i >= 0; i--) { + r = add_mem_num(reg, p[i]); + if (r) return r; + } } } break; @@ -1909,7 +1909,7 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) Node* old = *ptarget; r = noname_disable_map(ptarget, map, counter); if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) { - onig_reduce_nested_quantifier(node, *ptarget); + onig_reduce_nested_quantifier(node, *ptarget); } } break; @@ -1918,18 +1918,18 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) { EncloseNode* en = NENCLOSE(node); if (en->type == ENCLOSE_MEMORY) { - if (IS_ENCLOSE_NAMED_GROUP(en)) { - (*counter)++; - map[en->regnum].new_val = *counter; - en->regnum = *counter; - } - else if (en->regnum != 0) { - *plink = en->target; - en->target = NULL_NODE; - onig_node_free(node); - r = noname_disable_map(plink, map, counter); - break; - } + if (IS_ENCLOSE_NAMED_GROUP(en)) { + (*counter)++; + map[en->regnum].new_val = *counter; + en->regnum = *counter; + } + else if (en->regnum != 0) { + *plink = en->target; + en->target = NULL_NODE; + onig_node_free(node); + r = noname_disable_map(plink, map, counter); + break; + } } r = noname_disable_map(&(en->target), map, counter); } @@ -1995,8 +1995,8 @@ renumber_by_map(Node* node, GroupNumRemap* map, const int num_mem) { EncloseNode* en = NENCLOSE(node); if (en->type == ENCLOSE_CONDITION) { - if (en->regnum > num_mem) return ONIGERR_INVALID_BACKREF; - en->regnum = map[en->regnum].new_val; + if (en->regnum > num_mem) return ONIGERR_INVALID_BACKREF; + en->regnum = map[en->regnum].new_val; } r = renumber_by_map(en->target, map, num_mem); } @@ -2127,8 +2127,8 @@ quantifiers_memory_node_info(Node* node) { int v; do { - v = quantifiers_memory_node_info(NCAR(node)); - if (v > r) r = v; + v = quantifiers_memory_node_info(NCAR(node)); + if (v > r) r = v; } while (v >= 0 && IS_NOT_NULL(node = NCDR(node))); } break; @@ 
-2147,7 +2147,7 @@ quantifiers_memory_node_info(Node* node) { QtfrNode* qn = NQTFR(node); if (qn->upper != 0) { - r = quantifiers_memory_node_info(qn->target); + r = quantifiers_memory_node_info(qn->target); } } break; @@ -2157,17 +2157,17 @@ quantifiers_memory_node_info(Node* node) EncloseNode* en = NENCLOSE(node); switch (en->type) { case ENCLOSE_MEMORY: - return NQ_TARGET_IS_EMPTY_MEM; - break; + return NQ_TARGET_IS_EMPTY_MEM; + break; case ENCLOSE_OPTION: case ENCLOSE_STOP_BACKTRACK: case ENCLOSE_CONDITION: case ENCLOSE_ABSENT: - r = quantifiers_memory_node_info(en->target); - break; + r = quantifiers_memory_node_info(en->target); + break; default: - break; + break; } } break; @@ -2207,10 +2207,10 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) r = get_min_match_length(nodes[backs[0]], min, env); if (r != 0) break; for (i = 1; i < br->back_num; i++) { - if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; - r = get_min_match_length(nodes[backs[i]], &tmin, env); - if (r != 0) break; - if (*min > tmin) *min = tmin; + if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; + r = get_min_match_length(nodes[backs[i]], &tmin, env); + if (r != 0) break; + if (*min > tmin) *min = tmin; } } break; @@ -2220,7 +2220,7 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) if (IS_CALL_RECURSION(NCALL(node))) { EncloseNode* en = NENCLOSE(NCALL(node)->target); if (IS_ENCLOSE_MIN_FIXED(en)) - *min = en->min_len; + *min = en->min_len; } else r = get_min_match_length(NCALL(node)->target, min, env); @@ -2239,11 +2239,11 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) Node *x, *y; y = node; do { - x = NCAR(y); - r = get_min_match_length(x, &tmin, env); - if (r != 0) break; - if (y == node) *min = tmin; - else if (*min > tmin) *min = tmin; + x = NCAR(y); + r = get_min_match_length(x, &tmin, env); + if (r != 0) break; + if (y == node) *min = tmin; + else if (*min > tmin) *min = tmin; } while (r == 0 && 
IS_NOT_NULL(y = NCDR(y))); } break; @@ -2269,9 +2269,9 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) QtfrNode* qn = NQTFR(node); if (qn->lower > 0) { - r = get_min_match_length(qn->target, min, env); - if (r == 0) - *min = distance_multiply(*min, qn->lower); + r = get_min_match_length(qn->target, min, env); + if (r == 0) + *min = distance_multiply(*min, qn->lower); } } break; @@ -2284,28 +2284,28 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) if (IS_ENCLOSE_MIN_FIXED(en)) *min = en->min_len; else { - if (IS_ENCLOSE_MARK1(NENCLOSE(node))) - *min = 0; /* recursive */ - else { - SET_ENCLOSE_STATUS(node, NST_MARK1); - r = get_min_match_length(en->target, min, env); - CLEAR_ENCLOSE_STATUS(node, NST_MARK1); - if (r == 0) { - en->min_len = *min; - SET_ENCLOSE_STATUS(node, NST_MIN_FIXED); - } - } + if (IS_ENCLOSE_MARK1(NENCLOSE(node))) + *min = 0; /* recursive */ + else { + SET_ENCLOSE_STATUS(node, NST_MARK1); + r = get_min_match_length(en->target, min, env); + CLEAR_ENCLOSE_STATUS(node, NST_MARK1); + if (r == 0) { + en->min_len = *min; + SET_ENCLOSE_STATUS(node, NST_MIN_FIXED); + } + } } break; case ENCLOSE_OPTION: case ENCLOSE_STOP_BACKTRACK: case ENCLOSE_CONDITION: - r = get_min_match_length(en->target, min, env); - break; + r = get_min_match_length(en->target, min, env); + break; case ENCLOSE_ABSENT: - break; + break; } } break; @@ -2330,7 +2330,7 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) do { r = get_max_match_length(NCAR(node), &tmax, env); if (r == 0) - *max = distance_add(*max, tmax); + *max = distance_add(*max, tmax); } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; @@ -2364,15 +2364,15 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) Node** nodes = SCANENV_MEM_NODES(env); BRefNode* br = NBREF(node); if (br->state & NST_RECURSION) { - *max = ONIG_INFINITE_DISTANCE; - break; + *max = ONIG_INFINITE_DISTANCE; + break; } backs = BACKREFS_P(br); for (i = 0; i < 
br->back_num; i++) { - if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; - r = get_max_match_length(nodes[backs[i]], &tmax, env); - if (r != 0) break; - if (*max < tmax) *max = tmax; + if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; + r = get_max_match_length(nodes[backs[i]], &tmax, env); + if (r != 0) break; + if (*max < tmax) *max = tmax; } } break; @@ -2391,13 +2391,13 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) QtfrNode* qn = NQTFR(node); if (qn->upper != 0) { - r = get_max_match_length(qn->target, max, env); - if (r == 0 && *max != 0) { - if (! IS_REPEAT_INFINITE(qn->upper)) - *max = distance_multiply(*max, qn->upper); - else - *max = ONIG_INFINITE_DISTANCE; - } + r = get_max_match_length(qn->target, max, env); + if (r == 0 && *max != 0) { + if (! IS_REPEAT_INFINITE(qn->upper)) + *max = distance_multiply(*max, qn->upper); + else + *max = ONIG_INFINITE_DISTANCE; + } } } break; @@ -2407,31 +2407,31 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) EncloseNode* en = NENCLOSE(node); switch (en->type) { case ENCLOSE_MEMORY: - if (IS_ENCLOSE_MAX_FIXED(en)) - *max = en->max_len; - else { - if (IS_ENCLOSE_MARK1(NENCLOSE(node))) - *max = ONIG_INFINITE_DISTANCE; - else { - SET_ENCLOSE_STATUS(node, NST_MARK1); - r = get_max_match_length(en->target, max, env); - CLEAR_ENCLOSE_STATUS(node, NST_MARK1); - if (r == 0) { - en->max_len = *max; - SET_ENCLOSE_STATUS(node, NST_MAX_FIXED); - } - } - } - break; + if (IS_ENCLOSE_MAX_FIXED(en)) + *max = en->max_len; + else { + if (IS_ENCLOSE_MARK1(NENCLOSE(node))) + *max = ONIG_INFINITE_DISTANCE; + else { + SET_ENCLOSE_STATUS(node, NST_MARK1); + r = get_max_match_length(en->target, max, env); + CLEAR_ENCLOSE_STATUS(node, NST_MARK1); + if (r == 0) { + en->max_len = *max; + SET_ENCLOSE_STATUS(node, NST_MAX_FIXED); + } + } + } + break; case ENCLOSE_OPTION: case ENCLOSE_STOP_BACKTRACK: case ENCLOSE_CONDITION: - r = get_max_match_length(en->target, max, env); - break; + r 
= get_max_match_length(en->target, max, env); + break; case ENCLOSE_ABSENT: - break; + break; } } break; @@ -2461,7 +2461,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) do { r = get_char_length_tree1(NCAR(node), reg, &tlen, level); if (r == 0) - *len = (int )distance_add(*len, tlen); + *len = (int )distance_add(*len, tlen); } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; @@ -2472,21 +2472,21 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) r = get_char_length_tree1(NCAR(node), reg, &tlen, level); while (r == 0 && IS_NOT_NULL(node = NCDR(node))) { - r = get_char_length_tree1(NCAR(node), reg, &tlen2, level); - if (r == 0) { - if (tlen != tlen2) - varlen = 1; - } + r = get_char_length_tree1(NCAR(node), reg, &tlen2, level); + if (r == 0) { + if (tlen != tlen2) + varlen = 1; + } } if (r == 0) { - if (varlen != 0) { - if (level == 1) - r = GET_CHAR_LEN_TOP_ALT_VARLEN; - else - r = GET_CHAR_LEN_VARLEN; - } - else - *len = tlen; + if (varlen != 0) { + if (level == 1) + r = GET_CHAR_LEN_TOP_ALT_VARLEN; + else + r = GET_CHAR_LEN_VARLEN; + } + else + *len = tlen; } } break; @@ -2496,8 +2496,8 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) StrNode* sn = NSTR(node); UChar *s = sn->s; while (s < sn->end) { - s += enclen(reg->enc, s, sn->end); - (*len)++; + s += enclen(reg->enc, s, sn->end); + (*len)++; } } break; @@ -2506,12 +2506,12 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) { QtfrNode* qn = NQTFR(node); if (qn->lower == qn->upper) { - r = get_char_length_tree1(qn->target, reg, &tlen, level); - if (r == 0) - *len = (int )distance_multiply(tlen, qn->lower); + r = get_char_length_tree1(qn->target, reg, &tlen, level); + if (r == 0) + *len = (int )distance_multiply(tlen, qn->lower); } else - r = GET_CHAR_LEN_VARLEN; + r = GET_CHAR_LEN_VARLEN; } break; @@ -2539,25 +2539,25 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) switch (en->type) { case 
ENCLOSE_MEMORY: #ifdef USE_SUBEXP_CALL - if (IS_ENCLOSE_CLEN_FIXED(en)) - *len = en->char_len; - else { - r = get_char_length_tree1(en->target, reg, len, level); - if (r == 0) { - en->char_len = *len; - SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED); - } - } - break; + if (IS_ENCLOSE_CLEN_FIXED(en)) + *len = en->char_len; + else { + r = get_char_length_tree1(en->target, reg, len, level); + if (r == 0) { + en->char_len = *len; + SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED); + } + } + break; #endif case ENCLOSE_OPTION: case ENCLOSE_STOP_BACKTRACK: case ENCLOSE_CONDITION: - r = get_char_length_tree1(en->target, reg, len, level); - break; + r = get_char_length_tree1(en->target, reg, len, level); + break; case ENCLOSE_ABSENT: default: - break; + break; } } break; @@ -2596,29 +2596,29 @@ is_not_included(Node* x, Node* y, regex_t* reg) { switch (ytype) { case NT_CTYPE: - if (NCTYPE(y)->ctype == NCTYPE(x)->ctype && - NCTYPE(y)->not != NCTYPE(x)->not && - NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range) - return 1; - else - return 0; - break; + if (NCTYPE(y)->ctype == NCTYPE(x)->ctype && + NCTYPE(y)->not != NCTYPE(x)->not && + NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range) + return 1; + else + return 0; + break; case NT_CCLASS: swap: - { - Node* tmp; - tmp = x; x = y; y = tmp; - goto retry; - } - break; + { + Node* tmp; + tmp = x; x = y; y = tmp; + goto retry; + } + break; case NT_STR: - goto swap; - break; + goto swap; + break; default: - break; + break; } } break; @@ -2628,80 +2628,80 @@ is_not_included(Node* x, Node* y, regex_t* reg) CClassNode* xc = NCCLASS(x); switch (ytype) { case NT_CTYPE: - switch (NCTYPE(y)->ctype) { - case ONIGENC_CTYPE_WORD: - if (NCTYPE(y)->not == 0) { - if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) { - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (BITSET_AT(xc->bs, i)) { - if (NCTYPE(y)->ascii_range) { - if (IS_CODE_SB_WORD(reg->enc, i)) return 0; - } - else { - if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0; - } - } - } - return 1; - } - return 0; 
- } - else { - if (IS_NOT_NULL(xc->mbuf)) return 0; - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - int is_word; - if (NCTYPE(y)->ascii_range) - is_word = IS_CODE_SB_WORD(reg->enc, i); - else - is_word = ONIGENC_IS_CODE_WORD(reg->enc, i); - if (! is_word) { - if (!IS_NCCLASS_NOT(xc)) { - if (BITSET_AT(xc->bs, i)) - return 0; - } - else { - if (! BITSET_AT(xc->bs, i)) - return 0; - } - } - } - return 1; - } - break; - - default: - break; - } - break; + switch (NCTYPE(y)->ctype) { + case ONIGENC_CTYPE_WORD: + if (NCTYPE(y)->not == 0) { + if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (BITSET_AT(xc->bs, i)) { + if (NCTYPE(y)->ascii_range) { + if (IS_CODE_SB_WORD(reg->enc, i)) return 0; + } + else { + if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0; + } + } + } + return 1; + } + return 0; + } + else { + if (IS_NOT_NULL(xc->mbuf)) return 0; + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + int is_word; + if (NCTYPE(y)->ascii_range) + is_word = IS_CODE_SB_WORD(reg->enc, i); + else + is_word = ONIGENC_IS_CODE_WORD(reg->enc, i); + if (! is_word) { + if (!IS_NCCLASS_NOT(xc)) { + if (BITSET_AT(xc->bs, i)) + return 0; + } + else { + if (! 
BITSET_AT(xc->bs, i)) + return 0; + } + } + } + return 1; + } + break; + + default: + break; + } + break; case NT_CCLASS: - { - int v; - CClassNode* yc = NCCLASS(y); - - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - v = BITSET_AT(xc->bs, i); - if ((v != 0 && !IS_NCCLASS_NOT(xc)) || - (v == 0 && IS_NCCLASS_NOT(xc))) { - v = BITSET_AT(yc->bs, i); - if ((v != 0 && !IS_NCCLASS_NOT(yc)) || - (v == 0 && IS_NCCLASS_NOT(yc))) - return 0; - } - } - if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) || - (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc))) - return 1; - return 0; - } - break; + { + int v; + CClassNode* yc = NCCLASS(y); + + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + v = BITSET_AT(xc->bs, i); + if ((v != 0 && !IS_NCCLASS_NOT(xc)) || + (v == 0 && IS_NCCLASS_NOT(xc))) { + v = BITSET_AT(yc->bs, i); + if ((v != 0 && !IS_NCCLASS_NOT(yc)) || + (v == 0 && IS_NCCLASS_NOT(yc))) + return 0; + } + } + if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) || + (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc))) + return 1; + return 0; + } + break; case NT_STR: - goto swap; - break; + goto swap; + break; default: - break; + break; } } break; @@ -2710,60 +2710,60 @@ is_not_included(Node* x, Node* y, regex_t* reg) { StrNode* xs = NSTR(x); if (NSTRING_LEN(x) == 0) - break; + break; switch (ytype) { case NT_CTYPE: - switch (NCTYPE(y)->ctype) { - case ONIGENC_CTYPE_WORD: - if (NCTYPE(y)->ascii_range) { - if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end)) - return NCTYPE(y)->not; - else - return !(NCTYPE(y)->not); - } - else { - if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end)) - return NCTYPE(y)->not; - else - return !(NCTYPE(y)->not); - } - break; - default: - break; - } - break; + switch (NCTYPE(y)->ctype) { + case ONIGENC_CTYPE_WORD: + if (NCTYPE(y)->ascii_range) { + if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end)) + return NCTYPE(y)->not; + else + return !(NCTYPE(y)->not); + } + else { + if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end)) + return NCTYPE(y)->not; + else + return 
!(NCTYPE(y)->not); + } + break; + default: + break; + } + break; case NT_CCLASS: - { - CClassNode* cc = NCCLASS(y); + { + CClassNode* cc = NCCLASS(y); - code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s, - xs->s + ONIGENC_MBC_MAXLEN(reg->enc)); - return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1); - } - break; + code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s, + xs->s + ONIGENC_MBC_MAXLEN(reg->enc)); + return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1); + } + break; case NT_STR: - { - UChar *q; - StrNode* ys = NSTR(y); - len = NSTRING_LEN(x); - if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y); - if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) { - /* tiny version */ - return 0; - } - else { - for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) { - if (*p != *q) return 1; - } - } - } - break; + { + UChar *q; + StrNode* ys = NSTR(y); + len = NSTRING_LEN(x); + if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y); + if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) { + /* tiny version */ + return 0; + } + else { + for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) { + if (*p != *q) return 1; + } + } + } + break; default: - break; + break; } } break; @@ -2805,13 +2805,13 @@ get_head_value_node(Node* node, int exact, regex_t* reg) StrNode* sn = NSTR(node); if (sn->end <= sn->s) - break; + break; if (exact != 0 && - !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { + !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { } else { - n = node; + n = node; } } break; @@ -2821,11 +2821,11 @@ get_head_value_node(Node* node, int exact, regex_t* reg) QtfrNode* qn = NQTFR(node); if (qn->lower > 0) { #ifdef USE_OP_PUSH_OR_JUMP_EXACT - if (IS_NOT_NULL(qn->head_exact)) - n = qn->head_exact; - else + if (IS_NOT_NULL(qn->head_exact)) + n = qn->head_exact; + else #endif - n = get_head_value_node(qn->target, exact, reg); + n = get_head_value_node(qn->target, exact, reg); } } break; @@ -2835,23 +2835,23 @@ get_head_value_node(Node* node, int 
exact, regex_t* reg) EncloseNode* en = NENCLOSE(node); switch (en->type) { case ENCLOSE_OPTION: - { - OnigOptionType options = reg->options; + { + OnigOptionType options = reg->options; - reg->options = NENCLOSE(node)->option; - n = get_head_value_node(NENCLOSE(node)->target, exact, reg); - reg->options = options; - } - break; + reg->options = NENCLOSE(node)->option; + n = get_head_value_node(NENCLOSE(node)->target, exact, reg); + reg->options = options; + } + break; case ENCLOSE_MEMORY: case ENCLOSE_STOP_BACKTRACK: case ENCLOSE_CONDITION: - n = get_head_value_node(en->target, exact, reg); - break; + n = get_head_value_node(en->target, exact, reg); + break; case ENCLOSE_ABSENT: - break; + break; } } break; @@ -2882,20 +2882,20 @@ check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask) case NT_ALT: do { r = check_type_tree(NCAR(node), type_mask, enclose_mask, - anchor_mask); + anchor_mask); } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; case NT_QTFR: r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask, - anchor_mask); + anchor_mask); break; case NT_ENCLOSE: { EncloseNode* en = NENCLOSE(node); if ((en->type & enclose_mask) == 0) - return 1; + return 1; r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask); } @@ -2908,7 +2908,7 @@ check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask) if (NANCHOR(node)->target) r = check_type_tree(NANCHOR(node)->target, - type_mask, enclose_mask, anchor_mask); + type_mask, enclose_mask, anchor_mask); break; default: @@ -2938,14 +2938,14 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) x = node; do { - ret = subexp_inf_recursive_check(NCAR(x), env, head); - if (ret < 0 || ret == RECURSION_INFINITE) return ret; - r |= ret; - if (head) { - ret = get_min_match_length(NCAR(x), &min, env); - if (ret != 0) return ret; - if (min != 0) head = 0; - } + ret = subexp_inf_recursive_check(NCAR(x), env, head); + if (ret < 0 || ret == 
RECURSION_INFINITE) return ret; + r |= ret; + if (head) { + ret = get_min_match_length(NCAR(x), &min, env); + if (ret != 0) return ret; + if (min != 0) head = 0; + } } while (IS_NOT_NULL(x = NCDR(x))); } break; @@ -2955,9 +2955,9 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) int ret; r = RECURSION_EXIST; do { - ret = subexp_inf_recursive_check(NCAR(node), env, head); - if (ret < 0 || ret == RECURSION_INFINITE) return ret; - r &= ret; + ret = subexp_inf_recursive_check(NCAR(node), env, head); + if (ret < 0 || ret == RECURSION_INFINITE) return ret; + r &= ret; } while (IS_NOT_NULL(node = NCDR(node))); } break; @@ -2977,8 +2977,8 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) case ANCHOR_PREC_READ_NOT: case ANCHOR_LOOK_BEHIND: case ANCHOR_LOOK_BEHIND_NOT: - r = subexp_inf_recursive_check(an->target, env, head); - break; + r = subexp_inf_recursive_check(an->target, env, head); + break; } } break; @@ -3033,8 +3033,8 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) case ANCHOR_PREC_READ_NOT: case ANCHOR_LOOK_BEHIND: case ANCHOR_LOOK_BEHIND_NOT: - r = subexp_inf_recursive_check_trav(an->target, env); - break; + r = subexp_inf_recursive_check_trav(an->target, env); + break; } } break; @@ -3044,10 +3044,10 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) EncloseNode* en = NENCLOSE(node); if (IS_ENCLOSE_RECURSION(en)) { - SET_ENCLOSE_STATUS(node, NST_MARK1); - r = subexp_inf_recursive_check(en->target, env, 1); - if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION; - CLEAR_ENCLOSE_STATUS(node, NST_MARK1); + SET_ENCLOSE_STATUS(node, NST_MARK1); + r = subexp_inf_recursive_check(en->target, env, 1); + if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION; + CLEAR_ENCLOSE_STATUS(node, NST_MARK1); } r = subexp_inf_recursive_check_trav(en->target, env); } @@ -3086,8 +3086,8 @@ subexp_recursive_check(Node* node) case ANCHOR_PREC_READ_NOT: case ANCHOR_LOOK_BEHIND: case ANCHOR_LOOK_BEHIND_NOT: - r = 
subexp_recursive_check(an->target); - break; + r = subexp_recursive_check(an->target); + break; } } break; @@ -3132,9 +3132,9 @@ subexp_recursive_check_trav(Node* node, ScanEnv* env) { int ret; do { - ret = subexp_recursive_check_trav(NCAR(node), env); - if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE; - else if (ret < 0) return ret; + ret = subexp_recursive_check_trav(NCAR(node), env); + if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE; + else if (ret < 0) return ret; } while (IS_NOT_NULL(node = NCDR(node))); } break; @@ -3143,7 +3143,7 @@ subexp_recursive_check_trav(Node* node, ScanEnv* env) r = subexp_recursive_check_trav(NQTFR(node)->target, env); if (NQTFR(node)->upper == 0) { if (r == FOUND_CALLED_NODE) - NQTFR(node)->is_referred = 1; + NQTFR(node)->is_referred = 1; } break; @@ -3155,8 +3155,8 @@ subexp_recursive_check_trav(Node* node, ScanEnv* env) case ANCHOR_PREC_READ_NOT: case ANCHOR_LOOK_BEHIND: case ANCHOR_LOOK_BEHIND_NOT: - r = subexp_recursive_check_trav(an->target, env); - break; + r = subexp_recursive_check_trav(an->target, env); + break; } } break; @@ -3166,16 +3166,16 @@ subexp_recursive_check_trav(Node* node, ScanEnv* env) EncloseNode* en = NENCLOSE(node); if (! 
IS_ENCLOSE_RECURSION(en)) { - if (IS_ENCLOSE_CALLED(en)) { - SET_ENCLOSE_STATUS(node, NST_MARK1); - r = subexp_recursive_check(en->target); - if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION); - CLEAR_ENCLOSE_STATUS(node, NST_MARK1); - } + if (IS_ENCLOSE_CALLED(en)) { + SET_ENCLOSE_STATUS(node, NST_MARK1); + r = subexp_recursive_check(en->target); + if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION); + CLEAR_ENCLOSE_STATUS(node, NST_MARK1); + } } r = subexp_recursive_check_trav(en->target, env); if (IS_ENCLOSE_CALLED(en)) - r |= FOUND_CALLED_NODE; + r |= FOUND_CALLED_NODE; } break; @@ -3219,60 +3219,60 @@ setup_subexp_call(Node* node, ScanEnv* env) Node** nodes = SCANENV_MEM_NODES(env); if (cn->group_num != 0) { - int gnum = cn->group_num; + int gnum = cn->group_num; # ifdef USE_NAMED_GROUP - if (env->num_named > 0 && - IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && - !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { - return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; - } + if (env->num_named > 0 && + IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && + !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { + return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; + } # endif - if (gnum > env->num_mem) { - onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end); - return ONIGERR_UNDEFINED_GROUP_REFERENCE; - } + if (gnum > env->num_mem) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end); + return ONIGERR_UNDEFINED_GROUP_REFERENCE; + } # ifdef USE_NAMED_GROUP set_call_attr: # endif - cn->target = nodes[cn->group_num]; - if (IS_NULL(cn->target)) { - onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); - return ONIGERR_UNDEFINED_NAME_REFERENCE; - } - SET_ENCLOSE_STATUS(cn->target, NST_CALLED); - BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num); - cn->unset_addr_list = 
env->unset_addr_list; + cn->target = nodes[cn->group_num]; + if (IS_NULL(cn->target)) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } + SET_ENCLOSE_STATUS(cn->target, NST_CALLED); + BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num); + cn->unset_addr_list = env->unset_addr_list; } # ifdef USE_NAMED_GROUP # ifdef USE_PERL_SUBEXP_CALL else if (cn->name == cn->name_end) { - goto set_call_attr; + goto set_call_attr; } # endif else { - int *refs; - - int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, - &refs); - if (n <= 0) { - onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); - return ONIGERR_UNDEFINED_NAME_REFERENCE; - } - else if (n > 1 && - ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) { - onig_scan_env_set_error_string(env, - ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end); - return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL; - } - else { - cn->group_num = refs[0]; - goto set_call_attr; - } + int *refs; + + int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, + &refs); + if (n <= 0) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } + else if (n > 1 && + ! 
IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) { + onig_scan_env_set_error_string(env, + ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end); + return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL; + } + else { + cn->group_num = refs[0]; + goto set_call_attr; + } } # endif } @@ -3287,8 +3287,8 @@ setup_subexp_call(Node* node, ScanEnv* env) case ANCHOR_PREC_READ_NOT: case ANCHOR_LOOK_BEHIND: case ANCHOR_LOOK_BEHIND_NOT: - r = setup_subexp_call(an->target, env); - break; + r = setup_subexp_call(an->target, env); + break; } } break; @@ -3370,26 +3370,26 @@ next_setup(Node* node, Node* next_node, regex_t* reg) Node* n = get_head_value_node(next_node, 1, reg); /* '\0': for UTF-16BE etc... */ if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') { - qn->next_head_exact = n; + qn->next_head_exact = n; } #endif /* automatic possessification a*b ==> (?>a*)b */ if (qn->lower <= 1) { - int ttype = NTYPE(qn->target); - if (IS_NODE_TYPE_SIMPLE(ttype)) { - Node *x, *y; - x = get_head_value_node(qn->target, 0, reg); - if (IS_NOT_NULL(x)) { - y = get_head_value_node(next_node, 0, reg); - if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) { - Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK); - CHECK_NULL_RETURN_MEMERR(en); - SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT); - swap_node(node, en); - NENCLOSE(node)->target = en; - } - } - } + int ttype = NTYPE(qn->target); + if (IS_NODE_TYPE_SIMPLE(ttype)) { + Node *x, *y; + x = get_head_value_node(qn->target, 0, reg); + if (IS_NOT_NULL(x)) { + y = get_head_value_node(next_node, 0, reg); + if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) { + Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK); + CHECK_NULL_RETURN_MEMERR(en); + SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT); + swap_node(node, en); + NENCLOSE(node)->target = en; + } + } + } } } } @@ -3425,15 +3425,15 @@ update_string_node_case_fold(regex_t* reg, Node *node) len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf); 
for (i = 0; i < len; i++) { if (sp >= ebuf) { - UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2); - if (IS_NULL(p)) { - xfree(sbuf); - return ONIGERR_MEMORY; - } - sbuf = p; - sp = sbuf + sbuf_size; - sbuf_size *= 2; - ebuf = sbuf + sbuf_size; + UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2); + if (IS_NULL(p)) { + xfree(sbuf); + return ONIGERR_MEMORY; + } + sbuf = p; + sp = sbuf + sbuf_size; + sbuf_size *= 2; + ebuf = sbuf + sbuf_size; } *sp++ = buf[i]; @@ -3448,7 +3448,7 @@ update_string_node_case_fold(regex_t* reg, Node *node) static int expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, - regex_t* reg) + regex_t* reg) { int r; Node *node; @@ -3470,7 +3470,7 @@ expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, static int is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[], - int slen) + int slen) { int i; @@ -3487,8 +3487,8 @@ is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[], static int expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], - UChar *p, int slen, UChar *end, - regex_t* reg, Node **rnode) + UChar *p, int slen, UChar *end, + regex_t* reg, Node **rnode) { int r, i, j, len, varlen; Node *anode, *var_anode, *snode, *xnode, *an; @@ -3533,8 +3533,8 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], for (j = 0; j < items[i].code_len; j++) { len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf); if (len < 0) { - r = len; - goto mem_err2; + r = len; + goto mem_err2; } r = onig_node_str_cat(snode, buf, buf + len); @@ -3551,29 +3551,29 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *q = p + items[i].byte_len; if (q < end) { - r = expand_case_fold_make_rem_string(&rem, q, end, reg); - if (r != 0) { - onig_node_free(an); - goto mem_err2; - } - - xnode = onig_node_list_add(NULL_NODE, snode); - if (IS_NULL(xnode)) { - onig_node_free(an); - onig_node_free(rem); - goto mem_err2; - } - if 
(IS_NULL(onig_node_list_add(xnode, rem))) { - onig_node_free(an); - onig_node_free(xnode); - onig_node_free(rem); - goto mem_err; - } - - NCAR(an) = xnode; + r = expand_case_fold_make_rem_string(&rem, q, end, reg); + if (r != 0) { + onig_node_free(an); + goto mem_err2; + } + + xnode = onig_node_list_add(NULL_NODE, snode); + if (IS_NULL(xnode)) { + onig_node_free(an); + onig_node_free(rem); + goto mem_err2; + } + if (IS_NULL(onig_node_list_add(xnode, rem))) { + onig_node_free(an); + onig_node_free(xnode); + onig_node_free(rem); + goto mem_err; + } + + NCAR(an) = xnode; } else { - NCAR(an) = snode; + NCAR(an) = snode; } NCDR(var_anode) = an; @@ -3621,7 +3621,7 @@ expand_case_fold_string(Node* node, regex_t* reg) p = start; while (p < end) { n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, - p, end, items); + p, end, items); if (n < 0) { r = n; goto err; @@ -3632,23 +3632,23 @@ expand_case_fold_string(Node* node, regex_t* reg) varlen = is_case_fold_variable_len(n, items, len); if (n == 0 || varlen == 0) { if (IS_NULL(snode)) { - if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { + if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { onig_node_free(top_root); - top_root = root = onig_node_list_add(NULL_NODE, prev_node); - if (IS_NULL(root)) { - onig_node_free(prev_node); - goto mem_err; - } - } - - prev_node = snode = onig_node_new_str(NULL, NULL); - if (IS_NULL(snode)) goto mem_err; - if (IS_NOT_NULL(root)) { - if (IS_NULL(onig_node_list_add(root, snode))) { - onig_node_free(snode); - goto mem_err; - } - } + top_root = root = onig_node_list_add(NULL_NODE, prev_node); + if (IS_NULL(root)) { + onig_node_free(prev_node); + goto mem_err; + } + } + + prev_node = snode = onig_node_new_str(NULL, NULL); + if (IS_NULL(snode)) goto mem_err; + if (IS_NOT_NULL(root)) { + if (IS_NULL(onig_node_list_add(root, snode))) { + onig_node_free(snode); + goto mem_err; + } + } } r = onig_node_str_cat(snode, p, p + len); @@ -3659,42 +3659,42 @@ expand_case_fold_string(Node* node, 
regex_t* reg) if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break; if (IS_NOT_NULL(snode)) { - r = update_string_node_case_fold(reg, snode); - if (r == 0) { - NSTRING_SET_AMBIG(snode); - } + r = update_string_node_case_fold(reg, snode); + if (r == 0) { + NSTRING_SET_AMBIG(snode); + } } if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { onig_node_free(top_root); - top_root = root = onig_node_list_add(NULL_NODE, prev_node); - if (IS_NULL(root)) { - onig_node_free(prev_node); - goto mem_err; - } + top_root = root = onig_node_list_add(NULL_NODE, prev_node); + if (IS_NULL(root)) { + onig_node_free(prev_node); + goto mem_err; + } } r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node); if (r < 0) goto mem_err; if (r == 1) { - if (IS_NULL(root)) { - top_root = prev_node; - } - else { - if (IS_NULL(onig_node_list_add(root, prev_node))) { - onig_node_free(prev_node); - goto mem_err; - } - } - - root = NCAR(prev_node); + if (IS_NULL(root)) { + top_root = prev_node; + } + else { + if (IS_NULL(onig_node_list_add(root, prev_node))) { + onig_node_free(prev_node); + goto mem_err; + } + } + + root = NCAR(prev_node); } else { /* r == 0 */ - if (IS_NOT_NULL(root)) { - if (IS_NULL(onig_node_list_add(root, prev_node))) { - onig_node_free(prev_node); - goto mem_err; - } - } + if (IS_NOT_NULL(root)) { + if (IS_NULL(onig_node_list_add(root, prev_node))) { + onig_node_free(prev_node); + goto mem_err; + } + } } snode = NULL_NODE; @@ -3719,9 +3719,9 @@ expand_case_fold_string(Node* node, regex_t* reg) onig_node_free(top_root); top_root = root = onig_node_list_add(NULL_NODE, prev_node); if (IS_NULL(root)) { - onig_node_free(srem); - onig_node_free(prev_node); - goto mem_err; + onig_node_free(srem); + onig_node_free(prev_node); + goto mem_err; } } @@ -3730,8 +3730,8 @@ expand_case_fold_string(Node* node, regex_t* reg) } else { if (IS_NULL(onig_node_list_add(root, srem))) { - onig_node_free(srem); - goto mem_err; + onig_node_free(srem); + goto mem_err; } } } @@ -3771,7 
+3771,7 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) case NT_LIST: { do { - r = setup_comb_exp_check(NCAR(node), r, env); + r = setup_comb_exp_check(NCAR(node), r, env); } while (r >= 0 && IS_NOT_NULL(node = NCDR(node))); } break; @@ -3780,8 +3780,8 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) { int ret; do { - ret = setup_comb_exp_check(NCAR(node), state, env); - r |= ret; + ret = setup_comb_exp_check(NCAR(node), state, env); + r |= ret; } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node))); } break; @@ -3795,55 +3795,55 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) int var_num; if (! IS_REPEAT_INFINITE(qn->upper)) { - if (qn->upper > 1) { - /* {0,1}, {1,1} are allowed */ - child_state |= CEC_IN_FINITE_REPEAT; - - /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */ - if (env->backrefed_mem == 0) { - if (NTYPE(qn->target) == NT_ENCLOSE) { - EncloseNode* en = NENCLOSE(qn->target); - if (en->type == ENCLOSE_MEMORY) { - if (NTYPE(en->target) == NT_QTFR) { - QtfrNode* q = NQTFR(en->target); - if (IS_REPEAT_INFINITE(q->upper) - && q->greedy == qn->greedy) { - qn->upper = (qn->lower == 0 ? 1 : qn->lower); - if (qn->upper == 1) - child_state = state; - } - } - } - } - } - } + if (qn->upper > 1) { + /* {0,1}, {1,1} are allowed */ + child_state |= CEC_IN_FINITE_REPEAT; + + /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */ + if (env->backrefed_mem == 0) { + if (NTYPE(qn->target) == NT_ENCLOSE) { + EncloseNode* en = NENCLOSE(qn->target); + if (en->type == ENCLOSE_MEMORY) { + if (NTYPE(en->target) == NT_QTFR) { + QtfrNode* q = NQTFR(en->target); + if (IS_REPEAT_INFINITE(q->upper) + && q->greedy == qn->greedy) { + qn->upper = (qn->lower == 0 ? 
1 : qn->lower); + if (qn->upper == 1) + child_state = state; + } + } + } + } + } + } } if (state & CEC_IN_FINITE_REPEAT) { - qn->comb_exp_check_num = -1; + qn->comb_exp_check_num = -1; } else { - if (IS_REPEAT_INFINITE(qn->upper)) { - var_num = CEC_INFINITE_NUM; - child_state |= CEC_IN_INFINITE_REPEAT; - } - else { - var_num = qn->upper - qn->lower; - } - - if (var_num >= CEC_THRES_NUM_BIG_REPEAT) - add_state |= CEC_CONT_BIG_REPEAT; - - if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) || - ((state & CEC_CONT_BIG_REPEAT) != 0 && - var_num >= CEC_THRES_NUM_BIG_REPEAT)) { - if (qn->comb_exp_check_num == 0) { - env->num_comb_exp_check++; - qn->comb_exp_check_num = env->num_comb_exp_check; - if (env->curr_max_regnum > env->comb_exp_max_regnum) - env->comb_exp_max_regnum = env->curr_max_regnum; - } - } + if (IS_REPEAT_INFINITE(qn->upper)) { + var_num = CEC_INFINITE_NUM; + child_state |= CEC_IN_INFINITE_REPEAT; + } + else { + var_num = qn->upper - qn->lower; + } + + if (var_num >= CEC_THRES_NUM_BIG_REPEAT) + add_state |= CEC_CONT_BIG_REPEAT; + + if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) || + ((state & CEC_CONT_BIG_REPEAT) != 0 && + var_num >= CEC_THRES_NUM_BIG_REPEAT)) { + if (qn->comb_exp_check_num == 0) { + env->num_comb_exp_check++; + qn->comb_exp_check_num = env->num_comb_exp_check; + if (env->curr_max_regnum > env->comb_exp_max_regnum) + env->comb_exp_max_regnum = env->curr_max_regnum; + } + } } r = setup_comb_exp_check(target, child_state, env); @@ -3857,17 +3857,17 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) switch (en->type) { case ENCLOSE_MEMORY: - { - if (env->curr_max_regnum < en->regnum) - env->curr_max_regnum = en->regnum; + { + if (env->curr_max_regnum < en->regnum) + env->curr_max_regnum = en->regnum; - r = setup_comb_exp_check(en->target, state, env); - } - break; + r = setup_comb_exp_check(en->target, state, env); + } + break; default: - r = setup_comb_exp_check(en->target, state, env); - break; + r = 
setup_comb_exp_check(en->target, state, env); + break; } } break; @@ -3917,11 +3917,11 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) { Node* prev = NULL_NODE; do { - r = setup_tree(NCAR(node), reg, state, env); - if (IS_NOT_NULL(prev) && r == 0) { - r = next_setup(prev, NCAR(node), reg); - } - prev = NCAR(node); + r = setup_tree(NCAR(node), reg, state, env); + if (IS_NOT_NULL(prev) && r == 0) { + r = next_setup(prev, NCAR(node), reg); + } + prev = NCAR(node); } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); } break; @@ -3958,15 +3958,15 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) BRefNode* br = NBREF(node); p = BACKREFS_P(br); for (i = 0; i < br->back_num; i++) { - if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; - BIT_STATUS_ON_AT(env->backrefed_mem, p[i]); - BIT_STATUS_ON_AT(env->bt_mem_start, p[i]); + if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; + BIT_STATUS_ON_AT(env->backrefed_mem, p[i]); + BIT_STATUS_ON_AT(env->bt_mem_start, p[i]); #ifdef USE_BACKREF_WITH_LEVEL - if (IS_BACKREF_NEST_LEVEL(br)) { - BIT_STATUS_ON_AT(env->bt_mem_end, p[i]); - } + if (IS_BACKREF_NEST_LEVEL(br)) { + BIT_STATUS_ON_AT(env->bt_mem_end, p[i]); + } #endif - SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED); + SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED); } } break; @@ -3978,100 +3978,100 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) Node* target = qn->target; if ((state & IN_REPEAT) != 0) { - qn->state |= NST_IN_REPEAT; + qn->state |= NST_IN_REPEAT; } if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) { - r = get_min_match_length(target, &d, env); - if (r) break; - if (d == 0) { - qn->target_empty_info = NQ_TARGET_IS_EMPTY; + r = get_min_match_length(target, &d, env); + if (r) break; + if (d == 0) { + qn->target_empty_info = NQ_TARGET_IS_EMPTY; #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT - r = quantifiers_memory_node_info(target); - if (r < 0) break; - if (r > 0) { - 
qn->target_empty_info = r; - } + r = quantifiers_memory_node_info(target); + if (r < 0) break; + if (r > 0) { + qn->target_empty_info = r; + } #endif #if 0 - r = get_max_match_length(target, &d, env); - if (r == 0 && d == 0) { - /* ()* ==> ()?, ()+ ==> () */ - qn->upper = 1; - if (qn->lower > 1) qn->lower = 1; - if (NTYPE(target) == NT_STR) { - qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */ - } - } + r = get_max_match_length(target, &d, env); + if (r == 0 && d == 0) { + /* ()* ==> ()?, ()+ ==> () */ + qn->upper = 1; + if (qn->lower > 1) qn->lower = 1; + if (NTYPE(target) == NT_STR) { + qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */ + } + } #endif - } + } } state |= IN_REPEAT; if (qn->lower != qn->upper) - state |= IN_VAR_REPEAT; + state |= IN_VAR_REPEAT; r = setup_tree(target, reg, state, env); if (r) break; /* expand string */ #define EXPAND_STRING_MAX_LENGTH 100 if (NTYPE(target) == NT_STR) { - if (qn->lower > 1) { - int i, n = qn->lower; - OnigDistance len = NSTRING_LEN(target); - StrNode* sn = NSTR(target); - Node* np; - - np = onig_node_new_str(sn->s, sn->end); - if (IS_NULL(np)) return ONIGERR_MEMORY; - NSTR(np)->flag = sn->flag; - - for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) { - r = onig_node_str_cat(np, sn->s, sn->end); - if (r) { - onig_node_free(np); - return r; - } - } - if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) { - Node *np1, *np2; - - qn->lower -= i; - if (! 
IS_REPEAT_INFINITE(qn->upper)) - qn->upper -= i; - - np1 = onig_node_new_list(np, NULL); - if (IS_NULL(np1)) { - onig_node_free(np); - return ONIGERR_MEMORY; - } - swap_node(np1, node); - np2 = onig_node_list_add(node, np1); - if (IS_NULL(np2)) { - onig_node_free(np1); - return ONIGERR_MEMORY; - } - } - else { - swap_node(np, node); - onig_node_free(np); - } - break; /* break case NT_QTFR: */ - } + if (qn->lower > 1) { + int i, n = qn->lower; + OnigDistance len = NSTRING_LEN(target); + StrNode* sn = NSTR(target); + Node* np; + + np = onig_node_new_str(sn->s, sn->end); + if (IS_NULL(np)) return ONIGERR_MEMORY; + NSTR(np)->flag = sn->flag; + + for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) { + r = onig_node_str_cat(np, sn->s, sn->end); + if (r) { + onig_node_free(np); + return r; + } + } + if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) { + Node *np1, *np2; + + qn->lower -= i; + if (! IS_REPEAT_INFINITE(qn->upper)) + qn->upper -= i; + + np1 = onig_node_new_list(np, NULL); + if (IS_NULL(np1)) { + onig_node_free(np); + return ONIGERR_MEMORY; + } + swap_node(np1, node); + np2 = onig_node_list_add(node, np1); + if (IS_NULL(np2)) { + onig_node_free(np1); + return ONIGERR_MEMORY; + } + } + else { + swap_node(np, node); + onig_node_free(np); + } + break; /* break case NT_QTFR: */ + } } #ifdef USE_OP_PUSH_OR_JUMP_EXACT if (qn->greedy && (qn->target_empty_info != 0)) { - if (NTYPE(target) == NT_QTFR) { - QtfrNode* tqn = NQTFR(target); - if (IS_NOT_NULL(tqn->head_exact)) { - qn->head_exact = tqn->head_exact; - tqn->head_exact = NULL; - } - } - else { - qn->head_exact = get_head_value_node(qn->target, 1, reg); - } + if (NTYPE(target) == NT_QTFR) { + QtfrNode* tqn = NQTFR(target); + if (IS_NOT_NULL(tqn->head_exact)) { + qn->head_exact = tqn->head_exact; + tqn->head_exact = NULL; + } + } + else { + qn->head_exact = get_head_value_node(qn->target, 1, reg); + } } #endif } @@ -4083,61 +4083,61 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) 
switch (en->type) { case ENCLOSE_OPTION: - { - OnigOptionType options = reg->options; - reg->options = NENCLOSE(node)->option; - r = setup_tree(NENCLOSE(node)->target, reg, state, env); - reg->options = options; - } - break; + { + OnigOptionType options = reg->options; + reg->options = NENCLOSE(node)->option; + r = setup_tree(NENCLOSE(node)->target, reg, state, env); + reg->options = options; + } + break; case ENCLOSE_MEMORY: - if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) { - BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum); - /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ - } - if (IS_ENCLOSE_CALLED(en)) - state |= IN_CALL; - if (IS_ENCLOSE_RECURSION(en)) - state |= IN_RECCALL; - else if ((state & IN_RECCALL) != 0) - SET_CALL_RECURSION(node); - r = setup_tree(en->target, reg, state, env); - break; + if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) { + BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum); + /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ + } + if (IS_ENCLOSE_CALLED(en)) + state |= IN_CALL; + if (IS_ENCLOSE_RECURSION(en)) + state |= IN_RECCALL; + else if ((state & IN_RECCALL) != 0) + SET_CALL_RECURSION(node); + r = setup_tree(en->target, reg, state, env); + break; case ENCLOSE_STOP_BACKTRACK: - { - Node* target = en->target; - r = setup_tree(target, reg, state, env); - if (NTYPE(target) == NT_QTFR) { - QtfrNode* tqn = NQTFR(target); - if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 && - tqn->greedy != 0) { /* (?>a*), a*+ etc... */ - int qtype = NTYPE(tqn->target); - if (IS_NODE_TYPE_SIMPLE(qtype)) - SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT); - } - } - } - break; + { + Node* target = en->target; + r = setup_tree(target, reg, state, env); + if (NTYPE(target) == NT_QTFR) { + QtfrNode* tqn = NQTFR(target); + if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 && + tqn->greedy != 0) { /* (?>a*), a*+ etc... 
*/ + int qtype = NTYPE(tqn->target); + if (IS_NODE_TYPE_SIMPLE(qtype)) + SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT); + } + } + } + break; case ENCLOSE_CONDITION: #ifdef USE_NAMED_GROUP - if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) && - env->num_named > 0 && - IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && - !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { - return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; - } + if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) && + env->num_named > 0 && + IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && + !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { + return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; + } #endif - if (NENCLOSE(node)->regnum > env->num_mem) - return ONIGERR_INVALID_BACKREF; - r = setup_tree(NENCLOSE(node)->target, reg, state, env); - break; + if (NENCLOSE(node)->regnum > env->num_mem) + return ONIGERR_INVALID_BACKREF; + r = setup_tree(NENCLOSE(node)->target, reg, state, env); + break; case ENCLOSE_ABSENT: - r = setup_tree(NENCLOSE(node)->target, reg, state, env); - break; + r = setup_tree(NENCLOSE(node)->target, reg, state, env); + break; } } break; @@ -4148,11 +4148,11 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) switch (an->type) { case ANCHOR_PREC_READ: - r = setup_tree(an->target, reg, state, env); - break; + r = setup_tree(an->target, reg, state, env); + break; case ANCHOR_PREC_READ_NOT: - r = setup_tree(an->target, reg, (state | IN_NOT), env); - break; + r = setup_tree(an->target, reg, (state | IN_NOT), env); + break; /* allowed node types in look-behind */ #define ALLOWED_TYPE_IN_LB \ @@ -4174,30 +4174,30 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) ANCHOR_WORD_BEGIN | ANCHOR_WORD_END ) case ANCHOR_LOOK_BEHIND: - { - r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, - ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB); - if (r < 0) return r; - if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - if 
(NTYPE(node) != NT_ANCHOR) goto restart; - r = setup_tree(an->target, reg, state, env); - if (r != 0) return r; - r = setup_look_behind(node, reg, env); - } - break; + { + r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, + ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB); + if (r < 0) return r; + if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + if (NTYPE(node) != NT_ANCHOR) goto restart; + r = setup_tree(an->target, reg, state, env); + if (r != 0) return r; + r = setup_look_behind(node, reg, env); + } + break; case ANCHOR_LOOK_BEHIND_NOT: - { - r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, - ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); - if (r < 0) return r; - if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - if (NTYPE(node) != NT_ANCHOR) goto restart; - r = setup_tree(an->target, reg, (state | IN_NOT), env); - if (r != 0) return r; - r = setup_look_behind(node, reg, env); - } - break; + { + r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, + ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); + if (r < 0) return r; + if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + if (NTYPE(node) != NT_ANCHOR) goto restart; + r = setup_tree(an->target, reg, (state | IN_NOT), env); + if (r != 0) return r; + r = setup_look_behind(node, reg, env); + } + break; } } break; @@ -4213,7 +4213,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) /* set skip map for Boyer-Moore search */ static int set_bm_skip(UChar* s, UChar* end, regex_t* reg, - UChar skip[], int** int_skip, int ignore_case) + UChar skip[], int** int_skip, int ignore_case) { OnigDistance i, len; int clen, flen, n, j, k; @@ -4229,24 +4229,24 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg, for (i = 0; i < len - 1; i += clen) { p = s + i; if (ignore_case) - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, - p, end, items); + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, + p, end, items); clen = enclen(enc, p, end); if (p + clen > 
end) - clen = (int )(end - p); + clen = (int )(end - p); for (j = 0; j < n; j++) { - if ((items[j].code_len != 1) || (items[j].byte_len != clen)) - return 1; /* different length isn't supported. */ - flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]); - if (flen != clen) - return 1; /* different length isn't supported. */ + if ((items[j].code_len != 1) || (items[j].byte_len != clen)) + return 1; /* different length isn't supported. */ + flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]); + if (flen != clen) + return 1; /* different length isn't supported. */ } for (j = 0; j < clen; j++) { - skip[s[i + j]] = (UChar )(len - 1 - i - j); - for (k = 0; k < n; k++) { - skip[buf[k][j]] = (UChar )(len - 1 - i - j); - } + skip[s[i + j]] = (UChar )(len - 1 - i - j); + for (k = 0; k < n; k++) { + skip[buf[k][j]] = (UChar )(len - 1 - i - j); + } } } } @@ -4265,24 +4265,24 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg, for (i = 0; i < len - 1; i += clen) { p = s + i; if (ignore_case) - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, - p, end, items); + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, + p, end, items); clen = enclen(enc, p, end); if (p + clen > end) - clen = (int )(end - p); + clen = (int )(end - p); for (j = 0; j < n; j++) { - if ((items[j].code_len != 1) || (items[j].byte_len != clen)) - return 1; /* different length isn't supported. */ - flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]); - if (flen != clen) - return 1; /* different length isn't supported. */ + if ((items[j].code_len != 1) || (items[j].byte_len != clen)) + return 1; /* different length isn't supported. */ + flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]); + if (flen != clen) + return 1; /* different length isn't supported. 
*/ } for (j = 0; j < clen; j++) { - (*int_skip)[s[i + j]] = (int )(len - 1 - i - j); - for (k = 0; k < n; k++) { - (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j); - } + (*int_skip)[s[i + j]] = (int )(len - 1 - i - j); + for (k = 0; k < n; k++) { + (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j); + } } } # endif @@ -4295,7 +4295,7 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg, /* set skip map for Sunday's quick search */ static int set_bm_skip(UChar* s, UChar* end, regex_t* reg, - UChar skip[], int** int_skip, int ignore_case) + UChar skip[], int** int_skip, int ignore_case) { OnigDistance i, len; int clen, flen, n, j, k; @@ -4311,24 +4311,24 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg, for (i = 0; i < len; i += clen) { p = s + i; if (ignore_case) - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, - p, end, items); + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, + p, end, items); clen = enclen(enc, p, end); if (p + clen > end) - clen = (int )(end - p); + clen = (int )(end - p); for (j = 0; j < n; j++) { - if ((items[j].code_len != 1) || (items[j].byte_len != clen)) - return 1; /* different length isn't supported. */ - flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]); - if (flen != clen) - return 1; /* different length isn't supported. */ + if ((items[j].code_len != 1) || (items[j].byte_len != clen)) + return 1; /* different length isn't supported. */ + flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]); + if (flen != clen) + return 1; /* different length isn't supported. 
*/ } for (j = 0; j < clen; j++) { - skip[s[i + j]] = (UChar )(len - i - j); - for (k = 0; k < n; k++) { - skip[buf[k][j]] = (UChar )(len - i - j); - } + skip[s[i + j]] = (UChar )(len - i - j); + for (k = 0; k < n; k++) { + skip[buf[k][j]] = (UChar )(len - i - j); + } } } } @@ -4347,24 +4347,24 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg, for (i = 0; i < len; i += clen) { p = s + i; if (ignore_case) - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, - p, end, items); + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, + p, end, items); clen = enclen(enc, p, end); if (p + clen > end) - clen = (int )(end - p); + clen = (int )(end - p); for (j = 0; j < n; j++) { - if ((items[j].code_len != 1) || (items[j].byte_len != clen)) - return 1; /* different length isn't supported. */ - flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]); - if (flen != clen) - return 1; /* different length isn't supported. */ + if ((items[j].code_len != 1) || (items[j].byte_len != clen)) + return 1; /* different length isn't supported. */ + flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]); + if (flen != clen) + return 1; /* different length isn't supported. 
*/ } for (j = 0; j < clen; j++) { - (*int_skip)[s[i + j]] = (int )(len - i - j); - for (k = 0; k < n; k++) { - (*int_skip)[buf[k][j]] = (int )(len - i - j); - } + (*int_skip)[s[i + j]] = (int )(len - i - j); + for (k = 0; k < n; k++) { + (*int_skip)[buf[k][j]] = (int )(len - i - j); + } } } # endif @@ -4562,7 +4562,7 @@ copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from) static void concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right, - OnigDistance left_len, OnigDistance right_len) + OnigDistance left_len, OnigDistance right_len) { clear_opt_anc_info(to); @@ -4678,7 +4678,7 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) static void concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end, - int raw ARG_UNUSED, OnigEncoding enc) + int raw ARG_UNUSED, OnigEncoding enc) { int i, j, len; UChar *p; @@ -4923,7 +4923,7 @@ concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) if (add->exb.len > 0 && to->len.max == 0) { concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, - to->len.max, add->len.max); + to->len.max, add->len.max); copy_opt_anc_info(&add->exb.anc, &tanc); } @@ -4954,12 +4954,12 @@ concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) if (to->expr.len > 0) { if (add->len.max > 0) { if (to->expr.len > (int )add->len.max) - to->expr.len = (int )add->len.max; + to->expr.len = (int )add->len.max; if (to->expr.mmd.max == 0) - select_opt_exact_info(enc, &to->exb, &to->expr); + select_opt_exact_info(enc, &to->exb, &to->expr); else - select_opt_exact_info(enc, &to->exm, &to->expr); + select_opt_exact_info(enc, &to->exm, &to->expr); } } else if (add->expr.len > 0) { @@ -5005,11 +5005,11 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) copy_opt_env(&nenv, env); do { - r = optimize_node_left(NCAR(nd), &nopt, &nenv); - if (r == 0) { - add_mml(&nenv.mmd, &nopt.len); - concat_left_node_opt_info(env->enc, opt, &nopt); - } + r = 
optimize_node_left(NCAR(nd), &nopt, &nenv); + if (r == 0) { + add_mml(&nenv.mmd, &nopt.len); + concat_left_node_opt_info(env->enc, opt, &nopt); + } } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd))); } break; @@ -5020,11 +5020,11 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) Node* nd = node; do { - r = optimize_node_left(NCAR(nd), &nopt, env); - if (r == 0) { - if (nd == node) copy_node_opt_info(opt, &nopt); - else alt_merge_node_opt_info(opt, &nopt, env); - } + r = optimize_node_left(NCAR(nd), &nopt, env); + if (r == 0) { + if (nd == node) copy_node_opt_info(opt, &nopt); + else alt_merge_node_opt_info(opt, &nopt, env); + } } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd))); } break; @@ -5036,40 +5036,40 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) int is_raw = NSTRING_IS_RAW(node); if (! NSTRING_IS_AMBIG(node)) { - concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, - is_raw, env->enc); - opt->exb.ignore_case = 0; - if (slen > 0) { - add_char_opt_map_info(&opt->map, *(sn->s), env->enc); - } - set_mml(&opt->len, slen, slen); + concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, + is_raw, env->enc); + opt->exb.ignore_case = 0; + if (slen > 0) { + add_char_opt_map_info(&opt->map, *(sn->s), env->enc); + } + set_mml(&opt->len, slen, slen); } else { - OnigDistance max; - - if (NSTRING_IS_DONT_GET_OPT_INFO(node)) { - int n = onigenc_strlen(env->enc, sn->s, sn->end); - max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * (OnigDistance)n; - } - else { - concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, - is_raw, env->enc); - opt->exb.ignore_case = 1; - - if (slen > 0) { - r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end, - env->enc, env->case_fold_flag); - if (r != 0) break; - } - - max = slen; - } - - set_mml(&opt->len, slen, max); + OnigDistance max; + + if (NSTRING_IS_DONT_GET_OPT_INFO(node)) { + int n = onigenc_strlen(env->enc, sn->s, sn->end); + max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * (OnigDistance)n; + } + else { + 
concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, + is_raw, env->enc); + opt->exb.ignore_case = 1; + + if (slen > 0) { + r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end, + env->enc, env->case_fold_flag); + if (r != 0) break; + } + + max = slen; + } + + set_mml(&opt->len, slen, max); } if ((OnigDistance )opt->exb.len == slen) - opt->exb.reach_end = 1; + opt->exb.reach_end = 1; } break; @@ -5081,19 +5081,19 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) /* no need to check ignore case. (set in setup_tree()) */ if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) { - OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); - OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); + OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); - set_mml(&opt->len, min, max); + set_mml(&opt->len, min, max); } else { - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - z = BITSET_AT(cc->bs, i); - if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) { - add_char_opt_map_info(&opt->map, (UChar )i, env->enc); - } - } - set_mml(&opt->len, 1, 1); + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + z = BITSET_AT(cc->bs, i); + if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) { + add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + } + } + set_mml(&opt->len, 1, 1); } } break; @@ -5106,30 +5106,30 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) max = ONIGENC_MBC_MAXLEN_DIST(env->enc); if (max == 1) { - min = 1; - - maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE; - switch (NCTYPE(node)->ctype) { - case ONIGENC_CTYPE_WORD: - if (NCTYPE(node)->not != 0) { - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (! 
ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) { - add_char_opt_map_info(&opt->map, (UChar )i, env->enc); - } - } - } - else { - for (i = 0; i < maxcode; i++) { - if (ONIGENC_IS_CODE_WORD(env->enc, i)) { - add_char_opt_map_info(&opt->map, (UChar )i, env->enc); - } - } - } - break; - } + min = 1; + + maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE; + switch (NCTYPE(node)->ctype) { + case ONIGENC_CTYPE_WORD: + if (NCTYPE(node)->not != 0) { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) { + add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + } + } + } + else { + for (i = 0; i < maxcode; i++) { + if (ONIGENC_IS_CODE_WORD(env->enc, i)) { + add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + } + } + } + break; + } } else { - min = ONIGENC_MBC_MINLEN(env->enc); + min = ONIGENC_MBC_MINLEN(env->enc); } set_mml(&opt->len, min, max); } @@ -5158,20 +5158,20 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case ANCHOR_PREC_READ: { - NodeOptInfo nopt; + NodeOptInfo nopt; - r = optimize_node_left(NANCHOR(node)->target, &nopt, env); - if (r == 0) { - if (nopt.exb.len > 0) - copy_opt_exact_info(&opt->expr, &nopt.exb); - else if (nopt.exm.len > 0) - copy_opt_exact_info(&opt->expr, &nopt.exm); + r = optimize_node_left(NANCHOR(node)->target, &nopt, env); + if (r == 0) { + if (nopt.exb.len > 0) + copy_opt_exact_info(&opt->expr, &nopt.exb); + else if (nopt.exm.len > 0) + copy_opt_exact_info(&opt->expr, &nopt.exm); - opt->expr.reach_end = 0; + opt->expr.reach_end = 0; - if (nopt.map.value > 0) - copy_opt_map_info(&opt->map, &nopt.map); - } + if (nopt.map.value > 0) + copy_opt_map_info(&opt->map, &nopt.map); + } } break; @@ -5189,8 +5189,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) BRefNode* br = NBREF(node); if (br->state & NST_RECURSION) { - set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); - break; + set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); + break; } backs = 
BACKREFS_P(br); r = get_min_match_length(nodes[backs[0]], &min, env->scan_env); @@ -5198,12 +5198,12 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) r = get_max_match_length(nodes[backs[0]], &max, env->scan_env); if (r != 0) break; for (i = 1; i < br->back_num; i++) { - r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env); - if (r != 0) break; - r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env); - if (r != 0) break; - if (min > tmin) min = tmin; - if (max < tmax) max = tmax; + r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env); + if (r != 0) break; + r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env); + if (r != 0) break; + if (min > tmin) min = tmin; + if (max < tmax) max = tmax; } if (r == 0) set_mml(&opt->len, min, max); } @@ -5233,44 +5233,44 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) if (r) break; if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn->upper)) { - if (env->mmd.max == 0 && - NTYPE(qn->target) == NT_CANY && qn->greedy) { - if (IS_MULTILINE(env->options)) - /* implicit anchor: /.*a/ ==> /\A.*a/ */ - add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); - else - add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR); - } + if (env->mmd.max == 0 && + NTYPE(qn->target) == NT_CANY && qn->greedy) { + if (IS_MULTILINE(env->options)) + /* implicit anchor: /.*a/ ==> /\A.*a/ */ + add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); + else + add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR); + } } else { - if (qn->lower > 0) { - copy_node_opt_info(opt, &nopt); - if (nopt.exb.len > 0) { - if (nopt.exb.reach_end) { - for (i = 2; i <= qn->lower && - ! 
is_full_opt_exact_info(&opt->exb); i++) { - concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc); - } - if (i < qn->lower) { - opt->exb.reach_end = 0; - } - } - } - - if (qn->lower != qn->upper) { - opt->exb.reach_end = 0; - opt->exm.reach_end = 0; - } - if (qn->lower > 1) - opt->exm.reach_end = 0; - } + if (qn->lower > 0) { + copy_node_opt_info(opt, &nopt); + if (nopt.exb.len > 0) { + if (nopt.exb.reach_end) { + for (i = 2; i <= qn->lower && + ! is_full_opt_exact_info(&opt->exb); i++) { + concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc); + } + if (i < qn->lower) { + opt->exb.reach_end = 0; + } + } + } + + if (qn->lower != qn->upper) { + opt->exb.reach_end = 0; + opt->exm.reach_end = 0; + } + if (qn->lower > 1) + opt->exm.reach_end = 0; + } } min = distance_multiply(nopt.len.min, qn->lower); if (IS_REPEAT_INFINITE(qn->upper)) - max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0); + max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0); else - max = distance_multiply(nopt.len.max, qn->upper); + max = distance_multiply(nopt.len.max, qn->upper); set_mml(&opt->len, min, max); } @@ -5282,47 +5282,47 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) switch (en->type) { case ENCLOSE_OPTION: - { - OnigOptionType save = env->options; + { + OnigOptionType save = env->options; - env->options = en->option; - r = optimize_node_left(en->target, opt, env); - env->options = save; - } - break; + env->options = en->option; + r = optimize_node_left(en->target, opt, env); + env->options = save; + } + break; case ENCLOSE_MEMORY: #ifdef USE_SUBEXP_CALL - en->opt_count++; - if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) { - OnigDistance min, max; - - min = 0; - max = ONIG_INFINITE_DISTANCE; - if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len; - if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len; - set_mml(&opt->len, min, max); - } - else + en->opt_count++; + if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) { + OnigDistance min, max; + + min = 0; + max = 
ONIG_INFINITE_DISTANCE; + if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len; + if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len; + set_mml(&opt->len, min, max); + } + else #endif - { - r = optimize_node_left(en->target, opt, env); + { + r = optimize_node_left(en->target, opt, env); - if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) { - if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum)) - remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK); - } - } - break; + if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) { + if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum)) + remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK); + } + } + break; case ENCLOSE_STOP_BACKTRACK: case ENCLOSE_CONDITION: - r = optimize_node_left(en->target, opt, env); - break; + r = optimize_node_left(en->target, opt, env); + break; case ENCLOSE_ABSENT: - set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); - break; + set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); + break; } } break; @@ -5330,7 +5330,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) default: #ifdef ONIG_DEBUG fprintf(stderr, "optimize_node_left: undefined node type %d\n", - NTYPE(node)); + NTYPE(node)); #endif r = ONIGERR_TYPE_BUG; break; @@ -5353,18 +5353,18 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) reg->exact_end = reg->exact + e->len; allow_reverse = - ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); + ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); if (e->ignore_case > 0) { if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { r = set_bm_skip(reg->exact, reg->exact_end, reg, - reg->map, &(reg->int_map), 1); + reg->map, &(reg->int_map), 1); if (r == 0) { - reg->optimize = (allow_reverse != 0 - ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC); + reg->optimize = (allow_reverse != 0 + ? 
ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC); } else { - reg->optimize = ONIG_OPTIMIZE_EXACT_IC; + reg->optimize = ONIG_OPTIMIZE_EXACT_IC; } } else { @@ -5374,13 +5374,13 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) else { if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { r = set_bm_skip(reg->exact, reg->exact_end, reg, - reg->map, &(reg->int_map), 0); + reg->map, &(reg->int_map), 0); if (r == 0) { - reg->optimize = (allow_reverse != 0 - ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); + reg->optimize = (allow_reverse != 0 + ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); } else { - reg->optimize = ONIG_OPTIMIZE_EXACT; + reg->optimize = ONIG_OPTIMIZE_EXACT; } } else { @@ -5451,7 +5451,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML; reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF | - ANCHOR_PREC_READ_NOT); + ANCHOR_PREC_READ_NOT); if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) { reg->anchor_dmin = opt.len.min; @@ -5461,7 +5461,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) if (opt.exb.len > 0 || opt.exm.len > 0) { select_opt_exact_info(reg->enc, &opt.exb, &opt.exm); if (opt.map.value > 0 && - comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) { + comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) { goto set_map; } else { @@ -5503,7 +5503,7 @@ clear_optimize_info(regex_t* reg) #ifdef ONIG_DEBUG static void print_enc_string(FILE* fp, OnigEncoding enc, - const UChar *s, const UChar *end) + const UChar *s, const UChar *end) { fprintf(fp, "\nPATTERN: /"); @@ -5515,10 +5515,10 @@ static void print_enc_string(FILE* fp, OnigEncoding enc, while (p < end) { code = ONIGENC_MBC_TO_CODE(enc, p, end); if (code >= 0x80) { - fprintf(fp, " 0x%04x ", (int )code); + fprintf(fp, " 0x%04x ", (int )code); } else { - fputc((int )code, fp); + fputc((int )code, fp); } p += enclen(enc, p, end); 
@@ -5639,15 +5639,15 @@ print_optimize_info(FILE* f, regex_t* reg) c = 0; fputc('[', f); for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { - if (reg->map[i] != 0) { - if (c > 0) fputs(", ", f); - c++; - if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 && - ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i)) - fputc(i, f); - else - fprintf(f, "%d", i); - } + if (reg->map[i] != 0) { + if (c > 0) fputs(", ", f); + c++; + if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 && + ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i)) + fputc(i, f); + else + fprintf(f, "%d", i); + } } fprintf(f, "]\n"); } @@ -5806,7 +5806,7 @@ static void print_tree(FILE* f, Node* node); #ifdef RUBY extern int onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, - OnigErrorInfo* einfo) + OnigErrorInfo* einfo) { return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0); } @@ -5815,11 +5815,11 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, #ifdef RUBY extern int onig_compile_ruby(regex_t* reg, const UChar* pattern, const UChar* pattern_end, - OnigErrorInfo* einfo, const char *sourcefile, int sourceline) + OnigErrorInfo* einfo, const char *sourcefile, int sourceline) #else extern int onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, - OnigErrorInfo* einfo) + OnigErrorInfo* einfo) #endif { #define COMPILE_INIT_SIZE 20 @@ -5936,10 +5936,10 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, if (scan_env.comb_exp_max_regnum > 0) { int i; for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) { - if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) { - scan_env.num_comb_exp_check = 0; - break; - } + if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) { + scan_env.num_comb_exp_check = 0; + break; + } } } } @@ -5973,9 +5973,9 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, reg->stack_pop_level = STACK_POP_LEVEL_ALL; else { if (reg->bt_mem_start != 0) - reg->stack_pop_level = 
STACK_POP_LEVEL_MEM_START; + reg->stack_pop_level = STACK_POP_LEVEL_MEM_START; else - reg->stack_pop_level = STACK_POP_LEVEL_FREE; + reg->stack_pop_level = STACK_POP_LEVEL_FREE; } } #ifdef USE_SUBEXP_CALL @@ -6021,8 +6021,8 @@ static int onig_inited = 0; extern int onig_reg_init(regex_t* reg, OnigOptionType option, - OnigCaseFoldType case_fold_flag, - OnigEncoding enc, const OnigSyntaxType* syntax) + OnigCaseFoldType case_fold_flag, + OnigEncoding enc, const OnigSyntaxType* syntax) { if (! onig_inited) onig_init(); @@ -6082,8 +6082,8 @@ onig_new_without_alloc(regex_t* reg, const UChar* pattern, extern int onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, - OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax, - OnigErrorInfo* einfo) + OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax, + OnigErrorInfo* einfo) { *reg = (regex_t* )xmalloc(sizeof(regex_t)); if (IS_NULL(*reg)) return ONIGERR_MEMORY; @@ -6436,9 +6436,9 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp, break; case ARG_OPTION: { - OnigOptionType option = *((OnigOptionType* )bp); - bp += SIZE_OPTION; - fprintf(f, ":%d", option); + OnigOptionType option = *((OnigOptionType* )bp); + bp += SIZE_OPTION; + fprintf(f, ":%d", option); } break; @@ -6487,13 +6487,13 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp, break; case OP_EXACTMBN: { - int mb_len; + int mb_len; - GET_LENGTH_INC(mb_len, bp); - GET_LENGTH_INC(len, bp); - fprintf(f, ":%d:%d:", mb_len, len); - n = len * mb_len; - while (n-- > 0) { fputc(*bp++, f); } + GET_LENGTH_INC(mb_len, bp); + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d:%d:", mb_len, len); + n = len * mb_len; + while (n-- > 0) { fputc(*bp++, f); } } break; @@ -6557,40 +6557,40 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp, fputs(" ", f); GET_LENGTH_INC(len, bp); for (i = 0; i < len; i++) { - GET_MEMNUM_INC(mem, bp); - if (i > 0) fputs(", 
", f); - fprintf(f, "%d", mem); + GET_MEMNUM_INC(mem, bp); + if (i > 0) fputs(", ", f); + fprintf(f, "%d", mem); } break; case OP_BACKREF_WITH_LEVEL: { - OnigOptionType option; - LengthType level; - - GET_OPTION_INC(option, bp); - fprintf(f, ":%d", option); - GET_LENGTH_INC(level, bp); - fprintf(f, ":%d", level); - - fputs(" ", f); - GET_LENGTH_INC(len, bp); - for (i = 0; i < len; i++) { - GET_MEMNUM_INC(mem, bp); - if (i > 0) fputs(", ", f); - fprintf(f, "%d", mem); - } + OnigOptionType option; + LengthType level; + + GET_OPTION_INC(option, bp); + fprintf(f, ":%d", option); + GET_LENGTH_INC(level, bp); + fprintf(f, ":%d", level); + + fputs(" ", f); + GET_LENGTH_INC(len, bp); + for (i = 0; i < len; i++) { + GET_MEMNUM_INC(mem, bp); + if (i > 0) fputs(", ", f); + fprintf(f, "%d", mem); + } } break; case OP_REPEAT: case OP_REPEAT_NG: { - mem = *((MemNumType* )bp); - bp += SIZE_MEMNUM; - addr = *((RelAddrType* )bp); - bp += SIZE_RELADDR; - fprintf(f, ":%d:%d", mem, addr); + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + addr = *((RelAddrType* )bp); + bp += SIZE_RELADDR; + fprintf(f, ":%d:%d", mem, addr); } break; @@ -6631,7 +6631,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp, default: fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", - bp[-1]); + bp[-1]); } } fputs("]", f); @@ -6688,8 +6688,8 @@ print_indent_tree(FILE* f, Node* node, int indent) print_indent_tree(f, NCAR(node), indent + add); while (IS_NOT_NULL(node = NCDR(node))) { if (NTYPE(node) != type) { - fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node)); - exit(0); + fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node)); + exit(0); } print_indent_tree(f, NCAR(node), indent + add); } @@ -6697,12 +6697,12 @@ print_indent_tree(FILE* f, Node* node, int indent) case NT_STR: fprintf(f, "", - (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node); + (NSTRING_IS_RAW(node) ? 
"-raw" : ""), (intptr_t )node); for (p = NSTR(node)->s; p < NSTR(node)->end; p++) { if (*p >= 0x20 && *p < 0x7f) - fputc(*p, f); + fputc(*p, f); else { - fprintf(f, " 0x%02x", *p); + fprintf(f, " 0x%02x", *p); } } break; @@ -6716,8 +6716,8 @@ print_indent_tree(FILE* f, Node* node, int indent) OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used); fprintf(f, "%d", *data++); for (; data < end; data+=2) { - fprintf(f, ","); - fprintf(f, "%04x-%04x", data[0], data[1]); + fprintf(f, ","); + fprintf(f, "%04x-%04x", data[0], data[1]); } } break; @@ -6727,9 +6727,9 @@ print_indent_tree(FILE* f, Node* node, int indent) switch (NCTYPE(node)->ctype) { case ONIGENC_CTYPE_WORD: if (NCTYPE(node)->not != 0) - fputs("not word", f); + fputs("not word", f); else - fputs("word", f); + fputs("word", f); break; default: @@ -6777,8 +6777,8 @@ print_indent_tree(FILE* f, Node* node, int indent) p = BACKREFS_P(br); fprintf(f, "", (intptr_t )node); for (i = 0; i < br->back_num; i++) { - if (i > 0) fputs(", ", f); - fprintf(f, "%d", p[i]); + if (i > 0) fputs(", ", f); + fprintf(f, "%d", p[i]); } } break; @@ -6795,8 +6795,8 @@ print_indent_tree(FILE* f, Node* node, int indent) case NT_QTFR: fprintf(f, "{%d,%d}%s\n", (intptr_t )node, - NQTFR(node)->lower, NQTFR(node)->upper, - (NQTFR(node)->greedy ? "" : "?")); + NQTFR(node)->lower, NQTFR(node)->upper, + (NQTFR(node)->greedy ? 
"" : "?")); print_indent_tree(f, NQTFR(node)->target, indent + add); break; diff --git a/regenc.c b/regenc.c index c554f4eb316ea0..0afdf22cb7bdc1 100644 --- a/regenc.c +++ b/regenc.c @@ -89,7 +89,7 @@ onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const U extern UChar* onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, - const UChar* start, const UChar* s, const UChar* end, const UChar** prev) + const UChar* start, const UChar* s, const UChar* end, const UChar** prev) { UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end); @@ -425,8 +425,8 @@ const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = { extern int onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, - OnigApplyAllCaseFoldFunc f, void* arg, - OnigEncoding enc ARG_UNUSED) + OnigApplyAllCaseFoldFunc f, void* arg, + OnigEncoding enc ARG_UNUSED) { OnigCodePoint code; int i, r; @@ -446,8 +446,8 @@ onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, extern int onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED, - const OnigUChar* p, const OnigUChar* end ARG_UNUSED, - OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED) + const OnigUChar* p, const OnigUChar* end ARG_UNUSED, + OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED) { if (0x41 <= *p && *p <= 0x5a) { items[0].byte_len = 1; @@ -467,7 +467,7 @@ onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED, static int ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, - OnigApplyAllCaseFoldFunc f, void* arg) + OnigApplyAllCaseFoldFunc f, void* arg) { OnigCodePoint ss[] = { 0x73, 0x73 }; @@ -513,7 +513,7 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size, items[0].code_len = 1; items[0].code[0] = (OnigCodePoint )(*p + 0x20); if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1 - && (*(p+1) == 0x53 || *(p+1) == 0x73)) { + && (*(p+1) == 0x53 || *(p+1) == 0x73)) { /* SS */ items[1].byte_len = 2; items[1].code_len = 1; @@ 
-528,7 +528,7 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size, items[0].code_len = 1; items[0].code[0] = (OnigCodePoint )(*p - 0x20); if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1 - && (*(p+1) == 0x73 || *(p+1) == 0x53)) { + && (*(p+1) == 0x73 || *(p+1) == 0x53)) { /* ss */ items[1].byte_len = 2; items[1].code_len = 1; @@ -566,16 +566,16 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size, for (i = 0; i < map_size; i++) { if (*p == map[i].from) { - items[0].byte_len = 1; - items[0].code_len = 1; - items[0].code[0] = map[i].to; - return 1; + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = map[i].to; + return 1; } else if (*p == map[i].to) { - items[0].byte_len = 1; - items[0].code_len = 1; - items[0].code[0] = map[i].from; - return 1; + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = map[i].from; + return 1; } } } @@ -586,9 +586,9 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size, extern int onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED, - OnigCodePoint* sb_out ARG_UNUSED, - const OnigCodePoint* ranges[] ARG_UNUSED, - OnigEncoding enc) + OnigCodePoint* sb_out ARG_UNUSED, + const OnigCodePoint* ranges[] ARG_UNUSED, + OnigEncoding enc) { return ONIG_NO_SUPPORT_CONFIG; } @@ -605,7 +605,7 @@ onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc A /* for single byte encodings */ extern int onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p, - const UChar* end, UChar* lower, OnigEncoding enc ARG_UNUSED) + const UChar* end, UChar* lower, OnigEncoding enc ARG_UNUSED) { *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p); @@ -616,7 +616,7 @@ onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p, #if 0 extern int onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag ARG_UNUSED, - const UChar** pp, const UChar* end ARG_UNUSED) + const UChar** pp, const UChar* end ARG_UNUSED) { const UChar* p = *pp; @@ -627,14 
+627,14 @@ onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag ARG_UNUSED, extern int onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED, const UChar* e ARG_UNUSED, - OnigEncoding enc ARG_UNUSED) + OnigEncoding enc ARG_UNUSED) { return 1; } extern OnigCodePoint onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED, - OnigEncoding enc ARG_UNUSED) + OnigEncoding enc ARG_UNUSED) { return (OnigCodePoint )(*p); } @@ -658,25 +658,25 @@ onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc extern UChar* onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, - const UChar* s, - const UChar* end ARG_UNUSED, - OnigEncoding enc ARG_UNUSED) + const UChar* s, + const UChar* end ARG_UNUSED, + OnigEncoding enc ARG_UNUSED) { return (UChar* )s; } extern int onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, - const UChar* end ARG_UNUSED, - OnigEncoding enc ARG_UNUSED) + const UChar* end ARG_UNUSED, + OnigEncoding enc ARG_UNUSED) { return TRUE; } extern int onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, - const UChar* end ARG_UNUSED, - OnigEncoding enc ARG_UNUSED) + const UChar* end ARG_UNUSED, + OnigEncoding enc ARG_UNUSED) { return FALSE; } @@ -712,7 +712,7 @@ onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end) extern int onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED, const UChar** pp, const UChar* end ARG_UNUSED, - UChar* lower) + UChar* lower) { int len; const UChar *p = *pp; @@ -843,7 +843,7 @@ onigenc_minimum_property_name_to_ctype(OnigEncoding enc, const UChar* p, const U extern int onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, - unsigned int ctype) + unsigned int ctype) { if (code < 128) return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); @@ -858,7 +858,7 @@ onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, extern int onigenc_mb4_is_code_ctype(OnigEncoding enc, 
OnigCodePoint code, - unsigned int ctype) + unsigned int ctype) { if (code < 128) return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); @@ -961,14 +961,14 @@ onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop, *pnum = *pnum + 1; onig_st_insert_strend(*table, name, name + strlen((char* )name), - (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE)); + (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE)); return 0; } #endif extern int onigenc_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, - OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc) + OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc) { OnigCodePoint code; OnigUChar *to_start = to; @@ -987,7 +987,7 @@ onigenc_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const code += 'A' - 'a'; } else if (code >= 'A' && code <= 'Z' && - (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { + (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { flags |= ONIGENC_CASE_MODIFIED; code += 'a' - 'A'; } @@ -1001,8 +1001,8 @@ onigenc_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const extern int onigenc_single_byte_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, - const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, - const struct OnigEncodingTypeST* enc) + const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, + const struct OnigEncodingTypeST* enc) { OnigCodePoint code; OnigUChar *to_start = to; @@ -1016,7 +1016,7 @@ onigenc_single_byte_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar code += 'A' - 'a'; } else if (code >= 'A' && code <= 'Z' && - (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { + (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { flags |= ONIGENC_CASE_MODIFIED; code += 'a' - 'A'; } diff --git a/regenc.h b/regenc.h index 4b4d21a715b4af..4fbe403b6301d8 100644 --- a/regenc.h +++ b/regenc.h @@ -192,8 +192,8 @@ ONIG_EXTERN int 
onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag, OnigA #define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc) #define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8) #define UNICODE_VALID_CODEPOINT_P(c) ( \ - ((c) <= 0x10ffff) && \ - !((c) < 0x10000 && UTF16_IS_SURROGATE((c) >> 8))) + ((c) <= 0x10ffff) && \ + !((c) < 0x10000 && UTF16_IS_SURROGATE((c) >> 8))) #define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \ OnigEncISO_8859_1_ToLowerCaseTable[c] @@ -239,8 +239,8 @@ extern int ONIG_ENC_REGISTER(const char *, OnigEncoding); # define OnigEncodingDefine(f,n) \ OnigEncodingDeclare(n); \ void Init_##f(void) { \ - ONIG_ENC_REGISTER(OnigEncodingName(n).name, \ - &OnigEncodingName(n)); \ + ONIG_ENC_REGISTER(OnigEncodingName(n).name, \ + &OnigEncodingName(n)); \ } \ OnigEncodingDeclare(n) #else diff --git a/regerror.c b/regerror.c index df5e964cc305c6..8667084d41c931 100644 --- a/regerror.c +++ b/regerror.c @@ -194,7 +194,7 @@ static void sprint_byte_with_x(char* s, unsigned int v) } static int to_ascii(OnigEncoding enc, UChar *s, UChar *end, - UChar buf[], int buf_size, int *is_over) + UChar buf[], int buf_size, int *is_over) { int len; UChar *p; @@ -206,24 +206,24 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end, while (p < end) { code = ONIGENC_MBC_TO_CODE(enc, p, end); if (code >= 0x80) { - if (code > 0xffff && len + 10 <= buf_size) { - sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 24)); - sprint_byte((char*)(&(buf[len+4])), (unsigned int)(code >> 16)); - sprint_byte((char*)(&(buf[len+6])), (unsigned int)(code >> 8)); - sprint_byte((char*)(&(buf[len+8])), (unsigned int)code); - len += 10; - } - else if (len + 6 <= buf_size) { - sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 8)); - sprint_byte((char*)(&(buf[len+4])), (unsigned int)code); - len += 6; - } - else { - break; - } + if (code > 0xffff && len + 10 <= buf_size) { + sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 24)); + 
sprint_byte((char*)(&(buf[len+4])), (unsigned int)(code >> 16)); + sprint_byte((char*)(&(buf[len+6])), (unsigned int)(code >> 8)); + sprint_byte((char*)(&(buf[len+8])), (unsigned int)code); + len += 10; + } + else if (len + 6 <= buf_size) { + sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 8)); + sprint_byte((char*)(&(buf[len+4])), (unsigned int)code); + len += 6; + } + else { + break; + } } else { - buf[len++] = (UChar )code; + buf[len++] = (UChar )code; } p += enclen(enc, p, end); @@ -267,27 +267,27 @@ onig_error_code_to_str(UChar* s, OnigPosition code, ...) case ONIGERR_INVALID_CHAR_PROPERTY_NAME: einfo = va_arg(vargs, OnigErrorInfo*); len = to_ascii(einfo->enc, einfo->par, einfo->par_end, - parbuf, MAX_ERROR_PAR_LEN - 3, &is_over); + parbuf, MAX_ERROR_PAR_LEN - 3, &is_over); q = onig_error_code_to_format(code); p = s; while (*q != '\0') { if (*q == '%') { - q++; - if (*q == 'n') { /* '%n': name */ - xmemcpy(p, parbuf, len); - p += len; - if (is_over != 0) { - xmemcpy(p, "...", 3); - p += 3; - } - q++; - } - else - goto normal_char; + q++; + if (*q == 'n') { /* '%n': name */ + xmemcpy(p, parbuf, len); + p += len; + if (is_over != 0) { + xmemcpy(p, "...", 3); + p += 3; + } + q++; + } + else + goto normal_char; } else { normal_char: - *p++ = *q++; + *p++ = *q++; } } *p = '\0'; @@ -348,24 +348,24 @@ onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, } } else if (*p == '\\') { - *s++ = *p++; - len = enclen(enc, p, pat_end); - while (len-- > 0) *s++ = *p++; + *s++ = *p++; + len = enclen(enc, p, pat_end); + while (len-- > 0) *s++ = *p++; } else if (*p == '/') { - *s++ = (unsigned char )'\\'; - *s++ = *p++; + *s++ = (unsigned char )'\\'; + *s++ = *p++; } else if (!ONIGENC_IS_CODE_PRINT(enc, *p) && - (!ONIGENC_IS_CODE_SPACE(enc, *p) || + (!ONIGENC_IS_CODE_SPACE(enc, *p) || ONIGENC_IS_CODE_CNTRL(enc, *p))) { - sprint_byte_with_x((char* )bs, (unsigned int )(*p++)); - len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); + 
sprint_byte_with_x((char* )bs, (unsigned int )(*p++)); + len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); bp = bs; - while (len-- > 0) *s++ = *bp++; + while (len-- > 0) *s++ = *bp++; } else { - *s++ = *p++; + *s++ = *p++; } } @@ -382,7 +382,7 @@ onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, va_list args; va_start(args, fmt); onig_vsnprintf_with_pattern(buf, bufsize, enc, - pat, pat_end, fmt, args); + pat, pat_end, fmt, args); va_end(args); } #endif diff --git a/regexec.c b/regexec.c index 94cfb586e25c8f..b8d174ec8ed472 100644 --- a/regexec.c +++ b/regexec.c @@ -55,7 +55,7 @@ rb_enc_asciicompat(OnigEncoding enc) # define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \ (rb_enc_asciicompat(enc) ? (ISALNUM(*s) || *s=='_') : \ onigenc_ascii_is_code_ctype( \ - ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc)) + ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc)) #endif /* RUBY */ #ifdef USE_CRNL_AS_LINE_TERMINATOR @@ -66,28 +66,28 @@ rb_enc_asciicompat(OnigEncoding enc) is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev)) static int is_mbc_newline_ex(OnigEncoding enc, const UChar *p, const UChar *start, - const UChar *end, OnigOptionType option, int check_prev) + const UChar *end, OnigOptionType option, int check_prev) { if (IS_NEWLINE_CRLF(option)) { if (ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0a) { if (check_prev) { - const UChar *prev = onigenc_get_prev_char_head(enc, start, p, end); - if ((prev != NULL) && ONIGENC_MBC_TO_CODE(enc, prev, end) == 0x0d) - return 0; - else - return 1; + const UChar *prev = onigenc_get_prev_char_head(enc, start, p, end); + if ((prev != NULL) && ONIGENC_MBC_TO_CODE(enc, prev, end) == 0x0d) + return 0; + else + return 1; } else - return 1; + return 1; } else { const UChar *pnext = p + enclen(enc, p, end); if (pnext < end && - ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d && - ONIGENC_MBC_TO_CODE(enc, pnext, end) == 0x0a) - return 1; + ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d && + 
ONIGENC_MBC_TO_CODE(enc, pnext, end) == 0x0a) + return 1; if (ONIGENC_IS_MBC_NEWLINE(enc, p, end)) - return 1; + return 1; return 0; } } @@ -111,7 +111,7 @@ history_tree_clear(OnigCaptureTreeNode* node) if (IS_NOT_NULL(node)) { for (i = 0; i < node->num_childs; i++) { if (IS_NOT_NULL(node->childs[i])) { - history_tree_free(node->childs[i]); + history_tree_free(node->childs[i]); } } for (i = 0; i < node->allocated; i++) { @@ -170,18 +170,18 @@ history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child) if (IS_NULL(parent->childs)) { n = HISTORY_TREE_INIT_ALLOC_SIZE; parent->childs = - (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n); + (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n); CHECK_NULL_RETURN_MEMERR(parent->childs); } else { OnigCaptureTreeNode** tmp; n = parent->allocated * 2; tmp = - (OnigCaptureTreeNode** )xrealloc(parent->childs, - sizeof(OnigCaptureTreeNode*) * n); + (OnigCaptureTreeNode** )xrealloc(parent->childs, + sizeof(OnigCaptureTreeNode*) * n); if (tmp == 0) { - history_tree_clear(parent); - return ONIGERR_MEMORY; + history_tree_clear(parent); + return ONIGERR_MEMORY; } parent->childs = tmp; } @@ -276,7 +276,7 @@ static OnigPosition count_num_cache_opcodes_inner( switch (*p++) { case OP_FINISH: case OP_END: - break; + break; case OP_EXACT1: p++; break; case OP_EXACT2: p += 2; break; @@ -284,50 +284,50 @@ static OnigPosition count_num_cache_opcodes_inner( case OP_EXACT4: p += 4; break; case OP_EXACT5: p += 5; break; case OP_EXACTN: - GET_LENGTH_INC(len, p); p += len; break; + GET_LENGTH_INC(len, p); p += len; break; case OP_EXACTMB2N1: p += 2; break; case OP_EXACTMB2N2: p += 4; break; case OP_EXACTMB2N3: p += 6; break; case OP_EXACTMB2N: - GET_LENGTH_INC(len, p); p += len * 2; break; + GET_LENGTH_INC(len, p); p += len * 2; break; case OP_EXACTMB3N: - GET_LENGTH_INC(len, p); p += len * 3; break; + GET_LENGTH_INC(len, p); p += len * 3; break; case OP_EXACTMBN: - { - int mb_len; - 
GET_LENGTH_INC(mb_len, p); - GET_LENGTH_INC(len, p); - p += mb_len * len; - } - break; + { + int mb_len; + GET_LENGTH_INC(mb_len, p); + GET_LENGTH_INC(len, p); + p += mb_len * len; + } + break; case OP_EXACT1_IC: - len = enclen(enc, p, pend); p += len; break; + len = enclen(enc, p, pend); p += len; break; case OP_EXACTN_IC: - GET_LENGTH_INC(len, p); p += len; break; + GET_LENGTH_INC(len, p); p += len; break; case OP_CCLASS: case OP_CCLASS_NOT: - p += SIZE_BITSET; break; + p += SIZE_BITSET; break; case OP_CCLASS_MB: case OP_CCLASS_MB_NOT: - GET_LENGTH_INC(len, p); p += len; break; + GET_LENGTH_INC(len, p); p += len; break; case OP_CCLASS_MIX: case OP_CCLASS_MIX_NOT: - p += SIZE_BITSET; - GET_LENGTH_INC(len, p); - p += len; - break; + p += SIZE_BITSET; + GET_LENGTH_INC(len, p); + p += len; + break; case OP_ANYCHAR: case OP_ANYCHAR_ML: - break; + break; case OP_ANYCHAR_STAR: case OP_ANYCHAR_ML_STAR: - num_cache_opcodes++; break; + num_cache_opcodes++; break; case OP_ANYCHAR_STAR_PEEK_NEXT: case OP_ANYCHAR_ML_STAR_PEEK_NEXT: - p++; num_cache_opcodes++; break; + p++; num_cache_opcodes++; break; case OP_WORD: case OP_NOT_WORD: @@ -335,7 +335,7 @@ static OnigPosition count_num_cache_opcodes_inner( case OP_NOT_WORD_BOUND: case OP_WORD_BEGIN: case OP_WORD_END: - break; + break; case OP_ASCII_WORD: case OP_NOT_ASCII_WORD: @@ -343,7 +343,7 @@ static OnigPosition count_num_cache_opcodes_inner( case OP_NOT_ASCII_WORD_BOUND: case OP_ASCII_WORD_BEGIN: case OP_ASCII_WORD_END: - break; + break; case OP_BEGIN_BUF: case OP_END_BUF: @@ -351,7 +351,7 @@ static OnigPosition count_num_cache_opcodes_inner( case OP_END_LINE: case OP_SEMI_END_BUF: case OP_BEGIN_POSITION: - break; + break; case OP_BACKREF1: case OP_BACKREF2: @@ -360,7 +360,7 @@ static OnigPosition count_num_cache_opcodes_inner( case OP_BACKREF_MULTI: case OP_BACKREF_MULTI_IC: case OP_BACKREF_WITH_LEVEL: - goto impossible; + goto impossible; case OP_MEMORY_START: case OP_MEMORY_START_PUSH: @@ -368,158 +368,158 @@ static 
OnigPosition count_num_cache_opcodes_inner( case OP_MEMORY_END_PUSH_REC: case OP_MEMORY_END: case OP_MEMORY_END_REC: - p += SIZE_MEMNUM; - // A memory (capture) in look-around is found. - if (lookaround_nesting != 0) { - goto impossible; + p += SIZE_MEMNUM; + // A memory (capture) in look-around is found. + if (lookaround_nesting != 0) { + goto impossible; } - break; + break; case OP_KEEP: - break; + break; case OP_FAIL: - break; + break; case OP_JUMP: - p += SIZE_RELADDR; - break; + p += SIZE_RELADDR; + break; case OP_PUSH: - p += SIZE_RELADDR; - num_cache_opcodes++; - break; + p += SIZE_RELADDR; + num_cache_opcodes++; + break; case OP_POP: - break; + break; case OP_PUSH_OR_JUMP_EXACT1: case OP_PUSH_IF_PEEK_NEXT: - p += SIZE_RELADDR + 1; num_cache_opcodes++; break; + p += SIZE_RELADDR + 1; num_cache_opcodes++; break; case OP_REPEAT: case OP_REPEAT_NG: - if (current_repeat_mem != -1) { - // A nested OP_REPEAT is not yet supported. - goto impossible; - } - GET_MEMNUM_INC(repeat_mem, p); - p += SIZE_RELADDR; - if (reg->repeat_range[repeat_mem].lower == 0 && reg->repeat_range[repeat_mem].upper == 0) { - long dummy_num_cache_opcodes = 0; - result = count_num_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &p, &dummy_num_cache_opcodes); - if (result < 0 || dummy_num_cache_opcodes < 0) { - goto fail; - } - } else { - if (reg->repeat_range[repeat_mem].lower == 0) { - num_cache_opcodes++; - } - result = count_num_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &p, &num_cache_opcodes); - if (result < 0 || num_cache_opcodes < 0) { - goto fail; - } - OnigRepeatRange *repeat_range = ®->repeat_range[repeat_mem]; - if (repeat_range->lower < repeat_range->upper) { - num_cache_opcodes++; - } - } - break; + if (current_repeat_mem != -1) { + // A nested OP_REPEAT is not yet supported. 
+ goto impossible; + } + GET_MEMNUM_INC(repeat_mem, p); + p += SIZE_RELADDR; + if (reg->repeat_range[repeat_mem].lower == 0 && reg->repeat_range[repeat_mem].upper == 0) { + long dummy_num_cache_opcodes = 0; + result = count_num_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &p, &dummy_num_cache_opcodes); + if (result < 0 || dummy_num_cache_opcodes < 0) { + goto fail; + } + } else { + if (reg->repeat_range[repeat_mem].lower == 0) { + num_cache_opcodes++; + } + result = count_num_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &p, &num_cache_opcodes); + if (result < 0 || num_cache_opcodes < 0) { + goto fail; + } + OnigRepeatRange *repeat_range = ®->repeat_range[repeat_mem]; + if (repeat_range->lower < repeat_range->upper) { + num_cache_opcodes++; + } + } + break; case OP_REPEAT_INC: case OP_REPEAT_INC_NG: - GET_MEMNUM_INC(repeat_mem, p); - if (repeat_mem != current_repeat_mem) { - // A lone or invalid OP_REPEAT_INC is found. - goto impossible; - } - goto exit; + GET_MEMNUM_INC(repeat_mem, p); + if (repeat_mem != current_repeat_mem) { + // A lone or invalid OP_REPEAT_INC is found. + goto impossible; + } + goto exit; case OP_REPEAT_INC_SG: case OP_REPEAT_INC_NG_SG: - goto impossible; + goto impossible; case OP_NULL_CHECK_START: - p += SIZE_MEMNUM; - break; + p += SIZE_MEMNUM; + break; case OP_NULL_CHECK_END: case OP_NULL_CHECK_END_MEMST_PUSH: - p += SIZE_MEMNUM; - break; + p += SIZE_MEMNUM; + break; case OP_NULL_CHECK_END_MEMST: - p += SIZE_MEMNUM; - break; + p += SIZE_MEMNUM; + break; case OP_PUSH_POS: - if (lookaround_nesting < 0) { - // A look-around nested in a atomic grouping is found. - goto impossible; - } - result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes); - if (result < 0 || num_cache_opcodes < 0) { - goto fail; - } - break; + if (lookaround_nesting < 0) { + // A look-around nested in a atomic grouping is found. 
+ goto impossible; + } + result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes); + if (result < 0 || num_cache_opcodes < 0) { + goto fail; + } + break; case OP_PUSH_POS_NOT: - if (lookaround_nesting < 0) { - // A look-around nested in a atomic grouping is found. - goto impossible; - } - p += SIZE_RELADDR; - result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes); - if (result < 0 || num_cache_opcodes < 0) { - goto fail; - } - break; + if (lookaround_nesting < 0) { + // A look-around nested in a atomic grouping is found. + goto impossible; + } + p += SIZE_RELADDR; + result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes); + if (result < 0 || num_cache_opcodes < 0) { + goto fail; + } + break; case OP_PUSH_LOOK_BEHIND_NOT: - if (lookaround_nesting < 0) { - // A look-around nested in a atomic grouping is found. - goto impossible; - } - p += SIZE_RELADDR; - p += SIZE_LENGTH; - result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes); - if (result < 0 || num_cache_opcodes < 0) { - goto fail; - } - break; + if (lookaround_nesting < 0) { + // A look-around nested in a atomic grouping is found. + goto impossible; + } + p += SIZE_RELADDR; + p += SIZE_LENGTH; + result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes); + if (result < 0 || num_cache_opcodes < 0) { + goto fail; + } + break; case OP_PUSH_STOP_BT: - if (lookaround_nesting != 0) { - // A nested atomic grouping is found. - goto impossible; - } - result = count_num_cache_opcodes_inner(reg, current_repeat_mem, -1, &p, &num_cache_opcodes); - if (result < 0 || num_cache_opcodes < 0) { - goto fail; - } - break; + if (lookaround_nesting != 0) { + // A nested atomic grouping is found. 
+ goto impossible; + } + result = count_num_cache_opcodes_inner(reg, current_repeat_mem, -1, &p, &num_cache_opcodes); + if (result < 0 || num_cache_opcodes < 0) { + goto fail; + } + break; case OP_POP_POS: case OP_FAIL_POS: case OP_FAIL_LOOK_BEHIND_NOT: case OP_POP_STOP_BT: - goto exit; + goto exit; case OP_LOOK_BEHIND: - p += SIZE_LENGTH; - break; + p += SIZE_LENGTH; + break; case OP_PUSH_ABSENT_POS: case OP_ABSENT_END: case OP_ABSENT: - goto impossible; + goto impossible; case OP_CALL: case OP_RETURN: - goto impossible; + goto impossible; case OP_CONDITION: - goto impossible; + goto impossible; case OP_STATE_CHECK_PUSH: case OP_STATE_CHECK_PUSH_OR_JUMP: case OP_STATE_CHECK: case OP_STATE_CHECK_ANYCHAR_STAR: case OP_STATE_CHECK_ANYCHAR_ML_STAR: - goto impossible; + goto impossible; case OP_SET_OPTION_PUSH: case OP_SET_OPTION: - p += SIZE_OPTION; - break; + p += SIZE_OPTION; + break; default: - goto bytecode_error; + goto bytecode_error; } } @@ -588,7 +588,7 @@ init_cache_opcodes_inner( switch (*p++) { case OP_FINISH: case OP_END: - break; + break; case OP_EXACT1: p++; break; case OP_EXACT2: p += 2; break; @@ -596,53 +596,53 @@ init_cache_opcodes_inner( case OP_EXACT4: p += 4; break; case OP_EXACT5: p += 5; break; case OP_EXACTN: - GET_LENGTH_INC(len, p); p += len; break; + GET_LENGTH_INC(len, p); p += len; break; case OP_EXACTMB2N1: p += 2; break; case OP_EXACTMB2N2: p += 4; break; case OP_EXACTMB2N3: p += 6; break; case OP_EXACTMB2N: - GET_LENGTH_INC(len, p); p += len * 2; break; + GET_LENGTH_INC(len, p); p += len * 2; break; case OP_EXACTMB3N: - GET_LENGTH_INC(len, p); p += len * 3; break; + GET_LENGTH_INC(len, p); p += len * 3; break; case OP_EXACTMBN: - { - int mb_len; - GET_LENGTH_INC(mb_len, p); - GET_LENGTH_INC(len, p); - p += mb_len * len; - } - break; + { + int mb_len; + GET_LENGTH_INC(mb_len, p); + GET_LENGTH_INC(len, p); + p += mb_len * len; + } + break; case OP_EXACT1_IC: - len = enclen(enc, p, pend); p += len; break; + len = enclen(enc, p, pend); p += 
len; break; case OP_EXACTN_IC: - GET_LENGTH_INC(len, p); p += len; break; + GET_LENGTH_INC(len, p); p += len; break; case OP_CCLASS: case OP_CCLASS_NOT: - p += SIZE_BITSET; break; + p += SIZE_BITSET; break; case OP_CCLASS_MB: case OP_CCLASS_MB_NOT: - GET_LENGTH_INC(len, p); p += len; break; + GET_LENGTH_INC(len, p); p += len; break; case OP_CCLASS_MIX: case OP_CCLASS_MIX_NOT: - p += SIZE_BITSET; - GET_LENGTH_INC(len, p); - p += len; - break; + p += SIZE_BITSET; + GET_LENGTH_INC(len, p); + p += len; + break; case OP_ANYCHAR: case OP_ANYCHAR_ML: - break; + break; case OP_ANYCHAR_STAR: case OP_ANYCHAR_ML_STAR: - INC_CACHE_OPCODES; - break; + INC_CACHE_OPCODES; + break; case OP_ANYCHAR_STAR_PEEK_NEXT: case OP_ANYCHAR_ML_STAR_PEEK_NEXT: - p++; - INC_CACHE_OPCODES; - break; + p++; + INC_CACHE_OPCODES; + break; case OP_WORD: case OP_NOT_WORD: @@ -650,7 +650,7 @@ init_cache_opcodes_inner( case OP_NOT_WORD_BOUND: case OP_WORD_BEGIN: case OP_WORD_END: - break; + break; case OP_ASCII_WORD: case OP_NOT_ASCII_WORD: @@ -658,7 +658,7 @@ init_cache_opcodes_inner( case OP_NOT_ASCII_WORD_BOUND: case OP_ASCII_WORD_BEGIN: case OP_ASCII_WORD_END: - break; + break; case OP_BEGIN_BUF: case OP_END_BUF: @@ -666,7 +666,7 @@ init_cache_opcodes_inner( case OP_END_LINE: case OP_SEMI_END_BUF: case OP_BEGIN_POSITION: - break; + break; case OP_BACKREF1: case OP_BACKREF2: @@ -675,7 +675,7 @@ init_cache_opcodes_inner( case OP_BACKREF_MULTI: case OP_BACKREF_MULTI_IC: case OP_BACKREF_WITH_LEVEL: - goto unexpected_bytecode_error; + goto unexpected_bytecode_error; case OP_MEMORY_START: case OP_MEMORY_START_PUSH: @@ -683,158 +683,158 @@ init_cache_opcodes_inner( case OP_MEMORY_END_PUSH_REC: case OP_MEMORY_END: case OP_MEMORY_END_REC: - p += SIZE_MEMNUM; - if (lookaround_nesting != 0) { - goto unexpected_bytecode_error; - } - break; + p += SIZE_MEMNUM; + if (lookaround_nesting != 0) { + goto unexpected_bytecode_error; + } + break; case OP_KEEP: - break; + break; case OP_FAIL: - break; + break; case 
OP_JUMP: - p += SIZE_RELADDR; - break; + p += SIZE_RELADDR; + break; case OP_PUSH: - p += SIZE_RELADDR; - INC_CACHE_OPCODES; - break; + p += SIZE_RELADDR; + INC_CACHE_OPCODES; + break; case OP_POP: - break; + break; case OP_PUSH_OR_JUMP_EXACT1: case OP_PUSH_IF_PEEK_NEXT: - p += SIZE_RELADDR + 1; - INC_CACHE_OPCODES; - break; + p += SIZE_RELADDR + 1; + INC_CACHE_OPCODES; + break; case OP_REPEAT: case OP_REPEAT_NG: - GET_MEMNUM_INC(repeat_mem, p); - p += SIZE_RELADDR; - if (reg->repeat_range[repeat_mem].lower == 0 && reg->repeat_range[repeat_mem].upper == 0) { - long dummy_num_cache_points = 0; - OnigCacheOpcode* dummy_cache_opcodes = NULL; - result = init_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &dummy_cache_opcodes, &p, &dummy_num_cache_points); - if (result != 0) { - goto fail; - } - } else { - if (reg->repeat_range[repeat_mem].lower == 0) { - INC_CACHE_OPCODES; - } - { - long num_cache_points_in_repeat = 0; - long num_cache_points_at_repeat = cache_point; - OnigCacheOpcode* cache_opcodes_in_repeat = cache_opcodes; - result = init_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &cache_opcodes, &p, &num_cache_points_in_repeat); - if (result != 0) { - goto fail; - } - OnigRepeatRange *repeat_range = ®->repeat_range[repeat_mem]; - if (repeat_range->lower < repeat_range->upper) { - INC_CACHE_OPCODES; - cache_point -= lookaround_nesting != 0 ? 2 : 1; - } - int repeat_bounds = repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower; - cache_point += num_cache_points_in_repeat * repeat_range->lower + (num_cache_points_in_repeat + (lookaround_nesting != 0 ? 
2 : 1)) * repeat_bounds; - for (; cache_opcodes_in_repeat < cache_opcodes; cache_opcodes_in_repeat++) { - cache_opcodes_in_repeat->num_cache_points_at_outer_repeat = num_cache_points_at_repeat; - cache_opcodes_in_repeat->num_cache_points_in_outer_repeat = num_cache_points_in_repeat; - } - } - } - break; + GET_MEMNUM_INC(repeat_mem, p); + p += SIZE_RELADDR; + if (reg->repeat_range[repeat_mem].lower == 0 && reg->repeat_range[repeat_mem].upper == 0) { + long dummy_num_cache_points = 0; + OnigCacheOpcode* dummy_cache_opcodes = NULL; + result = init_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &dummy_cache_opcodes, &p, &dummy_num_cache_points); + if (result != 0) { + goto fail; + } + } else { + if (reg->repeat_range[repeat_mem].lower == 0) { + INC_CACHE_OPCODES; + } + { + long num_cache_points_in_repeat = 0; + long num_cache_points_at_repeat = cache_point; + OnigCacheOpcode* cache_opcodes_in_repeat = cache_opcodes; + result = init_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &cache_opcodes, &p, &num_cache_points_in_repeat); + if (result != 0) { + goto fail; + } + OnigRepeatRange *repeat_range = ®->repeat_range[repeat_mem]; + if (repeat_range->lower < repeat_range->upper) { + INC_CACHE_OPCODES; + cache_point -= lookaround_nesting != 0 ? 2 : 1; + } + int repeat_bounds = repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower; + cache_point += num_cache_points_in_repeat * repeat_range->lower + (num_cache_points_in_repeat + (lookaround_nesting != 0 ? 
2 : 1)) * repeat_bounds; + for (; cache_opcodes_in_repeat < cache_opcodes; cache_opcodes_in_repeat++) { + cache_opcodes_in_repeat->num_cache_points_at_outer_repeat = num_cache_points_at_repeat; + cache_opcodes_in_repeat->num_cache_points_in_outer_repeat = num_cache_points_in_repeat; + } + } + } + break; case OP_REPEAT_INC: case OP_REPEAT_INC_NG: - p += SIZE_MEMNUM; + p += SIZE_MEMNUM; goto exit; case OP_REPEAT_INC_SG: case OP_REPEAT_INC_NG_SG: - goto unexpected_bytecode_error; + goto unexpected_bytecode_error; case OP_NULL_CHECK_START: - p += SIZE_MEMNUM; - break; + p += SIZE_MEMNUM; + break; case OP_NULL_CHECK_END: case OP_NULL_CHECK_END_MEMST_PUSH: - p += SIZE_MEMNUM; - break; + p += SIZE_MEMNUM; + break; case OP_NULL_CHECK_END_MEMST: - p += SIZE_MEMNUM; - break; + p += SIZE_MEMNUM; + break; case OP_PUSH_POS: - lookaround: - { - OnigCacheOpcode* cache_opcodes_in_lookaround = cache_opcodes; - result = init_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &cache_opcodes, &p, &cache_point); - if (result != 0) { - goto fail; - } - UChar* match_addr = p - 1; - for (; cache_opcodes_in_lookaround < cache_opcodes; cache_opcodes_in_lookaround++) { - if (cache_opcodes_in_lookaround->match_addr == NULL) { - cache_opcodes_in_lookaround->match_addr = match_addr; - } - } - } - break; + lookaround: + { + OnigCacheOpcode* cache_opcodes_in_lookaround = cache_opcodes; + result = init_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &cache_opcodes, &p, &cache_point); + if (result != 0) { + goto fail; + } + UChar* match_addr = p - 1; + for (; cache_opcodes_in_lookaround < cache_opcodes; cache_opcodes_in_lookaround++) { + if (cache_opcodes_in_lookaround->match_addr == NULL) { + cache_opcodes_in_lookaround->match_addr = match_addr; + } + } + } + break; case OP_PUSH_POS_NOT: - p += SIZE_RELADDR; + p += SIZE_RELADDR; goto lookaround; case OP_PUSH_LOOK_BEHIND_NOT: - p += SIZE_RELADDR; - p += SIZE_LENGTH; + p += SIZE_RELADDR; + p += SIZE_LENGTH; 
goto lookaround; case OP_PUSH_STOP_BT: - { - OnigCacheOpcode* cache_opcodes_in_atomic = cache_opcodes; - result = init_cache_opcodes_inner(reg, current_repeat_mem, -1, &cache_opcodes, &p, &cache_point); - if (result != 0) { - goto fail; - } - UChar* match_addr = p - 1; - for (; cache_opcodes_in_atomic < cache_opcodes; cache_opcodes_in_atomic++) { - if (cache_opcodes_in_atomic->match_addr == NULL) { - cache_opcodes_in_atomic->match_addr = match_addr; - } - } - } - break; + { + OnigCacheOpcode* cache_opcodes_in_atomic = cache_opcodes; + result = init_cache_opcodes_inner(reg, current_repeat_mem, -1, &cache_opcodes, &p, &cache_point); + if (result != 0) { + goto fail; + } + UChar* match_addr = p - 1; + for (; cache_opcodes_in_atomic < cache_opcodes; cache_opcodes_in_atomic++) { + if (cache_opcodes_in_atomic->match_addr == NULL) { + cache_opcodes_in_atomic->match_addr = match_addr; + } + } + } + break; case OP_POP_POS: case OP_FAIL_POS: case OP_FAIL_LOOK_BEHIND_NOT: case OP_POP_STOP_BT: - goto exit; + goto exit; case OP_LOOK_BEHIND: - p += SIZE_LENGTH; - break; + p += SIZE_LENGTH; + break; case OP_ABSENT_END: case OP_ABSENT: - goto unexpected_bytecode_error; + goto unexpected_bytecode_error; case OP_CALL: case OP_RETURN: - goto unexpected_bytecode_error; + goto unexpected_bytecode_error; case OP_CONDITION: - goto unexpected_bytecode_error; + goto unexpected_bytecode_error; case OP_STATE_CHECK_PUSH: case OP_STATE_CHECK_PUSH_OR_JUMP: case OP_STATE_CHECK: case OP_STATE_CHECK_ANYCHAR_STAR: case OP_STATE_CHECK_ANYCHAR_ML_STAR: - goto unexpected_bytecode_error; + goto unexpected_bytecode_error; case OP_SET_OPTION_PUSH: case OP_SET_OPTION: - p += SIZE_OPTION; - break; + p += SIZE_OPTION; + break; default: - goto bytecode_error; + goto bytecode_error; } } @@ -1192,7 +1192,7 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from) }\ else {\ alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\ - + sizeof(OnigStackType) * (stack_num));\ + + sizeof(OnigStackType) * 
(stack_num));\ heap_addr = NULL;\ stk_alloc = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\ stk_base = stk_alloc;\ @@ -1225,7 +1225,7 @@ onig_set_match_stack_limit_size(unsigned int size) static int stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, - OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa) + OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa) { size_t n; OnigStackType *x, *stk_base, *stk_end, *stk; @@ -1249,9 +1249,9 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, n *= 2; if (limit_size != 0 && n > limit_size) { if ((unsigned int )(stk_end - stk_base) == limit_size) - return ONIGERR_MATCH_STACK_LIMIT_OVER; + return ONIGERR_MATCH_STACK_LIMIT_OVER; else - n = limit_size; + n = limit_size; } x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n); if (IS_NULL(x)) { @@ -1901,7 +1901,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, } while(0) static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, - UChar* s1, UChar** ps2, OnigDistance mblen, const UChar* text_end) + UChar* s1, UChar** ps2, OnigDistance mblen, const UChar* text_end) { UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN]; UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN]; @@ -1988,29 +1988,29 @@ make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp, if (k->type == STK_MEM_START) { n = k->u.mem.num; if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && - BIT_STATUS_AT(reg->capture_history, n) != 0) { - child = history_node_new(); - CHECK_NULL_RETURN_MEMERR(child); - child->group = n; - child->beg = k->u.mem.pstr - str; - r = history_tree_add_child(node, child); - if (r != 0) { - history_tree_free(child); - return r; - } - *kp = (k + 1); - r = make_capture_history_tree(child, kp, stk_top, str, reg); - if (r != 0) return r; - - k = *kp; - child->end = k->u.mem.pstr - str; + BIT_STATUS_AT(reg->capture_history, n) != 0) { + child = history_node_new(); + 
CHECK_NULL_RETURN_MEMERR(child); + child->group = n; + child->beg = k->u.mem.pstr - str; + r = history_tree_add_child(node, child); + if (r != 0) { + history_tree_free(child); + return r; + } + *kp = (k + 1); + r = make_capture_history_tree(child, kp, stk_top, str, reg); + if (r != 0) return r; + + k = *kp; + child->end = k->u.mem.pstr - str; } } else if (k->type == STK_MEM_END) { if (k->u.mem.num == node->group) { - node->end = k->u.mem.pstr - str; - *kp = k; - return 0; + node->end = k->u.mem.pstr - str; + *kp = k; + return 0; } } k++; @@ -2035,9 +2035,9 @@ mem_is_in_memp(int mem, int num, UChar* memp) } static int backref_match_at_nested_level(regex_t* reg, - OnigStackType* top, OnigStackType* stk_base, - int ignore_case, int case_fold_flag, - int nest, int mem_num, UChar* memp, UChar** s, const UChar* send) + OnigStackType* top, OnigStackType* stk_base, + int ignore_case, int case_fold_flag, + int nest, int mem_num, UChar* memp, UChar** s, const UChar* send) { UChar *ss, *p, *pstart, *pend = NULL_UCHARP; int level; @@ -2055,33 +2055,33 @@ static int backref_match_at_nested_level(regex_t* reg, } else if (level == nest) { if (k->type == STK_MEM_START) { - if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { - pstart = k->u.mem.pstr; - if (pend != NULL_UCHARP) { - if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ - p = pstart; - ss = *s; - - if (ignore_case != 0) { - if (string_cmp_ic(reg->enc, case_fold_flag, - pstart, &ss, pend - pstart, send) == 0) - return 0; /* or goto next_mem; */ - } - else { - while (p < pend) { - if (*p++ != *ss++) return 0; /* or goto next_mem; */ - } - } - - *s = ss; - return 1; - } - } + if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { + pstart = k->u.mem.pstr; + if (pend != NULL_UCHARP) { + if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ + p = pstart; + ss = *s; + + if (ignore_case != 0) { + if (string_cmp_ic(reg->enc, case_fold_flag, + pstart, &ss, pend - pstart, send) == 0) + return 0; /* or goto 
next_mem; */ + } + else { + while (p < pend) { + if (*p++ != *ss++) return 0; /* or goto next_mem; */ + } + } + + *s = ss; + return 1; + } + } } else if (k->type == STK_MEM_END) { - if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { - pend = k->u.mem.pstr; - } + if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { + pend = k->u.mem.pstr; + } } } k--; @@ -2099,7 +2099,7 @@ static int backref_match_at_nested_level(regex_t* reg, static LARGE_INTEGER ts, te, freq; # define GETTIME(t) QueryPerformanceCounter(&(t)) # define TIMEDIFF(te,ts) (unsigned long )(((te).QuadPart - (ts).QuadPart) \ - * 1000000 / freq.QuadPart) + * 1000000 / freq.QuadPart) # else /* _WIN32 */ # define USE_TIMEOFDAY @@ -2165,7 +2165,7 @@ onig_print_statistics(FILE* f) fprintf(f, " count prev time\n"); for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { fprintf(f, "%8d: %8d: %10lu: %s\n", - OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); + OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); } fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); } @@ -2310,9 +2310,9 @@ memoize_extended_match_cache_point(uint8_t *match_cache_buf, long match_cache_po static OnigPosition match_at(regex_t* reg, const UChar* str, const UChar* end, #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - const UChar* right_range, + const UChar* right_range, #endif - const UChar* sstart, UChar* sprev, OnigMatchArg* msa) + const UChar* sstart, UChar* sprev, OnigMatchArg* msa) { static const UChar FinishCode[] = { OP_FINISH }; @@ -2562,16 +2562,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } #ifndef USE_SUBEXP_CALL mem_start_stk--; /* for index start from 1, - mem_start_stk[1]..mem_start_stk[num_mem] */ + mem_start_stk[1]..mem_start_stk[num_mem] */ mem_end_stk--; /* for index start from 1, - mem_end_stk[1]..mem_end_stk[num_mem] */ + mem_end_stk[1]..mem_end_stk[num_mem] */ #endif #ifdef ONIG_DEBUG_MATCH fprintf(stderr, "match_at: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), start: 
%"PRIuPTR" (%p), sprev: %"PRIuPTR" (%p)\n", - (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )sstart, sstart, (uintptr_t )sprev, sprev); + (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )sstart, sstart, (uintptr_t )sprev, sprev); fprintf(stderr, "size: %d, start offset: %d\n", - (int )(end - str), (int )(sstart - str)); + (int )(end - str), (int )(sstart - str)); fprintf(stderr, "\n ofs> str stk:type addr:opcode\n"); #endif @@ -2591,10 +2591,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, bp = buf; \ q = s; \ if (*op != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */ \ - for (i = 0; i < 7 && q < end; i++) { \ - len = enclen(encode, q, end); \ - while (len-- > 0) *bp++ = *q++; \ - } \ + for (i = 0; i < 7 && q < end; i++) { \ + len = enclen(encode, q, end); \ + while (len-- > 0) *bp++ = *q++; \ + } \ if (q < end) { xmemcpy(bp, "...", 3); bp += 3; } \ } \ xmemcpy(bp, "\"", 1); bp += 1; \ @@ -2602,9 +2602,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, fputs((char* )buf, stderr); \ for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); \ fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":", \ - stk - stk_base - 1, \ - (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \ - (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \ + stk - stk_base - 1, \ + (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \ + (op == FinishCode) ? 
(ptrdiff_t )-1 : op - reg->p); \ onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \ fprintf(stderr, "\n"); \ } @@ -2633,18 +2633,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, uint8_t match_cache_point_mask = 1 << (match_cache_point & 7);\ MATCH_CACHE_DEBUG;\ if (msa->match_cache_buf[match_cache_point_index] & match_cache_point_mask) {\ - MATCH_CACHE_DEBUG_HIT; MATCH_CACHE_HIT;\ - if (cache_opcode->lookaround_nesting == 0) goto fail;\ - else if (cache_opcode->lookaround_nesting < 0) {\ - if (check_extended_match_cache_point(msa->match_cache_buf, match_cache_point_index, match_cache_point_mask)) {\ + MATCH_CACHE_DEBUG_HIT; MATCH_CACHE_HIT;\ + if (cache_opcode->lookaround_nesting == 0) goto fail;\ + else if (cache_opcode->lookaround_nesting < 0) {\ + if (check_extended_match_cache_point(msa->match_cache_buf, match_cache_point_index, match_cache_point_mask)) {\ STACK_STOP_BT_FAIL;\ goto fail;\ }\ else goto fail;\ }\ else {\ - if (check_extended_match_cache_point(msa->match_cache_buf, match_cache_point_index, match_cache_point_mask)) {\ - p = cache_opcode->match_addr;\ + if (check_extended_match_cache_point(msa->match_cache_buf, match_cache_point_index, match_cache_point_mask)) {\ + p = cache_opcode->match_addr;\ MOP_OUT;\ JUMP;\ }\ @@ -2663,66 +2663,66 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_END) MOP_IN(OP_END); n = s - sstart; if (n > best_len) { - OnigRegion* region; + OnigRegion* region; #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE - if (IS_FIND_LONGEST(option)) { - if (n > msa->best_len) { - msa->best_len = n; - msa->best_s = (UChar* )sstart; - } - else - goto end_best_len; - } + if (IS_FIND_LONGEST(option)) { + if (n > msa->best_len) { + msa->best_len = n; + msa->best_s = (UChar* )sstart; + } + else + goto end_best_len; + } #endif - best_len = n; - region = msa->region; - if (region) { - region->beg[0] = ((pkeep > s) ? 
s : pkeep) - str; - region->end[0] = s - str; - for (i = 1; i <= num_mem; i++) { - if (mem_end_stk[i] != INVALID_STACK_INDEX) { - if (BIT_STATUS_AT(reg->bt_mem_start, i)) - region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; - else - region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str; - - region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i) - ? STACK_AT(mem_end_stk[i])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[i])) - str; - } - else { - region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; - } - } + best_len = n; + region = msa->region; + if (region) { + region->beg[0] = ((pkeep > s) ? s : pkeep) - str; + region->end[0] = s - str; + for (i = 1; i <= num_mem; i++) { + if (mem_end_stk[i] != INVALID_STACK_INDEX) { + if (BIT_STATUS_AT(reg->bt_mem_start, i)) + region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; + else + region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str; + + region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i) + ? STACK_AT(mem_end_stk[i])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[i])) - str; + } + else { + region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; + } + } #ifdef USE_CAPTURE_HISTORY - if (reg->capture_history != 0) { - int r; - OnigCaptureTreeNode* node; - - if (IS_NULL(region->history_root)) { - region->history_root = node = history_node_new(); - CHECK_NULL_RETURN_MEMERR(node); - } - else { - node = region->history_root; - history_tree_clear(node); - } - - node->group = 0; - node->beg = ((pkeep > s) ? 
s : pkeep) - str; - node->end = s - str; - - stkp = stk_base; - r = make_capture_history_tree(region->history_root, &stkp, - stk, (UChar* )str, reg); - if (r < 0) { - best_len = r; /* error code */ - goto finish; - } - } + if (reg->capture_history != 0) { + int r; + OnigCaptureTreeNode* node; + + if (IS_NULL(region->history_root)) { + region->history_root = node = history_node_new(); + CHECK_NULL_RETURN_MEMERR(node); + } + else { + node = region->history_root; + history_tree_clear(node); + } + + node->group = 0; + node->beg = ((pkeep > s) ? s : pkeep) - str; + node->end = s - str; + + stkp = stk_base; + r = make_capture_history_tree(region->history_root, &stkp, + stk, (UChar* )str, reg); + if (r < 0) { + best_len = r; /* error code */ + goto finish; + } + } #endif /* USE_CAPTURE_HISTORY */ - } /* if (region) */ + } /* if (region) */ } /* n > best_len */ #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE @@ -2731,13 +2731,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, MOP_OUT; if (IS_FIND_CONDITION(option)) { - if (IS_FIND_NOT_EMPTY(option) && s == sstart) { - best_len = ONIG_MISMATCH; - goto fail; /* for retry */ - } - if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) { - goto fail; /* for retry */ - } + if (IS_FIND_NOT_EMPTY(option) && s == sstart) { + best_len = ONIG_MISMATCH; + goto fail; /* for retry */ + } + if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) { + goto fail; /* for retry */ + } } /* default behavior: return first-matching result. 
*/ @@ -2753,22 +2753,22 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_EXACT1_IC) MOP_IN(OP_EXACT1_IC); { - int len; - UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - - DATA_ENSURE(1); - len = ONIGENC_MBC_CASE_FOLD(encode, - /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ - case_fold_flag, - &s, end, lowbuf); - DATA_ENSURE(0); - q = lowbuf; - while (len-- > 0) { - if (*p != *q) { - goto fail; - } - p++; q++; - } + int len; + UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + + DATA_ENSURE(1); + len = ONIGENC_MBC_CASE_FOLD(encode, + /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ + case_fold_flag, + &s, end, lowbuf); + DATA_ENSURE(0); + q = lowbuf; + while (len-- > 0) { + if (*p != *q) { + goto fail; + } + p++; q++; + } } MOP_OUT; NEXT; @@ -2829,7 +2829,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen); while (tlen-- > 0) { - if (*p++ != *s++) goto fail; + if (*p++ != *s++) goto fail; } sprev = s - 1; MOP_OUT; @@ -2837,26 +2837,26 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_EXACTN_IC) MOP_IN(OP_EXACTN_IC); { - int len; - UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - - GET_LENGTH_INC(tlen, p); - endp = p + tlen; - - while (p < endp) { - sprev = s; - DATA_ENSURE(1); - len = ONIGENC_MBC_CASE_FOLD(encode, - /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ - case_fold_flag, - &s, end, lowbuf); - DATA_ENSURE(0); - q = lowbuf; - while (len-- > 0) { - if (*p != *q) goto fail; - p++; q++; - } - } + int len; + UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + + GET_LENGTH_INC(tlen, p); + endp = p + tlen; + + while (p < endp) { + sprev = s; + DATA_ENSURE(1); + len = ONIGENC_MBC_CASE_FOLD(encode, + /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ + case_fold_flag, + &s, end, lowbuf); + DATA_ENSURE(0); + q = lowbuf; + while (len-- > 0) { + if (*p != *q) goto fail; + p++; q++; + } + } } MOP_OUT; @@ -2907,10 +2907,10 @@ match_at(regex_t* reg, 
const UChar* str, const UChar* end, GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen * 2); while (tlen-- > 0) { - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; } sprev = s - 2; MOP_OUT; @@ -2920,12 +2920,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen * 3); while (tlen-- > 0) { - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; } sprev = s - 3; MOP_OUT; @@ -2937,8 +2937,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, tlen2 *= tlen; DATA_ENSURE(tlen2); while (tlen2-- > 0) { - if (*p != *s) goto fail; - p++; s++; + if (*p != *s) goto fail; + p++; s++; } sprev = s - tlen; MOP_OUT; @@ -2958,23 +2958,23 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, cclass_mb: GET_LENGTH_INC(tlen, p); { - OnigCodePoint code; - UChar *ss; - int mb_len; + OnigCodePoint code; + UChar *ss; + int mb_len; - DATA_ENSURE(1); - mb_len = enclen_approx(encode, s, end); - DATA_ENSURE(mb_len); - ss = s; - s += mb_len; - code = ONIGENC_MBC_TO_CODE(encode, ss, s); + DATA_ENSURE(1); + mb_len = enclen_approx(encode, s, end); + DATA_ENSURE(mb_len); + ss = s; + s += mb_len; + code = ONIGENC_MBC_TO_CODE(encode, ss, s); #ifdef PLATFORM_UNALIGNED_WORD_ACCESS - if (! onig_is_in_code_range(p, code)) goto fail; + if (! onig_is_in_code_range(p, code)) goto fail; #else - q = p; - ALIGNMENT_RIGHT(q); - if (! onig_is_in_code_range(q, code)) goto fail; + q = p; + ALIGNMENT_RIGHT(q); + if (! 
onig_is_in_code_range(q, code)) goto fail; #endif } p += tlen; @@ -2984,17 +2984,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_CCLASS_MIX) MOP_IN(OP_CCLASS_MIX); DATA_ENSURE(1); if (ONIGENC_IS_MBC_HEAD(encode, s, end)) { - p += SIZE_BITSET; - goto cclass_mb; + p += SIZE_BITSET; + goto cclass_mb; } else { - if (BITSET_AT(((BitSetRef )p), *s) == 0) - goto fail; + if (BITSET_AT(((BitSetRef )p), *s) == 0) + goto fail; - p += SIZE_BITSET; - GET_LENGTH_INC(tlen, p); - p += tlen; - s++; + p += SIZE_BITSET; + GET_LENGTH_INC(tlen, p); + p += tlen; + s++; } MOP_OUT; NEXT; @@ -3010,36 +3010,36 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_CCLASS_MB_NOT) MOP_IN(OP_CCLASS_MB_NOT); DATA_ENSURE(1); if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) { - s++; - GET_LENGTH_INC(tlen, p); - p += tlen; - goto cc_mb_not_success; + s++; + GET_LENGTH_INC(tlen, p); + p += tlen; + goto cc_mb_not_success; } cclass_mb_not: GET_LENGTH_INC(tlen, p); { - OnigCodePoint code; - UChar *ss; - int mb_len = enclen(encode, s, end); - - if (! DATA_ENSURE_CHECK(mb_len)) { - DATA_ENSURE(1); - s = (UChar* )end; - p += tlen; - goto cc_mb_not_success; - } + OnigCodePoint code; + UChar *ss; + int mb_len = enclen(encode, s, end); + + if (! 
DATA_ENSURE_CHECK(mb_len)) { + DATA_ENSURE(1); + s = (UChar* )end; + p += tlen; + goto cc_mb_not_success; + } - ss = s; - s += mb_len; - code = ONIGENC_MBC_TO_CODE(encode, ss, s); + ss = s; + s += mb_len; + code = ONIGENC_MBC_TO_CODE(encode, ss, s); #ifdef PLATFORM_UNALIGNED_WORD_ACCESS - if (onig_is_in_code_range(p, code)) goto fail; + if (onig_is_in_code_range(p, code)) goto fail; #else - q = p; - ALIGNMENT_RIGHT(q); - if (onig_is_in_code_range(q, code)) goto fail; + q = p; + ALIGNMENT_RIGHT(q); + if (onig_is_in_code_range(q, code)) goto fail; #endif } p += tlen; @@ -3051,17 +3051,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_CCLASS_MIX_NOT) MOP_IN(OP_CCLASS_MIX_NOT); DATA_ENSURE(1); if (ONIGENC_IS_MBC_HEAD(encode, s, end)) { - p += SIZE_BITSET; - goto cclass_mb_not; + p += SIZE_BITSET; + goto cclass_mb_not; } else { - if (BITSET_AT(((BitSetRef )p), *s) != 0) - goto fail; + if (BITSET_AT(((BitSetRef )p), *s) != 0) + goto fail; - p += SIZE_BITSET; - GET_LENGTH_INC(tlen, p); - p += tlen; - s++; + p += SIZE_BITSET; + GET_LENGTH_INC(tlen, p); + p += tlen; + s++; } MOP_OUT; NEXT; @@ -3085,52 +3085,52 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR); while (DATA_ENSURE_CHECK1) { - CHECK_MATCH_CACHE; - STACK_PUSH_ALT(p, s, sprev, pkeep); - n = enclen_approx(encode, s, end); - DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; - sprev = s; - s += n; + CHECK_MATCH_CACHE; + STACK_PUSH_ALT(p, s, sprev, pkeep); + n = enclen_approx(encode, s, end); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; + sprev = s; + s += n; } MOP_OUT; JUMP; CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR); while (DATA_ENSURE_CHECK1) { - CHECK_MATCH_CACHE; - STACK_PUSH_ALT(p, s, sprev, pkeep); - n = enclen_approx(encode, s, end); - if (n > 1) { - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - sprev = s; - s++; - } + 
CHECK_MATCH_CACHE; + STACK_PUSH_ALT(p, s, sprev, pkeep); + n = enclen_approx(encode, s, end); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } } MOP_OUT; JUMP; CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); while (DATA_ENSURE_CHECK1) { - CHECK_MATCH_CACHE; - if (*p == *s) { - STACK_PUSH_ALT(p + 1, s, sprev, pkeep); - } else { + CHECK_MATCH_CACHE; + if (*p == *s) { + STACK_PUSH_ALT(p + 1, s, sprev, pkeep); + } else { #ifdef USE_MATCH_CACHE - /* We need to increment num_fails here, for invoking a cache optimization correctly. */ - /* Actually, the matching will be failed if we use `OP_ANYCHAR_STAR` simply in this case.*/ - msa->num_fails++; + /* We need to increment num_fails here, for invoking a cache optimization correctly. */ + /* Actually, the matching will be failed if we use `OP_ANYCHAR_STAR` simply in this case.*/ + msa->num_fails++; #endif - } - n = enclen_approx(encode, s, end); - DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; - sprev = s; - s += n; + } + n = enclen_approx(encode, s, end); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; + sprev = s; + s += n; } p++; MOP_OUT; @@ -3138,26 +3138,26 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); while (DATA_ENSURE_CHECK1) { - CHECK_MATCH_CACHE; - if (*p == *s) { - STACK_PUSH_ALT(p + 1, s, sprev, pkeep); - } else { + CHECK_MATCH_CACHE; + if (*p == *s) { + STACK_PUSH_ALT(p + 1, s, sprev, pkeep); + } else { #ifdef USE_MATCH_CACHE - /* We need to increment num_fails here, for invoking a cache optimization correctly. */ - /* Actually, the matching will be failed if we use `OP_ANYCHAR_STAR_ML` simply in this case.*/ - msa->num_fails++; + /* We need to increment num_fails here, for invoking a cache optimization correctly. 
*/ + /* Actually, the matching will be failed if we use `OP_ANYCHAR_STAR_ML` simply in this case.*/ + msa->num_fails++; #endif - } - n = enclen_approx(encode, s, end); - if (n > 1) { - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - sprev = s; - s++; - } + } + n = enclen_approx(encode, s, end); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } } p++; MOP_OUT; @@ -3167,15 +3167,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_STATE_CHECK_ANYCHAR_STAR) MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR); GET_STATE_CHECK_NUM_INC(mem, p); while (DATA_ENSURE_CHECK1) { - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep); - n = enclen_approx(encode, s, end); - DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; - sprev = s; - s += n; + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep); + n = enclen_approx(encode, s, end); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; + sprev = s; + s += n; } MOP_OUT; NEXT; @@ -3185,20 +3185,20 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_STATE_CHECK_NUM_INC(mem, p); while (DATA_ENSURE_CHECK1) { - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep); - n = enclen_approx(encode, s, end); - if (n > 1) { - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - sprev = s; - s++; - } + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep); + n = enclen_approx(encode, s, end); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } } MOP_OUT; NEXT; @@ -3207,7 +3207,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_WORD) MOP_IN(OP_WORD); DATA_ENSURE(1); if (! 
ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; + goto fail; s += enclen(encode, s, end); MOP_OUT; @@ -3216,7 +3216,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_ASCII_WORD) MOP_IN(OP_ASCII_WORD); DATA_ENSURE(1); if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) - goto fail; + goto fail; s += enclen(encode, s, end); MOP_OUT; @@ -3225,7 +3225,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_NOT_WORD) MOP_IN(OP_NOT_WORD); DATA_ENSURE(1); if (ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; + goto fail; s += enclen(encode, s, end); MOP_OUT; @@ -3234,7 +3234,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_NOT_ASCII_WORD) MOP_IN(OP_NOT_ASCII_WORD); DATA_ENSURE(1); if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) - goto fail; + goto fail; s += enclen(encode, s, end); MOP_OUT; @@ -3242,70 +3242,70 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_WORD_BOUND) MOP_IN(OP_WORD_BOUND); if (ON_STR_BEGIN(s)) { - DATA_ENSURE(1); - if (! ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; + DATA_ENSURE(1); + if (! ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; } else if (ON_STR_END(s)) { - if (! ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; + if (! ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; } else { - if (ONIGENC_IS_MBC_WORD(encode, s, end) - == ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; + if (ONIGENC_IS_MBC_WORD(encode, s, end) + == ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; } MOP_OUT; JUMP; CASE(OP_ASCII_WORD_BOUND) MOP_IN(OP_ASCII_WORD_BOUND); if (ON_STR_BEGIN(s)) { - DATA_ENSURE(1); - if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) - goto fail; + DATA_ENSURE(1); + if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) + goto fail; } else if (ON_STR_END(s)) { - if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) - goto fail; + if (! 
ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) + goto fail; } else { - if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end) - == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) - goto fail; + if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end) + == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) + goto fail; } MOP_OUT; JUMP; CASE(OP_NOT_WORD_BOUND) MOP_IN(OP_NOT_WORD_BOUND); if (ON_STR_BEGIN(s)) { - if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; + if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; } else if (ON_STR_END(s)) { - if (ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; + if (ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; } else { - if (ONIGENC_IS_MBC_WORD(encode, s, end) - != ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; + if (ONIGENC_IS_MBC_WORD(encode, s, end) + != ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; } MOP_OUT; JUMP; CASE(OP_NOT_ASCII_WORD_BOUND) MOP_IN(OP_NOT_ASCII_WORD_BOUND); if (ON_STR_BEGIN(s)) { - if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) - goto fail; + if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) + goto fail; } else if (ON_STR_END(s)) { - if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) - goto fail; + if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) + goto fail; } else { - if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end) - != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) - goto fail; + if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end) + != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) + goto fail; } MOP_OUT; JUMP; @@ -3313,40 +3313,40 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #ifdef USE_WORD_BEGIN_END CASE(OP_WORD_BEGIN) MOP_IN(OP_WORD_BEGIN); if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) { - if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { - MOP_OUT; - JUMP; - } + if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { + MOP_OUT; + JUMP; + } } goto fail; 
NEXT; CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN); if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) { - if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) { - MOP_OUT; - JUMP; - } + if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) { + MOP_OUT; + JUMP; + } } goto fail; NEXT; CASE(OP_WORD_END) MOP_IN(OP_WORD_END); if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) { - if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { - MOP_OUT; - JUMP; - } + if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { + MOP_OUT; + JUMP; + } } goto fail; NEXT; CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END); if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) { - if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) { - MOP_OUT; - JUMP; - } + if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) { + MOP_OUT; + JUMP; + } } goto fail; NEXT; @@ -3368,18 +3368,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_BEGIN_LINE) MOP_IN(OP_BEGIN_LINE); if (ON_STR_BEGIN(s)) { - if (IS_NOTBOL(msa->options)) goto fail; - MOP_OUT; - JUMP; + if (IS_NOTBOL(msa->options)) goto fail; + MOP_OUT; + JUMP; } else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) #ifdef USE_CRNL_AS_LINE_TERMINATOR - && !(IS_NEWLINE_CRLF(option) - && ONIGENC_IS_MBC_CRNL(encode, sprev, end)) + && !(IS_NEWLINE_CRLF(option) + && ONIGENC_IS_MBC_CRNL(encode, sprev, end)) #endif - && !ON_STR_END(s)) { - MOP_OUT; - JUMP; + && !ON_STR_END(s)) { + MOP_OUT; + JUMP; } goto fail; NEXT; @@ -3387,18 +3387,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_END_LINE) MOP_IN(OP_END_LINE); if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) { + if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) { #endif - if 
(IS_NOTEOL(msa->options)) goto fail; - MOP_OUT; - JUMP; + if (IS_NOTEOL(msa->options)) goto fail; + MOP_OUT; + JUMP; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - } + } #endif } else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) { - MOP_OUT; - JUMP; + MOP_OUT; + JUMP; } goto fail; NEXT; @@ -3406,30 +3406,30 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF); if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) { + if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) { #endif - if (IS_NOTEOL(msa->options)) goto fail; - MOP_OUT; - JUMP; + if (IS_NOTEOL(msa->options)) goto fail; + MOP_OUT; + JUMP; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - } + } #endif } else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) { - UChar* ss = s + enclen(encode, s, end); - if (ON_STR_END(ss)) { - MOP_OUT; - JUMP; - } + UChar* ss = s + enclen(encode, s, end); + if (ON_STR_END(ss)) { + MOP_OUT; + JUMP; + } #ifdef USE_CRNL_AS_LINE_TERMINATOR - else if (IS_NEWLINE_CRLF(option) - && ONIGENC_IS_MBC_CRNL(encode, s, end)) { - ss += enclen(encode, ss, end); - if (ON_STR_END(ss)) { - MOP_OUT; - JUMP; - } - } + else if (IS_NEWLINE_CRLF(option) + && ONIGENC_IS_MBC_CRNL(encode, s, end)) { + ss += enclen(encode, ss, end); + if (ON_STR_END(ss)) { + MOP_OUT; + JUMP; + } + } #endif } goto fail; @@ -3437,7 +3437,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION); if (s != msa->gpos) - goto fail; + goto fail; MOP_OUT; JUMP; @@ -3487,9 +3487,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_GET_MEM_START(mem, stkp); if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - mem_start_stk[mem] = GET_STACK_INDEX(stkp); + mem_start_stk[mem] = GET_STACK_INDEX(stkp); else - mem_start_stk[mem] = 
(OnigStackIndex )((void* )stkp->u.mem.pstr); + mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr); STACK_PUSH_MEM_END_MARK(mem); MOP_OUT; @@ -3510,167 +3510,167 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_MEMNUM_INC(mem, p); backref: { - int len; - UChar *pstart, *pend; - - /* if you want to remove following line, - you should check in parse and compile time. */ - if (mem > num_mem) goto fail; - if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; - - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) - ? STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); - n = pend - pstart; - DATA_ENSURE(n); - sprev = s; - STRING_CMP(pstart, s, n); - while (sprev + (len = enclen_approx(encode, sprev, end)) < s) - sprev += len; - - MOP_OUT; - JUMP; + int len; + UChar *pstart, *pend; + + /* if you want to remove following line, + you should check in parse and compile time. */ + if (mem > num_mem) goto fail; + if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + STRING_CMP(pstart, s, n); + while (sprev + (len = enclen_approx(encode, sprev, end)) < s) + sprev += len; + + MOP_OUT; + JUMP; } CASE(OP_BACKREFN_IC) MOP_IN(OP_BACKREFN_IC); GET_MEMNUM_INC(mem, p); { - int len; - UChar *pstart, *pend; - - /* if you want to remove following line, - you should check in parse and compile time. 
*/ - if (mem > num_mem) goto fail; - if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; - - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) - ? STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); - n = pend - pstart; - DATA_ENSURE(n); - sprev = s; - STRING_CMP_IC(case_fold_flag, pstart, &s, n, end); - while (sprev + (len = enclen_approx(encode, sprev, end)) < s) - sprev += len; - - MOP_OUT; - JUMP; + int len; + UChar *pstart, *pend; + + /* if you want to remove following line, + you should check in parse and compile time. */ + if (mem > num_mem) goto fail; + if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + STRING_CMP_IC(case_fold_flag, pstart, &s, n, end); + while (sprev + (len = enclen_approx(encode, sprev, end)) < s) + sprev += len; + + MOP_OUT; + JUMP; } NEXT; CASE(OP_BACKREF_MULTI) MOP_IN(OP_BACKREF_MULTI); { - int len, is_fail; - UChar *pstart, *pend, *swork; - - GET_LENGTH_INC(tlen, p); - for (i = 0; i < tlen; i++) { - GET_MEMNUM_INC(mem, p); - - if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; - - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) - ? 
STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); - n = pend - pstart; - DATA_ENSURE_CONTINUE(n); - sprev = s; - swork = s; - STRING_CMP_VALUE(pstart, swork, n, is_fail); - if (is_fail) continue; - s = swork; - while (sprev + (len = enclen_approx(encode, sprev, end)) < s) - sprev += len; - - p += (SIZE_MEMNUM * (tlen - i - 1)); - break; /* success */ - } - if (i == tlen) goto fail; - MOP_OUT; - JUMP; + int len, is_fail; + UChar *pstart, *pend, *swork; + + GET_LENGTH_INC(tlen, p); + for (i = 0; i < tlen; i++) { + GET_MEMNUM_INC(mem, p); + + if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE_CONTINUE(n); + sprev = s; + swork = s; + STRING_CMP_VALUE(pstart, swork, n, is_fail); + if (is_fail) continue; + s = swork; + while (sprev + (len = enclen_approx(encode, sprev, end)) < s) + sprev += len; + + p += (SIZE_MEMNUM * (tlen - i - 1)); + break; /* success */ + } + if (i == tlen) goto fail; + MOP_OUT; + JUMP; } NEXT; CASE(OP_BACKREF_MULTI_IC) MOP_IN(OP_BACKREF_MULTI_IC); { - int len, is_fail; - UChar *pstart, *pend, *swork; - - GET_LENGTH_INC(tlen, p); - for (i = 0; i < tlen; i++) { - GET_MEMNUM_INC(mem, p); - - if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; - - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) - ? 
STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); - n = pend - pstart; - DATA_ENSURE_CONTINUE(n); - sprev = s; - swork = s; - STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail); - if (is_fail) continue; - s = swork; - while (sprev + (len = enclen(encode, sprev, end)) < s) - sprev += len; - - p += (SIZE_MEMNUM * (tlen - i - 1)); - break; /* success */ - } - if (i == tlen) goto fail; - MOP_OUT; - JUMP; + int len, is_fail; + UChar *pstart, *pend, *swork; + + GET_LENGTH_INC(tlen, p); + for (i = 0; i < tlen; i++) { + GET_MEMNUM_INC(mem, p); + + if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE_CONTINUE(n); + sprev = s; + swork = s; + STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail); + if (is_fail) continue; + s = swork; + while (sprev + (len = enclen(encode, sprev, end)) < s) + sprev += len; + + p += (SIZE_MEMNUM * (tlen - i - 1)); + break; /* success */ + } + if (i == tlen) goto fail; + MOP_OUT; + JUMP; } #ifdef USE_BACKREF_WITH_LEVEL CASE(OP_BACKREF_WITH_LEVEL) { - int len; - OnigOptionType ic; - LengthType level; - - GET_OPTION_INC(ic, p); - GET_LENGTH_INC(level, p); - GET_LENGTH_INC(tlen, p); - - sprev = s; - if (backref_match_at_nested_level(reg, stk, stk_base, ic, - case_fold_flag, (int )level, (int )tlen, p, &s, end)) { - while (sprev + (len = enclen(encode, sprev, end)) < s) - sprev += len; - - p += (SIZE_MEMNUM * tlen); - } - else - goto fail; - - MOP_OUT; - JUMP; + int len; + OnigOptionType ic; + LengthType level; + + GET_OPTION_INC(ic, p); + GET_LENGTH_INC(level, p); + GET_LENGTH_INC(tlen, p); + + sprev = s; + 
if (backref_match_at_nested_level(reg, stk, stk_base, ic, + case_fold_flag, (int )level, (int )tlen, p, &s, end)) { + while (sprev + (len = enclen(encode, sprev, end)) < s) + sprev += len; + + p += (SIZE_MEMNUM * tlen); + } + else + goto fail; + + MOP_OUT; + JUMP; } #endif @@ -3697,33 +3697,33 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_NULL_CHECK_END) MOP_IN(OP_NULL_CHECK_END); { - int isnull; + int isnull; - GET_MEMNUM_INC(mem, p); /* mem: null check id */ - STACK_NULL_CHECK(isnull, mem, s); - if (isnull) { + GET_MEMNUM_INC(mem, p); /* mem: null check id */ + STACK_NULL_CHECK(isnull, mem, s); + if (isnull) { #ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIuPTR" (%p)\n", - (int )mem, (uintptr_t )s, s); + fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIuPTR" (%p)\n", + (int )mem, (uintptr_t )s, s); #endif - null_check_found: - /* empty loop founded, skip next instruction */ - switch (*p++) { - case OP_JUMP: - case OP_PUSH: - p += SIZE_RELADDR; - break; - case OP_REPEAT_INC: - case OP_REPEAT_INC_NG: - case OP_REPEAT_INC_SG: - case OP_REPEAT_INC_NG_SG: - p += SIZE_MEMNUM; - break; - default: - goto unexpected_bytecode_error; - break; - } - } + null_check_found: + /* empty loop founded, skip next instruction */ + switch (*p++) { + case OP_JUMP: + case OP_PUSH: + p += SIZE_RELADDR; + break; + case OP_REPEAT_INC: + case OP_REPEAT_INC_NG: + case OP_REPEAT_INC_SG: + case OP_REPEAT_INC_NG_SG: + p += SIZE_MEMNUM; + break; + default: + goto unexpected_bytecode_error; + break; + } + } } MOP_OUT; JUMP; @@ -3731,18 +3731,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST); { - int isnull; + int isnull; - GET_MEMNUM_INC(mem, p); /* mem: null check id */ - STACK_NULL_CHECK_MEMST(isnull, mem, s, reg); - if (isnull) { + GET_MEMNUM_INC(mem, p); /* mem: null check id */ + 
STACK_NULL_CHECK_MEMST(isnull, mem, s, reg); + if (isnull) { # ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR" (%p)\n", - (int )mem, (uintptr_t )s, s); + fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR" (%p)\n", + (int )mem, (uintptr_t )s, s); # endif - if (isnull == -1) goto fail; - goto null_check_found; - } + if (isnull == -1) goto fail; + goto null_check_found; + } } MOP_OUT; JUMP; @@ -3752,25 +3752,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_NULL_CHECK_END_MEMST_PUSH) MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH); { - int isnull; + int isnull; - GET_MEMNUM_INC(mem, p); /* mem: null check id */ + GET_MEMNUM_INC(mem, p); /* mem: null check id */ # ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT - STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg); + STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg); # else - STACK_NULL_CHECK_REC(isnull, mem, s); + STACK_NULL_CHECK_REC(isnull, mem, s); # endif - if (isnull) { + if (isnull) { # ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR" (%p)\n", - (int )mem, (uintptr_t )s, s); + fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR" (%p)\n", + (int )mem, (uintptr_t )s, s); # endif - if (isnull == -1) goto fail; - goto null_check_found; - } - else { - STACK_PUSH_NULL_CHECK_END(mem); - } + if (isnull == -1) goto fail; + goto null_check_found; + } + else { + STACK_PUSH_NULL_CHECK_END(mem); + } } MOP_OUT; JUMP; @@ -3806,10 +3806,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_RELADDR_INC(addr, p); STATE_CHECK_VAL(scv, mem); if (scv) { - p += addr; + p += addr; } else { - STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep); + STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep); } MOP_OUT; JUMP; @@ -3838,11 +3838,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_PUSH_OR_JUMP_EXACT1) MOP_IN(OP_PUSH_OR_JUMP_EXACT1); 
GET_RELADDR_INC(addr, p); if (*p == *s && DATA_ENSURE_CHECK1) { - p++; - CHECK_MATCH_CACHE; - STACK_PUSH_ALT(p + addr, s, sprev, pkeep); - MOP_OUT; - JUMP; + p++; + CHECK_MATCH_CACHE; + STACK_PUSH_ALT(p + addr, s, sprev, pkeep); + MOP_OUT; + JUMP; } p += (addr + 1); MOP_OUT; @@ -3853,10 +3853,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_RELADDR_INC(addr, p); CHECK_MATCH_CACHE; if (*p == *s) { - p++; - STACK_PUSH_ALT(p + addr, s, sprev, pkeep); - MOP_OUT; - JUMP; + p++; + STACK_PUSH_ALT(p + addr, s, sprev, pkeep); + MOP_OUT; + JUMP; } p++; INC_NUM_FAILS; @@ -3865,35 +3865,35 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_REPEAT) MOP_IN(OP_REPEAT); { - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ - GET_RELADDR_INC(addr, p); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + GET_RELADDR_INC(addr, p); - STACK_ENSURE(1); - repeat_stk[mem] = GET_STACK_INDEX(stk); - STACK_PUSH_REPEAT(mem, p); + STACK_ENSURE(1); + repeat_stk[mem] = GET_STACK_INDEX(stk); + STACK_PUSH_REPEAT(mem, p); - if (reg->repeat_range[mem].lower == 0) { - CHECK_MATCH_CACHE; - STACK_PUSH_ALT(p + addr, s, sprev, pkeep); - } + if (reg->repeat_range[mem].lower == 0) { + CHECK_MATCH_CACHE; + STACK_PUSH_ALT(p + addr, s, sprev, pkeep); + } } MOP_OUT; JUMP; CASE(OP_REPEAT_NG) MOP_IN(OP_REPEAT_NG); { - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ - GET_RELADDR_INC(addr, p); - - STACK_ENSURE(1); - repeat_stk[mem] = GET_STACK_INDEX(stk); - STACK_PUSH_REPEAT(mem, p); - - if (reg->repeat_range[mem].lower == 0) { - CHECK_MATCH_CACHE; - STACK_PUSH_ALT(p, s, sprev, pkeep); - p += addr; - } + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + GET_RELADDR_INC(addr, p); + + STACK_ENSURE(1); + repeat_stk[mem] = GET_STACK_INDEX(stk); + STACK_PUSH_REPEAT(mem, p); + + if (reg->repeat_range[mem].lower == 0) { + CHECK_MATCH_CACHE; + STACK_PUSH_ALT(p, s, sprev, pkeep); + p += addr; + } } MOP_OUT; JUMP; @@ -3906,23 +3906,23 @@ match_at(regex_t* reg, const UChar* str, const 
UChar* end, repeat_inc: stkp->u.repeat.count++; if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) { - /* end of repeat. Nothing to do. */ + /* end of repeat. Nothing to do. */ } else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { #ifdef USE_MATCH_CACHE - if (*pbegin == OP_REPEAT_INC) { + if (*pbegin == OP_REPEAT_INC) { #undef MATCH_CACHE_HIT #define MATCH_CACHE_HIT stkp->u.repeat.count--; - CHECK_MATCH_CACHE; + CHECK_MATCH_CACHE; #undef MATCH_CACHE_HIT #define MATCH_CACHE_HIT ((void) 0) - } + } #endif - STACK_PUSH_ALT(p, s, sprev, pkeep); - p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */ + STACK_PUSH_ALT(p, s, sprev, pkeep); + p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */ } else { - p = stkp->u.repeat.pcode; + p = stkp->u.repeat.pcode; } STACK_PUSH_REPEAT_INC(si); MOP_OUT; @@ -3944,22 +3944,22 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, repeat_inc_ng: stkp->u.repeat.count++; if (stkp->u.repeat.count < reg->repeat_range[mem].upper) { - if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { - UChar* pcode = stkp->u.repeat.pcode; - - STACK_PUSH_REPEAT_INC(si); - if (*pbegin == OP_REPEAT_INC_NG) { - CHECK_MATCH_CACHE; - } - STACK_PUSH_ALT(pcode, s, sprev, pkeep); - } - else { - p = stkp->u.repeat.pcode; - STACK_PUSH_REPEAT_INC(si); - } + if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { + UChar* pcode = stkp->u.repeat.pcode; + + STACK_PUSH_REPEAT_INC(si); + if (*pbegin == OP_REPEAT_INC_NG) { + CHECK_MATCH_CACHE; + } + STACK_PUSH_ALT(pcode, s, sprev, pkeep); + } + else { + p = stkp->u.repeat.pcode; + STACK_PUSH_REPEAT_INC(si); + } } else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { - STACK_PUSH_REPEAT_INC(si); + STACK_PUSH_REPEAT_INC(si); } MOP_OUT; CHECK_INTERRUPT_IN_MATCH_AT; @@ -3979,9 +3979,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_POP_POS) MOP_IN(OP_POP_POS); { - STACK_POS_END(stkp); - s = stkp->u.state.pstr; - sprev = 
stkp->u.state.pstr_prev; + STACK_POS_END(stkp); + s = stkp->u.state.pstr; + sprev = stkp->u.state.pstr_prev; } MOP_OUT; JUMP; @@ -4020,15 +4020,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_LENGTH_INC(tlen, p); q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen); if (IS_NULL(q)) { - /* too short case -> success. ex. /(? success. ex. /(? aend) && (s > absent)) { - /* An empty match occurred in (?~...) at the start point. - * Never match. */ - STACK_POP; - goto fail; - } - else if ((s >= aend) && (s > absent)) { - if (s > aend) { - /* Only one (or less) character matched in the last iteration. - * This is not a possible point. */ - goto fail; - } - /* All possible points were found. Try matching after (?~...). */ - DATA_ENSURE(0); - p += addr; - } - else if (s == end) { - /* At the end of the string, just match with it */ - DATA_ENSURE(0); - p += addr; - } - else { - STACK_PUSH_ALT(p + addr, s, sprev, pkeep); /* Push possible point. */ - n = enclen(encode, s, end); - STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS); /* Save the original pos. */ - STACK_PUSH_ALT(selfp, s + n, s, pkeep); /* Next iteration. */ - STACK_PUSH_ABSENT; - ABSENT_END_POS = aend; - } + if ((absent > aend) && (s > absent)) { + /* An empty match occurred in (?~...) at the start point. + * Never match. */ + STACK_POP; + goto fail; + } + else if ((s >= aend) && (s > absent)) { + if (s > aend) { + /* Only one (or less) character matched in the last iteration. + * This is not a possible point. */ + goto fail; + } + /* All possible points were found. Try matching after (?~...). */ + DATA_ENSURE(0); + p += addr; + } + else if (s == end) { + /* At the end of the string, just match with it */ + DATA_ENSURE(0); + p += addr; + } + else { + STACK_PUSH_ALT(p + addr, s, sprev, pkeep); /* Push possible point. */ + n = enclen(encode, s, end); + STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS); /* Save the original pos. */ + STACK_PUSH_ALT(selfp, s + n, s, pkeep); /* Next iteration. 
*/ + STACK_PUSH_ABSENT; + ABSENT_END_POS = aend; + } } MOP_OUT; JUMP; @@ -4092,7 +4092,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, /* The pattern inside (?~...) was matched. * Set the end-pos temporary and go to next iteration. */ if (sprev < ABSENT_END_POS) - ABSENT_END_POS = sprev; + ABSENT_END_POS = sprev; #ifdef ONIG_DEBUG_MATCH fprintf(stderr, "ABSENT_END: end:%p\n", ABSENT_END_POS); #endif @@ -4119,9 +4119,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, GET_MEMNUM_INC(mem, p); GET_RELADDR_INC(addr, p); if ((mem > num_mem) || - (mem_end_stk[mem] == INVALID_STACK_INDEX) || - (mem_start_stk[mem] == INVALID_STACK_INDEX)) { - p += addr; + (mem_end_stk[mem] == INVALID_STACK_INDEX) || + (mem_start_stk[mem] == INVALID_STACK_INDEX)) { + p += addr; } MOP_OUT; JUMP; @@ -4132,9 +4132,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_FAIL) if (0) { - /* fall */ + /* fall */ fail: - MOP_OUT; + MOP_OUT; } MOP_IN(OP_FAIL); STACK_POP; @@ -4145,71 +4145,71 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #ifdef USE_MATCH_CACHE if ( - msa->match_cache_status != MATCH_CACHE_STATUS_DISABLED && - ++msa->num_fails >= (long)(end - str) * msa->num_cache_opcodes + msa->match_cache_status != MATCH_CACHE_STATUS_DISABLED && + ++msa->num_fails >= (long)(end - str) * msa->num_cache_opcodes ) { - if (msa->match_cache_status == MATCH_CACHE_STATUS_UNINIT) { - msa->match_cache_status = MATCH_CACHE_STATUS_INIT; - OnigPosition r = count_num_cache_opcodes(reg, &msa->num_cache_opcodes); - if (r < 0) goto bytecode_error; - } - if (msa->num_cache_opcodes == NUM_CACHE_OPCODES_IMPOSSIBLE || msa->num_cache_opcodes == 0) { - msa->match_cache_status = MATCH_CACHE_STATUS_DISABLED; - goto fail_match_cache; - } - if (msa->num_fails < (long)(end - str) * msa->num_cache_opcodes) { - goto fail_match_cache; - } - if (msa->cache_opcodes == NULL) { - msa->match_cache_status = MATCH_CACHE_STATUS_ENABLED; - OnigCacheOpcode* cache_opcodes = 
(OnigCacheOpcode*)xmalloc(msa->num_cache_opcodes * sizeof(OnigCacheOpcode)); - if (cache_opcodes == NULL) { - return ONIGERR_MEMORY; - } - OnigPosition r = init_cache_opcodes(reg, cache_opcodes, &msa->num_cache_points); - if (r < 0) { - if (r == ONIGERR_UNEXPECTED_BYTECODE) goto unexpected_bytecode_error; - else goto bytecode_error; - } - msa->cache_opcodes = cache_opcodes; + if (msa->match_cache_status == MATCH_CACHE_STATUS_UNINIT) { + msa->match_cache_status = MATCH_CACHE_STATUS_INIT; + OnigPosition r = count_num_cache_opcodes(reg, &msa->num_cache_opcodes); + if (r < 0) goto bytecode_error; + } + if (msa->num_cache_opcodes == NUM_CACHE_OPCODES_IMPOSSIBLE || msa->num_cache_opcodes == 0) { + msa->match_cache_status = MATCH_CACHE_STATUS_DISABLED; + goto fail_match_cache; + } + if (msa->num_fails < (long)(end - str) * msa->num_cache_opcodes) { + goto fail_match_cache; + } + if (msa->cache_opcodes == NULL) { + msa->match_cache_status = MATCH_CACHE_STATUS_ENABLED; + OnigCacheOpcode* cache_opcodes = (OnigCacheOpcode*)xmalloc(msa->num_cache_opcodes * sizeof(OnigCacheOpcode)); + if (cache_opcodes == NULL) { + return ONIGERR_MEMORY; + } + OnigPosition r = init_cache_opcodes(reg, cache_opcodes, &msa->num_cache_points); + if (r < 0) { + if (r == ONIGERR_UNEXPECTED_BYTECODE) goto unexpected_bytecode_error; + else goto bytecode_error; + } + msa->cache_opcodes = cache_opcodes; #ifdef ONIG_DEBUG_MATCH_CACHE - fprintf(stderr, "MATCH CACHE: #cache opcodes = %ld\n", msa->num_cache_opcodes); - fprintf(stderr, "MATCH CACHE: #cache points = %ld\n", msa->num_cache_points); - fprintf(stderr, "MATCH CACHE: cache opcodes (%p):\n", msa->cache_opcodes); - for (int i = 0; i < msa->num_cache_opcodes; i++) { - fprintf(stderr, "MATCH CACHE: [%p] cache_point=%ld outer_repeat_mem=%d num_cache_opcodes_at_outer_repeat=%ld num_cache_opcodes_in_outer_repeat=%ld lookaround_nesting=%d match_addr=%p\n", msa->cache_opcodes[i].addr, msa->cache_opcodes[i].cache_point, 
msa->cache_opcodes[i].outer_repeat_mem, msa->cache_opcodes[i].num_cache_points_at_outer_repeat, msa->cache_opcodes[i].num_cache_points_in_outer_repeat, msa->cache_opcodes[i].lookaround_nesting, msa->cache_opcodes[i].match_addr); - } + fprintf(stderr, "MATCH CACHE: #cache opcodes = %ld\n", msa->num_cache_opcodes); + fprintf(stderr, "MATCH CACHE: #cache points = %ld\n", msa->num_cache_points); + fprintf(stderr, "MATCH CACHE: cache opcodes (%p):\n", msa->cache_opcodes); + for (int i = 0; i < msa->num_cache_opcodes; i++) { + fprintf(stderr, "MATCH CACHE: [%p] cache_point=%ld outer_repeat_mem=%d num_cache_opcodes_at_outer_repeat=%ld num_cache_opcodes_in_outer_repeat=%ld lookaround_nesting=%d match_addr=%p\n", msa->cache_opcodes[i].addr, msa->cache_opcodes[i].cache_point, msa->cache_opcodes[i].outer_repeat_mem, msa->cache_opcodes[i].num_cache_points_at_outer_repeat, msa->cache_opcodes[i].num_cache_points_in_outer_repeat, msa->cache_opcodes[i].lookaround_nesting, msa->cache_opcodes[i].match_addr); + } #endif - } - if (msa->match_cache_buf == NULL) { - size_t length = (end - str) + 1; - size_t num_match_cache_points = (size_t)msa->num_cache_points * length; + } + if (msa->match_cache_buf == NULL) { + size_t length = (end - str) + 1; + size_t num_match_cache_points = (size_t)msa->num_cache_points * length; #ifdef ONIG_DEBUG_MATCH_CACHE - fprintf(stderr, "MATCH CACHE: #match cache points = %zu (length = %zu)\n", num_match_cache_points, length); + fprintf(stderr, "MATCH CACHE: #match cache points = %zu (length = %zu)\n", num_match_cache_points, length); #endif - /* Overflow check */ - if (num_match_cache_points / length != (size_t)msa->num_cache_points) { - return ONIGERR_MEMORY; - } - if (num_match_cache_points >= LONG_MAX_LIMIT) { - return ONIGERR_MEMORY; - } - size_t match_cache_buf_length = (num_match_cache_points >> 3) + (num_match_cache_points & 7 ? 
1 : 0) + 1; - uint8_t* match_cache_buf = (uint8_t*)xmalloc(match_cache_buf_length * sizeof(uint8_t)); - if (match_cache_buf == NULL) { - return ONIGERR_MEMORY; - } - xmemset(match_cache_buf, 0, match_cache_buf_length * sizeof(uint8_t)); - msa->match_cache_buf = match_cache_buf; - } + /* Overflow check */ + if (num_match_cache_points / length != (size_t)msa->num_cache_points) { + return ONIGERR_MEMORY; + } + if (num_match_cache_points >= LONG_MAX_LIMIT) { + return ONIGERR_MEMORY; + } + size_t match_cache_buf_length = (num_match_cache_points >> 3) + (num_match_cache_points & 7 ? 1 : 0) + 1; + uint8_t* match_cache_buf = (uint8_t*)xmalloc(match_cache_buf_length * sizeof(uint8_t)); + if (match_cache_buf == NULL) { + return ONIGERR_MEMORY; + } + xmemset(match_cache_buf, 0, match_cache_buf_length * sizeof(uint8_t)); + msa->match_cache_buf = match_cache_buf; + } } fail_match_cache: #endif #ifdef USE_COMBINATION_EXPLOSION_CHECK if (stk->u.state.state_check != 0) { - stk->type = STK_STATE_CHECK_MARK; - stk++; + stk->type = STK_STATE_CHECK_MARK; + stk++; } #endif @@ -4252,7 +4252,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, static UChar* slow_search(OnigEncoding enc, UChar* target, UChar* target_end, - const UChar* text, const UChar* text_end, UChar* text_range) + const UChar* text, const UChar* text_end, UChar* text_range) { UChar *t, *p, *s, *end; @@ -4268,10 +4268,10 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end, while (s < end) { if (*s == *target) { - p = s + 1; - t = target + 1; - if (target_end == t || memcmp(t, p, target_end - t) == 0) - return s; + p = s + 1; + t = target + 1; + if (target_end == t || memcmp(t, p, target_end - t) == 0) + return s; } s += n; } @@ -4282,7 +4282,7 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end, p = s + 1; t = target + 1; if (target_end == t || memcmp(t, p, target_end - t) == 0) - return s; + return s; } s += enclen(enc, s, text_end); } @@ -4292,8 +4292,8 @@ 
slow_search(OnigEncoding enc, UChar* target, UChar* target_end, static int str_lower_case_match(OnigEncoding enc, int case_fold_flag, - const UChar* t, const UChar* tend, - const UChar* p, const UChar* end) + const UChar* t, const UChar* tend, + const UChar* p, const UChar* end) { int lowlen; UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; @@ -4312,8 +4312,8 @@ str_lower_case_match(OnigEncoding enc, int case_fold_flag, static UChar* slow_search_ic(OnigEncoding enc, int case_fold_flag, - UChar* target, UChar* target_end, - const UChar* text, const UChar* text_end, UChar* text_range) + UChar* target, UChar* target_end, + const UChar* text, const UChar* text_end, UChar* text_range) { UChar *s, *end; @@ -4326,7 +4326,7 @@ slow_search_ic(OnigEncoding enc, int case_fold_flag, while (s < end) { if (str_lower_case_match(enc, case_fold_flag, target, target_end, - s, text_end)) + s, text_end)) return s; s += enclen(enc, s, text_end); @@ -4337,8 +4337,8 @@ slow_search_ic(OnigEncoding enc, int case_fold_flag, static UChar* slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, - const UChar* text, const UChar* adjust_text, - const UChar* text_end, const UChar* text_start) + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) { UChar *t, *p, *s; @@ -4354,12 +4354,12 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, p = s + 1; t = target + 1; while (t < target_end) { - if (*t != *p++) - break; - t++; + if (*t != *p++) + break; + t++; } if (t == target_end) - return s; + return s; } s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end); } @@ -4369,9 +4369,9 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, static UChar* slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, - UChar* target, UChar* target_end, - const UChar* text, const UChar* adjust_text, - const UChar* text_end, const UChar* text_start) + UChar* target, UChar* target_end, + 
const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) { UChar *s; @@ -4384,7 +4384,7 @@ slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, while (s >= text) { if (str_lower_case_match(enc, case_fold_flag, - target, target_end, s, text_end)) + target, target_end, s, text_end)) return s; s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end); @@ -4397,8 +4397,8 @@ slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, /* Boyer-Moore-Horspool search applied to a multibyte string */ static UChar* bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, - const UChar* text_range) + const UChar* text, const UChar* text_end, + const UChar* text_range) { const UChar *s, *se, *t, *p, *end; const UChar *tail; @@ -4406,7 +4406,7 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, # ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", - (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); + (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); # endif tail = target_end - 1; @@ -4422,13 +4422,13 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, p = se = s + tlen1; t = tail; while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; + if (t == target) return (UChar* )s; + p--; t--; } skip = reg->map[*se]; t = s; do { - s += enclen(reg->enc, s, end); + s += enclen(reg->enc, s, end); } while ((s - t) < skip && s < end); } } @@ -4438,13 +4438,13 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, p = se = s + tlen1; t = tail; while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; + if (t == target) return (UChar* )s; + p--; t--; } skip = reg->int_map[*se]; t = s; do { - s 
+= enclen(reg->enc, s, end); + s += enclen(reg->enc, s, end); } while ((s - t) < skip && s < end); } # endif @@ -4456,14 +4456,14 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, /* Boyer-Moore-Horspool search */ static UChar* bm_search(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, const UChar* text_range) + const UChar* text, const UChar* text_end, const UChar* text_range) { const UChar *s, *t, *p, *end; const UChar *tail; # ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", - (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); + (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); # endif end = text_range + (target_end - target) - 1; @@ -4478,11 +4478,11 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, t = tail; # ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "bm_search_loop: pos: %"PRIdPTR" %s\n", - (intptr_t )(s - text), s); + (intptr_t )(s - text), s); # endif while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; + if (t == target) return (UChar* )p; + p--; t--; } s += reg->map[*s]; } @@ -4493,8 +4493,8 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, p = s; t = tail; while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; + if (t == target) return (UChar* )p; + p--; t--; } s += reg->int_map[*s]; } @@ -4506,8 +4506,8 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, /* Boyer-Moore-Horspool search applied to a multibyte string (ignore case) */ static UChar* bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, - const UChar* text_range) + const UChar* text, const UChar* text_end, + const UChar* text_range) { const UChar *s, *se, *t, *end; const UChar 
*tail; @@ -4517,7 +4517,7 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, # ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n", - (int )text, text, (int )text_end, text_end, (int )text_range, text_range); + (int )text, text, (int )text_end, text_end, (int )text_range, text_range); # endif tail = target_end - 1; @@ -4532,12 +4532,12 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, while (s < end) { se = s + tlen1; if (str_lower_case_match(enc, case_fold_flag, target, target_end, - s, se + 1)) - return (UChar* )s; + s, se + 1)) + return (UChar* )s; skip = reg->map[*se]; t = s; do { - s += enclen(reg->enc, s, end); + s += enclen(reg->enc, s, end); } while ((s - t) < skip && s < end); } } @@ -4546,12 +4546,12 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, while (s < end) { se = s + tlen1; if (str_lower_case_match(enc, case_fold_flag, target, target_end, - s, se + 1)) - return (UChar* )s; + s, se + 1)) + return (UChar* )s; skip = reg->int_map[*se]; t = s; do { - s += enclen(reg->enc, s, end); + s += enclen(reg->enc, s, end); } while ((s - t) < skip && s < end); } # endif @@ -4563,7 +4563,7 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, /* Boyer-Moore-Horspool search (ignore case) */ static UChar* bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, const UChar* text_range) + const UChar* text, const UChar* text_end, const UChar* text_range) { const UChar *s, *p, *end; const UChar *tail; @@ -4572,7 +4572,7 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, # ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n", - (int )text, text, (int )text_end, text_end, (int )text_range, text_range); + (int )text, text, (int 
)text_end, text_end, (int )text_range, text_range); # endif end = text_range + (target_end - target) - 1; @@ -4585,8 +4585,8 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, while (s < end) { p = s - (target_end - target) + 1; if (str_lower_case_match(enc, case_fold_flag, target, target_end, - p, s + 1)) - return (UChar* )p; + p, s + 1)) + return (UChar* )p; s += reg->map[*s]; } } @@ -4595,8 +4595,8 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, while (s < end) { p = s - (target_end - target) + 1; if (str_lower_case_match(enc, case_fold_flag, target, target_end, - p, s + 1)) - return (UChar* )p; + p, s + 1)) + return (UChar* )p; s += reg->int_map[*s]; } # endif @@ -4609,8 +4609,8 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, /* Sunday's quick search applied to a multibyte string */ static UChar* bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, - const UChar* text_range) + const UChar* text, const UChar* text_end, + const UChar* text_range) { const UChar *s, *se, *t, *p, *end; const UChar *tail; @@ -4619,7 +4619,7 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, # ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", - (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); + (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); # endif tail = target_end - 1; @@ -4635,14 +4635,14 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, p = se = s + tlen1; t = tail; while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; + if (t == target) return (UChar* )s; + p--; t--; } if (s + 1 >= end) break; skip = reg->map[se[1]]; t = s; do { - s += enclen(enc, s, end); + s += enclen(enc, s, end); 
} while ((s - t) < skip && s < end); } } @@ -4652,14 +4652,14 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, p = se = s + tlen1; t = tail; while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; + if (t == target) return (UChar* )s; + p--; t--; } if (s + 1 >= end) break; skip = reg->int_map[se[1]]; t = s; do { - s += enclen(enc, s, end); + s += enclen(enc, s, end); } while ((s - t) < skip && s < end); } # endif @@ -4671,7 +4671,7 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, /* Sunday's quick search */ static UChar* bm_search(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, const UChar* text_range) + const UChar* text, const UChar* text_end, const UChar* text_range) { const UChar *s, *t, *p, *end; const UChar *tail; @@ -4679,7 +4679,7 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, # ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", - (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); + (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); # endif tail = target_end - 1; @@ -4694,8 +4694,8 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, p = s; t = tail; while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; + if (t == target) return (UChar* )p; + p--; t--; } if (s + 1 >= end) break; s += reg->map[s[1]]; @@ -4707,8 +4707,8 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, p = s; t = tail; while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; + if (t == target) return (UChar* )p; + p--; t--; } if (s + 1 >= end) break; s += reg->int_map[s[1]]; @@ -4721,8 +4721,8 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, /* Sunday's quick search applied to a 
multibyte string (ignore case) */ static UChar* bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, - const UChar* text_range) + const UChar* text, const UChar* text_end, + const UChar* text_range) { const UChar *s, *se, *t, *end; const UChar *tail; @@ -4732,7 +4732,7 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, # ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "bm_search_notrev_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", - (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); + (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); # endif tail = target_end - 1; @@ -4747,13 +4747,13 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, while (s < end) { se = s + tlen1; if (str_lower_case_match(enc, case_fold_flag, target, target_end, - s, se + 1)) - return (UChar* )s; + s, se + 1)) + return (UChar* )s; if (s + 1 >= end) break; skip = reg->map[se[1]]; t = s; do { - s += enclen(enc, s, end); + s += enclen(enc, s, end); } while ((s - t) < skip && s < end); } } @@ -4762,13 +4762,13 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, while (s < end) { se = s + tlen1; if (str_lower_case_match(enc, case_fold_flag, target, target_end, - s, se + 1)) - return (UChar* )s; + s, se + 1)) + return (UChar* )s; if (s + 1 >= end) break; skip = reg->int_map[se[1]]; t = s; do { - s += enclen(enc, s, end); + s += enclen(enc, s, end); } while ((s - t) < skip && s < end); } # endif @@ -4780,7 +4780,7 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, /* Sunday's quick search (ignore case) */ static UChar* bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, const UChar* text_range) + const UChar* text, const UChar* 
text_end, const UChar* text_range) { const UChar *s, *p, *end; const UChar *tail; @@ -4790,7 +4790,7 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, # ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "bm_search_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", - (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); + (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); # endif tail = target_end - 1; @@ -4804,8 +4804,8 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, while (s < end) { p = s - tlen1; if (str_lower_case_match(enc, case_fold_flag, target, target_end, - p, s + 1)) - return (UChar* )p; + p, s + 1)) + return (UChar* )p; if (s + 1 >= end) break; s += reg->map[s[1]]; } @@ -4815,8 +4815,8 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, while (s < end) { p = s - tlen1; if (str_lower_case_match(enc, case_fold_flag, target, target_end, - p, s + 1)) - return (UChar* )p; + p, s + 1)) + return (UChar* )p; if (s + 1 >= end) break; s += reg->int_map[s[1]]; } @@ -4829,7 +4829,7 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, #ifdef USE_INT_MAP_BACKWARD static int set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, - int** skip) + int** skip) { int i, len; @@ -4850,8 +4850,8 @@ set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, static UChar* bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* adjust_text, - const UChar* text_end, const UChar* text_start) + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) { const UChar *s, *t, *p; @@ -4880,7 +4880,7 @@ bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, static UChar* map_search(OnigEncoding enc, UChar map[], - const UChar* text, const 
UChar* text_range, const UChar* text_end) + const UChar* text, const UChar* text_range, const UChar* text_end) { const UChar *s = text; @@ -4894,8 +4894,8 @@ map_search(OnigEncoding enc, UChar map[], static UChar* map_search_backward(OnigEncoding enc, UChar map[], - const UChar* text, const UChar* adjust_text, - const UChar* text_start, const UChar* text_end) + const UChar* text, const UChar* adjust_text, + const UChar* text_start, const UChar* text_end) { const UChar *s = text_start; @@ -4909,7 +4909,7 @@ map_search_backward(OnigEncoding enc, UChar map[], extern OnigPosition onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, - OnigOptionType option) + OnigOptionType option) { ptrdiff_t r; UChar *prev; @@ -4933,9 +4933,9 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end); r = match_at(reg, str, end, #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - end, + end, #endif - at, prev, &msa); + at, prev, &msa); } MATCH_ARG_FREE(msa); @@ -4944,14 +4944,14 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On static int forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, - UChar* range, UChar** low, UChar** high, UChar** low_prev) + UChar* range, UChar** low, UChar** high, UChar** low_prev) { UChar *p, *pprev = (UChar* )NULL; size_t input_len = end - str; #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n", - (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range); + (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range); #endif if (reg->dmin > input_len) { @@ -4978,7 +4978,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, break; case ONIG_OPTIMIZE_EXACT_IC: p = 
slow_search_ic(reg->enc, reg->case_fold_flag, - reg->exact, reg->exact_end, p, end, range); + reg->exact, reg->exact_end, p, end, range); break; case ONIG_OPTIMIZE_EXACT_BM: @@ -5015,62 +5015,62 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, switch (reg->sub_anchor) { case ANCHOR_BEGIN_LINE: - if (!ON_STR_BEGIN(p)) { - prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), p, end); - if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) - goto retry_gate; - } - break; + if (!ON_STR_BEGIN(p)) { + prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), p, end); + if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) + goto retry_gate; + } + break; case ANCHOR_END_LINE: - if (ON_STR_END(p)) { + if (ON_STR_END(p)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - prev = (UChar* )onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), p); - if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) - goto retry_gate; + prev = (UChar* )onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), p); + if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) + goto retry_gate; #endif - } - else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) - goto retry_gate; - break; + } + else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) + goto retry_gate; + break; } } if (reg->dmax == 0) { *low = p; if (low_prev) { - if (*low > s) - *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end); - else - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), p, end); + if (*low > s) + *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end); + else + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? 
pprev : str), p, end); } *high = p; } else { if (reg->dmax != ONIG_INFINITE_DISTANCE) { - if ((OnigDistance)(p - str) < reg->dmax) { - *low = (UChar* )str; - if (low_prev) - *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end); - } - else { - *low = p - reg->dmax; - if (*low > s) { - *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, - *low, end, (const UChar** )low_prev); - if (low_prev && IS_NULL(*low_prev)) - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : s), *low, end); - } - else { - if (low_prev) - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), *low, end); - } - } + if ((OnigDistance)(p - str) < reg->dmax) { + *low = (UChar* )str; + if (low_prev) + *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end); + } + else { + *low = p - reg->dmax; + if (*low > s) { + *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, + *low, end, (const UChar** )low_prev); + if (low_prev && IS_NULL(*low_prev)) + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : s), *low, end); + } + else { + if (low_prev) + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? 
pprev : str), *low, end); + } + } } /* no needs to adjust *high, *high is used as range check only */ if ((OnigDistance)(p - str) < reg->dmin) @@ -5082,7 +5082,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "forward_search_range success: low: %"PRIdPTR", high: %"PRIdPTR", dmin: %"PRIdPTR", dmax: %"PRIdPTR"\n", - *low - str, *high - str, reg->dmin, reg->dmax); + *low - str, *high - str, reg->dmin, reg->dmax); #endif return 1; /* success */ } @@ -5094,8 +5094,8 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, static int backward_search_range(regex_t* reg, const UChar* str, const UChar* end, - UChar* s, const UChar* range, UChar* adjrange, - UChar** low, UChar** high) + UChar* s, const UChar* range, UChar* adjrange, + UChar** low, UChar** high) { UChar *p; size_t input_len = end - str; @@ -5111,15 +5111,15 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, case ONIG_OPTIMIZE_EXACT: exact_method: p = slow_search_backward(reg->enc, reg->exact, reg->exact_end, - range, adjrange, end, p); + range, adjrange, end, p); break; case ONIG_OPTIMIZE_EXACT_IC: case ONIG_OPTIMIZE_EXACT_BM_IC: case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC: p = slow_search_backward_ic(reg->enc, reg->case_fold_flag, - reg->exact, reg->exact_end, - range, adjrange, end, p); + reg->exact, reg->exact_end, + range, adjrange, end, p); break; case ONIG_OPTIMIZE_EXACT_BM: @@ -5128,14 +5128,14 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, if (IS_NULL(reg->int_map_backward)) { int r; if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) - goto exact_method; + goto exact_method; r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, - &(reg->int_map_backward)); + &(reg->int_map_backward)); if (r) return r; } p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange, - end, p); + end, p); #else goto exact_method; #endif @@ -5152,49 
+5152,49 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, switch (reg->sub_anchor) { case ANCHOR_BEGIN_LINE: - if (!ON_STR_BEGIN(p)) { - prev = onigenc_get_prev_char_head(reg->enc, str, p, end); - if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) { - p = prev; - goto retry; - } - } - break; + if (!ON_STR_BEGIN(p)) { + prev = onigenc_get_prev_char_head(reg->enc, str, p, end); + if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) { + p = prev; + goto retry; + } + } + break; case ANCHOR_END_LINE: - if (ON_STR_END(p)) { + if (ON_STR_END(p)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); - if (IS_NULL(prev)) goto fail; - if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) { - p = prev; - goto retry; - } + prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); + if (IS_NULL(prev)) goto fail; + if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) { + p = prev; + goto retry; + } #endif - } - else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) { - p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end); - if (IS_NULL(p)) goto fail; - goto retry; - } - break; + } + else if (! 
ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) { + p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end); + if (IS_NULL(p)) goto fail; + goto retry; + } + break; } } if (reg->dmax != ONIG_INFINITE_DISTANCE) { if ((OnigDistance)(p - str) < reg->dmax) - *low = (UChar* )str; + *low = (UChar* )str; else - *low = p - reg->dmax; + *low = p - reg->dmax; if (reg->dmin != 0) { - if ((OnigDistance)(p - str) < reg->dmin) - *high = (UChar* )str; - else - *high = p - reg->dmin; + if ((OnigDistance)(p - str) < reg->dmin) + *high = (UChar* )str; + else + *high = p - reg->dmin; } else { - *high = p; + *high = p; } *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end); @@ -5202,7 +5202,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "backward_search_range: low: %d, high: %d\n", - (int )(*low - str), (int )(*high - str)); + (int )(*low - str), (int )(*high - str)); #endif return 1; /* success */ } @@ -5217,15 +5217,15 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, extern OnigPosition onig_search(regex_t* reg, const UChar* str, const UChar* end, - const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) + const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) { return onig_search_gpos(reg, str, end, start, start, range, region, option); } extern OnigPosition onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, - const UChar* global_pos, - const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) + const UChar* global_pos, + const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) { ptrdiff_t r; UChar *s, *prev; @@ -5325,30 +5325,30 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, begin_position: if (range > start) { - if (global_pos > start) - { - if (global_pos < range) - range = global_pos + 1; - } - 
else - range = start + 1; + if (global_pos > start) + { + if (global_pos < range) + range = global_pos + 1; + } + else + range = start + 1; } else - range = start; + range = start; } else if (reg->anchor & ANCHOR_BEGIN_BUF) { /* search str-position only */ if (range > start) { - if (start != str) goto mismatch_no_msa; - range = str + 1; + if (start != str) goto mismatch_no_msa; + range = str + 1; } else { - if (range <= str) { - start = str; - range = str; - } - else - goto mismatch_no_msa; + if (range <= str) { + start = str; + range = str; + } + else + goto mismatch_no_msa; } } else if (reg->anchor & ANCHOR_END_BUF) { @@ -5356,38 +5356,38 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, end_buf: if ((OnigDistance)(max_semi_end - str) < reg->anchor_dmin) - goto mismatch_no_msa; + goto mismatch_no_msa; if (range > start) { - if ((OnigDistance)(min_semi_end - start) > reg->anchor_dmax) { - start = min_semi_end - reg->anchor_dmax; - if (start < end) - start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end); - } - if ((OnigDistance)(max_semi_end - (range - 1)) < reg->anchor_dmin) { - if ((OnigDistance)(max_semi_end - str + 1) < reg->anchor_dmin) - goto mismatch_no_msa; - else - range = max_semi_end - reg->anchor_dmin + 1; - } - - if (start > range) goto mismatch_no_msa; - /* If start == range, match with empty at end. - Backward search is used. */ + if ((OnigDistance)(min_semi_end - start) > reg->anchor_dmax) { + start = min_semi_end - reg->anchor_dmax; + if (start < end) + start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end); + } + if ((OnigDistance)(max_semi_end - (range - 1)) < reg->anchor_dmin) { + if ((OnigDistance)(max_semi_end - str + 1) < reg->anchor_dmin) + goto mismatch_no_msa; + else + range = max_semi_end - reg->anchor_dmin + 1; + } + + if (start > range) goto mismatch_no_msa; + /* If start == range, match with empty at end. + Backward search is used. 
*/ } else { - if ((OnigDistance)(min_semi_end - range) > reg->anchor_dmax) { - range = min_semi_end - reg->anchor_dmax; - } - if ((OnigDistance)(max_semi_end - start) < reg->anchor_dmin) { - if ((OnigDistance)(max_semi_end - str) < reg->anchor_dmin) - goto mismatch_no_msa; - else { - start = max_semi_end - reg->anchor_dmin; - start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end); - } - } - if (range > start) goto mismatch_no_msa; + if ((OnigDistance)(min_semi_end - range) > reg->anchor_dmax) { + range = min_semi_end - reg->anchor_dmax; + } + if ((OnigDistance)(max_semi_end - start) < reg->anchor_dmin) { + if ((OnigDistance)(max_semi_end - str) < reg->anchor_dmin) + goto mismatch_no_msa; + else { + start = max_semi_end - reg->anchor_dmin; + start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end); + } + } + if (range > start) goto mismatch_no_msa; } } else if (reg->anchor & ANCHOR_SEMI_END_BUF) { @@ -5395,23 +5395,23 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, max_semi_end = (UChar* )end; if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) { - min_semi_end = pre_end; + min_semi_end = pre_end; #ifdef USE_CRNL_AS_LINE_TERMINATOR - pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1); - if (IS_NOT_NULL(pre_end) && - IS_NEWLINE_CRLF(reg->options) && - ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) { - min_semi_end = pre_end; - } + pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1); + if (IS_NOT_NULL(pre_end) && + IS_NEWLINE_CRLF(reg->options) && + ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) { + min_semi_end = pre_end; + } #endif - if (min_semi_end > str && start <= min_semi_end) { - goto end_buf; - } + if (min_semi_end > str && start <= min_semi_end) { + goto end_buf; + } } else { - min_semi_end = (UChar* )end; - goto end_buf; + min_semi_end = (UChar* )end; + goto end_buf; } } else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) { @@ -5443,7 +5443,7 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* 
end, #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n", - (int )(end - str), (int )(start - str), (int )(range - str)); + (int )(end - str), (int )(start - str), (int )(range - str)); #endif MATCH_ARG_INIT(msa, option, region, start, global_pos); @@ -5465,58 +5465,58 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, UChar *sch_range, *low, *high, *low_prev; if (reg->dmax != 0) { - if (reg->dmax == ONIG_INFINITE_DISTANCE) - sch_range = (UChar* )end; - else { - if ((OnigDistance)(end - range) < reg->dmax) - sch_range = (UChar* )end; - else { - sch_range = (UChar* )range + reg->dmax; - } - } + if (reg->dmax == ONIG_INFINITE_DISTANCE) + sch_range = (UChar* )end; + else { + if ((OnigDistance)(end - range) < reg->dmax) + sch_range = (UChar* )end; + else { + sch_range = (UChar* )range + reg->dmax; + } + } } else - sch_range = (UChar* )range; + sch_range = (UChar* )range; if ((end - start) < reg->threshold_len) - goto mismatch; + goto mismatch; if (reg->dmax != ONIG_INFINITE_DISTANCE) { - do { - if (! forward_search_range(reg, str, end, s, sch_range, - &low, &high, &low_prev)) goto mismatch; - if (s < low) { - s = low; - prev = low_prev; - } - while (s <= high) { - MATCH_AND_RETURN_CHECK(orig_range); - prev = s; - s += enclen(reg->enc, s, end); - } - } while (s < range); - goto mismatch; + do { + if (! forward_search_range(reg, str, end, s, sch_range, + &low, &high, &low_prev)) goto mismatch; + if (s < low) { + s = low; + prev = low_prev; + } + while (s <= high) { + MATCH_AND_RETURN_CHECK(orig_range); + prev = s; + s += enclen(reg->enc, s, end); + } + } while (s < range); + goto mismatch; } else { /* check only. */ - if (! 
forward_search_range(reg, str, end, s, sch_range, - &low, &high, (UChar** )NULL)) goto mismatch; - - if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { - do { - MATCH_AND_RETURN_CHECK(orig_range); - prev = s; - s += enclen(reg->enc, s, end); - - if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) { - while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0) - && s < range) { - prev = s; - s += enclen(reg->enc, s, end); - } - } - } while (s < range); - goto mismatch; - } + if (! forward_search_range(reg, str, end, s, sch_range, + &low, &high, (UChar** )NULL)) goto mismatch; + + if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { + do { + MATCH_AND_RETURN_CHECK(orig_range); + prev = s; + s += enclen(reg->enc, s, end); + + if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) { + while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0) + && s < range) { + prev = s; + s += enclen(reg->enc, s, end); + } + } + } while (s < range); + goto mismatch; + } } } @@ -5536,58 +5536,58 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, const UChar *min_range; if (range < end) - adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end); + adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end); else - adjrange = (UChar* )end; + adjrange = (UChar* )end; if ((OnigDistance)(end - range) > reg->dmin) - min_range = range + reg->dmin; + min_range = range + reg->dmin; else - min_range = end; + min_range = end; if (reg->dmax != ONIG_INFINITE_DISTANCE && - end - range >= reg->threshold_len) { - do { - if ((OnigDistance)(end - s) > reg->dmax) - sch_start = s + reg->dmax; - else - sch_start = (UChar* )end; - - if (backward_search_range(reg, str, end, sch_start, min_range, adjrange, - &low, &high) <= 0) - goto mismatch; - - if (s > high) - s = high; - - while (s >= low) { - prev = onigenc_get_prev_char_head(reg->enc, str, s, end); - MATCH_AND_RETURN_CHECK(orig_start); - s = prev; - } - } 
while (s >= range); - goto mismatch; + end - range >= reg->threshold_len) { + do { + if ((OnigDistance)(end - s) > reg->dmax) + sch_start = s + reg->dmax; + else + sch_start = (UChar* )end; + + if (backward_search_range(reg, str, end, sch_start, min_range, adjrange, + &low, &high) <= 0) + goto mismatch; + + if (s > high) + s = high; + + while (s >= low) { + prev = onigenc_get_prev_char_head(reg->enc, str, s, end); + MATCH_AND_RETURN_CHECK(orig_start); + s = prev; + } + } while (s >= range); + goto mismatch; } else { /* check only. */ - if (end - range < reg->threshold_len) goto mismatch; - - if (reg->dmax != 0) { - if (reg->dmax == ONIG_INFINITE_DISTANCE) - sch_start = (UChar* )end; - else { - if ((OnigDistance)(end - s) > reg->dmax) { - sch_start = s + reg->dmax; - sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, - start, sch_start, end); - } else - sch_start = (UChar* )end; - } - } - else - sch_start = (UChar* )s; - - if (backward_search_range(reg, str, end, sch_start, min_range, adjrange, - &low, &high) <= 0) goto mismatch; + if (end - range < reg->threshold_len) goto mismatch; + + if (reg->dmax != 0) { + if (reg->dmax == ONIG_INFINITE_DISTANCE) + sch_start = (UChar* )end; + else { + if ((OnigDistance)(end - s) > reg->dmax) { + sch_start = s + reg->dmax; + sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, + start, sch_start, end); + } else + sch_start = (UChar* )end; + } + } + else + sch_start = (UChar* )s; + + if (backward_search_range(reg, str, end, sch_start, min_range, adjrange, + &low, &high) <= 0) goto mismatch; } } @@ -5644,9 +5644,9 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, extern OnigPosition onig_scan(regex_t* reg, const UChar* str, const UChar* end, - OnigRegion* region, OnigOptionType option, - int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*), - void* callback_arg) + OnigRegion* region, OnigOptionType option, + int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*), + void* 
callback_arg) { OnigPosition r; OnigPosition n; @@ -5661,17 +5661,17 @@ onig_scan(regex_t* reg, const UChar* str, const UChar* end, rs = scan_callback(n, r, region, callback_arg); n++; if (rs != 0) - return rs; + return rs; if (region->end[0] == start - str) { - if (start >= end) break; - start += enclen(reg->enc, start, end); + if (start >= end) break; + start += enclen(reg->enc, start, end); } else - start = str + region->end[0]; + start = str + region->end[0]; if (start > end) - break; + break; } else if (r == ONIG_MISMATCH) { break; diff --git a/regparse.c b/regparse.c index 7b2bc7eea86f2f..418bd3814076d9 100644 --- a/regparse.c +++ b/regparse.c @@ -329,7 +329,7 @@ strdup_with_null(OnigEncoding enc, const UChar* s, const UChar* end) static UChar* strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end, - size_t capa) + size_t capa) { UChar* r; @@ -346,7 +346,7 @@ strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end /* dest on static area */ static UChar* strcat_capa_from_static(UChar* dest, UChar* dest_end, - const UChar* src, const UChar* src_end, size_t capa) + const UChar* src, const UChar* src_end, size_t capa) { UChar* r; @@ -426,7 +426,7 @@ onig_st_init_strend_table_with_size(st_index_t size) extern int onig_st_lookup_strend(hash_table_type* table, const UChar* str_key, - const UChar* end_key, hash_data_type *value) + const UChar* end_key, hash_data_type *value) { st_str_end_key key; @@ -438,7 +438,7 @@ onig_st_lookup_strend(hash_table_type* table, const UChar* str_key, extern int onig_st_insert_strend(hash_table_type* table, const UChar* str_key, - const UChar* end_key, hash_data_type value) + const UChar* end_key, hash_data_type value) { st_str_end_key* key; int result; @@ -629,10 +629,10 @@ i_names(HashDataType key_ ARG_UNUSED, HashDataType e_, HashDataType arg_) NameEntry* e = (NameEntry *)e_; INamesArg* arg = (INamesArg *)arg_; int r = (*(arg->func))(e->name, - e->name + e->name_len, - e->back_num, 
- (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), - arg->reg, arg->arg); + e->name + e->name_len, + e->back_num, + (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), + arg->reg, arg->arg); if (r != 0) { arg->ret = r; return ST_STOP; @@ -724,16 +724,16 @@ onig_print_names(FILE* fp, regex_t* reg) e = &(t->e[i]); fprintf(fp, "%s: ", e->name); if (e->back_num == 0) { - fputs("-", fp); + fputs("-", fp); } else if (e->back_num == 1) { - fprintf(fp, "%d", e->back_ref1); + fprintf(fp, "%d", e->back_ref1); } else { - for (j = 0; j < e->back_num; j++) { - if (j > 0) fprintf(fp, ", "); - fprintf(fp, "%d", e->back_refs[j]); - } + for (j = 0; j < e->back_num; j++) { + if (j > 0) fprintf(fp, ", "); + fprintf(fp, "%d", e->back_refs[j]); + } } fputs("\n", fp); } @@ -754,13 +754,13 @@ names_clear(regex_t* reg) for (i = 0; i < t->num; i++) { e = &(t->e[i]); if (IS_NOT_NULL(e->name)) { - xfree(e->name); - e->name = NULL; - e->name_len = 0; - e->back_num = 0; - e->back_alloc = 0; - xfree(e->back_refs); - e->back_refs = (int* )NULL; + xfree(e->name); + e->name = NULL; + e->name_len = 0; + e->back_num = 0; + e->back_alloc = 0; + xfree(e->back_refs); + e->back_refs = (int* )NULL; } } @@ -840,7 +840,7 @@ name_find(regex_t* reg, const UChar* name, const UChar* name_end) for (i = 0; i < t->num; i++) { e = &(t->e[i]); if (len == e->name_len && onig_strncmp(name, e->name, len) == 0) - return e; + return e; } } return (NameEntry* )NULL; @@ -858,8 +858,8 @@ onig_foreach_name(regex_t* reg, for (i = 0; i < t->num; i++) { e = &(t->e[i]); r = (*func)(e->name, e->name + e->name_len, e->back_num, - (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), - reg, arg); + (e->back_num > 1 ? 
e->back_refs : &(e->back_ref1)), + reg, arg); if (r != 0) return r; } } @@ -948,8 +948,8 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc); if (IS_NULL(t->e)) { - xfree(t); - return ONIGERR_MEMORY; + xfree(t); + return ONIGERR_MEMORY; } t->alloc = alloc; reg->name_table = t; @@ -967,11 +967,11 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) clear: for (i = t->num; i < t->alloc; i++) { - t->e[i].name = NULL; - t->e[i].name_len = 0; - t->e[i].back_num = 0; - t->e[i].back_alloc = 0; - t->e[i].back_refs = (int* )NULL; + t->e[i].name = NULL; + t->e[i].name_len = 0; + t->e[i].back_num = 0; + t->e[i].back_alloc = 0; + t->e[i].back_refs = (int* )NULL; } } e = &(t->e[t->num]); @@ -985,7 +985,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) if (e->back_num >= 1 && ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) { onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME, - name, name_end); + name, name_end); return ONIGERR_MULTIPLEX_DEFINED_NAME; } @@ -1004,12 +1004,12 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) } else { if (e->back_num > e->back_alloc) { - int* p; - alloc = e->back_alloc * 2; - p = (int* )xrealloc(e->back_refs, sizeof(int) * alloc); - CHECK_NULL_RETURN_MEMERR(p); - e->back_refs = p; - e->back_alloc = alloc; + int* p; + alloc = e->back_alloc * 2; + p = (int* )xrealloc(e->back_refs, sizeof(int) * alloc); + CHECK_NULL_RETURN_MEMERR(p); + e->back_refs = p; + e->back_alloc = alloc; } e->back_refs[e->back_num - 1] = backref; } @@ -1020,7 +1020,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) extern int onig_name_to_group_numbers(regex_t* reg, const UChar* name, - const UChar* name_end, int** nums) + const UChar* name_end, int** nums) { NameEntry* e = name_find(reg, name, name_end); @@ -1042,7 +1042,7 
@@ onig_name_to_group_numbers(regex_t* reg, const UChar* name, extern int onig_name_to_backref_number(regex_t* reg, const UChar* name, - const UChar* name_end, const OnigRegion *region) + const UChar* name_end, const OnigRegion *region) { int i, n, *nums; @@ -1056,8 +1056,8 @@ onig_name_to_backref_number(regex_t* reg, const UChar* name, else { if (IS_NOT_NULL(region)) { for (i = n - 1; i >= 0; i--) { - if (region->beg[nums[i]] != ONIG_REGION_NOTPOS) - return nums[i]; + if (region->beg[nums[i]] != ONIG_REGION_NOTPOS) + return nums[i]; } } return nums[n - 1]; @@ -1068,14 +1068,14 @@ onig_name_to_backref_number(regex_t* reg, const UChar* name, extern int onig_name_to_group_numbers(regex_t* reg, const UChar* name, - const UChar* name_end, int** nums) + const UChar* name_end, int** nums) { return ONIG_NO_SUPPORT_CONFIG; } extern int onig_name_to_backref_number(regex_t* reg, const UChar* name, - const UChar* name_end, const OnigRegion* region) + const UChar* name_end, const OnigRegion* region) { return ONIG_NO_SUPPORT_CONFIG; } @@ -1164,20 +1164,20 @@ scan_env_add_mem_entry(ScanEnv* env) if (need >= SCANENV_MEMNODES_SIZE) { if (env->mem_alloc <= need) { if (IS_NULL(env->mem_nodes_dynamic)) { - alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE; - p = (Node** )xmalloc(sizeof(Node*) * alloc); - CHECK_NULL_RETURN_MEMERR(p); - xmemcpy(p, env->mem_nodes_static, - sizeof(Node*) * SCANENV_MEMNODES_SIZE); + alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE; + p = (Node** )xmalloc(sizeof(Node*) * alloc); + CHECK_NULL_RETURN_MEMERR(p); + xmemcpy(p, env->mem_nodes_static, + sizeof(Node*) * SCANENV_MEMNODES_SIZE); } else { - alloc = env->mem_alloc * 2; - p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc); - CHECK_NULL_RETURN_MEMERR(p); + alloc = env->mem_alloc * 2; + p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc); + CHECK_NULL_RETURN_MEMERR(p); } for (i = env->num_mem + 1; i < alloc; i++) - p[i] = NULL_NODE; + p[i] = NULL_NODE; env->mem_nodes_dynamic = p; 
env->mem_alloc = alloc; @@ -1208,7 +1208,7 @@ onig_node_free(Node* node) switch (NTYPE(node)) { case NT_STR: if (NSTR(node)->capa != 0 && - IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { + IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { xfree(NSTR(node)->s); } break; @@ -1371,9 +1371,9 @@ onig_node_new_anchor(int type) static Node* node_new_backref(int back_num, int* backrefs, int by_name, #ifdef USE_BACKREF_WITH_LEVEL - int exist_level, int nest_level, + int exist_level, int nest_level, #endif - ScanEnv* env) + ScanEnv* env) { int i; Node* node = node_new(); @@ -1396,7 +1396,7 @@ node_new_backref(int back_num, int* backrefs, int by_name, for (i = 0; i < back_num; i++) { if (backrefs[i] <= env->num_mem && - IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) { + IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) { NBREF(node)->state |= NST_RECURSION; /* /...(\1).../ */ break; } @@ -1521,18 +1521,18 @@ onig_node_str_cat(Node* node, const UChar* s, const UChar* end) ptrdiff_t capa = len + addlen + NODE_STR_MARGIN; if (capa <= NSTR(node)->capa) { - onig_strcpy(NSTR(node)->s + len, s, end); + onig_strcpy(NSTR(node)->s + len, s, end); } else { - if (NSTR(node)->s == NSTR(node)->buf) - p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end, - s, end, capa); - else - p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa); + if (NSTR(node)->s == NSTR(node)->buf) + p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end, + s, end, capa); + else + p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa); - CHECK_NULL_RETURN_MEMERR(p); - NSTR(node)->s = p; - NSTR(node)->capa = (int )capa; + CHECK_NULL_RETURN_MEMERR(p); + NSTR(node)->s = p; + NSTR(node)->capa = (int )capa; } } else { @@ -1654,7 +1654,7 @@ str_node_split_last_char(StrNode* sn, OnigEncoding enc) if (p && p > sn->s) { /* can be split. 
*/ n = node_new_str(p, sn->end); if (IS_NOT_NULL(n) && (sn->flag & NSTR_RAW) != 0) - NSTRING_SET_RAW(n); + NSTRING_SET_RAW(n); sn->end = (UChar* )p; } } @@ -1702,7 +1702,7 @@ onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc) if (ONIGENC_IS_CODE_DIGIT(enc, c)) { val = (unsigned int )DIGITVAL(c); if ((INT_MAX_LIMIT - val) / 10UL < num) - return -1; /* overflow */ + return -1; /* overflow */ num = num * 10 + val; } @@ -1717,7 +1717,7 @@ onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc) static int scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen, - int maxlen, OnigEncoding enc) + int maxlen, OnigEncoding enc) { OnigCodePoint c; unsigned int num, val; @@ -1732,7 +1732,7 @@ scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen, if (ONIGENC_IS_CODE_XDIGIT(enc, c)) { val = (unsigned int )XDIGITVAL(enc,c); if ((INT_MAX_LIMIT - val) / 16UL < num) - return -1; /* overflow */ + return -1; /* overflow */ num = (num << 4) + XDIGITVAL(enc,c); } @@ -1750,7 +1750,7 @@ scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen, static int scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen, - OnigEncoding enc) + OnigEncoding enc) { OnigCodePoint c; unsigned int num, val; @@ -1763,7 +1763,7 @@ scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen, if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') { val = ODIGITVAL(c); if ((INT_MAX_LIMIT - val) / 8UL < num) - return -1; /* overflow */ + return -1; /* overflow */ num = (num << 3) + val; } @@ -1804,7 +1804,7 @@ new_code_range(BBuf** pbuf) static int add_code_range_to_buf0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to, - int checkdup) + int checkdup) { int r, inc_n, pos; OnigCodePoint low, high, bound, x; @@ -1855,7 +1855,7 @@ add_code_range_to_buf0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePo if (inc_n != 1) { if (checkdup && from <= data[low*2+1] - && (data[low*2] <= from || data[low*2+1] <= to)) 
+ && (data[low*2] <= from || data[low*2+1] <= to)) CC_DUP_WARN(env, from, to); if (from > data[low*2]) from = data[low*2]; @@ -1869,8 +1869,8 @@ add_code_range_to_buf0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePo if (inc_n > 0) { if (high < n) { - int size = (n - high) * 2 * SIZE_CODE_POINT; - BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size); + int size = (n - high) * 2 * SIZE_CODE_POINT; + BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size); } } else { @@ -1980,10 +1980,10 @@ or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1, } else { if (not2 == 0) { - return bbuf_clone(pbuf, bbuf2); + return bbuf_clone(pbuf, bbuf2); } else { - return not_code_range_buf(enc, bbuf2, pbuf, env); + return not_code_range_buf(enc, bbuf2, pbuf, env); } } } @@ -2014,7 +2014,7 @@ or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1, static int and_code_range1(BBuf** pbuf, ScanEnv* env, OnigCodePoint from1, OnigCodePoint to1, - OnigCodePoint* data, int n) + OnigCodePoint* data, int n) { int i, r; OnigCodePoint from2, to2; @@ -2025,19 +2025,19 @@ and_code_range1(BBuf** pbuf, ScanEnv* env, OnigCodePoint from1, OnigCodePoint to if (from2 < from1) { if (to2 < from1) continue; else { - from1 = to2 + 1; + from1 = to2 + 1; } } else if (from2 <= to1) { if (to2 < to1) { - if (from1 <= from2 - 1) { - r = add_code_range_to_buf(pbuf, env, from1, from2-1); - if (r != 0) return r; - } - from1 = to2 + 1; + if (from1 <= from2 - 1) { + r = add_code_range_to_buf(pbuf, env, from1, from2-1); + if (r != 0) return r; + } + from1 = to2 + 1; } else { - to1 = from2 - 1; + to1 = from2 - 1; } } else { @@ -2086,14 +2086,14 @@ and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf, Sc from1 = data1[i*2]; to1 = data1[i*2+1]; for (j = 0; j < n2; j++) { - from2 = data2[j*2]; - to2 = data2[j*2+1]; - if (from2 > to1) break; - if (to2 < from1) continue; - from = MAX(from1, from2); - to = MIN(to1, to2); - r = add_code_range_to_buf(pbuf, env, from, to); - if (r != 0) return r; + from2 = 
data2[j*2]; + to2 = data2[j*2+1]; + if (from2 > to1) break; + if (to2 < from1) continue; + from = MAX(from1, from2); + to = MIN(to1, to2); + r = add_code_range_to_buf(pbuf, env, from, to); + if (r != 0) return r; } } } @@ -2149,10 +2149,10 @@ and_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env) else { r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf, env); if (r == 0 && not1 != 0) { - BBuf *tbuf = 0; - r = not_code_range_buf(enc, pbuf, &tbuf, env); - bbuf_free(pbuf); - pbuf = tbuf; + BBuf *tbuf = 0; + r = not_code_range_buf(enc, pbuf, &tbuf, env); + bbuf_free(pbuf); + pbuf = tbuf; } } if (r != 0) { @@ -2207,10 +2207,10 @@ or_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env) else { r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf, env); if (r == 0 && not1 != 0) { - BBuf *tbuf = 0; - r = not_code_range_buf(enc, pbuf, &tbuf, env); - bbuf_free(pbuf); - pbuf = tbuf; + BBuf *tbuf = 0; + r = not_code_range_buf(enc, pbuf, &tbuf, env); + bbuf_free(pbuf); + pbuf = tbuf; } } if (r != 0) { @@ -2242,12 +2242,12 @@ conv_backslash_value(OnigCodePoint c, ScanEnv* env) case 'e': return '\033'; case 'v': if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB)) - return '\v'; + return '\v'; break; default: if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')) - UNKNOWN_ESC_WARN(env, c); + UNKNOWN_ESC_WARN(env, c); break; } } @@ -2506,7 +2506,7 @@ fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env) if (p == prev) { if (non_low != 0) - goto invalid; + goto invalid; up = REPEAT_INFINITE; /* {n,} : {n,infinite} */ } } @@ -2566,8 +2566,8 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val) if (PEND) return ONIGERR_END_PATTERN_AT_META; PFETCH_S(c); if (c == MC_ESC(env->syntax)) { - v = fetch_escaped_value(&p, end, env, &c); - if (v < 0) return v; + v = fetch_escaped_value(&p, end, env, &c); + if (v < 0) return v; } c = ((c & 0xff) | 0x80); } @@ -2591,14 +2591,14 @@ fetch_escaped_value(UChar** src, UChar* end, 
ScanEnv* env, OnigCodePoint* val) if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL; PFETCH_S(c); if (c == '?') { - c = 0177; + c = 0177; } else { - if (c == MC_ESC(env->syntax)) { - v = fetch_escaped_value(&p, end, env, &c); - if (v < 0) return v; - } - c &= 0x9f; + if (c == MC_ESC(env->syntax)) { + v = fetch_escaped_value(&p, end, env, &c); + if (v < 0) return v; + } + c &= 0x9f; } break; } @@ -2649,8 +2649,8 @@ get_name_end_code_point(OnigCodePoint start) */ static int fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, - UChar** rname_end, ScanEnv* env, - int* rback_num, int* rlevel) + UChar** rname_end, ScanEnv* env, + int* rback_num, int* rlevel) { int r, sign, is_num, exist_level; OnigCodePoint end_code; @@ -2701,11 +2701,11 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, if (is_num != 0) { if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - is_num = 1; + is_num = 1; } else { - r = ONIGERR_INVALID_GROUP_NAME; - is_num = 0; + r = ONIGERR_INVALID_GROUP_NAME; + is_num = 0; } } else if (!ONIGENC_IS_CODE_NAME(enc, c)) { @@ -2719,8 +2719,8 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, int flag = (c == '-' ? -1 : 1); if (PEND) { - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - goto end; + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + goto end; } PFETCH(c); if (! 
ONIGENC_IS_CODE_DIGIT(enc, c)) goto err; @@ -2731,9 +2731,9 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, exist_level = 1; if (!PEND) { - PFETCH(c); - if (c == end_code) - goto end; + PFETCH(c); + if (c == end_code) + goto end; } } @@ -2769,7 +2769,7 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, */ static int fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, - UChar** rname_end, ScanEnv* env, int* rback_num, int ref) + UChar** rname_end, ScanEnv* env, int* rback_num, int ref) { int r, is_num, sign; OnigCodePoint end_code; @@ -2798,21 +2798,21 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, if (ONIGENC_IS_CODE_DIGIT(enc, c)) { if (ref == 1) - is_num = 1; + is_num = 1; else { - r = ONIGERR_INVALID_GROUP_NAME; - is_num = 0; + r = ONIGERR_INVALID_GROUP_NAME; + is_num = 0; } } else if (c == '-') { if (ref == 1) { - is_num = 2; - sign = -1; - pnum_head = p; + is_num = 2; + sign = -1; + pnum_head = p; } else { - r = ONIGERR_INVALID_GROUP_NAME; - is_num = 0; + r = ONIGERR_INVALID_GROUP_NAME; + is_num = 0; } } else if (!ONIGENC_IS_CODE_NAME(enc, c)) { @@ -2825,30 +2825,30 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, name_end = p; PFETCH_S(c); if (c == end_code || c == ')') { - if (is_num == 2) { - r = ONIGERR_INVALID_GROUP_NAME; - goto teardown; - } - break; + if (is_num == 2) { + r = ONIGERR_INVALID_GROUP_NAME; + goto teardown; + } + break; } if (is_num != 0) { - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - is_num = 1; - } - else { - if (!ONIGENC_IS_CODE_WORD(enc, c)) - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - else - r = ONIGERR_INVALID_GROUP_NAME; - goto teardown; - } + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + is_num = 1; + } + else { + if (!ONIGENC_IS_CODE_WORD(enc, c)) + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + else + r = ONIGERR_INVALID_GROUP_NAME; + goto teardown; + } } else { - if (!ONIGENC_IS_CODE_NAME(enc, c)) { - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - goto 
teardown; - } + if (!ONIGENC_IS_CODE_NAME(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + goto teardown; + } } } @@ -2862,8 +2862,8 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; else if (*rback_num == 0) { - r = ONIGERR_INVALID_GROUP_NAME; - goto err; + r = ONIGERR_INVALID_GROUP_NAME; + goto err; } *rback_num *= sign; @@ -2879,7 +2879,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, name_end = p; PFETCH_S(c); if (c == end_code || c == ')') - break; + break; } if (PEND) name_end = end; @@ -2892,7 +2892,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, #else static int fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, - UChar** rname_end, ScanEnv* env, int* rback_num, int ref) + UChar** rname_end, ScanEnv* env, int* rback_num, int ref) { int r, is_num, sign; OnigCodePoint end_code; @@ -2980,8 +2980,8 @@ onig_syntax_warn(ScanEnv *env, const char *fmt, ...) 
UChar buf[WARN_BUFSIZE]; va_start(args, fmt); onig_vsnprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, - env->pattern, env->pattern_end, - fmt, args); + env->pattern, env->pattern_end, + fmt, args); va_end(args); #ifdef RUBY if (env->sourcefile == NULL) @@ -3043,7 +3043,7 @@ UNKNOWN_ESC_WARN(ScanEnv *env, int c) static UChar* find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to, - UChar **next, OnigEncoding enc) + UChar **next, OnigEncoding enc) { int i; OnigCodePoint x; @@ -3055,14 +3055,14 @@ find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to, q = p + enclen(enc, p, to); if (x == s[0]) { for (i = 1; i < n && q < to; i++) { - x = ONIGENC_MBC_TO_CODE(enc, q, to); - if (x != s[i]) break; - q += enclen(enc, q, to); + x = ONIGENC_MBC_TO_CODE(enc, q, to); + if (x != s[i]) break; + q += enclen(enc, q, to); } if (i >= n) { - if (IS_NOT_NULL(next)) - *next = q; - return p; + if (IS_NOT_NULL(next)) + *next = q; + return p; } } p = q; @@ -3072,7 +3072,7 @@ find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to, static int str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to, - OnigCodePoint bad, OnigEncoding enc, const OnigSyntaxType* syn) + OnigCodePoint bad, OnigEncoding enc, const OnigSyntaxType* syn) { int i, in_esc; OnigCodePoint x; @@ -3089,19 +3089,19 @@ str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to, x = ONIGENC_MBC_TO_CODE(enc, p, to); q = p + enclen(enc, p, to); if (x == s[0]) { - for (i = 1; i < n && q < to; i++) { - x = ONIGENC_MBC_TO_CODE(enc, q, to); - if (x != s[i]) break; - q += enclen(enc, q, to); - } - if (i >= n) return 1; - p += enclen(enc, p, to); + for (i = 1; i < n && q < to; i++) { + x = ONIGENC_MBC_TO_CODE(enc, q, to); + if (x != s[i]) break; + q += enclen(enc, q, to); + } + if (i >= n) return 1; + p += enclen(enc, p, to); } else { - x = ONIGENC_MBC_TO_CODE(enc, p, to); - if (x == bad) return 0; - else if (x == MC_ESC(syn)) in_esc = 1; - p = q; + x = 
ONIGENC_MBC_TO_CODE(enc, p, to); + if (x == bad) return 0; + else if (x == MC_ESC(syn)) in_esc = 1; + p = q; } } } @@ -3195,22 +3195,22 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) c2 = PPEEK; if (c2 == '{' && - IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { - PINC; - tok->type = TK_CHAR_PROPERTY; - tok->u.prop.not = (c == 'P' ? 1 : 0); - - if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { - PFETCH(c2); - if (c2 == '^') { - tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0); - } - else - PUNFETCH; - } + IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { + PINC; + tok->type = TK_CHAR_PROPERTY; + tok->u.prop.not = (c == 'P' ? 1 : 0); + + if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { + PFETCH(c2); + if (c2 == '^') { + tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0); + } + else + PUNFETCH; + } } else { - onig_syntax_warn(env, "invalid Unicode Property \\%c", c); + onig_syntax_warn(env, "invalid Unicode Property \\%c", c); } break; @@ -3219,35 +3219,35 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) prev = p; if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { - PINC; - num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc); - if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; - if (!PEND) { - c2 = PPEEK; - if (ONIGENC_IS_CODE_XDIGIT(enc, c2)) - return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; - } - - if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) { - PINC; - tok->type = TK_CODE_POINT; - tok->base = 16; - tok->u.code = (OnigCodePoint )num; - } - else { - /* can't read nothing or invalid format */ - p = prev; - } + PINC; + num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc); + if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + if (!PEND) { + c2 = PPEEK; + if (ONIGENC_IS_CODE_XDIGIT(enc, c2)) + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; + } + + if (p > prev + enclen(enc, prev, end) && 
!PEND && (PPEEK_IS('}'))) { + PINC; + tok->type = TK_CODE_POINT; + tok->base = 16; + tok->u.code = (OnigCodePoint )num; + } + else { + /* can't read nothing or invalid format */ + p = prev; + } } else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { - num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. */ - num = 0; /* but, it's not error */ - } - tok->type = TK_RAW_BYTE; - tok->base = 16; - tok->u.c = num; + num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. */ + num = 0; /* but, it's not error */ + } + tok->type = TK_RAW_BYTE; + tok->base = 16; + tok->u.c = num; } break; @@ -3256,15 +3256,15 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) prev = p; if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { - num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc); - if (num < -1) return ONIGERR_TOO_SHORT_DIGITS; - else if (num < 0) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. */ - num = 0; /* but, it's not error */ - } - tok->type = TK_CODE_POINT; - tok->base = 16; - tok->u.code = (OnigCodePoint )num; + num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc); + if (num < -1) return ONIGERR_TOO_SHORT_DIGITS; + else if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. 
*/ + num = 0; /* but, it's not error */ + } + tok->type = TK_CODE_POINT; + tok->base = 16; + tok->u.code = (OnigCodePoint )num; } break; @@ -3273,41 +3273,41 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) prev = p; if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) { - PINC; - num = scan_unsigned_octal_number(&p, end, 11, enc); - if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; - if (!PEND) { - c2 = PPEEK; - if (ONIGENC_IS_CODE_DIGIT(enc, c2) && c2 < '8') - return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; - } - - if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) { - PINC; - tok->type = TK_CODE_POINT; - tok->base = 8; - tok->u.code = (OnigCodePoint )num; - } - else { - /* can't read nothing or invalid format */ - p = prev; - } + PINC; + num = scan_unsigned_octal_number(&p, end, 11, enc); + if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + if (!PEND) { + c2 = PPEEK; + if (ONIGENC_IS_CODE_DIGIT(enc, c2) && c2 < '8') + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; + } + + if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) { + PINC; + tok->type = TK_CODE_POINT; + tok->base = 8; + tok->u.code = (OnigCodePoint )num; + } + else { + /* can't read nothing or invalid format */ + p = prev; + } } break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) { - PUNFETCH; - prev = p; - num = scan_unsigned_octal_number(&p, end, 3, enc); - if (num < 0 || 0xff < num) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. */ - num = 0; /* but, it's not error */ - } - tok->type = TK_RAW_BYTE; - tok->base = 8; - tok->u.c = num; + PUNFETCH; + prev = p; + num = scan_unsigned_octal_number(&p, end, 3, enc); + if (num < 0 || 0xff < num) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. 
*/ + num = 0; /* but, it's not error */ + } + tok->type = TK_RAW_BYTE; + tok->base = 8; + tok->u.c = num; } break; @@ -3316,8 +3316,8 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) num = fetch_escaped_value(&p, end, env, &c2); if (num < 0) return num; if ((OnigCodePoint )tok->u.c != c2) { - tok->u.code = (OnigCodePoint )c2; - tok->type = TK_CODE_POINT; + tok->u.code = (OnigCodePoint )c2; + tok->type = TK_CODE_POINT; } break; } @@ -3329,26 +3329,26 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) PINC; if (str_exist_check_with_esc(send, 2, p, end, (OnigCodePoint )']', enc, syn)) { - tok->type = TK_POSIX_BRACKET_OPEN; + tok->type = TK_POSIX_BRACKET_OPEN; } else { - PUNFETCH; - goto cc_in_cc; + PUNFETCH; + goto cc_in_cc; } } else { cc_in_cc: if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) { - tok->type = TK_CC_CC_OPEN; + tok->type = TK_CC_CC_OPEN; } else { - CC_ESC_WARN(env, (UChar* )"["); + CC_ESC_WARN(env, (UChar* )"["); } } } else if (c == '&') { if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) && - !PEND && (PPEEK_IS('&'))) { + !PEND && (PPEEK_IS('&'))) { PINC; tok->type = TK_CC_AND; } @@ -3362,7 +3362,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) #ifdef USE_NAMED_GROUP static int fetch_named_backref_token(OnigCodePoint c, OnigToken* tok, UChar** src, - UChar* end, ScanEnv* env) + UChar* end, ScanEnv* env) { int r, num; const OnigSyntaxType* syn = env->syntax; @@ -3377,7 +3377,7 @@ fetch_named_backref_token(OnigCodePoint c, OnigToken* tok, UChar** src, # ifdef USE_BACKREF_WITH_LEVEL name_end = NULL_UCHARP; /* no need. escape gcc warning. 
*/ r = fetch_name_with_level(c, &p, end, &name_end, - env, &back_num, &tok->u.backref.level); + env, &back_num, &tok->u.backref.level); if (r == 1) tok->u.backref.exist_level = 1; else tok->u.backref.exist_level = 0; # else @@ -3389,13 +3389,13 @@ fetch_named_backref_token(OnigCodePoint c, OnigToken* tok, UChar** src, if (back_num < 0) { back_num = BACKREF_REL_TO_ABS(back_num, env); if (back_num <= 0) - return ONIGERR_INVALID_BACKREF; + return ONIGERR_INVALID_BACKREF; } if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { if (back_num > env->num_mem || - IS_NULL(SCANENV_MEM_NODES(env)[back_num])) - return ONIGERR_INVALID_BACKREF; + IS_NULL(SCANENV_MEM_NODES(env)[back_num])) + return ONIGERR_INVALID_BACKREF; } tok->type = TK_BACKREF; tok->u.backref.by_name = 0; @@ -3406,15 +3406,15 @@ fetch_named_backref_token(OnigCodePoint c, OnigToken* tok, UChar** src, num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs); if (num <= 0) { onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end); + ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end); return ONIGERR_UNDEFINED_NAME_REFERENCE; } if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { int i; for (i = 0; i < num; i++) { - if (backs[i] > env->num_mem || - IS_NULL(SCANENV_MEM_NODES(env)[backs[i]])) - return ONIGERR_INVALID_BACKREF; + if (backs[i] > env->num_mem || + IS_NULL(SCANENV_MEM_NODES(env)[backs[i]])) + return ONIGERR_INVALID_BACKREF; } } @@ -3488,26 +3488,26 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->u.repeat.upper = 1; greedy_check: if (!PEND && PPEEK_IS('?') && - IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) { - PFETCH(c); - tok->u.repeat.greedy = 0; - tok->u.repeat.possessive = 0; + IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) { + PFETCH(c); + tok->u.repeat.greedy = 0; + tok->u.repeat.possessive = 0; } else { possessive_check: - if (!PEND && PPEEK_IS('+') && - ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) && - 
tok->type != TK_INTERVAL) || - (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) && - tok->type == TK_INTERVAL))) { - PFETCH(c); - tok->u.repeat.greedy = 1; - tok->u.repeat.possessive = 1; - } - else { - tok->u.repeat.greedy = 1; - tok->u.repeat.possessive = 0; - } + if (!PEND && PPEEK_IS('+') && + ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) && + tok->type != TK_INTERVAL) || + (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) && + tok->type == TK_INTERVAL))) { + PFETCH(c); + tok->u.repeat.greedy = 1; + tok->u.repeat.possessive = 1; + } + else { + tok->u.repeat.greedy = 1; + tok->u.repeat.possessive = 0; + } } break; @@ -3517,10 +3517,10 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (r < 0) return r; /* error */ if (r == 0) goto greedy_check; else if (r == 2) { /* {n} */ - if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY)) - goto possessive_check; + if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY)) + goto possessive_check; - goto greedy_check; + goto greedy_check; } /* r == 1 : normal char */ break; @@ -3559,7 +3559,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->type = TK_ANCHOR; tok->u.anchor.subtype = ANCHOR_WORD_BOUND; tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option) - && ! IS_WORD_BOUND_ALL_RANGE(env->option); + && ! IS_WORD_BOUND_ALL_RANGE(env->option); break; case 'B': @@ -3567,7 +3567,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->type = TK_ANCHOR; tok->u.anchor.subtype = ANCHOR_NOT_WORD_BOUND; tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option) - && ! IS_WORD_BOUND_ALL_RANGE(env->option); + && ! 
IS_WORD_BOUND_ALL_RANGE(env->option); break; #ifdef USE_WORD_BEGIN_END @@ -3669,33 +3669,33 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) prev = p; if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { - PINC; - num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc); - if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; - if (!PEND) { - if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK)) - return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; - } - - if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) { - PINC; - tok->type = TK_CODE_POINT; - tok->u.code = (OnigCodePoint )num; - } - else { - /* can't read nothing or invalid format */ - p = prev; - } + PINC; + num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc); + if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + if (!PEND) { + if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK)) + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; + } + + if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) { + PINC; + tok->type = TK_CODE_POINT; + tok->u.code = (OnigCodePoint )num; + } + else { + /* can't read nothing or invalid format */ + p = prev; + } } else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { - num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. */ - num = 0; /* but, it's not error */ - } - tok->type = TK_RAW_BYTE; - tok->base = 16; - tok->u.c = num; + num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. 
*/ + num = 0; /* but, it's not error */ + } + tok->type = TK_RAW_BYTE; + tok->base = 16; + tok->u.c = num; } break; @@ -3704,15 +3704,15 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) prev = p; if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { - num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc); - if (num < -1) return ONIGERR_TOO_SHORT_DIGITS; - else if (num < 0) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. */ - num = 0; /* but, it's not error */ - } - tok->type = TK_CODE_POINT; - tok->base = 16; - tok->u.code = (OnigCodePoint )num; + num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc); + if (num < -1) return ONIGERR_TOO_SHORT_DIGITS; + else if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. */ + num = 0; /* but, it's not error */ + } + tok->type = TK_CODE_POINT; + tok->base = 16; + tok->u.code = (OnigCodePoint )num; } break; @@ -3721,24 +3721,24 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) prev = p; if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) { - PINC; - num = scan_unsigned_octal_number(&p, end, 11, enc); - if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; - if (!PEND) { - OnigCodePoint c = PPEEK; - if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') - return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; - } - - if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) { - PINC; - tok->type = TK_CODE_POINT; - tok->u.code = (OnigCodePoint )num; - } - else { - /* can't read nothing or invalid format */ - p = prev; - } + PINC; + num = scan_unsigned_octal_number(&p, end, 11, enc); + if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + if (!PEND) { + OnigCodePoint c = PPEEK; + if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; + } + + if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) { + PINC; + tok->type = TK_CODE_POINT; + tok->u.code = (OnigCodePoint )num; + } + 
else { + /* can't read nothing or invalid format */ + p = prev; + } } break; @@ -3748,64 +3748,64 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) prev = p; num = onig_scan_unsigned_number(&p, end, enc); if (num < 0 || num > ONIG_MAX_BACKREF_NUM) { - goto skip_backref; + goto skip_backref; } if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && - (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */ - if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { - if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num])) - return ONIGERR_INVALID_BACKREF; - } - - tok->type = TK_BACKREF; - tok->u.backref.num = 1; - tok->u.backref.ref1 = num; - tok->u.backref.by_name = 0; + (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */ + if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { + if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num])) + return ONIGERR_INVALID_BACKREF; + } + + tok->type = TK_BACKREF; + tok->u.backref.num = 1; + tok->u.backref.ref1 = num; + tok->u.backref.by_name = 0; #ifdef USE_BACKREF_WITH_LEVEL - tok->u.backref.exist_level = 0; + tok->u.backref.exist_level = 0; #endif - break; + break; } skip_backref: if (c == '8' || c == '9') { - /* normal char */ - p = prev; PINC; - break; + /* normal char */ + p = prev; PINC; + break; } p = prev; /* fall through */ case '0': if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) { - prev = p; - num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc); - if (num < 0 || 0xff < num) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. */ - num = 0; /* but, it's not error */ - } - tok->type = TK_RAW_BYTE; - tok->base = 8; - tok->u.c = num; + prev = p; + num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc); + if (num < 0 || 0xff < num) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. 
*/ + num = 0; /* but, it's not error */ + } + tok->type = TK_RAW_BYTE; + tok->base = 8; + tok->u.c = num; } else if (c != '0') { - PINC; + PINC; } break; #ifdef USE_NAMED_GROUP case 'k': if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) { - PFETCH(c); - if (c == '<' || c == '\'') { - r = fetch_named_backref_token(c, tok, &p, end, env); - if (r < 0) return r; - } - else { - PUNFETCH; - onig_syntax_warn(env, "invalid back reference"); - } + PFETCH(c); + if (c == '<' || c == '\'') { + r = fetch_named_backref_token(c, tok, &p, end, env); + if (r < 0) return r; + } + else { + PUNFETCH; + onig_syntax_warn(env, "invalid back reference"); + } } break; #endif @@ -3814,52 +3814,52 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case 'g': # ifdef USE_NAMED_GROUP if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_BRACE_BACKREF)) { - PFETCH(c); - if (c == '{') { - r = fetch_named_backref_token(c, tok, &p, end, env); - if (r < 0) return r; - } - else - PUNFETCH; + PFETCH(c); + if (c == '{') { + r = fetch_named_backref_token(c, tok, &p, end, env); + if (r < 0) return r; + } + else + PUNFETCH; } # endif # ifdef USE_SUBEXP_CALL if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) { - PFETCH(c); - if (c == '<' || c == '\'') { - int gnum = -1, rel = 0; - UChar* name_end; - OnigCodePoint cnext; - - cnext = PPEEK; - if (cnext == '0') { - PINC; - if (PPEEK_IS(get_name_end_code_point(c))) { /* \g<0>, \g'0' */ - PINC; - name_end = p; - gnum = 0; - } - } - else if (cnext == '+') { - PINC; - rel = 1; - } - prev = p; - if (gnum < 0) { - r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1); - if (r < 0) return r; - } - - tok->type = TK_CALL; - tok->u.call.name = prev; - tok->u.call.name_end = name_end; - tok->u.call.gnum = gnum; - tok->u.call.rel = rel; - } - else { - onig_syntax_warn(env, "invalid subexp call"); - PUNFETCH; - } + PFETCH(c); + if (c == '<' || c == '\'') { + int gnum = -1, rel = 0; + UChar* name_end; + 
OnigCodePoint cnext; + + cnext = PPEEK; + if (cnext == '0') { + PINC; + if (PPEEK_IS(get_name_end_code_point(c))) { /* \g<0>, \g'0' */ + PINC; + name_end = p; + gnum = 0; + } + } + else if (cnext == '+') { + PINC; + rel = 1; + } + prev = p; + if (gnum < 0) { + r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1); + if (r < 0) return r; + } + + tok->type = TK_CALL; + tok->u.call.name = prev; + tok->u.call.name_end = name_end; + tok->u.call.gnum = gnum; + tok->u.call.rel = rel; + } + else { + onig_syntax_warn(env, "invalid subexp call"); + PUNFETCH; + } } # endif break; @@ -3867,65 +3867,65 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case 'Q': if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) { - tok->type = TK_QUOTE_OPEN; + tok->type = TK_QUOTE_OPEN; } break; case 'p': case 'P': if (PPEEK_IS('{') && - IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { - PINC; - tok->type = TK_CHAR_PROPERTY; - tok->u.prop.not = (c == 'P' ? 1 : 0); - - if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { - PFETCH(c); - if (c == '^') { - tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0); - } - else - PUNFETCH; - } + IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { + PINC; + tok->type = TK_CHAR_PROPERTY; + tok->u.prop.not = (c == 'P' ? 1 : 0); + + if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { + PFETCH(c); + if (c == '^') { + tok->u.prop.not = (tok->u.prop.not == 0 ? 
1 : 0); + } + else + PUNFETCH; + } } else { - onig_syntax_warn(env, "invalid Unicode Property \\%c", c); + onig_syntax_warn(env, "invalid Unicode Property \\%c", c); } break; case 'R': if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK)) { - tok->type = TK_LINEBREAK; + tok->type = TK_LINEBREAK; } break; case 'X': if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER)) { - tok->type = TK_EXTENDED_GRAPHEME_CLUSTER; + tok->type = TK_EXTENDED_GRAPHEME_CLUSTER; } break; case 'K': if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP)) { - tok->type = TK_KEEP; + tok->type = TK_KEEP; } break; default: { - OnigCodePoint c2; - - PUNFETCH; - num = fetch_escaped_value(&p, end, env, &c2); - if (num < 0) return num; - /* set_raw: */ - if ((OnigCodePoint )tok->u.c != c2) { - tok->type = TK_CODE_POINT; - tok->u.code = (OnigCodePoint )c2; - } - else { /* string */ - p = tok->backp + enclen(enc, tok->backp, end); - } + OnigCodePoint c2; + + PUNFETCH; + num = fetch_escaped_value(&p, end, env, &c2); + if (num < 0) return num; + /* set_raw: */ + if ((OnigCodePoint )tok->u.c != c2) { + tok->type = TK_CODE_POINT; + tok->u.code = (OnigCodePoint )c2; + } + else { /* string */ + p = tok->backp + enclen(enc, tok->backp, end); + } } break; } @@ -3936,18 +3936,18 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) #ifdef USE_VARIABLE_META_CHARS if ((c != ONIG_INEFFECTIVE_META_CHAR) && - IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) { + IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) { if (c == MC_ANYCHAR(syn)) - goto any_char; + goto any_char; else if (c == MC_ANYTIME(syn)) - goto anytime; + goto anytime; else if (c == MC_ZERO_OR_ONE_TIME(syn)) - goto zero_or_one_time; + goto zero_or_one_time; else if (c == MC_ONE_OR_MORE_TIME(syn)) - goto one_or_more_time; + goto one_or_more_time; else if (c == MC_ANYCHAR_ANYTIME(syn)) { - tok->type = TK_ANYCHAR_ANYTIME; - goto out; + tok->type = TK_ANYCHAR_ANYTIME; + goto out; } } #endif 
@@ -4000,10 +4000,10 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (r < 0) return r; /* error */ if (r == 0) goto greedy_check; else if (r == 2) { /* {n} */ - if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY)) - goto possessive_check; + if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY)) + goto possessive_check; - goto greedy_check; + goto greedy_check; } /* r == 1 : normal char */ break; @@ -4015,114 +4015,114 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case '(': if (PPEEK_IS('?') && - IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { - PINC; - if (PPEEK_IS('#')) { - PFETCH(c); - while (1) { - if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; - PFETCH(c); - if (c == MC_ESC(syn)) { - if (!PEND) PFETCH(c); - } - else { - if (c == ')') break; - } - } - goto start; - } + IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { + PINC; + if (PPEEK_IS('#')) { + PFETCH(c); + while (1) { + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + PFETCH(c); + if (c == MC_ESC(syn)) { + if (!PEND) PFETCH(c); + } + else { + if (c == ')') break; + } + } + goto start; + } #ifdef USE_PERL_SUBEXP_CALL - /* (?&name), (?n), (?R), (?0), (?+n), (?-n) */ - c = PPEEK; - if ((c == '&' || c == 'R' || ONIGENC_IS_CODE_DIGIT(enc, c)) && - IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_SUBEXP_CALL)) { - /* (?&name), (?n), (?R), (?0) */ - int gnum; - UChar *name; - UChar *name_end; - - if (c == 'R' || c == '0') { - PINC; /* skip 'R' / '0' */ - if (!PPEEK_IS(')')) return ONIGERR_INVALID_GROUP_NAME; - PINC; /* skip ')' */ - name_end = name = p; - gnum = 0; - } - else { - int numref = 1; - if (c == '&') { /* (?&name) */ - PINC; - numref = 0; /* don't allow number name */ - } - name = p; - r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, numref); - if (r < 0) return r; - } - - tok->type = TK_CALL; - tok->u.call.name = name; - tok->u.call.name_end = name_end; - tok->u.call.gnum = gnum; - tok->u.call.rel = 0; - 
break; - } - else if ((c == '-' || c == '+') && - IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_SUBEXP_CALL)) { - /* (?+n), (?-n) */ - int gnum; - UChar *name; - UChar *name_end; - OnigCodePoint cnext; - PFETCH_READY; - - PINC; /* skip '-' / '+' */ - cnext = PPEEK; - if (ONIGENC_IS_CODE_DIGIT(enc, cnext)) { - if (c == '-') PUNFETCH; - name = p; - r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, 1); - if (r < 0) return r; - - tok->type = TK_CALL; - tok->u.call.name = name; - tok->u.call.name_end = name_end; - tok->u.call.gnum = gnum; - tok->u.call.rel = 1; - break; - } - } + /* (?&name), (?n), (?R), (?0), (?+n), (?-n) */ + c = PPEEK; + if ((c == '&' || c == 'R' || ONIGENC_IS_CODE_DIGIT(enc, c)) && + IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_SUBEXP_CALL)) { + /* (?&name), (?n), (?R), (?0) */ + int gnum; + UChar *name; + UChar *name_end; + + if (c == 'R' || c == '0') { + PINC; /* skip 'R' / '0' */ + if (!PPEEK_IS(')')) return ONIGERR_INVALID_GROUP_NAME; + PINC; /* skip ')' */ + name_end = name = p; + gnum = 0; + } + else { + int numref = 1; + if (c == '&') { /* (?&name) */ + PINC; + numref = 0; /* don't allow number name */ + } + name = p; + r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, numref); + if (r < 0) return r; + } + + tok->type = TK_CALL; + tok->u.call.name = name; + tok->u.call.name_end = name_end; + tok->u.call.gnum = gnum; + tok->u.call.rel = 0; + break; + } + else if ((c == '-' || c == '+') && + IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_SUBEXP_CALL)) { + /* (?+n), (?-n) */ + int gnum; + UChar *name; + UChar *name_end; + OnigCodePoint cnext; + PFETCH_READY; + + PINC; /* skip '-' / '+' */ + cnext = PPEEK; + if (ONIGENC_IS_CODE_DIGIT(enc, cnext)) { + if (c == '-') PUNFETCH; + name = p; + r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, 1); + if (r < 0) return r; + + tok->type = TK_CALL; + tok->u.call.name = name; + tok->u.call.name_end = name_end; + tok->u.call.gnum = gnum; + tok->u.call.rel 
= 1; + break; + } + } #endif /* USE_PERL_SUBEXP_CALL */ #ifdef USE_CAPITAL_P_NAMED_GROUP - if (PPEEK_IS('P') && - IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP)) { - int gnum; - UChar *name; - UChar *name_end; - PFETCH_READY; - - PINC; /* skip 'P' */ - if (PEND) return ONIGERR_UNDEFINED_GROUP_OPTION; - PFETCH(c); - if (c == '=') { /* (?P=name): backref */ - r = fetch_named_backref_token((OnigCodePoint )'(', tok, &p, end, env); - if (r < 0) return r; - break; - } - else if (c == '>') { /* (?P>name): subexp call */ - name = p; - r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, 0); - if (r < 0) return r; - - tok->type = TK_CALL; - tok->u.call.name = name; - tok->u.call.name_end = name_end; - tok->u.call.gnum = gnum; - tok->u.call.rel = 0; - break; - } - } + if (PPEEK_IS('P') && + IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP)) { + int gnum; + UChar *name; + UChar *name_end; + PFETCH_READY; + + PINC; /* skip 'P' */ + if (PEND) return ONIGERR_UNDEFINED_GROUP_OPTION; + PFETCH(c); + if (c == '=') { /* (?P=name): backref */ + r = fetch_named_backref_token((OnigCodePoint )'(', tok, &p, end, env); + if (r < 0) return r; + break; + } + else if (c == '>') { /* (?P>name): subexp call */ + name = p; + r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, 0); + if (r < 0) return r; + + tok->type = TK_CALL; + tok->u.call.name = name; + tok->u.call.name_end = name_end; + tok->u.call.gnum = gnum; + tok->u.call.rel = 0; + break; + } + } #endif /* USE_CAPITAL_P_NAMED_GROUP */ - PUNFETCH; + PUNFETCH; } if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break; @@ -4138,14 +4138,14 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; tok->type = TK_ANCHOR; tok->u.anchor.subtype = (IS_SINGLELINE(env->option) - ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE); + ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE); break; case '$': if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; tok->type = TK_ANCHOR; tok->u.anchor.subtype = (IS_SINGLELINE(env->option) - ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE); + ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE); break; case '[': @@ -4155,24 +4155,24 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case ']': if (*src > env->pattern) /* /].../ is allowed. */ - CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]"); + CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]"); break; case '#': if (IS_EXTEND(env->option)) { - while (!PEND) { - PFETCH(c); - if (ONIGENC_IS_CODE_NEWLINE(enc, c)) - break; - } - goto start; - break; + while (!PEND) { + PFETCH(c); + if (ONIGENC_IS_CODE_NEWLINE(enc, c)) + break; + } + goto start; + break; } break; case ' ': case '\t': case '\n': case '\r': case '\f': if (IS_EXTEND(env->option)) - goto start; + goto start; break; default: @@ -4201,18 +4201,18 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not, if (not == 0) { for (i = 0; i < n; i++) { for (j = ONIGENC_CODE_RANGE_FROM(mbr, i); - j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) { - if (j >= sb_out) { - if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) { - r = add_code_range_to_buf(&(cc->mbuf), env, j, - ONIGENC_CODE_RANGE_TO(mbr, i)); - if (r != 0) return r; - i++; - } + j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) { + if (j >= sb_out) { + if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) { + r = add_code_range_to_buf(&(cc->mbuf), env, j, + ONIGENC_CODE_RANGE_TO(mbr, i)); + if (r != 0) return r; + i++; + } - goto sb_end; - } - BITSET_SET_BIT_CHKDUP(cc->bs, j); + goto sb_end; + } + BITSET_SET_BIT_CHKDUP(cc->bs, j); } } @@ -4229,11 +4229,11 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not, for (i = 0; i < n; i++) { for (j = prev; - j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) { - if (j >= sb_out) { - goto sb_end2; - } - BITSET_SET_BIT_CHKDUP(cc->bs, j); + j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) { + if (j >= sb_out) { + goto sb_end2; + } + 
BITSET_SET_BIT_CHKDUP(cc->bs, j); } prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1; } @@ -4246,9 +4246,9 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not, for (i = 0; i < n; i++) { if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) { - r = add_code_range_to_buf(&(cc->mbuf), env, prev, + r = add_code_range_to_buf(&(cc->mbuf), env, prev, ONIGENC_CODE_RANGE_FROM(mbr, i) - 1); - if (r != 0) return r; + if (r != 0) return r; } prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1; } @@ -4276,30 +4276,30 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, int ascii_range, ScanEnv* en CClassNode ccwork; initialize_cclass(&ccwork); r = add_ctype_to_cc_by_range(&ccwork, ctype, not, env, sb_out, - ranges); + ranges); if (r == 0) { - if (not) { - r = add_code_range_to_buf0(&(ccwork.mbuf), env, 0x80, ONIG_LAST_CODE_POINT, FALSE); - } - else { - CClassNode ccascii; - initialize_cclass(&ccascii); - if (ONIGENC_MBC_MINLEN(env->enc) > 1) { - r = add_code_range(&(ccascii.mbuf), env, 0x00, 0x7F); - } - else { - bitset_set_range(env, ccascii.bs, 0x00, 0x7F); - r = 0; - } - if (r == 0) { - r = and_cclass(&ccwork, &ccascii, env); - } - if (IS_NOT_NULL(ccascii.mbuf)) bbuf_free(ccascii.mbuf); - } - if (r == 0) { - r = or_cclass(cc, &ccwork, env); - } - if (IS_NOT_NULL(ccwork.mbuf)) bbuf_free(ccwork.mbuf); + if (not) { + r = add_code_range_to_buf0(&(ccwork.mbuf), env, 0x80, ONIG_LAST_CODE_POINT, FALSE); + } + else { + CClassNode ccascii; + initialize_cclass(&ccascii); + if (ONIGENC_MBC_MINLEN(env->enc) > 1) { + r = add_code_range(&(ccascii.mbuf), env, 0x00, 0x7F); + } + else { + bitset_set_range(env, ccascii.bs, 0x00, 0x7F); + r = 0; + } + if (r == 0) { + r = and_cclass(&ccwork, &ccascii, env); + } + if (IS_NOT_NULL(ccascii.mbuf)) bbuf_free(ccascii.mbuf); + } + if (r == 0) { + r = or_cclass(cc, &ccwork, env); + } + if (IS_NOT_NULL(ccwork.mbuf)) bbuf_free(ccwork.mbuf); } } else { @@ -4327,15 +4327,15 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, int ascii_range, ScanEnv* en case 
ONIGENC_CTYPE_ALNUM: if (not != 0) { for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) - BITSET_SET_BIT_CHKDUP(cc->bs, c); + if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT_CHKDUP(cc->bs, c); } ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); } else { for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) - BITSET_SET_BIT_CHKDUP(cc->bs, c); + if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT_CHKDUP(cc->bs, c); } } break; @@ -4344,39 +4344,39 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, int ascii_range, ScanEnv* en case ONIGENC_CTYPE_PRINT: if (not != 0) { for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype) - || c >= maxcode) - BITSET_SET_BIT_CHKDUP(cc->bs, c); + if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype) + || c >= maxcode) + BITSET_SET_BIT_CHKDUP(cc->bs, c); } if (ascii_range) - ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); } else { for (c = 0; c < maxcode; c++) { - if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) - BITSET_SET_BIT_CHKDUP(cc->bs, c); + if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT_CHKDUP(cc->bs, c); } if (! ascii_range) - ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); } break; case ONIGENC_CTYPE_WORD: if (not == 0) { for (c = 0; c < maxcode; c++) { - if (ONIGENC_IS_CODE_WORD(enc, c)) BITSET_SET_BIT_CHKDUP(cc->bs, c); + if (ONIGENC_IS_CODE_WORD(enc, c)) BITSET_SET_BIT_CHKDUP(cc->bs, c); } if (! ascii_range) - ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); } else { for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */ - && (! 
ONIGENC_IS_CODE_WORD(enc, c) || c >= maxcode)) - BITSET_SET_BIT_CHKDUP(cc->bs, c); + if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */ + && (! ONIGENC_IS_CODE_WORD(enc, c) || c >= maxcode)) + BITSET_SET_BIT_CHKDUP(cc->bs, c); } if (ascii_range) - ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); } break; @@ -4390,7 +4390,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, int ascii_range, ScanEnv* en static int parse_posix_bracket(CClassNode* cc, CClassNode* asc_cc, - UChar** src, UChar* end, ScanEnv* env) + UChar** src, UChar* end, ScanEnv* env) { #define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20 #define POSIX_BRACKET_NAME_MIN_LEN 4 @@ -4430,22 +4430,22 @@ parse_posix_bracket(CClassNode* cc, CClassNode* asc_cc, goto not_posix_bracket; ascii_range = IS_ASCII_RANGE(env->option) && - ! IS_POSIX_BRACKET_ALL_RANGE(env->option); + ! IS_POSIX_BRACKET_ALL_RANGE(env->option); for (pb = PBS; pb < PBS + numberof(PBS); pb++) { if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) { p = (UChar* )onigenc_step(enc, p, end, pb->len); if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0) - return ONIGERR_INVALID_POSIX_BRACKET_TYPE; + return ONIGERR_INVALID_POSIX_BRACKET_TYPE; r = add_ctype_to_cc(cc, pb->ctype, not, ascii_range, env); if (r != 0) return r; if (IS_NOT_NULL(asc_cc)) { - if (pb->ctype != ONIGENC_CTYPE_WORD && - pb->ctype != ONIGENC_CTYPE_ASCII && - !ascii_range) - r = add_ctype_to_cc(asc_cc, pb->ctype, not, ascii_range, env); - if (r != 0) return r; + if (pb->ctype != ONIGENC_CTYPE_WORD && + pb->ctype != ONIGENC_CTYPE_ASCII && + !ascii_range) + r = add_ctype_to_cc(asc_cc, pb->ctype, not, ascii_range, env); + if (r != 0) return r; } PINC_S; PINC_S; @@ -4466,7 +4466,7 @@ parse_posix_bracket(CClassNode* cc, CClassNode* asc_cc, if (! 
PEND) { PFETCH_S(c); if (c == ']') - return ONIGERR_INVALID_POSIX_BRACKET_TYPE; + return ONIGERR_INVALID_POSIX_BRACKET_TYPE; } } @@ -4507,7 +4507,7 @@ static int cclass_case_fold(Node** np, CClassNode* cc, CClassNode* asc_cc, ScanE static int parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, - ScanEnv* env) + ScanEnv* env) { int r, ctype; CClassNode* cc; @@ -4545,8 +4545,8 @@ enum CCVALTYPE { static int next_state_class(CClassNode* cc, CClassNode* asc_cc, - OnigCodePoint* vs, enum CCVALTYPE* type, - enum CCSTATE* state, ScanEnv* env) + OnigCodePoint* vs, enum CCVALTYPE* type, + enum CCSTATE* state, ScanEnv* env) { int r; @@ -4557,14 +4557,14 @@ next_state_class(CClassNode* cc, CClassNode* asc_cc, if (*type == CCV_SB) { BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs)); if (IS_NOT_NULL(asc_cc)) - BITSET_SET_BIT(asc_cc->bs, (int )(*vs)); + BITSET_SET_BIT(asc_cc->bs, (int )(*vs)); } else if (*type == CCV_CODE_POINT) { r = add_code_range(&(cc->mbuf), env, *vs, *vs); if (r < 0) return r; if (IS_NOT_NULL(asc_cc)) { - r = add_code_range0(&(asc_cc->mbuf), env, *vs, *vs, 0); - if (r < 0) return r; + r = add_code_range0(&(asc_cc->mbuf), env, *vs, *vs, 0); + if (r < 0) return r; } } } @@ -4576,10 +4576,10 @@ next_state_class(CClassNode* cc, CClassNode* asc_cc, static int next_state_val(CClassNode* cc, CClassNode* asc_cc, - OnigCodePoint *from, OnigCodePoint to, - int* from_israw, int to_israw, - enum CCVALTYPE intype, enum CCVALTYPE* type, - enum CCSTATE* state, ScanEnv* env) + OnigCodePoint *from, OnigCodePoint to, + int* from_israw, int to_israw, + enum CCVALTYPE intype, enum CCVALTYPE* type, + enum CCSTATE* state, ScanEnv* env) { int r; @@ -4588,14 +4588,14 @@ next_state_val(CClassNode* cc, CClassNode* asc_cc, if (*type == CCV_SB) { BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*from)); if (IS_NOT_NULL(asc_cc)) - BITSET_SET_BIT(asc_cc->bs, (int )(*from)); + BITSET_SET_BIT(asc_cc->bs, (int )(*from)); } else if (*type == CCV_CODE_POINT) { r = 
add_code_range(&(cc->mbuf), env, *from, *from); if (r < 0) return r; if (IS_NOT_NULL(asc_cc)) { - r = add_code_range0(&(asc_cc->mbuf), env, *from, *from, 0); - if (r < 0) return r; + r = add_code_range0(&(asc_cc->mbuf), env, *from, *from, 0); + if (r < 0) return r; } } break; @@ -4603,42 +4603,42 @@ next_state_val(CClassNode* cc, CClassNode* asc_cc, case CCS_RANGE: if (intype == *type) { if (intype == CCV_SB) { - if (*from > 0xff || to > 0xff) - return ONIGERR_INVALID_CODE_POINT_VALUE; - - if (*from > to) { - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) - goto ccs_range_end; - else - return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; - } - bitset_set_range(env, cc->bs, (int )*from, (int )to); - if (IS_NOT_NULL(asc_cc)) - bitset_set_range(env, asc_cc->bs, (int )*from, (int )to); + if (*from > 0xff || to > 0xff) + return ONIGERR_INVALID_CODE_POINT_VALUE; + + if (*from > to) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) + goto ccs_range_end; + else + return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; + } + bitset_set_range(env, cc->bs, (int )*from, (int )to); + if (IS_NOT_NULL(asc_cc)) + bitset_set_range(env, asc_cc->bs, (int )*from, (int )to); } else { - r = add_code_range(&(cc->mbuf), env, *from, to); - if (r < 0) return r; - if (IS_NOT_NULL(asc_cc)) { - r = add_code_range0(&(asc_cc->mbuf), env, *from, to, 0); - if (r < 0) return r; - } + r = add_code_range(&(cc->mbuf), env, *from, to); + if (r < 0) return r; + if (IS_NOT_NULL(asc_cc)) { + r = add_code_range0(&(asc_cc->mbuf), env, *from, to, 0); + if (r < 0) return r; + } } } else { if (*from > to) { - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) - goto ccs_range_end; - else - return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) + goto ccs_range_end; + else + return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; } bitset_set_range(env, cc->bs, (int )*from, (int )(to < 0xff ? 
to : 0xff)); r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*from, to); if (r < 0) return r; if (IS_NOT_NULL(asc_cc)) { - bitset_set_range(env, asc_cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff)); - r = add_code_range0(&(asc_cc->mbuf), env, (OnigCodePoint )*from, to, 0); - if (r < 0) return r; + bitset_set_range(env, asc_cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff)); + r = add_code_range0(&(asc_cc->mbuf), env, (OnigCodePoint )*from, to, 0); + if (r < 0) return r; } } ccs_range_end: @@ -4662,7 +4662,7 @@ next_state_val(CClassNode* cc, CClassNode* asc_cc, static int code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped, - ScanEnv* env) + ScanEnv* env) { int in_esc; OnigCodePoint code; @@ -4685,7 +4685,7 @@ code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped, static int parse_char_class(Node** np, Node** asc_np, OnigToken* tok, UChar** src, UChar* end, - ScanEnv* env) + ScanEnv* env) { int r, neg, len, fetched, and_start; OnigCodePoint v, vs; @@ -4746,16 +4746,16 @@ parse_char_class(Node** np, Node** asc_np, OnigToken* tok, UChar** src, UChar* e switch (r) { case TK_CHAR: if ((tok->u.code >= SINGLE_BYTE_SIZE) || - (len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c)) > 1) { - in_type = CCV_CODE_POINT; + (len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c)) > 1) { + in_type = CCV_CODE_POINT; } else if (len < 0) { - r = len; - goto err; + r = len; + goto err; } else { sb_char: - in_type = CCV_SB; + in_type = CCV_SB; } v = (OnigCodePoint )tok->u.c; in_israw = 0; @@ -4765,54 +4765,54 @@ parse_char_class(Node** np, Node** asc_np, OnigToken* tok, UChar** src, UChar* e case TK_RAW_BYTE: /* tok->base != 0 : octal or hexadec. */ if (! 
ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) { - UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; - UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN; - UChar* psave = p; - int i, base = tok->base; - - buf[0] = (UChar )tok->u.c; - for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) { - r = fetch_token_in_cc(tok, &p, end, env); - if (r < 0) goto err; - if (r != TK_RAW_BYTE || tok->base != base) { - fetched = 1; - break; - } - buf[i] = (UChar )tok->u.c; - } - - if (i < ONIGENC_MBC_MINLEN(env->enc)) { - r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; - goto err; - } - - len = enclen(env->enc, buf, buf + i); - if (i < len) { - r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; - goto err; - } - else if (i > len) { /* fetch back */ - p = psave; - for (i = 1; i < len; i++) { - (void)fetch_token_in_cc(tok, &p, end, env); - /* no need to check the return value (already checked above) */ - } - fetched = 0; - } - - if (i == 1) { - v = (OnigCodePoint )buf[0]; - goto raw_single; - } - else { - v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe); - in_type = CCV_CODE_POINT; - } + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN; + UChar* psave = p; + int i, base = tok->base; + + buf[0] = (UChar )tok->u.c; + for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) { + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + if (r != TK_RAW_BYTE || tok->base != base) { + fetched = 1; + break; + } + buf[i] = (UChar )tok->u.c; + } + + if (i < ONIGENC_MBC_MINLEN(env->enc)) { + r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; + goto err; + } + + len = enclen(env->enc, buf, buf + i); + if (i < len) { + r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; + goto err; + } + else if (i > len) { /* fetch back */ + p = psave; + for (i = 1; i < len; i++) { + (void)fetch_token_in_cc(tok, &p, end, env); + /* no need to check the return value (already checked above) */ + } + fetched = 0; + } + + if (i == 1) { + v = (OnigCodePoint )buf[0]; + goto raw_single; + } + else { + v = 
ONIGENC_MBC_TO_CODE(env->enc, buf, bufe); + in_type = CCV_CODE_POINT; + } } else { - v = (OnigCodePoint )tok->u.c; + v = (OnigCodePoint )tok->u.c; raw_single: - in_type = CCV_SB; + in_type = CCV_SB; } in_israw = 1; goto val_entry2; @@ -4824,13 +4824,13 @@ parse_char_class(Node** np, Node** asc_np, OnigToken* tok, UChar** src, UChar* e val_entry: len = ONIGENC_CODE_TO_MBCLEN(env->enc, v); if (len < 0) { - r = len; - goto err; + r = len; + goto err; } in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT); val_entry2: r = next_state_val(cc, asc_cc, &vs, v, &val_israw, in_israw, in_type, &val_type, - &state, env); + &state, env); if (r != 0) goto err; break; @@ -4838,24 +4838,24 @@ parse_char_class(Node** np, Node** asc_np, OnigToken* tok, UChar** src, UChar* e r = parse_posix_bracket(cc, asc_cc, &p, end, env); if (r < 0) goto err; if (r == 1) { /* is not POSIX bracket */ - CC_ESC_WARN(env, (UChar* )"["); - p = tok->backp; - v = (OnigCodePoint )tok->u.c; - in_israw = 0; - goto val_entry; + CC_ESC_WARN(env, (UChar* )"["); + p = tok->backp; + v = (OnigCodePoint )tok->u.c; + in_israw = 0; + goto val_entry; } goto next_class; break; case TK_CHAR_TYPE: r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, - IS_ASCII_RANGE(env->option), env); + IS_ASCII_RANGE(env->option), env); if (r != 0) return r; if (IS_NOT_NULL(asc_cc)) { - if (tok->u.prop.ctype != ONIGENC_CTYPE_WORD) - r = add_ctype_to_cc(asc_cc, tok->u.prop.ctype, tok->u.prop.not, - IS_ASCII_RANGE(env->option), env); - if (r != 0) return r; + if (tok->u.prop.ctype != ONIGENC_CTYPE_WORD) + r = add_ctype_to_cc(asc_cc, tok->u.prop.ctype, tok->u.prop.not, + IS_ASCII_RANGE(env->option), env); + if (r != 0) return r; } next_class: @@ -4865,133 +4865,133 @@ parse_char_class(Node** np, Node** asc_np, OnigToken* tok, UChar** src, UChar* e case TK_CHAR_PROPERTY: { - int ctype; - - ctype = fetch_char_property_to_ctype(&p, end, env); - if (ctype < 0) return ctype; - r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, 0, env); - if (r 
!= 0) return r; - if (IS_NOT_NULL(asc_cc)) { - if (ctype != ONIGENC_CTYPE_ASCII) - r = add_ctype_to_cc(asc_cc, ctype, tok->u.prop.not, 0, env); - if (r != 0) return r; - } - goto next_class; + int ctype; + + ctype = fetch_char_property_to_ctype(&p, end, env); + if (ctype < 0) return ctype; + r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, 0, env); + if (r != 0) return r; + if (IS_NOT_NULL(asc_cc)) { + if (ctype != ONIGENC_CTYPE_ASCII) + r = add_ctype_to_cc(asc_cc, ctype, tok->u.prop.not, 0, env); + if (r != 0) return r; + } + goto next_class; } break; case TK_CC_RANGE: if (state == CCS_VALUE) { - r = fetch_token_in_cc(tok, &p, end, env); - if (r < 0) goto err; - fetched = 1; - if (r == TK_CC_CLOSE) { /* allow [x-] */ - range_end_val: - v = (OnigCodePoint )'-'; - in_israw = 0; - goto val_entry; - } - else if (r == TK_CC_AND) { - CC_ESC_WARN(env, (UChar* )"-"); - goto range_end_val; - } - - if (val_type == CCV_CLASS) { - r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; - goto err; - } - - state = CCS_RANGE; + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + fetched = 1; + if (r == TK_CC_CLOSE) { /* allow [x-] */ + range_end_val: + v = (OnigCodePoint )'-'; + in_israw = 0; + goto val_entry; + } + else if (r == TK_CC_AND) { + CC_ESC_WARN(env, (UChar* )"-"); + goto range_end_val; + } + + if (val_type == CCV_CLASS) { + r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; + goto err; + } + + state = CCS_RANGE; } else if (state == CCS_START) { - /* [-xa] is allowed */ - v = (OnigCodePoint )tok->u.c; - in_israw = 0; + /* [-xa] is allowed */ + v = (OnigCodePoint )tok->u.c; + in_israw = 0; - r = fetch_token_in_cc(tok, &p, end, env); - if (r < 0) goto err; - fetched = 1; - /* [--x] or [a&&-x] is warned. */ - if (r == TK_CC_RANGE || and_start != 0) - CC_ESC_WARN(env, (UChar* )"-"); + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + fetched = 1; + /* [--x] or [a&&-x] is warned. 
*/ + if (r == TK_CC_RANGE || and_start != 0) + CC_ESC_WARN(env, (UChar* )"-"); - goto val_entry; + goto val_entry; } else if (state == CCS_RANGE) { - CC_ESC_WARN(env, (UChar* )"-"); - goto sb_char; /* [!--x] is allowed */ + CC_ESC_WARN(env, (UChar* )"-"); + goto sb_char; /* [!--x] is allowed */ } else { /* CCS_COMPLETE */ - r = fetch_token_in_cc(tok, &p, end, env); - if (r < 0) goto err; - fetched = 1; - if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */ - else if (r == TK_CC_AND) { - CC_ESC_WARN(env, (UChar* )"-"); - goto range_end_val; - } - - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) { - CC_ESC_WARN(env, (UChar* )"-"); - goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */ - } - r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; - goto err; + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + fetched = 1; + if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */ + else if (r == TK_CC_AND) { + CC_ESC_WARN(env, (UChar* )"-"); + goto range_end_val; + } + + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) { + CC_ESC_WARN(env, (UChar* )"-"); + goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */ + } + r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; + goto err; } break; case TK_CC_CC_OPEN: /* [ */ { - Node *anode, *aasc_node; - CClassNode* acc; - - r = parse_char_class(&anode, &aasc_node, tok, &p, end, env); - if (r == 0) { - acc = NCCLASS(anode); - r = or_cclass(cc, acc, env); - } - if (r == 0 && IS_NOT_NULL(aasc_node)) { - acc = NCCLASS(aasc_node); - r = or_cclass(asc_cc, acc, env); - } - onig_node_free(anode); - onig_node_free(aasc_node); - if (r != 0) goto err; + Node *anode, *aasc_node; + CClassNode* acc; + + r = parse_char_class(&anode, &aasc_node, tok, &p, end, env); + if (r == 0) { + acc = NCCLASS(anode); + r = or_cclass(cc, acc, env); + } + if (r == 0 && IS_NOT_NULL(aasc_node)) { + acc = NCCLASS(aasc_node); + r = or_cclass(asc_cc, acc, env); + } + 
onig_node_free(anode); + onig_node_free(aasc_node); + if (r != 0) goto err; } break; case TK_CC_AND: /* && */ { - if (state == CCS_VALUE) { - r = next_state_val(cc, asc_cc, &vs, 0, &val_israw, 0, val_type, - &val_type, &state, env); - if (r != 0) goto err; - } - /* initialize local variables */ - and_start = 1; - state = CCS_START; - - if (IS_NOT_NULL(prev_cc)) { - r = and_cclass(prev_cc, cc, env); - if (r != 0) goto err; - bbuf_free(cc->mbuf); - if (IS_NOT_NULL(asc_cc)) { - r = and_cclass(asc_prev_cc, asc_cc, env); - if (r != 0) goto err; - bbuf_free(asc_cc->mbuf); - } - } - else { - prev_cc = cc; - cc = &work_cc; - if (IS_NOT_NULL(asc_cc)) { - asc_prev_cc = asc_cc; - asc_cc = &asc_work_cc; - } - } - initialize_cclass(cc); - if (IS_NOT_NULL(asc_cc)) - initialize_cclass(asc_cc); + if (state == CCS_VALUE) { + r = next_state_val(cc, asc_cc, &vs, 0, &val_israw, 0, val_type, + &val_type, &state, env); + if (r != 0) goto err; + } + /* initialize local variables */ + and_start = 1; + state = CCS_START; + + if (IS_NOT_NULL(prev_cc)) { + r = and_cclass(prev_cc, cc, env); + if (r != 0) goto err; + bbuf_free(cc->mbuf); + if (IS_NOT_NULL(asc_cc)) { + r = and_cclass(asc_prev_cc, asc_cc, env); + if (r != 0) goto err; + bbuf_free(asc_cc->mbuf); + } + } + else { + prev_cc = cc; + cc = &work_cc; + if (IS_NOT_NULL(asc_cc)) { + asc_prev_cc = asc_cc; + asc_cc = &asc_work_cc; + } + } + initialize_cclass(cc); + if (IS_NOT_NULL(asc_cc)) + initialize_cclass(asc_cc); } break; @@ -5015,7 +5015,7 @@ parse_char_class(Node** np, Node** asc_np, OnigToken* tok, UChar** src, UChar* e if (state == CCS_VALUE) { r = next_state_val(cc, asc_cc, &vs, 0, &val_israw, 0, val_type, - &val_type, &state, env); + &val_type, &state, env); if (r != 0) goto err; } @@ -5054,12 +5054,12 @@ parse_char_class(Node** np, Node** asc_np, OnigToken* tok, UChar** src, UChar* e #define NEWLINE_CODE 0x0a if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) { - if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1) - 
BITSET_SET_BIT_CHKDUP(cc->bs, NEWLINE_CODE); - else { - r = add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE); - if (r < 0) goto err; - } + if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1) + BITSET_SET_BIT_CHKDUP(cc->bs, NEWLINE_CODE); + else { + r = add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE); + if (r < 0) goto err; + } } } } @@ -5076,11 +5076,11 @@ parse_char_class(Node** np, Node** asc_np, OnigToken* tok, UChar** src, UChar* e } static int parse_subexp(Node** top, OnigToken* tok, int term, - UChar** src, UChar* end, ScanEnv* env); + UChar** src, UChar* end, ScanEnv* env); static int parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, - ScanEnv* env) + ScanEnv* env) { int r = 0, num; Node *target, *work1 = NULL, *work2 = NULL; @@ -5127,28 +5127,28 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, break; case '~': /* (?~...) absent operator */ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_TILDE_ABSENT)) { - *np = node_new_enclose(ENCLOSE_ABSENT); + *np = node_new_enclose(ENCLOSE_ABSENT); } else { - return ONIGERR_UNDEFINED_GROUP_OPTION; + return ONIGERR_UNDEFINED_GROUP_OPTION; } break; #ifdef USE_NAMED_GROUP case '\'': if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { - goto named_group1; + goto named_group1; } else - return ONIGERR_UNDEFINED_GROUP_OPTION; + return ONIGERR_UNDEFINED_GROUP_OPTION; break; # ifdef USE_CAPITAL_P_NAMED_GROUP case 'P': /* (?P...) 
*/ if (!PEND && - IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP)) { - PFETCH(c); - if (c == '<') goto named_group1; + IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP)) { + PFETCH(c); + if (c == '<') goto named_group1; } return ONIGERR_UNDEFINED_GROUP_OPTION; break; @@ -5159,49 +5159,49 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; PFETCH(c); if (c == '=') - *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND); + *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND); else if (c == '!') - *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT); + *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT); #ifdef USE_NAMED_GROUP else { /* (?...) */ - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { - UChar *name; - UChar *name_end; + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { + UChar *name; + UChar *name_end; - PUNFETCH; - c = '<'; + PUNFETCH; + c = '<'; - named_group1: - list_capture = 0; + named_group1: + list_capture = 0; # ifdef USE_CAPTURE_HISTORY - named_group2: + named_group2: # endif - name = p; - r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0); - if (r < 0) return r; - - num = scan_env_add_mem_entry(env); - if (num < 0) return num; - if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM) - return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; - - r = name_add(env->reg, name, name_end, num, env); - if (r != 0) return r; - *np = node_new_enclose_memory(env->option, 1); - CHECK_NULL_RETURN_MEMERR(*np); - NENCLOSE(*np)->regnum = num; - if (list_capture != 0) - BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); - env->num_named++; - } - else { - return ONIGERR_UNDEFINED_GROUP_OPTION; - } + name = p; + r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0); + if (r < 0) return r; + + num = scan_env_add_mem_entry(env); + if (num < 0) return num; + if (list_capture != 0 && 
num >= (int )BIT_STATUS_BITS_NUM) + return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; + + r = name_add(env->reg, name, name_end, num, env); + if (r != 0) return r; + *np = node_new_enclose_memory(env->option, 1); + CHECK_NULL_RETURN_MEMERR(*np); + NENCLOSE(*np)->regnum = num; + if (list_capture != 0) + BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); + env->num_named++; + } + else { + return ONIGERR_UNDEFINED_GROUP_OPTION; + } } #else else { - return ONIGERR_UNDEFINED_GROUP_OPTION; + return ONIGERR_UNDEFINED_GROUP_OPTION; } #endif break; @@ -5210,122 +5210,122 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, case '@': if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) { # ifdef USE_NAMED_GROUP - if (!PEND && - IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { - PFETCH(c); - if (c == '<' || c == '\'') { - list_capture = 1; - goto named_group2; /* (?@...) */ - } - PUNFETCH; - } + if (!PEND && + IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { + PFETCH(c); + if (c == '<' || c == '\'') { + list_capture = 1; + goto named_group2; /* (?@...) 
*/ + } + PUNFETCH; + } # endif - *np = node_new_enclose_memory(env->option, 0); - CHECK_NULL_RETURN_MEMERR(*np); - num = scan_env_add_mem_entry(env); - if (num < 0) return num; - if (num >= (int )BIT_STATUS_BITS_NUM) - return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; + *np = node_new_enclose_memory(env->option, 0); + CHECK_NULL_RETURN_MEMERR(*np); + num = scan_env_add_mem_entry(env); + if (num < 0) return num; + if (num >= (int )BIT_STATUS_BITS_NUM) + return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; - NENCLOSE(*np)->regnum = num; - BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); + NENCLOSE(*np)->regnum = num; + BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); } else { - return ONIGERR_UNDEFINED_GROUP_OPTION; + return ONIGERR_UNDEFINED_GROUP_OPTION; } break; #endif /* USE_CAPTURE_HISTORY */ case '(': /* conditional expression: (?(cond)yes), (?(cond)yes|no) */ if (!PEND && - IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_CONDITION)) { - UChar *name = NULL; - UChar *name_end; - PFETCH(c); - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { /* (n) */ - PUNFETCH; - r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &num, 1); - if (r < 0) return r; + IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_CONDITION)) { + UChar *name = NULL; + UChar *name_end; + PFETCH(c); + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { /* (n) */ + PUNFETCH; + r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &num, 1); + if (r < 0) return r; #if 0 - /* Relative number is not currently supported. (same as Perl) */ - if (num < 0) { - num = BACKREF_REL_TO_ABS(num, env); - if (num <= 0) - return ONIGERR_INVALID_BACKREF; - } + /* Relative number is not currently supported. 
(same as Perl) */ + if (num < 0) { + num = BACKREF_REL_TO_ABS(num, env); + if (num <= 0) + return ONIGERR_INVALID_BACKREF; + } #endif - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) { - if (num > env->num_mem || - IS_NULL(SCANENV_MEM_NODES(env)[num])) - return ONIGERR_INVALID_BACKREF; - } - } + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) { + if (num > env->num_mem || + IS_NULL(SCANENV_MEM_NODES(env)[num])) + return ONIGERR_INVALID_BACKREF; + } + } #ifdef USE_NAMED_GROUP - else if (c == '<' || c == '\'') { /* (), ('name') */ - name = p; - r = fetch_named_backref_token(c, tok, &p, end, env); - if (r < 0) return r; - if (!PPEEK_IS(')')) return ONIGERR_UNDEFINED_GROUP_OPTION; - PINC; - - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_USE_LEFT_MOST_NAMED_GROUP)) { - num = tok->u.backref.ref1; - } - else { - /* FIXME: - * Use left most named group for now. This is the same as Perl. - * However this should use the same strategy as normal back- - * references on Ruby syntax; search right to left. */ - int len = tok->u.backref.num; - num = len > 1 ? tok->u.backref.refs[0] : tok->u.backref.ref1; - } - } + else if (c == '<' || c == '\'') { /* (), ('name') */ + name = p; + r = fetch_named_backref_token(c, tok, &p, end, env); + if (r < 0) return r; + if (!PPEEK_IS(')')) return ONIGERR_UNDEFINED_GROUP_OPTION; + PINC; + + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_USE_LEFT_MOST_NAMED_GROUP)) { + num = tok->u.backref.ref1; + } + else { + /* FIXME: + * Use left most named group for now. This is the same as Perl. + * However this should use the same strategy as normal back- + * references on Ruby syntax; search right to left. */ + int len = tok->u.backref.num; + num = len > 1 ? 
tok->u.backref.refs[0] : tok->u.backref.ref1; + } + } #endif - else - return ONIGERR_INVALID_CONDITION_PATTERN; - *np = node_new_enclose(ENCLOSE_CONDITION); - CHECK_NULL_RETURN_MEMERR(*np); - NENCLOSE(*np)->regnum = num; - if (IS_NOT_NULL(name)) NENCLOSE(*np)->state |= NST_NAME_REF; + else + return ONIGERR_INVALID_CONDITION_PATTERN; + *np = node_new_enclose(ENCLOSE_CONDITION); + CHECK_NULL_RETURN_MEMERR(*np); + NENCLOSE(*np)->regnum = num; + if (IS_NOT_NULL(name)) NENCLOSE(*np)->state |= NST_NAME_REF; } else - return ONIGERR_UNDEFINED_GROUP_OPTION; + return ONIGERR_UNDEFINED_GROUP_OPTION; break; #if 0 case '|': /* branch reset: (?|...) */ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_VBAR_BRANCH_RESET)) { - /* TODO */ + /* TODO */ } else - return ONIGERR_UNDEFINED_GROUP_OPTION; + return ONIGERR_UNDEFINED_GROUP_OPTION; break; #endif case '^': /* loads default options */ if (!PEND && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { - /* d-imsx */ - ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1); - ONOFF(option, ONIG_OPTION_IGNORECASE, 1); - ONOFF(option, ONIG_OPTION_SINGLELINE, 0); - ONOFF(option, ONIG_OPTION_MULTILINE, 1); - ONOFF(option, ONIG_OPTION_EXTEND, 1); - PFETCH(c); + /* d-imsx */ + ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1); + ONOFF(option, ONIG_OPTION_IGNORECASE, 1); + ONOFF(option, ONIG_OPTION_SINGLELINE, 0); + ONOFF(option, ONIG_OPTION_MULTILINE, 1); + ONOFF(option, ONIG_OPTION_EXTEND, 1); + PFETCH(c); } #if 0 else if (!PEND && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) { - /* d-imx */ - ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0); - ONOFF(option, ONIG_OPTION_POSIX_BRACKET_ALL_RANGE, 0); - ONOFF(option, ONIG_OPTION_WORD_BOUND_ALL_RANGE, 0); - ONOFF(option, ONIG_OPTION_IGNORECASE, 1); - ONOFF(option, ONIG_OPTION_MULTILINE, 1); - ONOFF(option, ONIG_OPTION_EXTEND, 1); - PFETCH(c); + /* d-imx */ + ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0); + ONOFF(option, ONIG_OPTION_POSIX_BRACKET_ALL_RANGE, 0); + ONOFF(option, 
ONIG_OPTION_WORD_BOUND_ALL_RANGE, 0); + ONOFF(option, ONIG_OPTION_IGNORECASE, 1); + ONOFF(option, ONIG_OPTION_MULTILINE, 1); + ONOFF(option, ONIG_OPTION_EXTEND, 1); + PFETCH(c); } #endif else { - return ONIGERR_UNDEFINED_GROUP_OPTION; + return ONIGERR_UNDEFINED_GROUP_OPTION; } /* fall through */ #ifdef USE_POSIXLINE_OPTION @@ -5334,120 +5334,120 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, case '-': case 'i': case 'm': case 's': case 'x': case 'a': case 'd': case 'l': case 'u': { - int neg = 0; - - while (1) { - switch (c) { - case ':': - case ')': - break; - - case '-': neg = 1; break; - case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break; - case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break; - case 's': - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { - ONOFF(option, ONIG_OPTION_MULTILINE, neg); - } - else - return ONIGERR_UNDEFINED_GROUP_OPTION; - break; - - case 'm': - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { - ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0)); - } - else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) { - ONOFF(option, ONIG_OPTION_MULTILINE, neg); - } - else - return ONIGERR_UNDEFINED_GROUP_OPTION; - break; + int neg = 0; + + while (1) { + switch (c) { + case ':': + case ')': + break; + + case '-': neg = 1; break; + case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break; + case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break; + case 's': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { + ONOFF(option, ONIG_OPTION_MULTILINE, neg); + } + else + return ONIGERR_UNDEFINED_GROUP_OPTION; + break; + + case 'm': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { + ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 
1 : 0)); + } + else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) { + ONOFF(option, ONIG_OPTION_MULTILINE, neg); + } + else + return ONIGERR_UNDEFINED_GROUP_OPTION; + break; #ifdef USE_POSIXLINE_OPTION - case 'p': - ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg); - break; + case 'p': + ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg); + break; #endif - case 'a': /* limits \d, \s, \w and POSIX brackets to ASCII range */ - if ((IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) || - IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) && - (neg == 0)) { - ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0); - ONOFF(option, ONIG_OPTION_POSIX_BRACKET_ALL_RANGE, 1); - ONOFF(option, ONIG_OPTION_WORD_BOUND_ALL_RANGE, 1); - } - else - return ONIGERR_UNDEFINED_GROUP_OPTION; - break; - - case 'u': - if ((IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) || - IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) && - (neg == 0)) { - ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1); - ONOFF(option, ONIG_OPTION_POSIX_BRACKET_ALL_RANGE, 1); - ONOFF(option, ONIG_OPTION_WORD_BOUND_ALL_RANGE, 1); - } - else - return ONIGERR_UNDEFINED_GROUP_OPTION; - break; - - case 'd': - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) && - (neg == 0)) { - ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1); - } - else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY) && - (neg == 0)) { - ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0); - ONOFF(option, ONIG_OPTION_POSIX_BRACKET_ALL_RANGE, 0); - ONOFF(option, ONIG_OPTION_WORD_BOUND_ALL_RANGE, 0); - } - else - return ONIGERR_UNDEFINED_GROUP_OPTION; - break; - - case 'l': - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) && (neg == 0)) { - ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1); - } - else - return ONIGERR_UNDEFINED_GROUP_OPTION; - break; - - default: - return ONIGERR_UNDEFINED_GROUP_OPTION; - } - - if (c == ')') { - *np = node_new_option(option); - CHECK_NULL_RETURN_MEMERR(*np); - *src = p; - return 2; 
/* option only */ - } - else if (c == ':') { - OnigOptionType prev = env->option; - - env->option = option; - r = fetch_token(tok, &p, end, env); - if (r < 0) { - env->option = prev; - return r; - } - r = parse_subexp(&target, tok, term, &p, end, env); - env->option = prev; - if (r < 0) return r; - *np = node_new_option(option); - CHECK_NULL_RETURN_MEMERR(*np); - NENCLOSE(*np)->target = target; - *src = p; - return 0; - } - - if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; - PFETCH(c); - } + case 'a': /* limits \d, \s, \w and POSIX brackets to ASCII range */ + if ((IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) || + IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) && + (neg == 0)) { + ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0); + ONOFF(option, ONIG_OPTION_POSIX_BRACKET_ALL_RANGE, 1); + ONOFF(option, ONIG_OPTION_WORD_BOUND_ALL_RANGE, 1); + } + else + return ONIGERR_UNDEFINED_GROUP_OPTION; + break; + + case 'u': + if ((IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) || + IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) && + (neg == 0)) { + ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1); + ONOFF(option, ONIG_OPTION_POSIX_BRACKET_ALL_RANGE, 1); + ONOFF(option, ONIG_OPTION_WORD_BOUND_ALL_RANGE, 1); + } + else + return ONIGERR_UNDEFINED_GROUP_OPTION; + break; + + case 'd': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) && + (neg == 0)) { + ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1); + } + else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY) && + (neg == 0)) { + ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0); + ONOFF(option, ONIG_OPTION_POSIX_BRACKET_ALL_RANGE, 0); + ONOFF(option, ONIG_OPTION_WORD_BOUND_ALL_RANGE, 0); + } + else + return ONIGERR_UNDEFINED_GROUP_OPTION; + break; + + case 'l': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) && (neg == 0)) { + ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1); + } + else + return ONIGERR_UNDEFINED_GROUP_OPTION; + break; + + default: + return ONIGERR_UNDEFINED_GROUP_OPTION; + } + + if (c == 
')') { + *np = node_new_option(option); + CHECK_NULL_RETURN_MEMERR(*np); + *src = p; + return 2; /* option only */ + } + else if (c == ':') { + OnigOptionType prev = env->option; + + env->option = option; + r = fetch_token(tok, &p, end, env); + if (r < 0) { + env->option = prev; + return r; + } + r = parse_subexp(&target, tok, term, &p, end, env); + env->option = prev; + if (r < 0) return r; + *np = node_new_option(option); + CHECK_NULL_RETURN_MEMERR(*np); + NENCLOSE(*np)->target = target; + *src = p; + return 0; + } + + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + PFETCH(c); + } } break; @@ -5486,14 +5486,14 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, } else if (NENCLOSE(*np)->type == ENCLOSE_CONDITION) { if (NTYPE(target) != NT_ALT) { - /* convert (?(cond)yes) to (?(cond)yes|empty) */ - work1 = node_new_empty(); - if (IS_NULL(work1)) goto err; - work2 = onig_node_new_alt(work1, NULL_NODE); - if (IS_NULL(work2)) goto err; - work1 = onig_node_new_alt(target, work2); - if (IS_NULL(work1)) goto err; - NENCLOSE(*np)->target = work1; + /* convert (?(cond)yes) to (?(cond)yes|empty) */ + work1 = node_new_empty(); + if (IS_NULL(work1)) goto err; + work2 = onig_node_new_alt(work1, NULL_NODE); + if (IS_NULL(work2)) goto err; + work1 = onig_node_new_alt(target, work2); + if (IS_NULL(work1)) goto err; + NENCLOSE(*np)->target = work1; } } } @@ -5532,11 +5532,11 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) if (! 
group) { StrNode* sn = NSTR(target); if (str_node_can_be_split(sn, env->enc)) { - Node* n = str_node_split_last_char(sn, env->enc); - if (IS_NOT_NULL(n)) { - qn->target = n; - return 2; - } + Node* n = str_node_split_last_char(sn, env->enc); + if (IS_NOT_NULL(n)) { + qn->target = n; + return 2; + } } } break; @@ -5550,43 +5550,43 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR if (nestq_num >= 0 && targetq_num >= 0 && - IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) { - switch (ReduceTypeTable[targetq_num][nestq_num]) { - case RQ_ASIS: - break; - - case RQ_DEL: - if (onig_warn != onig_null_warn) { - onig_syntax_warn(env, "regular expression has redundant nested repeat operator '%s'", - PopularQStr[targetq_num]); - } - goto warn_exit; - break; - - default: - if (onig_warn != onig_null_warn) { - onig_syntax_warn(env, "nested repeat operator '%s' and '%s' was replaced with '%s' in regular expression", - PopularQStr[targetq_num], PopularQStr[nestq_num], - ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]); - } - goto warn_exit; - break; - } + IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) { + switch (ReduceTypeTable[targetq_num][nestq_num]) { + case RQ_ASIS: + break; + + case RQ_DEL: + if (onig_warn != onig_null_warn) { + onig_syntax_warn(env, "regular expression has redundant nested repeat operator '%s'", + PopularQStr[targetq_num]); + } + goto warn_exit; + break; + + default: + if (onig_warn != onig_null_warn) { + onig_syntax_warn(env, "nested repeat operator '%s' and '%s' was replaced with '%s' in regular expression", + PopularQStr[targetq_num], PopularQStr[nestq_num], + ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]); + } + goto warn_exit; + break; + } } warn_exit: #endif if (targetq_num >= 0) { - if (nestq_num >= 0) { - onig_reduce_nested_quantifier(qnode, target); - goto q_exit; - } - else if (targetq_num == 1 || targetq_num == 2) { /* * or + */ 
- /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */ - if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) { - qn->upper = (qn->lower == 0 ? 1 : qn->lower); - } - } + if (nestq_num >= 0) { + onig_reduce_nested_quantifier(qnode, target); + goto q_exit; + } + else if (targetq_num == 1 || targetq_num == 2) { /* * or + */ + /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */ + if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) { + qn->upper = (qn->lower == 0 ? 1 : qn->lower); + } + } } } break; @@ -5652,7 +5652,7 @@ typedef struct { static int i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], - int to_len, void* arg) + int to_len, void* arg) { IApplyCaseFoldArg* iarg; ScanEnv* env; @@ -5683,7 +5683,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int is_in = onig_is_code_in_cc(env->enc, from, cc); #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) || - (is_in == 0 && IS_NCCLASS_NOT(cc))) { + (is_in == 0 && IS_NCCLASS_NOT(cc))) { if (add_flag) { if (is_singlebyte_range(*to, env->enc)) { BITSET_SET_BIT(bs, *to); @@ -5721,26 +5721,26 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], if (onig_is_code_in_cc(env->enc, from, cc) #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS - && !IS_NCCLASS_NOT(cc) + && !IS_NCCLASS_NOT(cc) #endif - ) { + ) { for (i = 0; i < to_len; i++) { - len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf); - if (i == 0) { - snode = onig_node_new_str(buf, buf + len); - CHECK_NULL_RETURN_MEMERR(snode); - - /* char-class expanded multi-char only - compare with string folded at match time. 
*/ - NSTRING_SET_AMBIG(snode); - } - else { - r = onig_node_str_cat(snode, buf, buf + len); - if (r < 0) { - onig_node_free(snode); - return r; - } - } + len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf); + if (i == 0) { + snode = onig_node_new_str(buf, buf + len); + CHECK_NULL_RETURN_MEMERR(snode); + + /* char-class expanded multi-char only + compare with string folded at match time. */ + NSTRING_SET_AMBIG(snode); + } + else { + r = onig_node_str_cat(snode, buf, buf + len); + if (r < 0) { + onig_node_free(snode); + return r; + } + } } *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE); @@ -5765,7 +5765,7 @@ cclass_case_fold(Node** np, CClassNode* cc, CClassNode* asc_cc, ScanEnv* env) iarg.ptail = &(iarg.alt_root); r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag, - i_apply_case_fold, &iarg); + i_apply_case_fold, &iarg); if (r != 0) { onig_node_free(iarg.alt_root); return r; @@ -6270,8 +6270,8 @@ is_onechar_cclass(CClassNode* cc, OnigCodePoint* code) /* only one char found in the bbuf, save the code point. 
*/ c = data[0]; if (((c < SINGLE_BYTE_SIZE) && BITSET_AT(cc->bs, c))) { - /* skip if c is included in the bitset */ - c = not_found; + /* skip if c is included in the bitset */ + c = not_found; } } else { @@ -6284,10 +6284,10 @@ is_onechar_cclass(CClassNode* cc, OnigCodePoint* code) Bits b1 = cc->bs[i]; if (b1 != 0) { if (((b1 & (b1 - 1)) == 0) && (c == not_found)) { - c = BITS_IN_ROOM * i + countbits(b1 - 1); + c = BITS_IN_ROOM * i + countbits(b1 - 1); } else { - return 0; /* the character class contains multiple chars */ + return 0; /* the character class contains multiple chars */ } } } @@ -6304,7 +6304,7 @@ is_onechar_cclass(CClassNode* cc, OnigCodePoint* code) static int parse_exp(Node** np, OnigToken* tok, int term, - UChar** src, UChar* end, ScanEnv* env) + UChar** src, UChar* end, ScanEnv* env) { int r, len, group = 0; Node* qn; @@ -6333,14 +6333,14 @@ parse_exp(Node** np, OnigToken* tok, int term, env->option = NENCLOSE(*np)->option; r = fetch_token(tok, src, end, env); if (r < 0) { - env->option = prev; - return r; + env->option = prev; + return r; } r = parse_subexp(&target, tok, term, src, end, env); env->option = prev; if (r < 0) { - onig_node_free(target); - return r; + onig_node_free(target); + return r; } NENCLOSE(*np)->target = target; return tok->type; @@ -6378,20 +6378,20 @@ parse_exp(Node** np, OnigToken* tok, int term, string_loop: while (1) { - r = fetch_token(tok, src, end, env); - if (r < 0) return r; - if (r == TK_STRING) { - r = onig_node_str_cat(*np, tok->backp, *src); - } + r = fetch_token(tok, src, end, env); + if (r < 0) return r; + if (r == TK_STRING) { + r = onig_node_str_cat(*np, tok->backp, *src); + } #ifndef NUMBERED_CHAR_IS_NOT_CASE_AMBIG - else if (r == TK_CODE_POINT) { - r = node_str_cat_codepoint(*np, env->enc, tok->u.code); - } + else if (r == TK_CODE_POINT) { + r = node_str_cat_codepoint(*np, env->enc, tok->u.code); + } #endif - else { - break; - } - if (r < 0) return r; + else { + break; + } + if (r < 0) return r; } 
string_end: @@ -6407,36 +6407,36 @@ parse_exp(Node** np, OnigToken* tok, int term, CHECK_NULL_RETURN_MEMERR(*np); len = 1; while (1) { - if (len >= ONIGENC_MBC_MINLEN(env->enc)) { - if (len == enclen(env->enc, NSTR(*np)->s, NSTR(*np)->end)) { - r = fetch_token(tok, src, end, env); - NSTRING_CLEAR_RAW(*np); - goto string_end; - } - } - - r = fetch_token(tok, src, end, env); - if (r < 0) return r; - if (r != TK_RAW_BYTE) { - /* Don't use this, it is wrong for little endian encodings. */ + if (len >= ONIGENC_MBC_MINLEN(env->enc)) { + if (len == enclen(env->enc, NSTR(*np)->s, NSTR(*np)->end)) { + r = fetch_token(tok, src, end, env); + NSTRING_CLEAR_RAW(*np); + goto string_end; + } + } + + r = fetch_token(tok, src, end, env); + if (r < 0) return r; + if (r != TK_RAW_BYTE) { + /* Don't use this, it is wrong for little endian encodings. */ #ifdef USE_PAD_TO_SHORT_BYTE_CHAR - int rem; - if (len < ONIGENC_MBC_MINLEN(env->enc)) { - rem = ONIGENC_MBC_MINLEN(env->enc) - len; - (void )node_str_head_pad(NSTR(*np), rem, (UChar )0); - if (len + rem == enclen(env->enc, NSTR(*np)->s)) { - NSTRING_CLEAR_RAW(*np); - goto string_end; - } - } + int rem; + if (len < ONIGENC_MBC_MINLEN(env->enc)) { + rem = ONIGENC_MBC_MINLEN(env->enc) - len; + (void )node_str_head_pad(NSTR(*np), rem, (UChar )0); + if (len + rem == enclen(env->enc, NSTR(*np)->s)) { + NSTRING_CLEAR_RAW(*np); + goto string_end; + } + } #endif - return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; - } + return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; + } - r = node_str_cat_char(*np, (UChar )tok->u.c); - if (r < 0) return r; + r = node_str_cat_char(*np, (UChar )tok->u.c); + if (r < 0) return r; - len++; + len++; } } break; @@ -6465,7 +6465,7 @@ parse_exp(Node** np, OnigToken* tok, int term, qstart = *src; qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc); if (IS_NULL(qend)) { - nextp = qend = end; + nextp = qend = end; } *np = node_new_str(qstart, qend); CHECK_NULL_RETURN_MEMERR(*np); @@ -6477,30 +6477,30 @@ 
parse_exp(Node** np, OnigToken* tok, int term, { switch (tok->u.prop.ctype) { case ONIGENC_CTYPE_WORD: - *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not, - IS_ASCII_RANGE(env->option)); - CHECK_NULL_RETURN_MEMERR(*np); - break; + *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not, + IS_ASCII_RANGE(env->option)); + CHECK_NULL_RETURN_MEMERR(*np); + break; case ONIGENC_CTYPE_SPACE: case ONIGENC_CTYPE_DIGIT: case ONIGENC_CTYPE_XDIGIT: - { - CClassNode* cc; - - *np = node_new_cclass(); - CHECK_NULL_RETURN_MEMERR(*np); - cc = NCCLASS(*np); - r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0, - IS_ASCII_RANGE(env->option), env); - if (r != 0) return r; - if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); - } - break; + { + CClassNode* cc; + + *np = node_new_cclass(); + CHECK_NULL_RETURN_MEMERR(*np); + cc = NCCLASS(*np); + r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0, + IS_ASCII_RANGE(env->option), env); + if (r != 0) return r; + if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); + } + break; default: - return ONIGERR_PARSER_BUG; - break; + return ONIGERR_PARSER_BUG; + break; } } break; @@ -6518,26 +6518,26 @@ parse_exp(Node** np, OnigToken* tok, int term, r = parse_char_class(np, &asc_node, tok, src, end, env); if (r != 0) { - onig_node_free(asc_node); - return r; + onig_node_free(asc_node); + return r; } cc = NCCLASS(*np); if (is_onechar_cclass(cc, &code)) { - onig_node_free(*np); - onig_node_free(asc_node); - *np = node_new_empty(); - CHECK_NULL_RETURN_MEMERR(*np); - r = node_str_cat_codepoint(*np, env->enc, code); - if (r != 0) return r; - goto string_loop; + onig_node_free(*np); + onig_node_free(asc_node); + *np = node_new_empty(); + CHECK_NULL_RETURN_MEMERR(*np); + r = node_str_cat_codepoint(*np, env->enc, code); + if (r != 0) return r; + goto string_loop; } if (IS_IGNORECASE(env->option)) { - r = cclass_case_fold(np, cc, NCCLASS(asc_node), env); - if (r != 0) { - onig_node_free(asc_node); - return r; - } + r = cclass_case_fold(np, cc, NCCLASS(asc_node), env); + if 
(r != 0) { + onig_node_free(asc_node); + return r; + } } onig_node_free(asc_node); } @@ -6560,13 +6560,13 @@ parse_exp(Node** np, OnigToken* tok, int term, case TK_BACKREF: len = tok->u.backref.num; *np = node_new_backref(len, - (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)), - tok->u.backref.by_name, + (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)), + tok->u.backref.by_name, #ifdef USE_BACKREF_WITH_LEVEL - tok->u.backref.exist_level, - tok->u.backref.level, + tok->u.backref.exist_level, + tok->u.backref.level, #endif - env); + env); CHECK_NULL_RETURN_MEMERR(*np); break; @@ -6576,10 +6576,10 @@ parse_exp(Node** np, OnigToken* tok, int term, int gnum = tok->u.call.gnum; if (gnum < 0 || tok->u.call.rel != 0) { - if (gnum > 0) gnum--; - gnum = BACKREF_REL_TO_ABS(gnum, env); - if (gnum <= 0) - return ONIGERR_INVALID_BACKREF; + if (gnum > 0) gnum--; + gnum = BACKREF_REL_TO_ABS(gnum, env); + if (gnum <= 0) + return ONIGERR_INVALID_BACKREF; } *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum); CHECK_NULL_RETURN_MEMERR(*np); @@ -6598,9 +6598,9 @@ parse_exp(Node** np, OnigToken* tok, int term, case TK_INTERVAL: if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) { if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS)) - return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED; + return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED; else - *np = node_new_empty(); + *np = node_new_empty(); } else { goto tk_byte; @@ -6622,49 +6622,49 @@ parse_exp(Node** np, OnigToken* tok, int term, repeat: if (r == TK_OP_REPEAT || r == TK_INTERVAL) { if (is_invalid_quantifier_target(*targetp)) - return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID; + return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID; qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper, - (r == TK_INTERVAL ? 1 : 0)); + (r == TK_INTERVAL ? 
1 : 0)); CHECK_NULL_RETURN_MEMERR(qn); NQTFR(qn)->greedy = tok->u.repeat.greedy; r = set_quantifier(qn, *targetp, group, env); if (r < 0) { - onig_node_free(qn); - return r; + onig_node_free(qn); + return r; } if (tok->u.repeat.possessive != 0) { - Node* en; - en = node_new_enclose(ENCLOSE_STOP_BACKTRACK); - if (IS_NULL(en)) { - onig_node_free(qn); - return ONIGERR_MEMORY; - } - NENCLOSE(en)->target = qn; - qn = en; + Node* en; + en = node_new_enclose(ENCLOSE_STOP_BACKTRACK); + if (IS_NULL(en)) { + onig_node_free(qn); + return ONIGERR_MEMORY; + } + NENCLOSE(en)->target = qn; + qn = en; } if (r == 0) { - *targetp = qn; + *targetp = qn; } else if (r == 1) { - onig_node_free(qn); + onig_node_free(qn); } else if (r == 2) { /* split case: /abc+/ */ - Node *tmp; - - *targetp = node_new_list(*targetp, NULL); - if (IS_NULL(*targetp)) { - onig_node_free(qn); - return ONIGERR_MEMORY; - } - tmp = NCDR(*targetp) = node_new_list(qn, NULL); - if (IS_NULL(tmp)) { - onig_node_free(qn); - return ONIGERR_MEMORY; - } - targetp = &(NCAR(tmp)); + Node *tmp; + + *targetp = node_new_list(*targetp, NULL); + if (IS_NULL(*targetp)) { + onig_node_free(qn); + return ONIGERR_MEMORY; + } + tmp = NCDR(*targetp) = node_new_list(qn, NULL); + if (IS_NULL(tmp)) { + onig_node_free(qn); + return ONIGERR_MEMORY; + } + targetp = &(NCAR(tmp)); } goto re_entry; } @@ -6675,7 +6675,7 @@ parse_exp(Node** np, OnigToken* tok, int term, static int parse_branch(Node** top, OnigToken* tok, int term, - UChar** src, UChar* end, ScanEnv* env) + UChar** src, UChar* end, ScanEnv* env) { int r; Node *node, **headp; @@ -6696,18 +6696,18 @@ parse_branch(Node** top, OnigToken* tok, int term, while (r != TK_EOT && r != term && r != TK_ALT) { r = parse_exp(&node, tok, term, src, end, env); if (r < 0) { - onig_node_free(node); - return r; + onig_node_free(node); + return r; } if (NTYPE(node) == NT_LIST) { - *headp = node; - while (IS_NOT_NULL(NCDR(node))) node = NCDR(node); - headp = &(NCDR(node)); + *headp = node; + while 
(IS_NOT_NULL(NCDR(node))) node = NCDR(node); + headp = &(NCDR(node)); } else { - *headp = node_new_list(node, NULL); - headp = &(NCDR(*headp)); + *headp = node_new_list(node, NULL); + headp = &(NCDR(*headp)); } } } @@ -6718,7 +6718,7 @@ parse_branch(Node** top, OnigToken* tok, int term, /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */ static int parse_subexp(Node** top, OnigToken* tok, int term, - UChar** src, UChar* end, ScanEnv* env) + UChar** src, UChar* end, ScanEnv* env) { int r; Node *node, *topnode, **headp; @@ -6742,13 +6742,13 @@ parse_subexp(Node** top, OnigToken* tok, int term, while (r == TK_ALT) { r = fetch_token(tok, src, end, env); if (r < 0) { - onig_node_free(topnode); - return r; + onig_node_free(topnode); + return r; } r = parse_branch(&node, tok, term, src, end, env); if (r < 0) { - onig_node_free(topnode); - return r; + onig_node_free(topnode); + return r; } *headp = onig_node_new_alt(node, NULL); @@ -6796,8 +6796,8 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env) NENCLOSE(np)->target = *top; r = scan_env_set_mem_node(env, num, np); if (r != 0) { - onig_node_free(np); - return r; + onig_node_free(np); + return r; } *top = np; } @@ -6807,7 +6807,7 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env) extern int onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, - regex_t* reg, ScanEnv* env) + regex_t* reg, ScanEnv* env) { int r; UChar* p; @@ -6834,7 +6834,7 @@ onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, extern void onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED, - UChar* arg, UChar* arg_end) + UChar* arg, UChar* arg_end) { env->error = arg; env->error_end = arg_end; diff --git a/regparse.h b/regparse.h index de980d0ac8cb1c..dd35d485255bad 100644 --- a/regparse.h +++ b/regparse.h @@ -69,8 +69,8 @@ RUBY_SYMBOL_EXPORT_BEGIN #define NTYPE(node) ((node)->u.base.type) #define SET_NTYPE(node, ntype) \ do { \ - int value = ntype; \ - 
memcpy(&((node)->u.base.type), &value, sizeof(int)); \ + int value = ntype; \ + memcpy(&((node)->u.base.type), &value, sizeof(int)); \ } while (0) #define NSTR(node) (&((node)->u.str)) diff --git a/ruby.c b/ruby.c index cc67b0b25db8f8..a01e3d8afa9524 100644 --- a/ruby.c +++ b/ruby.c @@ -1196,14 +1196,12 @@ setup_yjit_options(const char *s) #if USE_ZJIT static void -setup_zjit_options(ruby_cmdline_options_t *opt, const char *s) +setup_zjit_options(const char *s) { // The option parsing is done in zjit/src/options.rs - extern void *rb_zjit_init_options(void); - extern bool rb_zjit_parse_option(void *options, const char *s); + extern bool rb_zjit_parse_option(const char *s); - if (!opt->zjit) opt->zjit = rb_zjit_init_options(); - if (!rb_zjit_parse_option(opt->zjit, s)) { + if (!rb_zjit_parse_option(s)) { rb_raise(rb_eRuntimeError, "invalid ZJIT option '%s' (--help will show valid zjit options)", s); } } @@ -1481,7 +1479,7 @@ proc_long_options(ruby_cmdline_options_t *opt, const char *s, long argc, char ** else if (is_option_with_optarg("zjit", '-', true, false, false)) { #if USE_ZJIT FEATURE_SET(opt->features, FEATURE_BIT(zjit)); - setup_zjit_options(opt, s); + setup_zjit_options(s); #else rb_warn("Ruby was built without ZJIT support." 
" You may need to install rustc to build Ruby with ZJIT."); @@ -1828,8 +1826,8 @@ ruby_opt_init(ruby_cmdline_options_t *opt) #endif #if USE_ZJIT if (opt->zjit) { - extern void rb_zjit_init(void *options); - rb_zjit_init(opt->zjit); + extern void rb_zjit_init(void); + rb_zjit_init(); } #endif @@ -2370,8 +2368,9 @@ process_options(int argc, char **argv, ruby_cmdline_options_t *opt) #endif #if USE_ZJIT if (FEATURE_SET_P(opt->features, zjit) && !opt->zjit) { - extern void *rb_zjit_init_options(void); - opt->zjit = rb_zjit_init_options(); + extern void rb_zjit_prepare_options(void); + rb_zjit_prepare_options(); + opt->zjit = true; } #endif diff --git a/shape.c b/shape.c index e296ab2d8fa741..6e1b49352f922b 100644 --- a/shape.c +++ b/shape.c @@ -716,6 +716,7 @@ shape_transition_object_id(shape_id_t original_shape_id) rb_shape_t *shape = get_next_shape_internal(RSHAPE(original_shape_id), id_object_id, SHAPE_OBJ_ID, &dont_care, true); if (!shape) { shape = RSHAPE(ROOT_SHAPE_WITH_OBJ_ID); + return transition_complex(shape_id(shape, original_shape_id) | SHAPE_ID_FL_HAS_OBJECT_ID); } RUBY_ASSERT(shape); diff --git a/shape.h b/shape.h index a418dc78218693..2d13c9b762b615 100644 --- a/shape.h +++ b/shape.h @@ -271,11 +271,18 @@ rb_shape_root(size_t heap_id) } static inline shape_id_t -RSHAPE_PARENT(shape_id_t shape_id) +RSHAPE_PARENT_RAW_ID(shape_id_t shape_id) { return RSHAPE(shape_id)->parent_id; } +static inline bool +RSHAPE_DIRECT_CHILD_P(shape_id_t parent_id, shape_id_t child_id) +{ + return (parent_id & SHAPE_ID_FLAGS_MASK) == (child_id & SHAPE_ID_FLAGS_MASK) && + RSHAPE(child_id)->parent_id == (parent_id & SHAPE_ID_OFFSET_MASK); +} + static inline enum shape_type RSHAPE_TYPE(shape_id_t shape_id) { diff --git a/string.c b/string.c index 58fe632463c50f..68c4f5f1d7948c 100644 --- a/string.c +++ b/string.c @@ -4243,17 +4243,7 @@ rb_str_equal(VALUE str1, VALUE str2) * call-seq: * eql?(object) -> true or false * - * Returns +true+ if +object+ has the same length and content; - 
* as +self+; +false+ otherwise: - * - * s = 'foo' - * s.eql?('foo') # => true - * s.eql?('food') # => false - * s.eql?('FOO') # => false - * - * Returns +false+ if the two strings' encodings are not compatible: - * - * "\u{e4 f6 fc}".encode(Encoding::ISO_8859_1).eql?("\u{c4 d6 dc}") # => false + * :include: doc/string/eql_p.rdoc * */ @@ -6695,14 +6685,8 @@ rb_str_chr(VALUE str) * call-seq: * getbyte(index) -> integer or nil * - * Returns the byte at zero-based +index+ as an integer, or +nil+ if +index+ is out of range: - * - * s = 'abcde' # => "abcde" - * s.getbyte(0) # => 97 - * s.getbyte(-1) # => 101 - * s.getbyte(5) # => nil + * :include: doc/string/getbyte.rdoc * - * Related: String#setbyte. */ VALUE rb_str_getbyte(VALUE str, VALUE index) diff --git a/symbol.c b/symbol.c index 840bb6752332b5..43ab0ffa3271c5 100644 --- a/symbol.c +++ b/symbol.c @@ -93,7 +93,7 @@ enum id_entry_type { }; typedef struct { - rb_id_serial_t last_id; + rb_atomic_t next_id; VALUE sym_set; VALUE ids; @@ -212,31 +212,12 @@ rb_str_symname_type(VALUE name, unsigned int allowed_attrset) return type; } -static ID -next_id_base_with_lock(rb_symbols_t *symbols) -{ - ID id; - rb_id_serial_t next_serial = symbols->last_id + 1; - - if (next_serial == 0) { - id = (ID)-1; - } - else { - const size_t num = ++symbols->last_id; - id = num << ID_SCOPE_SHIFT; - } - - return id; -} - static ID next_id_base(void) { - ID id; - GLOBAL_SYMBOLS_LOCKING(symbols) { - id = next_id_base_with_lock(symbols); - } - return id; + rb_atomic_t serial = RUBY_ATOMIC_FETCH_ADD(ruby_global_symbols.next_id, 1); + + return (ID)serial << ID_SCOPE_SHIFT; } static void @@ -293,13 +274,7 @@ sym_set_create(VALUE sym, void *data) ID id = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN); if (id == (ID)-1) id = ID_INTERNAL; - ID nid = next_id_base(); - if (nid == (ID)-1) { - str = rb_str_ellipsize(str, 20); - rb_raise(rb_eRuntimeError, "symbol table overflow (symbol %"PRIsVALUE")", str); - } - - id |= nid; + id |= next_id_base(); 
id |= ID_STATIC_SYM; static_sym = STATIC_ID2SYM(id); @@ -729,7 +704,7 @@ get_id_serial_entry(rb_id_serial_t num, ID id, const enum id_entry_type t) VALUE result = 0; GLOBAL_SYMBOLS_LOCKING(symbols) { - if (num && num <= symbols->last_id) { + if (num && num < RUBY_ATOMIC_LOAD(symbols->next_id)) { size_t idx = num / ID_ENTRY_UNIT; VALUE ids = symbols->ids; VALUE ary; @@ -983,7 +958,7 @@ rb_sym2id(VALUE sym) if (UNLIKELY(!(id & ~ID_SCOPE_MASK))) { VALUE fstr = RSYMBOL(sym)->fstr; - ID num = next_id_base_with_lock(symbols); + ID num = next_id_base(); RSYMBOL(sym)->id = id |= num; /* make it permanent object */ @@ -1061,7 +1036,7 @@ rb_make_temporary_id(size_t n) { const ID max_id = RB_ID_SERIAL_MAX & ~0xffff; const ID id = max_id - (ID)n; - if (id <= ruby_global_symbols.last_id) { + if (id < RUBY_ATOMIC_LOAD(ruby_global_symbols.next_id)) { rb_raise(rb_eRuntimeError, "too big to make temporary ID: %" PRIdSIZE, n); } return (id << ID_SCOPE_SHIFT) | ID_STATIC_SYM | ID_INTERNAL; @@ -1102,7 +1077,7 @@ rb_sym_all_symbols(void) size_t rb_sym_immortal_count(void) { - return (size_t)ruby_global_symbols.last_id; + return (size_t)(RUBY_ATOMIC_LOAD(ruby_global_symbols.next_id) - 1); } int diff --git a/template/fake.rb.in b/template/fake.rb.in index b592fbd253a0e7..a02582a9dc012f 100644 --- a/template/fake.rb.in +++ b/template/fake.rb.in @@ -9,6 +9,7 @@ while /\A(\w+)=(.*)/ =~ ARGV[0] end if inc = arg['i'] src = inc == '-' ? 
STDIN.read : File.read(inc) + src.tr!("\r", " ") src.gsub!(/^#.*\n/, '') else src = "" diff --git a/test/.excludes-zjit/TestKeywordArguments.rb b/test/.excludes-zjit/TestKeywordArguments.rb deleted file mode 100644 index f52bdf6d30d6c6..00000000000000 --- a/test/.excludes-zjit/TestKeywordArguments.rb +++ /dev/null @@ -1 +0,0 @@ -exclude(/test_/, 'Multiple tests make ZJIT panic') diff --git a/test/openssl/test_pkcs7.rb b/test/openssl/test_pkcs7.rb index 5a52f4ce5f346b..85ee68c6d18fde 100644 --- a/test/openssl/test_pkcs7.rb +++ b/test/openssl/test_pkcs7.rb @@ -6,95 +6,125 @@ class OpenSSL::TestPKCS7 < OpenSSL::TestCase def setup super - @rsa1024 = Fixtures.pkey("rsa1024") - @rsa2048 = Fixtures.pkey("rsa2048") - ca = OpenSSL::X509::Name.parse("/DC=org/DC=ruby-lang/CN=CA") - ee1 = OpenSSL::X509::Name.parse("/DC=org/DC=ruby-lang/CN=EE1") - ee2 = OpenSSL::X509::Name.parse("/DC=org/DC=ruby-lang/CN=EE2") + @ca_key = Fixtures.pkey("rsa-1") + @ee1_key = Fixtures.pkey("rsa-2") + @ee2_key = Fixtures.pkey("rsa-3") + ca = OpenSSL::X509::Name.new([["CN", "CA"]]) + ee1 = OpenSSL::X509::Name.new([["CN", "EE1"]]) + ee2 = OpenSSL::X509::Name.new([["CN", "EE2"]]) ca_exts = [ - ["basicConstraints","CA:TRUE",true], - ["keyUsage","keyCertSign, cRLSign",true], - ["subjectKeyIdentifier","hash",false], - ["authorityKeyIdentifier","keyid:always",false], + ["basicConstraints", "CA:TRUE", true], + ["keyUsage", "keyCertSign, cRLSign", true], + ["subjectKeyIdentifier", "hash", false], + ["authorityKeyIdentifier", "keyid:always", false], ] - @ca_cert = issue_cert(ca, @rsa2048, 1, ca_exts, nil, nil) + @ca_cert = issue_cert(ca, @ca_key, 1, ca_exts, nil, nil) ee_exts = [ - ["keyUsage","Non Repudiation, Digital Signature, Key Encipherment",true], - ["authorityKeyIdentifier","keyid:always",false], - ["extendedKeyUsage","clientAuth, emailProtection, codeSigning",false], + ["keyUsage", "nonRepudiation, digitalSignature, keyEncipherment", true], + ["authorityKeyIdentifier", "keyid:always", false], + 
["extendedKeyUsage", "clientAuth, emailProtection, codeSigning", false], ] - @ee1_cert = issue_cert(ee1, @rsa1024, 2, ee_exts, @ca_cert, @rsa2048) - @ee2_cert = issue_cert(ee2, @rsa1024, 3, ee_exts, @ca_cert, @rsa2048) + @ee1_cert = issue_cert(ee1, @ee1_key, 2, ee_exts, @ca_cert, @ca_key) + @ee2_cert = issue_cert(ee2, @ee2_key, 3, ee_exts, @ca_cert, @ca_key) end def test_signed store = OpenSSL::X509::Store.new store.add_cert(@ca_cert) + + data = "aaaaa\nbbbbb\nccccc\n" ca_certs = [@ca_cert] + tmp = OpenSSL::PKCS7.sign(@ee1_cert, @ee1_key, data, ca_certs) + # TODO: #data contains untranslated content + assert_equal("aaaaa\nbbbbb\nccccc\n", tmp.data) + assert_nil(tmp.error_string) - data = "aaaaa\r\nbbbbb\r\nccccc\r\n" - tmp = OpenSSL::PKCS7.sign(@ee1_cert, @rsa1024, data, ca_certs) p7 = OpenSSL::PKCS7.new(tmp.to_der) + assert_nil(p7.data) + assert_nil(p7.error_string) + + assert_true(p7.verify([], store)) + # AWS-LC does not appear to convert to CRLF automatically + assert_equal("aaaaa\r\nbbbbb\r\nccccc\r\n", p7.data) unless aws_lc? + assert_nil(p7.error_string) + certs = p7.certificates - signers = p7.signers - assert(p7.verify([], store)) - assert_equal(data, p7.data) assert_equal(2, certs.size) - assert_equal(@ee1_cert.subject.to_s, certs[0].subject.to_s) - assert_equal(@ca_cert.subject.to_s, certs[1].subject.to_s) + assert_equal(@ee1_cert.subject, certs[0].subject) + assert_equal(@ca_cert.subject, certs[1].subject) + + signers = p7.signers assert_equal(1, signers.size) assert_equal(@ee1_cert.serial, signers[0].serial) - assert_equal(@ee1_cert.issuer.to_s, signers[0].issuer.to_s) + assert_equal(@ee1_cert.issuer, signers[0].issuer) # AWS-LC does not generate authenticatedAttributes assert_in_delta(Time.now, signers[0].signed_time, 10) unless aws_lc? 
+ assert_false(p7.verify([@ca_cert], OpenSSL::X509::Store.new)) + end + + def test_signed_flags + store = OpenSSL::X509::Store.new + store.add_cert(@ca_cert) + # Normally OpenSSL tries to translate the supplied content into canonical # MIME format (e.g. a newline character is converted into CR+LF). # If the content is a binary, PKCS7::BINARY flag should be used. - + # + # PKCS7::NOATTR flag suppresses authenticatedAttributes. data = "aaaaa\nbbbbb\nccccc\n" flag = OpenSSL::PKCS7::BINARY | OpenSSL::PKCS7::NOATTR - tmp = OpenSSL::PKCS7.sign(@ee1_cert, @rsa1024, data, ca_certs, flag) + tmp = OpenSSL::PKCS7.sign(@ee1_cert, @ee1_key, data, [@ca_cert], flag) p7 = OpenSSL::PKCS7.new(tmp.to_der) - certs = p7.certificates - signers = p7.signers - assert(p7.verify([], store)) + + assert_true(p7.verify([], store)) assert_equal(data, p7.data) + + certs = p7.certificates assert_equal(2, certs.size) - assert_equal(@ee1_cert.subject.to_s, certs[0].subject.to_s) - assert_equal(@ca_cert.subject.to_s, certs[1].subject.to_s) + assert_equal(@ee1_cert.subject, certs[0].subject) + assert_equal(@ca_cert.subject, certs[1].subject) + + signers = p7.signers assert_equal(1, signers.size) assert_equal(@ee1_cert.serial, signers[0].serial) - assert_equal(@ee1_cert.issuer.to_s, signers[0].issuer.to_s) + assert_equal(@ee1_cert.issuer, signers[0].issuer) assert_raise(OpenSSL::PKCS7::PKCS7Error) { signers[0].signed_time } + end + + def test_signed_multiple_signers + store = OpenSSL::X509::Store.new + store.add_cert(@ca_cert) # A signed-data which have multiple signatures can be created # through the following steps. # 1. create two signed-data # 2. 
copy signerInfo and certificate from one to another - - tmp1 = OpenSSL::PKCS7.sign(@ee1_cert, @rsa1024, data, [], flag) - tmp2 = OpenSSL::PKCS7.sign(@ee2_cert, @rsa1024, data, [], flag) + data = "aaaaa\r\nbbbbb\r\nccccc\r\n" + tmp1 = OpenSSL::PKCS7.sign(@ee1_cert, @ee1_key, data) + tmp2 = OpenSSL::PKCS7.sign(@ee2_cert, @ee2_key, data) tmp1.add_signer(tmp2.signers[0]) tmp1.add_certificate(@ee2_cert) p7 = OpenSSL::PKCS7.new(tmp1.to_der) - certs = p7.certificates - signers = p7.signers - assert(p7.verify([], store)) + assert_true(p7.verify([], store)) assert_equal(data, p7.data) + + certs = p7.certificates assert_equal(2, certs.size) + + signers = p7.signers assert_equal(2, signers.size) assert_equal(@ee1_cert.serial, signers[0].serial) - assert_equal(@ee1_cert.issuer.to_s, signers[0].issuer.to_s) + assert_equal(@ee1_cert.issuer, signers[0].issuer) assert_equal(@ee2_cert.serial, signers[1].serial) - assert_equal(@ee2_cert.issuer.to_s, signers[1].issuer.to_s) + assert_equal(@ee2_cert.issuer, signers[1].issuer) end def test_signed_add_signer data = "aaaaa\nbbbbb\nccccc\n" - psi = OpenSSL::PKCS7::SignerInfo.new(@ee1_cert, @rsa1024, "sha256") + psi = OpenSSL::PKCS7::SignerInfo.new(@ee1_cert, @ee1_key, "sha256") p7 = OpenSSL::PKCS7.new p7.type = :signed p7.add_signer(psi) @@ -113,27 +143,33 @@ def test_signed_add_signer def test_detached_sign store = OpenSSL::X509::Store.new store.add_cert(@ca_cert) - ca_certs = [@ca_cert] data = "aaaaa\nbbbbb\nccccc\n" + ca_certs = [@ca_cert] flag = OpenSSL::PKCS7::BINARY|OpenSSL::PKCS7::DETACHED - tmp = OpenSSL::PKCS7.sign(@ee1_cert, @rsa1024, data, ca_certs, flag) + tmp = OpenSSL::PKCS7.sign(@ee1_cert, @ee1_key, data, ca_certs, flag) p7 = OpenSSL::PKCS7.new(tmp.to_der) - assert_nothing_raised do - OpenSSL::ASN1.decode(p7) - end + assert_predicate(p7, :detached?) 
+ assert_true(p7.detached) - certs = p7.certificates - signers = p7.signers - assert(!p7.verify([], store)) - assert(p7.verify([], store, data)) + assert_false(p7.verify([], store)) + # FIXME: Should it be nil? + assert_equal("", p7.data) + assert_match(/no content|NO_CONTENT/, p7.error_string) + + assert_true(p7.verify([], store, data)) assert_equal(data, p7.data) + assert_nil(p7.error_string) + + certs = p7.certificates assert_equal(2, certs.size) - assert_equal(@ee1_cert.subject.to_s, certs[0].subject.to_s) - assert_equal(@ca_cert.subject.to_s, certs[1].subject.to_s) + assert_equal(@ee1_cert.subject, certs[0].subject) + assert_equal(@ca_cert.subject, certs[1].subject) + + signers = p7.signers assert_equal(1, signers.size) assert_equal(@ee1_cert.serial, signers[0].serial) - assert_equal(@ee1_cert.issuer.to_s, signers[0].issuer.to_s) + assert_equal(@ee1_cert.issuer, signers[0].issuer) end def test_signed_authenticated_attributes @@ -181,6 +217,8 @@ def test_signed_authenticated_attributes end def test_enveloped + omit_on_fips # PKCS #1 v1.5 padding + certs = [@ee1_cert, @ee2_cert] cipher = OpenSSL::Cipher::AES.new("128-CBC") data = "aaaaa\nbbbbb\nccccc\n" @@ -191,15 +229,20 @@ def test_enveloped assert_equal(:enveloped, p7.type) assert_equal(2, recip.size) - assert_equal(@ca_cert.subject.to_s, recip[0].issuer.to_s) - assert_equal(2, recip[0].serial) - assert_equal(data, p7.decrypt(@rsa1024, @ee1_cert)) + assert_equal(@ca_cert.subject, recip[0].issuer) + assert_equal(@ee1_cert.serial, recip[0].serial) + assert_equal(16, @ee1_key.decrypt(recip[0].enc_key).size) + assert_equal(data, p7.decrypt(@ee1_key, @ee1_cert)) - assert_equal(@ca_cert.subject.to_s, recip[1].issuer.to_s) - assert_equal(3, recip[1].serial) - assert_equal(data, p7.decrypt(@rsa1024, @ee2_cert)) + assert_equal(@ca_cert.subject, recip[1].issuer) + assert_equal(@ee2_cert.serial, recip[1].serial) + assert_equal(data, p7.decrypt(@ee2_key, @ee2_cert)) - assert_equal(data, p7.decrypt(@rsa1024)) + 
assert_equal(data, p7.decrypt(@ee1_key)) + + assert_raise(OpenSSL::PKCS7::PKCS7Error) { + p7.decrypt(@ca_key, @ca_cert) + } # Default cipher has been removed in v3.3 assert_raise_with_message(ArgumentError, /RC2-40-CBC/) { @@ -207,6 +250,28 @@ def test_enveloped } end + def test_enveloped_add_recipient + omit_on_fips # PKCS #1 v1.5 padding + + data = "aaaaa\nbbbbb\nccccc\n" + ktri_ee1 = OpenSSL::PKCS7::RecipientInfo.new(@ee1_cert) + ktri_ee2 = OpenSSL::PKCS7::RecipientInfo.new(@ee2_cert) + + tmp = OpenSSL::PKCS7.new + tmp.type = :enveloped + tmp.cipher = "AES-128-CBC" + tmp.add_recipient(ktri_ee1) + tmp.add_recipient(ktri_ee2) + tmp.add_data(data) + + p7 = OpenSSL::PKCS7.new(tmp.to_der) + assert_equal(:enveloped, p7.type) + assert_equal(data, p7.decrypt(@ee1_key, @ee1_cert)) + assert_equal(data, p7.decrypt(@ee2_key, @ee2_cert)) + assert_equal([@ee1_cert.serial, @ee2_cert.serial].sort, + p7.recipients.map(&:serial).sort) + end + def test_data asn1 = OpenSSL::ASN1::Sequence([ OpenSSL::ASN1::ObjectId("pkcs7-data"), @@ -222,6 +287,7 @@ def test_data p7 = OpenSSL::PKCS7.new(asn1) assert_equal(:data, p7.type) + assert_equal(false, p7.detached) assert_equal(false, p7.detached?) 
# Not applicable assert_nil(p7.certificates) @@ -232,7 +298,8 @@ def test_data # PKCS7#verify can't distinguish verification failure and other errors store = OpenSSL::X509::Store.new assert_equal(false, p7.verify([@ee1_cert], store)) - assert_raise(OpenSSL::PKCS7::PKCS7Error) { p7.decrypt(@rsa1024) } + assert_match(/wrong content type|WRONG_CONTENT_TYPE/, p7.error_string) + assert_raise(OpenSSL::PKCS7::PKCS7Error) { p7.decrypt(@ee1_key) } end def test_empty_signed_data_ruby_bug_19974 @@ -273,12 +340,6 @@ def test_set_type_signed_and_enveloped assert_equal(:signedAndEnveloped, p7.type) end - def test_set_type_enveloped - p7 = OpenSSL::PKCS7.new - p7.type = "enveloped" - assert_equal(:enveloped, p7.type) - end - def test_set_type_encrypted p7 = OpenSSL::PKCS7.new p7.type = "encrypted" @@ -293,7 +354,7 @@ def test_smime ca_certs = [@ca_cert] data = "aaaaa\r\nbbbbb\r\nccccc\r\n" - tmp = OpenSSL::PKCS7.sign(@ee1_cert, @rsa1024, data, ca_certs) + tmp = OpenSSL::PKCS7.sign(@ee1_cert, @ee1_key, data, ca_certs) p7 = OpenSSL::PKCS7.new(tmp.to_der) smime = OpenSSL::PKCS7.write_smime(p7) assert_equal(true, smime.start_with?(< { it } diff --git a/test/prism/ruby/parser_test.rb b/test/prism/ruby/parser_test.rb index cd52758f2e51eb..2396f4186cec0e 100644 --- a/test/prism/ruby/parser_test.rb +++ b/test/prism/ruby/parser_test.rb @@ -181,9 +181,13 @@ def test_it_block_parameter_syntax actual_ast = Prism::Translation::Parser34.new.tokenize(buffer)[0] it_block_parameter_sexp = parse_sexp { + s(:begin, s(:itblock, s(:send, nil, :x), :it, - s(:lvar, :it)) + s(:lvar, :it)), + s(:itblock, + s(:lambda), :it, + s(:lvar, :it))) } assert_equal(it_block_parameter_sexp, actual_ast.to_sexp) diff --git a/test/prism/ruby/ruby_parser_test.rb b/test/prism/ruby/ruby_parser_test.rb index 03bcfafc42b767..960e7f63e46385 100644 --- a/test/prism/ruby/ruby_parser_test.rb +++ b/test/prism/ruby/ruby_parser_test.rb @@ -13,23 +13,11 @@ return end -# We want to also compare lines and files to make sure we're 
setting them -# correctly. -Sexp.prepend( - Module.new do - def ==(other) - super && line == other.line && file == other.file # && line_max == other.line_max - end - end -) - module Prism class RubyParserTest < TestCase todos = [ "encoding_euc_jp.txt", - "newline_terminated.txt", "regex_char_width.txt", - "seattlerb/bug169.txt", "seattlerb/masgn_colon3.txt", "seattlerb/messy_op_asgn_lineno.txt", "seattlerb/op_asgn_primary_colon_const_command_call.txt", @@ -37,15 +25,10 @@ class RubyParserTest < TestCase "seattlerb/str_lit_concat_bad_encodings.txt", "strings.txt", "unescaping.txt", - "unparser/corpus/literal/kwbegin.txt", - "unparser/corpus/literal/send.txt", "whitequark/masgn_const.txt", "whitequark/pattern_matching_constants.txt", - "whitequark/pattern_matching_implicit_array_match.txt", "whitequark/pattern_matching_single_match.txt", "whitequark/ruby_bug_12402.txt", - "whitequark/ruby_bug_14690.txt", - "whitequark/space_args_block.txt" ] # https://github.com/seattlerb/ruby_parser/issues/344 @@ -105,10 +88,16 @@ def assert_ruby_parser(fixture, allowed_failure) source = fixture.read expected = ignore_warnings { ::RubyParser.new.parse(source, fixture.path) } actual = Prism::Translation::RubyParser.new.parse(source, fixture.path) + on_failure = -> { message(expected, actual) } if !allowed_failure - assert_equal(expected, actual, -> { message(expected, actual) }) - elsif expected == actual + assert_equal(expected, actual, on_failure) + + unless actual.nil? 
+ assert_equal(expected.line, actual.line, on_failure) + assert_equal(expected.file, actual.file, on_failure) + end + elsif expected == actual && expected.line && actual.line && expected.file == actual.file puts "#{name} now passes" end end diff --git a/test/ruby/test_marshal.rb b/test/ruby/test_marshal.rb index bfb4a9056eea0f..eb669948017d98 100644 --- a/test/ruby/test_marshal.rb +++ b/test/ruby/test_marshal.rb @@ -469,6 +469,30 @@ def test_marshal_private_class assert_equal(o1.foo, o2.foo) end + class TooComplex + def initialize + @marshal_too_complex = 1 + end + end + + def test_complex_shape_object_id_not_dumped + if defined?(RubyVM::Shape::SHAPE_MAX_VARIATIONS) + assert_equal 8, RubyVM::Shape::SHAPE_MAX_VARIATIONS + end + 8.times do |i| + TooComplex.new.instance_variable_set("@TestObjectIdTooComplex#{i}", 1) + end + obj = TooComplex.new + ivar = "@a#{rand(10_000).to_s.rjust(5, '0')}" + obj.instance_variable_set(ivar, 1) + + if defined?(RubyVM::Shape) + assert_predicate(RubyVM::Shape.of(obj), :too_complex?) 
+ end + obj.object_id + assert_equal "\x04\bo:\x1CTestMarshal::TooComplex\a:\x19@marshal_too_complexi\x06:\f#{ivar}i\x06".b, Marshal.dump(obj) + end + def test_marshal_complex assert_raise(ArgumentError){Marshal.load("\x04\bU:\fComplex[\x05")} assert_raise(ArgumentError){Marshal.load("\x04\bU:\fComplex[\x06i\x00")} diff --git a/test/ruby/test_shapes.rb b/test/ruby/test_shapes.rb index a4cf23c6d5ae89..ed55b95c3edfde 100644 --- a/test/ruby/test_shapes.rb +++ b/test/ruby/test_shapes.rb @@ -655,6 +655,14 @@ class TooComplex end def test_object_id_transition_too_complex + assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}") + begin; + obj = Object.new + obj.instance_variable_set(:@a, 1) + RubyVM::Shape.exhaust_shapes + assert_equal obj.object_id, obj.object_id + end; + assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}") begin; class Hi; end diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index b78d53e682233c..6db57e18ba729c 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -826,6 +826,17 @@ def a(n1,n2,n3,n4,n5,n6,n7,n8) = [n8] } end + def test_forty_param_method + # This used to a trigger a miscomp on A64 due + # to a memory displacement larger than 9 bits. 
+ assert_compiles '1', %Q{ + def foo(#{'_,' * 39} n40) = n40 + + foo(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1) + } + end + + def test_opt_aref_with assert_compiles ':ok', %q{ def aref_with(hash) = hash["key"] @@ -1084,6 +1095,21 @@ def test_require_rubygems_with_auto_compact }, call_threshold: 2 end + def test_stats + assert_runs 'true', %q{ + def test = 1 + test + RubyVM::ZJIT.stats[:zjit_insns_count] > 0 + }, stats: true + end + + def test_zjit_option_uses_array_each_in_ruby + omit 'ZJIT wrongly compiles Array#each, so it is disabled for now' + assert_runs '""', %q{ + Array.instance_method(:each).source_location&.first + } + end + def test_profile_under_nested_jit_call assert_compiles '[nil, nil, 3]', %q{ def profile @@ -1396,12 +1422,13 @@ def assert_runs(expected, test_script, insns: [], assert_compiles: false, **opts end # Run a Ruby process with ZJIT options and a pipe for writing test results - def eval_with_jit(script, call_threshold: 1, num_profiles: 1, timeout: 1000, pipe_fd:, debug: true) + def eval_with_jit(script, call_threshold: 1, num_profiles: 1, stats: false, debug: true, timeout: 1000, pipe_fd:) args = [ "--disable-gems", "--zjit-call-threshold=#{call_threshold}", "--zjit-num-profiles=#{num_profiles}", ] + args << "--zjit-stats" if stats args << "--zjit-debug" if debug args << "-e" << script_shell_encode(script) pipe_r, pipe_w = IO.pipe diff --git a/variable.c b/variable.c index 2bd9b3de4921ca..5ae2d3e3b01b3f 100644 --- a/variable.c +++ b/variable.c @@ -1774,7 +1774,9 @@ general_ivar_set(VALUE obj, ID id, VALUE val, void *data, shape_resize_fields_func(obj, RSHAPE_CAPACITY(current_shape_id), RSHAPE_CAPACITY(next_shape_id), data); } - RUBY_ASSERT(RSHAPE_TYPE_P(next_shape_id, SHAPE_IVAR)); + RUBY_ASSERT(RSHAPE_TYPE_P(next_shape_id, SHAPE_IVAR), + "next_shape_id: 0x%" PRIx32 " RSHAPE_TYPE(next_shape_id): %d", + next_shape_id, (int)RSHAPE_TYPE(next_shape_id)); RUBY_ASSERT(index == (RSHAPE_INDEX(next_shape_id))); 
set_shape_id_func(obj, next_shape_id, data); } @@ -2253,6 +2255,9 @@ each_hash_iv(st_data_t id, st_data_t val, st_data_t data) { struct iv_itr_data * itr_data = (struct iv_itr_data *)data; rb_ivar_foreach_callback_func *callback = itr_data->func; + if (is_internal_id((ID)id)) { + return ST_CONTINUE; + } return callback((ID)id, (VALUE)val, itr_data->arg); } diff --git a/vm.c b/vm.c index da5a51d25b73ba..9284a2ce69ffe3 100644 --- a/vm.c +++ b/vm.c @@ -607,7 +607,7 @@ rb_serial_t ruby_vm_global_cvar_state = 1; static const struct rb_callcache vm_empty_cc = { .flags = T_IMEMO | (imemo_callcache << FL_USHIFT) | VM_CALLCACHE_UNMARKABLE, - .klass = Qfalse, + .klass = Qundef, .cme_ = NULL, .call_ = vm_call_general, .aux_ = { @@ -617,7 +617,7 @@ static const struct rb_callcache vm_empty_cc = { static const struct rb_callcache vm_empty_cc_for_super = { .flags = T_IMEMO | (imemo_callcache << FL_USHIFT) | VM_CALLCACHE_UNMARKABLE, - .klass = Qfalse, + .klass = Qundef, .cme_ = NULL, .call_ = vm_call_super_method, .aux_ = { @@ -4509,14 +4509,21 @@ Init_vm_objects(void) vm->cc_refinement_table = rb_set_init_numtable(); } +#if USE_ZJIT +extern VALUE rb_zjit_option_enabled_p(rb_execution_context_t *ec, VALUE self); +#else +static VALUE rb_zjit_option_enabled_p(rb_execution_context_t *ec, VALUE self) { return Qfalse; } +#endif + +// Whether JIT is enabled or not, we need to load/undef `#with_jit` for other builtins. +#include "jit_hook.rbinc" +#include "jit_undef.rbinc" + // Stub for builtin function when not building YJIT units #if !USE_YJIT void Init_builtin_yjit(void) {} #endif -// Whether YJIT is enabled or not, we load yjit_hook.rb to remove Kernel#with_yjit. 
-#include "yjit_hook.rbinc" - // Stub for builtin function when not building ZJIT units #if !USE_ZJIT void Init_builtin_zjit(void) {} diff --git a/vm_callinfo.h b/vm_callinfo.h index 0ce25c2c0f89dc..3b6880e3205920 100644 --- a/vm_callinfo.h +++ b/vm_callinfo.h @@ -279,9 +279,7 @@ struct rb_callcache { const VALUE flags; /* inline cache: key */ - const VALUE klass; // should not mark it because klass can not be free'd - // because of this marking. When klass is collected, - // cc will be cleared (cc->klass = 0) at vm_ccs_free(). + const VALUE klass; // Weak reference. When klass is collected, `cc->klass = Qundef`. /* inline cache: values */ const struct rb_callable_method_entry_struct * const cme_; @@ -324,12 +322,24 @@ vm_cc_attr_index_initialize(const struct rb_callcache *cc, shape_id_t shape_id) vm_cc_attr_index_set(cc, (attr_index_t)-1, shape_id); } +static inline VALUE +cc_check_class(VALUE klass) +{ + VM_ASSERT(klass == Qundef || RB_TYPE_P(klass, T_CLASS) || RB_TYPE_P(klass, T_ICLASS)); + return klass; +} + +VALUE rb_vm_cc_table_create(size_t capa); +VALUE rb_vm_cc_table_dup(VALUE old_table); +void rb_vm_cc_table_delete(VALUE table, ID mid); + static inline const struct rb_callcache * vm_cc_new(VALUE klass, const struct rb_callable_method_entry_struct *cme, vm_call_handler call, enum vm_cc_type type) { + cc_check_class(klass); struct rb_callcache *cc = IMEMO_NEW(struct rb_callcache, imemo_callcache, klass); *((struct rb_callable_method_entry_struct **)&cc->cme_) = (struct rb_callable_method_entry_struct *)cme; *((vm_call_handler *)&cc->call_) = call; @@ -374,7 +384,7 @@ vm_cc_refinement_p(const struct rb_callcache *cc) (imemo_callcache << FL_USHIFT) | \ VM_CALLCACHE_UNMARKABLE | \ VM_CALLCACHE_ON_STACK, \ - .klass = clazz, \ + .klass = cc_check_class(clazz), \ .cme_ = cme, \ .call_ = call, \ .aux_ = aux, \ @@ -384,8 +394,7 @@ static inline bool vm_cc_class_check(const struct rb_callcache *cc, VALUE klass) { VM_ASSERT(IMEMO_TYPE_P(cc, imemo_callcache)); - 
VM_ASSERT(cc->klass == 0 || - RB_TYPE_P(cc->klass, T_CLASS) || RB_TYPE_P(cc->klass, T_ICLASS)); + VM_ASSERT(cc_check_class(cc->klass)); return cc->klass == klass; } @@ -396,6 +405,15 @@ vm_cc_markable(const struct rb_callcache *cc) return FL_TEST_RAW((VALUE)cc, VM_CALLCACHE_UNMARKABLE) == 0; } +static inline bool +vm_cc_valid(const struct rb_callcache *cc) +{ + VM_ASSERT(IMEMO_TYPE_P(cc, imemo_callcache)); + VM_ASSERT(cc_check_class(cc->klass)); + + return !UNDEF_P(cc->klass); +} + static inline const struct rb_callable_method_entry_struct * vm_cc_cme(const struct rb_callcache *cc) { @@ -447,7 +465,7 @@ vm_cc_cmethod_missing_reason(const struct rb_callcache *cc) static inline bool vm_cc_invalidated_p(const struct rb_callcache *cc) { - if (cc->klass && !METHOD_ENTRY_INVALIDATED(vm_cc_cme(cc))) { + if (vm_cc_valid(cc) && !METHOD_ENTRY_INVALIDATED(vm_cc_cme(cc))) { return false; } else { @@ -543,9 +561,9 @@ vm_cc_invalidate(const struct rb_callcache *cc) { VM_ASSERT(IMEMO_TYPE_P(cc, imemo_callcache)); VM_ASSERT(cc != vm_cc_empty()); - VM_ASSERT(cc->klass != 0); // should be enable + VM_ASSERT(cc->klass != Qundef); // should be enable - *(VALUE *)&cc->klass = 0; + *(VALUE *)&cc->klass = Qundef; RB_DEBUG_COUNTER_INC(cc_ent_invalidate); } @@ -567,9 +585,15 @@ struct rb_class_cc_entries { unsigned int argc; unsigned int flag; const struct rb_callcache *cc; - } *entries; + } entries[FLEX_ARY_LEN]; }; +static inline size_t +vm_ccs_alloc_size(size_t capa) +{ + return offsetof(struct rb_class_cc_entries, entries) + (sizeof(struct rb_class_cc_entries_entry) * capa); +} + #if VM_CHECK_MODE > 0 const rb_callable_method_entry_t *rb_vm_lookup_overloaded_cme(const rb_callable_method_entry_t *cme); @@ -584,11 +608,14 @@ vm_ccs_p(const struct rb_class_cc_entries *ccs) static inline bool vm_cc_check_cme(const struct rb_callcache *cc, const rb_callable_method_entry_t *cme) { - if (vm_cc_cme(cc) == cme || - (cme->def->iseq_overload && vm_cc_cme(cc) == rb_vm_lookup_overloaded_cme(cme))) 
{ + bool valid; + RB_VM_LOCKING() { + valid = vm_cc_cme(cc) == cme || + (cme->def->iseq_overload && vm_cc_cme(cc) == rb_vm_lookup_overloaded_cme(cme)); + } + if (valid) { return true; } - else { #if 1 // debug print @@ -600,13 +627,9 @@ vm_cc_check_cme(const struct rb_callcache *cc, const rb_callable_method_entry_t rp(vm_cc_cme(cc)); rp(rb_vm_lookup_overloaded_cme(cme)); #endif - return false; - } + return false; } #endif -// gc.c -void rb_vm_ccs_free(struct rb_class_cc_entries *ccs); - #endif /* RUBY_VM_CALLINFO_H */ diff --git a/vm_eval.c b/vm_eval.c index dff0e47c7e1290..68b692ac9c4112 100644 --- a/vm_eval.c +++ b/vm_eval.c @@ -57,7 +57,7 @@ static inline VALUE vm_call0_cc(rb_execution_context_t *ec, VALUE recv, ID id, i VALUE rb_vm_call0(rb_execution_context_t *ec, VALUE recv, ID id, int argc, const VALUE *argv, const rb_callable_method_entry_t *cme, int kw_splat) { - const struct rb_callcache cc = VM_CC_ON_STACK(Qfalse, vm_call_general, {{ 0 }}, cme); + const struct rb_callcache cc = VM_CC_ON_STACK(Qundef, vm_call_general, {{ 0 }}, cme); return vm_call0_cc(ec, recv, id, argc, argv, &cc, kw_splat); } @@ -104,7 +104,7 @@ vm_call0_cc(rb_execution_context_t *ec, VALUE recv, ID id, int argc, const VALUE static VALUE vm_call0_cme(rb_execution_context_t *ec, struct rb_calling_info *calling, const VALUE *argv, const rb_callable_method_entry_t *cme) { - calling->cc = &VM_CC_ON_STACK(Qfalse, vm_call_general, {{ 0 }}, cme); + calling->cc = &VM_CC_ON_STACK(Qundef, vm_call_general, {{ 0 }}, cme); return vm_call0_body(ec, calling, argv); } diff --git a/vm_insnhelper.c b/vm_insnhelper.c index d5eb84e691d4b5..7842d7657aeda5 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -1473,7 +1473,7 @@ vm_setivar_default(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_i RUBY_ASSERT(dest_shape_id != INVALID_SHAPE_ID && shape_id != INVALID_SHAPE_ID); } else if (dest_shape_id != INVALID_SHAPE_ID) { - if (shape_id == RSHAPE_PARENT(dest_shape_id) && 
RSHAPE_EDGE_NAME(dest_shape_id) == id && RSHAPE_CAPACITY(shape_id) == RSHAPE_CAPACITY(dest_shape_id)) { + if (RSHAPE_DIRECT_CHILD_P(shape_id, dest_shape_id) && RSHAPE_EDGE_NAME(dest_shape_id) == id && RSHAPE_CAPACITY(shape_id) == RSHAPE_CAPACITY(dest_shape_id)) { RUBY_ASSERT(index < RSHAPE_CAPACITY(dest_shape_id)); } else { @@ -1514,14 +1514,11 @@ vm_setivar(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_index_t i VM_ASSERT(!rb_ractor_shareable_p(obj)); } else if (dest_shape_id != INVALID_SHAPE_ID) { - shape_id_t source_shape_id = RSHAPE_PARENT(dest_shape_id); - - if (shape_id == source_shape_id && RSHAPE_EDGE_NAME(dest_shape_id) == id && RSHAPE_CAPACITY(shape_id) == RSHAPE_CAPACITY(dest_shape_id)) { + if (RSHAPE_DIRECT_CHILD_P(shape_id, dest_shape_id) && RSHAPE_EDGE_NAME(dest_shape_id) == id && RSHAPE_CAPACITY(shape_id) == RSHAPE_CAPACITY(dest_shape_id)) { RUBY_ASSERT(dest_shape_id != INVALID_SHAPE_ID && shape_id != INVALID_SHAPE_ID); RBASIC_SET_SHAPE_ID(obj, dest_shape_id); - RUBY_ASSERT(rb_shape_get_next_iv_shape(source_shape_id, id) == dest_shape_id); RUBY_ASSERT(index < RSHAPE_CAPACITY(dest_shape_id)); } else { @@ -1982,46 +1979,46 @@ static VALUE vm_call_general(rb_execution_context_t *ec, rb_control_frame_t *reg static VALUE vm_mtbl_dump(VALUE klass, ID target_mid); static struct rb_class_cc_entries * -vm_ccs_create(VALUE klass, struct rb_id_table *cc_tbl, ID mid, const rb_callable_method_entry_t *cme) +vm_ccs_create(VALUE klass, VALUE cc_tbl, ID mid, const rb_callable_method_entry_t *cme) { - struct rb_class_cc_entries *ccs = ALLOC(struct rb_class_cc_entries); + int initial_capa = 2; + struct rb_class_cc_entries *ccs = ruby_xmalloc(vm_ccs_alloc_size(initial_capa)); #if VM_CHECK_MODE > 0 ccs->debug_sig = ~(VALUE)ccs; #endif - ccs->capa = 0; + ccs->capa = initial_capa; ccs->len = 0; ccs->cme = cme; METHOD_ENTRY_CACHED_SET((rb_callable_method_entry_t *)cme); - ccs->entries = NULL; - rb_id_table_insert(cc_tbl, mid, (VALUE)ccs); - 
RB_OBJ_WRITTEN(klass, Qundef, cme); + rb_managed_id_table_insert(cc_tbl, mid, (VALUE)ccs); + RB_OBJ_WRITTEN(cc_tbl, Qundef, cme); return ccs; } static void -vm_ccs_push(VALUE klass, struct rb_class_cc_entries *ccs, const struct rb_callinfo *ci, const struct rb_callcache *cc) +vm_ccs_push(VALUE cc_tbl, ID mid, struct rb_class_cc_entries *ccs, const struct rb_callinfo *ci, const struct rb_callcache *cc) { if (! vm_cc_markable(cc)) { return; } if (UNLIKELY(ccs->len == ccs->capa)) { - if (ccs->capa == 0) { - ccs->capa = 1; - ccs->entries = ALLOC_N(struct rb_class_cc_entries_entry, ccs->capa); - } - else { - ccs->capa *= 2; - REALLOC_N(ccs->entries, struct rb_class_cc_entries_entry, ccs->capa); - } + RUBY_ASSERT(ccs->capa > 0); + ccs->capa *= 2; + ccs = ruby_xrealloc(ccs, vm_ccs_alloc_size(ccs->capa)); +#if VM_CHECK_MODE > 0 + ccs->debug_sig = ~(VALUE)ccs; +#endif + // GC? + rb_managed_id_table_insert(cc_tbl, mid, (VALUE)ccs); } VM_ASSERT(ccs->len < ccs->capa); const int pos = ccs->len++; ccs->entries[pos].argc = vm_ci_argc(ci); ccs->entries[pos].flag = vm_ci_flag(ci); - RB_OBJ_WRITE(klass, &ccs->entries[pos].cc, cc); + RB_OBJ_WRITE(cc_tbl, &ccs->entries[pos].cc, cc); if (RB_DEBUG_COUNTER_SETMAX(ccs_maxlen, ccs->len)) { // for tuning @@ -2063,58 +2060,49 @@ vm_ccs_verify(struct rb_class_cc_entries *ccs, ID mid, VALUE klass) const rb_callable_method_entry_t *rb_check_overloaded_cme(const rb_callable_method_entry_t *cme, const struct rb_callinfo * const ci); -static const struct rb_callcache * -vm_search_cc(const VALUE klass, const struct rb_callinfo * const ci) +static void +vm_evict_cc(VALUE klass, VALUE cc_tbl, ID mid) { - const ID mid = vm_ci_mid(ci); - struct rb_id_table *cc_tbl = RCLASS_WRITABLE_CC_TBL(klass); - struct rb_class_cc_entries *ccs = NULL; - VALUE ccs_data; - - if (cc_tbl) { - // CCS data is keyed on method id, so we don't need the method id - // for doing comparisons in the `for` loop below. 
- if (rb_id_table_lookup(cc_tbl, mid, &ccs_data)) { - ccs = (struct rb_class_cc_entries *)ccs_data; - const int ccs_len = ccs->len; + ASSERT_vm_locking(); - if (UNLIKELY(METHOD_ENTRY_INVALIDATED(ccs->cme))) { - rb_vm_ccs_free(ccs); - rb_id_table_delete(cc_tbl, mid); - ccs = NULL; - } - else { - VM_ASSERT(vm_ccs_verify(ccs, mid, klass)); + if (rb_multi_ractor_p()) { + if (RCLASS_WRITABLE_CC_TBL(klass) != cc_tbl) { + // Another ractor updated the CC table while we were waiting on the VM lock. + // We have to retry. + return; + } - // We already know the method id is correct because we had - // to look up the ccs_data by method id. All we need to - // compare is argc and flag - unsigned int argc = vm_ci_argc(ci); - unsigned int flag = vm_ci_flag(ci); + struct rb_class_cc_entries *ccs = NULL; + rb_managed_id_table_lookup(cc_tbl, mid, (VALUE *)&ccs); - for (int i=0; ientries[i].argc; - unsigned int ccs_ci_flag = ccs->entries[i].flag; - const struct rb_callcache *ccs_cc = ccs->entries[i].cc; + if (!ccs || !METHOD_ENTRY_INVALIDATED(ccs->cme)) { + // Another ractor replaced that entry while we were waiting on the VM lock. 
+ return; + } - VM_ASSERT(IMEMO_TYPE_P(ccs_cc, imemo_callcache)); + VALUE new_table = rb_vm_cc_table_dup(cc_tbl); + rb_vm_cc_table_delete(new_table, mid); + RB_OBJ_ATOMIC_WRITE(klass, &RCLASS_WRITABLE_CC_TBL(klass), new_table); + } + else { + rb_vm_cc_table_delete(cc_tbl, mid); + } +} - if (ccs_ci_argc == argc && ccs_ci_flag == flag) { - RB_DEBUG_COUNTER_INC(cc_found_in_ccs); +static const struct rb_callcache * +vm_populate_cc(VALUE klass, const struct rb_callinfo * const ci, ID mid) +{ + ASSERT_vm_locking(); - VM_ASSERT(vm_cc_cme(ccs_cc)->called_id == mid); - VM_ASSERT(ccs_cc->klass == klass); - VM_ASSERT(!METHOD_ENTRY_INVALIDATED(vm_cc_cme(ccs_cc))); + VALUE cc_tbl = RCLASS_WRITABLE_CC_TBL(klass); + const VALUE original_cc_table = cc_tbl; + struct rb_class_cc_entries *ccs = NULL; - return ccs_cc; - } - } - } - } + if (!cc_tbl) { + cc_tbl = rb_vm_cc_table_create(1); } - else { - cc_tbl = rb_id_table_create(2); - RCLASS_WRITE_CC_TBL(klass, cc_tbl); + else if (rb_multi_ractor_p()) { + cc_tbl = rb_vm_cc_table_dup(cc_tbl); } RB_DEBUG_COUNTER_INC(cc_not_found_in_ccs); @@ -2144,13 +2132,9 @@ vm_search_cc(const VALUE klass, const struct rb_callinfo * const ci) METHOD_ENTRY_CACHED_SET((struct rb_callable_method_entry_struct *)cme); if (ccs == NULL) { - VM_ASSERT(cc_tbl != NULL); + VM_ASSERT(cc_tbl); - if (LIKELY(rb_id_table_lookup(cc_tbl, mid, &ccs_data))) { - // rb_callable_method_entry() prepares ccs. - ccs = (struct rb_class_cc_entries *)ccs_data; - } - else { + if (!LIKELY(rb_managed_id_table_lookup(cc_tbl, mid, (VALUE *)&ccs))) { // TODO: required? 
ccs = vm_ccs_create(klass, cc_tbl, mid, cme); } @@ -2159,12 +2143,97 @@ vm_search_cc(const VALUE klass, const struct rb_callinfo * const ci) cme = rb_check_overloaded_cme(cme, ci); const struct rb_callcache *cc = vm_cc_new(klass, cme, vm_call_general, cc_type_normal); - vm_ccs_push(klass, ccs, ci, cc); + vm_ccs_push(cc_tbl, mid, ccs, ci, cc); VM_ASSERT(vm_cc_cme(cc) != NULL); VM_ASSERT(cme->called_id == mid); VM_ASSERT(vm_cc_cme(cc)->called_id == mid); + if (original_cc_table != cc_tbl) { + RB_OBJ_ATOMIC_WRITE(klass, &RCLASS_WRITABLE_CC_TBL(klass), cc_tbl); + } + + return cc; +} + +static const struct rb_callcache * +vm_lookup_cc(const VALUE klass, const struct rb_callinfo * const ci, ID mid) +{ + VALUE cc_tbl; + struct rb_class_cc_entries *ccs; +retry: + cc_tbl = RUBY_ATOMIC_VALUE_LOAD(RCLASS_WRITABLE_CC_TBL(klass)); + ccs = NULL; + + if (cc_tbl) { + // CCS data is keyed on method id, so we don't need the method id + // for doing comparisons in the `for` loop below. + + if (rb_managed_id_table_lookup(cc_tbl, mid, (VALUE *)&ccs)) { + const int ccs_len = ccs->len; + + if (UNLIKELY(METHOD_ENTRY_INVALIDATED(ccs->cme))) { + RB_VM_LOCKING() { + vm_evict_cc(klass, cc_tbl, mid); + } + goto retry; + } + else { + VM_ASSERT(vm_ccs_verify(ccs, mid, klass)); + + // We already know the method id is correct because we had + // to look up the ccs_data by method id. 
All we need to + // compare is argc and flag + unsigned int argc = vm_ci_argc(ci); + unsigned int flag = vm_ci_flag(ci); + + for (int i=0; ientries[i].argc; + unsigned int ccs_ci_flag = ccs->entries[i].flag; + const struct rb_callcache *ccs_cc = ccs->entries[i].cc; + + VM_ASSERT(IMEMO_TYPE_P(ccs_cc, imemo_callcache)); + + if (ccs_ci_argc == argc && ccs_ci_flag == flag) { + RB_DEBUG_COUNTER_INC(cc_found_in_ccs); + + VM_ASSERT(vm_cc_cme(ccs_cc)->called_id == mid); + VM_ASSERT(ccs_cc->klass == klass); + VM_ASSERT(!METHOD_ENTRY_INVALIDATED(vm_cc_cme(ccs_cc))); + + return ccs_cc; + } + } + } + } + } + + RB_GC_GUARD(cc_tbl); + return NULL; +} + +static const struct rb_callcache * +vm_search_cc(const VALUE klass, const struct rb_callinfo * const ci) +{ + const ID mid = vm_ci_mid(ci); + + const struct rb_callcache *cc = vm_lookup_cc(klass, ci, mid); + if (cc) { + return cc; + } + + RB_VM_LOCKING() { + if (rb_multi_ractor_p()) { + // The CC may have been populated by another ractor while we were waiting on the lock, + // so we must lookup a second time. 
+ cc = vm_lookup_cc(klass, ci, mid); + } + + if (!cc) { + cc = vm_populate_cc(klass, ci, mid); + } + } + return cc; } @@ -2175,16 +2244,14 @@ rb_vm_search_method_slowpath(const struct rb_callinfo *ci, VALUE klass) VM_ASSERT_TYPE2(klass, T_CLASS, T_ICLASS); - RB_VM_LOCKING() { - cc = vm_search_cc(klass, ci); + cc = vm_search_cc(klass, ci); - VM_ASSERT(cc); - VM_ASSERT(IMEMO_TYPE_P(cc, imemo_callcache)); - VM_ASSERT(cc == vm_cc_empty() || cc->klass == klass); - VM_ASSERT(cc == vm_cc_empty() || callable_method_entry_p(vm_cc_cme(cc))); - VM_ASSERT(cc == vm_cc_empty() || !METHOD_ENTRY_INVALIDATED(vm_cc_cme(cc))); - VM_ASSERT(cc == vm_cc_empty() || vm_cc_cme(cc)->called_id == vm_ci_mid(ci)); - } + VM_ASSERT(cc); + VM_ASSERT(IMEMO_TYPE_P(cc, imemo_callcache)); + VM_ASSERT(cc == vm_cc_empty() || cc->klass == klass); + VM_ASSERT(cc == vm_cc_empty() || callable_method_entry_p(vm_cc_cme(cc))); + VM_ASSERT(cc == vm_cc_empty() || !METHOD_ENTRY_INVALIDATED(vm_cc_cme(cc))); + VM_ASSERT(cc == vm_cc_empty() || vm_cc_cme(cc)->called_id == vm_ci_mid(ci)); return cc; } diff --git a/vm_method.c b/vm_method.c index fa81d56c74119d..76b1c97d046c2b 100644 --- a/vm_method.c +++ b/vm_method.c @@ -22,6 +22,175 @@ static inline rb_method_entry_t *lookup_method_table(VALUE klass, ID id); #define ruby_running (GET_VM()->running) /* int ruby_running = 0; */ +static enum rb_id_table_iterator_result +mark_cc_entry_i(VALUE ccs_ptr, void *data) +{ + struct rb_class_cc_entries *ccs = (struct rb_class_cc_entries *)ccs_ptr; + + VM_ASSERT(vm_ccs_p(ccs)); + + if (METHOD_ENTRY_INVALIDATED(ccs->cme)) { + ruby_xfree(ccs); + return ID_TABLE_DELETE; + } + else { + rb_gc_mark_movable((VALUE)ccs->cme); + + for (int i=0; ilen; i++) { + VM_ASSERT(vm_cc_check_cme(ccs->entries[i].cc, ccs->cme)); + + rb_gc_mark_movable((VALUE)ccs->entries[i].cc); + } + return ID_TABLE_CONTINUE; + } +} + +static void +vm_cc_table_mark(void *data) +{ + struct rb_id_table *tbl = (struct rb_id_table *)data; + if (tbl) { + 
rb_id_table_foreach_values(tbl, mark_cc_entry_i, NULL); + } +} + +static enum rb_id_table_iterator_result +cc_table_free_i(VALUE ccs_ptr, void *data) +{ + struct rb_class_cc_entries *ccs = (struct rb_class_cc_entries *)ccs_ptr; + VM_ASSERT(vm_ccs_p(ccs)); + + ruby_xfree(ccs); + + return ID_TABLE_CONTINUE; +} + +static void +vm_cc_table_free(void *data) +{ + struct rb_id_table *tbl = (struct rb_id_table *)data; + + rb_id_table_foreach_values(tbl, cc_table_free_i, NULL); + rb_managed_id_table_type.function.dfree(data); +} + +static enum rb_id_table_iterator_result +cc_table_memsize_i(VALUE ccs_ptr, void *data_ptr) +{ + size_t *total_size = data_ptr; + struct rb_class_cc_entries *ccs = (struct rb_class_cc_entries *)ccs_ptr; + *total_size += sizeof(*ccs); + *total_size += sizeof(ccs->entries[0]) * ccs->capa; + return ID_TABLE_CONTINUE; +} + +static size_t +vm_cc_table_memsize(const void *data) +{ + size_t memsize = rb_managed_id_table_type.function.dsize(data); + struct rb_id_table *tbl = (struct rb_id_table *)data; + rb_id_table_foreach_values(tbl, cc_table_memsize_i, &memsize); + return memsize; +} + +static enum rb_id_table_iterator_result +compact_cc_entry_i(VALUE ccs_ptr, void *data) +{ + struct rb_class_cc_entries *ccs = (struct rb_class_cc_entries *)ccs_ptr; + + ccs->cme = (const struct rb_callable_method_entry_struct *)rb_gc_location((VALUE)ccs->cme); + VM_ASSERT(vm_ccs_p(ccs)); + + for (int i=0; ilen; i++) { + ccs->entries[i].cc = (const struct rb_callcache *)rb_gc_location((VALUE)ccs->entries[i].cc); + } + + return ID_TABLE_CONTINUE; +} + +static void +vm_cc_table_compact(void *data) +{ + struct rb_id_table *tbl = (struct rb_id_table *)data; + rb_id_table_foreach_values(tbl, compact_cc_entry_i, NULL); +} + +static const rb_data_type_t cc_table_type = { + .wrap_struct_name = "VM/cc_table", + .function = { + .dmark = vm_cc_table_mark, + .dfree = vm_cc_table_free, + .dsize = vm_cc_table_memsize, + .dcompact = vm_cc_table_compact, + }, + .parent = 
&rb_managed_id_table_type, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE, +}; + +VALUE +rb_vm_cc_table_create(size_t capa) +{ + return rb_managed_id_table_create(&cc_table_type, capa); +} + +static enum rb_id_table_iterator_result +vm_cc_table_dup_i(ID key, VALUE old_ccs_ptr, void *data) +{ + VALUE new_table = (VALUE)data; + struct rb_class_cc_entries *old_ccs = (struct rb_class_cc_entries *)old_ccs_ptr; + size_t memsize = vm_ccs_alloc_size(old_ccs->capa); + struct rb_class_cc_entries *new_ccs = ruby_xcalloc(1, memsize); + rb_managed_id_table_insert(new_table, key, (VALUE)new_ccs); + + memcpy(new_ccs, old_ccs, memsize); + +#if VM_CHECK_MODE > 0 + new_ccs->debug_sig = ~(VALUE)new_ccs; +#endif + + RB_OBJ_WRITTEN(new_table, Qundef, (VALUE)new_ccs->cme); + for (int index = 0; index < new_ccs->len; index++) { + RB_OBJ_WRITTEN(new_table, Qundef, new_ccs->entries[index].cc); + } + return ID_TABLE_CONTINUE; +} + +VALUE +rb_vm_cc_table_dup(VALUE old_table) +{ + VALUE new_table = rb_vm_cc_table_create(rb_managed_id_table_size(old_table)); + rb_managed_id_table_foreach(old_table, vm_cc_table_dup_i, (void *)new_table); + return new_table; +} + +static void +vm_ccs_invalidate(struct rb_class_cc_entries *ccs) +{ + for (int i=0; ilen; i++) { + const struct rb_callcache *cc = ccs->entries[i].cc; + VM_ASSERT(!vm_cc_super_p(cc) && !vm_cc_refinement_p(cc)); + vm_cc_invalidate(cc); + } +} + +void +rb_vm_ccs_invalidate_and_free(struct rb_class_cc_entries *ccs) +{ + RB_DEBUG_COUNTER_INC(ccs_free); + vm_ccs_invalidate(ccs); + ruby_xfree(ccs); +} + +void +rb_vm_cc_table_delete(VALUE table, ID mid) +{ + struct rb_class_cc_entries *ccs; + if (rb_managed_id_table_lookup(table, mid, (VALUE *)&ccs)) { + rb_managed_id_table_delete(table, mid); + rb_vm_ccs_invalidate_and_free(ccs); + } +} + static enum rb_id_table_iterator_result vm_ccs_dump_i(ID mid, VALUE val, void *data) { @@ -39,18 +208,18 @@ vm_ccs_dump_i(ID mid, VALUE val, void *data) static void 
vm_ccs_dump(VALUE klass, ID target_mid) { - struct rb_id_table *cc_tbl = RCLASS_WRITABLE_CC_TBL(klass); + VALUE cc_tbl = RCLASS_WRITABLE_CC_TBL(klass); if (cc_tbl) { VALUE ccs; if (target_mid) { - if (rb_id_table_lookup(cc_tbl, target_mid, &ccs)) { + if (rb_managed_id_table_lookup(cc_tbl, target_mid, &ccs)) { fprintf(stderr, " [CCTB] %p\n", (void *)cc_tbl); vm_ccs_dump_i(target_mid, ccs, NULL); } } else { fprintf(stderr, " [CCTB] %p\n", (void *)cc_tbl); - rb_id_table_foreach(cc_tbl, vm_ccs_dump_i, (void *)target_mid); + rb_managed_id_table_foreach(cc_tbl, vm_ccs_dump_i, (void *)target_mid); } } } @@ -169,15 +338,15 @@ static const rb_callable_method_entry_t *complemented_callable_method_entry(VALU static const rb_callable_method_entry_t *lookup_overloaded_cme(const rb_callable_method_entry_t *cme); static void -invalidate_method_cache_in_cc_table(struct rb_id_table *tbl, ID mid) +invalidate_method_cache_in_cc_table(VALUE tbl, ID mid) { VALUE ccs_data; - if (tbl && rb_id_table_lookup(tbl, mid, &ccs_data)) { + if (tbl && rb_managed_id_table_lookup(tbl, mid, &ccs_data)) { struct rb_class_cc_entries *ccs = (struct rb_class_cc_entries *)ccs_data; rb_yjit_cme_invalidate((rb_callable_method_entry_t *)ccs->cme); if (NIL_P(ccs->cme->owner)) invalidate_negative_cache(mid); - rb_vm_ccs_free(ccs); - rb_id_table_delete(tbl, mid); + rb_vm_ccs_invalidate_and_free(ccs); + rb_managed_id_table_delete(tbl, mid); RB_DEBUG_COUNTER_INC(cc_invalidate_leaf_ccs); } } @@ -253,7 +422,7 @@ clear_method_cache_by_id_in_class(VALUE klass, ID mid) // check only current class // invalidate CCs - struct rb_id_table *cc_tbl = RCLASS_WRITABLE_CC_TBL(klass); + VALUE cc_tbl = RCLASS_WRITABLE_CC_TBL(klass); invalidate_method_cache_in_cc_table(cc_tbl, mid); if (RCLASS_CC_TBL_NOT_PRIME_P(klass, cc_tbl)) { invalidate_method_cache_in_cc_table(RCLASS_PRIME_CC_TBL(klass), mid); @@ -385,13 +554,13 @@ invalidate_ccs_in_iclass_cc_tbl(VALUE value, void *data) } void -rb_invalidate_method_caches(struct rb_id_table 
*cm_tbl, struct rb_id_table *cc_tbl) +rb_invalidate_method_caches(struct rb_id_table *cm_tbl, VALUE cc_tbl) { if (cm_tbl) { rb_id_table_foreach_values(cm_tbl, invalidate_method_entry_in_iclass_callable_m_tbl, NULL); } if (cc_tbl) { - rb_id_table_foreach_values(cc_tbl, invalidate_ccs_in_iclass_cc_tbl, NULL); + rb_managed_id_table_foreach_values(cc_tbl, invalidate_ccs_in_iclass_cc_tbl, NULL); } } @@ -409,7 +578,7 @@ invalidate_cc_refinement(st_data_t key, st_data_t data) VM_ASSERT(vm_cc_refinement_p(cc)); - if (cc->klass) { + if (vm_cc_valid(cc)) { vm_cc_invalidate(cc); } } @@ -775,7 +944,7 @@ rb_method_definition_set(const rb_method_entry_t *me, rb_method_definition_t *de /* setup iseq first (before invoking GC) */ RB_OBJ_WRITE(me, &def->body.iseq.iseqptr, iseq); - // Methods defined in `with_yjit` should be considered METHOD_ENTRY_BASIC + // Methods defined in `with_jit` should be considered METHOD_ENTRY_BASIC if (rb_iseq_attr_p(iseq, BUILTIN_ATTR_C_TRACE)) { METHOD_ENTRY_BASIC_SET((rb_method_entry_t *)me, TRUE); } @@ -1559,10 +1728,10 @@ cached_callable_method_entry(VALUE klass, ID mid) { ASSERT_vm_locking(); - struct rb_id_table *cc_tbl = RCLASS_WRITABLE_CC_TBL(klass); + VALUE cc_tbl = RCLASS_WRITABLE_CC_TBL(klass); VALUE ccs_data; - if (cc_tbl && rb_id_table_lookup(cc_tbl, mid, &ccs_data)) { + if (cc_tbl && rb_managed_id_table_lookup(cc_tbl, mid, &ccs_data)) { struct rb_class_cc_entries *ccs = (struct rb_class_cc_entries *)ccs_data; VM_ASSERT(vm_ccs_p(ccs)); @@ -1572,8 +1741,8 @@ cached_callable_method_entry(VALUE klass, ID mid) return ccs->cme; } else { - rb_vm_ccs_free(ccs); - rb_id_table_delete(cc_tbl, mid); + rb_managed_id_table_delete(cc_tbl, mid); + rb_vm_ccs_invalidate_and_free(ccs); } } @@ -1587,15 +1756,15 @@ cache_callable_method_entry(VALUE klass, ID mid, const rb_callable_method_entry_ ASSERT_vm_locking(); VM_ASSERT(cme != NULL); - struct rb_id_table *cc_tbl = RCLASS_WRITABLE_CC_TBL(klass); + VALUE cc_tbl = RCLASS_WRITABLE_CC_TBL(klass); VALUE 
ccs_data; if (!cc_tbl) { - cc_tbl = rb_id_table_create(2); + cc_tbl = rb_vm_cc_table_create(2); RCLASS_WRITE_CC_TBL(klass, cc_tbl); } - if (rb_id_table_lookup(cc_tbl, mid, &ccs_data)) { + if (rb_managed_id_table_lookup(cc_tbl, mid, &ccs_data)) { #if VM_CHECK_MODE > 0 struct rb_class_cc_entries *ccs = (struct rb_class_cc_entries *)ccs_data; VM_ASSERT(ccs->cme == cme); diff --git a/win32/mkexports.rb b/win32/mkexports.rb index 389b49def83544..97939cdd093436 100755 --- a/win32/mkexports.rb +++ b/win32/mkexports.rb @@ -146,7 +146,9 @@ def exports(*) end def each_line(objs, &block) - IO.foreach("|#{self.class.nm} --extern-only --defined-only #{objs.join(' ')}", &block) + IO.popen(%W[#{self.class.nm} --extern-only --defined-only] + objs) do |f| + f.each(&block) + end end def each_export(objs) @@ -155,7 +157,7 @@ def each_export(objs) re = /\s(?:(T)|[[:upper:]])\s#{symprefix}((?!#{PrivateNames}).*)$/ objdump(objs) do |l| next if /@.*@/ =~ l - yield $2, !$1 if re =~ l + yield $2.strip, !$1 if re =~ l end end end diff --git a/yjit.c b/yjit.c index 46f89e2020c3f1..f83a330bd6f927 100644 --- a/yjit.c +++ b/yjit.c @@ -499,18 +499,6 @@ rb_yjit_str_simple_append(VALUE str1, VALUE str2) return rb_str_cat(str1, RSTRING_PTR(str2), RSTRING_LEN(str2)); } -void -rb_set_cfp_pc(struct rb_control_frame_struct *cfp, const VALUE *pc) -{ - cfp->pc = pc; -} - -void -rb_set_cfp_sp(struct rb_control_frame_struct *cfp, VALUE *sp) -{ - cfp->sp = sp; -} - extern VALUE *rb_vm_base_ptr(struct rb_control_frame_struct *cfp); // YJIT needs this function to never allocate and never raise diff --git a/yjit.rb b/yjit.rb index e4fafa729eea75..1655529b5ee7f5 100644 --- a/yjit.rb +++ b/yjit.rb @@ -264,23 +264,23 @@ def self.simulate_oom! 
# :nodoc: end # Blocks that are called when YJIT is enabled - @yjit_hooks = [] + @jit_hooks = [] class << self # :stopdoc: private # Register a block to be called when YJIT is enabled - def add_yjit_hook(hook) - @yjit_hooks << hook + def add_jit_hook(hook) + @jit_hooks << hook end - # Run YJIT hooks registered by RubyVM::YJIT.with_yjit - def call_yjit_hooks + # Run YJIT hooks registered by `#with_jit` + def call_jit_hooks # Skip using builtin methods in Ruby if --yjit-c-builtin is given return if Primitive.yjit_c_builtin_p - @yjit_hooks.each(&:call) - @yjit_hooks.clear + @jit_hooks.each(&:call) + @jit_hooks.clear end # Print stats and dump exit locations diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index eeabbf594df9bb..c8a58f424e385f 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -1202,8 +1202,6 @@ extern "C" { pub fn rb_yjit_iseq_builtin_attrs(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; pub fn rb_yjit_builtin_function(iseq: *const rb_iseq_t) -> *const rb_builtin_function; pub fn rb_yjit_str_simple_append(str1: VALUE, str2: VALUE) -> VALUE; - pub fn rb_set_cfp_pc(cfp: *mut rb_control_frame_struct, pc: *const VALUE); - pub fn rb_set_cfp_sp(cfp: *mut rb_control_frame_struct, sp: *mut VALUE); pub fn rb_vm_base_ptr(cfp: *mut rb_control_frame_struct) -> *mut VALUE; pub fn rb_yarv_str_eql_internal(str1: VALUE, str2: VALUE) -> VALUE; pub fn rb_str_neq_internal(str1: VALUE, str2: VALUE) -> VALUE; @@ -1330,4 +1328,6 @@ extern "C" { pub fn rb_IMEMO_TYPE_P(imemo: VALUE, imemo_type: imemo_type) -> ::std::os::raw::c_int; pub fn rb_assert_cme_handle(handle: VALUE); pub fn rb_yarv_ary_entry_internal(ary: VALUE, offset: ::std::os::raw::c_long) -> VALUE; + pub fn rb_set_cfp_pc(cfp: *mut rb_control_frame_struct, pc: *const VALUE); + pub fn rb_set_cfp_sp(cfp: *mut rb_control_frame_struct, sp: *mut VALUE); } diff --git a/yjit/src/options.rs b/yjit/src/options.rs index 9f7c70536966a8..c87a436091279f 100644 --- 
a/yjit/src/options.rs +++ b/yjit/src/options.rs @@ -46,7 +46,7 @@ pub struct Options { // The number of registers allocated for stack temps pub num_temp_regs: usize, - // Disable Ruby builtin methods defined by `with_yjit` hooks, e.g. Array#each in Ruby + // Disable Ruby builtin methods defined by `with_jit` hooks, e.g. Array#each in Ruby pub c_builtin: bool, // Capture stats diff --git a/yjit/src/yjit.rs b/yjit/src/yjit.rs index 8df1163d64b725..517a0daae5b9e2 100644 --- a/yjit/src/yjit.rs +++ b/yjit/src/yjit.rs @@ -57,7 +57,7 @@ fn yjit_init() { // Call YJIT hooks before enabling YJIT to avoid compiling the hooks themselves unsafe { let yjit = rb_const_get(rb_cRubyVM, rust_str_to_id("YJIT")); - rb_funcall(yjit, rust_str_to_id("call_yjit_hooks"), 0); + rb_funcall(yjit, rust_str_to_id("call_jit_hooks"), 0); } // Catch panics to avoid UB for unwinding into C frames. diff --git a/yjit_hook.rb b/yjit_hook.rb deleted file mode 100644 index 610a7be3303e2c..00000000000000 --- a/yjit_hook.rb +++ /dev/null @@ -1,4 +0,0 @@ -# Remove the helper defined in kernel.rb -class Module - undef :with_yjit -end diff --git a/zjit/src/asm/arm64/arg/shifted_imm.rs b/zjit/src/asm/arm64/arg/shifted_imm.rs index 4602ac64ab9495..06daefdef7715d 100644 --- a/zjit/src/asm/arm64/arg/shifted_imm.rs +++ b/zjit/src/asm/arm64/arg/shifted_imm.rs @@ -16,7 +16,6 @@ pub struct ShiftedImmediate { impl TryFrom for ShiftedImmediate { type Error = (); - /// Attempt to convert a u64 into a BitmaskImm. fn try_from(value: u64) -> Result { let current = value; if current < 2_u64.pow(12) { diff --git a/zjit/src/asm/arm64/inst/mov.rs b/zjit/src/asm/arm64/inst/mov.rs index eae4565c3ab0ce..58877ae94040c7 100644 --- a/zjit/src/asm/arm64/inst/mov.rs +++ b/zjit/src/asm/arm64/inst/mov.rs @@ -2,6 +2,9 @@ use super::super::arg::Sf; /// Which operation is being performed. enum Op { + /// A movn operation which inverts the immediate and zeroes out the other bits. 
+ MOVN = 0b00, + /// A movz operation which zeroes out the other bits. MOVZ = 0b10, @@ -61,6 +64,12 @@ impl Mov { Self { rd, imm16, hw: hw.into(), op: Op::MOVK, sf: num_bits.into() } } + /// MOVN + /// + pub fn movn(rd: u8, imm16: u16, hw: u8, num_bits: u8) -> Self { + Self { rd, imm16, hw: hw.into(), op: Op::MOVN, sf: num_bits.into() } + } + /// MOVZ /// pub fn movz(rd: u8, imm16: u16, hw: u8, num_bits: u8) -> Self { @@ -104,6 +113,34 @@ mod tests { assert_eq!(0xf2800f60, result); } + #[test] + fn test_movn_unshifted() { + let inst = Mov::movn(0, 123, 0, 64); + let result: u32 = inst.into(); + assert_eq!(0x92800f60, result); + } + + #[test] + fn test_movn_shifted_16() { + let inst = Mov::movn(0, 123, 16, 64); + let result: u32 = inst.into(); + assert_eq!(0x92a00f60, result); + } + + #[test] + fn test_movn_shifted_32() { + let inst = Mov::movn(0, 123, 32, 64); + let result: u32 = inst.into(); + assert_eq!(0x92c00f60, result); + } + + #[test] + fn test_movn_shifted_48() { + let inst = Mov::movn(0, 123, 48, 64); + let result: u32 = inst.into(); + assert_eq!(0x92e00f60, result); + } + #[test] fn test_movk_shifted_16() { let inst = Mov::movk(0, 123, 16, 64); diff --git a/zjit/src/asm/arm64/mod.rs b/zjit/src/asm/arm64/mod.rs index d1fa3b0d9f9c9b..0576b230907ff5 100644 --- a/zjit/src/asm/arm64/mod.rs +++ b/zjit/src/asm/arm64/mod.rs @@ -13,6 +13,21 @@ use inst::*; pub use arg::*; pub use opnd::*; +/// The extend type for register operands in extended register instructions. +/// The result size is determined by the destination register and +/// the source size is interpreted using the last letter. 
+#[derive(Clone, Copy)] +pub enum ExtendType { + UXTB = 0b000, // unsigned extend byte + UXTH = 0b001, // unsigned extend halfword + UXTW = 0b010, // unsigned extend word + UXTX = 0b011, // unsigned extend doubleword + SXTB = 0b100, // signed extend byte + SXTH = 0b101, // signed extend halfword + SXTW = 0b110, // signed extend word + SXTX = 0b111, // signed extend doubleword +} + /// Checks that a signed value fits within the specified number of bits. pub const fn imm_fits_bits(imm: i64, num_bits: u8) -> bool { let minimum = if num_bits == 64 { i64::MIN } else { -(2_i64.pow((num_bits as u32) - 1)) }; @@ -59,6 +74,42 @@ pub fn add(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { cb.write_bytes(&bytes); } +/// Encode ADD (extended register) +/// +/// +/// +/// 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +/// 0 1 0 1 1 0 0 1 │ │ │ │ │ │ │ │ │ │ +/// sf op S └────rm─────┘ └option┘ └─imm3─┘ └────rn─────┘ └────rd─────┘ +fn encode_add_extend(rd: u8, rn: u8, rm: u8, extend_type: ExtendType, shift: u8, num_bits: u8) -> [u8; 4] { + assert!(shift <= 4, "shift must be 0-4"); + + ((Sf::from(num_bits) as u32) << 31 | + 0b0 << 30 | // op = 0 for add + 0b0 << 29 | // S = 0 for non-flag-setting + 0b01011001 << 21 | + (rm as u32) << 16 | + (extend_type as u32) << 13 | + (shift as u32) << 10 | + (rn as u32) << 5 | + rd as u32).to_le_bytes() +} + +/// ADD (extended register) - add rn and rm with UXTX extension (no extension for 64-bit registers) +/// This is equivalent to a regular ADD for 64-bit registers since UXTX with shift 0 means no modification. +/// For reg_no=31, rd and rn mean SP while with rm means the zero register. 
+pub fn add_extended(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + encode_add_extend(rd.reg_no, rn.reg_no, rm.reg_no, ExtendType::UXTX, 0, rd.num_bits) + }, + _ => panic!("Invalid operand combination to add_extend instruction."), + }; + + cb.write_bytes(&bytes); +} + /// ADDS - add rn and rm, put the result in rd, update flags pub fn adds(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { let bytes: [u8; 4] = match (rd, rn, rm) { @@ -665,6 +716,21 @@ pub fn movk(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { cb.write_bytes(&bytes); } +/// MOVN - load a register with the complement of a shifted then zero extended 16-bit immediate +/// +pub fn movn(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { + let bytes: [u8; 4] = match (rd, imm16) { + (A64Opnd::Reg(rd), A64Opnd::UImm(imm16)) => { + assert!(uimm_fits_bits(imm16, 16), "The immediate operand must be 16 bits or less."); + + Mov::movn(rd.reg_no, imm16 as u16, shift, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to movn instruction.") + }; + + cb.write_bytes(&bytes); +} + /// MOVZ - move a 16 bit immediate into a register, zero the other bits pub fn movz(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { let bytes: [u8; 4] = match (rd, imm16) { @@ -1142,6 +1208,7 @@ fn cbz_cbnz(num_bits: u8, op: bool, offset: InstructionOffset, rt: u8) -> [u8; 4 #[cfg(test)] mod tests { use super::*; + use crate::assertions::assert_disasm; /// Check that the bytes for an instruction sequence match a hex string fn check_bytes(bytes: &str, run: R) where R: FnOnce(&mut super::CodeBlock) { @@ -1491,6 +1558,11 @@ mod tests { check_bytes("600fa0f2", |cb| movk(cb, X0, A64Opnd::new_uimm(123), 16)); } + #[test] + fn test_movn() { + check_bytes("600fa092", |cb| movn(cb, X0, 
A64Opnd::new_uimm(123), 16)); + } + #[test] fn test_movz() { check_bytes("600fa0d2", |cb| movz(cb, X0, A64Opnd::new_uimm(123), 16)); @@ -1675,4 +1747,19 @@ mod tests { fn test_tst_32b_immediate() { check_bytes("1f3c0072", |cb| tst(cb, W0, A64Opnd::new_uimm(0xffff))); } + + #[test] + fn test_add_extend_various_regs() { + let mut cb = CodeBlock::new_dummy(); + + add_extended(&mut cb, X10, X11, X9); + add_extended(&mut cb, X30, X30, X30); + add_extended(&mut cb, X31, X31, X31); + + assert_disasm!(cb, "6a61298bde633e8bff633f8b", " + 0x0: add x10, x11, x9, uxtx + 0x4: add x30, x30, x30, uxtx + 0x8: add sp, sp, xzr + "); + } } diff --git a/zjit/src/asm/mod.rs b/zjit/src/asm/mod.rs index 3b9b8a26f7778f..6c3e95546302ae 100644 --- a/zjit/src/asm/mod.rs +++ b/zjit/src/asm/mod.rs @@ -7,7 +7,9 @@ use crate::virtualmem::*; // Lots of manual vertical alignment in there that rustfmt doesn't handle well. #[rustfmt::skip] +#[cfg(target_arch = "x86_64")] pub mod x86_64; +#[cfg(target_arch = "aarch64")] pub mod arm64; /// Index to a label created by cb.new_label() diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs index 42dc31c90fd5cc..148d01ea862e97 100644 --- a/zjit/src/backend/arm64/mod.rs +++ b/zjit/src/backend/arm64/mod.rs @@ -140,6 +140,10 @@ fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize { // instruction, then we'll use that. movz(cb, rd, A64Opnd::new_uimm(current), 0); return 1; + } else if u16::try_from(!value).is_ok() { + // For small negative values, use a single movn + movn(cb, rd, A64Opnd::new_uimm(!value), 0); + return 1; } else if BitmaskImmediate::try_from(current).is_ok() { // Otherwise, if the immediate can be encoded // with the special bitmask immediate encoding, @@ -191,11 +195,15 @@ pub const ALLOC_REGS: &'static [Reg] = &[ impl Assembler { - // Special scratch registers for intermediate processing. 
- // This register is caller-saved (so we don't have to save it before using it) + /// Special scratch registers for intermediate processing. + /// This register is call-clobbered (so we don't have to save it before using it). + /// Avoid using if you can since this is used to lower [Insn] internally and + /// so conflicts are possible. pub const SCRATCH_REG: Reg = X16_REG; - const SCRATCH0: A64Opnd = A64Opnd::Reg(Assembler::SCRATCH_REG); - const SCRATCH1: A64Opnd = A64Opnd::Reg(X17_REG); + const SCRATCH0_REG: Reg = Self::SCRATCH_REG; + const SCRATCH1_REG: Reg = X17_REG; + const SCRATCH0: A64Opnd = A64Opnd::Reg(Self::SCRATCH0_REG); + const SCRATCH1: A64Opnd = A64Opnd::Reg(Self::SCRATCH1_REG); /// Get the list of registers from which we will allocate on this platform pub fn get_alloc_regs() -> Vec { @@ -219,29 +227,6 @@ impl Assembler /// have no memory operands. fn arm64_split(mut self) -> Assembler { - /// When we're attempting to load a memory address into a register, the - /// displacement must fit into the maximum number of bits for an Op::Add - /// immediate. If it doesn't, we have to load the displacement into a - /// register first. - fn split_lea_operand(asm: &mut Assembler, opnd: Opnd) -> Opnd { - match opnd { - Opnd::Mem(Mem { base, disp, num_bits }) => { - if disp >= 0 && ShiftedImmediate::try_from(disp as u64).is_ok() { - asm.lea(opnd) - } else { - let disp = asm.load(Opnd::Imm(disp.into())); - let reg = match base { - MemBase::Reg(reg_no) => Opnd::Reg(Reg { reg_no, num_bits }), - MemBase::VReg(idx) => Opnd::VReg { idx, num_bits } - }; - - asm.add(reg, disp) - } - }, - _ => unreachable!("Op::Lea only accepts Opnd::Mem operands.") - } - } - /// When you're storing a register into a memory location or loading a /// memory location into a register, the displacement from the base /// register of the memory location must fit into 9 bits. 
If it doesn't, @@ -252,7 +237,7 @@ impl Assembler if mem_disp_fits_bits(mem.disp) { opnd } else { - let base = split_lea_operand(asm, opnd); + let base = asm.lea(opnd); Opnd::mem(64, base, 0) } }, @@ -575,7 +560,7 @@ impl Assembler }, Insn::IncrCounter { mem, value } => { let counter_addr = match mem { - Opnd::Mem(_) => split_lea_operand(asm, *mem), + Opnd::Mem(_) => asm.lea(*mem), _ => *mem }; @@ -671,31 +656,6 @@ impl Assembler *opnd = split_load_operand(asm, *opnd); asm.push_insn(insn); }, - Insn::Store { dest, src } => { - // The value being stored must be in a register, so if it's - // not already one we'll load it first. - let opnd1 = match src { - // If the first operand is zero, then we can just use - // the zero register. - Opnd::UImm(0) | Opnd::Imm(0) => Opnd::Reg(XZR_REG), - // Otherwise we'll check if we need to load it first. - _ => split_load_operand(asm, *src) - }; - - match dest { - Opnd::Reg(_) => { - // Store does not support a register as a dest operand. - asm.mov(*dest, opnd1); - } - _ => { - // The displacement for the STUR instruction can't be more - // than 9 bits long. If it's longer, we need to load the - // memory address into a register first. - let opnd0 = split_memory_address(asm, *dest); - asm.store(opnd0, opnd1); - } - } - }, Insn::Mul { left, right, .. } => { *left = split_load_operand(asm, *left); *right = split_load_operand(asm, *right); @@ -862,6 +822,42 @@ impl Assembler } } + /// Do the address calculation of `out_reg = base_reg + disp` + fn load_effective_address(cb: &mut CodeBlock, out: A64Opnd, base_reg_no: u8, disp: i32) { + let base_reg = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: base_reg_no }); + assert_ne!(31, out.unwrap_reg().reg_no, "Lea sp, [sp, #imm] not always encodable. 
Use add/sub instead."); + + if ShiftedImmediate::try_from(disp.unsigned_abs() as u64).is_ok() { + // Use ADD/SUB if the displacement fits + add(cb, out, base_reg, A64Opnd::new_imm(disp.into())); + } else { + // Use add_extended() to interpret reg_no=31 as sp + // since the base register is never the zero register. + // Careful! Only the first two operands can refer to sp. + emit_load_value(cb, out, disp as u64); + add_extended(cb, out, base_reg, out); + }; + } + + /// Load a VALUE to a register and remember it for GC marking and reference updating + fn emit_load_gc_value(cb: &mut CodeBlock, gc_offsets: &mut Vec, dest: A64Opnd, value: VALUE) { + // We don't need to check if it's a special const + // here because we only allow these operands to hit + // this point if they're not a special const. + assert!(!value.special_const_p()); + + // This assumes only load instructions can contain + // references to GC'd Value operands. If the value + // being loaded is a heap object, we'll report that + // back out to the gc_offsets list. + ldr_literal(cb, dest, 2.into()); + b(cb, InstructionOffset::from_bytes(4 + (SIZEOF_VALUE as i32))); + cb.write_bytes(&value.as_u64().to_le_bytes()); + + let ptr_offset = cb.get_write_ptr().sub_bytes(SIZEOF_VALUE); + gc_offsets.push(ptr_offset); + } + /// Emit a push instruction for the given operand by adding to the stack /// pointer and then storing the given value. fn emit_push(cb: &mut CodeBlock, opnd: A64Opnd) { @@ -1032,12 +1028,84 @@ impl Assembler Insn::LShift { opnd, shift, out } => { lsl(cb, out.into(), opnd.into(), shift.into()); }, - Insn::Store { dest, src } => { + store_insn @ Insn::Store { dest, src } => { + // With minor exceptions, as long as `dest` is a Mem, all forms of `src` are + // accepted. As a rule of thumb, avoid using Assembler::SCRATCH as a memory + // base register to guarantee things will work. 
+ let &Opnd::Mem(Mem { num_bits: dest_num_bits, base: MemBase::Reg(base_reg_no), disp }) = dest else { + panic!("Unexpected Insn::Store destination in arm64_emit: {dest:?}"); + }; + + // This kind of tricky clobber can only happen for explicit use of SCRATCH_REG, + // so we panic to get the author to change their code. + #[track_caller] + fn assert_no_clobber(store_insn: &Insn, user_use: u8, backend_use: Reg) { + assert_ne!( + backend_use.reg_no, + user_use, + "Emitting {store_insn:?} would clobber {user_use:?}, in conflict with its semantics" + ); + } + + // Split src into SCRATCH0 if necessary + let src_reg: A64Reg = match src { + Opnd::Reg(reg) => *reg, + // Use zero register when possible + Opnd::UImm(0) | Opnd::Imm(0) => XZR_REG, + // Immediates + &Opnd::Imm(imm) => { + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG); + emit_load_value(cb, Self::SCRATCH0, imm as u64); + Self::SCRATCH0_REG + } + &Opnd::UImm(imm) => { + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG); + emit_load_value(cb, Self::SCRATCH0, imm); + Self::SCRATCH0_REG + } + &Opnd::Value(value) => { + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG); + emit_load_gc_value(cb, &mut gc_offsets, Self::SCRATCH0, value); + Self::SCRATCH0_REG + } + src_mem @ &Opnd::Mem(Mem { num_bits: src_num_bits, base: MemBase::Reg(src_base_reg_no), disp: src_disp }) => { + // For mem-to-mem store, load the source into SCRATCH0 + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG); + let src_mem = if mem_disp_fits_bits(src_disp) { + src_mem.into() + } else { + // Split the load address into SCRATCH0 first if necessary + assert_no_clobber(store_insn, src_base_reg_no, Self::SCRATCH0_REG); + load_effective_address(cb, Self::SCRATCH0, src_base_reg_no, src_disp); + A64Opnd::new_mem(dest_num_bits, Self::SCRATCH0, 0) + }; + match src_num_bits { + 64 | 32 => ldur(cb, Self::SCRATCH0, src_mem), + 16 => ldurh(cb, Self::SCRATCH0, src_mem), + 8 => ldurb(cb, Self::SCRATCH0, 
src_mem), + num_bits => panic!("unexpected num_bits: {num_bits}") + }; + Self::SCRATCH0_REG + } + src @ (Opnd::Mem(_) | Opnd::None | Opnd::VReg { .. }) => panic!("Unexpected source operand during arm64_emit: {src:?}") + }; + let src = A64Opnd::Reg(src_reg); + + // Split dest into SCRATCH1 if necessary. + let dest = if mem_disp_fits_bits(disp) { + dest.into() + } else { + assert_no_clobber(store_insn, src_reg.reg_no, Self::SCRATCH1_REG); + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH1_REG); + load_effective_address(cb, Self::SCRATCH1, base_reg_no, disp); + A64Opnd::new_mem(dest_num_bits, Self::SCRATCH1, 0) + }; + // This order may be surprising but it is correct. The way // the Arm64 assembler works, the register that is going to // be stored is first and the address is second. However in // our IR we have the address first and the register second. - match dest.rm_num_bits() { + match dest_num_bits { 64 | 32 => stur(cb, src.into(), dest.into()), 16 => sturh(cb, src.into(), dest.into()), num_bits => panic!("unexpected dest num_bits: {} (src: {:#?}, dest: {:#?})", num_bits, src, dest), @@ -1064,21 +1132,7 @@ impl Assembler }; }, Opnd::Value(value) => { - // We dont need to check if it's a special const - // here because we only allow these operands to hit - // this point if they're not a special const. - assert!(!value.special_const_p()); - - // This assumes only load instructions can contain - // references to GC'd Value operands. If the value - // being loaded is a heap object, we'll report that - // back out to the gc_offsets list. 
- ldr_literal(cb, out.into(), 2.into()); - b(cb, InstructionOffset::from_bytes(4 + (SIZEOF_VALUE as i32))); - cb.write_bytes(&value.as_u64().to_le_bytes()); - - let ptr_offset = cb.get_write_ptr().sub_bytes(SIZEOF_VALUE); - gc_offsets.push(ptr_offset); + emit_load_gc_value(cb, &mut gc_offsets, out.into(), value); }, Opnd::None => { unreachable!("Attempted to load from None operand"); @@ -1113,22 +1167,11 @@ impl Assembler } }, Insn::Lea { opnd, out } => { - let opnd: A64Opnd = opnd.into(); - - match opnd { - A64Opnd::Mem(mem) => { - add( - cb, - out.into(), - A64Opnd::Reg(A64Reg { reg_no: mem.base_reg_no, num_bits: 64 }), - A64Opnd::new_imm(mem.disp.into()) - ); - }, - _ => { - panic!("Op::Lea only accepts Opnd::Mem operands."); - } + let &Opnd::Mem(Mem { num_bits: _, base: MemBase::Reg(base_reg_no), disp }) = opnd else { + panic!("Unexpected Insn::Lea operand in arm64_emit: {opnd:?}"); }; - }, + load_effective_address(cb, out.into(), base_reg_no, disp); + } Insn::LeaJumpTarget { out, target, .. } => { if let Target::Label(label_idx) = target { // Set output to the raw address of the label @@ -1262,8 +1305,7 @@ impl Assembler } last_patch_pos = Some(cb.get_write_pos()); }, - Insn::IncrCounter { mem: _, value: _ } => { - /* + Insn::IncrCounter { mem, value } => { let label = cb.new_label("incr_counter_loop".to_string()); cb.write_label(label); @@ -1279,8 +1321,6 @@ impl Assembler cmp(cb, Self::SCRATCH1, A64Opnd::new_uimm(0)); emit_conditional_jump::<{Condition::NE}>(cb, Target::Label(label)); - */ - unimplemented!("labels are not supported yet"); }, Insn::Breakpoint => { brk(cb, A64Opnd::None); @@ -1607,6 +1647,101 @@ mod tests { asm.compile_with_num_regs(&mut cb, 0); } + #[test] + fn test_emit_lea() { + let (mut asm, mut cb) = setup_asm(); + + // Test values that exercise various types of immediates. 
+ // - 9 bit displacement for Load/Store + // - 12 bit ADD/SUB shifted immediate + // - 16 bit MOV family shifted immediates + // - bit mask immediates + for displacement in [i32::MAX, 0x10008, 0x1800, 0x208, -0x208, -0x1800, -0x10008, i32::MIN] { + let mem = Opnd::mem(64, NATIVE_STACK_PTR, displacement); + asm.lea_into(Opnd::Reg(X0_REG), mem); + } + + asm.compile_with_num_regs(&mut cb, 0); + assert_disasm!(cb, "e07b40b2e063208b000180d22000a0f2e063208b000083d2e063208be0230891e02308d1e0ff8292e063208b00ff9fd2c0ffbff2e0ffdff2e0fffff2e063208be08361b2e063208b", " + 0x0: orr x0, xzr, #0x7fffffff + 0x4: add x0, sp, x0 + 0x8: mov x0, #8 + 0xc: movk x0, #1, lsl #16 + 0x10: add x0, sp, x0 + 0x14: mov x0, #0x1800 + 0x18: add x0, sp, x0 + 0x1c: add x0, sp, #0x208 + 0x20: sub x0, sp, #0x208 + 0x24: mov x0, #-0x1800 + 0x28: add x0, sp, x0 + 0x2c: mov x0, #0xfff8 + 0x30: movk x0, #0xfffe, lsl #16 + 0x34: movk x0, #0xffff, lsl #32 + 0x38: movk x0, #0xffff, lsl #48 + 0x3c: add x0, sp, x0 + 0x40: orr x0, xzr, #0xffffffff80000000 + 0x44: add x0, sp, x0 + "); + } + + #[test] + fn test_store() { + let (mut asm, mut cb) = setup_asm(); + + // Large memory offsets in combinations of destination and source + let large_mem = Opnd::mem(64, NATIVE_STACK_PTR, -0x305); + let small_mem = Opnd::mem(64, C_RET_OPND, 0); + asm.store(small_mem, large_mem); + asm.store(large_mem, small_mem); + asm.store(large_mem, large_mem); + + asm.compile_with_num_regs(&mut cb, 0); + assert_disasm!(cb, "f0170cd1100240f8100000f8100040f8f1170cd1300200f8f0170cd1100240f8f1170cd1300200f8", " + 0x0: sub x16, sp, #0x305 + 0x4: ldur x16, [x16] + 0x8: stur x16, [x0] + 0xc: ldur x16, [x0] + 0x10: sub x17, sp, #0x305 + 0x14: stur x16, [x17] + 0x18: sub x16, sp, #0x305 + 0x1c: ldur x16, [x16] + 0x20: sub x17, sp, #0x305 + 0x24: stur x16, [x17] + "); + } + + #[test] + fn test_store_value_without_split() { + let (mut asm, mut cb) = setup_asm(); + + let imitation_heap_value = VALUE(0x1000); + 
assert!(imitation_heap_value.heap_object_p()); + asm.store(Opnd::mem(VALUE_BITS, SP, 0), imitation_heap_value.into()); + + // Side exit code are compiled without the split pass, so we directly call emit here to + // emulate that scenario. + let gc_offsets = asm.arm64_emit(&mut cb).unwrap(); + assert_eq!(1, gc_offsets.len(), "VALUE source operand should be reported as gc offset"); + + assert_disasm!(cb, "50000058030000140010000000000000b00200f8", " + 0x0: ldr x16, #8 + 0x4: b #0x10 + 0x8: .byte 0x00, 0x10, 0x00, 0x00 + 0xc: .byte 0x00, 0x00, 0x00, 0x00 + 0x10: stur x16, [x21] + "); + } + + #[test] + #[should_panic] + fn test_store_unserviceable() { + let (mut asm, mut cb) = setup_asm(); + // This would put the source into SCRATCH_REG, messing up the destination + asm.store(Opnd::mem(64, Opnd::Reg(Assembler::SCRATCH_REG), 0), 0x83902.into()); + + asm.compile_with_num_regs(&mut cb, 0); + } + /* #[test] fn test_emit_lea_label() { diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs index 36e783bd4e658a..b910052dae1b04 100644 --- a/zjit/src/backend/lir.rs +++ b/zjit/src/backend/lir.rs @@ -1822,29 +1822,16 @@ impl Assembler }; self.write_label(side_exit_label.clone()); - // Load an operand that cannot be used as a source of Insn::Store - fn split_store_source(asm: &mut Assembler, opnd: Opnd) -> Opnd { - if matches!(opnd, Opnd::Mem(_) | Opnd::Value(_)) || - (cfg!(target_arch = "aarch64") && matches!(opnd, Opnd::UImm(_))) { - asm.load_into(Opnd::Reg(Assembler::SCRATCH_REG), opnd); - Opnd::Reg(Assembler::SCRATCH_REG) - } else { - opnd - } - } - // Restore the PC and the stack for regular side exits. We don't do this for // side exits right after JIT-to-JIT calls, which restore them before the call. 
if let Some(SideExitContext { pc, stack, locals }) = context { asm_comment!(self, "write stack slots: {stack:?}"); for (idx, &opnd) in stack.iter().enumerate() { - let opnd = split_store_source(self, opnd); self.store(Opnd::mem(64, SP, idx as i32 * SIZEOF_VALUE_I32), opnd); } asm_comment!(self, "write locals: {locals:?}"); for (idx, &opnd) in locals.iter().enumerate() { - let opnd = split_store_source(self, opnd); self.store(Opnd::mem(64, SP, (-local_size_and_idx_to_ep_offset(locals.len(), idx) - 1) * SIZEOF_VALUE_I32), opnd); } diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs index 4543252573800c..d21c7ee09c82d2 100644 --- a/zjit/src/backend/x86_64/mod.rs +++ b/zjit/src/backend/x86_64/mod.rs @@ -96,8 +96,10 @@ pub const ALLOC_REGS: &'static [Reg] = &[ impl Assembler { - // A special scratch register for intermediate processing. - // This register is caller-saved (so we don't have to save it before using it) + /// Special scratch registers for intermediate processing. + /// This register is call-clobbered (so we don't have to save it before using it). + /// Avoid using if you can since this is used to lower [Insn] internally and + /// so conflicts are possible. pub const SCRATCH_REG: Reg = R11_REG; const SCRATCH0: X86Opnd = X86Opnd::Reg(Assembler::SCRATCH_REG); @@ -293,38 +295,11 @@ impl Assembler asm.push_insn(insn); }, - Insn::Mov { dest, src } | Insn::Store { dest, src } => { - match (&dest, &src) { - (Opnd::Mem(_), Opnd::Mem(_)) => { - // We load opnd1 because for mov, opnd0 is the output - let opnd1 = asm.load(*src); - asm.mov(*dest, opnd1); - }, - (Opnd::Mem(Mem { num_bits, .. }), Opnd::UImm(value)) => { - // For 64 bit destinations, 32-bit values will be sign-extended - if *num_bits == 64 && imm_num_bits(*value as i64) > 32 { - let opnd1 = asm.load(*src); - asm.mov(*dest, opnd1); - } else { - asm.mov(*dest, *src); - } - }, - (Opnd::Mem(Mem { num_bits, .. 
}), Opnd::Imm(value)) => { - // For 64 bit destinations, 32-bit values will be sign-extended - if *num_bits == 64 && imm_num_bits(*value) > 32 { - let opnd1 = asm.load(*src); - asm.mov(*dest, opnd1); - } else if uimm_num_bits(*value as u64) <= *num_bits { - // If the bit string is short enough for the destination, use the unsigned representation. - // Note that 64-bit and negative values are ruled out. - asm.mov(*dest, Opnd::UImm(*value as u64)); - } else { - asm.mov(*dest, *src); - } - }, - _ => { - asm.mov(*dest, *src); - } + Insn::Mov { dest, src } => { + if let Opnd::Mem(_) = dest { + asm.store(*dest, *src); + } else { + asm.mov(*dest, *src); } }, Insn::Not { opnd, .. } => { @@ -440,6 +415,14 @@ impl Assembler } } + fn emit_load_gc_value(cb: &mut CodeBlock, gc_offsets: &mut Vec, dest_reg: X86Opnd, value: VALUE) { + // Using movabs because mov might write value in 32 bits + movabs(cb, dest_reg, value.0 as _); + // The pointer immediate is encoded as the last part of the mov written out + let ptr_offset = cb.get_write_ptr().sub_bytes(SIZEOF_VALUE); + gc_offsets.push(ptr_offset); + } + // List of GC offsets let mut gc_offsets: Vec = Vec::new(); @@ -552,20 +535,71 @@ impl Assembler shr(cb, opnd.into(), shift.into()) }, - Insn::Store { dest, src } => { - mov(cb, dest.into(), src.into()); - }, + store_insn @ Insn::Store { dest, src } => { + let &Opnd::Mem(Mem { num_bits, base: MemBase::Reg(base_reg_no), disp: _ }) = dest else { + panic!("Unexpected Insn::Store destination in x64_emit: {dest:?}"); + }; + + // This kind of tricky clobber can only happen for explicit use of SCRATCH_REG, + // so we panic to get the author to change their code. 
+ #[track_caller] + fn assert_no_clobber(store_insn: &Insn, user_use: u8, backend_use: Reg) { + assert_ne!( + backend_use.reg_no, + user_use, + "Emitting {store_insn:?} would clobber {user_use:?}, in conflict with its semantics" + ); + } + + let scratch = X86Opnd::Reg(Self::SCRATCH_REG); + let src = match src { + Opnd::Reg(_) => src.into(), + &Opnd::Mem(_) => { + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG); + mov(cb, scratch, src.into()); + scratch + } + &Opnd::Imm(imm) => { + // For 64 bit destinations, 32-bit values will be sign-extended + if num_bits == 64 && imm_num_bits(imm) > 32 { + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG); + mov(cb, scratch, src.into()); + scratch + } else if uimm_num_bits(imm as u64) <= num_bits { + // If the bit string is short enough for the destination, use the unsigned representation. + // Note that 64-bit and negative values are ruled out. + uimm_opnd(imm as u64) + } else { + src.into() + } + } + &Opnd::UImm(imm) => { + // For 64 bit destinations, 32-bit values will be sign-extended + if num_bits == 64 && imm_num_bits(imm as i64) > 32 { + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG); + mov(cb, scratch, src.into()); + scratch + } else { + src.into() + } + } + &Opnd::Value(value) => { + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG); + emit_load_gc_value(cb, &mut gc_offsets, scratch, value); + scratch + } + src @ (Opnd::None | Opnd::VReg { .. 
}) => panic!("Unexpected source operand during x86_emit: {src:?}") + + }; + mov(cb, dest.into(), src); + } // This assumes only load instructions can contain references to GC'd Value operands Insn::Load { opnd, out } | Insn::LoadInto { dest: out, opnd } => { match opnd { Opnd::Value(val) if val.heap_object_p() => { - // Using movabs because mov might write value in 32 bits - movabs(cb, out.into(), val.0 as _); - // The pointer immediate is encoded as the last part of the mov written out - let ptr_offset = cb.get_write_ptr().sub_bytes(SIZEOF_VALUE); - gc_offsets.push(ptr_offset); + emit_load_gc_value(cb, &mut gc_offsets, out.into(), *val); } _ => mov(cb, out.into(), opnd.into()) } @@ -1352,4 +1386,21 @@ mod tests { 0x29: pop rbp "}); } + + #[test] + fn test_store_value_without_split() { + let (mut asm, mut cb) = setup_asm(); + + let imitation_heap_value = VALUE(0x1000); + assert!(imitation_heap_value.heap_object_p()); + asm.store(Opnd::mem(VALUE_BITS, SP, 0), imitation_heap_value.into()); + + let gc_offsets = asm.x86_emit(&mut cb).unwrap(); + assert_eq!(1, gc_offsets.len(), "VALUE source operand should be reported as gc offset"); + + assert_disasm!(cb, "49bb00100000000000004c891b", " + 0x0: movabs r11, 0x1000 + 0xa: mov qword ptr [rbx], r11 + "); + } } diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 23b2dc2bd4192d..433b22e15d2c79 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -1,6 +1,6 @@ use std::cell::Cell; use std::rc::Rc; -use std::ffi::{c_int}; +use std::ffi::{c_int, c_void}; use crate::asm::Label; use crate::backend::current::{Reg, ALLOC_REGS}; @@ -10,7 +10,7 @@ use crate::state::ZJITState; use crate::stats::{counter_ptr, Counter}; use crate::{asm::CodeBlock, cruby::*, options::debug, virtualmem::CodePtr}; use crate::backend::lir::{self, asm_comment, asm_ccall, Assembler, Opnd, SideExitContext, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_STACK_PTR, NATIVE_BASE_PTR, SP}; -use crate::hir::{iseq_to_hir, Block, BlockId, BranchEdge, 
CallInfo, Invariant, RangeType, SideExitReason, SideExitReason::*, SpecialObjectType, SELF_PARAM_IDX}; +use crate::hir::{iseq_to_hir, Block, BlockId, BranchEdge, Invariant, RangeType, SideExitReason, SideExitReason::*, SpecialObjectType, SELF_PARAM_IDX}; use crate::hir::{Const, FrameState, Function, Insn, InsnId}; use crate::hir_type::{types, Type}; use crate::options::get_option; @@ -105,53 +105,33 @@ fn gen_iseq_entry_point(iseq: IseqPtr) -> *const u8 { let code_ptr = gen_iseq_entry_point_body(cb, iseq); // Always mark the code region executable if asm.compile() has been used. - // We need to do this even if code_ptr is null because, whether gen_entry() - // or gen_iseq() fails or not, gen_function() has already used asm.compile(). + // We need to do this even if code_ptr is null because, whether gen_entry() or + // gen_function_stub() fails or not, gen_function() has already used asm.compile(). cb.mark_all_executable(); - code_ptr + code_ptr.map_or(std::ptr::null(), |ptr| ptr.raw_ptr(cb)) } /// Compile an entry point for a given ISEQ -fn gen_iseq_entry_point_body(cb: &mut CodeBlock, iseq: IseqPtr) -> *const u8 { +fn gen_iseq_entry_point_body(cb: &mut CodeBlock, iseq: IseqPtr) -> Option { // Compile ISEQ into High-level IR - let function = match compile_iseq(iseq) { - Some(function) => function, - None => return std::ptr::null(), - }; + let function = compile_iseq(iseq)?; // Compile the High-level IR let Some((start_ptr, gc_offsets, jit)) = gen_function(cb, iseq, &function) else { debug!("Failed to compile iseq: gen_function failed: {}", iseq_get_location(iseq, 0)); - return std::ptr::null(); + return None; }; // Compile an entry point to the JIT code let Some(entry_ptr) = gen_entry(cb, iseq, &function, start_ptr) else { debug!("Failed to compile iseq: gen_entry failed: {}", iseq_get_location(iseq, 0)); - return std::ptr::null(); + return None; }; - let mut branch_iseqs = jit.branch_iseqs; - - // Recursively compile callee ISEQs - let caller_iseq = iseq; - while 
let Some((branch, iseq)) = branch_iseqs.pop() { - // Disable profiling. This will be the last use of the profiling information for the ISEQ. - unsafe { rb_zjit_profile_disable(iseq); } - - // Compile the ISEQ - let Some((callee_ptr, callee_branch_iseqs)) = gen_iseq(cb, iseq) else { - // Failed to compile the callee. Bail out of compiling this graph of ISEQs. - debug!("Failed to compile iseq: could not compile callee: {} -> {}", - iseq_get_location(caller_iseq, 0), iseq_get_location(iseq, 0)); - return std::ptr::null(); - }; - let callee_addr = callee_ptr.raw_ptr(cb); - branch.regenerate(cb, |asm| { - asm.ccall(callee_addr, vec![]); - }); - branch_iseqs.extend(callee_branch_iseqs); + // Stub callee ISEQs for JIT-to-JIT calls + for (branch, callee_iseq) in jit.branch_iseqs.into_iter() { + gen_iseq_branch(cb, callee_iseq, iseq, branch)?; } // Remember the block address to reuse it later @@ -160,7 +140,27 @@ fn gen_iseq_entry_point_body(cb: &mut CodeBlock, iseq: IseqPtr) -> *const u8 { append_gc_offsets(iseq, &gc_offsets); // Return a JIT code address - entry_ptr.raw_ptr(cb) + Some(entry_ptr) +} + +/// Stub a branch for a JIT-to-JIT call +fn gen_iseq_branch(cb: &mut CodeBlock, iseq: IseqPtr, caller_iseq: IseqPtr, branch: Rc) -> Option<()> { + // Compile a function stub + let Some((stub_ptr, gc_offsets)) = gen_function_stub(cb, iseq, branch.clone()) else { + // Failed to compile the stub. Bail out of compiling the caller ISEQ. 
+ debug!("Failed to compile iseq: could not compile stub: {} -> {}", + iseq_get_location(caller_iseq, 0), iseq_get_location(iseq, 0)); + return None; + }; + append_gc_offsets(iseq, &gc_offsets); + + // Update the JIT-to-JIT call to call the stub + let stub_addr = stub_ptr.raw_ptr(cb); + branch.regenerate(cb, |asm| { + asm_comment!(asm, "call function stub: {}", iseq_get_location(iseq, 0)); + asm.ccall(stub_addr, vec![]); + }); + Some(()) } /// Write an entry to the perf map in /tmp @@ -244,7 +244,11 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function) -> Optio let reverse_post_order = function.rpo(); for &block_id in reverse_post_order.iter() { let block = function.block(block_id); - asm_comment!(asm, "Block: {block_id}({})", block.params().map(|param| format!("{param}")).collect::>().join(", ")); + asm_comment!( + asm, "{block_id}({}): {}", + block.params().map(|param| format!("{param}")).collect::>().join(", "), + iseq_get_location(iseq, block.insn_idx), + ); // Write a label to jump to the basic block let label = jit.get_label(&mut asm, block_id); @@ -331,12 +335,12 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::Jump(branch) => return gen_jump(jit, asm, branch), Insn::IfTrue { val, target } => return gen_if_true(jit, asm, opnd!(val), target), Insn::IfFalse { val, target } => return gen_if_false(jit, asm, opnd!(val), target), - Insn::SendWithoutBlock { call_info, cd, state, self_val, args, .. } => gen_send_without_block(jit, asm, call_info, *cd, &function.frame_state(*state), opnd!(self_val), opnds!(args))?, + Insn::SendWithoutBlock { cd, state, self_val, args, .. } => gen_send_without_block(jit, asm, *cd, &function.frame_state(*state), opnd!(self_val), opnds!(args))?, // Give up SendWithoutBlockDirect for 6+ args since asm.ccall() doesn't support it. - Insn::SendWithoutBlockDirect { call_info, cd, state, self_val, args, .. 
} if args.len() + 1 > C_ARG_OPNDS.len() => // +1 for self - gen_send_without_block(jit, asm, call_info, *cd, &function.frame_state(*state), opnd!(self_val), opnds!(args))?, + Insn::SendWithoutBlockDirect { cd, state, self_val, args, .. } if args.len() + 1 > C_ARG_OPNDS.len() => // +1 for self + gen_send_without_block(jit, asm, *cd, &function.frame_state(*state), opnd!(self_val), opnds!(args))?, Insn::SendWithoutBlockDirect { cme, iseq, self_val, args, state, .. } => gen_send_without_block_direct(cb, jit, asm, *cme, *iseq, opnd!(self_val), opnds!(args), &function.frame_state(*state))?, - Insn::InvokeBuiltin { bf, args, state } => gen_invokebuiltin(asm, &function.frame_state(*state), bf, opnds!(args))?, + Insn::InvokeBuiltin { bf, args, state, .. } => gen_invokebuiltin(asm, &function.frame_state(*state), bf, opnds!(args))?, Insn::Return { val } => return Some(gen_return(asm, opnd!(val))?), Insn::FixnumAdd { left, right, state } => gen_fixnum_add(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state))?, Insn::FixnumSub { left, right, state } => gen_fixnum_sub(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state))?, @@ -367,7 +371,22 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::AnyToString { val, str, state } => gen_anytostring(asm, opnd!(val), opnd!(str), &function.frame_state(*state))?, Insn::Defined { op_type, obj, pushval, v } => gen_defined(jit, asm, *op_type, *obj, *pushval, opnd!(v))?, &Insn::IncrCounter(counter) => return Some(gen_incr_counter(asm, counter)), - _ => { + Insn::ArrayExtend { .. } + | Insn::ArrayMax { .. } + | Insn::ArrayPush { .. } + | Insn::DefinedIvar { .. } + | Insn::FixnumDiv { .. } + | Insn::FixnumMod { .. } + | Insn::HashDup { .. } + | Insn::NewHash { .. } + | Insn::ObjToString { .. } + | Insn::Send { .. } + | Insn::StringIntern { .. } + | Insn::Throw { .. } + | Insn::ToArray { .. } + | Insn::ToNewArray { .. } + | Insn::Const { .. 
} + => { debug!("ZJIT: gen_function: unexpected insn {insn}"); return None; } @@ -748,7 +767,6 @@ fn gen_if_false(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, branch: fn gen_send_without_block( jit: &mut JITState, asm: &mut Assembler, - call_info: &CallInfo, cd: *const rb_call_data, state: &FrameState, self_val: Opnd, @@ -774,7 +792,7 @@ fn gen_send_without_block( gen_save_pc(asm, state); gen_save_sp(asm, 1 + args.len()); // +1 for receiver - asm_comment!(asm, "call #{} with dynamic dispatch", call_info.method_name); + asm_comment!(asm, "call #{} with dynamic dispatch", ruby_call_method_name(cd)); unsafe extern "C" { fn rb_vm_opt_send_without_block(ec: EcPtr, cfp: CfpPtr, cd: VALUE) -> VALUE; } @@ -1249,6 +1267,127 @@ fn max_num_params(function: &Function) -> usize { }).max().unwrap_or(0) } +#[cfg(target_arch = "x86_64")] +macro_rules! c_callable { + ($(#[$outer:meta])* + fn $f:ident $args:tt $(-> $ret:ty)? $body:block) => { + $(#[$outer])* + extern "sysv64" fn $f $args $(-> $ret)? $body + }; +} +#[cfg(target_arch = "aarch64")] +macro_rules! c_callable { + ($(#[$outer:meta])* + fn $f:ident $args:tt $(-> $ret:ty)? $body:block) => { + $(#[$outer])* + extern "C" fn $f $args $(-> $ret)? $body + }; +} +pub(crate) use c_callable; + +c_callable! { + /// Generated code calls this function with the SysV calling convention. + /// See [gen_function_stub]. + fn function_stub_hit(iseq: IseqPtr, branch_ptr: *const c_void, ec: EcPtr, sp: *mut VALUE) -> *const u8 { + with_vm_lock(src_loc!(), || { + // Get a pointer to compiled code or the side-exit trampoline + let cb = ZJITState::get_code_block(); + let code_ptr = if let Some(code_ptr) = function_stub_hit_body(cb, iseq, branch_ptr) { + code_ptr + } else { + // gen_push_frame() doesn't set PC and SP, so we need to set them for side-exit + // TODO: We could generate code that sets PC/SP. Note that we'd still need to handle OOM. 
+ let cfp = unsafe { get_ec_cfp(ec) }; + let pc = unsafe { rb_iseq_pc_at_idx(iseq, 0) }; // TODO: handle opt_pc once supported + unsafe { rb_set_cfp_pc(cfp, pc) }; + unsafe { rb_set_cfp_sp(cfp, sp) }; + + // Exit to the interpreter + ZJITState::get_stub_exit() + }; + + cb.mark_all_executable(); + code_ptr.raw_ptr(cb) + }) + } +} + +/// Compile an ISEQ for a function stub +fn function_stub_hit_body(cb: &mut CodeBlock, iseq: IseqPtr, branch_ptr: *const c_void) -> Option { + // Compile the stubbed ISEQ + let Some((code_ptr, branch_iseqs)) = gen_iseq(cb, iseq) else { + debug!("Failed to compile iseq: gen_iseq failed: {}", iseq_get_location(iseq, 0)); + return None; + }; + + // Stub callee ISEQs for JIT-to-JIT calls + for (branch, callee_iseq) in branch_iseqs.into_iter() { + gen_iseq_branch(cb, callee_iseq, iseq, branch)?; + } + + // Update the stub to call the code pointer + let branch = unsafe { Rc::from_raw(branch_ptr as *const Branch) }; + let code_addr = code_ptr.raw_ptr(cb); + branch.regenerate(cb, |asm| { + asm_comment!(asm, "call compiled function: {}", iseq_get_location(iseq, 0)); + asm.ccall(code_addr, vec![]); + }); + + Some(code_ptr) +} + +/// Compile a stub for an ISEQ called by SendWithoutBlockDirect +/// TODO: Consider creating a trampoline to share some of the code among function stubs +fn gen_function_stub(cb: &mut CodeBlock, iseq: IseqPtr, branch: Rc) -> Option<(CodePtr, Vec)> { + let mut asm = Assembler::new(); + asm_comment!(asm, "Stub: {}", iseq_get_location(iseq, 0)); + + // Maintain alignment for x86_64, and set up a frame for arm64 properly + asm.frame_setup(&[], 0); + + asm_comment!(asm, "preserve argument registers"); + for ® in ALLOC_REGS.iter() { + asm.cpush(Opnd::Reg(reg)); + } + const { assert!(ALLOC_REGS.len() % 2 == 0, "x86_64 would need to push one more if we push an odd number of regs"); } + + // Compile the stubbed ISEQ + let branch_addr = Rc::into_raw(branch); + let jump_addr = asm_ccall!(asm, function_stub_hit, + 
Opnd::Value(iseq.into()), + Opnd::const_ptr(branch_addr as *const u8), + EC, + SP + ); + asm.mov(Opnd::Reg(Assembler::SCRATCH_REG), jump_addr); + + asm_comment!(asm, "restore argument registers"); + for ® in ALLOC_REGS.iter().rev() { + asm.cpop_into(Opnd::Reg(reg)); + } + + // Discard the current frame since the JIT function will set it up again + asm.frame_teardown(&[]); + + // Jump to SCRATCH_REG so that cpop_all() doesn't clobber it + asm.jmp_opnd(Opnd::Reg(Assembler::SCRATCH_REG)); + asm.compile(cb) +} + +/// Generate a trampoline that is used when a function stub fails to compile the ISEQ +pub fn gen_stub_exit(cb: &mut CodeBlock) -> Option { + let mut asm = Assembler::new(); + + asm_comment!(asm, "exit from function stub"); + asm.frame_teardown(lir::JIT_PRESERVED_REGS); + asm.cret(Qundef.into()); + + asm.compile(cb).map(|(code_ptr, gc_offsets)| { + assert_eq!(gc_offsets.len(), 0); + code_ptr + }) +} + impl Assembler { /// Make a C call while marking the start and end positions of it fn ccall_with_branch(&mut self, fptr: *const u8, opnds: Vec, branch: &Rc) -> Opnd { diff --git a/zjit/src/cruby.rs b/zjit/src/cruby.rs index 56e342cba03001..afa3ddfb4989c8 100644 --- a/zjit/src/cruby.rs +++ b/zjit/src/cruby.rs @@ -715,7 +715,7 @@ pub fn iseq_name(iseq: IseqPtr) -> String { // Location is the file defining the method, colon, method name. // Filenames are sometimes internal strings supplied to eval, // so be careful with them. 
-pub fn iseq_get_location(iseq: IseqPtr, pos: u16) -> String { +pub fn iseq_get_location(iseq: IseqPtr, pos: u32) -> String { let iseq_path = unsafe { rb_iseq_path(iseq) }; let iseq_lineno = unsafe { rb_iseq_line_no(iseq, pos as usize) }; @@ -750,6 +750,16 @@ pub fn ruby_sym_to_rust_string(v: VALUE) -> String { ruby_str_to_rust_string(ruby_str) } +pub fn ruby_call_method_id(cd: *const rb_call_data) -> ID { + let call_info = unsafe { rb_get_call_data_ci(cd) }; + unsafe { rb_vm_ci_mid(call_info) } +} + +pub fn ruby_call_method_name(cd: *const rb_call_data) -> String { + let mid = ruby_call_method_id(cd); + mid.contents_lossy().to_string() +} + /// A location in Rust code for integrating with debugging facilities defined in C. /// Use the [src_loc!] macro to crate an instance. pub struct SourceLocation { @@ -957,7 +967,7 @@ pub use manual_defs::*; pub mod test_utils { use std::{ptr::null, sync::Once}; - use crate::{options::init_options, state::rb_zjit_enabled_p, state::ZJITState}; + use crate::{options::rb_zjit_prepare_options, state::rb_zjit_enabled_p, state::ZJITState}; use super::*; @@ -979,6 +989,7 @@ pub mod test_utils { // , though let mut var: VALUE = Qnil; ruby_init_stack(&mut var as *mut VALUE as *mut _); + rb_zjit_prepare_options(); // enable `#with_jit` on builtins ruby_init(); // Pass command line options so the VM loads core library methods defined in @@ -994,7 +1005,7 @@ pub mod test_utils { } // Set up globals for convenience - ZJITState::init(init_options()); + ZJITState::init(); // Enable zjit_* instructions unsafe { rb_zjit_enabled_p = true; } @@ -1210,6 +1221,21 @@ pub(crate) mod ids { name: to_s name: compile name: eval + name: plus content: b"+" + name: minus content: b"-" + name: mult content: b"*" + name: div content: b"/" + name: modulo content: b"%" + name: neq content: b"!=" + name: lt content: b"<" + name: le content: b"<=" + name: gt content: b">" + name: ge content: b">=" + name: and content: b"&" + name: or content: b"|" + name: freeze + 
name: minusat content: b"-@" + name: aref content: b"[]" } /// Get an CRuby `ID` to an interned string, e.g. a particular method name. diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index 10f12798f66dad..7fe1a0406ad9d8 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -1015,4 +1015,6 @@ unsafe extern "C" { pub fn rb_IMEMO_TYPE_P(imemo: VALUE, imemo_type: imemo_type) -> ::std::os::raw::c_int; pub fn rb_assert_cme_handle(handle: VALUE); pub fn rb_yarv_ary_entry_internal(ary: VALUE, offset: ::std::os::raw::c_long) -> VALUE; + pub fn rb_set_cfp_pc(cfp: *mut rb_control_frame_struct, pc: *const VALUE); + pub fn rb_set_cfp_sp(cfp: *mut rb_control_frame_struct, sp: *mut VALUE); } diff --git a/zjit/src/cruby_methods.rs b/zjit/src/cruby_methods.rs index 9c29ed5472d8f1..8d1548f92b1717 100644 --- a/zjit/src/cruby_methods.rs +++ b/zjit/src/cruby_methods.rs @@ -15,6 +15,7 @@ use crate::hir_type::{types, Type}; pub struct Annotations { cfuncs: HashMap<*mut c_void, FnProperties>, + builtin_funcs: HashMap<*mut c_void, FnProperties>, } /// Runtime behaviors of C functions that implement a Ruby method @@ -41,6 +42,12 @@ impl Annotations { }; self.cfuncs.get(&fn_ptr).copied() } + + /// Query about properties of a builtin function by its pointer + pub fn get_builtin_properties(&self, bf: *const rb_builtin_function) -> Option { + let func_ptr = unsafe { (*bf).func_ptr as *mut c_void }; + self.builtin_funcs.get(&func_ptr).copied() + } } fn annotate_c_method(props_map: &mut HashMap<*mut c_void, FnProperties>, class: VALUE, method_name: &'static str, props: FnProperties) { @@ -59,10 +66,78 @@ fn annotate_c_method(props_map: &mut HashMap<*mut c_void, FnProperties>, class: props_map.insert(fn_ptr, props); } +/// Look up a method and find its builtin function pointer by parsing its ISEQ +/// We currently only support methods with exactly one invokebuiltin instruction +fn annotate_builtin_method(props_map: &mut HashMap<*mut 
c_void, FnProperties>, class: VALUE, method_name: &'static str, props: FnProperties) { + unsafe { + let method_id = rb_intern2(method_name.as_ptr().cast(), method_name.len().try_into().unwrap()); + let method = rb_method_entry_at(class, method_id); + if method.is_null() { + panic!("Method {}#{} not found", std::ffi::CStr::from_ptr(rb_class2name(class)).to_str().unwrap_or("?"), method_name); + } + + // Cast ME to CME - they have identical layout + let cme = method.cast::(); + let def_type = get_cme_def_type(cme); + + if def_type != VM_METHOD_TYPE_ISEQ { + panic!("Method {}#{} is not an ISEQ method (type: {})", + std::ffi::CStr::from_ptr(rb_class2name(class)).to_str().unwrap_or("?"), + method_name, def_type); + } + + // Get the ISEQ from the method definition + let iseq = get_def_iseq_ptr((*cme).def); + if iseq.is_null() { + panic!("Failed to get ISEQ for {}#{}", + std::ffi::CStr::from_ptr(rb_class2name(class)).to_str().unwrap_or("?"), + method_name); + } + + // Get the size of the ISEQ in instruction units + let encoded_size = rb_iseq_encoded_size(iseq); + + // Scan through the ISEQ to find invokebuiltin instructions + let mut insn_idx: u32 = 0; + let mut func_ptr = std::ptr::null_mut::(); + + while insn_idx < encoded_size { + // Get the PC for this instruction index + let pc = rb_iseq_pc_at_idx(iseq, insn_idx); + + // Get the opcode using the proper decoder + let opcode = rb_iseq_opcode_at_pc(iseq, pc); + + if opcode == YARVINSN_invokebuiltin as i32 || + opcode == YARVINSN_opt_invokebuiltin_delegate as i32 || + opcode == YARVINSN_opt_invokebuiltin_delegate_leave as i32 { + // The first operand is the builtin function pointer + let bf_value = *pc.add(1); + let bf_ptr = bf_value.as_ptr() as *const rb_builtin_function; + + if func_ptr.is_null() { + func_ptr = (*bf_ptr).func_ptr as *mut c_void; + } else { + panic!("Multiple invokebuiltin instructions found in ISEQ for {}#{}", + std::ffi::CStr::from_ptr(rb_class2name(class)).to_str().unwrap_or("?"), + method_name); + } 
+ } + + // Move to the next instruction using the proper length + insn_idx = insn_idx.saturating_add(rb_insn_len(VALUE(opcode as usize)).try_into().unwrap()); + } + + // Only insert the properties if its iseq has exactly one invokebuiltin instruction + props_map.insert(func_ptr, props); + } +} + /// Gather annotations. Run this right after boot since the annotations /// are about the stock versions of methods. pub fn init() -> Annotations { let cfuncs = &mut HashMap::new(); + let builtin_funcs = &mut HashMap::new(); macro_rules! annotate { ($module:ident, $method_name:literal, $return_type:expr, $($properties:ident),+) => { @@ -74,6 +149,22 @@ pub fn init() -> Annotations { } } + macro_rules! annotate_builtin { + ($module:ident, $method_name:literal, $return_type:expr) => { + annotate_builtin!($module, $method_name, $return_type, no_gc, leaf, elidable) + }; + ($module:ident, $method_name:literal, $return_type:expr, $($properties:ident),+) => { + let mut props = FnProperties { + no_gc: false, + leaf: false, + elidable: false, + return_type: $return_type + }; + $(props.$properties = true;)+ + annotate_builtin_method(builtin_funcs, unsafe { $module }, $method_name, props); + } + } + annotate!(rb_mKernel, "itself", types::BasicObject, no_gc, leaf, elidable); annotate!(rb_cString, "bytesize", types::Fixnum, no_gc, leaf); annotate!(rb_cModule, "name", types::StringExact.union(types::NilClass), no_gc, leaf, elidable); @@ -83,7 +174,12 @@ pub fn init() -> Annotations { annotate!(rb_cNilClass, "nil?", types::TrueClass, no_gc, leaf, elidable); annotate!(rb_mKernel, "nil?", types::FalseClass, no_gc, leaf, elidable); + annotate_builtin!(rb_mKernel, "Float", types::Flonum); + annotate_builtin!(rb_mKernel, "Integer", types::Integer); + annotate_builtin!(rb_mKernel, "class", types::Class, leaf); + Annotations { - cfuncs: std::mem::take(cfuncs) + cfuncs: std::mem::take(cfuncs), + builtin_funcs: std::mem::take(builtin_funcs), } } diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 
06c00e3d99a10b..976580c85b25e1 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -107,11 +107,6 @@ impl std::fmt::Display for BranchEdge { } } -#[derive(Debug, PartialEq, Clone)] -pub struct CallInfo { - pub method_name: String, -} - /// Invalidation reasons #[derive(Debug, Clone, Copy)] pub enum Invariant { @@ -297,7 +292,7 @@ impl std::fmt::Display for RangeType { impl std::fmt::Debug for RangeType { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{}", self.to_string()) + write!(f, "{}", self) } } @@ -462,7 +457,6 @@ pub enum Insn { /// NewHash contains a vec of (key, value) pairs NewHash { elements: Vec<(InsnId,InsnId)>, state: InsnId }, NewRange { low: InsnId, high: InsnId, flag: RangeType, state: InsnId }, - ArraySet { array: InsnId, idx: usize, val: InsnId }, ArrayDup { val: InsnId, state: InsnId }, ArrayMax { elements: Vec, state: InsnId }, /// Extend `left` with the elements from `right`. `left` and `right` must both be `Array`. @@ -516,11 +510,10 @@ pub enum Insn { /// Send without block with dynamic dispatch /// Ignoring keyword arguments etc for now - SendWithoutBlock { self_val: InsnId, call_info: CallInfo, cd: *const rb_call_data, args: Vec, state: InsnId }, - Send { self_val: InsnId, call_info: CallInfo, cd: *const rb_call_data, blockiseq: IseqPtr, args: Vec, state: InsnId }, + SendWithoutBlock { self_val: InsnId, cd: *const rb_call_data, args: Vec, state: InsnId }, + Send { self_val: InsnId, cd: *const rb_call_data, blockiseq: IseqPtr, args: Vec, state: InsnId }, SendWithoutBlockDirect { self_val: InsnId, - call_info: CallInfo, cd: *const rb_call_data, cme: *const rb_callable_method_entry_t, iseq: IseqPtr, @@ -529,7 +522,12 @@ pub enum Insn { }, // Invoke a builtin function - InvokeBuiltin { bf: rb_builtin_function, args: Vec, state: InsnId }, + InvokeBuiltin { + bf: rb_builtin_function, + args: Vec, + state: InsnId, + return_type: Option, // None for unannotated builtins + }, /// Control flow instructions Return { 
val: InsnId }, @@ -552,7 +550,7 @@ pub enum Insn { FixnumOr { left: InsnId, right: InsnId }, // Distinct from `SendWithoutBlock` with `mid:to_s` because does not have a patch point for String to_s being redefined - ObjToString { val: InsnId, call_info: CallInfo, cd: *const rb_call_data, state: InsnId }, + ObjToString { val: InsnId, cd: *const rb_call_data, state: InsnId }, AnyToString { val: InsnId, str: InsnId, state: InsnId }, /// Side-exit if val doesn't have the expected type. @@ -575,7 +573,7 @@ impl Insn { /// Not every instruction returns a value. Return true if the instruction does and false otherwise. pub fn has_output(&self) -> bool { match self { - Insn::ArraySet { .. } | Insn::Jump(_) + Insn::Jump(_) | Insn::IfTrue { .. } | Insn::IfFalse { .. } | Insn::Return { .. } | Insn::PatchPoint { .. } | Insn::SetIvar { .. } | Insn::ArrayExtend { .. } | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetGlobal { .. } @@ -673,7 +671,6 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { } Ok(()) } - Insn::ArraySet { array, idx, val } => { write!(f, "ArraySet {array}, {idx}, {val}") } Insn::ArrayDup { val, .. } => { write!(f, "ArrayDup {val}") } Insn::HashDup { val, .. } => { write!(f, "HashDup {val}") } Insn::StringCopy { val, .. } => { write!(f, "StringCopy {val}") } @@ -682,25 +679,25 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::Jump(target) => { write!(f, "Jump {target}") } Insn::IfTrue { val, target } => { write!(f, "IfTrue {val}, {target}") } Insn::IfFalse { val, target } => { write!(f, "IfFalse {val}, {target}") } - Insn::SendWithoutBlock { self_val, call_info, args, .. } => { - write!(f, "SendWithoutBlock {self_val}, :{}", call_info.method_name)?; + Insn::SendWithoutBlock { self_val, cd, args, .. } => { + write!(f, "SendWithoutBlock {self_val}, :{}", ruby_call_method_name(*cd))?; for arg in args { write!(f, ", {arg}")?; } Ok(()) } - Insn::SendWithoutBlockDirect { self_val, call_info, iseq, args, .. 
} => { - write!(f, "SendWithoutBlockDirect {self_val}, :{} ({:?})", call_info.method_name, self.ptr_map.map_ptr(iseq))?; + Insn::SendWithoutBlockDirect { self_val, cd, iseq, args, .. } => { + write!(f, "SendWithoutBlockDirect {self_val}, :{} ({:?})", ruby_call_method_name(*cd), self.ptr_map.map_ptr(iseq))?; for arg in args { write!(f, ", {arg}")?; } Ok(()) } - Insn::Send { self_val, call_info, args, blockiseq, .. } => { + Insn::Send { self_val, cd, args, blockiseq, .. } => { // For tests, we want to check HIR snippets textually. Addresses change // between runs, making tests fail. Instead, pick an arbitrary hex value to // use as a "pointer" so we can check the rest of the HIR. - write!(f, "Send {self_val}, {:p}, :{}", self.ptr_map.map_ptr(blockiseq), call_info.method_name)?; + write!(f, "Send {self_val}, {:p}, :{}", self.ptr_map.map_ptr(blockiseq), ruby_call_method_name(*cd))?; for arg in args { write!(f, ", {arg}")?; } @@ -742,24 +739,24 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::Defined { op_type, v, .. } => { // op_type (enum defined_type) printing logic from iseq.c. // Not sure why rb_iseq_defined_string() isn't exhaustive. - use std::borrow::Cow; + write!(f, "Defined ")?; let op_type = *op_type as u32; - let op_type = if op_type == DEFINED_FUNC { - Cow::Borrowed("func") + if op_type == DEFINED_FUNC { + write!(f, "func")?; } else if op_type == DEFINED_REF { - Cow::Borrowed("ref") + write!(f, "ref")?; } else if op_type == DEFINED_CONST_FROM { - Cow::Borrowed("constant-from") + write!(f, "constant-from")?; } else { - String::from_utf8_lossy(unsafe { rb_iseq_defined_string(op_type).as_rstring_byte_slice().unwrap() }) + write!(f, "{}", String::from_utf8_lossy(unsafe { rb_iseq_defined_string(op_type).as_rstring_byte_slice().unwrap() }))?; }; - write!(f, "Defined {op_type}, {v}") + write!(f, ", {v}") } - Insn::DefinedIvar { self_val, id, .. 
} => write!(f, "DefinedIvar {self_val}, :{}", id.contents_lossy().into_owned()), - Insn::GetIvar { self_val, id, .. } => write!(f, "GetIvar {self_val}, :{}", id.contents_lossy().into_owned()), - Insn::SetIvar { self_val, id, val, .. } => write!(f, "SetIvar {self_val}, :{}, {val}", id.contents_lossy().into_owned()), - Insn::GetGlobal { id, .. } => write!(f, "GetGlobal :{}", id.contents_lossy().into_owned()), - Insn::SetGlobal { id, val, .. } => write!(f, "SetGlobal :{}, {val}", id.contents_lossy().into_owned()), + Insn::DefinedIvar { self_val, id, .. } => write!(f, "DefinedIvar {self_val}, :{}", id.contents_lossy()), + Insn::GetIvar { self_val, id, .. } => write!(f, "GetIvar {self_val}, :{}", id.contents_lossy()), + Insn::SetIvar { self_val, id, val, .. } => write!(f, "SetIvar {self_val}, :{}, {val}", id.contents_lossy()), + Insn::GetGlobal { id, .. } => write!(f, "GetGlobal :{}", id.contents_lossy()), + Insn::SetGlobal { id, val, .. } => write!(f, "SetGlobal :{}, {val}", id.contents_lossy()), Insn::GetLocal { level, ep_offset } => write!(f, "GetLocal l{level}, EP@{ep_offset}"), Insn::SetLocal { val, level, ep_offset } => write!(f, "SetLocal l{level}, EP@{ep_offset}, {val}"), Insn::ToArray { val, .. } => write!(f, "ToArray {val}"), @@ -771,23 +768,23 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::SideExit { reason, .. 
} => write!(f, "SideExit {reason}"), Insn::PutSpecialObject { value_type } => write!(f, "PutSpecialObject {value_type}"), Insn::Throw { throw_state, val } => { - let mut state_string = match throw_state & VM_THROW_STATE_MASK { - RUBY_TAG_NONE => "TAG_NONE".to_string(), - RUBY_TAG_RETURN => "TAG_RETURN".to_string(), - RUBY_TAG_BREAK => "TAG_BREAK".to_string(), - RUBY_TAG_NEXT => "TAG_NEXT".to_string(), - RUBY_TAG_RETRY => "TAG_RETRY".to_string(), - RUBY_TAG_REDO => "TAG_REDO".to_string(), - RUBY_TAG_RAISE => "TAG_RAISE".to_string(), - RUBY_TAG_THROW => "TAG_THROW".to_string(), - RUBY_TAG_FATAL => "TAG_FATAL".to_string(), - tag => format!("{tag}") - }; + write!(f, "Throw ")?; + match throw_state & VM_THROW_STATE_MASK { + RUBY_TAG_NONE => write!(f, "TAG_NONE"), + RUBY_TAG_RETURN => write!(f, "TAG_RETURN"), + RUBY_TAG_BREAK => write!(f, "TAG_BREAK"), + RUBY_TAG_NEXT => write!(f, "TAG_NEXT"), + RUBY_TAG_RETRY => write!(f, "TAG_RETRY"), + RUBY_TAG_REDO => write!(f, "TAG_REDO"), + RUBY_TAG_RAISE => write!(f, "TAG_RAISE"), + RUBY_TAG_THROW => write!(f, "TAG_THROW"), + RUBY_TAG_FATAL => write!(f, "TAG_FATAL"), + tag => write!(f, "{tag}") + }?; if throw_state & VM_THROW_NO_ESCAPE_FLAG != 0 { - use std::fmt::Write; - write!(state_string, "|NO_ESCAPE")?; + write!(f, "|NO_ESCAPE")?; } - write!(f, "Throw {state_string}, {val}") + write!(f, ", {val}") } Insn::IncrCounter(counter) => write!(f, "IncrCounter {counter:?}"), insn => { write!(f, "{insn:?}") } @@ -804,6 +801,8 @@ impl std::fmt::Display for Insn { /// An extended basic block in a [`Function`]. 
#[derive(Default, Debug)] pub struct Block { + /// The index of the first YARV instruction for the Block in the ISEQ + pub insn_idx: u32, params: Vec, insns: Vec, } @@ -1032,9 +1031,11 @@ impl Function { } } - fn new_block(&mut self) -> BlockId { + fn new_block(&mut self, insn_idx: u32) -> BlockId { let id = BlockId(self.blocks.len()); - self.blocks.push(Block::default()); + let mut block = Block::default(); + block.insn_idx = insn_idx; + self.blocks.push(block); id } @@ -1102,83 +1103,78 @@ impl Function { | GetLocal {..} | SideExit {..} | IncrCounter(_)) => result.clone(), - Snapshot { state: FrameState { iseq, insn_idx, pc, stack, locals } } => + &Snapshot { state: FrameState { iseq, insn_idx, pc, ref stack, ref locals } } => Snapshot { state: FrameState { - iseq: *iseq, - insn_idx: *insn_idx, - pc: *pc, + iseq, + insn_idx, + pc, stack: find_vec!(stack), locals: find_vec!(locals), } }, - Return { val } => Return { val: find!(*val) }, + &Return { val } => Return { val: find!(val) }, &Throw { throw_state, val } => Throw { throw_state, val: find!(val) }, - StringCopy { val, chilled } => StringCopy { val: find!(*val), chilled: *chilled }, - StringIntern { val } => StringIntern { val: find!(*val) }, - Test { val } => Test { val: find!(*val) }, + &StringCopy { val, chilled } => StringCopy { val: find!(val), chilled }, + &StringIntern { val } => StringIntern { val: find!(val) }, + &Test { val } => Test { val: find!(val) }, &IsNil { val } => IsNil { val: find!(val) }, - Jump(target) => Jump(find_branch_edge!(target)), - IfTrue { val, target } => IfTrue { val: find!(*val), target: find_branch_edge!(target) }, - IfFalse { val, target } => IfFalse { val: find!(*val), target: find_branch_edge!(target) }, - GuardType { val, guard_type, state } => GuardType { val: find!(*val), guard_type: *guard_type, state: *state }, - GuardBitEquals { val, expected, state } => GuardBitEquals { val: find!(*val), expected: *expected, state: *state }, - FixnumAdd { left, right, state } => 
FixnumAdd { left: find!(*left), right: find!(*right), state: *state }, - FixnumSub { left, right, state } => FixnumSub { left: find!(*left), right: find!(*right), state: *state }, - FixnumMult { left, right, state } => FixnumMult { left: find!(*left), right: find!(*right), state: *state }, - FixnumDiv { left, right, state } => FixnumDiv { left: find!(*left), right: find!(*right), state: *state }, - FixnumMod { left, right, state } => FixnumMod { left: find!(*left), right: find!(*right), state: *state }, - FixnumNeq { left, right } => FixnumNeq { left: find!(*left), right: find!(*right) }, - FixnumEq { left, right } => FixnumEq { left: find!(*left), right: find!(*right) }, - FixnumGt { left, right } => FixnumGt { left: find!(*left), right: find!(*right) }, - FixnumGe { left, right } => FixnumGe { left: find!(*left), right: find!(*right) }, - FixnumLt { left, right } => FixnumLt { left: find!(*left), right: find!(*right) }, - FixnumLe { left, right } => FixnumLe { left: find!(*left), right: find!(*right) }, - FixnumAnd { left, right } => FixnumAnd { left: find!(*left), right: find!(*right) }, - FixnumOr { left, right } => FixnumOr { left: find!(*left), right: find!(*right) }, - ObjToString { val, call_info, cd, state } => ObjToString { - val: find!(*val), - call_info: call_info.clone(), - cd: *cd, - state: *state, + &Jump(ref target) => Jump(find_branch_edge!(target)), + &IfTrue { val, ref target } => IfTrue { val: find!(val), target: find_branch_edge!(target) }, + &IfFalse { val, ref target } => IfFalse { val: find!(val), target: find_branch_edge!(target) }, + &GuardType { val, guard_type, state } => GuardType { val: find!(val), guard_type: guard_type, state }, + &GuardBitEquals { val, expected, state } => GuardBitEquals { val: find!(val), expected: expected, state }, + &FixnumAdd { left, right, state } => FixnumAdd { left: find!(left), right: find!(right), state }, + &FixnumSub { left, right, state } => FixnumSub { left: find!(left), right: find!(right), state }, + 
&FixnumMult { left, right, state } => FixnumMult { left: find!(left), right: find!(right), state }, + &FixnumDiv { left, right, state } => FixnumDiv { left: find!(left), right: find!(right), state }, + &FixnumMod { left, right, state } => FixnumMod { left: find!(left), right: find!(right), state }, + &FixnumNeq { left, right } => FixnumNeq { left: find!(left), right: find!(right) }, + &FixnumEq { left, right } => FixnumEq { left: find!(left), right: find!(right) }, + &FixnumGt { left, right } => FixnumGt { left: find!(left), right: find!(right) }, + &FixnumGe { left, right } => FixnumGe { left: find!(left), right: find!(right) }, + &FixnumLt { left, right } => FixnumLt { left: find!(left), right: find!(right) }, + &FixnumLe { left, right } => FixnumLe { left: find!(left), right: find!(right) }, + &FixnumAnd { left, right } => FixnumAnd { left: find!(left), right: find!(right) }, + &FixnumOr { left, right } => FixnumOr { left: find!(left), right: find!(right) }, + &ObjToString { val, cd, state } => ObjToString { + val: find!(val), + cd: cd, + state, }, - AnyToString { val, str, state } => AnyToString { - val: find!(*val), - str: find!(*str), - state: *state, + &AnyToString { val, str, state } => AnyToString { + val: find!(val), + str: find!(str), + state, }, - SendWithoutBlock { self_val, call_info, cd, args, state } => SendWithoutBlock { - self_val: find!(*self_val), - call_info: call_info.clone(), - cd: *cd, + &SendWithoutBlock { self_val, cd, ref args, state } => SendWithoutBlock { + self_val: find!(self_val), + cd: cd, args: find_vec!(args), - state: *state, + state, }, - SendWithoutBlockDirect { self_val, call_info, cd, cme, iseq, args, state } => SendWithoutBlockDirect { - self_val: find!(*self_val), - call_info: call_info.clone(), - cd: *cd, - cme: *cme, - iseq: *iseq, + &SendWithoutBlockDirect { self_val, cd, cme, iseq, ref args, state } => SendWithoutBlockDirect { + self_val: find!(self_val), + cd: cd, + cme: cme, + iseq: iseq, args: find_vec!(args), - 
state: *state, + state, }, - Send { self_val, call_info, cd, blockiseq, args, state } => Send { - self_val: find!(*self_val), - call_info: call_info.clone(), - cd: *cd, - blockiseq: *blockiseq, + &Send { self_val, cd, blockiseq, ref args, state } => Send { + self_val: find!(self_val), + cd: cd, + blockiseq: blockiseq, args: find_vec!(args), - state: *state, + state, }, - InvokeBuiltin { bf, args, state } => InvokeBuiltin { bf: *bf, args: find_vec!(*args), state: *state }, - ArraySet { array, idx, val } => ArraySet { array: find!(*array), idx: *idx, val: find!(*val) }, - ArrayDup { val , state } => ArrayDup { val: find!(*val), state: *state }, - &HashDup { val , state } => HashDup { val: find!(val), state }, + &InvokeBuiltin { bf, ref args, state, return_type } => InvokeBuiltin { bf, args: find_vec!(args), state, return_type }, + &ArrayDup { val, state } => ArrayDup { val: find!(val), state }, + &HashDup { val, state } => HashDup { val: find!(val), state }, &CCall { cfun, ref args, name, return_type, elidable } => CCall { cfun, args: find_vec!(args), name, return_type, elidable }, &Defined { op_type, obj, pushval, v } => Defined { op_type, obj, pushval, v: find!(v) }, &DefinedIvar { self_val, pushval, id, state } => DefinedIvar { self_val: find!(self_val), pushval, id, state }, - NewArray { elements, state } => NewArray { elements: find_vec!(*elements), state: find!(*state) }, + &NewArray { ref elements, state } => NewArray { elements: find_vec!(elements), state: find!(state) }, &NewHash { ref elements, state } => { let mut found_elements = vec![]; for &(key, value) in elements { @@ -1187,7 +1183,7 @@ impl Function { NewHash { elements: found_elements, state: find!(state) } } &NewRange { low, high, flag, state } => NewRange { low: find!(low), high: find!(high), flag, state: find!(state) }, - ArrayMax { elements, state } => ArrayMax { elements: find_vec!(*elements), state: find!(*state) }, + &ArrayMax { ref elements, state } => ArrayMax { elements: 
find_vec!(elements), state: find!(state) }, &SetGlobal { id, val, state } => SetGlobal { id, val: find!(val), state }, &GetIvar { self_val, id, state } => GetIvar { self_val: find!(self_val), id, state }, &SetIvar { self_val, id, val, state } => SetIvar { self_val: find!(self_val), id, val: find!(val), state }, @@ -1219,7 +1215,7 @@ impl Function { assert!(self.insns[insn.0].has_output()); match &self.insns[insn.0] { Insn::Param { .. } => unimplemented!("params should not be present in block.insns"), - Insn::SetGlobal { .. } | Insn::ArraySet { .. } | Insn::Jump(_) + Insn::SetGlobal { .. } | Insn::Jump(_) | Insn::IfTrue { .. } | Insn::IfFalse { .. } | Insn::Return { .. } | Insn::Throw { .. } | Insn::PatchPoint { .. } | Insn::SetIvar { .. } | Insn::ArrayExtend { .. } | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetLocal { .. } | Insn::IncrCounter(_) => @@ -1269,7 +1265,7 @@ impl Function { Insn::SendWithoutBlock { .. } => types::BasicObject, Insn::SendWithoutBlockDirect { .. } => types::BasicObject, Insn::Send { .. } => types::BasicObject, - Insn::InvokeBuiltin { .. } => types::BasicObject, + Insn::InvokeBuiltin { return_type, .. } => return_type.unwrap_or(types::BasicObject), Insn::Defined { .. } => types::BasicObject, Insn::DefinedIvar { .. } => types::BasicObject, Insn::GetConstantPath { .. } => types::BasicObject, @@ -1480,39 +1476,39 @@ impl Function { assert!(self.blocks[block.0].insns.is_empty()); for insn_id in old_insns { match self.find(insn_id) { - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == "+" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(plus) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumAdd { left, right, state }, BOP_PLUS, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. 
} if method_name == "-" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(minus) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumSub { left, right, state }, BOP_MINUS, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == "*" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(mult) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumMult { left, right, state }, BOP_MULT, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == "/" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(div) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumDiv { left, right, state }, BOP_DIV, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == "%" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(modulo) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumMod { left, right, state }, BOP_MOD, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == "==" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(eq) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumEq { left, right }, BOP_EQ, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. 
} if method_name == "!=" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(neq) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumNeq { left, right }, BOP_NEQ, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == "<" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(lt) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumLt { left, right }, BOP_LT, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == "<=" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(le) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumLe { left, right }, BOP_LE, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == ">" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(gt) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumGt { left, right }, BOP_GT, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == ">=" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(ge) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumGe { left, right }, BOP_GE, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. 
} if method_name == "&" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(and) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumAnd { left, right }, BOP_AND, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == "|" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(or) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumOr { left, right }, BOP_OR, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == "freeze" && args.len() == 0 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(freeze) && args.len() == 0 => self.try_rewrite_freeze(block, insn_id, self_val, state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == "-@" && args.len() == 0 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(minusat) && args.len() == 0 => self.try_rewrite_uminus(block, insn_id, self_val, state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == "[]" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. 
} if ruby_call_method_id(cd) == ID!(aref) && args.len() == 1 => self.try_rewrite_aref(block, insn_id, self_val, args[0], state), - Insn::SendWithoutBlock { mut self_val, call_info, cd, args, state } => { + Insn::SendWithoutBlock { mut self_val, cd, args, state } => { let frame_state = self.frame_state(state); let (klass, guard_equal_to) = if let Some(klass) = self.type_of(self_val).runtime_exact_ruby_class() { // If we know the class statically, use it to fold the lookup at compile-time. @@ -1549,7 +1545,7 @@ impl Function { if let Some(expected) = guard_equal_to { self_val = self.push_insn(block, Insn::GuardBitEquals { val: self_val, expected, state }); } - let send_direct = self.push_insn(block, Insn::SendWithoutBlockDirect { self_val, call_info, cd, cme, iseq, args, state }); + let send_direct = self.push_insn(block, Insn::SendWithoutBlockDirect { self_val, cd, cme, iseq, args, state }); self.make_equal_to(insn_id, send_direct); } Insn::GetConstantPath { ic, state, .. } => { @@ -1572,12 +1568,12 @@ impl Function { self.insn_types[replacement.0] = self.infer_type(replacement); self.make_equal_to(insn_id, replacement); } - Insn::ObjToString { val, call_info, cd, state, .. } => { + Insn::ObjToString { val, cd, state, .. } => { if self.is_a(val, types::String) { // behaves differently from `SendWithoutBlock` with `mid:to_s` because ObjToString should not have a patch point for String to_s being redefined self.make_equal_to(insn_id, val); } else { - let replacement = self.push_insn(block, Insn::SendWithoutBlock { self_val: val, call_info, cd, args: vec![], state }); + let replacement = self.push_insn(block, Insn::SendWithoutBlock { self_val: val, cd, args: vec![], state }); self.make_equal_to(insn_id, replacement) } } @@ -1881,10 +1877,6 @@ impl Function { worklist.push_back(val); worklist.push_back(state); } - &Insn::ArraySet { array, val, .. 
} => { - worklist.push_back(array); - worklist.push_back(val); - } &Insn::Snapshot { ref state } => { worklist.extend(&state.stack); worklist.extend(&state.locals); @@ -2560,7 +2552,7 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { if insn_idx == 0 { todo!("Separate entry block for param/self/..."); } - insn_idx_to_block.insert(insn_idx, fun.new_block()); + insn_idx_to_block.insert(insn_idx, fun.new_block(insn_idx)); } // Iteratively fill out basic blocks using a queue @@ -2941,18 +2933,13 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { } let argc = unsafe { vm_ci_argc((*cd).ci) }; - let method_name = unsafe { - let mid = rb_vm_ci_mid(call_info); - mid.contents_lossy().into_owned() - }; - assert_eq!(1, argc, "opt_aref_with should only be emitted for argc=1"); let aref_arg = fun.push_insn(block, Insn::Const { val: Const::Value(get_arg(pc, 0)) }); let args = vec![aref_arg]; let recv = state.stack_pop()?; let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); - let send = fun.push_insn(block, Insn::SendWithoutBlock { self_val: recv, call_info: CallInfo { method_name }, cd, args, state: exit_id }); + let send = fun.push_insn(block, Insn::SendWithoutBlock { self_val: recv, cd, args, state: exit_id }); state.stack_push(send); } YARVINSN_opt_neq => { @@ -2967,11 +2954,6 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { } let argc = unsafe { vm_ci_argc((*cd).ci) }; - - let method_name = unsafe { - let mid = rb_vm_ci_mid(call_info); - mid.contents_lossy().into_owned() - }; let mut args = vec![]; for _ in 0..argc { args.push(state.stack_pop()?); @@ -2980,7 +2962,7 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { let recv = state.stack_pop()?; let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); - let send = fun.push_insn(block, Insn::SendWithoutBlock { self_val: recv, call_info: CallInfo { method_name }, cd, args, state: exit_id }); + let send = fun.push_insn(block, Insn::SendWithoutBlock { 
self_val: recv, cd, args, state: exit_id }); state.stack_push(send); } YARVINSN_opt_hash_freeze | @@ -3001,14 +2983,9 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { assert_eq!(0, argc, "{name} should not have args"); let args = vec![]; - let method_name = unsafe { - let mid = rb_vm_ci_mid(call_info); - mid.contents_lossy().into_owned() - }; - let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); let recv = fun.push_insn(block, Insn::Const { val: Const::Value(get_arg(pc, 0)) }); - let send = fun.push_insn(block, Insn::SendWithoutBlock { self_val: recv, call_info: CallInfo { method_name }, cd, args, state: exit_id }); + let send = fun.push_insn(block, Insn::SendWithoutBlock { self_val: recv, cd, args, state: exit_id }); state.stack_push(send); } @@ -3058,11 +3035,6 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { } let argc = unsafe { vm_ci_argc((*cd).ci) }; - - let method_name = unsafe { - let mid = rb_vm_ci_mid(call_info); - mid.contents_lossy().into_owned() - }; let mut args = vec![]; for _ in 0..argc { args.push(state.stack_pop()?); @@ -3071,7 +3043,7 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { let recv = state.stack_pop()?; let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); - let send = fun.push_insn(block, Insn::SendWithoutBlock { self_val: recv, call_info: CallInfo { method_name }, cd, args, state: exit_id }); + let send = fun.push_insn(block, Insn::SendWithoutBlock { self_val: recv, cd, args, state: exit_id }); state.stack_push(send); } YARVINSN_send => { @@ -3086,10 +3058,6 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { } let argc = unsafe { vm_ci_argc((*cd).ci) }; - let method_name = unsafe { - let mid = rb_vm_ci_mid(call_info); - mid.contents_lossy().into_owned() - }; let mut args = vec![]; for _ in 0..argc { args.push(state.stack_pop()?); @@ -3098,7 +3066,7 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { let recv = state.stack_pop()?; let exit_id = 
fun.push_insn(block, Insn::Snapshot { state: exit_state }); - let send = fun.push_insn(block, Insn::Send { self_val: recv, call_info: CallInfo { method_name }, cd, blockiseq, args, state: exit_id }); + let send = fun.push_insn(block, Insn::Send { self_val: recv, cd, blockiseq, args, state: exit_id }); state.stack_push(send); } YARVINSN_getglobal => { @@ -3156,7 +3124,18 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { args.reverse(); let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); - let insn_id = fun.push_insn(block, Insn::InvokeBuiltin { bf, args, state: exit_id }); + + // Check if this builtin is annotated + let return_type = ZJITState::get_method_annotations() + .get_builtin_properties(&bf) + .map(|props| props.return_type); + + let insn_id = fun.push_insn(block, Insn::InvokeBuiltin { + bf, + args, + state: exit_id, + return_type, + }); state.stack_push(insn_id); } YARVINSN_opt_invokebuiltin_delegate | @@ -3171,7 +3150,18 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { } let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); - let insn_id = fun.push_insn(block, Insn::InvokeBuiltin { bf, args, state: exit_id }); + + // Check if this builtin is annotated + let return_type = ZJITState::get_method_annotations() + .get_builtin_properties(&bf) + .map(|props| props.return_type); + + let insn_id = fun.push_insn(block, Insn::InvokeBuiltin { + bf, + args, + state: exit_id, + return_type, + }); state.stack_push(insn_id); } YARVINSN_objtostring => { @@ -3184,14 +3174,9 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { let argc = unsafe { vm_ci_argc((*cd).ci) }; assert_eq!(0, argc, "objtostring should not have args"); - let method_name: String = unsafe { - let mid = rb_vm_ci_mid(call_info); - mid.contents_lossy().into_owned() - }; - let recv = state.stack_pop()?; let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); - let objtostring = fun.push_insn(block, Insn::ObjToString { val: 
recv, call_info: CallInfo { method_name }, cd, state: exit_id }); + let objtostring = fun.push_insn(block, Insn::ObjToString { val: recv, cd, state: exit_id }); state.stack_push(objtostring) } YARVINSN_anytostring => { @@ -3289,7 +3274,7 @@ mod rpo_tests { fn jump() { let mut function = Function::new(std::ptr::null()); let entry = function.entry_block; - let exit = function.new_block(); + let exit = function.new_block(0); function.push_insn(entry, Insn::Jump(BranchEdge { target: exit, args: vec![] })); let val = function.push_insn(entry, Insn::Const { val: Const::Value(Qnil) }); function.push_insn(entry, Insn::Return { val }); @@ -3300,8 +3285,8 @@ mod rpo_tests { fn diamond_iftrue() { let mut function = Function::new(std::ptr::null()); let entry = function.entry_block; - let side = function.new_block(); - let exit = function.new_block(); + let side = function.new_block(0); + let exit = function.new_block(0); function.push_insn(side, Insn::Jump(BranchEdge { target: exit, args: vec![] })); let val = function.push_insn(entry, Insn::Const { val: Const::Value(Qnil) }); function.push_insn(entry, Insn::IfTrue { val, target: BranchEdge { target: side, args: vec![] } }); @@ -3315,8 +3300,8 @@ mod rpo_tests { fn diamond_iffalse() { let mut function = Function::new(std::ptr::null()); let entry = function.entry_block; - let side = function.new_block(); - let exit = function.new_block(); + let side = function.new_block(0); + let exit = function.new_block(0); function.push_insn(side, Insn::Jump(BranchEdge { target: exit, args: vec![] })); let val = function.push_insn(entry, Insn::Const { val: Const::Value(Qnil) }); function.push_insn(entry, Insn::IfFalse { val, target: BranchEdge { target: side, args: vec![] } }); @@ -3371,7 +3356,7 @@ mod validation_tests { fn iftrue_mismatch_args() { let mut function = Function::new(std::ptr::null()); let entry = function.entry_block; - let side = function.new_block(); + let side = function.new_block(0); let val = function.push_insn(entry, 
Insn::Const { val: Const::Value(Qnil) }); function.push_insn(entry, Insn::IfTrue { val, target: BranchEdge { target: side, args: vec![val, val, val] } }); assert_matches_err(function.validate(), ValidationError::MismatchedBlockArity(entry, 0, 3)); @@ -3381,7 +3366,7 @@ mod validation_tests { fn iffalse_mismatch_args() { let mut function = Function::new(std::ptr::null()); let entry = function.entry_block; - let side = function.new_block(); + let side = function.new_block(0); let val = function.push_insn(entry, Insn::Const { val: Const::Value(Qnil) }); function.push_insn(entry, Insn::IfFalse { val, target: BranchEdge { target: side, args: vec![val, val, val] } }); assert_matches_err(function.validate(), ValidationError::MismatchedBlockArity(entry, 0, 3)); @@ -3391,7 +3376,7 @@ mod validation_tests { fn jump_mismatch_args() { let mut function = Function::new(std::ptr::null()); let entry = function.entry_block; - let side = function.new_block(); + let side = function.new_block(0); let val = function.push_insn(entry, Insn::Const { val: Const::Value(Qnil) }); function.push_insn(entry, Insn::Jump ( BranchEdge { target: side, args: vec![val, val, val] } )); assert_matches_err(function.validate(), ValidationError::MismatchedBlockArity(entry, 0, 3)); @@ -3423,8 +3408,8 @@ mod validation_tests { // This tests that one branch is missing a definition which fails. let mut function = Function::new(std::ptr::null()); let entry = function.entry_block; - let side = function.new_block(); - let exit = function.new_block(); + let side = function.new_block(0); + let exit = function.new_block(0); let v0 = function.push_insn(side, Insn::Const { val: Const::Value(VALUE::fixnum_from_usize(3)) }); function.push_insn(side, Insn::Jump(BranchEdge { target: exit, args: vec![] })); let val1 = function.push_insn(entry, Insn::Const { val: Const::CBool(false) }); @@ -3442,8 +3427,8 @@ mod validation_tests { // This tests that both branches with a definition succeeds. 
let mut function = Function::new(std::ptr::null()); let entry = function.entry_block; - let side = function.new_block(); - let exit = function.new_block(); + let side = function.new_block(0); + let exit = function.new_block(0); let v0 = function.push_insn(entry, Insn::Const { val: Const::Value(VALUE::fixnum_from_usize(3)) }); function.push_insn(side, Insn::Jump(BranchEdge { target: exit, args: vec![] })); let val = function.push_insn(entry, Insn::Const { val: Const::CBool(false) }); @@ -3483,7 +3468,7 @@ mod validation_tests { let mut function = Function::new(std::ptr::null()); let entry = function.entry_block; let val = function.push_insn(entry, Insn::Const { val: Const::Value(Qnil) }); - let exit = function.new_block(); + let exit = function.new_block(0); function.push_insn(entry, Insn::Jump(BranchEdge { target: exit, args: vec![] })); function.push_insn_id(exit, val); function.push_insn(exit, Insn::Return { val }); @@ -3578,8 +3563,8 @@ mod infer_tests { fn diamond_iffalse_merge_fixnum() { let mut function = Function::new(std::ptr::null()); let entry = function.entry_block; - let side = function.new_block(); - let exit = function.new_block(); + let side = function.new_block(0); + let exit = function.new_block(0); let v0 = function.push_insn(side, Insn::Const { val: Const::Value(VALUE::fixnum_from_usize(3)) }); function.push_insn(side, Insn::Jump(BranchEdge { target: exit, args: vec![v0] })); let val = function.push_insn(entry, Insn::Const { val: Const::CBool(false) }); @@ -3597,8 +3582,8 @@ mod infer_tests { fn diamond_iffalse_merge_bool() { let mut function = Function::new(std::ptr::null()); let entry = function.entry_block; - let side = function.new_block(); - let exit = function.new_block(); + let side = function.new_block(0); + let exit = function.new_block(0); let v0 = function.push_insn(side, Insn::Const { val: Const::Value(Qtrue) }); function.push_insn(side, Insn::Jump(BranchEdge { target: exit, args: vec![v0] })); let val = function.push_insn(entry, 
Insn::Const { val: Const::CBool(false) }); @@ -5015,23 +5000,53 @@ mod tests { } #[test] - fn test_invokebuiltin_delegate_with_args() { + fn test_invokebuiltin_delegate_annotated() { assert_method_hir_with_opcode("Float", YARVINSN_opt_invokebuiltin_delegate_leave, expect![[r#" fn Float@:197: bb0(v0:BasicObject, v1:BasicObject, v2:BasicObject, v3:BasicObject): - v6:BasicObject = InvokeBuiltin rb_f_float, v0, v1, v2 + v6:Flonum = InvokeBuiltin rb_f_float, v0, v1, v2 Jump bb1(v0, v1, v2, v3, v6) - bb1(v8:BasicObject, v9:BasicObject, v10:BasicObject, v11:BasicObject, v12:BasicObject): + bb1(v8:BasicObject, v9:BasicObject, v10:BasicObject, v11:BasicObject, v12:Flonum): Return v12 "#]]); } #[test] - fn test_invokebuiltin_delegate_without_args() { + fn test_invokebuiltin_cexpr_annotated() { assert_method_hir_with_opcode("class", YARVINSN_opt_invokebuiltin_delegate_leave, expect![[r#" fn class@:20: bb0(v0:BasicObject): - v3:BasicObject = InvokeBuiltin _bi20, v0 + v3:Class = InvokeBuiltin _bi20, v0 + Jump bb1(v0, v3) + bb1(v5:BasicObject, v6:Class): + Return v6 + "#]]); + } + + #[test] + fn test_invokebuiltin_delegate_with_args() { + // Using an unannotated builtin to test InvokeBuiltin generation + let iseq = crate::cruby::with_rubyvm(|| get_method_iseq("Dir", "open")); + assert!(iseq_contains_opcode(iseq, YARVINSN_opt_invokebuiltin_delegate), "iseq Dir.open does not contain invokebuiltin"); + let function = iseq_to_hir(iseq).unwrap(); + assert_function_hir(function, expect![[r#" + fn open@:184: + bb0(v0:BasicObject, v1:BasicObject, v2:BasicObject, v3:BasicObject, v4:BasicObject): + v5:NilClass = Const Value(nil) + v8:BasicObject = InvokeBuiltin dir_s_open, v0, v1, v2 + SideExit UnknownOpcode(getblockparamproxy) + "#]]); + } + + #[test] + fn test_invokebuiltin_delegate_without_args() { + let iseq = crate::cruby::with_rubyvm(|| get_method_iseq("GC", "enable")); + assert!(iseq_contains_opcode(iseq, YARVINSN_opt_invokebuiltin_delegate_leave), "iseq GC.enable does not contain 
invokebuiltin"); + let function = iseq_to_hir(iseq).unwrap(); + assert_function_hir(function, expect![[r#" + fn enable@:55: + bb0(v0:BasicObject): + v3:BasicObject = InvokeBuiltin gc_enable, v0 Jump bb1(v0, v3) bb1(v5:BasicObject, v6:BasicObject): Return v6 diff --git a/zjit/src/options.rs b/zjit/src/options.rs index bb26cc2deeb27e..340812f089acf3 100644 --- a/zjit/src/options.rs +++ b/zjit/src/options.rs @@ -16,9 +16,9 @@ pub static mut rb_zjit_profile_threshold: u64 = 1; #[allow(non_upper_case_globals)] pub static mut rb_zjit_call_threshold: u64 = 2; -/// True if --zjit-stats is enabled. -#[allow(non_upper_case_globals)] -static mut zjit_stats_enabled_p: bool = false; +/// ZJIT command-line options. This is set before rb_zjit_init() sets +/// ZJITState so that we can query some options while loading builtins. +pub static mut OPTIONS: Option = None; #[derive(Clone, Debug)] pub struct Options { @@ -53,19 +53,20 @@ pub struct Options { pub log_compiled_iseqs: Option, } -/// Return an Options with default values -pub fn init_options() -> Options { - Options { - num_profiles: 1, - stats: false, - debug: false, - dump_hir_init: None, - dump_hir_opt: None, - dump_lir: false, - dump_disasm: false, - perf: false, - allowed_iseqs: None, - log_compiled_iseqs: None, +impl Default for Options { + fn default() -> Self { + Options { + num_profiles: 1, + stats: false, + debug: false, + dump_hir_init: None, + dump_hir_opt: None, + dump_lir: false, + dump_disasm: false, + perf: false, + allowed_iseqs: None, + log_compiled_iseqs: None, + } } } @@ -95,28 +96,26 @@ macro_rules! get_option { // Unsafe is ok here because options are initialized // once before any Ruby code executes ($option_name:ident) => { - { - use crate::state::ZJITState; - ZJITState::get_options().$option_name - } + unsafe { crate::options::OPTIONS.as_ref() }.unwrap().$option_name }; } pub(crate) use get_option; -/// Allocate Options on the heap, initialize it, and return the address of it. 
-/// The return value will be modified by rb_zjit_parse_option() and then -/// passed to rb_zjit_init() for initialization. +/// Set default values to ZJIT options. Setting Some to OPTIONS will make `#with_jit` +/// enable the JIT hook while not enabling compilation yet. #[unsafe(no_mangle)] -pub extern "C" fn rb_zjit_init_options() -> *const u8 { - let options = init_options(); - Box::into_raw(Box::new(options)) as *const u8 +pub extern "C" fn rb_zjit_prepare_options() { + // rb_zjit_prepare_options() could be called for feature flags or $RUBY_ZJIT_ENABLE + // after rb_zjit_parse_option() is called, so we need to handle the already-initialized case. + if unsafe { OPTIONS.is_none() } { + unsafe { OPTIONS = Some(Options::default()); } + } } /// Parse a --zjit* command-line flag #[unsafe(no_mangle)] -pub extern "C" fn rb_zjit_parse_option(options: *const u8, str_ptr: *const c_char) -> bool { - let options = unsafe { &mut *(options as *mut Options) }; - parse_option(options, str_ptr).is_some() +pub extern "C" fn rb_zjit_parse_option(str_ptr: *const c_char) -> bool { + parse_option(str_ptr).is_some() } fn parse_jit_list(path_like: &str) -> HashSet { @@ -142,7 +141,10 @@ fn parse_jit_list(path_like: &str) -> HashSet { /// Expected to receive what comes after the third dash in "--zjit-*". /// Empty string means user passed only "--zjit". C code rejects when /// they pass exact "--zjit-". 
-fn parse_option(options: &mut Options, str_ptr: *const std::os::raw::c_char) -> Option<()> { +fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> { + rb_zjit_prepare_options(); + let options = unsafe { OPTIONS.as_mut().unwrap() }; + let c_str: &CStr = unsafe { CStr::from_ptr(str_ptr) }; let opt_str: &str = c_str.to_str().ok()?; @@ -161,7 +163,7 @@ fn parse_option(options: &mut Options, str_ptr: *const std::os::raw::c_char) -> ("call-threshold", _) => match opt_val.parse() { Ok(n) => { unsafe { rb_zjit_call_threshold = n; } - update_profile_threshold(options); + update_profile_threshold(); }, Err(_) => return None, }, @@ -169,13 +171,12 @@ fn parse_option(options: &mut Options, str_ptr: *const std::os::raw::c_char) -> ("num-profiles", _) => match opt_val.parse() { Ok(n) => { options.num_profiles = n; - update_profile_threshold(options); + update_profile_threshold(); }, Err(_) => return None, }, ("stats", "") => { - unsafe { zjit_stats_enabled_p = true; } options.stats = true; } @@ -217,15 +218,14 @@ fn parse_option(options: &mut Options, str_ptr: *const std::os::raw::c_char) -> } /// Update rb_zjit_profile_threshold based on rb_zjit_call_threshold and options.num_profiles -fn update_profile_threshold(options: &Options) { - unsafe { - if rb_zjit_call_threshold == 1 { - // If --zjit-call-threshold=1, never rewrite ISEQs to profile instructions. - rb_zjit_profile_threshold = 0; - } else { - // Otherwise, profile instructions at least once. - rb_zjit_profile_threshold = rb_zjit_call_threshold.saturating_sub(options.num_profiles as u64).max(1); - } +fn update_profile_threshold() { + if unsafe { rb_zjit_call_threshold == 1 } { + // If --zjit-call-threshold=1, never rewrite ISEQs to profile instructions. + unsafe { rb_zjit_profile_threshold = 0; } + } else { + // Otherwise, profile instructions at least once. 
+ let num_profiles = get_option!(num_profiles) as u64; + unsafe { rb_zjit_profile_threshold = rb_zjit_call_threshold.saturating_sub(num_profiles).max(1) }; } } @@ -254,12 +254,23 @@ macro_rules! debug { } pub(crate) use debug; -/// Return Qtrue if --zjit-stats has been enabled +/// Return Qtrue if --zjit* has been specified. For the `#with_jit` hook, +/// this becomes Qtrue before ZJIT is actually initialized and enabled. +#[unsafe(no_mangle)] +pub extern "C" fn rb_zjit_option_enabled_p(_ec: EcPtr, _self: VALUE) -> VALUE { + // If any --zjit* option is specified, OPTIONS becomes Some. + if unsafe { OPTIONS.is_some() } { + Qtrue + } else { + Qfalse + } +} + +/// Return Qtrue if --zjit-stats has been specified. #[unsafe(no_mangle)] pub extern "C" fn rb_zjit_stats_enabled_p(_ec: EcPtr, _self: VALUE) -> VALUE { - // ZJITState is not initialized yet when loading builtins, so this relies - // on a separate global variable. - if unsafe { zjit_stats_enabled_p } { + // Builtin zjit.rb calls this even if ZJIT is disabled, so OPTIONS may not be set. 
+ if unsafe { OPTIONS.as_ref() }.map_or(false, |opts| opts.stats) { Qtrue } else { Qfalse diff --git a/zjit/src/state.rs b/zjit/src/state.rs index ee7cd15d5fb9b1..79be91fd85e5c6 100644 --- a/zjit/src/state.rs +++ b/zjit/src/state.rs @@ -1,9 +1,11 @@ +use crate::codegen::gen_stub_exit; use crate::cruby::{self, rb_bug_panic_hook, rb_vm_insns_count, EcPtr, Qnil, VALUE}; use crate::cruby_methods; use crate::invariants::Invariants; -use crate::options::Options; use crate::asm::CodeBlock; +use crate::options::get_option; use crate::stats::Counters; +use crate::virtualmem::CodePtr; #[allow(non_upper_case_globals)] #[unsafe(no_mangle)] @@ -19,9 +21,6 @@ pub struct ZJITState { /// Inline code block (fast path) code_block: CodeBlock, - /// ZJIT command-line options - options: Options, - /// ZJIT statistics counters: Counters, @@ -33,17 +32,21 @@ pub struct ZJITState { /// Properties of core library methods method_annotations: cruby_methods::Annotations, + + /// Side-exit trampoline used when it fails to compile the ISEQ for a function stub + stub_exit: CodePtr, } /// Private singleton instance of the codegen globals static mut ZJIT_STATE: Option = None; impl ZJITState { - /// Initialize the ZJIT globals, given options allocated by rb_zjit_init_options() - pub fn init(options: Options) { + /// Initialize the ZJIT globals + pub fn init() { #[cfg(not(test))] - let cb = { + let mut cb = { use crate::cruby::*; + use crate::options::*; let exec_mem_size: usize = 64 * 1024 * 1024; // TODO: implement the option let virt_block: *mut u8 = unsafe { rb_zjit_reserve_addr_space(64 * 1024 * 1024) }; @@ -75,19 +78,21 @@ impl ZJITState { ); let mem_block = Rc::new(RefCell::new(mem_block)); - CodeBlock::new(mem_block.clone(), options.dump_disasm) + CodeBlock::new(mem_block.clone(), get_option!(dump_disasm)) }; #[cfg(test)] - let cb = CodeBlock::new_dummy(); + let mut cb = CodeBlock::new_dummy(); + + let stub_exit = gen_stub_exit(&mut cb).unwrap(); // Initialize the codegen globals instance 
let zjit_state = ZJITState { code_block: cb, - options, counters: Counters::default(), invariants: Invariants::default(), assert_compiles: false, method_annotations: cruby_methods::init(), + stub_exit, }; unsafe { ZJIT_STATE = Some(zjit_state); } } @@ -107,11 +112,6 @@ impl ZJITState { &mut ZJITState::get_instance().code_block } - /// Get a mutable reference to the options - pub fn get_options() -> &'static mut Options { - &mut ZJITState::get_instance().options - } - /// Get a mutable reference to the invariants pub fn get_invariants() -> &'static mut Invariants { &mut ZJITState::get_instance().invariants @@ -139,13 +139,13 @@ impl ZJITState { /// Was --zjit-save-compiled-iseqs specified? pub fn should_log_compiled_iseqs() -> bool { - ZJITState::get_instance().options.log_compiled_iseqs.is_some() + get_option!(log_compiled_iseqs).is_some() } /// Log the name of a compiled ISEQ to the file specified in options.log_compiled_iseqs pub fn log_compile(iseq_name: String) { assert!(ZJITState::should_log_compiled_iseqs()); - let filename = ZJITState::get_instance().options.log_compiled_iseqs.as_ref().unwrap(); + let filename = get_option!(log_compiled_iseqs).as_ref().unwrap(); use std::io::Write; let mut file = match std::fs::OpenOptions::new().create(true).append(true).open(filename) { Ok(f) => f, @@ -161,26 +161,31 @@ impl ZJITState { /// Check if we are allowed to compile a given ISEQ based on --zjit-allowed-iseqs pub fn can_compile_iseq(iseq: cruby::IseqPtr) -> bool { - if let Some(ref allowed_iseqs) = ZJITState::get_instance().options.allowed_iseqs { + if let Some(ref allowed_iseqs) = get_option!(allowed_iseqs) { let name = cruby::iseq_get_location(iseq, 0); allowed_iseqs.contains(&name) } else { true // If no restrictions, allow all ISEQs } } + + /// Return a code pointer to the side-exit trampoline for function stubs + pub fn get_stub_exit() -> CodePtr { + ZJITState::get_instance().stub_exit + } } -/// Initialize ZJIT, given options allocated by 
rb_zjit_init_options() +/// Initialize ZJIT #[unsafe(no_mangle)] -pub extern "C" fn rb_zjit_init(options: *const u8) { +pub extern "C" fn rb_zjit_init() { // Catch panics to avoid UB for unwinding into C frames. // See https://doc.rust-lang.org/nomicon/exception-safety.html let result = std::panic::catch_unwind(|| { + // Initialize ZJIT states cruby::ids::init(); + ZJITState::init(); - let options = unsafe { Box::from_raw(options as *mut Options) }; - ZJITState::init(*options); - + // Install a panic hook for ZJIT rb_bug_panic_hook(); // Discard the instruction count for boot which we never compile pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy