untrusted comment: verify with openbsd-67-base.pub RWRmkIA877Io3lZ4k6gA70BhQ+PwjaA42aQIGV2F8GjFbTnWpnzXH5/L9vzK+kuliLNlbZMSvqjQ5aNrVP4wsOEw5uORvpzkbwc= OpenBSD 6.7 errata 007, June 1, 2020: Several problems in Perl's regular expression compiler could lead to corruption of the intermediate language state of a compiled regular expression. Apply by doing: signify -Vep /etc/signify/openbsd-67-base.pub -x 007_perl.patch.sig \ -m - | (cd /usr/src && patch -p0) And then rebuild and install perl: cd /usr/src/gnu/usr.bin/perl/ make -f Makefile.bsd-wrapper obj make -f Makefile.bsd-wrapper depend make -f Makefile.bsd-wrapper make -f Makefile.bsd-wrapper install Index: gnu/usr.bin/perl/embed.fnc =================================================================== RCS file: /cvs/src/gnu/usr.bin/perl/embed.fnc,v retrieving revision 1.10 diff -u -p -r1.10 embed.fnc --- gnu/usr.bin/perl/embed.fnc 9 Apr 2020 01:32:11 -0000 1.10 +++ gnu/usr.bin/perl/embed.fnc 25 May 2020 22:58:55 -0000 @@ -2480,7 +2480,8 @@ Es |SSize_t|study_chunk |NN RExC_state_t |NULLOK struct scan_data_t *data \ |I32 stopparen|U32 recursed_depth \ |NULLOK regnode_ssc *and_withp \ - |U32 flags|U32 depth + |U32 flags|U32 depth|bool was_mutate_ok +Es |void |rck_elide_nothing|NN regnode *node EsR |SV * |get_ANYOFM_contents|NN const regnode * n EsRn |U32 |add_data |NN RExC_state_t* const pRExC_state \ |NN const char* const s|const U32 n Index: gnu/usr.bin/perl/embed.h =================================================================== RCS file: /cvs/src/gnu/usr.bin/perl/embed.h,v retrieving revision 1.25 diff -u -p -r1.25 embed.h --- gnu/usr.bin/perl/embed.h 30 Dec 2019 02:13:41 -0000 1.25 +++ gnu/usr.bin/perl/embed.h 25 May 2020 22:58:56 -0000 @@ -1208,6 +1208,7 @@ #define parse_lparen_question_flags(a) S_parse_lparen_question_flags(aTHX_ a) #define parse_uniprop_string(a,b,c,d,e,f,g,h,i) Perl_parse_uniprop_string(aTHX_ a,b,c,d,e,f,g,h,i) #define populate_ANYOF_from_invlist(a,b) S_populate_ANYOF_from_invlist(aTHX_ a,b) +#define rck_elide_nothing(a) S_rck_elide_nothing(aTHX_ a) #define reg(a,b,c,d) S_reg(aTHX_ a,b,c,d) #define reg2Lanode(a,b,c,d) S_reg2Lanode(aTHX_ a,b,c,d) #define reg_node(a,b) S_reg_node(aTHX_ a,b) @@ -1238,7 +1239,7 @@ #define ssc_is_cp_posixl_init S_ssc_is_cp_posixl_init #define ssc_or(a,b,c) S_ssc_or(aTHX_ a,b,c) #define ssc_union(a,b,c) S_ssc_union(aTHX_ a,b,c) -#define study_chunk(a,b,c,d,e,f,g,h,i,j,k) S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k) +#define study_chunk(a,b,c,d,e,f,g,h,i,j,k,l) S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k,l) # endif # if defined(PERL_IN_REGCOMP_C) || defined (PERL_IN_DUMP_C) #define _invlist_dump(a,b,c,d) Perl__invlist_dump(aTHX_ a,b,c,d) Index: gnu/usr.bin/perl/proto.h =================================================================== RCS file: /cvs/src/gnu/usr.bin/perl/proto.h,v retrieving revision 1.26 diff -u -p -r1.26 proto.h --- gnu/usr.bin/perl/proto.h 9 Apr 2020 01:32:11 -0000 1.26 +++ gnu/usr.bin/perl/proto.h 25 May 2020 22:58:56 -0000 @@ -5543,6 +5543,9 @@ PERL_CALLCONV SV * Perl_parse_uniprop_st STATIC void S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr); #define PERL_ARGS_ASSERT_POPULATE_ANYOF_FROM_INVLIST \ assert(node); assert(invlist_ptr) +STATIC void S_rck_elide_nothing(pTHX_ regnode *node); +#define PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING \ + assert(node) PERL_STATIC_NO_RET void S_re_croak2(pTHX_ bool utf8, const char* pat1, const char* pat2, ...) __attribute__noreturn__; #define PERL_ARGS_ASSERT_RE_CROAK2 \ @@ -5656,7 +5659,7 @@ PERL_STATIC_INLINE void S_ssc_union(pTHX #define PERL_ARGS_ASSERT_SSC_UNION \ assert(ssc); assert(invlist) #endif -STATIC SSize_t S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U32 recursed_depth, regnode_ssc *and_withp, U32 flags, U32 depth); +STATIC SSize_t S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U32 recursed_depth, regnode_ssc *and_withp, U32 flags, U32 depth, bool was_mutate_ok); #define PERL_ARGS_ASSERT_STUDY_CHUNK \ assert(pRExC_state); assert(scanp); assert(minlenp); assert(deltap); assert(last) #endif Index: gnu/usr.bin/perl/regcomp.c =================================================================== RCS file: /cvs/src/gnu/usr.bin/perl/regcomp.c,v retrieving revision 1.29 diff -u -p -r1.29 regcomp.c --- gnu/usr.bin/perl/regcomp.c 9 Apr 2020 01:32:12 -0000 1.29 +++ gnu/usr.bin/perl/regcomp.c 25 May 2020 22:58:58 -0000 @@ -106,6 +106,7 @@ typedef struct scan_frame { regnode *next_regnode; /* next node to process when last is reached */ U32 prev_recursed_depth; I32 stopparen; /* what stopparen do we use */ + bool in_gosub; /* this or an outer frame is for GOSUB */ struct scan_frame *this_prev_frame; /* this previous frame */ struct scan_frame *prev_frame; /* previous frame */ @@ -4450,6 +4451,44 @@ S_unwind_scan_frames(pTHX_ const void *p } while (f); } +/* Follow the next-chain of the current node and optimize away + all the NOTHINGs from it. + */ +STATIC void +S_rck_elide_nothing(pTHX_ regnode *node) +{ + dVAR; + + PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING; + + if (OP(node) != CURLYX) { + const int max = (reg_off_by_arg[OP(node)] + ? I32_MAX + /* I32 may be smaller than U16 on CRAYs! */ + : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX)); + int off = (reg_off_by_arg[OP(node)] ? ARG(node) : NEXT_OFF(node)); + int noff; + regnode *n = node; + + /* Skip NOTHING and LONGJMP. */ + while ( + (n = regnext(n)) + && ( + (PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n))) + || ((OP(n) == LONGJMP) && (noff = ARG(n))) + ) + && off + noff < max + ) { + off += noff; + } + if (reg_off_by_arg[OP(node)]) + ARG(node) = off; + else + NEXT_OFF(node) = off; + } + return; +} + /* the return from this sub is the minimum length that could possibly match */ STATIC SSize_t S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, @@ -4459,7 +4498,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_ I32 stopparen, U32 recursed_depth, regnode_ssc *and_withp, - U32 flags, U32 depth) + U32 flags, U32 depth, bool was_mutate_ok) /* scanp: Start here (read-write). */ /* deltap: Write maxlen-minlen here. */ /* last: Stop before this one. */ @@ -4538,6 +4577,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_ node length to get a real minimum (because the folded version may be shorter) */ bool unfolded_multi_char = FALSE; + /* avoid mutating ops if we are anywhere within the recursed or + * enframed handling for a GOSUB: the outermost level will handle it. + */ + bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub); /* Peephole optimizer: */ DEBUG_STUDYDATA("Peep", data, depth, is_inf); DEBUG_PEEP("Peep", scan, depth, flags); @@ -4548,30 +4591,13 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_ * parsing code, as each (?:..) is handled by a different invocation of * reg() -- Yves */ - JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0); + if (mutate_ok) + JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0); - /* Follow the next-chain of the current node and optimize - away all the NOTHINGs from it. */ - if (OP(scan) != CURLYX) { - const int max = (reg_off_by_arg[OP(scan)] - ? I32_MAX - /* I32 may be smaller than U16 on CRAYs! */ - : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX)); - int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan)); - int noff; - regnode *n = scan; - - /* Skip NOTHING and LONGJMP. */ - while ((n = regnext(n)) - && ((PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n))) - || ((OP(n) == LONGJMP) && (noff = ARG(n)))) - && off + noff < max) - off += noff; - if (reg_off_by_arg[OP(scan)]) - ARG(scan) = off; - else - NEXT_OFF(scan) = off; - } + /* Follow the next-chain of the current node and optimize + away all the NOTHINGs from it. + */ + rck_elide_nothing(scan); /* The principal pseudo-switch. Cannot be a switch, since we look into several different things. */ @@ -4598,7 +4624,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_ /* DEFINEP study_chunk() recursion */ (void)study_chunk(pRExC_state, &scan, &minlen, &deltanext, next, &data_fake, stopparen, - recursed_depth, NULL, f, depth+1); + recursed_depth, NULL, f, depth+1, mutate_ok); scan = next; } else @@ -4666,7 +4692,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_ /* recurse study_chunk() for each BRANCH in an alternation */ minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext, next, &data_fake, stopparen, - recursed_depth, NULL, f, depth+1); + recursed_depth, NULL, f, depth+1, + mutate_ok); if (min1 > minnext) min1 = minnext; @@ -4733,9 +4760,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_ } } - if (PERL_ENABLE_TRIE_OPTIMISATION && - OP( startbranch ) == BRANCH ) - { + if (PERL_ENABLE_TRIE_OPTIMISATION + && OP(startbranch) == BRANCH + && mutate_ok + ) { /* demq. Assuming this was/is a branch we are dealing with: 'scan' @@ -5190,6 +5218,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_ newframe->stopparen = stopparen; newframe->prev_recursed_depth = recursed_depth; newframe->this_prev_frame= frame; + newframe->in_gosub = ( + (frame && frame->in_gosub) || OP(scan) == GOSUB + ); DEBUG_STUDYDATA("frame-new", data, depth, is_inf); DEBUG_PEEP("fnew", scan, depth, flags); @@ -5347,7 +5378,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_ /* This temporary node can now be turned into EXACTFU, and * must, as regexec.c doesn't handle it */ - if (OP(next) == EXACTFU_S_EDGE) { + if (OP(next) == EXACTFU_S_EDGE && mutate_ok) { OP(next) = EXACTFU; } @@ -5355,8 +5386,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_ && isALPHA_A(* STRING(next)) && ( OP(next) == EXACTFAA || ( OP(next) == EXACTFU - && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(next))))) - { + && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(next)))) + && mutate_ok + ) { /* These differ in just one bit */ U8 mask = ~ ('A' ^ 'a'); @@ -5443,7 +5475,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_ (mincount == 0 ? (f & ~SCF_DO_SUBSTR) : f) - ,depth+1); + , depth+1, mutate_ok); if (flags & SCF_DO_STCLASS) data->start_class = oclass; @@ -5489,6 +5521,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_ RExC_precomp))); } + if ( ( minnext > 0 && mincount >= SSize_t_MAX / minnext ) + || min >= SSize_t_MAX - minnext * mincount ) + { + FAIL("Regexp out of space"); + } + min += minnext * mincount; is_inf_internal |= deltanext == SSize_t_MAX || (maxcount == REG_INFTY && minnext + deltanext > 0); @@ -5503,7 +5541,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_ if ( OP(oscan) == CURLYX && data && data->flags & SF_IN_PAR && !(data->flags & SF_HAS_EVAL) - && !deltanext && minnext == 1 ) { + && !deltanext && minnext == 1 + && mutate_ok + ) { /* Try to optimize to CURLYN. */ regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; regnode * const nxt1 = nxt; @@ -5553,10 +5593,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_ && !(data->flags & SF_HAS_EVAL) && !deltanext /* atom is fixed width */ && minnext != 0 /* CURLYM can't handle zero width */ - /* Nor characters whose fold at run-time may be * multi-character */ && ! (RExC_seen & REG_UNFOLDED_MULTI_SEEN) + && mutate_ok ) { /* XXXX How to optimize if data == 0? */ /* Optimize to a simpler form. */ @@ -5609,7 +5649,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_ /* recurse study_chunk() on optimised CURLYX => CURLYM */ study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt, NULL, stopparen, recursed_depth, NULL, 0, - depth+1); + depth+1, mutate_ok); } else oscan->flags = 0; @@ -5739,11 +5779,7 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RH if (data && (fl & SF_HAS_EVAL)) data->flags |= SF_HAS_EVAL; optimize_curly_tail: - if (OP(oscan) != CURLYX) { - while (PL_regkind[OP(next = regnext(oscan))] == NOTHING - && NEXT_OFF(next)) - NEXT_OFF(oscan) += NEXT_OFF(next); - } + rck_elide_nothing(oscan); continue; default: @@ -6018,7 +6054,8 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RH /* recurse study_chunk() for lookahead body */ minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext, last, &data_fake, stopparen, - recursed_depth, NULL, f, depth+1); + recursed_depth, NULL, f, depth+1, + mutate_ok); if (scan->flags) { if ( deltanext < 0 || deltanext > (I32) U8_MAX @@ -6123,7 +6160,7 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RH *minnextp = study_chunk(pRExC_state, &nscan, minnextp, &deltanext, last, &data_fake, stopparen, recursed_depth, NULL, - f, depth+1); + f, depth+1, mutate_ok); if (scan->flags) { assert(0); /* This code has never been tested since this is normally not compiled */ @@ -6291,7 +6328,8 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RH /* optimise study_chunk() for TRIE */ minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext, (regnode *)nextbranch, &data_fake, - stopparen, recursed_depth, NULL, f, depth+1); + stopparen, recursed_depth, NULL, f, depth+1, + mutate_ok); } if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH) nextbranch= regnext((regnode*)nextbranch); @@ -7740,6 +7778,13 @@ Perl_re_op_compile(pTHX_ SV ** const pat /* We have that number in RExC_npar */ RExC_total_parens = RExC_npar; + + /* XXX For backporting, use long jumps if there is any possibility of + * overflow */ + if (RExC_size > U16_MAX && ! RExC_use_BRANCHJ) { + RExC_use_BRANCHJ = TRUE; + flags |= RESTART_PARSE; + } } else if (! MUST_RESTART(flags)) { ReREFCNT_dec(Rx); @@ -8077,7 +8122,7 @@ Perl_re_op_compile(pTHX_ SV ** const pat &data, -1, 0, NULL, SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag | (restudied ? SCF_TRIE_DOING_RESTUDY : 0), - 0); + 0, TRUE); CHECK_RESTUDY_GOTO_butfirst(LEAVE_with_name("study_chunk")); @@ -8206,7 +8251,7 @@ Perl_re_op_compile(pTHX_ SV ** const pat SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS|(restudied ? SCF_TRIE_DOING_RESTUDY : 0), - 0); + 0, TRUE); CHECK_RESTUDY_GOTO_butfirst(NOOP);