To: vim_dev@googlegroups.com Subject: Patch 7.3.1111 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 7.3.1111 Problem: nfa_recognize_char_class() implementation is inefficient. Solution: Use bits in an int instead of chars in a string. (Dominique Pelle) Files: src/regexp_nfa.c, src/testdir/test36.in, src/testdir/test36.ok *** ../vim-7.3.1110/src/regexp_nfa.c 2013-06-04 14:23:00.000000000 +0200 --- src/regexp_nfa.c 2013-06-04 17:38:43.000000000 +0200 *************** *** 380,417 **** char_u *end; int extra_newl; { ! int i; ! /* Each of these variables takes up a char in "config[]", ! * in the order they are here. */ ! int not = FALSE, af = FALSE, AF = FALSE, az = FALSE, AZ = FALSE, ! o7 = FALSE, o9 = FALSE, underscore = FALSE, newl = FALSE; char_u *p; ! #define NCONFIGS 16 ! int classid[NCONFIGS] = { ! NFA_DIGIT, NFA_NDIGIT, NFA_HEX, NFA_NHEX, ! NFA_OCTAL, NFA_NOCTAL, NFA_WORD, NFA_NWORD, ! NFA_HEAD, NFA_NHEAD, NFA_ALPHA, NFA_NALPHA, ! NFA_LOWER, NFA_NLOWER, NFA_UPPER, NFA_NUPPER ! }; ! char_u myconfig[10]; ! char_u config[NCONFIGS][9] = { ! "000000100", /* digit */ ! "100000100", /* non digit */ ! "011000100", /* hex-digit */ ! "111000100", /* non hex-digit */ ! "000001000", /* octal-digit */ ! "100001000", /* [^0-7] */ ! "000110110", /* [0-9A-Za-z_] */ ! "100110110", /* [^0-9A-Za-z_] */ ! "000110010", /* head of word */ ! "100110010", /* not head of word */ ! "000110000", /* alphabetic char a-z */ ! "100110000", /* non alphabetic char */ ! "000100000", /* lowercase letter */ ! "100100000", /* non lowercase */ ! "000010000", /* uppercase */ ! "100010000" /* non uppercase */ ! }; if (extra_newl == TRUE) newl = TRUE; --- 380,397 ---- char_u *end; int extra_newl; { ! # define CLASS_not 0x80 ! # define CLASS_af 0x40 ! # define CLASS_AF 0x20 ! # define CLASS_az 0x10 ! # define CLASS_AZ 0x08 ! # define CLASS_o7 0x04 ! # define CLASS_o9 0x02 ! # define CLASS_underscore 0x01 ! ! int newl = FALSE; char_u *p; ! int config = 0; if (extra_newl == TRUE) newl = TRUE; *************** *** 421,427 **** p = start; if (*p == '^') { ! not = TRUE; p++; } --- 401,407 ---- p = start; if (*p == '^') { ! config |= CLASS_not; p++; } *************** *** 434,470 **** case '0': if (*(p + 2) == '9') { ! o9 = TRUE; break; } else if (*(p + 2) == '7') { ! o7 = TRUE; break; } case 'a': if (*(p + 2) == 'z') { ! az = TRUE; break; } else if (*(p + 2) == 'f') { ! af = TRUE; break; } case 'A': if (*(p + 2) == 'Z') { ! AZ = TRUE; break; } else if (*(p + 2) == 'F') { ! AF = TRUE; break; } /* FALLTHROUGH */ --- 414,450 ---- case '0': if (*(p + 2) == '9') { ! config |= CLASS_o9; break; } else if (*(p + 2) == '7') { ! config |= CLASS_o7; break; } case 'a': if (*(p + 2) == 'z') { ! config |= CLASS_az; break; } else if (*(p + 2) == 'f') { ! config |= CLASS_af; break; } case 'A': if (*(p + 2) == 'Z') { ! config |= CLASS_AZ; break; } else if (*(p + 2) == 'F') { ! config |= CLASS_AF; break; } /* FALLTHROUGH */ *************** *** 480,486 **** } else if (*p == '_') { ! underscore = TRUE; p ++; } else if (*p == '\n') --- 460,466 ---- } else if (*p == '_') { ! config |= CLASS_underscore; p ++; } else if (*p == '\n') *************** *** 495,532 **** if (p != end) return FAIL; - /* build the config that represents the ranges we gathered */ - STRCPY(myconfig, "000000000"); - if (not == TRUE) - myconfig[0] = '1'; - if (af == TRUE) - myconfig[1] = '1'; - if (AF == TRUE) - myconfig[2] = '1'; - if (az == TRUE) - myconfig[3] = '1'; - if (AZ == TRUE) - myconfig[4] = '1'; - if (o7 == TRUE) - myconfig[5] = '1'; - if (o9 == TRUE) - myconfig[6] = '1'; - if (underscore == TRUE) - myconfig[7] = '1'; if (newl == TRUE) - { - myconfig[8] = '1'; extra_newl = ADD_NL; - } - /* try to recognize character classes */ - for (i = 0; i < NCONFIGS; i++) - if (STRNCMP(myconfig, config[i], 8) == 0) - return classid[i] + extra_newl; ! /* fallthrough => no success so far */ return FAIL; - - #undef NCONFIGS } /* --- 475,519 ---- if (p != end) return FAIL; if (newl == TRUE) extra_newl = ADD_NL; ! switch (config) ! { ! case CLASS_o9: ! return extra_newl + NFA_DIGIT; ! case CLASS_not | CLASS_o9: ! return extra_newl + NFA_NDIGIT; ! case CLASS_af | CLASS_AF | CLASS_o9: ! return extra_newl + NFA_HEX; ! case CLASS_not | CLASS_af | CLASS_AF | CLASS_o9: ! return extra_newl + NFA_NHEX; ! case CLASS_o7: ! return extra_newl + NFA_OCTAL; ! case CLASS_not | CLASS_o7: ! return extra_newl + NFA_NOCTAL; ! case CLASS_az | CLASS_AZ | CLASS_o9 | CLASS_underscore: ! return extra_newl + NFA_WORD; ! case CLASS_not | CLASS_az | CLASS_AZ | CLASS_o9 | CLASS_underscore: ! return extra_newl + NFA_NWORD; ! case CLASS_az | CLASS_AZ | CLASS_underscore: ! return extra_newl + NFA_HEAD; ! case CLASS_not | CLASS_az | CLASS_AZ | CLASS_underscore: ! return extra_newl + NFA_NHEAD; ! case CLASS_az | CLASS_AZ: ! return extra_newl + NFA_ALPHA; ! case CLASS_not | CLASS_az | CLASS_AZ: ! return extra_newl + NFA_NALPHA; ! case CLASS_az: ! return extra_newl + NFA_LOWER; ! case CLASS_not | CLASS_az: ! return extra_newl + NFA_NLOWER; ! case CLASS_AZ: ! return extra_newl + NFA_UPPER; ! case CLASS_not | CLASS_AZ: ! return extra_newl + NFA_NUPPER; ! } return FAIL; } /* *************** *** 900,906 **** EMSG_RET_FAIL(_(e_z1_not_allowed)); EMIT(NFA_ZREF1 + (no_Magic(c) - '1')); /* No need to set nfa_has_backref, the sub-matches don't ! * change when \z1 .. \z9 maches or not. */ re_has_z = REX_USE; break; case '(': --- 887,893 ---- EMSG_RET_FAIL(_(e_z1_not_allowed)); EMIT(NFA_ZREF1 + (no_Magic(c) - '1')); /* No need to set nfa_has_backref, the sub-matches don't ! * change when \z1 .. \z9 matches or not. */ re_has_z = REX_USE; break; case '(': *************** *** 4658,4664 **** } else { ! /* skip ofer the matched characters, set character * count in NFA_SKIP */ ll = nextlist; add_state = t->state->out; --- 4645,4651 ---- } else { ! /* skip over the matched characters, set character * count in NFA_SKIP */ ll = nextlist; add_state = t->state->out; *** ../vim-7.3.1110/src/testdir/test36.in 2010-08-15 21:57:29.000000000 +0200 --- src/testdir/test36.in 2013-06-04 17:38:14.000000000 +0200 *************** *** 1,40 **** ! Test character classes in regexp STARTTEST ! /^start-here ! j:s/\d//g ! j:s/\D//g ! j:s/\o//g ! j:s/\O//g ! j:s/\x//g ! j:s/\X//g ! j:s/\w//g ! j:s/\W//g ! j:s/\h//g ! j:s/\H//g ! j:s/\a//g ! j:s/\A//g ! j:s/\l//g ! j:s/\L//g ! j:s/\u//g ! j:s/\U//g :/^start-here/+1,$wq! test.out ENDTEST start-here !"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ - !"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ - !"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ - !"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ - !"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ - !"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ - !"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ - !"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ - !"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ - !"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ - !"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ - !"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ - !"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ - !"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ - !"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ - !"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ --- 1,105 ---- ! Test character classes in regexp using regexpengine 0, 1, 2. STARTTEST ! /^start-here/+1 ! Y:s/\%#=0\d//g ! p:s/\%#=1\d//g ! p:s/\%#=2\d//g ! p:s/\%#=0[0-9]//g ! p:s/\%#=1[0-9]//g ! p:s/\%#=2[0-9]//g ! p:s/\%#=0\D//g ! p:s/\%#=1\D//g ! p:s/\%#=2\D//g ! p:s/\%#=0[^0-9]//g ! p:s/\%#=1[^0-9]//g ! p:s/\%#=2[^0-9]//g ! p:s/\%#=0\o//g ! p:s/\%#=1\o//g ! p:s/\%#=2\o//g ! p:s/\%#=0[0-7]//g ! p:s/\%#=1[0-7]//g ! p:s/\%#=2[0-7]//g ! p:s/\%#=0\O//g ! p:s/\%#=1\O//g ! p:s/\%#=2\O//g ! p:s/\%#=0[^0-7]//g ! p:s/\%#=1[^0-7]//g ! p:s/\%#=2[^0-7]//g ! p:s/\%#=0\x//g ! p:s/\%#=1\x//g ! p:s/\%#=2\x//g ! p:s/\%#=0[0-9A-Fa-f]//g ! p:s/\%#=1[0-9A-Fa-f]//g ! p:s/\%#=2[0-9A-Fa-f]//g ! p:s/\%#=0\X//g ! p:s/\%#=1\X//g ! p:s/\%#=2\X//g ! p:s/\%#=0[^0-9A-Fa-f]//g ! p:s/\%#=1[^0-9A-Fa-f]//g ! p:s/\%#=2[^0-9A-Fa-f]//g ! p:s/\%#=0\w//g ! p:s/\%#=1\w//g ! p:s/\%#=2\w//g ! p:s/\%#=0[0-9A-Za-z_]//g ! p:s/\%#=1[0-9A-Za-z_]//g ! p:s/\%#=2[0-9A-Za-z_]//g ! p:s/\%#=0\W//g ! p:s/\%#=1\W//g ! p:s/\%#=2\W//g ! p:s/\%#=0[^0-9A-Za-z_]//g ! p:s/\%#=1[^0-9A-Za-z_]//g ! p:s/\%#=2[^0-9A-Za-z_]//g ! p:s/\%#=0\h//g ! p:s/\%#=1\h//g ! p:s/\%#=2\h//g ! p:s/\%#=0[A-Za-z_]//g ! p:s/\%#=1[A-Za-z_]//g ! p:s/\%#=2[A-Za-z_]//g ! p:s/\%#=0\H//g ! p:s/\%#=1\H//g ! p:s/\%#=2\H//g ! p:s/\%#=0[^A-Za-z_]//g ! p:s/\%#=1[^A-Za-z_]//g ! p:s/\%#=2[^A-Za-z_]//g ! p:s/\%#=0\a//g ! p:s/\%#=1\a//g ! p:s/\%#=2\a//g ! p:s/\%#=0[A-Za-z]//g ! p:s/\%#=1[A-Za-z]//g ! p:s/\%#=2[A-Za-z]//g ! p:s/\%#=0\A//g ! p:s/\%#=1\A//g ! p:s/\%#=2\A//g ! p:s/\%#=0[^A-Za-z]//g ! p:s/\%#=1[^A-Za-z]//g ! p:s/\%#=2[^A-Za-z]//g ! p:s/\%#=0\l//g ! p:s/\%#=1\l//g ! p:s/\%#=2\l//g ! p:s/\%#=0[a-z]//g ! p:s/\%#=1[a-z]//g ! p:s/\%#=2[a-z]//g ! p:s/\%#=0\L//g ! p:s/\%#=1\L//g ! p:s/\%#=2\L//g ! p:s/\%#=0[^a-z]//g ! p:s/\%#=1[^a-z]//g ! p:s/\%#=2[^a-z]//g ! p:s/\%#=0\u//g ! p:s/\%#=1\u//g ! p:s/\%#=2\u//g ! p:s/\%#=0[A-Z]//g ! p:s/\%#=1[A-Z]//g ! p:s/\%#=2[A-Z]//g ! p:s/\%#=0\U//g ! p:s/\%#=1\U//g ! p:s/\%#=2\U//g ! p:s/\%#=0[^A-Z]//g ! p:s/\%#=1[^A-Z]//g ! p:s/\%#=2[^A-Z]//g :/^start-here/+1,$wq! test.out ENDTEST start-here !"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ *** ../vim-7.3.1110/src/testdir/test36.ok 2010-08-15 21:57:29.000000000 +0200 --- src/testdir/test36.ok 2013-06-04 17:38:14.000000000 +0200 *************** *** 1,16 **** --- 1,96 ---- !"#$%&'()#+'-./:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ + !"#$%&'()#+'-./:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ + !"#$%&'()#+'-./:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ + !"#$%&'()#+'-./:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ + !"#$%&'()#+'-./:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ + !"#$%&'()#+'-./:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ + 0123456789 + 0123456789 + 0123456789 + 0123456789 + 0123456789 0123456789 !"#$%&'()#+'-./89:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ + !"#$%&'()#+'-./89:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ + !"#$%&'()#+'-./89:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ + !"#$%&'()#+'-./89:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ + !"#$%&'()#+'-./89:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ + !"#$%&'()#+'-./89:;<=>?@ABCDEFGHIXYZ[\]^_`abcdefghiwxyz{|}~ + 01234567 + 01234567 + 01234567 + 01234567 + 01234567 01234567 !"#$%&'()#+'-./:;<=>?@GHIXYZ[\]^_`ghiwxyz{|}~ + !"#$%&'()#+'-./:;<=>?@GHIXYZ[\]^_`ghiwxyz{|}~ + !"#$%&'()#+'-./:;<=>?@GHIXYZ[\]^_`ghiwxyz{|}~ + !"#$%&'()#+'-./:;<=>?@GHIXYZ[\]^_`ghiwxyz{|}~ + !"#$%&'()#+'-./:;<=>?@GHIXYZ[\]^_`ghiwxyz{|}~ + !"#$%&'()#+'-./:;<=>?@GHIXYZ[\]^_`ghiwxyz{|}~ + 0123456789ABCDEFabcdef + 0123456789ABCDEFabcdef + 0123456789ABCDEFabcdef + 0123456789ABCDEFabcdef + 0123456789ABCDEFabcdef 0123456789ABCDEFabcdef !"#$%&'()#+'-./:;<=>?@[\]^`{|}~ + !"#$%&'()#+'-./:;<=>?@[\]^`{|}~ + !"#$%&'()#+'-./:;<=>?@[\]^`{|}~ + !"#$%&'()#+'-./:;<=>?@[\]^`{|}~ + !"#$%&'()#+'-./:;<=>?@[\]^`{|}~ + !"#$%&'()#+'-./:;<=>?@[\]^`{|}~ + 0123456789ABCDEFGHIXYZ_abcdefghiwxyz + 0123456789ABCDEFGHIXYZ_abcdefghiwxyz + 0123456789ABCDEFGHIXYZ_abcdefghiwxyz + 0123456789ABCDEFGHIXYZ_abcdefghiwxyz + 0123456789ABCDEFGHIXYZ_abcdefghiwxyz 0123456789ABCDEFGHIXYZ_abcdefghiwxyz !"#$%&'()#+'-./0123456789:;<=>?@[\]^`{|}~ + !"#$%&'()#+'-./0123456789:;<=>?@[\]^`{|}~ + !"#$%&'()#+'-./0123456789:;<=>?@[\]^`{|}~ + !"#$%&'()#+'-./0123456789:;<=>?@[\]^`{|}~ + !"#$%&'()#+'-./0123456789:;<=>?@[\]^`{|}~ + !"#$%&'()#+'-./0123456789:;<=>?@[\]^`{|}~ + ABCDEFGHIXYZ_abcdefghiwxyz + ABCDEFGHIXYZ_abcdefghiwxyz + ABCDEFGHIXYZ_abcdefghiwxyz + ABCDEFGHIXYZ_abcdefghiwxyz + ABCDEFGHIXYZ_abcdefghiwxyz ABCDEFGHIXYZ_abcdefghiwxyz !"#$%&'()#+'-./0123456789:;<=>?@[\]^_`{|}~ + !"#$%&'()#+'-./0123456789:;<=>?@[\]^_`{|}~ + !"#$%&'()#+'-./0123456789:;<=>?@[\]^_`{|}~ + !"#$%&'()#+'-./0123456789:;<=>?@[\]^_`{|}~ + !"#$%&'()#+'-./0123456789:;<=>?@[\]^_`{|}~ + !"#$%&'()#+'-./0123456789:;<=>?@[\]^_`{|}~ + ABCDEFGHIXYZabcdefghiwxyz + ABCDEFGHIXYZabcdefghiwxyz + ABCDEFGHIXYZabcdefghiwxyz + ABCDEFGHIXYZabcdefghiwxyz + ABCDEFGHIXYZabcdefghiwxyz ABCDEFGHIXYZabcdefghiwxyz !"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`{|}~ + !"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`{|}~ + !"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`{|}~ + !"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`{|}~ + !"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`{|}~ + !"#$%&'()#+'-./0123456789:;<=>?@ABCDEFGHIXYZ[\]^_`{|}~ + abcdefghiwxyz + abcdefghiwxyz + abcdefghiwxyz + abcdefghiwxyz + abcdefghiwxyz abcdefghiwxyz !"#$%&'()#+'-./0123456789:;<=>?@[\]^_`abcdefghiwxyz{|}~ + !"#$%&'()#+'-./0123456789:;<=>?@[\]^_`abcdefghiwxyz{|}~ + !"#$%&'()#+'-./0123456789:;<=>?@[\]^_`abcdefghiwxyz{|}~ + !"#$%&'()#+'-./0123456789:;<=>?@[\]^_`abcdefghiwxyz{|}~ + !"#$%&'()#+'-./0123456789:;<=>?@[\]^_`abcdefghiwxyz{|}~ + !"#$%&'()#+'-./0123456789:;<=>?@[\]^_`abcdefghiwxyz{|}~ + ABCDEFGHIXYZ + ABCDEFGHIXYZ + ABCDEFGHIXYZ + ABCDEFGHIXYZ + ABCDEFGHIXYZ ABCDEFGHIXYZ *** ../vim-7.3.1110/src/version.c 2013-06-04 14:23:00.000000000 +0200 --- src/version.c 2013-06-04 17:42:06.000000000 +0200 *************** *** 730,731 **** --- 730,733 ---- { /* Add new patch number below this line */ + /**/ + 1111, /**/ -- Ed's Radiator Shop: The Best Place in Town to Take a Leak. /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ \\\ an exciting new programming language -- http://www.Zimbu.org /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///