|
Atrinik Server 2.5
|
00001 /************************************************************************ 00002 * Atrinik, a Multiplayer Online Role Playing Game * 00003 * * 00004 * Copyright (C) 2009-2011 Alex Tokar and Atrinik Development Team * 00005 * * 00006 * Fork from Daimonin (Massive Multiplayer Online Role Playing Game) * 00007 * and Crossfire (Multiplayer game for X-windows). * 00008 * * 00009 * This program is free software; you can redistribute it and/or modify * 00010 * it under the terms of the GNU General Public License as published by * 00011 * the Free Software Foundation; either version 2 of the License, or * 00012 * (at your option) any later version. * 00013 * * 00014 * This program is distributed in the hope that it will be useful, * 00015 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 00016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 00017 * GNU General Public License for more details. * 00018 * * 00019 * You should have received a copy of the GNU General Public License * 00020 * along with this program; if not, write to the Free Software * 00021 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * 00022 * * 00023 * The author can be reached at admin@atrinik.org * 00024 ************************************************************************/ 00025 00041 #include <stdio.h> 00042 #include <stdlib.h> 00043 #include <memory.h> 00044 #include <limits.h> 00045 #include <re-cmp.h> 00046 #include <ctype.h> 00047 00048 const char *re_cmp(const char *, const char *); 00049 static int re_cmp_step(const char *, const char *, int, int); 00050 static void re_init(void); 00051 static int re_match_token(unsigned char, selection *); 00052 static const char *re_get_token(selection *, const char *); 00053 #ifdef DEBUG2 00054 static void re_dump_sel(selection *); 00055 #endif 00056 00057 static int re_init_done = 0; 00058 static selection *re_token[RE_TOKEN_MAX]; 00059 static const char *re_substr[RE_TOKEN_MAX]; 00060 static unsigned int re_token_depth; 00061 00069 const char *re_cmp(const char *str, const char *regexp) 00070 { 00071 const char *next_regexp; 00072 int once = 0; 00073 int matched = 0; 00074 00075 if (re_init_done == 0) 00076 { 00077 re_init(); 00078 } 00079 00080 #ifdef SAFE_CHECKS 00081 if (regexp == NULL || str == NULL) 00082 { 00083 return NULL; 00084 } 00085 #endif 00086 00087 if (*regexp == '^') 00088 { 00089 once = 1; 00090 ++regexp; 00091 } 00092 00093 if (*regexp == '\0') 00094 { 00095 /* // or /^/ matches any string */ 00096 return str; 00097 } 00098 00099 next_regexp = re_get_token(re_token[0], regexp); 00100 re_token_depth = 0; 00101 re_substr[0] = next_regexp; 00102 00103 while (*str != '\0' && !(matched = re_match_token(*str, re_token[0]))) 00104 { 00105 /* If '^' was present and we didn't get a match above, return 00106 * NULL now so things like '^hi$' won't match on 'thi'. */ 00107 if (once) 00108 { 00109 return NULL; 00110 } 00111 00112 str++; 00113 } 00114 00115 if (matched && *next_regexp == 0) 00116 { 00117 return str; 00118 } 00119 00120 /* Apologies for the nearly duplicated code below, hopefully it 00121 * speeds things up. */ 00122 if (once) 00123 { 00124 switch (re_token[0]->repeat) 00125 { 00126 case rep_once: 00127 if (matched == 0) 00128 { 00129 return NULL; 00130 } 00131 00132 break; 00133 00134 case rep_once_or_more: 00135 if (matched == 0) 00136 { 00137 return NULL; 00138 } 00139 00140 if (re_cmp_step(str + 1, regexp, 0, 1)) 00141 { 00142 return str; 00143 } 00144 00145 break; 00146 00147 case rep_null_or_once: 00148 if (matched == 0) 00149 { 00150 return re_cmp_step(str, next_regexp, 1, 0) ? str : NULL; 00151 } 00152 00153 break; 00154 00155 case rep_null_or_more: 00156 if (matched) 00157 { 00158 if (re_cmp_step(str + 1, regexp, 0, 1)) 00159 { 00160 return str; 00161 } 00162 } 00163 else 00164 { 00165 return re_cmp_step(str, next_regexp, 1, 0) ? str : NULL; 00166 } 00167 00168 break; 00169 } 00170 00171 return re_cmp_step(str + 1, next_regexp, 1, 0) ? str : NULL; 00172 } 00173 00174 if (matched) 00175 { 00176 switch (re_token[0]->repeat) 00177 { 00178 case rep_once: 00179 case rep_null_or_once: 00180 break; 00181 00182 case rep_once_or_more: 00183 case rep_null_or_more: 00184 if (re_cmp_step(str + 1, regexp, 0, 1)) 00185 { 00186 return str; 00187 } 00188 00189 break; 00190 } 00191 00192 /* The logic here is that re_match_token only sees if the one 00193 * letter matches. Thus, if the regex is like '@match dragon', and 00194 * the the user enters anything with 'd', re_match_token returns 00195 * true, but they really need to match the entire regexp, which 00196 * re_cmp_step will do. However, what happens is that there can 00197 * be a case where the string being match is something like 00198 * 'where is dragon'. In this case, the re_match_token matches 00199 * that first 'd', but the re_cmp_step below, fails because the 00200 * next character ('a') doesn't match the 'r'. So we call re_cmp 00201 * with the string after the first 'a', so that it should hopefully 00202 * match up properly. */ 00203 if (re_cmp_step(str + 1, next_regexp, 1, 0)) 00204 { 00205 return str; 00206 } 00207 else if (*(str + 1) != 0) 00208 { 00209 return re_cmp(str + 1, regexp); 00210 } 00211 } 00212 00213 return NULL; 00214 } 00215 00223 static int re_cmp_step(const char *str, const char *regexp, int slot, int matches) 00224 { 00225 const char *next_regexp; 00226 int matched; 00227 00228 if (*regexp == '\0') 00229 { 00230 /* When we reach the end of the regexp, the match is a success */ 00231 return 1; 00232 } 00233 00234 /* This chunk of code makes sure that the regexp-tokenising happens 00235 * only once. We only tokenise as much as we need. */ 00236 if ((unsigned int) slot > re_token_depth) 00237 { 00238 re_token_depth = slot; 00239 00240 if (re_token[slot] == NULL) 00241 { 00242 re_token[slot] = (selection *) malloc(sizeof(selection)); 00243 } 00244 00245 next_regexp = re_get_token(re_token[slot], regexp); 00246 00247 if (next_regexp == NULL) 00248 { 00249 /* Syntax error, what else can we do? */ 00250 return 0; 00251 } 00252 00253 re_substr[slot] = next_regexp; 00254 } 00255 else 00256 { 00257 next_regexp = re_substr[slot]; 00258 } 00259 00260 matched = re_match_token(*str, re_token[slot]); 00261 00262 if (matched) 00263 { 00264 ++matches; 00265 } 00266 00267 if (*str == '\0') 00268 { 00269 return (*next_regexp == '\0' || re_token[slot]->type == sel_end) && matched; 00270 } 00271 00272 switch (re_token[slot]->repeat) 00273 { 00274 case rep_once: 00275 /* (matches == 1) => (matched == 1) */ 00276 if (matches == 1) 00277 { 00278 return re_cmp_step(str + 1, next_regexp, slot + 1, 0); 00279 } 00280 00281 return 0; 00282 00283 case rep_once_or_more: 00284 /* (matched == 1) => (matches >= 1) */ 00285 if (matched) 00286 { 00287 /* First check if the current token repeats more */ 00288 if (re_cmp_step(str + 1, regexp, slot, matches)) 00289 { 00290 return 1; 00291 } 00292 00293 return re_cmp_step(str + 1, next_regexp, slot + 1, 0); 00294 } 00295 00296 return 0; 00297 00298 case rep_null_or_once: 00299 /* We must go on to the next token, but should we advance str? */ 00300 if (matches == 0) 00301 { 00302 return re_cmp_step(str, next_regexp, slot + 1, 0); 00303 } 00304 else if (matches == 1) 00305 { 00306 return re_cmp_step(str + 1, next_regexp, slot + 1, 0); 00307 } 00308 00309 /* Not reached */ 00310 return 0; 00311 00312 case rep_null_or_more: 00313 if (matched) 00314 { 00315 /* Look for further repeats, advance str */ 00316 if (re_cmp_step(str + 1, regexp, slot, matches)) 00317 { 00318 return 1; 00319 } 00320 00321 return re_cmp_step(str, next_regexp, slot + 1, 0); 00322 } 00323 00324 return re_cmp_step(str, next_regexp, slot + 1, 0); 00325 } 00326 00327 return 0; 00328 } 00329 00332 static void re_init() 00333 { 00334 int i; 00335 00336 re_token[0] = (selection *) malloc(sizeof(selection)); 00337 00338 for (i = 1; i < RE_TOKEN_MAX; i++) 00339 { 00340 re_token[i] = NULL; 00341 } 00342 00343 re_init_done = 1; 00344 } 00345 00351 static int re_match_token(unsigned char c, selection *sel) 00352 { 00353 switch (sel->type) 00354 { 00355 case sel_any: 00356 return 1; 00357 00358 case sel_end: 00359 return (c == '\0'); 00360 00361 case sel_single: 00362 return (tolower(c) == tolower(sel->u.single)); 00363 00364 case sel_range: 00365 return (c >= sel->u.range.low && c <= sel->u.range.high); 00366 00367 case sel_array: 00368 return (sel->u.array[c]); 00369 00370 case sel_not_single: 00371 return (tolower(c) != tolower(sel->u.single)); 00372 00373 case sel_not_range: 00374 return (c < sel->u.range.low && c > sel->u.range.high); 00375 } 00376 00377 return 0; 00378 } 00379 00387 static const char *re_get_token(selection *sel, const char *regexp) 00388 { 00389 #ifdef SAFE_CHECKS 00390 # define exit_if_null if (*regexp == '\0') return NULL 00391 #else 00392 # define exit_if_null 00393 #endif 00394 00395 int quoted = 0; 00396 unsigned char looking_at; 00397 00398 #ifdef SAFE_CHECKS 00399 if (sel == NULL || regexp == NULL || *regexp == '\0') 00400 { 00401 return NULL; 00402 } 00403 #endif 00404 00405 do 00406 { 00407 looking_at = *regexp++; 00408 00409 switch (looking_at) 00410 { 00411 case '$': 00412 if (quoted) 00413 { 00414 quoted = 0; 00415 sel->type = sel_single; 00416 sel->u.single = looking_at; 00417 } 00418 else 00419 { 00420 sel->type = sel_end; 00421 } 00422 00423 break; 00424 00425 case '.': 00426 if (quoted) 00427 { 00428 quoted = 0; 00429 sel->type = sel_single; 00430 sel->u.single = looking_at; 00431 } 00432 else 00433 { 00434 sel->type = sel_any; 00435 } 00436 00437 break; 00438 00439 case '[': 00440 /* The fun stuff... perhaps a little obfuscated since I 00441 * don't trust the compiler to analyze liveness. */ 00442 if (quoted) 00443 { 00444 quoted = 0; 00445 sel->type = sel_single; 00446 sel->u.single = looking_at; 00447 } 00448 else 00449 { 00450 int neg = 0; 00451 unsigned char first, last = 0; 00452 00453 exit_if_null; 00454 looking_at = *regexp++; 00455 00456 if (looking_at == '^') 00457 { 00458 neg = 1; 00459 exit_if_null; 00460 looking_at = *regexp++; 00461 } 00462 00463 first = looking_at; 00464 exit_if_null; 00465 00466 looking_at = *regexp++; 00467 00468 if (looking_at == ']') 00469 { 00470 /* On the form [q] or [^q] */ 00471 sel->type = neg ? sel_not_single : sel_single; 00472 sel->u.single = first; 00473 break; 00474 } 00475 else if (looking_at == '-') 00476 { 00477 exit_if_null; 00478 last = *regexp++; 00479 00480 if (last == ']') 00481 { 00482 /* On the form [A-] or [^A-]. Checking for 00483 * [,-] and making it a range is probably not 00484 * worth it :-) */ 00485 sel->type = sel_array; 00486 memset(sel->u.array, neg, sizeof(sel->u.array)); 00487 sel->u.array[first] = sel->u.array['-'] = !neg; 00488 break; 00489 } 00490 else 00491 { 00492 exit_if_null; 00493 looking_at = *regexp++; 00494 00495 if (looking_at == ']') 00496 { 00497 /* On the form [A-G] or [^A-G]. Note that [G-A] 00498 * is a syntax error. Fair enough, I think. */ 00499 #ifdef SAFE_CHECKS 00500 if (first > last) 00501 { 00502 return NULL; 00503 } 00504 #endif 00505 sel->type = neg ? sel_not_range : sel_range; 00506 sel->u.range.low = first; 00507 sel->u.range.high = last; 00508 break; 00509 } 00510 } 00511 } 00512 00513 { 00514 /* The datastructure can only represent a RE this 00515 * complex with an array. */ 00516 int i; 00517 unsigned char previous; 00518 00519 sel->type = sel_array; 00520 memset(sel->u.array, neg, sizeof(sel->u.array)); 00521 00522 if (last) 00523 { 00524 /* It starts with a range */ 00525 #ifdef SAFE_CHECKS 00526 if (first > last) 00527 { 00528 return NULL; 00529 } 00530 #endif 00531 for (i = first; i <= last; i++) 00532 { 00533 sel->u.array[i] = !neg; 00534 } 00535 } 00536 else 00537 { 00538 /* It begins with a "random" character */ 00539 sel->u.array[first] = !neg; 00540 } 00541 00542 sel->u.array[looking_at] = !neg; 00543 00544 exit_if_null; 00545 previous = looking_at; 00546 looking_at = *regexp++; 00547 00548 /* Add more characters to the array until we reach 00549 * ]. Quoting doesn't and shouldn't work in here. 00550 * ("]" should be put first, and "-" last if they 00551 * are needed inside this construct.) 00552 * Look for ranges as we go along. */ 00553 while (looking_at != ']') 00554 { 00555 if (looking_at == '-') 00556 { 00557 exit_if_null; 00558 looking_at = *regexp++; 00559 00560 if (looking_at != ']') 00561 { 00562 #ifdef SAFE_CHECKS 00563 if (previous > looking_at) 00564 { 00565 return NULL; 00566 } 00567 #endif 00568 for (i = previous + 1; i < looking_at; i++) 00569 { 00570 /* previous has already been set and 00571 * looking_at is set below. */ 00572 sel->u.array[i] = !neg; 00573 } 00574 00575 exit_if_null; 00576 } 00577 else 00578 { 00579 sel->u.array['-'] = !neg; 00580 break; 00581 } 00582 } 00583 00584 sel->u.array[looking_at] = !neg; 00585 previous = looking_at; 00586 exit_if_null; 00587 looking_at = *regexp++; 00588 } 00589 } 00590 } 00591 00592 break; 00593 00594 case '\\': 00595 if (quoted) 00596 { 00597 quoted = 0; 00598 sel->type = sel_single; 00599 sel->u.single = looking_at; 00600 } 00601 else 00602 { 00603 quoted = 1; 00604 } 00605 00606 break; 00607 00608 default: 00609 quoted = 0; 00610 sel->type = sel_single; 00611 sel->u.single = looking_at; 00612 break; 00613 } 00614 } 00615 while (quoted); 00616 00617 if (*regexp == '*') 00618 { 00619 sel->repeat = rep_null_or_more; 00620 ++regexp; 00621 } 00622 else if (*regexp == '?') 00623 { 00624 sel->repeat = rep_null_or_once; 00625 ++regexp; 00626 } 00627 else if (*regexp == '+') 00628 { 00629 sel->repeat = rep_once_or_more; 00630 ++regexp; 00631 } 00632 else 00633 { 00634 sel->repeat = rep_once; 00635 } 00636 00637 return regexp; 00638 } 00639 00640 #ifdef DEBUG2 00641 00645 static void re_dump_sel(selection *sel) 00646 { 00647 switch (sel->type) 00648 { 00649 case sel_any: 00650 printf("."); 00651 break; 00652 00653 case sel_end: 00654 printf("$"); 00655 break; 00656 00657 case sel_single: 00658 printf("<%c>", sel->u.single); 00659 break; 00660 00661 case sel_range: 00662 printf("[%c-%c]", sel->u.range.low, sel->u.range.high); 00663 break; 00664 00665 case sel_array: 00666 { 00667 int i; 00668 00669 printf("["); 00670 00671 for (i = 0; i < UCHAR_MAX; i++) 00672 { 00673 if (sel->u.array[i]) 00674 { 00675 printf("%c", i); 00676 } 00677 } 00678 00679 printf("]"); 00680 break; 00681 } 00682 00683 case sel_not_single: 00684 printf("[^%c]", sel->u.single); 00685 break; 00686 00687 case sel_not_range: 00688 printf("[^%c-%c]", sel->u.range.low, sel->u.range.high); 00689 break; 00690 00691 default: 00692 printf("<UNKNOWN TOKEN!>"); 00693 break; 00694 } 00695 00696 switch (sel->repeat) 00697 { 00698 case rep_once: 00699 break; 00700 00701 case rep_null_or_once: 00702 printf("?"); 00703 break; 00704 00705 case rep_null_or_more: 00706 printf("*"); 00707 break; 00708 00709 case rep_once_or_more: 00710 printf("+"); 00711 break; 00712 00713 default: 00714 printf("<UNKNOWN REP-TOKEN!>"); 00715 break; 00716 } 00717 } 00718 00719 int main(int argc, char *argv[]) 00720 { 00721 char *re, *m; 00722 selection sel; 00723 00724 re = re_get_token(&sel, argv[1]); 00725 00726 printf("'%s' -> '%s'\n", argv[1], re); 00727 re_dump_sel(&sel); 00728 printf("\n"); 00729 m = re_cmp(argv[2], argv[1]); 00730 00731 if (m) 00732 { 00733 printf("MATCH! -> '%s'\n", m); 00734 } 00735 00736 return 0; 00737 } 00738 #endif
1.7.4