source: freewrt/tools/paxmirabilis/src/pat_rep.c@ a569125

freewrt_1_0 freewrt_2_0
Last change on this file since a569125 was a569125, checked in by Thorsten Glaser <tg@…>, 14 years ago

even FreeWRT 1.0-stable deserves paxmirabilis-20120216 compiled with LTO ☺

git-svn-id: svn://www.freewrt.org/branches/freewrt_1_0@3981 afb5a338-a214-0410-bd46-81f09a774fd1

  • Property mode set to 100644
File size: 27.1 KB
Line 
1/* $OpenBSD: pat_rep.c,v 1.31 2009/10/27 23:59:22 deraadt Exp $ */
2/* $NetBSD: pat_rep.c,v 1.4 1995/03/21 09:07:33 cgd Exp $ */
3
4/*-
5 * Copyright (c) 1992 Keith Muller.
6 * Copyright (c) 1992, 1993
7 * The Regents of the University of California. All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * Keith Muller of the University of California, San Diego.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#include <sys/param.h>
38#include <sys/time.h>
39#include <sys/stat.h>
40#include <stdio.h>
41#include <string.h>
42#include <unistd.h>
43#include <stdlib.h>
44#include <errno.h>
45#include <regex.h>
46#include "pax.h"
47#include "pat_rep.h"
48#include "extern.h"
49
50__RCSID("$MirOS: src/bin/pax/pat_rep.c,v 1.4 2012/02/12 00:44:57 tg Exp $");
51
52/*
53 * routines to handle pattern matching, name modification (regular expression
54 * substitution and interactive renames), and destination name modification for
55 * copy (-rw). Both file name and link names are adjusted as required in these
56 * routines.
57 */
58
59#define MAXSUBEXP 10 /* max subexpressions, DO NOT CHANGE */
60static PATTERN *pathead = NULL; /* file pattern match list head */
61static PATTERN *pattail = NULL; /* file pattern match list tail */
62static REPLACE *rephead = NULL; /* replacement string list head */
63static REPLACE *reptail = NULL; /* replacement string list tail */
64
65static int rep_name(char *, size_t, int *, int);
66static int tty_rename(ARCHD *);
67static int fix_path(char *, int *, char *, int);
68static int fn_match(char *, char *, char **);
69static char * range_match(char *, int);
70static int resub(regex_t *, regmatch_t *, char *, char *, char *, char *);
71
72/*
73 * rep_add()
74 * parses the -s replacement string; compiles the regular expression
75 * and stores the compiled value and it's replacement string together in
76 * replacement string list. Input to this function is of the form:
77 * /old/new/pg
78 * The first char in the string specifies the delimiter used by this
79 * replacement string. "Old" is a regular expression in "ed" format which
80 * is compiled by regcomp() and is applied to filenames. "new" is the
81 * substitution string; p and g are options flags for printing and global
82 * replacement (over the single filename)
83 * Return:
84 * 0 if a proper replacement string and regular expression was added to
85 * the list of replacement patterns; -1 otherwise.
86 */
87
88int
89rep_add(char *str)
90{
91 char *pt1;
92 char *pt2;
93 REPLACE *rep;
94 int res;
95 char rebuf[BUFSIZ];
96
97 /*
98 * throw out the bad parameters
99 */
100 if ((str == NULL) || (*str == '\0')) {
101 paxwarn(1, "Empty replacement string");
102 return(-1);
103 }
104
105 /*
106 * first character in the string specifies what the delimiter is for
107 * this expression
108 */
109 for (pt1 = str+1; *pt1; pt1++) {
110 if (*pt1 == '\\') {
111 pt1++;
112 continue;
113 }
114 if (*pt1 == *str)
115 break;
116 }
117 if (*pt1 == '\0') {
118 paxwarn(1, "Invalid replacement string %s", str);
119 return(-1);
120 }
121
122 /*
123 * allocate space for the node that handles this replacement pattern
124 * and split out the regular expression and try to compile it
125 */
126 if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) {
127 paxwarn(1, "Unable to allocate memory for replacement string");
128 return(-1);
129 }
130
131 *pt1 = '\0';
132 if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) {
133 regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf));
134 paxwarn(1, "%s while compiling regular expression %s", rebuf, str);
135 (void)free((char *)rep);
136 return(-1);
137 }
138
139 /*
140 * put the delimiter back in case we need an error message and
141 * locate the delimiter at the end of the replacement string
142 * we then point the node at the new substitution string
143 */
144 *pt1++ = *str;
145 for (pt2 = pt1; *pt2; pt2++) {
146 if (*pt2 == '\\') {
147 pt2++;
148 continue;
149 }
150 if (*pt2 == *str)
151 break;
152 }
153 if (*pt2 == '\0') {
154 regfree(&(rep->rcmp));
155 (void)free((char *)rep);
156 paxwarn(1, "Invalid replacement string %s", str);
157 return(-1);
158 }
159
160 *pt2 = '\0';
161 rep->nstr = pt1;
162 pt1 = pt2++;
163 rep->flgs = 0;
164
165 /*
166 * set the options if any
167 */
168 while (*pt2 != '\0') {
169 switch (*pt2) {
170 case 'g':
171 case 'G':
172 rep->flgs |= GLOB;
173 break;
174 case 'p':
175 case 'P':
176 rep->flgs |= PRNT;
177 break;
178 default:
179 regfree(&(rep->rcmp));
180 (void)free((char *)rep);
181 *pt1 = *str;
182 paxwarn(1, "Invalid replacement string option %s", str);
183 return(-1);
184 }
185 ++pt2;
186 }
187
188 /*
189 * all done, link it in at the end
190 */
191 rep->fow = NULL;
192 if (rephead == NULL) {
193 reptail = rephead = rep;
194 return(0);
195 }
196 reptail->fow = rep;
197 reptail = rep;
198 return(0);
199}
200
201/*
202 * pat_add()
203 * add a pattern match to the pattern match list. Pattern matches are used
204 * to select which archive members are extracted. (They appear as
205 * arguments to pax in the list and read modes). If no patterns are
206 * supplied to pax, all members in the archive will be selected (and the
207 * pattern match list is empty).
208 * Return:
209 * 0 if the pattern was added to the list, -1 otherwise
210 */
211
212int
213pat_add(char *str, char *chd_name)
214{
215 PATTERN *pt;
216
217 /*
218 * throw out the junk
219 */
220 if ((str == NULL) || (*str == '\0')) {
221 paxwarn(1, "Empty pattern string");
222 return(-1);
223 }
224
225 /*
226 * allocate space for the pattern and store the pattern. the pattern is
227 * part of argv so do not bother to copy it, just point at it. Add the
228 * node to the end of the pattern list
229 */
230 if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) {
231 paxwarn(1, "Unable to allocate memory for pattern string");
232 return(-1);
233 }
234
235 pt->pstr = str;
236 pt->pend = NULL;
237 pt->plen = strlen(str);
238 pt->fow = NULL;
239 pt->flgs = 0;
240 pt->chdname = chd_name;
241
242 if (pathead == NULL) {
243 pattail = pathead = pt;
244 return(0);
245 }
246 pattail->fow = pt;
247 pattail = pt;
248 return(0);
249}
250
251/*
252 * pat_chk()
253 * complain if any the user supplied pattern did not result in a match to
254 * a selected archive member.
255 */
256
257void
258pat_chk(void)
259{
260 PATTERN *pt;
261 int wban = 0;
262
263 /*
264 * walk down the list checking the flags to make sure MTCH was set,
265 * if not complain
266 */
267 for (pt = pathead; pt != NULL; pt = pt->fow) {
268 if (pt->flgs & MTCH)
269 continue;
270 if (!wban) {
271 paxwarn(1, "WARNING! These patterns were not matched:");
272 ++wban;
273 }
274 (void)fprintf(stderr, "%s\n", pt->pstr);
275 }
276}
277
278/*
279 * pat_sel()
280 * the archive member which matches a pattern was selected. Mark the
281 * pattern as having selected an archive member. arcn->pat points at the
282 * pattern that was matched. arcn->pat is set in pat_match()
283 *
284 * NOTE: When the -c option is used, we are called when there was no match
285 * by pat_match() (that means we did match before the inverted sense of
286 * the logic). Now this seems really strange at first, but with -c we
287 * need to keep track of those patterns that cause an archive member to NOT
288 * be selected (it found an archive member with a specified pattern)
289 * Return:
290 * 0 if the pattern pointed at by arcn->pat was tagged as creating a
291 * match, -1 otherwise.
292 */
293
294int
295pat_sel(ARCHD *arcn)
296{
297 PATTERN *pt;
298 PATTERN **ppt;
299 int len;
300
301 /*
302 * if no patterns just return
303 */
304 if ((pathead == NULL) || ((pt = arcn->pat) == NULL))
305 return(0);
306
307 /*
308 * when we are NOT limited to a single match per pattern mark the
309 * pattern and return
310 */
311 if (!nflag) {
312 pt->flgs |= MTCH;
313 return(0);
314 }
315
316 /*
317 * we reach this point only when we allow a single selected match per
318 * pattern, if the pattern matches a directory and we do not have -d
319 * (dflag) we are done with this pattern. We may also be handed a file
320 * in the subtree of a directory. in that case when we are operating
321 * with -d, this pattern was already selected and we are done
322 */
323 if (pt->flgs & DIR_MTCH)
324 return(0);
325
326 if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) {
327 /*
328 * ok we matched a directory and we are allowing
329 * subtree matches but because of the -n only its children will
330 * match. This is tagged as a DIR_MTCH type.
331 * WATCH IT, the code assumes that pt->pend points
332 * into arcn->name and arcn->name has not been modified.
333 * If not we will have a big mess. Yup this is another kludge
334 */
335
336 /*
337 * if this was a prefix match, remove trailing part of path
338 * so we can copy it. Future matches will be exact prefix match
339 */
340 if (pt->pend != NULL)
341 *pt->pend = '\0';
342
343 if ((pt->pstr = strdup(arcn->name)) == NULL) {
344 paxwarn(1, "Pattern select out of memory");
345 if (pt->pend != NULL)
346 *pt->pend = '/';
347 pt->pend = NULL;
348 return(-1);
349 }
350
351 /*
352 * put the trailing / back in the source string
353 */
354 if (pt->pend != NULL) {
355 *pt->pend = '/';
356 pt->pend = NULL;
357 }
358 pt->plen = strlen(pt->pstr);
359
360 /*
361 * strip off any trailing /, this should really never happen
362 */
363 len = pt->plen - 1;
364 if (*(pt->pstr + len) == '/') {
365 *(pt->pstr + len) = '\0';
366 pt->plen = len;
367 }
368 pt->flgs = DIR_MTCH | MTCH;
369 arcn->pat = pt;
370 return(0);
371 }
372
373 /*
374 * we are then done with this pattern, so we delete it from the list
375 * because it can never be used for another match.
376 * Seems kind of strange to do for a -c, but the pax spec is really
377 * vague on the interaction of -c, -n and -d. We assume that when -c
378 * and the pattern rejects a member (i.e. it matched it) it is done.
379 * In effect we place the order of the flags as having -c last.
380 */
381 pt = pathead;
382 ppt = &pathead;
383 while ((pt != NULL) && (pt != arcn->pat)) {
384 ppt = &(pt->fow);
385 pt = pt->fow;
386 }
387
388 if (pt == NULL) {
389 /*
390 * should never happen....
391 */
392 paxwarn(1, "Pattern list inconsistent");
393 return(-1);
394 }
395 *ppt = pt->fow;
396 (void)free((char *)pt);
397 arcn->pat = NULL;
398 return(0);
399}
400
401/*
402 * pat_match()
403 * see if this archive member matches any supplied pattern, if a match
404 * is found, arcn->pat is set to point at the potential pattern. Later if
405 * this archive member is "selected" we process and mark the pattern as
406 * one which matched a selected archive member (see pat_sel())
407 * Return:
408 * 0 if this archive member should be processed, 1 if it should be
409 * skipped and -1 if we are done with all patterns (and pax should quit
410 * looking for more members)
411 */
412
413int
414pat_match(ARCHD *arcn)
415{
416 PATTERN *pt;
417
418 arcn->pat = NULL;
419
420 /*
421 * if there are no more patterns and we have -n (and not -c) we are
422 * done. otherwise with no patterns to match, matches all
423 */
424 if (pathead == NULL) {
425 if (nflag && !cflag)
426 return(-1);
427 return(0);
428 }
429
430 /*
431 * have to search down the list one at a time looking for a match.
432 */
433 pt = pathead;
434 while (pt != NULL) {
435 /*
436 * check for a file name match unless we have DIR_MTCH set in
437 * this pattern then we want a prefix match
438 */
439 if (pt->flgs & DIR_MTCH) {
440 /*
441 * this pattern was matched before to a directory
442 * as we must have -n set for this (but not -d). We can
443 * only match CHILDREN of that directory so we must use
444 * an exact prefix match (no wildcards).
445 */
446 if ((arcn->name[pt->plen] == '/') &&
447 (strncmp(pt->pstr, arcn->name, pt->plen) == 0))
448 break;
449 } else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0)
450 break;
451 pt = pt->fow;
452 }
453
454 /*
455 * return the result, remember that cflag (-c) inverts the sense of a
456 * match
457 */
458 if (pt == NULL)
459 return(cflag ? 0 : 1);
460
461 /*
462 * we had a match, now when we invert the sense (-c) we reject this
463 * member. However we have to tag the pattern a being successful, (in a
464 * match, not in selecting a archive member) so we call pat_sel() here.
465 */
466 arcn->pat = pt;
467 if (!cflag)
468 return(0);
469
470 if (pat_sel(arcn) < 0)
471 return(-1);
472 arcn->pat = NULL;
473 return(1);
474}
475
476/*
477 * fn_match()
478 * Return:
479 * 0 if this archive member should be processed, 1 if it should be
480 * skipped and -1 if we are done with all patterns (and pax should quit
481 * looking for more members)
482 * Note: *pend may be changed to show where the prefix ends.
483 */
484
485static int
486fn_match(char *pattern, char *string, char **pend)
487{
488 char c;
489 char test;
490
491 *pend = NULL;
492 for (;;) {
493 switch (c = *pattern++) {
494 case '\0':
495 /*
496 * Ok we found an exact match
497 */
498 if (*string == '\0')
499 return(0);
500
501 /*
502 * Check if it is a prefix match
503 */
504 if ((dflag == 1) || (*string != '/'))
505 return(-1);
506
507 /*
508 * It is a prefix match, remember where the trailing
509 * / is located
510 */
511 *pend = string;
512 return(0);
513 case '?':
514 if ((test = *string++) == '\0')
515 return (-1);
516 break;
517 case '*':
518 c = *pattern;
519 /*
520 * Collapse multiple *'s.
521 */
522 while (c == '*')
523 c = *++pattern;
524
525 /*
526 * optimised hack for pattern with a * at the end
527 */
528 if (c == '\0')
529 return (0);
530
531 /*
532 * General case, use recursion.
533 */
534 while ((test = *string) != '\0') {
535 if (!fn_match(pattern, string, pend))
536 return (0);
537 ++string;
538 }
539 return (-1);
540 case '[':
541 /*
542 * range match
543 */
544 if (((test = *string++) == '\0') ||
545 ((pattern = range_match(pattern, test)) == NULL))
546 return (-1);
547 break;
548 case '\\':
549 default:
550 if (c != *string++)
551 return (-1);
552 break;
553 }
554 }
555 /* NOTREACHED */
556}
557
/*
 * range_match()
 *	check test against a bracket expression. pattern points just past
 *	the opening '['. A leading '!' inverts the sense of the expression;
 *	"a-z" denotes an inclusive range unless the '-' is directly followed
 *	by ']' or the end of the pattern, in which case it is an ordinary
 *	character.
 * Return:
 *	pointer to the character following the closing ']' if test is
 *	accepted; NULL if it is rejected or the closing ']' is missing.
 */

static char *
range_match(char *pattern, int test)
{
	char lo;
	char hi;
	int invert = 0;
	int hit = 0;

	if (*pattern == '!') {
		invert = 1;
		++pattern;
	}

	for (;;) {
		lo = *pattern++;
		if (lo == ']')
			break;

		/*
		 * Illegal pattern
		 */
		if (lo == '\0')
			return (NULL);

		if (*pattern == '-') {
			hi = pattern[1];
			if ((hi != '\0') && (hi != ']')) {
				if ((lo <= test) && (test <= hi))
					hit = 1;
				pattern += 2;
				continue;
			}
		}
		if (lo == test)
			hit = 1;
	}

	if (hit == invert)
		return (NULL);
	return (pattern);
}
586
587/*
588 * mod_name()
589 * modify a selected file name. first attempt to apply replacement string
590 * expressions, then apply interactive file rename. We apply replacement
591 * string expressions to both filenames and file links (if we didn't the
592 * links would point to the wrong place, and we could never be able to
593 * move an archive that has a file link in it). When we rename files
594 * interactively, we store that mapping (old name to user input name) so
595 * if we spot any file links to the old file name in the future, we will
596 * know exactly how to fix the file link.
597 * Return:
598 * 0 continue to process file, 1 skip this file, -1 pax is finished
599 */
600
601int
602mod_name(ARCHD *arcn)
603{
604 int res = 0;
605
606 /*
607 * Strip off leading '/' if appropriate.
608 * Currently, this option is only set for the tar format.
609 */
610 while (rmleadslash && arcn->name[0] == '/') {
611 if (arcn->name[1] == '\0') {
612 arcn->name[0] = '.';
613 } else {
614 (void)memmove(arcn->name, &arcn->name[1],
615 strlen(arcn->name));
616 arcn->nlen--;
617 }
618 if (rmleadslash < 2) {
619 rmleadslash = 2;
620 paxwarn(0, "Removing leading / from absolute path names in the archive");
621 }
622 }
623 while (rmleadslash && arcn->ln_name[0] == '/' &&
624 (arcn->type == PAX_HLK || arcn->type == PAX_HRG)) {
625 if (arcn->ln_name[1] == '\0') {
626 arcn->ln_name[0] = '.';
627 } else {
628 (void)memmove(arcn->ln_name, &arcn->ln_name[1],
629 strlen(arcn->ln_name));
630 arcn->ln_nlen--;
631 }
632 if (rmleadslash < 2) {
633 rmleadslash = 2;
634 paxwarn(0, "Removing leading / from absolute path names in the archive");
635 }
636 }
637
638 /*
639 * IMPORTANT: We have a problem. what do we do with symlinks?
640 * Modifying a hard link name makes sense, as we know the file it
641 * points at should have been seen already in the archive (and if it
642 * wasn't seen because of a read error or a bad archive, we lose
643 * anyway). But there are no such requirements for symlinks. On one
644 * hand the symlink that refers to a file in the archive will have to
645 * be modified to so it will still work at its new location in the
646 * file system. On the other hand a symlink that points elsewhere (and
647 * should continue to do so) should not be modified. There is clearly
648 * no perfect solution here. So we handle them like hardlinks. Clearly
649 * a replacement made by the interactive rename mapping is very likely
650 * to be correct since it applies to a single file and is an exact
651 * match. The regular expression replacements are a little harder to
652 * justify though. We claim that the symlink name is only likely
653 * to be replaced when it points within the file tree being moved and
654 * in that case it should be modified. what we really need to do is to
655 * call an oracle here. :)
656 */
657 if (rephead != NULL) {
658 /*
659 * we have replacement strings, modify the name and the link
660 * name if any.
661 */
662 if ((res = rep_name(arcn->name, sizeof(arcn->name), &(arcn->nlen), 1)) != 0)
663 return(res);
664
665 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
666 (arcn->type == PAX_HRG)) &&
667 ((res = rep_name(arcn->ln_name, sizeof(arcn->ln_name), &(arcn->ln_nlen), 0)) != 0))
668 return(res);
669 }
670
671 if (iflag) {
672 /*
673 * perform interactive file rename, then map the link if any
674 */
675 if ((res = tty_rename(arcn)) != 0)
676 return(res);
677 if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
678 (arcn->type == PAX_HRG))
679 sub_name(arcn->ln_name, &(arcn->ln_nlen), sizeof(arcn->ln_name));
680 }
681 return(res);
682}
683
684/*
685 * tty_rename()
686 * Prompt the user for a replacement file name. A "." keeps the old name,
687 * a empty line skips the file, and an EOF on reading the tty, will cause
688 * pax to stop processing and exit. Otherwise the file name input, replaces
689 * the old one.
690 * Return:
691 * 0 process this file, 1 skip this file, -1 we need to exit pax
692 */
693
694static int
695tty_rename(ARCHD *arcn)
696{
697 char tmpname[PAXPATHLEN+2];
698 int res;
699
700 /*
701 * prompt user for the replacement name for a file, keep trying until
702 * we get some reasonable input. Archives may have more than one file
703 * on them with the same name (from updates etc). We print verbose info
704 * on the file so the user knows what is up.
705 */
706 tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0);
707
708 for (;;) {
709 ls_tty(arcn);
710 tty_prnt("Input new name, or a \".\" to keep the old name, ");
711 tty_prnt("or a \"return\" to skip this file.\n");
712 tty_prnt("Input > ");
713 if (tty_read(tmpname, sizeof(tmpname)) < 0)
714 return(-1);
715 if (strcmp(tmpname, "..") == 0) {
716 tty_prnt("Try again, illegal file name: ..\n");
717 continue;
718 }
719 if (strlen(tmpname) > PAXPATHLEN) {
720 tty_prnt("Try again, file name too long\n");
721 continue;
722 }
723 break;
724 }
725
726 /*
727 * empty file name, skips this file. a "." leaves it alone
728 */
729 if (tmpname[0] == '\0') {
730 tty_prnt("Skipping file.\n");
731 return(1);
732 }
733 if ((tmpname[0] == '.') && (tmpname[1] == '\0')) {
734 tty_prnt("Processing continues, name unchanged.\n");
735 return(0);
736 }
737
738 /*
739 * ok the name changed. We may run into links that point at this
740 * file later. we have to remember where the user sent the file
741 * in order to repair any links.
742 */
743 tty_prnt("Processing continues, name changed to: %s\n", tmpname);
744 res = add_name(arcn->name, arcn->nlen, tmpname);
745 arcn->nlen = strlcpy(arcn->name, tmpname, sizeof(arcn->name));
746 if ((size_t)arcn->nlen >= sizeof(arcn->name))
747 arcn->nlen = sizeof(arcn->name) - 1; /* XXX truncate? */
748 if (res < 0)
749 return(-1);
750 return(0);
751}
752
753/*
754 * set_dest()
755 * fix up the file name and the link name (if any) so this file will land
756 * in the destination directory (used during copy() -rw).
757 * Return:
758 * 0 if ok, -1 if failure (name too long)
759 */
760
761int
762set_dest(ARCHD *arcn, char *dest_dir, int dir_len)
763{
764 if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0)
765 return(-1);
766
767 /*
768 * It is really hard to deal with symlinks here, we cannot be sure
769 * if the name they point was moved (or will be moved). It is best to
770 * leave them alone.
771 */
772 if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG))
773 return(0);
774
775 if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0)
776 return(-1);
777 return(0);
778}
779
780/*
781 * fix_path
782 * concatenate dir_name and or_name and store the result in or_name (if
783 * it fits). This is one ugly function.
784 * Return:
785 * 0 if ok, -1 if the final name is too long
786 */
787
788static int
789fix_path(char *or_name, int *or_len, char *dir_name, int dir_len)
790{
791 char *src;
792 char *dest;
793 char *start;
794 int len;
795
796 /*
797 * we shift the or_name to the right enough to tack in the dir_name
798 * at the front. We make sure we have enough space for it all before
799 * we start. since dest always ends in a slash, we skip of or_name
800 * if it also starts with one.
801 */
802 start = or_name;
803 src = start + *or_len;
804 dest = src + dir_len;
805 if (*start == '/') {
806 ++start;
807 --dest;
808 }
809 if ((len = dest - or_name) > PAXPATHLEN) {
810 paxwarn(1, "File name %s/%s, too long", dir_name, start);
811 return(-1);
812 }
813 *or_len = len;
814
815 /*
816 * enough space, shift
817 */
818 while (src >= start)
819 *dest-- = *src--;
820 src = dir_name + dir_len - 1;
821
822 /*
823 * splice in the destination directory name
824 */
825 while (src >= dir_name)
826 *dest-- = *src--;
827
828 *(or_name + len) = '\0';
829 return(0);
830}
831
832/*
833 * rep_name()
834 * walk down the list of replacement strings applying each one in order.
835 * when we find one with a successful substitution, we modify the name
836 * as specified. if required, we print the results. if the resulting name
837 * is empty, we will skip this archive member. We use the regexp(3)
838 * routines (regexp() ought to win a price as having the most cryptic
839 * library function manual page).
840 * --Parameters--
841 * name is the file name we are going to apply the regular expressions to
842 * (and may be modified)
843 * nsize is the size of the name buffer.
844 * nlen is the length of this name (and is modified to hold the length of
845 * the final string).
846 * prnt is a flag that says whether to print the final result.
847 * Return:
848 * 0 if substitution was successful, 1 if we are to skip the file (the name
849 * ended up empty)
850 */
851
852static int
853rep_name(char *name, size_t nsize, int *nlen, int prnt)
854{
855 REPLACE *pt;
856 char *inpt;
857 char *outpt;
858 char *endpt;
859 char *rpt;
860 int found = 0;
861 int res;
862 regmatch_t pm[MAXSUBEXP];
863 char nname[PAXPATHLEN+1]; /* final result of all replacements */
864 char buf1[PAXPATHLEN+1]; /* where we work on the name */
865
866 /*
867 * copy the name into buf1, where we will work on it. We need to keep
868 * the orig string around so we can print out the result of the final
869 * replacement. We build up the final result in nname. inpt points at
870 * the string we apply the regular expression to. prnt is used to
871 * suppress printing when we handle replacements on the link field
872 * (the user already saw that substitution go by)
873 */
874 pt = rephead;
875 (void)strlcpy(buf1, name, sizeof(buf1));
876 inpt = buf1;
877 outpt = nname;
878 endpt = outpt + PAXPATHLEN;
879
880 /*
881 * try each replacement string in order
882 */
883 while (pt != NULL) {
884 do {
885 char *oinpt = inpt;
886 /*
887 * check for a successful substitution, if not go to
888 * the next pattern, or cleanup if we were global
889 */
890 if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0)
891 break;
892
893 /*
894 * ok we found one. We have three parts, the prefix
895 * which did not match, the section that did and the
896 * tail (that also did not match). Copy the prefix to
897 * the final output buffer (watching to make sure we
898 * do not create a string too long).
899 */
900 found = 1;
901 rpt = inpt + pm[0].rm_so;
902
903 while ((inpt < rpt) && (outpt < endpt))
904 *outpt++ = *inpt++;
905 if (outpt == endpt)
906 break;
907
908 /*
909 * for the second part (which matched the regular
910 * expression) apply the substitution using the
911 * replacement string and place it the prefix in the
912 * final output. If we have problems, skip it.
913 */
914 if ((res = resub(&(pt->rcmp),pm,pt->nstr,oinpt,outpt,endpt))
915 < 0) {
916 if (prnt)
917 paxwarn(1, "Replacement name error %s",
918 name);
919 return(1);
920 }
921 outpt += res;
922
923 /*
924 * we set up to look again starting at the first
925 * character in the tail (of the input string right
926 * after the last character matched by the regular
927 * expression (inpt always points at the first char in
928 * the string to process). If we are not doing a global
929 * substitution, we will use inpt to copy the tail to
930 * the final result. Make sure we do not overrun the
931 * output buffer
932 */
933 inpt += pm[0].rm_eo - pm[0].rm_so;
934
935 if ((outpt == endpt) || (*inpt == '\0'))
936 break;
937
938 /*
939 * if the user wants global we keep trying to
940 * substitute until it fails, then we are done.
941 */
942 } while (pt->flgs & GLOB);
943
944 if (found)
945 break;
946
947 /*
948 * a successful substitution did NOT occur, try the next one
949 */
950 pt = pt->fow;
951 }
952
953 if (found) {
954 /*
955 * we had a substitution, copy the last tail piece (if there is
956 * room) to the final result
957 */
958 while ((outpt < endpt) && (*inpt != '\0'))
959 *outpt++ = *inpt++;
960
961 *outpt = '\0';
962 if ((outpt == endpt) && (*inpt != '\0')) {
963 if (prnt)
964 paxwarn(1,"Replacement name too long %s >> %s",
965 name, nname);
966 return(1);
967 }
968
969 /*
970 * inform the user of the result if wanted
971 */
972 if (prnt && (pt->flgs & PRNT)) {
973 if (*nname == '\0')
974 (void)fprintf(stderr,"%s >> <empty string>\n",
975 name);
976 else
977 (void)fprintf(stderr,"%s >> %s\n", name, nname);
978 }
979
980 /*
981 * if empty inform the caller this file is to be skipped
982 * otherwise copy the new name over the orig name and return
983 */
984 if (*nname == '\0')
985 return(1);
986 *nlen = strlcpy(name, nname, nsize);
987 }
988 return(0);
989}
990
/*
 * resub()
 *	expand the replacement string src into dest, old style ed(1)
 *	fashion: '&' stands for the whole match, \0 to \9 for the matching
 *	entry of pm, and a backslash in front of any other character makes
 *	that character ordinary. The offsets in pm are relative to inpt.
 *	Nothing is written at or beyond destend and dest is NOT NUL
 *	terminated. Ordinary characters that no longer fit are dropped
 *	silently; a subexpression that does not fit is an error.
 * Return:
 *	-1 if error (reference to a subexpression number larger than
 *	rp->re_nsub, or no room for a subexpression), or the number of
 *	characters added to the destination.
 */

static int
resub(regex_t *rp, regmatch_t *pm, char *src, char *inpt, char *dest,
    char *destend)
{
	char *rd = src;
	char *wr = dest;
	regmatch_t *ref;
	int nsub = rp->re_nsub;
	int n;
	char ch;

	for (;;) {
		if (wr >= destend)
			break;
		ch = *rd++;
		if (ch == '\0')
			break;

		/*
		 * see if we just have an ordinary replacement character
		 * or we refer to a subexpression.
		 */
		if (ch == '&') {
			ref = pm;
		} else if ((ch == '\\') && (*rd >= '0') && (*rd <= '9')) {
			/*
			 * make sure there is a subexpression as specified
			 */
			n = *rd++ - '0';
			if (n > nsub)
				return(-1);
			ref = pm + n;
		} else {
			/*
			 * Ordinary character, just copy it (a backslash
			 * yields the character it protects)
			 */
			if ((ch == '\\') && (*rd != '\0'))
				ch = *rd++;
			*wr++ = ch;
			continue;
		}

		/*
		 * nothing to copy if the subexpression is bogus or empty
		 */
		if ((ref->rm_so < 0) || (ref->rm_eo < 0))
			continue;
		n = ref->rm_eo - ref->rm_so;
		if (n <= 0)
			continue;

		/*
		 * copy the subexpression to the destination.
		 * fail if we run out of space
		 */
		if (n > (destend - wr))
			return (-1);
		(void)memcpy(wr, inpt + ref->rm_so, n);
		wr += n;
	}
	return(wr - dest);
}
Note: See TracBrowser for help on using the repository browser.