| 1 |
/* $OpenBSD: diffreg.c,v 1.93 2019/06/28 13:35:00 deraadt Exp $ */ |
| 2 |
|
| 3 |
/* |
| 4 |
* Copyright (C) Caldera International Inc. 2001-2002. |
| 5 |
* All rights reserved. |
| 6 |
* |
| 7 |
* Redistribution and use in source and binary forms, with or without |
| 8 |
* modification, are permitted provided that the following conditions |
| 9 |
* are met: |
| 10 |
* 1. Redistributions of source code and documentation must retain the above |
| 11 |
* copyright notice, this list of conditions and the following disclaimer. |
| 12 |
* 2. Redistributions in binary form must reproduce the above copyright |
| 13 |
* notice, this list of conditions and the following disclaimer in the |
| 14 |
* documentation and/or other materials provided with the distribution. |
| 15 |
* 3. All advertising materials mentioning features or use of this software |
| 16 |
* must display the following acknowledgement: |
| 17 |
* This product includes software developed or owned by Caldera |
| 18 |
* International, Inc. |
| 19 |
* 4. Neither the name of Caldera International, Inc. nor the names of other |
| 20 |
* contributors may be used to endorse or promote products derived from |
| 21 |
* this software without specific prior written permission. |
| 22 |
* |
| 23 |
* USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA |
| 24 |
* INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR |
| 25 |
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| 26 |
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| 27 |
* IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, |
| 28 |
* INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| 29 |
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| 30 |
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 31 |
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| 32 |
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING |
| 33 |
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 34 |
* POSSIBILITY OF SUCH DAMAGE. |
| 35 |
*/ |
| 36 |
/*- |
| 37 |
* Copyright (c) 1991, 1993 |
| 38 |
* The Regents of the University of California. All rights reserved. |
| 39 |
* |
| 40 |
* Redistribution and use in source and binary forms, with or without |
| 41 |
* modification, are permitted provided that the following conditions |
| 42 |
* are met: |
| 43 |
* 1. Redistributions of source code must retain the above copyright |
| 44 |
* notice, this list of conditions and the following disclaimer. |
| 45 |
* 2. Redistributions in binary form must reproduce the above copyright |
| 46 |
* notice, this list of conditions and the following disclaimer in the |
| 47 |
* documentation and/or other materials provided with the distribution. |
| 48 |
* 3. Neither the name of the University nor the names of its contributors |
| 49 |
* may be used to endorse or promote products derived from this software |
| 50 |
* without specific prior written permission. |
| 51 |
* |
| 52 |
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
| 53 |
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 54 |
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 55 |
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| 56 |
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 57 |
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 58 |
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 59 |
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 60 |
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 61 |
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 62 |
* SUCH DAMAGE. |
| 63 |
* |
| 64 |
* @(#)diffreg.c 8.1 (Berkeley) 6/6/93 |
| 65 |
*/ |
| 66 |
|
| 67 |
#include <ctype.h> |
| 68 |
#include <errno.h> |
| 69 |
#include <fcntl.h> |
| 70 |
#include <stddef.h> |
| 71 |
#include <stdio.h> |
| 72 |
#include <stdlib.h> |
| 73 |
#include <string.h> |
| 74 |
|
| 75 |
#include <unix.h> |
| 76 |
|
| 77 |
#include "diff.h" |
| 78 |
#include "util.h" |
| 79 |
|
| 80 |
#define MINIMUM(a, b) (((a) < (b)) ? (a) : (b)) |
| 81 |
#define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b)) |
| 82 |
|
| 83 |
/* |
| 84 |
* diff - compare two files. |
| 85 |
*/ |
| 86 |
|
| 87 |
/* |
| 88 |
* Uses an algorithm due to Harold Stone, which finds |
| 89 |
* a pair of longest identical subsequences in the two |
| 90 |
* files. |
| 91 |
* |
| 92 |
* The major goal is to generate the match vector J. |
| 93 |
* J[i] is the index of the line in file1 corresponding |
| 94 |
* to line i file0. J[i] = 0 if there is no |
| 95 |
* such line in file1. |
| 96 |
* |
| 97 |
* Lines are hashed so as to work in core. All potential |
| 98 |
* matches are located by sorting the lines of each file |
| 99 |
* on the hash (called ``value''). In particular, this |
| 100 |
* collects the equivalence classes in file1 together. |
| 101 |
* Subroutine equiv replaces the value of each line in |
| 102 |
* file0 by the index of the first element of its |
| 103 |
* matching equivalence in (the reordered) file1. |
| 104 |
* To save space equiv squeezes file1 into a single |
| 105 |
* array member in which the equivalence classes |
| 106 |
* are simply concatenated, except that their first |
| 107 |
* members are flagged by changing sign. |
| 108 |
* |
| 109 |
* Next the indices that point into member are unsorted into |
| 110 |
* array class according to the original order of file0. |
| 111 |
* |
| 112 |
* The cleverness lies in routine stone. This marches |
| 113 |
* through the lines of file0, developing a vector klist |
| 114 |
* of "k-candidates". At step i a k-candidate is a matched |
| 115 |
* pair of lines x,y (x in file0 y in file1) such that |
| 116 |
* there is a common subsequence of length k |
| 117 |
* between the first i lines of file0 and the first y |
| 118 |
* lines of file1, but there is no such subsequence for |
| 119 |
* any smaller y. x is the earliest possible mate to y |
| 120 |
* that occurs in such a subsequence. |
| 121 |
* |
| 122 |
* Whenever any of the members of the equivalence class of |
| 123 |
* lines in file1 matable to a line in file0 has serial number |
| 124 |
* less than the y of some k-candidate, that k-candidate |
| 125 |
* with the smallest such y is replaced. The new |
| 126 |
* k-candidate is chained (via pred) to the current |
| 127 |
* k-1 candidate so that the actual subsequence can |
| 128 |
* be recovered. When a member has serial number greater |
| 129 |
* that the y of all k-candidates, the klist is extended. |
| 130 |
* At the end, the longest subsequence is pulled out |
| 131 |
* and placed in the array J by unravel |
| 132 |
* |
| 133 |
* With J in hand, the matches there recorded are |
| 134 |
* check'ed against reality to assure that no spurious |
| 135 |
* matches have crept in due to hashing. If they have, |
| 136 |
* they are broken, and "jackpot" is recorded--a harmless |
| 137 |
* matter except that a true match for a spuriously |
| 138 |
* mated line may now be unnecessarily reported as a change. |
| 139 |
* |
| 140 |
* Much of the complexity of the program comes simply |
| 141 |
* from trying to minimize core utilization and |
| 142 |
* maximize the range of doable problems by dynamically |
| 143 |
* allocating what is needed and reusing what is not. |
| 144 |
* The core requirements for problems larger than somewhat |
| 145 |
* are (in words) 2*length(file0) + length(file1) + |
| 146 |
* 3*(number of k-candidates installed), typically about |
| 147 |
* 6n words for files of length n. |
| 148 |
*/ |
| 149 |
|
| 150 |
struct cand { |
| 151 |
int x; |
| 152 |
int y; |
| 153 |
int pred; |
| 154 |
}; |
| 155 |
|
| 156 |
struct line { |
| 157 |
int serial; |
| 158 |
int value; |
| 159 |
} *file[2]; |
| 160 |
|
| 161 |
/* |
| 162 |
* The following struct is used to record change information when |
| 163 |
* doing a "context" or "unified" diff. (see routine "change" to |
| 164 |
* understand the highly mnemonic field names) |
| 165 |
*/ |
| 166 |
struct context_vec { |
| 167 |
int a; /* start line in old file */ |
| 168 |
int b; /* end line in old file */ |
| 169 |
int c; /* start line in new file */ |
| 170 |
int d; /* end line in new file */ |
| 171 |
}; |
| 172 |
|
| 173 |
static void output(char *, FILE *, char *, FILE *, int); |
| 174 |
static void check(FILE *, FILE *, int); |
| 175 |
static void range(int, int, char *); |
| 176 |
static void uni_range(int, int); |
| 177 |
static void dump_context_vec(FILE *, FILE *, int); |
| 178 |
static void dump_unified_vec(FILE *, FILE *, int); |
| 179 |
static void prepare(int, FILE *, off_t, int); |
| 180 |
static void prune(void); |
| 181 |
static void equiv(struct line *, int, struct line *, int, int *); |
| 182 |
static void unravel(int); |
| 183 |
static void unsort(struct line *, int, int *); |
| 184 |
static void change(char *, FILE *, char *, FILE *, int, int, int, int, int *); |
| 185 |
static void sort(struct line *, int); |
| 186 |
static void print_header(const char *, const char *); |
| 187 |
static int ignoreline(char *); |
| 188 |
static int asciifile(FILE *); |
| 189 |
static int fetch(long *, int, int, FILE *, int, int, int); |
| 190 |
static int newcand(int, int, int); |
| 191 |
static int search(int *, int, int); |
| 192 |
static int skipline(FILE *); |
| 193 |
static int isqrt(int); |
| 194 |
static int stone(int *, int, int *, int *, int); |
| 195 |
static int readhash(FILE *, int); |
| 196 |
static int files_differ(FILE *, FILE *, int); |
| 197 |
static char *match_function(const long *, int, FILE *); |
| 198 |
static char *preadline(int, size_t, off_t); |
| 199 |
|
| 200 |
static int *J; /* will be overlaid on class */ |
| 201 |
static int *class; /* will be overlaid on file[0] */ |
| 202 |
static int *klist; /* will be overlaid on file[0] after class */ |
| 203 |
static int *member; /* will be overlaid on file[1] */ |
| 204 |
static int clen; |
| 205 |
static int inifdef; /* whether or not we are in a #ifdef block */ |
| 206 |
static int len[2]; |
| 207 |
static int pref, suff; /* length of prefix and suffix */ |
| 208 |
static int slen[2]; |
| 209 |
static int anychange; |
| 210 |
static long *ixnew; /* will be overlaid on file[1] */ |
| 211 |
static long *ixold; /* will be overlaid on klist */ |
| 212 |
static struct cand *clist; /* merely a free storage pot for candidates */ |
| 213 |
static int clistlen; /* the length of clist */ |
| 214 |
static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ |
| 215 |
static u_char *chrtran; /* translation table for case-folding */ |
| 216 |
static struct context_vec *context_vec_start; |
| 217 |
static struct context_vec *context_vec_end; |
| 218 |
static struct context_vec *context_vec_ptr; |
| 219 |
|
| 220 |
#define FUNCTION_CONTEXT_SIZE 55 |
| 221 |
static char lastbuf[FUNCTION_CONTEXT_SIZE]; |
| 222 |
static int lastline; |
| 223 |
static int lastmatchline; |
| 224 |
|
| 225 |
|
| 226 |
/* |
| 227 |
* chrtran points to one of 2 translation tables: cup2low if folding upper to |
| 228 |
* lower case clow2low if not folding case |
| 229 |
*/ |
| 230 |
u_char clow2low[256] = { |
| 231 |
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, |
| 232 |
0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, |
| 233 |
0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, |
| 234 |
0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, |
| 235 |
0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, |
| 236 |
0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, |
| 237 |
0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, |
| 238 |
0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, |
| 239 |
0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, |
| 240 |
0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, |
| 241 |
0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, |
| 242 |
0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, |
| 243 |
0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, |
| 244 |
0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, |
| 245 |
0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, |
| 246 |
0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, |
| 247 |
0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, |
| 248 |
0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, |
| 249 |
0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, |
| 250 |
0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, |
| 251 |
0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, |
| 252 |
0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, |
| 253 |
0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, |
| 254 |
0xfd, 0xfe, 0xff |
| 255 |
}; |
| 256 |
|
| 257 |
u_char cup2low[256] = { |
| 258 |
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, |
| 259 |
0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, |
| 260 |
0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, |
| 261 |
0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, |
| 262 |
0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, |
| 263 |
0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, |
| 264 |
0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, |
| 265 |
0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, |
| 266 |
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, |
| 267 |
0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, |
| 268 |
0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, |
| 269 |
0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, |
| 270 |
0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, |
| 271 |
0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, |
| 272 |
0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, |
| 273 |
0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, |
| 274 |
0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, |
| 275 |
0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, |
| 276 |
0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, |
| 277 |
0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, |
| 278 |
0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, |
| 279 |
0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, |
| 280 |
0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, |
| 281 |
0xfd, 0xfe, 0xff |
| 282 |
}; |
| 283 |
|
| 284 |
int |
| 285 |
diffreg(char *file1, char *file2, int flags) |
| 286 |
{ |
| 287 |
FILE *f1, *f2; |
| 288 |
int i, rval; |
| 289 |
|
| 290 |
f1 = f2 = NULL; |
| 291 |
rval = D_SAME; |
| 292 |
anychange = 0; |
| 293 |
lastline = 0; |
| 294 |
lastmatchline = 0; |
| 295 |
context_vec_ptr = context_vec_start - 1; |
| 296 |
if (flags & D_IGNORECASE) |
| 297 |
chrtran = cup2low; |
| 298 |
else |
| 299 |
chrtran = clow2low; |
| 300 |
if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) |
| 301 |
goto closem; |
| 302 |
|
| 303 |
f1 = fopen(file1, "rb"); |
| 304 |
if (f1 == NULL) { |
| 305 |
warn("failed to fopen %s", file1); |
| 306 |
status |= 2; |
| 307 |
goto closem; |
| 308 |
} |
| 309 |
|
| 310 |
f2 = fopen(file2, "rb"); |
| 311 |
if (f2 == NULL) { |
| 312 |
warn("failed to fopen %s", file2); |
| 313 |
status |= 2; |
| 314 |
goto closem; |
| 315 |
} |
| 316 |
|
| 317 |
switch (files_differ(f1, f2, flags)) { |
| 318 |
case 0: |
| 319 |
goto closem; |
| 320 |
case 1: |
| 321 |
break; |
| 322 |
default: |
| 323 |
/* error */ |
| 324 |
status |= 2; |
| 325 |
goto closem; |
| 326 |
} |
| 327 |
|
| 328 |
if ((flags & D_FORCEASCII) == 0 && |
| 329 |
(!asciifile(f1) || !asciifile(f2))) { |
| 330 |
rval = D_BINARY; |
| 331 |
status |= 1; |
| 332 |
goto closem; |
| 333 |
} |
| 334 |
prepare(0, f1, stb1.st_size, flags); |
| 335 |
prepare(1, f2, stb2.st_size, flags); |
| 336 |
|
| 337 |
prune(); |
| 338 |
sort(sfile[0], slen[0]); |
| 339 |
sort(sfile[1], slen[1]); |
| 340 |
|
| 341 |
member = (int *)file[1]; |
| 342 |
equiv(sfile[0], slen[0], sfile[1], slen[1], member); |
| 343 |
member = xreallocarray(member, slen[1] + 2, sizeof(*member)); |
| 344 |
|
| 345 |
class = (int *)file[0]; |
| 346 |
unsort(sfile[0], slen[0], class); |
| 347 |
class = xreallocarray(class, slen[0] + 2, sizeof(*class)); |
| 348 |
|
| 349 |
klist = xcalloc(slen[0] + 2, sizeof(*klist), "diffreg klist"); |
| 350 |
clen = 0; |
| 351 |
clistlen = 100; |
| 352 |
clist = xcalloc(clistlen, sizeof(*clist), "diffreg clist"); |
| 353 |
i = stone(class, slen[0], member, klist, flags); |
| 354 |
xfree(&member); |
| 355 |
xfree(&class); |
| 356 |
|
| 357 |
J = xreallocarray(J, len[0] + 2, sizeof(*J)); |
| 358 |
unravel(klist[i]); |
| 359 |
xfree(&clist); |
| 360 |
xfree(&klist); |
| 361 |
|
| 362 |
ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold)); |
| 363 |
ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew)); |
| 364 |
check(f1, f2, flags); |
| 365 |
output(file1, f1, file2, f2, flags); |
| 366 |
closem: |
| 367 |
if (anychange) { |
| 368 |
status |= 1; |
| 369 |
if (rval == D_SAME) |
| 370 |
rval = D_DIFFER; |
| 371 |
} |
| 372 |
if (f1 != NULL) |
| 373 |
fclose(f1); |
| 374 |
if (f2 != NULL) |
| 375 |
fclose(f2); |
| 376 |
|
| 377 |
return (rval); |
| 378 |
} |
| 379 |
|
| 380 |
/* |
| 381 |
* Check to see if the given files differ. |
| 382 |
* Returns 0 if they are the same, 1 if different, and -1 on error. |
| 383 |
* XXX - could use code from cmp(1) [faster] |
| 384 |
*/ |
| 385 |
static int |
| 386 |
files_differ(FILE *f1, FILE *f2, int flags) |
| 387 |
{ |
| 388 |
char buf1[BUFSIZ], buf2[BUFSIZ]; |
| 389 |
size_t i, j; |
| 390 |
|
| 391 |
if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size) |
| 392 |
return (1); |
| 393 |
for (;;) { |
| 394 |
i = fread(buf1, 1, sizeof(buf1), f1); |
| 395 |
j = fread(buf2, 1, sizeof(buf2), f2); |
| 396 |
if ((!i && ferror(f1)) || (!j && ferror(f2))) |
| 397 |
return (-1); |
| 398 |
if (i != j) |
| 399 |
return (1); |
| 400 |
if (i == 0) |
| 401 |
return (0); |
| 402 |
if (memcmp(buf1, buf2, i) != 0) |
| 403 |
return (1); |
| 404 |
} |
| 405 |
} |
| 406 |
|
| 407 |
static void |
| 408 |
prepare(int i, FILE *fd, off_t filesize, int flags) |
| 409 |
{ |
| 410 |
struct line *p; |
| 411 |
int j, h; |
| 412 |
size_t sz; |
| 413 |
|
| 414 |
rewind(fd); |
| 415 |
|
| 416 |
sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25; |
| 417 |
if (sz < 100) |
| 418 |
sz = 100; |
| 419 |
|
| 420 |
p = xcalloc(sz + 3, sizeof(*p), "diff prepare"); |
| 421 |
for (j = 0; (h = readhash(fd, flags));) { |
| 422 |
if (j == sz) { |
| 423 |
sz = sz * 3 / 2; |
| 424 |
p = xreallocarray(p, sz + 3, sizeof(*p)); |
| 425 |
} |
| 426 |
p[++j].value = h; |
| 427 |
} |
| 428 |
len[i] = j; |
| 429 |
file[i] = p; |
| 430 |
} |
| 431 |
|
| 432 |
static void |
| 433 |
prune(void) |
| 434 |
{ |
| 435 |
int i, j; |
| 436 |
|
| 437 |
for (pref = 0; pref < len[0] && pref < len[1] && |
| 438 |
file[0][pref + 1].value == file[1][pref + 1].value; |
| 439 |
pref++) |
| 440 |
; |
| 441 |
for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && |
| 442 |
file[0][len[0] - suff].value == file[1][len[1] - suff].value; |
| 443 |
suff++) |
| 444 |
; |
| 445 |
for (j = 0; j < 2; j++) { |
| 446 |
sfile[j] = file[j] + pref; |
| 447 |
slen[j] = len[j] - pref - suff; |
| 448 |
for (i = 0; i <= slen[j]; i++) |
| 449 |
sfile[j][i].serial = i; |
| 450 |
} |
| 451 |
} |
| 452 |
|
| 453 |
static void |
| 454 |
equiv(struct line *a, int n, struct line *b, int m, int *c) |
| 455 |
{ |
| 456 |
int i, j; |
| 457 |
|
| 458 |
i = j = 1; |
| 459 |
while (i <= n && j <= m) { |
| 460 |
if (a[i].value < b[j].value) |
| 461 |
a[i++].value = 0; |
| 462 |
else if (a[i].value == b[j].value) |
| 463 |
a[i++].value = j; |
| 464 |
else |
| 465 |
j++; |
| 466 |
} |
| 467 |
while (i <= n) |
| 468 |
a[i++].value = 0; |
| 469 |
b[m + 1].value = 0; |
| 470 |
j = 0; |
| 471 |
while (++j <= m) { |
| 472 |
c[j] = -b[j].serial; |
| 473 |
while (b[j + 1].value == b[j].value) { |
| 474 |
j++; |
| 475 |
c[j] = b[j].serial; |
| 476 |
} |
| 477 |
} |
| 478 |
c[j] = -1; |
| 479 |
} |
| 480 |
|
| 481 |
/* Code taken from ping.c */ |
| 482 |
static int |
| 483 |
isqrt(int n) |
| 484 |
{ |
| 485 |
int y, x = 1; |
| 486 |
|
| 487 |
if (n == 0) |
| 488 |
return (0); |
| 489 |
|
| 490 |
do { /* newton was a stinker */ |
| 491 |
y = x; |
| 492 |
x = n / x; |
| 493 |
x += y; |
| 494 |
x /= 2; |
| 495 |
} while ((x - y) > 1 || (x - y) < -1); |
| 496 |
|
| 497 |
return (x); |
| 498 |
} |
| 499 |
|
| 500 |
static int |
| 501 |
stone(int *a, int n, int *b, int *c, int flags) |
| 502 |
{ |
| 503 |
int i, k, y, j, l; |
| 504 |
int oldc, tc, oldl, sq; |
| 505 |
u_int numtries, bound; |
| 506 |
|
| 507 |
if (flags & D_MINIMAL) |
| 508 |
bound = UINT_MAX; |
| 509 |
else { |
| 510 |
sq = isqrt(n); |
| 511 |
bound = MAXIMUM(256, sq); |
| 512 |
} |
| 513 |
|
| 514 |
k = 0; |
| 515 |
c[0] = newcand(0, 0, 0); |
| 516 |
for (i = 1; i <= n; i++) { |
| 517 |
j = a[i]; |
| 518 |
if (j == 0) |
| 519 |
continue; |
| 520 |
y = -b[j]; |
| 521 |
oldl = 0; |
| 522 |
oldc = c[0]; |
| 523 |
numtries = 0; |
| 524 |
do { |
| 525 |
if (y <= clist[oldc].y) |
| 526 |
continue; |
| 527 |
l = search(c, k, y); |
| 528 |
if (l != oldl + 1) |
| 529 |
oldc = c[l - 1]; |
| 530 |
if (l <= k) { |
| 531 |
if (clist[c[l]].y <= y) |
| 532 |
continue; |
| 533 |
tc = c[l]; |
| 534 |
c[l] = newcand(i, y, oldc); |
| 535 |
oldc = tc; |
| 536 |
oldl = l; |
| 537 |
numtries++; |
| 538 |
} else { |
| 539 |
c[l] = newcand(i, y, oldc); |
| 540 |
k++; |
| 541 |
break; |
| 542 |
} |
| 543 |
} while ((y = b[++j]) > 0 && numtries < bound); |
| 544 |
} |
| 545 |
return (k); |
| 546 |
} |
| 547 |
|
| 548 |
static int |
| 549 |
newcand(int x, int y, int pred) |
| 550 |
{ |
| 551 |
struct cand *q; |
| 552 |
|
| 553 |
if (clen == clistlen) { |
| 554 |
clistlen = clistlen * 11 / 10; |
| 555 |
clist = xreallocarray(clist, clistlen, sizeof(*clist)); |
| 556 |
} |
| 557 |
q = clist + clen; |
| 558 |
q->x = x; |
| 559 |
q->y = y; |
| 560 |
q->pred = pred; |
| 561 |
return (clen++); |
| 562 |
} |
| 563 |
|
| 564 |
static int |
| 565 |
search(int *c, int k, int y) |
| 566 |
{ |
| 567 |
int i, j, l, t; |
| 568 |
|
| 569 |
if (clist[c[k]].y < y) /* quick look for typical case */ |
| 570 |
return (k + 1); |
| 571 |
i = 0; |
| 572 |
j = k + 1; |
| 573 |
for (;;) { |
| 574 |
l = (i + j) / 2; |
| 575 |
if (l <= i) |
| 576 |
break; |
| 577 |
t = clist[c[l]].y; |
| 578 |
if (t > y) |
| 579 |
j = l; |
| 580 |
else if (t < y) |
| 581 |
i = l; |
| 582 |
else |
| 583 |
return (l); |
| 584 |
} |
| 585 |
return (l + 1); |
| 586 |
} |
| 587 |
|
| 588 |
static void |
| 589 |
unravel(int p) |
| 590 |
{ |
| 591 |
struct cand *q; |
| 592 |
int i; |
| 593 |
|
| 594 |
for (i = 0; i <= len[0]; i++) |
| 595 |
J[i] = i <= pref ? i : |
| 596 |
i > len[0] - suff ? i + len[1] - len[0] : 0; |
| 597 |
for (q = clist + p; q->y != 0; q = clist + q->pred) |
| 598 |
J[q->x + pref] = q->y + pref; |
| 599 |
} |
| 600 |
|
| 601 |
/* |
| 602 |
* Check does double duty: |
| 603 |
* 1. ferret out any fortuitous correspondences due |
| 604 |
* to confounding by hashing (which result in "jackpot") |
| 605 |
* 2. collect random access indexes to the two files |
| 606 |
*/ |
| 607 |
static void |
| 608 |
check(FILE *f1, FILE *f2, int flags) |
| 609 |
{ |
| 610 |
int i, j, jackpot, c, d; |
| 611 |
long ctold, ctnew; |
| 612 |
|
| 613 |
rewind(f1); |
| 614 |
rewind(f2); |
| 615 |
j = 1; |
| 616 |
ixold[0] = ixnew[0] = 0; |
| 617 |
jackpot = 0; |
| 618 |
ctold = ctnew = 0; |
| 619 |
for (i = 1; i <= len[0]; i++) { |
| 620 |
if (J[i] == 0) { |
| 621 |
ixold[i] = ctold += skipline(f1); |
| 622 |
continue; |
| 623 |
} |
| 624 |
while (j < J[i]) { |
| 625 |
ixnew[j] = ctnew += skipline(f2); |
| 626 |
j++; |
| 627 |
} |
| 628 |
if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) { |
| 629 |
for (;;) { |
| 630 |
c = getc(f1); |
| 631 |
d = getc(f2); |
| 632 |
/* |
| 633 |
* GNU diff ignores a missing newline |
| 634 |
* in one file for -b or -w. |
| 635 |
*/ |
| 636 |
if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) { |
| 637 |
if (c == EOF && d == '\r') { |
| 638 |
ctnew++; |
| 639 |
break; |
| 640 |
} else if (c == '\r' && d == EOF) { |
| 641 |
ctold++; |
| 642 |
break; |
| 643 |
} |
| 644 |
} |
| 645 |
ctold++; |
| 646 |
ctnew++; |
| 647 |
if ((flags & D_FOLDBLANKS) && isspace(c) && |
| 648 |
isspace(d)) { |
| 649 |
do { |
| 650 |
if (c == '\r') |
| 651 |
break; |
| 652 |
ctold++; |
| 653 |
} while (isspace(c = getc(f1))); |
| 654 |
do { |
| 655 |
if (d == '\r') |
| 656 |
break; |
| 657 |
ctnew++; |
| 658 |
} while (isspace(d = getc(f2))); |
| 659 |
} else if ((flags & D_IGNOREBLANKS)) { |
| 660 |
while (isspace(c) && c != '\r') { |
| 661 |
c = getc(f1); |
| 662 |
ctold++; |
| 663 |
} |
| 664 |
while (isspace(d) && d != '\r') { |
| 665 |
d = getc(f2); |
| 666 |
ctnew++; |
| 667 |
} |
| 668 |
} |
| 669 |
if (chrtran[c] != chrtran[d]) { |
| 670 |
jackpot++; |
| 671 |
J[i] = 0; |
| 672 |
if (c != '\r' && c != EOF) |
| 673 |
ctold += skipline(f1); |
| 674 |
if (d != '\r' && c != EOF) |
| 675 |
ctnew += skipline(f2); |
| 676 |
break; |
| 677 |
} |
| 678 |
if (c == '\r' || c == EOF) |
| 679 |
break; |
| 680 |
} |
| 681 |
} else { |
| 682 |
for (;;) { |
| 683 |
ctold++; |
| 684 |
ctnew++; |
| 685 |
if ((c = getc(f1)) != (d = getc(f2))) { |
| 686 |
/* jackpot++; */ |
| 687 |
J[i] = 0; |
| 688 |
if (c != '\r' && c != EOF) |
| 689 |
ctold += skipline(f1); |
| 690 |
if (d != '\r' && c != EOF) |
| 691 |
ctnew += skipline(f2); |
| 692 |
break; |
| 693 |
} |
| 694 |
if (c == '\r' || c == EOF) |
| 695 |
break; |
| 696 |
} |
| 697 |
} |
| 698 |
ixold[i] = ctold; |
| 699 |
ixnew[j] = ctnew; |
| 700 |
j++; |
| 701 |
} |
| 702 |
for (; j <= len[1]; j++) |
| 703 |
ixnew[j] = ctnew += skipline(f2); |
| 704 |
/* |
| 705 |
* if (jackpot) |
| 706 |
* fprintf(stderr, "jackpot\n"); |
| 707 |
*/ |
| 708 |
} |
| 709 |
|
| 710 |
/* shellsort CACM #201 */ |
| 711 |
static void |
| 712 |
sort(struct line *a, int n) |
| 713 |
{ |
| 714 |
struct line *ai, *aim, w; |
| 715 |
int j, m = 0, k; |
| 716 |
|
| 717 |
if (n == 0) |
| 718 |
return; |
| 719 |
for (j = 1; j <= n; j *= 2) |
| 720 |
m = 2 * j - 1; |
| 721 |
for (m /= 2; m != 0; m /= 2) { |
| 722 |
k = n - m; |
| 723 |
for (j = 1; j <= k; j++) { |
| 724 |
for (ai = &a[j]; ai > a; ai -= m) { |
| 725 |
aim = &ai[m]; |
| 726 |
if (aim < ai) |
| 727 |
break; /* wraparound */ |
| 728 |
if (aim->value > ai[0].value || |
| 729 |
(aim->value == ai[0].value && |
| 730 |
aim->serial > ai[0].serial)) |
| 731 |
break; |
| 732 |
w.value = ai[0].value; |
| 733 |
ai[0].value = aim->value; |
| 734 |
aim->value = w.value; |
| 735 |
w.serial = ai[0].serial; |
| 736 |
ai[0].serial = aim->serial; |
| 737 |
aim->serial = w.serial; |
| 738 |
} |
| 739 |
} |
| 740 |
} |
| 741 |
} |
| 742 |
|
| 743 |
static void |
| 744 |
unsort(struct line *f, int l, int *b) |
| 745 |
{ |
| 746 |
int *a, i; |
| 747 |
|
| 748 |
a = xcalloc(l + 1, sizeof(*a), "diff unsort"); |
| 749 |
for (i = 1; i <= l; i++) |
| 750 |
a[f[i].serial] = f[i].value; |
| 751 |
for (i = 1; i <= l; i++) |
| 752 |
b[i] = a[i]; |
| 753 |
xfree(&a); |
| 754 |
} |
| 755 |
|
| 756 |
static int |
| 757 |
skipline(FILE *f) |
| 758 |
{ |
| 759 |
int i, c; |
| 760 |
|
| 761 |
for (i = 1; (c = getc(f)) != '\r' && c != EOF; i++) |
| 762 |
continue; |
| 763 |
return (i); |
| 764 |
} |
| 765 |
|
| 766 |
static void |
| 767 |
output(char *file1, FILE *f1, char *file2, FILE *f2, int flags) |
| 768 |
{ |
| 769 |
int m, i0, i1, j0, j1; |
| 770 |
|
| 771 |
rewind(f1); |
| 772 |
rewind(f2); |
| 773 |
m = len[0]; |
| 774 |
J[0] = 0; |
| 775 |
J[m + 1] = len[1] + 1; |
| 776 |
if (diff_format != D_EDIT) { |
| 777 |
for (i0 = 1; i0 <= m; i0 = i1 + 1) { |
| 778 |
while (i0 <= m && J[i0] == J[i0 - 1] + 1) |
| 779 |
i0++; |
| 780 |
j0 = J[i0 - 1] + 1; |
| 781 |
i1 = i0 - 1; |
| 782 |
while (i1 < m && J[i1 + 1] == 0) |
| 783 |
i1++; |
| 784 |
j1 = J[i1 + 1] - 1; |
| 785 |
J[i1] = j1; |
| 786 |
change(file1, f1, file2, f2, i0, i1, j0, j1, &flags); |
| 787 |
} |
| 788 |
} else { |
| 789 |
for (i0 = m; i0 >= 1; i0 = i1 - 1) { |
| 790 |
while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) |
| 791 |
i0--; |
| 792 |
j0 = J[i0 + 1] - 1; |
| 793 |
i1 = i0 + 1; |
| 794 |
while (i1 > 1 && J[i1 - 1] == 0) |
| 795 |
i1--; |
| 796 |
j1 = J[i1 - 1] + 1; |
| 797 |
J[i1] = j1; |
| 798 |
change(file1, f1, file2, f2, i1, i0, j1, j0, &flags); |
| 799 |
} |
| 800 |
} |
| 801 |
if (m == 0) |
| 802 |
change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags); |
| 803 |
if (diff_format == D_IFDEF) { |
| 804 |
for (;;) { |
| 805 |
#define c i0 |
| 806 |
if ((c = getc(f1)) == EOF) |
| 807 |
return; |
| 808 |
diff_output("%c", c); |
| 809 |
} |
| 810 |
#undef c |
| 811 |
} |
| 812 |
if (anychange != 0) { |
| 813 |
if (diff_format == D_CONTEXT) |
| 814 |
dump_context_vec(f1, f2, flags); |
| 815 |
else if (diff_format == D_UNIFIED) |
| 816 |
dump_unified_vec(f1, f2, flags); |
| 817 |
} |
| 818 |
} |
| 819 |
|
| 820 |
static void |
| 821 |
range(int a, int b, char *separator) |
| 822 |
{ |
| 823 |
diff_output("%d", a > b ? b : a); |
| 824 |
if (a < b) |
| 825 |
diff_output("%s%d", separator, b); |
| 826 |
} |
| 827 |
|
| 828 |
static void |
| 829 |
uni_range(int a, int b) |
| 830 |
{ |
| 831 |
if (a < b) |
| 832 |
diff_output("%d,%d", a, b - a + 1); |
| 833 |
else if (a == b) |
| 834 |
diff_output("%d", b); |
| 835 |
else |
| 836 |
diff_output("%d,0", b); |
| 837 |
} |
| 838 |
|
| 839 |
static char * |
| 840 |
preadline(int fd, size_t rlen, off_t off) |
| 841 |
{ |
| 842 |
char *line; |
| 843 |
ssize_t nr; |
| 844 |
off_t pos; |
| 845 |
|
| 846 |
line = xmalloc(rlen + 1, "diff preadline"); |
| 847 |
pos = lseek(fd, 0, SEEK_CUR); |
| 848 |
lseek(fd, off, SEEK_SET); |
| 849 |
if ((nr = read(fd, line, rlen)) == -1) |
| 850 |
err(2, "preadline"); |
| 851 |
lseek(fd, pos, SEEK_SET); |
| 852 |
if (nr > 0 && line[nr-1] == '\r') |
| 853 |
nr--; |
| 854 |
line[nr] = '\0'; |
| 855 |
return (line); |
| 856 |
} |
| 857 |
|
| 858 |
static int |
| 859 |
ignoreline(char *line) |
| 860 |
{ |
| 861 |
#if 0 |
| 862 |
int ret; |
| 863 |
|
| 864 |
ret = regexec(&ignore_re, line, 0, NULL, 0); |
| 865 |
xfree(&line); |
| 866 |
return (ret == 0); /* if it matched, it should be ignored. */ |
| 867 |
#endif |
| 868 |
return 0; |
| 869 |
} |
| 870 |
|
| 871 |
/* |
| 872 |
* Indicate that there is a difference between lines a and b of the from file |
| 873 |
* to get to lines c to d of the to file. If a is greater then b then there |
| 874 |
* are no lines in the from file involved and this means that there were |
| 875 |
* lines appended (beginning at b). If c is greater than d then there are |
| 876 |
* lines missing from the to file. |
| 877 |
*/ |
| 878 |
static void |
| 879 |
change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d, |
| 880 |
int *pflags) |
| 881 |
{ |
| 882 |
static size_t max_context = 64; |
| 883 |
int i; |
| 884 |
|
| 885 |
restart: |
| 886 |
if (diff_format != D_IFDEF && a > b && c > d) |
| 887 |
return; |
| 888 |
if (ignore_pats != NULL) { |
| 889 |
char *line; |
| 890 |
/* |
| 891 |
* All lines in the change, insert, or delete must |
| 892 |
* match an ignore pattern for the change to be |
| 893 |
* ignored. |
| 894 |
*/ |
| 895 |
if (a <= b) { /* Changes and deletes. */ |
| 896 |
for (i = a; i <= b; i++) { |
| 897 |
line = preadline(fileno(f1), |
| 898 |
ixold[i] - ixold[i - 1], ixold[i - 1]); |
| 899 |
if (!ignoreline(line)) |
| 900 |
goto proceed; |
| 901 |
} |
| 902 |
} |
| 903 |
if (a > b || c <= d) { /* Changes and inserts. */ |
| 904 |
for (i = c; i <= d; i++) { |
| 905 |
line = preadline(fileno(f2), |
| 906 |
ixnew[i] - ixnew[i - 1], ixnew[i - 1]); |
| 907 |
if (!ignoreline(line)) |
| 908 |
goto proceed; |
| 909 |
} |
| 910 |
} |
| 911 |
return; |
| 912 |
} |
| 913 |
proceed: |
| 914 |
if (*pflags & D_HEADER) { |
| 915 |
diff_output("%s %s\r", file1, file2); |
| 916 |
*pflags &= ~D_HEADER; |
| 917 |
} |
| 918 |
if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) { |
| 919 |
/* |
| 920 |
* Allocate change records as needed. |
| 921 |
*/ |
| 922 |
if (context_vec_ptr == context_vec_end - 1) { |
| 923 |
ptrdiff_t offset = context_vec_ptr - context_vec_start; |
| 924 |
max_context <<= 1; |
| 925 |
context_vec_start = xreallocarray(context_vec_start, |
| 926 |
max_context, sizeof(*context_vec_start)); |
| 927 |
context_vec_end = context_vec_start + max_context; |
| 928 |
context_vec_ptr = context_vec_start + offset; |
| 929 |
} |
| 930 |
if (anychange == 0) { |
| 931 |
/* |
| 932 |
* Print the context/unidiff header first time through. |
| 933 |
*/ |
| 934 |
print_header(file1, file2); |
| 935 |
anychange = 1; |
| 936 |
} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 && |
| 937 |
c > context_vec_ptr->d + (2 * diff_context) + 1) { |
| 938 |
/* |
| 939 |
* If this change is more than 'diff_context' lines from the |
| 940 |
* previous change, dump the record and reset it. |
| 941 |
*/ |
| 942 |
if (diff_format == D_CONTEXT) |
| 943 |
dump_context_vec(f1, f2, *pflags); |
| 944 |
else |
| 945 |
dump_unified_vec(f1, f2, *pflags); |
| 946 |
} |
| 947 |
context_vec_ptr++; |
| 948 |
context_vec_ptr->a = a; |
| 949 |
context_vec_ptr->b = b; |
| 950 |
context_vec_ptr->c = c; |
| 951 |
context_vec_ptr->d = d; |
| 952 |
return; |
| 953 |
} |
| 954 |
if (anychange == 0) |
| 955 |
anychange = 1; |
| 956 |
switch (diff_format) { |
| 957 |
case D_BRIEF: |
| 958 |
return; |
| 959 |
case D_NORMAL: |
| 960 |
case D_EDIT: |
| 961 |
range(a, b, ","); |
| 962 |
diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); |
| 963 |
if (diff_format == D_NORMAL) |
| 964 |
range(c, d, ","); |
| 965 |
diff_output("\r"); |
| 966 |
break; |
| 967 |
case D_REVERSE: |
| 968 |
diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); |
| 969 |
range(a, b, " "); |
| 970 |
diff_output("\r"); |
| 971 |
break; |
| 972 |
case D_NREVERSE: |
| 973 |
if (a > b) |
| 974 |
diff_output("a%d %d\r", b, d - c + 1); |
| 975 |
else { |
| 976 |
diff_output("d%d %d\r", a, b - a + 1); |
| 977 |
if (!(c > d)) |
| 978 |
/* add changed lines */ |
| 979 |
diff_output("a%d %d\r", b, d - c + 1); |
| 980 |
} |
| 981 |
break; |
| 982 |
} |
| 983 |
if (diff_format == D_NORMAL || diff_format == D_IFDEF) { |
| 984 |
fetch(ixold, a, b, f1, '<', 1, *pflags); |
| 985 |
if (a <= b && c <= d && diff_format == D_NORMAL) |
| 986 |
diff_output("---\r"); |
| 987 |
} |
| 988 |
i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags); |
| 989 |
if (i != 0 && diff_format == D_EDIT) { |
| 990 |
/* |
| 991 |
* A non-zero return value for D_EDIT indicates that the |
| 992 |
* last line printed was a bare dot (".") that has been |
| 993 |
* escaped as ".." to prevent ed(1) from misinterpreting |
| 994 |
* it. We have to add a substitute command to change this |
| 995 |
* back and restart where we left off. |
| 996 |
*/ |
| 997 |
diff_output(".\r"); |
| 998 |
diff_output("%ds/.//\r", a + i - 1); |
| 999 |
b = a + i - 1; |
| 1000 |
a = b + 1; |
| 1001 |
c += i; |
| 1002 |
goto restart; |
| 1003 |
} |
| 1004 |
if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d) |
| 1005 |
diff_output(".\r"); |
| 1006 |
if (inifdef) { |
| 1007 |
diff_output("#endif /* %s */\r", ifdefname); |
| 1008 |
inifdef = 0; |
| 1009 |
} |
| 1010 |
} |
| 1011 |
|
| 1012 |
static int |
| 1013 |
fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags) |
| 1014 |
{ |
| 1015 |
int i, j, c, lastc, col, nc; |
| 1016 |
|
| 1017 |
/* |
| 1018 |
* When doing #ifdef's, copy down to current line |
| 1019 |
* if this is the first file, so that stuff makes it to output. |
| 1020 |
*/ |
| 1021 |
if (diff_format == D_IFDEF && oldfile) { |
| 1022 |
long curpos = ftell(lb); |
| 1023 |
/* print through if append (a>b), else to (nb: 0 vs 1 orig) */ |
| 1024 |
nc = f[a > b ? b : a - 1] - curpos; |
| 1025 |
for (i = 0; i < nc; i++) |
| 1026 |
diff_output("%c", getc(lb)); |
| 1027 |
} |
| 1028 |
if (a > b) |
| 1029 |
return (0); |
| 1030 |
if (diff_format == D_IFDEF) { |
| 1031 |
if (inifdef) { |
| 1032 |
diff_output("#else /* %s%s */\r", |
| 1033 |
oldfile == 1 ? "!" : "", ifdefname); |
| 1034 |
} else { |
| 1035 |
if (oldfile) |
| 1036 |
diff_output("#ifndef %s\r", ifdefname); |
| 1037 |
else |
| 1038 |
diff_output("#ifdef %s\r", ifdefname); |
| 1039 |
} |
| 1040 |
inifdef = 1 + oldfile; |
| 1041 |
} |
| 1042 |
for (i = a; i <= b; i++) { |
| 1043 |
fseek(lb, f[i - 1], SEEK_SET); |
| 1044 |
nc = f[i] - f[i - 1]; |
| 1045 |
if (diff_format != D_IFDEF && ch != '\0') { |
| 1046 |
diff_output("%c", ch); |
| 1047 |
#if 0 |
| 1048 |
if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT |
| 1049 |
|| diff_format == D_UNIFIED)) |
| 1050 |
diff_output("\t"); |
| 1051 |
else |
| 1052 |
#endif |
| 1053 |
if (diff_format != D_UNIFIED) |
| 1054 |
diff_output(" "); |
| 1055 |
} |
| 1056 |
col = 0; |
| 1057 |
for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) { |
| 1058 |
if ((c = getc(lb)) == EOF) { |
| 1059 |
if (diff_format == D_EDIT || diff_format == D_REVERSE || |
| 1060 |
diff_format == D_NREVERSE) |
| 1061 |
warnx("No newline at end of file"); |
| 1062 |
else |
| 1063 |
#if 0 |
| 1064 |
diff_output("\r\\ No newline at end of " |
| 1065 |
"file\r"); |
| 1066 |
#else |
| 1067 |
diff_output("\r"); |
| 1068 |
#endif |
| 1069 |
return (0); |
| 1070 |
} |
| 1071 |
if (c == '\n') |
| 1072 |
c = '\n'; |
| 1073 |
if (c == '\t' && (flags & D_EXPANDTABS)) { |
| 1074 |
do { |
| 1075 |
diff_output(" "); |
| 1076 |
} while (++col & 7); |
| 1077 |
} else { |
| 1078 |
if (diff_format == D_EDIT && j == 1 && c == '\r' |
| 1079 |
&& lastc == '.') { |
| 1080 |
/* |
| 1081 |
* Don't print a bare "." line |
| 1082 |
* since that will confuse ed(1). |
| 1083 |
* Print ".." instead and return, |
| 1084 |
* giving the caller an offset |
| 1085 |
* from which to restart. |
| 1086 |
*/ |
| 1087 |
diff_output(".\r"); |
| 1088 |
return (i - a + 1); |
| 1089 |
} |
| 1090 |
diff_output("%c", c); |
| 1091 |
col++; |
| 1092 |
} |
| 1093 |
} |
| 1094 |
} |
| 1095 |
return (0); |
| 1096 |
} |
| 1097 |
|
| 1098 |
/* |
| 1099 |
* Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. |
| 1100 |
*/ |
| 1101 |
static int |
| 1102 |
readhash(FILE *f, int flags) |
| 1103 |
{ |
| 1104 |
int i, t, space; |
| 1105 |
int sum; |
| 1106 |
|
| 1107 |
sum = 1; |
| 1108 |
space = 0; |
| 1109 |
if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) { |
| 1110 |
if (flags & D_IGNORECASE) |
| 1111 |
for (i = 0; (t = getc(f)) != '\r'; i++) { |
| 1112 |
if (t == EOF) { |
| 1113 |
if (i == 0) |
| 1114 |
return (0); |
| 1115 |
break; |
| 1116 |
} |
| 1117 |
sum = sum * 127 + chrtran[t]; |
| 1118 |
} |
| 1119 |
else |
| 1120 |
for (i = 0; (t = getc(f)) != '\r'; i++) { |
| 1121 |
if (t == EOF) { |
| 1122 |
if (i == 0) |
| 1123 |
return (0); |
| 1124 |
break; |
| 1125 |
} |
| 1126 |
sum = sum * 127 + t; |
| 1127 |
} |
| 1128 |
} else { |
| 1129 |
for (i = 0;;) { |
| 1130 |
switch (t = getc(f)) { |
| 1131 |
case '\t': |
| 1132 |
case '\n': |
| 1133 |
case '\v': |
| 1134 |
case '\f': |
| 1135 |
case ' ': |
| 1136 |
space++; |
| 1137 |
continue; |
| 1138 |
default: |
| 1139 |
if (space && (flags & D_IGNOREBLANKS) == 0) { |
| 1140 |
i++; |
| 1141 |
space = 0; |
| 1142 |
} |
| 1143 |
sum = sum * 127 + chrtran[t]; |
| 1144 |
i++; |
| 1145 |
continue; |
| 1146 |
case EOF: |
| 1147 |
if (i == 0) |
| 1148 |
return (0); |
| 1149 |
/* FALLTHROUGH */ |
| 1150 |
case '\r': |
| 1151 |
break; |
| 1152 |
} |
| 1153 |
break; |
| 1154 |
} |
| 1155 |
} |
| 1156 |
/* |
| 1157 |
* There is a remote possibility that we end up with a zero sum. |
| 1158 |
* Zero is used as an EOF marker, so return 1 instead. |
| 1159 |
*/ |
| 1160 |
return (sum == 0 ? 1 : sum); |
| 1161 |
} |
| 1162 |
|
| 1163 |
static int |
| 1164 |
asciifile(FILE *f) |
| 1165 |
{ |
| 1166 |
unsigned char buf[BUFSIZ]; |
| 1167 |
size_t cnt; |
| 1168 |
|
| 1169 |
if (f == NULL) |
| 1170 |
return (1); |
| 1171 |
|
| 1172 |
rewind(f); |
| 1173 |
cnt = fread(buf, 1, sizeof(buf), f); |
| 1174 |
return (memchr(buf, '\0', cnt) == NULL); |
| 1175 |
} |
| 1176 |
|
| 1177 |
#define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0) |
| 1178 |
|
| 1179 |
static char * |
| 1180 |
match_function(const long *f, int pos, FILE *fp) |
| 1181 |
{ |
| 1182 |
unsigned char buf[FUNCTION_CONTEXT_SIZE]; |
| 1183 |
size_t nc; |
| 1184 |
int last = lastline; |
| 1185 |
char *state = NULL; |
| 1186 |
|
| 1187 |
lastline = pos; |
| 1188 |
while (pos > last) { |
| 1189 |
fseek(fp, f[pos - 1], SEEK_SET); |
| 1190 |
nc = f[pos] - f[pos - 1]; |
| 1191 |
if (nc >= sizeof(buf)) |
| 1192 |
nc = sizeof(buf) - 1; |
| 1193 |
nc = fread(buf, 1, nc, fp); |
| 1194 |
if (nc > 0) { |
| 1195 |
buf[nc] = '\0'; |
| 1196 |
buf[strcspn((const char *)buf, "\r")] = '\0'; |
| 1197 |
if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') { |
| 1198 |
if (begins_with((const char *)buf, "private:")) { |
| 1199 |
if (!state) |
| 1200 |
state = " (private)"; |
| 1201 |
} else if (begins_with((const char *)buf, "protected:")) { |
| 1202 |
if (!state) |
| 1203 |
state = " (protected)"; |
| 1204 |
} else if (begins_with((const char *)buf, "public:")) { |
| 1205 |
if (!state) |
| 1206 |
state = " (public)"; |
| 1207 |
} else { |
| 1208 |
strlcpy(lastbuf, (const char *)buf, sizeof lastbuf); |
| 1209 |
if (state) |
| 1210 |
strlcat(lastbuf, (const char *)state, sizeof lastbuf); |
| 1211 |
lastmatchline = pos; |
| 1212 |
return lastbuf; |
| 1213 |
} |
| 1214 |
} |
| 1215 |
} |
| 1216 |
pos--; |
| 1217 |
} |
| 1218 |
return lastmatchline > 0 ? lastbuf : NULL; |
| 1219 |
} |
| 1220 |
|
| 1221 |
/* dump accumulated "context" diff changes */ |
| 1222 |
static void |
| 1223 |
dump_context_vec(FILE *f1, FILE *f2, int flags) |
| 1224 |
{ |
| 1225 |
struct context_vec *cvp = context_vec_start; |
| 1226 |
int lowa, upb, lowc, upd, do_output; |
| 1227 |
int a, b, c, d; |
| 1228 |
char ch, *f; |
| 1229 |
|
| 1230 |
if (context_vec_start > context_vec_ptr) |
| 1231 |
return; |
| 1232 |
|
| 1233 |
b = d = 0; /* gcc */ |
| 1234 |
lowa = MAXIMUM(1, cvp->a - diff_context); |
| 1235 |
upb = MINIMUM(len[0], context_vec_ptr->b + diff_context); |
| 1236 |
lowc = MAXIMUM(1, cvp->c - diff_context); |
| 1237 |
upd = MINIMUM(len[1], context_vec_ptr->d + diff_context); |
| 1238 |
|
| 1239 |
diff_output("***************"); |
| 1240 |
if ((flags & D_PROTOTYPE)) { |
| 1241 |
f = match_function(ixold, lowa-1, f1); |
| 1242 |
if (f != NULL) |
| 1243 |
diff_output(" %s", f); |
| 1244 |
} |
| 1245 |
diff_output("\r*** "); |
| 1246 |
range(lowa, upb, ","); |
| 1247 |
diff_output(" ****\r"); |
| 1248 |
|
| 1249 |
/* |
| 1250 |
* Output changes to the "old" file. The first loop suppresses |
| 1251 |
* output if there were no changes to the "old" file (we'll see |
| 1252 |
* the "old" lines as context in the "new" list). |
| 1253 |
*/ |
| 1254 |
do_output = 0; |
| 1255 |
for (; cvp <= context_vec_ptr; cvp++) |
| 1256 |
if (cvp->a <= cvp->b) { |
| 1257 |
cvp = context_vec_start; |
| 1258 |
do_output++; |
| 1259 |
break; |
| 1260 |
} |
| 1261 |
if (do_output) { |
| 1262 |
while (cvp <= context_vec_ptr) { |
| 1263 |
a = cvp->a; |
| 1264 |
b = cvp->b; |
| 1265 |
c = cvp->c; |
| 1266 |
d = cvp->d; |
| 1267 |
|
| 1268 |
if (a <= b && c <= d) |
| 1269 |
ch = 'c'; |
| 1270 |
else |
| 1271 |
ch = (a <= b) ? 'd' : 'a'; |
| 1272 |
|
| 1273 |
if (ch == 'a') |
| 1274 |
fetch(ixold, lowa, b, f1, ' ', 0, flags); |
| 1275 |
else { |
| 1276 |
fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); |
| 1277 |
fetch(ixold, a, b, f1, |
| 1278 |
ch == 'c' ? '!' : '-', 0, flags); |
| 1279 |
} |
| 1280 |
lowa = b + 1; |
| 1281 |
cvp++; |
| 1282 |
} |
| 1283 |
fetch(ixold, b + 1, upb, f1, ' ', 0, flags); |
| 1284 |
} |
| 1285 |
/* output changes to the "new" file */ |
| 1286 |
diff_output("--- "); |
| 1287 |
range(lowc, upd, ","); |
| 1288 |
diff_output(" ----\r"); |
| 1289 |
|
| 1290 |
do_output = 0; |
| 1291 |
for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) |
| 1292 |
if (cvp->c <= cvp->d) { |
| 1293 |
cvp = context_vec_start; |
| 1294 |
do_output++; |
| 1295 |
break; |
| 1296 |
} |
| 1297 |
if (do_output) { |
| 1298 |
while (cvp <= context_vec_ptr) { |
| 1299 |
a = cvp->a; |
| 1300 |
b = cvp->b; |
| 1301 |
c = cvp->c; |
| 1302 |
d = cvp->d; |
| 1303 |
|
| 1304 |
if (a <= b && c <= d) |
| 1305 |
ch = 'c'; |
| 1306 |
else |
| 1307 |
ch = (a <= b) ? 'd' : 'a'; |
| 1308 |
|
| 1309 |
if (ch == 'd') |
| 1310 |
fetch(ixnew, lowc, d, f2, ' ', 0, flags); |
| 1311 |
else { |
| 1312 |
fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); |
| 1313 |
fetch(ixnew, c, d, f2, |
| 1314 |
ch == 'c' ? '!' : '+', 0, flags); |
| 1315 |
} |
| 1316 |
lowc = d + 1; |
| 1317 |
cvp++; |
| 1318 |
} |
| 1319 |
fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); |
| 1320 |
} |
| 1321 |
context_vec_ptr = context_vec_start - 1; |
| 1322 |
} |
| 1323 |
|
| 1324 |
/* dump accumulated "unified" diff changes */ |
| 1325 |
static void |
| 1326 |
dump_unified_vec(FILE *f1, FILE *f2, int flags) |
| 1327 |
{ |
| 1328 |
struct context_vec *cvp = context_vec_start; |
| 1329 |
int lowa, upb, lowc, upd; |
| 1330 |
int a, b, c, d; |
| 1331 |
char ch, *f; |
| 1332 |
|
| 1333 |
if (context_vec_start > context_vec_ptr) |
| 1334 |
return; |
| 1335 |
|
| 1336 |
b = d = 0; /* gcc */ |
| 1337 |
lowa = MAXIMUM(1, cvp->a - diff_context); |
| 1338 |
upb = MINIMUM(len[0], context_vec_ptr->b + diff_context); |
| 1339 |
lowc = MAXIMUM(1, cvp->c - diff_context); |
| 1340 |
upd = MINIMUM(len[1], context_vec_ptr->d + diff_context); |
| 1341 |
|
| 1342 |
diff_output("@@ -"); |
| 1343 |
uni_range(lowa, upb); |
| 1344 |
diff_output(" +"); |
| 1345 |
uni_range(lowc, upd); |
| 1346 |
diff_output(" @@"); |
| 1347 |
if ((flags & D_PROTOTYPE)) { |
| 1348 |
f = match_function(ixold, lowa-1, f1); |
| 1349 |
if (f != NULL) |
| 1350 |
diff_output(" %s", f); |
| 1351 |
} |
| 1352 |
diff_output("\r"); |
| 1353 |
|
| 1354 |
/* |
| 1355 |
* Output changes in "unified" diff format--the old and new lines |
| 1356 |
* are printed together. |
| 1357 |
*/ |
| 1358 |
for (; cvp <= context_vec_ptr; cvp++) { |
| 1359 |
a = cvp->a; |
| 1360 |
b = cvp->b; |
| 1361 |
c = cvp->c; |
| 1362 |
d = cvp->d; |
| 1363 |
|
| 1364 |
/* |
| 1365 |
* c: both new and old changes |
| 1366 |
* d: only changes in the old file |
| 1367 |
* a: only changes in the new file |
| 1368 |
*/ |
| 1369 |
if (a <= b && c <= d) |
| 1370 |
ch = 'c'; |
| 1371 |
else |
| 1372 |
ch = (a <= b) ? 'd' : 'a'; |
| 1373 |
|
| 1374 |
switch (ch) { |
| 1375 |
case 'c': |
| 1376 |
fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); |
| 1377 |
fetch(ixold, a, b, f1, '-', 0, flags); |
| 1378 |
fetch(ixnew, c, d, f2, '+', 0, flags); |
| 1379 |
break; |
| 1380 |
case 'd': |
| 1381 |
fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); |
| 1382 |
fetch(ixold, a, b, f1, '-', 0, flags); |
| 1383 |
break; |
| 1384 |
case 'a': |
| 1385 |
fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); |
| 1386 |
fetch(ixnew, c, d, f2, '+', 0, flags); |
| 1387 |
break; |
| 1388 |
} |
| 1389 |
lowa = b + 1; |
| 1390 |
lowc = d + 1; |
| 1391 |
} |
| 1392 |
fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); |
| 1393 |
|
| 1394 |
context_vec_ptr = context_vec_start - 1; |
| 1395 |
} |
| 1396 |
|
| 1397 |
static void |
| 1398 |
print_header(const char *file1, const char *file2) |
| 1399 |
{ |
| 1400 |
if (label[0] != NULL) |
| 1401 |
diff_output("%s %s\r", diff_format == D_CONTEXT ? "***" : "---", |
| 1402 |
label[0]); |
| 1403 |
else |
| 1404 |
diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---", |
| 1405 |
file1, ctime(&stb1.st_mtime)); |
| 1406 |
if (label[1] != NULL) |
| 1407 |
diff_output("%s %s\r", diff_format == D_CONTEXT ? "---" : "+++", |
| 1408 |
label[1]); |
| 1409 |
else |
| 1410 |
diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++", |
| 1411 |
file2, ctime(&stb2.st_mtime)); |
| 1412 |
} |