Download
jcs
/amend
/diffreg.c
(View History)
jcs *: Lots of little fixes and dead variable removal | Latest amendment: 110 on 2023-02-06 |
1 | /* $OpenBSD: diffreg.c,v 1.93 2019/06/28 13:35:00 deraadt Exp $ */ |
2 | |
3 | /* |
4 | * Copyright (C) Caldera International Inc. 2001-2002. |
5 | * All rights reserved. |
6 | * |
7 | * Redistribution and use in source and binary forms, with or without |
8 | * modification, are permitted provided that the following conditions |
9 | * are met: |
10 | * 1. Redistributions of source code and documentation must retain the above |
11 | * copyright notice, this list of conditions and the following disclaimer. |
12 | * 2. Redistributions in binary form must reproduce the above copyright |
13 | * notice, this list of conditions and the following disclaimer in the |
14 | * documentation and/or other materials provided with the distribution. |
15 | * 3. All advertising materials mentioning features or use of this software |
16 | * must display the following acknowledgement: |
17 | * This product includes software developed or owned by Caldera |
18 | * International, Inc. |
19 | * 4. Neither the name of Caldera International, Inc. nor the names of other |
20 | * contributors may be used to endorse or promote products derived from |
21 | * this software without specific prior written permission. |
22 | * |
23 | * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA |
24 | * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR |
25 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
26 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
27 | * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, |
28 | * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
29 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
30 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
32 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING |
33 | * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
34 | * POSSIBILITY OF SUCH DAMAGE. |
35 | */ |
36 | /*- |
37 | * Copyright (c) 1991, 1993 |
38 | * The Regents of the University of California. All rights reserved. |
39 | * |
40 | * Redistribution and use in source and binary forms, with or without |
41 | * modification, are permitted provided that the following conditions |
42 | * are met: |
43 | * 1. Redistributions of source code must retain the above copyright |
44 | * notice, this list of conditions and the following disclaimer. |
45 | * 2. Redistributions in binary form must reproduce the above copyright |
46 | * notice, this list of conditions and the following disclaimer in the |
47 | * documentation and/or other materials provided with the distribution. |
48 | * 3. Neither the name of the University nor the names of its contributors |
49 | * may be used to endorse or promote products derived from this software |
50 | * without specific prior written permission. |
51 | * |
52 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
53 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
54 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
55 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
56 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
57 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
58 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
59 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
60 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
61 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
62 | * SUCH DAMAGE. |
63 | * |
64 | * @(#)diffreg.c 8.1 (Berkeley) 6/6/93 |
65 | */ |
66 | |
67 | #include <ctype.h> |
68 | #include <errno.h> |
69 | #include <fcntl.h> |
70 | #include <stddef.h> |
71 | #include <stdio.h> |
72 | #include <stdlib.h> |
73 | #include <string.h> |
74 | |
75 | #include <unix.h> |
76 | |
77 | #include "diff.h" |
78 | #include "util.h" |
79 | |
80 | #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b)) |
81 | #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b)) |
82 | |
83 | /* |
84 | * diff - compare two files. |
85 | */ |
86 | |
87 | /* |
88 | * Uses an algorithm due to Harold Stone, which finds |
89 | * a pair of longest identical subsequences in the two |
90 | * files. |
91 | * |
92 | * The major goal is to generate the match vector J. |
93 | * J[i] is the index of the line in file1 corresponding |
94 | * to line i file0. J[i] = 0 if there is no |
95 | * such line in file1. |
96 | * |
97 | * Lines are hashed so as to work in core. All potential |
98 | * matches are located by sorting the lines of each file |
99 | * on the hash (called ``value''). In particular, this |
100 | * collects the equivalence classes in file1 together. |
101 | * Subroutine equiv replaces the value of each line in |
102 | * file0 by the index of the first element of its |
103 | * matching equivalence in (the reordered) file1. |
104 | * To save space equiv squeezes file1 into a single |
105 | * array member in which the equivalence classes |
106 | * are simply concatenated, except that their first |
107 | * members are flagged by changing sign. |
108 | * |
109 | * Next the indices that point into member are unsorted into |
110 | * array class according to the original order of file0. |
111 | * |
112 | * The cleverness lies in routine stone. This marches |
113 | * through the lines of file0, developing a vector klist |
114 | * of "k-candidates". At step i a k-candidate is a matched |
115 | * pair of lines x,y (x in file0 y in file1) such that |
116 | * there is a common subsequence of length k |
117 | * between the first i lines of file0 and the first y |
118 | * lines of file1, but there is no such subsequence for |
119 | * any smaller y. x is the earliest possible mate to y |
120 | * that occurs in such a subsequence. |
121 | * |
122 | * Whenever any of the members of the equivalence class of |
123 | * lines in file1 matable to a line in file0 has serial number |
124 | * less than the y of some k-candidate, that k-candidate |
125 | * with the smallest such y is replaced. The new |
126 | * k-candidate is chained (via pred) to the current |
127 | * k-1 candidate so that the actual subsequence can |
128 | * be recovered. When a member has serial number greater |
129 | * that the y of all k-candidates, the klist is extended. |
130 | * At the end, the longest subsequence is pulled out |
131 | * and placed in the array J by unravel |
132 | * |
133 | * With J in hand, the matches there recorded are |
134 | * check'ed against reality to assure that no spurious |
135 | * matches have crept in due to hashing. If they have, |
136 | * they are broken, and "jackpot" is recorded--a harmless |
137 | * matter except that a true match for a spuriously |
138 | * mated line may now be unnecessarily reported as a change. |
139 | * |
140 | * Much of the complexity of the program comes simply |
141 | * from trying to minimize core utilization and |
142 | * maximize the range of doable problems by dynamically |
143 | * allocating what is needed and reusing what is not. |
144 | * The core requirements for problems larger than somewhat |
145 | * are (in words) 2*length(file0) + length(file1) + |
146 | * 3*(number of k-candidates installed), typically about |
147 | * 6n words for files of length n. |
148 | */ |
149 | |
150 | struct cand { |
151 | int x; |
152 | int y; |
153 | int pred; |
154 | }; |
155 | |
156 | struct line { |
157 | int serial; |
158 | int value; |
159 | } *file[2]; |
160 | |
161 | /* |
162 | * The following struct is used to record change information when |
163 | * doing a "context" or "unified" diff. (see routine "change" to |
164 | * understand the highly mnemonic field names) |
165 | */ |
166 | struct context_vec { |
167 | int a; /* start line in old file */ |
168 | int b; /* end line in old file */ |
169 | int c; /* start line in new file */ |
170 | int d; /* end line in new file */ |
171 | }; |
172 | |
173 | static void output(char *, FILE *, char *, FILE *, int); |
174 | static void check(FILE *, FILE *, int); |
175 | static void range(int, int, char *); |
176 | static void uni_range(int, int); |
177 | static void dump_context_vec(FILE *, FILE *, int); |
178 | static void dump_unified_vec(FILE *, FILE *, int); |
179 | static void prepare(int, FILE *, off_t, int); |
180 | static void prune(void); |
181 | static void equiv(struct line *, int, struct line *, int, int *); |
182 | static void unravel(int); |
183 | static void unsort(struct line *, int, int *); |
184 | static void change(char *, FILE *, char *, FILE *, int, int, int, int, int *); |
185 | static void sort(struct line *, int); |
186 | static void print_header(const char *, const char *); |
187 | static int ignoreline(char *); |
188 | static int asciifile(FILE *); |
189 | static int fetch(long *, int, int, FILE *, int, int, int); |
190 | static int newcand(int, int, int); |
191 | static int search(int *, int, int); |
192 | static int skipline(FILE *); |
193 | static int isqrt(int); |
194 | static int stone(int *, int, int *, int *, int); |
195 | static int readhash(FILE *, int); |
196 | static int files_differ(FILE *, FILE *, int); |
197 | static char *match_function(const long *, int, FILE *); |
198 | static char *preadline(int, size_t, off_t); |
199 | |
200 | static int *J; /* will be overlaid on class */ |
201 | static int *class; /* will be overlaid on file[0] */ |
202 | static int *klist; /* will be overlaid on file[0] after class */ |
203 | static int *member; /* will be overlaid on file[1] */ |
204 | static int clen; |
205 | static int inifdef; /* whether or not we are in a #ifdef block */ |
206 | static int len[2]; |
207 | static int pref, suff; /* length of prefix and suffix */ |
208 | static int slen[2]; |
209 | static int anychange; |
210 | static long *ixnew; /* will be overlaid on file[1] */ |
211 | static long *ixold; /* will be overlaid on klist */ |
212 | static struct cand *clist; /* merely a free storage pot for candidates */ |
213 | static int clistlen; /* the length of clist */ |
214 | static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ |
215 | static u_char *chrtran; /* translation table for case-folding */ |
216 | static struct context_vec *context_vec_start; |
217 | static struct context_vec *context_vec_end; |
218 | static struct context_vec *context_vec_ptr; |
219 | |
220 | #define FUNCTION_CONTEXT_SIZE 55 |
221 | static char lastbuf[FUNCTION_CONTEXT_SIZE]; |
222 | static int lastline; |
223 | static int lastmatchline; |
224 | |
225 | |
226 | /* |
227 | * chrtran points to one of 2 translation tables: cup2low if folding upper to |
228 | * lower case clow2low if not folding case |
229 | */ |
230 | u_char clow2low[256] = { |
231 | 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, |
232 | 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, |
233 | 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, |
234 | 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, |
235 | 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, |
236 | 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, |
237 | 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, |
238 | 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, |
239 | 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, |
240 | 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, |
241 | 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, |
242 | 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, |
243 | 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, |
244 | 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, |
245 | 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, |
246 | 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, |
247 | 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, |
248 | 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, |
249 | 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, |
250 | 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, |
251 | 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, |
252 | 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, |
253 | 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, |
254 | 0xfd, 0xfe, 0xff |
255 | }; |
256 | |
257 | u_char cup2low[256] = { |
258 | 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, |
259 | 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, |
260 | 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, |
261 | 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, |
262 | 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, |
263 | 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, |
264 | 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, |
265 | 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, |
266 | 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, |
267 | 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, |
268 | 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, |
269 | 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, |
270 | 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, |
271 | 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, |
272 | 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, |
273 | 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, |
274 | 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, |
275 | 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, |
276 | 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, |
277 | 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, |
278 | 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, |
279 | 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, |
280 | 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, |
281 | 0xfd, 0xfe, 0xff |
282 | }; |
283 | |
284 | int |
285 | diffreg(char *file1, char *file2, int flags) |
286 | { |
287 | FILE *f1, *f2; |
288 | int i, rval; |
289 | |
290 | f1 = f2 = NULL; |
291 | rval = D_SAME; |
292 | anychange = 0; |
293 | lastline = 0; |
294 | lastmatchline = 0; |
295 | context_vec_ptr = context_vec_start - 1; |
296 | if (flags & D_IGNORECASE) |
297 | chrtran = cup2low; |
298 | else |
299 | chrtran = clow2low; |
300 | if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) |
301 | goto closem; |
302 | |
303 | f1 = fopen(file1, "rb"); |
304 | if (f1 == NULL) { |
305 | warn("failed to fopen %s", file1); |
306 | status |= 2; |
307 | goto closem; |
308 | } |
309 | |
310 | f2 = fopen(file2, "rb"); |
311 | if (f2 == NULL) { |
312 | warn("failed to fopen %s", file2); |
313 | status |= 2; |
314 | goto closem; |
315 | } |
316 | |
317 | switch (files_differ(f1, f2, flags)) { |
318 | case 0: |
319 | goto closem; |
320 | case 1: |
321 | break; |
322 | default: |
323 | /* error */ |
324 | status |= 2; |
325 | goto closem; |
326 | } |
327 | |
328 | if ((flags & D_FORCEASCII) == 0 && |
329 | (!asciifile(f1) || !asciifile(f2))) { |
330 | rval = D_BINARY; |
331 | status |= 1; |
332 | goto closem; |
333 | } |
334 | prepare(0, f1, stb1.st_size, flags); |
335 | prepare(1, f2, stb2.st_size, flags); |
336 | |
337 | prune(); |
338 | sort(sfile[0], slen[0]); |
339 | sort(sfile[1], slen[1]); |
340 | |
341 | member = (int *)file[1]; |
342 | equiv(sfile[0], slen[0], sfile[1], slen[1], member); |
343 | member = xreallocarray(member, slen[1] + 2, sizeof(*member)); |
344 | |
345 | class = (int *)file[0]; |
346 | unsort(sfile[0], slen[0], class); |
347 | class = xreallocarray(class, slen[0] + 2, sizeof(*class)); |
348 | |
349 | klist = xcalloc(slen[0] + 2, sizeof(*klist), "diffreg klist"); |
350 | clen = 0; |
351 | clistlen = 100; |
352 | clist = xcalloc(clistlen, sizeof(*clist), "diffreg clist"); |
353 | i = stone(class, slen[0], member, klist, flags); |
354 | xfree(&member); |
355 | xfree(&class); |
356 | |
357 | J = xreallocarray(J, len[0] + 2, sizeof(*J)); |
358 | unravel(klist[i]); |
359 | xfree(&clist); |
360 | xfree(&klist); |
361 | |
362 | ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold)); |
363 | ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew)); |
364 | check(f1, f2, flags); |
365 | output(file1, f1, file2, f2, flags); |
366 | closem: |
367 | if (anychange) { |
368 | status |= 1; |
369 | if (rval == D_SAME) |
370 | rval = D_DIFFER; |
371 | } |
372 | if (f1 != NULL) |
373 | fclose(f1); |
374 | if (f2 != NULL) |
375 | fclose(f2); |
376 | |
377 | return (rval); |
378 | } |
379 | |
380 | /* |
381 | * Check to see if the given files differ. |
382 | * Returns 0 if they are the same, 1 if different, and -1 on error. |
383 | * XXX - could use code from cmp(1) [faster] |
384 | */ |
385 | static int |
386 | files_differ(FILE *f1, FILE *f2, int flags) |
387 | { |
388 | char buf1[BUFSIZ], buf2[BUFSIZ]; |
389 | size_t i, j; |
390 | |
391 | if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size) |
392 | return (1); |
393 | for (;;) { |
394 | i = fread(buf1, 1, sizeof(buf1), f1); |
395 | j = fread(buf2, 1, sizeof(buf2), f2); |
396 | if ((!i && ferror(f1)) || (!j && ferror(f2))) |
397 | return (-1); |
398 | if (i != j) |
399 | return (1); |
400 | if (i == 0) |
401 | return (0); |
402 | if (memcmp(buf1, buf2, i) != 0) |
403 | return (1); |
404 | } |
405 | } |
406 | |
407 | static void |
408 | prepare(int i, FILE *fd, off_t filesize, int flags) |
409 | { |
410 | struct line *p; |
411 | int j, h; |
412 | size_t sz; |
413 | |
414 | rewind(fd); |
415 | |
416 | sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25; |
417 | if (sz < 100) |
418 | sz = 100; |
419 | |
420 | p = xcalloc(sz + 3, sizeof(*p), "diff prepare"); |
421 | for (j = 0; (h = readhash(fd, flags));) { |
422 | if (j == sz) { |
423 | sz = sz * 3 / 2; |
424 | p = xreallocarray(p, sz + 3, sizeof(*p)); |
425 | } |
426 | p[++j].value = h; |
427 | } |
428 | len[i] = j; |
429 | file[i] = p; |
430 | } |
431 | |
432 | static void |
433 | prune(void) |
434 | { |
435 | int i, j; |
436 | |
437 | for (pref = 0; pref < len[0] && pref < len[1] && |
438 | file[0][pref + 1].value == file[1][pref + 1].value; |
439 | pref++) |
440 | ; |
441 | for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && |
442 | file[0][len[0] - suff].value == file[1][len[1] - suff].value; |
443 | suff++) |
444 | ; |
445 | for (j = 0; j < 2; j++) { |
446 | sfile[j] = file[j] + pref; |
447 | slen[j] = len[j] - pref - suff; |
448 | for (i = 0; i <= slen[j]; i++) |
449 | sfile[j][i].serial = i; |
450 | } |
451 | } |
452 | |
453 | static void |
454 | equiv(struct line *a, int n, struct line *b, int m, int *c) |
455 | { |
456 | int i, j; |
457 | |
458 | i = j = 1; |
459 | while (i <= n && j <= m) { |
460 | if (a[i].value < b[j].value) |
461 | a[i++].value = 0; |
462 | else if (a[i].value == b[j].value) |
463 | a[i++].value = j; |
464 | else |
465 | j++; |
466 | } |
467 | while (i <= n) |
468 | a[i++].value = 0; |
469 | b[m + 1].value = 0; |
470 | j = 0; |
471 | while (++j <= m) { |
472 | c[j] = -b[j].serial; |
473 | while (b[j + 1].value == b[j].value) { |
474 | j++; |
475 | c[j] = b[j].serial; |
476 | } |
477 | } |
478 | c[j] = -1; |
479 | } |
480 | |
481 | /* Code taken from ping.c */ |
482 | static int |
483 | isqrt(int n) |
484 | { |
485 | int y, x = 1; |
486 | |
487 | if (n == 0) |
488 | return (0); |
489 | |
490 | do { /* newton was a stinker */ |
491 | y = x; |
492 | x = n / x; |
493 | x += y; |
494 | x /= 2; |
495 | } while ((x - y) > 1 || (x - y) < -1); |
496 | |
497 | return (x); |
498 | } |
499 | |
500 | static int |
501 | stone(int *a, int n, int *b, int *c, int flags) |
502 | { |
503 | int i, k, y, j, l; |
504 | int oldc, tc, oldl, sq; |
505 | u_int numtries, bound; |
506 | |
507 | if (flags & D_MINIMAL) |
508 | bound = UINT_MAX; |
509 | else { |
510 | sq = isqrt(n); |
511 | bound = MAXIMUM(256, sq); |
512 | } |
513 | |
514 | k = 0; |
515 | c[0] = newcand(0, 0, 0); |
516 | for (i = 1; i <= n; i++) { |
517 | j = a[i]; |
518 | if (j == 0) |
519 | continue; |
520 | y = -b[j]; |
521 | oldl = 0; |
522 | oldc = c[0]; |
523 | numtries = 0; |
524 | do { |
525 | if (y <= clist[oldc].y) |
526 | continue; |
527 | l = search(c, k, y); |
528 | if (l != oldl + 1) |
529 | oldc = c[l - 1]; |
530 | if (l <= k) { |
531 | if (clist[c[l]].y <= y) |
532 | continue; |
533 | tc = c[l]; |
534 | c[l] = newcand(i, y, oldc); |
535 | oldc = tc; |
536 | oldl = l; |
537 | numtries++; |
538 | } else { |
539 | c[l] = newcand(i, y, oldc); |
540 | k++; |
541 | break; |
542 | } |
543 | } while ((y = b[++j]) > 0 && numtries < bound); |
544 | } |
545 | return (k); |
546 | } |
547 | |
548 | static int |
549 | newcand(int x, int y, int pred) |
550 | { |
551 | struct cand *q; |
552 | |
553 | if (clen == clistlen) { |
554 | clistlen = clistlen * 11 / 10; |
555 | clist = xreallocarray(clist, clistlen, sizeof(*clist)); |
556 | } |
557 | q = clist + clen; |
558 | q->x = x; |
559 | q->y = y; |
560 | q->pred = pred; |
561 | return (clen++); |
562 | } |
563 | |
564 | static int |
565 | search(int *c, int k, int y) |
566 | { |
567 | int i, j, l, t; |
568 | |
569 | if (clist[c[k]].y < y) /* quick look for typical case */ |
570 | return (k + 1); |
571 | i = 0; |
572 | j = k + 1; |
573 | for (;;) { |
574 | l = (i + j) / 2; |
575 | if (l <= i) |
576 | break; |
577 | t = clist[c[l]].y; |
578 | if (t > y) |
579 | j = l; |
580 | else if (t < y) |
581 | i = l; |
582 | else |
583 | return (l); |
584 | } |
585 | return (l + 1); |
586 | } |
587 | |
588 | static void |
589 | unravel(int p) |
590 | { |
591 | struct cand *q; |
592 | int i; |
593 | |
594 | for (i = 0; i <= len[0]; i++) |
595 | J[i] = i <= pref ? i : |
596 | i > len[0] - suff ? i + len[1] - len[0] : 0; |
597 | for (q = clist + p; q->y != 0; q = clist + q->pred) |
598 | J[q->x + pref] = q->y + pref; |
599 | } |
600 | |
601 | /* |
602 | * Check does double duty: |
603 | * 1. ferret out any fortuitous correspondences due |
604 | * to confounding by hashing (which result in "jackpot") |
605 | * 2. collect random access indexes to the two files |
606 | */ |
607 | static void |
608 | check(FILE *f1, FILE *f2, int flags) |
609 | { |
610 | int i, j, jackpot, c, d; |
611 | long ctold, ctnew; |
612 | |
613 | rewind(f1); |
614 | rewind(f2); |
615 | j = 1; |
616 | ixold[0] = ixnew[0] = 0; |
617 | jackpot = 0; |
618 | ctold = ctnew = 0; |
619 | for (i = 1; i <= len[0]; i++) { |
620 | if (J[i] == 0) { |
621 | ixold[i] = ctold += skipline(f1); |
622 | continue; |
623 | } |
624 | while (j < J[i]) { |
625 | ixnew[j] = ctnew += skipline(f2); |
626 | j++; |
627 | } |
628 | if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) { |
629 | for (;;) { |
630 | c = getc(f1); |
631 | d = getc(f2); |
632 | /* |
633 | * GNU diff ignores a missing newline |
634 | * in one file for -b or -w. |
635 | */ |
636 | if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) { |
637 | if (c == EOF && d == '\r') { |
638 | ctnew++; |
639 | break; |
640 | } else if (c == '\r' && d == EOF) { |
641 | ctold++; |
642 | break; |
643 | } |
644 | } |
645 | ctold++; |
646 | ctnew++; |
647 | if ((flags & D_FOLDBLANKS) && isspace(c) && |
648 | isspace(d)) { |
649 | do { |
650 | if (c == '\r') |
651 | break; |
652 | ctold++; |
653 | } while (isspace(c = getc(f1))); |
654 | do { |
655 | if (d == '\r') |
656 | break; |
657 | ctnew++; |
658 | } while (isspace(d = getc(f2))); |
659 | } else if ((flags & D_IGNOREBLANKS)) { |
660 | while (isspace(c) && c != '\r') { |
661 | c = getc(f1); |
662 | ctold++; |
663 | } |
664 | while (isspace(d) && d != '\r') { |
665 | d = getc(f2); |
666 | ctnew++; |
667 | } |
668 | } |
669 | if (chrtran[c] != chrtran[d]) { |
670 | jackpot++; |
671 | J[i] = 0; |
672 | if (c != '\r' && c != EOF) |
673 | ctold += skipline(f1); |
674 | if (d != '\r' && c != EOF) |
675 | ctnew += skipline(f2); |
676 | break; |
677 | } |
678 | if (c == '\r' || c == EOF) |
679 | break; |
680 | } |
681 | } else { |
682 | for (;;) { |
683 | ctold++; |
684 | ctnew++; |
685 | if ((c = getc(f1)) != (d = getc(f2))) { |
686 | /* jackpot++; */ |
687 | J[i] = 0; |
688 | if (c != '\r' && c != EOF) |
689 | ctold += skipline(f1); |
690 | if (d != '\r' && c != EOF) |
691 | ctnew += skipline(f2); |
692 | break; |
693 | } |
694 | if (c == '\r' || c == EOF) |
695 | break; |
696 | } |
697 | } |
698 | ixold[i] = ctold; |
699 | ixnew[j] = ctnew; |
700 | j++; |
701 | } |
702 | for (; j <= len[1]; j++) |
703 | ixnew[j] = ctnew += skipline(f2); |
704 | /* |
705 | * if (jackpot) |
706 | * fprintf(stderr, "jackpot\n"); |
707 | */ |
708 | } |
709 | |
710 | /* shellsort CACM #201 */ |
711 | static void |
712 | sort(struct line *a, int n) |
713 | { |
714 | struct line *ai, *aim, w; |
715 | int j, m = 0, k; |
716 | |
717 | if (n == 0) |
718 | return; |
719 | for (j = 1; j <= n; j *= 2) |
720 | m = 2 * j - 1; |
721 | for (m /= 2; m != 0; m /= 2) { |
722 | k = n - m; |
723 | for (j = 1; j <= k; j++) { |
724 | for (ai = &a[j]; ai > a; ai -= m) { |
725 | aim = &ai[m]; |
726 | if (aim < ai) |
727 | break; /* wraparound */ |
728 | if (aim->value > ai[0].value || |
729 | (aim->value == ai[0].value && |
730 | aim->serial > ai[0].serial)) |
731 | break; |
732 | w.value = ai[0].value; |
733 | ai[0].value = aim->value; |
734 | aim->value = w.value; |
735 | w.serial = ai[0].serial; |
736 | ai[0].serial = aim->serial; |
737 | aim->serial = w.serial; |
738 | } |
739 | } |
740 | } |
741 | } |
742 | |
743 | static void |
744 | unsort(struct line *f, int l, int *b) |
745 | { |
746 | int *a, i; |
747 | |
748 | a = xcalloc(l + 1, sizeof(*a), "diff unsort"); |
749 | for (i = 1; i <= l; i++) |
750 | a[f[i].serial] = f[i].value; |
751 | for (i = 1; i <= l; i++) |
752 | b[i] = a[i]; |
753 | xfree(&a); |
754 | } |
755 | |
756 | static int |
757 | skipline(FILE *f) |
758 | { |
759 | int i, c; |
760 | |
761 | for (i = 1; (c = getc(f)) != '\r' && c != EOF; i++) |
762 | continue; |
763 | return (i); |
764 | } |
765 | |
766 | static void |
767 | output(char *file1, FILE *f1, char *file2, FILE *f2, int flags) |
768 | { |
769 | int m, i0, i1, j0, j1; |
770 | |
771 | rewind(f1); |
772 | rewind(f2); |
773 | m = len[0]; |
774 | J[0] = 0; |
775 | J[m + 1] = len[1] + 1; |
776 | if (diff_format != D_EDIT) { |
777 | for (i0 = 1; i0 <= m; i0 = i1 + 1) { |
778 | while (i0 <= m && J[i0] == J[i0 - 1] + 1) |
779 | i0++; |
780 | j0 = J[i0 - 1] + 1; |
781 | i1 = i0 - 1; |
782 | while (i1 < m && J[i1 + 1] == 0) |
783 | i1++; |
784 | j1 = J[i1 + 1] - 1; |
785 | J[i1] = j1; |
786 | change(file1, f1, file2, f2, i0, i1, j0, j1, &flags); |
787 | } |
788 | } else { |
789 | for (i0 = m; i0 >= 1; i0 = i1 - 1) { |
790 | while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) |
791 | i0--; |
792 | j0 = J[i0 + 1] - 1; |
793 | i1 = i0 + 1; |
794 | while (i1 > 1 && J[i1 - 1] == 0) |
795 | i1--; |
796 | j1 = J[i1 - 1] + 1; |
797 | J[i1] = j1; |
798 | change(file1, f1, file2, f2, i1, i0, j1, j0, &flags); |
799 | } |
800 | } |
801 | if (m == 0) |
802 | change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags); |
803 | if (diff_format == D_IFDEF) { |
804 | for (;;) { |
805 | #define c i0 |
806 | if ((c = getc(f1)) == EOF) |
807 | return; |
808 | diff_output("%c", c); |
809 | } |
810 | #undef c |
811 | } |
812 | if (anychange != 0) { |
813 | if (diff_format == D_CONTEXT) |
814 | dump_context_vec(f1, f2, flags); |
815 | else if (diff_format == D_UNIFIED) |
816 | dump_unified_vec(f1, f2, flags); |
817 | } |
818 | } |
819 | |
820 | static void |
821 | range(int a, int b, char *separator) |
822 | { |
823 | diff_output("%d", a > b ? b : a); |
824 | if (a < b) |
825 | diff_output("%s%d", separator, b); |
826 | } |
827 | |
828 | static void |
829 | uni_range(int a, int b) |
830 | { |
831 | if (a < b) |
832 | diff_output("%d,%d", a, b - a + 1); |
833 | else if (a == b) |
834 | diff_output("%d", b); |
835 | else |
836 | diff_output("%d,0", b); |
837 | } |
838 | |
839 | static char * |
840 | preadline(int fd, size_t rlen, off_t off) |
841 | { |
842 | char *line; |
843 | ssize_t nr; |
844 | off_t pos; |
845 | |
846 | line = xmalloc(rlen + 1, "diff preadline"); |
847 | pos = lseek(fd, 0, SEEK_CUR); |
848 | lseek(fd, off, SEEK_SET); |
849 | if ((nr = read(fd, line, rlen)) == -1) |
850 | err(2, "preadline"); |
851 | lseek(fd, pos, SEEK_SET); |
852 | if (nr > 0 && line[nr-1] == '\r') |
853 | nr--; |
854 | line[nr] = '\0'; |
855 | return (line); |
856 | } |
857 | |
858 | static int |
859 | ignoreline(char *line) |
860 | { |
861 | #if 0 |
862 | int ret; |
863 | |
864 | ret = regexec(&ignore_re, line, 0, NULL, 0); |
865 | xfree(&line); |
866 | return (ret == 0); /* if it matched, it should be ignored. */ |
867 | #endif |
868 | return 0; |
869 | } |
870 | |
871 | /* |
872 | * Indicate that there is a difference between lines a and b of the from file |
873 | * to get to lines c to d of the to file. If a is greater then b then there |
874 | * are no lines in the from file involved and this means that there were |
875 | * lines appended (beginning at b). If c is greater than d then there are |
876 | * lines missing from the to file. |
877 | */ |
878 | static void |
879 | change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d, |
880 | int *pflags) |
881 | { |
882 | static size_t max_context = 64; |
883 | int i; |
884 | |
885 | restart: |
886 | if (diff_format != D_IFDEF && a > b && c > d) |
887 | return; |
888 | if (ignore_pats != NULL) { |
889 | char *line; |
890 | /* |
891 | * All lines in the change, insert, or delete must |
892 | * match an ignore pattern for the change to be |
893 | * ignored. |
894 | */ |
895 | if (a <= b) { /* Changes and deletes. */ |
896 | for (i = a; i <= b; i++) { |
897 | line = preadline(fileno(f1), |
898 | ixold[i] - ixold[i - 1], ixold[i - 1]); |
899 | if (!ignoreline(line)) |
900 | goto proceed; |
901 | } |
902 | } |
903 | if (a > b || c <= d) { /* Changes and inserts. */ |
904 | for (i = c; i <= d; i++) { |
905 | line = preadline(fileno(f2), |
906 | ixnew[i] - ixnew[i - 1], ixnew[i - 1]); |
907 | if (!ignoreline(line)) |
908 | goto proceed; |
909 | } |
910 | } |
911 | return; |
912 | } |
913 | proceed: |
914 | if (*pflags & D_HEADER) { |
915 | diff_output("%s %s\r", file1, file2); |
916 | *pflags &= ~D_HEADER; |
917 | } |
918 | if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) { |
919 | /* |
920 | * Allocate change records as needed. |
921 | */ |
922 | if (context_vec_ptr == context_vec_end - 1) { |
923 | ptrdiff_t offset = context_vec_ptr - context_vec_start; |
924 | max_context <<= 1; |
925 | context_vec_start = xreallocarray(context_vec_start, |
926 | max_context, sizeof(*context_vec_start)); |
927 | context_vec_end = context_vec_start + max_context; |
928 | context_vec_ptr = context_vec_start + offset; |
929 | } |
930 | if (anychange == 0) { |
931 | /* |
932 | * Print the context/unidiff header first time through. |
933 | */ |
934 | print_header(file1, file2); |
935 | anychange = 1; |
936 | } else if (a > context_vec_ptr->b + (2 * diff_context) + 1 && |
937 | c > context_vec_ptr->d + (2 * diff_context) + 1) { |
938 | /* |
939 | * If this change is more than 'diff_context' lines from the |
940 | * previous change, dump the record and reset it. |
941 | */ |
942 | if (diff_format == D_CONTEXT) |
943 | dump_context_vec(f1, f2, *pflags); |
944 | else |
945 | dump_unified_vec(f1, f2, *pflags); |
946 | } |
947 | context_vec_ptr++; |
948 | context_vec_ptr->a = a; |
949 | context_vec_ptr->b = b; |
950 | context_vec_ptr->c = c; |
951 | context_vec_ptr->d = d; |
952 | return; |
953 | } |
954 | if (anychange == 0) |
955 | anychange = 1; |
956 | switch (diff_format) { |
957 | case D_BRIEF: |
958 | return; |
959 | case D_NORMAL: |
960 | case D_EDIT: |
961 | range(a, b, ","); |
962 | diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); |
963 | if (diff_format == D_NORMAL) |
964 | range(c, d, ","); |
965 | diff_output("\r"); |
966 | break; |
967 | case D_REVERSE: |
968 | diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); |
969 | range(a, b, " "); |
970 | diff_output("\r"); |
971 | break; |
972 | case D_NREVERSE: |
973 | if (a > b) |
974 | diff_output("a%d %d\r", b, d - c + 1); |
975 | else { |
976 | diff_output("d%d %d\r", a, b - a + 1); |
977 | if (!(c > d)) |
978 | /* add changed lines */ |
979 | diff_output("a%d %d\r", b, d - c + 1); |
980 | } |
981 | break; |
982 | } |
983 | if (diff_format == D_NORMAL || diff_format == D_IFDEF) { |
984 | fetch(ixold, a, b, f1, '<', 1, *pflags); |
985 | if (a <= b && c <= d && diff_format == D_NORMAL) |
986 | diff_output("---\r"); |
987 | } |
988 | i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags); |
989 | if (i != 0 && diff_format == D_EDIT) { |
990 | /* |
991 | * A non-zero return value for D_EDIT indicates that the |
992 | * last line printed was a bare dot (".") that has been |
993 | * escaped as ".." to prevent ed(1) from misinterpreting |
994 | * it. We have to add a substitute command to change this |
995 | * back and restart where we left off. |
996 | */ |
997 | diff_output(".\r"); |
998 | diff_output("%ds/.//\r", a + i - 1); |
999 | b = a + i - 1; |
1000 | a = b + 1; |
1001 | c += i; |
1002 | goto restart; |
1003 | } |
1004 | if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d) |
1005 | diff_output(".\r"); |
1006 | if (inifdef) { |
1007 | diff_output("#endif /* %s */\r", ifdefname); |
1008 | inifdef = 0; |
1009 | } |
1010 | } |
1011 | |
1012 | static int |
1013 | fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags) |
1014 | { |
1015 | int i, j, c, lastc, col, nc; |
1016 | |
1017 | /* |
1018 | * When doing #ifdef's, copy down to current line |
1019 | * if this is the first file, so that stuff makes it to output. |
1020 | */ |
1021 | if (diff_format == D_IFDEF && oldfile) { |
1022 | long curpos = ftell(lb); |
1023 | /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ |
1024 | nc = f[a > b ? b : a - 1] - curpos; |
1025 | for (i = 0; i < nc; i++) |
1026 | diff_output("%c", getc(lb)); |
1027 | } |
1028 | if (a > b) |
1029 | return (0); |
1030 | if (diff_format == D_IFDEF) { |
1031 | if (inifdef) { |
1032 | diff_output("#else /* %s%s */\r", |
1033 | oldfile == 1 ? "!" : "", ifdefname); |
1034 | } else { |
1035 | if (oldfile) |
1036 | diff_output("#ifndef %s\r", ifdefname); |
1037 | else |
1038 | diff_output("#ifdef %s\r", ifdefname); |
1039 | } |
1040 | inifdef = 1 + oldfile; |
1041 | } |
1042 | for (i = a; i <= b; i++) { |
1043 | fseek(lb, f[i - 1], SEEK_SET); |
1044 | nc = f[i] - f[i - 1]; |
1045 | if (diff_format != D_IFDEF && ch != '\0') { |
1046 | diff_output("%c", ch); |
1047 | #if 0 |
1048 | if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT |
1049 | || diff_format == D_UNIFIED)) |
1050 | diff_output("\t"); |
1051 | else |
1052 | #endif |
1053 | if (diff_format != D_UNIFIED) |
1054 | diff_output(" "); |
1055 | } |
1056 | col = 0; |
1057 | for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) { |
1058 | if ((c = getc(lb)) == EOF) { |
1059 | if (diff_format == D_EDIT || diff_format == D_REVERSE || |
1060 | diff_format == D_NREVERSE) |
1061 | warnx("No newline at end of file"); |
1062 | else |
1063 | #if 0 |
1064 | diff_output("\r\\ No newline at end of " |
1065 | "file\r"); |
1066 | #else |
1067 | diff_output("\r"); |
1068 | #endif |
1069 | return (0); |
1070 | } |
1071 | if (c == '\n') |
1072 | c = '\n'; |
1073 | if (c == '\t' && (flags & D_EXPANDTABS)) { |
1074 | do { |
1075 | diff_output(" "); |
1076 | } while (++col & 7); |
1077 | } else { |
1078 | if (diff_format == D_EDIT && j == 1 && c == '\r' |
1079 | && lastc == '.') { |
1080 | /* |
1081 | * Don't print a bare "." line |
1082 | * since that will confuse ed(1). |
1083 | * Print ".." instead and return, |
1084 | * giving the caller an offset |
1085 | * from which to restart. |
1086 | */ |
1087 | diff_output(".\r"); |
1088 | return (i - a + 1); |
1089 | } |
1090 | diff_output("%c", c); |
1091 | col++; |
1092 | } |
1093 | } |
1094 | } |
1095 | return (0); |
1096 | } |
1097 | |
1098 | /* |
1099 | * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. |
1100 | */ |
1101 | static int |
1102 | readhash(FILE *f, int flags) |
1103 | { |
1104 | int i, t, space; |
1105 | int sum; |
1106 | |
1107 | sum = 1; |
1108 | space = 0; |
1109 | if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) { |
1110 | if (flags & D_IGNORECASE) |
1111 | for (i = 0; (t = getc(f)) != '\r'; i++) { |
1112 | if (t == EOF) { |
1113 | if (i == 0) |
1114 | return (0); |
1115 | break; |
1116 | } |
1117 | sum = sum * 127 + chrtran[t]; |
1118 | } |
1119 | else |
1120 | for (i = 0; (t = getc(f)) != '\r'; i++) { |
1121 | if (t == EOF) { |
1122 | if (i == 0) |
1123 | return (0); |
1124 | break; |
1125 | } |
1126 | sum = sum * 127 + t; |
1127 | } |
1128 | } else { |
1129 | for (i = 0;;) { |
1130 | switch (t = getc(f)) { |
1131 | case '\t': |
1132 | case '\n': |
1133 | case '\v': |
1134 | case '\f': |
1135 | case ' ': |
1136 | space++; |
1137 | continue; |
1138 | default: |
1139 | if (space && (flags & D_IGNOREBLANKS) == 0) { |
1140 | i++; |
1141 | space = 0; |
1142 | } |
1143 | sum = sum * 127 + chrtran[t]; |
1144 | i++; |
1145 | continue; |
1146 | case EOF: |
1147 | if (i == 0) |
1148 | return (0); |
1149 | /* FALLTHROUGH */ |
1150 | case '\r': |
1151 | break; |
1152 | } |
1153 | break; |
1154 | } |
1155 | } |
1156 | /* |
1157 | * There is a remote possibility that we end up with a zero sum. |
1158 | * Zero is used as an EOF marker, so return 1 instead. |
1159 | */ |
1160 | return (sum == 0 ? 1 : sum); |
1161 | } |
1162 | |
1163 | static int |
1164 | asciifile(FILE *f) |
1165 | { |
1166 | unsigned char buf[BUFSIZ]; |
1167 | size_t cnt; |
1168 | |
1169 | if (f == NULL) |
1170 | return (1); |
1171 | |
1172 | rewind(f); |
1173 | cnt = fread(buf, 1, sizeof(buf), f); |
1174 | return (memchr(buf, '\0', cnt) == NULL); |
1175 | } |
1176 | |
1177 | #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0) |
1178 | |
1179 | static char * |
1180 | match_function(const long *f, int pos, FILE *fp) |
1181 | { |
1182 | unsigned char buf[FUNCTION_CONTEXT_SIZE]; |
1183 | size_t nc; |
1184 | int last = lastline; |
1185 | char *state = NULL; |
1186 | |
1187 | lastline = pos; |
1188 | while (pos > last) { |
1189 | fseek(fp, f[pos - 1], SEEK_SET); |
1190 | nc = f[pos] - f[pos - 1]; |
1191 | if (nc >= sizeof(buf)) |
1192 | nc = sizeof(buf) - 1; |
1193 | nc = fread(buf, 1, nc, fp); |
1194 | if (nc > 0) { |
1195 | buf[nc] = '\0'; |
1196 | buf[strcspn((const char *)buf, "\r")] = '\0'; |
1197 | if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') { |
1198 | if (begins_with((const char *)buf, "private:")) { |
1199 | if (!state) |
1200 | state = " (private)"; |
1201 | } else if (begins_with((const char *)buf, "protected:")) { |
1202 | if (!state) |
1203 | state = " (protected)"; |
1204 | } else if (begins_with((const char *)buf, "public:")) { |
1205 | if (!state) |
1206 | state = " (public)"; |
1207 | } else { |
1208 | strlcpy(lastbuf, (const char *)buf, sizeof lastbuf); |
1209 | if (state) |
1210 | strlcat(lastbuf, (const char *)state, sizeof lastbuf); |
1211 | lastmatchline = pos; |
1212 | return lastbuf; |
1213 | } |
1214 | } |
1215 | } |
1216 | pos--; |
1217 | } |
1218 | return lastmatchline > 0 ? lastbuf : NULL; |
1219 | } |
1220 | |
1221 | /* dump accumulated "context" diff changes */ |
1222 | static void |
1223 | dump_context_vec(FILE *f1, FILE *f2, int flags) |
1224 | { |
1225 | struct context_vec *cvp = context_vec_start; |
1226 | int lowa, upb, lowc, upd, do_output; |
1227 | int a, b, c, d; |
1228 | char ch, *f; |
1229 | |
1230 | if (context_vec_start > context_vec_ptr) |
1231 | return; |
1232 | |
1233 | b = d = 0; /* gcc */ |
1234 | lowa = MAXIMUM(1, cvp->a - diff_context); |
1235 | upb = MINIMUM(len[0], context_vec_ptr->b + diff_context); |
1236 | lowc = MAXIMUM(1, cvp->c - diff_context); |
1237 | upd = MINIMUM(len[1], context_vec_ptr->d + diff_context); |
1238 | |
1239 | diff_output("***************"); |
1240 | if ((flags & D_PROTOTYPE)) { |
1241 | f = match_function(ixold, lowa-1, f1); |
1242 | if (f != NULL) |
1243 | diff_output(" %s", f); |
1244 | } |
1245 | diff_output("\r*** "); |
1246 | range(lowa, upb, ","); |
1247 | diff_output(" ****\r"); |
1248 | |
1249 | /* |
1250 | * Output changes to the "old" file. The first loop suppresses |
1251 | * output if there were no changes to the "old" file (we'll see |
1252 | * the "old" lines as context in the "new" list). |
1253 | */ |
1254 | do_output = 0; |
1255 | for (; cvp <= context_vec_ptr; cvp++) |
1256 | if (cvp->a <= cvp->b) { |
1257 | cvp = context_vec_start; |
1258 | do_output++; |
1259 | break; |
1260 | } |
1261 | if (do_output) { |
1262 | while (cvp <= context_vec_ptr) { |
1263 | a = cvp->a; |
1264 | b = cvp->b; |
1265 | c = cvp->c; |
1266 | d = cvp->d; |
1267 | |
1268 | if (a <= b && c <= d) |
1269 | ch = 'c'; |
1270 | else |
1271 | ch = (a <= b) ? 'd' : 'a'; |
1272 | |
1273 | if (ch == 'a') |
1274 | fetch(ixold, lowa, b, f1, ' ', 0, flags); |
1275 | else { |
1276 | fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); |
1277 | fetch(ixold, a, b, f1, |
1278 | ch == 'c' ? '!' : '-', 0, flags); |
1279 | } |
1280 | lowa = b + 1; |
1281 | cvp++; |
1282 | } |
1283 | fetch(ixold, b + 1, upb, f1, ' ', 0, flags); |
1284 | } |
1285 | /* output changes to the "new" file */ |
1286 | diff_output("--- "); |
1287 | range(lowc, upd, ","); |
1288 | diff_output(" ----\r"); |
1289 | |
1290 | do_output = 0; |
1291 | for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) |
1292 | if (cvp->c <= cvp->d) { |
1293 | cvp = context_vec_start; |
1294 | do_output++; |
1295 | break; |
1296 | } |
1297 | if (do_output) { |
1298 | while (cvp <= context_vec_ptr) { |
1299 | a = cvp->a; |
1300 | b = cvp->b; |
1301 | c = cvp->c; |
1302 | d = cvp->d; |
1303 | |
1304 | if (a <= b && c <= d) |
1305 | ch = 'c'; |
1306 | else |
1307 | ch = (a <= b) ? 'd' : 'a'; |
1308 | |
1309 | if (ch == 'd') |
1310 | fetch(ixnew, lowc, d, f2, ' ', 0, flags); |
1311 | else { |
1312 | fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); |
1313 | fetch(ixnew, c, d, f2, |
1314 | ch == 'c' ? '!' : '+', 0, flags); |
1315 | } |
1316 | lowc = d + 1; |
1317 | cvp++; |
1318 | } |
1319 | fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); |
1320 | } |
1321 | context_vec_ptr = context_vec_start - 1; |
1322 | } |
1323 | |
1324 | /* dump accumulated "unified" diff changes */ |
1325 | static void |
1326 | dump_unified_vec(FILE *f1, FILE *f2, int flags) |
1327 | { |
1328 | struct context_vec *cvp = context_vec_start; |
1329 | int lowa, upb, lowc, upd; |
1330 | int a, b, c, d; |
1331 | char ch, *f; |
1332 | |
1333 | if (context_vec_start > context_vec_ptr) |
1334 | return; |
1335 | |
1336 | b = d = 0; /* gcc */ |
1337 | lowa = MAXIMUM(1, cvp->a - diff_context); |
1338 | upb = MINIMUM(len[0], context_vec_ptr->b + diff_context); |
1339 | lowc = MAXIMUM(1, cvp->c - diff_context); |
1340 | upd = MINIMUM(len[1], context_vec_ptr->d + diff_context); |
1341 | |
1342 | diff_output("@@ -"); |
1343 | uni_range(lowa, upb); |
1344 | diff_output(" +"); |
1345 | uni_range(lowc, upd); |
1346 | diff_output(" @@"); |
1347 | if ((flags & D_PROTOTYPE)) { |
1348 | f = match_function(ixold, lowa-1, f1); |
1349 | if (f != NULL) |
1350 | diff_output(" %s", f); |
1351 | } |
1352 | diff_output("\r"); |
1353 | |
1354 | /* |
1355 | * Output changes in "unified" diff format--the old and new lines |
1356 | * are printed together. |
1357 | */ |
1358 | for (; cvp <= context_vec_ptr; cvp++) { |
1359 | a = cvp->a; |
1360 | b = cvp->b; |
1361 | c = cvp->c; |
1362 | d = cvp->d; |
1363 | |
1364 | /* |
1365 | * c: both new and old changes |
1366 | * d: only changes in the old file |
1367 | * a: only changes in the new file |
1368 | */ |
1369 | if (a <= b && c <= d) |
1370 | ch = 'c'; |
1371 | else |
1372 | ch = (a <= b) ? 'd' : 'a'; |
1373 | |
1374 | switch (ch) { |
1375 | case 'c': |
1376 | fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); |
1377 | fetch(ixold, a, b, f1, '-', 0, flags); |
1378 | fetch(ixnew, c, d, f2, '+', 0, flags); |
1379 | break; |
1380 | case 'd': |
1381 | fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); |
1382 | fetch(ixold, a, b, f1, '-', 0, flags); |
1383 | break; |
1384 | case 'a': |
1385 | fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); |
1386 | fetch(ixnew, c, d, f2, '+', 0, flags); |
1387 | break; |
1388 | } |
1389 | lowa = b + 1; |
1390 | lowc = d + 1; |
1391 | } |
1392 | fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); |
1393 | |
1394 | context_vec_ptr = context_vec_start - 1; |
1395 | } |
1396 | |
1397 | static void |
1398 | print_header(const char *file1, const char *file2) |
1399 | { |
1400 | if (label[0] != NULL) |
1401 | diff_output("%s %s\r", diff_format == D_CONTEXT ? "***" : "---", |
1402 | label[0]); |
1403 | else |
1404 | diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---", |
1405 | file1, ctime(&stb1.st_mtime)); |
1406 | if (label[1] != NULL) |
1407 | diff_output("%s %s\r", diff_format == D_CONTEXT ? "---" : "+++", |
1408 | label[1]); |
1409 | else |
1410 | diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++", |
1411 | file2, ctime(&stb2.st_mtime)); |
1412 | } |