]>
Commit | Line | Data |
---|---|---|
cb77f0d6 | 1 | #!/usr/bin/env perl |
b2441318 | 2 | # SPDX-License-Identifier: GPL-2.0 |
6e019b00 PA |
3 | # |
4 | # Clean a patch file -- or directory of patch files -- of stealth whitespace. | |
5 | # WARNING: this can be a highly destructive operation. Use with caution. | |
6 | # | |
7 | ||
cb77f0d6 | 8 | use warnings; |
6e019b00 PA |
9 | use bytes; |
10 | use File::Basename; | |
11 | ||
cb3ed5b7 PA |
# Default options
#
# Maximum visual width allowed for an added ("+") patch line before a
# warning is printed.  A value of 0 disables the width check.  May be
# overridden on the command line with -width/-w.
our $max_width = 79;
14 | ||
6e019b00 PA |
# Clean up space-tab sequences, either by removing spaces or
# replacing them with tabs.
#
# Any run of spaces that is immediately followed by a tab is replaced by
# the number of tabs needed to reach the same column (tab stops every 8
# columns).  Runs of spaces not followed by a tab are kept as they are.
# A "\n" or "\r" resets the column counter to zero.
#
# Argument: the text to clean (may contain several lines).  An undefined
#           argument is treated as the empty string.
# Returns:  the cleaned text.
#
# Columns are counted in characters, not bytes: if the input is a byte
# string holding well-formed UTF-8 it is decoded for the column
# arithmetic and the result is encoded back to UTF-8 bytes, so the
# returned string is byte-for-byte the input apart from the space/tab
# changes.  Input that is not valid UTF-8 is processed unit by unit.
sub clean_space_tabs($)
{
    no bytes;                   # Tab alignment depends on characters

    my ($li) = @_;
    $li = '' unless defined $li;

    # utf8::decode() works in place and returns false (leaving the string
    # content alone) when the data is not well-formed UTF-8.
    my $decoded = utf8::decode($li);

    my $lo  = '';               # Output accumulated so far
    my $pos = 0;                # Column of the end of $lo
    my $nsp = 0;                # Pending spaces seen but not yet emitted

    for my $c (split //, $li) {
        if ($c eq "\t") {
            # The pending spaces are absorbed: emit as many tabs as it
            # takes to get from $pos to the tab stop the original
            # spaces + tab would have reached.
            my $npos = ($pos + $nsp + 8) & ~7;
            my $ntab = ($npos >> 3) - ($pos >> 3);
            $lo .= "\t" x $ntab;
            $pos = $npos;
            $nsp = 0;
        } elsif ($c eq ' ') {
            # Hold spaces back until we know whether a tab follows.
            $nsp++;
        } else {
            # Any other character: the pending spaces are real, flush them.
            $lo .= (' ' x $nsp) . $c;
            if ($c eq "\n" || $c eq "\r") {
                $pos = 0;       # New line, back to column zero
            } else {
                $pos += $nsp + 1;
            }
            $nsp = 0;
        }
    }

    # Spaces at the very end of the input are preserved.
    $lo .= ' ' x $nsp;

    # Hand back bytes if we were given (UTF-8) bytes.
    utf8::encode($lo) if $decoded;

    return $lo;
}
54 | ||
cb3ed5b7 PA |
# Compute the visual width of a string
#
# Argument: the text to measure; it may contain several "\n"-separated
#           lines.  An undefined argument is treated as the empty string.
# Returns:  the width, in columns, of the widest line.  A tab advances
#           to the next multiple of 8; every other character except
#           "\n" counts as one column.
#
# Width is measured in characters: a byte string holding well-formed
# UTF-8 is decoded (on a local copy) before counting, so a multi-byte
# character occupies one column.  Data that is not valid UTF-8 is
# counted unit by unit.
sub strwidth($) {
    no bytes;                   # Tab alignment depends on characters

    my ($li) = @_;
    $li = '' unless defined $li;

    # In-place decode of our private copy; on malformed input this
    # returns false and the copy keeps its original content.
    utf8::decode($li);

    my $pos  = 0;               # Current column on the current line
    my $mlen = 0;               # Widest line seen so far

    for my $c (split //, $li) {
        if ($c eq "\t") {
            $pos = ($pos + 8) & ~7;
        } elsif ($c eq "\n") {
            $mlen = $pos if ($pos > $mlen);
            $pos = 0;
        } else {
            $pos++;
        }
    }

    # Account for a final line that has no terminating newline.
    $mlen = $pos if ($pos > $mlen);
    return $mlen;
}
79 | ||
6e019b00 PA |
$name = basename($0);

@files = ();

# Command line: "-width N" (or "-w N") sets the maximum width of an added
# line; every non-option argument is a patch file to clean in place.
# Any other option, or a -width with no value after it, prints the usage
# message and exits with status 1.
while (defined(my $arg = shift(@ARGV))) {
    if ($arg =~ /^-/) {
        if (($arg eq '-width' || $arg eq '-w') && @ARGV) {
            $max_width = shift(@ARGV)+0;
        } else {
            print STDERR "Usage: $name [-width #] files...\n";
            exit 1;
        }
    } else {
        push(@files, $arg);
    }
}

# Unified-diff hunk header.  A line count may be omitted, in which case
# it is 1, so both "@@ -3,7 +3,8 @@" and "@@ -1 +1 @@" are accepted.
# Captures: old start, old count, new start, new count.
my $hunk_re =
    qr/^\@\@\s+\-([0-9]+)(?:,([0-9]+))?\s+\+([0-9]+)(?:,([0-9]+))?\s\@\@/;

foreach my $f ( @files ) {
    print STDERR "$name: $f\n";

    if (! -f $f) {
        print STDERR "$f: not a file\n";
        next;
    }

    my $fh;
    if (!open($fh, '+<', $f)) {
        print STDERR "$name: Cannot open file: $f: $!\n";
        next;
    }

    binmode $fh;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary = 0;
    my $data;

    while (read($fh, $data, 65536) > 0) {
        if ($data =~ /\0/) {
            $is_binary = 1;
            last;
        }
    }

    if ($is_binary) {
        print STDERR "$name: $f: binary file\n";
        close($fh);             # Do not leave the handle open when skipping
        next;
    }

    seek($fh, 0, 0);

    my $lineno  = 0;            # Current input line, for diagnostics
    my @lines   = ();           # Complete output, written back if changed
    my $in_hunk = 0;            # True while inside a hunk body
    my $err     = 0;            # True once the patch is found malformed
    my $changed = 0;            # True once any output differs from input

    my ($minus_lines, $plus_lines);   # Lines still expected in this hunk
    my @hunk_lines = ();              # Header + (cleaned) body of this hunk

    while ( defined(my $line = <$fh>) ) {
        $lineno++;

        if (!$in_hunk) {
            if ($line =~ $hunk_re) {
                $minus_lines = defined($2) ? $2 : 1;
                $plus_lines  = defined($4) ? $4 : 1;
                if ($minus_lines || $plus_lines) {
                    $in_hunk = 1;
                    @hunk_lines = ($line);
                } else {
                    # Degenerate header announcing no lines at all:
                    # there is no body to clean, keep the line as is.
                    push(@lines, $line);
                }
            } else {
                push(@lines, $line);
            }
        } else {
            # We're in a hunk

            if ($line =~ /^\+/) {
                $plus_lines--;

                my $text = substr($line, 1);
                $text =~ s/[ \t\r]*$//;     # Remove trailing spaces
                $text = clean_space_tabs($text);

                my $l_width = strwidth($text);
                if ($max_width && $l_width > $max_width) {
                    print STDERR
                        "$f:$lineno: adds line exceeds $max_width ",
                        "characters ($l_width)\n";
                }

                # Compare content, not length: replacing "space, tab"
                # by two tabs changes the line without changing its size.
                $changed = 1 if ('+'.$text) ne $line;

                push(@hunk_lines, '+'.$text);
            } elsif ($line =~ /^\-/) {
                $minus_lines--;
                push(@hunk_lines, $line);
            } elsif ($line =~ /^ /) {
                $plus_lines--;
                $minus_lines--;
                push(@hunk_lines, $line);
            } elsif ($line =~ /^\\/) {
                # "\ No newline at end of file" marker: it belongs to the
                # preceding line and counts towards neither side.
                push(@hunk_lines, $line);
            } else {
                print STDERR "$name: $f: malformed patch\n";
                $err = 1;
                last;
            }

            if ($plus_lines < 0 || $minus_lines < 0) {
                print STDERR "$name: $f: malformed patch\n";
                $err = 1;
                last;
            } elsif ($plus_lines == 0 && $minus_lines == 0) {
                # End of a hunk.  Process this hunk.
                my @kept = ();
                my $adj  = 0;   # Number of added blank lines dropped
                my $done = 0;   # Set once a context or non-blank added
                                # line has been seen (scanning backwards)

                # Walk the body backwards (index 0 is the header) and
                # drop the empty added lines at the end of the hunk.
                for (my $i = scalar(@hunk_lines)-1; $i > 0; $i--) {
                    my $hl = $hunk_lines[$i];
                    if (!$done && $hl eq "+\n") {
                        $adj++;         # Skip this line
                    } else {
                        $done = 1 if ($hl =~ /^[ +]/);
                        unshift(@kept, $hl);
                    }
                }

                my $header = $hunk_lines[0];    # Hunk header
                @hunk_lines = ();               # Free memory

                if ($adj) {
                    # Lines were dropped: rewrite the header so that its
                    # new-side count matches the shortened body.
                    $header =~ /$hunk_re(.*)$/
                        or die "$name: $f: cannot re-parse hunk header\n";
                    my ($mstart, $mlin, $pstart, $plin, $tail) =
                        ($1, $2, $3, $4, $5);
                    $mlin = 1 unless defined($mlin);
                    $plin = 1 unless defined($plin);
                    # $tail doesn't include the final newline

                    $header = sprintf("\@\@ -%d,%d +%d,%d \@\@%s\n",
                                      $mstart, $mlin, $pstart, $plin-$adj,
                                      $tail);
                    $changed = 1;
                }

                # Transfer to the output array
                push(@lines, $header, @kept);

                $in_hunk = 0;
            }
        }
    }

    if ($in_hunk) {
        # Input ended (or parsing stopped) in the middle of a hunk.
        print STDERR "$name: $f: malformed patch\n";
        $err = 1;
    }

    my $wrote = 0;
    if (!$err && $changed) {
        # Only write to the file if changed
        seek($fh, 0, 0);
        print {$fh} @lines
            or die "$name: Failed to write modified file: $f: $!\n";
        $wrote = 1;

        # The cleaned patch is never longer than the original, so cut
        # off whatever is left of the old contents.
        my $where = tell($fh);
        if ( !defined($where) || $where < 0 ||
             !truncate($fh, $where) ) {
            die "$name: Failed to truncate modified file: $f: $!\n";
        }
    }

    # Buffered write errors only show up at close time.
    if (!close($fh) && $wrote) {
        die "$name: Failed to write modified file: $f: $!\n";
    }
}