]>
Commit | Line | Data |
---|---|---|
cb77f0d6 | 1 | #!/usr/bin/env perl |
12b31560 PA |
2 | # |
3 | # Clean a text file -- or directory of text files -- of stealth whitespace. | |
4 | # WARNING: this can be a highly destructive operation. Use with caution. | |
5 | # | |
6 | ||
cb77f0d6 | 7 | use warnings; |
12b31560 PA |
8 | use bytes; |
9 | use File::Basename; | |
10 | ||
cb3ed5b7 PA |
11 | # Default options |
12 | $max_width = 79; | |
13 | ||
12b31560 PA |
# Rewrite space-before-tab sequences in a string.
#
# Spaces that are immediately followed by a tab are absorbed into the
# tab stops (8 columns wide): the run is re-emitted using tabs only, so
# the text after it stays in the same column.  Spaces followed by
# anything else are copied through unchanged.  "\n" and "\r" reset the
# column counter to zero.
#
# Takes the input string and returns the rewritten string.
sub clean_space_tabs($)
{
    no bytes;			# Tab alignment depends on characters

    my ($input) = @_;
    my $output  = '';
    my $column  = 0;		# Column reached by the text emitted so far
    my $pending = 0;		# Spaces seen but not yet emitted

    foreach my $ch (split(//, $input)) {
	# Hold spaces back until we know what follows them.
	if ($ch eq ' ') {
	    $pending++;
	    next;
	}

	if ($ch eq "\t") {
	    # Column the tab lands on once the held spaces are counted,
	    # then the number of tab stops needed to get there.
	    my $target = ($column + $pending + 8) & ~7;
	    $output .= "\t" x (($target >> 3) - ($column >> 3));
	    $column  = $target;
	    $pending = 0;
	    next;
	}

	# Any other character: the held spaces are real, emit them as-is.
	$output .= (' ' x $pending) . $ch;
	if ($ch eq "\n" || $ch eq "\r") {
	    $column = 0;
	} else {
	    $column += $pending + 1;
	}
	$pending = 0;
    }

    # Spaces at the very end of the string are kept.
    return $output . (' ' x $pending);
}
53 | ||
cb3ed5b7 PA |
# Return the display width of a string: the widest of its
# newline-separated lines, with tabs advancing to the next multiple
# of 8 columns and every other character counting as one column.
sub strwidth($) {
    no bytes;			# Tab alignment depends on characters

    my ($text) = @_;
    my $widest = 0;		# Widest completed line so far
    my $column = 0;		# Width of the line being scanned

    foreach my $ch (split(//, $text)) {
	if ($ch eq "\n") {
	    # End of a line: record it if it is the widest, start over.
	    $widest = $column if ($column > $widest);
	    $column = 0;
	} elsif ($ch eq "\t") {
	    $column = ($column + 8) & ~7;
	} else {
	    $column++;
	}
    }

    # The final line may not be newline-terminated.
    return ($column > $widest) ? $column : $widest;
}
78 | ||
12b31560 PA |
# Program name, used as a prefix in diagnostics.
$name = basename($0);

# Files named on the command line, in the order given.
@files = ();

# Parse the command line.  Anything starting with '-' is an option; the
# only option is -width (or -w), which takes a non-negative integer
# (0 disables the line-length check).  Everything else is a file name.
#
# A lexical $arg is used rather than $a, which is Perl's special sort
# variable.
while (defined(my $arg = shift(@ARGV))) {
    if ($arg !~ /^-/) {
	push(@files, $arg);
	next;
    }

    my $width;
    $width = shift(@ARGV) if ($arg eq '-width' || $arg eq '-w');

    # $width is undefined here for an unknown option, or when the width
    # option is the last argument.  A missing or non-numeric value used
    # to become 0 silently, which disabled the check; treat both as
    # usage errors instead.
    if (!defined($width) || $width !~ /^[0-9]+\z/) {
	print STDERR "Usage: $name [-width #] files...\n";
	exit 1;
    }

    $max_width = $width + 0;
}
95 | ||
# Clean each named file in place.
#
# For every file: skip it if it is not a regular file, cannot be opened
# read/write, or contains a zero byte (treated as binary).  Otherwise
# strip trailing whitespace from each line, normalize space-tab
# sequences, drop blank lines at the end of the file, and warn about
# lines wider than $max_width.  The file is rewritten and truncated
# only if its contents actually changed.
foreach my $f ( @files ) {
    print STDERR "$name: $f\n";

    if (! -f $f) {
	print STDERR "$f: not a file\n";
	next;
    }

    my $fh;
    if (!open($fh, '+<', $f)) {
	print STDERR "$name: Cannot open file: $f: $!\n";
	next;
    }

    binmode $fh;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary = 0;
    my $data;

    while (read($fh, $data, 65536)) {
	if ($data =~ /\0/) {
	    $is_binary = 1;
	    last;
	}
    }

    if ($is_binary) {
	print STDERR "$name: $f: binary file\n";
	close($fh);
	next;
    }

    seek($fh, 0, 0);

    my @blanks  = ();		# Blank lines seen since the last non-blank
    my @lines   = ();		# Cleaned file contents
    my $lineno  = 0;
    my $changed = 0;		# True once any line differs from the input

    while ( defined(my $line = <$fh>) ) {
	$lineno++;
	my $orig = $line;

	$line =~ s/[ \t\r]*$//;		# Remove trailing spaces
	$line = clean_space_tabs($line);

	# Compare contents, not lengths: a space-tab rewrite can leave
	# the byte count of a line unchanged.
	$changed = 1 if ($line ne $orig);

	if ( $line eq "\n" ) {
	    # Hold blank lines back; they are only emitted if a
	    # non-blank line follows.
	    push(@blanks, $line);
	} else {
	    push(@lines, @blanks, $line);
	    @blanks = ();
	}

	my $l_width = strwidth($line);
	if ($max_width && $l_width > $max_width) {
	    print STDERR
		"$f:$lineno: line exceeds $max_width characters ($l_width)\n";
	}
    }

    # Any blanks at the end of the file are discarded
    $changed = 1 if (@blanks);

    if ($changed) {
	# Only write to the file if changed
	seek($fh, 0, 0);
	if (!print $fh @lines) {
	    die "$name: Failed to write modified file: $f: $!\n";
	}

	my $where = tell($fh);
	if ( !defined($where) || !truncate($fh, $where) ) {
	    die "$name: Failed to truncate modified file: $f: $!\n";
	}
    }

    # Buffered write errors only surface at close time.
    if (!close($fh)) {
	print STDERR "$name: Error closing file: $f: $!\n";
    }
}