Rev 11793 | Rev 17170 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 11793 | Rev 14815 | ||
---|---|---|---|
Line 2... | Line 2... | ||
2 | 2 | ||
3 | use strict "subs"; |
3 | use strict "subs"; |
4 | use locale; |
4 | use locale; |
5 | use warnings; |
5 | use warnings; |
6 | $/ = undef; |
6 | $/ = undef; |
7 | ### most of these "mistakes" has been done by |
7 | ### most of these "mistakes" have been made by Msg2wims
8 | ### it would be better to correct it, but that is more difficult !
8 | ### it would be better to correct it, but that is more difficult !
9 | ### introduction of \n can break the inline folds ... |
9 | ### introduction of \n can break the inline folds ... |
10 | my $file=$ARGV[0]; |
10 | my $file=$ARGV[0]; |
- | 11 | my $fileout=$file; |
|
11 | open (IN ,$file) ; |
12 | open (IN ,$file) ; |
12 | my $text = <IN>; |
13 | my $text = <IN>; |
13 | ##multiple <p> |
14 | ##multiple <p> |
14 | $text =~ s:(<p>\s*){2,}:<p>:g; |
15 | $text =~ s:(<p>\s*){2,}:<p>:g; |
15 | ## close br and hr |
16 | ## close br and hr |
16 | $text =~ s:<(b|h) |
17 | $text =~ s:<(b|h)r\s*>:<$1r />:g; |
- | 18 | ## delete multiple <br> |
|
- | 19 | $text =~ s:(<br\s*/>\s*){2,}:<br />:g; |
|
17 | $text =~ s:<p> |
20 | $text =~ s:(<p>\s*){1,}(<li>|</?ul>|</?ol>):$2:g; |
18 | ## delete < |
21 | ## delete <br> before <li></?ul></?ol> |
19 | $text =~ s:< |
22 | $text =~ s:(<br\s*/?>\s*){1,}(<h[1-5].*>|<li|</h[1-5]>|</?ul>|</?ol>):$2:g; |
20 | ## delete <li> empty |
23 | ## delete <li> empty |
21 | $text =~ s:<li> |
24 | $text =~ s:(<li>\s*){1,}</(u|o)l>:</$2l>:g; |
22 | ## transform some hr |
25 | ## transform some hr |
23 | $text =~ s|<hr\s+width="(\d+)%"\s*/?/>|<hr style="width:$1%"/>|g; |
26 | $text =~ s|<hr\s+width="(\d+)%"\s*/?/>|<hr style="width:$1%"/>|g; |
24 | ## delete <br/> before table ? |
27 | ## delete <br/> before table ? |
25 | $text =~ s|< |
28 | $text =~ s|(<br\s*/>\s*){1,}</table>|</table>|g; |
26 | ## delete multiple <br> |
- | |
27 | $text =~ s:(<br ?/>\s*)+:<br/>:g; |
- | |
28 | $text =~ s:</div>\s+</div>:</div></div>:g; |
29 | $text =~ s:</div>\s+</div>:</div></div>:g; |
29 | $text =~ s:</div>\s* |
30 | $text =~ s:</div>\s*(<br\s*/>\s*){1,}</div>:</div></div>:g; |
30 | ## delete <br/> before div |
31 | ## delete <br/> before div |
31 | $text =~ s: |
32 | $text =~ s:(<br\s*/>\s*){1,}<div:<div:g; |
32 | $text =~ s:< |
33 | $text =~ s:<br\s*/> *!:<br />\n!:g; |
33 | $text =~ s:\n{3,}:\n\n:g; |
34 | $text =~ s:\n{3,}:\n\n:g; |
34 | close(IN); |
35 | close(IN); |
35 | 36 | ||
36 | open (OUT, "> |
37 | open (OUT, ">$fileout"); |
37 | print OUT $text ; close OUT; |
38 | print OUT $text ; close OUT; |