use strict;
use warnings;
use Encode;
use Exporter;
our @ISA = 'Exporter';
our @EXPORT = ('hashdomain', 'listdomain', 'out', 'sortuniq', 'treate_accent',
'treate_dict', 'treate_language', 'dictionnary', 'reverse_dic', 'canonify');
# canonify($w): canonical form of a word.
# The word is lower-cased with lc() and then handed to treate_accent();
# whatever treate_accent() returns is returned to the caller.
sub canonify {
    my ($word) = @_;
    my $lowered = lc($word);
    return treate_accent($lowered);
}
# treate_dict($file): load a dictionary file into a hash.
#
# Every record read from $file is cleaned up (indentation after a newline,
# backslashes, leading whitespace and repeated newlines are removed), split
# into lines, and each line is split on ':'.  The first field, normalised
# with canonify(), becomes the key; the remaining fields, re-joined with ':',
# become the value.  Lines whose second field is false (missing, empty or
# "0") are ignored.
#
# Returns the dictionary as a flat key/value list, suitable for assignment
# to a hash.  If $file cannot be opened, a warning is emitted and an empty
# list is returned.
sub treate_dict {
    my ($file) = @_;
    my %ref = ();

    open(my $in, '<', $file) or do {
        warn "treate_dict: cannot open '$file': $!\n";
        return;
    };

    # The input record separator is left untouched: the clean-up below copes
    # with records that are single lines as well as multi-line records.
    while (my $text = <$in>) {
        $text =~ s/\n +/\n/g;     # indentation following a newline
        $text =~ s/\\//g;         # backslashes
        $text =~ s/^(\s+)//g;     # leading whitespace
        $text =~ s/\n+/\n/g;      # runs of newlines

        for my $entry (sortuniq(split("\n", $text))) {
            my @la = split(':', $entry);
            $ref{ canonify($la[0]) } = join(':', @la[1 .. $#la]) if $la[1];
        }
    }
    close($in);

    return %ref;
}
# dictionnary($file, @words): look words up in the dictionary stored in $file.
#
# The dictionary is loaded with treate_dict().  Each element of @words may be
# a comma-separated list; every individual word is normalised with canonify()
# before the lookup.  False elements of @words are skipped.
#
# Returns the values found, sorted and without duplicates (an empty list when
# @words is empty or nothing matches).
sub dictionnary {
    my ($file, @words) = @_;
    my %dic = treate_dict($file);
    my @found;

    for my $w (@words) {
        next if !$w;
        for my $ww (split(',', $w)) {
            my $key = canonify($ww);
            push @found, $dic{$key} if $dic{$key};
        }
    }

    # Keep the result of sortuniq(): calling it in void context would leave
    # the list unsorted and with duplicates.
    @found = sortuniq(@found) if @found;
    return @found;
}
# treate_accent($txt): replace accented letters by their unaccented form.
#
# The argument is decoded as ISO-8859-1, the accented lower-case letters
# listed in the tr/// below are mapped to e/a/u/i/o/c, the listed upper-case
# variants of A, C, E and I are mapped to their base letter, and every
# apostrophe becomes a space.  The result is encoded back to ISO-8859-1 and
# returned.
#
# NOTE(review): the accented literals are matched exactly as they are stored
# in this source file; whether each one is a single character depends on the
# file's encoding and on `use utf8`, neither of which is visible here.
sub treate_accent {
    my ($txt) = @_;
    my $chars = decode('iso-8859-1', $txt);

    # Topicalise so that tr/// and s/// all act on the decoded string.
    for ($chars) {
        tr/éèêëàáâãäåùìíîïóôòç/eeeeaaaaaauiiiioooc/;
        s/[ÀÁÂÃÄÅ]/A/g;
        s/Ç/C/g;
        s/[ÈÉÊË]/E/g;
        s/[ÌÏÎÍ]/I/g;
        s/'/ /g;
    }

    my $bytes = encode("iso-8859-1", $chars);
    return $bytes;
}
# out($bloc, $text): unpacks its two arguments and does nothing else; the
# visible body contains no output or other side effect.
# NOTE(review): the name and parameters suggest this was meant to emit $text
# for the block $bloc -- confirm whether the output statements were lost.
sub out { my ($bloc, $text) = @_;
}
# sortuniq(@items): return the arguments in default (string) sort order with
# duplicates removed.  In scalar context the number of distinct items is
# returned.  An empty argument list yields an empty list without warnings.
sub sortuniq {
    # A "seen" hash is used instead of comparing each element with the
    # previous one, so no sentinel value is needed that could collide with a
    # real element (e.g. the pair "z" / "not z").
    my %seen;
    return grep { !$seen{$_}++ } sort @_;
}
# listdomain($file): list every name occurring in a domain file.
#
# The whole file is read at once; commas and colons are turned into line
# breaks, indentation after a newline, backslashes, leading whitespace and
# repeated newlines are removed, and the remaining lines are returned sorted
# and without duplicates (via sortuniq()).
#
# Returns an empty list if the file is empty.  If $file cannot be opened, a
# warning is emitted and an empty list is returned.
sub listdomain {
    my ($file) = @_;

    open(my $in, '<', $file) or do {
        warn "listdomain: cannot open '$file': $!\n";
        return;
    };
    # Slurp: the clean-up works across line boundaries, and every line of the
    # file must contribute to the result, not only the last one read.
    my $text = do { local $/; <$in> };
    close($in);
    $text = '' unless defined $text;

    $text =~ s/[,:]/\n/g;     # one name per line
    $text =~ s/\n +/\n/g;     # indentation following a newline
    $text =~ s/\\//g;         # backslashes
    $text =~ s/^(\s+)//g;     # leading whitespace
    $text =~ s/\n+/\n/g;      # runs of newlines

    my @names = split("\n", $text);
    return unless @names;
    return sortuniq(@names);
}
## Reversing the domain tree.
#
# hashdomain($file): read a domain tree and index it in both directions.
#
# Each logical line of $file has the form "parent: child1, child2, ...".  A
# backslash at the end of a line continues it on the next line.  All
# whitespace inside a line is ignored.  Lines without a true second field
# are skipped.
#
# The resulting structure has two sub-hashes:
#   {next}{parent} = "child1,child2,..."   (children string as read)
#   {prev}{child}  = "parent1,parent2,..." (parents in order of appearance)
#
# Returns the structure as a key/value list in list context and as a hash
# reference in scalar context.
# NOTE(review): no caller is visible here, so both conventions are offered --
# confirm which one the callers use.
# If $file cannot be opened, a warning is emitted and the empty structure is
# returned.
sub hashdomain {
    my ($file) = @_;
    my %ref = ();

    if (open(my $in, '<', $file)) {
        # Slurp so that backslash-newline continuations can be joined.
        my $text = do { local $/; <$in> };
        close($in);
        $text = '' unless defined $text;

        $text =~ s/\\\n\s*//g;     # join continued lines
        $text =~ s/\n\s+/\n/g;     # drop blank lines and indentation

        for my $line (split("\n", $text)) {
            next if !$line;
            $line =~ s/\s+//g;
            my ($parent, $children) = split(':', $line);
            next if !$children;

            $ref{'next'}{$parent} = $children;
            for my $child (split(',', $children)) {
                if ($ref{'prev'}{$child}) {
                    $ref{'prev'}{$child} .= "," . $parent;
                }
                else {
                    $ref{'prev'}{$child} = $parent;
                }
            }
        }
    }
    else {
        warn "hashdomain: cannot open '$file': $!\n";
    }

    return wantarray ? %ref : \%ref;
}
# treate_language(): return the list of site languages.
#
# The default list is "en fr nl it cn es".  If the configuration file
# ../../../log/wims.conf (relative to the current directory) exists, the
# value of its last "site_languages=..." line replaces the default.  Commas
# and runs of spaces in the value are treated as separators.
#
# Returns the language codes as a list (their count in scalar context).  If
# the file exists but cannot be opened, a warning is emitted and the default
# list is returned.
sub treate_language {
    my $site_language = 'en fr nl it cn es';
    my $conf = '../../../log/wims.conf';

    if (-e $conf) {
        if (open(my $in, '<', $conf)) {
            # The setting is searched line by line, whatever the caller did
            # to the input record separator.
            local $/ = "\n";
            while (my $line = <$in>) {
                # Anchored so that only a real assignment line matches.
                if ($line =~ s/^\s*site_languages\s*=//) {
                    $site_language = $line;
                }
            }
            close($in);
        }
        else {
            warn "treate_language: cannot open '$conf': $!\n";
        }
    }

    $site_language =~ s/,/ /g;
    $site_language =~ s/ +/ /g;
    return split(' ', $site_language);
}
# reverse_dic(@liste): build a reverse index from one or more dictionary files.
#
# Every file in @liste is read line by line.  Lines starting with '#' are
# skipped.  A line "key:w1,w2,..." adds "key" to the entry of each of w1,
# w2, ...; several keys for the same word are joined with commas, in the
# order they are read.  Only the first two ':'-separated fields are used.
# Lines without a ':' are ignored.
#
# Returns the index (word => "key1,key2,...") as a key/value list in list
# context and as a hash reference in scalar context.
# NOTE(review): no caller is visible here, so both conventions are offered --
# confirm which one the callers use.
# A file that cannot be opened is reported with a warning and skipped.
sub reverse_dic {
    my @liste = @_;
    my %ref = ();

    # The format is line-oriented, so force line-by-line reading.
    local $/ = "\n";

    for my $file (@liste) {
        open(my $in, '<', $file) or do {
            warn "reverse_dic: cannot open '$file': $!\n";
            next;
        };
        while (my $text = <$in>) {
            next if $text =~ /^#/;
            $text =~ s/\n//;
            my ($t, $targets) = split(':', $text);
            # Blank lines and lines without ':' carry no mapping.
            next unless defined $t && defined $targets;

            for my $word (split(',', $targets)) {
                if ($ref{$word}) {
                    $ref{$word} .= "," . $t;
                }
                else {
                    $ref{$word} = $t;
                }
            }
        }
        close($in);
    }

    return wantarray ? %ref : \%ref;
}