Subversion Repositories wimsdev

Rev

Rev 9089 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
6519 bpr 1
package search;
2
 
3
use strict;
4
use warnings;
6935 bpr 5
use Encode;
6519 bpr 6
 
7
use Exporter;
8
our @ISA = 'Exporter';
8618 bpr 9
our @EXPORT = ('hashdomain', 'listdomain', 'out', 'sortuniq', 'treate_accent',
8633 bpr 10
  'treate_dict', 'treate_language', 'dictionnary', 'reverse_dic', 'canonify');
6519 bpr 11
 
7693 bpr 12
sub canonify { my ($w)=@_; treate_accent(lc($w)) }
13
 
7016 bpr 14
sub treate_dict { my ($file) = @_;
8618 bpr 15
   my %ref = ( ) ; my $ref=\%ref;
16
   my $text;
6519 bpr 17
   open IN, "$file";
6935 bpr 18
   while (<IN>) {
6519 bpr 19
   $text = $_ ;
20
   $text =~ s/\n +/\n/g;
21
   $text =~ s/\\//g;
22
   $text =~ s/^(\s+)//g;
23
   $text=~ s/\n+/\n/g;
24
   my @L = sortuniq(split("\n",$text));
25
   for my $l (@L) {
26
     my @la=split(":", $l) ;
8636 bpr 27
     $ref{canonify($la[0])}=join(':',@la[1..@la-1]) if ($la[1]);
6519 bpr 28
    }
29
   }
30
  close IN;
31
 %ref;
32
}
33
 
7693 bpr 34
sub dictionnary { my ($file, @words)=@_;
35
  my %dic=treate_dict($file) ;
36
  my $dic=\%dic;
37
  my @W=();
38
  if (@words) {
39
   for my $w (@words) {
40
    next if !($w);
41
    for my $ww (split(',', $w)) {
42
     if ( $dic{canonify($ww)} ) { push @W, $dic{canonify($ww)} ; };
43
     }
44
   }
45
   sortuniq ( @W )
46
   }
47
   @W;
48
}
49
 
6935 bpr 50
sub treate_accent {my ($txt) = @_;
6519 bpr 51
  $txt=decode('iso-8859-1',$txt);
52
  $txt =~ tr/éèêëàáâãäåùìíîïóôòç/eeeeaaaaaauiiiioooc/;
53
  $txt =~ s/[ÀÁÂÃÄÅ]/A/g;
54
  $txt =~ s/Ç/C/g;
55
  $txt =~ s/[ÈÉÊË]/E/g;
56
  $txt =~ s/[ÌÏÎÍ]/I/g;
8937 bpr 57
  $txt =~ s/'/ /g;
6519 bpr 58
  $txt= encode("iso-8859-1",$txt);
59
  $txt
60
}
61
 
62
sub out { my ($bloc, $text) = @_;
63
  open  (OUT, ">$bloc") ;
64
  print OUT $text ; close OUT;
65
}
66
 
67
sub sortuniq {
6935 bpr 68
  return if !(@_) ;
6519 bpr 69
  my $prev = "not $_[0]";
70
  grep { $_ ne $prev && ($prev = $_, 1) } sort @_;
71
}
72
 
6935 bpr 73
sub listdomain { my ($file)=@_ ;
6519 bpr 74
  my $text='';
75
  open IN, "$file";
76
  while (<IN>) { $text = $_ ;
77
   $text =~ s/[,:]/\n/g;
78
   $text =~ s/\n +/\n/g;
79
   $text =~ s/\\//g;
80
   $text =~ s/^(\s+)//g;
81
   $text=~ s/\n+/\n/g;
82
 }
83
 close IN;
84
 sortuniq(split("\n",$text))
85
}
86
 
87
## reversing the domain tree
88
 
89
sub hashdomain { my ($file)=@_;
8618 bpr 90
 $/ = undef;
6519 bpr 91
 open IN, "$file";
92
 my %ref = ( ) ; my $ref=\%ref;
93
 while (<IN>) { my $text=$_ ; $text=~ s/\\\n\s*//g;
94
   $text=~ s/\n\s+/\n/g;
95
   my @text= split("\n", $text);
6935 bpr 96
   for my $line (@text) {
6519 bpr 97
     next if !($line) ;
98
     $line =~ s/\s+//g;
99
     my @cut=split(":", $line) ;
100
     if ($cut[1]) {
101
       my @son=split(',',$cut[1]);
8618 bpr 102
       $ref->{'next'}{$cut[0]}=$cut[1];
6935 bpr 103
      for my $s (@son) {
8618 bpr 104
        if ($ref->{'prev'}{$s}) { $ref->{'prev'}{$s} .= "," . $cut[0]} else {$ref->{'prev'}{$s} = $cut[0]}
6519 bpr 105
      }
106
     }
107
   }
108
}
109
close IN;
110
  %ref
111
}
112
 
6879 bpr 113
sub treate_language {
18174 czzmrn 114
  my $site_language='en fr nl it cn es';
6879 bpr 115
  my $conf='../../../log/wims.conf';
116
  if (-e "$conf") {
117
   open IN, "$conf";
118
   while (<IN>) {
119
     if ($_ =~ s/site_languages=//) { $site_language= $_ ; }
120
   }
121
  close IN;
122
 }
9089 bpr 123
  $site_language=~  s/,/ /g; $site_language=~  s/ +/ /g;
124
  split(' ', $site_language) ;
6879 bpr 125
}
7693 bpr 126
 
127
sub reverse_dic { my @liste=@_;
128
  my %ref = ( ) ; my $ref=\%ref;
129
  for my $file (@liste) {
130
   open IN, "$file";
131
   while (<IN>) {
132
    next if ($_ =~ /^#/);
133
    my $text= $_ ; $text=~ s/\n//;
134
    my @text=split(":", $text);
135
    my $t=$text[0]; $t=~ s/\n//;
136
    my @L= split(",",$text[1]);
137
    for my $a (@L) {
138
     if ($ref{$a}) {  $ref{$a}.= "," . $t ; }
139
     else { $ref{$a} = $t ; }
140
    }
141
   }
142
   close IN;
143
  }
144
  %ref
145
}