Subversion Repositories wimsdev

Rev

Rev 8633 | Rev 8685 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
6519 bpr 1
package search;
2
 
3
use strict;
4
use warnings;
6935 bpr 5
use Encode;
6519 bpr 6
 
7
use Exporter;
8
our @ISA = 'Exporter';
8618 bpr 9
our @EXPORT = ('hashdomain', 'listdomain', 'out', 'sortuniq', 'treate_accent',
8633 bpr 10
  'treate_dict', 'treate_language', 'dictionnary', 'reverse_dic', 'canonify');
6519 bpr 11
 
7693 bpr 12
sub canonify { my ($w)=@_; treate_accent(lc($w)) }
13
 
7016 bpr 14
sub treate_dict { my ($file) = @_;
8618 bpr 15
   my %ref = ( ) ; my $ref=\%ref;
16
   my $text;
6519 bpr 17
   open IN, "$file";
6935 bpr 18
   while (<IN>) {
6519 bpr 19
   $text = $_ ;
20
   $text =~ s/\n +/\n/g;
21
   $text =~ s/\\//g;
22
   $text =~ s/^(\s+)//g;
23
   $text=~ s/\n+/\n/g;
24
   my @L = sortuniq(split("\n",$text));
25
   for my $l (@L) {
26
     my @la=split(":", $l) ;
8636 bpr 27
     $ref{canonify($la[0])}=join(':',@la[1..@la-1]) if ($la[1]);
6519 bpr 28
    }
29
   }
30
  close IN;
31
 %ref;
32
}
33
 
7693 bpr 34
sub dictionnary { my ($file, @words)=@_;
35
  my %dic=treate_dict($file) ;
36
  my $dic=\%dic;
37
  my @W=();
38
  if (@words) {
39
   for my $w (@words) {
40
    next if !($w);
41
    for my $ww (split(',', $w)) {
42
     if ( $dic{canonify($ww)} ) { push @W, $dic{canonify($ww)} ; };
43
     }
44
   }
45
   sortuniq ( @W )
46
   }
47
   @W;
48
}
49
 
6935 bpr 50
sub treate_accent {my ($txt) = @_;
6519 bpr 51
  $txt=decode('iso-8859-1',$txt);
52
  $txt =~ tr/éèêëàáâãäåùìíîïóôòç/eeeeaaaaaauiiiioooc/;
53
  $txt =~ s/[ÀÁÂÃÄÅ]/A/g;
54
  $txt =~ s/Ç/C/g;
55
  $txt =~ s/[ÈÉÊË]/E/g;
56
  $txt =~ s/[ÌÏÎÍ]/I/g;
57
  $txt= encode("iso-8859-1",$txt);
58
  $txt
59
}
60
 
61
sub out { my ($bloc, $text) = @_;
62
  open  (OUT, ">$bloc") ;
63
  print OUT $text ; close OUT;
64
}
65
 
66
sub sortuniq {
6935 bpr 67
  return if !(@_) ;
6519 bpr 68
  my $prev = "not $_[0]";
69
  grep { $_ ne $prev && ($prev = $_, 1) } sort @_;
70
}
71
 
6935 bpr 72
sub listdomain { my ($file)=@_ ;
6519 bpr 73
  my $text='';
74
  open IN, "$file";
75
  while (<IN>) { $text = $_ ;
76
   $text =~ s/[,:]/\n/g;
77
   $text =~ s/\n +/\n/g;
78
   $text =~ s/\\//g;
79
   $text =~ s/^(\s+)//g;
80
   $text=~ s/\n+/\n/g;
81
 }
82
 close IN;
83
 sortuniq(split("\n",$text))
84
}
85
 
86
## reversing the domain tree
87
 
88
sub hashdomain { my ($file)=@_;
8618 bpr 89
 $/ = undef;
6519 bpr 90
 open IN, "$file";
91
 my %ref = ( ) ; my $ref=\%ref;
92
 while (<IN>) { my $text=$_ ; $text=~ s/\\\n\s*//g;
93
   $text=~ s/\n\s+/\n/g;
94
   my @text= split("\n", $text);
6935 bpr 95
   for my $line (@text) {
6519 bpr 96
     next if !($line) ;
97
     $line =~ s/\s+//g;
98
     my @cut=split(":", $line) ;
99
     if ($cut[1]) {
100
       my @son=split(',',$cut[1]);
8618 bpr 101
       $ref->{'next'}{$cut[0]}=$cut[1];
6935 bpr 102
      for my $s (@son) {
8618 bpr 103
        if ($ref->{'prev'}{$s}) { $ref->{'prev'}{$s} .= "," . $cut[0]} else {$ref->{'prev'}{$s} = $cut[0]}
6519 bpr 104
      }
105
     }
106
   }
107
}
108
close IN;
109
  %ref
110
}
111
 
6879 bpr 112
sub treate_language {
113
  my $site_language='en fr nl it cn';
114
  my $conf='../../../log/wims.conf';
115
  if (-e "$conf") {
116
   open IN, "$conf";
117
   while (<IN>) {
118
     if ($_ =~ s/site_languages=//) { $site_language= $_ ; }
119
   }
120
  close IN;
121
 }
122
split(' ', $site_language) ;
123
}
7693 bpr 124
 
125
sub reverse_dic { my @liste=@_;
126
  my %ref = ( ) ; my $ref=\%ref;
127
  for my $file (@liste) {
128
   open IN, "$file";
129
   while (<IN>) {
130
    next if ($_ =~ /^#/);
131
    my $text= $_ ; $text=~ s/\n//;
132
    my @text=split(":", $text);
133
    my $t=$text[0]; $t=~ s/\n//;
134
    my @L= split(",",$text[1]);
135
    for my $a (@L) {
136
     if ($ref{$a}) {  $ref{$a}.= "," . $t ; }
137
     else { $ref{$a} = $t ; }
138
    }
139
   }
140
   close IN;
141
  }
142
  %ref
143
}
144