Subversion Repositories wimsdev

Rev

Rev 18174 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
6519 bpr 1
package search;
2
 
3
use strict;
4
use warnings;
6935 bpr 5
use Encode;
6519 bpr 6
 
7
use Exporter;
8
our @ISA = 'Exporter';
8618 bpr 9
our @EXPORT = ('hashdomain', 'listdomain', 'out', 'sortuniq', 'treate_accent',
8633 bpr 10
  'treate_dict', 'treate_language', 'dictionnary', 'reverse_dic', 'canonify');
6519 bpr 11
 
7693 bpr 12
sub canonify { my ($w)=@_; treate_accent(lc($w)) }
13
 
7016 bpr 14
sub treate_dict { my ($file) = @_;
8618 bpr 15
   my %ref = ( ) ; my $ref=\%ref;
16
   my $text;
6519 bpr 17
   open IN, "$file";
6935 bpr 18
   while (<IN>) {
6519 bpr 19
   $text = $_ ;
20
   $text =~ s/\n +/\n/g;
21
   $text =~ s/\\//g;
22
   $text =~ s/^(\s+)//g;
23
   $text=~ s/\n+/\n/g;
24
   my @L = sortuniq(split("\n",$text));
25
   for my $l (@L) {
26
     my @la=split(":", $l) ;
8636 bpr 27
     $ref{canonify($la[0])}=join(':',@la[1..@la-1]) if ($la[1]);
6519 bpr 28
    }
29
   }
30
  close IN;
31
 %ref;
32
}
33
 
7693 bpr 34
sub dictionnary { my ($file, @words)=@_;
35
  my %dic=treate_dict($file) ;
36
  my $dic=\%dic;
37
  my @W=();
38
  if (@words) {
39
   for my $w (@words) {
40
    next if !($w);
41
    for my $ww (split(',', $w)) {
42
     if ( $dic{canonify($ww)} ) { push @W, $dic{canonify($ww)} ; };
43
     }
44
   }
45
   sortuniq ( @W )
46
   }
47
   @W;
48
}
49
 
18175 czzmrn 50
# use same list as in ~/src/Lib/liblines.c
6935 bpr 51
sub treate_accent {my ($txt) = @_;
6519 bpr 52
  $txt=decode('iso-8859-1',$txt);
18175 czzmrn 53
  $txt =~ tr/éèêëàáâãäåúùûüìíîïóòôöõçñýÿ/eeeeaaaaaauuuuiiiiooooocnyy/;
6519 bpr 54
  $txt =~ s/[ÀÁÂÃÄÅ]/A/g;
18175 czzmrn 55
  $txt =~ s/[ÓÒÔÖÕ]/O/g;
6519 bpr 56
  $txt =~ s/Ç/C/g;
18175 czzmrn 57
  $txt =~ s/Ñ/N/g;
58
  $txt =~ s/Ý/Y/g;
6519 bpr 59
  $txt =~ s/[ÈÉÊË]/E/g;
60
  $txt =~ s/[ÌÏÎÍ]/I/g;
8937 bpr 61
  $txt =~ s/'/ /g;
6519 bpr 62
  $txt= encode("iso-8859-1",$txt);
63
  $txt
64
}
65
 
66
sub out { my ($bloc, $text) = @_;
67
  open  (OUT, ">$bloc") ;
68
  print OUT $text ; close OUT;
69
}
70
 
71
sub sortuniq {
6935 bpr 72
  return if !(@_) ;
6519 bpr 73
  my $prev = "not $_[0]";
74
  grep { $_ ne $prev && ($prev = $_, 1) } sort @_;
75
}
76
 
6935 bpr 77
sub listdomain { my ($file)=@_ ;
6519 bpr 78
  my $text='';
79
  open IN, "$file";
80
  while (<IN>) { $text = $_ ;
81
   $text =~ s/[,:]/\n/g;
82
   $text =~ s/\n +/\n/g;
83
   $text =~ s/\\//g;
84
   $text =~ s/^(\s+)//g;
85
   $text=~ s/\n+/\n/g;
86
 }
87
 close IN;
88
 sortuniq(split("\n",$text))
89
}
90
 
91
## reversing the domain tree
92
 
93
sub hashdomain { my ($file)=@_;
8618 bpr 94
 $/ = undef;
6519 bpr 95
 open IN, "$file";
96
 my %ref = ( ) ; my $ref=\%ref;
97
 while (<IN>) { my $text=$_ ; $text=~ s/\\\n\s*//g;
98
   $text=~ s/\n\s+/\n/g;
99
   my @text= split("\n", $text);
6935 bpr 100
   for my $line (@text) {
6519 bpr 101
     next if !($line) ;
102
     $line =~ s/\s+//g;
103
     my @cut=split(":", $line) ;
104
     if ($cut[1]) {
105
       my @son=split(',',$cut[1]);
8618 bpr 106
       $ref->{'next'}{$cut[0]}=$cut[1];
6935 bpr 107
      for my $s (@son) {
8618 bpr 108
        if ($ref->{'prev'}{$s}) { $ref->{'prev'}{$s} .= "," . $cut[0]} else {$ref->{'prev'}{$s} = $cut[0]}
6519 bpr 109
      }
110
     }
111
   }
112
}
113
close IN;
114
  %ref
115
}
116
 
6879 bpr 117
sub treate_language {
18174 czzmrn 118
  my $site_language='en fr nl it cn es';
6879 bpr 119
  my $conf='../../../log/wims.conf';
120
  if (-e "$conf") {
121
   open IN, "$conf";
122
   while (<IN>) {
123
     if ($_ =~ s/site_languages=//) { $site_language= $_ ; }
124
   }
125
  close IN;
126
 }
9089 bpr 127
  $site_language=~  s/,/ /g; $site_language=~  s/ +/ /g;
128
  split(' ', $site_language) ;
6879 bpr 129
}
7693 bpr 130
 
131
sub reverse_dic { my @liste=@_;
132
  my %ref = ( ) ; my $ref=\%ref;
133
  for my $file (@liste) {
134
   open IN, "$file";
135
   while (<IN>) {
136
    next if ($_ =~ /^#/);
137
    my $text= $_ ; $text=~ s/\n//;
138
    my @text=split(":", $text);
139
    my $t=$text[0]; $t=~ s/\n//;
140
    my @L= split(",",$text[1]);
141
    for my $a (@L) {
142
     if ($ref{$a}) {  $ref{$a}.= "," . $t ; }
143
     else { $ref{$a} = $t ; }
144
    }
145
   }
146
   close IN;
147
  }
148
  %ref
149
}