Subversion Repositories wimsdev

Rev

Rev 18174 | Blame | Compare with Previous | Last modification | View Log | RSS feed

  1. package search;
  2.  
  3. use strict;
  4. use warnings;
  5. use Encode;
  6.  
  7. use Exporter;
  8. our @ISA = 'Exporter';
  9. our @EXPORT = ('hashdomain', 'listdomain', 'out', 'sortuniq', 'treate_accent',
  10.   'treate_dict', 'treate_language', 'dictionnary', 'reverse_dic', 'canonify');
  11.  
  12. sub canonify { my ($w)=@_; treate_accent(lc($w)) }
  13.  
  14. sub treate_dict { my ($file) = @_;
  15.    my %ref = ( ) ; my $ref=\%ref;
  16.    my $text;
  17.    open IN, "$file";
  18.    while (<IN>) {
  19.    $text = $_ ;
  20.    $text =~ s/\n +/\n/g;
  21.    $text =~ s/\\//g;
  22.    $text =~ s/^(\s+)//g;
  23.    $text=~ s/\n+/\n/g;
  24.    my @L = sortuniq(split("\n",$text));
  25.    for my $l (@L) {
  26.      my @la=split(":", $l) ;
  27.      $ref{canonify($la[0])}=join(':',@la[1..@la-1]) if ($la[1]);
  28.     }
  29.    }
  30.   close IN;
  31.  %ref;
  32. }
  33.  
  34. sub dictionnary { my ($file, @words)=@_;
  35.   my %dic=treate_dict($file) ;
  36.   my $dic=\%dic;
  37.   my @W=();
  38.   if (@words) {
  39.    for my $w (@words) {
  40.     next if !($w);
  41.     for my $ww (split(',', $w)) {
  42.      if ( $dic{canonify($ww)} ) { push @W, $dic{canonify($ww)} ; };
  43.      }
  44.    }
  45.    sortuniq ( @W )
  46.    }
  47.    @W;
  48. }
  49.  
  50. # use same list as in ~/src/Lib/liblines.c
  51. sub treate_accent {my ($txt) = @_;
  52.   $txt=decode('iso-8859-1',$txt);
  53.   $txt =~ tr/éèêëàáâãäåúùûüìíîïóòôöõçñýÿ/eeeeaaaaaauuuuiiiiooooocnyy/;
  54.   $txt =~ s/[ÀÁÂÃÄÅ]/A/g;
  55.   $txt =~ s/[ÓÒÔÖÕ]/O/g;
  56.   $txt =~ s/Ç/C/g;
  57.   $txt =~ s/Ñ/N/g;
  58.   $txt =~ s/Ý/Y/g;
  59.   $txt =~ s/[ÈÉÊË]/E/g;
  60.   $txt =~ s/[ÌÏÎÍ]/I/g;
  61.   $txt =~ s/'/ /g;
  62.   $txt= encode("iso-8859-1",$txt);
  63.   $txt
  64. }
  65.  
  66. sub out { my ($bloc, $text) = @_;
  67.   open  (OUT, ">$bloc") ;
  68.   print OUT $text ; close OUT;
  69. }
  70.  
  71. sub sortuniq {
  72.   return if !(@_) ;
  73.   my $prev = "not $_[0]";
  74.   grep { $_ ne $prev && ($prev = $_, 1) } sort @_;
  75. }
  76.  
  77. sub listdomain { my ($file)=@_ ;
  78.   my $text='';
  79.   open IN, "$file";
  80.   while (<IN>) { $text = $_ ;
  81.    $text =~ s/[,:]/\n/g;
  82.    $text =~ s/\n +/\n/g;
  83.    $text =~ s/\\//g;
  84.    $text =~ s/^(\s+)//g;
  85.    $text=~ s/\n+/\n/g;
  86.  }
  87.  close IN;
  88.  sortuniq(split("\n",$text))
  89. }
  90.  
  91. ## reversing the domain tree
  92.  
  93. sub hashdomain { my ($file)=@_;
  94.  $/ = undef;
  95.  open IN, "$file";
  96.  my %ref = ( ) ; my $ref=\%ref;
  97.  while (<IN>) { my $text=$_ ; $text=~ s/\\\n\s*//g;
  98.    $text=~ s/\n\s+/\n/g;
  99.    my @text= split("\n", $text);
  100.    for my $line (@text) {
  101.      next if !($line) ;
  102.      $line =~ s/\s+//g;
  103.      my @cut=split(":", $line) ;
  104.      if ($cut[1]) {
  105.        my @son=split(',',$cut[1]);
  106.        $ref->{'next'}{$cut[0]}=$cut[1];
  107.       for my $s (@son) {
  108.         if ($ref->{'prev'}{$s}) { $ref->{'prev'}{$s} .= "," . $cut[0]} else {$ref->{'prev'}{$s} = $cut[0]}
  109.       }
  110.      }
  111.    }
  112. }
  113. close IN;
  114.   %ref
  115. }
  116.  
  117. sub treate_language {
  118.   my $site_language='en fr nl it cn es';
  119.   my $conf='../../../log/wims.conf';
  120.   if (-e "$conf") {
  121.    open IN, "$conf";
  122.    while (<IN>) {
  123.      if ($_ =~ s/site_languages=//) { $site_language= $_ ; }
  124.    }
  125.   close IN;
  126.  }
  127.   $site_language=~  s/,/ /g; $site_language=~  s/ +/ /g;
  128.   split(' ', $site_language) ;
  129. }
  130.  
  131. sub reverse_dic { my @liste=@_;
  132.   my %ref = ( ) ; my $ref=\%ref;
  133.   for my $file (@liste) {
  134.    open IN, "$file";
  135.    while (<IN>) {
  136.     next if ($_ =~ /^#/);
  137.     my $text= $_ ; $text=~ s/\n//;
  138.     my @text=split(":", $text);
  139.     my $t=$text[0]; $t=~ s/\n//;
  140.     my @L= split(",",$text[1]);
  141.     for my $a (@L) {
  142.      if ($ref{$a}) {  $ref{$a}.= "," . $t ; }
  143.      else { $ref{$a} = $t ; }
  144.     }
  145.    }
  146.    close IN;
  147.   }
  148.   %ref
  149. }
  150.