Subversion Repositories wimsdev

Rev

Rev 9089 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

  1. package search;
  2.  
  3. use strict;
  4. use warnings;
  5. use Encode;
  6.  
  7. use Exporter;
  8. our @ISA = 'Exporter';
  9. our @EXPORT = ('hashdomain', 'listdomain', 'out', 'sortuniq', 'treate_accent',
  10.   'treate_dict', 'treate_language', 'dictionnary', 'reverse_dic', 'canonify');
  11.  
  12. sub canonify { my ($w)=@_; treate_accent(lc($w)) }
  13.  
  14. sub treate_dict { my ($file) = @_;
  15.    my %ref = ( ) ; my $ref=\%ref;
  16.    my $text;
  17.    open IN, "$file";
  18.    while (<IN>) {
  19.    $text = $_ ;
  20.    $text =~ s/\n +/\n/g;
  21.    $text =~ s/\\//g;
  22.    $text =~ s/^(\s+)//g;
  23.    $text=~ s/\n+/\n/g;
  24.    my @L = sortuniq(split("\n",$text));
  25.    for my $l (@L) {
  26.      my @la=split(":", $l) ;
  27.      $ref{canonify($la[0])}=join(':',@la[1..@la-1]) if ($la[1]);
  28.     }
  29.    }
  30.   close IN;
  31.  %ref;
  32. }
  33.  
  34. sub dictionnary { my ($file, @words)=@_;
  35.   my %dic=treate_dict($file) ;
  36.   my $dic=\%dic;
  37.   my @W=();
  38.   if (@words) {
  39.    for my $w (@words) {
  40.     next if !($w);
  41.     for my $ww (split(',', $w)) {
  42.      if ( $dic{canonify($ww)} ) { push @W, $dic{canonify($ww)} ; };
  43.      }
  44.    }
  45.    sortuniq ( @W )
  46.    }
  47.    @W;
  48. }
  49.  
  50. sub treate_accent {my ($txt) = @_;
  51.   $txt=decode('iso-8859-1',$txt);
  52.   $txt =~ tr/éèêëàáâãäåùìíîïóôòç/eeeeaaaaaauiiiioooc/;
  53.   $txt =~ s/[ÀÁÂÃÄÅ]/A/g;
  54.   $txt =~ s/Ç/C/g;
  55.   $txt =~ s/[ÈÉÊË]/E/g;
  56.   $txt =~ s/[ÌÏÎÍ]/I/g;
  57.   $txt =~ s/'/ /g;
  58.   $txt= encode("iso-8859-1",$txt);
  59.   $txt
  60. }
  61.  
  62. sub out { my ($bloc, $text) = @_;
  63.   open  (OUT, ">$bloc") ;
  64.   print OUT $text ; close OUT;
  65. }
  66.  
  67. sub sortuniq {
  68.   return if !(@_) ;
  69.   my $prev = "not $_[0]";
  70.   grep { $_ ne $prev && ($prev = $_, 1) } sort @_;
  71. }
  72.  
  73. sub listdomain { my ($file)=@_ ;
  74.   my $text='';
  75.   open IN, "$file";
  76.   while (<IN>) { $text = $_ ;
  77.    $text =~ s/[,:]/\n/g;
  78.    $text =~ s/\n +/\n/g;
  79.    $text =~ s/\\//g;
  80.    $text =~ s/^(\s+)//g;
  81.    $text=~ s/\n+/\n/g;
  82.  }
  83.  close IN;
  84.  sortuniq(split("\n",$text))
  85. }
  86.  
  87. ## reversing the domain tree
  88.  
  89. sub hashdomain { my ($file)=@_;
  90.  $/ = undef;
  91.  open IN, "$file";
  92.  my %ref = ( ) ; my $ref=\%ref;
  93.  while (<IN>) { my $text=$_ ; $text=~ s/\\\n\s*//g;
  94.    $text=~ s/\n\s+/\n/g;
  95.    my @text= split("\n", $text);
  96.    for my $line (@text) {
  97.      next if !($line) ;
  98.      $line =~ s/\s+//g;
  99.      my @cut=split(":", $line) ;
  100.      if ($cut[1]) {
  101.        my @son=split(',',$cut[1]);
  102.        $ref->{'next'}{$cut[0]}=$cut[1];
  103.       for my $s (@son) {
  104.         if ($ref->{'prev'}{$s}) { $ref->{'prev'}{$s} .= "," . $cut[0]} else {$ref->{'prev'}{$s} = $cut[0]}
  105.       }
  106.      }
  107.    }
  108. }
  109. close IN;
  110.   %ref
  111. }
  112.  
  113. sub treate_language {
  114.   my $site_language='en fr nl it cn es';
  115.   my $conf='../../../log/wims.conf';
  116.   if (-e "$conf") {
  117.    open IN, "$conf";
  118.    while (<IN>) {
  119.      if ($_ =~ s/site_languages=//) { $site_language= $_ ; }
  120.    }
  121.   close IN;
  122.  }
  123.   $site_language=~  s/,/ /g; $site_language=~  s/ +/ /g;
  124.   split(' ', $site_language) ;
  125. }
  126.  
  127. sub reverse_dic { my @liste=@_;
  128.   my %ref = ( ) ; my $ref=\%ref;
  129.   for my $file (@liste) {
  130.    open IN, "$file";
  131.    while (<IN>) {
  132.     next if ($_ =~ /^#/);
  133.     my $text= $_ ; $text=~ s/\n//;
  134.     my @text=split(":", $text);
  135.     my $t=$text[0]; $t=~ s/\n//;
  136.     my @L= split(",",$text[1]);
  137.     for my $a (@L) {
  138.      if ($ref{$a}) {  $ref{$a}.= "," . $t ; }
  139.      else { $ref{$a} = $t ; }
  140.     }
  141.    }
  142.    close IN;
  143.   }
  144.   %ref
  145. }
  146.