Subversion Repositories wimsdev

Rev

Rev 16933 | Blame | Compare with Previous | Last modification | View Log | RSS feed

  1. #!/usr/bin/perl
  2.  
  3. use warnings;
  4. use strict;
  5. use Encode qw(encode decode);
  6. use lib '/var/lib/wims/public_html/bases/sys';
  7. use lib ".";
  8. use search ('out', 'sortuniq', 'treate_accent', 'treate_language', 'treate_dict');
  9.  
  10. #use Text::Balanced qw (extract_bracketed extract_tagged);
  11. my @site_lang=treate_language ();
  12. my $dir='../../modules';
  13. my $site='../site/lists';
  14. my $ddir='domain';
  15. my $outputkeywords='tmp';
  16. my $dom_templ='domain/domain.template';
  17. my $dom_reverse='domain/reversedomain';
  18. my $dom_json='domain/domain.json';
  19. #$/ = undef;
  20. ###'arts','earth_sciences','history'
  21. my @DOMAIN=('biology','chemistry','history','informatics',
  22.   'language','mathematics','physics');
  23.  
  24. if (-e $dom_json) {
  25.    open LI, $dom_json;  my $text;
  26.    while(<LI>) {
  27.    next if (/^#|<--/) ; $text .= $_;
  28.    };
  29.    $text=~ s/(\[|\])//g;
  30.    $text=~ s/\'//g;
  31.    push @DOMAIN, split(",\n", $text);
  32. }
  33.  
  34. my %trad = ();
  35. ## should put all perl programm together
  36. ## read reversedomain
  37. if (-e $dom_reverse) {
  38.    open LI, $dom_reverse;
  39.    while(<LI>) {
  40.      my @s= split(':', $_);
  41.      my $k= $s[0]; $k =~ s/\s//g if ($k);
  42.      my $v= $s[1]; $v =~ s/\s//g if ($v);
  43.      $trad{$k}=$v if (($k) && ($v));
  44.    }
  45.   close LI
  46. }
  47.  
  48. for my $lang (@site_lang) {
  49.   my %Domain = ( ) ;
  50.   my $Domain=\%Domain;
  51.   my @KEYWORDS=();
  52.   if (-e "$site/A.$lang") {
  53.     open LI, "$site/A.$lang";
  54.     while(<LI>){ my $F=$_ ; chomp $F;
  55.       my $file= "$dir/$F/INDEX";
  56.       next if ($file=~/(adm|devel)\//) ;
  57.       my @keywords = treate_index ($file, $lang, \%Domain) ;
  58.       push @KEYWORDS, @keywords;
  59.    };
  60.      close LI;
  61.   } else {
  62.     for my $file (glob("$dir/*/*/*")) {
  63.     next if ($file=~/(adm|devel)\//) ;
  64.     my @keywords = treate_index ($file, $lang, \%Domain) ;
  65.     push @KEYWORDS, @keywords;
  66.    }
  67.  }
  68.  
  69. ### traite les groupes de mots cles
  70.  if (-e $dom_templ) {
  71.    open LI, $dom_templ;
  72.    while (<LI>) { my @m=split(":\n",$_);
  73.      for my $a (@m) { $a =~ s/_/ /g;
  74.      push @KEYWORDS, split(",", treate_group($a)) };
  75.    }
  76.    close LI;
  77.    };
  78. ### traite les groupes de mots domaine
  79.  my $file="domain/domain.$lang";
  80.   if (-e $file) {
  81.       open LI, $file;
  82.        while (<LI>) { s /,/\n/g; s /:/\n/g; s /\n+/\n/g;
  83.        my @m=split("\n",$_);
  84.      for my $a (@m) { $a =~ s/_/ /g;
  85.         push @KEYWORDS, split(",", treate_group($a)) };
  86.    }
  87.    close LI;
  88.    };
  89.  
  90.  out("wgrp/wgrp.$lang", join("\n", sortuniq( @KEYWORDS )))  if (@KEYWORDS);
  91.  
  92.  
  93.  my @list=();
  94.  my @ALL=();
  95.  my @ALL_WIMS=();
  96.  for my $d (@DOMAIN) {
  97.    if ($Domain{$d}) {
  98.     push @ALL, split("\n",$Domain{$d});
  99.     out("keywords/$d.$lang.tmp",
  100.      "\"" .
  101.      join("\",\n\"",sortuniq(split("\n",$Domain{$d})) )
  102.      . "'"
  103.      );
  104.     push @list, $d ;
  105.     };
  106.  }
  107.  next if !(-e "$ddir/domain.$lang");
  108.  my %dom = treate_dict ("$ddir/domain.$lang");
  109.  while ( my ($key, $value) = each(%dom) ) {
  110.    push @ALL, split(",", lc(treate_accent($value)));
  111.    push @ALL_WIMS, split(",", lc(treate_accent($value)));
  112.  }
  113.  out("keywords/list.$lang", join(",",sortuniq(@list)));
  114. ##only official keywords
  115.  out("$outputkeywords/keywords.$lang.json",
  116.      "[\"" . join("\",\n\"",sortuniq(@ALL_WIMS) ) . "\"]"
  117. );
  118. ##all keywords
  119. out("$outputkeywords/all.$lang.json",
  120.      "[\"" . join("\",\n\"",sortuniq(@ALL) ) . "\"]"
  121. );
  122.  
  123. }
  124.  
  125. ######################################################
  126.  
  127. sub treate_index { my ($file, $lang, $ref) = @_;
  128.  my @res = (); my @lu = (); my @l = (); my @dom = ();
  129.  my $keyl ='' ; my $keyw=''; my $keyu;
  130.  open(IN, $file) ;
  131.   while (<IN>) { my $line = $_;
  132.     next if !($line =~ /keywords|domain/) ;
  133.     $line =~ s/,\s+/,/g; $line =~ s/\s+,/,/g;
  134.     if ($line =~ /domain=/){ $line =~ s/domain=// ;
  135.       $line =~ s/( +\n|\n)//g;
  136.       $line =~ s/ +/_/g;
  137.       next if( $line =~ /[^[:ascii:]]/);
  138.       $line = lc($line);
  139.       for my $a (split(",", $line)) {
  140.        next if ($a =~ /tevila/) ;
  141.        push @dom, $a ;
  142.       };
  143.     }
  144.     if ($line =~ /keywords_$lang\s*=/) {
  145.      $keyl=treate_keyword($line);
  146.      next if (!($line =~ /,/));
  147.      @l = treate_group($line);
  148.     };
  149.     if ($line =~ /keywords\s*=/) {
  150.      $keyu=treate_keyword($line);
  151.      next if (!($line =~ /,/));
  152.      @lu = treate_group($line);
  153.     }
  154.   }
  155.   close IN;
  156.   if (@l) { push @res, @l } else { push @res, @lu if (@lu) };
  157.   if ($keyl) { $keyw = $keyl } else { $keyw = $keyu if ($keyu); }
  158.   if( @dom ) {
  159.      for my $a (@dom) {
  160.       if ($a) {$ref->{$a} .= $keyw . "\n" if ($keyw) ;
  161.       }
  162.    }
  163.   }
  164.   @res;
  165. }
  166.  
  167. sub treate_group { my ($line) = @_ ;
  168.   $line=~ s/keywords_(\w+)\s*=\s*//g;
  169.   $line=~ s/keywords\s*=\s*//g;
  170.   $line=treate_accent($line);
  171.   $line =~ s/\./,/g;
  172.   my @k = split(',', $line);
  173.   my @tmp;
  174.   for my $la (@k) { $la =~ s/^\s+//g; $la =~ s/\s+$//g; $la=lc($la);
  175.     next if !($la);
  176.     next if !($la =~ / /);
  177.     push @tmp, "$la:$la," if ($la);
  178.   }
  179.   @tmp;
  180. }
  181.  
  182. sub treate_keyword { my ($line) = @_ ;
  183.   $line=~ s/keywords_(\w+)\s*=\s*//g;
  184.   $line=~ s/keywords\s*=\s*//g;
  185.   $line=treate_accent($line);
  186.   if (!($line =~ /,/)) {$line =~ tr / /,/ ;};
  187.   my @k = split(',', $line);
  188.   my $tmp;
  189.   for my $la (@k) {
  190.   $la =~ s/^\s+//g; $la =~ s/\s+$//g; $la=lc($la);
  191.   ##$la=~ s/($nokeyword)//g;
  192.   if ($la && length($la) > 2 ) {
  193.     if ($tmp) { $tmp .= "\n" . join("\n",split(',', $la))}
  194.       else
  195.       {$tmp = join("\n",split(',', $la))}
  196.   }
  197.   }
  198.   #$tmp=~ s/'/\\'/g if ($tmp);
  199.   $tmp;
  200. }
  201.  
  202.