Subversion Repositories wimsdev

Rev

Rev 11479 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

  1. #!/usr/bin/perl
  2.  
  3. use warnings;
  4. use strict;
  5. use Encode qw(encode decode);
  6. use search ('out', 'sortuniq', 'treate_accent', 'treate_language', 'treate_dict');
  7.  
  8. #use Text::Balanced qw (extract_bracketed extract_tagged);
  9. my @site_lang=treate_language ();
  10. my $dir='../../modules';
  11. my $site='../site/lists';
  12. my $ddir='domain';
  13. my $outputkeywords='tmp';
  14. my $dom_templ='domain/domain.template';
  15. my $dom_reverse='domain/reversedomain';
  16. my $dom_json='domain/domain.json';
  17. #$/ = undef;
  18. ###'arts','earth_sciences','history'
  19. my @DOMAIN=('biology','chemistry','history','informatics',
  20.   'language','mathematics','physics');
  21.  
  22. if (-e $dom_json) {
  23.    open LI, $dom_json;  my $text;
  24.    while(<LI>) {
  25.    next if (/^#|<--/) ; $text .= $_;
  26.    };
  27.    $text=~ s/(\[|\])//g;
  28.    $text=~ s/\'//g;
  29.    push @DOMAIN, split(",\n", $text);
  30. }
  31.  
  32. my %trad = ();
  33. ## should put all perl programm together
  34. ## read reversedomain
  35. if (-e $dom_reverse) {
  36.    open LI, $dom_reverse;
  37.    while(<LI>) {
  38.      my @s= split(':', $_);
  39.      my $k= $s[0]; $k =~ s/\s//g if ($k);
  40.      my $v= $s[1]; $v =~ s/\s//g if ($v);
  41.      $trad{$k}=$v if (($k) && ($v));
  42.    }
  43.   close LI
  44. }
  45.  
  46. for my $lang (@site_lang) {
  47.   my %Domain = ( ) ;
  48.   my $Domain=\%Domain;
  49.   my @KEYWORDS=();
  50.   if (-e "$site/A.$lang") {
  51.     open LI, "$site/A.$lang";
  52.     while(<LI>){ my $F=$_ ; chomp $F;
  53.       my $file= "$dir/$F/INDEX";
  54.       next if ($file=~/(adm|devel)\//) ;
  55.       my @keywords = treate_index ($file, $lang, \%Domain) ;
  56.       push @KEYWORDS, @keywords;
  57.    };
  58.      close LI;
  59.   } else {
  60.     for my $file (glob("$dir/*/*/*")) {
  61.     next if ($file=~/(adm|devel)\//) ;
  62.     my @keywords = treate_index ($file, $lang, \%Domain) ;
  63.     push @KEYWORDS, @keywords;
  64.    }
  65.  }
  66.  
  67. ### traite les groupes de mots cles
  68.  if (-e $dom_templ) {
  69.    open LI, $dom_templ;
  70.    while (<LI>) { my @m=split(":\n",$_);
  71.      for my $a (@m) { $a =~ s/_/ /g;
  72.      push @KEYWORDS, split(",", treate_group($a)) };
  73.    }
  74.    close LI;
  75.    };
  76. ### traite les groupes de mots domaine
  77.  my $file="domain/domain.$lang";
  78.   if (-e $file) {
  79.       open LI, $file;
  80.        while (<LI>) { s /,/\n/g; s /:/\n/g; s /\n+/\n/g;
  81.        my @m=split("\n",$_);
  82.      for my $a (@m) { $a =~ s/_/ /g;
  83.         push @KEYWORDS, split(",", treate_group($a)) };
  84.    }
  85.    close LI;
  86.    };
  87.  
  88.  out("wgrp/wgrp.$lang", join("\n", sortuniq( @KEYWORDS )))  if (@KEYWORDS);
  89.  
  90.  
  91.  my @list=();
  92.  my @ALL=();
  93.  my @ALL_WIMS=();
  94.  for my $d (@DOMAIN) {
  95.    if ($Domain{$d}) {
  96.     push @ALL, split("\n",$Domain{$d});
  97.     out("keywords/$d.$lang.tmp",
  98.      "\"" .
  99.      join("\",\n\"",sortuniq(split("\n",$Domain{$d})) )
  100.      . "'"
  101.      );
  102.     push @list, $d ;
  103.     };
  104.  }
  105.  next if !(-e "$ddir/domain.$lang");
  106.  my %dom = treate_dict ("$ddir/domain.$lang");
  107.  while ( my ($key, $value) = each(%dom) ) {
  108.    push @ALL, split(",", lc(treate_accent($value)));
  109.    push @ALL_WIMS, split(",", lc(treate_accent($value)));
  110.  }
  111.  out("keywords/list.$lang", join(",",sortuniq(@list)));
  112. ##only official keywords
  113.  out("$outputkeywords/keywords.$lang.json",
  114.      "[\"" . join("\",\n\"",sortuniq(@ALL_WIMS) ) . "\"]"
  115. );
  116. ##all keywords
  117. out("$outputkeywords/all.$lang.json",
  118.      "[\"" . join("\",\n\"",sortuniq(@ALL) ) . "\"]"
  119. );
  120.  
  121. }
  122.  
  123. ######################################################
  124.  
  125. sub treate_index { my ($file, $lang, $ref) = @_;
  126.  my @res = (); my @lu = (); my @l = (); my @dom = ();
  127.  my $keyl ='' ; my $keyw=''; my $keyu;
  128.  open(IN, $file) ;
  129.   while (<IN>) { my $line = $_;
  130.     next if !($line =~ /keywords|domain/) ;
  131.     $line =~ s/,\s+/,/g; $line =~ s/\s+,/,/g;
  132.     if ($line =~ /domain=/){ $line =~ s/domain=// ;
  133.       $line =~ s/( +\n|\n)//g;
  134.       $line =~ s/ +/_/g;
  135.       next if( $line =~ /[^[:ascii:]]/);
  136.       $line = lc($line);
  137.       for my $a (split(",", $line)) {
  138.        next if ($a =~ /tevila/) ;
  139.        push @dom, $a ;
  140.       };
  141.     }
  142.     if ($line =~ /keywords_$lang\s*=/) {
  143.      $keyl=treate_keyword($line);
  144.      next if (!($line =~ /,/));
  145.      @l = treate_group($line);
  146.     };
  147.     if ($line =~ /keywords\s*=/) {
  148.      $keyu=treate_keyword($line);
  149.      next if (!($line =~ /,/));
  150.      @lu = treate_group($line);
  151.     }
  152.   }
  153.   close IN;
  154.   if (@l) { push @res, @l } else { push @res, @lu if (@lu) };
  155.   if ($keyl) { $keyw = $keyl } else { $keyw = $keyu if ($keyu); }
  156.   if( @dom ) {
  157.      for my $a (@dom) {
  158.       if ($a) {$ref->{$a} .= $keyw . "\n" if ($keyw) ;
  159.       }
  160.    }
  161.   }
  162.   @res;
  163. }
  164.  
  165. sub treate_group { my ($line) = @_ ;
  166.   $line=~ s/keywords_(\w+)\s*=\s*//g;
  167.   $line=~ s/keywords\s*=\s*//g;
  168.   $line=treate_accent($line);
  169.   $line =~ s/\./,/g;
  170.   my @k = split(',', $line);
  171.   my @tmp;
  172.   for my $la (@k) { $la =~ s/^\s+//g; $la =~ s/\s+$//g; $la=lc($la);
  173.     next if !($la);
  174.     next if !($la =~ / /);
  175.     push @tmp, "$la:$la," if ($la);
  176.   }
  177.   @tmp;
  178. }
  179.  
  180. sub treate_keyword { my ($line) = @_ ;
  181.   $line=~ s/keywords_(\w+)\s*=\s*//g;
  182.   $line=~ s/keywords\s*=\s*//g;
  183.   $line=treate_accent($line);
  184.   if (!($line =~ /,/)) {$line =~ tr / /,/ ;};
  185.   my @k = split(',', $line);
  186.   my $tmp;
  187.   for my $la (@k) {
  188.   $la =~ s/^\s+//g; $la =~ s/\s+$//g; $la=lc($la);
  189.   ##$la=~ s/($nokeyword)//g;
  190.   if ($la && length($la) > 2 ) {
  191.     if ($tmp) { $tmp .= "\n" . join("\n",split(',', $la))}
  192.       else
  193.       {$tmp = join("\n",split(',', $la))}
  194.   }
  195.   }
  196.   #$tmp=~ s/'/\\'/g if ($tmp);
  197.   $tmp;
  198. }
  199.  
  200.