Subversion Repositories wimsdev

Rev

Rev 11480 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

  1. #!/usr/bin/perl
  2.  
  3. use warnings;
  4. use strict;
  5. use Encode qw(encode decode);
  6. use lib ".";
  7. use search ('out', 'sortuniq', 'treate_accent', 'treate_language', 'treate_dict');
  8.  
  9. #use Text::Balanced qw (extract_bracketed extract_tagged);
  10. my @site_lang=treate_language ();
  11. my $dir='../../modules';
  12. my $site='../site/lists';
  13. my $ddir='domain';
  14. my $outputkeywords='tmp';
  15. my $dom_templ='domain/domain.template';
  16. my $dom_reverse='domain/reversedomain';
  17. my $dom_json='domain/domain.json';
  18. #$/ = undef;
  19. ###'arts','earth_sciences','history'
  20. my @DOMAIN=('biology','chemistry','history','informatics',
  21.   'language','mathematics','physics');
  22.  
  23. if (-e $dom_json) {
  24.    open LI, $dom_json;  my $text;
  25.    while(<LI>) {
  26.    next if (/^#|<--/) ; $text .= $_;
  27.    };
  28.    $text=~ s/(\[|\])//g;
  29.    $text=~ s/\'//g;
  30.    push @DOMAIN, split(",\n", $text);
  31. }
  32.  
  33. my %trad = ();
  34. ## should put all perl programm together
  35. ## read reversedomain
  36. if (-e $dom_reverse) {
  37.    open LI, $dom_reverse;
  38.    while(<LI>) {
  39.      my @s= split(':', $_);
  40.      my $k= $s[0]; $k =~ s/\s//g if ($k);
  41.      my $v= $s[1]; $v =~ s/\s//g if ($v);
  42.      $trad{$k}=$v if (($k) && ($v));
  43.    }
  44.   close LI
  45. }
  46.  
  47. for my $lang (@site_lang) {
  48.   my %Domain = ( ) ;
  49.   my $Domain=\%Domain;
  50.   my @KEYWORDS=();
  51.   if (-e "$site/A.$lang") {
  52.     open LI, "$site/A.$lang";
  53.     while(<LI>){ my $F=$_ ; chomp $F;
  54.       my $file= "$dir/$F/INDEX";
  55.       next if ($file=~/(adm|devel)\//) ;
  56.       my @keywords = treate_index ($file, $lang, \%Domain) ;
  57.       push @KEYWORDS, @keywords;
  58.    };
  59.      close LI;
  60.   } else {
  61.     for my $file (glob("$dir/*/*/*")) {
  62.     next if ($file=~/(adm|devel)\//) ;
  63.     my @keywords = treate_index ($file, $lang, \%Domain) ;
  64.     push @KEYWORDS, @keywords;
  65.    }
  66.  }
  67.  
  68. ### traite les groupes de mots cles
  69.  if (-e $dom_templ) {
  70.    open LI, $dom_templ;
  71.    while (<LI>) { my @m=split(":\n",$_);
  72.      for my $a (@m) { $a =~ s/_/ /g;
  73.      push @KEYWORDS, split(",", treate_group($a)) };
  74.    }
  75.    close LI;
  76.    };
  77. ### traite les groupes de mots domaine
  78.  my $file="domain/domain.$lang";
  79.   if (-e $file) {
  80.       open LI, $file;
  81.        while (<LI>) { s /,/\n/g; s /:/\n/g; s /\n+/\n/g;
  82.        my @m=split("\n",$_);
  83.      for my $a (@m) { $a =~ s/_/ /g;
  84.         push @KEYWORDS, split(",", treate_group($a)) };
  85.    }
  86.    close LI;
  87.    };
  88.  
  89.  out("wgrp/wgrp.$lang", join("\n", sortuniq( @KEYWORDS )))  if (@KEYWORDS);
  90.  
  91.  
  92.  my @list=();
  93.  my @ALL=();
  94.  my @ALL_WIMS=();
  95.  for my $d (@DOMAIN) {
  96.    if ($Domain{$d}) {
  97.     push @ALL, split("\n",$Domain{$d});
  98.     out("keywords/$d.$lang.tmp",
  99.      "\"" .
  100.      join("\",\n\"",sortuniq(split("\n",$Domain{$d})) )
  101.      . "'"
  102.      );
  103.     push @list, $d ;
  104.     };
  105.  }
  106.  next if !(-e "$ddir/domain.$lang");
  107.  my %dom = treate_dict ("$ddir/domain.$lang");
  108.  while ( my ($key, $value) = each(%dom) ) {
  109.    push @ALL, split(",", lc(treate_accent($value)));
  110.    push @ALL_WIMS, split(",", lc(treate_accent($value)));
  111.  }
  112.  out("keywords/list.$lang", join(",",sortuniq(@list)));
  113. ##only official keywords
  114.  out("$outputkeywords/keywords.$lang.json",
  115.      "[\"" . join("\",\n\"",sortuniq(@ALL_WIMS) ) . "\"]"
  116. );
  117. ##all keywords
  118. out("$outputkeywords/all.$lang.json",
  119.      "[\"" . join("\",\n\"",sortuniq(@ALL) ) . "\"]"
  120. );
  121.  
  122. }
  123.  
  124. ######################################################
  125.  
  126. sub treate_index { my ($file, $lang, $ref) = @_;
  127.  my @res = (); my @lu = (); my @l = (); my @dom = ();
  128.  my $keyl ='' ; my $keyw=''; my $keyu;
  129.  open(IN, $file) ;
  130.   while (<IN>) { my $line = $_;
  131.     next if !($line =~ /keywords|domain/) ;
  132.     $line =~ s/,\s+/,/g; $line =~ s/\s+,/,/g;
  133.     if ($line =~ /domain=/){ $line =~ s/domain=// ;
  134.       $line =~ s/( +\n|\n)//g;
  135.       $line =~ s/ +/_/g;
  136.       next if( $line =~ /[^[:ascii:]]/);
  137.       $line = lc($line);
  138.       for my $a (split(",", $line)) {
  139.        next if ($a =~ /tevila/) ;
  140.        push @dom, $a ;
  141.       };
  142.     }
  143.     if ($line =~ /keywords_$lang\s*=/) {
  144.      $keyl=treate_keyword($line);
  145.      next if (!($line =~ /,/));
  146.      @l = treate_group($line);
  147.     };
  148.     if ($line =~ /keywords\s*=/) {
  149.      $keyu=treate_keyword($line);
  150.      next if (!($line =~ /,/));
  151.      @lu = treate_group($line);
  152.     }
  153.   }
  154.   close IN;
  155.   if (@l) { push @res, @l } else { push @res, @lu if (@lu) };
  156.   if ($keyl) { $keyw = $keyl } else { $keyw = $keyu if ($keyu); }
  157.   if( @dom ) {
  158.      for my $a (@dom) {
  159.       if ($a) {$ref->{$a} .= $keyw . "\n" if ($keyw) ;
  160.       }
  161.    }
  162.   }
  163.   @res;
  164. }
  165.  
  166. sub treate_group { my ($line) = @_ ;
  167.   $line=~ s/keywords_(\w+)\s*=\s*//g;
  168.   $line=~ s/keywords\s*=\s*//g;
  169.   $line=treate_accent($line);
  170.   $line =~ s/\./,/g;
  171.   my @k = split(',', $line);
  172.   my @tmp;
  173.   for my $la (@k) { $la =~ s/^\s+//g; $la =~ s/\s+$//g; $la=lc($la);
  174.     next if !($la);
  175.     next if !($la =~ / /);
  176.     push @tmp, "$la:$la," if ($la);
  177.   }
  178.   @tmp;
  179. }
  180.  
  181. sub treate_keyword { my ($line) = @_ ;
  182.   $line=~ s/keywords_(\w+)\s*=\s*//g;
  183.   $line=~ s/keywords\s*=\s*//g;
  184.   $line=treate_accent($line);
  185.   if (!($line =~ /,/)) {$line =~ tr / /,/ ;};
  186.   my @k = split(',', $line);
  187.   my $tmp;
  188.   for my $la (@k) {
  189.   $la =~ s/^\s+//g; $la =~ s/\s+$//g; $la=lc($la);
  190.   ##$la=~ s/($nokeyword)//g;
  191.   if ($la && length($la) > 2 ) {
  192.     if ($tmp) { $tmp .= "\n" . join("\n",split(',', $la))}
  193.       else
  194.       {$tmp = join("\n",split(',', $la))}
  195.   }
  196.   }
  197.   #$tmp=~ s/'/\\'/g if ($tmp);
  198.   $tmp;
  199. }
  200.  
  201.