Subversion Repositories wimsdev

Rev

Rev 16933 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
11480 schaersvoo 1
#!/usr/bin/perl
6395 bpr 2
 
3
use warnings;
4
use strict;
6407 bpr 5
use Encode qw(encode decode);
18106 georgesk 6
use lib '/var/lib/wims/public_html/bases/sys';
16933 czzmrn 7
use lib ".";
7016 bpr 8
use search ('out', 'sortuniq', 'treate_accent', 'treate_language', 'treate_dict');
6395 bpr 9
 
6519 bpr 10
#use Text::Balanced qw (extract_bracketed extract_tagged);
7016 bpr 11
my @site_lang=treate_language ();
6406 bpr 12
my $dir='../../modules';
13
my $site='../site/lists';
6879 bpr 14
my $ddir='domain';
6935 bpr 15
my $outputkeywords='tmp';
6410 bpr 16
my $dom_templ='domain/domain.template';
6453 bpr 17
my $dom_reverse='domain/reversedomain';
6460 bpr 18
my $dom_json='domain/domain.json';
6395 bpr 19
#$/ = undef;
6460 bpr 20
###'arts','earth_sciences','history'
21
my @DOMAIN=('biology','chemistry','history','informatics',
22
  'language','mathematics','physics');
6521 bpr 23
 
6460 bpr 24
if (-e $dom_json) {
25
   open LI, $dom_json;  my $text;
6879 bpr 26
   while(<LI>) {
6488 bpr 27
   next if (/^#|<--/) ; $text .= $_;
6460 bpr 28
   };
29
   $text=~ s/(\[|\])//g;
30
   $text=~ s/\'//g;
31
   push @DOMAIN, split(",\n", $text);
32
}
6395 bpr 33
 
6460 bpr 34
my %trad = ();
6453 bpr 35
## should put all perl programm together
36
## read reversedomain
37
if (-e $dom_reverse) {
6879 bpr 38
   open LI, $dom_reverse;
6453 bpr 39
   while(<LI>) {
40
     my @s= split(':', $_);
41
     my $k= $s[0]; $k =~ s/\s//g if ($k);
42
     my $v= $s[1]; $v =~ s/\s//g if ($v);
43
     $trad{$k}=$v if (($k) && ($v));
44
   }
45
  close LI
46
}
6395 bpr 47
 
6879 bpr 48
for my $lang (@site_lang) {
6395 bpr 49
  my %Domain = ( ) ;
50
  my $Domain=\%Domain;
51
  my @KEYWORDS=();
6406 bpr 52
  if (-e "$site/A.$lang") {
53
    open LI, "$site/A.$lang";
6879 bpr 54
    while(<LI>){ my $F=$_ ; chomp $F;
6406 bpr 55
      my $file= "$dir/$F/INDEX";
56
      next if ($file=~/(adm|devel)\//) ;
6519 bpr 57
      my @keywords = treate_index ($file, $lang, \%Domain) ;
6406 bpr 58
      push @KEYWORDS, @keywords;
59
   };
60
     close LI;
61
  } else {
6879 bpr 62
    for my $file (glob("$dir/*/*/*")) {
6395 bpr 63
    next if ($file=~/(adm|devel)\//) ;
6519 bpr 64
    my @keywords = treate_index ($file, $lang, \%Domain) ;
6395 bpr 65
    push @KEYWORDS, @keywords;
6406 bpr 66
   }
67
 }
6879 bpr 68
 
6460 bpr 69
### traite les groupes de mots cles
6410 bpr 70
 if (-e $dom_templ) {
71
   open LI, $dom_templ;
72
   while (<LI>) { my @m=split(":\n",$_);
73
     for my $a (@m) { $a =~ s/_/ /g;
8425 bpr 74
     push @KEYWORDS, split(",", treate_group($a)) };
6410 bpr 75
   }
76
   close LI;
6417 bpr 77
   };
6460 bpr 78
### traite les groupes de mots domaine
6417 bpr 79
 my $file="domain/domain.$lang";
80
  if (-e $file) {
81
      open LI, $file;
6879 bpr 82
       while (<LI>) { s /,/\n/g; s /:/\n/g; s /\n+/\n/g;
6417 bpr 83
       my @m=split("\n",$_);
84
     for my $a (@m) { $a =~ s/_/ /g;
8425 bpr 85
        push @KEYWORDS, split(",", treate_group($a)) };
6410 bpr 86
   }
6417 bpr 87
   close LI;
88
   };
6460 bpr 89
 
6395 bpr 90
 out("wgrp/wgrp.$lang", join("\n", sortuniq( @KEYWORDS )))  if (@KEYWORDS);
6879 bpr 91
 
92
 
6488 bpr 93
 my @list=();
6521 bpr 94
 my @ALL=();
8717 bpr 95
 my @ALL_WIMS=();
6460 bpr 96
 for my $d (@DOMAIN) {
6488 bpr 97
   if ($Domain{$d}) {
6521 bpr 98
    push @ALL, split("\n",$Domain{$d});
6488 bpr 99
    out("keywords/$d.$lang.tmp",
6879 bpr 100
     "\"" .
101
     join("\",\n\"",sortuniq(split("\n",$Domain{$d})) )
6488 bpr 102
     . "'"
6521 bpr 103
     );
6488 bpr 104
    push @list, $d ;
105
    };
6460 bpr 106
 }
6879 bpr 107
 next if !(-e "$ddir/domain.$lang");
7016 bpr 108
 my %dom = treate_dict ("$ddir/domain.$lang");
6894 bpr 109
 while ( my ($key, $value) = each(%dom) ) {
8717 bpr 110
   push @ALL, split(",", lc(treate_accent($value)));
111
   push @ALL_WIMS, split(",", lc(treate_accent($value)));
112
 }
6488 bpr 113
 out("keywords/list.$lang", join(",",sortuniq(@list)));
8717 bpr 114
##only official keywords
6935 bpr 115
 out("$outputkeywords/keywords.$lang.json",
8717 bpr 116
     "[\"" . join("\",\n\"",sortuniq(@ALL_WIMS) ) . "\"]"
117
);
118
##all keywords
119
out("$outputkeywords/all.$lang.json",
6935 bpr 120
     "[\"" . join("\",\n\"",sortuniq(@ALL) ) . "\"]"
6521 bpr 121
);
122
 
6395 bpr 123
}
6488 bpr 124
 
6519 bpr 125
######################################################
6488 bpr 126
 
6519 bpr 127
sub treate_index { my ($file, $lang, $ref) = @_;
6395 bpr 128
 my @res = (); my @lu = (); my @l = (); my @dom = ();
6406 bpr 129
 my $keyl ='' ; my $keyw=''; my $keyu;
6395 bpr 130
 open(IN, $file) ;
6879 bpr 131
  while (<IN>) { my $line = $_;
6395 bpr 132
    next if !($line =~ /keywords|domain/) ;
133
    $line =~ s/,\s+/,/g; $line =~ s/\s+,/,/g;
134
    if ($line =~ /domain=/){ $line =~ s/domain=// ;
135
      $line =~ s/( +\n|\n)//g;
6879 bpr 136
      $line =~ s/ +/_/g;
6395 bpr 137
      next if( $line =~ /[^[:ascii:]]/);
138
      $line = lc($line);
6489 bpr 139
      for my $a (split(",", $line)) {
6395 bpr 140
       next if ($a =~ /tevila/) ;
141
       push @dom, $a ;
142
      };
143
    }
6406 bpr 144
    if ($line =~ /keywords_$lang\s*=/) {
6395 bpr 145
     $keyl=treate_keyword($line);
146
     next if (!($line =~ /,/));
6406 bpr 147
     @l = treate_group($line);
6395 bpr 148
    };
6406 bpr 149
    if ($line =~ /keywords\s*=/) {
6395 bpr 150
     $keyu=treate_keyword($line);
151
     next if (!($line =~ /,/));
6406 bpr 152
     @lu = treate_group($line);
6395 bpr 153
    }
154
  }
155
  close IN;
156
  if (@l) { push @res, @l } else { push @res, @lu if (@lu) };
6406 bpr 157
  if ($keyl) { $keyw = $keyl } else { $keyw = $keyu if ($keyu); }
6395 bpr 158
  if( @dom ) {
159
     for my $a (@dom) {
6491 bpr 160
      if ($a) {$ref->{$a} .= $keyw . "\n" if ($keyw) ;
6395 bpr 161
      }
162
   }
163
  }
164
  @res;
165
}
166
 
6406 bpr 167
sub treate_group { my ($line) = @_ ;
168
  $line=~ s/keywords_(\w+)\s*=\s*//g;
169
  $line=~ s/keywords\s*=\s*//g;
6935 bpr 170
  $line=treate_accent($line);
171
  $line =~ s/\./,/g;
6395 bpr 172
  my @k = split(',', $line);
173
  my @tmp;
174
  for my $la (@k) { $la =~ s/^\s+//g; $la =~ s/\s+$//g; $la=lc($la);
6406 bpr 175
    next if !($la);
6395 bpr 176
    next if !($la =~ / /);
177
    push @tmp, "$la:$la," if ($la);
178
  }
179
  @tmp;
180
}
181
 
6406 bpr 182
sub treate_keyword { my ($line) = @_ ;
183
  $line=~ s/keywords_(\w+)\s*=\s*//g;
184
  $line=~ s/keywords\s*=\s*//g;
185
  $line=treate_accent($line);
6395 bpr 186
  if (!($line =~ /,/)) {$line =~ tr / /,/ ;};
187
  my @k = split(',', $line);
188
  my $tmp;
6521 bpr 189
  for my $la (@k) {
6491 bpr 190
  $la =~ s/^\s+//g; $la =~ s/\s+$//g; $la=lc($la);
6521 bpr 191
  ##$la=~ s/($nokeyword)//g;
6935 bpr 192
  if ($la && length($la) > 2 ) {
6879 bpr 193
    if ($tmp) { $tmp .= "\n" . join("\n",split(',', $la))}
6491 bpr 194
      else
195
      {$tmp = join("\n",split(',', $la))}
6395 bpr 196
  }
6491 bpr 197
  }
6727 bpr 198
  #$tmp=~ s/'/\\'/g if ($tmp);
6395 bpr 199
  $tmp;
200
}
201