Subversion Repositories wimsdev

Rev

Rev 7915 | Rev 8149 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

  1. /*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
  2.  *
  3.  *  This program is free software; you can redistribute it and/or modify
  4.  *  it under the terms of the GNU General Public License as published by
  5.  *  the Free Software Foundation; either version 2 of the License, or
  6.  *  (at your option) any later version.
  7.  *
  8.  *  This program is distributed in the hope that it will be useful,
  9.  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  10.  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11.  *  GNU General Public License for more details.
  12.  *
  13.  *  You should have received a copy of the GNU General Public License
  14.  *  along with this program; if not, write to the Free Software
  15.  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16.  */
  17.  
  18. /*  This is an internal program,
  19.  * used to index modules for search engine.
  20.  */
  21.  
  22. #include "../wims.h"
  23. #include "../Lib/libwims.h"
  24.  
  25. #define MAX_LANGS    MAX_LANGUAGES
  26. #define MAX_MODULES    65536
  27. char *moduledir=    "public_html/modules";
  28. char *sheetdir=     "public_html/bases/sheet";
  29. char *dicdir=       "public_html/bases";
  30. char *outdir=       "public_html/bases/site2";
  31. char *maindic=      "sys/words";
  32. char *groupdic=     "sys/wgrp/wgrp";
  33. char *suffixdic=    "sys/suffix";
  34. char *domaindic=    "sys/domaindic";
  35. char *ignoredic=    "sys/indignore";
  36. char *conffile=     "log/wims.conf";
  37. char *mlistbase=    "list";
  38.  
  39. char lang[MAX_LANGS][4]={
  40.     "en","fr","cn","es","it","nl","si","ca","pt"
  41. };
  42. #define DEFAULT_LANGCNT    6
  43. char allang[MAX_LANGS][4]={
  44.     "en","fr","cn","es","it","nl","de","si","ca","pt"
  45. };
  46. #define allangcnt 8
  47. char ignore[MAX_LANGS][MAX_LINELEN+1];
  48. char mlistfile[MAX_LANGS][256];
  49. int langcnt;
  50. FILE *langf, *titf, *descf, *weightf, *robotf, *indf, *listf, *addrf, *serialf, *authorf, *versionf, *remf;
  51.  
  52. struct cat {
  53.     char *name;
  54.     char typ;
  55. } cat[]={
  56.     {"all_types", 'A'},
  57.     {"exercise",  'X'},
  58.     {"oef",       'O'},
  59.     {"tool",      'T'},
  60.     {"recreation",'R'},
  61.     {"reference", 'Y'},
  62.     {"document",  'D'},
  63.     {"popup",     'P'},
  64.     {"datamodule",'M'}
  65. };
  66. #define catno (sizeof(cat)/sizeof(cat[0]))
  67.  
  68. struct mod {
  69.     char *name;
  70.     unsigned char langs[MAX_LANGS];
  71.     int counts[MAX_LANGS];
  72.     int  langcnt;
  73. } mod[MAX_MODULES];
  74. int modcnt;
  75.  
  76. char *mlist;
  77.  
  78. /*
  79. void *xmalloc(size_t n)
  80. {
  81.     void *p;
  82.     p=malloc(n);
  83.     if(p==NULL) {
  84.     printf("Malloc failure.\n");
  85.     exit(1);
  86.     }
  87.     return p;
  88. }
  89. */
  90.  
  91. /*
  92. char *acctab="çéèêëúùûüáàâäãóòôöõíìïîñýÿÇÉÈÊËÚÙÛÜÁÀÂÃÄÓÒÔÖÕÍÌÏÎÑÝ",
  93.      *deatab="ceeeeuuuuaaaaaoooooiiiinyyCEEEEUUUUAAAAAOOOOOIIIINY";
  94. */
  95. /*  fold known accented letters to unaccented, other strange characters to space
  96.  *  apostrophe is among the exceptions to be kept (important for multi-word expressions)
  97.  */
  98. void deaccent2(char *p)
  99. {
  100.     char *sp;
  101.     char *v;
  102.     for(sp=p;*sp;sp++) {
  103.     if(*sp<0 && (v=strchr(acctab,*sp))!=NULL)
  104.       *sp=*(deatab+(v-acctab));
  105.     if(!isalnum(*sp) && strchr(",.&$+*",*sp)==0) *sp=' ';
  106.     else *sp=tolower(*sp);
  107.     }
  108. }
  109.  
  110. /*  translate everything non-alphanumeric into space */
  111. void towords(char *p)
  112. {
  113.     char *pp;
  114.     for(pp=p;*pp;pp++) if(!isalnum(*pp) && strchr("&$+*",*pp)==0) *pp=' ';
  115. }
  116.  
  117. /*  Points to the end of the word */
  118. /*
  119. char *find_word_end(char *p)
  120. {
  121.     int i;
  122.     for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++);
  123.     return p;
  124. }
  125. */
  126. /*  Strips leading spaces */
  127. /*
  128. char *find_word_start(char *p)
  129. {
  130.     int i;
  131.     for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++);
  132.     return p;
  133. }
  134. */
  135. /*  Find first occurrence of word */
  136. char *wordchr2(char *p, char *w)
  137. {
  138.     char *r;
  139.  
  140.     for(r=strstr(p,w);r!=NULL &&
  141.     ( (r>p && !isspace(*(r-1))) || (!isspace(*(r+strlen(w))) && *(r+strlen(w))!=0) );
  142.     r=strstr(r+1,w));
  143.     return r;
  144. }
  145.  
  146. /*  find a variable in a string (math expression).
  147.  * Returns the pointer or NULL.
  148.  */
  149. /*char *varchr(char *p, char *v)
  150. {
  151.     char *pp; int n=strlen(v);
  152.     for(pp=strstr(p,v); pp!=NULL; pp=strstr(pp+1,v)) {
  153.     if((pp==p || !isalnum(*(pp-1))) &&
  154.        (!isalnum(*(pp+n)) || *(pp+n)==0)) break;
  155.     }
  156.     return pp;
  157. }
  158. */
  159. /*  strip trailing spaces; return string end. */
  160. char *strip_trailing_spaces2(char *p)
  161. {
  162.     char *pp;
  163.     if(*p==0) return p;
  164.     for(pp=p+strlen(p)-1; pp>=p && isspace(*pp); *(pp--)=0);
  165.     return pp;
  166. }
  167.  
  168. char *find_tag_end(char *p)
  169. {
  170.     char *pp;
  171.     pp=p; if(*pp=='<') pp++;
  172.     for(; *pp && *pp!='>'; pp++) {
  173.     if(*pp=='<') {
  174.         pp=find_tag_end(pp)-1; continue;
  175.     }
  176.     if(*pp=='"') {
  177.         pp=strchr(pp+1,'"');
  178.         if(pp==NULL) return p+strlen(p); else continue;
  179.     }
  180.     if(*pp=='\'') {
  181.         pp=strchr(pp+1,'\'');
  182.         if(pp==NULL) return p+strlen(p); else continue;
  183.     }
  184.     }
  185.     if(*pp=='>') pp++; return pp;
  186. }
  187.  
  188. char *find_tag(char *p, char *tag)
  189. {
  190.     char *pp;
  191.     int len;
  192.     len=strlen(tag);
  193.     for(pp=strchr(p,'<'); pp!=NULL && *pp; pp=strchr(pp+1,'<')) {
  194.     if(strncasecmp(pp+1,tag,len)==0 && !isalnum(*(pp+1+len))) return pp;
  195.     }
  196.     return p+strlen(p);
  197. }
  198.  
  199. /*  remove all html tags */
  200. void detag(char *p)
  201. {
  202.     char *pp, *p2;
  203.     for(pp=strchr(p,'<'); pp!=NULL; pp=strchr(pp,'<')) {
  204.     p2=find_tag_end(pp);
  205.     if(*p2==0) {*pp=0; return; }
  206.     ovlstrcpy(pp,p2);
  207.     }
  208. }
  209.  
  210. /*  modify a string. Bufferlen must be at least MAX_LINELEN */
  211. void string_modify3(char *start, char *bad_beg, char *bad_end, char *good,...)
  212. {
  213.     char buf[MAX_LINELEN+1];
  214.     va_list vp;
  215.  
  216.     va_start(vp,good);
  217.     vsnprintf(buf,sizeof(buf),good,vp); va_end(vp);
  218.     if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN)
  219.       return; /* this is an error situation. */
  220.     strcat(buf,bad_end);
  221.     ovlstrcpy(bad_beg,buf);
  222. }
  223.  
  224. /* add a space after comma to see end of words */
  225.  
  226. void comma(char *p)
  227. {
  228.     char *pp;
  229.     for(pp=strchr(p,','); pp; pp=strchr(pp+1,','))
  230.       string_modify3(p,pp,pp+1,", ");
  231. }
  232.  
  233. void _getdef(char buf[], char *name, char value[])
  234. {
  235.     char *p1, *p2, *p3;
  236.  
  237.     value[0]=0;
  238.     for(p1=strstr(buf,name); p1!=NULL; p1=strstr(p1+1,name)) {
  239.     p2=find_word_start(p1+strlen(name));
  240.     if((p1>buf && !isspace(*(p1-1))) || *p2!='=') continue;
  241.     p3=p1; while(p3>buf && isspace(*(p3-1)) && *(p3-1)!='\n') p3--;
  242.     if(p3>buf && *(p3-1)!='\n') continue;
  243.     p3=strchr(p2,'\n');
  244.     p2=find_word_start(p2+1);
  245.     if(p3 <= p2) continue;
  246.     snprintf(value,MAX_LINELEN,"%s",p2);
  247.     if(p3!=NULL && p3-p2<MAX_LINELEN) value[p3-p2]=0;
  248.     strip_trailing_spaces2(value);
  249.     break;
  250.     }
  251. }
  252.  
  253. /*  Get variable definition from a file.
  254.  * Result stored in buffer value of length MAX_LINELEN.
  255.  */
  256. void getdef(char *fname, char *name, char value[])
  257. {
  258.     FILE *f;
  259.     char *buf;
  260.     int l;
  261.  
  262.     value[0]=0;
  263.     f=fopen(fname,"r"); if(f==NULL) return;
  264.     fseek(f,0,SEEK_END); l=ftell(f); fseek(f,0,SEEK_SET);
  265.     buf=xmalloc(l+256); l=fread(buf,1,l,f);
  266.     fclose(f);
  267.     if(l<=0) return; else buf[l]=0;
  268.     _getdef(buf,name,value);
  269.     free(buf);
  270. }
  271.  
  272. #include "translator_.c"
  273.  
  274. char *mdicbuf, *gdicbuf, *ddicbuf;
  275. char gentry[sizeof(entry)], mentry[sizeof(entry)], dentry[sizeof(entry)];
  276. int gentrycount, mentrycount, dentrycount;
  277.  
  278. /*  Preparation of data */
  279. void prep(void)
  280. {
  281.     char buf[MAX_LINELEN+1];
  282.     char *p1,*p2,*s,*old;
  283.     int i,l,thislang,t;
  284.     FILE *f;
  285.  
  286.     s=getenv("modind_outdir"); if(s!=NULL && *s!=0) outdir=s;
  287.     s=getenv("modind_sheetdir"); if(s!=NULL && *s!=0) sheetdir=s;
  288.     snprintf(buf,sizeof(buf),"%s/addr",outdir);
  289.     addrf=fopen(buf,"w");
  290.     snprintf(buf,sizeof(buf),"%s/serial",outdir);
  291.     serialf=fopen(buf,"w");
  292.     modcnt=langcnt=0;
  293. /* take the langs declared in conffile */
  294.     getdef(conffile,"site_languages",buf);
  295.     for(p1=buf;*p1;p1++) if(!isalnum(*p1)) *p1=' ';
  296.     for(p1=find_word_start(buf); *p1 && langcnt<MAX_LANGS; p1=find_word_start(p2)) {
  297.     p2=find_word_end(p1);
  298.     if(p2!=p1+2 || !isalpha(*p1) || !isalpha(*(p1+1))) continue;
  299.     memmove(lang[langcnt],p1,2); lang[langcnt++][2]=0;
  300.     }
  301.     if(langcnt==0) {/*  default languages */
  302.     langcnt=DEFAULT_LANGCNT;
  303.     }
  304.     s=getenv("mlist"); if(s==NULL) exit(1);
  305.     l=strlen(s); if(l<0 || l>100*MAX_LINELEN) exit(1);
  306.     mlist=xmalloc(l+16); ovlstrcpy(mlist,s); old="";
  307.     for(i=0;i<langcnt;i++) {
  308.     snprintf(buf,sizeof(buf),"%s/%s.%s",dicdir,ignoredic,lang[i]);
  309.     f=fopen(buf,"r"); if(f==NULL) continue;
  310.     l=fread(ignore[i],1,MAX_LINELEN,f);fclose(f);
  311.     if(l<0 || l>=MAX_LINELEN) l=0;
  312.     ignore[i][l]=0;
  313.     }
  314.     for(t=0, p1=find_word_start(mlist);
  315.     *p1 && modcnt<MAX_MODULES;
  316.     p1=find_word_start(p2), t++) {
  317.     p2=find_word_end(p1);
  318.     l=p2-p1; if(*p2) *p2++=0;
  319.     fprintf(addrf,"%d:%s\n",t,p1);
  320.     fprintf(serialf,"%s:%d\n",p1,t);
  321.     thislang=-1;
  322. /* language is taken from the address */
  323.     if(l>3 && p1[l-3]=='.') {
  324.         for(i=0;i<langcnt;i++) if(strcasecmp(lang[i],p1+l-2)==0) break;
  325.         if(i<langcnt) {p1[l-3]=0; thislang=i;}
  326.         else {/*  unknown language, not referenced */
  327.         continue;
  328.         }
  329.     }
  330.     if(modcnt>0 && strcmp(old,p1)==0 && thislang>=0) {
  331.         if(mod[modcnt-1].langcnt<langcnt) {
  332.         mod[modcnt-1].langs[mod[modcnt-1].langcnt]=thislang;
  333.         mod[modcnt-1].counts[mod[modcnt-1].langcnt]=t;
  334.         (mod[modcnt-1].langcnt)++;
  335.         }
  336.     }
  337.     else {
  338.         mod[modcnt].name=old=p1;
  339.         if(thislang>=0) {
  340.         mod[modcnt].langs[0]=thislang;
  341.         mod[modcnt].langcnt=1;
  342.         }
  343.         else mod[modcnt].langcnt=0;
  344.         mod[modcnt].counts[0]=t;
  345.         modcnt++;
  346.     }
  347.     }
  348.     snprintf(buf,sizeof(buf),"%s/language",outdir);
  349.     langf=fopen(buf,"w");
  350.     snprintf(buf,sizeof(buf),"%s/title",outdir);
  351.     titf=fopen(buf,"w");
  352.     snprintf(buf,sizeof(buf),"%s/description",outdir);
  353.     descf=fopen(buf,"w");
  354.     snprintf(buf,sizeof(buf),"%s/author",outdir);
  355.     authorf=fopen(buf,"w");
  356.     snprintf(buf,sizeof(buf),"%s/version",outdir);
  357.     versionf=fopen(buf,"w");
  358.     snprintf(buf,sizeof(buf),"%s/lists/robot.phtml",outdir);
  359.     robotf=fopen(buf,"w");
  360.     fclose(addrf); fclose(serialf);
  361.     if(!robotf || !versionf || !authorf || !descf || !titf || !descf) {
  362.     fprintf(stderr,"modind: error creating output files.\n");
  363.     exit(1);
  364.     }
  365. }
  366.  
  367. void sprep(void)
  368. {
  369.     char *p1,*p2,*s;
  370.     int i,l,thislang;
  371.  
  372.     modcnt=0;
  373.     s=getenv("slist"); if(s==NULL) return;
  374.     l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return;
  375.     mlist=xmalloc(l+16); ovlstrcpy(mlist,s);
  376.     for(p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2)) {
  377.     p2=find_word_end(p1);
  378.     l=p2-p1; if(*p2) *p2++=0;
  379.     for(i=0;i<langcnt;i++) if(strncasecmp(lang[i],p1,2)==0) break;
  380.     if(i<langcnt) thislang=i; else continue;
  381.     mod[modcnt].name=p1;
  382.     mod[modcnt].langs[0]=thislang;
  383.     mod[modcnt].langcnt=1;
  384.     modcnt++;
  385.     }
  386. }
  387.  
  388. void clean(void)
  389. {
  390.     fclose(langf); fclose(titf); fclose(descf); fclose(robotf);
  391.     fclose(authorf); fclose(versionf);
  392. }
  393.  
  394. char *sheetindex[]={
  395.       "title", "description",
  396.       "duration", "severity",
  397.       "level", "domain",
  398.       "keywords", "reserved1", "reserved2", "information"
  399. };
  400. #define SHEETINDEX_NO (sizeof(sheetindex)/sizeof(sheetindex[0]))
  401. char sindbuf[SHEETINDEX_NO][MAX_LINELEN+1];
  402. enum{s_title, s_description,
  403.       s_duration, s_severity,
  404.       s_level, s_domain,
  405.       s_keywords, s_reserved1, s_reserved2,
  406.       s_information
  407. };
  408.  
  409. char *modindex[]={
  410.       "title", "description",
  411.       "author", "address", "copyright",
  412.       "version", "wims_version", "language",
  413.       "category", "level", "domain", "keywords",
  414.       "keywords_ca", "keywords_en", "keywords_fr", "keywords_it", "keywords_nl",
  415.       "title_ca", "title_en", "title_fr", "title_it", "title_nl",
  416.       "require"
  417. };
  418. #define MODINDEX_NO (sizeof(modindex)/sizeof(modindex[0]))
  419. char indbuf[MODINDEX_NO][MAX_LINELEN+1];
  420. enum{i_title, i_description,
  421.       i_author,i_address,i_copyright,
  422.       i_version,i_wims_version,i_language,
  423.       i_category,i_level,i_domain,i_keywords,
  424.       i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
  425.       i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl,
  426.       i_require
  427. };
  428.  
  429. char *module_special_file[]={
  430.     "intro","help","about"
  431. };
  432. #define MODSPEC_NO (sizeof(module_special_file)/sizeof(module_special_file[0]))
  433. char module_language[4];
  434.  
  435. /*  read and treat module's INDEX file */
  436. int module_index(const char *name)
  437. {
  438.     char *p, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
  439.     FILE *indf;
  440.     int i,l;
  441.  
  442.     snprintf(fbuf,sizeof(fbuf),"%s/%s/INDEX",moduledir,name);
  443.     indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
  444.     l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
  445.     if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
  446. /* treate all fields in *modindex */
  447.     for(i=0;i<MODINDEX_NO;i++) {
  448.     _getdef(ibuf,modindex[i],indbuf[i]);
  449. /*  compatibility precaution */
  450.     if(indbuf[i][0]==':') indbuf[i][0]='.';
  451.     }
  452.     p=find_word_start(indbuf[i_language]);
  453.     if(isalpha(*p) && isalpha(*(p+1))) {
  454.     memmove(module_language,p,2); module_language[2]=0;
  455.     }
  456.     else ovlstrcpy(module_language,"en");
  457.     return 0;
  458. }
  459.  
  460. int sheet_index(int serial)
  461. {
  462.     char *p1, *p2, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
  463.     FILE *indf;
  464.     int i,l;
  465.  
  466.     snprintf(fbuf,sizeof(fbuf),"%s/%s.def",sheetdir,mod[serial].name);
  467.     indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
  468.     l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
  469.     if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
  470.     for(i=0;i<SHEETINDEX_NO;i++) sindbuf[i][0]=0;
  471.     for(i=0,p1=find_word_start(ibuf);
  472.     i<SHEETINDEX_NO-1 && *p1!=':' && *p1!=0;
  473.     i++,p1=p2) {
  474.     p2=strchr(p1,'\n');
  475.     if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
  476.     p1=find_word_start(p1); strip_trailing_spaces2(p1);
  477.     snprintf(sindbuf[i],MAX_LINELEN,"%s",p1);
  478.     }
  479.     p2=strstr(p1,"\n:"); if(p2==NULL) p2=p1+strlen(p1);
  480.     else *p2=0;
  481.     p1=find_word_start(p1); strip_trailing_spaces2(p1);
  482.     for(p2=p1;*p2;p2++) if(*p2=='\n') *p2=' ';
  483.     ovlstrcpy(sindbuf[s_information],p1);
  484.     return 0;
  485. }
  486.  
  487. unsigned char categories[16];
  488. char taken[MAX_LINELEN+1];
  489. int catcnt, takenlen, tweight;
  490.  
  491. void appenditem(char *word, int lind, int serial, int weight, char *l)
  492. {
  493.     char nbuf[MAX_LINELEN+1], buf[MAX_LINELEN+1];
  494.     int i, ll;
  495.     char *p;
  496.     FILE *f;
  497.  
  498.     if(!isalnum(*word) || (ll=strlen(word))<2 ||
  499.        wordchr2(taken,word)!=NULL ||
  500.        wordchr2(ignore[lind],word)!=NULL ||
  501.        takenlen>=MAX_LINELEN-ll-16)
  502.       return;
  503.     if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
  504.     for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
  505.     taken[takenlen++]=' '; taken[takenlen++]=' ';
  506.     ovlstrcpy(taken+takenlen,word);
  507.     takenlen+=ll; tweight+=weight;
  508.     snprintf(buf,sizeof(buf),"%s:%d?%d\n",word,serial,weight);
  509.     for(i=0;i<catcnt;i++) {
  510.     snprintf(nbuf,sizeof(nbuf),"%s/%c.%s",
  511.          outdir,categories[i],lang[lind]);
  512.     f=fopen(nbuf,"a");
  513.     if(f!=NULL) {fputs(buf,f); fclose(f);}
  514.     }
  515. }
  516.  
  517. void appenditem1 (char *buf, int lind, int serial, int weight, char *l )
  518. {
  519.   char *p1, *p2 ;
  520.   for(p1=find_word_start(buf); *p1;
  521.     p1=find_word_start(p2)) {
  522.     p2=strchr(p1,',');
  523.     if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
  524.     if(strlen(p1)<=0) continue;
  525.     appenditem(p1,lind,serial,weight,module_language);
  526.   }
  527. }
  528. void appenditem2 (char *buf, int lind, int serial, int weight, char *l )
  529. {
  530.   char *p1, *p2 ;
  531.   for(p1=find_word_start(buf);*p1;
  532.     p1=find_word_start(p2)) {
  533.     p2=find_word_end(p1); if(*p2) *p2++=0;
  534.     appenditem(p1,lind,serial,weight,module_language);
  535.   }
  536. }
  537. void onemodule(const char *name, int serial, int lind)
  538. {
  539.     int i;
  540.     unsigned char trlist[]={
  541.     i_title,i_description,i_category,i_domain,i_keywords,
  542.       i_require,i_author,
  543.       i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
  544.       i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl
  545.     };
  546.     #define trcnt (sizeof(trlist)/sizeof(trlist[0]))
  547.     char *p1, *p2, *pp, *q, buf[MAX_LINELEN+1], lbuf[16];
  548.     FILE *f;
  549.  
  550.     if(module_index(name)) return;
  551.     towords(indbuf[i_category]);
  552. /*   list the categories (among A=all,X=eXercise,O,D,...) corresponding
  553.  *   to this module
  554.  */
  555.     for(i=catcnt=0;i<catno && catcnt<16;i++) {
  556.     if(wordchr2(indbuf[i_category],cat[i].name)!=NULL)
  557.       categories[catcnt++]=cat[i].typ;
  558.     }
  559.     if(catcnt==0) return;
  560.     if(categories[0]!=cat[0].typ)
  561.       categories[catcnt++]=cat[0].typ;
  562. /*  write module's name in the category.language files, for instance lists/X.fr
  563.  * for french exercises
  564.  */
  565.     for(i=0;i<catcnt;i++) {
  566.     snprintf(buf,sizeof(buf),"%s/lists/%c.%s",
  567.          outdir,categories[i],lang[lind]);
  568.     f=fopen(buf,"a");
  569.     if(f!=NULL) {fprintf(f,"%s\n",name); fclose(f);}
  570.     }
  571. /*   add serial number and language (resp.title, ...) to corresponding file  */
  572.     fprintf(langf,"%d:%s\n",serial,module_language);
  573.     fprintf(titf,"%d:%s\n",serial,indbuf[i_title]);
  574.     fprintf(descf,"%d:%s\n",serial,indbuf[i_description]);
  575.     fprintf(authorf,"%d:%s\n",serial,indbuf[i_author]);
  576.     fprintf(versionf,"%d:%s\n",serial,indbuf[i_version]);
  577.  
  578. /*   add module's information in html page for robots  */
  579.     snprintf(buf,sizeof(buf),"%s",indbuf[i_description]);
  580.     for(pp=strchr(buf,','); pp; pp=strchr(pp,','))
  581.       string_modify3(buf,pp,pp+1,"&#44;");
  582.     if(strcmp(module_language,lang[lind])==0)
  583.       fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name,
  584.           indbuf[i_title], buf);
  585.  
  586. /*   Normalize the information of trlist, using dictionary
  587.  *  -- bases/sys/domain.xx without suffix translation (--> english version)
  588.  */
  589.     entrycount=dentrycount; dicbuf=ddicbuf;
  590.     memmove(entry,dentry,dentrycount*sizeof(entry[0]));
  591.     unknown_type=unk_leave;
  592.     for(i=0;i<trcnt;i++) {
  593.     detag(indbuf[trlist[i]]);
  594.     deaccent2(indbuf[trlist[i]]);
  595.     comma(indbuf[trlist[i]]);
  596.     singlespace2(indbuf[trlist[i]]);
  597.     translate(indbuf[trlist[i]]);
  598.     }
  599. /*   Normalize the information, using dictionary
  600.  *   bases/sys/words.xx with suffix translation
  601.  */
  602.     entrycount=mentrycount; dicbuf=mdicbuf;
  603.     memmove(entry,mentry,mentrycount*sizeof(entry[0]));
  604.     unknown_type=unk_leave;/*  used in translator_.c */
  605.     for(i=0;i<trcnt;i++) {
  606.     suffix_translate(indbuf[trlist[i]]);
  607.     translate(indbuf[trlist[i]]);
  608.     }
  609.  
  610. /* taken contains all words already seen in the module index */
  611.     taken[0]=0; takenlen=tweight=0;
  612. /*  append words of title  */
  613.     ovlstrcpy(buf,indbuf[i_title]); towords(buf);
  614.     appenditem2(buf,lind,serial,4,module_language);
  615.  
  616. /*  extract words of every other information except level */
  617.     snprintf(buf,sizeof(buf),"%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s",
  618.          indbuf[i_description],indbuf[i_keywords],
  619.          indbuf[i_keywords_ca],indbuf[i_keywords_en],indbuf[i_keywords_fr],
  620.          indbuf[i_keywords_it],indbuf[i_keywords_nl],
  621.          indbuf[i_title_ca],indbuf[i_title_en],indbuf[i_title_fr],
  622.          indbuf[i_title_it],indbuf[i_title_nl],
  623.          indbuf[i_domain],indbuf[i_require],indbuf[i_author]);
  624.     towords(buf);
  625.     appenditem2(buf,lind,serial,2,module_language);
  626.  
  627. /*   this time the dictionary is the group dictionary  sys/wgrp/wgrp
  628.  *   with a g (groupdic), not an m (maindic) . see below main, suffix, group.
  629.  *   and delete unknown ?? and translate
  630.  */
  631.     entrycount=gentrycount; dicbuf=gdicbuf;
  632.     memmove(entry,gentry,gentrycount*sizeof(entry[0]));
  633.  
  634. /*  append words of every title information  */
  635.     ovlstrcpy(buf,indbuf[i_title]);
  636.     unknown_type=unk_delete;
  637.     translate(buf);
  638.     appenditem1(buf,lind,serial,2,module_language);
  639.  
  640. /*  append words of information of description except level  */
  641.     snprintf(buf,sizeof(buf),"%s", indbuf[i_description]);
  642.     unknown_type=unk_delete;
  643.     translate(buf);
  644.     appenditem1(buf,lind,serial,4,module_language);
  645.  
  646. /*  append words (or group of words) of keywords and domain  */
  647.     snprintf(buf,sizeof(buf),"%s, %s, %s, %s, %s, %s, %s",
  648.          indbuf[i_domain],indbuf[i_keywords],
  649.          indbuf[i_keywords_ca], indbuf[i_keywords_en],indbuf[i_keywords_fr],
  650.          indbuf[i_keywords_it], indbuf[i_keywords_nl]);
  651.     unknown_type=unk_leave;
  652.     translate(buf);
  653.     appenditem1(buf,lind,serial,2,module_language);
  654.  
  655. /*   append level information, with weight 2 */
  656.     snprintf(buf,sizeof(buf),"%s",indbuf[i_level]);
  657.     ovlstrcpy(lbuf,"level");
  658.     for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' ';
  659.     q=buf+strlen(buf);
  660.     for(p1=find_word_start(buf); (*p1) && (p1 < q) ;
  661.     p1=find_word_start(p2)) {
  662.     p2=find_word_end(p1);
  663.     if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
  664.     if(!isalpha(*p1) ||
  665.        (!isdigit(*(p1+1)) && *(p1+1)!=0) ||
  666.        (*(p1+1)!=0 && *(p1+2)!=0))
  667.       continue;
  668.     *p1=tolower(*p1);
  669.     ovlstrcpy(lbuf+strlen("level"),p1);
  670.     appenditem(lbuf,lind,serial,2,module_language);
  671.     }
  672. /*   append total weight of module to weight file site2/weight.xx  */
  673.     fprintf(weightf,"%d:%d\n",serial,tweight);
  674. }
  675.  
  676. void modules(void)
  677. {
  678.     int i,j,k,d;
  679.     char namebuf[MAX_LINELEN+1];
  680.     char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1];
  681.  
  682.     for(j=0;j<langcnt;j++) {
  683.     snprintf(namebuf,sizeof(namebuf),"%s/weight.%s",outdir,lang[j]);
  684.     weightf=fopen(namebuf,"w");
  685.     snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
  686.     snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
  687.     snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
  688.     snprintf(ddic,sizeof(ddic),"%s/%s.%s",dicdir,domaindic,lang[j]);
  689.     suffix_dic(sdic); prepare_dic(gdic);
  690.     gdicbuf=dicbuf; gentrycount=entrycount;
  691.     memmove(gentry,entry,gentrycount*sizeof(entry[0]));
  692.     prepare_dic(mdic);
  693.     mdicbuf=dicbuf; mentrycount=entrycount;
  694.     memmove(mentry,entry,mentrycount*sizeof(entry[0]));
  695.     prepare_dic(ddic);
  696.     ddicbuf=dicbuf; dentrycount=entrycount;
  697.     memmove(dentry,entry,dentrycount*sizeof(entry[0]));
  698.     unknown_type=unk_leave; translate(ignore[j]);
  699.     for(i=0;i<modcnt;i++) {
  700.         if(mod[i].langcnt>0) {
  701.         for(d=k=0;k<mod[i].langcnt;k++)
  702.           if(mod[i].langs[k]<mod[i].langs[d]) d=k;
  703.         for(k=0;k<mod[i].langcnt && mod[i].langs[k]!=j;k++);
  704.         if(k>=mod[i].langcnt) k=d;
  705.         snprintf(namebuf,MAX_LINELEN,"%s.%s",mod[i].name,
  706.              lang[mod[i].langs[k]]);
  707.         onemodule(namebuf,mod[i].counts[k],j);
  708.         }
  709.         else {
  710.         onemodule(mod[i].name,mod[i].counts[0],j);
  711.         }
  712.     }
  713.     if(mentrycount>0) free(mdicbuf);
  714.     if(gentrycount>0) free(gdicbuf);
  715.     if(suffixcnt>0) free(sufbuf);
  716.     if(dentrycount>0) free(ddicbuf);
  717.     if(weightf) fclose(weightf);
  718.     }
  719. }
  720.  
  721. /* FIXME ? differences with appenditem - use fprintf instead of  snprintf */
  722. void sappenditem(char *word, int lind, int serial, int weight)
  723. {
  724.     int ll;
  725.     char *p;
  726.  
  727.     if(!isalnum(*word) || (ll=strlen(word))<2 ||
  728.        wordchr2(taken,word)!=NULL ||
  729.        wordchr2(ignore[lind],word)!=NULL ||
  730.        takenlen>=MAX_LINELEN-ll-16)
  731.       return;
  732.     if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
  733.     for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
  734.     taken[takenlen++]=' ';taken[takenlen++]=' ';
  735.     ovlstrcpy(taken+takenlen,word);
  736.     takenlen+=ll; tweight+=weight;
  737.     fprintf(indf,"%s:%d?%d\n",word,serial,weight);
  738. }
  739.  
  740. void onesheet(int serial, int lind)
  741. {
  742.     int i;
  743.     unsigned char trlist[]={
  744.     s_title,s_description,s_domain,s_keywords,s_information
  745.     };
  746.     #define trcnt (sizeof(trlist)/sizeof(trlist[0]))
  747.     char *p1, *p2, buf[MAX_LINELEN+1];
  748.  
  749.     if(sheet_index(serial)) return;
  750.     fprintf(listf,"%s\n",mod[serial].name+3);
  751.     fprintf(titf,"%d:%s\n",serial,sindbuf[s_title]);
  752.     fprintf(descf,"%d:%s\n",serial,sindbuf[s_description]);
  753.     fprintf(remf,"%d:%s\n",serial,sindbuf[s_information]);
  754.  
  755.     entrycount=dentrycount; dicbuf=ddicbuf;
  756.     memmove(entry,dentry,dentrycount*sizeof(entry[0]));
  757.     unknown_type=unk_leave;
  758.     for(i=0;i<trcnt;i++) {
  759.     detag(sindbuf[trlist[i]]);
  760.     deaccent2(sindbuf[trlist[i]]);
  761.     comma(sindbuf[trlist[i]]);
  762.     singlespace2(sindbuf[trlist[i]]);
  763.     translate(sindbuf[trlist[i]]);
  764.     }
  765.  
  766.     entrycount=mentrycount; dicbuf=mdicbuf;
  767.     memmove(entry,mentry,mentrycount*sizeof(entry[0]));
  768.     unknown_type=unk_leave;
  769.     for(i=0;i<trcnt;i++) {
  770.     suffix_translate(sindbuf[trlist[i]]);
  771.     translate(sindbuf[trlist[i]]);
  772.     }
  773.     taken[0]=0; takenlen=tweight=0;
  774.     ovlstrcpy(buf,sindbuf[s_title]); towords(buf);
  775.     for(p1=find_word_start(buf);*p1;
  776.     p1=find_word_start(p2)) {
  777.     p2=find_word_end(p1); if(*p2) *p2++=0;
  778.     sappenditem(p1,lind,serial,4);
  779.     }
  780.     snprintf(buf,sizeof(buf),"%s %s %s %s",
  781.          sindbuf[s_description],sindbuf[s_keywords],
  782.          sindbuf[s_domain],sindbuf[s_information]);
  783.     towords(buf);
  784.     for(p1=find_word_start(buf);*p1;
  785.     p1=find_word_start(p2)) {
  786.     p2=find_word_end(p1); if(*p2) *p2++=0;
  787.     sappenditem(p1,lind,serial,2);
  788.     }
  789.     entrycount=gentrycount; dicbuf=gdicbuf;
  790.     memmove(entry,gentry,gentrycount*sizeof(entry[0]));
  791.     unknown_type=unk_delete;
  792.     ovlstrcpy(buf,sindbuf[s_title]); translate(buf);
  793.     for(p1=find_word_start(buf); *p1;
  794.     p1=find_word_start(p2)) {
  795.     p2=strchr(p1,',');
  796.     if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
  797.     if(strlen(p1)<=0) continue;
  798.     sappenditem(p1,lind,serial,4);
  799.     }
  800.     snprintf(buf,sizeof(buf),"%s, %s, %s, %s",
  801.          sindbuf[s_description],sindbuf[s_keywords],
  802.          sindbuf[s_domain],sindbuf[s_information]);
  803.     translate(buf);
  804.     for(p1=find_word_start(buf); *p1;
  805.     p1=find_word_start(p2)) {
  806.     p2=strchr(p1,',');
  807.     if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
  808.     if(strlen(p1)<=0) continue;
  809.     sappenditem(p1,lind,serial,2);
  810.     }
  811.     fprintf(weightf,"%d:%d\n",serial,tweight);
  812. }
  813.  
  814. void sheets(void)
  815. {
  816.     int i,j;
  817.     char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1];
  818.     char buf[MAX_LINELEN+1];
  819.  
  820.     for(j=0;j<langcnt;j++) {
  821.     snprintf(buf,sizeof(buf),"%s/index/title.%s",sheetdir,lang[j]);
  822.     titf=fopen(buf,"w");
  823.     snprintf(buf,sizeof(buf),"%s/index/description.%s",sheetdir,lang[j]);
  824.     descf=fopen(buf,"w");
  825.     snprintf(buf,sizeof(buf),"%s/index/%s",sheetdir,lang[j]);
  826.     indf=fopen(buf,"w");
  827.     snprintf(buf,sizeof(buf),"%s/index/list.%s",sheetdir,lang[j]);
  828.     listf=fopen(buf,"w");
  829.     snprintf(buf,sizeof(buf),"%s/index/weight.%s",sheetdir,lang[j]);
  830.     weightf=fopen(buf,"w");
  831.     snprintf(buf,sizeof(buf),"%s/index/addr.%s",sheetdir,lang[j]);
  832.     addrf=fopen(buf,"w");
  833.     snprintf(buf,sizeof(buf),"%s/index/information.%s",sheetdir,lang[j]);
  834.     remf=fopen(buf,"w");
  835.     snprintf(buf,sizeof(buf),"%s/index/serial.%s",sheetdir,lang[j]);
  836.     serialf=fopen(buf,"w");
  837.     snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
  838.     snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
  839.     snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
  840.     snprintf(ddic,sizeof(ddic),"%s/%s.%s",dicdir,domaindic,lang[j]);
  841.     suffix_dic(sdic); prepare_dic(gdic);
  842.     gdicbuf=dicbuf; gentrycount=entrycount;
  843.     memmove(gentry,entry,gentrycount*sizeof(entry[0]));
  844.     prepare_dic(mdic);
  845.     mdicbuf=dicbuf; mentrycount=entrycount;
  846.     memmove(mentry,entry,mentrycount*sizeof(entry[0]));
  847.     prepare_dic(ddic);
  848.     ddicbuf=dicbuf; dentrycount=entrycount;
  849.     memmove(dentry,entry,dentrycount*sizeof(entry[0]));
  850.     unknown_type=unk_leave; translate(ignore[j]);
  851.     for(i=0;i<modcnt;i++) {
  852.         if(mod[i].langs[0]!=j) continue;
  853.         fprintf(addrf,"%d:%s\n",i,mod[i].name+3);
  854.         fprintf(serialf,"%s:%d\n",mod[i].name+3,i);
  855.         onesheet(i,j);
  856.     }
  857.     if(mentrycount>0) free(mdicbuf);
  858.     if(gentrycount>0) free(gdicbuf);
  859.     if(suffixcnt>0) free(sufbuf);
  860.     if(dentrycount>0) free(ddicbuf);
  861.     fclose(titf); fclose(descf); fclose(indf); fclose(listf);
  862.     fclose(weightf); fclose(addrf); fclose(serialf);
  863.     }
  864. }
  865.  
  866. int main()
  867. {
  868.     prep();
  869.     if(modcnt>0) modules();
  870.     clean();
  871.     sprep();
  872.     if(modcnt>0) sheets();
  873.     return 0;
  874. }
  875.  
  876.