Subversion Repositories wimsdev

Rev

Rev 6819 | Rev 6884 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

  1. /*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
  2.  *
  3.  *  This program is free software; you can redistribute it and/or modify
  4.  *  it under the terms of the GNU General Public License as published by
  5.  *  the Free Software Foundation; either version 2 of the License, or
  6.  *  (at your option) any later version.
  7.  *
  8.  *  This program is distributed in the hope that it will be useful,
  9.  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  10.  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11.  *  GNU General Public License for more details.
  12.  *
  13.  *  You should have received a copy of the GNU General Public License
  14.  *  along with this program; if not, write to the Free Software
  15.  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16.  */
  17.  
  18.         /* This is an internal program,
  19.          * used to index modules for search engine. */
  20.  
  21. #include "../wims.h"
  22. #include "../Lib/basicstr.c"
  23.  
  /* Compile-time limits. MAX_LANGUAGES is not defined in this file
   * (presumably it comes from ../wims.h). */
  #define MAX_LANGS       MAX_LANGUAGES
  #define MAX_MODULES     65536
  /* Directory searched by module_index() for "<name>/INDEX". */
  char *moduledir=        "public_html/modules";
  /* Directory of sheet definitions ("<name>.def") and of the sheet index
   * output; prep() replaces it by $modind_sheetdir when that is set. */
  char *sheetdir=         "public_html/bases/sheet";
  /* Prefix prepended to every dictionary name below. */
  char *dicdir=           "public_html/bases";
  /* Directory receiving the module index files; prep() replaces it by
   * $modind_outdir when that is set. */
  char *outdir=           "public_html/bases/site2";
  /* Dictionary base names; the file actually opened is
   * "<dicdir>/<name>.<lang>". */
  char *maindic=          "sys/words";
  char *groupdic=         "sys/wgrp/wgrp";
  char *suffixdic=        "sys/suffix";
  char *domaindic=        "sys/domaindic";
  char *ignoredic=        "sys/indignore";
  /* Configuration file from which prep() reads "site_languages". */
  char *conffile=         "log/wims.conf";
  /* Not referenced in the visible part of this file. */
  char *mlistbase=        "list";
  37.  
  /* Two-letter codes of the languages to index. prep() overwrites the
   * leading entries with the codes found in "site_languages"; when none is
   * found the first DEFAULT_LANGCNT entries below are used unchanged. */
  char lang[MAX_LANGS][4]={
      "en","fr","cn","es","it","nl","si","ca","pt"
  };
  /* NOTE(review): 9 codes are listed above but only the first 6 are used
   * by default -- confirm this is intended. */
  #define DEFAULT_LANGCNT 6
  /* Not referenced in the visible part of this file. */
  char allang[MAX_LANGS][4]={
      "en","fr","cn","es","it","nl","de","si","ca","pt"
  };
  /* NOTE(review): allang lists 10 codes while allangcnt is 8 -- confirm. */
  #define allangcnt 8
  /* ignore[i]: content of the ignore-word file of lang[i], loaded by prep()
   * and passed through translate() before each language pass. */
  char ignore[MAX_LANGS][MAX_LINELEN+1];
  /* Not referenced in the visible part of this file. */
  char mlistfile[MAX_LANGS][256];
  /* Number of valid entries in lang[]. */
  int langcnt;
  /* Output streams shared by the indexing passes. Several of them are opened
   * by prep() for the module pass and opened again by sheets() for the
   * sheet pass. */
  FILE *langf, *titf, *descf, *weightf, *robotf, *indf, *listf, *addrf, *serialf, *authorf, *versionf;
  50.  
  /* Module categories. onemodule() looks for each name as a whole word in
   * the module's "category" field and uses the matching letter typ as the
   * base name of the per-category output files ("<typ>.<lang>").
   * The first entry (cat[0], 'A') is added by onemodule() to every module
   * that matched at least one category. */
  struct cat {
      char *name;
      char typ;
  } cat[]={
          {"all_types",   'A'},
          {"exercise",    'X'},
          {"oef",         'O'},
          {"tool",        'T'},
          {"recreation",  'R'},
          {"reference",   'Y'},
          {"document",    'D'},
          {"popup",       'P'},
          {"datamodule",  'M'}
  };
  /* Number of entries in cat[]. */
  #define catno (sizeof(cat)/sizeof(cat[0]))
  66.  
  /* One entry per distinct module (or sheet) address, filled by prep() and
   * sprep(). */
  struct mod {
      /* Address with the ".xx" language suffix removed; points into mlist. */
      char *name;
      /* langs[k]: index into lang[] of the k-th language variant. */
      unsigned char langs[MAX_LANGS];
      /* counts[k]: serial number (position in the input list) of the k-th
       * variant, as set by prep(). */
      int counts[MAX_LANGS];
      /* Number of language variants; 0 when the address carried no language
       * suffix. */
      int  langcnt;
  } mod[MAX_MODULES];
  /* Number of valid entries in mod[]. */
  int modcnt;

  /* Writable copy of the address list taken from the environment ("mlist"
   * in prep(), "slist" in sprep()); it is cut into words in place. */
  char *mlist;
  76.  
  /* malloc() wrapper that never returns NULL: on allocation failure it
   * prints "Malloc failure." on standard output and exits with status 1. */
  void *xmalloc(size_t n)
  {
      void *mem=malloc(n);

      if(mem!=NULL) return mem;
      printf("Malloc failure.\n");
      exit(1);
  }
  87.  
  /* Folding tables used by deaccent(): the byte at offset k of acctab is
   * replaced by the byte at offset k of deatab. The lookup is done byte by
   * byte, so the two tables only line up when every accented letter of
   * acctab occupies exactly one byte in the compiled string.
   * NOTE(review): confirm the encoding this file is compiled in. */
  char *acctab="çéèêëúùûüáàâäãóòôöõíìïîñýÇÉÈÊËÚÙÛÜÁÀÂÃÄÓÒÔÖÕÍÌÏÎÑÝ",
       *deatab="ceeeeuuuuaaaaaoooooiiiinyCEEEEUUUUAAAAAOOOOOIIIINY";

  /* Normalise the string p in place:
   *  - bytes above 127 that are listed in acctab are replaced by their
   *    unaccented counterpart from deatab;
   *  - every remaining byte that is neither alphanumeric nor one of
   *    ",.&$+*" becomes a space;
   *  - everything else is lower-cased.
   * Bytes are converted to unsigned char before being handed to the
   * <ctype.h> functions (a negative argument other than EOF is undefined),
   * and the "non-ASCII" test no longer depends on plain char being signed. */
  void deaccent(char *p)
  {
      char *sp;
      char *v;
      unsigned char c;

      for(sp=p;*sp;sp++) {
          if((unsigned char)*sp>127 && (v=strchr(acctab,*sp))!=NULL)
            *sp=*(deatab+(v-acctab));
          c=(unsigned char)*sp;
          if(!isalnum(c) && strchr(",.&$+*",c)==NULL) *sp=' ';
          else *sp=tolower(c);
      }
  }
  103.  
  /* Replace, in place, every byte of p that is neither alphanumeric nor one
   * of "&$+*" by a space. Bytes are converted to unsigned char before the
   * <ctype.h> call, which is undefined for negative values other than EOF. */
  void towords(char *p)
  {
      char *pp;
      unsigned char c;

      for(pp=p;*pp;pp++) {
          c=(unsigned char)*pp;
          if(!isalnum(c) && strchr("&$+*",c)==NULL) *pp=' ';
      }
  }
  110.  
  111.         /* Points to the end of the word */
  112. char *find_word_end(char *p)
  113. {
  114.     int i;
  115.     for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++);
  116.     return p;
  117. }
  118.  
  119.         /* Strips leading spaces */
  120. char *find_word_start(char *p)
  121. {
  122.     int i;
  123.     for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++);
  124.     return p;
  125. }
  126.  
  /* Find the first occurrence of w in p that stands as a whole word, i.e.
   * that is preceded by whitespace or the start of p and followed by
   * whitespace or the end of the string. Returns a pointer to it, or NULL.
   * strlen(w) is computed once, and bytes are converted to unsigned char
   * before isspace() (undefined for negative values other than EOF). */
  char *wordchr(char *p, char *w)
  {
      char *r;
      size_t n=strlen(w);
      int left_ok, right_ok;

      for(r=strstr(p,w); r!=NULL; r=strstr(r+1,w)) {
          left_ok=(r==p) || isspace((unsigned char)*(r-1));
          right_ok=(r[n]==0) || isspace((unsigned char)r[n]);
          if(left_ok && right_ok) break;
      }
      return r;
  }
  137.  
  /* Find a variable name in a string (math expression): the first
   * occurrence of v in p that is neither preceded nor followed by an
   * alphanumeric character. Returns a pointer to it, or NULL.
   * Bytes are converted to unsigned char before isalnum() (undefined for
   * negative values other than EOF). The end of the string needs no
   * separate test because isalnum(0) is false. */
  char *varchr(char *p, char *v)
  {
      char *pp; size_t n=strlen(v);
      for(pp=strstr(p,v); pp!=NULL; pp=strstr(pp+1,v)) {
          if((pp==p || !isalnum((unsigned char)*(pp-1))) &&
             !isalnum((unsigned char)*(pp+n))) break;
      }
      return pp;
  }
  149.  
  /* Remove trailing whitespace from p in place.
   * Returns a pointer to the last remaining character, or p itself when the
   * string is empty or consisted of whitespace only.
   * The previous loop walked down to p-1 for an all-blank string, forming
   * (and returning) a pointer before the start of the buffer; this version
   * never goes below p. Bytes are converted to unsigned char before
   * isspace() (undefined for negative values other than EOF). */
  char *strip_trailing_spaces(char *p)
  {
      char *pp;

      if(*p==0) return p;
      pp=p+strlen(p);           /* the terminating NUL */
      while(pp>p && isspace((unsigned char)*(pp-1))) *(--pp)=0;
      if(pp==p) return p;       /* nothing left */
      return pp-1;
  }
  158.  
  /* Return a pointer just past the '>' that closes the tag starting at p
   * (a leading '<' is skipped first). Nested '<...>' groups are skipped
   * recursively, and text between matching single or double quotes is
   * skipped without being interpreted. If a quote is never closed, or no
   * '>' is found, the result points to the terminating NUL. */
  char *find_tag_end(char *p)
  {
      char *cur=p;
      char *closing;

      if(*cur=='<') cur++;
      while(*cur!=0 && *cur!='>') {
          switch(*cur) {
              case '<':
                  /* nested tag: resume right behind it */
                  cur=find_tag_end(cur);
                  break;
              case '"':
              case '\'':
                  /* quoted text: jump behind the matching quote */
                  closing=strchr(cur+1,*cur);
                  if(closing==NULL) return p+strlen(p);
                  cur=closing+1;
                  break;
              default:
                  cur++;
                  break;
          }
      }
      if(*cur=='>') cur++;
      return cur;
  }
  178.  
  /* Find the first tag named `tag` in p: a '<' immediately followed by tag
   * (compared without regard to case) and then by a non-alphanumeric
   * character. Returns a pointer to that '<', or to the terminating NUL of
   * p when there is no such tag.
   * The byte is converted to unsigned char before isalnum() (undefined for
   * negative values other than EOF). The former "*pp" test in the loop
   * condition was always true, since pp comes from strchr(...,'<'), and has
   * been removed. */
  char *find_tag(char *p, char *tag)
  {
      char *pp;
      size_t len=strlen(tag);

      for(pp=strchr(p,'<'); pp!=NULL; pp=strchr(pp+1,'<')) {
          if(strncasecmp(pp+1,tag,len)==0 &&
             !isalnum((unsigned char)*(pp+1+len))) return pp;
      }
      return p+strlen(p);
  }
  189.  
  /* Remove all html tags from p, in place. Each tag is located with
   * find_tag_end() and the text following it is moved down over it; a tag
   * that runs to the end of the string truncates the string at its '<'. */
  void detag(char *p)
  {
      char *open=strchr(p,'<');
      char *after;

      while(open!=NULL) {
          after=find_tag_end(open);
          if(*after==0) {
              *open=0;
              return;
          }
          ovlstrcpy(open,after);
          /* search again from the same place: new text now starts here */
          open=strchr(open,'<');
      }
  }
  200.  
  201.         /* modify a string. Bufferlen must be ast least MAX_LINELEN */
  202. void string_modify(char *start, char *bad_beg, char *bad_end, char *good,...)
  203. {
  204.     char buf[MAX_LINELEN+1];
  205.     va_list vp;
  206.  
  207.     va_start(vp,good);
  208.     vsnprintf(buf,sizeof(buf),good,vp); va_end(vp);
  209.     if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN)
  210.       return;
  211.     strcat(buf,bad_end);
  212.     ovlstrcpy(bad_beg,buf);
  213. }
  214.  
  /* Put a space after every comma of p, so that a comma always ends a
   * word. Each comma is rewritten through string_modify(). */
  void comma(char *p)
  {
      char *hit=strchr(p,',');

      while(hit!=NULL) {
          string_modify(p,hit,hit+1,", ");
          hit=strchr(hit+1,',');
      }
  }
  223.  
  224.  
  225. void _getdef(char buf[], char *name, char value[])
  226. {
  227.     char *p1, *p2, *p3;
  228.  
  229.     value[0]=0;
  230.     for(p1=strstr(buf,name); p1!=NULL; p1=strstr(p1+1,name)) {
  231.         p2=find_word_start(p1+strlen(name));
  232.         if((p1>buf && !isspace(*(p1-1))) || *p2!='=') continue;
  233.         p3=p1; while(p3>buf && isspace(*(p3-1)) && *(p3-1)!='\n') p3--;
  234.         if(p3>buf && *(p3-1)!='\n') continue;
  235.         p3=strchr(p2,'\n');
  236.         p2=find_word_start(p2+1);
  237.         if(p3 <= p2) continue;
  238.         snprintf(value,MAX_LINELEN,"%s",p2);
  239.         if(p3!=NULL && p3-p2<MAX_LINELEN) value[p3-p2]=0;
  240.         strip_trailing_spaces(value);
  241.         break;
  242.     }
  243. }
  244.  
  /* Get a variable definition from a file.
   * The whole file fname is read into memory and searched with _getdef().
   * The result is stored in value, a buffer of length MAX_LINELEN; value is
   * the empty string when the file cannot be read or holds no definition
   * of name.
   *
   * Fixes: the buffer is now released when nothing could be read (it used
   * to leak), and a failing fseek()/ftell() is detected instead of letting
   * the -1 returned by ftell() become the allocation size and read count. */
  void getdef(char *fname, char *name, char value[])
  {
      FILE *f;
      char *buf;
      long l;
      size_t got;

      value[0]=0;
      f=fopen(fname,"r"); if(f==NULL) return;
      if(fseek(f,0,SEEK_END)!=0 || (l=ftell(f))<0 || fseek(f,0,SEEK_SET)!=0) {
          fclose(f); return;
      }
      buf=xmalloc(l+256); got=fread(buf,1,l,f);
      fclose(f);
      if(got>0) {
          buf[got]=0;
          _getdef(buf,name,value);
      }
      free(buf);
  }
  262.  
  263. #include "translator_.c"
  264.  
  /* Saved copies of the translator state (dicbuf, entry, entrycount -- these
   * are not defined in this file; presumably they come from translator_.c)
   * for three dictionaries: m = main (maindic), g = group (groupdic),
   * d = domain (domaindic). modules() and sheets() fill them after each
   * prepare_dic() call; onemodule() and onesheet() copy them back to switch
   * dictionary. */
  char *mdicbuf, *gdicbuf, *ddicbuf;
  char gentry[sizeof(entry)], mentry[sizeof(entry)], dentry[sizeof(entry)];
  int gentrycount, mentrycount, dentrycount;
  268.  
  269.         /* Preparation of data */
  270. void prep(void)
  271. {
  272.     char buf[MAX_LINELEN+1];
  273.     char *p1,*p2,*s,*old;
  274.     int i,l,thislang,t;
  275.     FILE *f;
  276.  
  277.     s=getenv("modind_outdir"); if(s!=NULL && *s!=0) outdir=s;
  278.     s=getenv("modind_sheetdir"); if(s!=NULL && *s!=0) sheetdir=s;
  279.     snprintf(buf,sizeof(buf),"%s/addr",outdir);
  280.     addrf=fopen(buf,"w");
  281.     snprintf(buf,sizeof(buf),"%s/serial",outdir);
  282.     serialf=fopen(buf,"w");
  283.     modcnt=langcnt=0;
  284.     getdef(conffile,"site_languages",buf);
  285.     for(p1=buf;*p1;p1++) if(!isalnum(*p1)) *p1=' ';
  286.     for(p1=find_word_start(buf); *p1 && langcnt<MAX_LANGS; p1=find_word_start(p2)) {
  287.         p2=find_word_end(p1);
  288.         if(p2!=p1+2 || !isalpha(*p1) || !isalpha(*(p1+1))) continue;
  289.         memmove(lang[langcnt],p1,2); lang[langcnt++][2]=0;
  290.     }
  291.     if(langcnt==0) {    /* default languages */
  292.         langcnt=DEFAULT_LANGCNT;
  293.     }
  294.     s=getenv("mlist"); if(s==NULL) exit(1);
  295.     l=strlen(s); if(l<0 || l>100*MAX_LINELEN) exit(1);
  296.     mlist=xmalloc(l+16); ovlstrcpy(mlist,s); old="";
  297.     for(i=0;i<langcnt;i++) {
  298.         snprintf(buf,sizeof(buf),"%s/%s.%s",dicdir,ignoredic,lang[i]);
  299.         f=fopen(buf,"r"); if(f==NULL) continue;
  300.         l=fread(ignore[i],1,MAX_LINELEN,f);fclose(f);
  301.         if(l<0 || l>=MAX_LINELEN) l=0;
  302.         ignore[i][l]=0;
  303.     }
  304.     for(t=0, p1=find_word_start(mlist);
  305.         *p1 && modcnt<MAX_MODULES;
  306.         p1=find_word_start(p2), t++) {
  307.         p2=find_word_end(p1);
  308.         l=p2-p1; if(*p2) *p2++=0;
  309.         fprintf(addrf,"%d:%s\n",t,p1);
  310.         fprintf(serialf,"%s:%d\n",p1,t);
  311.         thislang=-1;
  312. /* language is taken from the address */
  313.         if(l>3 && p1[l-3]=='.') {
  314.             for(i=0;i<langcnt;i++) if(strcasecmp(lang[i],p1+l-2)==0) break;
  315.             if(i<langcnt) {p1[l-3]=0; thislang=i;}
  316.             else {      /* unknown language, not referenced */
  317.                 continue;
  318.             }
  319.         }
  320.         if(modcnt>0 && strcmp(old,p1)==0 && thislang>=0) {
  321.             if(mod[modcnt-1].langcnt<langcnt) {
  322.                 mod[modcnt-1].langs[mod[modcnt-1].langcnt]=thislang;
  323.                 mod[modcnt-1].counts[mod[modcnt-1].langcnt]=t;
  324.                 (mod[modcnt-1].langcnt)++;
  325.             }
  326.         }
  327.         else {
  328.             mod[modcnt].name=old=p1;
  329.             if(thislang>=0) {
  330.                 mod[modcnt].langs[0]=thislang;
  331.                 mod[modcnt].langcnt=1;
  332.             }
  333.             else mod[modcnt].langcnt=0;
  334.             mod[modcnt].counts[0]=t;
  335.             modcnt++;
  336.         }
  337.     }
  338.     snprintf(buf,sizeof(buf),"%s/language",outdir);
  339.     langf=fopen(buf,"w");
  340.     snprintf(buf,sizeof(buf),"%s/title",outdir);
  341.     titf=fopen(buf,"w");
  342.     snprintf(buf,sizeof(buf),"%s/description",outdir);
  343.     descf=fopen(buf,"w");
  344.     snprintf(buf,sizeof(buf),"%s/author",outdir);
  345.     authorf=fopen(buf,"w");
  346.     snprintf(buf,sizeof(buf),"%s/version",outdir);
  347.     versionf=fopen(buf,"w");
  348.     snprintf(buf,sizeof(buf),"%s/lists/robot.phtml",outdir);
  349.     robotf=fopen(buf,"w");
  350.     fclose(addrf); fclose(serialf);
  351.     if(!robotf || !versionf || !authorf || !descf || !titf || !descf) {
  352.         fprintf(stderr,"modind: error creating output files.\n");
  353.         exit(1);
  354.     }
  355. }
  356.  
  357. void sprep(void)
  358. {
  359.     char *p1,*p2,*s;
  360.     int i,l,thislang;
  361.  
  362.     modcnt=0;
  363.     s=getenv("slist"); if(s==NULL) return;
  364.     l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return;
  365.     mlist=xmalloc(l+16); ovlstrcpy(mlist,s);
  366.     for(p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2)) {
  367.         p2=find_word_end(p1);
  368.         l=p2-p1; if(*p2) *p2++=0;
  369.         for(i=0;i<langcnt;i++) if(strncasecmp(lang[i],p1,2)==0) break;
  370.         if(i<langcnt) thislang=i; else continue;
  371.         mod[modcnt].name=p1;
  372.         mod[modcnt].langs[0]=thislang;
  373.         mod[modcnt].langcnt=1;
  374.         modcnt++;
  375.     }
  376. }
  377.  
  378. void clean(void)
  379. {
  380.     fclose(langf); fclose(titf); fclose(descf); fclose(robotf);
  381.     fclose(authorf); fclose(versionf);
  382. }
  383.  
  /* Names of the fields of a sheet definition, in the order in which
   * sheet_index() stores them: it copies the leading lines of the file one
   * by one into sindbuf[0], sindbuf[1], ... and puts what remains into the
   * last field (remark). */
  char *sheetindex[]={
        "title", "description",
        "duration", "severity",
        "level", "domain",
        "keywords", "reserved1", "reserved2", "remark"
  };
  /* Number of sheet fields. */
  #define SHEETINDEX_NO (sizeof(sheetindex)/sizeof(sheetindex[0]))
  /* sindbuf[k]: value of field k for the sheet last read by sheet_index(). */
  char sindbuf[SHEETINDEX_NO][MAX_LINELEN+1];
  /* Indices into sindbuf[]; must be kept in the same order as sheetindex[]. */
  enum{s_title, s_description,
        s_duration, s_severity,
        s_level, s_domain,
        s_keywords, s_reserved1, s_reserved2,
        s_remark
  };
  398.  
  /* Names of the definitions that module_index() looks up, with _getdef(),
   * in a module's INDEX file. */
  char *modindex[]={
        "title", "description",
        "author", "address", "copyright",
        "version", "wims_version", "language",
        "category", "level", "domain", "keywords",
        "keywords_ca", "keywords_en", "keywords_fr", "keywords_it", "keywords_nl",
        "title_ca", "title_en", "title_fr", "title_it", "title_nl",
        "require"
  };
  /* Number of INDEX definitions. */
  #define MODINDEX_NO (sizeof(modindex)/sizeof(modindex[0]))
  /* indbuf[k]: value of modindex[k] for the module last read by
   * module_index(). */
  char indbuf[MODINDEX_NO][MAX_LINELEN+1];
  /* Indices into indbuf[]; must be kept in the same order as modindex[]. */
  enum{i_title, i_description,
        i_author,i_address,i_copyright,
        i_version,i_wims_version,i_language,
        i_category,i_level,i_domain,i_keywords,
        i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
        i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl,
        i_require
  };
  418.  
  /* Not referenced in the visible part of this file. */
  char *module_special_file[]={
      "intro","help","about"
  };
  #define MODSPEC_NO (sizeof(module_special_file)/sizeof(module_special_file[0]))
  /* Two-letter language code of the module last read by module_index()
   * ("en" when its INDEX file gives none). */
  char module_language[4];
  424.  
  425.         /* read and treat module's INDEX file */
  426. int module_index(const char *name)
  427. {
  428.     char *p, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
  429.     FILE *indf;
  430.     int i,l;
  431.  
  432.     snprintf(fbuf,sizeof(fbuf),"%s/%s/INDEX",moduledir,name);
  433.     indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
  434.     l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
  435.     if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
  436.     for(i=0;i<MODINDEX_NO;i++) {
  437.         _getdef(ibuf,modindex[i],indbuf[i]);
  438.                 /* compatibility precaution */
  439.         if(indbuf[i][0]==':') indbuf[i][0]='.';
  440.     }
  441.     p=find_word_start(indbuf[i_language]);
  442.     if(isalpha(*p) && isalpha(*(p+1))) {
  443.         memmove(module_language,p,2); module_language[2]=0;
  444.     }
  445.     else ovlstrcpy(module_language,"en");
  446.     return 0;
  447. }
  448.  
  449. int sheet_index(int serial)
  450. {
  451.     char *p1, *p2, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
  452.     FILE *indf;
  453.     int i,l;
  454.  
  455.     snprintf(fbuf,sizeof(fbuf),"%s/%s.def",sheetdir,mod[serial].name);
  456.     indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
  457.     l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
  458.     if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
  459.     for(i=0;i<SHEETINDEX_NO;i++) sindbuf[i][0]=0;
  460.     for(i=0,p1=find_word_start(ibuf);
  461.         i<SHEETINDEX_NO-1 && *p1!=':' && *p1!=0;
  462.         i++,p1=p2) {
  463.         p2=strchr(p1,'\n');
  464.         if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
  465.         p1=find_word_start(p1); strip_trailing_spaces(p1);
  466.         snprintf(sindbuf[i],MAX_LINELEN,"%s",p1);
  467.     }
  468.     p2=strstr(p1,"\n:"); if(p2==NULL) p2=p1+strlen(p1);
  469.     else *p2=0;
  470.     p1=find_word_start(p1); strip_trailing_spaces(p1);
  471.     for(p2=p1;*p2;p2++) if(*p2=='\n') *p2=' ';
  472.     ovlstrcpy(sindbuf[s_remark],p1);
  473.     return 0;
  474. }
  475.  
  /* State of the item currently being indexed.
   * categories[0..catcnt-1]: category letters of the current module, set by
   * onemodule(). */
  unsigned char categories[16];
  /* Words already emitted for the current item, each preceded by two
   * spaces; used to avoid emitting a word twice. */
  char taken[MAX_LINELEN+1];
  /* catcnt: number of entries in categories[]; takenlen: length of taken;
   * tweight: sum of the weights of the words emitted for the current item. */
  int catcnt, takenlen, tweight;
  479.  
  480. void appenditem(char *word, int lind, int serial, int weight, char *l)
  481. {
  482.     char nbuf[MAX_LINELEN+1], buf[MAX_LINELEN+1];
  483.     int i, ll;
  484.     char *p;
  485.     FILE *f;
  486.  
  487.     if(!isalnum(*word) || (ll=strlen(word))<2 ||
  488.        wordchr(taken,word)!=NULL ||
  489.        wordchr(ignore[lind],word)!=NULL ||
  490.        takenlen>=MAX_LINELEN-ll-16)
  491.       return;
  492.     if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
  493.     for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
  494.     taken[takenlen++]=' '; taken[takenlen++]=' ';
  495.     ovlstrcpy(taken+takenlen,word);
  496.     takenlen+=ll; tweight+=weight;
  497.     snprintf(buf,sizeof(buf),"%s:%d?%d\n",word,serial,weight);
  498.     for(i=0;i<catcnt;i++) {
  499.         snprintf(nbuf,sizeof(nbuf),"%s/%c.%s",
  500.                  outdir,categories[i],lang[lind]);
  501.         f=fopen(nbuf,"a");
  502.         if(f!=NULL) {fputs(buf,f); fclose(f);}
  503.     }
  504. }
  505.  
  506. void appenditem1 (char *buf, int lind, int serial, int weight, char *l )
  507. {
  508.   char *p1, *p2 ;
  509.   for(p1=find_word_start(buf); *p1;
  510.         p1=find_word_start(p2)) {
  511.         p2=strchr(p1,',');
  512.         if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
  513.         if(strlen(p1)<=0) continue;
  514.         appenditem(p1,lind,serial,weight,module_language);
  515.   }
  516. }
  517. void appenditem2 (char *buf, int lind, int serial, int weight, char *l )
  518. {
  519.   char *p1, *p2 ;
  520.   for(p1=find_word_start(buf);*p1;
  521.         p1=find_word_start(p2)) {
  522.         p2=find_word_end(p1); if(*p2) *p2++=0;
  523.         appenditem(p1,lind,serial,weight,module_language);
  524.   }
  525. }
  526. void onemodule(const char *name, int serial, int lind)
  527. {
  528.     int i;
  529.     unsigned char trlist[]={
  530.         i_title,i_description,i_category,i_domain,i_keywords,
  531.           i_require,i_author,
  532.           i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
  533.           i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl
  534.     };
  535.     #define trcnt (sizeof(trlist)/sizeof(trlist[0]))
  536.     char *p1, *p2, *pp, *q, buf[MAX_LINELEN+1], lbuf[16];
  537.     FILE *f;
  538.  
  539.     if(module_index(name)) return;
  540.     towords(indbuf[i_category]);
  541.         /*  list the categories (among A=all,X=eXercise,O,D,...) corresponding to this module  */
  542.     for(i=catcnt=0;i<catno && catcnt<16;i++) {
  543.         if(wordchr(indbuf[i_category],cat[i].name)!=NULL)
  544.           categories[catcnt++]=cat[i].typ;
  545.     }
  546.     if(catcnt==0) return;
  547.     if(categories[0]!=cat[0].typ)
  548.       categories[catcnt++]=cat[0].typ;
  549.         /*  write module's name in the category.language files, for instance lists/X.fr for french exercises  */
  550.     for(i=0;i<catcnt;i++) {
  551.         snprintf(buf,sizeof(buf),"%s/lists/%c.%s",
  552.                  outdir,categories[i],lang[lind]);
  553.         f=fopen(buf,"a");
  554.         if(f!=NULL) {fprintf(f,"%s\n",name); fclose(f);}
  555.     }
  556.         /*  add serial number and language (resp.title, ...) to corresponding file  */
  557.     fprintf(langf,"%d:%s\n",serial,module_language);
  558.     fprintf(titf,"%d:%s\n",serial,indbuf[i_title]);
  559.     fprintf(descf,"%d:%s\n",serial,indbuf[i_description]);
  560.     fprintf(authorf,"%d:%s\n",serial,indbuf[i_author]);
  561.     fprintf(versionf,"%d:%s\n",serial,indbuf[i_version]);
  562.  
  563.         /*  add module's information in html page for robots  */
  564.     snprintf(buf,sizeof(buf),"%s",indbuf[i_description]);
  565.     for(pp=strchr(buf,','); pp; pp=strchr(pp,','))
  566.       string_modify(buf,pp,pp+1,"&#44;");
  567.     if(strcmp(module_language,lang[lind])==0)
  568.       fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name,
  569.               indbuf[i_title], buf);
  570.  
  571.         /*  Normalize the information, using dictionary
  572.         -- bases/sys/domain.xx without suffix (--> english version)
  573.         -- bases/sys/words.xx with suffix */
  574.     entrycount=dentrycount; dicbuf=ddicbuf;
  575.     memmove(entry,dentry,dentrycount*sizeof(entry[0]));
  576.     unknown_type=unk_leave;
  577.     for(i=0;i<trcnt;i++) {
  578.         detag(indbuf[trlist[i]]);
  579.         deaccent(indbuf[trlist[i]]);
  580.         comma(indbuf[trlist[i]]);
  581.         singlespace(indbuf[trlist[i]]);
  582.         translate(indbuf[trlist[i]]);
  583.     }
  584.    
  585.     entrycount=mentrycount; dicbuf=mdicbuf;
  586.     memmove(entry,mentry,mentrycount*sizeof(entry[0]));
  587.     unknown_type=unk_leave;  /* used in translator_.c */
  588.     for(i=0;i<trcnt;i++) {
  589.         suffix_translate(indbuf[trlist[i]]);
  590.         translate(indbuf[trlist[i]]);
  591.     }
  592.  
  593. /* taken contains all words already seen in the module index */
  594.     taken[0]=0; takenlen=tweight=0;
  595. /*  append words of title  */
  596.     ovlstrcpy(buf,indbuf[i_title]); towords(buf);
  597.     appenditem2(buf,lind,serial,4,module_language);
  598.  
  599. /*  append words of every other information except level  */
  600.     snprintf(buf,sizeof(buf),"%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s",
  601.              indbuf[i_description],indbuf[i_keywords],
  602.              indbuf[i_keywords_ca],indbuf[i_keywords_en],indbuf[i_keywords_fr],
  603.              indbuf[i_keywords_it],indbuf[i_keywords_nl],
  604.              indbuf[i_title_ca],indbuf[i_title_en],indbuf[i_title_fr],
  605.              indbuf[i_title_it],indbuf[i_title_nl],
  606.              indbuf[i_domain],indbuf[i_require],indbuf[i_author]);
  607.     towords(buf);
  608.     appenditem2(buf,lind,serial,4,module_language);
  609.  
  610.         /*  this time the dictionary is the group dictionary  sys/wgrp/wgrp
  611.          with a g (groupdic), not an m (maindic) . see below main, suffix, group.
  612.         and delete unknown ?? and translate  */
  613.     entrycount=gentrycount; dicbuf=gdicbuf;
  614.     memmove(entry,gentry,gentrycount*sizeof(entry[0]));
  615.  
  616. /*  append words (?) of every other information except level  */
  617.     ovlstrcpy(buf,indbuf[i_title]);
  618.     unknown_type=unk_delete;
  619.     translate(buf);
  620.     appenditem1(buf,lind,serial,2,module_language);
  621.  
  622. /*  append words (?) of information of description except level  */
  623.     snprintf(buf,sizeof(buf),"%s", indbuf[i_description]);
  624.     unknown_type=unk_delete;
  625.     translate(buf);
  626.     appenditem1(buf,lind,serial,4,module_language);
  627.  
  628. /*  append words (or group of words) of keywords and domain level  */
  629.     snprintf(buf,sizeof(buf),"%s, %s, %s, %s, %s, %s, %s",
  630.              indbuf[i_domain],indbuf[i_keywords],
  631.              indbuf[i_keywords_ca], indbuf[i_keywords_en],indbuf[i_keywords_fr],
  632.              indbuf[i_keywords_it], indbuf[i_keywords_nl]);
  633.         unknown_type=unk_leave;
  634.     translate(buf);
  635.     appenditem1(buf,lind,serial,2,module_language);
  636.  
  637.         /*  append level information, with weight 2 */
  638.     snprintf(buf,sizeof(buf),"%s",indbuf[i_level]);
  639.     ovlstrcpy(lbuf,"level");
  640.     for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' ';
  641.     q=buf+strlen(buf);
  642.     for(p1=find_word_start(buf); (*p1) && (p1 < q) ;
  643.         p1=find_word_start(p2)) {
  644.         p2=find_word_end(p1);
  645.         if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
  646.         if(!isalpha(*p1) ||
  647.            (!isdigit(*(p1+1)) && *(p1+1)!=0) ||
  648.            (*(p1+1)!=0 && *(p1+2)!=0))
  649.           continue;
  650.         *p1=tolower(*p1);
  651.         ovlstrcpy(lbuf+strlen("level"),p1);
  652.         appenditem(lbuf,lind,serial,2,module_language);
  653.     }
  654.         /*  append total weight of module to weight file site2/weight.xx  */
  655.     fprintf(weightf,"%d:%d\n",serial,tweight);
  656. }
  657.  
  /* Index all modules of mod[] once for every configured language.
   * For each language the weight file is opened, the suffix, group, main and
   * domain dictionaries are loaded and their state saved for onemodule(),
   * the ignore list is translated, and onemodule() is called for every
   * module. suffix_dic(), prepare_dic(), translate() and the globals dicbuf,
   * entry, entrycount, suffixcnt, sufbuf are not defined in this file
   * (presumably translator_.c). */
  void modules(void)
  {
      int i,j,k,d;
      char namebuf[MAX_LINELEN+1];
      char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1];

      for(j=0;j<langcnt;j++) {
          /* weightf may be NULL here; it is only tested before fclose below */
          snprintf(namebuf,sizeof(namebuf),"%s/weight.%s",outdir,lang[j]);
          weightf=fopen(namebuf,"w");
          snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
          snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
          snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
          snprintf(ddic,sizeof(ddic),"%s/%s.%s",dicdir,domaindic,lang[j]);
          /* After each prepare_dic() the current dicbuf/entry/entrycount are
           * saved; presumably prepare_dic() leaves the loaded dictionary in
           * those globals -- confirm in translator_.c. */
          suffix_dic(sdic); prepare_dic(gdic);
          gdicbuf=dicbuf; gentrycount=entrycount;
          memmove(gentry,entry,gentrycount*sizeof(entry[0]));
          prepare_dic(mdic);
          mdicbuf=dicbuf; mentrycount=entrycount;
          memmove(mentry,entry,mentrycount*sizeof(entry[0]));
          prepare_dic(ddic);
          ddicbuf=dicbuf; dentrycount=entrycount;
          memmove(dentry,entry,dentrycount*sizeof(entry[0]));
          /* the domain dictionary is the current one at this point */
          unknown_type=unk_leave; translate(ignore[j]);
          for(i=0;i<modcnt;i++) {
              if(mod[i].langcnt>0) {
                  /* d: variant whose language has the smallest index in lang[] */
                  for(d=k=0;k<mod[i].langcnt;k++)
                    if(mod[i].langs[k]<mod[i].langs[d]) d=k;
                  /* k: variant written in language j, or d when there is none */
                  for(k=0;k<mod[i].langcnt && mod[i].langs[k]!=j;k++);
                  if(k>=mod[i].langcnt) k=d;
                  snprintf(namebuf,MAX_LINELEN,"%s.%s",mod[i].name,
                           lang[mod[i].langs[k]]);
                  onemodule(namebuf,mod[i].counts[k],j);
              }
              else {
                  /* address without language suffix */
                  onemodule(mod[i].name,mod[i].counts[0],j);
              }
          }
          /* release the dictionaries of this language; the saved pointers
           * (mdicbuf, gdicbuf, ddicbuf) are left as they are */
          if(mentrycount>0) free(mdicbuf);
          if(gentrycount>0) free(gdicbuf);
          if(suffixcnt>0) free(sufbuf);
          if(dentrycount>0) free(ddicbuf);
          if(weightf) fclose(weightf);
      }
  }
  702.  
  703. /* FIXME ? differences with appenditem - use fprintf instead of  snprintf */
  704. void sappenditem(char *word, int lind, int serial, int weight)
  705. {
  706.     int ll;
  707.     char *p;
  708.  
  709.     if(!isalnum(*word) || (ll=strlen(word))<2 ||
  710.        wordchr(taken,word)!=NULL ||
  711.        wordchr(ignore[lind],word)!=NULL ||
  712.        takenlen>=MAX_LINELEN-ll-16)
  713.       return;
  714.     if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
  715.     for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
  716.     taken[takenlen++]=' ';taken[takenlen++]=' ';
  717.     ovlstrcpy(taken+takenlen,word);
  718.     takenlen+=ll; tweight+=weight;
  719.     fprintf(indf,"%s:%d?%d\n",word,serial,weight);
  720. }
  721.  
  /* Index one sheet for one language.
   *   serial  index of the sheet in mod[], also used as its serial number;
   *   lind    index into lang[] of the language being indexed.
   * Reads the sheet definition, writes its list, title and description
   * records, then emits its index words through sappenditem() and finally
   * its total weight. Returns silently when the definition cannot be read.
   * translate(), suffix_translate() and singlespace() are not defined in
   * this file. */
  void onesheet(int serial, int lind)
  {
      int i;
      unsigned char trlist[]={
          s_title,s_description,s_domain,s_keywords,s_remark
      };
      #define trcnt (sizeof(trlist)/sizeof(trlist[0]))
      char *p1, *p2, buf[MAX_LINELEN+1];

      if(sheet_index(serial)) return;
      /* name+3 skips the first three characters of the address; presumably
       * the two-letter language code matched by sprep() plus a separator --
       * confirm */
      fprintf(listf,"%s\n",mod[serial].name+3);
      fprintf(titf,"%d:%s\n",serial,sindbuf[s_title]);
      fprintf(descf,"%d:%s\n",serial,sindbuf[s_description]);
      /* normalize the fields, first with the saved domain dictionary ... */
      entrycount=dentrycount; dicbuf=ddicbuf;
      memmove(entry,dentry,dentrycount*sizeof(entry[0]));
      unknown_type=unk_leave;
      for(i=0;i<trcnt;i++) {
          detag(sindbuf[trlist[i]]);
          deaccent(sindbuf[trlist[i]]);
          comma(sindbuf[trlist[i]]);
          singlespace(sindbuf[trlist[i]]);
          translate(sindbuf[trlist[i]]);
      }

      /* ... then with the saved main dictionary */
      entrycount=mentrycount; dicbuf=mdicbuf;
      memmove(entry,mentry,mentrycount*sizeof(entry[0]));
      unknown_type=unk_leave;
      for(i=0;i<trcnt;i++) {
          suffix_translate(sindbuf[trlist[i]]);
          translate(sindbuf[trlist[i]]);
      }
      /* taken collects the words already emitted for this sheet */
      taken[0]=0; takenlen=tweight=0;
      /* single words of the title, weight 4 */
      ovlstrcpy(buf,sindbuf[s_title]); towords(buf);
      for(p1=find_word_start(buf);*p1;
          p1=find_word_start(p2)) {
          p2=find_word_end(p1); if(*p2) *p2++=0;
          sappenditem(p1,lind,serial,4);
      }
      /* single words of description, keywords, domain and remark, weight 2 */
      snprintf(buf,sizeof(buf),"%s %s %s %s",
               sindbuf[s_description],sindbuf[s_keywords],
               sindbuf[s_domain],sindbuf[s_remark]);
      towords(buf);
      for(p1=find_word_start(buf);*p1;
          p1=find_word_start(p2)) {
          p2=find_word_end(p1); if(*p2) *p2++=0;
          sappenditem(p1,lind,serial,2);
      }
      /* switch to the saved group dictionary; from here on the items are
       * the comma-separated pieces of the translated text */
      entrycount=gentrycount; dicbuf=gdicbuf;
      memmove(entry,gentry,gentrycount*sizeof(entry[0]));
      unknown_type=unk_delete;
      ovlstrcpy(buf,sindbuf[s_title]); translate(buf);
      for(p1=find_word_start(buf); *p1;
          p1=find_word_start(p2)) {
          p2=strchr(p1,',');
          if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
          if(strlen(p1)<=0) continue;
          sappenditem(p1,lind,serial,4);
      }
      snprintf(buf,sizeof(buf),"%s, %s, %s, %s",
               sindbuf[s_description],sindbuf[s_keywords],
               sindbuf[s_domain],sindbuf[s_remark]);
      translate(buf);
      for(p1=find_word_start(buf); *p1;
          p1=find_word_start(p2)) {
          p2=strchr(p1,',');
          if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
          if(strlen(p1)<=0) continue;
          sappenditem(p1,lind,serial,2);
      }
      /* total weight of the sheet */
      fprintf(weightf,"%d:%d\n",serial,tweight);
  }
  793.  
  794.  
  795. void sheets(void)
  796. {
  797.     int i,j;
  798.     char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1];
  799.     char buf[MAX_LINELEN+1];
  800.    
  801.     for(j=0;j<langcnt;j++) {
  802.         snprintf(buf,sizeof(buf),"%s/index/title.%s",sheetdir,lang[j]);
  803.         titf=fopen(buf,"w");
  804.         snprintf(buf,sizeof(buf),"%s/index/description.%s",sheetdir,lang[j]);
  805.         descf=fopen(buf,"w");
  806.         snprintf(buf,sizeof(buf),"%s/index/%s",sheetdir,lang[j]);
  807.         indf=fopen(buf,"w");
  808.         snprintf(buf,sizeof(buf),"%s/index/list.%s",sheetdir,lang[j]);
  809.         listf=fopen(buf,"w");
  810.         snprintf(buf,sizeof(buf),"%s/index/weight.%s",sheetdir,lang[j]);
  811.         weightf=fopen(buf,"w");
  812.         snprintf(buf,sizeof(buf),"%s/index/addr.%s",sheetdir,lang[j]);
  813.         addrf=fopen(buf,"w");
  814.         snprintf(buf,sizeof(buf),"%s/index/serial.%s",sheetdir,lang[j]);
  815.         serialf=fopen(buf,"w");
  816.         snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
  817.         snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
  818.         snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
  819.         suffix_dic(sdic); prepare_dic(gdic);
  820.         gdicbuf=dicbuf; gentrycount=entrycount;
  821.         memmove(gentry,entry,gentrycount*sizeof(entry[0]));
  822.         prepare_dic(mdic);
  823.         mdicbuf=dicbuf; mentrycount=entrycount;
  824.         memmove(mentry,entry,mentrycount*sizeof(entry[0]));
  825.         unknown_type=unk_leave; translate(ignore[j]);
  826.         for(i=0;i<modcnt;i++) {
  827.             if(mod[i].langs[0]!=j) continue;
  828.             fprintf(addrf,"%d:%s\n",i,mod[i].name+3);
  829.             fprintf(serialf,"%s:%d\n",mod[i].name+3,i);
  830.             onesheet(i,j);
  831.         }
  832.         if(mentrycount>0) free(mdicbuf);
  833.         if(gentrycount>0) free(gdicbuf);
  834.         if(suffixcnt>0) free(sufbuf);
  835.         fclose(titf); fclose(descf); fclose(indf); fclose(listf);
  836.         fclose(weightf); fclose(addrf); fclose(serialf);
  837.     }
  838. }
  839.  
  840. int main()
  841. {
  842.     prep();
  843.     if(modcnt>0) modules();
  844.     clean();
  845.     sprep();
  846.     if(modcnt>0) sheets();
  847.     return 0;
  848. }
  849.  
  850.