Subversion Repositories wimsdev

Rev

Rev 6799 | Rev 6818 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

  1. /*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
  2.  *
  3.  *  This program is free software; you can redistribute it and/or modify
  4.  *  it under the terms of the GNU General Public License as published by
  5.  *  the Free Software Foundation; either version 2 of the License, or
  6.  *  (at your option) any later version.
  7.  *
  8.  *  This program is distributed in the hope that it will be useful,
  9.  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  10.  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11.  *  GNU General Public License for more details.
  12.  *
  13.  *  You should have received a copy of the GNU General Public License
  14.  *  along with this program; if not, write to the Free Software
  15.  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16.  */
  17.  
  18.         /* This is an internal program,
  19.          * used to index modules for search engine. */
  20.  
  21. #include "../wims.h"
  22. #include "../Lib/basicstr.c"
  23.  
  24. #define MAX_LANGS       MAX_LANGUAGES
  25. #define MAX_MODULES     65536
  26. char *moduledir=        "public_html/modules";
  27. char *sheetdir=         "public_html/bases/sheet";
  28. char *dicdir=           "public_html/bases";
  29. char *outdir=           "public_html/bases/site2";
  30. char *maindic=          "sys/words";
  31. char *groupdic=         "sys/wgrp/wgrp";
  32. char *suffixdic=        "sys/suffix";
  33. char *ignoredic=        "sys/indignore";
  34. char *conffile=         "log/wims.conf";
  35. char *mlistbase=        "list";
  36.  
  37. char lang[MAX_LANGS][4]={
  38.     "en","fr","cn","es","it","nl","si","ca","pt"
  39. };
  40. #define DEFAULT_LANGCNT 6
  41. char allang[MAX_LANGS][4]={
  42.     "en","fr","cn","es","it","nl","de","si","ca","pt"
  43. };
  44. #define allangcnt 8
  45. char ignore[MAX_LANGS][MAX_LINELEN+1];
  46. char mlistfile[MAX_LANGS][256];
  47. int langcnt;
  48. FILE *langf, *titf, *descf, *weightf, *robotf, *indf, *listf, *addrf, *serialf, *authorf, *versionf;
  49.  
  50. struct cat {
  51.     char *name;
  52.     char typ;
  53. } cat[]={
  54.         {"all_types",   'A'},
  55.         {"exercise",    'X'},
  56.         {"oef",         'O'},
  57.         {"tool",        'T'},
  58.         {"recreation",  'R'},
  59.         {"reference",   'Y'},
  60.         {"document",    'D'},
  61.         {"popup",       'P'},
  62.         {"datamodule",  'M'}
  63. };
  64. #define catno (sizeof(cat)/sizeof(cat[0]))
  65.  
  66. struct mod {
  67.     char *name;
  68.     unsigned char langs[MAX_LANGS];
  69.     int counts[MAX_LANGS];
  70.     int  langcnt;
  71. } mod[MAX_MODULES];
  72. int modcnt;
  73.  
  74. char *mlist;
  75.  
  76. void *xmalloc(size_t n)
  77. {
  78.     void *p;
  79.     p=malloc(n);
  80.     if(p==NULL) {
  81.         printf("Malloc failure.\n");
  82.         exit(1);
  83.     }
  84.     return p;
  85. }
  86.  
  87. char *acctab="çéèêëúùûüáàâäãóòôöõíìïîñýÇÉÈÊËÚÙÛÜÁÀÂÃÄÓÒÔÖÕÍÌÏÎÑÝ",
  88.      *deatab="ceeeeuuuuaaaaaoooooiiiinyCEEEEUUUUAAAAAOOOOOIIIINY";
  89.  
  90.         /* fold accented letters to unaccented */
  91. void deaccent(char *p)
  92. {
  93.     char *sp;
  94.     char *v;
  95.     for(sp=p;*sp;sp++) {
  96.         if(*sp<0 && (v=strchr(acctab,*sp))!=NULL)
  97.           *sp=*(deatab+(v-acctab));
  98.         if(!isalnum(*sp) && strchr(",.&$+*",*sp)==0) *sp=' ';
  99.         else *sp=tolower(*sp);
  100.     }
  101. }
  102.  
  103.         /* translate everything non-alphanumeric into space */
  104. void towords(char *p)
  105. {
  106.     char *pp;
  107.     for(pp=p;*pp;pp++) if(!isalnum(*pp) && strchr("&$+*",*pp)==0) *pp=' ';
  108. }
  109.  
  110.         /* Points to the end of the word */
  111. char *find_word_end(char *p)
  112. {
  113.     int i;
  114.     for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++);
  115.     return p;
  116. }
  117.  
  118.         /* Strips leading spaces */
  119. char *find_word_start(char *p)
  120. {
  121.     int i;
  122.     for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++);
  123.     return p;
  124. }
  125.  
  126.         /* Find first occurrence of word */
  127. char *wordchr(char *p, char *w)
  128. {
  129.     char *r;
  130.  
  131.     for(r=strstr(p,w);r!=NULL &&
  132.         ( (r>p && !isspace(*(r-1))) || (!isspace(*(r+strlen(w))) && *(r+strlen(w))!=0) );
  133.         r=strstr(r+1,w));
  134.     return r;
  135. }
  136.  
  137.         /* find a variable in a string (math expression).
  138.          * Returns the pointer or NULL. */
  139. char *varchr(char *p, char *v)
  140. {
  141.     char *pp; int n=strlen(v);
  142.     for(pp=strstr(p,v); pp!=NULL; pp=strstr(pp+1,v)) {
  143.         if((pp==p || !isalnum(*(pp-1))) &&
  144.            (!isalnum(*(pp+n)) || *(pp+n)==0)) break;
  145.     }
  146.     return pp;
  147. }
  148.  
  149.         /* strip trailing spaces; return string end. */
  150. char *strip_trailing_spaces(char *p)
  151. {
  152.     char *pp;
  153.     if(*p==0) return p;
  154.     for(pp=p+strlen(p)-1; pp>=p && isspace(*pp); *(pp--)=0);
  155.     return pp;
  156. }
  157.  
  158. char *find_tag_end(char *p)
  159. {
  160.     char *pp;
  161.     pp=p; if(*pp=='<') pp++;
  162.     for(; *pp && *pp!='>'; pp++) {
  163.         if(*pp=='<') {
  164.             pp=find_tag_end(pp)-1; continue;
  165.         }
  166.         if(*pp=='"') {
  167.             pp=strchr(pp+1,'"');
  168.             if(pp==NULL) return p+strlen(p); else continue;
  169.         }
  170.         if(*pp=='\'') {
  171.             pp=strchr(pp+1,'\'');
  172.             if(pp==NULL) return p+strlen(p); else continue;
  173.         }
  174.     }
  175.     if(*pp=='>') pp++; return pp;
  176. }
  177.  
  178. char *find_tag(char *p, char *tag)
  179. {
  180.     char *pp;
  181.     int len;
  182.     len=strlen(tag);
  183.     for(pp=strchr(p,'<'); pp!=NULL && *pp; pp=strchr(pp+1,'<')) {
  184.         if(strncasecmp(pp+1,tag,len)==0 && !isalnum(*(pp+1+len))) return pp;
  185.     }
  186.     return p+strlen(p);
  187. }
  188.  
  189.         /* remove all html tags */
  190. void detag(char *p)
  191. {
  192.     char *pp, *p2;
  193.     for(pp=strchr(p,'<'); pp!=NULL; pp=strchr(pp,'<')) {
  194.         p2=find_tag_end(pp);
  195.         if(*p2==0) {*pp=0; return; }
  196.         ovlstrcpy(pp,p2);
  197.     }
  198. }
  199.  
  200.         /* modify a string. Bufferlen must be ast least MAX_LINELEN */
  201. void string_modify(char *start, char *bad_beg, char *bad_end, char *good,...)
  202. {
  203.     char buf[MAX_LINELEN+1];
  204.     va_list vp;
  205.    
  206.     va_start(vp,good);
  207.     vsnprintf(buf,sizeof(buf),good,vp); va_end(vp);
  208.     if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN)
  209.       return;
  210.     strcat(buf,bad_end);
  211.     ovlstrcpy(bad_beg,buf);
  212. }
  213.  
  214. void _getdef(char buf[], char *name, char value[])
  215. {
  216.     char *p1, *p2, *p3;
  217.  
  218.     value[0]=0;
  219.     for(p1=strstr(buf,name); p1!=NULL; p1=strstr(p1+1,name)) {
  220.         p2=find_word_start(p1+strlen(name));
  221.         if((p1>buf && !isspace(*(p1-1))) || *p2!='=') continue;
  222.         p3=p1; while(p3>buf && isspace(*(p3-1)) && *(p3-1)!='\n') p3--;
  223.         if(p3>buf && *(p3-1)!='\n') continue;
  224.         p3=strchr(p2,'\n');
  225.         p2=find_word_start(p2+1);
  226.         if(p3 <= p2) continue;
  227.         snprintf(value,MAX_LINELEN,"%s",p2);
  228.         if(p3!=NULL && p3-p2<MAX_LINELEN) value[p3-p2]=0;
  229.         strip_trailing_spaces(value);
  230.         break;
  231.     }
  232. }
  233.  
  234.         /* Get variable definition from a file.
  235.          * Result stored in buffer value of length MAX_LINELEN. */
  236. void getdef(char *fname, char *name, char value[])
  237. {
  238.     FILE *f;
  239.     char *buf;
  240.     int l;
  241.    
  242.     value[0]=0;
  243.     f=fopen(fname,"r"); if(f==NULL) return;
  244.     fseek(f,0,SEEK_END); l=ftell(f); fseek(f,0,SEEK_SET);
  245.     buf=xmalloc(l+256); l=fread(buf,1,l,f);
  246.     fclose(f);
  247.     if(l<=0) return; else buf[l]=0;
  248.     _getdef(buf,name,value);
  249.     free(buf);
  250. }
  251.  
  252. #include "translator_.c"
  253.  
  254. char *mdicbuf, *gdicbuf;
  255. char gentry[sizeof(entry)], mentry[sizeof(entry)];
  256. int gentrycount, mentrycount;
  257.  
  258.         /* Preparation of data */
  259. void prep(void)
  260. {
  261.     char buf[MAX_LINELEN+1];
  262.     char *p1,*p2,*s,*old;
  263.     int i,l,thislang,t;
  264.     FILE *f;
  265.    
  266.     s=getenv("modind_outdir"); if(s!=NULL && *s!=0) outdir=s;
  267.     s=getenv("modind_sheetdir"); if(s!=NULL && *s!=0) sheetdir=s;
  268.     snprintf(buf,sizeof(buf),"%s/addr",outdir);
  269.     addrf=fopen(buf,"w");
  270.     snprintf(buf,sizeof(buf),"%s/serial",outdir);
  271.     serialf=fopen(buf,"w");
  272.     modcnt=langcnt=0;
  273.     getdef(conffile,"site_languages",buf);
  274.     for(p1=buf;*p1;p1++) if(!isalnum(*p1)) *p1=' ';
  275.     for(p1=find_word_start(buf); *p1 && langcnt<MAX_LANGS; p1=find_word_start(p2)) {
  276.         p2=find_word_end(p1);
  277.         if(p2!=p1+2 || !isalpha(*p1) || !isalpha(*(p1+1))) continue;
  278.         memmove(lang[langcnt],p1,2); lang[langcnt++][2]=0;
  279.     }
  280.     if(langcnt==0) {    /* default languages */
  281.         langcnt=DEFAULT_LANGCNT;
  282.     }
  283.     s=getenv("mlist"); if(s==NULL) exit(1);
  284.     l=strlen(s); if(l<0 || l>100*MAX_LINELEN) exit(1);
  285.     mlist=xmalloc(l+16); ovlstrcpy(mlist,s); old="";
  286.     for(i=0;i<langcnt;i++) {
  287.         snprintf(buf,sizeof(buf),"%s/%s.%s",dicdir,ignoredic,lang[i]);
  288.         f=fopen(buf,"r"); if(f==NULL) continue;
  289.         l=fread(ignore[i],1,MAX_LINELEN,f);fclose(f);
  290.         if(l<0 || l>=MAX_LINELEN) l=0;
  291.         ignore[i][l]=0;
  292.     }
  293.     for(t=0, p1=find_word_start(mlist);
  294.         *p1 && modcnt<MAX_MODULES;
  295.         p1=find_word_start(p2), t++) {
  296.         p2=find_word_end(p1);
  297.         l=p2-p1; if(*p2) *p2++=0;
  298.         fprintf(addrf,"%d:%s\n",t,p1);
  299.         fprintf(serialf,"%s:%d\n",p1,t);
  300.         thislang=-1;
  301. /* language is taken from the address */
  302.         if(l>3 && p1[l-3]=='.') {
  303.             for(i=0;i<langcnt;i++) if(strcasecmp(lang[i],p1+l-2)==0) break;
  304.             if(i<langcnt) {p1[l-3]=0; thislang=i;}
  305.             else {      /* unknown language, not referenced */
  306.                 continue;
  307.             }
  308.         }
  309.         if(modcnt>0 && strcmp(old,p1)==0 && thislang>=0) {
  310.             if(mod[modcnt-1].langcnt<langcnt) {
  311.                 mod[modcnt-1].langs[mod[modcnt-1].langcnt]=thislang;
  312.                 mod[modcnt-1].counts[mod[modcnt-1].langcnt]=t;
  313.                 (mod[modcnt-1].langcnt)++;
  314.             }
  315.         }
  316.         else {
  317.             mod[modcnt].name=old=p1;
  318.             if(thislang>=0) {
  319.                 mod[modcnt].langs[0]=thislang;
  320.                 mod[modcnt].langcnt=1;
  321.             }
  322.             else mod[modcnt].langcnt=0;
  323.             mod[modcnt].counts[0]=t;
  324.             modcnt++;
  325.         }
  326.     }
  327.     snprintf(buf,sizeof(buf),"%s/language",outdir);
  328.     langf=fopen(buf,"w");
  329.     snprintf(buf,sizeof(buf),"%s/title",outdir);
  330.     titf=fopen(buf,"w");
  331.     snprintf(buf,sizeof(buf),"%s/description",outdir);
  332.     descf=fopen(buf,"w");
  333.     snprintf(buf,sizeof(buf),"%s/author",outdir);
  334.     authorf=fopen(buf,"w");
  335.     snprintf(buf,sizeof(buf),"%s/version",outdir);
  336.     versionf=fopen(buf,"w");
  337.     snprintf(buf,sizeof(buf),"%s/lists/robot.phtml",outdir);
  338.     robotf=fopen(buf,"w");
  339.     fclose(addrf); fclose(serialf);
  340.     if(!robotf || !versionf || !authorf || !descf || !titf || !descf) {
  341.         fprintf(stderr,"modind: error creating output files.\n");
  342.         exit(1);
  343.     }
  344. }
  345.  
  346. void sprep(void)
  347. {
  348.     char *p1,*p2,*s;
  349.     int i,l,thislang;
  350.    
  351.     modcnt=0;
  352.     s=getenv("slist"); if(s==NULL) return;
  353.     l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return;
  354.     mlist=xmalloc(l+16); ovlstrcpy(mlist,s);
  355.     for(p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2)) {
  356.         p2=find_word_end(p1);
  357.         l=p2-p1; if(*p2) *p2++=0;
  358.         for(i=0;i<langcnt;i++) if(strncasecmp(lang[i],p1,2)==0) break;
  359.         if(i<langcnt) thislang=i; else continue;
  360.         mod[modcnt].name=p1;
  361.         mod[modcnt].langs[0]=thislang;
  362.         mod[modcnt].langcnt=1;
  363.         modcnt++;
  364.     }
  365. }
  366.  
  367. void clean(void)
  368. {
  369.     fclose(langf); fclose(titf); fclose(descf); fclose(robotf);
  370.     fclose(authorf); fclose(versionf);
  371. }
  372.  
  373. char *sheetindex[]={
  374.       "title", "description",
  375.       "duration", "severity",
  376.       "level", "domain",
  377.       "keywords", "reserved1", "reserved2", "remark"
  378. };
  379. #define SHEETINDEX_NO (sizeof(sheetindex)/sizeof(sheetindex[0]))
  380. char sindbuf[SHEETINDEX_NO][MAX_LINELEN+1];
  381. enum{s_title, s_description,
  382.       s_duration, s_severity,
  383.       s_level, s_domain,
  384.       s_keywords, s_reserved1, s_reserved2,
  385.       s_remark
  386. };
  387.  
  388. char *modindex[]={
  389.       "title", "description",
  390.       "author", "address", "copyright",
  391.       "version", "wims_version", "language",
  392.       "category", "level", "domain", "keywords",
  393.       "keywords_ca", "keywords_en", "keywords_fr", "keywords_it", "keywords_nl",
  394.       "title_ca", "title_en", "title_fr", "title_it", "title_nl",
  395.       "require"
  396. };
  397. #define MODINDEX_NO (sizeof(modindex)/sizeof(modindex[0]))
  398. char indbuf[MODINDEX_NO][MAX_LINELEN+1];
  399. enum{i_title, i_description,
  400.       i_author,i_address,i_copyright,
  401.       i_version,i_wims_version,i_language,
  402.       i_category,i_level,i_domain,i_keywords,
  403.       i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
  404.       i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl,
  405.       i_require
  406. };
  407.  
  408. char *module_special_file[]={
  409.     "intro","help","about"
  410. };
  411. #define MODSPEC_NO (sizeof(module_special_file)/sizeof(module_special_file[0]))
  412. char module_language[4];
  413.  
  414.         /* read and treat module's INDEX file */
  415. int module_index(const char *name)
  416. {
  417.     char *p, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
  418.     FILE *indf;
  419.     int i,l;
  420.  
  421.     snprintf(fbuf,sizeof(fbuf),"%s/%s/INDEX",moduledir,name);
  422.     indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
  423.     l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
  424.     if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
  425.     for(i=0;i<MODINDEX_NO;i++) {
  426.         _getdef(ibuf,modindex[i],indbuf[i]);
  427.                 /* compatibility precaution */
  428.         if(indbuf[i][0]==':') indbuf[i][0]='.';
  429.     }
  430.     p=find_word_start(indbuf[i_language]);
  431.     if(isalpha(*p) && isalpha(*(p+1))) {
  432.         memmove(module_language,p,2); module_language[2]=0;
  433.     }
  434.     else ovlstrcpy(module_language,"en");
  435.     return 0;
  436. }
  437.  
  438. int sheet_index(int serial)
  439. {
  440.     char *p1, *p2, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
  441.     FILE *indf;
  442.     int i,l;
  443.  
  444.     snprintf(fbuf,sizeof(fbuf),"%s/%s.def",sheetdir,mod[serial].name);
  445.     indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
  446.     l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
  447.     if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
  448.     for(i=0;i<SHEETINDEX_NO;i++) sindbuf[i][0]=0;
  449.     for(i=0,p1=find_word_start(ibuf);
  450.         i<SHEETINDEX_NO-1 && *p1!=':' && *p1!=0;
  451.         i++,p1=p2) {
  452.         p2=strchr(p1,'\n');
  453.         if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
  454.         p1=find_word_start(p1); strip_trailing_spaces(p1);
  455.         snprintf(sindbuf[i],MAX_LINELEN,"%s",p1);
  456.     }
  457.     p2=strstr(p1,"\n:"); if(p2==NULL) p2=p1+strlen(p1);
  458.     else *p2=0;
  459.     p1=find_word_start(p1); strip_trailing_spaces(p1);
  460.     for(p2=p1;*p2;p2++) if(*p2=='\n') *p2=' ';
  461.     ovlstrcpy(sindbuf[s_remark],p1);
  462.     return 0;
  463. }
  464.  
  465. unsigned char categories[16];
  466. char taken[MAX_LINELEN+1];
  467. int catcnt, takenlen, tweight;
  468.  
  469. void appenditem(char *word, int lind, int serial, int weight, char *l)
  470. {
  471.     char nbuf[MAX_LINELEN+1], buf[MAX_LINELEN+1];
  472.     int i, ll;
  473.     char *p;
  474.     FILE *f;
  475.    
  476.     if(!isalnum(*word) || (ll=strlen(word))<2 ||
  477.        wordchr(taken,word)!=NULL ||
  478.        wordchr(ignore[lind],word)!=NULL ||
  479.        takenlen>=MAX_LINELEN-ll-16)
  480.       return;
  481.     if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
  482.     for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
  483.     taken[takenlen++]=' '; taken[takenlen++]=' ';
  484.     ovlstrcpy(taken+takenlen,word);
  485.     takenlen+=ll; tweight+=weight;
  486.     snprintf(buf,sizeof(buf),"%s:%d?%d\n",word,serial,weight);
  487.     for(i=0;i<catcnt;i++) {
  488.         snprintf(nbuf,sizeof(nbuf),"%s/%c.%s",
  489.                  outdir,categories[i],lang[lind]);
  490.         f=fopen(nbuf,"a");
  491.         if(f!=NULL) {fputs(buf,f); fclose(f);}
  492.     }
  493. }
  494.  
  495. void onemodule(const char *name, int serial, int lind)
  496. {
  497.     int i;
  498.     unsigned char trlist[]={
  499.         i_title,i_description,i_category,i_domain,i_keywords,
  500.           i_require,i_author,
  501.           i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
  502.           i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl
  503.     };
  504.     #define trcnt (sizeof(trlist)/sizeof(trlist[0]))
  505.     char *p1, *p2, *pp, *q, buf[MAX_LINELEN+1], lbuf[16];
  506.     FILE *f;
  507.    
  508.     if(module_index(name)) return;
  509.     towords(indbuf[i_category]);
  510.     for(i=catcnt=0;i<catno && catcnt<16;i++) {
  511.         if(wordchr(indbuf[i_category],cat[i].name)!=NULL)
  512.           categories[catcnt++]=cat[i].typ;
  513.     }
  514.     if(catcnt==0) return;
  515.     if(categories[0]!=cat[0].typ)
  516.       categories[catcnt++]=cat[0].typ;
  517.     for(i=0;i<catcnt;i++) {
  518.         snprintf(buf,sizeof(buf),"%s/lists/%c.%s",
  519.                  outdir,categories[i],lang[lind]);
  520.         f=fopen(buf,"a");
  521.         if(f!=NULL) {fprintf(f,"%s\n",name); fclose(f);}
  522.     }
  523.     fprintf(langf,"%d:%s\n",serial,module_language);
  524.     fprintf(titf,"%d:%s\n",serial,indbuf[i_title]);
  525.     fprintf(descf,"%d:%s\n",serial,indbuf[i_description]);
  526.     fprintf(authorf,"%d:%s\n",serial,indbuf[i_author]);
  527.     fprintf(versionf,"%d:%s\n",serial,indbuf[i_version]);
  528.     snprintf(buf,sizeof(buf),"%s",indbuf[i_description]);
  529.     for(pp=strchr(buf,','); pp; pp=strchr(pp,','))
  530.       string_modify(buf,pp,pp+1,"&#44;");
  531.     if(strcmp(module_language,lang[lind])==0)
  532.       fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name,
  533.               indbuf[i_title], buf);
  534.     entrycount=mentrycount; dicbuf=mdicbuf;
  535.     memmove(entry,mentry,mentrycount*sizeof(entry[0]));
  536.     unknown_type=unk_leave;
  537.     for(i=0;i<trcnt;i++) {
  538.         detag(indbuf[trlist[i]]);
  539.         deaccent(indbuf[trlist[i]]);
  540.         singlespace(indbuf[trlist[i]]);
  541.         suffix_translate(indbuf[trlist[i]]);
  542.         translate(indbuf[trlist[i]]);
  543.     }
  544.     taken[0]=0; takenlen=tweight=0;
  545.     ovlstrcpy(buf,indbuf[i_title]); towords(buf);
  546.     for(p1=find_word_start(buf);*p1;
  547.         p1=find_word_start(p2)) {
  548.         p2=find_word_end(p1); if(*p2) *p2++=0;
  549.         appenditem(p1,lind,serial,4,module_language);
  550.     }
  551.     snprintf(buf,sizeof(buf),"%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s",
  552.              indbuf[i_description],indbuf[i_keywords],
  553.              indbuf[i_keywords_ca],indbuf[i_keywords_en],indbuf[i_keywords_fr],
  554.              indbuf[i_keywords_it],indbuf[i_keywords_nl],
  555.              indbuf[i_title_ca],indbuf[i_title_en],indbuf[i_title_fr],
  556.              indbuf[i_title_it],indbuf[i_title_nl],
  557.              indbuf[i_domain],indbuf[i_require],indbuf[i_author]);
  558.     towords(buf);
  559.     for(p1=find_word_start(buf);*p1;
  560.         p1=find_word_start(p2)) {
  561.         p2=find_word_end(p1); if(*p2) *p2++=0;
  562.         appenditem(p1,lind,serial,2,module_language);
  563.     }
  564.     entrycount=gentrycount; dicbuf=gdicbuf;
  565.     memmove(entry,gentry,gentrycount*sizeof(entry[0]));
  566.     unknown_type=unk_delete;
  567.     ovlstrcpy(buf,indbuf[i_title]); translate(buf);
  568.     for(p1=find_word_start(buf); *p1;
  569.         p1=find_word_start(p2)) {
  570.         p2=strchr(p1,',');
  571.         if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
  572.         if(strlen(p1)<=0) continue;
  573.         appenditem(p1,lind,serial,4,module_language);
  574.     }
  575.     snprintf(buf,sizeof(buf),"%s, %s, %s, %s, %s, %s, %s, %s",
  576.              indbuf[i_description],indbuf[i_keywords],
  577.              indbuf[i_keywords_ca], indbuf[i_keywords_en],indbuf[i_keywords_fr],
  578.              indbuf[i_keywords_it], indbuf[i_keywords_nl],
  579.              indbuf[i_domain]);
  580.     translate(buf);
  581.     for(p1=find_word_start(buf); *p1;
  582.         p1=find_word_start(p2)) {
  583.         p2=strchr(p1,',');
  584.         if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
  585.         if(strlen(p1)<=0) continue;
  586.         appenditem(p1,lind,serial,2,module_language);
  587.     }
  588.     snprintf(buf,sizeof(buf),"%s",indbuf[i_level]);
  589.     ovlstrcpy(lbuf,"level");
  590.     for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' ';
  591.     q=buf+strlen(buf);
  592.     for(p1=find_word_start(buf); (*p1) && (p1 < q) ;
  593.         p1=find_word_start(p2)) {
  594.         p2=find_word_end(p1);
  595.         if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
  596.         if(!isalpha(*p1) ||
  597.            (!isdigit(*(p1+1)) && *(p1+1)!=0) ||
  598.            (*(p1+1)!=0 && *(p1+2)!=0))
  599.           continue;
  600.         *p1=tolower(*p1);
  601.         ovlstrcpy(lbuf+strlen("level"),p1);
  602.         appenditem(lbuf,lind,serial,2,module_language);
  603.     }
  604.     fprintf(weightf,"%d:%d\n",serial,tweight);
  605. }
  606.  
  607. void modules(void)
  608. {
  609.     int i,j,k,d;
  610.     char namebuf[MAX_LINELEN+1];
  611.     char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1];
  612.  
  613.     for(j=0;j<langcnt;j++) {
  614.         snprintf(namebuf,sizeof(namebuf),"%s/weight.%s",outdir,lang[j]);
  615.         weightf=fopen(namebuf,"w");
  616.         snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
  617.         snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
  618.         snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
  619.         suffix_dic(sdic); prepare_dic(gdic);
  620.         gdicbuf=dicbuf; gentrycount=entrycount;
  621.         memmove(gentry,entry,gentrycount*sizeof(entry[0]));
  622.         prepare_dic(mdic);
  623.         mdicbuf=dicbuf; mentrycount=entrycount;
  624.         memmove(mentry,entry,mentrycount*sizeof(entry[0]));
  625.         unknown_type=unk_leave; translate(ignore[j]);
  626.         for(i=0;i<modcnt;i++) {
  627.             if(mod[i].langcnt>0) {
  628.                 for(d=k=0;k<mod[i].langcnt;k++)
  629.                   if(mod[i].langs[k]<mod[i].langs[d]) d=k;
  630.                 for(k=0;k<mod[i].langcnt && mod[i].langs[k]!=j;k++);
  631.                 if(k>=mod[i].langcnt) k=d;
  632.                 snprintf(namebuf,MAX_LINELEN,"%s.%s",mod[i].name,
  633.                          lang[mod[i].langs[k]]);
  634.                 onemodule(namebuf,mod[i].counts[k],j);
  635.             }
  636.             else {
  637.                 onemodule(mod[i].name,mod[i].counts[0],j);
  638.             }
  639.         }
  640.         if(mentrycount>0) free(mdicbuf);
  641.         if(gentrycount>0) free(gdicbuf);
  642.         if(suffixcnt>0) free(sufbuf);
  643.         if(weightf) fclose(weightf);
  644.     }
  645. }
  646.  
  647. void sappenditem(char *word, int lind, int serial, int weight)
  648. {
  649.     int ll;
  650.     char *p;
  651.    
  652.     if(!isalnum(*word) || (ll=strlen(word))<2 ||
  653.        wordchr(taken,word)!=NULL ||
  654.        wordchr(ignore[lind],word)!=NULL ||
  655.        takenlen>=MAX_LINELEN-ll-16)
  656.       return;
  657.     if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
  658.     for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
  659.     taken[takenlen++]=' ';taken[takenlen++]=' ';
  660.     ovlstrcpy(taken+takenlen,word);
  661.     takenlen+=ll; tweight+=weight;
  662.     fprintf(indf,"%s:%d?%d\n",word,serial,weight);
  663. }
  664.  
  665. void onesheet(int serial, int lind)
  666. {
  667.     int i;
  668.     unsigned char trlist[]={
  669.         s_title,s_description,s_domain,s_keywords,s_remark
  670.     };
  671.     #define trcnt (sizeof(trlist)/sizeof(trlist[0]))
  672.     char *p1, *p2, buf[MAX_LINELEN+1];
  673.    
  674.     if(sheet_index(serial)) return;
  675.     fprintf(listf,"%s\n",mod[serial].name+3);
  676.     fprintf(titf,"%d:%s\n",serial,sindbuf[s_title]);
  677.     fprintf(descf,"%d:%s\n",serial,sindbuf[s_description]);
  678.     entrycount=mentrycount; dicbuf=mdicbuf;
  679.     memmove(entry,mentry,mentrycount*sizeof(entry[0]));
  680.     unknown_type=unk_leave;
  681.     for(i=0;i<trcnt;i++) {
  682.         detag(sindbuf[trlist[i]]);
  683.         deaccent(sindbuf[trlist[i]]);
  684.         singlespace(sindbuf[trlist[i]]);
  685.         suffix_translate(sindbuf[trlist[i]]);
  686.         translate(sindbuf[trlist[i]]);
  687.     }
  688.     taken[0]=0; takenlen=tweight=0;
  689.     ovlstrcpy(buf,sindbuf[s_title]); towords(buf);
  690.     for(p1=find_word_start(buf);*p1;
  691.         p1=find_word_start(p2)) {
  692.         p2=find_word_end(p1); if(*p2) *p2++=0;
  693.         sappenditem(p1,lind,serial,4);
  694.     }
  695.     snprintf(buf,sizeof(buf),"%s %s %s %s",
  696.              sindbuf[s_description],sindbuf[s_keywords],
  697.              sindbuf[s_domain],sindbuf[s_remark]);
  698.     towords(buf);
  699.     for(p1=find_word_start(buf);*p1;
  700.         p1=find_word_start(p2)) {
  701.         p2=find_word_end(p1); if(*p2) *p2++=0;
  702.         sappenditem(p1,lind,serial,2);
  703.     }
  704.     entrycount=gentrycount; dicbuf=gdicbuf;
  705.     memmove(entry,gentry,gentrycount*sizeof(entry[0]));
  706.     unknown_type=unk_delete;
  707.     ovlstrcpy(buf,sindbuf[s_title]); translate(buf);
  708.     for(p1=find_word_start(buf); *p1;
  709.         p1=find_word_start(p2)) {
  710.         p2=strchr(p1,',');
  711.         if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
  712.         if(strlen(p1)<=0) continue;
  713.         sappenditem(p1,lind,serial,4);
  714.     }
  715.     snprintf(buf,sizeof(buf),"%s, %s, %s, %s",
  716.              sindbuf[s_description],sindbuf[s_keywords],
  717.              sindbuf[s_domain],sindbuf[s_remark]);
  718.     translate(buf);
  719.     for(p1=find_word_start(buf); *p1;
  720.         p1=find_word_start(p2)) {
  721.         p2=strchr(p1,',');
  722.         if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
  723.         if(strlen(p1)<=0) continue;
  724.         sappenditem(p1,lind,serial,2);
  725.     }
  726.     fprintf(weightf,"%d:%d\n",serial,tweight);
  727. }
  728.  
  729. void sheets(void)
  730. {
  731.     int i,j;
  732.     char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1];
  733.     char buf[MAX_LINELEN+1];
  734.    
  735.     for(j=0;j<langcnt;j++) {
  736.         snprintf(buf,sizeof(buf),"%s/index/title.%s",sheetdir,lang[j]);
  737.         titf=fopen(buf,"w");
  738.         snprintf(buf,sizeof(buf),"%s/index/description.%s",sheetdir,lang[j]);
  739.         descf=fopen(buf,"w");
  740.         snprintf(buf,sizeof(buf),"%s/index/%s",sheetdir,lang[j]);
  741.         indf=fopen(buf,"w");
  742.         snprintf(buf,sizeof(buf),"%s/index/list.%s",sheetdir,lang[j]);
  743.         listf=fopen(buf,"w");
  744.         snprintf(buf,sizeof(buf),"%s/index/weight.%s",sheetdir,lang[j]);
  745.         weightf=fopen(buf,"w");
  746.         snprintf(buf,sizeof(buf),"%s/index/addr.%s",sheetdir,lang[j]);
  747.         addrf=fopen(buf,"w");
  748.         snprintf(buf,sizeof(buf),"%s/index/serial.%s",sheetdir,lang[j]);
  749.         serialf=fopen(buf,"w");
  750.         snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
  751.         snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
  752.         snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
  753.         suffix_dic(sdic); prepare_dic(gdic);
  754.         gdicbuf=dicbuf; gentrycount=entrycount;
  755.         memmove(gentry,entry,gentrycount*sizeof(entry[0]));
  756.         prepare_dic(mdic);
  757.         mdicbuf=dicbuf; mentrycount=entrycount;
  758.         memmove(mentry,entry,mentrycount*sizeof(entry[0]));
  759.         unknown_type=unk_leave; translate(ignore[j]);
  760.         for(i=0;i<modcnt;i++) {
  761.             if(mod[i].langs[0]!=j) continue;
  762.             fprintf(addrf,"%d:%s\n",i,mod[i].name+3);
  763.             fprintf(serialf,"%s:%d\n",mod[i].name+3,i);
  764.             onesheet(i,j);
  765.         }
  766.         if(mentrycount>0) free(mdicbuf);
  767.         if(gentrycount>0) free(gdicbuf);
  768.         if(suffixcnt>0) free(sufbuf);
  769.         fclose(titf); fclose(descf); fclose(indf); fclose(listf);
  770.         fclose(weightf); fclose(addrf); fclose(serialf);
  771.     }
  772. }
  773.  
  774. int main()
  775. {
  776.     prep();
  777.     if(modcnt>0) modules();
  778.     clean();
  779.     sprep();
  780.     if(modcnt>0) sheets();
  781.     return 0;
  782. }
  783.  
  784.