WebSVN – wimsdev – Blame – /trunk/wims/public_html/bases/sys/search.pm

Rev	Author	Line No.	Line
6519	bpr	1	package search;
		2
		3	use strict;
		4	use warnings;
6935	bpr	5	use Encode;
6519	bpr	6
		7	use Exporter;
		8	our @ISA = 'Exporter';
8618	bpr	9	our @EXPORT = ('hashdomain', 'listdomain', 'out', 'sortuniq', 'treate_accent',
8633	bpr	10	'treate_dict', 'treate_language', 'dictionnary', 'reverse_dic', 'canonify');
6519	bpr	11
7693	bpr	12	sub canonify { my ($w)=@_; treate_accent(lc($w)) }
		13
7016	bpr	14	sub treate_dict { my ($file) = @_;
8618	bpr	15	my %ref = ( ) ; my $ref=\%ref;
		16	my $text;
6519	bpr	17	open IN, "$file";
6935	bpr	18	while (<IN>) {
6519	bpr	19	$text = $_ ;
		20	$text =~ s/\n +/\n/g;
		21	$text =~ s/\\//g;
		22	$text =~ s/^(\s+)//g;
		23	$text=~ s/\n+/\n/g;
		24	my @L = sortuniq(split("\n",$text));
		25	for my $l (@L) {
		26	my @la=split(":", $l) ;
8636	bpr	27	$ref{canonify($la[0])}=join(':',@la[1..@la-1]) if ($la[1]);
6519	bpr	28	}
		29	}
		30	close IN;
		31	%ref;
		32	}
		33
7693	bpr	34	sub dictionnary { my ($file, @words)=@_;
		35	my %dic=treate_dict($file) ;
		36	my $dic=\%dic;
		37	my @W=();
		38	if (@words) {
		39	for my $w (@words) {
		40	next if !($w);
		41	for my $ww (split(',', $w)) {
		42	if ( $dic{canonify($ww)} ) { push @W, $dic{canonify($ww)} ; };
		43	}
		44	}
		45	sortuniq ( @W )
		46	}
		47	@W;
		48	}
		49
18175	czzmrn	50	# use same list as in ~/src/Lib/liblines.c
6935	bpr	51	sub treate_accent {my ($txt) = @_;
6519	bpr	52	$txt=decode('iso-8859-1',$txt);
18175	czzmrn	53	$txt =~ tr/��/eeeeaaaaaauuuuiiiiooooocnyy/;
6519	bpr	54	$txt =~ s/[��]/A/g;
18175	czzmrn	55	$txt =~ s/[��]/O/g;
6519	bpr	56	$txt =~ s/�/C/g;
18175	czzmrn	57	$txt =~ s/�/N/g;
		58	$txt =~ s/�/Y/g;
6519	bpr	59	$txt =~ s/[��]/E/g;
		60	$txt =~ s/[��]/I/g;
8937	bpr	61	$txt =~ s/'/ /g;
6519	bpr	62	$txt= encode("iso-8859-1",$txt);
		63	$txt
		64	}
		65
		66	sub out { my ($bloc, $text) = @_;
		67	open (OUT, ">$bloc") ;
		68	print OUT $text ; close OUT;
		69	}
		70
		71	sub sortuniq {
6935	bpr	72	return if !(@_) ;
6519	bpr	73	my $prev = "not $_[0]";
		74	grep { $_ ne $prev && ($prev = $_, 1) } sort @_;
		75	}
		76
6935	bpr	77	sub listdomain { my ($file)=@_ ;
6519	bpr	78	my $text='';
		79	open IN, "$file";
		80	while (<IN>) { $text = $_ ;
		81	$text =~ s/[,:]/\n/g;
		82	$text =~ s/\n +/\n/g;
		83	$text =~ s/\\//g;
		84	$text =~ s/^(\s+)//g;
		85	$text=~ s/\n+/\n/g;
		86	}
		87	close IN;
		88	sortuniq(split("\n",$text))
		89	}
		90
		91	## reversing the domain tree
		92
		93	sub hashdomain { my ($file)=@_;
8618	bpr	94	$/ = undef;
6519	bpr	95	open IN, "$file";
		96	my %ref = ( ) ; my $ref=\%ref;
		97	while (<IN>) { my $text=$_ ; $text=~ s/\\\n\s*//g;
		98	$text=~ s/\n\s+/\n/g;
		99	my @text= split("\n", $text);
6935	bpr	100	for my $line (@text) {
6519	bpr	101	next if !($line) ;
		102	$line =~ s/\s+//g;
		103	my @cut=split(":", $line) ;
		104	if ($cut[1]) {
		105	my @son=split(',',$cut[1]);
8618	bpr	106	$ref->{'next'}{$cut[0]}=$cut[1];
6935	bpr	107	for my $s (@son) {
8618	bpr	108	if ($ref->{'prev'}{$s}) { $ref->{'prev'}{$s} .= "," . $cut[0]} else {$ref->{'prev'}{$s} = $cut[0]}
6519	bpr	109	}
		110	}
		111	}
		112	}
		113	close IN;
		114	%ref
		115	}
		116
6879	bpr	117	sub treate_language {
18174	czzmrn	118	my $site_language='en fr nl it cn es';
6879	bpr	119	my $conf='../../../log/wims.conf';
		120	if (-e "$conf") {
		121	open IN, "$conf";
		122	while (<IN>) {
		123	if ($_ =~ s/site_languages=//) { $site_language= $_ ; }
		124	}
		125	close IN;
		126	}
9089	bpr	127	$site_language=~ s/,/ /g; $site_language=~ s/ +/ /g;
		128	split(' ', $site_language) ;
6879	bpr	129	}
7693	bpr	130
		131	sub reverse_dic { my @liste=@_;
		132	my %ref = ( ) ; my $ref=\%ref;
		133	for my $file (@liste) {
		134	open IN, "$file";
		135	while (<IN>) {
		136	next if ($_ =~ /^#/);
		137	my $text= $_ ; $text=~ s/\n//;
		138	my @text=split(":", $text);
		139	my $t=$text[0]; $t=~ s/\n//;
		140	my @L= split(",",$text[1]);
		141	for my $a (@L) {
		142	if ($ref{$a}) { $ref{$a}.= "," . $t ; }
		143	else { $ref{$a} = $t ; }
		144	}
		145	}
		146	close IN;
		147	}
		148	%ref
		149	}

Subversion Repositories wimsdev

(root)/trunk/wims/public_html/bases/sys/search.pm – Rev 18175