#!/usr/bin/perl

package Jtext;

use Exporter;
@ISA = (Exporter);
@EXPORT = qw(index_sjis rindex_sjis index_euc rindex_euc substr_sjis substr_euc length_sjis length_euc);
$VERSION = 0.01;

# ------------------------------------------

sub index_sjis {
    my ($strall,$strpart,$pos) = @_;
	return &index_ja($strall,$strpart,$pos,"sjis");
}

sub rindex_sjis {
    my ($strall,$strpart,$pos) = @_;
	return &index_ja($strall,$strpart,$pos,"sjis","rindex");
}

sub index_euc {
    my ($strall,$strpart,$pos) = @_;
	return &index_ja($strall,$strpart,$pos,"euc");
}

sub rindex_euc {
    my ($strall,$strpart,$pos) = @_;
	return &index_ja($strall,$strpart,$pos,"euc","rindex");
}

sub index_ja {
    my ($strall,$strpart,$pos,$charset,$rindex) = @_;
	my $string;
	my $result;
	my @tmpall;
	my @tmppart;

	if($rindex eq "rindex"){
		@tmpall  = reverse &chars_ja($strall,$charset);
		@tmppart = reverse &chars_ja($strpart,$charset);
	} else {
		@tmpall  = &chars_ja($strall,$charset);
		@tmppart = &chars_ja($strpart,$charset);
	}
	if(!defined($pos)) {$pos=0;}

	BIG: for ($h=$pos; $h<($#tmpall+1); $h++) {
		if($tmpall[$h] eq $tmppart[0]) {
			$j=0;
			SMALL: for ($i=$h; $i<($#tmpall+1); $i++) {
				if($tmpall[$i] eq $tmppart[$j]) {
					$j++;
					if($j == ($#tmppart+1)) {
						if($rindex eq "rindex"){
							return ($#tmpall - $#tmppart - $h);
						} else {
							return $h;
						}
					}
				} else {
					next BIG;
				}
			}
		}
	}
	return -1;	# No hit
}

# ------------------------------------------

sub substr_sjis {
    my ($str,$pos,$len,$replacestr) = @_;
	return &substr_ja($str,$pos,$len,$replacestr,"sjis");
}

sub substr_euc {
    my ($str,$pos,$len,$replacestr) = @_;
	return &substr_ja($str,$pos,$len,$replacestr,"euc");
}

sub substr_ja {
    my ($str,$pos,$len,$replacestr,$charset) = @_;
	my $string;
	my @result;

	if($pos<0) {
		@tmp = reverse &chars_ja($str,$charset);
		if(defined($replacestr)) {
			@replacearray = reverse &chars_ja($replacestr,$charset);
		}
		$pos = abs($pos);
		$reverse = 1;
	} else {
		@tmp = &chars_ja($str,$charset);
		if(defined($replacestr)) {
			@replacearray = &chars_ja($replacestr,$charset);
		}
	}

	if(defined($replacestr)) {
		for ($i=0; $i<$pos; $i++) {
			push(@result,$tmp[$i]);
		}
		push(@result,@replacearray);
		for (($i+$len) .. ($#tmp+1)) {
			push(@result,$tmp[$_]);
		}
	} else {
		if(defined($len)) {
			$len += $pos;
		} else {
			$len = $#tmp+1;
		}
		for ($i=$pos; $i<$len; $i++) {
			push(@result,$tmp[$i]);
		}
	}

	if(!$reverse) {
	    $string = join("",@result);
	} else {
	    $string = join("",reverse @result);
	}
    return $string;
}

# ------------------------------------------

sub length_sjis {
	my($string) = @_;
	my @tmp = &chars_ja($string,"sjis");
	return ($#tmp+1);
}

sub length_euc {
	my($string) = @_;
	my @tmp = &chars_ja($string,"euc");
	return ($#tmp+1);
}

# ------------------------------------------

sub chars_ja {
	my($string,$charset) = @_;
	my @chars;
	$_ = $string;
	if($charset eq "sjis"){
		# sjis
		@chars = /
			 [\x20-\x7E]			# ASCII
			|[\xA1-\xDF]			# HANKAKU-KATAKANA
			|[\x81-\x9F][\x40-\xFC]	# 2bytes character
			|[\xE0-\xEF][\x40-\xFC]	# 2bytes character
		/gox;
	} else {
		# euc
		@chars = /
			 [\x20-\x7E]			# ASCII
			|\x8E[\xA0-\xDF]		# HANKAKU-KATAKANA
			|\x8E[\xA1-\xFE][\xA1-\xFE]	# 3bytes character
			|[\xA1-\xFE][\xA1-\xFE]	# 2bytes character
		/gox;
	}
	return @chars;
}

# ------------------------------------------

1;

__END__

=head1 NAME

Jtext - substr(), index() and rindex() available for Japanese Characters.

=head1 SYNOPSIS

  $line = jcode("これはeucテキストです。eucです。sjisではありません。")->euc;

  $length = length_euc($line);	# 31
  $text   = substr_euc($line,13,6);	# eucです。
  $text   = substr_euc($line,6,4,"文字");	# これはeuc文字です。...
  $text   = substr_euc($line,-1,5,"ない")."\n";	# ...sjisではない。
  $index  = index_euc($line,jcode("テキスト")->euc);	# 6
  $index  = index_euc($line,jcode("テキスト")->euc,9);	# -1
  $rindex = rindex_euc($line,jcode("euc")->euc);		# 13
  $rindex = rindex_euc($line,jcode("euc")->euc,30);	# -1

=head1 DESCRIPTION

Jtext is a module that makes length(), substr(), index() and rindex() available for Japanese Characters.

=head1 METHOD

=head2 length_euc / length_sjis

  length_euc  EXPR
  length_sjis EXPR

  $length = length_euc($line);	# 31

length() for Japanese Characters.

=head2 substr_euc / substr_sjis

  substr_euc  EXPR, OFFSET, LENGTH, REPLACEMENT
  substr_euc  EXPR, OFFSET, LENGTH
  substr_euc  EXPR, OFFSET
  substr_sjis EXPR, OFFSET, LENGTH, REPLACEMENT
  substr_sjis EXPR, OFFSET, LENGTH
  substr_sjis EXPR, OFFSET

  $text   = substr_euc($line,13,6);	# eucです。
  $text   = substr_euc($line,6,4,"文字");	# これはeuc文字です。...
  $text   = substr_euc($line,-1,5,"ない")."\n";	# ...sjisではない。

substr() for Japanese Characters.

=head2 index_euc / index_sjis

  index_euc  STR, SUBSTR, OFFSET
  index_euc  STR, SUBSTR
  index_sjis STR, SUBSTR, OFFSET
  index_sjis STR, SUBSTR

  $index  = index_euc($line,jcode("テキスト")->euc);	# 6
  $index  = index_euc($line,jcode("テキスト")->euc,9);	# -1

index() for Japanese Characters.

=head2 rindex_euc / rindex_sjis

  $rindex = rindex_euc($line,jcode("euc")->euc);		# 13
  $rindex = rindex_euc($line,jcode("euc")->euc,30);	# -1

rindex() for Japanese Characters.

=head1 AUTHOR

Yuki SHIMAZU E<lt>y.shimazu@nifty.comE<gt>

=cut

