# ---------------------------------------------------------------------------- # =head1 experiment --- RD Parser a experiment for RD Parser =cut # ---------------------------------------------------------------------------- # use strict; use vars qw(%CF); package RD; sub main{ my$rdStyle='file:///D:/Web/Airemix/naruse/notes/readme.css'; my$rddoc; open(IN,'<'.$::CF{'RDParser'})||die sprintf'Cannnot open %s.',$::CF{'RDParser'}; read(IN,$rddoc,-s$::CF{'RDParser'}); close(IN); my$rd=new RD; $rd->load($rddoc); # use Data::Dumper;print euc2sjis(Dumper($rd->{objRD}));exit; my$output=$rd->toString('HTML',stylesheet=>$rdStyle); print euc2sjis($output); exit; } sub euc2sjis{ my$s=shift; $s=~s{([\xa1-\xfe]{2})|\x8e([\xa1-\xdf])|\x8f[\xa1-\xfe]{2}} { if($1){ my($c1,$c2)=unpack('C2',$1); if($c1%2){ pack('C2',($c1>>1)+($c1<0xdf?0x31:0x71),$c2-0x60-($c2<0xe0)); }else{ pack('C2',($c1>>1)+($c1<0xdf?0x30:0x70),$c2-2); } }elsif($2){#SS2 $2; }else{#SS3 "\x81\xac"; } }ego; return$s; } #------------------------------------------------------------------------------# {package RD::Generator; # public RD::Generator->new( $generatorName ) sub new{ my$class=shift; my$self={}; $class='RD::Generator::'.shift if@_; bless$self,$class; } # public $generator->generate( $rd ) sub generate{ my$self=shift; my$rd=shift; return undef; } } #------------------------------------------------------------------------------# {package RD::Generator::HTML; # public RD::Generator::HTML->new( $generator ) sub new{ my$class=shift; return'RD::Generator::'.shift if@_; } # public $generator->generate( $rd->{objRD} [, [key => value] ..] ) sub generate{ die if@_<2; my($class,$self)=ref$_[0]?(ref$_[0],shift):($_[0],bless({},shift)); my$rd=shift; my%option=@_; $option{'stylesheet'}='style.css'unless$option{'stylesheet'}; ref$rd&&ref$rd eq'ARRAY'||die'$rd must be ARRAY reference.'; my$result=sprintf<<'_HERE_',$option{'stylesheet'},_getTitle($rd)||'RD';
*%d %s
\n) ,$_+1,$_+1,$_+1,$self->{'footnote'}->[$_]for 0..$#{$self->{'footnote'}}; $result=~s/<\?com\.airemix\.naruse\.perl\.rd footnote\?>/$footnote/o; } if($self->{'headlines'}){ my$headlines=sprintf"\n%s",$self->_termLabel('目次'),$self->{'headlines'}; $result=~s/<\?com\.airemix\.naruse\.perl\.rd index\?>/$headlines/o; } $result.=<<'_HERE_'; _HERE_ return$result; } # private _getTitle( $rd ) sub _getTitle{ my$rd=shift; ref$rd||return; my$title=undef; for(@$rd){ if($_->{'name'}eq'headline'&&$_->{'level'}==1){ $title=_getPlain($_->{'content'}); last; }elsif(ref($_->{'content'})){ $title=_getTitle($_->{'content'}); $title&&last; } } return$title; } # private $generator->_block( $rd ) sub _block{ my$self=shift; my$rd=shift; $self->{'footnote'} =[]unless$self->{'footnote'}; $self->{'headlines'}=''unless exists$self->{'headlines'}; $self->{'indent'}=exists$self->{'indent'}?$self->{'indent'}+1:0; my$space=' 'x 2; my$result=''; for(@$rd){ if('pi'eq$_->{name}){ #Processor Instruction if('index'eq$_->{content}){ $result.="\n"; }elsif('footnote'eq$_->{content}){ $result.="\n"; } }elsif('headline'eq$_->{name}){ # Headline $result.=sprintf"
%s
\n",$space x$self->{'indent'},$self->_inline($_->{content}); }elsif('itemlist'eq$_->{name}or'enumlist'eq$_->{name}){ # ItemList or EnumList my$tagName='itemlist'eq$_->{name}?'UL':'OL'; $result.=sprintf"%s<%s>\n",$space x$self->{'indent'},$tagName; ++$self->{'indent'}; for(@{$_->{content}}){ # if(@{$_->{content}}==1&&$_->{content}->[0]->{name}eq'textblock'){ # $result.=sprintf"%s\n%s%s\n" ,$space x$self->{'indent'},_escapeHTML($_->{content}),$space x$self->{'indent'}; } } --$self->{'indent'}if$self->{'indent'}; return$result; } # private $generator->_inline( $line ) sub _inline{ my$self=shift; my$line=shift; return _escapeHTML($line)unless ref$line; my$result=''; for(@$line){ if('text'eq$_->{'name'}){ $result.=_escapeHTML($_->{'content'}); }elsif('emphasis'eq$_->{'name'}){ $result.=sprintf'%s',$self->_inline($_->{'content'}); }elsif('code'eq$_->{'name'}){ $result.=sprintf'
%s',$self->_inline($_->{'content'});
}elsif('var'eq$_->{'name'}){
$result.=sprintf'%s',$self->_inline($_->{'content'});
}elsif('keyboard'eq$_->{'name'}){
$result.=sprintf'%s',$self->_inline($_->{'content'});
}elsif('term'eq$_->{'name'}){
$result.=$self->_termLabel($_->{'content'});
}elsif('reference'eq$_->{'name'}){
$result.=sprintf'%s'
,'uri'eq$_->{'type'}?'':'#',_escapeHTML($_->{'label'}),$self->_inline($_->{'content'});
}elsif('footnote'eq$_->{'name'}){
push@{$self->{'footnote'}},$self->_inline($_->{'content'});
my$num=scalar@{$self->{'footnote'}};
$result.=sprintf'*%d',$num,$num,$num;
}
}
return$result;
}
# private $generator->_termLabel( $line )
sub _termLabel{
my$self=shift;
my$line=shift;
my$plain=_getPlain($line);
my$label=$plain;
my$display=$self->_inline($line);
return $self->_label($label,$display);
}
# private $generator->_methodLabel( $line )
sub _methodLabel{
my$self=shift;
my$line=shift;
$line=~/\s*([^\x28{]*[^\s\x28{])/o;
my$label=$1;
my$display=$line;
return $self->_label($label,$display);
}
# private $generator->_label( $label, $display )
sub _label{
my$self=shift;
my$label=shift;
my$display=shift;
$self->{'label'}={}unless$self->{'label'};
my$i=0;
my$base=$label;
while(exists$self->{'label'}->{$label}){
$label=sprintf'%s:%d',$base,$i;
}
$self->{'label'}->{$label}=1;
return sprintf'%s',_escapeHTML($label),_escapeHTML($display);
}
# private _getPlain( $line )
sub _getPlain{
my$line=shift;
return$line unless ref$line;
my$result='';
for(@{$line}){
$result.=_getPlain($_->{'content'});
}
return$result;
}
# private _escapeHTML( $str )
sub _escapeHTML{
my$str=shift;
$str=~s/&/&/go;
$str=~s/"/"/go;
$str=~s/</go;
$str=~s/>/>/go;
return $str;
}
}
#------------------------------------------------------------------------------#
=head2 RD Parser
RDを解析します。
=cut
{package RD::Parser;
# public RD::Parser->new()
sub new{
my$class=shift;
my$self={};
return bless$self,$class;
}
# private parse( $document )
sub parse{
my$class=shift;
my$tab=' ' x 4;
my$document=[map{my$line=$_;$line=~s/\G(\x20*)\t/$1$tab/go;$line}split("\n",shift||'')];
my@result;
while(@$document){
shift(@$document)=~/^=begin\b/o||next;
push@result,_parseBlock($document);
}
return\@result;
}
# private _parseBlock( $document )
sub _parseBlock{
my$document=shift;
my@result;
while(@$document){
my$line=$document->[0];
if($line=~/^=end\b/o){
shift@$document;
last;
}elsif($line=~/(#\?[^?]*\?+(?:[^#?][^?]*\?+)*#)/o){
# PI
$1=~/#\?(.*)\?#/o;
push@result,{name=>'pi',content=>$1};
shift@$document;
# /^\x28\x28\?[^\?]*\?+(?:(?:\x29|\x29?[^\x29\?][^\?])\?+)*\x29\x29$/o
}elsif($line=~/^#/o){
# Comment
shift@$document;
}elsif($line=~/^(={1,4})\s+(.*\S)/o){
# Headline 1-4
push@result,{name=>'headline',level=>length$1,content=>_parseInline($2)};
shift@$document;
}elsif($line=~/^(\+{1,2})\s+(.*\S)/o){
# Headline 5-6
push@result,{name=>'headline',level=>4+length$1,content=>_parseInline($2)};
shift@$document;
}elsif($line=~/^<<<\s*(.*\S)/o){
# Include
push@result,{name=>'include',content=>$1};
shift@$document;
}elsif($line=~/^\s*$/o){
# WHITELINE
shift@$document;
}elsif($line=~/^\s+(?![\*:])(?!\(\d+\))\S/o){
# Verbatim
push@result,_verbatim($document);
}elsif($line=~/^\s*\*/o){
# ItemList
push@result,_itemlist($document);
}elsif($line=~/^\(\d*\)/o){
# EnumList
push@result,_enumlist($document);
}elsif($line=~/^\s*:/o){
# DescList
push@result,_desclist($document);
}elsif($line=~/^\s*\-\-\-/o){
# MethodList
push@result,_methodlist($document);
}elsif($line=~/^(.*\S)/o){
# TextBlock
push@result,_textblock($document);
}else{
#
shift@$document;
}
}
return@result;
}
# private _textblock( $document )
sub _textblock{
my$document=shift;
shift(@$document)=~/^(\s*(?:(?:[\*:]|\(\d+\))\s*)?)(.*\S)/o;
my$baseline=length$1;
my$result=$2;
while(@$document){
$document->[0]=~/^#/o&&shift@$document&&next;
$document->[0]=~/^[=+]/o&&last;
$document->[0]=~/^\s{$baseline}(\s*(?:(?:[\*:]|\(\d+\))\s*)?)(\S.*\S|\S)/&&!$1||last;
$result.=$2;
shift@$document;
}
return{name=>'textblock',content=>_parseInline($result)};
}
# private _verbatim( $document )
sub _verbatim{
my$document=shift;
shift(@$document)=~/^(\s+)(.*\S)/o;
my$baseline=length$1;
my$result=$2;
while(@$document){
$document->[0]=~/^#/o&&shift@$document&&next;
$document->[0]=~/^\s{$baseline}(.*\S|\s*)/||last;
$result.="\n".$1;
shift@$document;
}
return{name=>'verbatim',content=>$result};
}
# private _itemlist( $document )
sub _itemlist{
return __list(shift,{parent=>'itemlist',child=>'listitem',regex=>'\*'});
}
# private _enumlist( $document )
sub _enumlist{
return __list(shift,{parent=>'enumlist',child=>'enumlistitem',regex=>'\(\d+\)'});
}
# private __list( $document, $option )
sub __list{
my$document=shift;
my$option=shift;
$document->[0]=~/^(\s*)$option->{regex}/||die;
my$baseline=length$1;
my@result;
while(@$document){
$document->[0]=~/^#/o&&shift@$document&&next;
$document->[0]=~/^\s{$baseline}$option->{regex}/||last;
$document->[0]=~/^(\s*$option->{regex}\s*)(\S?)/;
my@item;
my$baseline=length$1;
if($2){
push@item,_textblock($document);
}else{
shift@$document;
}
while(@$document){
$document->[0]=~/^#/o&&shift@$document&&next;
$document->[0]=~/^\s{$baseline}(\s*(?:[\*:]|\(\d+\))\s*)?(\S?)/||last;
if($1){
$1=~/(\S)/o;
if('*'eq$1){
push@item,_itemlist($document);
}elsif('('eq$1){
push@item,_enumlist($document);
}elsif(':'eq$1){
push@item,_desclist($document);
}else{
push@item,_verbatim($document);
}
}elsif($2){
push@item,_textblock($document);
}else{
shift@$document;
}
}
push@result,{name=>$option->{child},content=>\@item};
}
return{name=>$option->{parent},content=>\@result};
}
# private _desclist( $document )
sub _desclist{
return __dlist(shift,{parent=>'desclist',child=>'desclistitem',regex=>':'});
}
# private _methodlist( $document )
sub _methodlist{
return __dlist(shift,{parent=>'methodlist',child=>'methodlistitem',regex=>'\-\-\-'});
}
# private _dlist( $document, $option )
sub __dlist{
my$document=shift;
my$option=shift;
my@result;
$document->[0]=~/^(\s*)$option->{regex}/||die;
my$baseline=length$1;
while(@$document){
$document->[0]=~/^#/o&&shift@$document&&next;
$document->[0]=~/^(\s{$baseline}$option->{regex}\s*)(.*)/||last;
my$base=length$1;
$2=~/(\S.*\S|\S)/o||die'no Term Part';
my@item={name=>'term',content=>$option->{'parent'}eq'methodlist'?$1:_parseInline($1)};
shift@$document;
shift@$document while$document->[0]=~/^#/o;
$document->[0]=~/^(\s*)/o;
my$baseline=length$1;
$baseline>=$base
or die'DescriptionパートのBaselineはTermパートのテキスト部分と同じかより深くないといけません。';
my@description;
while(@$document){
$document->[0]=~/^#/o&&shift@$document&&next;
$document->[0]=~/^\s{$baseline}(\s*(?:[\*:]|\(\d+\))\s*)?(\S?)/||last;
if($1){
$1=~/^(\S)/o;
if('*'eq$1){
push@description,_itemlist($document);
}elsif('('eq$1){
push@description,_enumlist($document);
}elsif(':'eq$1){
push@description,_desclist($document);
}else{
push@description,_verbatim($document);
}
}elsif($2){
push@description,_textblock($document);
}else{
shift@$document;
}
}
push@item,{name=>'description',content=>\@description};
push@result,{name=>$option->{child},content=>\@item};
}
return{name=>$option->{parent},content=>\@result};
}
# private _parseInline( $str )
sub _parseInline{
my$str=shift;
return[{name=>'text',content=>$str}]unless$str=~/[\x28\x29]{2}/o;
my@line=
map{/\x28\x28([^\x28])/o?('((',$1) : /([^\x29])\x29\x29/o?('))',$1) : $_}
grep{$_&&length$_}$str=~/(.*?)(\x28\x28[^\x28]|[^\x29]\x29\x29|$)/go;
return _parseInlineArray(\@line);
}
# private _parseInlineArray( $inline [,$option] )
sub _parseInlineArray{
my$line=shift;
my$option=@_?shift:{};
my$regexInline=qr/[-*\{|%]/;
my%inline=(
'*'=>{name=>'emphasis', close=>'*'},
'{'=>{name=>'code', close=>'}'},
'|'=>{name=>'var', close=>'|'},
'%'=>{name=>'keyboard', close=>'%'},
':'=>{name=>'term', close=>':'},
'<'=>{name=>'reference',close=>'>'},
'-'=>{name=>'footnote', close=>'-'},
# "'"=>{name=>'verbatim', close=>"'"},
'?'=>{name=>'pi', close=>'?'},
);
my@result;
my$tmp='';
while(@$line){
my$fragment=shift@$line;
if(!defined$fragment){
die'broken inline';
}elsif('))'eq$fragment){
my$fragment=shift@$line;
if(!defined$fragment){
die'broken inline';
}elsif($option->{'close'}&&$fragment eq$option->{'close'}){
last;
}else{
$tmp.=$fragment.'))';
}
}elsif('(('eq$fragment){
$fragment=shift@$line;
if(!defined$fragment){
die'broken inline';
}elsif("'"eq$fragment){
# Verbatim
while(@$line){
my$fragment=shift@$line;
if(!defined$fragment){
die'broken inline';
}elsif('))'eq$fragment){
my$fragment=shift@$line;
if(!defined$fragment){
die'broken inline';
}elsif("'"eq$fragment){
last;
}else{
$tmp.=$fragment.'))';
}
}else{
$tmp.=$fragment;
}
}
}elsif(!$option->{'isAnchor'}&&$fragment=~/[:<]/o){
# Label or Reference
push@result,{name=>'text',content=>$tmp}if$tmp;
$tmp='';
if('<'eq$fragment){
push@result,_reference($line);
}else{
push@result,{name=>$inline{$fragment}->{'name'}
,content=>_parseInlineArray($line,{close=>$inline{$fragment}->{'close'},isAnchor=>1})};
}
}elsif($fragment=~/$regexInline/o){
push@result,{name=>'text',content=>$tmp}if$tmp;
$tmp='';
push@result,{name=>$inline{$fragment}->{'name'}
,content=>_parseInlineArray($line,{close=>$inline{$fragment}->{'close'}})};
}else{
$tmp.='(('.$fragment;
}
}else{
$tmp.=$fragment;
}
}
push@result,{name=>'text',content=>$tmp}if$tmp;
return\@result;
}
# private _reference( $line [,$option] )
sub _reference{
my$line=shift;
my$option=@_?shift:{};
my$reference;
while(@$line){
my$fragment=shift@$line;
if(!defined$fragment){
die'broken inline';
}elsif('))'eq$fragment){
$fragment=shift@$line;
if(!defined$fragment){
die'broken inline';
}elsif('>'eq$fragment){
last;
}else{
$reference.=$fragment.'))';
}
}else{
$reference.=$fragment;
}
}
$reference=~/(?:\s*(?:([^"'|\/]+)|"([^"\\]*(?:\\.[^"\\]*)*)"|'([^'\\]*(?:\\.[^'\\]*)*)')\s*\|)?(.+)/o
or return'';
my$display=$1||$2||$3;
my$label=$4;
my$type;
if($label=~s/^UR[LI]:(.+)/$1/o){
$display=$1 unless defined$display;
$type='uri';
}elsif(!$display){
$display=$label;
$label=_getPlain(_parseInline($label));
$type='term';
}else{
$display=~s/\\(.)/$1/go;
$type='term';
}
return{name=>'reference',label=>$label,type=>$type,content=>_parseInline($display)};
}
# private _getPlain( $line )
sub _getPlain{
my$line=shift;
return$line unless ref$line;
my$result='';
$result.=_getPlain($_->{'content'})for@{$line};
return$result;
}
}
#------------------------------------------------------------------------------#
=head2 RD
RDを処理します。
=cut
{
package RD;
my%toString;
# public RD->new( [$document] )
sub new{
my$class=shift;
my$document=@_?shift:'';
my$self={};
bless$self,$class;
$self->load($document);
return$self;
}
# public $rd->load( $document )
sub load{
my$self=shift;
my$document=shift;
$self->{document}=$document;
return $self->{objRD}=RD::Parser->parse($document);
}
# public $rd->toString( $type [, [key => value] ..] )
sub toString{
my$self=shift;
my$type=shift;
my$generator=new RD::Generator($type);
return$generator->generate($self->{objRD},@_);
}
}
#------------------------------------------------------------------------------#
&main if$::CF{'program'}eq __FILE__;
#-------------------------------------------------
# 初期設定
#
package main;
BEGIN{
unless($CF{'program'}){
$CF{'program'}=__FILE__;
$SIG{'__DIE__'}=sub{print ::euc2sjis(@_?"$_[0]":'ERROR');exit};
}
$CF{'RDParser'}=__FILE__;
}
1;
__END__
=begin
#------------------------------------------------------------------------------#
= おまけ〜RD2XML Test Suite
RD解析のテストケースなのです。
#?index?#
#------------------------------------------------------------------------------#
== Baseline
|この行はトップレベルのTextBlockの行だとします。
|<- したがって、Baselineは左端です。
*|List内では (1)
|<- このようにBaselineは(1)の行で決定されます。
* |同じListでもListItem毎にBaselineが決定されます。
|<- したがって、1番目のListItemとは違うここにBaselineがあります。
== Headline
= Headline 1.
=== Headline 1.1.1.
+ Headline 1.1.1.1.1.
#------------------------------------------------------------------------------#
== TextBlock
これはTextBlockです。
TextBlockの2行目の行です。
この行はTextBlockでなくVerbatimです。
* そしてこの行はListの行です。((-正確にはListItemの中のTextBlockの行でも
あるのですが-))
#------------------------------------------------------------------------------#
== Verbatim
これはVerbatimです。
最初の行より深いインデントを持っても、同じVerbatimの行になります。
* この行はListに見えますが、Verbatimです。
しかしこの行は最初の行よりも浅くインデントされているので、別のVerbatim
#comment
の行になります。
#------------------------------------------------------------------------------#
== Item List
* 親Listの最初のItem
* 子Listの最初のItem
* 孫Listの最初のItem
* 孫Listの2番目のItem
* 曾孫Listの最初のItem
#comment
* 曾孫Listの2番目のItem
* 玄孫Listの最初のItem
* 子Listの2番目のItem
親ListのItemに含まれるTextBlock
#------------------------------------------------------------------------------#
== Enum List
(1) 親Listの最初のItem
* 子ListとなるItemList
(2) 親Listの2番目のItem
#comment
(10) 番号は無視されます。
#------------------------------------------------------------------------------#
== Desc List
:Term
Descriptionの最初の行
2番目の行
:Term 2
#comment
* aaa
#comment
* Listも含む事ができます
* ...
:Identity or URL
らべる
#------------------------------------------------------------------------------#
== Method List
--- Array#each {|i| ... } # => Labelは"Array#each"
各項目に対してブロックを評価する。
--- Array#index(val) # => Labelは"Array#index"
((|val|))と同じ値である最初の項目を返す。同じ項目が無いときには
(({nil}))を返す。
((*Em*)) (({while gets...})) ((|var|)) ((%ruby -v%)) ((:Term:)) ((