include_once("inc/common.php");
include_once("inc/html.php");
include_once("inc/format.php");
require_once("lang/en/lang.php");
require_once("lang/".$conf['lang']."/lang.php");
/**
* The main parser function. Accepts raw data and returns
* valid xhtml
*
* The work is done in three passes:
*  1. firstpass() swaps every matched span for a token and stores the
*     rendered replacement for that token in $table
*  2. simpleformat() handles the inline formatting of the remaining text
*  3. every token is replaced by the value stored in $table
*
* Reads and writes the global $parser (key toc) and reads the global
* $conf (key camelcase).
*
* @param string $text raw wiki text
* @return string rendered text with the padding trimmed off
*/
function parse($text){
global $parser;
global $conf;
// $table maps token to replacement, $hltable collects the headlines
$table = array();
$hltable = array();
//preparse
$text = preparse($text,$table,$hltable);
//padding with a newline
$text = "\n".$text."\n";
#for link matching
// character class fragments that are interpolated into the URL patterns below
$urls = '(https?|telnet|gopher|file|wais|ftp|ed2k|irc)';
$ltrs = '\w';
$gunk = '/\#~:.?+=&%@!\-';
$punc = '.:?\-;,';
$host = $ltrs.$punc;
$any = $ltrs.$gunk.$punc;
/* first pass */
//preformatted texts
// NOTE(review): several patterns below are the identical "#(.*?)#se" and one
// spans two source lines - the tag names that should delimit them look lost.
// The code call also references groups 2 and 3 although only one group is
// visible. Confirm against the upstream file before relying on this pass.
firstpass($table,$text,"#(.*?)#se","preformat('\\1','nowiki')");
firstpass($table,$text,"#%%(.*?)%%#se","preformat('\\1','nowiki')");
firstpass($table,$text,"#(.*?)
#se","preformat('\\3','code','\\2')");
firstpass($table,$text,"#(.*?)#se","preformat('\\1','file')");
// html and php includes
firstpass($table,$text,"#(.*?)#se","preformat('\\1','html')");
firstpass($table,$text,"#(.*?)#se","preformat('\\1','php')");
// codeblocks
// lines indented by two or more spaces or a tab whose first character is
// not a list bullet; the leading newline is handed back as left padding
firstpass($table,$text,"/(\n( {2,}|\t)[^\*\-\n ][^\n]+)(\n( {2,}|\t)[^\n]*)*/se","preformat('\\0','block')","\n");
//check if toc is wanted
// only decided here when the caller has not preset $parser[toc]
if(!isset($parser['toc'])){
if(strpos($text,'~~NOTOC~~')!== false){
$text = str_replace('~~NOTOC~~','',$text);
$parser['toc'] = false;
}else{
$parser['toc'] = true;
}
}
//headlines
format_headlines($table,$hltable,$text);
//links
firstpass($table,$text,"#\[\[([^\]]+?)\]\]#ie","linkformat('\\1')");
//media
firstpass($table,$text,"/\{\{([^\}]+)\}\}/se","mediaformat('\\1')");
//match full URLs (adapted from Perl cookbook)
// $urls contributes a capture group of its own, so the trailing context
// is group 4 here while it is group 3 in the patterns that follow
firstpass($table,$text,"#(\b)($urls://[$any]+?)([$punc]*[^$any])#ie","linkformat('\\2')",'\1','\4');
//short www URLs
firstpass($table,$text,"#(\b)(www\.[$host]+?\.[$host]+?[$any]+?)([$punc]*[^$any])#ie","linkformat('http://\\2|\\2')",'\1','\3');
//windows shares
firstpass($table,$text,"#([$gunk$punc\s])(\\\\\\\\[$host]+?\\\\[$any]+?)([$punc]*[^$any])#ie","linkformat('\\2')",'\1','\3');
//short ftp URLs
firstpass($table,$text,"#(\b)(ftp\.[$host]+?\.[$host]+?[$any]+?)([$punc]*[^$any])#ie","linkformat('ftp://\\2')",'\1','\3');
// email@domain.tld
firstpass($table,$text,"#<([\w0-9\-_.]+?)@([\w\-]+\.([\w\-\.]+\.)*[\w]+)>#ie", "linkformat('\\1@\\2')");
//CamelCase if wanted
if($conf['camelcase']){
firstpass($table,$text,"#(\b)([A-Z]+[a-z]+[A-Z][A-Za-z]*)(\b)#se","linkformat('\\2')",'\1','\3');
}
// everything that is still plain text gets escaped; the tokens inserted
// above stand in for the already rendered parts
$text = htmlspecialchars($text);
//smileys
smileys($table,$text);
//acronyms
acronyms($table,$text);
/* second pass for simple formatting */
$text = simpleformat($text);
/* third pass - insert the matches from 1st pass */
reset($table);
while (list($key, $val) = each($table)) {
$text = str_replace($key,$val,$text);
}
/* remove empty paragraphs */
// the pattern uses the double quote as its delimiter
// NOTE(review): as written it only matches runs of newlines - the paragraph
// tags it presumably looked for seem to be missing; confirm.
$text = preg_replace('"
\n*
"','',$text);
/* remove padding */
$text = trim($text);
return $text;
}
/**
* This preparses the text by walking it line by line. This
* is the only place where line numbers are still available (needed
* for section edit). Some precautions have to be taken to not change
* any noparse block.
*
* Registers two paragraph tokens in $table, replaces headline lines by the
* token returned from tokenize_headline() and turns empty lines into a
* paragraph close followed by a paragraph open.
*
* @param string $text     raw wiki text
* @param array  $table    token table, filled by reference
* @param array  $hltable  headline table, passed on to tokenize_headline()
* @return string the text wrapped in the paragraph open and close tokens
*/
function preparse($text,&$table,&$hltable){
$lines = split("\n",$text);
//prepare tokens for paragraphs
$po = mkToken();
$table[$po] = "";
$pc = mkToken();
$table[$pc] = "
";
// NOTE(review): the next line is truncated. The loop header runs straight
// into the argument list of a preg_replace call, and $line and $noparse are
// used below without any visible assignment. The loop condition and the
// start of the loop body need to be restored from the upstream file.
// NOTE(review): the same-line noparse patterns that follow are mostly the
// identical "#(.*?)#" - their delimiting tags look lost as well.
for ($l=0; $l(.*?)#","",$line);
$line = preg_replace("#%%(.*?)%%#","",$line);
$line = preg_replace("#(.*?)
#","",$line);
$line = preg_replace("#(.*?)#","",$line);
$line = preg_replace("#(.*?)#","",$line);
$line = preg_replace("#(.*?)#","",$line);
//check for start of multiline noparse areas
// $noparse is set to the marker that has to be found before parsing resumes
if(preg_match('#^.*?<(nowiki|code|php|html|file)( (\w+))?>#',$line,$matches)){
list($noparse) = split(" ",$matches[1]); //remove options
$noparse = ''.$noparse.'>';
continue;
}elseif(preg_match('#^.*?%%#',$line)){
$noparse = '%%';
continue;
}
}
//handle headlines
// a headline is a line wrapped in runs of two or more equal signs
if(preg_match('/^(\s)*(==+)(.+?)(==+)(\s*)$/',$lines[$l],$matches)){
//get token
$tk = tokenize_headline($hltable,$matches[2],$matches[3],$l);
//replace line with token
$lines[$l] = $tk;
}
//handle paragraphs
// an empty line closes the running paragraph and opens the next one
if(empty($lines[$l])){
$lines[$l] = "$pc\n$po";
}
}
//reassemble full text
$text = join("\n",$lines);
//open first and close last paragraph
$text = "$po\n$text\n$pc";
return $text;
}
/**
 * Records a headline in the lookup table for later processing and hands
 * back the unique token that stands in for it.
 *
 * The length of $pre selects the level: 2 gives 5, 3 gives 4, 4 gives 3,
 * 5 gives 2 and every other length gives 1.
 *
 * @param array  $hltable headline lookup table, appended to by reference
 * @param string $pre     the opening run of headline marker characters
 * @param string $hline   headline text, trimmed and escaped before storing
 * @param int    $lno     line index the headline was found on
 * @return string the token identifying the headline
 */
function tokenize_headline(&$hltable,$pre,$hline,$lno){
    // marker length => headline level; lengths not listed fall back to 1
    $levelByLength = array(2 => 5, 3 => 4, 4 => 3, 5 => 2);
    $length = strlen($pre);
    $level = isset($levelByLength[$length]) ? $levelByLength[$length] : 1;

    $placeholder = mkToken();

    $entry = array();
    $entry['name'] = htmlspecialchars(trim($hline));
    $entry['level'] = $level;
    $entry['line'] = $lno;
    $entry['token'] = $placeholder;
    $hltable[] = $entry;

    return $placeholder;
}
/**
 * Builds the replacement for every headline token collected while
 * preparsing and stores it in the first pass table.
 *
 * For each entry of $hltable a unique hash is derived via cleanID(), the
 * headline string is assembled and saved under the headline token, and
 * headlines up to $conf[maxtoclevel] are remembered for the table of
 * contents. Headlines up to $conf[maxseclevel] that are more than one line
 * away from the previous section start get the section edit string put in
 * front. Trailing tokens are appended to $text, and a TOC token is
 * prepended when $parser[toc] is set and more than two headlines qualified.
 *
 * NOTE(review): many of the string literals in here are empty, so the
 * headline, section edit and closing markup presumably went missing from
 * this copy - confirm against the upstream file.
 *
 * @param array  $table   first pass token table, filled by reference
 * @param array  $hltable headline table built by tokenize_headline()
 * @param string $text    page text, extended by reference
 * @return void
 */
function format_headlines(&$table,&$hltable,&$text){
    global $parser;
    global $conf;
    global $lang;
    global $ID;

    // walk the headline table prepared in preparsing
    $last = 0;
    $cnt = 0;
    $hashs = array();
    // has to exist even when no headline qualifies for the TOC: the count()
    // at the end would otherwise be handed an undefined variable
    $content = array();

    foreach($hltable as $hl){
        $cnt++;

        //make unique headlinehash
        $hash = cleanID($hl['name']);
        $i=2;
        while(in_array($hash,$hashs))
            $hash = cleanID($hl['name']).$i++;
        $hashs[] = $hash;

        // build headline
        $headline = "\n"; //close paragraph
        if($cnt - 1) $headline .= ''; //no close on first HL
        $headline .= '';
        $headline .= '';
        $headline .= $hl['name'];
        $headline .= '';
        $headline .= '';
        $headline .= "\n
"; //open new paragraph

        //remember for autoTOC
        if($hl['level'] <= $conf['maxtoclevel']){
            $content[] = array('id' => $hash,
                               'name' => $hl['name'],
                               'level' => $hl['level']);
        }

        //add link for section edit, skipping headlines that directly
        //follow the previous section start
        if( ($hl['level'] <= $conf['maxseclevel']) &&
            ($hl['line'] - $last > 1)){
            $secedit = '';
            $headline = $secedit.$headline;
            $last = $hl['line'];
        }

        //put headline into firstpasstable
        $table[$hl['token']] = $headline;
    }

    //add link for editing the last section
    if($last){
        $secedit = '';
        $token = mkToken();
        $text .= $token;
        $table[$token] = $secedit;
    }

    //close last div
    if ($cnt){
        $token = mkToken();
        $text .= $token;
        $table[$token] = '
';
    }

    //prepend toc
    if ($parser['toc'] && count($content) > 2){
        $token = mkToken();
        $text = $token.$text;
        $table[$token] = html_toc($content);
    }
}
/**
 * Formats various link types using the functions from format.php
 *
 * $match is either a bare target or a target and a title separated by a
 * pipe. The target decides which formatter runs, checked in this order:
 * a greater-than sign after the first character (InterWiki), a scheme
 * followed by :// (external URL), a leading double backslash (windows
 * share), an address containing an at sign (email), anything else (wiki
 * link). When the title is an image reference in double curly braces the
 * media formatter is applied on top.
 *
 * @param string $match link source as captured by the first pass
 * @return string whatever format_link_build() returns for the link
 */
function linkformat($match){
    global $conf;
    //unescape
    $match = str_replace('\\"','"',$match);

    //prepare variables for the formaters
    // explode() replaces split(), which is gone since PHP 7.0; the padding
    // yields an empty title instead of a missing index when no pipe is there
    $parts = array_pad(explode('|',$match,2),2,'');
    $link = array();
    $link['url'] = trim($parts[0]);
    $link['name'] = trim($parts[1]);
    $link['class'] = '';
    $link['target'] = '';
    $link['style'] = '';
    $link['pre'] = '';
    $link['suf'] = '';
    $link['more'] = '';

    //save real name for image check
    $realname = $link['name'];

    /* put it into the right formater */
    if(strpos($link['url'],'>')){
        // InterWiki
        $link = format_link_interwiki($link);
    }elseif(preg_match('#^([a-z0-9]+?){1}://#i',$link['url'])){
        // external URL
        $link = format_link_externalurl($link);
    }elseif(preg_match("/^\\\\\\\\([a-z0-9\-_.]+)\\\\(.+)$/",$link['url'])){
        // windows shares
        $link = format_link_windows($link);
    }elseif(preg_match('#([a-z0-9\-_.]+?)@([\w\-]+\.([\w\-\.]+\.)*[\w]+)#i',$link['url'])){
        // email
        $link = format_link_email($link);
    }else{
        // wiki link
        $link = format_link_wiki($link);
    }

    //is realname an image? use media formater
    if(preg_match('#^{{.*?\.(gif|png|jpe?g)(\?.*?)?\s*(\|.*?)?}}$#',$realname)){
        // strip the surrounding curly braces before handing it on
        $link['name'] = substr($realname,2,-2);
        $link = format_link_media($link);
    }

    // build the replacement with the variables set by the formaters
    return format_link_build($link);
}
/**
* Simple text formatting and typography is done here
*
* Applies, in this order: inline formatting, horizontal rules, sub and
* superscript, quote blocks, the optional typography replacements
* (when $conf[typography] is set), forced linebreaks, lists, tables,
* footnotes and the custom replacements.
*
* @param string $text text after the first pass
* @return string formatted text
*/
function simpleformat($text){
global $conf;
// NOTE(review): the replacements below are only the captured group, so as
// written they merely strip the delimiters - the wrapping markup seems to
// be missing from this copy; confirm against the upstream file.
$text = preg_replace('/__(.+?)__/s','\1',$text); //underline
$text = preg_replace('/\/\/(.+?)\/\//s','\1',$text); //emphasize
$text = preg_replace('/\*\*(.+?)\*\*/s','\1',$text); //bold
$text = preg_replace('/\'\'(.+?)\'\'/s','\1
',$text); //code
# $text = preg_replace('/(\W)--(\w)--(\W)/s','\1\2\3',$text); //deleted special case
# $text = preg_replace('/(\W)--(\w\w)--(\W)/s','\1\2\3',$text); //deleted special case
# $text = preg_replace('/(\W)--(\w[^-]+?\w)--(\W)/s','\1\2\3',$text); //deleted
// a line made of four or more dashes
$text = preg_replace('/^(\s)*----+(\s*)$/m',"\n
\n",$text); //hr
//sub and superscript
$text = preg_replace('#<sub>(.*?)</sub>#is','\1',$text);
$text = preg_replace('#<sup>(.*?)</sup>#is','\1',$text);
//do quoting
// the e modifier makes preg_replace evaluate the replacement as PHP code,
// which is how quoteformat() gets called with the whole match
$text = preg_replace("/\n((>)[^\n]*?\n)+/se","'\n'.quoteformat('\\0').'\n'",$text);
// Typography
if($conf['typography']){
$text = preg_replace('/([^-])--([^-])/s','\1–\2',$text); //endash
$text = preg_replace('/([^-])---([^-])/s','\1—\2',$text); //emdash
$text = preg_replace('/"([^\"]+?)"/s','“\1”',$text); //curly quotes
$text = preg_replace('/(\s)\'(\S)/m','\1‘\2',$text); //single open quote
$text = preg_replace('/(\S)\'/','\1’',$text); //single closing quote or apostrophe
// this pattern has no capture groups, so the two group references in the
// replacement expand to nothing
$text = preg_replace('/\.\.\./','\1…\2',$text); //ellipsis
$text = preg_replace('/(\d+)x(\d+)/i','\1×\2',$text); //640x480
// the longer arrow forms are replaced before their shorter prefixes
$text = preg_replace('/>>/i','»',$text); // >>
$text = preg_replace('/<</i','«',$text); // <<
$text = preg_replace('/<->/i','↔',$text); // <->
$text = preg_replace('/<-/i','←',$text); // <-
$text = preg_replace('/->/i','→',$text); // ->
$text = preg_replace('/<=>/i','⇔',$text); // <=>
$text = preg_replace('/<=/i','⇐',$text); // <=
$text = preg_replace('/=>/i','⇒',$text); // =>
$text = preg_replace('/\(c\)/i','©',$text); // copyright
$text = preg_replace('/\(r\)/i','®',$text); // registered
$text = preg_replace('/\(tm\)/i','™',$text); // trademark
}
//forced linebreaks
// two backslashes followed by whitespace; the whitespace is kept
$text = preg_replace('#\\\\\\\\(\s)#',"
\\1",$text);
// lists (blocks leftover after blockformat)
$text = preg_replace("/(\n( {2,}|\t)[\*\-][^\n]+)(\n( {2,}|\t)[^\n]*)*/se","\"\\n\".listformat('\\0')",$text);
// tables
// consecutive lines that start and end with a pipe or caret
$text = preg_replace("/\n(([\|\^][^\n]*?)+[\|\^] *\n)+/se","\"\\n\".tableformat('\\0')",$text);
// footnotes
$text = footnotes($text);
// run custom text replacements
$text = customs($text);
return $text;
}
/**
* Does the footnote formatting
*
* Looks for text wrapped in double parentheses and replaces the first
* occurrence, together with one optional leading space, by a numbered
* marker.
*
* NOTE(review): this copy is incomplete. The while block is never closed
* before the function ends, $fn and $linkb are assigned but not used, and
* the return sits inside the loop. The missing lines have to be restored
* from the upstream file.
*
* @param string $text text to scan for footnotes
* @return string text with the footnote source replaced
*/
function footnotes($text){
// running footnote number
$num = 0;
while (preg_match('/\(\((.+?)\)\)/s',$text,$match)){
$num++;
// footnote body without the surrounding parentheses
$fn = $match[1];
$linkt = ''.$num.')';
$linkb = ''.$num.')';
// the limit of 1 replaces only the occurrence that was just matched
$text = preg_replace('/ ?\(\((.+?)\)\)/s',$linkt,$text,1);
// only the first footnote appends this string
if($num == 1) $text .= '
';
return $text;
}
/**
 * Runs a first pass replacement for every smiley listed in
 * conf/smileys.conf.
 *
 * Everything from a hash sign to the end of a line is dropped as a comment
 * and empty lines are skipped. The first whitespace separated field is the
 * smiley text; it only matches between two non-word characters, which are
 * handed back as padding around the token.
 *
 * NOTE(review): the replacement is an empty string here, so the image markup
 * presumably went missing from this copy - confirm.
 *
 * @param array  $table first pass token table, filled by reference
 * @param string $text  page text, changed by reference
 * @return void
 */
function smileys(&$table,&$text){
    $rows = file('conf/smileys.conf');
    foreach($rows as $row){
        //ignore comments and surrounding whitespace
        $row = trim(preg_replace('/#.*$/','',$row));
        if(empty($row)) continue;

        $fields = preg_split('/\s+/',$row,2);
        $replacement = '';
        $needle = preg_quote($fields[0],'/');

        firstpass($table,$text,'/(\W)'.$needle.'(\W)/s',$replacement,"\\1","\\2");
    }
}
/**
 * Runs a first pass replacement for every acronym listed in
 * conf/acronyms.conf.
 *
 * Everything from a hash sign to the end of a line is dropped as a comment
 * and empty lines are skipped. Each remaining line is split into the
 * acronym and its description; the acronym is matched on word boundaries.
 *
 * NOTE(review): the replacement is just the matched acronym and the
 * description is not used, so the wrapping markup presumably went missing
 * from this copy - confirm.
 *
 * @param array  $table first pass token table, filled by reference
 * @param string $text  page text, changed by reference
 * @return void
 */
function acronyms(&$table,&$text){
    $rows = file('conf/acronyms.conf');
    foreach($rows as $row){
        //ignore comments and surrounding whitespace
        $row = trim(preg_replace('/#.*$/','',$row));
        if(empty($row)) continue;

        list($short,$meaning) = preg_split('/\s+/',$row,2);
        $pattern = '/(\b)('.preg_quote($short,'/').')(\b)/s';

        firstpass($table,$text,$pattern,"\\2","\\1","\\3");
    }
}
/**
 * Applies the custom text replacements listed in conf/custom.conf.
 *
 * A rule line holds a delimited regular expression with optional modifiers,
 * whitespace, and a replacement wrapped in single quotes. Lines that do not
 * have this shape are ignored.
 *
 * @param string $text text to run the replacements on
 * @return string text after all rules were applied in file order
 */
function customs($text){
    $rules = file ('conf/custom.conf');
    foreach($rules as $rule){
        //a comment only counts when no slash follows the hash sign, so a
        //hash inside the regexp survives
        $rule = trim(preg_replace('/#[^\/]*$/','',$rule));
        if(empty($rule)) continue;

        //skip anything that is not "regexp 'replacement'"
        if(!preg_match('#^(/.+/\w*)\s+\'(.*)\'$#',$rule,$parts)) continue;

        $text = preg_replace($parts[1],$parts[2],$text);
    }
    return $text;
}
/**
 * Replaces every match of $regexp in $text by a fresh token and stores the
 * rendered replacement for that token in $table.
 *
 * A trailing e modifier is taken off for locating the match and inserting
 * the token, and put back when the matched text itself is run through
 * $replace.
 *
 * @param array  $table   token table, filled by reference
 * @param string $text    text to work on, changed by reference
 * @param string $regexp  pattern, optionally ending in the e modifier
 * @param string $replace replacement applied to the matched text
 * @param string $lpad    replacement text put in front of the token
 * @param string $rpad    replacement text put behind the token
 * @return void
 */
function firstpass(&$table,&$text,$regexp,$replace,$lpad='',$rpad=''){
    //split the pattern into a plain form and the form used on the match
    $evaluated = (substr($regexp,-1) == 'e');
    $plain = $evaluated ? substr($regexp,0,-1) : $regexp;
    $full = $evaluated ? $plain.'e' : $plain;

    for(;;){
        if(!preg_match($plain,$text,$found)) break;

        $placeholder = mkToken();
        //only the first remaining match is swapped per round
        $text = preg_replace($plain,$lpad.$placeholder.$rpad,$text,1);
        $table[$placeholder] = preg_replace($full,$replace,$found[0]);
    }
}
/**
 * Creates a placeholder token: an md5 hash of a random unique id, wrapped
 * in tilde characters.
 *
 * @return string the token
 */
function mkToken(){
    $seed = uniqid(rand(), true);
    $digest = md5($seed);
    return '~'.$digest.'~';
}
/**
* Do quote blocks
*
* Walks the block line by line, counts the quote markers at the start of
* each line and emits opening or closing strings whenever that count
* differs from the one of the previous line.
*
* NOTE(review): split() was removed in PHP 7.0, so this function needs an
* older runtime as written. Several string literals below are empty or
* hold only newlines - the markup presumably went missing from this copy.
*
* FIXME fix paragraphs
*
* @param string $block the quote block
* @return string the formatted block
*/
function quoteformat($block){
$block = trim($block);
$lines = split("\n",$block);
// quote level of the previous line
$lvl = 0;
$ret = "";
foreach ($lines as $line){
//remove the quote markers and count them
// NOTE(review): a slice of four characters is compared with a literal of
// one character, so this can only match a line that is nothing but that
// character. The literal was presumably a four character escaped form;
// confirm against the upstream file.
$cnt = 0;
while(substr($line,0,4) == '>'){
$line = substr($line,4);
$cnt++;
}
//compare to last level and open or close new divs if needed
if($cnt > $lvl){
// deeper than before: one opening string per additional level
$ret .= "\n";
for ($i=0; $i< $cnt - $lvl; $i++){
$ret .= '';
}
$ret .= "\n
";
}elseif($cnt < $lvl){
// shallower than before: one closing string per level left
$ret .= "\n
";
for ($i=0; $i< $lvl - $cnt; $i++){
$ret .= "
\n";
}
$ret .= "\n";
}elseif(empty($line)){
// same level and nothing left on the line
$ret .= "
\n";
}
//keep rest of line but trim left whitespaces
$ret .= ltrim($line)."\n";
//remember level
$lvl = $cnt;
}
//close remaining divs
$ret .= "
\n";
for ($i=0; $i< $lvl; $i++){
$ret .= "\n";
}
$ret .= "\n";
return "$ret";
}
/**
 * Converts a block of table source lines into the table output.
 *
 * Every line is cut into cells at caret and pipe characters: a caret starts
 * a head cell, a pipe a normal cell. The last separator of a line and any
 * whitespace behind it are dropped first. While writing a row, a cell
 * swallows the empty cells that follow it and counts them as column span.
 *
 * NOTE(review): the row and cell strings are identical for head and normal
 * cells and the computed column span is never written out, so the table
 * markup presumably went missing from this copy - confirm.
 *
 * @param string $block table source, one row per line
 * @return string the formatted table
 */
function tableformat($block) {
    $block = trim($block);
    // explode() instead of split(): the separator is a plain newline, and
    // split() was deprecated in PHP 5.3 and removed in PHP 7.0
    $lines = explode("\n",$block);
    $ret = "";

    //build a row array
    $rows = array();
    for($r=0; $r < count($lines); $r++){
        $line = $lines[$r];
        //remove last separator and trailing whitespace
        $line = preg_replace('/[\|\^]\s*$/', '', $line);
        $c = -1; //column counter, moved on by every separator
        for($chr=0; $chr < strlen($line); $chr++){
            if($line[$chr] == '^'){
                $c++;
                $rows[$r][$c]['head'] = true;
                $rows[$r][$c]['data'] = '';
            }elseif($line[$chr] == '|'){
                $c++;
                $rows[$r][$c]['head'] = false;
                $rows[$r][$c]['data'] = '';
            }else{
                $rows[$r][$c]['data'].= $line[$chr];
            }
        }
    }

    //build table
    $ret .= "
\n\n";
    for($r=0; $r < count($rows); $r++){
        $ret .= " \n";
        for ($c=0; $c < count($rows[$r]); $c++){
            $cspan=1;
            $data = trim($rows[$r][$c]['data']);
            $head = $rows[$r][$c]['head'];
            //join cells if next is empty; the untrimmed data is compared,
            //so a cell holding only blanks does not count as empty
            while($c < count($rows[$r])-1 && $rows[$r][$c+1]['data'] == ''){
                $c++;
                $cspan++;
            }
            if($cspan > 1){
                $cspan = 'colspan="'.$cspan.'"';
            }else{
                $cspan = '';
            }
            if ($head) {
                $ret .= " $data | \n";
            } else {
                $ret .= " $data | \n";
            }
        }
        $ret .= "
\n";
    }
    $ret .= "
\n";
    return $ret;
}
/**
 * Converts a block of indented list lines into nested list output.
 *
 * The indent of a line gives its level: one level per two leading spaces
 * plus one per leading tab. A line starting with a dash belongs to an
 * ordered list (ol), every other line to an unordered one (ul).
 *
 * NOTE(review): several string literals below are empty and the closing
 * strings start without an opening bracket, so the item and closing markup
 * presumably went missing from this copy - confirm.
 *
 * @param string $block the list block, starting with a newline
 * @return string the formatted list, wrapped in newlines
 */
function listformat($block){
    //remove 1st newline
    $block = substr($block,1);
    //unescape
    $block = str_replace('\\"','"',$block);

    //walk line by line
    $ret='';
    // explode() instead of split(): the separator is a plain newline, and
    // split() was deprecated in PHP 5.3 and removed in PHP 7.0
    $lines = explode("\n",$block);

    //build an item array
    $cnt=0;
    $items = array();
    foreach ($lines as $line){
        //get indention level
        $lvl = 0;
        $lvl += floor(strspn($line,' ')/2);
        $lvl += strspn($line,"\t");
        //remove indents
        $line = preg_replace('/^[ \t]+/','',$line);
        //get type of list
        $type = (substr($line,0,1) == '-') ? 'ol' : 'ul';
        // remove bullet and following spaces
        $line = preg_replace('/^[*\-]\s*/','',$line);
        //add item to the list
        $items[$cnt]['level'] = $lvl;
        $items[$cnt]['type'] = $type;
        $items[$cnt]['text'] = $line;
        //increase counter
        $cnt++;
    }

    $level = 0;
    //stack of the list types that are currently open
    $opens = array();
    foreach ($items as $item){
        if( $item['level'] > $level ){
            //open new list
            $ret .= "\n<".$item['type'].">\n";
            array_push($opens,$item['type']);
        }elseif( $item['level'] < $level ){
            //close last item
            $ret .= "\n";
            for ($i=0; $i<($level - $item['level']); $i++){
                //close higher lists
                $ret .= ''.array_pop($opens).">\n\n";
            }
        }else{
            //close last item
            $ret .= "\n";
        }
        //remember current level
        $level = $item['level'];
        //print item
        $ret .= '
';
        $ret .= ''.$item['text'].'';
    }

    //close remaining items and lists
    while ($open = array_pop($opens)){
        $ret .= "\n";
        $ret .= ''.$open.">\n";
    }
    return "\n".$ret."\n";
}
/**
 * Renders a preformatted span according to its type.
 *
 * Types handled: php (evaluated, output captured), html (passed through),
 * nowiki (escaped), file and block (escaped and wrapped), code (escaped
 * and wrapped, or highlighted by GeSHi when $option names a language).
 * php and html fall back to file unless $conf[phpok] respectively
 * $conf[htmlok] is set. Any other type returns the unescaped text as is.
 *
 * NOTE(review): the wrapper strings below hold only newlines, so the
 * surrounding markup presumably went missing from this copy - confirm.
 *
 * @param string $text   the matched text
 * @param string $type   one of php, html, nowiki, file, code, block
 * @param string $option language name for code highlighting
 * @return string the rendered text
 */
function preformat($text,$type,$option=''){
    global $conf;
    //unescape
    $text = str_replace('\\"','"',$text);

    if($type == 'php' && !$conf['phpok']) $type='file';
    if($type == 'html' && !$conf['htmlok']) $type='file';

    switch ($type){
        case 'php':
            // SECURITY NOTE(review): this executes page content as PHP. It is
            // only reached when $conf[phpok] is set; keep that switch off
            // unless every editor is trusted.
            ob_start();
            eval($text);
            $text = ob_get_contents();
            ob_end_clean();
            break;
        case 'html':
            //raw html is handed through untouched
            break;
        case 'nowiki':
            $text = htmlspecialchars($text);
            break;
        case 'file':
            $text = htmlspecialchars($text);
            $text = "
\n".$text."
\n";
            break;
        case 'code':
            if(empty($option)){
                $text = htmlspecialchars($text);
                $text = '
'.$text.'
';
            }else{
                require_once("inc/geshi.php");
                $geshi = new GeSHi($text, strtolower($option), "inc/geshi");
                $geshi->set_header_type(GESHI_HEADER_PRE);
                $geshi->enable_classes();
                $geshi->set_overall_class('code');
                $text = $geshi->parse_code();
            }
            $text = "\n".$text."\n";
            break;
        case 'block':
            $text = substr($text,1); //remove 1st newline
            // explode() instead of split(): the separator is a plain newline,
            // and split() was deprecated in PHP 5.3 and removed in PHP 7.0
            $lines = explode("\n",$text); //break into lines
            $text = '';
            foreach($lines as $line){
                $text .= substr($line,2)."\n"; //remove indents
            }
            $text = htmlspecialchars($text);
            $text = "
\n".$text."
\n";
            break;
    }
    return $text;
}
/**
 * Formats the content of a media reference.
 *
 * Text starting with the rss prefix is handed to format_rss() without that
 * prefix; everything else goes through the media link formatter.
 *
 * @param string $text content of the media reference
 * @return string whatever format_rss() or format_link_build() returns
 */
function mediaformat($text){
    global $conf;

    //unescape
    $text = str_replace('\\"','"',$text);

    //handle normal media stuff
    if(substr($text,0,4) != 'rss>'){
        $media = array();
        $media['name'] = $text;
        return format_link_build(format_link_media($media));
    }

    // format RSS
    $feed = substr($text,4);
    return format_rss($feed);
}
?>