";print_r($mol); echo "
"; print_r($nsecs); echo "

"; $secs = null; unset ($secs); $secs = $nsecs; } if ($WIKIPATH_DEBUG) $br = "
"; foreach ((array) $secs as $sec) $joined[]= join("$br\n", $sec); $rtn = join("$br\n", (array)$joined); if ($WIKIPATH_DEBUG) return "'''[$wikipath]''' $dout

$out\n$rtn\n----\n
"; return $rtn; } /* This is for debugging, it prints out a molecule. A molecule is the smallest indepedent piece of a wikipath. It is an individual working piece which starts with an element is followed by optional IDs. A molecule can be further subdivided, but the pieces are dependent upon each other and so they are more easily dealt with as a unit. Molecules are represented like this: [] [] [LIKE] or [RANGE] (optional first line) [] [SEC] [sec] or [] [EL] [el] (optional first or second line) [] [ID TYPE] [id] ... [] ... */ function wikipath_show_molecule($mol) { foreach ((array) $mol as $key => $val) foreach($val as $k => $v) $out1 .= "\n# [$k] $v"; return "\n
molecule { $out1\n}"; }

/*
  Parse a wikipath expression into an array of molecules so that the lookup
  code does not have to deal with the grammar.  Parsing and lookup are kept
  separate because parsing sometimes needs to look ahead (whether a leading
  token is a section or an element depends on what follows it).

  The function is recursive: each call consumes one token from the front of
  $wikipath and recurses on the remainder.

  $wikipath     remaining, unparsed part of the expression
  $molecule     molecule being built by the caller (NULL at the top level)
  $sec_after    (out) TRUE when a section/element molecule follows
  $t_sec_after  (out) TRUE when a section terminator ("%") followed one of
                the ids of the current molecule
  $dout         (out) human readable trace of the parse, for debugging

  Returns an array of molecules, or NULL when the expression cannot be parsed.
*/
function wikipath_parse($wikipath, $molecule, &$sec_after, &$t_sec_after, &$dout)
{
    $SEC_ELs  = "([*!#>]|->|::|\\[#\\])";   // Section Elements
    $ELs      = "[;:,.]";                   // Elements
    $T_SEC    = "%";                        // Terminator for Sections
    $T_EL     = "\\^";                      // Terminator for Elements
    $ID_N     = "[0-9]+";                   // ID Numerical
    // Not [A-z]: that range also contains [ \ ] ^ and `, which collide with
    // the "[#]" and "^" tokens of the grammar.
    $ID_L     = "[A-Za-z_]+";               // ID Leading type
    $ID_RE    = "\\/.+?\\/";                // ID of type Regular expression
    $SEC      = $SEC_ELs . "\\1*[+-]?";     // A section
    $ID_N_RG  = "($ID_N)-($ID_N)";          // A numerical range
    $ID_N_SRG = "($ID_N)([+-])";            // A single ended numerical range
    $ID       = "$ID_L|$ID_N_RG|$ID_N_SRG|$ID_N|$ID_RE"; // An ID

    $dout = "";
    $t_sec_after = false; // Is a section terminator after an id of ours?

    // End of the expression: hand back the molecule built so far.
    if ((string) $wikipath === "") {
        $sec_after = false;
        $dout = "END";
        return array($molecule);
    }

    // Section Elements
    if (preg_match("/^$SEC/", $wikipath, $m)) {
        $sec_el = $m[0];
        $wprem = (string) substr($wikipath, strlen($sec_el));
        $out = "SEC_EL(''$sec_el'')";
        $newmolecule = array(array("SEC_EL" => $sec_el));
        $is_sec = "UNKNOWN";
        if (preg_match("/^$T_EL/", $wprem)) {
            $wprem = (string) substr($wprem, 1);
            $out .= " T_EL";
            $is_sec = false;
        }
        if (preg_match("/^$T_SEC/", $wprem)) {
            $wprem = (string) substr($wprem, 1);
            $out .= " T_SEC";
            $is_sec = true;
        }
        $compound = wikipath_parse($wprem, $newmolecule, $sec_after, $t_sec_after, $dout);
        if ($compound === null) {
            // The remainder could not be parsed; keep the trace and give up.
            $dout = "$out $dout";
            return null;
        }
        // $sec_after can be calculated here instead by looking at the
        // rest of the molecules
        // $t_sec_after can be calculated here instead by looking at the
        // second molecule of $compound
        if ($t_sec_after) {
            if ($molecule !== null) {
                array_unshift($compound, $molecule);
            }
            return $compound;
        }
        if ("UNKNOWN" === $is_sec) {
            // No explicit terminator: it is a section only if another
            // section/element molecule follows it.
            $is_sec = $sec_after;
            $out .= $is_sec ? " SEC_AFTER" : " T_END";
        }
        $sec_after = true;
        // Replace the provisional SEC_EL entry by its final SEC or EL form.
        $firstmol = array_shift($compound);
        $head = array_shift($firstmol);
        if ($is_sec) {
            array_unshift($firstmol, array("SEC" => $head["SEC_EL"]));
            $out = "SECTION{ $out }";
        } else {
            array_unshift($firstmol, array("EL" => $head["SEC_EL"]));
            $out = "ELEMENT{ $out }";
        }
        array_unshift($compound, $firstmol);
        if ($molecule !== null) {
            array_unshift($compound, $molecule);
        }
        $dout = $out . " " . $dout;
        return $compound;
    }

    // Elements
    if (preg_match("/^$ELs/", $wikipath, $m)) {
        $el = $m[0];
        $wprem = (string) substr($wikipath, strlen($el));
        $out = "EL(''$el'')";
        $newmolecule = array(array("EL" => $el));
        $compound = wikipath_parse($wprem, $newmolecule, $sec_after, $t_sec_after, $dout);
        if ($compound !== null && $molecule !== null) {
            array_unshift($compound, $molecule);
        }
        $dout = $out . " " . $dout;
        return $compound;
    }

    // IDs.  The alternatives are grouped so that "^" anchors all of them,
    // not just the first one.
    if (preg_match("/^(?:$ID)/", $wikipath, $m)) {
        $w_id = $m[0];
        $wprem = (string) substr($wikipath, strlen($w_id));
        $out = "ID(''$w_id'')";
        $id = "ID";
        if (preg_match("/^$ID_L/", $w_id, $m)) {
            $id = "ID_L";
        } elseif (preg_match("/^$ID_N_RG/", $w_id, $m)) {
            $id = "ID_N_RG";
            $w_id = $m;     // keep the captured bounds: [1] = from, [2] = to
        } elseif (preg_match("/^$ID_N_SRG/", $w_id, $m)) {
            $id = "ID_N_SRG";
            $w_id = $m;     // [1] = number, [2] = "+" or "-"
        } elseif (preg_match("/^$ID_N/", $w_id, $m)) {
            $id = "ID_N";
        } elseif (preg_match("/^$ID_RE/", $w_id, $m)) {
            $id = "ID_RE";
        }
        $molecule[] = array($id => $w_id);
        $t_sec = false;
        if (preg_match("/^$T_SEC/", $wprem)) {
            $wprem = (string) substr($wprem, 1);
            $out .= " T_SEC ]";
            array_unshift($molecule, array("LIKE" => "LIKE"));
            $t_sec = true;
        }
        $compound = wikipath_parse($wprem, $molecule, $sec_after, $t_sec_after, $dout);
        if ($t_sec) {
            $t_sec_after = true;
        }
        $dout = "$out $dout";
        return $compound;
    }

    // Nothing in the grammar matches the front of the expression.
    $dout = "";
    return null;
}

/*
  This returns a regexp used to identify the end of a section, or NULL when
  the section type has no explicit end marker.

  Only heading sections ("!", "!!", ... optionally followed by "+" or "-")
  have one: the section ends at the next heading of the same or a higher
  level.
*/
function wikipath_end($el)
{
    if (preg_match('/^(!+)[-+]?$/', $el, $m)) {
        $bang = preg_quote('!', '/');
        return '^' . $bang . '{1,' . strlen($m[1]) . '}(?!' . $bang . ')';
    }
    // Still need to ignore contents of tables
    return null;
}

/*
  This returns a regexp indicating still being in a section, or NULL when
  the section type is not delimited that way.

  For list sections ("*", "#", ... optionally followed by "+" or "-") a line
  stays in the section while it starts with at least one more list marker
  than the section element itself.
*/
function wikipath_notend($el)
{
    if (preg_match('/^([#*]+)[+-]?$/', $el, $m)) {
        return '^[#*]{' . (strlen($m[1]) + 1) . '}';
    }
    return null;
}

/*
  This returns a regexp used to identify an element (the first line of a
  section, or a single-line element), or NULL when the element type is not
  supported.  The regexp is returned without delimiters and is safe to wrap
  in "/".

    ;       every line
    :       "key:" lines
    ::      ": key:" lines
    **      exactly that run of markers
    **+     that run of markers, or deeper
    **-     between one marker and that many markers
*/
function wikipath_beg($el)
{
    if ($el === ';') {
        return '';
    }
    if ($el === ':') {
        return '^ *[-_.A-Za-z]+ *:';
    }
    if ($el === '::') {
        return '^ *: *[-_.A-Za-z]+ *:';
    }
    if (preg_match('/^([#*!]+)([+-]?)$/', $el, $m)) {
        $run = $m[1];
        $last = preg_quote(substr($run, -1), '/');
        if ($m[2] === '+') {
            return '^' . preg_quote($run, '/');
        }
        if ($m[2] === '-') {
            return '^' . $last . '{1,' . strlen($run) . '}(?!' . $last . ')';
        }
        return '^' . preg_quote($run, '/') . '(?!' . $last . ')';
    }
    return null;
}

/*
  This returns the part of an item which will be used for IDs.  Generally
  this means stripping off the element or section identifier.

  $item may be a single line or a section (array of lines); for a section
  its first line is used.
*/
function wikipath_rm_el($el, $item)
{
    if (is_array($item)) {
        $item = isset($item[0]) ? $item[0] : '';
    }
    if ($el === '::') {
        return preg_replace('/^ *:/', '', $item);
    }
    if (preg_match('/^[#*!]+[+-]?$/', $el)) {
        return preg_replace('/^([#*!]+)/', '', $item);
    }
    return $item;
}

/*
  This returns the part of an item that we actually want to capture: for
  ":" and "::" elements the leading key is removed, everything else is
  returned unchanged.  $item may be a string or an array of lines.
*/
function wikipath_capture($el, $item)
{
    if ($el === ':') {
        return preg_replace('/^ *[-_.A-Za-z]+ *:/', '', $item);
    }
    if ($el === '::') {
        return preg_replace('/^ *: *[-_.A-Za-z]+ *:/', '', $item);
    }
    return $item;
}

/*
  This filters a set by the ids in a molecule.

  $mol   molecule: a list of single-entry arrays (key => value)
  $sets  candidate items: lines, words, or sections (arrays of lines)
  $el    element/section type the items were selected with

  Each id of the molecule narrows the set in turn.  Numbers are 1-based.
  Returns the captured part of every remaining item (always an array).
*/
function wikipath_filter_set($mol, $sets, $el)
{
    $sets = array_values((array) $sets);

    foreach ((array) $mol as $val) {
        // Load $k and $v with the last key/value of the entry.
        $k = null;
        $v = null;
        foreach ((array) $val as $k => $v) {
        }
        if ($k === null || $k === "SEC" || $k === "EL") {
            continue;
        }

        if ($k === "ID_L") {
            // Keep items whose text starts with the given letters.
            $needle = (string) $v;
            $kept = array();
            foreach ($sets as $set) {
                if ($el === '.') {
                    $hay = (string) $set;
                } else {
                    $hay = ltrim((string) wikipath_rm_el($el, $set), ' ');
                }
                if (strncmp($hay, $needle, strlen($needle)) === 0) {
                    $kept[] = $set;
                }
            }
            $sets = $kept;
        } elseif ($k === "ID_RE") {
            // $v is a complete regexp including its delimiters.  A section
            // is matched on its first line, like ID_L does via rm_el.
            $kept = array();
            foreach ($sets as $set) {
                $subject = is_array($set) ? (string) reset($set) : (string) $set;
                if (preg_match($v, $subject)) {
                    $kept[] = $set;
                }
            }
            $sets = $kept;
        } elseif ($k === "ID_N") {
            $n = (int) $v;
            $sets = ($n >= 1 && $n <= count($sets)) ? array($sets[$n - 1]) : array();
        } elseif ($k === "ID_N_RG") {
            $from = max(1, (int) $v[1]);
            $to = (int) $v[2];
            $sets = ($to >= $from)
                ? array_slice($sets, $from - 1, $to - $from + 1)
                : array();
        } elseif ($k === "ID_N_SRG") {
            $n = (int) $v[1];
            if ($v[2] === "+") {
                // "N+": item N and everything after it
                $sets = array_slice($sets, max(0, $n - 1));
            } elseif ($v[2] === "-") {
                // "N-": the first N items
                $sets = array_slice($sets, 0, max(0, $n));
            } else {
                $sets = array();
            }
        }
    }

    $cap = array();
    foreach ($sets as $set) {
        $cap[] = wikipath_capture($el, $set);
    }
    return $cap;
}

/*
  This breaks sets up into sets of the elements we are looking for.

  $secs is an array of sections (arrays of lines).  For "." every section is
  split into its words; for any other element type only the lines that
  start such an element are kept, and sections without any are dropped.
  Unsupported element types yield an empty array.
*/
function wikipath_getels($secs, $el)
{
    $out = array();

    if ($el === '.') {
        foreach ((array) $secs as $sec) {
            $words = array();
            foreach ((array) $sec as $line) {
                // NO_EMPTY: leading blanks and empty lines are not words.
                $words = array_merge(
                    $words,
                    preg_split('/\s+/', $line, -1, PREG_SPLIT_NO_EMPTY)
                );
            }
            $out[] = $words;
        }
        return $out;
    }

    $beg = wikipath_beg($el);
    if ($beg === null) {
        return array();
    }
    foreach ((array) $secs as $sec) {
        $olines = array();
        foreach ((array) $sec as $line) {
            if (preg_match("/$beg/", $line)) {
                $olines[] = $line;
            }
        }
        if (count($olines)) {
            $out[] = $olines;
        }
    }
    return $out;
}

/*
  This breaks sets up into the sections we are looking for.

  $secs is an array of sections (arrays of lines).  Each returned section
  starts at a line matching wikipath_beg($el) and runs until a line matches
  wikipath_end($el), a line stops matching wikipath_notend($el), or the
  enclosing section ends.
*/
function wikipath_getsecs($secs, $el)
{
    $out = array();
    $beg = wikipath_beg($el);
    if ($beg === null) {
        return $out;
    }
    $end = wikipath_end($el);
    $notend = wikipath_notend($el);

    foreach ((array) $secs as $sec) {
        $in_section = false;
        $olines = array();
        foreach ((array) $sec as $line) {
            if ($in_section) {
                $ended = ($end !== null && preg_match("/$end/", $line))
                      || ($notend !== null && !preg_match("/$notend/", $line));
                if ($ended) {
                    $in_section = false;
                    $out[] = $olines;
                    $olines = array();
                }
            }
            // The line that ends one section may start the next one.
            if (!$in_section && preg_match("/$beg/", $line)) {
                $in_section = true;
            }
            if ($in_section) {
                $olines[] = $line;
            }
        }
        // A subsection never extends beyond the section it was found in.
        if (count($olines)) {
            $out[] = $olines;
        }
    }
    return $out;
}

/* Looking up a molecule in an array of sections.
*/
/*
  $mol   molecule: a list of single-entry arrays (key => value); its SEC or
         EL entry decides how $secs is broken up, its ids filter the result
  $secs  array of sections (arrays of lines) to search in

  Returns an array of sections: for a SEC molecule whatever
  wikipath_filter_set() yields for the matching sections, for an EL molecule
  one single-item section per matching line (or word, for ".").  Returns an
  empty array when the molecule has neither a SEC nor an EL entry.
*/
function wikipath_lookup_molecule($mol, $secs)
{
    foreach ((array) $mol as $val) {
        // Load $k and $v with the last key/value of the entry.
        $k = null;
        $v = null;
        foreach ((array) $val as $k => $v) {
        }

        if ($k === "SEC") {
            $found = wikipath_getsecs($secs, $v);
            return (array) wikipath_filter_set($mol, $found, $v);
        }

        if ($k === "EL") {
            // Filter each section's elements separately (numeric ids count
            // within a section), then flatten the matches.
            $matches = array();
            foreach ((array) wikipath_getels($secs, $v) as $sec) {
                $matches = array_merge(
                    $matches,
                    (array) wikipath_filter_set($mol, (array) $sec, $v)
                );
            }
            // Every match becomes a section of its own for the next molecule.
            $out = array();
            foreach ($matches as $match) {
                $out[] = array($match);
            }
            return $out;
        }
    }
    return array();
}