Prepare();

        if (!$nodefaults) {
            $tokens = $this->GetTokens(kCSSDefaults::$DEFAULT_STYLE);
            $this->ParseTokens($tokens, kPDFStylesheet::STYLE_ORIGIN_AGENT_NORMAL);
            $this->HTMLVisualPropsSelectorOrder = $this->SelectorOrder;
            $this->SelectorOrder += 1000;
        }
    }

    /**
     * Parses a plain "name: value; name: value" string into an associative array.
     *
     * Segments that are empty (e.g. after a trailing ';') or contain no colon are
     * skipped. Only the first colon separates name from value, so values such as
     * "url(http://...)" are kept intact.
     *
     * @param string $style
     * @return array property name => value, both trimmed
     */
    public function ParseStyle($style)
    {
        $res = array();

        foreach (explode(';', $style) as $declaration) {
            $declaration = trim($declaration);

            if ($declaration === '' || strpos($declaration, ':') === false) {
                continue;
            }

            list($name, $value) = explode(':', $declaration, 2);
            $res[trim($name)] = trim($value);
        }

        return $res;
    }

    /*
        stylesheet  : [ CDO | CDC | S | statement ]*;
        statement   : ruleset | at-rule;
        at-rule     : ATKEYWORD S* any* [ block | ';' S* ];
        block       : '{' S* [ any | block | ATKEYWORD S* | ';' S* ]* '}' S*;
        ruleset     : selector? '{' S* declaration? [ ';' S* declaration? ]* '}' S*;
        selector    : any+;
        declaration : DELIM? property S* ':' S* value;
        property    : IDENT;
        value       : [ any | block | ATKEYWORD S* ]+;
        any         : [ IDENT | NUMBER | PERCENTAGE | DIMENSION | STRING
                      | DELIM | URI | HASH | UNICODE-RANGE | INCLUDES
                      | DASHMATCH | FUNCTION S* any* ')'
                      | '(' S* any* ')' | '[' S* any* ']' ] S*;
    */

    /**
     * Walks a token stream and registers every top-level ruleset via AppendRule().
     *
     * Tokens outside any block are collected in Buffer[0] (the selector); an LBRACE
     * opens a new buffer level; a '}' that closes level 1 turns the two buffers into
     * a rule. Blocks nested deeper than level 1 are buffered but not turned into rules.
     *
     * @param array $tokens token list as produced by GetTokens()
     * @param mixed $origin style origin stored with every selector of the parsed rules
     * @return void
     */
    public function ParseTokens($tokens, $origin = kPDFStylesheet::STYLE_ORIGIN_AUTHOR_NORMAL)
    {
        $this->Buffer[0] = array();

        // GetTokens() glues adjacent unmatched characters into one TEXT token (";}"),
        // which would hide the closing brace; examine them one character at a time.
        foreach ($this->SplitTextTokens($tokens) as $token) {
            if ($token['name'] == 'LBRACE') {
                $this->Buffer[++$this->Level] = array();
                $this->Openings[$this->Level] = 'LBRACE';
            } elseif ($token['name'] == 'TEXT' && $token['data'] == '}') {
                if ($this->Level == 1 && $this->Openings[$this->Level] == 'LBRACE') {
                    $this->AppendRule($this->Buffer[0], $this->Buffer[$this->Level], $origin);
                    $this->Buffer[0] = array();
                }

                // a stray '}' must not push the nesting level below zero
                if ($this->Level > 0) {
                    $this->Level--;
                }
            } else {
                $this->Buffer[$this->Level][] = $token;
            }
        }
    }

    /**
     * Returns the token list with every multi-character TEXT token replaced by
     * one TEXT token per byte. Concatenating the 'data' fields yields the same
     * string as before; all other tokens are passed through untouched.
     *
     * @param array $tokens
     * @return array
     */
    protected function SplitTextTokens($tokens)
    {
        $res = array();

        foreach ($tokens as $token) {
            if ($token['name'] == 'TEXT' && strlen($token['data']) > 1) {
                foreach (str_split($token['data']) as $char) {
                    $res[] = array('name' => 'TEXT', 'data' => $char);
                }
            } else {
                $res[] = $token;
            }
        }

        return $res;
    }

    /**
     * Joins the 'data' field of all given tokens into one string.
     *
     * @param array $tokens
     * @return string
     */
    protected function ConcatTokensData($tokens)
    {
        $res = '';

        foreach ($tokens as $token) {
            $res .= $token['data'];
        }

        return $res;
    }

    /**
     * Turns the tokens of a declaration block into an array of properties.
     *
     * A small state machine: wait for an IDENT (property name), then for ':',
     * then collect everything up to ';' (or the end of input) as the value.
     * Property names are upper-cased and shorthands are expanded through
     * ProcessShortHands(). A property that is not followed by a colon triggers
     * an E_USER_WARNING.
     *
     * @param array $tokens
     * @return array upper-cased property name => value
     */
    public function ParseDefinitionTokens($tokens)
    {
        $mode = 'property';
        $properties = array();
        $property = '';
        $value = '';

        // split TEXT runs so that ':' and ';' are seen even when glued to a neighbour (":-", ";}")
        foreach ($this->SplitTextTokens($tokens) as $token) {
            $is_text = $token['name'] == 'TEXT';

            if ($mode == 'property') {
                if ($token['name'] == 'IDENT') {
                    $property = $token['data'];
                    $mode = 'colon';
                }
            } elseif ($mode == 'colon') {
                if ($is_text && $token['data'] == ':') {
                    $mode = 'value';
                }
            } elseif ($mode == 'value') {
                if ($is_text && $token['data'] == ';') {
                    $properties[strtoupper($property)] = trim($value);
                    $value = '';
                    $mode = 'property';
                } else {
                    $value .= $token['data'];
                }
            }
        }

        // the last declaration does not need a terminating ';'
        if ($mode == 'value') {
            $properties[strtoupper($property)] = trim($value);
        }

        if ($mode == 'colon') {
            trigger_error('Error parsing CSS definition, no colon and/or value after property '.$property, E_USER_WARNING);
        }

        return $this->ProcessShortHands($properties);
    }

    /**
     * Expands the MARGIN, PADDING, BORDER and BORDER-<side> shorthands into their
     * long-hand properties; every other property is copied unchanged.
     *
     * A MARGIN value is only expanded when each of its 1-4 parts is a number with
     * an optional px/pt/em/ex/% unit or "auto"; otherwise it is dropped.
     *
     * @param array $properties upper-cased property name => value
     * @return array
     */
    public function ProcessShortHands($properties)
    {
        $res = array();
        $sides = array('TOP', 'RIGHT', 'BOTTOM', 'LEFT');

        foreach ($properties as $property => $value) {
            switch ($property) {
                case 'MARGIN':
                    $parts = preg_split('/\s+/', $value, -1, PREG_SPLIT_NO_EMPTY);
                    $valid = true;

                    foreach ($parts as $part) {
                        if (!preg_match('/^([.0-9]+(?:px|pt|em|ex|%)?|auto)$/i', $part)) {
                            $valid = false;
                            break;
                        }
                    }

                    if ($valid) {
                        $res = array_replace($res, $this->ExpandBoxShorthand('MARGIN', $parts));
                    }
                    break;

                case 'PADDING':
                    $parts = preg_split('/\s+/', $value, -1, PREG_SPLIT_NO_EMPTY);
                    $res = array_replace($res, $this->ExpandBoxShorthand('PADDING', $parts));
                    break;

                case 'BORDER-TOP':
                case 'BORDER-RIGHT':
                case 'BORDER-BOTTOM':
                case 'BORDER-LEFT':
                    $parts = $this->ParseBorderShorthand($value);

                    foreach (array('style', 'width', 'color') as $aspect) {
                        if (isset($parts[$aspect])) {
                            $res[$property.'-'.strtoupper($aspect)] = $parts[$aspect];
                        }
                    }
                    break;

                case 'BORDER':
                    $parts = $this->ParseBorderShorthand($value);

                    foreach (array('style', 'width', 'color') as $aspect) {
                        if (!isset($parts[$aspect])) {
                            continue;
                        }

                        foreach ($sides as $side) {
                            $res['BORDER-'.$side.'-'.strtoupper($aspect)] = $parts[$aspect];
                        }
                    }
                    break;

                default:
                    $res[$property] = $value;
            }
        }

        return $res;
    }

    /**
     * Distributes 1-4 box values over the four sides following the CSS rules
     * (1: all sides; 2: vertical, horizontal; 3: top, horizontal, bottom;
     * 4: top, right, bottom, left).
     *
     * @param string $prefix property prefix, e.g. 'MARGIN'
     * @param array $parts zero-indexed list of values
     * @return array "<prefix>-TOP|RIGHT|BOTTOM|LEFT" => value; empty when the
     *               number of parts is not between 1 and 4
     */
    protected function ExpandBoxShorthand($prefix, $parts)
    {
        // index into $parts for top, right, bottom, left
        $layouts = array(
            1 => array(0, 0, 0, 0),
            2 => array(0, 1, 0, 1),
            3 => array(0, 1, 2, 1),
            4 => array(0, 1, 2, 3),
        );

        $count = count($parts);

        if (!isset($layouts[$count])) {
            return array();
        }

        list($top, $right, $bottom, $left) = $layouts[$count];

        return array(
            $prefix.'-TOP' => $parts[$top],
            $prefix.'-RIGHT' => $parts[$right],
            $prefix.'-BOTTOM' => $parts[$bottom],
            $prefix.'-LEFT' => $parts[$left],
        );
    }

    /**
     * Splits a border shorthand value ("1px solid red") into its components.
     *
     * Each space-separated part is classified as a border style keyword, a width
     * (thin/medium/thick or anything starting with a number), or - failing both -
     * a color.
     *
     * @param string $definition
     * @return array with any of the keys 'style', 'width', 'color'
     */
    public function ParseBorderShorthand($definition)
    {
        $res = array();

        foreach (preg_split('/\s+/', $definition, -1, PREG_SPLIT_NO_EMPTY) as $part) {
            if (preg_match('/^(?:none|hidden|dotted|dashed|solid|double|groove|ridge|inset|outset)$/', $part)) {
                $res['style'] = $part;
            } elseif (preg_match('/^(thin|medium|thick|[.0-9]+(?:px|pt|em|ex|%)?)/', $part)) {
                // deliberately not anchored at the end, so widths with other units still count as widths
                $res['width'] = $part;
            } else {
                $res['color'] = $part;
            }
        }

        return $res;
    }

    /**
     * Splits the selector tokens of a ruleset at COMMA tokens and parses every
     * resulting selector string with IdentifySelectors().
     *
     * @param array $tokens
     * @param mixed $origin
     * @return array parsed selectors
     */
    public function ParseSelectorTokens($tokens, $origin)
    {
        $selectors = array();
        $current = '';

        foreach ($tokens as $token) {
            if ($token['name'] == 'COMMA') {
                $selectors[] = trim($current);
                $current = '';
            } else {
                $current .= $token['data'];
            }
        }

        if (trim($current) != '') {
            $selectors[] = trim($current);
        }

        return $this->IdentifySelectors($selectors, $origin);
    }

    /*
        A simple selector is either a type selector or universal selector followed
        immediately by zero or more attribute selectors, ID selectors, or
        pseudo-classes, in any order. The simple selector matches if all of its
        components match.

        A selector is a chain of one or more simple selectors separated by
        combinators. Combinators are: whitespace, ">", and "+". Whitespace may
        appear between a combinator and the simple selectors around it.

        A selector's specificity is calculated as follows:
          * count 1 if the selector is a 'style' attribute rather than a selector,
            0 otherwise (= a) (In HTML, values of an element's "style" attribute are
            style sheet rules. These rules have no selectors, so a=1, b=0, c=0, d=0.)
          * count the number of ID attributes in the selector (= b)
          * count the number of other attributes and pseudo-classes in the selector (= c)
          * count the number of element names and pseudo-elements in the selector (= d)

        The specificity is based only on the form of the selector. In particular, a
        selector of the form "[id=p33]" is counted as an attribute selector
        (a=0, b=0, c=1, d=0), even if the id attribute is defined as an "ID" in the
        source document's DTD.

        Concatenating the four numbers a-b-c-d (in a number system with a large
        base) gives the specificity.
    */

    /**
     * Parses selector strings into nested selector structures.
     *
     * The returned structure describes the right-most simple selector ('main',
     * 'classes', 'ids', 'atts', 'pseudo_classes', 'pseudo_elements'); the selector
     * to its left is nested under 'descendant_of', 'child_of' or 'sibling_of'
     * depending on the combinator. 'specifity', 'order' and 'origin' are stored
     * on the outermost level. Each parsed selector consumes one SelectorOrder value.
     *
     * @param array $selectors list of selector strings
     * @param mixed $origin
     * @return array list of parsed selectors
     */
    public function IdentifySelectors($selectors, $origin)
    {
        $processed = array();
        $ident = $this->Macros['ident'];

        foreach ($selectors as $selector) {
            // -1 = no limit (passing null is deprecated as of PHP 8.1)
            $parts = preg_split('/[ ]*([ >+])[ ]*/', $selector, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);

            $parsed_selector = array();
            $a = 0; // would be 1 for a style attribute; never the case for parsed selectors
            $b = 0;
            $c = 0;
            $d = 0;

            foreach ($parts as $simple_selector) {
                $parsed_part = array();

                if (preg_match('/^([ >+])$/', $simple_selector, $regs)) {
                    $parsed_part['combinator'] = $regs[1];
                    $parsed_selector[] = $parsed_part;
                    continue;
                }

                if (preg_match('/^(\*|'.$ident.')/i', $simple_selector, $regs)) {
                    $tag = $regs[1];

                    if ($tag != '*') {
                        $d++;
                    }
                } else {
                    $tag = '*';
                }

                $parsed_part['main'] = strtoupper($tag);

                // the ident macro only lists lower-case letters, hence the /i flag
                if (preg_match_all('/\.('.$ident.')/i', $simple_selector, $regs)) {
                    $parsed_part['classes'] = $regs[1];
                    $c += count($regs[1]);
                }

                if (preg_match_all('/\[([^\]]+)\]/', $simple_selector, $regs)) {
                    $c += count($regs[1]);
                    $parsed_atts = array();
                    $quotes = " \t\n\r\"'";

                    foreach ($regs[1] as $attribute) {
                        if (preg_match('/^[^=]+$/', $attribute)) {
                            $parsed_atts['set'][] = strtoupper(trim($attribute));
                        } elseif (preg_match('/(.*)\\|=(.*)/', $attribute, $att_regs)) {
                            $parsed_atts['hypen'][strtoupper(trim($att_regs[1]))] = trim($att_regs[2], $quotes);
                        } elseif (preg_match('/(.*)~=(.*)/', $attribute, $att_regs)) {
                            $parsed_atts['space'][strtoupper(trim($att_regs[1]))] = trim($att_regs[2], $quotes);
                        } elseif (preg_match('/(.*)=(.*)/', $attribute, $att_regs)) {
                            $parsed_atts['equals'][strtoupper(trim($att_regs[1]))] = trim($att_regs[2], $quotes);
                        }
                    }

                    $parsed_part['atts'] = $parsed_atts;
                }

                if (preg_match_all('/#('.$ident.')/i', $simple_selector, $regs)) {
                    $parsed_part['ids'] = $regs[1];
                    $b += count($regs[1]);
                }

                if (preg_match_all('/:('.$ident.')/i', $simple_selector, $regs)) {
                    $pseudo_classes = array();
                    $pseudo_elements = array();

                    foreach ($regs[1] as $pseudo) {
                        if (preg_match('/^(first-line|first-letter|before|after)$/i', $pseudo)) {
                            $pseudo_elements[] = $pseudo;
                        } else {
                            $pseudo_classes[] = $pseudo;
                        }
                    }

                    if ($pseudo_classes) {
                        $parsed_part['pseudo_classes'] = $pseudo_classes;
                    }

                    if ($pseudo_elements) {
                        $parsed_part['pseudo_elements'] = $pseudo_elements;
                    }

                    $c += count($pseudo_classes);
                    $d += count($pseudo_elements);
                }

                $parsed_selector[] = $parsed_part;
            }

            // build the chain right-to-left: the subject of the selector ends up outermost
            $main = array();
            $cur =& $main;

            foreach (array_reverse($parsed_selector) as $piece) {
                if (isset($piece['combinator'])) {
                    switch ($piece['combinator']) {
                        case ' ':
                            $cur =& $cur['descendant_of'];
                            break;

                        case '>':
                            $cur =& $cur['child_of'];
                            break;

                        case '+':
                            $cur =& $cur['sibling_of'];
                            break;
                    }
                    continue;
                }

                $cur['main'] = $piece['main'];

                foreach (array('classes', 'ids', 'pseudo_classes', 'pseudo_elements', 'atts') as $key) {
                    if (isset($piece[$key])) {
                        $cur[$key] = $piece[$key];
                    }
                }
            }

            unset($cur);

            // two decimal digits per component, zero-padded on the LEFT; each count is
            // capped at 99 so that it cannot spill into the next component
            $main['specifity'] = intval(sprintf('%02d%02d%02d%02d', min($a, 99), min($b, 99), min($c, 99), min($d, 99)));
            $main['order'] = $this->SelectorOrder++;
            $main['origin'] = $origin;

            $processed[] = $main;
        }

        return $processed;
    }

    /**
     * Parses one ruleset and stores it in Rules and - per selector, keyed by the
     * upper-cased element name of its subject - in Mapping.
     *
     * @param array $selector_tokens
     * @param array $definition_tokens
     * @param mixed $origin
     * @return void
     */
    public function AppendRule($selector_tokens, $definition_tokens, $origin)
    {
        $selectors = $this->ParseSelectorTokens($selector_tokens, $origin);
        $properties = $this->ParseDefinitionTokens($definition_tokens);

        foreach ($selectors as $selector) {
            // an empty selector string (e.g. "a,,b") yields no subject to map by
            if (!isset($selector['main'])) {
                continue;
            }

            $this->Mapping[strtoupper($selector['main'])][] = array('selector' => $selector, 'properties' => $properties);
        }

        $this->Rules[] = array('selectors' => $selectors, 'properties' => $properties);
    }

    /**
     * Tokenizes a CSS string.
     *
     * Comments are removed and whitespace runs are collapsed first. Every token
     * pattern is then matched against the whole string; the resulting match lists
     * are walked from left to right, always taking the match that starts first
     * (the longest one on a tie). Text that no pattern covers is emitted as TEXT
     * tokens.
     *
     * @param string $css
     * @return array list of array('name' => token name, 'data' => matched text)
     */
    public function GetTokens($css)
    {
        // Token patterns of the CSS 2.1 scanner, written with {macro} placeholders.
        // The scanner's per-letter escape-aware unit rules and its catch-all '.' rule
        // are not used; unmatched characters become TEXT tokens instead.
        $patterns = array(
            '{s}+' =>'S',
            '<!--' =>'CDO',
            '-->' =>'CDC',
            '~=' =>'INCLUDES',
            '\\|=' =>'DASHMATCH',
            '{w}\\{' =>'LBRACE',
            '{w}\\+' =>'PLUS',
            '{w}\\>' =>'GREATER',
            '{w},' =>'COMMA',
            '{string}' =>'STRING',
            '{invalid}' =>'INVALID', /* unclosed string */
            '{ident}' =>'IDENT',
            '#{name}' =>'HASH',
            '@import' =>'IMPORT_SYM',
            '@page' =>'PAGE_SYM',
            '@media' =>'MEDIA_SYM',
            '@charset' =>'CHARSET_SYM',
            '!{w}important' =>'IMPORTANT_SYM',
            '{num}em' =>'EMS',
            '{num}ex' =>'EXS',
            '{num}px' =>'LENGTH',
            '{num}cm' =>'LENGTH',
            '{num}mm' =>'LENGTH',
            '{num}in' =>'LENGTH',
            '{num}pt' =>'LENGTH',
            '{num}pc' =>'LENGTH',
            '{num}deg' =>'ANGLE',
            '{num}rad' =>'ANGLE',
            '{num}grad' =>'ANGLE',
            '{num}ms' =>'TIME',
            '{num}s' =>'TIME',
            '{num}hz' =>'FREQ',
            '{num}khz' =>'FREQ',
            '{num}{ident}' =>'DIMENSION',
            '{num}%' =>'PERCENTAGE',
            '{num}' =>'NUMBER',
            'url\({w}{string}{w}\)' =>'URI',
            'url\({w}{url}{w}\)' =>'URI',
            '{ident}\(' =>'FUNCTION',
        );

        // expand the macros; macros that Prepare() left unresolved inside other macros
        // (e.g. {nl}) are resolved here because every macro is applied to the result
        $final_patterns = array();

        foreach ($patterns as $regexp => $token) {
            foreach ($this->Macros as $macro => $replacement) {
                $regexp = str_replace('{'.$macro.'}', $replacement, $regexp);
            }

            $final_patterns[$regexp] = $token;
        }

        $css = preg_replace('/\\/\\*[^*]*\\*+([^\\/*][^*]*\\*+)*\\//', '', $css); // strip comments
        $css = preg_replace('/[ \t\r\n\f]+/', ' ', $css); // remove repeated whitespace

        // NOTE(review): match lists are keyed by token name, so patterns sharing a name
        // (LENGTH, ANGLE, TIME, FREQ, URI) overwrite each other - only the last such
        // pattern that matched is kept. Kept as is; the parser in this class only
        // looks at IDENT, LBRACE, COMMA and TEXT.
        $matches = array();
        $token_indexes = array();

        foreach ($final_patterns as $regexp => $token) {
            if (preg_match_all('/'.$regexp.'/i', $css, $res, PREG_PATTERN_ORDER | PREG_OFFSET_CAPTURE)) {
                $matches[$token] = $res[0];
                $token_indexes[$token] = 0;
            }
        }

        $tokens = array();
        $last_token_pos = 0;

        do {
            $has_more = false;
            $max_len = 0;
            $min_pos = false;

            foreach ($matches as $token => $data) {
                // skip matches that start inside text already consumed
                $cur_index = $token_indexes[$token];

                do {
                    $cur_match = isset($data[$cur_index]) ? $data[$cur_index++] : false;
                } while ($cur_match && $cur_match[1] < $last_token_pos);

                if (!$cur_match) {
                    continue;
                }

                $token_indexes[$token] = $cur_index - 1;

                $starts_earlier = $min_pos === false || $cur_match[1] < $min_pos;
                $same_start_but_longer = $cur_match[1] == $min_pos && strlen($cur_match[0]) > $max_len;

                if ($starts_earlier || $same_start_but_longer) {
                    $longest = $token;
                    $max_len = strlen($cur_match[0]);
                    $min_pos = $cur_match[1];
                }

                $has_more = $has_more || isset($data[$token_indexes[$token]]);
            }

            if ($min_pos !== false) {
                $token_data = $matches[$longest][$token_indexes[$longest]];

                if ($token_data[1] > $last_token_pos) {
                    // gap between the previous token and this one
                    $text_data = substr($css, $last_token_pos, $token_data[1] - $last_token_pos);
                    $tokens[] = array('name' => 'TEXT', 'data' => $text_data);
                }

                $tokens[] = array('name' => $longest, 'data' => $token_data[0]);

                $last_token_pos = $token_data[1] + strlen($token_data[0]);
                $token_indexes[$longest]++;
            }
        } while ($has_more);

        // trailing unmatched text; nothing is appended when the last token ends the string
        if ($last_token_pos < strlen($css)) {
            $tokens[] = array('name' => 'TEXT', 'data' => substr($css, $last_token_pos));
        }

        return $tokens;
    }

    /**
     * Builds $this->Macros, the regular-expression fragments used by the tokenizer
     * and the selector parser.
     *
     * Each macro has the macros defined BEFORE it expanded; references to macros
     * defined later (e.g. {nl}) stay as placeholders and are resolved by GetTokens().
     * This is a simplified set: {escape} does not include {unicode}.
     *
     * @return void
     */
    public function Prepare()
    {
        $simple = array(
            'h' => '[0-9a-f]',
            'nonascii' => '[\\200-\\377]',
            // '\\\\' yields the two characters \\ in the regex, i.e. one literal backslash
            'unicode' => '(\\\\{h}{1,6}(\r\n|[ \t\r\n\f])?)',
            'escape' => '(\\\\[^\r\n\f0-9a-f])',
            'nmstart' => '([_a-z]|{nonascii}|{escape})',
            'nmchar' => '([_a-z0-9-]|{nonascii}|{escape})',
            'string1' => '("([^\n\r\f"]|{nl}|{escape})*")',
            'string2' => '(\'([^\n\r\f\']|{nl}|{escape})*\')',
            'invalid1' => '("([^\n\r\f"]|{nl}|{escape})*?)',
            'invalid2' => '(\'([^\n\r\f\']|{nl}|{escape})*?)',
            'ident' => '-?{nmstart}{nmchar}*',
            'name' => '{nmchar}+',
            'num' => '([0-9]+|[0-9]*\.[0-9]+)',
            'string' => '({string1}|{string2})',
            'invalid' => '({invalid1}|{invalid2})',
            'url' => '([!#$%&*-~]|{nonascii}|{escape})*',
            's' => '[ \t\r\n\f]',
            'w' => '{s}*',
            'nl' => '(\n|\r\n|\r|\f)',
        );

        $replaced_macros = array();

        foreach ($simple as $key => $macro) {
            $replaced = $macro;

            foreach ($replaced_macros as $shorthand => $replacement) {
                $replaced = str_replace('{'.$shorthand.'}', $replacement, $replaced);
            }

            $replaced_macros[$key] = $replaced;
        }

        $this->Macros = $replaced_macros;
    }

    /**
     * Builds a pseudo rule from the presentational HTML attributes of a node.
     *
     * All attributes not listed as non-visual are returned as properties (ALIGN and
     * VALIGN are renamed to TEXT-ALIGN and VERTICAL-ALIGN). CELLPADDING is not
     * returned; instead a "<node>[cellpadding=N] TD" rule with N px padding is
     * added to Mapping['TD'].
     *
     * NOTE(review): the CELLPADDING rule is appended on every call, so calling this
     * repeatedly for the same node accumulates duplicate TD rules - confirm callers
     * query each node only once.
     *
     * @param object $node node exposing Name and Attributes (keys presumably upper-cased - verify)
     * @return array|false array('selector' => ..., 'properties' => ...), or false
     *                     when the node has no visual attributes
     */
    public function GetHTMLVisualPropsSelector($node)
    {
        if (!$node->Attributes) {
            return false;
        }

        $non_visual_props = array(
            'ABBR', 'ACCEPT-CHARSET', 'ACCEPT', 'ACCESSKEY', 'ACTION', 'ALT', 'ARCHIVE', 'AXIS',
            'CHARSET', 'CHECKED', 'CITE', 'CLASS', 'CLASSID', 'CODE', 'CODEBASE', 'CODETYPE',
            'COLSPAN', 'COORDS', 'DATA', 'DATETIME', 'DECLARE', 'DEFER', 'DIR', 'DISABLED',
            'ENCTYPE', 'FOR', 'HEADERS', 'HREF', 'HREFLANG', 'HTTP-EQUIV', 'ID', 'ISMAP',
            'LABEL', 'LANG', 'LANGUAGE', 'LONGDESC', 'MAXLENGTH', 'MEDIA', 'METHOD', 'MULTIPLE',
            'NAME', 'NOHREF', 'OBJECT', 'ONBLUR', 'ONCHANGE', 'ONCLICK', 'ONDBLCLICK', 'ONFOCUS',
            'ONKEYDOWN', 'ONKEYPRESS', 'ONKEYUP', 'ONLOAD', 'ONMOUSEDOWN', 'ONMOUSEMOVE',
            'ONMOUSEOUT', 'ONMOUSEOVER', 'ONMOUSEUP', 'ONRESET', 'ONSELECT', 'ONSUBMIT',
            'ONUNLOAD', 'PROFILE', 'PROMPT', 'READONLY', 'REL', 'REV', 'ROWSPAN', 'SCHEME',
            'SCOPE', 'SELECTED', 'SHAPE', 'SPAN', 'SRC', 'STANDBY', 'START', 'STYLE', 'SUMMARY',
            'TITLE', 'USEMAP', 'VALUE', 'VALUETYPE', 'VERSION',
        );

        // TYPE is only treated as visual on list elements
        if ($node->Name != 'LI' && $node->Name != 'OL' && $node->Name != 'UL') {
            $non_visual_props[] = 'TYPE';
        }

        $visual_attributes = array_diff_key($node->Attributes, array_flip($non_visual_props));

        if (!$visual_attributes) {
            return false;
        }

        $mapping = array(
            'ALIGN' => 'TEXT-ALIGN',
            'VALIGN' => 'VERTICAL-ALIGN',
        );

        $mapped_attributes = array();

        foreach ($visual_attributes as $key => $val) {
            if ($key == 'CELLPADDING') {
                $processed = $this->IdentifySelectors(
                    array($node->Name.'[cellpadding='.$val.'] TD'),
                    kPDFStylesheet::STYLE_ORIGIN_AUTHOR_NORMAL
                );

                $processed[0]['order'] = $this->HTMLVisualPropsSelectorOrder++;
                $processed[0]['specifity'] = 0;

                $this->Mapping['TD'][] = array(
                    'selector' => $processed[0],
                    'properties' => $this->ProcessShortHands(array('PADDING' => $val.'px')),
                );
            } elseif (isset($mapping[$key])) {
                $mapped_attributes[$mapping[$key]] = $val;
            } else {
                $mapped_attributes[$key] = $val;
            }
        }

        return array(
            'selector' => array(
                'main' => $node->Name,
                'specifity' => 0,
                'order' => $this->HTMLVisualPropsSelectorOrder,
                'origin' => kPDFStylesheet::STYLE_ORIGIN_AUTHOR_NORMAL,
            ),
            'properties' => $mapped_attributes,
        );
    }

    /**
     * Collects all rules applying to a node, in cascade order (weakest first).
     *
     * Candidates are the rules mapped to the node's name and to '*'; those whose
     * selector matches are combined with the HTML-attribute pseudo rule, sorted
     * with CmpSelectors(), and followed by the node's STYLE attribute (if any),
     * which therefore always comes last.
     *
     * @param object $node
     * @return array list of array('selector' => ..., 'properties' => ...)
     */
    public function GetMatchingSelectors($node)
    {
        $map = isset($this->Mapping[$node->Name]) ? $this->Mapping[$node->Name] : array();

        if (isset($this->Mapping['*'])) {
            $map = array_merge($map, $this->Mapping['*']);
        }

        $matching = array();

        foreach ($map as $rule) {
            if ($this->SelectorMatches($rule['selector'], $node)) {
                $matching[] = $rule;
            }
        }

        $html_visual_selector = $this->GetHTMLVisualPropsSelector($node);

        if ($html_visual_selector) {
            $matching[] = $html_visual_selector;
        }

        usort($matching, array($this, 'CmpSelectors'));

        if (isset($node->Attributes['STYLE'])) {
            $matching[] = array(
                'selector' => array('main' => '_STYLE_'),
                'properties' => $this->ParseDefinitionTokens($this->GetTokens($node->Attributes['STYLE'])),
            );
        }

        return $matching;
    }

    /**
     * usort() callback ordering rules by origin, then specificity, then order of
     * appearance (all ascending).
     *
     * @param array $a
     * @param array $b
     * @return int -1, 0 or 1
     */
    public function CmpSelectors($a, $b)
    {
        foreach (array('origin', 'specifity', 'order') as $key) {
            if ($a['selector'][$key] != $b['selector'][$key]) {
                return $a['selector'][$key] < $b['selector'][$key] ? -1 : 1;
            }
        }

        return 0;
    }

    /**
     * Tells whether a parsed selector (see IdentifySelectors()) matches a node.
     *
     * Selectors containing pseudo-classes or pseudo-elements never match.
     *
     * @param array $selector_data
     * @param object|null $node node exposing Name, Attributes, Parent and PrevSibling();
     *                          a missing node (no parent / no previous sibling) never matches
     * @return bool
     */
    public function SelectorMatches($selector_data, $node)
    {
        // reached through a combinator when the node has no parent or previous sibling
        if (!$node) {
            return false;
        }

        if ($selector_data['main'] != '*' && $node->Name != $selector_data['main']) {
            return false;
        }

        // check classes: every class must appear as a whole word in the CLASS attribute
        if (isset($selector_data['classes'])) {
            foreach ($selector_data['classes'] as $class) {
                if (!isset($node->Attributes['CLASS'])
                    || !preg_match('/(\A| )+'.preg_quote($class, '/').'( |\Z)+/i', $node->Attributes['CLASS'])
                ) {
                    return false;
                }
            }
        }

        // check ids
        if (isset($selector_data['ids'])) {
            if (!isset($node->Attributes['ID']) || !in_array($node->Attributes['ID'], $selector_data['ids'])) {
                return false;
            }
        }

        // check attributes
        if (isset($selector_data['atts'])) {
            $atts = $selector_data['atts'];

            if (isset($atts['set'])) {
                foreach ($atts['set'] as $att) {
                    if (!isset($node->Attributes[$att])) {
                        return false;
                    }
                }
            }

            if (isset($atts['equals'])) {
                foreach ($atts['equals'] as $att => $value) {
                    if (!isset($node->Attributes[$att]) || strtoupper($node->Attributes[$att]) != strtoupper($value)) {
                        return false;
                    }
                }
            }

            if (isset($atts['space'])) {
                foreach ($atts['space'] as $att => $value) {
                    if (!isset($node->Attributes[$att])
                        || !preg_match('/(\A| )+'.preg_quote($value, '/').'( |\Z)+/i', $node->Attributes[$att])
                    ) {
                        return false;
                    }
                }
            }

            if (isset($atts['hypen'])) {
                foreach ($atts['hypen'] as $att => $value) {
                    if (!isset($node->Attributes[$att])
                        || !preg_match('/^'.preg_quote($value, '/').'(-|\Z)+/i', $node->Attributes[$att])
                    ) {
                        return false;
                    }
                }
            }
        }

        // we are not a browser - so don't know how to handle pseudo selectors
        if (isset($selector_data['pseudo_elements']) || isset($selector_data['pseudo_classes'])) {
            return false;
        }

        // check combinators
        if (isset($selector_data['child_of'])) {
            if (!$this->SelectorMatches($selector_data['child_of'], $node->Parent)) {
                return false;
            }
        }

        if (isset($selector_data['sibling_of'])) {
            if (!$this->SelectorMatches($selector_data['sibling_of'], $node->PrevSibling())) {
                return false;
            }
        }

        if (isset($selector_data['descendant_of'])) {
            $matches = false;

            for ($ancestor = $node->Parent; $ancestor; $ancestor = $ancestor->Parent) {
                if ($this->SelectorMatches($selector_data['descendant_of'], $ancestor)) {
                    $matches = true;
                    break;
                }
            }

            if (!$matches) {
                return false;
            }
        }

        // if we came through here, the selector matches the node
        return true;
    }

    /**
     * Returns the effective properties of a node: the properties of all matching
     * rules merged in cascade order, later rules overriding earlier ones.
     * "!important" is not given any special treatment here.
     *
     * @param object $node
     * @return array property name => value
     */
    public function GetAllProperties($node)
    {
        $properties = array();

        foreach ($this->GetMatchingSelectors($node) as $rule) {
            $properties = array_merge($properties, $rule['properties']);
        }

        return $properties;
    }
}