File tree Expand file tree Collapse file tree 2 files changed +11
-14
lines changed
Expand file tree Collapse file tree 2 files changed +11
-14
lines changed Original file line number Diff line number Diff line change @@ -156,24 +156,17 @@ public function addContent($content, $type = null)
156156 return ;
157157 }
158158
159- $ charset = null ;
160- if (false !== $ pos = stripos ($ type , 'charset= ' )) {
161- $ charset = substr ($ type , $ pos + 8 );
162- if (false !== $ pos = strpos ($ charset , '; ' )) {
163- $ charset = substr ($ charset , 0 , $ pos );
164- }
165- }
159+ $ charset = preg_match ('//u ' , $ content ) ? 'UTF-8 ' : 'ISO-8859-1 ' ;
166160
167161 // http://www.w3.org/TR/encoding/#encodings
168162 // http://www.w3.org/TR/REC-xml/#NT-EncName
169- if ( null === $ charset &&
170- preg_match ( ' /\<meta[^\>]+ charset *= *[" \' ]?([a-zA-Z\-0-9_:.]+)/i ' , $ content , $ matches )) {
171- $ charset = $ matches [ 1 ];
172- }
163+ $ content = preg_replace_callback ( ' /(charset *= *[" \' ]?)([a-zA-Z\-0-9_:.]+)/i ' , function ( $ m ) use (& $ charset ) {
164+ if ( ' charset= ' === $ this -> convertToHtmlEntities ( ' charset= ' , $ m [ 2 ] )) {
165+ $ charset = $ m [ 2 ];
166+ }
173167
174- if (null === $ charset ) {
175- $ charset = preg_match ('//u ' , $ content ) ? 'UTF-8 ' : 'ISO-8859-1 ' ;
176- }
168+ return $ m [1 ].$ charset ;
169+ }, $ content , 1 );
177170
178171 if ('x ' === $ xmlMatches [1 ]) {
179172 $ this ->addXmlContent ($ content , $ charset );
Original file line number Diff line number Diff line change @@ -187,6 +187,10 @@ public function testAddContent()
187187 $ crawler = $ this ->createCrawler ();
188188 $ crawler ->addContent ($ this ->getDoctype ().'<html><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><span>中文</span></html> ' );
189189 $ this ->assertEquals ('中文 ' , $ crawler ->filterXPath ('//span ' )->text (), '->addContent() guess wrong charset ' );
190+
191+ $ crawler = $ this ->createCrawler ();
192+ $ crawler ->addContent ($ this ->getDoctype ().'<html><meta http-equiv="Content-Type" content="text/html; charset=unicode" /><div class="foo"></html></html> ' );
193+ $ this ->assertEquals ('foo ' , $ crawler ->filterXPath ('//div ' )->attr ('class ' ), '->addContent() ignores bad charset ' );
190194 }
191195
192196 /**
You can’t perform that action at this time.
0 commit comments