# Nanoc filter that post-processes generated XHTML: it optionally swaps in
# a doctype matching the embedded MathML/SVG content and applies a few
# textual workarounds for old browsers.
class XhtmlCompatFilter < Nanoc::Filter
  identifier :xhtml_compat
  requires 'nokogiri'

  # Namespace prefixes used by the XPath queries in #fix_doctype.
  Xmlns = {
    math: 'http://www.w3.org/1998/Math/MathML',
    svg: 'http://www.w3.org/2000/svg',
  }.freeze

  XHTMLPublic = '-//W3C//DTD XHTML 1.1//EN'.freeze
  MathPublic = '-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN'.freeze
  MathSystem = 'http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f.dtd'.freeze
  SVGPublic = '-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN'.freeze
  SVGSystem = 'http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd'.freeze

  # XSLT 1.0 as implemented in Nokogiri cannot construct doctypes based
  # on content. When using MathML or SVG elements in XHTML a different
  # doctype is needed: select one based on which elements are present.
  #
  # content - the document source as a String.
  # params  - filter options; the doctype is only touched when
  #           params[:fix_doctype] is truthy.
  #
  # Always returns a new String (never `content` itself), so callers may
  # mutate the result in place. When the option is off, or the document
  # does not declare the plain XHTML 1.1 doctype, the result is an
  # unmodified copy of `content`; otherwise it is the document as
  # re-serialized by Nokogiri.
  def fix_doctype(content, params = {})
    return content.dup unless params[:fix_doctype]

    doc = Nokogiri::XML(content)
    doctype = doc.internal_subset

    # A document without any DOCTYPE has no internal subset (nil); treat
    # it like any other non-XHTML-1.1 document instead of raising.
    return content.dup if doctype.nil? || doctype.external_id != XHTMLPublic

    # SVG is checked first: its DTD is the "plus MathML plus SVG" one, so
    # it also applies when both kinds of element are present.
    public_id, system_id =
      if !doc.xpath('//svg:svg', Xmlns).empty?
        [SVGPublic, SVGSystem]
      elsif !doc.xpath('//math:math', Xmlns).empty?
        [MathPublic, MathSystem]
      end

    if public_id
      # The existing subset is removed before a replacement is created.
      doctype.remove
      doc.create_internal_subset('html', public_id, system_id)
    end

    doc.to_xml
  end

  # Filter entry point: fixes the doctype (if requested via params) and
  # then applies the textual compatibility tweaks below. Returns the
  # resulting String; `content` itself is not modified.
  def run(content, params = {})
    text = fix_doctype(content, params)

    # Old versions of Netscape get confused by <hr/> but have no problem
    # with <hr />, so avoid that by adding spaces to such elements.
    text.gsub!(/([^[:space:]])\/>/m, '\1 />')
    # Even older versions of Netscape interpret any script as Javascript,
    # which causes major problems with the CDATA hack; solve that by
    # appending "*/" right after each "<![CDATA[-->]]>" marker.
    # NOTE(review): presumably this closes a Javascript comment opened
    # earlier in the generated markup -- confirm against the templates.
    text.gsub!("<![CDATA[-->]]>", '\&*/')
    # Delete any zero-width word joiners added for XSLT processing.
    text.delete!("\u2060")

    text
  end
end