Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch to generating HTML ourselves #99

Closed
wants to merge 11 commits into from
Closed
12 changes: 9 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
0.7.0
-----

* Many small bug fixes.

* Non-ASCII text is generally handled better.

* HTML output is no longer "pretty". This will be fixed in a future release.

* Table of contents no longer includes inline styles.

* Maruku has been relicensed under the MIT license.

* Maruku now uses Nokogiri to parse and output HTML, fixing many bugs and
* Maruku now uses its own method to output HTML, fixing many bugs and
providing a big speedup.

* JRuby no longer obfuscates email addresses due to a Nokogiri bug.

* Maruku produces Unicode characters in the output HTML in many cases where
before it produced XML entity references.

Expand Down
2 changes: 1 addition & 1 deletion bin/maruku
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ end.each do |filename, input|
out = doc.to_md
when :s5
suffix = '_s5slides.html'
out = doc.to_s5(:content_only => false)
out = doc.to_s5(:content_only => false, :print_slides => true)
end
end

Expand Down
3 changes: 1 addition & 2 deletions lib/maruku/defaults.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ module MaRuKu
:filter_html => false,

:html_math_output_mathml => true, # also set :html_math_engine
:html_math_engine => 'none', # none, ritex, itex2mml
:html_math_engine => 'none', # none, ritex, itex2mml, blahtex

:html_math_output_png => false,
:html_png_engine => 'none',
Expand All @@ -26,7 +26,6 @@ module MaRuKu

:latex_use_listings => false,
:latex_cjk => false,
:latex_cache_file => "blahtex_cache.pstore", # cache file for blahtex filter

:debug_keep_ials => false,
:doc_prefix => '',
Expand Down
98 changes: 52 additions & 46 deletions lib/maruku/ext/math/mathml_engines/blahtex.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
require 'tempfile'
require 'fileutils'
require 'digest/md5'
require 'pstore'
require 'nokogiri'
require 'rexml/document'

module MaRuKu::Out::HTML
PNG = Struct.new(:src, :depth, :height)
Expand All @@ -14,32 +12,28 @@ def convert_to_png_blahtex(kind, tex)
md5sum = Digest::MD5.hexdigest(tex + " params: ")
result_file = File.join(get_setting(:html_png_dir), md5sum + ".txt")

unless File.exists?(result_file)
Tempfile.open('maruku_blahtex') do |tmp_in|
tmp_in.write tex
tmp_in.close

# It's important that we don't replace *all* newlines,
# because newlines in arguments get escaped as "'\n'".
system <<COMMAND.gsub("\n ", " ")
blahtex --png --use-preview-package
--shell-dvipng #{Shellwords.shellescape("dvipng -D #{Shellwords.shellescape(get_setting(:html_png_resolution).to_s)}")}
#{'--displaymath' if kind == :equation}
--temp-directory #{Shellwords.shellescape(get_setting(:html_png_dir))}
--png-directory #{Shellwords.shellescape(get_setting(:html_png_dir))}
< #{Shellwords.shellescape(tmp_in.path)}
> #{Shellwords.shellescape(result_file)}
COMMAND
end
if File.exists?(result_file)
result = File.read(result_file)
else
args = [
'--png',
'--use-preview-package',
'--shell-dvipng',
"dvipng -D #{Shellwords.shellescape(get_setting(:html_png_resolution).to_s)}",
"--temp-directory #{Shellwords.shellescape(get_setting(:html_png_dir))}",
"--png-directory #{Shellwords.shellescape(get_setting(:html_png_dir))}"
]
args << '--displaymath' if kind == :equation

result = run_blahtex(tex, args)
end

result = File.read(result_file)
if result.nil? || result.empty?
maruku_error "Blahtex error: empty output"
return
end

doc = Nokogiri::XML::Document.parse(result)
doc = REXML::Document.new(result)
png = doc.root.elements.to_a.first
if png.name != 'png'
maruku_error "Blahtex error: \n#{doc}"
Expand All @@ -51,39 +45,51 @@ def convert_to_png_blahtex(kind, tex)
raise "No md5 element in:\n #{doc}" unless md5 = png.xpath('//md5')[0]

depth = depth.text.to_f
height = height.text.to_f # TODO: check != 0
height = height.text.to_f
raise "Height or depth was 0! in \n #{doc}" if height == 0 || depth == 0

md5 = md5.text

PNG.new("#{get_setting(:html_png_url)}#{md5}.png", depth, height)
rescue x=> e
rescue => e
maruku_error "Error: #{e}"
nil
end

def convert_to_mathml_blahtex(kind, tex)
@@BlahtexCache ||= PStore.new(get_setting(:latex_cache_file))

@@BlahtexCache.transaction do
if @@BlahtexCache[tex].nil?
Tempfile.open('maruku_blahtex') do |tmp_in|
tmp_in.write tex

Tempfile.new('maruku_blahtex') do |tmp_out|
system "blahtex --mathml < #{Shellwords.shellescape(tmp_in.path)} > #{Shellwords.shellescape(tmp_out.path)}"
@@BlahtexCache[tex] = tmp_out.read
end
end
end

blahtex = @@BlahtexCache[tex]
doc = Nokogiri::XML::Document.parse(result)
unless mathml = doc.css('mathml').first
maruku_error "Blahtex error: \n#{doc}"
return
end

return mathml
result = run_blahtex(tex, %w[--mathml])

doc = REXML::Document.new(result)
mathml = doc.get_elements('//markup').to_a.first
unless mathml
maruku_error "Blahtex error: \n#{doc}"
return nil
end

mathml.name = 'math'
mathml.attributes['xmlns'] = "http://www.w3.org/1998/Math/MathML"
mathml.attributes['display'] = (kind == :inline) ? :inline : :block

MaRuKu::HTMLFragment.new(mathml.to_s)
rescue => e
maruku_error "Error: #{e}"
nil
end

private

# Pipe TeX source through the external `blahtex` program.
#
# @param tex [String] TeX source written to blahtex's standard input
# @param args [Array<String>] command-line arguments handed to blahtex
# @return [String] everything blahtex wrote to its standard output
# @raise [RuntimeError] when blahtex exits with a non-successful status
def run_blahtex(tex, args)
  command = ['blahtex', *args]

  IO.popen(command, 'w+') do |pipe|
    # Send the whole input, then close our write end so the child sees EOF.
    pipe.write(tex)
    pipe.close_write

    captured = pipe.read
    pipe.close_read

    # The exit status is inspected only after both ends have been closed.
    next captured if $?.success?

    raise "Error running blahtex"
  end
end
end
4 changes: 2 additions & 2 deletions lib/maruku/ext/math/mathml_engines/itex2mml.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ def convert_to_mathml_itex2mml(kind, tex)
maruku_error "Unknown itex2mml kind: #{kind}"
return
end
Nokogiri::XML::Document.parse(mathml.to_utf8).root

MaRuKu::HTMLFragment.new(mathml.to_utf8)
rescue => e
maruku_error "Invalid MathML TeX: \n#{tex.gsub(/^/, 'tex>')}\n\n #{e.inspect}"
nil
Expand Down
3 changes: 0 additions & 3 deletions lib/maruku/ext/math/mathml_engines/none.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
require 'maruku/string_utils'
require 'nokogiri'

module MaRuKu::Out::HTML
def convert_to_mathml_none(kind, tex)
code = xelem('code')
Expand Down
34 changes: 14 additions & 20 deletions lib/maruku/ext/math/to_html.rb
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,12 @@
#
#=end

require 'nokogiri'

module MaRuKu
module Out
module HTML
# Creates an xml Mathml document of this node's TeX code.
#
# @return Nokogiri::XML::Document]
# @return [MaRuKu::Out::HTML::HTMLElement]
def render_mathml(kind, tex)
engine = get_setting(:html_math_engine)
method = "convert_to_mathml_#{engine}"
Expand Down Expand Up @@ -94,35 +92,32 @@ def adjust_png(png, use_depth)
img['src'] = src
img['style'] = style
img['alt'] = "$#{self.math.strip}$"
img['class'] = 'maruku-png'
img
end

def to_html_inline_math
mathml = get_setting(:html_math_output_mathml) && render_mathml(:inline, self.math)
png = get_setting(:html_math_output_png) && render_png(:inline, self.math)

span = create_html_element 'span'
span['class'] = 'maruku-inline'

if mathml
mathml['class'] = 'maruku-mathml'
return mathml
mathml.add_class('maruku-mathml')
return mathml.to_html
end

if png
img = adjust_png(png, true)
add_class_to(img, 'maruku-png')
span << img
end
png = get_setting(:html_math_output_png) && render_png(:inline, self.math)

span
HTMLElement.new 'span', 'class' => 'maruku-inline' do
# TODO: It seems weird that we output an empty span if there's no PNG
if png
adjust_png(png, true)
end
end
end

def to_html_equation
mathml = get_setting(:html_math_output_mathml) && render_mathml(:equation, self.math)
png = get_setting(:html_math_output_png) && render_png(:equation, self.math)

div = create_html_element 'div'
div = xelem('div')
div['class'] = 'maruku-equation'
if mathml
if self.label # then numerate
Expand All @@ -132,13 +127,12 @@ def to_html_equation
div << span
div['id'] = "eq:#{self.label}"
end
add_class_to(mathml, 'maruku-mathml')
div << mathml
mathml.add_class('maruku-mathml')
div << mathml.to_html
end

if png
img = adjust_png(png, false)
add_class_to(img, 'maruku-png')
div << img
if self.label # then numerate
span = xelem('span')
Expand Down
27 changes: 19 additions & 8 deletions lib/maruku/html.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,15 @@ class NokogiriHTMLFragment
#
# @param raw_html [String] HTML as a string.
def initialize(raw_html)
d = Nokogiri::XML::Document.new

# Make sure the SVG namespace is known
root = Nokogiri::XML::Element.new('html', d)
root.add_namespace('svg', "http://www.w3.org/2000/svg" )

@fragment = Nokogiri::XML::DocumentFragment.new(d, raw_html, d)
# Wrap our HTML in a dummy document with a doctype (just
# for the entity references)
wrapped = '<!DOCTYPE html PUBLIC
"-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
"http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
<html>' + raw_html.strip + '</html>'

d = Nokogiri::XML::Document.parse(wrapped) {|c| c.nonet }
@fragment = d.root
end

# @return The name of the first child element in the fragment.
Expand All @@ -48,6 +50,13 @@ def first_node_name
first_child ? first_child.name : nil
end

# Append +class_name+ to the "class" attribute of every child of this
# fragment. Existing class names are kept (re-joined with single spaces);
# a child without a "class" attribute ends up with just +class_name+.
#
# @param class_name [String] the class name to append
def add_class(class_name)
  @fragment.children.each do |node|
    names = (node['class'] || '').split(' ')
    names << class_name
    node['class'] = names.join(' ')
  end
end

# Process markdown within the contents of some elements and
# replace their contents with the processed version.
#
Expand Down Expand Up @@ -98,7 +107,9 @@ def process_markdown_inside_elements(doc)
def to_html
output_options = Nokogiri::XML::Node::SaveOptions::DEFAULT_XHTML ^
Nokogiri::XML::Node::SaveOptions::FORMAT
@fragment.to_xml(:save_with => output_options, :encoding => 'UTF-8')
@fragment.children.inject("") do |out, child|
out << child.serialize(:save_with => output_options, :encoding => 'UTF-8')
end
end

private
Expand Down
34 changes: 22 additions & 12 deletions lib/maruku/input/html_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -101,25 +101,35 @@ def eat_this(line)
end
when :inside_script_style
if @m = CData.match(@rest)
my_debug "#{@state}: CDATA: #{@m.to_s.inspect}"
@already << @m.pre_match << @m.to_s
@rest = @m.post_match
self.state = :inside_cdata
if @already.rstrip.end_with?('<![CDATA[')
@already << @m.pre_match
@rest = @m.post_match
else
my_debug "#{@state}: CDATA: #{@m.to_s.inspect}"
@already << @m.pre_match << @m.to_s
@rest = @m.post_match
self.state = :inside_cdata
end
elsif @m = Tag.match(@rest)
is_closing = !!@m[1]
tag = @m[2]
if is_closing && tag == @tag_stack.last
my_debug "#{@state}: matched #{@m.to_s.inspect}"
# TODO: This is necessary for REXML to properly parse
# script tags
# @already << @m.pre_match << "]]>"
@already << @m.pre_match
@rest = @m.post_match
# This is necessary to properly parse
# script tags
@already << "]]>" unless @already.rstrip.end_with?("]]>")
self.state = :inside_element
handle_tag
handle_tag false # don't double-add pre_match
else
@already << @rest
@rest = ""
end
elsif @m = EverythingElse.match(@rest)
my_debug "#{@state}: Everything: #{@m.to_s.inspect}"
@already << @m.pre_match << @m.to_s
@rest = @m.post_match
else
@already << @rest
@rest = ""
Expand All @@ -132,8 +142,8 @@ def eat_this(line)
end
end

def handle_tag
@already << @m.pre_match
def handle_tag(add_pre_match = true)
@already << @m.pre_match if add_pre_match
@rest = @m.post_match

is_closing = !!@m[1]
Expand Down Expand Up @@ -177,9 +187,9 @@ def handle_tag
end

if %w(script style).include?(@tag_stack.last)
# TODO: This is necessary for REXML to properly parse
# This is necessary to properly parse
# script tags
# @already << "<![CDATA["
@already << "<![CDATA["
self.state = :inside_script_style
end
end
Expand Down