"
end
+
+ private
+
+ def children_to_docx
+ ""
+ end
end
end
end
diff --git a/lib/sablon/html/ast_builder.rb b/lib/sablon/html/ast_builder.rb
new file mode 100644
index 00000000..a0e4130e
--- /dev/null
+++ b/lib/sablon/html/ast_builder.rb
@@ -0,0 +1,90 @@
+module Sablon
+ class HTMLConverter
+ # Converts a nokogiri HTML fragment into an equivalent AST structure
+ class ASTBuilder
+ attr_reader :nodes
+
+ def self.html_to_ast(env, nodes, properties)
+ builder = new(env, nodes, properties)
+ builder.nodes
+ end
+
+ private
+
+ def initialize(env, nodes, properties)
+ @env = env # assigned first: process_nodes passes @env on to the AST classes
+ @nodes = process_nodes(nodes, properties).compact # nil results are dropped
+ end
+
+ # Loops over HTML nodes converting them to their configured AST class
+ def process_nodes(html_nodes, properties)
+ html_nodes.flat_map do |node|
+ # get tags from config
+ parent_tag = fetch_tag(node.parent.name) if node.parent.name
+ tag = fetch_tag(node.name)
+
+ # remove all text nodes if the tag doesn't accept them
+ node.search('./text()').remove if drop_text?(tag)
+
+ # check node hierarchy
+ validate_structure(parent_tag, tag)
+
+ # merge properties
+ local_props = merge_node_properties(node, tag, properties)
+ if tag.ast_class
+ tag.ast_class.new(@env, node, local_props)
+ else
+ process_nodes(node.children, local_props)
+ end
+ end
+ end
+
+ # retrieves a HTMLTag instance from the permitted_html_tags hash or
+ # raises an ArgumentError if the tag is not registered in the hash
+ def fetch_tag(tag_name)
+ tag_name = tag_name.to_sym
+ unless Sablon::Configuration.instance.permitted_html_tags[tag_name]
+ raise ArgumentError, "Don't know how to handle HTML tag: #{tag_name}"
+ end
+ Sablon::Configuration.instance.permitted_html_tags[tag_name]
+ end
+
+ # Checking that the current tag is an allowed child of the parent_tag.
+ # If the parent tag is nil then a block level tag is required.
+ def validate_structure(parent, child)
+ if parent.ast_class == Root && child.type == :inline
+ msg = "#{child.name} needs to be wrapped in a block level tag."
+ elsif parent && !parent.allowed_child?(child)
+ msg = "#{child.name} is not a valid child element of #{parent.name}."
+ else
+ return
+ end
+ raise ContextError, "Invalid HTML structure: #{msg}"
+ end
+
+ # If the node doesn't allow inline elements, or text specifically,
+ # drop all text nodes. This is largely meant to prevent whitespace
+ # between tags from raising an invalid structure error. Note that it
+ # purges the text nodes whether they contain nonblank characters or not.
+ def drop_text?(child)
+ text = fetch_tag(:text)
+ !child.allowed_child?(text)
+ end
+
+ # Merges properties in order of precedence: parent, then tag, then inline styles
+ def merge_node_properties(node, tag, parent_properties)
+ # Process any styles, defined on the node into a hash
+ if node['style']
+ style_props = node['style'].split(';').map do |prop|
+ prop.split(':').map(&:strip)
+ end
+ style_props = Hash[style_props]
+ else
+ style_props = {}
+ end
+ # allow inline styles to override parent styles passed down
+ parent_properties.merge(tag.properties).merge(style_props)
+ end
+ end
+ end
+end
diff --git a/lib/sablon/html/converter.rb b/lib/sablon/html/converter.rb
index 05cc48b0..7e5d6f05 100644
--- a/lib/sablon/html/converter.rb
+++ b/lib/sablon/html/converter.rb
@@ -3,69 +3,8 @@
module Sablon
class HTMLConverter
- class ASTBuilder
- Layer = Struct.new(:items, :ilvl)
-
- def initialize(nodes)
- @layers = [Layer.new(nodes, false)]
- @root = Root.new([])
- end
-
- def to_ast
- @root
- end
-
- def new_layer(ilvl: false)
- @layers.push Layer.new([], ilvl)
- end
-
- def next
- current_layer.items.shift
- end
-
- def push(node)
- @layers.last.items.push node
- end
-
- def push_all(nodes)
- nodes.each(&method(:push))
- end
-
- def done?
- !current_layer.items.any?
- end
-
- def nested?
- ilvl > 0
- end
-
- def ilvl
- @layers.select { |layer| layer.ilvl }.size - 1
- end
-
- def emit(node)
- @root.nodes << node
- end
-
- private
-
- def current_layer
- if @layers.any?
- last_layer = @layers.last
- if last_layer.items.any?
- last_layer
- else
- @layers.pop
- current_layer
- end
- else
- Layer.new([], false)
- end
- end
- end
-
def process(input, env)
- @numbering = env.numbering
+ @env = env
processed_ast(input).to_docx
end
@@ -77,75 +16,7 @@ def processed_ast(input)
def build_ast(input)
doc = Nokogiri::HTML.fragment(input)
- @builder = ASTBuilder.new(doc.children)
-
- while !@builder.done?
- ast_next_paragraph
- end
- @builder.to_ast
- end
-
- private
-
- def initialize
- @numbering = nil
- end
-
- def ast_next_paragraph
- node = @builder.next
- if node.name == 'div'
- @builder.new_layer
- @builder.emit Paragraph.new('Normal', ast_text(node.children))
- elsif node.name == 'p'
- @builder.new_layer
- @builder.emit Paragraph.new('Paragraph', ast_text(node.children))
- elsif node.name =~ /h(\d+)/
- @builder.new_layer
- @builder.emit Paragraph.new("Heading#{$1}", ast_text(node.children))
- elsif node.name == 'ul'
- @builder.new_layer ilvl: true
- unless @builder.nested?
- @definition = @numbering.register('ListBullet')
- end
- @builder.push_all(node.children)
- elsif node.name == 'ol'
- @builder.new_layer ilvl: true
- unless @builder.nested?
- @definition = @numbering.register('ListNumber')
- end
- @builder.push_all(node.children)
- elsif node.name == 'li'
- @builder.new_layer
- @builder.emit ListParagraph.new(@definition.style, ast_text(node.children), @definition.numid, @builder.ilvl)
- elsif node.text?
- # SKIP?
- else
- raise ArgumentError, "Don't know how to handle node: #{node.inspect}"
- end
- end
-
- def ast_text(nodes, format: TextFormat.default)
- runs = nodes.flat_map do |node|
- if node.text?
- Text.new(node.text, format)
- elsif node.name == 'br'
- Newline.new
- elsif node.name == 'span'
- ast_text(node.children).nodes
- elsif node.name == 'strong' || node.name == 'b'
- ast_text(node.children, format: format.with_bold).nodes
- elsif node.name == 'em' || node.name == 'i'
- ast_text(node.children, format: format.with_italic).nodes
- elsif node.name == 'u'
- ast_text(node.children, format: format.with_underline).nodes
- elsif ['ul', 'ol', 'p', 'div'].include?(node.name)
- @builder.push(node)
- nil
- else
- raise ArgumentError, "Don't know how to handle node: #{node.inspect}"
- end
- end
- Collection.new(runs.compact)
+ Root.new(@env, doc)
end
end
end
diff --git a/lib/sablon/processor/document.rb b/lib/sablon/processor/document.rb
index 3d5e8fc4..91f56eac 100644
--- a/lib/sablon/processor/document.rb
+++ b/lib/sablon/processor/document.rb
@@ -118,8 +118,8 @@ def self.encloses?(start_field, end_field)
end
class ImageBlock < ParagraphBlock
- def self.parent(node)
- node.ancestors
+ def self.placeholder(node)
+ parent(node).xpath('following-sibling::w:p')
end
def self.encloses?(start_field, end_field)
@@ -133,9 +133,9 @@ def replace(content)
return
end
- pic_prop = self.class.parent(start_field).at_xpath('.//pic:cNvPr', pic: Sablon::Processor::Relationships::PICTURE_NS_URI)
+ pic_prop = self.class.placeholder(start_field).at_xpath('.//pic:cNvPr', pic: Sablon::Processor::Relationships::PICTURE_NS_URI)
pic_prop.attributes['name'].value = content.first.name
- blip = self.class.parent(start_field).at_xpath('.//a:blip', a: Sablon::Processor::Relationships::MAIN_NS_URI)
+ blip = self.class.placeholder(start_field).at_xpath('.//a:blip', a: Sablon::Processor::Relationships::MAIN_NS_URI)
blip.attributes['embed'].value = content.first.rid
start_field.remove
end_field.remove
@@ -196,7 +196,7 @@ def consume(allow_insertion)
when /([^ ]+):if/
block = consume_block("#{$1}:endIf")
Statement::Condition.new(Expression.parse($1), block)
- when /comment/
+ when /^comment$/
block = consume_block("endComment")
Statement::Comment.new(block)
when /^@([^ ]+):start/
diff --git a/sablon.gemspec b/sablon.gemspec
index b5fc8532..0a26932b 100644
--- a/sablon.gemspec
+++ b/sablon.gemspec
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
spec.require_paths = ["lib"]
spec.add_runtime_dependency 'nokogiri', ">= 1.6.0"
- spec.add_runtime_dependency 'rubyzip', ">= 1.1"
+ spec.add_runtime_dependency 'rubyzip', ">= 1.1.1"
spec.add_development_dependency "bundler", ">= 1.6"
spec.add_development_dependency "rake", "~> 10.0"
diff --git a/test/configuration_test.rb b/test/configuration_test.rb
new file mode 100644
index 00000000..d62a659d
--- /dev/null
+++ b/test/configuration_test.rb
@@ -0,0 +1,122 @@
+# -*- coding: utf-8 -*-
+require "test_helper"
+
+class ConfigurationTest < Sablon::TestCase
+ def setup
+ super
+ @config = Sablon::Configuration.send(:new)
+ end
+
+ def test_register_tag
+ options = {
+ ast_class: :paragraph,
+ attributes: { dummy: 'value' },
+ properties: { pstyle: 'ListBullet' },
+ allowed_children: %i[_inline ol ul li]
+ }
+ # test initialization without type
+ tag = @config.register_html_tag(:test_tag, **options)
+ assert_equal @config.permitted_html_tags[:test_tag], tag
+ assert_equal tag.name, :test_tag
+ assert_equal tag.type, :inline
+ assert_equal tag.ast_class, Sablon::HTMLConverter::Paragraph
+ assert_equal tag.attributes, dummy: 'value'
+ assert_equal tag.properties, pstyle: 'ListBullet'
+ assert_equal tag.allowed_children, %i[_inline ol ul li]
+
+ # test initialization with type
+ tag = @config.register_html_tag('test_tag2', :block, **options)
+ assert_equal @config.permitted_html_tags[:test_tag2], tag
+ assert_equal tag.name, :test_tag2
+ assert_equal tag.type, :block
+ end
+
+ def test_remove_tag
+ tag = @config.register_html_tag(:test)
+ assert_equal @config.remove_html_tag(:test), tag
+ assert_nil @config.permitted_html_tags[:test]
+ end
+
+ def test_register_style_converter_on_existing_ast_class
+ converter = ->(v) { return "test-attr-#{v}" }
+ @config.register_style_converter(:run, 'my-test-attr', converter)
+ #
+ assert @config.defined_style_conversions[:run]['my-test-attr'], 'converter should be stored in hash'
+ assert_equal 'test-attr-123', @config.defined_style_conversions[:run]['my-test-attr'].call(123)
+ end
+
+ def test_register_style_converter_on_newast_class
+ converter = ->(v) { return "test-attr-#{v}" }
+ @config.register_style_converter(:unset_ast_class, 'my-test-attr', converter)
+ #
+ assert @config.defined_style_conversions[:unset_ast_class]['my-test-attr'], 'converter should be stored in hash'
+ end
+
+ def test_remove_style_converter
+ converter = ->(v) { return "test-attr-#{v}" }
+ converter = @config.register_style_converter(:run, 'my-test-attr', converter)
+ #
+ assert_equal converter, @config.remove_style_converter(:run, 'my-test-attr')
+ assert_nil @config.defined_style_conversions[:run]['my-test-attr']
+ end
+end
+
+class ConfigurationHTMLTagTest < Sablon::TestCase
+ # test basic instantiation of an HTMLTag
+ def test_html_tag_defaults
+ tag = Sablon::Configuration::HTMLTag.new(:a, :inline)
+ assert_equal tag.name, :a
+ assert_equal tag.type, :inline
+ assert_nil tag.ast_class
+ assert_equal tag.attributes, {}
+ assert_equal tag.properties, {}
+ assert_equal tag.allowed_children, %i[_inline ol ul]
+ end
+
+ # Exercises more of the logic that coerces the arguments into valid values
+ def test_html_tag_full_init
+ args = ['a', 'inline', ast_class: Sablon::HTMLConverter::Run]
+ tag = Sablon::Configuration::HTMLTag.new(*args)
+ assert_equal tag.name, :a
+ assert_equal tag.type, :inline
+ assert_equal tag.ast_class, Sablon::HTMLConverter::Run
+ #
+ options = {
+ ast_class: :run,
+ attributes: { dummy: 'value1' },
+ properties: { dummy2: 'value2' },
+ allowed_children: 'text'
+ }
+ tag = Sablon::Configuration::HTMLTag.new('a', 'inline', **options)
+ #
+ assert_equal tag.name, :a
+ assert_equal tag.type, :inline
+ assert_equal tag.ast_class, Sablon::HTMLConverter::Run
+ assert_equal tag.attributes, dummy: 'value1'
+ assert_equal tag.properties, dummy2: 'value2'
+ assert_equal tag.allowed_children, [:text]
+ end
+
+ def test_html_tag_init_block_without_class
+ e = assert_raises ArgumentError do
+ Sablon::Configuration::HTMLTag.new(:form, :block)
+ end
+ assert_equal "Block level tag form must have an AST class.", e.message
+ end
+
+ def test_html_tag_allowed_children
+ # define different tags for testing
+ text = Sablon::Configuration::HTMLTag.new(:text, :inline)
+ div = Sablon::Configuration::HTMLTag.new(:div, :block, ast_class: :paragraph)
+ olist = Sablon::Configuration::HTMLTag.new(:ol, :block, ast_class: :paragraph, allowed_children: %i[_block])
+
+ # test default allowances
+ assert div.allowed_child?(text) # all inline elements allowed
+ assert div.allowed_child?(olist) # ol is allowed by name even though it is block level
+ assert_equal div.allowed_child?(div), false # other block elms are not allowed
+
+ # test olist with allowances for all blocks but no inline
+ assert olist.allowed_child?(div) # all block elements allowed
+ assert_equal olist.allowed_child?(text), false # no inline elements
+ end
+end
diff --git a/test/fixtures/html/html_test_content.html b/test/fixtures/html/html_test_content.html
new file mode 100644
index 00000000..6c580071
--- /dev/null
+++ b/test/fixtures/html/html_test_content.html
@@ -0,0 +1,164 @@
+Sablon HTML insertion
+
+Text
+
+
+ Lorem ipsum dolor sit
+ amet, consectetur adipiscing elit.
+ Suspendisse a tempus turpis. Duis urna justo,
+ vehicula vitae ultricies vel, congue at sem. Fusce turpis
+ turpis, aliquet id pulvinar aliquam, iaculis non elit. Nulla feugiat
+ lectus nulla, in dictum ipsum cursus ac. Quisque at odio neque.
+ Sed ac tortor iaculis, bibendum leo ut, malesuada velit. Donec iaculis
+ sed urna eget pharetra. Praesent ornare fermentum turpis, placerat
+ iaculis urna bibendum vitae. Nunc in quam consequat, tristique tellus in,
+ commodo turpis. Curabitur ullamcorper odio purus, lobortis egestas magna
+ laoreet vitae. Nunc fringilla velit ante, eu aliquam nisi cursus vitae.
+ Suspendisse sit amet dui egestas, volutpat
+ nisi vel, mattis justo. Nullam pellentesque, ipsum eget blandit pharetra,
+ augue elit aliquam mauris, vel mollis nisl augue ut
+ ipsum.
+
+
+Lists
+
+
+ -
+ Vestibulum
+
+ - ante ipsum primis
+
+
+ -
+ in faucibus orci luctus
+
+ - et ultrices posuere cubilia Curae;
+
+ - Aliquam vel dolor
+ - sed sem maximus
+
+
+ -
+ fermentum in non odio.
+
+ - Fusce hendrerit ornare mollis.
+
+
+ - Nunc scelerisque nibh nec turpis tempor pulvinar.
+
+
+ - Donec eros turpis,
+ -
+ aliquet vel volutpat sit amet,
+
+ - semper eu purus.
+ -
+ Proin ac erat nec urna efficitur vulputate.
+
+ - Quisque varius convallis ultricies.
+ - Nullam vel fermentum eros.
+
+
+
+
+
+
+
+ Pellentesque nulla leo, auctor ornare erat sed, rhoncus congue diam.
+ Duis non porttitor nulla, ut eleifend enim. Pellentesque non tempor sem.
+
+
+Mauris auctor egestas arcu,
+
+
+ - id venenatis nibh dignissim id.
+ - In non placerat metus.
+
+
+
+ - Nunc sed consequat metus.
+ - Nulla consectetur lorem consequat,
+ - malesuada dui at, lacinia lectus.
+
+
+
+ - Aliquam efficitur
+ - lorem a mauris feugiat,
+ - at semper eros pellentesque.
+
+
+
+ Nunc lacus diam, consectetur ut odio sit amet, placerat pharetra erat.
+ Sed commodo ut sem id congue. Sed eget neque elit. Curabitur at erat tortor.
+ Maecenas eget sapien vitae est sagittis accumsan et nec orci. Integer
+ luctus at nisl eget venenatis. Nunc nunc eros, consectetur at tortor et,
+ tristique ultrices elit. Nulla in turpis nibh.
+
+
+
+ -
+ Nam consectetur
+
+
+ -
+ Aenean
+
+
+ - Duis faucibus nunc nec venenatis faucibus.
+ - Aliquam erat volutpat.
+
+
+ Quisque non neque ut lacus eleifend volutpat quis sed lacus.
+
Praesent ultrices purus eu quam elementum, sit amet faucibus elit
+ interdum. In lectus orci,
elementum quis dictum ac, porta ac ante.
+ Fusce tempus ac mauris id cursus. Phasellus a erat nulla. Mauris dolor orci,
+ malesuada auctor dignissim non, posuere nec odio. Etiam hendrerit
+ justo nec diam ullamcorper, nec blandit elit sodales.
+
+
+
+
+ Ut eget auctor enim.
+ Quisque id
+ neque eu nibh feugiat imperdiet
+ id ut dui. Ut auctor libero eget
+ massa tristique pharetra. Cras tincidunt finibus sapien, ut maximus
+ tortor tempor at. Proin pulvinar
+ pretium justo vitae malesuada. Suspendisse porta purus eget tortor
+ tincidunt vestibulum. Maecenas id egestas purus, quis vulputate
+ lacus. Quisque non
+ eleifend est.
+
+
+
+ - Item 1
+ - Item 2
+
+ - Nested 1
+ -
+ Nested 2
+
+ - Nested 2.1
+ - Nested 2.2
+ - Nested 2.3
+
+
+
+ - Item 3
+
diff --git a/test/fixtures/html_sample.docx b/test/fixtures/html_sample.docx
index 2a7b8879..abe2b44c 100644
Binary files a/test/fixtures/html_sample.docx and b/test/fixtures/html_sample.docx differ
diff --git a/test/fixtures/xml/comment_block_and_comment_as_key.xml b/test/fixtures/xml/comment_block_and_comment_as_key.xml
new file mode 100644
index 00000000..0eae8a5d
--- /dev/null
+++ b/test/fixtures/xml/comment_block_and_comment_as_key.xml
@@ -0,0 +1,31 @@
+Before
+
+
+
+
+ «comment»
+
+
+
+
+
+ Inside Comment!
+
+
+
+
+
+
+ «endComment»
+
+
+
+
+
+
+
+ «=comment»
+
+
+
+After
\ No newline at end of file
diff --git a/test/html/ast_builder_test.rb b/test/html/ast_builder_test.rb
new file mode 100644
index 00000000..021622db
--- /dev/null
+++ b/test/html/ast_builder_test.rb
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+require "test_helper"
+
+# Tests some low level private methods in the ASTBuilder class. #process_nodes
+# and self.html_to_ast are covered extensively in converter_test.rb
+class HTMLConverterASTBuilderTest < Sablon::TestCase
+ def setup
+ super
+ @env = Sablon::Environment.new(nil)
+ end
+
+ def test_fetch_tag
+ @bulider = new_builder
+ tag = Sablon::Configuration.instance.permitted_html_tags[:span]
+ assert_equal @bulider.send(:fetch_tag, :span), tag
+ # check that strings are converted into symbols
+ assert_equal @bulider.send(:fetch_tag, 'span'), tag
+ # test unknown tag raises error
+ e = assert_raises ArgumentError do
+ @bulider.send(:fetch_tag, :unknown_tag)
+ end
+ assert_equal "Don't know how to handle HTML tag: unknown_tag", e.message
+ end
+
+ def test_validate_structure
+ @bulider = new_builder
+ root = Sablon::Configuration.instance.permitted_html_tags['#document-fragment'.to_sym]
+ div = Sablon::Configuration.instance.permitted_html_tags[:div]
+ span = Sablon::Configuration.instance.permitted_html_tags[:span]
+ # test valid relationship
+ assert_nil @bulider.send(:validate_structure, div, span)
+ # test inverted relationship
+ e = assert_raises ArgumentError do
+ @bulider.send(:validate_structure, span, div)
+ end
+ assert_equal "Invalid HTML structure: div is not a valid child element of span.", e.message
+ # test inline tag with no parent
+ e = assert_raises ArgumentError do
+ @bulider.send(:validate_structure, root, span)
+ end
+ assert_equal "Invalid HTML structure: span needs to be wrapped in a block level tag.", e.message
+ end
+
+ def test_merge_properties
+ @builder = new_builder
+ node = Nokogiri::HTML.fragment('Test').children[0]
+ tag = Struct.new(:properties).new(rStyle: 'Normal')
+ # test that properties are merged across all three arguments
+ props = @builder.send(:merge_node_properties, node, tag, 'background-color' => '#00F')
+ assert_equal({ 'background-color' => '#00F', rStyle: 'Normal', 'color' => '#F00', 'text-decoration' => 'underline wavy' }, props)
+ # test that parent properties are overriden by tag properties
+ props = @builder.send(:merge_node_properties, node, tag, rStyle: 'Citation', 'background-color' => '#00F')
+ assert_equal({ 'background-color' => '#00F', rStyle: 'Normal', 'color' => '#F00', 'text-decoration' => 'underline wavy' }, props)
+ # test that inline properties override parent styles
+ node = Nokogiri::HTML.fragment('Test').children[0]
+ props = @builder.send(:merge_node_properties, node, tag, 'color' => '#00F')
+ assert_equal({ rStyle: 'Normal', 'color' => '#F00' }, props)
+ end
+
+ private
+
+ def new_builder(nodes = [], properties = {})
+ Sablon::HTMLConverter::ASTBuilder.new(@env, nodes, properties)
+ end
+end
diff --git a/test/html/ast_test.rb b/test/html/ast_test.rb
new file mode 100644
index 00000000..c232ec67
--- /dev/null
+++ b/test/html/ast_test.rb
@@ -0,0 +1,117 @@
+# -*- coding: utf-8 -*-
+require "test_helper"
+
+class HTMLConverterASTTest < Sablon::TestCase
+ def setup
+ super
+ @converter = Sablon::HTMLConverter.new
+ @converter.instance_variable_set(:@env, Sablon::Environment.new(nil))
+ end
+
+ def test_div
+ input = 'Lorem ipsum dolor sit amet
'
+ ast = @converter.processed_ast(input)
+ assert_equal ']>]>', ast.inspect
+ end
+
+ def test_p
+ input = 'Lorem ipsum dolor sit amet
'
+ ast = @converter.processed_ast(input)
+ assert_equal ']>]>', ast.inspect
+ end
+
+ def test_b
+ input = 'Lorem ipsum dolor sit amet
'
+ ast = @converter.processed_ast(input)
+ assert_equal ', ]>]>', ast.inspect
+ end
+
+ def test_i
+ input = 'Lorem ipsum dolor sit amet
'
+ ast = @converter.processed_ast(input)
+ assert_equal ', ]>]>', ast.inspect
+ end
+
+ def test_br_in_strong
+ input = 'Lorem
ipsum
dolor
'
+ par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
+ assert_equal "[, , , , ]", par.runs.inspect
+ end
+
+ def test_br_in_em
+ input = 'Lorem
ipsum
dolor
'
+ par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
+ assert_equal "[, , , , ]", par.runs.inspect
+ end
+
+ def test_nested_strong_and_em
+ input = 'Lorem ipsum dolor
'
+ par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
+ assert_equal "[, , ]", par.runs.inspect
+ end
+
+ def test_ignore_last_br_in_div
+ input = 'Lorem ipsum dolor sit amet
'
+ par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
+ assert_equal "[]", par.runs.inspect
+ end
+
+ def test_ignore_br_in_blank_div
+ input = '
'
+ par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
+ assert_equal "[]", par.runs.inspect
+ end
+
+ def test_headings
+ input = 'First
Second
Third
'
+ ast = @converter.processed_ast(input)
+ assert_equal "]>, ]>, ]>]>", ast.inspect
+ end
+
+ def test_h_with_formatting
+ input = 'Lorem ipsum dolor sit amet
'
+ ast = @converter.processed_ast(input)
+ assert_equal ", , , ]>]>", ast.inspect
+ end
+
+ def test_ul
+ input = ''
+ ast = @converter.processed_ast(input)
+ assert_equal "]>, ]>]>]>", ast.inspect
+ end
+
+ def test_ol
+ input = '- Lorem
- ipsum
'
+ ast = @converter.processed_ast(input)
+ assert_equal "]>, ]>]>]>", ast.inspect
+ end
+
+ def test_num_id
+ ast = @converter.processed_ast('- Some
- Lorem
- dolor
- sit
')
+ assert_equal %w[1001 1001 1002 1003 1003], get_numpr_prop_from_ast(ast, :numId)
+ end
+
+ def test_nested_lists_have_the_same_numid
+ ast = @converter.processed_ast('')
+ assert_equal %w[1001 1001 1001], get_numpr_prop_from_ast(ast, :numId)
+ end
+
+ def test_keep_nested_list_order
+ input = ''
+ ast = @converter.processed_ast(input)
+ assert_equal %w[1001], get_numpr_prop_from_ast(ast, :numId).uniq
+ assert_equal %w[0 1 2 1 0 1 2], get_numpr_prop_from_ast(ast, :ilvl)
+ end
+
+ private
+
+ # returns the numid attribute from paragraphs
+ def get_numpr_prop_from_ast(ast, key)
+ values = []
+ ast.grep(Sablon::HTMLConverter::ListParagraph).each do |para|
+ numpr = para.instance_variable_get('@properties')[:numPr]
+ numpr.each { |val| values.push(val[key]) if val[key] }
+ end
+ values
+ end
+end
diff --git a/test/html/converter_test.rb b/test/html/converter_test.rb
index d2d619c5..b0193192 100644
--- a/test/html/converter_test.rb
+++ b/test/html/converter_test.rb
@@ -92,7 +92,7 @@ def test_convert_u_tags_inside_p
Lorem
-
+
ipsum dolor
sit amet
@@ -114,6 +114,54 @@ def test_convert_em_tags_inside_div
assert_equal normalize_wordml(expected_output), process(input)
end
+ def test_convert_s_tags_inside_p
+ input = 'Lorem ipsum dolor sit amet
'
+ expected_output = <<-DOCX.strip
+
+
+ Lorem
+
+
+ ipsum dolor
+
+ sit amet
+
+ DOCX
+ assert_equal normalize_wordml(expected_output), process(input)
+ end
+
+ def test_convert_sub_tags_inside_p
+ input = 'Lorem ipsum dolor sit amet
'
+ expected_output = <<-DOCX.strip
+
+
+ Lorem
+
+
+ ipsum dolor
+
+ sit amet
+
+ DOCX
+ assert_equal normalize_wordml(expected_output), process(input)
+ end
+
+ def test_convert_sup_tags_inside_p
+ input = 'Lorem ipsum dolor sit amet
'
+ expected_output = <<-DOCX.strip
+
+
+ Lorem
+
+
+ ipsum dolor
+
+ sit amet
+
+ DOCX
+ assert_equal normalize_wordml(expected_output), process(input)
+ end
+
def test_convert_br_tags_inside_strong
input = '
Lorem ipsum
dolor sit amet
'
expected_output = <<-DOCX
@@ -310,6 +358,13 @@ def test_nested_unordered_lists
assert_equal [Sablon::Numbering::Definition.new(1001, 'ListBullet')], @numbering.definitions
end
+ def test_unknown_tag
+ e = assert_raises ArgumentError do
+ process('')
+ end
+ assert_match(/Don't know how to handle HTML tag:/, e.message)
+ end
+
private
def process(input)
@@ -321,106 +376,329 @@ def normalize_wordml(wordml)
end
end
-class HTMLConverterASTTest < Sablon::TestCase
+class HTMLConverterStyleTest < Sablon::TestCase
def setup
super
+ @env = Sablon::Environment.new(nil)
@converter = Sablon::HTMLConverter.new
- @converter.instance_variable_set(:@numbering, Sablon::Environment.new(nil).numbering)
end
- def test_div
- input = 'Lorem ipsum dolor sit amet
'
- ast = @converter.processed_ast(input)
- assert_equal ']>]>', ast.inspect
+ # testing direct CSS style -> WordML conversion for paragraphs
+
+ def test_paragraph_with_background_color
+ input = ''
+ expected_output = para_with_ppr('')
+ assert_equal normalize_wordml(expected_output), process(input)
end
- def test_p
- input = 'Lorem ipsum dolor sit amet
'
- ast = @converter.processed_ast(input)
- assert_equal ']>]>', ast.inspect
+ def test_paragraph_with_borders
+ # Basic single line black border
+ input = ''
+ ppr = <<-DOCX.strip
+
+
+
+
+
+
+ DOCX
+ expected_output = para_with_ppr(ppr)
+ assert_equal normalize_wordml(expected_output), process(input)
+ # border with a line style
+ input = ''
+ ppr = <<-DOCX.strip
+
+
+
+
+
+
+ DOCX
+ expected_output = para_with_ppr(ppr)
+ assert_equal normalize_wordml(expected_output), process(input)
+ # border with line style and color
+ input = ''
+ ppr = <<-DOCX.strip
+
+
+
+
+
+
+ DOCX
+ expected_output = para_with_ppr(ppr)
+ assert_equal normalize_wordml(expected_output), process(input)
end
- def test_b
- input = 'Lorem ipsum dolor sit amet
'
- ast = @converter.processed_ast(input)
- assert_equal ', ]>]>', ast.inspect
+ def test_paragraph_with_text_align
+ input = ''
+ expected_output = para_with_ppr('')
+ assert_equal normalize_wordml(expected_output), process(input)
end
- def test_i
- input = 'Lorem ipsum dolor sit amet
'
- ast = @converter.processed_ast(input)
- assert_equal ', ]>]>', ast.inspect
+ def test_paragraph_with_vertical_align
+ input = ''
+ expected_output = para_with_ppr('')
+ assert_equal normalize_wordml(expected_output), process(input)
end
- def test_br_in_strong
- input = 'Lorem
ipsum
dolor
'
- par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
- assert_equal "[, , , , ]", par.runs.inspect
+ def test_paragraph_with_unsupported_property
+ input = ''
+ expected_output = para_with_ppr('')
+ assert_equal normalize_wordml(expected_output), process(input)
end
- def test_br_in_em
- input = 'Lorem
ipsum
dolor
'
- par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
- assert_equal "[, , , , ]", par.runs.inspect
+ def test_run_with_background_color
+ input = 'test
'
+ expected_output = run_with_rpr('')
+ assert_equal normalize_wordml(expected_output), process(input)
end
- def test_nested_strong_and_em
- input = 'Lorem ipsum dolor
'
- par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
- assert_equal "[, , ]", par.runs.inspect
+ def test_run_with_color
+ input = 'test
'
+ expected_output = run_with_rpr('')
+ assert_equal normalize_wordml(expected_output), process(input)
end
- def test_ignore_last_br_in_div
- input = 'Lorem ipsum dolor sit amet
'
- par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
- assert_equal "[]", par.runs.inspect
+ def test_run_with_font_size
+ input = 'test
'
+ expected_output = run_with_rpr('')
+ assert_equal normalize_wordml(expected_output), process(input)
+
+ # test that non-numeric are ignored
+ input = 'test
'
+ assert_equal normalize_wordml(expected_output), process(input)
+
+ # test that floats round up
+ input = 'test
'
+ assert_equal normalize_wordml(expected_output), process(input)
+ end
+
+ def test_run_with_font_style
+ input = 'test
'
+ expected_output = run_with_rpr('')
+ assert_equal normalize_wordml(expected_output), process(input)
+
+ # test that non-numeric are ignored
+ input = 'test
'
+ expected_output = run_with_rpr('')
+ assert_equal normalize_wordml(expected_output), process(input)
+ end
+
+ def test_run_with_font_wieght
+ input = 'test
'
+ expected_output = run_with_rpr('')
+ assert_equal normalize_wordml(expected_output), process(input)
+ end
+
+ def test_run_with_text_decoration
+ # testing underline configurations
+ input = 'test
'
+ expected_output = run_with_rpr('')
+ assert_equal normalize_wordml(expected_output), process(input)
+
+ input = 'test
'
+ expected_output = run_with_rpr('')
+ assert_equal normalize_wordml(expected_output), process(input)
+
+ input = 'test
'
+ expected_output = run_with_rpr('')
+ assert_equal normalize_wordml(expected_output), process(input)
+
+ # testing line-through
+ input = 'test
'
+ expected_output = run_with_rpr('')
+ assert_equal normalize_wordml(expected_output), process(input)
+
+ # testing that unsupported values are passed through as a toggle
+ input = 'test
'
+ expected_output = run_with_rpr('')
+ assert_equal normalize_wordml(expected_output), process(input)
+
+ input = 'test
'
+ expected_output = run_with_rpr('')
+ assert_equal normalize_wordml(expected_output), process(input)
+ end
+
+ def test_run_with_vertical_align
+ input = 'test
'
+ expected_output = run_with_rpr('')
+ assert_equal normalize_wordml(expected_output), process(input)
+
+ input = 'test
'
+ expected_output = run_with_rpr('')
+ assert_equal normalize_wordml(expected_output), process(input)
end
- def test_ignore_br_in_blank_div
- input = '
'
- par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
- assert_equal "[]", par.runs.inspect
+ def test_run_with_unsupported_property
+ input = 'test
'
+ expected_output = 'test'
+ assert_equal normalize_wordml(expected_output), process(input)
+ end
+
+ # tests with nested runs and styles
+
+ def test_paragraph_props_passed_to_runs
+ input = 'Loremipsum
'
+ expected_output = <<-DOCX.strip
+
+
+
+
+
+
+
+
+
+ Lorem
+
+
+
+
+
+ ipsum
+
+
+ DOCX
+ assert_equal normalize_wordml(expected_output), process(input)
+ end
+
+ def test_run_prop_override_paragraph_prop
+ input = 'Loremipsum
'
+ expected_output = <<-DOCX.strip
+
+
+
+
+
+
+
+
+
+ Lorem
+
+
+
+
+
+ ipsum
+
+
+ DOCX
+ assert_equal normalize_wordml(expected_output), process(input)
end
- def test_headings
- input = 'First
Second
Third
'
- ast = @converter.processed_ast(input)
- assert_equal "]>, ]>, ]>]>", ast.inspect
+ def test_inline_style_overrides_tag_style
+ # Note: a toggle property can not be removed once it becomes a symbol
+ # unless there is a specific CSS style that will set it to false. This
+ # is because CSS styles can only override parent properties not remove them.
+ input = 'test
'
+ expected_output = run_with_rpr('')
+ assert_equal normalize_wordml(expected_output), process(input)
end
- def test_h_with_formatting
- input = 'Lorem ipsum dolor sit amet
'
- ast = @converter.processed_ast(input)
- assert_equal ", , , ]>]>", ast.inspect
+ def test_conversion_of_a_registered_tag_without_ast_class
+ # This registers a new tag with the configuration object and then tries
+ # to convert it
+ Sablon.configure do |config|
+ config.register_html_tag(:bgcyan, :inline, properties: { 'highlight' => { val: 'cyan' } })
+ end
+ #
+ input = 'test
'
+ expected_output = run_with_rpr('')
+ assert_equal normalize_wordml(expected_output), process(input)
+
+ # remove the tag to avoid any accidental side effects
+ Sablon.configure do |config|
+ config.remove_html_tag(:bgcyan)
+ end
+ end
+
+ def test_conversion_of_a_registered_tag_with_ast_class
+ Sablon.configure do |config|
+ # create the AST class and then pass it onto the register tag method
+ ast_class = Class.new(Sablon::HTMLConverter::Node) do
+ def self.name
+ 'TestInstr'
+ end
+
+ def initialize(_env, node, _properties)
+ @content = node.text
+ end
+
+ def inspect
+ @content
+ end
+
+ def to_docx
+ " #{@content} "
+ end
+ end
+ #
+ config.register_html_tag(:test_instr, :inline, ast_class: ast_class)
+ end
+ #
+ input = 'test
'
+ expected_output = <<-DOCX.strip
+
+
+
+
+ test
+
+ DOCX
+ assert_equal normalize_wordml(expected_output), process(input)
+
+ # remove the tag to avoid any accidental side effects
+ Sablon.configure do |config|
+ config.remove_html_tag(:test_instr)
+ end
end
- def test_ul
- input = ''
- ast = @converter.processed_ast(input)
- assert_equal "]>, ]>]>", ast.inspect
+ def test_conversion_of_registered_style_attribute
+ Sablon.configure do |config|
+ converter = ->(v) { return :highlight, v }
+ config.register_style_converter(:run, 'test-highlight', converter)
+ end
+ #
+ input = 'test
'
+ expected_output = run_with_rpr('')
+ assert_equal normalize_wordml(expected_output), process(input)
+ #
+ Sablon.configure do |config|
+ config.remove_style_converter(:run, 'test-highlight')
+ end
end
- def test_ol
- input = '- Lorem
- ipsum
'
- ast = @converter.processed_ast(input)
- assert_equal "]>, ]>]>", ast.inspect
+ private
+
+ def process(input)
+ @converter.process(input, @env)
end
- def test_num_id
- ast = @converter.processed_ast('- Some
- Lorem
- dolor
- sit
')
- assert_equal [1001, 1001, 1002, 1003, 1003], ast.grep(Sablon::HTMLConverter::ListParagraph).map(&:numid)
+ def para_with_ppr(ppr_str)
+ para_str = '%s'
+ format(para_str, ppr_str)
end
- def test_nested_lists_have_the_same_numid
- ast = @converter.processed_ast('')
- assert_equal [1001, 1001, 1001], ast.grep(Sablon::HTMLConverter::ListParagraph).map(&:numid)
+ def run_with_rpr(rpr_str)
+ para_str = <<-DOCX.strip
+
+
+
+
+
+
+ %s
+
+ test
+
+
+ DOCX
+ format(para_str, rpr_str)
end
- def test_keep_nested_list_order
- input = ''
- ast = @converter.processed_ast(input)
- list_p = ast.grep(Sablon::HTMLConverter::ListParagraph)
- assert_equal [1001], list_p.map(&:numid).uniq
- assert_equal [0, 1, 2, 1, 0, 1, 2], list_p.map(&:ilvl)
+ def normalize_wordml(wordml)
+ wordml.gsub(/^\s+/, '').tr("\n", '')
end
end
diff --git a/test/html/node_properties_test.rb b/test/html/node_properties_test.rb
new file mode 100644
index 00000000..2f9b48cc
--- /dev/null
+++ b/test/html/node_properties_test.rb
@@ -0,0 +1,113 @@
+# -*- coding: utf-8 -*-
+require "test_helper"
+
+class NodePropertiesTest < Sablon::TestCase # unit tests for Sablon::HTMLConverter::NodeProperties
+  def setup
+    # permissive whitelist stand-in: its include? accepts every property name
+    @inc_props = Struct.new(:props) do
+      def include?(*)
+        true
+      end
+    end
+  end
+
+  def test_empty_node_properties_conversion
+    # no properties: inspect is empty and to_docx returns nil
+    props = Sablon::HTMLConverter::NodeProperties.new('w:pPr', {}, @inc_props.new)
+    assert_equal '', props.inspect
+    assert_nil props.to_docx
+  end
+
+  def test_simple_node_property_conversion
+    props = { 'pStyle' => 'Paragraph' }
+    props = Sablon::HTMLConverter::NodeProperties.new('w:pPr', props, @inc_props.new)
+    assert_equal 'pStyle=Paragraph', props.inspect
+    assert_equal '', props.to_docx
+  end
+
+  def test_node_property_with_nil_value_conversion
+    props = { 'b' => nil }
+    props = Sablon::HTMLConverter::NodeProperties.new('w:rPr', props, @inc_props.new)
+    assert_equal 'b', props.inspect
+    assert_equal '', props.to_docx
+  end
+
+  def test_node_property_with_hash_value_conversion
+    props = { 'shd' => { color: 'clear', fill: '123456', test: nil } }
+    props = Sablon::HTMLConverter::NodeProperties.new('w:rPr', props, @inc_props.new)
+    assert_equal 'shd={:color=>"clear", :fill=>"123456", :test=>nil}', props.inspect
+    assert_equal '', props.to_docx
+  end
+
+  def test_node_property_with_array_value_conversion
+    props = { 'numPr' => [{ 'ilvl' => 1 }, { 'numId' => 34 }] }
+    props = Sablon::HTMLConverter::NodeProperties.new('w:pPr', props, @inc_props.new)
+    assert_equal 'numPr=[{"ilvl"=>1}, {"numId"=>34}]', props.inspect
+    assert_equal '', props.to_docx
+  end
+
+  def test_complex_node_properties_conversion
+    props = {
+      'top1' => 'val1',
+      'top2' => [
+        { 'mid0' => nil },
+        { 'mid1' => [
+          { 'bottom1' => { key1: 'abc' } },
+          { 'bottom2' => 'xyz' }
+        ] },
+        { 'mid2' => 'val2' }
+      ],
+      'top3' => { key1: 1, key2: '2', key3: nil, key4: true, key5: false }
+    }
+    output = <<-DOCX.gsub(/^\s*/, '').delete("\n") # leading whitespace and newlines are removed from the expected text
+
+
+
+
+
+
+
+
+
+
+
+
+    DOCX
+    props = Sablon::HTMLConverter::NodeProperties.new('w:pPr', props, @inc_props.new)
+    assert_equal output, props.to_docx
+  end
+
+  def test_setting_property_value
+    props = {}
+    props = Sablon::HTMLConverter::NodeProperties.new('w:pPr', props, @inc_props.new)
+    props['rStyle'] = 'FootnoteText'
+    assert_equal({ 'rStyle' => 'FootnoteText' }, props.instance_variable_get(:@properties))
+  end
+
+  def test_properties_filtered_on_init # only whitelisted keys are expected to remain in @properties
+    props = { 'pStyle' => 'Paragraph', 'rStyle' => 'EndnoteText' }
+    props = Sablon::HTMLConverter::NodeProperties.new('w:rPr', props, %w[rStyle])
+    assert_equal({ 'rStyle' => 'EndnoteText' }, props.instance_variable_get(:@properties))
+  end
+
+  def test_transferred_properties # the key outside the whitelist is expected back from transferred_properties
+    props = { 'pStyle' => 'Paragraph', 'rStyle' => 'EndnoteText' }
+    props = Sablon::HTMLConverter::NodeProperties.new(nil, props, %w[pStyle])
+    trans = props.transferred_properties
+    assert_equal({ 'rStyle' => 'EndnoteText' }, trans)
+  end
+
+  def test_node_properties_paragraph_factory
+    props = { 'pStyle' => 'Paragraph' }
+    props = Sablon::HTMLConverter::NodeProperties.paragraph(props)
+    assert_equal 'pStyle=Paragraph', props.inspect
+    assert_equal '', props.to_docx
+  end
+
+  def test_node_properties_run_factory
+    props = { 'color' => 'FF00FF' }
+    props = Sablon::HTMLConverter::NodeProperties.run(props)
+    assert_equal 'color=FF00FF', props.inspect
+    assert_equal '', props.to_docx
+  end
+end
diff --git a/test/html_test.rb b/test/html_test.rb
index 258890e7..afd89ee4 100644
--- a/test/html_test.rb
+++ b/test/html_test.rb
@@ -1,9 +1,10 @@
# -*- coding: utf-8 -*-
require "test_helper"
-require "support/xml_snippets"
+require "support/html_snippets"
class SablonHTMLTest < Sablon::TestCase
include Sablon::Test::Assertions
+ include HTMLSnippets
def setup
super
@@ -16,7 +17,7 @@ def test_generate_document_from_template_with_styles_and_html
template_path = @base_path + "fixtures/insertion_template.docx"
output_path = @base_path + "sandbox/html.docx"
template = Sablon.template template_path
- context = {'html:content' => content}
+ context = { 'html:content' => content }
template.render_to_file output_path, context
assert_docx_equal @sample_path, output_path
@@ -26,7 +27,7 @@ def test_generate_document_from_template_without_styles_and_html
template_path = @base_path + "fixtures/insertion_template_no_styles.docx"
output_path = @base_path + "sandbox/html_no_styles.docx"
template = Sablon.template template_path
- context = {'html:content' => content}
+ context = { 'html:content' => content }
e = assert_raises(ArgumentError) do
template.render_to_file output_path, context
@@ -37,13 +38,12 @@ def test_generate_document_from_template_without_styles_and_html
end
private
+
def content
- <<-HTML
-Sablon HTML insertion
-Text
-Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse a tempus turpis. Duis urna justo, vehicula vitae ultricies vel, congue at sem. Fusce turpis turpis, aliquet id pulvinar aliquam, iaculis non elit. Nulla feugiat lectus nulla, in dictum ipsum cursus ac. Quisque at odio neque. Sed ac tortor iaculis, bibendum leo ut, malesuada velit. Donec iaculis sed urna eget pharetra. Praesent ornare fermentum turpis, placerat iaculis urna bibendum vitae. Nunc in quam consequat, tristique tellus in, commodo turpis. Curabitur ullamcorper odio purus, lobortis egestas magna laoreet vitae. Nunc fringilla velit ante, eu aliquam nisi cursus vitae. Suspendisse sit amet dui egestas, volutpat nisi vel, mattis justo. Nullam pellentesque, ipsum eget blandit pharetra, augue elit aliquam mauris, vel mollis nisl augue ut ipsum.
-Lists
-- Vestibulum
- ante ipsum primis
- in faucibus orci luctus
- et ultrices posuere cubilia Curae;
- Aliquam vel dolor
- sed sem maximus
- fermentum in non odio.
- Fusce hendrerit ornare mollis.
- Nunc scelerisque nibh nec turpis tempor pulvinar.
- Donec eros turpis,
- aliquet vel volutpat sit amet,
- semper eu purus.
- Proin ac erat nec urna efficitur vulputate.
- Quisque varius convallis ultricies.
- Nullam vel fermentum eros.
Pellentesque nulla leo, auctor ornare erat sed, rhoncus congue diam. Duis non porttitor nulla, ut eleifend enim. Pellentesque non tempor sem.
Mauris auctor egestas arcu,
- id venenatis nibh dignissim id.
- In non placerat metus.
- Nunc sed consequat metus.
- Nulla consectetur lorem consequat,
- malesuada dui at, lacinia lectus.
- Aliquam efficitur
- lorem a mauris feugiat,
- at semper eros pellentesque.
Nunc lacus diam, consectetur ut odio sit amet, placerat pharetra erat. Sed commodo ut sem id congue. Sed eget neque elit. Curabitur at erat tortor. Maecenas eget sapien vitae est sagittis accumsan et nec orci. Integer luctus at nisl eget venenatis. Nunc nunc eros, consectetur at tortor et, tristique ultrices elit. Nulla in turpis nibh.
- Nam consectetur
- Aenean
- Duis faucibus nunc nec venenatis faucibus.
- Aliquam erat volutpat.
Quisque non neque ut lacus eleifend volutpat quis sed lacus.
Praesent ultrices purus eu quam elementum, sit amet faucibus elit interdum. In lectus orci,
elementum quis dictum ac, porta ac ante. Fusce tempus ac mauris id cursus. Phasellus a erat nulla. Mauris dolor orci, malesuada auctor dignissim non, posuere nec odio. Etiam hendrerit justo nec diam ullamcorper, nec blandit elit sodales.
-HTML
+ html_str = snippet('html_test_content')
+ # collapse every run of whitespace into a single space
+ html_str = html_str.gsub(/\s+/, ' ')
+ # clear any white space between block level tags and other content
+ html_str.gsub(%r{\s*<(/?(?:h\d|div|p|br|ul|ol|li).*?)>\s*}, '<\1>')
end
end
diff --git a/test/processor/document_test.rb b/test/processor/document_test.rb
index 9222a47a..a649c239 100644
--- a/test/processor/document_test.rb
+++ b/test/processor/document_test.rb
@@ -502,6 +502,21 @@ def test_image_replacement
document
end
+ def test_comment_block_and_comment_as_key # processes the snippet with a :comment context value and compares the XML
+   result = process(snippet("comment_block_and_comment_as_key"), { comment: 'Contents of comment key' })
+
+   assert_xml_equal <<-document, result
+ Before
+ After
+
+
+
+ Contents of comment key
+
+
+ document
+ end
+
private
def process(document, context)
diff --git a/test/support/html_snippets.rb b/test/support/html_snippets.rb
new file mode 100644
index 00000000..a36615d2
--- /dev/null
+++ b/test/support/html_snippets.rb
@@ -0,0 +1,9 @@
+module HTMLSnippets # mixin that reads HTML fixture files by name
+ def snippet(name) # returns the contents of "<name>.html" resolved against snippet_path
+ File.read(File.expand_path("#{name}.html", snippet_path))
+ end
+
+ def snippet_path # memoized absolute path: ../../fixtures/html resolved against this file's path
+ @snippet_path ||= File.expand_path("../../fixtures/html", __FILE__)
+ end
+end