|
3 | 3 | from unittest.case import TestCase
|
4 | 4 | from uuid import uuid4
|
5 | 5 | import json
|
| 6 | +import re |
6 | 7 |
|
7 | 8 | from string_utils import *
|
8 | 9 |
|
@@ -681,6 +682,130 @@ def test_should_count_non_ascii_words(self):
|
681 | 682 | self.assertEqual(words_count('é vero o é falso?'), 5)
|
682 | 683 |
|
683 | 684 |
|
class ContainsHtmlTestCase(TestCase):
    """Exercise ``contains_html`` with non-string input, plain text and markup."""

    def _assert_html(self, *samples):
        """Fail, naming the offending sample, unless every sample is reported as html."""
        for sample in samples:
            self.assertTrue(contains_html(sample), 'html not detected in %r' % (sample,))

    def _assert_no_html(self, *samples):
        """Fail, naming the offending sample, if any sample is reported as html."""
        for sample in samples:
            self.assertFalse(contains_html(sample), 'html wrongly detected in %r' % (sample,))

    def test_cannot_handle_non_string_objects(self):
        """Non-string arguments are expected to raise ``TypeError``."""
        # assertRaises calls the function itself, so no lambda wrapper is needed.
        self.assertRaises(TypeError, contains_html, None)
        self.assertRaises(TypeError, contains_html, False)
        self.assertRaises(TypeError, contains_html, 0)
        self.assertRaises(TypeError, contains_html, [])
        self.assertRaises(TypeError, contains_html, {'a': 1})

    def test_handle_empty_strings_as_expected(self):
        """Empty and blank strings contain no html."""
        self._assert_no_html('', ' ')

    def test_handle_text_only_as_expected(self):
        """Plain prose contains no html."""
        self._assert_no_html('hello world! No html here :)')

    def test_ignores_tag_signs_if_not_valid_tag(self):
        """Stray angle brackets that never form a tag are not html."""
        self._assert_no_html('>No html>', '<No <html')

    def test_is_not_html_tag_if_name_is_missing(self):
        """Bracket pairs without a valid tag name are not html."""
        self._assert_no_html(
            '<>',
            '<1>',
            '</123>',
            'no <> no',
            '</>',
            'no </> no',
            '< />',
            '< no />',
            '< />nooooo',
            '<[nope]>',
            '<!nope>',
            '<?nope>',
            '<#nope>',
        )

    def test_tag_can_be_self_closing_or_not_and_space_before_closing_is_optional(self):
        """``<br>``, ``<br/>`` and ``<br />`` are all detected."""
        self._assert_html('one: <br>', 'two: <br/>', 'three: <br />')

    def test_tag_name_can_contain_dashes_but_not_as_first_char(self):
        """Dashes are accepted inside a tag name but not at its start."""
        self._assert_html('test <my-custom-tag /> this')
        self._assert_no_html(
            'test <-> this',
            'test <---> this',
            'test <---/> this',
            'test <-nope/> this',
        )

    def test_html_comment_is_properly_recognized(self):
        """Only a correctly delimited ``<!-- -->`` comment counts as html."""
        self._assert_html('foo bar baz <!-- html comment --> banana')
        self._assert_no_html('foo bar baz <!- no html comment -> banana')

    def test_tag_name_cane_even_contain_number_but_not_as_first_char(self):
        """Digits are accepted inside a tag name but not at its start."""
        self._assert_html('<daitarn3 />')
        self._assert_no_html('<3daitarn />')

    def test_detects_doctype(self):
        """A doctype declaration counts as html."""
        self._assert_html('<!DOCTYPE html>')

    def test_tag_can_have_properties(self):
        """Tags carrying value-less attributes are detected."""
        self._assert_html(
            'bla bla <input disabled /> bla bla ',
            'bla bla <div flex>xxx</div> bla bla ',
            'bla bla <a one two three />bla bla ',
        )

    def test_tag_properties_can_have_content(self):
        """Tags carrying attributes with values are detected."""
        self._assert_html(
            'bla bla <span id="foo">yo</span> bla bla ',
            'bla bla <div style="width: 300px; height: 50px; background: #000">yo</div>',
            'bla bla <div id="x" class="container">text</div> bla bla ',
        )

    def test_tag_properties_can_use_single_duble_quotes_or_nothing(self):
        """Attribute values may be double-quoted, single-quoted or unquoted."""
        self._assert_html(
            '<span id="foo">yo</span>',
            '<span id=\'foo\'>yo</span>',
            '<span id=foo>yo</span>',
        )

    def test_tag_properties_can_have_space_before_or_after_equal_sign(self):
        """Whitespace around the attribute ``=`` sign is tolerated."""
        self._assert_html(
            '<span id ="foo">yo</span>',
            '<span id= \'foo\'>yo</span>',
            '<span id = foo>yo</span>',
        )

    def test_tag_can_have_both_simple_and_complex_properties(self):
        """Valued and value-less attributes may be mixed on one tag."""
        self._assert_html('bla bla <div id="x" class="container" boom>text</div>')

    def test_tag_can_have_namespace(self):
        """Namespaced tag names (``prefix:name``) are detected."""
        self._assert_html(
            'namespace tag: <dz:foo power="100"></dz:foo>',
            'namespace tag: <dz:test> content </dz:test>',
            'namespace tag: <a:test/>',
            'namespace tag: <dz:banana />',
        )

    def test_tag_can_contains_any_content(self):
        """Empty, textual and nested tag content are all detected."""
        self._assert_html(
            '<html></html>',
            '<html> content </html>',
            '<html> <body><p> content </p></body> </html>',
        )

    def test_tag_can_be_multiline(self):
        """A single tag whose attributes span several lines is detected."""
        self._assert_html('''
            multiline tag here:
            <div
                style="width:200px"
                id="foo"
                class="bar">hello</div>
        ''')

    def test_multiline_are_handled_properly(self):
        """Multi-line input is html only when it actually holds a tag."""
        self._assert_html('''

            Text here, followed by html:

            <script>
                document.write('you are fucked!');
            </script>

            end!

        ''')
        self._assert_no_html('''

            plain text
            here

            ...

            should return false!

        ''')
| 807 | + |
| 808 | + |
684 | 809 | # string manipulation tests
|
685 | 810 |
|
686 | 811 | class ReverseTestCase(TestCase):
|
@@ -795,3 +920,59 @@ def test_shuffled_string_should_have_same_len_of_original_one(self):
|
def test_sorted_strings_should_match(self):
    """Shuffling may only reorder characters: both strings must sort to the same list."""
    # ``self.original_string`` is read from the test instance; where it is set is
    # outside the visible part of the file.
    scrambled = shuffle(self.original_string)
    expected_chars = sorted(self.original_string)
    self.assertEqual(expected_chars, sorted(scrambled))
|
| 923 | + |
| 924 | + |
class StripHtmlTestCase(TestCase):
    """Exercise ``strip_html`` with and without ``keep_tag_content``."""

    @staticmethod
    def _without_whitespace(text):
        """Return ``text`` with every whitespace character removed."""
        return re.sub(r'\s', '', text)

    def test_cannot_handle_non_string_objects(self):
        """Non-string arguments are expected to raise ``TypeError``."""
        # assertRaises calls the function itself, so no lambda wrapper is needed.
        self.assertRaises(TypeError, strip_html, None)
        self.assertRaises(TypeError, strip_html, False)
        self.assertRaises(TypeError, strip_html, 0)
        self.assertRaises(TypeError, strip_html, [])
        self.assertRaises(TypeError, strip_html, {'a': 1})

    def test_should_return_original_string_if_does_not_contain_html(self):
        """Input without tags must come back unchanged, whitespace included."""
        self.assertEqual('', strip_html(''))
        self.assertEqual(' hello world ', strip_html(' hello world '))
        # A lone '>' at the start of a line is not a tag and must survive.
        multiline_string = '''
            > line 1
            > line 2
            > line 3
        '''
        self.assertEqual(multiline_string, strip_html(multiline_string))

    def test_should_remove_html_tags(self):
        """Tags are removed while the surrounding text and spaces are kept."""
        self.assertEqual('foo  bar', strip_html('foo <br> bar'))
        self.assertEqual('foo  bar', strip_html('foo <br/> bar'))
        self.assertEqual('foo  bar', strip_html('foo <br /> bar'))
        self.assertEqual('  ', strip_html(' <div></div> '))

    def test_should_be_able_to_remove_multiple_tags(self):
        """Every tag in a multi-line input is handled, not just the first one."""
        stripped = strip_html('''
            a <div>on the first line</div>
            a <span>on the second line</span>
            a <strong>on the third line</strong>
            a <hr />
        ''')
        # By default the tag content is dropped together with the tags.
        self.assertEqual('aaaa', self._without_whitespace(stripped))

        stripped2 = strip_html('''
            a <div>(on the first line)</div>
            a <span>(on the second line)</span>
            a <strong>(on the third line)</strong>
            a <hr />
        ''', keep_tag_content=True)
        self.assertEqual(
            'a(onthefirstline)a(onthesecondline)a(onthethirdline)a',
            self._without_whitespace(stripped2)
        )

    def test_should_keep_tag_content_if_specified(self):
        """``keep_tag_content=True`` removes only the tags, not the text inside them."""
        s = 'test: <a href="foo/bar">click here</a>'
        self.assertEqual('test: ', strip_html(s))
        self.assertEqual('test: click here', strip_html(s, keep_tag_content=True))

        # Well-formed document: every opened tag has its matching closing tag.
        multiline_string = '''
            <html>
                <body>
                    <div id="container">
                        <p>content text!</p>
                    </div>
                </body>
            </html>
        '''
        self.assertEqual('content text!', strip_html(multiline_string, keep_tag_content=True).strip())
0 commit comments