diff --git a/CHANGES b/CHANGES index 92c19cfcd5..073e7e5762 100644 --- a/CHANGES +++ b/CHANGES @@ -25,6 +25,9 @@ jsoup changelog as to preserve applicable settings, such as the Pretty Print settings. + * Improvement: added a convenience method Jsoup.parse(File). + + * Bugfix: boolean attribute names should be case-insensitive, but were not when the parser was configured to preserve case. diff --git a/src/main/java/org/jsoup/Jsoup.java b/src/main/java/org/jsoup/Jsoup.java index f6cb7c73b0..677b347841 100644 --- a/src/main/java/org/jsoup/Jsoup.java +++ b/src/main/java/org/jsoup/Jsoup.java @@ -142,12 +142,28 @@ public static Document parse(File file, @Nullable String charsetName, String bas @return sane HTML @throws IOException if the file could not be found, or read, or if the charsetName is invalid. - @see #parse(File, String, String) + @see #parse(File, String, String) parse(file, charset, baseUri) */ public static Document parse(File file, @Nullable String charsetName) throws IOException { return DataUtil.load(file, charsetName, file.getAbsolutePath()); } + /** + Parse the contents of a file as HTML. The location of the file is used as the base URI to qualify relative URLs. + The charset used to read the file will be determined by the byte-order-mark (BOM), or a {@code <meta charset>} tag, + or if neither is present, will be {@code UTF-8}. + +

<p>This is the equivalent of calling {@link #parse(File, String) parse(file, null)}</p>

+ + @param file the file to load HTML from. Supports gzipped files (ending in .z or .gz). + @return sane HTML + @throws IOException if the file could not be found or read. + @see #parse(File, String, String) parse(file, charset, baseUri) + */ + public static Document parse(File file) throws IOException { + return DataUtil.load(file, null, file.getAbsolutePath()); + } + /** Parse the contents of a file as HTML. diff --git a/src/test/java/org/jsoup/integration/ParseTest.java b/src/test/java/org/jsoup/integration/ParseTest.java index f3c1dfcb4c..0bcbb8e87a 100644 --- a/src/test/java/org/jsoup/integration/ParseTest.java +++ b/src/test/java/org/jsoup/integration/ParseTest.java @@ -229,6 +229,13 @@ public void testXwikiExpanded() throws IOException { assertEquals(wantHtml, doc.select("[data-id=userdirectory]").outerHtml()); } + @Test public void testFileParseNoCharsetMethod() throws IOException { + File in = getFile("/htmltests/xwiki-1324.html.gz"); + Document doc = Jsoup.parse(in); + assertEquals("XWiki Jetty HSQLDB 12.1-SNAPSHOT", doc.select("#xwikiplatformversion").text()); + } + + public static File getFile(String resourceName) { try { URL resource = ParseTest.class.getResource(resourceName);