html
Download images from a website using Jsoup
In this example we will use an HTML Parser, Jsoup, in order to download images from a website.
jsoup is a Java library for working with real-world HTML. It provides a very convenient API for extracting and manipulating data, using the best of DOM, CSS, and jquery-like methods.
jsoup implements the WHATWG HTML5 specification, and parses HTML to the same DOM as modern browsers do.
We use jsoup in order to get the src attributes of the <img> tags from a website. Then we download each file from the URL we have acquired, using a URL stream.
This code downloads images that are directly accessible via URL. If they aren’t, an exception will be thrown.
01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 | package com.javacodegeeks.snippets.enterprise; import java.io.*; import java.net.URL; import java.util.logging.Level; import java.util.logging.Logger; import java.io.*; import java.util.logging.Level; import java.util.logging.Logger; import org.jsoup.Jsoup; import org.jsoup.nodes.Attributes; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import java.io.IOException; import java.net.URL; public class DownloadImages { //The url of the website. This is just an example //The path of the folder that you want to save the images to private static final String folderPath = "<FOLDER PATH>" ; public static void main(String[] args) { try { //Connect to the website and get the html Document doc = Jsoup.connect(webSiteURL).get(); //Get all elements with img tag , Elements img = doc.getElementsByTag( "img" ); for (Element el : img) { //for each element get the srs url String src = el.absUrl( "src" ); System.out.println( "Image Found!" ); System.out.println( "src attribute is : " +src); getImages(src); } } catch (IOException ex) { System.err.println( "There was an error" ); Logger.getLogger(DownloadImages. 
class .getName()).log(Level.SEVERE, null , ex); } } private static void getImages(String src) throws IOException { String folder = null ; //Exctract the name of the image from the src attribute int indexname = src.lastIndexOf( "/" ); if (indexname == src.length()) { src = src.substring( 1 , indexname); } indexname = src.lastIndexOf( "/" ); String name = src.substring(indexname, src.length()); System.out.println(name); //Open a URL Stream URL url = new URL(src); InputStream in = url.openStream(); OutputStream out = new BufferedOutputStream( new FileOutputStream( folderPath+ name)); for ( int b; (b = in.read()) != - 1 ;) { out.write(b); } out.close(); in.close(); } } |
This was an example of how to download images from a website using Jsoup.