Skip to content

Commit 39291d9

Browse files
author
Willie Scholtz
committed
slf4j
1 parent 268e0b2 commit 39291d9

File tree

5 files changed

+40
-36
lines changed

5 files changed

+40
-36
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
image-scraper
1+
Image Scraper
22
=============
33

4-
simple multithreaded java image scraper using url templates and CSS selectors
4+
A simple multithreaded Java image scraper using URL templates and CSS selectors

pom.xml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@
66
<modelVersion>4.0.0</modelVersion>
77
<groupId>com.github.epochcoder</groupId>
88
<artifactId>image-downloader</artifactId>
9-
<version>1.0.0-SNAPSHOT</version>
9+
<version>2.0.0-SNAPSHOT</version>
1010

1111
<name>Image Downloader</name>
12+
<description>A simple multithreaded Java image scraper using URL templates and CSS selectors</description>
1213

1314
<properties>
1415
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
@@ -19,13 +20,15 @@
1920
<plugin>
2021
<groupId>org.apache.maven.plugins</groupId>
2122
<artifactId>maven-compiler-plugin</artifactId>
23+
<version>3.6.1</version>
2224
<configuration>
2325
<source>1.8</source>
2426
<target>1.8</target>
2527
</configuration>
2628
</plugin>
2729
<plugin>
2830
<artifactId>maven-assembly-plugin</artifactId>
31+
<version>3.0.0</version>
2932
<configuration>
3033
<archive>
3134
<manifest>
@@ -68,7 +71,7 @@
6871
</dependency>
6972
<dependency>
7073
<groupId>org.slf4j</groupId>
71-
<artifactId>slf4j-log4j12</artifactId>
74+
<artifactId>slf4j-simple</artifactId>
7275
<version>1.7.25</version>
7376
</dependency>
7477
<dependency>

src/main/java/com/github/epochcoder/imagedownloader/ImageDownloaderView.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,10 @@ public void onStart(String uniqueId, int total) {
360360
}
361361

362362
@Override
363-
public void onException(String uniqueId, Throwable exception) {}
363+
public void onException(String uniqueId, Throwable exception) {
364+
statusMessageLabel.setText(exception.toString());
365+
messageTimer.restart();
366+
}
364367

365368
@Override
366369
public void onStatusChange(String uniqueId, int current, int total, String currUrl) {

src/main/java/com/github/epochcoder/imagedownloader/core/DownloadInformation.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
* @author Willie Scholtz
77
*/
88
public interface DownloadInformation {
9+
910
/**
1011
* called when the downloading starts with the total amount of files to be scraped
1112
* @param uniqueId the unique id of the current downloader

src/main/java/com/github/epochcoder/imagedownloader/core/ImageDownloader.java

Lines changed: 28 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import org.jsoup.nodes.Document;
88
import org.jsoup.nodes.Element;
99
import org.jsoup.select.Elements;
10+
import org.slf4j.Logger;
11+
import org.slf4j.LoggerFactory;
1012

1113
import java.io.BufferedInputStream;
1214
import java.io.File;
@@ -26,8 +28,6 @@
2628
import java.util.concurrent.ExecutorService;
2729
import java.util.concurrent.Executors;
2830
import java.util.concurrent.Future;
29-
import java.util.logging.Level;
30-
import java.util.logging.Logger;
3131

3232
/**
3333
* Simple image scraper
@@ -37,13 +37,18 @@ public class ImageDownloader {
3737
/**
3838
* the class logger
3939
*/
40-
private static final Logger LOG = Logger.getLogger(ImageDownloader.class.getName());
40+
private static final Logger LOG = LoggerFactory.getLogger(ImageDownloader.class);
4141

4242
/**
4343
* the amount of threads to run in
4444
*/
4545
private static final int THREADS = Runtime.getRuntime().availableProcessors();
4646

47+
/**
48+
* the timeout for retrieving a document
49+
*/
50+
private static final int TIMEOUT = 5000;
51+
4752
/**
4853
* the digit template pattern
4954
*/
@@ -144,22 +149,22 @@ public List<URL> searchForImages(final DownloadInformation information) {
144149
if (!visitedLinks.contains(cUrl)) {
145150
final Document document = Jsoup.connect(cUrl)
146151
.userAgent("ImageScraper")
147-
.timeout(1000).get();
152+
.timeout(TIMEOUT).get();
148153

149154
// add to visited regardless of failure
150155
visitedLinks.add(cUrl);
151156

152157
if (document != null) {
153-
LOG.log(Level.FINE, "got document from url[" + this.baseUrl + next + "], parsing...");
158+
LOG.trace("Got document from url[{}], parsing...", this.baseUrl + next);
154159

155160
final Elements elements = document.select(this.cssSelector);
156161
if (elements != null && !elements.isEmpty()) {
157-
LOG.log(Level.FINE, "found elements, looking for images");
162+
LOG.trace("Found elements, looking for images");
158163
for (Element image : elements) {
159164
if ("img".equals(image.tagName())) {
160165
final String src = image.absUrl("src");
161166
if (!StringUtil.isNull(src)) {
162-
LOG.log(Level.FINE, "found image source[" + src + "], adding to list...");
167+
LOG.trace("Found image source[{}], adding to list...", src);
163168
final URL url = new URL(src);
164169
if (!resources.contains(url)) {
165170
resources.add(url);
@@ -170,45 +175,39 @@ public List<URL> searchForImages(final DownloadInformation information) {
170175
// reset fail count, we had success
171176
failCount = 0;
172177
} else {
173-
LOG.log(Level.WARNING, "found image[" + image
174-
+ "], but it had no source, increasing error count ["
175-
+ (++failCount + "/" + this.failureCount) + "]");
178+
LOG.warn("Found image[{}], but it had no source, increasing error count [{}/{}]",
179+
image, ++failCount, this.failureCount);
176180
}
177181
} else {
178-
LOG.log(Level.WARNING, "found element[" + image
179-
+ "], but it was not an image, increasing error count ["
180-
+ (++failCount + "/" + this.failureCount) + "]");
182+
LOG.warn("Found image[{}], but it was not an image, increasing error count [{}/{}]",
183+
image, ++failCount, this.failureCount);
181184
}
182185
}
183186
} else {
184187
// no elements for selector, could be empty page, anything really, increase failcount
185-
LOG.log(Level.WARNING, "could find images using selector["
186-
+ this.cssSelector + "] on document["
187-
+ cUrl + "], increasing error count ["
188-
+ (++failCount + "/" + this.failureCount) + "]");
188+
LOG.warn("Could find images using selector[{}] on document[{}], increasing error count [{}/{}]",
189+
this.cssSelector, cUrl, ++failCount, this.failureCount);
189190
}
190191
} else {
191192
// no document, increase failcount
192-
LOG.log(Level.WARNING, "could not open document for ["
193-
+ cUrl + "], increasing error count ["
194-
+ (++failCount + "/" + this.failureCount) + "]");
193+
LOG.warn("Could not open document/timeout for [{}], increasing error count [{}/{}]",
194+
cUrl, ++failCount, this.failureCount);
195195
}
196196
} else {
197197
// already seen, increase failcount
198-
LOG.log(Level.WARNING, "already seen url["
199-
+ cUrl + "], increasing error count ["
200-
+ (++failCount + "/" + this.failureCount) + "]");
198+
LOG.warn("Already seen url[{}], increasing error count [{}/{}]",
199+
cUrl, ++failCount, this.failureCount);
201200
}
202201
} catch (IOException e) {
203202
information.onException(uniqueId, e);
204-
LOG.log(Level.WARNING, "could not open/read stream to ["
205-
+ this.baseUrl + next + "], increasing error count ["
206-
+ (++failCount + "/" + this.failureCount) + "]", e);
203+
LOG.warn("Could not open/read stream to [{}], increasing error count [{}/{}]",
204+
this.baseUrl + next, ++failCount, this.failureCount);
207205
}
208206
}
209207

210-
LOG.log(Level.INFO, "got resources to download\n" + resources);
208+
LOG.debug("Got resources to download\n{}", resources);
211209
information.onComplete(uniqueId);
210+
212211
return Collections.unmodifiableList(resources);
213212
}
214213

@@ -252,10 +251,8 @@ public final Set<Future<?>> downloadResources(List<URL> resources, String system
252251
resourceThreads.put(0, resources);
253252
}
254253

255-
256254
// now start processing
257-
LOG.log(Level.FINE, "starting to process/download ["
258-
+ resourceThreads + "]");
255+
LOG.trace("Starting to process/download [{}]", resourceThreads);
259256

260257
for (Iterator<Map.Entry<Integer, List<URL>>> resourceIterator =
261258
resourceThreads.entrySet().iterator(); resourceIterator.hasNext();) {
@@ -295,7 +292,7 @@ public void run() {
295292
* @param path the root system path to save at
296293
* @throws Exception
297294
*/
298-
private long download(URL url, File path) throws URISyntaxException, IOException {
295+
private static long download(URL url, File path) throws URISyntaxException, IOException {
299296
final String uriPath = url.toURI().toString();
300297
final HttpURLConnection conn = (HttpURLConnection) url.openConnection();
301298
final InputStream is = conn.getInputStream();

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy