7
7
import org .jsoup .nodes .Document ;
8
8
import org .jsoup .nodes .Element ;
9
9
import org .jsoup .select .Elements ;
10
+ import org .slf4j .Logger ;
11
+ import org .slf4j .LoggerFactory ;
10
12
11
13
import java .io .BufferedInputStream ;
12
14
import java .io .File ;
26
28
import java .util .concurrent .ExecutorService ;
27
29
import java .util .concurrent .Executors ;
28
30
import java .util .concurrent .Future ;
29
- import java .util .logging .Level ;
30
- import java .util .logging .Logger ;
31
31
32
32
/**
33
33
* Simple image scraper
@@ -37,13 +37,18 @@ public class ImageDownloader {
37
37
/**
38
38
* the class logger
39
39
*/
40
- private static final Logger LOG = Logger .getLogger (ImageDownloader .class . getName () );
40
+ private static final Logger LOG = LoggerFactory .getLogger (ImageDownloader .class );
41
41
42
42
/**
43
43
* the number of threads to run in
44
44
*/
45
45
private static final int THREADS = Runtime .getRuntime ().availableProcessors ();
46
46
47
+ /**
48
+ * the timeout for retrieving a document
49
+ */
50
+ private static final int TIMEOUT = 5000 ;
51
+
47
52
/**
48
53
* the digit template pattern
49
54
*/
@@ -144,22 +149,22 @@ public List<URL> searchForImages(final DownloadInformation information) {
144
149
if (!visitedLinks .contains (cUrl )) {
145
150
final Document document = Jsoup .connect (cUrl )
146
151
.userAgent ("ImageScraper" )
147
- .timeout (1000 ).get ();
152
+ .timeout (TIMEOUT ).get ();
148
153
149
154
// add to visited regardless of failure
150
155
visitedLinks .add (cUrl );
151
156
152
157
if (document != null ) {
153
- LOG .log ( Level . FINE , "got document from url[" + this .baseUrl + next + "], parsing..." );
158
+ LOG .trace ( "Got document from url[{}], parsing..." , this .baseUrl + next );
154
159
155
160
final Elements elements = document .select (this .cssSelector );
156
161
if (elements != null && !elements .isEmpty ()) {
157
- LOG .log ( Level . FINE , "found elements, looking for images" );
162
+ LOG .trace ( "Found elements, looking for images" );
158
163
for (Element image : elements ) {
159
164
if ("img" .equals (image .tagName ())) {
160
165
final String src = image .absUrl ("src" );
161
166
if (!StringUtil .isNull (src )) {
162
- LOG .log ( Level . FINE , "found image source[" + src + " ], adding to list..." );
167
+ LOG .trace ( "Found image source[{} ], adding to list...", src );
163
168
final URL url = new URL (src );
164
169
if (!resources .contains (url )) {
165
170
resources .add (url );
@@ -170,45 +175,39 @@ public List<URL> searchForImages(final DownloadInformation information) {
170
175
// reset fail count, we had success
171
176
failCount = 0 ;
172
177
} else {
173
- LOG .log (Level .WARNING , "found image[" + image
174
- + "], but it had no source, increasing error count ["
175
- + (++failCount + "/" + this .failureCount ) + "]" );
178
+ LOG .warn ("Found image[{}], but it had no source, increasing error count [{}/{}]" ,
179
+ image , ++failCount , this .failureCount );
176
180
}
177
181
} else {
178
- LOG .log (Level .WARNING , "found element[" + image
179
- + "], but it was not an image, increasing error count ["
180
- + (++failCount + "/" + this .failureCount ) + "]" );
182
+ LOG .warn ("Found image[{}], but it was not an image, increasing error count [{}/{}]" ,
183
+ image , ++failCount , this .failureCount );
181
184
}
182
185
}
183
186
} else {
184
187
// no elements for selector, could be empty page, anything really, increase failcount
185
- LOG .log (Level .WARNING , "could find images using selector["
186
- + this .cssSelector + "] on document["
187
- + cUrl + "], increasing error count ["
188
- + (++failCount + "/" + this .failureCount ) + "]" );
188
+ LOG .warn ("Could find images using selector[{}] on document[{}], increasing error count [{}/{}]" ,
189
+ this .cssSelector , cUrl , ++failCount , this .failureCount );
189
190
}
190
191
} else {
191
192
// no document, increase failcount
192
- LOG .log (Level .WARNING , "could not open document for ["
193
- + cUrl + "], increasing error count ["
194
- + (++failCount + "/" + this .failureCount ) + "]" );
193
+ LOG .warn ("Could not open document/timeout for [{}], increasing error count [{}/{}]" ,
194
+ cUrl , ++failCount , this .failureCount );
195
195
}
196
196
} else {
197
197
// already seen, increase failcount
198
- LOG .log (Level .WARNING , "already seen url["
199
- + cUrl + "], increasing error count ["
200
- + (++failCount + "/" + this .failureCount ) + "]" );
198
+ LOG .warn ("Already seen url[{}], increasing error count [{}/{}]" ,
199
+ cUrl , ++failCount , this .failureCount );
201
200
}
202
201
} catch (IOException e ) {
203
202
information .onException (uniqueId , e );
204
- LOG .log (Level .WARNING , "could not open/read stream to ["
205
- + this .baseUrl + next + "], increasing error count ["
206
- + (++failCount + "/" + this .failureCount ) + "]" , e );
203
+ LOG .warn ("Could not open/read stream to [{}], increasing error count [{}/{}]" ,
204
+ this .baseUrl + next , ++failCount , this .failureCount );
207
205
}
208
206
}
209
207
210
- LOG .log ( Level . INFO , "got resources to download\n " + resources );
208
+ LOG .debug ( "Got resources to download\n {}" , resources );
211
209
information .onComplete (uniqueId );
210
+
212
211
return Collections .unmodifiableList (resources );
213
212
}
214
213
@@ -252,10 +251,8 @@ public final Set<Future<?>> downloadResources(List<URL> resources, String system
252
251
resourceThreads .put (0 , resources );
253
252
}
254
253
255
-
256
254
// now start processing
257
- LOG .log (Level .FINE , "starting to process/download ["
258
- + resourceThreads + "]" );
255
+ LOG .trace ("Starting to process/download [{}]" , resourceThreads );
259
256
260
257
for (Iterator <Map .Entry <Integer , List <URL >>> resourceIterator =
261
258
resourceThreads .entrySet ().iterator (); resourceIterator .hasNext ();) {
@@ -295,7 +292,7 @@ public void run() {
295
292
* @param path the root system path to save at
296
293
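* @throws URISyntaxException if the url cannot be converted to a URI
* @throws IOException if the connection cannot be opened or read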
297
294
*/
298
- private long download (URL url , File path ) throws URISyntaxException , IOException {
295
+ private static long download (URL url , File path ) throws URISyntaxException , IOException {
299
296
final String uriPath = url .toURI ().toString ();
300
297
final HttpURLConnection conn = (HttpURLConnection ) url .openConnection ();
301
298
final InputStream is = conn .getInputStream ();
0 commit comments