Skip to content

Commit 5f043d3

Browse files
committed
* fixed bugs in failCount detection
1 parent e6dbb04 commit 5f043d3

File tree

7 files changed

+110
-96
lines changed

7 files changed

+110
-96
lines changed

build/built-jar.properties

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#Wed, 27 Nov 2013 15:00:42 +0200
1+
#Wed, 27 Nov 2013 16:10:16 +0200
22

33

44
C\:\\Projects\\Private\\GitHub\\image-scraper=
Binary file not shown.
Binary file not shown.

dist/image-scraper.jar

528 Bytes
Binary file not shown.

nbproject/private/private.xml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
<?xml version="1.0" encoding="UTF-8"?>
2-
<project-private xmlns="http://www.netbeans.org/ns/project-private/1">
3-
<editor-bookmarks xmlns="http://www.netbeans.org/ns/editor-bookmarks/1"/>
4-
<editor-bookmarks xmlns="http://www.netbeans.org/ns/editor-bookmarks/2" lastBookmarkId="0"/>
5-
<open-files xmlns="http://www.netbeans.org/ns/projectui-open-files/2">
6-
<group/>
7-
</open-files>
8-
</project-private>
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<project-private xmlns="http://www.netbeans.org/ns/project-private/1">
3+
<editor-bookmarks xmlns="http://www.netbeans.org/ns/editor-bookmarks/1"/>
4+
<editor-bookmarks xmlns="http://www.netbeans.org/ns/editor-bookmarks/2" lastBookmarkId="0"/>
5+
<open-files xmlns="http://www.netbeans.org/ns/projectui-open-files/2">
6+
<group/>
7+
</open-files>
8+
</project-private>

nbproject/project.properties

Lines changed: 75 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -1,75 +1,75 @@
1-
annotation.processing.enabled=true
2-
annotation.processing.enabled.in.editor=false
3-
annotation.processing.processors.list=
4-
annotation.processing.run.all.processors=true
5-
application.desc=A simple java desktop application based on Swing Application Framework
6-
application.homepage=http://appframework.dev.java.net
7-
application.title=Basic Application Example
8-
application.vendor=Sun Microsystems Inc.
9-
build.classes.dir=${build.dir}/classes
10-
build.classes.excludes=**/*.java,**/*.form
11-
# This directory is removed when the project is cleaned:
12-
build.dir=build
13-
build.generated.dir=${build.dir}/generated
14-
build.generated.sources.dir=${build.dir}/generated-sources
15-
# Only compile against the classpath explicitly listed here:
16-
build.sysclasspath=ignore
17-
build.test.classes.dir=${build.dir}/test/classes
18-
build.test.results.dir=${build.dir}/test/results
19-
debug.classpath=\
20-
${run.classpath}
21-
debug.test.classpath=\
22-
${run.test.classpath}
23-
# This directory is removed when the project is cleaned:
24-
dist.dir=dist
25-
dist.jar=${dist.dir}/image-scraper.jar
26-
dist.javadoc.dir=${dist.dir}/javadoc
27-
endorsed.classpath=
28-
excludes=
29-
includes=**
30-
jar.compress=false
31-
javac.classpath=\
32-
${libs.JSoup.classpath}:\
33-
${libs.swing-app-framework.classpath}
34-
# Space-separated list of extra javac options
35-
javac.compilerargs=
36-
javac.deprecation=false
37-
javac.processorpath=\
38-
${javac.classpath}
39-
javac.source=1.7
40-
javac.target=1.7
41-
javac.test.classpath=\
42-
${javac.classpath}:\
43-
${build.classes.dir}:\
44-
${libs.junit.classpath}:\
45-
${libs.junit_4.classpath}
46-
javadoc.additionalparam=
47-
javadoc.author=false
48-
javadoc.encoding=${source.encoding}
49-
javadoc.noindex=false
50-
javadoc.nonavbar=false
51-
javadoc.notree=false
52-
javadoc.private=false
53-
javadoc.splitindex=true
54-
javadoc.use=true
55-
javadoc.version=false
56-
javadoc.windowtitle=
57-
jaxbwiz.endorsed.dirs="${netbeans.home}/../ide12/modules/ext/jaxb/api"
58-
main.class=imagedownloader.ImageDownloaderApp
59-
manifest.file=manifest.mf
60-
meta.inf.dir=${src.dir}/META-INF
61-
mkdist.disabled=false
62-
platform.active=default_platform
63-
run.classpath=\
64-
${javac.classpath}:\
65-
${build.classes.dir}
66-
# Space-separated list of JVM arguments used when running the project
67-
# (you may also define separate properties like run-sys-prop.name=value instead of -Dname=value
68-
# or test-sys-prop.name=value to set system properties for unit tests):
69-
run.jvmargs=
70-
run.test.classpath=\
71-
${javac.test.classpath}:\
72-
${build.test.classes.dir}
73-
source.encoding=UTF-8
74-
src.dir=src
75-
test.src.dir=test
1+
annotation.processing.enabled=true
2+
annotation.processing.enabled.in.editor=false
3+
annotation.processing.processors.list=
4+
annotation.processing.run.all.processors=true
5+
application.desc=A simple java desktop application based on Swing Application Framework
6+
application.homepage=http://appframework.dev.java.net
7+
application.title=Basic Application Example
8+
application.vendor=Sun Microsystems Inc.
9+
build.classes.dir=${build.dir}/classes
10+
build.classes.excludes=**/*.java,**/*.form
11+
# This directory is removed when the project is cleaned:
12+
build.dir=build
13+
build.generated.dir=${build.dir}/generated
14+
build.generated.sources.dir=${build.dir}/generated-sources
15+
# Only compile against the classpath explicitly listed here:
16+
build.sysclasspath=ignore
17+
build.test.classes.dir=${build.dir}/test/classes
18+
build.test.results.dir=${build.dir}/test/results
19+
debug.classpath=\
20+
${run.classpath}
21+
debug.test.classpath=\
22+
${run.test.classpath}
23+
# This directory is removed when the project is cleaned:
24+
dist.dir=dist
25+
dist.jar=${dist.dir}/image-scraper.jar
26+
dist.javadoc.dir=${dist.dir}/javadoc
27+
endorsed.classpath=
28+
excludes=
29+
includes=**
30+
jar.compress=false
31+
javac.classpath=\
32+
${libs.JSoup.classpath}:\
33+
${libs.swing-app-framework.classpath}
34+
# Space-separated list of extra javac options
35+
javac.compilerargs=
36+
javac.deprecation=false
37+
javac.processorpath=\
38+
${javac.classpath}
39+
javac.source=1.7
40+
javac.target=1.7
41+
javac.test.classpath=\
42+
${javac.classpath}:\
43+
${build.classes.dir}:\
44+
${libs.junit.classpath}:\
45+
${libs.junit_4.classpath}
46+
javadoc.additionalparam=
47+
javadoc.author=false
48+
javadoc.encoding=${source.encoding}
49+
javadoc.noindex=false
50+
javadoc.nonavbar=false
51+
javadoc.notree=false
52+
javadoc.private=false
53+
javadoc.splitindex=true
54+
javadoc.use=true
55+
javadoc.version=false
56+
javadoc.windowtitle=
57+
jaxbwiz.endorsed.dirs="${netbeans.home}/../ide12/modules/ext/jaxb/api"
58+
main.class=imagedownloader.ImageDownloaderApp
59+
manifest.file=manifest.mf
60+
meta.inf.dir=${src.dir}/META-INF
61+
mkdist.disabled=false
62+
platform.active=default_platform
63+
run.classpath=\
64+
${javac.classpath}:\
65+
${build.classes.dir}
66+
# Space-separated list of JVM arguments used when running the project
67+
# (you may also define separate properties like run-sys-prop.name=value instead of -Dname=value
68+
# or test-sys-prop.name=value to set system properties for unit tests):
69+
run.jvmargs=
70+
run.test.classpath=\
71+
${javac.test.classpath}:\
72+
${build.test.classes.dir}
73+
source.encoding=UTF-8
74+
src.dir=src
75+
test.src.dir=test

src/imagedownloader/core/ImageDownloader.java

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,6 @@ public class ImageDownloader {
4242
*/
4343
private static final int THREADS = Runtime.getRuntime().availableProcessors();
4444

45-
/**
46-
* the connection timeout for retrieving pages
47-
*/
48-
private static final int CONNECT_TIMEOUT = 1 * 1000;
49-
5045
/**
5146
* the digit template pattern
5247
*/
@@ -64,7 +59,7 @@ public class ImageDownloader {
6459
/**
6560
* the count at which the pattern will stop matching and figuring out URLs
6661
*/
67-
private int failureCount = 5;
62+
private int failureCount = 10;
6863
/**
6964
* the range at which to start downloading,
7065
* the end will be auto-determined according to failureCount
@@ -143,35 +138,55 @@ public List<URL> searchForImages() {
143138
try {
144139
final Document document = Jsoup.connect(this.baseUrl + next)
145140
.userAgent("ImageScraper")
146-
.timeout(CONNECT_TIMEOUT).get();
141+
.timeout(1000).get();
147142

148143
if (document != null) {
149144
LOG.log(Level.FINE, "got document from url[" + this.baseUrl + next + "], parsing...");
145+
150146
final Elements elements = document.select(this.cssSelector);
151147
if (elements != null && !elements.isEmpty()) {
152148
LOG.log(Level.FINE, "found elements, looking for images");
153-
154149
for (Element image : elements) {
155150
if ("img".equals(image.tagName())) {
156151
final String src = image.absUrl("src");
157152
if (!StringUtil.isNull(src)) {
158153
LOG.log(Level.FINE, "found image source[" + src + "], adding to list...");
159154
resources.add(new URL(src));
155+
156+
// reset fail count, we had success
157+
failCount = 0;
158+
} else {
159+
LOG.log(Level.WARNING, "found image[" + image
160+
+ "], but it had no source, increasing error count ["
161+
+ (failCount++ + "/" + this.failureCount) + "]");
160162
}
161163
} else {
162164
LOG.log(Level.WARNING, "found element[" + image
163-
+ "], but it was not an image...");
165+
+ "], but it was not an image, increasing error count ["
166+
+ (failCount++ + "/" + this.failureCount) + "]");
164167
}
165168
}
169+
} else {
170+
// no elements for selector, could be empty page, anything really, increase failcount
171+
LOG.log(Level.WARNING, "could find images using selector["
172+
+ this.cssSelector + "] on document["
173+
+ this.baseUrl + next + "], increasing error count ["
174+
+ (failCount++ + "/" + this.failureCount) + "]");
166175
}
176+
} else {
177+
// no document, increase failcount
178+
LOG.log(Level.WARNING, "could not open document for ["
179+
+ this.baseUrl + next + "], increasing error count ["
180+
+ (failCount++ + "/" + this.failureCount) + "]");
167181
}
168182
} catch (IOException e) {
169183
LOG.log(Level.WARNING, "could not open/read stream to ["
170-
+ this.baseUrl + next + "], increasing error count", e);
171-
failCount++;
184+
+ this.baseUrl + next + "], increasing error count ["
185+
+ (failCount++ + "/" + this.failureCount) + "]", e);
172186
}
173187
}
174188

189+
LOG.log(Level.INFO, "got resources to download\n" + resources);
175190
return Collections.unmodifiableList(resources);
176191
}
177192

@@ -249,7 +264,6 @@ public void run() {
249264
}
250265
}
251266

252-
253267
return tasks;
254268
}
255269

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy