Skip to content

Commit a08c62c

Browse files
committed
Refactoring.
Extracted magic numbers into a properties file loaded from the classpath.
1 parent 764729f commit a08c62c

File tree

10 files changed

+190
-119
lines changed

10 files changed

+190
-119
lines changed

CHANGELOG.txt

+2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ Changes in version 1.2
2424

2525
* Give priority to the largest image associated with an article.
2626

27+
* Now requires Java 6 or later.
28+
2729

2830
Changes in version 1.1
2931
----------------------

README.txt

+2-4
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,10 @@ GENERATING HTML OUTPUT
3131
headlines, complete with relevant images extracted from the feed articles.
3232
This application can be run as follows:
3333

34-
java -jar zeitgeist-publisher-1.0.jar feedlist.txt "Page Title" 24
34+
java -jar zeitgeist-publisher-1.2.jar feedlist.txt "Page Title"
3535

3636
The first argument is a text file that contains a list of feed URLs, one per
37-
line, the second argument is the title to use for the generated page, and the
38-
third argument is the maximum article age in hours (older articles are
39-
discarded).
37+
line, and the second argument is the title to use for the generated page.
4038

4139
To get good results you should aim to have at least 20 different feeds that
4240
cover the same broad topics.

core/src/java/main/org/uncommons/zeitgeist/Zeitgeist.java

+13-8
Original file line numberDiff line numberDiff line change
@@ -32,21 +32,26 @@
3232
public class Zeitgeist
3333
{
3434
private static final SimpleLogger LOG = new SimpleLogger(FeedDownloadTask.class);
35-
private static final int MINIMUM_ARTICLES_PER_TOPIC = 4;
36-
private static final int MINIMUM_SOURCES_PER_TOPIC = 3;
37-
private static final int MINIMUM_ARTICLES_FOR_KEYWORD = 4; // Ignore obscure words.
38-
private static final double MINIMUM_ARTICLE_RELEVANCE = 8;
3935

4036
private final List<Article> articles;
37+
private final int minArticlesPerTopic;
38+
private final int minSourcesPerTopic;
39+
private final double minArticleRelevance;
4140

4241
/**
4342
* Create a Zeitgeist from the specified list of articles. Typically the
4443
* list of articles is acquired from an {@link ArticleFetcher}.
4544
* @param articles A list of articles fetched from one or more feeds.
4645
*/
47-
public Zeitgeist(List<Article> articles)
46+
public Zeitgeist(List<Article> articles,
47+
int minArticlesPerTopic,
48+
int minSourcesPerTopic,
49+
int minArticleRelevance)
4850
{
4951
this.articles = articles;
52+
this.minArticlesPerTopic = minArticlesPerTopic;
53+
this.minSourcesPerTopic = minSourcesPerTopic;
54+
this.minArticleRelevance = minArticleRelevance;
5055
}
5156

5257

@@ -86,7 +91,7 @@ private List<Topic> extractTopics(List<Article> articles,
8691
topicIndex = j;
8792
}
8893
}
89-
if (maxWeight >= MINIMUM_ARTICLE_RELEVANCE) // Don't include articles with only tenuous links to the main topic.
94+
if (maxWeight >= minArticleRelevance) // Don't include articles with only tenuous links to the main topic.
9095
{
9196
WeightedItem<Article> weightedArticle = new WeightedItem<Article>(maxWeight, articles.get(i));
9297
int index = Collections.binarySearch(articlesByTopic.get(topicIndex),
@@ -105,7 +110,7 @@ private List<Topic> extractTopics(List<Article> articles,
105110
{
106111
Topic topic = new Topic(topicArticles);
107112
int sources = topic.countDistinctSources();
108-
if (sources >= MINIMUM_SOURCES_PER_TOPIC && topicArticles.size() >= MINIMUM_ARTICLES_PER_TOPIC)
113+
if (sources >= minSourcesPerTopic && topicArticles.size() >= minArticlesPerTopic)
109114
{
110115
topics.add(topic);
111116
}
@@ -173,7 +178,7 @@ private List<String> listWords(Map<String, Integer> globalWordCounts)
173178
for (Map.Entry<String, Integer> entry : globalWordCounts.entrySet())
174179
{
175180
// If a word doesn't occur in enough different articles, discard it.
176-
if (entry.getValue() >= MINIMUM_ARTICLES_FOR_KEYWORD)
181+
if (entry.getValue() >= minArticlesPerTopic)
177182
{
178183
words.add(entry.getKey());
179184
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# The minimum number of relevant articles that constitute a distinct topic (also used as the minimum number of articles a word must occur in to be kept as a keyword).
2+
zeitgeist.minArticlesPerTopic=4
3+
# The minimum number of different sources (feeds) that a topic's articles must come from.
4+
zeitgeist.minSourcesPerTopic=3
5+
# The minimum relevance score (>0) an article must have in order to be included.
6+
zeitgeist.minArticleRelevance=8
7+
# The maximum age (in hours) permitted for an article to be included.
8+
zeitgeist.maxArticleAgeHours=24

etc/intellij/zeitgeist.ipr

+1-1
Original file line numberDiff line numberDiff line change
@@ -781,7 +781,7 @@
781781
<component name="ProjectResources">
782782
<default-html-doctype>http://www.w3.org/1999/xhtml</default-html-doctype>
783783
</component>
784-
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_5" assert-keyword="true" jdk-15="true" project-jdk-name="1.6" project-jdk-type="JavaSDK">
784+
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_6" assert-keyword="true" jdk-15="true" project-jdk-name="1.6" project-jdk-type="JavaSDK">
785785
<output url="file://$PROJECT_DIR$/out" />
786786
</component>
787787
<component name="ResourceManagerContainer">
-1.82 MB
Binary file not shown.
1.97 MB
Binary file not shown.

publisher/src/java/main/org/uncommons/zeitgeist/publisher/Publisher.java

+48-105
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,13 @@
1818
import java.awt.Graphics2D;
1919
import java.awt.RenderingHints;
2020
import java.awt.image.BufferedImage;
21-
import java.io.BufferedInputStream;
22-
import java.io.BufferedOutputStream;
2321
import java.io.BufferedReader;
2422
import java.io.ByteArrayOutputStream;
2523
import java.io.File;
26-
import java.io.FileInputStream;
2724
import java.io.FileOutputStream;
2825
import java.io.FileReader;
2926
import java.io.IOException;
3027
import java.io.InputStream;
31-
import java.io.OutputStream;
3228
import java.io.OutputStreamWriter;
3329
import java.io.Writer;
3430
import java.net.URL;
@@ -37,6 +33,7 @@
3733
import java.util.HashSet;
3834
import java.util.LinkedList;
3935
import java.util.List;
36+
import java.util.Properties;
4037
import java.util.Set;
4138
import java.util.regex.Matcher;
4239
import java.util.regex.Pattern;
@@ -117,12 +114,12 @@ public void publish(List<Topic> topics,
117114
publishTemplate(topics, title, feedCount, articleCount, htmlTemplate, new File("index.html"));
118115
if (group.getRootDir() != null)
119116
{
120-
copyFile(outputDir, "zeitgeist.css", "zeitgeist.css");
117+
StreamUtils.copyFile(outputDir, new File(group.getRootDir(), "zeitgeist.css"), "zeitgeist.css");
121118
}
122119
else
123120
{
124121

125-
copyClasspathResource(outputDir, "zeitgeist.css", "zeitgeist.css");
122+
StreamUtils.copyClasspathResource(outputDir, "zeitgeist.css", "zeitgeist.css");
126123
}
127124

128125
if (group.isDefined("snippet"))
@@ -191,8 +188,8 @@ private void cacheImages(List<Topic> topics, File cacheDir)
191188
{
192189
try
193190
{
194-
copyStream(openConnection(image.getImageURL()).getInputStream(),
195-
new FileOutputStream(new File(cacheDir, image.getCachedFileName())));
191+
StreamUtils.copyStreamToFile(openConnection(image.getImageURL()).getInputStream(),
192+
new File(cacheDir, image.getCachedFileName()));
196193
LOG.debug("Downloaded image: " + image.getImageURL());
197194
scaleImage(cachedFile, 200);
198195
}
@@ -232,8 +229,7 @@ private void cacheIcons(List<Topic> topics, File cacheDir)
232229
{
233230
try
234231
{
235-
copyStream(openConnection(icon.getImageURL()).getInputStream(),
236-
new FileOutputStream(cachedFile));
232+
StreamUtils.copyStreamToFile(openConnection(icon.getImageURL()).getInputStream(), cachedFile);
237233
// Some sites will serve up a zero-byte file for the default location
238234
// but still have a valid icon elsewhere.
239235
if (cachedFile.length() == 0)
@@ -270,8 +266,7 @@ private void extractFaviconFromHTML(Image icon, File cachedFile)
270266
if (matcher.find())
271267
{
272268
URL url = new URL(icon.getArticleURL(), matcher.group(1));
273-
copyStream(openConnection(url).getInputStream(),
274-
new FileOutputStream(cachedFile));
269+
StreamUtils.copyStreamToFile(openConnection(url).getInputStream(), cachedFile);
275270
LOG.debug("Downloaded favicon via web page: " + url.toString());
276271
}
277272
else
@@ -298,7 +293,7 @@ private String fetchPage(URL pageURL) throws IOException
298293
URLConnection urlConnection = openConnection(pageURL);
299294
InputStream inputStream = urlConnection.getInputStream();
300295
ByteArrayOutputStream buffer = new ByteArrayOutputStream();
301-
copyStream(inputStream, buffer);
296+
StreamUtils.copyStream(inputStream, buffer);
302297
String encoding = urlConnection.getContentEncoding();
303298
return new String(buffer.toByteArray(), encoding == null ? ENCODING : encoding);
304299
}
@@ -343,128 +338,76 @@ private URLConnection openConnection(URL url) throws IOException
343338

344339

345340
/**
346-
* Copy a single named resource from the classpath to the output directory.
347-
* @param outputDirectory The destination directory for the copied resource.
348-
* @param resourcePath The path of the resource.
349-
* @param targetFileName The name of the file created in {@literal outputDirectory}.
350-
* @throws IOException If the resource cannot be copied.
341+
* Entry point for the publisher application. Takes two mandatory arguments - the path to a file containing a list
342+
* of feeds and the title to use for the generated output, and optionally a third argument specifying a directory of templates
343+
* to use in place of the defaults.
351344
*/
352-
private void copyClasspathResource(File outputDirectory,
353-
String resourcePath,
354-
String targetFileName) throws IOException
355-
{
356-
InputStream resourceStream = ClassLoader.getSystemResourceAsStream(resourcePath);
357-
copyStream(resourceStream, new FileOutputStream(new File(outputDirectory, targetFileName)));
358-
}
359-
360-
361-
/**
362-
* Copy a single named file to the output directory.
363-
* @param outputDirectory The destination directory for the copied resource.
364-
* @param filePath The path of the file.
365-
* @param targetFileName The name of the file created in {@literal outputDirectory}.
366-
* @throws IOException If the file cannot be copied.
367-
*/
368-
private void copyFile(File outputDirectory,
369-
String filePath,
370-
String targetFileName) throws IOException
345+
public static void main(String[] args) throws IOException
371346
{
372-
FileInputStream inputStream = new FileInputStream(new File(group.getRootDir(), filePath));
373-
try
374-
{
375-
copyStream(inputStream, new FileOutputStream(new File(outputDirectory, targetFileName)));
376-
}
377-
finally
347+
if (args.length < 2 || args.length > 3)
378348
{
379-
inputStream.close();
349+
printUsage();
380350
}
381-
}
382-
383-
384-
/**
385-
* Helper method to copy the contents of a stream to a file.
386-
* @param stream The stream to copy.
387-
* @param target The target stream to write the stream contents to.
388-
* @throws IOException If the stream cannot be copied.
389-
*/
390-
private void copyStream(InputStream stream,
391-
OutputStream target) throws IOException
392-
{
393-
BufferedInputStream input = new BufferedInputStream(stream);
394-
try
351+
else
395352
{
396-
BufferedOutputStream output = new BufferedOutputStream(target);
353+
InputStream propertiesStream = Publisher.class.getClassLoader().getResourceAsStream("zeitgeist.properties");
397354
try
398355
{
399-
int i = input.read();
400-
while (i != -1)
401-
{
402-
output.write(i);
403-
i = input.read();
404-
}
405-
output.flush();
356+
Properties properties = new Properties();
357+
properties.load(propertiesStream);
358+
System.getProperties().putAll(properties);
359+
360+
List<URL> feeds = parseFeedList(args[0]);
361+
362+
long maxAgeHours = Long.parseLong(System.getProperty("zeitgeist.maxArticleAgeHours"));
363+
Date cutoffDate = new Date(System.currentTimeMillis() - Math.round(maxAgeHours * 3600000));
364+
List<Article> articles = new ArticleFetcher().getArticles(feeds, cutoffDate);
365+
List<Topic> topics = new Zeitgeist(articles,
366+
Integer.parseInt(System.getProperty("zeitgeist.minArticlesPerTopic")),
367+
Integer.parseInt(System.getProperty("zeitgeist.minSourcesPerTopic")),
368+
Integer.parseInt(System.getProperty("zeitgeist.minArticleRelevance"))).getTopics();
369+
LOG.info(topics.size() + " topics identified.");
370+
Publisher publisher = args.length > 2 ? new Publisher(new File(args[2])) : new Publisher();
371+
publisher.publish(topics, args[1], feeds.size(), articles.size(), new File("."));
406372
}
407373
finally
408374
{
409-
output.close();
375+
propertiesStream.close();
410376
}
411377
}
412-
finally
413-
{
414-
input.close();
415-
}
416378
}
417379

418380

419-
/**
420-
* Entry point for the publisher application. Takes three arguments - the path to a file containing a list
421-
* of feeds, the title to use for the generated output, and the maximum age (in hours) permitted for articles
422-
* to be included.
423-
*/
424-
public static void main(String[] args) throws IOException
381+
private static List<URL> parseFeedList(String arg) throws IOException
425382
{
426-
if (args.length < 3 || args.length > 4)
427-
{
428-
printUsage();
429-
}
430-
else
383+
List<URL> feeds = new LinkedList<URL>();
384+
BufferedReader feedListReader = new BufferedReader(new FileReader(arg));
385+
try
431386
{
432-
BufferedReader feedListReader = new BufferedReader(new FileReader(args[0]));
433-
try
387+
for (String line = feedListReader.readLine(); line != null; line = feedListReader.readLine())
434388
{
435-
List<URL> feeds = new LinkedList<URL>();
436-
for (String line = feedListReader.readLine(); line != null; line = feedListReader.readLine())
389+
String url = line.trim();
390+
// Lines beginning with a hash are considered to be comments.
391+
if (!url.startsWith("#") && !url.isEmpty())
437392
{
438-
String url = line.trim();
439-
// Lines beginning with a hash are considered to be comments.
440-
if (!url.startsWith("#") && url.length() > 0)
441-
{
442-
feeds.add(new URL(url));
443-
}
393+
feeds.add(new URL(url));
444394
}
445-
double maxAgeHours = Double.parseDouble(args[2]);
446-
Date cutoffDate = new Date(System.currentTimeMillis() - Math.round(maxAgeHours * 3600000));
447-
List<Article> articles = new ArticleFetcher().getArticles(feeds, cutoffDate);
448-
List<Topic> topics = new Zeitgeist(articles).getTopics();
449-
LOG.info(topics.size() + " topics identified.");
450-
Publisher publisher = args.length > 3 ? new Publisher(new File(args[3])) : new Publisher();
451-
publisher.publish(topics, args[1], feeds.size(), articles.size(), new File("."));
452-
}
453-
finally
454-
{
455-
feedListReader.close();
456395
}
457396
}
397+
finally
398+
{
399+
feedListReader.close();
400+
}
401+
return feeds;
458402
}
459403

460404

461405
private static void printUsage()
462406
{
463-
System.err.println("java -jar zeitgeist-publisher.jar <feedlist> <title> <maxage> [templatedir]");
407+
System.err.println("java -jar zeitgeist-publisher.jar <feedlist> <title> [templatedir]");
464408
System.err.println();
465409
System.err.println(" <feedlist> - Path to a file listing RSS/Atom feeds, one per line.");
466410
System.err.println(" <title> - A title passed to the templates.");
467-
System.err.println(" <maxage> - The maximum age (in hours) of included articles.");
468411
System.err.println(" [templatedir] - Path to alternate templates to use in place of the defaults.");
469412
System.err.println();
470413
System.err.println("If no template directory is specified, default templates from the classpath are used.");

0 commit comments

Comments
 (0)