dwdyer
diff --git a/‎CHANGELOG.txt
+2 b/‎CHANGELOG.txt
+2
diff --git a/‎README.txt
+2-4 b/‎README.txt
+2-4
diff --git a/‎core/src/java/main/org/uncommons/zeitgeist/Zeitgeist.java
+13-8 b/‎core/src/java/main/org/uncommons/zeitgeist/Zeitgeist.java
+13-8
diff --git a/‎core/src/java/resources/zeitgeist.properties
+8 b/‎core/src/java/resources/zeitgeist.properties
+8
diff --git a/‎etc/intellij/zeitgeist.ipr
+1-1 b/‎etc/intellij/zeitgeist.ipr
+1-1
diff --git a/‎lib/compiletime/uncommons-antlib-0.3.1.jar
-1.82 MB b/‎lib/compiletime/uncommons-antlib-0.3.1.jar
-1.82 MB
diff --git a/‎lib/compiletime/uncommons-antlib-0.3.3.jar
1.97 MB b/‎lib/compiletime/uncommons-antlib-0.3.3.jar
1.97 MB
diff --git a/‎publisher/src/java/main/org/uncommons/zeitgeist/publisher/Publisher.java
+48-105 b/‎publisher/src/java/main/org/uncommons/zeitgeist/publisher/Publisher.java
+48-105
@@ -24,6 +24,8 @@ Changes in version 1.2
 
 * Give priority to the largest image associated with an article.
 
+* Now requires Java 6 or later.
+
 
 Changes in version 1.1
 ----------------------
 
@@ -31,12 +31,10 @@ GENERATING HTML OUTPUT
   headlines, complete with relevant images extracted from the feed articles.
   This application can be run as follows:
 
-      java -jar zeitgeist-publisher-1.0.jar feedlist.txt "Page Title" 24
+      java -jar zeitgeist-publisher-1.2.jar feedlist.txt "Page Title"
 
   The first argument is a text file that contains a list of feed URLs, one per
-  line, the second argument is the title to use for the generated page, and the
-  third argument is the maximum article age in hours (older articles are
-  discarded).
+  line, and the second argument is the title to use for the generated page.
 
   To get good results you should aim to have at least 20 different feeds that
   cover the same broad topics.
@@ -32,21 +32,26 @@
 public class Zeitgeist
 {
     private static final SimpleLogger LOG = new SimpleLogger(FeedDownloadTask.class);
-    private static final int MINIMUM_ARTICLES_PER_TOPIC = 4;
-    private static final int MINIMUM_SOURCES_PER_TOPIC = 3;
-    private static final int MINIMUM_ARTICLES_FOR_KEYWORD = 4; // Ignore obscure words.
-    private static final double MINIMUM_ARTICLE_RELEVANCE = 8;
 
     private final List<Article> articles;
+    private final int minArticlesPerTopic;
+    private final int minSourcesPerTopic;
+    private final double minArticleRelevance;
 
     /**
      * Create a Zeitgeist from the specified list of articles.  Typically the
      * list of articles is acquired from an {@link ArticleFetcher}.
      * @param articles A list of articles fetched from one or more feeds.
      */
-    public Zeitgeist(List<Article> articles)
+    public Zeitgeist(List<Article> articles,
+                     int minArticlesPerTopic,
+                     int minSourcesPerTopic,
+                     double minArticleRelevance)
     {
         this.articles = articles;
+        this.minArticlesPerTopic = minArticlesPerTopic;
+        this.minSourcesPerTopic = minSourcesPerTopic;
+        this.minArticleRelevance = minArticleRelevance;
     }
 
 
@@ -86,7 +91,7 @@ private List<Topic> extractTopics(List<Article> articles,
                     topicIndex = j;
                 }
             }
-            if (maxWeight >= MINIMUM_ARTICLE_RELEVANCE) // Don't include articles with only tenuous links to the main topic.
+            if (maxWeight >= minArticleRelevance) // Don't include articles with only tenuous links to the main topic.
             {
                 WeightedItem<Article> weightedArticle = new WeightedItem<Article>(maxWeight, articles.get(i));
                 int index = Collections.binarySearch(articlesByTopic.get(topicIndex),
@@ -105,7 +110,7 @@ private List<Topic> extractTopics(List<Article> articles,
         {
             Topic topic = new Topic(topicArticles);
             int sources = topic.countDistinctSources();
-            if (sources >= MINIMUM_SOURCES_PER_TOPIC && topicArticles.size() >= MINIMUM_ARTICLES_PER_TOPIC)
+            if (sources >= minSourcesPerTopic && topicArticles.size() >= minArticlesPerTopic)
             {
                 topics.add(topic);
             }
@@ -173,7 +178,7 @@ private List<String> listWords(Map<String, Integer> globalWordCounts)
         for (Map.Entry<String, Integer> entry : globalWordCounts.entrySet())
         {
             // If a word doesn't occur in enough different articles, discard it.
-            if (entry.getValue() >= MINIMUM_ARTICLES_FOR_KEYWORD)
+            if (entry.getValue() >= minArticlesPerTopic)
             {
                 words.add(entry.getKey());
             }
 
@@ -0,0 +1,8 @@
+# The minimum number of relevant articles that constitute a distinct topic.
+zeitgeist.minArticlesPerTopic=4
+# The minimum number of different sources (feeds) that a topic's articles must come from.
+zeitgeist.minSourcesPerTopic=3
+# The minimum relevance score (>0) an article must have in order to be included.
+zeitgeist.minArticleRelevance=8
+# The maximum age (in hours) permitted for an article to be included.
+zeitgeist.maxArticleAgeHours=24
@@ -781,7 +781,7 @@
   <component name="ProjectResources">
     <default-html-doctype>http://www.w3.org/1999/xhtml</default-html-doctype>
   </component>
-  <component name="ProjectRootManager" version="2" languageLevel="JDK_1_5" assert-keyword="true" jdk-15="true" project-jdk-name="1.6" project-jdk-type="JavaSDK">
+  <component name="ProjectRootManager" version="2" languageLevel="JDK_1_6" assert-keyword="true" jdk-15="true" project-jdk-name="1.6" project-jdk-type="JavaSDK">
     <output url="file://$PROJECT_DIR$/out" />
   </component>
   <component name="ResourceManagerContainer">
 
@@ -18,17 +18,13 @@
 import java.awt.Graphics2D;
 import java.awt.RenderingHints;
 import java.awt.image.BufferedImage;
-import java.io.BufferedInputStream;
-import java.io.BufferedOutputStream;
 import java.io.BufferedReader;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
-import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.FileReader;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.OutputStream;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
 import java.net.URL;
@@ -37,6 +33,7 @@
 import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.Properties;
 import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -117,12 +114,12 @@ public void publish(List<Topic> topics,
         publishTemplate(topics, title, feedCount, articleCount, htmlTemplate, new File("index.html"));
         if (group.getRootDir() != null)
         {
-            copyFile(outputDir, "zeitgeist.css", "zeitgeist.css");
+            StreamUtils.copyFile(outputDir, new File(group.getRootDir(), "zeitgeist.css"), "zeitgeist.css");
         }
         else
         {
 
-            copyClasspathResource(outputDir, "zeitgeist.css", "zeitgeist.css");
+            StreamUtils.copyClasspathResource(outputDir, "zeitgeist.css", "zeitgeist.css");
         }
 
         if (group.isDefined("snippet"))
@@ -191,8 +188,8 @@ private void cacheImages(List<Topic> topics, File cacheDir)
                 {
                     try
                     {
-                        copyStream(openConnection(image.getImageURL()).getInputStream(),
-                                   new FileOutputStream(new File(cacheDir, image.getCachedFileName())));
+                        StreamUtils.copyStreamToFile(openConnection(image.getImageURL()).getInputStream(),
+                                                     new File(cacheDir, image.getCachedFileName()));
                         LOG.debug("Downloaded image: " + image.getImageURL());
                         scaleImage(cachedFile, 200);
                     }
@@ -232,8 +229,7 @@ private void cacheIcons(List<Topic> topics, File cacheDir)
             {
                 try
                 {
-                    copyStream(openConnection(icon.getImageURL()).getInputStream(),
-                               new FileOutputStream(cachedFile));
+                    StreamUtils.copyStreamToFile(openConnection(icon.getImageURL()).getInputStream(), cachedFile);
                     // Some sites will serve up a zero-byte file for the default location
                     // but still have a valid icon elsewhere.
                     if (cachedFile.length() == 0)
@@ -270,8 +266,7 @@ private void extractFaviconFromHTML(Image icon, File cachedFile)
             if (matcher.find())
             {
                 URL url = new URL(icon.getArticleURL(), matcher.group(1));
-                copyStream(openConnection(url).getInputStream(),
-                           new FileOutputStream(cachedFile));
+                StreamUtils.copyStreamToFile(openConnection(url).getInputStream(), cachedFile);
                 LOG.debug("Downloaded favicon via web page: " + url.toString());
             }
             else
@@ -298,7 +293,7 @@ private String fetchPage(URL pageURL) throws IOException
         URLConnection urlConnection = openConnection(pageURL);
         InputStream inputStream = urlConnection.getInputStream();
         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
-        copyStream(inputStream, buffer);
+        StreamUtils.copyStream(inputStream, buffer);
         String encoding = urlConnection.getContentEncoding();
         return new String(buffer.toByteArray(), encoding == null ? ENCODING : encoding);
     }
@@ -343,128 +338,76 @@ private URLConnection openConnection(URL url) throws IOException
 
 
     /**
-     * Copy a single named resource from the classpath to the output directory.
-     * @param outputDirectory The destination directory for the copied resource.
-     * @param resourcePath The path of the resource.
-     * @param targetFileName The name of the file created in {@literal outputDirectory}.
-     * @throws IOException If the resource cannot be copied.
+     * Entry point for the publisher application.  Takes two mandatory arguments - the path to a file containing a list
+     * of feeds and the title to use for the generated output - and optionally a third argument, a template directory
+     * to use in place of the defaults.  Thresholds and maximum article age are read from zeitgeist.properties.
      */
-    private void copyClasspathResource(File outputDirectory,
-                                       String resourcePath,
-                                       String targetFileName) throws IOException
-    {
-        InputStream resourceStream = ClassLoader.getSystemResourceAsStream(resourcePath);
-        copyStream(resourceStream, new FileOutputStream(new File(outputDirectory, targetFileName)));
-    }
-
-
-    /**
-     * Copy a single named file to the output directory.
-     * @param outputDirectory The destination directory for the copied resource.
-     * @param filePath The path of the file.
-     * @param targetFileName The name of the file created in {@literal outputDirectory}.
-     * @throws IOException If the file cannot be copied.
-     */
-    private void copyFile(File outputDirectory,
-                          String filePath,
-                          String targetFileName) throws IOException
+    public static void main(String[] args) throws IOException
     {
-        FileInputStream inputStream = new FileInputStream(new File(group.getRootDir(), filePath));
-        try
-        {
-            copyStream(inputStream, new FileOutputStream(new File(outputDirectory, targetFileName)));
-        }
-        finally
+        if (args.length < 2 || args.length > 3)
         {
-            inputStream.close();
+            printUsage();
         }
-    }
-
-
-    /**
-     * Helper method to copy the contents of a stream to a file.
-     * @param stream The stream to copy.
-     * @param target The target stream to write the stream contents to.
-     * @throws IOException If the stream cannot be copied.
-     */
-    private void copyStream(InputStream stream,
-                            OutputStream target) throws IOException
-    {
-        BufferedInputStream input = new BufferedInputStream(stream);
-        try
+        else
         {
-            BufferedOutputStream output = new BufferedOutputStream(target);
+            InputStream propertiesStream = Publisher.class.getClassLoader().getResourceAsStream("zeitgeist.properties");
             try
             {
-                int i = input.read();
-                while (i != -1)
-                {
-                    output.write(i);
-                    i = input.read();
-                }
-                output.flush();
+                Properties properties = new Properties();
+                properties.load(propertiesStream);
+                System.getProperties().putAll(properties);
+
+                List<URL> feeds = parseFeedList(args[0]);
+                // Parse as double: Math.round(double) returns long, whereas a long argument selects round(float) -> int.
+                double maxAgeHours = Double.parseDouble(System.getProperty("zeitgeist.maxArticleAgeHours"));
+                Date cutoffDate = new Date(System.currentTimeMillis() - Math.round(maxAgeHours * 3600000));
+                List<Article> articles = new ArticleFetcher().getArticles(feeds, cutoffDate);
+                List<Topic> topics = new Zeitgeist(articles,
+                                                   Integer.parseInt(System.getProperty("zeitgeist.minArticlesPerTopic")),
+                                                   Integer.parseInt(System.getProperty("zeitgeist.minSourcesPerTopic")),
+                                                   Integer.parseInt(System.getProperty("zeitgeist.minArticleRelevance"))).getTopics();
+                LOG.info(topics.size() + " topics identified.");
+                Publisher publisher = args.length > 2 ? new Publisher(new File(args[2])) : new Publisher();
+                publisher.publish(topics, args[1], feeds.size(), articles.size(), new File("."));
             }
             finally
             {
-                output.close();
+                propertiesStream.close();
             }
         }
-        finally
-        {
-            input.close();
-        }
     }
 
 
-    /**
-     * Entry point for the publisher application.  Takes three arguments - the path to a file containing a list
-     * of feeds, the title to use for the generated output, and the maximum age (in hours) permitted for articles
-     * to be included.
-     */
-    public static void main(String[] args) throws IOException
+    private static List<URL> parseFeedList(String arg) throws IOException
     {
-        if (args.length < 3 || args.length > 4)
-        {
-            printUsage();
-        }
-        else
+        List<URL> feeds = new LinkedList<URL>();
+        BufferedReader feedListReader = new BufferedReader(new FileReader(arg));
+        try
         {
-            BufferedReader feedListReader = new BufferedReader(new FileReader(args[0]));
-            try
+            for (String line = feedListReader.readLine(); line != null; line = feedListReader.readLine())
             {
-                List<URL> feeds = new LinkedList<URL>();
-                for (String line = feedListReader.readLine(); line != null; line = feedListReader.readLine())
+                String url = line.trim();
+                // Skip blank lines and comment lines (those beginning with a hash).
+                if (!url.startsWith("#") && !url.isEmpty())
                 {
-                    String url = line.trim();
-                    // Lines beginning with a hash are considered to be comments.
-                    if (!url.startsWith("#") && url.length() > 0)
-                    {
-                        feeds.add(new URL(url));
-                    }
+                    feeds.add(new URL(url));
                 }
-                double maxAgeHours = Double.parseDouble(args[2]);
-                Date cutoffDate = new Date(System.currentTimeMillis() - Math.round(maxAgeHours * 3600000));
-                List<Article> articles = new ArticleFetcher().getArticles(feeds, cutoffDate);
-                List<Topic> topics = new Zeitgeist(articles).getTopics();
-                LOG.info(topics.size() + " topics identified.");
-                Publisher publisher = args.length > 3 ? new Publisher(new File(args[3])) : new Publisher();
-                publisher.publish(topics, args[1], feeds.size(), articles.size(), new File("."));
-            }
-            finally
-            {
-                feedListReader.close();
             }
         }
+        finally
+        {
+            feedListReader.close();
+        }
+        return feeds;
     }
 
 
     private static void printUsage()
     {
-        System.err.println("java -jar zeitgeist-publisher.jar <feedlist> <title> <maxage> [templatedir]");
+        System.err.println("java -jar zeitgeist-publisher.jar <feedlist> <title> [templatedir]");
         System.err.println();
         System.err.println("  <feedlist>    - Path to a file listing RSS/Atom feeds, one per line.");
         System.err.println("  <title>       - A title passed to the templates.");
-        System.err.println("  <maxage>      - The maximum age (in hours) of included articles.");
         System.err.println("  [templatedir] - Path to alternate templates to use in place of the defaults.");
         System.err.println();
         System.err.println("If no template directory is specified, default templates from the classpath are used.");
Original file line number	Diff line number	Diff line change
`@@ -32,21 +32,26 @@`
`32`	`32`	`public class Zeitgeist`
`33`	`33`	`{`
`34`	`34`	`private static final SimpleLogger LOG = new SimpleLogger(FeedDownloadTask.class);`
`35`		`- private static final int MINIMUM_ARTICLES_PER_TOPIC = 4;`
`36`		`- private static final int MINIMUM_SOURCES_PER_TOPIC = 3;`
`37`		`- private static final int MINIMUM_ARTICLES_FOR_KEYWORD = 4; // Ignore obscure words.`
`38`		`- private static final double MINIMUM_ARTICLE_RELEVANCE = 8;`
`39`	`35`
`40`	`36`	`private final List<Article> articles;`
	`37`	`+ private final int minArticlesPerTopic;`
	`38`	`+ private final int minSourcesPerTopic;`
	`39`	`+ private final double minArticleRelevance;`
`41`	`40`
`42`	`41`	`/**`
`43`	`42`	`* Create a Zeitgeist from the specified list of articles. Typically the`
`44`	`43`	`* list of articles is acquired from an {@link ArticleFetcher}.`
`45`	`44`	`* @param articles A list of articles fetched from one or more feeds.`
`46`	`45`	`*/`
`47`		`- public Zeitgeist(List<Article> articles)`
	`46`	`+ public Zeitgeist(List<Article> articles,`
	`47`	`+ int minArticlesPerTopic,`
	`48`	`+ int minSourcesPerTopic,`
	`49`	`+ int minArticleRelevance)`
`48`	`50`	`{`
`49`	`51`	`this.articles = articles;`
	`52`	`+ this.minArticlesPerTopic = minArticlesPerTopic;`
	`53`	`+ this.minSourcesPerTopic = minSourcesPerTopic;`
	`54`	`+ this.minArticleRelevance = minArticleRelevance;`
`50`	`55`	`}`
`51`	`56`
`52`	`57`
`@@ -86,7 +91,7 @@ private List<Topic> extractTopics(List<Article> articles,`
`86`	`91`	`topicIndex = j;`
`87`	`92`	`}`
`88`	`93`	`}`
`89`		`- if (maxWeight >= MINIMUM_ARTICLE_RELEVANCE) // Don't include articles with only tenuous links to the main topic.`
	`94`	`+ if (maxWeight >= minArticleRelevance) // Don't include articles with only tenuous links to the main topic.`
`90`	`95`	`{`
`91`	`96`	`WeightedItem<Article> weightedArticle = new WeightedItem<Article>(maxWeight, articles.get(i));`
`92`	`97`	`int index = Collections.binarySearch(articlesByTopic.get(topicIndex),`
`@@ -105,7 +110,7 @@ private List<Topic> extractTopics(List<Article> articles,`
`105`	`110`	`{`
`106`	`111`	`Topic topic = new Topic(topicArticles);`
`107`	`112`	`int sources = topic.countDistinctSources();`
`108`		`- if (sources >= MINIMUM_SOURCES_PER_TOPIC && topicArticles.size() >= MINIMUM_ARTICLES_PER_TOPIC)`
	`113`	`+ if (sources >= minSourcesPerTopic && topicArticles.size() >= minArticlesPerTopic)`
`109`	`114`	`{`
`110`	`115`	`topics.add(topic);`
`111`	`116`	`}`
`@@ -173,7 +178,7 @@ private List<String> listWords(Map<String, Integer> globalWordCounts)`
`173`	`178`	`for (Map.Entry<String, Integer> entry : globalWordCounts.entrySet())`
`174`	`179`	`{`
`175`	`180`	`// If a word doesn't occur in enough different articles, discard it.`
`176`		`- if (entry.getValue() >= MINIMUM_ARTICLES_FOR_KEYWORD)`
	`181`	`+ if (entry.getValue() >= minArticlesPerTopic)`
`177`	`182`	`{`
`178`	`183`	`words.add(entry.getKey());`
`179`	`184`	`}`