diff --git a/src/bc.cc b/src/bc.cc
index 76a22d9..91783a7 100644
--- a/src/bc.cc
+++ b/src/bc.cc
@@ -25,7 +25,7 @@ Author: Scott Beamer
 Will return array of approx betweenness centrality scores for each vertex
 
 This BC implementation makes use of the Brandes [1] algorithm with
-implementation optimizations from Madduri et al. [2]. It is only an approximate
+implementation optimizations from Madduri et al. [2]. It is only approximate
 because it does not compute the paths from every start vertex, but only a small
 subset of them. Additionally, the scores are normalized to the range [0,1].
 
@@ -199,7 +199,7 @@ bool BCVerifier(const Graph &g, SourcePicker<Graph> &sp, NodeID num_iters,
         verts_at_depth[depths[n]].push_back(n);
       }
     }
-    // Going from farthest to clostest, compute "depencies" (deltas)
+    // Going from farthest to closest, compute "dependencies" (deltas)
     pvector<ScoreT> deltas(g.num_nodes(), 0);
     for (int depth=verts_at_depth.size()-1; depth >= 0; depth--) {
       for (NodeID u : verts_at_depth[depth]) {
diff --git a/src/benchmark.h b/src/benchmark.h
index 410ec13..594a6f0 100644
--- a/src/benchmark.h
+++ b/src/benchmark.h
@@ -68,7 +68,7 @@ class SourcePicker {
 };
 
 
-// Returns k pairs with largest values from list of key-value pairs
+// Returns k pairs with the largest values from list of key-value pairs
 template<typename KeyT, typename ValT>
 std::vector<std::pair<ValT, KeyT>> TopK(
     const std::vector<std::pair<KeyT, ValT>> &to_sort, size_t k) {
diff --git a/src/bfs.cc b/src/bfs.cc
index a3aaf14..4223811 100644
--- a/src/bfs.cc
+++ b/src/bfs.cc
@@ -30,7 +30,7 @@
 false-sharing for the top-down approach, thread-local QueueBuffer's are used.
 
 To save time computing the number of edges exiting the frontier, this
 implementation precomputes the degrees in bulk at the beginning by storing
-them in parent array as negative numbers. Thus the encoding of parent is:
+them in the parent array as negative numbers. Thus, the encoding of parent is:
 parent[x] < 0 implies x is unvisited and parent[x] = -out_degree(x)
-parent[x] >= 0 implies x been visited
+parent[x] >= 0 implies x has been visited
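
Note on the bfs.cc hunk above: the negative-degree encoding can be sketched as
below. This is a minimal serial illustration, not code from bfs.cc; the helper
names and the plain std::vector adjacency are assumptions, and the real kernel
works on the Graph type, runs under OpenMP, and claims vertices with
compare_and_swap.

    #include <cstdint>
    #include <vector>

    using NodeID = int32_t;
    using Adjacency = std::vector<std::vector<NodeID>>;

    // parent[x] < 0  : x unvisited, and -parent[x] records x's out-degree
    // parent[x] >= 0 : x visited, parent[x] is its BFS parent
    std::vector<NodeID> InitParentSketch(const Adjacency &out_neigh) {
      std::vector<NodeID> parent(out_neigh.size());
      for (size_t n = 0; n < out_neigh.size(); n++) {
        NodeID degree = static_cast<NodeID>(out_neigh[n].size());
        parent[n] = (degree != 0) ? -degree : -1;  // keep zero-degree negative
      }
      return parent;
    }

    // One top-down step: claiming a vertex recovers its out-degree from the
    // encoding, so counting the next frontier's edges costs no graph accesses.
    int64_t TDStepSketch(const Adjacency &out_neigh,
                         std::vector<NodeID> &parent,
                         std::vector<NodeID> &frontier) {
      int64_t scout_count = 0;
      std::vector<NodeID> next;
      for (NodeID u : frontier) {
        for (NodeID v : out_neigh[u]) {
          NodeID curr_val = parent[v];
          if (curr_val < 0) {           // v unvisited (gapbs: compare_and_swap)
            parent[v] = u;              // claim v
            scout_count += -curr_val;   // out-degree of v, from the encoding
            next.push_back(v);
          }
        }
      }
      frontier.swap(next);
      return scout_count;
    }
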
diff --git a/src/builder.h b/src/builder.h
index 99d8200..1430d64 100644
--- a/src/builder.h
+++ b/src/builder.h
@@ -26,10 +26,10 @@ GAP Benchmark Suite
 Class: BuilderBase
 Author: Scott Beamer
 
-Given arguements from the command line (cli), returns a built graph
- - MakeGraph() will parse cli and obtain edgelist and call
-   MakeGraphFromEL(edgelist) to perform actual graph construction
- - edgelist can be from file (reader) or synthetically generated (generator)
+Given arguments from the command line (cli), returns a built graph
+ - MakeGraph() will parse cli and obtain an edgelist, then call
+   MakeGraphFromEL(edgelist) to perform the actual graph construction
+ - edgelist can be from file (Reader) or synthetically generated (Generator)
  - Common case: BuilderBase typedef'd (w/ params) to be Builder (benchmark.h)
 */
 
@@ -291,7 +291,7 @@ class BuilderBase {
   }
 
   /*
-  Graph Bulding Steps (for CSR):
+  Graph Building Steps (for CSR):
     - Read edgelist once to determine vertex degrees (CountDegrees)
     - Determine vertex offsets by a prefix sum (ParallelPrefixSum)
    - Allocate storage and set points according to offsets (GenIndex)
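
Note on the builder.h hunk above: the four CSR-building steps read as a
pipeline, condensed in the sketch below. This is an assumed serial
simplification with plain vectors (unweighted, out-edges only); BuilderBase
itself is templated and performs these steps in parallel.

    #include <cstdint>
    #include <utility>
    #include <vector>

    using NodeID = int32_t;
    using Edge = std::pair<NodeID, NodeID>;

    struct CSRGraph {
      std::vector<int64_t> offsets;  // neighs[offsets[n]..offsets[n+1]) = n's edges
      std::vector<NodeID> neighs;
    };

    CSRGraph BuildCSRSketch(const std::vector<Edge> &el, NodeID num_nodes) {
      // 1. Read edgelist once to determine vertex degrees (CountDegrees)
      std::vector<int64_t> degrees(num_nodes, 0);
      for (const Edge &e : el)
        degrees[e.first]++;
      // 2. Determine vertex offsets by a prefix sum (ParallelPrefixSum)
      CSRGraph g;
      g.offsets.assign(num_nodes + 1, 0);
      for (NodeID n = 0; n < num_nodes; n++)
        g.offsets[n + 1] = g.offsets[n] + degrees[n];
      // 3. Allocate storage sized by the final offset (GenIndex sets pointers)
      g.neighs.resize(g.offsets[num_nodes]);
      // 4. Copy edges into storage, using per-vertex cursors into neighs
      std::vector<int64_t> cursor(g.offsets.begin(), g.offsets.end() - 1);
      for (const Edge &e : el)
        g.neighs[cursor[e.first]++] = e.second;
      return g;
    }
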
diff --git a/src/cc.cc b/src/cc.cc
index 8f2d479..2ab7c53 100644
--- a/src/cc.cc
+++ b/src/cc.cc
@@ -13,7 +13,6 @@
 #include "command_line.h"
 #include "graph.h"
 #include "pvector.h"
-#include "timer.h"
 
 
 /*
@@ -120,7 +119,7 @@ pvector<NodeID> Afforest(const Graph &g, bool logging_enabled = false,
   // compression, this value represents the largest intermediate component
   NodeID c = SampleFrequentElement(comp, logging_enabled);
 
-  // Final 'link' phase over remaining edges (excluding largest component)
+  // Final 'link' phase over remaining edges (excluding the largest component)
   if (!g.directed()) {
 #pragma omp parallel for schedule(dynamic, 16384)
     for (NodeID u = 0; u < g.num_nodes(); u++) {
diff --git a/src/cc_sv.cc b/src/cc_sv.cc
index eedef67..c667ac6 100644
--- a/src/cc_sv.cc
+++ b/src/cc_sv.cc
@@ -13,7 +13,6 @@
 #include "command_line.h"
 #include "graph.h"
 #include "pvector.h"
-#include "timer.h"
 
 
 /*
diff --git a/src/converter.cc b/src/converter.cc
index 9615529..549ee4b 100644
--- a/src/converter.cc
+++ b/src/converter.cc
@@ -7,7 +7,6 @@
 #include "builder.h"
 #include "command_line.h"
 #include "graph.h"
-#include "reader.h"
 #include "writer.h"
 
 using namespace std;
diff --git a/src/graph.h b/src/graph.h
index 7af29dc..139fdc9 100644
--- a/src/graph.h
+++ b/src/graph.h
@@ -22,7 +22,7 @@ Author: Scott Beamer
 
 Simple container for graph in CSR format
  - Intended to be constructed by a Builder
 - To make weighted, set DestID_ template type to NodeWeight
- - MakeInverse parameter controls whether graph stores its inverse
+ - MakeInverse parameter controls whether graph stores incoming edges
 */
 
@@ -69,7 +69,7 @@ std::istream& operator>>(std::istream& is, NodeWeight<NodeID_, WeightT_>& nw) {
 }
 
 
-// Syntatic sugar for an edge
+// Syntactic sugar for an edge
 template <typename SrcT, typename DstT = SrcT>
 struct EdgePair {
   SrcT u;
diff --git a/src/pr.cc b/src/pr.cc
index 24b9607..7d99900 100644
--- a/src/pr.cc
+++ b/src/pr.cc
@@ -18,10 +18,10 @@ Author: Scott Beamer
 
 Will return pagerank scores for all vertices once total change < epsilon
 
-This PR implementation uses the traditional iterative approach. It perform
+This PR implementation uses the traditional iterative approach. It performs
 updates in the pull direction to remove the need for atomics, and it allows
 new values to be immediately visible (like Gauss-Seidel method). The prior PR
-implemention is still available in src/pr_spmv.cc.
+implementation is still available in src/pr_spmv.cc.
 */
 
 
@@ -68,7 +68,6 @@ void PrintTopScores(const Graph &g, const pvector<ScoreT> &scores) {
   }
   int k = 5;
   vector<pair<ScoreT, NodeID>> top_k = TopK(score_pairs, k);
-  k = min(k, static_cast<int>(top_k.size()));
   for (auto kvp : top_k)
     cout << kvp.second << ":" << kvp.first << endl;
 }
@@ -79,16 +78,16 @@ void PrintTopScores(const Graph &g, const pvector<ScoreT> &scores) {
 bool PRVerifier(const Graph &g, const pvector<ScoreT> &scores,
                 double target_error) {
   const ScoreT base_score = (1.0f - kDamp) / g.num_nodes();
-  pvector<ScoreT> incomming_sums(g.num_nodes(), 0);
+  pvector<ScoreT> incoming_sums(g.num_nodes(), 0);
   double error = 0;
   for (NodeID u : g.vertices()) {
     ScoreT outgoing_contrib = scores[u] / g.out_degree(u);
     for (NodeID v : g.out_neigh(u))
-      incomming_sums[v] += outgoing_contrib;
+      incoming_sums[v] += outgoing_contrib;
   }
   for (NodeID n : g.vertices()) {
-    error += fabs(base_score + kDamp * incomming_sums[n] - scores[n]);
-    incomming_sums[n] = 0;
+    error += fabs(base_score + kDamp * incoming_sums[n] - scores[n]);
+    incoming_sums[n] = 0;
   }
   PrintTime("Total Error", error);
   return error < target_error;
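
Note on the pr.cc hunk above: the pull-direction point is illustrated below
with an assumed serial rendering using plain vectors. Each scores[u] has a
single writer per sweep, which is why the parallel kernel needs no atomics,
and refreshing outgoing_contrib[u] in place is what makes new values
immediately visible within the same sweep (the Gauss-Seidel flavor). Like
PRVerifier above, it assumes every vertex has nonzero out-degree.

    #include <cmath>
    #include <cstdint>
    #include <vector>

    using NodeID = int32_t;
    using ScoreT = float;
    const ScoreT kDamp = 0.85f;

    std::vector<ScoreT> PageRankPullSketch(
        const std::vector<std::vector<NodeID>> &in_neigh,
        const std::vector<int64_t> &out_degree, int max_iters, double epsilon) {
      const NodeID num_nodes = static_cast<NodeID>(in_neigh.size());
      const ScoreT init_score = 1.0f / num_nodes;
      const ScoreT base_score = (1.0f - kDamp) / num_nodes;
      std::vector<ScoreT> scores(num_nodes, init_score);
      std::vector<ScoreT> outgoing_contrib(num_nodes);
      for (NodeID n = 0; n < num_nodes; n++)
        outgoing_contrib[n] = init_score / out_degree[n];
      for (int iter = 0; iter < max_iters; iter++) {
        double error = 0;
        for (NodeID u = 0; u < num_nodes; u++) {
          ScoreT incoming_total = 0;
          for (NodeID v : in_neigh[u])            // pull: u is the only writer
            incoming_total += outgoing_contrib[v];
          ScoreT old_score = scores[u];
          scores[u] = base_score + kDamp * incoming_total;
          error += std::fabs(scores[u] - old_score);
          outgoing_contrib[u] = scores[u] / out_degree[u];  // visible this sweep
        }
        if (error < epsilon)
          break;
      }
      return scores;
    }
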
diff --git a/src/pr_spmv.cc b/src/pr_spmv.cc
index a1823ac..50c929d 100644
--- a/src/pr_spmv.cc
+++ b/src/pr_spmv.cc
@@ -21,7 +21,7 @@ Will return pagerank scores for all vertices once total change < epsilon
 
 This legacy PR implementation uses the traditional iterative approach. This is
 done to ease comparisons to other implementations (often use same algorithm),
-but it is not necesarily the fastest way to implement it. It performs each
+but it is not necessarily the fastest way to implement it. It performs each
 iteration as a sparse-matrix vector multiply (SpMV), and values are not
 visible until the next iteration (like Jacobi-style method).
 */
@@ -68,7 +68,6 @@ void PrintTopScores(const Graph &g, const pvector<ScoreT> &scores) {
   }
   int k = 5;
   vector<pair<ScoreT, NodeID>> top_k = TopK(score_pairs, k);
-  k = min(k, static_cast<int>(top_k.size()));
   for (auto kvp : top_k)
     cout << kvp.second << ":" << kvp.first << endl;
 }
@@ -79,16 +78,16 @@ void PrintTopScores(const Graph &g, const pvector<ScoreT> &scores) {
 bool PRVerifier(const Graph &g, const pvector<ScoreT> &scores,
                 double target_error) {
   const ScoreT base_score = (1.0f - kDamp) / g.num_nodes();
-  pvector<ScoreT> incomming_sums(g.num_nodes(), 0);
+  pvector<ScoreT> incoming_sums(g.num_nodes(), 0);
   double error = 0;
   for (NodeID u : g.vertices()) {
     ScoreT outgoing_contrib = scores[u] / g.out_degree(u);
     for (NodeID v : g.out_neigh(u))
-      incomming_sums[v] += outgoing_contrib;
+      incoming_sums[v] += outgoing_contrib;
   }
   for (NodeID n : g.vertices()) {
-    error += fabs(base_score + kDamp * incomming_sums[n] - scores[n]);
-    incomming_sums[n] = 0;
+    error += fabs(base_score + kDamp * incoming_sums[n] - scores[n]);
+    incoming_sums[n] = 0;
   }
   PrintTime("Total Error", error);
   return error < target_error;
diff --git a/src/pvector.h b/src/pvector.h
index f61b54d..2cef617 100644
--- a/src/pvector.h
+++ b/src/pvector.h
@@ -12,8 +12,8 @@ GAP Benchmark Suite
 Class: pvector
 Author: Scott Beamer
 
-Vector class with ability to not initialize or do initialize in parallel
- - std::vector (when resizing) will always initialize, and does it serially
+Vector class that can skip initialization or initialize in parallel
+ - std::vector (when resizing) will always initialize, and does so serially
  - When pvector is resized, new elements are uninitialized
  - Resizing is not thread-safe
 */
diff --git a/src/reader.h b/src/reader.h
index 2ddd686..4544ba3 100644
--- a/src/reader.h
+++ b/src/reader.h
@@ -41,7 +41,7 @@ class Reader {
   std::string GetSuffix() {
     std::size_t suff_pos = filename_.rfind('.');
     if (suff_pos == std::string::npos) {
-      std::cout << "Could't find suffix of " << filename_ << std::endl;
+      std::cout << "Couldn't find suffix of " << filename_ << std::endl;
       std::exit(-1);
     }
     return filename_.substr(suff_pos);
diff --git a/src/sssp.cc b/src/sssp.cc
index 8cb36c0..08cfbc8 100644
--- a/src/sssp.cc
+++ b/src/sssp.cc
@@ -29,7 +29,7 @@ delta parameter (-d) should be set for each input graph. This implementation
 incorporates a new bucket fusion optimization [2] that significantly reduces
 the number of iterations (& barriers) needed.
 
-The bins of width delta are actually all thread-local and of type std::vector
+The bins of width delta are actually all thread-local and of type std::vector,
 so they can grow but are otherwise capacity-proportional. Each iteration is
 done in two phases separated by barriers. In the first phase, the current
 shared bin is processed by all threads. As they find vertices whose distance
@@ -39,7 +39,7 @@ non-empty bin). In the next phase, each thread copies its selected
 thread-local bin into the shared bin.
 
 Once a vertex is added to a bin, it is not removed, even if its distance is
-later updated and it now appears in a lower bin. We find ignoring vertices if
-their distance is less than the min distance for the current bin removes
+later updated and it now appears in a lower bin. We find that skipping
+vertices whose distance is less than the current bin's min distance removes
 enough redundant work to be faster than removing the vertex from older bins.
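
Note on the sssp.cc hunks above: the bin discipline, including the
never-remove policy and the skip test against the current bin's min distance,
is sketched below. The single shared vector of bins and all names here are
illustrative assumptions; the real kernel keeps bins thread-local, votes on
the next shared bin, and adds bucket fusion. Non-negative weights are assumed,
which guarantees a relaxation performed while processing bin curr never
targets a bin below curr.

    #include <cstddef>
    #include <cstdint>
    #include <limits>
    #include <vector>

    using NodeID = int32_t;
    using WeightT = int32_t;
    struct WNode { NodeID v; WeightT w; };
    const WeightT kDistInf = std::numeric_limits<WeightT>::max() / 2;

    std::vector<WeightT> DeltaStepSketch(
        const std::vector<std::vector<WNode>> &neigh, NodeID source,
        WeightT delta) {
      std::vector<WeightT> dist(neigh.size(), kDistInf);
      dist[source] = 0;
      std::vector<std::vector<NodeID>> bins(1, std::vector<NodeID>{source});
      for (size_t curr = 0; curr < bins.size(); curr++) {
        for (size_t i = 0; i < bins[curr].size(); i++) {  // bin may grow mid-pass
          NodeID u = bins[curr][i];
          if (dist[u] < static_cast<WeightT>(curr) * delta)
            continue;  // stale copy: u was re-binned lower, so skip it here
          for (const WNode &wn : neigh[u]) {
            WeightT new_dist = dist[u] + wn.w;
            if (new_dist < dist[wn.v]) {
              dist[wn.v] = new_dist;
              size_t dest = new_dist / delta;   // bins have width delta
              if (dest >= bins.size())
                bins.resize(dest + 1);
              bins[dest].push_back(wn.v);       // old copies stay where they are
            }
          }
        }
      }
      return dist;
    }
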
diff --git a/src/tc.cc b/src/tc.cc
index 0c87df4..228ffd1 100644
--- a/src/tc.cc
+++ b/src/tc.cc
@@ -25,9 +25,9 @@ Author: Scott Beamer
 
 Will count the number of triangles (cliques of size 3)
 
-Requires input graph:
-  - to be undirected
-  - no duplicate edges (or else will be counted as multiple triangles)
+Input graph requirements:
+  - is undirected
+  - has no duplicate edges (or else they will be counted as multiple triangles)
   - neighborhoods are sorted by vertex identifiers
 
 Other than symmetrizing, the rest of the requirements are done by SquishCSR
@@ -38,12 +38,12 @@
 once. A naive implementation will count the same triangle six times because
 each of the three vertices (u, v, w) will count it in both ways. To count
 a triangle only once, this implementation only counts a triangle if u > v > w.
-Once the remaining unexamined neighbors identifiers get too big, it can break
-out of the loop, but this requires that the neighbors to be sorted.
+Once the remaining unexamined neighbors' identifiers get too big, it can break
+out of the loop, but this requires that the neighbors are sorted.
 
-Another optimization this implementation has is to relabel the vertices by
-degree. This is beneficial if the average degree is high enough and if the
-degree distribution is sufficiently non-uniform. To decide whether or not
-to relabel the graph, we use the heuristic in WorthRelabelling.
+This implementation relabels the vertices by degree. This optimization is
+beneficial if the average degree is sufficiently high and if the degree
+distribution is sufficiently non-uniform. To decide whether to relabel the
+graph, we use the heuristic in WorthRelabelling.
 */
@@ -71,7 +71,7 @@ size_t OrderedCount(const Graph &g) {
 }
 
 
-// heuristic to see if sufficently dense power-law graph
+// Heuristic to see if sufficiently dense power-law graph
 bool WorthRelabelling(const Graph &g) {
   int64_t average_degree = g.num_edges() / g.num_nodes();
   if (average_degree < 10)
@@ -91,7 +91,7 @@ bool WorthRelabelling(const Graph &g) {
 }
 
 
-// uses heuristic to see if worth relabeling
+// Uses heuristic to see if worth relabeling
 size_t Hybrid(const Graph &g) {
   if (WorthRelabelling(g))
     return OrderedCount(Builder::RelabelByDegree(g));
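
Note on the tc.cc hunks above: the u > v > w rule and the sorted-neighborhood
early break are sketched below in serial form (assumed plain-vector adjacency;
the kernel in tc.cc is the OpenMP-parallel equivalent). It presumes the
requirements listed in the header comment: undirected, no duplicate edges,
sorted neighborhoods, plus no self-loops.

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    using NodeID = int32_t;

    size_t OrderedCountSketch(const std::vector<std::vector<NodeID>> &neigh) {
      size_t total = 0;
      for (NodeID u = 0; u < static_cast<NodeID>(neigh.size()); u++) {
        for (NodeID v : neigh[u]) {
          if (v > u)
            break;                      // sorted, so every later v is also > u
          auto it = neigh[u].begin();   // rescan u's list for common neighbors
          for (NodeID w : neigh[v]) {
            if (w > v)
              break;                    // enforce w < v < u
            while (it != neigh[u].end() && *it < w)
              ++it;                     // merge-style advance; lists are sorted
            if (it != neigh[u].end() && *it == w)
              total++;                  // triangle (u, v, w) counted once
          }
        }
      }
      return total;
    }
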