diff --git a/.codeqlmanifest.json b/.codeqlmanifest.json index c467eb1..7a76f3a 100644 --- a/.codeqlmanifest.json +++ b/.codeqlmanifest.json @@ -1,3 +1,9 @@ -{ "provide": [ "ql/src/qlpack.yml", - "extractor/codeql-extractor.yml" ], - "ignore": [ "the-extractor-which-needs-to-be-built" ] } +{ + "provide": [ + "ql/src/qlpack.yml", + "extractor/codeql-extractor.yml" + ], + "ignore": [ + "the-extractor-which-needs-to-be-built" + ] +} diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index a7157a5..c9c5a40 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -80,7 +80,7 @@ jobs: bundle config path vendor/bundle bundle install --jobs 4 --retry 3 - - name: Build & run specs + - name: Build & run Ruby specs env: CODEQL_PATH: "$GITHUB_WORKSPACE/codeql/codeql" working-directory: ./codeql-ruby diff --git a/lib/codeql_ruby/extractor_file.rb b/lib/codeql_ruby/extractor_file.rb index 04e0271..c149c3e 100644 --- a/lib/codeql_ruby/extractor_file.rb +++ b/lib/codeql_ruby/extractor_file.rb @@ -1,4 +1,5 @@ require 'forwardable' +require 'pathname' module CodeqlRuby class ExtractorFile diff --git a/ql/src/Files.qll b/ql/src/Files.qll new file mode 100644 index 0000000..8518b1c --- /dev/null +++ b/ql/src/Files.qll @@ -0,0 +1,203 @@ +/** + * Provides classes for working with files and folders. + * + * Stolen liberally from the Javascript QLL + * https://github.com/github/codeql/blob/813d14791d6bea399bc96fa9b7143603eef6e6c4/javascript/ql/src/semmle/javascript/Files.qll + * + */ + +import ruby + +/** A file or folder. */ +abstract class Container extends @container { + /** + * Gets the absolute, canonical path of this container, using forward slashes + * as path separator. + * + * The path starts with a _root prefix_ followed by zero or more _path + * segments_ separated by forward slashes. + * + * The root prefix is of one of the following forms: + * + * 1. 
A single forward slash `/` (Unix-style) + * 2. An upper-case drive letter followed by a colon and a forward slash, + * such as `C:/` (Windows-style) + * 3. Two forward slashes, a computer name, and then another forward slash, + * such as `//FileServer/` (UNC-style) + * + * Path segments are never empty (that is, absolute paths never contain two + * contiguous slashes, except as part of a UNC-style root prefix). Also, path + * segments never contain forward slashes, and no path segment is of the + * form `.` (one dot) or `..` (two dots). + * + * Note that an absolute path never ends with a forward slash, except if it is + * a bare root prefix, that is, the path has no path segments. A container + * whose absolute path has no segments is always a `Folder`, not a `File`. + */ + abstract string getAbsolutePath(); + + /** + * Gets a URL representing the location of this container. + * + * For more information see [Providing URLs](https://help.semmle.com/QL/learn-ql/ql/locations.html#providing-urls). + */ + abstract string getURL(); + + /** + * Gets the relative path of this file or folder from the root folder of the + * analyzed source location. The relative path of the root folder itself is + * the empty string. + * + * This has no result if the container is outside the source root, that is, + * if the root folder is not a reflexive, transitive parent of this container. + */ + string getRelativePath() { + exists(string absPath, string pref | + absPath = getAbsolutePath() and sourceLocationPrefix(pref) + | + absPath = pref and result = "" + or + absPath = pref.regexpReplaceAll("/$", "") + "/" + result and + not result.matches("/%") + ) + } + + /** + * Gets the base name of this container including extension, that is, the last + * segment of its absolute path, or the empty string if it has no segments. + * + * Here are some examples of absolute paths and the corresponding base names + * (surrounded with quotes to avoid ambiguity): + * + * + * + * + * + * + * + * + * + *
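<table border="1">
<tr><th>Absolute path</th><th>Base name</th></tr>
<tr><td>"/tmp/tst.js"</td><td>"tst.js"</td></tr>
<tr><td>"C:/Program Files (x86)"</td><td>"Program Files (x86)"</td></tr>
<tr><td>"/"</td><td>""</td></tr>
<tr><td>"C:/"</td><td>""</td></tr>
<tr><td>"D:/"</td><td>""</td></tr>
<tr><td>"//FileServer/"</td><td>""</td></tr>
</table>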
+ */ + string getBaseName() { result = getAbsolutePath().regexpCapture(".*/(([^/]*?)(\\.([^.]*))?)", 1) } + + /** + * Gets the extension of this container, that is, the suffix of its base name + * after the last dot character, if any. + * + * In particular, + * + * - if the name does not include a dot, there is no extension, so this + * predicate has no result; + * - if the name ends in a dot, the extension is the empty string; + * - if the name contains multiple dots, the extension follows the last dot. + * + * Here are some examples of absolute paths and the corresponding extensions + * (surrounded with quotes to avoid ambiguity): + * + * + * + * + * + * + * + * + *
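<table border="1">
<tr><th>Absolute path</th><th>Extension</th></tr>
<tr><td>"/tmp/tst.js"</td><td>"js"</td></tr>
<tr><td>"/tmp/.classpath"</td><td>"classpath"</td></tr>
<tr><td>"/bin/bash"</td><td>not defined</td></tr>
<tr><td>"/tmp/tst2."</td><td>""</td></tr>
<tr><td>"/tmp/x.tar.gz"</td><td>"gz"</td></tr>
</table>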
+ */ + string getExtension() { + result = getAbsolutePath().regexpCapture(".*/(([^/]*?)(\\.([^.]*))?)", 4) + } + + /** + * Gets the stem of this container, that is, the prefix of its base name up to + * (but not including) the last dot character if there is one, or the entire + * base name if there is not. + * + * Here are some examples of absolute paths and the corresponding stems + * (surrounded with quotes to avoid ambiguity): + * + * + * + * + * + * + * + * + *
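<table border="1">
<tr><th>Absolute path</th><th>Stem</th></tr>
<tr><td>"/tmp/tst.js"</td><td>"tst"</td></tr>
<tr><td>"/tmp/.classpath"</td><td>""</td></tr>
<tr><td>"/bin/bash"</td><td>"bash"</td></tr>
<tr><td>"/tmp/tst2."</td><td>"tst2"</td></tr>
<tr><td>"/tmp/x.tar.gz"</td><td>"x.tar"</td></tr>
</table>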
+ */ + string getStem() { result = getAbsolutePath().regexpCapture(".*/(([^/]*?)(\\.([^.]*))?)", 2) } + + /** Gets the parent container of this file or folder, if any. */ + Container getParentContainer() { containerparent(result, this) } + + /** Gets a file or sub-folder in this container. */ + Container getAChildContainer() { this = result.getParentContainer() } + + /** Gets a file in this container. */ + File getAFile() { result = getAChildContainer() } + + /** Gets the file in this container that has the given `baseName`, if any. */ + File getFile(string baseName) { + result = getAFile() and + result.getBaseName() = baseName + } + + /** Gets a sub-folder in this container. */ + Folder getAFolder() { result = getAChildContainer() } + + /** Gets the sub-folder in this container that has the given `baseName`, if any. */ + Folder getFolder(string baseName) { + result = getAFolder() and + result.getBaseName() = baseName + } + + /** + * Gets a textual representation of the path of this container. + * + * This is the absolute path of the container. + */ + string toString() { result = getAbsolutePath() } +} + +/** A folder. */ +class Folder extends Container, @folder { + override string getAbsolutePath() { folders(this, result, _) } + + /** Gets the file or subfolder in this folder that has the given `name`, if any. */ + Container getChildContainer(string name) { + result = getAChildContainer() and + result.getBaseName() = name + } + + /** Gets the file in this folder that has the given `stem` and `extension`, if any. */ + File getFile(string stem, string extension) { + result = getAChildContainer() and + result.getStem() = stem and + result.getExtension() = extension + } + + /** Gets a subfolder contained in this folder. */ + Folder getASubFolder() { result = getAChildContainer() } + + /** Gets the URL of this folder. */ + override string getURL() { result = "folder://" + getAbsolutePath() } +} + +/** A file. 
*/ +class File extends Container, @file { + override string getAbsolutePath() { files(this, result, _, _, _) } + + /** Gets the number of lines in this file. */ + int getNumberOfLines() { result = sum(int loc | numlines(this, loc, _, _) | loc) } + + /** Gets the number of lines containing code in this file. */ + int getNumberOfLinesOfCode() { result = sum(int loc | numlines(this, _, loc, _) | loc) } + + /** Gets the number of lines containing comments in this file. */ + int getNumberOfLinesOfComments() { result = sum(int loc | numlines(this, _, _, loc) | loc) } + + override string toString() { result = Container.super.toString() } + + /** Gets the URL of this file. */ + override string getURL() { result = "file://" + this.getAbsolutePath() + ":0:0:0:0" } +} diff --git a/ql/src/LeafNode.qll b/ql/src/LeafNode.qll index e3c369c..db967b6 100644 --- a/ql/src/LeafNode.qll +++ b/ql/src/LeafNode.qll @@ -14,8 +14,10 @@ import ruby * 1 * ``` */ -class LeafNode extends @leaf_node { +class LeafNode extends @leaf_node, Locatable { string getText() { leaf_nodes(this, result, _, _) } - string toString() { result = "LeafNode" } + override Location getLocation() { has_location(this, result) } + + override string toString() { result = "LeafNode" } } diff --git a/ql/src/Locations.qll b/ql/src/Locations.qll new file mode 100644 index 0000000..7b64ac5 --- /dev/null +++ b/ql/src/Locations.qll @@ -0,0 +1,110 @@ +/** + * Provides classes for working with locations and program elements that have locations. + * + * Stolen liberally from the Javascript QL library: + * https://github.com/github/codeql/blob/813d14791d6bea399bc96fa9b7143603eef6e6c4/javascript/ql/src/semmle/javascript/Locations.qll + * + */ + +import ruby + +/** + * A location as given by a file, a start line, a start column, + * an end line, and an end column. + * + * For more information about locations see [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html). 
+ */ +class Location extends @location { + /** Gets the file for this location. */ + File getFile() { locations_default(this, result, _, _, _, _) } + + /** Gets the 1-based line number (inclusive) where this location starts. */ + int getStartLine() { locations_default(this, _, result, _, _, _) } + + /** Gets the 1-based column number (inclusive) where this location starts. */ + int getStartColumn() { locations_default(this, _, _, result, _, _) } + + /** Gets the 1-based line number (inclusive) where this location ends. */ + int getEndLine() { locations_default(this, _, _, _, result, _) } + + /** Gets the 1-based column number (inclusive) where this location ends. */ + int getEndColumn() { locations_default(this, _, _, _, _, result) } + + /** Gets the number of lines covered by this location. */ + int getNumLines() { result = getEndLine() - getStartLine() + 1 } + + /** Holds if this location starts before location `that`. */ + pragma[inline] + predicate startsBefore(Location that) { + exists(File f, int sl1, int sc1, int sl2, int sc2 | + locations_default(this, f, sl1, sc1, _, _) and + locations_default(that, f, sl2, sc2, _, _) + | + sl1 < sl2 + or + sl1 = sl2 and sc1 < sc2 + ) + } + + /** Holds if this location ends after location `that`. */ + pragma[inline] + predicate endsAfter(Location that) { + exists(File f, int el1, int ec1, int el2, int ec2 | + locations_default(this, f, _, _, el1, ec1) and + locations_default(that, f, _, _, el2, ec2) + | + el1 > el2 + or + el1 = el2 and ec1 > ec2 + ) + } + + /** + * Holds if this location contains location `that`, meaning that it starts + * before and ends after it. + */ + predicate contains(Location that) { this.startsBefore(that) and this.endsAfter(that) } + + /** Holds if this location is empty. */ + predicate isEmpty() { exists(int l, int c | locations_default(this, _, l, c, l, c - 1)) } + + /** Gets a textual representation of this element. 
*/ + string toString() { result = this.getFile().getBaseName() + ":" + this.getStartLine().toString() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + exists(File f | + locations_default(this, f, startline, startcolumn, endline, endcolumn) and + filepath = f.getAbsolutePath() + ) + } +} + +/** A program element with a location. */ +class Locatable extends @locatable { + /** Gets the file this program element comes from. */ + File getFile() { result = getLocation().getFile() } + + /** Gets this element's location. */ + Location getLocation() { + // overridden by subclasses + none() + } + + /** Gets the number of lines covered by this element. */ + int getNumLines() { result = getLocation().getNumLines() } + + /** Gets a textual representation of this element. 
*/ + string toString() { + // to be overridden by subclasses + none() + } +} diff --git a/ql/src/ruby.qll b/ql/src/ruby.qll index ccb590d..34ae5bf 100644 --- a/ql/src/ruby.qll +++ b/ql/src/ruby.qll @@ -1,2 +1,4 @@ -import LeafNode \ No newline at end of file +import Files +import Locations +import LeafNode diff --git a/spec/codeql_ruby_spec.rb b/spec/codeql_ruby_spec.rb index 88da23c..4333f25 100644 --- a/spec/codeql_ruby_spec.rb +++ b/spec/codeql_ruby_spec.rb @@ -26,4 +26,11 @@ expect(results).to be_a(String) end + + it "extracts Location info from LeafNodes" do + results = CodeqlRunner.results_for_db('leaf_node_location') + tuples = results.dig('#select', 'tuples') + + expect(tuples).to include([{'label'=>'LeafNode'}, 'puts', 'leaf_node_location.rb:1']) + end end diff --git a/spec/leaf_node_location/example.ql b/spec/leaf_node_location/example.ql new file mode 100644 index 0000000..2f7ab02 --- /dev/null +++ b/spec/leaf_node_location/example.ql @@ -0,0 +1,4 @@ +import ruby + +from LeafNode n +select n, n.getText(), n.getLocation().toString() diff --git a/spec/leaf_node_location/leaf_node_location.rb b/spec/leaf_node_location/leaf_node_location.rb new file mode 100644 index 0000000..910b705 --- /dev/null +++ b/spec/leaf_node_location/leaf_node_location.rb @@ -0,0 +1 @@ +puts 'this file contains some leaf nodes' diff --git a/spec/leaf_node_location/qlpack.yml b/spec/leaf_node_location/qlpack.yml new file mode 100644 index 0000000..d18b613 --- /dev/null +++ b/spec/leaf_node_location/qlpack.yml @@ -0,0 +1,3 @@ +name: leaf-node-location-ruby-queries +version: 0.0.0 +libraryPathDependencies: codeql-ruby