Skip to content

Commit

Permalink
merge reg change
Browse files Browse the repository at this point in the history
  • Loading branch information
TalbenXu committed Dec 20, 2024
1 parent 91f453b commit e783596
Show file tree
Hide file tree
Showing 5 changed files with 132 additions and 58 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/github-action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ jobs:
lcov --remove coverage.info '/usr/*' --output-file coverage.info
lcov --remove coverage.info '${{github.workspace}}/z3.obj/*' --output-file coverage.info
lcov --remove coverage.info '${{github.workspace}}/llvm-*.obj/*' --output-file coverage.info
lcov --remove coverage.info '${{github.workspace}}/svf/include/FastCluster/*' --output-file coverage.info
lcov --remove coverage.info '${{github.workspace}}/svf/include/FastCluster/*' --output-file coverage.info --ignore-errors unused
lcov --remove coverage.info '${{github.workspace}}/svf/lib/FastCluster/*' --output-file coverage.info
- name: upload-coverage
Expand Down
4 changes: 3 additions & 1 deletion svf/include/CFL/CFGNormalizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,9 @@ class CFGNormalizer

private:
/// Add nonterminals to transform long rules into binary rules
void ebnf_bin(CFGrammar *grammar);
void

ebnf_bin(CFGrammar *grammar);

void ebnfSignReplace(char sign, CFGrammar *grammar);

Expand Down
5 changes: 2 additions & 3 deletions svf/include/Graphs/CFLGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
#include <fstream>
#include <iostream>
#include <string>
#include <regex>
#include "CFL/CFGrammar.h"
#include "Graphs/GenericGraph.h"
#include "Graphs/ConsG.h"
Expand All @@ -50,7 +49,7 @@ class CFLEdge: public GenericCFLEdgeTy
typedef GenericNode<CFLNode, CFLEdge>::GEdgeSetTy CFLEdgeSetTy;

CFLEdge(CFLNode *s, CFLNode *d, GEdgeFlag k = 0):
GenericCFLEdgeTy(s,d,k)
GenericCFLEdgeTy(s,d,k)
{
}
~CFLEdge() override = default;
Expand All @@ -77,7 +76,7 @@ class CFLNode: public GenericCFLNodeTy
{
public:
CFLNode (NodeID i = 0, GNodeK k = CFLNodeKd):
GenericCFLNodeTy(i, k)
GenericCFLNodeTy(i, k)
{
}

Expand Down
81 changes: 55 additions & 26 deletions svf/lib/CFL/CFLGraphBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,49 +193,79 @@ CFLGraph* CFLGraphBuilder::buildFromText(std::string fileName, GrammarBase *gram
return cflGraph;
}

/// Build a CFL graph from the edge lines of a Graphviz dot file.
///
/// A line is treated as an edge when it contains, in this order,
/// `Node<id>`, `->`, `Node<id>`, `label=<sym>` and a closing `]`.
/// Any other line (graph header, node declarations, braces, ...) is skipped.
/// Node IDs are read as hexadecimal numbers.
///
/// @param fileName  path of the dot file to read
/// @param grammar   grammar supplying the start kind and (through
///                  buildlabelToKindMap) the known label-to-kind mapping
/// @param direction not used by this function
/// @return the graph stored in `cflGraph`
///
/// If an edge label is not in `labelToKindMap`, it is registered with a fresh
/// kind when Options::FlexSymMap() is set; otherwise the error is reported and
/// the process aborts.
CFLGraph *CFLGraphBuilder::buildFromDot(std::string fileName, GrammarBase *grammar, BuildDirection direction)
{
    buildlabelToKindMap(grammar);
    cflGraph = new CFLGraph(grammar->getStartKind());
    std::ifstream inputFile(fileName);
    std::cout << "Building CFL Graph from dot file: " << fileName << "..\n";

    const std::string nodePrefix = "Node";
    const std::string labelKey = "label=";
    // Characters allowed inside a node ID (the `\w` class). An ID ends at the
    // first character outside this set, so both "Node0x1 -> Node0x2 [label=a]"
    // and "Node0x1->Node0x2[label=a]" are accepted; ending the ID at the next
    // space would drop or mis-slice the second form.
    const std::string idChars =
        "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_";

    std::string lineString;
    u32_t lineNum = 0;
    current = labelToKindMap.size();

    while (getline(inputFile, lineString))
    {
        lineNum += 1;

        // Source node: "Node" followed by a non-empty ID.
        const size_t srcStart = lineString.find(nodePrefix);
        if (srcStart == std::string::npos) continue;
        const size_t srcIDStart = srcStart + nodePrefix.size();
        const size_t srcEnd = lineString.find_first_not_of(idChars, srcIDStart);
        // npos means the ID runs to the end of the line, so no "->" can follow;
        // an empty ID would make std::stoul throw.
        if (srcEnd == std::string::npos || srcEnd == srcIDStart) continue;

        const size_t arrowPos = lineString.find("->", srcEnd);
        if (arrowPos == std::string::npos) continue;

        // Destination node: first "Node" after the arrow, again with a non-empty ID.
        const size_t dstStart = lineString.find(nodePrefix, arrowPos);
        if (dstStart == std::string::npos) continue;
        const size_t dstIDStart = dstStart + nodePrefix.size();
        const size_t dstEnd = lineString.find_first_not_of(idChars, dstIDStart);
        if (dstEnd == std::string::npos || dstEnd == dstIDStart) continue;

        // Label: everything between "label=" and the next ']'.
        size_t labelStart = lineString.find(labelKey, dstEnd);
        if (labelStart == std::string::npos) continue;
        labelStart += labelKey.size();
        const size_t labelEnd = lineString.find(']', labelStart);
        if (labelEnd == std::string::npos) continue;

        const std::string srcIDStr = lineString.substr(srcIDStart, srcEnd - srcIDStart);
        const std::string dstIDStr = lineString.substr(dstIDStart, dstEnd - dstIDStart);
        const std::string label = lineString.substr(labelStart, labelEnd - labelStart);

        // Base 16 also accepts an optional "0x" prefix. std::stoul still throws
        // if the ID does not start with a hexadecimal number.
        u32_t srcID = std::stoul(srcIDStr, nullptr, 16);
        u32_t dstID = std::stoul(dstIDStr, nullptr, 16);

        CFLNode *src = addGNode(srcID);
        CFLNode *dst = addGNode(dstID);

        auto kindIt = labelToKindMap.find(label);
        if (kindIt != labelToKindMap.end())
        {
            cflGraph->addCFLEdge(src, dst, kindIt->second);
        }
        else if (Options::FlexSymMap())
        {
            // Unknown symbol, flexible mapping enabled: assign the next free kind.
            labelToKindMap.insert({label, current++});
            cflGraph->addCFLEdge(src, dst, labelToKindMap[label]);
        }
        else
        {
            std::string msg = "In line " + std::to_string(lineNum) +
                              " sym cannot be found in grammar. Please correct the input dot or set --flexsymmap.";
            SVFUtil::errMsg(msg);
            std::cout << msg;
            abort();
        }
    }

    inputFile.close();
    return cflGraph;
}
Expand All @@ -247,7 +277,6 @@ CFLGraph* CFLGraphBuilder::buildFromJson(std::string fileName, GrammarBase *gram
return cflGraph;
}


CFLGraph* AliasCFLGraphBuilder::buildBigraph(ConstraintGraph *graph, Kind startKind, GrammarBase *grammar)
{
cflGraph = new CFLGraph(startKind);
Expand Down Expand Up @@ -543,4 +572,4 @@ CFLGraph* VFCFLGraphBuilder::buildBiPEGgraph(ConstraintGraph *graph, Kind startK
}


} // end of SVF namespace
} // end of SVF namespace
98 changes: 71 additions & 27 deletions svf/lib/CFL/GrammarBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@

#include <string>
#include <fstream>
#include <regex>
#include <sstream>
#include <iostream>
#include "CFL/GrammarBuilder.h"
Expand All @@ -44,48 +43,69 @@ const inline std::string GrammarBuilder::parseProductionsString() const
std::cerr << "Can't open CFL grammar file `" << fileName << "`" << std::endl;
abort();
}

std::string lineString;
std::string lines = "";
std::string startString;
std::string symbolString;
const std::string WHITESPACE = " \n\r\t\f\v";
int lineNum = 0;

while (getline(textFile, lineString))
{
if(lineNum == 1)
if (lineNum == 1)
{
startString = stripSpace(lineString);
}
if(lineNum == 3)
else if (lineNum == 3)
{
// Trim leading and trailing whitespace
size_t start = lineString.find_first_not_of(WHITESPACE);
size_t end = lineString.find_last_not_of(WHITESPACE);
if (start != std::string::npos && end != std::string::npos)
{
symbolString = lineString.substr(start, end - start + 1);
}
}

// Append line to `lines` with whitespace trimmed
size_t start = lineString.find_first_not_of(WHITESPACE);
size_t end = lineString.find_last_not_of(WHITESPACE);
if (start != std::string::npos && end != std::string::npos)
{
symbolString = lineString.substr(lineString.find_first_not_of(WHITESPACE), lineString.find_last_not_of(WHITESPACE)+1);
lines.append(lineString.substr(start, end - start + 1));
}

lines.append(lineString.substr(lineString.find_first_not_of(WHITESPACE), lineString.find_last_not_of(WHITESPACE)+1));
lineNum++;
}

std::regex reg("Start:([\\s\\S]*)Terminal:(.*)Productions:([\\s\\S]*)");
std::smatch matches;
if (std::regex_search(lines, matches, reg))
// Extract "Productions:" part from `lines` manually
size_t productionsPos = lines.find("Productions:");
if (productionsPos != std::string::npos)
{
lines = matches.str(3);
lines = lines.substr(productionsPos + std::string("Productions:").length());
}

// Parse `symbolString` to insert symbols
std::string sString;
size_t pos = 0;
while ((pos = symbolString.find(" ")) != std::string::npos)
{
sString = stripSpace(symbolString.substr(0, pos));
symbolString.erase(0, pos + 1); //Capital is Nonterminal, Otherwise is terminal
symbolString.erase(0, pos + 1); // Remove the processed part
grammar->insertSymbol(sString);
}
// Insert the remaining symbol
grammar->insertSymbol(symbolString);

// Set the start kind and add the epsilon terminal
grammar->setStartKind(grammar->insertSymbol(startString));
grammar->insertTerminalKind("epsilon");

return lines;
}


const inline std::vector<std::string> GrammarBuilder::loadWordProductions() const
{
size_t pos = 0;
Expand All @@ -104,17 +124,20 @@ const inline std::vector<std::string> GrammarBuilder::loadWordProductions() cons

/// Return `s` with leading and trailing whitespace removed.
///
/// All of space, '\n', '\r', '\t', '\f' and '\v' are trimmed, not only ' '.
/// Lines read with getline from a CRLF file still end in '\r', and grammar
/// files may be tab-indented; trimming only ' ' would leave those characters
/// inside symbol names.
///
/// @param s text to trim (taken by value)
/// @return the trimmed text, or an empty string when `s` is empty or consists
///         solely of whitespace
const inline std::string GrammarBuilder::stripSpace(std::string s) const
{
    constexpr const char *kWhitespace = " \n\r\t\f\v";

    const size_t start = s.find_first_not_of(kWhitespace);
    if (start == std::string::npos)
    {
        return ""; // nothing but whitespace
    }

    // A non-whitespace character exists, so find_last_not_of cannot return npos here.
    const size_t end = s.find_last_not_of(kWhitespace);
    return s.substr(start, end - start + 1);
}

/// build grammarbase from textfile
/// Build grammarbase from textfile
GrammarBase* GrammarBuilder::build() const
{
std::smatch matches;
std::string delimiter = " ";
std::string delimiter1 = "->";
std::string word = "";
Expand All @@ -124,29 +147,50 @@ GrammarBase* GrammarBuilder::build() const

for (auto wordProd : wordProdVec)
{
// Find the position of the '->' delimiter
if ((pos = wordProd.find(delimiter1)) != std::string::npos)
{
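// Split the rule at '->'. NOTE: the variable names are swapped relative to the usual convention: `RHS` holds the text before '->' (the production head) and `LHS` the text after it (the production body)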
std::string RHS = stripSpace(wordProd.substr(0, pos));
std::string LHS = wordProd.substr(pos + delimiter1.size(), wordProd.size() - 1);
std::string LHS = stripSpace(wordProd.substr(pos + delimiter1.size()));

// Insert RHS symbol into grammar
GrammarBase::Symbol RHSSymbol = grammar->insertSymbol(RHS);
prod.push_back(RHSSymbol);
if (grammar->getRawProductions().find(RHSSymbol) == grammar->getRawProductions().end()) grammar->getRawProductions().insert({RHSSymbol, {}});
std::regex LHSRegEx("\\s*(.*)");
std::regex_search(LHS, matches, LHSRegEx);
LHS = matches.str(1);

// Ensure RHS symbol exists in raw productions
if (grammar->getRawProductions().find(RHSSymbol) == grammar->getRawProductions().end())
{
grammar->getRawProductions().insert({RHSSymbol, {}});
}

// Parse LHS string into symbols
while ((pos = LHS.find(delimiter)) != std::string::npos)
{
word = LHS.substr(0, pos);
LHS.erase(0, pos + delimiter.length()); //Capital is Nonterminal, Otherwise is terminal
// Extract each word before the space
word = stripSpace(LHS.substr(0, pos));
LHS.erase(0, pos + delimiter.length());

// Insert symbol into production
prod.push_back(grammar->insertSymbol(word));
}
prod.push_back(grammar->insertSymbol(LHS));

// Insert the remaining word (if any) into the production
if (!LHS.empty())
{
prod.push_back(grammar->insertSymbol(stripSpace(LHS)));
}

// Add the production to raw productions
grammar->getRawProductions().at(RHSSymbol).insert(prod);

// Clear the production for the next iteration
prod = {};
}
}

return grammar;
};

}


}

0 comments on commit e783596

Please sign in to comment.