Skip to content

Commit

Permalink
Add --convert.crlf option for text files
Browse files Browse the repository at this point in the history
Convert CRLF to just LF
  • Loading branch information
rcowham committed May 10, 2023
1 parent 3cd66a3 commit 678db95
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 6 deletions.
18 changes: 15 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ type GitParserOptions struct {
dryRun bool
dummyArchives bool
caseInsensitive bool // If true then create case insensitive checkpoint for Linux and lowercase archive files
convertCRLF bool // If true then convert CRLF to just LF
graphFile string
maxCommits int
debugCommit int // For debug breakpoint
Expand Down Expand Up @@ -524,7 +525,8 @@ func getOID(dataref string) (int, error) {
// Later the file will be moved to the required depot location
// Uses a provided pool to get concurrency
// Allow for dummy data to be saved (used to speed up large conversions to check structure)
func (b *GitBlob) SaveBlob(pool *pond.WorkerPool, archiveRoot string, dummyFlag bool, matcher *BlobFileMatcher) error {
func (b *GitBlob) SaveBlob(pool *pond.WorkerPool, archiveRoot string, dummyFlag bool, convertCRLF bool,
matcher *BlobFileMatcher) error {
if b.blob == nil || !b.hasData {
matcher.logger.Debugf("NoBlobToSave")
return nil
Expand All @@ -537,7 +539,12 @@ func (b *GitBlob) SaveBlob(pool *pond.WorkerPool, archiveRoot string, dummyFlag
if b.compressed {
fname := path.Join(rootDir, fmt.Sprintf("%s.gz", b.blobFileName))
matcher.logger.Debugf("SavingBlobCompressed: %s", fname)
data := b.blob.Data
var data string
if convertCRLF && (b.fileType == journal.CText || b.fileType == journal.UText) {
data = strings.ReplaceAll(b.blob.Data, "\r\n", "\n")
} else {
data = b.blob.Data
}
if dummyFlag {
data = fmt.Sprintf("%d", b.blob.Mark)
}
Expand Down Expand Up @@ -1717,7 +1724,7 @@ func (g *GitP4Transfer) GitParse(pool *pond.WorkerPool) chan GitCommit {
b := newGitBlob(&blob)
g.blobFileMatcher.addBlob(b)
commitSize += len(blob.Data)
b.SaveBlob(pool, g.opts.archiveRoot, g.opts.dummyArchives, g.blobFileMatcher)
b.SaveBlob(pool, g.opts.archiveRoot, g.opts.dummyArchives, g.opts.convertCRLF, g.blobFileMatcher)

case libfastimport.CmdReset:
reset := cmd.(libfastimport.CmdReset)
Expand Down Expand Up @@ -1866,6 +1873,10 @@ func main() {
"case.insensitive",
"Create checkpoint case-insensitive mode (for Linux) and lowercase archive files. If not set, then OS default applies.",
).Bool()
convertCRLF = kingpin.Flag(
"convert.crlf",
"Convert CRLF in text files to just LF.",
).Bool()
dummyArchives = kingpin.Flag(
"dummy",
"Create dummy (small) archive files - for quick analysis of large repos.",
Expand Down Expand Up @@ -1946,6 +1957,7 @@ func main() {
dryRun: *dryrun,
dummyArchives: *dummyArchives,
caseInsensitive: *caseInsensitive,
convertCRLF: *convertCRLF,
maxCommits: *maxCommits,
graphFile: *outputGraph,
debugCommit: *debugCommit,
Expand Down
79 changes: 79 additions & 0 deletions main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1123,6 +1123,85 @@ func TestAddBinary(t *testing.T) {
assert.Regexp(t, `(?m)lbrPath .*/1.2$`, result)
}

// TestAddCRLF1 covers the case where no CRLF conversion is requested: the
// transfer is run via runTransfer with no explicit options, and a text file
// committed with CRLF line endings must be returned by "p4 print" unchanged.
// It also checks that the gzipped file is stored as binary+F.
func TestAddCRLF1(t *testing.T) {
	logger := createLogger()
	logger.Debugf("======== Test: %s", t.Name())

	d := createGitRepo(t)
	// All file writes and git commands below are relative to the repo, so a
	// failed chdir must abort the test rather than act on another directory.
	if err := os.Chdir(d); err != nil {
		t.Fatalf("chdir to git repo %q: %v", d, err)
	}
	logger.Debugf("Git repo: %s", d)

	src := "src.txt"
	file1 := "file1.txt"
	srcContents1 := "contents\n"
	// Mixed line endings: two CRLF-terminated lines followed by an LF-terminated one.
	contents2 := "contents1\r\ncontents2\r\ncontents3\n"
	writeToFile(src, srcContents1)
	writeToFile(file1, contents2)
	// gzip replaces src.txt with src.txt.gz, giving the commit one binary file.
	runCmd("gzip " + src)
	runCmd("git add .")
	runCmd("git commit -m initial")

	runTransfer(t, logger)

	result, err := runCmd("p4 files //...")
	assert.Equal(t, nil, err)
	assert.Equal(t, `//import/main/file1.txt#1 - add change 3 (text+C)
//import/main/src.txt.gz#1 - add change 3 (binary+F)
`, result)

	result, err = runCmd("p4 verify -qu //...")
	assert.Equal(t, "<nil>", fmt.Sprint(err))
	assert.Equal(t, "", result)

	result, err = runCmd("p4 fstat -Ob //import/main/src.txt.gz#1")
	assert.Equal(t, nil, err)
	assert.Regexp(t, `headType binary\+F`, result)
	assert.Regexp(t, `lbrType binary\+F`, result)
	assert.Regexp(t, `(?m)lbrPath .*/1.3$`, result)

	// Without conversion the CRLF sequences must survive the round trip.
	result, err = runCmd("p4 print -q //import/main/file1.txt#1")
	assert.Equal(t, nil, err)
	assert.Equal(t, contents2, result)
}

// TestAddCRLF2 covers the case where CRLF conversion is enabled: the transfer
// is run with GitParserOptions.convertCRLF set to true, and the text file
// returned by "p4 print" must have every CRLF replaced by LF. The gzipped file
// is still expected to be stored as binary+F.
func TestAddCRLF2(t *testing.T) {
	logger := createLogger()
	logger.Debugf("======== Test: %s", t.Name())

	d := createGitRepo(t)
	// All file writes and git commands below are relative to the repo, so a
	// failed chdir must abort the test rather than act on another directory.
	if err := os.Chdir(d); err != nil {
		t.Fatalf("chdir to git repo %q: %v", d, err)
	}
	logger.Debugf("Git repo: %s", d)

	src := "src.txt"
	file1 := "file1.txt"
	srcContents1 := "contents\n"
	// Mixed line endings: two CRLF-terminated lines followed by an LF-terminated one.
	contents2 := "contents1\r\ncontents2\r\ncontents3\n"
	writeToFile(src, srcContents1)
	writeToFile(file1, contents2)
	// gzip replaces src.txt with src.txt.gz, giving the commit one binary file.
	runCmd("gzip " + src)
	runCmd("git add .")
	runCmd("git commit -m initial")

	opts := &GitParserOptions{config: &config.Config{ImportDepot: "import", DefaultBranch: "main"}, convertCRLF: true}
	runTransferOpts(t, logger, opts)

	result, err := runCmd("p4 files //...")
	assert.Equal(t, nil, err)
	assert.Equal(t, `//import/main/file1.txt#1 - add change 3 (text+C)
//import/main/src.txt.gz#1 - add change 3 (binary+F)
`, result)

	result, err = runCmd("p4 verify -qu //...")
	assert.Equal(t, "<nil>", fmt.Sprint(err))
	assert.Equal(t, "", result)

	// With conversion enabled the stored text must contain LF-only line endings.
	result, err = runCmd("p4 print -q //import/main/file1.txt#1")
	assert.Equal(t, nil, err)
	assert.Equal(t, strings.ReplaceAll(contents2, "\r\n", "\n"), result)
}

func TestAddEmpty(t *testing.T) {
logger := createLogger()
logger.Debugf("======== Test: %s", t.Name())
Expand Down
14 changes: 11 additions & 3 deletions run_conversion.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,16 @@ function usage

echo "USAGE for run_conversion.sh:
run_conversion.sh <git_fast_export> [-p <P4Root>] [-d] [-dummy] [-insensitive] [-depot <import depot>] [-graph <graphFile.dot>] [-m <max commits>] [-t <parallel threads>]
run_conversion.sh <git_fast_export> [-p <P4Root>] [-d] [-dummy] [-crlf] [-insensitive]
[-depot <import depot>] [-graph <graphFile.dot>] [-m <max commits>] [-t <parallel threads>]
or
run_conversion.sh -h
-d Debug
-depot Depot to use for this import (default is 'import')
-crlf Cionvert CRLF to just LF for text files
-dummy Create dummy archives as placeholders (no real content) - much faster
-graph Create Graphviz output showing commit structure
-insensitive Specify case insensitive checkpoint (and lowercase archive files) - for Linux servers
Expand All @@ -55,6 +57,7 @@ declare -i shiftArgs=0
declare -i Debug=0
declare -i Dummy=0
declare -i CaseInsensitive=0
declare -i ConvertCRLF=0
declare -i MaxCommits=0
declare -i ParallelThreads=0
declare ConfigFile=""
Expand All @@ -69,6 +72,7 @@ while [[ $# -gt 0 ]]; do
(-h) usage -h && exit 1;;
# (-man) usage -man;;
(-c) ConfigFile=$2; shiftArgs=1;;
(-crlf) ConvertCRLF=1;;
(-p) P4Root=$2; shiftArgs=1;;
(-d) Debug=1;;
(-depot) ImportDepot=$2; shiftArgs=1;;
Expand Down Expand Up @@ -120,6 +124,10 @@ if [[ $CaseInsensitive -ne 0 ]]; then
CaseInsensitiveFlag="--case.insensitive"
P4DCaseFlag="-C1"
fi
CRLFFlag=""
if [[ $ConvertCRLF -ne 0 ]]; then
CRLFFlag="--convert.crlf"
fi
GraphArgs=""
if [[ ! -z $GraphFile ]]; then
GraphArgs="--graphfile=$GraphFile"
Expand All @@ -129,8 +137,8 @@ if [[ ! -z $ConfigFile ]]; then
ConfigArgs="--config=$ConfigFile"
fi

echo gitp4transfer --archive.root="$P4Root" $ConfigArgs $DebugFlag $DummyFlag $CaseInsensitiveFlag $MaxCommitArgs $ParallelThreadArgs $GraphArgs --import.depot="$ImportDepot" --journal="$P4Root/jnl.0" "$GitFile"
gitp4transfer --archive.root="$P4Root" $ConfigArgs $DebugFlag $DummyFlag $CaseInsensitiveFlag $MaxCommitArgs $ParallelThreadArgs $GraphArgs --import.depot="$ImportDepot" --journal="$P4Root/jnl.0" "$GitFile"
echo gitp4transfer --archive.root="$P4Root" $ConfigArgs $DebugFlag $DummyFlag $CaseInsensitiveFlag $CRLFFlag $MaxCommitArgs $ParallelThreadArgs $GraphArgs --import.depot="$ImportDepot" --journal="$P4Root/jnl.0" "$GitFile"
gitp4transfer --archive.root="$P4Root" $ConfigArgs $DebugFlag $DummyFlag $CaseInsensitiveFlag $CRLFFlag $MaxCommitArgs $ParallelThreadArgs $GraphArgs --import.depot="$ImportDepot" --journal="$P4Root/jnl.0" "$GitFile"

if [[ $? -ne 0 ]]; then
echo "Server is in directory:"
Expand Down

0 comments on commit 678db95

Please sign in to comment.