From 678db95e057e0986011373ceb225eb2c111f7b1b Mon Sep 17 00:00:00 2001 From: Robert Cowham Date: Wed, 10 May 2023 14:14:51 +0100 Subject: [PATCH] Add --convert.crlf option for text files Convert CRLF to just LF --- main.go | 18 +++++++++-- main_test.go | 79 +++++++++++++++++++++++++++++++++++++++++++++++ run_conversion.sh | 14 +++++++-- 3 files changed, 105 insertions(+), 6 deletions(-) diff --git a/main.go b/main.go index 1d30c99..7b419e3 100644 --- a/main.go +++ b/main.go @@ -81,6 +81,7 @@ type GitParserOptions struct { dryRun bool dummyArchives bool caseInsensitive bool // If true then create case insensitive checkpoint for Linux and lowercase archive files + convertCRLF bool // If true then convert CRLF to just LF graphFile string maxCommits int debugCommit int // For debug breakpoint @@ -524,7 +525,8 @@ func getOID(dataref string) (int, error) { // Later the file will be moved to the required depot location // Uses a provided pool to get concurrency // Allow for dummy data to be saved (used to speed up large conversions to check structure) -func (b *GitBlob) SaveBlob(pool *pond.WorkerPool, archiveRoot string, dummyFlag bool, matcher *BlobFileMatcher) error { +func (b *GitBlob) SaveBlob(pool *pond.WorkerPool, archiveRoot string, dummyFlag bool, convertCRLF bool, + matcher *BlobFileMatcher) error { if b.blob == nil || !b.hasData { matcher.logger.Debugf("NoBlobToSave") return nil @@ -537,7 +539,12 @@ func (b *GitBlob) SaveBlob(pool *pond.WorkerPool, archiveRoot string, dummyFlag if b.compressed { fname := path.Join(rootDir, fmt.Sprintf("%s.gz", b.blobFileName)) matcher.logger.Debugf("SavingBlobCompressed: %s", fname) - data := b.blob.Data + var data string + if convertCRLF && (b.fileType == journal.CText || b.fileType == journal.UText) { + data = strings.ReplaceAll(b.blob.Data, "\r\n", "\n") + } else { + data = b.blob.Data + } if dummyFlag { data = fmt.Sprintf("%d", b.blob.Mark) } @@ -1717,7 +1724,7 @@ func (g *GitP4Transfer) GitParse(pool *pond.WorkerPool) chan 
GitCommit { b := newGitBlob(&blob) g.blobFileMatcher.addBlob(b) commitSize += len(blob.Data) - b.SaveBlob(pool, g.opts.archiveRoot, g.opts.dummyArchives, g.blobFileMatcher) + b.SaveBlob(pool, g.opts.archiveRoot, g.opts.dummyArchives, g.opts.convertCRLF, g.blobFileMatcher) case libfastimport.CmdReset: reset := cmd.(libfastimport.CmdReset) @@ -1866,6 +1873,10 @@ func main() { "case.insensitive", "Create checkpoint case-insensitive mode (for Linux) and lowercase archive files. If not set, then OS default applies.", ).Bool() + convertCRLF = kingpin.Flag( + "convert.crlf", + "Convert CRLF in text files to just LF.", + ).Bool() dummyArchives = kingpin.Flag( "dummy", "Create dummy (small) archive files - for quick analysis of large repos.", @@ -1946,6 +1957,7 @@ func main() { dryRun: *dryrun, dummyArchives: *dummyArchives, caseInsensitive: *caseInsensitive, + convertCRLF: *convertCRLF, maxCommits: *maxCommits, graphFile: *outputGraph, debugCommit: *debugCommit, diff --git a/main_test.go b/main_test.go index 5a0ba10..5cdbfec 100644 --- a/main_test.go +++ b/main_test.go @@ -1123,6 +1123,85 @@ func TestAddBinary(t *testing.T) { assert.Regexp(t, `(?m)lbrPath .*/1.2$`, result) } +func TestAddCRLF1(t *testing.T) { + // Test the default transfer (no convertCRLF option passed): CRLF in text files should be left as is. 
+ logger := createLogger() + logger.Debugf("======== Test: %s", t.Name()) + + d := createGitRepo(t) + os.Chdir(d) + logger.Debugf("Git repo: %s", d) + + src := "src.txt" + file1 := "file1.txt" + srcContents1 := "contents\n" + contents2 := "contents1\r\ncontents2\r\ncontents3\n" + writeToFile(src, srcContents1) + writeToFile(file1, contents2) + runCmd("gzip " + src) + runCmd("git add .") + runCmd("git commit -m initial") + + runTransfer(t, logger) + + result, err := runCmd("p4 files //...") + assert.Equal(t, nil, err) + assert.Equal(t, `//import/main/file1.txt#1 - add change 3 (text+C) +//import/main/src.txt.gz#1 - add change 3 (binary+F) +`, result) + + result, err = runCmd("p4 verify -qu //...") + assert.Equal(t, "", fmt.Sprint(err)) + assert.Equal(t, "", result) + + result, err = runCmd("p4 fstat -Ob //import/main/src.txt.gz#1") + assert.Equal(t, nil, err) + assert.Regexp(t, `headType binary\+F`, result) + assert.Regexp(t, `lbrType binary\+F`, result) + assert.Regexp(t, `(?m)lbrPath .*/1.3$`, result) + + result, err = runCmd("p4 print -q //import/main/file1.txt#1") + assert.Equal(t, nil, err) + assert.Equal(t, contents2, result) +} + +func TestAddCRLF2(t *testing.T) { + // Test where CRLF should be converted to LF - but only for text files! 
+ logger := createLogger() + logger.Debugf("======== Test: %s", t.Name()) + + d := createGitRepo(t) + os.Chdir(d) + logger.Debugf("Git repo: %s", d) + + src := "src.txt" + file1 := "file1.txt" + srcContents1 := "contents\n" + contents2 := "contents1\r\ncontents2\r\ncontents3\n" + writeToFile(src, srcContents1) + writeToFile(file1, contents2) + runCmd("gzip " + src) + runCmd("git add .") + runCmd("git commit -m initial") + + opts := &GitParserOptions{config: &config.Config{ImportDepot: "import", DefaultBranch: "main"}, convertCRLF: true} + runTransferOpts(t, logger, opts) + + result, err := runCmd("p4 files //...") + assert.Equal(t, nil, err) + assert.Equal(t, `//import/main/file1.txt#1 - add change 3 (text+C) +//import/main/src.txt.gz#1 - add change 3 (binary+F) +`, result) + + result, err = runCmd("p4 verify -qu //...") + assert.Equal(t, "", fmt.Sprint(err)) + assert.Equal(t, "", result) + + result, err = runCmd("p4 print -q //import/main/file1.txt#1") + assert.Equal(t, nil, err) + assert.Equal(t, strings.ReplaceAll(contents2, "\r\n", "\n"), result) +} + func TestAddEmpty(t *testing.T) { logger := createLogger() logger.Debugf("======== Test: %s", t.Name()) diff --git a/run_conversion.sh b/run_conversion.sh index 0ed0712..47a019a 100755 --- a/run_conversion.sh +++ b/run_conversion.sh @@ -25,7 +25,8 @@ function usage echo "USAGE for run_conversion.sh: -run_conversion.sh [-p ] [-d] [-dummy] [-insensitive] [-depot ] [-graph ] [-m ] [-t ] +run_conversion.sh [-p ] [-d] [-dummy] [-crlf] [-insensitive] + [-depot ] [-graph ] [-m ] [-t ] or @@ -33,6 +34,7 @@ run_conversion.sh -h -d Debug -depot Depot to use for this import (default is 'import') + -crlf Convert CRLF to just LF for text files -dummy Create dummy archives as placeholders (no real content) - much faster -graph Create Graphviz output showing commit structure -insensitive Specify case insensitive checkpoint (and lowercase archive files) - for Linux servers @@ -55,6 +57,7 @@ declare -i shiftArgs=0 declare -i 
Debug=0 declare -i Dummy=0 declare -i CaseInsensitive=0 +declare -i ConvertCRLF=0 declare -i MaxCommits=0 declare -i ParallelThreads=0 declare ConfigFile="" @@ -69,6 +72,7 @@ while [[ $# -gt 0 ]]; do (-h) usage -h && exit 1;; # (-man) usage -man;; (-c) ConfigFile=$2; shiftArgs=1;; + (-crlf) ConvertCRLF=1;; (-p) P4Root=$2; shiftArgs=1;; (-d) Debug=1;; (-depot) ImportDepot=$2; shiftArgs=1;; @@ -120,6 +124,10 @@ if [[ $CaseInsensitive -ne 0 ]]; then CaseInsensitiveFlag="--case.insensitive" P4DCaseFlag="-C1" fi +CRLFFlag="" +if [[ $ConvertCRLF -ne 0 ]]; then + CRLFFlag="--convert.crlf" +fi GraphArgs="" if [[ ! -z $GraphFile ]]; then GraphArgs="--graphfile=$GraphFile" @@ -129,8 +137,8 @@ if [[ ! -z $ConfigFile ]]; then ConfigArgs="--config=$ConfigFile" fi -echo gitp4transfer --archive.root="$P4Root" $ConfigArgs $DebugFlag $DummyFlag $CaseInsensitiveFlag $MaxCommitArgs $ParallelThreadArgs $GraphArgs --import.depot="$ImportDepot" --journal="$P4Root/jnl.0" "$GitFile" -gitp4transfer --archive.root="$P4Root" $ConfigArgs $DebugFlag $DummyFlag $CaseInsensitiveFlag $MaxCommitArgs $ParallelThreadArgs $GraphArgs --import.depot="$ImportDepot" --journal="$P4Root/jnl.0" "$GitFile" +echo gitp4transfer --archive.root="$P4Root" $ConfigArgs $DebugFlag $DummyFlag $CaseInsensitiveFlag $CRLFFlag $MaxCommitArgs $ParallelThreadArgs $GraphArgs --import.depot="$ImportDepot" --journal="$P4Root/jnl.0" "$GitFile" +gitp4transfer --archive.root="$P4Root" $ConfigArgs $DebugFlag $DummyFlag $CaseInsensitiveFlag $CRLFFlag $MaxCommitArgs $ParallelThreadArgs $GraphArgs --import.depot="$ImportDepot" --journal="$P4Root/jnl.0" "$GitFile" if [[ $? -ne 0 ]]; then echo "Server is in directory:"