@@ -1104,42 +1104,38 @@ def nestdir(base: str, deps: CWLObjectType) -> CWLObjectType:
11041104 sp = s2 .split ("/" )
11051105 sp .pop ()
11061106 while sp :
1107+ loc = dirname + "/" .join (sp )
11071108 nx = sp .pop ()
1108- deps = {"class" : "Directory" , "basename" : nx , "listing" : [deps ]}
1109+ deps = {
1110+ "class" : "Directory" ,
1111+ "basename" : nx ,
1112+ "listing" : [deps ],
1113+ "location" : loc ,
1114+ }
11091115 return deps
11101116
11111117
1112- def mergedirs (listing : List [CWLObjectType ]) -> List [CWLObjectType ]:
1118+ def mergedirs (
1119+ listing : MutableSequence [CWLObjectType ],
1120+ ) -> MutableSequence [CWLObjectType ]:
11131121 r = [] # type: List[CWLObjectType]
11141122 ents = {} # type: Dict[str, CWLObjectType]
1115- collided = set () # type: Set[str]
11161123 for e in listing :
11171124 basename = cast (str , e ["basename" ])
11181125 if basename not in ents :
11191126 ents [basename ] = e
1127+ elif e ["location" ] != ents [basename ]["location" ]:
1128+ raise ValidationException (
1129+ "Conflicting basename in listing or secondaryFiles, '%s' used by both '%s' and '%s'"
1130+ % (basename , e ["location" ], ents [basename ]["location" ])
1131+ )
11201132 elif e ["class" ] == "Directory" :
11211133 if e .get ("listing" ):
1134+ # name already in entries
1135+ # merge it into the existing listing
11221136 cast (
11231137 List [CWLObjectType ], ents [basename ].setdefault ("listing" , [])
11241138 ).extend (cast (List [CWLObjectType ], e ["listing" ]))
1125- if cast (str , ents [basename ]["location" ]).startswith ("_:" ):
1126- ents [basename ]["location" ] = e ["location" ]
1127- elif e ["location" ] != ents [basename ]["location" ]:
1128- # same basename, different location, collision,
1129- # rename both.
1130- collided .add (basename )
1131- e2 = ents [basename ]
1132-
1133- e ["basename" ] = urllib .parse .quote (cast (str , e ["location" ]), safe = "" )
1134- e2 ["basename" ] = urllib .parse .quote (cast (str , e2 ["location" ]), safe = "" )
1135-
1136- e ["nameroot" ], e ["nameext" ] = os .path .splitext (cast (str , e ["basename" ]))
1137- e2 ["nameroot" ], e2 ["nameext" ] = os .path .splitext (cast (str , e2 ["basename" ]))
1138-
1139- ents [cast (str , e ["basename" ])] = e
1140- ents [cast (str , e2 ["basename" ])] = e2
1141- for c in collided :
1142- del ents [c ]
11431139 for e in ents .values ():
11441140 if e ["class" ] == "Directory" and "listing" in e :
11451141 e ["listing" ] = cast (
@@ -1162,6 +1158,30 @@ def scandeps(
11621158 urljoin : Callable [[str , str ], str ] = urllib .parse .urljoin ,
11631159 nestdirs : bool = True ,
11641160) -> MutableSequence [CWLObjectType ]:
1161+
1162+ """Given a CWL document or input object, search for dependencies
1163+ (references to external files) of 'doc' and return them as a list
1164+ of File or Directory objects.
1165+
1166+ The 'base' is the base URL for relative references.
1167+
1168+ Looks for objects with 'class: File' or 'class: Directory' and
1169+ adds them to the list of dependencies.
1170+
1171+ Anything in 'urlfields' is also added as a File dependency.
1172+
1173+ Anything in 'reffields' (such as workflow step 'run') will be
1174+ added as a dependency and also loaded (using the 'loadref'
1175+ function) and recursively scanned for dependencies. Those
1176+ dependencies will be added as secondary files to the primary file.
1177+
1178+ If "nestdirs" is true, create intermediate directory objects when
1179+ a file is located in a subdirectory under the starting directory.
1180+ This is so that if the dependencies are materialized, they will
1181+ produce the same relative file system locations.
1182+
1183+ """
1184+
11651185 r : MutableSequence [CWLObjectType ] = []
11661186 if isinstance (doc , MutableMapping ):
11671187 if "id" in doc :
@@ -1268,7 +1288,7 @@ def scandeps(
12681288 )
12691289 if sf :
12701290 deps2 ["secondaryFiles" ] = cast (
1271- MutableSequence [CWLOutputAtomType ], sf
1291+ MutableSequence [CWLOutputAtomType ], mergedirs ( sf )
12721292 )
12731293 if nestdirs :
12741294 deps2 = nestdir (base , deps2 )
@@ -1313,7 +1333,6 @@ def scandeps(
13131333
13141334 if r :
13151335 normalizeFilesDirs (r )
1316- r = mergedirs (cast (List [CWLObjectType ], r ))
13171336
13181337 return r
13191338
0 commit comments