DataBiosphere · adamnovak · Aug 22, 2024 · Sep 12, 2024 · Sep 12, 2024 · Sep 26, 2024
diff --git a/src/toil/batchSystems/abstractGridEngineBatchSystem.py b/src/toil/batchSystems/abstractGridEngineBatchSystem.py
@@ -25,6 +25,7 @@
 from toil.batchSystems.cleanup_support import BatchSystemCleanupSupport
 from toil.bus import ExternalBatchIdMessage, get_job_kind
 from toil.job import AcceleratorRequirement
+from toil.statsAndLogging import TRACE
 from toil.lib.misc import CalledProcessErrorStderr
 from toil.lib.retry import old_retry, DEFAULT_DELAYS, retry
 
@@ -267,7 +268,7 @@ def _runStep(self):
             if self.checkOnJobs():
                 activity = True
             if not activity:
-                logger.debug('No activity, sleeping for %is', self.boss.sleepSeconds())
+                logger.log(TRACE, 'No activity, sleeping for %is', self.boss.sleepSeconds())
             return True
 
         def run(self):

diff --git a/src/toil/common.py b/src/toil/common.py
@@ -1158,7 +1158,15 @@ def import_file(self,
                     src_uri: str,
                     shared_file_name: None = None,
                     symlink: bool = True,
-                    check_existence: bool = True) -> FileID:
+                    check_existence: Literal[True] = True) -> FileID:
+        ...
+
+    @overload
+    def import_file(self,
+                    src_uri: str,
+                    shared_file_name: None = None,
+                    symlink: bool = True,
+                    check_existence: bool = True) -> Optional[FileID]:
         ...
 
     def import_file(self,

diff --git a/src/toil/job.py b/src/toil/job.py
@@ -967,9 +967,9 @@ def allSuccessors(self) -> Iterator[str]:
 
     def successors_by_phase(self) -> Iterator[Tuple[int, str]]:
         """
-        Get an iterator over all child/follow-on/chained inherited successor job IDs, along with their phase numbere on the stack.
+        Get an iterator over all child/follow-on/chained inherited successor job IDs, along with their phase number on the stack.
 
-        Phases ececute higher numbers to lower numbers.
+        Phases execute higher numbers to lower numbers.
         """
 
         for i, phase in enumerate(self.successor_phases):

diff --git a/src/toil/jobStores/abstractJobStore.py b/src/toil/jobStores/abstractJobStore.py
@@ -506,8 +506,10 @@ def export_file(self, file_id: FileID, dst_uri: str) -> None:
         :param str file_id: The id of the file in the job store that should be exported.
 
         :param str dst_uri: URL that points to a file or object in the storage mechanism of a
-                supported URL scheme e.g. a blob in an AWS s3 bucket.
+                supported URL scheme e.g. a blob in an AWS s3 bucket. May also be a local path.
         """
+        from toil.common import Toil
+        dst_uri = Toil.normalize_uri(dst_uri)
         parseResult = urlparse(dst_uri)
         otherCls = self._findJobStoreForUrl(parseResult, export=True)
         self._export_file(otherCls, file_id, parseResult)

diff --git a/src/toil/jobStores/fileJobStore.py b/src/toil/jobStores/fileJobStore.py
@@ -131,6 +131,10 @@ def resume(self):
         if not os.path.isdir(self.jobStoreDir):
             raise NoSuchJobStoreException(self.jobStoreDir, "file")
         super().resume()
+        # TODO: Unify with initialize() configuration
+        self.linkImports = self.config.symlinkImports
+        self.moveExports = self.config.moveOutputs
+        self.symlink_job_store_reads = self.config.symlink_job_store_reads
 
     def destroy(self):
         if os.path.exists(self.jobStoreDir):
@@ -298,26 +302,19 @@ def jobs(self):
     # Functions that deal with temporary files associated with jobs
     ##########################################
 
-    @contextmanager
-    def optional_hard_copy(self, hardlink):
-        if hardlink:
-            saved = self.linkImports
-            self.linkImports = False
-        yield
-        if hardlink:
-            self.linkImports = saved
-
-    def _copy_or_link(self, src_path, dst_path, symlink=False):
+    def _copy_or_link(self, src_path, dst_path, hardlink=False, symlink=False):
-        # linking is not done be default because of issue #1755
+        # linking is not done by default because of issue #1755
-        srcPath = self._extract_path_from_url(src_path)
-        if self.linkImports and symlink:
-            os.symlink(os.path.realpath(srcPath), dst_path)
+        # TODO: is hardlinking ever actually done? Here hardlink=True only suppresses the symlink branch below.
+        src_path = self._extract_path_from_url(src_path)
+        if self.linkImports and not hardlink and symlink:
+            os.symlink(os.path.realpath(src_path), dst_path)
         else:
-            atomic_copy(srcPath, dst_path)
+            atomic_copy(src_path, dst_path)
 
     def _import_file(self, otherCls, uri, shared_file_name=None, hardlink=False, symlink=True):
         # symlink argument says whether the caller can take symlinks or not.
         # ex: if false, it means the workflow cannot work with symlinks and we need to hardlink or copy.
+        # TODO: Do we ever actually hardlink?
         # default is true since symlinking everything is ideal
         uri_path = unquote(uri.path)
         if issubclass(otherCls, FileJobStore):
@@ -327,16 +324,14 @@ def _import_file(self, otherCls, uri, shared_file_name=None, hardlink=False, sym
             if shared_file_name is None:
                 executable = os.stat(uri_path).st_mode & stat.S_IXUSR != 0
                 absPath = self._get_unique_file_path(uri_path)  # use this to get a valid path to write to in job store
-                with self.optional_hard_copy(hardlink):
-                    self._copy_or_link(uri, absPath, symlink=symlink)
+                self._copy_or_link(uri, absPath, hardlink=hardlink, symlink=symlink)
                 # TODO: os.stat(absPath).st_size consistently gives values lower than
                 # getDirSizeRecursively()
                 return FileID(self._get_file_id_from_path(absPath), os.stat(absPath).st_size, executable)
             else:
                 self._requireValidSharedFileName(shared_file_name)
                 path = self._get_shared_file_path(shared_file_name)
-                with self.optional_hard_copy(hardlink):
-                    self._copy_or_link(uri, path, symlink=symlink)
+                self._copy_or_link(uri, path, hardlink=hardlink, symlink=symlink)
                 return None
         else:
             return super()._import_file(otherCls, uri, shared_file_name=shared_file_name)

diff --git a/src/toil/utils/toilStatus.py b/src/toil/utils/toilStatus.py
@@ -49,14 +49,14 @@ def print_dot_chart(self) -> None:
 
         # Make job IDs to node names map
         jobsToNodeNames: Dict[str, str] = dict(
-            map(lambda job: (str(job.jobStoreID), job.jobName), self.jobsToReport)
+            map(lambda job: (str(job.jobStoreID), str(job.jobStoreID).replace("_", "___").replace("/", "_").replace("-", "__")), self.jobsToReport)
         )
 
         # Print the nodes
         for job in set(self.jobsToReport):
             print(
-                '{} [label="{} {}"];'.format(
-                    jobsToNodeNames[str(job.jobStoreID)], job.jobName, job.jobStoreID
+                '{} [label="{} {}" color="{}"];'.format(
+                    jobsToNodeNames[str(job.jobStoreID)], job.jobName, job.displayName, "black" if job.has_body() else "green"
                 )
             )