Skip to content

Commit

Permalink
A couple findings about wontReschedule and health status noted. Work …
Browse files Browse the repository at this point in the history
…in progress.
  • Loading branch information
philrenaud committed May 2, 2024
1 parent bfd8849 commit 0e68838
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 19 deletions.
63 changes: 54 additions & 9 deletions ui/app/components/job-row.js
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,43 @@ export default class JobRow extends Component {
);
}

get someCanariesHaveFailed() {
/**
* Similar to the below, but cares if any non-old canaries have failed, regardless of their rescheduled status.
*/
get someCanariesHaveRescheduled() {
// TODO: Weird thing where alloc.isUnhealthy right away, because alloc.DeploymentStatus.Healthy is false.
// But that doesn't seem right: health check in that state should be unknown or null, perhaps.
const relevantAllocs = this.args.job.allocations.filter(
(a) => !a.isOld && a.isCanary
);
console.log(
'relevantAllocs',
relevantAllocs,
relevantAllocs.map((a) => a.jobVersion),
relevantAllocs.map((a) => a.clientStatus),
relevantAllocs.map((a) => a.isUnhealthy)
);

return relevantAllocs.some(
(a) =>
a.clientStatus === 'failed' ||
a.clientStatus === 'lost' ||
a.isUnhealthy
);
}

get someCanariesHaveFailedAndWontReschedule() {
let availableSlotsToFill = this.args.job.expectedRunningAllocCount;

Check failure on line 91 in ui/app/components/job-row.js

View workflow job for this annotation

GitHub Actions / pre-test

'availableSlotsToFill' is assigned a value but never used
let runningOrPendingCanaries = this.args.job.allocations.filter(

Check failure on line 92 in ui/app/components/job-row.js

View workflow job for this annotation

GitHub Actions / pre-test

'runningOrPendingCanaries' is assigned a value but never used
(a) => !a.isOld && a.isCanary && !a.hasBeenRescheduled
);
const relevantAllocs = this.args.job.allocations.filter(
(a) => !a.isOld && a.isCanary && !a.hasBeenRescheduled
);
console.log(
'relevantAllocs',
relevantAllocs,
relevantAllocs.map((a) => a.jobVersion),
relevantAllocs.map((a) => a.clientStatus),
relevantAllocs.map((a) => a.isUnhealthy)
);
Expand Down Expand Up @@ -104,33 +135,47 @@ export default class JobRow extends Component {
*/
let latestDeploymentSummary = this.args.job.latestDeploymentSummary;

console.log(
'checking if requries promotion',
this.args.job.name,
latestDeploymentSummary,
this.args.job.hasActiveCanaries
);
// console.log(
// 'checking if requries promotion',
// this.args.job.name,
// latestDeploymentSummary,
// this.args.job.hasActiveCanaries
// );
// Early return false if we don't have an active deployment
if (latestDeploymentSummary.isActive) {
if (!latestDeploymentSummary.isActive) {
return false;
}

// Early return if we our deployment doesn't have any canaries
if (!this.args.job.hasActiveCanaries) {
console.log('!hasActiveCan');
return false;
}

if (latestDeploymentSummary.requiresPromotion) {
console.log('requires promotion, and...');
if (this.canariesHealthy) {
console.log('canaries are healthy.');
return 'canary-promote';
}
if (this.someCanariesHaveFailed) {
// if (this.someCanariesHaveFailedAndWontReschedule) {
if (this.someCanariesHaveRescheduled) {
// TODO: I'm uncertain about when to alert the user here. It seems like it might be important
// enough to let them know when ANY canary has to be rescheduled, but there's an argument to be
// made that we oughtn't bother them until it's un-reschedulable.
console.log('some canaries have failed.');
return 'canary-failure';
}
if (latestDeploymentSummary.allAutoPromote) {
console.log(
'This deployment is set to auto-promote; canaries are being checked now'
);
// return "This deployment is set to auto-promote; canaries are being checked now";
return false;
} else {
console.log(
'This deployment requires manual promotion and things are being checked now'
);
// return "This deployment requires manual promotion and things are being checked now";
return false;
}
Expand Down
19 changes: 10 additions & 9 deletions ui/app/models/job.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,6 @@ export default class Job extends Model {
@attr({ defaultValue: () => ({}) }) latestDeploymentSummary; // TODO: model this out

get hasActiveCanaries() {
// console.log('tell me about ur active canaries plz', this.allocBlocks, this.allocations, this.activeDeploymentID);
// TODO: Monday/Tuesday: go over AllocBlocks.{all}.canary and if there are any? make the latestDeployment lookup,
// and check to see if it requires promotion / isnt yet promoted.
if (!this.latestDeploymentSummary.isActive) {
return false;
}
Expand All @@ -62,13 +59,7 @@ export default class Job extends Model {
})
.flat()
.any((n) => !!n);
// return this.activeDeploymentID;
}
// TODO: moved to job-row
// get requiresPromotion() {
// console.log('getting requiresPromotion', this.activeDeploymentID, this.runningDeployment);
// return this.runningDeployment;
// }

@attr() childStatuses;

Expand Down Expand Up @@ -194,6 +185,16 @@ export default class Job extends Model {
if (allocationsOfShowableType[status]) {
// If status is failed or lost, we only want to show it IF it's used up its restarts/rescheds.
// Otherwise, we'd be showing an alloc that had been replaced.

// TODO: We can't know about .willNotRestart and .willNotReschedule here, as we don't have access to alloc.followUpEvaluation.
// in deploying.js' newVersionAllocBlocks, we can know to ignore a canary if it has been rescheduled by virtue of seeing its .hasBeenRescheduled,
// which checks allocation.followUpEvaluation. This is not currently possible here.
// As such, we should count our running/pending canaries first, and if our expected count is still not filled, we can look to failed canaries.
// The goal of this is that any failed allocation that gets rescheduled would first have its place in relevantAllocs eaten up by a running/pending allocation,
// leaving it on the outside of what the user sees.

// ^--- actually, scratch this. We should just get alloc.FollowupEvalID. If it's not null, we can assume it's been rescheduled.

if (alloc.willNotRestart) {
if (!alloc.willNotReschedule) {
// Dont count it
Expand Down
3 changes: 2 additions & 1 deletion ui/app/templates/components/job-row.hbs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@
<JobStatus::AllocationStatusRow
@allocBlocks={{@job.allocBlocks}}
@steady={{true}}
@compact={{true}}
{{!-- TODO: compact temporarily removed for testing --}}
{{!-- @compact={{true}} --}}
@runningAllocs={{@job.allocBlocks.running.healthy.nonCanary.length}}
@groupCountSum={{@job.expectedRunningAllocCount}}
/>
Expand Down

0 comments on commit 0e68838

Please sign in to comment.