Skip to content

Commit b97a8d5

Browse files
Add MigrationSequencer for jobs (#3008)
## Changes

Add a `MigrationSequencer` class to sequence the migration steps for jobs. This PR includes the following resources in the sequence:

- Jobs
- Job tasks
- Job task dependencies
- Job clusters
- Clusters

Other elements that are part of the sequence will be added later.

### Linked issues

Progresses #1415
Supersedes #2980

### Tests

- [x] added unit tests
- [x] added integration tests

---------

Co-authored-by: Eric Vergnaud <[email protected]>
Co-authored-by: Cor Zuurmond <[email protected]>
1 parent 6eab234 commit b97a8d5

File tree

8 files changed

+842
-39
lines changed

8 files changed

+842
-39
lines changed

src/databricks/labs/ucx/assessment/clusters.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,18 @@ class ClusterInfo:
4949

5050
__id_attributes__: ClassVar[tuple[str, ...]] = ("cluster_id",)
5151

52+
@classmethod
53+
def from_cluster_details(cls, details: ClusterDetails):
54+
return ClusterInfo(
55+
cluster_id=details.cluster_id if details.cluster_id else "",
56+
cluster_name=details.cluster_name,
57+
policy_id=details.policy_id,
58+
spark_version=details.spark_version,
59+
creator=details.creator_user_name or None,
60+
success=1,
61+
failures="[]",
62+
)
63+
5264

5365
class CheckClusterMixin(CheckInitScriptMixin):
5466
_ws: WorkspaceClient
@@ -156,7 +168,7 @@ def _crawl(self) -> Iterable[ClusterInfo]:
156168
all_clusters = list(self._ws.clusters.list())
157169
return list(self._assess_clusters(all_clusters))
158170

159-
def _assess_clusters(self, all_clusters):
171+
def _assess_clusters(self, all_clusters: Iterable[ClusterDetails]):
160172
for cluster in all_clusters:
161173
if cluster.cluster_source == ClusterSource.JOB:
162174
continue
@@ -166,15 +178,7 @@ def _assess_clusters(self, all_clusters):
166178
f"Cluster {cluster.cluster_id} have Unknown creator, it means that the original creator "
167179
f"has been deleted and should be re-created"
168180
)
169-
cluster_info = ClusterInfo(
170-
cluster_id=cluster.cluster_id if cluster.cluster_id else "",
171-
cluster_name=cluster.cluster_name,
172-
policy_id=cluster.policy_id,
173-
spark_version=cluster.spark_version,
174-
creator=creator,
175-
success=1,
176-
failures="[]",
177-
)
181+
cluster_info = ClusterInfo.from_cluster_details(cluster)
178182
failures = self._check_cluster_failures(cluster, "cluster")
179183
if len(failures) > 0:
180184
cluster_info.success = 0

src/databricks/labs/ucx/assessment/jobs.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
RunType,
2222
SparkJarTask,
2323
SqlTask,
24+
Job,
2425
)
2526

2627
from databricks.labs.ucx.assessment.clusters import CheckClusterMixin
@@ -43,6 +44,17 @@ class JobInfo:
4344

4445
__id_attributes__: ClassVar[tuple[str, ...]] = ("job_id",)
4546

47+
@classmethod
48+
def from_job(cls, job: Job):
49+
job_name = job.settings.name if job.settings and job.settings.name else "Unknown"
50+
return JobInfo(
51+
job_id=str(job.job_id),
52+
success=1,
53+
failures="[]",
54+
job_name=job_name,
55+
creator=job.creator_user_name or None,
56+
)
57+
4658

4759
class JobsMixin:
4860
@classmethod
@@ -127,17 +139,7 @@ def _prepare(all_jobs) -> tuple[dict[int, set[str]], dict[int, JobInfo]]:
127139
job_settings = job.settings
128140
if not job_settings:
129141
continue
130-
job_name = job_settings.name
131-
if not job_name:
132-
job_name = "Unknown"
133-
134-
job_details[job.job_id] = JobInfo(
135-
job_id=str(job.job_id),
136-
job_name=job_name,
137-
creator=creator_user_name,
138-
success=1,
139-
failures="[]",
140-
)
142+
job_details[job.job_id] = JobInfo.from_job(job)
141143
return job_assessment, job_details
142144

143145
def _try_fetch(self) -> Iterable[JobInfo]:

0 commit comments

Comments
 (0)