Skip to content

Pass table migration index to WorkflowLinter #3744

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/databricks/labs/ucx/assessment/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def assess_dashboards(self, ctx: RuntimeContext):
"""
ctx.query_linter.refresh_report()

@job_task(depends_on=[assess_jobs])
@job_task(depends_on=[crawl_tables, assess_jobs])
def assess_workflows(self, ctx: RuntimeContext):
"""Scans all jobs for migration issues in notebook jobs.

Expand Down
19 changes: 14 additions & 5 deletions src/databricks/labs/ucx/contexts/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
from databricks.labs.blueprint.tui import Prompts
from databricks.labs.blueprint.wheels import ProductInfo, WheelsV2
from databricks.labs.lsql.backends import SqlBackend
from databricks.sdk import AccountClient, WorkspaceClient, core
from databricks.sdk.errors import DatabricksError
from databricks.sdk.service import sql

from databricks.labs.ucx.assessment.dashboards import DashboardOwnership
from databricks.labs.ucx.assessment.jobs import JobsCrawler
Expand All @@ -24,9 +27,6 @@
from databricks.labs.ucx.source_code.directfs_access import DirectFsAccessCrawler, DirectFsAccessOwnership
from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver
from databricks.labs.ucx.source_code.used_table import UsedTablesCrawler
from databricks.sdk import AccountClient, WorkspaceClient, core
from databricks.sdk.service import sql

from databricks.labs.ucx.account.workspaces import WorkspaceInfo
from databricks.labs.ucx.assessment.azure import AzureServicePrincipalCrawler
from databricks.labs.ucx.assessment.dashboards import LakeviewDashboardCrawler, RedashDashboardCrawler
Expand All @@ -48,7 +48,6 @@
PrincipalACL,
)
from databricks.labs.ucx.hive_metastore.mapping import TableMapping
from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex
from databricks.labs.ucx.hive_metastore.ownership import (
TableMigrationOwnership,
TableOwnership,
Expand All @@ -59,6 +58,7 @@
TableMigrationStatusRefresher,
TablesMigrator,
)
from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex
from databricks.labs.ucx.hive_metastore.table_move import TableMove
from databricks.labs.ucx.hive_metastore.udfs import UdfsCrawler, UdfOwnership
from databricks.labs.ucx.hive_metastore.verification import VerifyHasCatalog, VerifyHasMetastore
Expand Down Expand Up @@ -585,14 +585,23 @@ def dependency_resolver(self) -> DependencyResolver:
self.pip_resolver, self.notebook_resolver, self.file_resolver, self.file_resolver, self.path_lookup
)

@cached_property
def table_migration_index(self) -> TableMigrationIndex:
    """Table migration index, loaded once and cached.

    Falls back to an empty index when the migration status cannot be
    read (e.g. the workspace/metastore is not reachable yet), so that
    dependent linters can still run in best-effort mode.
    """
    try:
        return self.tables_migrator.index()
    except DatabricksError as e:
        # Best-effort: proceed with an empty index rather than failing the caller.
        logger.warning("Table migration cannot be loaded. Initializing empty index.", exc_info=e)
        return TableMigrationIndex([])

@cached_property
def workflow_linter(self) -> WorkflowLinter:
    """Workflow linter wired to the crawled jobs and the cached table migration index."""
    linter = WorkflowLinter(
        self.workspace_client,
        self.jobs_crawler,
        self.dependency_resolver,
        self.path_lookup,
        self.table_migration_index,
        self.directfs_access_crawler_for_paths,
        self.used_tables_crawler_for_paths,
    )
    return linter
Expand Down
13 changes: 1 addition & 12 deletions src/databricks/labs/ucx/contexts/workspace_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

from databricks.labs.lsql.backends import SqlBackend, StatementExecutionBackend
from databricks.sdk import WorkspaceClient
from databricks.sdk.errors import NotFound

from databricks.labs.ucx.assessment.aws import AWSResources
from databricks.labs.ucx.framework.utils import run_command
Expand All @@ -19,7 +18,6 @@
from databricks.labs.ucx.azure.resources import AzureAPIClient, AzureResources
from databricks.labs.ucx.contexts.application import CliContext
from databricks.labs.ucx.hive_metastore.federation import HiveMetastoreFederation, HiveMetastoreFederationEnabler
from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex
from databricks.labs.ucx.progress.install import ProgressTrackingInstallation
from databricks.labs.ucx.source_code.linters.context import LinterContext
from databricks.labs.ucx.source_code.linters.folders import LocalCodeLinter
Expand Down Expand Up @@ -212,15 +210,6 @@ class LocalCheckoutContext(WorkspaceContext):
"""Local context extends Workspace context to provide extra properties
for running local operations."""

@cached_property
def _migration_index(self) -> TableMigrationIndex:
    """Table migration index for local linting.

    Returns an empty index when the metastore is not available yet,
    so local code linting can still proceed without migration statuses.
    """
    try:
        return self.tables_migrator.index()
    except NotFound:
        # Metastore absent: degrade gracefully instead of raising.
        logger.warning("Metastore does not seem to exist yet. Skipping loading of migration status.")
        return TableMigrationIndex([])

@cached_property
def local_code_linter(self) -> LocalCodeLinter:
return LocalCodeLinter(
Expand All @@ -229,5 +218,5 @@ def local_code_linter(self) -> LocalCodeLinter:
self.folder_loader,
self.path_lookup,
self.dependency_resolver,
lambda: LinterContext(self._migration_index),
lambda: LinterContext(self.table_migration_index),
)
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,8 @@ def _iter_catalogs(self) -> Iterable[CatalogInfo]:
if catalog.securable_kind in self._skip_catalog_securable_kinds:
continue
yield catalog
except DatabricksError as e:
logger.error("Cannot list catalogs", exc_info=e)
except DatabricksError:
logger.warning("Cannot list catalogs; skipping iterating catalogs")

def _iter_schemas(self) -> Iterable[SchemaInfo]:
for catalog in self._iter_catalogs():
Expand Down
Loading