ruff check preview (#25653)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Author: Asuka Minato
Date: 2025-09-16 13:58:12 +09:00
Committed by: GitHub
Parent: a0c7713494
Commit: bdd85b36a4
42 changed files with 224 additions and 342 deletions

View File

@@ -5,7 +5,7 @@ line-length = 120
quote-style = "double"
[lint]
-preview = false
+preview = true
select = [
"B", # flake8-bugbear rules
"C4", # flake8-comprehensions
@@ -65,6 +65,7 @@ ignore = [
"B006", # mutable-argument-default
"B007", # unused-loop-control-variable
"B026", # star-arg-unpacking-after-keyword-arg
"B901", # allow return in yield
"B903", # class-as-data-structure
"B904", # raise-without-from-inside-except
"B905", # zip-without-explicit-strict

View File

@@ -1,6 +1,7 @@
import base64
import json
import logging
+import operator
import secrets
from typing import Any
@@ -953,7 +954,7 @@ def clear_orphaned_file_records(force: bool):
click.echo(click.style("- Deleting orphaned message_files records", fg="white"))
query = "DELETE FROM message_files WHERE id IN :ids"
with db.engine.begin() as conn:
-conn.execute(sa.text(query), {"ids": tuple([record["id"] for record in orphaned_message_files])})
+conn.execute(sa.text(query), {"ids": tuple(record["id"] for record in orphaned_message_files)})
click.echo(
click.style(f"Removed {len(orphaned_message_files)} orphaned message_files records.", fg="green")
)
@@ -1307,7 +1308,7 @@ def cleanup_orphaned_draft_variables(
if dry_run:
logger.info("DRY RUN: Would delete the following:")
-for app_id, count in sorted(stats["orphaned_by_app"].items(), key=lambda x: x[1], reverse=True)[
+for app_id, count in sorted(stats["orphaned_by_app"].items(), key=operator.itemgetter(1), reverse=True)[
:10
]: # Show top 10
logger.info(" App %s: %s variables", app_id, count)
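Two preview-era cleanups appear in this file: passing a generator expression to `tuple()` skips the intermediate list that a list comprehension would build, and `operator.itemgetter(1)` replaces the `lambda x: x[1]` sort key (the latter looks like refurb's FURB118, reimplemented-operator). A small self-contained illustration with hypothetical sample data:

```python
import operator

records = [{"id": "b", "hits": 3}, {"id": "a", "hits": 7}]

# Generator expression: no throwaway list is materialized inside tuple().
ids = tuple(record["id"] for record in records)

# itemgetter(1) is equivalent to `lambda x: x[1]` but is a named,
# picklable callable implemented in C.
pairs = [("a", 7), ("b", 3)]
ranked = sorted(pairs, key=operator.itemgetter(1), reverse=True)
```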

View File

@@ -355,8 +355,8 @@ class AliyunDataTrace(BaseTraceInstance):
GEN_AI_FRAMEWORK: "dify",
TOOL_NAME: node_execution.title,
TOOL_DESCRIPTION: json.dumps(tool_des, ensure_ascii=False),
-TOOL_PARAMETERS: json.dumps(node_execution.inputs if node_execution.inputs else {}, ensure_ascii=False),
-INPUT_VALUE: json.dumps(node_execution.inputs if node_execution.inputs else {}, ensure_ascii=False),
+TOOL_PARAMETERS: json.dumps(node_execution.inputs or {}, ensure_ascii=False),
+INPUT_VALUE: json.dumps(node_execution.inputs or {}, ensure_ascii=False),
OUTPUT_VALUE: json.dumps(node_execution.outputs, ensure_ascii=False),
},
status=self.get_workflow_node_status(node_execution),
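The `x if x else {}` → `x or {}` rewrite above repeats throughout the LangFuse, LangSmith, Opik, and Weave hunks below. The two spellings are exactly equivalent because both branch on truthiness; the caveat worth remembering is that neither is a None-only check. A sketch:

```python
inputs = None
# `or` short-circuits on any falsy value, so it matches the conditional
# expression exactly: None falls through to the default...
assert (inputs if inputs else {}) == (inputs or {})

empty: dict[str, str] = {}
# ...but so does an empty dict, which an `is None` check would keep.
assert (empty or {"fallback": "yes"}) == {"fallback": "yes"}
```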

View File

@@ -144,13 +144,13 @@ class LangFuseDataTrace(BaseTraceInstance):
if node_type == NodeType.LLM:
inputs = node_execution.process_data.get("prompts", {}) if node_execution.process_data else {}
else:
-inputs = node_execution.inputs if node_execution.inputs else {}
-outputs = node_execution.outputs if node_execution.outputs else {}
+inputs = node_execution.inputs or {}
+outputs = node_execution.outputs or {}
created_at = node_execution.created_at or datetime.now()
elapsed_time = node_execution.elapsed_time
finished_at = created_at + timedelta(seconds=elapsed_time)
-execution_metadata = node_execution.metadata if node_execution.metadata else {}
+execution_metadata = node_execution.metadata or {}
metadata = {str(k): v for k, v in execution_metadata.items()}
metadata.update(
{
@@ -163,7 +163,7 @@ class LangFuseDataTrace(BaseTraceInstance):
"status": status,
}
)
-process_data = node_execution.process_data if node_execution.process_data else {}
+process_data = node_execution.process_data or {}
model_provider = process_data.get("model_provider", None)
model_name = process_data.get("model_name", None)
if model_provider is not None and model_name is not None:

View File

@@ -167,13 +167,13 @@ class LangSmithDataTrace(BaseTraceInstance):
if node_type == NodeType.LLM:
inputs = node_execution.process_data.get("prompts", {}) if node_execution.process_data else {}
else:
-inputs = node_execution.inputs if node_execution.inputs else {}
-outputs = node_execution.outputs if node_execution.outputs else {}
+inputs = node_execution.inputs or {}
+outputs = node_execution.outputs or {}
created_at = node_execution.created_at or datetime.now()
elapsed_time = node_execution.elapsed_time
finished_at = created_at + timedelta(seconds=elapsed_time)
-execution_metadata = node_execution.metadata if node_execution.metadata else {}
+execution_metadata = node_execution.metadata or {}
node_total_tokens = execution_metadata.get(WorkflowNodeExecutionMetadataKey.TOTAL_TOKENS) or 0
metadata = {str(key): value for key, value in execution_metadata.items()}
metadata.update(
@@ -188,7 +188,7 @@ class LangSmithDataTrace(BaseTraceInstance):
}
)
-process_data = node_execution.process_data if node_execution.process_data else {}
+process_data = node_execution.process_data or {}
if process_data and process_data.get("model_mode") == "chat":
run_type = LangSmithRunType.llm

View File

@@ -182,13 +182,13 @@ class OpikDataTrace(BaseTraceInstance):
if node_type == NodeType.LLM:
inputs = node_execution.process_data.get("prompts", {}) if node_execution.process_data else {}
else:
-inputs = node_execution.inputs if node_execution.inputs else {}
-outputs = node_execution.outputs if node_execution.outputs else {}
+inputs = node_execution.inputs or {}
+outputs = node_execution.outputs or {}
created_at = node_execution.created_at or datetime.now()
elapsed_time = node_execution.elapsed_time
finished_at = created_at + timedelta(seconds=elapsed_time)
-execution_metadata = node_execution.metadata if node_execution.metadata else {}
+execution_metadata = node_execution.metadata or {}
metadata = {str(k): v for k, v in execution_metadata.items()}
metadata.update(
{
@@ -202,7 +202,7 @@ class OpikDataTrace(BaseTraceInstance):
}
)
-process_data = node_execution.process_data if node_execution.process_data else {}
+process_data = node_execution.process_data or {}
provider = None
model = None

View File

@@ -1,3 +1,4 @@
+import collections
import json
import logging
import os
@@ -40,7 +41,7 @@ from tasks.ops_trace_task import process_trace_tasks
logger = logging.getLogger(__name__)
-class OpsTraceProviderConfigMap(dict[str, dict[str, Any]]):
+class OpsTraceProviderConfigMap(collections.UserDict[str, dict[str, Any]]):
def __getitem__(self, provider: str) -> dict[str, Any]:
match provider:
case TracingProviderEnum.LANGFUSE:
@@ -121,7 +122,7 @@ class OpsTraceProviderConfigMap(dict[str, dict[str, Any]]):
raise KeyError(f"Unsupported tracing provider: {provider}")
-provider_config_map: dict[str, dict[str, Any]] = OpsTraceProviderConfigMap()
+provider_config_map = OpsTraceProviderConfigMap()
class OpsTraceManager:
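Swapping the base class from `dict` to `collections.UserDict` (likely refurb's FURB189, subclass-builtin) is a behavioral fix as much as a style one: the C-implemented `dict` methods such as `get` do not route through an overridden `__getitem__`, while `UserDict` stores its payload in `self.data` and consistently dispatches through the override. Dropping the annotation on `provider_config_map` follows naturally, since the concrete class is now more specific than `dict[str, dict[str, Any]]`. A sketch of the difference:

```python
import collections


class LoudDict(dict[str, int]):
    def __getitem__(self, key: str) -> int:
        return super().__getitem__(key) + 1


class LoudUserDict(collections.UserDict[str, int]):
    def __getitem__(self, key: str) -> int:
        return self.data[key] + 1


d, u = LoudDict(a=1), LoudUserDict(a=1)
assert d["a"] == u["a"] == 2
assert d.get("a") == 1  # dict.get bypasses the override
assert u.get("a") == 2  # UserDict.get goes through __getitem__
```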

View File

@@ -169,13 +169,13 @@ class WeaveDataTrace(BaseTraceInstance):
if node_type == NodeType.LLM:
inputs = node_execution.process_data.get("prompts", {}) if node_execution.process_data else {}
else:
-inputs = node_execution.inputs if node_execution.inputs else {}
-outputs = node_execution.outputs if node_execution.outputs else {}
+inputs = node_execution.inputs or {}
+outputs = node_execution.outputs or {}
created_at = node_execution.created_at or datetime.now()
elapsed_time = node_execution.elapsed_time
finished_at = created_at + timedelta(seconds=elapsed_time)
-execution_metadata = node_execution.metadata if node_execution.metadata else {}
+execution_metadata = node_execution.metadata or {}
node_total_tokens = execution_metadata.get(WorkflowNodeExecutionMetadataKey.TOTAL_TOKENS) or 0
attributes = {str(k): v for k, v in execution_metadata.items()}
attributes.update(
@@ -190,7 +190,7 @@ class WeaveDataTrace(BaseTraceInstance):
}
)
-process_data = node_execution.process_data if node_execution.process_data else {}
+process_data = node_execution.process_data or {}
if process_data and process_data.get("model_mode") == "chat":
attributes.update(
{

View File

@@ -641,7 +641,7 @@ class ClickzettaVector(BaseVector):
for doc, embedding in zip(batch_docs, batch_embeddings):
# Optimized: minimal checks for common case, fallback for edge cases
-metadata = doc.metadata if doc.metadata else {}
+metadata = doc.metadata or {}
if not isinstance(metadata, dict):
metadata = {}

View File

@@ -103,7 +103,7 @@ class MatrixoneVector(BaseVector):
self.client = self._get_client(len(embeddings[0]), True)
assert self.client is not None
ids = []
-for _, doc in enumerate(documents):
+for doc in documents:
if doc.metadata is not None:
doc_id = doc.metadata.get("doc_id", str(uuid.uuid4()))
ids.append(doc_id)
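When the index from `enumerate` is never used, as with `_` here, iterating the sequence directly says the same thing with less machinery (refurb's FURB148 flags this in preview mode; the same cleanup shows up in the test hunks further down). For illustration:

```python
documents = ["doc-a", "doc-b", "doc-c"]

# Before: for _, doc in enumerate(documents): ...
# After: iterate directly; enumerate earns its keep only when the
# index is actually consumed.
for doc in documents:
    print(doc)
```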

View File

@@ -104,7 +104,7 @@ class OpenSearchVector(BaseVector):
},
}
# See https://github.com/langchain-ai/langchainjs/issues/4346#issuecomment-1935123377
-if self._client_config.aws_service not in ["aoss"]:
+if self._client_config.aws_service != "aoss":
action["_id"] = uuid4().hex
actions.append(action)
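Membership in a one-element list is an equality test in disguise, so the direct comparison reads better and avoids allocating a throwaway list on every call (refurb's FURB171, single-item-membership-test, as a guess at the triggering rule):

```python
aws_service = "es"

# `x not in ["aoss"]` and `x != "aoss"` are equivalent for a
# single-element container; the comparison skips the list allocation.
if aws_service != "aoss":
    print("per-document _id is supported")
```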

View File

@@ -159,7 +159,7 @@ class SQLAlchemyWorkflowExecutionRepository(WorkflowExecutionRepository):
else None
)
db_model.status = domain_model.status
-db_model.error = domain_model.error_message if domain_model.error_message else None
+db_model.error = domain_model.error_message or None
db_model.total_tokens = domain_model.total_tokens
db_model.total_steps = domain_model.total_steps
db_model.exceptions_count = domain_model.exceptions_count

View File

@@ -320,7 +320,7 @@ class AgentNode(BaseNode):
memory = self._fetch_memory(model_instance)
if memory:
prompt_messages = memory.get_history_prompt_messages(
-message_limit=node_data.memory.window.size if node_data.memory.window.size else None
+message_limit=node_data.memory.window.size or None
)
history_prompt_messages = [
prompt_message.model_dump(mode="json") for prompt_message in prompt_messages

View File

@@ -141,9 +141,7 @@ def init_app(app: DifyApp) -> Celery:
imports.append("schedule.queue_monitor_task")
beat_schedule["datasets-queue-monitor"] = {
"task": "schedule.queue_monitor_task.queue_monitor_task",
"schedule": timedelta(
minutes=dify_config.QUEUE_MONITOR_INTERVAL if dify_config.QUEUE_MONITOR_INTERVAL else 30
),
"schedule": timedelta(minutes=dify_config.QUEUE_MONITOR_INTERVAL or 30),
}
if dify_config.ENABLE_CHECK_UPGRADABLE_PLUGIN_TASK and dify_config.MARKETPLACE_ENABLED:
imports.append("schedule.check_upgradable_plugin_task")

View File

@@ -7,6 +7,7 @@ Supports complete lifecycle management for knowledge base files.
import json
import logging
+import operator
from dataclasses import asdict, dataclass
from datetime import datetime
from enum import StrEnum, auto
@@ -356,7 +357,7 @@ class FileLifecycleManager:
# Cleanup old versions for each file
for base_filename, versions in file_versions.items():
# Sort by version number
-versions.sort(key=lambda x: x[0], reverse=True)
+versions.sort(key=operator.itemgetter(0), reverse=True)
# Keep the newest max_versions versions, delete the rest
if len(versions) > max_versions:

View File

@@ -1,3 +1,4 @@
+import operator
import traceback
import typing
@@ -118,7 +119,7 @@ def process_tenant_plugin_autoupgrade_check_task(
current_version = version
latest_version = manifest.latest_version
-def fix_only_checker(latest_version, current_version):
+def fix_only_checker(latest_version: str, current_version: str):
latest_version_tuple = tuple(int(val) for val in latest_version.split("."))
current_version_tuple = tuple(int(val) for val in current_version.split("."))
@@ -130,8 +131,7 @@ def process_tenant_plugin_autoupgrade_check_task(
return False
version_checker = {
-TenantPluginAutoUpgradeStrategy.StrategySetting.LATEST: lambda latest_version,
-current_version: latest_version != current_version,
+TenantPluginAutoUpgradeStrategy.StrategySetting.LATEST: operator.ne,
TenantPluginAutoUpgradeStrategy.StrategySetting.FIX_ONLY: fix_only_checker,
}
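The dispatch table maps each upgrade strategy to a `(latest, current) -> bool` callable, and a lambda that merely applies `!=` is exactly `operator.ne`, so it can be referenced directly; `fix_only_checker` also gains parameter annotations in the same pass. A standalone sketch of the table's shape (string keys stand in for the `TenantPluginAutoUpgradeStrategy` enum values, and the patch-only logic is an assumed reconstruction, not the repo's exact rule):

```python
import operator
from collections.abc import Callable


def fix_only_checker(latest_version: str, current_version: str) -> bool:
    # Assumed semantics: upgrade only when the patch number moves
    # within the same major.minor series.
    latest = tuple(int(v) for v in latest_version.split("."))
    current = tuple(int(v) for v in current_version.split("."))
    return latest[:2] == current[:2] and latest[2] != current[2]


version_checker: dict[str, Callable[[str, str], bool]] = {
    "latest": operator.ne,  # any version difference triggers an upgrade
    "fix_only": fix_only_checker,
}

assert version_checker["latest"]("1.2.3", "1.2.2")
assert not version_checker["fix_only"]("1.3.0", "1.2.9")
```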

View File

@@ -3,6 +3,7 @@
import os
import tempfile
import unittest
+from pathlib import Path
import pytest
@@ -60,8 +61,7 @@ class TestClickZettaVolumeStorage(unittest.TestCase):
# Test download
with tempfile.NamedTemporaryFile() as temp_file:
storage.download(test_filename, temp_file.name)
with open(temp_file.name, "rb") as f:
downloaded_content = f.read()
downloaded_content = Path(temp_file.name).read_bytes()
assert downloaded_content == test_content
# Test scan
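The `with open(...)` / `read()` pair collapses into a `pathlib` one-liner (refurb's FURB101, read-whole-file; its sibling FURB103 handles the write direction in the hunks below). A runnable sketch of both directions, using a scratch file:

```python
from pathlib import Path

path = Path("scratch.txt")

# write-whole-file: replaces open(..., "w") + f.write(...)
path.write_text("hello", encoding="utf-8")
assert path.read_text(encoding="utf-8") == "hello"

# read-whole-file, binary flavor: replaces open(..., "rb") + f.read()
path.write_bytes(b"\x00\x01")
assert path.read_bytes() == b"\x00\x01"

path.unlink()  # remove the scratch file
```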

View File

@@ -12,6 +12,7 @@ and realistic testing scenarios with actual PostgreSQL and Redis instances.
import uuid
from datetime import datetime
+from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
@@ -276,8 +277,7 @@ class TestBatchCreateSegmentToIndexTask:
mock_storage = mock_external_service_dependencies["storage"]
def mock_download(key, file_path):
with open(file_path, "w", encoding="utf-8") as f:
f.write(csv_content)
Path(file_path).write_text(csv_content, encoding="utf-8")
mock_storage.download.side_effect = mock_download
@@ -505,7 +505,7 @@ class TestBatchCreateSegmentToIndexTask:
db.session.commit()
# Test each unavailable document
-for i, document in enumerate(test_cases):
+for document in test_cases:
job_id = str(uuid.uuid4())
batch_create_segment_to_index_task(
job_id=job_id,
@@ -601,8 +601,7 @@ class TestBatchCreateSegmentToIndexTask:
mock_storage = mock_external_service_dependencies["storage"]
def mock_download(key, file_path):
with open(file_path, "w", encoding="utf-8") as f:
f.write(empty_csv_content)
Path(file_path).write_text(empty_csv_content, encoding="utf-8")
mock_storage.download.side_effect = mock_download
@@ -684,8 +683,7 @@ class TestBatchCreateSegmentToIndexTask:
mock_storage = mock_external_service_dependencies["storage"]
def mock_download(key, file_path):
with open(file_path, "w", encoding="utf-8") as f:
f.write(csv_content)
Path(file_path).write_text(csv_content, encoding="utf-8")
mock_storage.download.side_effect = mock_download

View File

@@ -362,7 +362,7 @@ class TestCleanDatasetTask:
# Create segments for each document
segments = []
-for i, document in enumerate(documents):
+for document in documents:
segment = self._create_test_segment(db_session_with_containers, account, tenant, dataset, document)
segments.append(segment)

View File

@@ -15,7 +15,7 @@ class FakeResponse:
self.status_code = status_code
self.headers = headers or {}
self.content = content
-self.text = text if text else content.decode("utf-8", errors="ignore")
+self.text = text or content.decode("utf-8", errors="ignore")
# ---------------------------

View File

@@ -1,3 +1,4 @@
+from pathlib import Path
from unittest.mock import Mock, create_autospec, patch
import pytest
@@ -146,19 +147,17 @@ class TestMetadataBugCompleteValidation:
# Console API create
console_create_file = "api/controllers/console/datasets/metadata.py"
if os.path.exists(console_create_file):
-with open(console_create_file) as f:
-    content = f.read()
-    # Should contain nullable=False, not nullable=True
-    assert "nullable=True" not in content.split("class DatasetMetadataCreateApi")[1].split("class")[0]
+content = Path(console_create_file).read_text()
+# Should contain nullable=False, not nullable=True
+assert "nullable=True" not in content.split("class DatasetMetadataCreateApi")[1].split("class")[0]
# Service API create
service_create_file = "api/controllers/service_api/dataset/metadata.py"
if os.path.exists(service_create_file):
-with open(service_create_file) as f:
-    content = f.read()
-    # Should contain nullable=False, not nullable=True
-    create_api_section = content.split("class DatasetMetadataCreateServiceApi")[1].split("class")[0]
-    assert "nullable=True" not in create_api_section
+content = Path(service_create_file).read_text()
+# Should contain nullable=False, not nullable=True
+create_api_section = content.split("class DatasetMetadataCreateServiceApi")[1].split("class")[0]
+assert "nullable=True" not in create_api_section
class TestMetadataValidationSummary: