> ## Documentation Index
> Fetch the complete documentation index at: https://docs.dify.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Run Pipeline

> Execute the full knowledge pipeline for a knowledge base. Supports both streaming and blocking response modes.



## OpenAPI

````yaml /en/api-reference/openapi_knowledge.json post /datasets/{dataset_id}/pipeline/run
openapi: 3.0.1
info:
  title: Knowledge API
  description: >-
    API for managing knowledge bases, documents, chunks, metadata, and tags,
    including creation, retrieval, and configuration. **Note:** A single
    Knowledge Base API key has permission to operate on all visible knowledge
    bases under the same account. Please pay attention to data security.
  version: 1.0.0
servers:
  - url: '{apiBaseUrl}'
    description: The base URL for the Knowledge API.
    variables:
      apiBaseUrl:
        default: https://api.dify.ai/v1
        description: Actual base URL of the API
security:
  - ApiKeyAuth: []
tags:
  - name: Knowledge Bases
    description: >-
      Operations for managing knowledge bases, including creation,
      configuration, and retrieval.
  - name: Documents
    description: >-
      Operations for creating, updating, and managing documents within a
      knowledge base.
  - name: Chunks
    description: Operations for managing document chunks and child chunks.
  - name: Metadata
    description: >-
      Operations for managing knowledge base metadata fields and document
      metadata values.
  - name: Tags
    description: Operations for managing knowledge base tags and tag bindings.
  - name: Models
    description: Operations for retrieving available models.
  - name: Knowledge Pipeline
    description: >-
      Operations for managing and running knowledge pipelines, including
      datasource plugins and pipeline execution.
paths:
  /datasets/{dataset_id}/pipeline/run:
    post:
      tags:
        - Knowledge Pipeline
      summary: Run Pipeline
      description: >-
        Execute the full knowledge pipeline for a knowledge base. Supports both
        streaming and blocking response modes.
      operationId: runPipeline
      parameters:
        - name: dataset_id
          in: path
          required: true
          schema:
            type: string
            format: uuid
          description: Knowledge base ID.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required:
                - inputs
                - datasource_type
                - datasource_info_list
                - start_node_id
                - is_published
                - response_mode
              properties:
                inputs:
                  type: object
                  additionalProperties: true
                  description: >-
                    Key-value pairs for pipeline input variables defined in the
                    workflow. Pass `{}` if the pipeline has no input variables.
                datasource_type:
                  type: string
                  enum:
                    - local_file
                    - online_document
                    - website_crawl
                    - online_drive
                  description: >-
                    Type of the datasource. Determines which fields are expected
                    in `datasource_info_list` items.
                datasource_info_list:
                  type: array
                  description: >-
                    List of datasource objects to process. The expected item
                    structure depends on `datasource_type`.
                  items:
                    oneOf:
                      - title: Local File
                        type: object
                        required:
                          - reference
                        properties:
                          reference:
                            type: string
                            description: >-
                              Use the `id` returned by the [Upload Pipeline
                              File](/api-reference/knowledge-pipeline/upload-pipeline-file)
                              endpoint. `related_id` is accepted as an alias.
                          name:
                            type: string
                            description: Document title. Defaults to `"untitled"`.
                      - title: Online Document
                        type: object
                        required:
                          - workspace_id
                          - page
                        properties:
                          workspace_id:
                            type: string
                            description: >-
                              ID of the workspace or database in the external
                              platform (e.g., a Notion workspace ID).
                          page:
                            type: object
                            description: Page details.
                            required:
                              - page_id
                              - type
                            properties:
                              page_id:
                                type: string
                                description: Page identifier.
                              type:
                                type: string
                                description: >-
                                  Page type defined by the datasource plugin
                                  (e.g., `"page"`, `"database"`).
                              page_name:
                                type: string
                                description: Display name. Defaults to `"untitled"`.
                          credential_id:
                            type: string
                            description: >-
                              Credential for authenticating with the external
                              platform. Managed via the Dify console. If
                              omitted, the provider's default credential is
                              used.
                      - title: Website Crawl
                        type: object
                        required:
                          - url
                        properties:
                          url:
                            type: string
                            description: URL to crawl.
                          title:
                            type: string
                            description: >-
                              Used as the document name. Defaults to
                              `"untitled"`.
                      - title: Online Drive
                        type: object
                        required:
                          - id
                          - type
                        properties:
                          id:
                            type: string
                            description: File or folder ID.
                          type:
                            type: string
                            enum:
                              - file
                              - folder
                            description: >-
                              Whether this entry is a single file or a folder to
                              expand.
                          bucket:
                            type: string
                            description: >-
                              Storage bucket name. Required by some drive
                              providers (e.g., S3-compatible stores); omit if
                              the provider does not use buckets.
                          name:
                            type: string
                            description: File name. Defaults to `"untitled"`.
                start_node_id:
                  type: string
                  description: ID of the node to start pipeline execution from.
                is_published:
                  type: boolean
                  description: >-
                    Whether to run the published or draft version of the
                    pipeline. `true` runs the latest published version; `false`
                    runs the current draft (useful for testing unpublished
                    changes).
                response_mode:
                  type: string
                  enum:
                    - streaming
                    - blocking
                  description: >-
                    Response mode for the pipeline execution. `streaming`
                    returns a Server-Sent Events stream, `blocking` waits and
                    returns the complete result.
            examples:
              local_file:
                summary: Request Example—Local file
                value:
                  inputs: {}
                  datasource_type: local_file
                  datasource_info_list:
                    - reference: a1b2c3d4-e5f6-7890-abcd-ef1234567890
                      name: quarterly-report.pdf
                  start_node_id: '1719288585006'
                  is_published: true
                  response_mode: blocking
              online_document:
                summary: Request Example—Online document
                value:
                  inputs: {}
                  datasource_type: online_document
                  datasource_info_list:
                    - workspace_id: ws-abc123
                      page:
                        page_id: pg-def456
                        type: page
                        page_name: Product Roadmap
                      credential_id: cred-789xyz
                  start_node_id: '1719288585006'
                  is_published: true
                  response_mode: streaming
              website_crawl:
                summary: Request Example—Website crawl
                value:
                  inputs: {}
                  datasource_type: website_crawl
                  datasource_info_list:
                    - url: https://example.com/docs/getting-started
                      title: Getting Started Guide
                  start_node_id: '1719288585006'
                  is_published: true
                  response_mode: blocking
              online_drive:
                summary: Request Example—Online drive
                value:
                  inputs: {}
                  datasource_type: online_drive
                  datasource_info_list:
                    - id: file-abc123
                      type: file
                      bucket: my-bucket
                      name: meeting-notes.docx
                  start_node_id: '1719288585006'
                  is_published: true
                  response_mode: blocking
      responses:
        '200':
          description: >-
            Pipeline execution result. Format depends on `response_mode`:
            streaming returns a `text/event-stream`, blocking returns a JSON
            object.
          content:
            text/event-stream:
              schema:
                type: string
                description: >-
                  Server-Sent Events stream. Returned when `response_mode` is
                  `streaming`.


                  Each event is delivered as `data: {JSON}\n\n`. Key events:
                  `workflow_started` (execution began), `node_started` /
                  `node_finished` (per-node progress with `node_id`,
                  `node_type`, `status`, `inputs`, `outputs`),
                  `workflow_finished` (final result with `status`, `outputs`,
                  `total_tokens`, `elapsed_time`), `ping` (keepalive).
            application/json:
              schema:
                type: object
                description: >-
                  Complete pipeline execution result. Returned when
                  `response_mode` is `blocking`.
                additionalProperties: true
              examples:
                success:
                  summary: Blocking Response Example
                  value:
                    task_id: a1b2c3d4-e5f6-7890-abcd-ef1234567890
                    workflow_run_id: f1e2d3c4-b5a6-7890-abcd-ef0987654321
                    data:
                      id: f1e2d3c4-b5a6-7890-abcd-ef0987654321
                      status: succeeded
                      outputs: {}
                      created_at: 1741267200
                      finished_at: 1741267210
        '403':
          description: '`forbidden` : Forbidden.'
          content:
            application/json:
              examples:
                forbidden:
                  summary: forbidden
                  value:
                    status: 403
                    code: forbidden
                    message: Forbidden.
        '404':
          description: '`not_found` : Dataset not found.'
          content:
            application/json:
              examples:
                not_found:
                  summary: not_found
                  value:
                    status: 404
                    code: not_found
                    message: Dataset not found.
        '500':
          description: '`pipeline_run_error` : Pipeline execution failed.'
          content:
            application/json:
              examples:
                pipeline_run_error:
                  summary: pipeline_run_error
                  value:
                    status: 500
                    code: pipeline_run_error
                    message: 'Pipeline execution failed: connection timeout'
components:
  securitySchemes:
    ApiKeyAuth:
      type: http
      scheme: bearer
      bearerFormat: API_KEY
      description: >-
        API Key authentication. For all API requests, include your API Key in
        the `Authorization` HTTP Header, prefixed with `Bearer `. Example:
        `Authorization: Bearer {API_KEY}`. **We strongly recommend storing your
        API Key on the server side, never sharing or storing it on the client
        side, to avoid API Key leakage that can lead to serious consequences.**

````