diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 5157da536f02a853de79307a914fa8e68b76df29..6227dc256a1d856daae40634521e7fd6e26eb310 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -63,7 +63,7 @@ This change added tests and can be verified as follows: - The public API, i.e., is any changed class annotated with `@Public(Evolving)`: (yes / no) - The serializers: (yes / no / don't know) - The runtime per-record code paths (performance sensitive): (yes / no / don't know) - - Anything that affects deployment or recovery: JobManager (and its components), Checkpointing, Yarn/Mesos, ZooKeeper: (yes / no / don't know) + - Anything that affects deployment or recovery: JobManager (and its components), Checkpointing, Kubernetes/Yarn/Mesos, ZooKeeper: (yes / no / don't know) - The S3 file system connector: (yes / no / don't know) ## Documentation diff --git a/.travis.yml b/.travis.yml index acf8b7e51aea608ede2a481aa20fdfd9e1b28ae0..8d99bda37d5178ab33c59c573a82859ed4b4ac0a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -27,6 +27,7 @@ cache: - $HOME/maven_cache # keep in sync with tools/travis/docs.sh - $HOME/gem_cache + - $HOME/flink_download_cache # do not cache our own artifacts before_cache: @@ -41,6 +42,11 @@ git: env: global: + - cache-dir: $HOME/flink_download_cache + - cache-btl: 30 + - cache-download-max-retries: 3 + - cache-download-attempt-timeout: 5 + - cache-download-global-timeout: 10 # Global variable to avoid hanging travis builds when downloading cache archives. - MALLOC_ARENA_MAX=2 - DOCKER_COMPOSE_VERSION=1.22.0 @@ -112,6 +118,14 @@ jobs: script: ./tools/travis_controller.sh tests env: PROFILE="-Dhadoop.version=2.8.3 -Dinclude_hadoop_aws -Dscala-2.11" name: tests + - if: type in (pull_request, push) + script: ./tools/travis_controller.sh legacy_scheduler_core + env: PROFILE="-Dhadoop.version=2.8.3 -Dinclude_hadoop_aws -Dscala-2.11" + name: core - legacy_scheduler + - if: type in (pull_request, push) + script: ./tools/travis_controller.sh legacy_scheduler_tests + env: PROFILE="-Dhadoop.version=2.8.3 -Dinclude_hadoop_aws -Dscala-2.11" + name: tests - legacy_scheduler - if: type in (pull_request, push) script: ./tools/travis_controller.sh misc env: PROFILE="-Dhadoop.version=2.8.3 -Dinclude_hadoop_aws -Dscala-2.11" @@ -248,7 +262,7 @@ jobs: jdk: "openjdk11" script: ./tools/travis_controller.sh misc env: PROFILE="-Dhadoop.version=2.8.3 -Dinclude_hadoop_aws -Dscala-2.11 -Djdk11" - name: misc + name: misc - jdk 11 - if: type = cron jdk: "openjdk11" stage: cleanup @@ -265,99 +279,115 @@ jobs: # E2E profiles - Hadoop 2.8 - if: type = cron stage: test - env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3 -De2e-metrics" + env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3 -Pe2e-travis1,e2e-hadoop" script: ./tools/travis/nightly.sh split_misc.sh name: e2e - misc - hadoop 2.8 - if: type = cron - env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3" + env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3 -Pe2e-travis2,e2e-hadoop" script: ./tools/travis/nightly.sh split_ha.sh name: e2e - ha - hadoop 2.8 - if: type = cron - env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3" + env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3 -Pe2e-travis3,e2e-hadoop" script: ./tools/travis/nightly.sh split_sticky.sh name: e2e - sticky - hadoop 2.8 - if: type = cron - env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3" + env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3 -Pe2e-travis4,e2e-hadoop" script: 
./tools/travis/nightly.sh split_checkpoints.sh name: e2e - checkpoints - hadoop 2.8 - if: type = cron - env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3" + env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3 -Pe2e-travis5,e2e-hadoop" script: ./tools/travis/nightly.sh split_container.sh name: e2e - container - hadoop 2.8 - if: type = cron - env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3" + env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3 -Pe2e-travis6,e2e-hadoop" script: ./tools/travis/nightly.sh split_heavy.sh name: e2e - heavy - hadoop 2.8 + - if: type = cron + env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3 -Pe2e-travis6,e2e-hadoop" + script: ./tools/travis/nightly.sh split_tpcds.sh + name: e2e - tpcds - hadoop 2.8 # E2E profiles - Scala 2.12 - if: type = cron - env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3 -Dscala-2.12 -De2e-metrics" + env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3 -Dscala-2.12 -Pe2e-travis1,e2e-hadoop" script: ./tools/travis/nightly.sh split_misc.sh name: e2e - misc - scala 2.12 - if: type = cron - env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3 -Dscala-2.12" + env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3 -Dscala-2.12 -Pe2e-travis2,e2e-hadoop" script: ./tools/travis/nightly.sh split_ha.sh name: e2e - ha - scala 2.12 - if: type = cron - env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3 -Dscala-2.12" + env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3 -Dscala-2.12 -Pe2e-travis3,e2e-hadoop" script: ./tools/travis/nightly.sh split_sticky.sh name: e2e - sticky - scala 2.12 - if: type = cron - env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3 -Dscala-2.12" + env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3 -Dscala-2.12 -Pe2e-travis4,e2e-hadoop" script: ./tools/travis/nightly.sh split_checkpoints.sh name: e2e - checkpoints - scala 2.12 - if: type = cron - env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3 -Dscala-2.12" + env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3 -Dscala-2.12 -Pe2e-travis5,e2e-hadoop" script: ./tools/travis/nightly.sh split_container.sh name: e2e - container - scala 2.12 - if: type = cron - env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3 -Dscala-2.12" + env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3 -Dscala-2.12 -Pe2e-travis6,e2e-hadoop" script: ./tools/travis/nightly.sh split_heavy.sh name: e2e - heavy - scala 2.12 + - if: type = cron + env: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3 -Dscala-2.12 -Pe2e-travis6,e2e-hadoop" + script: ./tools/travis/nightly.sh split_tpcds.sh + name: e2e - tpcds - scala 2.12 # E2E profiles - Hadoop-free - if: type = cron - env: PROFILE="-De2e-metrics" + env: PROFILE="-Pe2e-travis1" script: ./tools/travis/nightly.sh split_misc_hadoopfree.sh name: e2e - misc - if: type = cron - env: PROFILE="" + env: PROFILE="-Pe2e-travis2" script: ./tools/travis/nightly.sh split_ha.sh name: e2e - ha - if: type = cron - env: PROFILE="" + env: PROFILE="-Pe2e-travis3" script: ./tools/travis/nightly.sh split_sticky.sh name: e2e - sticky - if: type = cron - env: PROFILE="" + env: PROFILE="-Pe2e-travis4" script: ./tools/travis/nightly.sh split_checkpoints.sh name: e2e - checkpoints - if: type = cron - env: PROFILE="" - script: ./tools/travis/nightly.sh split_container.sh + env: PROFILE="-Pe2e-travis5" + script: ./tools/travis/nightly.sh split_container.sh without-hadoop name: e2e - container - if: type = cron - env: PROFILE="" + env: PROFILE="-Pe2e-travis6" script: ./tools/travis/nightly.sh split_heavy.sh name: e2e - heavy + - if: type = cron + env: 
PROFILE="-Pe2e-travis6" + script: ./tools/travis/nightly.sh split_tpcds.sh + name: e2e - tpcds # E2E profiles - Java 11 - if: type = cron stage: test jdk: "openjdk11" - env: PROFILE="-Djdk11 -Dinclude-hadoop -Dhadoop.version=2.8.3 -De2e-metrics" + env: PROFILE="-Djdk11 -Dinclude-hadoop -Dhadoop.version=2.8.3 -Pe2e-travis1,e2e-hadoop" script: ./tools/travis/nightly.sh split_misc.sh name: e2e - misc - jdk11 - if: type = cron - env: PROFILE="-Djdk11 -Dinclude-hadoop -Dhadoop.version=2.8.3" + env: PROFILE="-Djdk11 -Dinclude-hadoop -Dhadoop.version=2.8.3 -Pe2e-travis2,e2e-hadoop" script: ./tools/travis/nightly.sh split_ha.sh name: e2e - ha - jdk11 - if: type = cron - env: PROFILE="-Djdk11 -Dinclude-hadoop -Dhadoop.version=2.8.3" + env: PROFILE="-Djdk11 -Dinclude-hadoop -Dhadoop.version=2.8.3 -Pe2e-travis3,e2e-hadoop" script: ./tools/travis/nightly.sh split_sticky.sh name: e2e - sticky - jdk 11 - if: type = cron - env: PROFILE="-Djdk11 -Dinclude-hadoop -Dhadoop.version=2.8.3" + env: PROFILE="-Djdk11 -Dinclude-hadoop -Dhadoop.version=2.8.3 -Pe2e-travis4,e2e-hadoop" script: ./tools/travis/nightly.sh split_checkpoints.sh name: e2e - checkpoints - jdk 11 - if: type = cron - env: PROFILE="-Djdk11 -Dinclude-hadoop -Dhadoop.version=2.8.3" + env: PROFILE="-Djdk11 -Dinclude-hadoop -Dhadoop.version=2.8.3 -Pe2e-travis6,e2e-hadoop" script: ./tools/travis/nightly.sh split_heavy.sh name: e2e - heavy - jdk 11 + - if: type = cron + env: PROFILE="-Djdk11 -Dinclude-hadoop -Dhadoop.version=2.8.3 -Pe2e-travis6,e2e-hadoop" + script: ./tools/travis/nightly.sh split_tpcds.sh + name: e2e - tpcds - jdk 11 diff --git a/NOTICE b/NOTICE index ede0dfa17caaedaf2087a13abbbf586b7a0988de..35f4f6e897016b47085aa7b1e73c60294cfa12f9 100644 --- a/NOTICE +++ b/NOTICE @@ -4,35 +4,9 @@ Copyright 2014-2019 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). -This project bundles the following dependencies under the Apache Software License 2.0. (http://www.apache.org/licenses/LICENSE-2.0.txt) - -- nvd3#1.8.4 - -This project bundles the following dependencies under the MIT license. (https://opensource.org/licenses/MIT) -See bundled license files for details. - -- angular:1.4.8 -- angular-drag-and-drop-list:1.4.0 -- angular-moment:0.10.3 -- angular-ui-router:0.2.15 -- bootstrap:3.3.6 -- dagre:0.7.5 -- dagre-d3:0.4.17 -- ev-emitter:1.1.1 -- font-awesome:4.5.0 (CSS) -- graphlib:1.0.7 -- imagesloaded:4.1.4 -- jquery:2.2.0 -- lodash:3.10.1 -- moment:2.10.6 -- moment-duration-format:1.3.0 -- qtip2:2.2.1 -- Split.js:1.0.6 - This project bundles the following dependencies under the BSD license. See bundled license files for details. -- d3:3.5.12 - cloudpickle:1.2.2 - net.sf.py4j:py4j:0.10.8.1 @@ -40,7 +14,6 @@ This project bundles the following dependencies under SIL OFL 1.1 license (https See bundled license files for details. - font-awesome:4.5.0 (Font) (http://fortawesome.github.io/Font-Awesome/) - Created by Dave Gandy - -> fonts in "flink-runtime-web/web-dashboard/web/fonts" -> fonts in "docs/page/font-awesome/fonts" The Apache Flink project contains or reuses code that is licensed under the ISC license from the following projects. diff --git a/azure-pipelines.yml b/azure-pipelines.yml index cc41f084b2df3a079d9c3d338da464713a7ee1a1..d486643a5dbf7160362a2410d07f44658f664fe3 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -13,23 +13,47 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+# +# This file defines an Azure Pipeline build for testing Flink. It is intended to be used +# with a free Azure Pipelines account. +# It has the following features: +# - default builds for pushes / pull requests +# - end-to-end tests +# +# +# For the "apache/flink" repository, we are using the pipeline definition located in +# tools/azure-pipelines/build-apache-repo.yml +# That file points to custom, self-hosted build agents for faster pull request build processing and +# integration with Flinkbot. +# The custom pipeline definition file is configured in the "Pipeline settings" screen +# of the Azure Pipelines web ui. +# -trigger: - branches: - include: - - '*' resources: containers: - # Container with Maven 3.2.5 to have the same environment everywhere. + # Container with Maven 3.2.5, SSL to have the same environment everywhere. - container: flink-build-container - image: rmetzger/flink-ci:3 - repositories: - - repository: templates - type: github - name: flink-ci/flink-azure-builds - endpoint: flink-ci + image: rmetzger/flink-ci:ubuntu-amd64-3528acd + +# See tools/azure-pipelines/jobs-template.yml for a short summary of the caching +variables: + MAVEN_CACHE_FOLDER: $(Pipeline.Workspace)/.m2/repository + MAVEN_OPTS: '-Dmaven.repo.local=$(MAVEN_CACHE_FOLDER)' + CACHE_KEY: maven | $(Agent.OS) | **/pom.xml, !**/target/** + CACHE_FALLBACK_KEY: maven | $(Agent.OS) + CACHE_FLINK_DIR: $(Pipeline.Workspace)/flink_cache -jobs: -- template: flink-build-jobs.yml@templates +jobs: + - template: tools/azure-pipelines/jobs-template.yml + parameters: # see template file for a definition of the parameters. + stage_name: ci_build + test_pool_definition: + vmImage: 'ubuntu-latest' + e2e_pool_definition: + vmImage: 'ubuntu-16.04' + environment: PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3 -Dinclude_hadoop_aws -Dscala-2.11" + run_end_to_end: false + container: flink-build-container + jdk: jdk8 diff --git a/docs/.gitignore b/docs/.gitignore index 98b6f6b56036b0bf385fb29480e08adb82842837..3d6212de50701bdf1098749dcbbe9f8e5d5835f2 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -1,6 +1,8 @@ .bundle/ .jekyll-metadata +.jekyll-cache/ .rubydeps/ content/ +content_*/ ruby2/.bundle/ -ruby2/.rubydeps/ \ No newline at end of file +ruby2/.rubydeps/ diff --git a/docs/404.md b/docs/404.md index 42e390b8798ab9d319234a436b5e6a4497ea9e46..a752f8d5054a06e1c3a164b832a75032ee5edfbb 100644 --- a/docs/404.md +++ b/docs/404.md @@ -21,6 +21,6 @@ KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> - + +The page you are looking for has been moved. This could be because of a recent reorganization of the +documentation. Redirecting to [Documentation Home Page]({{ site.baseurl }}/) in 5 seconds. 
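Note on the caching variables introduced in the azure-pipelines.yml hunk above: the referenced tools/azure-pipelines/jobs-template.yml is not part of this diff, so the following is only a hedged sketch of how MAVEN_CACHE_FOLDER, CACHE_KEY and CACHE_FALLBACK_KEY are typically consumed by an Azure Pipelines Cache@2 step; step name and placement are assumptions, not the template's actual contents.

  # illustrative sketch of a cache step consuming the variables defined above
  - task: Cache@2
    inputs:
      key: $(CACHE_KEY)
      restoreKeys: $(CACHE_FALLBACK_KEY)
      path: $(MAVEN_CACHE_FOLDER)
    displayName: Cache Maven local repository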
diff --git a/docs/Gemfile b/docs/Gemfile index b519eb9d578694032c88b623c7ed5735a509896a..3edc85dcd397c722a222151dd8df5315a46e4e21 100644 --- a/docs/Gemfile +++ b/docs/Gemfile @@ -18,17 +18,18 @@ source 'https://rubygems.org' -ruby '>= 2.1.0' +ruby '>= 2.4.0' -gem 'jekyll', '3.7.2' -gem 'addressable', '2.4.0' -gem 'octokit', '~> 4.3.0' -gem 'therubyracer', '0.12.2' -gem 'json', '2.0.4' +gem 'jekyll', '4.0.0' +gem 'addressable', '2.7.0' +gem 'octokit', '4.14.0' +gem 'therubyracer', '0.12.3' +gem 'json', '2.2.0' gem 'jekyll-multiple-languages', '2.0.3' gem 'jekyll-paginate', '1.1.0' gem 'liquid-c', '4.0.0' # speed-up site generation +gem 'sassc', '2.2.1' # speed-up site generation -group :jekyll_plugins do - gem 'hawkins' -end +# group :jekyll_plugins do +# gem 'hawkins' +# end diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock index 09b02e8b709ac04d0caa66796fa807e148253dd0..9af2cbf85b8d61529d7de67b43461e48fb4f38b8 100644 --- a/docs/Gemfile.lock +++ b/docs/Gemfile.lock @@ -1,93 +1,94 @@ GEM remote: https://rubygems.org/ specs: - addressable (2.4.0) + addressable (2.7.0) + public_suffix (>= 2.0.2, < 5.0) colorator (1.1.0) - concurrent-ruby (1.0.5) + concurrent-ruby (1.1.5) em-websocket (0.5.1) eventmachine (>= 0.12.9) http_parser.rb (~> 0.6.0) - eventmachine (1.2.5) - faraday (0.9.2) + eventmachine (1.2.7) + faraday (0.17.0) multipart-post (>= 1.2, < 3) - ffi (1.9.18) + ffi (1.11.2) forwardable-extended (2.6.0) - hawkins (2.0.5) - em-websocket (~> 0.5) - jekyll (~> 3.1) http_parser.rb (0.6.0) - i18n (0.9.3) + i18n (1.7.0) concurrent-ruby (~> 1.0) - jekyll (3.7.2) + jekyll (4.0.0) addressable (~> 2.4) colorator (~> 1.0) em-websocket (~> 0.5) - i18n (~> 0.7) - jekyll-sass-converter (~> 1.0) + i18n (>= 0.9.5, < 2) + jekyll-sass-converter (~> 2.0) jekyll-watch (~> 2.0) - kramdown (~> 1.14) + kramdown (~> 2.1) + kramdown-parser-gfm (~> 1.0) liquid (~> 4.0) mercenary (~> 0.3.3) pathutil (~> 0.9) - rouge (>= 1.7, < 4) + rouge (~> 3.0) safe_yaml (~> 1.0) + terminal-table (~> 1.8) jekyll-multiple-languages (2.0.3) jekyll-paginate (1.1.0) - jekyll-sass-converter (1.5.1) - sass (~> 3.4) - jekyll-watch (2.0.0) + jekyll-sass-converter (2.0.1) + sassc (> 2.0.1, < 3.0) + jekyll-watch (2.2.1) listen (~> 3.0) - json (2.0.4) - kramdown (1.16.2) + json (2.2.0) + kramdown (2.1.0) + kramdown-parser-gfm (1.1.0) + kramdown (~> 2.0) libv8 (3.16.14.19) - liquid (4.0.0) + liquid (4.0.3) liquid-c (4.0.0) liquid (>= 3.0.0) - listen (3.1.5) - rb-fsevent (~> 0.9, >= 0.9.4) - rb-inotify (~> 0.9, >= 0.9.7) - ruby_dep (~> 1.2) + listen (3.2.0) + rb-fsevent (~> 0.10, >= 0.10.3) + rb-inotify (~> 0.9, >= 0.9.10) mercenary (0.3.6) - multipart-post (2.0.0) - octokit (4.3.0) - sawyer (~> 0.7.0, >= 0.5.3) - pathutil (0.16.1) + multipart-post (2.1.1) + octokit (4.14.0) + sawyer (~> 0.8.0, >= 0.5.3) + pathutil (0.16.2) forwardable-extended (~> 2.6) - rb-fsevent (0.10.2) - rb-inotify (0.9.10) - ffi (>= 0.5.0, < 2) + public_suffix (4.0.1) + rb-fsevent (0.10.3) + rb-inotify (0.10.0) + ffi (~> 1.0) ref (2.0.0) - rouge (3.1.1) - ruby_dep (1.5.0) - safe_yaml (1.0.4) - sass (3.5.5) - sass-listen (~> 4.0.0) - sass-listen (4.0.0) - rb-fsevent (~> 0.9, >= 0.9.4) - rb-inotify (~> 0.9, >= 0.9.7) - sawyer (0.7.0) - addressable (>= 2.3.5, < 2.5) - faraday (~> 0.8, < 0.10) - therubyracer (0.12.2) - libv8 (~> 3.16.14.0) + rouge (3.13.0) + safe_yaml (1.0.5) + sassc (2.2.1) + ffi (~> 1.9) + sawyer (0.8.2) + addressable (>= 2.3.5) + faraday (> 0.8, < 2.0) + terminal-table (1.8.0) + unicode-display_width (~> 1.1, >= 1.1.1) + therubyracer (0.12.3) + libv8 
(~> 3.16.14.15) ref + unicode-display_width (1.6.0) PLATFORMS ruby DEPENDENCIES - addressable (= 2.4.0) - hawkins - jekyll (= 3.7.2) + addressable (= 2.7.0) + jekyll (= 4.0.0) jekyll-multiple-languages (= 2.0.3) jekyll-paginate (= 1.1.0) - json (= 2.0.4) + json (= 2.2.0) liquid-c (= 4.0.0) - octokit (~> 4.3.0) - therubyracer (= 0.12.2) + octokit (= 4.14.0) + sassc (= 2.2.1) + therubyracer (= 0.12.3) RUBY VERSION - ruby 2.3.1p112 + ruby 2.6.3p62 BUNDLED WITH 1.17.2 diff --git a/docs/README.md b/docs/README.md index 924fcbad55353591711991555c6e2c869346b42a..c3d5f63898d109384aba99003f95c7cad996ecf9 100644 --- a/docs/README.md +++ b/docs/README.md @@ -42,8 +42,7 @@ If you call the script with the preview flag `build_docs.sh -p`, Jekyll will start a web server at `localhost:4000` and watch the docs directory for updates. Use this mode to preview changes locally. -If you have ruby 2.0 or greater, -you can call the script with the incremental flag `build_docs.sh -i`. +You can call the script with the incremental flag `build_docs.sh -i`. Jekyll will then serve a live preview at `localhost:4000`, and it will be much faster because it will only rebuild the pages corresponding to files that are modified. Note that if you are making changes that affect diff --git a/docs/_config.yml b/docs/_config.yml index e6e1f8096f595f796e34ccc41a164f9ed6a15f3d..7d4dc7b526c8de5842f93171c9df5cbed4b8939b 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -27,10 +27,10 @@ # we change the version for the complete docs when forking of a release branch # etc. # The full version string as referenced in Maven (e.g. 1.2.1) -version: "1.10-SNAPSHOT" +version: "1.11-SNAPSHOT" # For stable releases, leave the bugfix version out (e.g. 1.2). For snapshot # release this should be the same as the regular version -version_title: "1.10-SNAPSHOT" +version_title: "1.11-SNAPSHOT" # Branch on Github for this version github_branch: "master" @@ -64,6 +64,7 @@ is_stable: false show_outdated_warning: false previous_docs: + 1.10: http://ci.apache.org/projects/flink/flink-docs-release-1.10 1.9: http://ci.apache.org/projects/flink/flink-docs-release-1.9 1.8: http://ci.apache.org/projects/flink/flink-docs-release-1.8 1.7: http://ci.apache.org/projects/flink/flink-docs-release-1.7 @@ -82,6 +83,11 @@ previous_docs: # to change anything here. #------------------------------------------------------------------------------ +exclude: + - "build_docs.sh" + - "build_docs.bat" + - "check_links.sh" + # Used in some documents to initialize arrays. Don't delete. 
array: [] diff --git a/docs/_config_dev_en.yml b/docs/_config_dev_en.yml index b2854a26731848cf38262d502d69726c0023ee75..a7ca0d0c6d0afe334ee1808a8bb5a58e2e9bf457 100644 --- a/docs/_config_dev_en.yml +++ b/docs/_config_dev_en.yml @@ -17,3 +17,9 @@ exclude: - "*.zh.md" + - "build_docs.sh" + - "build_docs.bat" + - "check_links.sh" + - "content" + - "content_en" + - "content_zh" diff --git a/docs/_config_dev_zh.yml b/docs/_config_dev_zh.yml index 4fd38874c2b3e72bf255680e55e518ec4418d2c6..813a6361734a7431baed4d09038a2a191a44ec74 100644 --- a/docs/_config_dev_zh.yml +++ b/docs/_config_dev_zh.yml @@ -17,6 +17,12 @@ exclude: - "*.md" + - "build_docs.sh" + - "build_docs.bat" + - "check_links.sh" + - "content" + - "content_en" + - "content_zh" include: - "*.zh.md" diff --git a/docs/_includes/generated/akka_configuration.html b/docs/_includes/generated/akka_configuration.html index 83b977507ac962d7ffcc1b2143d4d60f8cde2890..2baeb6ff4f3fdadd2ce0626f89478d47f4652fee 100644 --- a/docs/_includes/generated/akka_configuration.html +++ b/docs/_includes/generated/akka_configuration.html @@ -3,123 +3,153 @@ Key Default - Description + Type + Description + +
akka.ask.callstack
+ true + Boolean + If true, call stacks for asynchronous asks are captured. That way, when an ask fails (for example times out), you get a proper exception describing the original method call and call site. Note that in case of having millions of concurrent RPC calls, this may add to the memory footprint. +
akka.ask.timeout
"10 s" + String Timeout used for all futures and blocking Akka calls. If Flink fails due to timeouts then you should try to increase this value. Timeouts can be caused by slow machines or a congested network. The timeout value requires a time-unit specifier (ms/s/min/h/d).
akka.client-socket-worker-pool.pool-size-factor
1.0 + Double The pool size factor is used to determine thread pool size using the following formula: ceil(available processors * factor). Resulting size is then bounded by the pool-size-min and pool-size-max values.
akka.client-socket-worker-pool.pool-size-max
2 + Integer Max number of threads to cap factor-based number to.
akka.client-socket-worker-pool.pool-size-min
1 + Integer Min number of threads to cap factor-based number to.
akka.client.timeout
"60 s" + String Timeout for all blocking calls on the client side.
akka.fork-join-executor.parallelism-factor
2.0 + Double The parallelism factor is used to determine thread pool size using the following formula: ceil(available processors * factor). Resulting size is then bounded by the parallelism-min and parallelism-max values.
akka.fork-join-executor.parallelism-max
64 + Integer Max number of threads to cap factor-based parallelism number to.
akka.fork-join-executor.parallelism-min
8 + Integer Min number of threads to cap factor-based parallelism number to.
akka.framesize
"10485760b" + String Maximum size of messages which are sent between the JobManager and the TaskManagers. If Flink fails because messages exceed this limit, then you should increase it. The message size requires a size-unit specifier.
akka.jvm-exit-on-fatal-error
true + Boolean Exit JVM on fatal Akka errors.
akka.log.lifecycle.events
false + Boolean Turns on Akka’s remote logging of events. Set this value to 'true' in case of debugging.
akka.lookup.timeout
"10 s" + String Timeout used for the lookup of the JobManager. The timeout value has to contain a time-unit specifier (ms/s/min/h/d).
akka.retry-gate-closed-for
50 + Long Milliseconds a gate should be closed for after a remote connection was disconnected.
akka.server-socket-worker-pool.pool-size-factor
1.0 + Double The pool size factor is used to determine thread pool size using the following formula: ceil(available processors * factor). Resulting size is then bounded by the pool-size-min and pool-size-max values.
akka.server-socket-worker-pool.pool-size-max
2 + Integer Max number of threads to cap factor-based number to.
akka.server-socket-worker-pool.pool-size-min
1 + Integer Min number of threads to cap factor-based number to.
akka.ssl.enabled
true + Boolean Turns on SSL for Akka’s remote communication. This is applicable only when the global ssl flag security.ssl.enabled is set to true.
akka.startup-timeout
(none) + String Timeout after which the startup of a remote component is considered to have failed.
akka.tcp.timeout
"20 s" + String Timeout for all outbound connections. If you should experience problems with connecting to a TaskManager due to a slow network, you should increase this value.
akka.throughput
15 + Integer Number of messages that are processed in a batch before returning the thread to the pool. Low values denote a fair scheduling whereas high values can increase the performance at the cost of unfairness.
akka.transport.heartbeat.interval
"1000 s" + String Heartbeat interval for Akka’s transport failure detector. Since Flink uses TCP, the detector is not necessary. Therefore, the detector is disabled by setting the interval to a very high value. In case you should need the transport failure detector, set the interval to some reasonable value. The interval value requires a time-unit specifier (ms/s/min/h/d).
akka.transport.heartbeat.pause
"6000 s" + String Acceptable heartbeat pause for Akka’s transport failure detector. Since Flink uses TCP, the detector is not necessary. Therefore, the detector is disabled by setting the pause to a very high value. In case you should need the transport failure detector, set the pause to some reasonable value. The pause value requires a time-unit specifier (ms/s/min/h/d).
akka.transport.threshold
300.0 + Double Threshold for the transport failure detector. Since Flink uses TCP, the detector is not necessary and, thus, the threshold is set to a high value. diff --git a/docs/_includes/generated/algorithm_configuration.html b/docs/_includes/generated/algorithm_configuration.html index 12a3ea9c9c8c1d44f1a1ea175611e9613bd1c350..967e9d65ad47cf6ee9379121e42938c4b2db2457 100644 --- a/docs/_includes/generated/algorithm_configuration.html +++ b/docs/_includes/generated/algorithm_configuration.html @@ -3,23 +3,27 @@ Key Default - Description + Type + Description
taskmanager.runtime.hashjoin-bloom-filters
false + Boolean Flag to activate/deactivate bloom filters in the hybrid hash join implementation. In cases where the hash join needs to spill to disk (datasets larger than the reserved fraction of memory), these bloom filters can greatly reduce the number of spilled records, at the cost of some CPU cycles.
taskmanager.runtime.max-fan
128 + Integer The maximal fan-in for external merge joins and fan-out for spilling hash tables. Limits the number of file handles per operator, but may cause intermediate merging/partitioning, if set too small.
taskmanager.runtime.sort-spilling-threshold
0.8 + Float A sort operation starts spilling when this fraction of its memory budget is full. diff --git a/docs/_includes/generated/all_jobmanager_section.html b/docs/_includes/generated/all_jobmanager_section.html new file mode 100644 index 0000000000000000000000000000000000000000..ab01a856e032db1fd9acf753447b4c8b58dea8b0 --- /dev/null +++ b/docs/_includes/generated/all_jobmanager_section.html @@ -0,0 +1,66 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
jobmanager.archive.fs.dir
(none)StringDirectory for the JobManager to store the archives of completed jobs.
jobmanager.execution.attempts-history-size
16IntegerThe maximum number of prior execution attempts kept in history.
jobmanager.execution.failover-strategy
regionStringThis option specifies how the job computation recovers from task failures. Accepted values are:
  • 'full': Restarts all tasks to recover the job.
  • 'region': Restarts all tasks that could be affected by the task failure. More details can be found here.
jobmanager.heap.size
"1024m"StringJVM heap size for the JobManager.
jobmanager.rpc.address
(none)StringThe config parameter defining the network address to connect to for communication with the job manager. This value is only interpreted in setups where a single JobManager with static name or address exists (simple standalone setups, or container setups with dynamic service name resolution). It is not used in many high-availability setups, when a leader-election service (like ZooKeeper) is used to elect and discover the JobManager leader from potentially multiple standby JobManagers.
jobmanager.rpc.port
6123IntegerThe config parameter defining the network port to connect to for communication with the job manager. Like jobmanager.rpc.address, this value is only interpreted in setups where a single JobManager with static name/address and port exists (simple standalone setups, or container setups with dynamic service name resolution). This config option is not used in many high-availability setups, when a leader-election service (like ZooKeeper) is used to elect and discover the JobManager leader from potentially multiple standby JobManagers.
jobstore.cache-size
52428800LongThe job store cache size in bytes which is used to keep completed jobs in memory.
jobstore.expiration-time
3600LongThe time in seconds after which a completed job expires and is purged from the job store.
jobstore.max-capacity
2147483647IntegerThe max number of completed jobs that can be kept in the job store.
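The options in this new all_jobmanager_section table are ordinary flink-conf.yaml entries. As a hedged illustration only, a minimal excerpt could look like the following; paths and sizes are placeholders, not defaults or recommendations.

  # flink-conf.yaml -- JobManager-related settings (illustrative values)
  jobmanager.heap.size: 2048m                      # default is "1024m"
  jobmanager.execution.failover-strategy: region   # restart only the affected region
  jobmanager.archive.fs.dir: hdfs:///flink/completed-jobs/
  jobstore.expiration-time: 7200                   # keep completed jobs for 2 hours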
diff --git a/docs/_includes/generated/all_taskmanager_network_section.html b/docs/_includes/generated/all_taskmanager_network_section.html new file mode 100644 index 0000000000000000000000000000000000000000..04a85662bae4a5305c1c1587dad721d36f643dd8 --- /dev/null +++ b/docs/_includes/generated/all_taskmanager_network_section.html @@ -0,0 +1,96 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
taskmanager.network.blocking-shuffle.compression.enabled
falseBooleanBoolean flag indicating whether the shuffle data will be compressed for blocking shuffle mode. Note that data is compressed per buffer and compression can incur extra CPU overhead, so it is more effective for IO-bound scenarios where the data compression ratio is high. Currently, shuffle data compression is an experimental feature and the config option can be changed in the future.
taskmanager.network.blocking-shuffle.type
"file"StringThe blocking shuffle type, either "mmap" or "file". The "auto" means selecting the property type automatically based on system memory architecture (64 bit for mmap and 32 bit for file). Note that the memory usage of mmap is not accounted by configured memory limits, but some resource frameworks like yarn would track this memory usage and kill the container once memory exceeding some threshold. Also note that this option is experimental and might be changed future.
taskmanager.network.detailed-metrics
falseBooleanBoolean flag to enable/disable more detailed metrics about inbound/outbound network queue lengths.
taskmanager.network.memory.buffers-per-channel
2IntegerMaximum number of network buffers to use for each outgoing/incoming channel (subpartition/input channel). In credit-based flow control mode, this indicates how many credits are exclusive in each input channel. It should be configured to at least 2 for good performance. 1 buffer is for receiving in-flight data in the subpartition and 1 buffer is for parallel serialization.
taskmanager.network.memory.floating-buffers-per-gate
8IntegerNumber of extra network buffers to use for each outgoing/incoming gate (result partition/input gate). In credit-based flow control mode, this indicates how many floating credits are shared among all the input channels. The floating buffers are distributed based on backlog (real-time output buffers in the subpartition) feedback, and can help relieve back-pressure caused by unbalanced data distribution among the subpartitions. This value should be increased in case of higher round trip times between nodes and/or larger number of machines in the cluster.
taskmanager.network.netty.client.connectTimeoutSec
120IntegerThe Netty client connection timeout.
taskmanager.network.netty.client.numThreads
-1IntegerThe number of Netty client threads.
taskmanager.network.netty.num-arenas
-1IntegerThe number of Netty arenas.
taskmanager.network.netty.sendReceiveBufferSize
0IntegerThe Netty send and receive buffer size. This defaults to the system buffer size (cat /proc/sys/net/ipv4/tcp_[rw]mem) and is 4 MiB in modern Linux.
taskmanager.network.netty.server.backlog
0IntegerThe netty server connection backlog.
taskmanager.network.netty.server.numThreads
-1IntegerThe number of Netty server threads.
taskmanager.network.netty.transport
"auto"StringThe Netty transport type, either "nio" or "epoll". The "auto" means selecting the property mode automatically based on the platform. Note that the "epoll" mode can get better performance, less GC and have more advanced features which are only available on modern Linux.
taskmanager.network.request-backoff.initial
100IntegerMinimum backoff in milliseconds for partition requests of input channels.
taskmanager.network.request-backoff.max
10000IntegerMaximum backoff in milliseconds for partition requests of input channels.
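As a worked example for the shuffle/network options documented in this new section, a flink-conf.yaml excerpt with illustrative values follows; these are tuning sketches, not defaults except where noted.

  # flink-conf.yaml -- network stack tuning (illustrative values)
  taskmanager.network.memory.buffers-per-channel: 4         # default: 2
  taskmanager.network.memory.floating-buffers-per-gate: 16  # default: 8
  taskmanager.network.blocking-shuffle.type: mmap
  taskmanager.network.blocking-shuffle.compression.enabled: true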
diff --git a/docs/_includes/generated/all_taskmanager_section.html b/docs/_includes/generated/all_taskmanager_section.html new file mode 100644 index 0000000000000000000000000000000000000000..692c38b6e22f659a0aa5f1081cc3af524696a776 --- /dev/null +++ b/docs/_includes/generated/all_taskmanager_section.html @@ -0,0 +1,115 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
task.cancellation.interval
30000LongTime interval between two successive task cancellation attempts in milliseconds.
task.cancellation.timeout
180000LongTimeout in milliseconds after which a task cancellation times out and leads to a fatal TaskManager error. A value of 0 deactivates the watch dog.
task.cancellation.timers.timeout
7500LongTime in milliseconds that we wait for the timers to finish all pending timer threads when the stream task is cancelled.
taskmanager.data.port
0IntegerThe task manager’s port used for data exchange operations.
taskmanager.data.ssl.enabled
trueBooleanEnable SSL support for the taskmanager data transport. This is applicable only when the global flag for internal SSL (security.ssl.internal.enabled) is set to true
taskmanager.debug.memory.log
falseBooleanFlag indicating whether to start a thread, which repeatedly logs the memory usage of the JVM.
taskmanager.debug.memory.log-interval
5000LongThe interval (in ms) for the log thread to log the current memory usage.
taskmanager.host
(none)StringThe address of the network interface that the TaskManager binds to. This option can be used to define explicitly a binding address. Because different TaskManagers need different values for this option, usually it is specified in an additional non-shared TaskManager-specific config file.
taskmanager.jvm-exit-on-oom
falseBooleanWhether to kill the TaskManager when the task thread throws an OutOfMemoryError.
taskmanager.memory.segment-size
32 kbMemorySizeSize of memory buffers used by the network stack and the memory manager.
taskmanager.network.bind-policy
"ip"StringThe automatic address binding policy used by the TaskManager if "taskmanager.host" is not set. The value should be one of the following: +
  • "name" - uses hostname as binding address
  • "ip" - uses host's ip address as binding address
taskmanager.numberOfTaskSlots
1IntegerThe number of parallel operator or user function instances that a single TaskManager can run. If this value is larger than 1, a single TaskManager takes multiple instances of a function or operator. That way, the TaskManager can utilize multiple CPU cores, but at the same time, the available memory is divided between the different operator or function instances. This value is typically proportional to the number of physical CPU cores that the TaskManager's machine has (e.g., equal to the number of cores, or half the number of cores).
taskmanager.registration.initial-backoff
500 msDurationThe initial registration backoff between two consecutive registration attempts. The backoff is doubled for each new registration attempt until it reaches the maximum registration backoff.
taskmanager.registration.max-backoff
30 sDurationThe maximum registration backoff between two consecutive registration attempts. The max registration backoff requires a time unit specifier (ms/s/min/h/d).
taskmanager.registration.refused-backoff
10 sDurationThe backoff after a registration has been refused by the job manager before retrying to connect.
taskmanager.registration.timeout
5 minDurationDefines the timeout for the TaskManager registration. If the duration is exceeded without a successful registration, then the TaskManager terminates.
taskmanager.rpc.port
"0"StringThe task manager’s IPC port. Accepts a list of ports (“50100,50101”), ranges (“50100-50200”) or a combination of both. It is recommended to set a range of ports to avoid collisions when multiple TaskManagers are running on the same machine.
diff --git a/docs/_includes/generated/blob_server_configuration.html b/docs/_includes/generated/blob_server_configuration.html index 874993430e740d032a4f2e0a5f9b46131df046f0..ee5620a425b852f2498ff5d94dad7d46a376119b 100644 --- a/docs/_includes/generated/blob_server_configuration.html +++ b/docs/_includes/generated/blob_server_configuration.html @@ -3,58 +3,69 @@ Key Default - Description + Type + Description
blob.client.connect.timeout
0 + Integer The connection timeout in milliseconds for the blob client.
blob.client.socket.timeout
300000 + Integer The socket timeout in milliseconds for the blob client.
blob.fetch.backlog
1000 + Integer The config parameter defining the backlog of BLOB fetches on the JobManager.
blob.fetch.num-concurrent
50 + Integer The config parameter defining the maximum number of concurrent BLOB fetches that the JobManager serves.
blob.fetch.retries
5 + Integer The config parameter defining the number of retries for failed BLOB fetches.
blob.offload.minsize
1048576 + Integer The minimum size for messages to be offloaded to the BlobServer.
blob.server.port
"0" + String The config parameter defining the server port of the blob service.
blob.service.cleanup.interval
3600 + Long Cleanup interval of the blob caches at the task managers (in seconds).
blob.service.ssl.enabled
true + Boolean Flag to override ssl support for the blob service transport.
blob.storage.directory
(none) + String The config parameter defining the storage directory to be used by the blob server. diff --git a/docs/_includes/generated/checkpointing_configuration.html b/docs/_includes/generated/checkpointing_configuration.html index 4232ed6ee11ad30f8437ccf8e092e548f151e052..a80a67b543a44dedc7eb7969809c62fbd19e35d7 100644 --- a/docs/_includes/generated/checkpointing_configuration.html +++ b/docs/_includes/generated/checkpointing_configuration.html @@ -3,58 +3,69 @@ Key Default - Description + Type + Description
state.backend
(none) + String The state backend to be used to store and checkpoint state.
state.backend.async
true + Boolean Option whether the state backend should use an asynchronous snapshot method where possible and configurable. Some state backends may not support asynchronous snapshots, or only support asynchronous snapshots, and ignore this option.
state.backend.fs.memory-threshold
1024 + Integer The minimum size of state data files. All state chunks smaller than that are stored inline in the root checkpoint metadata file.
state.backend.fs.write-buffer-size
4096 + Integer The default size of the write buffer for the checkpoint streams that write to file systems. The actual write buffer size is determined to be the maximum of the value of this option and option 'state.backend.fs.memory-threshold'.
state.backend.incremental
false + Boolean Option whether the state backend should create incremental checkpoints, if possible. For an incremental checkpoint, only a diff from the previous checkpoint is stored, rather than the complete checkpoint state. Some state backends may not support incremental checkpoints and ignore this option.
state.backend.local-recovery
false + Boolean This option configures local recovery for this state backend. By default, local recovery is deactivated. Local recovery currently only covers keyed state backends. Currently, MemoryStateBackend does not support local recovery and ignores this option.
state.checkpoints.dir
(none) + String The default directory used for storing the data files and meta data of checkpoints in a Flink supported filesystem. The storage path must be accessible from all participating processes/nodes (i.e. all TaskManagers and JobManagers).
state.checkpoints.num-retained
1 + Integer The maximum number of completed checkpoints to retain.
state.savepoints.dir
(none) + String The default directory for savepoints. Used by the state backends that write savepoints to file systems (MemoryStateBackend, FsStateBackend, RocksDBStateBackend).
taskmanager.state.local.root-dirs
(none) + String The config parameter defining the root directories for storing file-based state for local recovery. Local recovery currently only covers keyed state backends. Currently, MemoryStateBackend does not support local recovery and ignore this option diff --git a/docs/_includes/generated/cluster_configuration.html b/docs/_includes/generated/cluster_configuration.html index a24a0d8bf1ad1d64d6770cd5b127e4d7a61e225b..2379c9a0e28e6dfca99fad97299b0178ab548460 100644 --- a/docs/_includes/generated/cluster_configuration.html +++ b/docs/_includes/generated/cluster_configuration.html @@ -3,33 +3,45 @@ Key Default - Description + Type + Description + +
cluster.evenly-spread-out-slots
+ false + Boolean + Enable the slot spread out allocation strategy. This strategy tries to spread out the slots evenly across all available `TaskExecutors`. +
cluster.registration.error-delay
10000 + Long The pause made after a registration attempt caused an exception (other than timeout), in milliseconds.
cluster.registration.initial-timeout
100 + Long Initial registration timeout between cluster components in milliseconds.
cluster.registration.max-timeout
30000 + Long Maximum registration timeout between cluster components in milliseconds.
cluster.registration.refused-registration-delay
30000 + Long The pause made after the registration attempt was refused in milliseconds.
cluster.services.shutdown-timeout
30000 + Long The shutdown timeout for cluster services like executors in milliseconds. diff --git a/docs/_includes/generated/common_high_availability_section.html b/docs/_includes/generated/common_high_availability_section.html new file mode 100644 index 0000000000000000000000000000000000000000..4c06ca778cf2ded408bf5ca0cc8937aaba20704d --- /dev/null +++ b/docs/_includes/generated/common_high_availability_section.html @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
high-availability
"NONE"StringDefines high-availability mode used for the cluster execution. To enable high-availability, set this mode to "ZOOKEEPER" or specify FQN of factory class.
high-availability.cluster-id
"/default"StringThe ID of the Flink cluster, used to separate multiple Flink clusters from each other. Needs to be set for standalone clusters but is automatically inferred in YARN and Mesos.
high-availability.storageDir
(none)StringFile system path (URI) where Flink persists metadata in high-availability setups.
diff --git a/docs/_includes/generated/common_high_availability_zk_section.html b/docs/_includes/generated/common_high_availability_zk_section.html new file mode 100644 index 0000000000000000000000000000000000000000..18175fb7339c96c8145102ed4fae49e480981608 --- /dev/null +++ b/docs/_includes/generated/common_high_availability_zk_section.html @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
high-availability.zookeeper.path.root
"/flink"StringThe root path under which Flink stores its entries in ZooKeeper.
high-availability.zookeeper.quorum
(none)StringThe ZooKeeper quorum to use, when running Flink in a high-availability mode with ZooKeeper.
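Combining the two new high-availability sections above, a typical ZooKeeper-based HA setup in flink-conf.yaml could look like the following sketch; host names and paths are placeholders.

  # flink-conf.yaml -- ZooKeeper high availability (placeholder hosts/paths)
  high-availability: zookeeper
  high-availability.storageDir: hdfs:///flink/ha/
  high-availability.cluster-id: /my-cluster
  high-availability.zookeeper.quorum: zk1:2181,zk2:2181,zk3:2181
  high-availability.zookeeper.path.root: /flink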
diff --git a/docs/_includes/generated/common_host_port_section.html b/docs/_includes/generated/common_host_port_section.html new file mode 100644 index 0000000000000000000000000000000000000000..5deb71edc484fda84e711183ebb3d00055815081 --- /dev/null +++ b/docs/_includes/generated/common_host_port_section.html @@ -0,0 +1,72 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
jobmanager.rpc.address
(none)StringThe config parameter defining the network address to connect to for communication with the job manager. This value is only interpreted in setups where a single JobManager with static name or address exists (simple standalone setups, or container setups with dynamic service name resolution). It is not used in many high-availability setups, when a leader-election service (like ZooKeeper) is used to elect and discover the JobManager leader from potentially multiple standby JobManagers.
jobmanager.rpc.port
6123IntegerThe config parameter defining the network port to connect to for communication with the job manager. Like jobmanager.rpc.address, this value is only interpreted in setups where a single JobManager with static name/address and port exists (simple standalone setups, or container setups with dynamic service name resolution). This config option is not used in many high-availability setups, when a leader-election service (like ZooKeeper) is used to elect and discover the JobManager leader from potentially multiple standby JobManagers.
metrics.internal.query-service.port
"0"StringThe port range used for Flink's internal metric query service. Accepts a list of ports (“50100,50101”), ranges(“50100-50200”) or a combination of both. It is recommended to set a range of ports to avoid collisions when multiple Flink components are running on the same machine. Per default Flink will pick a random port.
rest.address
(none)StringThe address that should be used by clients to connect to the server.
rest.bind-address
(none)StringThe address to which the server binds itself.
rest.bind-port
"8081"StringThe port that the server binds itself. Accepts a list of ports (“50100,50101”), ranges (“50100-50200”) or a combination of both. It is recommended to set a range of ports to avoid collisions when multiple Rest servers are running on the same machine.
rest.port
8081IntegerThe port that the client connects to. If rest.bind-port has not been specified, then the REST server will bind to this port.
taskmanager.data.port
0IntegerThe task manager’s port used for data exchange operations.
taskmanager.host
(none)StringThe address of the network interface that the TaskManager binds to. This option can be used to define explicitly a binding address. Because different TaskManagers need different values for this option, usually it is specified in an additional non-shared TaskManager-specific config file.
taskmanager.rpc.port
"0"StringThe task manager’s IPC port. Accepts a list of ports (“50100,50101”), ranges (“50100-50200”) or a combination of both. It is recommended to set a range of ports to avoid collisions when multiple TaskManagers are running on the same machine.
diff --git a/docs/_includes/generated/common_memory_section.html b/docs/_includes/generated/common_memory_section.html new file mode 100644 index 0000000000000000000000000000000000000000..713edff3b253b979073723b895fbb0c0397be307 --- /dev/null +++ b/docs/_includes/generated/common_memory_section.html @@ -0,0 +1,102 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
taskmanager.memory.flink.size
(none)MemorySizeTotal Flink Memory size for the TaskExecutors. This includes all the memory that a TaskExecutor consumes, except for JVM Metaspace and JVM Overhead. It consists of Framework Heap Memory, Task Heap Memory, Task Off-Heap Memory, Managed Memory, and Network Memory. See also 'taskmanager.memory.process.size' for total process memory size configuration.
taskmanager.memory.framework.heap.size
128 mbMemorySizeFramework Heap Memory size for TaskExecutors. This is the size of JVM heap memory reserved for TaskExecutor framework, which will not be allocated to task slots.
taskmanager.memory.framework.off-heap.size
128 mbMemorySizeFramework Off-Heap Memory size for TaskExecutors. This is the size of off-heap memory (JVM direct memory and native memory) reserved for TaskExecutor framework, which will not be allocated to task slots. The configured value will be fully counted when Flink calculates the JVM max direct memory size parameter.
taskmanager.memory.jvm-metaspace.size
96 mbMemorySizeJVM Metaspace Size for the TaskExecutors.
taskmanager.memory.jvm-overhead.fraction
0.1FloatFraction of Total Process Memory to be reserved for JVM Overhead. This is off-heap memory reserved for JVM overhead, such as thread stack space, compile cache, etc. This includes native memory but not direct memory, and will not be counted when Flink calculates JVM max direct memory size parameter. The size of JVM Overhead is derived to make up the configured fraction of the Total Process Memory. If the derived size is less/greater than the configured min/max size, the min/max size will be used. The exact size of JVM Overhead can be explicitly specified by setting the min/max size to the same value.
taskmanager.memory.jvm-overhead.max
1 gbMemorySizeMax JVM Overhead size for the TaskExecutors. This is off-heap memory reserved for JVM overhead, such as thread stack space, compile cache, etc. This includes native memory but not direct memory, and will not be counted when Flink calculates JVM max direct memory size parameter. The size of JVM Overhead is derived to make up the configured fraction of the Total Process Memory. If the derived size is less/greater than the configured min/max size, the min/max size will be used. The exact size of JVM Overhead can be explicitly specified by setting the min/max size to the same value.
taskmanager.memory.jvm-overhead.min
192 mbMemorySizeMin JVM Overhead size for the TaskExecutors. This is off-heap memory reserved for JVM overhead, such as thread stack space, compile cache, etc. This includes native memory but not direct memory, and will not be counted when Flink calculates JVM max direct memory size parameter. The size of JVM Overhead is derived to make up the configured fraction of the Total Process Memory. If the derived size is less/greater than the configured min/max size, the min/max size will be used. The exact size of JVM Overhead can be explicitly specified by setting the min/max size to the same value.
taskmanager.memory.managed.fraction
0.4FloatFraction of Total Flink Memory to be used as Managed Memory, if Managed Memory size is not explicitly specified.
taskmanager.memory.managed.size
(none)MemorySizeManaged Memory size for TaskExecutors. This is the size of off-heap memory managed by the memory manager, reserved for sorting, hash tables, caching of intermediate results and RocksDB state backend. Memory consumers can either allocate memory from the memory manager in the form of MemorySegments, or reserve bytes from the memory manager and keep their memory usage within that boundary. If unspecified, it will be derived to make up the configured fraction of the Total Flink Memory.
taskmanager.memory.network.fraction
0.1FloatFraction of Total Flink Memory to be used as Network Memory. Network Memory is off-heap memory reserved for ShuffleEnvironment (e.g., network buffers). Network Memory size is derived to make up the configured fraction of the Total Flink Memory. If the derived size is less/greater than the configured min/max size, the min/max size will be used. The exact size of Network Memory can be explicitly specified by setting the min/max size to the same value.
taskmanager.memory.network.max
1 gbMemorySizeMax Network Memory size for TaskExecutors. Network Memory is off-heap memory reserved for ShuffleEnvironment (e.g., network buffers). Network Memory size is derived to make up the configured fraction of the Total Flink Memory. If the derived size is less/greater than the configured min/max size, the min/max size will be used. The exact size of Network Memory can be explicitly specified by setting the min/max to the same value.
taskmanager.memory.network.min
64 mbMemorySizeMin Network Memory size for TaskExecutors. Network Memory is off-heap memory reserved for ShuffleEnvironment (e.g., network buffers). Network Memory size is derived to make up the configured fraction of the Total Flink Memory. If the derived size is less/greater than the configured min/max size, the min/max size will be used. The exact size of Network Memory can be explicitly specified by setting the min/max to the same value.
taskmanager.memory.process.size
(none)MemorySizeTotal Process Memory size for the TaskExecutors. This includes all the memory that a TaskExecutor consumes, consisting of Total Flink Memory, JVM Metaspace, and JVM Overhead. On containerized setups, this should be set to the container memory. See also 'taskmanager.memory.flink.size' for total Flink memory size configuration.
taskmanager.memory.task.heap.size
(none)MemorySizeTask Heap Memory size for TaskExecutors. This is the size of JVM heap memory reserved for tasks. If not specified, it will be derived as Total Flink Memory minus Framework Heap Memory, Task Off-Heap Memory, Managed Memory and Network Memory.
taskmanager.memory.task.off-heap.size
0 bytesMemorySizeTask Off-Heap Memory size for TaskExecutors. This is the size of off heap memory (JVM direct memory and native memory) reserved for tasks. The configured value will be fully counted when Flink calculates the JVM max direct memory size parameter.
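The memory model documented above is usually configured by fixing either the total process size or the total Flink size and letting the remaining components be derived. A hedged sketch with illustrative values:

  # flink-conf.yaml -- TaskExecutor memory (illustrative values)
  taskmanager.memory.process.size: 4096m    # e.g. the container size on Yarn/Kubernetes
  taskmanager.memory.managed.fraction: 0.4
  taskmanager.memory.network.min: 128 mb
  taskmanager.memory.network.max: 1 gb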
diff --git a/docs/_includes/generated/common_miscellaneous_section.html b/docs/_includes/generated/common_miscellaneous_section.html new file mode 100644 index 0000000000000000000000000000000000000000..0a4e15af8fd2d250feb7c14668eae4b5003e5fc1 --- /dev/null +++ b/docs/_includes/generated/common_miscellaneous_section.html @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
fs.default-scheme
(none)StringThe default filesystem scheme, used for paths that do not declare a scheme explicitly. May contain an authority, e.g. host:port in case of an HDFS NameNode.
io.tmp.dirs
'LOCAL_DIRS' on Yarn. '_FLINK_TMP_DIR' on Mesos. System.getProperty("java.io.tmpdir") in standalone.StringDirectories for temporary files, separated by ",", "|", or the system's java.io.File.pathSeparator.
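Both miscellaneous options above are set the same way; the scheme and directories below are placeholders for illustration only.

  # flink-conf.yaml -- miscellaneous (placeholder values)
  fs.default-scheme: hdfs://namenode-host:8020/
  io.tmp.dirs: /mnt/disk1/tmp,/mnt/disk2/tmp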
diff --git a/docs/_includes/generated/common_section.html b/docs/_includes/generated/common_state_backends_section.html similarity index 39% rename from docs/_includes/generated/common_section.html rename to docs/_includes/generated/common_state_backends_section.html index 8d7a232446181bc1870292f23c6543fcd0ec8c87..0df38e9e763348c249d10feddbbe406ba576fbbb 100644 --- a/docs/_includes/generated/common_section.html +++ b/docs/_includes/generated/common_state_backends_section.html @@ -3,64 +3,52 @@ Key Default - Description + Type + Description - -
jobmanager.heap.size
- "1024m" - JVM heap size for the JobManager. - - -
taskmanager.memory.total-flink.size
- (none) - Total Flink Memory size for the TaskExecutors. This includes all the memory that a TaskExecutor consumes, except for JVM Metaspace and JVM Overhead. It consists of Framework Heap Memory, Task Heap Memory, Task Off-Heap Memory, Managed Memory, and Shuffle Memory. - - -
parallelism.default
- 1 - Default parallelism for jobs. - - -
taskmanager.numberOfTaskSlots
- 1 - The number of parallel operator or user function instances that a single TaskManager can run. If this value is larger than 1, a single TaskManager takes multiple instances of a function or operator. That way, the TaskManager can utilize multiple CPU cores, but at the same time, the available memory is divided between the different operator or function instances. This value is typically proportional to the number of physical CPU cores that the TaskManager's machine has (e.g., equal to the number of cores, or half the number of cores). -
state.backend
(none) + String The state backend to be used to store and checkpoint state.
state.checkpoints.dir
(none) + String The default directory used for storing the data files and meta data of checkpoints in a Flink supported filesystem. The storage path must be accessible from all participating processes/nodes(i.e. all TaskManagers and JobManagers).
state.savepoints.dir
(none) + String The default directory for savepoints. Used by the state backends that write savepoints to file systems (MemoryStateBackend, FsStateBackend, RocksDBStateBackend). -
high-availability
- "NONE" - Defines high-availability mode used for the cluster execution. To enable high-availability, set this mode to "ZOOKEEPER" or specify FQN of factory class. +
state.backend.incremental
+ false + Boolean + Option whether the state backend should create incremental checkpoints, if possible. For an incremental checkpoint, only a diff from the previous checkpoint is stored, rather than the complete checkpoint state. Some state backends may not support incremental checkpoints and ignore this option. -
high-availability.storageDir
- (none) - File system path (URI) where Flink persists metadata in high-availability setups. +
state.backend.local-recovery
+ false + Boolean + This option configures local recovery for this state backend. By default, local recovery is deactivated. Local recovery currently only covers keyed state backends. Currently, MemoryStateBackend does not support local recovery and ignores this option. -
security.ssl.internal.enabled
- false - Turns on SSL for internal network communication. Optionally, specific components may override this through their own settings (rpc, data transport, REST, etc). +
state.checkpoints.num-retained
+ 1 + Integer + The maximum number of completed checkpoints to retain. -
security.ssl.rest.enabled
- false - Turns on SSL for external communication via the REST endpoints. +
taskmanager.state.local.root-dirs
+ (none) + String + The config parameter defining the root directories for storing file-based state for local recovery. Local recovery currently only covers keyed state backends. Currently, MemoryStateBackend does not support local recovery and ignores this option. diff --git a/docs/_includes/generated/core_configuration.html b/docs/_includes/generated/core_configuration.html index 540290045a8309d8d0aa5f7b091df3583ea13351..cf68ddf65641eb003ed91b2c82be4659892db7b8 100644 --- a/docs/_includes/generated/core_configuration.html +++ b/docs/_includes/generated/core_configuration.html @@ -3,33 +3,57 @@ Key Default - Description + Type + Description
classloader.parent-first-patterns.additional
(none) + String A (semicolon-separated) list of patterns that specifies which classes should always be resolved through the parent ClassLoader first. A pattern is a simple prefix that is checked against the fully qualified class name. These patterns are appended to "classloader.parent-first-patterns.default".
classloader.parent-first-patterns.default
- "java.;scala.;org.apache.flink.;com.esotericsoftware.kryo;org.apache.hadoop.;javax.annotation.;org.slf4j;org.apache.log4j;org.apache.logging;org.apache.commons.logging;ch.qos.logback" + "java.;scala.;org.apache.flink.;com.esotericsoftware.kryo;org.apache.hadoop.;javax.annotation.;org.slf4j;org.apache.log4j;org.apache.logging;org.apache.commons.logging;ch.qos.logback;org.xml;javax.xml;org.apache.xerces;org.w3c" + String A (semicolon-separated) list of patterns that specifies which classes should always be resolved through the parent ClassLoader first. A pattern is a simple prefix that is checked against the fully qualified class name. This setting should generally not be modified. To add another pattern we recommend to use "classloader.parent-first-patterns.additional" instead.
classloader.resolve-order
"child-first" + String Defines the class resolution strategy when loading classes from user code, meaning whether to first check the user code jar ("child-first") or the application classpath ("parent-first"). The default settings indicate to load classes first from the user code jar, which means that user code jars can include and load different dependencies than Flink uses (transitively). + +
fs.default-scheme
+ (none) + String + The default filesystem scheme, used for paths that do not declare a scheme explicitly. May contain an authority, e.g. host:port in case of an HDFS NameNode. + + +
fs.output.always-create-directory
+ false + Boolean + File writers running with a parallelism larger than one create a directory for the output file path and put the different result files (one per parallel writer task) into that directory. If this option is set to "true", writers with a parallelism of 1 will also create a directory and place a single result file into it. If the option is set to "false", the writer will create the file directly at the output path, without creating a containing directory. + +
fs.overwrite-files
+ false + Boolean + Specifies whether file output writers should overwrite existing files by default. Set to "true" to overwrite by default,"false" otherwise. +
io.tmp.dirs
'LOCAL_DIRS' on Yarn. '_FLINK_TMP_DIR' on Mesos. System.getProperty("java.io.tmpdir") in standalone. + String Directories for temporary files, separated by ",", "|", or the system's java.io.File.pathSeparator.
parallelism.default
1 + Integer Default parallelism for jobs. diff --git a/docs/_includes/generated/deployment_configuration.html b/docs/_includes/generated/deployment_configuration.html new file mode 100644 index 0000000000000000000000000000000000000000..325dc982012b2e500cbc8676ed8373cb9d1b230f --- /dev/null +++ b/docs/_includes/generated/deployment_configuration.html @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
execution.attached
falseBooleanSpecifies if the pipeline is submitted in attached or detached mode.
execution.shutdown-on-attached-exit
falseBooleanIf the job is submitted in attached mode, perform a best-effort cluster shutdown when the CLI is terminated abruptly, e.g., in response to a user interrupt, such as typing Ctrl + C.
execution.target
(none)StringThe deployment target for the execution, e.g. "local" for local execution.
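A minimal flink-conf.yaml sketch of the deployment options above; "local" is the documented example target, and the boolean values are illustrative:

    execution.target: local
    execution.attached: true
    execution.shutdown-on-attached-exit: false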
diff --git a/docs/_includes/generated/deprecated_file_sinks_section.html b/docs/_includes/generated/deprecated_file_sinks_section.html new file mode 100644 index 0000000000000000000000000000000000000000..26e3d5385648f582f432e7c76453e63fb0bd21a1 --- /dev/null +++ b/docs/_includes/generated/deprecated_file_sinks_section.html @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
fs.output.always-create-directory
falseBooleanFile writers running with a parallelism larger than one create a directory for the output file path and put the different result files (one per parallel writer task) into that directory. If this option is set to "true", writers with a parallelism of 1 will also create a directory and place a single result file into it. If the option is set to "false", the writer will create the file directly at the output path, without creating a containing directory.
fs.overwrite-files
falseBooleanSpecifies whether file output writers should overwrite existing files by default. Set to "true" to overwrite by default,"false" otherwise.
diff --git a/docs/_includes/generated/environment_configuration.html b/docs/_includes/generated/environment_configuration.html index a54955c734271634bb7388b5385d2672717d3714..32b4139359f73151f3350ce5c95b2f45137a34bf 100644 --- a/docs/_includes/generated/environment_configuration.html +++ b/docs/_includes/generated/environment_configuration.html @@ -3,53 +3,63 @@ Key Default - Description + Type + Description
env.hadoop.conf.dir
(none) + String Path to hadoop configuration directory. It is required to read HDFS and/or YARN configuration. You can also set it via environment variable.
env.java.opts
(none) + String Java options to start the JVM of all Flink processes with.
env.java.opts.historyserver
(none) + String Java options to start the JVM of the HistoryServer with.
env.java.opts.jobmanager
(none) + String Java options to start the JVM of the JobManager with.
env.java.opts.taskmanager
(none) + String Java options to start the JVM of the TaskManager with.
env.log.dir
(none) + String Defines the directory where the Flink logs are saved. It has to be an absolute path. (Defaults to the log directory under Flink’s home)
env.log.max
5 + Integer The maximum number of old log files to keep.
env.ssh.opts
(none) + String Additional command line options passed to SSH clients when starting or stopping JobManager, TaskManager, and Zookeeper services (start-cluster.sh, stop-cluster.sh, start-zookeeper-quorum.sh, stop-zookeeper-quorum.sh).
env.yarn.conf.dir
(none) + String Path to yarn configuration directory. It is required to run flink on YARN. You can also set it via environment variable. diff --git a/docs/_includes/generated/execution_checkpointing_configuration.html b/docs/_includes/generated/execution_checkpointing_configuration.html new file mode 100644 index 0000000000000000000000000000000000000000..59a84f45fd74d29523a191df7d29bedb164991f6 --- /dev/null +++ b/docs/_includes/generated/execution_checkpointing_configuration.html @@ -0,0 +1,60 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
execution.checkpointing.externalized-checkpoint-retention
(none)

Enum

Possible values: [DELETE_ON_CANCELLATION, RETAIN_ON_CANCELLATION]
Externalized checkpoints write their meta data out to persistent storage and are not automatically cleaned up when the owning job fails or is suspended (terminating with job status `JobStatus#FAILED` or `JobStatus#SUSPENDED`). In this case, you have to manually clean up the checkpoint state, both the meta data and actual program state.

The mode defines how an externalized checkpoint should be cleaned up on job cancellation. If you choose to retain externalized checkpoints on cancellation you have to handle checkpoint clean up manually when you cancel the job as well (terminating with job status `JobStatus#CANCELED`).

The target directory for externalized checkpoints is configured via `state.checkpoints.dir`.
execution.checkpointing.interval
(none)DurationGets the interval in which checkpoints are periodically scheduled.

This setting defines the base interval. Checkpoint triggering may be delayed by the settings `execution.checkpointing.max-concurrent-checkpoints` and `execution.checkpointing.min-pause`
execution.checkpointing.max-concurrent-checkpoints
1IntegerThe maximum number of checkpoint attempts that may be in progress at the same time. If this value is n, then no checkpoints will be triggered while n checkpoint attempts are currently in flight. For the next checkpoint to be triggered, one checkpoint attempt would need to finish or expire.
execution.checkpointing.min-pause
0 msDurationThe minimal pause between checkpointing attempts. This setting defines how soon the checkpoint coordinator may trigger another checkpoint after it becomes possible to trigger another checkpoint with respect to the maximum number of concurrent checkpoints (see `execution.checkpointing.max-concurrent-checkpoints`).

If the maximum number of concurrent checkpoints is set to one, this setting makes effectively sure that a minimum amount of time passes where no checkpoint is in progress at all.
execution.checkpointing.mode
EXACTLY_ONCE

Enum

Possible values: [EXACTLY_ONCE, AT_LEAST_ONCE]
The checkpointing mode (exactly-once vs. at-least-once).
execution.checkpointing.prefer-checkpoint-for-recovery
falseBooleanIf enabled, a job recovery will fall back to a checkpoint even when there is a more recent savepoint.
execution.checkpointing.timeout
10 minDurationThe maximum time that a checkpoint may take before being discarded.
execution.checkpointing.tolerable-failed-checkpoints
(none)IntegerThe tolerable checkpoint failure number. If set to 0, that means we do not tolerate any checkpoint failure.
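Purely as an illustration of the checkpointing keys in this table (interval, pause and timeout values are arbitrary; the enum values are taken from the possible values listed above):

    execution.checkpointing.interval: 60 s
    execution.checkpointing.mode: EXACTLY_ONCE
    execution.checkpointing.min-pause: 10 s
    execution.checkpointing.max-concurrent-checkpoints: 1
    execution.checkpointing.timeout: 10 min
    execution.checkpointing.externalized-checkpoint-retention: RETAIN_ON_CANCELLATION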
diff --git a/docs/_includes/generated/execution_config_configuration.html b/docs/_includes/generated/execution_config_configuration.html index d050e592424fd4dc336d5ff35055d8ab23a92772..80043227ac10e999d3f9168c0872f62aeab85096 100644 --- a/docs/_includes/generated/execution_config_configuration.html +++ b/docs/_includes/generated/execution_config_configuration.html @@ -3,23 +3,27 @@ Key Default - Description + Type + Description
table.exec.async-lookup.buffer-capacity

Batch Streaming 100 + Integer The maximum number of async I/O operations that the async lookup join can trigger.
table.exec.async-lookup.timeout

Batch Streaming "3 min" + String The async timeout for the asynchronous operation to complete.
table.exec.disabled-operators

Batch (none) + String Mainly for testing. A comma-separated list of operator names, each name represents a kind of disabled operator. Operators that can be disabled include "NestedLoopJoin", "ShuffleHashJoin", "BroadcastHashJoin", "SortMergeJoin", "HashAgg", "SortAgg". By default no operator is disabled. @@ -27,46 +31,31 @@ By default no operator is disabled.
table.exec.mini-batch.allow-latency

Streaming "-1 ms" + String The maximum latency that can be used for MiniBatch to buffer input records. MiniBatch is an optimization to buffer input records to reduce state access. MiniBatch is triggered with the allowed latency interval and when the maximum number of buffered records is reached. NOTE: If table.exec.mini-batch.enabled is set to true, its value must be greater than zero.
table.exec.mini-batch.enabled

Streaming false + Boolean Specifies whether to enable MiniBatch optimization. MiniBatch is an optimization to buffer input records to reduce state access. This is disabled by default. To enable this, users should set this config to true. NOTE: If mini-batch is enabled, 'table.exec.mini-batch.allow-latency' and 'table.exec.mini-batch.size' must be set.
table.exec.mini-batch.size

Streaming -1 + Long The maximum number of input records that can be buffered for MiniBatch. MiniBatch is an optimization to buffer input records to reduce state access. MiniBatch is triggered with the allowed latency interval and when the maximum number of buffered records is reached. NOTE: MiniBatch only works for non-windowed aggregations currently. If table.exec.mini-batch.enabled is set to true, its value must be positive.
table.exec.resource.default-parallelism

Batch Streaming -1 + Integer Sets default parallelism for all operators (such as aggregate, join, filter) to run with parallel instances. This config has a higher priority than parallelism of StreamExecutionEnvironment (actually, this config overrides the parallelism of StreamExecutionEnvironment). A value of -1 indicates that no default parallelism is set, in which case it will fall back to the parallelism of StreamExecutionEnvironment. - -
table.exec.resource.external-buffer-memory

Batch - "10 mb" - Sets the external buffer memory size that is used in sort merge join and nested join and over window. - - -
table.exec.resource.hash-agg.memory

Batch - "128 mb" - Sets the managed memory size of hash aggregate operator. - - -
table.exec.resource.hash-join.memory

Batch - "128 mb" - Sets the managed memory for hash join operator. It defines the lower limit. - - -
table.exec.resource.sort.memory

Batch - "128 mb" - Sets the managed buffer memory size for sort operator. -
table.exec.shuffle-mode

Batch "batch" + String Sets exec shuffle mode. Only batch or pipeline can be set. batch: the job will run stage by stage. pipeline: the job will run in streaming mode, but it may cause resource deadlock that receiver waits for resource to start when the sender holds resource to wait to send data to the receiver. @@ -74,36 +63,43 @@ pipeline: the job will run in streaming mode, but it may cause resource deadlock
table.exec.sort.async-merge-enabled

Batch true + Boolean Whether to asynchronously merge sorted spill files.
table.exec.sort.default-limit

Batch -1 + Integer Default limit when the user does not set a limit after order by. -1 indicates that this configuration is ignored.
table.exec.sort.max-num-file-handles

Batch 128 + Integer The maximal fan-in for external merge sort. It limits the number of file handles per operator. If it is too small, it may cause intermediate merging. But if it is too large, too many files will be opened at the same time, consuming memory and leading to random reads.
table.exec.source.idle-timeout

Streaming "-1 ms" + String When a source do not receive any elements for the timeout time, it will be marked as temporarily idle. This allows downstream tasks to advance their watermarks without the need to wait for watermarks from this source while it is idle.
table.exec.spill-compression.block-size

Batch "64 kb" + String The memory size used to do compress when spilling data. The larger the memory, the higher the compression ratio, but more memory resource will be consumed by the job.
table.exec.spill-compression.enabled

Batch true + Boolean Whether to compress spilled data. Currently, compression of spilled data is only supported for the sort, hash-agg, and hash-join operators.
table.exec.window-agg.buffer-size-limit

Batch 100000 + Integer Sets the window elements buffer size limit used in group window agg operator. diff --git a/docs/_includes/generated/execution_configuration.html b/docs/_includes/generated/execution_configuration.html new file mode 100644 index 0000000000000000000000000000000000000000..037960cdcb399007873b72adfbac41ab6a849128 --- /dev/null +++ b/docs/_includes/generated/execution_configuration.html @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
execution.buffer-timeout
100 msDurationThe maximum time frequency (milliseconds) for the flushing of the output buffers. By default the output buffers flush frequently to provide low latency and to aid smooth developer experience. Setting the parameter can result in three logical modes:
  • A positive value triggers flushing periodically by that interval
  • 0 triggers flushing after every record thus minimizing latency
  • -1 ms triggers flushing only when the output buffer is full thus maximizing throughput
execution.checkpointing.snapshot-compression
falseBooleanTells if we should use compression for the state snapshot data or not.
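For context, a small sketch of the two execution options above; the buffer timeout shown is the documented default and the compression flag is simply toggled for illustration:

    execution.buffer-timeout: 100 ms
    execution.checkpointing.snapshot-compression: true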
diff --git a/docs/_includes/generated/expert_class_loading_section.html b/docs/_includes/generated/expert_class_loading_section.html new file mode 100644 index 0000000000000000000000000000000000000000..1732e846227d76e556ba4fd1a7ed44e201ef2a72 --- /dev/null +++ b/docs/_includes/generated/expert_class_loading_section.html @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
classloader.parent-first-patterns.additional
(none)StringA (semicolon-separated) list of patterns that specifies which classes should always be resolved through the parent ClassLoader first. A pattern is a simple prefix that is checked against the fully qualified class name. These patterns are appended to "classloader.parent-first-patterns.default".
classloader.parent-first-patterns.default
"java.;scala.;org.apache.flink.;com.esotericsoftware.kryo;org.apache.hadoop.;javax.annotation.;org.slf4j;org.apache.log4j;org.apache.logging;org.apache.commons.logging;ch.qos.logback;org.xml;javax.xml;org.apache.xerces;org.w3c"StringA (semicolon-separated) list of patterns that specifies which classes should always be resolved through the parent ClassLoader first. A pattern is a simple prefix that is checked against the fully qualified class name. This setting should generally not be modified. To add another pattern we recommend to use "classloader.parent-first-patterns.additional" instead.
classloader.resolve-order
"child-first"StringDefines the class resolution strategy when loading classes from user code, meaning whether to first check the user code jar ("child-first") or the application classpath ("parent-first"). The default settings indicate to load classes first from the user code jar, which means that user code jars can include and load different dependencies than Flink uses (transitively).
diff --git a/docs/_includes/generated/expert_fault_tolerance_section.html b/docs/_includes/generated/expert_fault_tolerance_section.html new file mode 100644 index 0000000000000000000000000000000000000000..520717b026c744b470e97544c1165c77e9719d55 --- /dev/null +++ b/docs/_includes/generated/expert_fault_tolerance_section.html @@ -0,0 +1,60 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
cluster.registration.error-delay
10000LongThe pause made after a registration attempt caused an exception (other than timeout), in milliseconds.
cluster.registration.initial-timeout
100LongInitial registration timeout between cluster components in milliseconds.
cluster.registration.max-timeout
30000LongMaximum registration timeout between cluster components in milliseconds.
cluster.registration.refused-registration-delay
30000LongThe pause made after the registration attempt was refused in milliseconds.
cluster.services.shutdown-timeout
30000LongThe shutdown timeout for cluster services like executors in milliseconds.
heartbeat.interval
10000LongTime interval for requesting heartbeat from sender side.
heartbeat.timeout
50000LongTimeout for requesting and receiving heartbeat for both sender and receiver sides.
jobmanager.execution.failover-strategy
regionStringThis option specifies how the job computation recovers from task failures. Accepted values are:
  • 'full': Restarts all tasks to recover the job.
  • 'region': Restarts all tasks that could be affected by the task failure. More details can be found here.
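To make the fault-tolerance knobs above concrete, an illustrative flink-conf.yaml fragment (the heartbeat values are the documented defaults, and the failover strategy is the documented default 'region'):

    heartbeat.interval: 10000
    heartbeat.timeout: 50000
    jobmanager.execution.failover-strategy: region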
diff --git a/docs/_includes/generated/expert_high_availability_section.html b/docs/_includes/generated/expert_high_availability_section.html new file mode 100644 index 0000000000000000000000000000000000000000..4a571e70cab789927f5f8cb0c0b0e0f2eaacc3ab --- /dev/null +++ b/docs/_includes/generated/expert_high_availability_section.html @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
high-availability.jobmanager.port
"0"StringThe port (range) used by the Flink Master for its RPC connections in highly-available setups. In highly-available setups, this value is used instead of 'jobmanager.rpc.port'.A value of '0' means that a random free port is chosen. TaskManagers discover this port through the high-availability services (leader election), so a random port or a port range works without requiring any additional means of service discovery.
diff --git a/docs/_includes/generated/expert_high_availability_zk_section.html b/docs/_includes/generated/expert_high_availability_zk_section.html new file mode 100644 index 0000000000000000000000000000000000000000..d7774e22560e961596615780db107c913f4fd9c1 --- /dev/null +++ b/docs/_includes/generated/expert_high_availability_zk_section.html @@ -0,0 +1,84 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
high-availability.zookeeper.client.acl
"open"StringDefines the ACL (open|creator) to be configured on ZK node. The configuration value can be set to “creator” if the ZooKeeper server configuration has the “authProvider” property mapped to use SASLAuthenticationProvider and the cluster is configured to run in secure mode (Kerberos).
high-availability.zookeeper.client.connection-timeout
15000IntegerDefines the connection timeout for ZooKeeper in ms.
high-availability.zookeeper.client.max-retry-attempts
3IntegerDefines the number of connection retries before the client gives up.
high-availability.zookeeper.client.retry-wait
5000IntegerDefines the pause between consecutive retries in ms.
high-availability.zookeeper.client.session-timeout
60000IntegerDefines the session timeout for the ZooKeeper session in ms.
high-availability.zookeeper.path.checkpoint-counter
"/checkpoint-counter"StringZooKeeper root path (ZNode) for checkpoint counters.
high-availability.zookeeper.path.checkpoints
"/checkpoints"StringZooKeeper root path (ZNode) for completed checkpoints.
high-availability.zookeeper.path.jobgraphs
"/jobgraphs"StringZooKeeper root path (ZNode) for job graphs
high-availability.zookeeper.path.latch
"/leaderlatch"StringDefines the znode of the leader latch which is used to elect the leader.
high-availability.zookeeper.path.leader
"/leader"StringDefines the znode of the leader which contains the URL to the leader and the current leader session ID.
high-availability.zookeeper.path.mesos-workers
"/mesos-workers"StringThe ZooKeeper root path for persisting the Mesos worker information.
high-availability.zookeeper.path.running-registry
"/running_job_registry/"String
diff --git a/docs/_includes/generated/expert_rest_section.html b/docs/_includes/generated/expert_rest_section.html new file mode 100644 index 0000000000000000000000000000000000000000..ab1a70cb67e992723dd371e3fdc9f68a733ffd57 --- /dev/null +++ b/docs/_includes/generated/expert_rest_section.html @@ -0,0 +1,66 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
rest.await-leader-timeout
30000LongThe time in ms that the client waits for the leader address, e.g., Dispatcher or WebMonitorEndpoint
rest.client.max-content-length
104857600IntegerThe maximum content length in bytes that the client will handle.
rest.connection-timeout
15000LongThe maximum time in ms for the client to establish a TCP connection.
rest.idleness-timeout
300000LongThe maximum time in ms for a connection to stay idle before failing.
rest.retry.delay
3000LongThe time in ms that the client waits between retries (See also `rest.retry.max-attempts`).
rest.retry.max-attempts
20IntegerThe number of retries the client will attempt if a retryable operation fails.
rest.server.max-content-length
104857600IntegerThe maximum content length in bytes that the server will handle.
rest.server.numThreads
4IntegerThe number of threads for the asynchronous processing of requests.
rest.server.thread-priority
5IntegerThread priority of the REST server's executor for processing asynchronous requests. Lowering the thread priority will give Flink's main components more CPU time whereas increasing will allocate more time for the REST server's processing.
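Illustration only: raising the REST content-length limits described above (the 200 MB figure is an arbitrary example, not a recommendation):

    rest.server.max-content-length: 209715200
    rest.client.max-content-length: 209715200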
diff --git a/docs/_includes/generated/expert_rocksdb_section.html b/docs/_includes/generated/expert_rocksdb_section.html new file mode 100644 index 0000000000000000000000000000000000000000..d3cbfe16647b8778be1f9d8bf075e174a19f1e24 --- /dev/null +++ b/docs/_includes/generated/expert_rocksdb_section.html @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
state.backend.rocksdb.checkpoint.transfer.thread.num
1IntegerThe number of threads (per stateful operator) used to transfer (download and upload) files in RocksDBStateBackend.
state.backend.rocksdb.localdir
(none)StringThe local directory (on the TaskManager) where RocksDB puts its files.
state.backend.rocksdb.options-factory
"org.apache.flink.contrib.streaming.state.DefaultConfigurableOptionsFactory"StringThe options factory class for RocksDB to create DBOptions and ColumnFamilyOptions. The default options factory is org.apache.flink.contrib.streaming.state.DefaultConfigurableOptionsFactory, and it would read the configured options which provided in 'RocksDBConfigurableOptions'.
state.backend.rocksdb.predefined-options
"DEFAULT"StringThe predefined settings for RocksDB DBOptions and ColumnFamilyOptions by Flink community. Current supported candidate predefined-options are DEFAULT, SPINNING_DISK_OPTIMIZED, SPINNING_DISK_OPTIMIZED_HIGH_MEM or FLASH_SSD_OPTIMIZED. Note that user customized options and options from the OptionsFactory are applied on top of these predefined ones.
diff --git a/docs/_includes/generated/expert_scheduling_section.html b/docs/_includes/generated/expert_scheduling_section.html new file mode 100644 index 0000000000000000000000000000000000000000..a29ad69c76e6c96bc2995314fcea4e180651c4fb --- /dev/null +++ b/docs/_includes/generated/expert_scheduling_section.html @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
cluster.evenly-spread-out-slots
falseBooleanEnable the slot spread out allocation strategy. This strategy tries to spread out the slots evenly across all available `TaskExecutors`.
slot.idle.timeout
50000LongThe timeout in milliseconds for an idle slot in the Slot Pool.
slot.request.timeout
300000LongThe timeout in milliseconds for requesting a slot from Slot Pool.
diff --git a/docs/_includes/generated/expert_security_ssl_section.html b/docs/_includes/generated/expert_security_ssl_section.html new file mode 100644 index 0000000000000000000000000000000000000000..9e5af5f622449802b14d5f30936646ea5264ae2a --- /dev/null +++ b/docs/_includes/generated/expert_security_ssl_section.html @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
security.ssl.internal.close-notify-flush-timeout
-1IntegerThe timeout (in ms) for flushing the `close_notify` that was triggered by closing a channel. If the `close_notify` was not flushed in the given timeout the channel will be closed forcibly. (-1 = use system default)
security.ssl.internal.handshake-timeout
-1IntegerThe timeout (in ms) during SSL handshake. (-1 = use system default)
security.ssl.internal.session-cache-size
-1IntegerThe size of the cache used for storing SSL session objects. According to https://github.com/netty/netty/issues/832, you should always set this to an appropriate number to not run into a bug with stalling IO threads during garbage collection. (-1 = use system default).
security.ssl.internal.session-timeout
-1IntegerThe timeout (in ms) for the cached SSL session objects. (-1 = use system default)
security.ssl.provider
"JDK"StringThe SSL engine provider to use for the ssl transport:
  • `JDK`: default Java-based SSL engine
  • `OPENSSL`: openSSL-based SSL engine using system libraries
`OPENSSL` is based on netty-tcnative and comes in two flavours:
  • dynamically linked: This will use your system's openSSL libraries (if compatible) and requires `opt/flink-shaded-netty-tcnative-dynamic-*.jar` to be copied to `lib/`
  • statically linked: Due to potential licensing issues with openSSL (see LEGAL-393), we cannot ship pre-built libraries. However, you can build the required library yourself and put it into `lib/`:
    `git clone https://github.com/apache/flink-shaded.git && cd flink-shaded && mvn clean package -Pinclude-netty-tcnative-static -pl flink-shaded-netty-tcnative-static`
diff --git a/docs/_includes/generated/expert_state_backends_section.html b/docs/_includes/generated/expert_state_backends_section.html new file mode 100644 index 0000000000000000000000000000000000000000..9d50be1f4656225aa2b6fd5c475bde6108a2e0b0 --- /dev/null +++ b/docs/_includes/generated/expert_state_backends_section.html @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
state.backend.async
trueBooleanOption whether the state backend should use an asynchronous snapshot method where possible and configurable. Some state backends may not support asynchronous snapshots, or only support asynchronous snapshots, and ignore this option.
state.backend.fs.memory-threshold
1024IntegerThe minimum size of state data files. All state chunks smaller than that are stored inline in the root checkpoint metadata file.
state.backend.fs.write-buffer-size
4096IntegerThe default size of the write buffer for the checkpoint streams that write to file systems. The actual write buffer size is determined to be the maximum of the value of this option and option 'state.backend.fs.memory-threshold'.
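For illustration, the file-system state-backend tuning keys above in flink-conf.yaml form (values are the documented defaults):

    state.backend.fs.memory-threshold: 1024
    state.backend.fs.write-buffer-size: 4096
    state.backend.async: true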
diff --git a/docs/_includes/generated/failure_rate_restart_strategy_configuration.html b/docs/_includes/generated/failure_rate_restart_strategy_configuration.html index 589dce9855dc62cb943418910d1e0ad65395fa34..93bcd22bbdf009c20365a014a4d74dbff2ca523e 100644 --- a/docs/_includes/generated/failure_rate_restart_strategy_configuration.html +++ b/docs/_includes/generated/failure_rate_restart_strategy_configuration.html @@ -3,23 +3,27 @@ Key Default - Description + Type + Description
restart-strategy.failure-rate.delay
- "1 s" + 1 s + Duration Delay between two consecutive restart attempts if `restart-strategy` has been set to `failure-rate`. It can be specified using notation: "1 min", "20 s"
restart-strategy.failure-rate.failure-rate-interval
- "1 min" + 1 min + Duration Time interval for measuring failure rate if `restart-strategy` has been set to `failure-rate`. It can be specified using notation: "1 min", "20 s"
restart-strategy.failure-rate.max-failures-per-interval
1 + Integer Maximum number of restarts in given time interval before failing a job if `restart-strategy` has been set to `failure-rate`. diff --git a/docs/_includes/generated/file_system_configuration.html b/docs/_includes/generated/file_system_configuration.html index 53fddec01d4dfa3943f174bc5f9cae4d59a7cc3f..055b1e820602b7e5b6fb5c6fef8ed99c4c70c6fd 100644 --- a/docs/_includes/generated/file_system_configuration.html +++ b/docs/_includes/generated/file_system_configuration.html @@ -3,23 +3,27 @@ Key Default - Description + Type + Description
fs.default-scheme
(none) + String The default filesystem scheme, used for paths that do not declare a scheme explicitly. May contain an authority, e.g. host:port in case of an HDFS NameNode.
fs.output.always-create-directory
false + Boolean File writers running with a parallelism larger than one create a directory for the output file path and put the different result files (one per parallel writer task) into that directory. If this option is set to "true", writers with a parallelism of 1 will also create a directory and place a single result file into it. If the option is set to "false", the writer will directly create the file directly at the output path, without creating a containing directory.
fs.overwrite-files
false + Boolean Specifies whether file output writers should overwrite existing files by default. Set to "true" to overwrite by default,"false" otherwise. diff --git a/docs/_includes/generated/fixed_delay_restart_strategy_configuration.html b/docs/_includes/generated/fixed_delay_restart_strategy_configuration.html index 9e11b7fa20f6387d672c65eb9932f6a8e5e2308b..d4d74fe0701350f7d8ce56cb6c893d06a2821430 100644 --- a/docs/_includes/generated/fixed_delay_restart_strategy_configuration.html +++ b/docs/_includes/generated/fixed_delay_restart_strategy_configuration.html @@ -3,18 +3,21 @@ Key Default - Description + Type + Description
restart-strategy.fixed-delay.attempts
1 + Integer The number of times that Flink retries the execution before the job is declared as failed if `restart-strategy` has been set to `fixed-delay`.
restart-strategy.fixed-delay.delay
- "1 s" + 1 s + Duration Delay between two consecutive restart attempts if `restart-strategy` has been set to `fixed-delay`. Delaying the retries can be helpful when the program interacts with external systems where for example connections or pending transactions should reach a timeout before re-execution is attempted. It can be specified using notation: "1 min", "20 s" diff --git a/docs/_includes/generated/heartbeat_manager_configuration.html b/docs/_includes/generated/heartbeat_manager_configuration.html index 02e64adb0b5fcfd80989e9b6b1801682901cbf51..4e6fde60b775d60a66481964104082f319edff27 100644 --- a/docs/_includes/generated/heartbeat_manager_configuration.html +++ b/docs/_includes/generated/heartbeat_manager_configuration.html @@ -3,18 +3,21 @@ Key Default - Description + Type + Description
heartbeat.interval
10000 + Long Time interval for requesting heartbeat from sender side.
heartbeat.timeout
50000 + Long Timeout for requesting and receiving heartbeat for both sender and receiver sides. diff --git a/docs/_includes/generated/high_availability_configuration.html b/docs/_includes/generated/high_availability_configuration.html index 75baf36059888fc70e31125b03c7c462eecef201..cf61274defd042b332efd0416d58201e4fd1a09a 100644 --- a/docs/_includes/generated/high_availability_configuration.html +++ b/docs/_includes/generated/high_availability_configuration.html @@ -3,29 +3,118 @@ Key Default - Description + Type + Description
high-availability
"NONE" + String Defines high-availability mode used for the cluster execution. To enable high-availability, set this mode to "ZOOKEEPER" or specify FQN of factory class.
high-availability.cluster-id
"/default" + String The ID of the Flink cluster, used to separate multiple Flink clusters from each other. Needs to be set for standalone clusters but is automatically inferred in YARN and Mesos.
high-availability.jobmanager.port
"0" - Optional port (range) used by the job manager in high-availability mode. + String + The port (range) used by the Flink Master for its RPC connections in highly-available setups. In highly-available setups, this value is used instead of 'jobmanager.rpc.port'.A value of '0' means that a random free port is chosen. TaskManagers discover this port through the high-availability services (leader election), so a random port or a port range works without requiring any additional means of service discovery.
high-availability.storageDir
(none) + String File system path (URI) where Flink persists metadata in high-availability setups. + +
high-availability.zookeeper.client.acl
+ "open" + String + Defines the ACL (open|creator) to be configured on ZK node. The configuration value can be set to “creator” if the ZooKeeper server configuration has the “authProvider” property mapped to use SASLAuthenticationProvider and the cluster is configured to run in secure mode (Kerberos). + + +
high-availability.zookeeper.client.connection-timeout
+ 15000 + Integer + Defines the connection timeout for ZooKeeper in ms. + + +
high-availability.zookeeper.client.max-retry-attempts
+ 3 + Integer + Defines the number of connection retries before the client gives up. + + +
high-availability.zookeeper.client.retry-wait
+ 5000 + Integer + Defines the pause between consecutive retries in ms. + + +
high-availability.zookeeper.client.session-timeout
+ 60000 + Integer + Defines the session timeout for the ZooKeeper session in ms. + + +
high-availability.zookeeper.path.checkpoint-counter
+ "/checkpoint-counter" + String + ZooKeeper root path (ZNode) for checkpoint counters. + + +
high-availability.zookeeper.path.checkpoints
+ "/checkpoints" + String + ZooKeeper root path (ZNode) for completed checkpoints. + + +
high-availability.zookeeper.path.jobgraphs
+ "/jobgraphs" + String + ZooKeeper root path (ZNode) for job graphs + + +
high-availability.zookeeper.path.latch
+ "/leaderlatch" + String + Defines the znode of the leader latch which is used to elect the leader. + + +
high-availability.zookeeper.path.leader
+ "/leader" + String + Defines the znode of the leader which contains the URL to the leader and the current leader session ID. + + +
high-availability.zookeeper.path.mesos-workers
+ "/mesos-workers" + String + The ZooKeeper root path for persisting the Mesos worker information. + + +
high-availability.zookeeper.path.root
+ "/flink" + String + The root path under which Flink stores its entries in ZooKeeper. + + +
high-availability.zookeeper.path.running-registry
+ "/running_job_registry/" + String + + + +
high-availability.zookeeper.quorum
+ (none) + String + The ZooKeeper quorum to use, when running Flink in a high-availability mode with ZooKeeper. + diff --git a/docs/_includes/generated/high_availability_zookeeper_configuration.html b/docs/_includes/generated/high_availability_zookeeper_configuration.html index 6577878674b3de994b64bd5f540ce14ba1e4d249..75a7b82fe982606a8ee033dc17c9cea3df12f010 100644 --- a/docs/_includes/generated/high_availability_zookeeper_configuration.html +++ b/docs/_includes/generated/high_availability_zookeeper_configuration.html @@ -3,78 +3,93 @@ Key Default - Description + Type + Description
high-availability.zookeeper.client.acl
"open" + String Defines the ACL (open|creator) to be configured on ZK node. The configuration value can be set to “creator” if the ZooKeeper server configuration has the “authProvider” property mapped to use SASLAuthenticationProvider and the cluster is configured to run in secure mode (Kerberos).
high-availability.zookeeper.client.connection-timeout
15000 + Integer Defines the connection timeout for ZooKeeper in ms.
high-availability.zookeeper.client.max-retry-attempts
3 + Integer Defines the number of connection retries before the client gives up.
high-availability.zookeeper.client.retry-wait
5000 + Integer Defines the pause between consecutive retries in ms.
high-availability.zookeeper.client.session-timeout
60000 + Integer Defines the session timeout for the ZooKeeper session in ms.
high-availability.zookeeper.path.checkpoint-counter
"/checkpoint-counter" + String ZooKeeper root path (ZNode) for checkpoint counters.
high-availability.zookeeper.path.checkpoints
"/checkpoints" + String ZooKeeper root path (ZNode) for completed checkpoints.
high-availability.zookeeper.path.jobgraphs
"/jobgraphs" + String ZooKeeper root path (ZNode) for job graphs
high-availability.zookeeper.path.latch
"/leaderlatch" + String Defines the znode of the leader latch which is used to elect the leader.
high-availability.zookeeper.path.leader
"/leader" + String Defines the znode of the leader which contains the URL to the leader and the current leader session ID.
high-availability.zookeeper.path.mesos-workers
"/mesos-workers" + String The ZooKeeper root path for persisting the Mesos worker information.
high-availability.zookeeper.path.root
"/flink" + String The root path under which Flink stores its entries in ZooKeeper.
high-availability.zookeeper.path.running-registry
"/running_job_registry/" + String
high-availability.zookeeper.quorum
(none) + String The ZooKeeper quorum to use, when running Flink in a high-availability mode with ZooKeeper. diff --git a/docs/_includes/generated/history_server_configuration.html b/docs/_includes/generated/history_server_configuration.html index 7691c0abe62a4007e3dffea742acecbbb43217c6..b74abcd0c4cc75b9ca384040baae3c1e1231031c 100644 --- a/docs/_includes/generated/history_server_configuration.html +++ b/docs/_includes/generated/history_server_configuration.html @@ -3,43 +3,57 @@ Key Default - Description + Type + Description + +
historyserver.archive.clean-expired-jobs
+ false + Boolean + Whether HistoryServer should clean up jobs that are no longer present in `historyserver.archive.fs.dir`. +
historyserver.archive.fs.dir
(none) + String Comma separated list of directories to fetch archived jobs from. The history server will monitor these directories for archived jobs. You can configure the JobManager to archive jobs to a directory via `jobmanager.archive.fs.dir`.
historyserver.archive.fs.refresh-interval
10000 + Long Interval in milliseconds for refreshing the archived job directories.
historyserver.web.address
(none) + String Address of the HistoryServer's web interface.
historyserver.web.port
8082 + Integer Port of the HistoryServer's web interface.
historyserver.web.refresh-interval
10000 + Long The refresh interval for the HistoryServer web-frontend in milliseconds.
historyserver.web.ssl.enabled
false + Boolean Enable HTTPs access to the HistoryServer web frontend. This is applicable only when the global SSL flag security.ssl.enabled is set to true.
historyserver.web.tmpdir
(none) + String This configuration parameter allows defining the Flink web directory to be used by the history server web interface. The web interface will copy its static files into the directory. diff --git a/docs/_includes/generated/influxdb_reporter_configuration.html b/docs/_includes/generated/influxdb_reporter_configuration.html new file mode 100644 index 0000000000000000000000000000000000000000..cb678f71c5ebe70e9ee316fce53c25120212e6bf --- /dev/null +++ b/docs/_includes/generated/influxdb_reporter_configuration.html @@ -0,0 +1,66 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
connectTimeout
10000Integer(optional) the InfluxDB connect timeout for metrics
consistency
ONE

Enum

Possible values: [ALL, ANY, ONE, QUORUM]
(optional) the InfluxDB consistency level for metrics
db
(none)Stringthe InfluxDB database to store metrics
host
(none)Stringthe InfluxDB server host
password
(none)String(optional) InfluxDB username's password used for authentication
port
8086Integerthe InfluxDB server port
retentionPolicy
(none)String(optional) the InfluxDB retention policy for metrics
username
(none)String(optional) InfluxDB username used for authentication
writeTimeout
10000Integer(optional) the InfluxDB write timeout for metrics
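These reporter keys are set per reporter instance, so a hedged flink-conf.yaml sketch would nest them under a reporter name following the metrics.reporter.<name>.<parameter> scheme documented further below; the reporter name 'influxdb' and the reporter class are assumptions for illustration:

    metrics.reporter.influxdb.class: org.apache.flink.metrics.influxdb.InfluxdbReporter
    metrics.reporter.influxdb.host: influxdb.example.com
    metrics.reporter.influxdb.port: 8086
    metrics.reporter.influxdb.db: flink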
diff --git a/docs/_includes/generated/job_manager_configuration.html b/docs/_includes/generated/job_manager_configuration.html index 0a60f282adb0968a1818544f71bccab9e7d7251c..ef9c5652a64ba075e552a0b04da52289d4f7b35d 100644 --- a/docs/_includes/generated/job_manager_configuration.html +++ b/docs/_includes/generated/job_manager_configuration.html @@ -3,63 +3,75 @@ Key Default - Description + Type + Description
jobmanager.archive.fs.dir
(none) + String Directory for JobManager to store the archives of completed jobs.
jobmanager.execution.attempts-history-size
16 + Integer The maximum number of prior execution attempts kept in history.
jobmanager.execution.failover-strategy
- "full" + region + String This option specifies how the job computation recovers from task failures. Accepted values are:
jobmanager.heap.size
"1024m" + String JVM heap size for the JobManager.
jobmanager.rpc.address
(none) + String The config parameter defining the network address to connect to for communication with the job manager. This value is only interpreted in setups where a single JobManager with static name or address exists (simple standalone setups, or container setups with dynamic service name resolution). It is not used in many high-availability setups, when a leader-election service (like ZooKeeper) is used to elect and discover the JobManager leader from potentially multiple standby JobManagers.
jobmanager.rpc.port
6123 + Integer The config parameter defining the network port to connect to for communication with the job manager. Like jobmanager.rpc.address, this value is only interpreted in setups where a single JobManager with static name/address and port exists (simple standalone setups, or container setups with dynamic service name resolution). This config option is not used in many high-availability setups, when a leader-election service (like ZooKeeper) is used to elect and discover the JobManager leader from potentially multiple standby JobManagers.
jobstore.cache-size
52428800 + Long The job store cache size in bytes which is used to keep completed jobs in memory.
jobstore.expiration-time
3600 + Long The time in seconds after which a completed job expires and is purged from the job store.
jobstore.max-capacity
2147483647 + Integer The max number of completed jobs that can be kept in the job store.
slot.idle.timeout
50000 + Long The timeout in milliseconds for an idle slot in the Slot Pool.
slot.request.timeout
300000 + Long The timeout in milliseconds for requesting a slot from Slot Pool. diff --git a/docs/_includes/generated/kerberos_configuration.html b/docs/_includes/generated/kerberos_configuration.html index a1cd6c886ccce2219dc7cd49744de8dcfd673f18..cf279171a54c9317a7cf86c064d7010062c54b49 100644 --- a/docs/_includes/generated/kerberos_configuration.html +++ b/docs/_includes/generated/kerberos_configuration.html @@ -3,28 +3,33 @@ Key Default - Description + Type + Description
security.kerberos.login.contexts
(none) + String A comma-separated list of login contexts to provide the Kerberos credentials to (for example, `Client,KafkaClient` to use the credentials for ZooKeeper authentication and for Kafka authentication)
security.kerberos.login.keytab
(none) + String Absolute path to a Kerberos keytab file that contains the user credentials.
security.kerberos.login.principal
(none) + String Kerberos principal name associated with the keytab.
security.kerberos.login.use-ticket-cache
true + Boolean Indicates whether to read from your Kerberos ticket cache. diff --git a/docs/_includes/generated/kubernetes_config_configuration.html b/docs/_includes/generated/kubernetes_config_configuration.html new file mode 100644 index 0000000000000000000000000000000000000000..86f8ec6b30d8102c9403301f9e161a2c835bca2c --- /dev/null +++ b/docs/_includes/generated/kubernetes_config_configuration.html @@ -0,0 +1,108 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
kubernetes.cluster-id
(none)StringThe cluster ID used for identifying the unique Flink cluster. If it is not set, the client will generate a random UUID name.
kubernetes.config.file
(none)StringThe kubernetes config file that will be used to create the client. The default is located at ~/.kube/config.
kubernetes.container-start-command-template
"%java% %classpath% %jvmmem% %jvmopts% %logging% %class% %args% %redirects%"StringTemplate for the kubernetes jobmanager and taskmanager container start invocation.
kubernetes.container.image
"flink:latest"StringImage to use for Flink containers.
kubernetes.container.image.pull-policy
"IfNotPresent"StringKubernetes image pull policy. Valid values are Always, Never, and IfNotPresent. The default policy is IfNotPresent to avoid putting pressure to image repository.
kubernetes.container.image.pull-secrets
(none)List<String>A semicolon-separated list of the Kubernetes secrets used to access private image registries.
kubernetes.context
(none)StringThe desired context from your Kubernetes config file used to configure the Kubernetes client for interacting with the cluster. This could be helpful if one has multiple contexts configured and wants to administrate different Flink clusters on different Kubernetes clusters/contexts.
kubernetes.entry.path
"/opt/flink/bin/kubernetes-entry.sh"StringThe entrypoint script of kubernetes in the image. It will be used as command for jobmanager and taskmanager container.
kubernetes.flink.conf.dir
"/opt/flink/conf"StringThe flink conf directory that will be mounted in pod. The flink-conf.yaml, log4j.properties, logback.xml in this path will be overwritten from config map.
kubernetes.flink.log.dir
"/opt/flink/log"StringThe directory that logs of jobmanager and taskmanager be saved in the pod.
kubernetes.jobmanager.cpu
1.0DoubleThe number of CPUs used by the job manager.
kubernetes.jobmanager.service-account
"default"StringService account that is used by jobmanager within kubernetes cluster. The job manager uses this service account when requesting taskmanager pods from the API server.
kubernetes.namespace
"default"StringThe namespace that will be used for running the jobmanager and taskmanager pods.
kubernetes.rest-service.exposed.type
"LoadBalancer"StringIt could be ClusterIP/NodePort/LoadBalancer(default). When set to ClusterIP, the rest servicewill not be created.
kubernetes.service.create-timeout
"1 min"StringTimeout used for creating the service. The timeout value requires a time-unit specifier (ms/s/min/h/d).
kubernetes.taskmanager.cpu
-1.0DoubleThe number of CPUs used by the task manager. By default, this is set to the number of slots per TaskManager.
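Illustrative flink-conf.yaml values for a Kubernetes deployment using the keys above (the cluster id is a placeholder; image, namespace and service type are taken from the defaults and documented values):

    kubernetes.cluster-id: my-flink-cluster
    kubernetes.namespace: default
    kubernetes.container.image: flink:latest
    kubernetes.jobmanager.cpu: 1.0
    kubernetes.rest-service.exposed.type: NodePort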
diff --git a/docs/_includes/generated/mesos_configuration.html b/docs/_includes/generated/mesos_configuration.html index 9f951c52aab11161ffc52b978c5fe781c476cad0..db8c6f46670f7bbfa2cab7a0265816024ef5ba91 100644 --- a/docs/_includes/generated/mesos_configuration.html +++ b/docs/_includes/generated/mesos_configuration.html @@ -3,68 +3,81 @@ Key Default - Description + Type + Description
mesos.failover-timeout
604800 + Integer The failover timeout in seconds for the Mesos scheduler, after which running tasks are automatically shut down.
mesos.master
(none) + String The Mesos master URL. The value should be in one of the following forms:
mesos.resourcemanager.artifactserver.port
0 + Integer The config parameter defining the Mesos artifact server port to use. Setting the port to 0 will let the OS choose an available port.
mesos.resourcemanager.artifactserver.ssl.enabled
true + Boolean Enables SSL for the Flink artifact server. Note that security.ssl.enabled also needs to be set to true to enable encryption.
mesos.resourcemanager.declined-offer-refuse-duration
5000 + Long Amount of time to ask the Mesos master to not resend a declined resource offer again. This ensures a declined resource offer isn't resent immediately after being declined
mesos.resourcemanager.framework.name
"Flink" + String Mesos framework name
mesos.resourcemanager.framework.principal
(none) + String Mesos framework principal
mesos.resourcemanager.framework.role
"*" + String Mesos framework role definition
mesos.resourcemanager.framework.secret
(none) + String Mesos framework secret
mesos.resourcemanager.framework.user
(none) + String Mesos framework user
mesos.resourcemanager.tasks.port-assignments
(none) + String Comma-separated list of configuration keys which represent a configurable port. All port keys will dynamically get a port assigned through Mesos.
mesos.resourcemanager.unused-offer-expiration
120000 + Long Amount of time to wait for unused expired offers before declining them. This ensures your scheduler will not hoard unuseful offers. diff --git a/docs/_includes/generated/mesos_task_manager_configuration.html b/docs/_includes/generated/mesos_task_manager_configuration.html index 338acc60778aa339f348f1bbc354d2ef0ca11d42..aa24d1102843e3d0ac2e80f3f3b5f013cd0a1f2d 100644 --- a/docs/_includes/generated/mesos_task_manager_configuration.html +++ b/docs/_includes/generated/mesos_task_manager_configuration.html @@ -3,83 +3,93 @@ Key Default - Description + Type + Description
mesos.constraints.hard.hostattribute
(none) + String Constraints for task placement on Mesos based on agent attributes. Takes a comma-separated list of key:value pairs corresponding to the attributes exposed by the target mesos agents. Example: az:eu-west-1a,series:t2
mesos.resourcemanager.tasks.bootstrap-cmd
(none) + String A command which is executed before the TaskManager is started.
mesos.resourcemanager.tasks.container.docker.force-pull-image
false + Boolean Instruct the docker containerizer to forcefully pull the image rather than reuse a cached version.
mesos.resourcemanager.tasks.container.docker.parameters
(none) + String Custom parameters to be passed into docker run command when using the docker containerizer. Comma separated list of "key=value" pairs. The "value" may contain '='.
mesos.resourcemanager.tasks.container.image.name
(none) + String Image name to use for the container.
mesos.resourcemanager.tasks.container.type
"mesos" + String Type of the containerization used: “mesos” or “docker”.
mesos.resourcemanager.tasks.container.volumes
(none) + String A comma separated list of [host_path:]container_path[:RO|RW]. This allows for mounting additional volumes into your container.
mesos.resourcemanager.tasks.cpus
0.0 + Double CPUs to assign to the Mesos workers.
mesos.resourcemanager.tasks.disk
0 + Integer Disk space to assign to the Mesos workers in MB.
mesos.resourcemanager.tasks.gpus
0 + Integer GPUs to assign to the Mesos workers.
mesos.resourcemanager.tasks.hostname
(none) + String Optional value to define the TaskManager’s hostname. The pattern _TASK_ is replaced by the actual id of the Mesos task. This can be used to configure the TaskManager to use Mesos DNS (e.g. _TASK_.flink-service.mesos) for name lookups. - -
mesos.resourcemanager.tasks.mem
- 1024 - Memory to assign to the Mesos workers in MB. -
mesos.resourcemanager.tasks.taskmanager-cmd
"$FLINK_HOME/bin/mesos-taskmanager.sh" + String
mesos.resourcemanager.tasks.uris
(none) + String A comma separated list of URIs of custom artifacts to be downloaded into the sandbox of Mesos workers.
taskmanager.numberOfTaskSlots
1 + Integer The number of parallel operator or user function instances that a single TaskManager can run. If this value is larger than 1, a single TaskManager takes multiple instances of a function or operator. That way, the TaskManager can utilize multiple CPU cores, but at the same time, the available memory is divided between the different operator or function instances. This value is typically proportional to the number of physical CPU cores that the TaskManager's machine has (e.g., equal to the number of cores, or half the number of cores). diff --git a/docs/_includes/generated/metric_configuration.html b/docs/_includes/generated/metric_configuration.html index 15111fc21143f8092cca83c12c259453fcb56e57..e9b6920909accf32df79e3bde6244090e736df78 100644 --- a/docs/_includes/generated/metric_configuration.html +++ b/docs/_includes/generated/metric_configuration.html @@ -3,103 +3,123 @@ Key Default - Description + Type + Description
metrics.fetcher.update-interval
10000 + Long Update interval for the metric fetcher used by the web UI in milliseconds. Decrease this value for faster updating metrics. Increase this value if the metric fetcher causes too much load. Setting this value to 0 disables the metric fetching completely.
metrics.internal.query-service.port
"0" + String The port range used for Flink's internal metric query service. Accepts a list of ports (“50100,50101”), ranges(“50100-50200”) or a combination of both. It is recommended to set a range of ports to avoid collisions when multiple Flink components are running on the same machine. Per default Flink will pick a random port.
metrics.internal.query-service.thread-priority
1 + Integer The thread priority used for Flink's internal metric query service. The thread is created by Akka's thread pool executor. The range of the priority is from 1 (MIN_PRIORITY) to 10 (MAX_PRIORITY). Warning, increasing this value may bring the main Flink components down.
metrics.latency.granularity
"operator" + String Defines the granularity of latency metrics. Accepted values are:
metrics.latency.history-size
128 + Integer Defines the number of measured latencies to maintain at each operator.
metrics.latency.interval
0 + Long Defines the interval at which latency tracking marks are emitted from the sources. Disables latency tracking if set to 0 or a negative value. Enabling this feature can significantly impact the performance of the cluster.
metrics.reporter.<name>.<parameter>
(none) + String Configures the parameter <parameter> for the reporter named <name>.
metrics.reporter.<name>.class
(none) + String The reporter class to use for the reporter named <name>.
metrics.reporter.<name>.interval
(none) + String The reporter interval to use for the reporter named <name>.
metrics.reporters
(none) + String An optional list of reporter names. If configured, only reporters whose name matches any of the names in the list will be started. Otherwise, all reporters that could be found in the configuration will be started.
metrics.scope.delimiter
"." + String Delimiter used to assemble the metric identifier.
metrics.scope.jm
"<host>.jobmanager" + String Defines the scope format string that is applied to all metrics scoped to a JobManager.
metrics.scope.jm.job
"<host>.jobmanager.<job_name>" + String Defines the scope format string that is applied to all metrics scoped to a job on a JobManager.
metrics.scope.operator
"<host>.taskmanager.<tm_id>.<job_name>.<operator_name>.<subtask_index>" + String Defines the scope format string that is applied to all metrics scoped to an operator.
metrics.scope.task
"<host>.taskmanager.<tm_id>.<job_name>.<task_name>.<subtask_index>" + String Defines the scope format string that is applied to all metrics scoped to a task.
metrics.scope.tm
"<host>.taskmanager.<tm_id>" + String Defines the scope format string that is applied to all metrics scoped to a TaskManager.
metrics.scope.tm.job
"<host>.taskmanager.<tm_id>.<job_name>" + String Defines the scope format string that is applied to all metrics scoped to a job on a TaskManager.
metrics.system-resource
false + Boolean Flag indicating whether Flink should report system resource metrics such as machine's CPU, memory or network usage.
metrics.system-resource-probing-interval
5000 + Long Interval between probing of system resource metrics specified in milliseconds. Has an effect only when 'metrics.system-resource' is enabled. diff --git a/docs/_includes/generated/netty_shuffle_environment_configuration.html b/docs/_includes/generated/netty_shuffle_environment_configuration.html index 5a4f3a84621e2a65c851d5cd7ad43a637d4371db..901a40b3bbb7ea6ef659cff60a2a140c58e6d9e9 100644 --- a/docs/_includes/generated/netty_shuffle_environment_configuration.html +++ b/docs/_includes/generated/netty_shuffle_environment_configuration.html @@ -3,43 +3,105 @@ Key Default - Description + Type + Description
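The metric options in the table that ends above are typically set in flink-conf.yaml; as a rough illustration (not part of the generated table), the same keys can also be supplied through Flink's Configuration API. The reporter name `my_jmx`, the JMXReporter class, and the concrete values below are assumptions made only for this sketch.

```java
import org.apache.flink.configuration.Configuration;

// Illustrative sketch only: wiring a named metrics reporter and a scope format
// through Flink's Configuration API. The reporter name "my_jmx" is an arbitrary
// placeholder; any reporter class on the classpath can be referenced the same way.
public class MetricOptionsSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Start only the reporters listed here (metrics.reporters).
        conf.setString("metrics.reporters", "my_jmx");
        // Per-reporter options follow the metrics.reporter.<name>.<parameter> pattern.
        conf.setString("metrics.reporter.my_jmx.class", "org.apache.flink.metrics.jmx.JMXReporter");
        // Scope format applied to all operator metrics (metrics.scope.operator).
        conf.setString("metrics.scope.operator",
                "<host>.taskmanager.<tm_id>.<job_name>.<operator_name>.<subtask_index>");
        // Emit latency markers every 30 seconds; 0 disables latency tracking.
        conf.setLong("metrics.latency.interval", 30_000L);
        System.out.println(conf);
    }
}
```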
taskmanager.data.port
0 + Integer The task manager’s port used for data exchange operations.
taskmanager.data.ssl.enabled
true + Boolean Enable SSL support for the taskmanager data transport. This is applicable only when the global flag for internal SSL (security.ssl.internal.enabled) is set to true + +
taskmanager.network.blocking-shuffle.compression.enabled
+ false + Boolean + Boolean flag indicating whether the shuffle data will be compressed for blocking shuffle mode. Note that data is compressed per buffer and compression can incur extra CPU overhead, so it is more effective for IO-bound scenarios where the data compression ratio is high. Currently, shuffle data compression is an experimental feature and the config option can be changed in the future. + + +
taskmanager.network.blocking-shuffle.type
+ "file" + String + The blocking shuffle type, either "mmap" or "file". The "auto" means selecting the property type automatically based on system memory architecture (64 bit for mmap and 32 bit for file). Note that the memory usage of mmap is not accounted by configured memory limits, but some resource frameworks like yarn would track this memory usage and kill the container once memory exceeding some threshold. Also note that this option is experimental and might be changed future. +
taskmanager.network.detailed-metrics
false + Boolean Boolean flag to enable/disable more detailed metrics about inbound/outbound network queue lengths.
taskmanager.network.memory.buffers-per-channel
2 + Integer Maximum number of network buffers to use for each outgoing/incoming channel (subpartition/input channel). In credit-based flow control mode, this indicates how many credits are exclusive in each input channel. It should be configured to at least 2 for good performance. 1 buffer is for receiving in-flight data in the subpartition and 1 buffer is for parallel serialization.
taskmanager.network.memory.floating-buffers-per-gate
8 + Integer Number of extra network buffers to use for each outgoing/incoming gate (result partition/input gate). In credit-based flow control mode, this indicates how many floating credits are shared among all the input channels. The floating buffers are distributed based on backlog (real-time output buffers in the subpartition) feedback, and can help relieve back-pressure caused by unbalanced data distribution among the subpartitions. This value should be increased in case of higher round trip times between nodes and/or larger number of machines in the cluster. + +
taskmanager.network.netty.client.connectTimeoutSec
+ 120 + Integer + The Netty client connection timeout. + + +
taskmanager.network.netty.client.numThreads
+ -1 + Integer + The number of Netty client threads. + + +
taskmanager.network.netty.num-arenas
+ -1 + Integer + The number of Netty arenas. + + +
taskmanager.network.netty.sendReceiveBufferSize
+ 0 + Integer + The Netty send and receive buffer size. This defaults to the system buffer size (cat /proc/sys/net/ipv4/tcp_[rw]mem) and is 4 MiB in modern Linux. + + +
taskmanager.network.netty.server.backlog
+ 0 + Integer + The netty server connection backlog. + + +
taskmanager.network.netty.server.numThreads
+ -1 + Integer + The number of Netty server threads. + + +
taskmanager.network.netty.transport
+ "auto" + String + The Netty transport type, either "nio" or "epoll". The "auto" means selecting the property mode automatically based on the platform. Note that the "epoll" mode can get better performance, less GC and have more advanced features which are only available on modern Linux. +
taskmanager.network.request-backoff.initial
100 + Integer Minimum backoff in milliseconds for partition requests of input channels.
taskmanager.network.request-backoff.max
10000 + Integer Maximum backoff in milliseconds for partition requests of input channels. diff --git a/docs/_includes/generated/network_netty_configuration.html b/docs/_includes/generated/network_netty_configuration.html index 47c48c0aa38d298a5976ae5a99d86a24510efbf6..3950b5604c7ffe5a4b0c7cd1810dbf0c7007aace 100644 --- a/docs/_includes/generated/network_netty_configuration.html +++ b/docs/_includes/generated/network_netty_configuration.html @@ -3,44 +3,52 @@ Key Default - Description + Type + Description
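As a hedged aside on the shuffle and Netty options listed above: the values below are examples chosen only for illustration (not recommendations from this change), set through Flink's Configuration API rather than flink-conf.yaml.

```java
import org.apache.flink.configuration.Configuration;

// Illustrative sketch only: example values for the netty shuffle options above.
public class ShuffleOptionsSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Compress per-buffer data for blocking (batch) shuffles; experimental.
        conf.setBoolean("taskmanager.network.blocking-shuffle.compression.enabled", true);
        // "mmap", "file", or "auto" (picks based on the memory architecture).
        conf.setString("taskmanager.network.blocking-shuffle.type", "auto");
        // Credit-based flow control: exclusive buffers per channel, floating buffers per gate.
        conf.setInteger("taskmanager.network.memory.buffers-per-channel", 2);
        conf.setInteger("taskmanager.network.memory.floating-buffers-per-gate", 8);
        System.out.println(conf);
    }
}
```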
taskmanager.network.netty.client.connectTimeoutSec
120 + Integer The Netty client connection timeout.
taskmanager.network.netty.client.numThreads
-1 + Integer The number of Netty client threads.
taskmanager.network.netty.num-arenas
-1 + Integer The number of Netty arenas.
taskmanager.network.netty.sendReceiveBufferSize
0 + Integer The Netty send and receive buffer size. This defaults to the system buffer size (cat /proc/sys/net/ipv4/tcp_[rw]mem) and is 4 MiB in modern Linux.
taskmanager.network.netty.server.backlog
0 + Integer The netty server connection backlog.
taskmanager.network.netty.server.numThreads
-1 + Integer The number of Netty server threads.
taskmanager.network.netty.transport
- "nio" - The Netty transport type, either "nio" or "epoll" + "auto" + String + The Netty transport type, either "nio" or "epoll". The "auto" means selecting the property mode automatically based on the platform. Note that the "epoll" mode can get better performance, less GC and have more advanced features which are only available on modern Linux. diff --git a/docs/_includes/generated/optimizer_config_configuration.html b/docs/_includes/generated/optimizer_config_configuration.html index 05c4b64ea4373664035dfc24e4eb04b695e58547..7eb0ccdb86278837195b37ac9590286b6990bf30 100644 --- a/docs/_includes/generated/optimizer_config_configuration.html +++ b/docs/_includes/generated/optimizer_config_configuration.html @@ -3,13 +3,15 @@ Key Default - Description + Type + Description
table.optimizer.agg-phase-strategy

Batch Streaming "AUTO" + String Strategy for aggregate phase. Only AUTO, TWO_PHASE or ONE_PHASE can be set. AUTO: No special enforcer for aggregate stage. Whether to choose two stage aggregate or one stage aggregate depends on cost. TWO_PHASE: Enforce to use two stage aggregate which has localAggregate and globalAggregate. Note that if aggregate call does not support optimize into two phase, we will still use one stage aggregate. @@ -18,36 +20,43 @@ ONE_PHASE: Enforce to use one stage aggregate which only has CompleteGlobalAggre
table.optimizer.distinct-agg.split.bucket-num

Streaming 1024 + Integer Configure the number of buckets when splitting distinct aggregation. The number is used in the first level aggregation to calculate a bucket key 'hash_code(distinct_key) % BUCKET_NUM' which is used as an additional group key after splitting.
table.optimizer.distinct-agg.split.enabled

Streaming false + Boolean Tells the optimizer whether to split distinct aggregation (e.g. COUNT(DISTINCT col), SUM(DISTINCT col)) into two levels. The first aggregation is shuffled by an additional key which is calculated using the hashcode of distinct_key and number of buckets. This optimization is very useful when there is data skew in distinct aggregation and gives the ability to scale up the job. Default is false.
table.optimizer.join-reorder-enabled

Batch Streaming false + Boolean Enables join reorder in optimizer. Default is disabled.
table.optimizer.join.broadcast-threshold

Batch 1048576 + Long Configures the maximum size in bytes for a table that will be broadcast to all worker nodes when performing a join. Setting this value to -1 disables broadcasting.
table.optimizer.reuse-source-enabled

Batch Streaming true + Boolean When it is true, the optimizer will try to find duplicate table sources and reuse them. This works only when table.optimizer.reuse-sub-plan-enabled is true.
table.optimizer.reuse-sub-plan-enabled

Batch Streaming true + Boolean When it is true, the optimizer will try to find duplicate sub-plans and reuse them.
table.optimizer.source.predicate-pushdown-enabled

Batch Streaming true + Boolean When it is true, the optimizer will push down predicates into the FilterableTableSource. Default value is true. diff --git a/docs/_includes/generated/optimizer_configuration.html b/docs/_includes/generated/optimizer_configuration.html index 23160b80161e88af767eeb5e72f5230906a623be..3b49f01af77f3541aec2dc154d88d29da0a69d5e 100644 --- a/docs/_includes/generated/optimizer_configuration.html +++ b/docs/_includes/generated/optimizer_configuration.html @@ -3,23 +3,27 @@ Key Default - Description + Type + Description
compiler.delimited-informat.max-line-samples
10 + Integer The maximum number of line samples taken by the compiler for delimited inputs. The samples are used to estimate the number of records. This value can be overridden for a specific input with the input format’s parameters.
compiler.delimited-informat.max-sample-len
2097152 + Integer The maximal length of a line sample that the compiler takes for delimited inputs. If the length of a single sample exceeds this value (possible because of misconfiguration of the parser), the sampling aborts. This value can be overridden for a specific input with the input format’s parameters.
compiler.delimited-informat.min-line-samples
2 + Integer The minimum number of line samples taken by the compiler for delimited inputs. The samples are used to estimate the number of records. This value can be overridden for a specific input with the input format’s parameters diff --git a/docs/_includes/generated/pipeline_configuration.html b/docs/_includes/generated/pipeline_configuration.html new file mode 100644 index 0000000000000000000000000000000000000000..f3fc216ddeaefa467e5e1968b8930096d3dbd2f9 --- /dev/null +++ b/docs/_includes/generated/pipeline_configuration.html @@ -0,0 +1,114 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
pipeline.auto-generate-uids
trueBooleanWhen auto-generated UIDs are disabled, users are forced to manually specify UIDs on DataStream applications.

It is highly recommended that users specify UIDs before deploying to production since they are used to match state in savepoints to operators in a job. Because auto-generated IDs are likely to change when modifying a job, specifying custom IDs allows an application to evolve over time without discarding state.
pipeline.auto-type-registration
trueBooleanControls whether Flink is automatically registering all types in the user programs with Kryo.
pipeline.auto-watermark-interval
0 msDurationThe interval of the automatic watermark emission. Watermarks are used throughout the streaming system to keep track of the progress of time. They are used, for example, for time based windowing.
pipeline.cached-files
(none)List<String>Files to be registered at the distributed cache under the given name. The files will be accessible from any user-defined function in the (distributed) runtime under a local path. Files may be local files (which will be distributed via BlobServer), or files in a distributed file system. The runtime will copy the files temporarily to a local cache, if needed.

Example:
`name:file1,path:`file:///tmp/file1`;name:file2,path:`hdfs:///tmp/file2``
pipeline.classpaths
(none)List<String>A semicolon-separated list of the classpaths to package with the job jars to be sent to the cluster. These have to be valid URLs.
pipeline.closure-cleaner-level
RECURSIVE

Enum

Possible values: [NONE, TOP_LEVEL, RECURSIVE]
Configures the mode in which the closure cleaner works
  • `NONE` - disables the closure cleaner completely
  • `TOP_LEVEL` - cleans only the top-level class without recursing into fields
  • `RECURSIVE` - cleans all the fields recursively
pipeline.default-kryo-serializers
(none)List<String>Semicolon separated list of pairs of class names and Kryo serializers class names to be used as Kryo default serializers

Example:
`class:org.example.ExampleClass,serializer:org.example.ExampleSerializer1; class:org.example.ExampleClass2,serializer:org.example.ExampleSerializer2`
pipeline.force-avro
falseBooleanForces Flink to use the Apache Avro serializer for POJOs.

Important: Make sure to include the `flink-avro` module.
pipeline.force-kryo
falseBooleanIf enabled, forces TypeExtractor to use Kryo serializer for POJOs even though we could analyze them as POJOs. In some cases this might be preferable. For example, when using interfaces with subclasses that cannot be analyzed as a POJO.
pipeline.generic-types
trueBooleanIf the use of generic types is disabled, Flink will throw an `UnsupportedOperationException` whenever it encounters a data type that would go through Kryo for serialization.

Disabling generic types can be helpful to eagerly find and eliminate the use of types that would go through Kryo serialization during runtime. Rather than checking types individually, using this option will throw exceptions eagerly in the places where generic types are used.

We recommend using this option only during development and pre-production phases, not during actual production use. The application program and/or the input data may be such that new, previously unseen types occur at some point. In that case, setting this option would cause the program to fail.
pipeline.global-job-parameters
(none)MapRegister a custom, serializable user configuration object. The configuration can be accessed in operators
pipeline.jars
(none)List<String>A semicolon-separated list of the jars to package with the job jars to be sent to the cluster. These have to be valid paths.
pipeline.max-parallelism
-1IntegerThe program-wide maximum parallelism used for operators which haven't specified a maximum parallelism. The maximum parallelism specifies the upper limit for dynamic scaling and the number of key groups used for partitioned state.
pipeline.object-reuse
falseBooleanWhen enabled, objects that Flink internally uses for deserialization and passing data to user-code functions will be reused. Keep in mind that this can lead to bugs when the user-code function of an operation is not aware of this behaviour.
pipeline.operator-chaining
trueBooleanOperator chaining allows non-shuffle operations to be co-located in the same thread, fully avoiding serialization and de-serialization.
pipeline.registered-kryo-types
(none)List<String>Semicolon separated list of types to be registered with the serialization stack. If the type is eventually serialized as a POJO, then the type is registered with the POJO serializer. If the type ends up being serialized with Kryo, then it will be registered at Kryo to make sure that only tags are written.
pipeline.registered-pojo-types
(none)List<String>Semicolon separated list of types to be registered with the serialization stack. If the type is eventually serialized as a POJO, then the type is registered with the POJO serializer. If the type ends up being serialized with Kryo, then it will be registered at Kryo to make sure that only tags are written.
diff --git a/docs/_includes/generated/prometheus_push_gateway_reporter_configuration.html b/docs/_includes/generated/prometheus_push_gateway_reporter_configuration.html index 09a258359c6ca940f32260b6f466cd4fb701b726..8128bf8ea480191c71e7f0548ccbbb8959bff669 100644 --- a/docs/_includes/generated/prometheus_push_gateway_reporter_configuration.html +++ b/docs/_includes/generated/prometheus_push_gateway_reporter_configuration.html @@ -3,43 +3,51 @@ Key Default - Description + Type + Description
deleteOnShutdown
true + Boolean Specifies whether to delete metrics from the PushGateway on shutdown.
filterLabelValueCharacters
true + Boolean Specifies whether to filter label value characters. If enabled, all characters not matching [a-zA-Z0-9:_] will be removed, otherwise no characters will be removed. Before disabling this option please ensure that your label values meet the Prometheus requirements.
groupingKey
(none) + String Specifies the grouping key which is the group and global labels of all metrics. The label name and value are separated by '=', and labels are separated by ';', e.g., `k1=v1;k2=v2`. Please ensure that your grouping key meets the Prometheus requirements.
host
(none) + String The PushGateway server host.
jobName
(none) + String The job name under which metrics will be pushed.
port
-1 + Integer The PushGateway server port.
randomJobNameSuffix
true + Boolean Specifies whether a random suffix should be appended to the job name. diff --git a/docs/_includes/generated/python_configuration.html b/docs/_includes/generated/python_configuration.html new file mode 100644 index 0000000000000000000000000000000000000000..212cec28805c70d330f757b74008426fea7ca7f0 --- /dev/null +++ b/docs/_includes/generated/python_configuration.html @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
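The PushGateway reporter keys above are nested under a reporter name when actually configured. A hedged sketch follows, assuming the reporter name `promgateway` and the PrometheusPushGatewayReporter class from the flink-metrics-prometheus module; the host, port, and job name are placeholders for illustration.

```java
import org.apache.flink.configuration.Configuration;

// Illustrative sketch only: the same keys are normally placed in flink-conf.yaml.
public class PushGatewayReporterSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setString("metrics.reporter.promgateway.class",
                "org.apache.flink.metrics.prometheus.PrometheusPushGatewayReporter");
        conf.setString("metrics.reporter.promgateway.host", "pushgateway.example.com"); // placeholder
        conf.setString("metrics.reporter.promgateway.port", "9091");                    // placeholder
        conf.setString("metrics.reporter.promgateway.jobName", "my-flink-job");         // placeholder
        conf.setString("metrics.reporter.promgateway.randomJobNameSuffix", "true");
        conf.setString("metrics.reporter.promgateway.deleteOnShutdown", "true");
        conf.setString("metrics.reporter.promgateway.groupingKey", "k1=v1;k2=v2");
        System.out.println(conf);
    }
}
```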
KeyDefaultTypeDescription
python.fn-execution.arrow.batch.size
1000IntegerThe maximum number of elements to include in an arrow batch for Python user-defined function execution. The arrow batch size should not exceed the bundle size. Otherwise, the bundle size will be used as the arrow batch size.
python.fn-execution.buffer.memory.size
"15mb"StringThe amount of memory to be allocated by the input buffer and output buffer of a Python worker. The memory will be accounted as managed memory if the actual memory allocated to an operator is no less than the total memory of a Python worker. Otherwise, this configuration takes no effect.
python.fn-execution.bundle.size
1000IntegerThe maximum number of elements to include in a bundle for Python user-defined function execution. The elements are processed asynchronously. One bundle of elements is processed before processing the next bundle of elements. A larger value can improve the throughput, but at the cost of more memory usage and higher latency.
python.fn-execution.bundle.time
1000LongSets the waiting timeout (in milliseconds) before processing a bundle for Python user-defined function execution. The timeout defines how long the elements of a bundle will be buffered before being processed. Lower timeouts lead to lower tail latencies, but may affect throughput.
python.fn-execution.framework.memory.size
"64mb"StringThe amount of memory to be allocated by the Python framework. The sum of the value of this configuration and "python.fn-execution.buffer.memory.size" represents the total memory of a Python worker. The memory will be accounted as managed memory if the actual memory allocated to an operator is no less than the total memory of a Python worker. Otherwise, this configuration takes no effect.
diff --git a/docs/_includes/generated/queryable_state_configuration.html b/docs/_includes/generated/queryable_state_configuration.html index 91eaf912a0cee1a180698c512e12f44f05613b7b..b0e7dc53155510c8a8d39467455670c6abc73c92 100644 --- a/docs/_includes/generated/queryable_state_configuration.html +++ b/docs/_includes/generated/queryable_state_configuration.html @@ -3,48 +3,57 @@ Key Default - Description + Type + Description
queryable-state.client.network-threads
0 + Integer Number of network (Netty's event loop) Threads for queryable state client.
queryable-state.enable
false + Boolean Option whether the queryable state proxy and server should be enabled where possible and configurable.
queryable-state.proxy.network-threads
0 + Integer Number of network (Netty's event loop) Threads for queryable state proxy.
queryable-state.proxy.ports
"9069" + String The port range of the queryable state proxy. The specified range can be a single port: "9123", a range of ports: "50100-50200", or a list of ranges and ports: "50100-50200,50300-50400,51234".
queryable-state.proxy.query-threads
0 + Integer Number of query Threads for queryable state proxy. Uses the number of slots if set to 0.
queryable-state.server.network-threads
0 + Integer Number of network (Netty's event loop) Threads for queryable state server.
queryable-state.server.ports
"9067" + String The port range of the queryable state server. The specified range can be a single port: "9123", a range of ports: "50100-50200", or a list of ranges and ports: "50100-50200,50300-50400,51234".
queryable-state.server.query-threads
0 + Integer Number of query Threads for queryable state server. Uses the number of slots if set to 0. diff --git a/docs/_includes/generated/resource_manager_configuration.html b/docs/_includes/generated/resource_manager_configuration.html index 0bd1db6fe93ed254ea20ebb05cfd02e2f0a6db18..c1955f81cf66dba3d96a5b09d98fad37a368c721 100644 --- a/docs/_includes/generated/resource_manager_configuration.html +++ b/docs/_includes/generated/resource_manager_configuration.html @@ -3,43 +3,45 @@ Key Default - Description + Type + Description
containerized.heap-cutoff-min
600 - Minimum amount of heap memory to remove in containers, as a safety margin. + Integer + Minimum amount of heap memory to remove in Job Master containers, as a safety margin.
containerized.heap-cutoff-ratio
0.25 - Percentage of heap space to remove from containers (YARN / Mesos), to compensate for other JVM memory usage. - - -
local.number-resourcemanager
- 1 - The number of resource managers start. + Float + Percentage of heap space to remove from Job Master containers (YARN / Mesos / Kubernetes), to compensate for other JVM memory usage.
resourcemanager.job.timeout
"5 minutes" + String Timeout for jobs which don't have a job manager as leader assigned.
resourcemanager.rpc.port
0 + Integer Defines the network port to connect to for communication with the resource manager. By default, it is the port of the JobManager, because the same ActorSystem is used. It's not possible to use this configuration key to define port ranges.
resourcemanager.standalone.start-up-time
-1 + Long Time in milliseconds of the start-up period of a standalone cluster. During this time, the resource manager of the standalone cluster expects new task executors to be registered, and will not fail slot requests that cannot be satisfied by any currently registered slots. After this time, it will immediately fail pending and newly incoming requests that cannot be satisfied by registered slots. If not set, 'slotmanager.request-timeout' will be used by default.
resourcemanager.taskmanager-timeout
30000 + Long The timeout for an idle task manager to be released. diff --git a/docs/_includes/generated/rest_configuration.html b/docs/_includes/generated/rest_configuration.html index 3dcb2f6b172e4c4a540a3994ecace00c95324f97..c2191dbccf9b2bb7d9b1da7116cc3c139e937dc7 100644 --- a/docs/_includes/generated/rest_configuration.html +++ b/docs/_includes/generated/rest_configuration.html @@ -3,73 +3,87 @@ Key Default - Description + Type + Description
rest.address
(none) + String The address that should be used by clients to connect to the server.
rest.await-leader-timeout
30000 + Long The time in ms that the client waits for the leader address, e.g., Dispatcher or WebMonitorEndpoint
rest.bind-address
(none) + String The address to which the server binds itself.
rest.bind-port
"8081" + String The port that the server binds itself. Accepts a list of ports (“50100,50101”), ranges (“50100-50200”) or a combination of both. It is recommended to set a range of ports to avoid collisions when multiple Rest servers are running on the same machine.
rest.client.max-content-length
104857600 + Integer The maximum content length in bytes that the client will handle.
rest.connection-timeout
15000 + Long The maximum time in ms for the client to establish a TCP connection.
rest.idleness-timeout
300000 + Long The maximum time in ms for a connection to stay idle before failing.
rest.port
8081 + Integer The port that the client connects to. If rest.bind-port has not been specified, then the REST server will bind to this port.
rest.retry.delay
3000 + Long The time in ms that the client waits between retries (See also `rest.retry.max-attempts`).
rest.retry.max-attempts
20 + Integer The number of retries the client will attempt if a retryable operation fails.
rest.server.max-content-length
104857600 + Integer The maximum content length in bytes that the server will handle.
rest.server.numThreads
4 + Integer The number of threads for the asynchronous processing of requests.
rest.server.thread-priority
5 + Integer Thread priority of the REST server's executor for processing asynchronous requests. Lowering the thread priority will give Flink's main components more CPU time whereas increasing it will allocate more time for the REST server's processing.
"jarfile=@pa "type" : "string" } }, - "parallelism" : { - "type" : "integer" - }, - "jobId" : { - "type" : "any" - }, - "allowNonRestoredState" : { - "type" : "boolean" - }, "savepointPath" : { "type" : "string" } @@ -726,15 +726,6 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:job:JobSubmitRequestBody", "properties" : { - "jobGraphFileName" : { - "type" : "string" - }, - "jobJarFileNames" : { - "type" : "array", - "items" : { - "type" : "string" - } - }, "jobArtifactFileNames" : { "type" : "array", "items" : { @@ -749,6 +740,15 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa } } } + }, + "jobGraphFileName" : { + "type" : "string" + }, + "jobJarFileNames" : { + "type" : "array", + "items" : { + "type" : "string" + } } } } @@ -917,30 +917,39 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:job:JobDetailsInfo", "properties" : { + "duration" : { + "type" : "integer" + }, + "end-time" : { + "type" : "integer" + }, + "isStoppable" : { + "type" : "boolean" + }, "jid" : { "type" : "any" }, "name" : { "type" : "string" }, - "isStoppable" : { - "type" : "boolean" + "now" : { + "type" : "integer" }, - "state" : { - "type" : "string", - "enum" : [ "CREATED", "RUNNING", "FAILING", "FAILED", "CANCELLING", "CANCELED", "FINISHED", "RESTARTING", "SUSPENDED", "RECONCILING" ] + "plan" : { + "type" : "string" }, "start-time" : { "type" : "integer" }, - "end-time" : { - "type" : "integer" - }, - "duration" : { - "type" : "integer" + "state" : { + "type" : "string", + "enum" : [ "CREATED", "RUNNING", "FAILING", "FAILED", "CANCELLING", "CANCELED", "FINISHED", "RESTARTING", "SUSPENDED", "RECONCILING" ] }, - "now" : { - "type" : "integer" + "status-counts" : { + "type" : "object", + "additionalProperties" : { + "type" : "integer" + } }, "timestamps" : { "type" : "object", @@ -954,33 +963,14 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:job:JobDetailsInfo:JobVertexDetailsInfo", "properties" : { - "id" : { - "type" : "any" - }, - "name" : { - "type" : "string" - }, - "parallelism" : { - "type" : "integer" - }, - "status" : { - "type" : "string", - "enum" : [ "CREATED", "SCHEDULED", "DEPLOYING", "RUNNING", "FINISHED", "CANCELING", "CANCELED", "FAILED", "RECONCILING" ] - }, - "start-time" : { + "duration" : { "type" : "integer" }, "end-time" : { "type" : "integer" }, - "duration" : { - "type" : "integer" - }, - "tasks" : { - "type" : "object", - "additionalProperties" : { - "type" : "integer" - } + "id" : { + "type" : "any" }, "metrics" : { "type" : "object", @@ -992,16 +982,16 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "read-bytes-complete" : { "type" : "boolean" }, - "write-bytes" : { + "read-records" : { "type" : "integer" }, - "write-bytes-complete" : { + "read-records-complete" : { "type" : "boolean" }, - "read-records" : { + "write-bytes" : { "type" : "integer" }, - "read-records-complete" : { + "write-bytes-complete" : { "type" : "boolean" }, "write-records" : { @@ -1011,18 +1001,28 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "boolean" } } + }, + "name" : { + "type" : "string" + }, + "parallelism" : { + "type" : "integer" + }, + 
"start-time" : { + "type" : "integer" + }, + "status" : { + "type" : "string", + "enum" : [ "CREATED", "SCHEDULED", "DEPLOYING", "RUNNING", "FINISHED", "CANCELING", "CANCELED", "FAILED", "RECONCILING" ] + }, + "tasks" : { + "type" : "object", + "additionalProperties" : { + "type" : "integer" + } } } } - }, - "status-counts" : { - "type" : "object", - "additionalProperties" : { - "type" : "integer" - } - }, - "plan" : { - "type" : "string" } } } @@ -1147,6 +1147,12 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "any" } }, + "serialized-user-task-accumulators" : { + "type" : "object", + "additionalProperties" : { + "type" : "any" + } + }, "user-task-accumulators" : { "type" : "array", "items" : { @@ -1164,12 +1170,6 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa } } } - }, - "serialized-user-task-accumulators" : { - "type" : "object", - "additionalProperties" : { - "type" : "any" - } } } } @@ -1226,53 +1226,71 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:CheckpointingStatistics:Counts", "properties" : { - "restored" : { + "completed" : { "type" : "integer" }, - "total" : { + "failed" : { "type" : "integer" }, "in_progress" : { "type" : "integer" }, - "completed" : { + "restored" : { "type" : "integer" }, - "failed" : { + "total" : { "type" : "integer" } } }, - "summary" : { - "type" : "object", - "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:CheckpointingStatistics:Summary", - "properties" : { - "state_size" : { - "type" : "object", - "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:MinMaxAvgStatistics", - "properties" : { - "min" : { - "type" : "integer" - }, - "max" : { - "type" : "integer" - }, - "avg" : { - "type" : "integer" - } - } - }, - "end_to_end_duration" : { - "type" : "object", - "$ref" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:MinMaxAvgStatistics" - }, - "alignment_buffered" : { - "type" : "object", - "$ref" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:MinMaxAvgStatistics" - } - } - }, - "latest" : { + "history" : { + "type" : "array", + "items" : { + "type" : "object", + "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:CheckpointStatistics", + "properties" : { + "alignment_buffered" : { + "type" : "integer" + }, + "end_to_end_duration" : { + "type" : "integer" + }, + "id" : { + "type" : "integer" + }, + "is_savepoint" : { + "type" : "boolean" + }, + "latest_ack_timestamp" : { + "type" : "integer" + }, + "num_acknowledged_subtasks" : { + "type" : "integer" + }, + "num_subtasks" : { + "type" : "integer" + }, + "state_size" : { + "type" : "integer" + }, + "status" : { + "type" : "string", + "enum" : [ "IN_PROGRESS", "COMPLETED", "FAILED" ] + }, + "tasks" : { + "type" : "object", + "additionalProperties" : { + "type" : "object", + "$ref" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:TaskCheckpointStatistics" + } + }, + "trigger_timestamp" : { + "type" : "integer" + } + } + } + }, + "latest" : { "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:CheckpointingStatistics:LatestCheckpoints", "properties" : { @@ -1280,118 +1298,117 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : 
"urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:CheckpointStatistics:CompletedCheckpointStatistics", "properties" : { - "id" : { + "alignment_buffered" : { "type" : "integer" }, - "status" : { - "type" : "string", - "enum" : [ "IN_PROGRESS", "COMPLETED", "FAILED" ] - }, - "is_savepoint" : { + "discarded" : { "type" : "boolean" }, - "trigger_timestamp" : { + "end_to_end_duration" : { "type" : "integer" }, - "latest_ack_timestamp" : { - "type" : "integer" + "external_path" : { + "type" : "string" }, - "state_size" : { + "id" : { "type" : "integer" }, - "end_to_end_duration" : { + "is_savepoint" : { + "type" : "boolean" + }, + "latest_ack_timestamp" : { "type" : "integer" }, - "alignment_buffered" : { + "num_acknowledged_subtasks" : { "type" : "integer" }, "num_subtasks" : { "type" : "integer" }, - "num_acknowledged_subtasks" : { + "state_size" : { "type" : "integer" }, + "status" : { + "type" : "string", + "enum" : [ "IN_PROGRESS", "COMPLETED", "FAILED" ] + }, "tasks" : { "type" : "object", "additionalProperties" : { "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:TaskCheckpointStatistics", "properties" : { - "id" : { + "alignment_buffered" : { "type" : "integer" }, - "status" : { - "type" : "string", - "enum" : [ "IN_PROGRESS", "COMPLETED", "FAILED" ] - }, - "latest_ack_timestamp" : { + "end_to_end_duration" : { "type" : "integer" }, - "state_size" : { + "id" : { "type" : "integer" }, - "end_to_end_duration" : { + "latest_ack_timestamp" : { "type" : "integer" }, - "alignment_buffered" : { + "num_acknowledged_subtasks" : { "type" : "integer" }, "num_subtasks" : { "type" : "integer" }, - "num_acknowledged_subtasks" : { + "state_size" : { "type" : "integer" + }, + "status" : { + "type" : "string", + "enum" : [ "IN_PROGRESS", "COMPLETED", "FAILED" ] } } } }, - "external_path" : { - "type" : "string" - }, - "discarded" : { - "type" : "boolean" + "trigger_timestamp" : { + "type" : "integer" } } }, - "savepoint" : { - "type" : "object", - "$ref" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:CheckpointStatistics:CompletedCheckpointStatistics" - }, "failed" : { "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:CheckpointStatistics:FailedCheckpointStatistics", "properties" : { - "id" : { + "alignment_buffered" : { "type" : "integer" }, - "status" : { - "type" : "string", - "enum" : [ "IN_PROGRESS", "COMPLETED", "FAILED" ] + "end_to_end_duration" : { + "type" : "integer" }, - "is_savepoint" : { - "type" : "boolean" + "failure_message" : { + "type" : "string" }, - "trigger_timestamp" : { + "failure_timestamp" : { "type" : "integer" }, - "latest_ack_timestamp" : { + "id" : { "type" : "integer" }, - "state_size" : { - "type" : "integer" + "is_savepoint" : { + "type" : "boolean" }, - "end_to_end_duration" : { + "latest_ack_timestamp" : { "type" : "integer" }, - "alignment_buffered" : { + "num_acknowledged_subtasks" : { "type" : "integer" }, "num_subtasks" : { "type" : "integer" }, - "num_acknowledged_subtasks" : { + "state_size" : { "type" : "integer" }, + "status" : { + "type" : "string", + "enum" : [ "IN_PROGRESS", "COMPLETED", "FAILED" ] + }, "tasks" : { "type" : "object", "additionalProperties" : { @@ -1399,11 +1416,8 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "$ref" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:TaskCheckpointStatistics" } }, - "failure_timestamp" : { + "trigger_timestamp" : { "type" : 
"integer" - }, - "failure_message" : { - "type" : "string" } } }, @@ -1411,64 +1425,50 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:CheckpointingStatistics:RestoredCheckpointStatistics", "properties" : { - "id" : { - "type" : "integer" + "external_path" : { + "type" : "string" }, - "restore_timestamp" : { + "id" : { "type" : "integer" }, "is_savepoint" : { "type" : "boolean" }, - "external_path" : { - "type" : "string" + "restore_timestamp" : { + "type" : "integer" } } + }, + "savepoint" : { + "type" : "object", + "$ref" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:CheckpointStatistics:CompletedCheckpointStatistics" } } }, - "history" : { - "type" : "array", - "items" : { - "type" : "object", - "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:CheckpointStatistics", - "properties" : { - "id" : { - "type" : "integer" - }, - "status" : { - "type" : "string", - "enum" : [ "IN_PROGRESS", "COMPLETED", "FAILED" ] - }, - "is_savepoint" : { - "type" : "boolean" - }, - "trigger_timestamp" : { - "type" : "integer" - }, - "latest_ack_timestamp" : { - "type" : "integer" - }, - "state_size" : { - "type" : "integer" - }, - "end_to_end_duration" : { - "type" : "integer" - }, - "alignment_buffered" : { - "type" : "integer" - }, - "num_subtasks" : { - "type" : "integer" - }, - "num_acknowledged_subtasks" : { - "type" : "integer" - }, - "tasks" : { - "type" : "object", - "additionalProperties" : { - "type" : "object", - "$ref" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:TaskCheckpointStatistics" + "summary" : { + "type" : "object", + "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:CheckpointingStatistics:Summary", + "properties" : { + "alignment_buffered" : { + "type" : "object", + "$ref" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:MinMaxAvgStatistics" + }, + "end_to_end_duration" : { + "type" : "object", + "$ref" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:MinMaxAvgStatistics" + }, + "state_size" : { + "type" : "object", + "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:MinMaxAvgStatistics", + "properties" : { + "avg" : { + "type" : "integer" + }, + "max" : { + "type" : "integer" + }, + "min" : { + "type" : "integer" } } } @@ -1525,32 +1525,35 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:CheckpointConfigInfo", "properties" : { - "mode" : { - "type" : "any" + "externalization" : { + "type" : "object", + "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:CheckpointConfigInfo:ExternalizedCheckpointInfo", + "properties" : { + "delete_on_cancellation" : { + "type" : "boolean" + }, + "enabled" : { + "type" : "boolean" + } + } }, "interval" : { "type" : "integer" }, - "timeout" : { + "max_concurrent" : { "type" : "integer" }, "min_pause" : { "type" : "integer" }, - "max_concurrent" : { - "type" : "integer" + "mode" : { + "type" : "any" }, - "externalization" : { - "type" : "object", - "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:CheckpointConfigInfo:ExternalizedCheckpointInfo", - "properties" : { - "enabled" : { - "type" : "boolean" - }, - "delete_on_cancellation" : { - "type" : "boolean" - } - } + "state_backend" : { + "type" : 
"string" + }, + "timeout" : { + "type" : "integer" } } } @@ -1604,70 +1607,70 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:CheckpointStatistics", "properties" : { - "id" : { + "alignment_buffered" : { "type" : "integer" }, - "status" : { - "type" : "string", - "enum" : [ "IN_PROGRESS", "COMPLETED", "FAILED" ] - }, - "is_savepoint" : { - "type" : "boolean" - }, - "trigger_timestamp" : { + "end_to_end_duration" : { "type" : "integer" }, - "latest_ack_timestamp" : { + "id" : { "type" : "integer" }, - "state_size" : { - "type" : "integer" + "is_savepoint" : { + "type" : "boolean" }, - "end_to_end_duration" : { + "latest_ack_timestamp" : { "type" : "integer" }, - "alignment_buffered" : { + "num_acknowledged_subtasks" : { "type" : "integer" }, "num_subtasks" : { "type" : "integer" }, - "num_acknowledged_subtasks" : { + "state_size" : { "type" : "integer" }, + "status" : { + "type" : "string", + "enum" : [ "IN_PROGRESS", "COMPLETED", "FAILED" ] + }, "tasks" : { "type" : "object", "additionalProperties" : { "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:TaskCheckpointStatistics", "properties" : { - "id" : { + "alignment_buffered" : { "type" : "integer" }, - "status" : { - "type" : "string", - "enum" : [ "IN_PROGRESS", "COMPLETED", "FAILED" ] - }, - "latest_ack_timestamp" : { + "end_to_end_duration" : { "type" : "integer" }, - "state_size" : { + "id" : { "type" : "integer" }, - "end_to_end_duration" : { + "latest_ack_timestamp" : { "type" : "integer" }, - "alignment_buffered" : { + "num_acknowledged_subtasks" : { "type" : "integer" }, "num_subtasks" : { "type" : "integer" }, - "num_acknowledged_subtasks" : { + "state_size" : { "type" : "integer" + }, + "status" : { + "type" : "string", + "enum" : [ "IN_PROGRESS", "COMPLETED", "FAILED" ] } } } + }, + "trigger_timestamp" : { + "type" : "integer" } } } @@ -1722,98 +1725,102 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:TaskCheckpointStatisticsWithSubtaskDetails", "properties" : { - "id" : { + "alignment_buffered" : { "type" : "integer" }, - "status" : { - "type" : "string", - "enum" : [ "IN_PROGRESS", "COMPLETED", "FAILED" ] - }, - "latest_ack_timestamp" : { + "end_to_end_duration" : { "type" : "integer" }, - "state_size" : { + "id" : { "type" : "integer" }, - "end_to_end_duration" : { + "latest_ack_timestamp" : { "type" : "integer" }, - "alignment_buffered" : { + "num_acknowledged_subtasks" : { "type" : "integer" }, "num_subtasks" : { "type" : "integer" }, - "num_acknowledged_subtasks" : { + "state_size" : { "type" : "integer" }, + "status" : { + "type" : "string", + "enum" : [ "IN_PROGRESS", "COMPLETED", "FAILED" ] + }, + "subtasks" : { + "type" : "array", + "items" : { + "type" : "object", + "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:SubtaskCheckpointStatistics", + "properties" : { + "index" : { + "type" : "integer" + }, + "status" : { + "type" : "string" + } + } + } + }, "summary" : { "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:TaskCheckpointStatisticsWithSubtaskDetails:Summary", "properties" : { - "state_size" : { + "alignment" : { "type" : "object", - "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:MinMaxAvgStatistics", + "id" : 
"urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:TaskCheckpointStatisticsWithSubtaskDetails:CheckpointAlignment", "properties" : { - "min" : { - "type" : "integer" - }, - "max" : { - "type" : "integer" + "buffered" : { + "type" : "object", + "$ref" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:MinMaxAvgStatistics" }, - "avg" : { - "type" : "integer" + "duration" : { + "type" : "object", + "$ref" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:MinMaxAvgStatistics" } } }, - "end_to_end_duration" : { - "type" : "object", - "$ref" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:MinMaxAvgStatistics" - }, "checkpoint_duration" : { "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:TaskCheckpointStatisticsWithSubtaskDetails:CheckpointDuration", "properties" : { - "sync" : { + "async" : { "type" : "object", "$ref" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:MinMaxAvgStatistics" }, - "async" : { + "sync" : { "type" : "object", "$ref" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:MinMaxAvgStatistics" } } }, - "alignment" : { + "end_to_end_duration" : { "type" : "object", - "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:TaskCheckpointStatisticsWithSubtaskDetails:CheckpointAlignment", + "$ref" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:MinMaxAvgStatistics" + }, + "start_delay" : { + "type" : "object", + "$ref" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:MinMaxAvgStatistics" + }, + "state_size" : { + "type" : "object", + "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:MinMaxAvgStatistics", "properties" : { - "buffered" : { - "type" : "object", - "$ref" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:MinMaxAvgStatistics" + "avg" : { + "type" : "integer" }, - "duration" : { - "type" : "object", - "$ref" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:MinMaxAvgStatistics" + "max" : { + "type" : "integer" + }, + "min" : { + "type" : "integer" } } } } - }, - "subtasks" : { - "type" : "array", - "items" : { - "type" : "object", - "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:checkpoints:SubtaskCheckpointStatistics", - "properties" : { - "index" : { - "type" : "integer" - }, - "status" : { - "type" : "string" - } - } - } } } } @@ -1893,6 +1900,16 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa + + Query parameters + + + + + + @@ -1914,12 +1931,6 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:JobExceptionsInfo", "properties" : { - "root-exception" : { - "type" : "string" - }, - "timestamp" : { - "type" : "integer" - }, "all-exceptions" : { "type" : "array", "items" : { @@ -1929,10 +1940,10 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "exception" : { "type" : "string" }, - "task" : { + "location" : { "type" : "string" }, - "location" : { + "task" : { "type" : "string" }, "timestamp" : { @@ -1941,6 +1952,12 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa } } }, + "root-exception" : { + "type" : "string" + }, + "timestamp" : { + "type" : "integer" + }, "truncated" : { "type" : "boolean" } @@ -1995,6 +2012,9 @@ Using 'curl' you can upload a jar via 'curl -X 
POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:job:JobExecutionResultResponseBody", "properties" : { + "job-execution-result" : { + "type" : "any" + }, "status" : { "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:queue:QueueStatus", @@ -2006,9 +2026,6 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "enum" : [ "IN_PROGRESS", "COMPLETED" ] } } - }, - "job-execution-result" : { - "type" : "any" } } } @@ -2238,6 +2255,9 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:handler:async:AsynchronousOperationResult", "properties" : { + "operation" : { + "type" : "any" + }, "status" : { "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:queue:QueueStatus", @@ -2248,9 +2268,6 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "enum" : [ "IN_PROGRESS", "COMPLETED" ] } } - }, - "operation" : { - "type" : "any" } } } @@ -2292,11 +2309,11 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:job:savepoints:SavepointTriggerRequestBody", "properties" : { - "target-directory" : { - "type" : "string" - }, "cancel-job" : { "type" : "boolean" + }, + "target-directory" : { + "type" : "string" } } } @@ -2369,6 +2386,9 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:handler:async:AsynchronousOperationResult", "properties" : { + "operation" : { + "type" : "any" + }, "status" : { "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:queue:QueueStatus", @@ -2379,9 +2399,6 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "enum" : [ "IN_PROGRESS", "COMPLETED" ] } } - }, - "operation" : { - "type" : "any" } } } @@ -2423,11 +2440,11 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:job:savepoints:stop:StopWithSavepointRequestBody", "properties" : { - "targetDirectory" : { - "type" : "string" - }, "drain" : { "type" : "boolean" + }, + "targetDirectory" : { + "type" : "string" } } } @@ -2506,39 +2523,29 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "name" : { "type" : "string" }, - "parallelism" : { + "now" : { "type" : "integer" }, - "now" : { + "parallelism" : { "type" : "integer" }, "subtasks" : { "type" : "array", "items" : { "type" : "object", - "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:SubtaskExecutionAttemptDetailsInfo", + "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:job:SubtaskExecutionAttemptDetailsInfo", "properties" : { - "subtask" : { - "type" : "integer" - }, - "status" : { - "type" : "string", - "enum" : [ "CREATED", "SCHEDULED", "DEPLOYING", "RUNNING", "FINISHED", "CANCELING", "CANCELED", "FAILED", "RECONCILING" ] - }, "attempt" : { "type" : "integer" }, - "host" : { - "type" : "string" - }, - "start-time" : { + "duration" : { "type" : "integer" }, "end-time" : { "type" : "integer" }, - "duration" : { - "type" : "integer" + "host" : { + "type" : "string" }, "metrics" : { "type" : "object", @@ -2550,16 +2557,16 @@ Using 'curl' you can upload a 
jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "read-bytes-complete" : { "type" : "boolean" }, - "write-bytes" : { + "read-records" : { "type" : "integer" }, - "write-bytes-complete" : { + "read-records-complete" : { "type" : "boolean" }, - "read-records" : { + "write-bytes" : { "type" : "integer" }, - "read-records-complete" : { + "write-bytes-complete" : { "type" : "boolean" }, "write-records" : { @@ -2570,9 +2577,19 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa } } }, + "start-time" : { + "type" : "integer" + }, "start_time" : { "type" : "integer" }, + "status" : { + "type" : "string", + "enum" : [ "CREATED", "SCHEDULED", "DEPLOYING", "RUNNING", "FINISHED", "CANCELING", "CANCELED", "FAILED", "RECONCILING" ] + }, + "subtask" : { + "type" : "integer" + }, "taskmanager-id" : { "type" : "string" } @@ -2704,10 +2721,6 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:JobVertexBackPressureInfo", "properties" : { - "status" : { - "type" : "string", - "enum" : [ "deprecated", "ok" ] - }, "backpressure-level" : { "type" : "string", "enum" : [ "ok", "low", "high" ] @@ -2715,21 +2728,25 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "end-timestamp" : { "type" : "integer" }, + "status" : { + "type" : "string", + "enum" : [ "deprecated", "ok" ] + }, "subtasks" : { "type" : "array", "items" : { "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:JobVertexBackPressureInfo:SubtaskBackPressureInfo", "properties" : { - "subtask" : { - "type" : "integer" - }, "backpressure-level" : { "type" : "string", "enum" : [ "ok", "low", "high" ] }, "ratio" : { "type" : "number" + }, + "subtask" : { + "type" : "integer" } } } @@ -2857,15 +2874,15 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:job:SubtasksAllAccumulatorsInfo:SubtaskAccumulatorsInfo", "properties" : { - "subtask" : { - "type" : "integer" - }, "attempt" : { "type" : "integer" }, "host" : { "type" : "string" }, + "subtask" : { + "type" : "integer" + }, "user-accumulators" : { "type" : "array", "items" : { @@ -3001,27 +3018,17 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:job:SubtaskExecutionAttemptDetailsInfo", "properties" : { - "subtask" : { - "type" : "integer" - }, - "status" : { - "type" : "string", - "enum" : [ "CREATED", "SCHEDULED", "DEPLOYING", "RUNNING", "FINISHED", "CANCELING", "CANCELED", "FAILED", "RECONCILING" ] - }, "attempt" : { "type" : "integer" }, - "host" : { - "type" : "string" - }, - "start-time" : { + "duration" : { "type" : "integer" }, "end-time" : { "type" : "integer" }, - "duration" : { - "type" : "integer" + "host" : { + "type" : "string" }, "metrics" : { "type" : "object", @@ -3033,16 +3040,16 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "read-bytes-complete" : { "type" : "boolean" }, - "write-bytes" : { + "read-records" : { "type" : "integer" }, - "write-bytes-complete" : { + "read-records-complete" : { "type" : "boolean" }, - "read-records" : { + "write-bytes" : { "type" : "integer" }, - "read-records-complete" : { + "write-bytes-complete" : { "type" : "boolean" }, "write-records" : { @@ -3053,6 +3060,19 @@ Using 'curl' you can upload 
a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa } } }, + "start-time" : { + "type" : "integer" + }, + "start_time" : { + "type" : "integer" + }, + "status" : { + "type" : "string", + "enum" : [ "CREATED", "SCHEDULED", "DEPLOYING", "RUNNING", "FINISHED", "CANCELING", "CANCELED", "FAILED", "RECONCILING" ] + }, + "subtask" : { + "type" : "integer" + }, "taskmanager-id" : { "type" : "string" } @@ -3110,27 +3130,17 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:job:SubtaskExecutionAttemptDetailsInfo", "properties" : { - "subtask" : { - "type" : "integer" - }, - "status" : { - "type" : "string", - "enum" : [ "CREATED", "SCHEDULED", "DEPLOYING", "RUNNING", "FINISHED", "CANCELING", "CANCELED", "FAILED", "RECONCILING" ] - }, "attempt" : { "type" : "integer" }, - "host" : { - "type" : "string" - }, - "start-time" : { + "duration" : { "type" : "integer" }, "end-time" : { "type" : "integer" }, - "duration" : { - "type" : "integer" + "host" : { + "type" : "string" }, "metrics" : { "type" : "object", @@ -3142,16 +3152,16 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "read-bytes-complete" : { "type" : "boolean" }, - "write-bytes" : { + "read-records" : { "type" : "integer" }, - "write-bytes-complete" : { + "read-records-complete" : { "type" : "boolean" }, - "read-records" : { + "write-bytes" : { "type" : "integer" }, - "read-records-complete" : { + "write-bytes-complete" : { "type" : "boolean" }, "write-records" : { @@ -3162,6 +3172,19 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa } } }, + "start-time" : { + "type" : "integer" + }, + "start_time" : { + "type" : "integer" + }, + "status" : { + "type" : "string", + "enum" : [ "CREATED", "SCHEDULED", "DEPLOYING", "RUNNING", "FINISHED", "CANCELING", "CANCELED", "FAILED", "RECONCILING" ] + }, + "subtask" : { + "type" : "integer" + }, "taskmanager-id" : { "type" : "string" } @@ -3219,15 +3242,15 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:job:SubtaskExecutionAttemptAccumulatorsInfo", "properties" : { - "subtask" : { - "type" : "integer" - }, "attempt" : { "type" : "integer" }, "id" : { "type" : "string" }, + "subtask" : { + "type" : "integer" + }, "user-accumulators" : { "type" : "array", "items" : { @@ -3373,13 +3396,13 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:SubtasksTimesInfo:SubtaskTimeInfo", "properties" : { - "subtask" : { + "duration" : { "type" : "integer" }, "host" : { "type" : "string" }, - "duration" : { + "subtask" : { "type" : "integer" }, "timestamps" : { @@ -3458,21 +3481,14 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:JobVertexTaskManagersInfo:TaskManagersInfo", "properties" : { - "host" : { - "type" : "string" - }, - "status" : { - "type" : "string", - "enum" : [ "CREATED", "SCHEDULED", "DEPLOYING", "RUNNING", "FINISHED", "CANCELING", "CANCELED", "FAILED", "RECONCILING" ] - }, - "start-time" : { + "duration" : { "type" : "integer" }, "end-time" : { "type" : "integer" }, - "duration" : { - "type" : "integer" + "host" : { + "type" : "string" }, "metrics" : { "type" : "object", @@ -3484,16 
+3500,16 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "read-bytes-complete" : { "type" : "boolean" }, - "write-bytes" : { + "read-records" : { "type" : "integer" }, - "write-bytes-complete" : { + "read-records-complete" : { "type" : "boolean" }, - "read-records" : { + "write-bytes" : { "type" : "integer" }, - "read-records-complete" : { + "write-bytes-complete" : { "type" : "boolean" }, "write-records" : { @@ -3504,6 +3520,13 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa } } }, + "start-time" : { + "type" : "integer" + }, + "status" : { + "type" : "string", + "enum" : [ "CREATED", "SCHEDULED", "DEPLOYING", "RUNNING", "FINISHED", "CANCELING", "CANCELED", "FAILED", "RECONCILING" ] + }, "status-counts" : { "type" : "object", "additionalProperties" : { @@ -3524,6 +3547,55 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa + + + + + + + + + + + + + + + + + + + + + + + + + +
/jobs/:jobid/vertices/:vertexid/watermarks
Verb: GET    Response code: 200 OK
Returns the watermarks for all subtasks of a task.
Path parameters
  • jobid - 32-character hexadecimal string value that identifies a job.
  • vertexid - 32-character hexadecimal string value that identifies a job vertex.

Request schema:
{}

Response schema:
{
  "type" : "any"
}
@@ -3557,32 +3629,32 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:handler:legacy:messages:ClusterOverviewWithVersion", "properties" : { - "taskmanagers" : { - "type" : "integer" + "flink-commit" : { + "type" : "string" }, - "slots-total" : { - "type" : "integer" + "flink-version" : { + "type" : "string" }, - "slots-available" : { + "jobs-cancelled" : { "type" : "integer" }, - "jobs-running" : { + "jobs-failed" : { "type" : "integer" }, "jobs-finished" : { "type" : "integer" }, - "jobs-cancelled" : { + "jobs-running" : { "type" : "integer" }, - "jobs-failed" : { + "slots-available" : { "type" : "integer" }, - "flink-version" : { - "type" : "string" + "slots-total" : { + "type" : "integer" }, - "flink-commit" : { - "type" : "string" + "taskmanagers" : { + "type" : "integer" } } } @@ -3687,6 +3759,9 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:handler:async:AsynchronousOperationResult", "properties" : { + "operation" : { + "type" : "any" + }, "status" : { "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:queue:QueueStatus", @@ -3697,9 +3772,6 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "enum" : [ "IN_PROGRESS", "COMPLETED" ] } } - }, - "operation" : { - "type" : "any" } } } @@ -3748,21 +3820,9 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:taskmanager:TaskManagerInfo", "properties" : { - "id" : { - "type" : "any" - }, - "path" : { - "type" : "string" - }, "dataPort" : { "type" : "integer" }, - "timeSinceLastHeartbeat" : { - "type" : "integer" - }, - "slotsNumber" : { - "type" : "integer" - }, "freeSlots" : { "type" : "integer" }, @@ -3773,16 +3833,28 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "cpuCores" : { "type" : "integer" }, - "physicalMemory" : { - "type" : "integer" - }, "freeMemory" : { "type" : "integer" }, "managedMemory" : { "type" : "integer" + }, + "physicalMemory" : { + "type" : "integer" } } + }, + "id" : { + "type" : "any" + }, + "path" : { + "type" : "string" + }, + "slotsNumber" : { + "type" : "integer" + }, + "timeSinceLastHeartbeat" : { + "type" : "integer" } } } @@ -3888,21 +3960,9 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:taskmanager:TaskManagerDetailsInfo", "properties" : { - "id" : { - "type" : "any" - }, - "path" : { - "type" : "string" - }, "dataPort" : { "type" : "integer" }, - "timeSinceLastHeartbeat" : { - "type" : "integer" - }, - "slotsNumber" : { - "type" : "integer" - }, "freeSlots" : { "type" : "integer" }, @@ -3913,82 +3973,94 @@ Using 'curl' you can upload a jar via 'curl -X POST -H "Expect:" -F "jarfile=@pa "cpuCores" : { "type" : "integer" }, - "physicalMemory" : { - "type" : "integer" - }, "freeMemory" : { "type" : "integer" }, "managedMemory" : { "type" : "integer" + }, + "physicalMemory" : { + "type" : "integer" } } }, + "id" : { + "type" : "any" + }, "metrics" : { "type" : "object", "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:taskmanager:TaskManagerMetricsInfo", "properties" : { - "heapUsed" : { + "directCount" : { "type" : "integer" }, - "heapCommitted" : { + 
"directMax" : { "type" : "integer" }, - "heapMax" : { + "directUsed" : { "type" : "integer" }, - "nonHeapUsed" : { - "type" : "integer" + "garbageCollectors" : { + "type" : "array", + "items" : { + "type" : "object", + "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:taskmanager:TaskManagerMetricsInfo:GarbageCollectorInfo", + "properties" : { + "count" : { + "type" : "integer" + }, + "name" : { + "type" : "string" + }, + "time" : { + "type" : "integer" + } + } + } }, - "nonHeapCommitted" : { + "heapCommitted" : { "type" : "integer" }, - "nonHeapMax" : { + "heapMax" : { "type" : "integer" }, - "directCount" : { + "heapUsed" : { "type" : "integer" }, - "directUsed" : { + "mappedCount" : { "type" : "integer" }, - "directMax" : { + "mappedMax" : { "type" : "integer" }, - "mappedCount" : { + "mappedUsed" : { "type" : "integer" }, - "mappedUsed" : { + "memorySegmentsAvailable" : { "type" : "integer" }, - "mappedMax" : { + "memorySegmentsTotal" : { "type" : "integer" }, - "memorySegmentsAvailable" : { + "nonHeapCommitted" : { "type" : "integer" }, - "memorySegmentsTotal" : { + "nonHeapMax" : { "type" : "integer" }, - "garbageCollectors" : { - "type" : "array", - "items" : { - "type" : "object", - "id" : "urn:jsonschema:org:apache:flink:runtime:rest:messages:taskmanager:TaskManagerMetricsInfo:GarbageCollectorInfo", - "properties" : { - "name" : { - "type" : "string" - }, - "count" : { - "type" : "integer" - }, - "time" : { - "type" : "integer" - } - } - } + "nonHeapUsed" : { + "type" : "integer" } } + }, + "path" : { + "type" : "string" + }, + "slotsNumber" : { + "type" : "integer" + }, + "timeSinceLastHeartbeat" : { + "type" : "integer" } } } diff --git a/docs/_includes/generated/restart_strategy_configuration.html b/docs/_includes/generated/restart_strategy_configuration.html index e17bbed899c47cbd922ce8f6753b1b04aadc839f..f8a0748003358c8624cf72b359d2a45adafe77a7 100644 --- a/docs/_includes/generated/restart_strategy_configuration.html +++ b/docs/_includes/generated/restart_strategy_configuration.html @@ -3,13 +3,15 @@ - + + + diff --git a/docs/_includes/generated/rocks_db_configurable_configuration.html b/docs/_includes/generated/rocks_db_configurable_configuration.html index 638a3a55747634c34f54b4c937a0bac29333d731..518ce354a6d15ae69656ba64ac74d61ff17865f5 100644 --- a/docs/_includes/generated/rocks_db_configurable_configuration.html +++ b/docs/_includes/generated/rocks_db_configurable_configuration.html @@ -3,64 +3,82 @@ - + + + + - + + - + + + + - + + - + + + + + + + + + + - + +
Key DefaultDescriptionTypeDescription
restart-strategy
(none)String Defines the restart strategy to use in case of job failures.
Accepted values are:
  • `none`, `off`, `disable`: No restart strategy.
  • `fixeddelay`, `fixed-delay`: Fixed delay restart strategy. More details can be found here.
  • `failurerate`, `failure-rate`: Failure rate restart strategy. More details can be found here.
If checkpointing is disabled, the default value is `none`. If checkpointing is enabled, the default value is `fixed-delay` with `Integer.MAX_VALUE` restart attempts and '`1 s`' delay.
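To make the relationship between these keys concrete, a minimal flink-conf.yaml sketch follows; the attempt count and delay are illustrative values, not defaults, and the companion `restart-strategy.fixed-delay.*` keys are assumed from the fixed-delay strategy's own documentation rather than shown in the table above.

```yaml
# Illustrative flink-conf.yaml fragment: retry a failed job three times, 10 s apart.
restart-strategy: fixed-delay
restart-strategy.fixed-delay.attempts: 3
restart-strategy.fixed-delay.delay: 10 s
```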
Key DefaultDescriptionTypeDescription
state.backend.rocksdb.block.blocksize
(none)MemorySize The approximate size (in bytes) of user data packed per block. RocksDB has default blocksize as '4KB'.
state.backend.rocksdb.block.cache-size
(none)MemorySize The amount of the cache for data blocks in RocksDB. RocksDB has default block-cache size as '8MB'.
state.backend.rocksdb.compaction.level.max-size-level-base
(none) - The upper-bound of the total size of level base files in bytes. RocksDB has default configuration as '10MB'. + MemorySize + The upper-bound of the total size of level base files in bytes. RocksDB has default configuration as '256MB'.
state.backend.rocksdb.compaction.level.target-file-size-base
(none) - The target file size for compaction, which determines a level-1 file size. RocksDB has default configuration as '2MB'. + MemorySize + The target file size for compaction, which determines a level-1 file size. RocksDB has default configuration as '64MB'.
state.backend.rocksdb.compaction.level.use-dynamic-size
(none)Boolean If true, RocksDB will pick target size of each level dynamically. From an empty DB, RocksDB would make last level the base level, which means merging L0 data into the last level, until it exceeds max_bytes_for_level_base. And then repeat this process for second last level and so on. RocksDB has default configuration as 'false'. For more information, please refer to RocksDB's doc.
state.backend.rocksdb.compaction.style
(none)

Enum

Possible values: [LEVEL, UNIVERSAL, FIFO]
The specified compaction style for DB. Candidate compaction style is LEVEL, FIFO or UNIVERSAL, and RocksDB choose 'LEVEL' as default style.
state.backend.rocksdb.files.open
(none) - The maximum number of open files that can be used by the DB, '-1' means no limit. RocksDB has default configuration as '5000'. + Integer + The maximum number of open files (per TaskManager) that can be used by the DB, '-1' means no limit. RocksDB has default configuration as '-1'.
state.backend.rocksdb.thread.num
(none) - The maximum number of concurrent background flush and compaction jobs. RocksDB has default configuration as '1'. + Integer + The maximum number of concurrent background flush and compaction jobs (per TaskManager). RocksDB has default configuration as '1'.
state.backend.rocksdb.write-batch-size
2 mb MemorySize The max size of the consumed memory for RocksDB batch write; it will flush just based on item count if this config is set to 0.
state.backend.rocksdb.writebuffer.count
(none) Integer The maximum number of write buffers that are built up in memory. RocksDB has default configuration as '2'.
state.backend.rocksdb.writebuffer.number-to-merge
(none)Integer The minimum number of write buffers that will be merged together before writing to storage. RocksDB has default configuration as '1'.
state.backend.rocksdb.writebuffer.size
(none) - The amount of data built up in memory (backed by an unsorted log on disk) before converting to sorted on-disk files. RocksDB has default writebuffer size as '4MB'. + MemorySize + The amount of data built up in memory (backed by an unsorted log on disk) before converting to sorted on-disk files. RocksDB has default writebuffer size as '64MB'.
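As a hedged illustration of how the options above are typically combined, the following flink-conf.yaml fragment tunes RocksDB for a write-heavy job; all values are examples rather than recommendations, and any key left unset keeps the RocksDB default noted in the table.

```yaml
# Example values only, not recommendations.
# More background flush/compaction threads per TaskManager.
state.backend.rocksdb.thread.num: 4
# Larger memtables before they are flushed to disk.
state.backend.rocksdb.writebuffer.size: 128 mb
# Allow more write buffers to build up in memory.
state.backend.rocksdb.writebuffer.count: 4
# Larger block cache for read-heavy access patterns.
state.backend.rocksdb.block.cache-size: 256 mb
```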
diff --git a/docs/_includes/generated/rocks_db_configuration.html b/docs/_includes/generated/rocks_db_configuration.html index 88e8c466f92b58d21723a163a91f30b0ece5af07..3ce04c75d2d98155be024936bfd30978f17d87ad 100644 --- a/docs/_includes/generated/rocks_db_configuration.html +++ b/docs/_includes/generated/rocks_db_configuration.html @@ -3,39 +3,64 @@ Key Default - Description + Type + Description
state.backend.rocksdb.checkpoint.transfer.thread.num
1 - The number of threads used to transfer (download and upload) files in RocksDBStateBackend. + Integer + The number of threads (per stateful operator) used to transfer (download and upload) files in RocksDBStateBackend.
state.backend.rocksdb.localdir
(none) + String The local directory (on the TaskManager) where RocksDB puts its files. + +
state.backend.rocksdb.memory.fixed-per-slot
+ (none) + MemorySize + The fixed total amount of memory, shared among all RocksDB instances per slot. This option overrides the 'state.backend.rocksdb.memory.managed' option when configured. If neither this option nor the 'state.backend.rocksdb.memory.managed' option are set, then each RocksDB column family state has its own memory caches (as controlled by the column family options). + +
state.backend.rocksdb.memory.high-prio-pool-ratio
+ 0.1 + Double + The fraction of cache memory that is reserved for high-priority data like index, filter, and compression dictionary blocks. This option only has an effect when 'state.backend.rocksdb.memory.managed' or 'state.backend.rocksdb.memory.fixed-per-slot' are configured. + + +
state.backend.rocksdb.memory.managed
+ true + Boolean + If set, the RocksDB state backend will automatically configure itself to use the managed memory budget of the task slot, and divide the memory over write buffers, indexes, block caches, etc. That way, the three major uses of memory of RocksDB will be capped. + + +
state.backend.rocksdb.memory.write-buffer-ratio
+ 0.5 + Double + The maximum amount of memory that write buffers may take, as a fraction of the total shared memory. This option only has an effect when 'state.backend.rocksdb.memory.managed' or 'state.backend.rocksdb.memory.fixed-per-slot' are configured. +
state.backend.rocksdb.options-factory
"org.apache.flink.contrib.streaming.state.DefaultConfigurableOptionsFactory" + String The options factory class for RocksDB to create DBOptions and ColumnFamilyOptions. The default options factory is org.apache.flink.contrib.streaming.state.DefaultConfigurableOptionsFactory, and it would read the configured options which provided in 'RocksDBConfigurableOptions'.
state.backend.rocksdb.predefined-options
"DEFAULT" + String The predefined settings for RocksDB DBOptions and ColumnFamilyOptions by Flink community. Current supported candidate predefined-options are DEFAULT, SPINNING_DISK_OPTIMIZED, SPINNING_DISK_OPTIMIZED_HIGH_MEM or FLASH_SSD_OPTIMIZED. Note that user customized options and options from the OptionsFactory are applied on top of these predefined ones.
state.backend.rocksdb.timer-service.factory
- "HEAP" + "ROCKSDB" + String This determines the factory for timer service state implementation. Options are either HEAP (heap-based, default) or ROCKSDB for an implementation based on RocksDB . - -
state.backend.rocksdb.ttl.compaction.filter.enabled
- false - This determines if compaction filter to cleanup state with TTL is enabled for backend.Note: User can still decide in state TTL configuration in state descriptor whether the filter is active for particular state or not. - diff --git a/docs/_includes/generated/rocks_db_native_metric_configuration.html b/docs/_includes/generated/rocks_db_native_metric_configuration.html index 78fc523b35bf8c3c5c71aa7cb90be41929f1a955..82c573811a71dfafd23a32c746ab567212e528df 100644 --- a/docs/_includes/generated/rocks_db_native_metric_configuration.html +++ b/docs/_includes/generated/rocks_db_native_metric_configuration.html @@ -3,113 +3,165 @@ Key Default - Description + Type + Description
state.backend.rocksdb.metrics.actual-delayed-write-rate
false + Boolean Monitor the current actual delayed write rate. 0 means no delay.
state.backend.rocksdb.metrics.background-errors
false + Boolean Monitor the number of background errors in RocksDB. + +
state.backend.rocksdb.metrics.block-cache-capacity
+ false + Boolean + Monitor block cache capacity. + + +
state.backend.rocksdb.metrics.block-cache-pinned-usage
+ false + Boolean + Monitor the memory size for the entries being pinned in block cache. + + +
state.backend.rocksdb.metrics.block-cache-usage
+ false + Boolean + Monitor the memory size for the entries residing in block cache. + + +
state.backend.rocksdb.metrics.column-family-as-variable
+ false + Boolean + Whether to expose the column family as a variable. +
state.backend.rocksdb.metrics.compaction-pending
false + Boolean Track pending compactions in RocksDB. Returns 1 if a compaction is pending, 0 otherwise.
state.backend.rocksdb.metrics.cur-size-active-mem-table
false + Boolean Monitor the approximate size of the active memtable in bytes.
state.backend.rocksdb.metrics.cur-size-all-mem-tables
false + Boolean Monitor the approximate size of the active and unflushed immutable memtables in bytes.
state.backend.rocksdb.metrics.estimate-live-data-size
false + Boolean Estimate of the amount of live data in bytes.
state.backend.rocksdb.metrics.estimate-num-keys
false + Boolean Estimate the number of keys in RocksDB.
state.backend.rocksdb.metrics.estimate-pending-compaction-bytes
false + Boolean Estimated total number of bytes compaction needs to rewrite to get all levels down to under target size. Not valid for other compactions than level-based.
state.backend.rocksdb.metrics.estimate-table-readers-mem
false + Boolean Estimate the memory used for reading SST tables, excluding memory used in block cache (e.g., filter and index blocks) in bytes. + +
state.backend.rocksdb.metrics.is-write-stopped
+ false + Boolean + Track whether write has been stopped in RocksDB. Returns 1 if write has been stopped, 0 otherwise. +
state.backend.rocksdb.metrics.mem-table-flush-pending
false + Boolean Monitor the number of pending memtable flushes in RocksDB.
state.backend.rocksdb.metrics.num-deletes-active-mem-table
false + Boolean Monitor the total number of delete entries in the active memtable.
state.backend.rocksdb.metrics.num-deletes-imm-mem-tables
false + Boolean Monitor the total number of delete entries in the unflushed immutable memtables.
state.backend.rocksdb.metrics.num-entries-active-mem-table
false + Boolean Monitor the total number of entries in the active memtable.
state.backend.rocksdb.metrics.num-entries-imm-mem-tables
false + Boolean Monitor the total number of entries in the unflushed immutable memtables.
state.backend.rocksdb.metrics.num-immutable-mem-table
false + Boolean Monitor the number of immutable memtables in RocksDB.
state.backend.rocksdb.metrics.num-live-versions
false + Boolean Monitor number of live versions. Version is an internal data structure. See RocksDB file version_set.h for details. More live versions often mean more SST files are held from being deleted, by iterators or unfinished compactions.
state.backend.rocksdb.metrics.num-running-compactions
false + Boolean Monitor the number of currently running compactions.
state.backend.rocksdb.metrics.num-running-flushes
false + Boolean Monitor the number of currently running flushes.
state.backend.rocksdb.metrics.num-snapshots
false + Boolean Monitor the number of unreleased snapshots of the database.
state.backend.rocksdb.metrics.size-all-mem-tables
false + Boolean Monitor the approximate size of the active, unflushed immutable, and pinned immutable memtables in bytes.
state.backend.rocksdb.metrics.total-sst-files-size
false + Boolean Monitor the total size (bytes) of all SST files.WARNING: may slow down online queries if there are too many files. diff --git a/docs/_includes/generated/savepoint_config_configuration.html b/docs/_includes/generated/savepoint_config_configuration.html new file mode 100644 index 0000000000000000000000000000000000000000..2f9b464e593cb315baa3ca689b3de3854df60098 --- /dev/null +++ b/docs/_includes/generated/savepoint_config_configuration.html @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + + + + +
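Since every metric in this table is off by default, the short flink-conf.yaml sketch below shows how a subset of the native metrics would be switched on; the selection is arbitrary and purely illustrative, and the table's own warning that some metrics can slow down online queries still applies.

```yaml
# Illustrative selection of RocksDB native metrics; all of these default to false.
state.backend.rocksdb.metrics.estimate-num-keys: true
state.backend.rocksdb.metrics.cur-size-all-mem-tables: true
state.backend.rocksdb.metrics.block-cache-usage: true
state.backend.rocksdb.metrics.num-running-compactions: true
```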
KeyDefaultTypeDescription
execution.savepoint.ignore-unclaimed-state
false Boolean Allows skipping savepoint state that cannot be restored. Allow this if you removed an operator from your pipeline after the savepoint was triggered.
execution.savepoint.path
(none)StringPath to a savepoint to restore the job from (for example hdfs:///flink/savepoint-1537).
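A minimal sketch of how these two options fit together, reusing the example path from the table; the path and the decision to ignore unclaimed state are placeholders for illustration.

```yaml
# Illustrative flink-conf.yaml fragment for restoring a job from a savepoint.
execution.savepoint.path: hdfs:///flink/savepoint-1537
# Drop state of operators that were removed since the savepoint was taken.
execution.savepoint.ignore-unclaimed-state: true
```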
diff --git a/docs/_includes/generated/security_auth_kerberos_section.html b/docs/_includes/generated/security_auth_kerberos_section.html new file mode 100644 index 0000000000000000000000000000000000000000..cf279171a54c9317a7cf86c064d7010062c54b49 --- /dev/null +++ b/docs/_includes/generated/security_auth_kerberos_section.html @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
security.kerberos.login.contexts
(none)StringA comma-separated list of login contexts to provide the Kerberos credentials to (for example, `Client,KafkaClient` to use the credentials for ZooKeeper authentication and for Kafka authentication)
security.kerberos.login.keytab
(none)StringAbsolute path to a Kerberos keytab file that contains the user credentials.
security.kerberos.login.principal
(none)StringKerberos principal name associated with the keytab.
security.kerberos.login.use-ticket-cache
trueBooleanIndicates whether to read from your Kerberos ticket cache.
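A hedged flink-conf.yaml sketch of keytab-based authentication follows; the keytab path and principal are placeholders, and the `Client,KafkaClient` contexts are the example given in the table above.

```yaml
# Placeholder keytab and principal; adjust to your environment.
security.kerberos.login.use-ticket-cache: false
security.kerberos.login.keytab: /path/to/flink.keytab
security.kerberos.login.principal: flink-user@EXAMPLE.COM
# Hand the credentials to ZooKeeper and Kafka authentication, as in the table's example.
security.kerberos.login.contexts: Client,KafkaClient
```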
diff --git a/docs/_includes/generated/security_auth_zk_section.html b/docs/_includes/generated/security_auth_zk_section.html new file mode 100644 index 0000000000000000000000000000000000000000..7aaaaff20d78b8bbe3bcacf9cc01c3f0fa2997f1 --- /dev/null +++ b/docs/_includes/generated/security_auth_zk_section.html @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
zookeeper.sasl.disable
falseBoolean
zookeeper.sasl.login-context-name
"Client"String
zookeeper.sasl.service-name
"zookeeper"String
diff --git a/docs/_includes/generated/security_configuration.html b/docs/_includes/generated/security_configuration.html index f23212e9c1766f585c9c141ddea5cf00186d406a..73d3c198401b7d3f31e9f8a1d5d04f730cca95ae 100644 --- a/docs/_includes/generated/security_configuration.html +++ b/docs/_includes/generated/security_configuration.html @@ -3,139 +3,202 @@ Key Default - Description + Type + Description + +
security.context.factory.classes
+ "org.apache.flink.runtime.security.contexts.HadoopSecurityContextFactory";"org.apache.flink.runtime.security.contexts.NoOpSecurityContextFactory" + List<String> + List of factories that should be used to instantiate a security context. If multiple are configured, Flink will use the first compatible factory. You should have a NoOpSecurityContextFactory in this list as a fallback. + + +
security.kerberos.login.contexts
+ (none) + String + A comma-separated list of login contexts to provide the Kerberos credentials to (for example, `Client,KafkaClient` to use the credentials for ZooKeeper authentication and for Kafka authentication) + + +
security.kerberos.login.keytab
+ (none) + String + Absolute path to a Kerberos keytab file that contains the user credentials. + + +
security.kerberos.login.principal
+ (none) + String + Kerberos principal name associated with the keytab. + + +
security.kerberos.login.use-ticket-cache
+ true + Boolean + Indicates whether to read from your Kerberos ticket cache. + + +
security.module.factory.classes
+ "org.apache.flink.runtime.security.modules.HadoopModuleFactory";"org.apache.flink.runtime.security.modules.JaasModuleFactory";"org.apache.flink.runtime.security.modules.ZookeeperModuleFactory" + List<String> + List of factories that should be used to instantiate security modules. All listed modules will be installed. Keep in mind that the configured security context might rely on some modules being present. +
security.ssl.algorithms
"TLS_RSA_WITH_AES_128_CBC_SHA" + String The comma separated list of standard SSL algorithms to be supported. Read more here + +
security.ssl.internal.cert.fingerprint
+ (none) + String + The sha1 fingerprint of the internal certificate. This further protects the internal communication to present the exact certificate used by Flink. This is necessary where one cannot use a private (self-signed) CA or where an internal firm-wide CA is required. +
security.ssl.internal.close-notify-flush-timeout
-1 + Integer The timeout (in ms) for flushing the `close_notify` that was triggered by closing a channel. If the `close_notify` was not flushed in the given timeout the channel will be closed forcibly. (-1 = use system default)
security.ssl.internal.enabled
false + Boolean Turns on SSL for internal network communication. Optionally, specific components may override this through their own settings (rpc, data transport, REST, etc).
security.ssl.internal.handshake-timeout
-1 + Integer The timeout (in ms) during SSL handshake. (-1 = use system default)
security.ssl.internal.key-password
(none) + String The secret to decrypt the key in the keystore for Flink's internal endpoints (rpc, data transport, blob server).
security.ssl.internal.keystore
(none) + String The Java keystore file with SSL Key and Certificate, to be used Flink's internal endpoints (rpc, data transport, blob server).
security.ssl.internal.keystore-password
(none) + String The secret to decrypt the keystore file for Flink's internal endpoints (rpc, data transport, blob server).
security.ssl.internal.session-cache-size
-1 + Integer The size of the cache used for storing SSL session objects. According to https://github.com/netty/netty/issues/832, you should always set this to an appropriate number to not run into a bug with stalling IO threads during garbage collection. (-1 = use system default).
security.ssl.internal.session-timeout
-1 + Integer The timeout (in ms) for the cached SSL session objects. (-1 = use system default)
security.ssl.internal.truststore
(none) + String The truststore file containing the public CA certificates to verify the peer for Flink's internal endpoints (rpc, data transport, blob server).
security.ssl.internal.truststore-password
(none) + String The password to decrypt the truststore for Flink's internal endpoints (rpc, data transport, blob server). - -
security.ssl.key-password
- (none) - The secret to decrypt the server key in the keystore. - - -
security.ssl.keystore
- (none) - The Java keystore file to be used by the flink endpoint for its SSL Key and Certificate. - - -
security.ssl.keystore-password
- (none) - The secret to decrypt the keystore file. -
security.ssl.protocol
"TLSv1.2" + String The SSL protocol version to be supported for the ssl transport. Note that it doesn’t support comma separated list.
security.ssl.provider
"JDK" + String The SSL engine provider to use for the ssl transport:`OPENSSL` is based on netty-tcnative and comes in two flavours:
security.ssl.rest.authentication-enabled
false + Boolean Turns on mutual SSL authentication for external communication via the REST endpoints. + +
security.ssl.rest.cert.fingerprint
+ (none) + String + The sha1 fingerprint of the REST certificate. This further protects the REST endpoints to present a certificate that is only used by the proxy server. This is necessary where one uses a public CA or an internal firm-wide CA. +
security.ssl.rest.enabled
false + Boolean Turns on SSL for external communication via the REST endpoints.
security.ssl.rest.key-password
(none) + String The secret to decrypt the key in the keystore for Flink's external REST endpoints.
security.ssl.rest.keystore
(none) + String The Java keystore file with SSL Key and Certificate, to be used Flink's external REST endpoints.
security.ssl.rest.keystore-password
(none) + String The secret to decrypt the keystore file for Flink's external REST endpoints.
security.ssl.rest.truststore
(none) + String The truststore file containing the public CA certificates to verify the peer for Flink's external REST endpoints.
security.ssl.rest.truststore-password
(none) + String The password to decrypt the truststore for Flink's external REST endpoints. -
security.ssl.truststore
- (none) - The truststore file containing the public CA certificates to be used by flink endpoints to verify the peer’s certificate. +
security.ssl.verify-hostname
+ true + Boolean + Flag to enable peer’s hostname verification during ssl handshake. -
security.ssl.truststore-password
- (none) - The secret to decrypt the truststore. +
zookeeper.sasl.disable
+ false + Boolean + -
security.ssl.verify-hostname
- true - Flag to enable peer’s hostname verification during ssl handshake. +
zookeeper.sasl.login-context-name
+ "Client" + String + + + +
zookeeper.sasl.service-name
+ "zookeeper" + String + diff --git a/docs/_includes/generated/security_ssl_section.html b/docs/_includes/generated/security_ssl_section.html new file mode 100644 index 0000000000000000000000000000000000000000..ae345d8e4c320bcf592d4a66895b90d35cf51e94 --- /dev/null +++ b/docs/_includes/generated/security_ssl_section.html @@ -0,0 +1,120 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
security.ssl.algorithms
"TLS_RSA_WITH_AES_128_CBC_SHA"StringThe comma separated list of standard SSL algorithms to be supported. Read more here
security.ssl.internal.cert.fingerprint
(none) String The sha1 fingerprint of the internal certificate. This further protects the internal communication to present the exact certificate used by Flink. This is necessary where one cannot use a private (self-signed) CA or where an internal firm-wide CA is required.
security.ssl.internal.enabled
falseBooleanTurns on SSL for internal network communication. Optionally, specific components may override this through their own settings (rpc, data transport, REST, etc).
security.ssl.internal.key-password
(none)StringThe secret to decrypt the key in the keystore for Flink's internal endpoints (rpc, data transport, blob server).
security.ssl.internal.keystore
(none)StringThe Java keystore file with SSL Key and Certificate, to be used Flink's internal endpoints (rpc, data transport, blob server).
security.ssl.internal.keystore-password
(none) String The secret to decrypt the keystore file for Flink's internal endpoints (rpc, data transport, blob server).
security.ssl.internal.truststore
(none)StringThe truststore file containing the public CA certificates to verify the peer for Flink's internal endpoints (rpc, data transport, blob server).
security.ssl.internal.truststore-password
(none)StringThe password to decrypt the truststore for Flink's internal endpoints (rpc, data transport, blob server).
security.ssl.protocol
"TLSv1.2"StringThe SSL protocol version to be supported for the ssl transport. Note that it doesn’t support comma separated list.
security.ssl.rest.authentication-enabled
falseBooleanTurns on mutual SSL authentication for external communication via the REST endpoints.
security.ssl.rest.cert.fingerprint
(none) String The sha1 fingerprint of the REST certificate. This further protects the REST endpoints to present a certificate that is only used by the proxy server. This is necessary where one uses a public CA or an internal firm-wide CA.
security.ssl.rest.enabled
falseBooleanTurns on SSL for external communication via the REST endpoints.
security.ssl.rest.key-password
(none)StringThe secret to decrypt the key in the keystore for Flink's external REST endpoints.
security.ssl.rest.keystore
(none)StringThe Java keystore file with SSL Key and Certificate, to be used Flink's external REST endpoints.
security.ssl.rest.keystore-password
(none) String The secret to decrypt the keystore file for Flink's external REST endpoints.
security.ssl.rest.truststore
(none)StringThe truststore file containing the public CA certificates to verify the peer for Flink's external REST endpoints.
security.ssl.rest.truststore-password
(none)StringThe password to decrypt the truststore for Flink's external REST endpoints.
security.ssl.verify-hostname
trueBooleanFlag to enable peer’s hostname verification during ssl handshake.
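Analogously for the external REST endpoint, a hedged sketch combining SSL with mutual authentication; the file locations and secrets are placeholders, and mutual authentication should only be enabled if clients can present certificates from the configured truststore.

```yaml
# Illustrative flink-conf.yaml fragment: SSL (and mutual auth) for the REST endpoint.
security.ssl.rest.enabled: true
security.ssl.rest.authentication-enabled: true
security.ssl.rest.keystore: /path/to/rest.keystore
security.ssl.rest.keystore-password: rest_keystore_secret
security.ssl.rest.key-password: rest_key_secret
security.ssl.rest.truststore: /path/to/rest.truststore
security.ssl.rest.truststore-password: rest_truststore_secret
```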
diff --git a/docs/_includes/generated/shuffle_service_configuration.html b/docs/_includes/generated/shuffle_service_configuration.html index 4cfffe71c543acbdd523150d82725bb362790b8f..bf7d0bb328169fb4256f78dcda06585136d821fb 100644 --- a/docs/_includes/generated/shuffle_service_configuration.html +++ b/docs/_includes/generated/shuffle_service_configuration.html @@ -3,13 +3,15 @@ Key Default - Description + Type + Description
shuffle-service-factory.class
"org.apache.flink.runtime.io.network.NettyShuffleServiceFactory" + String The full class name of the shuffle service factory implementation to be used by the cluster. The default implementation uses Netty for network communication and local memory as well disk space to store results on a TaskExecutor. diff --git a/docs/_includes/generated/state_backend_rocksdb_section.html b/docs/_includes/generated/state_backend_rocksdb_section.html new file mode 100644 index 0000000000000000000000000000000000000000..974c5c12abed3cf7765432929a13f384f208655a --- /dev/null +++ b/docs/_includes/generated/state_backend_rocksdb_section.html @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
state.backend.rocksdb.memory.fixed-per-slot
(none) MemorySize The fixed total amount of memory, shared among all RocksDB instances per slot. This option overrides the 'state.backend.rocksdb.memory.managed' option when configured. If neither this option nor the 'state.backend.rocksdb.memory.managed' option are set, then each RocksDB column family state has its own memory caches (as controlled by the column family options).
state.backend.rocksdb.memory.high-prio-pool-ratio
0.1DoubleThe fraction of cache memory that is reserved for high-priority data like index, filter, and compression dictionary blocks. This option only has an effect when 'state.backend.rocksdb.memory.managed' or 'state.backend.rocksdb.memory.fixed-per-slot' are configured.
state.backend.rocksdb.memory.managed
trueBooleanIf set, the RocksDB state backend will automatically configure itself to use the managed memory budget of the task slot, and divide the memory over write buffers, indexes, block caches, etc. That way, the three major uses of memory of RocksDB will be capped.
state.backend.rocksdb.memory.write-buffer-ratio
0.5DoubleThe maximum amount of memory that write buffers may take, as a fraction of the total shared memory. This option only has an effect when 'state.backend.rocksdb.memory.managed' or 'state.backend.rocksdb.memory.fixed-per-slot' are configured.
state.backend.rocksdb.timer-service.factory
"ROCKSDB"StringThis determines the factory for timer service state implementation. Options are either HEAP (heap-based, default) or ROCKSDB for an implementation based on RocksDB .
diff --git a/docs/_includes/generated/stream_pipeline_configuration.html b/docs/_includes/generated/stream_pipeline_configuration.html new file mode 100644 index 0000000000000000000000000000000000000000..0fdefe9c4f17a06129e936232d909e3ee1ca5fe0 --- /dev/null +++ b/docs/_includes/generated/stream_pipeline_configuration.html @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + +
KeyDefaultTypeDescription
pipeline.time-characteristic
ProcessingTime

Enum

Possible values: [ProcessingTime, IngestionTime, EventTime]
The time characteristic for all created streams, e.g., processing time, event time, or ingestion time.

If you set the characteristic to IngestionTime or EventTime this will set a default watermark update interval of 200 ms. If this is not applicable for your application you should change it using `pipeline.auto-watermark-interval`.
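A short sketch of this pipeline-level option, assuming event-time processing; the watermark interval shown is an example override of the 200 ms default mentioned above, via the `pipeline.auto-watermark-interval` option referenced there.

```yaml
# Illustrative flink-conf.yaml fragment.
pipeline.time-characteristic: EventTime
# Example override of the default 200 ms watermark update interval.
pipeline.auto-watermark-interval: 100 ms
```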
diff --git a/docs/_includes/generated/task_manager_configuration.html b/docs/_includes/generated/task_manager_configuration.html index ca649b526ae77e8ded9922db038119a812f70482..db02deee1fe7fe98a5f0c41c3b542e525b232203 100644 --- a/docs/_includes/generated/task_manager_configuration.html +++ b/docs/_includes/generated/task_manager_configuration.html @@ -3,89 +3,94 @@ Key Default - Description + Type + Description
task.cancellation.interval
30000 + Long Time interval between two successive task cancellation attempts in milliseconds.
task.cancellation.timeout
180000 + Long Timeout in milliseconds after which a task cancellation times out and leads to a fatal TaskManager error. A value of 0 deactivates the watch dog.
task.cancellation.timers.timeout
7500 + Long Time we wait for the timers in milliseconds to finish all pending timer threads when the stream task is cancelled. - -
task.checkpoint.alignment.max-size
- -1 - The maximum number of bytes that a checkpoint alignment may buffer. If the checkpoint alignment buffers more than the configured amount of data, the checkpoint is aborted (skipped). A value of -1 indicates that there is no limit. -
taskmanager.debug.memory.log
false + Boolean Flag indicating whether to start a thread, which repeatedly logs the memory usage of the JVM.
taskmanager.debug.memory.log-interval
5000 + Long The interval (in ms) for the log thread to log the current memory usage. - -
taskmanager.exit-on-fatal-akka-error
- false - Whether the quarantine monitor for task managers shall be started. The quarantine monitor shuts down the actor system if it detects that it has quarantined another actor system or if it has been quarantined by another actor system. -
taskmanager.host
(none) + String The address of the network interface that the TaskManager binds to. This option can be used to define explicitly a binding address. Because different TaskManagers need different values for this option, usually it is specified in an additional non-shared TaskManager-specific config file.
taskmanager.jvm-exit-on-oom
false + Boolean Whether to kill the TaskManager when the task thread throws an OutOfMemoryError.
taskmanager.network.bind-policy
"ip" + String The automatic address binding policy used by the TaskManager if "taskmanager.host" is not set. The value should be one of the following:
taskmanager.numberOfTaskSlots
1 + Integer The number of parallel operator or user function instances that a single TaskManager can run. If this value is larger than 1, a single TaskManager takes multiple instances of a function or operator. That way, the TaskManager can utilize multiple CPU cores, but at the same time, the available memory is divided between the different operator or function instances. This value is typically proportional to the number of physical CPU cores that the TaskManager's machine has (e.g., equal to the number of cores, or half the number of cores).
taskmanager.registration.initial-backoff
- "500 ms" + 500 ms + Duration The initial registration backoff between two consecutive registration attempts. The backoff is doubled for each new registration attempt until it reaches the maximum registration backoff.
taskmanager.registration.max-backoff
- "30 s" + 30 s + Duration The maximum registration backoff between two consecutive registration attempts. The max registration backoff requires a time unit specifier (ms/s/min/h/d).
taskmanager.registration.refused-backoff
- "10 s" + 10 s + Duration The backoff after a registration has been refused by the job manager before retrying to connect.
taskmanager.registration.timeout
- "5 min" + 5 min + Duration Defines the timeout for the TaskManager registration. If the duration is exceeded without a successful registration, then the TaskManager terminates.
taskmanager.rpc.port
"0" + String The task manager’s IPC port. Accepts a list of ports (“50100,50101”), ranges (“50100-50200”) or a combination of both. It is recommended to set a range of ports to avoid collisions when multiple TaskManagers are running on the same machine. diff --git a/docs/_includes/generated/task_manager_memory_configuration.html b/docs/_includes/generated/task_manager_memory_configuration.html index e05115d4b4675fcd7571eb08148306cf4116b4bb..a7777e475369984da8f04909c3de8e7d947538ff 100644 --- a/docs/_includes/generated/task_manager_memory_configuration.html +++ b/docs/_includes/generated/task_manager_memory_configuration.html @@ -3,99 +3,106 @@ Key Default - Description + Type + Description + +
taskmanager.memory.flink.size
+ (none) + MemorySize + Total Flink Memory size for the TaskExecutors. This includes all the memory that a TaskExecutor consumes, except for JVM Metaspace and JVM Overhead. It consists of Framework Heap Memory, Task Heap Memory, Task Off-Heap Memory, Managed Memory, and Network Memory. See also 'taskmanager.memory.process.size' for total process memory size configuration. +
taskmanager.memory.framework.heap.size
- "128m" + 128 mb + MemorySize Framework Heap Memory size for TaskExecutors. This is the size of JVM heap memory reserved for TaskExecutor framework, which will not be allocated to task slots. + +
taskmanager.memory.framework.off-heap.size
+ 128 mb + MemorySize + Framework Off-Heap Memory size for TaskExecutors. This is the size of off-heap memory (JVM direct memory and native memory) reserved for TaskExecutor framework, which will not be allocated to task slots. The configured value will be fully counted when Flink calculates the JVM max direct memory size parameter. +
taskmanager.memory.jvm-metaspace.size
- "192m" + 96 mb + MemorySize JVM Metaspace Size for the TaskExecutors.
taskmanager.memory.jvm-overhead.fraction
0.1 - Fraction of Total Process Memory to be reserved for JVM Overhead. This is off-heap memory reserved for JVM overhead, such as thread stack space, I/O direct memory, compile cache, etc. The size of JVM Overhead is derived to make up the configured fraction of the Total Process Memory. If the derived size is less/greater than the configured min/max size, the min/max size will be used. The exact size of JVM Overhead can be explicitly specified by setting the min/max size to the same value. + Float + Fraction of Total Process Memory to be reserved for JVM Overhead. This is off-heap memory reserved for JVM overhead, such as thread stack space, compile cache, etc. This includes native memory but not direct memory, and will not be counted when Flink calculates JVM max direct memory size parameter. The size of JVM Overhead is derived to make up the configured fraction of the Total Process Memory. If the derived size is less/greater than the configured min/max size, the min/max size will be used. The exact size of JVM Overhead can be explicitly specified by setting the min/max size to the same value.
taskmanager.memory.jvm-overhead.max
- "1g" - Max JVM Overhead size for the TaskExecutors. This is off-heap memory reserved for JVM overhead, such as thread stack space, I/O direct memory, compile cache, etc. The size of JVM Overhead is derived to make up the configured fraction of the Total Process Memory. If the derived size is less/greater than the configured min/max size, the min/max size will be used. The exact size of JVM Overhead can be explicitly specified by setting the min/max size to the same value. + 1 gb + MemorySize + Max JVM Overhead size for the TaskExecutors. This is off-heap memory reserved for JVM overhead, such as thread stack space, compile cache, etc. This includes native memory but not direct memory, and will not be counted when Flink calculates JVM max direct memory size parameter. The size of JVM Overhead is derived to make up the configured fraction of the Total Process Memory. If the derived size is less/greater than the configured min/max size, the min/max size will be used. The exact size of JVM Overhead can be explicitly specified by setting the min/max size to the same value.
taskmanager.memory.jvm-overhead.min
- "128m" - Min JVM Overhead size for the TaskExecutors. This is off-heap memory reserved for JVM overhead, such as thread stack space, I/O direct memory, compile cache, etc. The size of JVM Overhead is derived to make up the configured fraction of the Total Process Memory. If the derived size is less/greater than the configured min/max size, the min/max size will be used. The exact size of JVM Overhead can be explicitly specified by setting the min/max size to the same value. + 192 mb + MemorySize + Min JVM Overhead size for the TaskExecutors. This is off-heap memory reserved for JVM overhead, such as thread stack space, compile cache, etc. This includes native memory but not direct memory, and will not be counted when Flink calculates JVM max direct memory size parameter. The size of JVM Overhead is derived to make up the configured fraction of the Total Process Memory. If the derived size is less/greater than the configured min/max size, the min/max size will be used. The exact size of JVM Overhead can be explicitly specified by setting the min/max size to the same value.
taskmanager.memory.managed.fraction
- 0.5 + 0.4 + Float Fraction of Total Flink Memory to be used as Managed Memory, if Managed Memory size is not explicitly specified. - -
taskmanager.memory.managed.off-heap.fraction
- -1.0 - Fraction of Managed Memory that Off-Heap Managed Memory takes, if Off-Heap Managed Memory size is not explicitly specified. If the fraction is not explicitly specified (or configured with negative values), it will be derived from the legacy config option 'taskmanager.memory.off-heap', to use either all on-heap memory or all off-heap memory for Managed Memory. - - -
taskmanager.memory.managed.off-heap.size
- (none) - Off-Heap Managed Memory size for TaskExecutors. This is the part of Managed Memory that is off-heap, while the remaining is on-heap. If unspecified, it will be derived to make up the configured fraction of the Managed Memory size. -
taskmanager.memory.managed.size
(none) - Managed Memory size for TaskExecutors. This is the size of memory managed by the memory manager, including both On-Heap Managed Memory and Off-Heap Managed Memory, reserved for sorting, hash tables, caching of intermediate results and state backends. Memory consumers can either allocate memory from the memory manager in the form of MemorySegments, or reserve bytes from the memory manager and keep their memory usage within that boundary. If unspecified, it will be derived to make up the configured fraction of the Total Flink Memory. - - -
taskmanager.memory.preallocate
- false - Whether TaskManager managed memory should be pre-allocated when the TaskManager is starting. When `taskmanager.memory.off-heap` is set to true, then it is advised that this configuration is also set to true. If this configuration is set to false cleaning up of the allocated off-heap memory happens only when the configured JVM parameter MaxDirectMemorySize is reached by triggering a full GC. For streaming setups, it is highly recommended to set this value to false as the core state backends currently do not use the managed memory. + MemorySize + Managed Memory size for TaskExecutors. This is the size of off-heap memory managed by the memory manager, reserved for sorting, hash tables, caching of intermediate results and RocksDB state backend. Memory consumers can either allocate memory from the memory manager in the form of MemorySegments, or reserve bytes from the memory manager and keep their memory usage within that boundary. If unspecified, it will be derived to make up the configured fraction of the Total Flink Memory. -
taskmanager.memory.segment-size
- "32kb" - Size of memory buffers used by the network stack and the memory manager. - - -
taskmanager.memory.shuffle.fraction
+
taskmanager.memory.network.fraction
0.1 - Fraction of Total Flink Memory to be used as Shuffle Memory. Shuffle Memory is off-heap memory reserved for ShuffleEnvironment (e.g., network buffers). Shuffle Memory size is derived to make up the configured fraction of the Total Flink Memory. If the derived size is less/greater than the configured min/max size, the min/max size will be used. The exact size of Shuffle Memory can be explicitly specified by setting the min/max size to the same value. + Float + Fraction of Total Flink Memory to be used as Network Memory. Network Memory is off-heap memory reserved for ShuffleEnvironment (e.g., network buffers). Network Memory size is derived to make up the configured fraction of the Total Flink Memory. If the derived size is less/greater than the configured min/max size, the min/max size will be used. The exact size of Network Memory can be explicitly specified by setting the min/max size to the same value. -
taskmanager.memory.shuffle.max
- "1g" - Max Shuffle Memory size for TaskExecutors. Shuffle Memory is off-heap memory reserved for ShuffleEnvironment (e.g., network buffers). Shuffle Memory size is derived to make up the configured fraction of the Total Flink Memory. If the derived size is less/greater than the configured min/max size, the min/max size will be used. The exact size of Shuffle Memory can be explicitly specified by setting the min/max to the same value. +
taskmanager.memory.network.max
+ 1 gb + MemorySize + Max Network Memory size for TaskExecutors. Network Memory is off-heap memory reserved for ShuffleEnvironment (e.g., network buffers). Network Memory size is derived to make up the configured fraction of the Total Flink Memory. If the derived size is less/greater than the configured min/max size, the min/max size will be used. The exact size of Network Memory can be explicitly specified by setting the min/max to the same value. -
taskmanager.memory.shuffle.min
- "64m" - Min Shuffle Memory size for TaskExecutors. Shuffle Memory is off-heap memory reserved for ShuffleEnvironment (e.g., network buffers). Shuffle Memory size is derived to make up the configured fraction of the Total Flink Memory. If the derived size is less/greater than the configured min/max size, the min/max size will be used. The exact size of Shuffle Memory can be explicitly specified by setting the min/max to the same value. +
taskmanager.memory.network.min
+ 64 mb + MemorySize + Min Network Memory size for TaskExecutors. Network Memory is off-heap memory reserved for ShuffleEnvironment (e.g., network buffers). Network Memory size is derived to make up the configured fraction of the Total Flink Memory. If the derived size is less/greater than the configured min/max size, the min/max size will be used. The exact size of Network Memory can be explicitly specified by setting the min/max to the same value. -
taskmanager.memory.task.heap.size
+
taskmanager.memory.process.size
(none) - Task Heap Memory size for TaskExecutors. This is the size of JVM heap memory reserved for user code. If not specified, it will be derived as Total Flink Memory minus Framework Heap Memory, Task Off-Heap Memory, (On-Heap and Off-Heap) Managed Memory and Shuffle Memory. + MemorySize + Total Process Memory size for the TaskExecutors. This includes all the memory that a TaskExecutor consumes, consisting of Total Flink Memory, JVM Metaspace, and JVM Overhead. On containerized setups, this should be set to the container memory. See also 'taskmanager.memory.flink.size' for total Flink memory size configuration. -
taskmanager.memory.task.off-heap.size
- "0b" - Task Heap Memory size for TaskExecutors. This is the size of off heap memory (JVM direct memory or native memory) reserved for user code. +
taskmanager.memory.segment-size
+ 32 kb + MemorySize + Size of memory buffers used by the network stack and the memory manager. -
taskmanager.memory.total-flink.size
+
taskmanager.memory.task.heap.size
(none) - Total Flink Memory size for the TaskExecutors. This includes all the memory that a TaskExecutor consumes, except for JVM Metaspace and JVM Overhead. It consists of Framework Heap Memory, Task Heap Memory, Task Off-Heap Memory, Managed Memory, and Shuffle Memory. + MemorySize + Task Heap Memory size for TaskExecutors. This is the size of JVM heap memory reserved for tasks. If not specified, it will be derived as Total Flink Memory minus Framework Heap Memory, Task Off-Heap Memory, Managed Memory and Network Memory. -
taskmanager.memory.total-process.size
- (none) - Total Process Memory size for the TaskExecutors. This includes all the memory that a TaskExecutor consumes, consisting of Total Flink Memory, JVM Metaspace, and JVM Overhead. On containerized setups, this should be set to the container memory. +
taskmanager.memory.task.off-heap.size
+ 0 bytes + MemorySize + Task Off-Heap Memory size for TaskExecutors. This is the size of off heap memory (JVM direct memory and native memory) reserved for tasks. The configured value will be fully counted when Flink calculates the JVM max direct memory size parameter. diff --git a/docs/_includes/generated/web_configuration.html b/docs/_includes/generated/web_configuration.html index a09f666941f48ff2688796849577a4a56a8ee383..f377f51a05fda3256fcb6bdbdebef5e708a6cd61 100644 --- a/docs/_includes/generated/web_configuration.html +++ b/docs/_includes/generated/web_configuration.html @@ -3,83 +3,87 @@ Key Default - Description + Type + Description
web.access-control-allow-origin
"*" + String Access-Control-Allow-Origin header for all responses from the web-frontend. - -
web.address
- (none) - Address for runtime monitor web-frontend server. -
web.backpressure.cleanup-interval
600000 + Integer Time, in milliseconds, after which cached stats are cleaned up if not accessed.
web.backpressure.delay-between-samples
50 - Delay between stack trace samples to determine back pressure in milliseconds. + Integer + Delay between samples to determine back pressure in milliseconds.
web.backpressure.num-samples
100 - Number of stack trace samples to take to determine back pressure. + Integer + Number of samples to take to determine back pressure.
web.backpressure.refresh-interval
60000 + Integer Time, in milliseconds, after which available stats are deprecated and need to be refreshed (by resampling).
web.checkpoints.history
10 + Integer Number of checkpoints to remember for recent history.
web.history
5 + Integer Number of archived jobs for the JobManager.
web.log.path
(none) + String Path to the log file (may be in /log for standalone but under log directory when using YARN).
web.refresh-interval
3000 + Long Refresh interval for the web-frontend in milliseconds. - -
web.ssl.enabled
- true - Flag indicating whether to override SSL support for the JobManager Web UI. -
web.submit.enable
true + Boolean Flag indicating whether jobs can be uploaded and run from the web-frontend.
web.timeout
10000 + Long Timeout for asynchronous operations by the web monitor in milliseconds.
web.tmpdir
System.getProperty("java.io.tmpdir") + String Flink web directory which is used by the webmonitor.
web.upload.dir
(none) + String Directory for uploading the job jars. If not specified, a dynamic directory will be used under the directory specified by JOB_MANAGER_WEB_TMPDIR_KEY.
diff --git a/docs/_includes/generated/yarn_config_configuration.html b/docs/_includes/generated/yarn_config_configuration.html
index a28f15bdb05dc07047f62efa97cec3a83838a322..3c32b856a0ee83bd5ae6bacaf90808da64a104d6 100644
--- a/docs/_includes/generated/yarn_config_configuration.html
+++ b/docs/_includes/generated/yarn_config_configuration.html
@@ -3,78 +3,123 @@
     Key
     Default
-    Description
+    Type
+    Description
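The web.backpressure.* options above control how the web frontend measures back pressure by repeatedly sampling tasks. A minimal flink-conf.yaml sketch that simply spells out the documented defaults, so the knobs are easy to find when tuning:

```yaml
# Hypothetical flink-conf.yaml excerpt -- values repeat the documented defaults.
web.backpressure.num-samples: 100            # samples taken per back pressure measurement
web.backpressure.delay-between-samples: 50   # milliseconds between consecutive samples
web.backpressure.refresh-interval: 60000     # milliseconds before available stats are refreshed
web.backpressure.cleanup-interval: 600000    # milliseconds after which unaccessed stats are cleaned up
```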
yarn.application-attempt-failures-validity-interval
10000 + Long Time window in milliseconds which defines the number of application attempt failures when restarting the AM. Failures which fall outside of this window are not considered. Set this value to -1 in order to count globally. See here for more information.
yarn.application-attempts
(none) + String Number of ApplicationMaster restarts. Note that the entire Flink cluster will restart and the YARN Client will lose the connection. Also, the JobManager address will change and you’ll need to set the JM host:port manually. It is recommended to leave this option at 1.
yarn.application-master.port
"0" + String With this configuration option, users can specify a port, a range of ports or a list of ports for the Application Master (and JobManager) RPC port. By default we recommend using the default value (0) to let the operating system choose an appropriate port. In particular when multiple AMs are running on the same physical host, fixed port assignments prevent the AM from starting. For example when running Flink on YARN on an environment with a restrictive firewall, this option allows specifying a range of allowed ports. + +
yarn.application.id
+ (none) + String + The YARN application id of the running YARN cluster. This is the cluster on which the pipeline is going to be executed. + +
yarn.application.name
+ (none) + String + A custom name for your YARN application. + + +
yarn.application.node-label
+ (none) + String + Specify YARN node label for the YARN application. +
yarn.application.priority
-1 + Integer A non-negative integer indicating the priority for submitting a Flink YARN application. It will only take effect if YARN priority scheduling setting is enabled. Larger integer corresponds with higher priority. If priority is negative or set to '-1'(default), Flink will unset yarn priority setting and use cluster default priority. Please refer to YARN's official documentation for specific settings required to enable priority scheduling for the targeted YARN version. -
yarn.appmaster.rpc.address
+
yarn.application.queue
(none) - The hostname or address where the application master RPC system is listening. + String + The YARN queue on which to put the current pipeline. -
yarn.appmaster.rpc.port
- -1 - The port where the application master RPC system is listening. +
yarn.application.type
+ (none) + String + A custom type for your YARN application.
yarn.appmaster.vcores
1 + Integer The number of virtual cores (vcores) used by YARN application master.
yarn.containers.vcores
-1 + Integer The number of virtual cores (vcores) per YARN container. By default, the number of vcores is set to the number of slots per TaskManager, if set, or to 1 otherwise. In order for this parameter to be used, your cluster must have CPU scheduling enabled, for example by using the `org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler`. + +
yarn.file-replication
+ -1 + Integer + Number of replications for each local resource file. If it is not configured, Flink will use the default replication value in the Hadoop configuration. + +
yarn.flink-dist-jar
+ (none) + String + The location of the Flink dist jar. +
yarn.heartbeat.container-request-interval
500 + Integer Time between heartbeats with the ResourceManager in milliseconds if Flink requests containers: if you observe too many container allocations on the ResourceManager, it is recommended to increase this value. See this link for more information.
yarn.heartbeat.interval
5 + Integer Time between heartbeats with the ResourceManager in seconds. - -
yarn.maximum-failed-containers
- (none) - Maximum number of containers the system is going to reallocate in case of a failure. -
yarn.per-job-cluster.include-user-jar
"ORDER" - Defines whether user-jars are included in the system class path for per-job-clusters as well as their positioning in the path. They can be positioned at the beginning ("FIRST"), at the end ("LAST"), or be positioned based on their name ("ORDER"). + String + Defines whether user-jars are included in the system class path for per-job-clusters as well as their positioning in the path. They can be positioned at the beginning ("FIRST"), at the end ("LAST"), or be positioned based on their name ("ORDER"). "DISABLED" means the user-jars are excluded from the system class path.
yarn.properties-file.location
(none) + String When a Flink job is submitted to YARN, the JobManager’s host and the number of available processing slots are written into a properties file, so that the Flink client is able to pick those details up. This configuration parameter allows changing the default location of that file (for example, for environments sharing a Flink installation between users). + +
yarn.ship-directories
+ (none) + List<String> + A semicolon-separated list of directories to be shipped to the YARN cluster. +
yarn.tags
(none) + String A comma-separated list of tags to apply to the Flink YARN application.
diff --git a/docs/_includes/generated/zoo_keeper_configuration.html b/docs/_includes/generated/zoo_keeper_configuration.html
index e4c7fc66773468a1c889b56243056db66f86cf68..7aaaaff20d78b8bbe3bcacf9cc01c3f0fa2997f1 100644
--- a/docs/_includes/generated/zoo_keeper_configuration.html
+++ b/docs/_includes/generated/zoo_keeper_configuration.html
@@ -3,23 +3,27 @@
     Key
     Default
-    Description
+    Type
+    Description
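Several of the yarn.* options above are client-side settings describing how the Flink YARN application is registered and what gets shipped to the cluster. A minimal flink-conf.yaml sketch with made-up values, assuming the options are set in the configuration file rather than on the command line:

```yaml
# Hypothetical flink-conf.yaml excerpt -- all values are made up for illustration.
yarn.application.name: my-flink-pipeline      # custom name for the YARN application
yarn.application.queue: streaming             # YARN queue on which to put the pipeline
yarn.application.node-label: flink            # YARN node label for the application
yarn.ship-directories: /opt/flink/plugins;/opt/flink/usrlib   # semicolon-separated directories shipped to YARN
yarn.application-attempts: 1                  # recommended value per the description above
```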
zookeeper.sasl.disable
false + Boolean
zookeeper.sasl.login-context-name
"Client" + String
zookeeper.sasl.service-name
"zookeeper" + String diff --git a/docs/_includes/sidenav.html b/docs/_includes/sidenav.html index 1073d99caae72b77af41de00b09b7ec067f0f036..65dcc92ce039ce0fb693ec6e79f30cfda7b8b41a 100644 --- a/docs/_includes/sidenav.html +++ b/docs/_includes/sidenav.html @@ -69,7 +69,9 @@ level is determined by 'nav-pos'. {%- assign posStack = site.array -%} {%- assign elements = site.array -%} -{%- assign children = (site.pages_by_language[page.language] | where: "nav-parent_id" , "root" | sort: "nav-pos") -%} +{%- assign all_pages_by_nav_parent = (site.pages_by_language[page.language] | where_exp: "item", "item.nav-parent_id != nil" | group_by: "nav-parent_id") -%} +{%- assign children = (all_pages_by_nav_parent | where: "name" , "root") -%} +{%- assign children = (children[0].items | sort: "nav-pos") -%} {%- if children.size > 0 -%} {%- assign elements = elements | push: children -%} {%- endif -%} @@ -78,7 +80,7 @@ level is determined by 'nav-pos'. {%- assign pos = 0 -%}